Upgrade qemu-tools package to version 8.2.2 92/322992/1
authorwanchao.xu <wanchao.xu@samsung.com>
Tue, 15 Apr 2025 11:52:14 +0000 (19:52 +0800)
committerwanchao.xu <wanchao.xu@samsung.com>
Tue, 15 Apr 2025 11:52:14 +0000 (19:52 +0800)
* Upgrade qemu-tools based on qemu 8.2.2
* Remove the source code of qemu which are not used when building qemu-tools.
* Update the configuration and meson build for qemu-tools.

Change-Id: I2da9f34f8947f1ff5289f4cd2c6685c9f019aece
Signed-off-by: wanchao.xu <wanchao.xu@samsung.com>
1762 files changed:
.gitattributes
.gitignore
LICENSE [new file with mode: 0644]
MAINTAINERS [new file with mode: 0644]
Makefile
VERSION
authz/listfile.c
authz/meson.build
authz/trace-events
block.c
block/accounting.c
block/aio_task.c
block/amend.c
block/backup-top.c [deleted file]
block/backup-top.h [deleted file]
block/backup.c
block/blkdebug.c
block/blkio.c [new file with mode: 0644]
block/blklogwrites.c
block/blkreplay.c
block/blkverify.c
block/block-backend.c
block/block-copy.c
block/block-gen.h
block/block-ram-registrar.c [new file with mode: 0644]
block/bochs.c
block/cloop.c
block/commit.c
block/copy-before-write.c [new file with mode: 0644]
block/copy-before-write.h [new file with mode: 0644]
block/copy-on-read.c
block/copy-on-read.h [new file with mode: 0644]
block/coroutines.h
block/create.c
block/crypto.c
block/curl.c
block/dirty-bitmap.c
block/dmg-lzfse.c
block/dmg.c
block/dmg.h
block/export/export.c
block/export/fuse.c [new file with mode: 0644]
block/export/meson.build
block/export/vduse-blk.c [new file with mode: 0644]
block/export/vduse-blk.h [new file with mode: 0644]
block/export/vhost-user-blk-server.c
block/export/vhost-user-blk-server.h
block/export/virtio-blk-handler.c [new file with mode: 0644]
block/export/virtio-blk-handler.h [new file with mode: 0644]
block/file-posix.c
block/file-win32.c
block/filter-compress.c
block/gluster.c
block/graph-lock.c [new file with mode: 0644]
block/io.c
block/io_uring.c
block/iscsi-opts.c
block/iscsi.c
block/linux-aio.c
block/meson.build
block/mirror.c
block/monitor/bitmap-qmp-cmds.c
block/monitor/block-hmp-cmds.c
block/monitor/meson.build
block/nbd.c
block/nfs.c
block/null.c
block/nvme.c
block/parallels-ext.c [new file with mode: 0644]
block/parallels.c
block/parallels.h
block/preallocate.c [new file with mode: 0644]
block/progress_meter.c [new file with mode: 0644]
block/qapi-sysemu.c
block/qapi.c
block/qcow.c
block/qcow2-bitmap.c
block/qcow2-cache.c
block/qcow2-cluster.c
block/qcow2-refcount.c
block/qcow2-snapshot.c
block/qcow2-threads.c
block/qcow2.c
block/qcow2.h
block/qed-check.c
block/qed-l2-cache.c
block/qed-table.c
block/qed.c
block/qed.h
block/quorum.c
block/raw-format.c
block/rbd.c
block/replication.c
block/reqlist.c [new file with mode: 0644]
block/sheepdog.c [deleted file]
block/snapshot-access.c [new file with mode: 0644]
block/snapshot.c
block/ssh.c
block/stream.c
block/throttle-groups.c
block/throttle.c
block/trace-events
block/vdi.c
block/vhdx-log.c
block/vhdx.c
block/vhdx.h
block/vmdk.c
block/vpc.c
block/vvfat.c
block/win32-aio.c
block/write-threshold.c
blockdev-nbd.c
blockdev.c
blockjob.c
configure
crypto/aes.c
crypto/afalg.c
crypto/akcipher-gcrypt.c.inc [new file with mode: 0644]
crypto/akcipher-nettle.c.inc [new file with mode: 0644]
crypto/akcipher.c [new file with mode: 0644]
crypto/akcipherpriv.h [new file with mode: 0644]
crypto/block-luks-priv.h [new file with mode: 0644]
crypto/block-luks.c
crypto/block.c
crypto/cipher-afalg.c
crypto/cipher-builtin.c.inc
crypto/cipher-gcrypt.c.inc
crypto/cipher-gnutls.c.inc [new file with mode: 0644]
crypto/cipher-nettle.c.inc
crypto/cipher.c
crypto/clmul.c [new file with mode: 0644]
crypto/der.c [new file with mode: 0644]
crypto/der.h [new file with mode: 0644]
crypto/desrfb.c [deleted file]
crypto/hash-afalg.c
crypto/hash-gnutls.c [new file with mode: 0644]
crypto/hash-nettle.c
crypto/hmac-gnutls.c [new file with mode: 0644]
crypto/hmac-nettle.c
crypto/hmacpriv.h
crypto/init.c
crypto/ivgen-plain.h
crypto/meson.build
crypto/pbkdf-gnutls.c [new file with mode: 0644]
crypto/pbkdf.c
crypto/rsakey-builtin.c.inc [new file with mode: 0644]
crypto/rsakey-nettle.c.inc [new file with mode: 0644]
crypto/rsakey.c [new file with mode: 0644]
crypto/rsakey.h [new file with mode: 0644]
crypto/secret.c
crypto/secret_common.c
crypto/secret_keyring.c
crypto/sm4.c [new file with mode: 0644]
crypto/tls-cipher-suites.c
crypto/tlscreds.c
crypto/tlscredsanon.c
crypto/tlscredspriv.h
crypto/tlscredspsk.c
crypto/tlscredsx509.c
crypto/tlssession.c
crypto/trace-events
event-loop-base.c [new file with mode: 0644]
host/include/aarch64/host/atomic128-cas.h [new file with mode: 0644]
host/include/aarch64/host/atomic128-ldst.h [new file with mode: 0644]
host/include/aarch64/host/cpuinfo.h [new file with mode: 0644]
host/include/aarch64/host/crypto/aes-round.h [new file with mode: 0644]
host/include/aarch64/host/crypto/clmul.h [new file with mode: 0644]
host/include/aarch64/host/load-extract-al16-al8.h [new file with mode: 0644]
host/include/aarch64/host/store-insert-al16.h [new file with mode: 0644]
host/include/generic/host/atomic128-cas.h [new file with mode: 0644]
host/include/generic/host/atomic128-ldst.h [new file with mode: 0644]
host/include/generic/host/cpuinfo.h [new file with mode: 0644]
host/include/generic/host/crypto/aes-round.h [new file with mode: 0644]
host/include/generic/host/crypto/clmul.h [new file with mode: 0644]
host/include/generic/host/load-extract-al16-al8.h [new file with mode: 0644]
host/include/generic/host/store-insert-al16.h [new file with mode: 0644]
host/include/i386/host/cpuinfo.h [new file with mode: 0644]
host/include/i386/host/crypto/aes-round.h [new file with mode: 0644]
host/include/i386/host/crypto/clmul.h [new file with mode: 0644]
host/include/loongarch64/host/atomic128-ldst.h [new file with mode: 0644]
host/include/loongarch64/host/cpuinfo.h [new file with mode: 0644]
host/include/loongarch64/host/load-extract-al16-al8.h [new file with mode: 0644]
host/include/loongarch64/host/store-insert-al16.h [new file with mode: 0644]
host/include/ppc/host/cpuinfo.h [new file with mode: 0644]
host/include/ppc/host/crypto/aes-round.h [new file with mode: 0644]
host/include/ppc64/host/cpuinfo.h [new file with mode: 0644]
host/include/ppc64/host/crypto/aes-round.h [new file with mode: 0644]
host/include/x86_64/host/atomic128-ldst.h [new file with mode: 0644]
host/include/x86_64/host/cpuinfo.h [new file with mode: 0644]
host/include/x86_64/host/crypto/aes-round.h [new file with mode: 0644]
host/include/x86_64/host/crypto/clmul.h [new file with mode: 0644]
host/include/x86_64/host/load-extract-al16-al8.h [new file with mode: 0644]
include/authz/listfile.h
include/block/accounting.h
include/block/aio-wait.h
include/block/aio.h
include/block/aio_task.h
include/block/block-common.h [new file with mode: 0644]
include/block/block-copy.h
include/block/block-global-state.h [new file with mode: 0644]
include/block/block-hmp-cmds.h
include/block/block-io.h [new file with mode: 0644]
include/block/block.h
include/block/block_backup.h
include/block/block_int-common.h [new file with mode: 0644]
include/block/block_int-global-state.h [new file with mode: 0644]
include/block/block_int-io.h [new file with mode: 0644]
include/block/block_int.h
include/block/blockjob.h
include/block/blockjob_int.h
include/block/dirty-bitmap.h
include/block/export.h
include/block/fuse.h [new file with mode: 0644]
include/block/graph-lock.h [new file with mode: 0644]
include/block/nbd.h
include/block/nvme.h
include/block/qapi.h
include/block/qdict.h
include/block/raw-aio.h
include/block/replication.h [new file with mode: 0644]
include/block/reqlist.h [new file with mode: 0644]
include/block/snapshot.h
include/block/thread-pool.h
include/block/throttle-groups.h
include/block/ufs.h [new file with mode: 0644]
include/block/write-threshold.h
include/chardev/char-fe.h
include/chardev/char-socket.h [new file with mode: 0644]
include/chardev/char.h
include/crypto/aes-round.h [new file with mode: 0644]
include/crypto/aes.h
include/crypto/akcipher.h [new file with mode: 0644]
include/crypto/block.h
include/crypto/clmul.h [new file with mode: 0644]
include/crypto/desrfb.h
include/crypto/ivgen.h
include/crypto/secret_common.h
include/crypto/sm4.h [new file with mode: 0644]
include/crypto/tls-cipher-suites.h
include/crypto/tlscreds.h
include/crypto/tlscredsanon.h
include/crypto/tlscredspsk.h
include/crypto/tlscredsx509.h
include/crypto/tlssession.h
include/disas/dis-asm.h
include/disas/disas.h
include/elf.h
include/exec/address-spaces.h
include/exec/confidential-guest-support.h [new file with mode: 0644]
include/exec/cpu-all.h
include/exec/cpu-common.h
include/exec/cpu-defs.h
include/exec/cpu_ldst.h
include/exec/cputlb.h
include/exec/exec-all.h
include/exec/gdbstub.h
include/exec/gen-icount.h [deleted file]
include/exec/helper-gen-common.h [new file with mode: 0644]
include/exec/helper-gen.h
include/exec/helper-gen.h.inc [new file with mode: 0644]
include/exec/helper-head.h
include/exec/helper-info.c.inc [new file with mode: 0644]
include/exec/helper-proto-common.h [new file with mode: 0644]
include/exec/helper-proto.h
include/exec/helper-proto.h.inc [new file with mode: 0644]
include/exec/helper-tcg.h [deleted file]
include/exec/hwaddr.h
include/exec/log.h
include/exec/memattrs.h
include/exec/memop.h
include/exec/memopidx.h [new file with mode: 0644]
include/exec/memory-internal.h
include/exec/memory.h
include/exec/memory_ldst.h.inc
include/exec/memory_ldst_cached.h.inc
include/exec/memory_ldst_phys.h.inc
include/exec/page-vary.h [new file with mode: 0644]
include/exec/plugin-gen.h
include/exec/poison.h
include/exec/ram_addr.h
include/exec/ramblock.h
include/exec/ramlist.h
include/exec/replay-core.h [new file with mode: 0644]
include/exec/softmmu-semi.h [deleted file]
include/exec/target_long.h [new file with mode: 0644]
include/exec/target_page.h
include/exec/tb-context.h [deleted file]
include/exec/tb-flush.h [new file with mode: 0644]
include/exec/tb-hash.h [deleted file]
include/exec/tb-lookup.h [deleted file]
include/exec/tlb-common.h [new file with mode: 0644]
include/exec/translate-all.h [new file with mode: 0644]
include/exec/translation-block.h [new file with mode: 0644]
include/exec/translator.h
include/exec/tswap.h [new file with mode: 0644]
include/exec/user/abitypes.h
include/exec/user/guest-base.h [new file with mode: 0644]
include/exec/user/thunk.h
include/fpu/softfloat-helpers.h
include/fpu/softfloat-macros.h
include/fpu/softfloat-types.h
include/fpu/softfloat.h
include/gdbstub/helpers.h [new file with mode: 0644]
include/gdbstub/syscalls.h [new file with mode: 0644]
include/gdbstub/user.h [new file with mode: 0644]
include/glib-compat.h
include/hw/acpi/acpi-defs.h
include/hw/acpi/acpi.h
include/hw/acpi/acpi_aml_interface.h [new file with mode: 0644]
include/hw/acpi/acpi_dev_interface.h
include/hw/acpi/aml-build.h
include/hw/acpi/cpu.h
include/hw/acpi/cxl.h [new file with mode: 0644]
include/hw/acpi/erst.h [new file with mode: 0644]
include/hw/acpi/generic_event_device.h
include/hw/acpi/ghes.h
include/hw/acpi/ich9.h
include/hw/acpi/ich9_tco.h [new file with mode: 0644]
include/hw/acpi/ipmi.h
include/hw/acpi/pc-hotplug.h
include/hw/acpi/pci.h
include/hw/acpi/pcihp.h
include/hw/acpi/piix4.h [new file with mode: 0644]
include/hw/acpi/tco.h [deleted file]
include/hw/acpi/tpm.h
include/hw/acpi/utils.h
include/hw/acpi/vmgenid.h
include/hw/adc/aspeed_adc.h [new file with mode: 0644]
include/hw/adc/max111x.h [new file with mode: 0644]
include/hw/adc/npcm7xx_adc.h [new file with mode: 0644]
include/hw/adc/stm32f2xx_adc.h [new file with mode: 0644]
include/hw/adc/zynq-xadc.h [new file with mode: 0644]
include/hw/arm/allwinner-a10.h [new file with mode: 0644]
include/hw/arm/allwinner-h3.h [new file with mode: 0644]
include/hw/arm/allwinner-r40.h [new file with mode: 0644]
include/hw/arm/armsse-version.h [new file with mode: 0644]
include/hw/arm/armsse.h [new file with mode: 0644]
include/hw/arm/armv7m.h [new file with mode: 0644]
include/hw/arm/aspeed.h [new file with mode: 0644]
include/hw/arm/aspeed_soc.h [new file with mode: 0644]
include/hw/arm/bcm2835_peripherals.h [new file with mode: 0644]
include/hw/arm/bcm2836.h [new file with mode: 0644]
include/hw/arm/boot.h [new file with mode: 0644]
include/hw/arm/bsa.h [new file with mode: 0644]
include/hw/arm/digic.h [new file with mode: 0644]
include/hw/arm/exynos4210.h [new file with mode: 0644]
include/hw/arm/fdt.h [new file with mode: 0644]
include/hw/arm/fsl-imx25.h [new file with mode: 0644]
include/hw/arm/fsl-imx31.h [new file with mode: 0644]
include/hw/arm/fsl-imx6.h [new file with mode: 0644]
include/hw/arm/fsl-imx6ul.h [new file with mode: 0644]
include/hw/arm/fsl-imx7.h [new file with mode: 0644]
include/hw/arm/linux-boot-if.h [new file with mode: 0644]
include/hw/arm/msf2-soc.h [new file with mode: 0644]
include/hw/arm/npcm7xx.h [new file with mode: 0644]
include/hw/arm/nrf51.h [new file with mode: 0644]
include/hw/arm/nrf51_soc.h [new file with mode: 0644]
include/hw/arm/omap.h [new file with mode: 0644]
include/hw/arm/primecell.h [new file with mode: 0644]
include/hw/arm/pxa.h [new file with mode: 0644]
include/hw/arm/raspberrypi-fw-defs.h [new file with mode: 0644]
include/hw/arm/raspi_platform.h [new file with mode: 0644]
include/hw/arm/sharpsl.h [new file with mode: 0644]
include/hw/arm/smmu-common.h [new file with mode: 0644]
include/hw/arm/smmuv3.h [new file with mode: 0644]
include/hw/arm/soc_dma.h [new file with mode: 0644]
include/hw/arm/stm32f100_soc.h [new file with mode: 0644]
include/hw/arm/stm32f205_soc.h [new file with mode: 0644]
include/hw/arm/stm32f405_soc.h [new file with mode: 0644]
include/hw/arm/virt.h [new file with mode: 0644]
include/hw/arm/xen_arch_hvm.h [new file with mode: 0644]
include/hw/arm/xlnx-versal.h [new file with mode: 0644]
include/hw/arm/xlnx-zynqmp.h [new file with mode: 0644]
include/hw/audio/asc.h [new file with mode: 0644]
include/hw/audio/pcspk.h [new file with mode: 0644]
include/hw/audio/soundhw.h [new file with mode: 0644]
include/hw/audio/virtio-snd.h [new file with mode: 0644]
include/hw/audio/wm8750.h [new file with mode: 0644]
include/hw/block/block.h
include/hw/block/fdc.h
include/hw/block/flash.h
include/hw/block/swim.h
include/hw/boards.h
include/hw/char/avr_usart.h [new file with mode: 0644]
include/hw/char/bcm2835_aux.h [new file with mode: 0644]
include/hw/char/cadence_uart.h [new file with mode: 0644]
include/hw/char/cmsdk-apb-uart.h [new file with mode: 0644]
include/hw/char/digic-uart.h [new file with mode: 0644]
include/hw/char/escc.h [new file with mode: 0644]
include/hw/char/goldfish_tty.h [new file with mode: 0644]
include/hw/char/ibex_uart.h [new file with mode: 0644]
include/hw/char/imx_serial.h [new file with mode: 0644]
include/hw/char/mchp_pfsoc_mmuart.h [new file with mode: 0644]
include/hw/char/nrf51_uart.h [new file with mode: 0644]
include/hw/char/parallel-isa.h [new file with mode: 0644]
include/hw/char/parallel.h [new file with mode: 0644]
include/hw/char/pl011.h [new file with mode: 0644]
include/hw/char/renesas_sci.h [new file with mode: 0644]
include/hw/char/riscv_htif.h [new file with mode: 0644]
include/hw/char/serial.h [new file with mode: 0644]
include/hw/char/shakti_uart.h [new file with mode: 0644]
include/hw/char/sifive_uart.h [new file with mode: 0644]
include/hw/char/stm32f2xx_usart.h [new file with mode: 0644]
include/hw/char/xilinx_uartlite.h [new file with mode: 0644]
include/hw/clock.h
include/hw/core/accel-cpu.h [new file with mode: 0644]
include/hw/core/cpu.h
include/hw/core/sysbus-fdt.h [new file with mode: 0644]
include/hw/core/sysemu-cpu-ops.h [new file with mode: 0644]
include/hw/core/tcg-cpu-ops.h [new file with mode: 0644]
include/hw/cpu/a15mpcore.h [new file with mode: 0644]
include/hw/cpu/a9mpcore.h [new file with mode: 0644]
include/hw/cpu/arm11mpcore.h [new file with mode: 0644]
include/hw/cpu/cluster.h [new file with mode: 0644]
include/hw/cpu/core.h [new file with mode: 0644]
include/hw/cris/etraxfs.h [new file with mode: 0644]
include/hw/cris/etraxfs_dma.h [new file with mode: 0644]
include/hw/cxl/cxl.h [new file with mode: 0644]
include/hw/cxl/cxl_cdat.h [new file with mode: 0644]
include/hw/cxl/cxl_component.h [new file with mode: 0644]
include/hw/cxl/cxl_device.h [new file with mode: 0644]
include/hw/cxl/cxl_events.h [new file with mode: 0644]
include/hw/cxl/cxl_host.h [new file with mode: 0644]
include/hw/cxl/cxl_pci.h [new file with mode: 0644]
include/hw/display/edid.h
include/hw/display/macfb.h
include/hw/display/milkymist_tmu2.h [deleted file]
include/hw/display/ramfb.h
include/hw/display/vga.h
include/hw/display/xlnx_dp.h
include/hw/dma/bcm2835_dma.h [new file with mode: 0644]
include/hw/dma/i8257.h [new file with mode: 0644]
include/hw/dma/pl080.h [new file with mode: 0644]
include/hw/dma/sifive_pdma.h [new file with mode: 0644]
include/hw/dma/xlnx-zdma.h [new file with mode: 0644]
include/hw/dma/xlnx-zynq-devcfg.h [new file with mode: 0644]
include/hw/dma/xlnx_csu_dma.h [new file with mode: 0644]
include/hw/dma/xlnx_dpdma.h [new file with mode: 0644]
include/hw/elf_ops.h
include/hw/firmware/smbios.h [new file with mode: 0644]
include/hw/gpio/aspeed_gpio.h [new file with mode: 0644]
include/hw/gpio/bcm2835_gpio.h [new file with mode: 0644]
include/hw/gpio/imx_gpio.h [new file with mode: 0644]
include/hw/gpio/npcm7xx_gpio.h [new file with mode: 0644]
include/hw/gpio/nrf51_gpio.h [new file with mode: 0644]
include/hw/gpio/sifive_gpio.h [new file with mode: 0644]
include/hw/hotplug.h
include/hw/hw.h
include/hw/hyperv/dynmem-proto.h [new file with mode: 0644]
include/hw/hyperv/hv-balloon.h [new file with mode: 0644]
include/hw/hyperv/hyperv-proto.h [new file with mode: 0644]
include/hw/hyperv/hyperv.h [new file with mode: 0644]
include/hw/hyperv/vmbus-bridge.h [new file with mode: 0644]
include/hw/hyperv/vmbus-proto.h [new file with mode: 0644]
include/hw/hyperv/vmbus.h [new file with mode: 0644]
include/hw/i2c/allwinner-i2c.h [new file with mode: 0644]
include/hw/i2c/arm_sbcon_i2c.h [new file with mode: 0644]
include/hw/i2c/aspeed_i2c.h [new file with mode: 0644]
include/hw/i2c/bitbang_i2c.h [new file with mode: 0644]
include/hw/i2c/i2c.h [new file with mode: 0644]
include/hw/i2c/i2c_mux_pca954x.h [new file with mode: 0644]
include/hw/i2c/imx_i2c.h [new file with mode: 0644]
include/hw/i2c/microbit_i2c.h [new file with mode: 0644]
include/hw/i2c/npcm7xx_smbus.h [new file with mode: 0644]
include/hw/i2c/pm_smbus.h [new file with mode: 0644]
include/hw/i2c/pmbus_device.h [new file with mode: 0644]
include/hw/i2c/ppc4xx_i2c.h [new file with mode: 0644]
include/hw/i2c/smbus_eeprom.h [new file with mode: 0644]
include/hw/i2c/smbus_master.h [new file with mode: 0644]
include/hw/i2c/smbus_slave.h [new file with mode: 0644]
include/hw/i386/apic-msidef.h [new file with mode: 0644]
include/hw/i386/apic.h [new file with mode: 0644]
include/hw/i386/apic_internal.h [new file with mode: 0644]
include/hw/i386/hostmem-epc.h [new file with mode: 0644]
include/hw/i386/intel_iommu.h [new file with mode: 0644]
include/hw/i386/microvm.h [new file with mode: 0644]
include/hw/i386/pc.h [new file with mode: 0644]
include/hw/i386/sgx-epc.h [new file with mode: 0644]
include/hw/i386/topology.h [new file with mode: 0644]
include/hw/i386/vmport.h [new file with mode: 0644]
include/hw/i386/x86-iommu.h [new file with mode: 0644]
include/hw/i386/x86.h [new file with mode: 0644]
include/hw/i386/xen_arch_hvm.h [new file with mode: 0644]
include/hw/ide.h
include/hw/ide/ahci.h [new file with mode: 0644]
include/hw/ide/internal.h [new file with mode: 0644]
include/hw/ide/isa.h [new file with mode: 0644]
include/hw/ide/mmio.h [new file with mode: 0644]
include/hw/ide/pci.h [new file with mode: 0644]
include/hw/ide/piix.h [new file with mode: 0644]
include/hw/input/adb-keys.h [new file with mode: 0644]
include/hw/input/adb.h [new file with mode: 0644]
include/hw/input/hid.h [new file with mode: 0644]
include/hw/input/i8042.h [new file with mode: 0644]
include/hw/input/lasips2.h [new file with mode: 0644]
include/hw/input/lm832x.h [new file with mode: 0644]
include/hw/input/pl050.h [new file with mode: 0644]
include/hw/input/ps2.h [new file with mode: 0644]
include/hw/input/stellaris_gamepad.h [new file with mode: 0644]
include/hw/input/tsc2xxx.h [new file with mode: 0644]
include/hw/intc/allwinner-a10-pic.h [new file with mode: 0644]
include/hw/intc/arm_gic.h [new file with mode: 0644]
include/hw/intc/arm_gic_common.h [new file with mode: 0644]
include/hw/intc/arm_gicv3.h [new file with mode: 0644]
include/hw/intc/arm_gicv3_common.h [new file with mode: 0644]
include/hw/intc/arm_gicv3_its_common.h [new file with mode: 0644]
include/hw/intc/armv7m_nvic.h [new file with mode: 0644]
include/hw/intc/aspeed_vic.h [new file with mode: 0644]
include/hw/intc/bcm2835_ic.h [new file with mode: 0644]
include/hw/intc/bcm2836_control.h [new file with mode: 0644]
include/hw/intc/exynos4210_combiner.h [new file with mode: 0644]
include/hw/intc/exynos4210_gic.h [new file with mode: 0644]
include/hw/intc/goldfish_pic.h [new file with mode: 0644]
include/hw/intc/heathrow_pic.h [new file with mode: 0644]
include/hw/intc/i8259.h [new file with mode: 0644]
include/hw/intc/imx_avic.h [new file with mode: 0644]
include/hw/intc/imx_gpcv2.h [new file with mode: 0644]
include/hw/intc/intc.h [new file with mode: 0644]
include/hw/intc/ioapic.h [new file with mode: 0644]
include/hw/intc/kvm_irqcount.h [new file with mode: 0644]
include/hw/intc/loongarch_extioi.h [new file with mode: 0644]
include/hw/intc/loongarch_ipi.h [new file with mode: 0644]
include/hw/intc/loongarch_pch_msi.h [new file with mode: 0644]
include/hw/intc/loongarch_pch_pic.h [new file with mode: 0644]
include/hw/intc/loongson_liointc.h [new file with mode: 0644]
include/hw/intc/m68k_irqc.h [new file with mode: 0644]
include/hw/intc/mips_gic.h [new file with mode: 0644]
include/hw/intc/nios2_vic.h [new file with mode: 0644]
include/hw/intc/ppc-uic.h [new file with mode: 0644]
include/hw/intc/realview_gic.h [new file with mode: 0644]
include/hw/intc/riscv_aclint.h [new file with mode: 0644]
include/hw/intc/riscv_aplic.h [new file with mode: 0644]
include/hw/intc/riscv_imsic.h [new file with mode: 0644]
include/hw/intc/rx_icu.h [new file with mode: 0644]
include/hw/intc/sifive_plic.h [new file with mode: 0644]
include/hw/intc/xlnx-pmu-iomod-intc.h [new file with mode: 0644]
include/hw/intc/xlnx-zynqmp-ipi.h [new file with mode: 0644]
include/hw/ipack/ipack.h [new file with mode: 0644]
include/hw/ipmi/ipmi.h [new file with mode: 0644]
include/hw/ipmi/ipmi_bt.h [new file with mode: 0644]
include/hw/ipmi/ipmi_kcs.h [new file with mode: 0644]
include/hw/irq.h
include/hw/isa/i8259_internal.h
include/hw/isa/isa.h
include/hw/isa/superio.h
include/hw/isa/vt82c686.h
include/hw/loader.h
include/hw/loongarch/virt.h [new file with mode: 0644]
include/hw/m68k/mcf.h [new file with mode: 0644]
include/hw/m68k/mcf_fec.h [new file with mode: 0644]
include/hw/m68k/next-cube.h [new file with mode: 0644]
include/hw/m68k/q800-glue.h [new file with mode: 0644]
include/hw/m68k/q800.h [new file with mode: 0644]
include/hw/mem/memory-device.h
include/hw/mem/nvdimm.h
include/hw/mem/pc-dimm.h
include/hw/mem/sparse-mem.h [new file with mode: 0644]
include/hw/mips/bios.h [new file with mode: 0644]
include/hw/mips/bootloader.h [new file with mode: 0644]
include/hw/mips/cps.h [new file with mode: 0644]
include/hw/mips/mips.h [new file with mode: 0644]
include/hw/misc/a9scu.h [new file with mode: 0644]
include/hw/misc/allwinner-a10-ccm.h [new file with mode: 0644]
include/hw/misc/allwinner-a10-dramc.h [new file with mode: 0644]
include/hw/misc/allwinner-cpucfg.h [new file with mode: 0644]
include/hw/misc/allwinner-h3-ccu.h [new file with mode: 0644]
include/hw/misc/allwinner-h3-dramc.h [new file with mode: 0644]
include/hw/misc/allwinner-h3-sysctrl.h [new file with mode: 0644]
include/hw/misc/allwinner-r40-ccu.h [new file with mode: 0644]
include/hw/misc/allwinner-r40-dramc.h [new file with mode: 0644]
include/hw/misc/allwinner-sid.h [new file with mode: 0644]
include/hw/misc/allwinner-sramc.h [new file with mode: 0644]
include/hw/misc/arm11scu.h [new file with mode: 0644]
include/hw/misc/arm_integrator_debug.h [new file with mode: 0644]
include/hw/misc/armsse-cpu-pwrctrl.h [new file with mode: 0644]
include/hw/misc/armsse-cpuid.h [new file with mode: 0644]
include/hw/misc/armsse-mhu.h [new file with mode: 0644]
include/hw/misc/armv7m_ras.h [new file with mode: 0644]
include/hw/misc/aspeed_hace.h [new file with mode: 0644]
include/hw/misc/aspeed_i3c.h [new file with mode: 0644]
include/hw/misc/aspeed_lpc.h [new file with mode: 0644]
include/hw/misc/aspeed_peci.h [new file with mode: 0644]
include/hw/misc/aspeed_sbc.h [new file with mode: 0644]
include/hw/misc/aspeed_scu.h [new file with mode: 0644]
include/hw/misc/aspeed_sdmc.h [new file with mode: 0644]
include/hw/misc/aspeed_xdma.h [new file with mode: 0644]
include/hw/misc/auxbus.h [new file with mode: 0644]
include/hw/misc/avr_power.h [new file with mode: 0644]
include/hw/misc/bcm2835_cprman.h [new file with mode: 0644]
include/hw/misc/bcm2835_cprman_internals.h [new file with mode: 0644]
include/hw/misc/bcm2835_mbox.h [new file with mode: 0644]
include/hw/misc/bcm2835_mbox_defs.h [new file with mode: 0644]
include/hw/misc/bcm2835_mphi.h [new file with mode: 0644]
include/hw/misc/bcm2835_powermgt.h [new file with mode: 0644]
include/hw/misc/bcm2835_property.h [new file with mode: 0644]
include/hw/misc/bcm2835_rng.h [new file with mode: 0644]
include/hw/misc/bcm2835_thermal.h [new file with mode: 0644]
include/hw/misc/cbus.h [new file with mode: 0644]
include/hw/misc/djmemc.h [new file with mode: 0644]
include/hw/misc/empty_slot.h [new file with mode: 0644]
include/hw/misc/grlib_ahb_apb_pnp.h [new file with mode: 0644]
include/hw/misc/imx25_ccm.h [new file with mode: 0644]
include/hw/misc/imx31_ccm.h [new file with mode: 0644]
include/hw/misc/imx6_ccm.h [new file with mode: 0644]
include/hw/misc/imx6_src.h [new file with mode: 0644]
include/hw/misc/imx6ul_ccm.h [new file with mode: 0644]
include/hw/misc/imx7_ccm.h [new file with mode: 0644]
include/hw/misc/imx7_gpr.h [new file with mode: 0644]
include/hw/misc/imx7_snvs.h [new file with mode: 0644]
include/hw/misc/imx7_src.h [new file with mode: 0644]
include/hw/misc/imx_ccm.h [new file with mode: 0644]
include/hw/misc/imx_rngc.h [new file with mode: 0644]
include/hw/misc/iosb.h [new file with mode: 0644]
include/hw/misc/iotkit-secctl.h [new file with mode: 0644]
include/hw/misc/iotkit-sysctl.h [new file with mode: 0644]
include/hw/misc/iotkit-sysinfo.h [new file with mode: 0644]
include/hw/misc/ivshmem.h [new file with mode: 0644]
include/hw/misc/lasi.h [new file with mode: 0644]
include/hw/misc/led.h [new file with mode: 0644]
include/hw/misc/mac_via.h [new file with mode: 0644]
include/hw/misc/macio/cuda.h [new file with mode: 0644]
include/hw/misc/macio/gpio.h [new file with mode: 0644]
include/hw/misc/macio/macio.h [new file with mode: 0644]
include/hw/misc/macio/pmu.h [new file with mode: 0644]
include/hw/misc/mchp_pfsoc_dmc.h [new file with mode: 0644]
include/hw/misc/mchp_pfsoc_ioscb.h [new file with mode: 0644]
include/hw/misc/mchp_pfsoc_sysreg.h [new file with mode: 0644]
include/hw/misc/mips_cmgcr.h [new file with mode: 0644]
include/hw/misc/mips_cpc.h [new file with mode: 0644]
include/hw/misc/mips_itu.h [new file with mode: 0644]
include/hw/misc/mos6522.h [new file with mode: 0644]
include/hw/misc/mps2-fpgaio.h [new file with mode: 0644]
include/hw/misc/mps2-scc.h [new file with mode: 0644]
include/hw/misc/msf2-sysreg.h [new file with mode: 0644]
include/hw/misc/npcm7xx_clk.h [new file with mode: 0644]
include/hw/misc/npcm7xx_gcr.h [new file with mode: 0644]
include/hw/misc/npcm7xx_mft.h [new file with mode: 0644]
include/hw/misc/npcm7xx_pwm.h [new file with mode: 0644]
include/hw/misc/npcm7xx_rng.h [new file with mode: 0644]
include/hw/misc/nrf51_rng.h [new file with mode: 0644]
include/hw/misc/pca9552.h [new file with mode: 0644]
include/hw/misc/pca9552_regs.h [new file with mode: 0644]
include/hw/misc/pvpanic.h [new file with mode: 0644]
include/hw/misc/sifive_e_aon.h [new file with mode: 0644]
include/hw/misc/sifive_e_prci.h [new file with mode: 0644]
include/hw/misc/sifive_test.h [new file with mode: 0644]
include/hw/misc/sifive_u_otp.h [new file with mode: 0644]
include/hw/misc/sifive_u_prci.h [new file with mode: 0644]
include/hw/misc/stm32f2xx_syscfg.h [new file with mode: 0644]
include/hw/misc/stm32f4xx_exti.h [new file with mode: 0644]
include/hw/misc/stm32f4xx_syscfg.h [new file with mode: 0644]
include/hw/misc/tz-mpc.h [new file with mode: 0644]
include/hw/misc/tz-msc.h [new file with mode: 0644]
include/hw/misc/tz-ppc.h [new file with mode: 0644]
include/hw/misc/unimp.h [new file with mode: 0644]
include/hw/misc/virt_ctrl.h [new file with mode: 0644]
include/hw/misc/vmcoreinfo.h [new file with mode: 0644]
include/hw/misc/xlnx-cfi-if.h [new file with mode: 0644]
include/hw/misc/xlnx-versal-cframe-reg.h [new file with mode: 0644]
include/hw/misc/xlnx-versal-cfu.h [new file with mode: 0644]
include/hw/misc/xlnx-versal-crl.h [new file with mode: 0644]
include/hw/misc/xlnx-versal-pmc-iou-slcr.h [new file with mode: 0644]
include/hw/misc/xlnx-versal-trng.h [new file with mode: 0644]
include/hw/misc/xlnx-versal-xramc.h [new file with mode: 0644]
include/hw/misc/xlnx-zynqmp-apu-ctrl.h [new file with mode: 0644]
include/hw/misc/xlnx-zynqmp-crf.h [new file with mode: 0644]
include/hw/net/allwinner-sun8i-emac.h [new file with mode: 0644]
include/hw/net/allwinner_emac.h [new file with mode: 0644]
include/hw/net/cadence_gem.h [new file with mode: 0644]
include/hw/net/dp8393x.h [new file with mode: 0644]
include/hw/net/ftgmac100.h [new file with mode: 0644]
include/hw/net/imx_fec.h [new file with mode: 0644]
include/hw/net/lan9118.h [new file with mode: 0644]
include/hw/net/lance.h [new file with mode: 0644]
include/hw/net/lasi_82596.h [new file with mode: 0644]
include/hw/net/mii.h [new file with mode: 0644]
include/hw/net/msf2-emac.h [new file with mode: 0644]
include/hw/net/mv88w8618_eth.h [new file with mode: 0644]
include/hw/net/ne2000-isa.h [new file with mode: 0644]
include/hw/net/npcm7xx_emc.h [new file with mode: 0644]
include/hw/net/smc91c111.h [new file with mode: 0644]
include/hw/net/xlnx-versal-canfd.h [new file with mode: 0644]
include/hw/net/xlnx-zynqmp-can.h [new file with mode: 0644]
include/hw/nubus/mac-nubus-bridge.h [new file with mode: 0644]
include/hw/nubus/nubus.h [new file with mode: 0644]
include/hw/nvram/eeprom_at24c.h [new file with mode: 0644]
include/hw/nvram/fw_cfg.h
include/hw/nvram/mac_nvram.h [new file with mode: 0644]
include/hw/nvram/npcm7xx_otp.h
include/hw/nvram/xlnx-bbram.h [new file with mode: 0644]
include/hw/nvram/xlnx-efuse.h [new file with mode: 0644]
include/hw/nvram/xlnx-versal-efuse.h [new file with mode: 0644]
include/hw/nvram/xlnx-zynqmp-efuse.h [new file with mode: 0644]
include/hw/openrisc/boot.h [new file with mode: 0644]
include/hw/or-irq.h
include/hw/pci-bridge/cxl_upstream_port.h [new file with mode: 0644]
include/hw/pci-bridge/pci_expander_bridge.h [new file with mode: 0644]
include/hw/pci-bridge/simba.h [new file with mode: 0644]
include/hw/pci-bridge/xio3130_downstream.h [new file with mode: 0644]
include/hw/pci-host/articia.h [new file with mode: 0644]
include/hw/pci-host/astro.h [new file with mode: 0644]
include/hw/pci-host/bonito.h [new file with mode: 0644]
include/hw/pci-host/designware.h
include/hw/pci-host/dino.h [new file with mode: 0644]
include/hw/pci-host/gpex.h
include/hw/pci-host/grackle.h [new file with mode: 0644]
include/hw/pci-host/i440fx.h
include/hw/pci-host/ls7a.h [new file with mode: 0644]
include/hw/pci-host/mv64361.h [new file with mode: 0644]
include/hw/pci-host/pam.h
include/hw/pci-host/pnv_phb3.h
include/hw/pci-host/pnv_phb3_regs.h
include/hw/pci-host/pnv_phb4.h
include/hw/pci-host/pnv_phb4_regs.h
include/hw/pci-host/q35.h
include/hw/pci-host/remote.h [new file with mode: 0644]
include/hw/pci-host/sabre.h
include/hw/pci-host/spapr.h
include/hw/pci-host/xilinx-pcie.h
include/hw/pci/msi.h
include/hw/pci/msix.h
include/hw/pci/pci.h
include/hw/pci/pci_bridge.h
include/hw/pci/pci_bus.h
include/hw/pci/pci_device.h [new file with mode: 0644]
include/hw/pci/pci_host.h
include/hw/pci/pci_ids.h
include/hw/pci/pci_regs.h
include/hw/pci/pcie.h
include/hw/pci/pcie_aer.h
include/hw/pci/pcie_doe.h [new file with mode: 0644]
include/hw/pci/pcie_host.h
include/hw/pci/pcie_port.h
include/hw/pci/pcie_regs.h
include/hw/pci/pcie_sriov.h [new file with mode: 0644]
include/hw/pci/shpc.h
include/hw/pcmcia.h
include/hw/ppc/fdt.h [new file with mode: 0644]
include/hw/ppc/mac_dbdma.h [new file with mode: 0644]
include/hw/ppc/openpic.h [new file with mode: 0644]
include/hw/ppc/openpic_kvm.h [new file with mode: 0644]
include/hw/ppc/pef.h [new file with mode: 0644]
include/hw/ppc/pnv.h [new file with mode: 0644]
include/hw/ppc/pnv_chip.h [new file with mode: 0644]
include/hw/ppc/pnv_core.h [new file with mode: 0644]
include/hw/ppc/pnv_homer.h [new file with mode: 0644]
include/hw/ppc/pnv_i2c.h [new file with mode: 0644]
include/hw/ppc/pnv_lpc.h [new file with mode: 0644]
include/hw/ppc/pnv_occ.h [new file with mode: 0644]
include/hw/ppc/pnv_pnor.h [new file with mode: 0644]
include/hw/ppc/pnv_psi.h [new file with mode: 0644]
include/hw/ppc/pnv_sbe.h [new file with mode: 0644]
include/hw/ppc/pnv_xive.h [new file with mode: 0644]
include/hw/ppc/pnv_xscom.h [new file with mode: 0644]
include/hw/ppc/ppc.h [new file with mode: 0644]
include/hw/ppc/ppc4xx.h [new file with mode: 0644]
include/hw/ppc/ppc_e500.h [new file with mode: 0644]
include/hw/ppc/spapr.h [new file with mode: 0644]
include/hw/ppc/spapr_cpu_core.h [new file with mode: 0644]
include/hw/ppc/spapr_drc.h [new file with mode: 0644]
include/hw/ppc/spapr_irq.h [new file with mode: 0644]
include/hw/ppc/spapr_nested.h [new file with mode: 0644]
include/hw/ppc/spapr_numa.h [new file with mode: 0644]
include/hw/ppc/spapr_nvdimm.h [new file with mode: 0644]
include/hw/ppc/spapr_ovec.h [new file with mode: 0644]
include/hw/ppc/spapr_tpm_proxy.h [new file with mode: 0644]
include/hw/ppc/spapr_vio.h [new file with mode: 0644]
include/hw/ppc/spapr_xive.h [new file with mode: 0644]
include/hw/ppc/vof.h [new file with mode: 0644]
include/hw/ppc/xics.h [new file with mode: 0644]
include/hw/ppc/xics_spapr.h [new file with mode: 0644]
include/hw/ppc/xive.h [new file with mode: 0644]
include/hw/ppc/xive2.h [new file with mode: 0644]
include/hw/ppc/xive2_regs.h [new file with mode: 0644]
include/hw/ppc/xive_regs.h [new file with mode: 0644]
include/hw/ptimer.h
include/hw/qdev-clock.h
include/hw/qdev-core.h
include/hw/qdev-properties-system.h [new file with mode: 0644]
include/hw/qdev-properties.h
include/hw/rdma/rdma.h [new file with mode: 0644]
include/hw/register.h
include/hw/registerfields.h
include/hw/remote/iohub.h [new file with mode: 0644]
include/hw/remote/iommu.h [new file with mode: 0644]
include/hw/remote/machine.h [new file with mode: 0644]
include/hw/remote/memory.h [new file with mode: 0644]
include/hw/remote/mpqemu-link.h [new file with mode: 0644]
include/hw/remote/proxy-memory-listener.h [new file with mode: 0644]
include/hw/remote/proxy.h [new file with mode: 0644]
include/hw/remote/vfio-user-obj.h [new file with mode: 0644]
include/hw/riscv/boot.h [new file with mode: 0644]
include/hw/riscv/boot_opensbi.h [new file with mode: 0644]
include/hw/riscv/microchip_pfsoc.h [new file with mode: 0644]
include/hw/riscv/numa.h [new file with mode: 0644]
include/hw/riscv/opentitan.h [new file with mode: 0644]
include/hw/riscv/riscv_hart.h [new file with mode: 0644]
include/hw/riscv/shakti_c.h [new file with mode: 0644]
include/hw/riscv/sifive_cpu.h [new file with mode: 0644]
include/hw/riscv/sifive_e.h [new file with mode: 0644]
include/hw/riscv/sifive_u.h [new file with mode: 0644]
include/hw/riscv/spike.h [new file with mode: 0644]
include/hw/riscv/virt.h [new file with mode: 0644]
include/hw/rtc/allwinner-rtc.h [new file with mode: 0644]
include/hw/rtc/aspeed_rtc.h [new file with mode: 0644]
include/hw/rtc/goldfish_rtc.h [new file with mode: 0644]
include/hw/rtc/m48t59.h [new file with mode: 0644]
include/hw/rtc/mc146818rtc.h [new file with mode: 0644]
include/hw/rtc/mc146818rtc_regs.h [new file with mode: 0644]
include/hw/rtc/pl031.h [new file with mode: 0644]
include/hw/rtc/sun4v-rtc.h [new file with mode: 0644]
include/hw/rtc/xlnx-zynqmp-rtc.h [new file with mode: 0644]
include/hw/rx/rx62n.h [new file with mode: 0644]
include/hw/s390x/3270-ccw.h [new file with mode: 0644]
include/hw/s390x/adapter.h [new file with mode: 0644]
include/hw/s390x/ap-bridge.h [new file with mode: 0644]
include/hw/s390x/ap-device.h [new file with mode: 0644]
include/hw/s390x/cpu-topology.h [new file with mode: 0644]
include/hw/s390x/css-bridge.h [new file with mode: 0644]
include/hw/s390x/css.h [new file with mode: 0644]
include/hw/s390x/ebcdic.h [new file with mode: 0644]
include/hw/s390x/event-facility.h [new file with mode: 0644]
include/hw/s390x/ioinst.h [new file with mode: 0644]
include/hw/s390x/s390-ccw.h [new file with mode: 0644]
include/hw/s390x/s390-pci-bus.h [new file with mode: 0644]
include/hw/s390x/s390-pci-clp.h [new file with mode: 0644]
include/hw/s390x/s390-pci-inst.h [new file with mode: 0644]
include/hw/s390x/s390-pci-kvm.h [new file with mode: 0644]
include/hw/s390x/s390-pci-vfio.h [new file with mode: 0644]
include/hw/s390x/s390-virtio-ccw.h [new file with mode: 0644]
include/hw/s390x/s390_flic.h [new file with mode: 0644]
include/hw/s390x/sclp.h [new file with mode: 0644]
include/hw/s390x/storage-attributes.h [new file with mode: 0644]
include/hw/s390x/storage-keys.h [new file with mode: 0644]
include/hw/s390x/tod.h [new file with mode: 0644]
include/hw/s390x/vfio-ccw.h [new file with mode: 0644]
include/hw/scsi/emulation.h [new file with mode: 0644]
include/hw/scsi/esp.h [new file with mode: 0644]
include/hw/scsi/scsi.h [new file with mode: 0644]
include/hw/sd/allwinner-sdhost.h [new file with mode: 0644]
include/hw/sd/aspeed_sdhci.h [new file with mode: 0644]
include/hw/sd/bcm2835_sdhost.h [new file with mode: 0644]
include/hw/sd/cadence_sdhci.h [new file with mode: 0644]
include/hw/sd/npcm7xx_sdhci.h [new file with mode: 0644]
include/hw/sd/sd.h [new file with mode: 0644]
include/hw/sd/sdcard_legacy.h [new file with mode: 0644]
include/hw/sd/sdhci.h [new file with mode: 0644]
include/hw/sensor/emc141x_regs.h [new file with mode: 0644]
include/hw/sensor/isl_pmbus_vr.h [new file with mode: 0644]
include/hw/sensor/tmp105.h [new file with mode: 0644]
include/hw/sensor/tmp105_regs.h [new file with mode: 0644]
include/hw/sh4/sh.h [new file with mode: 0644]
include/hw/sh4/sh_intc.h [new file with mode: 0644]
include/hw/southbridge/ich9.h [new file with mode: 0644]
include/hw/southbridge/piix.h [new file with mode: 0644]
include/hw/sparc/grlib.h [new file with mode: 0644]
include/hw/sparc/sparc32_dma.h [new file with mode: 0644]
include/hw/sparc/sparc64.h [new file with mode: 0644]
include/hw/sparc/sun4m_iommu.h [new file with mode: 0644]
include/hw/sparc/sun4u_iommu.h [new file with mode: 0644]
include/hw/ssi/aspeed_smc.h [new file with mode: 0644]
include/hw/ssi/ibex_spi_host.h [new file with mode: 0644]
include/hw/ssi/imx_spi.h [new file with mode: 0644]
include/hw/ssi/mss-spi.h [new file with mode: 0644]
include/hw/ssi/npcm7xx_fiu.h [new file with mode: 0644]
include/hw/ssi/npcm_pspi.h [new file with mode: 0644]
include/hw/ssi/pl022.h [new file with mode: 0644]
include/hw/ssi/sifive_spi.h [new file with mode: 0644]
include/hw/ssi/ssi.h [new file with mode: 0644]
include/hw/ssi/stm32f2xx_spi.h [new file with mode: 0644]
include/hw/ssi/xilinx_spips.h [new file with mode: 0644]
include/hw/ssi/xlnx-versal-ospi.h [new file with mode: 0644]
include/hw/stream.h
include/hw/timer/a9gtimer.h [new file with mode: 0644]
include/hw/timer/allwinner-a10-pit.h [new file with mode: 0644]
include/hw/timer/arm_mptimer.h [new file with mode: 0644]
include/hw/timer/armv7m_systick.h [new file with mode: 0644]
include/hw/timer/aspeed_timer.h [new file with mode: 0644]
include/hw/timer/avr_timer16.h [new file with mode: 0644]
include/hw/timer/bcm2835_systmr.h [new file with mode: 0644]
include/hw/timer/cadence_ttc.h [new file with mode: 0644]
include/hw/timer/cmsdk-apb-dualtimer.h [new file with mode: 0644]
include/hw/timer/cmsdk-apb-timer.h [new file with mode: 0644]
include/hw/timer/digic-timer.h [new file with mode: 0644]
include/hw/timer/hpet.h [new file with mode: 0644]
include/hw/timer/i8254.h [new file with mode: 0644]
include/hw/timer/i8254_internal.h [new file with mode: 0644]
include/hw/timer/ibex_timer.h [new file with mode: 0644]
include/hw/timer/imx_epit.h [new file with mode: 0644]
include/hw/timer/imx_gpt.h [new file with mode: 0644]
include/hw/timer/mips_gictimer.h [new file with mode: 0644]
include/hw/timer/mss-timer.h [new file with mode: 0644]
include/hw/timer/npcm7xx_timer.h [new file with mode: 0644]
include/hw/timer/nrf51_timer.h [new file with mode: 0644]
include/hw/timer/renesas_cmt.h [new file with mode: 0644]
include/hw/timer/renesas_tmr.h [new file with mode: 0644]
include/hw/timer/sifive_pwm.h [new file with mode: 0644]
include/hw/timer/sse-counter.h [new file with mode: 0644]
include/hw/timer/sse-timer.h [new file with mode: 0644]
include/hw/timer/stellaris-gptm.h [new file with mode: 0644]
include/hw/timer/stm32f2xx_timer.h [new file with mode: 0644]
include/hw/timer/tmu012.h [new file with mode: 0644]
include/hw/tricore/tc27x_soc.h [new file with mode: 0644]
include/hw/tricore/triboard.h [new file with mode: 0644]
include/hw/tricore/tricore.h [new file with mode: 0644]
include/hw/tricore/tricore_testdevice.h [new file with mode: 0644]
include/hw/usb.h
include/hw/usb/chipidea.h [new file with mode: 0644]
include/hw/usb/dwc2-regs.h [new file with mode: 0644]
include/hw/usb/ehci-regs.h [new file with mode: 0644]
include/hw/usb/hcd-dwc3.h [new file with mode: 0644]
include/hw/usb/hcd-musb.h [new file with mode: 0644]
include/hw/usb/hid.h [new file with mode: 0644]
include/hw/usb/imx-usb-phy.h [new file with mode: 0644]
include/hw/usb/msd.h [new file with mode: 0644]
include/hw/usb/uhci-regs.h [new file with mode: 0644]
include/hw/usb/xhci.h [new file with mode: 0644]
include/hw/usb/xlnx-usb-subsystem.h [new file with mode: 0644]
include/hw/usb/xlnx-versal-usb2-ctrl-regs.h [new file with mode: 0644]
include/hw/vfio/vfio-amd-xgbe.h [new file with mode: 0644]
include/hw/vfio/vfio-calxeda-xgmac.h [new file with mode: 0644]
include/hw/vfio/vfio-common.h [new file with mode: 0644]
include/hw/vfio/vfio-platform.h [new file with mode: 0644]
include/hw/virtio/vdpa-dev.h [new file with mode: 0644]
include/hw/virtio/vhost-backend.h [new file with mode: 0644]
include/hw/virtio/vhost-scsi-common.h [new file with mode: 0644]
include/hw/virtio/vhost-scsi.h [new file with mode: 0644]
include/hw/virtio/vhost-user-blk.h [new file with mode: 0644]
include/hw/virtio/vhost-user-device.h [new file with mode: 0644]
include/hw/virtio/vhost-user-fs.h [new file with mode: 0644]
include/hw/virtio/vhost-user-gpio.h [new file with mode: 0644]
include/hw/virtio/vhost-user-i2c.h [new file with mode: 0644]
include/hw/virtio/vhost-user-rng.h [new file with mode: 0644]
include/hw/virtio/vhost-user-scmi.h [new file with mode: 0644]
include/hw/virtio/vhost-user-scsi.h [new file with mode: 0644]
include/hw/virtio/vhost-user-vsock.h [new file with mode: 0644]
include/hw/virtio/vhost-user.h [new file with mode: 0644]
include/hw/virtio/vhost-vdpa.h [new file with mode: 0644]
include/hw/virtio/vhost-vsock-common.h [new file with mode: 0644]
include/hw/virtio/vhost-vsock.h [new file with mode: 0644]
include/hw/virtio/vhost.h [new file with mode: 0644]
include/hw/virtio/virtio-access.h [new file with mode: 0644]
include/hw/virtio/virtio-balloon.h [new file with mode: 0644]
include/hw/virtio/virtio-blk-common.h [new file with mode: 0644]
include/hw/virtio/virtio-blk.h [new file with mode: 0644]
include/hw/virtio/virtio-bus.h [new file with mode: 0644]
include/hw/virtio/virtio-crypto.h [new file with mode: 0644]
include/hw/virtio/virtio-dmabuf.h [new file with mode: 0644]
include/hw/virtio/virtio-gpu-bswap.h [new file with mode: 0644]
include/hw/virtio/virtio-gpu-pci.h [new file with mode: 0644]
include/hw/virtio/virtio-gpu-pixman.h [new file with mode: 0644]
include/hw/virtio/virtio-gpu.h [new file with mode: 0644]
include/hw/virtio/virtio-input.h [new file with mode: 0644]
include/hw/virtio/virtio-iommu.h [new file with mode: 0644]
include/hw/virtio/virtio-md-pci.h [new file with mode: 0644]
include/hw/virtio/virtio-mem.h [new file with mode: 0644]
include/hw/virtio/virtio-mmio.h [new file with mode: 0644]
include/hw/virtio/virtio-net.h [new file with mode: 0644]
include/hw/virtio/virtio-pci.h [new file with mode: 0644]
include/hw/virtio/virtio-pmem.h [new file with mode: 0644]
include/hw/virtio/virtio-rng.h [new file with mode: 0644]
include/hw/virtio/virtio-scsi.h [new file with mode: 0644]
include/hw/virtio/virtio-serial.h [new file with mode: 0644]
include/hw/virtio/virtio.h [new file with mode: 0644]
include/hw/watchdog/allwinner-wdt.h [new file with mode: 0644]
include/hw/watchdog/cmsdk-apb-watchdog.h [new file with mode: 0644]
include/hw/watchdog/sbsa_gwdt.h [new file with mode: 0644]
include/hw/watchdog/wdt_aspeed.h [new file with mode: 0644]
include/hw/watchdog/wdt_diag288.h [new file with mode: 0644]
include/hw/watchdog/wdt_imx2.h [new file with mode: 0644]
include/hw/xen/arch_hvm.h [new file with mode: 0644]
include/hw/xen/interface/arch-arm.h [new file with mode: 0644]
include/hw/xen/interface/arch-x86/cpuid.h [new file with mode: 0644]
include/hw/xen/interface/arch-x86/xen-x86_32.h [new file with mode: 0644]
include/hw/xen/interface/arch-x86/xen-x86_64.h [new file with mode: 0644]
include/hw/xen/interface/arch-x86/xen.h [new file with mode: 0644]
include/hw/xen/interface/event_channel.h [new file with mode: 0644]
include/hw/xen/interface/features.h [new file with mode: 0644]
include/hw/xen/interface/grant_table.h [new file with mode: 0644]
include/hw/xen/interface/hvm/hvm_op.h [new file with mode: 0644]
include/hw/xen/interface/hvm/params.h [new file with mode: 0644]
include/hw/xen/interface/io/blkif.h [new file with mode: 0644]
include/hw/xen/interface/io/console.h [new file with mode: 0644]
include/hw/xen/interface/io/fbif.h [new file with mode: 0644]
include/hw/xen/interface/io/kbdif.h [new file with mode: 0644]
include/hw/xen/interface/io/netif.h [new file with mode: 0644]
include/hw/xen/interface/io/protocols.h [new file with mode: 0644]
include/hw/xen/interface/io/ring.h [new file with mode: 0644]
include/hw/xen/interface/io/usbif.h [new file with mode: 0644]
include/hw/xen/interface/io/xenbus.h [new file with mode: 0644]
include/hw/xen/interface/io/xs_wire.h [new file with mode: 0644]
include/hw/xen/interface/memory.h [new file with mode: 0644]
include/hw/xen/interface/physdev.h [new file with mode: 0644]
include/hw/xen/interface/sched.h [new file with mode: 0644]
include/hw/xen/interface/trace.h [new file with mode: 0644]
include/hw/xen/interface/vcpu.h [new file with mode: 0644]
include/hw/xen/interface/version.h [new file with mode: 0644]
include/hw/xen/interface/xen-compat.h [new file with mode: 0644]
include/hw/xen/interface/xen.h [new file with mode: 0644]
include/hw/xen/start_info.h [new file with mode: 0644]
include/hw/xen/xen-backend.h [new file with mode: 0644]
include/hw/xen/xen-block.h [new file with mode: 0644]
include/hw/xen/xen-bus-helper.h [new file with mode: 0644]
include/hw/xen/xen-bus.h [new file with mode: 0644]
include/hw/xen/xen-hvm-common.h [new file with mode: 0644]
include/hw/xen/xen-legacy-backend.h [new file with mode: 0644]
include/hw/xen/xen-x86.h [new file with mode: 0644]
include/hw/xen/xen.h [new file with mode: 0644]
include/hw/xen/xen_backend_ops.h [new file with mode: 0644]
include/hw/xen/xen_native.h [new file with mode: 0644]
include/hw/xen/xen_pvdev.h [new file with mode: 0644]
include/hw/xtensa/mx_pic.h [new file with mode: 0644]
include/hw/xtensa/xtensa-isa.h [new file with mode: 0644]
include/io/channel-command.h
include/io/channel-null.h [new file with mode: 0644]
include/io/channel-socket.h
include/io/channel-tls.h
include/io/channel-util.h
include/io/channel.h
include/io/task.h
include/libdecnumber/dconfig.h [new file with mode: 0644]
include/libdecnumber/decContext.h [new file with mode: 0644]
include/libdecnumber/decDPD.h [new file with mode: 0644]
include/libdecnumber/decNumber.h [new file with mode: 0644]
include/libdecnumber/decNumberLocal.h [new file with mode: 0644]
include/libdecnumber/dpd/decimal128.h [new file with mode: 0644]
include/libdecnumber/dpd/decimal128Local.h [new file with mode: 0644]
include/libdecnumber/dpd/decimal32.h [new file with mode: 0644]
include/libdecnumber/dpd/decimal64.h [new file with mode: 0644]
include/migration/blocker.h
include/migration/colo.h
include/migration/global_state.h
include/migration/misc.h
include/migration/qemu-file-types.h
include/migration/register.h
include/migration/snapshot.h
include/migration/vmstate.h
include/monitor/hmp-target.h
include/monitor/hmp.h
include/monitor/monitor.h
include/monitor/qdev.h
include/monitor/qmp-helpers.h [new file with mode: 0644]
include/net/checksum.h
include/net/eth.h
include/net/net.h
include/net/vhost-user.h
include/net/vhost-vdpa.h
include/net/vhost_net.h
include/qapi/compat-policy.h [new file with mode: 0644]
include/qapi/error.h
include/qapi/forward-visitor.h [new file with mode: 0644]
include/qapi/qmp/dispatch.h
include/qapi/qmp/json-writer.h [new file with mode: 0644]
include/qapi/qmp/qbool.h
include/qapi/qmp/qdict.h
include/qapi/qmp/qerror.h
include/qapi/qmp/qjson.h
include/qapi/qmp/qlist.h
include/qapi/qmp/qnull.h
include/qapi/qmp/qnum.h
include/qapi/qmp/qobject.h
include/qapi/qmp/qstring.h
include/qapi/type-helpers.h [new file with mode: 0644]
include/qapi/util.h
include/qapi/visitor-impl.h
include/qapi/visitor.h
include/qemu-common.h [deleted file]
include/qemu-main.h [new file with mode: 0644]
include/qemu/accel.h [new file with mode: 0644]
include/qemu/async-teardown.h [new file with mode: 0644]
include/qemu/atomic.h
include/qemu/atomic128.h
include/qemu/bitmap.h
include/qemu/bitops.h
include/qemu/bswap.h
include/qemu/buffer.h
include/qemu/cacheflush.h [new file with mode: 0644]
include/qemu/cacheinfo.h [new file with mode: 0644]
include/qemu/clang-tsa.h [new file with mode: 0644]
include/qemu/co-shared-resource.h
include/qemu/compiler.h
include/qemu/config-file.h
include/qemu/coroutine-core.h [new file with mode: 0644]
include/qemu/coroutine-tls.h [new file with mode: 0644]
include/qemu/coroutine.h
include/qemu/cpu-float.h [new file with mode: 0644]
include/qemu/cpuid.h
include/qemu/crc-ccitt.h [new file with mode: 0644]
include/qemu/crc32c.h
include/qemu/cutils.h
include/qemu/datadir.h [new file with mode: 0644]
include/qemu/dbus.h
include/qemu/defer-call.h [new file with mode: 0644]
include/qemu/envlist.h
include/qemu/error-report.h
include/qemu/event_notifier.h
include/qemu/fifo8.h
include/qemu/guest-random.h
include/qemu/hbitmap.h
include/qemu/help-texts.h [new file with mode: 0644]
include/qemu/host-utils.h
include/qemu/hw-version.h [new file with mode: 0644]
include/qemu/id.h
include/qemu/int128.h
include/qemu/interval-tree.h [new file with mode: 0644]
include/qemu/iov.h
include/qemu/iova-tree.h
include/qemu/job.h
include/qemu/keyval.h [new file with mode: 0644]
include/qemu/lockable.h
include/qemu/log-for-trace.h
include/qemu/log.h
include/qemu/madvise.h [new file with mode: 0644]
include/qemu/main-loop.h
include/qemu/memalign.h [new file with mode: 0644]
include/qemu/mmap-alloc.h
include/qemu/module.h
include/qemu/mprotect.h [new file with mode: 0644]
include/qemu/nvdimm-utils.h
include/qemu/option.h
include/qemu/osdep.h
include/qemu/plugin-event.h [new file with mode: 0644]
include/qemu/plugin-memory.h
include/qemu/plugin.h
include/qemu/processor.h
include/qemu/progress_meter.h
include/qemu/qemu-plugin.h
include/qemu/qemu-print.h
include/qemu/qemu-progress.h [new file with mode: 0644]
include/qemu/qht.h
include/qemu/qtree.h
include/qemu/range.h
include/qemu/ratelimit.h
include/qemu/rcu.h
include/qemu/rcu_queue.h
include/qemu/readline.h
include/qemu/reserved-region.h [new file with mode: 0644]
include/qemu/selfmap.h
include/qemu/sockets.h
include/qemu/stats64.h
include/qemu/sys_membarrier.h
include/qemu/thread-context.h [new file with mode: 0644]
include/qemu/thread-posix.h
include/qemu/thread-win32.h
include/qemu/thread.h
include/qemu/throttle.h
include/qemu/timer.h
include/qemu/transactions.h [new file with mode: 0644]
include/qemu/typedefs.h
include/qemu/uri.h
include/qemu/userfaultfd.h [new file with mode: 0644]
include/qemu/uuid.h
include/qemu/vfio-helpers.h
include/qemu/vhost-user-server.h
include/qemu/win_dump_defs.h
include/qemu/xattr.h
include/qemu/xxhash.h
include/qemu/yank.h [new file with mode: 0644]
include/qom/object.h
include/qom/object_interfaces.h
include/scsi/constants.h
include/scsi/pr-manager.h
include/scsi/utils.h
include/semihosting/common-semi.h [new file with mode: 0644]
include/semihosting/console.h [new file with mode: 0644]
include/semihosting/guestfd.h [new file with mode: 0644]
include/semihosting/semihost.h [new file with mode: 0644]
include/semihosting/syscalls.h [new file with mode: 0644]
include/semihosting/uaccess.h [new file with mode: 0644]
include/standard-headers/asm-m68k/bootinfo-mac.h [new file with mode: 0644]
include/standard-headers/asm-m68k/bootinfo-virt.h [new file with mode: 0644]
include/standard-headers/asm-m68k/bootinfo.h [new file with mode: 0644]
include/standard-headers/asm-s390/virtio-ccw.h [new file with mode: 0644]
include/standard-headers/asm-x86/bootparam.h [new file with mode: 0644]
include/standard-headers/asm-x86/kvm_para.h [new file with mode: 0644]
include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h [new file with mode: 0644]
include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h [new file with mode: 0644]
include/standard-headers/drm/drm_fourcc.h [new file with mode: 0644]
include/standard-headers/linux/const.h
include/standard-headers/linux/ethtool.h
include/standard-headers/linux/fuse.h
include/standard-headers/linux/input-event-codes.h
include/standard-headers/linux/input.h
include/standard-headers/linux/pci_regs.h
include/standard-headers/linux/pvpanic.h [new file with mode: 0644]
include/standard-headers/linux/udmabuf.h [new file with mode: 0644]
include/standard-headers/linux/vhost_types.h
include/standard-headers/linux/virtio_9p.h
include/standard-headers/linux/virtio_blk.h
include/standard-headers/linux/virtio_bt.h [new file with mode: 0644]
include/standard-headers/linux/virtio_config.h
include/standard-headers/linux/virtio_crypto.h
include/standard-headers/linux/virtio_gpio.h [new file with mode: 0644]
include/standard-headers/linux/virtio_gpu.h
include/standard-headers/linux/virtio_i2c.h [new file with mode: 0644]
include/standard-headers/linux/virtio_ids.h
include/standard-headers/linux/virtio_iommu.h
include/standard-headers/linux/virtio_mem.h
include/standard-headers/linux/virtio_net.h
include/standard-headers/linux/virtio_pci.h
include/standard-headers/linux/virtio_pcidev.h [new file with mode: 0644]
include/standard-headers/linux/virtio_ring.h
include/standard-headers/linux/virtio_scmi.h [new file with mode: 0644]
include/standard-headers/linux/virtio_snd.h [new file with mode: 0644]
include/standard-headers/linux/virtio_vsock.h
include/standard-headers/rdma/vmw_pvrdma-abi.h [new file with mode: 0644]
include/sysemu/accel-blocker.h [new file with mode: 0644]
include/sysemu/accel-ops.h [new file with mode: 0644]
include/sysemu/accel.h [deleted file]
include/sysemu/arch_init.h
include/sysemu/block-backend-common.h [new file with mode: 0644]
include/sysemu/block-backend-global-state.h [new file with mode: 0644]
include/sysemu/block-backend-io.h [new file with mode: 0644]
include/sysemu/block-backend.h
include/sysemu/block-ram-registrar.h [new file with mode: 0644]
include/sysemu/blockdev.h
include/sysemu/cpu-timers-internal.h [new file with mode: 0644]
include/sysemu/cpu-timers.h
include/sysemu/cpus.h
include/sysemu/cryptodev-vhost.h
include/sysemu/cryptodev.h
include/sysemu/device_tree.h
include/sysemu/dirtylimit.h [new file with mode: 0644]
include/sysemu/dirtyrate.h [new file with mode: 0644]
include/sysemu/dma.h
include/sysemu/dump-arch.h
include/sysemu/dump.h
include/sysemu/event-loop-base.h [new file with mode: 0644]
include/sysemu/hax.h [deleted file]
include/sysemu/hostmem.h
include/sysemu/hvf.h
include/sysemu/hvf_int.h [new file with mode: 0644]
include/sysemu/hw_accel.h
include/sysemu/iothread.h
include/sysemu/kvm.h
include/sysemu/kvm_int.h
include/sysemu/kvm_xen.h [new file with mode: 0644]
include/sysemu/memory_mapping.h
include/sysemu/nvmm.h [new file with mode: 0644]
include/sysemu/os-posix.h
include/sysemu/os-win32.h
include/sysemu/qtest.h
include/sysemu/replay.h
include/sysemu/reset.h
include/sysemu/rtc.h [new file with mode: 0644]
include/sysemu/runstate-action.h [new file with mode: 0644]
include/sysemu/runstate.h
include/sysemu/sev.h [deleted file]
include/sysemu/stats.h [new file with mode: 0644]
include/sysemu/sysemu.h
include/sysemu/tcg.h
include/sysemu/tpm.h
include/sysemu/tpm_backend.h
include/sysemu/watchdog.h
include/sysemu/whpx.h
include/sysemu/xen.h
include/tcg/debug-assert.h [new file with mode: 0644]
include/tcg/helper-info.h [new file with mode: 0644]
include/tcg/insn-start-words.h [new file with mode: 0644]
include/tcg/oversized-guest.h [new file with mode: 0644]
include/tcg/startup.h [new file with mode: 0644]
include/tcg/tcg-cond.h [new file with mode: 0644]
include/tcg/tcg-gvec-desc.h [new file with mode: 0644]
include/tcg/tcg-ldst.h [new file with mode: 0644]
include/tcg/tcg-mo.h [new file with mode: 0644]
include/tcg/tcg-op-common.h [new file with mode: 0644]
include/tcg/tcg-op-gvec-common.h [new file with mode: 0644]
include/tcg/tcg-op-gvec.h [new file with mode: 0644]
include/tcg/tcg-op.h [new file with mode: 0644]
include/tcg/tcg-opc.h [new file with mode: 0644]
include/tcg/tcg-temp-internal.h [new file with mode: 0644]
include/tcg/tcg.h [new file with mode: 0644]
include/trace-tcg.h [deleted file]
include/ui/clipboard.h [new file with mode: 0644]
include/ui/console.h
include/ui/dbus-display.h [new file with mode: 0644]
include/ui/dbus-module.h [new file with mode: 0644]
include/ui/egl-context.h
include/ui/egl-helpers.h
include/ui/gtk.h
include/ui/input.h
include/ui/kbd-state.h
include/ui/pixman-minimal.h [new file with mode: 0644]
include/ui/qemu-pixman.h
include/ui/qemu-spice.h
include/ui/rect.h [new file with mode: 0644]
include/ui/sdl2.h
include/ui/spice-display.h
include/ui/surface.h [new file with mode: 0644]
include/user/safe-syscall.h [new file with mode: 0644]
include/user/syscall-trace.h [new file with mode: 0644]
io/channel-buffer.c
io/channel-command.c
io/channel-file.c
io/channel-null.c [new file with mode: 0644]
io/channel-socket.c
io/channel-tls.c
io/channel-util.c
io/channel-watch.c
io/channel-websock.c
io/channel.c
io/dns-resolver.c
io/meson.build
io/net-listener.c
io/trace-events
iothread.c
job-qmp.c
job.c
meson.build
meson_options.txt
monitor/fds.c [new file with mode: 0644]
monitor/hmp-cmds-target.c [new file with mode: 0644]
monitor/hmp-cmds.c
monitor/hmp-target.c [new file with mode: 0644]
monitor/hmp.c
monitor/meson.build
monitor/misc.c [deleted file]
monitor/monitor-internal.h
monitor/monitor.c
monitor/qmp-cmds-control.c
monitor/qmp-cmds.c
monitor/qmp.c
monitor/trace-events
nbd/client-connection.c [new file with mode: 0644]
nbd/client.c
nbd/common.c
nbd/meson.build
nbd/nbd-internal.h
nbd/server.c
nbd/trace-events
os-posix.c
packaging/bridge.conf [changed mode: 0755->0644]
packaging/qemu-tools.spec [changed mode: 0755->0644]
python/.gitignore [new file with mode: 0644]
python/MANIFEST.in [new file with mode: 0644]
python/Makefile [new file with mode: 0644]
python/PACKAGE.rst [new file with mode: 0644]
python/README.rst [new file with mode: 0644]
python/VERSION [new file with mode: 0644]
python/avocado.cfg [new file with mode: 0644]
python/qemu/README.rst [new file with mode: 0644]
python/qemu/machine/README.rst [new file with mode: 0644]
python/qemu/machine/__init__.py [new file with mode: 0644]
python/qemu/machine/console_socket.py [new file with mode: 0644]
python/qemu/machine/machine.py [new file with mode: 0644]
python/qemu/machine/py.typed [new file with mode: 0644]
python/qemu/machine/qtest.py [new file with mode: 0644]
python/qemu/qmp/__init__.py [new file with mode: 0644]
python/qemu/qmp/error.py [new file with mode: 0644]
python/qemu/qmp/events.py [new file with mode: 0644]
python/qemu/qmp/legacy.py [new file with mode: 0644]
python/qemu/qmp/message.py [new file with mode: 0644]
python/qemu/qmp/models.py [new file with mode: 0644]
python/qemu/qmp/protocol.py [new file with mode: 0644]
python/qemu/qmp/py.typed [new file with mode: 0644]
python/qemu/qmp/qmp_client.py [new file with mode: 0644]
python/qemu/qmp/qmp_shell.py [new file with mode: 0644]
python/qemu/qmp/qmp_tui.py [new file with mode: 0644]
python/qemu/qmp/util.py [new file with mode: 0644]
python/qemu/utils/README.rst [new file with mode: 0644]
python/qemu/utils/__init__.py [new file with mode: 0644]
python/qemu/utils/accel.py [new file with mode: 0644]
python/qemu/utils/py.typed [new file with mode: 0644]
python/qemu/utils/qemu_ga_client.py [new file with mode: 0644]
python/qemu/utils/qom.py [new file with mode: 0644]
python/qemu/utils/qom_common.py [new file with mode: 0644]
python/qemu/utils/qom_fuse.py [new file with mode: 0644]
python/scripts/mkvenv.py [new file with mode: 0644]
python/scripts/vendor.py [new file with mode: 0644]
python/setup.cfg [new file with mode: 0644]
python/setup.py [new file with mode: 0644]
python/tests/flake8.sh [new file with mode: 0755]
python/tests/iotests-mypy.sh [new file with mode: 0755]
python/tests/iotests-pylint.sh [new file with mode: 0755]
python/tests/isort.sh [new file with mode: 0755]
python/tests/minreqs.txt [new file with mode: 0644]
python/tests/mypy.sh [new file with mode: 0755]
python/tests/protocol.py [new file with mode: 0644]
python/tests/pylint.sh [new file with mode: 0755]
python/wheels/meson-1.2.3-py3-none-any.whl [new file with mode: 0644]
python/wheels/tomli-2.0.1-py3-none-any.whl [new file with mode: 0644]
pythondeps.toml [new file with mode: 0644]
qapi/acpi.json
qapi/audio.json
qapi/authz.json
qapi/block-core.json
qapi/block-export.json
qapi/block.json
qapi/char.json
qapi/common.json
qapi/compat.json [new file with mode: 0644]
qapi/control.json
qapi/crypto.json
qapi/cryptodev.json [new file with mode: 0644]
qapi/cxl.json [new file with mode: 0644]
qapi/dump.json
qapi/error.json
qapi/introspect.json
qapi/job.json
qapi/machine-common.json [new file with mode: 0644]
qapi/machine-target.json
qapi/machine.json
qapi/meson.build
qapi/migration.json
qapi/misc-target.json
qapi/misc.json
qapi/net.json
qapi/opts-visitor.c
qapi/pci.json
qapi/pragma.json
qapi/qapi-forward-visitor.c [new file with mode: 0644]
qapi/qapi-schema.json
qapi/qapi-type-helpers.c [new file with mode: 0644]
qapi/qapi-util.c
qapi/qapi-visit-core.c
qapi/qdev.json
qapi/qmp-dispatch.c
qapi/qmp-event.c
qapi/qmp-registry.c
qapi/qobject-input-visitor.c
qapi/qobject-output-visitor.c
qapi/qom.json
qapi/rdma.json
qapi/replay.json
qapi/rocker.json
qapi/run-state.json
qapi/sockets.json
qapi/stats.json [new file with mode: 0644]
qapi/string-output-visitor.c
qapi/tpm.json
qapi/trace-events
qapi/trace.json
qapi/transaction.json
qapi/ui.json
qapi/virtio.json [new file with mode: 0644]
qapi/yank.json [new file with mode: 0644]
qemu-img-cmds.hx
qemu-img.c
qemu-io-cmds.c
qemu-io.c
qemu-nbd.c
qemu-options-wrapper.h [deleted file]
qemu-options.h [deleted file]
qemu-options.hx
qobject/block-qdict.c
qobject/json-lexer.c
qobject/json-parser.c
qobject/json-writer.c [new file with mode: 0644]
qobject/meson.build
qobject/qbool.c
qobject/qdict.c
qobject/qjson.c
qobject/qlist.c
qobject/qnull.c
qobject/qnum.c
qobject/qobject-internal.h [new file with mode: 0644]
qobject/qobject.c
qobject/qstring.c
qom/meson.build
qom/object.c
qom/object_interfaces.c
qom/qom-hmp-cmds.c
qom/qom-qmp-cmds.c
qom/trace-events
scripts/analyse-9p-simpletrace.py [changed mode: 0755->0644]
scripts/analyse-locks-simpletrace.py [changed mode: 0755->0644]
scripts/analyze-inclusions
scripts/analyze-migration.py [changed mode: 0755->0644]
scripts/archive-source.sh
scripts/block-coroutine-wrapper.py
scripts/checkpatch.pl [changed mode: 0755->0644]
scripts/ci/coverage-summary.sh [new file with mode: 0755]
scripts/ci/gitlab-kubernetes-runners/values.yaml [new file with mode: 0644]
scripts/ci/gitlab-pipeline-status [changed mode: 0755->0644]
scripts/ci/org.centos/stream/8/build-environment.yml [new file with mode: 0644]
scripts/ci/org.centos/stream/8/x86_64/configure [new file with mode: 0755]
scripts/ci/org.centos/stream/8/x86_64/test-avocado [new file with mode: 0644]
scripts/ci/org.centos/stream/README [new file with mode: 0644]
scripts/ci/setup/.gitignore [new file with mode: 0644]
scripts/ci/setup/build-environment.yml [new file with mode: 0644]
scripts/ci/setup/gitlab-runner.yml [new file with mode: 0644]
scripts/ci/setup/inventory.template [new file with mode: 0644]
scripts/ci/setup/vars.yml.template [new file with mode: 0644]
scripts/clean-header-guards.pl [changed mode: 0755->0644]
scripts/clean-includes [changed mode: 0755->0644]
scripts/cleanup-trace-events.pl [changed mode: 0755->0644]
scripts/cocci-macro-file.h
scripts/coccinelle/memory-region-housekeeping.cocci
scripts/coccinelle/return_directly.cocci
scripts/coccinelle/timer-del-timer-free.cocci [new file with mode: 0644]
scripts/coccinelle/use-g_new-etc.cocci [new file with mode: 0644]
scripts/codeconverter/codeconverter/qom_macros.py
scripts/codeconverter/converter.py [changed mode: 0755->0644]
scripts/coverage/compare_gcov_json.py [new file with mode: 0644]
scripts/coverity-model.c [deleted file]
scripts/coverity-scan/COMPONENTS.md [new file with mode: 0644]
scripts/coverity-scan/coverity-scan.docker
scripts/coverity-scan/model.c [new file with mode: 0644]
scripts/coverity-scan/run-coverity-scan [changed mode: 0755->0644]
scripts/cpu-x86-uarch-abi.py [new file with mode: 0644]
scripts/decodetree.py
scripts/device-crash-test [changed mode: 0755->0644]
scripts/disas-objdump.pl [changed mode: 0755->0644]
scripts/entitlement.sh [new file with mode: 0755]
scripts/extract-vsssdk-headers [changed mode: 0755->0644]
scripts/feature_to_c.py [new file with mode: 0644]
scripts/feature_to_c.sh [deleted file]
scripts/fix-multiline-comments.sh
scripts/gensyscalls.sh
scripts/get_maintainer.pl [changed mode: 0755->0644]
scripts/git-submodule.sh
scripts/git.orderfile
scripts/hxtool [changed mode: 0755->0644]
scripts/hxtool-conv.pl [deleted file]
scripts/kernel-doc [changed mode: 0755->0644]
scripts/kvm/kvm_flightrecorder [changed mode: 0755->0644]
scripts/kvm/vmxcap [changed mode: 0755->0644]
scripts/make-config-poison.sh [new file with mode: 0755]
scripts/make-release [changed mode: 0755->0644]
scripts/meson-buildoptions.py [new file with mode: 0644]
scripts/meson-buildoptions.sh [new file with mode: 0755]
scripts/meson.build
scripts/modinfo-collect.py [new file with mode: 0644]
scripts/modinfo-generate.py [new file with mode: 0644]
scripts/mtest2make.py
scripts/nsis.py
scripts/oss-fuzz/build.sh
scripts/oss-fuzz/instrumentation-filter-template [new file with mode: 0644]
scripts/oss-fuzz/lsan_suppressions.txt [new file with mode: 0644]
scripts/oss-fuzz/minimize_qtest_trace.py [changed mode: 0755->0644]
scripts/oss-fuzz/output_reproducer.py [new file with mode: 0644]
scripts/oss-fuzz/reorder_fuzzer_qtest_trace.py [changed mode: 0755->0644]
scripts/performance/dissect.py [changed mode: 0755->0644]
scripts/performance/topN_callgrind.py [changed mode: 0755->0644]
scripts/performance/topN_perf.py [changed mode: 0755->0644]
scripts/probe-gdb-support.py [new file with mode: 0644]
scripts/python_qmp_updater.py [new file with mode: 0644]
scripts/qapi/.flake8
scripts/qapi/commands.py
scripts/qapi/common.py
scripts/qapi/error.py
scripts/qapi/events.py
scripts/qapi/expr.py
scripts/qapi/gen.py
scripts/qapi/introspect.py
scripts/qapi/main.py
scripts/qapi/mypy.ini
scripts/qapi/parser.py
scripts/qapi/pylintrc
scripts/qapi/schema.py
scripts/qapi/source.py
scripts/qapi/types.py
scripts/qapi/visit.py
scripts/qemu-binfmt-conf.sh
scripts/qemu-gdb.py
scripts/qemu-guest-agent/fsfreeze-hook [changed mode: 0755->0644]
scripts/qemu-guest-agent/fsfreeze-hook.d/mysql-flush.sh.sample [changed mode: 0755->0644]
scripts/qemu-stamp.py [new file with mode: 0644]
scripts/qemu-trace-stap [changed mode: 0755->0644]
scripts/qemugdb/coroutine.py
scripts/qmp/qemu-ga-client [changed mode: 0755->0644]
scripts/qmp/qmp [changed mode: 0755->0644]
scripts/qmp/qmp-shell [changed mode: 0755->0644]
scripts/qmp/qmp-shell-wrap [new file with mode: 0644]
scripts/qmp/qom-fuse [changed mode: 0755->0644]
scripts/qmp/qom-get [changed mode: 0755->0644]
scripts/qmp/qom-list [changed mode: 0755->0644]
scripts/qmp/qom-set [changed mode: 0755->0644]
scripts/qmp/qom-tree [changed mode: 0755->0644]
scripts/qom-cast-macro-clean-cocci-gen.py [new file with mode: 0644]
scripts/render_block_graph.py [changed mode: 0755->0644]
scripts/replay-dump.py [changed mode: 0755->0644]
scripts/shaderinclude.pl [deleted file]
scripts/shaderinclude.py [new file with mode: 0644]
scripts/show-fixed-bugs.sh [deleted file]
scripts/simplebench/bench-backup.py [new file with mode: 0644]
scripts/simplebench/bench-example.py
scripts/simplebench/bench_block_job.py [changed mode: 0755->0644]
scripts/simplebench/bench_prealloc.py [new file with mode: 0644]
scripts/simplebench/bench_write_req.py [changed mode: 0755->0644]
scripts/simplebench/img_bench_templater.py [new file with mode: 0644]
scripts/simplebench/results_to_text.py [new file with mode: 0644]
scripts/simplebench/simplebench.py
scripts/simplebench/table_templater.py [new file with mode: 0644]
scripts/simpletrace.py [changed mode: 0755->0644]
scripts/switch-timer-api [deleted file]
scripts/symlink-install-tree.py [new file with mode: 0644]
scripts/tap-driver.pl [deleted file]
scripts/tap-merge.pl [deleted file]
scripts/test-driver.py [deleted file]
scripts/tracetool.py [changed mode: 0755->0644]
scripts/tracetool/__init__.py
scripts/tracetool/backend/ftrace.py
scripts/tracetool/backend/log.py
scripts/tracetool/backend/syslog.py
scripts/tracetool/format/c.py
scripts/tracetool/format/h.py
scripts/tracetool/format/log_stap.py
scripts/tracetool/format/tcg_h.py [deleted file]
scripts/tracetool/format/tcg_helper_c.py [deleted file]
scripts/tracetool/format/tcg_helper_h.py [deleted file]
scripts/tracetool/format/tcg_helper_wrapper_h.py [deleted file]
scripts/tracetool/format/ust_events_h.py
scripts/tracetool/transform.py [deleted file]
scripts/tracetool/vcpu.py
scripts/travis/coverage-summary.sh [deleted file]
scripts/u2f-setup-gen.py [changed mode: 0755->0644]
scripts/update-linux-headers.sh
scripts/update-mips-syscall-args.sh
scripts/userfaultfd-wrlat.py [new file with mode: 0644]
scripts/vmstate-static-checker.py [changed mode: 0755->0644]
scripts/xen-detect.c [new file with mode: 0644]
scripts/xml-preprocess-test.py [new file with mode: 0644]
scripts/xml-preprocess.py [new file with mode: 0644]
scsi/pr-manager-helper.c
scsi/pr-manager.c
scsi/qemu-pr-helper.c
scsi/trace-events
scsi/utils.c
stubs/arch_type.c [deleted file]
stubs/colo-compare.c [new file with mode: 0644]
stubs/colo.c [new file with mode: 0644]
stubs/error-printf.c
stubs/gdbstub.c
stubs/get-vm-name.c
stubs/graph-lock.c [new file with mode: 0644]
stubs/icount.c
stubs/iothread-lock-block.c [new file with mode: 0644]
stubs/iothread-lock.c
stubs/iothread.c [deleted file]
stubs/machine-init-done.c [deleted file]
stubs/memory_device.c [new file with mode: 0644]
stubs/meson.build
stubs/migr-blocker.c
stubs/module-opts.c [new file with mode: 0644]
stubs/monitor-core.c
stubs/pci-host-piix.c [deleted file]
stubs/physmem.c [new file with mode: 0644]
stubs/qdev.c [new file with mode: 0644]
stubs/qmp-command-available.c [new file with mode: 0644]
stubs/qmp-quit.c [new file with mode: 0644]
stubs/qmp_memory_device.c [deleted file]
stubs/ramfb.c
stubs/replay-tools.c
stubs/replay.c
stubs/semihost-all.c [new file with mode: 0644]
stubs/semihost.c
stubs/set-fd-handler.c [deleted file]
stubs/tpm.c [deleted file]
stubs/trace-control.c
stubs/usb-dev-stub.c [new file with mode: 0644]
stubs/virtio-md-pci.c [new file with mode: 0644]
stubs/vmgenid.c [deleted file]
stubs/vmstate.c
stubs/xen-hw-stub.c
trace-events
trace/control-internal.h
trace/control-target.c
trace/control-vcpu.h [deleted file]
trace/control.c
trace/control.h
trace/event-internal.h
trace/mem-internal.h [deleted file]
trace/mem.h [deleted file]
trace/meson.build
trace/qmp.c
trace/simple.c
trace/simple.h
trace/trace-hmp-cmds.c [new file with mode: 0644]
util/aio-posix.c
util/aio-posix.h
util/aio-wait.c
util/aio-win32.c
util/async.c
util/atomic64.c
util/bitmap.c
util/bitops.c
util/bufferiszero.c
util/cacheflush.c [new file with mode: 0644]
util/cacheinfo.c [deleted file]
util/compatfd.c
util/coroutine-sigaltstack.c
util/coroutine-ucontext.c
util/coroutine-win32.c [deleted file]
util/coroutine-windows.c [new file with mode: 0644]
util/cpuinfo-aarch64.c [new file with mode: 0644]
util/cpuinfo-i386.c [new file with mode: 0644]
util/cpuinfo-loongarch.c [new file with mode: 0644]
util/cpuinfo-ppc.c [new file with mode: 0644]
util/crc-ccitt.c [new file with mode: 0644]
util/crc32c.c
util/cutils.c
util/defer-call.c [new file with mode: 0644]
util/envlist.c
util/error-report.c [new file with mode: 0644]
util/error.c
util/event_notifier-posix.c
util/event_notifier-win32.c
util/fdmon-epoll.c
util/fdmon-io_uring.c
util/fdmon-poll.c
util/fifo8.c
util/filemonitor-inotify.c
util/guest-random.c
util/hbitmap.c
util/hexdump.c
util/host-utils.c
util/id.c
util/int128.c [new file with mode: 0644]
util/interval-tree.c [new file with mode: 0644]
util/iov.c
util/iova-tree.c
util/keyval.c
util/log.c
util/main-loop.c
util/memalign.c [new file with mode: 0644]
util/meson.build
util/mmap-alloc.c
util/module.c
util/nvdimm-utils.c
util/osdep.c
util/oslib-posix.c
util/oslib-win32.c
util/pagesize.c [deleted file]
util/qdist.c
util/qemu-co-shared-resource.c
util/qemu-co-timeout.c [new file with mode: 0644]
util/qemu-config.c
util/qemu-coroutine-io.c
util/qemu-coroutine-lock.c
util/qemu-coroutine-sleep.c
util/qemu-coroutine.c
util/qemu-error.c [deleted file]
util/qemu-openpty.c [deleted file]
util/qemu-option.c
util/qemu-progress.c
util/qemu-sockets.c
util/qemu-thread-posix.c
util/qemu-thread-win32.c
util/qemu-timer-common.c
util/qemu-timer.c
util/qht.c
util/qsp.c
util/qtree.c
util/range.c
util/rcu.c
util/readline.c
util/reserved-region.c [new file with mode: 0644]
util/selfmap.c
util/stats64.c
util/systemd.c
util/thread-context.c [new file with mode: 0644]
util/thread-pool.c
util/throttle.c
util/trace-events
util/transactions.c [new file with mode: 0644]
util/uri.c
util/userfaultfd.c [new file with mode: 0644]
util/uuid.c
util/vfio-helpers.c
util/vhost-user-server.c
util/yank.c [new file with mode: 0644]

index 3d2fe2ecda8b3d8338ccf602b571a3f8e3dc31f1..a217cb7bfe96ff6c8548f510c62c6e16b2d5faa9 100644 (file)
@@ -1,2 +1,4 @@
 *.c.inc         diff=c
 *.h.inc         diff=c
+*.m             diff=objc
+*.py            diff=python
index b32bca1315e1d303d62b58989a78787863efa052..cb3040bfb33476ec86bcd70520445c881cdab10f 100644 (file)
@@ -1,12 +1,21 @@
 /GNUmakefile
 /build/
+/.cache/
+/.vscode/
 *.pyc
 .sdk
 .stgit-*
 .git-submodule-status
+.clang-format
+.gdb_history
 cscope.*
 tags
 TAGS
+GPATH
+GRTAGS
+GTAGS
 *~
 *.ast_raw
 *.depend_raw
+*.swp
+*.gcov
diff --git a/LICENSE b/LICENSE
new file mode 100644 (file)
index 0000000..f19b018
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,27 @@
+The QEMU distribution includes both the QEMU emulator and
+various firmware files.  These are separate programs that are
+distributed together for our users' convenience, and they have
+separate licenses.
+
+The following points clarify the license of the QEMU emulator:
+
+1) The QEMU emulator as a whole is released under the GNU General
+Public License, version 2.
+
+2) Parts of the QEMU emulator have specific licenses which are compatible
+with the GNU General Public License, version 2. Hence each source file
+contains its own licensing information.  Source files with no licensing
+information are released under the GNU General Public License, version
+2 or (at your option) any later version.
+
+As of July 2013, contributions under version 2 of the GNU General Public
+License (and no later version) are only accepted for the following files
+or directories: bsd-user/, linux-user/, hw/vfio/, hw/xen/xen_pt*.
+
+3) The Tiny Code Generator (TCG) is mostly under the BSD or MIT licenses;
+   but some parts may be GPLv2 or other licenses. Again, see the
+   specific licensing information in each source file.
+
+4) QEMU is a trademark of Fabrice Bellard.
+
+Fabrice Bellard and the QEMU team
diff --git a/MAINTAINERS b/MAINTAINERS
new file mode 100644 (file)
index 0000000..695e0bd
--- /dev/null
@@ -0,0 +1,4163 @@
+QEMU Maintainers
+================
+
+The intention of this file is not to establish who owns what portions of the
+code base, but to provide a set of names that developers can consult when they
+have a question about a particular subset and also to provide a set of names
+to be CC'd when submitting a patch to obtain appropriate review.
+
+In general, if you have a question about inclusion of a patch, you should
+consult qemu-devel and not any specific individual privately.
+
+Descriptions of section entries:
+
+       M: Mail patches to: FullName <address@domain>
+          Maintainers are looking after a certain area and must be CCed on
+          patches. They are considered the main contact point.
+       R: Designated reviewer: FullName <address@domain>
+          These reviewers should be CCed on patches.
+          Reviewers are familiar with the subject matter and provide feedback
+          even though they are not maintainers.
+       L: Mailing list that is relevant to this area
+          These lists should be CCed on patches.
+       W: Web-page with status/info
+       Q: Patchwork web based patch tracking system site
+       T: SCM tree type and location.  Type is one of: git, hg, quilt, stgit.
+       S: Status, one of the following (keep in sync with docs/devel/maintainers.rst):
+          Supported:   Someone is actually paid to look after this.
+          Maintained:  Someone actually looks after it.
+          Odd Fixes:   It has a maintainer but they don't have time to do
+                       much other than throw the odd patch in. See below.
+          Orphan:      No current maintainer [but maybe you could take the
+                       role as you write your new code].
+          Obsolete:    Old code. Something tagged obsolete generally means
+                       it has been replaced by a better system and you
+                       should be using that.
+       F: Files and directories with wildcard patterns.
+          A trailing slash includes all files and subdirectory files.
+          F:   drivers/net/    all files in and below drivers/net
+          F:   drivers/net/*   all files in drivers/net, but not below
+          F:   */net/*         all files in "any top level directory"/net
+          One pattern per line.  Multiple F: lines acceptable.
+       X: Files and directories that are NOT maintained, same rules as F:
+          Files exclusions are tested before file matches.
+          Can be useful for excluding a specific subdirectory, for instance:
+          F:   net/
+          X:   net/ipv6/
+          matches all files in and below net excluding net/ipv6/
+       K: Keyword perl extended regex pattern to match content in a
+          patch or file.  For instance:
+          K: of_get_profile
+             matches patches or files that contain "of_get_profile"
+          K: \b(printk|pr_(info|err))\b
+             matches patches or files that contain one or more of the words
+             printk, pr_info or pr_err
+          One regex pattern per line.  Multiple K: lines acceptable.
+
+
+General Project Administration
+------------------------------
+M: Peter Maydell <peter.maydell@linaro.org>
+
+All patches CC here
+L: qemu-devel@nongnu.org
+F: *
+F: */
+
+Project policy and developer guides
+R: Alex Bennée <alex.bennee@linaro.org>
+R: Daniel P. Berrangé <berrange@redhat.com>
+R: Thomas Huth <thuth@redhat.com>
+R: Markus Armbruster <armbru@redhat.com>
+R: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Juan Quintela <quintela@redhat.com>
+W: https://www.qemu.org/docs/master/devel/index.html
+S: Odd Fixes
+F: docs/devel/style.rst
+F: docs/devel/code-of-conduct.rst
+F: docs/devel/conflict-resolution.rst
+F: docs/devel/submitting-a-patch.rst
+F: docs/devel/submitting-a-pull-request.rst
+
+Responsible Disclosure, Reporting Security Issues
+-------------------------------------------------
+W: https://wiki.qemu.org/SecurityProcess
+M: Michael S. Tsirkin <mst@redhat.com>
+L: secalert@redhat.com
+
+Trivial patches
+---------------
+Trivial patches
+M: Michael Tokarev <mjt@tls.msk.ru>
+M: Laurent Vivier <laurent@vivier.eu>
+S: Maintained
+L: qemu-trivial@nongnu.org
+K: ^Subject:.*(?i)trivial
+F: docs/devel/trivial-patches.rst
+T: git git://git.corpit.ru/qemu.git trivial-patches
+T: git https://github.com/vivier/qemu.git trivial-patches
+
+Architecture support
+--------------------
+S390 general architecture support
+M: Thomas Huth <thuth@redhat.com>
+S: Supported
+F: configs/devices/s390x-softmmu/default.mak
+F: gdb-xml/s390*.xml
+F: hw/char/sclp*.[hc]
+F: hw/char/terminal3270.c
+F: hw/intc/s390_flic.c
+F: hw/intc/s390_flic_kvm.c
+F: hw/s390x/
+F: hw/vfio/ap.c
+F: hw/vfio/ccw.c
+F: hw/watchdog/wdt_diag288.c
+F: include/hw/s390x/
+F: include/hw/watchdog/wdt_diag288.h
+F: pc-bios/s390-ccw/
+F: pc-bios/s390-ccw.img
+F: target/s390x/
+F: docs/system/target-s390x.rst
+F: docs/system/s390x/
+F: tests/migration/s390x/
+K: ^Subject:.*(?i)s390x?
+L: qemu-s390x@nongnu.org
+
+MIPS general architecture support
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Jiaxun Yang <jiaxun.yang@flygoat.com>
+S: Odd Fixes
+K: ^Subject:.*(?i)mips
+F: docs/system/target-mips.rst
+F: configs/targets/mips*
+
+X86 general architecture support
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: configs/devices/i386-softmmu/default.mak
+F: configs/targets/i386-softmmu.mak
+F: configs/targets/x86_64-softmmu.mak
+F: docs/system/target-i386*
+F: target/i386/*.[ch]
+F: target/i386/Kconfig
+F: target/i386/meson.build
+
+Guest CPU cores (TCG)
+---------------------
+Overall TCG CPUs
+M: Richard Henderson <richard.henderson@linaro.org>
+R: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: system/cpus.c
+F: system/watchpoint.c
+F: cpu-common.c
+F: cpu-target.c
+F: page-vary-target.c
+F: page-vary-common.c
+F: accel/tcg/
+F: accel/stubs/tcg-stub.c
+F: util/cacheinfo.c
+F: util/cacheflush.c
+F: scripts/decodetree.py
+F: docs/devel/decodetree.rst
+F: docs/devel/tcg*
+F: include/exec/cpu*.h
+F: include/exec/exec-all.h
+F: include/exec/tb-flush.h
+F: include/exec/target_long.h
+F: include/exec/helper*.h
+F: include/exec/helper*.h.inc
+F: include/exec/helper-info.c.inc
+F: include/sysemu/cpus.h
+F: include/sysemu/tcg.h
+F: include/hw/core/tcg-cpu-ops.h
+F: host/include/*/host/cpuinfo.h
+F: util/cpuinfo-*.c
+F: include/tcg/
+F: tests/decode/
+
+FPU emulation
+M: Aurelien Jarno <aurelien@aurel32.net>
+M: Peter Maydell <peter.maydell@linaro.org>
+M: Alex Bennée <alex.bennee@linaro.org>
+S: Maintained
+F: fpu/
+F: include/fpu/
+F: tests/fp/
+
+Alpha TCG CPUs
+M: Richard Henderson <richard.henderson@linaro.org>
+S: Maintained
+F: target/alpha/
+F: tests/tcg/alpha/
+F: disas/alpha.c
+
+ARM TCG CPUs
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: target/arm/
+F: target/arm/tcg/
+F: tests/tcg/arm/
+F: tests/tcg/aarch64/
+F: tests/qtest/arm-cpu-features.c
+F: hw/arm/
+F: hw/cpu/a*mpcore.c
+F: include/hw/cpu/a*mpcore.h
+F: docs/system/target-arm.rst
+F: docs/system/arm/cpu-features.rst
+
+ARM SMMU
+M: Eric Auger <eric.auger@redhat.com>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/smmu*
+F: include/hw/arm/smmu*
+F: tests/avocado/smmu.py
+
+AVR TCG CPUs
+M: Michael Rolnik <mrolnik@gmail.com>
+S: Maintained
+F: docs/system/target-avr.rst
+F: gdb-xml/avr-cpu.xml
+F: target/avr/
+F: tests/avocado/machine_avr6.py
+
+CRIS TCG CPUs
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+S: Maintained
+F: target/cris/
+F: hw/cris/
+F: include/hw/cris/
+F: tests/tcg/cris/
+F: disas/cris.c
+
+Hexagon TCG CPUs
+M: Brian Cain <bcain@quicinc.com>
+S: Supported
+F: target/hexagon/
+X: target/hexagon/idef-parser/
+X: target/hexagon/gen_idef_parser_funcs.py
+F: linux-user/hexagon/
+F: tests/tcg/hexagon/
+F: disas/hexagon.c
+F: configs/targets/hexagon-linux-user/default.mak
+F: docker/dockerfiles/debian-hexagon-cross.docker
+F: gdb-xml/hexagon*.xml
+
+Hexagon idef-parser
+M: Alessandro Di Federico <ale@rev.ng>
+M: Anton Johansson <anjo@rev.ng>
+S: Supported
+F: target/hexagon/idef-parser/
+F: target/hexagon/gen_idef_parser_funcs.py
+
+HPPA (PA-RISC) TCG CPUs
+M: Richard Henderson <richard.henderson@linaro.org>
+S: Maintained
+F: target/hppa/
+F: disas/hppa.c
+F: tests/tcg/hppa/
+
+LoongArch TCG CPUs
+M: Song Gao <gaosong@loongson.cn>
+S: Maintained
+F: target/loongarch/
+F: tests/tcg/loongarch64/
+F: tests/avocado/machine_loongarch.py
+
+M68K TCG CPUs
+M: Laurent Vivier <laurent@vivier.eu>
+S: Maintained
+F: target/m68k/
+F: disas/m68k.c
+F: tests/tcg/m68k/
+
+MicroBlaze TCG CPUs
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+S: Maintained
+F: target/microblaze/
+F: hw/microblaze/
+F: disas/microblaze.c
+F: tests/docker/dockerfiles/debian-microblaze-cross.d/build-toolchain.sh
+
+MIPS TCG CPUs
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Aurelien Jarno <aurelien@aurel32.net>
+R: Jiaxun Yang <jiaxun.yang@flygoat.com>
+R: Aleksandar Rikalo <aleksandar.rikalo@syrmia.com>
+S: Odd Fixes
+F: target/mips/
+F: disas/*mips.c
+F: docs/system/cpu-models-mips.rst.inc
+F: tests/tcg/mips/
+
+NiosII TCG CPUs
+R: Chris Wulff <crwulff@gmail.com>
+R: Marek Vasut <marex@denx.de>
+S: Orphan
+F: target/nios2/
+F: hw/nios2/
+F: hw/intc/nios2_vic.c
+F: disas/nios2.c
+F: include/hw/intc/nios2_vic.h
+F: configs/devices/nios2-softmmu/default.mak
+F: tests/docker/dockerfiles/debian-nios2-cross.d/build-toolchain.sh
+F: tests/tcg/nios2/
+
+OpenRISC TCG CPUs
+M: Stafford Horne <shorne@gmail.com>
+S: Odd Fixes
+F: docs/system/openrisc/cpu-features.rst
+F: target/openrisc/
+F: hw/openrisc/
+F: include/hw/openrisc/
+F: tests/tcg/openrisc/
+
+PowerPC TCG CPUs
+M: Nicholas Piggin <npiggin@gmail.com>
+M: Daniel Henrique Barboza <danielhb413@gmail.com>
+R: Cédric Le Goater <clg@kaod.org>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: target/ppc/
+F: hw/ppc/ppc.c
+F: hw/ppc/ppc_booke.c
+F: include/hw/ppc/ppc.h
+F: hw/ppc/meson.build
+F: hw/ppc/trace*
+F: configs/devices/ppc*
+F: docs/system/ppc/embedded.rst
+F: docs/system/target-ppc.rst
+F: tests/tcg/ppc*/*
+
+RISC-V TCG CPUs
+M: Palmer Dabbelt <palmer@dabbelt.com>
+M: Alistair Francis <alistair.francis@wdc.com>
+M: Bin Meng <bin.meng@windriver.com>
+R: Weiwei Li <liwei1518@gmail.com>
+R: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
+R: Liu Zhiwei <zhiwei_liu@linux.alibaba.com>
+L: qemu-riscv@nongnu.org
+S: Supported
+F: configs/targets/riscv*
+F: docs/system/target-riscv.rst
+F: target/riscv/
+F: hw/riscv/
+F: hw/intc/riscv*
+F: include/hw/riscv/
+F: linux-user/host/riscv32/
+F: linux-user/host/riscv64/
+F: tests/tcg/riscv64/
+
+RISC-V XThead* extensions
+M: Christoph Muellner <christoph.muellner@vrull.eu>
+M: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
+L: qemu-riscv@nongnu.org
+S: Supported
+F: target/riscv/insn_trans/trans_xthead.c.inc
+F: target/riscv/xthead*.decode
+F: disas/riscv-xthead*
+
+RISC-V XVentanaCondOps extension
+M: Philipp Tomsich <philipp.tomsich@vrull.eu>
+L: qemu-riscv@nongnu.org
+S: Maintained
+F: target/riscv/XVentanaCondOps.decode
+F: target/riscv/insn_trans/trans_xventanacondops.c.inc
+F: disas/riscv-xventana*
+
+RENESAS RX CPUs
+R: Yoshinori Sato <ysato@users.sourceforge.jp>
+S: Orphan
+F: target/rx/
+
+S390 TCG CPUs
+M: Richard Henderson <richard.henderson@linaro.org>
+M: David Hildenbrand <david@redhat.com>
+R: Ilya Leoshkevich <iii@linux.ibm.com>
+S: Maintained
+F: target/s390x/
+F: target/s390x/tcg
+F: hw/s390x/
+F: tests/tcg/s390x/
+L: qemu-s390x@nongnu.org
+
+SH4 TCG CPUs
+R: Yoshinori Sato <ysato@users.sourceforge.jp>
+S: Orphan
+F: target/sh4/
+F: hw/sh4/
+F: disas/sh4.c
+F: include/hw/sh4/
+F: tests/tcg/sh4/
+
+SPARC TCG CPUs
+M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
+M: Artyom Tarasenko <atar4qemu@gmail.com>
+S: Maintained
+F: target/sparc/
+F: hw/sparc/
+F: hw/sparc64/
+F: include/hw/sparc/sparc64.h
+F: disas/sparc.c
+F: tests/tcg/sparc64/
+
+X86 TCG CPUs
+M: Paolo Bonzini <pbonzini@redhat.com>
+M: Richard Henderson <richard.henderson@linaro.org>
+M: Eduardo Habkost <eduardo@habkost.net>
+S: Maintained
+F: target/i386/tcg/
+F: tests/tcg/i386/
+F: tests/tcg/x86_64/
+F: hw/i386/
+F: docs/system/i386/cpu.rst
+F: docs/system/cpu-models-x86*
+T: git https://gitlab.com/ehabkost/qemu.git x86-next
+
+Xtensa TCG CPUs
+M: Max Filippov <jcmvbkbc@gmail.com>
+W: http://wiki.osll.ru/doku.php?id=etc:users:jcmvbkbc:qemu-target-xtensa
+S: Maintained
+F: target/xtensa/
+F: hw/xtensa/
+F: tests/tcg/xtensa/
+F: tests/tcg/xtensaeb/
+F: disas/xtensa.c
+F: include/hw/xtensa/xtensa-isa.h
+F: configs/devices/xtensa*/default.mak
+
+TriCore TCG CPUs
+M: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
+S: Maintained
+F: target/tricore/
+F: hw/tricore/
+F: include/hw/tricore/
+F: tests/tcg/tricore/
+
+Multiarch Linux User Tests
+M: Alex Bennée <alex.bennee@linaro.org>
+S: Maintained
+F: tests/tcg/multiarch/
+
+Guest CPU Cores (KVM)
+---------------------
+Overall KVM CPUs
+M: Paolo Bonzini <pbonzini@redhat.com>
+L: kvm@vger.kernel.org
+S: Supported
+F: */*/kvm*
+F: accel/kvm/
+F: accel/stubs/kvm-stub.c
+F: include/hw/kvm/
+F: include/sysemu/kvm*.h
+F: scripts/kvm/kvm_flightrecorder
+
+ARM KVM CPUs
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: target/arm/kvm.c
+
+MIPS KVM CPUs
+M: Huacai Chen <chenhuacai@kernel.org>
+S: Odd Fixes
+F: target/mips/kvm*
+F: target/mips/sysemu/
+
+PPC KVM CPUs
+M: Nicholas Piggin <npiggin@gmail.com>
+R: Daniel Henrique Barboza <danielhb413@gmail.com>
+R: Cédric Le Goater <clg@kaod.org>
+S: Odd Fixes
+F: target/ppc/kvm.c
+
+S390 KVM CPUs
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+S: Supported
+F: target/s390x/kvm/
+F: target/s390x/machine.c
+F: target/s390x/sigp.c
+F: gdb-xml/s390*.xml
+T: git https://github.com/borntraeger/qemu.git s390-next
+L: qemu-s390x@nongnu.org
+
+X86 KVM CPUs
+M: Paolo Bonzini <pbonzini@redhat.com>
+M: Marcelo Tosatti <mtosatti@redhat.com>
+L: kvm@vger.kernel.org
+S: Supported
+F: docs/system/i386/amd-memory-encryption.rst
+F: docs/system/i386/sgx.rst
+F: target/i386/kvm/
+F: target/i386/sev*
+F: scripts/kvm/vmxcap
+
+Xen emulation on X86 KVM CPUs
+M: David Woodhouse <dwmw2@infradead.org>
+M: Paul Durrant <paul@xen.org>
+S: Supported
+F: include/sysemu/kvm_xen.h
+F: target/i386/kvm/xen*
+F: hw/i386/kvm/xen*
+F: tests/avocado/kvm_xen_guest.py
+
+Guest CPU Cores (other accelerators)
+------------------------------------
+Overall
+M: Richard Henderson <richard.henderson@linaro.org>
+R: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: include/qemu/accel.h
+F: include/sysemu/accel-*.h
+F: include/hw/core/accel-cpu.h
+F: accel/accel-*.c
+F: accel/Makefile.objs
+F: accel/stubs/Makefile.objs
+
+Apple Silicon HVF CPUs
+M: Alexander Graf <agraf@csgraf.de>
+S: Maintained
+F: target/arm/hvf/
+
+X86 HVF CPUs
+M: Cameron Esfahani <dirty@apple.com>
+M: Roman Bolshakov <rbolshakov@ddn.com>
+W: https://wiki.qemu.org/Features/HVF
+S: Maintained
+F: target/i386/hvf/
+
+HVF
+M: Cameron Esfahani <dirty@apple.com>
+M: Roman Bolshakov <rbolshakov@ddn.com>
+W: https://wiki.qemu.org/Features/HVF
+S: Maintained
+F: accel/hvf/
+F: include/sysemu/hvf.h
+F: include/sysemu/hvf_int.h
+
+WHPX CPUs
+M: Sunil Muthuswamy <sunilmut@microsoft.com>
+S: Supported
+F: target/i386/whpx/
+F: include/sysemu/whpx.h
+
+Guest CPU Cores (Xen)
+---------------------
+X86 Xen CPUs
+M: Stefano Stabellini <sstabellini@kernel.org>
+M: Anthony Perard <anthony.perard@citrix.com>
+M: Paul Durrant <paul@xen.org>
+L: xen-devel@lists.xenproject.org
+S: Supported
+F: */xen*
+F: accel/xen/*
+F: hw/9pfs/xen-9p*
+F: hw/char/xen_console.c
+F: hw/display/xenfb.c
+F: hw/net/xen_nic.c
+F: hw/usb/xen-usb.c
+F: hw/block/xen*
+F: hw/block/dataplane/xen*
+F: hw/xen/
+F: hw/xenpv/
+F: hw/i386/xen/
+F: hw/pci-host/xen_igd_pt.c
+F: include/hw/block/dataplane/xen*
+F: include/hw/xen/
+F: include/sysemu/xen.h
+F: include/sysemu/xen-mapcache.h
+F: stubs/xen-hw-stub.c
+
+Guest CPU Cores (NVMM)
+----------------------
+NetBSD Virtual Machine Monitor (NVMM) CPU support
+M: Reinoud Zandijk <reinoud@netbsd.org>
+S: Maintained
+F: include/sysemu/nvmm.h
+F: target/i386/nvmm/
+
+Hosts
+-----
+LINUX
+M: Michael S. Tsirkin <mst@redhat.com>
+M: Cornelia Huck <cohuck@redhat.com>
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: linux-headers/
+F: include/standard-headers/
+F: scripts/update-linux-headers.sh
+
+POSIX
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: os-posix.c
+F: include/sysemu/os-posix.h
+F: util/*posix*.c
+F: include/qemu/*posix*.h
+
+NETBSD
+M: Reinoud Zandijk <reinoud@netbsd.org>
+M: Ryo ONODERA <ryoon@netbsd.org>
+S: Maintained
+K: ^Subject:.*(?i)NetBSD
+
+OPENBSD
+M: Brad Smith <brad@comstyle.com>
+S: Maintained
+K: ^Subject:.*(?i)OpenBSD
+
+W32, W64
+M: Stefan Weil <sw@weilnetz.de>
+S: Maintained
+F: *win32*
+F: */*win32*
+F: include/*/*win32*
+X: qga/*win32*
+F: qemu.nsi
+F: scripts/nsis.py
+
+Darwin (macOS, iOS)
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+S: Odd Fixes
+F: .gitlab-ci.d/cirrus/macos-*
+F: */*.m
+F: scripts/entitlement.sh
+
+Alpha Machines
+--------------
+M: Richard Henderson <richard.henderson@linaro.org>
+S: Maintained
+F: hw/alpha/
+F: hw/isa/smc37c669-superio.c
+F: tests/tcg/alpha/system/
+
+ARM Machines
+------------
+Allwinner-a10
+M: Beniamino Galvani <b.galvani@gmail.com>
+M: Peter Maydell <peter.maydell@linaro.org>
+R: Strahinja Jankovic <strahinja.p.jankovic@gmail.com>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: hw/*/allwinner*
+F: include/hw/*/allwinner*
+F: hw/arm/cubieboard.c
+F: docs/system/arm/cubieboard.rst
+F: hw/misc/axp209.c
+
+Allwinner-h3
+M: Niek Linnenbank <nieklinnenbank@gmail.com>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/*/allwinner-h3*
+F: include/hw/*/allwinner-h3*
+F: hw/arm/orangepi.c
+F: docs/system/arm/orangepi.rst
+
+ARM PrimeCell and CMSDK devices
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/char/pl011.c
+F: include/hw/char/pl011.h
+F: hw/display/pl110*
+F: hw/dma/pl080.c
+F: include/hw/dma/pl080.h
+F: hw/dma/pl330.c
+F: hw/gpio/pl061.c
+F: hw/input/pl050.c
+F: include/hw/input/pl050.h
+F: hw/intc/pl190.c
+F: hw/sd/pl181.c
+F: hw/ssi/pl022.c
+F: include/hw/ssi/pl022.h
+F: hw/rtc/pl031.c
+F: include/hw/rtc/pl031.h
+F: include/hw/arm/primecell.h
+F: hw/timer/cmsdk-apb-timer.c
+F: include/hw/timer/cmsdk-apb-timer.h
+F: tests/qtest/cmsdk-apb-timer-test.c
+F: hw/timer/cmsdk-apb-dualtimer.c
+F: include/hw/timer/cmsdk-apb-dualtimer.h
+F: tests/qtest/cmsdk-apb-dualtimer-test.c
+F: hw/char/cmsdk-apb-uart.c
+F: include/hw/char/cmsdk-apb-uart.h
+F: hw/watchdog/cmsdk-apb-watchdog.c
+F: include/hw/watchdog/cmsdk-apb-watchdog.h
+F: tests/qtest/cmsdk-apb-watchdog-test.c
+F: hw/misc/tz-ppc.c
+F: include/hw/misc/tz-ppc.h
+F: hw/misc/tz-mpc.c
+F: include/hw/misc/tz-mpc.h
+F: hw/misc/tz-msc.c
+F: include/hw/misc/tz-msc.h
+
+ARM cores
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/intc/arm*
+F: hw/intc/gic*_internal.h
+F: hw/misc/a9scu.c
+F: hw/misc/arm11scu.c
+F: hw/misc/arm_l2x0.c
+F: hw/misc/armv7m_ras.c
+F: hw/timer/a9gtimer*
+F: hw/timer/arm*
+F: include/hw/arm/arm*.h
+F: include/hw/intc/arm*
+F: include/hw/misc/a9scu.h
+F: include/hw/misc/arm11scu.h
+F: include/hw/timer/a9gtimer.h
+F: include/hw/timer/arm_mptimer.h
+F: include/hw/timer/armv7m_systick.h
+F: include/hw/misc/armv7m_ras.h
+F: tests/qtest/test-arm-mptimer.c
+
+Exynos
+M: Igor Mitsyanko <i.mitsyanko@gmail.com>
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: hw/*/exynos*
+F: include/hw/*/exynos*
+
+Calxeda Highbank
+M: Rob Herring <robh@kernel.org>
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: hw/arm/highbank.c
+F: hw/net/xgmac.c
+F: docs/system/arm/highbank.rst
+
+Canon DIGIC
+M: Antony Pavlov <antonynpavlov@gmail.com>
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: include/hw/arm/digic.h
+F: hw/*/digic*
+F: include/hw/*/digic*
+F: tests/avocado/machine_arm_canona1100.py
+F: docs/system/arm/digic.rst
+
+Goldfish RTC
+M: Anup Patel <anup.patel@wdc.com>
+M: Alistair Francis <Alistair.Francis@wdc.com>
+L: qemu-riscv@nongnu.org
+S: Maintained
+F: hw/rtc/goldfish_rtc.c
+F: include/hw/rtc/goldfish_rtc.h
+
+Gumstix
+M: Peter Maydell <peter.maydell@linaro.org>
+R: Philippe Mathieu-Daudé <philmd@linaro.org>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: hw/arm/gumstix.c
+F: docs/system/arm/gumstix.rst
+
+i.MX25 PDK
+M: Peter Maydell <peter.maydell@linaro.org>
+R: Jean-Christophe Dubois <jcd@tribudubois.net>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: hw/arm/fsl-imx25.c
+F: hw/arm/imx25_pdk.c
+F: hw/misc/imx25_ccm.c
+F: hw/watchdog/wdt_imx2.c
+F: include/hw/arm/fsl-imx25.h
+F: include/hw/misc/imx25_ccm.h
+F: include/hw/watchdog/wdt_imx2.h
+F: docs/system/arm/imx25-pdk.rst
+
+i.MX31 (kzm)
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: hw/arm/kzm.c
+F: hw/*/imx_*
+F: hw/*/*imx31*
+F: include/hw/*/imx_*
+F: include/hw/*/*imx31*
+F: docs/system/arm/kzm.rst
+
+Integrator CP
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/integratorcp.c
+F: hw/misc/arm_integrator_debug.c
+F: include/hw/misc/arm_integrator_debug.h
+F: tests/avocado/machine_arm_integratorcp.py
+F: docs/system/arm/integratorcp.rst
+
+MCIMX6UL EVK / i.MX6ul
+M: Peter Maydell <peter.maydell@linaro.org>
+R: Jean-Christophe Dubois <jcd@tribudubois.net>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: hw/arm/mcimx6ul-evk.c
+F: hw/arm/fsl-imx6ul.c
+F: hw/misc/imx6ul_ccm.c
+F: include/hw/arm/fsl-imx6ul.h
+F: include/hw/misc/imx6ul_ccm.h
+
+MCIMX7D SABRE / i.MX7
+M: Peter Maydell <peter.maydell@linaro.org>
+R: Andrey Smirnov <andrew.smirnov@gmail.com>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: hw/arm/mcimx7d-sabre.c
+F: hw/arm/fsl-imx7.c
+F: hw/misc/imx7_*.c
+F: include/hw/arm/fsl-imx7.h
+F: include/hw/misc/imx7_*.h
+F: hw/pci-host/designware.c
+F: include/hw/pci-host/designware.h
+
+MPS2
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/mps2.c
+F: hw/arm/mps2-tz.c
+F: hw/misc/mps2-*.c
+F: include/hw/misc/mps2-*.h
+F: hw/arm/armsse.c
+F: include/hw/arm/armsse.h
+F: hw/misc/iotkit-secctl.c
+F: include/hw/misc/iotkit-secctl.h
+F: hw/misc/iotkit-sysctl.c
+F: include/hw/misc/iotkit-sysctl.h
+F: hw/misc/iotkit-sysinfo.c
+F: include/hw/misc/iotkit-sysinfo.h
+F: hw/misc/armsse-cpu-pwrctrl.c
+F: include/hw/misc/armsse-cpu-pwrctrl.h
+F: hw/misc/armsse-cpuid.c
+F: include/hw/misc/armsse-cpuid.h
+F: hw/misc/armsse-mhu.c
+F: include/hw/misc/armsse-mhu.h
+F: hw/timer/sse-counter.c
+F: include/hw/timer/sse-counter.h
+F: hw/timer/sse-timer.c
+F: include/hw/timer/sse-timer.h
+F: tests/qtest/sse-timer-test.c
+F: docs/system/arm/mps2.rst
+
+Musca
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/musca.c
+F: docs/system/arm/musca.rst
+
+Musicpal
+M: Jan Kiszka <jan.kiszka@web.de>
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: hw/arm/musicpal.c
+F: hw/net/mv88w8618_eth.c
+F: include/hw/net/mv88w8618_eth.h
+F: docs/system/arm/musicpal.rst
+
+Nuvoton NPCM7xx
+M: Tyrone Ting <kfting@nuvoton.com>
+M: Hao Wu <wuhaotsh@google.com>
+L: qemu-arm@nongnu.org
+S: Supported
+F: hw/*/npcm*
+F: hw/sensor/adm1266.c
+F: include/hw/*/npcm*
+F: tests/qtest/npcm*
+F: tests/qtest/adm1266-test.c
+F: pc-bios/npcm7xx_bootrom.bin
+F: roms/vbootrom
+F: docs/system/arm/nuvoton.rst
+
+nSeries
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: hw/arm/nseries.c
+F: hw/display/blizzard.c
+F: hw/input/lm832x.c
+F: hw/input/tsc2005.c
+F: hw/misc/cbus.c
+F: hw/rtc/twl92230.c
+F: include/hw/display/blizzard.h
+F: include/hw/input/lm832x.h
+F: include/hw/input/tsc2xxx.h
+F: include/hw/misc/cbus.h
+F: tests/avocado/machine_arm_n8x0.py
+F: docs/system/arm/nseries.rst
+
+Palm
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: hw/arm/palm.c
+F: hw/input/tsc210x.c
+F: include/hw/input/tsc2xxx.h
+F: docs/system/arm/palm.rst
+
+Raspberry Pi
+M: Peter Maydell <peter.maydell@linaro.org>
+R: Philippe Mathieu-Daudé <philmd@linaro.org>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: hw/arm/raspi.c
+F: hw/arm/raspi_platform.h
+F: hw/*/bcm283*
+F: include/hw/arm/rasp*
+F: include/hw/*/bcm283*
+F: docs/system/arm/raspi.rst
+
+Real View
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/realview*
+F: hw/cpu/realview_mpcore.c
+F: hw/intc/realview_gic.c
+F: include/hw/intc/realview_gic.h
+F: docs/system/arm/realview.rst
+
+PXA2XX
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: hw/arm/mainstone.c
+F: hw/arm/spitz.c
+F: hw/arm/tosa.c
+F: hw/arm/z2.c
+F: hw/*/pxa2xx*
+F: hw/display/tc6393xb.c
+F: hw/gpio/max7310.c
+F: hw/gpio/zaurus.c
+F: hw/input/ads7846.c
+F: hw/misc/mst_fpga.c
+F: hw/adc/max111x.c
+F: include/hw/adc/max111x.h
+F: include/hw/arm/pxa.h
+F: include/hw/arm/sharpsl.h
+F: include/hw/display/tc6393xb.h
+F: docs/system/arm/xscale.rst
+F: docs/system/arm/mainstone.rst
+
+SABRELITE / i.MX6
+M: Peter Maydell <peter.maydell@linaro.org>
+R: Jean-Christophe Dubois <jcd@tribudubois.net>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: docs/system/arm/sabrelite.rst
+F: hw/arm/sabrelite.c
+F: hw/arm/fsl-imx6.c
+F: hw/misc/imx6_*.c
+F: hw/ssi/imx_spi.c
+F: hw/usb/imx-usb-phy.c
+F: include/hw/usb/imx-usb-phy.h
+F: include/hw/arm/fsl-imx6.h
+F: include/hw/misc/imx6_*.h
+F: include/hw/ssi/imx_spi.h
+
+SBSA-REF
+M: Radoslaw Biernacki <rad@semihalf.com>
+M: Peter Maydell <peter.maydell@linaro.org>
+R: Leif Lindholm <quic_llindhol@quicinc.com>
+R: Marcin Juszkiewicz <marcin.juszkiewicz@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/sbsa-ref.c
+F: hw/misc/sbsa_ec.c
+F: hw/watchdog/sbsa_gwdt.c
+F: include/hw/watchdog/sbsa_gwdt.h
+F: docs/system/arm/sbsa.rst
+F: tests/avocado/machine_aarch64_sbsaref.py
+
+Sharp SL-5500 (Collie) PDA
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Odd Fixes
+F: hw/arm/collie.c
+F: hw/arm/strongarm*
+F: docs/system/arm/collie.rst
+
+Stellaris
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/*/stellaris*
+F: hw/display/ssd03*
+F: include/hw/input/gamepad.h
+F: include/hw/timer/stellaris-gptm.h
+F: docs/system/arm/stellaris.rst
+
+STM32VLDISCOVERY
+M: Alexandre Iooss <erdnaxe@crans.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/stm32vldiscovery.c
+F: docs/system/arm/stm32.rst
+
+Versatile Express
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/vexpress.c
+F: hw/display/sii9022.c
+F: docs/system/arm/vexpress.rst
+
+Versatile PB
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/*/versatile*
+F: hw/i2c/arm_sbcon_i2c.c
+F: include/hw/i2c/arm_sbcon_i2c.h
+F: hw/misc/arm_sysctl.c
+F: docs/system/arm/versatile.rst
+
+Virt
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/virt*
+F: include/hw/arm/virt.h
+F: docs/system/arm/virt.rst
+F: tests/avocado/machine_aarch64_virt.py
+
+Xilinx Zynq
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+M: Alistair Francis <alistair@alistair23.me>
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/*/xilinx_*
+F: hw/*/cadence_*
+F: hw/misc/zynq_slcr.c
+F: hw/adc/zynq-xadc.c
+F: include/hw/misc/zynq_slcr.h
+F: include/hw/adc/zynq-xadc.h
+X: hw/ssi/xilinx_*
+
+Xilinx ZynqMP and Versal
+M: Alistair Francis <alistair@alistair23.me>
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/*/xlnx*.c
+F: include/hw/*/xlnx*.h
+F: include/hw/ssi/xilinx_spips.h
+F: hw/display/dpcd.c
+F: include/hw/display/dpcd.h
+F: docs/system/arm/xlnx-versal-virt.rst
+
+Xilinx Versal OSPI
+M: Francisco Iglesias <francisco.iglesias@xilinx.com>
+S: Maintained
+F: hw/ssi/xlnx-versal-ospi.c
+F: include/hw/ssi/xlnx-versal-ospi.h
+
+Xilinx Versal CFI
+M: Francisco Iglesias <francisco.iglesias@amd.com>
+S: Maintained
+F: hw/misc/xlnx-cfi-if.c
+F: include/hw/misc/xlnx-cfi-if.h
+F: hw/misc/xlnx-versal-cfu.c
+F: include/hw/misc/xlnx-versal-cfu.h
+F: hw/misc/xlnx-versal-cframe-reg.c
+F: include/hw/misc/xlnx-versal-cframe-reg.h
+
+STM32F100
+M: Alexandre Iooss <erdnaxe@crans.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/stm32f100_soc.c
+
+STM32F205
+M: Alistair Francis <alistair@alistair23.me>
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/stm32f205_soc.c
+F: hw/misc/stm32f2xx_syscfg.c
+F: hw/char/stm32f2xx_usart.c
+F: hw/timer/stm32f2xx_timer.c
+F: hw/adc/*
+F: hw/ssi/stm32f2xx_spi.c
+F: include/hw/*/stm32*.h
+
+STM32F405
+M: Alistair Francis <alistair@alistair23.me>
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/stm32f405_soc.c
+F: hw/misc/stm32f4xx_syscfg.c
+F: hw/misc/stm32f4xx_exti.c
+
+Netduino 2
+M: Alistair Francis <alistair@alistair23.me>
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/netduino2.c
+
+Netduino Plus 2
+M: Alistair Francis <alistair@alistair23.me>
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/netduinoplus2.c
+
+Olimex STM32 H405
+M: Felipe Balbi <balbi@kernel.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/olimex-stm32-h405.c
+
+SmartFusion2
+M: Subbaraya Sundeep <sundeep.lkml@gmail.com>
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/msf2-soc.c
+F: hw/misc/msf2-sysreg.c
+F: hw/timer/mss-timer.c
+F: hw/ssi/mss-spi.c
+F: include/hw/arm/msf2-soc.h
+F: include/hw/misc/msf2-sysreg.h
+F: include/hw/timer/mss-timer.h
+F: include/hw/ssi/mss-spi.h
+F: hw/net/msf2-emac.c
+F: include/hw/net/msf2-emac.h
+
+Emcraft M2S-FG484
+M: Subbaraya Sundeep <sundeep.lkml@gmail.com>
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/msf2-som.c
+F: docs/system/arm/emcraft-sf2.rst
+
+ASPEED BMCs
+M: Cédric Le Goater <clg@kaod.org>
+M: Peter Maydell <peter.maydell@linaro.org>
+R: Andrew Jeffery <andrew@codeconstruct.com.au>
+R: Joel Stanley <joel@jms.id.au>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/*/*aspeed*
+F: hw/misc/pca9552.c
+F: include/hw/*/*aspeed*
+F: include/hw/misc/pca9552*.h
+F: hw/net/ftgmac100.c
+F: include/hw/net/ftgmac100.h
+F: docs/system/arm/aspeed.rst
+F: tests/*/*aspeed*
+F: hw/arm/fby35.c
+
+NRF51
+M: Joel Stanley <joel@jms.id.au>
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/*/nrf51*.c
+F: hw/*/microbit*.c
+F: include/hw/*/nrf51*.h
+F: include/hw/*/microbit*.h
+F: tests/qtest/microbit-test.c
+F: docs/system/arm/nrf.rst
+
+AVR Machines
+-------------
+
+AVR MCUs
+M: Michael Rolnik <mrolnik@gmail.com>
+S: Maintained
+F: configs/devices/avr-softmmu/default.mak
+F: hw/avr/
+F: include/hw/char/avr_usart.h
+F: hw/char/avr_usart.c
+F: include/hw/timer/avr_timer16.h
+F: hw/timer/avr_timer16.c
+F: include/hw/misc/avr_power.h
+F: hw/misc/avr_power.c
+
+Arduino
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+S: Maintained
+F: hw/avr/arduino.c
+
+CRIS Machines
+-------------
+Axis Dev88
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+S: Maintained
+F: hw/cris/axis_dev88.c
+F: hw/*/etraxfs_*.c
+
+HP-PARISC Machines
+------------------
+HP B160L, HP C3700
+M: Richard Henderson <richard.henderson@linaro.org>
+R: Helge Deller <deller@gmx.de>
+S: Odd Fixes
+F: configs/devices/hppa-softmmu/default.mak
+F: hw/display/artist.c
+F: hw/hppa/
+F: hw/input/lasips2.c
+F: hw/net/*i82596*
+F: hw/misc/lasi.c
+F: hw/pci-host/astro.c
+F: hw/pci-host/dino.c
+F: include/hw/input/lasips2.h
+F: include/hw/misc/lasi.h
+F: include/hw/net/lasi_82596.h
+F: include/hw/pci-host/astro.h
+F: include/hw/pci-host/dino.h
+F: pc-bios/hppa-firmware.img
+F: roms/seabios-hppa/
+
+LoongArch Machines
+------------------
+Virt
+M: Song Gao <gaosong@loongson.cn>
+S: Maintained
+F: docs/system/loongarch/virt.rst
+F: configs/targets/loongarch64-softmmu.mak
+F: configs/devices/loongarch64-softmmu/default.mak
+F: hw/loongarch/
+F: include/hw/loongarch/virt.h
+F: include/hw/intc/loongarch_*.h
+F: hw/intc/loongarch_*.c
+F: include/hw/pci-host/ls7a.h
+F: hw/rtc/ls7a_rtc.c
+F: gdb-xml/loongarch*.xml
+
+M68K Machines
+-------------
+an5206
+M: Thomas Huth <huth@tuxfamily.org>
+S: Odd Fixes
+F: hw/m68k/an5206.c
+F: hw/m68k/mcf5206.c
+
+mcf5208
+M: Thomas Huth <huth@tuxfamily.org>
+S: Odd Fixes
+F: hw/m68k/mcf5208.c
+F: hw/m68k/mcf_intc.c
+F: hw/char/mcf_uart.c
+F: hw/net/mcf_fec.c
+F: include/hw/m68k/mcf*.h
+
+NeXTcube
+M: Thomas Huth <huth@tuxfamily.org>
+S: Odd Fixes
+F: hw/m68k/next-*.c
+F: hw/display/next-fb.c
+F: include/hw/m68k/next-cube.h
+
+q800
+M: Laurent Vivier <laurent@vivier.eu>
+S: Maintained
+F: hw/m68k/q800.c
+F: hw/m68k/q800-glue.c
+F: hw/misc/mac_via.c
+F: hw/nubus/*
+F: hw/display/macfb.c
+F: hw/block/swim.c
+F: hw/misc/djmemc.c
+F: hw/misc/iosb.c
+F: hw/audio/asc.c
+F: hw/m68k/bootinfo.h
+F: include/standard-headers/asm-m68k/bootinfo.h
+F: include/standard-headers/asm-m68k/bootinfo-mac.h
+F: include/hw/misc/mac_via.h
+F: include/hw/nubus/*
+F: include/hw/display/macfb.h
+F: include/hw/block/swim.h
+F: include/hw/m68k/q800.h
+F: include/hw/m68k/q800-glue.h
+F: include/hw/misc/djmemc.h
+F: include/hw/misc/iosb.h
+F: include/hw/audio/asc.h
+
+virt
+M: Laurent Vivier <laurent@vivier.eu>
+S: Maintained
+F: hw/m68k/virt.c
+F: hw/char/goldfish_tty.c
+F: hw/intc/goldfish_pic.c
+F: hw/intc/m68k_irqc.c
+F: hw/misc/virt_ctrl.c
+F: include/hw/char/goldfish_tty.h
+F: include/hw/intc/goldfish_pic.h
+F: include/hw/intc/m68k_irqc.h
+F: include/hw/misc/virt_ctrl.h
+F: docs/specs/virt-ctlr.rst
+
+MicroBlaze Machines
+-------------------
+petalogix_s3adsp1800
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+S: Maintained
+F: hw/microblaze/petalogix_s3adsp1800_mmu.c
+F: include/hw/char/xilinx_uartlite.h
+F: tests/avocado/machine_microblaze.py
+
+petalogix_ml605
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+S: Maintained
+F: hw/microblaze/petalogix_ml605_mmu.c
+
+MIPS Machines
+-------------
+Overall MIPS Machines
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+S: Odd Fixes
+F: configs/devices/mips*/*
+F: hw/mips/
+F: include/hw/mips/
+
+Jazz
+M: Hervé Poussineau <hpoussin@reactos.org>
+R: Aleksandar Rikalo <aleksandar.rikalo@syrmia.com>
+S: Maintained
+F: hw/mips/jazz.c
+F: hw/display/g364fb.c
+F: hw/display/jazz_led.c
+F: hw/dma/rc4030.c
+F: hw/nvram/ds1225y.c
+
+Malta
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Aurelien Jarno <aurelien@aurel32.net>
+S: Odd Fixes
+F: hw/isa/piix.c
+F: hw/acpi/piix4.c
+F: hw/mips/malta.c
+F: hw/pci-host/gt64120.c
+F: include/hw/southbridge/piix.h
+F: tests/avocado/linux_ssh_mips_malta.py
+F: tests/avocado/machine_mips_malta.py
+
+Mipssim
+R: Aleksandar Rikalo <aleksandar.rikalo@syrmia.com>
+S: Orphan
+F: hw/mips/mipssim.c
+F: hw/net/mipsnet.c
+
+Fuloong 2E
+M: Huacai Chen <chenhuacai@kernel.org>
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Jiaxun Yang <jiaxun.yang@flygoat.com>
+S: Odd Fixes
+F: hw/mips/fuloong2e.c
+F: hw/pci-host/bonito.c
+F: include/hw/pci-host/bonito.h
+F: tests/avocado/machine_mips_fuloong2e.py
+
+Loongson-3 virtual platforms
+M: Huacai Chen <chenhuacai@kernel.org>
+R: Jiaxun Yang <jiaxun.yang@flygoat.com>
+S: Maintained
+F: hw/intc/loongson_liointc.c
+F: hw/mips/loongson3_bootp.c
+F: hw/mips/loongson3_bootp.h
+F: hw/mips/loongson3_virt.c
+F: include/hw/intc/loongson_liointc.h
+F: tests/avocado/machine_mips_loongson3v.py
+
+Boston
+M: Paul Burton <paulburton@kernel.org>
+R: Aleksandar Rikalo <aleksandar.rikalo@syrmia.com>
+S: Odd Fixes
+F: hw/core/loader-fit.c
+F: hw/mips/boston.c
+F: hw/pci-host/xilinx-pcie.c
+F: include/hw/pci-host/xilinx-pcie.h
+
+OpenRISC Machines
+-----------------
+or1k-sim
+M: Jia Liu <proljc@gmail.com>
+S: Maintained
+F: docs/system/openrisc/or1k-sim.rst
+F: hw/intc/ompic.c
+F: hw/openrisc/openrisc_sim.c
+
+PowerPC Machines
+----------------
+405 (ref405ep)
+L: qemu-ppc@nongnu.org
+S: Orphan
+F: hw/ppc/ppc405*
+F: tests/avocado/ppc_405.py
+
+Bamboo
+L: qemu-ppc@nongnu.org
+S: Orphan
+F: hw/ppc/ppc440_bamboo.c
+F: tests/avocado/ppc_bamboo.py
+
+e500
+L: qemu-ppc@nongnu.org
+S: Orphan
+F: hw/ppc/e500*
+F: hw/ppc/ppce500_spin.c
+F: hw/gpio/mpc8xxx.c
+F: hw/i2c/mpc_i2c.c
+F: hw/net/fsl_etsec/
+F: hw/pci-host/ppce500.c
+F: include/hw/ppc/ppc_e500.h
+F: include/hw/pci-host/ppce500.h
+F: pc-bios/u-boot.e500
+F: hw/intc/openpic_kvm.c
+F: include/hw/ppc/openpic_kvm.h
+F: docs/system/ppc/ppce500.rst
+
+mpc8544ds
+L: qemu-ppc@nongnu.org
+S: Orphan
+F: hw/ppc/mpc8544ds.c
+F: hw/ppc/mpc8544_guts.c
+F: tests/avocado/ppc_mpc8544ds.py
+
+New World (mac99)
+M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: docs/system/ppc/powermac.rst
+F: hw/ppc/mac_newworld.c
+F: hw/pci-host/uninorth.c
+F: hw/pci-bridge/dec.[hc]
+F: hw/misc/macio/
+F: hw/misc/mos6522.c
+F: hw/nvram/mac_nvram.c
+F: hw/ppc/fw_cfg.c
+F: hw/input/adb*
+F: include/hw/misc/macio/
+F: include/hw/misc/mos6522.h
+F: include/hw/nvram/mac_nvram.h
+F: include/hw/ppc/mac_dbdma.h
+F: include/hw/pci-host/uninorth.h
+F: include/hw/input/adb*
+F: pc-bios/qemu_vga.ndrv
+
+Old World (g3beige)
+M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: docs/system/ppc/powermac.rst
+F: hw/ppc/mac_oldworld.c
+F: hw/pci-host/grackle.c
+F: hw/misc/macio/
+F: hw/intc/heathrow_pic.c
+F: hw/input/adb*
+F: include/hw/intc/heathrow_pic.h
+F: include/hw/input/adb*
+F: include/hw/pci-host/grackle.h
+F: pc-bios/qemu_vga.ndrv
+
+PReP
+M: Hervé Poussineau <hpoussin@reactos.org>
+L: qemu-ppc@nongnu.org
+S: Maintained
+F: docs/system/ppc/prep.rst
+F: hw/ppc/prep.c
+F: hw/ppc/prep_systemio.c
+F: hw/ppc/rs6000_mc.c
+F: hw/pci-host/raven.c
+F: hw/isa/i82378.c
+F: hw/isa/pc87312.c
+F: hw/dma/i82374.c
+F: hw/rtc/m48t59-isa.c
+F: include/hw/isa/pc87312.h
+F: include/hw/rtc/m48t59.h
+F: tests/avocado/ppc_prep_40p.py
+
+sPAPR (pseries)
+M: Nicholas Piggin <npiggin@gmail.com>
+R: Daniel Henrique Barboza <danielhb413@gmail.com>
+R: Cédric Le Goater <clg@kaod.org>
+R: David Gibson <david@gibson.dropbear.id.au>
+R: Harsh Prateek Bora <harshpb@linux.ibm.com>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: hw/*/spapr*
+F: include/hw/*/spapr*
+F: hw/*/xics*
+F: include/hw/*/xics*
+F: include/hw/ppc/fdt.h
+F: hw/ppc/fdt.c
+F: include/hw/ppc/pef.h
+F: hw/ppc/pef.c
+F: pc-bios/slof.bin
+F: docs/system/ppc/pseries.rst
+F: docs/specs/ppc-spapr-*
+F: tests/qtest/spapr*
+F: tests/qtest/libqos/*spapr*
+F: tests/qtest/rtas*
+F: tests/qtest/libqos/rtas*
+F: tests/avocado/ppc_pseries.py
+
+PowerNV (Non-Virtualized)
+M: Cédric Le Goater <clg@kaod.org>
+M: Nicholas Piggin <npiggin@gmail.com>
+R: Frédéric Barrat <fbarrat@linux.ibm.com>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: docs/system/ppc/powernv.rst
+F: hw/ppc/pnv*
+F: hw/intc/pnv*
+F: hw/intc/xics_pnv.c
+F: hw/pci-host/pnv*
+F: include/hw/ppc/pnv*
+F: include/hw/pci-host/pnv*
+F: pc-bios/skiboot.lid
+F: tests/qtest/pnv*
+
+virtex_ml507
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: hw/ppc/virtex_ml507.c
+F: tests/avocado/ppc_virtex_ml507.py
+
+sam460ex
+M: BALATON Zoltan <balaton@eik.bme.hu>
+L: qemu-ppc@nongnu.org
+S: Maintained
+F: hw/ppc/sam460ex.c
+F: hw/ppc/ppc440_uc.c
+F: hw/ppc/ppc440_pcix.c
+F: hw/display/sm501*
+F: hw/ide/sii3112.c
+F: hw/rtc/m41t80.c
+F: pc-bios/canyonlands.dt[sb]
+F: pc-bios/u-boot-sam460ex-20100605.bin
+F: roms/u-boot-sam460ex
+
+pegasos2
+M: BALATON Zoltan <balaton@eik.bme.hu>
+L: qemu-ppc@nongnu.org
+S: Maintained
+F: hw/ppc/pegasos2.c
+F: hw/pci-host/mv64361.c
+F: hw/pci-host/mv643xx.h
+F: include/hw/pci-host/mv64361.h
+
+amigaone
+M: BALATON Zoltan <balaton@eik.bme.hu>
+L: qemu-ppc@nongnu.org
+S: Maintained
+F: hw/ppc/amigaone.c
+F: hw/pci-host/articia.c
+F: include/hw/pci-host/articia.h
+
+Virtual Open Firmware (VOF)
+M: Alexey Kardashevskiy <aik@ozlabs.ru>
+R: David Gibson <david@gibson.dropbear.id.au>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: hw/ppc/spapr_vof*
+F: hw/ppc/vof*
+F: include/hw/ppc/vof*
+F: pc-bios/vof/*
+F: pc-bios/vof*
+
+RISC-V Machines
+---------------
+OpenTitan
+M: Alistair Francis <Alistair.Francis@wdc.com>
+L: qemu-riscv@nongnu.org
+S: Supported
+F: hw/riscv/opentitan.c
+F: hw/*/ibex_*.c
+F: include/hw/riscv/opentitan.h
+F: include/hw/*/ibex_*.h
+
+Microchip PolarFire SoC Icicle Kit
+M: Bin Meng <bin.meng@windriver.com>
+L: qemu-riscv@nongnu.org
+S: Supported
+F: docs/system/riscv/microchip-icicle-kit.rst
+F: hw/riscv/microchip_pfsoc.c
+F: hw/char/mchp_pfsoc_mmuart.c
+F: hw/misc/mchp_pfsoc_dmc.c
+F: hw/misc/mchp_pfsoc_ioscb.c
+F: hw/misc/mchp_pfsoc_sysreg.c
+F: include/hw/riscv/microchip_pfsoc.h
+F: include/hw/char/mchp_pfsoc_mmuart.h
+F: include/hw/misc/mchp_pfsoc_dmc.h
+F: include/hw/misc/mchp_pfsoc_ioscb.h
+F: include/hw/misc/mchp_pfsoc_sysreg.h
+
+Shakti C class SoC
+M: Vijai Kumar K <vijai@behindbytes.com>
+L: qemu-riscv@nongnu.org
+S: Supported
+F: docs/system/riscv/shakti-c.rst
+F: hw/riscv/shakti_c.c
+F: hw/char/shakti_uart.c
+F: include/hw/riscv/shakti_c.h
+F: include/hw/char/shakti_uart.h
+
+SiFive Machines
+M: Alistair Francis <Alistair.Francis@wdc.com>
+M: Bin Meng <bin.meng@windriver.com>
+M: Palmer Dabbelt <palmer@dabbelt.com>
+L: qemu-riscv@nongnu.org
+S: Supported
+F: docs/system/riscv/sifive_u.rst
+F: hw/*/*sifive*.c
+F: include/hw/*/*sifive*.h
+
+RX Machines
+-----------
+rx-gdbsim
+R: Yoshinori Sato <ysato@users.sourceforge.jp>
+S: Orphan
+F: docs/system/target-rx.rst
+F: hw/rx/rx-gdbsim.c
+F: tests/avocado/machine_rx_gdbsim.py
+
+SH4 Machines
+------------
+R2D
+R: Yoshinori Sato <ysato@users.sourceforge.jp>
+R: Magnus Damm <magnus.damm@gmail.com>
+S: Odd Fixes
+F: hw/char/sh_serial.c
+F: hw/sh4/r2d.c
+F: hw/intc/sh_intc.c
+F: hw/pci-host/sh_pci.c
+F: hw/timer/sh_timer.c
+F: include/hw/sh4/sh_intc.h
+F: include/hw/timer/tmu012.h
+
+Shix
+R: Yoshinori Sato <ysato@users.sourceforge.jp>
+R: Magnus Damm <magnus.damm@gmail.com>
+S: Odd Fixes
+F: hw/block/tc58128.c
+F: hw/char/sh_serial.c
+F: hw/sh4/shix.c
+F: hw/intc/sh_intc.c
+F: hw/timer/sh_timer.c
+F: include/hw/sh4/sh_intc.h
+
+SPARC Machines
+--------------
+Sun4m
+M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
+S: Maintained
+F: hw/sparc/sun4m.c
+F: hw/sparc/sun4m_iommu.c
+F: hw/display/cg3.c
+F: hw/display/tcx.c
+F: hw/dma/sparc32_dma.c
+F: hw/misc/eccmemctl.c
+F: hw/*/slavio_*.c
+F: include/hw/nvram/sun_nvram.h
+F: include/hw/sparc/sparc32_dma.h
+F: include/hw/sparc/sun4m_iommu.h
+F: pc-bios/openbios-sparc32
+
+Sun4u
+M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
+S: Maintained
+F: hw/sparc64/sun4u.c
+F: hw/sparc64/sun4u_iommu.c
+F: include/hw/sparc/sun4u_iommu.h
+F: hw/pci-host/sabre.c
+F: include/hw/pci-host/sabre.h
+F: hw/pci-bridge/simba.c
+F: include/hw/pci-bridge/simba.h
+F: pc-bios/openbios-sparc64
+F: tests/avocado/machine_sparc64_sun4u.py
+
+Sun4v
+M: Artyom Tarasenko <atar4qemu@gmail.com>
+S: Maintained
+F: hw/sparc64/niagara.c
+F: hw/rtc/sun4v-rtc.c
+F: include/hw/rtc/sun4v-rtc.h
+
+Leon3
+M: Fabien Chouteau <chouteau@adacore.com>
+M: Frederic Konrad <konrad.frederic@yahoo.fr>
+S: Maintained
+F: hw/sparc/leon3.c
+F: hw/*/grlib*
+F: include/hw/*/grlib*
+F: tests/avocado/machine_sparc_leon3.py
+
+S390 Machines
+-------------
+S390 Virtio-ccw
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+M: Eric Farman <farman@linux.ibm.com>
+S: Supported
+F: hw/s390x/
+F: include/hw/s390x/
+F: configs/devices/s390x-softmmu/default.mak
+F: tests/avocado/machine_s390_ccw_virtio.py
+T: git https://github.com/borntraeger/qemu.git s390-next
+L: qemu-s390x@nongnu.org
+
+S390-ccw boot
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+M: Thomas Huth <thuth@redhat.com>
+S: Supported
+F: hw/s390x/ipl.*
+F: pc-bios/s390-ccw/
+F: pc-bios/s390-ccw.img
+F: docs/devel/s390-dasd-ipl.rst
+T: git https://github.com/borntraeger/qemu.git s390-next
+L: qemu-s390x@nongnu.org
+
+S390 PCI
+M: Matthew Rosato <mjrosato@linux.ibm.com>
+M: Eric Farman <farman@linux.ibm.com>
+S: Supported
+F: hw/s390x/s390-pci*
+F: include/hw/s390x/s390-pci*
+L: qemu-s390x@nongnu.org
+
+S390 channel subsystem
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+M: Eric Farman <farman@linux.ibm.com>
+S: Supported
+F: hw/s390x/ccw-device.[ch]
+F: hw/s390x/css.c
+F: hw/s390x/css-bridge.c
+F: include/hw/s390x/css.h
+F: include/hw/s390x/css-bridge.h
+F: include/hw/s390x/ioinst.h
+F: target/s390x/ioinst.c
+L: qemu-s390x@nongnu.org
+
+S390 CPU models
+M: David Hildenbrand <david@redhat.com>
+S: Maintained
+F: target/s390x/cpu_features*.[ch]
+F: target/s390x/cpu_models.[ch]
+L: qemu-s390x@nongnu.org
+
+S390 SCLP-backed devices
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+S: Supported
+F: include/hw/s390x/event-facility.h
+F: include/hw/s390x/sclp.h
+F: hw/char/sclp*.[hc]
+F: hw/s390x/event-facility.c
+F: hw/s390x/sclp*.c
+L: qemu-s390x@nongnu.org
+
+S390 CPU topology
+M: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
+S: Supported
+F: include/hw/s390x/cpu-topology.h
+F: hw/s390x/cpu-topology.c
+F: target/s390x/kvm/stsi-topology.c
+F: docs/devel/s390-cpu-topology.rst
+F: docs/system/s390x/cpu-topology.rst
+F: tests/avocado/s390_topology.py
+
+X86 Machines
+------------
+PC
+M: Michael S. Tsirkin <mst@redhat.com>
+M: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
+S: Supported
+F: include/hw/i386/
+F: hw/i386/
+F: hw/pci-host/i440fx.c
+F: hw/pci-host/q35.c
+F: hw/pci-host/pam.c
+F: include/hw/pci-host/i440fx.h
+F: include/hw/pci-host/q35.h
+F: include/hw/pci-host/pam.h
+F: hw/isa/piix.c
+F: hw/isa/lpc_ich9.c
+F: hw/i2c/smbus_ich9.c
+F: hw/acpi/piix4.c
+F: hw/acpi/ich9*.c
+F: include/hw/acpi/ich9*.h
+F: include/hw/southbridge/ich9.h
+F: include/hw/southbridge/piix.h
+F: hw/isa/apm.c
+F: include/hw/isa/apm.h
+F: tests/unit/test-x86-topo.c
+F: tests/qtest/test-x86-cpuid-compat.c
+
+PC Chipset
+M: Michael S. Tsirkin <mst@redhat.com>
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Supported
+F: hw/char/debugcon.c
+F: hw/char/parallel*
+F: hw/char/serial*
+F: hw/dma/i8257*
+F: hw/i2c/pm_smbus.c
+F: hw/input/pckbd.c
+F: hw/intc/apic*
+F: hw/intc/ioapic*
+F: hw/intc/i8259*
+F: hw/isa/isa-superio.c
+F: hw/misc/debugexit.c
+F: hw/misc/pc-testdev.c
+F: hw/timer/hpet*
+F: hw/timer/i8254*
+F: hw/rtc/mc146818rtc*
+F: hw/watchdog/wdt_ib700.c
+F: hw/watchdog/wdt_i6300esb.c
+F: include/hw/display/vga.h
+F: include/hw/char/parallel*.h
+F: include/hw/dma/i8257.h
+F: include/hw/i2c/pm_smbus.h
+F: include/hw/input/i8042.h
+F: include/hw/intc/ioapic*
+F: include/hw/intc/i8259.h
+F: include/hw/isa/i8259_internal.h
+F: include/hw/isa/superio.h
+F: include/hw/timer/hpet.h
+F: include/hw/timer/i8254*
+F: include/hw/rtc/mc146818rtc*
+
+microvm
+M: Sergio Lopez <slp@redhat.com>
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: docs/system/i386/microvm.rst
+F: hw/i386/microvm.c
+F: include/hw/i386/microvm.h
+F: pc-bios/bios-microvm.bin
+
+Machine core
+M: Eduardo Habkost <eduardo@habkost.net>
+M: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
+R: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Yanan Wang <wangyanan55@huawei.com>
+S: Supported
+F: hw/core/cpu.c
+F: hw/core/machine-qmp-cmds.c
+F: hw/core/machine.c
+F: hw/core/machine-smp.c
+F: hw/core/null-machine.c
+F: hw/core/numa.c
+F: hw/cpu/cluster.c
+F: qapi/machine.json
+F: qapi/machine-common.json
+F: qapi/machine-target.json
+F: include/hw/boards.h
+F: include/hw/core/cpu.h
+F: include/hw/cpu/cluster.h
+F: include/sysemu/numa.h
+F: tests/unit/test-smp-parse.c
+T: git https://gitlab.com/ehabkost/qemu.git machine-next
+
+Xtensa Machines
+---------------
+sim
+M: Max Filippov <jcmvbkbc@gmail.com>
+S: Maintained
+F: hw/xtensa/sim.c
+
+virt
+M: Max Filippov <jcmvbkbc@gmail.com>
+S: Maintained
+F: hw/xtensa/virt.c
+
+XTFPGA (LX60, LX200, ML605, KC705)
+M: Max Filippov <jcmvbkbc@gmail.com>
+S: Maintained
+F: hw/xtensa/xtfpga.c
+F: hw/net/opencores_eth.c
+F: include/hw/xtensa/mx_pic.h
+
+Devices
+-------
+Overall Audio frontends
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Odd Fixes
+F: hw/audio/
+F: include/hw/audio/
+F: tests/qtest/ac97-test.c
+F: tests/qtest/es1370-test.c
+F: tests/qtest/intel-hda-test.c
+F: tests/qtest/fuzz-sb16-test.c
+
+Xilinx CAN
+M: Vikram Garhwal <vikram.garhwal@amd.com>
+M: Francisco Iglesias <francisco.iglesias@amd.com>
+S: Maintained
+F: hw/net/can/xlnx-*
+F: include/hw/net/xlnx-*
+F: tests/qtest/xlnx-can*-test*
+
+EDU
+M: Jiri Slaby <jslaby@suse.cz>
+S: Maintained
+F: hw/misc/edu.c
+F: docs/specs/edu.rst
+
+IDE
+M: John Snow <jsnow@redhat.com>
+L: qemu-block@nongnu.org
+S: Odd Fixes
+F: include/hw/ide.h
+F: include/hw/ide/
+F: hw/ide/
+F: hw/block/block.c
+F: hw/block/cdrom.c
+F: hw/block/hd-geometry.c
+F: tests/qtest/ide-test.c
+F: tests/qtest/ahci-test.c
+F: tests/qtest/cdrom-test.c
+F: tests/qtest/libqos/ahci*
+T: git https://gitlab.com/jsnow/qemu.git ide
+
+IPMI
+M: Corey Minyard <minyard@acm.org>
+S: Maintained
+F: include/hw/ipmi/*
+F: hw/ipmi/*
+F: hw/smbios/smbios_type_38.c
+F: tests/qtest/ipmi*
+T: git https://github.com/cminyard/qemu.git master-ipmi-rebase
+
+Floppy
+M: John Snow <jsnow@redhat.com>
+L: qemu-block@nongnu.org
+S: Odd Fixes
+F: hw/block/fdc.c
+F: hw/block/fdc-internal.h
+F: hw/block/fdc-isa.c
+F: hw/block/fdc-sysbus.c
+F: include/hw/block/fdc.h
+F: tests/qtest/fdc-test.c
+T: git https://gitlab.com/jsnow/qemu.git ide
+
+Hyper-V VMBus
+M: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
+S: Odd Fixes
+F: hw/hyperv/vmbus.c
+F: include/hw/hyperv/vmbus*.h
+
+OMAP
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/*/omap*
+F: include/hw/arm/omap.h
+F: docs/system/arm/sx1.rst
+
+IPack
+M: Alberto Garcia <berto@igalia.com>
+S: Odd Fixes
+F: hw/char/ipoctal232.c
+F: hw/ipack/
+
+PCI
+M: Michael S. Tsirkin <mst@redhat.com>
+M: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
+S: Supported
+F: include/hw/pci/*
+F: hw/misc/pci-testdev.c
+F: hw/pci/*
+F: hw/pci-bridge/*
+F: qapi/pci.json
+F: docs/pci*
+F: docs/specs/*pci*
+
+PCIE DOE
+M: Huai-Cheng Kuo <hchkuo@avery-design.com.tw>
+M: Chris Browy <cbrowy@avery-design.com>
+S: Supported
+F: include/hw/pci/pcie_doe.h
+F: hw/pci/pcie_doe.c
+
+ACPI/SMBIOS
+M: Michael S. Tsirkin <mst@redhat.com>
+M: Igor Mammedov <imammedo@redhat.com>
+R: Ani Sinha <anisinha@redhat.com>
+S: Supported
+F: include/hw/acpi/*
+F: include/hw/firmware/smbios.h
+F: hw/acpi/*
+F: hw/smbios/*
+F: hw/i386/acpi-build.[hc]
+F: hw/arm/virt-acpi-build.c
+F: qapi/acpi.json
+F: tests/qtest/bios-tables-test*
+F: tests/qtest/acpi-utils.[hc]
+F: tests/data/acpi/
+F: docs/specs/acpi_cpu_hotplug.rst
+F: docs/specs/acpi_mem_hotplug.rst
+F: docs/specs/acpi_nvdimm.rst
+F: docs/specs/acpi_pci_hotplug.rst
+F: docs/specs/acpi_hw_reduced_hotplug.rst
+
+ARM ACPI Subsystem
+M: Shannon Zhao <shannon.zhaosl@gmail.com>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/virt-acpi-build.c
+
+RISC-V ACPI Subsystem
+M: Sunil V L <sunilvl@ventanamicro.com>
+L: qemu-riscv@nongnu.org
+S: Maintained
+F: hw/riscv/virt-acpi-build.c
+
+ACPI/VIOT
+M: Jean-Philippe Brucker <jean-philippe@linaro.org>
+S: Supported
+F: hw/acpi/viot.c
+F: hw/acpi/viot.h
+
+ACPI/AVOCADO/BIOSBITS
+M: Ani Sinha <anisinha@redhat.com>
+M: Michael S. Tsirkin <mst@redhat.com>
+S: Supported
+F: tests/avocado/acpi-bits/*
+F: tests/avocado/acpi-bits.py
+F: docs/devel/acpi-bits.rst
+
+ACPI/HEST/GHES
+R: Dongjiu Geng <gengdongjiu1@gmail.com>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/acpi/ghes.c
+F: include/hw/acpi/ghes.h
+F: docs/specs/acpi_hest_ghes.rst
+
+ppc4xx
+L: qemu-ppc@nongnu.org
+S: Orphan
+F: hw/ppc/ppc4xx*.c
+F: hw/ppc/ppc440_uc.c
+F: hw/ppc/ppc440.h
+F: hw/i2c/ppc4xx_i2c.c
+F: include/hw/ppc/ppc4xx.h
+F: include/hw/i2c/ppc4xx_i2c.h
+F: hw/intc/ppc-uic.c
+F: include/hw/intc/ppc-uic.h
+
+Character devices
+M: Marc-André Lureau <marcandre.lureau@redhat.com>
+R: Paolo Bonzini <pbonzini@redhat.com>
+S: Odd Fixes
+F: hw/char/
+F: include/hw/char/
+
+Network devices
+M: Jason Wang <jasowang@redhat.com>
+S: Odd Fixes
+F: hw/net/
+F: include/hw/net/
+F: tests/qtest/virtio-net-test.c
+F: docs/virtio-net-failover.rst
+T: git https://github.com/jasowang/qemu.git net
+
+Parallel NOR Flash devices
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+T: git https://gitlab.com/philmd/qemu.git pflash-next
+S: Maintained
+F: hw/block/pflash_cfi*.c
+F: include/hw/block/flash.h
+
+SCSI
+M: Paolo Bonzini <pbonzini@redhat.com>
+R: Fam Zheng <fam@euphon.net>
+S: Supported
+F: include/hw/scsi/*
+F: hw/scsi/*
+F: tests/qtest/virtio-scsi-test.c
+F: tests/qtest/fuzz-virtio-scsi-test.c
+F: tests/qtest/am53c974-test.c
+F: tests/qtest/fuzz-lsi53c895a-test.c
+T: git https://github.com/bonzini/qemu.git scsi-next
+
+SSI
+M: Alistair Francis <alistair@alistair23.me>
+S: Maintained
+F: hw/ssi/*
+F: hw/block/m25p80*
+F: include/hw/ssi/ssi.h
+X: hw/ssi/xilinx_*
+F: tests/qtest/m25p80-test.c
+
+Xilinx SPI
+M: Alistair Francis <alistair@alistair23.me>
+S: Maintained
+F: hw/ssi/xilinx_*
+
+SD (Secure Card)
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+M: Bin Meng <bin.meng@windriver.com>
+L: qemu-block@nongnu.org
+S: Odd Fixes
+F: include/hw/sd/sd*
+F: hw/sd/core.c
+F: hw/sd/sd*
+F: hw/sd/ssi-sd.c
+F: tests/qtest/fuzz-sdcard-test.c
+F: tests/qtest/sdhci-test.c
+
+USB
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Odd Fixes
+F: hw/usb/*
+F: stubs/usb-dev-stub.c
+F: tests/qtest/usb-*-test.c
+F: docs/system/devices/usb.rst
+F: include/hw/usb.h
+F: include/hw/usb/
+
+USB (serial adapter)
+R: Gerd Hoffmann <kraxel@redhat.com>
+M: Samuel Thibault <samuel.thibault@ens-lyon.org>
+S: Maintained
+F: hw/usb/dev-serial.c
+
+VFIO
+M: Alex Williamson <alex.williamson@redhat.com>
+M: Cédric Le Goater <clg@redhat.com>
+S: Supported
+F: hw/vfio/*
+F: include/hw/vfio/
+F: docs/igd-assign.txt
+F: docs/devel/vfio-migration.rst
+
+vfio-ccw
+M: Eric Farman <farman@linux.ibm.com>
+M: Matthew Rosato <mjrosato@linux.ibm.com>
+S: Supported
+F: hw/vfio/ccw.c
+F: hw/s390x/s390-ccw.c
+F: include/hw/s390x/s390-ccw.h
+F: include/hw/s390x/vfio-ccw.h
+L: qemu-s390x@nongnu.org
+
+vfio-ap
+M: Tony Krowiak <akrowiak@linux.ibm.com>
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Jason Herne <jjherne@linux.ibm.com>
+S: Supported
+F: hw/s390x/ap-device.c
+F: hw/s390x/ap-bridge.c
+F: include/hw/s390x/ap-device.h
+F: include/hw/s390x/ap-bridge.h
+F: hw/vfio/ap.c
+F: docs/system/s390x/vfio-ap.rst
+L: qemu-s390x@nongnu.org
+
+vhost
+M: Michael S. Tsirkin <mst@redhat.com>
+S: Supported
+F: hw/*/*vhost*
+F: docs/interop/vhost-user.json
+F: docs/interop/vhost-user.rst
+F: contrib/vhost-user-*/
+F: backends/vhost-user.c
+F: include/sysemu/vhost-user-backend.h
+F: subprojects/libvhost-user/
+
+vhost-shadow-virtqueue
+R: Eugenio Pérez <eperezma@redhat.com>
+F: hw/virtio/vhost-shadow-virtqueue.*
+
+virtio
+M: Michael S. Tsirkin <mst@redhat.com>
+S: Supported
+F: hw/*/virtio*
+F: hw/virtio/Makefile.objs
+F: hw/virtio/trace-events
+F: qapi/virtio.json
+F: net/vhost-user.c
+F: include/hw/virtio/
+F: docs/devel/virtio*
+
+virtio-balloon
+M: Michael S. Tsirkin <mst@redhat.com>
+M: David Hildenbrand <david@redhat.com>
+S: Maintained
+F: docs/interop/virtio-balloon-stats.rst
+F: hw/virtio/virtio-balloon*.c
+F: include/hw/virtio/virtio-balloon.h
+F: system/balloon.c
+F: include/sysemu/balloon.h
+
+virtio-9p
+M: Greg Kurz <groug@kaod.org>
+M: Christian Schoenebeck <qemu_oss@crudebyte.com>
+S: Maintained
+W: https://wiki.qemu.org/Documentation/9p
+F: hw/9pfs/
+X: hw/9pfs/xen-9p*
+X: hw/9pfs/9p-proxy*
+F: fsdev/
+X: fsdev/virtfs-proxy-helper.c
+F: tests/qtest/virtio-9p-test.c
+F: tests/qtest/libqos/virtio-9p*
+T: git https://gitlab.com/gkurz/qemu.git 9p-next
+T: git https://github.com/cschoenebeck/qemu.git 9p.next
+
+virtio-9p-proxy
+F: hw/9pfs/9p-proxy*
+F: fsdev/virtfs-proxy-helper.c
+F: docs/tools/virtfs-proxy-helper.rst
+S: Obsolete
+
+virtio-blk
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: hw/block/virtio-blk-common.c
+F: hw/block/virtio-blk.c
+F: hw/block/dataplane/*
+F: include/hw/virtio/virtio-blk-common.h
+F: tests/qtest/virtio-blk-test.c
+T: git https://github.com/stefanha/qemu.git block
+
+virtio-ccw
+M: Cornelia Huck <cohuck@redhat.com>
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Eric Farman <farman@linux.ibm.com>
+S: Supported
+F: hw/s390x/virtio-ccw*.[hc]
+F: hw/s390x/vhost-*-ccw.c
+T: git https://gitlab.com/cohuck/qemu.git s390-next
+T: git https://github.com/borntraeger/qemu.git s390-next
+L: qemu-s390x@nongnu.org
+
+virtio-dmabuf
+M: Albert Esteve <aesteve@redhat.com>
+S: Supported
+F: hw/display/virtio-dmabuf.c
+F: include/hw/virtio/virtio-dmabuf.h
+F: tests/unit/test-virtio-dmabuf.c
+
+virtiofs
+M: Stefan Hajnoczi <stefanha@redhat.com>
+S: Supported
+F: hw/virtio/vhost-user-fs*
+F: include/hw/virtio/vhost-user-fs.h
+L: virtio-fs@lists.linux.dev
+
+virtio-input
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Odd Fixes
+F: hw/input/vhost-user-input.c
+F: hw/input/virtio-input*.c
+F: include/hw/virtio/virtio-input.h
+F: contrib/vhost-user-input/*
+
+virtio-iommu
+M: Eric Auger <eric.auger@redhat.com>
+S: Maintained
+F: hw/virtio/virtio-iommu*.c
+F: include/hw/virtio/virtio-iommu.h
+
+virtio-serial
+M: Laurent Vivier <lvivier@redhat.com>
+R: Amit Shah <amit@kernel.org>
+S: Supported
+F: hw/char/virtio-serial-bus.c
+F: hw/char/virtio-console.c
+F: include/hw/virtio/virtio-serial.h
+F: tests/qtest/virtio-serial-test.c
+
+virtio-rng
+M: Laurent Vivier <lvivier@redhat.com>
+R: Amit Shah <amit@kernel.org>
+S: Supported
+F: hw/virtio/virtio-rng.c
+F: include/hw/virtio/virtio-rng.h
+F: include/sysemu/rng*.h
+F: backends/rng*.c
+F: tests/qtest/virtio-rng-test.c
+
+vhost-user-rng
+M: Mathieu Poirier <mathieu.poirier@linaro.org>
+S: Supported
+F: docs/system/devices/vhost-user-rng.rst
+F: hw/virtio/vhost-user-rng.c
+F: hw/virtio/vhost-user-rng-pci.c
+F: include/hw/virtio/vhost-user-rng.h
+F: tools/vhost-user-rng/*
+
+vhost-user-gpio
+M: Alex Bennée <alex.bennee@linaro.org>
+R: Viresh Kumar <viresh.kumar@linaro.org>
+S: Maintained
+F: hw/virtio/vhost-user-gpio*
+F: include/hw/virtio/vhost-user-gpio.h
+F: tests/qtest/libqos/virtio-gpio.*
+
+vhost-user-scmi
+R: mzamazal@redhat.com
+S: Supported
+F: hw/virtio/vhost-user-scmi*
+F: include/hw/virtio/vhost-user-scmi.h
+F: tests/qtest/libqos/virtio-scmi.*
+
+virtio-crypto
+M: Gonglei <arei.gonglei@huawei.com>
+S: Supported
+F: hw/virtio/virtio-crypto.c
+F: hw/virtio/virtio-crypto-pci.c
+F: include/hw/virtio/virtio-crypto.h
+
+virtio based memory device
+M: David Hildenbrand <david@redhat.com>
+S: Supported
+F: hw/virtio/virtio-md-pci.c
+F: include/hw/virtio/virtio-md-pci.h
+F: stubs/virtio-md-pci.c
+
+virtio-mem
+M: David Hildenbrand <david@redhat.com>
+S: Supported
+W: https://virtio-mem.gitlab.io/
+F: hw/virtio/virtio-mem.c
+F: hw/virtio/virtio-mem-pci.h
+F: hw/virtio/virtio-mem-pci.c
+F: include/hw/virtio/virtio-mem.h
+
+virtio-snd
+M: Gerd Hoffmann <kraxel@redhat.com>
+R: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
+S: Supported
+F: hw/audio/virtio-snd.c
+F: hw/audio/virtio-snd-pci.c
+F: include/hw/audio/virtio-snd.h
+F: docs/system/devices/virtio-snd.rst
+
+nvme
+M: Keith Busch <kbusch@kernel.org>
+M: Klaus Jensen <its@irrelevant.dk>
+L: qemu-block@nongnu.org
+S: Supported
+F: hw/nvme/*
+F: include/block/nvme.h
+F: tests/qtest/nvme-test.c
+F: docs/system/devices/nvme.rst
+T: git git://git.infradead.org/qemu-nvme.git nvme-next
+
+ufs
+M: Jeuk Kim <jeuk20.kim@samsung.com>
+S: Supported
+F: hw/ufs/*
+F: include/block/ufs.h
+F: tests/qtest/ufs-test.c
+
+megasas
+M: Hannes Reinecke <hare@suse.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: hw/scsi/megasas.c
+F: hw/scsi/mfi.h
+F: tests/qtest/megasas-test.c
+F: tests/qtest/fuzz-megasas-test.c
+
+Network packet abstractions
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
+R: Akihiko Odaki <akihiko.odaki@daynix.com>
+S: Maintained
+F: include/net/eth.h
+F: net/eth.c
+F: hw/net/net_rx_pkt*
+F: hw/net/net_tx_pkt*
+
+Vmware
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
+S: Maintained
+F: hw/net/vmxnet*
+F: hw/scsi/vmw_pvscsi*
+F: tests/qtest/vmxnet3-test.c
+F: docs/specs/vwm_pvscsi-spec.rst
+
+Rocker
+M: Jiri Pirko <jiri@resnulli.us>
+S: Maintained
+F: hw/net/rocker/
+F: qapi/rocker.json
+F: tests/rocker/
+F: docs/specs/rocker.txt
+
+e1000x
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
+R: Akihiko Odaki <akihiko.odaki@daynix.com>
+S: Maintained
+F: hw/net/e1000x*
+
+e1000e
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
+R: Akihiko Odaki <akihiko.odaki@daynix.com>
+S: Maintained
+F: hw/net/e1000e*
+F: tests/qtest/fuzz-e1000e-test.c
+F: tests/qtest/e1000e-test.c
+F: tests/qtest/libqos/e1000e.*
+
+igb
+M: Akihiko Odaki <akihiko.odaki@daynix.com>
+R: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
+S: Maintained
+F: docs/system/devices/igb.rst
+F: hw/net/igb*
+F: tests/avocado/netdev-ethtool.py
+F: tests/qtest/igb-test.c
+F: tests/qtest/libqos/igb.c
+
+eepro100
+M: Stefan Weil <sw@weilnetz.de>
+S: Maintained
+F: hw/net/eepro100.c
+
+tulip
+M: Sven Schnelle <svens@stackframe.org>
+S: Maintained
+F: hw/net/tulip.c
+F: hw/net/tulip.h
+
+pca954x
+M: Patrick Venture <venture@google.com>
+S: Maintained
+F: hw/i2c/i2c_mux_pca954x.c
+F: include/hw/i2c/i2c_mux_pca954x.h
+
+Generic Loader
+M: Alistair Francis <alistair@alistair23.me>
+S: Maintained
+F: hw/core/generic-loader.c
+F: hw/core/uboot_image.h
+F: include/hw/core/generic-loader.h
+F: docs/system/generic-loader.rst
+
+Guest Loader
+M: Alex Bennée <alex.bennee@linaro.org>
+S: Maintained
+F: hw/core/guest-loader.c
+F: docs/system/guest-loader.rst
+F: tests/avocado/boot_xen.py
+
+Intel Hexadecimal Object File Loader
+M: Su Hang <suhang16@mails.ucas.ac.cn>
+S: Maintained
+F: tests/qtest/hexloader-test.c
+F: tests/data/hex-loader/test.hex
+
+CHRP NVRAM
+M: Thomas Huth <thuth@redhat.com>
+S: Maintained
+F: hw/nvram/chrp_nvram.c
+F: include/hw/nvram/chrp_nvram.h
+F: tests/qtest/prom-env-test.c
+
+VM Generation ID
+S: Orphan
+R: Ani Sinha <ani@anisinha.ca>
+F: hw/acpi/vmgenid.c
+F: include/hw/acpi/vmgenid.h
+F: docs/specs/vmgenid.rst
+F: tests/qtest/vmgenid-test.c
+
+LED
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+S: Maintained
+F: include/hw/misc/led.h
+F: hw/misc/led.c
+
+Unimplemented device
+M: Peter Maydell <peter.maydell@linaro.org>
+R: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Ani Sinha <ani@anisinha.ca>
+S: Maintained
+F: include/hw/misc/unimp.h
+F: hw/misc/unimp.c
+
+Empty slot
+M: Artyom Tarasenko <atar4qemu@gmail.com>
+R: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Ani Sinha <ani@anisinha.ca>
+S: Maintained
+F: include/hw/misc/empty_slot.h
+F: hw/misc/empty_slot.c
+
+Standard VGA
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Maintained
+F: hw/display/vga*
+F: hw/display/bochs-display.c
+F: include/hw/display/vga.h
+F: include/hw/display/bochs-vbe.h
+F: docs/specs/standard-vga.rst
+
+ramfb
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Maintained
+F: hw/display/ramfb*.c
+F: include/hw/display/ramfb.h
+
+virtio-gpu
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Odd Fixes
+F: hw/display/virtio-gpu*
+F: hw/display/virtio-vga.*
+F: include/hw/virtio/virtio-gpu.h
+F: docs/system/devices/virtio-gpu.rst
+
+vhost-user-blk
+M: Raphael Norwitz <raphael.norwitz@nutanix.com>
+S: Maintained
+F: contrib/vhost-user-blk/
+F: contrib/vhost-user-scsi/
+F: hw/block/vhost-user-blk.c
+F: hw/block/virtio-blk-common.c
+F: hw/scsi/vhost-user-scsi.c
+F: hw/virtio/vhost-user-blk-pci.c
+F: hw/virtio/vhost-user-scsi-pci.c
+F: include/hw/virtio/vhost-user-blk.h
+F: include/hw/virtio/vhost-user-scsi.h
+F: include/hw/virtio/virtio-blk-common.h
+
+vhost-user-gpu
+M: Marc-André Lureau <marcandre.lureau@redhat.com>
+R: Gerd Hoffmann <kraxel@redhat.com>
+S: Maintained
+F: docs/interop/vhost-user-gpu.rst
+F: contrib/vhost-user-gpu
+F: hw/display/vhost-user-*
+
+Cirrus VGA
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Odd Fixes
+W: https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/
+F: hw/display/cirrus*
+
+EDID Generator
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Maintained
+F: hw/display/edid*
+F: include/hw/display/edid.h
+F: qemu-edid.c
+
+PIIX4 South Bridge (i82371AB)
+M: Hervé Poussineau <hpoussin@reactos.org>
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+S: Maintained
+F: hw/isa/piix.c
+F: include/hw/southbridge/piix.h
+
+VIA South Bridges (VT82C686B, VT8231)
+M: BALATON Zoltan <balaton@eik.bme.hu>
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Jiaxun Yang <jiaxun.yang@flygoat.com>
+S: Maintained
+F: hw/isa/vt82c686.c
+F: hw/usb/vt82c686-uhci-pci.c
+F: include/hw/isa/vt82c686.h
+
+Firmware configuration (fw_cfg)
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Gerd Hoffmann <kraxel@redhat.com>
+S: Supported
+F: docs/specs/fw_cfg.txt
+F: hw/nvram/fw_cfg*.c
+F: stubs/fw_cfg.c
+F: include/hw/nvram/fw_cfg.h
+F: include/standard-headers/linux/qemu_fw_cfg.h
+F: tests/qtest/libqos/fw_cfg.c
+F: tests/qtest/fw_cfg-test.c
+T: git https://github.com/philmd/qemu.git fw_cfg-next
+
+XIVE
+M: Cédric Le Goater <clg@kaod.org>
+R: Frédéric Barrat <fbarrat@linux.ibm.com>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: hw/*/*xive*
+F: include/hw/*/*xive*
+F: docs/*/*xive*
+
+Renesas peripherals
+R: Yoshinori Sato <ysato@users.sourceforge.jp>
+R: Magnus Damm <magnus.damm@gmail.com>
+S: Odd Fixes
+F: hw/char/renesas_sci.c
+F: hw/char/sh_serial.c
+F: hw/timer/renesas_*.c
+F: hw/timer/sh_timer.c
+F: include/hw/char/renesas_sci.h
+F: include/hw/sh4/sh.h
+F: include/hw/timer/renesas_*.h
+
+Renesas RX peripherals
+R: Yoshinori Sato <ysato@users.sourceforge.jp>
+S: Orphan
+F: hw/intc/rx_icu.c
+F: hw/rx/
+F: include/hw/intc/rx_icu.h
+F: include/hw/rx/
+
+CAN bus subsystem and hardware
+M: Pavel Pisa <pisa@cmp.felk.cvut.cz>
+M: Vikram Garhwal <fnu.vikram@xilinx.com>
+S: Maintained
+W: https://canbus.pages.fel.cvut.cz/
+F: net/can/*
+F: hw/net/can/*
+F: include/net/can_*.h
+F: docs/system/devices/can.rst
+
+OpenPIC interrupt controller
+M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
+S: Odd Fixes
+F: hw/intc/openpic.c
+F: include/hw/ppc/openpic.h
+
+MIPS CPS
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+S: Odd Fixes
+F: hw/misc/mips_*
+F: include/hw/misc/mips_*
+
+MIPS GIC
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+S: Odd Fixes
+F: hw/intc/mips_gic.c
+F: hw/timer/mips_gictimer.c
+F: include/hw/intc/mips_gic.h
+F: include/hw/timer/mips_gictimer.h
+
+S390 3270 device
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+S: Odd fixes
+F: include/hw/s390x/3270-ccw.h
+F: hw/char/terminal3270.c
+F: hw/s390x/3270-ccw.c
+L: qemu-s390x@nongnu.org
+
+S390 diag 288 watchdog
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+S: Supported
+F: hw/watchdog/wdt_diag288.c
+F: include/hw/watchdog/wdt_diag288.h
+L: qemu-s390x@nongnu.org
+
+S390 storage key device
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+S: Supported
+F: hw/s390x/storage-keys.h
+F: hw/s390x/s390-skeys*.c
+L: qemu-s390x@nongnu.org
+
+S390 storage attribute device
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+S: Supported
+F: hw/s390x/storage-attributes.h
+F: hw/s390x/s390-stattrib*.c
+L: qemu-s390x@nongnu.org
+
+S390 floating interrupt controller
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+M: David Hildenbrand <david@redhat.com>
+S: Supported
+F: hw/intc/s390_flic*.c
+F: include/hw/s390x/s390_flic.h
+L: qemu-s390x@nongnu.org
+
+CanoKey
+M: Hongren (Zenithal) Zheng <i@zenithal.me>
+S: Maintained
+R: Canokeys.org <contact@canokeys.org>
+F: hw/usb/canokey.c
+F: hw/usb/canokey.h
+F: docs/system/devices/canokey.rst
+
+Hyper-V Dynamic Memory Protocol
+M: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
+S: Supported
+F: hw/hyperv/hv-balloon*.c
+F: hw/hyperv/hv-balloon*.h
+F: include/hw/hyperv/dynmem-proto.h
+F: include/hw/hyperv/hv-balloon.h
+
+Subsystems
+----------
+Overall Audio backends
+M: Gerd Hoffmann <kraxel@redhat.com>
+M: Marc-André Lureau <marcandre.lureau@redhat.com>
+S: Odd Fixes
+F: audio/
+X: audio/alsaaudio.c
+X: audio/coreaudio.c
+X: audio/dsound*
+X: audio/jackaudio.c
+X: audio/ossaudio.c
+X: audio/paaudio.c
+X: audio/sdlaudio.c
+X: audio/sndioaudio.c
+X: audio/spiceaudio.c
+F: qapi/audio.json
+
+ALSA Audio backend
+M: Gerd Hoffmann <kraxel@redhat.com>
+R: Christian Schoenebeck <qemu_oss@crudebyte.com>
+S: Odd Fixes
+F: audio/alsaaudio.c
+
+Core Audio framework backend
+M: Gerd Hoffmann <kraxel@redhat.com>
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Christian Schoenebeck <qemu_oss@crudebyte.com>
+R: Akihiko Odaki <akihiko.odaki@daynix.com>
+S: Odd Fixes
+F: audio/coreaudio.c
+
+DSound Audio backend
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Odd Fixes
+F: audio/dsound*
+
+JACK Audio Connection Kit backend
+M: Gerd Hoffmann <kraxel@redhat.com>
+R: Christian Schoenebeck <qemu_oss@crudebyte.com>
+S: Odd Fixes
+F: audio/jackaudio.c
+
+Open Sound System (OSS) Audio backend
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Odd Fixes
+F: audio/ossaudio.c
+
+PulseAudio backend
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Odd Fixes
+F: audio/paaudio.c
+
+SDL Audio backend
+M: Gerd Hoffmann <kraxel@redhat.com>
+R: Thomas Huth <huth@tuxfamily.org>
+S: Odd Fixes
+F: audio/sdlaudio.c
+
+Sndio Audio backend
+M: Gerd Hoffmann <kraxel@redhat.com>
+R: Alexandre Ratchov <alex@caoua.org>
+S: Odd Fixes
+F: audio/sndioaudio.c
+
+Block layer core
+M: Kevin Wolf <kwolf@redhat.com>
+M: Hanna Reitz <hreitz@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block*
+F: block/
+F: hw/block/
+F: qapi/block*.json
+F: qapi/transaction.json
+F: include/block/
+F: include/sysemu/block-*.h
+F: qemu-img*
+F: docs/tools/qemu-img.rst
+F: qemu-io*
+F: tests/qemu-iotests/
+F: util/qemu-progress.c
+F: qobject/block-qdict.c
+F: tests/unit/check-block-qdict.c
+T: git https://repo.or.cz/qemu/kevin.git block
+
+Storage daemon
+M: Kevin Wolf <kwolf@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: storage-daemon/
+F: docs/interop/qemu-storage-daemon-qmp-ref.rst
+F: docs/tools/qemu-storage-daemon.rst
+T: git https://repo.or.cz/qemu/kevin.git block
+
+Block I/O path
+M: Stefan Hajnoczi <stefanha@redhat.com>
+M: Fam Zheng <fam@euphon.net>
+L: qemu-block@nongnu.org
+S: Supported
+F: util/async.c
+F: util/aio-*.c
+F: util/aio-*.h
+F: util/defer-call.c
+F: util/fdmon-*.c
+F: block/io.c
+F: migration/block*
+F: include/block/aio.h
+F: include/block/aio-wait.h
+F: include/qemu/defer-call.h
+F: scripts/qemugdb/aio.py
+F: tests/unit/test-fdmon-epoll.c
+T: git https://github.com/stefanha/qemu.git block
+
+Block SCSI subsystem
+M: Paolo Bonzini <pbonzini@redhat.com>
+R: Fam Zheng <fam@euphon.net>
+L: qemu-block@nongnu.org
+S: Supported
+F: include/scsi/*
+F: scsi/*
+
+Block Jobs
+M: John Snow <jsnow@redhat.com>
+M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+L: qemu-block@nongnu.org
+S: Supported
+F: blockjob.c
+F: include/block/blockjob.h
+F: job.c
+F: job-qmp.c
+F: include/qemu/job.h
+F: block/backup.c
+F: block/commit.c
+F: block/stream.c
+F: block/mirror.c
+F: qapi/job.json
+F: block/block-copy.c
+F: include/block/block-copy.h
+F: block/reqlist.c
+F: include/block/reqlist.h
+F: block/copy-before-write.h
+F: block/copy-before-write.c
+F: block/snapshot-access.c
+F: include/block/aio_task.h
+F: block/aio_task.c
+F: util/qemu-co-shared-resource.c
+F: include/qemu/co-shared-resource.h
+T: git https://gitlab.com/jsnow/qemu.git jobs
+T: git https://gitlab.com/vsementsov/qemu.git block
+
+Compute Express Link
+M: Jonathan Cameron <jonathan.cameron@huawei.com>
+R: Fan Ni <fan.ni@samsung.com>
+S: Supported
+F: hw/cxl/
+F: hw/mem/cxl_type3.c
+F: include/hw/cxl/
+
+Dirty Bitmaps
+M: Eric Blake <eblake@redhat.com>
+M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+R: John Snow <jsnow@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: include/qemu/hbitmap.h
+F: include/block/dirty-bitmap.h
+F: block/monitor/bitmap-qmp-cmds.c
+F: block/dirty-bitmap.c
+F: block/qcow2-bitmap.c
+F: migration/block-dirty-bitmap.c
+F: util/hbitmap.c
+F: tests/unit/test-hbitmap.c
+F: docs/interop/bitmaps.rst
+T: git https://repo.or.cz/qemu/ericb.git bitmaps
+T: git https://gitlab.com/vsementsov/qemu.git block
+
+Character device backends
+M: Marc-André Lureau <marcandre.lureau@redhat.com>
+R: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: chardev/
+F: include/chardev/
+F: qapi/char.json
+
+Character Devices (Braille)
+M: Samuel Thibault <samuel.thibault@ens-lyon.org>
+S: Maintained
+F: chardev/baum.c
+
+Command line option argument parsing
+M: Markus Armbruster <armbru@redhat.com>
+S: Supported
+F: include/qemu/option.h
+F: tests/unit/test-keyval.c
+F: tests/unit/test-qemu-opts.c
+F: util/keyval.c
+F: util/qemu-option.c
+
+Coverity model
+M: Markus Armbruster <armbru@redhat.com>
+S: Supported
+F: scripts/coverity-model.c
+
+Coverity Scan integration
+M: Peter Maydell <peter.maydell@linaro.org>
+S: Maintained
+F: scripts/coverity-scan/
+
+Device Tree
+M: Alistair Francis <alistair.francis@wdc.com>
+R: David Gibson <david@gibson.dropbear.id.au>
+S: Maintained
+F: system/device_tree.c
+F: include/sysemu/device_tree.h
+
+Dump
+S: Supported
+M: Marc-André Lureau <marcandre.lureau@redhat.com>
+F: dump/
+F: hw/misc/vmcoreinfo.c
+F: include/hw/misc/vmcoreinfo.h
+F: include/qemu/win_dump_defs
+F: include/sysemu/dump-arch.h
+F: include/sysemu/dump.h
+F: qapi/dump.json
+F: scripts/dump-guest-memory.py
+F: stubs/dump.c
+F: docs/specs/vmcoreinfo.rst
+
+Error reporting
+M: Markus Armbruster <armbru@redhat.com>
+S: Supported
+F: include/qapi/error.h
+F: include/qemu/error-report.h
+F: qapi/error.json
+F: util/error.c
+F: util/qemu-error.c
+F: scripts/coccinelle/err-bad-newline.cocci
+F: scripts/coccinelle/error-use-after-free.cocci
+F: scripts/coccinelle/error_propagate_null.cocci
+F: scripts/coccinelle/remove_local_err.cocci
+F: scripts/coccinelle/use-error_fatal.cocci
+F: scripts/coccinelle/errp-guard.cocci
+
+GDB stub
+M: Alex Bennée <alex.bennee@linaro.org>
+R: Philippe Mathieu-Daudé <philmd@linaro.org>
+S: Maintained
+F: docs/system/gdb.rst
+F: gdbstub/*
+F: include/exec/gdbstub.h
+F: include/gdbstub/*
+F: gdb-xml/
+F: tests/tcg/multiarch/gdbstub/*
+F: scripts/feature_to_c.py
+F: scripts/probe-gdb-support.py
+
+Memory API
+M: Paolo Bonzini <pbonzini@redhat.com>
+M: Peter Xu <peterx@redhat.com>
+M: David Hildenbrand <david@redhat.com>
+R: Philippe Mathieu-Daudé <philmd@linaro.org>
+S: Supported
+F: include/exec/ioport.h
+F: include/exec/memop.h
+F: include/exec/memory.h
+F: include/exec/ram_addr.h
+F: include/exec/ramblock.h
+F: include/sysemu/memory_mapping.h
+F: system/dma-helpers.c
+F: system/ioport.c
+F: system/memory.c
+F: system/memory_mapping.c
+F: system/physmem.c
+F: include/exec/memory-internal.h
+F: scripts/coccinelle/memory-region-housekeeping.cocci
+
+Memory devices
+M: David Hildenbrand <david@redhat.com>
+M: Igor Mammedov <imammedo@redhat.com>
+R: Xiao Guangrong <xiaoguangrong.eric@gmail.com>
+S: Supported
+F: hw/mem/memory-device.c
+F: hw/mem/nvdimm.c
+F: hw/mem/pc-dimm.c
+F: include/hw/mem/memory-device.h
+F: include/hw/mem/nvdimm.h
+F: include/hw/mem/pc-dimm.h
+F: stubs/memory_device.c
+F: docs/nvdimm.txt
+
+SPICE
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Odd Fixes
+F: include/ui/qemu-spice.h
+F: include/ui/spice-display.h
+F: ui/spice-*.c
+F: audio/spiceaudio.c
+F: hw/display/qxl*
+F: qapi/ui.json
+F: docs/spice-port-fqdn.txt
+
+Graphics
+M: Gerd Hoffmann <kraxel@redhat.com>
+M: Marc-André Lureau <marcandre.lureau@redhat.com>
+S: Odd Fixes
+F: ui/
+F: include/ui/
+F: qapi/ui.json
+F: util/drm.c
+F: docs/devel/ui.rst
+
+Cocoa graphics
+M: Peter Maydell <peter.maydell@linaro.org>
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Akihiko Odaki <akihiko.odaki@daynix.com>
+S: Odd Fixes
+F: ui/cocoa.m
+
+Main loop
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: include/qemu/main-loop.h
+F: include/sysemu/runstate.h
+F: include/sysemu/runstate-action.h
+F: util/main-loop.c
+F: util/qemu-timer*.c
+F: system/vl.c
+F: system/main.c
+F: system/cpus.c
+F: system/cpu-throttle.c
+F: system/cpu-timers.c
+F: system/runstate*
+F: qapi/run-state.json
+
+Read, Copy, Update (RCU)
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: docs/devel/lockcnt.txt
+F: docs/devel/rcu.txt
+F: include/qemu/rcu*.h
+F: tests/unit/rcutorture.c
+F: tests/unit/test-rcu-*.c
+F: util/rcu.c
+
+Human Monitor (HMP)
+M: Dr. David Alan Gilbert <dave@treblig.org>
+S: Maintained
+F: monitor/monitor-internal.h
+F: monitor/misc.c
+F: monitor/monitor.c
+F: monitor/hmp*
+F: hmp.h
+F: hmp-commands*.hx
+F: include/monitor/hmp-target.h
+F: tests/qtest/test-hmp.c
+F: include/qemu/qemu-print.h
+F: util/qemu-print.c
+
+Network device backends
+M: Jason Wang <jasowang@redhat.com>
+S: Maintained
+F: net/
+F: include/net/
+F: qemu-bridge-helper.c
+T: git https://github.com/jasowang/qemu.git net
+F: qapi/net.json
+
+Netmap network backend
+M: Luigi Rizzo <rizzo@iet.unipi.it>
+M: Giuseppe Lettieri <g.lettieri@iet.unipi.it>
+M: Vincenzo Maffione <v.maffione@gmail.com>
+W: http://info.iet.unipi.it/~luigi/netmap/
+S: Maintained
+F: net/netmap.c
+
+AF_XDP network backend
+R: Ilya Maximets <i.maximets@ovn.org>
+F: net/af-xdp.c
+
+Host Memory Backends
+M: David Hildenbrand <david@redhat.com>
+M: Igor Mammedov <imammedo@redhat.com>
+S: Maintained
+F: backends/hostmem*.c
+F: include/sysemu/hostmem.h
+F: docs/system/vm-templating.rst
+T: git https://gitlab.com/ehabkost/qemu.git machine-next
+
+Cryptodev Backends
+M: Gonglei <arei.gonglei@huawei.com>
+M: zhenwei pi <pizhenwei@bytedance.com>
+S: Maintained
+F: include/sysemu/cryptodev*.h
+F: backends/cryptodev*.c
+F: qapi/cryptodev.json
+
+Python library
+M: John Snow <jsnow@redhat.com>
+M: Cleber Rosa <crosa@redhat.com>
+R: Beraldo Leal <bleal@redhat.com>
+S: Maintained
+F: python/
+T: git https://gitlab.com/jsnow/qemu.git python
+
+Python scripts
+M: John Snow <jsnow@redhat.com>
+M: Cleber Rosa <crosa@redhat.com>
+S: Odd Fixes
+F: scripts/*.py
+F: tests/*.py
+
+Benchmark util
+M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+S: Maintained
+F: scripts/simplebench/
+T: git https://gitlab.com/vsementsov/qemu.git simplebench
+
+Transactions helper
+M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+S: Maintained
+F: include/qemu/transactions.h
+F: util/transactions.c
+T: git https://gitlab.com/vsementsov/qemu.git block
+
+QAPI
+M: Markus Armbruster <armbru@redhat.com>
+M: Michael Roth <michael.roth@amd.com>
+S: Supported
+F: qapi/
+X: qapi/*.json
+F: include/qapi/
+X: include/qapi/qmp/
+F: include/qapi/qmp/dispatch.h
+F: tests/qapi-schema/
+F: tests/unit/test-*-visitor.c
+F: tests/unit/test-qapi-*.c
+F: tests/unit/test-qmp-*.c
+F: tests/unit/test-visitor-serialization.c
+F: scripts/qapi-gen.py
+F: scripts/qapi/*
+F: docs/sphinx/qapidoc.py
+F: docs/devel/qapi*
+T: git https://repo.or.cz/qemu/armbru.git qapi-next
+
+QAPI Schema
+M: Eric Blake <eblake@redhat.com>
+M: Markus Armbruster <armbru@redhat.com>
+S: Supported
+F: qapi/*.json
+T: git https://repo.or.cz/qemu/armbru.git qapi-next
+
+QObject
+M: Markus Armbruster <armbru@redhat.com>
+S: Supported
+F: qobject/
+F: include/qapi/qmp/
+X: include/qapi/qmp/dispatch.h
+F: scripts/coccinelle/qobject.cocci
+F: tests/unit/check-qdict.c
+F: tests/unit/check-qjson.c
+F: tests/unit/check-qlist.c
+F: tests/unit/check-qlit.c
+F: tests/unit/check-qnull.c
+F: tests/unit/check-qnum.c
+F: tests/unit/check-qobject.c
+F: tests/unit/check-qstring.c
+F: tests/data/qobject/qdict.txt
+T: git https://repo.or.cz/qemu/armbru.git qapi-next
+
+QEMU Guest Agent
+M: Michael Roth <michael.roth@amd.com>
+M: Konstantin Kostiuk <kkostiuk@redhat.com>
+S: Maintained
+F: qga/
+F: contrib/systemd/qemu-guest-agent.service
+F: docs/interop/qemu-ga.rst
+F: docs/interop/qemu-ga-ref.rst
+F: scripts/qemu-guest-agent/
+F: tests/*/test-qga*
+T: git https://github.com/mdroth/qemu.git qga
+
+QEMU Guest Agent Win32
+M: Konstantin Kostiuk <kkostiuk@redhat.com>
+S: Maintained
+F: qga/*win32*
+F: qga/vss-win32/
+F: qga/installer/
+T: git https://github.com/kostyanf14/qemu.git qga-win32
+
+QOM
+M: Paolo Bonzini <pbonzini@redhat.com>
+R: Daniel P. Berrange <berrange@redhat.com>
+R: Eduardo Habkost <eduardo@habkost.net>
+S: Supported
+F: docs/devel/qom.rst
+F: docs/qdev-device-use.txt
+F: hw/core/qdev*
+F: hw/core/bus.c
+F: hw/core/sysbus.c
+F: include/hw/qdev*
+F: include/monitor/qdev.h
+F: include/qom/
+F: qapi/qom.json
+F: qapi/qdev.json
+F: scripts/coccinelle/qom-parent-type.cocci
+F: scripts/qom-cast-macro-clean-cocci-gen.py
+F: system/qdev-monitor.c
+F: stubs/qdev.c
+F: qom/
+F: tests/unit/check-qom-interface.c
+F: tests/unit/check-qom-proplist.c
+F: tests/unit/test-qdev-global-props.c
+
+QOM boilerplate conversion script
+M: Eduardo Habkost <eduardo@habkost.net>
+S: Maintained
+F: scripts/codeconverter/
+
+QMP
+M: Markus Armbruster <armbru@redhat.com>
+S: Supported
+F: monitor/monitor-internal.h
+F: monitor/qmp*
+F: monitor/misc.c
+F: monitor/monitor.c
+F: qapi/control.json
+F: qapi/error.json
+F: qapi/introspect.json
+F: docs/devel/*qmp-*
+F: docs/interop/*qmp-*
+F: scripts/qmp/
+F: tests/qtest/qmp-test.c
+F: tests/qtest/qmp-cmd-test.c
+T: git https://repo.or.cz/qemu/armbru.git qapi-next
+
+qtest
+M: Thomas Huth <thuth@redhat.com>
+M: Laurent Vivier <lvivier@redhat.com>
+R: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: system/qtest.c
+F: include/sysemu/qtest.h
+F: accel/qtest/
+F: tests/qtest/
+F: docs/devel/qgraph.rst
+F: docs/devel/qtest.rst
+X: tests/qtest/bios-tables-test*
+
+Device Fuzzing
+M: Alexander Bulekov <alxndr@bu.edu>
+R: Paolo Bonzini <pbonzini@redhat.com>
+R: Bandan Das <bsd@redhat.com>
+R: Stefan Hajnoczi <stefanha@redhat.com>
+R: Thomas Huth <thuth@redhat.com>
+R: Darren Kenny <darren.kenny@oracle.com> 
+R: Qiuhao Li <Qiuhao.Li@outlook.com>
+S: Maintained
+F: tests/qtest/fuzz/
+F: tests/qtest/fuzz-*test.c
+F: tests/docker/test-fuzz
+F: scripts/oss-fuzz/
+F: hw/mem/sparse-mem.c
+F: docs/devel/fuzzing.rst
+
+Register API
+M: Alistair Francis <alistair@alistair23.me>
+S: Maintained
+F: hw/core/register.c
+F: include/hw/register.h
+F: include/hw/registerfields.h
+
+SLIRP
+M: Samuel Thibault <samuel.thibault@ens-lyon.org>
+S: Maintained
+F: net/slirp.c
+F: include/net/slirp.h
+T: git https://people.debian.org/~sthibault/qemu.git slirp
+
+Stats
+S: Orphan
+F: include/sysemu/stats.h
+F: stats/
+
+Streams
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+S: Maintained
+F: hw/core/stream.c
+F: include/hw/stream.h
+
+Stubs
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: stubs/
+
+Tracing
+M: Stefan Hajnoczi <stefanha@redhat.com>
+R: Mads Ynddal <mads@ynddal.dk>
+S: Maintained
+F: trace/
+F: trace-events
+F: docs/qemu-option-trace.rst.inc
+F: qapi/trace.json
+F: scripts/tracetool.py
+F: scripts/tracetool/
+F: scripts/qemu-trace-stap*
+F: docs/tools/qemu-trace-stap.rst
+F: docs/devel/tracing.rst
+T: git https://github.com/stefanha/qemu.git tracing
+
+Simpletrace
+M: Mads Ynddal <mads@ynddal.dk>
+S: Maintained
+F: scripts/simpletrace.py
+
+TPM
+M: Stefan Berger <stefanb@linux.ibm.com>
+S: Maintained
+F: system/tpm*
+F: hw/tpm/*
+F: include/hw/acpi/tpm.h
+F: include/sysemu/tpm*
+F: qapi/tpm.json
+F: backends/tpm/
+F: tests/qtest/*tpm*
+F: docs/specs/tpm.rst
+T: git https://github.com/stefanberger/qemu-tpm.git tpm-next
+
+Checkpatch
+S: Odd Fixes
+F: scripts/checkpatch.pl
+
+Migration
+M: Juan Quintela <quintela@redhat.com>
+M: Peter Xu <peterx@redhat.com>
+M: Fabiano Rosas <farosas@suse.de>
+R: Leonardo Bras <leobras@redhat.com>
+S: Maintained
+F: hw/core/vmstate-if.c
+F: include/hw/vmstate-if.h
+F: include/migration/
+F: include/qemu/userfaultfd.h
+F: migration/
+F: scripts/vmstate-static-checker.py
+F: tests/vmstate-static-checker-data/
+F: tests/qtest/migration-test.c
+F: docs/devel/migration.rst
+F: qapi/migration.json
+F: tests/migration/
+F: util/userfaultfd.c
+X: migration/rdma*
+
+RDMA Migration
+M: Juan Quintela <quintela@redhat.com>
+R: Li Zhijian <lizhijian@fujitsu.com>
+R: Peter Xu <peterx@redhat.com>
+R: Leonardo Bras <leobras@redhat.com>
+S: Odd Fixes
+F: migration/rdma*
+
+Migration dirty limit and dirty page rate
+M: Hyman Huang <yong.huang@smartx.com>
+S: Maintained
+F: system/dirtylimit.c
+F: include/sysemu/dirtylimit.h
+F: migration/dirtyrate.c
+F: migration/dirtyrate.h
+F: include/sysemu/dirtyrate.h
+
+D-Bus
+M: Marc-André Lureau <marcandre.lureau@redhat.com>
+S: Maintained
+F: backends/dbus-vmstate.c
+F: ui/dbus*
+F: audio/dbus*
+F: util/dbus.c
+F: include/ui/dbus*
+F: include/qemu/dbus.h
+F: docs/interop/dbus*
+F: docs/sphinx/dbus*
+F: docs/sphinx/fakedbusdoc.py
+F: tests/qtest/dbus*
+F: scripts/xml-preprocess*
+
+Seccomp
+M: Daniel P. Berrange <berrange@redhat.com>
+S: Odd Fixes
+F: system/qemu-seccomp.c
+F: include/sysemu/seccomp.h
+F: tests/unit/test-seccomp.c
+
+Cryptography
+M: Daniel P. Berrange <berrange@redhat.com>
+S: Maintained
+F: crypto/
+F: include/crypto/
+F: host/include/*/host/crypto/
+F: qapi/crypto.json
+F: tests/unit/test-crypto-*
+F: tests/bench/benchmark-crypto-*
+F: tests/unit/crypto-tls-*
+F: tests/unit/pkix_asn1_tab.c
+F: qemu.sasl
+
+Coroutines
+M: Stefan Hajnoczi <stefanha@redhat.com>
+M: Kevin Wolf <kwolf@redhat.com>
+S: Maintained
+F: util/*coroutine*
+F: include/qemu/coroutine*
+F: tests/unit/test-coroutine.c
+
+Buffers
+M: Daniel P. Berrange <berrange@redhat.com>
+S: Odd Fixes
+F: util/buffer.c
+F: include/qemu/buffer.h
+
+I/O Channels
+M: Daniel P. Berrange <berrange@redhat.com>
+S: Maintained
+F: io/
+F: include/io/
+F: tests/unit/test-io-*
+
+User authorization
+M: Daniel P. Berrange <berrange@redhat.com>
+S: Maintained
+F: authz/
+F: qapi/authz.json
+F: include/authz/
+F: tests/unit/test-authz-*
+
+Sockets
+M: Daniel P. Berrange <berrange@redhat.com>
+S: Maintained
+F: include/qemu/sockets.h
+F: util/qemu-sockets.c
+F: qapi/sockets.json
+
+File monitor
+M: Daniel P. Berrange <berrange@redhat.com>
+S: Odd Fixes
+F: util/filemonitor*.c
+F: include/qemu/filemonitor.h
+F: tests/unit/test-util-filemonitor.c
+
+Throttling infrastructure
+M: Alberto Garcia <berto@igalia.com>
+S: Supported
+F: block/throttle-groups.c
+F: include/block/throttle-groups.h
+F: include/qemu/throttle*.h
+F: util/throttle.c
+F: docs/throttle.txt
+F: tests/unit/test-throttle.c
+L: qemu-block@nongnu.org
+
+UUID
+M: Fam Zheng <fam@euphon.net>
+S: Supported
+F: util/uuid.c
+F: include/qemu/uuid.h
+F: tests/unit/test-uuid.c
+
+Yank feature
+M: Lukas Straub <lukasstraub2@web.de>
+S: Odd fixes
+F: util/yank.c
+F: migration/yank_functions*
+F: tests/unit/test-yank.c
+F: include/qemu/yank.h
+F: qapi/yank.json
+
+COLO Framework
+M: Hailiang Zhang <zhanghailiang@xfusion.com>
+S: Maintained
+F: migration/colo*
+F: include/migration/colo.h
+F: include/migration/failover.h
+F: docs/COLO-FT.txt
+
+COLO Proxy
+M: Zhang Chen <chen.zhang@intel.com>
+M: Li Zhijian <lizhijian@fujitsu.com>
+S: Supported
+F: docs/colo-proxy.txt
+F: net/colo*
+F: net/filter-rewriter.c
+F: net/filter-mirror.c
+F: tests/qtest/test-filter*
+
+Record/replay
+M: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
+R: Paolo Bonzini <pbonzini@redhat.com>
+W: https://wiki.qemu.org/Features/record-replay
+S: Supported
+F: replay/*
+F: block/blkreplay.c
+F: net/filter-replay.c
+F: include/exec/replay-core.h
+F: include/sysemu/replay.h
+F: docs/devel/replay.rst
+F: docs/system/replay.rst
+F: stubs/replay.c
+F: tests/avocado/replay_kernel.py
+F: tests/avocado/replay_linux.py
+F: tests/avocado/reverse_debugging.py
+F: qapi/replay.json
+
+IOVA Tree
+M: Peter Xu <peterx@redhat.com>
+S: Maintained
+F: include/qemu/iova-tree.h
+F: util/iova-tree.c
+
+elf2dmp
+M: Viktor Prutyanov <viktor.prutyanov@phystech.edu>
+S: Maintained
+F: contrib/elf2dmp/
+
+Overall sensors
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+S: Odd Fixes
+F: hw/sensor
+F: include/hw/sensor
+
+I2C and SMBus
+M: Corey Minyard <cminyard@mvista.com>
+S: Maintained
+F: hw/i2c/core.c
+F: hw/i2c/smbus_slave.c
+F: hw/i2c/smbus_master.c
+F: hw/i2c/smbus_eeprom.c
+F: include/hw/i2c/i2c.h
+F: include/hw/i2c/smbus_master.h
+F: include/hw/i2c/smbus_slave.h
+F: include/hw/i2c/smbus_eeprom.h
+
+PMBus
+M: Titus Rwantare <titusr@google.com>
+S: Maintained
+F: hw/i2c/pmbus_device.c
+F: hw/sensor/adm1272.c
+F: hw/sensor/isl_pmbus_vr.c
+F: hw/sensor/max34451.c
+F: include/hw/i2c/pmbus_device.h
+F: include/hw/sensor/isl_pmbus_vr.h
+F: tests/qtest/adm1272-test.c
+F: tests/qtest/max34451-test.c
+F: tests/qtest/isl_pmbus_vr-test.c
+
+Firmware schema specifications
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Daniel P. Berrange <berrange@redhat.com>
+R: Kashyap Chamarthy <kchamart@redhat.com>
+S: Maintained
+F: docs/interop/firmware.json
+
+EDK2 Firmware
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Supported
+F: hw/i386/*ovmf*
+F: pc-bios/descriptors/??-edk2-*.json
+F: pc-bios/edk2-*
+F: roms/Makefile.edk2
+F: roms/edk2
+F: roms/edk2-*
+F: tests/data/uefi-boot-images/
+F: tests/uefi-test-tools/
+
+VT-d Emulation
+M: Michael S. Tsirkin <mst@redhat.com>
+M: Peter Xu <peterx@redhat.com>
+R: Jason Wang <jasowang@redhat.com>
+S: Supported
+F: hw/i386/intel_iommu.c
+F: hw/i386/intel_iommu_internal.h
+F: include/hw/i386/intel_iommu.h
+
+AMD-Vi Emulation
+S: Orphan
+F: hw/i386/amd_iommu.?
+
+OpenSBI Firmware
+M: Bin Meng <bmeng.cn@gmail.com>
+S: Supported
+F: pc-bios/opensbi-*
+F: .gitlab-ci.d/opensbi.yml
+F: .gitlab-ci.d/opensbi/
+
+Clock framework
+M: Luc Michel <luc@lmichel.fr>
+R: Damien Hedde <damien.hedde@dahe.fr>
+S: Maintained
+F: include/hw/clock.h
+F: include/hw/qdev-clock.h
+F: hw/core/clock.c
+F: hw/core/clock-vmstate.c
+F: hw/core/qdev-clock.c
+F: docs/devel/clocks.rst
+
+Usermode Emulation
+------------------
+Overall usermode emulation
+M: Riku Voipio <riku.voipio@iki.fi>
+S: Maintained
+F: accel/tcg/user-exec*.c
+F: include/user/
+F: common-user/
+
+BSD user
+M: Warner Losh <imp@bsdimp.com>
+R: Kyle Evans <kevans@freebsd.org>
+S: Maintained
+F: bsd-user/
+F: configs/targets/*-bsd-user.mak
+F: tests/vm/*bsd
+T: git https://github.com/qemu-bsd-user/qemu-bsd-user bsd-user-rebase-3.1
+
+Linux user
+M: Laurent Vivier <laurent@vivier.eu>
+S: Maintained
+F: linux-user/
+F: configs/targets/*linux-user.mak
+F: scripts/qemu-binfmt-conf.sh
+F: scripts/update-syscalltbl.sh
+F: scripts/update-mips-syscall-args.sh
+F: scripts/gensyscalls.sh
+
+Tiny Code Generator (TCG)
+-------------------------
+Common TCG code
+M: Richard Henderson <richard.henderson@linaro.org>
+S: Maintained
+F: tcg/
+F: include/tcg/
+
+TCG Plugins
+M: Alex Bennée <alex.bennee@linaro.org>
+R: Alexandre Iooss <erdnaxe@crans.org>
+R: Mahmoud Mandour <ma.mandourr@gmail.com>
+S: Maintained
+F: docs/devel/tcg-plugins.rst
+F: plugins/
+F: tests/plugin/
+F: tests/avocado/tcg_plugins.py
+F: contrib/plugins/
+
+AArch64 TCG target
+M: Richard Henderson <richard.henderson@linaro.org>
+S: Maintained
+L: qemu-arm@nongnu.org
+F: tcg/aarch64/
+
+ARM TCG target
+M: Richard Henderson <richard.henderson@linaro.org>
+S: Maintained
+L: qemu-arm@nongnu.org
+F: tcg/arm/
+
+i386 TCG target
+M: Richard Henderson <richard.henderson@linaro.org>
+S: Maintained
+F: tcg/i386/
+
+LoongArch64 TCG target
+M: WANG Xuerui <git@xen0n.name>
+S: Maintained
+F: tcg/loongarch64/
+
+MIPS TCG target
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Aurelien Jarno <aurelien@aurel32.net>
+R: Huacai Chen <chenhuacai@kernel.org>
+R: Jiaxun Yang <jiaxun.yang@flygoat.com>
+R: Aleksandar Rikalo <aleksandar.rikalo@syrmia.com>
+S: Odd Fixes
+F: tcg/mips/
+
+PPC TCG target
+M: Richard Henderson <richard.henderson@linaro.org>
+S: Odd Fixes
+F: tcg/ppc/
+
+RISC-V TCG target
+M: Palmer Dabbelt <palmer@dabbelt.com>
+M: Alistair Francis <Alistair.Francis@wdc.com>
+L: qemu-riscv@nongnu.org
+S: Maintained
+F: tcg/riscv/
+F: disas/riscv.[ch]
+
+S390 TCG target
+M: Richard Henderson <richard.henderson@linaro.org>
+S: Maintained
+F: tcg/s390/
+L: qemu-s390x@nongnu.org
+
+SPARC TCG target
+S: Odd Fixes
+F: tcg/sparc64/
+F: disas/sparc.c
+
+TCI TCG target
+M: Stefan Weil <sw@weilnetz.de>
+S: Maintained
+F: tcg/tci/
+F: tcg/tci.c
+F: disas/tci.c
+
+Block drivers
+-------------
+VMDK
+M: Fam Zheng <fam@euphon.net>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/vmdk.c
+
+RBD
+M: Ilya Dryomov <idryomov@gmail.com>
+R: Peter Lieven <pl@kamp.de>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/rbd.c
+
+VHDX
+M: Jeff Cody <codyprime@gmail.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/vhdx*
+
+VDI
+M: Stefan Weil <sw@weilnetz.de>
+L: qemu-block@nongnu.org
+S: Maintained
+F: block/vdi.c
+
+blkio
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Maintained
+F: block/blkio.c
+
+iSCSI
+M: Ronnie Sahlberg <ronniesahlberg@gmail.com>
+M: Paolo Bonzini <pbonzini@redhat.com>
+M: Peter Lieven <pl@kamp.de>
+L: qemu-block@nongnu.org
+S: Odd Fixes
+F: block/iscsi.c
+F: block/iscsi-opts.c
+
+Network Block Device (NBD)
+M: Eric Blake <eblake@redhat.com>
+M: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+L: qemu-block@nongnu.org
+S: Maintained
+F: block/nbd*
+F: nbd/
+F: include/block/nbd*
+F: qemu-nbd.*
+F: blockdev-nbd.c
+F: docs/interop/nbd.txt
+F: docs/tools/qemu-nbd.rst
+F: tests/qemu-iotests/tests/*nbd*
+T: git https://repo.or.cz/qemu/ericb.git nbd
+T: git https://gitlab.com/vsementsov/qemu.git block
+
+NFS
+M: Peter Lieven <pl@kamp.de>
+L: qemu-block@nongnu.org
+S: Maintained
+F: block/nfs.c
+
+SSH
+M: Richard W.M. Jones <rjones@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/ssh.c
+
+CURL
+L: qemu-block@nongnu.org
+S: Odd Fixes
+F: block/curl.c
+
+GLUSTER
+L: qemu-block@nongnu.org
+L: integration@gluster.org
+S: Odd Fixes
+F: block/gluster.c
+
+Null Block Driver
+M: Fam Zheng <fam@euphon.net>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/null.c
+
+NVMe Block Driver
+M: Stefan Hajnoczi <stefanha@redhat.com>
+R: Fam Zheng <fam@euphon.net>
+R: Philippe Mathieu-Daudé <philmd@linaro.org>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/nvme*
+F: include/block/nvme.h
+T: git https://github.com/stefanha/qemu.git block
+
+Bootdevice
+M: Gonglei <arei.gonglei@huawei.com>
+S: Maintained
+F: system/bootdevice.c
+
+Quorum
+M: Alberto Garcia <berto@igalia.com>
+S: Supported
+F: block/quorum.c
+L: qemu-block@nongnu.org
+
+blklogwrites
+M: Ari Sundholm <ari@tuxera.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/blklogwrites.c
+
+blkverify
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/blkverify.c
+
+bochs
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/bochs.c
+
+cloop
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/cloop.c
+
+dmg
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/dmg.c
+
+parallels
+M: Stefan Hajnoczi <stefanha@redhat.com>
+M: Denis V. Lunev <den@openvz.org>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/parallels.c
+F: block/parallels-ext.c
+F: docs/interop/parallels.txt
+T: git https://src.openvz.org/scm/~den/qemu.git parallels
+
+qed
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/qed.c
+
+raw
+M: Kevin Wolf <kwolf@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/linux-aio.c
+F: include/block/raw-aio.h
+F: block/raw-format.c
+F: block/file-posix.c
+F: block/file-win32.c
+F: block/win32-aio.c
+
+Linux io_uring
+M: Aarushi Mehta <mehta.aaru20@gmail.com>
+M: Julia Suvorova <jusual@redhat.com>
+M: Stefan Hajnoczi <stefanha@redhat.com>
+R: Stefano Garzarella <sgarzare@redhat.com>
+L: qemu-block@nongnu.org
+S: Maintained
+F: block/io_uring.c
+F: stubs/io_uring.c
+
+qcow2
+M: Kevin Wolf <kwolf@redhat.com>
+M: Hanna Reitz <hreitz@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/qcow2*
+F: docs/interop/qcow2.txt
+
+qcow
+M: Kevin Wolf <kwolf@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/qcow.c
+
+blkdebug
+M: Kevin Wolf <kwolf@redhat.com>
+M: Hanna Reitz <hreitz@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/blkdebug.c
+
+vpc
+M: Kevin Wolf <kwolf@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/vpc.c
+
+vvfat
+M: Kevin Wolf <kwolf@redhat.com>
+L: qemu-block@nongnu.org
+S: Odd Fixes
+F: block/vvfat.c
+
+Image format fuzzer
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: tests/image-fuzzer/
+
+Vhost-user block device backend server
+M: Coiby Xu <Coiby.Xu@gmail.com>
+S: Maintained
+F: block/export/vhost-user-blk-server.c
+F: block/export/vhost-user-blk-server.h
+F: block/export/virtio-blk-handler.c
+F: block/export/virtio-blk-handler.h
+F: include/qemu/vhost-user-server.h
+F: tests/qtest/libqos/vhost-user-blk.c
+F: tests/qtest/libqos/vhost-user-blk.h
+F: tests/qtest/vhost-user-blk-test.c
+F: util/vhost-user-server.c
+
+FUSE block device exports
+M: Hanna Reitz <hreitz@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/export/fuse.c
+
+VDUSE library and block device exports
+M: Xie Yongji <xieyongji@bytedance.com>
+S: Maintained
+F: subprojects/libvduse/
+F: block/export/vduse-blk.c
+F: block/export/vduse-blk.h
+
+Replication
+M: Wen Congyang <wencongyang2@huawei.com>
+M: Xie Changlong <xiechanglong.d@gmail.com>
+S: Supported
+F: replication*
+F: block/replication.c
+F: tests/unit/test-replication.c
+F: docs/block-replication.txt
+
+PVRDMA
+M: Yuval Shaia <yuval.shaia.ml@gmail.com>
+M: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
+S: Odd Fixes
+F: hw/rdma/*
+F: hw/rdma/vmw/*
+F: docs/pvrdma.txt
+F: contrib/rdmacm-mux/*
+F: qapi/rdma.json
+
+Semihosting
+M: Alex Bennée <alex.bennee@linaro.org>
+S: Maintained
+F: semihosting/
+F: include/semihosting/
+F: tests/tcg/multiarch/arm-compat-semi/
+F: tests/tcg/aarch64/system/semiheap.c
+
+Multi-process QEMU
+M: Elena Ufimtseva <elena.ufimtseva@oracle.com>
+M: Jagannathan Raman <jag.raman@oracle.com>
+S: Maintained
+F: docs/devel/multi-process.rst
+F: docs/system/multi-process.rst
+F: hw/pci-host/remote.c
+F: include/hw/pci-host/remote.h
+F: hw/remote/machine.c
+F: include/hw/remote/machine.h
+F: hw/remote/mpqemu-link.c
+F: include/hw/remote/mpqemu-link.h
+F: hw/remote/message.c
+F: hw/remote/remote-obj.c
+F: include/hw/remote/memory.h
+F: hw/remote/memory.c
+F: hw/remote/proxy.c
+F: include/hw/remote/proxy.h
+F: hw/remote/proxy-memory-listener.c
+F: include/hw/remote/proxy-memory-listener.h
+F: hw/remote/iohub.c
+F: include/hw/remote/iohub.h
+F: subprojects/libvfio-user
+F: hw/remote/vfio-user-obj.c
+F: include/hw/remote/vfio-user-obj.h
+F: hw/remote/iommu.c
+F: include/hw/remote/iommu.h
+
+EBPF:
+M: Jason Wang <jasowang@redhat.com>
+R: Andrew Melnychenko <andrew@daynix.com>
+R: Yuri Benditovich <yuri.benditovich@daynix.com>
+S: Maintained
+F: docs/devel/ebpf_rss.rst
+F: ebpf/*
+F: tools/ebpf/*
+
+Build and test automation
+-------------------------
+Build and test automation, general continuous integration
+M: Alex Bennée <alex.bennee@linaro.org>
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+M: Thomas Huth <thuth@redhat.com>
+R: Wainer dos Santos Moschetta <wainersm@redhat.com>
+R: Beraldo Leal <bleal@redhat.com>
+S: Maintained
+F: .github/workflows/lockdown.yml
+F: .gitlab-ci.yml
+F: .gitlab-ci.d/
+F: .travis.yml
+F: docs/devel/ci*
+F: scripts/ci/
+F: tests/docker/
+F: tests/vm/
+F: tests/lcitool/
+F: tests/avocado/tuxrun_baselines.py
+F: scripts/archive-source.sh
+F: docs/devel/testing.rst
+W: https://gitlab.com/qemu-project/qemu/pipelines
+W: https://travis-ci.org/qemu/qemu
+
+FreeBSD Hosted Continuous Integration
+M: Ed Maste <emaste@freebsd.org>
+M: Li-Wen Hsu <lwhsu@freebsd.org>
+S: Maintained
+F: .gitlab-ci.d/cirrus/freebsd*
+F: tests/vm/freebsd
+W: https://cirrus-ci.com/github/qemu/qemu
+
+Windows Hosted Continuous Integration
+M: Yonggang Luo <luoyonggang@gmail.com>
+S: Maintained
+F: .gitlab-ci.d/windows.yml
+
+Guest Test Compilation Support
+M: Alex Bennée <alex.bennee@linaro.org>
+R: Philippe Mathieu-Daudé <philmd@linaro.org>
+S: Maintained
+F: tests/tcg/Makefile.target
+
+Integration Testing with the Avocado framework
+W: https://trello.com/b/6Qi1pxVn/avocado-qemu
+R: Cleber Rosa <crosa@redhat.com>
+R: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Wainer dos Santos Moschetta <wainersm@redhat.com>
+R: Beraldo Leal <bleal@redhat.com>
+S: Odd Fixes
+F: tests/avocado/
+
+GitLab custom runner (Works On Arm Sponsored)
+M: Alex Bennée <alex.bennee@linaro.org>
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+S: Maintained
+F: .gitlab-ci.d/custom-runners/ubuntu-22.04-aarch64.yml
+F: .gitlab-ci.d/custom-runners/ubuntu-22.04-aarch32.yml
+
+Documentation
+-------------
+Build system architecture
+M: Daniel P. Berrange <berrange@redhat.com>
+S: Odd Fixes
+F: docs/devel/build-system.rst
+
+GIT Data Mining Config
+M: Alex Bennée <alex.bennee@linaro.org>
+S: Odd Fixes
+F: gitdm.config
+F: contrib/gitdm/*
+
+Incompatible changes
+R: devel@lists.libvirt.org
+F: docs/about/deprecated.rst
+
+Build System
+------------
+Meson
+M: Paolo Bonzini <pbonzini@redhat.com>
+R: Marc-André Lureau <marcandre.lureau@redhat.com>
+R: Daniel P. Berrange <berrange@redhat.com>
+R: Thomas Huth <thuth@redhat.com>
+R: Philippe Mathieu-Daudé <philmd@linaro.org>
+S: Maintained
+F: meson.build
+F: meson_options.txt
+F: scripts/meson-buildoptions.*
+F: scripts/check_sparse.py
+F: scripts/symlink-install-tree.py
+
+Top Level Makefile and configure
+M: Paolo Bonzini <pbonzini@redhat.com>
+R: Alex Bennée <alex.bennee@linaro.org>
+R: Thomas Huth <thuth@redhat.com>
+S: Maintained
+F: Makefile
+F: configure
+F: scripts/mtest2make.py
+F: tests/Makefile.include
+
+Kconfig
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: scripts/minikconf.py
+F: docs/devel/kconfig.rst
+F: Kconfig*
+F: */Kconfig*
+F: hw/*/Kconfig*
+F: target/*/Kconfig*
+
+GIT submodules
+M: Daniel P. Berrange <berrange@redhat.com>
+S: Odd Fixes
+F: scripts/git-submodule.sh
+
+UI translations
+S: Orphan
+F: po/*.po
+
+Sphinx documentation configuration and build machinery
+M: Peter Maydell <peter.maydell@linaro.org>
+S: Maintained
+F: docs/conf.py
+F: docs/*/conf.py
+F: docs/sphinx/
+F: docs/_templates/
+
+Miscellaneous
+-------------
+Performance Tools and Tests
+M: Ahmed Karaman <ahmedkhaledkaraman@gmail.com>
+S: Maintained
+F: scripts/performance/
+
+Code Coverage Tools
+M: Alex Bennée <alex.bennee@linaro.org>
+S: Odd Fixes
+F: scripts/coverage/
index 2f69d31f2f25a3eac528be6c410bea5a92bbea30..a1b8c634068b0fd445432317b9638770d384de1c 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,7 @@ SRC_PATH=.
 # we have explicit rules for everything
 MAKEFLAGS += -rR
 
-SHELL = /usr/bin/env bash -o pipefail
+SHELL = bash -o pipefail
 
 # Usage: $(call quiet-command,command and args,"NAME","args to print")
 # This will run "command and args", and either:
@@ -26,9 +26,9 @@ quiet-command-run = $(if $(V),,$(if $2,printf "  %-7s %s\n" $2 $3 && ))$1
 quiet-@ = $(if $(V),,@)
 quiet-command = $(quiet-@)$(call quiet-command-run,$1,$2,$3)
 
-UNCHECKED_GOALS := %clean TAGS cscope ctags dist \
+UNCHECKED_GOALS := TAGS gtags cscope ctags dist \
     help check-help print-% \
-    docker docker-% vm-help vm-test vm-build-%
+    docker docker-% lcitool-refresh vm-help vm-test vm-build-%
 
 all:
 .PHONY: all clean distclean recurse-all dist msi FORCE
@@ -42,35 +42,8 @@ configure: ;
 ifneq ($(wildcard config-host.mak),)
 include config-host.mak
 
-git-submodule-update:
-.git-submodule-status: git-submodule-update config-host.mak
-Makefile: .git-submodule-status
-
-.PHONY: git-submodule-update
-
-git_module_status := $(shell \
-  cd '$(SRC_PATH)' && \
-  GIT="$(GIT)" ./scripts/git-submodule.sh status $(GIT_SUBMODULES); \
-  echo $$?; \
-)
-
-ifeq (1,$(git_module_status))
-ifeq (no,$(GIT_UPDATE))
-git-submodule-update:
-       $(call quiet-command, \
-            echo && \
-            echo "GIT submodule checkout is out of date. Please run" && \
-            echo "  scripts/git-submodule.sh update $(GIT_SUBMODULES)" && \
-            echo "from the source directory checkout $(SRC_PATH)" && \
-            echo && \
-            exit 1)
-else
-git-submodule-update:
-       $(call quiet-command, \
-          (cd $(SRC_PATH) && GIT="$(GIT)" ./scripts/git-submodule.sh update $(GIT_SUBMODULES)), \
-          "GIT","$(GIT_SUBMODULES)")
-endif
-endif
+include Makefile.prereqs
+Makefile.prereqs: config-host.mak
 
 # 0. ensure the build tree is okay
 
@@ -105,21 +78,22 @@ x := $(shell rm -rf meson-private meson-info meson-logs)
 endif
 
 # 1. ensure config-host.mak is up-to-date
-config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/VERSION
+config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/scripts/meson-buildoptions.sh $(SRC_PATH)/VERSION
        @echo config-host.mak is out-of-date, running configure
        @if test -f meson-private/coredata.dat; then \
          ./config.status --skip-meson; \
        else \
-         ./config.status && touch build.ninja.stamp; \
+         ./config.status; \
        fi
 
 # 2. meson.stamp exists if meson has run at least once (so ninja reconfigure
 # works), but otherwise never needs to be updated
+
 meson-private/coredata.dat: meson.stamp
 meson.stamp: config-host.mak
        @touch meson.stamp
 
-# 3. ensure generated build files are up-to-date
+# 3. ensure meson-generated build files are up-to-date
 
 ifneq ($(NINJA),)
 Makefile.ninja: build.ninja
@@ -130,33 +104,50 @@ Makefile.ninja: build.ninja
          $(NINJA) -t query build.ninja | sed -n '1,/^  input:/d; /^  outputs:/q; s/$$/ \\/p'; \
        } > $@.tmp && mv $@.tmp $@
 -include Makefile.ninja
+endif
 
-# A separate rule is needed for Makefile dependencies to avoid -n
+ifneq ($(MESON),)
+# The path to meson always points to pyvenv/bin/meson, but the absolute
+# paths could change.  In that case, force a regeneration of build.ninja.
+# Note that this invocation of $(NINJA), just like when Make rebuilds
+# Makefiles, does not include -n.
 build.ninja: build.ninja.stamp
+$(build-files):
 build.ninja.stamp: meson.stamp $(build-files)
-       $(NINJA) $(if $V,-v,) build.ninja && touch $@
-endif
+       @if test "$$(cat build.ninja.stamp)" = "$(MESON)" && test -n "$(NINJA)"; then \
+         $(NINJA) build.ninja; \
+       else \
+         echo "$(MESON) setup --reconfigure $(SRC_PATH)"; \
+         $(MESON) setup --reconfigure $(SRC_PATH); \
+       fi && echo "$(MESON)" > $@
 
-ifneq ($(MESON),)
 Makefile.mtest: build.ninja scripts/mtest2make.py
        $(MESON) introspect --targets --tests --benchmarks | $(PYTHON) scripts/mtest2make.py > $@
 -include Makefile.mtest
+
+.PHONY: update-buildoptions
+all update-buildoptions: $(SRC_PATH)/scripts/meson-buildoptions.sh
+$(SRC_PATH)/scripts/meson-buildoptions.sh: $(SRC_PATH)/meson_options.txt
+       $(MESON) introspect --buildoptions $(SRC_PATH)/meson.build | $(PYTHON) \
+         scripts/meson-buildoptions.py > $@.tmp && mv $@.tmp $@
 endif
 
 # 4. Rules to bridge to other makefiles
 
 ifneq ($(NINJA),)
-MAKE.n = $(findstring n,$(firstword $(MAKEFLAGS)))
-MAKE.k = $(findstring k,$(firstword $(MAKEFLAGS)))
-MAKE.q = $(findstring q,$(firstword $(MAKEFLAGS)))
+# Filter out long options to avoid flags like --no-print-directory which
+# may result in false positive match for MAKE.n
+MAKE.n = $(findstring n,$(firstword $(filter-out --%,$(MAKEFLAGS))))
+MAKE.k = $(findstring k,$(firstword $(filter-out --%,$(MAKEFLAGS))))
+MAKE.q = $(findstring q,$(firstword $(filter-out --%,$(MAKEFLAGS))))
 MAKE.nq = $(if $(word 2, $(MAKE.n) $(MAKE.q)),nq)
 NINJAFLAGS = $(if $V,-v) $(if $(MAKE.n), -n) $(if $(MAKE.k), -k0) \
         $(filter-out -j, $(lastword -j1 $(filter -l% -j%, $(MAKEFLAGS)))) \
-
+        -d keepdepfile
 ninja-cmd-goals = $(or $(MAKECMDGOALS), all)
-ninja-cmd-goals += $(foreach t, $(.tests), $(.test.deps.$t))
+ninja-cmd-goals += $(foreach g, $(MAKECMDGOALS), $(.ninja-goals.$g))
 
-makefile-targets := build.ninja ctags TAGS cscope dist clean uninstall
+makefile-targets := build.ninja ctags TAGS cscope dist clean
 # "ninja -t targets" also lists all prerequisites.  If build system
 # files are marked as PHONY, however, Make will always try to execute
 # "ninja build.ninja".
@@ -168,27 +159,14 @@ $(ninja-targets): run-ninja
 # --output-sync line.
 run-ninja: config-host.mak
 ifneq ($(filter $(ninja-targets), $(ninja-cmd-goals)),)
-       +$(quiet-@)$(if $(MAKE.nq),@:, $(NINJA) \
-          $(NINJAFLAGS) $(sort $(filter $(ninja-targets), $(ninja-cmd-goals))) | cat)
+       +$(if $(MAKE.nq),@:,$(quiet-@)$(NINJA) $(NINJAFLAGS) \
+          $(sort $(filter $(ninja-targets), $(ninja-cmd-goals))) | cat)
 endif
 endif
 
-# Force configure to re-run if the API symbols are updated
-ifeq ($(CONFIG_PLUGIN),y)
-config-host.mak: $(SRC_PATH)/plugins/qemu-plugins.symbols
-
-.PHONY: plugins
-plugins:
-       $(call quiet-command,\
-               $(MAKE) $(SUBDIR_MAKEFLAGS) -C contrib/plugins V="$(V)", \
-               "BUILD", "example plugins")
-endif # $(CONFIG_PLUGIN)
-
 else # config-host.mak does not exist
-config-host.mak:
 ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
-       @echo "Please call configure before running make!"
-       @exit 1
+$(error Please call configure before running make)
 endif
 endif # config-host.mak does not exist
 
@@ -198,30 +176,32 @@ SUBDIR_MAKEFLAGS=$(if $(V),,--no-print-directory --quiet)
 
 all: recurse-all
 
-ROM_DIRS = $(addprefix pc-bios/, $(ROMS))
-ROM_DIRS_RULES=$(foreach t, all clean, $(addsuffix /$(t), $(ROM_DIRS)))
-# Only keep -O and -g cflags
-.PHONY: $(ROM_DIRS_RULES)
-$(ROM_DIRS_RULES):
+SUBDIR_RULES=$(foreach t, all clean distclean, $(addsuffix /$(t), $(SUBDIRS)))
+.PHONY: $(SUBDIR_RULES)
+$(SUBDIR_RULES):
        $(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C $(dir $@) V="$(V)" TARGET_DIR="$(dir $@)" $(notdir $@),)
 
+ifneq ($(filter contrib/plugins, $(SUBDIRS)),)
+.PHONY: plugins
+plugins: contrib/plugins/all
+endif
+
 .PHONY: recurse-all recurse-clean
-recurse-all: $(addsuffix /all, $(ROM_DIRS))
-recurse-clean: $(addsuffix /clean, $(ROM_DIRS))
+recurse-all: $(addsuffix /all, $(SUBDIRS))
+recurse-clean: $(addsuffix /clean, $(SUBDIRS))
+recurse-distclean: $(addsuffix /distclean, $(SUBDIRS))
 
 ######################################################################
 
 clean: recurse-clean
        -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) -t clean || :
        -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) clean-ctlist || :
-# avoid old build problems by removing potentially incorrect old files
-       rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h
-       find . \( -name '*.so' -o -name '*.dll' -o -name '*.[oda]' \) -type f \
+       find . \( -name '*.so' -o -name '*.dll' -o \
+                 -name '*.[oda]' -o -name '*.gcno' \) -type f \
                ! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-aarch64.a \
                ! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-arm.a \
                -exec rm {} +
-       rm -f TAGS cscope.* *.pod *~ */*~
-       rm -f fsdev/*.pod scsi/*.pod
+       rm -f TAGS cscope.* *~ */*~
 
 VERSION = $(shell cat $(SRC_PATH)/VERSION)
 
@@ -230,59 +210,97 @@ dist: qemu-$(VERSION).tar.bz2
 qemu-%.tar.bz2:
        $(SRC_PATH)/scripts/make-release "$(SRC_PATH)" "$(patsubst qemu-%.tar.bz2,%,$@)"
 
-distclean: clean
+distclean: clean recurse-distclean
        -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) -t clean -g || :
-       rm -f config-host.mak config-host.h*
-       rm -f tests/tcg/config-*.mak
-       rm -f config-all-disas.mak config.status
-       rm -f tests/qemu-iotests/common.env
-       rm -f roms/seabios/config.mak roms/vgabios/config.mak
+       rm -f config-host.mak Makefile.prereqs
+       rm -f tests/tcg/*/config-target.mak tests/tcg/config-host.mak
+       rm -f config.status
+       rm -f roms/seabios/config.mak
        rm -f qemu-plugins-ld.symbols qemu-plugins-ld64.symbols
        rm -f *-config-target.h *-config-devices.mak *-config-devices.h
        rm -rf meson-private meson-logs meson-info compile_commands.json
        rm -f Makefile.ninja Makefile.mtest build.ninja.stamp meson.stamp
        rm -f config.log
        rm -f linux-headers/asm
-       rm -Rf .sdk
+       rm -Rf .sdk qemu-bundle
 
-find-src-path = find "$(SRC_PATH)/" -path "$(SRC_PATH)/meson" -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \)
+find-src-path = find "$(SRC_PATH)" -path "$(SRC_PATH)/meson" -prune -o \
+       -type l -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \)
 
 .PHONY: ctags
 ctags:
-       rm -f "$(SRC_PATH)/"tags
-       $(find-src-path) -exec ctags -f "$(SRC_PATH)/"tags --append {} +
+       $(call quiet-command,                   \
+               rm -f "$(SRC_PATH)/"tags,       \
+               "CTAGS", "Remove old tags")
+       $(call quiet-command, \
+               $(find-src-path) -exec ctags            \
+               -f "$(SRC_PATH)/"tags --append {} +,    \
+               "CTAGS", "Re-index $(SRC_PATH)")
+
+.PHONY: gtags
+gtags:
+       $(call quiet-command,                   \
+               rm -f "$(SRC_PATH)/"GTAGS;      \
+               rm -f "$(SRC_PATH)/"GRTAGS;     \
+               rm -f "$(SRC_PATH)/"GPATH,      \
+               "GTAGS", "Remove old $@ files")
+       $(call quiet-command,                           \
+               (cd $(SRC_PATH) &&                      \
+                $(find-src-path) -print | gtags -f -), \
+               "GTAGS", "Re-index $(SRC_PATH)")
 
 .PHONY: TAGS
 TAGS:
-       rm -f "$(SRC_PATH)/"TAGS
-       $(find-src-path) -exec etags -f "$(SRC_PATH)/"TAGS --append {} +
+       $(call quiet-command,                   \
+               rm -f "$(SRC_PATH)/"TAGS,       \
+               "TAGS", "Remove old $@")
+       $(call quiet-command,                           \
+               $(find-src-path) -exec etags            \
+               -f "$(SRC_PATH)/"TAGS --append {} +,    \
+               "TAGS", "Re-index $(SRC_PATH)")
 
 .PHONY: cscope
 cscope:
-       rm -f "$(SRC_PATH)"/cscope.*
-       $(find-src-path) -print | sed -e 's,^\./,,' > "$(SRC_PATH)/cscope.files"
-       cscope -b -i"$(SRC_PATH)/cscope.files" -f"$(SRC_PATH)"/cscope.out
+       $(call quiet-command,                   \
+               rm -f "$(SRC_PATH)/"cscope.* ,  \
+               "cscope", "Remove old $@ files")
+       $(call quiet-command,                                   \
+               ($(find-src-path) -print | sed -e 's,^\./,,'    \
+               > "$(SRC_PATH)/cscope.files"),                  \
+               "cscope", "Create file list")
+       $(call quiet-command,                           \
+               cscope -b -i"$(SRC_PATH)/cscope.files"  \
+               -f"$(SRC_PATH)"/cscope.out,             \
+               "cscope", "Re-index $(SRC_PATH)")
 
 # Needed by "meson install"
 export DESTDIR
 
+#include $(SRC_PATH)/tests/lcitool/Makefile.include
 #include $(SRC_PATH)/tests/docker/Makefile.include
 #include $(SRC_PATH)/tests/vm/Makefile.include
 
 print-help-run = printf "  %-30s - %s\\n" "$1" "$2"
 print-help = @$(call print-help-run,$1,$2)
 
+.PHONY: update-linux-vdso
+update-linux-vdso:
+       @for m in $(SRC_PATH)/linux-user/*/Makefile.vdso; do \
+         $(MAKE) $(SUBDIR_MAKEFLAGS) -C $$(dirname $$m) -f Makefile.vdso \
+               SRC_PATH=$(SRC_PATH) BUILD_DIR=$(BUILD_DIR); \
+       done
+
 .PHONY: help
 help:
        @echo  'Generic targets:'
        $(call print-help,all,Build all)
        $(call print-help,dir/file.o,Build specified target only)
        $(call print-help,install,Install QEMU, documentation and tools)
-       $(call print-help,ctags/TAGS,Generate tags file for editors)
+       $(call print-help,ctags/gtags/TAGS,Generate tags file for editors)
        $(call print-help,cscope,Generate cscope index)
        $(call print-help,sparse,Run sparse on the QEMU source)
        @echo  ''
-ifeq ($(CONFIG_PLUGIN),y)
+ifneq ($(filter contrib/plugins, $(SUBDIRS)),)
        @echo  'Plugin targets:'
        $(call print-help,plugins,Build the example TCG plugins)
        @echo  ''
@@ -292,21 +310,23 @@ endif
        $(call print-help,distclean,Remove all generated files)
        $(call print-help,dist,Build a distributable tarball)
        @echo  ''
+       @echo  'Linux-user targets:'
+       $(call print-help,update-linux-vdso,Build linux-user vdso images)
+       @echo  ''
        @echo  'Test targets:'
        $(call print-help,check,Run all tests (check-help for details))
        $(call print-help,bench,Run all benchmarks)
-       $(call print-help,docker,Help about targets running tests inside containers)
+       $(call print-help,lcitool-help,Help about targets for managing build environment manifests)
+       $(call print-help,docker-help,Help about targets running tests inside containers)
        $(call print-help,vm-help,Help about targets running tests inside VM)
        @echo  ''
        @echo  'Documentation targets:'
        $(call print-help,html man,Build documentation in specified format)
        @echo  ''
-ifdef CONFIG_WIN32
+ifneq ($(filter msi, $(ninja-targets)),)
        @echo  'Windows targets:'
        $(call print-help,installer,Build NSIS-based installer for QEMU)
-ifdef CONFIG_QGA_MSI
        $(call print-help,msi,Build MSI-based installer for qemu-ga)
-endif
        @echo  ''
 endif
        $(call print-help,$(MAKE) [targets],(quiet build, default))
diff --git a/VERSION b/VERSION
index 91ff57278e37ef9cecfeaea47f0d77966799af28..308c0cb541ab69249d1ae2e64ea32c75fa5d6001 100644 (file)
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-5.2.0
+8.2.2
index da3a0e69a2ed472c053a2b64113405f371e1b2e1..45a60e987df9e96310c16a5fbe44de0342d56a74 100644 (file)
@@ -30,7 +30,6 @@
 #include "qapi/qapi-visit-authz.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qobject.h"
-#include "qapi/qmp/qerror.h"
 #include "qapi/qobject-input-visitor.h"
 
 
index 88fa7769cb1b2ff0d4a5c2f383817979deef910a..42a1ec0ff6267d6f358cc81d10b715e933e8acdf 100644 (file)
@@ -6,4 +6,4 @@ authz_ss.add(files(
   'simple.c',
 ))
 
-authz_ss.add(when: ['CONFIG_AUTH_PAM', pam], if_true: files('pamacct.c'))
+authz_ss.add(when: pam, if_true: files('pamacct.c'))
index e62ebb36b7ee0eae577dc4657e2e4423a77f5f70..9c255dafb640c2a1f98d8551ec3938fce48b5553 100644 (file)
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 
 # base.c
 qauthz_is_allowed(void *authz, const char *identity, bool allowed) "AuthZ %p check identity=%s allowed=%d"
diff --git a/block.c b/block.c
index f1cedac362577c2796b69e3972e9c74a89d55dc9..1d20040c1d69a837d505ea283ce8b4277f462f15 100644 (file)
--- a/block.c
+++ b/block.c
@@ -2,6 +2,7 @@
  * QEMU System Emulator block driver
  *
  * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2020 Virtuozzo International GmbH.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -26,6 +27,8 @@
 #include "block/trace.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
+#include "block/dirty-bitmap.h"
+#include "block/fuse.h"
 #include "block/nbd.h"
 #include "block/qdict.h"
 #include "qemu/error-report.h"
@@ -40,7 +43,6 @@
 #include "qapi/qobject-output-visitor.h"
 #include "qapi/qapi-visit-block-core.h"
 #include "sysemu/block-backend.h"
-#include "sysemu/sysemu.h"
 #include "qemu/notify.h"
 #include "qemu/option.h"
 #include "qemu/coroutine.h"
 #include "qemu/timer.h"
 #include "qemu/cutils.h"
 #include "qemu/id.h"
+#include "qemu/range.h"
+#include "qemu/rcu.h"
 #include "block/coroutines.h"
 
 #ifdef CONFIG_BSD
 #include <sys/ioctl.h>
 #include <sys/queue.h>
-#ifndef __DragonFly__
+#if defined(HAVE_SYS_DISK_H)
 #include <sys/disk.h>
 #endif
 #endif
 
 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
 
+/* Protected by BQL */
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
     QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
 
+/* Protected by BQL */
 static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
     QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
 
+/* Protected by BQL */
 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
     QLIST_HEAD_INITIALIZER(bdrv_drivers);
 
@@ -81,6 +88,27 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
                                            BdrvChildRole child_role,
                                            Error **errp);
 
+static bool bdrv_recurse_has_child(BlockDriverState *bs,
+                                   BlockDriverState *child);
+
+static void GRAPH_WRLOCK
+bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs);
+
+static void GRAPH_WRLOCK
+bdrv_remove_child(BdrvChild *child, Transaction *tran);
+
+static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
+                               BlockReopenQueue *queue,
+                               Transaction *change_child_tran, Error **errp);
+static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
+static void bdrv_reopen_abort(BDRVReopenState *reopen_state);
+
+static bool bdrv_backing_overridden(BlockDriverState *bs);
+
+static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
+                                    GHashTable *visited, Transaction *tran,
+                                    Error **errp);
+
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;
 
@@ -108,8 +136,9 @@ size_t bdrv_opt_mem_align(BlockDriverState *bs)
 {
     if (!bs || !bs->drv) {
         /* page size or 4k (hdd sector size) should be on the safe side */
-        return MAX(4096, qemu_real_host_page_size);
+        return MAX(4096, qemu_real_host_page_size());
     }
+    IO_CODE();
 
     return bs->bl.opt_mem_alignment;
 }
@@ -118,8 +147,9 @@ size_t bdrv_min_mem_align(BlockDriverState *bs)
 {
     if (!bs || !bs->drv) {
         /* page size or 4k (hdd sector size) should be on the safe side */
-        return MAX(4096, qemu_real_host_page_size);
+        return MAX(4096, qemu_real_host_page_size());
     }
+    IO_CODE();
 
     return bs->bl.min_mem_alignment;
 }
@@ -216,7 +246,7 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
         /* Stripping the explicit protocol prefix may result in a protocol
          * prefix being (wrongly) detected (if the filename contains a colon) */
         if (path_has_protocol(filename)) {
-            QString *fat_filename;
+            GString *fat_filename;
 
             /* This means there is some colon before the first slash; therefore,
              * this cannot be an absolute path */
@@ -224,12 +254,13 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
 
             /* And we can thus fix the protocol detection issue by prefixing it
              * by "./" */
-            fat_filename = qstring_from_str("./");
-            qstring_append(fat_filename, filename);
+            fat_filename = g_string_new("./");
+            g_string_append(fat_filename, filename);
 
-            assert(!path_has_protocol(qstring_get_str(fat_filename)));
+            assert(!path_has_protocol(fat_filename->str));
 
-            qdict_put(options, "filename", fat_filename);
+            qdict_put(options, "filename",
+                      qstring_from_gstring(fat_filename));
         } else {
             /* If no protocol prefix was detected, we can use the shortened
              * filename as-is */
@@ -244,12 +275,16 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
  * image is inactivated. */
 bool bdrv_is_read_only(BlockDriverState *bs)
 {
-    return bs->read_only;
+    IO_CODE();
+    return !(bs->open_flags & BDRV_O_RDWR);
 }
 
-int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
-                           bool ignore_allow_rdw, Error **errp)
+static int GRAPH_RDLOCK
+bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
+                       bool ignore_allow_rdw, Error **errp)
 {
+    IO_CODE();
+
     /* Do not set read_only if copy_on_read is enabled */
     if (bs->copy_on_read && read_only) {
         error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
@@ -283,6 +318,7 @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
                               Error **errp)
 {
     int ret = 0;
+    IO_CODE();
 
     if (!(bs->open_flags & BDRV_O_RDWR)) {
         return 0;
@@ -296,7 +332,6 @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
         goto fail;
     }
 
-    bs->read_only = true;
     bs->open_flags &= ~BDRV_O_RDWR;
 
     return 0;
@@ -337,8 +372,9 @@ char *bdrv_get_full_backing_filename_from_filename(const char *backed,
  * setting @errp.  In all other cases, NULL will only be returned with
  * @errp set.
  */
-static char *bdrv_make_absolute_filename(BlockDriverState *relative_to,
-                                         const char *filename, Error **errp)
+static char * GRAPH_RDLOCK
+bdrv_make_absolute_filename(BlockDriverState *relative_to,
+                            const char *filename, Error **errp)
 {
     char *dir, *full_name;
 
@@ -360,12 +396,14 @@ static char *bdrv_make_absolute_filename(BlockDriverState *relative_to,
 
 char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp)
 {
+    GLOBAL_STATE_CODE();
     return bdrv_make_absolute_filename(bs, bs->backing_file, errp);
 }
 
 void bdrv_register(BlockDriver *bdrv)
 {
     assert(bdrv->format_name);
+    GLOBAL_STATE_CODE();
     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
 }
 
@@ -374,19 +412,23 @@ BlockDriverState *bdrv_new(void)
     BlockDriverState *bs;
     int i;
 
+    GLOBAL_STATE_CODE();
+
     bs = g_new0(BlockDriverState, 1);
     QLIST_INIT(&bs->dirty_bitmaps);
     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
         QLIST_INIT(&bs->op_blockers[i]);
     }
-    notifier_with_return_list_init(&bs->before_write_notifiers);
-    qemu_co_mutex_init(&bs->reqs_lock);
+    qemu_mutex_init(&bs->reqs_lock);
     qemu_mutex_init(&bs->dirty_bitmap_mutex);
     bs->refcnt = 1;
     bs->aio_context = qemu_get_aio_context();
 
     qemu_co_queue_init(&bs->flush_queue);
 
+    qemu_co_mutex_init(&bs->bsc_modify_lock);
+    bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1);
+
     for (i = 0; i < bdrv_drain_all_count; i++) {
         bdrv_drained_begin(bs);
     }
@@ -399,6 +441,7 @@ BlockDriverState *bdrv_new(void)
 static BlockDriver *bdrv_do_find_format(const char *format_name)
 {
     BlockDriver *drv1;
+    GLOBAL_STATE_CODE();
 
     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
         if (!strcmp(drv1->format_name, format_name)) {
@@ -414,6 +457,8 @@ BlockDriver *bdrv_find_format(const char *format_name)
     BlockDriver *drv1;
     int i;
 
+    GLOBAL_STATE_CODE();
+
     drv1 = bdrv_do_find_format(format_name);
     if (drv1) {
         return drv1;
@@ -422,12 +467,18 @@ BlockDriver *bdrv_find_format(const char *format_name)
     /* The driver isn't registered, maybe we need to load a module */
     for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
         if (!strcmp(block_driver_modules[i].format_name, format_name)) {
-            block_module_load_one(block_driver_modules[i].library_name);
+            Error *local_err = NULL;
+            int rv = block_module_load(block_driver_modules[i].library_name,
+                                       &local_err);
+            if (rv > 0) {
+                return bdrv_do_find_format(format_name);
+            } else if (rv < 0) {
+                error_report_err(local_err);
+            }
             break;
         }
     }
-
-    return bdrv_do_find_format(format_name);
+    return NULL;
 }
 
 static int bdrv_format_is_whitelisted(const char *format_name, bool read_only)
@@ -463,6 +514,7 @@ static int bdrv_format_is_whitelisted(const char *format_name, bool read_only)
 
 int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
 {
+    GLOBAL_STATE_CODE();
     return bdrv_format_is_whitelisted(drv->format_name, read_only);
 }
 
@@ -479,62 +531,24 @@ typedef struct CreateCo {
     Error *err;
 } CreateCo;
 
-static void coroutine_fn bdrv_create_co_entry(void *opaque)
-{
-    Error *local_err = NULL;
-    int ret;
-
-    CreateCo *cco = opaque;
-    assert(cco->drv);
-
-    ret = cco->drv->bdrv_co_create_opts(cco->drv,
-                                        cco->filename, cco->opts, &local_err);
-    error_propagate(&cco->err, local_err);
-    cco->ret = ret;
-}
-
-int bdrv_create(BlockDriver *drv, const char* filename,
-                QemuOpts *opts, Error **errp)
+int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
+                                QemuOpts *opts, Error **errp)
 {
     int ret;
-
-    Coroutine *co;
-    CreateCo cco = {
-        .drv = drv,
-        .filename = g_strdup(filename),
-        .opts = opts,
-        .ret = NOT_DONE,
-        .err = NULL,
-    };
+    GLOBAL_STATE_CODE();
+    ERRP_GUARD();
 
     if (!drv->bdrv_co_create_opts) {
-        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
-        ret = -ENOTSUP;
-        goto out;
-    }
-
-    if (qemu_in_coroutine()) {
-        /* Fast-path if already in coroutine context */
-        bdrv_create_co_entry(&cco);
-    } else {
-        co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
-        qemu_coroutine_enter(co);
-        while (cco.ret == NOT_DONE) {
-            aio_poll(qemu_get_aio_context(), true);
-        }
+        error_setg(errp, "Driver '%s' does not support image creation",
+                   drv->format_name);
+        return -ENOTSUP;
     }
 
-    ret = cco.ret;
-    if (ret < 0) {
-        if (cco.err) {
-            error_propagate(errp, cco.err);
-        } else {
-            error_setg_errno(errp, -ret, "Could not create image");
-        }
+    ret = drv->bdrv_co_create_opts(drv, filename, opts, errp);
+    if (ret < 0 && !*errp) {
+        error_setg_errno(errp, -ret, "Could not create image");
     }
 
-out:
-    g_free(cco.filename);
     return ret;
 }
 
@@ -545,21 +559,24 @@ out:
  * On success, return @blk's actual length.
  * Otherwise, return -errno.
  */
-static int64_t create_file_fallback_truncate(BlockBackend *blk,
-                                             int64_t minimum_size, Error **errp)
+static int64_t coroutine_fn GRAPH_UNLOCKED
+create_file_fallback_truncate(BlockBackend *blk, int64_t minimum_size,
+                              Error **errp)
 {
     Error *local_err = NULL;
     int64_t size;
     int ret;
 
-    ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
-                       &local_err);
+    GLOBAL_STATE_CODE();
+
+    ret = blk_co_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
+                          &local_err);
     if (ret < 0 && ret != -ENOTSUP) {
         error_propagate(errp, local_err);
         return ret;
     }
 
-    size = blk_getlength(blk);
+    size = blk_co_getlength(blk);
     if (size < 0) {
         error_free(local_err);
         error_setg_errno(errp, -size,
@@ -583,16 +600,19 @@ static int64_t create_file_fallback_truncate(BlockBackend *blk,
  * Helper function for bdrv_create_file_fallback(): Zero the first
  * sector to remove any potentially pre-existing image header.
  */
-static int create_file_fallback_zero_first_sector(BlockBackend *blk,
-                                                  int64_t current_size,
-                                                  Error **errp)
+static int coroutine_fn
+create_file_fallback_zero_first_sector(BlockBackend *blk,
+                                       int64_t current_size,
+                                       Error **errp)
 {
     int64_t bytes_to_clear;
     int ret;
 
+    GLOBAL_STATE_CODE();
+
     bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE);
     if (bytes_to_clear) {
-        ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP);
+        ret = blk_co_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP);
         if (ret < 0) {
             error_setg_errno(errp, -ret,
                              "Failed to clear the new image's first sector");
@@ -621,6 +641,8 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
     Error *local_err = NULL;
     int ret;
 
+    GLOBAL_STATE_CODE();
+
     size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
     buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
     prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
@@ -640,11 +662,13 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
     options = qdict_new();
     qdict_put_str(options, "driver", drv->format_name);
 
-    blk = blk_new_open(filename, NULL, options,
-                       BDRV_O_RDWR | BDRV_O_RESIZE, errp);
+    blk = blk_co_new_open(filename, NULL, options,
+                          BDRV_O_RDWR | BDRV_O_RESIZE, errp);
     if (!blk) {
-        error_prepend(errp, "Protocol driver '%s' does not support image "
-                      "creation, and opening the image failed: ",
+        error_prepend(errp, "Protocol driver '%s' does not support creating "
+                      "new images, so an existing image must be selected as "
+                      "the target; however, opening the given target as an "
+                      "existing image failed: ",
                       drv->format_name);
         return -EINVAL;
     }
@@ -662,20 +686,57 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
 
     ret = 0;
 out:
-    blk_unref(blk);
+    blk_co_unref(blk);
     return ret;
 }
 
-int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
+int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts,
+                                     Error **errp)
 {
+    QemuOpts *protocol_opts;
     BlockDriver *drv;
+    QDict *qdict;
+    int ret;
+
+    GLOBAL_STATE_CODE();
 
     drv = bdrv_find_protocol(filename, true, errp);
     if (drv == NULL) {
         return -ENOENT;
     }
 
-    return bdrv_create(drv, filename, opts, errp);
+    if (!drv->create_opts) {
+        error_setg(errp, "Driver '%s' does not support image creation",
+                   drv->format_name);
+        return -ENOTSUP;
+    }
+
+    /*
+     * 'opts' contains a QemuOptsList with a combination of format and protocol
+     * default values.
+     *
+     * The format properly removes its options, but the default values remain
+     * in 'opts->list'.  So if the protocol has options with the same name
+     * (e.g. rbd has 'cluster_size' as qcow2), it will see the default values
+     * of the format, since for overlapping options, the format wins.
+     *
+     * To avoid this issue, lets convert QemuOpts to QDict, in this way we take
+     * only the set options, and then convert it back to QemuOpts, using the
+     * create_opts of the protocol. So the new QemuOpts, will contain only the
+     * protocol defaults.
+     */
+    qdict = qemu_opts_to_qdict(opts, NULL);
+    protocol_opts = qemu_opts_from_qdict(drv->create_opts, qdict, errp);
+    if (protocol_opts == NULL) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = bdrv_co_create(drv, filename, protocol_opts, errp);
+out:
+    qemu_opts_del(protocol_opts);
+    qobject_unref(qdict);
+    return ret;
 }
 
 int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
@@ -683,7 +744,9 @@ int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
     Error *local_err = NULL;
     int ret;
 
+    IO_CODE();
     assert(bs != NULL);
+    assert_bdrv_graph_readable();
 
     if (!bs->drv) {
         error_setg(errp, "Block node '%s' is not opened", bs->filename);
@@ -704,6 +767,29 @@ int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
     return ret;
 }
 
+void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs)
+{
+    Error *local_err = NULL;
+    int ret;
+    IO_CODE();
+
+    if (!bs) {
+        return;
+    }
+
+    ret = bdrv_co_delete_file(bs, &local_err);
+    /*
+     * ENOTSUP will happen if the block driver doesn't support
+     * the 'bdrv_co_delete_file' interface. This is a predictable
+     * scenario and shouldn't be reported back to the user.
+     */
+    if (ret == -ENOTSUP) {
+        error_free(local_err);
+    } else if (ret < 0) {
+        error_report_err(local_err);
+    }
+}
+
 /**
  * Try to get @bs's logical and physical block size.
  * On success, store them in @bsz struct and return 0.
@@ -714,6 +800,7 @@ int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 {
     BlockDriver *drv = bs->drv;
     BlockDriverState *filtered = bdrv_filter_bs(bs);
+    GLOBAL_STATE_CODE();
 
     if (drv && drv->bdrv_probe_blocksizes) {
         return drv->bdrv_probe_blocksizes(bs, bsz);
@@ -733,11 +820,17 @@ int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
 {
     BlockDriver *drv = bs->drv;
-    BlockDriverState *filtered = bdrv_filter_bs(bs);
+    BlockDriverState *filtered;
+
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     if (drv && drv->bdrv_probe_geometry) {
         return drv->bdrv_probe_geometry(bs, geo);
-    } else if (filtered) {
+    }
+
+    filtered = bdrv_filter_bs(bs);
+    if (filtered) {
         return bdrv_probe_geometry(filtered, geo);
     }
 
@@ -746,38 +839,42 @@ int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
 
 /*
  * Create a uniquely-named empty temporary file.
- * Return 0 upon success, otherwise a negative errno value.
+ * Return the actual file name used upon success, otherwise NULL.
+ * This string should be freed with g_free() when not needed any longer.
+ *
+ * Note: creating a temporary file for the caller to (re)open is
+ * inherently racy. Use g_file_open_tmp() instead whenever practical.
  */
-int get_tmp_filename(char *filename, int size)
+char *create_tmp_file(Error **errp)
 {
-#ifdef _WIN32
-    char temp_dir[MAX_PATH];
-    /* GetTempFileName requires that its output buffer (4th param)
-       have length MAX_PATH or greater.  */
-    assert(size >= MAX_PATH);
-    return (GetTempPath(MAX_PATH, temp_dir)
-            && GetTempFileName(temp_dir, "qem", 0, filename)
-            ? 0 : -GetLastError());
-#else
     int fd;
     const char *tmpdir;
-    tmpdir = getenv("TMPDIR");
-    if (!tmpdir) {
+    g_autofree char *filename = NULL;
+
+    tmpdir = g_get_tmp_dir();
+#ifndef _WIN32
+    /*
+     * See commit 69bef79 ("block: use /var/tmp instead of /tmp for -snapshot")
+     *
+     * This function is used to create temporary disk images (like -snapshot),
+     * so the files can become very large. /tmp is often a tmpfs where as
+     * /var/tmp is usually on a disk, so more appropriate for disk images.
+     */
+    if (!g_strcmp0(tmpdir, "/tmp")) {
         tmpdir = "/var/tmp";
     }
-    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
-        return -EOVERFLOW;
-    }
-    fd = mkstemp(filename);
+#endif
+
+    filename = g_strdup_printf("%s/vl.XXXXXX", tmpdir);
+    fd = g_mkstemp(filename);
     if (fd < 0) {
-        return -errno;
-    }
-    if (close(fd) != 0) {
-        unlink(filename);
-        return -errno;
+        error_setg_errno(errp, errno, "Could not open temporary file '%s'",
+                         filename);
+        return NULL;
     }
-    return 0;
-#endif
+    close(fd);
+
+    return g_steal_pointer(&filename);
 }
 
 /*
@@ -788,6 +885,7 @@ static BlockDriver *find_hdev_driver(const char *filename)
 {
     int score_max = 0, score;
     BlockDriver *drv = NULL, *d;
+    GLOBAL_STATE_CODE();
 
     QLIST_FOREACH(d, &bdrv_drivers, list) {
         if (d->bdrv_probe_device) {
@@ -805,6 +903,7 @@ static BlockDriver *find_hdev_driver(const char *filename)
 static BlockDriver *bdrv_do_find_protocol(const char *protocol)
 {
     BlockDriver *drv1;
+    GLOBAL_STATE_CODE();
 
     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
         if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
@@ -825,6 +924,7 @@ BlockDriver *bdrv_find_protocol(const char *filename,
     const char *p;
     int i;
 
+    GLOBAL_STATE_CODE();
     /* TODO Drivers without bdrv_file_open must be specified explicitly */
 
     /*
@@ -859,12 +959,16 @@ BlockDriver *bdrv_find_protocol(const char *filename,
     for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
         if (block_driver_modules[i].protocol_name &&
             !strcmp(block_driver_modules[i].protocol_name, protocol)) {
-            block_module_load_one(block_driver_modules[i].library_name);
+            int rv = block_module_load(block_driver_modules[i].library_name, errp);
+            if (rv > 0) {
+                drv1 = bdrv_do_find_protocol(protocol);
+            } else if (rv < 0) {
+                return NULL;
+            }
             break;
         }
     }
 
-    drv1 = bdrv_do_find_protocol(protocol);
     if (!drv1) {
         error_setg(errp, "Unknown protocol '%s'", protocol);
     }
@@ -890,6 +994,7 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
 {
     int score_max = 0, score;
     BlockDriver *drv = NULL, *d;
+    IO_CODE();
 
     QLIST_FOREACH(d, &bdrv_drivers, list) {
         if (d->bdrv_probe) {
@@ -911,13 +1016,15 @@ static int find_image_format(BlockBackend *file, const char *filename,
     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
     int ret = 0;
 
+    GLOBAL_STATE_CODE();
+
     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
     if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
         *pdrv = &bdrv_raw;
         return ret;
     }
 
-    ret = blk_pread(file, 0, buf, sizeof(buf));
+    ret = blk_pread(file, 0, sizeof(buf), buf, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not read image for determining its "
                          "format");
@@ -925,35 +1032,40 @@ static int find_image_format(BlockBackend *file, const char *filename,
         return ret;
     }
 
-    drv = bdrv_probe_all(buf, ret, filename);
+    drv = bdrv_probe_all(buf, sizeof(buf), filename);
     if (!drv) {
         error_setg(errp, "Could not determine image format: No compatible "
                    "driver found");
-        ret = -ENOENT;
+        *pdrv = NULL;
+        return -ENOENT;
     }
+
     *pdrv = drv;
-    return ret;
+    return 0;
 }
 
 /**
  * Set the current 'total_sectors' value
  * Return 0 on success, -errno on error.
  */
-int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
+int coroutine_fn bdrv_co_refresh_total_sectors(BlockDriverState *bs,
+                                               int64_t hint)
 {
     BlockDriver *drv = bs->drv;
+    IO_CODE();
+    assert_bdrv_graph_readable();
 
     if (!drv) {
         return -ENOMEDIUM;
     }
 
-    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
+    /* Do not attempt drv->bdrv_co_getlength() on scsi-generic devices */
     if (bdrv_is_sg(bs))
         return 0;
 
     /* query actual device if possible, otherwise just trust the hint */
-    if (drv->bdrv_getlength) {
-        int64_t length = drv->bdrv_getlength(bs);
+    if (drv->bdrv_co_getlength) {
+        int64_t length = drv->bdrv_co_getlength(bs);
         if (length < 0) {
             return length;
         }
@@ -961,6 +1073,11 @@ int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
     }
 
     bs->total_sectors = hint;
+
+    if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) {
+        return -EFBIG;
+    }
+
     return 0;
 }
 
@@ -971,6 +1088,7 @@ int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
                               QDict *old_options)
 {
+    GLOBAL_STATE_CODE();
     if (bs->drv && bs->drv->bdrv_join_options) {
         bs->drv->bdrv_join_options(options, old_options);
     } else {
@@ -987,6 +1105,7 @@ static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
     BlockdevDetectZeroesOptions detect_zeroes =
         qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value,
                         BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err);
+    GLOBAL_STATE_CODE();
     g_free(value);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -1077,47 +1196,41 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
 static char *bdrv_child_get_parent_desc(BdrvChild *c)
 {
     BlockDriverState *parent = c->opaque;
-    return g_strdup(bdrv_get_device_or_node_name(parent));
+    return g_strdup_printf("node '%s'", bdrv_get_node_name(parent));
 }
 
-static void bdrv_child_cb_drained_begin(BdrvChild *child)
+static void GRAPH_RDLOCK bdrv_child_cb_drained_begin(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
-    bdrv_do_drained_begin_quiesce(bs, NULL, false);
+    bdrv_do_drained_begin_quiesce(bs, NULL);
 }
 
-static bool bdrv_child_cb_drained_poll(BdrvChild *child)
+static bool GRAPH_RDLOCK bdrv_child_cb_drained_poll(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
-    return bdrv_drain_poll(bs, false, NULL, false);
+    return bdrv_drain_poll(bs, NULL, false);
 }
 
-static void bdrv_child_cb_drained_end(BdrvChild *child,
-                                      int *drained_end_counter)
+static void GRAPH_RDLOCK bdrv_child_cb_drained_end(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
-    bdrv_drained_end_no_poll(bs, drained_end_counter);
+    bdrv_drained_end(bs);
 }
 
 static int bdrv_child_cb_inactivate(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
+    GLOBAL_STATE_CODE();
     assert(bs->open_flags & BDRV_O_INACTIVE);
     return 0;
 }
 
-static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
-                                          GSList **ignore, Error **errp)
-{
-    BlockDriverState *bs = child->opaque;
-    return bdrv_can_set_aio_context(bs, ctx, ignore, errp);
-}
-
-static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx,
-                                      GSList **ignore)
+static bool bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx,
+                                         GHashTable *visited, Transaction *tran,
+                                         Error **errp)
 {
     BlockDriverState *bs = child->opaque;
-    return bdrv_set_aio_context_ignore(bs, ctx, ignore);
+    return bdrv_change_aio_context(bs, ctx, visited, tran, errp);
 }
 
 /*
@@ -1128,6 +1241,7 @@ static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx,
 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
                                        int parent_flags, QDict *parent_options)
 {
+    GLOBAL_STATE_CODE();
     *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
 
     /* For temporary files, unconditional cache=unsafe is fine */
@@ -1143,11 +1257,12 @@ static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
     *child_flags &= ~BDRV_O_NATIVE_AIO;
 }
 
-static void bdrv_backing_attach(BdrvChild *c)
+static void GRAPH_WRLOCK bdrv_backing_attach(BdrvChild *c)
 {
     BlockDriverState *parent = c->opaque;
     BlockDriverState *backing_hd = c->bs;
 
+    GLOBAL_STATE_CODE();
     assert(!parent->backing_blocker);
     error_setg(&parent->backing_blocker,
                "node is used as backing hd of '%s'",
@@ -1186,6 +1301,7 @@ static void bdrv_backing_detach(BdrvChild *c)
 {
     BlockDriverState *parent = c->opaque;
 
+    GLOBAL_STATE_CODE();
     assert(parent->backing_blocker);
     bdrv_op_unblock_all(c->bs, parent->backing_blocker);
     error_free(parent->backing_blocker);
@@ -1198,6 +1314,7 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
     BlockDriverState *parent = c->opaque;
     bool read_only = bdrv_is_read_only(parent);
     int ret;
+    GLOBAL_STATE_CODE();
 
     if (read_only) {
         ret = bdrv_reopen_set_read_only(parent, false, errp);
@@ -1229,6 +1346,7 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
                                    int parent_flags, QDict *parent_options)
 {
     int flags = parent_flags;
+    GLOBAL_STATE_CODE();
 
     /*
      * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL.
@@ -1300,18 +1418,49 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
     *child_flags = flags;
 }
 
-static void bdrv_child_cb_attach(BdrvChild *child)
+static void GRAPH_WRLOCK bdrv_child_cb_attach(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
 
-    if (child->role & BDRV_CHILD_COW) {
+    assert_bdrv_graph_writable();
+    QLIST_INSERT_HEAD(&bs->children, child, next);
+    if (bs->drv->is_filter || (child->role & BDRV_CHILD_FILTERED)) {
+        /*
+         * Here we handle filters and block/raw-format.c when it behave like
+         * filter. They generally have a single PRIMARY child, which is also the
+         * FILTERED child, and that they may have multiple more children, which
+         * are neither PRIMARY nor FILTERED. And never we have a COW child here.
+         * So bs->file will be the PRIMARY child, unless the PRIMARY child goes
+         * into bs->backing on exceptional cases; and bs->backing will be
+         * nothing else.
+         */
+        assert(!(child->role & BDRV_CHILD_COW));
+        if (child->role & BDRV_CHILD_PRIMARY) {
+            assert(child->role & BDRV_CHILD_FILTERED);
+            assert(!bs->backing);
+            assert(!bs->file);
+
+            if (bs->drv->filtered_child_is_backing) {
+                bs->backing = child;
+            } else {
+                bs->file = child;
+            }
+        } else {
+            assert(!(child->role & BDRV_CHILD_FILTERED));
+        }
+    } else if (child->role & BDRV_CHILD_COW) {
+        assert(bs->drv->supports_backing);
+        assert(!(child->role & BDRV_CHILD_PRIMARY));
+        assert(!bs->backing);
+        bs->backing = child;
         bdrv_backing_attach(child);
+    } else if (child->role & BDRV_CHILD_PRIMARY) {
+        assert(!bs->file);
+        bs->file = child;
     }
-
-    bdrv_apply_subtree_drain(child, bs);
 }
 
-static void bdrv_child_cb_detach(BdrvChild *child)
+static void GRAPH_WRLOCK bdrv_child_cb_detach(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
 
@@ -1319,7 +1468,14 @@ static void bdrv_child_cb_detach(BdrvChild *child)
         bdrv_backing_detach(child);
     }
 
-    bdrv_unapply_subtree_drain(child, bs);
+    assert_bdrv_graph_writable();
+    QLIST_REMOVE(child, next);
+    if (child == bs->backing) {
+        assert(child != bs->file);
+        bs->backing = NULL;
+    } else if (child == bs->file) {
+        bs->file = NULL;
+    }
 }
 
 static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
@@ -1331,6 +1487,14 @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
     return 0;
 }
 
+AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c)
+{
+    BlockDriverState *bs = c->opaque;
+    IO_CODE();
+
+    return bdrv_get_aio_context(bs);
+}
+
 const BdrvChildClass child_of_bds = {
     .parent_is_bds   = true,
     .get_parent_desc = bdrv_child_get_parent_desc,
@@ -1341,14 +1505,21 @@ const BdrvChildClass child_of_bds = {
     .attach          = bdrv_child_cb_attach,
     .detach          = bdrv_child_cb_detach,
     .inactivate      = bdrv_child_cb_inactivate,
-    .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx,
-    .set_aio_ctx     = bdrv_child_cb_set_aio_ctx,
+    .change_aio_ctx  = bdrv_child_cb_change_aio_ctx,
     .update_filename = bdrv_child_cb_update_filename,
+    .get_parent_aio_context = child_of_bds_get_parent_aio_context,
 };
 
+AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
+{
+    IO_CODE();
+    return c->klass->get_parent_aio_context(c);
+}
+
 static int bdrv_open_flags(BlockDriverState *bs, int flags)
 {
     int open_flags = flags;
+    GLOBAL_STATE_CODE();
 
     /*
      * Clear flags that are internal to the block layer before opening the
@@ -1361,6 +1532,8 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
 
 static void update_flags_from_options(int *flags, QemuOpts *opts)
 {
+    GLOBAL_STATE_CODE();
+
     *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY);
 
     if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
@@ -1382,6 +1555,7 @@ static void update_flags_from_options(int *flags, QemuOpts *opts)
 
 static void update_options_from_flags(QDict *options, int flags)
 {
+    GLOBAL_STATE_CODE();
     if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
         qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
     }
@@ -1403,6 +1577,7 @@ static void bdrv_assign_node_name(BlockDriverState *bs,
                                   Error **errp)
 {
     char *gen_node_name = NULL;
+    GLOBAL_STATE_CODE();
 
     if (!node_name) {
         node_name = gen_node_name = id_generate(ID_BLOCK);
@@ -1411,7 +1586,7 @@ static void bdrv_assign_node_name(BlockDriverState *bs,
          * Check for empty string or invalid characters, but not if it is
          * generated (generated names use characters not available to the user)
          */
-        error_setg(errp, "Invalid node name");
+        error_setg(errp, "Invalid node-name: '%s'", node_name);
         return;
     }
 
@@ -1424,7 +1599,7 @@ static void bdrv_assign_node_name(BlockDriverState *bs,
 
     /* takes care of avoiding duplicates node names */
     if (bdrv_find_node(node_name)) {
-        error_setg(errp, "Duplicate node name");
+        error_setg(errp, "Duplicate nodes with node-name='%s'", node_name);
         goto out;
     }
 
@@ -1441,12 +1616,19 @@ out:
     g_free(gen_node_name);
 }
 
-static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
-                            const char *node_name, QDict *options,
-                            int open_flags, Error **errp)
+/*
+ * The caller must always hold @bs AioContext lock, because this function calls
+ * bdrv_refresh_total_sectors() which polls when called from non-coroutine
+ * context.
+ */
+static int no_coroutine_fn GRAPH_UNLOCKED
+bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
+                 QDict *options, int open_flags, Error **errp)
 {
+    AioContext *ctx;
     Error *local_err = NULL;
     int i, ret;
+    GLOBAL_STATE_CODE();
 
     bdrv_assign_node_name(bs, node_name, &local_err);
     if (local_err) {
@@ -1455,7 +1637,6 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
     }
 
     bs->drv = drv;
-    bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
     bs->opaque = g_malloc0(drv->instance_size);
 
     if (drv->bdrv_file_open) {
@@ -1478,13 +1659,36 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
         goto open_failed;
     }
 
-    ret = refresh_total_sectors(bs, bs->total_sectors);
+    assert(!(bs->supported_read_flags & ~BDRV_REQ_MASK));
+    assert(!(bs->supported_write_flags & ~BDRV_REQ_MASK));
+
+    /*
+     * Always allow the BDRV_REQ_REGISTERED_BUF optimization hint. This saves
+     * drivers that pass read/write requests through to a child the trouble of
+     * declaring support explicitly.
+     *
+     * Drivers must not propagate this flag accidentally when they initiate I/O
+     * to a bounce buffer. That case should be rare though.
+     */
+    bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF;
+    bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF;
+
+    /* Get the context after .bdrv_open, it can change the context */
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
+
+    ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not refresh total sector count");
+        aio_context_release(ctx);
         return ret;
     }
 
-    bdrv_refresh_limits(bs, &local_err);
+    bdrv_graph_rdlock_main_loop();
+    bdrv_refresh_limits(bs, NULL, &local_err);
+    bdrv_graph_rdunlock_main_loop();
+    aio_context_release(ctx);
+
     if (local_err) {
         error_propagate(errp, local_err);
         return -EINVAL;
@@ -1495,33 +1699,49 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
     assert(is_power_of_2(bs->bl.request_alignment));
 
     for (i = 0; i < bs->quiesce_counter; i++) {
-        if (drv->bdrv_co_drain_begin) {
-            drv->bdrv_co_drain_begin(bs);
+        if (drv->bdrv_drain_begin) {
+            drv->bdrv_drain_begin(bs);
         }
     }
 
     return 0;
 open_failed:
     bs->drv = NULL;
+
+    bdrv_graph_wrlock(NULL);
     if (bs->file != NULL) {
         bdrv_unref_child(bs, bs->file);
-        bs->file = NULL;
+        assert(!bs->file);
     }
+    bdrv_graph_wrunlock(NULL);
+
     g_free(bs->opaque);
     bs->opaque = NULL;
     return ret;
 }
 
-BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
-                                       int flags, Error **errp)
+/*
+ * Create and open a block node.
+ *
+ * @options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict belongs to the block layer
+ * after the call (even on failure), so if the caller intends to reuse the
+ * dictionary, it needs to use qobject_ref() before calling bdrv_open.
+ */
+BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
+                                            const char *node_name,
+                                            QDict *options, int flags,
+                                            Error **errp)
 {
     BlockDriverState *bs;
     int ret;
 
+    GLOBAL_STATE_CODE();
+
     bs = bdrv_new();
     bs->open_flags = flags;
-    bs->explicit_options = qdict_new();
-    bs->options = qdict_new();
+    bs->options = options ?: qdict_new();
+    bs->explicit_options = qdict_clone_shallow(bs->options);
     bs->opaque = NULL;
 
     update_options_from_flags(bs->options, flags);
@@ -1539,6 +1759,14 @@ BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
     return bs;
 }
 
+/* Create and open a block node. */
+BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
+                                       int flags, Error **errp)
+{
+    GLOBAL_STATE_CODE();
+    return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp);
+}
+
 QemuOptsList bdrv_runtime_opts = {
     .name = "bdrv_common",
     .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
@@ -1626,9 +1854,14 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
     QemuOpts *opts;
     BlockDriver *drv;
     Error *local_err = NULL;
+    bool ro;
 
+    GLOBAL_STATE_CODE();
+
+    bdrv_graph_rdlock_main_loop();
     assert(bs->file == NULL);
     assert(options != NULL && bs->options != options);
+    bdrv_graph_rdunlock_main_loop();
 
     opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
     if (!qemu_opts_absorb_qdict(opts, options, errp)) {
@@ -1653,7 +1886,10 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
     }
 
     if (file != NULL) {
+        bdrv_graph_rdlock_main_loop();
         bdrv_refresh_filename(blk_bs(file));
+        bdrv_graph_rdunlock_main_loop();
+
         filename = blk_bs(file)->filename;
     } else {
         /*
@@ -1676,17 +1912,19 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
     trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
                            drv->format_name);
 
-    bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
+    ro = bdrv_is_read_only(bs);
 
-    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
-        if (!bs->read_only && bdrv_is_whitelisted(drv, true)) {
+    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
+        if (!ro && bdrv_is_whitelisted(drv, true)) {
+            bdrv_graph_rdlock_main_loop();
             ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
+            bdrv_graph_rdunlock_main_loop();
         } else {
             ret = -ENOTSUP;
         }
         if (ret < 0) {
             error_setg(errp,
-                       !bs->read_only && bdrv_is_whitelisted(drv, true)
+                       !ro && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                        drv->format_name);
@@ -1698,7 +1936,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
     assert(qatomic_read(&bs->copy_on_read) == 0);
 
     if (bs->open_flags & BDRV_O_COPY_ON_READ) {
-        if (!bs->read_only) {
+        if (!ro) {
             bdrv_enable_copy_on_read(bs);
         } else {
             error_setg(errp, "Can't use copy-on-read on read-only device");
@@ -1754,6 +1992,7 @@ static QDict *parse_json_filename(const char *filename, Error **errp)
     QObject *options_obj;
     QDict *options;
     int ret;
+    GLOBAL_STATE_CODE();
 
     ret = strstart(filename, "json:", &filename);
     assert(ret);
@@ -1781,6 +2020,7 @@ static void parse_json_protocol(QDict *options, const char **pfilename,
 {
     QDict *json_options;
     Error *local_err = NULL;
+    GLOBAL_STATE_CODE();
 
     /* Parse json: pseudo-protocol */
     if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
@@ -1815,6 +2055,8 @@ static int bdrv_fill_options(QDict **options, const char *filename,
     BlockDriver *drv = NULL;
     Error *local_err = NULL;
 
+    GLOBAL_STATE_CODE();
+
     /*
      * Caution: while qdict_get_try_str() is fine, getting non-string
      * types would require more care.  When @options come from
@@ -1892,16 +2134,8 @@ static int bdrv_fill_options(QDict **options, const char *filename,
     return 0;
 }
 
-static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q,
-                                 uint64_t perm, uint64_t shared,
-                                 GSList *ignore_children,
-                                 bool *tighten_restrictions, Error **errp);
-static void bdrv_child_abort_perm_update(BdrvChild *c);
-static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared);
-
 typedef struct BlockReopenQueueEntry {
      bool prepared;
-     bool perms_checked;
      BDRVReopenState state;
      QTAILQ_ENTRY(BlockReopenQueueEntry) entry;
 } BlockReopenQueueEntry;
@@ -1943,198 +2177,437 @@ static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
  */
 bool bdrv_is_writable(BlockDriverState *bs)
 {
+    IO_CODE();
     return bdrv_is_writable_after_reopen(bs, NULL);
 }
 
-static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
-                            BdrvChild *c, BdrvChildRole role,
-                            BlockReopenQueue *reopen_queue,
-                            uint64_t parent_perm, uint64_t parent_shared,
-                            uint64_t *nperm, uint64_t *nshared)
+static char *bdrv_child_user_desc(BdrvChild *c)
 {
-    assert(bs->drv && bs->drv->bdrv_child_perm);
-    bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
-                             parent_perm, parent_shared,
-                             nperm, nshared);
-    /* TODO Take force_share from reopen_queue */
-    if (child_bs && child_bs->force_share) {
-        *nshared = BLK_PERM_ALL;
-    }
+    GLOBAL_STATE_CODE();
+    return c->klass->get_parent_desc(c);
 }
 
 /*
- * Check whether permissions on this node can be changed in a way that
- * @cumulative_perms and @cumulative_shared_perms are the new cumulative
- * permissions of all its parents. This involves checking whether all necessary
- * permission changes to child nodes can be performed.
- *
- * Will set *tighten_restrictions to true if and only if new permissions have to
- * be taken or currently shared permissions are to be unshared.  Otherwise,
- * errors are not fatal as long as the caller accepts that the restrictions
- * remain tighter than they need to be.  The caller still has to abort the
- * transaction.
- * @tighten_restrictions cannot be used together with @q: When reopening, we may
- * encounter fatal errors even though no restrictions are to be tightened.  For
- * example, changing a node from RW to RO will fail if the WRITE permission is
- * to be kept.
- *
- * A call to this function must always be followed by a call to bdrv_set_perm()
- * or bdrv_abort_perm_update().
+ * Check that @a allows everything that @b needs. @a and @b must reference same
+ * child node.
  */
-static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q,
-                           uint64_t cumulative_perms,
-                           uint64_t cumulative_shared_perms,
-                           GSList *ignore_children,
-                           bool *tighten_restrictions, Error **errp)
+static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
 {
-    BlockDriver *drv = bs->drv;
-    BdrvChild *c;
-    int ret;
+    const char *child_bs_name;
+    g_autofree char *a_user = NULL;
+    g_autofree char *b_user = NULL;
+    g_autofree char *perms = NULL;
 
-    assert(!q || !tighten_restrictions);
+    assert(a->bs);
+    assert(a->bs == b->bs);
+    GLOBAL_STATE_CODE();
 
-    if (tighten_restrictions) {
-        uint64_t current_perms, current_shared;
-        uint64_t added_perms, removed_shared_perms;
-
-        bdrv_get_cumulative_perm(bs, &current_perms, &current_shared);
+    if ((b->perm & a->shared_perm) == b->perm) {
+        return true;
+    }
 
-        added_perms = cumulative_perms & ~current_perms;
-        removed_shared_perms = current_shared & ~cumulative_shared_perms;
+    child_bs_name = bdrv_get_node_name(b->bs);
+    a_user = bdrv_child_user_desc(a);
+    b_user = bdrv_child_user_desc(b);
+    perms = bdrv_perm_names(b->perm & ~a->shared_perm);
 
-        *tighten_restrictions = added_perms || removed_shared_perms;
-    }
+    error_setg(errp, "Permission conflict on node '%s': permissions '%s' are "
+               "both required by %s (uses node '%s' as '%s' child) and "
+               "unshared by %s (uses node '%s' as '%s' child).",
+               child_bs_name, perms,
+               b_user, child_bs_name, b->name,
+               a_user, child_bs_name, a->name);
 
-    /* Write permissions never work with read-only images */
-    if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
-        !bdrv_is_writable_after_reopen(bs, q))
-    {
-        if (!bdrv_is_writable_after_reopen(bs, NULL)) {
-            error_setg(errp, "Block node is read-only");
-        } else {
-            uint64_t current_perms, current_shared;
-            bdrv_get_cumulative_perm(bs, &current_perms, &current_shared);
-            if (current_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
-                error_setg(errp, "Cannot make block node read-only, there is "
-                           "a writer on it");
-            } else {
-                error_setg(errp, "Cannot make block node read-only and create "
-                           "a writer on it");
-            }
-        }
+    return false;
+}
 
-        return -EPERM;
-    }
+static bool GRAPH_RDLOCK
+bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
+{
+    BdrvChild *a, *b;
+    GLOBAL_STATE_CODE();
 
     /*
-     * Unaligned requests will automatically be aligned to bl.request_alignment
-     * and without RESIZE we can't extend requests to write to space beyond the
-     * end of the image, so it's required that the image size is aligned.
+     * During the loop we'll look at each pair twice. That's correct because
+     * bdrv_a_allow_b() is asymmetric and we should check each pair in both
+     * directions.
      */
-    if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
-        !(cumulative_perms & BLK_PERM_RESIZE))
-    {
-        if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) {
-            error_setg(errp, "Cannot get 'write' permission without 'resize': "
-                             "Image size is not a multiple of request "
-                             "alignment");
-            return -EPERM;
-        }
-    }
-
-    /* Check this node */
-    if (!drv) {
-        return 0;
-    }
-
-    if (drv->bdrv_check_perm) {
-        return drv->bdrv_check_perm(bs, cumulative_perms,
-                                    cumulative_shared_perms, errp);
-    }
+    QLIST_FOREACH(a, &bs->parents, next_parent) {
+        QLIST_FOREACH(b, &bs->parents, next_parent) {
+            if (a == b) {
+                continue;
+            }
 
-    /* Drivers that never have children can omit .bdrv_child_perm() */
-    if (!drv->bdrv_child_perm) {
-        assert(QLIST_EMPTY(&bs->children));
-        return 0;
+            if (!bdrv_a_allow_b(a, b, errp)) {
+                return true;
+            }
+        }
     }
 
-    /* Check all children */
-    QLIST_FOREACH(c, &bs->children, next) {
-        uint64_t cur_perm, cur_shared;
-        bool child_tighten_restr;
+    return false;
+}
 
-        bdrv_child_perm(bs, c->bs, c, c->role, q,
-                        cumulative_perms, cumulative_shared_perms,
-                        &cur_perm, &cur_shared);
-        ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, ignore_children,
-                                    tighten_restrictions ? &child_tighten_restr
-                                                         : NULL,
-                                    errp);
-        if (tighten_restrictions) {
-            *tighten_restrictions |= child_tighten_restr;
-        }
-        if (ret < 0) {
-            return ret;
-        }
+static void GRAPH_RDLOCK
+bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
+                BdrvChild *c, BdrvChildRole role,
+                BlockReopenQueue *reopen_queue,
+                uint64_t parent_perm, uint64_t parent_shared,
+                uint64_t *nperm, uint64_t *nshared)
+{
+    assert(bs->drv && bs->drv->bdrv_child_perm);
+    GLOBAL_STATE_CODE();
+    bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
+                             parent_perm, parent_shared,
+                             nperm, nshared);
+    /* TODO Take force_share from reopen_queue */
+    if (child_bs && child_bs->force_share) {
+        *nshared = BLK_PERM_ALL;
     }
-
-    return 0;
 }
 
 /*
- * Notifies drivers that after a previous bdrv_check_perm() call, the
- * permission update is not performed and any preparations made for it (e.g.
- * taken file locks) need to be undone.
+ * Adds the whole subtree of @bs (including @bs itself) to the @list (except for
+ * nodes that are already in the @list, of course) so that final list is
+ * topologically sorted. Return the result (GSList @list object is updated, so
+ * don't use old reference after function call).
  *
- * This function recursively notifies all child nodes.
+ * On function start @list must be already topologically sorted and for any node
+ * in the @list the whole subtree of the node must be in the @list as well. The
+ * simplest way to satisfy this criteria: use only result of
+ * bdrv_topological_dfs() or NULL as @list parameter.
  */
-static void bdrv_abort_perm_update(BlockDriverState *bs)
+static GSList * GRAPH_RDLOCK
+bdrv_topological_dfs(GSList *list, GHashTable *found, BlockDriverState *bs)
 {
-    BlockDriver *drv = bs->drv;
-    BdrvChild *c;
+    BdrvChild *child;
+    g_autoptr(GHashTable) local_found = NULL;
 
-    if (!drv) {
-        return;
+    GLOBAL_STATE_CODE();
+
+    if (!found) {
+        assert(!list);
+        found = local_found = g_hash_table_new(NULL, NULL);
     }
 
-    if (drv->bdrv_abort_perm_update) {
-        drv->bdrv_abort_perm_update(bs);
+    if (g_hash_table_contains(found, bs)) {
+        return list;
     }
+    g_hash_table_add(found, bs);
 
-    QLIST_FOREACH(c, &bs->children, next) {
-        bdrv_child_abort_perm_update(c);
+    QLIST_FOREACH(child, &bs->children, next) {
+        list = bdrv_topological_dfs(list, found, child->bs);
+    }
+
+    return g_slist_prepend(list, bs);
+}
+
+typedef struct BdrvChildSetPermState {
+    BdrvChild *child;
+    uint64_t old_perm;
+    uint64_t old_shared_perm;
+} BdrvChildSetPermState;
+
+static void bdrv_child_set_perm_abort(void *opaque)
+{
+    BdrvChildSetPermState *s = opaque;
+
+    GLOBAL_STATE_CODE();
+
+    s->child->perm = s->old_perm;
+    s->child->shared_perm = s->old_shared_perm;
+}
+
+static TransactionActionDrv bdrv_child_set_pem_drv = {
+    .abort = bdrv_child_set_perm_abort,
+    .clean = g_free,
+};
+
+static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm,
+                                uint64_t shared, Transaction *tran)
+{
+    BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1);
+    GLOBAL_STATE_CODE();
+
+    *s = (BdrvChildSetPermState) {
+        .child = c,
+        .old_perm = c->perm,
+        .old_shared_perm = c->shared_perm,
+    };
+
+    c->perm = perm;
+    c->shared_perm = shared;
+
+    tran_add(tran, &bdrv_child_set_pem_drv, s);
+}
+
+static void GRAPH_RDLOCK bdrv_drv_set_perm_commit(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    uint64_t cumulative_perms, cumulative_shared_perms;
+    GLOBAL_STATE_CODE();
+
+    if (bs->drv->bdrv_set_perm) {
+        bdrv_get_cumulative_perm(bs, &cumulative_perms,
+                                 &cumulative_shared_perms);
+        bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
+    }
+}
+
+static void GRAPH_RDLOCK bdrv_drv_set_perm_abort(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    GLOBAL_STATE_CODE();
+
+    if (bs->drv->bdrv_abort_perm_update) {
+        bs->drv->bdrv_abort_perm_update(bs);
     }
 }
 
-static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms,
-                          uint64_t cumulative_shared_perms)
+TransactionActionDrv bdrv_drv_set_perm_drv = {
+    .abort = bdrv_drv_set_perm_abort,
+    .commit = bdrv_drv_set_perm_commit,
+};
+
+/*
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a reader lock for the graph.
+ */
+static int GRAPH_RDLOCK
+bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm,
+                  Transaction *tran, Error **errp)
+{
+    GLOBAL_STATE_CODE();
+    if (!bs->drv) {
+        return 0;
+    }
+
+    if (bs->drv->bdrv_check_perm) {
+        int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    if (tran) {
+        tran_add(tran, &bdrv_drv_set_perm_drv, bs);
+    }
+
+    return 0;
+}
+
+typedef struct BdrvReplaceChildState {
+    BdrvChild *child;
+    BlockDriverState *old_bs;
+} BdrvReplaceChildState;
+
+static void GRAPH_WRLOCK bdrv_replace_child_commit(void *opaque)
+{
+    BdrvReplaceChildState *s = opaque;
+    GLOBAL_STATE_CODE();
+
+    bdrv_schedule_unref(s->old_bs);
+}
+
+static void GRAPH_WRLOCK bdrv_replace_child_abort(void *opaque)
+{
+    BdrvReplaceChildState *s = opaque;
+    BlockDriverState *new_bs = s->child->bs;
+
+    GLOBAL_STATE_CODE();
+    assert_bdrv_graph_writable();
+
+    /* old_bs reference is transparently moved from @s to @s->child */
+    if (!s->child->bs) {
+        /*
+         * The parents were undrained when removing old_bs from the child. New
+         * requests can't have been made, though, because the child was empty.
+         *
+         * TODO Make bdrv_replace_child_noperm() transactionable to avoid
+         * undraining the parent in the first place. Once this is done, having
+         * new_bs drained when calling bdrv_replace_child_tran() is not a
+         * requirement any more.
+         */
+        bdrv_parent_drained_begin_single(s->child);
+        assert(!bdrv_parent_drained_poll_single(s->child));
+    }
+    assert(s->child->quiesced_parent);
+    bdrv_replace_child_noperm(s->child, s->old_bs);
+
+    bdrv_unref(new_bs);
+}
+
+static TransactionActionDrv bdrv_replace_child_drv = {
+    .commit = bdrv_replace_child_commit,
+    .abort = bdrv_replace_child_abort,
+    .clean = g_free,
+};
+
+/*
+ * bdrv_replace_child_tran
+ *
+ * Note: real unref of old_bs is done only on commit.
+ *
+ * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
+ * kept drained until the transaction is completed.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
+ *
+ * The function doesn't update permissions, caller is responsible for this.
+ */
+static void GRAPH_WRLOCK
+bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
+                        Transaction *tran)
+{
+    BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
+
+    assert(child->quiesced_parent);
+    assert(!new_bs || new_bs->quiesce_counter);
+
+    *s = (BdrvReplaceChildState) {
+        .child = child,
+        .old_bs = child->bs,
+    };
+    tran_add(tran, &bdrv_replace_child_drv, s);
+
+    if (new_bs) {
+        bdrv_ref(new_bs);
+    }
+
+    bdrv_replace_child_noperm(child, new_bs);
+    /* old_bs reference is transparently moved from @child to @s */
+}
+
+/*
+ * Refresh permissions in @bs subtree. The function is intended to be called
+ * after some graph modification that was done without permission update.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a reader lock for the graph.
+ */
+static int GRAPH_RDLOCK
+bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
+                       Transaction *tran, Error **errp)
 {
     BlockDriver *drv = bs->drv;
     BdrvChild *c;
+    int ret;
+    uint64_t cumulative_perms, cumulative_shared_perms;
+    GLOBAL_STATE_CODE();
+
+    bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);
+
+    /* Write permissions never work with read-only images */
+    if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
+        !bdrv_is_writable_after_reopen(bs, q))
+    {
+        if (!bdrv_is_writable_after_reopen(bs, NULL)) {
+            error_setg(errp, "Block node is read-only");
+        } else {
+            error_setg(errp, "Read-only block node '%s' cannot support "
+                       "read-write users", bdrv_get_node_name(bs));
+        }
 
+        return -EPERM;
+    }
+
+    /*
+     * Unaligned requests will automatically be aligned to bl.request_alignment
+     * and without RESIZE we can't extend requests to write to space beyond the
+     * end of the image, so it's required that the image size is aligned.
+     */
+    if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
+        !(cumulative_perms & BLK_PERM_RESIZE))
+    {
+        if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) {
+            error_setg(errp, "Cannot get 'write' permission without 'resize': "
+                             "Image size is not a multiple of request "
+                             "alignment");
+            return -EPERM;
+        }
+    }
+
+    /* Check this node */
     if (!drv) {
-        return;
+        return 0;
     }
 
-    /* Update this node */
-    if (drv->bdrv_set_perm) {
-        drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
+    ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran,
+                            errp);
+    if (ret < 0) {
+        return ret;
     }
 
     /* Drivers that never have children can omit .bdrv_child_perm() */
     if (!drv->bdrv_child_perm) {
         assert(QLIST_EMPTY(&bs->children));
-        return;
+        return 0;
     }
 
-    /* Update all children */
+    /* Check all children */
     QLIST_FOREACH(c, &bs->children, next) {
         uint64_t cur_perm, cur_shared;
-        bdrv_child_perm(bs, c->bs, c, c->role, NULL,
+
+        bdrv_child_perm(bs, c->bs, c, c->role, q,
                         cumulative_perms, cumulative_shared_perms,
                         &cur_perm, &cur_shared);
-        bdrv_child_set_perm(c, cur_perm, cur_shared);
+        bdrv_child_set_perm(c, cur_perm, cur_shared, tran);
     }
+
+    return 0;
+}
+
+/*
+ * @list is a product of bdrv_topological_dfs() (may be called several times) -
+ * a topologically sorted subgraph.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a reader lock for the graph.
+ */
+static int GRAPH_RDLOCK
+bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q, Transaction *tran,
+                      Error **errp)
+{
+    int ret;
+    BlockDriverState *bs;
+    GLOBAL_STATE_CODE();
+
+    for ( ; list; list = list->next) {
+        bs = list->data;
+
+        if (bdrv_parent_perms_conflict(bs, errp)) {
+            return -EINVAL;
+        }
+
+        ret = bdrv_node_refresh_perm(bs, q, tran, errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * @list is any list of nodes. List is completed by all subtrees and
+ * topologically sorted. It's not a problem if some node occurs in the @list
+ * several times.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a reader lock for the graph.
+ */
+static int GRAPH_RDLOCK
+bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, Transaction *tran,
+                        Error **errp)
+{
+    g_autoptr(GHashTable) found = g_hash_table_new(NULL, NULL);
+    g_autoptr(GSList) refresh_list = NULL;
+
+    for ( ; list; list = list->next) {
+        refresh_list = bdrv_topological_dfs(refresh_list, found, list->data);
+    }
+
+    return bdrv_do_refresh_perms(refresh_list, q, tran, errp);
 }
 
 void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
@@ -2144,6 +2617,8 @@ void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
     uint64_t cumulative_perms = 0;
     uint64_t cumulative_shared_perms = BLK_PERM_ALL;
 
+    GLOBAL_STATE_CODE();
+
     QLIST_FOREACH(c, &bs->parents, next_parent) {
         cumulative_perms |= c->perm;
         cumulative_shared_perms &= c->shared_perm;
@@ -2153,15 +2628,6 @@ void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
     *shared_perm = cumulative_shared_perms;
 }
 
-static char *bdrv_child_user_desc(BdrvChild *c)
-{
-    if (c->klass->get_parent_desc) {
-        return c->klass->get_parent_desc(c);
-    }
-
-    return g_strdup("another user");
-}
-
 char *bdrv_perm_names(uint64_t perm)
 {
     struct perm_name {
@@ -2172,7 +2638,6 @@ char *bdrv_perm_names(uint64_t perm)
         { BLK_PERM_WRITE,           "write" },
         { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
         { BLK_PERM_RESIZE,          "resize" },
-        { BLK_PERM_GRAPH_MOD,       "change children" },
         { 0, NULL }
     };
 
@@ -2191,148 +2656,52 @@ char *bdrv_perm_names(uint64_t perm)
     return g_string_free(result, FALSE);
 }
 
+
 /*
- * Checks whether a new reference to @bs can be added if the new user requires
- * @new_used_perm/@new_shared_perm as its permissions. If @ignore_children is
- * set, the BdrvChild objects in this list are ignored in the calculations;
- * this allows checking permission updates for an existing reference.
+ * @tran is allowed to be NULL. In this case no rollback is possible.
  *
- * See bdrv_check_perm() for the semantics of @tighten_restrictions.
- *
- * Needs to be followed by a call to either bdrv_set_perm() or
- * bdrv_abort_perm_update(). */
-static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q,
-                                  uint64_t new_used_perm,
-                                  uint64_t new_shared_perm,
-                                  GSList *ignore_children,
-                                  bool *tighten_restrictions,
-                                  Error **errp)
-{
-    BdrvChild *c;
-    uint64_t cumulative_perms = new_used_perm;
-    uint64_t cumulative_shared_perms = new_shared_perm;
-
-    assert(!q || !tighten_restrictions);
-
-    /* There is no reason why anyone couldn't tolerate write_unchanged */
-    assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED);
-
-    QLIST_FOREACH(c, &bs->parents, next_parent) {
-        if (g_slist_find(ignore_children, c)) {
-            continue;
-        }
-
-        if ((new_used_perm & c->shared_perm) != new_used_perm) {
-            char *user = bdrv_child_user_desc(c);
-            char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm);
-
-            if (tighten_restrictions) {
-                *tighten_restrictions = true;
-            }
-
-            error_setg(errp, "Conflicts with use by %s as '%s', which does not "
-                             "allow '%s' on %s",
-                       user, c->name, perm_names, bdrv_get_node_name(c->bs));
-            g_free(user);
-            g_free(perm_names);
-            return -EPERM;
-        }
-
-        if ((c->perm & new_shared_perm) != c->perm) {
-            char *user = bdrv_child_user_desc(c);
-            char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm);
-
-            if (tighten_restrictions) {
-                *tighten_restrictions = true;
-            }
-
-            error_setg(errp, "Conflicts with use by %s as '%s', which uses "
-                             "'%s' on %s",
-                       user, c->name, perm_names, bdrv_get_node_name(c->bs));
-            g_free(user);
-            g_free(perm_names);
-            return -EPERM;
-        }
-
-        cumulative_perms |= c->perm;
-        cumulative_shared_perms &= c->shared_perm;
-    }
-
-    return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms,
-                           ignore_children, tighten_restrictions, errp);
-}
-
-/* Needs to be followed by a call to either bdrv_child_set_perm() or
- * bdrv_child_abort_perm_update(). */
-static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q,
-                                 uint64_t perm, uint64_t shared,
-                                 GSList *ignore_children,
-                                 bool *tighten_restrictions, Error **errp)
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a reader lock for the graph.
+ */
+static int GRAPH_RDLOCK
+bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran, Error **errp)
 {
     int ret;
+    Transaction *local_tran = NULL;
+    g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
+    GLOBAL_STATE_CODE();
 
-    ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c);
-    ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children,
-                                 tighten_restrictions, errp);
-    g_slist_free(ignore_children);
-
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (!c->has_backup_perm) {
-        c->has_backup_perm = true;
-        c->backup_perm = c->perm;
-        c->backup_shared_perm = c->shared_perm;
+    if (!tran) {
+        tran = local_tran = tran_new();
     }
-    /*
-     * Note: it's OK if c->has_backup_perm was already set, as we can find the
-     * same child twice during check_perm procedure
-     */
-
-    c->perm = perm;
-    c->shared_perm = shared;
-
-    return 0;
-}
-
-static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared)
-{
-    uint64_t cumulative_perms, cumulative_shared_perms;
 
-    c->has_backup_perm = false;
+    ret = bdrv_do_refresh_perms(list, NULL, tran, errp);
 
-    c->perm = perm;
-    c->shared_perm = shared;
-
-    bdrv_get_cumulative_perm(c->bs, &cumulative_perms,
-                             &cumulative_shared_perms);
-    bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms);
-}
-
-static void bdrv_child_abort_perm_update(BdrvChild *c)
-{
-    if (c->has_backup_perm) {
-        c->perm = c->backup_perm;
-        c->shared_perm = c->backup_shared_perm;
-        c->has_backup_perm = false;
+    if (local_tran) {
+        tran_finalize(local_tran, ret);
     }
 
-    bdrv_abort_perm_update(c->bs);
+    return ret;
 }
 
 int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
                             Error **errp)
 {
     Error *local_err = NULL;
+    Transaction *tran = tran_new();
     int ret;
-    bool tighten_restrictions;
 
-    ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL,
-                                &tighten_restrictions, &local_err);
+    GLOBAL_STATE_CODE();
+
+    bdrv_child_set_perm(c, perm, shared, tran);
+
+    ret = bdrv_refresh_perms(c->bs, tran, &local_err);
+
+    tran_finalize(tran, ret);
+
     if (ret < 0) {
-        bdrv_child_abort_perm_update(c);
-        if (tighten_restrictions) {
+        if ((perm & ~c->perm) || (c->shared_perm & ~shared)) {
+            /* tighten permissions */
             error_propagate(errp, local_err);
         } else {
             /*
@@ -2344,12 +2713,9 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
             error_free(local_err);
             ret = 0;
         }
-        return ret;
     }
 
-    bdrv_child_set_perm(c, perm, shared);
-
-    return 0;
+    return ret;
 }
 
 int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
@@ -2357,6 +2723,8 @@ int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
     uint64_t parent_perms, parent_shared;
     uint64_t perms, shared;
 
+    GLOBAL_STATE_CODE();
+
     bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared);
     bdrv_child_perm(bs, c->bs, c, c->role, NULL,
                     parent_perms, parent_shared, &perms, &shared);
@@ -2375,6 +2743,7 @@ static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
                                       uint64_t perm, uint64_t shared,
                                       uint64_t *nperm, uint64_t *nshared)
 {
+    GLOBAL_STATE_CODE();
     *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
     *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
 }
@@ -2386,6 +2755,7 @@ static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
                                        uint64_t *nperm, uint64_t *nshared)
 {
     assert(role & BDRV_CHILD_COW);
+    GLOBAL_STATE_CODE();
 
     /*
      * We want consistent read from backing files if the parent needs it.
@@ -2404,8 +2774,7 @@ static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
         shared = 0;
     }
 
-    shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
-              BLK_PERM_WRITE_UNCHANGED;
+    shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
 
     if (bs->open_flags & BDRV_O_INACTIVE) {
         shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
@@ -2423,6 +2792,7 @@ static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c,
 {
     int flags;
 
+    GLOBAL_STATE_CODE();
     assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA));
 
     flags = bdrv_reopen_get_flags(reopen_queue, bs);
@@ -2499,6 +2869,7 @@ void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
                         uint64_t perm, uint64_t shared,
                         uint64_t *nperm, uint64_t *nshared)
 {
+    GLOBAL_STATE_CODE();
     if (role & BDRV_CHILD_FILTERED) {
         assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
                          BDRV_CHILD_COW)));
@@ -2523,7 +2894,6 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
         [BLOCK_PERMISSION_WRITE]            = BLK_PERM_WRITE,
         [BLOCK_PERMISSION_WRITE_UNCHANGED]  = BLK_PERM_WRITE_UNCHANGED,
         [BLOCK_PERMISSION_RESIZE]           = BLK_PERM_RESIZE,
-        [BLOCK_PERMISSION_GRAPH_MOD]        = BLK_PERM_GRAPH_MOD,
     };
 
     QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX);
@@ -2534,35 +2904,49 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
     return permissions[qapi_perm];
 }
 
-static void bdrv_replace_child_noperm(BdrvChild *child,
-                                      BlockDriverState *new_bs)
+/*
+ * Replaces the node that a BdrvChild points to without updating permissions.
+ *
+ * If @new_bs is non-NULL, the parent of @child must already be drained through
+ * @child and the caller must hold the AioContext lock for @new_bs.
+ */
+static void GRAPH_WRLOCK
+bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs)
 {
     BlockDriverState *old_bs = child->bs;
     int new_bs_quiesce_counter;
-    int drain_saldo;
 
     assert(!child->frozen);
 
-    if (old_bs && new_bs) {
-        assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
-    }
-
-    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
-    drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
-
     /*
-     * If the new child node is drained but the old one was not, flush
-     * all outstanding requests to the old child node.
+     * If we want to change the BdrvChild to point to a drained node as its new
+     * child->bs, we need to make sure that its new parent is drained, too. In
+     * other words, either child->quiesce_parent must already be true or we must
+     * be able to set it and keep the parent's quiesce_counter consistent with
+     * that, but without polling or starting new requests (this function
+     * guarantees that it doesn't poll, and starting new requests would be
+     * against the invariants of drain sections).
+     *
+     * To keep things simple, we pick the first option (child->quiesce_parent
+     * must already be true). We also generalise the rule a bit to make it
+     * easier to verify in callers and more likely to be covered in test cases:
+     * The parent must be quiesced through this child even if new_bs isn't
+     * currently drained.
+     *
+     * The only exception is for callers that always pass new_bs == NULL. In
+     * this case, we obviously never need to consider the case of a drained
+     * new_bs, so we can keep the callers simpler by allowing them not to drain
+     * the parent.
      */
-    while (drain_saldo > 0 && child->klass->drained_begin) {
-        bdrv_parent_drained_begin_single(child, true);
-        drain_saldo--;
+    assert(!new_bs || child->quiesced_parent);
+    assert(old_bs != new_bs);
+    GLOBAL_STATE_CODE();
+
+    if (old_bs && new_bs) {
+        assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
     }
 
     if (old_bs) {
-        /* Detach first so that the recursive drain sections coming from @child
-         * are already gone and we only end the drain sections that came from
-         * elsewhere. */
         if (child->klass->detach) {
             child->klass->detach(child);
         }
@@ -2573,90 +2957,243 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
 
     if (new_bs) {
         QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
-
-        /*
-         * Detaching the old node may have led to the new node's
-         * quiesce_counter having been decreased.  Not a problem, we
-         * just need to recognize this here and then invoke
-         * drained_end appropriately more often.
-         */
-        assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
-        drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
-
-        /* Attach only after starting new drained sections, so that recursive
-         * drain sections coming from @child don't get an extra .drained_begin
-         * callback. */
         if (child->klass->attach) {
             child->klass->attach(child);
         }
     }
 
     /*
-     * If the old child node was drained but the new one is not, allow
-     * requests to come in only after the new node has been attached.
+     * If the parent was drained through this BdrvChild previously, but new_bs
+     * is not drained, allow requests to come in only after the new node has
+     * been attached.
      */
-    while (drain_saldo < 0 && child->klass->drained_end) {
+    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+    if (!new_bs_quiesce_counter && child->quiesced_parent) {
         bdrv_parent_drained_end_single(child);
-        drain_saldo++;
     }
 }
 
+/**
+ * Free the given @child.
+ *
+ * The child must be empty (i.e. `child->bs == NULL`) and it must be
+ * unused (i.e. not in a children list).
+ */
+static void bdrv_child_free(BdrvChild *child)
+{
+    assert(!child->bs);
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    assert(!child->next.le_prev); /* not in children list */
+
+    g_free(child->name);
+    g_free(child);
+}
+
+typedef struct BdrvAttachChildCommonState {
+    BdrvChild *child;
+    AioContext *old_parent_ctx;
+    AioContext *old_child_ctx;
+} BdrvAttachChildCommonState;
+
+static void GRAPH_WRLOCK bdrv_attach_child_common_abort(void *opaque)
+{
+    BdrvAttachChildCommonState *s = opaque;
+    BlockDriverState *bs = s->child->bs;
+
+    GLOBAL_STATE_CODE();
+    assert_bdrv_graph_writable();
+
+    bdrv_replace_child_noperm(s->child, NULL);
+
+    if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
+        bdrv_try_change_aio_context(bs, s->old_child_ctx, NULL, &error_abort);
+    }
+
+    if (bdrv_child_get_parent_aio_context(s->child) != s->old_parent_ctx) {
+        Transaction *tran;
+        GHashTable *visited;
+        bool ret;
+
+        tran = tran_new();
+
+        /* No need to visit `child`, because it has been detached already */
+        visited = g_hash_table_new(NULL, NULL);
+        ret = s->child->klass->change_aio_ctx(s->child, s->old_parent_ctx,
+                                              visited, tran, &error_abort);
+        g_hash_table_destroy(visited);
+
+        /* transaction is supposed to always succeed */
+        assert(ret == true);
+        tran_commit(tran);
+    }
+
+    bdrv_schedule_unref(bs);
+    bdrv_child_free(s->child);
+}
+
+static TransactionActionDrv bdrv_attach_child_common_drv = {
+    .abort = bdrv_attach_child_common_abort,
+    .clean = g_free,
+};
+
 /*
- * Updates @child to change its reference to point to @new_bs, including
- * checking and applying the necessary permission updates both to the old node
- * and to @new_bs.
+ * Common part of attaching bdrv child to bs or to blk or to job
+ *
+ * Function doesn't update permissions, caller is responsible for this.
  *
- * NULL is passed as @new_bs for removing the reference before freeing @child.
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
  *
- * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this
- * function uses bdrv_set_perm() to update the permissions according to the new
- * reference that @new_bs gets.
+ * Returns new created child.
  *
- * Callers must ensure that child->frozen is false.
+ * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
+ * @child_bs can move to a different AioContext in this function. Callers must
+ * make sure that their AioContext locking is still correct after this.
  */
-static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
-{
-    BlockDriverState *old_bs = child->bs;
-    uint64_t perm, shared_perm;
+static BdrvChild * GRAPH_WRLOCK
+bdrv_attach_child_common(BlockDriverState *child_bs,
+                         const char *child_name,
+                         const BdrvChildClass *child_class,
+                         BdrvChildRole child_role,
+                         uint64_t perm, uint64_t shared_perm,
+                         void *opaque,
+                         Transaction *tran, Error **errp)
+{
+    BdrvChild *new_child;
+    AioContext *parent_ctx, *new_child_ctx;
+    AioContext *child_ctx = bdrv_get_aio_context(child_bs);
+
+    assert(child_class->get_parent_desc);
+    GLOBAL_STATE_CODE();
+
+    new_child = g_new(BdrvChild, 1);
+    *new_child = (BdrvChild) {
+        .bs             = NULL,
+        .name           = g_strdup(child_name),
+        .klass          = child_class,
+        .role           = child_role,
+        .perm           = perm,
+        .shared_perm    = shared_perm,
+        .opaque         = opaque,
+    };
 
-    /* Asserts that child->frozen == false */
-    bdrv_replace_child_noperm(child, new_bs);
+    /*
+     * If the AioContexts don't match, first try to move the subtree of
+     * child_bs into the AioContext of the new parent. If this doesn't work,
+     * try moving the parent into the AioContext of child_bs instead.
+     */
+    parent_ctx = bdrv_child_get_parent_aio_context(new_child);
+    if (child_ctx != parent_ctx) {
+        Error *local_err = NULL;
+        int ret = bdrv_try_change_aio_context(child_bs, parent_ctx, NULL,
+                                              &local_err);
+
+        if (ret < 0 && child_class->change_aio_ctx) {
+            Transaction *aio_ctx_tran = tran_new();
+            GHashTable *visited = g_hash_table_new(NULL, NULL);
+            bool ret_child;
+
+            g_hash_table_add(visited, new_child);
+            ret_child = child_class->change_aio_ctx(new_child, child_ctx,
+                                                    visited, aio_ctx_tran,
+                                                    NULL);
+            if (ret_child == true) {
+                error_free(local_err);
+                ret = 0;
+            }
+            tran_finalize(aio_ctx_tran, ret_child == true ? 0 : -1);
+            g_hash_table_destroy(visited);
+        }
 
+        if (ret < 0) {
+            error_propagate(errp, local_err);
+            bdrv_child_free(new_child);
+            return NULL;
+        }
+    }
+
+    new_child_ctx = bdrv_get_aio_context(child_bs);
+    if (new_child_ctx != child_ctx) {
+        aio_context_release(child_ctx);
+        aio_context_acquire(new_child_ctx);
+    }
+
+    bdrv_ref(child_bs);
     /*
-     * Start with the new node's permissions.  If @new_bs is a (direct
-     * or indirect) child of @old_bs, we must complete the permission
-     * update on @new_bs before we loosen the restrictions on @old_bs.
-     * Otherwise, bdrv_check_perm() on @old_bs would re-initiate
-     * updating the permissions of @new_bs, and thus not purely loosen
-     * restrictions.
+     * Let every new BdrvChild start with a drained parent. Inserting the child
+     * in the graph with bdrv_replace_child_noperm() will undrain it if
+     * @child_bs is not drained.
+     *
+     * The child was only just created and is not yet visible in global state
+     * until bdrv_replace_child_noperm() inserts it into the graph, so nobody
+     * could have sent requests and polling is not necessary.
+     *
+     * Note that this means that the parent isn't fully drained yet, we only
+     * stop new requests from coming in. This is fine, we don't care about the
+     * old requests here, they are not for this child. If another place enters a
+     * drain section for the same parent, but wants it to be fully quiesced, it
+     * will not run most of the the code in .drained_begin() again (which is not
+     * a problem, we already did this), but it will still poll until the parent
+     * is fully quiesced, so it will not be negatively affected either.
      */
-    if (new_bs) {
-        bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm);
-        bdrv_set_perm(new_bs, perm, shared_perm);
+    bdrv_parent_drained_begin_single(new_child);
+    bdrv_replace_child_noperm(new_child, child_bs);
+
+    BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
+    *s = (BdrvAttachChildCommonState) {
+        .child = new_child,
+        .old_parent_ctx = parent_ctx,
+        .old_child_ctx = child_ctx,
+    };
+    tran_add(tran, &bdrv_attach_child_common_drv, s);
+
+    if (new_child_ctx != child_ctx) {
+        aio_context_release(new_child_ctx);
+        aio_context_acquire(child_ctx);
     }
 
-    if (old_bs) {
-        /* Update permissions for old node. This is guaranteed to succeed
-         * because we're just taking a parent away, so we're loosening
-         * restrictions. */
-        bool tighten_restrictions;
-        int ret;
+    return new_child;
+}
 
-        bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm);
-        ret = bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL,
-                              &tighten_restrictions, NULL);
-        assert(tighten_restrictions == false);
-        if (ret < 0) {
-            /* We only tried to loosen restrictions, so errors are not fatal */
-            bdrv_abort_perm_update(old_bs);
-        } else {
-            bdrv_set_perm(old_bs, perm, shared_perm);
-        }
+/*
+ * Function doesn't update permissions, caller is responsible for this.
+ *
+ * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
+ * @child_bs can move to a different AioContext in this function. Callers must
+ * make sure that their AioContext locking is still correct after this.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
+ */
+static BdrvChild * GRAPH_WRLOCK
+bdrv_attach_child_noperm(BlockDriverState *parent_bs,
+                         BlockDriverState *child_bs,
+                         const char *child_name,
+                         const BdrvChildClass *child_class,
+                         BdrvChildRole child_role,
+                         Transaction *tran,
+                         Error **errp)
+{
+    uint64_t perm, shared_perm;
+
+    assert(parent_bs->drv);
+    GLOBAL_STATE_CODE();
 
-        /* When the parent requiring a non-default AioContext is removed, the
-         * node moves back to the main AioContext */
-        bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL);
+    if (bdrv_recurse_has_child(child_bs, parent_bs)) {
+        error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle",
+                   child_bs->node_name, child_name, parent_bs->node_name);
+        return NULL;
     }
+
+    bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
+    bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
+                    perm, shared_perm, &perm, &shared_perm);
+
+    return bdrv_attach_child_common(child_bs, child_name, child_class,
+                                    child_role, perm, shared_perm, parent_bs,
+                                    tran, errp);
 }
 
 /*
@@ -2673,63 +3210,31 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                   const char *child_name,
                                   const BdrvChildClass *child_class,
                                   BdrvChildRole child_role,
-                                  AioContext *ctx,
                                   uint64_t perm, uint64_t shared_perm,
                                   void *opaque, Error **errp)
 {
-    BdrvChild *child;
-    Error *local_err = NULL;
     int ret;
+    BdrvChild *child;
+    Transaction *tran = tran_new();
 
-    ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, NULL,
-                                 errp);
-    if (ret < 0) {
-        bdrv_abort_perm_update(child_bs);
-        bdrv_unref(child_bs);
-        return NULL;
+    GLOBAL_STATE_CODE();
+
+    child = bdrv_attach_child_common(child_bs, child_name, child_class,
+                                   child_role, perm, shared_perm, opaque,
+                                   tran, errp);
+    if (!child) {
+        ret = -EINVAL;
+        goto out;
     }
 
-    child = g_new(BdrvChild, 1);
-    *child = (BdrvChild) {
-        .bs             = NULL,
-        .name           = g_strdup(child_name),
-        .klass          = child_class,
-        .role           = child_role,
-        .perm           = perm,
-        .shared_perm    = shared_perm,
-        .opaque         = opaque,
-    };
+    ret = bdrv_refresh_perms(child_bs, tran, errp);
 
-    /* If the AioContexts don't match, first try to move the subtree of
-     * child_bs into the AioContext of the new parent. If this doesn't work,
-     * try moving the parent into the AioContext of child_bs instead. */
-    if (bdrv_get_aio_context(child_bs) != ctx) {
-        ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err);
-        if (ret < 0 && child_class->can_set_aio_ctx) {
-            GSList *ignore = g_slist_prepend(NULL, child);
-            ctx = bdrv_get_aio_context(child_bs);
-            if (child_class->can_set_aio_ctx(child, ctx, &ignore, NULL)) {
-                error_free(local_err);
-                ret = 0;
-                g_slist_free(ignore);
-                ignore = g_slist_prepend(NULL, child);
-                child_class->set_aio_ctx(child, ctx, &ignore);
-            }
-            g_slist_free(ignore);
-        }
-        if (ret < 0) {
-            error_propagate(errp, local_err);
-            g_free(child);
-            bdrv_abort_perm_update(child_bs);
-            bdrv_unref(child_bs);
-            return NULL;
-        }
-    }
+out:
+    tran_finalize(tran, ret);
 
-    /* This performs the matching bdrv_set_perm() for the above check. */
-    bdrv_replace_child(child, child_bs);
+    bdrv_schedule_unref(child_bs);
 
-    return child;
+    return ret < 0 ? NULL : child;
 }
 
 /*
@@ -2750,51 +3255,104 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
                              BdrvChildRole child_role,
                              Error **errp)
 {
+    int ret;
     BdrvChild *child;
-    uint64_t perm, shared_perm;
+    Transaction *tran = tran_new();
 
-    bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
+    GLOBAL_STATE_CODE();
 
-    assert(parent_bs->drv);
-    bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
-                    perm, shared_perm, &perm, &shared_perm);
+    child = bdrv_attach_child_noperm(parent_bs, child_bs, child_name,
+                                     child_class, child_role, tran, errp);
+    if (!child) {
+        ret = -EINVAL;
+        goto out;
+    }
 
-    child = bdrv_root_attach_child(child_bs, child_name, child_class,
-                                   child_role, bdrv_get_aio_context(parent_bs),
-                                   perm, shared_perm, parent_bs, errp);
-    if (child == NULL) {
-        return NULL;
+    ret = bdrv_refresh_perms(parent_bs, tran, errp);
+    if (ret < 0) {
+        goto out;
     }
 
-    QLIST_INSERT_HEAD(&parent_bs->children, child, next);
-    return child;
+out:
+    tran_finalize(tran, ret);
+
+    bdrv_schedule_unref(child_bs);
+
+    return ret < 0 ? NULL : child;
 }
 
-static void bdrv_detach_child(BdrvChild *child)
+/* Callers must ensure that child->frozen is false. */
+void bdrv_root_unref_child(BdrvChild *child)
 {
-    QLIST_SAFE_REMOVE(child, next);
+    BlockDriverState *child_bs = child->bs;
 
-    bdrv_replace_child(child, NULL);
+    GLOBAL_STATE_CODE();
+    bdrv_replace_child_noperm(child, NULL);
+    bdrv_child_free(child);
 
-    g_free(child->name);
-    g_free(child);
+    if (child_bs) {
+        /*
+         * Update permissions for old node. We're just taking a parent away, so
+         * we're loosening restrictions. Errors of permission update are not
+         * fatal in this case, ignore them.
+         */
+        bdrv_refresh_perms(child_bs, NULL, NULL);
+
+        /*
+         * When the parent requiring a non-default AioContext is removed, the
+         * node moves back to the main AioContext
+         */
+        bdrv_try_change_aio_context(child_bs, qemu_get_aio_context(), NULL,
+                                    NULL);
+    }
+
+    bdrv_schedule_unref(child_bs);
 }
 
-/* Callers must ensure that child->frozen is false. */
-void bdrv_root_unref_child(BdrvChild *child)
+typedef struct BdrvSetInheritsFrom {
+    BlockDriverState *bs;
+    BlockDriverState *old_inherits_from;
+} BdrvSetInheritsFrom;
+
+static void bdrv_set_inherits_from_abort(void *opaque)
 {
-    BlockDriverState *child_bs;
+    BdrvSetInheritsFrom *s = opaque;
+
+    s->bs->inherits_from = s->old_inherits_from;
+}
+
+static TransactionActionDrv bdrv_set_inherits_from_drv = {
+    .abort = bdrv_set_inherits_from_abort,
+    .clean = g_free,
+};
+
+/* @tran is allowed to be NULL. In this case no rollback is possible */
+static void bdrv_set_inherits_from(BlockDriverState *bs,
+                                   BlockDriverState *new_inherits_from,
+                                   Transaction *tran)
+{
+    if (tran) {
+        BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1);
 
-    child_bs = child->bs;
-    bdrv_detach_child(child);
-    bdrv_unref(child_bs);
+        *s = (BdrvSetInheritsFrom) {
+            .bs = bs,
+            .old_inherits_from = bs->inherits_from,
+        };
+
+        tran_add(tran, &bdrv_set_inherits_from_drv, s);
+    }
+
+    bs->inherits_from = new_inherits_from;
 }
 
 /**
  * Clear all inherits_from pointers from children and grandchildren of
  * @root that point to @root, where necessary.
+ * @tran is allowed to be NULL. In this case no rollback is possible
  */
-static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child)
+static void GRAPH_WRLOCK
+bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child,
+                         Transaction *tran)
 {
     BdrvChild *c;
 
@@ -2809,30 +3367,33 @@ static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child)
             }
         }
         if (c == NULL) {
-            child->bs->inherits_from = NULL;
+            bdrv_set_inherits_from(child->bs, NULL, tran);
         }
     }
 
     QLIST_FOREACH(c, &child->bs->children, next) {
-        bdrv_unset_inherits_from(root, c);
+        bdrv_unset_inherits_from(root, c, tran);
     }
 }
 
 /* Callers must ensure that child->frozen is false. */
 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
 {
+    GLOBAL_STATE_CODE();
     if (child == NULL) {
         return;
     }
 
-    bdrv_unset_inherits_from(parent, child);
+    bdrv_unset_inherits_from(parent, child, NULL);
     bdrv_root_unref_child(child);
 }
 
 
-static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
+static void GRAPH_RDLOCK
+bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
 {
     BdrvChild *c;
+    GLOBAL_STATE_CODE();
     QLIST_FOREACH(c, &bs->parents, next_parent) {
         if (c->klass->change_media) {
             c->klass->change_media(c, load);
@@ -2849,62 +3410,178 @@ static bool bdrv_inherits_from_recursive(BlockDriverState *child,
         child = child->inherits_from;
     }
 
-    return child != NULL;
-}
+    return child != NULL;
+}
+
+/*
+ * Return the BdrvChildRole for @bs's backing child.  bs->backing is
+ * mostly used for COW backing children (role = COW), but also for
+ * filtered children (role = FILTERED | PRIMARY).
+ */
+static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
+{
+    if (bs->drv && bs->drv->is_filter) {
+        return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
+    } else {
+        return BDRV_CHILD_COW;
+    }
+}
+
+/*
+ * Sets the bs->backing or bs->file link of a BDS. A new reference is created;
+ * callers which don't need their own reference any more must call bdrv_unref().
+ *
+ * If the respective child is already present (i.e. we're detaching a node),
+ * that child node must be drained.
+ *
+ * Function doesn't update permissions, caller is responsible for this.
+ *
+ * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
+ * @child_bs can move to a different AioContext in this function. Callers must
+ * make sure that their AioContext locking is still correct after this.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
+ */
+static int GRAPH_WRLOCK
+bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
+                                BlockDriverState *child_bs,
+                                bool is_backing,
+                                Transaction *tran, Error **errp)
+{
+    bool update_inherits_from =
+        bdrv_inherits_from_recursive(child_bs, parent_bs);
+    BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file;
+    BdrvChildRole role;
+
+    GLOBAL_STATE_CODE();
+
+    if (!parent_bs->drv) {
+        /*
+         * Node without drv is an object without a class :/. TODO: finally fix
+         * qcow2 driver to never clear bs->drv and implement format corruption
+         * handling in other way.
+         */
+        error_setg(errp, "Node corrupted");
+        return -EINVAL;
+    }
+
+    if (child && child->frozen) {
+        error_setg(errp, "Cannot change frozen '%s' link from '%s' to '%s'",
+                   child->name, parent_bs->node_name, child->bs->node_name);
+        return -EPERM;
+    }
+
+    if (is_backing && !parent_bs->drv->is_filter &&
+        !parent_bs->drv->supports_backing)
+    {
+        error_setg(errp, "Driver '%s' of node '%s' does not support backing "
+                   "files", parent_bs->drv->format_name, parent_bs->node_name);
+        return -EINVAL;
+    }
+
+    if (parent_bs->drv->is_filter) {
+        role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
+    } else if (is_backing) {
+        role = BDRV_CHILD_COW;
+    } else {
+        /*
+         * We only can use same role as it is in existing child. We don't have
+         * infrastructure to determine role of file child in generic way
+         */
+        if (!child) {
+            error_setg(errp, "Cannot set file child to format node without "
+                       "file child");
+            return -EINVAL;
+        }
+        role = child->role;
+    }
+
+    if (child) {
+        assert(child->bs->quiesce_counter);
+        bdrv_unset_inherits_from(parent_bs, child, tran);
+        bdrv_remove_child(child, tran);
+    }
+
+    if (!child_bs) {
+        goto out;
+    }
+
+    child = bdrv_attach_child_noperm(parent_bs, child_bs,
+                                     is_backing ? "backing" : "file",
+                                     &child_of_bds, role,
+                                     tran, errp);
+    if (!child) {
+        return -EINVAL;
+    }
+
 
-/*
- * Return the BdrvChildRole for @bs's backing child.  bs->backing is
- * mostly used for COW backing children (role = COW), but also for
- * filtered children (role = FILTERED | PRIMARY).
- */
-static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
-{
-    if (bs->drv && bs->drv->is_filter) {
-        return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
-    } else {
-        return BDRV_CHILD_COW;
+    /*
+     * If inherits_from pointed recursively to bs then let's update it to
+     * point directly to bs (else it will become NULL).
+     */
+    if (update_inherits_from) {
+        bdrv_set_inherits_from(child_bs, parent_bs, tran);
     }
+
+out:
+    bdrv_refresh_limits(parent_bs, tran, NULL);
+
+    return 0;
 }
 
 /*
- * Sets the bs->backing link of a BDS. A new reference is created; callers
- * which don't need their own reference any more must call bdrv_unref().
+ * The caller must hold the AioContext lock for @backing_hd. Both @bs and
+ * @backing_hd can move to a different AioContext in this function. Callers must
+ * make sure that their AioContext locking is still correct after this.
+ *
+ * If a backing child is already present (i.e. we're detaching a node), that
+ * child node must be drained.
  */
-void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
-                         Error **errp)
+int bdrv_set_backing_hd_drained(BlockDriverState *bs,
+                                BlockDriverState *backing_hd,
+                                Error **errp)
 {
-    bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) &&
-        bdrv_inherits_from_recursive(backing_hd, bs);
-
-    if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) {
-        return;
-    }
-
-    if (backing_hd) {
-        bdrv_ref(backing_hd);
-    }
+    int ret;
+    Transaction *tran = tran_new();
 
+    GLOBAL_STATE_CODE();
+    assert(bs->quiesce_counter > 0);
     if (bs->backing) {
-        /* Cannot be frozen, we checked that above */
-        bdrv_unref_child(bs, bs->backing);
-        bs->backing = NULL;
+        assert(bs->backing->bs->quiesce_counter > 0);
     }
 
-    if (!backing_hd) {
+    ret = bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
+    if (ret < 0) {
         goto out;
     }
 
-    bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_of_bds,
-                                    bdrv_backing_role(bs), errp);
-    /* If backing_hd was already part of bs's backing chain, and
-     * inherits_from pointed recursively to bs then let's update it to
-     * point directly to bs (else it will become NULL). */
-    if (bs->backing && update_inherits_from) {
-        backing_hd->inherits_from = bs;
-    }
-
+    ret = bdrv_refresh_perms(bs, tran, errp);
 out:
-    bdrv_refresh_limits(bs, NULL);
+    tran_finalize(tran, ret);
+    return ret;
+}
+
+int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+                        Error **errp)
+{
+    BlockDriverState *drain_bs;
+    int ret;
+    GLOBAL_STATE_CODE();
+
+    bdrv_graph_rdlock_main_loop();
+    drain_bs = bs->backing ? bs->backing->bs : bs;
+    bdrv_graph_rdunlock_main_loop();
+
+    bdrv_ref(drain_bs);
+    bdrv_drained_begin(drain_bs);
+    bdrv_graph_wrlock(backing_hd);
+    ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp);
+    bdrv_graph_wrunlock(backing_hd);
+    bdrv_drained_end(drain_bs);
+    bdrv_unref(drain_bs);
+
+    return ret;
 }
 
 /*
@@ -2915,6 +3592,8 @@ out:
  * itself, all options starting with "${bdref_key}." are considered part of the
  * BlockdevRef.
  *
+ * The caller must hold the main AioContext lock.
+ *
  * TODO Can this be unified with bdrv_open_image()?
  */
 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
@@ -2926,10 +3605,14 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
     int ret = 0;
     bool implicit_backing = false;
     BlockDriverState *backing_hd;
+    AioContext *backing_hd_ctx;
     QDict *options;
     QDict *tmp_parent_options = NULL;
     Error *local_err = NULL;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (bs->backing != NULL) {
         goto free_exit;
     }
@@ -3008,11 +3691,13 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
 
     /* Hook up the backing file link; drop our reference, bs owns the
      * backing_hd reference now */
-    bdrv_set_backing_hd(bs, backing_hd, &local_err);
+    backing_hd_ctx = bdrv_get_aio_context(backing_hd);
+    aio_context_acquire(backing_hd_ctx);
+    ret = bdrv_set_backing_hd(bs, backing_hd, errp);
     bdrv_unref(backing_hd);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
+    aio_context_release(backing_hd_ctx);
+
+    if (ret < 0) {
         goto free_exit;
     }
 
@@ -3081,6 +3766,10 @@ done:
  * BlockdevRef.
  *
  * The BlockdevRef will be removed from the options QDict.
+ *
+ * The caller must hold the lock of the main AioContext and no other AioContext.
+ * @parent can move to a different AioContext in this function. Callers must
+ * make sure that their AioContext locking is still correct after this.
  */
 BdrvChild *bdrv_open_child(const char *filename,
                            QDict *options, const char *bdref_key,
@@ -3090,6 +3779,10 @@ BdrvChild *bdrv_open_child(const char *filename,
                            bool allow_none, Error **errp)
 {
     BlockDriverState *bs;
+    BdrvChild *child;
+    AioContext *ctx;
+
+    GLOBAL_STATE_CODE();
 
     bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class,
                             child_role, allow_none, errp);
@@ -3097,8 +3790,42 @@ BdrvChild *bdrv_open_child(const char *filename,
         return NULL;
     }
 
-    return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
-                             errp);
+    bdrv_graph_wrlock(NULL);
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
+    child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
+                              errp);
+    aio_context_release(ctx);
+    bdrv_graph_wrunlock(NULL);
+
+    return child;
+}
+
+/*
+ * Wrapper on bdrv_open_child() for most popular case: open primary child of bs.
+ *
+ * The caller must hold the lock of the main AioContext and no other AioContext.
+ * @parent can move to a different AioContext in this function. Callers must
+ * make sure that their AioContext locking is still correct after this.
+ */
+int bdrv_open_file_child(const char *filename,
+                         QDict *options, const char *bdref_key,
+                         BlockDriverState *parent, Error **errp)
+{
+    BdrvChildRole role;
+
+    /* commit_top and mirror_top don't use this function */
+    assert(!parent->drv->filtered_child_is_backing);
+    role = parent->drv->is_filter ?
+        (BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE;
+
+    if (!bdrv_open_child(filename, options, bdref_key, parent,
+                         &child_of_bds, role, false, errp))
+    {
+        return -EINVAL;
+    }
+
+    return 0;
 }
 
 /*
@@ -3113,6 +3840,8 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp)
     const char *reference = NULL;
     Visitor *v = NULL;
 
+    GLOBAL_STATE_CODE();
+
     if (ref->type == QTYPE_QSTRING) {
         reference = ref->u.reference;
     } else {
@@ -3148,28 +3877,31 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
                                                    QDict *snapshot_options,
                                                    Error **errp)
 {
-    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
-    char *tmp_filename = g_malloc0(PATH_MAX + 1);
+    g_autofree char *tmp_filename = NULL;
     int64_t total_size;
     QemuOpts *opts = NULL;
     BlockDriverState *bs_snapshot = NULL;
-    Error *local_err = NULL;
+    AioContext *ctx = bdrv_get_aio_context(bs);
     int ret;
 
+    GLOBAL_STATE_CODE();
+
     /* if snapshot, we create a temporary backing file and open it
        instead of opening 'filename' directly */
 
     /* Get the required size from the image */
+    aio_context_acquire(ctx);
     total_size = bdrv_getlength(bs);
+    aio_context_release(ctx);
+
     if (total_size < 0) {
         error_setg_errno(errp, -total_size, "Could not get image size");
         goto out;
     }
 
     /* Create the temporary image */
-    ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not get temporary filename");
+    tmp_filename = create_tmp_file(errp);
+    if (!tmp_filename) {
         goto out;
     }
 
@@ -3195,21 +3927,17 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
         goto out;
     }
 
-    /* bdrv_append() consumes a strong reference to bs_snapshot
-     * (i.e. it will call bdrv_unref() on it) even on error, so in
-     * order to be able to return one, we have to increase
-     * bs_snapshot's refcount here */
-    bdrv_ref(bs_snapshot);
-    bdrv_append(bs_snapshot, bs, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    aio_context_acquire(ctx);
+    ret = bdrv_append(bs_snapshot, bs, errp);
+    aio_context_release(ctx);
+
+    if (ret < 0) {
         bs_snapshot = NULL;
         goto out;
     }
 
 out:
     qobject_unref(snapshot_options);
-    g_free(tmp_filename);
     return bs_snapshot;
 }
 
@@ -3227,14 +3955,14 @@ out:
  * The reference parameter may be used to specify an existing block device which
  * should be opened. If specified, neither options nor a filename may be given,
  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
+ *
+ * The caller must always hold the main AioContext lock.
  */
-static BlockDriverState *bdrv_open_inherit(const char *filename,
-                                           const char *reference,
-                                           QDict *options, int flags,
-                                           BlockDriverState *parent,
-                                           const BdrvChildClass *child_class,
-                                           BdrvChildRole child_role,
-                                           Error **errp)
+static BlockDriverState * no_coroutine_fn
+bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
+                  int flags, BlockDriverState *parent,
+                  const BdrvChildClass *child_class, BdrvChildRole child_role,
+                  Error **errp)
 {
     int ret;
     BlockBackend *file = NULL;
@@ -3246,9 +3974,15 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
     Error *local_err = NULL;
     QDict *snapshot_options = NULL;
     int snapshot_flags = 0;
+    AioContext *ctx = qemu_get_aio_context();
 
     assert(!child_class || !flags);
     assert(!child_class == !parent);
+    GLOBAL_STATE_CODE();
+    assert(!qemu_in_coroutine());
+
+    /* TODO We'll eventually have to take a writer lock in this function */
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     if (reference) {
         bool options_non_empty = options ? qdict_size(options) : false;
@@ -3381,9 +4115,13 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
             /* Not requesting BLK_PERM_CONSISTENT_READ because we're only
              * looking at the header to guess the image format. This works even
              * in cases where a guest would not see a consistent state. */
-            file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL);
+            ctx = bdrv_get_aio_context(file_bs);
+            aio_context_acquire(ctx);
+            file = blk_new(ctx, 0, BLK_PERM_ALL);
             blk_insert_bs(file, file_bs, &local_err);
             bdrv_unref(file_bs);
+            aio_context_release(ctx);
+
             if (local_err) {
                 goto fail;
             }
@@ -3429,8 +4167,13 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
         goto fail;
     }
 
+    /* The AioContext could have changed during bdrv_open_common() */
+    ctx = bdrv_get_aio_context(bs);
+
     if (file) {
+        aio_context_acquire(ctx);
         blk_unref(file);
+        aio_context_release(ctx);
         file = NULL;
     }
 
@@ -3488,13 +4231,16 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
          * (snapshot_bs); thus, we have to drop the strong reference to bs
          * (which we obtained by calling bdrv_new()). bs will not be deleted,
          * though, because the overlay still has a reference to it. */
+        aio_context_acquire(ctx);
         bdrv_unref(bs);
+        aio_context_release(ctx);
         bs = snapshot_bs;
     }
 
     return bs;
 
 fail:
+    aio_context_acquire(ctx);
     blk_unref(file);
     qobject_unref(snapshot_options);
     qobject_unref(bs->explicit_options);
@@ -3503,20 +4249,26 @@ fail:
     bs->options = NULL;
     bs->explicit_options = NULL;
     bdrv_unref(bs);
+    aio_context_release(ctx);
     error_propagate(errp, local_err);
     return NULL;
 
 close_and_fail:
+    aio_context_acquire(ctx);
     bdrv_unref(bs);
+    aio_context_release(ctx);
     qobject_unref(snapshot_options);
     qobject_unref(options);
     error_propagate(errp, local_err);
     return NULL;
 }
 
+/* The caller must always hold the main AioContext lock. */
 BlockDriverState *bdrv_open(const char *filename, const char *reference,
                             QDict *options, int flags, Error **errp)
 {
+    GLOBAL_STATE_CODE();
+
     return bdrv_open_inherit(filename, reference, options, flags, NULL,
                              NULL, 0, errp);
 }
@@ -3571,8 +4323,8 @@ static int bdrv_reset_options_allowed(BlockDriverState *bs,
 /*
  * Returns true if @child can be reached recursively from @bs
  */
-static bool bdrv_recurse_has_child(BlockDriverState *bs,
-                                   BlockDriverState *child)
+static bool GRAPH_RDLOCK
+bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child)
 {
     BdrvChild *c;
 
@@ -3609,17 +4361,16 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
  * returns a pointer to bs_queue, which is either the newly allocated
  * bs_queue, or the existing bs_queue being used.
  *
- * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
+ * bs is drained here and undrained by bdrv_reopen_queue_free().
+ *
+ * To be called with bs->aio_context locked.
  */
-static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
-                                                 BlockDriverState *bs,
-                                                 QDict *options,
-                                                 const BdrvChildClass *klass,
-                                                 BdrvChildRole role,
-                                                 bool parent_is_format,
-                                                 QDict *parent_options,
-                                                 int parent_flags,
-                                                 bool keep_old_opts)
+static BlockReopenQueue * GRAPH_RDLOCK
+bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs,
+                        QDict *options, const BdrvChildClass *klass,
+                        BdrvChildRole role, bool parent_is_format,
+                        QDict *parent_options, int parent_flags,
+                        bool keep_old_opts)
 {
     assert(bs != NULL);
 
@@ -3629,10 +4380,14 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
     int flags;
     QemuOpts *opts;
 
-    /* Make sure that the caller remembered to use a drained section. This is
-     * important to avoid graph changes between the recursive queuing here and
-     * bdrv_reopen_multiple(). */
-    assert(bs->quiesce_counter > 0);
+    GLOBAL_STATE_CODE();
+
+    /*
+     * Strictly speaking, draining is illegal under GRAPH_RDLOCK. We know that
+     * we've been called with bdrv_graph_rdlock_main_loop(), though, so it's ok
+     * in practice.
+     */
+    bdrv_drained_begin(bs);
 
     if (bs_queue == NULL) {
         bs_queue = g_new0(BlockReopenQueue, 1);
@@ -3712,10 +4467,6 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
     bs_entry->state.explicit_options = explicit_options;
     bs_entry->state.flags = flags;
 
-    /* This needs to be overwritten in bdrv_reopen_prepare() */
-    bs_entry->state.perm = UINT64_MAX;
-    bs_entry->state.shared_perm = 0;
-
     /*
      * If keep_old_opts is false then it means that unspecified
      * options must be reset to their original value. We don't allow
@@ -3771,14 +4522,38 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
     return bs_queue;
 }
 
+/* To be called with bs->aio_context locked */
 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
                                     BlockDriverState *bs,
                                     QDict *options, bool keep_old_opts)
 {
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false,
                                    NULL, 0, keep_old_opts);
 }
 
+void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
+{
+    GLOBAL_STATE_CODE();
+    if (bs_queue) {
+        BlockReopenQueueEntry *bs_entry, *next;
+        QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
+            AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs);
+
+            aio_context_acquire(ctx);
+            bdrv_drained_end(bs_entry->state.bs);
+            aio_context_release(ctx);
+
+            qobject_unref(bs_entry->state.explicit_options);
+            qobject_unref(bs_entry->state.options);
+            g_free(bs_entry);
+        }
+        g_free(bs_queue);
+    }
+}
+
 /*
  * Reopen multiple BlockDriverStates atomically & transactionally.
  *
@@ -3795,43 +4570,68 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
  *
  * All affected nodes must be drained between bdrv_reopen_queue() and
  * bdrv_reopen_multiple().
+ *
+ * To be called from the main thread, with all other AioContexts unlocked.
  */
 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
 {
     int ret = -1;
     BlockReopenQueueEntry *bs_entry, *next;
+    AioContext *ctx;
+    Transaction *tran = tran_new();
+    g_autoptr(GSList) refresh_list = NULL;
 
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
     assert(bs_queue != NULL);
+    GLOBAL_STATE_CODE();
+
+    QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
+        ctx = bdrv_get_aio_context(bs_entry->state.bs);
+        aio_context_acquire(ctx);
+        ret = bdrv_flush(bs_entry->state.bs);
+        aio_context_release(ctx);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Error flushing drive");
+            goto abort;
+        }
+    }
 
     QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
         assert(bs_entry->state.bs->quiesce_counter > 0);
-        if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, errp)) {
-            goto cleanup;
+        ctx = bdrv_get_aio_context(bs_entry->state.bs);
+        aio_context_acquire(ctx);
+        ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp);
+        aio_context_release(ctx);
+        if (ret < 0) {
+            goto abort;
         }
         bs_entry->prepared = true;
     }
 
     QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
         BDRVReopenState *state = &bs_entry->state;
-        ret = bdrv_check_perm(state->bs, bs_queue, state->perm,
-                              state->shared_perm, NULL, NULL, errp);
-        if (ret < 0) {
-            goto cleanup_perm;
-        }
-        /* Check if new_backing_bs would accept the new permissions */
-        if (state->replace_backing_bs && state->new_backing_bs) {
-            uint64_t nperm, nshared;
-            bdrv_child_perm(state->bs, state->new_backing_bs,
-                            NULL, bdrv_backing_role(state->bs),
-                            bs_queue, state->perm, state->shared_perm,
-                            &nperm, &nshared);
-            ret = bdrv_check_update_perm(state->new_backing_bs, NULL,
-                                         nperm, nshared, NULL, NULL, errp);
-            if (ret < 0) {
-                goto cleanup_perm;
-            }
+
+        refresh_list = g_slist_prepend(refresh_list, state->bs);
+        if (state->old_backing_bs) {
+            refresh_list = g_slist_prepend(refresh_list, state->old_backing_bs);
         }
-        bs_entry->perms_checked = true;
+        if (state->old_file_bs) {
+            refresh_list = g_slist_prepend(refresh_list, state->old_file_bs);
+        }
+    }
+
+    /*
+     * Note that file-posix driver rely on permission update done during reopen
+     * (even if no permission changed), because it wants "new" permissions for
+     * reconfiguring the fd and that's why it does it in raw_check_perm(), not
+     * in raw_reopen_prepare() which is called with "old" permissions.
+     */
+    bdrv_graph_rdlock_main_loop();
+    ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp);
+    bdrv_graph_rdunlock_main_loop();
+
+    if (ret < 0) {
+        goto abort;
     }
 
     /*
@@ -3844,140 +4644,83 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
      * to first element.
      */
     QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
+        ctx = bdrv_get_aio_context(bs_entry->state.bs);
+        aio_context_acquire(ctx);
         bdrv_reopen_commit(&bs_entry->state);
+        aio_context_release(ctx);
     }
 
-    ret = 0;
-cleanup_perm:
-    QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
-        BDRVReopenState *state = &bs_entry->state;
+    bdrv_graph_wrlock(NULL);
+    tran_commit(tran);
+    bdrv_graph_wrunlock(NULL);
 
-        if (!bs_entry->perms_checked) {
-            continue;
-        }
+    QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
+        BlockDriverState *bs = bs_entry->state.bs;
 
-        if (ret == 0) {
-            bdrv_set_perm(state->bs, state->perm, state->shared_perm);
-        } else {
-            bdrv_abort_perm_update(state->bs);
-            if (state->replace_backing_bs && state->new_backing_bs) {
-                bdrv_abort_perm_update(state->new_backing_bs);
-            }
+        if (bs->drv->bdrv_reopen_commit_post) {
+            ctx = bdrv_get_aio_context(bs);
+            aio_context_acquire(ctx);
+            bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
+            aio_context_release(ctx);
         }
     }
 
-    if (ret == 0) {
-        QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
-            BlockDriverState *bs = bs_entry->state.bs;
+    ret = 0;
+    goto cleanup;
+
+abort:
+    bdrv_graph_wrlock(NULL);
+    tran_abort(tran);
+    bdrv_graph_wrunlock(NULL);
 
-            if (bs->drv->bdrv_reopen_commit_post)
-                bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
-        }
-    }
-cleanup:
     QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
-        if (ret) {
-            if (bs_entry->prepared) {
-                bdrv_reopen_abort(&bs_entry->state);
-            }
-            qobject_unref(bs_entry->state.explicit_options);
-            qobject_unref(bs_entry->state.options);
-        }
-        if (bs_entry->state.new_backing_bs) {
-            bdrv_unref(bs_entry->state.new_backing_bs);
+        if (bs_entry->prepared) {
+            ctx = bdrv_get_aio_context(bs_entry->state.bs);
+            aio_context_acquire(ctx);
+            bdrv_reopen_abort(&bs_entry->state);
+            aio_context_release(ctx);
         }
-        g_free(bs_entry);
     }
-    g_free(bs_queue);
+
+cleanup:
+    bdrv_reopen_queue_free(bs_queue);
 
     return ret;
 }
 
-int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
-                              Error **errp)
+int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
+                Error **errp)
 {
-    int ret;
+    AioContext *ctx = bdrv_get_aio_context(bs);
     BlockReopenQueue *queue;
-    QDict *opts = qdict_new();
-
-    qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
-
-    bdrv_subtree_drained_begin(bs);
-    queue = bdrv_reopen_queue(NULL, bs, opts, true);
-    ret = bdrv_reopen_multiple(queue, errp);
-    bdrv_subtree_drained_end(bs);
+    int ret;
 
-    return ret;
-}
+    GLOBAL_STATE_CODE();
 
-static BlockReopenQueueEntry *find_parent_in_reopen_queue(BlockReopenQueue *q,
-                                                          BdrvChild *c)
-{
-    BlockReopenQueueEntry *entry;
+    queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
 
-    QTAILQ_FOREACH(entry, q, entry) {
-        BlockDriverState *bs = entry->state.bs;
-        BdrvChild *child;
+    if (ctx != qemu_get_aio_context()) {
+        aio_context_release(ctx);
+    }
+    ret = bdrv_reopen_multiple(queue, errp);
 
-        QLIST_FOREACH(child, &bs->children, next) {
-            if (child == c) {
-                return entry;
-            }
-        }
+    if (ctx != qemu_get_aio_context()) {
+        aio_context_acquire(ctx);
     }
 
-    return NULL;
+    return ret;
 }
 
-static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs,
-                             uint64_t *perm, uint64_t *shared)
+int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
+                              Error **errp)
 {
-    BdrvChild *c;
-    BlockReopenQueueEntry *parent;
-    uint64_t cumulative_perms = 0;
-    uint64_t cumulative_shared_perms = BLK_PERM_ALL;
-
-    QLIST_FOREACH(c, &bs->parents, next_parent) {
-        parent = find_parent_in_reopen_queue(q, c);
-        if (!parent) {
-            cumulative_perms |= c->perm;
-            cumulative_shared_perms &= c->shared_perm;
-        } else {
-            uint64_t nperm, nshared;
-
-            bdrv_child_perm(parent->state.bs, bs, c, c->role, q,
-                            parent->state.perm, parent->state.shared_perm,
-                            &nperm, &nshared);
-
-            cumulative_perms |= nperm;
-            cumulative_shared_perms &= nshared;
-        }
-    }
-    *perm = cumulative_perms;
-    *shared = cumulative_shared_perms;
-}
+    QDict *opts = qdict_new();
 
-static bool bdrv_reopen_can_attach(BlockDriverState *parent,
-                                   BdrvChild *child,
-                                   BlockDriverState *new_child,
-                                   Error **errp)
-{
-    AioContext *parent_ctx = bdrv_get_aio_context(parent);
-    AioContext *child_ctx = bdrv_get_aio_context(new_child);
-    GSList *ignore;
-    bool ret;
+    GLOBAL_STATE_CODE();
 
-    ignore = g_slist_prepend(NULL, child);
-    ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL);
-    g_slist_free(ignore);
-    if (ret) {
-        return ret;
-    }
+    qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
 
-    ignore = g_slist_prepend(NULL, child);
-    ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp);
-    g_slist_free(ignore);
-    return ret;
+    return bdrv_reopen(bs, opts, true, errp);
 }
 
 /*
@@ -3995,112 +4738,142 @@ static bool bdrv_reopen_can_attach(BlockDriverState *parent,
  * true and reopen_state->new_backing_bs contains a pointer to the new
  * backing BlockDriverState (or NULL).
  *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
+ *
  * Return 0 on success, otherwise return < 0 and set @errp.
+ *
+ * The caller must hold the AioContext lock of @reopen_state->bs.
+ * @reopen_state->bs can move to a different AioContext in this function.
+ * Callers must make sure that their AioContext locking is still correct after
+ * this.
  */
-static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state,
-                                     Error **errp)
+static int GRAPH_UNLOCKED
+bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
+                                  bool is_backing, Transaction *tran,
+                                  Error **errp)
 {
     BlockDriverState *bs = reopen_state->bs;
-    BlockDriverState *overlay_bs, *below_bs, *new_backing_bs;
+    BlockDriverState *new_child_bs;
+    BlockDriverState *old_child_bs;
+
+    const char *child_name = is_backing ? "backing" : "file";
     QObject *value;
     const char *str;
+    AioContext *ctx, *old_ctx;
+    bool has_child;
+    int ret;
+
+    GLOBAL_STATE_CODE();
 
-    value = qdict_get(reopen_state->options, "backing");
+    value = qdict_get(reopen_state->options, child_name);
     if (value == NULL) {
         return 0;
     }
 
+    bdrv_graph_rdlock_main_loop();
+
     switch (qobject_type(value)) {
     case QTYPE_QNULL:
-        new_backing_bs = NULL;
+        assert(is_backing); /* The 'file' option does not allow a null value */
+        new_child_bs = NULL;
         break;
     case QTYPE_QSTRING:
-        str = qobject_get_try_str(value);
-        new_backing_bs = bdrv_lookup_bs(NULL, str, errp);
-        if (new_backing_bs == NULL) {
-            return -EINVAL;
-        } else if (bdrv_recurse_has_child(new_backing_bs, bs)) {
-            error_setg(errp, "Making '%s' a backing file of '%s' "
-                       "would create a cycle", str, bs->node_name);
-            return -EINVAL;
+        str = qstring_get_str(qobject_to(QString, value));
+        new_child_bs = bdrv_lookup_bs(NULL, str, errp);
+        if (new_child_bs == NULL) {
+            ret = -EINVAL;
+            goto out_rdlock;
+        }
+
+        has_child = bdrv_recurse_has_child(new_child_bs, bs);
+        if (has_child) {
+            error_setg(errp, "Making '%s' a %s child of '%s' would create a "
+                       "cycle", str, child_name, bs->node_name);
+            ret = -EINVAL;
+            goto out_rdlock;
         }
         break;
     default:
-        /* 'backing' does not allow any other data type */
+        /*
+         * The options QDict has been flattened, so 'backing' and 'file'
+         * do not allow any other data type here.
+         */
         g_assert_not_reached();
     }
 
-    /*
-     * Check AioContext compatibility so that the bdrv_set_backing_hd() call in
-     * bdrv_reopen_commit() won't fail.
-     */
-    if (new_backing_bs) {
-        if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) {
-            return -EINVAL;
-        }
+    old_child_bs = is_backing ? child_bs(bs->backing) : child_bs(bs->file);
+    if (old_child_bs == new_child_bs) {
+        ret = 0;
+        goto out_rdlock;
     }
 
-    /*
-     * Ensure that @bs can really handle backing files, because we are
-     * about to give it one (or swap the existing one)
-     */
-    if (bs->drv->is_filter) {
-        /* Filters always have a file or a backing child */
-        if (!bs->backing) {
-            error_setg(errp, "'%s' is a %s filter node that does not support a "
-                       "backing child", bs->node_name, bs->drv->format_name);
-            return -EINVAL;
+    if (old_child_bs) {
+        if (bdrv_skip_implicit_filters(old_child_bs) == new_child_bs) {
+            ret = 0;
+            goto out_rdlock;
         }
-    } else if (!bs->drv->supports_backing) {
-        error_setg(errp, "Driver '%s' of node '%s' does not support backing "
-                   "files", bs->drv->format_name, bs->node_name);
-        return -EINVAL;
-    }
 
-    /*
-     * Find the "actual" backing file by skipping all links that point
-     * to an implicit node, if any (e.g. a commit filter node).
-     * We cannot use any of the bdrv_skip_*() functions here because
-     * those return the first explicit node, while we are looking for
-     * its overlay here.
-     */
-    overlay_bs = bs;
-    for (below_bs = bdrv_filter_or_cow_bs(overlay_bs);
-         below_bs && below_bs->implicit;
-         below_bs = bdrv_filter_or_cow_bs(overlay_bs))
-    {
-        overlay_bs = below_bs;
+        if (old_child_bs->implicit) {
+            error_setg(errp, "Cannot replace implicit %s child of %s",
+                       child_name, bs->node_name);
+            ret = -EPERM;
+            goto out_rdlock;
+        }
     }
 
-    /* If we want to replace the backing file we need some extra checks */
-    if (new_backing_bs != bdrv_filter_or_cow_bs(overlay_bs)) {
-        /* Check for implicit nodes between bs and its backing file */
-        if (bs != overlay_bs) {
-            error_setg(errp, "Cannot change backing link if '%s' has "
-                       "an implicit backing file", bs->node_name);
-            return -EPERM;
-        }
+    if (bs->drv->is_filter && !old_child_bs) {
         /*
-         * Check if the backing link that we want to replace is frozen.
-         * Note that
-         * bdrv_filter_or_cow_child(overlay_bs) == overlay_bs->backing,
-         * because we know that overlay_bs == bs, and that @bs
-         * either is a filter that uses ->backing or a COW format BDS
-         * with bs->drv->supports_backing == true.
+         * Filters always have a file or a backing child, so we are trying to
+         * change wrong child
          */
-        if (bdrv_is_backing_chain_frozen(overlay_bs,
-                                         child_bs(overlay_bs->backing), errp))
-        {
-            return -EPERM;
-        }
-        reopen_state->replace_backing_bs = true;
-        if (new_backing_bs) {
-            bdrv_ref(new_backing_bs);
-            reopen_state->new_backing_bs = new_backing_bs;
-        }
+        error_setg(errp, "'%s' is a %s filter node that does not support a "
+                   "%s child", bs->node_name, bs->drv->format_name, child_name);
+        ret = -EINVAL;
+        goto out_rdlock;
+    }
+
+    if (is_backing) {
+        reopen_state->old_backing_bs = old_child_bs;
+    } else {
+        reopen_state->old_file_bs = old_child_bs;
+    }
+
+    if (old_child_bs) {
+        bdrv_ref(old_child_bs);
+        bdrv_drained_begin(old_child_bs);
+    }
+
+    old_ctx = bdrv_get_aio_context(bs);
+    ctx = bdrv_get_aio_context(new_child_bs);
+    if (old_ctx != ctx) {
+        aio_context_release(old_ctx);
+        aio_context_acquire(ctx);
+    }
+
+    bdrv_graph_rdunlock_main_loop();
+    bdrv_graph_wrlock(new_child_bs);
+
+    ret = bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
+                                          tran, errp);
+
+    bdrv_graph_wrunlock_ctx(ctx);
+
+    if (old_ctx != ctx) {
+        aio_context_release(ctx);
+        aio_context_acquire(old_ctx);
     }
 
-    return 0;
+    if (old_child_bs) {
+        bdrv_drained_end(old_child_bs);
+        bdrv_unref(old_child_bs);
+    }
+
+    return ret;
+
+out_rdlock:
+    bdrv_graph_rdunlock_main_loop();
+    return ret;
 }
 
 /*
@@ -4119,9 +4892,14 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state,
  * It is the responsibility of the caller to then call the abort() or
  * commit() for any other BDS that have been left in a prepare() state
  *
+ * The caller must hold the AioContext lock of @reopen_state->bs.
+ *
+ * After calling this function, the transaction @change_child_tran may only be
+ * completed while holding a writer lock for the graph.
  */
-int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
-                        Error **errp)
+static int GRAPH_UNLOCKED
+bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
+                    Transaction *change_child_tran, Error **errp)
 {
     int ret = -1;
     int old_flags;
@@ -4135,6 +4913,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
 
     assert(reopen_state != NULL);
     assert(reopen_state->bs->drv != NULL);
+    GLOBAL_STATE_CODE();
     drv = reopen_state->bs->drv;
 
     /* This function and each driver's bdrv_reopen_prepare() remove
@@ -4182,22 +4961,15 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
      * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
      * not set, or if the BDS still has copy_on_read enabled */
     read_only = !(reopen_state->flags & BDRV_O_RDWR);
+
+    bdrv_graph_rdlock_main_loop();
     ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
+    bdrv_graph_rdunlock_main_loop();
     if (local_err) {
         error_propagate(errp, local_err);
         goto error;
     }
 
-    /* Calculate required permissions after reopening */
-    bdrv_reopen_perm(queue, reopen_state->bs,
-                     &reopen_state->perm, &reopen_state->shared_perm);
-
-    ret = bdrv_flush(reopen_state->bs);
-    if (ret) {
-        error_setg_errno(errp, -ret, "Error flushing drive");
-        goto error;
-    }
-
     if (drv->bdrv_reopen_prepare) {
         /*
          * If a driver-specific option is missing, it means that we
@@ -4215,7 +4987,9 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
             if (local_err != NULL) {
                 error_propagate(errp, local_err);
             } else {
+                bdrv_graph_rdlock_main_loop();
                 bdrv_refresh_filename(reopen_state->bs);
+                bdrv_graph_rdunlock_main_loop();
                 error_setg(errp, "failed while preparing to reopen image '%s'",
                            reopen_state->bs->filename);
             }
@@ -4224,9 +4998,11 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
     } else {
         /* It is currently mandatory to have a bdrv_reopen_prepare()
          * handler for each supported drv. */
+        bdrv_graph_rdlock_main_loop();
         error_setg(errp, "Block format '%s' used by node '%s' "
                    "does not support reopening files", drv->format_name,
                    bdrv_get_device_or_node_name(reopen_state->bs));
+        bdrv_graph_rdunlock_main_loop();
         ret = -1;
         goto error;
     }
@@ -4238,31 +5014,45 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
      * file or if the image file has a backing file name as part of
      * its metadata. Otherwise the 'backing' option can be omitted.
      */
+    bdrv_graph_rdlock_main_loop();
     if (drv->supports_backing && reopen_state->backing_missing &&
         (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) {
         error_setg(errp, "backing is missing for '%s'",
                    reopen_state->bs->node_name);
+        bdrv_graph_rdunlock_main_loop();
         ret = -EINVAL;
         goto error;
     }
+    bdrv_graph_rdunlock_main_loop();
 
     /*
      * Allow changing the 'backing' option. The new value can be
      * either a reference to an existing node (using its node name)
      * or NULL to simply detach the current backing file.
      */
-    ret = bdrv_reopen_parse_backing(reopen_state, errp);
+    ret = bdrv_reopen_parse_file_or_backing(reopen_state, true,
+                                            change_child_tran, errp);
     if (ret < 0) {
         goto error;
     }
     qdict_del(reopen_state->options, "backing");
 
+    /* Allow changing the 'file' option. In this case NULL is not allowed */
+    ret = bdrv_reopen_parse_file_or_backing(reopen_state, false,
+                                            change_child_tran, errp);
+    if (ret < 0) {
+        goto error;
+    }
+    qdict_del(reopen_state->options, "file");
+
     /* Options that are not handled are only okay if they are unchanged
      * compared to the old state. It is expected that some options are only
      * used for the initial open, but not reopen (e.g. filename) */
     if (qdict_size(reopen_state->options)) {
         const QDictEntry *entry = qdict_first(reopen_state->options);
 
+        GRAPH_RDLOCK_GUARD_MAINLOOP();
+
         do {
             QObject *new = entry->value;
             QObject *old = qdict_get(reopen_state->bs->options, entry->key);
@@ -4278,8 +5068,8 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
                 }
 
                 if (child) {
-                    const char *str = qobject_get_try_str(new);
-                    if (!strcmp(child->bs->node_name, str)) {
+                    if (!strcmp(child->bs->node_name,
+                                qstring_get_str(qobject_to(QString, new)))) {
                         continue; /* Found child with this name, skip option */
                     }
                 }
@@ -4336,7 +5126,7 @@ error:
  * makes them final by swapping the staging BlockDriverState contents into
  * the active BlockDriverState contents.
  */
-void bdrv_reopen_commit(BDRVReopenState *reopen_state)
+static void GRAPH_UNLOCKED bdrv_reopen_commit(BDRVReopenState *reopen_state)
 {
     BlockDriver *drv;
     BlockDriverState *bs;
@@ -4346,63 +5136,52 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)
     bs = reopen_state->bs;
     drv = bs->drv;
     assert(drv != NULL);
+    GLOBAL_STATE_CODE();
 
     /* If there are any driver level actions to take */
     if (drv->bdrv_reopen_commit) {
         drv->bdrv_reopen_commit(reopen_state);
     }
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     /* set BDS specific flags now */
     qobject_unref(bs->explicit_options);
     qobject_unref(bs->options);
+    qobject_ref(reopen_state->explicit_options);
+    qobject_ref(reopen_state->options);
 
     bs->explicit_options   = reopen_state->explicit_options;
     bs->options            = reopen_state->options;
     bs->open_flags         = reopen_state->flags;
-    bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
     bs->detect_zeroes      = reopen_state->detect_zeroes;
 
-    if (reopen_state->replace_backing_bs) {
-        qdict_del(bs->explicit_options, "backing");
-        qdict_del(bs->options, "backing");
-    }
-
     /* Remove child references from bs->options and bs->explicit_options.
      * Child options were already removed in bdrv_reopen_queue_child() */
     QLIST_FOREACH(child, &bs->children, next) {
         qdict_del(bs->explicit_options, child->name);
         qdict_del(bs->options, child->name);
     }
+    /* backing is probably removed, so it's not handled by previous loop */
+    qdict_del(bs->explicit_options, "backing");
+    qdict_del(bs->options, "backing");
 
-    /*
-     * Change the backing file if a new one was specified. We do this
-     * after updating bs->options, so bdrv_refresh_filename() (called
-     * from bdrv_set_backing_hd()) has the new values.
-     */
-    if (reopen_state->replace_backing_bs) {
-        BlockDriverState *old_backing_bs = child_bs(bs->backing);
-        assert(!old_backing_bs || !old_backing_bs->implicit);
-        /* Abort the permission update on the backing bs we're detaching */
-        if (old_backing_bs) {
-            bdrv_abort_perm_update(old_backing_bs);
-        }
-        bdrv_set_backing_hd(bs, reopen_state->new_backing_bs, &error_abort);
-    }
-
-    bdrv_refresh_limits(bs, NULL);
+    bdrv_refresh_limits(bs, NULL, NULL);
+    bdrv_refresh_total_sectors(bs, bs->total_sectors);
 }
 
 /*
  * Abort the reopen, and delete and free the staged changes in
  * reopen_state
  */
-void bdrv_reopen_abort(BDRVReopenState *reopen_state)
+static void GRAPH_UNLOCKED bdrv_reopen_abort(BDRVReopenState *reopen_state)
 {
     BlockDriver *drv;
 
     assert(reopen_state != NULL);
     drv = reopen_state->bs->drv;
     assert(drv != NULL);
+    GLOBAL_STATE_CODE();
 
     if (drv->bdrv_reopen_abort) {
         drv->bdrv_reopen_abort(reopen_state);
@@ -4415,6 +5194,7 @@ static void bdrv_close(BlockDriverState *bs)
     BdrvAioNotifier *ban, *ban_next;
     BdrvChild *child, *next;
 
+    GLOBAL_STATE_CODE();
     assert(!bs->refcnt);
 
     bdrv_drained_begin(bs); /* complete I/O */
@@ -4429,12 +5209,15 @@ static void bdrv_close(BlockDriverState *bs)
         bs->drv = NULL;
     }
 
+    bdrv_graph_wrlock(bs);
     QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
         bdrv_unref_child(bs, child);
     }
 
-    bs->backing = NULL;
-    bs->file = NULL;
+    assert(!bs->backing);
+    assert(!bs->file);
+    bdrv_graph_wrunlock(bs);
+
     g_free(bs->opaque);
     bs->opaque = NULL;
     qatomic_set(&bs->copy_on_read, 0);
@@ -4449,6 +5232,8 @@ static void bdrv_close(BlockDriverState *bs)
     bs->explicit_options = NULL;
     qobject_unref(bs->full_open_options);
     bs->full_open_options = NULL;
+    g_free(bs->block_status_cache);
+    bs->block_status_cache = NULL;
 
     bdrv_release_named_dirty_bitmaps(bs);
     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
@@ -4471,8 +5256,8 @@ static void bdrv_close(BlockDriverState *bs)
 
 void bdrv_close_all(void)
 {
+    GLOBAL_STATE_CODE();
     assert(job_next(NULL) == NULL);
-    blk_exp_close_all();
 
     /* Drop references from requests still in flight, such as canceled block
      * jobs whose AIO context has not been polled yet */
@@ -4484,7 +5269,7 @@ void bdrv_close_all(void)
     assert(QTAILQ_EMPTY(&all_bdrv_states));
 }
 
-static bool should_update_child(BdrvChild *c, BlockDriverState *to)
+static bool GRAPH_RDLOCK should_update_child(BdrvChild *c, BlockDriverState *to)
 {
     GQueue *queue;
     GHashTable *found;
@@ -4563,31 +5348,58 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to)
     return ret;
 }
 
+static void bdrv_remove_child_commit(void *opaque)
+{
+    GLOBAL_STATE_CODE();
+    bdrv_child_free(opaque);
+}
+
+static TransactionActionDrv bdrv_remove_child_drv = {
+    .commit = bdrv_remove_child_commit,
+};
+
 /*
- * With auto_skip=true bdrv_replace_node_common skips updating from parents
- * if it creates a parent-child relation loop or if parent is block-job.
+ * Function doesn't update permissions, caller is responsible for this.
  *
- * With auto_skip=false the error is returned if from has a parent which should
- * not be updated.
+ * @child->bs (if non-NULL) must be drained.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
+ */
+static void GRAPH_WRLOCK bdrv_remove_child(BdrvChild *child, Transaction *tran)
+{
+    if (!child) {
+        return;
+    }
+
+    if (child->bs) {
+        assert(child->quiesced_parent);
+        bdrv_replace_child_tran(child, NULL, tran);
+    }
+
+    tran_add(tran, &bdrv_remove_child_drv, child);
+}
+
+/*
+ * Both @from and @to (if non-NULL) must be drained. @to must be kept drained
+ * until the transaction is completed.
+ *
+ * After calling this function, the transaction @tran may only be completed
+ * while holding a writer lock for the graph.
  */
-static void bdrv_replace_node_common(BlockDriverState *from,
-                                     BlockDriverState *to,
-                                     bool auto_skip, Error **errp)
+static int GRAPH_WRLOCK
+bdrv_replace_node_noperm(BlockDriverState *from,
+                         BlockDriverState *to,
+                         bool auto_skip, Transaction *tran,
+                         Error **errp)
 {
     BdrvChild *c, *next;
-    GSList *list = NULL, *p;
-    uint64_t perm = 0, shared = BLK_PERM_ALL;
-    int ret;
 
-    /* Make sure that @from doesn't go away until we have successfully attached
-     * all of its parents to @to. */
-    bdrv_ref(from);
+    GLOBAL_STATE_CODE();
 
-    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
-    assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
-    bdrv_drained_begin(from);
+    assert(from->quiesce_counter);
+    assert(to->quiesce_counter);
 
-    /* Put all parents into @list and calculate their cumulative permissions */
     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
         assert(c->bs == from);
         if (!should_update_child(c, to)) {
@@ -4596,50 +5408,113 @@ static void bdrv_replace_node_common(BlockDriverState *from,
             }
             error_setg(errp, "Should not change '%s' link to '%s'",
                        c->name, from->node_name);
-            goto out;
+            return -EINVAL;
         }
         if (c->frozen) {
             error_setg(errp, "Cannot change '%s' link to '%s'",
                        c->name, from->node_name);
-            goto out;
+            return -EPERM;
+        }
+        bdrv_replace_child_tran(c, to, tran);
+    }
+
+    return 0;
+}
+
+/*
+ * Switch all parents of @from to point to @to instead. @from and @to must be in
+ * the same AioContext and both must be drained.
+ *
+ * With auto_skip=true bdrv_replace_node_common skips updating from parents
+ * if it creates a parent-child relation loop or if parent is block-job.
+ *
+ * With auto_skip=false the error is returned if from has a parent which should
+ * not be updated.
+ *
+ * With @detach_subchain=true @to must be in a backing chain of @from. In this
+ * case backing link of the cow-parent of @to is removed.
+ */
+static int GRAPH_WRLOCK
+bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to,
+                         bool auto_skip, bool detach_subchain, Error **errp)
+{
+    Transaction *tran = tran_new();
+    g_autoptr(GSList) refresh_list = NULL;
+    BlockDriverState *to_cow_parent = NULL;
+    int ret;
+
+    GLOBAL_STATE_CODE();
+
+    assert(from->quiesce_counter);
+    assert(to->quiesce_counter);
+    assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
+
+    if (detach_subchain) {
+        assert(bdrv_chain_contains(from, to));
+        assert(from != to);
+        for (to_cow_parent = from;
+             bdrv_filter_or_cow_bs(to_cow_parent) != to;
+             to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent))
+        {
+            ;
         }
-        list = g_slist_prepend(list, c);
-        perm |= c->perm;
-        shared &= c->shared_perm;
     }
 
-    /* Check whether the required permissions can be granted on @to, ignoring
-     * all BdrvChild in @list so that they can't block themselves. */
-    ret = bdrv_check_update_perm(to, NULL, perm, shared, list, NULL, errp);
+    /*
+     * Do the replacement without permission update.
+     * Replacement may influence the permissions, we should calculate new
+     * permissions based on new graph. If we fail, we'll roll-back the
+     * replacement.
+     */
+    ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp);
     if (ret < 0) {
-        bdrv_abort_perm_update(to);
         goto out;
     }
 
-    /* Now actually perform the change. We performed the permission check for
-     * all elements of @list at once, so set the permissions all at once at the
-     * very end. */
-    for (p = list; p != NULL; p = p->next) {
-        c = p->data;
+    if (detach_subchain) {
+        /* to_cow_parent is already drained because from is drained */
+        bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
+    }
+
+    refresh_list = g_slist_prepend(refresh_list, to);
+    refresh_list = g_slist_prepend(refresh_list, from);
 
-        bdrv_ref(to);
-        bdrv_replace_child_noperm(c, to);
-        bdrv_unref(from);
+    ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
+    if (ret < 0) {
+        goto out;
     }
 
-    bdrv_get_cumulative_perm(to, &perm, &shared);
-    bdrv_set_perm(to, perm, shared);
+    ret = 0;
 
 out:
-    g_slist_free(list);
-    bdrv_drained_end(from);
-    bdrv_unref(from);
+    tran_finalize(tran, ret);
+    return ret;
 }
 
-void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
-                       Error **errp)
+int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
+                      Error **errp)
+{
+    return bdrv_replace_node_common(from, to, true, false, errp);
+}
+
+int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
 {
-    return bdrv_replace_node_common(from, to, true, errp);
+    BlockDriverState *child_bs;
+    int ret;
+
+    GLOBAL_STATE_CODE();
+
+    bdrv_graph_rdlock_main_loop();
+    child_bs = bdrv_filter_or_cow_bs(bs);
+    bdrv_graph_rdunlock_main_loop();
+
+    bdrv_drained_begin(child_bs);
+    bdrv_graph_wrlock(bs);
+    ret = bdrv_replace_node_common(bs, child_bs, true, true, errp);
+    bdrv_graph_wrunlock(bs);
+    bdrv_drained_end(child_bs);
+
+    return ret;
 }
 
 /*
@@ -4649,43 +5524,126 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
  * This will modify the BlockDriverState fields, and swap contents
  * between bs_new and bs_top. Both bs_new and bs_top are modified.
  *
- * bs_new must not be attached to a BlockBackend.
+ * bs_new must not be attached to a BlockBackend and must not have backing
+ * child.
  *
  * This function does not create any image files.
  *
- * bdrv_append() takes ownership of a bs_new reference and unrefs it because
- * that's what the callers commonly need. bs_new will be referenced by the old
- * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
- * reference of its own, it must call bdrv_ref().
+ * The caller must hold the AioContext lock for @bs_top.
  */
-void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
-                 Error **errp)
+int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
+                Error **errp)
 {
-    Error *local_err = NULL;
+    int ret;
+    BdrvChild *child;
+    Transaction *tran = tran_new();
+    AioContext *old_context, *new_context = NULL;
 
-    bdrv_set_backing_hd(bs_new, bs_top, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    GLOBAL_STATE_CODE();
+
+    bdrv_graph_rdlock_main_loop();
+    assert(!bs_new->backing);
+    bdrv_graph_rdunlock_main_loop();
+
+    old_context = bdrv_get_aio_context(bs_top);
+    bdrv_drained_begin(bs_top);
+
+    /*
+     * bdrv_drained_begin() requires that only the AioContext of the drained
+     * node is locked, and at this point it can still differ from the AioContext
+     * of bs_top.
+     */
+    new_context = bdrv_get_aio_context(bs_new);
+    aio_context_release(old_context);
+    aio_context_acquire(new_context);
+    bdrv_drained_begin(bs_new);
+    aio_context_release(new_context);
+    aio_context_acquire(old_context);
+    new_context = NULL;
+
+    bdrv_graph_wrlock(bs_top);
+
+    child = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
+                                     &child_of_bds, bdrv_backing_role(bs_new),
+                                     tran, errp);
+    if (!child) {
+        ret = -EINVAL;
         goto out;
     }
 
-    bdrv_replace_node(bs_top, bs_new, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        bdrv_set_backing_hd(bs_new, NULL, &error_abort);
+    /*
+     * bdrv_attach_child_noperm could change the AioContext of bs_top and
+     * bs_new, but at least they are in the same AioContext now. This is the
+     * AioContext that we need to lock for the rest of the function.
+     */
+    new_context = bdrv_get_aio_context(bs_top);
+
+    if (old_context != new_context) {
+        aio_context_release(old_context);
+        aio_context_acquire(new_context);
+    }
+
+    ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
+    if (ret < 0) {
         goto out;
     }
 
-    /* bs_new is now referenced by its new parents, we don't need the
-     * additional reference any more. */
+    ret = bdrv_refresh_perms(bs_new, tran, errp);
 out:
-    bdrv_unref(bs_new);
+    tran_finalize(tran, ret);
+
+    bdrv_refresh_limits(bs_top, NULL, NULL);
+    bdrv_graph_wrunlock(bs_top);
+
+    bdrv_drained_end(bs_top);
+    bdrv_drained_end(bs_new);
+
+    if (new_context && old_context != new_context) {
+        aio_context_release(new_context);
+        aio_context_acquire(old_context);
+    }
+
+    return ret;
+}
+
+/* Not for empty child */
+int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
+                          Error **errp)
+{
+    int ret;
+    Transaction *tran = tran_new();
+    g_autoptr(GSList) refresh_list = NULL;
+    BlockDriverState *old_bs = child->bs;
+
+    GLOBAL_STATE_CODE();
+
+    bdrv_ref(old_bs);
+    bdrv_drained_begin(old_bs);
+    bdrv_drained_begin(new_bs);
+    bdrv_graph_wrlock(new_bs);
+
+    bdrv_replace_child_tran(child, new_bs, tran);
+
+    refresh_list = g_slist_prepend(refresh_list, old_bs);
+    refresh_list = g_slist_prepend(refresh_list, new_bs);
+
+    ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
+
+    tran_finalize(tran, ret);
+
+    bdrv_graph_wrunlock(new_bs);
+    bdrv_drained_end(old_bs);
+    bdrv_drained_end(new_bs);
+    bdrv_unref(old_bs);
+
+    return ret;
 }
 
 static void bdrv_delete(BlockDriverState *bs)
 {
     assert(bdrv_op_blocker_is_empty(bs));
     assert(!bs->refcnt);
+    GLOBAL_STATE_CODE();
 
     /* remove from list, if necessary */
     if (bs->node_name[0] != '\0') {
@@ -4695,9 +5653,91 @@ static void bdrv_delete(BlockDriverState *bs)
 
     bdrv_close(bs);
 
+    qemu_mutex_destroy(&bs->reqs_lock);
+
     g_free(bs);
 }
 
+
+/*
+ * Replace @bs by newly created block node.
+ *
+ * @options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict belongs to the block layer
+ * after the call (even on failure), so if the caller intends to reuse the
+ * dictionary, it needs to use qobject_ref() before calling bdrv_open.
+ *
+ * The caller holds the AioContext lock for @bs. It must make sure that @bs
+ * stays in the same AioContext, i.e. @options must not refer to nodes in a
+ * different AioContext.
+ */
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
+                                   int flags, Error **errp)
+{
+    ERRP_GUARD();
+    int ret;
+    AioContext *ctx = bdrv_get_aio_context(bs);
+    BlockDriverState *new_node_bs = NULL;
+    const char *drvname, *node_name;
+    BlockDriver *drv;
+
+    drvname = qdict_get_try_str(options, "driver");
+    if (!drvname) {
+        error_setg(errp, "driver is not specified");
+        goto fail;
+    }
+
+    drv = bdrv_find_format(drvname);
+    if (!drv) {
+        error_setg(errp, "Unknown driver: '%s'", drvname);
+        goto fail;
+    }
+
+    node_name = qdict_get_try_str(options, "node-name");
+
+    GLOBAL_STATE_CODE();
+
+    aio_context_release(ctx);
+    aio_context_acquire(qemu_get_aio_context());
+    new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags,
+                                            errp);
+    aio_context_release(qemu_get_aio_context());
+    aio_context_acquire(ctx);
+    assert(bdrv_get_aio_context(bs) == ctx);
+
+    options = NULL; /* bdrv_new_open_driver() eats options */
+    if (!new_node_bs) {
+        error_prepend(errp, "Could not create node: ");
+        goto fail;
+    }
+
+    /*
+     * Make sure that @bs doesn't go away until we have successfully attached
+     * all of its parents to @new_node_bs and undrained it again.
+     */
+    bdrv_ref(bs);
+    bdrv_drained_begin(bs);
+    bdrv_drained_begin(new_node_bs);
+    bdrv_graph_wrlock(new_node_bs);
+    ret = bdrv_replace_node(bs, new_node_bs, errp);
+    bdrv_graph_wrunlock(new_node_bs);
+    bdrv_drained_end(new_node_bs);
+    bdrv_drained_end(bs);
+    bdrv_unref(bs);
+
+    if (ret < 0) {
+        error_prepend(errp, "Could not replace node: ");
+        goto fail;
+    }
+
+    return new_node_bs;
+
+fail:
+    qobject_unref(options);
+    bdrv_unref(new_node_bs);
+    return NULL;
+}
+
 /*
  * Run consistency checks on an image
  *
@@ -4708,6 +5748,8 @@ static void bdrv_delete(BlockDriverState *bs)
 int coroutine_fn bdrv_co_check(BlockDriverState *bs,
                                BdrvCheckResult *res, BdrvCheckMode fix)
 {
+    IO_CODE();
+    assert_bdrv_graph_readable();
     if (bs->drv == NULL) {
         return -ENOMEDIUM;
     }
@@ -4727,12 +5769,15 @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs,
  *            image file header
  * -ENOTSUP - format driver doesn't support changing the backing file
  */
-int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
-                             const char *backing_fmt, bool warn)
+int coroutine_fn
+bdrv_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
+                            const char *backing_fmt, bool require)
 {
     BlockDriver *drv = bs->drv;
     int ret;
 
+    IO_CODE();
+
     if (!drv) {
         return -ENOMEDIUM;
     }
@@ -4742,14 +5787,12 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
         return -EINVAL;
     }
 
-    if (warn && backing_file && !backing_fmt) {
-        warn_report("Deprecated use of backing file without explicit "
-                    "backing format, use of this image requires "
-                    "potentially unsafe format probing");
+    if (require && backing_file && !backing_fmt) {
+        return -EINVAL;
     }
 
-    if (drv->bdrv_change_backing_file != NULL) {
-        ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
+    if (drv->bdrv_co_change_backing_file != NULL) {
+        ret = drv->bdrv_co_change_backing_file(bs, backing_file, backing_fmt);
     } else {
         ret = -ENOTSUP;
     }
@@ -4776,6 +5819,9 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
                                     BlockDriverState *bs)
 {
+
+    GLOBAL_STATE_CODE();
+
     bs = bdrv_skip_filters(bs);
     active = bdrv_skip_filters(active);
 
@@ -4793,6 +5839,8 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
 /* Given a BDS, searches for the base layer. */
 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
 {
+    GLOBAL_STATE_CODE();
+
     return bdrv_find_overlay(bs, NULL);
 }
 
@@ -4801,12 +5849,15 @@ BlockDriverState *bdrv_find_base(BlockDriverState *bs)
  * between @bs and @base is frozen. @errp is set if that's the case.
  * @base must be reachable from @bs, or NULL.
  */
-bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
-                                  Error **errp)
+static bool GRAPH_RDLOCK
+bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
+                             Error **errp)
 {
     BlockDriverState *i;
     BdrvChild *child;
 
+    GLOBAL_STATE_CODE();
+
     for (i = bs; i != base; i = child_bs(child)) {
         child = bdrv_filter_or_cow_child(i);
 
@@ -4833,6 +5884,8 @@ int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
     BlockDriverState *i;
     BdrvChild *child;
 
+    GLOBAL_STATE_CODE();
+
     if (bdrv_is_backing_chain_frozen(bs, base, errp)) {
         return -EPERM;
     }
@@ -4867,6 +5920,8 @@ void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base)
     BlockDriverState *i;
     BdrvChild *child;
 
+    GLOBAL_STATE_CODE();
+
     for (i = bs; i != base; i = child_bs(child)) {
         child = bdrv_filter_or_cow_child(i);
         if (child) {
@@ -4916,16 +5971,19 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
     g_autoptr(GSList) updated_children = NULL;
     GSList *p;
 
+    GLOBAL_STATE_CODE();
+
     bdrv_ref(top);
-    bdrv_subtree_drained_begin(top);
+    bdrv_drained_begin(base);
+    bdrv_graph_wrlock(base);
 
     if (!top->drv || !base->drv) {
-        goto exit;
+        goto exit_wrlock;
     }
 
     /* Make sure that base is in the backing chain of top */
     if (!bdrv_chain_contains(top, base)) {
-        goto exit;
+        goto exit_wrlock;
     }
 
     /* If 'base' recursively inherits from 'top' then we should set
@@ -4937,8 +5995,6 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
     update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);
 
     /* success - we can delete the intermediate states, and link top->base */
-    /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once
-     * we've figured out how they should work. */
     if (!backing_file_str) {
         bdrv_refresh_filename(base);
         backing_file_str = base->filename;
@@ -4948,7 +6004,19 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
         updated_children = g_slist_prepend(updated_children, c);
     }
 
-    bdrv_replace_node_common(top, base, false, &local_err);
+    /*
+     * It seems correct to pass detach_subchain=true here, but it triggers
+     * one more yet not fixed bug, when due to nested aio_poll loop we switch to
+     * another drained section, which modify the graph (for example, removing
+     * the child, which we keep in updated_children list). So, it's a TODO.
+     *
+     * Note, bug triggered if pass detach_subchain=true here and run
+     * test-bdrv-drain. test_drop_intermediate_poll() test-case will crash.
+     * That's a FIXME.
+     */
+    bdrv_replace_node_common(top, base, false, false, &local_err);
+    bdrv_graph_wrunlock(base);
+
     if (local_err) {
         error_report_err(local_err);
         goto exit;
@@ -4981,18 +6049,23 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
     }
 
     ret = 0;
+    goto exit;
+
+exit_wrlock:
+    bdrv_graph_wrunlock(base);
 exit:
-    bdrv_subtree_drained_end(top);
+    bdrv_drained_end(base);
     bdrv_unref(top);
     return ret;
 }
 
 /**
- * Implementation of BlockDriver.bdrv_get_allocated_file_size() that
+ * Implementation of BlockDriver.bdrv_co_get_allocated_file_size() that
  * sums the size of all data-bearing children.  (This excludes backing
  * children.)
  */
-static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+bdrv_sum_allocated_file_size(BlockDriverState *bs)
 {
     BdrvChild *child;
     int64_t child_size, sum = 0;
@@ -5001,7 +6074,7 @@ static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
         if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
                            BDRV_CHILD_FILTERED))
         {
-            child_size = bdrv_get_allocated_file_size(child->bs);
+            child_size = bdrv_co_get_allocated_file_size(child->bs);
             if (child_size < 0) {
                 return child_size;
             }
@@ -5016,14 +6089,17 @@ static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
  * Length of a allocated file in bytes. Sparse files are counted by actual
  * allocated space. Return < 0 if error or unknown.
  */
-int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
+int64_t coroutine_fn bdrv_co_get_allocated_file_size(BlockDriverState *bs)
 {
     BlockDriver *drv = bs->drv;
+    IO_CODE();
+    assert_bdrv_graph_readable();
+
     if (!drv) {
         return -ENOMEDIUM;
     }
-    if (drv->bdrv_get_allocated_file_size) {
-        return drv->bdrv_get_allocated_file_size(bs);
+    if (drv->bdrv_co_get_allocated_file_size) {
+        return drv->bdrv_co_get_allocated_file_size(bs);
     }
 
     if (drv->bdrv_file_open) {
@@ -5035,7 +6111,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
         return -ENOTSUP;
     } else if (drv->is_filter) {
         /* Filter drivers default to the size of their filtered child */
-        return bdrv_get_allocated_file_size(bdrv_filter_bs(bs));
+        return bdrv_co_get_allocated_file_size(bdrv_filter_bs(bs));
     } else {
         /* Other drivers default to summing their children's sizes */
         return bdrv_sum_allocated_file_size(bs);
@@ -5068,6 +6144,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
 BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
                                BlockDriverState *in_bs, Error **errp)
 {
+    IO_CODE();
     if (!drv->bdrv_measure) {
         error_setg(errp, "Block driver '%s' does not support size measurement",
                    drv->format_name);
@@ -5080,19 +6157,43 @@ BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
 /**
  * Return number of sectors on success, -errno on error.
  */
-int64_t bdrv_nb_sectors(BlockDriverState *bs)
+int64_t coroutine_fn bdrv_co_nb_sectors(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    IO_CODE();
+    assert_bdrv_graph_readable();
+
+    if (!drv)
+        return -ENOMEDIUM;
+
+    if (bs->bl.has_variable_length) {
+        int ret = bdrv_co_refresh_total_sectors(bs, bs->total_sectors);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+    return bs->total_sectors;
+}
+
+/*
+ * This wrapper is written by hand because this function is in the hot I/O path,
+ * via blk_get_geometry.
+ */
+int64_t coroutine_mixed_fn bdrv_nb_sectors(BlockDriverState *bs)
 {
     BlockDriver *drv = bs->drv;
+    IO_CODE();
 
     if (!drv)
         return -ENOMEDIUM;
 
-    if (drv->has_variable_length) {
-        int ret = refresh_total_sectors(bs, bs->total_sectors);
+    if (bs->bl.has_variable_length) {
+        int ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
         if (ret < 0) {
             return ret;
         }
     }
+
     return bs->total_sectors;
 }
 
@@ -5100,10 +6201,13 @@ int64_t bdrv_nb_sectors(BlockDriverState *bs)
  * Return length in bytes on success, -errno on error.
  * The length is always a multiple of BDRV_SECTOR_SIZE.
  */
-int64_t bdrv_getlength(BlockDriverState *bs)
+int64_t coroutine_fn bdrv_co_getlength(BlockDriverState *bs)
 {
-    int64_t ret = bdrv_nb_sectors(bs);
+    int64_t ret;
+    IO_CODE();
+    assert_bdrv_graph_readable();
 
+    ret = bdrv_co_nb_sectors(bs);
     if (ret < 0) {
         return ret;
     }
@@ -5113,16 +6217,9 @@ int64_t bdrv_getlength(BlockDriverState *bs)
     return ret * BDRV_SECTOR_SIZE;
 }
 
-/* return 0 as number of sectors if no device present or error */
-void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
-{
-    int64_t nb_sectors = bdrv_nb_sectors(bs);
-
-    *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
-}
-
 bool bdrv_is_sg(BlockDriverState *bs)
 {
+    IO_CODE();
     return bs->sg;
 }
 
@@ -5132,6 +6229,7 @@ bool bdrv_is_sg(BlockDriverState *bs)
 bool bdrv_supports_compressed_writes(BlockDriverState *bs)
 {
     BlockDriverState *filtered;
+    IO_CODE();
 
     if (!bs->drv || !block_driver_can_compress(bs->drv)) {
         return false;
@@ -5151,6 +6249,7 @@ bool bdrv_supports_compressed_writes(BlockDriverState *bs)
 
 const char *bdrv_get_format_name(BlockDriverState *bs)
 {
+    IO_CODE();
     return bs->drv ? bs->drv->format_name : NULL;
 }
 
@@ -5167,15 +6266,17 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
     int i;
     const char **formats = NULL;
 
+    GLOBAL_STATE_CODE();
+
     QLIST_FOREACH(drv, &bdrv_drivers, list) {
         if (drv->format_name) {
             bool found = false;
-            int i = count;
 
             if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) {
                 continue;
             }
 
+            i = count;
             while (formats && i && !found) {
                 found = !strcmp(formats[--i], drv->format_name);
             }
@@ -5225,6 +6326,7 @@ BlockDriverState *bdrv_find_node(const char *node_name)
     BlockDriverState *bs;
 
     assert(node_name);
+    GLOBAL_STATE_CODE();
 
     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
         if (!strcmp(node_name, bs->node_name)) {
@@ -5241,6 +6343,9 @@ BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
     BlockDeviceInfoList *list;
     BlockDriverState *bs;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD();
+
     list = NULL;
     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
         BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp);
@@ -5316,6 +6421,7 @@ static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent,
 {
     BlockPermission qapi_perm;
     XDbgBlockGraphEdge *edge;
+    GLOBAL_STATE_CODE();
 
     edge = g_new0(XDbgBlockGraphEdge, 1);
 
@@ -5346,6 +6452,8 @@ XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp)
     BdrvChild *child;
     XDbgBlockGraphConstructor *gr = xdbg_graph_new();
 
+    GLOBAL_STATE_CODE();
+
     for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
         char *allocated_name = NULL;
         const char *name = blk_name(blk);
@@ -5361,13 +6469,16 @@ XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp)
         }
     }
 
-    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
-        GSList *el;
+    WITH_JOB_LOCK_GUARD() {
+        for (job = block_job_next_locked(NULL); job;
+             job = block_job_next_locked(job)) {
+            GSList *el;
 
-        xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB,
-                           job->job.id);
-        for (el = job->nodes; el; el = el->next) {
-            xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data);
+            xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB,
+                                job->job.id);
+            for (el = job->nodes; el; el = el->next) {
+                xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data);
+            }
         }
     }
 
@@ -5389,6 +6500,8 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
     BlockBackend *blk;
     BlockDriverState *bs;
 
+    GLOBAL_STATE_CODE();
+
     if (device) {
         blk = blk_by_name(device);
 
@@ -5410,7 +6523,7 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
         }
     }
 
-    error_setg(errp, "Cannot find device=%s nor node_name=%s",
+    error_setg(errp, "Cannot find device=\'%s\' nor node-name=\'%s\'",
                      device ? device : "",
                      node_name ? node_name : "");
     return NULL;
@@ -5420,6 +6533,9 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
  * return false.  If either argument is NULL, return false. */
 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
 {
+
+    GLOBAL_STATE_CODE();
+
     while (top && top != base) {
         top = bdrv_filter_or_cow_bs(top);
     }
@@ -5429,6 +6545,7 @@ bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
 
 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
 {
+    GLOBAL_STATE_CODE();
     if (!bs) {
         return QTAILQ_FIRST(&graph_bdrv_states);
     }
@@ -5437,6 +6554,7 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs)
 
 BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
 {
+    GLOBAL_STATE_CODE();
     if (!bs) {
         return QTAILQ_FIRST(&all_bdrv_states);
     }
@@ -5445,6 +6563,7 @@ BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
 
 const char *bdrv_get_node_name(const BlockDriverState *bs)
 {
+    IO_CODE();
     return bs->node_name;
 }
 
@@ -5452,6 +6571,7 @@ const char *bdrv_get_parent_name(const BlockDriverState *bs)
 {
     BdrvChild *c;
     const char *name;
+    IO_CODE();
 
     /* If multiple parents have a name, just pick the first one. */
     QLIST_FOREACH(c, &bs->parents, next_parent) {
@@ -5469,6 +6589,7 @@ const char *bdrv_get_parent_name(const BlockDriverState *bs)
 /* TODO check what callers really want: bs->node_name or blk_name() */
 const char *bdrv_get_device_name(const BlockDriverState *bs)
 {
+    IO_CODE();
     return bdrv_get_parent_name(bs) ?: "";
 }
 
@@ -5478,22 +6599,26 @@ const char *bdrv_get_device_name(const BlockDriverState *bs)
  * absent, then this returns an empty (non-null) string. */
 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
 {
+    IO_CODE();
     return bdrv_get_parent_name(bs) ?: bs->node_name;
 }
 
 int bdrv_get_flags(BlockDriverState *bs)
 {
+    IO_CODE();
     return bs->open_flags;
 }
 
 int bdrv_has_zero_init_1(BlockDriverState *bs)
 {
+    GLOBAL_STATE_CODE();
     return 1;
 }
 
-int bdrv_has_zero_init(BlockDriverState *bs)
+int coroutine_mixed_fn bdrv_has_zero_init(BlockDriverState *bs)
 {
     BlockDriverState *filtered;
+    GLOBAL_STATE_CODE();
 
     if (!bs->drv) {
         return 0;
@@ -5519,6 +6644,7 @@ int bdrv_has_zero_init(BlockDriverState *bs)
 
 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
 {
+    IO_CODE();
     if (!(bs->open_flags & BDRV_O_UNMAP)) {
         return false;
     }
@@ -5529,31 +6655,53 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
 void bdrv_get_backing_filename(BlockDriverState *bs,
                                char *filename, int filename_size)
 {
+    IO_CODE();
     pstrcpy(filename, filename_size, bs->backing_file);
 }
 
-int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+int coroutine_fn bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
+    int ret;
     BlockDriver *drv = bs->drv;
+    IO_CODE();
+    assert_bdrv_graph_readable();
+
     /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
     if (!drv) {
         return -ENOMEDIUM;
     }
-    if (!drv->bdrv_get_info) {
+    if (!drv->bdrv_co_get_info) {
         BlockDriverState *filtered = bdrv_filter_bs(bs);
         if (filtered) {
-            return bdrv_get_info(filtered, bdi);
+            return bdrv_co_get_info(filtered, bdi);
         }
         return -ENOTSUP;
     }
     memset(bdi, 0, sizeof(*bdi));
-    return drv->bdrv_get_info(bs, bdi);
+    ret = drv->bdrv_co_get_info(bs, bdi);
+    if (bdi->subcluster_size == 0) {
+        /*
+         * If the driver left this unset, subclusters are not supported.
+         * Then it is safe to treat each cluster as having only one subcluster.
+         */
+        bdi->subcluster_size = bdi->cluster_size;
+    }
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (bdi->cluster_size > BDRV_MAX_ALIGNMENT) {
+        return -EINVAL;
+    }
+
+    return 0;
 }
 
 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
                                           Error **errp)
 {
     BlockDriver *drv = bs->drv;
+    IO_CODE();
     if (drv && drv->bdrv_get_specific_info) {
         return drv->bdrv_get_specific_info(bs, errp);
     }
@@ -5563,23 +6711,29 @@ ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
 BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
 {
     BlockDriver *drv = bs->drv;
+    IO_CODE();
     if (!drv || !drv->bdrv_get_specific_stats) {
         return NULL;
     }
     return drv->bdrv_get_specific_stats(bs);
 }
 
-void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
+void coroutine_fn bdrv_co_debug_event(BlockDriverState *bs, BlkdebugEvent event)
 {
-    if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
+    IO_CODE();
+    assert_bdrv_graph_readable();
+
+    if (!bs || !bs->drv || !bs->drv->bdrv_co_debug_event) {
         return;
     }
 
-    bs->drv->bdrv_debug_event(bs, event);
+    bs->drv->bdrv_co_debug_event(bs, event);
 }
 
-static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
+static BlockDriverState * GRAPH_RDLOCK
+bdrv_find_debug_node(BlockDriverState *bs)
 {
+    GLOBAL_STATE_CODE();
     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
         bs = bdrv_primary_bs(bs);
     }
@@ -5595,6 +6749,9 @@ static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
                           const char *tag)
 {
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     bs = bdrv_find_debug_node(bs);
     if (bs) {
         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
@@ -5605,6 +6762,9 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
 
 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
 {
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     bs = bdrv_find_debug_node(bs);
     if (bs) {
         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
@@ -5615,6 +6775,9 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
 
 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
 {
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
         bs = bdrv_primary_bs(bs);
     }
@@ -5628,6 +6791,9 @@ int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
 
 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
 {
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
         bs = bdrv_primary_bs(bs);
     }
@@ -5655,6 +6821,9 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
     BlockDriverState *retval = NULL;
     BlockDriverState *bs_below;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (!bs || !bs->drv || !backing_file) {
         return NULL;
     }
@@ -5753,6 +6922,9 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
 
 void bdrv_init(void)
 {
+#ifdef CONFIG_BDRV_WHITELIST_TOOLS
+    use_bdrv_whitelist = 1;
+#endif
     module_call_init(MODULE_INIT_BLOCK);
 }
 
@@ -5762,20 +6934,22 @@ void bdrv_init_with_whitelist(void)
     bdrv_init();
 }
 
-int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
+int bdrv_activate(BlockDriverState *bs, Error **errp)
 {
     BdrvChild *child, *parent;
-    uint64_t perm, shared_perm;
     Error *local_err = NULL;
     int ret;
     BdrvDirtyBitmap *bm;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (!bs->drv)  {
         return -ENOMEDIUM;
     }
 
     QLIST_FOREACH(child, &bs->children, next) {
-        bdrv_co_invalidate_cache(child->bs, &local_err);
+        bdrv_activate(child->bs, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
             return -EINVAL;
@@ -5788,7 +6962,7 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
      * Note that the required permissions of inactive images are always a
      * subset of the permissions required after activating the image. This
      * allows us to just get the permissions upfront without restricting
-     * drv->bdrv_invalidate_cache().
+     * bdrv_co_invalidate_cache().
      *
      * It also means that in error cases, we don't have to try and revert to
      * the old permissions (which is an operation that could fail, too). We can
@@ -5797,29 +6971,23 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
      */
     if (bs->open_flags & BDRV_O_INACTIVE) {
         bs->open_flags &= ~BDRV_O_INACTIVE;
-        bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
-        ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, errp);
+        ret = bdrv_refresh_perms(bs, NULL, errp);
         if (ret < 0) {
-            bdrv_abort_perm_update(bs);
             bs->open_flags |= BDRV_O_INACTIVE;
             return ret;
         }
-        bdrv_set_perm(bs, perm, shared_perm);
 
-        if (bs->drv->bdrv_co_invalidate_cache) {
-            bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
-            if (local_err) {
-                bs->open_flags |= BDRV_O_INACTIVE;
-                error_propagate(errp, local_err);
-                return -EINVAL;
-            }
+        ret = bdrv_invalidate_cache(bs, errp);
+        if (ret < 0) {
+            bs->open_flags |= BDRV_O_INACTIVE;
+            return ret;
         }
 
         FOR_EACH_DIRTY_BITMAP(bs, bm) {
             bdrv_dirty_bitmap_skip_store(bm, false);
         }
 
-        ret = refresh_total_sectors(bs, bs->total_sectors);
+        ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
         if (ret < 0) {
             bs->open_flags |= BDRV_O_INACTIVE;
             error_setg_errno(errp, -ret, "Could not refresh total sector count");
@@ -5841,17 +7009,39 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
     return 0;
 }
 
-void bdrv_invalidate_cache_all(Error **errp)
+int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
+{
+    Error *local_err = NULL;
+    IO_CODE();
+
+    assert(!(bs->open_flags & BDRV_O_INACTIVE));
+    assert_bdrv_graph_readable();
+
+    if (bs->drv->bdrv_co_invalidate_cache) {
+        bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return -EINVAL;
+        }
+    }
+
+    return 0;
+}
+
+void bdrv_activate_all(Error **errp)
 {
     BlockDriverState *bs;
     BdrvNextIterator it;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
         int ret;
 
         aio_context_acquire(aio_context);
-        ret = bdrv_invalidate_cache(bs, errp);
+        ret = bdrv_activate(bs, errp);
         aio_context_release(aio_context);
         if (ret < 0) {
             bdrv_next_cleanup(&it);
@@ -5860,9 +7050,11 @@ void bdrv_invalidate_cache_all(Error **errp)
     }
 }
 
-static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
+static bool GRAPH_RDLOCK
+bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
 {
     BdrvChild *parent;
+    GLOBAL_STATE_CODE();
 
     QLIST_FOREACH(parent, &bs->parents, next_parent) {
         if (parent->klass->parent_is_bds) {
@@ -5876,12 +7068,13 @@ static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
     return false;
 }
 
-static int bdrv_inactivate_recurse(BlockDriverState *bs)
+static int GRAPH_RDLOCK bdrv_inactivate_recurse(BlockDriverState *bs)
 {
     BdrvChild *child, *parent;
-    bool tighten_restrictions;
-    uint64_t perm, shared_perm;
     int ret;
+    uint64_t cumulative_perms, cumulative_shared_perms;
+
+    GLOBAL_STATE_CODE();
 
     if (!bs->drv) {
         return -ENOMEDIUM;
@@ -5912,20 +7105,21 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs)
         }
     }
 
-    bs->open_flags |= BDRV_O_INACTIVE;
-
-    /* Update permissions, they may differ for inactive nodes */
-    bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
-    ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL,
-                          &tighten_restrictions, NULL);
-    assert(tighten_restrictions == false);
-    if (ret < 0) {
-        /* We only tried to loosen restrictions, so errors are not fatal */
-        bdrv_abort_perm_update(bs);
-    } else {
-        bdrv_set_perm(bs, perm, shared_perm);
+    bdrv_get_cumulative_perm(bs, &cumulative_perms,
+                             &cumulative_shared_perms);
+    if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
+        /* Our inactive parents still need write access. Inactivation failed. */
+        return -EPERM;
     }
 
+    bs->open_flags |= BDRV_O_INACTIVE;
+
+    /*
+     * Update permissions, they may differ for inactive nodes.
+     * We only tried to loosen restrictions, so errors are not fatal, ignore
+     * them.
+     */
+    bdrv_refresh_perms(bs, NULL, NULL);
 
     /* Recursively inactivate children */
     QLIST_FOREACH(child, &bs->children, next) {
@@ -5945,6 +7139,9 @@ int bdrv_inactivate_all(void)
     int ret = 0;
     GSList *aio_ctxs = NULL, *ctx;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
@@ -5984,19 +7181,21 @@ out:
 /**
  * Return TRUE if the media is present
  */
-bool bdrv_is_inserted(BlockDriverState *bs)
+bool coroutine_fn bdrv_co_is_inserted(BlockDriverState *bs)
 {
     BlockDriver *drv = bs->drv;
     BdrvChild *child;
+    IO_CODE();
+    assert_bdrv_graph_readable();
 
     if (!drv) {
         return false;
     }
-    if (drv->bdrv_is_inserted) {
-        return drv->bdrv_is_inserted(bs);
+    if (drv->bdrv_co_is_inserted) {
+        return drv->bdrv_co_is_inserted(bs);
     }
     QLIST_FOREACH(child, &bs->children, next) {
-        if (!bdrv_is_inserted(child->bs)) {
+        if (!bdrv_co_is_inserted(child->bs)) {
             return false;
         }
     }
@@ -6006,12 +7205,14 @@ bool bdrv_is_inserted(BlockDriverState *bs)
 /**
  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
  */
-void bdrv_eject(BlockDriverState *bs, bool eject_flag)
+void coroutine_fn bdrv_co_eject(BlockDriverState *bs, bool eject_flag)
 {
     BlockDriver *drv = bs->drv;
+    IO_CODE();
+    assert_bdrv_graph_readable();
 
-    if (drv && drv->bdrv_eject) {
-        drv->bdrv_eject(bs, eject_flag);
+    if (drv && drv->bdrv_co_eject) {
+        drv->bdrv_co_eject(bs, eject_flag);
     }
 }
 
@@ -6019,20 +7220,22 @@ void bdrv_eject(BlockDriverState *bs, bool eject_flag)
  * Lock or unlock the media (if it is locked, the user won't be able
  * to eject it manually).
  */
-void bdrv_lock_medium(BlockDriverState *bs, bool locked)
+void coroutine_fn bdrv_co_lock_medium(BlockDriverState *bs, bool locked)
 {
     BlockDriver *drv = bs->drv;
-
+    IO_CODE();
+    assert_bdrv_graph_readable();
     trace_bdrv_lock_medium(bs, locked);
 
-    if (drv && drv->bdrv_lock_medium) {
-        drv->bdrv_lock_medium(bs, locked);
+    if (drv && drv->bdrv_co_lock_medium) {
+        drv->bdrv_co_lock_medium(bs, locked);
     }
 }
 
 /* Get a reference to bs */
 void bdrv_ref(BlockDriverState *bs)
 {
+    GLOBAL_STATE_CODE();
     bs->refcnt++;
 }
 
@@ -6041,6 +7244,7 @@ void bdrv_ref(BlockDriverState *bs)
  * deleted. */
 void bdrv_unref(BlockDriverState *bs)
 {
+    GLOBAL_STATE_CODE();
     if (!bs) {
         return;
     }
@@ -6050,6 +7254,32 @@ void bdrv_unref(BlockDriverState *bs)
     }
 }
 
+static void bdrv_schedule_unref_bh(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    AioContext *ctx = bdrv_get_aio_context(bs);
+
+    aio_context_acquire(ctx);
+    bdrv_unref(bs);
+    aio_context_release(ctx);
+}
+
+/*
+ * Release a BlockDriverState reference while holding the graph write lock.
+ *
+ * Calling bdrv_unref() directly is forbidden while holding the graph lock
+ * because bdrv_close() both involves polling and taking the graph lock
+ * internally. bdrv_schedule_unref() instead delays decreasing the refcount and
+ * possibly closing @bs until the graph lock is released.
+ */
+void bdrv_schedule_unref(BlockDriverState *bs)
+{
+    if (!bs) {
+        return;
+    }
+    aio_bh_schedule_oneshot(qemu_get_aio_context(), bdrv_schedule_unref_bh, bs);
+}
+
 struct BdrvOpBlocker {
     Error *reason;
     QLIST_ENTRY(BdrvOpBlocker) list;
@@ -6058,6 +7288,8 @@ struct BdrvOpBlocker {
 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
 {
     BdrvOpBlocker *blocker;
+    GLOBAL_STATE_CODE();
+
     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
         blocker = QLIST_FIRST(&bs->op_blockers[op]);
@@ -6072,6 +7304,7 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
 {
     BdrvOpBlocker *blocker;
+    GLOBAL_STATE_CODE();
     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
 
     blocker = g_new0(BdrvOpBlocker, 1);
@@ -6082,6 +7315,7 @@ void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
 {
     BdrvOpBlocker *blocker, *next;
+    GLOBAL_STATE_CODE();
     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
         if (blocker->reason == reason) {
@@ -6094,6 +7328,7 @@ void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
 {
     int i;
+    GLOBAL_STATE_CODE();
     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
         bdrv_op_block(bs, i, reason);
     }
@@ -6102,6 +7337,7 @@ void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
 {
     int i;
+    GLOBAL_STATE_CODE();
     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
         bdrv_op_unblock(bs, i, reason);
     }
@@ -6110,7 +7346,7 @@ void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
 {
     int i;
-
+    GLOBAL_STATE_CODE();
     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
             return false;
@@ -6119,6 +7355,10 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
     return true;
 }
 
+/*
+ * Must not be called while holding the lock of an AioContext other than the
+ * current one.
+ */
 void bdrv_img_create(const char *filename, const char *fmt,
                      const char *base_filename, const char *base_fmt,
                      char *options, uint64_t img_size, int flags, bool quiet,
@@ -6132,6 +7372,8 @@ void bdrv_img_create(const char *filename, const char *fmt,
     Error *local_err = NULL;
     int ret = 0;
 
+    GLOBAL_STATE_CODE();
+
     /* Find driver and parse its options */
     drv = bdrv_find_format(fmt);
     if (!drv) {
@@ -6156,6 +7398,8 @@ void bdrv_img_create(const char *filename, const char *fmt,
         return;
     }
 
+    aio_context_acquire(qemu_get_aio_context());
+
     /* Create parameter list */
     create_opts = qemu_opts_append(create_opts, drv->create_opts);
     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
@@ -6225,9 +7469,13 @@ void bdrv_img_create(const char *filename, const char *fmt,
         }
         assert(full_backing);
 
-        /* backing files always opened read-only */
+        /*
+         * No need to do I/O here, which allows us to open encrypted
+         * backing images without needing the secret
+         */
         back_flags = flags;
         back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+        back_flags |= BDRV_O_NO_IO;
 
         backing_options = qdict_new();
         if (backing_fmt) {
@@ -6243,24 +7491,11 @@ void bdrv_img_create(const char *filename, const char *fmt,
             goto out;
         } else {
             if (!backing_fmt) {
-                warn_report("Deprecated use of backing file without explicit "
-                            "backing format (detected format of %s)",
-                            bs->drv->format_name);
-                if (bs->drv != &bdrv_raw) {
-                    /*
-                     * A probe of raw deserves the most attention:
-                     * leaving the backing format out of the image
-                     * will ensure bs->probed is set (ensuring we
-                     * don't accidentally commit into the backing
-                     * file), and allow more spots to warn the users
-                     * to fix their toolchain when opening this image
-                     * later.  For other images, we can safely record
-                     * the format that we probed.
-                     */
-                    backing_fmt = bs->drv->format_name;
-                    qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, backing_fmt,
-                                 NULL);
-                }
+                error_setg(&local_err,
+                           "Backing file specified without backing format");
+                error_append_hint(&local_err, "Detected format of %s.\n",
+                                  bs->drv->format_name);
+                goto out;
             }
             if (size == -1) {
                 /* Opened BS, have no size */
@@ -6277,9 +7512,9 @@ void bdrv_img_create(const char *filename, const char *fmt,
         }
         /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */
     } else if (backing_file && !backing_fmt) {
-        warn_report("Deprecated use of unopened backing file without "
-                    "explicit backing format, use of this image requires "
-                    "potentially unsafe format probing");
+        error_setg(&local_err,
+                   "Backing file specified without backing format");
+        goto out;
     }
 
     if (size == -1) {
@@ -6314,10 +7549,12 @@ out:
     qemu_opts_del(opts);
     qemu_opts_free(create_opts);
     error_propagate(errp, local_err);
+    aio_context_release(qemu_get_aio_context());
 }
 
 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
 {
+    IO_CODE();
     return bs ? bs->aio_context : qemu_get_aio_context();
 }
 
@@ -6326,6 +7563,7 @@ AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs)
     Coroutine *self = qemu_coroutine_self();
     AioContext *old_ctx = qemu_coroutine_get_aio_context(self);
     AioContext *new_ctx;
+    IO_CODE();
 
     /*
      * Increase bs->in_flight to ensure that this operation is completed before
@@ -6340,6 +7578,7 @@ AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs)
 
 void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
 {
+    IO_CODE();
     aio_co_reschedule_self(old_ctx);
     bdrv_dec_in_flight(bs);
 }
@@ -6371,13 +7610,9 @@ void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
     }
 }
 
-void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
-{
-    aio_co_enter(bdrv_get_aio_context(bs), co);
-}
-
 static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
 {
+    GLOBAL_STATE_CODE();
     QLIST_REMOVE(ban, list);
     g_free(ban);
 }
@@ -6387,6 +7622,7 @@ static void bdrv_detach_aio_context(BlockDriverState *bs)
     BdrvAioNotifier *baf, *baf_tmp;
 
     assert(!bs->walking_aio_notifiers);
+    GLOBAL_STATE_CODE();
     bs->walking_aio_notifiers = true;
     QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
         if (baf->deleted) {
@@ -6404,9 +7640,6 @@ static void bdrv_detach_aio_context(BlockDriverState *bs)
         bs->drv->bdrv_detach_aio_context(bs);
     }
 
-    if (bs->quiesce_counter) {
-        aio_enable_external(bs->aio_context);
-    }
     bs->aio_context = NULL;
 }
 
@@ -6414,10 +7647,7 @@ static void bdrv_attach_aio_context(BlockDriverState *bs,
                                     AioContext *new_context)
 {
     BdrvAioNotifier *ban, *ban_tmp;
-
-    if (bs->quiesce_counter) {
-        aio_disable_external(new_context);
-    }
+    GLOBAL_STATE_CODE();
 
     bs->aio_context = new_context;
 
@@ -6437,163 +7667,225 @@ static void bdrv_attach_aio_context(BlockDriverState *bs,
     bs->walking_aio_notifiers = false;
 }
 
-/*
- * Changes the AioContext used for fd handlers, timers, and BHs by this
- * BlockDriverState and all its children and parents.
- *
- * Must be called from the main AioContext.
- *
- * The caller must own the AioContext lock for the old AioContext of bs, but it
- * must not own the AioContext lock for new_context (unless new_context is the
- * same as the current context of bs).
- *
- * @ignore will accumulate all visited BdrvChild object. The caller is
- * responsible for freeing the list afterwards.
- */
-void bdrv_set_aio_context_ignore(BlockDriverState *bs,
-                                 AioContext *new_context, GSList **ignore)
-{
-    AioContext *old_context = bdrv_get_aio_context(bs);
-    BdrvChild *child;
-
-    g_assert(qemu_get_current_aio_context() == qemu_get_aio_context());
-
-    if (old_context == new_context) {
-        return;
-    }
-
-    bdrv_drained_begin(bs);
-
-    QLIST_FOREACH(child, &bs->children, next) {
-        if (g_slist_find(*ignore, child)) {
-            continue;
-        }
-        *ignore = g_slist_prepend(*ignore, child);
-        bdrv_set_aio_context_ignore(child->bs, new_context, ignore);
-    }
-    QLIST_FOREACH(child, &bs->parents, next_parent) {
-        if (g_slist_find(*ignore, child)) {
-            continue;
-        }
-        assert(child->klass->set_aio_ctx);
-        *ignore = g_slist_prepend(*ignore, child);
-        child->klass->set_aio_ctx(child, new_context, ignore);
-    }
-
-    bdrv_detach_aio_context(bs);
-
-    /* Acquire the new context, if necessary */
-    if (qemu_get_aio_context() != new_context) {
-        aio_context_acquire(new_context);
-    }
-
-    bdrv_attach_aio_context(bs, new_context);
-
-    /*
-     * If this function was recursively called from
-     * bdrv_set_aio_context_ignore(), there may be nodes in the
-     * subtree that have not yet been moved to the new AioContext.
-     * Release the old one so bdrv_drained_end() can poll them.
-     */
-    if (qemu_get_aio_context() != old_context) {
-        aio_context_release(old_context);
-    }
-
-    bdrv_drained_end(bs);
-
-    if (qemu_get_aio_context() != old_context) {
-        aio_context_acquire(old_context);
-    }
-    if (qemu_get_aio_context() != new_context) {
-        aio_context_release(new_context);
-    }
-}
+typedef struct BdrvStateSetAioContext {
+    AioContext *new_ctx;
+    BlockDriverState *bs;
+} BdrvStateSetAioContext;
 
-static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx,
-                                            GSList **ignore, Error **errp)
+static bool bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx,
+                                           GHashTable *visited,
+                                           Transaction *tran,
+                                           Error **errp)
 {
-    if (g_slist_find(*ignore, c)) {
+    GLOBAL_STATE_CODE();
+    if (g_hash_table_contains(visited, c)) {
         return true;
     }
-    *ignore = g_slist_prepend(*ignore, c);
+    g_hash_table_add(visited, c);
 
     /*
      * A BdrvChildClass that doesn't handle AioContext changes cannot
      * tolerate any AioContext changes
      */
-    if (!c->klass->can_set_aio_ctx) {
+    if (!c->klass->change_aio_ctx) {
         char *user = bdrv_child_user_desc(c);
         error_setg(errp, "Changing iothreads is not supported by %s", user);
         g_free(user);
         return false;
     }
-    if (!c->klass->can_set_aio_ctx(c, ctx, ignore, errp)) {
+    if (!c->klass->change_aio_ctx(c, ctx, visited, tran, errp)) {
         assert(!errp || *errp);
         return false;
     }
     return true;
 }
 
-bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
-                                    GSList **ignore, Error **errp)
+bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
+                                   GHashTable *visited, Transaction *tran,
+                                   Error **errp)
 {
-    if (g_slist_find(*ignore, c)) {
+    GLOBAL_STATE_CODE();
+    if (g_hash_table_contains(visited, c)) {
         return true;
     }
-    *ignore = g_slist_prepend(*ignore, c);
-    return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp);
+    g_hash_table_add(visited, c);
+    return bdrv_change_aio_context(c->bs, ctx, visited, tran, errp);
+}
+
+static void bdrv_set_aio_context_clean(void *opaque)
+{
+    BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque;
+    BlockDriverState *bs = (BlockDriverState *) state->bs;
+
+    /* Paired with bdrv_drained_begin in bdrv_change_aio_context() */
+    bdrv_drained_end(bs);
+
+    g_free(state);
+}
+
+static void bdrv_set_aio_context_commit(void *opaque)
+{
+    BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque;
+    BlockDriverState *bs = (BlockDriverState *) state->bs;
+    AioContext *new_context = state->new_ctx;
+    AioContext *old_context = bdrv_get_aio_context(bs);
+
+    /*
+     * Take the old AioContex when detaching it from bs.
+     * At this point, new_context lock is already acquired, and we are now
+     * also taking old_context. This is safe as long as bdrv_detach_aio_context
+     * does not call AIO_POLL_WHILE().
+     */
+    if (old_context != qemu_get_aio_context()) {
+        aio_context_acquire(old_context);
+    }
+    bdrv_detach_aio_context(bs);
+    if (old_context != qemu_get_aio_context()) {
+        aio_context_release(old_context);
+    }
+    bdrv_attach_aio_context(bs, new_context);
 }
 
-/* @ignore will accumulate all visited BdrvChild object. The caller is
- * responsible for freeing the list afterwards. */
-bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
-                              GSList **ignore, Error **errp)
+static TransactionActionDrv set_aio_context = {
+    .commit = bdrv_set_aio_context_commit,
+    .clean = bdrv_set_aio_context_clean,
+};
+
+/*
+ * Changes the AioContext used for fd handlers, timers, and BHs by this
+ * BlockDriverState and all its children and parents.
+ *
+ * Must be called from the main AioContext.
+ *
+ * The caller must own the AioContext lock for the old AioContext of bs, but it
+ * must not own the AioContext lock for new_context (unless new_context is the
+ * same as the current context of bs).
+ *
+ * @visited will accumulate all visited BdrvChild objects. The caller is
+ * responsible for freeing the list afterwards.
+ */
+static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
+                                    GHashTable *visited, Transaction *tran,
+                                    Error **errp)
 {
     BdrvChild *c;
+    BdrvStateSetAioContext *state;
+
+    GLOBAL_STATE_CODE();
 
     if (bdrv_get_aio_context(bs) == ctx) {
         return true;
     }
 
+    bdrv_graph_rdlock_main_loop();
     QLIST_FOREACH(c, &bs->parents, next_parent) {
-        if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) {
+        if (!bdrv_parent_change_aio_context(c, ctx, visited, tran, errp)) {
+            bdrv_graph_rdunlock_main_loop();
             return false;
         }
     }
+
     QLIST_FOREACH(c, &bs->children, next) {
-        if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) {
+        if (!bdrv_child_change_aio_context(c, ctx, visited, tran, errp)) {
+            bdrv_graph_rdunlock_main_loop();
             return false;
         }
     }
+    bdrv_graph_rdunlock_main_loop();
+
+    state = g_new(BdrvStateSetAioContext, 1);
+    *state = (BdrvStateSetAioContext) {
+        .new_ctx = ctx,
+        .bs = bs,
+    };
+
+    /* Paired with bdrv_drained_end in bdrv_set_aio_context_clean() */
+    bdrv_drained_begin(bs);
+
+    tran_add(tran, &set_aio_context, state);
 
     return true;
 }
 
-int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
-                                   BdrvChild *ignore_child, Error **errp)
+/*
+ * Change bs's and recursively all of its parents' and children's AioContext
+ * to the given new context, returning an error if that isn't possible.
+ *
+ * If ignore_child is not NULL, that child (and its subgraph) will not
+ * be touched.
+ *
+ * This function still requires the caller to take the bs current
+ * AioContext lock, otherwise draining will fail since AIO_WAIT_WHILE
+ * assumes the lock is always held if bs is in another AioContext.
+ * For the same reason, it temporarily also holds the new AioContext, since
+ * bdrv_drained_end calls BDRV_POLL_WHILE that assumes the lock is taken too.
+ * Therefore the new AioContext lock must not be taken by the caller.
+ */
+int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
+                                BdrvChild *ignore_child, Error **errp)
 {
-    GSList *ignore;
-    bool ret;
+    Transaction *tran;
+    GHashTable *visited;
+    int ret;
+    AioContext *old_context = bdrv_get_aio_context(bs);
+    GLOBAL_STATE_CODE();
+
+    /*
+     * Recursion phase: go through all nodes of the graph.
+     * Take care of checking that all nodes support changing AioContext
+     * and drain them, building a linear list of callbacks to run if everything
+     * is successful (the transaction itself).
+     */
+    tran = tran_new();
+    visited = g_hash_table_new(NULL, NULL);
+    if (ignore_child) {
+        g_hash_table_add(visited, ignore_child);
+    }
+    ret = bdrv_change_aio_context(bs, ctx, visited, tran, errp);
+    g_hash_table_destroy(visited);
 
-    ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL;
-    ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp);
-    g_slist_free(ignore);
+    /*
+     * Linear phase: go through all callbacks collected in the transaction.
+     * Run all callbacks collected in the recursion to switch all nodes
+     * AioContext lock (transaction commit), or undo all changes done in the
+     * recursion (transaction abort).
+     */
 
     if (!ret) {
+        /* Just run clean() callbacks. No AioContext changed. */
+        tran_abort(tran);
         return -EPERM;
     }
 
-    ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL;
-    bdrv_set_aio_context_ignore(bs, ctx, &ignore);
-    g_slist_free(ignore);
+    /*
+     * Release old AioContext, it won't be needed anymore, as all
+     * bdrv_drained_begin() have been called already.
+     */
+    if (qemu_get_aio_context() != old_context) {
+        aio_context_release(old_context);
+    }
+
+    /*
+     * Acquire new AioContext since bdrv_drained_end() is going to be called
+     * after we switched all nodes in the new AioContext, and the function
+     * assumes that the lock of the bs is always taken.
+     */
+    if (qemu_get_aio_context() != ctx) {
+        aio_context_acquire(ctx);
+    }
 
-    return 0;
-}
+    tran_commit(tran);
 
-int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
-                             Error **errp)
-{
-    return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp);
+    if (qemu_get_aio_context() != ctx) {
+        aio_context_release(ctx);
+    }
+
+    /* Re-acquire the old AioContext, since the caller takes and releases it. */
+    if (qemu_get_aio_context() != old_context) {
+        aio_context_acquire(old_context);
+    }
+
+    return 0;
 }
 
 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
@@ -6606,6 +7898,7 @@ void bdrv_add_aio_context_notifier(BlockDriverState *bs,
         .detach_aio_context   = detach_aio_context,
         .opaque               = opaque
     };
+    GLOBAL_STATE_CODE();
 
     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
 }
@@ -6617,6 +7910,7 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
                                       void *opaque)
 {
     BdrvAioNotifier *ban, *ban_next;
+    GLOBAL_STATE_CODE();
 
     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
         if (ban->attached_aio_context == attached_aio_context &&
@@ -6641,6 +7935,7 @@ int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
                        bool force,
                        Error **errp)
 {
+    GLOBAL_STATE_CODE();
     if (!bs->drv) {
         error_setg(errp, "Node is ejected");
         return -ENOMEDIUM;
@@ -6671,6 +7966,8 @@ bool bdrv_recurse_can_replace(BlockDriverState *bs,
 {
     BlockDriverState *filtered;
 
+    GLOBAL_STATE_CODE();
+
     if (!bs || !bs->drv) {
         return false;
     }
@@ -6711,8 +8008,10 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
     AioContext *aio_context;
 
+    GLOBAL_STATE_CODE();
+
     if (!to_replace_bs) {
-        error_setg(errp, "Node name '%s' not found", node_name);
+        error_setg(errp, "Failed to find node with node-name='%s'", node_name);
         return NULL;
     }
 
@@ -6838,8 +8137,9 @@ static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs)
 /* Note: This function may return false positives; it may return true
  * even if opening the backing file specified by bs's image header
  * would result in exactly bs->backing. */
-bool bdrv_backing_overridden(BlockDriverState *bs)
+static bool GRAPH_RDLOCK bdrv_backing_overridden(BlockDriverState *bs)
 {
+    GLOBAL_STATE_CODE();
     if (bs->backing) {
         return strcmp(bs->auto_backing_file,
                       bs->backing->bs->filename);
@@ -6872,6 +8172,8 @@ void bdrv_refresh_filename(BlockDriverState *bs)
     bool generate_json_filename; /* Whether our default implementation should
                                     fill exact_filename (false) or not (true) */
 
+    GLOBAL_STATE_CODE();
+
     if (!drv) {
         return;
     }
@@ -6979,13 +8281,13 @@ void bdrv_refresh_filename(BlockDriverState *bs)
     if (bs->exact_filename[0]) {
         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
     } else {
-        QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
+        GString *json = qobject_to_json(QOBJECT(bs->full_open_options));
         if (snprintf(bs->filename, sizeof(bs->filename), "json:%s",
-                     qstring_get_str(json)) >= sizeof(bs->filename)) {
+                     json->str) >= sizeof(bs->filename)) {
             /* Give user a hint if we truncated things. */
             strcpy(bs->filename + sizeof(bs->filename) - 4, "...");
         }
-        qobject_unref(json);
+        g_string_free(json, true);
     }
 }
 
@@ -6994,6 +8296,8 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp)
     BlockDriver *drv = bs->drv;
     BlockDriverState *child_bs;
 
+    GLOBAL_STATE_CODE();
+
     if (!drv) {
         error_setg(errp, "Node '%s' is ejected", bs->node_name);
         return NULL;
@@ -7025,13 +8329,32 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp)
 void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
                     Error **errp)
 {
-
+    GLOBAL_STATE_CODE();
     if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
         error_setg(errp, "The node %s does not support adding a child",
                    bdrv_get_device_or_node_name(parent_bs));
         return;
     }
 
+    /*
+     * Non-zoned block drivers do not follow zoned storage constraints
+     * (i.e. sequential writes to zones). Refuse mixing zoned and non-zoned
+     * drivers in a graph.
+     */
+    if (!parent_bs->drv->supports_zoned_children &&
+        child_bs->bl.zoned == BLK_Z_HM) {
+        /*
+         * The host-aware model allows zoned storage constraints and random
+         * write. Allow mixing host-aware and non-zoned drivers. Using
+         * host-aware device as a regular device.
+         */
+        error_setg(errp, "Cannot add a %s child to a %s parent",
+                   child_bs->bl.zoned == BLK_Z_HM ? "zoned" : "non-zoned",
+                   parent_bs->drv->supports_zoned_children ?
+                   "support zoned children" : "not support zoned children");
+        return;
+    }
+
     if (!QLIST_EMPTY(&child_bs->parents)) {
         error_setg(errp, "The node %s already has a parent",
                    child_bs->node_name);
@@ -7045,6 +8368,7 @@ void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
 {
     BdrvChild *tmp;
 
+    GLOBAL_STATE_CODE();
     if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
         error_setg(errp, "The node %s does not support removing a child",
                    bdrv_get_device_or_node_name(parent_bs));
@@ -7072,6 +8396,7 @@ int bdrv_make_empty(BdrvChild *c, Error **errp)
     BlockDriver *drv = c->bs->drv;
     int ret;
 
+    GLOBAL_STATE_CODE();
     assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED));
 
     if (!drv->bdrv_make_empty) {
@@ -7096,6 +8421,8 @@ int bdrv_make_empty(BdrvChild *c, Error **errp)
  */
 BdrvChild *bdrv_cow_child(BlockDriverState *bs)
 {
+    IO_CODE();
+
     if (!bs || !bs->drv) {
         return NULL;
     }
@@ -7119,6 +8446,7 @@ BdrvChild *bdrv_cow_child(BlockDriverState *bs)
 BdrvChild *bdrv_filter_child(BlockDriverState *bs)
 {
     BdrvChild *c;
+    IO_CODE();
 
     if (!bs || !bs->drv) {
         return NULL;
@@ -7150,6 +8478,7 @@ BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs)
 {
     BdrvChild *cow_child = bdrv_cow_child(bs);
     BdrvChild *filter_child = bdrv_filter_child(bs);
+    IO_CODE();
 
     /* Filter nodes cannot have COW backing files */
     assert(!(cow_child && filter_child));
@@ -7170,6 +8499,7 @@ BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs)
 BdrvChild *bdrv_primary_child(BlockDriverState *bs)
 {
     BdrvChild *c, *found = NULL;
+    IO_CODE();
 
     QLIST_FOREACH(c, &bs->children, next) {
         if (c->role & BDRV_CHILD_PRIMARY) {
@@ -7181,8 +8511,8 @@ BdrvChild *bdrv_primary_child(BlockDriverState *bs)
     return found;
 }
 
-static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs,
-                                              bool stop_on_explicit_filter)
+static BlockDriverState * GRAPH_RDLOCK
+bdrv_do_skip_filters(BlockDriverState *bs, bool stop_on_explicit_filter)
 {
     BdrvChild *c;
 
@@ -7222,6 +8552,7 @@ static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs,
  */
 BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
 {
+    GLOBAL_STATE_CODE();
     return bdrv_do_skip_filters(bs, true);
 }
 
@@ -7231,6 +8562,7 @@ BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
  */
 BlockDriverState *bdrv_skip_filters(BlockDriverState *bs)
 {
+    IO_CODE();
     return bdrv_do_skip_filters(bs, false);
 }
 
@@ -7240,5 +8572,81 @@ BlockDriverState *bdrv_skip_filters(BlockDriverState *bs)
  */
 BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs)
 {
+    IO_CODE();
     return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs)));
 }
+
+/**
+ * Check whether [offset, offset + bytes) overlaps with the cached
+ * block-status data region.
+ *
+ * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`,
+ * which is what bdrv_bsc_is_data()'s interface needs.
+ * Otherwise, *pnum is not touched.
+ */
+static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs,
+                                           int64_t offset, int64_t bytes,
+                                           int64_t *pnum)
+{
+    BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache);
+    bool overlaps;
+
+    overlaps =
+        qatomic_read(&bsc->valid) &&
+        ranges_overlap(offset, bytes, bsc->data_start,
+                       bsc->data_end - bsc->data_start);
+
+    if (overlaps && pnum) {
+        *pnum = bsc->data_end - offset;
+    }
+
+    return overlaps;
+}
+
+/**
+ * See block_int.h for this function's documentation.
+ */
+bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum)
+{
+    IO_CODE();
+    RCU_READ_LOCK_GUARD();
+    return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum);
+}
+
+/**
+ * See block_int.h for this function's documentation.
+ */
+void bdrv_bsc_invalidate_range(BlockDriverState *bs,
+                               int64_t offset, int64_t bytes)
+{
+    IO_CODE();
+    RCU_READ_LOCK_GUARD();
+
+    if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) {
+        qatomic_set(&bs->block_status_cache->valid, false);
+    }
+}
+
+/**
+ * See block_int.h for this function's documentation.
+ */
+void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+    BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1);
+    BdrvBlockStatusCache *old_bsc;
+    IO_CODE();
+
+    *new_bsc = (BdrvBlockStatusCache) {
+        .valid = true,
+        .data_start = offset,
+        .data_end = offset + bytes,
+    };
+
+    QEMU_LOCK_GUARD(&bs->bsc_modify_lock);
+
+    old_bsc = qatomic_rcu_read(&bs->block_status_cache);
+    qatomic_rcu_set(&bs->block_status_cache, new_bsc);
+    if (old_bsc) {
+        g_free_rcu(old_bsc, rcu);
+    }
+}
index 8d41c8a83ac5d9c4eb65a1b6a760e51bae3601aa..2829745377a5743d7c7a4fd0af5a5e402e91c041 100644 (file)
@@ -38,13 +38,31 @@ void block_acct_init(BlockAcctStats *stats)
     if (qtest_enabled()) {
         clock_type = QEMU_CLOCK_VIRTUAL;
     }
+    stats->account_invalid = true;
+    stats->account_failed = true;
 }
 
-void block_acct_setup(BlockAcctStats *stats, bool account_invalid,
-                      bool account_failed)
+static bool bool_from_onoffauto(OnOffAuto val, bool def)
 {
-    stats->account_invalid = account_invalid;
-    stats->account_failed = account_failed;
+    switch (val) {
+    case ON_OFF_AUTO_AUTO:
+        return def;
+    case ON_OFF_AUTO_ON:
+        return true;
+    case ON_OFF_AUTO_OFF:
+        return false;
+    default:
+        abort();
+    }
+}
+
+void block_acct_setup(BlockAcctStats *stats, enum OnOffAuto account_invalid,
+                      enum OnOffAuto account_failed)
+{
+    stats->account_invalid = bool_from_onoffauto(account_invalid,
+                                                 stats->account_invalid);
+    stats->account_failed = bool_from_onoffauto(account_failed,
+                                                stats->account_failed);
 }
 
 void block_acct_cleanup(BlockAcctStats *stats)
@@ -199,29 +217,27 @@ static void block_account_one_io(BlockAcctStats *stats, BlockAcctCookie *cookie,
         return;
     }
 
-    qemu_mutex_lock(&stats->lock);
-
-    if (failed) {
-        stats->failed_ops[cookie->type]++;
-    } else {
-        stats->nr_bytes[cookie->type] += cookie->bytes;
-        stats->nr_ops[cookie->type]++;
-    }
+    WITH_QEMU_LOCK_GUARD(&stats->lock) {
+        if (failed) {
+            stats->failed_ops[cookie->type]++;
+        } else {
+            stats->nr_bytes[cookie->type] += cookie->bytes;
+            stats->nr_ops[cookie->type]++;
+        }
 
-    block_latency_histogram_account(&stats->latency_histogram[cookie->type],
-                                    latency_ns);
+        block_latency_histogram_account(&stats->latency_histogram[cookie->type],
+                                        latency_ns);
 
-    if (!failed || stats->account_failed) {
-        stats->total_time_ns[cookie->type] += latency_ns;
-        stats->last_access_time_ns = time_ns;
+        if (!failed || stats->account_failed) {
+            stats->total_time_ns[cookie->type] += latency_ns;
+            stats->last_access_time_ns = time_ns;
 
-        QSLIST_FOREACH(s, &stats->intervals, entries) {
-            timed_average_account(&s->latency[cookie->type], latency_ns);
+            QSLIST_FOREACH(s, &stats->intervals, entries) {
+                timed_average_account(&s->latency[cookie->type], latency_ns);
+            }
         }
     }
 
-    qemu_mutex_unlock(&stats->lock);
-
     cookie->type = BLOCK_ACCT_NONE;
 }
 
index 88989fa248cef929dd6e3b36cb43a7d661330daa..9bd17ea2c13d2e6763c5afc538021a4ea3e3330f 100644 (file)
@@ -98,6 +98,8 @@ AioTaskPool *coroutine_fn aio_task_pool_new(int max_busy_tasks)
 {
     AioTaskPool *pool = g_new0(AioTaskPool, 1);
 
+    assert(max_busy_tasks > 0);
+
     pool->main_co = qemu_coroutine_self();
     pool->max_busy_tasks = max_busy_tasks;
 
index 392df9ef83f5dd72cb67bfe73e1208b5928934e1..53a410247cfcb64296764d0b2eae476d4ee95799 100644 (file)
@@ -26,6 +26,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "qemu/job.h"
 #include "qemu/main-loop.h"
@@ -45,6 +46,7 @@ static int coroutine_fn blockdev_amend_run(Job *job, Error **errp)
 {
     BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common);
     int ret;
+    GRAPH_RDLOCK_GUARD();
 
     job_progress_set_remaining(&s->common, 1);
     ret = s->bs->drv->bdrv_co_amend(s->bs, s->opts, s->force, errp);
@@ -53,10 +55,34 @@ static int coroutine_fn blockdev_amend_run(Job *job, Error **errp)
     return ret;
 }
 
+static int GRAPH_RDLOCK
+blockdev_amend_pre_run(BlockdevAmendJob *s, Error **errp)
+{
+    if (s->bs->drv->bdrv_amend_pre_run) {
+        return s->bs->drv->bdrv_amend_pre_run(s->bs, errp);
+    }
+
+    return 0;
+}
+
+static void blockdev_amend_free(Job *job)
+{
+    BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common);
+
+    bdrv_graph_rdlock_main_loop();
+    if (s->bs->drv->bdrv_amend_clean) {
+        s->bs->drv->bdrv_amend_clean(s->bs);
+    }
+    bdrv_graph_rdunlock_main_loop();
+
+    bdrv_unref(s->bs);
+}
+
 static const JobDriver blockdev_amend_job_driver = {
     .instance_size = sizeof(BlockdevAmendJob),
     .job_type      = JOB_TYPE_AMEND,
     .run           = blockdev_amend_run,
+    .free          = blockdev_amend_free,
 };
 
 void qmp_x_blockdev_amend(const char *job_id,
@@ -71,6 +97,8 @@ void qmp_x_blockdev_amend(const char *job_id,
     BlockDriver *drv = bdrv_find_format(fmt);
     BlockDriverState *bs;
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     bs = bdrv_lookup_bs(NULL, node_name, errp);
     if (!bs) {
         return;
@@ -110,8 +138,15 @@ void qmp_x_blockdev_amend(const char *job_id,
         return;
     }
 
+    bdrv_ref(bs);
     s->bs = bs,
     s->opts = QAPI_CLONE(BlockdevAmendOptions, options),
     s->force = has_force ? force : false;
+
+    if (blockdev_amend_pre_run(s, errp)) {
+        job_early_fail(&s->common);
+        return;
+    }
+
     job_start(&s->common);
 }
diff --git a/block/backup-top.c b/block/backup-top.c
deleted file mode 100644 (file)
index fe6883c..0000000
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * backup-top filter driver
- *
- * The driver performs Copy-Before-Write (CBW) operation: it is injected above
- * some node, and before each write it copies _old_ data to the target node.
- *
- * Copyright (c) 2018-2019 Virtuozzo International GmbH.
- *
- * Author:
- *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu/osdep.h"
-
-#include "sysemu/block-backend.h"
-#include "qemu/cutils.h"
-#include "qapi/error.h"
-#include "block/block_int.h"
-#include "block/qdict.h"
-#include "block/block-copy.h"
-
-#include "block/backup-top.h"
-
-typedef struct BDRVBackupTopState {
-    BlockCopyState *bcs;
-    BdrvChild *target;
-    bool active;
-    int64_t cluster_size;
-} BDRVBackupTopState;
-
-static coroutine_fn int backup_top_co_preadv(
-        BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-        QEMUIOVector *qiov, int flags)
-{
-    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
-}
-
-static coroutine_fn int backup_top_cbw(BlockDriverState *bs, uint64_t offset,
-                                       uint64_t bytes, BdrvRequestFlags flags)
-{
-    BDRVBackupTopState *s = bs->opaque;
-    uint64_t off, end;
-
-    if (flags & BDRV_REQ_WRITE_UNCHANGED) {
-        return 0;
-    }
-
-    off = QEMU_ALIGN_DOWN(offset, s->cluster_size);
-    end = QEMU_ALIGN_UP(offset + bytes, s->cluster_size);
-
-    return block_copy(s->bcs, off, end - off, NULL);
-}
-
-static int coroutine_fn backup_top_co_pdiscard(BlockDriverState *bs,
-                                               int64_t offset, int bytes)
-{
-    int ret = backup_top_cbw(bs, offset, bytes, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_co_pdiscard(bs->backing, offset, bytes);
-}
-
-static int coroutine_fn backup_top_co_pwrite_zeroes(BlockDriverState *bs,
-        int64_t offset, int bytes, BdrvRequestFlags flags)
-{
-    int ret = backup_top_cbw(bs, offset, bytes, flags);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
-}
-
-static coroutine_fn int backup_top_co_pwritev(BlockDriverState *bs,
-                                              uint64_t offset,
-                                              uint64_t bytes,
-                                              QEMUIOVector *qiov, int flags)
-{
-    int ret = backup_top_cbw(bs, offset, bytes, flags);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
-}
-
-static int coroutine_fn backup_top_co_flush(BlockDriverState *bs)
-{
-    if (!bs->backing) {
-        return 0;
-    }
-
-    return bdrv_co_flush(bs->backing->bs);
-}
-
-static void backup_top_refresh_filename(BlockDriverState *bs)
-{
-    if (bs->backing == NULL) {
-        /*
-         * we can be here after failed bdrv_attach_child in
-         * bdrv_set_backing_hd
-         */
-        return;
-    }
-    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
-            bs->backing->bs->filename);
-}
-
-static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c,
-                                  BdrvChildRole role,
-                                  BlockReopenQueue *reopen_queue,
-                                  uint64_t perm, uint64_t shared,
-                                  uint64_t *nperm, uint64_t *nshared)
-{
-    BDRVBackupTopState *s = bs->opaque;
-
-    if (!s->active) {
-        /*
-         * The filter node may be in process of bdrv_append(), which firstly do
-         * bdrv_set_backing_hd() and then bdrv_replace_node(). This means that
-         * we can't unshare BLK_PERM_WRITE during bdrv_append() operation. So,
-         * let's require nothing during bdrv_append() and refresh permissions
-         * after it (see bdrv_backup_top_append()).
-         */
-        *nperm = 0;
-        *nshared = BLK_PERM_ALL;
-        return;
-    }
-
-    if (!(role & BDRV_CHILD_FILTERED)) {
-        /*
-         * Target child
-         *
-         * Share write to target (child_file), to not interfere
-         * with guest writes to its disk which may be in target backing chain.
-         * Can't resize during a backup block job because we check the size
-         * only upfront.
-         */
-        *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
-        *nperm = BLK_PERM_WRITE;
-    } else {
-        /* Source child */
-        bdrv_default_perms(bs, c, role, reopen_queue,
-                           perm, shared, nperm, nshared);
-
-        if (perm & BLK_PERM_WRITE) {
-            *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
-        }
-        *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
-    }
-}
-
-BlockDriver bdrv_backup_top_filter = {
-    .format_name = "backup-top",
-    .instance_size = sizeof(BDRVBackupTopState),
-
-    .bdrv_co_preadv             = backup_top_co_preadv,
-    .bdrv_co_pwritev            = backup_top_co_pwritev,
-    .bdrv_co_pwrite_zeroes      = backup_top_co_pwrite_zeroes,
-    .bdrv_co_pdiscard           = backup_top_co_pdiscard,
-    .bdrv_co_flush              = backup_top_co_flush,
-
-    .bdrv_refresh_filename      = backup_top_refresh_filename,
-
-    .bdrv_child_perm            = backup_top_child_perm,
-
-    .is_filter = true,
-};
-
-BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
-                                         BlockDriverState *target,
-                                         const char *filter_node_name,
-                                         uint64_t cluster_size,
-                                         BdrvRequestFlags write_flags,
-                                         BlockCopyState **bcs,
-                                         Error **errp)
-{
-    Error *local_err = NULL;
-    BDRVBackupTopState *state;
-    BlockDriverState *top;
-    bool appended = false;
-
-    assert(source->total_sectors == target->total_sectors);
-
-    top = bdrv_new_open_driver(&bdrv_backup_top_filter, filter_node_name,
-                               BDRV_O_RDWR, errp);
-    if (!top) {
-        return NULL;
-    }
-
-    state = top->opaque;
-    top->total_sectors = source->total_sectors;
-    top->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
-            (BDRV_REQ_FUA & source->supported_write_flags);
-    top->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
-            ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
-             source->supported_zero_flags);
-
-    bdrv_ref(target);
-    state->target = bdrv_attach_child(top, target, "target", &child_of_bds,
-                                      BDRV_CHILD_DATA, errp);
-    if (!state->target) {
-        bdrv_unref(target);
-        bdrv_unref(top);
-        return NULL;
-    }
-
-    bdrv_drained_begin(source);
-
-    bdrv_ref(top);
-    bdrv_append(top, source, &local_err);
-    if (local_err) {
-        error_prepend(&local_err, "Cannot append backup-top filter: ");
-        goto fail;
-    }
-    appended = true;
-
-    /*
-     * bdrv_append() finished successfully, now we can require permissions
-     * we want.
-     */
-    state->active = true;
-    bdrv_child_refresh_perms(top, top->backing, &local_err);
-    if (local_err) {
-        error_prepend(&local_err,
-                      "Cannot set permissions for backup-top filter: ");
-        goto fail;
-    }
-
-    state->cluster_size = cluster_size;
-    state->bcs = block_copy_state_new(top->backing, state->target,
-                                      cluster_size, write_flags, &local_err);
-    if (local_err) {
-        error_prepend(&local_err, "Cannot create block-copy-state: ");
-        goto fail;
-    }
-    *bcs = state->bcs;
-
-    bdrv_drained_end(source);
-
-    return top;
-
-fail:
-    if (appended) {
-        state->active = false;
-        bdrv_backup_top_drop(top);
-    } else {
-        bdrv_unref(top);
-    }
-
-    bdrv_drained_end(source);
-    error_propagate(errp, local_err);
-
-    return NULL;
-}
-
-void bdrv_backup_top_drop(BlockDriverState *bs)
-{
-    BDRVBackupTopState *s = bs->opaque;
-
-    bdrv_drained_begin(bs);
-
-    block_copy_state_free(s->bcs);
-
-    s->active = false;
-    bdrv_child_refresh_perms(bs, bs->backing, &error_abort);
-    bdrv_replace_node(bs, bs->backing->bs, &error_abort);
-    bdrv_set_backing_hd(bs, NULL, &error_abort);
-
-    bdrv_drained_end(bs);
-
-    bdrv_unref(bs);
-}
diff --git a/block/backup-top.h b/block/backup-top.h
deleted file mode 100644 (file)
index e5cabfa..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * backup-top filter driver
- *
- * The driver performs Copy-Before-Write (CBW) operation: it is injected above
- * some node, and before each write it copies _old_ data to the target node.
- *
- * Copyright (c) 2018-2019 Virtuozzo International GmbH.
- *
- * Author:
- *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef BACKUP_TOP_H
-#define BACKUP_TOP_H
-
-#include "block/block_int.h"
-#include "block/block-copy.h"
-
-BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
-                                         BlockDriverState *target,
-                                         const char *filter_node_name,
-                                         uint64_t cluster_size,
-                                         BdrvRequestFlags write_flags,
-                                         BlockCopyState **bcs,
-                                         Error **errp);
-void bdrv_backup_top_drop(BlockDriverState *bs);
-
-#endif /* BACKUP_TOP_H */
index 9afa0bf3b41229536a5b9e67df8d2965c734406f..8aae5836d760a2218514d3d805019165f9868743 100644 (file)
 #include "block/blockjob_int.h"
 #include "block/block_backup.h"
 #include "block/block-copy.h"
+#include "block/dirty-bitmap.h"
 #include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
-#include "qemu/ratelimit.h"
 #include "qemu/cutils.h"
 #include "sysemu/block-backend.h"
 #include "qemu/bitmap.h"
 #include "qemu/error-report.h"
 
-#include "block/backup-top.h"
-
-#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
+#include "block/copy-before-write.h"
 
 typedef struct BackupBlockJob {
     BlockJob common;
-    BlockDriverState *backup_top;
+    BlockDriverState *cbw;
     BlockDriverState *source_bs;
+    BlockDriverState *target_bs;
 
     BdrvDirtyBitmap *sync_bitmap;
 
@@ -44,40 +42,17 @@ typedef struct BackupBlockJob {
     BlockdevOnError on_source_error;
     BlockdevOnError on_target_error;
     uint64_t len;
-    uint64_t bytes_read;
     int64_t cluster_size;
+    BackupPerf perf;
 
     BlockCopyState *bcs;
+
+    bool wait;
+    BlockCopyCallState *bg_bcs_call;
 } BackupBlockJob;
 
 static const BlockJobDriver backup_job_driver;
 
-static void backup_progress_bytes_callback(int64_t bytes, void *opaque)
-{
-    BackupBlockJob *s = opaque;
-
-    s->bytes_read += bytes;
-}
-
-static int coroutine_fn backup_do_cow(BackupBlockJob *job,
-                                      int64_t offset, uint64_t bytes,
-                                      bool *error_is_read)
-{
-    int ret = 0;
-    int64_t start, end; /* bytes */
-
-    start = QEMU_ALIGN_DOWN(offset, job->cluster_size);
-    end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size);
-
-    trace_backup_do_cow_enter(job, start, offset, bytes);
-
-    ret = block_copy(job->bcs, start, end - start, error_is_read);
-
-    trace_backup_do_cow_return(job, offset, bytes, ret);
-
-    return ret;
-}
-
 static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
 {
     BdrvDirtyBitmap *bm;
@@ -126,7 +101,8 @@ static void backup_abort(Job *job)
 static void backup_clean(Job *job)
 {
     BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
-    bdrv_backup_top_drop(s->backup_top);
+    block_job_remove_all_bdrv(&s->common);
+    bdrv_cbw_drop(s->cbw);
 }
 
 void backup_do_checkpoint(BlockJob *job, Error **errp)
@@ -157,75 +133,114 @@ static BlockErrorAction backup_error_action(BackupBlockJob *job,
     }
 }
 
-static bool coroutine_fn yield_and_check(BackupBlockJob *job)
+static void coroutine_fn backup_block_copy_callback(void *opaque)
 {
-    uint64_t delay_ns;
-
-    if (job_is_cancelled(&job->common.job)) {
-        return true;
-    }
-
-    /*
-     * We need to yield even for delay_ns = 0 so that bdrv_drain_all() can
-     * return. Without a yield, the VM would not reboot.
-     */
-    delay_ns = block_job_ratelimit_get_delay(&job->common, job->bytes_read);
-    job->bytes_read = 0;
-    job_sleep_ns(&job->common.job, delay_ns);
+    BackupBlockJob *s = opaque;
 
-    if (job_is_cancelled(&job->common.job)) {
-        return true;
+    if (s->wait) {
+        s->wait = false;
+        aio_co_wake(s->common.job.co);
+    } else {
+        job_enter(&s->common.job);
     }
-
-    return false;
 }
 
 static int coroutine_fn backup_loop(BackupBlockJob *job)
 {
-    bool error_is_read;
-    int64_t offset;
-    BdrvDirtyBitmapIter *bdbi;
+    BlockCopyCallState *s = NULL;
     int ret = 0;
+    bool error_is_read;
+    BlockErrorAction act;
+
+    while (true) { /* retry loop */
+        job->bg_bcs_call = s = block_copy_async(job->bcs, 0,
+                QEMU_ALIGN_UP(job->len, job->cluster_size),
+                job->perf.max_workers, job->perf.max_chunk,
+                backup_block_copy_callback, job);
+
+        while (!block_copy_call_finished(s) &&
+               !job_is_cancelled(&job->common.job))
+        {
+            job_yield(&job->common.job);
+        }
 
-    bdbi = bdrv_dirty_iter_new(block_copy_dirty_bitmap(job->bcs));
-    while ((offset = bdrv_dirty_iter_next(bdbi)) != -1) {
-        do {
-            if (yield_and_check(job)) {
-                goto out;
-            }
-            ret = backup_do_cow(job, offset, job->cluster_size, &error_is_read);
-            if (ret < 0 && backup_error_action(job, error_is_read, -ret) ==
-                           BLOCK_ERROR_ACTION_REPORT)
-            {
-                goto out;
-            }
-        } while (ret < 0);
+        if (!block_copy_call_finished(s)) {
+            assert(job_is_cancelled(&job->common.job));
+            /*
+             * Note that we can't use job_yield() here, as it doesn't work for
+             * cancelled job.
+             */
+            block_copy_call_cancel(s);
+            job->wait = true;
+            qemu_coroutine_yield();
+            assert(block_copy_call_finished(s));
+            ret = 0;
+            goto out;
+        }
+
+        if (job_is_cancelled(&job->common.job) ||
+            block_copy_call_succeeded(s))
+        {
+            ret = 0;
+            goto out;
+        }
+
+        if (block_copy_call_cancelled(s)) {
+            /*
+             * Job is not cancelled but only block-copy call. This is possible
+             * after job pause. Now the pause is finished, start new block-copy
+             * iteration.
+             */
+            block_copy_call_free(s);
+            continue;
+        }
+
+        /* The only remaining case is failed block-copy call. */
+        assert(block_copy_call_failed(s));
+
+        ret = block_copy_call_status(s, &error_is_read);
+        act = backup_error_action(job, error_is_read, -ret);
+        switch (act) {
+        case BLOCK_ERROR_ACTION_REPORT:
+            goto out;
+        case BLOCK_ERROR_ACTION_STOP:
+            /*
+             * Go to pause prior to starting new block-copy call on the next
+             * iteration.
+             */
+            job_pause_point(&job->common.job);
+            break;
+        case BLOCK_ERROR_ACTION_IGNORE:
+            /* Proceed to new block-copy call to retry. */
+            break;
+        default:
+            abort();
+        }
+
+        block_copy_call_free(s);
     }
 
- out:
-    bdrv_dirty_iter_free(bdbi);
+out:
+    block_copy_call_free(s);
+    job->bg_bcs_call = NULL;
     return ret;
 }
 
 static void backup_init_bcs_bitmap(BackupBlockJob *job)
 {
-    bool ret;
     uint64_t estimate;
     BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs);
 
     if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
-        ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap,
-                                               NULL, true);
-        assert(ret);
-    } else {
-        if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
-            /*
-             * We can't hog the coroutine to initialize this thoroughly.
-             * Set a flag and resume work when we are able to yield safely.
-             */
-            block_copy_set_skip_unallocated(job->bcs, true);
-        }
-        bdrv_set_dirty_bitmap(bcs_bitmap, 0, job->len);
+        bdrv_clear_dirty_bitmap(bcs_bitmap, NULL);
+        bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap, NULL,
+                                         true);
+    } else if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
+        /*
+         * We can't hog the coroutine to initialize this thoroughly.
+         * Set a flag and resume work when we are able to yield safely.
+         */
+        block_copy_set_skip_unallocated(job->bcs, true);
     }
 
     estimate = bdrv_get_dirty_count(bcs_bitmap);
@@ -235,7 +250,7 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job)
 static int coroutine_fn backup_run(Job *job, Error **errp)
 {
     BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
-    int ret = 0;
+    int ret;
 
     backup_init_bcs_bitmap(s);
 
@@ -244,14 +259,22 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
         int64_t count;
 
         for (offset = 0; offset < s->len; ) {
-            if (yield_and_check(s)) {
-                ret = -ECANCELED;
-                goto out;
+            if (job_is_cancelled(job)) {
+                return -ECANCELED;
             }
 
+            job_pause_point(job);
+
+            if (job_is_cancelled(job)) {
+                return -ECANCELED;
+            }
+
+            /* rdlock protects the subsequent call to bdrv_is_allocated() */
+            bdrv_graph_co_rdlock();
             ret = block_copy_reset_unallocated(s->bcs, offset, &count);
+            bdrv_graph_co_rdunlock();
             if (ret < 0) {
-                goto out;
+                return ret;
             }
 
             offset += count;
@@ -272,11 +295,45 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
             job_yield(job);
         }
     } else {
-        ret = backup_loop(s);
+        return backup_loop(s);
     }
 
- out:
-    return ret;
+    return 0;
+}
+
+static void coroutine_fn backup_pause(Job *job)
+{
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
+
+    if (s->bg_bcs_call && !block_copy_call_finished(s->bg_bcs_call)) {
+        block_copy_call_cancel(s->bg_bcs_call);
+        s->wait = true;
+        qemu_coroutine_yield();
+    }
+}
+
+static void backup_set_speed(BlockJob *job, int64_t speed)
+{
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+
+    /*
+     * block_job_set_speed() is called first from block_job_create(), when we
+     * don't yet have s->bcs.
+     */
+    if (s->bcs) {
+        block_copy_set_speed(s->bcs, speed);
+        if (s->bg_bcs_call) {
+            block_copy_kick(s->bg_bcs_call);
+        }
+    }
+}
+
+static bool backup_cancel(Job *job, bool force)
+{
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
+
+    bdrv_cancel_in_flight(s->target_bs);
+    return true;
 }
 
 static const BlockJobDriver backup_job_driver = {
@@ -289,52 +346,19 @@ static const BlockJobDriver backup_job_driver = {
         .commit                 = backup_commit,
         .abort                  = backup_abort,
         .clean                  = backup_clean,
-    }
+        .pause                  = backup_pause,
+        .cancel                 = backup_cancel,
+    },
+    .set_speed = backup_set_speed,
 };
 
-static int64_t backup_calculate_cluster_size(BlockDriverState *target,
-                                             Error **errp)
-{
-    int ret;
-    BlockDriverInfo bdi;
-    bool target_does_cow = bdrv_backing_chain_next(target);
-
-    /*
-     * If there is no backing file on the target, we cannot rely on COW if our
-     * backup cluster size is smaller than the target cluster size. Even for
-     * targets with a backing file, try to avoid COW if possible.
-     */
-    ret = bdrv_get_info(target, &bdi);
-    if (ret == -ENOTSUP && !target_does_cow) {
-        /* Cluster size is not defined */
-        warn_report("The target block device doesn't provide "
-                    "information about the block size and it doesn't have a "
-                    "backing file. The default block size of %u bytes is "
-                    "used. If the actual block size of the target exceeds "
-                    "this default, the backup may be unusable",
-                    BACKUP_CLUSTER_SIZE_DEFAULT);
-        return BACKUP_CLUSTER_SIZE_DEFAULT;
-    } else if (ret < 0 && !target_does_cow) {
-        error_setg_errno(errp, -ret,
-            "Couldn't determine the cluster size of the target image, "
-            "which has no backing file");
-        error_append_hint(errp,
-            "Aborting, since this may create an unusable destination image\n");
-        return ret;
-    } else if (ret < 0 && target_does_cow) {
-        /* Not fatal; just trudge on ahead. */
-        return BACKUP_CLUSTER_SIZE_DEFAULT;
-    }
-
-    return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
-}
-
 BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *target, int64_t speed,
                   MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
                   BitmapSyncMode bitmap_mode,
                   bool compress,
                   const char *filter_node_name,
+                  BackupPerf *perf,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   int creation_flags,
@@ -344,12 +368,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     int64_t len, target_len;
     BackupBlockJob *job = NULL;
     int64_t cluster_size;
-    BdrvRequestFlags write_flags;
-    BlockDriverState *backup_top = NULL;
+    BlockDriverState *cbw = NULL;
     BlockCopyState *bcs = NULL;
 
     assert(bs);
     assert(target);
+    GLOBAL_STATE_CODE();
 
     /* QMP interface protects us from these cases */
     assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL);
@@ -360,29 +384,42 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
         return NULL;
     }
 
+    bdrv_graph_rdlock_main_loop();
     if (!bdrv_is_inserted(bs)) {
         error_setg(errp, "Device is not inserted: %s",
                    bdrv_get_device_name(bs));
-        return NULL;
+        goto error_rdlock;
     }
 
     if (!bdrv_is_inserted(target)) {
         error_setg(errp, "Device is not inserted: %s",
                    bdrv_get_device_name(target));
-        return NULL;
+        goto error_rdlock;
     }
 
     if (compress && !bdrv_supports_compressed_writes(target)) {
         error_setg(errp, "Compression is not supported for this drive %s",
                    bdrv_get_device_name(target));
-        return NULL;
+        goto error_rdlock;
     }
 
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
-        return NULL;
+        goto error_rdlock;
     }
 
     if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
+        goto error_rdlock;
+    }
+    bdrv_graph_rdunlock_main_loop();
+
+    if (perf->max_workers < 1 || perf->max_workers > INT_MAX) {
+        error_setg(errp, "max-workers must be between 1 and %d", INT_MAX);
+        return NULL;
+    }
+
+    if (perf->max_chunk < 0) {
+        error_setg(errp, "max-chunk must be zero (which means no limit) or "
+                   "positive");
         return NULL;
     }
 
@@ -401,6 +438,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
 
     len = bdrv_getlength(bs);
     if (len < 0) {
+        GRAPH_RDLOCK_GUARD_MAINLOOP();
         error_setg_errno(errp, -len, "Unable to get length for '%s'",
                          bdrv_get_device_or_node_name(bs));
         goto error;
@@ -408,6 +446,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
 
     target_len = bdrv_getlength(target);
     if (target_len < 0) {
+        GRAPH_RDLOCK_GUARD_MAINLOOP();
         error_setg_errno(errp, -target_len, "Unable to get length for '%s'",
                          bdrv_get_device_or_node_name(bs));
         goto error;
@@ -418,44 +457,30 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
         goto error;
     }
 
-    cluster_size = backup_calculate_cluster_size(target, errp);
-    if (cluster_size < 0) {
+    cbw = bdrv_cbw_append(bs, target, filter_node_name, &bcs, errp);
+    if (!cbw) {
         goto error;
     }
 
-    /*
-     * If source is in backing chain of target assume that target is going to be
-     * used for "image fleecing", i.e. it should represent a kind of snapshot of
-     * source at backup-start point in time. And target is going to be read by
-     * somebody (for example, used as NBD export) during backup job.
-     *
-     * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid
-     * intersection of backup writes and third party reads from target,
-     * otherwise reading from target we may occasionally read already updated by
-     * guest data.
-     *
-     * For more information see commit f8d59dfb40bb and test
-     * tests/qemu-iotests/222
-     */
-    write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) |
-                  (compress ? BDRV_REQ_WRITE_COMPRESSED : 0),
+    cluster_size = block_copy_cluster_size(bcs);
 
-    backup_top = bdrv_backup_top_append(bs, target, filter_node_name,
-                                        cluster_size, write_flags, &bcs, errp);
-    if (!backup_top) {
+    if (perf->max_chunk && perf->max_chunk < cluster_size) {
+        error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup "
+                   "cluster size (%" PRIi64 ")", perf->max_chunk, cluster_size);
         goto error;
     }
 
     /* job->len is fixed, so we can't allow resize */
-    job = block_job_create(job_id, &backup_job_driver, txn, backup_top,
+    job = block_job_create(job_id, &backup_job_driver, txn, cbw,
                            0, BLK_PERM_ALL,
                            speed, creation_flags, cb, opaque, errp);
     if (!job) {
         goto error;
     }
 
-    job->backup_top = backup_top;
+    job->cbw = cbw;
     job->source_bs = bs;
+    job->target_bs = target;
     job->on_source_error = on_source_error;
     job->on_target_error = on_target_error;
     job->sync_mode = sync_mode;
@@ -464,13 +489,17 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     job->bcs = bcs;
     job->cluster_size = cluster_size;
     job->len = len;
+    job->perf = *perf;
 
-    block_copy_set_progress_callback(bcs, backup_progress_bytes_callback, job);
+    block_copy_set_copy_opts(bcs, perf->use_copy_range, compress);
     block_copy_set_progress_meter(bcs, &job->common.job.progress);
+    block_copy_set_speed(bcs, speed);
 
-    /* Required permissions are already taken by backup-top target */
+    /* Required permissions are taken by copy-before-write filter target */
+    bdrv_graph_wrlock(target);
     block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
                        &error_abort);
+    bdrv_graph_wrunlock(target);
 
     return &job->common;
 
@@ -478,9 +507,13 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     if (sync_bitmap) {
         bdrv_reclaim_dirty_bitmap(sync_bitmap, NULL);
     }
-    if (backup_top) {
-        bdrv_backup_top_drop(backup_top);
+    if (cbw) {
+        bdrv_cbw_drop(cbw);
     }
 
     return NULL;
+
+error_rdlock:
+    bdrv_graph_rdunlock_main_loop();
+    return NULL;
 }
index 5fe6172da98902056062cfb2fe490bd9841b5898..9da8c9eddc2fdc49a6bf431a1fe096ab3e40381f 100644 (file)
@@ -27,6 +27,7 @@
 #include "qapi/error.h"
 #include "qemu/cutils.h"
 #include "qemu/config-file.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "block/qdict.h"
 #include "qemu/module.h"
 #include "qapi/qobject-input-visitor.h"
 #include "sysemu/qtest.h"
 
+/* All APIs are thread-safe */
+
 typedef struct BDRVBlkdebugState {
-    int state;
-    int new_state;
+    /* IN: initialized in blkdebug_open() and never changed */
     uint64_t align;
     uint64_t max_transfer;
     uint64_t opt_write_zero;
     uint64_t max_write_zero;
     uint64_t opt_discard;
     uint64_t max_discard;
-
+    char *config_file; /* For blkdebug_refresh_filename() */
+    /* initialized in blkdebug_parse_perms() */
     uint64_t take_child_perms;
     uint64_t unshare_child_perms;
 
-    /* For blkdebug_refresh_filename() */
-    char *config_file;
-
+    /* State. Protected by lock */
+    int state;
     QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
     QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
     QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
+    QemuMutex lock;
 } BDRVBlkdebugState;
 
 typedef struct BlkdebugAIOCB {
@@ -65,8 +68,11 @@ typedef struct BlkdebugAIOCB {
 } BlkdebugAIOCB;
 
 typedef struct BlkdebugSuspendedReq {
+    /* IN: initialized in suspend_request() */
     Coroutine *co;
     char *tag;
+
+    /* List entry protected BDRVBlkdebugState's lock */
     QLIST_ENTRY(BlkdebugSuspendedReq) next;
 } BlkdebugSuspendedReq;
 
@@ -74,9 +80,11 @@ enum {
     ACTION_INJECT_ERROR,
     ACTION_SET_STATE,
     ACTION_SUSPEND,
+    ACTION__MAX,
 };
 
 typedef struct BlkdebugRule {
+    /* IN: initialized in add_rule() or blkdebug_debug_breakpoint() */
     BlkdebugEvent event;
     int action;
     int state;
@@ -95,6 +103,8 @@ typedef struct BlkdebugRule {
             char *tag;
         } suspend;
     } options;
+
+    /* List entries protected BDRVBlkdebugState's lock */
     QLIST_ENTRY(BlkdebugRule) next;
     QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
 } BlkdebugRule;
@@ -244,11 +254,14 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
     };
 
     /* Add the rule */
+    qemu_mutex_lock(&s->lock);
     QLIST_INSERT_HEAD(&s->rules[event], rule, next);
+    qemu_mutex_unlock(&s->lock);
 
     return 0;
 }
 
+/* Called with lock held or from .bdrv_close */
 static void remove_rule(BlkdebugRule *rule)
 {
     switch (rule->action) {
@@ -279,16 +292,13 @@ static int read_config(BDRVBlkdebugState *s, const char *filename,
             return -errno;
         }
 
-        ret = qemu_config_parse(f, config_groups, filename);
+        ret = qemu_config_parse(f, config_groups, filename, errp);
         if (ret < 0) {
-            error_setg(errp, "Could not parse blkdebug config file");
             goto fail;
         }
     }
 
-    qemu_config_parse_qdict(options, config_groups, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (!qemu_config_parse_qdict(options, config_groups, errp)) {
         ret = -EINVAL;
         goto fail;
     }
@@ -465,10 +475,10 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
 {
     BDRVBlkdebugState *s = bs->opaque;
     QemuOpts *opts;
-    Error *local_err = NULL;
     int ret;
     uint64_t align;
 
+    qemu_mutex_init(&s->lock);
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     if (!qemu_opts_absorb_qdict(opts, options, errp)) {
         ret = -EINVAL;
@@ -492,16 +502,14 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* Open the image file */
-    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
-                               bs, &child_of_bds,
-                               BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-                               false, &local_err);
-    if (local_err) {
-        ret = -EINVAL;
-        error_propagate(errp, local_err);
+    ret = bdrv_open_file_child(qemu_opt_get(opts, "x-image"), options, "image",
+                               bs, errp);
+    if (ret < 0) {
         goto out;
     }
 
+    bdrv_graph_rdlock_main_loop();
+
     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
         (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
     bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
@@ -514,7 +522,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
     if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
         error_setg(errp, "Cannot meet constraints with align %" PRIu64,
                    s->align);
-        goto out;
+        goto out_rdlock;
     }
     align = MAX(s->align, bs->file->bs->bl.request_alignment);
 
@@ -524,7 +532,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
          !QEMU_IS_ALIGNED(s->max_transfer, align))) {
         error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
                    s->max_transfer);
-        goto out;
+        goto out_rdlock;
     }
 
     s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
@@ -533,7 +541,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
          !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
         error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
                    s->opt_write_zero);
-        goto out;
+        goto out_rdlock;
     }
 
     s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
@@ -543,7 +551,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
                           MAX(s->opt_write_zero, align)))) {
         error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
                    s->max_write_zero);
-        goto out;
+        goto out_rdlock;
     }
 
     s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
@@ -552,7 +560,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
          !QEMU_IS_ALIGNED(s->opt_discard, align))) {
         error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
                    s->opt_discard);
-        goto out;
+        goto out_rdlock;
     }
 
     s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
@@ -562,28 +570,32 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
                           MAX(s->opt_discard, align)))) {
         error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
                    s->max_discard);
-        goto out;
+        goto out_rdlock;
     }
 
     bdrv_debug_event(bs, BLKDBG_NONE);
 
     ret = 0;
+out_rdlock:
+    bdrv_graph_rdunlock_main_loop();
 out:
     if (ret < 0) {
+        qemu_mutex_destroy(&s->lock);
         g_free(s->config_file);
     }
     qemu_opts_del(opts);
     return ret;
 }
 
-static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                      BlkdebugIOType iotype)
+static int coroutine_fn rule_check(BlockDriverState *bs, uint64_t offset,
+                                   uint64_t bytes, BlkdebugIOType iotype)
 {
     BDRVBlkdebugState *s = bs->opaque;
     BlkdebugRule *rule = NULL;
     int error;
     bool immediately;
 
+    qemu_mutex_lock(&s->lock);
     QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
         uint64_t inject_offset = rule->options.inject.offset;
 
@@ -597,6 +609,7 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     }
 
     if (!rule || !rule->options.inject.error) {
+        qemu_mutex_unlock(&s->lock);
         return 0;
     }
 
@@ -608,6 +621,7 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
         remove_rule(rule);
     }
 
+    qemu_mutex_unlock(&s->lock);
     if (!immediately) {
         aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
         qemu_coroutine_yield();
@@ -616,9 +630,9 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     return -error;
 }
 
-static int coroutine_fn
-blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                   QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                   QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     int err;
 
@@ -637,9 +651,9 @@ blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
 
-static int coroutine_fn
-blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                    QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                    QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     int err;
 
@@ -658,7 +672,7 @@ blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 }
 
-static int blkdebug_co_flush(BlockDriverState *bs)
+static int GRAPH_RDLOCK coroutine_fn blkdebug_co_flush(BlockDriverState *bs)
 {
     int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH);
 
@@ -669,9 +683,9 @@ static int blkdebug_co_flush(BlockDriverState *bs)
     return bdrv_co_flush(bs->file->bs);
 }
 
-static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
-                                                  int64_t offset, int bytes,
-                                                  BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkdebug_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          BdrvRequestFlags flags)
 {
     uint32_t align = MAX(bs->bl.request_alignment,
                          bs->bl.pwrite_zeroes_alignment);
@@ -702,8 +716,8 @@ static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
-static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int bytes)
+static int coroutine_fn GRAPH_RDLOCK
+blkdebug_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     uint32_t align = bs->bl.pdiscard_alignment;
     int err;
@@ -736,13 +750,10 @@ static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
 
-static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs,
-                                                 bool want_zero,
-                                                 int64_t offset,
-                                                 int64_t bytes,
-                                                 int64_t *pnum,
-                                                 int64_t *map,
-                                                 BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+blkdebug_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
+                         int64_t bytes, int64_t *pnum, int64_t *map,
+                         BlockDriverState **file)
 {
     int err;
 
@@ -773,78 +784,81 @@ static void blkdebug_close(BlockDriverState *bs)
     }
 
     g_free(s->config_file);
+    qemu_mutex_destroy(&s->lock);
 }
 
+/* Called with lock held.  */
 static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
 {
     BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugSuspendedReq r;
+    BlkdebugSuspendedReq *r;
 
-    r = (BlkdebugSuspendedReq) {
-        .co         = qemu_coroutine_self(),
-        .tag        = g_strdup(rule->options.suspend.tag),
-    };
+    r = g_new(BlkdebugSuspendedReq, 1);
+
+    r->co         = qemu_coroutine_self();
+    r->tag        = g_strdup(rule->options.suspend.tag);
 
     remove_rule(rule);
-    QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next);
+    QLIST_INSERT_HEAD(&s->suspended_reqs, r, next);
 
     if (!qtest_enabled()) {
-        printf("blkdebug: Suspended request '%s'\n", r.tag);
+        printf("blkdebug: Suspended request '%s'\n", r->tag);
     }
-    qemu_coroutine_yield();
-    if (!qtest_enabled()) {
-        printf("blkdebug: Resuming request '%s'\n", r.tag);
-    }
-
-    QLIST_REMOVE(&r, next);
-    g_free(r.tag);
 }
 
-static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
-    bool injected)
+/* Called with lock held.  */
+static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
+                         int *action_count, int *new_state)
 {
     BDRVBlkdebugState *s = bs->opaque;
 
     /* Only process rules for the current state */
     if (rule->state && rule->state != s->state) {
-        return injected;
+        return;
     }
 
     /* Take the action */
+    action_count[rule->action]++;
     switch (rule->action) {
     case ACTION_INJECT_ERROR:
-        if (!injected) {
+        if (action_count[ACTION_INJECT_ERROR] == 1) {
             QSIMPLEQ_INIT(&s->active_rules);
-            injected = true;
         }
         QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next);
         break;
 
     case ACTION_SET_STATE:
-        s->new_state = rule->options.set_state.new_state;
+        *new_state = rule->options.set_state.new_state;
         break;
 
     case ACTION_SUSPEND:
         suspend_request(bs, rule);
         break;
     }
-    return injected;
 }
 
-static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
+static void coroutine_fn
+blkdebug_co_debug_event(BlockDriverState *bs, BlkdebugEvent event)
 {
     BDRVBlkdebugState *s = bs->opaque;
     struct BlkdebugRule *rule, *next;
-    bool injected;
+    int new_state;
+    int actions_count[ACTION__MAX] = { 0 };
 
     assert((int)event >= 0 && event < BLKDBG__MAX);
 
-    injected = false;
-    s->new_state = s->state;
-    QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
-        injected = process_rule(bs, rule, injected);
+    WITH_QEMU_LOCK_GUARD(&s->lock) {
+        new_state = s->state;
+        QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
+            process_rule(bs, rule, actions_count, &new_state);
+        }
+        s->state = new_state;
+    }
+
+    while (actions_count[ACTION_SUSPEND] > 0) {
+        qemu_coroutine_yield();
+        actions_count[ACTION_SUSPEND]--;
     }
-    s->state = s->new_state;
 }
 
 static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
@@ -867,33 +881,64 @@ static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
         .options.suspend.tag = g_strdup(tag),
     };
 
+    qemu_mutex_lock(&s->lock);
     QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next);
+    qemu_mutex_unlock(&s->lock);
 
     return 0;
 }
 
-static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
+/* Called with lock held. May temporarily release lock. */
+static int resume_req_by_tag(BDRVBlkdebugState *s, const char *tag, bool all)
 {
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugSuspendedReq *r, *next;
+    BlkdebugSuspendedReq *r;
 
-    QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) {
+retry:
+    /*
+     * No need for _SAFE, since a different coroutine can remove another node
+     * (not the current one) in this list, and when the current one is removed
+     * the iteration starts back from beginning anyways.
+     */
+    QLIST_FOREACH(r, &s->suspended_reqs, next) {
         if (!strcmp(r->tag, tag)) {
-            qemu_coroutine_enter(r->co);
+            Coroutine *co = r->co;
+
+            if (!qtest_enabled()) {
+                printf("blkdebug: Resuming request '%s'\n", r->tag);
+            }
+
+            QLIST_REMOVE(r, next);
+            g_free(r->tag);
+            g_free(r);
+
+            qemu_mutex_unlock(&s->lock);
+            qemu_coroutine_enter(co);
+            qemu_mutex_lock(&s->lock);
+
+            if (all) {
+                goto retry;
+            }
             return 0;
         }
     }
     return -ENOENT;
 }
 
+static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
+{
+    BDRVBlkdebugState *s = bs->opaque;
+    QEMU_LOCK_GUARD(&s->lock);
+    return resume_req_by_tag(s, tag, false);
+}
+
 static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
                                             const char *tag)
 {
     BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugSuspendedReq *r, *r_next;
     BlkdebugRule *rule, *next;
     int i, ret = -ENOENT;
 
+    QEMU_LOCK_GUARD(&s->lock);
     for (i = 0; i < BLKDBG__MAX; i++) {
         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
             if (rule->action == ACTION_SUSPEND &&
@@ -903,11 +948,8 @@ static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
             }
         }
     }
-    QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) {
-        if (!strcmp(r->tag, tag)) {
-            qemu_coroutine_enter(r->co);
-            ret = 0;
-        }
+    if (resume_req_by_tag(s, tag, true) == 0) {
+        ret = 0;
     }
     return ret;
 }
@@ -917,6 +959,7 @@ static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
     BDRVBlkdebugState *s = bs->opaque;
     BlkdebugSuspendedReq *r;
 
+    QEMU_LOCK_GUARD(&s->lock);
     QLIST_FOREACH(r, &s->suspended_reqs, next) {
         if (!strcmp(r->tag, tag)) {
             return true;
@@ -925,12 +968,13 @@ static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
     return false;
 }
 
-static int64_t blkdebug_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+blkdebug_co_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file->bs);
+    return bdrv_co_getlength(bs->file->bs);
 }
 
-static void blkdebug_refresh_filename(BlockDriverState *bs)
+static void GRAPH_RDLOCK blkdebug_refresh_filename(BlockDriverState *bs)
 {
     BDRVBlkdebugState *s = bs->opaque;
     const QDictEntry *e;
@@ -1034,7 +1078,7 @@ static BlockDriver bdrv_blkdebug = {
     .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
     .bdrv_child_perm        = blkdebug_child_perm,
 
-    .bdrv_getlength         = blkdebug_getlength,
+    .bdrv_co_getlength      = blkdebug_co_getlength,
     .bdrv_refresh_filename  = blkdebug_refresh_filename,
     .bdrv_refresh_limits    = blkdebug_refresh_limits,
 
@@ -1045,7 +1089,7 @@ static BlockDriver bdrv_blkdebug = {
     .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
     .bdrv_co_block_status   = blkdebug_co_block_status,
 
-    .bdrv_debug_event           = blkdebug_debug_event,
+    .bdrv_co_debug_event        = blkdebug_co_debug_event,
     .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
     .bdrv_debug_remove_breakpoint
                                 = blkdebug_debug_remove_breakpoint,
diff --git a/block/blkio.c b/block/blkio.c
new file mode 100644 (file)
index 0000000..bc2f217
--- /dev/null
@@ -0,0 +1,1149 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * libblkio BlockDriver
+ *
+ * Copyright Red Hat, Inc.
+ *
+ * Author:
+ *   Stefan Hajnoczi <stefanha@redhat.com>
+ */
+
+#include "qemu/osdep.h"
+#include <blkio.h>
+#include "block/block_int.h"
+#include "exec/memory.h"
+#include "exec/cpu-common.h" /* for qemu_ram_get_fd() */
+#include "qemu/defer-call.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qapi/qmp/qdict.h"
+#include "qemu/module.h"
+#include "sysemu/block-backend.h"
+#include "exec/memory.h" /* for ram_block_discard_disable() */
+
+#include "block/block-io.h"
+
+/*
+ * Allocated bounce buffers are kept in a list sorted by buffer address.
+ */
+typedef struct BlkioBounceBuf {
+    QLIST_ENTRY(BlkioBounceBuf) next;
+
+    /* The bounce buffer */
+    struct iovec buf;
+} BlkioBounceBuf;
+
+typedef struct {
+    /*
+     * libblkio is not thread-safe so this lock protects ->blkio and
+     * ->blkioq.
+     */
+    QemuMutex blkio_lock;
+    struct blkio *blkio;
+    struct blkioq *blkioq; /* make this multi-queue in the future... */
+    int completion_fd;
+
+    /*
+     * Polling fetches the next completion into this field.
+     *
+     * No lock is necessary since only one thread calls aio_poll() and invokes
+     * fd and poll handlers.
+     */
+    struct blkio_completion poll_completion;
+
+    /*
+     * Protects ->bounce_pool, ->bounce_bufs, ->bounce_available.
+     *
+     * Lock ordering: ->bounce_lock before ->blkio_lock.
+     */
+    CoMutex bounce_lock;
+
+    /* Bounce buffer pool */
+    struct blkio_mem_region bounce_pool;
+
+    /* Sorted list of allocated bounce buffers */
+    QLIST_HEAD(, BlkioBounceBuf) bounce_bufs;
+
+    /* Queue for coroutines waiting for bounce buffer space */
+    CoQueue bounce_available;
+
+    /* The value of the "mem-region-alignment" property */
+    uint64_t mem_region_alignment;
+
+    /* Can we skip adding/deleting blkio_mem_regions? */
+    bool needs_mem_regions;
+
+    /* Are file descriptors necessary for blkio_mem_regions? */
+    bool needs_mem_region_fd;
+
+    /* Are madvise(MADV_DONTNEED)-style operations unavailable? */
+    bool may_pin_mem_regions;
+} BDRVBlkioState;
+
+/* Called with s->bounce_lock held */
+static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes)
+{
+    /* There can be no allocated bounce buffers during resize */
+    assert(QLIST_EMPTY(&s->bounce_bufs));
+
+    /* Pad size to reduce frequency of resize calls */
+    bytes += 128 * 1024;
+
+    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+        int ret;
+
+        if (s->bounce_pool.addr) {
+            blkio_unmap_mem_region(s->blkio, &s->bounce_pool);
+            blkio_free_mem_region(s->blkio, &s->bounce_pool);
+            memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
+        }
+
+        /* Automatically freed when s->blkio is destroyed */
+        ret = blkio_alloc_mem_region(s->blkio, &s->bounce_pool, bytes);
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = blkio_map_mem_region(s->blkio, &s->bounce_pool);
+        if (ret < 0) {
+            blkio_free_mem_region(s->blkio, &s->bounce_pool);
+            memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/* Called with s->bounce_lock held */
+static bool
+blkio_do_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
+                             int64_t bytes)
+{
+    void *addr = s->bounce_pool.addr;
+    BlkioBounceBuf *cur = NULL;
+    BlkioBounceBuf *prev = NULL;
+    ptrdiff_t space;
+
+    /*
+     * This is just a linear search over the holes between requests. An
+     * efficient allocator would be nice.
+     */
+    QLIST_FOREACH(cur, &s->bounce_bufs, next) {
+        space = cur->buf.iov_base - addr;
+        if (bytes <= space) {
+            QLIST_INSERT_BEFORE(cur, bounce, next);
+            bounce->buf.iov_base = addr;
+            bounce->buf.iov_len = bytes;
+            return true;
+        }
+
+        addr = cur->buf.iov_base + cur->buf.iov_len;
+        prev = cur;
+    }
+
+    /* Is there space after the last request? */
+    space = s->bounce_pool.addr + s->bounce_pool.len - addr;
+    if (bytes > space) {
+        return false;
+    }
+    if (prev) {
+        QLIST_INSERT_AFTER(prev, bounce, next);
+    } else {
+        QLIST_INSERT_HEAD(&s->bounce_bufs, bounce, next);
+    }
+    bounce->buf.iov_base = addr;
+    bounce->buf.iov_len = bytes;
+    return true;
+}
+
+static int coroutine_fn
+blkio_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
+                          int64_t bytes)
+{
+    /*
+     * Ensure fairness: first time around we join the back of the queue,
+     * subsequently we join the front so we don't lose our place.
+     */
+    CoQueueWaitFlags wait_flags = 0;
+
+    QEMU_LOCK_GUARD(&s->bounce_lock);
+
+    /* Ensure fairness: don't even try if other requests are already waiting */
+    if (!qemu_co_queue_empty(&s->bounce_available)) {
+        qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
+                                 wait_flags);
+        wait_flags = CO_QUEUE_WAIT_FRONT;
+    }
+
+    while (true) {
+        if (blkio_do_alloc_bounce_buffer(s, bounce, bytes)) {
+            /* Kick the next queued request since there may be space */
+            qemu_co_queue_next(&s->bounce_available);
+            return 0;
+        }
+
+        /*
+         * If there are no in-flight requests then the pool was simply too
+         * small.
+         */
+        if (QLIST_EMPTY(&s->bounce_bufs)) {
+            bool ok;
+            int ret;
+
+            ret = blkio_resize_bounce_pool(s, bytes);
+            if (ret < 0) {
+                /* Kick the next queued request since that may fail too */
+                qemu_co_queue_next(&s->bounce_available);
+                return ret;
+            }
+
+            ok = blkio_do_alloc_bounce_buffer(s, bounce, bytes);
+            assert(ok); /* must have space this time */
+            return 0;
+        }
+
+        qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
+                                 wait_flags);
+        wait_flags = CO_QUEUE_WAIT_FRONT;
+    }
+}
+
+static void coroutine_fn blkio_free_bounce_buffer(BDRVBlkioState *s,
+                                                  BlkioBounceBuf *bounce)
+{
+    QEMU_LOCK_GUARD(&s->bounce_lock);
+
+    QLIST_REMOVE(bounce, next);
+
+    /* Wake up waiting coroutines since space may now be available */
+    qemu_co_queue_next(&s->bounce_available);
+}
+
+/* For async to .bdrv_co_*() conversion */
+typedef struct {
+    Coroutine *coroutine;
+    int ret;
+} BlkioCoData;
+
+static void blkio_completion_fd_read(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    BDRVBlkioState *s = bs->opaque;
+    uint64_t val;
+    int ret;
+
+    /* Polling may have already fetched a completion */
+    if (s->poll_completion.user_data != NULL) {
+        BlkioCoData *cod = s->poll_completion.user_data;
+        cod->ret = s->poll_completion.ret;
+
+        /* Clear it in case aio_co_wake() enters a nested event loop */
+        s->poll_completion.user_data = NULL;
+
+        aio_co_wake(cod->coroutine);
+    }
+
+    /* Reset completion fd status */
+    ret = read(s->completion_fd, &val, sizeof(val));
+
+    /* Ignore errors, there's nothing we can do */
+    (void)ret;
+
+    /*
+     * Reading one completion at a time makes nested event loop re-entrancy
+     * simple. Change this loop to get multiple completions in one go if it
+     * becomes a performance bottleneck.
+     */
+    while (true) {
+        struct blkio_completion completion;
+
+        WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+            ret = blkioq_do_io(s->blkioq, &completion, 0, 1, NULL);
+        }
+        if (ret != 1) {
+            break;
+        }
+
+        BlkioCoData *cod = completion.user_data;
+        cod->ret = completion.ret;
+        aio_co_wake(cod->coroutine);
+    }
+}
+
+static bool blkio_completion_fd_poll(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    BDRVBlkioState *s = bs->opaque;
+    int ret;
+
+    /* Just in case we already fetched a completion */
+    if (s->poll_completion.user_data != NULL) {
+        return true;
+    }
+
+    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+        ret = blkioq_do_io(s->blkioq, &s->poll_completion, 0, 1, NULL);
+    }
+    return ret == 1;
+}
+
+static void blkio_completion_fd_poll_ready(void *opaque)
+{
+    blkio_completion_fd_read(opaque);
+}
+
+static void blkio_attach_aio_context(BlockDriverState *bs,
+                                     AioContext *new_context)
+{
+    BDRVBlkioState *s = bs->opaque;
+
+    aio_set_fd_handler(new_context, s->completion_fd,
+                       blkio_completion_fd_read, NULL,
+                       blkio_completion_fd_poll,
+                       blkio_completion_fd_poll_ready, bs);
+}
+
+static void blkio_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVBlkioState *s = bs->opaque;
+
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->completion_fd, NULL, NULL,
+                       NULL, NULL, NULL);
+}
+
+/*
+ * Called by defer_call_end() or immediately if not in a deferred section.
+ * Called without blkio_lock.
+ */
+static void blkio_deferred_fn(void *opaque)
+{
+    BDRVBlkioState *s = opaque;
+
+    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+        blkioq_do_io(s->blkioq, NULL, 0, 0, NULL);
+    }
+}
+
+/*
+ * Schedule I/O submission after enqueuing a new request. Called without
+ * blkio_lock.
+ */
+static void blkio_submit_io(BlockDriverState *bs)
+{
+    BDRVBlkioState *s = bs->opaque;
+
+    defer_call(blkio_deferred_fn, s);
+}
+
+static int coroutine_fn
+blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+    BDRVBlkioState *s = bs->opaque;
+    BlkioCoData cod = {
+        .coroutine = qemu_coroutine_self(),
+    };
+
+    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+        blkioq_discard(s->blkioq, offset, bytes, &cod, 0);
+    }
+
+    blkio_submit_io(bs);
+    qemu_coroutine_yield();
+    return cod.ret;
+}
+
+static int coroutine_fn
+blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+    BlkioCoData cod = {
+        .coroutine = qemu_coroutine_self(),
+    };
+    BDRVBlkioState *s = bs->opaque;
+    bool use_bounce_buffer =
+        s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
+    BlkioBounceBuf bounce;
+    struct iovec *iov = qiov->iov;
+    int iovcnt = qiov->niov;
+
+    if (use_bounce_buffer) {
+        int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
+        if (ret < 0) {
+            return ret;
+        }
+
+        iov = &bounce.buf;
+        iovcnt = 1;
+    }
+
+    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+        blkioq_readv(s->blkioq, offset, iov, iovcnt, &cod, 0);
+    }
+
+    blkio_submit_io(bs);
+    qemu_coroutine_yield();
+
+    if (use_bounce_buffer) {
+        if (cod.ret == 0) {
+            qemu_iovec_from_buf(qiov, 0,
+                                bounce.buf.iov_base,
+                                bounce.buf.iov_len);
+        }
+
+        blkio_free_bounce_buffer(s, &bounce);
+    }
+
+    return cod.ret;
+}
+
+static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset,
+        int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+    uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0;
+    BlkioCoData cod = {
+        .coroutine = qemu_coroutine_self(),
+    };
+    BDRVBlkioState *s = bs->opaque;
+    bool use_bounce_buffer =
+        s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
+    BlkioBounceBuf bounce;
+    struct iovec *iov = qiov->iov;
+    int iovcnt = qiov->niov;
+
+    if (use_bounce_buffer) {
+        int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
+        if (ret < 0) {
+            return ret;
+        }
+
+        qemu_iovec_to_buf(qiov, 0, bounce.buf.iov_base, bytes);
+        iov = &bounce.buf;
+        iovcnt = 1;
+    }
+
+    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+        blkioq_writev(s->blkioq, offset, iov, iovcnt, &cod, blkio_flags);
+    }
+
+    blkio_submit_io(bs);
+    qemu_coroutine_yield();
+
+    if (use_bounce_buffer) {
+        blkio_free_bounce_buffer(s, &bounce);
+    }
+
+    return cod.ret;
+}
+
+static int coroutine_fn blkio_co_flush(BlockDriverState *bs)
+{
+    BDRVBlkioState *s = bs->opaque;
+    BlkioCoData cod = {
+        .coroutine = qemu_coroutine_self(),
+    };
+
+    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+        blkioq_flush(s->blkioq, &cod, 0);
+    }
+
+    blkio_submit_io(bs);
+    qemu_coroutine_yield();
+    return cod.ret;
+}
+
+static int coroutine_fn blkio_co_pwrite_zeroes(BlockDriverState *bs,
+    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+{
+    BDRVBlkioState *s = bs->opaque;
+    BlkioCoData cod = {
+        .coroutine = qemu_coroutine_self(),
+    };
+    uint32_t blkio_flags = 0;
+
+    if (flags & BDRV_REQ_FUA) {
+        blkio_flags |= BLKIO_REQ_FUA;
+    }
+    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+        blkio_flags |= BLKIO_REQ_NO_UNMAP;
+    }
+    if (flags & BDRV_REQ_NO_FALLBACK) {
+        blkio_flags |= BLKIO_REQ_NO_FALLBACK;
+    }
+
+    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+        blkioq_write_zeroes(s->blkioq, offset, bytes, &cod, blkio_flags);
+    }
+
+    blkio_submit_io(bs);
+    qemu_coroutine_yield();
+    return cod.ret;
+}
+
+typedef enum {
+    BMRR_OK,
+    BMRR_SKIP,
+    BMRR_FAIL,
+} BlkioMemRegionResult;
+
+/*
+ * Produce a struct blkio_mem_region for a given address and size.
+ *
+ * This function produces identical results when called multiple times with the
+ * same arguments. This property is necessary because blkio_unmap_mem_region()
+ * must receive the same struct blkio_mem_region field values that were passed
+ * to blkio_map_mem_region().
+ */
+static BlkioMemRegionResult
+blkio_mem_region_from_host(BlockDriverState *bs,
+                           void *host, size_t size,
+                           struct blkio_mem_region *region,
+                           Error **errp)
+{
+    BDRVBlkioState *s = bs->opaque;
+    int fd = -1;
+    ram_addr_t fd_offset = 0;
+
+    if (((uintptr_t)host | size) % s->mem_region_alignment) {
+        error_setg(errp, "unaligned buf %p with size %zu", host, size);
+        return BMRR_FAIL;
+    }
+
+    /* Attempt to find the fd for the underlying memory */
+    if (s->needs_mem_region_fd) {
+        RAMBlock *ram_block;
+        RAMBlock *end_block;
+        ram_addr_t offset;
+
+        /*
+         * bdrv_register_buf() is called with the BQL held so mr lives at least
+         * until this function returns.
+         */
+        ram_block = qemu_ram_block_from_host(host, false, &fd_offset);
+        if (ram_block) {
+            fd = qemu_ram_get_fd(ram_block);
+        }
+        if (fd == -1) {
+            /*
+             * Ideally every RAMBlock would have an fd. pc-bios and other
+             * things don't. Luckily they are usually not I/O buffers and we
+             * can just ignore them.
+             */
+            return BMRR_SKIP;
+        }
+
+        /* Make sure the fd covers the entire range */
+        end_block = qemu_ram_block_from_host(host + size - 1, false, &offset);
+        if (ram_block != end_block) {
+            error_setg(errp, "registered buffer at %p with size %zu extends "
+                       "beyond RAMBlock", host, size);
+            return BMRR_FAIL;
+        }
+    }
+
+    *region = (struct blkio_mem_region){
+        .addr = host,
+        .len = size,
+        .fd = fd,
+        .fd_offset = fd_offset,
+    };
+    return BMRR_OK;
+}
+
+static bool blkio_register_buf(BlockDriverState *bs, void *host, size_t size,
+                               Error **errp)
+{
+    BDRVBlkioState *s = bs->opaque;
+    struct blkio_mem_region region;
+    BlkioMemRegionResult region_result;
+    int ret;
+
+    /*
+     * Mapping memory regions conflicts with RAM discard (virtio-mem) when
+     * there is pinning, so only do it when necessary.
+     */
+    if (!s->needs_mem_regions && s->may_pin_mem_regions) {
+        return true;
+    }
+
+    region_result = blkio_mem_region_from_host(bs, host, size, &region, errp);
+    if (region_result == BMRR_SKIP) {
+        return true;
+    } else if (region_result != BMRR_OK) {
+        return false;
+    }
+
+    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+        ret = blkio_map_mem_region(s->blkio, &region);
+    }
+
+    if (ret < 0) {
+        error_setg(errp, "Failed to add blkio mem region %p with size %zu: %s",
+                   host, size, blkio_get_error_msg());
+        return false;
+    }
+    return true;
+}
+
+static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
+{
+    BDRVBlkioState *s = bs->opaque;
+    struct blkio_mem_region region;
+
+    /* See blkio_register_buf() */
+    if (!s->needs_mem_regions && s->may_pin_mem_regions) {
+        return;
+    }
+
+    if (blkio_mem_region_from_host(bs, host, size, &region, NULL) != BMRR_OK) {
+        return;
+    }
+
+    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+        blkio_unmap_mem_region(s->blkio, &region);
+    }
+}
+
+static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options,
+                                  int flags, Error **errp)
+{
+    const char *filename = qdict_get_str(options, "filename");
+    BDRVBlkioState *s = bs->opaque;
+    int ret;
+
+    ret = blkio_set_str(s->blkio, "path", filename);
+    qdict_del(options, "filename");
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "failed to set path: %s",
+                         blkio_get_error_msg());
+        return ret;
+    }
+
+    if (flags & BDRV_O_NOCACHE) {
+        ret = blkio_set_bool(s->blkio, "direct", true);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "failed to set direct: %s",
+                             blkio_get_error_msg());
+            return ret;
+        }
+    }
+
+    ret = blkio_connect(s->blkio);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "blkio_connect failed: %s",
+                         blkio_get_error_msg());
+        return ret;
+    }
+
+    return 0;
+}
+
+static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options,
+                                       int flags, Error **errp)
+{
+    const char *path = qdict_get_try_str(options, "path");
+    BDRVBlkioState *s = bs->opaque;
+    int ret;
+
+    if (!path) {
+        error_setg(errp, "missing 'path' option");
+        return -EINVAL;
+    }
+
+    ret = blkio_set_str(s->blkio, "path", path);
+    qdict_del(options, "path");
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "failed to set path: %s",
+                         blkio_get_error_msg());
+        return ret;
+    }
+
+    if (!(flags & BDRV_O_NOCACHE)) {
+        error_setg(errp, "cache.direct=off is not supported");
+        return -EINVAL;
+    }
+
+    ret = blkio_connect(s->blkio);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "blkio_connect failed: %s",
+                         blkio_get_error_msg());
+        return ret;
+    }
+
+    return 0;
+}
+
+static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
+                                    int flags, Error **errp)
+{
+    const char *path = qdict_get_try_str(options, "path");
+    BDRVBlkioState *s = bs->opaque;
+    bool fd_supported = false;
+    int fd = -1, ret;
+
+    if (!path) {
+        error_setg(errp, "missing 'path' option");
+        return -EINVAL;
+    }
+
+    if (!(flags & BDRV_O_NOCACHE)) {
+        error_setg(errp, "cache.direct=off is not supported");
+        return -EINVAL;
+    }
+
+    if (blkio_set_int(s->blkio, "fd", -1) == 0) {
+        fd_supported = true;
+    }
+
+    /*
+     * If the libblkio driver supports fd passing, let's always use qemu_open()
+     * to open the `path`, so we can handle fd passing from the management
+     * layer through the "/dev/fdset/N" special path.
+     */
+    if (fd_supported) {
+        /*
+         * `path` can contain the path of a character device
+         * (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket.
+         *
+         * So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR
+         * is not set in the open flags, because the exchange of IOCTL commands
+         * for example will fail.
+         *
+         * In order to open the device read-only, we are using the `read-only`
+         * property of the libblkio driver in blkio_file_open().
+         */
+        fd = qemu_open(path, O_RDWR, NULL);
+        if (fd < 0) {
+            /*
+             * qemu_open() can fail if the user specifies a path that is not
+             * a file or device, for example in the case of Unix Domain Socket
+             * for the virtio-blk-vhost-user driver. In such cases let's have
+             * libblkio open the path directly.
+             */
+            fd_supported = false;
+        } else {
+            ret = blkio_set_int(s->blkio, "fd", fd);
+            if (ret < 0) {
+                fd_supported = false;
+                qemu_close(fd);
+                fd = -1;
+            }
+        }
+    }
+
+    if (!fd_supported) {
+        ret = blkio_set_str(s->blkio, "path", path);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "failed to set path: %s",
+                             blkio_get_error_msg());
+            return ret;
+        }
+    }
+
+    ret = blkio_connect(s->blkio);
+    if (ret < 0 && fd >= 0) {
+        /* Failed to give the FD to libblkio, close it */
+        qemu_close(fd);
+        fd = -1;
+    }
+
+    /*
+     * Before https://gitlab.com/libblkio/libblkio/-/merge_requests/208
+     * (libblkio <= v1.3.0), setting the `fd` property is not enough to check
+     * whether the driver supports the `fd` property or not. In that case,
+     * blkio_connect() will fail with -EINVAL.
+     * So let's try calling blkio_connect() again by directly setting `path`
+     * to cover this scenario.
+     */
+    if (fd_supported && ret == -EINVAL) {
+        /*
+         * We need to clear the `fd` property we set previously by setting
+         * it to -1.
+         */
+        ret = blkio_set_int(s->blkio, "fd", -1);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "failed to set fd: %s",
+                             blkio_get_error_msg());
+            return ret;
+        }
+
+        ret = blkio_set_str(s->blkio, "path", path);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "failed to set path: %s",
+                             blkio_get_error_msg());
+            return ret;
+        }
+
+        ret = blkio_connect(s->blkio);
+    }
+
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "blkio_connect failed: %s",
+                         blkio_get_error_msg());
+        return ret;
+    }
+
+    qdict_del(options, "path");
+
+    return 0;
+}
+
+static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
+                           Error **errp)
+{
+    const char *blkio_driver = bs->drv->protocol_name;
+    BDRVBlkioState *s = bs->opaque;
+    int ret;
+
+    ret = blkio_create(blkio_driver, &s->blkio);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "blkio_create failed: %s",
+                         blkio_get_error_msg());
+        return ret;
+    }
+
+    if (!(flags & BDRV_O_RDWR)) {
+        ret = blkio_set_bool(s->blkio, "read-only", true);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "failed to set read-only: %s",
+                             blkio_get_error_msg());
+            blkio_destroy(&s->blkio);
+            return ret;
+        }
+    }
+
+    if (strcmp(blkio_driver, "io_uring") == 0) {
+        ret = blkio_io_uring_connect(bs, options, flags, errp);
+    } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
+        ret = blkio_nvme_io_uring_connect(bs, options, flags, errp);
+    } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
+        ret = blkio_virtio_blk_connect(bs, options, flags, errp);
+    } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
+        ret = blkio_virtio_blk_connect(bs, options, flags, errp);
+    } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
+        ret = blkio_virtio_blk_connect(bs, options, flags, errp);
+    } else {
+        g_assert_not_reached();
+    }
+    if (ret < 0) {
+        blkio_destroy(&s->blkio);
+        return ret;
+    }
+
+    ret = blkio_get_bool(s->blkio,
+                         "needs-mem-regions",
+                         &s->needs_mem_regions);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret,
+                         "failed to get needs-mem-regions: %s",
+                         blkio_get_error_msg());
+        blkio_destroy(&s->blkio);
+        return ret;
+    }
+
+    ret = blkio_get_bool(s->blkio,
+                         "needs-mem-region-fd",
+                         &s->needs_mem_region_fd);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret,
+                         "failed to get needs-mem-region-fd: %s",
+                         blkio_get_error_msg());
+        blkio_destroy(&s->blkio);
+        return ret;
+    }
+
+    ret = blkio_get_uint64(s->blkio,
+                           "mem-region-alignment",
+                           &s->mem_region_alignment);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret,
+                         "failed to get mem-region-alignment: %s",
+                         blkio_get_error_msg());
+        blkio_destroy(&s->blkio);
+        return ret;
+    }
+
+    ret = blkio_get_bool(s->blkio,
+                         "may-pin-mem-regions",
+                         &s->may_pin_mem_regions);
+    if (ret < 0) {
+        /* Be conservative (assume pinning) if the property is not supported */
+        s->may_pin_mem_regions = s->needs_mem_regions;
+    }
+
+    /*
+     * Notify if libblkio drivers pin memory and prevent features like
+     * virtio-mem from working.
+     */
+    if (s->may_pin_mem_regions) {
+        ret = ram_block_discard_disable(true);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "ram_block_discard_disable() failed");
+            blkio_destroy(&s->blkio);
+            return ret;
+        }
+    }
+
+    ret = blkio_start(s->blkio);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "blkio_start failed: %s",
+                         blkio_get_error_msg());
+        blkio_destroy(&s->blkio);
+        if (s->may_pin_mem_regions) {
+            ram_block_discard_disable(false);
+        }
+        return ret;
+    }
+
+    bs->supported_write_flags = BDRV_REQ_FUA | BDRV_REQ_REGISTERED_BUF;
+    bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP |
+                               BDRV_REQ_NO_FALLBACK;
+
+    qemu_mutex_init(&s->blkio_lock);
+    qemu_co_mutex_init(&s->bounce_lock);
+    qemu_co_queue_init(&s->bounce_available);
+    QLIST_INIT(&s->bounce_bufs);
+    s->blkioq = blkio_get_queue(s->blkio, 0);
+    s->completion_fd = blkioq_get_completion_fd(s->blkioq);
+    blkioq_set_completion_fd_enabled(s->blkioq, true);
+
+    blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
+    return 0;
+}
+
+static void blkio_close(BlockDriverState *bs)
+{
+    BDRVBlkioState *s = bs->opaque;
+
+    /* There is no destroy() API for s->bounce_lock */
+
+    qemu_mutex_destroy(&s->blkio_lock);
+    blkio_detach_aio_context(bs);
+    blkio_destroy(&s->blkio);
+
+    if (s->may_pin_mem_regions) {
+        ram_block_discard_disable(false);
+    }
+}
+
+static int64_t coroutine_fn blkio_co_getlength(BlockDriverState *bs)
+{
+    BDRVBlkioState *s = bs->opaque;
+    uint64_t capacity;
+    int ret;
+
+    WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
+        ret = blkio_get_uint64(s->blkio, "capacity", &capacity);
+    }
+    if (ret < 0) {
+        return -ret;
+    }
+
+    return capacity;
+}
+
+static int coroutine_fn blkio_truncate(BlockDriverState *bs, int64_t offset,
+                                       bool exact, PreallocMode prealloc,
+                                       BdrvRequestFlags flags, Error **errp)
+{
+    int64_t current_length;
+
+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_str(prealloc));
+        return -ENOTSUP;
+    }
+
+    current_length = blkio_co_getlength(bs);
+
+    if (offset > current_length) {
+        error_setg(errp, "Cannot grow device");
+        return -EINVAL;
+    } else if (exact && offset != current_length) {
+        error_setg(errp, "Cannot resize device");
+        return -ENOTSUP;
+    }
+
+    return 0;
+}
+
+static int coroutine_fn
+blkio_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+    return 0;
+}
+
+static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    BDRVBlkioState *s = bs->opaque;
+    QEMU_LOCK_GUARD(&s->blkio_lock);
+    int value;
+    int ret;
+
+    ret = blkio_get_int(s->blkio, "request-alignment", &value);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "failed to get \"request-alignment\": %s",
+                         blkio_get_error_msg());
+        return;
+    }
+    bs->bl.request_alignment = value;
+    if (bs->bl.request_alignment < 1 ||
+        bs->bl.request_alignment >= INT_MAX ||
+        !is_power_of_2(bs->bl.request_alignment)) {
+        error_setg(errp, "invalid \"request-alignment\" value %" PRIu32 ", "
+                   "must be a power of 2 less than INT_MAX",
+                   bs->bl.request_alignment);
+        return;
+    }
+
+    ret = blkio_get_int(s->blkio, "optimal-io-size", &value);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "failed to get \"optimal-io-size\": %s",
+                         blkio_get_error_msg());
+        return;
+    }
+    bs->bl.opt_transfer = value;
+    if (bs->bl.opt_transfer > INT_MAX ||
+        (bs->bl.opt_transfer % bs->bl.request_alignment)) {
+        error_setg(errp, "invalid \"optimal-io-size\" value %" PRIu32 ", must "
+                   "be a multiple of %" PRIu32, bs->bl.opt_transfer,
+                   bs->bl.request_alignment);
+        return;
+    }
+
+    ret = blkio_get_int(s->blkio, "max-transfer", &value);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "failed to get \"max-transfer\": %s",
+                         blkio_get_error_msg());
+        return;
+    }
+    bs->bl.max_transfer = value;
+    if ((bs->bl.max_transfer % bs->bl.request_alignment) ||
+        (bs->bl.opt_transfer && (bs->bl.max_transfer % bs->bl.opt_transfer))) {
+        error_setg(errp, "invalid \"max-transfer\" value %" PRIu32 ", must be "
+                   "a multiple of %" PRIu32 " and %" PRIu32 " (if non-zero)",
+                   bs->bl.max_transfer, bs->bl.request_alignment,
+                   bs->bl.opt_transfer);
+        return;
+    }
+
+    ret = blkio_get_int(s->blkio, "buf-alignment", &value);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "failed to get \"buf-alignment\": %s",
+                         blkio_get_error_msg());
+        return;
+    }
+    if (value < 1) {
+        error_setg(errp, "invalid \"buf-alignment\" value %d, must be "
+                   "positive", value);
+        return;
+    }
+    bs->bl.min_mem_alignment = value;
+
+    ret = blkio_get_int(s->blkio, "optimal-buf-alignment", &value);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret,
+                         "failed to get \"optimal-buf-alignment\": %s",
+                         blkio_get_error_msg());
+        return;
+    }
+    if (value < 1) {
+        error_setg(errp, "invalid \"optimal-buf-alignment\" value %d, "
+                   "must be positive", value);
+        return;
+    }
+    bs->bl.opt_mem_alignment = value;
+
+    ret = blkio_get_int(s->blkio, "max-segments", &value);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "failed to get \"max-segments\": %s",
+                         blkio_get_error_msg());
+        return;
+    }
+    if (value < 1) {
+        error_setg(errp, "invalid \"max-segments\" value %d, must be positive",
+                   value);
+        return;
+    }
+    bs->bl.max_iov = value;
+}
+
+/*
+ * TODO
+ * Missing libblkio APIs:
+ * - block_status
+ * - co_invalidate_cache
+ *
+ * Out of scope?
+ * - create
+ * - truncate
+ */
+
+/*
+ * Do not include .format_name and .protocol_name because module_block.py
+ * does not parse macros in the source code.
+ */
+#define BLKIO_DRIVER_COMMON \
+    .instance_size           = sizeof(BDRVBlkioState), \
+    .bdrv_file_open          = blkio_file_open, \
+    .bdrv_close              = blkio_close, \
+    .bdrv_co_getlength       = blkio_co_getlength, \
+    .bdrv_co_truncate        = blkio_truncate, \
+    .bdrv_co_get_info        = blkio_co_get_info, \
+    .bdrv_attach_aio_context = blkio_attach_aio_context, \
+    .bdrv_detach_aio_context = blkio_detach_aio_context, \
+    .bdrv_co_pdiscard        = blkio_co_pdiscard, \
+    .bdrv_co_preadv          = blkio_co_preadv, \
+    .bdrv_co_pwritev         = blkio_co_pwritev, \
+    .bdrv_co_flush_to_disk   = blkio_co_flush, \
+    .bdrv_co_pwrite_zeroes   = blkio_co_pwrite_zeroes, \
+    .bdrv_refresh_limits     = blkio_refresh_limits, \
+    .bdrv_register_buf       = blkio_register_buf, \
+    .bdrv_unregister_buf     = blkio_unregister_buf,
+
+/*
+ * Use the same .format_name and .protocol_name as the libblkio driver name for
+ * consistency.
+ */
+
+static BlockDriver bdrv_io_uring = {
+    .format_name         = "io_uring",
+    .protocol_name       = "io_uring",
+    .bdrv_needs_filename = true,
+    BLKIO_DRIVER_COMMON
+};
+
+static BlockDriver bdrv_nvme_io_uring = {
+    .format_name         = "nvme-io_uring",
+    .protocol_name       = "nvme-io_uring",
+    BLKIO_DRIVER_COMMON
+};
+
+static BlockDriver bdrv_virtio_blk_vfio_pci = {
+    .format_name         = "virtio-blk-vfio-pci",
+    .protocol_name       = "virtio-blk-vfio-pci",
+    BLKIO_DRIVER_COMMON
+};
+
+static BlockDriver bdrv_virtio_blk_vhost_user = {
+    .format_name         = "virtio-blk-vhost-user",
+    .protocol_name       = "virtio-blk-vhost-user",
+    BLKIO_DRIVER_COMMON
+};
+
+static BlockDriver bdrv_virtio_blk_vhost_vdpa = {
+    .format_name         = "virtio-blk-vhost-vdpa",
+    .protocol_name       = "virtio-blk-vhost-vdpa",
+    BLKIO_DRIVER_COMMON
+};
+
+static void bdrv_blkio_init(void)
+{
+    bdrv_register(&bdrv_io_uring);
+    bdrv_register(&bdrv_nvme_io_uring);
+    bdrv_register(&bdrv_virtio_blk_vfio_pci);
+    bdrv_register(&bdrv_virtio_blk_vhost_user);
+    bdrv_register(&bdrv_virtio_blk_vhost_vdpa);
+}
+
+block_init(bdrv_blkio_init);
index 13ae63983bc0c4ed89d3b6a6dce1fa2578e95c3b..84e03f309f31c1abe99a979c5c46e3370e55b069 100644 (file)
@@ -12,6 +12,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/sockets.h" /* for EINPROGRESS on Windows */
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qstring.h"
@@ -107,8 +108,8 @@ static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
     struct log_write_entry cur_entry;
 
     while (cur_idx < nr_entries) {
-        int read_ret = bdrv_pread(log, cur_sector << sector_bits, &cur_entry,
-                                  sizeof(cur_entry));
+        int read_ret = bdrv_pread(log, cur_sector << sector_bits,
+                                  sizeof(cur_entry), &cur_entry, 0);
         if (read_ret < 0) {
             error_setg_errno(errp, -read_ret,
                              "Failed to read log entry %"PRIu64, cur_idx);
@@ -155,21 +156,16 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* Open the file */
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, false,
-                               &local_err);
-    if (local_err) {
-        ret = -EINVAL;
-        error_propagate(errp, local_err);
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
         goto fail;
     }
 
     /* Open the log file */
     s->log_file = bdrv_open_child(NULL, options, "log", bs, &child_of_bds,
-                                  BDRV_CHILD_METADATA, false, &local_err);
-    if (local_err) {
+                                  BDRV_CHILD_METADATA, false, errp);
+    if (!s->log_file) {
         ret = -EINVAL;
-        error_propagate(errp, local_err);
         goto fail;
     }
 
@@ -192,7 +188,7 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
             log_sb.nr_entries = cpu_to_le64(0);
             log_sb.sectorsize = cpu_to_le32(BDRV_SECTOR_SIZE);
         } else {
-            ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb));
+            ret = bdrv_pread(s->log_file, 0, sizeof(log_sb), &log_sb, 0);
             if (ret < 0) {
                 error_setg_errno(errp, -ret, "Could not read log superblock");
                 goto fail_log;
@@ -255,14 +251,12 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
     ret = 0;
 fail_log:
     if (ret < 0) {
+        bdrv_graph_wrlock(NULL);
         bdrv_unref_child(bs, s->log_file);
+        bdrv_graph_wrunlock(NULL);
         s->log_file = NULL;
     }
 fail:
-    if (ret < 0) {
-        bdrv_unref_child(bs, bs->file);
-        bs->file = NULL;
-    }
     qemu_opts_del(opts);
     return ret;
 }
@@ -271,13 +265,16 @@ static void blk_log_writes_close(BlockDriverState *bs)
 {
     BDRVBlkLogWritesState *s = bs->opaque;
 
+    bdrv_graph_wrlock(NULL);
     bdrv_unref_child(bs, s->log_file);
     s->log_file = NULL;
+    bdrv_graph_wrunlock(NULL);
 }
 
-static int64_t blk_log_writes_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file->bs);
+    return bdrv_co_getlength(bs->file->bs);
 }
 
 static void blk_log_writes_child_perm(BlockDriverState *bs, BdrvChild *c,
@@ -302,9 +299,9 @@ static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp)
     bs->bl.request_alignment = s->sectorsize;
 }
 
-static int coroutine_fn
-blk_log_writes_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                         QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                         QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
@@ -315,7 +312,7 @@ typedef struct BlkLogWritesFileReq {
     uint64_t bytes;
     int file_flags;
     QEMUIOVector *qiov;
-    int (*func)(struct BlkLogWritesFileReq *r);
+    int GRAPH_RDLOCK_PTR (*func)(struct BlkLogWritesFileReq *r);
     int file_ret;
 } BlkLogWritesFileReq;
 
@@ -327,25 +324,43 @@ typedef struct {
     int log_ret;
 } BlkLogWritesLogReq;
 
-static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
+static void coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
 {
     BDRVBlkLogWritesState *s = lr->bs->opaque;
-    uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits;
 
-    s->nr_entries++;
-    s->cur_log_sector +=
-            ROUND_UP(lr->qiov->size, s->sectorsize) >> s->sectorbits;
+    /*
+     * Determine the offsets and sizes of different parts of the entry, and
+     * update the state of the driver.
+     *
+     * This needs to be done in one go, before any actual I/O is done, as the
+     * log entry may have to be written in two parts, and the state of the
+     * driver may be modified by other driver operations while waiting for the
+     * I/O to complete.
+     */
+    const uint64_t entry_start_sector = s->cur_log_sector;
+    const uint64_t entry_offset = entry_start_sector << s->sectorbits;
+    const uint64_t qiov_aligned_size = ROUND_UP(lr->qiov->size, s->sectorsize);
+    const uint64_t entry_aligned_size = qiov_aligned_size +
+        ROUND_UP(lr->zero_size, s->sectorsize);
+    const uint64_t entry_nr_sectors = entry_aligned_size >> s->sectorbits;
 
-    lr->log_ret = bdrv_co_pwritev(s->log_file, cur_log_offset, lr->qiov->size,
+    s->nr_entries++;
+    s->cur_log_sector += entry_nr_sectors;
+
+    /*
+     * Write the log entry. Note that if this is a "write zeroes" operation,
+     * only the entry header is written here, with the zeroing being done
+     * separately below.
+     */
+    lr->log_ret = bdrv_co_pwritev(s->log_file, entry_offset, lr->qiov->size,
                                   lr->qiov, 0);
 
     /* Logging for the "write zeroes" operation */
     if (lr->log_ret == 0 && lr->zero_size) {
-        cur_log_offset = s->cur_log_sector << s->sectorbits;
-        s->cur_log_sector +=
-                ROUND_UP(lr->zero_size, s->sectorsize) >> s->sectorbits;
+        const uint64_t zeroes_offset = entry_offset + qiov_aligned_size;
 
-        lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, cur_log_offset,
+        lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, zeroes_offset,
                                             lr->zero_size, 0);
     }
 
@@ -376,15 +391,16 @@ static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
     }
 }
 
-static void coroutine_fn blk_log_writes_co_do_file(BlkLogWritesFileReq *fr)
+static void coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_do_file(BlkLogWritesFileReq *fr)
 {
     fr->file_ret = fr->func(fr);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                       QEMUIOVector *qiov, int flags,
-                      int (*file_func)(BlkLogWritesFileReq *r),
+                      int /*GRAPH_RDLOCK*/ (*file_func)(BlkLogWritesFileReq *r),
                       uint64_t entry_flags, bool is_zero_write)
 {
     QEMUIOVector log_qiov;
@@ -436,59 +452,61 @@ blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     return fr.file_ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blk_log_writes_co_do_file_pwritev(BlkLogWritesFileReq *fr)
 {
     return bdrv_co_pwritev(fr->bs->file, fr->offset, fr->bytes,
                            fr->qiov, fr->file_flags);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blk_log_writes_co_do_file_pwrite_zeroes(BlkLogWritesFileReq *fr)
 {
     return bdrv_co_pwrite_zeroes(fr->bs->file, fr->offset, fr->bytes,
                                  fr->file_flags);
 }
 
-static int coroutine_fn blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr)
+static int coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr)
 {
     return bdrv_co_flush(fr->bs->file->bs);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr)
 {
     return bdrv_co_pdiscard(fr->bs->file, fr->offset, fr->bytes);
 }
 
-static int coroutine_fn
-blk_log_writes_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                          QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     return blk_log_writes_co_log(bs, offset, bytes, qiov, flags,
                                  blk_log_writes_co_do_file_pwritev, 0, false);
 }
 
-static int coroutine_fn
-blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
-                                BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+                                int64_t bytes, BdrvRequestFlags flags)
 {
     return blk_log_writes_co_log(bs, offset, bytes, NULL, flags,
                                  blk_log_writes_co_do_file_pwrite_zeroes, 0,
                                  true);
 }
 
-static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_flush_to_disk(BlockDriverState *bs)
 {
     return blk_log_writes_co_log(bs, 0, 0, NULL, 0,
                                  blk_log_writes_co_do_file_flush,
                                  LOG_FLUSH_FLAG, false);
 }
 
-static int coroutine_fn
-blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
+static int coroutine_fn GRAPH_RDLOCK
+blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
-    return blk_log_writes_co_log(bs, offset, count, NULL, 0,
+    return blk_log_writes_co_log(bs, offset, bytes, NULL, 0,
                                  blk_log_writes_co_do_file_pdiscard,
                                  LOG_DISCARD_FLAG, false);
 }
@@ -506,7 +524,7 @@ static BlockDriver bdrv_blk_log_writes = {
 
     .bdrv_open              = blk_log_writes_open,
     .bdrv_close             = blk_log_writes_close,
-    .bdrv_getlength         = blk_log_writes_getlength,
+    .bdrv_co_getlength      = blk_log_writes_co_getlength,
     .bdrv_child_perm        = blk_log_writes_child_perm,
     .bdrv_refresh_limits    = blk_log_writes_refresh_limits,
 
index 30a0f5d57aa0177159389c5da180db03109977cd..792d980aa9df8cf09603954cc3853ef2373e3dc8 100644 (file)
@@ -11,6 +11,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/module.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "sysemu/replay.h"
 #include "qapi/error.h"
@@ -23,16 +24,11 @@ typedef struct Request {
 static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
                           Error **errp)
 {
-    Error *local_err = NULL;
     int ret;
 
     /* Open the image file */
-    bs->file = bdrv_open_child(NULL, options, "image", bs, &child_of_bds,
-                               BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-                               false, &local_err);
-    if (local_err) {
-        ret = -EINVAL;
-        error_propagate(errp, local_err);
+    ret = bdrv_open_file_child(NULL, options, "image", bs, errp);
+    if (ret < 0) {
         goto fail;
     }
 
@@ -44,9 +40,10 @@ fail:
     return ret;
 }
 
-static int64_t blkreplay_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+blkreplay_co_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file->bs);
+    return bdrv_co_getlength(bs->file->bs);
 }
 
 /* This bh is used for synchronization of return from coroutines.
@@ -73,8 +70,9 @@ static void block_request_create(uint64_t reqid, BlockDriverState *bs,
     replay_block_event(req->bh, reqid);
 }
 
-static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs,
-    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkreplay_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                    QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     uint64_t reqid = blkreplay_next_id();
     int ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
@@ -84,8 +82,9 @@ static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
-    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkreplay_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     uint64_t reqid = blkreplay_next_id();
     int ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
@@ -95,8 +94,9 @@ static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkreplay_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                           BdrvRequestFlags flags)
 {
     uint64_t reqid = blkreplay_next_id();
     int ret = bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
@@ -106,8 +106,8 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
-                                              int64_t offset, int bytes)
+static int coroutine_fn GRAPH_RDLOCK
+blkreplay_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     uint64_t reqid = blkreplay_next_id();
     int ret = bdrv_co_pdiscard(bs->file, offset, bytes);
@@ -117,7 +117,7 @@ static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK blkreplay_co_flush(BlockDriverState *bs)
 {
     uint64_t reqid = blkreplay_next_id();
     int ret = bdrv_co_flush(bs->file->bs);
@@ -130,7 +130,13 @@ static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs)
 static int blkreplay_snapshot_goto(BlockDriverState *bs,
                                    const char *snapshot_id)
 {
-    return bdrv_snapshot_goto(bs->file->bs, snapshot_id, NULL);
+    BlockDriverState *file_bs;
+
+    bdrv_graph_rdlock_main_loop();
+    file_bs = bs->file->bs;
+    bdrv_graph_rdunlock_main_loop();
+
+    return bdrv_snapshot_goto(file_bs, snapshot_id, NULL);
 }
 
 static BlockDriver bdrv_blkreplay = {
@@ -140,7 +146,7 @@ static BlockDriver bdrv_blkreplay = {
 
     .bdrv_open              = blkreplay_open,
     .bdrv_child_perm        = bdrv_default_perms,
-    .bdrv_getlength         = blkreplay_getlength,
+    .bdrv_co_getlength      = blkreplay_co_getlength,
 
     .bdrv_co_preadv         = blkreplay_co_preadv,
     .bdrv_co_pwritev        = blkreplay_co_pwritev,
index 4aed53ab598205f2d2c2a0221232426867f60327..9b17c46644358dd0675567512e4d8fcc2cd294a8 100644 (file)
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/sockets.h" /* for EINPROGRESS on Windows */
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qstring.h"
 #include "qemu/cutils.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
+#include "qemu/memalign.h"
 
 typedef struct {
     BdrvChild *test_file;
@@ -31,8 +33,8 @@ typedef struct BlkverifyRequest {
     uint64_t bytes;
     int flags;
 
-    int (*request_fn)(BdrvChild *, int64_t, unsigned int, QEMUIOVector *,
-                      BdrvRequestFlags);
+    int GRAPH_RDLOCK_PTR (*request_fn)(
+        BdrvChild *, int64_t, int64_t, QEMUIOVector *, BdrvRequestFlags);
 
     int ret;                    /* test image result */
     int raw_ret;                /* raw image result */
@@ -43,7 +45,7 @@ typedef struct BlkverifyRequest {
     QEMUIOVector *raw_qiov;     /* cloned I/O vector for raw file */
 } BlkverifyRequest;
 
-static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyRequest *r,
+static void G_GNUC_PRINTF(2, 3) blkverify_err(BlkverifyRequest *r,
                                              const char *fmt, ...)
 {
     va_list ap;
@@ -112,7 +114,6 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
 {
     BDRVBlkverifyState *s = bs->opaque;
     QemuOpts *opts;
-    Error *local_err = NULL;
     int ret;
 
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
@@ -122,23 +123,18 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* Open the raw file */
-    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-raw"), options, "raw",
-                               bs, &child_of_bds,
-                               BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-                               false, &local_err);
-    if (local_err) {
-        ret = -EINVAL;
-        error_propagate(errp, local_err);
+    ret = bdrv_open_file_child(qemu_opt_get(opts, "x-raw"), options, "raw",
+                               bs, errp);
+    if (ret < 0) {
         goto fail;
     }
 
     /* Open the test file */
     s->test_file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options,
                                    "test", bs, &child_of_bds, BDRV_CHILD_DATA,
-                                   false, &local_err);
-    if (local_err) {
+                                   false, errp);
+    if (!s->test_file) {
         ret = -EINVAL;
-        error_propagate(errp, local_err);
         goto fail;
     }
 
@@ -155,15 +151,18 @@ static void blkverify_close(BlockDriverState *bs)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
+    bdrv_graph_wrlock(NULL);
     bdrv_unref_child(bs, s->test_file);
     s->test_file = NULL;
+    bdrv_graph_wrunlock(NULL);
 }
 
-static int64_t blkverify_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+blkverify_co_getlength(BlockDriverState *bs)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
-    return bdrv_getlength(s->test_file->bs);
+    return bdrv_co_getlength(s->test_file->bs);
 }
 
 static void coroutine_fn blkverify_do_test_req(void *opaque)
@@ -171,8 +170,11 @@ static void coroutine_fn blkverify_do_test_req(void *opaque)
     BlkverifyRequest *r = opaque;
     BDRVBlkverifyState *s = r->bs->opaque;
 
+    bdrv_graph_co_rdlock();
     r->ret = r->request_fn(s->test_file, r->offset, r->bytes, r->qiov,
                            r->flags);
+    bdrv_graph_co_rdunlock();
+
     r->done++;
     qemu_coroutine_enter_if_inactive(r->co);
 }
@@ -181,13 +183,16 @@ static void coroutine_fn blkverify_do_raw_req(void *opaque)
 {
     BlkverifyRequest *r = opaque;
 
+    bdrv_graph_co_rdlock();
     r->raw_ret = r->request_fn(r->bs->file, r->offset, r->bytes, r->raw_qiov,
                                r->flags);
+    bdrv_graph_co_rdunlock();
+
     r->done++;
     qemu_coroutine_enter_if_inactive(r->co);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset,
                   uint64_t bytes, QEMUIOVector *qiov, QEMUIOVector *raw_qiov,
                   int flags, bool is_write)
@@ -223,9 +228,9 @@ blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset,
     return r->ret;
 }
 
-static int coroutine_fn
-blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                    QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkverify_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                    QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BlkverifyRequest r;
     QEMUIOVector raw_qiov;
@@ -237,8 +242,8 @@ blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     qemu_iovec_init(&raw_qiov, qiov->niov);
     qemu_iovec_clone(&raw_qiov, qiov, buf);
 
-    ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags,
-                            false);
+    ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov,
+                            flags & ~BDRV_REQ_REGISTERED_BUF, false);
 
     cmp_offset = qemu_iovec_compare(qiov, &raw_qiov);
     if (cmp_offset != -1) {
@@ -252,15 +257,15 @@ blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     return ret;
 }
 
-static int coroutine_fn
-blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                     QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+blkverify_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BlkverifyRequest r;
     return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true);
 }
 
-static int blkverify_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK blkverify_co_flush(BlockDriverState *bs)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
@@ -268,8 +273,9 @@ static int blkverify_co_flush(BlockDriverState *bs)
     return bdrv_co_flush(s->test_file->bs);
 }
 
-static bool blkverify_recurse_can_replace(BlockDriverState *bs,
-                                          BlockDriverState *to_replace)
+static bool GRAPH_RDLOCK
+blkverify_recurse_can_replace(BlockDriverState *bs,
+                              BlockDriverState *to_replace)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
@@ -282,7 +288,7 @@ static bool blkverify_recurse_can_replace(BlockDriverState *bs,
            bdrv_recurse_can_replace(s->test_file->bs, to_replace);
 }
 
-static void blkverify_refresh_filename(BlockDriverState *bs)
+static void GRAPH_RDLOCK blkverify_refresh_filename(BlockDriverState *bs)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
@@ -318,7 +324,7 @@ static BlockDriver bdrv_blkverify = {
     .bdrv_file_open                   = blkverify_open,
     .bdrv_close                       = blkverify_close,
     .bdrv_child_perm                  = bdrv_default_perms,
-    .bdrv_getlength                   = blkverify_getlength,
+    .bdrv_co_getlength                = blkverify_co_getlength,
     .bdrv_refresh_filename            = blkverify_refresh_filename,
     .bdrv_dirname                     = blkverify_dirname,
 
index e493f17515d88465796d298b5566d9687d54f2e2..ec21148806982ae42c5a58526b31069f3c481a71 100644 (file)
 #include "sysemu/block-backend.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
+#include "block/coroutines.h"
 #include "block/throttle-groups.h"
 #include "hw/qdev-core.h"
 #include "sysemu/blockdev.h"
 #include "sysemu/runstate.h"
-#include "sysemu/sysemu.h"
 #include "sysemu/replay.h"
 #include "qapi/error.h"
 #include "qapi/qapi-events-block.h"
@@ -33,8 +33,6 @@
 
 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
 
-static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
-
 typedef struct BlockBackendAioNotifier {
     void (*attached_aio_context)(AioContext *new_context, void *opaque);
     void (*detach_aio_context)(void *opaque);
@@ -56,9 +54,6 @@ struct BlockBackend {
     const BlockDevOps *dev_ops;
     void *dev_opaque;
 
-    /* the block size for which the guest device expects atomicity */
-    int guest_block_size;
-
     /* If the BDS tree is removed, some of its options are stored here (which
      * can be used to restore those options in the new BDS on insert) */
     BlockBackendRootState root_state;
@@ -79,12 +74,14 @@ struct BlockBackend {
     bool allow_aio_context_change;
     bool allow_write_beyond_eof;
 
+    /* Protected by BQL */
     NotifierList remove_bs_notifiers, insert_bs_notifiers;
     QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
 
-    int quiesce_counter;
+    int quiesce_counter; /* atomic: written under BQL, read by other threads */
+    QemuMutex queued_requests_lock; /* protects queued_requests */
     CoQueue queued_requests;
-    bool disable_request_queuing;
+    bool disable_request_queuing; /* atomic */
 
     VMChangeStateEntry *vmsh;
     bool force_allow_inactivate;
@@ -104,22 +101,27 @@ typedef struct BlockBackendAIOCB {
 } BlockBackendAIOCB;
 
 static const AIOCBInfo block_backend_aiocb_info = {
-    .get_aio_context = blk_aiocb_get_aio_context,
     .aiocb_size = sizeof(BlockBackendAIOCB),
 };
 
 static void drive_info_del(DriveInfo *dinfo);
 static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
 
-/* All BlockBackends */
+/* All BlockBackends. Protected by BQL. */
 static QTAILQ_HEAD(, BlockBackend) block_backends =
     QTAILQ_HEAD_INITIALIZER(block_backends);
 
-/* All BlockBackends referenced by the monitor and which are iterated through by
- * blk_next() */
+/*
+ * All BlockBackends referenced by the monitor and which are iterated through by
+ * blk_next(). Protected by BQL.
+ */
 static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
     QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
 
+static int coroutine_mixed_fn GRAPH_RDLOCK
+blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+                    Error **errp);
+
 static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
                                      int *child_flags, QDict *child_options,
                                      int parent_flags, QDict *parent_options)
@@ -129,32 +131,30 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
 }
 static void blk_root_drained_begin(BdrvChild *child);
 static bool blk_root_drained_poll(BdrvChild *child);
-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter);
+static void blk_root_drained_end(BdrvChild *child);
 
 static void blk_root_change_media(BdrvChild *child, bool load);
 static void blk_root_resize(BdrvChild *child);
 
-static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
-                                     GSList **ignore, Error **errp);
-static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
-                                 GSList **ignore);
+static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx,
+                                    GHashTable *visited, Transaction *tran,
+                                    Error **errp);
 
 static char *blk_root_get_parent_desc(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
-    char *dev_id;
+    g_autofree char *dev_id = NULL;
 
     if (blk->name) {
-        return g_strdup(blk->name);
+        return g_strdup_printf("block device '%s'", blk->name);
     }
 
     dev_id = blk_get_attached_dev_id(blk);
     if (*dev_id) {
-        return dev_id;
+        return g_strdup_printf("block device '%s'", dev_id);
     } else {
         /* TODO Callback into the BB owner for something more detailed */
-        g_free(dev_id);
-        return g_strdup("a block device");
+        return g_strdup("an unnamed block device");
     }
 }
 
@@ -163,7 +163,7 @@ static const char *blk_root_get_name(BdrvChild *child)
     return blk_name(child->opaque);
 }
 
-static void blk_vm_state_changed(void *opaque, int running, RunState state)
+static void blk_vm_state_changed(void *opaque, bool running, RunState state)
 {
     Error *local_err = NULL;
     BlockBackend *blk = opaque;
@@ -187,10 +187,11 @@ static void blk_vm_state_changed(void *opaque, int running, RunState state)
  *
  * If an error is returned, the VM cannot be allowed to be resumed.
  */
-static void blk_root_activate(BdrvChild *child, Error **errp)
+static void GRAPH_RDLOCK blk_root_activate(BdrvChild *child, Error **errp)
 {
     BlockBackend *blk = child->opaque;
     Error *local_err = NULL;
+    uint64_t saved_shared_perm;
 
     if (!blk->disable_perm) {
         return;
@@ -198,12 +199,22 @@ static void blk_root_activate(BdrvChild *child, Error **errp)
 
     blk->disable_perm = false;
 
-    blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
+    /*
+     * blk->shared_perm contains the permissions we want to share once
+     * migration is really completely done.  For now, we need to share
+     * all; but we also need to retain blk->shared_perm, which is
+     * overwritten by a successful blk_set_perm() call.  Save it and
+     * restore it below.
+     */
+    saved_shared_perm = blk->shared_perm;
+
+    blk_set_perm_locked(blk, blk->perm, BLK_PERM_ALL, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         blk->disable_perm = true;
         return;
     }
+    blk->shared_perm = saved_shared_perm;
 
     if (runstate_check(RUN_STATE_INMIGRATE)) {
         /* Activation can happen when migration process is still active, for
@@ -216,7 +227,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp)
         return;
     }
 
-    blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
+    blk_set_perm_locked(blk, blk->perm, blk->shared_perm, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         blk->disable_perm = true;
@@ -226,6 +237,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp)
 
 void blk_set_force_allow_inactivate(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     blk->force_allow_inactivate = true;
 }
 
@@ -248,7 +260,7 @@ static bool blk_can_inactivate(BlockBackend *blk)
     return blk->force_allow_inactivate;
 }
 
-static int blk_root_inactivate(BdrvChild *child)
+static int GRAPH_RDLOCK blk_root_inactivate(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
 
@@ -298,6 +310,14 @@ static void blk_root_detach(BdrvChild *child)
     }
 }
 
+static AioContext *blk_root_get_parent_aio_context(BdrvChild *c)
+{
+    BlockBackend *blk = c->opaque;
+    IO_CODE();
+
+    return blk_get_aio_context(blk);
+}
+
 static const BdrvChildClass child_root = {
     .inherit_options    = blk_root_inherit_options,
 
@@ -316,8 +336,9 @@ static const BdrvChildClass child_root = {
     .attach             = blk_root_attach,
     .detach             = blk_root_detach,
 
-    .can_set_aio_ctx    = blk_root_can_set_aio_ctx,
-    .set_aio_ctx        = blk_root_set_aio_ctx,
+    .change_aio_ctx     = blk_root_change_aio_ctx,
+
+    .get_parent_aio_context = blk_root_get_parent_aio_context,
 };
 
 /*
@@ -335,6 +356,8 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
 {
     BlockBackend *blk;
 
+    GLOBAL_STATE_CODE();
+
     blk = g_new0(BlockBackend, 1);
     blk->refcnt = 1;
     blk->ctx = ctx;
@@ -347,6 +370,7 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
 
     block_acct_init(&blk->stats);
 
+    qemu_mutex_init(&blk->queued_requests_lock);
     qemu_co_queue_init(&blk->queued_requests);
     notifier_list_init(&blk->remove_bs_notifiers);
     notifier_list_init(&blk->insert_bs_notifiers);
@@ -366,12 +390,16 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
  * Both sets of permissions can be changed later using blk_set_perm().
  *
  * Return the new BlockBackend on success, null on failure.
+ *
+ * Callers must hold the AioContext lock of @bs.
  */
 BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
                               uint64_t shared_perm, Error **errp)
 {
     BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);
 
+    GLOBAL_STATE_CODE();
+
     if (blk_insert_bs(blk, bs, errp) < 0) {
         blk_unref(blk);
         return NULL;
@@ -381,11 +409,15 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
 
 /*
  * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
- * The new BlockBackend is in the main AioContext.
+ * By default, the new BlockBackend is in the main AioContext, but if the
+ * parameters connect it with any existing node in a different AioContext, it
+ * may end up there instead.
  *
  * Just as with bdrv_open(), after having called this function the reference to
  * @options belongs to the block layer (even on failure).
  *
+ * Called without holding an AioContext lock.
+ *
  * TODO: Remove @filename and @flags; it should be possible to specify a whole
  * BDS tree just by specifying the @options QDict (or @reference,
  * alternatively). At the time of adding this function, this is not possible,
@@ -397,16 +429,23 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
 {
     BlockBackend *blk;
     BlockDriverState *bs;
+    AioContext *ctx;
     uint64_t perm = 0;
+    uint64_t shared = BLK_PERM_ALL;
+
+    GLOBAL_STATE_CODE();
 
-    /* blk_new_open() is mainly used in .bdrv_create implementations and the
-     * tools where sharing isn't a concern because the BDS stays private, so we
-     * just request permission according to the flags.
+    /*
+     * blk_new_open() is mainly used in .bdrv_create implementations and the
+     * tools where sharing isn't a major concern because the BDS stays private
+     * and the file is generally not supposed to be used by a second process,
+     * so we just request permission according to the flags.
      *
      * The exceptions are xen_disk and blockdev_init(); in these cases, the
      * caller of blk_new_open() doesn't make use of the permissions, but they
      * shouldn't hurt either. We can still share everything here because the
-     * guest devices will add their own blockers if they can't share. */
+     * guest devices will add their own blockers if they can't share.
+     */
     if ((flags & BDRV_O_NO_IO) == 0) {
         perm |= BLK_PERM_CONSISTENT_READ;
         if (flags & BDRV_O_RDWR) {
@@ -416,17 +455,28 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
     if (flags & BDRV_O_RESIZE) {
         perm |= BLK_PERM_RESIZE;
     }
+    if (flags & BDRV_O_NO_SHARE) {
+        shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
+    }
 
-    blk = blk_new(qemu_get_aio_context(), perm, BLK_PERM_ALL);
+    aio_context_acquire(qemu_get_aio_context());
     bs = bdrv_open(filename, reference, options, flags, errp);
+    aio_context_release(qemu_get_aio_context());
     if (!bs) {
-        blk_unref(blk);
         return NULL;
     }
 
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
-                                       BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-                                       blk->ctx, perm, BLK_PERM_ALL, blk, errp);
+    /* bdrv_open() could have moved bs to a different AioContext */
+    ctx = bdrv_get_aio_context(bs);
+    blk = blk_new(bdrv_get_aio_context(bs), perm, shared);
+    blk->perm = perm;
+    blk->shared_perm = shared;
+
+    aio_context_acquire(ctx);
+    blk_insert_bs(blk, bs, errp);
+    bdrv_unref(bs);
+    aio_context_release(ctx);
+
     if (!blk->root) {
         blk_unref(blk);
         return NULL;
@@ -453,6 +503,8 @@ static void blk_delete(BlockBackend *blk)
     assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
     assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
     assert(QLIST_EMPTY(&blk->aio_notifiers));
+    assert(qemu_co_queue_empty(&blk->queued_requests));
+    qemu_mutex_destroy(&blk->queued_requests_lock);
     QTAILQ_REMOVE(&block_backends, blk, link);
     drive_info_del(blk->legacy_dinfo);
     block_acct_cleanup(&blk->stats);
@@ -470,6 +522,7 @@ static void drive_info_del(DriveInfo *dinfo)
 
 int blk_get_refcnt(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     return blk ? blk->refcnt : 0;
 }
 
@@ -480,6 +533,7 @@ int blk_get_refcnt(BlockBackend *blk)
 void blk_ref(BlockBackend *blk)
 {
     assert(blk->refcnt > 0);
+    GLOBAL_STATE_CODE();
     blk->refcnt++;
 }
 
@@ -490,6 +544,7 @@ void blk_ref(BlockBackend *blk)
  */
 void blk_unref(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     if (blk) {
         assert(blk->refcnt > 0);
         if (blk->refcnt > 1) {
@@ -510,6 +565,7 @@ void blk_unref(BlockBackend *blk)
  */
 BlockBackend *blk_all_next(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     return blk ? QTAILQ_NEXT(blk, link)
                : QTAILQ_FIRST(&block_backends);
 }
@@ -518,6 +574,8 @@ void blk_remove_all_bs(void)
 {
     BlockBackend *blk = NULL;
 
+    GLOBAL_STATE_CODE();
+
     while ((blk = blk_all_next(blk)) != NULL) {
         AioContext *ctx = blk_get_aio_context(blk);
 
@@ -541,6 +599,7 @@ void blk_remove_all_bs(void)
  */
 BlockBackend *blk_next(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     return blk ? QTAILQ_NEXT(blk, monitor_link)
                : QTAILQ_FIRST(&monitor_block_backends);
 }
@@ -607,6 +666,7 @@ static void bdrv_next_reset(BdrvNextIterator *it)
 
 BlockDriverState *bdrv_first(BdrvNextIterator *it)
 {
+    GLOBAL_STATE_CODE();
     bdrv_next_reset(it);
     return bdrv_next(it);
 }
@@ -644,6 +704,7 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
 {
     assert(!blk->name);
     assert(name && name[0]);
+    GLOBAL_STATE_CODE();
 
     if (!id_wellformed(name)) {
         error_setg(errp, "Invalid device name");
@@ -671,6 +732,8 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
  */
 void monitor_remove_blk(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
+
     if (!blk->name) {
         return;
     }
@@ -686,6 +749,7 @@ void monitor_remove_blk(BlockBackend *blk)
  */
 const char *blk_name(const BlockBackend *blk)
 {
+    IO_CODE();
     return blk->name ?: "";
 }
 
@@ -697,6 +761,7 @@ BlockBackend *blk_by_name(const char *name)
 {
     BlockBackend *blk = NULL;
 
+    GLOBAL_STATE_CODE();
     assert(name);
     while ((blk = blk_next(blk)) != NULL) {
         if (!strcmp(name, blk->name)) {
@@ -711,12 +776,17 @@ BlockBackend *blk_by_name(const char *name)
  */
 BlockDriverState *blk_bs(BlockBackend *blk)
 {
+    IO_CODE();
     return blk->root ? blk->root->bs : NULL;
 }
 
-static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
+static BlockBackend * GRAPH_RDLOCK bdrv_first_blk(BlockDriverState *bs)
 {
     BdrvChild *child;
+
+    GLOBAL_STATE_CODE();
+    assert_bdrv_graph_readable();
+
     QLIST_FOREACH(child, &bs->parents, next_parent) {
         if (child->klass == &child_root) {
             return child->opaque;
@@ -731,6 +801,7 @@ static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
  */
 bool bdrv_has_blk(BlockDriverState *bs)
 {
+    GLOBAL_STATE_CODE();
     return bdrv_first_blk(bs) != NULL;
 }
 
@@ -741,6 +812,9 @@ bool bdrv_is_root_node(BlockDriverState *bs)
 {
     BdrvChild *c;
 
+    GLOBAL_STATE_CODE();
+    assert_bdrv_graph_readable();
+
     QLIST_FOREACH(c, &bs->parents, next_parent) {
         if (c->klass != &child_root) {
             return false;
@@ -755,6 +829,7 @@ bool bdrv_is_root_node(BlockDriverState *bs)
  */
 DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     return blk->legacy_dinfo;
 }
 
@@ -766,6 +841,7 @@ DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
 DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
 {
     assert(!blk->legacy_dinfo);
+    GLOBAL_STATE_CODE();
     return blk->legacy_dinfo = dinfo;
 }
 
@@ -776,6 +852,7 @@ DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
 BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
 {
     BlockBackend *blk = NULL;
+    GLOBAL_STATE_CODE();
 
     while ((blk = blk_next(blk)) != NULL) {
         if (blk->legacy_dinfo == dinfo) {
@@ -790,6 +867,7 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
  */
 BlockBackendPublic *blk_get_public(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     return &blk->public;
 }
 
@@ -798,25 +876,37 @@ BlockBackendPublic *blk_get_public(BlockBackend *blk)
  */
 BlockBackend *blk_by_public(BlockBackendPublic *public)
 {
+    GLOBAL_STATE_CODE();
     return container_of(public, BlockBackend, public);
 }
 
 /*
  * Disassociates the currently associated BlockDriverState from @blk.
+ *
+ * The caller must hold the AioContext lock for the BlockBackend.
  */
 void blk_remove_bs(BlockBackend *blk)
 {
     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
-    BlockDriverState *bs;
     BdrvChild *root;
+    AioContext *ctx;
+
+    GLOBAL_STATE_CODE();
 
     notifier_list_notify(&blk->remove_bs_notifiers, blk);
     if (tgm->throttle_state) {
-        bs = blk_bs(blk);
+        BlockDriverState *bs = blk_bs(blk);
+
+        /*
+         * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for
+         * example, if a temporary filter node is removed by a blockjob.
+         */
+        bdrv_ref(bs);
         bdrv_drained_begin(bs);
         throttle_group_detach_aio_context(tgm);
         throttle_group_attach_aio_context(tgm, qemu_get_aio_context());
         bdrv_drained_end(bs);
+        bdrv_unref(bs);
     }
 
     blk_update_root_state(blk);
@@ -828,20 +918,31 @@ void blk_remove_bs(BlockBackend *blk)
     blk_drain(blk);
     root = blk->root;
     blk->root = NULL;
+
+    ctx = bdrv_get_aio_context(root->bs);
+    bdrv_graph_wrlock(root->bs);
     bdrv_root_unref_child(root);
+    bdrv_graph_wrunlock_ctx(ctx);
 }
 
 /*
  * Associates a new BlockDriverState with @blk.
+ *
+ * Callers must hold the AioContext lock of @bs.
  */
 int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
 {
     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
+    AioContext *ctx = bdrv_get_aio_context(bs);
+
+    GLOBAL_STATE_CODE();
     bdrv_ref(bs);
+    bdrv_graph_wrlock(bs);
     blk->root = bdrv_root_attach_child(bs, "root", &child_root,
                                        BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-                                       blk->ctx, blk->perm, blk->shared_perm,
+                                       blk->perm, blk->shared_perm,
                                        blk, errp);
+    bdrv_graph_wrunlock_ctx(ctx);
     if (blk->root == NULL) {
         return -EPERM;
     }
@@ -855,13 +956,24 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
     return 0;
 }
 
+/*
+ * Change BlockDriverState associated with @blk.
+ */
+int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp)
+{
+    GLOBAL_STATE_CODE();
+    return bdrv_replace_child_bs(blk->root, new_bs, errp);
+}
+
 /*
  * Sets the permission bitmasks that the user of the BlockBackend needs.
  */
-int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
-                 Error **errp)
+static int coroutine_mixed_fn GRAPH_RDLOCK
+blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+                    Error **errp)
 {
     int ret;
+    GLOBAL_STATE_CODE();
 
     if (blk->root && !blk->disable_perm) {
         ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
@@ -876,8 +988,18 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
     return 0;
 }
 
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+                 Error **errp)
+{
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    return blk_set_perm_locked(blk, perm, shared_perm, errp);
+}
+
 void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
 {
+    GLOBAL_STATE_CODE();
     *perm = blk->perm;
     *shared_perm = blk->shared_perm;
 }
@@ -888,6 +1010,7 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
  */
 int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
 {
+    GLOBAL_STATE_CODE();
     if (blk->dev) {
         return -EBUSY;
     }
@@ -913,10 +1036,10 @@ int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
 void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
 {
     assert(blk->dev == dev);
+    GLOBAL_STATE_CODE();
     blk->dev = NULL;
     blk->dev_ops = NULL;
     blk->dev_opaque = NULL;
-    blk->guest_block_size = 512;
     blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
     blk_unref(blk);
 }
@@ -926,6 +1049,7 @@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
  */
 DeviceState *blk_get_attached_dev(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     return blk->dev;
 }
 
@@ -934,6 +1058,7 @@ DeviceState *blk_get_attached_dev(BlockBackend *blk)
 char *blk_get_attached_dev_id(BlockBackend *blk)
 {
     DeviceState *dev = blk->dev;
+    IO_CODE();
 
     if (!dev) {
         return g_strdup("");
@@ -954,6 +1079,8 @@ BlockBackend *blk_by_dev(void *dev)
 {
     BlockBackend *blk = NULL;
 
+    GLOBAL_STATE_CODE();
+
     assert(dev != NULL);
     while ((blk = blk_all_next(blk)) != NULL) {
         if (blk->dev == dev) {
@@ -971,11 +1098,12 @@ BlockBackend *blk_by_dev(void *dev)
 void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
                      void *opaque)
 {
+    GLOBAL_STATE_CODE();
     blk->dev_ops = ops;
     blk->dev_opaque = opaque;
 
     /* Are we currently quiesced? Should we enforce this right now? */
-    if (blk->quiesce_counter && ops->drained_begin) {
+    if (qatomic_read(&blk->quiesce_counter) && ops && ops->drained_begin) {
         ops->drained_begin(opaque);
     }
 }
@@ -992,6 +1120,7 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
  */
 void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
 {
+    GLOBAL_STATE_CODE();
     if (blk->dev_ops && blk->dev_ops->change_media_cb) {
         bool tray_was_open, tray_is_open;
         Error *local_err = NULL;
@@ -1024,6 +1153,7 @@ static void blk_root_change_media(BdrvChild *child, bool load)
  */
 bool blk_dev_has_removable_media(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
 }
 
@@ -1032,6 +1162,7 @@ bool blk_dev_has_removable_media(BlockBackend *blk)
  */
 bool blk_dev_has_tray(BlockBackend *blk)
 {
+    IO_CODE();
     return blk->dev_ops && blk->dev_ops->is_tray_open;
 }
 
@@ -1041,6 +1172,7 @@ bool blk_dev_has_tray(BlockBackend *blk)
  */
 void blk_dev_eject_request(BlockBackend *blk, bool force)
 {
+    GLOBAL_STATE_CODE();
     if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
         blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
     }
@@ -1051,6 +1183,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force)
  */
 bool blk_dev_is_tray_open(BlockBackend *blk)
 {
+    IO_CODE();
     if (blk_dev_has_tray(blk)) {
         return blk->dev_ops->is_tray_open(blk->dev_opaque);
     }
@@ -1063,6 +1196,7 @@ bool blk_dev_is_tray_open(BlockBackend *blk)
  */
 bool blk_dev_is_medium_locked(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
         return blk->dev_ops->is_medium_locked(blk->dev_opaque);
     }
@@ -1083,6 +1217,7 @@ static void blk_root_resize(BdrvChild *child)
 
 void blk_iostatus_enable(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     blk->iostatus_enabled = true;
     blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
 }
@@ -1091,6 +1226,7 @@ void blk_iostatus_enable(BlockBackend *blk)
  * enables it _and_ the VM is configured to stop on errors */
 bool blk_iostatus_is_enabled(const BlockBackend *blk)
 {
+    IO_CODE();
     return (blk->iostatus_enabled &&
            (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
             blk->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
@@ -1099,16 +1235,19 @@ bool blk_iostatus_is_enabled(const BlockBackend *blk)
 
 BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     return blk->iostatus;
 }
 
 void blk_iostatus_disable(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     blk->iostatus_enabled = false;
 }
 
 void blk_iostatus_reset(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     if (blk_iostatus_is_enabled(blk)) {
         blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
     }
@@ -1116,6 +1255,7 @@ void blk_iostatus_reset(BlockBackend *blk)
 
 void blk_iostatus_set_err(BlockBackend *blk, int error)
 {
+    IO_CODE();
     assert(blk_iostatus_is_enabled(blk));
     if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
         blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
@@ -1125,29 +1265,32 @@ void blk_iostatus_set_err(BlockBackend *blk, int error)
 
 void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
 {
+    IO_CODE();
     blk->allow_write_beyond_eof = allow;
 }
 
 void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
 {
+    IO_CODE();
     blk->allow_aio_context_change = allow;
 }
 
 void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
 {
-    blk->disable_request_queuing = disable;
+    IO_CODE();
+    qatomic_set(&blk->disable_request_queuing, disable);
 }
 
-static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
-                                  size_t size)
+static int coroutine_fn GRAPH_RDLOCK
+blk_check_byte_request(BlockBackend *blk, int64_t offset, int64_t bytes)
 {
     int64_t len;
 
-    if (size > INT_MAX) {
+    if (bytes < 0) {
         return -EIO;
     }
 
-    if (!blk_is_available(blk)) {
+    if (!blk_co_is_available(blk)) {
         return -ENOMEDIUM;
     }
 
@@ -1156,12 +1299,12 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
     }
 
     if (!blk->allow_write_beyond_eof) {
-        len = blk_getlength(blk);
+        len = bdrv_co_getlength(blk_bs(blk));
         if (len < 0) {
             return len;
         }
 
-        if (offset > len || len - offset < size) {
+        if (offset > len || len - offset < bytes) {
             return -EIO;
         }
     }
@@ -1169,27 +1312,45 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
     return 0;
 }
 
+/* Are we currently in a drained section? */
+bool blk_in_drain(BlockBackend *blk)
+{
+    GLOBAL_STATE_CODE(); /* change to IO_OR_GS_CODE(), if necessary */
+    return qatomic_read(&blk->quiesce_counter);
+}
+
 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
 static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
 {
     assert(blk->in_flight > 0);
 
-    if (blk->quiesce_counter && !blk->disable_request_queuing) {
+    if (qatomic_read(&blk->quiesce_counter) &&
+        !qatomic_read(&blk->disable_request_queuing)) {
+        /*
+         * Take lock before decrementing in flight counter so main loop thread
+         * waits for us to enqueue ourselves before it can leave the drained
+         * section.
+         */
+        qemu_mutex_lock(&blk->queued_requests_lock);
         blk_dec_in_flight(blk);
-        qemu_co_queue_wait(&blk->queued_requests, NULL);
+        qemu_co_queue_wait(&blk->queued_requests, &blk->queued_requests_lock);
         blk_inc_in_flight(blk);
+        qemu_mutex_unlock(&blk->queued_requests_lock);
     }
 }
 
 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
 static int coroutine_fn
-blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes,
-              QEMUIOVector *qiov, BdrvRequestFlags flags)
+blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes,
+                      QEMUIOVector *qiov, size_t qiov_offset,
+                      BdrvRequestFlags flags)
 {
     int ret;
     BlockDriverState *bs;
+    IO_CODE();
 
     blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
 
     /* Call blk_bs() only after waiting, the graph may have changed */
     bs = blk_bs(blk);
@@ -1205,22 +1366,49 @@ blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes,
     /* throttling disk I/O */
     if (blk->public.throttle_group_member.throttle_state) {
         throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
-                bytes, false);
+                bytes, THROTTLE_READ);
     }
 
-    ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
+    ret = bdrv_co_preadv_part(blk->root, offset, bytes, qiov, qiov_offset,
+                              flags);
     bdrv_dec_in_flight(bs);
     return ret;
 }
 
+int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, int64_t bytes,
+                              void *buf, BdrvRequestFlags flags)
+{
+    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+    IO_OR_GS_CODE();
+
+    assert(bytes <= SIZE_MAX);
+
+    return blk_co_preadv(blk, offset, bytes, &qiov, flags);
+}
+
 int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
-                               unsigned int bytes, QEMUIOVector *qiov,
+                               int64_t bytes, QEMUIOVector *qiov,
                                BdrvRequestFlags flags)
 {
     int ret;
+    IO_OR_GS_CODE();
 
     blk_inc_in_flight(blk);
-    ret = blk_do_preadv(blk, offset, bytes, qiov, flags);
+    ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, 0, flags);
+    blk_dec_in_flight(blk);
+
+    return ret;
+}
+
+int coroutine_fn blk_co_preadv_part(BlockBackend *blk, int64_t offset,
+                                    int64_t bytes, QEMUIOVector *qiov,
+                                    size_t qiov_offset, BdrvRequestFlags flags)
+{
+    int ret;
+    IO_OR_GS_CODE();
+
+    blk_inc_in_flight(blk);
+    ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, qiov_offset, flags);
     blk_dec_in_flight(blk);
 
     return ret;
@@ -1228,14 +1416,16 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
 
 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
 static int coroutine_fn
-blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes,
-                    QEMUIOVector *qiov, size_t qiov_offset,
-                    BdrvRequestFlags flags)
+blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
+                       QEMUIOVector *qiov, size_t qiov_offset,
+                       BdrvRequestFlags flags)
 {
     int ret;
     BlockDriverState *bs;
+    IO_CODE();
 
     blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
 
     /* Call blk_bs() only after waiting, the graph may have changed */
     bs = blk_bs(blk);
@@ -1250,7 +1440,7 @@ blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes,
     /* throttling disk I/O */
     if (blk->public.throttle_group_member.throttle_state) {
         throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
-                bytes, true);
+                bytes, THROTTLE_WRITE);
     }
 
     if (!blk->enable_write_cache) {
@@ -1264,100 +1454,85 @@ blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes,
 }
 
 int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
-                                     unsigned int bytes,
+                                     int64_t bytes,
                                      QEMUIOVector *qiov, size_t qiov_offset,
                                      BdrvRequestFlags flags)
 {
     int ret;
+    IO_OR_GS_CODE();
 
     blk_inc_in_flight(blk);
-    ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
+    ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
     blk_dec_in_flight(blk);
 
     return ret;
 }
 
-int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
-                                unsigned int bytes, QEMUIOVector *qiov,
-                                BdrvRequestFlags flags)
+int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, int64_t bytes,
+                               const void *buf, BdrvRequestFlags flags)
 {
-    return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
-}
-
-typedef struct BlkRwCo {
-    BlockBackend *blk;
-    int64_t offset;
-    void *iobuf;
-    int ret;
-    BdrvRequestFlags flags;
-} BlkRwCo;
+    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+    IO_OR_GS_CODE();
 
-static void blk_read_entry(void *opaque)
-{
-    BlkRwCo *rwco = opaque;
-    QEMUIOVector *qiov = rwco->iobuf;
+    assert(bytes <= SIZE_MAX);
 
-    rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size,
-                              qiov, rwco->flags);
-    aio_wait_kick();
+    return blk_co_pwritev(blk, offset, bytes, &qiov, flags);
 }
 
-static void blk_write_entry(void *opaque)
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+                                int64_t bytes, QEMUIOVector *qiov,
+                                BdrvRequestFlags flags)
 {
-    BlkRwCo *rwco = opaque;
-    QEMUIOVector *qiov = rwco->iobuf;
-
-    rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size,
-                                    qiov, 0, rwco->flags);
-    aio_wait_kick();
+    IO_OR_GS_CODE();
+    return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
 }
 
-static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
-                   int64_t bytes, CoroutineEntry co_entry,
-                   BdrvRequestFlags flags)
+int coroutine_fn blk_co_block_status_above(BlockBackend *blk,
+                                           BlockDriverState *base,
+                                           int64_t offset, int64_t bytes,
+                                           int64_t *pnum, int64_t *map,
+                                           BlockDriverState **file)
 {
-    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-    BlkRwCo rwco = {
-        .blk    = blk,
-        .offset = offset,
-        .iobuf  = &qiov,
-        .flags  = flags,
-        .ret    = NOT_DONE,
-    };
-
-    blk_inc_in_flight(blk);
-    if (qemu_in_coroutine()) {
-        /* Fast-path if already in coroutine context */
-        co_entry(&rwco);
-    } else {
-        Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
-        bdrv_coroutine_enter(blk_bs(blk), co);
-        BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
-    }
-    blk_dec_in_flight(blk);
-
-    return rwco.ret;
+    IO_CODE();
+    GRAPH_RDLOCK_GUARD();
+    return bdrv_co_block_status_above(blk_bs(blk), base, offset, bytes, pnum,
+                                      map, file);
 }
 
-int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                      int bytes, BdrvRequestFlags flags)
+int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk,
+                                           BlockDriverState *base,
+                                           bool include_base, int64_t offset,
+                                           int64_t bytes, int64_t *pnum)
 {
-    return blk_prw(blk, offset, NULL, bytes, blk_write_entry,
-                   flags | BDRV_REQ_ZERO_WRITE);
+    IO_CODE();
+    GRAPH_RDLOCK_GUARD();
+    return bdrv_co_is_allocated_above(blk_bs(blk), base, include_base, offset,
+                                      bytes, pnum);
 }
 
+typedef struct BlkRwCo {
+    BlockBackend *blk;
+    int64_t offset;
+    void *iobuf;
+    int ret;
+    BdrvRequestFlags flags;
+} BlkRwCo;
+
 int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
 {
+    GLOBAL_STATE_CODE();
     return bdrv_make_zero(blk->root, flags);
 }
 
 void blk_inc_in_flight(BlockBackend *blk)
 {
+    IO_CODE();
     qatomic_inc(&blk->in_flight);
 }
 
 void blk_dec_in_flight(BlockBackend *blk)
 {
+    IO_CODE();
     qatomic_dec(&blk->in_flight);
     aio_wait_kick();
 }
@@ -1376,13 +1551,14 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
                                   void *opaque, int ret)
 {
     struct BlockBackendAIOCB *acb;
+    IO_CODE();
 
     blk_inc_in_flight(blk);
     acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
     acb->blk = blk;
     acb->ret = ret;
 
-    replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+    replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                      error_callback_bh, acb);
     return &acb->common;
 }
@@ -1390,20 +1566,12 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
 typedef struct BlkAioEmAIOCB {
     BlockAIOCB common;
     BlkRwCo rwco;
-    int bytes;
+    int64_t bytes;
     bool has_returned;
 } BlkAioEmAIOCB;
 
-static AioContext *blk_aio_em_aiocb_get_aio_context(BlockAIOCB *acb_)
-{
-    BlkAioEmAIOCB *acb = container_of(acb_, BlkAioEmAIOCB, common);
-
-    return blk_get_aio_context(acb->rwco.blk);
-}
-
 static const AIOCBInfo blk_aio_em_aiocb_info = {
     .aiocb_size         = sizeof(BlkAioEmAIOCB),
-    .get_aio_context    = blk_aio_em_aiocb_get_aio_context,
 };
 
 static void blk_aio_complete(BlkAioEmAIOCB *acb)
@@ -1422,7 +1590,8 @@ static void blk_aio_complete_bh(void *opaque)
     blk_aio_complete(acb);
 }
 
-static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
+static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset,
+                                int64_t bytes,
                                 void *iobuf, CoroutineEntry co_entry,
                                 BdrvRequestFlags flags,
                                 BlockCompletionFunc *cb, void *opaque)
@@ -1443,100 +1612,117 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
     acb->has_returned = false;
 
     co = qemu_coroutine_create(co_entry, acb);
-    bdrv_coroutine_enter(blk_bs(blk), co);
+    aio_co_enter(qemu_get_current_aio_context(), co);
 
     acb->has_returned = true;
     if (acb->rwco.ret != NOT_DONE) {
-        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                          blk_aio_complete_bh, acb);
     }
 
     return &acb->common;
 }
 
-static void blk_aio_read_entry(void *opaque)
+static void coroutine_fn blk_aio_read_entry(void *opaque)
 {
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
     QEMUIOVector *qiov = rwco->iobuf;
 
     assert(qiov->size == acb->bytes);
-    rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes,
-                              qiov, rwco->flags);
+    rwco->ret = blk_co_do_preadv_part(rwco->blk, rwco->offset, acb->bytes, qiov,
+                                      0, rwco->flags);
     blk_aio_complete(acb);
 }
 
-static void blk_aio_write_entry(void *opaque)
+static void coroutine_fn blk_aio_write_entry(void *opaque)
 {
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
     QEMUIOVector *qiov = rwco->iobuf;
 
     assert(!qiov || qiov->size == acb->bytes);
-    rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
-                                    qiov, 0, rwco->flags);
+    rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
+                                       qiov, 0, rwco->flags);
     blk_aio_complete(acb);
 }
 
 BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                                  int count, BdrvRequestFlags flags,
+                                  int64_t bytes, BdrvRequestFlags flags,
                                   BlockCompletionFunc *cb, void *opaque)
 {
-    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
+    IO_CODE();
+    return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry,
                         flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
 }
 
-int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
+int64_t coroutine_fn blk_co_getlength(BlockBackend *blk)
 {
-    int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
-    if (ret < 0) {
-        return ret;
-    }
-    return count;
-}
+    IO_CODE();
+    GRAPH_RDLOCK_GUARD();
 
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
-               BdrvRequestFlags flags)
-{
-    int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
-                      flags);
-    if (ret < 0) {
-        return ret;
+    if (!blk_co_is_available(blk)) {
+        return -ENOMEDIUM;
     }
-    return count;
+
+    return bdrv_co_getlength(blk_bs(blk));
 }
 
-int64_t blk_getlength(BlockBackend *blk)
+int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk)
 {
-    if (!blk_is_available(blk)) {
+    BlockDriverState *bs = blk_bs(blk);
+
+    IO_CODE();
+    GRAPH_RDLOCK_GUARD();
+
+    if (!bs) {
         return -ENOMEDIUM;
+    } else {
+        return bdrv_co_nb_sectors(bs);
     }
-
-    return bdrv_getlength(blk_bs(blk));
 }
 
-void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
+/*
+ * This wrapper is written by hand because this function is in the hot I/O path,
+ * via blk_get_geometry.
+ */
+int64_t coroutine_mixed_fn blk_nb_sectors(BlockBackend *blk)
 {
-    if (!blk_bs(blk)) {
-        *nb_sectors_ptr = 0;
+    BlockDriverState *bs = blk_bs(blk);
+
+    IO_CODE();
+
+    if (!bs) {
+        return -ENOMEDIUM;
     } else {
-        bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
+        return bdrv_nb_sectors(bs);
     }
 }
 
-int64_t blk_nb_sectors(BlockBackend *blk)
+/* return 0 as number of sectors if no device present or error */
+void coroutine_fn blk_co_get_geometry(BlockBackend *blk,
+                                      uint64_t *nb_sectors_ptr)
 {
-    if (!blk_is_available(blk)) {
-        return -ENOMEDIUM;
-    }
+    int64_t ret = blk_co_nb_sectors(blk);
+    *nb_sectors_ptr = ret < 0 ? 0 : ret;
+}
 
-    return bdrv_nb_sectors(blk_bs(blk));
+/*
+ * This wrapper is written by hand because this function is in the hot I/O path.
+ */
+void coroutine_mixed_fn blk_get_geometry(BlockBackend *blk,
+                                         uint64_t *nb_sectors_ptr)
+{
+    int64_t ret = blk_nb_sectors(blk);
+    *nb_sectors_ptr = ret < 0 ? 0 : ret;
 }
 
 BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
                            QEMUIOVector *qiov, BdrvRequestFlags flags,
                            BlockCompletionFunc *cb, void *opaque)
 {
+    IO_CODE();
+    assert((uint64_t)qiov->size <= INT64_MAX);
     return blk_aio_prwv(blk, offset, qiov->size, qiov,
                         blk_aio_read_entry, flags, cb, opaque);
 }
@@ -1545,53 +1731,59 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
                             QEMUIOVector *qiov, BdrvRequestFlags flags,
                             BlockCompletionFunc *cb, void *opaque)
 {
+    IO_CODE();
+    assert((uint64_t)qiov->size <= INT64_MAX);
     return blk_aio_prwv(blk, offset, qiov->size, qiov,
                         blk_aio_write_entry, flags, cb, opaque);
 }
 
 void blk_aio_cancel(BlockAIOCB *acb)
 {
+    GLOBAL_STATE_CODE();
     bdrv_aio_cancel(acb);
 }
 
 void blk_aio_cancel_async(BlockAIOCB *acb)
 {
+    IO_CODE();
     bdrv_aio_cancel_async(acb);
 }
 
 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
 static int coroutine_fn
-blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
+blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
+    IO_CODE();
+
     blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
 
-    if (!blk_is_available(blk)) {
+    if (!blk_co_is_available(blk)) {
         return -ENOMEDIUM;
     }
 
     return bdrv_co_ioctl(blk_bs(blk), req, buf);
 }
 
-static void blk_ioctl_entry(void *opaque)
+int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned long int req,
+                              void *buf)
 {
-    BlkRwCo *rwco = opaque;
-    QEMUIOVector *qiov = rwco->iobuf;
+    int ret;
+    IO_OR_GS_CODE();
 
-    rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base);
-    aio_wait_kick();
-}
+    blk_inc_in_flight(blk);
+    ret = blk_co_do_ioctl(blk, req, buf);
+    blk_dec_in_flight(blk);
 
-int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
-{
-    return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0);
+    return ret;
 }
 
-static void blk_aio_ioctl_entry(void *opaque)
+static void coroutine_fn blk_aio_ioctl_entry(void *opaque)
 {
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
 
-    rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
+    rwco->ret = blk_co_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
 
     blk_aio_complete(acb);
 }
@@ -1599,16 +1791,19 @@ static void blk_aio_ioctl_entry(void *opaque)
 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                           BlockCompletionFunc *cb, void *opaque)
 {
+    IO_CODE();
     return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
 }
 
 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
 static int coroutine_fn
-blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
 {
     int ret;
+    IO_CODE();
 
     blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
 
     ret = blk_check_byte_request(blk, offset, bytes);
     if (ret < 0) {
@@ -1618,112 +1813,294 @@ blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
     return bdrv_co_pdiscard(blk->root, offset, bytes);
 }
 
-static void blk_aio_pdiscard_entry(void *opaque)
+static void coroutine_fn blk_aio_pdiscard_entry(void *opaque)
 {
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
 
-    rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
+    rwco->ret = blk_co_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
     blk_aio_complete(acb);
 }
 
 BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
-                             int64_t offset, int bytes,
+                             int64_t offset, int64_t bytes,
                              BlockCompletionFunc *cb, void *opaque)
 {
+    IO_CODE();
     return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
                         cb, opaque);
 }
 
-int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
+                                 int64_t bytes)
 {
     int ret;
+    IO_OR_GS_CODE();
 
     blk_inc_in_flight(blk);
-    ret = blk_do_pdiscard(blk, offset, bytes);
+    ret = blk_co_do_pdiscard(blk, offset, bytes);
     blk_dec_in_flight(blk);
 
     return ret;
 }
 
-static void blk_pdiscard_entry(void *opaque)
-{
-    BlkRwCo *rwco = opaque;
-    QEMUIOVector *qiov = rwco->iobuf;
-
-    rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size);
-    aio_wait_kick();
-}
-
-int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
-{
-    return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
-}
-
 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
-static int coroutine_fn blk_do_flush(BlockBackend *blk)
+static int coroutine_fn blk_co_do_flush(BlockBackend *blk)
 {
+    IO_CODE();
     blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
 
-    if (!blk_is_available(blk)) {
+    if (!blk_co_is_available(blk)) {
         return -ENOMEDIUM;
     }
 
     return bdrv_co_flush(blk_bs(blk));
 }
 
-static void blk_aio_flush_entry(void *opaque)
+static void coroutine_fn blk_aio_flush_entry(void *opaque)
 {
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
 
-    rwco->ret = blk_do_flush(rwco->blk);
+    rwco->ret = blk_co_do_flush(rwco->blk);
     blk_aio_complete(acb);
 }
 
 BlockAIOCB *blk_aio_flush(BlockBackend *blk,
                           BlockCompletionFunc *cb, void *opaque)
 {
+    IO_CODE();
     return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
 }
 
 int coroutine_fn blk_co_flush(BlockBackend *blk)
 {
     int ret;
+    IO_OR_GS_CODE();
 
     blk_inc_in_flight(blk);
-    ret = blk_do_flush(blk);
+    ret = blk_co_do_flush(blk);
     blk_dec_in_flight(blk);
 
     return ret;
 }
 
-static void blk_flush_entry(void *opaque)
+static void coroutine_fn blk_aio_zone_report_entry(void *opaque)
 {
-    BlkRwCo *rwco = opaque;
-    rwco->ret = blk_do_flush(rwco->blk);
-    aio_wait_kick();
+    BlkAioEmAIOCB *acb = opaque;
+    BlkRwCo *rwco = &acb->rwco;
+
+    rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
+                                   (unsigned int*)(uintptr_t)acb->bytes,
+                                   rwco->iobuf);
+    blk_aio_complete(acb);
 }
 
-int blk_flush(BlockBackend *blk)
+BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
+                                unsigned int *nr_zones,
+                                BlockZoneDescriptor  *zones,
+                                BlockCompletionFunc *cb, void *opaque)
 {
-    return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0);
+    BlkAioEmAIOCB *acb;
+    Coroutine *co;
+    IO_CODE();
+
+    blk_inc_in_flight(blk);
+    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+    acb->rwco = (BlkRwCo) {
+        .blk    = blk,
+        .offset = offset,
+        .iobuf  = zones,
+        .ret    = NOT_DONE,
+    };
+    acb->bytes = (int64_t)(uintptr_t)nr_zones,
+    acb->has_returned = false;
+
+    co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
+    aio_co_enter(qemu_get_current_aio_context(), co);
+
+    acb->has_returned = true;
+    if (acb->rwco.ret != NOT_DONE) {
+        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
+                                         blk_aio_complete_bh, acb);
+    }
+
+    return &acb->common;
+}
+
+static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque)
+{
+    BlkAioEmAIOCB *acb = opaque;
+    BlkRwCo *rwco = &acb->rwco;
+
+    rwco->ret = blk_co_zone_mgmt(rwco->blk,
+                                 (BlockZoneOp)(uintptr_t)rwco->iobuf,
+                                 rwco->offset, acb->bytes);
+    blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
+                              int64_t offset, int64_t len,
+                              BlockCompletionFunc *cb, void *opaque) {
+    BlkAioEmAIOCB *acb;
+    Coroutine *co;
+    IO_CODE();
+
+    blk_inc_in_flight(blk);
+    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+    acb->rwco = (BlkRwCo) {
+        .blk    = blk,
+        .offset = offset,
+        .iobuf  = (void *)(uintptr_t)op,
+        .ret    = NOT_DONE,
+    };
+    acb->bytes = len;
+    acb->has_returned = false;
+
+    co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
+    aio_co_enter(qemu_get_current_aio_context(), co);
+
+    acb->has_returned = true;
+    if (acb->rwco.ret != NOT_DONE) {
+        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
+                                         blk_aio_complete_bh, acb);
+    }
+
+    return &acb->common;
+}
+
+static void coroutine_fn blk_aio_zone_append_entry(void *opaque)
+{
+    BlkAioEmAIOCB *acb = opaque;
+    BlkRwCo *rwco = &acb->rwco;
+
+    rwco->ret = blk_co_zone_append(rwco->blk, (int64_t *)(uintptr_t)acb->bytes,
+                                   rwco->iobuf, rwco->flags);
+    blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
+                                QEMUIOVector *qiov, BdrvRequestFlags flags,
+                                BlockCompletionFunc *cb, void *opaque) {
+    BlkAioEmAIOCB *acb;
+    Coroutine *co;
+    IO_CODE();
+
+    blk_inc_in_flight(blk);
+    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+    acb->rwco = (BlkRwCo) {
+        .blk    = blk,
+        .ret    = NOT_DONE,
+        .flags  = flags,
+        .iobuf  = qiov,
+    };
+    acb->bytes = (int64_t)(uintptr_t)offset;
+    acb->has_returned = false;
+
+    co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
+    aio_co_enter(qemu_get_current_aio_context(), co);
+    acb->has_returned = true;
+    if (acb->rwco.ret != NOT_DONE) {
+        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
+                                         blk_aio_complete_bh, acb);
+    }
+
+    return &acb->common;
+}
+
+/*
+ * Send a zone_report command.
+ * offset is a byte offset from the start of the device. No alignment
+ * required for offset.
+ * nr_zones represents IN maximum and OUT actual.
+ */
+int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
+                                    unsigned int *nr_zones,
+                                    BlockZoneDescriptor *zones)
+{
+    int ret;
+    IO_CODE();
+
+    blk_inc_in_flight(blk); /* increase before waiting */
+    blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
+    if (!blk_is_available(blk)) {
+        blk_dec_in_flight(blk);
+        return -ENOMEDIUM;
+    }
+    ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
+    blk_dec_in_flight(blk);
+    return ret;
+}
+
+/*
+ * Send a zone_management command.
+ * op is the zone operation;
+ * offset is the byte offset from the start of the zoned device;
+ * len is the maximum number of bytes the command should operate on. It
+ * should be aligned with the device zone size.
+ */
+int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
+        int64_t offset, int64_t len)
+{
+    int ret;
+    IO_CODE();
+
+    blk_inc_in_flight(blk);
+    blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
+
+    ret = blk_check_byte_request(blk, offset, len);
+    if (ret < 0) {
+        blk_dec_in_flight(blk);
+        return ret;
+    }
+
+    ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
+    blk_dec_in_flight(blk);
+    return ret;
+}
+
+/*
+ * Send a zone_append command.
+ */
+int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
+        QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+    int ret;
+    IO_CODE();
+
+    blk_inc_in_flight(blk);
+    blk_wait_while_drained(blk);
+    GRAPH_RDLOCK_GUARD();
+    if (!blk_is_available(blk)) {
+        blk_dec_in_flight(blk);
+        return -ENOMEDIUM;
+    }
+
+    ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags);
+    blk_dec_in_flight(blk);
+    return ret;
 }
 
 void blk_drain(BlockBackend *blk)
 {
     BlockDriverState *bs = blk_bs(blk);
+    GLOBAL_STATE_CODE();
 
     if (bs) {
+        bdrv_ref(bs);
         bdrv_drained_begin(bs);
     }
 
     /* We may have -ENOMEDIUM completions in flight */
     AIO_WAIT_WHILE(blk_get_aio_context(blk),
-                   qatomic_mb_read(&blk->in_flight) > 0);
+                   qatomic_read(&blk->in_flight) > 0);
 
     if (bs) {
         bdrv_drained_end(bs);
+        bdrv_unref(bs);
     }
 }
 
@@ -1731,17 +2108,13 @@ void blk_drain_all(void)
 {
     BlockBackend *blk = NULL;
 
+    GLOBAL_STATE_CODE();
+
     bdrv_drain_all_begin();
 
     while ((blk = blk_all_next(blk)) != NULL) {
-        AioContext *ctx = blk_get_aio_context(blk);
-
-        aio_context_acquire(ctx);
-
         /* We may have -ENOMEDIUM completions in flight */
-        AIO_WAIT_WHILE(ctx, qatomic_mb_read(&blk->in_flight) > 0);
-
-        aio_context_release(ctx);
+        AIO_WAIT_WHILE_UNLOCKED(NULL, qatomic_read(&blk->in_flight) > 0);
     }
 
     bdrv_drain_all_end();
@@ -1750,12 +2123,14 @@ void blk_drain_all(void)
 void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
                       BlockdevOnError on_write_error)
 {
+    GLOBAL_STATE_CODE();
     blk->on_read_error = on_read_error;
     blk->on_write_error = on_write_error;
 }
 
 BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
 {
+    IO_CODE();
     return is_read ? blk->on_read_error : blk->on_write_error;
 }
 
@@ -1763,6 +2138,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
                                       int error)
 {
     BlockdevOnError on_err = blk_get_on_error(blk, is_read);
+    IO_CODE();
 
     switch (on_err) {
     case BLOCKDEV_ON_ERROR_ENOSPC:
@@ -1788,7 +2164,7 @@ static void send_qmp_error_event(BlockBackend *blk,
     BlockDriverState *bs = blk_bs(blk);
 
     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
-    qapi_event_send_block_io_error(blk_name(blk), !!bs,
+    qapi_event_send_block_io_error(blk_name(blk),
                                    bs ? bdrv_get_node_name(bs) : NULL, optype,
                                    action, blk_iostatus_is_enabled(blk),
                                    error == ENOSPC, strerror(error));
@@ -1802,6 +2178,7 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action,
                       bool is_read, int error)
 {
     assert(error >= 0);
+    IO_CODE();
 
     if (action == BLOCK_ERROR_ACTION_STOP) {
         /* First set the iostatus, so that "info block" returns an iostatus
@@ -1833,11 +2210,12 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action,
 bool blk_supports_write_perm(BlockBackend *blk)
 {
     BlockDriverState *bs = blk_bs(blk);
+    GLOBAL_STATE_CODE();
 
     if (bs) {
         return !bdrv_is_read_only(bs);
     } else {
-        return !blk->root_state.read_only;
+        return blk->root_state.open_flags & BDRV_O_RDWR;
     }
 }
 
@@ -1847,12 +2225,14 @@ bool blk_supports_write_perm(BlockBackend *blk)
  */
 bool blk_is_writable(BlockBackend *blk)
 {
+    IO_CODE();
     return blk->perm & BLK_PERM_WRITE;
 }
 
 bool blk_is_sg(BlockBackend *blk)
 {
     BlockDriverState *bs = blk_bs(blk);
+    GLOBAL_STATE_CODE();
 
     if (!bs) {
         return false;
@@ -1863,54 +2243,73 @@ bool blk_is_sg(BlockBackend *blk)
 
 bool blk_enable_write_cache(BlockBackend *blk)
 {
+    IO_CODE();
     return blk->enable_write_cache;
 }
 
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
 {
+    IO_CODE();
     blk->enable_write_cache = wce;
 }
 
-void blk_invalidate_cache(BlockBackend *blk, Error **errp)
+void blk_activate(BlockBackend *blk, Error **errp)
 {
     BlockDriverState *bs = blk_bs(blk);
+    GLOBAL_STATE_CODE();
 
     if (!bs) {
         error_setg(errp, "Device '%s' has no medium", blk->name);
         return;
     }
 
-    bdrv_invalidate_cache(bs, errp);
+    /*
+     * Migration code can call this function in coroutine context, so leave
+     * coroutine context if necessary.
+     */
+    if (qemu_in_coroutine()) {
+        bdrv_co_activate(bs, errp);
+    } else {
+        GRAPH_RDLOCK_GUARD_MAINLOOP();
+        bdrv_activate(bs, errp);
+    }
 }
 
-bool blk_is_inserted(BlockBackend *blk)
+bool coroutine_fn blk_co_is_inserted(BlockBackend *blk)
 {
     BlockDriverState *bs = blk_bs(blk);
+    IO_CODE();
+    assert_bdrv_graph_readable();
 
-    return bs && bdrv_is_inserted(bs);
+    return bs && bdrv_co_is_inserted(bs);
 }
 
-bool blk_is_available(BlockBackend *blk)
+bool coroutine_fn blk_co_is_available(BlockBackend *blk)
 {
-    return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
+    IO_CODE();
+    return blk_co_is_inserted(blk) && !blk_dev_is_tray_open(blk);
 }
 
-void blk_lock_medium(BlockBackend *blk, bool locked)
+void coroutine_fn blk_co_lock_medium(BlockBackend *blk, bool locked)
 {
     BlockDriverState *bs = blk_bs(blk);
+    IO_CODE();
+    GRAPH_RDLOCK_GUARD();
 
     if (bs) {
-        bdrv_lock_medium(bs, locked);
+        bdrv_co_lock_medium(bs, locked);
     }
 }
 
-void blk_eject(BlockBackend *blk, bool eject_flag)
+void coroutine_fn blk_co_eject(BlockBackend *blk, bool eject_flag)
 {
     BlockDriverState *bs = blk_bs(blk);
     char *id;
+    IO_CODE();
+    GRAPH_RDLOCK_GUARD();
 
     if (bs) {
-        bdrv_eject(bs, eject_flag);
+        bdrv_co_eject(bs, eject_flag);
     }
 
     /* Whether or not we ejected on the backend,
@@ -1924,6 +2323,7 @@ void blk_eject(BlockBackend *blk, bool eject_flag)
 int blk_get_flags(BlockBackend *blk)
 {
     BlockDriverState *bs = blk_bs(blk);
+    GLOBAL_STATE_CODE();
 
     if (bs) {
         return bdrv_get_flags(bs);
@@ -1936,44 +2336,67 @@ int blk_get_flags(BlockBackend *blk)
 uint32_t blk_get_request_alignment(BlockBackend *blk)
 {
     BlockDriverState *bs = blk_bs(blk);
+    IO_CODE();
     return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
 }
 
+/* Returns the maximum hardware transfer length, in bytes; guaranteed nonzero */
+uint64_t blk_get_max_hw_transfer(BlockBackend *blk)
+{
+    BlockDriverState *bs = blk_bs(blk);
+    uint64_t max = INT_MAX;
+    IO_CODE();
+
+    if (bs) {
+        max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer);
+        max = MIN_NON_ZERO(max, bs->bl.max_transfer);
+    }
+    return ROUND_DOWN(max, blk_get_request_alignment(blk));
+}
+
 /* Returns the maximum transfer length, in bytes; guaranteed nonzero */
 uint32_t blk_get_max_transfer(BlockBackend *blk)
 {
     BlockDriverState *bs = blk_bs(blk);
-    uint32_t max = 0;
+    uint32_t max = INT_MAX;
+    IO_CODE();
 
     if (bs) {
-        max = bs->bl.max_transfer;
+        max = MIN_NON_ZERO(max, bs->bl.max_transfer);
     }
-    return MIN_NON_ZERO(max, INT_MAX);
+    return ROUND_DOWN(max, blk_get_request_alignment(blk));
 }
 
-int blk_get_max_iov(BlockBackend *blk)
+int blk_get_max_hw_iov(BlockBackend *blk)
 {
-    return blk->root->bs->bl.max_iov;
+    IO_CODE();
+    return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov,
+                        blk->root->bs->bl.max_iov);
 }
 
-void blk_set_guest_block_size(BlockBackend *blk, int align)
+int blk_get_max_iov(BlockBackend *blk)
 {
-    blk->guest_block_size = align;
+    IO_CODE();
+    return blk->root->bs->bl.max_iov;
 }
 
 void *blk_try_blockalign(BlockBackend *blk, size_t size)
 {
+    IO_CODE();
     return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
 }
 
 void *blk_blockalign(BlockBackend *blk, size_t size)
 {
+    IO_CODE();
     return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
 }
 
 bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
 {
     BlockDriverState *bs = blk_bs(blk);
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     if (!bs) {
         return false;
@@ -1985,6 +2408,7 @@ bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
 void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
 {
     BlockDriverState *bs = blk_bs(blk);
+    GLOBAL_STATE_CODE();
 
     if (bs) {
         bdrv_op_unblock(bs, op, reason);
@@ -1994,6 +2418,7 @@ void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
 void blk_op_block_all(BlockBackend *blk, Error *reason)
 {
     BlockDriverState *bs = blk_bs(blk);
+    GLOBAL_STATE_CODE();
 
     if (bs) {
         bdrv_op_block_all(bs, reason);
@@ -2003,6 +2428,7 @@ void blk_op_block_all(BlockBackend *blk, Error *reason)
 void blk_op_unblock_all(BlockBackend *blk, Error *reason)
 {
     BlockDriverState *bs = blk_bs(blk);
+    GLOBAL_STATE_CODE();
 
     if (bs) {
         bdrv_op_unblock_all(bs, reason);
@@ -2011,8 +2437,14 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
 
 AioContext *blk_get_aio_context(BlockBackend *blk)
 {
-    BlockDriverState *bs = blk_bs(blk);
+    BlockDriverState *bs;
+    IO_CODE();
 
+    if (!blk) {
+        return qemu_get_aio_context();
+    }
+
+    bs = blk_bs(blk);
     if (bs) {
         AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
         assert(ctx == blk->ctx);
@@ -2021,78 +2453,94 @@ AioContext *blk_get_aio_context(BlockBackend *blk)
     return blk->ctx;
 }
 
-static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
-{
-    BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
-    return blk_get_aio_context(blk_acb->blk);
-}
-
-static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
-                                  bool update_root_node, Error **errp)
+int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
+                        Error **errp)
 {
+    bool old_allow_change;
     BlockDriverState *bs = blk_bs(blk);
-    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
     int ret;
 
-    if (bs) {
-        if (update_root_node) {
-            ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root,
-                                                 errp);
-            if (ret < 0) {
-                return ret;
-            }
-        }
-        if (tgm->throttle_state) {
-            bdrv_drained_begin(bs);
-            throttle_group_detach_aio_context(tgm);
-            throttle_group_attach_aio_context(tgm, new_context);
-            bdrv_drained_end(bs);
-        }
+    GLOBAL_STATE_CODE();
+
+    if (!bs) {
+        blk->ctx = new_context;
+        return 0;
     }
 
-    blk->ctx = new_context;
-    return 0;
+    bdrv_ref(bs);
+
+    old_allow_change = blk->allow_aio_context_change;
+    blk->allow_aio_context_change = true;
+
+    ret = bdrv_try_change_aio_context(bs, new_context, NULL, errp);
+
+    blk->allow_aio_context_change = old_allow_change;
+
+    bdrv_unref(bs);
+    return ret;
 }
 
-int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
-                        Error **errp)
+typedef struct BdrvStateBlkRootContext {
+    AioContext *new_ctx;
+    BlockBackend *blk;
+} BdrvStateBlkRootContext;
+
+static void blk_root_set_aio_ctx_commit(void *opaque)
 {
-    return blk_do_set_aio_context(blk, new_context, true, errp);
+    BdrvStateBlkRootContext *s = opaque;
+    BlockBackend *blk = s->blk;
+    AioContext *new_context = s->new_ctx;
+    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
+
+    blk->ctx = new_context;
+    if (tgm->throttle_state) {
+        throttle_group_detach_aio_context(tgm);
+        throttle_group_attach_aio_context(tgm, new_context);
+    }
 }
 
-static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
-                                     GSList **ignore, Error **errp)
+static TransactionActionDrv set_blk_root_context = {
+    .commit = blk_root_set_aio_ctx_commit,
+    .clean = g_free,
+};
+
+static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx,
+                                    GHashTable *visited, Transaction *tran,
+                                    Error **errp)
 {
     BlockBackend *blk = child->opaque;
+    BdrvStateBlkRootContext *s;
 
-    if (blk->allow_aio_context_change) {
-        return true;
+    if (!blk->allow_aio_context_change) {
+        /*
+         * Manually created BlockBackends (those with a name) that are not
+         * attached to anything can change their AioContext without updating
+         * their user; return an error for others.
+         */
+        if (!blk->name || blk->dev) {
+            /* TODO Add BB name/QOM path */
+            error_setg(errp, "Cannot change iothread of active block backend");
+            return false;
+        }
     }
 
-    /* Only manually created BlockBackends that are not attached to anything
-     * can change their AioContext without updating their user. */
-    if (!blk->name || blk->dev) {
-        /* TODO Add BB name/QOM path */
-        error_setg(errp, "Cannot change iothread of active block backend");
-        return false;
-    }
+    s = g_new(BdrvStateBlkRootContext, 1);
+    *s = (BdrvStateBlkRootContext) {
+        .new_ctx = ctx,
+        .blk = blk,
+    };
 
+    tran_add(tran, &set_blk_root_context, s);
     return true;
 }
 
-static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
-                                 GSList **ignore)
-{
-    BlockBackend *blk = child->opaque;
-    blk_do_set_aio_context(blk, ctx, false, &error_abort);
-}
-
 void blk_add_aio_context_notifier(BlockBackend *blk,
         void (*attached_aio_context)(AioContext *new_context, void *opaque),
         void (*detach_aio_context)(void *opaque), void *opaque)
 {
     BlockBackendAioNotifier *notifier;
     BlockDriverState *bs = blk_bs(blk);
+    GLOBAL_STATE_CODE();
 
     notifier = g_new(BlockBackendAioNotifier, 1);
     notifier->attached_aio_context = attached_aio_context;
@@ -2115,6 +2563,8 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
     BlockBackendAioNotifier *notifier;
     BlockDriverState *bs = blk_bs(blk);
 
+    GLOBAL_STATE_CODE();
+
     if (bs) {
         bdrv_remove_aio_context_notifier(bs, attached_aio_context,
                                          detach_aio_context, opaque);
@@ -2135,72 +2585,65 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
 
 void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
 {
+    GLOBAL_STATE_CODE();
     notifier_list_add(&blk->remove_bs_notifiers, notify);
 }
 
 void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
 {
+    GLOBAL_STATE_CODE();
     notifier_list_add(&blk->insert_bs_notifiers, notify);
 }
 
-void blk_io_plug(BlockBackend *blk)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        bdrv_io_plug(bs);
-    }
-}
-
-void blk_io_unplug(BlockBackend *blk)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        bdrv_io_unplug(bs);
-    }
-}
-
 BlockAcctStats *blk_get_stats(BlockBackend *blk)
 {
+    IO_CODE();
     return &blk->stats;
 }
 
 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
                   BlockCompletionFunc *cb, void *opaque)
 {
+    IO_CODE();
     return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
 }
 
 int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                                      int bytes, BdrvRequestFlags flags)
+                                      int64_t bytes, BdrvRequestFlags flags)
 {
+    IO_OR_GS_CODE();
     return blk_co_pwritev(blk, offset, bytes, NULL,
                           flags | BDRV_REQ_ZERO_WRITE);
 }
 
-int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
-                          int count)
+int coroutine_fn blk_co_pwrite_compressed(BlockBackend *blk, int64_t offset,
+                                          int64_t bytes, const void *buf)
 {
-    return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
-                   BDRV_REQ_WRITE_COMPRESSED);
+    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+    IO_OR_GS_CODE();
+    return blk_co_pwritev_part(blk, offset, bytes, &qiov, 0,
+                               BDRV_REQ_WRITE_COMPRESSED);
 }
 
-int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
-                 PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
+int coroutine_fn blk_co_truncate(BlockBackend *blk, int64_t offset, bool exact,
+                                 PreallocMode prealloc, BdrvRequestFlags flags,
+                                 Error **errp)
 {
-    if (!blk_is_available(blk)) {
+    IO_OR_GS_CODE();
+    GRAPH_RDLOCK_GUARD();
+    if (!blk_co_is_available(blk)) {
         error_setg(errp, "No medium inserted");
         return -ENOMEDIUM;
     }
 
-    return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp);
+    return bdrv_co_truncate(blk->root, offset, exact, prealloc, flags, errp);
 }
 
 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
                      int64_t pos, int size)
 {
     int ret;
+    GLOBAL_STATE_CODE();
 
     if (!blk_is_available(blk)) {
         return -ENOMEDIUM;
@@ -2220,6 +2663,7 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
 
 int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
 {
+    GLOBAL_STATE_CODE();
     if (!blk_is_available(blk)) {
         return -ENOMEDIUM;
     }
@@ -2229,6 +2673,9 @@ int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
 
 int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
 {
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (!blk_is_available(blk)) {
         return -ENOMEDIUM;
     }
@@ -2238,6 +2685,7 @@ int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
 
 int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
 {
+    GLOBAL_STATE_CODE();
     if (!blk_is_available(blk)) {
         return -ENOMEDIUM;
     }
@@ -2251,10 +2699,10 @@ int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
  */
 void blk_update_root_state(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     assert(blk->root);
 
     blk->root_state.open_flags    = blk->root->bs->open_flags;
-    blk->root_state.read_only     = blk->root->bs->read_only;
     blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
 }
 
@@ -2264,6 +2712,7 @@ void blk_update_root_state(BlockBackend *blk)
  */
 bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     return blk->root_state.detect_zeroes;
 }
 
@@ -2273,22 +2722,21 @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
  */
 int blk_get_open_flags_from_root_state(BlockBackend *blk)
 {
-    int bs_flags;
-
-    bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
-    bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;
-
-    return bs_flags;
+    GLOBAL_STATE_CODE();
+    return blk->root_state.open_flags;
 }
 
 BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     return &blk->root_state;
 }
 
 int blk_commit_all(void)
 {
     BlockBackend *blk = NULL;
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     while ((blk = blk_all_next(blk)) != NULL) {
         AioContext *aio_context = blk_get_aio_context(blk);
@@ -2313,6 +2761,7 @@ int blk_commit_all(void)
 /* throttling disk I/O limits */
 void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
 {
+    GLOBAL_STATE_CODE();
     throttle_group_config(&blk->public.throttle_group_member, cfg);
 }
 
@@ -2321,12 +2770,15 @@ void blk_io_limits_disable(BlockBackend *blk)
     BlockDriverState *bs = blk_bs(blk);
     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
     assert(tgm->throttle_state);
+    GLOBAL_STATE_CODE();
     if (bs) {
+        bdrv_ref(bs);
         bdrv_drained_begin(bs);
     }
     throttle_group_unregister_tgm(tgm);
     if (bs) {
         bdrv_drained_end(bs);
+        bdrv_unref(bs);
     }
 }
 
@@ -2334,12 +2786,14 @@ void blk_io_limits_disable(BlockBackend *blk)
 void blk_io_limits_enable(BlockBackend *blk, const char *group)
 {
     assert(!blk->public.throttle_group_member.throttle_state);
+    GLOBAL_STATE_CODE();
     throttle_group_register_tgm(&blk->public.throttle_group_member,
                                 group, blk_get_aio_context(blk));
 }
 
 void blk_io_limits_update_group(BlockBackend *blk, const char *group)
 {
+    GLOBAL_STATE_CODE();
     /* this BB is not part of any group */
     if (!blk->public.throttle_group_member.throttle_state) {
         return;
@@ -2361,7 +2815,7 @@ static void blk_root_drained_begin(BdrvChild *child)
     BlockBackend *blk = child->opaque;
     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
 
-    if (++blk->quiesce_counter == 1) {
+    if (qatomic_fetch_inc(&blk->quiesce_counter) == 0) {
         if (blk->dev_ops && blk->dev_ops->drained_begin) {
             blk->dev_ops->drained_begin(blk->dev_opaque);
         }
@@ -2378,44 +2832,68 @@ static void blk_root_drained_begin(BdrvChild *child)
 static bool blk_root_drained_poll(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
-    assert(blk->quiesce_counter);
-    return !!blk->in_flight;
+    bool busy = false;
+    assert(qatomic_read(&blk->quiesce_counter));
+
+    if (blk->dev_ops && blk->dev_ops->drained_poll) {
+        busy = blk->dev_ops->drained_poll(blk->dev_opaque);
+    }
+    return busy || !!blk->in_flight;
 }
 
-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
+static void blk_root_drained_end(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
-    assert(blk->quiesce_counter);
+    assert(qatomic_read(&blk->quiesce_counter));
 
     assert(blk->public.throttle_group_member.io_limits_disabled);
     qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
 
-    if (--blk->quiesce_counter == 0) {
+    if (qatomic_fetch_dec(&blk->quiesce_counter) == 1) {
         if (blk->dev_ops && blk->dev_ops->drained_end) {
             blk->dev_ops->drained_end(blk->dev_opaque);
         }
-        while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
+        qemu_mutex_lock(&blk->queued_requests_lock);
+        while (qemu_co_enter_next(&blk->queued_requests,
+                                  &blk->queued_requests_lock)) {
             /* Resume all queued requests */
         }
+        qemu_mutex_unlock(&blk->queued_requests_lock);
     }
 }
 
-void blk_register_buf(BlockBackend *blk, void *host, size_t size)
+bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp)
 {
-    bdrv_register_buf(blk_bs(blk), host, size);
+    BlockDriverState *bs = blk_bs(blk);
+
+    GLOBAL_STATE_CODE();
+
+    if (bs) {
+        return bdrv_register_buf(bs, host, size, errp);
+    }
+    return true;
 }
 
-void blk_unregister_buf(BlockBackend *blk, void *host)
+void blk_unregister_buf(BlockBackend *blk, void *host, size_t size)
 {
-    bdrv_unregister_buf(blk_bs(blk), host);
+    BlockDriverState *bs = blk_bs(blk);
+
+    GLOBAL_STATE_CODE();
+
+    if (bs) {
+        bdrv_unregister_buf(bs, host, size);
+    }
 }
 
 int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
                                    BlockBackend *blk_out, int64_t off_out,
-                                   int bytes, BdrvRequestFlags read_flags,
+                                   int64_t bytes, BdrvRequestFlags read_flags,
                                    BdrvRequestFlags write_flags)
 {
     int r;
+    IO_CODE();
+    GRAPH_RDLOCK_GUARD();
+
     r = blk_check_byte_request(blk_in, off_in, bytes);
     if (r) {
         return r;
@@ -2424,6 +2902,7 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
     if (r) {
         return r;
     }
+
     return bdrv_co_copy_range(blk_in->root, off_in,
                               blk_out->root, off_out,
                               bytes, read_flags, write_flags);
@@ -2431,11 +2910,15 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
 
 const BdrvChild *blk_root(BlockBackend *blk)
 {
+    GLOBAL_STATE_CODE();
     return blk->root;
 }
 
 int blk_make_empty(BlockBackend *blk, Error **errp)
 {
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (!blk_is_available(blk)) {
         error_setg(errp, "No medium inserted");
         return -ENOMEDIUM;
index cd9bc47c8f6d0509fe7240b53341092458b6b487..9ee3dd7ef57b3695a5fc9c66bd76fcda9fc93fc0 100644 (file)
 #include "trace.h"
 #include "qapi/error.h"
 #include "block/block-copy.h"
+#include "block/block_int-io.h"
+#include "block/dirty-bitmap.h"
+#include "block/reqlist.h"
 #include "sysemu/block-backend.h"
 #include "qemu/units.h"
+#include "qemu/co-shared-resource.h"
 #include "qemu/coroutine.h"
+#include "qemu/ratelimit.h"
 #include "block/aio_task.h"
+#include "qemu/error-report.h"
+#include "qemu/memalign.h"
 
 #define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
 #define BLOCK_COPY_MAX_BUFFER (1 * MiB)
 #define BLOCK_COPY_MAX_MEM (128 * MiB)
 #define BLOCK_COPY_MAX_WORKERS 64
+#define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
+#define BLOCK_COPY_CLUSTER_SIZE_DEFAULT (1 << 16)
+
+typedef enum {
+    COPY_READ_WRITE_CLUSTER,
+    COPY_READ_WRITE,
+    COPY_WRITE_ZEROES,
+    COPY_RANGE_SMALL,
+    COPY_RANGE_FULL
+} BlockCopyMethod;
 
 static coroutine_fn int block_copy_task_entry(AioTask *task);
 
 typedef struct BlockCopyCallState {
-    bool failed;
+    /* Fields initialized in block_copy_async() and never changed. */
+    BlockCopyState *s;
+    int64_t offset;
+    int64_t bytes;
+    int max_workers;
+    int64_t max_chunk;
+    bool ignore_ratelimit;
+    BlockCopyAsyncCallbackFunc cb;
+    void *cb_opaque;
+    /* Coroutine where async block-copy is running */
+    Coroutine *co;
+
+    /* Fields whose state changes throughout the execution */
+    bool finished; /* atomic */
+    QemuCoSleep sleep; /* TODO: protect API with a lock */
+    bool cancelled; /* atomic */
+    /* To reference all call states from BlockCopyState */
+    QLIST_ENTRY(BlockCopyCallState) list;
+
+    /*
+     * Fields that report information about return values and errors.
+     * Protected by lock in BlockCopyState.
+     */
     bool error_is_read;
+    /*
+     * @ret is set concurrently by tasks under mutex. Only set once by first
+     * failed task (and untouched if no task failed).
+     * After finishing (call_state->finished is true), it is not modified
+     * anymore and may be safely read without mutex.
+     */
+    int ret;
 } BlockCopyCallState;
 
 typedef struct BlockCopyTask {
     AioTask task;
 
+    /*
+     * Fields initialized in block_copy_task_create()
+     * and never changed.
+     */
     BlockCopyState *s;
     BlockCopyCallState *call_state;
-    int64_t offset;
-    int64_t bytes;
-    bool zeroes;
-    QLIST_ENTRY(BlockCopyTask) list;
-    CoQueue wait_queue; /* coroutines blocked on this task */
+    /*
+     * @method can also be set again in the while loop of
+     * block_copy_dirty_clusters(), but it is never accessed concurrently
+     * because the only other function that reads it is
+     * block_copy_task_entry() and it is invoked afterwards in the same
+     * iteration.
+     */
+    BlockCopyMethod method;
+
+    /*
+     * Generally, req is protected by lock in BlockCopyState, Still req.offset
+     * is only set on task creation, so may be read concurrently after creation.
+     * req.bytes is changed at most once, and need only protecting the case of
+     * parallel read while updating @bytes value in block_copy_task_shrink().
+     */
+    BlockReq req;
 } BlockCopyTask;
 
 static int64_t task_end(BlockCopyTask *task)
 {
-    return task->offset + task->bytes;
+    return task->req.offset + task->req.bytes;
 }
 
 typedef struct BlockCopyState {
@@ -59,16 +120,25 @@ typedef struct BlockCopyState {
      */
     BdrvChild *source;
     BdrvChild *target;
-    BdrvDirtyBitmap *copy_bitmap;
-    int64_t in_flight_bytes;
+
+    /*
+     * Fields initialized in block_copy_state_new()
+     * and never changed.
+     */
     int64_t cluster_size;
-    bool use_copy_range;
-    int64_t copy_size;
+    int64_t max_transfer;
     uint64_t len;
-    QLIST_HEAD(, BlockCopyTask) tasks;
-
     BdrvRequestFlags write_flags;
 
+    /*
+     * Fields whose state changes throughout the execution
+     * Protected by lock.
+     */
+    CoMutex lock;
+    int64_t in_flight_bytes;
+    BlockCopyMethod method;
+    BlockReqList reqs;
+    QLIST_HEAD(, BlockCopyCallState) calls;
     /*
      * skip_unallocated:
      *
@@ -83,61 +153,49 @@ typedef struct BlockCopyState {
      * skip unallocated regions, clear them in the copy_bitmap, and invoke
      * block_copy_reset_unallocated() every time it does.
      */
-    bool skip_unallocated;
-
+    bool skip_unallocated; /* atomic */
+    /* State fields that use a thread-safe API */
+    BdrvDirtyBitmap *copy_bitmap;
     ProgressMeter *progress;
-    /* progress_bytes_callback: called when some copying progress is done. */
-    ProgressBytesCallbackFunc progress_bytes_callback;
-    void *progress_opaque;
-
     SharedResource *mem;
+    RateLimit rate_limit;
 } BlockCopyState;
 
-static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
-                                            int64_t offset, int64_t bytes)
+/* Called with lock held */
+static int64_t block_copy_chunk_size(BlockCopyState *s)
 {
-    BlockCopyTask *t;
-
-    QLIST_FOREACH(t, &s->tasks, list) {
-        if (offset + bytes > t->offset && offset < t->offset + t->bytes) {
-            return t;
-        }
+    switch (s->method) {
+    case COPY_READ_WRITE_CLUSTER:
+        return s->cluster_size;
+    case COPY_READ_WRITE:
+    case COPY_RANGE_SMALL:
+        return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER),
+                   s->max_transfer);
+    case COPY_RANGE_FULL:
+        return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
+                   s->max_transfer);
+    default:
+        /* Cannot have COPY_WRITE_ZEROES here.  */
+        abort();
     }
-
-    return NULL;
-}
-
-/*
- * If there are no intersecting tasks return false. Otherwise, wait for the
- * first found intersecting tasks to finish and return true.
- */
-static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
-                                             int64_t bytes)
-{
-    BlockCopyTask *task = find_conflicting_task(s, offset, bytes);
-
-    if (!task) {
-        return false;
-    }
-
-    qemu_co_queue_wait(&task->wait_queue, NULL);
-
-    return true;
 }
 
 /*
  * Search for the first dirty area in offset/bytes range and create task at
  * the beginning of it.
  */
-static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
-                                             BlockCopyCallState *call_state,
-                                             int64_t offset, int64_t bytes)
+static coroutine_fn BlockCopyTask *
+block_copy_task_create(BlockCopyState *s, BlockCopyCallState *call_state,
+                       int64_t offset, int64_t bytes)
 {
     BlockCopyTask *task;
+    int64_t max_chunk;
 
+    QEMU_LOCK_GUARD(&s->lock);
+    max_chunk = MIN_NON_ZERO(block_copy_chunk_size(s), call_state->max_chunk);
     if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap,
                                            offset, offset + bytes,
-                                           s->copy_size, &offset, &bytes))
+                                           max_chunk, &offset, &bytes))
     {
         return NULL;
     }
@@ -146,7 +204,7 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
     bytes = QEMU_ALIGN_UP(bytes, s->cluster_size);
 
     /* region is dirty, so no existent tasks possible in it */
-    assert(!find_conflicting_task(s, offset, bytes));
+    assert(!reqlist_find_conflict(&s->reqs, offset, bytes));
 
     bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
     s->in_flight_bytes += bytes;
@@ -156,11 +214,9 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
         .task.func = block_copy_task_entry,
         .s = s,
         .call_state = call_state,
-        .offset = offset,
-        .bytes = bytes,
+        .method = s->method,
     };
-    qemu_co_queue_init(&task->wait_queue);
-    QLIST_INSERT_HEAD(&s->tasks, task, list);
+    reqlist_init_req(&s->reqs, &task->req, offset, bytes);
 
     return task;
 }
@@ -175,28 +231,35 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
 static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task,
                                                 int64_t new_bytes)
 {
-    if (new_bytes == task->bytes) {
+    QEMU_LOCK_GUARD(&task->s->lock);
+    if (new_bytes == task->req.bytes) {
         return;
     }
 
-    assert(new_bytes > 0 && new_bytes < task->bytes);
+    assert(new_bytes > 0 && new_bytes < task->req.bytes);
 
-    task->s->in_flight_bytes -= task->bytes - new_bytes;
+    task->s->in_flight_bytes -= task->req.bytes - new_bytes;
     bdrv_set_dirty_bitmap(task->s->copy_bitmap,
-                          task->offset + new_bytes, task->bytes - new_bytes);
+                          task->req.offset + new_bytes,
+                          task->req.bytes - new_bytes);
 
-    task->bytes = new_bytes;
-    qemu_co_queue_restart_all(&task->wait_queue);
+    reqlist_shrink_req(&task->req, new_bytes);
 }
 
 static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret)
 {
-    task->s->in_flight_bytes -= task->bytes;
+    QEMU_LOCK_GUARD(&task->s->lock);
+    task->s->in_flight_bytes -= task->req.bytes;
     if (ret < 0) {
-        bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->offset, task->bytes);
+        bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->req.offset,
+                              task->req.bytes);
+    }
+    if (task->s->progress) {
+        progress_set_remaining(task->s->progress,
+                               bdrv_get_dirty_count(task->s->copy_bitmap) +
+                               task->s->in_flight_bytes);
     }
-    QLIST_REMOVE(task, list);
-    qemu_co_queue_restart_all(&task->wait_queue);
+    reqlist_remove_req(&task->req);
 }
 
 void block_copy_state_free(BlockCopyState *s)
@@ -205,6 +268,7 @@ void block_copy_state_free(BlockCopyState *s)
         return;
     }
 
+    ratelimit_destroy(&s->rate_limit);
     bdrv_release_dirty_bitmap(s->copy_bitmap);
     shres_destroy(s->mem);
     g_free(s);
@@ -217,12 +281,91 @@ static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target)
                                      target->bs->bl.max_transfer));
 }
 
+void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
+                              bool compress)
+{
+    /* Keep BDRV_REQ_SERIALISING set (or not set) in block_copy_state_new() */
+    s->write_flags = (s->write_flags & BDRV_REQ_SERIALISING) |
+        (compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+
+    if (s->max_transfer < s->cluster_size) {
+        /*
+         * copy_range does not respect max_transfer. We don't want to bother
+         * with requests smaller than block-copy cluster size, so fallback to
+         * buffered copying (read and write respect max_transfer on their
+         * behalf).
+         */
+        s->method = COPY_READ_WRITE_CLUSTER;
+    } else if (compress) {
+        /* Compression supports only cluster-size writes and no copy-range. */
+        s->method = COPY_READ_WRITE_CLUSTER;
+    } else {
+        /*
+         * If copy range enabled, start with COPY_RANGE_SMALL, until first
+         * successful copy_range (look at block_copy_do_copy).
+         */
+        s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;
+    }
+}
+
+static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+                                                 Error **errp)
+{
+    int ret;
+    BlockDriverInfo bdi;
+    bool target_does_cow;
+
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    target_does_cow = bdrv_backing_chain_next(target);
+
+    /*
+     * If there is no backing file on the target, we cannot rely on COW if our
+     * backup cluster size is smaller than the target cluster size. Even for
+     * targets with a backing file, try to avoid COW if possible.
+     */
+    ret = bdrv_get_info(target, &bdi);
+    if (ret == -ENOTSUP && !target_does_cow) {
+        /* Cluster size is not defined */
+        warn_report("The target block device doesn't provide "
+                    "information about the block size and it doesn't have a "
+                    "backing file. The default block size of %u bytes is "
+                    "used. If the actual block size of the target exceeds "
+                    "this default, the backup may be unusable",
+                    BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
+    } else if (ret < 0 && !target_does_cow) {
+        error_setg_errno(errp, -ret,
+            "Couldn't determine the cluster size of the target image, "
+            "which has no backing file");
+        error_append_hint(errp,
+            "Aborting, since this may create an unusable destination image\n");
+        return ret;
+    } else if (ret < 0 && target_does_cow) {
+        /* Not fatal; just trudge on ahead. */
+        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
+    }
+
+    return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+}
+
 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-                                     int64_t cluster_size,
-                                     BdrvRequestFlags write_flags, Error **errp)
+                                     const BdrvDirtyBitmap *bitmap,
+                                     Error **errp)
 {
+    ERRP_GUARD();
     BlockCopyState *s;
+    int64_t cluster_size;
     BdrvDirtyBitmap *copy_bitmap;
+    bool is_fleecing;
+
+    GLOBAL_STATE_CODE();
+
+    cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
+    if (cluster_size < 0) {
+        return NULL;
+    }
 
     copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
                                            errp);
@@ -230,6 +373,35 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
         return NULL;
     }
     bdrv_disable_dirty_bitmap(copy_bitmap);
+    if (bitmap) {
+        if (!bdrv_merge_dirty_bitmap(copy_bitmap, bitmap, NULL, errp)) {
+            error_prepend(errp, "Failed to merge bitmap '%s' to internal "
+                          "copy-bitmap: ", bdrv_dirty_bitmap_name(bitmap));
+            bdrv_release_dirty_bitmap(copy_bitmap);
+            return NULL;
+        }
+    } else {
+        bdrv_set_dirty_bitmap(copy_bitmap, 0,
+                              bdrv_dirty_bitmap_size(copy_bitmap));
+    }
+
+    /*
+     * If source is in backing chain of target assume that target is going to be
+     * used for "image fleecing", i.e. it should represent a kind of snapshot of
+     * source at backup-start point in time. And target is going to be read by
+     * somebody (for example, used as NBD export) during backup job.
+     *
+     * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid
+     * intersection of backup writes and third party reads from target,
+     * otherwise reading from target we may occasionally read already updated by
+     * guest data.
+     *
+     * For more information see commit f8d59dfb40bb and test
+     * tests/qemu-iotests/222
+     */
+    bdrv_graph_rdlock_main_loop();
+    is_fleecing = bdrv_chain_contains(target->bs, source->bs);
+    bdrv_graph_rdunlock_main_loop();
 
     s = g_new(BlockCopyState, 1);
     *s = (BlockCopyState) {
@@ -238,46 +410,24 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
         .copy_bitmap = copy_bitmap,
         .cluster_size = cluster_size,
         .len = bdrv_dirty_bitmap_size(copy_bitmap),
-        .write_flags = write_flags,
+        .write_flags = (is_fleecing ? BDRV_REQ_SERIALISING : 0),
         .mem = shres_create(BLOCK_COPY_MAX_MEM),
+        .max_transfer = QEMU_ALIGN_DOWN(
+                                    block_copy_max_transfer(source, target),
+                                    cluster_size),
     };
 
-    if (block_copy_max_transfer(source, target) < cluster_size) {
-        /*
-         * copy_range does not respect max_transfer. We don't want to bother
-         * with requests smaller than block-copy cluster size, so fallback to
-         * buffered copying (read and write respect max_transfer on their
-         * behalf).
-         */
-        s->use_copy_range = false;
-        s->copy_size = cluster_size;
-    } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
-        /* Compression supports only cluster-size writes and no copy-range. */
-        s->use_copy_range = false;
-        s->copy_size = cluster_size;
-    } else {
-        /*
-         * We enable copy-range, but keep small copy_size, until first
-         * successful copy_range (look at block_copy_do_copy).
-         */
-        s->use_copy_range = true;
-        s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
-    }
+    block_copy_set_copy_opts(s, false, false);
 
-    QLIST_INIT(&s->tasks);
+    ratelimit_init(&s->rate_limit);
+    qemu_co_mutex_init(&s->lock);
+    QLIST_INIT(&s->reqs);
+    QLIST_INIT(&s->calls);
 
     return s;
 }
 
-void block_copy_set_progress_callback(
-        BlockCopyState *s,
-        ProgressBytesCallbackFunc progress_bytes_callback,
-        void *progress_opaque)
-{
-    s->progress_bytes_callback = progress_bytes_callback;
-    s->progress_opaque = progress_opaque;
-}
-
+/* Only set before running the job, no need for locking. */
 void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
 {
     s->progress = pm;
@@ -304,7 +454,7 @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
 
     aio_task_pool_wait_slot(pool);
     if (aio_task_pool_status(pool) < 0) {
-        co_put_to_shres(task->s->mem, task->bytes);
+        co_put_to_shres(task->s->mem, task->req.bytes);
         block_copy_task_end(task, -ECANCELED);
         g_free(task);
         return -ECANCELED;
@@ -321,13 +471,16 @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
  * Do copy of cluster-aligned chunk. Requested region is allowed to exceed
  * s->len only to cover last cluster when s->len is not aligned to clusters.
  *
- * No sync here: nor bitmap neighter intersecting requests handling, only copy.
+ * No sync here: neither bitmap nor intersecting requests handling, only copy.
  *
+ * @method is an in-out argument, so that copy_range can be either extended to
+ * a full-size buffer or disabled if the copy_range attempt fails.  The output
+ * value of @method should be used for subsequent tasks.
  * Returns 0 on success.
  */
-static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
-                                           int64_t offset, int64_t bytes,
-                                           bool zeroes, bool *error_is_read)
+static int coroutine_fn GRAPH_RDLOCK
+block_copy_do_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
+                   BlockCopyMethod *method, bool *error_is_read)
 {
     int ret;
     int64_t nbytes = MIN(offset + bytes, s->len) - offset;
@@ -341,7 +494,8 @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
            offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
     assert(nbytes < INT_MAX);
 
-    if (zeroes) {
+    switch (*method) {
+    case COPY_WRITE_ZEROES:
         ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags &
                                     ~BDRV_REQ_WRITE_COMPRESSED);
         if (ret < 0) {
@@ -349,105 +503,110 @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
             *error_is_read = false;
         }
         return ret;
-    }
 
-    if (s->use_copy_range) {
+    case COPY_RANGE_SMALL:
+    case COPY_RANGE_FULL:
         ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
                                  0, s->write_flags);
+        if (ret >= 0) {
+            /* Successful copy-range, increase chunk size.  */
+            *method = COPY_RANGE_FULL;
+            return 0;
+        }
+
+        trace_block_copy_copy_range_fail(s, offset, ret);
+        *method = COPY_READ_WRITE;
+        /* Fall through to read+write with allocated buffer */
+
+    case COPY_READ_WRITE_CLUSTER:
+    case COPY_READ_WRITE:
+        /*
+         * In case of failed copy_range request above, we may proceed with
+         * buffered request larger than BLOCK_COPY_MAX_BUFFER.
+         * Still, further requests will be properly limited, so don't care too
+         * much. Moreover the most likely case (copy_range is unsupported for
+         * the configuration, so the very first copy_range request fails)
+         * is handled by setting large copy_size only after first successful
+         * copy_range.
+         */
+
+        bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
+
+        ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0);
         if (ret < 0) {
-            trace_block_copy_copy_range_fail(s, offset, ret);
-            s->use_copy_range = false;
-            s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
-            /* Fallback to read+write with allocated buffer */
-        } else {
-            if (s->use_copy_range) {
-                /*
-                 * Successful copy-range. Now increase copy_size.  copy_range
-                 * does not respect max_transfer (it's a TODO), so we factor
-                 * that in here.
-                 *
-                 * Note: we double-check s->use_copy_range for the case when
-                 * parallel block-copy request unsets it during previous
-                 * bdrv_co_copy_range call.
-                 */
-                s->copy_size =
-                        MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
-                            QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
-                                                                    s->target),
-                                            s->cluster_size));
-            }
+            trace_block_copy_read_fail(s, offset, ret);
+            *error_is_read = true;
             goto out;
         }
-    }
 
-    /*
-     * In case of failed copy_range request above, we may proceed with buffered
-     * request larger than BLOCK_COPY_MAX_BUFFER. Still, further requests will
-     * be properly limited, so don't care too much. Moreover the most likely
-     * case (copy_range is unsupported for the configuration, so the very first
-     * copy_range request fails) is handled by setting large copy_size only
-     * after first successful copy_range.
-     */
+        ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer,
+                             s->write_flags);
+        if (ret < 0) {
+            trace_block_copy_write_fail(s, offset, ret);
+            *error_is_read = false;
+            goto out;
+        }
 
-    bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
+    out:
+        qemu_vfree(bounce_buffer);
+        break;
 
-    ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0);
-    if (ret < 0) {
-        trace_block_copy_read_fail(s, offset, ret);
-        *error_is_read = true;
-        goto out;
+    default:
+        abort();
     }
 
-    ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer,
-                         s->write_flags);
-    if (ret < 0) {
-        trace_block_copy_write_fail(s, offset, ret);
-        *error_is_read = false;
-        goto out;
-    }
-
-out:
-    qemu_vfree(bounce_buffer);
-
     return ret;
 }
 
 static coroutine_fn int block_copy_task_entry(AioTask *task)
 {
     BlockCopyTask *t = container_of(task, BlockCopyTask, task);
+    BlockCopyState *s = t->s;
     bool error_is_read = false;
+    BlockCopyMethod method = t->method;
     int ret;
 
-    ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes,
-                             &error_is_read);
-    if (ret < 0 && !t->call_state->failed) {
-        t->call_state->failed = true;
-        t->call_state->error_is_read = error_is_read;
-    } else {
-        progress_work_done(t->s->progress, t->bytes);
-        t->s->progress_bytes_callback(t->bytes, t->s->progress_opaque);
+    WITH_GRAPH_RDLOCK_GUARD() {
+        ret = block_copy_do_copy(s, t->req.offset, t->req.bytes, &method,
+                                 &error_is_read);
     }
-    co_put_to_shres(t->s->mem, t->bytes);
+
+    WITH_QEMU_LOCK_GUARD(&s->lock) {
+        if (s->method == t->method) {
+            s->method = method;
+        }
+
+        if (ret < 0) {
+            if (!t->call_state->ret) {
+                t->call_state->ret = ret;
+                t->call_state->error_is_read = error_is_read;
+            }
+        } else if (s->progress) {
+            progress_work_done(s->progress, t->req.bytes);
+        }
+    }
+    co_put_to_shres(s->mem, t->req.bytes);
     block_copy_task_end(t, ret);
 
     return ret;
 }
 
-static int block_copy_block_status(BlockCopyState *s, int64_t offset,
-                                   int64_t bytes, int64_t *pnum)
+static coroutine_fn GRAPH_RDLOCK
+int block_copy_block_status(BlockCopyState *s, int64_t offset, int64_t bytes,
+                            int64_t *pnum)
 {
     int64_t num;
     BlockDriverState *base;
     int ret;
 
-    if (s->skip_unallocated) {
+    if (qatomic_read(&s->skip_unallocated)) {
         base = bdrv_backing_chain_next(s->source->bs);
     } else {
         base = NULL;
     }
 
-    ret = bdrv_block_status_above(s->source->bs, base, offset, bytes, &num,
-                                  NULL, NULL);
+    ret = bdrv_co_block_status_above(s->source->bs, base, offset, bytes, &num,
+                                     NULL, NULL);
     if (ret < 0 || num < s->cluster_size) {
         /*
          * On error or if failed to obtain large enough chunk just fallback to
@@ -469,8 +628,9 @@ static int block_copy_block_status(BlockCopyState *s, int64_t offset,
  * Check if the cluster starting at offset is allocated or not.
  * return via pnum the number of contiguous clusters sharing this allocation.
  */
-static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
-                                           int64_t *pnum)
+static int coroutine_fn GRAPH_RDLOCK
+block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
+                                int64_t *pnum)
 {
     BlockDriverState *bs = s->source->bs;
     int64_t count, total_count = 0;
@@ -480,7 +640,8 @@ static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
     assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
 
     while (true) {
-        ret = bdrv_is_allocated(bs, offset, bytes, &count);
+        /* protected in backup_run() */
+        ret = bdrv_co_is_allocated(bs, offset, bytes, &count);
         if (ret < 0) {
             return ret;
         }
@@ -507,14 +668,27 @@ static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
     }
 }
 
+void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes)
+{
+    QEMU_LOCK_GUARD(&s->lock);
+
+    bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
+    if (s->progress) {
+        progress_set_remaining(s->progress,
+                               bdrv_get_dirty_count(s->copy_bitmap) +
+                               s->in_flight_bytes);
+    }
+}
+
 /*
  * Reset bits in copy_bitmap starting at offset if they represent unallocated
  * data in the image. May reset subsequent contiguous bits.
  * @return 0 when the cluster at @offset was unallocated,
  *         1 otherwise, and -ret on error.
  */
-int64_t block_copy_reset_unallocated(BlockCopyState *s,
-                                     int64_t offset, int64_t *count)
+int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s,
+                                                  int64_t offset,
+                                                  int64_t *count)
 {
     int ret;
     int64_t clusters, bytes;
@@ -527,10 +701,7 @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
     bytes = clusters * s->cluster_size;
 
     if (!ret) {
-        bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
-        progress_set_remaining(s->progress,
-                               bdrv_get_dirty_count(s->copy_bitmap) +
-                               s->in_flight_bytes);
+        block_copy_reset(s, offset, bytes);
     }
 
     *count = bytes;
@@ -544,15 +715,17 @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
  * Returns 1 if dirty clusters found and successfully copied, 0 if no dirty
  * clusters found and -errno on failure.
  */
-static int coroutine_fn block_copy_dirty_clusters(BlockCopyState *s,
-                                                  int64_t offset, int64_t bytes,
-                                                  bool *error_is_read)
+static int coroutine_fn GRAPH_RDLOCK
+block_copy_dirty_clusters(BlockCopyCallState *call_state)
 {
+    BlockCopyState *s = call_state->s;
+    int64_t offset = call_state->offset;
+    int64_t bytes = call_state->bytes;
+
     int ret = 0;
     bool found_dirty = false;
     int64_t end = offset + bytes;
     AioTaskPool *aio = NULL;
-    BlockCopyCallState call_state = {false, false};
 
     /*
      * block_copy() user is responsible for keeping source and target in same
@@ -564,50 +737,64 @@ static int coroutine_fn block_copy_dirty_clusters(BlockCopyState *s,
     assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
     assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
 
-    while (bytes && aio_task_pool_status(aio) == 0) {
+    while (bytes && aio_task_pool_status(aio) == 0 &&
+           !qatomic_read(&call_state->cancelled)) {
         BlockCopyTask *task;
         int64_t status_bytes;
 
-        task = block_copy_task_create(s, &call_state, offset, bytes);
+        task = block_copy_task_create(s, call_state, offset, bytes);
         if (!task) {
             /* No more dirty bits in the bitmap */
             trace_block_copy_skip_range(s, offset, bytes);
             break;
         }
-        if (task->offset > offset) {
-            trace_block_copy_skip_range(s, offset, task->offset - offset);
+        if (task->req.offset > offset) {
+            trace_block_copy_skip_range(s, offset, task->req.offset - offset);
         }
 
         found_dirty = true;
 
-        ret = block_copy_block_status(s, task->offset, task->bytes,
+        ret = block_copy_block_status(s, task->req.offset, task->req.bytes,
                                       &status_bytes);
         assert(ret >= 0); /* never fail */
-        if (status_bytes < task->bytes) {
+        if (status_bytes < task->req.bytes) {
             block_copy_task_shrink(task, status_bytes);
         }
-        if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
+        if (qatomic_read(&s->skip_unallocated) &&
+            !(ret & BDRV_BLOCK_ALLOCATED)) {
             block_copy_task_end(task, 0);
-            progress_set_remaining(s->progress,
-                                   bdrv_get_dirty_count(s->copy_bitmap) +
-                                   s->in_flight_bytes);
-            trace_block_copy_skip_range(s, task->offset, task->bytes);
+            trace_block_copy_skip_range(s, task->req.offset, task->req.bytes);
             offset = task_end(task);
             bytes = end - offset;
             g_free(task);
             continue;
         }
-        task->zeroes = ret & BDRV_BLOCK_ZERO;
+        if (ret & BDRV_BLOCK_ZERO) {
+            task->method = COPY_WRITE_ZEROES;
+        }
+
+        if (!call_state->ignore_ratelimit) {
+            uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0);
+            if (ns > 0) {
+                block_copy_task_end(task, -EAGAIN);
+                g_free(task);
+                qemu_co_sleep_ns_wakeable(&call_state->sleep,
+                                          QEMU_CLOCK_REALTIME, ns);
+                continue;
+            }
+        }
 
-        trace_block_copy_process(s, task->offset);
+        ratelimit_calculate_delay(&s->rate_limit, task->req.bytes);
 
-        co_get_from_shres(s->mem, task->bytes);
+        trace_block_copy_process(s, task->req.offset);
+
+        co_get_from_shres(s->mem, task->req.bytes);
 
         offset = task_end(task);
         bytes = end - offset;
 
         if (!aio && bytes) {
-            aio = aio_task_pool_new(BLOCK_COPY_MAX_WORKERS);
+            aio = aio_task_pool_new(call_state->max_workers);
         }
 
         ret = block_copy_task_run(aio, task);
@@ -633,15 +820,17 @@ out:
 
         aio_task_pool_free(aio);
     }
-    if (error_is_read && ret < 0) {
-        *error_is_read = call_state.error_is_read;
-    }
 
     return ret < 0 ? ret : found_dirty;
 }
 
+void block_copy_kick(BlockCopyCallState *call_state)
+{
+    qemu_co_sleep_wake(&call_state->sleep);
+}
+
 /*
- * block_copy
+ * block_copy_common
  *
  * Copy requested region, accordingly to dirty bitmap.
  * Collaborate with parallel block_copy requests: if they succeed it will help
@@ -649,16 +838,44 @@ out:
  * it means that some I/O operation failed in context of _this_ block_copy call,
  * not some parallel operation.
  */
-int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
-                            bool *error_is_read)
+static int coroutine_fn GRAPH_RDLOCK
+block_copy_common(BlockCopyCallState *call_state)
 {
     int ret;
+    BlockCopyState *s = call_state->s;
+
+    qemu_co_mutex_lock(&s->lock);
+    QLIST_INSERT_HEAD(&s->calls, call_state, list);
+    qemu_co_mutex_unlock(&s->lock);
 
     do {
-        ret = block_copy_dirty_clusters(s, offset, bytes, error_is_read);
+        ret = block_copy_dirty_clusters(call_state);
 
-        if (ret == 0) {
-            ret = block_copy_wait_one(s, offset, bytes);
+        if (ret == 0 && !qatomic_read(&call_state->cancelled)) {
+            WITH_QEMU_LOCK_GUARD(&s->lock) {
+                /*
+                 * Check that there is no task we still need to
+                 * wait to complete
+                 */
+                ret = reqlist_wait_one(&s->reqs, call_state->offset,
+                                       call_state->bytes, &s->lock);
+                if (ret == 0) {
+                    /*
+                     * No pending tasks, but check again the bitmap in this
+                     * same critical section, since a task might have failed
+                     * between this and the critical section in
+                     * block_copy_dirty_clusters().
+                     *
+                     * reqlist_wait_one return value 0 also means that it
+                     * didn't release the lock. So, we are still in the same
+                     * critical section, not interrupted by any concurrent
+                     * access to state.
+                     */
+                    ret = bdrv_dirty_bitmap_next_dirty(s->copy_bitmap,
+                                                       call_state->offset,
+                                                       call_state->bytes) >= 0;
+                }
+            }
         }
 
         /*
@@ -670,17 +887,161 @@ int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
          * 2. We have waited for some intersecting block-copy request
          *    It may have failed and produced new dirty bits.
          */
-    } while (ret > 0);
+    } while (ret > 0 && !qatomic_read(&call_state->cancelled));
+
+    qatomic_store_release(&call_state->finished, true);
+
+    if (call_state->cb) {
+        call_state->cb(call_state->cb_opaque);
+    }
+
+    qemu_co_mutex_lock(&s->lock);
+    QLIST_REMOVE(call_state, list);
+    qemu_co_mutex_unlock(&s->lock);
 
     return ret;
 }
 
+static void coroutine_fn block_copy_async_co_entry(void *opaque)
+{
+    GRAPH_RDLOCK_GUARD();
+    block_copy_common(opaque);
+}
+
+int coroutine_fn block_copy(BlockCopyState *s, int64_t start, int64_t bytes,
+                            bool ignore_ratelimit, uint64_t timeout_ns,
+                            BlockCopyAsyncCallbackFunc cb,
+                            void *cb_opaque)
+{
+    int ret;
+    BlockCopyCallState *call_state = g_new(BlockCopyCallState, 1);
+
+    *call_state = (BlockCopyCallState) {
+        .s = s,
+        .offset = start,
+        .bytes = bytes,
+        .ignore_ratelimit = ignore_ratelimit,
+        .max_workers = BLOCK_COPY_MAX_WORKERS,
+        .cb = cb,
+        .cb_opaque = cb_opaque,
+    };
+
+    ret = qemu_co_timeout(block_copy_async_co_entry, call_state, timeout_ns,
+                          g_free);
+    if (ret < 0) {
+        assert(ret == -ETIMEDOUT);
+        block_copy_call_cancel(call_state);
+        /* call_state will be freed by running coroutine. */
+        return ret;
+    }
+
+    ret = call_state->ret;
+    g_free(call_state);
+
+    return ret;
+}
+
+BlockCopyCallState *block_copy_async(BlockCopyState *s,
+                                     int64_t offset, int64_t bytes,
+                                     int max_workers, int64_t max_chunk,
+                                     BlockCopyAsyncCallbackFunc cb,
+                                     void *cb_opaque)
+{
+    BlockCopyCallState *call_state = g_new(BlockCopyCallState, 1);
+
+    *call_state = (BlockCopyCallState) {
+        .s = s,
+        .offset = offset,
+        .bytes = bytes,
+        .max_workers = max_workers,
+        .max_chunk = max_chunk,
+        .cb = cb,
+        .cb_opaque = cb_opaque,
+
+        .co = qemu_coroutine_create(block_copy_async_co_entry, call_state),
+    };
+
+    qemu_coroutine_enter(call_state->co);
+
+    return call_state;
+}
+
+void block_copy_call_free(BlockCopyCallState *call_state)
+{
+    if (!call_state) {
+        return;
+    }
+
+    assert(qatomic_read(&call_state->finished));
+    g_free(call_state);
+}
+
+bool block_copy_call_finished(BlockCopyCallState *call_state)
+{
+    return qatomic_read(&call_state->finished);
+}
+
+bool block_copy_call_succeeded(BlockCopyCallState *call_state)
+{
+    return qatomic_load_acquire(&call_state->finished) &&
+           !qatomic_read(&call_state->cancelled) &&
+           call_state->ret == 0;
+}
+
+bool block_copy_call_failed(BlockCopyCallState *call_state)
+{
+    return qatomic_load_acquire(&call_state->finished) &&
+           !qatomic_read(&call_state->cancelled) &&
+           call_state->ret < 0;
+}
+
+bool block_copy_call_cancelled(BlockCopyCallState *call_state)
+{
+    return qatomic_read(&call_state->cancelled);
+}
+
+int block_copy_call_status(BlockCopyCallState *call_state, bool *error_is_read)
+{
+    assert(qatomic_load_acquire(&call_state->finished));
+    if (error_is_read) {
+        *error_is_read = call_state->error_is_read;
+    }
+    return call_state->ret;
+}
+
+/*
+ * Note that cancelling and finishing are racy.
+ * User can cancel a block-copy that is already finished.
+ */
+void block_copy_call_cancel(BlockCopyCallState *call_state)
+{
+    qatomic_set(&call_state->cancelled, true);
+    block_copy_kick(call_state);
+}
+
 BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s)
 {
     return s->copy_bitmap;
 }
 
+int64_t block_copy_cluster_size(BlockCopyState *s)
+{
+    return s->cluster_size;
+}
+
 void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
 {
-    s->skip_unallocated = skip;
+    qatomic_set(&s->skip_unallocated, skip);
+}
+
+void block_copy_set_speed(BlockCopyState *s, uint64_t speed)
+{
+    ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME);
+
+    /*
+     * Note: it's good to kick all call states from here, but it should be done
+     * only from a coroutine, to not crash if s->calls list changed while
+     * entering one call. So for now, the only user of this function kicks its
+     * only one call_state by hand.
+     */
 }
index f80cf4897d115fdbbef9b5128b807a42d3700cec..89b7daaa1f39b24b734d4c40cfc1121f7be15800 100644 (file)
 
 /* Base structure for argument packing structures */
 typedef struct BdrvPollCo {
-    BlockDriverState *bs;
+    AioContext *ctx;
     bool in_progress;
-    int ret;
     Coroutine *co; /* Keep pointer here for debugging */
 } BdrvPollCo;
 
-static inline int bdrv_poll_co(BdrvPollCo *s)
+static inline void bdrv_poll_co(BdrvPollCo *s)
 {
     assert(!qemu_in_coroutine());
 
-    bdrv_coroutine_enter(s->bs, s->co);
-    BDRV_POLL_WHILE(s->bs, s->in_progress);
-
-    return s->ret;
+    aio_co_enter(s->ctx, s->co);
+    AIO_WAIT_WHILE(s->ctx, s->in_progress);
 }
 
 #endif /* BLOCK_BLOCK_GEN_H */
diff --git a/block/block-ram-registrar.c b/block/block-ram-registrar.c
new file mode 100644 (file)
index 0000000..25dbafa
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * BlockBackend RAM Registrar
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/block-ram-registrar.h"
+#include "qapi/error.h"
+
+static void ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
+                            size_t max_size)
+{
+    BlockRAMRegistrar *r = container_of(n, BlockRAMRegistrar, notifier);
+    Error *err = NULL;
+
+    if (!r->ok) {
+        return; /* don't try again if we've already failed */
+    }
+
+    if (!blk_register_buf(r->blk, host, max_size, &err)) {
+        error_report_err(err);
+        ram_block_notifier_remove(&r->notifier);
+        r->ok = false;
+    }
+}
+
+static void ram_block_removed(RAMBlockNotifier *n, void *host, size_t size,
+                              size_t max_size)
+{
+    BlockRAMRegistrar *r = container_of(n, BlockRAMRegistrar, notifier);
+    blk_unregister_buf(r->blk, host, max_size);
+}
+
+void blk_ram_registrar_init(BlockRAMRegistrar *r, BlockBackend *blk)
+{
+    r->blk = blk;
+    r->notifier = (RAMBlockNotifier){
+        .ram_block_added = ram_block_added,
+        .ram_block_removed = ram_block_removed,
+
+        /*
+         * .ram_block_resized() is not necessary because we use the max_size
+         * value that does not change across resize.
+         */
+    };
+    r->ok = true;
+
+    ram_block_notifier_add(&r->notifier);
+}
+
+void blk_ram_registrar_destroy(BlockRAMRegistrar *r)
+{
+    if (r->ok) {
+        ram_block_notifier_remove(&r->notifier);
+    }
+}
index 2f010ab40a1ccd3669b64aed3c10aff17435335c..b099fb52fe820de372568407dde071a78284c9e9 100644 (file)
@@ -24,6 +24,7 @@
  */
 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qemu/bswap.h"
@@ -104,19 +105,24 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
     struct bochs_header bochs;
     int ret;
 
+    GLOBAL_STATE_CODE();
+
     /* No write support yet */
+    bdrv_graph_rdlock_main_loop();
     ret = bdrv_apply_auto_read_only(bs, NULL, errp);
+    bdrv_graph_rdunlock_main_loop();
     if (ret < 0) {
         return ret;
     }
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_IMAGE, false, errp);
-    if (!bs->file) {
-        return -EINVAL;
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
     }
 
-    ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    ret = bdrv_pread(bs->file, 0, sizeof(bochs), &bochs, 0);
     if (ret < 0) {
         return ret;
     }
@@ -150,8 +156,8 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
         return -ENOMEM;
     }
 
-    ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
-                     s->catalog_size * 4);
+    ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_size * 4,
+                     s->catalog_bitmap, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -203,7 +209,8 @@ static void bochs_refresh_limits(BlockDriverState *bs, Error **errp)
     bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
 }
 
-static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
+static int64_t coroutine_fn GRAPH_RDLOCK
+seek_to_sector(BlockDriverState *bs, int64_t sector_num)
 {
     BDRVBochsState *s = bs->opaque;
     uint64_t offset = sector_num * 512;
@@ -224,8 +231,8 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
         (s->extent_blocks + s->bitmap_blocks));
 
     /* read in bitmap for current extent */
-    ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
-                     &bitmap_entry, 1);
+    ret = bdrv_co_pread(bs->file, bitmap_offset + (extent_offset / 8), 1,
+                        &bitmap_entry, 0);
     if (ret < 0) {
         return ret;
     }
@@ -237,9 +244,9 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
     return bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));
 }
 
-static int coroutine_fn
-bochs_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bochs_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVBochsState *s = bs->opaque;
     uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
index c99192a57f451e6ae6d404184159299b91b980d2..443af1444e89faca638a37b1a959f76f5bdc8589 100644 (file)
@@ -24,6 +24,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qemu/bswap.h"
@@ -66,19 +67,24 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
     uint32_t offsets_size, max_compressed_block_size = 1, i;
     int ret;
 
+    GLOBAL_STATE_CODE();
+
+    bdrv_graph_rdlock_main_loop();
     ret = bdrv_apply_auto_read_only(bs, NULL, errp);
+    bdrv_graph_rdunlock_main_loop();
     if (ret < 0) {
         return ret;
     }
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_IMAGE, false, errp);
-    if (!bs->file) {
-        return -EINVAL;
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
     }
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     /* read header */
-    ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
+    ret = bdrv_pread(bs->file, 128, 4, &s->block_size, 0);
     if (ret < 0) {
         return ret;
     }
@@ -104,7 +110,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
         return -EINVAL;
     }
 
-    ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
+    ret = bdrv_pread(bs->file, 128 + 4, 4, &s->n_blocks, 0);
     if (ret < 0) {
         return ret;
     }
@@ -135,7 +141,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
         return -ENOMEM;
     }
 
-    ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
+    ret = bdrv_pread(bs->file, 128 + 4 + 4, offsets_size, s->offsets, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -212,7 +218,8 @@ static void cloop_refresh_limits(BlockDriverState *bs, Error **errp)
     bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
 }
 
-static inline int cloop_read_block(BlockDriverState *bs, int block_num)
+static int coroutine_fn GRAPH_RDLOCK
+cloop_read_block(BlockDriverState *bs, int block_num)
 {
     BDRVCloopState *s = bs->opaque;
 
@@ -220,9 +227,9 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
         int ret;
         uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];
 
-        ret = bdrv_pread(bs->file, s->offsets[block_num],
-                         s->compressed_block, bytes);
-        if (ret != bytes) {
+        ret = bdrv_co_pread(bs->file, s->offsets[block_num], bytes,
+                            s->compressed_block, 0);
+        if (ret < 0) {
             return -1;
         }
 
@@ -244,9 +251,9 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
     return 0;
 }
 
-static int coroutine_fn
-cloop_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+cloop_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVCloopState *s = bs->opaque;
     uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
index 71db7ba7472e7c64b6b306823644a37759d33e0b..69cc75be0c329a140c73896e820e5cbe2a8c0dc3 100644 (file)
@@ -18,8 +18,8 @@
 #include "block/block_int.h"
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
+#include "qemu/memalign.h"
 #include "sysemu/block-backend.h"
 
 enum {
@@ -48,8 +48,10 @@ static int commit_prepare(Job *job)
 {
     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
 
+    bdrv_graph_rdlock_main_loop();
     bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs);
     s->chain_frozen = false;
+    bdrv_graph_rdunlock_main_loop();
 
     /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
      * the normal backing chain can be restored. */
@@ -66,9 +68,12 @@ static void commit_abort(Job *job)
 {
     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
     BlockDriverState *top_bs = blk_bs(s->top);
+    BlockDriverState *commit_top_backing_bs;
 
     if (s->chain_frozen) {
+        bdrv_graph_rdlock_main_loop();
         bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs);
+        bdrv_graph_rdunlock_main_loop();
     }
 
     /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
@@ -90,8 +95,15 @@ static void commit_abort(Job *job)
      * XXX Can (or should) we somehow keep 'consistent read' blocked even
      * after the failed/cancelled commit job is gone? If we already wrote
      * something to base, the intermediate images aren't valid any more. */
-    bdrv_replace_node(s->commit_top_bs, s->commit_top_bs->backing->bs,
-                      &error_abort);
+    bdrv_graph_rdlock_main_loop();
+    commit_top_backing_bs = s->commit_top_bs->backing->bs;
+    bdrv_graph_rdunlock_main_loop();
+
+    bdrv_drained_begin(commit_top_backing_bs);
+    bdrv_graph_wrlock(commit_top_backing_bs);
+    bdrv_replace_node(s->commit_top_bs, commit_top_backing_bs, &error_abort);
+    bdrv_graph_wrunlock(commit_top_backing_bs);
+    bdrv_drained_end(commit_top_backing_bs);
 
     bdrv_unref(s->commit_top_bs);
     bdrv_unref(top_bs);
@@ -116,27 +128,26 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
 {
     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
     int64_t offset;
-    uint64_t delay_ns = 0;
     int ret = 0;
     int64_t n = 0; /* bytes */
-    void *buf = NULL;
+    QEMU_AUTO_VFREE void *buf = NULL;
     int64_t len, base_len;
 
-    ret = len = blk_getlength(s->top);
+    len = blk_co_getlength(s->top);
     if (len < 0) {
-        goto out;
+        return len;
     }
     job_progress_set_remaining(&s->common.job, len);
 
-    ret = base_len = blk_getlength(s->base);
+    base_len = blk_co_getlength(s->base);
     if (base_len < 0) {
-        goto out;
+        return base_len;
     }
 
     if (base_len < len) {
-        ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL);
+        ret = blk_co_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL);
         if (ret) {
-            goto out;
+            return ret;
         }
     }
 
@@ -149,13 +160,13 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
         /* Note that even when no rate limit is applied we need to yield
          * with no pending I/O here so that bdrv_drain_all() returns.
          */
-        job_sleep_ns(&s->common.job, delay_ns);
+        block_job_ratelimit_sleep(&s->common);
         if (job_is_cancelled(&s->common.job)) {
             break;
         }
         /* Copy if allocated above the base */
-        ret = bdrv_is_allocated_above(blk_bs(s->top), s->base_overlay, true,
-                                      offset, COMMIT_BUFFER_SIZE, &n);
+        ret = blk_co_is_allocated_above(s->top, s->base_overlay, true,
+                                        offset, COMMIT_BUFFER_SIZE, &n);
         copy = (ret > 0);
         trace_commit_one_iteration(s, offset, n, ret);
         if (copy) {
@@ -174,7 +185,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
                 block_job_error_action(&s->common, s->on_error,
                                        error_in_source, -ret);
             if (action == BLOCK_ERROR_ACTION_REPORT) {
-                goto out;
+                return ret;
             } else {
                 n = 0;
                 continue;
@@ -184,18 +195,11 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
         job_progress_update(&s->common.job, n);
 
         if (copy) {
-            delay_ns = block_job_ratelimit_get_delay(&s->common, n);
-        } else {
-            delay_ns = 0;
+            block_job_ratelimit_processed_bytes(&s->common, n);
         }
     }
 
-    ret = 0;
-
-out:
-    qemu_vfree(buf);
-
-    return ret;
+    return 0;
 }
 
 static const BlockJobDriver commit_job_driver = {
@@ -211,13 +215,14 @@ static const BlockJobDriver commit_job_driver = {
     },
 };
 
-static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
-    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_commit_top_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                       QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
 }
 
-static void bdrv_commit_top_refresh_filename(BlockDriverState *bs)
+static GRAPH_RDLOCK void bdrv_commit_top_refresh_filename(BlockDriverState *bs)
 {
     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
             bs->backing->bs->filename);
@@ -242,6 +247,7 @@ static BlockDriver bdrv_commit_top = {
     .bdrv_child_perm            = bdrv_commit_top_child_perm,
 
     .is_filter                  = true,
+    .filtered_child_is_backing  = true,
 };
 
 void commit_start(const char *job_id, BlockDriverState *bs,
@@ -254,16 +260,20 @@ void commit_start(const char *job_id, BlockDriverState *bs,
     BlockDriverState *iter;
     BlockDriverState *commit_top_bs = NULL;
     BlockDriverState *filtered_base;
-    Error *local_err = NULL;
     int64_t base_size, top_size;
     uint64_t base_perms, iter_shared_perms;
     int ret;
 
+    GLOBAL_STATE_CODE();
+
     assert(top != bs);
+    bdrv_graph_rdlock_main_loop();
     if (bdrv_skip_filters(top) == bdrv_skip_filters(base)) {
         error_setg(errp, "Invalid files for merge: top and base are the same");
+        bdrv_graph_rdunlock_main_loop();
         return;
     }
+    bdrv_graph_rdunlock_main_loop();
 
     base_size = bdrv_getlength(base);
     if (base_size < 0) {
@@ -312,10 +322,10 @@ void commit_start(const char *job_id, BlockDriverState *bs,
 
     commit_top_bs->total_sectors = top->total_sectors;
 
-    bdrv_append(commit_top_bs, top, &local_err);
-    if (local_err) {
+    ret = bdrv_append(commit_top_bs, top, errp);
+    bdrv_unref(commit_top_bs); /* referenced by new parents or failed */
+    if (ret < 0) {
         commit_top_bs = NULL;
-        error_propagate(errp, local_err);
         goto fail;
     }
 
@@ -329,6 +339,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
      * this is the responsibility of the interface (i.e. whoever calls
      * commit_start()).
      */
+    bdrv_graph_wrlock(top);
     s->base_overlay = bdrv_find_overlay(top, base);
     assert(s->base_overlay);
 
@@ -359,16 +370,20 @@ void commit_start(const char *job_id, BlockDriverState *bs,
         ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
                                  iter_shared_perms, errp);
         if (ret < 0) {
+            bdrv_graph_wrunlock(top);
             goto fail;
         }
     }
 
     if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) {
+        bdrv_graph_wrunlock(top);
         goto fail;
     }
     s->chain_frozen = true;
 
     ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
+    bdrv_graph_wrunlock(top);
+
     if (ret < 0) {
         goto fail;
     }
@@ -376,7 +391,6 @@ void commit_start(const char *job_id, BlockDriverState *bs,
     s->base = blk_new(s->common.job.aio_context,
                       base_perms,
                       BLK_PERM_CONSISTENT_READ
-                      | BLK_PERM_GRAPH_MOD
                       | BLK_PERM_WRITE_UNCHANGED);
     ret = blk_insert_bs(s->base, base, errp);
     if (ret < 0) {
@@ -402,7 +416,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
 
 fail:
     if (s->chain_frozen) {
+        bdrv_graph_rdlock_main_loop();
         bdrv_unfreeze_backing_chain(commit_top_bs, base);
+        bdrv_graph_rdunlock_main_loop();
     }
     if (s->base) {
         blk_unref(s->base);
@@ -417,7 +433,11 @@ fail:
     /* commit_top_bs has to be replaced after deleting the block job,
      * otherwise this would fail because of lack of permissions. */
     if (commit_top_bs) {
+        bdrv_drained_begin(top);
+        bdrv_graph_wrlock(top);
         bdrv_replace_node(commit_top_bs, top, &error_abort);
+        bdrv_graph_wrunlock(top);
+        bdrv_drained_end(top);
     }
 }
 
@@ -436,9 +456,12 @@ int bdrv_commit(BlockDriverState *bs)
     int ro;
     int64_t n;
     int ret = 0;
-    uint8_t *buf = NULL;
+    QEMU_AUTO_VFREE uint8_t *buf = NULL;
     Error *local_err = NULL;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (!drv)
         return -ENOMEDIUM;
 
@@ -454,7 +477,7 @@ int bdrv_commit(BlockDriverState *bs)
         return -EBUSY;
     }
 
-    ro = backing_file_bs->read_only;
+    ro = bdrv_is_read_only(backing_file_bs);
 
     if (ro) {
         if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) {
@@ -529,12 +552,12 @@ int bdrv_commit(BlockDriverState *bs)
             goto ro_cleanup;
         }
         if (ret) {
-            ret = blk_pread(src, offset, buf, n);
+            ret = blk_pread(src, offset, n, buf, 0);
             if (ret < 0) {
                 goto ro_cleanup;
             }
 
-            ret = blk_pwrite(backing, offset, buf, n, 0);
+            ret = blk_pwrite(backing, offset, n, buf, 0);
             if (ret < 0) {
                 goto ro_cleanup;
             }
@@ -557,8 +580,6 @@ int bdrv_commit(BlockDriverState *bs)
 
     ret = 0;
 ro_cleanup:
-    qemu_vfree(buf);
-
     blk_unref(backing);
     if (bdrv_cow_bs(bs) != backing_file_bs) {
         bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
new file mode 100644 (file)
index 0000000..1397287
--- /dev/null
@@ -0,0 +1,576 @@
+/*
+ * copy-before-write filter driver
+ *
+ * The driver performs Copy-Before-Write (CBW) operation: it is injected above
+ * some node, and before each write it copies _old_ data to the target node.
+ *
+ * Copyright (c) 2018-2021 Virtuozzo International GmbH.
+ *
+ * Author:
+ *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qmp/qjson.h"
+
+#include "sysemu/block-backend.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "block/qdict.h"
+#include "block/block-copy.h"
+#include "block/dirty-bitmap.h"
+
+#include "block/copy-before-write.h"
+#include "block/reqlist.h"
+
+#include "qapi/qapi-visit-block-core.h"
+
+typedef struct BDRVCopyBeforeWriteState {
+    BlockCopyState *bcs;
+    BdrvChild *target;
+    OnCbwError on_cbw_error;
+    uint32_t cbw_timeout_ns;
+
+    /*
+     * @lock: protects access to @access_bitmap, @done_bitmap and
+     * @frozen_read_reqs
+     */
+    CoMutex lock;
+
+    /*
+     * @access_bitmap: represents areas allowed for reading by fleecing user.
+     * Reading from non-dirty areas leads to -EACCES.
+     */
+    BdrvDirtyBitmap *access_bitmap;
+
+    /*
+     * @done_bitmap: represents areas that was successfully copied to @target by
+     * copy-before-write operations.
+     */
+    BdrvDirtyBitmap *done_bitmap;
+
+    /*
+     * @frozen_read_reqs: current read requests for fleecing user in bs->file
+     * node. These areas must not be rewritten by guest.
+     */
+    BlockReqList frozen_read_reqs;
+
+    /*
+     * @snapshot_error is normally zero. But on first copy-before-write failure
+     * when @on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT, @snapshot_error takes
+     * value of this error (<0). After that all in-flight and further
+     * snapshot-API requests will fail with that error.
+     */
+    int snapshot_error;
+} BDRVCopyBeforeWriteState;
+
+static int coroutine_fn GRAPH_RDLOCK
+cbw_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+              QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
+}
+
+static void block_copy_cb(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+
+    bdrv_dec_in_flight(bs);
+}
+
+/*
+ * Do copy-before-write operation.
+ *
+ * On failure guest request must be failed too.
+ *
+ * On success, we also wait for all in-flight fleecing read requests in source
+ * node, and it's guaranteed that after cbw_do_copy_before_write() successful
+ * return there are no such requests and they will never appear.
+ */
+static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
+        uint64_t offset, uint64_t bytes, BdrvRequestFlags flags)
+{
+    BDRVCopyBeforeWriteState *s = bs->opaque;
+    int ret;
+    uint64_t off, end;
+    int64_t cluster_size = block_copy_cluster_size(s->bcs);
+
+    if (flags & BDRV_REQ_WRITE_UNCHANGED) {
+        return 0;
+    }
+
+    if (s->snapshot_error) {
+        return 0;
+    }
+
+    off = QEMU_ALIGN_DOWN(offset, cluster_size);
+    end = QEMU_ALIGN_UP(offset + bytes, cluster_size);
+
+    /*
+     * Increase in_flight, so that in case of timed-out block-copy, the
+     * remaining background block_copy() request (which can't be immediately
+     * cancelled by timeout) is presented in bs->in_flight. This way we are
+     * sure that on bs close() we'll previously wait for all timed-out but yet
+     * running block_copy calls.
+     */
+    bdrv_inc_in_flight(bs);
+    ret = block_copy(s->bcs, off, end - off, true, s->cbw_timeout_ns,
+                     block_copy_cb, bs);
+    if (ret < 0 && s->on_cbw_error == ON_CBW_ERROR_BREAK_GUEST_WRITE) {
+        return ret;
+    }
+
+    WITH_QEMU_LOCK_GUARD(&s->lock) {
+        if (ret < 0) {
+            assert(s->on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT);
+            if (!s->snapshot_error) {
+                s->snapshot_error = ret;
+            }
+        } else {
+            bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
+        }
+        reqlist_wait_all(&s->frozen_read_reqs, off, end - off, &s->lock);
+    }
+
+    return 0;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+cbw_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+    int ret = cbw_do_copy_before_write(bs, offset, bytes, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return bdrv_co_pdiscard(bs->file, offset, bytes);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+cbw_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     BdrvRequestFlags flags)
+{
+    int ret = cbw_do_copy_before_write(bs, offset, bytes, flags);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
+}
+
+static coroutine_fn GRAPH_RDLOCK
+int cbw_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                   QEMUIOVector *qiov, BdrvRequestFlags flags)
+{
+    int ret = cbw_do_copy_before_write(bs, offset, bytes, flags);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn GRAPH_RDLOCK cbw_co_flush(BlockDriverState *bs)
+{
+    if (!bs->file) {
+        return 0;
+    }
+
+    return bdrv_co_flush(bs->file->bs);
+}
+
+/*
+ * If @offset not accessible - return NULL.
+ *
+ * Otherwise, set @pnum to some bytes that accessible from @file (@file is set
+ * to bs->file or to s->target). Return newly allocated BlockReq object that
+ * should be than passed to cbw_snapshot_read_unlock().
+ *
+ * It's guaranteed that guest writes will not interact in the region until
+ * cbw_snapshot_read_unlock() called.
+ */
+static BlockReq * coroutine_fn GRAPH_RDLOCK
+cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                       int64_t *pnum, BdrvChild **file)
+{
+    BDRVCopyBeforeWriteState *s = bs->opaque;
+    BlockReq *req = g_new(BlockReq, 1);
+    bool done;
+
+    QEMU_LOCK_GUARD(&s->lock);
+
+    if (s->snapshot_error) {
+        g_free(req);
+        return NULL;
+    }
+
+    if (bdrv_dirty_bitmap_next_zero(s->access_bitmap, offset, bytes) != -1) {
+        g_free(req);
+        return NULL;
+    }
+
+    done = bdrv_dirty_bitmap_status(s->done_bitmap, offset, bytes, pnum);
+    if (done) {
+        /*
+         * Special invalid BlockReq, that is handled in
+         * cbw_snapshot_read_unlock(). We don't need to lock something to read
+         * from s->target.
+         */
+        *req = (BlockReq) {.offset = -1, .bytes = -1};
+        *file = s->target;
+    } else {
+        reqlist_init_req(&s->frozen_read_reqs, req, offset, bytes);
+        *file = bs->file;
+    }
+
+    return req;
+}
+
+static coroutine_fn void
+cbw_snapshot_read_unlock(BlockDriverState *bs, BlockReq *req)
+{
+    BDRVCopyBeforeWriteState *s = bs->opaque;
+
+    if (req->offset == -1 && req->bytes == -1) {
+        g_free(req);
+        return;
+    }
+
+    QEMU_LOCK_GUARD(&s->lock);
+
+    reqlist_remove_req(req);
+    g_free(req);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                       QEMUIOVector *qiov, size_t qiov_offset)
+{
+    BlockReq *req;
+    BdrvChild *file;
+    int ret;
+
+    /* TODO: upgrade to async loop using AioTask */
+    while (bytes) {
+        int64_t cur_bytes;
+
+        req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &file);
+        if (!req) {
+            return -EACCES;
+        }
+
+        ret = bdrv_co_preadv_part(file, offset, cur_bytes,
+                                  qiov, qiov_offset, 0);
+        cbw_snapshot_read_unlock(bs, req);
+        if (ret < 0) {
+            return ret;
+        }
+
+        bytes -= cur_bytes;
+        offset += cur_bytes;
+        qiov_offset += cur_bytes;
+    }
+
+    return 0;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+cbw_co_snapshot_block_status(BlockDriverState *bs,
+                             bool want_zero, int64_t offset, int64_t bytes,
+                             int64_t *pnum, int64_t *map,
+                             BlockDriverState **file)
+{
+    BDRVCopyBeforeWriteState *s = bs->opaque;
+    BlockReq *req;
+    int ret;
+    int64_t cur_bytes;
+    BdrvChild *child;
+
+    req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &child);
+    if (!req) {
+        return -EACCES;
+    }
+
+    ret = bdrv_co_block_status(child->bs, offset, cur_bytes, pnum, map, file);
+    if (child == s->target) {
+        /*
+         * We refer to s->target only for areas that we've written to it.
+         * And we can not report unallocated blocks in s->target: this will
+         * break generic block-status-above logic, that will go to
+         * copy-before-write filtered child in this case.
+         */
+        assert(ret & BDRV_BLOCK_ALLOCATED);
+    }
+
+    cbw_snapshot_read_unlock(bs, req);
+
+    return ret;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+cbw_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+    BDRVCopyBeforeWriteState *s = bs->opaque;
+
+    WITH_QEMU_LOCK_GUARD(&s->lock) {
+        bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes);
+    }
+
+    block_copy_reset(s->bcs, offset, bytes);
+
+    return bdrv_co_pdiscard(s->target, offset, bytes);
+}
+
+static void GRAPH_RDLOCK cbw_refresh_filename(BlockDriverState *bs)
+{
+    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
+            bs->file->bs->filename);
+}
+
+static void GRAPH_RDLOCK
+cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+               BlockReopenQueue *reopen_queue,
+               uint64_t perm, uint64_t shared,
+               uint64_t *nperm, uint64_t *nshared)
+{
+    if (!(role & BDRV_CHILD_FILTERED)) {
+        /*
+         * Target child
+         *
+         * Share write to target (child_file), to not interfere
+         * with guest writes to its disk which may be in target backing chain.
+         * Can't resize during a backup block job because we check the size
+         * only upfront.
+         */
+        *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
+        *nperm = BLK_PERM_WRITE;
+    } else {
+        /* Source child */
+        bdrv_default_perms(bs, c, role, reopen_queue,
+                           perm, shared, nperm, nshared);
+
+        if (!QLIST_EMPTY(&bs->parents)) {
+            if (perm & BLK_PERM_WRITE) {
+                *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+            }
+            *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+        }
+    }
+}
+
+static BlockdevOptions *cbw_parse_options(QDict *options, Error **errp)
+{
+    BlockdevOptions *opts = NULL;
+    Visitor *v = NULL;
+
+    qdict_put_str(options, "driver", "copy-before-write");
+
+    v = qobject_input_visitor_new_flat_confused(options, errp);
+    if (!v) {
+        goto out;
+    }
+
+    visit_type_BlockdevOptions(v, NULL, &opts, errp);
+    if (!opts) {
+        goto out;
+    }
+
+    /*
+     * Delete options which we are going to parse through BlockdevOptions
+     * object for original options.
+     */
+    qdict_extract_subqdict(options, NULL, "bitmap");
+    qdict_del(options, "on-cbw-error");
+    qdict_del(options, "cbw-timeout");
+
+out:
+    visit_free(v);
+    qdict_del(options, "driver");
+
+    return opts;
+}
+
+static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+                    Error **errp)
+{
+    BDRVCopyBeforeWriteState *s = bs->opaque;
+    BdrvDirtyBitmap *bitmap = NULL;
+    int64_t cluster_size;
+    g_autoptr(BlockdevOptions) full_opts = NULL;
+    BlockdevOptionsCbw *opts;
+    AioContext *ctx;
+    int ret;
+
+    full_opts = cbw_parse_options(options, errp);
+    if (!full_opts) {
+        return -EINVAL;
+    }
+    assert(full_opts->driver == BLOCKDEV_DRIVER_COPY_BEFORE_WRITE);
+    opts = &full_opts->u.copy_before_write;
+
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    s->target = bdrv_open_child(NULL, options, "target", bs, &child_of_bds,
+                                BDRV_CHILD_DATA, false, errp);
+    if (!s->target) {
+        return -EINVAL;
+    }
+
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
+
+    if (opts->bitmap) {
+        bitmap = block_dirty_bitmap_lookup(opts->bitmap->node,
+                                           opts->bitmap->name, NULL, errp);
+        if (!bitmap) {
+            ret = -EINVAL;
+            goto out;
+        }
+    }
+    s->on_cbw_error = opts->has_on_cbw_error ? opts->on_cbw_error :
+            ON_CBW_ERROR_BREAK_GUEST_WRITE;
+    s->cbw_timeout_ns = opts->has_cbw_timeout ?
+        opts->cbw_timeout * NANOSECONDS_PER_SECOND : 0;
+
+    bs->total_sectors = bs->file->bs->total_sectors;
+    bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+            (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
+    bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+            ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+             bs->file->bs->supported_zero_flags);
+
+    s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
+    if (!s->bcs) {
+        error_prepend(errp, "Cannot create block-copy-state: ");
+        ret = -EINVAL;
+        goto out;
+    }
+
+    cluster_size = block_copy_cluster_size(s->bcs);
+
+    s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
+    if (!s->done_bitmap) {
+        ret = -EINVAL;
+        goto out;
+    }
+    bdrv_disable_dirty_bitmap(s->done_bitmap);
+
+    /* s->access_bitmap starts equal to bcs bitmap */
+    s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
+    if (!s->access_bitmap) {
+        ret = -EINVAL;
+        goto out;
+    }
+    bdrv_disable_dirty_bitmap(s->access_bitmap);
+    bdrv_dirty_bitmap_merge_internal(s->access_bitmap,
+                                     block_copy_dirty_bitmap(s->bcs), NULL,
+                                     true);
+
+    qemu_co_mutex_init(&s->lock);
+    QLIST_INIT(&s->frozen_read_reqs);
+
+    ret = 0;
+out:
+    aio_context_release(ctx);
+    return ret;
+}
+
+static void cbw_close(BlockDriverState *bs)
+{
+    BDRVCopyBeforeWriteState *s = bs->opaque;
+
+    bdrv_release_dirty_bitmap(s->access_bitmap);
+    bdrv_release_dirty_bitmap(s->done_bitmap);
+
+    block_copy_state_free(s->bcs);
+    s->bcs = NULL;
+}
+
+static BlockDriver bdrv_cbw_filter = {
+    .format_name = "copy-before-write",
+    .instance_size = sizeof(BDRVCopyBeforeWriteState),
+
+    .bdrv_open                  = cbw_open,
+    .bdrv_close                 = cbw_close,
+
+    .bdrv_co_preadv             = cbw_co_preadv,
+    .bdrv_co_pwritev            = cbw_co_pwritev,
+    .bdrv_co_pwrite_zeroes      = cbw_co_pwrite_zeroes,
+    .bdrv_co_pdiscard           = cbw_co_pdiscard,
+    .bdrv_co_flush              = cbw_co_flush,
+
+    .bdrv_co_preadv_snapshot       = cbw_co_preadv_snapshot,
+    .bdrv_co_pdiscard_snapshot     = cbw_co_pdiscard_snapshot,
+    .bdrv_co_snapshot_block_status = cbw_co_snapshot_block_status,
+
+    .bdrv_refresh_filename      = cbw_refresh_filename,
+
+    .bdrv_child_perm            = cbw_child_perm,
+
+    .is_filter = true,
+};
+
+BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                  BlockDriverState *target,
+                                  const char *filter_node_name,
+                                  BlockCopyState **bcs,
+                                  Error **errp)
+{
+    BDRVCopyBeforeWriteState *state;
+    BlockDriverState *top;
+    QDict *opts;
+
+    assert(source->total_sectors == target->total_sectors);
+    GLOBAL_STATE_CODE();
+
+    opts = qdict_new();
+    qdict_put_str(opts, "driver", "copy-before-write");
+    if (filter_node_name) {
+        qdict_put_str(opts, "node-name", filter_node_name);
+    }
+    qdict_put_str(opts, "file", bdrv_get_node_name(source));
+    qdict_put_str(opts, "target", bdrv_get_node_name(target));
+
+    top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp);
+    if (!top) {
+        return NULL;
+    }
+
+    state = top->opaque;
+    *bcs = state->bcs;
+
+    return top;
+}
+
+void bdrv_cbw_drop(BlockDriverState *bs)
+{
+    GLOBAL_STATE_CODE();
+    bdrv_drop_filter(bs, &error_abort);
+    bdrv_unref(bs);
+}
+
+static void cbw_init(void)
+{
+    bdrv_register(&bdrv_cbw_filter);
+}
+
+block_init(cbw_init);
diff --git a/block/copy-before-write.h b/block/copy-before-write.h
new file mode 100644 (file)
index 0000000..6e72bb2
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * copy-before-write filter driver
+ *
+ * The driver performs Copy-Before-Write (CBW) operation: it is injected above
+ * some node, and before each write it copies _old_ data to the target node.
+ *
+ * Copyright (c) 2018-2021 Virtuozzo International GmbH.
+ *
+ * Author:
+ *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef COPY_BEFORE_WRITE_H
+#define COPY_BEFORE_WRITE_H
+
+#include "block/block_int.h"
+#include "block/block-copy.h"
+
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                  BlockDriverState *target,
+                                  const char *filter_node_name,
+                                  BlockCopyState **bcs,
+                                  Error **errp);
+void bdrv_cbw_drop(BlockDriverState *bs);
+
+#endif /* COPY_BEFORE_WRITE_H */
index 2816e61afeb03cbfec2f59363c84132ccfe67672..c36f253d169a954496931f1c43e9a0218137c88b 100644 (file)
  */
 
 #include "qemu/osdep.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "block/copy-on-read.h"
 
 
-static int cor_open(BlockDriverState *bs, QDict *options, int flags,
-                    Error **errp)
+typedef struct BDRVStateCOR {
+    BlockDriverState *bottom_bs;
+    bool chain_frozen;
+} BDRVStateCOR;
+
+
+static int GRAPH_UNLOCKED
+cor_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
 {
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
+    BlockDriverState *bottom_bs = NULL;
+    BDRVStateCOR *state = bs->opaque;
+    /* Find a bottom node name, if any */
+    const char *bottom_node = qdict_get_try_str(options, "bottom");
+    int ret;
+
+    GLOBAL_STATE_CODE();
+
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
     }
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    bs->supported_read_flags = BDRV_REQ_PREFETCH;
+
     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
         (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
 
@@ -42,6 +62,43 @@ static int cor_open(BlockDriverState *bs, QDict *options, int flags,
         ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
             bs->file->bs->supported_zero_flags);
 
+    if (bottom_node) {
+        bottom_bs = bdrv_find_node(bottom_node);
+        if (!bottom_bs) {
+            error_setg(errp, "Bottom node '%s' not found", bottom_node);
+            qdict_del(options, "bottom");
+            return -EINVAL;
+        }
+        qdict_del(options, "bottom");
+
+        if (!bottom_bs->drv) {
+            error_setg(errp, "Bottom node '%s' not opened", bottom_node);
+            return -EINVAL;
+        }
+
+        if (bottom_bs->drv->is_filter) {
+            error_setg(errp, "Bottom node '%s' is a filter", bottom_node);
+            return -EINVAL;
+        }
+
+        if (bdrv_freeze_backing_chain(bs, bottom_bs, errp) < 0) {
+            return -EINVAL;
+        }
+        state->chain_frozen = true;
+
+        /*
+         * We do freeze the chain, so it shouldn't be removed. Still, storing a
+         * pointer worth bdrv_ref().
+         */
+        bdrv_ref(bottom_bs);
+    }
+    state->bottom_bs = bottom_bs;
+
+    /*
+     * We don't need to call bdrv_child_refresh_perms() now as the permissions
+     * will be updated later when the filter node gets its parent.
+     */
+
     return 0;
 }
 
@@ -68,88 +125,169 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild *c,
 }
 
 
-static int64_t cor_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK cor_co_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file->bs);
+    return bdrv_co_getlength(bs->file->bs);
 }
 
 
-static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
-                                      uint64_t offset, uint64_t bytes,
-                                      QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                   QEMUIOVector *qiov, size_t qiov_offset,
+                   BdrvRequestFlags flags)
 {
-    return bdrv_co_preadv(bs->file, offset, bytes, qiov,
-                          flags | BDRV_REQ_COPY_ON_READ);
+    int64_t n;
+    int local_flags;
+    int ret;
+    BDRVStateCOR *state = bs->opaque;
+
+    if (!state->bottom_bs) {
+        return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+                                   flags | BDRV_REQ_COPY_ON_READ);
+    }
+
+    while (bytes) {
+        local_flags = flags;
+
+        /* In case of failure, try to copy-on-read anyway */
+        ret = bdrv_co_is_allocated(bs->file->bs, offset, bytes, &n);
+        if (ret <= 0) {
+            ret = bdrv_co_is_allocated_above(bdrv_backing_chain_next(bs->file->bs),
+                                             state->bottom_bs, true, offset,
+                                             n, &n);
+            if (ret > 0 || ret < 0) {
+                local_flags |= BDRV_REQ_COPY_ON_READ;
+            }
+            /* Finish earlier if the end of a backing file has been reached */
+            if (n == 0) {
+                break;
+            }
+        }
+
+        /* Skip if neither read nor write are needed */
+        if ((local_flags & (BDRV_REQ_PREFETCH | BDRV_REQ_COPY_ON_READ)) !=
+            BDRV_REQ_PREFETCH) {
+            ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+                                      local_flags);
+            if (ret < 0) {
+                return ret;
+            }
+        }
+
+        offset += n;
+        qiov_offset += n;
+        bytes -= n;
+    }
+
+    return 0;
 }
 
 
-static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
-                                       uint64_t offset, uint64_t bytes,
-                                       QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                    QEMUIOVector *qiov, size_t qiov_offset,
+                    BdrvRequestFlags flags)
 {
-
-    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+    return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
+                                flags);
 }
 
 
-static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,
-                                             int64_t offset, int bytes,
-                                             BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     BdrvRequestFlags flags)
 {
     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
 
-static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs,
-                                        int64_t offset, int bytes)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
 
 
-static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs,
-                                                  uint64_t offset,
-                                                  uint64_t bytes,
-                                                  QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+cor_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          QEMUIOVector *qiov)
 {
     return bdrv_co_pwritev(bs->file, offset, bytes, qiov,
                            BDRV_REQ_WRITE_COMPRESSED);
 }
 
 
-static void cor_eject(BlockDriverState *bs, bool eject_flag)
+static void coroutine_fn GRAPH_RDLOCK
+cor_co_eject(BlockDriverState *bs, bool eject_flag)
 {
-    bdrv_eject(bs->file->bs, eject_flag);
+    bdrv_co_eject(bs->file->bs, eject_flag);
 }
 
 
-static void cor_lock_medium(BlockDriverState *bs, bool locked)
+static void coroutine_fn GRAPH_RDLOCK
+cor_co_lock_medium(BlockDriverState *bs, bool locked)
 {
-    bdrv_lock_medium(bs->file->bs, locked);
+    bdrv_co_lock_medium(bs->file->bs, locked);
+}
+
+
+static void GRAPH_UNLOCKED cor_close(BlockDriverState *bs)
+{
+    BDRVStateCOR *s = bs->opaque;
+
+    GLOBAL_STATE_CODE();
+
+    if (s->chain_frozen) {
+        bdrv_graph_rdlock_main_loop();
+        s->chain_frozen = false;
+        bdrv_unfreeze_backing_chain(bs, s->bottom_bs);
+        bdrv_graph_rdunlock_main_loop();
+    }
+
+    bdrv_unref(s->bottom_bs);
 }
 
 
 static BlockDriver bdrv_copy_on_read = {
     .format_name                        = "copy-on-read",
+    .instance_size                      = sizeof(BDRVStateCOR),
 
     .bdrv_open                          = cor_open,
+    .bdrv_close                         = cor_close,
     .bdrv_child_perm                    = cor_child_perm,
 
-    .bdrv_getlength                     = cor_getlength,
+    .bdrv_co_getlength                  = cor_co_getlength,
 
-    .bdrv_co_preadv                     = cor_co_preadv,
-    .bdrv_co_pwritev                    = cor_co_pwritev,
+    .bdrv_co_preadv_part                = cor_co_preadv_part,
+    .bdrv_co_pwritev_part               = cor_co_pwritev_part,
     .bdrv_co_pwrite_zeroes              = cor_co_pwrite_zeroes,
     .bdrv_co_pdiscard                   = cor_co_pdiscard,
     .bdrv_co_pwritev_compressed         = cor_co_pwritev_compressed,
 
-    .bdrv_eject                         = cor_eject,
-    .bdrv_lock_medium                   = cor_lock_medium,
+    .bdrv_co_eject                      = cor_co_eject,
+    .bdrv_co_lock_medium                = cor_co_lock_medium,
 
-    .has_variable_length                = true,
     .is_filter                          = true,
 };
 
+
+void no_coroutine_fn bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
+{
+    BDRVStateCOR *s = cor_filter_bs->opaque;
+
+    GLOBAL_STATE_CODE();
+
+    /* unfreeze, as otherwise bdrv_replace_node() will fail */
+    if (s->chain_frozen) {
+        GRAPH_RDLOCK_GUARD_MAINLOOP();
+        s->chain_frozen = false;
+        bdrv_unfreeze_backing_chain(cor_filter_bs, s->bottom_bs);
+    }
+    bdrv_drop_filter(cor_filter_bs, &error_abort);
+    bdrv_unref(cor_filter_bs);
+}
+
+
 static void bdrv_copy_on_read_init(void)
 {
     bdrv_register(&bdrv_copy_on_read);
diff --git a/block/copy-on-read.h b/block/copy-on-read.h
new file mode 100644 (file)
index 0000000..72f9b37
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * The filter driver performs Copy-On-Read (COR) operations
+ *
+ * Copyright (c) 2018-2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *   Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef BLOCK_COPY_ON_READ_H
+#define BLOCK_COPY_ON_READ_H
+
+#include "block/block_int.h"
+
+void no_coroutine_fn GRAPH_UNLOCKED
+bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs);
+
+#endif /* BLOCK_COPY_ON_READ_H */
index 4cfb4946e65e8d3e691e4d64feb81152f737e40a..f3226682d6cb6a901d5a0d9058f0069fda8935dc 100644 (file)
  * THE SOFTWARE.
  */
 
-#ifndef BLOCK_COROUTINES_INT_H
-#define BLOCK_COROUTINES_INT_H
+#ifndef BLOCK_COROUTINES_H
+#define BLOCK_COROUTINES_H
 
 #include "block/block_int.h"
 
-int coroutine_fn bdrv_co_check(BlockDriverState *bs,
-                               BdrvCheckResult *res, BdrvCheckMode fix);
-int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
+/* For blk_bs() in generated block/block-gen.c */
+#include "sysemu/block-backend.h"
 
-int generated_co_wrapper
-bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
-            QEMUIOVector *qiov, BdrvRequestFlags flags);
-int generated_co_wrapper
-bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
-             QEMUIOVector *qiov, BdrvRequestFlags flags);
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
 
-int coroutine_fn
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
+
+int coroutine_fn GRAPH_RDLOCK
 bdrv_co_common_block_status_above(BlockDriverState *bs,
                                   BlockDriverState *base,
                                   bool include_base,
@@ -49,7 +54,27 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
                                   int64_t *map,
                                   BlockDriverState **file,
                                   int *depth);
-int generated_co_wrapper
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+
+int coroutine_fn GRAPH_RDLOCK
+nbd_co_do_establish_connection(BlockDriverState *bs, bool blocking,
+                               Error **errp);
+
+
+/*
+ * "I/O or GS" API functions. These functions can run without
+ * the BQL, but only in one specific iothread/main loop.
+ *
+ * See include/block/block-io.h for more information about
+ * the "I/O or GS" API.
+ */
+
+int co_wrapper_mixed_bdrv_rdlock
 bdrv_common_block_status_above(BlockDriverState *bs,
                                BlockDriverState *base,
                                bool include_base,
@@ -61,9 +86,7 @@ bdrv_common_block_status_above(BlockDriverState *bs,
                                BlockDriverState **file,
                                int *depth);
 
-int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
-                                       QEMUIOVector *qiov, int64_t pos);
-int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
-                                        QEMUIOVector *qiov, int64_t pos);
+int co_wrapper_mixed_bdrv_rdlock
+nbd_do_establish_connection(BlockDriverState *bs, bool blocking, Error **errp);
 
-#endif /* BLOCK_COROUTINES_INT_H */
+#endif /* BLOCK_COROUTINES_H */
index 89812669df45f7f3f2f75d7cb0590f81a97f989e..6b23a2167535e7fe7321b22c1a28686a6189492b 100644 (file)
@@ -42,6 +42,8 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp)
     BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common);
     int ret;
 
+    GLOBAL_STATE_CODE();
+
     job_progress_set_remaining(&s->common, 1);
     ret = s->drv->bdrv_co_create(s->opts, errp);
     job_progress_update(&s->common, 1);
@@ -57,6 +59,12 @@ static const JobDriver blockdev_create_job_driver = {
     .run           = blockdev_create_run,
 };
 
+/* Checking whether the function is present doesn't require the graph lock */
+static inline bool TSA_NO_TSA has_bdrv_co_create(BlockDriver *drv)
+{
+    return drv->bdrv_co_create;
+}
+
 void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options,
                          Error **errp)
 {
@@ -77,7 +85,7 @@ void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options,
     }
 
     /* Error out if the driver doesn't support .bdrv_co_create */
-    if (!drv->bdrv_co_create) {
+    if (!has_bdrv_co_create(drv)) {
         error_setg(errp, "Driver does not support blockdev-create");
         return;
     }
index aef5a5721a929eec2b3614fab73e6549497d4b93..921933a5e5f20156aae543781ec8b0941114def8 100644 (file)
@@ -31,6 +31,7 @@
 #include "qemu/module.h"
 #include "qemu/option.h"
 #include "qemu/cutils.h"
+#include "qemu/memalign.h"
 #include "crypto.h"
 
 typedef struct BlockCrypto BlockCrypto;
@@ -54,40 +55,46 @@ static int block_crypto_probe_generic(QCryptoBlockFormat format,
 }
 
 
-static ssize_t block_crypto_read_func(QCryptoBlock *block,
-                                      size_t offset,
-                                      uint8_t *buf,
-                                      size_t buflen,
-                                      void *opaque,
-                                      Error **errp)
+static int block_crypto_read_func(QCryptoBlock *block,
+                                  size_t offset,
+                                  uint8_t *buf,
+                                  size_t buflen,
+                                  void *opaque,
+                                  Error **errp)
 {
     BlockDriverState *bs = opaque;
     ssize_t ret;
 
-    ret = bdrv_pread(bs->file, offset, buf, buflen);
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    ret = bdrv_pread(bs->file, offset, buflen, buf, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not read encryption header");
         return ret;
     }
-    return ret;
+    return 0;
 }
 
-static ssize_t block_crypto_write_func(QCryptoBlock *block,
-                                       size_t offset,
-                                       const uint8_t *buf,
-                                       size_t buflen,
-                                       void *opaque,
-                                       Error **errp)
+static int block_crypto_write_func(QCryptoBlock *block,
+                                   size_t offset,
+                                   const uint8_t *buf,
+                                   size_t buflen,
+                                   void *opaque,
+                                   Error **errp)
 {
     BlockDriverState *bs = opaque;
     ssize_t ret;
 
-    ret = bdrv_pwrite(bs->file, offset, buf, buflen);
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    ret = bdrv_pwrite(bs->file, offset, buflen, buf, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not write encryption header");
         return ret;
     }
-    return ret;
+    return 0;
 }
 
 
@@ -98,28 +105,25 @@ struct BlockCryptoCreateData {
 };
 
 
-static ssize_t block_crypto_create_write_func(QCryptoBlock *block,
-                                              size_t offset,
-                                              const uint8_t *buf,
-                                              size_t buflen,
-                                              void *opaque,
-                                              Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+block_crypto_create_write_func(QCryptoBlock *block, size_t offset,
+                               const uint8_t *buf, size_t buflen, void *opaque,
+                               Error **errp)
 {
     struct BlockCryptoCreateData *data = opaque;
     ssize_t ret;
 
-    ret = blk_pwrite(data->blk, offset, buf, buflen, 0);
+    ret = blk_pwrite(data->blk, offset, buflen, buf, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not write encryption header");
         return ret;
     }
-    return ret;
+    return 0;
 }
 
-static ssize_t block_crypto_create_init_func(QCryptoBlock *block,
-                                             size_t headerlen,
-                                             void *opaque,
-                                             Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+block_crypto_create_init_func(QCryptoBlock *block, size_t headerlen,
+                              void *opaque, Error **errp)
 {
     struct BlockCryptoCreateData *data = opaque;
     Error *local_error = NULL;
@@ -138,7 +142,7 @@ static ssize_t block_crypto_create_init_func(QCryptoBlock *block,
                        data->prealloc, 0, &local_error);
 
     if (ret >= 0) {
-        return ret;
+        return 0;
     }
 
 error:
@@ -260,22 +264,26 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
 {
     BlockCrypto *crypto = bs->opaque;
     QemuOpts *opts = NULL;
-    int ret = -EINVAL;
+    int ret;
     QCryptoBlockOpenOptions *open_opts = NULL;
     unsigned int cflags = 0;
     QDict *cryptoopts = NULL;
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_IMAGE, false, errp);
-    if (!bs->file) {
-        return -EINVAL;
+    GLOBAL_STATE_CODE();
+
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
     }
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     bs->supported_write_flags = BDRV_REQ_FUA &
         bs->file->bs->supported_write_flags;
 
     opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
     if (!qemu_opts_absorb_qdict(opts, options, errp)) {
+        ret = -EINVAL;
         goto cleanup;
     }
 
@@ -284,6 +292,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
 
     open_opts = block_crypto_open_opts_init(cryptoopts, errp);
     if (!open_opts) {
+        ret = -EINVAL;
         goto cleanup;
     }
 
@@ -312,19 +321,18 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
 }
 
 
-static int block_crypto_co_create_generic(BlockDriverState *bs,
-                                          int64_t size,
-                                          QCryptoBlockCreateOptions *opts,
-                                          PreallocMode prealloc,
-                                          Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
+                               QCryptoBlockCreateOptions *opts,
+                               PreallocMode prealloc, Error **errp)
 {
     int ret;
     BlockBackend *blk;
     QCryptoBlock *crypto = NULL;
     struct BlockCryptoCreateData data;
 
-    blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
-                          errp);
+    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+                             errp);
     if (!blk) {
         ret = -EPERM;
         goto cleanup;
@@ -354,11 +362,11 @@ static int block_crypto_co_create_generic(BlockDriverState *bs,
     ret = 0;
  cleanup:
     qcrypto_block_free(crypto);
-    blk_unref(blk);
+    blk_co_unref(blk);
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
                          PreallocMode prealloc, BdrvRequestFlags flags,
                          Error **errp)
@@ -396,9 +404,9 @@ static int block_crypto_reopen_prepare(BDRVReopenState *state,
  */
 #define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024)
 
-static coroutine_fn int
-block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                       QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                       QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BlockCrypto *crypto = bs->opaque;
     uint64_t cur_bytes; /* number of bytes in current iteration */
@@ -409,7 +417,6 @@ block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
     uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
 
-    assert(!flags);
     assert(payload_offset < INT64_MAX);
     assert(QEMU_IS_ALIGNED(offset, sector_size));
     assert(QEMU_IS_ALIGNED(bytes, sector_size));
@@ -459,9 +466,9 @@ block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 }
 
 
-static coroutine_fn int
-block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                        QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+block_crypto_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                        QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BlockCrypto *crypto = bs->opaque;
     uint64_t cur_bytes; /* number of bytes in current iteration */
@@ -472,7 +479,8 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
     uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
 
-    assert(!(flags & ~BDRV_REQ_FUA));
+    flags &= ~BDRV_REQ_REGISTERED_BUF;
+
     assert(payload_offset < INT64_MAX);
     assert(QEMU_IS_ALIGNED(offset, sector_size));
     assert(QEMU_IS_ALIGNED(bytes, sector_size));
@@ -529,10 +537,11 @@ static void block_crypto_refresh_limits(BlockDriverState *bs, Error **errp)
 }
 
 
-static int64_t block_crypto_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+block_crypto_co_getlength(BlockDriverState *bs)
 {
     BlockCrypto *crypto = bs->opaque;
-    int64_t len = bdrv_getlength(bs->file->bs);
+    int64_t len = bdrv_co_getlength(bs->file->bs);
 
     uint64_t offset = qcrypto_block_get_payload_offset(crypto->block);
     assert(offset < INT64_MAX);
@@ -625,7 +634,7 @@ static int block_crypto_open_luks(BlockDriverState *bs,
                                      bs, options, flags, errp);
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_UNLOCKED
 block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
 {
     BlockdevCreateOptionsLUKS *luks_opts;
@@ -637,7 +646,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
     assert(create_options->driver == BLOCKDEV_DRIVER_LUKS);
     luks_opts = &create_options->u.luks;
 
-    bs = bdrv_open_blockdev_ref(luks_opts->file, errp);
+    bs = bdrv_co_open_blockdev_ref(luks_opts->file, errp);
     if (bs == NULL) {
         return -EIO;
     }
@@ -659,14 +668,13 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
 
     ret = 0;
 fail:
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
     return ret;
 }
 
-static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv,
-                                                         const char *filename,
-                                                         QemuOpts *opts,
-                                                         Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+block_crypto_co_create_opts_luks(BlockDriver *drv, const char *filename,
+                                 QemuOpts *opts, Error **errp)
 {
     QCryptoBlockCreateOptions *create_opts = NULL;
     BlockDriverState *bs = NULL;
@@ -701,13 +709,13 @@ static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv,
     }
 
     /* Create protocol layer */
-    ret = bdrv_create_file(filename, opts, errp);
+    ret = bdrv_co_create_file(filename, opts, errp);
     if (ret < 0) {
         goto fail;
     }
 
-    bs = bdrv_open(filename, NULL, NULL,
-                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs = bdrv_co_open(filename, NULL, NULL,
+                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (!bs) {
         ret = -EINVAL;
         goto fail;
@@ -725,32 +733,25 @@ fail:
      * If an error occurred, delete 'filename'. Even if the file existed
      * beforehand, it has been truncated and corrupted in the process.
      */
-    if (ret && bs) {
-        Error *local_delete_err = NULL;
-        int r_del = bdrv_co_delete_file(bs, &local_delete_err);
-        /*
-         * ENOTSUP will happen if the block driver doesn't support
-         * the 'bdrv_co_delete_file' interface. This is a predictable
-         * scenario and shouldn't be reported back to the user.
-         */
-        if ((r_del < 0) && (r_del != -ENOTSUP)) {
-            error_report_err(local_delete_err);
-        }
+    if (ret) {
+        bdrv_graph_co_rdlock();
+        bdrv_co_delete_file_noerr(bs);
+        bdrv_graph_co_rdunlock();
     }
 
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
     qapi_free_QCryptoBlockCreateOptions(create_opts);
     qobject_unref(cryptoopts);
     return ret;
 }
 
-static int block_crypto_get_info_luks(BlockDriverState *bs,
-                                      BlockDriverInfo *bdi)
+static int coroutine_fn GRAPH_RDLOCK
+block_crypto_co_get_info_luks(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     BlockDriverInfo subbdi;
     int ret;
 
-    ret = bdrv_get_info(bs->file->bs, &subbdi);
+    ret = bdrv_co_get_info(bs->file->bs, &subbdi);
     if (ret != 0) {
         return ret;
     }
@@ -786,6 +787,37 @@ block_crypto_get_specific_info_luks(BlockDriverState *bs, Error **errp)
     return spec_info;
 }
 
+static int GRAPH_RDLOCK
+block_crypto_amend_prepare(BlockDriverState *bs, Error **errp)
+{
+    BlockCrypto *crypto = bs->opaque;
+    int ret;
+
+    /* apply for exclusive read/write permissions to the underlying file */
+    crypto->updating_keys = true;
+    ret = bdrv_child_refresh_perms(bs, bs->file, errp);
+    if (ret < 0) {
+        /* Well, in this case we will not be updating any keys */
+        crypto->updating_keys = false;
+    }
+    return ret;
+}
+
+static void GRAPH_RDLOCK
+block_crypto_amend_cleanup(BlockDriverState *bs)
+{
+    BlockCrypto *crypto = bs->opaque;
+    Error *errp = NULL;
+
+    /* release exclusive read/write permissions to the underlying file */
+    crypto->updating_keys = false;
+    bdrv_child_refresh_perms(bs, bs->file, &errp);
+
+    if (errp) {
+        error_report_err(errp);
+    }
+}
+
 static int
 block_crypto_amend_options_generic_luks(BlockDriverState *bs,
                                         QCryptoBlockAmendOptions *amend_options,
@@ -793,33 +825,20 @@ block_crypto_amend_options_generic_luks(BlockDriverState *bs,
                                         Error **errp)
 {
     BlockCrypto *crypto = bs->opaque;
-    int ret;
 
     assert(crypto);
     assert(crypto->block);
 
-    /* apply for exclusive read/write permissions to the underlying file*/
-    crypto->updating_keys = true;
-    ret = bdrv_child_refresh_perms(bs, bs->file, errp);
-    if (ret) {
-        goto cleanup;
-    }
-
-    ret = qcrypto_block_amend_options(crypto->block,
-                                      block_crypto_read_func,
-                                      block_crypto_write_func,
-                                      bs,
-                                      amend_options,
-                                      force,
-                                      errp);
-cleanup:
-    /* release exclusive read/write permissions to the underlying file*/
-    crypto->updating_keys = false;
-    bdrv_child_refresh_perms(bs, bs->file, errp);
-    return ret;
+    return qcrypto_block_amend_options(crypto->block,
+                                       block_crypto_read_func,
+                                       block_crypto_write_func,
+                                       bs,
+                                       amend_options,
+                                       force,
+                                       errp);
 }
 
-static int
+static int GRAPH_RDLOCK
 block_crypto_amend_options_luks(BlockDriverState *bs,
                                 QemuOpts *opts,
                                 BlockDriverAmendStatusCB *status_cb,
@@ -842,8 +861,16 @@ block_crypto_amend_options_luks(BlockDriverState *bs,
     if (!amend_options) {
         goto cleanup;
     }
+
+    ret = block_crypto_amend_prepare(bs, errp);
+    if (ret) {
+        goto perm_cleanup;
+    }
     ret = block_crypto_amend_options_generic_luks(bs, amend_options,
                                                   force, errp);
+
+perm_cleanup:
+    block_crypto_amend_cleanup(bs);
 cleanup:
     qapi_free_QCryptoBlockAmendOptions(amend_options);
     return ret;
@@ -934,12 +961,14 @@ static BlockDriver bdrv_crypto_luks = {
     .bdrv_refresh_limits = block_crypto_refresh_limits,
     .bdrv_co_preadv     = block_crypto_co_preadv,
     .bdrv_co_pwritev    = block_crypto_co_pwritev,
-    .bdrv_getlength     = block_crypto_getlength,
+    .bdrv_co_getlength  = block_crypto_co_getlength,
     .bdrv_measure       = block_crypto_measure,
-    .bdrv_get_info      = block_crypto_get_info_luks,
+    .bdrv_co_get_info   = block_crypto_co_get_info_luks,
     .bdrv_get_specific_info = block_crypto_get_specific_info_luks,
     .bdrv_amend_options = block_crypto_amend_options_luks,
     .bdrv_co_amend      = block_crypto_co_amend_luks,
+    .bdrv_amend_pre_run = block_crypto_amend_prepare,
+    .bdrv_amend_clean   = block_crypto_amend_cleanup,
 
     .is_format          = true,
 
index 4f907c47befe460ee3be9f64134b8853ce3fd888..419f7c89ef228e146a7b4a9ee29829d389949bc5 100644 (file)
@@ -27,6 +27,7 @@
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qstring.h"
 
 // #define DEBUG_VERBOSE
 
-#if LIBCURL_VERSION_NUM >= 0x071000
-/* The multi interface timer callback was introduced in 7.16.0 */
-#define NEED_CURL_TIMER_CALLBACK
-#define HAVE_SOCKET_ACTION
-#endif
-
-#ifndef HAVE_SOCKET_ACTION
-/* If curl_multi_socket_action isn't available, define it statically here in
- * terms of curl_multi_socket. Note that ev_bitmask will be ignored, which is
- * less efficient but still safe. */
-static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
-                                            curl_socket_t sockfd,
-                                            int ev_bitmask,
-                                            int *running_handles)
-{
-    return curl_multi_socket(multi_handle, sockfd, running_handles);
-}
-#define curl_multi_socket_action __curl_multi_socket_action
-#endif
-
+/* CURL 7.85.0 switches to a string based API for specifying
+ * the desired protocols.
+ */
+#if LIBCURL_VERSION_NUM >= 0x075500
+#define PROTOCOLS "HTTP,HTTPS,FTP,FTPS"
+#else
 #define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
                    CURLPROTO_FTP | CURLPROTO_FTPS)
+#endif
 
 #define CURL_NUM_STATES 8
 #define CURL_NUM_ACB    8
@@ -98,8 +86,7 @@ typedef struct CURLAIOCB {
 
 typedef struct CURLSocket {
     int fd;
-    struct CURLState *state;
-    QLIST_ENTRY(CURLSocket) next;
+    struct BDRVCURLState *s;
 } CURLSocket;
 
 typedef struct CURLState
@@ -107,7 +94,6 @@ typedef struct CURLState
     struct BDRVCURLState *s;
     CURLAIOCB *acb[CURL_NUM_ACB];
     CURL *curl;
-    QLIST_HEAD(, CURLSocket) sockets;
     char *orig_buf;
     uint64_t buf_start;
     size_t buf_off;
@@ -122,6 +108,7 @@ typedef struct BDRVCURLState {
     QEMUTimer timer;
     uint64_t len;
     CURLState states[CURL_NUM_STATES];
+    GHashTable *sockets; /* GINT_TO_POINTER(fd) -> socket */
     char *url;
     size_t readahead_size;
     bool sslverify;
@@ -140,7 +127,21 @@ typedef struct BDRVCURLState {
 static void curl_clean_state(CURLState *s);
 static void curl_multi_do(void *arg);
 
-#ifdef NEED_CURL_TIMER_CALLBACK
+static gboolean curl_drop_socket(void *key, void *value, void *opaque)
+{
+    CURLSocket *socket = value;
+    BDRVCURLState *s = socket->s;
+
+    aio_set_fd_handler(s->aio_context, socket->fd,
+                       NULL, NULL, NULL, NULL, NULL);
+    return true;
+}
+
+static void curl_drop_all_sockets(GHashTable *sockets)
+{
+    g_hash_table_foreach_remove(sockets, curl_drop_socket, NULL);
+}
+
 /* Called from curl_multi_do_locked, with s->mutex held.  */
 static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
 {
@@ -156,7 +157,6 @@ static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
     }
     return 0;
 }
-#endif
 
 /* Called from curl_multi_do_locked, with s->mutex held.  */
 static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
@@ -169,41 +169,37 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
     curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state);
     s = state->s;
 
-    QLIST_FOREACH(socket, &state->sockets, next) {
-        if (socket->fd == fd) {
-            break;
-        }
-    }
+    socket = g_hash_table_lookup(s->sockets, GINT_TO_POINTER(fd));
     if (!socket) {
         socket = g_new0(CURLSocket, 1);
         socket->fd = fd;
-        socket->state = state;
-        QLIST_INSERT_HEAD(&state->sockets, socket, next);
+        socket->s = s;
+        g_hash_table_insert(s->sockets, GINT_TO_POINTER(fd), socket);
     }
 
     trace_curl_sock_cb(action, (int)fd);
     switch (action) {
         case CURL_POLL_IN:
-            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_do, NULL, NULL, socket);
+            aio_set_fd_handler(s->aio_context, fd,
+                               curl_multi_do, NULL, NULL, NULL, socket);
             break;
         case CURL_POLL_OUT:
-            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, curl_multi_do, NULL, socket);
+            aio_set_fd_handler(s->aio_context, fd,
+                               NULL, curl_multi_do, NULL, NULL, socket);
             break;
         case CURL_POLL_INOUT:
-            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_do, curl_multi_do, NULL, socket);
+            aio_set_fd_handler(s->aio_context, fd,
+                               curl_multi_do, curl_multi_do,
+                               NULL, NULL, socket);
             break;
         case CURL_POLL_REMOVE:
-            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, NULL, NULL, NULL);
+            aio_set_fd_handler(s->aio_context, fd,
+                               NULL, NULL, NULL, NULL, NULL);
             break;
     }
 
     if (action == CURL_POLL_REMOVE) {
-        QLIST_REMOVE(socket, next);
-        g_free(socket);
+        g_hash_table_remove(s->sockets, GINT_TO_POINTER(fd));
     }
 
     return 0;
@@ -407,7 +403,7 @@ static void curl_multi_check_completion(BDRVCURLState *s)
 /* Called with s->mutex held.  */
 static void curl_multi_do_locked(CURLSocket *socket)
 {
-    BDRVCURLState *s = socket->state->s;
+    BDRVCURLState *s = socket->s;
     int running;
     int r;
 
@@ -423,7 +419,7 @@ static void curl_multi_do_locked(CURLSocket *socket)
 static void curl_multi_do(void *arg)
 {
     CURLSocket *socket = arg;
-    BDRVCURLState *s = socket->state->s;
+    BDRVCURLState *s = socket->s;
 
     qemu_mutex_lock(&s->mutex);
     curl_multi_do_locked(socket);
@@ -433,7 +429,6 @@ static void curl_multi_do(void *arg)
 
 static void curl_multi_timeout_do(void *arg)
 {
-#ifdef NEED_CURL_TIMER_CALLBACK
     BDRVCURLState *s = (BDRVCURLState *)arg;
     int running;
 
@@ -446,9 +441,6 @@ static void curl_multi_timeout_do(void *arg)
 
     curl_multi_check_completion(s);
     qemu_mutex_unlock(&s->mutex);
-#else
-    abort();
-#endif
 }
 
 /* Called with s->mutex held.  */
@@ -474,60 +466,90 @@ static int curl_init_state(BDRVCURLState *s, CURLState *state)
         if (!state->curl) {
             return -EIO;
         }
-        curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
-        curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
-                         (long) s->sslverify);
-        curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYHOST,
-                         s->sslverify ? 2L : 0L);
+        if (curl_easy_setopt(state->curl, CURLOPT_URL, s->url) ||
+            curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
+                             (long) s->sslverify) ||
+            curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYHOST,
+                             s->sslverify ? 2L : 0L)) {
+            goto err;
+        }
         if (s->cookie) {
-            curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie);
+            if (curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie)) {
+                goto err;
+            }
+        }
+        if (curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, (long)s->timeout) ||
+            curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION,
+                             (void *)curl_read_cb) ||
+            curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state) ||
+            curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state) ||
+            curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1) ||
+            curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1) ||
+            curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1) ||
+            curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg) ||
+            curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1)) {
+            goto err;
         }
-        curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, (long)s->timeout);
-        curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION,
-                         (void *)curl_read_cb);
-        curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state);
-        curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state);
-        curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1);
-        curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1);
-        curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1);
-        curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg);
-        curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1);
-
         if (s->username) {
-            curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username);
+            if (curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username)) {
+                goto err;
+            }
         }
         if (s->password) {
-            curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password);
+            if (curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password)) {
+                goto err;
+            }
         }
         if (s->proxyusername) {
-            curl_easy_setopt(state->curl,
-                             CURLOPT_PROXYUSERNAME, s->proxyusername);
+            if (curl_easy_setopt(state->curl,
+                                 CURLOPT_PROXYUSERNAME, s->proxyusername)) {
+                goto err;
+            }
         }
         if (s->proxypassword) {
-            curl_easy_setopt(state->curl,
-                             CURLOPT_PROXYPASSWORD, s->proxypassword);
+            if (curl_easy_setopt(state->curl,
+                                 CURLOPT_PROXYPASSWORD, s->proxypassword)) {
+                goto err;
+            }
         }
 
         /* Restrict supported protocols to avoid security issues in the more
          * obscure protocols.  For example, do not allow POP3/SMTP/IMAP see
          * CVE-2013-0249.
          *
-         * Restricting protocols is only supported from 7.19.4 upwards.
+         * Restricting protocols is only supported from 7.19.4 upwards. Note:
+         * version 7.85.0 deprecates CURLOPT_*PROTOCOLS in favour of a string
+         * based CURLOPT_*PROTOCOLS_STR API.
          */
-#if LIBCURL_VERSION_NUM >= 0x071304
-        curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS);
-        curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS);
+#if LIBCURL_VERSION_NUM >= 0x075500
+        if (curl_easy_setopt(state->curl,
+                             CURLOPT_PROTOCOLS_STR, PROTOCOLS) ||
+            curl_easy_setopt(state->curl,
+                             CURLOPT_REDIR_PROTOCOLS_STR, PROTOCOLS)) {
+            goto err;
+        }
+#elif LIBCURL_VERSION_NUM >= 0x071304
+        if (curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS) ||
+            curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS)) {
+            goto err;
+        }
 #endif
 
 #ifdef DEBUG_VERBOSE
-        curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1);
+        if (curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1)) {
+            goto err;
+        }
 #endif
     }
 
-    QLIST_INIT(&state->sockets);
     state->s = s;
 
     return 0;
+
+err:
+    curl_easy_cleanup(state->curl);
+    state->curl = NULL;
+    return -EIO;
 }
 
 /* Called with s->mutex held.  */
@@ -541,13 +563,6 @@ static void curl_clean_state(CURLState *s)
     if (s->s->multi)
         curl_multi_remove_handle(s->s->multi, s->curl);
 
-    while (!QLIST_EMPTY(&s->sockets)) {
-        CURLSocket *socket = QLIST_FIRST(&s->sockets);
-
-        QLIST_REMOVE(socket, next);
-        g_free(socket);
-    }
-
     s->in_use = 0;
 
     qemu_co_enter_next(&s->s->free_state_waitq, &s->s->mutex);
@@ -564,23 +579,24 @@ static void curl_detach_aio_context(BlockDriverState *bs)
     BDRVCURLState *s = bs->opaque;
     int i;
 
-    qemu_mutex_lock(&s->mutex);
-    for (i = 0; i < CURL_NUM_STATES; i++) {
-        if (s->states[i].in_use) {
-            curl_clean_state(&s->states[i]);
+    WITH_QEMU_LOCK_GUARD(&s->mutex) {
+        curl_drop_all_sockets(s->sockets);
+        for (i = 0; i < CURL_NUM_STATES; i++) {
+            if (s->states[i].in_use) {
+                curl_clean_state(&s->states[i]);
+            }
+            if (s->states[i].curl) {
+                curl_easy_cleanup(s->states[i].curl);
+                s->states[i].curl = NULL;
+            }
+            g_free(s->states[i].orig_buf);
+            s->states[i].orig_buf = NULL;
         }
-        if (s->states[i].curl) {
-            curl_easy_cleanup(s->states[i].curl);
-            s->states[i].curl = NULL;
+        if (s->multi) {
+            curl_multi_cleanup(s->multi);
+            s->multi = NULL;
         }
-        g_free(s->states[i].orig_buf);
-        s->states[i].orig_buf = NULL;
     }
-    if (s->multi) {
-        curl_multi_cleanup(s->multi);
-        s->multi = NULL;
-    }
-    qemu_mutex_unlock(&s->mutex);
 
     timer_del(&s->timer);
 }
@@ -598,10 +614,8 @@ static void curl_attach_aio_context(BlockDriverState *bs,
     s->multi = curl_multi_init();
     s->aio_context = new_context;
     curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
-#ifdef NEED_CURL_TIMER_CALLBACK
     curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
     curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
-#endif
 }
 
 static QemuOptsList runtime_opts = {
@@ -672,13 +686,20 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     const char *file;
     const char *cookie;
     const char *cookie_secret;
-    double d;
+    /* CURL >= 7.55.0 uses curl_off_t for content length instead of a double */
+#if LIBCURL_VERSION_NUM >= 0x073700
+    curl_off_t cl;
+#else
+    double cl;
+#endif
     const char *secretid;
     const char *protocol_delimiter;
     int ret;
 
+    bdrv_graph_rdlock_main_loop();
     ret = bdrv_apply_auto_read_only(bs, "curl driver does not support writes",
                                     errp);
+    bdrv_graph_rdunlock_main_loop();
     if (ret < 0) {
         return ret;
     }
@@ -773,6 +794,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     qemu_co_queue_init(&s->free_state_waitq);
     s->aio_context = bdrv_get_aio_context(bs);
     s->url = g_strdup(file);
+    s->sockets = g_hash_table_new_full(NULL, NULL, NULL, g_free);
     qemu_mutex_lock(&s->mutex);
     state = curl_find_state(s);
     qemu_mutex_unlock(&s->mutex);
@@ -783,37 +805,51 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     // Get file size
 
     if (curl_init_state(s, state) < 0) {
+        pstrcpy(state->errmsg, CURL_ERROR_SIZE,
+                "curl library initialization failed.");
         goto out;
     }
 
     s->accept_range = false;
-    curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1);
-    curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION,
-                     curl_header_cb);
-    curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s);
+    if (curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1) ||
+        curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION, curl_header_cb) ||
+        curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s)) {
+        pstrcpy(state->errmsg, CURL_ERROR_SIZE,
+                "curl library initialization failed.");
+        goto out;
+    }
     if (curl_easy_perform(state->curl))
         goto out;
-    if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d)) {
+    /* CURL 7.55.0 deprecates CURLINFO_CONTENT_LENGTH_DOWNLOAD in favour of
+     * the *_T version which returns a more sensible type for content length.
+     */
+#if LIBCURL_VERSION_NUM >= 0x073700
+    if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &cl)) {
+        goto out;
+    }
+#else
+    if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl)) {
         goto out;
     }
+#endif
     /* Prior CURL 7.19.4 return value of 0 could mean that the file size is not
      * know or the size is zero. From 7.19.4 CURL returns -1 if size is not
      * known and zero if it is really zero-length file. */
 #if LIBCURL_VERSION_NUM >= 0x071304
-    if (d < 0) {
+    if (cl < 0) {
         pstrcpy(state->errmsg, CURL_ERROR_SIZE,
                 "Server didn't report file size.");
         goto out;
     }
 #else
-    if (d <= 0) {
+    if (cl <= 0) {
         pstrcpy(state->errmsg, CURL_ERROR_SIZE,
                 "Unknown file size or zero-length file.");
         goto out;
     }
 #endif
 
-    s->len = d;
+    s->len = cl;
 
     if ((!strncasecmp(s->url, "http://", strlen("http://"))
         || !strncasecmp(s->url, "https://", strlen("https://")))
@@ -846,11 +882,15 @@ out_noclean:
     g_free(s->username);
     g_free(s->proxyusername);
     g_free(s->proxypassword);
+    if (s->sockets) {
+        curl_drop_all_sockets(s->sockets);
+        g_hash_table_destroy(s->sockets);
+    }
     qemu_opts_del(opts);
     return -EINVAL;
 }
 
-static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
+static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
 {
     CURLState *state;
     int running;
@@ -901,9 +941,8 @@ static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
 
     snprintf(state->range, 127, "%" PRIu64 "-%" PRIu64, start, end);
     trace_curl_setup_preadv(acb->bytes, start, state->range);
-    curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range);
-
-    if (curl_multi_add_handle(s->multi, state->curl) != CURLM_OK) {
+    if (curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range) ||
+        curl_multi_add_handle(s->multi, state->curl) != CURLM_OK) {
         state->acb[0] = NULL;
         acb->ret = -EIO;
 
@@ -919,7 +958,8 @@ out:
 }
 
 static int coroutine_fn curl_co_preadv(BlockDriverState *bs,
-        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+        int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+        BdrvRequestFlags flags)
 {
     CURLAIOCB acb = {
         .co = qemu_coroutine_self(),
@@ -944,6 +984,7 @@ static void curl_close(BlockDriverState *bs)
     curl_detach_aio_context(bs);
     qemu_mutex_destroy(&s->mutex);
 
+    g_hash_table_destroy(s->sockets);
     g_free(s->cookie);
     g_free(s->url);
     g_free(s->username);
@@ -951,7 +992,7 @@ static void curl_close(BlockDriverState *bs)
     g_free(s->proxypassword);
 }
 
-static int64_t curl_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn curl_co_getlength(BlockDriverState *bs)
 {
     BDRVCURLState *s = bs->opaque;
     return s->len;
@@ -995,7 +1036,7 @@ static BlockDriver bdrv_http = {
     .bdrv_parse_filename        = curl_parse_filename,
     .bdrv_file_open             = curl_open,
     .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
+    .bdrv_co_getlength          = curl_co_getlength,
 
     .bdrv_co_preadv             = curl_co_preadv,
 
@@ -1014,7 +1055,7 @@ static BlockDriver bdrv_https = {
     .bdrv_parse_filename        = curl_parse_filename,
     .bdrv_file_open             = curl_open,
     .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
+    .bdrv_co_getlength          = curl_co_getlength,
 
     .bdrv_co_preadv             = curl_co_preadv,
 
@@ -1033,7 +1074,7 @@ static BlockDriver bdrv_ftp = {
     .bdrv_parse_filename        = curl_parse_filename,
     .bdrv_file_open             = curl_open,
     .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
+    .bdrv_co_getlength          = curl_co_getlength,
 
     .bdrv_co_preadv             = curl_co_preadv,
 
@@ -1052,7 +1093,7 @@ static BlockDriver bdrv_ftps = {
     .bdrv_parse_filename        = curl_parse_filename,
     .bdrv_file_open             = curl_open,
     .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
+    .bdrv_co_getlength          = curl_co_getlength,
 
     .bdrv_co_preadv             = curl_co_preadv,
 
index c01319b188c3c37e380092ca7c5a5b69f8ebc1b3..13a1979755d8a52b2369a6f3bdd1145a7bdeae4f 100644 (file)
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "trace.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
+#include "block/dirty-bitmap.h"
 #include "qemu/main-loop.h"
 
 struct BdrvDirtyBitmap {
@@ -166,43 +168,6 @@ bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
     return !bitmap->disabled;
 }
 
-/**
- * bdrv_dirty_bitmap_status: This API is now deprecated.
- * Called with BQL taken.
- *
- * A BdrvDirtyBitmap can be in four possible user-visible states:
- * (1) Active:   successor is NULL, and disabled is false: full r/w mode
- * (2) Disabled: successor is NULL, and disabled is true: qualified r/w mode,
- *               guest writes are dropped, but monitor writes are possible,
- *               through commands like merge and clear.
- * (3) Frozen:   successor is not NULL.
- *               A frozen bitmap cannot be renamed, deleted, cleared, set,
- *               enabled, merged to, etc. A frozen bitmap can only abdicate()
- *               or reclaim().
- *               In this state, the anonymous successor bitmap may be either
- *               Active and recording writes from the guest (e.g. backup jobs),
- *               or it can be Disabled and not recording writes.
- * (4) Locked:   Whether Active or Disabled, the user cannot modify this bitmap
- *               in any way from the monitor.
- * (5) Inconsistent: This is a persistent bitmap whose "in use" bit is set, and
- *                   is unusable by QEMU. It can be deleted to remove it from
- *                   the qcow2.
- */
-DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
-{
-    if (bdrv_dirty_bitmap_inconsistent(bitmap)) {
-        return DIRTY_BITMAP_STATUS_INCONSISTENT;
-    } else if (bdrv_dirty_bitmap_has_successor(bitmap)) {
-        return DIRTY_BITMAP_STATUS_FROZEN;
-    } else if (bdrv_dirty_bitmap_busy(bitmap)) {
-        return DIRTY_BITMAP_STATUS_LOCKED;
-    } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
-        return DIRTY_BITMAP_STATUS_DISABLED;
-    } else {
-        return DIRTY_BITMAP_STATUS_ACTIVE;
-    }
-}
-
 /* Called with BQL taken.  */
 static bool bdrv_dirty_bitmap_recording(BdrvDirtyBitmap *bitmap)
 {
@@ -230,7 +195,7 @@ int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, uint32_t flags,
         error_setg(errp, "Bitmap '%s' is inconsistent and cannot be used",
                    bitmap->name);
         error_append_hint(errp, "Try block-dirty-bitmap-remove to delete"
-                          " this bitmap from disk");
+                          " this bitmap from disk\n");
         return -1;
     }
 
@@ -346,10 +311,7 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BdrvDirtyBitmap *parent,
         return NULL;
     }
 
-    if (!hbitmap_merge(parent->bitmap, successor->bitmap, parent->bitmap)) {
-        error_setg(errp, "Merging of parent and successor bitmap failed");
-        return NULL;
-    }
+    hbitmap_merge(parent->bitmap, successor->bitmap, parent->bitmap);
 
     parent->disabled = successor->disabled;
     parent->busy = false;
@@ -428,10 +390,11 @@ void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
  * not fail.
  * This function doesn't release corresponding BdrvDirtyBitmap.
  */
-static int coroutine_fn
+int coroutine_fn
 bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
                                        Error **errp)
 {
+    assert_bdrv_graph_readable();
     if (bs->drv && bs->drv->bdrv_co_remove_persistent_dirty_bitmap) {
         return bs->drv->bdrv_co_remove_persistent_dirty_bitmap(bs, name, errp);
     }
@@ -439,45 +402,6 @@ bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
     return 0;
 }
 
-typedef struct BdrvRemovePersistentDirtyBitmapCo {
-    BlockDriverState *bs;
-    const char *name;
-    Error **errp;
-    int ret;
-} BdrvRemovePersistentDirtyBitmapCo;
-
-static void coroutine_fn
-bdrv_co_remove_persistent_dirty_bitmap_entry(void *opaque)
-{
-    BdrvRemovePersistentDirtyBitmapCo *s = opaque;
-
-    s->ret = bdrv_co_remove_persistent_dirty_bitmap(s->bs, s->name, s->errp);
-    aio_wait_kick();
-}
-
-int bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
-                                        Error **errp)
-{
-    if (qemu_in_coroutine()) {
-        return bdrv_co_remove_persistent_dirty_bitmap(bs, name, errp);
-    } else {
-        Coroutine *co;
-        BdrvRemovePersistentDirtyBitmapCo s = {
-            .bs = bs,
-            .name = name,
-            .errp = errp,
-            .ret = -EINPROGRESS,
-        };
-
-        co = qemu_coroutine_create(bdrv_co_remove_persistent_dirty_bitmap_entry,
-                                   &s);
-        bdrv_coroutine_enter(bs, co);
-        BDRV_POLL_WHILE(bs, s.ret == -EINPROGRESS);
-
-        return s.ret;
-    }
-}
-
 bool
 bdrv_supports_persistent_dirty_bitmap(BlockDriverState *bs)
 {
@@ -487,11 +411,12 @@ bdrv_supports_persistent_dirty_bitmap(BlockDriverState *bs)
     return false;
 }
 
-static bool coroutine_fn
+bool coroutine_fn
 bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
                                    uint32_t granularity, Error **errp)
 {
     BlockDriver *drv = bs->drv;
+    assert_bdrv_graph_readable();
 
     if (!drv) {
         error_setg_errno(errp, ENOMEDIUM,
@@ -510,50 +435,6 @@ bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
     return drv->bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp);
 }
 
-typedef struct BdrvCanStoreNewDirtyBitmapCo {
-    BlockDriverState *bs;
-    const char *name;
-    uint32_t granularity;
-    Error **errp;
-    bool ret;
-
-    bool in_progress;
-} BdrvCanStoreNewDirtyBitmapCo;
-
-static void coroutine_fn bdrv_co_can_store_new_dirty_bitmap_entry(void *opaque)
-{
-    BdrvCanStoreNewDirtyBitmapCo *s = opaque;
-
-    s->ret = bdrv_co_can_store_new_dirty_bitmap(s->bs, s->name, s->granularity,
-                                                s->errp);
-    s->in_progress = false;
-    aio_wait_kick();
-}
-
-bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
-                                     uint32_t granularity, Error **errp)
-{
-    if (qemu_in_coroutine()) {
-        return bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp);
-    } else {
-        Coroutine *co;
-        BdrvCanStoreNewDirtyBitmapCo s = {
-            .bs = bs,
-            .name = name,
-            .granularity = granularity,
-            .errp = errp,
-            .in_progress = true,
-        };
-
-        co = qemu_coroutine_create(bdrv_co_can_store_new_dirty_bitmap_entry,
-                                   &s);
-        bdrv_coroutine_enter(bs, co);
-        BDRV_POLL_WHILE(bs, s.in_progress);
-
-        return s.ret;
-    }
-}
-
 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
     bdrv_dirty_bitmaps_lock(bitmap->bs);
@@ -572,25 +453,21 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
 {
     BdrvDirtyBitmap *bm;
     BlockDirtyInfoList *list = NULL;
-    BlockDirtyInfoList **plist = &list;
+    BlockDirtyInfoList **tail = &list;
 
     bdrv_dirty_bitmaps_lock(bs);
     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
-        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
+
         info->count = bdrv_get_dirty_count(bm);
         info->granularity = bdrv_dirty_bitmap_granularity(bm);
-        info->has_name = !!bm->name;
         info->name = g_strdup(bm->name);
-        info->status = bdrv_dirty_bitmap_status(bm);
         info->recording = bdrv_dirty_bitmap_recording(bm);
         info->busy = bdrv_dirty_bitmap_busy(bm);
         info->persistent = bm->persistent;
         info->has_inconsistent = bm->inconsistent;
         info->inconsistent = bm->inconsistent;
-        entry->value = info;
-        *plist = entry;
-        plist = &entry->next;
+        QAPI_LIST_APPEND(tail, info);
     }
     bdrv_dirty_bitmaps_unlock(bs);
 
@@ -696,6 +573,7 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
 
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
 {
+    IO_CODE();
     assert(!bdrv_dirty_bitmap_readonly(bitmap));
     bdrv_dirty_bitmaps_lock(bitmap->bs);
     if (!out) {
@@ -713,6 +591,7 @@ void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup)
 {
     HBitmap *tmp = bitmap->bitmap;
     assert(!bdrv_dirty_bitmap_readonly(bitmap));
+    GLOBAL_STATE_CODE();
     bitmap->bitmap = backup;
     hbitmap_free(tmp);
 }
@@ -728,6 +607,19 @@ uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap)
     return hbitmap_serialization_align(bitmap->bitmap);
 }
 
+/* Return the disk size covered by a chunk of serialized bitmap data. */
+uint64_t bdrv_dirty_bitmap_serialization_coverage(int serialized_chunk_size,
+                                                  const BdrvDirtyBitmap *bitmap)
+{
+    uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap);
+    uint64_t limit = granularity * (serialized_chunk_size << 3);
+
+    assert(QEMU_IS_ALIGNED(limit,
+                           bdrv_dirty_bitmap_serialization_align(bitmap)));
+    return limit;
+}
+
+
 void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap,
                                       uint8_t *buf, uint64_t offset,
                                       uint64_t bytes)
@@ -764,6 +656,7 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
 void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     BdrvDirtyBitmap *bitmap;
+    IO_CODE();
 
     if (QLIST_EMPTY(&bs->dirty_bitmaps)) {
         return;
@@ -902,16 +795,25 @@ bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap *bitmap,
                                    dirty_start, dirty_count);
 }
 
+bool bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap, int64_t offset,
+                              int64_t bytes, int64_t *count)
+{
+    return hbitmap_status(bitmap->bitmap, offset, bytes, count);
+}
+
 /**
  * bdrv_merge_dirty_bitmap: merge src into dest.
  * Ensures permissions on bitmaps are reasonable; use for public API.
  *
  * @backup: If provided, make a copy of dest here prior to merge.
+ *
+ * Returns true on success, false on failure. In case of failure bitmaps are
+ * untouched.
  */
-void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
+bool bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
                              HBitmap **backup, Error **errp)
 {
-    bool ret;
+    bool ret = false;
 
     bdrv_dirty_bitmaps_lock(dest->bs);
     if (src->bs != dest->bs) {
@@ -926,35 +828,39 @@ void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
         goto out;
     }
 
-    if (!hbitmap_can_merge(dest->bitmap, src->bitmap)) {
-        error_setg(errp, "Bitmaps are incompatible and can't be merged");
+    if (bdrv_dirty_bitmap_size(src) != bdrv_dirty_bitmap_size(dest)) {
+        error_setg(errp, "Bitmaps are of different sizes (destination size is %"
+                   PRId64 ", source size is %" PRId64 ") and can't be merged",
+                   bdrv_dirty_bitmap_size(dest), bdrv_dirty_bitmap_size(src));
         goto out;
     }
 
-    ret = bdrv_dirty_bitmap_merge_internal(dest, src, backup, false);
-    assert(ret);
+    bdrv_dirty_bitmap_merge_internal(dest, src, backup, false);
+    ret = true;
 
 out:
     bdrv_dirty_bitmaps_unlock(dest->bs);
     if (src->bs != dest->bs) {
         bdrv_dirty_bitmaps_unlock(src->bs);
     }
+
+    return ret;
 }
 
 /**
  * bdrv_dirty_bitmap_merge_internal: merge src into dest.
  * Does NOT check bitmap permissions; not suitable for use as public API.
+ * @dest, @src and @backup (if not NULL) must have same size.
  *
  * @backup: If provided, make a copy of dest here prior to merge.
  * @lock: If true, lock and unlock bitmaps on the way in/out.
- * returns true if the merge succeeded; false if unattempted.
  */
-bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
+void bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
                                       const BdrvDirtyBitmap *src,
                                       HBitmap **backup,
                                       bool lock)
 {
-    bool ret;
+    IO_CODE();
 
     assert(!bdrv_dirty_bitmap_readonly(dest));
     assert(!bdrv_dirty_bitmap_inconsistent(dest));
@@ -970,9 +876,9 @@ bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
     if (backup) {
         *backup = dest->bitmap;
         dest->bitmap = hbitmap_alloc(dest->size, hbitmap_granularity(*backup));
-        ret = hbitmap_merge(*backup, src->bitmap, dest->bitmap);
+        hbitmap_merge(*backup, src->bitmap, dest->bitmap);
     } else {
-        ret = hbitmap_merge(dest->bitmap, src->bitmap, dest->bitmap);
+        hbitmap_merge(dest->bitmap, src->bitmap, dest->bitmap);
     }
 
     if (lock) {
@@ -981,6 +887,4 @@ bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
             bdrv_dirty_bitmaps_unlock(src->bs);
         }
     }
-
-    return ret;
 }
index 6798cf4fbfffe2b05b17d1e452edd3e434a87df5..4ea0b9b20da9c89900e43016b1b3a1a18f3448ec 100644 (file)
  */
 #include "qemu/osdep.h"
 #include "dmg.h"
+
+/* Work around a -Wstrict-prototypes warning in LZFSE headers */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
 #include <lzfse.h>
+#pragma GCC diagnostic pop
 
 static int dmg_uncompress_lzfse_do(char *next_in, unsigned int avail_in,
                                    char *next_out, unsigned int avail_out)
index ef35a505f2662049fb2b32007fd6dcb313ddff9a..33dcb3a34981a0414a1070d3ac4362d264ee519a 100644 (file)
  */
 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "qemu/bswap.h"
 #include "qemu/error-report.h"
 #include "qemu/module.h"
+#include "qemu/memalign.h"
 #include "dmg.h"
 
-int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
-                          char *next_out, unsigned int avail_out);
-
-int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in,
-                            char *next_out, unsigned int avail_out);
+BdrvDmgUncompressFunc *dmg_uncompress_bz2;
+BdrvDmgUncompressFunc *dmg_uncompress_lzfse;
 
 enum {
     /* Limit chunk sizes to prevent unreasonable amounts of memory being used
@@ -71,12 +70,13 @@ static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
     return 0;
 }
 
-static int read_uint64(BlockDriverState *bs, int64_t offset, uint64_t *result)
+static int GRAPH_RDLOCK
+read_uint64(BlockDriverState *bs, int64_t offset, uint64_t *result)
 {
     uint64_t buffer;
     int ret;
 
-    ret = bdrv_pread(bs->file, offset, &buffer, 8);
+    ret = bdrv_pread(bs->file, offset, 8, &buffer, 0);
     if (ret < 0) {
         return ret;
     }
@@ -85,12 +85,13 @@ static int read_uint64(BlockDriverState *bs, int64_t offset, uint64_t *result)
     return 0;
 }
 
-static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
+static int GRAPH_RDLOCK
+read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
 {
     uint32_t buffer;
     int ret;
 
-    ret = bdrv_pread(bs->file, offset, &buffer, 4);
+    ret = bdrv_pread(bs->file, offset, 4, &buffer, 0);
     if (ret < 0) {
         return ret;
     }
@@ -171,7 +172,7 @@ static int64_t dmg_find_koly_offset(BdrvChild *file, Error **errp)
         offset = length - 511 - 512;
     }
     length = length < 515 ? length : 515;
-    ret = bdrv_pread(file, offset, buffer, length);
+    ret = bdrv_pread(file, offset, length, buffer, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Failed while reading UDIF trailer");
         return ret;
@@ -253,6 +254,25 @@ static int dmg_read_mish_block(BDRVDMGState *s, DmgHeaderState *ds,
     for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
         s->types[i] = buff_read_uint32(buffer, offset);
         if (!dmg_is_known_block_type(s->types[i])) {
+            switch (s->types[i]) {
+            case UDBZ:
+                warn_report_once("dmg-bzip2 module is missing, accessing bzip2 "
+                                 "compressed blocks will result in I/O errors");
+                break;
+            case ULFO:
+                warn_report_once("dmg-lzfse module is missing, accessing lzfse "
+                                 "compressed blocks will result in I/O errors");
+                break;
+            case UDCM:
+            case UDLE:
+                /* Comments and last entry can be ignored without problems */
+                break;
+            default:
+                warn_report_once("Image contains chunks of unknown type %x, "
+                                 "accessing them will result in I/O errors",
+                                 s->types[i]);
+                break;
+            }
             chunk_count--;
             i--;
             offset += 40;
@@ -303,8 +323,9 @@ fail:
     return ret;
 }
 
-static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
-                                  uint64_t info_begin, uint64_t info_length)
+static int GRAPH_RDLOCK
+dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
+                       uint64_t info_begin, uint64_t info_length)
 {
     BDRVDMGState *s = bs->opaque;
     int ret;
@@ -351,7 +372,7 @@ static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
         offset += 4;
 
         buffer = g_realloc(buffer, count);
-        ret = bdrv_pread(bs->file, offset, buffer, count);
+        ret = bdrv_pread(bs->file, offset, count, buffer, 0);
         if (ret < 0) {
             goto fail;
         }
@@ -370,8 +391,9 @@ fail:
     return ret;
 }
 
-static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
-                              uint64_t info_begin, uint64_t info_length)
+static int GRAPH_RDLOCK
+dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
+                   uint64_t info_begin, uint64_t info_length)
 {
     BDRVDMGState *s = bs->opaque;
     int ret;
@@ -388,8 +410,8 @@ static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
 
     buffer = g_malloc(info_length + 1);
     buffer[info_length] = '\0';
-    ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
-    if (ret != info_length) {
+    ret = bdrv_pread(bs->file, info_begin, info_length, buffer, 0);
+    if (ret < 0) {
         ret = -EINVAL;
         goto fail;
     }
@@ -434,19 +456,33 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
     int64_t offset;
     int ret;
 
+    GLOBAL_STATE_CODE();
+
+    bdrv_graph_rdlock_main_loop();
     ret = bdrv_apply_auto_read_only(bs, NULL, errp);
+    bdrv_graph_rdunlock_main_loop();
     if (ret < 0) {
         return ret;
     }
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_IMAGE, false, errp);
-    if (!bs->file) {
-        return -EINVAL;
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
     }
 
-    block_module_load_one("dmg-bz2");
-    block_module_load_one("dmg-lzfse");
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    /*
+     * NB: if uncompress submodules are absent,
+     * ie block_module_load return value == 0, the function pointers
+     * dmg_uncompress_bz2 and dmg_uncompress_lzfse will be NULL.
+     */
+    if (block_module_load("dmg-bz2", errp) < 0) {
+        return -EINVAL;
+    }
+    if (block_module_load("dmg-lzfse", errp) < 0) {
+        return -EINVAL;
+    }
 
     s->n_chunks = 0;
     s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
@@ -591,7 +627,8 @@ err:
     return s->n_chunks; /* error */
 }
 
-static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
+static int coroutine_fn GRAPH_RDLOCK
+dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
 {
     BDRVDMGState *s = bs->opaque;
 
@@ -608,9 +645,9 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
         case UDZO: { /* zlib compressed */
             /* we need to buffer, because only the chunk as whole can be
              * inflated. */
-            ret = bdrv_pread(bs->file, s->offsets[chunk],
-                             s->compressed_chunk, s->lengths[chunk]);
-            if (ret != s->lengths[chunk]) {
+            ret = bdrv_co_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
+                                s->compressed_chunk, 0);
+            if (ret < 0) {
                 return -1;
             }
 
@@ -634,9 +671,9 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
             }
             /* we need to buffer, because only the chunk as whole can be
              * inflated. */
-            ret = bdrv_pread(bs->file, s->offsets[chunk],
-                             s->compressed_chunk, s->lengths[chunk]);
-            if (ret != s->lengths[chunk]) {
+            ret = bdrv_co_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
+                                s->compressed_chunk, 0);
+            if (ret < 0) {
                 return -1;
             }
 
@@ -655,9 +692,9 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
             }
             /* we need to buffer, because only the chunk as whole can be
              * inflated. */
-            ret = bdrv_pread(bs->file, s->offsets[chunk],
-                             s->compressed_chunk, s->lengths[chunk]);
-            if (ret != s->lengths[chunk]) {
+            ret = bdrv_co_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
+                                s->compressed_chunk, 0);
+            if (ret < 0) {
                 return -1;
             }
 
@@ -671,9 +708,9 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
             }
             break;
         case UDRW: /* copy */
-            ret = bdrv_pread(bs->file, s->offsets[chunk],
-                             s->uncompressed_chunk, s->lengths[chunk]);
-            if (ret != s->lengths[chunk]) {
+            ret = bdrv_co_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
+                                s->uncompressed_chunk, 0);
+            if (ret < 0) {
                 return -1;
             }
             break;
@@ -688,9 +725,9 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
     return 0;
 }
 
-static int coroutine_fn
-dmg_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-              QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+dmg_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+              QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVDMGState *s = bs->opaque;
     uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
index e488601b626d4244a06f6edc1c8a529470453074..dcd6165e6397f95ed661aacd5b9fe2ed7e8e472e 100644 (file)
@@ -51,10 +51,10 @@ typedef struct BDRVDMGState {
     z_stream zstream;
 } BDRVDMGState;
 
-extern int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
-                                 char *next_out, unsigned int avail_out);
+typedef int BdrvDmgUncompressFunc(char *next_in, unsigned int avail_in,
+                                  char *next_out, unsigned int avail_out);
 
-extern int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in,
-                                   char *next_out, unsigned int avail_out);
+extern BdrvDmgUncompressFunc *dmg_uncompress_bz2;
+extern BdrvDmgUncompressFunc *dmg_uncompress_lzfse;
 
 #endif
index bad6f21b1c15ac849cf4f3f82a5bca56228e7573..a8f274e5268934c5ff2b0e8b3ae5242a5f8fc904 100644 (file)
@@ -17,6 +17,7 @@
 #include "sysemu/block-backend.h"
 #include "sysemu/iothread.h"
 #include "block/export.h"
+#include "block/fuse.h"
 #include "block/nbd.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-block-export.h"
 #ifdef CONFIG_VHOST_USER_BLK_SERVER
 #include "vhost-user-blk-server.h"
 #endif
+#ifdef CONFIG_VDUSE_BLK_EXPORT
+#include "vduse-blk.h"
+#endif
 
 static const BlockExportDriver *blk_exp_drivers[] = {
     &blk_exp_nbd,
 #ifdef CONFIG_VHOST_USER_BLK_SERVER
     &blk_exp_vhost_user_blk,
 #endif
+#ifdef CONFIG_FUSE
+    &blk_exp_fuse,
+#endif
+#ifdef CONFIG_VDUSE_BLK_EXPORT
+    &blk_exp_vduse_blk,
+#endif
 };
 
 /* Only accessed from the main thread */
@@ -73,6 +83,8 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
     uint64_t perm;
     int ret;
 
+    GLOBAL_STATE_CODE();
+
     if (!id_wellformed(export->id)) {
         error_setg(errp, "Invalid block export id");
         return NULL;
@@ -104,9 +116,10 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
     ctx = bdrv_get_aio_context(bs);
     aio_context_acquire(ctx);
 
-    if (export->has_iothread) {
+    if (export->iothread) {
         IOThread *iothread;
         AioContext *new_ctx;
+        Error **set_context_errp;
 
         iothread = iothread_by_id(export->iothread);
         if (!iothread) {
@@ -116,7 +129,9 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
 
         new_ctx = iothread_get_aio_context(iothread);
 
-        ret = bdrv_try_set_aio_context(bs, new_ctx, errp);
+        /* Ignore errors with fixed-iothread=false */
+        set_context_errp = fixed_iothread ? errp : NULL;
+        ret = bdrv_try_change_aio_context(bs, new_ctx, NULL, set_context_errp);
         if (ret == 0) {
             aio_context_release(ctx);
             aio_context_acquire(new_ctx);
@@ -132,7 +147,9 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
      * access since the export could be available before migration handover.
      * ctx was acquired in the caller.
      */
-    bdrv_invalidate_cache(bs, NULL);
+    bdrv_graph_rdlock_main_loop();
+    bdrv_activate(bs, NULL);
+    bdrv_graph_rdunlock_main_loop();
 
     perm = BLK_PERM_CONSISTENT_READ;
     if (export->writable) {
@@ -179,7 +196,10 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
     return exp;
 
 fail:
-    blk_unref(blk);
+    if (blk) {
+        blk_set_dev_ops(blk, NULL, NULL);
+        blk_unref(blk);
+    }
     aio_context_release(ctx);
     if (exp) {
         g_free(exp->id);
@@ -188,11 +208,10 @@ fail:
     return NULL;
 }
 
-/* Callers must hold exp->ctx lock */
 void blk_exp_ref(BlockExport *exp)
 {
-    assert(exp->refcount > 0);
-    exp->refcount++;
+    assert(qatomic_read(&exp->refcount) > 0);
+    qatomic_inc(&exp->refcount);
 }
 
 /* Runs in the main thread */
@@ -206,6 +225,7 @@ static void blk_exp_delete_bh(void *opaque)
     assert(exp->refcount == 0);
     QLIST_REMOVE(exp, next);
     exp->drv->delete(exp);
+    blk_set_dev_ops(exp->blk, NULL, NULL);
     blk_unref(exp->blk);
     qapi_event_send_block_export_deleted(exp->id);
     g_free(exp->id);
@@ -214,11 +234,10 @@ static void blk_exp_delete_bh(void *opaque)
     aio_context_release(aio_context);
 }
 
-/* Callers must hold exp->ctx lock */
 void blk_exp_unref(BlockExport *exp)
 {
-    assert(exp->refcount > 0);
-    if (--exp->refcount == 0) {
+    assert(qatomic_read(&exp->refcount) > 0);
+    if (qatomic_fetch_dec(&exp->refcount) == 1) {
         /* Touch the block_exports list only in the main thread */
         aio_bh_schedule_oneshot(qemu_get_aio_context(), blk_exp_delete_bh,
                                 exp);
@@ -293,7 +312,7 @@ void blk_exp_close_all_type(BlockExportType type)
         blk_exp_request_shutdown(exp);
     }
 
-    AIO_WAIT_WHILE(NULL, blk_exp_has_type(type));
+    AIO_WAIT_WHILE_UNLOCKED(NULL, blk_exp_has_type(type));
 }
 
 void blk_exp_close_all(void)
@@ -326,7 +345,8 @@ void qmp_block_export_del(const char *id,
     if (!has_mode) {
         mode = BLOCK_EXPORT_REMOVE_MODE_SAFE;
     }
-    if (mode == BLOCK_EXPORT_REMOVE_MODE_SAFE && exp->refcount > 1) {
+    if (mode == BLOCK_EXPORT_REMOVE_MODE_SAFE &&
+        qatomic_read(&exp->refcount) > 1) {
         error_setg(errp, "export '%s' still in use", exp->id);
         error_append_hint(errp, "Use mode='hard' to force client "
                           "disconnect\n");
@@ -338,11 +358,10 @@ void qmp_block_export_del(const char *id,
 
 BlockExportInfoList *qmp_query_block_exports(Error **errp)
 {
-    BlockExportInfoList *head = NULL, **p_next = &head;
+    BlockExportInfoList *head = NULL, **tail = &head;
     BlockExport *exp;
 
     QLIST_FOREACH(exp, &block_exports, next) {
-        BlockExportInfoList *entry = g_new0(BlockExportInfoList, 1);
         BlockExportInfo *info = g_new(BlockExportInfo, 1);
         *info = (BlockExportInfo) {
             .id             = g_strdup(exp->id),
@@ -351,9 +370,7 @@ BlockExportInfoList *qmp_query_block_exports(Error **errp)
             .shutting_down  = !exp->user_owned,
         };
 
-        entry->value = info;
-        *p_next = entry;
-        p_next = &entry->next;
+        QAPI_LIST_APPEND(tail, info);
     }
 
     return head;
diff --git a/block/export/fuse.c b/block/export/fuse.c
new file mode 100644 (file)
index 0000000..3307b64
--- /dev/null
@@ -0,0 +1,891 @@
+/*
+ * Present a block device as a raw image through FUSE
+ *
+ * Copyright (c) 2020 Max Reitz <mreitz@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 or later of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define FUSE_USE_VERSION 31
+
+#include "qemu/osdep.h"
+#include "qemu/memalign.h"
+#include "block/aio.h"
+#include "block/block_int-common.h"
+#include "block/export.h"
+#include "block/fuse.h"
+#include "block/qapi.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-block.h"
+#include "qemu/main-loop.h"
+#include "sysemu/block-backend.h"
+
+#include <fuse.h>
+#include <fuse_lowlevel.h>
+
+#if defined(CONFIG_FALLOCATE_ZERO_RANGE)
+#include <linux/falloc.h>
+#endif
+
+#ifdef __linux__
+#include <linux/fs.h>
+#endif
+
+/* Prevent overly long bounce buffer allocations */
+#define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
+
+
+typedef struct FuseExport {
+    BlockExport common;
+
+    struct fuse_session *fuse_session;
+    struct fuse_buf fuse_buf;
+    unsigned int in_flight; /* atomic */
+    bool mounted, fd_handler_set_up;
+
+    char *mountpoint;
+    bool writable;
+    bool growable;
+    /* Whether allow_other was used as a mount option or not */
+    bool allow_other;
+
+    mode_t st_mode;
+    uid_t st_uid;
+    gid_t st_gid;
+} FuseExport;
+
+static GHashTable *exports;
+static const struct fuse_lowlevel_ops fuse_ops;
+
+static void fuse_export_shutdown(BlockExport *exp);
+static void fuse_export_delete(BlockExport *exp);
+
+static void init_exports_table(void);
+
+static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
+                             bool allow_other, Error **errp);
+static void read_from_fuse_export(void *opaque);
+
+static bool is_regular_file(const char *path, Error **errp);
+
+
+static void fuse_export_drained_begin(void *opaque)
+{
+    FuseExport *exp = opaque;
+
+    aio_set_fd_handler(exp->common.ctx,
+                       fuse_session_fd(exp->fuse_session),
+                       NULL, NULL, NULL, NULL, NULL);
+    exp->fd_handler_set_up = false;
+}
+
+static void fuse_export_drained_end(void *opaque)
+{
+    FuseExport *exp = opaque;
+
+    /* Refresh AioContext in case it changed */
+    exp->common.ctx = blk_get_aio_context(exp->common.blk);
+
+    aio_set_fd_handler(exp->common.ctx,
+                       fuse_session_fd(exp->fuse_session),
+                       read_from_fuse_export, NULL, NULL, NULL, exp);
+    exp->fd_handler_set_up = true;
+}
+
+static bool fuse_export_drained_poll(void *opaque)
+{
+    FuseExport *exp = opaque;
+
+    return qatomic_read(&exp->in_flight) > 0;
+}
+
+static const BlockDevOps fuse_export_blk_dev_ops = {
+    .drained_begin = fuse_export_drained_begin,
+    .drained_end   = fuse_export_drained_end,
+    .drained_poll  = fuse_export_drained_poll,
+};
+
+static int fuse_export_create(BlockExport *blk_exp,
+                              BlockExportOptions *blk_exp_args,
+                              Error **errp)
+{
+    FuseExport *exp = container_of(blk_exp, FuseExport, common);
+    BlockExportOptionsFuse *args = &blk_exp_args->u.fuse;
+    int ret;
+
+    assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
+
+    /* For growable and writable exports, take the RESIZE permission */
+    if (args->growable || blk_exp_args->writable) {
+        uint64_t blk_perm, blk_shared_perm;
+
+        blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
+
+        ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
+                           blk_shared_perm, errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    blk_set_dev_ops(exp->common.blk, &fuse_export_blk_dev_ops, exp);
+
+    /*
+     * We handle draining ourselves using an in-flight counter and by disabling
+     * the FUSE fd handler. Do not queue BlockBackend requests, they need to
+     * complete so the in-flight counter reaches zero.
+     */
+    blk_set_disable_request_queuing(exp->common.blk, true);
+
+    init_exports_table();
+
+    /*
+     * It is important to do this check before calling is_regular_file() --
+     * that function will do a stat(), which we would have to handle if we
+     * already exported something on @mountpoint.  But we cannot, because
+     * we are currently caught up here.
+     * (Note that ideally we would want to resolve relative paths here,
+     * but bdrv_make_absolute_filename() might do the wrong thing for
+     * paths that contain colons, and realpath() would resolve symlinks,
+     * which we do not want: The mount point is not going to be the
+     * symlink's destination, but the link itself.)
+     * So this will not catch all potential clashes, but hopefully at
+     * least the most common one of specifying exactly the same path
+     * string twice.
+     */
+    if (g_hash_table_contains(exports, args->mountpoint)) {
+        error_setg(errp, "There already is a FUSE export on '%s'",
+                   args->mountpoint);
+        ret = -EEXIST;
+        goto fail;
+    }
+
+    if (!is_regular_file(args->mountpoint, errp)) {
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    exp->mountpoint = g_strdup(args->mountpoint);
+    exp->writable = blk_exp_args->writable;
+    exp->growable = args->growable;
+
+    /* set default */
+    if (!args->has_allow_other) {
+        args->allow_other = FUSE_EXPORT_ALLOW_OTHER_AUTO;
+    }
+
+    exp->st_mode = S_IFREG | S_IRUSR;
+    if (exp->writable) {
+        exp->st_mode |= S_IWUSR;
+    }
+    exp->st_uid = getuid();
+    exp->st_gid = getgid();
+
+    if (args->allow_other == FUSE_EXPORT_ALLOW_OTHER_AUTO) {
+        /* Ignore errors on our first attempt */
+        ret = setup_fuse_export(exp, args->mountpoint, true, NULL);
+        exp->allow_other = ret == 0;
+        if (ret < 0) {
+            ret = setup_fuse_export(exp, args->mountpoint, false, errp);
+        }
+    } else {
+        exp->allow_other = args->allow_other == FUSE_EXPORT_ALLOW_OTHER_ON;
+        ret = setup_fuse_export(exp, args->mountpoint, exp->allow_other, errp);
+    }
+    if (ret < 0) {
+        goto fail;
+    }
+
+    return 0;
+
+fail:
+    fuse_export_delete(blk_exp);
+    return ret;
+}
+
+/**
+ * Allocates the global @exports hash table.
+ */
+static void init_exports_table(void)
+{
+    if (exports) {
+        return;
+    }
+
+    exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
+}
+
+/**
+ * Create exp->fuse_session and mount it.
+ */
+static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
+                             bool allow_other, Error **errp)
+{
+    const char *fuse_argv[4];
+    char *mount_opts;
+    struct fuse_args fuse_args;
+    int ret;
+
+    /*
+     * max_read needs to match what fuse_init() sets.
+     * max_write need not be supplied.
+     */
+    mount_opts = g_strdup_printf("max_read=%zu,default_permissions%s",
+                                 FUSE_MAX_BOUNCE_BYTES,
+                                 allow_other ? ",allow_other" : "");
+
+    fuse_argv[0] = ""; /* Dummy program name */
+    fuse_argv[1] = "-o";
+    fuse_argv[2] = mount_opts;
+    fuse_argv[3] = NULL;
+    fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv);
+
+    exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops,
+                                         sizeof(fuse_ops), exp);
+    g_free(mount_opts);
+    if (!exp->fuse_session) {
+        error_setg(errp, "Failed to set up FUSE session");
+        ret = -EIO;
+        goto fail;
+    }
+
+    ret = fuse_session_mount(exp->fuse_session, mountpoint);
+    if (ret < 0) {
+        error_setg(errp, "Failed to mount FUSE session to export");
+        ret = -EIO;
+        goto fail;
+    }
+    exp->mounted = true;
+
+    g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
+
+    aio_set_fd_handler(exp->common.ctx,
+                       fuse_session_fd(exp->fuse_session),
+                       read_from_fuse_export, NULL, NULL, NULL, exp);
+    exp->fd_handler_set_up = true;
+
+    return 0;
+
+fail:
+    fuse_export_shutdown(&exp->common);
+    return ret;
+}
+
+/**
+ * Callback to be invoked when the FUSE session FD can be read from.
+ * (This is basically the FUSE event loop.)
+ */
+static void read_from_fuse_export(void *opaque)
+{
+    FuseExport *exp = opaque;
+    int ret;
+
+    blk_exp_ref(&exp->common);
+
+    qatomic_inc(&exp->in_flight);
+
+    do {
+        ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
+    } while (ret == -EINTR);
+    if (ret < 0) {
+        goto out;
+    }
+
+    fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
+
+out:
+    if (qatomic_fetch_dec(&exp->in_flight) == 1) {
+        aio_wait_kick(); /* wake AIO_WAIT_WHILE() */
+    }
+
+    blk_exp_unref(&exp->common);
+}
+
+static void fuse_export_shutdown(BlockExport *blk_exp)
+{
+    FuseExport *exp = container_of(blk_exp, FuseExport, common);
+
+    if (exp->fuse_session) {
+        fuse_session_exit(exp->fuse_session);
+
+        if (exp->fd_handler_set_up) {
+            aio_set_fd_handler(exp->common.ctx,
+                               fuse_session_fd(exp->fuse_session),
+                               NULL, NULL, NULL, NULL, NULL);
+            exp->fd_handler_set_up = false;
+        }
+    }
+
+    if (exp->mountpoint) {
+        /*
+         * Safe to drop now, because we will not handle any requests
+         * for this export anymore anyway.
+         */
+        g_hash_table_remove(exports, exp->mountpoint);
+    }
+}
+
+static void fuse_export_delete(BlockExport *blk_exp)
+{
+    FuseExport *exp = container_of(blk_exp, FuseExport, common);
+
+    if (exp->fuse_session) {
+        if (exp->mounted) {
+            fuse_session_unmount(exp->fuse_session);
+        }
+
+        fuse_session_destroy(exp->fuse_session);
+    }
+
+    free(exp->fuse_buf.mem);
+    g_free(exp->mountpoint);
+}
+
+/**
+ * Check whether @path points to a regular file.  If not, put an
+ * appropriate message into *errp.
+ */
+static bool is_regular_file(const char *path, Error **errp)
+{
+    struct stat statbuf;
+    int ret;
+
+    ret = stat(path, &statbuf);
+    if (ret < 0) {
+        error_setg_errno(errp, errno, "Failed to stat '%s'", path);
+        return false;
+    }
+
+    if (!S_ISREG(statbuf.st_mode)) {
+        error_setg(errp, "'%s' is not a regular file", path);
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * A chance to set change some parameters supplied to FUSE_INIT.
+ */
+static void fuse_init(void *userdata, struct fuse_conn_info *conn)
+{
+    /*
+     * MIN_NON_ZERO() would not be wrong here, but what we set here
+     * must equal what has been passed to fuse_session_new().
+     * Therefore, as long as max_read must be passed as a mount option
+     * (which libfuse claims will be changed at some point), we have
+     * to set max_read to a fixed value here.
+     */
+    conn->max_read = FUSE_MAX_BOUNCE_BYTES;
+
+    conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write);
+}
+
+/**
+ * Let clients look up files.  Always return ENOENT because we only
+ * care about the mountpoint itself.
+ */
+static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
+{
+    fuse_reply_err(req, ENOENT);
+}
+
+/**
+ * Let clients get file attributes (i.e., stat() the file).
+ */
+static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
+                         struct fuse_file_info *fi)
+{
+    struct stat statbuf;
+    int64_t length, allocated_blocks;
+    time_t now = time(NULL);
+    FuseExport *exp = fuse_req_userdata(req);
+
+    length = blk_getlength(exp->common.blk);
+    if (length < 0) {
+        fuse_reply_err(req, -length);
+        return;
+    }
+
+    allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
+    if (allocated_blocks <= 0) {
+        allocated_blocks = DIV_ROUND_UP(length, 512);
+    } else {
+        allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512);
+    }
+
+    statbuf = (struct stat) {
+        .st_ino     = inode,
+        .st_mode    = exp->st_mode,
+        .st_nlink   = 1,
+        .st_uid     = exp->st_uid,
+        .st_gid     = exp->st_gid,
+        .st_size    = length,
+        .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment,
+        .st_blocks  = allocated_blocks,
+        .st_atime   = now,
+        .st_mtime   = now,
+        .st_ctime   = now,
+    };
+
+    fuse_reply_attr(req, &statbuf, 1.);
+}
+
+static int fuse_do_truncate(const FuseExport *exp, int64_t size,
+                            bool req_zero_write, PreallocMode prealloc)
+{
+    uint64_t blk_perm, blk_shared_perm;
+    BdrvRequestFlags truncate_flags = 0;
+    bool add_resize_perm;
+    int ret, ret_check;
+
+    /* Growable and writable exports have a permanent RESIZE permission */
+    add_resize_perm = !exp->growable && !exp->writable;
+
+    if (req_zero_write) {
+        truncate_flags |= BDRV_REQ_ZERO_WRITE;
+    }
+
+    if (add_resize_perm) {
+
+        if (!qemu_in_main_thread()) {
+            /* Changing permissions like below only works in the main thread */
+            return -EPERM;
+        }
+
+        blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
+
+        ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
+                           blk_shared_perm, NULL);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    ret = blk_truncate(exp->common.blk, size, true, prealloc,
+                       truncate_flags, NULL);
+
+    if (add_resize_perm) {
+        /* Must succeed, because we are only giving up the RESIZE permission */
+        ret_check = blk_set_perm(exp->common.blk, blk_perm,
+                                 blk_shared_perm, &error_abort);
+        assert(ret_check == 0);
+    }
+
+    return ret;
+}
+
+/**
+ * Let clients set file attributes.  Only resizing and changing
+ * permissions (st_mode, st_uid, st_gid) is allowed.
+ * Changing permissions is only allowed as far as it will actually
+ * permit access: Read-only exports cannot be given +w, and exports
+ * without allow_other cannot be given a different UID or GID, and
+ * they cannot be given non-owner access.
+ */
+static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf,
+                         int to_set, struct fuse_file_info *fi)
+{
+    FuseExport *exp = fuse_req_userdata(req);
+    int supported_attrs;
+    int ret;
+
+    supported_attrs = FUSE_SET_ATTR_SIZE | FUSE_SET_ATTR_MODE;
+    if (exp->allow_other) {
+        supported_attrs |= FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID;
+    }
+
+    if (to_set & ~supported_attrs) {
+        fuse_reply_err(req, ENOTSUP);
+        return;
+    }
+
+    /* Do some argument checks first before committing to anything */
+    if (to_set & FUSE_SET_ATTR_MODE) {
+        /*
+         * Without allow_other, non-owners can never access the export, so do
+         * not allow setting permissions for them
+         */
+        if (!exp->allow_other &&
+            (statbuf->st_mode & (S_IRWXG | S_IRWXO)) != 0)
+        {
+            fuse_reply_err(req, EPERM);
+            return;
+        }
+
+        /* +w for read-only exports makes no sense, disallow it */
+        if (!exp->writable &&
+            (statbuf->st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0)
+        {
+            fuse_reply_err(req, EROFS);
+            return;
+        }
+    }
+
+    if (to_set & FUSE_SET_ATTR_SIZE) {
+        if (!exp->writable) {
+            fuse_reply_err(req, EACCES);
+            return;
+        }
+
+        ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF);
+        if (ret < 0) {
+            fuse_reply_err(req, -ret);
+            return;
+        }
+    }
+
+    if (to_set & FUSE_SET_ATTR_MODE) {
+        /* Ignore FUSE-supplied file type, only change the mode */
+        exp->st_mode = (statbuf->st_mode & 07777) | S_IFREG;
+    }
+
+    if (to_set & FUSE_SET_ATTR_UID) {
+        exp->st_uid = statbuf->st_uid;
+    }
+
+    if (to_set & FUSE_SET_ATTR_GID) {
+        exp->st_gid = statbuf->st_gid;
+    }
+
+    fuse_getattr(req, inode, fi);
+}
+
+/**
+ * Let clients open a file (i.e., the exported image).
+ */
+static void fuse_open(fuse_req_t req, fuse_ino_t inode,
+                      struct fuse_file_info *fi)
+{
+    fuse_reply_open(req, fi);
+}
+
+/**
+ * Handle client reads from the exported image.
+ */
+static void fuse_read(fuse_req_t req, fuse_ino_t inode,
+                      size_t size, off_t offset, struct fuse_file_info *fi)
+{
+    FuseExport *exp = fuse_req_userdata(req);
+    int64_t length;
+    void *buf;
+    int ret;
+
+    /* Limited by max_read, should not happen */
+    if (size > FUSE_MAX_BOUNCE_BYTES) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    /**
+     * Clients will expect short reads at EOF, so we have to limit
+     * offset+size to the image length.
+     */
+    length = blk_getlength(exp->common.blk);
+    if (length < 0) {
+        fuse_reply_err(req, -length);
+        return;
+    }
+
+    if (offset + size > length) {
+        size = length - offset;
+    }
+
+    buf = qemu_try_blockalign(blk_bs(exp->common.blk), size);
+    if (!buf) {
+        fuse_reply_err(req, ENOMEM);
+        return;
+    }
+
+    ret = blk_pread(exp->common.blk, offset, size, buf, 0);
+    if (ret >= 0) {
+        fuse_reply_buf(req, buf, size);
+    } else {
+        fuse_reply_err(req, -ret);
+    }
+
+    qemu_vfree(buf);
+}
+
+/**
+ * Handle client writes to the exported image.
+ */
+static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf,
+                       size_t size, off_t offset, struct fuse_file_info *fi)
+{
+    FuseExport *exp = fuse_req_userdata(req);
+    int64_t length;
+    int ret;
+
+    /* Limited by max_write, should not happen */
+    if (size > BDRV_REQUEST_MAX_BYTES) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    if (!exp->writable) {
+        fuse_reply_err(req, EACCES);
+        return;
+    }
+
+    /**
+     * Clients will expect short writes at EOF, so we have to limit
+     * offset+size to the image length.
+     */
+    length = blk_getlength(exp->common.blk);
+    if (length < 0) {
+        fuse_reply_err(req, -length);
+        return;
+    }
+
+    if (offset + size > length) {
+        if (exp->growable) {
+            ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF);
+            if (ret < 0) {
+                fuse_reply_err(req, -ret);
+                return;
+            }
+        } else {
+            size = length - offset;
+        }
+    }
+
+    ret = blk_pwrite(exp->common.blk, offset, size, buf, 0);
+    if (ret >= 0) {
+        fuse_reply_write(req, size);
+    } else {
+        fuse_reply_err(req, -ret);
+    }
+}
+
+/**
+ * Let clients perform various fallocate() operations.
+ */
+static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
+                           off_t offset, off_t length,
+                           struct fuse_file_info *fi)
+{
+    FuseExport *exp = fuse_req_userdata(req);
+    int64_t blk_len;
+    int ret;
+
+    if (!exp->writable) {
+        fuse_reply_err(req, EACCES);
+        return;
+    }
+
+    blk_len = blk_getlength(exp->common.blk);
+    if (blk_len < 0) {
+        fuse_reply_err(req, -blk_len);
+        return;
+    }
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+    if (mode & FALLOC_FL_KEEP_SIZE) {
+        length = MIN(length, blk_len - offset);
+    }
+#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
+
+    if (!mode) {
+        /* We can only fallocate at the EOF with a truncate */
+        if (offset < blk_len) {
+            fuse_reply_err(req, EOPNOTSUPP);
+            return;
+        }
+
+        if (offset > blk_len) {
+            /* No preallocation needed here */
+            ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
+            if (ret < 0) {
+                fuse_reply_err(req, -ret);
+                return;
+            }
+        }
+
+        ret = fuse_do_truncate(exp, offset + length, true,
+                               PREALLOC_MODE_FALLOC);
+    }
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+    else if (mode & FALLOC_FL_PUNCH_HOLE) {
+        if (!(mode & FALLOC_FL_KEEP_SIZE)) {
+            fuse_reply_err(req, EINVAL);
+            return;
+        }
+
+        do {
+            int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
+
+            ret = blk_pwrite_zeroes(exp->common.blk, offset, size,
+                                    BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
+            if (ret == -ENOTSUP) {
+                /*
+                 * fallocate() specifies to return EOPNOTSUPP for unsupported
+                 * operations
+                 */
+                ret = -EOPNOTSUPP;
+            }
+
+            offset += size;
+            length -= size;
+        } while (ret == 0 && length > 0);
+    }
+#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
+#ifdef CONFIG_FALLOCATE_ZERO_RANGE
+    else if (mode & FALLOC_FL_ZERO_RANGE) {
+        if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
+            /* No need for zeroes, we are going to write them ourselves */
+            ret = fuse_do_truncate(exp, offset + length, false,
+                                   PREALLOC_MODE_OFF);
+            if (ret < 0) {
+                fuse_reply_err(req, -ret);
+                return;
+            }
+        }
+
+        do {
+            int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
+
+            ret = blk_pwrite_zeroes(exp->common.blk,
+                                    offset, size, 0);
+            offset += size;
+            length -= size;
+        } while (ret == 0 && length > 0);
+    }
+#endif /* CONFIG_FALLOCATE_ZERO_RANGE */
+    else {
+        ret = -EOPNOTSUPP;
+    }
+
+    fuse_reply_err(req, ret < 0 ? -ret : 0);
+}
+
+/**
+ * Let clients fsync the exported image.
+ */
+static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync,
+                       struct fuse_file_info *fi)
+{
+    FuseExport *exp = fuse_req_userdata(req);
+    int ret;
+
+    ret = blk_flush(exp->common.blk);
+    fuse_reply_err(req, ret < 0 ? -ret : 0);
+}
+
+/**
+ * Called before an FD to the exported image is closed.  (libfuse
+ * notes this to be a way to return last-minute errors.)
+ */
+static void fuse_flush(fuse_req_t req, fuse_ino_t inode,
+                        struct fuse_file_info *fi)
+{
+    fuse_fsync(req, inode, 1, fi);
+}
+
+#ifdef CONFIG_FUSE_LSEEK
+/**
+ * Let clients inquire allocation status.
+ */
+static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset,
+                       int whence, struct fuse_file_info *fi)
+{
+    FuseExport *exp = fuse_req_userdata(req);
+
+    if (whence != SEEK_HOLE && whence != SEEK_DATA) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    while (true) {
+        int64_t pnum;
+        int ret;
+
+        ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
+                                      offset, INT64_MAX, &pnum, NULL, NULL);
+        if (ret < 0) {
+            fuse_reply_err(req, -ret);
+            return;
+        }
+
+        if (!pnum && (ret & BDRV_BLOCK_EOF)) {
+            int64_t blk_len;
+
+            /*
+             * If blk_getlength() rounds (e.g. by sectors), then the
+             * export length will be rounded, too.  However,
+             * bdrv_block_status_above() may return EOF at unaligned
+             * offsets.  We must not let this become visible and thus
+             * always simulate a hole between @offset (the real EOF)
+             * and @blk_len (the client-visible EOF).
+             */
+
+            blk_len = blk_getlength(exp->common.blk);
+            if (blk_len < 0) {
+                fuse_reply_err(req, -blk_len);
+                return;
+            }
+
+            if (offset > blk_len || whence == SEEK_DATA) {
+                fuse_reply_err(req, ENXIO);
+            } else {
+                fuse_reply_lseek(req, offset);
+            }
+            return;
+        }
+
+        if (ret & BDRV_BLOCK_DATA) {
+            if (whence == SEEK_DATA) {
+                fuse_reply_lseek(req, offset);
+                return;
+            }
+        } else {
+            if (whence == SEEK_HOLE) {
+                fuse_reply_lseek(req, offset);
+                return;
+            }
+        }
+
+        /* Safety check against infinite loops */
+        if (!pnum) {
+            fuse_reply_err(req, ENXIO);
+            return;
+        }
+
+        offset += pnum;
+    }
+}
+#endif
+
+static const struct fuse_lowlevel_ops fuse_ops = {
+    .init       = fuse_init,
+    .lookup     = fuse_lookup,
+    .getattr    = fuse_getattr,
+    .setattr    = fuse_setattr,
+    .open       = fuse_open,
+    .read       = fuse_read,
+    .write      = fuse_write,
+    .fallocate  = fuse_fallocate,
+    .flush      = fuse_flush,
+    .fsync      = fuse_fsync,
+#ifdef CONFIG_FUSE_LSEEK
+    .lseek      = fuse_lseek,
+#endif
+};
+
+const BlockExportDriver blk_exp_fuse = {
+    .type               = BLOCK_EXPORT_TYPE_FUSE,
+    .instance_size      = sizeof(FuseExport),
+    .create             = fuse_export_create,
+    .delete             = fuse_export_delete,
+    .request_shutdown   = fuse_export_shutdown,
+};
index 135b356775b378cc91b6600a99186d2eaeb53caa..c60116f455af3d5e945ef1b63f115e43e2648763 100644 (file)
@@ -1,5 +1,12 @@
 blockdev_ss.add(files('export.c'))
 
 if have_vhost_user_blk_server
-    blockdev_ss.add(files('vhost-user-blk-server.c'))
+    blockdev_ss.add(files('vhost-user-blk-server.c', 'virtio-blk-handler.c'))
+endif
+
+blockdev_ss.add(when: fuse, if_true: files('fuse.c'))
+
+if have_vduse_blk_export
+    blockdev_ss.add(files('vduse-blk.c', 'virtio-blk-handler.c'))
+    blockdev_ss.add(libvduse)
 endif
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
new file mode 100644 (file)
index 0000000..172f73c
--- /dev/null
@@ -0,0 +1,422 @@
+/*
+ * Export QEMU block device via VDUSE
+ *
+ * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
+ *
+ * Author:
+ *   Xie Yongji <xieyongji@bytedance.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include <sys/eventfd.h>
+
+#include "qapi/error.h"
+#include "block/export.h"
+#include "qemu/error-report.h"
+#include "util/block-helpers.h"
+#include "subprojects/libvduse/libvduse.h"
+#include "virtio-blk-handler.h"
+
+#include "standard-headers/linux/virtio_blk.h"
+
+#define VDUSE_DEFAULT_NUM_QUEUE 1
+#define VDUSE_DEFAULT_QUEUE_SIZE 256
+
+typedef struct VduseBlkExport {
+    BlockExport export;
+    VirtioBlkHandler handler;
+    VduseDev *dev;
+    uint16_t num_queues;
+    char *recon_file;
+    unsigned int inflight; /* atomic */
+    bool vqs_started;
+} VduseBlkExport;
+
+typedef struct VduseBlkReq {
+    VduseVirtqElement elem;
+    VduseVirtq *vq;
+} VduseBlkReq;
+
+static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
+{
+    if (qatomic_fetch_inc(&vblk_exp->inflight) == 0) {
+        /* Prevent export from being deleted */
+        blk_exp_ref(&vblk_exp->export);
+    }
+}
+
+static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
+{
+    if (qatomic_fetch_dec(&vblk_exp->inflight) == 1) {
+        /* Wake AIO_WAIT_WHILE() */
+        aio_wait_kick();
+
+        /* Now the export can be deleted */
+        blk_exp_unref(&vblk_exp->export);
+    }
+}
+
+static void vduse_blk_req_complete(VduseBlkReq *req, size_t in_len)
+{
+    vduse_queue_push(req->vq, &req->elem, in_len);
+    vduse_queue_notify(req->vq);
+
+    free(req);
+}
+
+static void coroutine_fn vduse_blk_virtio_process_req(void *opaque)
+{
+    VduseBlkReq *req = opaque;
+    VduseVirtq *vq = req->vq;
+    VduseDev *dev = vduse_queue_get_dev(vq);
+    VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
+    VirtioBlkHandler *handler = &vblk_exp->handler;
+    VduseVirtqElement *elem = &req->elem;
+    struct iovec *in_iov = elem->in_sg;
+    struct iovec *out_iov = elem->out_sg;
+    unsigned in_num = elem->in_num;
+    unsigned out_num = elem->out_num;
+    int in_len;
+
+    in_len = virtio_blk_process_req(handler, in_iov,
+                                    out_iov, in_num, out_num);
+    if (in_len < 0) {
+        free(req);
+        return;
+    }
+
+    vduse_blk_req_complete(req, in_len);
+    vduse_blk_inflight_dec(vblk_exp);
+}
+
+static void vduse_blk_vq_handler(VduseDev *dev, VduseVirtq *vq)
+{
+    VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
+
+    while (1) {
+        VduseBlkReq *req;
+
+        req = vduse_queue_pop(vq, sizeof(VduseBlkReq));
+        if (!req) {
+            break;
+        }
+        req->vq = vq;
+
+        Coroutine *co =
+            qemu_coroutine_create(vduse_blk_virtio_process_req, req);
+
+        vduse_blk_inflight_inc(vblk_exp);
+        qemu_coroutine_enter(co);
+    }
+}
+
+static void on_vduse_vq_kick(void *opaque)
+{
+    VduseVirtq *vq = opaque;
+    VduseDev *dev = vduse_queue_get_dev(vq);
+    int fd = vduse_queue_get_fd(vq);
+    eventfd_t kick_data;
+
+    if (eventfd_read(fd, &kick_data) == -1) {
+        error_report("failed to read data from eventfd");
+        return;
+    }
+
+    vduse_blk_vq_handler(dev, vq);
+}
+
+static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
+{
+    VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
+
+    if (!vblk_exp->vqs_started) {
+        return; /* vduse_blk_drained_end() will start vqs later */
+    }
+
+    aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
+                       on_vduse_vq_kick, NULL, NULL, NULL, vq);
+    /* Make sure we don't miss any kick after reconnecting */
+    eventfd_write(vduse_queue_get_fd(vq), 1);
+}
+
+static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
+{
+    VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
+    int fd = vduse_queue_get_fd(vq);
+
+    if (fd < 0) {
+        return;
+    }
+
+    aio_set_fd_handler(vblk_exp->export.ctx, fd,
+                       NULL, NULL, NULL, NULL, NULL);
+}
+
+static const VduseOps vduse_blk_ops = {
+    .enable_queue = vduse_blk_enable_queue,
+    .disable_queue = vduse_blk_disable_queue,
+};
+
+static void on_vduse_dev_kick(void *opaque)
+{
+    VduseDev *dev = opaque;
+
+    vduse_dev_handler(dev);
+}
+
+static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
+{
+    aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
+                       on_vduse_dev_kick, NULL, NULL, NULL,
+                       vblk_exp->dev);
+
+    /* Virtqueues are handled by vduse_blk_drained_end() */
+}
+
+static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
+{
+    aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
+                       NULL, NULL, NULL, NULL, NULL);
+
+    /* Virtqueues are handled by vduse_blk_drained_begin() */
+}
+
+
+static void blk_aio_attached(AioContext *ctx, void *opaque)
+{
+    VduseBlkExport *vblk_exp = opaque;
+
+    vblk_exp->export.ctx = ctx;
+    vduse_blk_attach_ctx(vblk_exp, ctx);
+}
+
+static void blk_aio_detach(void *opaque)
+{
+    VduseBlkExport *vblk_exp = opaque;
+
+    vduse_blk_detach_ctx(vblk_exp);
+    vblk_exp->export.ctx = NULL;
+}
+
+static void vduse_blk_resize(void *opaque)
+{
+    BlockExport *exp = opaque;
+    VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
+    struct virtio_blk_config config;
+
+    config.capacity =
+            cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
+    vduse_dev_update_config(vblk_exp->dev, sizeof(config.capacity),
+                            offsetof(struct virtio_blk_config, capacity),
+                            (char *)&config.capacity);
+}
+
+static void vduse_blk_stop_virtqueues(VduseBlkExport *vblk_exp)
+{
+    for (uint16_t i = 0; i < vblk_exp->num_queues; i++) {
+        VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
+        vduse_blk_disable_queue(vblk_exp->dev, vq);
+    }
+
+    vblk_exp->vqs_started = false;
+}
+
+static void vduse_blk_start_virtqueues(VduseBlkExport *vblk_exp)
+{
+    vblk_exp->vqs_started = true;
+
+    for (uint16_t i = 0; i < vblk_exp->num_queues; i++) {
+        VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
+        vduse_blk_enable_queue(vblk_exp->dev, vq);
+    }
+}
+
+static void vduse_blk_drained_begin(void *opaque)
+{
+    BlockExport *exp = opaque;
+    VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
+
+    vduse_blk_stop_virtqueues(vblk_exp);
+}
+
+static void vduse_blk_drained_end(void *opaque)
+{
+    BlockExport *exp = opaque;
+    VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
+
+    vduse_blk_start_virtqueues(vblk_exp);
+}
+
+static bool vduse_blk_drained_poll(void *opaque)
+{
+    BlockExport *exp = opaque;
+    VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
+
+    return qatomic_read(&vblk_exp->inflight) > 0;
+}
+
+static const BlockDevOps vduse_block_ops = {
+    .resize_cb     = vduse_blk_resize,
+    .drained_begin = vduse_blk_drained_begin,
+    .drained_end   = vduse_blk_drained_end,
+    .drained_poll  = vduse_blk_drained_poll,
+};
+
+static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
+                                Error **errp)
+{
+    VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
+    BlockExportOptionsVduseBlk *vblk_opts = &opts->u.vduse_blk;
+    uint64_t logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
+    uint16_t num_queues = VDUSE_DEFAULT_NUM_QUEUE;
+    uint16_t queue_size = VDUSE_DEFAULT_QUEUE_SIZE;
+    Error *local_err = NULL;
+    struct virtio_blk_config config = { 0 };
+    uint64_t features;
+    int i, ret;
+
+    if (vblk_opts->has_num_queues) {
+        num_queues = vblk_opts->num_queues;
+        if (num_queues == 0) {
+            error_setg(errp, "num-queues must be greater than 0");
+            return -EINVAL;
+        }
+    }
+
+    if (vblk_opts->has_queue_size) {
+        queue_size = vblk_opts->queue_size;
+        if (queue_size <= 2 || !is_power_of_2(queue_size) ||
+            queue_size > VIRTQUEUE_MAX_SIZE) {
+            error_setg(errp, "queue-size is invalid");
+            return -EINVAL;
+        }
+    }
+
+    if (vblk_opts->has_logical_block_size) {
+        logical_block_size = vblk_opts->logical_block_size;
+        check_block_size(exp->id, "logical-block-size", logical_block_size,
+                         &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return -EINVAL;
+        }
+    }
+    vblk_exp->num_queues = num_queues;
+    vblk_exp->handler.blk = exp->blk;
+    vblk_exp->handler.serial = g_strdup(vblk_opts->serial ?: "");
+    vblk_exp->handler.logical_block_size = logical_block_size;
+    vblk_exp->handler.writable = opts->writable;
+    vblk_exp->vqs_started = true;
+
+    config.capacity =
+            cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
+    config.seg_max = cpu_to_le32(queue_size - 2);
+    config.min_io_size = cpu_to_le16(1);
+    config.opt_io_size = cpu_to_le32(1);
+    config.num_queues = cpu_to_le16(num_queues);
+    config.blk_size = cpu_to_le32(logical_block_size);
+    config.max_discard_sectors = cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS);
+    config.max_discard_seg = cpu_to_le32(1);
+    config.discard_sector_alignment =
+        cpu_to_le32(logical_block_size >> VIRTIO_BLK_SECTOR_BITS);
+    config.max_write_zeroes_sectors =
+        cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS);
+    config.max_write_zeroes_seg = cpu_to_le32(1);
+
+    features = vduse_get_virtio_features() |
+               (1ULL << VIRTIO_BLK_F_SEG_MAX) |
+               (1ULL << VIRTIO_BLK_F_TOPOLOGY) |
+               (1ULL << VIRTIO_BLK_F_BLK_SIZE) |
+               (1ULL << VIRTIO_BLK_F_FLUSH) |
+               (1ULL << VIRTIO_BLK_F_DISCARD) |
+               (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
+
+    if (num_queues > 1) {
+        features |= 1ULL << VIRTIO_BLK_F_MQ;
+    }
+    if (!opts->writable) {
+        features |= 1ULL << VIRTIO_BLK_F_RO;
+    }
+
+    vblk_exp->dev = vduse_dev_create(vblk_opts->name, VIRTIO_ID_BLOCK, 0,
+                                     features, num_queues,
+                                     sizeof(struct virtio_blk_config),
+                                     (char *)&config, &vduse_blk_ops,
+                                     vblk_exp);
+    if (!vblk_exp->dev) {
+        error_setg(errp, "failed to create vduse device");
+        ret = -ENOMEM;
+        goto err_dev;
+    }
+
+    vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s",
+                                           g_get_tmp_dir(), vblk_opts->name);
+    if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) {
+        error_setg(errp, "failed to set reconnect log file");
+        ret = -EINVAL;
+        goto err;
+    }
+
+    for (i = 0; i < num_queues; i++) {
+        vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
+    }
+
+    aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev),
+                       on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
+
+    blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
+                                 vblk_exp);
+    blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
+
+    /*
+     * We handle draining ourselves using an in-flight counter and by disabling
+     * virtqueue fd handlers. Do not queue BlockBackend requests, they need to
+     * complete so the in-flight counter reaches zero.
+     */
+    blk_set_disable_request_queuing(exp->blk, true);
+
+    return 0;
+err:
+    vduse_dev_destroy(vblk_exp->dev);
+    g_free(vblk_exp->recon_file);
+err_dev:
+    g_free(vblk_exp->handler.serial);
+    return ret;
+}
+
+static void vduse_blk_exp_delete(BlockExport *exp)
+{
+    VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
+    int ret;
+
+    assert(qatomic_read(&vblk_exp->inflight) == 0);
+
+    vduse_blk_detach_ctx(vblk_exp);
+    blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
+                                    vblk_exp);
+    ret = vduse_dev_destroy(vblk_exp->dev);
+    if (ret != -EBUSY) {
+        unlink(vblk_exp->recon_file);
+    }
+    g_free(vblk_exp->recon_file);
+    g_free(vblk_exp->handler.serial);
+}
+
+/* Called with exp->ctx acquired */
+static void vduse_blk_exp_request_shutdown(BlockExport *exp)
+{
+    VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
+
+    vduse_blk_stop_virtqueues(vblk_exp);
+}
+
+const BlockExportDriver blk_exp_vduse_blk = {
+    .type               = BLOCK_EXPORT_TYPE_VDUSE_BLK,
+    .instance_size      = sizeof(VduseBlkExport),
+    .create             = vduse_blk_exp_create,
+    .delete             = vduse_blk_exp_delete,
+    .request_shutdown   = vduse_blk_exp_request_shutdown,
+};
diff --git a/block/export/vduse-blk.h b/block/export/vduse-blk.h
new file mode 100644 (file)
index 0000000..c4eeb1b
--- /dev/null
@@ -0,0 +1,20 @@
+/*
+ * Export QEMU block device via VDUSE
+ *
+ * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
+ *
+ * Author:
+ *   Xie Yongji <xieyongji@bytedance.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef VDUSE_BLK_H
+#define VDUSE_BLK_H
+
+#include "block/export.h"
+
+extern const BlockExportDriver blk_exp_vduse_blk;
+
+#endif /* VDUSE_BLK_H */
index 62672d1cb95a37a104b9d4b2e91b92ab6ccfc929..16f48388d38d183c7d92d12f81ec51ea221a9a8e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Sharing QEMU block devices via vhost-user protocal
+ * Sharing QEMU block devices via vhost-user protocol
  *
  * Parts of the code based on nbd/server.c.
  *
  * later.  See the COPYING file in the top-level directory.
  */
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "block/block.h"
-#include "contrib/libvhost-user/libvhost-user.h"
+#include "subprojects/libvhost-user/libvhost-user.h" /* only for the type definitions */
 #include "standard-headers/linux/virtio_blk.h"
 #include "qemu/vhost-user-server.h"
 #include "vhost-user-blk-server.h"
 #include "qapi/error.h"
 #include "qom/object_interfaces.h"
-#include "sysemu/block-backend.h"
 #include "util/block-helpers.h"
+#include "virtio-blk-handler.h"
 
 enum {
     VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1,
 };
-struct virtio_blk_inhdr {
-    unsigned char status;
-};
 
 typedef struct VuBlkReq {
     VuVirtqElement elem;
-    int64_t sector_num;
-    size_t size;
-    struct virtio_blk_inhdr *in;
-    struct virtio_blk_outhdr out;
     VuServer *server;
     struct VuVirtq *vq;
 } VuBlkReq;
@@ -41,160 +35,48 @@ typedef struct VuBlkReq {
 typedef struct {
     BlockExport export;
     VuServer vu_server;
-    uint32_t blk_size;
+    VirtioBlkHandler handler;
     QIOChannelSocket *sioc;
     struct virtio_blk_config blkcfg;
-    bool writable;
 } VuBlkExport;
 
-static void vu_blk_req_complete(VuBlkReq *req)
+static void vu_blk_req_complete(VuBlkReq *req, size_t in_len)
 {
     VuDev *vu_dev = &req->server->vu_dev;
 
-    /* IO size with 1 extra status byte */
-    vu_queue_push(vu_dev, req->vq, &req->elem, req->size + 1);
+    vu_queue_push(vu_dev, req->vq, &req->elem, in_len);
     vu_queue_notify(vu_dev, req->vq);
 
     free(req);
 }
 
-static int coroutine_fn
-vu_blk_discard_write_zeroes(BlockBackend *blk, struct iovec *iov,
-                            uint32_t iovcnt, uint32_t type)
-{
-    struct virtio_blk_discard_write_zeroes desc;
-    ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
-    if (unlikely(size != sizeof(desc))) {
-        error_report("Invalid size %zd, expect %zu", size, sizeof(desc));
-        return -EINVAL;
-    }
-
-    uint64_t range[2] = { le64_to_cpu(desc.sector) << 9,
-                          le32_to_cpu(desc.num_sectors) << 9 };
-    if (type == VIRTIO_BLK_T_DISCARD) {
-        if (blk_co_pdiscard(blk, range[0], range[1]) == 0) {
-            return 0;
-        }
-    } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
-        if (blk_co_pwrite_zeroes(blk, range[0], range[1], 0) == 0) {
-            return 0;
-        }
-    }
-
-    return -EINVAL;
-}
-
+/*
+ * Called with server in_flight counter increased, must decrease before
+ * returning.
+ */
 static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
 {
     VuBlkReq *req = opaque;
     VuServer *server = req->server;
     VuVirtqElement *elem = &req->elem;
-    uint32_t type;
-
     VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
-    BlockBackend *blk = vexp->export.blk;
-
+    VirtioBlkHandler *handler = &vexp->handler;
     struct iovec *in_iov = elem->in_sg;
     struct iovec *out_iov = elem->out_sg;
     unsigned in_num = elem->in_num;
     unsigned out_num = elem->out_num;
-
-    /* refer to hw/block/virtio_blk.c */
-    if (elem->out_num < 1 || elem->in_num < 1) {
-        error_report("virtio-blk request missing headers");
-        goto err;
-    }
-
-    if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
-                            sizeof(req->out)) != sizeof(req->out))) {
-        error_report("virtio-blk request outhdr too short");
-        goto err;
-    }
-
-    iov_discard_front(&out_iov, &out_num, sizeof(req->out));
-
-    if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
-        error_report("virtio-blk request inhdr too short");
-        goto err;
-    }
-
-    /* We always touch the last byte, so just see how big in_iov is.  */
-    req->in = (void *)in_iov[in_num - 1].iov_base
-              + in_iov[in_num - 1].iov_len
-              - sizeof(struct virtio_blk_inhdr);
-    iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
-
-    type = le32_to_cpu(req->out.type);
-    switch (type & ~VIRTIO_BLK_T_BARRIER) {
-    case VIRTIO_BLK_T_IN:
-    case VIRTIO_BLK_T_OUT: {
-        ssize_t ret = 0;
-        bool is_write = type & VIRTIO_BLK_T_OUT;
-        req->sector_num = le64_to_cpu(req->out.sector);
-
-        if (is_write && !vexp->writable) {
-            req->in->status = VIRTIO_BLK_S_IOERR;
-            break;
-        }
-
-        int64_t offset = req->sector_num * vexp->blk_size;
-        QEMUIOVector qiov;
-        if (is_write) {
-            qemu_iovec_init_external(&qiov, out_iov, out_num);
-            ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
-        } else {
-            qemu_iovec_init_external(&qiov, in_iov, in_num);
-            ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
-        }
-        if (ret >= 0) {
-            req->in->status = VIRTIO_BLK_S_OK;
-        } else {
-            req->in->status = VIRTIO_BLK_S_IOERR;
-        }
-        break;
-    }
-    case VIRTIO_BLK_T_FLUSH:
-        if (blk_co_flush(blk) == 0) {
-            req->in->status = VIRTIO_BLK_S_OK;
-        } else {
-            req->in->status = VIRTIO_BLK_S_IOERR;
-        }
-        break;
-    case VIRTIO_BLK_T_GET_ID: {
-        size_t size = MIN(iov_size(&elem->in_sg[0], in_num),
-                          VIRTIO_BLK_ID_BYTES);
-        snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
-        req->in->status = VIRTIO_BLK_S_OK;
-        req->size = elem->in_sg[0].iov_len;
-        break;
-    }
-    case VIRTIO_BLK_T_DISCARD:
-    case VIRTIO_BLK_T_WRITE_ZEROES: {
-        int rc;
-
-        if (!vexp->writable) {
-            req->in->status = VIRTIO_BLK_S_IOERR;
-            break;
-        }
-
-        rc = vu_blk_discard_write_zeroes(blk, &elem->out_sg[1], out_num, type);
-        if (rc == 0) {
-            req->in->status = VIRTIO_BLK_S_OK;
-        } else {
-            req->in->status = VIRTIO_BLK_S_IOERR;
-        }
-        break;
+    int in_len;
+
+    in_len = virtio_blk_process_req(handler, in_iov, out_iov,
+                                    in_num, out_num);
+    if (in_len < 0) {
+        free(req);
+        vhost_user_server_dec_in_flight(server);
+        return;
     }
-    default:
-        req->in->status = VIRTIO_BLK_S_UNSUPP;
-        break;
-    }
-
-    vu_blk_req_complete(req);
-    return;
 
-err:
-    free(req);
+    vu_blk_req_complete(req, in_len);
+    vhost_user_server_dec_in_flight(server);
 }
 
 static void vu_blk_process_vq(VuDev *vu_dev, int idx)
@@ -215,6 +97,8 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx)
 
         Coroutine *co =
             qemu_coroutine_create(vu_blk_virtio_process_req, req);
+
+        vhost_user_server_inc_in_flight(server);
         qemu_coroutine_enter(co);
     }
 }
@@ -248,7 +132,7 @@ static uint64_t vu_blk_get_features(VuDev *dev)
                1ull << VIRTIO_RING_F_EVENT_IDX |
                1ull << VHOST_USER_F_PROTOCOL_FEATURES;
 
-    if (!vexp->writable) {
+    if (!vexp->handler.writable) {
         features |= 1ull << VIRTIO_BLK_F_RO;
     }
 
@@ -257,8 +141,7 @@ static uint64_t vu_blk_get_features(VuDev *dev)
 
 static uint64_t vu_blk_get_protocol_features(VuDev *dev)
 {
-    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
-           1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
+    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
 }
 
 static int
@@ -267,7 +150,9 @@ vu_blk_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
     VuServer *server = container_of(vu_dev, VuServer, vu_dev);
     VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
 
-    g_return_val_if_fail(len <= sizeof(struct virtio_blk_config), -1);
+    if (len > sizeof(struct virtio_blk_config)) {
+        return -1;
+    }
 
     memcpy(config, &vexp->blkcfg, len);
     return 0;
@@ -282,7 +167,7 @@ vu_blk_set_config(VuDev *vu_dev, const uint8_t *data,
     uint8_t wce;
 
     /* don't support live migration */
-    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
+    if (flags != VHOST_SET_CONFIG_TYPE_FRONTEND) {
         return -EINVAL;
     }
 
@@ -327,15 +212,21 @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
 {
     VuBlkExport *vexp = opaque;
 
+    /*
+     * The actual attach will happen in vu_blk_drained_end() and we just
+     * restore ctx here.
+     */
     vexp->export.ctx = ctx;
-    vhost_user_server_attach_aio_context(&vexp->vu_server, ctx);
 }
 
 static void blk_aio_detach(void *opaque)
 {
     VuBlkExport *vexp = opaque;
 
-    vhost_user_server_detach_aio_context(&vexp->vu_server);
+    /*
+     * The actual detach already happened in vu_blk_drained_begin() but from
+     * this point on we must not access ctx anymore.
+     */
     vexp->export.ctx = NULL;
 }
 
@@ -345,17 +236,21 @@ vu_blk_initialize_config(BlockDriverState *bs,
                          uint32_t blk_size,
                          uint16_t num_queues)
 {
-    config->capacity = cpu_to_le64(bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+    config->capacity =
+        cpu_to_le64(bdrv_getlength(bs) >> VIRTIO_BLK_SECTOR_BITS);
     config->blk_size = cpu_to_le32(blk_size);
     config->size_max = cpu_to_le32(0);
     config->seg_max = cpu_to_le32(128 - 2);
     config->min_io_size = cpu_to_le16(1);
     config->opt_io_size = cpu_to_le32(1);
     config->num_queues = cpu_to_le16(num_queues);
-    config->max_discard_sectors = cpu_to_le32(32768);
+    config->max_discard_sectors =
+        cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS);
     config->max_discard_seg = cpu_to_le32(1);
-    config->discard_sector_alignment = cpu_to_le32(config->blk_size >> 9);
-    config->max_write_zeroes_sectors = cpu_to_le32(32768);
+    config->discard_sector_alignment =
+        cpu_to_le32(blk_size >> VIRTIO_BLK_SECTOR_BITS);
+    config->max_write_zeroes_sectors
+        = cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS);
     config->max_write_zeroes_seg = cpu_to_le32(1);
 }
 
@@ -366,6 +261,63 @@ static void vu_blk_exp_request_shutdown(BlockExport *exp)
     vhost_user_server_stop(&vexp->vu_server);
 }
 
+static void vu_blk_exp_resize(void *opaque)
+{
+    VuBlkExport *vexp = opaque;
+    BlockDriverState *bs = blk_bs(vexp->handler.blk);
+    int64_t new_size = bdrv_getlength(bs);
+
+    if (new_size < 0) {
+        error_printf("Failed to get length of block node '%s'",
+                     bdrv_get_node_name(bs));
+        return;
+    }
+
+    vexp->blkcfg.capacity = cpu_to_le64(new_size >> VIRTIO_BLK_SECTOR_BITS);
+
+    vu_config_change_msg(&vexp->vu_server.vu_dev);
+}
+
+/* Called with vexp->export.ctx acquired */
+static void vu_blk_drained_begin(void *opaque)
+{
+    VuBlkExport *vexp = opaque;
+
+    vexp->vu_server.quiescing = true;
+    vhost_user_server_detach_aio_context(&vexp->vu_server);
+}
+
+/* Called with vexp->export.blk AioContext acquired */
+static void vu_blk_drained_end(void *opaque)
+{
+    VuBlkExport *vexp = opaque;
+
+    vexp->vu_server.quiescing = false;
+    vhost_user_server_attach_aio_context(&vexp->vu_server, vexp->export.ctx);
+}
+
+/*
+ * Ensures that bdrv_drained_begin() waits until in-flight requests complete
+ * and the server->co_trip coroutine has terminated. It will be restarted in
+ * vhost_user_server_attach_aio_context().
+ *
+ * Called with vexp->export.ctx acquired.
+ */
+static bool vu_blk_drained_poll(void *opaque)
+{
+    VuBlkExport *vexp = opaque;
+    VuServer *server = &vexp->vu_server;
+
+    return server->co_trip || vhost_user_server_has_in_flight(server);
+}
+
+static const BlockDevOps vu_blk_dev_ops = {
+    .drained_begin = vu_blk_drained_begin,
+    .drained_end   = vu_blk_drained_end,
+    .drained_poll  = vu_blk_drained_poll,
+    .resize_cb = vu_blk_exp_resize,
+};
+
 static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
                              Error **errp)
 {
@@ -375,13 +327,12 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
     uint64_t logical_block_size;
     uint16_t num_queues = VHOST_USER_BLK_NUM_QUEUES_DEFAULT;
 
-    vexp->writable = opts->writable;
     vexp->blkcfg.wce = 0;
 
     if (vu_opts->has_logical_block_size) {
         logical_block_size = vu_opts->logical_block_size;
     } else {
-        logical_block_size = BDRV_SECTOR_SIZE;
+        logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
     }
     check_block_size(exp->id, "logical-block-size", logical_block_size,
                      &local_err);
@@ -389,8 +340,6 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
         error_propagate(errp, local_err);
         return -EINVAL;
     }
-    vexp->blk_size = logical_block_size;
-    blk_set_guest_block_size(exp->blk, logical_block_size);
 
     if (vu_opts->has_num_queues) {
         num_queues = vu_opts->num_queues;
@@ -399,6 +348,10 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
         error_setg(errp, "num-queues must be greater than 0");
         return -EINVAL;
     }
+    vexp->handler.blk = exp->blk;
+    vexp->handler.serial = g_strdup("vhost_user_blk");
+    vexp->handler.logical_block_size = logical_block_size;
+    vexp->handler.writable = opts->writable;
 
     vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
                              logical_block_size, num_queues);
@@ -406,10 +359,13 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
     blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
                                  vexp);
 
+    blk_set_dev_ops(exp->blk, &vu_blk_dev_ops, vexp);
+
     if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
                                  num_queues, &vu_blk_iface, errp)) {
         blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
                                         blk_aio_detach, vexp);
+        g_free(vexp->handler.serial);
         return -EADDRNOTAVAIL;
     }
 
@@ -422,6 +378,7 @@ static void vu_blk_exp_delete(BlockExport *exp)
 
     blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
                                     vexp);
+    g_free(vexp->handler.serial);
 }
 
 const BlockExportDriver blk_exp_vhost_user_blk = {
index fcf46fc8a5fce9b362a35a8a6837ec3a6bb80cb2..77fb5c0131c6b7bb63239cfe3e4762d8e393d30b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Sharing QEMU block devices via vhost-user protocal
+ * Sharing QEMU block devices via vhost-user protocol
  *
  * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
  * Copyright (c) 2020 Red Hat, Inc.
diff --git a/block/export/virtio-blk-handler.c b/block/export/virtio-blk-handler.c
new file mode 100644 (file)
index 0000000..bc1cec6
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * Handler for virtio-blk I/O
+ *
+ * Copyright (c) 2020 Red Hat, Inc.
+ * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
+ *
+ * Author:
+ *   Coiby Xu <coiby.xu@gmail.com>
+ *   Xie Yongji <xieyongji@bytedance.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "virtio-blk-handler.h"
+
+#include "standard-headers/linux/virtio_blk.h"
+
+struct virtio_blk_inhdr {
+    unsigned char status;
+};
+
+static bool coroutine_fn
+virtio_blk_sect_range_ok(BlockBackend *blk, uint32_t block_size,
+                         uint64_t sector, size_t size)
+{
+    uint64_t nb_sectors;
+    uint64_t total_sectors;
+
+    if (size % VIRTIO_BLK_SECTOR_SIZE) {
+        return false;
+    }
+
+    nb_sectors = size >> VIRTIO_BLK_SECTOR_BITS;
+
+    QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != VIRTIO_BLK_SECTOR_SIZE);
+    if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+        return false;
+    }
+    if ((sector << VIRTIO_BLK_SECTOR_BITS) % block_size) {
+        return false;
+    }
+    blk_co_get_geometry(blk, &total_sectors);
+    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
+        return false;
+    }
+    return true;
+}
+
+static int coroutine_fn
+virtio_blk_discard_write_zeroes(VirtioBlkHandler *handler, struct iovec *iov,
+                                uint32_t iovcnt, uint32_t type)
+{
+    BlockBackend *blk = handler->blk;
+    struct virtio_blk_discard_write_zeroes desc;
+    ssize_t size;
+    uint64_t sector;
+    uint32_t num_sectors;
+    uint32_t max_sectors;
+    uint32_t flags;
+    int bytes;
+
+    /* Only one desc is currently supported */
+    if (unlikely(iov_size(iov, iovcnt) > sizeof(desc))) {
+        return VIRTIO_BLK_S_UNSUPP;
+    }
+
+    size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
+    if (unlikely(size != sizeof(desc))) {
+        error_report("Invalid size %zd, expected %zu", size, sizeof(desc));
+        return VIRTIO_BLK_S_IOERR;
+    }
+
+    sector = le64_to_cpu(desc.sector);
+    num_sectors = le32_to_cpu(desc.num_sectors);
+    flags = le32_to_cpu(desc.flags);
+    max_sectors = (type == VIRTIO_BLK_T_WRITE_ZEROES) ?
+                  VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS :
+                  VIRTIO_BLK_MAX_DISCARD_SECTORS;
+
+    /* This check ensures that 'bytes' fits in an int */
+    if (unlikely(num_sectors > max_sectors)) {
+        return VIRTIO_BLK_S_IOERR;
+    }
+
+    bytes = num_sectors << VIRTIO_BLK_SECTOR_BITS;
+
+    if (unlikely(!virtio_blk_sect_range_ok(blk, handler->logical_block_size,
+                                           sector, bytes))) {
+        return VIRTIO_BLK_S_IOERR;
+    }
+
+    /*
+     * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
+     * and write zeroes commands if any unknown flag is set.
+     */
+    if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
+        return VIRTIO_BLK_S_UNSUPP;
+    }
+
+    if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
+        int blk_flags = 0;
+
+        if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
+            blk_flags |= BDRV_REQ_MAY_UNMAP;
+        }
+
+        if (blk_co_pwrite_zeroes(blk, sector << VIRTIO_BLK_SECTOR_BITS,
+                                 bytes, blk_flags) == 0) {
+            return VIRTIO_BLK_S_OK;
+        }
+    } else if (type == VIRTIO_BLK_T_DISCARD) {
+        /*
+         * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
+         * discard commands if the unmap flag is set.
+         */
+        if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
+            return VIRTIO_BLK_S_UNSUPP;
+        }
+
+        if (blk_co_pdiscard(blk, sector << VIRTIO_BLK_SECTOR_BITS,
+                            bytes) == 0) {
+            return VIRTIO_BLK_S_OK;
+        }
+    }
+
+    return VIRTIO_BLK_S_IOERR;
+}
+
+int coroutine_fn virtio_blk_process_req(VirtioBlkHandler *handler,
+                                        struct iovec *in_iov,
+                                        struct iovec *out_iov,
+                                        unsigned int in_num,
+                                        unsigned int out_num)
+{
+    BlockBackend *blk = handler->blk;
+    struct virtio_blk_inhdr *in;
+    struct virtio_blk_outhdr out;
+    uint32_t type;
+    int in_len;
+
+    if (out_num < 1 || in_num < 1) {
+        error_report("virtio-blk request missing headers");
+        return -EINVAL;
+    }
+
+    if (unlikely(iov_to_buf(out_iov, out_num, 0, &out,
+                            sizeof(out)) != sizeof(out))) {
+        error_report("virtio-blk request outhdr too short");
+        return -EINVAL;
+    }
+
+    iov_discard_front(&out_iov, &out_num, sizeof(out));
+
+    if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
+        error_report("virtio-blk request inhdr too short");
+        return -EINVAL;
+    }
+
+    /* We always touch the last byte, so just see how big in_iov is. */
+    in_len = iov_size(in_iov, in_num);
+    in = (void *)in_iov[in_num - 1].iov_base
+                 + in_iov[in_num - 1].iov_len
+                 - sizeof(struct virtio_blk_inhdr);
+    iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
+
+    type = le32_to_cpu(out.type);
+    switch (type & ~VIRTIO_BLK_T_BARRIER) {
+    case VIRTIO_BLK_T_IN:
+    case VIRTIO_BLK_T_OUT: {
+        QEMUIOVector qiov;
+        int64_t offset;
+        ssize_t ret = 0;
+        bool is_write = type & VIRTIO_BLK_T_OUT;
+        int64_t sector_num = le64_to_cpu(out.sector);
+
+        if (is_write && !handler->writable) {
+            in->status = VIRTIO_BLK_S_IOERR;
+            break;
+        }
+
+        if (is_write) {
+            qemu_iovec_init_external(&qiov, out_iov, out_num);
+        } else {
+            qemu_iovec_init_external(&qiov, in_iov, in_num);
+        }
+
+        if (unlikely(!virtio_blk_sect_range_ok(blk,
+                                               handler->logical_block_size,
+                                               sector_num, qiov.size))) {
+            in->status = VIRTIO_BLK_S_IOERR;
+            break;
+        }
+
+        offset = sector_num << VIRTIO_BLK_SECTOR_BITS;
+
+        if (is_write) {
+            ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
+        } else {
+            ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
+        }
+        if (ret >= 0) {
+            in->status = VIRTIO_BLK_S_OK;
+        } else {
+            in->status = VIRTIO_BLK_S_IOERR;
+        }
+        break;
+    }
+    case VIRTIO_BLK_T_FLUSH:
+        if (blk_co_flush(blk) == 0) {
+            in->status = VIRTIO_BLK_S_OK;
+        } else {
+            in->status = VIRTIO_BLK_S_IOERR;
+        }
+        break;
+    case VIRTIO_BLK_T_GET_ID: {
+        size_t size = MIN(strlen(handler->serial) + 1,
+                          MIN(iov_size(in_iov, in_num),
+                              VIRTIO_BLK_ID_BYTES));
+        iov_from_buf(in_iov, in_num, 0, handler->serial, size);
+        in->status = VIRTIO_BLK_S_OK;
+        break;
+    }
+    case VIRTIO_BLK_T_DISCARD:
+    case VIRTIO_BLK_T_WRITE_ZEROES:
+        if (!handler->writable) {
+            in->status = VIRTIO_BLK_S_IOERR;
+            break;
+        }
+        in->status = virtio_blk_discard_write_zeroes(handler, out_iov,
+                                                     out_num, type);
+        break;
+    default:
+        in->status = VIRTIO_BLK_S_UNSUPP;
+        break;
+    }
+
+    return in_len;
+}
diff --git a/block/export/virtio-blk-handler.h b/block/export/virtio-blk-handler.h
new file mode 100644 (file)
index 0000000..150d44c
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Handler for virtio-blk I/O
+ *
+ * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
+ *
+ * Author:
+ *   Xie Yongji <xieyongji@bytedance.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef VIRTIO_BLK_HANDLER_H
+#define VIRTIO_BLK_HANDLER_H
+
+#include "sysemu/block-backend.h"
+
+#define VIRTIO_BLK_SECTOR_BITS 9
+#define VIRTIO_BLK_SECTOR_SIZE (1ULL << VIRTIO_BLK_SECTOR_BITS)
+
+#define VIRTIO_BLK_MAX_DISCARD_SECTORS 32768
+#define VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS 32768
+
+typedef struct {
+    BlockBackend *blk;
+    char *serial;
+    uint32_t logical_block_size;
+    bool writable;
+} VirtioBlkHandler;
+
+int coroutine_fn virtio_blk_process_req(VirtioBlkHandler *handler,
+                                        struct iovec *in_iov,
+                                        struct iovec *out_iov,
+                                        unsigned int in_num,
+                                        unsigned int out_num);
+
+#endif /* VIRTIO_BLK_HANDLER_H */
index d5fd1dbcd20bddc320a86c0cdaca03bca3b73974..f59f7a57ae4607e502f82298279255bc1f4c5ca9 100644 (file)
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "qapi/error.h"
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
 #include "qemu/units.h"
+#include "qemu/memalign.h"
 #include "trace.h"
 #include "block/thread-pool.h"
 #include "qemu/iov.h"
 #include "scsi/constants.h"
 
 #if defined(__APPLE__) && (__MACH__)
+#include <sys/ioctl.h>
+#if defined(HAVE_HOST_BLOCK_DEVICE)
 #include <paths.h>
 #include <sys/param.h>
+#include <sys/mount.h>
 #include <IOKit/IOKitLib.h>
 #include <IOKit/IOBSD.h>
 #include <IOKit/storage/IOMediaBSDClient.h>
@@ -52,6 +56,7 @@
 //#include <IOKit/storage/IOCDTypes.h>
 #include <IOKit/storage/IODVDMedia.h>
 #include <CoreFoundation/CoreFoundation.h>
+#endif /* defined(HAVE_HOST_BLOCK_DEVICE) */
 #endif
 
 #ifdef __sun__
@@ -63,6 +68,9 @@
 #include <sys/param.h>
 #include <sys/syscall.h>
 #include <sys/vfs.h>
+#if defined(CONFIG_BLKZONED)
+#include <linux/blkzoned.h>
+#endif
 #include <linux/cdrom.h>
 #include <linux/fd.h>
 #include <linux/fs.h>
 #include <sys/diskslice.h>
 #endif
 
-#ifdef CONFIG_XFS
-#include <xfs/xfs.h>
-#endif
-
-#include "trace.h"
-
 /* OS X does not have O_DSYNC */
 #ifndef O_DSYNC
 #ifdef O_SYNC
@@ -148,21 +150,20 @@ typedef struct BDRVRawState {
     uint64_t locked_perm;
     uint64_t locked_shared_perm;
 
+    uint64_t aio_max_batch;
+
     int perm_change_fd;
     int perm_change_flags;
     BDRVReopenState *reopen_state;
 
-#ifdef CONFIG_XFS
-    bool is_xfs:1;
-#endif
     bool has_discard:1;
     bool has_write_zeroes:1;
-    bool discard_zeroes:1;
     bool use_linux_aio:1;
     bool use_linux_io_uring:1;
-    bool page_cache_inconsistent:1;
+    int page_cache_inconsistent; /* errno from fdatasync failure */
     bool has_fallocate;
     bool needs_alignment;
+    bool force_alignment;
     bool drop_cache;
     bool check_cache_dropped;
     struct {
@@ -175,13 +176,22 @@ typedef struct BDRVRawState {
 } BDRVRawState;
 
 typedef struct BDRVRawReopenState {
-    int fd;
     int open_flags;
     bool drop_cache;
     bool check_cache_dropped;
 } BDRVRawReopenState;
 
-static int fd_open(BlockDriverState *bs);
+static int fd_open(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+
+    /* this is just to ensure s->fd is sane (its called by io ops) */
+    if (s->fd >= 0) {
+        return 0;
+    }
+    return -EIO;
+}
+
 static int64_t raw_getlength(BlockDriverState *bs);
 
 typedef struct RawPosixAIOData {
@@ -209,6 +219,16 @@ typedef struct RawPosixAIOData {
             PreallocMode prealloc;
             Error **errp;
         } truncate;
+        struct {
+            unsigned int *nr_zones;
+            BlockZoneDescriptor *zones;
+        } zone_report;
+        struct {
+            unsigned long op;
+        } zone_mgmt;
+        struct {
+            struct stat *st;
+        } fstat;
     };
 } RawPosixAIOData;
 
@@ -216,6 +236,20 @@ typedef struct RawPosixAIOData {
 static int cdrom_reopen(BlockDriverState *bs);
 #endif
 
+/*
+ * Elide EAGAIN and EACCES details when failing to lock, as this
+ * indicates that the specified file region is already locked by
+ * another process, which is considered a common scenario.
+ */
+#define raw_lock_error_setg_errno(errp, err, fmt, ...)                  \
+    do {                                                                \
+        if ((err) == EAGAIN || (err) == EACCES) {                       \
+            error_setg((errp), (fmt), ## __VA_ARGS__);                  \
+        } else {                                                        \
+            error_setg_errno((errp), (err), (fmt), ## __VA_ARGS__);     \
+        }                                                               \
+    } while (0)
+
 #if defined(__NetBSD__)
 static int raw_normalize_devicepath(const char **filename, Error **errp)
 {
@@ -324,6 +358,17 @@ static bool dio_byte_aligned(int fd)
     return false;
 }
 
+static bool raw_needs_alignment(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) {
+        return true;
+    }
+
+    return s->force_alignment;
+}
+
 /* Check if read is allowed with given memory buffer and length.
  *
  * This function is used to check O_DIRECT memory buffer and request alignment.
@@ -353,7 +398,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
 {
     BDRVRawState *s = bs->opaque;
     char *buf;
-    size_t max_align = MAX(MAX_BLOCKSIZE, qemu_real_host_page_size);
+    size_t max_align = MAX(MAX_BLOCKSIZE, qemu_real_host_page_size());
     size_t alignments[] = {1, 512, 1024, 2048, 4096};
 
     /* For SCSI generic devices the alignment is not really used.
@@ -370,14 +415,22 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
     if (probe_logical_blocksize(fd, &bs->bl.request_alignment) < 0) {
         bs->bl.request_alignment = 0;
     }
-#ifdef CONFIG_XFS
-    if (s->is_xfs) {
-        struct dioattr da;
-        if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) {
-            bs->bl.request_alignment = da.d_miniosz;
-            /* The kernel returns wrong information for d_mem */
-            /* s->buf_align = da.d_mem; */
-        }
+
+#ifdef __linux__
+    /*
+     * The XFS ioctl definitions are shipped in extra packages that might
+     * not always be available. Since we just need the XFS_IOC_DIOINFO ioctl
+     * here, we simply use our own definition instead:
+     */
+    struct xfs_dioattr {
+        uint32_t d_mem;
+        uint32_t d_miniosz;
+        uint32_t d_maxiosz;
+    } da;
+    if (ioctl(fd, _IOR('X', 30, struct xfs_dioattr), &da) >= 0) {
+        bs->bl.request_alignment = da.d_miniosz;
+        /* The kernel returns wrong information for d_mem */
+        /* s->buf_align = da.d_mem; */
     }
 #endif
 
@@ -505,6 +558,11 @@ static QemuOptsList raw_runtime_opts = {
             .type = QEMU_OPT_STRING,
             .help = "host AIO implementation (threads, native, io_uring)",
         },
+        {
+            .name = "aio-max-batch",
+            .type = QEMU_OPT_NUMBER,
+            .help = "AIO max batch size (0 = auto handled by AIO backend, default: 0)",
+        },
         {
             .name = "locking",
             .type = QEMU_OPT_STRING,
@@ -584,6 +642,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
     s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING);
 #endif
 
+    s->aio_max_batch = qemu_opt_get_number(opts, "aio-max-batch", 0);
+
     locking = qapi_enum_parse(&OnOffAuto_lookup,
                               qemu_opt_get(opts, "locking"),
                               ON_OFF_AUTO_AUTO, &local_err);
@@ -694,9 +754,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
 
     s->has_discard = true;
     s->has_write_zeroes = true;
-    if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) {
-        s->needs_alignment = true;
-    }
 
     if (fstat(s->fd, &st) < 0) {
         ret = -errno;
@@ -705,44 +762,43 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
     }
 
     if (!device) {
-        if (S_ISBLK(st.st_mode)) {
-            warn_report("Opening a block device as a file using the '%s' "
-                        "driver is deprecated", bs->drv->format_name);
-        } else if (S_ISCHR(st.st_mode)) {
-            warn_report("Opening a character device as a file using the '%s' "
-                        "driver is deprecated", bs->drv->format_name);
-        } else if (!S_ISREG(st.st_mode)) {
-            error_setg(errp, "A regular file was expected by the '%s' driver, "
-                       "but something else was given", bs->drv->format_name);
+        if (!S_ISREG(st.st_mode)) {
+            error_setg(errp, "'%s' driver requires '%s' to be a regular file",
+                       bs->drv->format_name, bs->filename);
             ret = -EINVAL;
             goto fail;
         } else {
-            s->discard_zeroes = true;
             s->has_fallocate = true;
         }
     } else {
         if (!(S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
-            error_setg(errp, "'%s' driver expects either "
-                       "a character or block device", bs->drv->format_name);
+            error_setg(errp, "'%s' driver requires '%s' to be either "
+                       "a character or block device",
+                       bs->drv->format_name, bs->filename);
             ret = -EINVAL;
             goto fail;
         }
     }
+#ifdef CONFIG_BLKZONED
+    /*
+     * The kernel page cache does not reliably work for writes to SWR zones
+     * of zoned block device because it can not guarantee the order of writes.
+     */
+    if ((bs->bl.zoned != BLK_Z_NONE) &&
+        (!(s->open_flags & O_DIRECT))) {
+        error_setg(errp, "The driver supports zoned devices, and it requires "
+                         "cache.direct=on, which was not specified.");
+        return -EINVAL; /* No host kernel page cache */
+    }
+#endif
 
     if (S_ISBLK(st.st_mode)) {
-#ifdef BLKDISCARDZEROES
-        unsigned int arg;
-        if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) {
-            s->discard_zeroes = true;
-        }
-#endif
 #ifdef __linux__
         /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache.  Do
          * not rely on the contents of discarded blocks unless using O_DIRECT.
          * Same for BLKZEROOUT.
          */
         if (!(bs->open_flags & BDRV_O_NOCACHE)) {
-            s->discard_zeroes = false;
             s->has_write_zeroes = false;
         }
 #endif
@@ -755,15 +811,10 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
          * so QEMU makes sure all IO operations on the device are aligned
          * to sector size, or else FreeBSD will reject them with EINVAL.
          */
-        s->needs_alignment = true;
-    }
-#endif
-
-#ifdef CONFIG_XFS
-    if (platform_test_xfs_fd(s->fd)) {
-        s->is_xfs = true;
+        s->force_alignment = true;
     }
 #endif
+    s->needs_alignment = raw_needs_alignment(bs);
 
     bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
     if (S_ISREG(st.st_mode)) {
@@ -836,7 +887,8 @@ static int raw_apply_lock_bytes(BDRVRawState *s, int fd,
         if ((perm_lock_bits & bit) && !(locked_perm & bit)) {
             ret = qemu_lock_fd(fd, off, 1, false);
             if (ret) {
-                error_setg(errp, "Failed to lock byte %d", off);
+                raw_lock_error_setg_errno(errp, -ret, "Failed to lock byte %d",
+                                          off);
                 return ret;
             } else if (s) {
                 s->locked_perm |= bit;
@@ -844,7 +896,7 @@ static int raw_apply_lock_bytes(BDRVRawState *s, int fd,
         } else if (unlock && (locked_perm & bit) && !(perm_lock_bits & bit)) {
             ret = qemu_unlock_fd(fd, off, 1);
             if (ret) {
-                error_setg(errp, "Failed to unlock byte %d", off);
+                error_setg_errno(errp, -ret, "Failed to unlock byte %d", off);
                 return ret;
             } else if (s) {
                 s->locked_perm &= ~bit;
@@ -857,7 +909,8 @@ static int raw_apply_lock_bytes(BDRVRawState *s, int fd,
         if ((shared_perm_lock_bits & bit) && !(locked_shared_perm & bit)) {
             ret = qemu_lock_fd(fd, off, 1, false);
             if (ret) {
-                error_setg(errp, "Failed to lock byte %d", off);
+                raw_lock_error_setg_errno(errp, -ret, "Failed to lock byte %d",
+                                          off);
                 return ret;
             } else if (s) {
                 s->locked_shared_perm |= bit;
@@ -866,7 +919,7 @@ static int raw_apply_lock_bytes(BDRVRawState *s, int fd,
                    !(shared_perm_lock_bits & bit)) {
             ret = qemu_unlock_fd(fd, off, 1);
             if (ret) {
-                error_setg(errp, "Failed to unlock byte %d", off);
+                error_setg_errno(errp, -ret, "Failed to unlock byte %d", off);
                 return ret;
             } else if (s) {
                 s->locked_shared_perm &= ~bit;
@@ -890,9 +943,10 @@ static int raw_check_lock_bytes(int fd, uint64_t perm, uint64_t shared_perm,
             ret = qemu_lock_fd_test(fd, off, 1, true);
             if (ret) {
                 char *perm_name = bdrv_perm_names(p);
-                error_setg(errp,
-                           "Failed to get \"%s\" lock",
-                           perm_name);
+
+                raw_lock_error_setg_errno(errp, -ret,
+                                          "Failed to get \"%s\" lock",
+                                          perm_name);
                 g_free(perm_name);
                 return ret;
             }
@@ -905,9 +959,10 @@ static int raw_check_lock_bytes(int fd, uint64_t perm, uint64_t shared_perm,
             ret = qemu_lock_fd_test(fd, off, 1, true);
             if (ret) {
                 char *perm_name = bdrv_perm_names(p);
-                error_setg(errp,
-                           "Failed to get shared \"%s\" lock",
-                           perm_name);
+
+                raw_lock_error_setg_errno(errp, -ret,
+                                          "Failed to get shared \"%s\" lock",
+                                          perm_name);
                 g_free(perm_name);
                 return ret;
             }
@@ -984,6 +1039,21 @@ static int raw_handle_perm_lock(BlockDriverState *bs,
     return ret;
 }
 
+/* Sets a specific flag */
+static int fcntl_setfl(int fd, int flag)
+{
+    int flags;
+
+    flags = fcntl(fd, F_GETFL);
+    if (flags == -1) {
+        return -errno;
+    }
+    if (fcntl(fd, F_SETFL, flags | flag) == -1) {
+        return -errno;
+    }
+    return 0;
+}
+
 static int raw_reconfigure_getfd(BlockDriverState *bs, int flags,
                                  int *open_flags, uint64_t perm, bool force_dup,
                                  Error **errp)
@@ -1062,7 +1132,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
     BDRVRawReopenState *rs;
     QemuOpts *opts;
     int ret;
-    Error *local_err = NULL;
 
     assert(state != NULL);
     assert(state->bs != NULL);
@@ -1088,32 +1157,18 @@ static int raw_reopen_prepare(BDRVReopenState *state,
      * bdrv_reopen_prepare() will detect changes and complain. */
     qemu_opts_to_qdict(opts, state->options);
 
-    rs->fd = raw_reconfigure_getfd(state->bs, state->flags, &rs->open_flags,
-                                   state->perm, true, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -1;
-        goto out;
-    }
-
-    /* Fail already reopen_prepare() if we can't get a working O_DIRECT
-     * alignment with the new fd. */
-    if (rs->fd != -1) {
-        raw_probe_alignment(state->bs, rs->fd, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            ret = -EINVAL;
-            goto out_fd;
-        }
-    }
+    /*
+     * As part of reopen prepare we also want to create new fd by
+     * raw_reconfigure_getfd(). But it wants updated "perm", when in
+     * bdrv_reopen_multiple() .bdrv_reopen_prepare() callback called prior to
+     * permission update. Happily, permission update is always a part
+     * (a separate stage) of bdrv_reopen_multiple() so we can rely on this
+     * fact and reconfigure fd in raw_check_perm().
+     */
 
     s->reopen_state = state;
     ret = 0;
-out_fd:
-    if (ret < 0) {
-        qemu_close(rs->fd);
-        rs->fd = -1;
-    }
+
 out:
     qemu_opts_del(opts);
     return ret;
@@ -1127,10 +1182,6 @@ static void raw_reopen_commit(BDRVReopenState *state)
     s->drop_cache = rs->drop_cache;
     s->check_cache_dropped = rs->check_cache_dropped;
     s->open_flags = rs->open_flags;
-
-    qemu_close(s->fd);
-    s->fd = rs->fd;
-
     g_free(state->opaque);
     state->opaque = NULL;
 
@@ -1149,10 +1200,6 @@ static void raw_reopen_abort(BDRVReopenState *state)
         return;
     }
 
-    if (rs->fd >= 0) {
-        qemu_close(rs->fd);
-        rs->fd = -1;
-    }
     g_free(state->opaque);
     state->opaque = NULL;
 
@@ -1160,93 +1207,332 @@ static void raw_reopen_abort(BDRVReopenState *state)
     s->reopen_state = NULL;
 }
 
-static int sg_get_max_transfer_length(int fd)
+static int hdev_get_max_hw_transfer(int fd, struct stat *st)
 {
 #ifdef BLKSECTGET
-    int max_bytes = 0;
-
-    if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
-        return max_bytes;
+    if (S_ISBLK(st->st_mode)) {
+        unsigned short max_sectors = 0;
+        if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
+            return max_sectors * 512;
+        }
     } else {
-        return -errno;
+        int max_bytes = 0;
+        if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
+            return max_bytes;
+        }
     }
+    return -errno;
 #else
     return -ENOSYS;
 #endif
 }
 
-static int sg_get_max_segments(int fd)
-{
+/*
+ * Get a sysfs attribute value as character string.
+ */
 #ifdef CONFIG_LINUX
-    char buf[32];
-    const char *end;
-    char *sysfspath = NULL;
+static int get_sysfs_str_val(struct stat *st, const char *attribute,
+                             char **val) {
+    g_autofree char *sysfspath = NULL;
+    size_t len;
+
+    if (!S_ISBLK(st->st_mode)) {
+        return -ENOTSUP;
+    }
+
+    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/%s",
+                                major(st->st_rdev), minor(st->st_rdev),
+                                attribute);
+    if (!g_file_get_contents(sysfspath, val, &len, NULL)) {
+        return -ENOENT;
+    }
+
+    /* The file is ended with '\n' */
+    char *p;
+    p = *val;
+    if (*(p + len - 1) == '\n') {
+        *(p + len - 1) = '\0';
+    }
+    return 0;
+}
+#endif
+
+#if defined(CONFIG_BLKZONED)
+static int get_sysfs_zoned_model(struct stat *st, BlockZoneModel *zoned)
+{
+    g_autofree char *val = NULL;
     int ret;
-    int sysfd = -1;
-    long max_segments;
-    struct stat st;
 
-    if (fstat(fd, &st)) {
-        ret = -errno;
-        goto out;
+    ret = get_sysfs_str_val(st, "zoned", &val);
+    if (ret < 0) {
+        return ret;
     }
 
-    sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
-                                major(st.st_rdev), minor(st.st_rdev));
-    sysfd = open(sysfspath, O_RDONLY);
-    if (sysfd == -1) {
-        ret = -errno;
-        goto out;
+    if (strcmp(val, "host-managed") == 0) {
+        *zoned = BLK_Z_HM;
+    } else if (strcmp(val, "host-aware") == 0) {
+        *zoned = BLK_Z_HA;
+    } else if (strcmp(val, "none") == 0) {
+        *zoned = BLK_Z_NONE;
+    } else {
+        return -ENOTSUP;
     }
-    do {
-        ret = read(sysfd, buf, sizeof(buf) - 1);
-    } while (ret == -1 && errno == EINTR);
+    return 0;
+}
+#endif /* defined(CONFIG_BLKZONED) */
+
+/*
+ * Get a sysfs attribute value as a long integer.
+ */
+#ifdef CONFIG_LINUX
+static long get_sysfs_long_val(struct stat *st, const char *attribute)
+{
+    g_autofree char *str = NULL;
+    const char *end;
+    long val;
+    int ret;
+
+    ret = get_sysfs_str_val(st, attribute, &str);
     if (ret < 0) {
-        ret = -errno;
-        goto out;
-    } else if (ret == 0) {
-        ret = -EIO;
-        goto out;
+        return ret;
     }
-    buf[ret] = 0;
+
     /* The file is ended with '\n', pass 'end' to accept that. */
-    ret = qemu_strtol(buf, &end, 10, &max_segments);
-    if (ret == 0 && end && *end == '\n') {
-        ret = max_segments;
+    ret = qemu_strtol(str, &end, 10, &val);
+    if (ret == 0 && end && *end == '\0') {
+        ret = val;
     }
+    return ret;
+}
+#endif
 
-out:
-    if (sysfd != -1) {
-        close(sysfd);
+static int hdev_get_max_segments(int fd, struct stat *st)
+{
+#ifdef CONFIG_LINUX
+    int ret;
+
+    if (S_ISCHR(st->st_mode)) {
+        if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
+            return ret;
+        }
+        return -ENOTSUP;
     }
-    g_free(sysfspath);
-    return ret;
+    return get_sysfs_long_val(st, "max_segments");
 #else
     return -ENOTSUP;
 #endif
 }
 
+#if defined(CONFIG_BLKZONED)
+/*
+ * If the reset_all flag is true, then the wps of zone whose state is
+ * not readonly or offline should be all reset to the start sector.
+ * Else, take the real wp of the device.
+ */
+static int get_zones_wp(BlockDriverState *bs, int fd, int64_t offset,
+                        unsigned int nrz, bool reset_all)
+{
+    struct blk_zone *blkz;
+    size_t rep_size;
+    uint64_t sector = offset >> BDRV_SECTOR_BITS;
+    BlockZoneWps *wps = bs->wps;
+    unsigned int j = offset / bs->bl.zone_size;
+    unsigned int n = 0, i = 0;
+    int ret;
+    rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct blk_zone);
+    g_autofree struct blk_zone_report *rep = NULL;
+
+    rep = g_malloc(rep_size);
+    blkz = (struct blk_zone *)(rep + 1);
+    while (n < nrz) {
+        memset(rep, 0, rep_size);
+        rep->sector = sector;
+        rep->nr_zones = nrz - n;
+
+        do {
+            ret = ioctl(fd, BLKREPORTZONE, rep);
+        } while (ret != 0 && errno == EINTR);
+        if (ret != 0) {
+            error_report("%d: ioctl BLKREPORTZONE at %" PRId64 " failed %d",
+                    fd, offset, errno);
+            return -errno;
+        }
+
+        if (!rep->nr_zones) {
+            break;
+        }
+
+        for (i = 0; i < rep->nr_zones; ++i, ++n, ++j) {
+            /*
+             * The wp tracking cares only about sequential writes required and
+             * sequential write preferred zones so that the wp can advance to
+             * the right location.
+             * Use the most significant bit of the wp location to indicate the
+             * zone type: 0 for SWR/SWP zones and 1 for conventional zones.
+             */
+            if (blkz[i].type == BLK_ZONE_TYPE_CONVENTIONAL) {
+                wps->wp[j] |= 1ULL << 63;
+            } else {
+                switch(blkz[i].cond) {
+                case BLK_ZONE_COND_FULL:
+                case BLK_ZONE_COND_READONLY:
+                    /* Zone not writable */
+                    wps->wp[j] = (blkz[i].start + blkz[i].len) << BDRV_SECTOR_BITS;
+                    break;
+                case BLK_ZONE_COND_OFFLINE:
+                    /* Zone not writable nor readable */
+                    wps->wp[j] = (blkz[i].start) << BDRV_SECTOR_BITS;
+                    break;
+                default:
+                    if (reset_all) {
+                        wps->wp[j] = blkz[i].start << BDRV_SECTOR_BITS;
+                    } else {
+                        wps->wp[j] = blkz[i].wp << BDRV_SECTOR_BITS;
+                    }
+                    break;
+                }
+            }
+        }
+        sector = blkz[i - 1].start + blkz[i - 1].len;
+    }
+
+    return 0;
+}
+
+static void update_zones_wp(BlockDriverState *bs, int fd, int64_t offset,
+                            unsigned int nrz)
+{
+    if (get_zones_wp(bs, fd, offset, nrz, 0) < 0) {
+        error_report("update zone wp failed");
+    }
+}
+
+static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
+                                     Error **errp)
+{
+    BDRVRawState *s = bs->opaque;
+    BlockZoneModel zoned;
+    int ret;
+
+    ret = get_sysfs_zoned_model(st, &zoned);
+    if (ret < 0 || zoned == BLK_Z_NONE) {
+        goto no_zoned;
+    }
+    bs->bl.zoned = zoned;
+
+    ret = get_sysfs_long_val(st, "max_open_zones");
+    if (ret >= 0) {
+        bs->bl.max_open_zones = ret;
+    }
+
+    ret = get_sysfs_long_val(st, "max_active_zones");
+    if (ret >= 0) {
+        bs->bl.max_active_zones = ret;
+    }
+
+    /*
+     * The zoned device must at least have zone size and nr_zones fields.
+     */
+    ret = get_sysfs_long_val(st, "chunk_sectors");
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Unable to read chunk_sectors "
+                                     "sysfs attribute");
+        goto no_zoned;
+    } else if (!ret) {
+        error_setg(errp, "Read 0 from chunk_sectors sysfs attribute");
+        goto no_zoned;
+    }
+    bs->bl.zone_size = ret << BDRV_SECTOR_BITS;
+
+    ret = get_sysfs_long_val(st, "nr_zones");
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Unable to read nr_zones "
+                                     "sysfs attribute");
+        goto no_zoned;
+    } else if (!ret) {
+        error_setg(errp, "Read 0 from nr_zones sysfs attribute");
+        goto no_zoned;
+    }
+    bs->bl.nr_zones = ret;
+
+    ret = get_sysfs_long_val(st, "zone_append_max_bytes");
+    if (ret > 0) {
+        bs->bl.max_append_sectors = ret >> BDRV_SECTOR_BITS;
+    }
+
+    ret = get_sysfs_long_val(st, "physical_block_size");
+    if (ret >= 0) {
+        bs->bl.write_granularity = ret;
+    }
+
+    /* The refresh_limits() function can be called multiple times. */
+    g_free(bs->wps);
+    bs->wps = g_malloc(sizeof(BlockZoneWps) +
+            sizeof(int64_t) * bs->bl.nr_zones);
+    ret = get_zones_wp(bs, s->fd, 0, bs->bl.nr_zones, 0);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "report wps failed");
+        goto no_zoned;
+    }
+    qemu_co_mutex_init(&bs->wps->colock);
+    return;
+
+no_zoned:
+    bs->bl.zoned = BLK_Z_NONE;
+    g_free(bs->wps);
+    bs->wps = NULL;
+}
+#else /* !defined(CONFIG_BLKZONED) */
+static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
+                                     Error **errp)
+{
+    bs->bl.zoned = BLK_Z_NONE;
+}
+#endif /* !defined(CONFIG_BLKZONED) */
+
 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     BDRVRawState *s = bs->opaque;
+    struct stat st;
+
+    s->needs_alignment = raw_needs_alignment(bs);
+    raw_probe_alignment(bs, s->fd, errp);
+
+    bs->bl.min_mem_alignment = s->buf_align;
+    bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size());
+
+    /*
+     * Maximum transfers are best effort, so it is okay to ignore any
+     * errors.  That said, based on the man page errors in fstat would be
+     * very much unexpected; the only possible case seems to be ENOMEM.
+     */
+    if (fstat(s->fd, &st)) {
+        return;
+    }
 
-    if (bs->sg) {
-        int ret = sg_get_max_transfer_length(s->fd);
+#if defined(__APPLE__) && (__MACH__)
+    struct statfs buf;
+
+    if (!fstatfs(s->fd, &buf)) {
+        bs->bl.opt_transfer = buf.f_iosize;
+        bs->bl.pdiscard_alignment = buf.f_bsize;
+    }
+#endif
+
+    if (bdrv_is_sg(bs) || S_ISBLK(st.st_mode)) {
+        int ret = hdev_get_max_hw_transfer(s->fd, &st);
 
         if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
-            bs->bl.max_transfer = pow2floor(ret);
+            bs->bl.max_hw_transfer = ret;
         }
 
-        ret = sg_get_max_segments(s->fd);
+        ret = hdev_get_max_segments(s->fd, &st);
         if (ret > 0) {
-            bs->bl.max_transfer = MIN(bs->bl.max_transfer,
-                                      ret * qemu_real_host_page_size);
+            bs->bl.max_hw_iov = ret;
         }
     }
 
-    raw_probe_alignment(bs, s->fd, errp);
-    bs->bl.min_mem_alignment = s->buf_align;
-    bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size);
+    raw_refresh_zoned_limits(bs, &st, errp);
 }
 
 static int check_for_dasd(int fd)
@@ -1270,9 +1556,12 @@ static int hdev_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
     BDRVRawState *s = bs->opaque;
     int ret;
 
-    /* If DASD, get blocksizes */
+    /* If DASD or zoned devices, get blocksizes */
     if (check_for_dasd(s->fd) < 0) {
-        return -ENOTSUP;
+        /* zoned devices are not DASD */
+        if (bs->bl.zoned == BLK_Z_NONE) {
+            return -ENOTSUP;
+        }
     }
     ret = probe_logical_blocksize(s->fd, &bsz->log);
     if (ret < 0) {
@@ -1328,7 +1617,9 @@ static int handle_aiocb_ioctl(void *opaque)
     RawPosixAIOData *aiocb = opaque;
     int ret;
 
-    ret = ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf);
+    ret = RETRY_ON_EINTR(
+        ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf)
+    );
     if (ret == -1) {
         return -errno;
     }
@@ -1344,11 +1635,13 @@ static int handle_aiocb_flush(void *opaque)
     int ret;
 
     if (s->page_cache_inconsistent) {
-        return -EIO;
+        return -s->page_cache_inconsistent;
     }
 
     ret = qemu_fdatasync(aiocb->aio_fildes);
     if (ret == -1) {
+        trace_file_flush_fdatasync_failed(errno);
+
         /* There is no clear definition of the semantics of a failing fsync(),
          * so we may have to assume the worst. The sad truth is that this
          * assumption is correct for Linux. Some pages are now probably marked
@@ -1363,7 +1656,7 @@ static int handle_aiocb_flush(void *opaque)
          * Obviously, this doesn't affect O_DIRECT, which bypasses the page
          * cache. */
         if ((s->open_flags & O_DIRECT) == 0) {
-            s->page_cache_inconsistent = true;
+            s->page_cache_inconsistent = errno;
         }
         return -errno;
     }
@@ -1408,18 +1701,17 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
 {
     ssize_t len;
 
-    do {
-        if (aiocb->aio_type & QEMU_AIO_WRITE)
-            len = qemu_pwritev(aiocb->aio_fildes,
-                               aiocb->io.iov,
-                               aiocb->io.niov,
-                               aiocb->aio_offset);
-         else
-            len = qemu_preadv(aiocb->aio_fildes,
-                              aiocb->io.iov,
-                              aiocb->io.niov,
-                              aiocb->aio_offset);
-    } while (len == -1 && errno == EINTR);
+    len = RETRY_ON_EINTR(
+        (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) ?
+            qemu_pwritev(aiocb->aio_fildes,
+                           aiocb->io.iov,
+                           aiocb->io.niov,
+                           aiocb->aio_offset) :
+            qemu_preadv(aiocb->aio_fildes,
+                          aiocb->io.iov,
+                          aiocb->io.niov,
+                          aiocb->aio_offset)
+    );
 
     if (len == -1) {
         return -errno;
@@ -1439,7 +1731,7 @@ static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
     ssize_t len;
 
     while (offset < aiocb->aio_nbytes) {
-        if (aiocb->aio_type & QEMU_AIO_WRITE) {
+        if (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) {
             len = pwrite(aiocb->aio_fildes,
                          (const char *)buf + offset,
                          aiocb->aio_nbytes - offset,
@@ -1532,7 +1824,7 @@ static int handle_aiocb_rw(void *opaque)
     }
 
     nbytes = handle_aiocb_rw_linear(aiocb, buf);
-    if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
+    if (!(aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))) {
         char *p = buf;
         size_t count = aiocb->aio_nbytes, copy;
         int i;
@@ -1568,6 +1860,7 @@ out:
     }
 }
 
+#if defined(CONFIG_FALLOCATE) || defined(BLKZEROOUT) || defined(BLKDISCARD)
 static int translate_err(int err)
 {
     if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP ||
@@ -1576,6 +1869,7 @@ static int translate_err(int err)
     }
     return err;
 }
+#endif
 
 #ifdef CONFIG_FALLOCATE
 static int do_fallocate(int fd, int mode, off_t offset, off_t len)
@@ -1636,17 +1930,17 @@ static int handle_aiocb_write_zeroes(void *opaque)
     if (s->has_write_zeroes) {
         int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
                                aiocb->aio_offset, aiocb->aio_nbytes);
-        if (ret == -EINVAL) {
-            /*
-             * Allow falling back to pwrite for file systems that
-             * do not support fallocate() for an unaligned byte range.
-             */
-            return -ENOTSUP;
-        }
-        if (ret == 0 || ret != -ENOTSUP) {
+        if (ret == -ENOTSUP) {
+            s->has_write_zeroes = false;
+        } else if (ret == 0 || ret != -EINVAL) {
             return ret;
         }
-        s->has_write_zeroes = false;
+        /*
+         * Note: Some file systems do not like unaligned byte ranges, and
+         * return EINVAL in such a case, though they should not do it according
+         * to the man-page of fallocate(). Thus we simply ignore this return
+         * value and try the other fallbacks instead.
+         */
     }
 #endif
 
@@ -1661,6 +1955,17 @@ static int handle_aiocb_write_zeroes(void *opaque)
                 return ret;
             }
             s->has_fallocate = false;
+        } else if (ret == -EINVAL) {
+            /*
+             * Some file systems like older versions of GPFS do not like un-
+             * aligned byte ranges, and return EINVAL in such a case, though
+             * they should not do it according to the man-page of fallocate().
+             * Warn about the bad filesystem and try the final fallback instead.
+             */
+            warn_report_once("Your file system is misbehaving: "
+                             "fallocate(FALLOC_FL_PUNCH_HOLE) returned EINVAL. "
+                             "Please report this bug to your file system "
+                             "vendor.");
         } else if (ret != -ENOTSUP) {
             return ret;
         } else {
@@ -1672,7 +1977,7 @@ static int handle_aiocb_write_zeroes(void *opaque)
 #ifdef CONFIG_FALLOCATE
     /* Last resort: we are trying to extend the file with zeroed data. This
      * can be done via fallocate(fd, 0) */
-    len = bdrv_getlength(aiocb->bs);
+    len = raw_getlength(aiocb->bs);
     if (s->has_fallocate && len >= 0 && aiocb->aio_offset >= len) {
         int ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes);
         if (ret == 0 || ret != -ENOTSUP) {
@@ -1703,24 +2008,165 @@ static int handle_aiocb_write_zeroes_unmap(void *opaque)
     default:
         return ret;
     }
-#endif
+#endif
+
+    /* If we couldn't manage to unmap while guaranteed that the area reads as
+     * all-zero afterwards, just write zeroes without unmapping */
+    return handle_aiocb_write_zeroes(aiocb);
+}
+
+#ifndef HAVE_COPY_FILE_RANGE
+static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
+                             off_t *out_off, size_t len, unsigned int flags)
+{
+#ifdef __NR_copy_file_range
+    return syscall(__NR_copy_file_range, in_fd, in_off, out_fd,
+                   out_off, len, flags);
+#else
+    errno = ENOSYS;
+    return -1;
+#endif
+}
+#endif
+
+/*
+ * parse_zone - Fill a zone descriptor
+ */
+#if defined(CONFIG_BLKZONED)
+static inline int parse_zone(struct BlockZoneDescriptor *zone,
+                              const struct blk_zone *blkz) {
+    zone->start = blkz->start << BDRV_SECTOR_BITS;
+    zone->length = blkz->len << BDRV_SECTOR_BITS;
+    zone->wp = blkz->wp << BDRV_SECTOR_BITS;
+
+#ifdef HAVE_BLK_ZONE_REP_CAPACITY
+    zone->cap = blkz->capacity << BDRV_SECTOR_BITS;
+#else
+    zone->cap = blkz->len << BDRV_SECTOR_BITS;
+#endif
+
+    switch (blkz->type) {
+    case BLK_ZONE_TYPE_SEQWRITE_REQ:
+        zone->type = BLK_ZT_SWR;
+        break;
+    case BLK_ZONE_TYPE_SEQWRITE_PREF:
+        zone->type = BLK_ZT_SWP;
+        break;
+    case BLK_ZONE_TYPE_CONVENTIONAL:
+        zone->type = BLK_ZT_CONV;
+        break;
+    default:
+        error_report("Unsupported zone type: 0x%x", blkz->type);
+        return -ENOTSUP;
+    }
+
+    switch (blkz->cond) {
+    case BLK_ZONE_COND_NOT_WP:
+        zone->state = BLK_ZS_NOT_WP;
+        break;
+    case BLK_ZONE_COND_EMPTY:
+        zone->state = BLK_ZS_EMPTY;
+        break;
+    case BLK_ZONE_COND_IMP_OPEN:
+        zone->state = BLK_ZS_IOPEN;
+        break;
+    case BLK_ZONE_COND_EXP_OPEN:
+        zone->state = BLK_ZS_EOPEN;
+        break;
+    case BLK_ZONE_COND_CLOSED:
+        zone->state = BLK_ZS_CLOSED;
+        break;
+    case BLK_ZONE_COND_READONLY:
+        zone->state = BLK_ZS_RDONLY;
+        break;
+    case BLK_ZONE_COND_FULL:
+        zone->state = BLK_ZS_FULL;
+        break;
+    case BLK_ZONE_COND_OFFLINE:
+        zone->state = BLK_ZS_OFFLINE;
+        break;
+    default:
+        error_report("Unsupported zone state: 0x%x", blkz->cond);
+        return -ENOTSUP;
+    }
+    return 0;
+}
+#endif
+
+#if defined(CONFIG_BLKZONED)
+static int handle_aiocb_zone_report(void *opaque)
+{
+    RawPosixAIOData *aiocb = opaque;
+    int fd = aiocb->aio_fildes;
+    unsigned int *nr_zones = aiocb->zone_report.nr_zones;
+    BlockZoneDescriptor *zones = aiocb->zone_report.zones;
+    /* zoned block devices use 512-byte sectors */
+    uint64_t sector = aiocb->aio_offset / 512;
+
+    struct blk_zone *blkz;
+    size_t rep_size;
+    unsigned int nrz;
+    int ret;
+    unsigned int n = 0, i = 0;
+
+    nrz = *nr_zones;
+    rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct blk_zone);
+    g_autofree struct blk_zone_report *rep = NULL;
+    rep = g_malloc(rep_size);
+
+    blkz = (struct blk_zone *)(rep + 1);
+    while (n < nrz) {
+        memset(rep, 0, rep_size);
+        rep->sector = sector;
+        rep->nr_zones = nrz - n;
+
+        do {
+            ret = ioctl(fd, BLKREPORTZONE, rep);
+        } while (ret != 0 && errno == EINTR);
+        if (ret != 0) {
+            error_report("%d: ioctl BLKREPORTZONE at %" PRId64 " failed %d",
+                         fd, sector, errno);
+            return -errno;
+        }
+
+        if (!rep->nr_zones) {
+            break;
+        }
+
+        for (i = 0; i < rep->nr_zones; i++, n++) {
+            ret = parse_zone(&zones[n], &blkz[i]);
+            if (ret != 0) {
+                return ret;
+            }
+
+            /* The next report should start after the last zone reported */
+            sector = blkz[i].start + blkz[i].len;
+        }
+    }
 
-    /* If we couldn't manage to unmap while guaranteed that the area reads as
-     * all-zero afterwards, just write zeroes without unmapping */
-    return handle_aiocb_write_zeroes(aiocb);
+    *nr_zones = n;
+    return 0;
 }
+#endif
 
-#ifndef HAVE_COPY_FILE_RANGE
-static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
-                             off_t *out_off, size_t len, unsigned int flags)
+#if defined(CONFIG_BLKZONED)
+static int handle_aiocb_zone_mgmt(void *opaque)
 {
-#ifdef __NR_copy_file_range
-    return syscall(__NR_copy_file_range, in_fd, in_off, out_fd,
-                   out_off, len, flags);
-#else
-    errno = ENOSYS;
-    return -1;
-#endif
+    RawPosixAIOData *aiocb = opaque;
+    int fd = aiocb->aio_fildes;
+    uint64_t sector = aiocb->aio_offset / 512;
+    int64_t nr_sectors = aiocb->aio_nbytes / 512;
+    struct blk_zone_range range;
+    int ret;
+
+    /* Execute the operation */
+    range.sector = sector;
+    range.nr_sectors = nr_sectors;
+    do {
+        ret = ioctl(fd, aiocb->zone_mgmt.op, &range);
+    } while (ret != 0 && errno == EINTR);
+
+    return ret < 0 ? -errno : ret;
 }
 #endif
 
@@ -1761,7 +2207,7 @@ static int handle_aiocb_copy_range(void *opaque)
 static int handle_aiocb_discard(void *opaque)
 {
     RawPosixAIOData *aiocb = opaque;
-    int ret = -EOPNOTSUPP;
+    int ret = -ENOTSUP;
     BDRVRawState *s = aiocb->bs->opaque;
 
     if (!s->has_discard) {
@@ -1777,16 +2223,27 @@ static int handle_aiocb_discard(void *opaque)
             }
         } while (errno == EINTR);
 
-        ret = -errno;
+        ret = translate_err(-errno);
 #endif
     } else {
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
         ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                            aiocb->aio_offset, aiocb->aio_nbytes);
+        ret = translate_err(ret);
+#elif defined(__APPLE__) && (__MACH__)
+        fpunchhole_t fpunchhole;
+        fpunchhole.fp_flags = 0;
+        fpunchhole.reserved = 0;
+        fpunchhole.fp_offset = aiocb->aio_offset;
+        fpunchhole.fp_length = aiocb->aio_nbytes;
+        if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) {
+            ret = errno == ENODEV ? -ENOTSUP : -errno;
+        } else {
+            ret = 0;
+        }
 #endif
     }
 
-    ret = translate_err(ret);
     if (ret == -ENOTSUP) {
         s->has_discard = false;
     }
@@ -1812,7 +2269,7 @@ static int allocate_first_block(int fd, size_t max_size)
     size_t write_size = (max_size < MAX_BLOCKSIZE)
         ? BDRV_SECTOR_SIZE
         : MAX_BLOCKSIZE;
-    size_t max_align = MAX(MAX_BLOCKSIZE, qemu_real_host_page_size);
+    size_t max_align = MAX(MAX_BLOCKSIZE, qemu_real_host_page_size());
     void *buf;
     ssize_t n;
     int ret;
@@ -1820,9 +2277,7 @@ static int allocate_first_block(int fd, size_t max_size)
     buf = qemu_memalign(max_align, write_size);
     memset(buf, 0, write_size);
 
-    do {
-        n = pwrite(fd, buf, write_size, 0);
-    } while (n == -1 && errno == EINTR);
+    n = RETRY_ON_EINTR(pwrite(fd, buf, write_size, 0));
 
     ret = (n == -1) ? -errno : 0;
 
@@ -1965,22 +2420,53 @@ out:
     return result;
 }
 
-static int coroutine_fn raw_thread_pool_submit(BlockDriverState *bs,
-                                               ThreadPoolFunc func, void *arg)
+static int coroutine_fn raw_thread_pool_submit(ThreadPoolFunc func, void *arg)
 {
-    /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */
-    ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
-    return thread_pool_submit_co(pool, func, arg);
+    return thread_pool_submit_co(func, arg);
 }
 
-static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
+/*
+ * Check if all memory in this vector is sector aligned.
+ */
+static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
+{
+    int i;
+    size_t alignment = bdrv_min_mem_align(bs);
+    size_t len = bs->bl.request_alignment;
+    IO_CODE();
+
+    for (i = 0; i < qiov->niov; i++) {
+        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
+            return false;
+        }
+        if (qiov->iov[i].iov_len % len) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
                                    uint64_t bytes, QEMUIOVector *qiov, int type)
 {
     BDRVRawState *s = bs->opaque;
     RawPosixAIOData acb;
+    int ret;
+    uint64_t offset = *offset_ptr;
 
     if (fd_open(bs) < 0)
         return -EIO;
+#if defined(CONFIG_BLKZONED)
+    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
+        bs->bl.zoned != BLK_Z_NONE) {
+        qemu_co_mutex_lock(&bs->wps->colock);
+        if (type & QEMU_AIO_ZONE_APPEND) {
+            int index = offset / bs->bl.zone_size;
+            offset = bs->wps->wp[index];
+        }
+    }
+#endif
 
     /*
      * When using O_DIRECT, the request must be aligned to be able to use
@@ -1992,15 +2478,16 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
         type |= QEMU_AIO_MISALIGNED;
 #ifdef CONFIG_LINUX_IO_URING
     } else if (s->use_linux_io_uring) {
-        LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
         assert(qiov->size == bytes);
-        return luring_co_submit(bs, aio, s->fd, offset, qiov, type);
+        ret = luring_co_submit(bs, s->fd, offset, qiov, type);
+        goto out;
 #endif
 #ifdef CONFIG_LINUX_AIO
     } else if (s->use_linux_aio) {
-        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
         assert(qiov->size == bytes);
-        return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
+        ret = laio_co_submit(s->fd, offset, qiov, type,
+                              s->aio_max_batch);
+        goto out;
 #endif
     }
 
@@ -2017,59 +2504,55 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
     };
 
     assert(qiov->size == bytes);
-    return raw_thread_pool_submit(bs, handle_aiocb_rw, &acb);
-}
+    ret = raw_thread_pool_submit(handle_aiocb_rw, &acb);
+    goto out; /* Avoid the compiler err of unused label */
 
-static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
-                                      uint64_t bytes, QEMUIOVector *qiov,
-                                      int flags)
-{
-    return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
-}
+out:
+#if defined(CONFIG_BLKZONED)
+    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
+        bs->bl.zoned != BLK_Z_NONE) {
+        BlockZoneWps *wps = bs->wps;
+        if (ret == 0) {
+            uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
+            if (!BDRV_ZT_IS_CONV(*wp)) {
+                if (type & QEMU_AIO_ZONE_APPEND) {
+                    *offset_ptr = *wp;
+                    trace_zbd_zone_append_complete(bs, *offset_ptr
+                        >> BDRV_SECTOR_BITS);
+                }
+                /* Advance the wp if needed */
+                if (offset + bytes > *wp) {
+                    *wp = offset + bytes;
+                }
+            }
+        } else {
+            /*
+             * write and append write are not allowed to cross zone boundaries
+             */
+            update_zones_wp(bs, s->fd, offset, 1);
+        }
 
-static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
-                                       uint64_t bytes, QEMUIOVector *qiov,
-                                       int flags)
-{
-    assert(flags == 0);
-    return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
+        qemu_co_mutex_unlock(&wps->colock);
+    }
+#endif
+    return ret;
 }
 
-static void raw_aio_plug(BlockDriverState *bs)
+static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
+                                      int64_t bytes, QEMUIOVector *qiov,
+                                      BdrvRequestFlags flags)
 {
-    BDRVRawState __attribute__((unused)) *s = bs->opaque;
-#ifdef CONFIG_LINUX_AIO
-    if (s->use_linux_aio) {
-        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
-        laio_io_plug(bs, aio);
-    }
-#endif
-#ifdef CONFIG_LINUX_IO_URING
-    if (s->use_linux_io_uring) {
-        LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
-        luring_io_plug(bs, aio);
-    }
-#endif
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ);
 }
 
-static void raw_aio_unplug(BlockDriverState *bs)
+static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
+                                       int64_t bytes, QEMUIOVector *qiov,
+                                       BdrvRequestFlags flags)
 {
-    BDRVRawState __attribute__((unused)) *s = bs->opaque;
-#ifdef CONFIG_LINUX_AIO
-    if (s->use_linux_aio) {
-        LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
-        laio_io_unplug(bs, aio);
-    }
-#endif
-#ifdef CONFIG_LINUX_IO_URING
-    if (s->use_linux_io_uring) {
-        LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
-        luring_io_unplug(bs, aio);
-    }
-#endif
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE);
 }
 
-static int raw_co_flush_to_disk(BlockDriverState *bs)
+static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
     RawPosixAIOData acb;
@@ -2088,11 +2571,10 @@ static int raw_co_flush_to_disk(BlockDriverState *bs)
 
 #ifdef CONFIG_LINUX_IO_URING
     if (s->use_linux_io_uring) {
-        LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
-        return luring_co_submit(bs, aio, s->fd, 0, NULL, QEMU_AIO_FLUSH);
+        return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
     }
 #endif
-    return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
+    return raw_thread_pool_submit(handle_aiocb_flush, &acb);
 }
 
 static void raw_aio_attach_aio_context(BlockDriverState *bs,
@@ -2111,7 +2593,7 @@ static void raw_aio_attach_aio_context(BlockDriverState *bs,
 #endif
 #ifdef CONFIG_LINUX_IO_URING
     if (s->use_linux_io_uring) {
-        Error *local_err;
+        Error *local_err = NULL;
         if (!aio_setup_linux_io_uring(new_context, &local_err)) {
             error_reportf_err(local_err, "Unable to use linux io_uring, "
                                          "falling back to thread pool: ");
@@ -2126,11 +2608,42 @@ static void raw_close(BlockDriverState *bs)
     BDRVRawState *s = bs->opaque;
 
     if (s->fd >= 0) {
+#if defined(CONFIG_BLKZONED)
+        g_free(bs->wps);
+#endif
         qemu_close(s->fd);
         s->fd = -1;
     }
 }
 
+static int handle_aiocb_fstat(void *opaque)
+{
+    RawPosixAIOData *aiocb = opaque;
+
+    if (fstat(aiocb->aio_fildes, aiocb->fstat.st) < 0) {
+        return -errno;
+    }
+
+    return 0;
+}
+
+static int coroutine_fn raw_co_fstat(BlockDriverState *bs, struct stat *st)
+{
+    BDRVRawState *s = bs->opaque;
+    RawPosixAIOData acb;
+
+    acb = (RawPosixAIOData) {
+        .bs             = bs,
+        .aio_fildes     = s->fd,
+        .aio_type       = QEMU_AIO_FSTAT,
+        .fstat          = {
+            .st = st,
+        },
+    };
+
+    return raw_thread_pool_submit(handle_aiocb_fstat, &acb);
+}
+
 /**
  * Truncates the given regular file @fd to @offset and, when growing, fills the
  * new space according to @prealloc.
@@ -2154,7 +2667,7 @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset,
         },
     };
 
-    return raw_thread_pool_submit(bs, handle_aiocb_truncate, &acb);
+    return raw_thread_pool_submit(handle_aiocb_truncate, &acb);
 }
 
 static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
@@ -2295,39 +2808,37 @@ static int64_t raw_getlength(BlockDriverState *bs)
 again:
 #endif
     if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
+        size = 0;
 #ifdef DIOCGMEDIASIZE
-        if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
-#elif defined(DIOCGPART)
-        {
-                struct partinfo pi;
-                if (ioctl(fd, DIOCGPART, &pi) == 0)
-                        size = pi.media_size;
-                else
-                        size = 0;
+        if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size)) {
+            size = 0;
         }
-        if (size == 0)
 #endif
-#if defined(__APPLE__) && defined(__MACH__)
-        {
+#ifdef DIOCGPART
+        if (size == 0) {
+            struct partinfo pi;
+            if (ioctl(fd, DIOCGPART, &pi) == 0) {
+                size = pi.media_size;
+            }
+        }
+#endif
+#if defined(DKIOCGETBLOCKCOUNT) && defined(DKIOCGETBLOCKSIZE)
+        if (size == 0) {
             uint64_t sectors = 0;
             uint32_t sector_size = 0;
 
             if (ioctl(fd, DKIOCGETBLOCKCOUNT, &sectors) == 0
                && ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) == 0) {
                 size = sectors * sector_size;
-            } else {
-                size = lseek(fd, 0LL, SEEK_END);
-                if (size < 0) {
-                    return -errno;
-                }
             }
         }
-#else
-        size = lseek(fd, 0LL, SEEK_END);
+#endif
+        if (size == 0) {
+            size = lseek(fd, 0LL, SEEK_END);
+        }
         if (size < 0) {
             return -errno;
         }
-#endif
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
         switch(s->type) {
         case FTYPE_CD:
@@ -2369,14 +2880,22 @@ static int64_t raw_getlength(BlockDriverState *bs)
 }
 #endif
 
-static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
+static int64_t coroutine_fn raw_co_getlength(BlockDriverState *bs)
+{
+    return raw_getlength(bs);
+}
+
+static int64_t coroutine_fn raw_co_get_allocated_file_size(BlockDriverState *bs)
 {
     struct stat st;
-    BDRVRawState *s = bs->opaque;
+    int ret;
 
-    if (fstat(s->fd, &st) < 0) {
-        return -errno;
+    ret = raw_co_fstat(bs, &st);
+
+    if (ret) {
+        return ret;
     }
+
     return (int64_t)st.st_blocks * 512;
 }
 
@@ -2512,10 +3031,9 @@ out:
     return result;
 }
 
-static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
-                                           const char *filename,
-                                           QemuOpts *opts,
-                                           Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_create_opts(BlockDriver *drv, const char *filename,
+                   QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions options;
     int64_t total_size = 0;
@@ -2689,7 +3207,8 @@ static int find_allocation(BlockDriverState *bs, off_t start,
  * the specified offset) that are known to be in the same
  * allocated/unallocated state.
  *
- * 'bytes' is the max value 'pnum' should be set to.
+ * 'bytes' is a soft cap for 'pnum'.  If the information is free, 'pnum' may
+ * well exceed it.
  */
 static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
                                             bool want_zero,
@@ -2727,7 +3246,7 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
     } else if (data == offset) {
         /* On a data extent, compute bytes to the end of the extent,
          * possibly including a partial sector at EOF. */
-        *pnum = MIN(bytes, hole - offset);
+        *pnum = hole - offset;
 
         /*
          * We are not allowed to return partial sectors, though, so
@@ -2746,7 +3265,7 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
     } else {
         /* On a hole, compute bytes to the beginning of the next extent.  */
         assert(hole == offset);
-        *pnum = MIN(bytes, data - offset);
+        *pnum = data - offset;
         ret = BDRV_BLOCK_ZERO;
     }
     *map = offset;
@@ -2824,8 +3343,8 @@ static void check_cache_dropped(BlockDriverState *bs, Error **errp)
 }
 #endif /* __linux__ */
 
-static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs,
-                                                 Error **errp)
+static void coroutine_fn GRAPH_RDLOCK
+raw_co_invalidate_cache(BlockDriverState *bs, Error **errp)
 {
     BDRVRawState *s = bs->opaque;
     int ret;
@@ -2885,8 +3404,172 @@ static void raw_account_discard(BDRVRawState *s, uint64_t nbytes, int ret)
     }
 }
 
+/*
+ * zone report - Get a zone block device's information in the form
+ * of an array of zone descriptors.
+ * zones is an array of zone descriptors to hold zone information on reply;
+ * offset can be any byte within the entire size of the device;
+ * nr_zones is the maximum number of sectors the command should operate on.
+ */
+#if defined(CONFIG_BLKZONED)
+static int coroutine_fn raw_co_zone_report(BlockDriverState *bs, int64_t offset,
+                                           unsigned int *nr_zones,
+                                           BlockZoneDescriptor *zones) {
+    BDRVRawState *s = bs->opaque;
+    RawPosixAIOData acb = (RawPosixAIOData) {
+        .bs         = bs,
+        .aio_fildes = s->fd,
+        .aio_type   = QEMU_AIO_ZONE_REPORT,
+        .aio_offset = offset,
+        .zone_report    = {
+            .nr_zones       = nr_zones,
+            .zones          = zones,
+        },
+    };
+
+    trace_zbd_zone_report(bs, *nr_zones, offset >> BDRV_SECTOR_BITS);
+    return raw_thread_pool_submit(handle_aiocb_zone_report, &acb);
+}
+#endif
+
+/*
+ * zone management operations - Execute an operation on a zone
+ */
+#if defined(CONFIG_BLKZONED)
+static int coroutine_fn raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
+        int64_t offset, int64_t len) {
+    BDRVRawState *s = bs->opaque;
+    RawPosixAIOData acb;
+    int64_t zone_size, zone_size_mask;
+    const char *op_name;
+    unsigned long zo;
+    int ret;
+    BlockZoneWps *wps = bs->wps;
+    int64_t capacity = bs->total_sectors << BDRV_SECTOR_BITS;
+
+    zone_size = bs->bl.zone_size;
+    zone_size_mask = zone_size - 1;
+    if (offset & zone_size_mask) {
+        error_report("sector offset %" PRId64 " is not aligned to zone size "
+                     "%" PRId64 "", offset / 512, zone_size / 512);
+        return -EINVAL;
+    }
+
+    if (((offset + len) < capacity && len & zone_size_mask) ||
+        offset + len > capacity) {
+        error_report("number of sectors %" PRId64 " is not aligned to zone size"
+                      " %" PRId64 "", len / 512, zone_size / 512);
+        return -EINVAL;
+    }
+
+    uint32_t i = offset / bs->bl.zone_size;
+    uint32_t nrz = len / bs->bl.zone_size;
+    uint64_t *wp = &wps->wp[i];
+    if (BDRV_ZT_IS_CONV(*wp) && len != capacity) {
+        error_report("zone mgmt operations are not allowed for conventional zones");
+        return -EIO;
+    }
+
+    switch (op) {
+    case BLK_ZO_OPEN:
+        op_name = "BLKOPENZONE";
+        zo = BLKOPENZONE;
+        break;
+    case BLK_ZO_CLOSE:
+        op_name = "BLKCLOSEZONE";
+        zo = BLKCLOSEZONE;
+        break;
+    case BLK_ZO_FINISH:
+        op_name = "BLKFINISHZONE";
+        zo = BLKFINISHZONE;
+        break;
+    case BLK_ZO_RESET:
+        op_name = "BLKRESETZONE";
+        zo = BLKRESETZONE;
+        break;
+    default:
+        error_report("Unsupported zone op: 0x%x", op);
+        return -ENOTSUP;
+    }
+
+    acb = (RawPosixAIOData) {
+        .bs             = bs,
+        .aio_fildes     = s->fd,
+        .aio_type       = QEMU_AIO_ZONE_MGMT,
+        .aio_offset     = offset,
+        .aio_nbytes     = len,
+        .zone_mgmt  = {
+            .op = zo,
+        },
+    };
+
+    trace_zbd_zone_mgmt(bs, op_name, offset >> BDRV_SECTOR_BITS,
+                        len >> BDRV_SECTOR_BITS);
+    ret = raw_thread_pool_submit(handle_aiocb_zone_mgmt, &acb);
+    if (ret != 0) {
+        update_zones_wp(bs, s->fd, offset, nrz);
+        error_report("ioctl %s failed %d", op_name, ret);
+        return ret;
+    }
+
+    if (zo == BLKRESETZONE && len == capacity) {
+        ret = get_zones_wp(bs, s->fd, 0, bs->bl.nr_zones, 1);
+        if (ret < 0) {
+            error_report("reporting single wp failed");
+            return ret;
+        }
+    } else if (zo == BLKRESETZONE) {
+        for (unsigned int j = 0; j < nrz; ++j) {
+            wp[j] = offset + j * zone_size;
+        }
+    } else if (zo == BLKFINISHZONE) {
+        for (unsigned int j = 0; j < nrz; ++j) {
+            /* The zoned device allows the last zone smaller that the
+             * zone size. */
+            wp[j] = MIN(offset + (j + 1) * zone_size, offset + len);
+        }
+    }
+
+    return ret;
+}
+#endif
+
+#if defined(CONFIG_BLKZONED)
+static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
+                                           int64_t *offset,
+                                           QEMUIOVector *qiov,
+                                           BdrvRequestFlags flags) {
+    assert(flags == 0);
+    int64_t zone_size_mask = bs->bl.zone_size - 1;
+    int64_t iov_len = 0;
+    int64_t len = 0;
+
+    if (*offset & zone_size_mask) {
+        error_report("sector offset %" PRId64 " is not aligned to zone size "
+                     "%" PRId32 "", *offset / 512, bs->bl.zone_size / 512);
+        return -EINVAL;
+    }
+
+    int64_t wg = bs->bl.write_granularity;
+    int64_t wg_mask = wg - 1;
+    for (int i = 0; i < qiov->niov; i++) {
+        iov_len = qiov->iov[i].iov_len;
+        if (iov_len & wg_mask) {
+            error_report("len of IOVector[%d] %" PRId64 " is not aligned to "
+                         "block size %" PRId64 "", i, iov_len, wg);
+            return -EINVAL;
+        }
+        len += iov_len;
+    }
+
+    trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
+    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND);
+}
+#endif
+
 static coroutine_fn int
-raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev)
+raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                bool blkdev)
 {
     BDRVRawState *s = bs->opaque;
     RawPosixAIOData acb;
@@ -2904,19 +3587,19 @@ raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev)
         acb.aio_type |= QEMU_AIO_BLKDEV;
     }
 
-    ret = raw_thread_pool_submit(bs, handle_aiocb_discard, &acb);
+    ret = raw_thread_pool_submit(handle_aiocb_discard, &acb);
     raw_account_discard(s, bytes, ret);
     return ret;
 }
 
 static coroutine_fn int
-raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
+raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     return raw_do_pdiscard(bs, offset, bytes, false);
 }
 
 static int coroutine_fn
-raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
+raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
                      BdrvRequestFlags flags, bool blkdev)
 {
     BDRVRawState *s = bs->opaque;
@@ -2926,7 +3609,6 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
 #ifdef CONFIG_FALLOCATE
     if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
         BdrvTrackedRequest *req;
-        uint64_t end;
 
         /*
          * This is a workaround for a bug in the Linux XFS driver,
@@ -2950,11 +3632,11 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
         assert(req->offset <= offset);
         assert(req->offset + req->bytes >= offset + bytes);
 
-        end = INT64_MAX & -(uint64_t)bs->bl.request_alignment;
-        req->bytes = end - req->offset;
-        req->overlap_bytes = req->bytes;
+        req->bytes = BDRV_MAX_LENGTH - req->offset;
+
+        bdrv_check_request(req->offset, req->bytes, &error_abort);
 
-        bdrv_mark_request_serialising(req, bs->bl.request_alignment);
+        bdrv_make_request_serialising(req, bs->bl.request_alignment);
     }
 #endif
 
@@ -2980,21 +3662,50 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
         handler = handle_aiocb_write_zeroes;
     }
 
-    return raw_thread_pool_submit(bs, handler, &acb);
+    return raw_thread_pool_submit(handler, &acb);
 }
 
 static int coroutine_fn raw_co_pwrite_zeroes(
     BlockDriverState *bs, int64_t offset,
-    int bytes, BdrvRequestFlags flags)
+    int64_t bytes, BdrvRequestFlags flags)
 {
     return raw_do_pwrite_zeroes(bs, offset, bytes, flags, false);
 }
 
-static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+static int coroutine_fn
+raw_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     return 0;
 }
 
+static ImageInfoSpecific *raw_get_specific_info(BlockDriverState *bs,
+                                                Error **errp)
+{
+    ImageInfoSpecificFile *file_info = g_new0(ImageInfoSpecificFile, 1);
+    ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1);
+
+    *spec_info = (ImageInfoSpecific){
+        .type = IMAGE_INFO_SPECIFIC_KIND_FILE,
+        .u.file.data = file_info,
+    };
+
+#ifdef FS_IOC_FSGETXATTR
+    {
+        BDRVRawState *s = bs->opaque;
+        struct fsxattr attr;
+        int ret;
+
+        ret = ioctl(s->fd, FS_IOC_FSGETXATTR, &attr);
+        if (!ret && attr.fsx_extsize != 0) {
+            file_info->has_extent_size_hint = true;
+            file_info->extent_size_hint = attr.fsx_extsize;
+        }
+    }
+#endif
+
+    return spec_info;
+}
+
 static BlockStatsSpecificFile get_blockstats_specific_file(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
@@ -3015,6 +3726,7 @@ static BlockStatsSpecific *raw_get_specific_stats(BlockDriverState *bs)
     return stats;
 }
 
+#if defined(HAVE_HOST_BLOCK_DEVICE)
 static BlockStatsSpecific *hdev_get_specific_stats(BlockDriverState *bs)
 {
     BlockStatsSpecific *stats = g_new(BlockStatsSpecific, 1);
@@ -3024,6 +3736,7 @@ static BlockStatsSpecific *hdev_get_specific_stats(BlockDriverState *bs)
 
     return stats;
 }
+#endif /* HAVE_HOST_BLOCK_DEVICE */
 
 static QemuOptsList raw_create_opts = {
     .name = "raw-create-opts",
@@ -3061,39 +3774,30 @@ static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
                           Error **errp)
 {
     BDRVRawState *s = bs->opaque;
-    BDRVRawReopenState *rs = NULL;
+    int input_flags = s->reopen_state ? s->reopen_state->flags : bs->open_flags;
     int open_flags;
     int ret;
 
-    if (s->perm_change_fd) {
+    /* We may need a new fd if auto-read-only switches the mode */
+    ret = raw_reconfigure_getfd(bs, input_flags, &open_flags, perm,
+                                false, errp);
+    if (ret < 0) {
+        return ret;
+    } else if (ret != s->fd) {
+        Error *local_err = NULL;
+
         /*
-         * In the context of reopen, this function may be called several times
-         * (directly and recursively while change permissions of the parent).
-         * This is even true for children that don't inherit from the original
-         * reopen node, so s->reopen_state is not set.
-         *
-         * Ignore all but the first call.
+         * Fail already check_perm() if we can't get a working O_DIRECT
+         * alignment with the new fd.
          */
-        return 0;
-    }
-
-    if (s->reopen_state) {
-        /* We already have a new file descriptor to set permissions for */
-        assert(s->reopen_state->perm == perm);
-        assert(s->reopen_state->shared_perm == shared);
-        rs = s->reopen_state->opaque;
-        s->perm_change_fd = rs->fd;
-        s->perm_change_flags = rs->open_flags;
-    } else {
-        /* We may need a new fd if auto-read-only switches the mode */
-        ret = raw_reconfigure_getfd(bs, bs->open_flags, &open_flags, perm,
-                                    false, errp);
-        if (ret < 0) {
-            return ret;
-        } else if (ret != s->fd) {
-            s->perm_change_fd = ret;
-            s->perm_change_flags = open_flags;
+        raw_probe_alignment(bs, ret, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return -EINVAL;
         }
+
+        s->perm_change_fd = ret;
+        s->perm_change_flags = open_flags;
     }
 
     /* Prepare permissions on old fd to avoid conflicts between old and new,
@@ -3104,7 +3808,7 @@ static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
     }
 
     /* Copy locks to the new fd */
-    if (s->perm_change_fd) {
+    if (s->perm_change_fd && s->use_lock) {
         ret = raw_apply_lock_bytes(NULL, s->perm_change_fd, perm, ~shared,
                                    false, errp);
         if (ret < 0) {
@@ -3115,7 +3819,7 @@ static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
     return 0;
 
 fail:
-    if (s->perm_change_fd && !s->reopen_state) {
+    if (s->perm_change_fd) {
         qemu_close(s->perm_change_fd);
     }
     s->perm_change_fd = 0;
@@ -3146,7 +3850,7 @@ static void raw_abort_perm_update(BlockDriverState *bs)
 
     /* For reopen, .bdrv_reopen_abort is called afterwards and will close
      * the file descriptor. */
-    if (s->perm_change_fd && !s->reopen_state) {
+    if (s->perm_change_fd) {
         qemu_close(s->perm_change_fd);
     }
     s->perm_change_fd = 0;
@@ -3154,23 +3858,21 @@ static void raw_abort_perm_update(BlockDriverState *bs)
     raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
 }
 
-static int coroutine_fn raw_co_copy_range_from(
-        BlockDriverState *bs, BdrvChild *src, uint64_t src_offset,
-        BdrvChild *dst, uint64_t dst_offset, uint64_t bytes,
+static int coroutine_fn GRAPH_RDLOCK raw_co_copy_range_from(
+        BlockDriverState *bs, BdrvChild *src, int64_t src_offset,
+        BdrvChild *dst, int64_t dst_offset, int64_t bytes,
         BdrvRequestFlags read_flags, BdrvRequestFlags write_flags)
 {
     return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
                                  read_flags, write_flags);
 }
 
-static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
-                                             BdrvChild *src,
-                                             uint64_t src_offset,
-                                             BdrvChild *dst,
-                                             uint64_t dst_offset,
-                                             uint64_t bytes,
-                                             BdrvRequestFlags read_flags,
-                                             BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_copy_range_to(BlockDriverState *bs,
+                     BdrvChild *src, int64_t src_offset,
+                     BdrvChild *dst, int64_t dst_offset,
+                     int64_t bytes, BdrvRequestFlags read_flags,
+                     BdrvRequestFlags write_flags)
 {
     RawPosixAIOData acb;
     BDRVRawState *s = bs->opaque;
@@ -3198,7 +3900,7 @@ static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
         },
     };
 
-    return raw_thread_pool_submit(bs, handle_aiocb_copy_range, &acb);
+    return raw_thread_pool_submit(handle_aiocb_copy_range, &acb);
 }
 
 BlockDriver bdrv_file = {
@@ -3228,15 +3930,13 @@ BlockDriver bdrv_file = {
     .bdrv_co_copy_range_from = raw_co_copy_range_from,
     .bdrv_co_copy_range_to  = raw_co_copy_range_to,
     .bdrv_refresh_limits = raw_refresh_limits,
-    .bdrv_io_plug = raw_aio_plug,
-    .bdrv_io_unplug = raw_aio_unplug,
     .bdrv_attach_aio_context = raw_aio_attach_aio_context,
 
-    .bdrv_co_truncate = raw_co_truncate,
-    .bdrv_getlength = raw_getlength,
-    .bdrv_get_info = raw_get_info,
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
+    .bdrv_co_truncate                   = raw_co_truncate,
+    .bdrv_co_getlength                  = raw_co_getlength,
+    .bdrv_co_get_info                   = raw_co_get_info,
+    .bdrv_get_specific_info             = raw_get_specific_info,
+    .bdrv_co_get_allocated_file_size    = raw_co_get_allocated_file_size,
     .bdrv_get_specific_stats = raw_get_specific_stats,
     .bdrv_check_perm = raw_check_perm,
     .bdrv_set_perm   = raw_set_perm,
@@ -3248,20 +3948,28 @@ BlockDriver bdrv_file = {
 /***********************************************/
 /* host device */
 
+#if defined(HAVE_HOST_BLOCK_DEVICE)
+
 #if defined(__APPLE__) && defined(__MACH__)
 static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
                                 CFIndex maxPathSize, int flags);
+
+#if !defined(MAC_OS_VERSION_12_0) \
+    || (MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_12_0)
+#define IOMainPort IOMasterPort
+#endif
+
 static char *FindEjectableOpticalMedia(io_iterator_t *mediaIterator)
 {
     kern_return_t kernResult = KERN_FAILURE;
-    mach_port_t     masterPort;
+    mach_port_t mainPort;
     CFMutableDictionaryRef  classesToMatch;
     const char *matching_array[] = {kIODVDMediaClass, kIOCDMediaClass};
     char *mediaType = NULL;
 
-    kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
+    kernResult = IOMainPort(MACH_PORT_NULL, &mainPort);
     if ( KERN_SUCCESS != kernResult ) {
-        printf( "IOMasterPort returned %d\n", kernResult );
+        printf("IOMainPort returned %d\n", kernResult);
     }
 
     int index;
@@ -3274,7 +3982,7 @@ static char *FindEjectableOpticalMedia(io_iterator_t *mediaIterator)
         }
         CFDictionarySetValue(classesToMatch, CFSTR(kIOMediaEjectableKey),
                              kCFBooleanTrue);
-        kernResult = IOServiceGetMatchingServices(masterPort, classesToMatch,
+        kernResult = IOServiceGetMatchingServices(mainPort, classesToMatch,
                                                   mediaIterator);
         if (kernResult != KERN_SUCCESS) {
             error_report("Note: IOServiceGetMatchingServices returned %d",
@@ -3520,7 +4228,7 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
         struct sg_io_hdr *io_hdr = buf;
         if (io_hdr->cmdp[0] == PERSISTENT_RESERVE_OUT ||
             io_hdr->cmdp[0] == PERSISTENT_RESERVE_IN) {
-            return pr_manager_execute(s->pr_mgr, bdrv_get_aio_context(bs),
+            return pr_manager_execute(s->pr_mgr, qemu_get_current_aio_context(),
                                       s->fd, io_hdr);
         }
     }
@@ -3536,22 +4244,12 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
         },
     };
 
-    return raw_thread_pool_submit(bs, handle_aiocb_ioctl, &acb);
+    return raw_thread_pool_submit(handle_aiocb_ioctl, &acb);
 }
 #endif /* linux */
 
-static int fd_open(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-
-    /* this is just to ensure s->fd is sane (its called by io ops) */
-    if (s->fd >= 0)
-        return 0;
-    return -EIO;
-}
-
 static coroutine_fn int
-hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
+hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     BDRVRawState *s = bs->opaque;
     int ret;
@@ -3565,7 +4263,7 @@ hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
 }
 
 static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int bytes, BdrvRequestFlags flags)
+    int64_t offset, int64_t bytes, BdrvRequestFlags flags)
 {
     int rc;
 
@@ -3602,15 +4300,13 @@ static BlockDriver bdrv_host_device = {
     .bdrv_co_copy_range_from = raw_co_copy_range_from,
     .bdrv_co_copy_range_to  = raw_co_copy_range_to,
     .bdrv_refresh_limits = raw_refresh_limits,
-    .bdrv_io_plug = raw_aio_plug,
-    .bdrv_io_unplug = raw_aio_unplug,
     .bdrv_attach_aio_context = raw_aio_attach_aio_context,
 
-    .bdrv_co_truncate       = raw_co_truncate,
-    .bdrv_getlength    = raw_getlength,
-    .bdrv_get_info = raw_get_info,
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
+    .bdrv_co_truncate                   = raw_co_truncate,
+    .bdrv_co_getlength                  = raw_co_getlength,
+    .bdrv_co_get_info                   = raw_co_get_info,
+    .bdrv_get_specific_info             = raw_get_specific_info,
+    .bdrv_co_get_allocated_file_size    = raw_co_get_allocated_file_size,
     .bdrv_get_specific_stats = hdev_get_specific_stats,
     .bdrv_check_perm = raw_check_perm,
     .bdrv_set_perm   = raw_set_perm,
@@ -3622,6 +4318,14 @@ static BlockDriver bdrv_host_device = {
 #ifdef __linux__
     .bdrv_co_ioctl          = hdev_co_ioctl,
 #endif
+
+    /* zoned device */
+#if defined(CONFIG_BLKZONED)
+    /* zone management operations */
+    .bdrv_co_zone_report = raw_co_zone_report,
+    .bdrv_co_zone_mgmt = raw_co_zone_mgmt,
+    .bdrv_co_zone_append = raw_co_zone_append,
+#endif
 };
 
 #if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -3630,6 +4334,12 @@ static void cdrom_parse_filename(const char *filename, QDict *options,
 {
     bdrv_parse_filename_strip_prefix(filename, "host_cdrom:", options);
 }
+
+static void cdrom_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    bs->bl.has_variable_length = true;
+    raw_refresh_limits(bs, errp);
+}
 #endif
 
 #ifdef __linux__
@@ -3670,7 +4380,7 @@ out:
     return prio;
 }
 
-static bool cdrom_is_inserted(BlockDriverState *bs)
+static bool coroutine_fn cdrom_co_is_inserted(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
     int ret;
@@ -3679,7 +4389,7 @@ static bool cdrom_is_inserted(BlockDriverState *bs)
     return ret == CDS_DISC_OK;
 }
 
-static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
+static void coroutine_fn cdrom_co_eject(BlockDriverState *bs, bool eject_flag)
 {
     BDRVRawState *s = bs->opaque;
 
@@ -3692,7 +4402,7 @@ static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
     }
 }
 
-static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
+static void coroutine_fn cdrom_co_lock_medium(BlockDriverState *bs, bool locked)
 {
     BDRVRawState *s = bs->opaque;
 
@@ -3725,21 +4435,17 @@ static BlockDriver bdrv_host_cdrom = {
     .bdrv_co_preadv         = raw_co_preadv,
     .bdrv_co_pwritev        = raw_co_pwritev,
     .bdrv_co_flush_to_disk  = raw_co_flush_to_disk,
-    .bdrv_refresh_limits = raw_refresh_limits,
-    .bdrv_io_plug = raw_aio_plug,
-    .bdrv_io_unplug = raw_aio_unplug,
+    .bdrv_refresh_limits    = cdrom_refresh_limits,
     .bdrv_attach_aio_context = raw_aio_attach_aio_context,
 
-    .bdrv_co_truncate    = raw_co_truncate,
-    .bdrv_getlength      = raw_getlength,
-    .has_variable_length = true,
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
+    .bdrv_co_truncate                   = raw_co_truncate,
+    .bdrv_co_getlength                  = raw_co_getlength,
+    .bdrv_co_get_allocated_file_size    = raw_co_get_allocated_file_size,
 
     /* removable device support */
-    .bdrv_is_inserted   = cdrom_is_inserted,
-    .bdrv_eject         = cdrom_eject,
-    .bdrv_lock_medium   = cdrom_lock_medium,
+    .bdrv_co_is_inserted    = cdrom_co_is_inserted,
+    .bdrv_co_eject          = cdrom_co_eject,
+    .bdrv_co_lock_medium    = cdrom_co_lock_medium,
 
     /* generic scsi device */
     .bdrv_co_ioctl      = hdev_co_ioctl,
@@ -3796,12 +4502,12 @@ static int cdrom_reopen(BlockDriverState *bs)
     return 0;
 }
 
-static bool cdrom_is_inserted(BlockDriverState *bs)
+static bool coroutine_fn cdrom_co_is_inserted(BlockDriverState *bs)
 {
     return raw_getlength(bs) > 0;
 }
 
-static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
+static void coroutine_fn cdrom_co_eject(BlockDriverState *bs, bool eject_flag)
 {
     BDRVRawState *s = bs->opaque;
 
@@ -3821,7 +4527,7 @@ static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
     cdrom_reopen(bs);
 }
 
-static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
+static void coroutine_fn cdrom_co_lock_medium(BlockDriverState *bs, bool locked)
 {
     BDRVRawState *s = bs->opaque;
 
@@ -3855,24 +4561,22 @@ static BlockDriver bdrv_host_cdrom = {
     .bdrv_co_preadv         = raw_co_preadv,
     .bdrv_co_pwritev        = raw_co_pwritev,
     .bdrv_co_flush_to_disk  = raw_co_flush_to_disk,
-    .bdrv_refresh_limits = raw_refresh_limits,
-    .bdrv_io_plug = raw_aio_plug,
-    .bdrv_io_unplug = raw_aio_unplug,
+    .bdrv_refresh_limits    = cdrom_refresh_limits,
     .bdrv_attach_aio_context = raw_aio_attach_aio_context,
 
-    .bdrv_co_truncate    = raw_co_truncate,
-    .bdrv_getlength      = raw_getlength,
-    .has_variable_length = true,
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
+    .bdrv_co_truncate                   = raw_co_truncate,
+    .bdrv_co_getlength                  = raw_co_getlength,
+    .bdrv_co_get_allocated_file_size    = raw_co_get_allocated_file_size,
 
     /* removable device support */
-    .bdrv_is_inserted   = cdrom_is_inserted,
-    .bdrv_eject         = cdrom_eject,
-    .bdrv_lock_medium   = cdrom_lock_medium,
+    .bdrv_co_is_inserted     = cdrom_co_is_inserted,
+    .bdrv_co_eject           = cdrom_co_eject,
+    .bdrv_co_lock_medium     = cdrom_co_lock_medium,
 };
 #endif /* __FreeBSD__ */
 
+#endif /* HAVE_HOST_BLOCK_DEVICE */
+
 static void bdrv_file_init(void)
 {
     /*
@@ -3880,6 +4584,7 @@ static void bdrv_file_init(void)
      * registered last will get probed first.
      */
     bdrv_register(&bdrv_file);
+#if defined(HAVE_HOST_BLOCK_DEVICE)
     bdrv_register(&bdrv_host_device);
 #ifdef __linux__
     bdrv_register(&bdrv_host_cdrom);
@@ -3887,6 +4592,7 @@ static void bdrv_file_init(void)
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
     bdrv_register(&bdrv_host_cdrom);
 #endif
+#endif /* HAVE_HOST_BLOCK_DEVICE */
 }
 
 block_init(bdrv_file_init);
index 2642088bd6eecb87e9032b135ccc6ec629c8f27b..48b790d91739429e6f93ce6e9cbb5d5957850ba3 100644 (file)
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/cutils.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
@@ -58,6 +59,10 @@ typedef struct BDRVRawState {
     QEMUWin32AIOState *aio;
 } BDRVRawState;
 
+typedef struct BDRVRawReopenState {
+    HANDLE hfile;
+} BDRVRawReopenState;
+
 /*
  * Read/writes the data to/from a given linear buffer.
  *
@@ -148,7 +153,6 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
         BlockCompletionFunc *cb, void *opaque, int type)
 {
     RawWin32AIOData *acb = g_new(RawWin32AIOData, 1);
-    ThreadPool *pool;
 
     acb->bs = bs;
     acb->hfile = hfile;
@@ -163,8 +167,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
     acb->aio_offset = offset;
 
     trace_file_paio_submit(acb, opaque, offset, count, type);
-    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
-    return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
+    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
 }
 
 int qemu_ftruncate64(int fd, int64_t length)
@@ -392,7 +395,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     s->hfile = CreateFile(filename, access_flags,
-                          FILE_SHARE_READ, NULL,
+                          FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
                           OPEN_EXISTING, overlapped, NULL);
     if (s->hfile == INVALID_HANDLE_VALUE) {
         int err = GetLastError();
@@ -436,8 +439,8 @@ fail:
 }
 
 static BlockAIOCB *raw_aio_preadv(BlockDriverState *bs,
-                                  uint64_t offset, uint64_t bytes,
-                                  QEMUIOVector *qiov, int flags,
+                                  int64_t offset, int64_t bytes,
+                                  QEMUIOVector *qiov, BdrvRequestFlags flags,
                                   BlockCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
@@ -451,8 +454,8 @@ static BlockAIOCB *raw_aio_preadv(BlockDriverState *bs,
 }
 
 static BlockAIOCB *raw_aio_pwritev(BlockDriverState *bs,
-                                   uint64_t offset, uint64_t bytes,
-                                   QEMUIOVector *qiov, int flags,
+                                   int64_t offset, int64_t bytes,
+                                   QEMUIOVector *qiov, BdrvRequestFlags flags,
                                    BlockCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
@@ -521,7 +524,7 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
     return 0;
 }
 
-static int64_t raw_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn raw_co_getlength(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
     LARGE_INTEGER l;
@@ -554,7 +557,7 @@ static int64_t raw_getlength(BlockDriverState *bs)
     return l.QuadPart;
 }
 
-static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
+static int64_t coroutine_fn raw_co_get_allocated_file_size(BlockDriverState *bs)
 {
     typedef DWORD (WINAPI * get_compressed_t)(const char *filename,
                                               DWORD * high);
@@ -608,10 +611,9 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp)
     return 0;
 }
 
-static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
-                                           const char *filename,
-                                           QemuOpts *opts,
-                                           Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_create_opts(BlockDriver *drv, const char *filename,
+                   QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions options;
     int64_t total_size = 0;
@@ -634,6 +636,97 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
     return raw_co_create(&options, errp);
 }
 
+static int raw_reopen_prepare(BDRVReopenState *state,
+                              BlockReopenQueue *queue, Error **errp)
+{
+    BDRVRawState *s = state->bs->opaque;
+    BDRVRawReopenState *rs;
+    int access_flags;
+    DWORD overlapped;
+    int ret = 0;
+
+    if (s->type != FTYPE_FILE) {
+        error_setg(errp, "Can only reopen files");
+        return -EINVAL;
+    }
+
+    rs = g_new0(BDRVRawReopenState, 1);
+
+    /*
+     * We do not support changing any options (only flags). By leaving
+     * all options in state->options, we tell the generic reopen code
+     * that we do not support changing any of them, so it will verify
+     * that their values did not change.
+     */
+
+    raw_parse_flags(state->flags, s->aio != NULL, &access_flags, &overlapped);
+    rs->hfile = CreateFile(state->bs->filename, access_flags,
+                           FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
+                           OPEN_EXISTING, overlapped, NULL);
+
+    if (rs->hfile == INVALID_HANDLE_VALUE) {
+        int err = GetLastError();
+
+        error_setg_win32(errp, err, "Could not reopen '%s'",
+                         state->bs->filename);
+        if (err == ERROR_ACCESS_DENIED) {
+            ret = -EACCES;
+        } else {
+            ret = -EINVAL;
+        }
+        goto fail;
+    }
+
+    if (s->aio) {
+        ret = win32_aio_attach(s->aio, rs->hfile);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Could not enable AIO");
+            CloseHandle(rs->hfile);
+            goto fail;
+        }
+    }
+
+    state->opaque = rs;
+
+    return 0;
+
+fail:
+    g_free(rs);
+    state->opaque = NULL;
+
+    return ret;
+}
+
+static void raw_reopen_commit(BDRVReopenState *state)
+{
+    BDRVRawState *s = state->bs->opaque;
+    BDRVRawReopenState *rs = state->opaque;
+
+    assert(rs != NULL);
+
+    CloseHandle(s->hfile);
+    s->hfile = rs->hfile;
+
+    g_free(rs);
+    state->opaque = NULL;
+}
+
+static void raw_reopen_abort(BDRVReopenState *state)
+{
+    BDRVRawReopenState *rs = state->opaque;
+
+    if (!rs) {
+        return;
+    }
+
+    if (rs->hfile != INVALID_HANDLE_VALUE) {
+        CloseHandle(rs->hfile);
+    }
+
+    g_free(rs);
+    state->opaque = NULL;
+}
+
 static QemuOptsList raw_create_opts = {
     .name = "raw-create-opts",
     .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
@@ -659,14 +752,18 @@ BlockDriver bdrv_file = {
     .bdrv_co_create_opts = raw_co_create_opts,
     .bdrv_has_zero_init = bdrv_has_zero_init_1,
 
+    .bdrv_reopen_prepare = raw_reopen_prepare,
+    .bdrv_reopen_commit  = raw_reopen_commit,
+    .bdrv_reopen_abort   = raw_reopen_abort,
+
     .bdrv_aio_preadv    = raw_aio_preadv,
     .bdrv_aio_pwritev   = raw_aio_pwritev,
     .bdrv_aio_flush     = raw_aio_flush,
 
     .bdrv_co_truncate   = raw_co_truncate,
-    .bdrv_getlength    = raw_getlength,
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
+    .bdrv_co_getlength  = raw_co_getlength,
+    .bdrv_co_get_allocated_file_size
+                        = raw_co_get_allocated_file_size,
 
     .create_opts        = &raw_create_opts,
 };
@@ -739,6 +836,7 @@ static void hdev_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     /* XXX Does Windows support AIO on less than 512-byte alignment? */
     bs->bl.request_alignment = 512;
+    bs->bl.has_variable_length = true;
 }
 
 static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
@@ -833,11 +931,8 @@ static BlockDriver bdrv_host_device = {
     .bdrv_detach_aio_context = raw_detach_aio_context,
     .bdrv_attach_aio_context = raw_attach_aio_context,
 
-    .bdrv_getlength      = raw_getlength,
-    .has_variable_length = true,
-
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
+    .bdrv_co_getlength                = raw_co_getlength,
+    .bdrv_co_get_allocated_file_size  = raw_co_get_allocated_file_size,
 };
 
 static void bdrv_file_init(void)
index 5136371bf8b98cbd3dd1a4941bfa591cd01186ca..9b68a2be645f0d56d78e36667c677102b88c4ac3 100644 (file)
@@ -22,6 +22,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
 static int compress_open(BlockDriverState *bs, QDict *options, int flags,
                          Error **errp)
 {
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
+    int ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
     }
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (!bs->file->bs->drv || !block_driver_can_compress(bs->file->bs->drv)) {
         error_setg(errp,
                    "Compression is not supported for underlying format: %s",
@@ -56,50 +57,50 @@ static int compress_open(BlockDriverState *bs, QDict *options, int flags,
 }
 
 
-static int64_t compress_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+compress_co_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file->bs);
+    return bdrv_co_getlength(bs->file->bs);
 }
 
 
-static int coroutine_fn compress_co_preadv_part(BlockDriverState *bs,
-                                                uint64_t offset, uint64_t bytes,
-                                                QEMUIOVector *qiov,
-                                                size_t qiov_offset,
-                                                int flags)
+static int coroutine_fn GRAPH_RDLOCK
+compress_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                        QEMUIOVector *qiov, size_t qiov_offset,
+                        BdrvRequestFlags flags)
 {
     return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
                                flags);
 }
 
 
-static int coroutine_fn compress_co_pwritev_part(BlockDriverState *bs,
-                                                 uint64_t offset,
-                                                 uint64_t bytes,
-                                                 QEMUIOVector *qiov,
-                                                 size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+compress_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                         QEMUIOVector *qiov, size_t qiov_offset,
+                         BdrvRequestFlags flags)
 {
     return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
                                 flags | BDRV_REQ_WRITE_COMPRESSED);
 }
 
 
-static int coroutine_fn compress_co_pwrite_zeroes(BlockDriverState *bs,
-                                                  int64_t offset, int bytes,
-                                                  BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+compress_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          BdrvRequestFlags flags)
 {
     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
 
-static int coroutine_fn compress_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int bytes)
+static int coroutine_fn GRAPH_RDLOCK
+compress_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
 
 
-static void compress_refresh_limits(BlockDriverState *bs, Error **errp)
+static void GRAPH_RDLOCK
+compress_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     BlockDriverInfo bdi;
     int ret;
@@ -117,15 +118,17 @@ static void compress_refresh_limits(BlockDriverState *bs, Error **errp)
 }
 
 
-static void compress_eject(BlockDriverState *bs, bool eject_flag)
+static void coroutine_fn GRAPH_RDLOCK
+compress_co_eject(BlockDriverState *bs, bool eject_flag)
 {
-    bdrv_eject(bs->file->bs, eject_flag);
+    bdrv_co_eject(bs->file->bs, eject_flag);
 }
 
 
-static void compress_lock_medium(BlockDriverState *bs, bool locked)
+static void coroutine_fn GRAPH_RDLOCK
+compress_co_lock_medium(BlockDriverState *bs, bool locked)
 {
-    bdrv_lock_medium(bs->file->bs, locked);
+    bdrv_co_lock_medium(bs->file->bs, locked);
 }
 
 
@@ -135,7 +138,7 @@ static BlockDriver bdrv_compress = {
     .bdrv_open                          = compress_open,
     .bdrv_child_perm                    = bdrv_default_perms,
 
-    .bdrv_getlength                     = compress_getlength,
+    .bdrv_co_getlength                  = compress_co_getlength,
 
     .bdrv_co_preadv_part                = compress_co_preadv_part,
     .bdrv_co_pwritev_part               = compress_co_pwritev_part,
@@ -143,10 +146,9 @@ static BlockDriver bdrv_compress = {
     .bdrv_co_pdiscard                   = compress_co_pdiscard,
     .bdrv_refresh_limits                = compress_refresh_limits,
 
-    .bdrv_eject                         = compress_eject,
-    .bdrv_lock_medium                   = compress_lock_medium,
+    .bdrv_co_eject                      = compress_co_eject,
+    .bdrv_co_lock_medium                = compress_co_lock_medium,
 
-    .has_variable_length                = true,
     .is_filter                          = true,
 };
 
index 4f1448e2bc88e7990beb431337d11ac715490d21..cc74af06dc56e8891a3313368298e2bb90a22bfd 100644 (file)
@@ -11,6 +11,7 @@
 #include "qemu/osdep.h"
 #include "qemu/units.h"
 #include <glusterfs/api/glfs.h>
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "block/qdict.h"
 #include "qapi/error.h"
@@ -359,8 +360,8 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
         return -EINVAL;
     }
 
-    gconf->server = g_new0(SocketAddressList, 1);
-    gconf->server->value = gsconf = g_new0(SocketAddress, 1);
+    gsconf = g_new0(SocketAddress, 1);
+    QAPI_LIST_PREPEND(gconf->server, gsconf);
 
     /* transport */
     if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
@@ -423,7 +424,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
     int ret;
     int old_errno;
     SocketAddressList *server;
-    unsigned long long port;
+    uint64_t port;
 
     glfs = glfs_find_preopened(gconf->volume);
     if (glfs) {
@@ -444,7 +445,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
                                    server->value->u.q_unix.path, 0);
             break;
         case SOCKET_ADDRESS_TYPE_INET:
-            if (parse_uint_full(server->value->u.inet.port, &port, 10) < 0 ||
+            if (parse_uint_full(server->value->u.inet.port, 10, &port) < 0 ||
                 port > 65535) {
                 error_setg(errp, "'%s' is not a valid port number",
                            server->value->u.inet.port);
@@ -514,7 +515,7 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
 {
     QemuOpts *opts;
     SocketAddress *gsconf = NULL;
-    SocketAddressList *curr = NULL;
+    SocketAddressList **tail;
     QDict *backing_options = NULL;
     Error *local_err = NULL;
     char *str = NULL;
@@ -547,6 +548,7 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
     }
     gconf->path = g_strdup(ptr);
     qemu_opts_del(opts);
+    tail = &gconf->server;
 
     for (i = 0; i < num_servers; i++) {
         str = g_strdup_printf(GLUSTER_OPT_SERVER_PATTERN"%d.", i);
@@ -655,15 +657,7 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
             qemu_opts_del(opts);
         }
 
-        if (gconf->server == NULL) {
-            gconf->server = g_new0(SocketAddressList, 1);
-            gconf->server->value = gsconf;
-            curr = gconf->server;
-        } else {
-            curr->next = g_new0(SocketAddressList, 1);
-            curr->next->value = gsconf;
-            curr = curr->next;
-        }
+        QAPI_LIST_APPEND(tail, gsconf);
         gsconf = NULL;
 
         qobject_unref(backing_options);
@@ -837,7 +831,6 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
     s->logfile = g_strdup(logfile ? logfile : GLUSTER_LOGFILE_DEFAULT);
 
     gconf->logfile = g_strdup(s->logfile);
-    gconf->has_logfile = true;
 
     s->glfs = qemu_gluster_init(gconf, filename, options, errp);
     if (!s->glfs) {
@@ -870,11 +863,13 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
     if (ret == -EACCES || ret == -EROFS) {
         /* Try to degrade to read-only, but if it doesn't work, still use the
          * normal error message. */
+        bdrv_graph_rdlock_main_loop();
         if (bdrv_apply_auto_read_only(bs, NULL, NULL) == 0) {
             open_flags = (open_flags & ~O_RDWR) | O_RDONLY;
             s->fd = glfs_open(s->glfs, gconf->path, open_flags);
             ret = s->fd ? 0 : -errno;
         }
+        bdrv_graph_rdunlock_main_loop();
     }
 
     s->supports_seek_data = qemu_gluster_test_seek(s->fd);
@@ -898,6 +893,7 @@ out:
 static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     bs->bl.max_transfer = GLUSTER_MAX_TRANSFER;
+    bs->bl.max_pdiscard = MIN(SIZE_MAX, INT64_MAX);
 }
 
 static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
@@ -923,7 +919,6 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
     gconf->debug = s->debug;
     gconf->has_debug = true;
     gconf->logfile = g_strdup(s->logfile);
-    gconf->has_logfile = true;
 
     /*
      * If 'state->bs->exact_filename' is empty, 'state->options' should contain
@@ -1010,19 +1005,19 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state)
 #ifdef CONFIG_GLUSTERFS_ZEROFILL
 static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
                                                       int64_t offset,
-                                                      int size,
+                                                      int64_t bytes,
                                                       BdrvRequestFlags flags)
 {
     int ret;
     GlusterAIOCB acb;
     BDRVGlusterState *s = bs->opaque;
 
-    acb.size = size;
+    acb.size = bytes;
     acb.ret = 0;
     acb.coroutine = qemu_coroutine_self();
     acb.aio_context = bdrv_get_aio_context(bs);
 
-    ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
+    ret = glfs_zerofill_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
     if (ret < 0) {
         return -errno;
     }
@@ -1168,7 +1163,6 @@ static int coroutine_fn qemu_gluster_co_create_opts(BlockDriver *drv,
     if (!gconf->logfile) {
         gconf->logfile = g_strdup(GLUSTER_LOGFILE_DEFAULT);
     }
-    gconf->has_logfile = true;
 
     ret = qemu_gluster_parse(gconf, filename, NULL, errp);
     if (ret < 0) {
@@ -1242,7 +1236,6 @@ static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
                                                QEMUIOVector *qiov,
                                                int flags)
 {
-    assert(!flags);
     return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
 }
 
@@ -1304,18 +1297,20 @@ error:
 
 #ifdef CONFIG_GLUSTERFS_DISCARD
 static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
-                                                 int64_t offset, int size)
+                                                 int64_t offset, int64_t bytes)
 {
     int ret;
     GlusterAIOCB acb;
     BDRVGlusterState *s = bs->opaque;
 
+    assert(bytes <= SIZE_MAX); /* rely on max_pdiscard */
+
     acb.size = 0;
     acb.ret = 0;
     acb.coroutine = qemu_coroutine_self();
     acb.aio_context = bdrv_get_aio_context(bs);
 
-    ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
+    ret = glfs_discard_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb);
     if (ret < 0) {
         return -errno;
     }
@@ -1325,7 +1320,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
 }
 #endif
 
-static int64_t qemu_gluster_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn qemu_gluster_co_getlength(BlockDriverState *bs)
 {
     BDRVGlusterState *s = bs->opaque;
     int64_t ret;
@@ -1338,7 +1333,8 @@ static int64_t qemu_gluster_getlength(BlockDriverState *bs)
     }
 }
 
-static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
+static int64_t coroutine_fn
+qemu_gluster_co_get_allocated_file_size(BlockDriverState *bs)
 {
     BDRVGlusterState *s = bs->opaque;
     struct stat st;
@@ -1468,7 +1464,8 @@ exit:
  * the specified offset) that are known to be in the same
  * allocated/unallocated state.
  *
- * 'bytes' is the max value 'pnum' should be set to.
+ * 'bytes' is a soft cap for 'pnum'.  If the information is free, 'pnum' may
+ * well exceed it.
  *
  * (Based on raw_co_block_status() from file-posix.c.)
  */
@@ -1484,6 +1481,8 @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
     off_t data = 0, hole = 0;
     int ret = -EINVAL;
 
+    assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
+
     if (!s->fd) {
         return ret;
     }
@@ -1507,12 +1506,26 @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
     } else if (data == offset) {
         /* On a data extent, compute bytes to the end of the extent,
          * possibly including a partial sector at EOF. */
-        *pnum = MIN(bytes, hole - offset);
+        *pnum = hole - offset;
+
+        /*
+         * We are not allowed to return partial sectors, though, so
+         * round up if necessary.
+         */
+        if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) {
+            int64_t file_length = qemu_gluster_co_getlength(bs);
+            if (file_length > 0) {
+                /* Ignore errors, this is just a safeguard */
+                assert(hole == file_length);
+            }
+            *pnum = ROUND_UP(*pnum, bs->bl.request_alignment);
+        }
+
         ret = BDRV_BLOCK_DATA;
     } else {
         /* On a hole, compute bytes to the beginning of the next extent.  */
         assert(hole == offset);
-        *pnum = MIN(bytes, data - offset);
+        *pnum = data - offset;
         ret = BDRV_BLOCK_ZERO;
     }
 
@@ -1542,7 +1555,6 @@ static BlockDriver bdrv_gluster = {
     .format_name                  = "gluster",
     .protocol_name                = "gluster",
     .instance_size                = sizeof(BDRVGlusterState),
-    .bdrv_needs_filename          = false,
     .bdrv_file_open               = qemu_gluster_open,
     .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
     .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
@@ -1550,8 +1562,8 @@ static BlockDriver bdrv_gluster = {
     .bdrv_close                   = qemu_gluster_close,
     .bdrv_co_create               = qemu_gluster_co_create,
     .bdrv_co_create_opts          = qemu_gluster_co_create_opts,
-    .bdrv_getlength               = qemu_gluster_getlength,
-    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_co_getlength            = qemu_gluster_co_getlength,
+    .bdrv_co_get_allocated_file_size = qemu_gluster_co_get_allocated_file_size,
     .bdrv_co_truncate             = qemu_gluster_co_truncate,
     .bdrv_co_readv                = qemu_gluster_co_readv,
     .bdrv_co_writev               = qemu_gluster_co_writev,
@@ -1572,7 +1584,6 @@ static BlockDriver bdrv_gluster_tcp = {
     .format_name                  = "gluster",
     .protocol_name                = "gluster+tcp",
     .instance_size                = sizeof(BDRVGlusterState),
-    .bdrv_needs_filename          = false,
     .bdrv_file_open               = qemu_gluster_open,
     .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
     .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
@@ -1580,8 +1591,8 @@ static BlockDriver bdrv_gluster_tcp = {
     .bdrv_close                   = qemu_gluster_close,
     .bdrv_co_create               = qemu_gluster_co_create,
     .bdrv_co_create_opts          = qemu_gluster_co_create_opts,
-    .bdrv_getlength               = qemu_gluster_getlength,
-    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_co_getlength            = qemu_gluster_co_getlength,
+    .bdrv_co_get_allocated_file_size = qemu_gluster_co_get_allocated_file_size,
     .bdrv_co_truncate             = qemu_gluster_co_truncate,
     .bdrv_co_readv                = qemu_gluster_co_readv,
     .bdrv_co_writev               = qemu_gluster_co_writev,
@@ -1602,7 +1613,6 @@ static BlockDriver bdrv_gluster_unix = {
     .format_name                  = "gluster",
     .protocol_name                = "gluster+unix",
     .instance_size                = sizeof(BDRVGlusterState),
-    .bdrv_needs_filename          = true,
     .bdrv_file_open               = qemu_gluster_open,
     .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
     .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
@@ -1610,8 +1620,8 @@ static BlockDriver bdrv_gluster_unix = {
     .bdrv_close                   = qemu_gluster_close,
     .bdrv_co_create               = qemu_gluster_co_create,
     .bdrv_co_create_opts          = qemu_gluster_co_create_opts,
-    .bdrv_getlength               = qemu_gluster_getlength,
-    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_co_getlength            = qemu_gluster_co_getlength,
+    .bdrv_co_get_allocated_file_size = qemu_gluster_co_get_allocated_file_size,
     .bdrv_co_truncate             = qemu_gluster_co_truncate,
     .bdrv_co_readv                = qemu_gluster_co_readv,
     .bdrv_co_writev               = qemu_gluster_co_writev,
@@ -1638,7 +1648,6 @@ static BlockDriver bdrv_gluster_rdma = {
     .format_name                  = "gluster",
     .protocol_name                = "gluster+rdma",
     .instance_size                = sizeof(BDRVGlusterState),
-    .bdrv_needs_filename          = true,
     .bdrv_file_open               = qemu_gluster_open,
     .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
     .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
@@ -1646,8 +1655,8 @@ static BlockDriver bdrv_gluster_rdma = {
     .bdrv_close                   = qemu_gluster_close,
     .bdrv_co_create               = qemu_gluster_co_create,
     .bdrv_co_create_opts          = qemu_gluster_co_create_opts,
-    .bdrv_getlength               = qemu_gluster_getlength,
-    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_co_getlength            = qemu_gluster_co_getlength,
+    .bdrv_co_get_allocated_file_size = qemu_gluster_co_get_allocated_file_size,
     .bdrv_co_truncate             = qemu_gluster_co_truncate,
     .bdrv_co_readv                = qemu_gluster_co_readv,
     .bdrv_co_writev               = qemu_gluster_co_writev,
diff --git a/block/graph-lock.c b/block/graph-lock.c
new file mode 100644 (file)
index 0000000..079e878
--- /dev/null
@@ -0,0 +1,321 @@
+/*
+ * Graph lock: rwlock to protect block layer graph manipulations (add/remove
+ * edges and nodes)
+ *
+ *  Copyright (c) 2022 Red Hat
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "block/graph-lock.h"
+#include "block/block.h"
+#include "block/block_int.h"
+
+/* Dummy lock object to use for Thread Safety Analysis (TSA) */
+BdrvGraphLock graph_lock;
+
+/* Protects the list of aiocontext and orphaned_reader_count */
+static QemuMutex aio_context_list_lock;
+
+/* Written and read with atomic operations. */
+static int has_writer;
+
+/*
+ * A reader coroutine could move from an AioContext to another.
+ * If this happens, there is no problem from the point of view of
+ * counters. The problem is that the total count becomes
+ * unbalanced if one of the two AioContexts gets deleted.
+ * The count of readers must remain correct, so the AioContext's
+ * balance is transferred to this glboal variable.
+ * Protected by aio_context_list_lock.
+ */
+static uint32_t orphaned_reader_count;
+
+/* Queue of readers waiting for the writer to finish */
+static CoQueue reader_queue;
+
+struct BdrvGraphRWlock {
+    /* How many readers are currently reading the graph. */
+    uint32_t reader_count;
+
+    /*
+     * List of BdrvGraphRWlock kept in graph-lock.c
+     * Protected by aio_context_list_lock
+     */
+    QTAILQ_ENTRY(BdrvGraphRWlock) next_aio;
+};
+
+/*
+ * List of BdrvGraphRWlock. This list ensures that each BdrvGraphRWlock
+ * can safely modify only its own counter, avoid reading/writing
+ * others and thus improving performances by avoiding cacheline bounces.
+ */
+static QTAILQ_HEAD(, BdrvGraphRWlock) aio_context_list =
+    QTAILQ_HEAD_INITIALIZER(aio_context_list);
+
+static void __attribute__((__constructor__)) bdrv_init_graph_lock(void)
+{
+    qemu_mutex_init(&aio_context_list_lock);
+    qemu_co_queue_init(&reader_queue);
+}
+
+void register_aiocontext(AioContext *ctx)
+{
+    ctx->bdrv_graph = g_new0(BdrvGraphRWlock, 1);
+    QEMU_LOCK_GUARD(&aio_context_list_lock);
+    assert(ctx->bdrv_graph->reader_count == 0);
+    QTAILQ_INSERT_TAIL(&aio_context_list, ctx->bdrv_graph, next_aio);
+}
+
+void unregister_aiocontext(AioContext *ctx)
+{
+    QEMU_LOCK_GUARD(&aio_context_list_lock);
+    orphaned_reader_count += ctx->bdrv_graph->reader_count;
+    QTAILQ_REMOVE(&aio_context_list, ctx->bdrv_graph, next_aio);
+    g_free(ctx->bdrv_graph);
+}
+
+static uint32_t reader_count(void)
+{
+    BdrvGraphRWlock *brdv_graph;
+    uint32_t rd;
+
+    QEMU_LOCK_GUARD(&aio_context_list_lock);
+
+    /* rd can temporarily be negative, but the total will *always* be >= 0 */
+    rd = orphaned_reader_count;
+    QTAILQ_FOREACH(brdv_graph, &aio_context_list, next_aio) {
+        rd += qatomic_read(&brdv_graph->reader_count);
+    }
+
+    /* shouldn't overflow unless there are 2^31 readers */
+    assert((int32_t)rd >= 0);
+    return rd;
+}
+
+void no_coroutine_fn bdrv_graph_wrlock(BlockDriverState *bs)
+{
+    AioContext *ctx = NULL;
+
+    GLOBAL_STATE_CODE();
+    assert(!qatomic_read(&has_writer));
+    assert(!qemu_in_coroutine());
+
+    /*
+     * Release only non-mainloop AioContext. The mainloop often relies on the
+     * BQL and doesn't lock the main AioContext before doing things.
+     */
+    if (bs) {
+        ctx = bdrv_get_aio_context(bs);
+        if (ctx != qemu_get_aio_context()) {
+            aio_context_release(ctx);
+        } else {
+            ctx = NULL;
+        }
+    }
+
+    /* Make sure that constantly arriving new I/O doesn't cause starvation */
+    bdrv_drain_all_begin_nopoll();
+
+    /*
+     * reader_count == 0: this means writer will read has_reader as 1
+     * reader_count >= 1: we don't know if writer read has_writer == 0 or 1,
+     *                    but we need to wait.
+     * Wait by allowing other coroutine (and possible readers) to continue.
+     */
+    do {
+        /*
+         * has_writer must be 0 while polling, otherwise we get a deadlock if
+         * any callback involved during AIO_WAIT_WHILE() tries to acquire the
+         * reader lock.
+         */
+        qatomic_set(&has_writer, 0);
+        AIO_WAIT_WHILE_UNLOCKED(NULL, reader_count() >= 1);
+        qatomic_set(&has_writer, 1);
+
+        /*
+         * We want to only check reader_count() after has_writer = 1 is visible
+         * to other threads. That way no more readers can sneak in after we've
+         * determined reader_count() == 0.
+         */
+        smp_mb();
+    } while (reader_count() >= 1);
+
+    bdrv_drain_all_end();
+
+    if (ctx) {
+        aio_context_acquire(bdrv_get_aio_context(bs));
+    }
+}
+
+void no_coroutine_fn bdrv_graph_wrunlock_ctx(AioContext *ctx)
+{
+    GLOBAL_STATE_CODE();
+    assert(qatomic_read(&has_writer));
+
+    /*
+     * Release only non-mainloop AioContext. The mainloop often relies on the
+     * BQL and doesn't lock the main AioContext before doing things.
+     */
+    if (ctx && ctx != qemu_get_aio_context()) {
+        aio_context_release(ctx);
+    } else {
+        ctx = NULL;
+    }
+
+    WITH_QEMU_LOCK_GUARD(&aio_context_list_lock) {
+        /*
+         * No need for memory barriers, this works in pair with
+         * the slow path of rdlock() and both take the lock.
+         */
+        qatomic_store_release(&has_writer, 0);
+
+        /* Wake up all coroutines that are waiting to read the graph */
+        qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
+    }
+
+    /*
+     * Run any BHs that were scheduled during the wrlock section and that
+     * callers might expect to have finished (in particular, this is important
+     * for bdrv_schedule_unref()).
+     *
+     * Do this only after restarting coroutines so that nested event loops in
+     * BHs don't deadlock if their condition relies on the coroutine making
+     * progress.
+     */
+    aio_bh_poll(qemu_get_aio_context());
+
+    if (ctx) {
+        aio_context_acquire(ctx);
+    }
+}
+
+void no_coroutine_fn bdrv_graph_wrunlock(BlockDriverState *bs)
+{
+    AioContext *ctx = bs ? bdrv_get_aio_context(bs) : NULL;
+
+    bdrv_graph_wrunlock_ctx(ctx);
+}
+
+void coroutine_fn bdrv_graph_co_rdlock(void)
+{
+    BdrvGraphRWlock *bdrv_graph;
+    bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
+
+    for (;;) {
+        qatomic_set(&bdrv_graph->reader_count,
+                    bdrv_graph->reader_count + 1);
+        /* make sure writer sees reader_count before we check has_writer */
+        smp_mb();
+
+        /*
+         * has_writer == 0: this means writer will read reader_count as >= 1
+         * has_writer == 1: we don't know if writer read reader_count == 0
+         *                  or > 0, but we need to wait anyways because
+         *                  it will write.
+         */
+        if (!qatomic_read(&has_writer)) {
+            break;
+        }
+
+        /*
+         * Synchronize access with reader_count() in bdrv_graph_wrlock().
+         * Case 1:
+         * If this critical section gets executed first, reader_count will
+         * decrease and the reader will go to sleep.
+         * Then the writer will read reader_count that does not take into
+         * account this reader, and if there's no other reader it will
+         * enter the write section.
+         * Case 2:
+         * If reader_count() critical section gets executed first,
+         * then writer will read reader_count >= 1.
+         * It will wait in AIO_WAIT_WHILE(), but once it releases the lock
+         * we will enter this critical section and call aio_wait_kick().
+         */
+        WITH_QEMU_LOCK_GUARD(&aio_context_list_lock) {
+            /*
+             * Additional check when we use the above lock to synchronize
+             * with bdrv_graph_wrunlock().
+             * Case 1:
+             * If this gets executed first, has_writer is still 1, so we reduce
+             * reader_count and go to sleep.
+             * Then the writer will set has_writer to 0 and wake up all readers,
+             * us included.
+             * Case 2:
+             * If bdrv_graph_wrunlock() critical section gets executed first,
+             * then it will set has_writer to 0 and wake up all other readers.
+             * Then we execute this critical section, and therefore must check
+             * again for has_writer, otherwise we sleep without any writer
+             * actually running.
+             */
+            if (!qatomic_read(&has_writer)) {
+                return;
+            }
+
+            /* slow path where reader sleeps */
+            bdrv_graph->reader_count--;
+            aio_wait_kick();
+            qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
+        }
+    }
+}
+
+void coroutine_fn bdrv_graph_co_rdunlock(void)
+{
+    BdrvGraphRWlock *bdrv_graph;
+    bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
+
+    qatomic_store_release(&bdrv_graph->reader_count,
+                          bdrv_graph->reader_count - 1);
+    /* make sure writer sees reader_count before we check has_writer */
+    smp_mb();
+
+    /*
+     * has_writer == 0: this means reader will read reader_count decreased
+     * has_writer == 1: we don't know if writer read reader_count old or
+     *                  new. Therefore, kick again so on next iteration
+     *                  writer will for sure read the updated value.
+     */
+    if (qatomic_read(&has_writer)) {
+        aio_wait_kick();
+    }
+}
+
+void bdrv_graph_rdlock_main_loop(void)
+{
+    GLOBAL_STATE_CODE();
+    assert(!qemu_in_coroutine());
+}
+
+void bdrv_graph_rdunlock_main_loop(void)
+{
+    GLOBAL_STATE_CODE();
+    assert(!qemu_in_coroutine());
+}
+
+void assert_bdrv_graph_readable(void)
+{
+    /* reader_count() is slow due to aio_context_list_lock lock contention */
+#ifdef CONFIG_DEBUG_GRAPH_LOCK
+    assert(qemu_in_main_thread() || reader_count());
+#endif
+}
+
+void assert_bdrv_graph_writable(void)
+{
+    assert(qemu_in_main_thread());
+    assert(qatomic_read(&has_writer));
+}
index a9f56a9ab1c56a3ca83833bfb0fa07fef17df0cf..d202987770d6ab1891a1d338b30c4f2efc4835b7 100644 (file)
 #include "block/blockjob_int.h"
 #include "block/block_int.h"
 #include "block/coroutines.h"
+#include "block/dirty-bitmap.h"
+#include "block/write-threshold.h"
 #include "qemu/cutils.h"
+#include "qemu/memalign.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
 #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
 
-static void bdrv_parent_cb_resize(BlockDriverState *bs);
+static void coroutine_fn GRAPH_RDLOCK
+bdrv_parent_cb_resize(BlockDriverState *bs);
+
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int bytes, BdrvRequestFlags flags);
+    int64_t offset, int64_t bytes, BdrvRequestFlags flags);
 
-static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
-                                      bool ignore_bds_parents)
+static void GRAPH_RDLOCK
+bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
 {
     BdrvChild *c, *next;
+    IO_OR_GS_CODE();
+    assert_bdrv_graph_readable();
 
     QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
-        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
+        if (c == ignore) {
             continue;
         }
-        bdrv_parent_drained_begin_single(c, false);
+        bdrv_parent_drained_begin_single(c);
     }
 }
 
-static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
-                                                   int *drained_end_counter)
+void bdrv_parent_drained_end_single(BdrvChild *c)
 {
-    assert(c->parent_quiesce_counter > 0);
-    c->parent_quiesce_counter--;
+    GLOBAL_STATE_CODE();
+
+    assert(c->quiesced_parent);
+    c->quiesced_parent = false;
+
     if (c->klass->drained_end) {
-        c->klass->drained_end(c, drained_end_counter);
+        c->klass->drained_end(c);
     }
 }
 
-void bdrv_parent_drained_end_single(BdrvChild *c)
-{
-    int drained_end_counter = 0;
-    bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
-    BDRV_POLL_WHILE(c->bs, qatomic_read(&drained_end_counter) > 0);
-}
-
-static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
-                                    bool ignore_bds_parents,
-                                    int *drained_end_counter)
+static void GRAPH_RDLOCK
+bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
 {
     BdrvChild *c;
+    IO_OR_GS_CODE();
+    assert_bdrv_graph_readable();
 
     QLIST_FOREACH(c, &bs->parents, next_parent) {
-        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
+        if (c == ignore) {
             continue;
         }
-        bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
+        bdrv_parent_drained_end_single(c);
     }
 }
 
-static bool bdrv_parent_drained_poll_single(BdrvChild *c)
+bool bdrv_parent_drained_poll_single(BdrvChild *c)
 {
+    IO_OR_GS_CODE();
+
     if (c->klass->drained_poll) {
         return c->klass->drained_poll(c);
     }
     return false;
 }
 
-static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
-                                     bool ignore_bds_parents)
+static bool GRAPH_RDLOCK
+bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
+                         bool ignore_bds_parents)
 {
     BdrvChild *c, *next;
     bool busy = false;
+    IO_OR_GS_CODE();
+    assert_bdrv_graph_readable();
 
     QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
         if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
@@ -111,34 +119,70 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
     return busy;
 }
 
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
+void bdrv_parent_drained_begin_single(BdrvChild *c)
 {
-    c->parent_quiesce_counter++;
+    GLOBAL_STATE_CODE();
+
+    assert(!c->quiesced_parent);
+    c->quiesced_parent = true;
+
     if (c->klass->drained_begin) {
+        /* called with rdlock taken, but it doesn't really need it. */
         c->klass->drained_begin(c);
     }
-    if (poll) {
-        BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c));
-    }
 }
 
 static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
 {
+    dst->pdiscard_alignment = MAX(dst->pdiscard_alignment,
+                                  src->pdiscard_alignment);
     dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
     dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
+    dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer,
+                                        src->max_hw_transfer);
     dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
                                  src->opt_mem_alignment);
     dst->min_mem_alignment = MAX(dst->min_mem_alignment,
                                  src->min_mem_alignment);
     dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
+    dst->max_hw_iov = MIN_NON_ZERO(dst->max_hw_iov, src->max_hw_iov);
+}
+
+typedef struct BdrvRefreshLimitsState {
+    BlockDriverState *bs;
+    BlockLimits old_bl;
+} BdrvRefreshLimitsState;
+
+static void bdrv_refresh_limits_abort(void *opaque)
+{
+    BdrvRefreshLimitsState *s = opaque;
+
+    s->bs->bl = s->old_bl;
 }
 
-void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
+static TransactionActionDrv bdrv_refresh_limits_drv = {
+    .abort = bdrv_refresh_limits_abort,
+    .clean = g_free,
+};
+
+/* @tran is allowed to be NULL, in this case no rollback is possible. */
+void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
 {
+    ERRP_GUARD();
     BlockDriver *drv = bs->drv;
     BdrvChild *c;
     bool have_limits;
-    Error *local_err = NULL;
+
+    GLOBAL_STATE_CODE();
+
+    if (tran) {
+        BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
+        *s = (BdrvRefreshLimitsState) {
+            .bs = bs,
+            .old_bl = bs->bl,
+        };
+        tran_add(tran, &bdrv_refresh_limits_drv, s);
+    }
 
     memset(&bs->bl, 0, sizeof(bs->bl));
 
@@ -156,19 +200,18 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
     QLIST_FOREACH(c, &bs->children, next) {
         if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
         {
-            bdrv_refresh_limits(c->bs, &local_err);
-            if (local_err) {
-                error_propagate(errp, local_err);
-                return;
-            }
             bdrv_merge_limits(&bs->bl, &c->bs->bl);
             have_limits = true;
         }
+
+        if (c->role & BDRV_CHILD_FILTERED) {
+            bs->bl.has_variable_length |= c->bs->bl.has_variable_length;
+        }
     }
 
     if (!have_limits) {
         bs->bl.min_mem_alignment = 512;
-        bs->bl.opt_mem_alignment = qemu_real_host_page_size;
+        bs->bl.opt_mem_alignment = qemu_real_host_page_size();
 
         /* Safe default since most protocols use readv()/writev()/etc */
         bs->bl.max_iov = IOV_MAX;
@@ -177,6 +220,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
     /* Then let the driver override it */
     if (drv->bdrv_refresh_limits) {
         drv->bdrv_refresh_limits(bs, errp);
+        if (*errp) {
+            return;
+        }
+    }
+
+    if (bs->bl.request_alignment > BDRV_MAX_ALIGNMENT) {
+        error_setg(errp, "Driver requires too large request alignment");
     }
 }
 
@@ -187,12 +237,14 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
  */
 void bdrv_enable_copy_on_read(BlockDriverState *bs)
 {
+    IO_CODE();
     qatomic_inc(&bs->copy_on_read);
 }
 
 void bdrv_disable_copy_on_read(BlockDriverState *bs)
 {
     int old = qatomic_fetch_dec(&bs->copy_on_read);
+    IO_CODE();
     assert(old >= 1);
 }
 
@@ -201,69 +253,15 @@ typedef struct {
     BlockDriverState *bs;
     bool done;
     bool begin;
-    bool recursive;
     bool poll;
     BdrvChild *parent;
-    bool ignore_bds_parents;
-    int *drained_end_counter;
 } BdrvCoDrainData;
 
-static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
-{
-    BdrvCoDrainData *data = opaque;
-    BlockDriverState *bs = data->bs;
-
-    if (data->begin) {
-        bs->drv->bdrv_co_drain_begin(bs);
-    } else {
-        bs->drv->bdrv_co_drain_end(bs);
-    }
-
-    /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */
-    qatomic_mb_set(&data->done, true);
-    if (!data->begin) {
-        qatomic_dec(data->drained_end_counter);
-    }
-    bdrv_dec_in_flight(bs);
-
-    g_free(data);
-}
-
-/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
-                              int *drained_end_counter)
-{
-    BdrvCoDrainData *data;
-
-    if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
-            (!begin && !bs->drv->bdrv_co_drain_end)) {
-        return;
-    }
-
-    data = g_new(BdrvCoDrainData, 1);
-    *data = (BdrvCoDrainData) {
-        .bs = bs,
-        .done = false,
-        .begin = begin,
-        .drained_end_counter = drained_end_counter,
-    };
-
-    if (!begin) {
-        qatomic_inc(drained_end_counter);
-    }
-
-    /* Make sure the driver callback completes during the polling phase for
-     * drain_begin. */
-    bdrv_inc_in_flight(bs);
-    data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
-    aio_co_schedule(bdrv_get_aio_context(bs), data->co);
-}
-
 /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
-                     BdrvChild *ignore_parent, bool ignore_bds_parents)
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
+                     bool ignore_bds_parents)
 {
-    BdrvChild *child, *next;
+    GLOBAL_STATE_CODE();
 
     if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
         return true;
@@ -273,30 +271,21 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
         return true;
     }
 
-    if (recursive) {
-        assert(!ignore_bds_parents);
-        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
-            if (bdrv_drain_poll(child->bs, recursive, child, false)) {
-                return true;
-            }
-        }
-    }
-
     return false;
 }
 
-static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
+static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
                                       BdrvChild *ignore_parent)
 {
-    return bdrv_drain_poll(bs, recursive, ignore_parent, false);
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    return bdrv_drain_poll(bs, ignore_parent, false);
 }
 
-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
-                                  BdrvChild *parent, bool ignore_bds_parents,
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
                                   bool poll);
-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
-                                BdrvChild *parent, bool ignore_bds_parents,
-                                int *drained_end_counter);
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
 
 static void bdrv_co_drain_bh_cb(void *opaque)
 {
@@ -309,14 +298,10 @@ static void bdrv_co_drain_bh_cb(void *opaque)
         aio_context_acquire(ctx);
         bdrv_dec_in_flight(bs);
         if (data->begin) {
-            assert(!data->drained_end_counter);
-            bdrv_do_drained_begin(bs, data->recursive, data->parent,
-                                  data->ignore_bds_parents, data->poll);
+            bdrv_do_drained_begin(bs, data->parent, data->poll);
         } else {
             assert(!data->poll);
-            bdrv_do_drained_end(bs, data->recursive, data->parent,
-                                data->ignore_bds_parents,
-                                data->drained_end_counter);
+            bdrv_do_drained_end(bs, data->parent);
         }
         aio_context_release(ctx);
     } else {
@@ -329,11 +314,9 @@ static void bdrv_co_drain_bh_cb(void *opaque)
 }
 
 static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
-                                                bool begin, bool recursive,
+                                                bool begin,
                                                 BdrvChild *parent,
-                                                bool ignore_bds_parents,
-                                                bool poll,
-                                                int *drained_end_counter)
+                                                bool poll)
 {
     BdrvCoDrainData data;
     Coroutine *self = qemu_coroutine_self();
@@ -349,11 +332,8 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
         .bs = bs,
         .done = false,
         .begin = begin,
-        .recursive = recursive,
         .parent = parent,
-        .ignore_bds_parents = ignore_bds_parents,
         .poll = poll,
-        .drained_end_counter = drained_end_counter,
     };
 
     if (bs) {
@@ -371,53 +351,38 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
     if (ctx != co_ctx) {
         aio_context_release(ctx);
     }
-    replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);
+    replay_bh_schedule_oneshot_event(qemu_get_aio_context(),
+                                     bdrv_co_drain_bh_cb, &data);
 
     qemu_coroutine_yield();
     /* If we are resumed from some other event (such as an aio completion or a
      * timer callback), it is a bug in the caller that should be fixed. */
     assert(data.done);
 
-    /* Reaquire the AioContext of bs if we dropped it */
+    /* Reacquire the AioContext of bs if we dropped it */
     if (ctx != co_ctx) {
         aio_context_acquire(ctx);
     }
 }
 
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
-                                   BdrvChild *parent, bool ignore_bds_parents)
-{
-    assert(!qemu_in_coroutine());
-
-    /* Stop things in parent-to-child order */
-    if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
-        aio_disable_external(bdrv_get_aio_context(bs));
-    }
-
-    bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
-    bdrv_drain_invoke(bs, true, NULL);
-}
-
-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
-                                  BdrvChild *parent, bool ignore_bds_parents,
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
                                   bool poll)
 {
-    BdrvChild *child, *next;
+    IO_OR_GS_CODE();
 
     if (qemu_in_coroutine()) {
-        bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
-                               poll, NULL);
+        bdrv_co_yield_to_drain(bs, true, parent, poll);
         return;
     }
 
-    bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
+    GLOBAL_STATE_CODE();
 
-    if (recursive) {
-        assert(!ignore_bds_parents);
-        bs->recursive_quiesce_counter++;
-        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
-            bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
-                                  false);
+    /* Stop things in parent-to-child order */
+    if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
+        GRAPH_RDLOCK_GUARD_MAINLOOP();
+        bdrv_parent_drained_begin(bs, parent);
+        if (bs->drv && bs->drv->bdrv_drain_begin) {
+            bs->drv->bdrv_drain_begin(bs);
         }
     }
 
@@ -431,128 +396,62 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
      * nodes.
      */
     if (poll) {
-        assert(!ignore_bds_parents);
-        BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
+        BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
     }
 }
 
-void bdrv_drained_begin(BlockDriverState *bs)
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
 {
-    bdrv_do_drained_begin(bs, false, NULL, false, true);
+    bdrv_do_drained_begin(bs, parent, false);
 }
 
-void bdrv_subtree_drained_begin(BlockDriverState *bs)
+void coroutine_mixed_fn
+bdrv_drained_begin(BlockDriverState *bs)
 {
-    bdrv_do_drained_begin(bs, true, NULL, false, true);
+    IO_OR_GS_CODE();
+    bdrv_do_drained_begin(bs, NULL, true);
 }
 
 /**
  * This function does not poll, nor must any of its recursively called
- * functions.  The *drained_end_counter pointee will be incremented
- * once for every background operation scheduled, and decremented once
- * the operation settles.  Therefore, the pointer must remain valid
- * until the pointee reaches 0.  That implies that whoever sets up the
- * pointee has to poll until it is 0.
- *
- * We use atomic operations to access *drained_end_counter, because
- * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of
- *     @bs may contain nodes in different AioContexts,
- * (2) bdrv_drain_all_end() uses the same counter for all nodes,
- *     regardless of which AioContext they are in.
+ * functions.
  */
-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
-                                BdrvChild *parent, bool ignore_bds_parents,
-                                int *drained_end_counter)
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
 {
-    BdrvChild *child;
     int old_quiesce_counter;
 
-    assert(drained_end_counter != NULL);
+    IO_OR_GS_CODE();
 
     if (qemu_in_coroutine()) {
-        bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
-                               false, drained_end_counter);
+        bdrv_co_yield_to_drain(bs, false, parent, false);
         return;
     }
+
+    /* At this point, we should be always running in the main loop. */
+    GLOBAL_STATE_CODE();
     assert(bs->quiesce_counter > 0);
+    GLOBAL_STATE_CODE();
 
     /* Re-enable things in child-to-parent order */
-    bdrv_drain_invoke(bs, false, drained_end_counter);
-    bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
-                            drained_end_counter);
-
     old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
     if (old_quiesce_counter == 1) {
-        aio_enable_external(bdrv_get_aio_context(bs));
-    }
-
-    if (recursive) {
-        assert(!ignore_bds_parents);
-        bs->recursive_quiesce_counter--;
-        QLIST_FOREACH(child, &bs->children, next) {
-            bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
-                                drained_end_counter);
+        GRAPH_RDLOCK_GUARD_MAINLOOP();
+        if (bs->drv && bs->drv->bdrv_drain_end) {
+            bs->drv->bdrv_drain_end(bs);
         }
+        bdrv_parent_drained_end(bs, parent);
     }
 }
 
 void bdrv_drained_end(BlockDriverState *bs)
 {
-    int drained_end_counter = 0;
-    bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
-    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
-}
-
-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
-{
-    bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
-}
-
-void bdrv_subtree_drained_end(BlockDriverState *bs)
-{
-    int drained_end_counter = 0;
-    bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
-    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
-}
-
-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
-{
-    int i;
-
-    for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
-        bdrv_do_drained_begin(child->bs, true, child, false, true);
-    }
-}
-
-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
-{
-    int drained_end_counter = 0;
-    int i;
-
-    for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
-        bdrv_do_drained_end(child->bs, true, child, false,
-                            &drained_end_counter);
-    }
-
-    BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
-}
-
-/*
- * Wait for pending requests to complete on a single BlockDriverState subtree,
- * and suspend block driver's internal I/O until next request arrives.
- *
- * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
- * AioContext.
- */
-void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
-{
-    assert(qemu_in_coroutine());
-    bdrv_drained_begin(bs);
-    bdrv_drained_end(bs);
+    IO_OR_GS_CODE();
+    bdrv_do_drained_end(bs, NULL);
 }
 
 void bdrv_drain(BlockDriverState *bs)
 {
+    IO_OR_GS_CODE();
     bdrv_drained_begin(bs);
     bdrv_drained_end(bs);
 }
@@ -560,6 +459,8 @@ void bdrv_drain(BlockDriverState *bs)
 static void bdrv_drain_assert_idle(BlockDriverState *bs)
 {
     BdrvChild *child, *next;
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     assert(qatomic_read(&bs->in_flight) == 0);
     QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
@@ -574,12 +475,15 @@ static bool bdrv_drain_all_poll(void)
     BlockDriverState *bs = NULL;
     bool result = false;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     /* bdrv_drain_poll() can't make changes to the graph and we are holding the
      * main AioContext lock, so iterating bdrv_next_all_states() is safe. */
     while ((bs = bdrv_next_all_states(bs))) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
         aio_context_acquire(aio_context);
-        result |= bdrv_drain_poll(bs, false, NULL, true);
+        result |= bdrv_drain_poll(bs, NULL, true);
         aio_context_release(aio_context);
     }
 
@@ -598,14 +502,10 @@ static bool bdrv_drain_all_poll(void)
  * NOTE: no new block jobs or BlockDriverStates can be created between
  * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
  */
-void bdrv_drain_all_begin(void)
+void bdrv_drain_all_begin_nopoll(void)
 {
     BlockDriverState *bs = NULL;
-
-    if (qemu_in_coroutine()) {
-        bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
-        return;
-    }
+    GLOBAL_STATE_CODE();
 
     /*
      * bdrv queue is managed by record/replay,
@@ -628,12 +528,33 @@ void bdrv_drain_all_begin(void)
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
-        bdrv_do_drained_begin(bs, false, NULL, true, false);
+        bdrv_do_drained_begin(bs, NULL, false);
         aio_context_release(aio_context);
     }
+}
+
+void coroutine_mixed_fn bdrv_drain_all_begin(void)
+{
+    BlockDriverState *bs = NULL;
+
+    if (qemu_in_coroutine()) {
+        bdrv_co_yield_to_drain(NULL, true, NULL, true);
+        return;
+    }
+
+    /*
+     * bdrv queue is managed by record/replay,
+     * waiting for finishing the I/O requests may
+     * be infinite
+     */
+    if (replay_events_enabled()) {
+        return;
+    }
+
+    bdrv_drain_all_begin_nopoll();
 
     /* Now poll the in-flight requests */
-    AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
+    AIO_WAIT_WHILE_UNLOCKED(NULL, bdrv_drain_all_poll());
 
     while ((bs = bdrv_next_all_states(bs))) {
         bdrv_drain_assert_idle(bs);
@@ -642,21 +563,20 @@ void bdrv_drain_all_begin(void)
 
 void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
 {
-    int drained_end_counter = 0;
+    GLOBAL_STATE_CODE();
 
     g_assert(bs->quiesce_counter > 0);
     g_assert(!bs->refcnt);
 
     while (bs->quiesce_counter) {
-        bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
+        bdrv_do_drained_end(bs, NULL);
     }
-    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
 }
 
 void bdrv_drain_all_end(void)
 {
     BlockDriverState *bs = NULL;
-    int drained_end_counter = 0;
+    GLOBAL_STATE_CODE();
 
     /*
      * bdrv queue is managed by record/replay,
@@ -671,19 +591,18 @@ void bdrv_drain_all_end(void)
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
-        bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
+        bdrv_do_drained_end(bs, NULL);
         aio_context_release(aio_context);
     }
 
     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
-    AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0);
-
     assert(bdrv_drain_all_count > 0);
     bdrv_drain_all_count--;
 }
 
 void bdrv_drain_all(void)
 {
+    GLOBAL_STATE_CODE();
     bdrv_drain_all_begin();
     bdrv_drain_all_end();
 }
@@ -693,28 +612,34 @@ void bdrv_drain_all(void)
  *
  * This function should be called when a tracked request is completing.
  */
-static void tracked_request_end(BdrvTrackedRequest *req)
+static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req)
 {
     if (req->serialising) {
         qatomic_dec(&req->bs->serialising_in_flight);
     }
 
-    qemu_co_mutex_lock(&req->bs->reqs_lock);
+    qemu_mutex_lock(&req->bs->reqs_lock);
     QLIST_REMOVE(req, list);
+    qemu_mutex_unlock(&req->bs->reqs_lock);
+
+    /*
+     * At this point qemu_co_queue_wait(&req->wait_queue, ...) won't be called
+     * anymore because the request has been removed from the list, so it's safe
+     * to restart the queue outside reqs_lock to minimize the critical section.
+     */
     qemu_co_queue_restart_all(&req->wait_queue);
-    qemu_co_mutex_unlock(&req->bs->reqs_lock);
 }
 
 /**
  * Add an active request to the tracked requests list
  */
-static void tracked_request_begin(BdrvTrackedRequest *req,
-                                  BlockDriverState *bs,
-                                  int64_t offset,
-                                  uint64_t bytes,
-                                  enum BdrvTrackedRequestType type)
+static void coroutine_fn tracked_request_begin(BdrvTrackedRequest *req,
+                                               BlockDriverState *bs,
+                                               int64_t offset,
+                                               int64_t bytes,
+                                               enum BdrvTrackedRequestType type)
 {
-    assert(bytes <= INT64_MAX && offset <= INT64_MAX - bytes);
+    bdrv_check_request(offset, bytes, &error_abort);
 
     *req = (BdrvTrackedRequest){
         .bs = bs,
@@ -729,14 +654,16 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
 
     qemu_co_queue_init(&req->wait_queue);
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 }
 
 static bool tracked_request_overlaps(BdrvTrackedRequest *req,
-                                     int64_t offset, uint64_t bytes)
+                                     int64_t offset, int64_t bytes)
 {
+    bdrv_check_request(offset, bytes, &error_abort);
+
     /*        aaaa   bbbb */
     if (offset >= req->overlap_offset + req->overlap_bytes) {
         return false;
@@ -748,55 +675,63 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req,
     return true;
 }
 
-static bool coroutine_fn
-bdrv_wait_serialising_requests_locked(BlockDriverState *bs,
-                                      BdrvTrackedRequest *self)
+/* Called with self->bs->reqs_lock held */
+static coroutine_fn BdrvTrackedRequest *
+bdrv_find_conflicting_request(BdrvTrackedRequest *self)
 {
     BdrvTrackedRequest *req;
-    bool retry;
-    bool waited = false;
-
-    do {
-        retry = false;
-        QLIST_FOREACH(req, &bs->tracked_requests, list) {
-            if (req == self || (!req->serialising && !self->serialising)) {
-                continue;
-            }
-            if (tracked_request_overlaps(req, self->overlap_offset,
-                                         self->overlap_bytes))
-            {
-                /* Hitting this means there was a reentrant request, for
-                 * example, a block driver issuing nested requests.  This must
-                 * never happen since it means deadlock.
-                 */
-                assert(qemu_coroutine_self() != req->co);
-
-                /* If the request is already (indirectly) waiting for us, or
-                 * will wait for us as soon as it wakes up, then just go on
-                 * (instead of producing a deadlock in the former case). */
-                if (!req->waiting_for) {
-                    self->waiting_for = req;
-                    qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
-                    self->waiting_for = NULL;
-                    retry = true;
-                    waited = true;
-                    break;
-                }
+
+    QLIST_FOREACH(req, &self->bs->tracked_requests, list) {
+        if (req == self || (!req->serialising && !self->serialising)) {
+            continue;
+        }
+        if (tracked_request_overlaps(req, self->overlap_offset,
+                                     self->overlap_bytes))
+        {
+            /*
+             * Hitting this means there was a reentrant request, for
+             * example, a block driver issuing nested requests.  This must
+             * never happen since it means deadlock.
+             */
+            assert(qemu_coroutine_self() != req->co);
+
+            /*
+             * If the request is already (indirectly) waiting for us, or
+             * will wait for us as soon as it wakes up, then just go on
+             * (instead of producing a deadlock in the former case).
+             */
+            if (!req->waiting_for) {
+                return req;
             }
         }
-    } while (retry);
-    return waited;
+    }
+
+    return NULL;
+}
+
+/* Called with self->bs->reqs_lock held */
+static void coroutine_fn
+bdrv_wait_serialising_requests_locked(BdrvTrackedRequest *self)
+{
+    BdrvTrackedRequest *req;
+
+    while ((req = bdrv_find_conflicting_request(self))) {
+        self->waiting_for = req;
+        qemu_co_queue_wait(&req->wait_queue, &self->bs->reqs_lock);
+        self->waiting_for = NULL;
+    }
 }
 
-bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
+/* Called with req->bs->reqs_lock held */
+static void tracked_request_set_serialising(BdrvTrackedRequest *req,
+                                            uint64_t align)
 {
-    BlockDriverState *bs = req->bs;
     int64_t overlap_offset = req->offset & ~(align - 1);
-    uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
-                               - overlap_offset;
-    bool waited;
+    int64_t overlap_bytes =
+        ROUND_UP(req->offset + req->bytes, align) - overlap_offset;
+
+    bdrv_check_request(req->offset, req->bytes, &error_abort);
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
     if (!req->serialising) {
         qatomic_inc(&req->bs->serialising_in_flight);
         req->serialising = true;
@@ -804,9 +739,6 @@ bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
 
     req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
     req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
-    waited = bdrv_wait_serialising_requests_locked(bs, req);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
-    return waited;
 }
 
 /**
@@ -817,6 +749,7 @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
 {
     BdrvTrackedRequest *req;
     Coroutine *self = qemu_coroutine_self();
+    IO_CODE();
 
     QLIST_FOREACH(req, &bs->tracked_requests, list) {
         if (req->co == self) {
@@ -828,31 +761,30 @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
 }
 
 /**
- * Round a region to cluster boundaries
+ * Round a region to subcluster (if supported) or cluster boundaries
  */
-void bdrv_round_to_clusters(BlockDriverState *bs,
-                            int64_t offset, int64_t bytes,
-                            int64_t *cluster_offset,
-                            int64_t *cluster_bytes)
+void coroutine_fn GRAPH_RDLOCK
+bdrv_round_to_subclusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          int64_t *align_offset, int64_t *align_bytes)
 {
     BlockDriverInfo bdi;
-
-    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
-        *cluster_offset = offset;
-        *cluster_bytes = bytes;
+    IO_CODE();
+    if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.subcluster_size == 0) {
+        *align_offset = offset;
+        *align_bytes = bytes;
     } else {
-        int64_t c = bdi.cluster_size;
-        *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
-        *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
+        int64_t c = bdi.subcluster_size;
+        *align_offset = QEMU_ALIGN_DOWN(offset, c);
+        *align_bytes = QEMU_ALIGN_UP(offset - *align_offset + bytes, c);
     }
 }
 
-static int bdrv_get_cluster_size(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK bdrv_get_cluster_size(BlockDriverState *bs)
 {
     BlockDriverInfo bdi;
     int ret;
 
-    ret = bdrv_get_info(bs, &bdi);
+    ret = bdrv_co_get_info(bs, &bdi);
     if (ret < 0 || bdi.cluster_size == 0) {
         return bs->bl.request_alignment;
     } else {
@@ -862,59 +794,128 @@ static int bdrv_get_cluster_size(BlockDriverState *bs)
 
 void bdrv_inc_in_flight(BlockDriverState *bs)
 {
+    IO_CODE();
     qatomic_inc(&bs->in_flight);
 }
 
 void bdrv_wakeup(BlockDriverState *bs)
 {
+    IO_CODE();
     aio_wait_kick();
 }
 
 void bdrv_dec_in_flight(BlockDriverState *bs)
 {
+    IO_CODE();
     qatomic_dec(&bs->in_flight);
     bdrv_wakeup(bs);
 }
 
-static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
+static void coroutine_fn
+bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
 {
     BlockDriverState *bs = self->bs;
-    bool waited = false;
 
     if (!qatomic_read(&bs->serialising_in_flight)) {
-        return false;
+        return;
     }
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
-    waited = bdrv_wait_serialising_requests_locked(bs, self);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
+    bdrv_wait_serialising_requests_locked(self);
+    qemu_mutex_unlock(&bs->reqs_lock);
+}
+
+void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
+                                                uint64_t align)
+{
+    IO_CODE();
+
+    qemu_mutex_lock(&req->bs->reqs_lock);
 
-    return waited;
+    tracked_request_set_serialising(req, align);
+    bdrv_wait_serialising_requests_locked(req);
+
+    qemu_mutex_unlock(&req->bs->reqs_lock);
 }
 
-static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
-                                   size_t size)
+int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
+                            QEMUIOVector *qiov, size_t qiov_offset,
+                            Error **errp)
 {
-    if (size > BDRV_REQUEST_MAX_BYTES) {
+    /*
+     * Check generic offset/bytes correctness
+     */
+
+    if (offset < 0) {
+        error_setg(errp, "offset is negative: %" PRIi64, offset);
         return -EIO;
     }
 
-    if (!bdrv_is_inserted(bs)) {
-        return -ENOMEDIUM;
+    if (bytes < 0) {
+        error_setg(errp, "bytes is negative: %" PRIi64, bytes);
+        return -EIO;
     }
 
-    if (offset < 0) {
+    if (bytes > BDRV_MAX_LENGTH) {
+        error_setg(errp, "bytes(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
+                   bytes, BDRV_MAX_LENGTH);
+        return -EIO;
+    }
+
+    if (offset > BDRV_MAX_LENGTH) {
+        error_setg(errp, "offset(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
+                   offset, BDRV_MAX_LENGTH);
+        return -EIO;
+    }
+
+    if (offset > BDRV_MAX_LENGTH - bytes) {
+        error_setg(errp, "sum of offset(%" PRIi64 ") and bytes(%" PRIi64 ") "
+                   "exceeds maximum(%" PRIi64 ")", offset, bytes,
+                   BDRV_MAX_LENGTH);
+        return -EIO;
+    }
+
+    if (!qiov) {
+        return 0;
+    }
+
+    /*
+     * Check qiov and qiov_offset
+     */
+
+    if (qiov_offset > qiov->size) {
+        error_setg(errp, "qiov_offset(%zu) overflow io vector size(%zu)",
+                   qiov_offset, qiov->size);
+        return -EIO;
+    }
+
+    if (bytes > qiov->size - qiov_offset) {
+        error_setg(errp, "bytes(%" PRIi64 ") + qiov_offset(%zu) overflow io "
+                   "vector size(%zu)", bytes, qiov_offset, qiov->size);
         return -EIO;
     }
 
     return 0;
 }
 
-int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
-                       int bytes, BdrvRequestFlags flags)
+int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp)
 {
-    return bdrv_pwritev(child, offset, bytes, NULL,
-                        BDRV_REQ_ZERO_WRITE | flags);
+    return bdrv_check_qiov_request(offset, bytes, NULL, 0, errp);
+}
+
+static int bdrv_check_request32(int64_t offset, int64_t bytes,
+                                QEMUIOVector *qiov, size_t qiov_offset)
+{
+    int ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (bytes > BDRV_REQUEST_MAX_BYTES) {
+        return -EIO;
+    }
+
+    return 0;
 }
 
 /*
@@ -931,6 +932,7 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
     int ret;
     int64_t target_size, bytes, offset = 0;
     BlockDriverState *bs = child->bs;
+    IO_CODE();
 
     target_size = bdrv_getlength(bs);
     if (target_size < 0) {
@@ -958,58 +960,26 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
     }
 }
 
-/* See bdrv_pwrite() for the return codes */
-int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
-{
-    int ret;
-    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
-    if (bytes < 0) {
-        return -EINVAL;
-    }
-
-    ret = bdrv_preadv(child, offset, bytes, &qiov,  0);
-
-    return ret < 0 ? ret : bytes;
-}
-
-/* Return no. of bytes on success or < 0 on error. Important errors are:
-  -EIO         generic I/O error (may happen for all errors)
-  -ENOMEDIUM   No media inserted.
-  -EINVAL      Invalid offset or number of bytes
-  -EACCES      Trying to write a read-only device
-*/
-int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
-{
-    int ret;
-    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
-    if (bytes < 0) {
-        return -EINVAL;
-    }
-
-    ret = bdrv_pwritev(child, offset, bytes, &qiov, 0);
-
-    return ret < 0 ? ret : bytes;
-}
-
 /*
  * Writes to the file and ensures that no writes are reordered across this
  * request (acts as a barrier)
  *
  * Returns 0 on success, -errno in error cases.
  */
-int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
-                     const void *buf, int count)
+int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
+                                     int64_t bytes, const void *buf,
+                                     BdrvRequestFlags flags)
 {
     int ret;
+    IO_CODE();
+    assert_bdrv_graph_readable();
 
-    ret = bdrv_pwrite(child, offset, buf, count);
+    ret = bdrv_co_pwrite(child, offset, bytes, buf, flags);
     if (ret < 0) {
         return ret;
     }
 
-    ret = bdrv_flush(child->bs);
+    ret = bdrv_co_flush(child->bs);
     if (ret < 0) {
         return ret;
     }
@@ -1030,19 +1000,19 @@ static void bdrv_co_io_em_complete(void *opaque, int ret)
     aio_co_wake(co->coroutine);
 }
 
-static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
-                                           uint64_t offset, uint64_t bytes,
-                                           QEMUIOVector *qiov,
-                                           size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_driver_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                   QEMUIOVector *qiov, size_t qiov_offset, int flags)
 {
     BlockDriver *drv = bs->drv;
     int64_t sector_num;
     unsigned int nb_sectors;
     QEMUIOVector local_qiov;
     int ret;
+    assert_bdrv_graph_readable();
 
-    assert(!(flags & ~BDRV_REQ_MASK));
-    assert(!(flags & BDRV_REQ_NO_FALLBACK));
+    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
+    assert(!(flags & ~bs->supported_read_flags));
 
     if (!drv) {
         return -ENOMEDIUM;
@@ -1099,28 +1069,36 @@ out:
     return ret;
 }
 
-static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
-                                            uint64_t offset, uint64_t bytes,
-                                            QEMUIOVector *qiov,
-                                            size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_driver_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                    QEMUIOVector *qiov, size_t qiov_offset,
+                    BdrvRequestFlags flags)
 {
     BlockDriver *drv = bs->drv;
+    bool emulate_fua = false;
     int64_t sector_num;
     unsigned int nb_sectors;
     QEMUIOVector local_qiov;
     int ret;
+    assert_bdrv_graph_readable();
 
-    assert(!(flags & ~BDRV_REQ_MASK));
-    assert(!(flags & BDRV_REQ_NO_FALLBACK));
+    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
 
     if (!drv) {
         return -ENOMEDIUM;
     }
 
+    if ((flags & BDRV_REQ_FUA) &&
+        (~bs->supported_write_flags & BDRV_REQ_FUA)) {
+        flags &= ~BDRV_REQ_FUA;
+        emulate_fua = true;
+    }
+
+    flags &= bs->supported_write_flags;
+
     if (drv->bdrv_co_pwritev_part) {
         ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
-                                        flags & bs->supported_write_flags);
-        flags &= ~bs->supported_write_flags;
+                                        flags);
         goto emulate_flags;
     }
 
@@ -1130,9 +1108,7 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
     }
 
     if (drv->bdrv_co_pwritev) {
-        ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
-                                   flags & bs->supported_write_flags);
-        flags &= ~bs->supported_write_flags;
+        ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags);
         goto emulate_flags;
     }
 
@@ -1142,10 +1118,8 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
             .coroutine = qemu_coroutine_self(),
         };
 
-        acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
-                                    flags & bs->supported_write_flags,
+        acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov, flags,
                                     bdrv_co_io_em_complete, &co);
-        flags &= ~bs->supported_write_flags;
         if (acb == NULL) {
             ret = -EIO;
         } else {
@@ -1163,12 +1137,10 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
     assert(bytes <= BDRV_REQUEST_MAX_BYTES);
 
     assert(drv->bdrv_co_writev);
-    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
-                              flags & bs->supported_write_flags);
-    flags &= ~bs->supported_write_flags;
+    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov, flags);
 
 emulate_flags:
-    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
+    if (ret == 0 && emulate_fua) {
         ret = bdrv_co_flush(bs);
     }
 
@@ -1179,14 +1151,17 @@ emulate_flags:
     return ret;
 }
 
-static int coroutine_fn
-bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
-                               uint64_t bytes, QEMUIOVector *qiov,
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_driver_pwritev_compressed(BlockDriverState *bs, int64_t offset,
+                               int64_t bytes, QEMUIOVector *qiov,
                                size_t qiov_offset)
 {
     BlockDriver *drv = bs->drv;
     QEMUIOVector local_qiov;
     int ret;
+    assert_bdrv_graph_readable();
+
+    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
 
     if (!drv) {
         return -ENOMEDIUM;
@@ -1212,9 +1187,9 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
     return ret;
 }
 
-static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
-        int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
-        size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
+                         QEMUIOVector *qiov, size_t qiov_offset, int flags)
 {
     BlockDriverState *bs = child->bs;
 
@@ -1226,15 +1201,17 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
     void *bounce_buffer = NULL;
 
     BlockDriver *drv = bs->drv;
-    int64_t cluster_offset;
-    int64_t cluster_bytes;
-    size_t skip_bytes;
+    int64_t align_offset;
+    int64_t align_bytes;
+    int64_t skip_bytes;
     int ret;
     int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
                                     BDRV_REQUEST_MAX_BYTES);
-    unsigned int progress = 0;
+    int64_t progress = 0;
     bool skip_write;
 
+    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
+
     if (!drv) {
         return -ENOMEDIUM;
     }
@@ -1259,28 +1236,28 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
      * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
      * is one reason we loop rather than doing it all at once.
      */
-    bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
-    skip_bytes = offset - cluster_offset;
+    bdrv_round_to_subclusters(bs, offset, bytes, &align_offset, &align_bytes);
+    skip_bytes = offset - align_offset;
 
     trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
-                                   cluster_offset, cluster_bytes);
+                                   align_offset, align_bytes);
 
-    while (cluster_bytes) {
+    while (align_bytes) {
         int64_t pnum;
 
         if (skip_write) {
             ret = 1; /* "already allocated", so nothing will be copied */
-            pnum = MIN(cluster_bytes, max_transfer);
+            pnum = MIN(align_bytes, max_transfer);
         } else {
-            ret = bdrv_is_allocated(bs, cluster_offset,
-                                    MIN(cluster_bytes, max_transfer), &pnum);
+            ret = bdrv_co_is_allocated(bs, align_offset,
+                                       MIN(align_bytes, max_transfer), &pnum);
             if (ret < 0) {
                 /*
                  * Safe to treat errors in querying allocation as if
                  * unallocated; we'll probably fail again soon on the
                  * read, but at least that will set a decent errno.
                  */
-                pnum = MIN(cluster_bytes, max_transfer);
+                pnum = MIN(align_bytes, max_transfer);
             }
 
             /* Stop at EOF if the image ends in the middle of the cluster */
@@ -1298,7 +1275,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
             /* Must copy-on-read; use the bounce buffer */
             pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
             if (!bounce_buffer) {
-                int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
+                int64_t max_we_need = MAX(pnum, align_bytes - pnum);
                 int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
                 int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);
 
@@ -1310,25 +1287,25 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
             }
             qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
 
-            ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
+            ret = bdrv_driver_preadv(bs, align_offset, pnum,
                                      &local_qiov, 0, 0);
             if (ret < 0) {
                 goto err;
             }
 
-            bdrv_debug_event(bs, BLKDBG_COR_WRITE);
+            bdrv_co_debug_event(bs, BLKDBG_COR_WRITE);
             if (drv->bdrv_co_pwrite_zeroes &&
                 buffer_is_zero(bounce_buffer, pnum)) {
                 /* FIXME: Should we (perhaps conditionally) be setting
                  * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
                  * that still correctly reads as zero? */
-                ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
+                ret = bdrv_co_do_pwrite_zeroes(bs, align_offset, pnum,
                                                BDRV_REQ_WRITE_UNCHANGED);
             } else {
                 /* This does not change the data on the disk, it is not
                  * necessary to flush even in cache=writethrough mode.
                  */
-                ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
+                ret = bdrv_driver_pwritev(bs, align_offset, pnum,
                                           &local_qiov, 0,
                                           BDRV_REQ_WRITE_UNCHANGED);
             }
@@ -1357,8 +1334,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
             }
         }
 
-        cluster_offset += pnum;
-        cluster_bytes -= pnum;
+        align_offset += pnum;
+        align_bytes -= pnum;
         progress += pnum - skip_bytes;
         skip_bytes = 0;
     }
@@ -1374,16 +1351,18 @@ err:
  * handles copy on read, zeroing after EOF, and fragmentation of large
  * reads; any other features must be implemented by the caller.
  */
-static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
-    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
-    int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req,
+                    int64_t offset, int64_t bytes, int64_t align,
+                    QEMUIOVector *qiov, size_t qiov_offset, int flags)
 {
     BlockDriverState *bs = child->bs;
     int64_t total_bytes, max_bytes;
     int ret = 0;
-    uint64_t bytes_remaining = bytes;
+    int64_t bytes_remaining = bytes;
     int max_transfer;
 
+    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
     assert(is_power_of_2(align));
     assert((offset & (align - 1)) == 0);
     assert((bytes & (align - 1)) == 0);
@@ -1391,11 +1370,14 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
     max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                    align);
 
-    /* TODO: We would need a per-BDS .supported_read_flags and
+    /*
+     * TODO: We would need a per-BDS .supported_read_flags and
      * potential fallback support, if we ever implement any read flags
      * to pass through to drivers.  For now, there aren't any
-     * passthrough flags.  */
-    assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH)));
+     * passthrough flags except the BDRV_REQ_REGISTERED_BUF optimization hint.
+     */
+    assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH |
+                       BDRV_REQ_REGISTERED_BUF)));
 
     /* Handle Copy on Read and associated serialisation */
     if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -1404,7 +1386,7 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
          * with each other for the same cluster.  For example, in copy-on-read
          * it ensures that the CoR read and write operations are atomic and
          * guest writes cannot interleave between them. */
-        bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
+        bdrv_make_request_serialising(req, bdrv_get_cluster_size(bs));
     } else {
         bdrv_wait_serialising_requests(req);
     }
@@ -1412,7 +1394,10 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
     if (flags & BDRV_REQ_COPY_ON_READ) {
         int64_t pnum;
 
-        ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
+        /* The flag BDRV_REQ_COPY_ON_READ has reached its addressee */
+        flags &= ~BDRV_REQ_COPY_ON_READ;
+
+        ret = bdrv_co_is_allocated(bs, offset, bytes, &pnum);
         if (ret < 0) {
             goto out;
         }
@@ -1427,20 +1412,22 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
     }
 
     /* Forward the request to the BlockDriver, possibly fragmenting it */
-    total_bytes = bdrv_getlength(bs);
+    total_bytes = bdrv_co_getlength(bs);
     if (total_bytes < 0) {
         ret = total_bytes;
         goto out;
     }
 
+    assert(!(flags & ~(bs->supported_read_flags | BDRV_REQ_REGISTERED_BUF)));
+
     max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
     if (bytes <= max_bytes && bytes <= max_transfer) {
-        ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
+        ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, flags);
         goto out;
     }
 
     while (bytes_remaining) {
-        int num;
+        int64_t num;
 
         if (max_bytes) {
             num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
@@ -1448,7 +1435,8 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
 
             ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
                                      num, qiov,
-                                     qiov_offset + bytes - bytes_remaining, 0);
+                                     qiov_offset + bytes - bytes_remaining,
+                                     flags);
             max_bytes -= num;
         } else {
             num = bytes_remaining;
@@ -1486,6 +1474,14 @@ out:
  * @merge_reads is true for small requests,
  * if @buf_len == @head + bytes + @tail. In this case it is possible that both
  * head and tail exist but @buf_len == align and @tail_buf == @buf.
+ *
+ * @write is true for write requests, false for read requests.
+ *
+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to
+ * merge existing vector elements into a single one.  @collapse_bounce_buf acts
+ * as the bounce buffer in such cases.  @pre_collapse_qiov has the pre-collapse
+ * I/O vector elements so for read requests, the data can be copied back after
+ * the read is done.
  */
 typedef struct BdrvRequestPadding {
     uint8_t *buf;
@@ -1494,15 +1490,25 @@ typedef struct BdrvRequestPadding {
     size_t head;
     size_t tail;
     bool merge_reads;
+    bool write;
     QEMUIOVector local_qiov;
+
+    uint8_t *collapse_bounce_buf;
+    size_t collapse_len;
+    QEMUIOVector pre_collapse_qiov;
 } BdrvRequestPadding;
 
 static bool bdrv_init_padding(BlockDriverState *bs,
                               int64_t offset, int64_t bytes,
+                              bool write,
                               BdrvRequestPadding *pad)
 {
-    uint64_t align = bs->bl.request_alignment;
-    size_t sum;
+    int64_t align = bs->bl.request_alignment;
+    int64_t sum;
+
+    bdrv_check_request(offset, bytes, &error_abort);
+    assert(align <= INT_MAX); /* documented in block/block_int.h */
+    assert(align <= SIZE_MAX / 2); /* so we can allocate the buffer */
 
     memset(pad, 0, sizeof(*pad));
 
@@ -1526,13 +1532,14 @@ static bool bdrv_init_padding(BlockDriverState *bs,
         pad->tail_buf = pad->buf + pad->buf_len - align;
     }
 
+    pad->write = write;
+
     return true;
 }
 
-static int bdrv_padding_rmw_read(BdrvChild *child,
-                                 BdrvTrackedRequest *req,
-                                 BdrvRequestPadding *pad,
-                                 bool zero_middle)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_padding_rmw_read(BdrvChild *child, BdrvTrackedRequest *req,
+                      BdrvRequestPadding *pad, bool zero_middle)
 {
     QEMUIOVector local_qiov;
     BlockDriverState *bs = child->bs;
@@ -1542,15 +1549,15 @@ static int bdrv_padding_rmw_read(BdrvChild *child,
     assert(req->serialising && pad->buf);
 
     if (pad->head || pad->merge_reads) {
-        uint64_t bytes = pad->merge_reads ? pad->buf_len : align;
+        int64_t bytes = pad->merge_reads ? pad->buf_len : align;
 
         qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
 
         if (pad->head) {
-            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
+            bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
         }
         if (pad->merge_reads && pad->tail) {
-            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
+            bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
         }
         ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
                                   align, &local_qiov, 0, 0);
@@ -1558,10 +1565,10 @@ static int bdrv_padding_rmw_read(BdrvChild *child,
             return ret;
         }
         if (pad->head) {
-            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+            bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
         }
         if (pad->merge_reads && pad->tail) {
-            bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+            bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
         }
 
         if (pad->merge_reads) {
@@ -1572,7 +1579,7 @@ static int bdrv_padding_rmw_read(BdrvChild *child,
     if (pad->tail) {
         qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);
 
-        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
+        bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
         ret = bdrv_aligned_preadv(
                 child, req,
                 req->overlap_offset + req->overlap_bytes - align,
@@ -1580,7 +1587,7 @@ static int bdrv_padding_rmw_read(BdrvChild *child,
         if (ret < 0) {
             return ret;
         }
-        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+        bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
     }
 
 zero_mem:
@@ -1591,12 +1598,123 @@ zero_mem:
     return 0;
 }
 
-static void bdrv_padding_destroy(BdrvRequestPadding *pad)
+/**
+ * Free *pad's associated buffers, and perform any necessary finalization steps.
+ */
+static void bdrv_padding_finalize(BdrvRequestPadding *pad)
 {
+    if (pad->collapse_bounce_buf) {
+        if (!pad->write) {
+            /*
+             * If padding required elements in the vector to be collapsed into a
+             * bounce buffer, copy the bounce buffer content back
+             */
+            qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
+                                pad->collapse_bounce_buf, pad->collapse_len);
+        }
+        qemu_vfree(pad->collapse_bounce_buf);
+        qemu_iovec_destroy(&pad->pre_collapse_qiov);
+    }
     if (pad->buf) {
         qemu_vfree(pad->buf);
         qemu_iovec_destroy(&pad->local_qiov);
     }
+    memset(pad, 0, sizeof(*pad));
+}
+
+/*
+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while
+ * ensuring that the resulting vector will not exceed IOV_MAX elements.
+ *
+ * To ensure this, when necessary, the first two or three elements of @iov are
+ * merged into pad->collapse_bounce_buf and replaced by a reference to that
+ * bounce buffer in pad->local_qiov.
+ *
+ * After performing a read request, the data from the bounce buffer must be
+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
+ */
+static int bdrv_create_padded_qiov(BlockDriverState *bs,
+                                   BdrvRequestPadding *pad,
+                                   struct iovec *iov, int niov,
+                                   size_t iov_offset, size_t bytes)
+{
+    int padded_niov, surplus_count, collapse_count;
+
+    /* Assert this invariant */
+    assert(niov <= IOV_MAX);
+
+    /*
+     * Cannot pad if resulting length would exceed SIZE_MAX.  Returning an error
+     * to the guest is not ideal, but there is little else we can do.  At least
+     * this will practically never happen on 64-bit systems.
+     */
+    if (SIZE_MAX - pad->head < bytes ||
+        SIZE_MAX - pad->head - bytes < pad->tail)
+    {
+        return -EINVAL;
+    }
+
+    /* Length of the resulting IOV if we just concatenated everything */
+    padded_niov = !!pad->head + niov + !!pad->tail;
+
+    qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
+
+    if (pad->head) {
+        qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
+    }
+
+    /*
+     * If padded_niov > IOV_MAX, we cannot just concatenate everything.
+     * Instead, merge the first two or three elements of @iov to reduce the
+     * number of vector elements as necessary.
+     */
+    if (padded_niov > IOV_MAX) {
+        /*
+         * Only head and tail can have lead to the number of entries exceeding
+         * IOV_MAX, so we can exceed it by the head and tail at most.  We need
+         * to reduce the number of elements by `surplus_count`, so we merge that
+         * many elements plus one into one element.
+         */
+        surplus_count = padded_niov - IOV_MAX;
+        assert(surplus_count <= !!pad->head + !!pad->tail);
+        collapse_count = surplus_count + 1;
+
+        /*
+         * Move the elements to collapse into `pad->pre_collapse_qiov`, then
+         * advance `iov` (and associated variables) by those elements.
+         */
+        qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
+        qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
+                              collapse_count, iov_offset, SIZE_MAX);
+        iov += collapse_count;
+        iov_offset = 0;
+        niov -= collapse_count;
+        bytes -= pad->pre_collapse_qiov.size;
+
+        /*
+         * Construct the bounce buffer to match the length of the to-collapse
+         * vector elements, and for write requests, initialize it with the data
+         * from those elements.  Then add it to `pad->local_qiov`.
+         */
+        pad->collapse_len = pad->pre_collapse_qiov.size;
+        pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
+        if (pad->write) {
+            qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
+                              pad->collapse_bounce_buf, pad->collapse_len);
+        }
+        qemu_iovec_add(&pad->local_qiov,
+                       pad->collapse_bounce_buf, pad->collapse_len);
+    }
+
+    qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
+
+    if (pad->tail) {
+        qemu_iovec_add(&pad->local_qiov,
+                       pad->buf + pad->buf_len - pad->tail, pad->tail);
+    }
+
+    assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
+    return 0;
 }
 
 /*
@@ -1606,40 +1724,75 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
  * read of padding, bdrv_padding_rmw_read() should be called separately if
  * needed.
  *
- * All parameters except @bs are in-out: they represent original request at
- * function call and padded (if padding needed) at function finish.
+ * @write is true for write requests, false for read requests.
  *
- * Function always succeeds.
+ * Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
+ *  - on function start they represent original request
+ *  - on failure or when padding is not needed they are unchanged
+ *  - on success when padding is needed they represent padded request
  */
-static bool bdrv_pad_request(BlockDriverState *bs,
-                             QEMUIOVector **qiov, size_t *qiov_offset,
-                             int64_t *offset, unsigned int *bytes,
-                             BdrvRequestPadding *pad)
+static int bdrv_pad_request(BlockDriverState *bs,
+                            QEMUIOVector **qiov, size_t *qiov_offset,
+                            int64_t *offset, int64_t *bytes,
+                            bool write,
+                            BdrvRequestPadding *pad, bool *padded,
+                            BdrvRequestFlags *flags)
 {
-    if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
-        return false;
+    int ret;
+    struct iovec *sliced_iov;
+    int sliced_niov;
+    size_t sliced_head, sliced_tail;
+
+    /* Should have been checked by the caller already */
+    ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
+        if (padded) {
+            *padded = false;
+        }
+        return 0;
     }
 
-    qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
-                             *qiov, *qiov_offset, *bytes,
-                             pad->buf + pad->buf_len - pad->tail, pad->tail);
+    sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
+                                  &sliced_head, &sliced_tail,
+                                  &sliced_niov);
+
+    /* Guaranteed by bdrv_check_request32() */
+    assert(*bytes <= SIZE_MAX);
+    ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
+                                  sliced_head, *bytes);
+    if (ret < 0) {
+        bdrv_padding_finalize(pad);
+        return ret;
+    }
     *bytes += pad->head + pad->tail;
     *offset -= pad->head;
     *qiov = &pad->local_qiov;
     *qiov_offset = 0;
+    if (padded) {
+        *padded = true;
+    }
+    if (flags) {
+        /* Can't use optimization hint with bounce buffer */
+        *flags &= ~BDRV_REQ_REGISTERED_BUF;
+    }
 
-    return true;
+    return 0;
 }
 
 int coroutine_fn bdrv_co_preadv(BdrvChild *child,
-    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+    int64_t offset, int64_t bytes, QEMUIOVector *qiov,
     BdrvRequestFlags flags)
 {
+    IO_CODE();
     return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
 }
 
 int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
-    int64_t offset, unsigned int bytes,
+    int64_t offset, int64_t bytes,
     QEMUIOVector *qiov, size_t qiov_offset,
     BdrvRequestFlags flags)
 {
@@ -1647,10 +1800,15 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
     BdrvTrackedRequest req;
     BdrvRequestPadding pad;
     int ret;
+    IO_CODE();
+
+    trace_bdrv_co_preadv_part(bs, offset, bytes, flags);
 
-    trace_bdrv_co_preadv(bs, offset, bytes, flags);
+    if (!bdrv_co_is_inserted(bs)) {
+        return -ENOMEDIUM;
+    }
 
-    ret = bdrv_check_byte_request(bs, offset, bytes);
+    ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
     if (ret < 0) {
         return ret;
     }
@@ -1674,22 +1832,28 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
         flags |= BDRV_REQ_COPY_ON_READ;
     }
 
-    bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad);
+    ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
+                           &pad, NULL, &flags);
+    if (ret < 0) {
+        goto fail;
+    }
 
     tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
     ret = bdrv_aligned_preadv(child, &req, offset, bytes,
                               bs->bl.request_alignment,
                               qiov, qiov_offset, flags);
     tracked_request_end(&req);
-    bdrv_dec_in_flight(bs);
+    bdrv_padding_finalize(&pad);
 
-    bdrv_padding_destroy(&pad);
+fail:
+    bdrv_dec_in_flight(bs);
 
     return ret;
 }
 
-static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                         BdrvRequestFlags flags)
 {
     BlockDriver *drv = bs->drv;
     QEMUIOVector qiov;
@@ -1699,11 +1863,15 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
     int head = 0;
     int tail = 0;
 
-    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
+    int64_t max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes,
+                                            INT64_MAX);
     int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
                         bs->bl.request_alignment);
     int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
 
+    assert_bdrv_graph_readable();
+    bdrv_check_request(offset, bytes, &error_abort);
+
     if (!drv) {
         return -ENOMEDIUM;
     }
@@ -1712,6 +1880,14 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
         return -ENOTSUP;
     }
 
+    /* By definition there is no user buffer so this flag doesn't make sense */
+    if (flags & BDRV_REQ_REGISTERED_BUF) {
+        return -EINVAL;
+    }
+
+    /* Invalidate the cached block-status data range if this write overlaps */
+    bdrv_bsc_invalidate_range(bs, offset, bytes);
+
     assert(alignment % bs->bl.request_alignment == 0);
     head = offset % alignment;
     tail = (offset + bytes) % alignment;
@@ -1719,7 +1895,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
     assert(max_write_zeroes >= bs->bl.request_alignment);
 
     while (bytes > 0 && !ret) {
-        int num = bytes;
+        int64_t num = bytes;
 
         /* Align request.  Block drivers can expect the "bulk" of the request
          * to be aligned, and that unaligned requests do not cross cluster
@@ -1799,39 +1975,41 @@ fail:
     return ret;
 }
 
-static inline int coroutine_fn
-bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
+static inline int coroutine_fn GRAPH_RDLOCK
+bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
                           BdrvTrackedRequest *req, int flags)
 {
     BlockDriverState *bs = child->bs;
-    bool waited;
-    int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
 
-    if (bs->read_only) {
+    bdrv_check_request(offset, bytes, &error_abort);
+
+    if (bdrv_is_read_only(bs)) {
         return -EPERM;
     }
 
     assert(!(bs->open_flags & BDRV_O_INACTIVE));
     assert((bs->open_flags & BDRV_O_NO_IO) == 0);
     assert(!(flags & ~BDRV_REQ_MASK));
+    assert(!((flags & BDRV_REQ_NO_WAIT) && !(flags & BDRV_REQ_SERIALISING)));
 
     if (flags & BDRV_REQ_SERIALISING) {
-        waited = bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
-        /*
-         * For a misaligned request we should have already waited earlier,
-         * because we come after bdrv_padding_rmw_read which must be called
-         * with the request already marked as serialising.
-         */
-        assert(!waited ||
-               (req->offset == req->overlap_offset &&
-                req->bytes == req->overlap_bytes));
+        QEMU_LOCK_GUARD(&bs->reqs_lock);
+
+        tracked_request_set_serialising(req, bdrv_get_cluster_size(bs));
+
+        if ((flags & BDRV_REQ_NO_WAIT) && bdrv_find_conflicting_request(req)) {
+            return -EBUSY;
+        }
+
+        bdrv_wait_serialising_requests_locked(req);
     } else {
         bdrv_wait_serialising_requests(req);
     }
 
     assert(req->overlap_offset <= offset);
     assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
-    assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
+    assert(offset + bytes <= bs->total_sectors * BDRV_SECTOR_SIZE ||
+           child->perm & BLK_PERM_RESIZE);
 
     switch (req->type) {
     case BDRV_TRACKED_WRITE:
@@ -1841,8 +2019,8 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
         } else {
             assert(child->perm & BLK_PERM_WRITE);
         }
-        return notifier_with_return_list_notify(&bs->before_write_notifiers,
-                                                req);
+        bdrv_write_threshold_check_write(bs, offset, bytes);
+        return 0;
     case BDRV_TRACKED_TRUNCATE:
         assert(child->perm & BLK_PERM_RESIZE);
         return 0;
@@ -1851,13 +2029,15 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
     }
 }
 
-static inline void coroutine_fn
-bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes,
+static inline void coroutine_fn GRAPH_RDLOCK
+bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes,
                          BdrvTrackedRequest *req, int ret)
 {
     int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
     BlockDriverState *bs = child->bs;
 
+    bdrv_check_request(offset, bytes, &error_abort);
+
     qatomic_inc(&bs->write_gen);
 
     /*
@@ -1893,17 +2073,21 @@ bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes,
  * Forwards an already correctly aligned write request to the BlockDriver,
  * after possibly fragmenting it.
  */
-static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
-    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
-    int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req,
+                     int64_t offset, int64_t bytes, int64_t align,
+                     QEMUIOVector *qiov, size_t qiov_offset,
+                     BdrvRequestFlags flags)
 {
     BlockDriverState *bs = child->bs;
     BlockDriver *drv = bs->drv;
     int ret;
 
-    uint64_t bytes_remaining = bytes;
+    int64_t bytes_remaining = bytes;
     int max_transfer;
 
+    bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
+
     if (!drv) {
         return -ENOMEDIUM;
     }
@@ -1915,7 +2099,6 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
     assert(is_power_of_2(align));
     assert((offset & (align - 1)) == 0);
     assert((bytes & (align - 1)) == 0);
-    assert(!qiov || qiov_offset + bytes <= qiov->size);
     max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
                                    align);
 
@@ -1928,21 +2111,24 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
         if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
             flags |= BDRV_REQ_MAY_UNMAP;
         }
+
+        /* Can't use optimization hint with bufferless zero write */
+        flags &= ~BDRV_REQ_REGISTERED_BUF;
     }
 
     if (ret < 0) {
         /* Do nothing, write notifier decided to fail this request */
     } else if (flags & BDRV_REQ_ZERO_WRITE) {
-        bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
+        bdrv_co_debug_event(bs, BLKDBG_PWRITEV_ZERO);
         ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
     } else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
         ret = bdrv_driver_pwritev_compressed(bs, offset, bytes,
                                              qiov, qiov_offset);
     } else if (bytes <= max_transfer) {
-        bdrv_debug_event(bs, BLKDBG_PWRITEV);
+        bdrv_co_debug_event(bs, BLKDBG_PWRITEV);
         ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags);
     } else {
-        bdrv_debug_event(bs, BLKDBG_PWRITEV);
+        bdrv_co_debug_event(bs, BLKDBG_PWRITEV);
         while (bytes_remaining) {
             int num = MIN(bytes_remaining, max_transfer);
             int local_flags = flags;
@@ -1965,7 +2151,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
             bytes_remaining -= num;
         }
     }
-    bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
+    bdrv_co_debug_event(bs, BLKDBG_PWRITEV_DONE);
 
     if (ret >= 0) {
         ret = 0;
@@ -1975,11 +2161,9 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
     return ret;
 }
 
-static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
-                                                int64_t offset,
-                                                unsigned int bytes,
-                                                BdrvRequestFlags flags,
-                                                BdrvTrackedRequest *req)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
+                        BdrvRequestFlags flags, BdrvTrackedRequest *req)
 {
     BlockDriverState *bs = child->bs;
     QEMUIOVector local_qiov;
@@ -1988,9 +2172,13 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
     bool padding;
     BdrvRequestPadding pad;
 
-    padding = bdrv_init_padding(bs, offset, bytes, &pad);
+    /* This flag doesn't make sense for padding or zero writes */
+    flags &= ~BDRV_REQ_REGISTERED_BUF;
+
+    padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
     if (padding) {
-        bdrv_mark_request_serialising(req, align);
+        assert(!(flags & BDRV_REQ_NO_WAIT));
+        bdrv_make_request_serialising(req, align);
 
         bdrv_padding_rmw_read(child, req, &pad, true);
 
@@ -2014,7 +2202,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
     assert(!bytes || (offset & (align - 1)) == 0);
     if (bytes >= align) {
         /* Write the aligned part in the middle. */
-        uint64_t aligned_bytes = bytes & ~(align - 1);
+        int64_t aligned_bytes = bytes & ~(align - 1);
         ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
                                    NULL, 0, flags);
         if (ret < 0) {
@@ -2035,7 +2223,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
     }
 
 out:
-    bdrv_padding_destroy(&pad);
+    bdrv_padding_finalize(&pad);
 
     return ret;
 }
@@ -2044,14 +2232,15 @@ out:
  * Handle a write request in coroutine context
  */
 int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
-    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+    int64_t offset, int64_t bytes, QEMUIOVector *qiov,
     BdrvRequestFlags flags)
 {
+    IO_CODE();
     return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
 }
 
 int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
-    int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
+    int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
     BdrvRequestFlags flags)
 {
     BlockDriverState *bs = child->bs;
@@ -2059,14 +2248,20 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
     uint64_t align = bs->bl.request_alignment;
     BdrvRequestPadding pad;
     int ret;
+    bool padded = false;
+    IO_CODE();
 
-    trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
+    trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags);
 
-    if (!bs->drv) {
+    if (!bdrv_co_is_inserted(bs)) {
         return -ENOMEDIUM;
     }
 
-    ret = bdrv_check_byte_request(bs, offset, bytes);
+    if (flags & BDRV_REQ_ZERO_WRITE) {
+        ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
+    } else {
+        ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
+    }
     if (ret < 0) {
         return ret;
     }
@@ -2090,28 +2285,44 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
         return 0;
     }
 
+    if (!(flags & BDRV_REQ_ZERO_WRITE)) {
+        /*
+         * Pad request for following read-modify-write cycle.
+         * bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
+         * alignment only if there is no ZERO flag.
+         */
+        ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
+                               &pad, &padded, &flags);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
     bdrv_inc_in_flight(bs);
-    /*
-     * Align write if necessary by performing a read-modify-write cycle.
-     * Pad qiov with the read parts and be sure to have a tracked request not
-     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
-     */
     tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
 
     if (flags & BDRV_REQ_ZERO_WRITE) {
+        assert(!padded);
         ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
         goto out;
     }
 
-    if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) {
-        bdrv_mark_request_serialising(&req, align);
+    if (padded) {
+        /*
+         * Request was unaligned to request_alignment and therefore
+         * padded.  We are going to do read-modify-write, and must
+         * serialize the request to prevent interactions of the
+         * widened region with other transactions.
+         */
+        assert(!(flags & BDRV_REQ_NO_WAIT));
+        bdrv_make_request_serialising(&req, align);
         bdrv_padding_rmw_read(child, &req, &pad, false);
     }
 
     ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
                                qiov, qiov_offset, flags);
 
-    bdrv_padding_destroy(&pad);
+    bdrv_padding_finalize(&pad);
 
 out:
     tracked_request_end(&req);
@@ -2121,9 +2332,11 @@ out:
 }
 
 int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
-                                       int bytes, BdrvRequestFlags flags)
+                                       int64_t bytes, BdrvRequestFlags flags)
 {
+    IO_CODE();
     trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
+    assert_bdrv_graph_readable();
 
     if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
         flags &= ~BDRV_REQ_MAY_UNMAP;
@@ -2142,6 +2355,9 @@ int bdrv_flush_all(void)
     BlockDriverState *bs = NULL;
     int result = 0;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     /*
      * bdrv queue is managed by record/replay,
      * creating new flush request for stopping
@@ -2193,11 +2409,10 @@ int bdrv_flush_all(void)
  * BDRV_BLOCK_OFFSET_VALID bit is set, 'map' and 'file' (if non-NULL) are
  * set to the host mapping and BDS corresponding to the guest offset.
  */
-static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
-                                             bool want_zero,
-                                             int64_t offset, int64_t bytes,
-                                             int64_t *pnum, int64_t *map,
-                                             BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero,
+                        int64_t offset, int64_t bytes,
+                        int64_t *pnum, int64_t *map, BlockDriverState **file)
 {
     int64_t total_size;
     int64_t n; /* bytes */
@@ -2209,8 +2424,9 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
     bool has_filtered_child;
 
     assert(pnum);
+    assert_bdrv_graph_readable();
     *pnum = 0;
-    total_size = bdrv_getlength(bs);
+    total_size = bdrv_co_getlength(bs);
     if (total_size < 0) {
         ret = total_size;
         goto early_out;
@@ -2230,7 +2446,7 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
         bytes = n;
     }
 
-    /* Must be non-NULL or bdrv_getlength() would have failed */
+    /* Must be non-NULL or bdrv_co_getlength() would have failed */
     assert(bs->drv);
     has_filtered_child = bdrv_filter_child(bs);
     if (!bs->drv->bdrv_co_block_status && !has_filtered_child) {
@@ -2255,9 +2471,69 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
     aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
 
     if (bs->drv->bdrv_co_block_status) {
-        ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
-                                            aligned_bytes, pnum, &local_map,
-                                            &local_file);
+        /*
+         * Use the block-status cache only for protocol nodes: Format
+         * drivers are generally quick to inquire the status, but protocol
+         * drivers often need to get information from outside of qemu, so
+         * we do not have control over the actual implementation.  There
+         * have been cases where inquiring the status took an unreasonably
+         * long time, and we can do nothing in qemu to fix it.
+         * This is especially problematic for images with large data areas,
+         * because finding the few holes in them and giving them special
+         * treatment does not gain much performance.  Therefore, we try to
+         * cache the last-identified data region.
+         *
+         * Second, limiting ourselves to protocol nodes allows us to assume
+         * the block status for data regions to be DATA | OFFSET_VALID, and
+         * that the host offset is the same as the guest offset.
+         *
+         * Note that it is possible that external writers zero parts of
+         * the cached regions without the cache being invalidated, and so
+         * we may report zeroes as data.  This is not catastrophic,
+         * however, because reporting zeroes as data is fine.
+         */
+        if (QLIST_EMPTY(&bs->children) &&
+            bdrv_bsc_is_data(bs, aligned_offset, pnum))
+        {
+            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+            local_file = bs;
+            local_map = aligned_offset;
+        } else {
+            ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
+                                                aligned_bytes, pnum, &local_map,
+                                                &local_file);
+
+            /*
+             * Note that checking QLIST_EMPTY(&bs->children) is also done when
+             * the cache is queried above.  Technically, we do not need to check
+             * it here; the worst that can happen is that we fill the cache for
+             * non-protocol nodes, and then it is never used.  However, filling
+             * the cache requires an RCU update, so double check here to avoid
+             * such an update if possible.
+             *
+             * Check want_zero, because we only want to update the cache when we
+             * have accurate information about what is zero and what is data.
+             */
+            if (want_zero &&
+                ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
+                QLIST_EMPTY(&bs->children))
+            {
+                /*
+                 * When a protocol driver reports BLOCK_OFFSET_VALID, the
+                 * returned local_map value must be the same as the offset we
+                 * have passed (aligned_offset), and local_bs must be the node
+                 * itself.
+                 * Assert this, because we follow this rule when reading from
+                 * the cache (see the `local_file = bs` and
+                 * `local_map = aligned_offset` assignments above), and the
+                 * result the cache delivers must be the same as the driver
+                 * would deliver.
+                 */
+                assert(local_file == bs);
+                assert(local_map == aligned_offset);
+                bdrv_bsc_fill(bs, aligned_offset, *pnum);
+            }
+        }
     } else {
         /* Default code for filters */
 
@@ -2295,8 +2571,8 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
 
     if (ret & BDRV_BLOCK_RAW) {
         assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
-        ret = bdrv_co_block_status(local_file, want_zero, local_map,
-                                   *pnum, pnum, &local_map, &local_file);
+        ret = bdrv_co_do_block_status(local_file, want_zero, local_map,
+                                      *pnum, pnum, &local_map, &local_file);
         goto out;
     }
 
@@ -2308,7 +2584,7 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
         if (!cow_bs) {
             ret |= BDRV_BLOCK_ZERO;
         } else if (want_zero) {
-            int64_t size2 = bdrv_getlength(cow_bs);
+            int64_t size2 = bdrv_co_getlength(cow_bs);
 
             if (size2 >= 0 && offset >= size2) {
                 ret |= BDRV_BLOCK_ZERO;
@@ -2323,8 +2599,8 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
         int64_t file_pnum;
         int ret2;
 
-        ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
-                                    *pnum, &file_pnum, NULL, NULL);
+        ret2 = bdrv_co_do_block_status(local_file, want_zero, local_map,
+                                       *pnum, &file_pnum, NULL, NULL);
         if (ret2 >= 0) {
             /* Ignore errors.  This is just providing extra information, it
              * is useful but not necessary.
@@ -2343,6 +2619,16 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
                 ret |= (ret2 & BDRV_BLOCK_ZERO);
             }
         }
+
+        /*
+         * Now that the recursive search was done, clear the flag. Otherwise,
+         * with more complicated block graphs like snapshot-access ->
+         * copy-before-write -> qcow2, where the return value will be propagated
+         * further up to a parent bdrv_co_do_block_status() call, both the
+         * BDRV_BLOCK_RECURSE and BDRV_BLOCK_ZERO flags would be set, which is
+         * not allowed.
+         */
+        ret &= ~BDRV_BLOCK_RECURSE;
     }
 
 out:
@@ -2376,8 +2662,10 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
     BlockDriverState *p;
     int64_t eof = 0;
     int dummy;
+    IO_CODE();
 
     assert(!include_base || base); /* Can't include NULL base */
+    assert_bdrv_graph_readable();
 
     if (!depth) {
         depth = &dummy;
@@ -2389,7 +2677,8 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
         return 0;
     }
 
-    ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
+    ret = bdrv_co_do_block_status(bs, want_zero, offset, bytes, pnum,
+                                  map, file);
     ++*depth;
     if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
         return ret;
@@ -2405,8 +2694,8 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
     for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base;
          p = bdrv_filter_or_cow_bs(p))
     {
-        ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
-                                   file);
+        ret = bdrv_co_do_block_status(p, want_zero, offset, bytes, pnum,
+                                      map, file);
         ++*depth;
         if (ret < 0) {
             return ret;
@@ -2461,19 +2750,24 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
     return ret;
 }
 
-int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
-                            int64_t offset, int64_t bytes, int64_t *pnum,
-                            int64_t *map, BlockDriverState **file)
+int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
+                                            BlockDriverState *base,
+                                            int64_t offset, int64_t bytes,
+                                            int64_t *pnum, int64_t *map,
+                                            BlockDriverState **file)
 {
-    return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
-                                          pnum, map, file, NULL);
+    IO_CODE();
+    return bdrv_co_common_block_status_above(bs, base, false, true, offset,
+                                             bytes, pnum, map, file, NULL);
 }
 
-int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                      int64_t *pnum, int64_t *map, BlockDriverState **file)
+int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, int64_t offset,
+                                      int64_t bytes, int64_t *pnum,
+                                      int64_t *map, BlockDriverState **file)
 {
-    return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
-                                   offset, bytes, pnum, map, file);
+    IO_CODE();
+    return bdrv_co_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
+                                      offset, bytes, pnum, map, file);
 }
 
 /*
@@ -2488,13 +2782,14 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
 {
     int ret;
     int64_t pnum = bytes;
+    IO_CODE();
 
     if (!bytes) {
         return 1;
     }
 
-    ret = bdrv_common_block_status_above(bs, NULL, false, false, offset,
-                                         bytes, &pnum, NULL, NULL, NULL);
+    ret = bdrv_co_common_block_status_above(bs, NULL, false, false, offset,
+                                            bytes, &pnum, NULL, NULL, NULL);
 
     if (ret < 0) {
         return ret;
@@ -2503,15 +2798,16 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
     return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO);
 }
 
-int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
-                                   int64_t bytes, int64_t *pnum)
+int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset,
+                                      int64_t bytes, int64_t *pnum)
 {
     int ret;
     int64_t dummy;
+    IO_CODE();
 
-    ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
-                                         bytes, pnum ? pnum : &dummy, NULL,
-                                         NULL, NULL);
+    ret = bdrv_co_common_block_status_above(bs, bs, true, false, offset,
+                                            bytes, pnum ? pnum : &dummy, NULL,
+                                            NULL, NULL);
     if (ret < 0) {
         return ret;
     }
@@ -2535,15 +2831,18 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
  * words, the result is not necessarily the maximum possible range);
  * but 'pnum' will only be 0 when end of file is reached.
  */
-int bdrv_is_allocated_above(BlockDriverState *top,
-                            BlockDriverState *base,
-                            bool include_base, int64_t offset,
-                            int64_t bytes, int64_t *pnum)
+int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *bs,
+                                            BlockDriverState *base,
+                                            bool include_base, int64_t offset,
+                                            int64_t bytes, int64_t *pnum)
 {
     int depth;
-    int ret = bdrv_common_block_status_above(top, base, include_base, false,
-                                             offset, bytes, pnum, NULL, NULL,
-                                             &depth);
+    int ret;
+    IO_CODE();
+
+    ret = bdrv_co_common_block_status_above(bs, base, include_base, false,
+                                            offset, bytes, pnum, NULL, NULL,
+                                            &depth);
     if (ret < 0) {
         return ret;
     }
@@ -2559,7 +2858,14 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
 {
     BlockDriver *drv = bs->drv;
     BlockDriverState *child_bs = bdrv_primary_bs(bs);
-    int ret = -ENOTSUP;
+    int ret;
+    IO_CODE();
+    assert_bdrv_graph_readable();
+
+    ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
+    if (ret < 0) {
+        return ret;
+    }
 
     if (!drv) {
         return -ENOMEDIUM;
@@ -2567,10 +2873,12 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
 
     bdrv_inc_in_flight(bs);
 
-    if (drv->bdrv_load_vmstate) {
-        ret = drv->bdrv_load_vmstate(bs, qiov, pos);
+    if (drv->bdrv_co_load_vmstate) {
+        ret = drv->bdrv_co_load_vmstate(bs, qiov, pos);
     } else if (child_bs) {
         ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
+    } else {
+        ret = -ENOTSUP;
     }
 
     bdrv_dec_in_flight(bs);
@@ -2583,7 +2891,14 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
 {
     BlockDriver *drv = bs->drv;
     BlockDriverState *child_bs = bdrv_primary_bs(bs);
-    int ret = -ENOTSUP;
+    int ret;
+    IO_CODE();
+    assert_bdrv_graph_readable();
+
+    ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
+    if (ret < 0) {
+        return ret;
+    }
 
     if (!drv) {
         return -ENOMEDIUM;
@@ -2591,10 +2906,12 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
 
     bdrv_inc_in_flight(bs);
 
-    if (drv->bdrv_save_vmstate) {
-        ret = drv->bdrv_save_vmstate(bs, qiov, pos);
+    if (drv->bdrv_co_save_vmstate) {
+        ret = drv->bdrv_co_save_vmstate(bs, qiov, pos);
     } else if (child_bs) {
         ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
+    } else {
+        ret = -ENOTSUP;
     }
 
     bdrv_dec_in_flight(bs);
@@ -2607,6 +2924,7 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
 {
     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
     int ret = bdrv_writev_vmstate(bs, &qiov, pos);
+    IO_CODE();
 
     return ret < 0 ? ret : size;
 }
@@ -2616,6 +2934,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
 {
     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
     int ret = bdrv_readv_vmstate(bs, &qiov, pos);
+    IO_CODE();
 
     return ret < 0 ? ret : size;
 }
@@ -2623,24 +2942,18 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
 /**************************************************************/
 /* async I/Os */
 
+/**
+ * Synchronously cancels an acb. Must be called with the BQL held and the acb
+ * must be processed with the BQL held too (IOThreads are not allowed).
+ *
+ * Use bdrv_aio_cancel_async() instead when possible.
+ */
 void bdrv_aio_cancel(BlockAIOCB *acb)
 {
+    GLOBAL_STATE_CODE();
     qemu_aio_ref(acb);
     bdrv_aio_cancel_async(acb);
-    while (acb->refcnt > 1) {
-        if (acb->aiocb_info->get_aio_context) {
-            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
-        } else if (acb->bs) {
-            /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
-             * assert that we're not using an I/O thread.  Thread-safe
-             * code should use bdrv_aio_cancel_async exclusively.
-             */
-            assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
-            aio_poll(bdrv_get_aio_context(acb->bs), true);
-        } else {
-            abort();
-        }
-    }
+    AIO_WAIT_WHILE_UNLOCKED(NULL, acb->refcnt > 1);
     qemu_aio_unref(acb);
 }
 
@@ -2649,6 +2962,7 @@ void bdrv_aio_cancel(BlockAIOCB *acb)
  * In either case the completion callback must be called. */
 void bdrv_aio_cancel_async(BlockAIOCB *acb)
 {
+    IO_CODE();
     if (acb->aiocb_info->cancel_async) {
         acb->aiocb_info->cancel_async(acb);
     }
@@ -2663,15 +2977,17 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
     BdrvChild *child;
     int current_gen;
     int ret = 0;
+    IO_CODE();
 
+    assert_bdrv_graph_readable();
     bdrv_inc_in_flight(bs);
 
-    if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
+    if (!bdrv_co_is_inserted(bs) || bdrv_is_read_only(bs) ||
         bdrv_is_sg(bs)) {
         goto early_exit;
     }
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     current_gen = qatomic_read(&bs->write_gen);
 
     /* Wait until any previous flushes are completed */
@@ -2681,7 +2997,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 
     /* Flushes reach this point in nondecreasing current_gen order.  */
     bs->active_flush_req = true;
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 
     /* Write back all layers by calling one driver function */
     if (bs->drv->bdrv_co_flush) {
@@ -2690,7 +3006,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
     }
 
     /* Write back cached data to the OS even with cache=unsafe */
-    BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
+    BLKDBG_CO_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
     if (bs->drv->bdrv_co_flush_to_os) {
         ret = bs->drv->bdrv_co_flush_to_os(bs);
         if (ret < 0) {
@@ -2708,7 +3024,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
         goto flush_children;
     }
 
-    BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
+    BLKDBG_CO_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
     if (!bs->drv) {
         /* bs->drv->bdrv_co_flush() might have ejected the BDS
          * (even in case of apparent success) */
@@ -2769,11 +3085,11 @@ out:
         bs->flushed_gen = current_gen;
     }
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     bs->active_flush_req = false;
     /* Return value is ignored - it's ok if wait queue is empty */
     qemu_co_queue_next(&bs->flush_queue);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 
 early_exit:
     bdrv_dec_in_flight(bs);
@@ -2784,11 +3100,14 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
                                   int64_t bytes)
 {
     BdrvTrackedRequest req;
-    int max_pdiscard, ret;
+    int ret;
+    int64_t max_pdiscard;
     int head, tail, align;
     BlockDriverState *bs = child->bs;
+    IO_CODE();
+    assert_bdrv_graph_readable();
 
-    if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
+    if (!bs || !bs->drv || !bdrv_co_is_inserted(bs)) {
         return -ENOMEDIUM;
     }
 
@@ -2796,8 +3115,9 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
         return -EPERM;
     }
 
-    if (offset < 0 || bytes < 0 || bytes > INT64_MAX - offset) {
-        return -EIO;
+    ret = bdrv_check_request(offset, bytes, NULL);
+    if (ret < 0) {
+        return ret;
     }
 
     /* Do nothing if disabled.  */
@@ -2809,6 +3129,9 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
         return 0;
     }
 
+    /* Invalidate the cached block-status data range if this discard overlaps */
+    bdrv_bsc_invalidate_range(bs, offset, bytes);
+
     /* Discard is advisory, but some devices track and coalesce
      * unaligned requests, so we must pass everything down rather than
      * round here.  Still, most devices will just silently ignore
@@ -2827,7 +3150,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
         goto out;
     }
 
-    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
+    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT64_MAX),
                                    align);
     assert(max_pdiscard >= bs->bl.request_alignment);
 
@@ -2894,13 +3217,15 @@ out:
     return ret;
 }
 
-int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
+int coroutine_fn bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
 {
     BlockDriver *drv = bs->drv;
     CoroutineIOCompletion co = {
         .coroutine = qemu_coroutine_self(),
     };
     BlockAIOCB *acb;
+    IO_CODE();
+    assert_bdrv_graph_readable();
 
     bdrv_inc_in_flight(bs);
     if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
@@ -2923,19 +3248,90 @@ out:
     return co.ret;
 }
 
+int coroutine_fn bdrv_co_zone_report(BlockDriverState *bs, int64_t offset,
+                        unsigned int *nr_zones,
+                        BlockZoneDescriptor *zones)
+{
+    BlockDriver *drv = bs->drv;
+    CoroutineIOCompletion co = {
+            .coroutine = qemu_coroutine_self(),
+    };
+    IO_CODE();
+
+    bdrv_inc_in_flight(bs);
+    if (!drv || !drv->bdrv_co_zone_report || bs->bl.zoned == BLK_Z_NONE) {
+        co.ret = -ENOTSUP;
+        goto out;
+    }
+    co.ret = drv->bdrv_co_zone_report(bs, offset, nr_zones, zones);
+out:
+    bdrv_dec_in_flight(bs);
+    return co.ret;
+}
+
+int coroutine_fn bdrv_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
+        int64_t offset, int64_t len)
+{
+    BlockDriver *drv = bs->drv;
+    CoroutineIOCompletion co = {
+            .coroutine = qemu_coroutine_self(),
+    };
+    IO_CODE();
+
+    bdrv_inc_in_flight(bs);
+    if (!drv || !drv->bdrv_co_zone_mgmt || bs->bl.zoned == BLK_Z_NONE) {
+        co.ret = -ENOTSUP;
+        goto out;
+    }
+    co.ret = drv->bdrv_co_zone_mgmt(bs, op, offset, len);
+out:
+    bdrv_dec_in_flight(bs);
+    return co.ret;
+}
+
+int coroutine_fn bdrv_co_zone_append(BlockDriverState *bs, int64_t *offset,
+                        QEMUIOVector *qiov,
+                        BdrvRequestFlags flags)
+{
+    int ret;
+    BlockDriver *drv = bs->drv;
+    CoroutineIOCompletion co = {
+            .coroutine = qemu_coroutine_self(),
+    };
+    IO_CODE();
+
+    ret = bdrv_check_qiov_request(*offset, qiov->size, qiov, 0, NULL);
+    if (ret < 0) {
+        return ret;
+    }
+
+    bdrv_inc_in_flight(bs);
+    if (!drv || !drv->bdrv_co_zone_append || bs->bl.zoned == BLK_Z_NONE) {
+        co.ret = -ENOTSUP;
+        goto out;
+    }
+    co.ret = drv->bdrv_co_zone_append(bs, offset, qiov, flags);
+out:
+    bdrv_dec_in_flight(bs);
+    return co.ret;
+}
+
 void *qemu_blockalign(BlockDriverState *bs, size_t size)
 {
+    IO_CODE();
     return qemu_memalign(bdrv_opt_mem_align(bs), size);
 }
 
 void *qemu_blockalign0(BlockDriverState *bs, size_t size)
 {
+    IO_CODE();
     return memset(qemu_blockalign(bs, size), 0, size);
 }
 
 void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
 {
     size_t align = bdrv_opt_mem_align(bs);
+    IO_CODE();
 
     /* Ensure that NULL is never returned on success */
     assert(align > 0);
@@ -2949,6 +3345,7 @@ void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
 void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
 {
     void *mem = qemu_try_blockalign(bs, size);
+    IO_CODE();
 
     if (mem) {
         memset(mem, 0, size);
@@ -2957,106 +3354,86 @@ void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
     return mem;
 }
 
-/*
- * Check if all memory in this vector is sector aligned.
- */
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
-{
-    int i;
-    size_t alignment = bdrv_min_mem_align(bs);
-
-    for (i = 0; i < qiov->niov; i++) {
-        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
-            return false;
-        }
-        if (qiov->iov[i].iov_len % alignment) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-void bdrv_add_before_write_notifier(BlockDriverState *bs,
-                                    NotifierWithReturn *notifier)
-{
-    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
-}
-
-void bdrv_io_plug(BlockDriverState *bs)
+/* Helper that undoes bdrv_register_buf() when it fails partway through */
+static void GRAPH_RDLOCK
+bdrv_register_buf_rollback(BlockDriverState *bs, void *host, size_t size,
+                           BdrvChild *final_child)
 {
     BdrvChild *child;
 
-    QLIST_FOREACH(child, &bs->children, next) {
-        bdrv_io_plug(child->bs);
-    }
+    GLOBAL_STATE_CODE();
+    assert_bdrv_graph_readable();
 
-    if (qatomic_fetch_inc(&bs->io_plugged) == 0) {
-        BlockDriver *drv = bs->drv;
-        if (drv && drv->bdrv_io_plug) {
-            drv->bdrv_io_plug(bs);
+    QLIST_FOREACH(child, &bs->children, next) {
+        if (child == final_child) {
+            break;
         }
-    }
-}
 
-void bdrv_io_unplug(BlockDriverState *bs)
-{
-    BdrvChild *child;
-
-    assert(bs->io_plugged);
-    if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
-        BlockDriver *drv = bs->drv;
-        if (drv && drv->bdrv_io_unplug) {
-            drv->bdrv_io_unplug(bs);
-        }
+        bdrv_unregister_buf(child->bs, host, size);
     }
 
-    QLIST_FOREACH(child, &bs->children, next) {
-        bdrv_io_unplug(child->bs);
+    if (bs->drv && bs->drv->bdrv_unregister_buf) {
+        bs->drv->bdrv_unregister_buf(bs, host, size);
     }
 }
 
-void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
+bool bdrv_register_buf(BlockDriverState *bs, void *host, size_t size,
+                       Error **errp)
 {
     BdrvChild *child;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (bs->drv && bs->drv->bdrv_register_buf) {
-        bs->drv->bdrv_register_buf(bs, host, size);
+        if (!bs->drv->bdrv_register_buf(bs, host, size, errp)) {
+            return false;
+        }
     }
     QLIST_FOREACH(child, &bs->children, next) {
-        bdrv_register_buf(child->bs, host, size);
+        if (!bdrv_register_buf(child->bs, host, size, errp)) {
+            bdrv_register_buf_rollback(bs, host, size, child);
+            return false;
+        }
     }
+    return true;
 }
 
-void bdrv_unregister_buf(BlockDriverState *bs, void *host)
+void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size)
 {
     BdrvChild *child;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (bs->drv && bs->drv->bdrv_unregister_buf) {
-        bs->drv->bdrv_unregister_buf(bs, host);
+        bs->drv->bdrv_unregister_buf(bs, host, size);
     }
     QLIST_FOREACH(child, &bs->children, next) {
-        bdrv_unregister_buf(child->bs, host);
+        bdrv_unregister_buf(child->bs, host, size);
     }
 }
 
-static int coroutine_fn bdrv_co_copy_range_internal(
-        BdrvChild *src, uint64_t src_offset, BdrvChild *dst,
-        uint64_t dst_offset, uint64_t bytes,
+static int coroutine_fn GRAPH_RDLOCK bdrv_co_copy_range_internal(
+        BdrvChild *src, int64_t src_offset, BdrvChild *dst,
+        int64_t dst_offset, int64_t bytes,
         BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
         bool recurse_src)
 {
     BdrvTrackedRequest req;
     int ret;
+    assert_bdrv_graph_readable();
 
     /* TODO We can support BDRV_REQ_NO_FALLBACK here */
     assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
     assert(!(write_flags & BDRV_REQ_NO_FALLBACK));
+    assert(!(read_flags & BDRV_REQ_NO_WAIT));
+    assert(!(write_flags & BDRV_REQ_NO_WAIT));
 
-    if (!dst || !dst->bs) {
+    if (!dst || !dst->bs || !bdrv_co_is_inserted(dst->bs)) {
         return -ENOMEDIUM;
     }
-    ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
+    ret = bdrv_check_request32(dst_offset, bytes, NULL, 0);
     if (ret) {
         return ret;
     }
@@ -3064,10 +3441,10 @@ static int coroutine_fn bdrv_co_copy_range_internal(
         return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
     }
 
-    if (!src || !src->bs) {
+    if (!src || !src->bs || !bdrv_co_is_inserted(src->bs)) {
         return -ENOMEDIUM;
     }
-    ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
+    ret = bdrv_check_request32(src_offset, bytes, NULL, 0);
     if (ret) {
         return ret;
     }
@@ -3120,12 +3497,14 @@ static int coroutine_fn bdrv_co_copy_range_internal(
  *
  * See the comment of bdrv_co_copy_range for the parameter and return value
  * semantics. */
-int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
-                                         BdrvChild *dst, uint64_t dst_offset,
-                                         uint64_t bytes,
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
+                                         BdrvChild *dst, int64_t dst_offset,
+                                         int64_t bytes,
                                          BdrvRequestFlags read_flags,
                                          BdrvRequestFlags write_flags)
 {
+    IO_CODE();
+    assert_bdrv_graph_readable();
     trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
                                   read_flags, write_flags);
     return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
@@ -3136,31 +3515,40 @@ int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
  *
  * See the comment of bdrv_co_copy_range for the parameter and return value
  * semantics. */
-int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
-                                       BdrvChild *dst, uint64_t dst_offset,
-                                       uint64_t bytes,
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
+                                       BdrvChild *dst, int64_t dst_offset,
+                                       int64_t bytes,
                                        BdrvRequestFlags read_flags,
                                        BdrvRequestFlags write_flags)
 {
+    IO_CODE();
+    assert_bdrv_graph_readable();
     trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
                                 read_flags, write_flags);
     return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
                                        bytes, read_flags, write_flags, false);
 }
 
-int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
-                                    BdrvChild *dst, uint64_t dst_offset,
-                                    uint64_t bytes, BdrvRequestFlags read_flags,
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
+                                    BdrvChild *dst, int64_t dst_offset,
+                                    int64_t bytes, BdrvRequestFlags read_flags,
                                     BdrvRequestFlags write_flags)
 {
+    IO_CODE();
+    assert_bdrv_graph_readable();
+
     return bdrv_co_copy_range_from(src, src_offset,
                                    dst, dst_offset,
                                    bytes, read_flags, write_flags);
 }
 
-static void bdrv_parent_cb_resize(BlockDriverState *bs)
+static void coroutine_fn GRAPH_RDLOCK
+bdrv_parent_cb_resize(BlockDriverState *bs)
 {
     BdrvChild *c;
+
+    assert_bdrv_graph_readable();
+
     QLIST_FOREACH(c, &bs->parents, next_parent) {
         if (c->klass->resize) {
             c->klass->resize(c);
@@ -3185,7 +3573,8 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
     BdrvTrackedRequest req;
     int64_t old_size, new_bytes;
     int ret;
-
+    IO_CODE();
+    assert_bdrv_graph_readable();
 
     /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
     if (!drv) {
@@ -3197,12 +3586,22 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
         return -EINVAL;
     }
 
-    old_size = bdrv_getlength(bs);
+    ret = bdrv_check_request(offset, 0, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    old_size = bdrv_co_getlength(bs);
     if (old_size < 0) {
         error_setg_errno(errp, -old_size, "Failed to get old image size");
         return old_size;
     }
 
+    if (bdrv_is_read_only(bs)) {
+        error_setg(errp, "Image is read-only");
+        return -EACCES;
+    }
+
     if (offset > old_size) {
         new_bytes = offset - old_size;
     } else {
@@ -3217,12 +3616,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
      * new area, we need to make sure that no write requests are made to it
      * concurrently or they might be overwritten by preallocation. */
     if (new_bytes) {
-        bdrv_mark_request_serialising(&req, 1);
-    }
-    if (bs->read_only) {
-        error_setg(errp, "Image is read-only");
-        ret = -EACCES;
-        goto out;
+        bdrv_make_request_serialising(&req, 1);
     }
     ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
                                     0);
@@ -3248,7 +3642,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
     if (new_bytes && backing) {
         int64_t backing_len;
 
-        backing_len = bdrv_getlength(backing->bs);
+        backing_len = bdrv_co_getlength(backing->bs);
         if (backing_len < 0) {
             ret = backing_len;
             error_setg_errno(errp, -ret, "Could not get backing file size");
@@ -3278,15 +3672,17 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
         goto out;
     }
 
-    ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
+    ret = bdrv_co_refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not refresh total sector count");
     } else {
         offset = bs->total_sectors * BDRV_SECTOR_SIZE;
     }
-    /* It's possible that truncation succeeded but refresh_total_sectors
+    /*
+     * It's possible that truncation succeeded but bdrv_refresh_total_sectors
      * failed, but the latter doesn't affect how we should finish the request.
-     * Pass 0 as the last parameter so that dirty bitmaps etc. are handled. */
+     * Pass 0 as the last parameter so that dirty bitmaps etc. are handled.
+     */
     bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0);
 
 out:
@@ -3295,3 +3691,92 @@ out:
 
     return ret;
 }
+
+void bdrv_cancel_in_flight(BlockDriverState *bs)
+{
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    if (!bs || !bs->drv) {
+        return;
+    }
+
+    if (bs->drv->bdrv_cancel_in_flight) {
+        bs->drv->bdrv_cancel_in_flight(bs);
+    }
+}
+
+int coroutine_fn
+bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes,
+                        QEMUIOVector *qiov, size_t qiov_offset)
+{
+    BlockDriverState *bs = child->bs;
+    BlockDriver *drv = bs->drv;
+    int ret;
+    IO_CODE();
+    assert_bdrv_graph_readable();
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
+    if (!drv->bdrv_co_preadv_snapshot) {
+        return -ENOTSUP;
+    }
+
+    bdrv_inc_in_flight(bs);
+    ret = drv->bdrv_co_preadv_snapshot(bs, offset, bytes, qiov, qiov_offset);
+    bdrv_dec_in_flight(bs);
+
+    return ret;
+}
+
+int coroutine_fn
+bdrv_co_snapshot_block_status(BlockDriverState *bs,
+                              bool want_zero, int64_t offset, int64_t bytes,
+                              int64_t *pnum, int64_t *map,
+                              BlockDriverState **file)
+{
+    BlockDriver *drv = bs->drv;
+    int ret;
+    IO_CODE();
+    assert_bdrv_graph_readable();
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
+    if (!drv->bdrv_co_snapshot_block_status) {
+        return -ENOTSUP;
+    }
+
+    bdrv_inc_in_flight(bs);
+    ret = drv->bdrv_co_snapshot_block_status(bs, want_zero, offset, bytes,
+                                             pnum, map, file);
+    bdrv_dec_in_flight(bs);
+
+    return ret;
+}
+
+int coroutine_fn
+bdrv_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+    BlockDriver *drv = bs->drv;
+    int ret;
+    IO_CODE();
+    assert_bdrv_graph_readable();
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
+    if (!drv->bdrv_co_pdiscard_snapshot) {
+        return -ENOTSUP;
+    }
+
+    bdrv_inc_in_flight(bs);
+    ret = drv->bdrv_co_pdiscard_snapshot(bs, offset, bytes);
+    bdrv_dec_in_flight(bs);
+
+    return ret;
+}
index 00a3ee9fb852c82295aace92f71517afa45bf9be..7cdd00e9f167460c98e3029bc1f4eff9b7cc30e0 100644 (file)
  */
 #include "qemu/osdep.h"
 #include <liburing.h>
-#include "qemu-common.h"
 #include "block/aio.h"
 #include "qemu/queue.h"
 #include "block/block.h"
 #include "block/raw-aio.h"
 #include "qemu/coroutine.h"
+#include "qemu/defer-call.h"
 #include "qapi/error.h"
+#include "sysemu/block-backend.h"
 #include "trace.h"
 
+/* Only used for assertions.  */
+#include "qemu/coroutine_int.h"
+
 /* io_uring ring size */
 #define MAX_ENTRIES 128
 
@@ -39,7 +43,6 @@ typedef struct LuringAIOCB {
 } LuringAIOCB;
 
 typedef struct LuringQueue {
-    int plugged;
     unsigned int in_queue;
     unsigned int in_flight;
     bool blocked;
@@ -51,10 +54,9 @@ typedef struct LuringState {
 
     struct io_uring ring;
 
-    /* io queue for submit at batch.  Protected by AioContext lock. */
+    /* No locking required, only accessed from AioContext home thread */
     LuringQueue io_q;
 
-    /* I/O completion processing.  Only runs in I/O thread.  */
     QEMUBH *completion_bh;
 } LuringState;
 
@@ -73,12 +75,8 @@ static void luring_resubmit(LuringState *s, LuringAIOCB *luringcb)
 /**
  * luring_resubmit_short_read:
  *
- * Before Linux commit 9d93a3f5a0c ("io_uring: punt short reads to async
- * context") a buffered I/O request with the start of the file range in the
- * page cache could result in a short read.  Applications need to resubmit the
- * remaining read request.
- *
- * This is a slow path but recent kernels never take it.
+ * Short reads are rare but may occur. The remaining read request needs to be
+ * resubmitted.
  */
 static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
                                        int nread)
@@ -89,7 +87,7 @@ static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
     trace_luring_resubmit_short_read(s, luringcb, nread);
 
     /* Update read position */
-    luringcb->total_read = nread;
+    luringcb->total_read += nread;
     remaining = luringcb->qiov->size - luringcb->total_read;
 
     /* Shorten qiov */
@@ -103,7 +101,7 @@ static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
                       remaining);
 
     /* Update sqe */
-    luringcb->sqeq.off = nread;
+    luringcb->sqeq.off += nread;
     luringcb->sqeq.addr = (__u64)(uintptr_t)luringcb->resubmit_qiov.iov;
     luringcb->sqeq.len = luringcb->resubmit_qiov.niov;
 
@@ -127,6 +125,9 @@ static void luring_process_completions(LuringState *s)
 {
     struct io_uring_cqe *cqes;
     int total_bytes;
+
+    defer_call_begin();
+
     /*
      * Request completion callbacks can run the nested event loop.
      * Schedule ourselves so the nested event loop will "see" remaining
@@ -165,7 +166,21 @@ static void luring_process_completions(LuringState *s)
         total_bytes = ret + luringcb->total_read;
 
         if (ret < 0) {
-            if (ret == -EINTR) {
+            /*
+             * Only writev/readv/fsync requests on regular files or host block
+             * devices are submitted. Therefore -EAGAIN is not expected but it's
+             * known to happen sometimes with Linux SCSI. Submit again and hope
+             * the request completes successfully.
+             *
+             * For more information, see:
+             * https://lore.kernel.org/io-uring/20210727165811.284510-3-axboe@kernel.dk/T/#u
+             *
+             * If the code is changed to submit other types of requests in the
+             * future, then this workaround may need to be extended to deal with
+             * genuine -EAGAIN results that should not be resubmitted
+             * immediately.
+             */
+            if (ret == -EINTR || ret == -EAGAIN) {
                 luring_resubmit(s, luringcb);
                 continue;
             }
@@ -200,11 +215,15 @@ end:
          * eventually runs later. Coroutines cannot be entered recursively
          * so avoid doing that!
          */
+        assert(luringcb->co->ctx == s->aio_context);
         if (!qemu_coroutine_entered(luringcb->co)) {
             aio_co_wake(luringcb->co);
         }
     }
+
     qemu_bh_cancel(s->completion_bh);
+
+    defer_call_end();
 }
 
 static int ioq_submit(LuringState *s)
@@ -253,13 +272,11 @@ static int ioq_submit(LuringState *s)
 
 static void luring_process_completions_and_submit(LuringState *s)
 {
-    aio_context_acquire(s->aio_context);
     luring_process_completions(s);
 
-    if (!s->io_q.plugged && s->io_q.in_queue > 0) {
+    if (s->io_q.in_queue > 0) {
         ioq_submit(s);
     }
-    aio_context_release(s->aio_context);
 }
 
 static void qemu_luring_completion_bh(void *opaque)
@@ -278,36 +295,30 @@ static bool qemu_luring_poll_cb(void *opaque)
 {
     LuringState *s = opaque;
 
-    if (io_uring_cq_ready(&s->ring)) {
-        luring_process_completions_and_submit(s);
-        return true;
-    }
+    return io_uring_cq_ready(&s->ring);
+}
+
+static void qemu_luring_poll_ready(void *opaque)
+{
+    LuringState *s = opaque;
 
-    return false;
+    luring_process_completions_and_submit(s);
 }
 
 static void ioq_init(LuringQueue *io_q)
 {
     QSIMPLEQ_INIT(&io_q->submit_queue);
-    io_q->plugged = 0;
     io_q->in_queue = 0;
     io_q->in_flight = 0;
     io_q->blocked = false;
 }
 
-void luring_io_plug(BlockDriverState *bs, LuringState *s)
-{
-    trace_luring_io_plug(s);
-    s->io_q.plugged++;
-}
-
-void luring_io_unplug(BlockDriverState *bs, LuringState *s)
+static void luring_deferred_fn(void *opaque)
 {
-    assert(s->io_q.plugged);
-    trace_luring_io_unplug(s, s->io_q.blocked, s->io_q.plugged,
-                           s->io_q.in_queue, s->io_q.in_flight);
-    if (--s->io_q.plugged == 0 &&
-        !s->io_q.blocked && s->io_q.in_queue > 0) {
+    LuringState *s = opaque;
+    trace_luring_unplug_fn(s, s->io_q.blocked, s->io_q.in_queue,
+                           s->io_q.in_flight);
+    if (!s->io_q.blocked && s->io_q.in_queue > 0) {
         ioq_submit(s);
     }
 }
@@ -334,6 +345,10 @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
         io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
                              luringcb->qiov->niov, offset);
         break;
+    case QEMU_AIO_ZONE_APPEND:
+        io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
+                             luringcb->qiov->niov, offset);
+        break;
     case QEMU_AIO_READ:
         io_uring_prep_readv(sqes, fd, luringcb->qiov->iov,
                             luringcb->qiov->niov, offset);
@@ -350,22 +365,26 @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
 
     QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next);
     s->io_q.in_queue++;
-    trace_luring_do_submit(s, s->io_q.blocked, s->io_q.plugged,
-                           s->io_q.in_queue, s->io_q.in_flight);
-    if (!s->io_q.blocked &&
-        (!s->io_q.plugged ||
-         s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES)) {
-        ret = ioq_submit(s);
-        trace_luring_do_submit_done(s, ret);
-        return ret;
+    trace_luring_do_submit(s, s->io_q.blocked, s->io_q.in_queue,
+                           s->io_q.in_flight);
+    if (!s->io_q.blocked) {
+        if (s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES) {
+            ret = ioq_submit(s);
+            trace_luring_do_submit_done(s, ret);
+            return ret;
+        }
+
+        defer_call(luring_deferred_fn, s);
     }
     return 0;
 }
 
-int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,
-                                  uint64_t offset, QEMUIOVector *qiov, int type)
+int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
+                                  QEMUIOVector *qiov, int type)
 {
     int ret;
+    AioContext *ctx = qemu_get_current_aio_context();
+    LuringState *s = aio_get_linux_io_uring(ctx);
     LuringAIOCB luringcb = {
         .co         = qemu_coroutine_self(),
         .ret        = -EINPROGRESS,
@@ -388,8 +407,8 @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,
 
 void luring_detach_aio_context(LuringState *s, AioContext *old_context)
 {
-    aio_set_fd_handler(old_context, s->ring.ring_fd, false, NULL, NULL, NULL,
-                       s);
+    aio_set_fd_handler(old_context, s->ring.ring_fd,
+                       NULL, NULL, NULL, NULL, s);
     qemu_bh_delete(s->completion_bh);
     s->aio_context = NULL;
 }
@@ -398,8 +417,9 @@ void luring_attach_aio_context(LuringState *s, AioContext *new_context)
 {
     s->aio_context = new_context;
     s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s);
-    aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false,
-                       qemu_luring_completion_cb, NULL, qemu_luring_poll_cb, s);
+    aio_set_fd_handler(s->aio_context, s->ring.ring_fd,
+                       qemu_luring_completion_cb, NULL,
+                       qemu_luring_poll_cb, qemu_luring_poll_ready, s);
 }
 
 LuringState *luring_init(Error **errp)
index afaf8837d6c144f574794886b93b4351ce517dee..4f2da405e645e535450ea04872676f90a4103323 100644 (file)
@@ -68,3 +68,4 @@ static void iscsi_block_opts_init(void)
 }
 
 block_init(iscsi_block_opts_init);
+module_opts("iscsi");
index e30a7e3606b6e10495fa969c5c986af4cfb8e0ee..2ff14b74724d61a2d8325662b70c1b9729a8a187 100644 (file)
 #include <poll.h>
 #include <math.h>
 #include <arpa/inet.h>
-#include "qemu-common.h"
+#include "sysemu/sysemu.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
 #include "qemu/bitops.h"
 #include "qemu/bitmap.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "block/qdict.h"
 #include "scsi/constants.h"
@@ -268,6 +269,7 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                 timer_mod(&iTask->retry_timer,
                           qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
                 iTask->do_retry = 1;
+                return;
             } else if (status == SCSI_STATUS_CHECK_CONDITION) {
                 int error = iscsi_translate_sense(&task->sense);
                 if (error == EAGAIN) {
@@ -290,7 +292,8 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
     }
 }
 
-static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
+static void coroutine_fn
+iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
 {
     *iTask = (struct IscsiTask) {
         .co         = qemu_coroutine_self(),
@@ -322,25 +325,23 @@ iscsi_aio_cancel(BlockAIOCB *blockacb)
     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
     IscsiLun *iscsilun = acb->iscsilun;
 
-    qemu_mutex_lock(&iscsilun->mutex);
+    WITH_QEMU_LOCK_GUARD(&iscsilun->mutex) {
 
-    /* If it was cancelled or completed already, our work is done here */
-    if (acb->cancelled || acb->status != -EINPROGRESS) {
-        qemu_mutex_unlock(&iscsilun->mutex);
-        return;
-    }
+        /* If it was cancelled or completed already, our work is done here */
+        if (acb->cancelled || acb->status != -EINPROGRESS) {
+            return;
+        }
 
-    acb->cancelled = true;
+        acb->cancelled = true;
 
-    qemu_aio_ref(acb); /* released in iscsi_abort_task_cb() */
+        qemu_aio_ref(acb); /* released in iscsi_abort_task_cb() */
 
-    /* send a task mgmt call to the target to cancel the task on the target */
-    if (iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
-                                         iscsi_abort_task_cb, acb) < 0) {
-        qemu_aio_unref(acb); /* since iscsi_abort_task_cb() won't be called */
+        /* send a task mgmt call to the target to cancel the task on the target */
+        if (iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
+                                             iscsi_abort_task_cb, acb) < 0) {
+            qemu_aio_unref(acb); /* since iscsi_abort_task_cb() won't be called */
+        }
     }
-
-    qemu_mutex_unlock(&iscsilun->mutex);
 }
 
 static const AIOCBInfo iscsi_aiocb_info = {
@@ -362,10 +363,9 @@ iscsi_set_events(IscsiLun *iscsilun)
 
     if (ev != iscsilun->events) {
         aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
-                           false,
                            (ev & POLLIN) ? iscsi_process_read : NULL,
                            (ev & POLLOUT) ? iscsi_process_write : NULL,
-                           NULL,
+                           NULL, NULL,
                            iscsilun);
         iscsilun->events = ev;
     }
@@ -375,22 +375,22 @@ static void iscsi_timed_check_events(void *opaque)
 {
     IscsiLun *iscsilun = opaque;
 
-    qemu_mutex_lock(&iscsilun->mutex);
+    WITH_QEMU_LOCK_GUARD(&iscsilun->mutex) {
+        /* check for timed out requests */
+        iscsi_service(iscsilun->iscsi, 0);
 
-    /* check for timed out requests */
-    iscsi_service(iscsilun->iscsi, 0);
+        if (iscsilun->request_timed_out) {
+            iscsilun->request_timed_out = false;
+            iscsi_reconnect(iscsilun->iscsi);
+        }
 
-    if (iscsilun->request_timed_out) {
-        iscsilun->request_timed_out = false;
-        iscsi_reconnect(iscsilun->iscsi);
+        /*
+         * newer versions of libiscsi may return zero events. Ensure we are
+         * able to return to service once this situation changes.
+         */
+        iscsi_set_events(iscsilun);
     }
 
-    /* newer versions of libiscsi may return zero events. Ensure we are able
-     * to return to service once this situation changes. */
-    iscsi_set_events(iscsilun);
-
-    qemu_mutex_unlock(&iscsilun->mutex);
-
     timer_mod(iscsilun->event_timer,
               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
 }
@@ -429,14 +429,14 @@ static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
 }
 
-static bool is_byte_request_lun_aligned(int64_t offset, int count,
+static bool is_byte_request_lun_aligned(int64_t offset, int64_t bytes,
                                         IscsiLun *iscsilun)
 {
-    if (offset % iscsilun->block_size || count % iscsilun->block_size) {
+    if (offset % iscsilun->block_size || bytes % iscsilun->block_size) {
         error_report("iSCSI misaligned request: "
                      "iscsilun->block_size %u, offset %" PRIi64
-                     ", count %d",
-                     iscsilun->block_size, offset, count);
+                     ", bytes %" PRIi64,
+                     iscsilun->block_size, offset, bytes);
         return false;
     }
     return true;
@@ -783,9 +783,6 @@ retry:
         iscsi_allocmap_set_allocated(iscsilun, offset, *pnum);
     }
 
-    if (*pnum > bytes) {
-        *pnum = bytes;
-    }
 out_unlock:
     qemu_mutex_unlock(&iscsilun->mutex);
     g_free(iTask.err_str);
@@ -1061,6 +1058,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
         return NULL;
     }
 
+    /* Must use malloc(): this is freed via scsi_free_scsi_task() */
     acb->task = malloc(sizeof(struct scsi_task));
     if (acb->task == NULL) {
         error_report("iSCSI: Failed to allocate task for scsi command. %s",
@@ -1130,8 +1128,8 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
 
 #endif
 
-static int64_t
-iscsi_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn
+iscsi_co_getlength(BlockDriverState *bs)
 {
     IscsiLun *iscsilun = bs->opaque;
     int64_t len;
@@ -1143,7 +1141,8 @@ iscsi_getlength(BlockDriverState *bs)
 }
 
 static int
-coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
+coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset,
+                               int64_t bytes)
 {
     IscsiLun *iscsilun = bs->opaque;
     struct IscsiTask iTask;
@@ -1159,6 +1158,12 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
         return 0;
     }
 
+    /*
+     * We don't want to overflow list.num which is uint32_t.
+     * We rely on our max_pdiscard.
+     */
+    assert(bytes / iscsilun->block_size <= UINT32_MAX);
+
     list.lba = offset / iscsilun->block_size;
     list.num = bytes / iscsilun->block_size;
 
@@ -1207,12 +1212,12 @@ out_unlock:
 
 static int
 coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
-                                    int bytes, BdrvRequestFlags flags)
+                                    int64_t bytes, BdrvRequestFlags flags)
 {
     IscsiLun *iscsilun = bs->opaque;
     struct IscsiTask iTask;
     uint64_t lba;
-    uint32_t nb_blocks;
+    uint64_t nb_blocks;
     bool use_16_for_ws = iscsilun->use_16_for_rw;
     int r = 0;
 
@@ -1252,11 +1257,21 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
     iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
     if (use_16_for_ws) {
+        /*
+         * iscsi_writesame16_task num_blocks argument is uint32_t. We rely here
+         * on our max_pwrite_zeroes limit.
+         */
+        assert(nb_blocks <= UINT32_MAX);
         iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                             iscsilun->zeroblock, iscsilun->block_size,
                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
                                             0, 0, iscsi_co_generic_cb, &iTask);
     } else {
+        /*
+         * iscsi_writesame10_task num_blocks argument is uint16_t. We rely here
+         * on our max_pwrite_zeroes limit.
+         */
+        assert(nb_blocks <= UINT16_MAX);
         iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                             iscsilun->zeroblock, iscsilun->block_size,
                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
@@ -1339,6 +1354,9 @@ static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
     } else if (!password) {
         error_setg(errp, "CHAP username specified but no password was given");
         return;
+    } else {
+        warn_report("iSCSI block driver 'password' option is deprecated, "
+                    "use 'password-secret' instead");
     }
 
     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
@@ -1522,16 +1540,14 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
     IscsiLun *iscsilun = bs->opaque;
 
     aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
-                       false, NULL, NULL, NULL, NULL);
+                       NULL, NULL, NULL, NULL, NULL);
     iscsilun->events = 0;
 
     if (iscsilun->nop_timer) {
-        timer_del(iscsilun->nop_timer);
         timer_free(iscsilun->nop_timer);
         iscsilun->nop_timer = NULL;
     }
     if (iscsilun->event_timer) {
-        timer_del(iscsilun->event_timer);
         timer_free(iscsilun->event_timer);
         iscsilun->event_timer = NULL;
     }
@@ -1909,7 +1925,9 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     /* Check the write protect flag of the LUN if we want to write */
     if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
         iscsilun->write_protected) {
+        bdrv_graph_rdlock_main_loop();
         ret = bdrv_apply_auto_read_only(bs, "LUN is write protected", errp);
+        bdrv_graph_rdunlock_main_loop();
         if (ret < 0) {
             goto out;
         }
@@ -2055,7 +2073,7 @@ static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
     uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
     unsigned int block_size = MAX(BDRV_SECTOR_SIZE, iscsilun->block_size);
 
-    assert(iscsilun->block_size >= BDRV_SECTOR_SIZE || bs->sg);
+    assert(iscsilun->block_size >= BDRV_SECTOR_SIZE || bdrv_is_sg(bs));
 
     bs->bl.request_alignment = block_size;
 
@@ -2068,20 +2086,19 @@ static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
     }
 
     if (iscsilun->lbp.lbpu) {
-        if (iscsilun->bl.max_unmap < 0xffffffff / block_size) {
-            bs->bl.max_pdiscard =
-                iscsilun->bl.max_unmap * iscsilun->block_size;
-        }
+        bs->bl.max_pdiscard =
+            MIN_NON_ZERO(iscsilun->bl.max_unmap * iscsilun->block_size,
+                         (uint64_t)UINT32_MAX * iscsilun->block_size);
         bs->bl.pdiscard_alignment =
             iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
     } else {
         bs->bl.pdiscard_alignment = iscsilun->block_size;
     }
 
-    if (iscsilun->bl.max_ws_len < 0xffffffff / block_size) {
-        bs->bl.max_pwrite_zeroes =
-            iscsilun->bl.max_ws_len * iscsilun->block_size;
-    }
+    bs->bl.max_pwrite_zeroes =
+        MIN_NON_ZERO(iscsilun->bl.max_ws_len * iscsilun->block_size,
+                     max_xfer_len * iscsilun->block_size);
+
     if (iscsilun->lbp.lbpws) {
         bs->bl.pwrite_zeroes_alignment =
             iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
@@ -2144,7 +2161,7 @@ static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset,
         return -EIO;
     }
 
-    cur_length = iscsi_getlength(bs);
+    cur_length = iscsi_co_getlength(bs);
     if (offset != cur_length && exact) {
         error_setg(errp, "Cannot resize iSCSI devices");
         return -ENOTSUP;
@@ -2160,7 +2177,8 @@ static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset,
     return 0;
 }
 
-static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+static int coroutine_fn
+iscsi_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     IscsiLun *iscsilun = bs->opaque;
     bdi->cluster_size = iscsilun->cluster_size;
@@ -2174,14 +2192,12 @@ static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs,
     iscsi_allocmap_invalidate(iscsilun);
 }
 
-static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs,
-                                                 BdrvChild *src,
-                                                 uint64_t src_offset,
-                                                 BdrvChild *dst,
-                                                 uint64_t dst_offset,
-                                                 uint64_t bytes,
-                                                 BdrvRequestFlags read_flags,
-                                                 BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+iscsi_co_copy_range_from(BlockDriverState *bs,
+                         BdrvChild *src, int64_t src_offset,
+                         BdrvChild *dst, int64_t dst_offset,
+                         int64_t bytes, BdrvRequestFlags read_flags,
+                         BdrvRequestFlags write_flags)
 {
     return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
                                  read_flags, write_flags);
@@ -2315,14 +2331,12 @@ static void iscsi_xcopy_data(struct iscsi_data *data,
                               src_lba, dst_lba);
 }
 
-static int coroutine_fn iscsi_co_copy_range_to(BlockDriverState *bs,
-                                               BdrvChild *src,
-                                               uint64_t src_offset,
-                                               BdrvChild *dst,
-                                               uint64_t dst_offset,
-                                               uint64_t bytes,
-                                               BdrvRequestFlags read_flags,
-                                               BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+iscsi_co_copy_range_to(BlockDriverState *bs,
+                       BdrvChild *src, int64_t src_offset,
+                       BdrvChild *dst, int64_t dst_offset,
+                       int64_t bytes, BdrvRequestFlags read_flags,
+                       BdrvRequestFlags write_flags)
 {
     IscsiLun *dst_lun = dst->bs->opaque;
     IscsiLun *src_lun;
@@ -2423,8 +2437,8 @@ static BlockDriver bdrv_iscsi = {
     .bdrv_reopen_commit     = iscsi_reopen_commit,
     .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache,
 
-    .bdrv_getlength  = iscsi_getlength,
-    .bdrv_get_info   = iscsi_get_info,
+    .bdrv_co_getlength   = iscsi_co_getlength,
+    .bdrv_co_get_info    = iscsi_co_get_info,
     .bdrv_co_truncate    = iscsi_co_truncate,
     .bdrv_refresh_limits = iscsi_refresh_limits,
 
@@ -2462,8 +2476,8 @@ static BlockDriver bdrv_iser = {
     .bdrv_reopen_commit     = iscsi_reopen_commit,
     .bdrv_co_invalidate_cache  = iscsi_co_invalidate_cache,
 
-    .bdrv_getlength  = iscsi_getlength,
-    .bdrv_get_info   = iscsi_get_info,
+    .bdrv_co_getlength   = iscsi_co_getlength,
+    .bdrv_co_get_info    = iscsi_co_get_info,
     .bdrv_co_truncate    = iscsi_co_truncate,
     .bdrv_refresh_limits = iscsi_refresh_limits,
 
index 3c0527c2bf8970960bd66077d6ba81da6a7cc85c..ec05d946f312bb90f13c4fb6b7b8a8159a34a9e3 100644 (file)
 #include "block/raw-aio.h"
 #include "qemu/event_notifier.h"
 #include "qemu/coroutine.h"
+#include "qemu/defer-call.h"
 #include "qapi/error.h"
+#include "sysemu/block-backend.h"
+
+/* Only used for assertions.  */
+#include "qemu/coroutine_int.h"
 
 #include <libaio.h>
 
@@ -28,6 +33,9 @@
  */
 #define MAX_EVENTS 1024
 
+/* Maximum number of requests in a batch. (default value) */
+#define DEFAULT_MAX_BATCH 32
+
 struct qemu_laiocb {
     Coroutine *co;
     LinuxAioState *ctx;
@@ -40,7 +48,6 @@ struct qemu_laiocb {
 };
 
 typedef struct {
-    int plugged;
     unsigned int in_queue;
     unsigned int in_flight;
     bool blocked;
@@ -53,10 +60,8 @@ struct LinuxAioState {
     io_context_t ctx;
     EventNotifier e;
 
-    /* io queue for submit at batch.  Protected by AioContext lock. */
+    /* No locking required, only accessed from AioContext home thread */
     LaioQueue io_q;
-
-    /* I/O completion processing.  Only runs in I/O thread.  */
     QEMUBH *completion_bh;
     int event_idx;
     int event_max;
@@ -99,6 +104,7 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
      * later.  Coroutines cannot be entered recursively so avoid doing
      * that!
      */
+    assert(laiocb->co->ctx == laiocb->ctx->aio_context);
     if (!qemu_coroutine_entered(laiocb->co)) {
         aio_co_wake(laiocb->co);
     }
@@ -199,6 +205,8 @@ static void qemu_laio_process_completions(LinuxAioState *s)
 {
     struct io_event *events;
 
+    defer_call_begin();
+
     /* Reschedule so nested event loops see currently pending completions */
     qemu_bh_schedule(s->completion_bh);
 
@@ -222,20 +230,20 @@ static void qemu_laio_process_completions(LinuxAioState *s)
 
     /* If we are nested we have to notify the level above that we are done
      * by setting event_max to zero, upper level will then jump out of it's
-     * own `for` loop.  If we are the last all counters droped to zero. */
+     * own `for` loop.  If we are the last all counters dropped to zero. */
     s->event_max = 0;
     s->event_idx = 0;
+
+    defer_call_end();
 }
 
 static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
 {
-    aio_context_acquire(s->aio_context);
     qemu_laio_process_completions(s);
 
-    if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
+    if (!QSIMPLEQ_EMPTY(&s->io_q.pending)) {
         ioq_submit(s);
     }
-    aio_context_release(s->aio_context);
 }
 
 static void qemu_laio_completion_bh(void *opaque)
@@ -260,18 +268,20 @@ static bool qemu_laio_poll_cb(void *opaque)
     LinuxAioState *s = container_of(e, LinuxAioState, e);
     struct io_event *events;
 
-    if (!io_getevents_peek(s->ctx, &events)) {
-        return false;
-    }
+    return io_getevents_peek(s->ctx, &events);
+}
+
+static void qemu_laio_poll_ready(EventNotifier *opaque)
+{
+    EventNotifier *e = opaque;
+    LinuxAioState *s = container_of(e, LinuxAioState, e);
 
     qemu_laio_process_completions_and_submit(s);
-    return true;
 }
 
 static void ioq_init(LaioQueue *io_q)
 {
     QSIMPLEQ_INIT(&io_q->pending);
-    io_q->plugged = 0;
     io_q->in_queue = 0;
     io_q->in_flight = 0;
     io_q->blocked = false;
@@ -331,22 +341,34 @@ static void ioq_submit(LinuxAioState *s)
     }
 }
 
-void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
+static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch)
 {
-    s->io_q.plugged++;
+    uint64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH;
+
+    /*
+     * AIO context can be shared between multiple block devices, so
+     * `dev_max_batch` allows reducing the batch size for latency-sensitive
+     * devices.
+     */
+    max_batch = MIN_NON_ZERO(dev_max_batch, max_batch);
+
+    /* limit the batch with the number of available events */
+    max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch);
+
+    return max_batch;
 }
 
-void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
+static void laio_deferred_fn(void *opaque)
 {
-    assert(s->io_q.plugged);
-    if (--s->io_q.plugged == 0 &&
-        !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
+    LinuxAioState *s = opaque;
+
+    if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
         ioq_submit(s);
     }
 }
 
 static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
-                          int type)
+                          int type, uint64_t dev_max_batch)
 {
     LinuxAioState *s = laiocb->ctx;
     struct iocb *iocbs = &laiocb->iocb;
@@ -356,6 +378,9 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
     case QEMU_AIO_WRITE:
         io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
         break;
+    case QEMU_AIO_ZONE_APPEND:
+        io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
+        break;
     case QEMU_AIO_READ:
         io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset);
         break;
@@ -369,29 +394,32 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
 
     QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
     s->io_q.in_queue++;
-    if (!s->io_q.blocked &&
-        (!s->io_q.plugged ||
-         s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) {
-        ioq_submit(s);
+    if (!s->io_q.blocked) {
+        if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch)) {
+            ioq_submit(s);
+        } else {
+            defer_call(laio_deferred_fn, s);
+        }
     }
 
     return 0;
 }
 
-int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
-                                uint64_t offset, QEMUIOVector *qiov, int type)
+int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
+                                int type, uint64_t dev_max_batch)
 {
     int ret;
+    AioContext *ctx = qemu_get_current_aio_context();
     struct qemu_laiocb laiocb = {
         .co         = qemu_coroutine_self(),
         .nbytes     = qiov->size,
-        .ctx        = s,
+        .ctx        = aio_get_linux_aio(ctx),
         .ret        = -EINPROGRESS,
         .is_read    = (type == QEMU_AIO_READ),
         .qiov       = qiov,
     };
 
-    ret = laio_do_submit(fd, &laiocb, offset, type);
+    ret = laio_do_submit(fd, &laiocb, offset, type, dev_max_batch);
     if (ret < 0) {
         return ret;
     }
@@ -404,7 +432,7 @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
 
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &s->e, false, NULL, NULL);
+    aio_set_event_notifier(old_context, &s->e, NULL, NULL, NULL);
     qemu_bh_delete(s->completion_bh);
     s->aio_context = NULL;
 }
@@ -413,9 +441,10 @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
 {
     s->aio_context = new_context;
     s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
-    aio_set_event_notifier(new_context, &s->e, false,
+    aio_set_event_notifier(new_context, &s->e,
                            qemu_laio_completion_cb,
-                           qemu_laio_poll_cb);
+                           qemu_laio_poll_cb,
+                           qemu_laio_poll_ready);
 }
 
 LinuxAioState *laio_init(Error **errp)
@@ -426,7 +455,7 @@ LinuxAioState *laio_init(Error **errp)
     s = g_malloc0(sizeof(*s));
     rc = event_notifier_init(&s->e, false);
     if (rc < 0) {
-        error_setg_errno(errp, -rc, "failed to to initialize event notifier");
+        error_setg_errno(errp, -rc, "failed to initialize event notifier");
         goto out_free_state;
     }
 
index 5dcc1e5cceb0ca05e382ebc265d3bac1ecf8b9a2..1ac0c68f94a26e19130e5826bc5c5f5de137868e 100644 (file)
@@ -4,96 +4,129 @@ block_ss.add(files(
   'aio_task.c',
   'amend.c',
   'backup.c',
-  'backup-top.c',
   'blkdebug.c',
   'blklogwrites.c',
   'blkverify.c',
   'block-backend.c',
   'block-copy.c',
   'commit.c',
+  'copy-before-write.c',
   'copy-on-read.c',
   'create.c',
   'crypto.c',
   'dirty-bitmap.c',
   'filter-compress.c',
+  'graph-lock.c',
   'io.c',
   'mirror.c',
   'nbd.c',
   'null.c',
+  'preallocate.c',
+  'progress_meter.c',
   'qapi.c',
+  'qcow2.c',
   'qcow2-bitmap.c',
   'qcow2-cache.c',
   'qcow2-cluster.c',
   'qcow2-refcount.c',
   'qcow2-snapshot.c',
   'qcow2-threads.c',
-  'qcow2.c',
   'quorum.c',
   'raw-format.c',
+  'reqlist.c',
   'snapshot.c',
-  'throttle-groups.c',
+  'snapshot-access.c',
   'throttle.c',
-  'vhdx-endian.c',
-  'vhdx-log.c',
-  'vhdx.c',
-  'vmdk.c',
-  'vpc.c',
+  'throttle-groups.c',
   'write-threshold.c',
-), zstd, zlib)
+), zstd, zlib, gnutls)
 
-softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
+system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
+system_ss.add(files('block-ram-registrar.c'))
+
+if get_option('qcow1').allowed()
+  block_ss.add(files('qcow.c'))
+endif
+if get_option('vdi').allowed()
+  block_ss.add(files('vdi.c'))
+endif
+if get_option('vhdx').allowed()
+  block_ss.add(files(
+    'vhdx-endian.c',
+    'vhdx-log.c',
+    'vhdx.c'
+  ))
+endif
+if get_option('vmdk').allowed()
+  block_ss.add(files('vmdk.c'))
+endif
+if get_option('vpc').allowed()
+  block_ss.add(files('vpc.c'))
+endif
+if get_option('cloop').allowed()
+  block_ss.add(files('cloop.c'))
+endif
+if get_option('bochs').allowed()
+  block_ss.add(files('bochs.c'))
+endif
+if get_option('vvfat').allowed()
+  block_ss.add(files('vvfat.c'))
+endif
+if get_option('dmg').allowed()
+  block_ss.add(files('dmg.c'))
+endif
+if get_option('qed').allowed()
+  block_ss.add(files(
+    'qed-check.c',
+    'qed-cluster.c',
+    'qed-l2-cache.c',
+    'qed-table.c',
+    'qed.c',
+  ))
+endif
+if get_option('parallels').allowed()
+  block_ss.add(files('parallels.c', 'parallels-ext.c'))
+endif
 
-block_ss.add(when: 'CONFIG_QCOW1', if_true: files('qcow.c'))
-block_ss.add(when: 'CONFIG_VDI', if_true: files('vdi.c'))
-block_ss.add(when: 'CONFIG_CLOOP', if_true: files('cloop.c'))
-block_ss.add(when: 'CONFIG_BOCHS', if_true: files('bochs.c'))
-block_ss.add(when: 'CONFIG_VVFAT', if_true: files('vvfat.c'))
-block_ss.add(when: 'CONFIG_DMG', if_true: files('dmg.c'))
-block_ss.add(when: 'CONFIG_QED', if_true: files(
-  'qed-check.c',
-  'qed-cluster.c',
-  'qed-l2-cache.c',
-  'qed-table.c',
-  'qed.c',
-))
-block_ss.add(when: [libxml2, 'CONFIG_PARALLELS'], if_true: files('parallels.c'))
 block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c'))
 block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit])
-block_ss.add(when: 'CONFIG_LIBISCSI', if_true: files('iscsi-opts.c'))
+block_ss.add(when: libiscsi, if_true: files('iscsi-opts.c'))
 block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c'))
-block_ss.add(when: 'CONFIG_REPLICATION', if_true: files('replication.c'))
-block_ss.add(when: 'CONFIG_SHEEPDOG', if_true: files('sheepdog.c'))
-block_ss.add(when: ['CONFIG_LINUX_AIO', libaio], if_true: files('linux-aio.c'))
-block_ss.add(when: ['CONFIG_LINUX_IO_URING', linux_io_uring], if_true: files('io_uring.c'))
+if get_option('replication').allowed()
+  block_ss.add(files('replication.c'))
+endif
+block_ss.add(when: libaio, if_true: files('linux-aio.c'))
+block_ss.add(when: linux_io_uring, if_true: files('io_uring.c'))
 
 block_modules = {}
 
 modsrc = []
 foreach m : [
-  ['CONFIG_CURL', 'curl', [curl, glib], 'curl.c'],
-  ['CONFIG_GLUSTERFS', 'gluster', glusterfs, 'gluster.c'],
-  ['CONFIG_LIBISCSI', 'iscsi', libiscsi, 'iscsi.c'],
-  ['CONFIG_LIBNFS', 'nfs', libnfs, 'nfs.c'],
-  ['CONFIG_LIBSSH', 'ssh', libssh, 'ssh.c'],
-  ['CONFIG_RBD', 'rbd', rbd, 'rbd.c'],
+  [blkio, 'blkio', files('blkio.c')],
+  [curl, 'curl', files('curl.c')],
+  [glusterfs, 'gluster', files('gluster.c')],
+  [libiscsi, 'iscsi', [files('iscsi.c'), libm]],
+  [libnfs, 'nfs', files('nfs.c')],
+  [libssh, 'ssh', files('ssh.c')],
+  [rbd, 'rbd', files('rbd.c')],
 ]
-  if config_host.has_key(m[0])
+  if m[0].found()
+    module_ss = ss.source_set()
+    module_ss.add(when: m[0], if_true: m[2])
     if enable_modules
-      modsrc += files(m[3])
+      modsrc += module_ss.all_sources()
     endif
-    module_ss = ss.source_set()
-    module_ss.add(when: m[2], if_true: files(m[3]))
     block_modules += {m[1] : module_ss}
   endif
 endforeach
 
 # those are not exactly regular block modules, so treat them apart
-if 'CONFIG_DMG' in config_host
+if get_option('dmg').allowed()
   foreach m : [
-    ['CONFIG_LZFSE', 'dmg-lzfse', liblzfse, 'dmg-lzfse.c'],
-    ['CONFIG_BZIP2', 'dmg-bz2', [glib, libbzip2], 'dmg-bz2.c']
+    [liblzfse, 'dmg-lzfse', liblzfse, 'dmg-lzfse.c'],
+    [libbzip2, 'dmg-bz2', [glib, libbzip2], 'dmg-bz2.c']
   ]
-    if config_host.has_key(m[0])
+    if m[0].found()
       module_ss = ss.source_set()
       module_ss.add(when: m[2], if_true: files(m[3]))
       block_modules += {m[1] : module_ss}
@@ -111,14 +144,22 @@ block_ss.add(module_block_h)
 wrapper_py = find_program('../scripts/block-coroutine-wrapper.py')
 block_gen_c = custom_target('block-gen.c',
                             output: 'block-gen.c',
-                            input: files('../include/block/block.h',
-                                         'coroutines.h'),
+                            input: files(
+                                      '../include/block/block-io.h',
+                                      '../include/block/dirty-bitmap.h',
+                                      '../include/block/block_int-io.h',
+                                      '../include/block/block-global-state.h',
+                                      '../include/block/qapi.h',
+                                      '../include/sysemu/block-backend-global-state.h',
+                                      '../include/sysemu/block-backend-io.h',
+                                      'coroutines.h'
+                                      ),
                             command: [wrapper_py, '@OUTPUT@', '@INPUT@'])
 block_ss.add(block_gen_c)
 
 block_ss.add(files('stream.c'))
 
-softmmu_ss.add(files('qapi-sysemu.c'))
+system_ss.add(files('qapi-sysemu.c'))
 
 subdir('export')
 subdir('monitor')
index 8e1ad6eceb57bd5943d62b7d016a1990010ad230..cd9d3ad4a8065ac18528f1ac07a25093d7b496cb 100644 (file)
 #include "trace.h"
 #include "block/blockjob_int.h"
 #include "block/block_int.h"
+#include "block/dirty-bitmap.h"
 #include "sysemu/block-backend.h"
 #include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "qemu/bitmap.h"
+#include "qemu/memalign.h"
 
 #define MAX_IN_FLIGHT 16
 #define MAX_IO_BYTES (1 << 20) /* 1 Mb */
@@ -54,11 +55,18 @@ typedef struct MirrorBlockJob {
     BlockMirrorBackingMode backing_mode;
     /* Whether the target image requires explicit zero-initialization */
     bool zero_target;
+    /*
+     * To be accesssed with atomics. Written only under the BQL (required by the
+     * current implementation of mirror_change()).
+     */
     MirrorCopyMode copy_mode;
     BlockdevOnError on_source_error, on_target_error;
-    bool synced;
-    /* Set when the target is synced (dirty bitmap is clean, nothing
-     * in flight) and the job is running in active mode */
+    /*
+     * To be accessed with atomics.
+     *
+     * Set when the target is synced (dirty bitmap is clean, nothing in flight)
+     * and the job is running in active mode.
+     */
     bool actively_synced;
     bool should_complete;
     int64_t granularity;
@@ -73,7 +81,7 @@ typedef struct MirrorBlockJob {
 
     uint64_t last_pause_ns;
     unsigned long *in_flight_bitmap;
-    int in_flight;
+    unsigned in_flight;
     int64_t bytes_in_flight;
     QTAILQ_HEAD(, MirrorOp) ops_in_flight;
     int ret;
@@ -82,6 +90,7 @@ typedef struct MirrorBlockJob {
     int max_iov;
     bool initial_zeroing_ongoing;
     int in_active_write_counter;
+    int64_t active_write_bytes_in_flight;
     bool prepared;
     bool in_drain;
 } MirrorBlockJob;
@@ -89,6 +98,7 @@ typedef struct MirrorBlockJob {
 typedef struct MirrorBDSOpaque {
     MirrorBlockJob *job;
     bool stop;
+    bool is_commit;
 } MirrorBDSOpaque;
 
 struct MirrorOp {
@@ -106,6 +116,7 @@ struct MirrorOp {
     bool is_in_flight;
     CoQueue waiting_requests;
     Coroutine *co;
+    MirrorOp *waiting_for_op;
 
     QTAILQ_ENTRY(MirrorOp) next;
 };
@@ -119,8 +130,7 @@ typedef enum MirrorMethod {
 static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
                                             int error)
 {
-    s->synced = false;
-    s->actively_synced = false;
+    qatomic_set(&s->actively_synced, false);
     if (read) {
         return block_job_error_action(&s->common, s->on_source_error,
                                       true, error);
@@ -158,7 +168,25 @@ static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self,
             if (ranges_overlap(self_start_chunk, self_nb_chunks,
                                op_start_chunk, op_nb_chunks))
             {
+                if (self) {
+                    /*
+                     * If the operation is already (indirectly) waiting for us,
+                     * or will wait for us as soon as it wakes up, then just go
+                     * on (instead of producing a deadlock in the former case).
+                     */
+                    if (op->waiting_for_op) {
+                        continue;
+                    }
+
+                    self->waiting_for_op = op;
+                }
+
                 qemu_co_queue_wait(&op->waiting_requests, NULL);
+
+                if (self) {
+                    self->waiting_for_op = NULL;
+                }
+
                 break;
             }
         }
@@ -250,8 +278,8 @@ static inline int64_t mirror_clip_bytes(MirrorBlockJob *s,
 
 /* Round offset and/or bytes to target cluster if COW is needed, and
  * return the offset of the adjusted tail against original. */
-static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
-                            uint64_t *bytes)
+static int coroutine_fn mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
+                                         uint64_t *bytes)
 {
     bool need_cow;
     int ret = 0;
@@ -263,8 +291,8 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
     need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity,
                           s->cow_bitmap);
     if (need_cow) {
-        bdrv_round_to_clusters(blk_bs(s->target), *offset, *bytes,
-                               &align_offset, &align_bytes);
+        bdrv_round_to_subclusters(blk_bs(s->target), *offset, *bytes,
+                                  &align_offset, &align_bytes);
     }
 
     if (align_bytes > max_bytes) {
@@ -285,19 +313,21 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
 }
 
 static inline void coroutine_fn
-mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
 {
     MirrorOp *op;
 
     QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
-        /* Do not wait on pseudo ops, because it may in turn wait on
+        /*
+         * Do not wait on pseudo ops, because it may in turn wait on
          * some other operation to start, which may in fact be the
          * caller of this function.  Since there is only one pseudo op
          * at any given time, we will always find some real operation
-         * to wait on. */
-        if (!op->is_pseudo_op && op->is_in_flight &&
-            op->is_active_write == active)
-        {
+         * to wait on.
+         * Also, do not wait on active operations, because they do not
+         * use up in-flight slots.
+         */
+        if (!op->is_pseudo_op && op->is_in_flight && !op->is_active_write) {
             qemu_co_queue_wait(&op->waiting_requests, NULL);
             return;
         }
@@ -305,13 +335,6 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
     abort();
 }
 
-static inline void coroutine_fn
-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
-{
-    /* Only non-active operations use up in-flight slots */
-    mirror_wait_for_any_operation(s, false);
-}
-
 /* Perform a mirror copy operation.
  *
  * *op->bytes_handled is set to the number of bytes copied after and
@@ -374,8 +397,10 @@ static void coroutine_fn mirror_co_read(void *opaque)
     op->is_in_flight = true;
     trace_mirror_one_iteration(s, op->offset, op->bytes);
 
-    ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes,
-                         &op->qiov, 0);
+    WITH_GRAPH_RDLOCK_GUARD() {
+        ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes,
+                             &op->qiov, 0);
+    }
     mirror_read_complete(op, ret);
 }
 
@@ -454,12 +479,11 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
     return bytes_handled;
 }
 
-static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+static void coroutine_fn GRAPH_RDLOCK mirror_iteration(MirrorBlockJob *s)
 {
     BlockDriverState *source = s->mirror_top_bs->backing->bs;
     MirrorOp *pseudo_op;
     int64_t offset;
-    uint64_t delay_ns = 0, ret = 0;
     /* At least the first dirty chunk is mirrored in one iteration. */
     int nb_chunks = 1;
     bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
@@ -475,11 +499,18 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
     }
     bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
 
+    /*
+     * Wait for concurrent requests to @offset.  The next loop will limit the
+     * copied area based on in_flight_bitmap so we only copy an area that does
+     * not overlap with concurrent in-flight requests.  Still, we would like to
+     * copy something, so wait until there are at least no more requests to the
+     * very beginning of the area.
+     */
     mirror_wait_on_conflicts(NULL, s, offset, 1);
 
     job_pause_point(&s->common.job);
 
-    /* Find the number of consective dirty chunks following the first dirty
+    /* Find the number of consecutive dirty chunks following the first dirty
      * one, and wait for in flight requests in them. */
     bdrv_dirty_bitmap_lock(s->dirty_bitmap);
     while (nb_chunks * s->granularity < s->buf_size) {
@@ -535,9 +566,11 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         MirrorMethod mirror_method = MIRROR_METHOD_COPY;
 
         assert(!(offset % s->granularity));
-        ret = bdrv_block_status_above(source, NULL, offset,
-                                      nb_chunks * s->granularity,
-                                      &io_bytes, NULL, NULL);
+        WITH_GRAPH_RDLOCK_GUARD() {
+            ret = bdrv_co_block_status_above(source, NULL, offset,
+                                             nb_chunks * s->granularity,
+                                             &io_bytes, NULL, NULL);
+        }
         if (ret < 0) {
             io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes);
         } else if (ret & BDRV_BLOCK_DATA) {
@@ -550,8 +583,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
             int64_t target_offset;
             int64_t target_bytes;
-            bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes,
-                                   &target_offset, &target_bytes);
+            WITH_GRAPH_RDLOCK_GUARD() {
+                bdrv_round_to_subclusters(blk_bs(s->target), offset, io_bytes,
+                                          &target_offset, &target_bytes);
+            }
             if (target_offset == offset &&
                 target_bytes == io_bytes) {
                 mirror_method = ret & BDRV_BLOCK_ZERO ?
@@ -580,16 +615,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         assert(io_bytes);
         offset += io_bytes;
         nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity);
-        delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct);
+        block_job_ratelimit_processed_bytes(&s->common, io_bytes_acct);
     }
 
-    ret = delay_ns;
 fail:
     QTAILQ_REMOVE(&s->ops_in_flight, pseudo_op, next);
     qemu_co_queue_restart_all(&pseudo_op->waiting_requests);
     g_free(pseudo_op);
-
-    return ret;
 }
 
 static void mirror_free_init(MirrorBlockJob *s)
@@ -638,11 +670,16 @@ static int mirror_exit_common(Job *job)
     bool abort = job->ret < 0;
     int ret = 0;
 
+    GLOBAL_STATE_CODE();
+
     if (s->prepared) {
         return 0;
     }
     s->prepared = true;
 
+    aio_context_acquire(qemu_get_aio_context());
+    bdrv_graph_rdlock_main_loop();
+
     mirror_top_bs = s->mirror_top_bs;
     bs_opaque = mirror_top_bs->opaque;
     src = mirror_top_bs->backing->bs;
@@ -660,6 +697,8 @@ static int mirror_exit_common(Job *job)
     bdrv_ref(mirror_top_bs);
     bdrv_ref(target_bs);
 
+    bdrv_graph_rdunlock_main_loop();
+
     /*
      * Remove target parent that still uses BLK_PERM_WRITE/RESIZE before
      * inserting target_bs at s->to_replace, where we might not be able to get
@@ -673,9 +712,13 @@ static int mirror_exit_common(Job *job)
      * these permissions any more means that we can't allow any new requests on
      * mirror_top_bs from now on, so keep it drained. */
     bdrv_drained_begin(mirror_top_bs);
+    bdrv_drained_begin(target_bs);
     bs_opaque->stop = true;
+
+    bdrv_graph_rdlock_main_loop();
     bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
                              &error_abort);
+
     if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
         BlockDriverState *backing = s->is_none_mode ? src : s->base;
         BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
@@ -688,7 +731,16 @@ static int mirror_exit_common(Job *job)
                 ret = -EPERM;
             }
         }
+    } else if (!abort && s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
+        assert(!bdrv_backing_chain_next(target_bs));
+        ret = bdrv_open_backing_file(bdrv_skip_filters(target_bs), NULL,
+                                     "backing", &local_err);
+        if (ret < 0) {
+            error_report_err(local_err);
+            local_err = NULL;
+        }
     }
+    bdrv_graph_rdunlock_main_loop();
 
     if (s->to_replace) {
         replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -706,12 +758,13 @@ static int mirror_exit_common(Job *job)
         /* The mirror job has no requests in flight any more, but we need to
          * drain potential other users of the BDS before changing the graph. */
         assert(s->in_drain);
-        bdrv_drained_begin(target_bs);
+        bdrv_drained_begin(to_replace);
         /*
          * Cannot use check_to_replace_node() here, because that would
          * check for an op blocker on @to_replace, and we have our own
          * there.
          */
+        bdrv_graph_wrlock(target_bs);
         if (bdrv_recurse_can_replace(src, to_replace)) {
             bdrv_replace_node(to_replace, target_bs, &local_err);
         } else {
@@ -720,7 +773,8 @@ static int mirror_exit_common(Job *job)
                        "would not lead to an abrupt change of visible data",
                        to_replace->node_name, target_bs->node_name);
         }
-        bdrv_drained_end(target_bs);
+        bdrv_graph_wrunlock(target_bs);
+        bdrv_drained_end(to_replace);
         if (local_err) {
             error_report_err(local_err);
             ret = -EPERM;
@@ -735,7 +789,6 @@ static int mirror_exit_common(Job *job)
         aio_context_release(replace_aio_context);
     }
     g_free(s->replaces);
-    bdrv_unref(target_bs);
 
     /*
      * Remove the mirror filter driver from the graph. Before this, get rid of
@@ -743,14 +796,12 @@ static int mirror_exit_common(Job *job)
      * valid.
      */
     block_job_remove_all_bdrv(bjob);
+    bdrv_graph_wrlock(mirror_top_bs);
     bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
+    bdrv_graph_wrunlock(mirror_top_bs);
 
-    /* We just changed the BDS the job BB refers to (with either or both of the
-     * bdrv_replace_node() calls), so switch the BB back so the cleanup does
-     * the right thing. We don't need any permissions any more now. */
-    blk_remove_bs(bjob->blk);
-    blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
-    blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
+    bdrv_drained_end(target_bs);
+    bdrv_unref(target_bs);
 
     bs_opaque->job = NULL;
 
@@ -760,6 +811,8 @@ static int mirror_exit_common(Job *job)
     bdrv_unref(mirror_top_bs);
     bdrv_unref(src);
 
+    aio_context_release(qemu_get_aio_context());
+
     return ret;
 }
 
@@ -786,14 +839,18 @@ static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
     }
 }
 
-static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
+static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s)
 {
     int64_t offset;
-    BlockDriverState *bs = s->mirror_top_bs->backing->bs;
+    BlockDriverState *bs;
     BlockDriverState *target_bs = blk_bs(s->target);
     int ret;
     int64_t count;
 
+    bdrv_graph_co_rdlock();
+    bs = s->mirror_top_bs->backing->bs;
+    bdrv_graph_co_rdunlock();
+
     if (s->zero_target) {
         if (!bdrv_can_write_zeroes_with_unmap(target_bs)) {
             bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length);
@@ -839,8 +896,10 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
             return 0;
         }
 
-        ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes,
-                                      &count);
+        WITH_GRAPH_RDLOCK_GUARD() {
+            ret = bdrv_co_is_allocated_above(bs, s->base_overlay, true, offset,
+                                             bytes, &count);
+        }
         if (ret < 0) {
             return ret;
         }
@@ -857,9 +916,9 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 /* Called when going out of the streaming phase to flush the bulk of the
  * data to the medium, or just before completing.
  */
-static int mirror_flush(MirrorBlockJob *s)
+static int coroutine_fn mirror_flush(MirrorBlockJob *s)
 {
-    int ret = blk_flush(s->target);
+    int ret = blk_co_flush(s->target);
     if (ret < 0) {
         if (mirror_error_action(s, false, -ret) == BLOCK_ERROR_ACTION_REPORT) {
             s->ret = ret;
@@ -871,9 +930,11 @@ static int mirror_flush(MirrorBlockJob *s)
 static int coroutine_fn mirror_run(Job *job, Error **errp)
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
-    BlockDriverState *bs = s->mirror_top_bs->backing->bs;
+    BlockDriverState *bs;
+    MirrorBDSOpaque *mirror_top_opaque = s->mirror_top_bs->opaque;
     BlockDriverState *target_bs = blk_bs(s->target);
     bool need_drain = true;
+    BlockDeviceIoStatus iostatus;
     int64_t length;
     int64_t target_length;
     BlockDriverInfo bdi;
@@ -881,17 +942,24 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
                                  checking for a NULL string */
     int ret = 0;
 
+    bdrv_graph_co_rdlock();
+    bs = bdrv_filter_bs(s->mirror_top_bs);
+    bdrv_graph_co_rdunlock();
+
     if (job_is_cancelled(&s->common.job)) {
         goto immediate_exit;
     }
 
-    s->bdev_length = bdrv_getlength(bs);
+    bdrv_graph_co_rdlock();
+    s->bdev_length = bdrv_co_getlength(bs);
+    bdrv_graph_co_rdunlock();
+
     if (s->bdev_length < 0) {
         ret = s->bdev_length;
         goto immediate_exit;
     }
 
-    target_length = blk_getlength(s->target);
+    target_length = blk_co_getlength(s->target);
     if (target_length < 0) {
         ret = target_length;
         goto immediate_exit;
@@ -901,8 +969,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
      * active layer. */
     if (s->base == blk_bs(s->target)) {
         if (s->bdev_length > target_length) {
-            ret = blk_truncate(s->target, s->bdev_length, false,
-                               PREALLOC_MODE_OFF, 0, NULL);
+            ret = blk_co_truncate(s->target, s->bdev_length, false,
+                                  PREALLOC_MODE_OFF, 0, NULL);
             if (ret < 0) {
                 goto immediate_exit;
             }
@@ -916,12 +984,10 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
     if (s->bdev_length == 0) {
         /* Transition to the READY state and wait for complete. */
         job_transition_to_ready(&s->common.job);
-        s->synced = true;
-        s->actively_synced = true;
-        while (!job_is_cancelled(&s->common.job) && !s->should_complete) {
+        qatomic_set(&s->actively_synced, true);
+        while (!job_cancel_requested(&s->common.job) && !s->should_complete) {
             job_yield(&s->common.job);
         }
-        s->common.job.cancelled = false;
         goto immediate_exit;
     }
 
@@ -934,7 +1000,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
      */
     bdrv_get_backing_filename(target_bs, backing_filename,
                               sizeof(backing_filename));
-    if (!bdrv_get_info(target_bs, &bdi) && bdi.cluster_size) {
+    bdrv_graph_co_rdlock();
+    if (!bdrv_co_get_info(target_bs, &bdi) && bdi.cluster_size) {
         s->target_cluster_size = bdi.cluster_size;
     } else {
         s->target_cluster_size = BDRV_SECTOR_SIZE;
@@ -945,6 +1012,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
         s->cow_bitmap = bitmap_new(length);
     }
     s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);
+    bdrv_graph_co_rdunlock();
 
     s->buf = qemu_try_blockalign(bs, s->buf_size);
     if (s->buf == NULL) {
@@ -962,19 +1030,18 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
         }
     }
 
+    /*
+     * Only now the job is fully initialised and mirror_top_bs should start
+     * accessing it.
+     */
+    mirror_top_opaque->job = s;
+
     assert(!s->dbi);
     s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap);
     for (;;) {
-        uint64_t delay_ns = 0;
         int64_t cnt, delta;
         bool should_complete;
 
-        /* Do not start passive operations while there are active
-         * writes in progress */
-        while (s->in_active_write_counter) {
-            mirror_wait_for_any_operation(s, true);
-        }
-
         if (s->ret < 0) {
             ret = s->ret;
             goto immediate_exit;
@@ -982,33 +1049,45 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
 
         job_pause_point(&s->common.job);
 
+        if (job_is_cancelled(&s->common.job)) {
+            ret = 0;
+            goto immediate_exit;
+        }
+
         cnt = bdrv_get_dirty_count(s->dirty_bitmap);
         /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is
          * the number of bytes currently being processed; together those are
          * the current remaining operation length */
-        job_progress_set_remaining(&s->common.job, s->bytes_in_flight + cnt);
+        job_progress_set_remaining(&s->common.job,
+                                   s->bytes_in_flight + cnt +
+                                   s->active_write_bytes_in_flight);
 
         /* Note that even when no rate limit is applied we need to yield
          * periodically with no pending I/O so that bdrv_drain_all() returns.
          * We do so every BLKOCK_JOB_SLICE_TIME nanoseconds, or when there is
          * an error, or when the source is clean, whichever comes first. */
         delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
+        WITH_JOB_LOCK_GUARD() {
+            iostatus = s->common.iostatus;
+        }
         if (delta < BLOCK_JOB_SLICE_TIME &&
-            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+            iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
             if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                 (cnt == 0 && s->in_flight > 0)) {
                 trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
                 mirror_wait_for_free_in_flight_slot(s);
                 continue;
             } else if (cnt != 0) {
-                delay_ns = mirror_iteration(s);
+                bdrv_graph_co_rdlock();
+                mirror_iteration(s);
+                bdrv_graph_co_rdunlock();
             }
         }
 
         should_complete = false;
         if (s->in_flight == 0 && cnt == 0) {
             trace_mirror_before_flush(s);
-            if (!s->synced) {
+            if (!job_is_ready(&s->common.job)) {
                 if (mirror_flush(s) < 0) {
                     /* Go check s->ret.  */
                     continue;
@@ -1019,14 +1098,13 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
                  * the target in a consistent state.
                  */
                 job_transition_to_ready(&s->common.job);
-                s->synced = true;
-                if (s->copy_mode != MIRROR_COPY_MODE_BACKGROUND) {
-                    s->actively_synced = true;
-                }
+            }
+            if (qatomic_read(&s->copy_mode) != MIRROR_COPY_MODE_BACKGROUND) {
+                qatomic_set(&s->actively_synced, true);
             }
 
             should_complete = s->should_complete ||
-                job_is_cancelled(&s->common.job);
+                job_cancel_requested(&s->common.job);
             cnt = bdrv_get_dirty_count(s->dirty_bitmap);
         }
 
@@ -1045,6 +1123,10 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
 
             s->in_drain = true;
             bdrv_drained_begin(bs);
+
+            /* Must be zero because we are drained */
+            assert(s->in_active_write_counter == 0);
+
             cnt = bdrv_get_dirty_count(s->dirty_bitmap);
             if (cnt > 0 || mirror_flush(s) < 0) {
                 bdrv_drained_end(bs);
@@ -1056,23 +1138,18 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
              * completion.
              */
             assert(QLIST_EMPTY(&bs->tracked_requests));
-            s->common.job.cancelled = false;
             need_drain = false;
             break;
         }
 
-        ret = 0;
-
-        if (s->synced && !should_complete) {
-            delay_ns = (s->in_flight == 0 &&
-                        cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0);
-        }
-        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
-        job_sleep_ns(&s->common.job, delay_ns);
-        if (job_is_cancelled(&s->common.job) &&
-            (!s->synced || s->common.job.force_cancel))
-        {
-            break;
+        if (job_is_ready(&s->common.job) && !should_complete) {
+            if (s->in_flight == 0 && cnt == 0) {
+                trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job),
+                                          BLOCK_JOB_SLICE_TIME);
+                job_sleep_ns(&s->common.job, BLOCK_JOB_SLICE_TIME);
+            }
+        } else {
+            block_job_ratelimit_sleep(&s->common);
         }
         s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     }
@@ -1083,8 +1160,7 @@ immediate_exit:
          * or it was cancelled prematurely so that we do not guarantee that
          * the target is a copy of the source.
          */
-        assert(ret < 0 || ((s->common.job.force_cancel || !s->synced) &&
-               job_is_cancelled(&s->common.job)));
+        assert(ret < 0 || job_is_cancelled(&s->common.job));
         assert(need_drain);
         mirror_wait_for_all_io(s);
     }
@@ -1106,27 +1182,13 @@ immediate_exit:
 static void mirror_complete(Job *job, Error **errp)
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
-    BlockDriverState *target;
-
-    target = blk_bs(s->target);
 
-    if (!s->synced) {
+    if (!job_is_ready(job)) {
         error_setg(errp, "The active block job '%s' cannot be completed",
                    job->id);
         return;
     }
 
-    if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
-        int ret;
-
-        assert(!bdrv_backing_chain_next(target));
-        ret = bdrv_open_backing_file(bdrv_skip_filters(target), NULL,
-                                     "backing", errp);
-        if (ret < 0) {
-            return;
-        }
-    }
-
     /* block all operations on to_replace bs */
     if (s->replaces) {
         AioContext *replace_aio_context;
@@ -1140,10 +1202,7 @@ static void mirror_complete(Job *job, Error **errp)
         replace_aio_context = bdrv_get_aio_context(s->to_replace);
         aio_context_acquire(replace_aio_context);
 
-        /* TODO Translate this into permission system. Current definition of
-         * GRAPH_MOD would require to request it for the parents; they might
-         * not even be BlockDriverStates, however, so a BdrvChild can't address
-         * them. May need redefinition of GRAPH_MOD. */
+        /* TODO Translate this into child freeze system. */
         error_setg(&s->replace_blocker,
                    "block device is in use by block-job-complete");
         bdrv_op_block_all(s->to_replace, s->replace_blocker);
@@ -1153,7 +1212,13 @@ static void mirror_complete(Job *job, Error **errp)
     }
 
     s->should_complete = true;
-    job_enter(job);
+
+    /* If the job is paused, it will be re-entered when it is resumed */
+    WITH_JOB_LOCK_GUARD() {
+        if (!job->paused) {
+            job_enter_cond_locked(job, NULL);
+        }
+    }
 }
 
 static void coroutine_fn mirror_pause(Job *job)
@@ -1172,13 +1237,81 @@ static bool mirror_drained_poll(BlockJob *job)
      * from one of our own drain sections, to avoid a deadlock waiting for
      * ourselves.
      */
-    if (!s->common.job.paused && !s->common.job.cancelled && !s->in_drain) {
-        return true;
+    WITH_JOB_LOCK_GUARD() {
+        if (!s->common.job.paused && !job_is_cancelled_locked(&job->job)
+            && !s->in_drain) {
+            return true;
+        }
     }
 
     return !!s->in_flight;
 }
 
+static bool mirror_cancel(Job *job, bool force)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
+    BlockDriverState *target = blk_bs(s->target);
+
+    /*
+     * Before the job is READY, we treat any cancellation like a
+     * force-cancellation.
+     */
+    force = force || !job_is_ready(job);
+
+    if (force) {
+        bdrv_cancel_in_flight(target);
+    }
+    return force;
+}
+
+static bool commit_active_cancel(Job *job, bool force)
+{
+    /* Same as above in mirror_cancel() */
+    return force || !job_is_ready(job);
+}
+
+static void mirror_change(BlockJob *job, BlockJobChangeOptions *opts,
+                          Error **errp)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+    BlockJobChangeOptionsMirror *change_opts = &opts->u.mirror;
+    MirrorCopyMode current;
+
+    /*
+     * The implementation relies on the fact that copy_mode is only written
+     * under the BQL. Otherwise, further synchronization would be required.
+     */
+
+    GLOBAL_STATE_CODE();
+
+    if (qatomic_read(&s->copy_mode) == change_opts->copy_mode) {
+        return;
+    }
+
+    if (change_opts->copy_mode != MIRROR_COPY_MODE_WRITE_BLOCKING) {
+        error_setg(errp, "Change to copy mode '%s' is not implemented",
+                   MirrorCopyMode_str(change_opts->copy_mode));
+        return;
+    }
+
+    current = qatomic_cmpxchg(&s->copy_mode, MIRROR_COPY_MODE_BACKGROUND,
+                              change_opts->copy_mode);
+    if (current != MIRROR_COPY_MODE_BACKGROUND) {
+        error_setg(errp, "Expected current copy mode '%s', got '%s'",
+                   MirrorCopyMode_str(MIRROR_COPY_MODE_BACKGROUND),
+                   MirrorCopyMode_str(current));
+    }
+}
+
+static void mirror_query(BlockJob *job, BlockJobInfo *info)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+    info->u.mirror = (BlockJobInfoMirror) {
+        .actively_synced = qatomic_read(&s->actively_synced),
+    };
+}
+
 static const BlockJobDriver mirror_job_driver = {
     .job_driver = {
         .instance_size          = sizeof(MirrorBlockJob),
@@ -1190,8 +1323,11 @@ static const BlockJobDriver mirror_job_driver = {
         .abort                  = mirror_abort,
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
+        .cancel                 = mirror_cancel,
     },
     .drained_poll           = mirror_drained_poll,
+    .change                 = mirror_change,
+    .query                  = mirror_query,
 };
 
 static const BlockJobDriver commit_active_job_driver = {
@@ -1205,6 +1341,7 @@ static const BlockJobDriver commit_active_job_driver = {
         .abort                  = mirror_abort,
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
+        .cancel                 = commit_active_cancel,
     },
     .drained_poll           = mirror_drained_poll,
 };
@@ -1271,6 +1408,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
     }
 
     job_progress_increase_remaining(&job->common.job, bytes);
+    job->active_write_bytes_in_flight += bytes;
 
     switch (method) {
     case MIRROR_METHOD_COPY:
@@ -1292,6 +1430,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
         abort();
     }
 
+    job->active_write_bytes_in_flight -= bytes;
     if (ret >= 0) {
         job_progress_update(&job->common.job, bytes);
     } else {
@@ -1307,7 +1446,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
         bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity);
         bdrv_set_dirty_bitmap(job->dirty_bitmap, bitmap_offset,
                               bitmap_end - bitmap_offset);
-        job->actively_synced = false;
+        qatomic_set(&job->actively_synced, false);
 
         action = mirror_error_action(job, false, -ret);
         if (action == BLOCK_ERROR_ACTION_REPORT) {
@@ -1333,12 +1472,26 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s,
         .bytes              = bytes,
         .is_active_write    = true,
         .is_in_flight       = true,
+        .co                 = qemu_coroutine_self(),
     };
     qemu_co_queue_init(&op->waiting_requests);
     QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);
 
     s->in_active_write_counter++;
 
+    /*
+     * Wait for concurrent requests affecting the area.  If there are already
+     * running requests that are copying off now-to-be stale data in the area,
+     * we must wait for them to finish before we begin writing fresh data to the
+     * target so that the write operations appear in the correct order.
+     * Note that background requests (see mirror_iteration()) in contrast only
+     * wait for conflicting requests at the start of the dirty area, and then
+     * (based on the in_flight_bitmap) truncate the area to copy so it will not
+     * conflict with any requests beyond that.  For active writes, however, we
+     * cannot truncate that area.  The request from our parent must be blocked
+     * until the area is copied in full.  Therefore, we must wait for the whole
+     * area to become free of concurrent requests.
+     */
     mirror_wait_on_conflicts(op, s, offset, bytes);
 
     bitmap_set(s->in_flight_bitmap, start_chunk, end_chunk - start_chunk);
@@ -1346,13 +1499,14 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s,
     return op;
 }
 
-static void coroutine_fn active_write_settle(MirrorOp *op)
+static void coroutine_fn GRAPH_RDLOCK active_write_settle(MirrorOp *op)
 {
     uint64_t start_chunk = op->offset / op->s->granularity;
     uint64_t end_chunk = DIV_ROUND_UP(op->offset + op->bytes,
                                       op->s->granularity);
 
-    if (!--op->s->in_active_write_counter && op->s->actively_synced) {
+    if (!--op->s->in_active_write_counter &&
+        qatomic_read(&op->s->actively_synced)) {
         BdrvChild *source = op->s->mirror_top_bs->backing;
 
         if (QLIST_FIRST(&source->bs->parents) == source &&
@@ -1371,23 +1525,28 @@ static void coroutine_fn active_write_settle(MirrorOp *op)
     g_free(op);
 }
 
-static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs,
-    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                       QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
 }
 
-static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs,
-    MirrorMethod method, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
-    int flags)
+static bool should_copy_to_target(MirrorBDSOpaque *s)
+{
+    return s->job && s->job->ret >= 0 &&
+        !job_is_cancelled(&s->job->common.job) &&
+        qatomic_read(&s->job->copy_mode) == MIRROR_COPY_MODE_WRITE_BLOCKING;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_do_write(BlockDriverState *bs, MirrorMethod method,
+                         bool copy_to_target, uint64_t offset, uint64_t bytes,
+                         QEMUIOVector *qiov, int flags)
 {
     MirrorOp *op = NULL;
     MirrorBDSOpaque *s = bs->opaque;
     int ret = 0;
-    bool copy_to_target;
-
-    copy_to_target = s->job->ret >= 0 &&
-                     s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
 
     if (copy_to_target) {
         op = active_write_prepare(s->job, offset, bytes);
@@ -1410,6 +1569,11 @@ static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs,
         abort();
     }
 
+    if (!copy_to_target && s->job && s->job->dirty_bitmap) {
+        qatomic_set(&s->job->actively_synced, false);
+        bdrv_set_dirty_bitmap(s->job->dirty_bitmap, offset, bytes);
+    }
+
     if (ret < 0) {
         goto out;
     }
@@ -1425,17 +1589,14 @@ out:
     return ret;
 }
 
-static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
-    uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                        QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
-    MirrorBDSOpaque *s = bs->opaque;
     QEMUIOVector bounce_qiov;
     void *bounce_buf;
     int ret = 0;
-    bool copy_to_target;
-
-    copy_to_target = s->job->ret >= 0 &&
-                     s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
+    bool copy_to_target = should_copy_to_target(bs->opaque);
 
     if (copy_to_target) {
         /* The guest might concurrently modify the data to write; but
@@ -1448,10 +1609,12 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
         qemu_iovec_init(&bounce_qiov, 1);
         qemu_iovec_add(&bounce_qiov, bounce_buf, bytes);
         qiov = &bounce_qiov;
+
+        flags &= ~BDRV_REQ_REGISTERED_BUF;
     }
 
-    ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, offset, bytes, qiov,
-                                   flags);
+    ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, copy_to_target,
+                                   offset, bytes, qiov, flags);
 
     if (copy_to_target) {
         qemu_iovec_destroy(&bounce_qiov);
@@ -1461,7 +1624,7 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK bdrv_mirror_top_flush(BlockDriverState *bs)
 {
     if (bs->backing == NULL) {
         /* we can be here after failed bdrv_append in mirror_start_job */
@@ -1470,21 +1633,24 @@ static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
     return bdrv_co_flush(bs->backing->bs);
 }
 
-static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+                              int64_t bytes, BdrvRequestFlags flags)
 {
-    return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, offset, bytes, NULL,
-                                    flags);
+    bool copy_to_target = should_copy_to_target(bs->opaque);
+    return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, copy_to_target,
+                                    offset, bytes, NULL, flags);
 }
 
-static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
-    int64_t offset, int bytes)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_mirror_top_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
-    return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes,
-                                    NULL, 0);
+    bool copy_to_target = should_copy_to_target(bs->opaque);
+    return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, copy_to_target,
+                                    offset, bytes, NULL, 0);
 }
 
-static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs)
+static void GRAPH_RDLOCK bdrv_mirror_top_refresh_filename(BlockDriverState *bs)
 {
     if (bs->backing == NULL) {
         /* we can be here after failed bdrv_attach_child in
@@ -1513,13 +1679,27 @@ static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c,
         return;
     }
 
-    /* Must be able to forward guest writes to the real image */
-    *nperm = 0;
-    if (perm & BLK_PERM_WRITE) {
-        *nperm |= BLK_PERM_WRITE;
-    }
+    bdrv_default_perms(bs, c, role, reopen_queue,
+                       perm, shared, nperm, nshared);
 
-    *nshared = BLK_PERM_ALL;
+    if (s->is_commit) {
+        /*
+         * For commit jobs, we cannot take CONSISTENT_READ, because
+         * that permission is unshared for everything above the base
+         * node (except for filters on the base node).
+         * We also have to force-share the WRITE permission, or
+         * otherwise we would block ourselves at the base node (if
+         * writes are blocked for a node, they are also blocked for
+         * its backing file).
+         * (We could also share RESIZE, because it may be needed for
+         * the target if its size is less than the top node's; but
+         * bdrv_default_perms_for_cow() automatically shares RESIZE
+         * for backing nodes if WRITE is shared, so there is no need
+         * to do it here.)
+         */
+        *nperm &= ~BLK_PERM_CONSISTENT_READ;
+        *nshared |= BLK_PERM_WRITE;
+    }
 }
 
 /* Dummy node that provides consistent read to its users without requiring it
@@ -1535,6 +1715,7 @@ static BlockDriver bdrv_mirror_top = {
     .bdrv_child_perm            = bdrv_mirror_top_child_perm,
 
     .is_filter                  = true,
+    .filtered_child_is_backing  = true,
 };
 
 static BlockJob *mirror_start_job(
@@ -1560,9 +1741,10 @@ static BlockJob *mirror_start_job(
     BlockDriverState *mirror_top_bs;
     bool target_is_backing;
     uint64_t target_perms, target_shared_perms;
-    Error *local_err = NULL;
     int ret;
 
+    GLOBAL_STATE_CODE();
+
     if (granularity == 0) {
         granularity = bdrv_get_default_bitmap_granularity(target);
     }
@@ -1578,11 +1760,16 @@ static BlockJob *mirror_start_job(
         buf_size = DEFAULT_MIRROR_BUF_SIZE;
     }
 
+    bdrv_graph_rdlock_main_loop();
     if (bdrv_skip_filters(bs) == bdrv_skip_filters(target)) {
         error_setg(errp, "Can't mirror node into itself");
+        bdrv_graph_rdunlock_main_loop();
         return NULL;
     }
 
+    target_is_backing = bdrv_chain_contains(bs, target);
+    bdrv_graph_rdunlock_main_loop();
+
     /* In the case of active commit, add dummy driver to provide consistent
      * reads on the top, while disabling it in the intermediate nodes, and make
      * the backing chain writable. */
@@ -1605,16 +1792,14 @@ static BlockJob *mirror_start_job(
     bs_opaque = g_new0(MirrorBDSOpaque, 1);
     mirror_top_bs->opaque = bs_opaque;
 
-    /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
-     * it alive until block_job_create() succeeds even if bs has no parent. */
-    bdrv_ref(mirror_top_bs);
+    bs_opaque->is_commit = target_is_backing;
+
     bdrv_drained_begin(bs);
-    bdrv_append(mirror_top_bs, bs, &local_err);
+    ret = bdrv_append(mirror_top_bs, bs, errp);
     bdrv_drained_end(bs);
 
-    if (local_err) {
+    if (ret < 0) {
         bdrv_unref(mirror_top_bs);
-        error_propagate(errp, local_err);
         return NULL;
     }
 
@@ -1622,12 +1807,11 @@ static BlockJob *mirror_start_job(
     s = block_job_create(job_id, driver, NULL, mirror_top_bs,
                          BLK_PERM_CONSISTENT_READ,
                          BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
-                         BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
+                         BLK_PERM_WRITE, speed,
                          creation_flags, cb, opaque, errp);
     if (!s) {
         goto fail;
     }
-    bs_opaque->job = s;
 
     /* The block job now has a reference to this node */
     bdrv_unref(mirror_top_bs);
@@ -1646,7 +1830,6 @@ static BlockJob *mirror_start_job(
     target_perms = BLK_PERM_WRITE;
     target_shared_perms = BLK_PERM_WRITE_UNCHANGED;
 
-    target_is_backing = bdrv_chain_contains(bs, target);
     if (target_is_backing) {
         int64_t bs_size, target_size;
         bs_size = bdrv_getlength(bs);
@@ -1667,21 +1850,20 @@ static BlockJob *mirror_start_job(
             target_perms |= BLK_PERM_RESIZE;
         }
 
-        target_shared_perms |= BLK_PERM_CONSISTENT_READ
-                            |  BLK_PERM_WRITE
-                            |  BLK_PERM_GRAPH_MOD;
-    } else if (bdrv_chain_contains(bs, bdrv_skip_filters(target))) {
-        /*
-         * We may want to allow this in the future, but it would
-         * require taking some extra care.
-         */
-        error_setg(errp, "Cannot mirror to a filter on top of a node in the "
-                   "source's backing chain");
-        goto fail;
-    }
-
-    if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) {
-        target_perms |= BLK_PERM_GRAPH_MOD;
+        target_shared_perms |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
+    } else {
+        bdrv_graph_rdlock_main_loop();
+        if (bdrv_chain_contains(bs, bdrv_skip_filters(target))) {
+            /*
+             * We may want to allow this in the future, but it would
+             * require taking some extra care.
+             */
+            error_setg(errp, "Cannot mirror to a filter on top of a node in "
+                       "the source's backing chain");
+            bdrv_graph_rdunlock_main_loop();
+            goto fail;
+        }
+        bdrv_graph_rdunlock_main_loop();
     }
 
     s->target = blk_new(s->common.job.aio_context,
@@ -1702,13 +1884,14 @@ static BlockJob *mirror_start_job(
     blk_set_allow_aio_context_change(s->target, true);
     blk_set_disable_request_queuing(s->target, true);
 
+    bdrv_graph_rdlock_main_loop();
     s->replaces = g_strdup(replaces);
     s->on_source_error = on_source_error;
     s->on_target_error = on_target_error;
     s->is_none_mode = is_none_mode;
     s->backing_mode = backing_mode;
     s->zero_target = zero_target;
-    s->copy_mode = copy_mode;
+    qatomic_set(&s->copy_mode, copy_mode);
     s->base = base;
     s->base_overlay = bdrv_find_overlay(bs, base);
     s->granularity = granularity;
@@ -1717,20 +1900,27 @@ static BlockJob *mirror_start_job(
     if (auto_complete) {
         s->should_complete = true;
     }
+    bdrv_graph_rdunlock_main_loop();
 
-    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
+    s->dirty_bitmap = bdrv_create_dirty_bitmap(s->mirror_top_bs, granularity,
+                                               NULL, errp);
     if (!s->dirty_bitmap) {
         goto fail;
     }
-    if (s->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING) {
-        bdrv_disable_dirty_bitmap(s->dirty_bitmap);
-    }
 
+    /*
+     * The dirty bitmap is set by bdrv_mirror_top_do_write() when not in active
+     * mode.
+     */
+    bdrv_disable_dirty_bitmap(s->dirty_bitmap);
+
+    bdrv_graph_wrlock(bs);
     ret = block_job_add_bdrv(&s->common, "source", bs, 0,
                              BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
                              BLK_PERM_CONSISTENT_READ,
                              errp);
     if (ret < 0) {
+        bdrv_graph_wrunlock(bs);
         goto fail;
     }
 
@@ -1775,14 +1965,17 @@ static BlockJob *mirror_start_job(
             ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
                                      iter_shared_perms, errp);
             if (ret < 0) {
+                bdrv_graph_wrunlock(bs);
                 goto fail;
             }
         }
 
         if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) {
+            bdrv_graph_wrunlock(bs);
             goto fail;
         }
     }
+    bdrv_graph_wrunlock(bs);
 
     QTAILQ_INIT(&s->ops_in_flight);
 
@@ -1807,9 +2000,14 @@ fail:
     }
 
     bs_opaque->stop = true;
+    bdrv_drained_begin(bs);
+    bdrv_graph_wrlock(bs);
+    assert(mirror_top_bs->backing->bs == bs);
     bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
                              &error_abort);
-    bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
+    bdrv_replace_node(mirror_top_bs, bs, &error_abort);
+    bdrv_graph_wrunlock(bs);
+    bdrv_drained_end(bs);
 
     bdrv_unref(mirror_top_bs);
 
@@ -1830,14 +2028,20 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
     bool is_none_mode;
     BlockDriverState *base;
 
+    GLOBAL_STATE_CODE();
+
     if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) ||
         (mode == MIRROR_SYNC_MODE_BITMAP)) {
         error_setg(errp, "Sync mode '%s' not supported",
                    MirrorSyncMode_str(mode));
         return;
     }
+
+    bdrv_graph_rdlock_main_loop();
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
     base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
+    bdrv_graph_rdunlock_main_loop();
+
     mirror_start_job(job_id, bs, creation_flags, target, replaces,
                      speed, granularity, buf_size, backing_mode, zero_target,
                      on_source_error, on_target_error, unmap, NULL, NULL,
@@ -1853,8 +2057,9 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
                               bool auto_complete, Error **errp)
 {
     bool base_read_only;
-    Error *local_err = NULL;
-    BlockJob *ret;
+    BlockJob *job;
+
+    GLOBAL_STATE_CODE();
 
     base_read_only = bdrv_is_read_only(base);
 
@@ -1864,19 +2069,18 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
         }
     }
 
-    ret = mirror_start_job(
+    job = mirror_start_job(
                      job_id, bs, creation_flags, base, NULL, speed, 0, 0,
                      MIRROR_LEAVE_BACKING_CHAIN, false,
                      on_error, on_error, true, cb, opaque,
                      &commit_active_job_driver, false, base, auto_complete,
                      filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND,
-                     &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+                     errp);
+    if (!job) {
         goto error_restore_flags;
     }
 
-    return ret;
+    return job;
 
 error_restore_flags:
     /* ignore error and errp for bdrv_reopen, because we want to propagate
index 9f11deec6467cfbda9ae7758f804ead9fcfc3aac..70d01a37763f3d872e14e9d6bdf4df3889d07b6b 100644 (file)
@@ -32,7 +32,9 @@
 
 #include "qemu/osdep.h"
 
+#include "block/block-io.h"
 #include "block/block_int.h"
+#include "block/dirty-bitmap.h"
 #include "qapi/qapi-commands-block.h"
 #include "qapi/error.h"
 
@@ -56,6 +58,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
     BlockDriverState *bs;
     BdrvDirtyBitmap *bitmap;
 
+    GLOBAL_STATE_CODE();
+
     if (!node) {
         error_setg(errp, "Node cannot be NULL");
         return NULL;
@@ -155,6 +159,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
     BdrvDirtyBitmap *bitmap;
     AioContext *aio_context;
 
+    GLOBAL_STATE_CODE();
+
     bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
     if (!bitmap || !bs) {
         return NULL;
@@ -252,69 +258,73 @@ void qmp_block_dirty_bitmap_disable(const char *node, const char *name,
     bdrv_disable_dirty_bitmap(bitmap);
 }
 
-BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
-                                          BlockDirtyBitmapMergeSourceList *bms,
+BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *dst_node,
+                                          const char *dst_bitmap,
+                                          BlockDirtyBitmapOrStrList *bms,
                                           HBitmap **backup, Error **errp)
 {
     BlockDriverState *bs;
-    BdrvDirtyBitmap *dst, *src, *anon;
-    BlockDirtyBitmapMergeSourceList *lst;
-    Error *local_err = NULL;
+    BdrvDirtyBitmap *dst, *src;
+    BlockDirtyBitmapOrStrList *lst;
+    const char *src_node, *src_bitmap;
+    HBitmap *local_backup = NULL;
 
-    dst = block_dirty_bitmap_lookup(node, target, &bs, errp);
-    if (!dst) {
-        return NULL;
-    }
+    GLOBAL_STATE_CODE();
 
-    anon = bdrv_create_dirty_bitmap(bs, bdrv_dirty_bitmap_granularity(dst),
-                                    NULL, errp);
-    if (!anon) {
+    dst = block_dirty_bitmap_lookup(dst_node, dst_bitmap, &bs, errp);
+    if (!dst) {
         return NULL;
     }
 
     for (lst = bms; lst; lst = lst->next) {
         switch (lst->value->type) {
-            const char *name, *node;
         case QTYPE_QSTRING:
-            name = lst->value->u.local;
-            src = bdrv_find_dirty_bitmap(bs, name);
+            src_bitmap = lst->value->u.local;
+            src = bdrv_find_dirty_bitmap(bs, src_bitmap);
             if (!src) {
-                error_setg(errp, "Dirty bitmap '%s' not found", name);
-                dst = NULL;
-                goto out;
+                error_setg(errp, "Dirty bitmap '%s' not found", src_bitmap);
+                goto fail;
             }
             break;
         case QTYPE_QDICT:
-            node = lst->value->u.external.node;
-            name = lst->value->u.external.name;
-            src = block_dirty_bitmap_lookup(node, name, NULL, errp);
+            src_node = lst->value->u.external.node;
+            src_bitmap = lst->value->u.external.name;
+            src = block_dirty_bitmap_lookup(src_node, src_bitmap, NULL, errp);
             if (!src) {
-                dst = NULL;
-                goto out;
+                goto fail;
             }
             break;
         default:
             abort();
         }
 
-        bdrv_merge_dirty_bitmap(anon, src, NULL, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            dst = NULL;
-            goto out;
+        /* We do backup only for first merge operation */
+        if (!bdrv_merge_dirty_bitmap(dst, src,
+                                     local_backup ? NULL : &local_backup,
+                                     errp))
+        {
+            goto fail;
         }
     }
 
-    /* Merge into dst; dst is unchanged on failure. */
-    bdrv_merge_dirty_bitmap(dst, anon, backup, errp);
+    if (backup) {
+        *backup = local_backup;
+    } else {
+        hbitmap_free(local_backup);
+    }
 
- out:
-    bdrv_release_dirty_bitmap(anon);
     return dst;
+
+fail:
+    if (local_backup) {
+        bdrv_restore_dirty_bitmap(dst, local_backup);
+    }
+
+    return NULL;
 }
 
 void qmp_block_dirty_bitmap_merge(const char *node, const char *target,
-                                  BlockDirtyBitmapMergeSourceList *bitmaps,
+                                  BlockDirtyBitmapOrStrList *bitmaps,
                                   Error **errp)
 {
     block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp);
index d15a2be827bb469340dd6cf3d797373d15ed58fb..99791aba3360d0f2a445d3e8c92b6ba3c20d002a 100644 (file)
@@ -48,6 +48,7 @@
 #include "qemu/option.h"
 #include "qemu/sockets.h"
 #include "qemu/cutils.h"
+#include "qemu/error-report.h"
 #include "sysemu/sysemu.h"
 #include "monitor/monitor.h"
 #include "monitor/hmp.h"
@@ -101,7 +102,7 @@ void hmp_drive_add(Monitor *mon, const QDict *qdict)
         return;
     }
 
-    opts = drive_def(optstr);
+    opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
     if (!opts)
         return;
 
@@ -143,6 +144,9 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
     AioContext *aio_context;
     Error *local_err = NULL;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     bs = bdrv_find_node(id);
     if (bs) {
         qmp_blockdev_del(id, &local_err);
@@ -202,6 +206,9 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
     BlockBackend *blk;
     int ret;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (!strcmp(device, "all")) {
         ret = blk_commit_all();
     } else {
@@ -213,15 +220,17 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
             error_report("Device '%s' not found", device);
             return;
         }
-        if (!blk_is_available(blk)) {
-            error_report("Device '%s' has no medium", device);
-            return;
-        }
 
         bs = bdrv_skip_implicit_filters(blk_bs(blk));
         aio_context = bdrv_get_aio_context(bs);
         aio_context_acquire(aio_context);
 
+        if (!blk_is_available(blk)) {
+            error_report("Device '%s' has no medium", device);
+            aio_context_release(aio_context);
+            return;
+        }
+
         ret = bdrv_commit(bs);
 
         aio_context_release(aio_context);
@@ -241,7 +250,6 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
     DriveMirror mirror = {
         .device = (char *)qdict_get_str(qdict, "device"),
         .target = (char *)filename,
-        .has_format = !!format,
         .format = (char *)format,
         .sync = full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
         .has_mode = true,
@@ -251,10 +259,10 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
 
     if (!filename) {
         error_setg(&err, QERR_MISSING_PARAMETER, "target");
-        hmp_handle_error(mon, err);
-        return;
+        goto end;
     }
     qmp_drive_mirror(&mirror, &err);
+end:
     hmp_handle_error(mon, err);
 }
 
@@ -270,7 +278,6 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict)
     DriveBackup backup = {
         .device = (char *)device,
         .target = (char *)filename,
-        .has_format = !!format,
         .format = (char *)format,
         .sync = full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
         .has_mode = true,
@@ -281,11 +288,11 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict)
 
     if (!filename) {
         error_setg(&err, QERR_MISSING_PARAMETER, "target");
-        hmp_handle_error(mon, err);
-        return;
+        goto end;
     }
 
     qmp_drive_backup(&backup, &err);
+end:
     hmp_handle_error(mon, err);
 }
 
@@ -356,15 +363,13 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
          * will be taken internally. Today it's actually required.
          */
         error_setg(&err, QERR_MISSING_PARAMETER, "snapshot-file");
-        hmp_handle_error(mon, err);
-        return;
+        goto end;
     }
 
     mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
-    qmp_blockdev_snapshot_sync(true, device, false, NULL,
-                               filename, false, NULL,
-                               !!format, format,
+    qmp_blockdev_snapshot_sync(device, NULL, filename, NULL, format,
                                true, mode, &err);
+end:
     hmp_handle_error(mon, err);
 }
 
@@ -385,8 +390,7 @@ void hmp_snapshot_delete_blkdev_internal(Monitor *mon, const QDict *qdict)
     const char *id = qdict_get_try_str(qdict, "id");
     Error *err = NULL;
 
-    qmp_blockdev_snapshot_delete_internal_sync(device, !!id, id,
-                                               true, name, &err);
+    qmp_blockdev_snapshot_delete_internal_sync(device, id, name, &err);
     hmp_handle_error(mon, err);
 }
 
@@ -396,7 +400,7 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict)
     bool writable = qdict_get_try_bool(qdict, "writable", false);
     bool all = qdict_get_try_bool(qdict, "all", false);
     Error *local_err = NULL;
-    BlockInfoList *block_list, *info;
+    BlockBackend *blk;
     SocketAddress *addr;
     NbdServerAddOptions export;
 
@@ -421,18 +425,24 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict)
         return;
     }
 
-    /* Then try adding all block devices.  If one fails, close all and
+    /*
+     * Then try adding all block devices.  If one fails, close all and
      * exit.
      */
-    block_list = qmp_query_block(NULL);
+    for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
+        BlockDriverState *bs = blk_bs(blk);
 
-    for (info = block_list; info; info = info->next) {
-        if (!info->value->has_inserted) {
+        if (!*blk_name(blk) && !blk_get_attached_dev(blk)) {
+            continue;
+        }
+
+        bs = bdrv_skip_implicit_filters(bs);
+        if (!bs || !bs->drv) {
             continue;
         }
 
         export = (NbdServerAddOptions) {
-            .device         = info->value->device,
+            .device         = g_strdup(blk_name(blk)),
             .has_writable   = true,
             .writable       = writable,
         };
@@ -445,8 +455,6 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict)
         }
     }
 
-    qapi_free_BlockInfoList(block_list);
-
 exit:
     hmp_handle_error(mon, local_err);
 }
@@ -460,7 +468,6 @@ void hmp_nbd_server_add(Monitor *mon, const QDict *qdict)
 
     NbdServerAddOptions export = {
         .device         = (char *) device,
-        .has_name       = !!name,
         .name           = (char *) name,
         .has_writable   = true,
         .writable       = writable,
@@ -489,13 +496,13 @@ void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict)
     hmp_handle_error(mon, err);
 }
 
-void hmp_block_resize(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_block_resize(Monitor *mon, const QDict *qdict)
 {
     const char *device = qdict_get_str(qdict, "device");
     int64_t size = qdict_get_int(qdict, "size");
     Error *err = NULL;
 
-    qmp_block_resize(true, device, false, NULL, size, &err);
+    qmp_block_resize(device, NULL, size, &err);
     hmp_handle_error(mon, err);
 }
 
@@ -506,24 +513,14 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
     const char *base = qdict_get_try_str(qdict, "base");
     int64_t speed = qdict_get_try_int(qdict, "speed", 0);
 
-    qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
-                     false, NULL, qdict_haskey(qdict, "speed"), speed, true,
-                     BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
-                     &error);
+    qmp_block_stream(device, device, base, NULL, NULL, NULL,
+                     qdict_haskey(qdict, "speed"), speed,
+                     true, BLOCKDEV_ON_ERROR_REPORT, NULL,
+                     false, false, false, false, &error);
 
     hmp_handle_error(mon, error);
 }
 
-void hmp_block_passwd(Monitor *mon, const QDict *qdict)
-{
-    const char *device = qdict_get_str(qdict, "device");
-    const char *password = qdict_get_str(qdict, "password");
-    Error *err = NULL;
-
-    qmp_block_passwd(true, device, false, NULL, password, &err);
-    hmp_handle_error(mon, err);
-}
-
 void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
 {
     Error *err = NULL;
@@ -543,10 +540,8 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
      * version has only one, so we must decide which one to pass.
      */
     if (blk_by_name(device)) {
-        throttle.has_device = true;
         throttle.device = device;
     } else {
-        throttle.has_id = true;
         throttle.id = device;
     }
 
@@ -560,14 +555,16 @@ void hmp_eject(Monitor *mon, const QDict *qdict)
     const char *device = qdict_get_str(qdict, "device");
     Error *err = NULL;
 
-    qmp_eject(true, device, false, NULL, true, force, &err);
+    qmp_eject(device, NULL, true, force, &err);
     hmp_handle_error(mon, err);
 }
 
 void hmp_qemu_io(Monitor *mon, const QDict *qdict)
 {
-    BlockBackend *blk;
+    BlockBackend *blk = NULL;
+    BlockDriverState *bs = NULL;
     BlockBackend *local_blk = NULL;
+    AioContext *ctx = NULL;
     bool qdev = qdict_get_try_bool(qdict, "qdev", false);
     const char *device = qdict_get_str(qdict, "device");
     const char *command = qdict_get_str(qdict, "command");
@@ -582,20 +579,24 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
     } else {
         blk = blk_by_name(device);
         if (!blk) {
-            BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err);
-            if (bs) {
-                blk = local_blk = blk_new(bdrv_get_aio_context(bs),
-                                          0, BLK_PERM_ALL);
-                ret = blk_insert_bs(blk, bs, &err);
-                if (ret < 0) {
-                    goto fail;
-                }
-            } else {
+            bs = bdrv_lookup_bs(NULL, device, &err);
+            if (!bs) {
                 goto fail;
             }
         }
     }
 
+    ctx = blk ? blk_get_aio_context(blk) : bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
+
+    if (bs) {
+        blk = local_blk = blk_new(bdrv_get_aio_context(bs), 0, BLK_PERM_ALL);
+        ret = blk_insert_bs(blk, bs, &err);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
     /*
      * Notably absent: Proper permission management. This is sad, but it seems
      * almost impossible to achieve without changing the semantics and thereby
@@ -625,6 +626,11 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
 
 fail:
     blk_unref(local_blk);
+
+    if (ctx) {
+        aio_context_release(ctx);
+    }
+
     hmp_handle_error(mon, err);
 }
 
@@ -633,19 +639,19 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
 {
     ImageInfo *image_info;
 
-    assert(!info || !info->has_inserted || info->inserted == inserted);
+    assert(!info || !info->inserted || info->inserted == inserted);
 
     if (info && *info->device) {
-        monitor_printf(mon, "%s", info->device);
-        if (inserted && inserted->has_node_name) {
+        monitor_puts(mon, info->device);
+        if (inserted && inserted->node_name) {
             monitor_printf(mon, " (%s)", inserted->node_name);
         }
     } else {
         assert(info || inserted);
-        monitor_printf(mon, "%s",
-                       inserted && inserted->has_node_name ? inserted->node_name
-                       : info && info->has_qdev ? info->qdev
-                       : "<anonymous>");
+        monitor_puts(mon,
+                     inserted && inserted->node_name ? inserted->node_name
+                     : info && info->qdev ? info->qdev
+                     : "<anonymous>");
     }
 
     if (inserted) {
@@ -659,7 +665,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
     }
 
     if (info) {
-        if (info->has_qdev) {
+        if (info->qdev) {
             monitor_printf(mon, "    Attached to:      %s\n", info->qdev);
         }
         if (info->has_io_status && info->io_status != BLOCK_DEVICE_IO_STATUS_OK) {
@@ -684,7 +690,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
                    inserted->cache->direct ? ", direct" : "",
                    inserted->cache->no_flush ? ", ignore flushes" : "");
 
-    if (inserted->has_backing_file) {
+    if (inserted->backing_file) {
         monitor_printf(mon,
                        "    Backing file:     %s "
                        "(chain depth: %" PRId64 ")\n",
@@ -732,8 +738,8 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
         monitor_printf(mon, "\nImages:\n");
         image_info = inserted->image;
         while (1) {
-                bdrv_image_info_dump(image_info);
-            if (image_info->has_backing_image) {
+            bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0, false);
+            if (image_info->backing_image) {
                 image_info = image_info->backing_image;
             } else {
                 break;
@@ -742,7 +748,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
     }
 }
 
-void hmp_info_block(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_info_block(Monitor *mon, const QDict *qdict)
 {
     BlockInfoList *block_list, *info;
     BlockDeviceInfoList *blockdev_list, *blockdev;
@@ -767,8 +773,7 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
             monitor_printf(mon, "\n");
         }
 
-        print_block_info(mon, info->value, info->value->has_inserted
-                                           ? info->value->inserted : NULL,
+        print_block_info(mon, info->value, info->value->inserted,
                          verbose);
         printed = true;
     }
@@ -782,7 +787,7 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
     /* Print node information */
     blockdev_list = qmp_query_named_block_nodes(false, false, NULL);
     for (blockdev = blockdev_list; blockdev; blockdev = blockdev->next) {
-        assert(blockdev->value->has_node_name);
+        assert(blockdev->value->node_name);
         if (device && strcmp(device, blockdev->value->node_name)) {
             continue;
         }
@@ -803,7 +808,7 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict)
     stats_list = qmp_query_blockstats(false, false, NULL);
 
     for (stats = stats_list; stats; stats = stats->next) {
-        if (!stats->value->has_device) {
+        if (!stats->value->device) {
             continue;
         }
 
@@ -848,7 +853,7 @@ void hmp_info_block_jobs(Monitor *mon, const QDict *qdict)
     }
 
     while (list) {
-        if (strcmp(list->value->type, "stream") == 0) {
+        if (list->value->type == JOB_TYPE_STREAM) {
             monitor_printf(mon, "Streaming device %s: Completed %" PRId64
                            " of %" PRId64 " bytes, speed limit %" PRId64
                            " bytes/s\n",
@@ -860,7 +865,7 @@ void hmp_info_block_jobs(Monitor *mon, const QDict *qdict)
             monitor_printf(mon, "Type %s, device %s: Completed %" PRId64
                            " of %" PRId64 " bytes, speed limit %" PRId64
                            " bytes/s\n",
-                           list->value->type,
+                           JobType_str(list->value->type),
                            list->value->device,
                            list->value->offset,
                            list->value->len,
@@ -899,10 +904,13 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
 
     ImageEntry *image_entry, *next_ie;
     SnapshotEntry *snapshot_entry;
+    Error *err = NULL;
 
-    bs = bdrv_all_find_vmstate_bs();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    bs = bdrv_all_find_vmstate_bs(NULL, false, NULL, &err);
     if (!bs) {
-        monitor_printf(mon, "No available block device supports snapshots\n");
+        error_report_err(err);
         return;
     }
     aio_context = bdrv_get_aio_context(bs);
@@ -952,7 +960,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
     total = 0;
     for (i = 0; i < nb_sns; i++) {
         SnapshotEntry *next_sn;
-        if (bdrv_all_find_snapshot(sn_tab[i].name, &bs1) == 0) {
+        if (bdrv_all_has_snapshot(sn_tab[i].name, false, NULL, NULL) == 1) {
             global_snapshots[total] = i;
             total++;
             QTAILQ_FOREACH(image_entry, &image_list, next) {
@@ -1012,3 +1020,24 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
     g_free(sn_tab);
     g_free(global_snapshots);
 }
+
+void hmp_change_medium(Monitor *mon, const char *device, const char *target,
+                       const char *arg, const char *read_only, bool force,
+                       Error **errp)
+{
+    ERRP_GUARD();
+    BlockdevChangeReadOnlyMode read_only_mode = 0;
+
+    if (read_only) {
+        read_only_mode =
+            qapi_enum_parse(&BlockdevChangeReadOnlyMode_lookup,
+                            read_only,
+                            BLOCKDEV_CHANGE_READ_ONLY_MODE_RETAIN, errp);
+        if (*errp) {
+            return;
+        }
+    }
+
+    qmp_blockdev_change_medium(device, NULL, target, arg, true, force,
+                               !!read_only, read_only_mode, errp);
+}
index 374aac1140caaafa5a282c9810902814432770a6..1022516e93c56cee241778ff9b393455d97f167a 100644 (file)
@@ -1,2 +1,2 @@
-softmmu_ss.add(files('block-hmp-cmds.c'))
+system_ss.add(files('block-hmp-cmds.c'))
 block_ss.add(files('bitmap-qmp-cmds.c'))
index 42536702b6f9ca996c8adbf8c5feb6b89111f621..b9d4f935e017c738cc6efce96e815aaceb10ec01 100644 (file)
@@ -1,8 +1,8 @@
 /*
- * QEMU Block driver for  NBD
+ * QEMU Block driver for NBD
  *
  * Copyright (c) 2019 Virtuozzo International GmbH.
- * Copyright (C) 2016 Red Hat, Inc.
+ * Copyright Red Hat
  * Copyright (C) 2008 Bull S.A.S.
  *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
  *
 #include "block/qdict.h"
 #include "block/nbd.h"
 #include "block/block_int.h"
+#include "block/coroutines.h"
+
+#include "qemu/yank.h"
 
 #define EN_OPTSTR ":exportname="
 #define MAX_NBD_REQUESTS    16
 
-#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ (uint64_t)(intptr_t)(bs))
-#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ (uint64_t)(intptr_t)(bs))
+#define COOKIE_TO_INDEX(cookie) ((cookie) - 1)
+#define INDEX_TO_COOKIE(index)  ((index) + 1)
 
 typedef struct {
     Coroutine *coroutine;
     uint64_t offset;        /* original offset of the request */
-    bool receiving;         /* waiting for connection_co? */
+    bool receiving;         /* sleeping in the yield in nbd_receive_replies */
 } NBDClientRequest;
 
 typedef enum NBDClientState {
@@ -63,95 +66,64 @@ typedef enum NBDClientState {
     NBD_CLIENT_QUIT
 } NBDClientState;
 
-typedef enum NBDConnectThreadState {
-    /* No thread, no pending results */
-    CONNECT_THREAD_NONE,
-
-    /* Thread is running, no results for now */
-    CONNECT_THREAD_RUNNING,
+typedef struct BDRVNBDState {
+    QIOChannel *ioc; /* The current I/O channel */
+    NBDExportInfo info;
 
     /*
-     * Thread is running, but requestor exited. Thread should close
-     * the new socket and free the connect state on exit.
+     * Protects state, free_sema, in_flight, requests[].coroutine,
+     * reconnect_delay_timer.
      */
-    CONNECT_THREAD_RUNNING_DETACHED,
-
-    /* Thread finished, results are stored in a state */
-    CONNECT_THREAD_FAIL,
-    CONNECT_THREAD_SUCCESS
-} NBDConnectThreadState;
+    QemuMutex requests_lock;
+    NBDClientState state;
+    CoQueue free_sema;
+    unsigned in_flight;
+    NBDClientRequest requests[MAX_NBD_REQUESTS];
+    QEMUTimer *reconnect_delay_timer;
 
-typedef struct NBDConnectThread {
-    /* Initialization constants */
-    SocketAddress *saddr; /* address to connect to */
-    /*
-     * Bottom half to schedule on completion. Scheduled only if bh_ctx is not
-     * NULL
-     */
-    QEMUBHFunc *bh_func;
-    void *bh_opaque;
+    /* Protects sending data on the socket.  */
+    CoMutex send_mutex;
 
     /*
-     * Result of last attempt. Valid in FAIL and SUCCESS states.
-     * If you want to steal error, don't forget to set pointer to NULL.
+     * Protects receiving reply headers from the socket, as well as the
+     * fields reply and requests[].receiving
      */
-    QIOChannelSocket *sioc;
-    Error *err;
-
-    /* state and bh_ctx are protected by mutex */
-    QemuMutex mutex;
-    NBDConnectThreadState state; /* current state of the thread */
-    AioContext *bh_ctx; /* where to schedule bh (NULL means don't schedule) */
-} NBDConnectThread;
-
-typedef struct BDRVNBDState {
-    QIOChannelSocket *sioc; /* The master data channel */
-    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
-    NBDExportInfo info;
-
-    CoMutex send_mutex;
-    CoQueue free_sema;
-    Coroutine *connection_co;
-    Coroutine *teardown_co;
-    QemuCoSleepState *connection_co_sleep_ns_state;
-    bool drained;
-    bool wait_drained_end;
-    int in_flight;
-    NBDClientState state;
-    int connect_status;
-    Error *connect_err;
-    bool wait_in_flight;
+    CoMutex receive_mutex;
+    NBDReply reply;
 
-    QEMUTimer *reconnect_delay_timer;
+    QEMUTimer *open_timer;
 
-    NBDClientRequest requests[MAX_NBD_REQUESTS];
-    NBDReply reply;
     BlockDriverState *bs;
 
     /* Connection parameters */
     uint32_t reconnect_delay;
+    uint32_t open_timeout;
     SocketAddress *saddr;
-    char *export, *tlscredsid;
+    char *export;
+    char *tlscredsid;
     QCryptoTLSCreds *tlscreds;
-    const char *hostname;
+    char *tlshostname;
     char *x_dirty_bitmap;
     bool alloc_depth;
 
-    bool wait_connect;
-    NBDConnectThread *connect_thread;
+    NBDClientConnection *conn;
 } BDRVNBDState;
 
-static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
-                                                  Error **errp);
-static QIOChannelSocket *nbd_co_establish_connection(BlockDriverState *bs,
-                                                     Error **errp);
-static void nbd_co_establish_connection_cancel(BlockDriverState *bs,
-                                               bool detach);
-static int nbd_client_handshake(BlockDriverState *bs, QIOChannelSocket *sioc,
-                                Error **errp);
+static void nbd_yank(void *opaque);
 
-static void nbd_clear_bdrvstate(BDRVNBDState *s)
+static void nbd_clear_bdrvstate(BlockDriverState *bs)
 {
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+
+    nbd_client_connection_release(s->conn);
+    s->conn = NULL;
+
+    yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name));
+
+    /* Must not leave timers behind that would access freed data */
+    assert(!s->reconnect_delay_timer);
+    assert(!s->open_timer);
+
     object_unref(OBJECT(s->tlscreds));
     qapi_free_SocketAddress(s->saddr);
     s->saddr = NULL;
@@ -159,42 +131,62 @@ static void nbd_clear_bdrvstate(BDRVNBDState *s)
     s->export = NULL;
     g_free(s->tlscredsid);
     s->tlscredsid = NULL;
+    g_free(s->tlshostname);
+    s->tlshostname = NULL;
     g_free(s->x_dirty_bitmap);
     s->x_dirty_bitmap = NULL;
 }
 
-static void nbd_channel_error(BDRVNBDState *s, int ret)
+/* Called with s->receive_mutex taken.  */
+static bool coroutine_fn nbd_recv_coroutine_wake_one(NBDClientRequest *req)
+{
+    if (req->receiving) {
+        req->receiving = false;
+        aio_co_wake(req->coroutine);
+        return true;
+    }
+
+    return false;
+}
+
+static void coroutine_fn nbd_recv_coroutines_wake(BDRVNBDState *s)
+{
+    int i;
+
+    QEMU_LOCK_GUARD(&s->receive_mutex);
+    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
+        if (nbd_recv_coroutine_wake_one(&s->requests[i])) {
+            return;
+        }
+    }
+}
+
+/* Called with s->requests_lock held.  */
+static void coroutine_fn nbd_channel_error_locked(BDRVNBDState *s, int ret)
 {
+    if (s->state == NBD_CLIENT_CONNECTED) {
+        qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+    }
+
     if (ret == -EIO) {
         if (s->state == NBD_CLIENT_CONNECTED) {
             s->state = s->reconnect_delay ? NBD_CLIENT_CONNECTING_WAIT :
                                             NBD_CLIENT_CONNECTING_NOWAIT;
         }
     } else {
-        if (s->state == NBD_CLIENT_CONNECTED) {
-            qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
-        }
         s->state = NBD_CLIENT_QUIT;
     }
 }
 
-static void nbd_recv_coroutines_wake_all(BDRVNBDState *s)
+static void coroutine_fn nbd_channel_error(BDRVNBDState *s, int ret)
 {
-    int i;
-
-    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-        NBDClientRequest *req = &s->requests[i];
-
-        if (req->coroutine && req->receiving) {
-            aio_co_wake(req->coroutine);
-        }
-    }
+    QEMU_LOCK_GUARD(&s->requests_lock);
+    nbd_channel_error_locked(s, ret);
 }
 
 static void reconnect_delay_timer_del(BDRVNBDState *s)
 {
     if (s->reconnect_delay_timer) {
-        timer_del(s->reconnect_delay_timer);
         timer_free(s->reconnect_delay_timer);
         s->reconnect_delay_timer = NULL;
     }
@@ -204,22 +196,18 @@ static void reconnect_delay_timer_cb(void *opaque)
 {
     BDRVNBDState *s = opaque;
 
-    if (s->state == NBD_CLIENT_CONNECTING_WAIT) {
-        s->state = NBD_CLIENT_CONNECTING_NOWAIT;
-        while (qemu_co_enter_next(&s->free_sema, NULL)) {
-            /* Resume all queued requests */
+    reconnect_delay_timer_del(s);
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
+        if (s->state != NBD_CLIENT_CONNECTING_WAIT) {
+            return;
         }
+        s->state = NBD_CLIENT_CONNECTING_NOWAIT;
     }
-
-    reconnect_delay_timer_del(s);
+    nbd_co_establish_connection_cancel(s->conn);
 }
 
 static void reconnect_delay_timer_init(BDRVNBDState *s, uint64_t expire_time_ns)
 {
-    if (s->state != NBD_CLIENT_CONNECTING_WAIT) {
-        return;
-    }
-
     assert(!s->reconnect_delay_timer);
     s->reconnect_delay_timer = aio_timer_new(bdrv_get_aio_context(s->bs),
                                              QEMU_CLOCK_REALTIME,
@@ -228,609 +216,345 @@ static void reconnect_delay_timer_init(BDRVNBDState *s, uint64_t expire_time_ns)
     timer_mod(s->reconnect_delay_timer, expire_time_ns);
 }
 
-static void nbd_client_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
-
-    /* Timer is deleted in nbd_client_co_drain_begin() */
-    assert(!s->reconnect_delay_timer);
-    qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
-}
-
-static void nbd_client_attach_aio_context_bh(void *opaque)
-{
-    BlockDriverState *bs = opaque;
-    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
-
-    /*
-     * The node is still drained, so we know the coroutine has yielded in
-     * nbd_read_eof(), the only place where bs->in_flight can reach 0, or it is
-     * entered for the first time. Both places are safe for entering the
-     * coroutine.
-     */
-    qemu_aio_coroutine_enter(bs->aio_context, s->connection_co);
-    bdrv_dec_in_flight(bs);
-}
-
-static void nbd_client_attach_aio_context(BlockDriverState *bs,
-                                          AioContext *new_context)
-{
-    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
-
-    /*
-     * s->connection_co is either yielded from nbd_receive_reply or from
-     * nbd_co_reconnect_loop()
-     */
-    if (s->state == NBD_CLIENT_CONNECTED) {
-        qio_channel_attach_aio_context(QIO_CHANNEL(s->ioc), new_context);
-    }
-
-    bdrv_inc_in_flight(bs);
-
-    /*
-     * Need to wait here for the BH to run because the BH must run while the
-     * node is still drained.
-     */
-    aio_wait_bh_oneshot(new_context, nbd_client_attach_aio_context_bh, bs);
-}
-
-static void coroutine_fn nbd_client_co_drain_begin(BlockDriverState *bs)
-{
-    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
-
-    s->drained = true;
-    if (s->connection_co_sleep_ns_state) {
-        qemu_co_sleep_wake(s->connection_co_sleep_ns_state);
-    }
-
-    nbd_co_establish_connection_cancel(bs, false);
-
-    reconnect_delay_timer_del(s);
-
-    if (s->state == NBD_CLIENT_CONNECTING_WAIT) {
-        s->state = NBD_CLIENT_CONNECTING_NOWAIT;
-        qemu_co_queue_restart_all(&s->free_sema);
-    }
-}
-
-static void coroutine_fn nbd_client_co_drain_end(BlockDriverState *bs)
-{
-    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
-
-    s->drained = false;
-    if (s->wait_drained_end) {
-        s->wait_drained_end = false;
-        aio_co_wake(s->connection_co);
-    }
-}
-
-
 static void nbd_teardown_connection(BlockDriverState *bs)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
 
+    assert(!s->in_flight);
+
     if (s->ioc) {
-        /* finish any pending coroutines */
         qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
-    } else if (s->sioc) {
-        /* abort negotiation */
-        qio_channel_shutdown(QIO_CHANNEL(s->sioc), QIO_CHANNEL_SHUTDOWN_BOTH,
-                             NULL);
+        yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name),
+                                 nbd_yank, s->bs);
+        object_unref(OBJECT(s->ioc));
+        s->ioc = NULL;
     }
 
-    s->state = NBD_CLIENT_QUIT;
-    if (s->connection_co) {
-        if (s->connection_co_sleep_ns_state) {
-            qemu_co_sleep_wake(s->connection_co_sleep_ns_state);
-        }
-        nbd_co_establish_connection_cancel(bs, true);
-    }
-    if (qemu_in_coroutine()) {
-        s->teardown_co = qemu_coroutine_self();
-        /* connection_co resumes us when it terminates */
-        qemu_coroutine_yield();
-        s->teardown_co = NULL;
-    } else {
-        BDRV_POLL_WHILE(bs, s->connection_co);
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
+        s->state = NBD_CLIENT_QUIT;
     }
-    assert(!s->connection_co);
 }
 
-static bool nbd_client_connecting(BDRVNBDState *s)
+static void open_timer_del(BDRVNBDState *s)
 {
-    return s->state == NBD_CLIENT_CONNECTING_WAIT ||
-        s->state == NBD_CLIENT_CONNECTING_NOWAIT;
-}
-
-static bool nbd_client_connecting_wait(BDRVNBDState *s)
-{
-    return s->state == NBD_CLIENT_CONNECTING_WAIT;
+    if (s->open_timer) {
+        timer_free(s->open_timer);
+        s->open_timer = NULL;
+    }
 }
 
-static void connect_bh(void *opaque)
+static void open_timer_cb(void *opaque)
 {
-    BDRVNBDState *state = opaque;
+    BDRVNBDState *s = opaque;
 
-    assert(state->wait_connect);
-    state->wait_connect = false;
-    aio_co_wake(state->connection_co);
+    nbd_co_establish_connection_cancel(s->conn);
+    open_timer_del(s);
 }
 
-static void nbd_init_connect_thread(BDRVNBDState *s)
+static void open_timer_init(BDRVNBDState *s, uint64_t expire_time_ns)
 {
-    s->connect_thread = g_new(NBDConnectThread, 1);
-
-    *s->connect_thread = (NBDConnectThread) {
-        .saddr = QAPI_CLONE(SocketAddress, s->saddr),
-        .state = CONNECT_THREAD_NONE,
-        .bh_func = connect_bh,
-        .bh_opaque = s,
-    };
-
-    qemu_mutex_init(&s->connect_thread->mutex);
+    assert(!s->open_timer);
+    s->open_timer = aio_timer_new(bdrv_get_aio_context(s->bs),
+                                  QEMU_CLOCK_REALTIME,
+                                  SCALE_NS,
+                                  open_timer_cb, s);
+    timer_mod(s->open_timer, expire_time_ns);
 }
 
-static void nbd_free_connect_thread(NBDConnectThread *thr)
+static bool nbd_client_will_reconnect(BDRVNBDState *s)
 {
-    if (thr->sioc) {
-        qio_channel_close(QIO_CHANNEL(thr->sioc), NULL);
-    }
-    error_free(thr->err);
-    qapi_free_SocketAddress(thr->saddr);
-    g_free(thr);
+    /*
+     * Called only after a socket error, so this is not performance sensitive.
+     */
+    QEMU_LOCK_GUARD(&s->requests_lock);
+    return s->state == NBD_CLIENT_CONNECTING_WAIT;
 }
 
-static void *connect_thread_func(void *opaque)
+/*
+ * Update @bs with information learned during a completed negotiation process.
+ * Return failure if the server's advertised options are incompatible with the
+ * client's needs.
+ */
+static int coroutine_fn GRAPH_RDLOCK
+nbd_handle_updated_info(BlockDriverState *bs, Error **errp)
 {
-    NBDConnectThread *thr = opaque;
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     int ret;
-    bool do_free = false;
-
-    thr->sioc = qio_channel_socket_new();
 
-    error_free(thr->err);
-    thr->err = NULL;
-    ret = qio_channel_socket_connect_sync(thr->sioc, thr->saddr, &thr->err);
-    if (ret < 0) {
-        object_unref(OBJECT(thr->sioc));
-        thr->sioc = NULL;
+    if (s->x_dirty_bitmap) {
+        if (!s->info.base_allocation) {
+            error_setg(errp, "requested x-dirty-bitmap %s not found",
+                       s->x_dirty_bitmap);
+            return -EINVAL;
+        }
+        if (strcmp(s->x_dirty_bitmap, "qemu:allocation-depth") == 0) {
+            s->alloc_depth = true;
+        }
     }
 
-    qemu_mutex_lock(&thr->mutex);
-
-    switch (thr->state) {
-    case CONNECT_THREAD_RUNNING:
-        thr->state = ret < 0 ? CONNECT_THREAD_FAIL : CONNECT_THREAD_SUCCESS;
-        if (thr->bh_ctx) {
-            aio_bh_schedule_oneshot(thr->bh_ctx, thr->bh_func, thr->bh_opaque);
-
-            /* play safe, don't reuse bh_ctx on further connection attempts */
-            thr->bh_ctx = NULL;
+    if (s->info.flags & NBD_FLAG_READ_ONLY) {
+        ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp);
+        if (ret < 0) {
+            return ret;
         }
-        break;
-    case CONNECT_THREAD_RUNNING_DETACHED:
-        do_free = true;
-        break;
-    default:
-        abort();
     }
 
-    qemu_mutex_unlock(&thr->mutex);
+    if (s->info.flags & NBD_FLAG_SEND_FUA) {
+        bs->supported_write_flags = BDRV_REQ_FUA;
+        bs->supported_zero_flags |= BDRV_REQ_FUA;
+    }
 
-    if (do_free) {
-        nbd_free_connect_thread(thr);
+    if (s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
+        bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
+        if (s->info.flags & NBD_FLAG_SEND_FAST_ZERO) {
+            bs->supported_zero_flags |= BDRV_REQ_NO_FALLBACK;
+        }
     }
 
-    return NULL;
+    trace_nbd_client_handshake_success(s->export);
+
+    return 0;
 }
 
-static QIOChannelSocket *coroutine_fn
-nbd_co_establish_connection(BlockDriverState *bs, Error **errp)
+int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs,
+                                                bool blocking, Error **errp)
 {
-    QemuThread thread;
-    BDRVNBDState *s = bs->opaque;
-    QIOChannelSocket *res;
-    NBDConnectThread *thr = s->connect_thread;
-
-    qemu_mutex_lock(&thr->mutex);
-
-    switch (thr->state) {
-    case CONNECT_THREAD_FAIL:
-    case CONNECT_THREAD_NONE:
-        error_free(thr->err);
-        thr->err = NULL;
-        thr->state = CONNECT_THREAD_RUNNING;
-        qemu_thread_create(&thread, "nbd-connect",
-                           connect_thread_func, thr, QEMU_THREAD_DETACHED);
-        break;
-    case CONNECT_THREAD_SUCCESS:
-        /* Previous attempt finally succeeded in background */
-        thr->state = CONNECT_THREAD_NONE;
-        res = thr->sioc;
-        thr->sioc = NULL;
-        qemu_mutex_unlock(&thr->mutex);
-        return res;
-    case CONNECT_THREAD_RUNNING:
-        /* Already running, will wait */
-        break;
-    default:
-        abort();
-    }
-
-    thr->bh_ctx = qemu_get_current_aio_context();
-
-    qemu_mutex_unlock(&thr->mutex);
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+    int ret;
+    IO_CODE();
 
+    assert_bdrv_graph_readable();
+    assert(!s->ioc);
 
-    /*
-     * We are going to wait for connect-thread finish, but
-     * nbd_client_co_drain_begin() can interrupt.
-     *
-     * Note that wait_connect variable is not visible for connect-thread. It
-     * doesn't need mutex protection, it used only inside home aio context of
-     * bs.
-     */
-    s->wait_connect = true;
-    qemu_coroutine_yield();
-
-    qemu_mutex_lock(&thr->mutex);
-
-    switch (thr->state) {
-    case CONNECT_THREAD_SUCCESS:
-    case CONNECT_THREAD_FAIL:
-        thr->state = CONNECT_THREAD_NONE;
-        error_propagate(errp, thr->err);
-        thr->err = NULL;
-        res = thr->sioc;
-        thr->sioc = NULL;
-        break;
-    case CONNECT_THREAD_RUNNING:
-    case CONNECT_THREAD_RUNNING_DETACHED:
-        /*
-         * Obviously, drained section wants to start. Report the attempt as
-         * failed. Still connect thread is executing in background, and its
-         * result may be used for next connection attempt.
-         */
-        res = NULL;
-        error_setg(errp, "Connection attempt cancelled by other operation");
-        break;
+    s->ioc = nbd_co_establish_connection(s->conn, &s->info, blocking, errp);
+    if (!s->ioc) {
+        return -ECONNREFUSED;
+    }
 
-    case CONNECT_THREAD_NONE:
+    yank_register_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name), nbd_yank,
+                           bs);
+
+    ret = nbd_handle_updated_info(s->bs, NULL);
+    if (ret < 0) {
         /*
-         * Impossible. We've seen this thread running. So it should be
-         * running or at least give some results.
+         * We have connected, but must fail for other reasons.
+         * Send NBD_CMD_DISC as a courtesy to the server.
          */
-        abort();
-
-    default:
-        abort();
-    }
+        NBDRequest request = { .type = NBD_CMD_DISC, .mode = s->info.mode };
 
-    qemu_mutex_unlock(&thr->mutex);
+        nbd_send_request(s->ioc, &request);
 
-    return res;
-}
+        yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name),
+                                 nbd_yank, bs);
+        object_unref(OBJECT(s->ioc));
+        s->ioc = NULL;
 
-/*
- * nbd_co_establish_connection_cancel
- * Cancel nbd_co_establish_connection asynchronously: it will finish soon, to
- * allow drained section to begin.
- *
- * If detach is true, also cleanup the state (or if thread is running, move it
- * to CONNECT_THREAD_RUNNING_DETACHED state). s->connect_thread becomes NULL if
- * detach is true.
- */
-static void nbd_co_establish_connection_cancel(BlockDriverState *bs,
-                                               bool detach)
-{
-    BDRVNBDState *s = bs->opaque;
-    NBDConnectThread *thr = s->connect_thread;
-    bool wake = false;
-    bool do_free = false;
-
-    qemu_mutex_lock(&thr->mutex);
-
-    if (thr->state == CONNECT_THREAD_RUNNING) {
-        /* We can cancel only in running state, when bh is not yet scheduled */
-        thr->bh_ctx = NULL;
-        if (s->wait_connect) {
-            s->wait_connect = false;
-            wake = true;
-        }
-        if (detach) {
-            thr->state = CONNECT_THREAD_RUNNING_DETACHED;
-            s->connect_thread = NULL;
-        }
-    } else if (detach) {
-        do_free = true;
+        return ret;
     }
 
-    qemu_mutex_unlock(&thr->mutex);
+    qio_channel_set_blocking(s->ioc, false, NULL);
+    qio_channel_set_follow_coroutine_ctx(s->ioc, true);
 
-    if (do_free) {
-        nbd_free_connect_thread(thr);
-        s->connect_thread = NULL;
+    /* successfully connected */
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
+        s->state = NBD_CLIENT_CONNECTED;
     }
 
-    if (wake) {
-        aio_co_wake(s->connection_co);
-    }
+    return 0;
 }
 
-static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
+/* Called with s->requests_lock held.  */
+static bool nbd_client_connecting(BDRVNBDState *s)
 {
-    int ret;
-    Error *local_err = NULL;
-    QIOChannelSocket *sioc;
-
-    if (!nbd_client_connecting(s)) {
-        return;
-    }
-
-    /* Wait for completion of all in-flight requests */
-
-    qemu_co_mutex_lock(&s->send_mutex);
-
-    while (s->in_flight > 0) {
-        qemu_co_mutex_unlock(&s->send_mutex);
-        nbd_recv_coroutines_wake_all(s);
-        s->wait_in_flight = true;
-        qemu_coroutine_yield();
-        s->wait_in_flight = false;
-        qemu_co_mutex_lock(&s->send_mutex);
-    }
-
-    qemu_co_mutex_unlock(&s->send_mutex);
+    return s->state == NBD_CLIENT_CONNECTING_WAIT ||
+        s->state == NBD_CLIENT_CONNECTING_NOWAIT;
+}
 
-    if (!nbd_client_connecting(s)) {
-        return;
-    }
+/* Called with s->requests_lock taken.  */
+static void coroutine_fn GRAPH_RDLOCK nbd_reconnect_attempt(BDRVNBDState *s)
+{
+    int ret;
+    bool blocking = s->state == NBD_CLIENT_CONNECTING_WAIT;
 
     /*
      * Now we are sure that nobody is accessing the channel, and no one will
      * try until we set the state to CONNECTED.
      */
+    assert(nbd_client_connecting(s));
+    assert(s->in_flight == 1);
+
+    trace_nbd_reconnect_attempt(s->bs->in_flight);
+
+    if (blocking && !s->reconnect_delay_timer) {
+        /*
+         * It's the first reconnect attempt after switching to
+         * NBD_CLIENT_CONNECTING_WAIT
+         */
+        g_assert(s->reconnect_delay);
+        reconnect_delay_timer_init(s,
+            qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
+            s->reconnect_delay * NANOSECONDS_PER_SECOND);
+    }
 
     /* Finalize previous connection if any */
     if (s->ioc) {
-        qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
-        object_unref(OBJECT(s->sioc));
-        s->sioc = NULL;
+        yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name),
+                                 nbd_yank, s->bs);
         object_unref(OBJECT(s->ioc));
         s->ioc = NULL;
     }
 
-    sioc = nbd_co_establish_connection(s->bs, &local_err);
-    if (!sioc) {
-        ret = -ECONNREFUSED;
-        goto out;
-    }
-
-    bdrv_dec_in_flight(s->bs);
-
-    ret = nbd_client_handshake(s->bs, sioc, &local_err);
-
-    if (s->drained) {
-        s->wait_drained_end = true;
-        while (s->drained) {
-            /*
-             * We may be entered once from nbd_client_attach_aio_context_bh
-             * and then from nbd_client_co_drain_end. So here is a loop.
-             */
-            qemu_coroutine_yield();
-        }
-    }
-    bdrv_inc_in_flight(s->bs);
-
-out:
-    s->connect_status = ret;
-    error_free(s->connect_err);
-    s->connect_err = NULL;
-    error_propagate(&s->connect_err, local_err);
+    qemu_mutex_unlock(&s->requests_lock);
+    ret = nbd_co_do_establish_connection(s->bs, blocking, NULL);
+    trace_nbd_reconnect_attempt_result(ret, s->bs->in_flight);
+    qemu_mutex_lock(&s->requests_lock);
 
-    if (ret >= 0) {
-        /* successfully connected */
-        s->state = NBD_CLIENT_CONNECTED;
-        qemu_co_queue_restart_all(&s->free_sema);
-    }
+    /*
+     * The reconnect attempt is done (maybe successfully, maybe not), so
+     * we no longer need this timer.  Delete it so it will not outlive
+     * this I/O request (so draining removes all timers).
+     */
+    reconnect_delay_timer_del(s);
 }
 
-static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s)
+static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t cookie,
+                                            Error **errp)
 {
-    uint64_t timeout = 1 * NANOSECONDS_PER_SECOND;
-    uint64_t max_timeout = 16 * NANOSECONDS_PER_SECOND;
+    int ret;
+    uint64_t ind = COOKIE_TO_INDEX(cookie), ind2;
+    QEMU_LOCK_GUARD(&s->receive_mutex);
 
-    if (s->state == NBD_CLIENT_CONNECTING_WAIT) {
-        reconnect_delay_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
-                                   s->reconnect_delay * NANOSECONDS_PER_SECOND);
-    }
-
-    nbd_reconnect_attempt(s);
-
-    while (nbd_client_connecting(s)) {
-        if (s->drained) {
-            bdrv_dec_in_flight(s->bs);
-            s->wait_drained_end = true;
-            while (s->drained) {
-                /*
-                 * We may be entered once from nbd_client_attach_aio_context_bh
-                 * and then from nbd_client_co_drain_end. So here is a loop.
-                 */
-                qemu_coroutine_yield();
-            }
-            bdrv_inc_in_flight(s->bs);
-        } else {
-            qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout,
-                                      &s->connection_co_sleep_ns_state);
-            if (s->drained) {
-                continue;
-            }
-            if (timeout < max_timeout) {
-                timeout *= 2;
-            }
+    while (true) {
+        if (s->reply.cookie == cookie) {
+            /* We are done */
+            return 0;
         }
 
-        nbd_reconnect_attempt(s);
-    }
-
-    reconnect_delay_timer_del(s);
-}
-
-static coroutine_fn void nbd_connection_entry(void *opaque)
-{
-    BDRVNBDState *s = opaque;
-    uint64_t i;
-    int ret = 0;
-    Error *local_err = NULL;
+        if (s->reply.cookie != 0) {
+            /*
+             * Some other request is being handled now. It should already be
+             * woken by whoever set s->reply.cookie (or never wait in this
+             * yield). So, we should not wake it here.
+             */
+            ind2 = COOKIE_TO_INDEX(s->reply.cookie);
+            assert(!s->requests[ind2].receiving);
 
-    while (s->state != NBD_CLIENT_QUIT) {
-        /*
-         * The NBD client can only really be considered idle when it has
-         * yielded from qio_channel_readv_all_eof(), waiting for data. This is
-         * the point where the additional scheduled coroutine entry happens
-         * after nbd_client_attach_aio_context().
-         *
-         * Therefore we keep an additional in_flight reference all the time and
-         * only drop it temporarily here.
-         */
+            s->requests[ind].receiving = true;
+            qemu_co_mutex_unlock(&s->receive_mutex);
 
-        if (nbd_client_connecting(s)) {
-            nbd_co_reconnect_loop(s);
-        }
+            qemu_coroutine_yield();
+            /*
+             * We may be woken for 2 reasons:
+             * 1. From this function, executing in parallel coroutine, when our
+             *    cookie is received.
+             * 2. From nbd_co_receive_one_chunk(), when previous request is
+             *    finished and s->reply.cookie set to 0.
+             * Anyway, it's OK to lock the mutex and go to the next iteration.
+             */
 
-        if (s->state != NBD_CLIENT_CONNECTED) {
+            qemu_co_mutex_lock(&s->receive_mutex);
+            assert(!s->requests[ind].receiving);
             continue;
         }
 
-        assert(s->reply.handle == 0);
-        ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, &local_err);
-
-        if (local_err) {
-            trace_nbd_read_reply_entry_fail(ret, error_get_pretty(local_err));
-            error_free(local_err);
-            local_err = NULL;
+        /* We are under mutex and cookie is 0. We have to do the dirty work. */
+        assert(s->reply.cookie == 0);
+        ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, s->info.mode, errp);
+        if (ret == 0) {
+            ret = -EIO;
+            error_setg(errp, "server dropped connection");
         }
-        if (ret <= 0) {
-            nbd_channel_error(s, ret ? ret : -EIO);
-            continue;
+        if (ret < 0) {
+            nbd_channel_error(s, ret);
+            return ret;
         }
-
-        /*
-         * There's no need for a mutex on the receive side, because the
-         * handler acts as a synchronization point and ensures that only
-         * one coroutine is called until the reply finishes.
-         */
-        i = HANDLE_TO_INDEX(s, s->reply.handle);
-        if (i >= MAX_NBD_REQUESTS ||
-            !s->requests[i].coroutine ||
-            !s->requests[i].receiving ||
-            (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply))
-        {
+        if (nbd_reply_is_structured(&s->reply) &&
+            s->info.mode < NBD_MODE_STRUCTURED) {
             nbd_channel_error(s, -EINVAL);
-            continue;
+            error_setg(errp, "unexpected structured reply");
+            return -EINVAL;
         }
-
-        /*
-         * We're woken up again by the request itself.  Note that there
-         * is no race between yielding and reentering connection_co.  This
-         * is because:
-         *
-         * - if the request runs on the same AioContext, it is only
-         *   entered after we yield
-         *
-         * - if the request runs on a different AioContext, reentering
-         *   connection_co happens through a bottom half, which can only
-         *   run after we yield.
-         */
-        aio_co_wake(s->requests[i].coroutine);
-        qemu_coroutine_yield();
-    }
-
-    qemu_co_queue_restart_all(&s->free_sema);
-    nbd_recv_coroutines_wake_all(s);
-    bdrv_dec_in_flight(s->bs);
-
-    s->connection_co = NULL;
-    if (s->ioc) {
-        qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
-        object_unref(OBJECT(s->sioc));
-        s->sioc = NULL;
-        object_unref(OBJECT(s->ioc));
-        s->ioc = NULL;
-    }
-
-    if (s->teardown_co) {
-        aio_co_wake(s->teardown_co);
+        ind2 = COOKIE_TO_INDEX(s->reply.cookie);
+        if (ind2 >= MAX_NBD_REQUESTS || !s->requests[ind2].coroutine) {
+            nbd_channel_error(s, -EINVAL);
+            error_setg(errp, "unexpected cookie value");
+            return -EINVAL;
+        }
+        if (s->reply.cookie == cookie) {
+            /* We are done */
+            return 0;
+        }
+        nbd_recv_coroutine_wake_one(&s->requests[ind2]);
     }
-    aio_wait_kick();
 }
 
-static int nbd_co_send_request(BlockDriverState *bs,
-                               NBDRequest *request,
-                               QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+nbd_co_send_request(BlockDriverState *bs, NBDRequest *request,
+                    QEMUIOVector *qiov)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     int rc, i = -1;
 
-    qemu_co_mutex_lock(&s->send_mutex);
-    while (s->in_flight == MAX_NBD_REQUESTS || nbd_client_connecting_wait(s)) {
-        qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
+    qemu_mutex_lock(&s->requests_lock);
+    while (s->in_flight == MAX_NBD_REQUESTS ||
+           (s->state != NBD_CLIENT_CONNECTED && s->in_flight > 0)) {
+        qemu_co_queue_wait(&s->free_sema, &s->requests_lock);
     }
 
+    s->in_flight++;
     if (s->state != NBD_CLIENT_CONNECTED) {
-        rc = -EIO;
-        goto err;
+        if (nbd_client_connecting(s)) {
+            nbd_reconnect_attempt(s);
+            qemu_co_queue_restart_all(&s->free_sema);
+        }
+        if (s->state != NBD_CLIENT_CONNECTED) {
+            rc = -EIO;
+            goto err;
+        }
     }
 
-    s->in_flight++;
-
     for (i = 0; i < MAX_NBD_REQUESTS; i++) {
         if (s->requests[i].coroutine == NULL) {
             break;
         }
     }
 
-    g_assert(qemu_in_coroutine());
     assert(i < MAX_NBD_REQUESTS);
-
     s->requests[i].coroutine = qemu_coroutine_self();
     s->requests[i].offset = request->from;
     s->requests[i].receiving = false;
+    qemu_mutex_unlock(&s->requests_lock);
 
-    request->handle = INDEX_TO_HANDLE(s, i);
+    qemu_co_mutex_lock(&s->send_mutex);
+    request->cookie = INDEX_TO_COOKIE(i);
+    request->mode = s->info.mode;
 
     assert(s->ioc);
 
     if (qiov) {
         qio_channel_set_cork(s->ioc, true);
         rc = nbd_send_request(s->ioc, request);
-        if (rc >= 0 && s->state == NBD_CLIENT_CONNECTED) {
-            if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
-                                       NULL) < 0) {
-                rc = -EIO;
-            }
-        } else if (rc >= 0) {
+        if (rc >= 0 && qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
+                                              NULL) < 0) {
             rc = -EIO;
         }
         qio_channel_set_cork(s->ioc, false);
     } else {
         rc = nbd_send_request(s->ioc, request);
     }
+    qemu_co_mutex_unlock(&s->send_mutex);
 
-err:
     if (rc < 0) {
-        nbd_channel_error(s, rc);
+        qemu_mutex_lock(&s->requests_lock);
+err:
+        nbd_channel_error_locked(s, rc);
         if (i != -1) {
             s->requests[i].coroutine = NULL;
-            s->in_flight--;
-        }
-        if (s->in_flight == 0 && s->wait_in_flight) {
-            aio_co_wake(s->connection_co);
-        } else {
-            qemu_co_queue_next(&s->free_sema);
         }
+        s->in_flight--;
+        qemu_co_queue_next(&s->free_sema);
+        qemu_mutex_unlock(&s->requests_lock);
     }
-    qemu_co_mutex_unlock(&s->send_mutex);
     return rc;
 }
 
@@ -892,13 +616,17 @@ static int nbd_parse_offset_hole_payload(BDRVNBDState *s,
  */
 static int nbd_parse_blockstatus_payload(BDRVNBDState *s,
                                          NBDStructuredReplyChunk *chunk,
-                                         uint8_t *payload, uint64_t orig_length,
-                                         NBDExtent *extent, Error **errp)
+                                         uint8_t *payload, bool wide,
+                                         uint64_t orig_length,
+                                         NBDExtent64 *extent, Error **errp)
 {
     uint32_t context_id;
+    uint32_t count;
+    size_t ext_len = wide ? sizeof(*extent) : sizeof(NBDExtent32);
+    size_t pay_len = sizeof(context_id) + wide * sizeof(count) + ext_len;
 
     /* The server succeeded, so it must have sent [at least] one extent */
-    if (chunk->length < sizeof(context_id) + sizeof(*extent)) {
+    if (chunk->length < pay_len) {
         error_setg(errp, "Protocol error: invalid payload for "
                          "NBD_REPLY_TYPE_BLOCK_STATUS");
         return -EINVAL;
@@ -913,8 +641,15 @@ static int nbd_parse_blockstatus_payload(BDRVNBDState *s,
         return -EINVAL;
     }
 
-    extent->length = payload_advance32(&payload);
-    extent->flags = payload_advance32(&payload);
+    if (wide) {
+        count = payload_advance32(&payload);
+        extent->length = payload_advance64(&payload);
+        extent->flags = payload_advance64(&payload);
+    } else {
+        count = 0;
+        extent->length = payload_advance32(&payload);
+        extent->flags = payload_advance32(&payload);
+    }
 
     if (extent->length == 0) {
         error_setg(errp, "Protocol error: server sent status chunk with "
@@ -935,7 +670,7 @@ static int nbd_parse_blockstatus_payload(BDRVNBDState *s,
      * (always a safe status, even if it loses information).
      */
     if (s->info.min_block && !QEMU_IS_ALIGNED(extent->length,
-                                                   s->info.min_block)) {
+                                              s->info.min_block)) {
         trace_nbd_parse_blockstatus_compliance("extent length is unaligned");
         if (extent->length > s->info.min_block) {
             extent->length = QEMU_ALIGN_DOWN(extent->length,
@@ -949,13 +684,15 @@ static int nbd_parse_blockstatus_payload(BDRVNBDState *s,
     /*
      * We used NBD_CMD_FLAG_REQ_ONE, so the server should not have
      * sent us any more than one extent, nor should it have included
-     * status beyond our request in that extent. However, it's easy
-     * enough to ignore the server's noncompliance without killing the
+     * status beyond our request in that extent. Furthermore, a wide
+     * server should have replied with an accurate count (we left
+     * count at 0 for a narrow server).  However, it's easy enough to
+     * ignore the server's noncompliance without killing the
      * connection; just ignore trailing extents, and clamp things to
      * the length of our request.
      */
-    if (chunk->length > sizeof(context_id) + sizeof(*extent)) {
-        trace_nbd_parse_blockstatus_compliance("more than one extent");
+    if (count != wide || chunk->length > pay_len) {
+        trace_nbd_parse_blockstatus_compliance("unexpected extent count");
     }
     if (extent->length > orig_length) {
         extent->length = orig_length;
@@ -1017,9 +754,9 @@ static int nbd_parse_error_payload(NBDStructuredReplyChunk *chunk,
     return 0;
 }
 
-static int nbd_co_receive_offset_data_payload(BDRVNBDState *s,
-                                              uint64_t orig_offset,
-                                              QEMUIOVector *qiov, Error **errp)
+static int coroutine_fn
+nbd_co_receive_offset_data_payload(BDRVNBDState *s, uint64_t orig_offset,
+                                   QEMUIOVector *qiov, Error **errp)
 {
     QEMUIOVector sub_qiov;
     uint64_t offset;
@@ -1112,11 +849,11 @@ static coroutine_fn int nbd_co_receive_structured_payload(
  * corresponding to the server's error reply), and errp is unchanged.
  */
 static coroutine_fn int nbd_co_do_receive_one_chunk(
-        BDRVNBDState *s, uint64_t handle, bool only_structured,
+        BDRVNBDState *s, uint64_t cookie, bool only_structured,
         int *request_ret, QEMUIOVector *qiov, void **payload, Error **errp)
 {
     int ret;
-    int i = HANDLE_TO_INDEX(s, handle);
+    int i = COOKIE_TO_INDEX(cookie);
     void *local_payload = NULL;
     NBDStructuredReplyChunk *chunk;
 
@@ -1125,17 +862,14 @@ static coroutine_fn int nbd_co_do_receive_one_chunk(
     }
     *request_ret = 0;
 
-    /* Wait until we're woken up by nbd_connection_entry.  */
-    s->requests[i].receiving = true;
-    qemu_coroutine_yield();
-    s->requests[i].receiving = false;
-    if (s->state != NBD_CLIENT_CONNECTED) {
-        error_setg(errp, "Connection closed");
+    ret = nbd_receive_replies(s, cookie, errp);
+    if (ret < 0) {
+        error_prepend(errp, "Connection closed: ");
         return -EIO;
     }
     assert(s->ioc);
 
-    assert(s->reply.handle == handle);
+    assert(s->reply.cookie == cookie);
 
     if (nbd_reply_is_simple(&s->reply)) {
         if (only_structured) {
@@ -1154,7 +888,7 @@ static coroutine_fn int nbd_co_do_receive_one_chunk(
     }
 
     /* handle structured reply chunk */
-    assert(s->info.structured_reply);
+    assert(s->info.mode >= NBD_MODE_STRUCTURED);
     chunk = &s->reply.structured;
 
     if (chunk->type == NBD_REPLY_TYPE_NONE) {
@@ -1205,11 +939,11 @@ static coroutine_fn int nbd_co_do_receive_one_chunk(
  * Return value is a fatal error code or normal nbd reply error code
  */
 static coroutine_fn int nbd_co_receive_one_chunk(
-        BDRVNBDState *s, uint64_t handle, bool only_structured,
+        BDRVNBDState *s, uint64_t cookie, bool only_structured,
         int *request_ret, QEMUIOVector *qiov, NBDReply *reply, void **payload,
         Error **errp)
 {
-    int ret = nbd_co_do_receive_one_chunk(s, handle, only_structured,
+    int ret = nbd_co_do_receive_one_chunk(s, cookie, only_structured,
                                           request_ret, qiov, payload, errp);
 
     if (ret < 0) {
@@ -1219,16 +953,9 @@ static coroutine_fn int nbd_co_receive_one_chunk(
         /* For assert at loop start in nbd_connection_entry */
         *reply = s->reply;
     }
-    s->reply.handle = 0;
+    s->reply.cookie = 0;
 
-    if (s->connection_co && !s->wait_in_flight) {
-        /*
-         * We must check s->wait_in_flight, because we may entered by
-         * nbd_recv_coroutines_wake_all(), in this case we should not
-         * wake connection_co here, it will woken by last request.
-         */
-        aio_co_wake(s->connection_co);
-    }
+    nbd_recv_coroutines_wake(s);
 
     return ret;
 }
@@ -1269,30 +996,26 @@ static void nbd_iter_request_error(NBDReplyChunkIter *iter, int ret)
  * NBD_FOREACH_REPLY_CHUNK
  * The pointer stored in @payload requires g_free() to free it.
  */
-#define NBD_FOREACH_REPLY_CHUNK(s, iter, handle, structured, \
+#define NBD_FOREACH_REPLY_CHUNK(s, iter, cookie, structured, \
                                 qiov, reply, payload) \
     for (iter = (NBDReplyChunkIter) { .only_structured = structured }; \
-         nbd_reply_chunk_iter_receive(s, &iter, handle, qiov, reply, payload);)
+         nbd_reply_chunk_iter_receive(s, &iter, cookie, qiov, reply, payload);)
 
 /*
  * nbd_reply_chunk_iter_receive
  * The pointer stored in @payload requires g_free() to free it.
  */
-static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s,
-                                         NBDReplyChunkIter *iter,
-                                         uint64_t handle,
-                                         QEMUIOVector *qiov, NBDReply *reply,
-                                         void **payload)
+static bool coroutine_fn nbd_reply_chunk_iter_receive(BDRVNBDState *s,
+                                                      NBDReplyChunkIter *iter,
+                                                      uint64_t cookie,
+                                                      QEMUIOVector *qiov,
+                                                      NBDReply *reply,
+                                                      void **payload)
 {
     int ret, request_ret;
     NBDReply local_reply;
     NBDStructuredReplyChunk *chunk;
     Error *local_err = NULL;
-    if (s->state != NBD_CLIENT_CONNECTED) {
-        error_setg(&local_err, "Connection closed");
-        nbd_iter_channel_error(iter, -EIO, &local_err);
-        goto break_loop;
-    }
 
     if (iter->done) {
         /* Previous iteration was last. */
@@ -1303,7 +1026,7 @@ static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s,
         reply = &local_reply;
     }
 
-    ret = nbd_co_receive_one_chunk(s, handle, iter->only_structured,
+    ret = nbd_co_receive_one_chunk(s, cookie, iter->only_structured,
                                    &request_ret, qiov, reply, payload,
                                    &local_err);
     if (ret < 0) {
@@ -1313,7 +1036,7 @@ static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s,
     }
 
     /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */
-    if (nbd_reply_is_simple(reply) || s->state != NBD_CLIENT_CONNECTED) {
+    if (nbd_reply_is_simple(reply) || iter->ret < 0) {
         goto break_loop;
     }
 
@@ -1335,26 +1058,22 @@ static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s,
     return true;
 
 break_loop:
-    s->requests[HANDLE_TO_INDEX(s, handle)].coroutine = NULL;
-
-    qemu_co_mutex_lock(&s->send_mutex);
+    qemu_mutex_lock(&s->requests_lock);
+    s->requests[COOKIE_TO_INDEX(cookie)].coroutine = NULL;
     s->in_flight--;
-    if (s->in_flight == 0 && s->wait_in_flight) {
-        aio_co_wake(s->connection_co);
-    } else {
-        qemu_co_queue_next(&s->free_sema);
-    }
-    qemu_co_mutex_unlock(&s->send_mutex);
+    qemu_co_queue_next(&s->free_sema);
+    qemu_mutex_unlock(&s->requests_lock);
 
     return false;
 }
 
-static int nbd_co_receive_return_code(BDRVNBDState *s, uint64_t handle,
-                                      int *request_ret, Error **errp)
+static int coroutine_fn
+nbd_co_receive_return_code(BDRVNBDState *s, uint64_t cookie,
+                           int *request_ret, Error **errp)
 {
     NBDReplyChunkIter iter;
 
-    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, NULL, NULL) {
+    NBD_FOREACH_REPLY_CHUNK(s, iter, cookie, false, NULL, NULL, NULL) {
         /* nbd_reply_chunk_iter_receive does all the work */
     }
 
@@ -1363,16 +1082,18 @@ static int nbd_co_receive_return_code(BDRVNBDState *s, uint64_t handle,
     return iter.ret;
 }
 
-static int nbd_co_receive_cmdread_reply(BDRVNBDState *s, uint64_t handle,
-                                        uint64_t offset, QEMUIOVector *qiov,
-                                        int *request_ret, Error **errp)
+static int coroutine_fn
+nbd_co_receive_cmdread_reply(BDRVNBDState *s, uint64_t cookie,
+                             uint64_t offset, QEMUIOVector *qiov,
+                             int *request_ret, Error **errp)
 {
     NBDReplyChunkIter iter;
     NBDReply reply;
     void *payload = NULL;
     Error *local_err = NULL;
 
-    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
+    NBD_FOREACH_REPLY_CHUNK(s, iter, cookie,
+                            s->info.mode >= NBD_MODE_STRUCTURED,
                             qiov, &reply, &payload)
     {
         int ret;
@@ -1415,10 +1136,10 @@ static int nbd_co_receive_cmdread_reply(BDRVNBDState *s, uint64_t handle,
     return iter.ret;
 }
 
-static int nbd_co_receive_blockstatus_reply(BDRVNBDState *s,
-                                            uint64_t handle, uint64_t length,
-                                            NBDExtent *extent,
-                                            int *request_ret, Error **errp)
+static int coroutine_fn
+nbd_co_receive_blockstatus_reply(BDRVNBDState *s, uint64_t cookie,
+                                 uint64_t length, NBDExtent64 *extent,
+                                 int *request_ret, Error **errp)
 {
     NBDReplyChunkIter iter;
     NBDReply reply;
@@ -1427,14 +1148,20 @@ static int nbd_co_receive_blockstatus_reply(BDRVNBDState *s,
     bool received = false;
 
     assert(!extent->length);
-    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, &reply, &payload) {
+    NBD_FOREACH_REPLY_CHUNK(s, iter, cookie, false, NULL, &reply, &payload) {
         int ret;
         NBDStructuredReplyChunk *chunk = &reply.structured;
+        bool wide;
 
         assert(nbd_reply_is_structured(&reply));
 
         switch (chunk->type) {
+        case NBD_REPLY_TYPE_BLOCK_STATUS_EXT:
         case NBD_REPLY_TYPE_BLOCK_STATUS:
+            wide = chunk->type == NBD_REPLY_TYPE_BLOCK_STATUS_EXT;
+            if ((s->info.mode >= NBD_MODE_EXTENDED) != wide) {
+                trace_nbd_extended_headers_compliance("block_status");
+            }
             if (received) {
                 nbd_channel_error(s, -EINVAL);
                 error_setg(&local_err, "Several BLOCK_STATUS chunks in reply");
@@ -1442,9 +1169,9 @@ static int nbd_co_receive_blockstatus_reply(BDRVNBDState *s,
             }
             received = true;
 
-            ret = nbd_parse_blockstatus_payload(s, &reply.structured,
-                                                payload, length, extent,
-                                                &local_err);
+            ret = nbd_parse_blockstatus_payload(
+                s, &reply.structured, payload, wide,
+                length, extent, &local_err);
             if (ret < 0) {
                 nbd_channel_error(s, ret);
                 nbd_iter_channel_error(&iter, ret, &local_err);
@@ -1475,8 +1202,9 @@ static int nbd_co_receive_blockstatus_reply(BDRVNBDState *s,
     return iter.ret;
 }
 
-static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
-                          QEMUIOVector *write_qiov)
+static int coroutine_fn GRAPH_RDLOCK
+nbd_co_request(BlockDriverState *bs, NBDRequest *request,
+               QEMUIOVector *write_qiov)
 {
     int ret, request_ret;
     Error *local_err = NULL;
@@ -1496,24 +1224,25 @@ static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
             continue;
         }
 
-        ret = nbd_co_receive_return_code(s, request->handle,
+        ret = nbd_co_receive_return_code(s, request->cookie,
                                          &request_ret, &local_err);
         if (local_err) {
             trace_nbd_co_request_fail(request->from, request->len,
-                                      request->handle, request->flags,
+                                      request->cookie, request->flags,
                                       request->type,
                                       nbd_cmd_lookup(request->type),
                                       ret, error_get_pretty(local_err));
             error_free(local_err);
             local_err = NULL;
         }
-    } while (ret < 0 && nbd_client_connecting_wait(s));
+    } while (ret < 0 && nbd_client_will_reconnect(s));
 
     return ret ? ret : request_ret;
 }
 
-static int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
-                                uint64_t bytes, QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+nbd_client_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     int ret, request_ret;
     Error *local_err = NULL;
@@ -1525,7 +1254,6 @@ static int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
     };
 
     assert(bytes <= NBD_MAX_BUFFER_SIZE);
-    assert(!flags);
 
     if (!bytes) {
         return 0;
@@ -1555,23 +1283,24 @@ static int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
             continue;
         }
 
-        ret = nbd_co_receive_cmdread_reply(s, request.handle, offset, qiov,
+        ret = nbd_co_receive_cmdread_reply(s, request.cookie, offset, qiov,
                                            &request_ret, &local_err);
         if (local_err) {
-            trace_nbd_co_request_fail(request.from, request.len, request.handle,
+            trace_nbd_co_request_fail(request.from, request.len, request.cookie,
                                       request.flags, request.type,
                                       nbd_cmd_lookup(request.type),
                                       ret, error_get_pretty(local_err));
             error_free(local_err);
             local_err = NULL;
         }
-    } while (ret < 0 && nbd_client_connecting_wait(s));
+    } while (ret < 0 && nbd_client_will_reconnect(s));
 
     return ret ? ret : request_ret;
 }
 
-static int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
-                                 uint64_t bytes, QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+nbd_client_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                      QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     NBDRequest request = {
@@ -1594,8 +1323,9 @@ static int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
     return nbd_co_request(bs, &request, qiov);
 }
 
-static int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
-                                       int bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                            BdrvRequestFlags flags)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     NBDRequest request = {
@@ -1604,6 +1334,9 @@ static int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
         .len = bytes,
     };
 
+    /* rely on max_pwrite_zeroes */
+    assert(bytes <= UINT32_MAX || s->info.mode >= NBD_MODE_EXTENDED);
+
     assert(!(s->info.flags & NBD_FLAG_READ_ONLY));
     if (!(s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
         return -ENOTSUP;
@@ -1627,7 +1360,7 @@ static int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
     return nbd_co_request(bs, &request, NULL);
 }
 
-static int nbd_client_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK nbd_client_co_flush(BlockDriverState *bs)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     NBDRequest request = { .type = NBD_CMD_FLUSH };
@@ -1642,8 +1375,8 @@ static int nbd_client_co_flush(BlockDriverState *bs)
     return nbd_co_request(bs, &request, NULL);
 }
 
-static int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset,
-                                  int bytes)
+static int coroutine_fn GRAPH_RDLOCK
+nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     NBDRequest request = {
@@ -1652,6 +1385,9 @@ static int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset,
         .len = bytes,
     };
 
+    /* rely on max_pdiscard */
+    assert(bytes <= UINT32_MAX || s->info.mode >= NBD_MODE_EXTENDED);
+
     assert(!(s->info.flags & NBD_FLAG_READ_ONLY));
     if (!(s->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) {
         return 0;
@@ -1660,20 +1396,19 @@ static int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset,
     return nbd_co_request(bs, &request, NULL);
 }
 
-static int coroutine_fn nbd_client_co_block_status(
+static int coroutine_fn GRAPH_RDLOCK nbd_client_co_block_status(
         BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
         int64_t *pnum, int64_t *map, BlockDriverState **file)
 {
     int ret, request_ret;
-    NBDExtent extent = { 0 };
+    NBDExtent64 extent = { 0 };
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     Error *local_err = NULL;
 
     NBDRequest request = {
         .type = NBD_CMD_BLOCK_STATUS,
         .from = offset,
-        .len = MIN(QEMU_ALIGN_DOWN(INT_MAX, bs->bl.request_alignment),
-                   MIN(bytes, s->info.size - offset)),
+        .len = MIN(bytes, s->info.size - offset),
         .flags = NBD_CMD_FLAG_REQ_ONE,
     };
 
@@ -1683,6 +1418,10 @@ static int coroutine_fn nbd_client_co_block_status(
         *file = bs;
         return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
     }
+    if (s->info.mode < NBD_MODE_EXTENDED) {
+        request.len = MIN(QEMU_ALIGN_DOWN(INT_MAX, bs->bl.request_alignment),
+                          request.len);
+    }
 
     /*
      * Work around the fact that the block layer doesn't do
@@ -1707,18 +1446,18 @@ static int coroutine_fn nbd_client_co_block_status(
             continue;
         }
 
-        ret = nbd_co_receive_blockstatus_reply(s, request.handle, bytes,
+        ret = nbd_co_receive_blockstatus_reply(s, request.cookie, bytes,
                                                &extent, &request_ret,
                                                &local_err);
         if (local_err) {
-            trace_nbd_co_request_fail(request.from, request.len, request.handle,
+            trace_nbd_co_request_fail(request.from, request.len, request.cookie,
                                       request.flags, request.type,
                                       nbd_cmd_lookup(request.type),
                                       ret, error_get_pretty(local_err));
             error_free(local_err);
             local_err = NULL;
         }
-    } while (ret < 0 && nbd_client_connecting_wait(s));
+    } while (ret < 0 && nbd_client_will_reconnect(s));
 
     if (ret < 0 || request_ret < 0) {
         return ret ? ret : request_ret;
@@ -1745,121 +1484,29 @@ static int nbd_client_reopen_prepare(BDRVReopenState *state,
     return 0;
 }
 
-static void nbd_client_close(BlockDriverState *bs)
+static void nbd_yank(void *opaque)
 {
+    BlockDriverState *bs = opaque;
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
-    NBDRequest request = { .type = NBD_CMD_DISC };
 
-    if (s->ioc) {
-        nbd_send_request(s->ioc, &request);
-    }
-
-    nbd_teardown_connection(bs);
-}
-
-static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
-                                                  Error **errp)
-{
-    ERRP_GUARD();
-    QIOChannelSocket *sioc;
-
-    sioc = qio_channel_socket_new();
-    qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client");
-
-    qio_channel_socket_connect_sync(sioc, saddr, errp);
-    if (*errp) {
-        object_unref(OBJECT(sioc));
-        return NULL;
-    }
-
-    qio_channel_set_delay(QIO_CHANNEL(sioc), false);
-
-    return sioc;
+    QEMU_LOCK_GUARD(&s->requests_lock);
+    qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+    s->state = NBD_CLIENT_QUIT;
 }
 
-/* nbd_client_handshake takes ownership on sioc. On failure it is unref'ed. */
-static int nbd_client_handshake(BlockDriverState *bs, QIOChannelSocket *sioc,
-                                Error **errp)
+static void nbd_client_close(BlockDriverState *bs)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
-    AioContext *aio_context = bdrv_get_aio_context(bs);
-    int ret;
-
-    trace_nbd_client_handshake(s->export);
-
-    s->sioc = sioc;
-
-    qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
-    qio_channel_attach_aio_context(QIO_CHANNEL(sioc), aio_context);
+    NBDRequest request = { .type = NBD_CMD_DISC, .mode = s->info.mode };
 
-    s->info.request_sizes = true;
-    s->info.structured_reply = true;
-    s->info.base_allocation = true;
-    s->info.x_dirty_bitmap = g_strdup(s->x_dirty_bitmap);
-    s->info.name = g_strdup(s->export ?: "");
-    ret = nbd_receive_negotiate(aio_context, QIO_CHANNEL(sioc), s->tlscreds,
-                                s->hostname, &s->ioc, &s->info, errp);
-    g_free(s->info.x_dirty_bitmap);
-    g_free(s->info.name);
-    if (ret < 0) {
-        object_unref(OBJECT(sioc));
-        s->sioc = NULL;
-        return ret;
-    }
-    if (s->x_dirty_bitmap) {
-        if (!s->info.base_allocation) {
-            error_setg(errp, "requested x-dirty-bitmap %s not found",
-                       s->x_dirty_bitmap);
-            ret = -EINVAL;
-            goto fail;
-        }
-        if (strcmp(s->x_dirty_bitmap, "qemu:allocation-depth") == 0) {
-            s->alloc_depth = true;
-        }
-    }
-    if (s->info.flags & NBD_FLAG_READ_ONLY) {
-        ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-    if (s->info.flags & NBD_FLAG_SEND_FUA) {
-        bs->supported_write_flags = BDRV_REQ_FUA;
-        bs->supported_zero_flags |= BDRV_REQ_FUA;
-    }
-    if (s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
-        bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
-        if (s->info.flags & NBD_FLAG_SEND_FAST_ZERO) {
-            bs->supported_zero_flags |= BDRV_REQ_NO_FALLBACK;
-        }
-    }
-
-    if (!s->ioc) {
-        s->ioc = QIO_CHANNEL(sioc);
-        object_ref(OBJECT(s->ioc));
+    if (s->ioc) {
+        nbd_send_request(s->ioc, &request);
     }
 
-    trace_nbd_client_handshake_success(s->export);
-
-    return 0;
-
- fail:
-    /*
-     * We have connected, but must fail for other reasons.
-     * Send NBD_CMD_DISC as a courtesy to the server.
-     */
-    {
-        NBDRequest request = { .type = NBD_CMD_DISC };
-
-        nbd_send_request(s->ioc ?: QIO_CHANNEL(sioc), &request);
-
-        object_unref(OBJECT(sioc));
-        s->sioc = NULL;
-
-        return ret;
-    }
+    nbd_teardown_connection(bs);
 }
 
+
 /*
  * Parse nbd_open options
  */
@@ -2092,6 +1739,12 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options,
         goto done;
     }
 
+    if (socket_address_parse_named_fd(saddr, errp) < 0) {
+        qapi_free_SocketAddress(saddr);
+        saddr = NULL;
+        goto done;
+    }
+
 done:
     qobject_unref(addr);
     visit_free(iv);
@@ -2118,9 +1771,9 @@ static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
         return NULL;
     }
 
-    if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
-        error_setg(errp,
-                   "Expecting TLS credentials with a client endpoint");
+    if (!qcrypto_tls_creds_check_endpoint(creds,
+                                          QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT,
+                                          errp)) {
         return NULL;
     }
     object_ref(obj);
@@ -2157,6 +1810,11 @@ static QemuOptsList nbd_runtime_opts = {
             .type = QEMU_OPT_STRING,
             .help = "ID of the TLS credentials to use",
         },
+        {
+            .name = "tls-hostname",
+            .type = QEMU_OPT_STRING,
+            .help = "Override hostname for validating TLS x509 certificate",
+        },
         {
             .name = "x-dirty-bitmap",
             .type = QEMU_OPT_STRING,
@@ -2174,6 +1832,15 @@ static QemuOptsList nbd_runtime_opts = {
                     "future requests before a successful reconnect will "
                     "immediately fail. Default 0",
         },
+        {
+            .name = "open-timeout",
+            .type = QEMU_OPT_NUMBER,
+            .help = "In seconds. If zero, the nbd driver tries the connection "
+                    "only once, and fails to open if the connection fails. "
+                    "If non-zero, the nbd driver will repeat connection "
+                    "attempts until successful or until @open-timeout seconds "
+                    "have elapsed. Default 0",
+        },
         { /* end of list */ }
     },
 };
@@ -2214,12 +1881,11 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options,
             goto error;
         }
 
-        /* TODO SOCKET_ADDRESS_KIND_FD where fd has AF_INET or AF_INET6 */
-        if (s->saddr->type != SOCKET_ADDRESS_TYPE_INET) {
-            error_setg(errp, "TLS only supported over IP sockets");
-            goto error;
+        s->tlshostname = g_strdup(qemu_opt_get(opts, "tls-hostname"));
+        if (!s->tlshostname &&
+            s->saddr->type == SOCKET_ADDRESS_TYPE_INET) {
+            s->tlshostname = g_strdup(s->saddr->u.inet.host);
         }
-        s->hostname = s->saddr->u.inet.host;
     }
 
     s->x_dirty_bitmap = g_strdup(qemu_opt_get(opts, "x-dirty-bitmap"));
@@ -2229,13 +1895,11 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options,
     }
 
     s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0);
+    s->open_timeout = qemu_opt_get_number(opts, "open-timeout", 0);
 
     ret = 0;
 
  error:
-    if (ret < 0) {
-        nbd_clear_bdrvstate(s);
-    }
     qemu_opts_del(opts);
     return ret;
 }
@@ -2245,46 +1909,53 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
 {
     int ret;
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
-    QIOChannelSocket *sioc;
+
+    s->bs = bs;
+    qemu_mutex_init(&s->requests_lock);
+    qemu_co_queue_init(&s->free_sema);
+    qemu_co_mutex_init(&s->send_mutex);
+    qemu_co_mutex_init(&s->receive_mutex);
+
+    if (!yank_register_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name), errp)) {
+        return -EEXIST;
+    }
 
     ret = nbd_process_options(bs, options, errp);
     if (ret < 0) {
-        return ret;
+        goto fail;
     }
 
-    s->bs = bs;
-    qemu_co_mutex_init(&s->send_mutex);
-    qemu_co_queue_init(&s->free_sema);
+    s->conn = nbd_client_connection_new(s->saddr, true, s->export,
+                                        s->x_dirty_bitmap, s->tlscreds,
+                                        s->tlshostname);
 
-    /*
-     * establish TCP connection, return error if it fails
-     * TODO: Configurable retry-until-timeout behaviour.
-     */
-    sioc = nbd_establish_connection(s->saddr, errp);
-    if (!sioc) {
-        return -ECONNREFUSED;
+    if (s->open_timeout) {
+        nbd_client_connection_enable_retry(s->conn);
+        open_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
+                        s->open_timeout * NANOSECONDS_PER_SECOND);
     }
 
-    ret = nbd_client_handshake(bs, sioc, errp);
+    s->state = NBD_CLIENT_CONNECTING_WAIT;
+    ret = nbd_do_establish_connection(bs, true, errp);
     if (ret < 0) {
-        nbd_clear_bdrvstate(s);
-        return ret;
+        goto fail;
     }
-    /* successfully connected */
-    s->state = NBD_CLIENT_CONNECTED;
 
-    nbd_init_connect_thread(s);
+    /*
+     * The connect attempt is done, so we no longer need this timer.
+     * Delete it, because we do not want it to be around when this node
+     * is drained or closed.
+     */
+    open_timer_del(s);
 
-    s->connection_co = qemu_coroutine_create(nbd_connection_entry, s);
-    bdrv_inc_in_flight(bs);
-    aio_co_schedule(bdrv_get_aio_context(bs), s->connection_co);
+    nbd_client_connection_enable_retry(s->conn);
 
     return 0;
-}
 
-static int nbd_co_flush(BlockDriverState *bs)
-{
-    return nbd_client_co_flush(bs);
+fail:
+    open_timer_del(s);
+    nbd_clear_bdrvstate(bs);
+    return ret;
 }
 
 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
@@ -2315,6 +1986,14 @@ static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
     bs->bl.max_pwrite_zeroes = max;
     bs->bl.max_transfer = max;
 
+    /*
+     * Assume that if the server supports extended headers, it also
+     * supports unlimited size zero and trim commands.
+     */
+    if (s->info.mode >= NBD_MODE_EXTENDED) {
+        bs->bl.max_pdiscard = bs->bl.max_pwrite_zeroes = 0;
+    }
+
     if (s->info.opt_block &&
         s->info.opt_block > bs->bl.opt_transfer) {
         bs->bl.opt_transfer = s->info.opt_block;
@@ -2323,10 +2002,8 @@ static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 
 static void nbd_close(BlockDriverState *bs)
 {
-    BDRVNBDState *s = bs->opaque;
-
     nbd_client_close(bs);
-    nbd_clear_bdrvstate(s);
+    nbd_clear_bdrvstate(bs);
 }
 
 /*
@@ -2356,7 +2033,7 @@ static int coroutine_fn nbd_co_truncate(BlockDriverState *bs, int64_t offset,
     return 0;
 }
 
-static int64_t nbd_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn nbd_co_getlength(BlockDriverState *bs)
 {
     BDRVNBDState *s = bs->opaque;
 
@@ -2414,11 +2091,55 @@ static const char *const nbd_strong_runtime_opts[] = {
     "port",
     "export",
     "tls-creds",
+    "tls-hostname",
     "server.",
 
     NULL
 };
 
+static void nbd_cancel_in_flight(BlockDriverState *bs)
+{
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+
+    reconnect_delay_timer_del(s);
+
+    qemu_mutex_lock(&s->requests_lock);
+    if (s->state == NBD_CLIENT_CONNECTING_WAIT) {
+        s->state = NBD_CLIENT_CONNECTING_NOWAIT;
+    }
+    qemu_mutex_unlock(&s->requests_lock);
+
+    nbd_co_establish_connection_cancel(s->conn);
+}
+
+static void nbd_attach_aio_context(BlockDriverState *bs,
+                                   AioContext *new_context)
+{
+    BDRVNBDState *s = bs->opaque;
+
+    /* The open_timer is used only during nbd_open() */
+    assert(!s->open_timer);
+
+    /*
+     * The reconnect_delay_timer is scheduled in I/O paths when the
+     * connection is lost, to cancel the reconnection attempt after a
+     * given time.  Once this attempt is done (successfully or not),
+     * nbd_reconnect_attempt() ensures the timer is deleted before the
+     * respective I/O request is resumed.
+     * Since the AioContext can only be changed when a node is drained,
+     * the reconnect_delay_timer cannot be active here.
+     */
+    assert(!s->reconnect_delay_timer);
+}
+
+static void nbd_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVNBDState *s = bs->opaque;
+
+    assert(!s->open_timer);
+    assert(!s->reconnect_delay_timer);
+}
+
 static BlockDriver bdrv_nbd = {
     .format_name                = "nbd",
     .protocol_name              = "nbd",
@@ -2432,19 +2153,19 @@ static BlockDriver bdrv_nbd = {
     .bdrv_co_pwritev            = nbd_client_co_pwritev,
     .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
     .bdrv_close                 = nbd_close,
-    .bdrv_co_flush_to_os        = nbd_co_flush,
+    .bdrv_co_flush_to_os        = nbd_client_co_flush,
     .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
     .bdrv_refresh_limits        = nbd_refresh_limits,
     .bdrv_co_truncate           = nbd_co_truncate,
-    .bdrv_getlength             = nbd_getlength,
-    .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
-    .bdrv_attach_aio_context    = nbd_client_attach_aio_context,
-    .bdrv_co_drain_begin        = nbd_client_co_drain_begin,
-    .bdrv_co_drain_end          = nbd_client_co_drain_end,
+    .bdrv_co_getlength          = nbd_co_getlength,
     .bdrv_refresh_filename      = nbd_refresh_filename,
     .bdrv_co_block_status       = nbd_client_co_block_status,
     .bdrv_dirname               = nbd_dirname,
     .strong_runtime_opts        = nbd_strong_runtime_opts,
+    .bdrv_cancel_in_flight      = nbd_cancel_in_flight,
+
+    .bdrv_attach_aio_context    = nbd_attach_aio_context,
+    .bdrv_detach_aio_context    = nbd_detach_aio_context,
 };
 
 static BlockDriver bdrv_nbd_tcp = {
@@ -2460,19 +2181,19 @@ static BlockDriver bdrv_nbd_tcp = {
     .bdrv_co_pwritev            = nbd_client_co_pwritev,
     .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
     .bdrv_close                 = nbd_close,
-    .bdrv_co_flush_to_os        = nbd_co_flush,
+    .bdrv_co_flush_to_os        = nbd_client_co_flush,
     .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
     .bdrv_refresh_limits        = nbd_refresh_limits,
     .bdrv_co_truncate           = nbd_co_truncate,
-    .bdrv_getlength             = nbd_getlength,
-    .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
-    .bdrv_attach_aio_context    = nbd_client_attach_aio_context,
-    .bdrv_co_drain_begin        = nbd_client_co_drain_begin,
-    .bdrv_co_drain_end          = nbd_client_co_drain_end,
+    .bdrv_co_getlength          = nbd_co_getlength,
     .bdrv_refresh_filename      = nbd_refresh_filename,
     .bdrv_co_block_status       = nbd_client_co_block_status,
     .bdrv_dirname               = nbd_dirname,
     .strong_runtime_opts        = nbd_strong_runtime_opts,
+    .bdrv_cancel_in_flight      = nbd_cancel_in_flight,
+
+    .bdrv_attach_aio_context    = nbd_attach_aio_context,
+    .bdrv_detach_aio_context    = nbd_detach_aio_context,
 };
 
 static BlockDriver bdrv_nbd_unix = {
@@ -2488,19 +2209,19 @@ static BlockDriver bdrv_nbd_unix = {
     .bdrv_co_pwritev            = nbd_client_co_pwritev,
     .bdrv_co_pwrite_zeroes      = nbd_client_co_pwrite_zeroes,
     .bdrv_close                 = nbd_close,
-    .bdrv_co_flush_to_os        = nbd_co_flush,
+    .bdrv_co_flush_to_os        = nbd_client_co_flush,
     .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
     .bdrv_refresh_limits        = nbd_refresh_limits,
     .bdrv_co_truncate           = nbd_co_truncate,
-    .bdrv_getlength             = nbd_getlength,
-    .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
-    .bdrv_attach_aio_context    = nbd_client_attach_aio_context,
-    .bdrv_co_drain_begin        = nbd_client_co_drain_begin,
-    .bdrv_co_drain_end          = nbd_client_co_drain_end,
+    .bdrv_co_getlength          = nbd_co_getlength,
     .bdrv_refresh_filename      = nbd_refresh_filename,
     .bdrv_co_block_status       = nbd_client_co_block_status,
     .bdrv_dirname               = nbd_dirname,
     .strong_runtime_opts        = nbd_strong_runtime_opts,
+    .bdrv_cancel_in_flight      = nbd_cancel_in_flight,
+
+    .bdrv_attach_aio_context    = nbd_attach_aio_context,
+    .bdrv_detach_aio_context    = nbd_detach_aio_context,
 };
 
 static void bdrv_nbd_init(void)
index 8c1968bb415d9a9e542988fd51128b9f302caa1c..f737e19cd321008fadf8cff56a7085e52f81d4b2 100644 (file)
@@ -30,6 +30,7 @@
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "block/qdict.h"
 #include "trace.h"
@@ -39,7 +40,6 @@
 #include "qemu/option.h"
 #include "qemu/uri.h"
 #include "qemu/cutils.h"
-#include "sysemu/sysemu.h"
 #include "sysemu/replay.h"
 #include "qapi/qapi-visit-block-core.h"
 #include "qapi/qmp/qdict.h"
@@ -114,13 +114,13 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
     qdict_put_str(options, "path", uri->path);
 
     for (i = 0; i < qp->n; i++) {
-        unsigned long long val;
+        uint64_t val;
         if (!qp->p[i].value) {
             error_setg(errp, "Value for NFS parameter expected: %s",
                        qp->p[i].name);
             goto out;
         }
-        if (parse_uint_full(qp->p[i].value, &val, 0)) {
+        if (parse_uint_full(qp->p[i].value, 0, &val)) {
             error_setg(errp, "Illegal value for NFS parameter: %s",
                        qp->p[i].name);
             goto out;
@@ -148,9 +148,7 @@ out:
     if (qp) {
         query_params_free(qp);
     }
-    if (uri) {
-        uri_free(uri);
-    }
+    uri_free(uri);
     return ret;
 }
 
@@ -197,10 +195,9 @@ static void nfs_set_events(NFSClient *client)
     int ev = nfs_which_events(client->context);
     if (ev != client->events) {
         aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                           false,
                            (ev & POLLIN) ? nfs_process_read : NULL,
                            (ev & POLLOUT) ? nfs_process_write : NULL,
-                           NULL, client);
+                           NULL, NULL, client);
 
     }
     client->events = ev;
@@ -226,7 +223,7 @@ static void nfs_process_write(void *arg)
     qemu_mutex_unlock(&client->mutex);
 }
 
-static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
+static void coroutine_fn nfs_co_init_task(BlockDriverState *bs, NFSRPC *task)
 {
     *task = (NFSRPC) {
         .co             = qemu_coroutine_self(),
@@ -265,9 +262,9 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
                                      nfs_co_generic_bh_cb, task);
 }
 
-static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
-                                      uint64_t bytes, QEMUIOVector *iov,
-                                      int flags)
+static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, int64_t offset,
+                                      int64_t bytes, QEMUIOVector *iov,
+                                      BdrvRequestFlags flags)
 {
     NFSClient *client = bs->opaque;
     NFSRPC task;
@@ -299,9 +296,9 @@ static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
     return 0;
 }
 
-static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
-                                       uint64_t bytes, QEMUIOVector *iov,
-                                       int flags)
+static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, int64_t offset,
+                                       int64_t bytes, QEMUIOVector *iov,
+                                       BdrvRequestFlags flags)
 {
     NFSClient *client = bs->opaque;
     NFSRPC task;
@@ -375,7 +372,7 @@ static void nfs_detach_aio_context(BlockDriverState *bs)
     NFSClient *client = bs->opaque;
 
     aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                       false, NULL, NULL, NULL, NULL);
+                       NULL, NULL, NULL, NULL, NULL);
     client->events = 0;
 }
 
@@ -393,7 +390,7 @@ static void nfs_client_close(NFSClient *client)
     if (client->context) {
         qemu_mutex_lock(&client->mutex);
         aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                           false, NULL, NULL, NULL, NULL);
+                           NULL, NULL, NULL, NULL, NULL);
         qemu_mutex_unlock(&client->mutex);
         if (client->fh) {
             nfs_close(client->context, client->fh);
@@ -421,7 +418,11 @@ static int64_t nfs_client_open(NFSClient *client, BlockdevOptionsNfs *opts,
                                int flags, int open_flags, Error **errp)
 {
     int64_t ret = -EINVAL;
+#ifdef _WIN32
+    struct __stat64 st;
+#else
     struct stat st;
+#endif
     char *file = NULL, *strp = NULL;
 
     qemu_mutex_init(&client->mutex);
@@ -724,13 +725,11 @@ nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data,
     if (task->ret < 0) {
         error_report("NFS Error: %s", nfs_get_error(nfs));
     }
-
-    /* Set task->complete before reading bs->wakeup.  */
-    qatomic_mb_set(&task->complete, 1);
-    bdrv_wakeup(task->bs);
+    replay_bh_schedule_oneshot_event(task->client->aio_context,
+                                     nfs_co_generic_bh_cb, task);
 }
 
-static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
+static int64_t coroutine_fn nfs_co_get_allocated_file_size(BlockDriverState *bs)
 {
     NFSClient *client = bs->opaque;
     NFSRPC task = {0};
@@ -741,15 +740,19 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
         return client->st_blocks * 512;
     }
 
-    task.bs = bs;
+    nfs_co_init_task(bs, &task);
     task.st = &st;
-    if (nfs_fstat_async(client->context, client->fh, nfs_get_allocated_file_size_cb,
-                        &task) != 0) {
-        return -ENOMEM;
-    }
+    WITH_QEMU_LOCK_GUARD(&client->mutex) {
+        if (nfs_fstat_async(client->context, client->fh, nfs_get_allocated_file_size_cb,
+                            &task) != 0) {
+            return -ENOMEM;
+        }
 
-    nfs_set_events(client);
-    BDRV_POLL_WHILE(bs, !task.complete);
+        nfs_set_events(client);
+    }
+    while (!task.complete) {
+        qemu_coroutine_yield();
+    }
 
     return (task.ret < 0 ? task.ret : st.st_blocks * 512);
 }
@@ -784,7 +787,11 @@ static int nfs_reopen_prepare(BDRVReopenState *state,
                               BlockReopenQueue *queue, Error **errp)
 {
     NFSClient *client = state->bs->opaque;
+#ifdef _WIN32
+    struct __stat64 st;
+#else
     struct stat st;
+#endif
     int ret = 0;
 
     if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) {
@@ -836,7 +843,7 @@ static void nfs_refresh_filename(BlockDriverState *bs)
     }
 }
 
-static char *nfs_dirname(BlockDriverState *bs, Error **errp)
+static char * GRAPH_RDLOCK nfs_dirname(BlockDriverState *bs, Error **errp)
 {
     NFSClient *client = bs->opaque;
 
@@ -879,7 +886,7 @@ static BlockDriver bdrv_nfs = {
     .bdrv_has_zero_init             = nfs_has_zero_init,
 /* libnfs does not provide the allocated filesize of a file on win32. */
 #if !defined(_WIN32)
-    .bdrv_get_allocated_file_size   = nfs_get_allocated_file_size,
+    .bdrv_co_get_allocated_file_size = nfs_co_get_allocated_file_size,
 #endif
     .bdrv_co_truncate               = nfs_file_co_truncate,
 
index cc9b1d4ea720f5bb1992e3929223987f0befd87e..4808704ffd3a1f04eeb04a53d32de9143a6d5e9c 100644 (file)
@@ -16,6 +16,7 @@
 #include "qapi/qmp/qstring.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "sysemu/replay.h"
 
@@ -99,7 +100,7 @@ static int null_file_open(BlockDriverState *bs, QDict *options, int flags,
     return ret;
 }
 
-static int64_t null_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn null_co_getlength(BlockDriverState *bs)
 {
     BDRVNullState *s = bs->opaque;
     return s->length;
@@ -116,8 +117,9 @@ static coroutine_fn int null_co_common(BlockDriverState *bs)
 }
 
 static coroutine_fn int null_co_preadv(BlockDriverState *bs,
-                                       uint64_t offset, uint64_t bytes,
-                                       QEMUIOVector *qiov, int flags)
+                                       int64_t offset, int64_t bytes,
+                                       QEMUIOVector *qiov,
+                                       BdrvRequestFlags flags)
 {
     BDRVNullState *s = bs->opaque;
 
@@ -129,8 +131,9 @@ static coroutine_fn int null_co_preadv(BlockDriverState *bs,
 }
 
 static coroutine_fn int null_co_pwritev(BlockDriverState *bs,
-                                        uint64_t offset, uint64_t bytes,
-                                        QEMUIOVector *qiov, int flags)
+                                        int64_t offset, int64_t bytes,
+                                        QEMUIOVector *qiov,
+                                        BdrvRequestFlags flags)
 {
     return null_co_common(bs);
 }
@@ -187,8 +190,8 @@ static inline BlockAIOCB *null_aio_common(BlockDriverState *bs,
 }
 
 static BlockAIOCB *null_aio_preadv(BlockDriverState *bs,
-                                   uint64_t offset, uint64_t bytes,
-                                   QEMUIOVector *qiov, int flags,
+                                   int64_t offset, int64_t bytes,
+                                   QEMUIOVector *qiov, BdrvRequestFlags flags,
                                    BlockCompletionFunc *cb,
                                    void *opaque)
 {
@@ -202,8 +205,8 @@ static BlockAIOCB *null_aio_preadv(BlockDriverState *bs,
 }
 
 static BlockAIOCB *null_aio_pwritev(BlockDriverState *bs,
-                                    uint64_t offset, uint64_t bytes,
-                                    QEMUIOVector *qiov, int flags,
+                                    int64_t offset, int64_t bytes,
+                                    QEMUIOVector *qiov, BdrvRequestFlags flags,
                                     BlockCompletionFunc *cb,
                                     void *opaque)
 {
@@ -262,7 +265,8 @@ static void null_refresh_filename(BlockDriverState *bs)
              bs->drv->format_name);
 }
 
-static int64_t null_allocated_file_size(BlockDriverState *bs)
+static int64_t coroutine_fn
+null_co_get_allocated_file_size(BlockDriverState *bs)
 {
     return 0;
 }
@@ -281,8 +285,8 @@ static BlockDriver bdrv_null_co = {
 
     .bdrv_file_open         = null_file_open,
     .bdrv_parse_filename    = null_co_parse_filename,
-    .bdrv_getlength         = null_getlength,
-    .bdrv_get_allocated_file_size = null_allocated_file_size,
+    .bdrv_co_getlength      = null_co_getlength,
+    .bdrv_co_get_allocated_file_size = null_co_get_allocated_file_size,
 
     .bdrv_co_preadv         = null_co_preadv,
     .bdrv_co_pwritev        = null_co_pwritev,
@@ -302,8 +306,8 @@ static BlockDriver bdrv_null_aio = {
 
     .bdrv_file_open         = null_file_open,
     .bdrv_parse_filename    = null_aio_parse_filename,
-    .bdrv_getlength         = null_getlength,
-    .bdrv_get_allocated_file_size = null_allocated_file_size,
+    .bdrv_co_getlength      = null_co_getlength,
+    .bdrv_co_get_allocated_file_size = null_co_get_allocated_file_size,
 
     .bdrv_aio_preadv        = null_aio_preadv,
     .bdrv_aio_pwritev       = null_aio_pwritev,
index a06a188d530063d02fbacae29e0512f053ba15ef..0a0a0a6b36cd0c1b08f7098b03cc8d5a445ed190 100644 (file)
 #include "qapi/error.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qstring.h"
+#include "qemu/defer-call.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "qemu/module.h"
 #include "qemu/cutils.h"
 #include "qemu/option.h"
+#include "qemu/memalign.h"
 #include "qemu/vfio-helpers.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "sysemu/replay.h"
 #include "trace.h"
 
@@ -117,7 +121,6 @@ struct BDRVNVMeState {
     int blkshift;
 
     uint64_t max_transfer;
-    bool plugged;
 
     bool supports_write_zeroes;
     bool supports_discard;
@@ -168,31 +171,35 @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
     size_t bytes;
     int r;
 
-    bytes = ROUND_UP(nentries * entry_bytes, qemu_real_host_page_size);
+    bytes = ROUND_UP(nentries * entry_bytes, qemu_real_host_page_size());
     q->head = q->tail = 0;
-    q->queue = qemu_try_memalign(qemu_real_host_page_size, bytes);
+    q->queue = qemu_try_memalign(qemu_real_host_page_size(), bytes);
     if (!q->queue) {
         error_setg(errp, "Cannot allocate queue");
         return false;
     }
     memset(q->queue, 0, bytes);
-    r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova);
+    r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova, errp);
     if (r) {
-        error_setg(errp, "Cannot map queue");
-        return false;
+        error_prepend(errp, "Cannot map queue: ");
     }
-    return true;
+    return r == 0;
+}
+
+static void nvme_free_queue(NVMeQueue *q)
+{
+    qemu_vfree(q->queue);
 }
 
 static void nvme_free_queue_pair(NVMeQueuePair *q)
 {
-    trace_nvme_free_queue_pair(q->index, q);
+    trace_nvme_free_queue_pair(q->index, q, &q->cq, &q->sq);
     if (q->completion_bh) {
         qemu_bh_delete(q->completion_bh);
     }
+    nvme_free_queue(&q->sq);
+    nvme_free_queue(&q->cq);
     qemu_vfree(q->prp_list_pages);
-    qemu_vfree(q->sq.queue);
-    qemu_vfree(q->cq.queue);
     qemu_mutex_destroy(&q->lock);
     g_free(q);
 }
@@ -202,8 +209,9 @@ static void nvme_free_req_queue_cb(void *opaque)
     NVMeQueuePair *q = opaque;
 
     qemu_mutex_lock(&q->lock);
-    while (qemu_co_enter_next(&q->free_req_queue, &q->lock)) {
-        /* Retry all pending requests */
+    while (q->free_req_head != -1 &&
+           qemu_co_enter_next(&q->free_req_queue, &q->lock)) {
+        /* Retry waiting requests */
     }
     qemu_mutex_unlock(&q->lock);
 }
@@ -220,14 +228,16 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
 
     q = g_try_new0(NVMeQueuePair, 1);
     if (!q) {
+        error_setg(errp, "Cannot allocate queue pair");
         return NULL;
     }
     trace_nvme_create_queue_pair(idx, q, size, aio_context,
                                  event_notifier_get_fd(s->irq_notifier));
     bytes = QEMU_ALIGN_UP(s->page_size * NVME_NUM_REQS,
-                          qemu_real_host_page_size);
-    q->prp_list_pages = qemu_try_memalign(qemu_real_host_page_size, bytes);
+                          qemu_real_host_page_size());
+    q->prp_list_pages = qemu_try_memalign(qemu_real_host_page_size(), bytes);
     if (!q->prp_list_pages) {
+        error_setg(errp, "Cannot allocate PRP page list");
         goto fail;
     }
     memset(q->prp_list_pages, 0, bytes);
@@ -237,8 +247,9 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
     qemu_co_queue_init(&q->free_req_queue);
     q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q);
     r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, bytes,
-                          false, &prp_list_iova);
+                          false, &prp_list_iova, errp);
     if (r) {
+        error_prepend(errp, "Cannot map buffer for DMA: ");
         goto fail;
     }
     q->free_req_head = -1;
@@ -272,7 +283,7 @@ static void nvme_kick(NVMeQueuePair *q)
 {
     BDRVNVMeState *s = q->s;
 
-    if (s->plugged || !q->need_kick) {
+    if (!q->need_kick) {
         return;
     }
     trace_nvme_kick(s, q->index);
@@ -284,34 +295,42 @@ static void nvme_kick(NVMeQueuePair *q)
     q->need_kick = 0;
 }
 
-/* Find a free request element if any, otherwise:
- * a) if in coroutine context, try to wait for one to become available;
- * b) if not in coroutine, return NULL;
- */
-static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q)
+static NVMeRequest *nvme_get_free_req_nofail_locked(NVMeQueuePair *q)
 {
     NVMeRequest *req;
 
-    qemu_mutex_lock(&q->lock);
-
-    while (q->free_req_head == -1) {
-        if (qemu_in_coroutine()) {
-            trace_nvme_free_req_queue_wait(q->s, q->index);
-            qemu_co_queue_wait(&q->free_req_queue, &q->lock);
-        } else {
-            qemu_mutex_unlock(&q->lock);
-            return NULL;
-        }
-    }
-
     req = &q->reqs[q->free_req_head];
     q->free_req_head = req->free_req_next;
     req->free_req_next = -1;
-
-    qemu_mutex_unlock(&q->lock);
     return req;
 }
 
+/* Return a free request element if any, otherwise return NULL.  */
+static NVMeRequest *nvme_get_free_req_nowait(NVMeQueuePair *q)
+{
+    QEMU_LOCK_GUARD(&q->lock);
+    if (q->free_req_head == -1) {
+        return NULL;
+    }
+    return nvme_get_free_req_nofail_locked(q);
+}
+
+/*
+ * Wait for a free request to become available if necessary, then
+ * return it.
+ */
+static coroutine_fn NVMeRequest *nvme_get_free_req(NVMeQueuePair *q)
+{
+    QEMU_LOCK_GUARD(&q->lock);
+
+    while (q->free_req_head == -1) {
+        trace_nvme_free_req_queue_wait(q->s, q->index);
+        qemu_co_queue_wait(&q->free_req_queue, &q->lock);
+    }
+
+    return nvme_get_free_req_nofail_locked(q);
+}
+
 /* With q->lock */
 static void nvme_put_free_req_locked(NVMeQueuePair *q, NVMeRequest *req)
 {
@@ -369,10 +388,6 @@ static bool nvme_process_completion(NVMeQueuePair *q)
     NvmeCqe *c;
 
     trace_nvme_process_completion(s, q->index, q->inflight);
-    if (s->plugged) {
-        trace_nvme_process_completion_queue_plugged(s, q->index);
-        return false;
-    }
 
     /*
      * Support re-entrancy when a request cb() function invokes aio_poll().
@@ -402,9 +417,10 @@ static bool nvme_process_completion(NVMeQueuePair *q)
             q->cq_phase = !q->cq_phase;
         }
         cid = le16_to_cpu(c->cid);
-        if (cid == 0 || cid > NVME_QUEUE_SIZE) {
-            warn_report("NVMe: Unexpected CID in completion queue: %"PRIu32", "
-                        "queue size: %u", cid, NVME_QUEUE_SIZE);
+        if (cid == 0 || cid > NVME_NUM_REQS) {
+            warn_report("NVMe: Unexpected CID in completion queue: %" PRIu32
+                        ", should be within: 1..%u inclusively", cid,
+                        NVME_NUM_REQS);
             continue;
         }
         trace_nvme_complete_command(s, q->index, cid);
@@ -462,6 +478,15 @@ static void nvme_trace_command(const NvmeCmd *cmd)
     }
 }
 
+static void nvme_deferred_fn(void *opaque)
+{
+    NVMeQueuePair *q = opaque;
+
+    QEMU_LOCK_GUARD(&q->lock);
+    nvme_kick(q);
+    nvme_process_completion(q);
+}
+
 static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req,
                                 NvmeCmd *cmd, BlockCompletionFunc cb,
                                 void *opaque)
@@ -478,9 +503,9 @@ static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req,
            q->sq.tail * NVME_SQ_ENTRY_BYTES, cmd, sizeof(*cmd));
     q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE;
     q->need_kick++;
-    nvme_kick(q);
-    nvme_process_completion(q);
     qemu_mutex_unlock(&q->lock);
+
+    defer_call(nvme_deferred_fn, q);
 }
 
 static void nvme_admin_cmd_sync_cb(void *opaque, int ret)
@@ -497,7 +522,7 @@ static int nvme_admin_cmd_sync(BlockDriverState *bs, NvmeCmd *cmd)
     AioContext *aio_context = bdrv_get_aio_context(bs);
     NVMeRequest *req;
     int ret = -EINPROGRESS;
-    req = nvme_get_free_req(q);
+    req = nvme_get_free_req_nowait(q);
     if (!req) {
         return -EBUSY;
     }
@@ -512,10 +537,10 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
 {
     BDRVNVMeState *s = bs->opaque;
     bool ret = false;
-    union {
+    QEMU_AUTO_VFREE union {
         NvmeIdCtrl ctrl;
         NvmeIdNs ns;
-    } *id;
+    } *id = NULL;
     NvmeLBAF *lbaf;
     uint16_t oncs;
     int r;
@@ -524,16 +549,16 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
         .opcode = NVME_ADM_CMD_IDENTIFY,
         .cdw10 = cpu_to_le32(0x1),
     };
-    size_t id_size = QEMU_ALIGN_UP(sizeof(*id), qemu_real_host_page_size);
+    size_t id_size = QEMU_ALIGN_UP(sizeof(*id), qemu_real_host_page_size());
 
-    id = qemu_try_memalign(qemu_real_host_page_size, id_size);
+    id = qemu_try_memalign(qemu_real_host_page_size(), id_size);
     if (!id) {
         error_setg(errp, "Cannot allocate buffer for identify response");
         goto out;
     }
-    r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova);
+    r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova, errp);
     if (r) {
-        error_setg(errp, "Cannot map buffer for DMA");
+        error_prepend(errp, "Cannot map buffer for DMA: ");
         goto out;
     }
 
@@ -593,15 +618,12 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
     s->blkshift = lbaf->ds;
 out:
     qemu_vfio_dma_unmap(s->vfio, id);
-    qemu_vfree(id);
 
     return ret;
 }
 
-static bool nvme_poll_queue(NVMeQueuePair *q)
+static void nvme_poll_queue(NVMeQueuePair *q)
 {
-    bool progress = false;
-
     const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES;
     NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset];
 
@@ -612,30 +634,23 @@ static bool nvme_poll_queue(NVMeQueuePair *q)
      * cannot race with itself.
      */
     if ((le16_to_cpu(cqe->status) & 0x1) == q->cq_phase) {
-        return false;
+        return;
     }
 
     qemu_mutex_lock(&q->lock);
     while (nvme_process_completion(q)) {
         /* Keep polling */
-        progress = true;
     }
     qemu_mutex_unlock(&q->lock);
-
-    return progress;
 }
 
-static bool nvme_poll_queues(BDRVNVMeState *s)
+static void nvme_poll_queues(BDRVNVMeState *s)
 {
-    bool progress = false;
     int i;
 
     for (i = 0; i < s->queue_count; i++) {
-        if (nvme_poll_queue(s->queues[i])) {
-            progress = true;
-        }
+        nvme_poll_queue(s->queues[i]);
     }
-    return progress;
 }
 
 static void nvme_handle_event(EventNotifier *n)
@@ -694,10 +709,32 @@ out_error:
 static bool nvme_poll_cb(void *opaque)
 {
     EventNotifier *e = opaque;
+    BDRVNVMeState *s = container_of(e, BDRVNVMeState,
+                                    irq_notifier[MSIX_SHARED_IRQ_IDX]);
+    int i;
+
+    for (i = 0; i < s->queue_count; i++) {
+        NVMeQueuePair *q = s->queues[i];
+        const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES;
+        NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset];
+
+        /*
+         * q->lock isn't needed because nvme_process_completion() only runs in
+         * the event loop thread and cannot race with itself.
+         */
+        if ((le16_to_cpu(cqe->status) & 0x1) != q->cq_phase) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static void nvme_poll_ready(EventNotifier *e)
+{
     BDRVNVMeState *s = container_of(e, BDRVNVMeState,
                                     irq_notifier[MSIX_SHARED_IRQ_IDX]);
 
-    return nvme_poll_queues(s);
+    nvme_poll_queues(s);
 }
 
 static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
@@ -708,6 +745,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
     AioContext *aio_context = bdrv_get_aio_context(bs);
     int ret;
     uint64_t cap;
+    uint32_t ver;
     uint64_t timeout_ms;
     uint64_t deadline, now;
     volatile NvmeBar *regs = NULL;
@@ -745,7 +783,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
     trace_nvme_controller_capability("Contiguous Queues Required",
                                      NVME_CAP_CQR(cap));
     trace_nvme_controller_capability("Doorbell Stride",
-                                     2 << (2 + NVME_CAP_DSTRD(cap)));
+                                     1 << (2 + NVME_CAP_DSTRD(cap)));
     trace_nvme_controller_capability("Subsystem Reset Supported",
                                      NVME_CAP_NSSRS(cap));
     trace_nvme_controller_capability("Memory Page Size Minimum",
@@ -764,6 +802,11 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
     bs->bl.request_alignment = s->page_size;
     timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000);
 
+    ver = le32_to_cpu(regs->vs);
+    trace_nvme_controller_spec_version(extract32(ver, 16, 16),
+                                       extract32(ver, 8, 8),
+                                       extract32(ver, 0, 8));
+
     /* Reset device to get a clean state. */
     regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE);
     /* Wait for CSTS.RDY = 0. */
@@ -826,7 +869,8 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
     }
     aio_set_event_notifier(bdrv_get_aio_context(bs),
                            &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
-                           false, nvme_handle_event, nvme_poll_cb);
+                           nvme_handle_event, nvme_poll_cb,
+                           nvme_poll_ready);
 
     if (!nvme_identify(bs, namespace, errp)) {
         ret = -EIO;
@@ -911,7 +955,7 @@ static void nvme_close(BlockDriverState *bs)
     g_free(s->queues);
     aio_set_event_notifier(bdrv_get_aio_context(bs),
                            &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
-                           false, NULL, NULL);
+                           NULL, NULL, NULL);
     event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]);
     qemu_vfio_pci_unmap_bar(s->vfio, 0, s->bar0_wo_map,
                             0, sizeof(NvmeBar) + NVME_DOORBELL_SIZE);
@@ -965,7 +1009,7 @@ fail:
     return ret;
 }
 
-static int64_t nvme_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn nvme_co_getlength(BlockDriverState *bs)
 {
     BDRVNVMeState *s = bs->opaque;
     return s->nsze << s->blkshift;
@@ -1011,6 +1055,7 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd,
     uint64_t *pagelist = req->prp_list_page;
     int i, j, r;
     int entries = 0;
+    Error *local_err = NULL, **errp = NULL;
 
     assert(qiov->size);
     assert(QEMU_IS_ALIGNED(qiov->size, s->page_size));
@@ -1019,12 +1064,34 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd,
         bool retry = true;
         uint64_t iova;
         size_t len = QEMU_ALIGN_UP(qiov->iov[i].iov_len,
-                                   qemu_real_host_page_size);
+                                   qemu_real_host_page_size());
 try_map:
         r = qemu_vfio_dma_map(s->vfio,
                               qiov->iov[i].iov_base,
-                              len, true, &iova);
+                              len, true, &iova, errp);
+        if (r == -ENOSPC) {
+            /*
+             * In addition to the -ENOMEM error, the VFIO_IOMMU_MAP_DMA
+             * ioctl returns -ENOSPC to signal the user exhausted the DMA
+             * mappings available for a container since Linux kernel commit
+             * 492855939bdb ("vfio/type1: Limit DMA mappings per container",
+             * April 2019, see CVE-2019-3882).
+             *
+             * This block driver already handles this error path by checking
+             * for the -ENOMEM error, so we directly replace -ENOSPC by
+             * -ENOMEM. Beside, -ENOSPC has a specific meaning for blockdev
+             * coroutines: it triggers BLOCKDEV_ON_ERROR_ENOSPC and
+             * BLOCK_ERROR_ACTION_STOP which stops the VM, asking the operator
+             * to add more storage to the blockdev. Not something we can do
+             * easily with an IOMMU :)
+             */
+            r = -ENOMEM;
+        }
         if (r == -ENOMEM && retry) {
+            /*
+             * We exhausted the DMA mappings available for our container:
+             * recycle the volatile IOVA mappings.
+             */
             retry = false;
             trace_nvme_dma_flush_queue_wait(s);
             if (s->dma_map_count) {
@@ -1036,6 +1103,8 @@ try_map:
                     goto fail;
                 }
             }
+            errp = &local_err;
+
             goto try_map;
         }
         if (r) {
@@ -1079,6 +1148,9 @@ fail:
      * because they are already mapped before calling this function; for
      * temporary mappings, a later nvme_cmd_(un)map_qiov will reclaim by
      * calling qemu_vfio_dma_reset_temporary when necessary. */
+    if (local_err) {
+        error_reportf_err(local_err, "Cannot map buffer for DMA: ");
+    }
     return r;
 }
 
@@ -1168,8 +1240,8 @@ static inline bool nvme_qiov_aligned(BlockDriverState *bs,
 
     for (i = 0; i < qiov->niov; ++i) {
         if (!QEMU_PTR_IS_ALIGNED(qiov->iov[i].iov_base,
-                                 qemu_real_host_page_size) ||
-            !QEMU_IS_ALIGNED(qiov->iov[i].iov_len, qemu_real_host_page_size)) {
+                                 qemu_real_host_page_size()) ||
+            !QEMU_IS_ALIGNED(qiov->iov[i].iov_len, qemu_real_host_page_size())) {
             trace_nvme_qiov_unaligned(qiov, i, qiov->iov[i].iov_base,
                                       qiov->iov[i].iov_len, s->page_size);
             return false;
@@ -1178,14 +1250,16 @@ static inline bool nvme_qiov_aligned(BlockDriverState *bs,
     return true;
 }
 
-static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                       QEMUIOVector *qiov, bool is_write, int flags)
+static coroutine_fn int nvme_co_prw(BlockDriverState *bs,
+                                    uint64_t offset, uint64_t bytes,
+                                    QEMUIOVector *qiov, bool is_write,
+                                    int flags)
 {
     BDRVNVMeState *s = bs->opaque;
     int r;
-    uint8_t *buf = NULL;
+    QEMU_AUTO_VFREE uint8_t *buf = NULL;
     QEMUIOVector local_qiov;
-    size_t len = QEMU_ALIGN_UP(bytes, qemu_real_host_page_size);
+    size_t len = QEMU_ALIGN_UP(bytes, qemu_real_host_page_size());
     assert(QEMU_IS_ALIGNED(offset, s->page_size));
     assert(QEMU_IS_ALIGNED(bytes, s->page_size));
     assert(bytes <= s->max_transfer);
@@ -1195,7 +1269,7 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     }
     s->stats.unaligned_accesses++;
     trace_nvme_prw_buffered(s, offset, bytes, qiov->niov, is_write);
-    buf = qemu_try_memalign(qemu_real_host_page_size, len);
+    buf = qemu_try_memalign(qemu_real_host_page_size(), len);
 
     if (!buf) {
         return -ENOMEM;
@@ -1210,20 +1284,21 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     if (!r && !is_write) {
         qemu_iovec_from_buf(qiov, 0, buf, bytes);
     }
-    qemu_vfree(buf);
     return r;
 }
 
 static coroutine_fn int nvme_co_preadv(BlockDriverState *bs,
-                                       uint64_t offset, uint64_t bytes,
-                                       QEMUIOVector *qiov, int flags)
+                                       int64_t offset, int64_t bytes,
+                                       QEMUIOVector *qiov,
+                                       BdrvRequestFlags flags)
 {
     return nvme_co_prw(bs, offset, bytes, qiov, false, flags);
 }
 
 static coroutine_fn int nvme_co_pwritev(BlockDriverState *bs,
-                                        uint64_t offset, uint64_t bytes,
-                                        QEMUIOVector *qiov, int flags)
+                                        int64_t offset, int64_t bytes,
+                                        QEMUIOVector *qiov,
+                                        BdrvRequestFlags flags)
 {
     return nvme_co_prw(bs, offset, bytes, qiov, true, flags);
 }
@@ -1258,19 +1333,29 @@ static coroutine_fn int nvme_co_flush(BlockDriverState *bs)
 
 static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
                                               int64_t offset,
-                                              int bytes,
+                                              int64_t bytes,
                                               BdrvRequestFlags flags)
 {
     BDRVNVMeState *s = bs->opaque;
     NVMeQueuePair *ioq = s->queues[INDEX_IO(0)];
     NVMeRequest *req;
-
-    uint32_t cdw12 = ((bytes >> s->blkshift) - 1) & 0xFFFF;
+    uint32_t cdw12;
 
     if (!s->supports_write_zeroes) {
         return -ENOTSUP;
     }
 
+    if (bytes == 0) {
+        return 0;
+    }
+
+    cdw12 = ((bytes >> s->blkshift) - 1) & 0xFFFF;
+    /*
+     * We should not lose information. pwrite_zeroes_alignment and
+     * max_pwrite_zeroes guarantees it.
+     */
+    assert(((cdw12 + 1) << s->blkshift) == bytes);
+
     NvmeCmd cmd = {
         .opcode = NVME_CMD_WRITE_ZEROES,
         .nsid = cpu_to_le32(s->nsid),
@@ -1312,12 +1397,12 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
 
 static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
                                          int64_t offset,
-                                         int bytes)
+                                         int64_t bytes)
 {
     BDRVNVMeState *s = bs->opaque;
     NVMeQueuePair *ioq = s->queues[INDEX_IO(0)];
     NVMeRequest *req;
-    NvmeDsmRange *buf;
+    QEMU_AUTO_VFREE NvmeDsmRange *buf = NULL;
     QEMUIOVector local_qiov;
     int ret;
 
@@ -1339,6 +1424,14 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
 
     assert(s->queue_count > 1);
 
+    /*
+     * Filling the @buf requires @offset and @bytes to satisfy restrictions
+     * defined in nvme_refresh_limits().
+     */
+    assert(QEMU_IS_ALIGNED(bytes, 1UL << s->blkshift));
+    assert(QEMU_IS_ALIGNED(offset, 1UL << s->blkshift));
+    assert((bytes >> s->blkshift) <= UINT32_MAX);
+
     buf = qemu_try_memalign(s->page_size, s->page_size);
     if (!buf) {
         return -ENOMEM;
@@ -1384,11 +1477,33 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
     trace_nvme_dsm_done(s, offset, bytes, ret);
 out:
     qemu_iovec_destroy(&local_qiov);
-    qemu_vfree(buf);
     return ret;
 
 }
 
+static int coroutine_fn nvme_co_truncate(BlockDriverState *bs, int64_t offset,
+                                         bool exact, PreallocMode prealloc,
+                                         BdrvRequestFlags flags, Error **errp)
+{
+    int64_t cur_length;
+
+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_str(prealloc));
+        return -ENOTSUP;
+    }
+
+    cur_length = nvme_co_getlength(bs);
+    if (offset != cur_length && exact) {
+        error_setg(errp, "Cannot resize NVMe devices");
+        return -ENOTSUP;
+    } else if (offset > cur_length) {
+        error_setg(errp, "Cannot grow NVMe devices");
+        return -EINVAL;
+    }
+
+    return 0;
+}
 
 static int nvme_reopen_prepare(BDRVReopenState *reopen_state,
                                BlockReopenQueue *queue, Error **errp)
@@ -1411,6 +1526,18 @@ static void nvme_refresh_limits(BlockDriverState *bs, Error **errp)
     bs->bl.opt_mem_alignment = s->page_size;
     bs->bl.request_alignment = s->page_size;
     bs->bl.max_transfer = s->max_transfer;
+
+    /*
+     * Look at nvme_co_pwrite_zeroes: after shift and decrement we should get
+     * at most 0xFFFF
+     */
+    bs->bl.max_pwrite_zeroes = 1ULL << (s->blkshift + 16);
+    bs->bl.pwrite_zeroes_alignment = MAX(bs->bl.request_alignment,
+                                         1UL << s->blkshift);
+
+    bs->bl.max_pdiscard = (uint64_t)UINT32_MAX << s->blkshift;
+    bs->bl.pdiscard_alignment = MAX(bs->bl.request_alignment,
+                                    1UL << s->blkshift);
 }
 
 static void nvme_detach_aio_context(BlockDriverState *bs)
@@ -1426,7 +1553,7 @@ static void nvme_detach_aio_context(BlockDriverState *bs)
 
     aio_set_event_notifier(bdrv_get_aio_context(bs),
                            &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
-                           false, NULL, NULL);
+                           NULL, NULL, NULL);
 }
 
 static void nvme_attach_aio_context(BlockDriverState *bs,
@@ -1436,7 +1563,8 @@ static void nvme_attach_aio_context(BlockDriverState *bs,
 
     s->aio_context = new_context;
     aio_set_event_notifier(new_context, &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
-                           false, nvme_handle_event, nvme_poll_cb);
+                           nvme_handle_event, nvme_poll_cb,
+                           nvme_poll_ready);
 
     for (unsigned i = 0; i < s->queue_count; i++) {
         NVMeQueuePair *q = s->queues[i];
@@ -1446,42 +1574,22 @@ static void nvme_attach_aio_context(BlockDriverState *bs,
     }
 }
 
-static void nvme_aio_plug(BlockDriverState *bs)
-{
-    BDRVNVMeState *s = bs->opaque;
-    assert(!s->plugged);
-    s->plugged = true;
-}
-
-static void nvme_aio_unplug(BlockDriverState *bs)
-{
-    BDRVNVMeState *s = bs->opaque;
-    assert(s->plugged);
-    s->plugged = false;
-    for (unsigned i = INDEX_IO(0); i < s->queue_count; i++) {
-        NVMeQueuePair *q = s->queues[i];
-        qemu_mutex_lock(&q->lock);
-        nvme_kick(q);
-        nvme_process_completion(q);
-        qemu_mutex_unlock(&q->lock);
-    }
-}
-
-static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size)
+static bool nvme_register_buf(BlockDriverState *bs, void *host, size_t size,
+                              Error **errp)
 {
     int ret;
     BDRVNVMeState *s = bs->opaque;
 
-    ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL);
-    if (ret) {
-        /* FIXME: we may run out of IOVA addresses after repeated
-         * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap
-         * doesn't reclaim addresses for fixed mappings. */
-        error_report("nvme_register_buf failed: %s", strerror(-ret));
-    }
+    /*
+     * FIXME: we may run out of IOVA addresses after repeated
+     * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap
+     * doesn't reclaim addresses for fixed mappings.
+     */
+    ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, errp);
+    return ret == 0;
 }
 
-static void nvme_unregister_buf(BlockDriverState *bs, void *host)
+static void nvme_unregister_buf(BlockDriverState *bs, void *host, size_t size)
 {
     BDRVNVMeState *s = bs->opaque;
 
@@ -1521,8 +1629,9 @@ static BlockDriver bdrv_nvme = {
     .bdrv_parse_filename      = nvme_parse_filename,
     .bdrv_file_open           = nvme_file_open,
     .bdrv_close               = nvme_close,
-    .bdrv_getlength           = nvme_getlength,
+    .bdrv_co_getlength        = nvme_co_getlength,
     .bdrv_probe_blocksizes    = nvme_probe_blocksizes,
+    .bdrv_co_truncate         = nvme_co_truncate,
 
     .bdrv_co_preadv           = nvme_co_preadv,
     .bdrv_co_pwritev          = nvme_co_pwritev,
@@ -1541,9 +1650,6 @@ static BlockDriver bdrv_nvme = {
     .bdrv_detach_aio_context  = nvme_detach_aio_context,
     .bdrv_attach_aio_context  = nvme_attach_aio_context,
 
-    .bdrv_io_plug             = nvme_aio_plug,
-    .bdrv_io_unplug           = nvme_aio_unplug,
-
     .bdrv_register_buf        = nvme_register_buf,
     .bdrv_unregister_buf      = nvme_unregister_buf,
 };
diff --git a/block/parallels-ext.c b/block/parallels-ext.c
new file mode 100644 (file)
index 0000000..b4e14c8
--- /dev/null
@@ -0,0 +1,302 @@
+/*
+ * Support of Parallels Format Extension. It's a part of Parallels format
+ * driver.
+ *
+ * Copyright (c) 2021 Virtuozzo International GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "block/block-io.h"
+#include "block/block_int.h"
+#include "block/dirty-bitmap.h"
+#include "parallels.h"
+#include "crypto/hash.h"
+#include "qemu/uuid.h"
+#include "qemu/memalign.h"
+
+#define PARALLELS_FORMAT_EXTENSION_MAGIC 0xAB234CEF23DCEA87ULL
+
+#define PARALLELS_END_OF_FEATURES_MAGIC 0x0ULL
+#define PARALLELS_DIRTY_BITMAP_FEATURE_MAGIC 0x20385FAE252CB34AULL
+
+typedef struct ParallelsFormatExtensionHeader {
+    uint64_t magic; /* PARALLELS_FORMAT_EXTENSION_MAGIC */
+    uint8_t check_sum[16];
+} QEMU_PACKED ParallelsFormatExtensionHeader;
+
+typedef struct ParallelsFeatureHeader {
+    uint64_t magic;
+    uint64_t flags;
+    uint32_t data_size;
+    uint32_t _unused;
+} QEMU_PACKED ParallelsFeatureHeader;
+
+typedef struct ParallelsDirtyBitmapFeature {
+    uint64_t size;
+    uint8_t id[16];
+    uint32_t granularity;
+    uint32_t l1_size;
+    /* L1 table follows */
+} QEMU_PACKED ParallelsDirtyBitmapFeature;
+
+/* Given L1 table read bitmap data from the image and populate @bitmap */
+static int GRAPH_RDLOCK
+parallels_load_bitmap_data(BlockDriverState *bs, const uint64_t *l1_table,
+                           uint32_t l1_size, BdrvDirtyBitmap *bitmap,
+                           Error **errp)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int ret = 0;
+    uint64_t offset, limit;
+    uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
+    uint8_t *buf = NULL;
+    uint64_t i, tab_size =
+        DIV_ROUND_UP(bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size),
+                     s->cluster_size);
+
+    if (tab_size != l1_size) {
+        error_setg(errp, "Bitmap table size %" PRIu32 " does not correspond "
+                   "to bitmap size and cluster size. Expected %" PRIu64,
+                   l1_size, tab_size);
+        return -EINVAL;
+    }
+
+    buf = qemu_blockalign(bs, s->cluster_size);
+    limit = bdrv_dirty_bitmap_serialization_coverage(s->cluster_size, bitmap);
+    for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) {
+        uint64_t count = MIN(bm_size - offset, limit);
+        uint64_t entry = l1_table[i];
+
+        if (entry == 0) {
+            /* No need to deserialize zeros because @bitmap is cleared. */
+            continue;
+        }
+
+        if (entry == 1) {
+            bdrv_dirty_bitmap_deserialize_ones(bitmap, offset, count, false);
+        } else {
+            ret = bdrv_pread(bs->file, entry << BDRV_SECTOR_BITS,
+                             s->cluster_size, buf, 0);
+            if (ret < 0) {
+                error_setg_errno(errp, -ret,
+                                 "Failed to read bitmap data cluster");
+                goto finish;
+            }
+            bdrv_dirty_bitmap_deserialize_part(bitmap, buf, offset, count,
+                                               false);
+        }
+    }
+    ret = 0;
+
+    bdrv_dirty_bitmap_deserialize_finish(bitmap);
+
+finish:
+    qemu_vfree(buf);
+
+    return ret;
+}
+
+/*
+ * @data buffer (of @data_size size) is the Dirty bitmaps feature which
+ * consists of ParallelsDirtyBitmapFeature followed by L1 table.
+ */
+static BdrvDirtyBitmap * GRAPH_RDLOCK
+parallels_load_bitmap(BlockDriverState *bs, uint8_t *data, size_t data_size,
+                      Error **errp)
+{
+    int ret;
+    ParallelsDirtyBitmapFeature bf;
+    g_autofree uint64_t *l1_table = NULL;
+    BdrvDirtyBitmap *bitmap;
+    QemuUUID uuid;
+    char uuidstr[UUID_STR_LEN];
+    int i;
+
+    if (data_size < sizeof(bf)) {
+        error_setg(errp, "Too small Bitmap Feature area in Parallels Format "
+                   "Extension: %zu bytes, expected at least %zu bytes",
+                   data_size, sizeof(bf));
+        return NULL;
+    }
+    memcpy(&bf, data, sizeof(bf));
+    bf.size = le64_to_cpu(bf.size);
+    bf.granularity = le32_to_cpu(bf.granularity) << BDRV_SECTOR_BITS;
+    bf.l1_size = le32_to_cpu(bf.l1_size);
+    data += sizeof(bf);
+    data_size -= sizeof(bf);
+
+    if (bf.size != bs->total_sectors) {
+        error_setg(errp, "Bitmap size (in sectors) %" PRId64 " differs from "
+                   "disk size in sectors %" PRId64, bf.size, bs->total_sectors);
+        return NULL;
+    }
+
+    if (bf.l1_size * sizeof(uint64_t) > data_size) {
+        error_setg(errp, "Bitmaps feature corrupted: l1 table exceeds "
+                   "extension data_size");
+        return NULL;
+    }
+
+    memcpy(&uuid, bf.id, sizeof(uuid));
+    qemu_uuid_unparse(&uuid, uuidstr);
+    bitmap = bdrv_create_dirty_bitmap(bs, bf.granularity, uuidstr, errp);
+    if (!bitmap) {
+        return NULL;
+    }
+
+    l1_table = g_new(uint64_t, bf.l1_size);
+    for (i = 0; i < bf.l1_size; i++, data += sizeof(uint64_t)) {
+        l1_table[i] = ldq_le_p(data);
+    }
+
+    ret = parallels_load_bitmap_data(bs, l1_table, bf.l1_size, bitmap, errp);
+    if (ret < 0) {
+        bdrv_release_dirty_bitmap(bitmap);
+        return NULL;
+    }
+
+    /* We support format extension only for RO parallels images. */
+    assert(!(bs->open_flags & BDRV_O_RDWR));
+    bdrv_dirty_bitmap_set_readonly(bitmap, true);
+
+    return bitmap;
+}
+
+static int GRAPH_RDLOCK
+parallels_parse_format_extension(BlockDriverState *bs, uint8_t *ext_cluster,
+                                 Error **errp)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int ret;
+    int remaining = s->cluster_size;
+    uint8_t *pos = ext_cluster;
+    ParallelsFormatExtensionHeader eh;
+    g_autofree uint8_t *hash = NULL;
+    size_t hash_len = 0;
+    GSList *bitmaps = NULL, *el;
+
+    memcpy(&eh, pos, sizeof(eh));
+    eh.magic = le64_to_cpu(eh.magic);
+    pos += sizeof(eh);
+    remaining -= sizeof(eh);
+
+    if (eh.magic != PARALLELS_FORMAT_EXTENSION_MAGIC) {
+        error_setg(errp, "Wrong parallels Format Extension magic: 0x%" PRIx64
+                   ", expected: 0x%llx", eh.magic,
+                   PARALLELS_FORMAT_EXTENSION_MAGIC);
+        goto fail;
+    }
+
+    ret = qcrypto_hash_bytes(QCRYPTO_HASH_ALG_MD5, (char *)pos, remaining,
+                             &hash, &hash_len, errp);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    if (hash_len != sizeof(eh.check_sum) ||
+        memcmp(hash, eh.check_sum, sizeof(eh.check_sum)) != 0) {
+        error_setg(errp, "Wrong checksum in Format Extension header. Format "
+                   "extension is corrupted.");
+        goto fail;
+    }
+
+    while (true) {
+        ParallelsFeatureHeader fh;
+        BdrvDirtyBitmap *bitmap;
+
+        if (remaining < sizeof(fh)) {
+            error_setg(errp, "Can not read feature header, as remaining bytes "
+                       "(%d) in Format Extension is less than Feature header "
+                       "size (%zu)", remaining, sizeof(fh));
+            goto fail;
+        }
+
+        memcpy(&fh, pos, sizeof(fh));
+        pos += sizeof(fh);
+        remaining -= sizeof(fh);
+
+        fh.magic = le64_to_cpu(fh.magic);
+        fh.flags = le64_to_cpu(fh.flags);
+        fh.data_size = le32_to_cpu(fh.data_size);
+
+        if (fh.flags) {
+            error_setg(errp, "Flags for extension feature are unsupported");
+            goto fail;
+        }
+
+        if (fh.data_size > remaining) {
+            error_setg(errp, "Feature data_size exceedes Format Extension "
+                       "cluster");
+            goto fail;
+        }
+
+        switch (fh.magic) {
+        case PARALLELS_END_OF_FEATURES_MAGIC:
+            return 0;
+
+        case PARALLELS_DIRTY_BITMAP_FEATURE_MAGIC:
+            bitmap = parallels_load_bitmap(bs, pos, fh.data_size, errp);
+            if (!bitmap) {
+                goto fail;
+            }
+            bitmaps = g_slist_append(bitmaps, bitmap);
+            break;
+
+        default:
+            error_setg(errp, "Unknown feature: 0x%" PRIx64, fh.magic);
+            goto fail;
+        }
+
+        pos = ext_cluster + QEMU_ALIGN_UP(pos + fh.data_size - ext_cluster, 8);
+    }
+
+fail:
+    for (el = bitmaps; el; el = el->next) {
+        bdrv_release_dirty_bitmap(el->data);
+    }
+    g_slist_free(bitmaps);
+
+    return -EINVAL;
+}
+
+int parallels_read_format_extension(BlockDriverState *bs,
+                                    int64_t ext_off, Error **errp)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int ret;
+    uint8_t *ext_cluster = qemu_blockalign(bs, s->cluster_size);
+
+    assert(ext_off > 0);
+
+    ret = bdrv_pread(bs->file, ext_off, s->cluster_size, ext_cluster, 0);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Failed to read Format Extension cluster");
+        goto out;
+    }
+
+    ret = parallels_parse_format_extension(bs, ext_cluster, errp);
+
+out:
+    qemu_vfree(ext_cluster);
+
+    return ret;
+}
index 3c22dfdc9daa432a3c2ccf4b6910011207f54bae..9205a0864fa6ba96319eb58b3da54d11e99dcadb 100644 (file)
@@ -29,6 +29,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "block/block_int.h"
 #include "block/qdict.h"
@@ -40,6 +41,7 @@
 #include "qapi/qapi-visit-block-core.h"
 #include "qemu/bswap.h"
 #include "qemu/bitmap.h"
+#include "qemu/memalign.h"
 #include "migration/blocker.h"
 #include "parallels.h"
 
@@ -134,6 +136,12 @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
     return MIN(nb_sectors, ret);
 }
 
+static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off)
+{
+    off -= s->data_start << BDRV_SECTOR_BITS;
+    return off / s->cluster_size;
+}
+
 static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
                             int nb_sectors, int *pnum)
 {
@@ -163,12 +171,89 @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
     return start_off;
 }
 
-static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
-                                 int nb_sectors, int *pnum)
+static void parallels_set_bat_entry(BDRVParallelsState *s,
+                                    uint32_t index, uint32_t offset)
+{
+    s->bat_bitmap[index] = cpu_to_le32(offset);
+    bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1);
+}
+
+static int mark_used(BlockDriverState *bs, unsigned long *bitmap,
+                     uint32_t bitmap_size, int64_t off, uint32_t count)
+{
+    BDRVParallelsState *s = bs->opaque;
+    uint32_t cluster_index = host_cluster_index(s, off);
+    unsigned long next_used;
+    if (cluster_index + count > bitmap_size) {
+        return -E2BIG;
+    }
+    next_used = find_next_bit(bitmap, bitmap_size, cluster_index);
+    if (next_used < cluster_index + count) {
+        return -EBUSY;
+    }
+    bitmap_set(bitmap, cluster_index, count);
+    return 0;
+}
+
+/*
+ * Collect used bitmap. The image can contain errors, we should fill the
+ * bitmap anyway, as much as we can. This information will be used for
+ * error resolution.
+ */
+static int GRAPH_RDLOCK parallels_fill_used_bitmap(BlockDriverState *bs)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int64_t payload_bytes;
+    uint32_t i;
+    int err = 0;
+
+    payload_bytes = bdrv_getlength(bs->file->bs);
+    if (payload_bytes < 0) {
+        return payload_bytes;
+    }
+    payload_bytes -= s->data_start * BDRV_SECTOR_SIZE;
+    if (payload_bytes < 0) {
+        return -EINVAL;
+    }
+
+    s->used_bmap_size = DIV_ROUND_UP(payload_bytes, s->cluster_size);
+    if (s->used_bmap_size == 0) {
+        return 0;
+    }
+    s->used_bmap = bitmap_try_new(s->used_bmap_size);
+    if (s->used_bmap == NULL) {
+        return -ENOMEM;
+    }
+
+    for (i = 0; i < s->bat_size; i++) {
+        int err2;
+        int64_t host_off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+        if (host_off == 0) {
+            continue;
+        }
+
+        err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off, 1);
+        if (err2 < 0 && err == 0) {
+            err = err2;
+        }
+    }
+    return err;
+}
+
+static void parallels_free_used_bitmap(BlockDriverState *bs)
+{
+    BDRVParallelsState *s = bs->opaque;
+    s->used_bmap_size = 0;
+    g_free(s->used_bmap);
+}
+
+static int64_t coroutine_fn GRAPH_RDLOCK
+allocate_clusters(BlockDriverState *bs, int64_t sector_num,
+                  int nb_sectors, int *pnum)
 {
     int ret = 0;
     BDRVParallelsState *s = bs->opaque;
-    int64_t pos, space, idx, to_allocate, i, len;
+    int64_t i, pos, idx, to_allocate, first_free, host_off;
 
     pos = block_status(s, sector_num, nb_sectors, pnum);
     if (pos > 0) {
@@ -178,7 +263,8 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
     idx = sector_num / s->tracks;
     to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
 
-    /* This function is called only by parallels_co_writev(), which will never
+    /*
+     * This function is called only by parallels_co_writev(), which will never
      * pass a sector_num at or beyond the end of the image (because the block
      * layer never passes such a sector_num to that function). Therefore, idx
      * is always below s->bat_size.
@@ -186,47 +272,79 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
      * exceed the image end. Therefore, idx + to_allocate cannot exceed
      * s->bat_size.
      * Note that s->bat_size is an unsigned int, therefore idx + to_allocate
-     * will always fit into a uint32_t. */
+     * will always fit into a uint32_t.
+     */
     assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);
 
-    space = to_allocate * s->tracks;
-    len = bdrv_getlength(bs->file->bs);
-    if (len < 0) {
-        return len;
-    }
-    if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) {
-        space += s->prealloc_size;
+    first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size);
+    if (first_free == s->used_bmap_size) {
+        uint32_t new_usedsize;
+        int64_t bytes = to_allocate * s->cluster_size;
+        bytes += s->prealloc_size * BDRV_SECTOR_SIZE;
+
+        host_off = s->data_end * BDRV_SECTOR_SIZE;
+
         /*
          * We require the expanded size to read back as zero. If the
          * user permitted truncation, we try that; but if it fails, we
          * force the safer-but-slower fallocate.
          */
         if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) {
-            ret = bdrv_truncate(bs->file,
-                                (s->data_end + space) << BDRV_SECTOR_BITS,
-                                false, PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE,
-                                NULL);
+            ret = bdrv_co_truncate(bs->file, host_off + bytes,
+                                   false, PREALLOC_MODE_OFF,
+                                   BDRV_REQ_ZERO_WRITE, NULL);
             if (ret == -ENOTSUP) {
                 s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
             }
         }
         if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
-            ret = bdrv_pwrite_zeroes(bs->file,
-                                     s->data_end << BDRV_SECTOR_BITS,
-                                     space << BDRV_SECTOR_BITS, 0);
+            ret = bdrv_co_pwrite_zeroes(bs->file, host_off, bytes, 0);
         }
         if (ret < 0) {
             return ret;
         }
+
+        new_usedsize = s->used_bmap_size + bytes / s->cluster_size;
+        s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size,
+                                          new_usedsize);
+        s->used_bmap_size = new_usedsize;
+    } else {
+        int64_t next_used;
+        next_used = find_next_bit(s->used_bmap, s->used_bmap_size, first_free);
+
+        /* Not enough continuous clusters in the middle, adjust the size */
+        if (next_used - first_free < to_allocate) {
+            to_allocate = next_used - first_free;
+            *pnum = (idx + to_allocate) * s->tracks - sector_num;
+        }
+
+        host_off = s->data_start * BDRV_SECTOR_SIZE;
+        host_off += first_free * s->cluster_size;
+
+        /*
+         * No need to preallocate if we are using tail area from the above
+         * branch. In the other case we are likely re-using hole. Preallocate
+         * the space if required by the prealloc_mode.
+         */
+        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE &&
+                host_off < s->data_end * BDRV_SECTOR_SIZE) {
+            ret = bdrv_co_pwrite_zeroes(bs->file, host_off,
+                                        s->cluster_size * to_allocate, 0);
+            if (ret < 0) {
+                return ret;
+            }
+        }
     }
 
-    /* Try to read from backing to fill empty clusters
+    /*
+     * Try to read from backing to fill empty clusters
      * FIXME: 1. previous write_zeroes may be redundant
      *        2. most of data we read from backing will be rewritten by
      *           parallels_co_writev. On aligned-to-cluster write we do not need
      *           this read at all.
      *        3. it would be good to combine write of data from backing and new
-     *           data into one write call */
+     *           data into one write call.
+     */
     if (bs->backing) {
         int64_t nb_cow_sectors = to_allocate * s->tracks;
         int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS;
@@ -239,26 +357,34 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
             return ret;
         }
 
-        ret = bdrv_co_pwritev(bs->file, s->data_end * BDRV_SECTOR_SIZE,
-                              nb_cow_bytes, buf, 0);
+        ret = bdrv_co_pwrite(bs->file, s->data_end * BDRV_SECTOR_SIZE,
+                             nb_cow_bytes, buf, 0);
         qemu_vfree(buf);
         if (ret < 0) {
             return ret;
         }
     }
 
+    ret = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off, to_allocate);
+    if (ret < 0) {
+        /* Image consistency is broken. Alarm! */
+        return ret;
+    }
     for (i = 0; i < to_allocate; i++) {
-        s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
-        s->data_end += s->tracks;
-        bitmap_set(s->bat_dirty_bmap,
-                   bat_entry_off(idx + i) / s->bat_dirty_block, 1);
+        parallels_set_bat_entry(s, idx + i,
+                host_off / BDRV_SECTOR_SIZE / s->off_multiplier);
+        host_off += s->cluster_size;
+    }
+    if (host_off > s->data_end * BDRV_SECTOR_SIZE) {
+        s->data_end = host_off / BDRV_SECTOR_SIZE;
     }
 
     return bat2sect(s, idx) + sector_num % s->tracks;
 }
 
 
-static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_flush_to_os(BlockDriverState *bs)
 {
     BDRVParallelsState *s = bs->opaque;
     unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block);
@@ -275,8 +401,8 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
         if (off + to_write > s->header_size) {
             to_write = s->header_size - off;
         }
-        ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off,
-                          to_write);
+        ret = bdrv_co_pwrite(bs->file, off, to_write,
+                             (uint8_t *)s->header + off, 0);
         if (ret < 0) {
             qemu_co_mutex_unlock(&s->lock);
             return ret;
@@ -289,14 +415,10 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
     return 0;
 }
 
-
-static int coroutine_fn parallels_co_block_status(BlockDriverState *bs,
-                                                  bool want_zero,
-                                                  int64_t offset,
-                                                  int64_t bytes,
-                                                  int64_t *pnum,
-                                                  int64_t *map,
-                                                  BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
+                          int64_t bytes, int64_t *pnum, int64_t *map,
+                          BlockDriverState **file)
 {
     BDRVParallelsState *s = bs->opaque;
     int count;
@@ -317,16 +439,15 @@ static int coroutine_fn parallels_co_block_status(BlockDriverState *bs,
     return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
 }
 
-static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
-                                            int64_t sector_num, int nb_sectors,
-                                            QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                    QEMUIOVector *qiov, int flags)
 {
     BDRVParallelsState *s = bs->opaque;
     uint64_t bytes_done = 0;
     QEMUIOVector hd_qiov;
     int ret = 0;
 
-    assert(!flags);
     qemu_iovec_init(&hd_qiov, qiov->niov);
 
     while (nb_sectors > 0) {
@@ -361,8 +482,9 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
     return ret;
 }
 
-static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                   QEMUIOVector *qiov)
 {
     BDRVParallelsState *s = bs->opaque;
     uint64_t bytes_done = 0;
@@ -412,89 +534,234 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
 }
 
 
-static int coroutine_fn parallels_co_check(BlockDriverState *bs,
-                                           BdrvCheckResult *res,
-                                           BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
+    int ret = 0;
+    uint32_t cluster, count;
     BDRVParallelsState *s = bs->opaque;
-    int64_t size, prev_off, high_off;
-    int ret;
-    uint32_t i;
-    bool flush_bat = false;
-    int cluster_size = s->tracks << BDRV_SECTOR_BITS;
 
-    size = bdrv_getlength(bs->file->bs);
-    if (size < 0) {
-        res->check_errors++;
-        return size;
+    /*
+     * The image does not support ZERO mark inside the BAT, which means that
+     * stale data could be exposed from the backing file.
+     */
+    if (bs->backing) {
+        return -ENOTSUP;
+    }
+
+    if (!QEMU_IS_ALIGNED(offset, s->cluster_size)) {
+        return -ENOTSUP;
+    } else if (!QEMU_IS_ALIGNED(bytes, s->cluster_size)) {
+        return -ENOTSUP;
     }
 
+    cluster = offset / s->cluster_size;
+    count = bytes / s->cluster_size;
+
     qemu_co_mutex_lock(&s->lock);
-    if (s->header_unclean) {
-        fprintf(stderr, "%s image was not closed correctly\n",
-                fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
-        res->corruptions++;
-        if (fix & BDRV_FIX_ERRORS) {
-            /* parallels_close will do the job right */
-            res->corruptions_fixed++;
-            s->header_unclean = false;
+    for (; count > 0; cluster++, count--) {
+        int64_t host_off = bat2sect(s, cluster) << BDRV_SECTOR_BITS;
+        if (host_off == 0) {
+            continue;
+        }
+
+        ret = bdrv_co_pdiscard(bs->file, host_off, s->cluster_size);
+        if (ret < 0) {
+            goto done;
         }
+
+        parallels_set_bat_entry(s, cluster, 0);
+        bitmap_clear(s->used_bmap, host_cluster_index(s, host_off), 1);
     }
+done:
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
+}
 
-    res->bfi.total_clusters = s->bat_size;
-    res->bfi.compressed_clusters = 0; /* compression is not supported */
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                           BdrvRequestFlags flags)
+{
+    /*
+     * The zero flag is missed in the Parallels format specification. We can
+     * resort to discard if we have no backing file (this condition is checked
+     * inside parallels_co_pdiscard().
+     */
+    return parallels_co_pdiscard(bs, offset, bytes);
+}
 
-    high_off = 0;
-    prev_off = 0;
-    for (i = 0; i < s->bat_size; i++) {
-        int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
-        if (off == 0) {
-            prev_off = 0;
-            continue;
+
+static void parallels_check_unclean(BlockDriverState *bs,
+                                    BdrvCheckResult *res,
+                                    BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+
+    if (!s->header_unclean) {
+        return;
+    }
+
+    fprintf(stderr, "%s image was not closed correctly\n",
+            fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
+    res->corruptions++;
+    if (fix & BDRV_FIX_ERRORS) {
+        /* parallels_close will do the job right */
+        res->corruptions_fixed++;
+        s->header_unclean = false;
+    }
+}
+
+/*
+ * Returns true if data_off is correct, otherwise false. In both cases
+ * correct_offset is set to the proper value.
+ */
+static bool parallels_test_data_off(BDRVParallelsState *s,
+                                    int64_t file_nb_sectors,
+                                    uint32_t *correct_offset)
+{
+    uint32_t data_off, min_off;
+    bool old_magic;
+
+    /*
+     * There are two slightly different image formats: with "WithoutFreeSpace"
+     * or "WithouFreSpacExt" magic words. Call the first one as "old magic".
+     * In such images data_off field can be zero. In this case the offset is
+     * calculated as the end of BAT table plus some padding to ensure sector
+     * size alignment.
+     */
+    old_magic = !memcmp(s->header->magic, HEADER_MAGIC, 16);
+
+    min_off = DIV_ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
+    if (!old_magic) {
+        min_off = ROUND_UP(min_off, s->cluster_size / BDRV_SECTOR_SIZE);
+    }
+
+    if (correct_offset) {
+        *correct_offset = min_off;
+    }
+
+    data_off = le32_to_cpu(s->header->data_off);
+    if (data_off == 0 && old_magic) {
+        return true;
+    }
+
+    if (data_off < min_off || data_off > file_nb_sectors) {
+        return false;
+    }
+
+    if (correct_offset) {
+        *correct_offset = data_off;
+    }
+
+    return true;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res,
+                         BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int64_t file_size;
+    uint32_t data_off;
+
+    file_size = bdrv_co_nb_sectors(bs->file->bs);
+    if (file_size < 0) {
+        res->check_errors++;
+        return file_size;
+    }
+
+    if (parallels_test_data_off(s, file_size, &data_off)) {
+        return 0;
+    }
+
+    res->corruptions++;
+    if (fix & BDRV_FIX_ERRORS) {
+        int err;
+        s->header->data_off = cpu_to_le32(data_off);
+        s->data_start = data_off;
+
+        parallels_free_used_bitmap(bs);
+        err = parallels_fill_used_bitmap(bs);
+        if (err == -ENOMEM) {
+            res->check_errors++;
+            return err;
         }
 
-        /* cluster outside the image */
-        if (off > size) {
+        res->corruptions_fixed++;
+    }
+
+    fprintf(stderr, "%s data_off field has incorrect value\n",
+            fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
+
+    return 0;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
+                              BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+    uint32_t i;
+    int64_t off, high_off, size;
+
+    size = bdrv_co_getlength(bs->file->bs);
+    if (size < 0) {
+        res->check_errors++;
+        return size;
+    }
+
+    high_off = 0;
+    for (i = 0; i < s->bat_size; i++) {
+        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+        if (off + s->cluster_size > size) {
             fprintf(stderr, "%s cluster %u is outside image\n",
                     fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
             res->corruptions++;
             if (fix & BDRV_FIX_ERRORS) {
-                prev_off = 0;
-                s->bat_bitmap[i] = 0;
+                parallels_set_bat_entry(s, i, 0);
                 res->corruptions_fixed++;
-                flush_bat = true;
-                continue;
             }
+            continue;
         }
-
-        res->bfi.allocated_clusters++;
-        if (off > high_off) {
+        if (high_off < off) {
             high_off = off;
         }
+    }
 
-        if (prev_off != 0 && (prev_off + cluster_size) != off) {
-            res->bfi.fragmented_clusters++;
-        }
-        prev_off = off;
+    if (high_off == 0) {
+        res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
+    } else {
+        res->image_end_offset = high_off + s->cluster_size;
+        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
     }
 
-    ret = 0;
-    if (flush_bat) {
-        ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size);
-        if (ret < 0) {
-            res->check_errors++;
-            goto out;
-        }
+    return 0;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
+                     BdrvCheckMode fix, bool explicit)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int64_t size;
+    int ret;
+
+    size = bdrv_co_getlength(bs->file->bs);
+    if (size < 0) {
+        res->check_errors++;
+        return size;
     }
 
-    res->image_end_offset = high_off + cluster_size;
     if (size > res->image_end_offset) {
         int64_t count;
-        count = DIV_ROUND_UP(size - res->image_end_offset, cluster_size);
-        fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
-                fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
-                size - res->image_end_offset);
-        res->leaks += count;
+        count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
+        if (explicit) {
+            fprintf(stderr,
+                    "%s space leaked at the end of the image %" PRId64 "\n",
+                    fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
+                    size - res->image_end_offset);
+            res->leaks += count;
+        }
         if (fix & BDRV_FIX_LEAKS) {
             Error *local_err = NULL;
 
@@ -502,25 +769,231 @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs,
              * In order to really repair the image, we must shrink it.
              * That means we have to pass exact=true.
              */
-            ret = bdrv_truncate(bs->file, res->image_end_offset, true,
-                                PREALLOC_MODE_OFF, 0, &local_err);
+            ret = bdrv_co_truncate(bs->file, res->image_end_offset, true,
+                                   PREALLOC_MODE_OFF, 0, &local_err);
             if (ret < 0) {
                 error_report_err(local_err);
                 res->check_errors++;
-                goto out;
+                return ret;
+            }
+            if (explicit) {
+                res->leaks_fixed += count;
             }
-            res->leaks_fixed += count;
         }
     }
 
-out:
-    qemu_co_mutex_unlock(&s->lock);
+    return 0;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res,
+                          BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int64_t host_off, host_sector, guest_sector;
+    unsigned long *bitmap;
+    uint32_t i, bitmap_size, bat_entry;
+    int n, ret = 0;
+    uint64_t *buf = NULL;
+    bool fixed = false;
+
+    /*
+     * Create a bitmap of used clusters.
+     * If a bit is set, there is a BAT entry pointing to this cluster.
+     * Loop through the BAT entries, check bits relevant to an entry offset.
+     * If bit is set, this entry is duplicated. Otherwise set the bit.
+     *
+     * We shouldn't worry about newly allocated clusters outside the image
+     * because they are created higher then any existing cluster pointed by
+     * a BAT entry.
+     */
+    bitmap_size = host_cluster_index(s, res->image_end_offset);
+    if (bitmap_size == 0) {
+        return 0;
+    }
+    if (res->image_end_offset % s->cluster_size) {
+        /* A not aligned image end leads to a bitmap shorter by 1 */
+        bitmap_size++;
+    }
+
+    bitmap = bitmap_new(bitmap_size);
+
+    buf = qemu_blockalign(bs, s->cluster_size);
+
+    for (i = 0; i < s->bat_size; i++) {
+        host_off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+        if (host_off == 0) {
+            continue;
+        }
+
+        ret = mark_used(bs, bitmap, bitmap_size, host_off, 1);
+        assert(ret != -E2BIG);
+        if (ret == 0) {
+            continue;
+        }
+
+        /* this cluster duplicates another one */
+        fprintf(stderr, "%s duplicate offset in BAT entry %u\n",
+                fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
+
+        res->corruptions++;
+
+        if (!(fix & BDRV_FIX_ERRORS)) {
+            continue;
+        }
+
+        /*
+         * Reset the entry and allocate a new cluster
+         * for the relevant guest offset. In this way we let
+         * the lower layer to place the new cluster properly.
+         * Copy the original cluster to the allocated one.
+         * But before save the old offset value for repairing
+         * if we have an error.
+         */
+        bat_entry = s->bat_bitmap[i];
+        parallels_set_bat_entry(s, i, 0);
+
+        ret = bdrv_co_pread(bs->file, host_off, s->cluster_size, buf, 0);
+        if (ret < 0) {
+            res->check_errors++;
+            goto out_repair_bat;
+        }
+
+        guest_sector = (i * (int64_t)s->cluster_size) >> BDRV_SECTOR_BITS;
+        host_sector = allocate_clusters(bs, guest_sector, s->tracks, &n);
+        if (host_sector < 0) {
+            res->check_errors++;
+            goto out_repair_bat;
+        }
+        host_off = host_sector << BDRV_SECTOR_BITS;
+
+        ret = bdrv_co_pwrite(bs->file, host_off, s->cluster_size, buf, 0);
+        if (ret < 0) {
+            res->check_errors++;
+            goto out_repair_bat;
+        }
+
+        if (host_off + s->cluster_size > res->image_end_offset) {
+            res->image_end_offset = host_off + s->cluster_size;
+        }
+
+        /*
+         * In the future allocate_cluster() will reuse holed offsets
+         * inside the image. Keep the used clusters bitmap content
+         * consistent for the new allocated clusters too.
+         *
+         * Note, clusters allocated outside the current image are not
+         * considered, and the bitmap size doesn't change. This specifically
+         * means that -E2BIG is OK.
+         */
+        ret = mark_used(bs, bitmap, bitmap_size, host_off, 1);
+        if (ret == -EBUSY) {
+            res->check_errors++;
+            goto out_repair_bat;
+        }
+
+        fixed = true;
+        res->corruptions_fixed++;
+
+    }
+
+    if (fixed) {
+        /*
+         * When new clusters are allocated, the file size increases by
+         * 128 Mb. We need to truncate the file to the right size. Let
+         * the leak fix code make its job without res changing.
+         */
+        ret = parallels_check_leak(bs, res, fix, false);
+    }
+
+out_free:
+    g_free(buf);
+    g_free(bitmap);
+    return ret;
+/*
+ * We can get here only from places where index and old_offset have
+ * meaningful values.
+ */
+out_repair_bat:
+    s->bat_bitmap[i] = bat_entry;
+    goto out_free;
+}
+
+static void parallels_collect_statistics(BlockDriverState *bs,
+                                         BdrvCheckResult *res,
+                                         BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int64_t off, prev_off;
+    uint32_t i;
+
+    res->bfi.total_clusters = s->bat_size;
+    res->bfi.compressed_clusters = 0; /* compression is not supported */
+
+    prev_off = 0;
+    for (i = 0; i < s->bat_size; i++) {
+        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+        /*
+         * If BDRV_FIX_ERRORS is not set, out-of-image BAT entries were not
+         * fixed. Skip not allocated and out-of-image BAT entries.
+         */
+        if (off == 0 || off + s->cluster_size > res->image_end_offset) {
+            prev_off = 0;
+            continue;
+        }
+
+        if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
+            res->bfi.fragmented_clusters++;
+        }
+        prev_off = off;
+        res->bfi.allocated_clusters++;
+    }
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
+                   BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int ret;
+
+    WITH_QEMU_LOCK_GUARD(&s->lock) {
+        parallels_check_unclean(bs, res, fix);
+
+        ret = parallels_check_data_off(bs, res, fix);
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = parallels_check_outside_image(bs, res, fix);
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = parallels_check_leak(bs, res, fix, true);
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = parallels_check_duplicate(bs, res, fix);
+        if (ret < 0) {
+            return ret;
+        }
+
+        parallels_collect_statistics(bs, res, fix);
+    }
+
+    ret = bdrv_co_flush(bs);
+    if (ret < 0) {
+        res->check_errors++;
+    }
+
     return ret;
 }
 
 
-static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
-                                            Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+parallels_co_create(BlockdevCreateOptions* opts, Error **errp)
 {
     BlockdevCreateOptionsParallels *parallels_opts;
     BlockDriverState *bs;
@@ -564,13 +1037,13 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
     }
 
     /* Create BlockBackend to write to the image */
-    bs = bdrv_open_blockdev_ref(parallels_opts->file, errp);
+    bs = bdrv_co_open_blockdev_ref(parallels_opts->file, errp);
     if (bs == NULL) {
         return -EIO;
     }
 
-    blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
-                          errp);
+    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+                             errp);
     if (!blk) {
         ret = -EPERM;
         goto out;
@@ -598,20 +1071,20 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
     memset(tmp, 0, sizeof(tmp));
     memcpy(tmp, &header, sizeof(header));
 
-    ret = blk_pwrite(blk, 0, tmp, BDRV_SECTOR_SIZE, 0);
+    ret = blk_co_pwrite(blk, 0, BDRV_SECTOR_SIZE, tmp, 0);
     if (ret < 0) {
         goto exit;
     }
-    ret = blk_pwrite_zeroes(blk, BDRV_SECTOR_SIZE,
-                            (bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
+    ret = blk_co_pwrite_zeroes(blk, BDRV_SECTOR_SIZE,
+                               (bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
     if (ret < 0) {
         goto exit;
     }
 
     ret = 0;
 out:
-    blk_unref(blk);
-    bdrv_unref(bs);
+    blk_co_unref(blk);
+    bdrv_co_unref(bs);
     return ret;
 
 exit:
@@ -619,10 +1092,9 @@ exit:
     goto out;
 }
 
-static int coroutine_fn parallels_co_create_opts(BlockDriver *drv,
-                                                 const char *filename,
-                                                 QemuOpts *opts,
-                                                 Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+parallels_co_create_opts(BlockDriver *drv, const char *filename,
+                         QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
     BlockDriverState *bs = NULL;
@@ -645,13 +1117,13 @@ static int coroutine_fn parallels_co_create_opts(BlockDriver *drv,
     }
 
     /* Create and open the file (protocol layer) */
-    ret = bdrv_create_file(filename, opts, errp);
+    ret = bdrv_co_create_file(filename, opts, errp);
     if (ret < 0) {
         goto done;
     }
 
-    bs = bdrv_open(filename, NULL, NULL,
-                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs = bdrv_co_open(filename, NULL, NULL,
+                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (bs == NULL) {
         ret = -EIO;
         goto done;
@@ -689,7 +1161,7 @@ static int coroutine_fn parallels_co_create_opts(BlockDriver *drv,
 
 done:
     qobject_unref(qdict);
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
     qapi_free_BlockdevCreateOptions(create_options);
     return ret;
 }
@@ -713,7 +1185,7 @@ static int parallels_probe(const uint8_t *buf, int buf_size,
     return 0;
 }
 
-static int parallels_update_header(BlockDriverState *bs)
+static int GRAPH_RDLOCK parallels_update_header(BlockDriverState *bs)
 {
     BDRVParallelsState *s = bs->opaque;
     unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs),
@@ -722,7 +1194,45 @@ static int parallels_update_header(BlockDriverState *bs)
     if (size > s->header_size) {
         size = s->header_size;
     }
-    return bdrv_pwrite_sync(bs->file, 0, s->header, size);
+    return bdrv_pwrite_sync(bs->file, 0, size, s->header, 0);
+}
+
+
+static int parallels_opts_prealloc(BlockDriverState *bs, QDict *options,
+                                   Error **errp)
+{
+    int err;
+    char *buf;
+    int64_t bytes;
+    BDRVParallelsState *s = bs->opaque;
+    Error *local_err = NULL;
+    QemuOpts *opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, errp);
+    if (!opts) {
+        return -ENOMEM;
+    }
+
+    err = -EINVAL;
+    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
+        goto done;
+    }
+
+    bytes = qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
+    s->prealloc_size = bytes >> BDRV_SECTOR_BITS;
+    buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
+    /* prealloc_mode can be downgraded later during allocate_clusters */
+    s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
+                                       PRL_PREALLOC_MODE_FALLOCATE,
+                                       &local_err);
+    g_free(buf);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        goto done;
+    }
+    err = 0;
+
+done:
+    qemu_opts_del(opts);
+    return err;
 }
 
 static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
@@ -731,19 +1241,30 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
     BDRVParallelsState *s = bs->opaque;
     ParallelsHeader ph;
     int ret, size, i;
-    QemuOpts *opts = NULL;
-    Error *local_err = NULL;
-    char *buf;
+    int64_t file_nb_sectors, sector;
+    uint32_t data_start;
+    bool need_check = false;
+
+    ret = parallels_opts_prealloc(bs, options, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_IMAGE, false, errp);
-    if (!bs->file) {
+    file_nb_sectors = bdrv_nb_sectors(bs->file->bs);
+    if (file_nb_sectors < 0) {
         return -EINVAL;
     }
 
-    ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
+    ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0);
     if (ret < 0) {
-        goto fail;
+        return ret;
     }
 
     bs->total_sectors = le64_to_cpu(ph.nb_sectors);
@@ -763,84 +1284,68 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
     s->tracks = le32_to_cpu(ph.tracks);
     if (s->tracks == 0) {
         error_setg(errp, "Invalid image: Zero sectors per track");
-        ret = -EINVAL;
-        goto fail;
+        return -EINVAL;
     }
     if (s->tracks > INT32_MAX/513) {
         error_setg(errp, "Invalid image: Too big cluster");
-        ret = -EFBIG;
-        goto fail;
+        return -EFBIG;
     }
+    s->prealloc_size = MAX(s->tracks, s->prealloc_size);
+    s->cluster_size = s->tracks << BDRV_SECTOR_BITS;
 
     s->bat_size = le32_to_cpu(ph.bat_entries);
     if (s->bat_size > INT_MAX / sizeof(uint32_t)) {
         error_setg(errp, "Catalog too large");
-        ret = -EFBIG;
-        goto fail;
+        return -EFBIG;
     }
 
     size = bat_entry_off(s->bat_size);
     s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs));
     s->header = qemu_try_blockalign(bs->file->bs, s->header_size);
     if (s->header == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-    s->data_end = le32_to_cpu(ph.data_off);
-    if (s->data_end == 0) {
-        s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
-    }
-    if (s->data_end < s->header_size) {
-        /* there is not enough unused space to fit to block align between BAT
-           and actual data. We can't avoid read-modify-write... */
-        s->header_size = size;
+        return -ENOMEM;
     }
 
-    ret = bdrv_pread(bs->file, 0, s->header, s->header_size);
+    ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0);
     if (ret < 0) {
         goto fail;
     }
     s->bat_bitmap = (uint32_t *)(s->header + 1);
 
-    for (i = 0; i < s->bat_size; i++) {
-        int64_t off = bat2sect(s, i);
-        if (off >= s->data_end) {
-            s->data_end = off + s->tracks;
-        }
-    }
-
     if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
-        /* Image was not closed correctly. The check is mandatory */
-        s->header_unclean = true;
-        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
-            error_setg(errp, "parallels: Image was not closed correctly; "
-                       "cannot be opened read/write");
-            ret = -EACCES;
-            goto fail;
-        }
+        need_check = s->header_unclean = true;
     }
 
-    opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, errp);
-    if (!opts) {
-        goto fail_options;
+    {
+        bool ok = parallels_test_data_off(s, file_nb_sectors, &data_start);
+        need_check = need_check || !ok;
     }
 
-    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
-        goto fail_options;
+    s->data_start = data_start;
+    s->data_end = s->data_start;
+    if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
+        /*
+         * There is not enough unused space to fit to block align between BAT
+         * and actual data. We can't avoid read-modify-write...
+         */
+        s->header_size = size;
     }
 
-    s->prealloc_size =
-        qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
-    s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
-    buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
-    /* prealloc_mode can be downgraded later during allocate_clusters */
-    s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
-                                       PRL_PREALLOC_MODE_FALLOCATE,
-                                       &local_err);
-    g_free(buf);
-    if (local_err != NULL) {
-        error_propagate(errp, local_err);
-        goto fail_options;
+    if (ph.ext_off) {
+        if (flags & BDRV_O_RDWR) {
+            /*
+             * It's unsafe to open image RW if there is an extension (as we
+             * don't support it). But parallels driver in QEMU historically
+             * ignores the extension, so print warning and don't care.
+             */
+            warn_report("Format Extension ignored in RW mode");
+        } else {
+            ret = parallels_read_format_extension(
+                    bs, le64_to_cpu(ph.ext_off) << BDRV_SECTOR_BITS, errp);
+            if (ret < 0) {
+                goto fail;
+            }
+        }
     }
 
     if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) {
@@ -851,27 +1356,69 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
         }
     }
 
-    s->bat_dirty_block = 4 * qemu_real_host_page_size;
+    s->bat_dirty_block = 4 * qemu_real_host_page_size();
     s->bat_dirty_bmap =
         bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
 
-    /* Disable migration until bdrv_invalidate_cache method is added */
+    /* Disable migration until bdrv_activate method is added */
     error_setg(&s->migration_blocker, "The Parallels format used by node '%s' "
                "does not support live migration",
                bdrv_get_device_or_node_name(bs));
-    ret = migrate_add_blocker(s->migration_blocker, errp);
+
+    ret = migrate_add_blocker_normal(&s->migration_blocker, errp);
     if (ret < 0) {
-        error_free(s->migration_blocker);
         goto fail;
     }
     qemu_co_mutex_init(&s->lock);
+
+    for (i = 0; i < s->bat_size; i++) {
+        sector = bat2sect(s, i);
+        if (sector + s->tracks > s->data_end) {
+            s->data_end = sector + s->tracks;
+        }
+    }
+    need_check = need_check || s->data_end > file_nb_sectors;
+
+    if (!need_check) {
+        ret = parallels_fill_used_bitmap(bs);
+        if (ret == -ENOMEM) {
+            goto fail;
+        }
+        need_check = need_check || ret < 0; /* These are correctable errors */
+    }
+
+    /*
+     * We don't repair the image here if it's opened for checks. Also we don't
+     * want to change inactive images and can't change readonly images.
+     */
+    if ((flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) || !(flags & BDRV_O_RDWR)) {
+        return 0;
+    }
+
+    /* Repair the image if corruption was detected. */
+    if (need_check) {
+        BdrvCheckResult res;
+        ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Could not repair corrupted image");
+            migrate_del_blocker(&s->migration_blocker);
+            goto fail;
+        }
+    }
     return 0;
 
 fail_format:
     error_setg(errp, "Image not in Parallels format");
-fail_options:
-    ret = -EINVAL;
+    return -EINVAL;
+
 fail:
+    /*
+     * "s" object was allocated by g_malloc0 so we can safely
+     * try to free its fields even they were not allocated.
+     */
+    parallels_free_used_bitmap(bs);
+
+    g_free(s->bat_dirty_bmap);
     qemu_vfree(s->header);
     return ret;
 }
@@ -881,6 +1428,8 @@ static void parallels_close(BlockDriverState *bs)
 {
     BDRVParallelsState *s = bs->opaque;
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if ((bs->open_flags & BDRV_O_RDWR) && !(bs->open_flags & BDRV_O_INACTIVE)) {
         s->header->inuse = 0;
         parallels_update_header(bs);
@@ -890,31 +1439,42 @@ static void parallels_close(BlockDriverState *bs)
                       PREALLOC_MODE_OFF, 0, NULL);
     }
 
+    parallels_free_used_bitmap(bs);
+
     g_free(s->bat_dirty_bmap);
     qemu_vfree(s->header);
 
-    migrate_del_blocker(s->migration_blocker);
-    error_free(s->migration_blocker);
+    migrate_del_blocker(&s->migration_blocker);
+}
+
+static bool parallels_is_support_dirty_bitmaps(BlockDriverState *bs)
+{
+    return 1;
 }
 
 static BlockDriver bdrv_parallels = {
-    .format_name       = "parallels",
-    .instance_size     = sizeof(BDRVParallelsState),
-    .bdrv_probe                = parallels_probe,
-    .bdrv_open         = parallels_open,
-    .bdrv_close                = parallels_close,
-    .bdrv_child_perm          = bdrv_default_perms,
-    .bdrv_co_block_status     = parallels_co_block_status,
-    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
-    .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
-    .bdrv_co_readv  = parallels_co_readv,
-    .bdrv_co_writev = parallels_co_writev,
-    .is_format      = true,
-    .supports_backing = true,
-    .bdrv_co_create      = parallels_co_create,
-    .bdrv_co_create_opts = parallels_co_create_opts,
-    .bdrv_co_check  = parallels_co_check,
-    .create_opts    = &parallels_create_opts,
+    .format_name                = "parallels",
+    .instance_size              = sizeof(BDRVParallelsState),
+    .create_opts                = &parallels_create_opts,
+    .is_format                  = true,
+    .supports_backing           = true,
+
+    .bdrv_has_zero_init         = bdrv_has_zero_init_1,
+    .bdrv_supports_persistent_dirty_bitmap = parallels_is_support_dirty_bitmaps,
+
+    .bdrv_probe                 = parallels_probe,
+    .bdrv_open                  = parallels_open,
+    .bdrv_close                 = parallels_close,
+    .bdrv_child_perm            = bdrv_default_perms,
+    .bdrv_co_block_status       = parallels_co_block_status,
+    .bdrv_co_flush_to_os        = parallels_co_flush_to_os,
+    .bdrv_co_readv              = parallels_co_readv,
+    .bdrv_co_writev             = parallels_co_writev,
+    .bdrv_co_create             = parallels_co_create,
+    .bdrv_co_create_opts        = parallels_co_create_opts,
+    .bdrv_co_check              = parallels_co_check,
+    .bdrv_co_pdiscard           = parallels_co_pdiscard,
+    .bdrv_co_pwrite_zeroes      = parallels_co_pwrite_zeroes,
 };
 
 static void bdrv_parallels_init(void)
index 5aa101cfc886d385adf0b8417bee4197f508b5eb..423b2ad7271a7f48e841d9efcc7bda56995ff67d 100644 (file)
@@ -48,7 +48,8 @@ typedef struct ParallelsHeader {
     uint64_t nb_sectors;
     uint32_t inuse;
     uint32_t data_off;
-    char padding[12];
+    uint32_t flags;
+    uint64_t ext_off;
 } QEMU_PACKED ParallelsHeader;
 
 typedef enum ParallelsPreallocMode {
@@ -71,17 +72,26 @@ typedef struct BDRVParallelsState {
     unsigned long *bat_dirty_bmap;
     unsigned int  bat_dirty_block;
 
+    unsigned long *used_bmap;
+    unsigned long used_bmap_size;
+
     uint32_t *bat_bitmap;
     unsigned int bat_size;
 
+    int64_t  data_start;
     int64_t  data_end;
     uint64_t prealloc_size;
     ParallelsPreallocMode prealloc_mode;
 
     unsigned int tracks;
+    unsigned int cluster_size;
 
     unsigned int off_multiplier;
     Error *migration_blocker;
 } BDRVParallelsState;
 
+int GRAPH_RDLOCK
+parallels_read_format_extension(BlockDriverState *bs, int64_t ext_off,
+                                Error **errp);
+
 #endif
diff --git a/block/preallocate.c b/block/preallocate.c
new file mode 100644 (file)
index 0000000..d215bc5
--- /dev/null
@@ -0,0 +1,627 @@
+/*
+ * preallocate filter driver
+ *
+ * The driver performs preallocate operation: it is injected above
+ * some node, and before each write over EOF it does additional preallocating
+ * write-zeroes request.
+ *
+ * Copyright (c) 2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "qemu/module.h"
+#include "qemu/option.h"
+#include "qemu/units.h"
+#include "block/block-io.h"
+#include "block/block_int.h"
+
+
+typedef struct PreallocateOpts {
+    int64_t prealloc_size;
+    int64_t prealloc_align;
+} PreallocateOpts;
+
+typedef struct BDRVPreallocateState {
+    PreallocateOpts opts;
+
+    /*
+     * Track real data end, to crop preallocation on close. If < 0 the status is
+     * unknown.
+     *
+     * @data_end is a maximum of file size on open (or when we get write/resize
+     * permissions) and all write request ends after it. So it's safe to
+     * truncate to data_end if it is valid.
+     */
+    int64_t data_end;
+
+    /*
+     * Start of trailing preallocated area which reads as zero. May be smaller
+     * than data_end, if user does over-EOF write zero operation. If < 0 the
+     * status is unknown.
+     *
+     * If both @zero_start and @file_end are valid, the region
+     * [@zero_start, @file_end) is known to be preallocated zeroes. If @file_end
+     * is not valid, @zero_start doesn't make much sense.
+     */
+    int64_t zero_start;
+
+    /*
+     * Real end of file. Actually the cache for bdrv_getlength(bs->file->bs),
+     * to avoid extra lseek() calls on each write operation. If < 0 the status
+     * is unknown.
+     */
+    int64_t file_end;
+
+    /*
+     * All three states @data_end, @zero_start and @file_end are guaranteed to
+     * be invalid (< 0) when we don't have both exclusive BLK_PERM_RESIZE and
+     * BLK_PERM_WRITE permissions on file child.
+     */
+
+    /* Gives up the resize permission on children when parents don't need it */
+    QEMUBH *drop_resize_bh;
+} BDRVPreallocateState;
+
+static int preallocate_drop_resize(BlockDriverState *bs, Error **errp);
+static void preallocate_drop_resize_bh(void *opaque);
+
+#define PREALLOCATE_OPT_PREALLOC_ALIGN "prealloc-align"
+#define PREALLOCATE_OPT_PREALLOC_SIZE "prealloc-size"
+static QemuOptsList runtime_opts = {
+    .name = "preallocate",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = PREALLOCATE_OPT_PREALLOC_ALIGN,
+            .type = QEMU_OPT_SIZE,
+            .help = "on preallocation, align file length to this number, "
+                "default 1M",
+        },
+        {
+            .name = PREALLOCATE_OPT_PREALLOC_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "how much to preallocate, default 128M",
+        },
+        { /* end of list */ }
+    },
+};
+
+static bool preallocate_absorb_opts(PreallocateOpts *dest, QDict *options,
+                                    BlockDriverState *child_bs, Error **errp)
+{
+    QemuOpts *opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+
+    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
+        return false;
+    }
+
+    dest->prealloc_align =
+        qemu_opt_get_size(opts, PREALLOCATE_OPT_PREALLOC_ALIGN, 1 * MiB);
+    dest->prealloc_size =
+        qemu_opt_get_size(opts, PREALLOCATE_OPT_PREALLOC_SIZE, 128 * MiB);
+
+    qemu_opts_del(opts);
+
+    if (!QEMU_IS_ALIGNED(dest->prealloc_align, BDRV_SECTOR_SIZE)) {
+        error_setg(errp, "prealloc-align parameter of preallocate filter "
+                   "is not aligned to %llu", BDRV_SECTOR_SIZE);
+        return false;
+    }
+
+    if (!QEMU_IS_ALIGNED(dest->prealloc_align,
+                         child_bs->bl.request_alignment)) {
+        error_setg(errp, "prealloc-align parameter of preallocate filter "
+                   "is not aligned to underlying node request alignment "
+                   "(%" PRIi32 ")", child_bs->bl.request_alignment);
+        return false;
+    }
+
+    return true;
+}
+
+static int preallocate_open(BlockDriverState *bs, QDict *options, int flags,
+                            Error **errp)
+{
+    BDRVPreallocateState *s = bs->opaque;
+    int ret;
+
+    GLOBAL_STATE_CODE();
+
+    /*
+     * s->data_end and friends should be initialized on permission update.
+     * For this to work, mark them invalid.
+     */
+    s->file_end = s->zero_start = s->data_end = -EINVAL;
+    s->drop_resize_bh = qemu_bh_new(preallocate_drop_resize_bh, bs);
+
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    if (!preallocate_absorb_opts(&s->opts, options, bs->file->bs, errp)) {
+        return -EINVAL;
+    }
+
+    bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+        (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
+
+    bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+        ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+            bs->file->bs->supported_zero_flags);
+
+    return 0;
+}
+
+static int GRAPH_RDLOCK
+preallocate_truncate_to_real_size(BlockDriverState *bs, Error **errp)
+{
+    BDRVPreallocateState *s = bs->opaque;
+    int ret;
+
+    if (s->file_end < 0) {
+        s->file_end = bdrv_getlength(bs->file->bs);
+        if (s->file_end < 0) {
+            error_setg_errno(errp, -s->file_end, "Failed to get file length");
+            return s->file_end;
+        }
+    }
+
+    if (s->data_end < s->file_end) {
+        ret = bdrv_truncate(bs->file, s->data_end, true, PREALLOC_MODE_OFF, 0,
+                            NULL);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Failed to drop preallocation");
+            s->file_end = ret;
+            return ret;
+        }
+        s->file_end = s->data_end;
+    }
+
+    return 0;
+}
+
+static void preallocate_close(BlockDriverState *bs)
+{
+    BDRVPreallocateState *s = bs->opaque;
+
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    qemu_bh_cancel(s->drop_resize_bh);
+    qemu_bh_delete(s->drop_resize_bh);
+
+    if (s->data_end >= 0) {
+        preallocate_truncate_to_real_size(bs, NULL);
+    }
+}
+
+
+/*
+ * Handle reopen.
+ *
+ * We must implement reopen handlers, otherwise reopen just don't work. Handle
+ * new options and don't care about preallocation state, as it is handled in
+ * set/check permission handlers.
+ */
+
+static int preallocate_reopen_prepare(BDRVReopenState *reopen_state,
+                                      BlockReopenQueue *queue, Error **errp)
+{
+    PreallocateOpts *opts = g_new0(PreallocateOpts, 1);
+    int ret;
+
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    if (!preallocate_absorb_opts(opts, reopen_state->options,
+                                 reopen_state->bs->file->bs, errp)) {
+        g_free(opts);
+        return -EINVAL;
+    }
+
+    /*
+     * Drop the preallocation already here if reopening read-only. The child
+     * might also be reopened read-only and then scheduling a BH during the
+     * permission update is too late.
+     */
+    if ((reopen_state->flags & BDRV_O_RDWR) == 0) {
+        ret = preallocate_drop_resize(reopen_state->bs, errp);
+        if (ret < 0) {
+            g_free(opts);
+            return ret;
+        }
+    }
+
+    reopen_state->opaque = opts;
+
+    return 0;
+}
+
+static void preallocate_reopen_commit(BDRVReopenState *state)
+{
+    BDRVPreallocateState *s = state->bs->opaque;
+
+    s->opts = *(PreallocateOpts *)state->opaque;
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+}
+
+static void preallocate_reopen_abort(BDRVReopenState *state)
+{
+    g_free(state->opaque);
+    state->opaque = NULL;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                           QEMUIOVector *qiov, size_t qiov_offset,
+                           BdrvRequestFlags flags)
+{
+    return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+                               flags);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+    return bdrv_co_pdiscard(bs->file, offset, bytes);
+}
+
+static bool can_write_resize(uint64_t perm)
+{
+    return (perm & BLK_PERM_WRITE) && (perm & BLK_PERM_RESIZE);
+}
+
+static bool GRAPH_RDLOCK has_prealloc_perms(BlockDriverState *bs)
+{
+    BDRVPreallocateState *s = bs->opaque;
+
+    if (can_write_resize(bs->file->perm)) {
+        assert(!(bs->file->shared_perm & BLK_PERM_WRITE));
+        assert(!(bs->file->shared_perm & BLK_PERM_RESIZE));
+        return true;
+    }
+
+    assert(s->data_end < 0);
+    assert(s->zero_start < 0);
+    assert(s->file_end < 0);
+    return false;
+}
+
+/*
+ * Call on each write. Returns true if @want_merge_zero is true and the region
+ * [offset, offset + bytes) is zeroed (as a result of this call or earlier
+ * preallocation).
+ *
+ * want_merge_zero is used to merge write-zero request with preallocation in
+ * one bdrv_co_pwrite_zeroes() call.
+ */
+static bool coroutine_fn GRAPH_RDLOCK
+handle_write(BlockDriverState *bs, int64_t offset, int64_t bytes,
+             bool want_merge_zero)
+{
+    BDRVPreallocateState *s = bs->opaque;
+    int64_t end = offset + bytes;
+    int64_t prealloc_start, prealloc_end;
+    int ret;
+    uint32_t file_align = bs->file->bs->bl.request_alignment;
+    uint32_t prealloc_align = MAX(s->opts.prealloc_align, file_align);
+
+    assert(QEMU_IS_ALIGNED(prealloc_align, file_align));
+
+    if (!has_prealloc_perms(bs)) {
+        /* We don't have state neither should try to recover it */
+        return false;
+    }
+
+    if (s->data_end < 0) {
+        s->data_end = bdrv_co_getlength(bs->file->bs);
+        if (s->data_end < 0) {
+            return false;
+        }
+
+        if (s->file_end < 0) {
+            s->file_end = s->data_end;
+        }
+    }
+
+    if (end <= s->data_end) {
+        return false;
+    }
+
+    /* We have valid s->data_end, and request writes beyond it. */
+
+    s->data_end = end;
+    if (s->zero_start < 0 || !want_merge_zero) {
+        s->zero_start = end;
+    }
+
+    if (s->file_end < 0) {
+        s->file_end = bdrv_co_getlength(bs->file->bs);
+        if (s->file_end < 0) {
+            return false;
+        }
+    }
+
+    /* Now s->data_end, s->zero_start and s->file_end are valid. */
+
+    if (end <= s->file_end) {
+        /* No preallocation needed. */
+        return want_merge_zero && offset >= s->zero_start;
+    }
+
+    /* Now we want new preallocation, as request writes beyond s->file_end. */
+
+    prealloc_start = QEMU_ALIGN_UP(
+            want_merge_zero ? MIN(offset, s->file_end) : s->file_end,
+            file_align);
+    prealloc_end = QEMU_ALIGN_UP(
+            MAX(prealloc_start, end) + s->opts.prealloc_size,
+            prealloc_align);
+
+    want_merge_zero = want_merge_zero && (prealloc_start <= offset);
+
+    ret = bdrv_co_pwrite_zeroes(
+            bs->file, prealloc_start, prealloc_end - prealloc_start,
+            BDRV_REQ_NO_FALLBACK | BDRV_REQ_SERIALISING | BDRV_REQ_NO_WAIT);
+    if (ret < 0) {
+        s->file_end = ret;
+        return false;
+    }
+
+    s->file_end = prealloc_end;
+    return want_merge_zero;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+                             int64_t bytes, BdrvRequestFlags flags)
+{
+    bool want_merge_zero =
+        !(flags & ~(BDRV_REQ_ZERO_WRITE | BDRV_REQ_NO_FALLBACK));
+    if (handle_write(bs, offset, bytes, want_merge_zero)) {
+        return 0;
+    }
+
+    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                            QEMUIOVector *qiov, size_t qiov_offset,
+                            BdrvRequestFlags flags)
+{
+    handle_write(bs, offset, bytes, false);
+
+    return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
+                                flags);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co_truncate(BlockDriverState *bs, int64_t offset,
+                        bool exact, PreallocMode prealloc,
+                        BdrvRequestFlags flags, Error **errp)
+{
+    ERRP_GUARD();
+    BDRVPreallocateState *s = bs->opaque;
+    int ret;
+
+    if (s->data_end >= 0 && offset > s->data_end) {
+        if (s->file_end < 0) {
+            s->file_end = bdrv_co_getlength(bs->file->bs);
+            if (s->file_end < 0) {
+                error_setg(errp, "failed to get file length");
+                return s->file_end;
+            }
+        }
+
+        if (prealloc == PREALLOC_MODE_FALLOC) {
+            /*
+             * If offset <= s->file_end, the task is already done, just
+             * update s->data_end, to move part of "filter preallocation"
+             * to "preallocation requested by user".
+             * Otherwise just proceed to preallocate missing part.
+             */
+            if (offset <= s->file_end) {
+                s->data_end = offset;
+                return 0;
+            }
+        } else {
+            /*
+             * We have to drop our preallocation, to
+             * - avoid "Cannot use preallocation for shrinking files" in
+             *   case of offset < file_end
+             * - give PREALLOC_MODE_OFF a chance to keep small disk
+             *   usage
+             * - give PREALLOC_MODE_FULL a chance to actually write the
+             *   whole region as user expects
+             */
+            if (s->file_end > s->data_end) {
+                ret = bdrv_co_truncate(bs->file, s->data_end, true,
+                                       PREALLOC_MODE_OFF, 0, errp);
+                if (ret < 0) {
+                    s->file_end = ret;
+                    error_prepend(errp, "preallocate-filter: failed to drop "
+                                  "write-zero preallocation: ");
+                    return ret;
+                }
+                s->file_end = s->data_end;
+            }
+        }
+
+        s->data_end = offset;
+    }
+
+    ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
+    if (ret < 0) {
+        s->file_end = s->zero_start = s->data_end = ret;
+        return ret;
+    }
+
+    if (has_prealloc_perms(bs)) {
+        s->file_end = s->zero_start = s->data_end = offset;
+    }
+    return 0;
+}
+
+static int coroutine_fn GRAPH_RDLOCK preallocate_co_flush(BlockDriverState *bs)
+{
+    return bdrv_co_flush(bs->file->bs);
+}
+
+static int64_t coroutine_fn GRAPH_RDLOCK
+preallocate_co_getlength(BlockDriverState *bs)
+{
+    int64_t ret;
+    BDRVPreallocateState *s = bs->opaque;
+
+    if (s->data_end >= 0) {
+        return s->data_end;
+    }
+
+    ret = bdrv_co_getlength(bs->file->bs);
+
+    if (has_prealloc_perms(bs)) {
+        s->file_end = s->zero_start = s->data_end = ret;
+    }
+
+    return ret;
+}
+
+static int GRAPH_RDLOCK
+preallocate_drop_resize(BlockDriverState *bs, Error **errp)
+{
+    BDRVPreallocateState *s = bs->opaque;
+    int ret;
+
+    if (s->data_end < 0) {
+        return 0;
+    }
+
+    /*
+     * Before switching children to be read-only, truncate them to remove
+     * the preallocation and let them have the real size.
+     */
+    ret = preallocate_truncate_to_real_size(bs, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /*
+     * We'll drop our permissions and will allow other users to take write and
+     * resize permissions (see preallocate_child_perm). Anyone will be able to
+     * change the child, so mark all states invalid. We'll regain control if a
+     * parent requests write access again.
+     */
+    s->data_end = s->file_end = s->zero_start = -EINVAL;
+
+    bdrv_child_refresh_perms(bs, bs->file, NULL);
+
+    return 0;
+}
+
+static void preallocate_drop_resize_bh(void *opaque)
+{
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    /*
+     * In case of errors, we'll simply keep the exclusive lock on the image
+     * indefinitely.
+     */
+    preallocate_drop_resize(opaque, NULL);
+}
+
+static void GRAPH_RDLOCK
+preallocate_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared)
+{
+    BDRVPreallocateState *s = bs->opaque;
+
+    if (can_write_resize(perm)) {
+        qemu_bh_cancel(s->drop_resize_bh);
+        if (s->data_end < 0) {
+            s->data_end = s->file_end = s->zero_start =
+                bs->file->bs->total_sectors * BDRV_SECTOR_SIZE;
+        }
+    } else {
+        qemu_bh_schedule(s->drop_resize_bh);
+    }
+}
+
+static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c,
+    BdrvChildRole role, BlockReopenQueue *reopen_queue,
+    uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared)
+{
+    BDRVPreallocateState *s = bs->opaque;
+
+    bdrv_default_perms(bs, c, role, reopen_queue, perm, shared, nperm, nshared);
+
+    /*
+     * We need exclusive write and resize permissions on the child not only when
+     * the parent can write to it, but also after the parent gave up write
+     * permissions until preallocate_drop_resize() has completed.
+     */
+    if (can_write_resize(perm) || s->data_end != -EINVAL) {
+        *nperm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
+
+        /*
+         * Don't share, to keep our states s->file_end, s->data_end and
+         * s->zero_start valid.
+         */
+        *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+    }
+}
+
+static BlockDriver bdrv_preallocate_filter = {
+    .format_name = "preallocate",
+    .instance_size = sizeof(BDRVPreallocateState),
+
+    .bdrv_co_getlength    = preallocate_co_getlength,
+    .bdrv_open            = preallocate_open,
+    .bdrv_close           = preallocate_close,
+
+    .bdrv_reopen_prepare  = preallocate_reopen_prepare,
+    .bdrv_reopen_commit   = preallocate_reopen_commit,
+    .bdrv_reopen_abort    = preallocate_reopen_abort,
+
+    .bdrv_co_preadv_part = preallocate_co_preadv_part,
+    .bdrv_co_pwritev_part = preallocate_co_pwritev_part,
+    .bdrv_co_pwrite_zeroes = preallocate_co_pwrite_zeroes,
+    .bdrv_co_pdiscard = preallocate_co_pdiscard,
+    .bdrv_co_flush = preallocate_co_flush,
+    .bdrv_co_truncate = preallocate_co_truncate,
+
+    .bdrv_set_perm = preallocate_set_perm,
+    .bdrv_child_perm = preallocate_child_perm,
+
+    .is_filter = true,
+};
+
+static void bdrv_preallocate_init(void)
+{
+    bdrv_register(&bdrv_preallocate_filter);
+}
+
+block_init(bdrv_preallocate_init);
diff --git a/block/progress_meter.c b/block/progress_meter.c
new file mode 100644 (file)
index 0000000..31a170a
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Helper functionality for some process progress tracking.
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012, 2018 Red Hat, Inc.
+ * Copyright (c) 2020 Virtuozzo International GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/coroutine.h"
+#include "qemu/progress_meter.h"
+
+void progress_init(ProgressMeter *pm)
+{
+    qemu_mutex_init(&pm->lock);
+}
+
+void progress_destroy(ProgressMeter *pm)
+{
+    qemu_mutex_destroy(&pm->lock);
+}
+
+void progress_get_snapshot(ProgressMeter *pm, uint64_t *current,
+                           uint64_t *total)
+{
+    QEMU_LOCK_GUARD(&pm->lock);
+
+    *current = pm->current;
+    *total = pm->total;
+}
+
+void progress_work_done(ProgressMeter *pm, uint64_t done)
+{
+    QEMU_LOCK_GUARD(&pm->lock);
+    pm->current += done;
+}
+
+void progress_set_remaining(ProgressMeter *pm, uint64_t remaining)
+{
+    QEMU_LOCK_GUARD(&pm->lock);
+    pm->total = pm->current + remaining;
+}
+
+void progress_increase_remaining(ProgressMeter *pm, uint64_t delta)
+{
+    QEMU_LOCK_GUARD(&pm->lock);
+    pm->total += delta;
+}
index 8498402ad43d4ed1c6a57225f2cfda8516651860..1618cd225a321044efd7c890ce45059392147934 100644 (file)
@@ -32,6 +32,7 @@
 
 #include "qemu/osdep.h"
 
+#include "block/block_int.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-block.h"
 #include "qapi/qmp/qdict.h"
@@ -116,8 +117,8 @@ static int do_open_tray(const char *blk_name, const char *qdev_id,
     return 0;
 }
 
-void qmp_blockdev_open_tray(bool has_device, const char *device,
-                            bool has_id, const char *id,
+void qmp_blockdev_open_tray(const char *device,
+                            const char *id,
                             bool has_force, bool force,
                             Error **errp)
 {
@@ -127,9 +128,7 @@ void qmp_blockdev_open_tray(bool has_device, const char *device,
     if (!has_force) {
         force = false;
     }
-    rc = do_open_tray(has_device ? device : NULL,
-                      has_id ? id : NULL,
-                      force, &local_err);
+    rc = do_open_tray(device, id, force, &local_err);
     if (rc && rc != -ENOSYS && rc != -EINPROGRESS) {
         error_propagate(errp, local_err);
         return;
@@ -137,16 +136,13 @@ void qmp_blockdev_open_tray(bool has_device, const char *device,
     error_free(local_err);
 }
 
-void qmp_blockdev_close_tray(bool has_device, const char *device,
-                             bool has_id, const char *id,
+void qmp_blockdev_close_tray(const char *device,
+                             const char *id,
                              Error **errp)
 {
     BlockBackend *blk;
     Error *local_err = NULL;
 
-    device = has_device ? device : NULL;
-    id = has_id ? id : NULL;
-
     blk = qmp_get_blk(device, id, errp);
     if (!blk) {
         return;
@@ -173,16 +169,15 @@ void qmp_blockdev_close_tray(bool has_device, const char *device,
     }
 }
 
-static void blockdev_remove_medium(bool has_device, const char *device,
-                                   bool has_id, const char *id, Error **errp)
+static void GRAPH_UNLOCKED
+blockdev_remove_medium(const char *device, const char *id, Error **errp)
 {
     BlockBackend *blk;
     BlockDriverState *bs;
     AioContext *aio_context;
     bool has_attached_device;
 
-    device = has_device ? device : NULL;
-    id = has_id ? id : NULL;
+    GLOBAL_STATE_CODE();
 
     blk = qmp_get_blk(device, id, errp);
     if (!blk) {
@@ -212,9 +207,12 @@ static void blockdev_remove_medium(bool has_device, const char *device,
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
+    bdrv_graph_rdlock_main_loop();
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) {
+        bdrv_graph_rdunlock_main_loop();
         goto out;
     }
+    bdrv_graph_rdunlock_main_loop();
 
     blk_remove_bs(blk);
 
@@ -232,13 +230,14 @@ out:
 
 void qmp_blockdev_remove_medium(const char *id, Error **errp)
 {
-    blockdev_remove_medium(false, NULL, true, id, errp);
+    blockdev_remove_medium(NULL, id, errp);
 }
 
 static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
                                             BlockDriverState *bs, Error **errp)
 {
     Error *local_err = NULL;
+    AioContext *ctx;
     bool has_device;
     int ret;
 
@@ -260,7 +259,11 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
         return;
     }
 
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
     ret = blk_insert_bs(blk, bs, errp);
+    aio_context_release(ctx);
+
     if (ret < 0) {
         return;
     }
@@ -280,16 +283,15 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
     }
 }
 
-static void blockdev_insert_medium(bool has_device, const char *device,
-                                   bool has_id, const char *id,
+static void blockdev_insert_medium(const char *device, const char *id,
                                    const char *node_name, Error **errp)
 {
     BlockBackend *blk;
     BlockDriverState *bs;
 
-    blk = qmp_get_blk(has_device ? device : NULL,
-                      has_id ? id : NULL,
-                      errp);
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    blk = qmp_get_blk(device, id, errp);
     if (!blk) {
         return;
     }
@@ -311,13 +313,14 @@ static void blockdev_insert_medium(bool has_device, const char *device,
 void qmp_blockdev_insert_medium(const char *id, const char *node_name,
                                 Error **errp)
 {
-    blockdev_insert_medium(false, NULL, true, id, node_name, errp);
+    blockdev_insert_medium(NULL, id, node_name, errp);
 }
 
-void qmp_blockdev_change_medium(bool has_device, const char *device,
-                                bool has_id, const char *id,
+void qmp_blockdev_change_medium(const char *device,
+                                const char *id,
                                 const char *filename,
-                                bool has_format, const char *format,
+                                const char *format,
+                                bool has_force, bool force,
                                 bool has_read_only,
                                 BlockdevChangeReadOnlyMode read_only,
                                 Error **errp)
@@ -330,9 +333,7 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
     QDict *options = NULL;
     Error *err = NULL;
 
-    blk = qmp_get_blk(has_device ? device : NULL,
-                      has_id ? id : NULL,
-                      errp);
+    blk = qmp_get_blk(device, id, errp);
     if (!blk) {
         goto fail;
     }
@@ -369,18 +370,19 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
     detect_zeroes = blk_get_detect_zeroes_from_root_state(blk);
     qdict_put_str(options, "detect-zeroes", detect_zeroes ? "on" : "off");
 
-    if (has_format) {
+    if (format) {
         qdict_put_str(options, "driver", format);
     }
 
+    aio_context_acquire(qemu_get_aio_context());
     medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp);
+    aio_context_release(qemu_get_aio_context());
+
     if (!medium_bs) {
         goto fail;
     }
 
-    rc = do_open_tray(has_device ? device : NULL,
-                      has_id ? id : NULL,
-                      false, &err);
+    rc = do_open_tray(device, id, force, &err);
     if (rc && rc != -ENOSYS) {
         error_propagate(errp, err);
         goto fail;
@@ -388,7 +390,7 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
     error_free(err);
     err = NULL;
 
-    blockdev_remove_medium(has_device, device, has_id, id, &err);
+    blockdev_remove_medium(device, id, &err);
     if (err) {
         error_propagate(errp, err);
         goto fail;
@@ -400,7 +402,7 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
         goto fail;
     }
 
-    qmp_blockdev_close_tray(has_device, device, has_id, id, errp);
+    qmp_blockdev_close_tray(device, id, errp);
 
 fail:
     /* If the medium has been inserted, the device has its own reference, so
@@ -409,8 +411,7 @@ fail:
     bdrv_unref(medium_bs);
 }
 
-void qmp_eject(bool has_device, const char *device,
-               bool has_id, const char *id,
+void qmp_eject(const char *device, const char *id,
                bool has_force, bool force, Error **errp)
 {
     Error *local_err = NULL;
@@ -420,16 +421,14 @@ void qmp_eject(bool has_device, const char *device,
         force = false;
     }
 
-    rc = do_open_tray(has_device ? device : NULL,
-                      has_id ? id : NULL,
-                      force, &local_err);
+    rc = do_open_tray(device, id, force, &local_err);
     if (rc && rc != -ENOSYS) {
         error_propagate(errp, local_err);
         return;
     }
     error_free(local_err);
 
-    blockdev_remove_medium(has_device, device, has_id, id, errp);
+    blockdev_remove_medium(device, id, errp);
 }
 
 /* throttling disk I/O limits */
@@ -440,9 +439,7 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp)
     BlockBackend *blk;
     AioContext *aio_context;
 
-    blk = qmp_get_blk(arg->has_device ? arg->device : NULL,
-                      arg->has_id ? arg->id : NULL,
-                      errp);
+    blk = qmp_get_blk(arg->device, arg->id, errp);
     if (!blk) {
         return;
     }
@@ -515,11 +512,8 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp)
         /* Enable I/O limits if they're not enabled yet, otherwise
          * just update the throttling group. */
         if (!blk_get_public(blk)->throttle_group_member.throttle_state) {
-            blk_io_limits_enable(blk,
-                                 arg->has_group ? arg->group :
-                                 arg->has_device ? arg->device :
-                                 arg->id);
-        } else if (arg->has_group) {
+            blk_io_limits_enable(blk, arg->group ?: arg->device ?: arg->id);
+        } else if (arg->group) {
             blk_io_limits_update_group(blk, arg->group);
         }
         /* Set the new throttling configuration */
@@ -538,6 +532,7 @@ void qmp_block_latency_histogram_set(
     bool has_boundaries, uint64List *boundaries,
     bool has_boundaries_read, uint64List *boundaries_read,
     bool has_boundaries_write, uint64List *boundaries_write,
+    bool has_boundaries_append, uint64List *boundaries_append,
     bool has_boundaries_flush, uint64List *boundaries_flush,
     Error **errp)
 {
@@ -578,6 +573,16 @@ void qmp_block_latency_histogram_set(
         }
     }
 
+    if (has_boundaries || has_boundaries_append) {
+        ret = block_latency_histogram_set(
+                stats, BLOCK_ACCT_ZONE_APPEND,
+                has_boundaries_append ? boundaries_append : boundaries);
+        if (ret) {
+            error_setg(errp, "Device '%s' set append write boundaries fail", id);
+            return;
+        }
+    }
+
     if (has_boundaries || has_boundaries_flush) {
         ret = block_latency_histogram_set(
             stats, BLOCK_ACCT_FLUSH,
index 036da085eea66bd1ad866f5be06f22330297437c..ed0434a18fd71f69b8a193aa6cb2716297fdfb12 100644 (file)
@@ -26,6 +26,7 @@
 #include "qemu/cutils.h"
 #include "block/qapi.h"
 #include "block/block_int.h"
+#include "block/dirty-bitmap.h"
 #include "block/throttle-groups.h"
 #include "block/write-threshold.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qstring.h"
 #include "qemu/qemu-print.h"
 #include "sysemu/block-backend.h"
-#include "qemu/cutils.h"
 
-BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
-                                        BlockDriverState *bs,
-                                        bool flat,
-                                        Error **errp)
+BlockDeviceInfo *coroutine_fn bdrv_block_device_info(BlockBackend *blk,
+                                                     BlockDriverState *bs,
+                                                     bool flat,
+                                                     Error **errp)
 {
     ImageInfo **p_image_info;
-    BlockDriverState *bs0, *backing;
+    ImageInfo *backing_info;
+    BlockDriverState *backing;
     BlockDeviceInfo *info;
+    ERRP_GUARD();
 
     if (!bs->drv) {
         error_setg(errp, "Block device %s is ejected", bs->node_name);
@@ -59,10 +61,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
 
     info = g_malloc0(sizeof(*info));
     info->file                   = g_strdup(bs->filename);
-    info->ro                     = bs->read_only;
+    info->ro                     = bdrv_is_read_only(bs);
     info->drv                    = g_strdup(bs->drv->format_name);
     info->encrypted              = bs->encrypted;
-    info->encryption_key_missing = false;
 
     info->cache = g_new(BlockdevCacheInfo, 1);
     *info->cache = (BlockdevCacheInfo) {
@@ -72,13 +73,11 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
     };
 
     if (bs->node_name[0]) {
-        info->has_node_name = true;
         info->node_name = g_strdup(bs->node_name);
     }
 
     backing = bdrv_cow_bs(bs);
     if (backing) {
-        info->has_backing_file = true;
         info->backing_file = g_strdup(backing->filename);
     }
 
@@ -140,48 +139,29 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
         info->has_iops_size = cfg.op_size;
         info->iops_size = cfg.op_size;
 
-        info->has_group = true;
         info->group =
             g_strdup(throttle_group_get_name(&blkp->throttle_group_member));
     }
 
     info->write_threshold = bdrv_write_threshold_get(bs);
 
-    bs0 = bs;
     p_image_info = &info->image;
     info->backing_file_depth = 0;
-    while (1) {
-        Error *local_err = NULL;
-        bdrv_query_image_info(bs0, p_image_info, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            qapi_free_BlockDeviceInfo(info);
-            return NULL;
-        }
-
-        /* stop gathering data for flat output */
-        if (flat) {
-            break;
-        }
 
-        if (bs0->drv && bdrv_filter_or_cow_child(bs0)) {
-            /*
-             * Put any filtered child here (for backwards compatibility to when
-             * we put bs0->backing here, which might be any filtered child).
-             */
-            info->backing_file_depth++;
-            bs0 = bdrv_filter_or_cow_bs(bs0);
-            (*p_image_info)->has_backing_image = true;
-            p_image_info = &((*p_image_info)->backing_image);
-        } else {
-            break;
-        }
+    /*
+     * Skip automatically inserted nodes that the user isn't aware of for
+     * query-block (blk != NULL), but not for query-named-block-nodes
+     */
+    bdrv_query_image_info(bs, p_image_info, flat, blk != NULL, errp);
+    if (*errp) {
+        qapi_free_BlockDeviceInfo(info);
+        return NULL;
+    }
 
-        /* Skip automatically inserted nodes that the user isn't aware of for
-         * query-block (blk != NULL), but not for query-named-block-nodes */
-        if (blk) {
-            bs0 = bdrv_skip_implicit_filters(bs0);
-        }
+    backing_info = info->image->backing_image;
+    while (backing_info) {
+        info->backing_file_depth++;
+        backing_info = backing_info->backing_image;
     }
 
     return info;
@@ -198,7 +178,7 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs,
 {
     int i, sn_count;
     QEMUSnapshotInfo *sn_tab = NULL;
-    SnapshotInfoList *info_list, *cur_item = NULL, *head = NULL;
+    SnapshotInfoList *head = NULL, **tail = &head;
     SnapshotInfo *info;
 
     sn_count = bdrv_snapshot_list(bs, &sn_tab);
@@ -233,17 +213,7 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs,
         info->icount        = sn_tab[i].icount;
         info->has_icount    = sn_tab[i].icount != -1ULL;
 
-        info_list = g_new0(SnapshotInfoList, 1);
-        info_list->value = info;
-
-        /* XXX: waiting for the qapi to support qemu-queue.h types */
-        if (!cur_item) {
-            head = cur_item = info_list;
-        } else {
-            cur_item->next = info_list;
-            cur_item = info_list;
-        }
-
+        QAPI_LIST_APPEND(tail, info);
     }
 
     g_free(sn_tab);
@@ -252,32 +222,17 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs,
 }
 
 /**
- * bdrv_query_image_info:
- * @bs: block device to examine
- * @p_info: location to store image information
- * @errp: location to store error information
- *
- * Store "flat" image information in @p_info.
- *
- * "Flat" means it does *not* query backing image information,
- * i.e. (*pinfo)->has_backing_image will be set to false and
- * (*pinfo)->backing_image to NULL even when the image does in fact have
- * a backing image.
- *
- * @p_info will be set only on success. On error, store error in @errp.
+ * Helper function for other query info functions.  Store information about @bs
+ * in @info, setting @errp on error.
  */
-void bdrv_query_image_info(BlockDriverState *bs,
-                           ImageInfo **p_info,
-                           Error **errp)
+static void GRAPH_RDLOCK
+bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp)
 {
     int64_t size;
     const char *backing_filename;
     BlockDriverInfo bdi;
     int ret;
     Error *err = NULL;
-    ImageInfo *info;
-
-    aio_context_acquire(bdrv_get_aio_context(bs));
 
     size = bdrv_getlength(bs);
     if (size < 0) {
@@ -288,11 +243,12 @@ void bdrv_query_image_info(BlockDriverState *bs,
 
     bdrv_refresh_filename(bs);
 
-    info = g_new0(ImageInfo, 1);
     info->filename        = g_strdup(bs->filename);
     info->format          = g_strdup(bdrv_get_format_name(bs));
     info->virtual_size    = size;
-    info->actual_size     = bdrv_get_allocated_file_size(bs);
+    bdrv_graph_co_rdlock();
+    info->actual_size     = bdrv_co_get_allocated_file_size(bs);
+    bdrv_graph_co_rdunlock();
     info->has_actual_size = info->actual_size >= 0;
     if (bs->encrypted) {
         info->encrypted = true;
@@ -309,29 +265,23 @@ void bdrv_query_image_info(BlockDriverState *bs,
     info->format_specific = bdrv_get_specific_info(bs, &err);
     if (err) {
         error_propagate(errp, err);
-        qapi_free_ImageInfo(info);
         goto out;
     }
-    info->has_format_specific = info->format_specific != NULL;
-
     backing_filename = bs->backing_file;
     if (backing_filename[0] != '\0') {
         char *backing_filename2;
 
         info->backing_filename = g_strdup(backing_filename);
-        info->has_backing_filename = true;
         backing_filename2 = bdrv_get_full_backing_filename(bs, NULL);
 
         /* Always report the full_backing_filename if present, even if it's the
          * same as backing_filename. That they are same is useful info. */
         if (backing_filename2) {
             info->full_backing_filename = g_strdup(backing_filename2);
-            info->has_full_backing_filename = true;
         }
 
         if (bs->backing_format[0]) {
             info->backing_filename_format = g_strdup(bs->backing_format);
-            info->has_backing_filename_format = true;
         }
         g_free(backing_filename2);
     }
@@ -350,19 +300,132 @@ void bdrv_query_image_info(BlockDriverState *bs,
         break;
     default:
         error_propagate(errp, err);
-        qapi_free_ImageInfo(info);
         goto out;
     }
 
+out:
+    return;
+}
+
+/**
+ * bdrv_query_image_info:
+ * @bs: block node to examine
+ * @p_info: location to store image information
+ * @flat: skip backing node information
+ * @skip_implicit_filters: skip implicit filters in the backing chain
+ * @errp: location to store error information
+ *
+ * Store image information in @p_info, potentially recursively covering the
+ * backing chain.
+ *
+ * If @flat is true, do not query backing image information, i.e.
+ * (*p_info)->has_backing_image will be set to false and
+ * (*p_info)->backing_image to NULL even when the image does in fact have a
+ * backing image.
+ *
+ * If @skip_implicit_filters is true, implicit filter nodes in the backing chain
+ * will be skipped when querying backing image information.
+ * (@skip_implicit_filters is ignored when @flat is true.)
+ *
+ * @p_info will be set only on success. On error, store error in @errp.
+ */
+void bdrv_query_image_info(BlockDriverState *bs,
+                           ImageInfo **p_info,
+                           bool flat,
+                           bool skip_implicit_filters,
+                           Error **errp)
+{
+    ImageInfo *info;
+    ERRP_GUARD();
+
+    info = g_new0(ImageInfo, 1);
+    bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp);
+    if (*errp) {
+        goto fail;
+    }
+
+    if (!flat) {
+        BlockDriverState *backing;
+
+        /*
+         * Use any filtered child here (for backwards compatibility to when
+         * we always took bs->backing, which might be any filtered child).
+         */
+        backing = bdrv_filter_or_cow_bs(bs);
+        if (skip_implicit_filters) {
+            backing = bdrv_skip_implicit_filters(backing);
+        }
+
+        if (backing) {
+            bdrv_query_image_info(backing, &info->backing_image, false,
+                                  skip_implicit_filters, errp);
+            if (*errp) {
+                goto fail;
+            }
+        }
+    }
+
     *p_info = info;
+    return;
 
-out:
-    aio_context_release(bdrv_get_aio_context(bs));
+fail:
+    assert(*errp);
+    qapi_free_ImageInfo(info);
+}
+
+/**
+ * bdrv_co_query_block_graph_info:
+ * @bs: root node to start from
+ * @p_info: location to store image information
+ * @errp: location to store error information
+ *
+ * Store image information about the graph starting from @bs in @p_info.
+ *
+ * @p_info will be set only on success. On error, store error in @errp.
+ */
+void coroutine_fn bdrv_co_query_block_graph_info(BlockDriverState *bs,
+                                                 BlockGraphInfo **p_info,
+                                                 Error **errp)
+{
+    BlockGraphInfo *info;
+    BlockChildInfoList **children_list_tail;
+    BdrvChild *c;
+    ERRP_GUARD();
+
+    assert_bdrv_graph_readable();
+
+    info = g_new0(BlockGraphInfo, 1);
+    bdrv_do_query_node_info(bs, qapi_BlockGraphInfo_base(info), errp);
+    if (*errp) {
+        goto fail;
+    }
+
+    children_list_tail = &info->children;
+
+    QLIST_FOREACH(c, &bs->children, next) {
+        BlockChildInfo *c_info;
+
+        c_info = g_new0(BlockChildInfo, 1);
+        QAPI_LIST_APPEND(children_list_tail, c_info);
+
+        c_info->name = g_strdup(c->name);
+        bdrv_co_query_block_graph_info(c->bs, &c_info->info, errp);
+        if (*errp) {
+            goto fail;
+        }
+    }
+
+    *p_info = info;
+    return;
+
+fail:
+    assert(*errp != NULL);
+    qapi_free_BlockGraphInfo(info);
 }
 
 /* @p_info will be set only on success. */
-static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
-                            Error **errp)
+static void GRAPH_RDLOCK
+bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, Error **errp)
 {
     BlockInfo *info = g_malloc0(sizeof(*info));
     BlockDriverState *bs = blk_bs(blk);
@@ -378,7 +441,6 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
 
     qdev = blk_get_attached_dev_id(blk);
     if (qdev && *qdev) {
-        info->has_qdev = true;
         info->qdev = qdev;
     } else {
         g_free(qdev);
@@ -394,13 +456,7 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
         info->io_status = blk_iostatus(blk);
     }
 
-    if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) {
-        info->has_dirty_bitmaps = true;
-        info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
-    }
-
     if (bs && bs->drv) {
-        info->has_inserted = true;
         info->inserted = bdrv_block_device_info(blk, bs, false, errp);
         if (info->inserted == NULL) {
             goto err;
@@ -418,61 +474,68 @@ static uint64List *uint64_list(uint64_t *list, int size)
 {
     int i;
     uint64List *out_list = NULL;
-    uint64List **pout_list = &out_list;
+    uint64List **tail = &out_list;
 
     for (i = 0; i < size; i++) {
-        uint64List *entry = g_new(uint64List, 1);
-        entry->value = list[i];
-        *pout_list = entry;
-        pout_list = &entry->next;
+        QAPI_LIST_APPEND(tail, list[i]);
     }
 
-    *pout_list = NULL;
-
     return out_list;
 }
 
-static void bdrv_latency_histogram_stats(BlockLatencyHistogram *hist,
-                                         bool *not_null,
-                                         BlockLatencyHistogramInfo **info)
+static BlockLatencyHistogramInfo *
+bdrv_latency_histogram_stats(BlockLatencyHistogram *hist)
 {
-    *not_null = hist->bins != NULL;
-    if (*not_null) {
-        *info = g_new0(BlockLatencyHistogramInfo, 1);
+    BlockLatencyHistogramInfo *info;
 
-        (*info)->boundaries = uint64_list(hist->boundaries, hist->nbins - 1);
-        (*info)->bins = uint64_list(hist->bins, hist->nbins);
+    if (!hist->bins) {
+        return NULL;
     }
+
+    info = g_new0(BlockLatencyHistogramInfo, 1);
+    info->boundaries = uint64_list(hist->boundaries, hist->nbins - 1);
+    info->bins = uint64_list(hist->bins, hist->nbins);
+    return info;
 }
 
 static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
 {
     BlockAcctStats *stats = blk_get_stats(blk);
     BlockAcctTimedStats *ts = NULL;
+    BlockLatencyHistogram *hgram;
 
     ds->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
     ds->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
+    ds->zone_append_bytes = stats->nr_bytes[BLOCK_ACCT_ZONE_APPEND];
     ds->unmap_bytes = stats->nr_bytes[BLOCK_ACCT_UNMAP];
     ds->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
     ds->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
+    ds->zone_append_operations = stats->nr_ops[BLOCK_ACCT_ZONE_APPEND];
     ds->unmap_operations = stats->nr_ops[BLOCK_ACCT_UNMAP];
 
     ds->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
     ds->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
+    ds->failed_zone_append_operations =
+        stats->failed_ops[BLOCK_ACCT_ZONE_APPEND];
     ds->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
     ds->failed_unmap_operations = stats->failed_ops[BLOCK_ACCT_UNMAP];
 
     ds->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
     ds->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
+    ds->invalid_zone_append_operations =
+        stats->invalid_ops[BLOCK_ACCT_ZONE_APPEND];
     ds->invalid_flush_operations =
         stats->invalid_ops[BLOCK_ACCT_FLUSH];
     ds->invalid_unmap_operations = stats->invalid_ops[BLOCK_ACCT_UNMAP];
 
     ds->rd_merged = stats->merged[BLOCK_ACCT_READ];
     ds->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
+    ds->zone_append_merged = stats->merged[BLOCK_ACCT_ZONE_APPEND];
     ds->unmap_merged = stats->merged[BLOCK_ACCT_UNMAP];
     ds->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
     ds->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
+    ds->zone_append_total_time_ns =
+        stats->total_time_ns[BLOCK_ACCT_ZONE_APPEND];
     ds->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
     ds->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
     ds->unmap_total_time_ns = stats->total_time_ns[BLOCK_ACCT_UNMAP];
@@ -486,15 +549,11 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
     ds->account_failed = stats->account_failed;
 
     while ((ts = block_acct_interval_next(stats, ts))) {
-        BlockDeviceTimedStatsList *timed_stats =
-            g_malloc0(sizeof(*timed_stats));
         BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
-        timed_stats->next = ds->timed_stats;
-        timed_stats->value = dev_stats;
-        ds->timed_stats = timed_stats;
 
         TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
         TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
+        TimedAverage *zap = &ts->latency[BLOCK_ACCT_ZONE_APPEND];
         TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
 
         dev_stats->interval_length = ts->interval_length;
@@ -507,6 +566,10 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
         dev_stats->max_wr_latency_ns = timed_average_max(wr);
         dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
 
+        dev_stats->min_zone_append_latency_ns = timed_average_min(zap);
+        dev_stats->max_zone_append_latency_ns = timed_average_max(zap);
+        dev_stats->avg_zone_append_latency_ns = timed_average_avg(zap);
+
         dev_stats->min_flush_latency_ns = timed_average_min(fl);
         dev_stats->max_flush_latency_ns = timed_average_max(fl);
         dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
@@ -515,21 +578,25 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
             block_acct_queue_depth(ts, BLOCK_ACCT_READ);
         dev_stats->avg_wr_queue_depth =
             block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
+        dev_stats->avg_zone_append_queue_depth =
+            block_acct_queue_depth(ts, BLOCK_ACCT_ZONE_APPEND);
+
+        QAPI_LIST_PREPEND(ds->timed_stats, dev_stats);
     }
 
-    bdrv_latency_histogram_stats(&stats->latency_histogram[BLOCK_ACCT_READ],
-                                 &ds->has_rd_latency_histogram,
-                                 &ds->rd_latency_histogram);
-    bdrv_latency_histogram_stats(&stats->latency_histogram[BLOCK_ACCT_WRITE],
-                                 &ds->has_wr_latency_histogram,
-                                 &ds->wr_latency_histogram);
-    bdrv_latency_histogram_stats(&stats->latency_histogram[BLOCK_ACCT_FLUSH],
-                                 &ds->has_flush_latency_histogram,
-                                 &ds->flush_latency_histogram);
+    hgram = stats->latency_histogram;
+    ds->rd_latency_histogram
+        = bdrv_latency_histogram_stats(&hgram[BLOCK_ACCT_READ]);
+    ds->wr_latency_histogram
+        = bdrv_latency_histogram_stats(&hgram[BLOCK_ACCT_WRITE]);
+    ds->zone_append_latency_histogram
+        = bdrv_latency_histogram_stats(&hgram[BLOCK_ACCT_ZONE_APPEND]);
+    ds->flush_latency_histogram
+        = bdrv_latency_histogram_stats(&hgram[BLOCK_ACCT_FLUSH]);
 }
 
-static BlockStats *bdrv_query_bds_stats(BlockDriverState *bs,
-                                        bool blk_level)
+static BlockStats * GRAPH_RDLOCK
+bdrv_query_bds_stats(BlockDriverState *bs, bool blk_level)
 {
     BdrvChild *parent_child;
     BlockDriverState *filter_or_cow_bs;
@@ -550,16 +617,12 @@ static BlockStats *bdrv_query_bds_stats(BlockDriverState *bs,
     }
 
     if (bdrv_get_node_name(bs)[0]) {
-        s->has_node_name = true;
         s->node_name = g_strdup(bdrv_get_node_name(bs));
     }
 
     s->stats->wr_highest_offset = stat64_get(&bs->wr_highest_offset);
 
     s->driver_specific = bdrv_get_specific_stats(bs);
-    if (s->driver_specific) {
-        s->has_driver_specific = true;
-    }
 
     parent_child = bdrv_primary_child(bs);
     if (!parent_child ||
@@ -588,7 +651,6 @@ static BlockStats *bdrv_query_bds_stats(BlockDriverState *bs,
         }
     }
     if (parent_child) {
-        s->has_parent = true;
         s->parent = bdrv_query_bds_stats(parent_child->bs, blk_level);
     }
 
@@ -599,19 +661,20 @@ static BlockStats *bdrv_query_bds_stats(BlockDriverState *bs,
          * compatibility to when we put bs0->backing here, which might
          * be either)
          */
-        s->has_backing = true;
         s->backing = bdrv_query_bds_stats(filter_or_cow_bs, blk_level);
     }
 
     return s;
 }
 
-BlockInfoList *qmp_query_block(Error **errp)
+BlockInfoList *coroutine_fn qmp_query_block(Error **errp)
 {
     BlockInfoList *head = NULL, **p_next = &head;
     BlockBackend *blk;
     Error *local_err = NULL;
 
+    GRAPH_RDLOCK_GUARD();
+
     for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
         BlockInfoList *info;
 
@@ -639,26 +702,23 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
                                      bool query_nodes,
                                      Error **errp)
 {
-    BlockStatsList *head = NULL, **p_next = &head;
+    BlockStatsList *head = NULL, **tail = &head;
     BlockBackend *blk;
     BlockDriverState *bs;
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     /* Just to be safe if query_nodes is not always initialized */
     if (has_query_nodes && query_nodes) {
         for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) {
-            BlockStatsList *info = g_malloc0(sizeof(*info));
             AioContext *ctx = bdrv_get_aio_context(bs);
 
             aio_context_acquire(ctx);
-            info->value = bdrv_query_bds_stats(bs, false);
+            QAPI_LIST_APPEND(tail, bdrv_query_bds_stats(bs, false));
             aio_context_release(ctx);
-
-            *p_next = info;
-            p_next = &info->next;
         }
     } else {
         for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
-            BlockStatsList *info;
             AioContext *ctx = blk_get_aio_context(blk);
             BlockStats *s;
             char *qdev;
@@ -669,12 +729,10 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
 
             aio_context_acquire(ctx);
             s = bdrv_query_bds_stats(blk_bs(blk), true);
-            s->has_device = true;
             s->device = g_strdup(blk_name(blk));
 
             qdev = blk_get_attached_dev_id(blk);
             if (qdev && *qdev) {
-                s->has_qdev = true;
                 s->qdev = qdev;
             } else {
                 g_free(qdev);
@@ -683,10 +741,7 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
             bdrv_query_blk_stats(s->stats, blk);
             aio_context_release(ctx);
 
-            info = g_malloc0(sizeof(*info));
-            info->value = s;
-            *p_next = info;
-            p_next = &info->next;
+            QAPI_LIST_APPEND(tail, s);
         }
     }
 
@@ -695,21 +750,18 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
 
 void bdrv_snapshot_dump(QEMUSnapshotInfo *sn)
 {
-    char date_buf[128], clock_buf[128];
+    char clock_buf[128];
     char icount_buf[128] = {0};
-    struct tm tm;
-    time_t ti;
     int64_t secs;
     char *sizing = NULL;
 
     if (!sn) {
-        qemu_printf("%-10s%-18s%7s%20s%13s%11s",
+        qemu_printf("%-10s%-17s%8s%20s%13s%11s",
                     "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK", "ICOUNT");
     } else {
-        ti = sn->date_sec;
-        localtime_r(&ti, &tm);
-        strftime(date_buf, sizeof(date_buf),
-                 "%Y-%m-%d %H:%M:%S", &tm);
+        g_autoptr(GDateTime) date = g_date_time_new_from_unix_local(sn->date_sec);
+        g_autofree char *date_buf = g_date_time_format(date, "%Y-%m-%d %H:%M:%S");
+
         secs = sn->vm_clock_nsec / 1000000000;
         snprintf(clock_buf, sizeof(clock_buf),
                  "%02d:%02d:%02d.%03d",
@@ -722,7 +774,7 @@ void bdrv_snapshot_dump(QEMUSnapshotInfo *sn)
             snprintf(icount_buf, sizeof(icount_buf),
                 "%"PRId64, sn->icount);
         }
-        qemu_printf("%-9s %-17s %7s%20s%13s%11s",
+        qemu_printf("%-9s %-16s %8s%20s%13s%11s",
                     sn->id_str, sn->name,
                     sizing,
                     date_buf,
@@ -812,7 +864,36 @@ static void dump_qdict(int indentation, QDict *dict)
     }
 }
 
-void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec)
+/*
+ * Return whether dumping the given QObject with dump_qobject() would
+ * yield an empty dump, i.e. not print anything.
+ */
+static bool qobject_is_empty_dump(const QObject *obj)
+{
+    switch (qobject_type(obj)) {
+    case QTYPE_QNUM:
+    case QTYPE_QSTRING:
+    case QTYPE_QBOOL:
+        return false;
+
+    case QTYPE_QDICT:
+        return qdict_size(qobject_to(QDict, obj)) == 0;
+
+    case QTYPE_QLIST:
+        return qlist_empty(qobject_to(QList, obj));
+
+    default:
+        abort();
+    }
+}
+
+/**
+ * Dumps the given ImageInfoSpecific object in a human-readable form,
+ * prepending an optional prefix if the dump is not empty.
+ */
+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
+                                   const char *prefix,
+                                   int indentation)
 {
     QObject *obj, *data;
     Visitor *v = qobject_output_visitor_new(&obj);
@@ -820,62 +901,96 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec)
     visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
     visit_complete(v, &obj);
     data = qdict_get(qobject_to(QDict, obj), "data");
-    dump_qobject(1, data);
+    if (!qobject_is_empty_dump(data)) {
+        if (prefix) {
+            qemu_printf("%*s%s", indentation * 4, "", prefix);
+        }
+        dump_qobject(indentation + 1, data);
+    }
     qobject_unref(obj);
     visit_free(v);
 }
 
-void bdrv_image_info_dump(ImageInfo *info)
+/**
+ * Print the given @info object in human-readable form.  Every field is indented
+ * using the given @indentation (four spaces per indentation level).
+ *
+ * When using this to print a whole block graph, @protocol can be set to true to
+ * signify that the given information is associated with a protocol node, i.e.
+ * just data storage for an image, such that the data it presents is not really
+ * a full VM disk.  If so, several fields change name: For example, "virtual
+ * size" is printed as "file length".
+ * (Consider a qcow2 image, which is represented by a qcow2 node and a file
+ * node.  Printing a "virtual size" for the file node does not make sense,
+ * because without the qcow2 node, it is not really a guest disk, so it does not
+ * have a "virtual size".  Therefore, we call it "file length" instead.)
+ *
+ * @protocol is ignored when @indentation is 0, because we take that to mean
+ * that the associated node is the root node in the queried block graph, and
+ * thus is always to be interpreted as a standalone guest disk.
+ */
+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation, bool protocol)
 {
     char *size_buf, *dsize_buf;
+    g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, "");
+
+    if (indentation == 0) {
+        /* Top level, consider this a normal image */
+        protocol = false;
+    }
+
     if (!info->has_actual_size) {
         dsize_buf = g_strdup("unavailable");
     } else {
         dsize_buf = size_to_str(info->actual_size);
     }
     size_buf = size_to_str(info->virtual_size);
-    qemu_printf("image: %s\n"
-                "file format: %s\n"
-                "virtual size: %s (%" PRId64 " bytes)\n"
-                "disk size: %s\n",
-                info->filename, info->format, size_buf,
-                info->virtual_size,
-                dsize_buf);
+    qemu_printf("%s%s: %s\n"
+                "%s%s: %s\n"
+                "%s%s: %s (%" PRId64 " bytes)\n"
+                "%sdisk size: %s\n",
+                ind_s, protocol ? "filename" : "image", info->filename,
+                ind_s, protocol ? "protocol type" : "file format",
+                info->format,
+                ind_s, protocol ? "file length" : "virtual size",
+                size_buf, info->virtual_size,
+                ind_s, dsize_buf);
     g_free(size_buf);
     g_free(dsize_buf);
 
     if (info->has_encrypted && info->encrypted) {
-        qemu_printf("encrypted: yes\n");
+        qemu_printf("%sencrypted: yes\n", ind_s);
     }
 
     if (info->has_cluster_size) {
-        qemu_printf("cluster_size: %" PRId64 "\n",
-                    info->cluster_size);
+        qemu_printf("%scluster_size: %" PRId64 "\n",
+                    ind_s, info->cluster_size);
     }
 
     if (info->has_dirty_flag && info->dirty_flag) {
-        qemu_printf("cleanly shut down: no\n");
+        qemu_printf("%scleanly shut down: no\n", ind_s);
     }
 
-    if (info->has_backing_filename) {
-        qemu_printf("backing file: %s", info->backing_filename);
-        if (!info->has_full_backing_filename) {
+    if (info->backing_filename) {
+        qemu_printf("%sbacking file: %s", ind_s, info->backing_filename);
+        if (!info->full_backing_filename) {
             qemu_printf(" (cannot determine actual path)");
         } else if (strcmp(info->backing_filename,
                           info->full_backing_filename) != 0) {
             qemu_printf(" (actual path: %s)", info->full_backing_filename);
         }
         qemu_printf("\n");
-        if (info->has_backing_filename_format) {
-            qemu_printf("backing file format: %s\n",
-                        info->backing_filename_format);
+        if (info->backing_filename_format) {
+            qemu_printf("%sbacking file format: %s\n",
+                        ind_s, info->backing_filename_format);
         }
     }
 
     if (info->has_snapshots) {
         SnapshotInfoList *elem;
 
-        qemu_printf("Snapshot list:\n");
+        qemu_printf("%sSnapshot list:\n", ind_s);
+        qemu_printf("%s", ind_s);
         bdrv_snapshot_dump(NULL);
         qemu_printf("\n");
 
@@ -895,13 +1010,15 @@ void bdrv_image_info_dump(ImageInfo *info)
 
             pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id);
             pstrcpy(sn.name, sizeof(sn.name), elem->value->name);
+            qemu_printf("%s", ind_s);
             bdrv_snapshot_dump(&sn);
             qemu_printf("\n");
         }
     }
 
-    if (info->has_format_specific) {
-        qemu_printf("Format specific information:\n");
-        bdrv_image_info_specific_dump(info->format_specific);
+    if (info->format_specific) {
+        bdrv_image_info_specific_dump(info->format_specific,
+                                      "Format specific information:\n",
+                                      indentation);
     }
 }
index f8919a44d1935b18b545bbd8bbf4a007d000d75a..c6d0e15f1efc8193f8f49cd24342c63138d3c8f9 100644 (file)
@@ -32,6 +32,7 @@
 #include "qemu/option.h"
 #include "qemu/bswap.h"
 #include "qemu/cutils.h"
+#include "qemu/memalign.h"
 #include <zlib.h>
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qstring.h"
@@ -91,7 +92,8 @@ typedef struct BDRVQcowState {
 
 static QemuOptsList qcow_create_opts;
 
-static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
+static int coroutine_fn GRAPH_RDLOCK
+decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
 
 static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
@@ -120,14 +122,14 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
     qdict_extract_subqdict(options, &encryptopts, "encrypt.");
     encryptfmt = qdict_get_try_str(encryptopts, "format");
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_IMAGE, false, errp);
-    if (!bs->file) {
-        ret = -EINVAL;
-        goto fail;
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        goto fail_unlocked;
     }
 
-    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+    bdrv_graph_rdlock_main_loop();
+
+    ret = bdrv_pread(bs->file, 0, sizeof(header), &header, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -259,8 +261,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
-               s->l1_size * sizeof(uint64_t));
+    ret = bdrv_pread(bs->file, s->l1_table_offset,
+                     s->l1_size * sizeof(uint64_t), s->l1_table, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -290,8 +292,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
             ret = -EINVAL;
             goto fail;
         }
-        ret = bdrv_pread(bs->file, header.backing_file_offset,
-                   bs->auto_backing_file, len);
+        ret = bdrv_pread(bs->file, header.backing_file_offset, len,
+                         bs->auto_backing_file, 0);
         if (ret < 0) {
             goto fail;
         }
@@ -304,18 +306,21 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
     error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
                "does not support live migration",
                bdrv_get_device_or_node_name(bs));
-    ret = migrate_add_blocker(s->migration_blocker, errp);
+
+    ret = migrate_add_blocker_normal(&s->migration_blocker, errp);
     if (ret < 0) {
-        error_free(s->migration_blocker);
         goto fail;
     }
 
     qobject_unref(encryptopts);
     qapi_free_QCryptoBlockOpenOptions(crypto_opts);
     qemu_co_mutex_init(&s->lock);
+    bdrv_graph_rdunlock_main_loop();
     return 0;
 
- fail:
+fail:
+    bdrv_graph_rdunlock_main_loop();
+fail_unlocked:
     g_free(s->l1_table);
     qemu_vfree(s->l2_cache);
     g_free(s->cluster_cache);
@@ -350,10 +355,10 @@ static int qcow_reopen_prepare(BDRVReopenState *state,
  * return 0 if not allocated, 1 if *result is assigned, and negative
  * errno on failure.
  */
-static int get_cluster_offset(BlockDriverState *bs,
-                              uint64_t offset, int allocate,
-                              int compressed_size,
-                              int n_start, int n_end, uint64_t *result)
+static int coroutine_fn GRAPH_RDLOCK
+get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate,
+                   int compressed_size, int n_start, int n_end,
+                   uint64_t *result)
 {
     BDRVQcowState *s = bs->opaque;
     int min_index, i, j, l1_index, l2_index, ret;
@@ -370,7 +375,7 @@ static int get_cluster_offset(BlockDriverState *bs,
         if (!allocate)
             return 0;
         /* allocate a new l2 entry */
-        l2_offset = bdrv_getlength(bs->file->bs);
+        l2_offset = bdrv_co_getlength(bs->file->bs);
         if (l2_offset < 0) {
             return l2_offset;
         }
@@ -379,10 +384,10 @@ static int get_cluster_offset(BlockDriverState *bs,
         /* update the L1 entry */
         s->l1_table[l1_index] = l2_offset;
         tmp = cpu_to_be64(l2_offset);
-        BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
-        ret = bdrv_pwrite_sync(bs->file,
-                               s->l1_table_offset + l1_index * sizeof(tmp),
-                               &tmp, sizeof(tmp));
+        BLKDBG_CO_EVENT(bs->file, BLKDBG_L1_UPDATE);
+        ret = bdrv_co_pwrite_sync(bs->file,
+                                  s->l1_table_offset + l1_index * sizeof(tmp),
+                                  sizeof(tmp), &tmp, 0);
         if (ret < 0) {
             return ret;
         }
@@ -410,17 +415,17 @@ static int get_cluster_offset(BlockDriverState *bs,
         }
     }
     l2_table = s->l2_cache + (min_index << s->l2_bits);
-    BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
+    BLKDBG_CO_EVENT(bs->file, BLKDBG_L2_LOAD);
     if (new_l2_table) {
         memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-        ret = bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
-                               s->l2_size * sizeof(uint64_t));
+        ret = bdrv_co_pwrite_sync(bs->file, l2_offset,
+                                  s->l2_size * sizeof(uint64_t), l2_table, 0);
         if (ret < 0) {
             return ret;
         }
     } else {
-        ret = bdrv_pread(bs->file, l2_offset, l2_table,
-                         s->l2_size * sizeof(uint64_t));
+        ret = bdrv_co_pread(bs->file, l2_offset,
+                            s->l2_size * sizeof(uint64_t), l2_table, 0);
         if (ret < 0) {
             return ret;
         }
@@ -434,7 +439,7 @@ static int get_cluster_offset(BlockDriverState *bs,
         ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
         if (!allocate)
             return 0;
-        BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
+        BLKDBG_CO_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
         assert(QEMU_IS_ALIGNED(n_start | n_end, BDRV_SECTOR_SIZE));
         /* allocate a new cluster */
         if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
@@ -445,20 +450,20 @@ static int get_cluster_offset(BlockDriverState *bs,
             if (decompress_cluster(bs, cluster_offset) < 0) {
                 return -EIO;
             }
-            cluster_offset = bdrv_getlength(bs->file->bs);
+            cluster_offset = bdrv_co_getlength(bs->file->bs);
             if ((int64_t) cluster_offset < 0) {
                 return cluster_offset;
             }
             cluster_offset = QEMU_ALIGN_UP(cluster_offset, s->cluster_size);
             /* write the cluster content */
-            BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-            ret = bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache,
-                              s->cluster_size);
+            BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
+            ret = bdrv_co_pwrite(bs->file, cluster_offset, s->cluster_size,
+                                 s->cluster_cache, 0);
             if (ret < 0) {
                 return ret;
             }
         } else {
-            cluster_offset = bdrv_getlength(bs->file->bs);
+            cluster_offset = bdrv_co_getlength(bs->file->bs);
             if ((int64_t) cluster_offset < 0) {
                 return cluster_offset;
             }
@@ -468,8 +473,9 @@ static int get_cluster_offset(BlockDriverState *bs,
                 if (cluster_offset + s->cluster_size > INT64_MAX) {
                     return -E2BIG;
                 }
-                ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size,
-                                    false, PREALLOC_MODE_OFF, 0, NULL);
+                ret = bdrv_co_truncate(bs->file,
+                                       cluster_offset + s->cluster_size,
+                                       false, PREALLOC_MODE_OFF, 0, NULL);
                 if (ret < 0) {
                     return ret;
                 }
@@ -490,11 +496,10 @@ static int get_cluster_offset(BlockDriverState *bs,
                                                       NULL) < 0) {
                                 return -EIO;
                             }
-                            BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-                            ret = bdrv_pwrite(bs->file,
-                                              cluster_offset + i,
-                                              s->cluster_data,
-                                              BDRV_SECTOR_SIZE);
+                            BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
+                            ret = bdrv_co_pwrite(bs->file, cluster_offset + i,
+                                                 BDRV_SECTOR_SIZE,
+                                                 s->cluster_data, 0);
                             if (ret < 0) {
                                 return ret;
                             }
@@ -510,12 +515,12 @@ static int get_cluster_offset(BlockDriverState *bs,
         tmp = cpu_to_be64(cluster_offset);
         l2_table[l2_index] = tmp;
         if (allocate == 2) {
-            BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
+            BLKDBG_CO_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
         } else {
-            BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
+            BLKDBG_CO_EVENT(bs->file, BLKDBG_L2_UPDATE);
         }
-        ret = bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
-                               &tmp, sizeof(tmp));
+        ret = bdrv_co_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
+                                  sizeof(tmp), &tmp, 0);
         if (ret < 0) {
             return ret;
         }
@@ -524,11 +529,10 @@ static int get_cluster_offset(BlockDriverState *bs,
     return 1;
 }
 
-static int coroutine_fn qcow_co_block_status(BlockDriverState *bs,
-                                             bool want_zero,
-                                             int64_t offset, int64_t bytes,
-                                             int64_t *pnum, int64_t *map,
-                                             BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+qcow_co_block_status(BlockDriverState *bs, bool want_zero,
+                     int64_t offset, int64_t bytes, int64_t *pnum,
+                     int64_t *map, BlockDriverState **file)
 {
     BDRVQcowState *s = bs->opaque;
     int index_in_cluster, ret;
@@ -550,7 +554,10 @@ static int coroutine_fn qcow_co_block_status(BlockDriverState *bs,
     if (!cluster_offset) {
         return 0;
     }
-    if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypto) {
+    if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+        return BDRV_BLOCK_DATA | BDRV_BLOCK_COMPRESSED;
+    }
+    if (s->crypto) {
         return BDRV_BLOCK_DATA;
     }
     *map = cluster_offset | index_in_cluster;
@@ -585,7 +592,8 @@ static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
     return 0;
 }
 
-static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
+static int coroutine_fn GRAPH_RDLOCK
+decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
 {
     BDRVQcowState *s = bs->opaque;
     int ret, csize;
@@ -595,9 +603,9 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
     if (s->cluster_cache_offset != coffset) {
         csize = cluster_offset >> (63 - s->cluster_bits);
         csize &= (s->cluster_size - 1);
-        BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
-        ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
-        if (ret != csize)
+        BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
+        ret = bdrv_co_pread(bs->file, coffset, csize, s->cluster_data, 0);
+        if (ret < 0)
             return -1;
         if (decompress_buffer(s->cluster_cache, s->cluster_size,
                               s->cluster_data, csize) < 0) {
@@ -617,9 +625,9 @@ static void qcow_refresh_limits(BlockDriverState *bs, Error **errp)
     bs->bl.request_alignment = BDRV_SECTOR_SIZE;
 }
 
-static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset,
-                                       uint64_t bytes, QEMUIOVector *qiov,
-                                       int flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+               QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVQcowState *s = bs->opaque;
     int offset_in_cluster;
@@ -628,7 +636,6 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset,
     uint8_t *buf;
     void *orig_buf;
 
-    assert(!flags);
     if (qiov->niov > 1) {
         buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
         if (buf == NULL) {
@@ -658,7 +665,7 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset,
                 /* read from the base image */
                 qemu_co_mutex_unlock(&s->lock);
                 /* qcow2 emits this on bs->file instead of bs->backing */
-                BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
+                BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                 ret = bdrv_co_pread(bs->backing, offset, n, buf, 0);
                 qemu_co_mutex_lock(&s->lock);
                 if (ret < 0) {
@@ -681,7 +688,7 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset,
                 break;
             }
             qemu_co_mutex_unlock(&s->lock);
-            BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+            BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO);
             ret = bdrv_co_pread(bs->file, cluster_offset + offset_in_cluster,
                                 n, buf, 0);
             qemu_co_mutex_lock(&s->lock);
@@ -714,9 +721,9 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset,
     return ret;
 }
 
-static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset,
-                                        uint64_t bytes, QEMUIOVector *qiov,
-                                        int flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVQcowState *s = bs->opaque;
     int offset_in_cluster;
@@ -725,7 +732,6 @@ static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset,
     uint8_t *buf;
     void *orig_buf;
 
-    assert(!flags);
     s->cluster_cache_offset = -1; /* disable compressed cache */
 
     /* We must always copy the iov when encrypting, so we
@@ -767,7 +773,7 @@ static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset,
         }
 
         qemu_co_mutex_unlock(&s->lock);
-        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+        BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
         ret = bdrv_co_pwrite(bs->file, cluster_offset + offset_in_cluster,
                              n, buf, 0);
         qemu_co_mutex_lock(&s->lock);
@@ -798,12 +804,11 @@ static void qcow_close(BlockDriverState *bs)
     g_free(s->cluster_cache);
     g_free(s->cluster_data);
 
-    migrate_del_blocker(s->migration_blocker);
-    error_free(s->migration_blocker);
+    migrate_del_blocker(&s->migration_blocker);
 }
 
-static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
-                                       Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+qcow_co_create(BlockdevCreateOptions *opts, Error **errp)
 {
     BlockdevCreateOptionsQcow *qcow_opts;
     int header_size, backing_filename_len, l1_size, shift, i;
@@ -825,7 +830,7 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
         return -EINVAL;
     }
 
-    if (qcow_opts->has_encrypt &&
+    if (qcow_opts->encrypt &&
         qcow_opts->encrypt->format != Q_CRYPTO_BLOCK_FORMAT_QCOW)
     {
         error_setg(errp, "Unsupported encryption format");
@@ -833,13 +838,13 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
     }
 
     /* Create BlockBackend to write to the image */
-    bs = bdrv_open_blockdev_ref(qcow_opts->file, errp);
+    bs = bdrv_co_open_blockdev_ref(qcow_opts->file, errp);
     if (bs == NULL) {
         return -EIO;
     }
 
-    qcow_blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE,
-                               BLK_PERM_ALL, errp);
+    qcow_blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE,
+                                  BLK_PERM_ALL, errp);
     if (!qcow_blk) {
         ret = -EPERM;
         goto exit;
@@ -853,7 +858,7 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
     header.size = cpu_to_be64(total_size);
     header_size = sizeof(header);
     backing_filename_len = 0;
-    if (qcow_opts->has_backing_file) {
+    if (qcow_opts->backing_file) {
         if (strcmp(qcow_opts->backing_file, "fat:")) {
             header.backing_file_offset = cpu_to_be64(header_size);
             backing_filename_len = strlen(qcow_opts->backing_file);
@@ -861,7 +866,7 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
             header_size += backing_filename_len;
         } else {
             /* special backing file for vvfat */
-            qcow_opts->has_backing_file = false;
+            qcow_opts->backing_file = NULL;
         }
         header.cluster_bits = 9; /* 512 byte cluster to avoid copying
                                     unmodified sectors */
@@ -876,7 +881,7 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
 
     header.l1_table_offset = cpu_to_be64(header_size);
 
-    if (qcow_opts->has_encrypt) {
+    if (qcow_opts->encrypt) {
         header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
 
         crypto = qcrypto_block_create(qcow_opts->encrypt, "encrypt.",
@@ -890,15 +895,15 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
     }
 
     /* write all the data */
-    ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header), 0);
-    if (ret != sizeof(header)) {
+    ret = blk_co_pwrite(qcow_blk, 0, sizeof(header), &header, 0);
+    if (ret < 0) {
         goto exit;
     }
 
-    if (qcow_opts->has_backing_file) {
-        ret = blk_pwrite(qcow_blk, sizeof(header),
-                         qcow_opts->backing_file, backing_filename_len, 0);
-        if (ret != backing_filename_len) {
+    if (qcow_opts->backing_file) {
+        ret = blk_co_pwrite(qcow_blk, sizeof(header), backing_filename_len,
+                            qcow_opts->backing_file, 0);
+        if (ret < 0) {
             goto exit;
         }
     }
@@ -906,9 +911,9 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
     tmp = g_malloc0(BDRV_SECTOR_SIZE);
     for (i = 0; i < DIV_ROUND_UP(sizeof(uint64_t) * l1_size, BDRV_SECTOR_SIZE);
          i++) {
-        ret = blk_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE * i,
-                         tmp, BDRV_SECTOR_SIZE, 0);
-        if (ret != BDRV_SECTOR_SIZE) {
+        ret = blk_co_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE * i,
+                            BDRV_SECTOR_SIZE, tmp, 0);
+        if (ret < 0) {
             g_free(tmp);
             goto exit;
         }
@@ -917,15 +922,15 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts,
     g_free(tmp);
     ret = 0;
 exit:
-    blk_unref(qcow_blk);
-    bdrv_unref(bs);
+    blk_co_unref(qcow_blk);
+    bdrv_co_unref(bs);
     qcrypto_block_free(crypto);
     return ret;
 }
 
-static int coroutine_fn qcow_co_create_opts(BlockDriver *drv,
-                                            const char *filename,
-                                            QemuOpts *opts, Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+qcow_co_create_opts(BlockDriver *drv, const char *filename,
+                    QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
     BlockDriverState *bs = NULL;
@@ -973,13 +978,13 @@ static int coroutine_fn qcow_co_create_opts(BlockDriver *drv,
     }
 
     /* Create and open the file (protocol layer) */
-    ret = bdrv_create_file(filename, opts, errp);
+    ret = bdrv_co_create_file(filename, opts, errp);
     if (ret < 0) {
         goto fail;
     }
 
-    bs = bdrv_open(filename, NULL, NULL,
-                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs = bdrv_co_open(filename, NULL, NULL,
+                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (bs == NULL) {
         ret = -EIO;
         goto fail;
@@ -1017,20 +1022,20 @@ static int coroutine_fn qcow_co_create_opts(BlockDriver *drv,
 fail:
     g_free(backing_fmt);
     qobject_unref(qdict);
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
     qapi_free_BlockdevCreateOptions(create_options);
     return ret;
 }
 
-static int qcow_make_empty(BlockDriverState *bs)
+static int GRAPH_RDLOCK qcow_make_empty(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
     uint32_t l1_length = s->l1_size * sizeof(uint64_t);
     int ret;
 
     memset(s->l1_table, 0, l1_length);
-    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
-            l1_length) < 0)
+    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, l1_length, s->l1_table,
+                         0) < 0)
         return -1;
     ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, false,
                         PREALLOC_MODE_OFF, 0, NULL);
@@ -1046,9 +1051,9 @@ static int qcow_make_empty(BlockDriverState *bs)
 
 /* XXX: put compressed sectors first, then all the cluster aligned
    tables to avoid losing bytes in alignment */
-static coroutine_fn int
-qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
-                           uint64_t bytes, QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+qcow_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                           QEMUIOVector *qiov)
 {
     BDRVQcowState *s = bs->opaque;
     z_stream strm;
@@ -1116,7 +1121,7 @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
     }
     cluster_offset &= s->cluster_offset_mask;
 
-    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
+    BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
     ret = bdrv_co_pwrite(bs->file, cluster_offset, out_len, out_buf, 0);
     if (ret < 0) {
         goto fail;
@@ -1129,7 +1134,8 @@ fail:
     return ret;
 }
 
-static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+static int coroutine_fn
+qcow_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     BDRVQcowState *s = bs->opaque;
     bdi->cluster_size = s->cluster_size;
@@ -1198,7 +1204,7 @@ static BlockDriver bdrv_qcow = {
 
     .bdrv_make_empty        = qcow_make_empty,
     .bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed,
-    .bdrv_get_info          = qcow_get_info,
+    .bdrv_co_get_info       = qcow_co_get_info,
 
     .create_opts            = &qcow_create_opts,
     .strong_runtime_opts    = qcow_strong_runtime_opts,
index d7a31a8ddcdbe123489456fbf31a5abba73b28ac..0e567ed588d704473e77cc6f5c266be3fa52fc92 100644 (file)
@@ -26,6 +26,8 @@
  */
 
 #include "qemu/osdep.h"
+#include "block/block-io.h"
+#include "block/dirty-bitmap.h"
 #include "qapi/error.h"
 #include "qemu/cutils.h"
 
@@ -103,7 +105,7 @@ static inline bool can_write(BlockDriverState *bs)
     return !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
 }
 
-static int update_header_sync(BlockDriverState *bs)
+static int GRAPH_RDLOCK update_header_sync(BlockDriverState *bs)
 {
     int ret;
 
@@ -115,7 +117,7 @@ static int update_header_sync(BlockDriverState *bs)
     return bdrv_flush(bs->file->bs);
 }
 
-static inline void bitmap_table_to_be(uint64_t *bitmap_table, size_t size)
+static inline void bitmap_table_bswap_be(uint64_t *bitmap_table, size_t size)
 {
     size_t i;
 
@@ -154,10 +156,9 @@ static int64_t get_bitmap_bytes_needed(int64_t len, uint32_t granularity)
     return DIV_ROUND_UP(num_bits, 8);
 }
 
-static int check_constraints_on_bitmap(BlockDriverState *bs,
-                                       const char *name,
-                                       uint32_t granularity,
-                                       Error **errp)
+static int GRAPH_RDLOCK
+check_constraints_on_bitmap(BlockDriverState *bs, const char *name,
+                            uint32_t granularity, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     int granularity_bits = ctz32(granularity);
@@ -202,8 +203,9 @@ static int check_constraints_on_bitmap(BlockDriverState *bs,
     return 0;
 }
 
-static void clear_bitmap_table(BlockDriverState *bs, uint64_t *bitmap_table,
-                               uint32_t bitmap_table_size)
+static void GRAPH_RDLOCK
+clear_bitmap_table(BlockDriverState *bs, uint64_t *bitmap_table,
+                   uint32_t bitmap_table_size)
 {
     BDRVQcow2State *s = bs->opaque;
     int i;
@@ -219,8 +221,9 @@ static void clear_bitmap_table(BlockDriverState *bs, uint64_t *bitmap_table,
     }
 }
 
-static int bitmap_table_load(BlockDriverState *bs, Qcow2BitmapTable *tb,
-                             uint64_t **bitmap_table)
+static int GRAPH_RDLOCK
+bitmap_table_load(BlockDriverState *bs, Qcow2BitmapTable *tb,
+                  uint64_t **bitmap_table)
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
@@ -234,8 +237,8 @@ static int bitmap_table_load(BlockDriverState *bs, Qcow2BitmapTable *tb,
     }
 
     assert(tb->size <= BME_MAX_TABLE_SIZE);
-    ret = bdrv_pread(bs->file, tb->offset,
-                     table, tb->size * BME_TABLE_ENTRY_SIZE);
+    ret = bdrv_pread(bs->file, tb->offset, tb->size * BME_TABLE_ENTRY_SIZE,
+                     table, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -257,7 +260,8 @@ fail:
     return ret;
 }
 
-static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)
+static int GRAPH_RDLOCK
+free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)
 {
     int ret;
     uint64_t *bitmap_table;
@@ -278,25 +282,12 @@ static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)
     return 0;
 }
 
-/* Return the disk size covered by a single qcow2 cluster of bitmap data. */
-static uint64_t bytes_covered_by_bitmap_cluster(const BDRVQcow2State *s,
-                                                const BdrvDirtyBitmap *bitmap)
-{
-    uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap);
-    uint64_t limit = granularity * (s->cluster_size << 3);
-
-    assert(QEMU_IS_ALIGNED(limit,
-                           bdrv_dirty_bitmap_serialization_align(bitmap)));
-    return limit;
-}
-
 /* load_bitmap_data
  * @bitmap_table entries must satisfy specification constraints.
  * @bitmap must be cleared */
-static int load_bitmap_data(BlockDriverState *bs,
-                            const uint64_t *bitmap_table,
-                            uint32_t bitmap_table_size,
-                            BdrvDirtyBitmap *bitmap)
+static int coroutine_fn GRAPH_RDLOCK
+load_bitmap_data(BlockDriverState *bs, const uint64_t *bitmap_table,
+                 uint32_t bitmap_table_size, BdrvDirtyBitmap *bitmap)
 {
     int ret = 0;
     BDRVQcow2State *s = bs->opaque;
@@ -312,7 +303,7 @@ static int load_bitmap_data(BlockDriverState *bs,
     }
 
     buf = g_malloc(s->cluster_size);
-    limit = bytes_covered_by_bitmap_cluster(s, bitmap);
+    limit = bdrv_dirty_bitmap_serialization_coverage(s->cluster_size, bitmap);
     for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) {
         uint64_t count = MIN(bm_size - offset, limit);
         uint64_t entry = bitmap_table[i];
@@ -329,7 +320,7 @@ static int load_bitmap_data(BlockDriverState *bs,
                  * already cleared */
             }
         } else {
-            ret = bdrv_pread(bs->file, data_offset, buf, s->cluster_size);
+            ret = bdrv_co_pread(bs->file, data_offset, s->cluster_size, buf, 0);
             if (ret < 0) {
                 goto finish;
             }
@@ -347,8 +338,9 @@ finish:
     return ret;
 }
 
-static BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs,
-                                    Qcow2Bitmap *bm, Error **errp)
+static coroutine_fn GRAPH_RDLOCK
+BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs,
+                             Qcow2Bitmap *bm, Error **errp)
 {
     int ret;
     uint64_t *bitmap_table = NULL;
@@ -560,8 +552,9 @@ static uint32_t bitmap_list_count(Qcow2BitmapList *bm_list)
  * Get bitmap list from qcow2 image. Actually reads bitmap directory,
  * checks it and convert to bitmap list.
  */
-static Qcow2BitmapList *bitmap_list_load(BlockDriverState *bs, uint64_t offset,
-                                         uint64_t size, Error **errp)
+static Qcow2BitmapList * GRAPH_RDLOCK
+bitmap_list_load(BlockDriverState *bs, uint64_t offset, uint64_t size,
+                 Error **errp)
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
@@ -587,7 +580,7 @@ static Qcow2BitmapList *bitmap_list_load(BlockDriverState *bs, uint64_t offset,
     }
     dir_end = dir + size;
 
-    ret = bdrv_pread(bs->file, offset, dir, size);
+    ret = bdrv_pread(bs->file, offset, size, dir, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Failed to read bitmap directory");
         goto fail;
@@ -659,9 +652,10 @@ fail:
     return NULL;
 }
 
-int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                                  void **refcount_table,
-                                  int64_t *refcount_table_size)
+int coroutine_fn
+qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+                              void **refcount_table,
+                              int64_t *refcount_table_size)
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
@@ -739,8 +733,9 @@ out:
  * Store bitmap list to qcow2 image as a bitmap directory.
  * Everything is checked.
  */
-static int bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list,
-                             uint64_t *offset, uint64_t *size, bool in_place)
+static int GRAPH_RDLOCK
+bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list,
+                  uint64_t *offset, uint64_t *size, bool in_place)
 {
     int ret;
     uint8_t *dir;
@@ -799,10 +794,10 @@ static int bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list,
         }
     }
 
-    /* Actually, even in in-place case ignoring QCOW2_OL_BITMAP_DIRECTORY is not
-     * necessary, because we drop QCOW2_AUTOCLEAR_BITMAPS when updating bitmap
-     * directory in-place (actually, turn-off the extension), which is checked
-     * in qcow2_check_metadata_overlap() */
+    /* Actually, even in the in-place case ignoring QCOW2_OL_BITMAP_DIRECTORY
+     * is not necessary, because we drop QCOW2_AUTOCLEAR_BITMAPS when updating
+     * bitmap directory in-place (actually, turn-off the extension), which is
+     * checked in qcow2_check_metadata_overlap() */
     ret = qcow2_pre_write_overlap_check(
             bs, in_place ? QCOW2_OL_BITMAP_DIRECTORY : 0, dir_offset, dir_size,
             false);
@@ -810,7 +805,7 @@ static int bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list,
         goto fail;
     }
 
-    ret = bdrv_pwrite(bs->file, dir_offset, dir, dir_size);
+    ret = bdrv_pwrite(bs->file, dir_offset, dir_size, dir, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -838,8 +833,9 @@ fail:
  * Bitmap List end
  */
 
-static int update_ext_header_and_dir_in_place(BlockDriverState *bs,
-                                              Qcow2BitmapList *bm_list)
+static int GRAPH_RDLOCK
+update_ext_header_and_dir_in_place(BlockDriverState *bs,
+                                   Qcow2BitmapList *bm_list)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret;
@@ -886,8 +882,8 @@ static int update_ext_header_and_dir_in_place(BlockDriverState *bs,
      */
 }
 
-static int update_ext_header_and_dir(BlockDriverState *bs,
-                                     Qcow2BitmapList *bm_list)
+static int GRAPH_RDLOCK
+update_ext_header_and_dir(BlockDriverState *bs, Qcow2BitmapList *bm_list)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret;
@@ -962,25 +958,28 @@ static void set_readonly_helper(gpointer bitmap, gpointer value)
     bdrv_dirty_bitmap_set_readonly(bitmap, (bool)value);
 }
 
-/* qcow2_load_dirty_bitmaps()
- * Return value is a hint for caller: true means that the Qcow2 header was
- * updated. (false doesn't mean that the header should be updated by the
- * caller, it just means that updating was not needed or the image cannot be
- * written to).
- * On failure the function returns false.
+/*
+ * Return true on success, false on failure.
+ * If header_updated is not NULL then it is set appropriately regardless of
+ * the return value.
  */
-bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp)
+bool coroutine_fn
+qcow2_load_dirty_bitmaps(BlockDriverState *bs,
+                         bool *header_updated, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     Qcow2BitmapList *bm_list;
     Qcow2Bitmap *bm;
     GSList *created_dirty_bitmaps = NULL;
-    bool header_updated = false;
     bool needs_update = false;
 
+    if (header_updated) {
+        *header_updated = false;
+    }
+
     if (s->nb_bitmaps == 0) {
         /* No bitmaps - nothing to do */
-        return false;
+        return true;
     }
 
     bm_list = bitmap_list_load(bs, s->bitmap_directory_offset,
@@ -1036,7 +1035,9 @@ bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp)
             error_setg_errno(errp, -ret, "Can't update bitmap directory");
             goto fail;
         }
-        header_updated = true;
+        if (header_updated) {
+            *header_updated = true;
+        }
     }
 
     if (!can_write(bs)) {
@@ -1047,7 +1048,7 @@ bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp)
     g_slist_free(created_dirty_bitmaps);
     bitmap_list_free(bm_list);
 
-    return header_updated;
+    return true;
 
 fail:
     g_slist_foreach(created_dirty_bitmaps, release_dirty_bitmap_helper, bs);
@@ -1061,7 +1062,7 @@ fail:
 static Qcow2BitmapInfoFlagsList *get_bitmap_info_flags(uint32_t flags)
 {
     Qcow2BitmapInfoFlagsList *list = NULL;
-    Qcow2BitmapInfoFlagsList **plist = &list;
+    Qcow2BitmapInfoFlagsList **tail = &list;
     int i;
 
     static const struct {
@@ -1076,11 +1077,7 @@ static Qcow2BitmapInfoFlagsList *get_bitmap_info_flags(uint32_t flags)
 
     for (i = 0; i < map_size; ++i) {
         if (flags & map[i].bme) {
-            Qcow2BitmapInfoFlagsList *entry =
-                g_new0(Qcow2BitmapInfoFlagsList, 1);
-            entry->value = map[i].info;
-            *plist = entry;
-            plist = &entry->next;
+            QAPI_LIST_APPEND(tail, map[i].info);
             flags &= ~map[i].bme;
         }
     }
@@ -1093,44 +1090,43 @@ static Qcow2BitmapInfoFlagsList *get_bitmap_info_flags(uint32_t flags)
 /*
  * qcow2_get_bitmap_info_list()
  * Returns a list of QCOW2 bitmap details.
- * In case of no bitmaps, the function returns NULL and
- * the @errp parameter is not set.
- * When bitmap information can not be obtained, the function returns
- * NULL and the @errp parameter is set.
+ * On success return true with info_list set (note, that if there are no
+ * bitmaps, info_list is set to NULL).
+ * On failure return false with errp set.
  */
-Qcow2BitmapInfoList *qcow2_get_bitmap_info_list(BlockDriverState *bs,
-                                                Error **errp)
+bool qcow2_get_bitmap_info_list(BlockDriverState *bs,
+                                Qcow2BitmapInfoList **info_list, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     Qcow2BitmapList *bm_list;
     Qcow2Bitmap *bm;
-    Qcow2BitmapInfoList *list = NULL;
-    Qcow2BitmapInfoList **plist = &list;
+    Qcow2BitmapInfoList **tail;
 
     if (s->nb_bitmaps == 0) {
-        return NULL;
+        *info_list = NULL;
+        return true;
     }
 
     bm_list = bitmap_list_load(bs, s->bitmap_directory_offset,
                                s->bitmap_directory_size, errp);
-    if (bm_list == NULL) {
-        return NULL;
+    if (!bm_list) {
+        return false;
     }
 
+    *info_list = NULL;
+    tail = info_list;
+
     QSIMPLEQ_FOREACH(bm, bm_list, entry) {
         Qcow2BitmapInfo *info = g_new0(Qcow2BitmapInfo, 1);
-        Qcow2BitmapInfoList *obj = g_new0(Qcow2BitmapInfoList, 1);
         info->granularity = 1U << bm->granularity_bits;
         info->name = g_strdup(bm->name);
         info->flags = get_bitmap_info_flags(bm->flags & ~BME_RESERVED_FLAGS);
-        obj->value = info;
-        *plist = obj;
-        plist = &obj->next;
+        QAPI_LIST_APPEND(tail, info);
     }
 
     bitmap_list_free(bm_list);
 
-    return list;
+    return true;
 }
 
 int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp)
@@ -1221,7 +1217,7 @@ int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp)
         }
     }
 
-    g_slist_foreach(ro_dirty_bitmaps, set_readonly_helper, false);
+    g_slist_foreach(ro_dirty_bitmaps, set_readonly_helper, (gpointer)false);
     ret = 0;
 
 out:
@@ -1232,7 +1228,7 @@ out:
 }
 
 /* Checks to see if it's safe to resize bitmaps */
-int qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp)
+int coroutine_fn qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     Qcow2BitmapList *bm_list;
@@ -1280,9 +1276,9 @@ out:
 /* store_bitmap_data()
  * Store bitmap to image, filling bitmap table accordingly.
  */
-static uint64_t *store_bitmap_data(BlockDriverState *bs,
-                                   BdrvDirtyBitmap *bitmap,
-                                   uint32_t *bitmap_table_size, Error **errp)
+static uint64_t * GRAPH_RDLOCK
+store_bitmap_data(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                  uint32_t *bitmap_table_size, Error **errp)
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
@@ -1310,7 +1306,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
     }
 
     buf = g_malloc(s->cluster_size);
-    limit = bytes_covered_by_bitmap_cluster(s, bitmap);
+    limit = bdrv_dirty_bitmap_serialization_coverage(s->cluster_size, bitmap);
     assert(DIV_ROUND_UP(bm_size, limit) == tb_size);
 
     offset = 0;
@@ -1352,7 +1348,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
             goto fail;
         }
 
-        ret = bdrv_pwrite(bs->file, off, buf, s->cluster_size);
+        ret = bdrv_pwrite(bs->file, off, s->cluster_size, buf, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file",
                              bm_name);
@@ -1379,7 +1375,8 @@ fail:
  * Store bm->dirty_bitmap to qcow2.
  * Set bm->table_offset and bm->table_size accordingly.
  */
-static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp)
+static int GRAPH_RDLOCK
+store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp)
 {
     int ret;
     uint64_t *tb;
@@ -1414,9 +1411,10 @@ static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp)
         goto fail;
     }
 
-    bitmap_table_to_be(tb, tb_size);
-    ret = bdrv_pwrite(bs->file, tb_offset, tb, tb_size * sizeof(tb[0]));
+    bitmap_table_bswap_be(tb, tb_size);
+    ret = bdrv_pwrite(bs->file, tb_offset, tb_size * sizeof(tb[0]), tb, 0);
     if (ret < 0) {
+        bitmap_table_bswap_be(tb, tb_size);
         error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file",
                          bm_name);
         goto fail;
@@ -1532,9 +1530,10 @@ out:
  * readonly to begin with, and whether we opened directly or reopened to that
  * state shouldn't matter for the state we get afterward.
  */
-void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs,
+bool qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs,
                                           bool release_stored, Error **errp)
 {
+    ERRP_GUARD();
     BdrvDirtyBitmap *bitmap;
     BDRVQcow2State *s = bs->opaque;
     uint32_t new_nb_bitmaps = s->nb_bitmaps;
@@ -1554,7 +1553,7 @@ void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs,
         bm_list = bitmap_list_load(bs, s->bitmap_directory_offset,
                                    s->bitmap_directory_size, errp);
         if (bm_list == NULL) {
-            return;
+            return false;
         }
     }
 
@@ -1562,7 +1561,6 @@ void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs,
     FOR_EACH_DIRTY_BITMAP(bs, bitmap) {
         const char *name = bdrv_dirty_bitmap_name(bitmap);
         uint32_t granularity = bdrv_dirty_bitmap_granularity(bitmap);
-        Qcow2Bitmap *bm;
 
         if (!bdrv_dirty_bitmap_get_persistence(bitmap) ||
             bdrv_dirty_bitmap_inconsistent(bitmap)) {
@@ -1632,7 +1630,7 @@ void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs,
 
     /* allocate clusters and store bitmaps */
     QSIMPLEQ_FOREACH(bm, bm_list, entry) {
-        BdrvDirtyBitmap *bitmap = bm->dirty_bitmap;
+        bitmap = bm->dirty_bitmap;
 
         if (bitmap == NULL || bdrv_dirty_bitmap_readonly(bitmap)) {
             continue;
@@ -1669,7 +1667,7 @@ success:
     }
 
     bitmap_list_free(bm_list);
-    return;
+    return true;
 
 fail:
     QSIMPLEQ_FOREACH(bm, bm_list, entry) {
@@ -1687,16 +1685,14 @@ fail:
     }
 
     bitmap_list_free(bm_list);
+    return false;
 }
 
 int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp)
 {
     BdrvDirtyBitmap *bitmap;
-    Error *local_err = NULL;
 
-    qcow2_store_persistent_dirty_bitmaps(bs, false, &local_err);
-    if (local_err != NULL) {
-        error_propagate(errp, local_err);
+    if (!qcow2_store_persistent_dirty_bitmaps(bs, false, errp)) {
         return -EINVAL;
     }
 
index 7444b9c4ab03734556a662f4659cd0f68c165c2d..23d9588b08e83c900609e17f3451b347cf7ba4a2 100644 (file)
@@ -23,6 +23,8 @@
  */
 
 #include "qemu/osdep.h"
+#include "block/block-io.h"
+#include "qemu/memalign.h"
 #include "qcow2.h"
 #include "trace.h"
 
@@ -74,7 +76,7 @@ static void qcow2_cache_table_release(Qcow2Cache *c, int i, int num_tables)
 /* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
 #ifdef CONFIG_LINUX
     void *t = qcow2_cache_get_table_addr(c, i);
-    int align = qemu_real_host_page_size;
+    int align = qemu_real_host_page_size();
     size_t mem_size = (size_t) c->table_size * num_tables;
     size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
     size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
@@ -161,7 +163,8 @@ int qcow2_cache_destroy(Qcow2Cache *c)
     return 0;
 }
 
-static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
+static int GRAPH_RDLOCK
+qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
 {
     int ret;
 
@@ -176,7 +179,8 @@ static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
     return 0;
 }
 
-static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
+static int GRAPH_RDLOCK
+qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret = 0;
@@ -222,8 +226,8 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
     }
 
-    ret = bdrv_pwrite(bs->file, c->entries[i].offset,
-                      qcow2_cache_get_table_addr(c, i), c->table_size);
+    ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->table_size,
+                      qcow2_cache_get_table_addr(c, i), 0);
     if (ret < 0) {
         return ret;
     }
@@ -316,8 +320,9 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
     return 0;
 }
 
-static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
-    uint64_t offset, void **table, bool read_from_disk)
+static int GRAPH_RDLOCK
+qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+                   void **table, bool read_from_disk)
 {
     BDRVQcow2State *s = bs->opaque;
     int i;
@@ -378,9 +383,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
             BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
         }
 
-        ret = bdrv_pread(bs->file, offset,
-                         qcow2_cache_get_table_addr(c, i),
-                         c->table_size);
+        ret = bdrv_pread(bs->file, offset, c->table_size,
+                         qcow2_cache_get_table_addr(c, i), 0);
         if (ret < 0) {
             return ret;
         }
index bd0597842f320e7bc635fa67a3ed92e9c2c08621..ce8c0076b3b50e799238b55d676703539b17999b 100644 (file)
 #include "qemu/osdep.h"
 #include <zlib.h>
 
+#include "block/block-io.h"
 #include "qapi/error.h"
 #include "qcow2.h"
 #include "qemu/bswap.h"
+#include "qemu/memalign.h"
 #include "trace.h"
 
-int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t exact_size)
+int coroutine_fn qcow2_shrink_l1_table(BlockDriverState *bs,
+                                       uint64_t exact_size)
 {
     BDRVQcow2State *s = bs->opaque;
     int new_l1_size, i, ret;
@@ -45,20 +48,20 @@ int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t exact_size)
     fprintf(stderr, "shrink l1_table from %d to %d\n", s->l1_size, new_l1_size);
 #endif
 
-    BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE);
-    ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset +
-                                       new_l1_size * L1E_SIZE,
-                             (s->l1_size - new_l1_size) * L1E_SIZE, 0);
+    BLKDBG_CO_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE);
+    ret = bdrv_co_pwrite_zeroes(bs->file,
+                                s->l1_table_offset + new_l1_size * L1E_SIZE,
+                                (s->l1_size - new_l1_size) * L1E_SIZE, 0);
     if (ret < 0) {
         goto fail;
     }
 
-    ret = bdrv_flush(bs->file->bs);
+    ret = bdrv_co_flush(bs->file->bs);
     if (ret < 0) {
         goto fail;
     }
 
-    BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS);
+    BLKDBG_CO_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS);
     for (i = s->l1_size - 1; i > new_l1_size - 1; i--) {
         if ((s->l1_table[i] & L1E_OFFSET_MASK) == 0) {
             continue;
@@ -158,8 +161,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
     for(i = 0; i < s->l1_size; i++)
         new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
-    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset,
-                           new_l1_table, new_l1_size2);
+    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_size2,
+                           new_l1_table, 0);
     if (ret < 0)
         goto fail;
     for(i = 0; i < s->l1_size; i++)
@@ -170,7 +173,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
     stl_be_p(data, new_l1_size);
     stq_be_p(data + 4, new_l1_table_offset);
     ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size),
-                           data, sizeof(data));
+                           sizeof(data), data, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -204,8 +207,9 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
  * the cache is used; otherwise the L2 slice is loaded from the image
  * file.
  */
-static int l2_load(BlockDriverState *bs, uint64_t offset,
-                   uint64_t l2_offset, uint64_t **l2_slice)
+static int GRAPH_RDLOCK
+l2_load(BlockDriverState *bs, uint64_t offset,
+        uint64_t l2_offset, uint64_t **l2_slice)
 {
     BDRVQcow2State *s = bs->opaque;
     int start_of_slice = l2_entry_size(s) *
@@ -248,7 +252,7 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
     BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
     ret = bdrv_pwrite_sync(bs->file,
                            s->l1_table_offset + L1E_SIZE * l1_start_index,
-                           buf, bufsize);
+                           bufsize, buf, 0);
     if (ret < 0) {
         return ret;
     }
@@ -266,7 +270,7 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
  *
  */
 
-static int l2_allocate(BlockDriverState *bs, int l1_index)
+static int GRAPH_RDLOCK l2_allocate(BlockDriverState *bs, int l1_index)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t old_l2_offset;
@@ -387,11 +391,10 @@ fail:
  * If the L2 entry is invalid return -errno and set @type to
  * QCOW2_SUBCLUSTER_INVALID.
  */
-static int qcow2_get_subcluster_range_type(BlockDriverState *bs,
-                                           uint64_t l2_entry,
-                                           uint64_t l2_bitmap,
-                                           unsigned sc_from,
-                                           QCow2SubclusterType *type)
+static int GRAPH_RDLOCK
+qcow2_get_subcluster_range_type(BlockDriverState *bs, uint64_t l2_entry,
+                                uint64_t l2_bitmap, unsigned sc_from,
+                                QCow2SubclusterType *type)
 {
     BDRVQcow2State *s = bs->opaque;
     uint32_t val;
@@ -438,9 +441,10 @@ static int qcow2_get_subcluster_range_type(BlockDriverState *bs,
  * On failure return -errno and update @l2_index to point to the
  * invalid entry.
  */
-static int count_contiguous_subclusters(BlockDriverState *bs, int nb_clusters,
-                                        unsigned sc_index, uint64_t *l2_slice,
-                                        unsigned *l2_index)
+static int GRAPH_RDLOCK
+count_contiguous_subclusters(BlockDriverState *bs, int nb_clusters,
+                             unsigned sc_index, uint64_t *l2_slice,
+                             unsigned *l2_index)
 {
     BDRVQcow2State *s = bs->opaque;
     int i, count = 0;
@@ -488,10 +492,9 @@ static int count_contiguous_subclusters(BlockDriverState *bs, int nb_clusters,
     return count;
 }
 
-static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
-                                            uint64_t src_cluster_offset,
-                                            unsigned offset_in_cluster,
-                                            QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+do_perform_cow_read(BlockDriverState *bs, uint64_t src_cluster_offset,
+                    unsigned offset_in_cluster, QEMUIOVector *qiov)
 {
     int ret;
 
@@ -499,13 +502,26 @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
         return 0;
     }
 
-    BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
+    BLKDBG_CO_EVENT(bs->file, BLKDBG_COW_READ);
 
     if (!bs->drv) {
         return -ENOMEDIUM;
     }
 
-    /* Call .bdrv_co_readv() directly instead of using the public block-layer
+    /*
+     * We never deal with requests that don't satisfy
+     * bdrv_check_qiov_request(), and aligning requests to clusters never
+     * breaks this condition. So, do some assertions before calling
+     * bs->drv->bdrv_co_preadv_part() which has int64_t arguments.
+     */
+    assert(src_cluster_offset <= INT64_MAX);
+    assert(src_cluster_offset + offset_in_cluster <= INT64_MAX);
+    /* Cast qiov->size to uint64_t to silence a compiler warning on -m32 */
+    assert((uint64_t)qiov->size <= INT64_MAX);
+    bdrv_check_qiov_request(src_cluster_offset + offset_in_cluster, qiov->size,
+                            qiov, 0, &error_abort);
+    /*
+     * Call .bdrv_co_readv() directly instead of using the public block-layer
      * interface.  This avoids double I/O throttling and request tracking,
      * which can lead to deadlock when block layer copy-on-read is enabled.
      */
@@ -519,10 +535,9 @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
     return 0;
 }
 
-static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
-                                             uint64_t cluster_offset,
-                                             unsigned offset_in_cluster,
-                                             QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+do_perform_cow_write(BlockDriverState *bs, uint64_t cluster_offset,
+                     unsigned offset_in_cluster, QEMUIOVector *qiov)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret;
@@ -537,7 +552,7 @@ static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
         return ret;
     }
 
-    BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
+    BLKDBG_CO_EVENT(bs->file, BLKDBG_COW_WRITE);
     ret = bdrv_co_pwritev(s->data_file, cluster_offset + offset_in_cluster,
                           qiov->size, qiov, 0);
     if (ret < 0) {
@@ -556,8 +571,7 @@ static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
  * offset needs to be aligned to a cluster boundary.
  *
  * If the cluster is unallocated then *host_offset will be 0.
- * If the cluster is compressed then *host_offset will contain the
- * complete compressed cluster descriptor.
+ * If the cluster is compressed then *host_offset will contain the l2 entry.
  *
  * On entry, *bytes is the maximum number of contiguous bytes starting at
  * offset that we are interested in.
@@ -660,7 +674,7 @@ int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset,
             ret = -EIO;
             goto fail;
         }
-        *host_offset = l2_entry & L2E_COMPRESSED_OFFSET_SIZE_MASK;
+        *host_offset = l2_entry;
         break;
     case QCOW2_SUBCLUSTER_ZERO_PLAIN:
     case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
@@ -738,9 +752,9 @@ fail:
  *
  * Returns 0 on success, -errno in failure case
  */
-static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
-                             uint64_t **new_l2_slice,
-                             int *new_l2_index)
+static int GRAPH_RDLOCK
+get_cluster_table(BlockDriverState *bs, uint64_t offset,
+                  uint64_t **new_l2_slice, int *new_l2_index)
 {
     BDRVQcow2State *s = bs->opaque;
     unsigned int l2_index;
@@ -810,10 +824,9 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
  *
  * Return 0 on success and -errno in error cases
  */
-int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
-                                          uint64_t offset,
-                                          int compressed_size,
-                                          uint64_t *host_offset)
+int coroutine_fn GRAPH_RDLOCK
+qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset,
+                                      int compressed_size, uint64_t *host_offset)
 {
     BDRVQcow2State *s = bs->opaque;
     int l2_index, ret;
@@ -859,7 +872,7 @@ int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 
     /* compressed clusters never have the copied flag */
 
-    BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
+    BLKDBG_CO_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
     qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
     set_l2_entry(s, l2_slice, l2_index, cluster_offset);
     if (has_subclusters(s)) {
@@ -871,7 +884,8 @@ int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     return 0;
 }
 
-static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
+static int coroutine_fn GRAPH_RDLOCK
+perform_cow(BlockDriverState *bs, QCowL2Meta *m)
 {
     BDRVQcow2State *s = bs->opaque;
     Qcow2COWRegion *start = &m->cow_start;
@@ -978,7 +992,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
         /* NOTE: we have a write_aio blkdebug event here followed by
          * a cow_write one in do_perform_cow_write(), but there's only
          * one single I/O operation */
-        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+        BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
         ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
     } else {
         /* If there's no guest data then write both COW regions separately */
@@ -1011,7 +1025,8 @@ fail:
     return ret;
 }
 
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
+int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs,
+                                             QCowL2Meta *m)
 {
     BDRVQcow2State *s = bs->opaque;
     int i, j = 0, l2_index, ret;
@@ -1111,7 +1126,7 @@ err:
  * Frees the allocated clusters because the request failed and they won't
  * actually be linked.
  */
-void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
+void coroutine_fn qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
 {
     BDRVQcow2State *s = bs->opaque;
     if (!has_data_file(bs) && !m->keep_old_clusters) {
@@ -1141,9 +1156,10 @@ void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
  *
  * Returns 0 on success, -errno on failure.
  */
-static int calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset,
-                             uint64_t guest_offset, unsigned bytes,
-                             uint64_t *l2_slice, QCowL2Meta **m, bool keep_old)
+static int coroutine_fn GRAPH_RDLOCK
+calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset,
+                  uint64_t guest_offset, unsigned bytes, uint64_t *l2_slice,
+                  QCowL2Meta **m, bool keep_old)
 {
     BDRVQcow2State *s = bs->opaque;
     int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset);
@@ -1313,7 +1329,8 @@ static int calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset,
  * requires a new allocation (that is, if the cluster is unallocated
  * or has refcount > 1 and therefore cannot be written in-place).
  */
-static bool cluster_needs_new_alloc(BlockDriverState *bs, uint64_t l2_entry)
+static bool GRAPH_RDLOCK
+cluster_needs_new_alloc(BlockDriverState *bs, uint64_t l2_entry)
 {
     switch (qcow2_get_cluster_type(bs, l2_entry)) {
     case QCOW2_CLUSTER_NORMAL:
@@ -1344,9 +1361,9 @@ static bool cluster_needs_new_alloc(BlockDriverState *bs, uint64_t l2_entry)
  * allocated and can be overwritten in-place (this includes clusters
  * of type QCOW2_CLUSTER_ZERO_ALLOC).
  */
-static int count_single_write_clusters(BlockDriverState *bs, int nb_clusters,
-                                       uint64_t *l2_slice, int l2_index,
-                                       bool new_alloc)
+static int GRAPH_RDLOCK
+count_single_write_clusters(BlockDriverState *bs, int nb_clusters,
+                            uint64_t *l2_slice, int l2_index, bool new_alloc)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t l2_entry = get_l2_entry(s, l2_slice, l2_index);
@@ -1384,8 +1401,9 @@ static int count_single_write_clusters(BlockDriverState *bs, int nb_clusters,
  *           information on cluster allocation may be invalid now. The caller
  *           must start over anyway, so consider *cur_bytes undefined.
  */
-static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
-    uint64_t *cur_bytes, QCowL2Meta **m)
+static int coroutine_fn handle_dependencies(BlockDriverState *bs,
+                                            uint64_t guest_offset,
+                                            uint64_t *cur_bytes, QCowL2Meta **m)
 {
     BDRVQcow2State *s = bs->opaque;
     QCowL2Meta *old_alloc;
@@ -1400,29 +1418,47 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
 
         if (end <= old_start || start >= old_end) {
             /* No intersection */
+            continue;
+        }
+
+        if (old_alloc->keep_old_clusters &&
+            (end <= l2meta_cow_start(old_alloc) ||
+             start >= l2meta_cow_end(old_alloc)))
+        {
+            /*
+             * Clusters intersect but COW areas don't. And cluster itself is
+             * already allocated. So, there is no actual conflict.
+             */
+            continue;
+        }
+
+        /* Conflict */
+
+        if (start < old_start) {
+            /* Stop at the start of a running allocation */
+            bytes = old_start - start;
         } else {
-            if (start < old_start) {
-                /* Stop at the start of a running allocation */
-                bytes = old_start - start;
-            } else {
-                bytes = 0;
-            }
+            bytes = 0;
+        }
 
-            /* Stop if already an l2meta exists. After yielding, it wouldn't
-             * be valid any more, so we'd have to clean up the old L2Metas
-             * and deal with requests depending on them before starting to
-             * gather new ones. Not worth the trouble. */
-            if (bytes == 0 && *m) {
-                *cur_bytes = 0;
-                return 0;
-            }
+        /*
+         * Stop if an l2meta already exists. After yielding, it wouldn't
+         * be valid any more, so we'd have to clean up the old L2Metas
+         * and deal with requests depending on them before starting to
+         * gather new ones. Not worth the trouble.
+         */
+        if (bytes == 0 && *m) {
+            *cur_bytes = 0;
+            return 0;
+        }
 
-            if (bytes == 0) {
-                /* Wait for the dependency to complete. We need to recheck
-                 * the free/allocated clusters when we continue. */
-                qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
-                return -EAGAIN;
-            }
+        if (bytes == 0) {
+            /*
+             * Wait for the dependency to complete. We need to recheck
+             * the free/allocated clusters when we continue.
+             */
+            qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
+            return -EAGAIN;
         }
     }
 
@@ -1455,8 +1491,9 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
  *
  *  -errno: in error cases
  */
-static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
-    uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
+static int coroutine_fn GRAPH_RDLOCK
+handle_copied(BlockDriverState *bs, uint64_t guest_offset,
+              uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
 {
     BDRVQcow2State *s = bs->opaque;
     int l2_index;
@@ -1564,8 +1601,9 @@ out:
  * function has been waiting for another request and the allocation must be
  * restarted, but the whole request should not be failed.
  */
-static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
-                                   uint64_t *host_offset, uint64_t *nb_clusters)
+static int coroutine_fn GRAPH_RDLOCK
+do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
+                        uint64_t *host_offset, uint64_t *nb_clusters)
 {
     BDRVQcow2State *s = bs->opaque;
 
@@ -1620,8 +1658,9 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
  *
  *  -errno: in error cases
  */
-static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
-    uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
+static int coroutine_fn GRAPH_RDLOCK
+handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
+             uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
 {
     BDRVQcow2State *s = bs->opaque;
     int l2_index;
@@ -1741,9 +1780,10 @@ out:
  *
  * Return 0 on success and -errno in error cases
  */
-int qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset,
-                            unsigned int *bytes, uint64_t *host_offset,
-                            QCowL2Meta **m)
+int coroutine_fn qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset,
+                                         unsigned int *bytes,
+                                         uint64_t *host_offset,
+                                         QCowL2Meta **m)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t start, remaining;
@@ -1858,9 +1898,9 @@ again:
  * all clusters in the same L2 slice) and returns the number of discarded
  * clusters.
  */
-static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
-                               uint64_t nb_clusters,
-                               enum qcow2_discard_type type, bool full_discard)
+static int GRAPH_RDLOCK
+discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, uint64_t nb_clusters,
+                    enum qcow2_discard_type type, bool full_discard)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t *l2_slice;
@@ -1884,6 +1924,10 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
         uint64_t new_l2_bitmap = old_l2_bitmap;
         QCow2ClusterType cluster_type =
             qcow2_get_cluster_type(bs, old_l2_entry);
+        bool keep_reference = (cluster_type != QCOW2_CLUSTER_COMPRESSED) &&
+                              !full_discard &&
+                              (s->discard_no_unref &&
+                               type == QCOW2_DISCARD_REQUEST);
 
         /*
          * If full_discard is true, the cluster should not read back as zeroes,
@@ -1902,10 +1946,22 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
             new_l2_entry = new_l2_bitmap = 0;
         } else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) {
             if (has_subclusters(s)) {
-                new_l2_entry = 0;
+                if (keep_reference) {
+                    new_l2_entry = old_l2_entry;
+                } else {
+                    new_l2_entry = 0;
+                }
                 new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
             } else {
-                new_l2_entry = s->qcow_version >= 3 ? QCOW_OFLAG_ZERO : 0;
+                if (s->qcow_version >= 3) {
+                    if (keep_reference) {
+                        new_l2_entry |= QCOW_OFLAG_ZERO;
+                    } else {
+                        new_l2_entry = QCOW_OFLAG_ZERO;
+                    }
+                } else {
+                    new_l2_entry = 0;
+                }
             }
         }
 
@@ -1919,8 +1975,16 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
         if (has_subclusters(s)) {
             set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
         }
-        /* Then decrease the refcount */
-        qcow2_free_any_cluster(bs, old_l2_entry, type);
+        if (!keep_reference) {
+            /* Then decrease the refcount */
+            qcow2_free_any_cluster(bs, old_l2_entry, type);
+        } else if (s->discard_passthrough[type] &&
+                   (cluster_type == QCOW2_CLUSTER_NORMAL ||
+                    cluster_type == QCOW2_CLUSTER_ZERO_ALLOC)) {
+            /* If we keep the reference, pass on the discard still */
+            bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
+                          s->cluster_size);
+        }
     }
 
     qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
@@ -1973,8 +2037,9 @@ fail:
  * all clusters in the same L2 slice) and returns the number of zeroed
  * clusters.
  */
-static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
-                            uint64_t nb_clusters, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
+                 uint64_t nb_clusters, int flags)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t *l2_slice;
@@ -1997,9 +2062,15 @@ static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
         QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry);
         bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) ||
             ((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type));
-        uint64_t new_l2_entry = unmap ? 0 : old_l2_entry;
+        bool keep_reference =
+            (s->discard_no_unref && type != QCOW2_CLUSTER_COMPRESSED);
+        uint64_t new_l2_entry = old_l2_entry;
         uint64_t new_l2_bitmap = old_l2_bitmap;
 
+        if (unmap && !keep_reference) {
+            new_l2_entry = 0;
+        }
+
         if (has_subclusters(s)) {
             new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
         } else {
@@ -2017,9 +2088,17 @@ static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
             set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
         }
 
-        /* Then decrease the refcount */
         if (unmap) {
-            qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
+            if (!keep_reference) {
+                /* Then decrease the refcount */
+                qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
+            } else if (s->discard_passthrough[QCOW2_DISCARD_REQUEST] &&
+                       (type == QCOW2_CLUSTER_NORMAL ||
+                        type == QCOW2_CLUSTER_ZERO_ALLOC)) {
+                /* If we keep the reference, pass on the discard still */
+                bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
+                            s->cluster_size);
+            }
         }
     }
 
@@ -2028,8 +2107,9 @@ static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
     return nb_clusters;
 }
 
-static int zero_l2_subclusters(BlockDriverState *bs, uint64_t offset,
-                               unsigned nb_subclusters)
+static int coroutine_fn GRAPH_RDLOCK
+zero_l2_subclusters(BlockDriverState *bs, uint64_t offset,
+                    unsigned nb_subclusters)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t *l2_slice;
@@ -2074,8 +2154,8 @@ out:
     return ret;
 }
 
-int qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset,
-                             uint64_t bytes, int flags)
+int coroutine_fn qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset,
+                                          uint64_t bytes, int flags)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t end_offset = offset + bytes;
@@ -2165,11 +2245,12 @@ fail:
  * status_cb(). l1_entries contains the total number of L1 entries and
  * *visited_l1_entries counts all visited L1 entries.
  */
-static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
-                                      int l1_size, int64_t *visited_l1_entries,
-                                      int64_t l1_entries,
-                                      BlockDriverAmendStatusCB *status_cb,
-                                      void *cb_opaque)
+static int GRAPH_RDLOCK
+expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
+                           int l1_size, int64_t *visited_l1_entries,
+                           int64_t l1_entries,
+                           BlockDriverAmendStatusCB *status_cb,
+                           void *cb_opaque)
 {
     BDRVQcow2State *s = bs->opaque;
     bool is_active_l1 = (l1_table == s->l1_table);
@@ -2229,7 +2310,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                                       (void **)&l2_slice);
             } else {
                 /* load inactive L2 tables from disk */
-                ret = bdrv_pread(bs->file, slice_offset, l2_slice, slice_size2);
+                ret = bdrv_pread(bs->file, slice_offset, slice_size2,
+                                 l2_slice, 0);
             }
             if (ret < 0) {
                 goto fail;
@@ -2345,8 +2427,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                         goto fail;
                     }
 
-                    ret = bdrv_pwrite(bs->file, slice_offset,
-                                      l2_slice, slice_size2);
+                    ret = bdrv_pwrite(bs->file, slice_offset, slice_size2,
+                                      l2_slice, 0);
                     if (ret < 0) {
                         goto fail;
                     }
@@ -2439,8 +2521,8 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
 
         l1_table = new_l1_table;
 
-        ret = bdrv_pread(bs->file, s->snapshots[i].l1_table_offset,
-                         l1_table, l1_size2);
+        ret = bdrv_pread(bs->file, s->snapshots[i].l1_table_offset, l1_size2,
+                         l1_table, 0);
         if (ret < 0) {
             goto fail;
         }
@@ -2463,3 +2545,18 @@ fail:
     g_free(l1_table);
     return ret;
 }
+
+void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry,
+                                     uint64_t *coffset, int *csize)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int nb_csectors;
+
+    assert(qcow2_get_cluster_type(bs, l2_entry) == QCOW2_CLUSTER_COMPRESSED);
+
+    *coffset = l2_entry & s->cluster_offset_mask;
+
+    nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1;
+    *csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE -
+        (*coffset & (QCOW2_COMPRESSED_SECTOR_SIZE - 1));
+}
index 8e649b008e8cd6b5f36f35e156e8d4a63e0b09b6..0266542ceea26e0829c9757f5d7a1ac067a714ae 100644 (file)
  */
 
 #include "qemu/osdep.h"
+#include "block/block-io.h"
 #include "qapi/error.h"
 #include "qcow2.h"
 #include "qemu/range.h"
 #include "qemu/bswap.h"
 #include "qemu/cutils.h"
+#include "qemu/memalign.h"
 #include "trace.h"
 
 static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
                                     uint64_t max);
-static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
-                            int64_t offset, int64_t length, uint64_t addend,
-                            bool decrease, enum qcow2_discard_type type);
+
+G_GNUC_WARN_UNUSED_RESULT
+static int update_refcount(BlockDriverState *bs,
+                           int64_t offset, int64_t length, uint64_t addend,
+                           bool decrease, enum qcow2_discard_type type);
 
 static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index);
 static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index);
@@ -94,7 +98,7 @@ static void update_max_refcount_table_index(BDRVQcow2State *s)
     s->max_refcount_table_index = i;
 }
 
-int qcow2_refcount_init(BlockDriverState *bs)
+int coroutine_fn qcow2_refcount_init(BlockDriverState *bs)
 {
     BDRVQcow2State *s = bs->opaque;
     unsigned int refcount_table_size2, i;
@@ -114,9 +118,9 @@ int qcow2_refcount_init(BlockDriverState *bs)
             ret = -ENOMEM;
             goto fail;
         }
-        BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
-        ret = bdrv_pread(bs->file, s->refcount_table_offset,
-                         s->refcount_table, refcount_table_size2);
+        BLKDBG_CO_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
+        ret = bdrv_co_pread(bs->file, s->refcount_table_offset,
+                            refcount_table_size2, s->refcount_table, 0);
         if (ret < 0) {
             goto fail;
         }
@@ -225,9 +229,9 @@ static void set_refcount_ro6(void *refcount_array, uint64_t index,
 }
 
 
-static int load_refcount_block(BlockDriverState *bs,
-                               int64_t refcount_block_offset,
-                               void **refcount_block)
+static int GRAPH_RDLOCK
+load_refcount_block(BlockDriverState *bs, int64_t refcount_block_offset,
+                    void **refcount_block)
 {
     BDRVQcow2State *s = bs->opaque;
 
@@ -298,8 +302,9 @@ static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a,
  *
  * Returns 0 on success or -errno in error case
  */
-static int alloc_refcount_block(BlockDriverState *bs,
-                                int64_t cluster_index, void **refcount_block)
+static int GRAPH_RDLOCK
+alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index,
+                     void **refcount_block)
 {
     BDRVQcow2State *s = bs->opaque;
     unsigned int refcount_table_index;
@@ -436,7 +441,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
         ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
                                refcount_table_index * REFTABLE_ENTRY_SIZE,
-            &data64, sizeof(data64));
+            sizeof(data64), &data64, 0);
         if (ret < 0) {
             goto fail;
         }
@@ -681,8 +686,8 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
-    ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
-        table_size * REFTABLE_ENTRY_SIZE);
+    ret = bdrv_pwrite_sync(bs->file, table_offset,
+                           table_size * REFTABLE_ENTRY_SIZE, new_table, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -701,7 +706,7 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
     ret = bdrv_pwrite_sync(bs->file,
                            offsetof(QCowHeader, refcount_table_offset),
-                           &data, sizeof(data));
+                           sizeof(data), &data, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -802,12 +807,9 @@ found:
 /* XXX: cache several refcount block clusters ? */
 /* @addend is the absolute value of the addend; if @decrease is set, @addend
  * will be subtracted from the current refcount, otherwise it will be added */
-static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
-                                                   int64_t offset,
-                                                   int64_t length,
-                                                   uint64_t addend,
-                                                   bool decrease,
-                                                   enum qcow2_discard_type type)
+static int GRAPH_RDLOCK
+update_refcount(BlockDriverState *bs, int64_t offset, int64_t length,
+                uint64_t addend, bool decrease, enum qcow2_discard_type type)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t start, last, cluster_offset;
@@ -963,8 +965,8 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs,
 
 
 /* return < 0 if error */
-static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
-                                    uint64_t max)
+static int64_t GRAPH_RDLOCK
+alloc_clusters_noref(BlockDriverState *bs, uint64_t size, uint64_t max)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t i, nb_clusters, refcount;
@@ -1026,8 +1028,8 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size)
     return offset;
 }
 
-int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
-                                int64_t nb_clusters)
+int64_t coroutine_fn qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+                                             int64_t nb_clusters)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t cluster_index, refcount;
@@ -1065,14 +1067,14 @@ int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
 
 /* only used to allocate compressed sectors. We try to allocate
    contiguous sectors. size must be <= cluster_size */
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
+int64_t coroutine_fn GRAPH_RDLOCK qcow2_alloc_bytes(BlockDriverState *bs, int size)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t offset;
     size_t free_in_cluster;
     int ret;
 
-    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
+    BLKDBG_CO_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
     assert(size > 0 && size <= s->cluster_size);
     assert(!s->free_byte_offset || offset_into_cluster(s, s->free_byte_offset));
 
@@ -1177,11 +1179,11 @@ void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry,
     switch (ctype) {
     case QCOW2_CLUSTER_COMPRESSED:
         {
-            int64_t offset = (l2_entry & s->cluster_offset_mask)
-                & QCOW2_COMPRESSED_SECTOR_MASK;
-            int size = QCOW2_COMPRESSED_SECTOR_SIZE *
-                (((l2_entry >> s->csize_shift) & s->csize_mask) + 1);
-            qcow2_free_clusters(bs, offset, size, type);
+            uint64_t coffset;
+            int csize;
+
+            qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
+            qcow2_free_clusters(bs, coffset, csize, type);
         }
         break;
     case QCOW2_CLUSTER_NORMAL:
@@ -1203,7 +1205,7 @@ void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry,
     }
 }
 
-int coroutine_fn qcow2_write_caches(BlockDriverState *bs)
+int qcow2_write_caches(BlockDriverState *bs)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret;
@@ -1223,7 +1225,7 @@ int coroutine_fn qcow2_write_caches(BlockDriverState *bs)
     return 0;
 }
 
-int coroutine_fn qcow2_flush_caches(BlockDriverState *bs)
+int qcow2_flush_caches(BlockDriverState *bs)
 {
     int ret = qcow2_write_caches(bs);
     if (ret < 0) {
@@ -1247,7 +1249,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
     bool l1_allocated = false;
     int64_t old_entry, old_l2_offset;
     unsigned slice, slice_size2, n_slices;
-    int i, j, l1_modified = 0, nb_csectors;
+    int i, j, l1_modified = 0;
     int ret;
 
     assert(addend >= -1 && addend <= 1);
@@ -1271,7 +1273,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
         }
         l1_allocated = true;
 
-        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+        ret = bdrv_pread(bs->file, l1_table_offset, l1_size2, l1_table, 0);
         if (ret < 0) {
             goto fail;
         }
@@ -1318,14 +1320,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 
                     switch (qcow2_get_cluster_type(bs, entry)) {
                     case QCOW2_CLUSTER_COMPRESSED:
-                        nb_csectors = ((entry >> s->csize_shift) &
-                                       s->csize_mask) + 1;
                         if (addend != 0) {
-                            uint64_t coffset = (entry & s->cluster_offset_mask)
-                                & QCOW2_COMPRESSED_SECTOR_MASK;
+                            uint64_t coffset;
+                            int csize;
+
+                            qcow2_parse_compressed_l2_entry(bs, entry,
+                                                            &coffset, &csize);
                             ret = update_refcount(
-                                bs, coffset,
-                                nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE,
+                                bs, coffset, csize,
                                 abs(addend), addend < 0,
                                 QCOW2_DISCARD_SNAPSHOT);
                             if (ret < 0) {
@@ -1432,8 +1434,8 @@ fail:
             cpu_to_be64s(&l1_table[i]);
         }
 
-        ret = bdrv_pwrite_sync(bs->file, l1_table_offset,
-                               l1_table, l1_size2);
+        ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_size2, l1_table,
+                               0);
 
         for (i = 0; i < l1_size; i++) {
             be64_to_cpus(&l1_table[i]);
@@ -1520,10 +1522,11 @@ static int realloc_refcount_array(BDRVQcow2State *s, void **array,
  *
  * Modifies the number of errors in res.
  */
-int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
-                             void **refcount_table,
-                             int64_t *refcount_table_size,
-                             int64_t offset, int64_t size)
+int coroutine_fn GRAPH_RDLOCK
+qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
+                         void **refcount_table,
+                         int64_t *refcount_table_size,
+                         int64_t offset, int64_t size)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t start, last, cluster_offset, k, refcount;
@@ -1534,7 +1537,7 @@ int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
         return 0;
     }
 
-    file_len = bdrv_getlength(bs->file->bs);
+    file_len = bdrv_co_getlength(bs->file->bs);
     if (file_len < 0) {
         return file_len;
     }
@@ -1587,6 +1590,67 @@ enum {
     CHECK_FRAG_INFO = 0x2,      /* update BlockFragInfo counters */
 };
 
+/*
+ * Fix L2 entry by making it QCOW2_CLUSTER_ZERO_PLAIN (or making all its present
+ * subclusters QCOW2_SUBCLUSTER_ZERO_PLAIN).
+ *
+ * This function decrements res->corruptions on success, so the caller is
+ * responsible to increment res->corruptions prior to the call.
+ *
+ * On failure in-memory @l2_table may be modified.
+ */
+static int coroutine_fn GRAPH_RDLOCK
+fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res,
+                     uint64_t l2_offset, uint64_t *l2_table,
+                     int l2_index, bool active,
+                     bool *metadata_overlap)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int ret;
+    int idx = l2_index * (l2_entry_size(s) / sizeof(uint64_t));
+    uint64_t l2e_offset = l2_offset + (uint64_t)l2_index * l2_entry_size(s);
+    int ign = active ? QCOW2_OL_ACTIVE_L2 : QCOW2_OL_INACTIVE_L2;
+
+    if (has_subclusters(s)) {
+        uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, l2_index);
+
+        /* Allocated subclusters become zero */
+        l2_bitmap |= l2_bitmap << 32;
+        l2_bitmap &= QCOW_L2_BITMAP_ALL_ZEROES;
+
+        set_l2_bitmap(s, l2_table, l2_index, l2_bitmap);
+        set_l2_entry(s, l2_table, l2_index, 0);
+    } else {
+        set_l2_entry(s, l2_table, l2_index, QCOW_OFLAG_ZERO);
+    }
+
+    ret = qcow2_pre_write_overlap_check(bs, ign, l2e_offset, l2_entry_size(s),
+                                        false);
+    if (metadata_overlap) {
+        *metadata_overlap = ret < 0;
+    }
+    if (ret < 0) {
+        fprintf(stderr, "ERROR: Overlap check failed\n");
+        goto fail;
+    }
+
+    ret = bdrv_co_pwrite_sync(bs->file, l2e_offset, l2_entry_size(s),
+                              &l2_table[idx], 0);
+    if (ret < 0) {
+        fprintf(stderr, "ERROR: Failed to overwrite L2 "
+                "table entry: %s\n", strerror(-ret));
+        goto fail;
+    }
+
+    res->corruptions--;
+    res->corruptions_fixed++;
+    return 0;
+
+fail:
+    res->check_errors++;
+    return ret;
+}
+
 /*
  * Increases the refcount in the given refcount table for the all clusters
  * referenced in the L2 table. While doing so, performs some checks on L2
@@ -1595,32 +1659,48 @@ enum {
  * Returns the number of errors found by the checks or -errno if an internal
  * error occurred.
  */
-static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
-                              void **refcount_table,
-                              int64_t *refcount_table_size, int64_t l2_offset,
-                              int flags, BdrvCheckMode fix, bool active)
+static int coroutine_fn GRAPH_RDLOCK
+check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
+                   void **refcount_table,
+                   int64_t *refcount_table_size, int64_t l2_offset,
+                   int flags, BdrvCheckMode fix, bool active)
 {
     BDRVQcow2State *s = bs->opaque;
-    uint64_t *l2_table, l2_entry;
+    uint64_t l2_entry, l2_bitmap;
     uint64_t next_contiguous_offset = 0;
-    int i, l2_size, nb_csectors, ret;
+    int i, ret;
+    size_t l2_size_bytes = s->l2_size * l2_entry_size(s);
+    g_autofree uint64_t *l2_table = g_malloc(l2_size_bytes);
+    bool metadata_overlap;
 
     /* Read L2 table from disk */
-    l2_size = s->l2_size * l2_entry_size(s);
-    l2_table = g_malloc(l2_size);
-
-    ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size);
+    ret = bdrv_co_pread(bs->file, l2_offset, l2_size_bytes, l2_table, 0);
     if (ret < 0) {
         fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
         res->check_errors++;
-        goto fail;
+        return ret;
     }
 
     /* Do the actual checks */
-    for(i = 0; i < s->l2_size; i++) {
+    for (i = 0; i < s->l2_size; i++) {
+        uint64_t coffset;
+        int csize;
+        QCow2ClusterType type;
+
         l2_entry = get_l2_entry(s, l2_table, i);
+        l2_bitmap = get_l2_bitmap(s, l2_table, i);
+        type = qcow2_get_cluster_type(bs, l2_entry);
+
+        if (type != QCOW2_CLUSTER_COMPRESSED) {
+            /* Check reserved bits of Standard Cluster Descriptor */
+            if (l2_entry & L2E_STD_RESERVED_MASK) {
+                fprintf(stderr, "ERROR found l2 entry with reserved bits set: "
+                        "%" PRIx64 "\n", l2_entry);
+                res->corruptions++;
+            }
+        }
 
-        switch (qcow2_get_cluster_type(bs, l2_entry)) {
+        switch (type) {
         case QCOW2_CLUSTER_COMPRESSED:
             /* Compressed clusters don't have QCOW_OFLAG_COPIED */
             if (l2_entry & QCOW_OFLAG_COPIED) {
@@ -1638,23 +1718,28 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
                 break;
             }
 
+            if (l2_bitmap) {
+                fprintf(stderr, "ERROR compressed cluster %d with non-zero "
+                        "subcluster allocation bitmap, entry=0x%" PRIx64 "\n",
+                        i, l2_entry);
+                res->corruptions++;
+                break;
+            }
+
             /* Mark cluster as used */
-            nb_csectors = ((l2_entry >> s->csize_shift) &
-                           s->csize_mask) + 1;
-            l2_entry &= s->cluster_offset_mask;
+            qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
             ret = qcow2_inc_refcounts_imrt(
-                bs, res, refcount_table, refcount_table_size,
-                l2_entry & QCOW2_COMPRESSED_SECTOR_MASK,
-                nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE);
+                bs, res, refcount_table, refcount_table_size, coffset, csize);
             if (ret < 0) {
-                goto fail;
+                return ret;
             }
 
             if (flags & CHECK_FRAG_INFO) {
                 res->bfi.allocated_clusters++;
                 res->bfi.compressed_clusters++;
 
-                /* Compressed clusters are fragmented by nature.  Since they
+                /*
+                 * Compressed clusters are fragmented by nature.  Since they
                  * take up sub-sector space but we only have sector granularity
                  * I/O we need to re-read the same sectors even for adjacent
                  * compressed clusters.
@@ -1668,13 +1753,19 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
         {
             uint64_t offset = l2_entry & L2E_OFFSET_MASK;
 
+            if ((l2_bitmap >> 32) & l2_bitmap) {
+                res->corruptions++;
+                fprintf(stderr, "ERROR offset=%" PRIx64 ": Allocated "
+                        "cluster has corrupted subcluster allocation bitmap\n",
+                        offset);
+            }
+
             /* Correct offsets are cluster aligned */
             if (offset_into_cluster(s, offset)) {
                 bool contains_data;
                 res->corruptions++;
 
                 if (has_subclusters(s)) {
-                    uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, i);
                     contains_data = (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC);
                 } else {
                     contains_data = !(l2_entry & QCOW_OFLAG_ZERO);
@@ -1687,40 +1778,30 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
                             fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR",
                             offset);
                     if (fix & BDRV_FIX_ERRORS) {
-                        int idx = i * (l2_entry_size(s) / sizeof(uint64_t));
-                        uint64_t l2e_offset =
-                            l2_offset + (uint64_t)i * l2_entry_size(s);
-                        int ign = active ? QCOW2_OL_ACTIVE_L2 :
-                                           QCOW2_OL_INACTIVE_L2;
-
-                        l2_entry = has_subclusters(s) ? 0 : QCOW_OFLAG_ZERO;
-                        set_l2_entry(s, l2_table, i, l2_entry);
-                        ret = qcow2_pre_write_overlap_check(bs, ign,
-                                l2e_offset, l2_entry_size(s), false);
-                        if (ret < 0) {
-                            fprintf(stderr, "ERROR: Overlap check failed\n");
-                            res->check_errors++;
-                            /* Something is seriously wrong, so abort checking
-                             * this L2 table */
-                            goto fail;
+                        ret = fix_l2_entry_by_zero(bs, res, l2_offset,
+                                                   l2_table, i, active,
+                                                   &metadata_overlap);
+                        if (metadata_overlap) {
+                            /*
+                             * Something is seriously wrong, so abort checking
+                             * this L2 table.
+                             */
+                            return ret;
                         }
 
-                        ret = bdrv_pwrite_sync(bs->file, l2e_offset,
-                                               &l2_table[idx],
-                                               l2_entry_size(s));
-                        if (ret < 0) {
-                            fprintf(stderr, "ERROR: Failed to overwrite L2 "
-                                    "table entry: %s\n", strerror(-ret));
-                            res->check_errors++;
-                            /* Do not abort, continue checking the rest of this
-                             * L2 table's entries */
-                        } else {
-                            res->corruptions--;
-                            res->corruptions_fixed++;
-                            /* Skip marking the cluster as used
-                             * (it is unused now) */
+                        if (ret == 0) {
+                            /*
+                             * Skip marking the cluster as used
+                             * (it is unused now).
+                             */
                             continue;
                         }
+
+                        /*
+                         * Failed to fix.
+                         * Do not abort, continue checking the rest of this
+                         * L2 table's entries.
+                         */
                     }
                 } else {
                     fprintf(stderr, "ERROR offset=%" PRIx64 ": Data cluster is "
@@ -1743,14 +1824,23 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
                                                refcount_table_size,
                                                offset, s->cluster_size);
                 if (ret < 0) {
-                    goto fail;
+                    return ret;
                 }
             }
             break;
         }
 
         case QCOW2_CLUSTER_ZERO_PLAIN:
+            /* Impossible when image has subclusters */
+            assert(!l2_bitmap);
+            break;
+
         case QCOW2_CLUSTER_UNALLOCATED:
+            if (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC) {
+                res->corruptions++;
+                fprintf(stderr, "ERROR: Unallocated "
+                        "cluster has non-zero subcluster allocation map\n");
+            }
             break;
 
         default:
@@ -1758,12 +1848,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
         }
     }
 
-    g_free(l2_table);
     return 0;
-
-fail:
-    g_free(l2_table);
-    return ret;
 }
 
 /*
@@ -1774,79 +1859,86 @@ fail:
  * Returns the number of errors found by the checks or -errno if an internal
  * error occurred.
  */
-static int check_refcounts_l1(BlockDriverState *bs,
-                              BdrvCheckResult *res,
-                              void **refcount_table,
-                              int64_t *refcount_table_size,
-                              int64_t l1_table_offset, int l1_size,
-                              int flags, BdrvCheckMode fix, bool active)
+static int coroutine_fn GRAPH_RDLOCK
+check_refcounts_l1(BlockDriverState *bs, BdrvCheckResult *res,
+                   void **refcount_table, int64_t *refcount_table_size,
+                   int64_t l1_table_offset, int l1_size,
+                   int flags, BdrvCheckMode fix, bool active)
 {
     BDRVQcow2State *s = bs->opaque;
-    uint64_t *l1_table = NULL, l2_offset, l1_size2;
+    size_t l1_size_bytes = l1_size * L1E_SIZE;
+    g_autofree uint64_t *l1_table = NULL;
+    uint64_t l2_offset;
     int i, ret;
 
-    l1_size2 = l1_size * L1E_SIZE;
+    if (!l1_size) {
+        return 0;
+    }
 
     /* Mark L1 table as used */
     ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size,
-                                   l1_table_offset, l1_size2);
+                                   l1_table_offset, l1_size_bytes);
     if (ret < 0) {
-        goto fail;
+        return ret;
+    }
+
+    l1_table = g_try_malloc(l1_size_bytes);
+    if (l1_table == NULL) {
+        res->check_errors++;
+        return -ENOMEM;
     }
 
     /* Read L1 table entries from disk */
-    if (l1_size2 > 0) {
-        l1_table = g_try_malloc(l1_size2);
-        if (l1_table == NULL) {
-            ret = -ENOMEM;
-            res->check_errors++;
-            goto fail;
-        }
-        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
-        if (ret < 0) {
-            fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
-            res->check_errors++;
-            goto fail;
-        }
-        for(i = 0;i < l1_size; i++)
-            be64_to_cpus(&l1_table[i]);
+    ret = bdrv_co_pread(bs->file, l1_table_offset, l1_size_bytes, l1_table, 0);
+    if (ret < 0) {
+        fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
+        res->check_errors++;
+        return ret;
+    }
+
+    for (i = 0; i < l1_size; i++) {
+        be64_to_cpus(&l1_table[i]);
     }
 
     /* Do the actual checks */
-    for(i = 0; i < l1_size; i++) {
-        l2_offset = l1_table[i];
-        if (l2_offset) {
-            /* Mark L2 table as used */
-            l2_offset &= L1E_OFFSET_MASK;
-            ret = qcow2_inc_refcounts_imrt(bs, res,
-                                           refcount_table, refcount_table_size,
-                                           l2_offset, s->cluster_size);
-            if (ret < 0) {
-                goto fail;
-            }
+    for (i = 0; i < l1_size; i++) {
+        if (!l1_table[i]) {
+            continue;
+        }
 
-            /* L2 tables are cluster aligned */
-            if (offset_into_cluster(s, l2_offset)) {
-                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
-                    "cluster aligned; L1 entry corrupted\n", l2_offset);
-                res->corruptions++;
-            }
+        if (l1_table[i] & L1E_RESERVED_MASK) {
+            fprintf(stderr, "ERROR found L1 entry with reserved bits set: "
+                    "%" PRIx64 "\n", l1_table[i]);
+            res->corruptions++;
+        }
 
-            /* Process and check L2 entries */
-            ret = check_refcounts_l2(bs, res, refcount_table,
-                                     refcount_table_size, l2_offset, flags,
-                                     fix, active);
-            if (ret < 0) {
-                goto fail;
-            }
+        l2_offset = l1_table[i] & L1E_OFFSET_MASK;
+
+        /* Mark L2 table as used */
+        ret = qcow2_inc_refcounts_imrt(bs, res,
+                                       refcount_table, refcount_table_size,
+                                       l2_offset, s->cluster_size);
+        if (ret < 0) {
+            return ret;
+        }
+
+        /* L2 tables are cluster aligned */
+        if (offset_into_cluster(s, l2_offset)) {
+            fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
+                "cluster aligned; L1 entry corrupted\n", l2_offset);
+            res->corruptions++;
+        }
+
+        /* Process and check L2 entries */
+        ret = check_refcounts_l2(bs, res, refcount_table,
+                                 refcount_table_size, l2_offset, flags,
+                                 fix, active);
+        if (ret < 0) {
+            return ret;
         }
     }
-    g_free(l1_table);
-    return 0;
 
-fail:
-    g_free(l1_table);
-    return ret;
+    return 0;
 }
 
 /*
@@ -1857,8 +1949,8 @@ fail:
  * have been already detected and sufficiently signaled by the calling function
  * (qcow2_check_refcounts) by the time this function is called).
  */
-static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
-                              BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size);
@@ -1913,8 +2005,8 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
             }
         }
 
-        ret = bdrv_pread(bs->file, l2_offset, l2_table,
-                         s->l2_size * l2_entry_size(s));
+        ret = bdrv_co_pread(bs->file, l2_offset, s->l2_size * l2_entry_size(s),
+                            l2_table, 0);
         if (ret < 0) {
             fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
                     strerror(-ret));
@@ -1967,8 +2059,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
                 goto fail;
             }
 
-            ret = bdrv_pwrite(bs->file, l2_offset, l2_table,
-                              s->cluster_size);
+            ret = bdrv_co_pwrite(bs->file, l2_offset, s->cluster_size, l2_table, 0);
             if (ret < 0) {
                 fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
                         strerror(-ret));
@@ -1991,9 +2082,10 @@ fail:
  * Checks consistency of refblocks and accounts for each refblock in
  * *refcount_table.
  */
-static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
-                           BdrvCheckMode fix, bool *rebuild,
-                           void **refcount_table, int64_t *nb_clusters)
+static int coroutine_fn GRAPH_RDLOCK
+check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
+                BdrvCheckMode fix, bool *rebuild,
+                void **refcount_table, int64_t *nb_clusters)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t i, size;
@@ -2001,9 +2093,17 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
 
     for(i = 0; i < s->refcount_table_size; i++) {
         uint64_t offset, cluster;
-        offset = s->refcount_table[i];
+        offset = s->refcount_table[i] & REFT_OFFSET_MASK;
         cluster = offset >> s->cluster_bits;
 
+        if (s->refcount_table[i] & REFT_RESERVED_MASK) {
+            fprintf(stderr, "ERROR refcount table entry %" PRId64 " has "
+                    "reserved bits set\n", i);
+            res->corruptions++;
+            *rebuild = true;
+            continue;
+        }
+
         /* Refcount blocks are cluster aligned */
         if (offset_into_cluster(s, offset)) {
             fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
@@ -2027,13 +2127,13 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
                     goto resize_fail;
                 }
 
-                ret = bdrv_truncate(bs->file, offset + s->cluster_size, false,
-                                    PREALLOC_MODE_OFF, 0, &local_err);
+                ret = bdrv_co_truncate(bs->file, offset + s->cluster_size, false,
+                                       PREALLOC_MODE_OFF, 0, &local_err);
                 if (ret < 0) {
                     error_report_err(local_err);
                     goto resize_fail;
                 }
-                size = bdrv_getlength(bs->file->bs);
+                size = bdrv_co_getlength(bs->file->bs);
                 if (size < 0) {
                     ret = size;
                     goto resize_fail;
@@ -2097,9 +2197,10 @@ resize_fail:
 /*
  * Calculates an in-memory refcount table.
  */
-static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                               BdrvCheckMode fix, bool *rebuild,
-                               void **refcount_table, int64_t *nb_clusters)
+static int coroutine_fn GRAPH_RDLOCK
+calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+                    BdrvCheckMode fix, bool *rebuild,
+                    void **refcount_table, int64_t *nb_clusters)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t i;
@@ -2199,10 +2300,11 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
  * Compares the actual reference count for each cluster in the image against the
  * refcount as reported by the refcount structures on-disk.
  */
-static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                              BdrvCheckMode fix, bool *rebuild,
-                              int64_t *highest_cluster,
-                              void *refcount_table, int64_t nb_clusters)
+static void coroutine_fn GRAPH_RDLOCK
+compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+                  BdrvCheckMode fix, bool *rebuild,
+                  int64_t *highest_cluster,
+                  void *refcount_table, int64_t nb_clusters)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t i;
@@ -2339,188 +2441,329 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs,
 }
 
 /*
- * Creates a new refcount structure based solely on the in-memory information
- * given through *refcount_table. All necessary allocations will be reflected
- * in that array.
+ * Helper function for rebuild_refcount_structure().
  *
- * On success, the old refcount structure is leaked (it will be covered by the
- * new refcount structure).
+ * Scan the range of clusters [first_cluster, end_cluster) for allocated
+ * clusters and write all corresponding refblocks to disk.  The refblock
+ * and allocation data is taken from the in-memory refcount table
+ * *refcount_table[] (of size *nb_clusters), which is basically one big
+ * (unlimited size) refblock for the whole image.
+ *
+ * For these refblocks, clusters are allocated using said in-memory
+ * refcount table.  Care is taken that these allocations are reflected
+ * in the refblocks written to disk.
+ *
+ * The refblocks' offsets are written into a reftable, which is
+ * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr).  If
+ * that reftable is of insufficient size, it will be resized to fit.
+ * This reftable is not written to disk.
+ *
+ * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed
+ * to point to existing valid refblocks that do not need to be allocated
+ * again.)
+ *
+ * Return whether the on-disk reftable array was resized (true/false),
+ * or -errno on error.
  */
-static int rebuild_refcount_structure(BlockDriverState *bs,
-                                      BdrvCheckResult *res,
-                                      void **refcount_table,
-                                      int64_t *nb_clusters)
+static int coroutine_fn GRAPH_RDLOCK
+rebuild_refcounts_write_refblocks(
+        BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters,
+        int64_t first_cluster, int64_t end_cluster,
+        uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr,
+        Error **errp
+    )
 {
     BDRVQcow2State *s = bs->opaque;
-    int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0;
+    int64_t cluster;
     int64_t refblock_offset, refblock_start, refblock_index;
-    uint32_t reftable_size = 0;
-    uint64_t *on_disk_reftable = NULL;
+    int64_t first_free_cluster = 0;
+    uint64_t *on_disk_reftable = *on_disk_reftable_ptr;
+    uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr;
     void *on_disk_refblock;
-    int ret = 0;
-    struct {
-        uint64_t reftable_offset;
-        uint32_t reftable_clusters;
-    } QEMU_PACKED reftable_offset_and_clusters;
-
-    qcow2_cache_empty(bs, s->refcount_block_cache);
+    bool reftable_grown = false;
+    int ret;
 
-write_refblocks:
-    for (; cluster < *nb_clusters; cluster++) {
+    for (cluster = first_cluster; cluster < end_cluster; cluster++) {
+        /* Check all clusters to find refblocks that contain non-zero entries */
         if (!s->get_refcount(*refcount_table, cluster)) {
             continue;
         }
 
+        /*
+         * This cluster is allocated, so we need to create a refblock
+         * for it.  The data we will write to disk is just the
+         * respective slice from *refcount_table, so it will contain
+         * accurate refcounts for all clusters belonging to this
+         * refblock.  After we have written it, we will therefore skip
+         * all remaining clusters in this refblock.
+         */
+
         refblock_index = cluster >> s->refcount_block_bits;
         refblock_start = refblock_index << s->refcount_block_bits;
 
-        /* Don't allocate a cluster in a refblock already written to disk */
-        if (first_free_cluster < refblock_start) {
-            first_free_cluster = refblock_start;
-        }
-        refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
-                                              nb_clusters, &first_free_cluster);
-        if (refblock_offset < 0) {
-            fprintf(stderr, "ERROR allocating refblock: %s\n",
-                    strerror(-refblock_offset));
-            res->check_errors++;
-            ret = refblock_offset;
-            goto fail;
-        }
+        if (on_disk_reftable_entries > refblock_index &&
+            on_disk_reftable[refblock_index])
+        {
+            /*
+             * We can get here after a `goto write_refblocks`: We have a
+             * reftable from a previous run, and the refblock is already
+             * allocated.  No need to allocate it again.
+             */
+            refblock_offset = on_disk_reftable[refblock_index];
+        } else {
+            int64_t refblock_cluster_index;
 
-        if (reftable_size <= refblock_index) {
-            uint32_t old_reftable_size = reftable_size;
-            uint64_t *new_on_disk_reftable;
+            /* Don't allocate a cluster in a refblock already written to disk */
+            if (first_free_cluster < refblock_start) {
+                first_free_cluster = refblock_start;
+            }
+            refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
+                                                  nb_clusters,
+                                                  &first_free_cluster);
+            if (refblock_offset < 0) {
+                error_setg_errno(errp, -refblock_offset,
+                                 "ERROR allocating refblock");
+                return refblock_offset;
+            }
 
-            reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
-                                     s->cluster_size) / REFTABLE_ENTRY_SIZE;
-            new_on_disk_reftable = g_try_realloc(on_disk_reftable,
-                                                 reftable_size *
-                                                 REFTABLE_ENTRY_SIZE);
-            if (!new_on_disk_reftable) {
-                res->check_errors++;
-                ret = -ENOMEM;
-                goto fail;
+            refblock_cluster_index = refblock_offset / s->cluster_size;
+            if (refblock_cluster_index >= end_cluster) {
+                /*
+                 * We must write the refblock that holds this refblock's
+                 * refcount
+                 */
+                end_cluster = refblock_cluster_index + 1;
             }
-            on_disk_reftable = new_on_disk_reftable;
 
-            memset(on_disk_reftable + old_reftable_size, 0,
-                   (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE);
+            if (on_disk_reftable_entries <= refblock_index) {
+                on_disk_reftable_entries =
+                    ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
+                             s->cluster_size) / REFTABLE_ENTRY_SIZE;
+                on_disk_reftable =
+                    g_try_realloc(on_disk_reftable,
+                                  on_disk_reftable_entries *
+                                  REFTABLE_ENTRY_SIZE);
+                if (!on_disk_reftable) {
+                    error_setg(errp, "ERROR allocating reftable memory");
+                    return -ENOMEM;
+                }
 
-            /* The offset we have for the reftable is now no longer valid;
-             * this will leak that range, but we can easily fix that by running
-             * a leak-fixing check after this rebuild operation */
-            reftable_offset = -1;
-        } else {
-            assert(on_disk_reftable);
-        }
-        on_disk_reftable[refblock_index] = refblock_offset;
+                memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0,
+                       (on_disk_reftable_entries -
+                        *on_disk_reftable_entries_ptr) *
+                       REFTABLE_ENTRY_SIZE);
 
-        /* If this is apparently the last refblock (for now), try to squeeze the
-         * reftable in */
-        if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits &&
-            reftable_offset < 0)
-        {
-            uint64_t reftable_clusters = size_to_clusters(s, reftable_size *
-                                                          REFTABLE_ENTRY_SIZE);
-            reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
-                                                  refcount_table, nb_clusters,
-                                                  &first_free_cluster);
-            if (reftable_offset < 0) {
-                fprintf(stderr, "ERROR allocating reftable: %s\n",
-                        strerror(-reftable_offset));
-                res->check_errors++;
-                ret = reftable_offset;
-                goto fail;
+                *on_disk_reftable_ptr = on_disk_reftable;
+                *on_disk_reftable_entries_ptr = on_disk_reftable_entries;
+
+                reftable_grown = true;
+            } else {
+                assert(on_disk_reftable);
             }
+            on_disk_reftable[refblock_index] = refblock_offset;
         }
 
+        /* Refblock is allocated, write it to disk */
+
         ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
                                             s->cluster_size, false);
         if (ret < 0) {
-            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
-            goto fail;
+            error_setg_errno(errp, -ret, "ERROR writing refblock");
+            return ret;
         }
 
-        /* The size of *refcount_table is always cluster-aligned, therefore the
-         * write operation will not overflow */
+        /*
+         * The refblock is simply a slice of *refcount_table.
+         * Note that the size of *refcount_table is always aligned to
+         * whole clusters, so the write operation will not result in
+         * out-of-bounds accesses.
+         */
         on_disk_refblock = (void *)((char *) *refcount_table +
                                     refblock_index * s->cluster_size);
 
-        ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock,
-                          s->cluster_size);
+        ret = bdrv_co_pwrite(bs->file, refblock_offset, s->cluster_size,
+                             on_disk_refblock, 0);
         if (ret < 0) {
-            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
-            goto fail;
+            error_setg_errno(errp, -ret, "ERROR writing refblock");
+            return ret;
         }
 
-        /* Go to the end of this refblock */
+        /* This refblock is done, skip to its end */
         cluster = refblock_start + s->refcount_block_size - 1;
     }
 
-    if (reftable_offset < 0) {
-        uint64_t post_refblock_start, reftable_clusters;
+    return reftable_grown;
+}
+
+/*
+ * Creates a new refcount structure based solely on the in-memory information
+ * given through *refcount_table (this in-memory information is basically just
+ * the concatenation of all refblocks).  All necessary allocations will be
+ * reflected in that array.
+ *
+ * On success, the old refcount structure is leaked (it will be covered by the
+ * new refcount structure).
+ */
+static int coroutine_fn GRAPH_RDLOCK
+rebuild_refcount_structure(BlockDriverState *bs, BdrvCheckResult *res,
+                           void **refcount_table, int64_t *nb_clusters,
+                           Error **errp)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int64_t reftable_offset = -1;
+    int64_t reftable_length = 0;
+    int64_t reftable_clusters;
+    int64_t refblock_index;
+    uint32_t on_disk_reftable_entries = 0;
+    uint64_t *on_disk_reftable = NULL;
+    int ret = 0;
+    int reftable_size_changed = 0;
+    struct {
+        uint64_t reftable_offset;
+        uint32_t reftable_clusters;
+    } QEMU_PACKED reftable_offset_and_clusters;
+
+    qcow2_cache_empty(bs, s->refcount_block_cache);
+
+    /*
+     * For each refblock containing entries, we try to allocate a
+     * cluster (in the in-memory refcount table) and write its offset
+     * into on_disk_reftable[].  We then write the whole refblock to
+     * disk (as a slice of the in-memory refcount table).
+     * This is done by rebuild_refcounts_write_refblocks().
+     *
+     * Once we have scanned all clusters, we try to find space for the
+     * reftable.  This will dirty the in-memory refcount table (i.e.
+     * make it differ from the refblocks we have already written), so we
+     * need to run rebuild_refcounts_write_refblocks() again for the
+     * range of clusters where the reftable has been allocated.
+     *
+     * This second run might make the reftable grow again, in which case
+     * we will need to allocate another space for it, which is why we
+     * repeat all this until the reftable stops growing.
+     *
+     * (This loop will terminate, because with every cluster the
+     * reftable grows, it can accommodate a multitude of more refcounts,
+     * so that at some point this must be able to cover the reftable
+     * and all refblocks describing it.)
+     *
+     * We then convert the reftable to big-endian and write it to disk.
+     *
+     * Note that we never free any reftable allocations.  Doing so would
+     * needlessly complicate the algorithm: The eventual second check
+     * run we do will clean up all leaks we have caused.
+     */
+
+    reftable_size_changed =
+        rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
+                                          0, *nb_clusters,
+                                          &on_disk_reftable,
+                                          &on_disk_reftable_entries, errp);
+    if (reftable_size_changed < 0) {
+        res->check_errors++;
+        ret = reftable_size_changed;
+        goto fail;
+    }
+
+    /*
+     * There was no reftable before, so rebuild_refcounts_write_refblocks()
+     * must have increased its size (from 0 to something).
+     */
+    assert(reftable_size_changed);
+
+    do {
+        int64_t reftable_start_cluster, reftable_end_cluster;
+        int64_t first_free_cluster = 0;
+
+        reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE;
+        reftable_clusters = size_to_clusters(s, reftable_length);
 
-        post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size);
-        reftable_clusters =
-            size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE);
-        /* Not pretty but simple */
-        if (first_free_cluster < post_refblock_start) {
-            first_free_cluster = post_refblock_start;
-        }
         reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
                                               refcount_table, nb_clusters,
                                               &first_free_cluster);
         if (reftable_offset < 0) {
-            fprintf(stderr, "ERROR allocating reftable: %s\n",
-                    strerror(-reftable_offset));
+            error_setg_errno(errp, -reftable_offset,
+                             "ERROR allocating reftable");
             res->check_errors++;
             ret = reftable_offset;
             goto fail;
         }
 
-        goto write_refblocks;
-    }
+        /*
+         * We need to update the affected refblocks, so re-run the
+         * write_refblocks loop for the reftable's range of clusters.
+         */
+        assert(offset_into_cluster(s, reftable_offset) == 0);
+        reftable_start_cluster = reftable_offset / s->cluster_size;
+        reftable_end_cluster = reftable_start_cluster + reftable_clusters;
+        reftable_size_changed =
+            rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
+                                              reftable_start_cluster,
+                                              reftable_end_cluster,
+                                              &on_disk_reftable,
+                                              &on_disk_reftable_entries, errp);
+        if (reftable_size_changed < 0) {
+            res->check_errors++;
+            ret = reftable_size_changed;
+            goto fail;
+        }
 
-    for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
+        /*
+         * If the reftable size has changed, we will need to find a new
+         * allocation, repeating the loop.
+         */
+    } while (reftable_size_changed);
+
+    /* The above loop must have run at least once */
+    assert(reftable_offset >= 0);
+
+    /*
+     * All allocations are done, all refblocks are written, convert the
+     * reftable to big-endian and write it to disk.
+     */
+
+    for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
+         refblock_index++)
+    {
         cpu_to_be64s(&on_disk_reftable[refblock_index]);
     }
 
-    ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset,
-                                        reftable_size * REFTABLE_ENTRY_SIZE,
+    ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length,
                                         false);
     if (ret < 0) {
-        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+        error_setg_errno(errp, -ret, "ERROR writing reftable");
         goto fail;
     }
 
-    assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE);
-    ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
-                      reftable_size * REFTABLE_ENTRY_SIZE);
+    assert(reftable_length < INT_MAX);
+    ret = bdrv_co_pwrite(bs->file, reftable_offset, reftable_length,
+                         on_disk_reftable, 0);
     if (ret < 0) {
-        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+        error_setg_errno(errp, -ret, "ERROR writing reftable");
         goto fail;
     }
 
     /* Enter new reftable into the image header */
     reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
     reftable_offset_and_clusters.reftable_clusters =
-        cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE));
-    ret = bdrv_pwrite_sync(bs->file,
-                           offsetof(QCowHeader, refcount_table_offset),
-                           &reftable_offset_and_clusters,
-                           sizeof(reftable_offset_and_clusters));
+        cpu_to_be32(reftable_clusters);
+    ret = bdrv_co_pwrite_sync(bs->file,
+                              offsetof(QCowHeader, refcount_table_offset),
+                              sizeof(reftable_offset_and_clusters),
+                              &reftable_offset_and_clusters, 0);
     if (ret < 0) {
-        fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret));
+        error_setg_errno(errp, -ret, "ERROR setting reftable");
         goto fail;
     }
 
-    for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
+    for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
+         refblock_index++)
+    {
         be64_to_cpus(&on_disk_reftable[refblock_index]);
     }
     s->refcount_table = on_disk_reftable;
     s->refcount_table_offset = reftable_offset;
-    s->refcount_table_size = reftable_size;
+    s->refcount_table_size = on_disk_reftable_entries;
     update_max_refcount_table_index(s);
 
     return 0;
@@ -2536,8 +2779,8 @@ fail:
  * Returns 0 if no errors are found, the number of errors in case the image is
  * detected as corrupted, and -errno when an internal error occurred.
  */
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                          BdrvCheckMode fix)
+int coroutine_fn GRAPH_RDLOCK
+qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
 {
     BDRVQcow2State *s = bs->opaque;
     BdrvCheckResult pre_compare_res;
@@ -2546,7 +2789,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
     bool rebuild = false;
     int ret;
 
-    size = bdrv_getlength(bs->file->bs);
+    size = bdrv_co_getlength(bs->file->bs);
     if (size < 0) {
         res->check_errors++;
         return size;
@@ -2577,11 +2820,13 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
     if (rebuild && (fix & BDRV_FIX_ERRORS)) {
         BdrvCheckResult old_res = *res;
         int fresh_leaks = 0;
+        Error *local_err = NULL;
 
         fprintf(stderr, "Rebuilding refcount structure\n");
         ret = rebuild_refcount_structure(bs, res, &refcount_table,
-                                         &nb_clusters);
+                                         &nb_clusters, &local_err);
         if (ret < 0) {
+            error_report_err(local_err);
             goto fail;
         }
 
@@ -2767,7 +3012,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
                 return -ENOMEM;
             }
 
-            ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
+            ret = bdrv_pread(bs->file, l1_ofs, l1_sz2, l1, 0);
             if (ret < 0) {
                 g_free(l1);
                 return ret;
@@ -2856,20 +3101,22 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
  *
  * @allocated should be set to true if a new cluster has been allocated.
  */
-typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
-                               uint64_t reftable_index, uint64_t *reftable_size,
-                               void *refblock, bool refblock_empty,
-                               bool *allocated, Error **errp);
+typedef int /* GRAPH_RDLOCK_PTR */
+    (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
+                       uint64_t reftable_index, uint64_t *reftable_size,
+                       void *refblock, bool refblock_empty,
+                       bool *allocated, Error **errp);
 
 /**
  * This "operation" for walk_over_reftable() allocates the refblock on disk (if
  * it is not empty) and inserts its offset into the new reftable. The size of
  * this new reftable is increased as required.
  */
-static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
-                          uint64_t reftable_index, uint64_t *reftable_size,
-                          void *refblock, bool refblock_empty, bool *allocated,
-                          Error **errp)
+static int GRAPH_RDLOCK
+alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
+               uint64_t reftable_index, uint64_t *reftable_size,
+               void *refblock, bool refblock_empty, bool *allocated,
+               Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t offset;
@@ -2919,10 +3166,11 @@ static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
  * offset specified by the new reftable's entry. It does not modify the new
  * reftable or change any refcounts.
  */
-static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
-                          uint64_t reftable_index, uint64_t *reftable_size,
-                          void *refblock, bool refblock_empty, bool *allocated,
-                          Error **errp)
+static int GRAPH_RDLOCK
+flush_refblock(BlockDriverState *bs, uint64_t **reftable,
+               uint64_t reftable_index, uint64_t *reftable_size,
+               void *refblock, bool refblock_empty, bool *allocated,
+               Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t offset;
@@ -2938,7 +3186,7 @@ static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
             return ret;
         }
 
-        ret = bdrv_pwrite(bs->file, offset, refblock, s->cluster_size);
+        ret = bdrv_pwrite(bs->file, offset, s->cluster_size, refblock, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Failed to write refblock");
             return ret;
@@ -2963,16 +3211,17 @@ static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
  *
  * @allocated is set to true if a new cluster has been allocated.
  */
-static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
-                              uint64_t *new_reftable_index,
-                              uint64_t *new_reftable_size,
-                              void *new_refblock, int new_refblock_size,
-                              int new_refcount_bits,
-                              RefblockFinishOp *operation, bool *allocated,
-                              Qcow2SetRefcountFunc *new_set_refcount,
-                              BlockDriverAmendStatusCB *status_cb,
-                              void *cb_opaque, int index, int total,
-                              Error **errp)
+static int GRAPH_RDLOCK
+walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
+                   uint64_t *new_reftable_index,
+                   uint64_t *new_reftable_size,
+                   void *new_refblock, int new_refblock_size,
+                   int new_refcount_bits,
+                   RefblockFinishOp *operation, bool *allocated,
+                   Qcow2SetRefcountFunc *new_set_refcount,
+                   BlockDriverAmendStatusCB *status_cb,
+                   void *cb_opaque, int index, int total,
+                   Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t reftable_index;
@@ -3210,8 +3459,9 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
         cpu_to_be64s(&new_reftable[i]);
     }
 
-    ret = bdrv_pwrite(bs->file, new_reftable_offset, new_reftable,
-                      new_reftable_size * REFTABLE_ENTRY_SIZE);
+    ret = bdrv_pwrite(bs->file, new_reftable_offset,
+                      new_reftable_size * REFTABLE_ENTRY_SIZE, new_reftable,
+                      0);
 
     for (i = 0; i < new_reftable_size; i++) {
         be64_to_cpus(&new_reftable[i]);
@@ -3297,7 +3547,8 @@ done:
     return ret;
 }
 
-static int64_t get_refblock_offset(BlockDriverState *bs, uint64_t offset)
+static int64_t coroutine_fn GRAPH_RDLOCK
+get_refblock_offset(BlockDriverState *bs, uint64_t offset)
 {
     BDRVQcow2State *s = bs->opaque;
     uint32_t index = offset_to_reftable_index(s, offset);
@@ -3316,8 +3567,8 @@ static int64_t get_refblock_offset(BlockDriverState *bs, uint64_t offset)
     return covering_refblock_offset;
 }
 
-static int qcow2_discard_refcount_block(BlockDriverState *bs,
-                                        uint64_t discard_block_offs)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_discard_refcount_block(BlockDriverState *bs, uint64_t discard_block_offs)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t refblock_offs;
@@ -3373,7 +3624,7 @@ static int qcow2_discard_refcount_block(BlockDriverState *bs,
     return 0;
 }
 
-int qcow2_shrink_reftable(BlockDriverState *bs)
+int coroutine_fn qcow2_shrink_reftable(BlockDriverState *bs)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t *reftable_tmp =
@@ -3414,8 +3665,9 @@ int qcow2_shrink_reftable(BlockDriverState *bs)
         reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]);
     }
 
-    ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset, reftable_tmp,
-                           s->refcount_table_size * REFTABLE_ENTRY_SIZE);
+    ret = bdrv_co_pwrite_sync(bs->file, s->refcount_table_offset,
+                              s->refcount_table_size * REFTABLE_ENTRY_SIZE,
+                              reftable_tmp, 0);
     /*
      * If the write in the reftable failed the image may contain a partially
      * overwritten reftable. In this case it would be better to clear the
@@ -3440,7 +3692,7 @@ out:
     return ret;
 }
 
-int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
+int64_t coroutine_fn qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t i;
@@ -3462,7 +3714,8 @@ int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
     return -EIO;
 }
 
-int qcow2_detect_metadata_preallocation(BlockDriverState *bs)
+int coroutine_fn GRAPH_RDLOCK
+qcow2_detect_metadata_preallocation(BlockDriverState *bs)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t i, end_cluster, cluster_count = 0, threshold;
@@ -3470,12 +3723,12 @@ int qcow2_detect_metadata_preallocation(BlockDriverState *bs)
 
     qemu_co_mutex_assert_locked(&s->lock);
 
-    file_length = bdrv_getlength(bs->file->bs);
+    file_length = bdrv_co_getlength(bs->file->bs);
     if (file_length < 0) {
         return file_length;
     }
 
-    real_allocation = bdrv_get_allocated_file_size(bs->file->bs);
+    real_allocation = bdrv_co_get_allocated_file_size(bs->file->bs);
     if (real_allocation < 0) {
         return real_allocation;
     }
index 2e98c7f4b620ffadc10f6f181c75c45e1f3f86b2..92e47978bf9c10a560c13c6317bc01446a27ae7a 100644 (file)
@@ -29,6 +29,7 @@
 #include "qemu/bswap.h"
 #include "qemu/error-report.h"
 #include "qemu/cutils.h"
+#include "qemu/memalign.h"
 
 static void qcow2_free_single_snapshot(BlockDriverState *bs, int i)
 {
@@ -76,10 +77,11 @@ void qcow2_free_snapshots(BlockDriverState *bs)
  *   qcow2_check_refcounts() does not do anything with snapshots'
  *   extra data.)
  */
-static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
-                                   int *nb_clusters_reduced,
-                                   int *extra_data_dropped,
-                                   Error **errp)
+static coroutine_fn GRAPH_RDLOCK
+int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
+                            int *nb_clusters_reduced,
+                            int *extra_data_dropped,
+                            Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     QCowSnapshotHeader h;
@@ -107,7 +109,7 @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
 
         /* Read statically sized part of the snapshot header */
         offset = ROUND_UP(offset, 8);
-        ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
+        ret = bdrv_co_pread(bs->file, offset, sizeof(h), &h, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Failed to read snapshot table");
             goto fail;
@@ -145,8 +147,8 @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
         }
 
         /* Read known extra data */
-        ret = bdrv_pread(bs->file, offset, &extra,
-                         MIN(sizeof(extra), sn->extra_data_size));
+        ret = bdrv_co_pread(bs->file, offset,
+                            MIN(sizeof(extra), sn->extra_data_size), &extra, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Failed to read snapshot table");
             goto fail;
@@ -183,8 +185,8 @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
             /* Store unknown extra data */
             unknown_extra_data_size = sn->extra_data_size - sizeof(extra);
             sn->unknown_extra_data = g_malloc(unknown_extra_data_size);
-            ret = bdrv_pread(bs->file, offset, sn->unknown_extra_data,
-                             unknown_extra_data_size);
+            ret = bdrv_co_pread(bs->file, offset, unknown_extra_data_size,
+                                sn->unknown_extra_data, 0);
             if (ret < 0) {
                 error_setg_errno(errp, -ret,
                                  "Failed to read snapshot table");
@@ -195,7 +197,7 @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
 
         /* Read snapshot ID */
         sn->id_str = g_malloc(id_str_size + 1);
-        ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
+        ret = bdrv_co_pread(bs->file, offset, id_str_size, sn->id_str, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Failed to read snapshot table");
             goto fail;
@@ -205,7 +207,7 @@ static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
 
         /* Read snapshot name */
         sn->name = g_malloc(name_size + 1);
-        ret = bdrv_pread(bs->file, offset, sn->name, name_size);
+        ret = bdrv_co_pread(bs->file, offset, name_size, sn->name, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Failed to read snapshot table");
             goto fail;
@@ -260,7 +262,7 @@ fail:
     return ret;
 }
 
-int qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
+int coroutine_fn qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
 {
     return qcow2_do_read_snapshots(bs, false, NULL, NULL, errp);
 }
@@ -348,13 +350,13 @@ int qcow2_write_snapshots(BlockDriverState *bs)
         h.name_size = cpu_to_be16(name_size);
         offset = ROUND_UP(offset, 8);
 
-        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
+        ret = bdrv_pwrite(bs->file, offset, sizeof(h), &h, 0);
         if (ret < 0) {
             goto fail;
         }
         offset += sizeof(h);
 
-        ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
+        ret = bdrv_pwrite(bs->file, offset, sizeof(extra), &extra, 0);
         if (ret < 0) {
             goto fail;
         }
@@ -368,21 +370,21 @@ int qcow2_write_snapshots(BlockDriverState *bs)
             assert(unknown_extra_data_size <= BDRV_REQUEST_MAX_BYTES);
             assert(sn->unknown_extra_data);
 
-            ret = bdrv_pwrite(bs->file, offset, sn->unknown_extra_data,
-                              unknown_extra_data_size);
+            ret = bdrv_pwrite(bs->file, offset, unknown_extra_data_size,
+                              sn->unknown_extra_data, 0);
             if (ret < 0) {
                 goto fail;
             }
             offset += unknown_extra_data_size;
         }
 
-        ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
+        ret = bdrv_pwrite(bs->file, offset, id_str_size, sn->id_str, 0);
         if (ret < 0) {
             goto fail;
         }
         offset += id_str_size;
 
-        ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
+        ret = bdrv_pwrite(bs->file, offset, name_size, sn->name, 0);
         if (ret < 0) {
             goto fail;
         }
@@ -405,7 +407,7 @@ int qcow2_write_snapshots(BlockDriverState *bs)
     header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);
 
     ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
-                           &header_data, sizeof(header_data));
+                           sizeof(header_data), &header_data, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -440,8 +442,9 @@ int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs,
     } QEMU_PACKED snapshot_table_pointer;
 
     /* qcow2_do_open() discards this information in check mode */
-    ret = bdrv_pread(bs->file, offsetof(QCowHeader, nb_snapshots),
-                     &snapshot_table_pointer, sizeof(snapshot_table_pointer));
+    ret = bdrv_co_pread(bs->file, offsetof(QCowHeader, nb_snapshots),
+                        sizeof(snapshot_table_pointer), &snapshot_table_pointer,
+                        0);
     if (ret < 0) {
         result->check_errors++;
         fprintf(stderr, "ERROR failed to read the snapshot table pointer from "
@@ -510,9 +513,9 @@ int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs,
         assert(fix & BDRV_FIX_ERRORS);
 
         snapshot_table_pointer.nb_snapshots = cpu_to_be32(s->nb_snapshots);
-        ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
-                               &snapshot_table_pointer.nb_snapshots,
-                               sizeof(snapshot_table_pointer.nb_snapshots));
+        ret = bdrv_co_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
+                                  sizeof(snapshot_table_pointer.nb_snapshots),
+                                  &snapshot_table_pointer.nb_snapshots, 0);
         if (ret < 0) {
             result->check_errors++;
             fprintf(stderr, "ERROR failed to update the snapshot count in the "
@@ -692,8 +695,8 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
         goto fail;
     }
 
-    ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
-                      s->l1_size * L1E_SIZE);
+    ret = bdrv_pwrite(bs->file, sn->l1_table_offset, s->l1_size * L1E_SIZE,
+                      l1_table, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -828,8 +831,8 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
         goto fail;
     }
 
-    ret = bdrv_pread(bs->file, sn->l1_table_offset,
-                     sn_l1_table, sn_l1_bytes);
+    ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_bytes, sn_l1_table,
+                     0);
     if (ret < 0) {
         goto fail;
     }
@@ -847,8 +850,8 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
         goto fail;
     }
 
-    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
-                           cur_l1_bytes);
+    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, cur_l1_bytes,
+                           sn_l1_table, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -1026,7 +1029,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
     int new_l1_bytes;
     int ret;
 
-    assert(bs->read_only);
+    assert(bdrv_is_read_only(bs));
 
     /* Search the snapshot */
     snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
@@ -1050,8 +1053,8 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
         return -ENOMEM;
     }
 
-    ret = bdrv_pread(bs->file, sn->l1_table_offset,
-                     new_l1_table, new_l1_bytes);
+    ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_bytes,
+                     new_l1_table, 0);
     if (ret < 0) {
         error_setg(errp, "Failed to read l1 table for snapshot");
         qemu_vfree(new_l1_table);
index 1914baf456c71cb5caef3ca85c1ded57d837251b..d6071a1eae4a087c90b7c8eb287e8bcae8ec4905 100644 (file)
@@ -34,6 +34,7 @@
 #endif
 
 #include "qcow2.h"
+#include "block/block-io.h"
 #include "block/thread-pool.h"
 #include "crypto.h"
 
@@ -42,7 +43,6 @@ qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, void *arg)
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
-    ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
 
     qemu_co_mutex_lock(&s->lock);
     while (s->nb_threads >= QCOW2_MAX_THREADS) {
@@ -51,7 +51,7 @@ qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, void *arg)
     s->nb_threads++;
     qemu_co_mutex_unlock(&s->lock);
 
-    ret = thread_pool_submit_co(pool, func, arg);
+    ret = thread_pool_submit_co(func, arg);
 
     qemu_co_mutex_lock(&s->lock);
     s->nb_threads--;
index 3a90ef27868ad7e4bd9eb2e7fa0ff363c23db8f1..13e032bd5e2e07565bfd29cfd481560437bcd609 100644 (file)
 #include "qemu/option_int.h"
 #include "qemu/cutils.h"
 #include "qemu/bswap.h"
+#include "qemu/memalign.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qapi-visit-block-core.h"
 #include "crypto.h"
 #include "block/aio_task.h"
+#include "block/dirty-bitmap.h"
 
 /*
   Differences with QCOW:
@@ -74,7 +76,7 @@ typedef struct {
 
 static int coroutine_fn
 qcow2_co_preadv_compressed(BlockDriverState *bs,
-                           uint64_t cluster_descriptor,
+                           uint64_t l2_entry,
                            uint64_t offset,
                            uint64_t bytes,
                            QEMUIOVector *qiov,
@@ -93,9 +95,10 @@ static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
 }
 
 
-static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
-                                          uint8_t *buf, size_t buflen,
-                                          void *opaque, Error **errp)
+static int GRAPH_RDLOCK
+qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
+                           uint8_t *buf, size_t buflen,
+                           void *opaque, Error **errp)
 {
     BlockDriverState *bs = opaque;
     BDRVQcow2State *s = bs->opaque;
@@ -106,18 +109,19 @@ static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
         return -1;
     }
 
-    ret = bdrv_pread(bs->file,
-                     s->crypto_header.offset + offset, buf, buflen);
+    ret = bdrv_pread(bs->file, s->crypto_header.offset + offset, buflen, buf,
+                     0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not read encryption header");
         return -1;
     }
-    return ret;
+    return 0;
 }
 
 
-static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
-                                          void *opaque, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, void *opaque,
+                           Error **errp)
 {
     BlockDriverState *bs = opaque;
     BDRVQcow2State *s = bs->opaque;
@@ -142,21 +146,21 @@ static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
      */
     clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
     assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0);
-    ret = bdrv_pwrite_zeroes(bs->file,
-                             ret,
-                             clusterlen, 0);
+    ret = bdrv_co_pwrite_zeroes(bs->file, ret, clusterlen, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not zero fill encryption header");
         return -1;
     }
 
-    return ret;
+    return 0;
 }
 
 
-static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
-                                           const uint8_t *buf, size_t buflen,
-                                           void *opaque, Error **errp)
+/* The graph lock must be held when called in coroutine context */
+static int coroutine_mixed_fn GRAPH_RDLOCK
+qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
+                            const uint8_t *buf, size_t buflen,
+                            void *opaque, Error **errp)
 {
     BlockDriverState *bs = opaque;
     BDRVQcow2State *s = bs->opaque;
@@ -167,13 +171,13 @@ static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
         return -1;
     }
 
-    ret = bdrv_pwrite(bs->file,
-                      s->crypto_header.offset + offset, buf, buflen);
+    ret = bdrv_pwrite(bs->file, s->crypto_header.offset + offset, buflen, buf,
+                      0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not read encryption header");
         return -1;
     }
-    return ret;
+    return 0;
 }
 
 static QDict*
@@ -197,10 +201,10 @@ qcow2_extract_crypto_opts(QemuOpts *opts, const char *fmt, Error **errp)
  * unknown magic is skipped (future extension this version knows nothing about)
  * return 0 upon success, non-0 otherwise
  */
-static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
-                                 uint64_t end_offset, void **p_feature_table,
-                                 int flags, bool *need_update_header,
-                                 Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
+                      uint64_t end_offset, void **p_feature_table,
+                      int flags, bool *need_update_header, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     QCowExtension ext;
@@ -226,7 +230,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
         printf("attempting to read extended header in offset %lu\n", offset);
 #endif
 
-        ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
+        ret = bdrv_co_pread(bs->file, offset, sizeof(ext), &ext, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
                              "pread fail from offset %" PRIu64, offset);
@@ -254,7 +258,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                            sizeof(bs->backing_format));
                 return 2;
             }
-            ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
+            ret = bdrv_co_pread(bs->file, offset, ext.len, bs->backing_format, 0);
             if (ret < 0) {
                 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
                                  "Could not read format name");
@@ -270,10 +274,11 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
         case QCOW2_EXT_MAGIC_FEATURE_TABLE:
             if (p_feature_table != NULL) {
                 void *feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
-                ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
+                ret = bdrv_co_pread(bs->file, offset, ext.len, feature_table, 0);
                 if (ret < 0) {
                     error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
                                      "Could not read table");
+                    g_free(feature_table);
                     return ret;
                 }
 
@@ -295,7 +300,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                 return -EINVAL;
             }
 
-            ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len);
+            ret = bdrv_co_pread(bs->file, offset, ext.len, &s->crypto_header, 0);
             if (ret < 0) {
                 error_setg_errno(errp, -ret,
                                  "Unable to read CRYPTO header extension");
@@ -351,7 +356,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                 break;
             }
 
-            ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len);
+            ret = bdrv_co_pread(bs->file, offset, ext.len, &bitmaps_ext, 0);
             if (ret < 0) {
                 error_setg_errno(errp, -ret, "bitmaps_ext: "
                                  "Could not read ext header");
@@ -415,7 +420,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
         case QCOW2_EXT_MAGIC_DATA_FILE:
         {
             s->image_data_file = g_malloc0(ext.len + 1);
-            ret = bdrv_pread(bs->file, offset, s->image_data_file, ext.len);
+            ret = bdrv_co_pread(bs->file, offset, ext.len, s->image_data_file, 0);
             if (ret < 0) {
                 error_setg_errno(errp, -ret,
                                  "ERROR: Could not read data file name");
@@ -439,7 +444,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                 uext->len = ext.len;
                 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
 
-                ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
+                ret = bdrv_co_pread(bs->file, offset, uext->len, uext->data, 0);
                 if (ret < 0) {
                     error_setg_errno(errp, -ret, "ERROR: unknown extension: "
                                      "Could not read data");
@@ -515,12 +520,9 @@ int qcow2_mark_dirty(BlockDriverState *bs)
     }
 
     val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
-    ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
-                      &val, sizeof(val));
-    if (ret < 0) {
-        return ret;
-    }
-    ret = bdrv_flush(bs->file->bs);
+    ret = bdrv_pwrite_sync(bs->file,
+                           offsetof(QCowHeader, incompatible_features),
+                           sizeof(val), &val, 0);
     if (ret < 0) {
         return ret;
     }
@@ -535,7 +537,7 @@ int qcow2_mark_dirty(BlockDriverState *bs)
  * function when there are no pending requests, it does not guard against
  * concurrent requests dirtying the image.
  */
-static int qcow2_mark_clean(BlockDriverState *bs)
+static int GRAPH_RDLOCK qcow2_mark_clean(BlockDriverState *bs)
 {
     BDRVQcow2State *s = bs->opaque;
 
@@ -569,7 +571,8 @@ int qcow2_mark_corrupt(BlockDriverState *bs)
  * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
  * before if necessary.
  */
-int qcow2_mark_consistent(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_mark_consistent(BlockDriverState *bs)
 {
     BDRVQcow2State *s = bs->opaque;
 
@@ -601,9 +604,9 @@ static void qcow2_add_check_result(BdrvCheckResult *out,
     }
 }
 
-static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
-                                              BdrvCheckResult *result,
-                                              BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_check_locked(BlockDriverState *bs, BdrvCheckResult *result,
+                      BdrvCheckMode fix)
 {
     BdrvCheckResult snapshot_res = {};
     BdrvCheckResult refcount_res = {};
@@ -640,9 +643,9 @@ static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn qcow2_co_check(BlockDriverState *bs,
-                                       BdrvCheckResult *result,
-                                       BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_check(BlockDriverState *bs, BdrvCheckResult *result,
+               BdrvCheckMode fix)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret;
@@ -681,6 +684,7 @@ static const char *const mutable_opts[] = {
     QCOW2_OPT_DISCARD_REQUEST,
     QCOW2_OPT_DISCARD_SNAPSHOT,
     QCOW2_OPT_DISCARD_OTHER,
+    QCOW2_OPT_DISCARD_NO_UNREF,
     QCOW2_OPT_OVERLAP,
     QCOW2_OPT_OVERLAP_TEMPLATE,
     QCOW2_OPT_OVERLAP_MAIN_HEADER,
@@ -725,6 +729,11 @@ static QemuOptsList qcow2_runtime_opts = {
             .type = QEMU_OPT_BOOL,
             .help = "Generate discard requests when other clusters are freed",
         },
+        {
+            .name = QCOW2_OPT_DISCARD_NO_UNREF,
+            .type = QEMU_OPT_BOOL,
+            .help = "Do not unreference discarded clusters",
+        },
         {
             .name = QCOW2_OPT_OVERLAP,
             .type = QEMU_OPT_STRING,
@@ -840,9 +849,10 @@ static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
 {
     BDRVQcow2State *s = bs->opaque;
     if (s->cache_clean_interval > 0) {
-        s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
-                                             SCALE_MS, cache_clean_timer_cb,
-                                             bs);
+        s->cache_clean_timer =
+            aio_timer_new_with_attrs(context, QEMU_CLOCK_VIRTUAL,
+                                     SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL,
+                                     cache_clean_timer_cb, bs);
         timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
                   (int64_t) s->cache_clean_interval * 1000);
     }
@@ -852,7 +862,6 @@ static void cache_clean_timer_del(BlockDriverState *bs)
 {
     BDRVQcow2State *s = bs->opaque;
     if (s->cache_clean_timer) {
-        timer_del(s->cache_clean_timer);
         timer_free(s->cache_clean_timer);
         s->cache_clean_timer = NULL;
     }
@@ -869,7 +878,7 @@ static void qcow2_attach_aio_context(BlockDriverState *bs,
     cache_clean_timer_init(bs, new_context);
 }
 
-static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
+static bool read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
                              uint64_t *l2_cache_size,
                              uint64_t *l2_cache_entry_size,
                              uint64_t *refcount_cache_size, Error **errp)
@@ -907,16 +916,16 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
             error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
                        " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
                        "at the same time");
-            return;
+            return false;
         } else if (l2_cache_size_set &&
                    (l2_cache_max_setting > combined_cache_size)) {
             error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
                        QCOW2_OPT_CACHE_SIZE);
-            return;
+            return false;
         } else if (*refcount_cache_size > combined_cache_size) {
             error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed "
                        QCOW2_OPT_CACHE_SIZE);
-            return;
+            return false;
         }
 
         if (l2_cache_size_set) {
@@ -955,8 +964,10 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
         error_setg(errp, "L2 cache entry size must be a power of two "
                    "between %d and the cluster size (%d)",
                    1 << MIN_CLUSTER_BITS, s->cluster_size);
-        return;
+        return false;
     }
+
+    return true;
 }
 
 typedef struct Qcow2ReopenState {
@@ -966,14 +977,14 @@ typedef struct Qcow2ReopenState {
     bool use_lazy_refcounts;
     int overlap_check;
     bool discard_passthrough[QCOW2_DISCARD_MAX];
+    bool discard_no_unref;
     uint64_t cache_clean_interval;
     QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
 } Qcow2ReopenState;
 
-static int qcow2_update_options_prepare(BlockDriverState *bs,
-                                        Qcow2ReopenState *r,
-                                        QDict *options, int flags,
-                                        Error **errp)
+static int GRAPH_RDLOCK
+qcow2_update_options_prepare(BlockDriverState *bs, Qcow2ReopenState *r,
+                             QDict *options, int flags, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     QemuOpts *opts = NULL;
@@ -983,7 +994,6 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
     int i;
     const char *encryptfmt;
     QDict *encryptopts = NULL;
-    Error *local_err = NULL;
     int ret;
 
     qdict_extract_subqdict(options, &encryptopts, "encrypt.");
@@ -996,10 +1006,8 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
     }
 
     /* get L2 table/refcount block cache size from command line options */
-    read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
-                     &refcount_cache_size, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (!read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
+                          &refcount_cache_size, errp)) {
         ret = -EINVAL;
         goto fail;
     }
@@ -1140,6 +1148,15 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
     r->discard_passthrough[QCOW2_DISCARD_OTHER] =
         qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
 
+    r->discard_no_unref = qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_NO_UNREF,
+                                            false);
+    if (r->discard_no_unref && s->qcow_version < 3) {
+        error_setg(errp,
+                   "discard-no-unref is only supported since qcow2 version 3");
+        ret = -EINVAL;
+        goto fail;
+    }
+
     switch (s->crypt_method_header) {
     case QCOW_CRYPT_NONE:
         if (encryptfmt) {
@@ -1160,6 +1177,10 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
         }
         qdict_put_str(encryptopts, "format", "qcow");
         r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
+        if (!r->crypto_opts) {
+            ret = -EINVAL;
+            goto fail;
+        }
         break;
 
     case QCOW_CRYPT_LUKS:
@@ -1172,14 +1193,15 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
         }
         qdict_put_str(encryptopts, "format", "luks");
         r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
+        if (!r->crypto_opts) {
+            ret = -EINVAL;
+            goto fail;
+        }
         break;
 
     default:
         error_setg(errp, "Unsupported encryption method %d",
                    s->crypt_method_header);
-        break;
-    }
-    if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) {
         ret = -EINVAL;
         goto fail;
     }
@@ -1215,6 +1237,8 @@ static void qcow2_update_options_commit(BlockDriverState *bs,
         s->discard_passthrough[i] = r->discard_passthrough[i];
     }
 
+    s->discard_no_unref = r->discard_no_unref;
+
     if (s->cache_clean_interval != r->cache_clean_interval) {
         cache_clean_timer_del(bs);
         s->cache_clean_interval = r->cache_clean_interval;
@@ -1237,8 +1261,9 @@ static void qcow2_update_options_abort(BlockDriverState *bs,
     qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
 }
 
-static int qcow2_update_options(BlockDriverState *bs, QDict *options,
-                                int flags, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_update_options(BlockDriverState *bs, QDict *options, int flags,
+                     Error **errp)
 {
     Qcow2ReopenState r = {};
     int ret;
@@ -1290,19 +1315,20 @@ static int validate_compression_type(BDRVQcow2State *s, Error **errp)
 }
 
 /* Called with s->lock held.  */
-static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
-                                      int flags, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
+              bool open_data_file, Error **errp)
 {
+    ERRP_GUARD();
     BDRVQcow2State *s = bs->opaque;
     unsigned int len, i;
     int ret = 0;
     QCowHeader header;
-    Error *local_err = NULL;
     uint64_t ext_end;
     uint64_t l1_vm_state_index;
     bool update_header = false;
 
-    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+    ret = bdrv_co_pread(bs->file, 0, sizeof(header), &header, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not read qcow2 header");
         goto fail;
@@ -1378,8 +1404,9 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
     if (header.header_length > sizeof(header)) {
         s->unknown_header_fields_size = header.header_length - sizeof(header);
         s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
-        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
-                         s->unknown_header_fields_size);
+        ret = bdrv_co_pread(bs->file, sizeof(header),
+                            s->unknown_header_fields_size,
+                            s->unknown_header_fields, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
                              "fields");
@@ -1574,8 +1601,8 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
             ret = -ENOMEM;
             goto fail;
         }
-        ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
-                         s->l1_size * L1E_SIZE);
+        ret = bdrv_co_pread(bs->file, s->l1_table_offset, s->l1_size * L1E_SIZE,
+                            s->l1_table, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Could not read L1 table");
             goto fail;
@@ -1609,51 +1636,57 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
         goto fail;
     }
 
-    /* Open external data file */
-    s->data_file = bdrv_open_child(NULL, options, "data-file", bs,
-                                   &child_of_bds, BDRV_CHILD_DATA,
-                                   true, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
+    if (open_data_file) {
+        /* Open external data file */
+        bdrv_graph_co_rdunlock();
+        s->data_file = bdrv_co_open_child(NULL, options, "data-file", bs,
+                                          &child_of_bds, BDRV_CHILD_DATA,
+                                          true, errp);
+        bdrv_graph_co_rdlock();
+        if (*errp) {
+            ret = -EINVAL;
+            goto fail;
+        }
 
-    if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
-        if (!s->data_file && s->image_data_file) {
-            s->data_file = bdrv_open_child(s->image_data_file, options,
-                                           "data-file", bs, &child_of_bds,
-                                           BDRV_CHILD_DATA, false, errp);
+        if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
+            if (!s->data_file && s->image_data_file) {
+                bdrv_graph_co_rdunlock();
+                s->data_file = bdrv_co_open_child(s->image_data_file, options,
+                                                  "data-file", bs,
+                                                  &child_of_bds,
+                                                  BDRV_CHILD_DATA, false, errp);
+                bdrv_graph_co_rdlock();
+                if (!s->data_file) {
+                    ret = -EINVAL;
+                    goto fail;
+                }
+            }
             if (!s->data_file) {
+                error_setg(errp, "'data-file' is required for this image");
                 ret = -EINVAL;
                 goto fail;
             }
-        }
-        if (!s->data_file) {
-            error_setg(errp, "'data-file' is required for this image");
-            ret = -EINVAL;
-            goto fail;
-        }
 
-        /* No data here */
-        bs->file->role &= ~BDRV_CHILD_DATA;
+            /* No data here */
+            bs->file->role &= ~BDRV_CHILD_DATA;
 
-        /* Must succeed because we have given up permissions if anything */
-        bdrv_child_refresh_perms(bs, bs->file, &error_abort);
-    } else {
-        if (s->data_file) {
-            error_setg(errp, "'data-file' can only be set for images with an "
-                             "external data file");
-            ret = -EINVAL;
-            goto fail;
-        }
+            /* Must succeed because we have given up permissions if anything */
+            bdrv_child_refresh_perms(bs, bs->file, &error_abort);
+        } else {
+            if (s->data_file) {
+                error_setg(errp, "'data-file' can only be set for images with "
+                                 "an external data file");
+                ret = -EINVAL;
+                goto fail;
+            }
 
-        s->data_file = bs->file;
+            s->data_file = bs->file;
 
-        if (data_file_is_raw(bs)) {
-            error_setg(errp, "data-file-raw requires a data file");
-            ret = -EINVAL;
-            goto fail;
+            if (data_file_is_raw(bs)) {
+                error_setg(errp, "data-file-raw requires a data file");
+                ret = -EINVAL;
+                goto fail;
+            }
         }
     }
 
@@ -1691,16 +1724,27 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
             ret = -EINVAL;
             goto fail;
         }
-        ret = bdrv_pread(bs->file, header.backing_file_offset,
-                         bs->auto_backing_file, len);
+
+        s->image_backing_file = g_malloc(len + 1);
+        ret = bdrv_co_pread(bs->file, header.backing_file_offset, len,
+                            s->image_backing_file, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Could not read backing file name");
             goto fail;
         }
-        bs->auto_backing_file[len] = '\0';
-        pstrcpy(bs->backing_file, sizeof(bs->backing_file),
-                bs->auto_backing_file);
-        s->image_backing_file = g_strdup(bs->auto_backing_file);
+        s->image_backing_file[len] = '\0';
+
+        /*
+         * Update only when something has changed.  This function is called by
+         * qcow2_co_invalidate_cache(), and we do not want to reset
+         * auto_backing_file unless necessary.
+         */
+        if (!g_str_equal(s->image_backing_file, bs->backing_file)) {
+            pstrcpy(bs->backing_file, sizeof(bs->backing_file),
+                    s->image_backing_file);
+            pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
+                    s->image_backing_file);
+        }
     }
 
     /*
@@ -1720,8 +1764,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
 
     /* Clear unknown autoclear feature bits */
     update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
-    update_header =
-        update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE);
+    update_header = update_header && bdrv_is_writable(bs);
     if (update_header) {
         s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
     }
@@ -1786,9 +1829,8 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
 
     if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) {
         /* It's case 1, 2 or 3.2. Or 3.1 which is BUG in management layer. */
-        bool header_updated = qcow2_load_dirty_bitmaps(bs, &local_err);
-        if (local_err != NULL) {
-            error_propagate(errp, local_err);
+        bool header_updated;
+        if (!qcow2_load_dirty_bitmaps(bs, &header_updated, errp)) {
             ret = -EINVAL;
             goto fail;
         }
@@ -1809,7 +1851,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
     bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
 
     /* Repair image if dirty */
-    if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
+    if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) &&
         (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
         BdrvCheckResult result = {0};
 
@@ -1837,8 +1879,10 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
 
  fail:
     g_free(s->image_data_file);
-    if (has_data_file(bs)) {
-        bdrv_unref_child(bs, s->data_file);
+    if (open_data_file && has_data_file(bs)) {
+        bdrv_graph_co_rdunlock();
+        bdrv_co_unref_child(bs, s->data_file);
+        bdrv_graph_co_rdlock();
         s->data_file = NULL;
     }
     g_free(s->unknown_header_fields);
@@ -1873,9 +1917,14 @@ static void coroutine_fn qcow2_open_entry(void *opaque)
     QCow2OpenCo *qoc = opaque;
     BDRVQcow2State *s = qoc->bs->opaque;
 
+    GRAPH_RDLOCK_GUARD();
+
     qemu_co_mutex_lock(&s->lock);
-    qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp);
+    qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, true,
+                             qoc->errp);
     qemu_co_mutex_unlock(&s->lock);
+
+    aio_wait_kick();
 }
 
 static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
@@ -1889,24 +1938,23 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
         .errp = errp,
         .ret = -EINPROGRESS
     };
+    int ret;
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_IMAGE, false, errp);
-    if (!bs->file) {
-        return -EINVAL;
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
     }
 
     /* Initialise locks */
     qemu_co_mutex_init(&s->lock);
 
-    if (qemu_in_coroutine()) {
-        /* From bdrv_co_create.  */
-        qcow2_open_entry(&qoc);
-    } else {
-        assert(qemu_get_current_aio_context() == qemu_get_aio_context());
-        qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc));
-        BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
-    }
+    assert(!qemu_in_coroutine());
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+
+    aio_co_enter(bdrv_get_aio_context(bs),
+                 qemu_coroutine_create(qcow2_open_entry, &qoc));
+    AIO_WAIT_WHILE_UNLOCKED(NULL, qoc.ret == -EINPROGRESS);
+
     return qoc.ret;
 }
 
@@ -1922,12 +1970,17 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
     bs->bl.pdiscard_alignment = s->cluster_size;
 }
 
-static int qcow2_reopen_prepare(BDRVReopenState *state,
-                                BlockReopenQueue *queue, Error **errp)
+static int GRAPH_UNLOCKED
+qcow2_reopen_prepare(BDRVReopenState *state,BlockReopenQueue *queue,
+                     Error **errp)
 {
+    BDRVQcow2State *s = state->bs->opaque;
     Qcow2ReopenState *r;
     int ret;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     r = g_new0(Qcow2ReopenState, 1);
     state->opaque = r;
 
@@ -1955,6 +2008,16 @@ static int qcow2_reopen_prepare(BDRVReopenState *state,
         }
     }
 
+    /*
+     * Without an external data file, s->data_file points to the same BdrvChild
+     * as bs->file. It needs to be resynced after reopen because bs->file may
+     * be changed. We can't use it in the meantime.
+     */
+    if (!has_data_file(state->bs)) {
+        assert(s->data_file == state->bs->file);
+        s->data_file = NULL;
+    }
+
     return 0;
 
 fail:
@@ -1965,12 +2028,25 @@ fail:
 
 static void qcow2_reopen_commit(BDRVReopenState *state)
 {
+    BDRVQcow2State *s = state->bs->opaque;
+
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     qcow2_update_options_commit(state->bs, state->opaque);
+    if (!s->data_file) {
+        /*
+         * If we don't have an external data file, s->data_file was cleared by
+         * qcow2_reopen_prepare() and needs to be updated.
+         */
+        s->data_file = state->bs->file;
+    }
     g_free(state->opaque);
 }
 
 static void qcow2_reopen_commit_post(BDRVReopenState *state)
 {
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (state->flags & BDRV_O_RDWR) {
         Error *local_err = NULL;
 
@@ -1989,6 +2065,17 @@ static void qcow2_reopen_commit_post(BDRVReopenState *state)
 
 static void qcow2_reopen_abort(BDRVReopenState *state)
 {
+    BDRVQcow2State *s = state->bs->opaque;
+
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    if (!s->data_file) {
+        /*
+         * If we don't have an external data file, s->data_file was cleared by
+         * qcow2_reopen_prepare() and needs to be restored.
+         */
+        s->data_file = state->bs->file;
+    }
     qcow2_update_options_abort(state->bs, state->opaque);
     g_free(state->opaque);
 }
@@ -2039,11 +2126,10 @@ static void qcow2_join_options(QDict *options, QDict *old_options)
     }
 }
 
-static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
-                                              bool want_zero,
-                                              int64_t offset, int64_t count,
-                                              int64_t *pnum, int64_t *map,
-                                              BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
+                      int64_t count, int64_t *pnum, int64_t *map,
+                      BlockDriverState **file)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t host_offset;
@@ -2087,12 +2173,14 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
     {
         status |= BDRV_BLOCK_RECURSE;
     }
+    if (type == QCOW2_SUBCLUSTER_COMPRESSED) {
+        status |= BDRV_BLOCK_COMPRESSED;
+    }
     return status;
 }
 
-static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
-                                            QCowL2Meta **pl2meta,
-                                            bool link_l2)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_handle_l2meta(BlockDriverState *bs, QCowL2Meta **pl2meta, bool link_l2)
 {
     int ret = 0;
     QCowL2Meta *l2meta = *pl2meta;
@@ -2123,7 +2211,7 @@ out:
     return ret;
 }
 
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
 qcow2_co_preadv_encrypted(BlockDriverState *bs,
                            uint64_t host_offset,
                            uint64_t offset,
@@ -2151,7 +2239,7 @@ qcow2_co_preadv_encrypted(BlockDriverState *bs,
         return -ENOMEM;
     }
 
-    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+    BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO);
     ret = bdrv_co_pread(s->data_file, host_offset, bytes, buf, 0);
     if (ret < 0) {
         goto fail;
@@ -2175,7 +2263,7 @@ typedef struct Qcow2AioTask {
 
     BlockDriverState *bs;
     QCow2SubclusterType subcluster_type; /* only for read */
-    uint64_t host_offset; /* or full descriptor in compressed clusters */
+    uint64_t host_offset; /* or l2_entry for compressed read */
     uint64_t offset;
     uint64_t bytes;
     QEMUIOVector *qiov;
@@ -2224,12 +2312,10 @@ static coroutine_fn int qcow2_add_task(BlockDriverState *bs,
     return 0;
 }
 
-static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
-                                             QCow2SubclusterType subc_type,
-                                             uint64_t host_offset,
-                                             uint64_t offset, uint64_t bytes,
-                                             QEMUIOVector *qiov,
-                                             size_t qiov_offset)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_preadv_task(BlockDriverState *bs, QCow2SubclusterType subc_type,
+                     uint64_t host_offset, uint64_t offset, uint64_t bytes,
+                     QEMUIOVector *qiov, size_t qiov_offset)
 {
     BDRVQcow2State *s = bs->opaque;
 
@@ -2243,7 +2329,7 @@ static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
     case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
         assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */
 
-        BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
+        BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
         return bdrv_co_preadv_part(bs->backing, offset, bytes,
                                    qiov, qiov_offset, 0);
 
@@ -2257,7 +2343,7 @@ static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
                                              offset, bytes, qiov, qiov_offset);
         }
 
-        BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+        BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO);
         return bdrv_co_preadv_part(s->data_file, host_offset,
                                    bytes, qiov, qiov_offset, 0);
 
@@ -2268,7 +2354,11 @@ static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
     g_assert_not_reached();
 }
 
-static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
+/*
+ * This function can count as GRAPH_RDLOCK because qcow2_co_preadv_part() holds
+ * the graph lock and keeps it until this coroutine has terminated.
+ */
+static int coroutine_fn GRAPH_RDLOCK qcow2_co_preadv_task_entry(AioTask *task)
 {
     Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
 
@@ -2279,10 +2369,10 @@ static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
                                 t->qiov, t->qiov_offset);
 }
 
-static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
-                                             uint64_t offset, uint64_t bytes,
-                                             QEMUIOVector *qiov,
-                                             size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     QEMUIOVector *qiov, size_t qiov_offset,
+                     BdrvRequestFlags flags)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret = 0;
@@ -2402,7 +2492,8 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
  * Return 1 if the COW regions read as zeroes, 0 if not, < 0 on error.
  * Note that returning 0 does not guarantee non-zero data.
  */
-static int is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
+static int coroutine_fn GRAPH_RDLOCK
+is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
 {
     /*
      * This check is designed for optimization shortcut so it must be
@@ -2420,7 +2511,8 @@ static int is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
                                 m->cow_end.nb_bytes);
 }
 
-static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
+static int coroutine_fn GRAPH_RDLOCK
+handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
 {
     BDRVQcow2State *s = bs->opaque;
     QCowL2Meta *m;
@@ -2461,7 +2553,7 @@ static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
             return ret;
         }
 
-        BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
+        BLKDBG_CO_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
         ret = bdrv_co_pwrite_zeroes(s->data_file, start_offset, nb_bytes,
                                     BDRV_REQ_NO_FALLBACK);
         if (ret < 0) {
@@ -2483,12 +2575,10 @@ static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
  * l2meta  - if not NULL, qcow2_co_pwritev_task() will consume it. Caller must
  *           not use it somehow after qcow2_co_pwritev_task() call
  */
-static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs,
-                                              uint64_t host_offset,
-                                              uint64_t offset, uint64_t bytes,
-                                              QEMUIOVector *qiov,
-                                              uint64_t qiov_offset,
-                                              QCowL2Meta *l2meta)
+static coroutine_fn GRAPH_RDLOCK
+int qcow2_co_pwritev_task(BlockDriverState *bs, uint64_t host_offset,
+                          uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
+                          uint64_t qiov_offset, QCowL2Meta *l2meta)
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
@@ -2528,7 +2618,7 @@ static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs,
      * guest data now.
      */
     if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) {
-        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+        BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
         trace_qcow2_writev_data(qemu_coroutine_self(), host_offset);
         ret = bdrv_co_pwritev_part(s->data_file, host_offset,
                                    bytes, qiov, qiov_offset, 0);
@@ -2554,7 +2644,11 @@ out_locked:
     return ret;
 }
 
-static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
+/*
+ * This function can count as GRAPH_RDLOCK because qcow2_co_pwritev_part() holds
+ * the graph lock and keeps it until this coroutine has terminated.
+ */
+static coroutine_fn GRAPH_RDLOCK int qcow2_co_pwritev_task_entry(AioTask *task)
 {
     Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
 
@@ -2565,9 +2659,10 @@ static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
                                  t->l2meta);
 }
 
-static coroutine_fn int qcow2_co_pwritev_part(
-        BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-        QEMUIOVector *qiov, size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                      QEMUIOVector *qiov, size_t qiov_offset,
+                      BdrvRequestFlags flags)
 {
     BDRVQcow2State *s = bs->opaque;
     int offset_in_cluster;
@@ -2647,7 +2742,7 @@ fail_nometa:
     return ret;
 }
 
-static int qcow2_inactivate(BlockDriverState *bs)
+static int GRAPH_RDLOCK qcow2_inactivate(BlockDriverState *bs)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret, result = 0;
@@ -2682,7 +2777,8 @@ static int qcow2_inactivate(BlockDriverState *bs)
     return result;
 }
 
-static void qcow2_close(BlockDriverState *bs)
+static void coroutine_mixed_fn GRAPH_RDLOCK
+qcow2_do_close(BlockDriverState *bs, bool close_data_file)
 {
     BDRVQcow2State *s = bs->opaque;
     qemu_vfree(s->l1_table);
@@ -2708,23 +2804,37 @@ static void qcow2_close(BlockDriverState *bs)
     g_free(s->image_backing_file);
     g_free(s->image_backing_format);
 
-    if (has_data_file(bs)) {
+    if (close_data_file && has_data_file(bs)) {
+        GLOBAL_STATE_CODE();
+        bdrv_graph_rdunlock_main_loop();
+        bdrv_graph_wrlock(NULL);
         bdrv_unref_child(bs, s->data_file);
+        bdrv_graph_wrunlock(NULL);
         s->data_file = NULL;
+        bdrv_graph_rdlock_main_loop();
     }
 
     qcow2_refcount_close(bs);
     qcow2_free_snapshots(bs);
 }
 
-static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs,
-                                                   Error **errp)
+static void GRAPH_UNLOCKED qcow2_close(BlockDriverState *bs)
 {
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    qcow2_do_close(bs, true);
+}
+
+static void coroutine_fn GRAPH_RDLOCK
+qcow2_co_invalidate_cache(BlockDriverState *bs, Error **errp)
+{
+    ERRP_GUARD();
     BDRVQcow2State *s = bs->opaque;
+    BdrvChild *data_file;
     int flags = s->flags;
     QCryptoBlock *crypto = NULL;
     QDict *options;
-    Error *local_err = NULL;
     int ret;
 
     /*
@@ -2735,23 +2845,28 @@ static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs,
     crypto = s->crypto;
     s->crypto = NULL;
 
-    qcow2_close(bs);
+    /*
+     * Do not reopen s->data_file (i.e., have qcow2_do_close() not close it,
+     * and then prevent qcow2_do_open() from opening it), because this function
+     * runs in the I/O path and as such we must not invoke global-state
+     * functions like bdrv_unref_child() and bdrv_open_child().
+     */
+
+    qcow2_do_close(bs, false);
 
+    data_file = s->data_file;
     memset(s, 0, sizeof(BDRVQcow2State));
+    s->data_file = data_file;
+
     options = qdict_clone_shallow(bs->options);
 
     flags &= ~BDRV_O_INACTIVE;
     qemu_co_mutex_lock(&s->lock);
-    ret = qcow2_do_open(bs, options, flags, &local_err);
+    ret = qcow2_do_open(bs, options, flags, false, errp);
     qemu_co_mutex_unlock(&s->lock);
     qobject_unref(options);
-    if (local_err) {
-        error_propagate_prepend(errp, local_err,
-                                "Could not reopen qcow2 layer: ");
-        bs->drv = NULL;
-        return;
-    } else if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
+    if (ret < 0) {
+        error_prepend(errp, "Could not reopen qcow2 layer: ");
         bs->drv = NULL;
         return;
     }
@@ -3034,7 +3149,7 @@ int qcow2_update_header(BlockDriverState *bs)
     }
 
     /* Write the new header */
-    ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
+    ret = bdrv_pwrite(bs->file, 0, s->cluster_size, header, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -3045,8 +3160,9 @@ fail:
     return ret;
 }
 
-static int qcow2_change_backing_file(BlockDriverState *bs,
-    const char *backing_file, const char *backing_fmt)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
+                             const char *backing_fmt)
 {
     BDRVQcow2State *s = bs->opaque;
 
@@ -3074,9 +3190,10 @@ static int qcow2_change_backing_file(BlockDriverState *bs,
     return qcow2_update_header(bs);
 }
 
-static int qcow2_set_up_encryption(BlockDriverState *bs,
-                                   QCryptoBlockCreateOptions *cryptoopts,
-                                   Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_set_up_encryption(BlockDriverState *bs,
+                        QCryptoBlockCreateOptions *cryptoopts,
+                        Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     QCryptoBlock *crypto = NULL;
@@ -3123,9 +3240,9 @@ static int qcow2_set_up_encryption(BlockDriverState *bs,
  *
  * Returns: 0 on success, -errno on failure.
  */
-static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
-                                       uint64_t new_length, PreallocMode mode,
-                                       Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+preallocate_co(BlockDriverState *bs, uint64_t offset, uint64_t new_length,
+               PreallocMode mode, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t bytes;
@@ -3168,7 +3285,7 @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
      * all of the allocated clusters (otherwise we get failing reads after
      * EOF). Extend the image to the last allocated sector.
      */
-    file_length = bdrv_getlength(s->data_file->bs);
+    file_length = bdrv_co_getlength(s->data_file->bs);
     if (file_length < 0) {
         error_setg_errno(errp, -file_length, "Could not get file size");
         ret = file_length;
@@ -3363,7 +3480,7 @@ static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
     return refcount_bits;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_UNLOCKED
 qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
 {
     BlockdevCreateOptionsQcow2 *qcow2_opts;
@@ -3395,7 +3512,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
     assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2);
     qcow2_opts = &create_options->u.qcow2;
 
-    bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp);
+    bs = bdrv_co_open_blockdev_ref(qcow2_opts->file, errp);
     if (bs == NULL) {
         return -EIO;
     }
@@ -3450,7 +3567,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
     if (!qcow2_opts->has_preallocation) {
         qcow2_opts->preallocation = PREALLOC_MODE_OFF;
     }
-    if (qcow2_opts->has_backing_file &&
+    if (qcow2_opts->backing_file &&
         qcow2_opts->preallocation != PREALLOC_MODE_OFF &&
         !qcow2_opts->extended_l2)
     {
@@ -3459,7 +3576,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
         ret = -EINVAL;
         goto out;
     }
-    if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) {
+    if (qcow2_opts->has_backing_fmt && !qcow2_opts->backing_file) {
         error_setg(errp, "Backing format cannot be used without backing file");
         ret = -EINVAL;
         goto out;
@@ -3500,12 +3617,34 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
         ret = -EINVAL;
         goto out;
     }
-    if (qcow2_opts->data_file_raw && qcow2_opts->has_backing_file) {
+    if (qcow2_opts->data_file_raw && qcow2_opts->backing_file) {
         error_setg(errp, "Backing file and data-file-raw cannot be used at "
                    "the same time");
         ret = -EINVAL;
         goto out;
     }
+    if (qcow2_opts->data_file_raw &&
+        qcow2_opts->preallocation == PREALLOC_MODE_OFF)
+    {
+        /*
+         * data-file-raw means that "the external data file can be
+         * read as a consistent standalone raw image without looking
+         * at the qcow2 metadata."  It does not say that the metadata
+         * must be ignored, though (and the qcow2 driver in fact does
+         * not ignore it), so the L1/L2 tables must be present and
+         * give a 1:1 mapping, so you get the same result regardless
+         * of whether you look at the metadata or whether you ignore
+         * it.
+         */
+        qcow2_opts->preallocation = PREALLOC_MODE_METADATA;
+
+        /*
+         * Cannot use preallocation with backing files, but giving a
+         * backing file when specifying data_file_raw is an error
+         * anyway.
+         */
+        assert(!qcow2_opts->backing_file);
+    }
 
     if (qcow2_opts->data_file) {
         if (version < 3) {
@@ -3515,7 +3654,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
             ret = -EINVAL;
             goto out;
         }
-        data_bs = bdrv_open_blockdev_ref(qcow2_opts->data_file, errp);
+        data_bs = bdrv_co_open_blockdev_ref(qcow2_opts->data_file, errp);
         if (data_bs == NULL) {
             ret = -EIO;
             goto out;
@@ -3548,8 +3687,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
     }
 
     /* Create BlockBackend to write to the image */
-    blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
-                          errp);
+    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+                             errp);
     if (!blk) {
         ret = -EPERM;
         goto out;
@@ -3599,7 +3738,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
             cpu_to_be64(QCOW2_INCOMPAT_EXTL2);
     }
 
-    ret = blk_pwrite(blk, 0, header, cluster_size, 0);
+    ret = blk_co_pwrite(blk, 0, cluster_size, header, 0);
     g_free(header);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not write qcow2 header");
@@ -3609,7 +3748,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
     /* Write a refcount table with one refcount block */
     refcount_table = g_malloc0(2 * cluster_size);
     refcount_table[0] = cpu_to_be64(2 * cluster_size);
-    ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
+    ret = blk_co_pwrite(blk, cluster_size, 2 * cluster_size, refcount_table, 0);
     g_free(refcount_table);
 
     if (ret < 0) {
@@ -3617,7 +3756,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
         goto out;
     }
 
-    blk_unref(blk);
+    blk_co_unref(blk);
     blk = NULL;
 
     /*
@@ -3631,16 +3770,18 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
     if (data_bs) {
         qdict_put_str(options, "data-file", data_bs->node_name);
     }
-    blk = blk_new_open(NULL, NULL, options,
-                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
-                       errp);
+    blk = blk_co_new_open(NULL, NULL, options,
+                          BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
+                          errp);
     if (blk == NULL) {
         ret = -EIO;
         goto out;
     }
 
+    bdrv_graph_co_rdlock();
     ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
     if (ret < 0) {
+        bdrv_graph_co_rdunlock();
         error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
                          "header and refcount table");
         goto out;
@@ -3658,29 +3799,34 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
 
     /* Create a full header (including things like feature table) */
     ret = qcow2_update_header(blk_bs(blk));
+    bdrv_graph_co_rdunlock();
+
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not update qcow2 header");
         goto out;
     }
 
     /* Okay, now that we have a valid image, let's give it the right size */
-    ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation,
-                       0, errp);
+    ret = blk_co_truncate(blk, qcow2_opts->size, false,
+                          qcow2_opts->preallocation, 0, errp);
     if (ret < 0) {
         error_prepend(errp, "Could not resize image: ");
         goto out;
     }
 
     /* Want a backing file? There you go. */
-    if (qcow2_opts->has_backing_file) {
+    if (qcow2_opts->backing_file) {
         const char *backing_format = NULL;
 
         if (qcow2_opts->has_backing_fmt) {
             backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt);
         }
 
-        ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file,
-                                       backing_format, false);
+        bdrv_graph_co_rdlock();
+        ret = bdrv_co_change_backing_file(blk_bs(blk), qcow2_opts->backing_file,
+                                          backing_format, false);
+        bdrv_graph_co_rdunlock();
+
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
                              "with format '%s'", qcow2_opts->backing_file,
@@ -3690,14 +3836,17 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
     }
 
     /* Want encryption? There you go. */
-    if (qcow2_opts->has_encrypt) {
+    if (qcow2_opts->encrypt) {
+        bdrv_graph_co_rdlock();
         ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp);
+        bdrv_graph_co_rdunlock();
+
         if (ret < 0) {
             goto out;
         }
     }
 
-    blk_unref(blk);
+    blk_co_unref(blk);
     blk = NULL;
 
     /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
@@ -3712,9 +3861,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
     if (data_bs) {
         qdict_put_str(options, "data-file", data_bs->node_name);
     }
-    blk = blk_new_open(NULL, NULL, options,
-                       BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
-                       errp);
+    blk = blk_co_new_open(NULL, NULL, options,
+                          BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
+                          errp);
     if (blk == NULL) {
         ret = -EIO;
         goto out;
@@ -3722,16 +3871,15 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
 
     ret = 0;
 out:
-    blk_unref(blk);
-    bdrv_unref(bs);
-    bdrv_unref(data_bs);
+    blk_co_unref(blk);
+    bdrv_co_unref(bs);
+    bdrv_co_unref(data_bs);
     return ret;
 }
 
-static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv,
-                                             const char *filename,
-                                             QemuOpts *opts,
-                                             Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
+                     Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
     QDict *qdict;
@@ -3791,13 +3939,13 @@ static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv,
     }
 
     /* Create and open the file (protocol layer) */
-    ret = bdrv_create_file(filename, opts, errp);
+    ret = bdrv_co_create_file(filename, opts, errp);
     if (ret < 0) {
         goto finish;
     }
 
-    bs = bdrv_open(filename, NULL, NULL,
-                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs = bdrv_co_open(filename, NULL, NULL,
+                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (bs == NULL) {
         ret = -EIO;
         goto finish;
@@ -3806,14 +3954,14 @@ static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv,
     /* Create and open an external data file (protocol layer) */
     val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE);
     if (val) {
-        ret = bdrv_create_file(val, opts, errp);
+        ret = bdrv_co_create_file(val, opts, errp);
         if (ret < 0) {
             goto finish;
         }
 
-        data_bs = bdrv_open(val, NULL, NULL,
-                            BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                            errp);
+        data_bs = bdrv_co_open(val, NULL, NULL,
+                               BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                               errp);
         if (data_bs == NULL) {
             ret = -EIO;
             goto finish;
@@ -3847,21 +3995,26 @@ static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv,
 
     /* Create the qcow2 image (format layer) */
     ret = qcow2_co_create(create_options, errp);
+finish:
     if (ret < 0) {
-        goto finish;
+        bdrv_graph_co_rdlock();
+        bdrv_co_delete_file_noerr(bs);
+        bdrv_co_delete_file_noerr(data_bs);
+        bdrv_graph_co_rdunlock();
+    } else {
+        ret = 0;
     }
 
-    ret = 0;
-finish:
     qobject_unref(qdict);
-    bdrv_unref(bs);
-    bdrv_unref(data_bs);
+    bdrv_co_unref(bs);
+    bdrv_co_unref(data_bs);
     qapi_free_BlockdevCreateOptions(create_options);
     return ret;
 }
 
 
-static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
+static bool coroutine_fn GRAPH_RDLOCK
+is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     int64_t nr;
     int res;
@@ -3882,7 +4035,7 @@ static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
      * backing file. So, we need a loop.
      */
     do {
-        res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
+        res = bdrv_co_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
         offset += nr;
         bytes -= nr;
     } while (res >= 0 && (res & BDRV_BLOCK_ZERO) && nr && bytes);
@@ -3890,8 +4043,9 @@ static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
     return res >= 0 && (res & BDRV_BLOCK_ZERO) && bytes == 0;
 }
 
-static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                       BdrvRequestFlags flags)
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
@@ -3945,8 +4099,8 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
     return ret;
 }
 
-static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
-                                          int64_t offset, int bytes)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
@@ -3974,11 +4128,11 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 qcow2_co_copy_range_from(BlockDriverState *bs,
-                         BdrvChild *src, uint64_t src_offset,
-                         BdrvChild *dst, uint64_t dst_offset,
-                         uint64_t bytes, BdrvRequestFlags read_flags,
+                         BdrvChild *src, int64_t src_offset,
+                         BdrvChild *dst, int64_t dst_offset,
+                         int64_t bytes, BdrvRequestFlags read_flags,
                          BdrvRequestFlags write_flags)
 {
     BDRVQcow2State *s = bs->opaque;
@@ -4007,7 +4161,7 @@ qcow2_co_copy_range_from(BlockDriverState *bs,
         case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
         case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
             if (bs->backing && bs->backing->bs) {
-                int64_t backing_length = bdrv_getlength(bs->backing->bs);
+                int64_t backing_length = bdrv_co_getlength(bs->backing->bs);
                 if (src_offset >= backing_length) {
                     cur_write_flags |= BDRV_REQ_ZERO_WRITE;
                 } else {
@@ -4057,11 +4211,11 @@ out:
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 qcow2_co_copy_range_to(BlockDriverState *bs,
-                       BdrvChild *src, uint64_t src_offset,
-                       BdrvChild *dst, uint64_t dst_offset,
-                       uint64_t bytes, BdrvRequestFlags read_flags,
+                       BdrvChild *src, int64_t src_offset,
+                       BdrvChild *dst, int64_t dst_offset,
+                       int64_t bytes, BdrvRequestFlags read_flags,
                        BdrvRequestFlags write_flags)
 {
     BDRVQcow2State *s = bs->opaque;
@@ -4125,9 +4279,9 @@ fail:
     return ret;
 }
 
-static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
-                                          bool exact, PreallocMode prealloc,
-                                          BdrvRequestFlags flags, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
+                  PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t old_length;
@@ -4202,7 +4356,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
             goto fail;
         }
 
-        old_file_size = bdrv_getlength(bs->file->bs);
+        old_file_size = bdrv_co_getlength(bs->file->bs);
         if (old_file_size < 0) {
             error_setg_errno(errp, -old_file_size,
                              "Failed to inquire current file length");
@@ -4239,6 +4393,18 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
             error_setg_errno(errp, -ret, "Failed to grow the L1 table");
             goto fail;
         }
+
+        if (data_file_is_raw(bs) && prealloc == PREALLOC_MODE_OFF) {
+            /*
+             * When creating a qcow2 image with data-file-raw, we enforce
+             * at least prealloc=metadata, so that the L1/L2 tables are
+             * fully allocated and reading from the data file will return
+             * the same data as reading from the qcow2 image.  When the
+             * image is grown, we must consequently preallocate the
+             * metadata structures to cover the added area.
+             */
+            prealloc = PREALLOC_MODE_METADATA;
+        }
     }
 
     switch (prealloc) {
@@ -4283,7 +4449,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
             break;
         }
 
-        old_file_size = bdrv_getlength(bs->file->bs);
+        old_file_size = bdrv_co_getlength(bs->file->bs);
         if (old_file_size < 0) {
             error_setg_errno(errp, -old_file_size,
                              "Failed to inquire current file length");
@@ -4467,8 +4633,8 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
 
     /* write updated header.size */
     offset = cpu_to_be64(offset);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
-                           &offset, sizeof(offset));
+    ret = bdrv_co_pwrite_sync(bs->file, offsetof(QCowHeader, size),
+                              sizeof(offset), &offset, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Failed to update the image size");
         goto fail;
@@ -4489,7 +4655,7 @@ fail:
     return ret;
 }
 
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
 qcow2_co_pwritev_compressed_task(BlockDriverState *bs,
                                  uint64_t offset, uint64_t bytes,
                                  QEMUIOVector *qiov, size_t qiov_offset)
@@ -4540,7 +4706,7 @@ qcow2_co_pwritev_compressed_task(BlockDriverState *bs,
         goto fail;
     }
 
-    BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
+    BLKDBG_CO_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
     ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0);
     if (ret < 0) {
         goto fail;
@@ -4553,7 +4719,13 @@ fail:
     return ret;
 }
 
-static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task)
+/*
+ * This function can count as GRAPH_RDLOCK because
+ * qcow2_co_pwritev_compressed_part() holds the graph lock and keeps it until
+ * this coroutine has terminated.
+ */
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_pwritev_compressed_task_entry(AioTask *task)
 {
     Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
 
@@ -4567,9 +4739,9 @@ static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task)
  * XXX: put compressed sectors first, then all the cluster aligned
  * tables to avoid losing bytes in alignment
  */
-static coroutine_fn int
+static int coroutine_fn GRAPH_RDLOCK
 qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
-                                 uint64_t offset, uint64_t bytes,
+                                 int64_t offset, int64_t bytes,
                                  QEMUIOVector *qiov, size_t qiov_offset)
 {
     BDRVQcow2State *s = bs->opaque;
@@ -4585,7 +4757,7 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
          * align end of file to a sector boundary to ease reading with
          * sector based I/Os
          */
-        int64_t len = bdrv_getlength(bs->file->bs);
+        int64_t len = bdrv_co_getlength(bs->file->bs);
         if (len < 0) {
             return len;
         }
@@ -4630,24 +4802,21 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
     return ret;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 qcow2_co_preadv_compressed(BlockDriverState *bs,
-                           uint64_t cluster_descriptor,
+                           uint64_t l2_entry,
                            uint64_t offset,
                            uint64_t bytes,
                            QEMUIOVector *qiov,
                            size_t qiov_offset)
 {
     BDRVQcow2State *s = bs->opaque;
-    int ret = 0, csize, nb_csectors;
+    int ret = 0, csize;
     uint64_t coffset;
     uint8_t *buf, *out_buf;
     int offset_in_cluster = offset_into_cluster(s, offset);
 
-    coffset = cluster_descriptor & s->cluster_offset_mask;
-    nb_csectors = ((cluster_descriptor >> s->csize_shift) & s->csize_mask) + 1;
-    csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE -
-        (coffset & ~QCOW2_COMPRESSED_SECTOR_MASK);
+    qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
 
     buf = g_try_malloc(csize);
     if (!buf) {
@@ -4656,7 +4825,7 @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
 
     out_buf = qemu_blockalign(bs, s->cluster_size);
 
-    BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
+    BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
     ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0);
     if (ret < 0) {
         goto fail;
@@ -4676,7 +4845,7 @@ fail:
     return ret;
 }
 
-static int make_completely_empty(BlockDriverState *bs)
+static int GRAPH_RDLOCK make_completely_empty(BlockDriverState *bs)
 {
     BDRVQcow2State *s = bs->opaque;
     Error *local_err = NULL;
@@ -4748,7 +4917,7 @@ static int make_completely_empty(BlockDriverState *bs)
     l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
     l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
     ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
-                           &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
+                           sizeof(l1_ofs_rt_ofs_cls), &l1_ofs_rt_ofs_cls, 0);
     if (ret < 0) {
         goto fail_broken_refcounts;
     }
@@ -4779,8 +4948,8 @@ static int make_completely_empty(BlockDriverState *bs)
 
     /* Enter the first refblock into the reftable */
     rt_entry = cpu_to_be64(2 * s->cluster_size);
-    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
-                           &rt_entry, sizeof(rt_entry));
+    ret = bdrv_pwrite_sync(bs->file, s->cluster_size, sizeof(rt_entry),
+                           &rt_entry, 0);
     if (ret < 0) {
         goto fail_broken_refcounts;
     }
@@ -4827,7 +4996,7 @@ fail:
     return ret;
 }
 
-static int qcow2_make_empty(BlockDriverState *bs)
+static int GRAPH_RDLOCK qcow2_make_empty(BlockDriverState *bs)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t offset, end_offset;
@@ -4871,7 +5040,7 @@ static int qcow2_make_empty(BlockDriverState *bs)
     return ret;
 }
 
-static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
+static coroutine_fn GRAPH_RDLOCK int qcow2_co_flush_to_os(BlockDriverState *bs)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret;
@@ -5051,26 +5220,27 @@ err:
     return NULL;
 }
 
-static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+static int coroutine_fn
+qcow2_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     BDRVQcow2State *s = bs->opaque;
     bdi->cluster_size = s->cluster_size;
+    bdi->subcluster_size = s->subcluster_size;
     bdi->vm_state_offset = qcow2_vm_state_offset(s);
+    bdi->is_dirty = s->incompatible_features & QCOW2_INCOMPAT_DIRTY;
     return 0;
 }
 
-static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs,
-                                                  Error **errp)
+static ImageInfoSpecific * GRAPH_RDLOCK
+qcow2_get_specific_info(BlockDriverState *bs, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     ImageInfoSpecific *spec_info;
     QCryptoBlockInfo *encrypt_info = NULL;
-    Error *local_err = NULL;
 
     if (s->crypto != NULL) {
-        encrypt_info = qcrypto_block_get_info(s->crypto, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
+        encrypt_info = qcrypto_block_get_info(s->crypto, errp);
+        if (!encrypt_info) {
             return NULL;
         }
     }
@@ -5087,9 +5257,7 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs,
         };
     } else if (s->qcow_version == 3) {
         Qcow2BitmapInfoList *bitmaps;
-        bitmaps = qcow2_get_bitmap_info_list(bs, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
+        if (!qcow2_get_bitmap_info_list(bs, &bitmaps, errp)) {
             qapi_free_ImageInfoSpecific(spec_info);
             qapi_free_QCryptoBlockInfo(encrypt_info);
             return NULL;
@@ -5107,7 +5275,6 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs,
             .refcount_bits      = s->refcount_bits,
             .has_bitmaps        = !!bitmaps,
             .bitmaps            = bitmaps,
-            .has_data_file      = !!s->image_data_file,
             .data_file          = g_strdup(s->image_data_file),
             .has_data_file_raw  = has_data_file(bs),
             .data_file_raw      = data_file_is_raw(bs),
@@ -5138,14 +5305,14 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs,
         memset(&encrypt_info->u, 0, sizeof(encrypt_info->u));
         qapi_free_QCryptoBlockInfo(encrypt_info);
 
-        spec_info->u.qcow2.data->has_encrypt = true;
         spec_info->u.qcow2.data->encrypt = qencrypt;
     }
 
     return spec_info;
 }
 
-static int qcow2_has_zero_init(BlockDriverState *bs)
+static int coroutine_mixed_fn GRAPH_RDLOCK
+qcow2_has_zero_init(BlockDriverState *bs)
 {
     BDRVQcow2State *s = bs->opaque;
     bool preallocated;
@@ -5172,33 +5339,97 @@ static int qcow2_has_zero_init(BlockDriverState *bs)
     }
 }
 
-static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
-                              int64_t pos)
+/*
+ * Check the request to vmstate. On success return
+ *      qcow2_vm_state_offset(bs) + @pos
+ */
+static int64_t qcow2_check_vmstate_request(BlockDriverState *bs,
+                                           QEMUIOVector *qiov, int64_t pos)
 {
     BDRVQcow2State *s = bs->opaque;
+    int64_t vmstate_offset = qcow2_vm_state_offset(s);
+    int ret;
 
-    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
-    return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos,
-                                         qiov->size, qiov, 0, 0);
+    /* Incoming requests must be OK */
+    bdrv_check_qiov_request(pos, qiov->size, qiov, 0, &error_abort);
+
+    if (INT64_MAX - pos < vmstate_offset) {
+        return -EIO;
+    }
+
+    pos += vmstate_offset;
+    ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return pos;
 }
 
-static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
-                              int64_t pos)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
 {
-    BDRVQcow2State *s = bs->opaque;
+    int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos);
+    if (offset < 0) {
+        return offset;
+    }
+
+    BLKDBG_CO_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
+    return bs->drv->bdrv_co_pwritev_part(bs, offset, qiov->size, qiov, 0, 0);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_co_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
+{
+    int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos);
+    if (offset < 0) {
+        return offset;
+    }
+
+    BLKDBG_CO_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
+    return bs->drv->bdrv_co_preadv_part(bs, offset, qiov->size, qiov, 0, 0);
+}
+
+static int GRAPH_RDLOCK qcow2_has_compressed_clusters(BlockDriverState *bs)
+{
+    int64_t offset = 0;
+    int64_t bytes = bdrv_getlength(bs);
+
+    if (bytes < 0) {
+        return bytes;
+    }
+
+    while (bytes != 0) {
+        int ret;
+        QCow2SubclusterType type;
+        unsigned int cur_bytes = MIN(INT_MAX, bytes);
+        uint64_t host_offset;
+
+        ret = qcow2_get_host_offset(bs, offset, &cur_bytes, &host_offset,
+                                    &type);
+        if (ret < 0) {
+            return ret;
+        }
+
+        if (type == QCOW2_SUBCLUSTER_COMPRESSED) {
+            return 1;
+        }
 
-    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
-    return bs->drv->bdrv_co_preadv_part(bs, qcow2_vm_state_offset(s) + pos,
-                                        qiov->size, qiov, 0, 0);
+        offset += cur_bytes;
+        bytes -= cur_bytes;
+    }
+
+    return 0;
 }
 
 /*
  * Downgrades an image's version. To achieve this, any incompatible features
  * have to be removed.
  */
-static int qcow2_downgrade(BlockDriverState *bs, int target_version,
-                           BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
-                           Error **errp)
+static int GRAPH_RDLOCK
+qcow2_downgrade(BlockDriverState *bs, int target_version,
+                BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+                Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     int current_version = s->qcow_version;
@@ -5249,9 +5480,10 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,
      * the first place; if that happens nonetheless, returning -ENOTSUP is the
      * best thing to do anyway */
 
-    if (s->incompatible_features) {
+    if (s->incompatible_features & ~QCOW2_INCOMPAT_COMPRESSION) {
         error_setg(errp, "Cannot downgrade an image with incompatible features "
-                   "%#" PRIx64 " set", s->incompatible_features);
+                   "0x%" PRIx64 " set",
+                   s->incompatible_features & ~QCOW2_INCOMPAT_COMPRESSION);
         return -ENOTSUP;
     }
 
@@ -5269,6 +5501,27 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,
         return ret;
     }
 
+    if (s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION) {
+        ret = qcow2_has_compressed_clusters(bs);
+        if (ret < 0) {
+            error_setg(errp, "Failed to check block status");
+            return -EINVAL;
+        }
+        if (ret) {
+            error_setg(errp, "Cannot downgrade an image with zstd compression "
+                       "type and existing compressed clusters");
+            return -ENOTSUP;
+        }
+        /*
+         * No compressed clusters for now, so just chose default zlib
+         * compression.
+         */
+        s->incompatible_features &= ~QCOW2_INCOMPAT_COMPRESSION;
+        s->compression_type = QCOW2_COMPRESSION_TYPE_ZLIB;
+    }
+
+    assert(s->incompatible_features == 0);
+
     s->qcow_version = target_version;
     ret = qcow2_update_header(bs);
     if (ret < 0) {
@@ -5284,9 +5537,10 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,
  * features of older versions, some things may have to be presented
  * differently.
  */
-static int qcow2_upgrade(BlockDriverState *bs, int target_version,
-                         BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
-                         Error **errp)
+static int GRAPH_RDLOCK
+qcow2_upgrade(BlockDriverState *bs, int target_version,
+              BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+              Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     bool need_snapshot_update;
@@ -5412,11 +5666,10 @@ static void qcow2_amend_helper_cb(BlockDriverState *bs,
                              info->original_cb_opaque);
 }
 
-static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
-                               BlockDriverAmendStatusCB *status_cb,
-                               void *cb_opaque,
-                               bool force,
-                               Error **errp)
+static int GRAPH_RDLOCK
+qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
+                    BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+                    bool force, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     int old_version = s->qcow_version, new_version = old_version;
@@ -5591,15 +5844,10 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
     if (backing_file || backing_format) {
         if (g_strcmp0(backing_file, s->image_backing_file) ||
             g_strcmp0(backing_format, s->image_backing_format)) {
-            warn_report("Deprecated use of amend to alter the backing file; "
-                        "use qemu-img rebase instead");
-        }
-        ret = qcow2_change_backing_file(bs,
-                    backing_file ?: s->image_backing_file,
-                    backing_format ?: s->image_backing_format);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Failed to change the backing file");
-            return ret;
+            error_setg(errp, "Cannot amend the backing file");
+            error_append_hint(errp,
+                              "You can use 'qemu-img rebase' instead.\n");
+            return -EINVAL;
         }
     }
 
@@ -5678,7 +5926,7 @@ static int coroutine_fn qcow2_co_amend(BlockDriverState *bs,
     BDRVQcow2State *s = bs->opaque;
     int ret = 0;
 
-    if (qopts->has_encrypt) {
+    if (qopts->encrypt) {
         if (!s->crypto) {
             error_setg(errp, "image is not encrypted, can't amend");
             return -EOPNOTSUPP;
@@ -5743,7 +5991,7 @@ void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
 
     node_name = bdrv_get_node_name(bs);
     qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
-                                          *node_name != '\0', node_name,
+                                          *node_name ? node_name : NULL,
                                           message, offset >= 0, offset,
                                           size >= 0, size,
                                           fatal);
@@ -5876,64 +6124,64 @@ static const char *const qcow2_strong_runtime_opts[] = {
 };
 
 BlockDriver bdrv_qcow2 = {
-    .format_name        = "qcow2",
-    .instance_size      = sizeof(BDRVQcow2State),
-    .bdrv_probe         = qcow2_probe,
-    .bdrv_open          = qcow2_open,
-    .bdrv_close         = qcow2_close,
-    .bdrv_reopen_prepare  = qcow2_reopen_prepare,
-    .bdrv_reopen_commit   = qcow2_reopen_commit,
-    .bdrv_reopen_commit_post = qcow2_reopen_commit_post,
-    .bdrv_reopen_abort    = qcow2_reopen_abort,
-    .bdrv_join_options    = qcow2_join_options,
-    .bdrv_child_perm      = bdrv_default_perms,
-    .bdrv_co_create_opts  = qcow2_co_create_opts,
-    .bdrv_co_create       = qcow2_co_create,
-    .bdrv_has_zero_init   = qcow2_has_zero_init,
-    .bdrv_co_block_status = qcow2_co_block_status,
-
-    .bdrv_co_preadv_part    = qcow2_co_preadv_part,
-    .bdrv_co_pwritev_part   = qcow2_co_pwritev_part,
-    .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,
-
-    .bdrv_co_pwrite_zeroes  = qcow2_co_pwrite_zeroes,
-    .bdrv_co_pdiscard       = qcow2_co_pdiscard,
-    .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
-    .bdrv_co_copy_range_to  = qcow2_co_copy_range_to,
-    .bdrv_co_truncate       = qcow2_co_truncate,
-    .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part,
-    .bdrv_make_empty        = qcow2_make_empty,
-
-    .bdrv_snapshot_create   = qcow2_snapshot_create,
-    .bdrv_snapshot_goto     = qcow2_snapshot_goto,
-    .bdrv_snapshot_delete   = qcow2_snapshot_delete,
-    .bdrv_snapshot_list     = qcow2_snapshot_list,
-    .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
-    .bdrv_measure           = qcow2_measure,
-    .bdrv_get_info          = qcow2_get_info,
-    .bdrv_get_specific_info = qcow2_get_specific_info,
-
-    .bdrv_save_vmstate    = qcow2_save_vmstate,
-    .bdrv_load_vmstate    = qcow2_load_vmstate,
-
-    .is_format                  = true,
-    .supports_backing           = true,
-    .bdrv_change_backing_file   = qcow2_change_backing_file,
-
-    .bdrv_refresh_limits        = qcow2_refresh_limits,
-    .bdrv_co_invalidate_cache   = qcow2_co_invalidate_cache,
-    .bdrv_inactivate            = qcow2_inactivate,
-
-    .create_opts         = &qcow2_create_opts,
-    .amend_opts          = &qcow2_amend_opts,
-    .strong_runtime_opts = qcow2_strong_runtime_opts,
-    .mutable_opts        = mutable_opts,
-    .bdrv_co_check       = qcow2_co_check,
-    .bdrv_amend_options  = qcow2_amend_options,
-    .bdrv_co_amend       = qcow2_co_amend,
-
-    .bdrv_detach_aio_context  = qcow2_detach_aio_context,
-    .bdrv_attach_aio_context  = qcow2_attach_aio_context,
+    .format_name                        = "qcow2",
+    .instance_size                      = sizeof(BDRVQcow2State),
+    .bdrv_probe                         = qcow2_probe,
+    .bdrv_open                          = qcow2_open,
+    .bdrv_close                         = qcow2_close,
+    .bdrv_reopen_prepare                = qcow2_reopen_prepare,
+    .bdrv_reopen_commit                 = qcow2_reopen_commit,
+    .bdrv_reopen_commit_post            = qcow2_reopen_commit_post,
+    .bdrv_reopen_abort                  = qcow2_reopen_abort,
+    .bdrv_join_options                  = qcow2_join_options,
+    .bdrv_child_perm                    = bdrv_default_perms,
+    .bdrv_co_create_opts                = qcow2_co_create_opts,
+    .bdrv_co_create                     = qcow2_co_create,
+    .bdrv_has_zero_init                 = qcow2_has_zero_init,
+    .bdrv_co_block_status               = qcow2_co_block_status,
+
+    .bdrv_co_preadv_part                = qcow2_co_preadv_part,
+    .bdrv_co_pwritev_part               = qcow2_co_pwritev_part,
+    .bdrv_co_flush_to_os                = qcow2_co_flush_to_os,
+
+    .bdrv_co_pwrite_zeroes              = qcow2_co_pwrite_zeroes,
+    .bdrv_co_pdiscard                   = qcow2_co_pdiscard,
+    .bdrv_co_copy_range_from            = qcow2_co_copy_range_from,
+    .bdrv_co_copy_range_to              = qcow2_co_copy_range_to,
+    .bdrv_co_truncate                   = qcow2_co_truncate,
+    .bdrv_co_pwritev_compressed_part    = qcow2_co_pwritev_compressed_part,
+    .bdrv_make_empty                    = qcow2_make_empty,
+
+    .bdrv_snapshot_create               = qcow2_snapshot_create,
+    .bdrv_snapshot_goto                 = qcow2_snapshot_goto,
+    .bdrv_snapshot_delete               = qcow2_snapshot_delete,
+    .bdrv_snapshot_list                 = qcow2_snapshot_list,
+    .bdrv_snapshot_load_tmp             = qcow2_snapshot_load_tmp,
+    .bdrv_measure                       = qcow2_measure,
+    .bdrv_co_get_info                   = qcow2_co_get_info,
+    .bdrv_get_specific_info             = qcow2_get_specific_info,
+
+    .bdrv_co_save_vmstate               = qcow2_co_save_vmstate,
+    .bdrv_co_load_vmstate               = qcow2_co_load_vmstate,
+
+    .is_format                          = true,
+    .supports_backing                   = true,
+    .bdrv_co_change_backing_file        = qcow2_co_change_backing_file,
+
+    .bdrv_refresh_limits                = qcow2_refresh_limits,
+    .bdrv_co_invalidate_cache           = qcow2_co_invalidate_cache,
+    .bdrv_inactivate                    = qcow2_inactivate,
+
+    .create_opts                        = &qcow2_create_opts,
+    .amend_opts                         = &qcow2_amend_opts,
+    .strong_runtime_opts                = qcow2_strong_runtime_opts,
+    .mutable_opts                       = mutable_opts,
+    .bdrv_co_check                      = qcow2_co_check,
+    .bdrv_amend_options                 = qcow2_amend_options,
+    .bdrv_co_amend                      = qcow2_co_amend,
+
+    .bdrv_detach_aio_context            = qcow2_detach_aio_context,
+    .bdrv_attach_aio_context            = qcow2_attach_aio_context,
 
     .bdrv_supports_persistent_dirty_bitmap =
             qcow2_supports_persistent_dirty_bitmap,
index 0678073b742f1b6ae2f1e5fcd3daf430fba05b5d..a9e3481c6e02cf4fdef777359477a24e97d937c0 100644 (file)
 
 /* Defined in the qcow2 spec (compressed cluster descriptor) */
 #define QCOW2_COMPRESSED_SECTOR_SIZE 512U
-#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1ULL))
 
 /* Must be at least 2 to cover COW */
 #define MIN_L2_CACHE_SIZE 2 /* cache entries */
 #define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
 #define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
 #define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
+#define QCOW2_OPT_DISCARD_NO_UNREF "discard-no-unref"
 #define QCOW2_OPT_OVERLAP "overlap-check"
 #define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template"
 #define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header"
@@ -386,6 +386,8 @@ typedef struct BDRVQcow2State {
 
     bool discard_passthrough[QCOW2_DISCARD_MAX];
 
+    bool discard_no_unref;
+
     int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
     bool signaled_corruption;
 
@@ -587,10 +589,12 @@ typedef enum QCow2MetadataOverlap {
     (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2)
 
 #define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define L1E_RESERVED_MASK 0x7f000000000001ffULL
 #define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
-#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
+#define L2E_STD_RESERVED_MASK 0x3f000000000001feULL
 
 #define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
+#define REFT_RESERVED_MASK 0x1ffULL
 
 #define INV_OFFSET (-1ULL)
 
@@ -637,7 +641,7 @@ static inline void set_l2_bitmap(BDRVQcow2State *s, uint64_t *l2_slice,
     l2_slice[idx + 1] = cpu_to_be64(bitmap);
 }
 
-static inline bool has_data_file(BlockDriverState *bs)
+static inline bool GRAPH_RDLOCK has_data_file(BlockDriverState *bs)
 {
     BDRVQcow2State *s = bs->opaque;
     return (s->data_file != bs->file);
@@ -705,8 +709,8 @@ static inline int64_t qcow2_vm_state_offset(BDRVQcow2State *s)
     return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
 }
 
-static inline QCow2ClusterType qcow2_get_cluster_type(BlockDriverState *bs,
-                                                      uint64_t l2_entry)
+static inline QCow2ClusterType GRAPH_RDLOCK
+qcow2_get_cluster_type(BlockDriverState *bs, uint64_t l2_entry)
 {
     BDRVQcow2State *s = bs->opaque;
 
@@ -739,7 +743,7 @@ static inline QCow2ClusterType qcow2_get_cluster_type(BlockDriverState *bs,
  * (this checks the whole entry and bitmap, not only the bits related
  * to subcluster @sc_index).
  */
-static inline
+static inline GRAPH_RDLOCK
 QCow2SubclusterType qcow2_get_subcluster_type(BlockDriverState *bs,
                                               uint64_t l2_entry,
                                               uint64_t l2_bitmap,
@@ -830,14 +834,14 @@ int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
                                      int refcount_order, bool generous_increase,
                                      uint64_t *refblock_count);
 
-int qcow2_mark_dirty(BlockDriverState *bs);
-int qcow2_mark_corrupt(BlockDriverState *bs);
-int qcow2_mark_consistent(BlockDriverState *bs);
-int qcow2_update_header(BlockDriverState *bs);
+int GRAPH_RDLOCK qcow2_mark_dirty(BlockDriverState *bs);
+int GRAPH_RDLOCK qcow2_mark_corrupt(BlockDriverState *bs);
+int GRAPH_RDLOCK qcow2_update_header(BlockDriverState *bs);
 
-void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
-                             int64_t size, const char *message_format, ...)
-                             GCC_FMT_ATTR(5, 6);
+void GRAPH_RDLOCK
+qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
+                        int64_t size, const char *message_format, ...)
+                        G_GNUC_PRINTF(5, 6);
 
 int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
                          uint64_t entries, size_t entry_len,
@@ -845,154 +849,211 @@ int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
                          Error **errp);
 
 /* qcow2-refcount.c functions */
-int qcow2_refcount_init(BlockDriverState *bs);
+int coroutine_fn GRAPH_RDLOCK qcow2_refcount_init(BlockDriverState *bs);
 void qcow2_refcount_close(BlockDriverState *bs);
 
-int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
-                       uint64_t *refcount);
-
-int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
-                                  uint64_t addend, bool decrease,
-                                  enum qcow2_discard_type type);
-
-int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t offset,
-                            uint64_t additional_clusters, bool exact_size,
-                            int new_refblock_index,
-                            uint64_t new_refblock_offset);
-
-int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
-int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
-                                int64_t nb_clusters);
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
-void qcow2_free_clusters(BlockDriverState *bs,
-                          int64_t offset, int64_t size,
-                          enum qcow2_discard_type type);
-void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry,
-                            enum qcow2_discard_type type);
-
-int qcow2_update_snapshot_refcount(BlockDriverState *bs,
-    int64_t l1_table_offset, int l1_size, int addend);
-
-int coroutine_fn qcow2_flush_caches(BlockDriverState *bs);
-int coroutine_fn qcow2_write_caches(BlockDriverState *bs);
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                          BdrvCheckMode fix);
-
-void qcow2_process_discards(BlockDriverState *bs, int ret);
-
-int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
-                                 int64_t size);
-int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
-                                  int64_t size, bool data_file);
-int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
-                             void **refcount_table,
-                             int64_t *refcount_table_size,
-                             int64_t offset, int64_t size);
-
-int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
-                                BlockDriverAmendStatusCB *status_cb,
-                                void *cb_opaque, Error **errp);
-int qcow2_shrink_reftable(BlockDriverState *bs);
-int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
-int qcow2_detect_metadata_preallocation(BlockDriverState *bs);
+int GRAPH_RDLOCK qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
+                                    uint64_t *refcount);
+
+int GRAPH_RDLOCK
+qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
+                              uint64_t addend, bool decrease,
+                              enum qcow2_discard_type type);
+
+int64_t GRAPH_RDLOCK
+qcow2_refcount_area(BlockDriverState *bs, uint64_t offset,
+                    uint64_t additional_clusters, bool exact_size,
+                    int new_refblock_index,
+                    uint64_t new_refblock_offset);
+
+int64_t GRAPH_RDLOCK
+qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
+
+int64_t GRAPH_RDLOCK coroutine_fn
+qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+                        int64_t nb_clusters);
+
+int64_t coroutine_fn GRAPH_RDLOCK qcow2_alloc_bytes(BlockDriverState *bs, int size);
+void GRAPH_RDLOCK qcow2_free_clusters(BlockDriverState *bs,
+                                      int64_t offset, int64_t size,
+                                      enum qcow2_discard_type type);
+void GRAPH_RDLOCK
+qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry,
+                       enum qcow2_discard_type type);
+
+int GRAPH_RDLOCK
+qcow2_update_snapshot_refcount(BlockDriverState *bs, int64_t l1_table_offset,
+                               int l1_size, int addend);
+
+int GRAPH_RDLOCK qcow2_flush_caches(BlockDriverState *bs);
+int GRAPH_RDLOCK qcow2_write_caches(BlockDriverState *bs);
+int coroutine_fn qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+                                       BdrvCheckMode fix);
+
+void GRAPH_RDLOCK qcow2_process_discards(BlockDriverState *bs, int ret);
+
+int GRAPH_RDLOCK
+qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
+                             int64_t size);
+int GRAPH_RDLOCK
+qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
+                              int64_t size, bool data_file);
+
+int coroutine_fn qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
+                                          void **refcount_table,
+                                          int64_t *refcount_table_size,
+                                          int64_t offset, int64_t size);
+
+int GRAPH_RDLOCK
+qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
+                            BlockDriverAmendStatusCB *status_cb,
+                            void *cb_opaque, Error **errp);
+int coroutine_fn GRAPH_RDLOCK qcow2_shrink_reftable(BlockDriverState *bs);
+
+int64_t coroutine_fn GRAPH_RDLOCK
+qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_detect_metadata_preallocation(BlockDriverState *bs);
 
 /* qcow2-cluster.c functions */
-int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
-                        bool exact_size);
-int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size);
-int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
+int GRAPH_RDLOCK
+qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size);
+
+int GRAPH_RDLOCK qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
 int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
                           uint8_t *buf, int nb_sectors, bool enc, Error **errp);
 
-int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset,
-                          unsigned int *bytes, uint64_t *host_offset,
-                          QCow2SubclusterType *subcluster_type);
-int qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset,
-                            unsigned int *bytes, uint64_t *host_offset,
-                            QCowL2Meta **m);
-int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
-                                          uint64_t offset,
-                                          int compressed_size,
-                                          uint64_t *host_offset);
-
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
-void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m);
-int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
-                          uint64_t bytes, enum qcow2_discard_type type,
-                          bool full_discard);
-int qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset,
-                             uint64_t bytes, int flags);
-
-int qcow2_expand_zero_clusters(BlockDriverState *bs,
-                               BlockDriverAmendStatusCB *status_cb,
-                               void *cb_opaque);
+int GRAPH_RDLOCK
+qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset,
+                      unsigned int *bytes, uint64_t *host_offset,
+                      QCow2SubclusterType *subcluster_type);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset,
+                        unsigned int *bytes, uint64_t *host_offset,
+                        QCowL2Meta **m);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset,
+                                      int compressed_size, uint64_t *host_offset);
+void GRAPH_RDLOCK
+qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry,
+                                uint64_t *coffset, int *csize);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
+
+void coroutine_fn GRAPH_RDLOCK
+qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m);
+
+int GRAPH_RDLOCK
+qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                      enum qcow2_discard_type type, bool full_discard);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                         int flags);
+
+int GRAPH_RDLOCK
+qcow2_expand_zero_clusters(BlockDriverState *bs,
+                           BlockDriverAmendStatusCB *status_cb,
+                           void *cb_opaque);
 
 /* qcow2-snapshot.c functions */
-int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
-int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
-int qcow2_snapshot_delete(BlockDriverState *bs,
-                          const char *snapshot_id,
-                          const char *name,
-                          Error **errp);
-int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
-int qcow2_snapshot_load_tmp(BlockDriverState *bs,
-                            const char *snapshot_id,
-                            const char *name,
-                            Error **errp);
+int GRAPH_RDLOCK
+qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
+
+int GRAPH_RDLOCK
+qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
+
+int GRAPH_RDLOCK
+qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id,
+                          const char *name, Error **errp);
+
+int GRAPH_RDLOCK
+qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
+
+int GRAPH_RDLOCK
+qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_id,
+                        const char *name, Error **errp);
 
 void qcow2_free_snapshots(BlockDriverState *bs);
-int qcow2_read_snapshots(BlockDriverState *bs, Error **errp);
-int qcow2_write_snapshots(BlockDriverState *bs);
+int coroutine_fn GRAPH_RDLOCK
+qcow2_read_snapshots(BlockDriverState *bs, Error **errp);
+int GRAPH_RDLOCK qcow2_write_snapshots(BlockDriverState *bs);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_check_read_snapshot_table(BlockDriverState *bs, BdrvCheckResult *result,
+                                BdrvCheckMode fix);
 
-int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs,
-                                                 BdrvCheckResult *result,
-                                                 BdrvCheckMode fix);
-int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs,
-                                                BdrvCheckResult *result,
-                                                BdrvCheckMode fix);
+int coroutine_fn GRAPH_RDLOCK
+qcow2_check_fix_snapshot_table(BlockDriverState *bs, BdrvCheckResult *result,
+                               BdrvCheckMode fix);
 
 /* qcow2-cache.c functions */
-Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
-                               unsigned table_size);
+Qcow2Cache * GRAPH_RDLOCK
+qcow2_cache_create(BlockDriverState *bs, int num_tables, unsigned table_size);
+
 int qcow2_cache_destroy(Qcow2Cache *c);
 
 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
-int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
-int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c);
-int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
-    Qcow2Cache *dependency);
+int GRAPH_RDLOCK qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
+int GRAPH_RDLOCK qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c);
+int GRAPH_RDLOCK qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
+                                            Qcow2Cache *dependency);
 void qcow2_cache_depends_on_flush(Qcow2Cache *c);
 
 void qcow2_cache_clean_unused(Qcow2Cache *c);
-int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
+int GRAPH_RDLOCK qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
+
+int GRAPH_RDLOCK
+qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+                void **table);
+
+int GRAPH_RDLOCK
+qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+                      void **table);
 
-int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
-    void **table);
-int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
-    void **table);
 void qcow2_cache_put(Qcow2Cache *c, void **table);
 void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset);
 void qcow2_cache_discard(Qcow2Cache *c, void *table);
 
 /* qcow2-bitmap.c functions */
-int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                                  void **refcount_table,
-                                  int64_t *refcount_table_size);
-bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp);
-Qcow2BitmapInfoList *qcow2_get_bitmap_info_list(BlockDriverState *bs,
-                                                Error **errp);
-int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
-int qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp);
-void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs,
-                                          bool release_stored, Error **errp);
-int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
-bool qcow2_co_can_store_new_dirty_bitmap(BlockDriverState *bs,
-                                         const char *name,
-                                         uint32_t granularity,
-                                         Error **errp);
-int qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs,
-                                            const char *name,
-                                            Error **errp);
+int coroutine_fn GRAPH_RDLOCK
+qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+                              void **refcount_table,
+                              int64_t *refcount_table_size);
+
+bool coroutine_fn GRAPH_RDLOCK
+qcow2_load_dirty_bitmaps(BlockDriverState *bs, bool *header_updated,
+                         Error **errp);
+
+bool GRAPH_RDLOCK
+qcow2_get_bitmap_info_list(BlockDriverState *bs,
+                           Qcow2BitmapInfoList **info_list, Error **errp);
+
+int GRAPH_RDLOCK qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
+int GRAPH_RDLOCK qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp);
+
+bool GRAPH_RDLOCK
+qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, bool release_stored,
+                                     Error **errp);
+
+bool coroutine_fn GRAPH_RDLOCK
+qcow2_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+                                    uint32_t granularity, Error **errp);
+
+int coroutine_fn GRAPH_RDLOCK
+qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
+                                        Error **errp);
+
 bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs);
 uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *bs,
                                                 uint32_t cluster_size);
index 418033ee24e25ccf1ef29d00433aa1c4e765f9c4..6a01b94f9c9c27352d3fec2c5c6777d484a35213 100644 (file)
@@ -12,6 +12,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "block/block-io.h"
 #include "qed.h"
 
 typedef struct {
@@ -106,7 +107,8 @@ static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
 /**
  * Descend tables and check each cluster is referenced once only
  */
-static int coroutine_fn qed_check_l1_table(QEDCheck *check, QEDTable *table)
+static int coroutine_fn GRAPH_RDLOCK
+qed_check_l1_table(QEDCheck *check, QEDTable *table)
 {
     BDRVQEDState *s = check->s;
     unsigned int i, num_invalid_l1 = 0;
@@ -198,7 +200,8 @@ static void qed_check_for_leaks(QEDCheck *check)
 /**
  * Mark an image clean once it passes check or has been repaired
  */
-static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)
+static void coroutine_fn GRAPH_RDLOCK
+qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)
 {
     /* Skip if there were unfixable corruptions or I/O errors */
     if (result->corruptions > 0 || result->check_errors > 0) {
@@ -211,7 +214,7 @@ static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)
     }
 
     /* Ensure fixes reach storage before clearing check bit */
-    bdrv_flush(s->bs);
+    bdrv_co_flush(s->bs);
 
     s->header.features &= ~QED_F_NEED_CHECK;
     qed_write_header_sync(s);
index b5483623989601efacc79d038e3c78fdb3d74bbf..caf2c024c2dc33d8deea248da311067ce97d4617 100644 (file)
@@ -51,6 +51,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/memalign.h"
 #include "trace.h"
 #include "qed.h"
 
index 405d446cbe70ea6087de48916f2e3efc713b1f2c..f04520d4c8349ee8ecca70d98662648a2359171f 100644 (file)
  */
 
 #include "qemu/osdep.h"
+#include "block/block-io.h"
 #include "trace.h"
 #include "qemu/sockets.h" /* for EINPROGRESS on Windows */
 #include "qed.h"
 #include "qemu/bswap.h"
+#include "qemu/memalign.h"
 
 /* Called with table_lock held.  */
-static int coroutine_fn qed_read_table(BDRVQEDState *s, uint64_t offset,
-                                       QEDTable *table)
+static int coroutine_fn GRAPH_RDLOCK
+qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
 {
     unsigned int bytes = s->header.cluster_size * s->header.table_size;
 
@@ -61,9 +63,9 @@ out:
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_write_table(BDRVQEDState *s, uint64_t offset,
-                                        QEDTable *table, unsigned int index,
-                                        unsigned int n, bool flush)
+static int coroutine_fn GRAPH_RDLOCK
+qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
+                unsigned int index, unsigned int n, bool flush)
 {
     unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
     unsigned int start, end, i;
@@ -99,7 +101,7 @@ static int coroutine_fn qed_write_table(BDRVQEDState *s, uint64_t offset,
     }
 
     if (flush) {
-        ret = bdrv_flush(s->bs);
+        ret = bdrv_co_flush(s->bs);
         if (ret < 0) {
             goto out;
         }
@@ -120,7 +122,7 @@ int coroutine_fn qed_read_l1_table_sync(BDRVQEDState *s)
 int coroutine_fn qed_write_l1_table(BDRVQEDState *s, unsigned int index,
                                     unsigned int n)
 {
-    BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
+    BLKDBG_CO_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
     return qed_write_table(s, s->header.l1_table_offset,
                            s->l1_table, index, n, false);
 }
@@ -148,7 +150,7 @@ int coroutine_fn qed_read_l2_table(BDRVQEDState *s, QEDRequest *request,
     request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
     request->l2_table->table = qed_alloc_table(s);
 
-    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
+    BLKDBG_CO_EVENT(s->bs->file, BLKDBG_L2_LOAD);
     ret = qed_read_table(s, offset, request->l2_table->table);
 
     if (ret) {
@@ -181,7 +183,7 @@ int coroutine_fn qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                                     unsigned int index, unsigned int n,
                                     bool flush)
 {
-    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
+    BLKDBG_CO_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
     return qed_write_table(s, request->l2_table->offset,
                            request->l2_table->table, index, n, flush);
 }
index b27e7546cabd292aa51b1d315ec02b2851b8a18f..bc2f0a61c0a99fef2ff9ccbed45496d5eeaf52f2 100644 (file)
@@ -20,6 +20,7 @@
 #include "qemu/main-loop.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
+#include "qemu/memalign.h"
 #include "trace.h"
 #include "qed.h"
 #include "sysemu/block-backend.h"
@@ -86,14 +87,9 @@ static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le)
 int qed_write_header_sync(BDRVQEDState *s)
 {
     QEDHeader le;
-    int ret;
 
     qed_header_cpu_to_le(&s->header, &le);
-    ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
-    if (ret != sizeof(le)) {
-        return ret;
-    }
-    return 0;
+    return bdrv_pwrite(s->bs->file, 0, sizeof(le), &le, 0);
 }
 
 /**
@@ -104,7 +100,7 @@ int qed_write_header_sync(BDRVQEDState *s)
  *
  * No new allocating reqs can start while this function runs.
  */
-static int coroutine_fn qed_write_header(BDRVQEDState *s)
+static int coroutine_fn GRAPH_RDLOCK qed_write_header(BDRVQEDState *s)
 {
     /* We must write full sectors for O_DIRECT but cannot necessarily generate
      * the data following the header if an unrecognized compat feature is
@@ -199,14 +195,15 @@ static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
  *
  * The string is NUL-terminated.
  */
-static int qed_read_string(BdrvChild *file, uint64_t offset, size_t n,
-                           char *buf, size_t buflen)
+static int coroutine_fn GRAPH_RDLOCK
+qed_read_string(BdrvChild *file, uint64_t offset,
+                size_t n, char *buf, size_t buflen)
 {
     int ret;
     if (n >= buflen) {
         return -EINVAL;
     }
-    ret = bdrv_pread(file, offset, buf, n);
+    ret = bdrv_co_pread(file, offset, n, buf, 0);
     if (ret < 0) {
         return ret;
     }
@@ -258,7 +255,7 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
     return l2_table;
 }
 
-static bool qed_plug_allocating_write_reqs(BDRVQEDState *s)
+static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s)
 {
     qemu_co_mutex_lock(&s->table_lock);
 
@@ -266,7 +263,7 @@ static bool qed_plug_allocating_write_reqs(BDRVQEDState *s)
     assert(!s->allocating_write_reqs_plugged);
     if (s->allocating_acb != NULL) {
         /* Another allocating write came concurrently.  This cannot happen
-         * from bdrv_qed_co_drain_begin, but it can happen when the timer runs.
+         * from bdrv_qed_drain_begin, but it can happen when the timer runs.
          */
         qemu_co_mutex_unlock(&s->table_lock);
         return false;
@@ -277,7 +274,7 @@ static bool qed_plug_allocating_write_reqs(BDRVQEDState *s)
     return true;
 }
 
-static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
+static void coroutine_fn qed_unplug_allocating_write_reqs(BDRVQEDState *s)
 {
     qemu_co_mutex_lock(&s->table_lock);
     assert(s->allocating_write_reqs_plugged);
@@ -286,12 +283,12 @@ static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
     qemu_co_mutex_unlock(&s->table_lock);
 }
 
-static void coroutine_fn qed_need_check_timer_entry(void *opaque)
+static void coroutine_fn GRAPH_RDLOCK qed_need_check_timer(BDRVQEDState *s)
 {
-    BDRVQEDState *s = opaque;
     int ret;
 
     trace_qed_need_check_timer_cb(s);
+    assert_bdrv_graph_readable();
 
     if (!qed_plug_allocating_write_reqs(s)) {
         return;
@@ -314,9 +311,21 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque)
     (void) ret;
 }
 
+static void coroutine_fn qed_need_check_timer_entry(void *opaque)
+{
+    BDRVQEDState *s = opaque;
+    GRAPH_RDLOCK_GUARD();
+
+    qed_need_check_timer(opaque);
+    bdrv_dec_in_flight(s->bs);
+}
+
 static void qed_need_check_timer_cb(void *opaque)
 {
+    BDRVQEDState *s = opaque;
     Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque);
+
+    bdrv_inc_in_flight(s->bs);
     qemu_coroutine_enter(co);
 }
 
@@ -359,7 +368,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
     }
 }
 
-static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs)
+static void bdrv_qed_drain_begin(BlockDriverState *bs)
 {
     BDRVQEDState *s = bs->opaque;
 
@@ -367,8 +376,12 @@ static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs)
      * header is flushed.
      */
     if (s->need_check_timer && timer_pending(s->need_check_timer)) {
+        Coroutine *co;
+
         qed_cancel_need_check_timer(s);
-        qed_need_check_timer_entry(s);
+        co = qemu_coroutine_create(qed_need_check_timer_entry, s);
+        bdrv_inc_in_flight(bs);
+        aio_co_enter(bdrv_get_aio_context(bs), co);
     }
 }
 
@@ -383,16 +396,17 @@ static void bdrv_qed_init_state(BlockDriverState *bs)
 }
 
 /* Called with table_lock held.  */
-static int coroutine_fn bdrv_qed_do_open(BlockDriverState *bs, QDict *options,
-                                         int flags, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
 {
     BDRVQEDState *s = bs->opaque;
     QEDHeader le_header;
     int64_t file_size;
     int ret;
 
-    ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
+    ret = bdrv_co_pread(bs->file, 0, sizeof(le_header), &le_header, 0);
     if (ret < 0) {
+        error_setg(errp, "Failed to read QED header");
         return ret;
     }
     qed_header_le_to_cpu(&le_header, &s->header);
@@ -408,25 +422,30 @@ static int coroutine_fn bdrv_qed_do_open(BlockDriverState *bs, QDict *options,
         return -ENOTSUP;
     }
     if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
+        error_setg(errp, "QED cluster size is invalid");
         return -EINVAL;
     }
 
     /* Round down file size to the last cluster */
-    file_size = bdrv_getlength(bs->file->bs);
+    file_size = bdrv_co_getlength(bs->file->bs);
     if (file_size < 0) {
+        error_setg(errp, "Failed to get file length");
         return file_size;
     }
     s->file_size = qed_start_of_cluster(s, file_size);
 
     if (!qed_is_table_size_valid(s->header.table_size)) {
+        error_setg(errp, "QED table size is invalid");
         return -EINVAL;
     }
     if (!qed_is_image_size_valid(s->header.image_size,
                                  s->header.cluster_size,
                                  s->header.table_size)) {
+        error_setg(errp, "QED image size is invalid");
         return -EINVAL;
     }
     if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
+        error_setg(errp, "QED table offset is invalid");
         return -EINVAL;
     }
 
@@ -438,25 +457,35 @@ static int coroutine_fn bdrv_qed_do_open(BlockDriverState *bs, QDict *options,
 
     /* Header size calculation must not overflow uint32_t */
     if (s->header.header_size > UINT32_MAX / s->header.cluster_size) {
+        error_setg(errp, "QED header size is too large");
         return -EINVAL;
     }
 
     if ((s->header.features & QED_F_BACKING_FILE)) {
+        g_autofree char *backing_file_str = NULL;
+
         if ((uint64_t)s->header.backing_filename_offset +
             s->header.backing_filename_size >
             s->header.cluster_size * s->header.header_size) {
+            error_setg(errp, "QED backing filename offset is invalid");
             return -EINVAL;
         }
 
+        backing_file_str = g_malloc(sizeof(bs->backing_file));
         ret = qed_read_string(bs->file, s->header.backing_filename_offset,
                               s->header.backing_filename_size,
-                              bs->auto_backing_file,
-                              sizeof(bs->auto_backing_file));
+                              backing_file_str, sizeof(bs->backing_file));
         if (ret < 0) {
+            error_setg(errp, "Failed to read backing filename");
             return ret;
         }
-        pstrcpy(bs->backing_file, sizeof(bs->backing_file),
-                bs->auto_backing_file);
+
+        if (!g_str_equal(backing_file_str, bs->backing_file)) {
+            pstrcpy(bs->backing_file, sizeof(bs->backing_file),
+                    backing_file_str);
+            pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
+                    backing_file_str);
+        }
 
         if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) {
             pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw");
@@ -475,11 +504,12 @@ static int coroutine_fn bdrv_qed_do_open(BlockDriverState *bs, QDict *options,
 
         ret = qed_write_header_sync(s);
         if (ret) {
+            error_setg(errp, "Failed to update header");
             return ret;
         }
 
         /* From here on only known autoclear feature bits are valid */
-        bdrv_flush(bs->file->bs);
+        bdrv_co_flush(bs->file->bs);
     }
 
     s->l1_table = qed_alloc_table(s);
@@ -487,6 +517,7 @@ static int coroutine_fn bdrv_qed_do_open(BlockDriverState *bs, QDict *options,
 
     ret = qed_read_l1_table_sync(s);
     if (ret) {
+        error_setg(errp, "Failed to read L1 table");
         goto out;
     }
 
@@ -503,6 +534,7 @@ static int coroutine_fn bdrv_qed_do_open(BlockDriverState *bs, QDict *options,
 
             ret = qed_check(s, &result, true);
             if (ret) {
+                error_setg(errp, "Image corrupted");
                 goto out;
             }
         }
@@ -531,13 +563,15 @@ static void coroutine_fn bdrv_qed_open_entry(void *opaque)
     QEDOpenCo *qoc = opaque;
     BDRVQEDState *s = qoc->bs->opaque;
 
+    GRAPH_RDLOCK_GUARD();
+
     qemu_co_mutex_lock(&s->table_lock);
     qoc->ret = bdrv_qed_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp);
     qemu_co_mutex_unlock(&s->table_lock);
 }
 
-static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
-                         Error **errp)
+static int coroutine_mixed_fn bdrv_qed_open(BlockDriverState *bs, QDict *options,
+                                            int flags, Error **errp)
 {
     QEDOpenCo qoc = {
         .bs = bs,
@@ -546,22 +580,19 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
         .errp = errp,
         .ret = -EINPROGRESS
     };
+    int ret;
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_IMAGE, false, errp);
-    if (!bs->file) {
-        return -EINVAL;
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
     }
 
     bdrv_qed_init_state(bs);
-    if (qemu_in_coroutine()) {
-        bdrv_qed_open_entry(&qoc);
-    } else {
-        assert(qemu_get_current_aio_context() == qemu_get_aio_context());
-        qemu_coroutine_enter(qemu_coroutine_create(bdrv_qed_open_entry, &qoc));
-        BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
-    }
+    assert(!qemu_in_coroutine());
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+    qemu_coroutine_enter(qemu_coroutine_create(bdrv_qed_open_entry, &qoc));
     BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
+
     return qoc.ret;
 }
 
@@ -570,6 +601,7 @@ static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp)
     BDRVQEDState *s = bs->opaque;
 
     bs->bl.pwrite_zeroes_alignment = s->header.cluster_size;
+    bs->bl.max_pwrite_zeroes = QEMU_ALIGN_DOWN(INT_MAX, s->header.cluster_size);
 }
 
 /* We have nothing to do for QED reopen, stubs just return
@@ -580,7 +612,7 @@ static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
     return 0;
 }
 
-static void bdrv_qed_close(BlockDriverState *bs)
+static void GRAPH_RDLOCK bdrv_qed_do_close(BlockDriverState *bs)
 {
     BDRVQEDState *s = bs->opaque;
 
@@ -599,8 +631,16 @@ static void bdrv_qed_close(BlockDriverState *bs)
     qemu_vfree(s->l1_table);
 }
 
-static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
-                                           Error **errp)
+static void GRAPH_UNLOCKED bdrv_qed_close(BlockDriverState *bs)
+{
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    bdrv_qed_do_close(bs);
+}
+
+static int coroutine_fn GRAPH_UNLOCKED
+bdrv_qed_co_create(BlockdevCreateOptions *opts, Error **errp)
 {
     BlockdevCreateOptionsQed *qed_opts;
     BlockBackend *blk = NULL;
@@ -646,13 +686,13 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
     }
 
     /* Create BlockBackend to write to the image */
-    bs = bdrv_open_blockdev_ref(qed_opts->file, errp);
+    bs = bdrv_co_open_blockdev_ref(qed_opts->file, errp);
     if (bs == NULL) {
         return -EIO;
     }
 
-    blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
-                          errp);
+    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+                             errp);
     if (!blk) {
         ret = -EPERM;
         goto out;
@@ -677,12 +717,12 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
      * The QED format associates file length with allocation status,
      * so a new file (which is empty) must have a length of 0.
      */
-    ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, 0, errp);
+    ret = blk_co_truncate(blk, 0, true, PREALLOC_MODE_OFF, 0, errp);
     if (ret < 0) {
         goto out;
     }
 
-    if (qed_opts->has_backing_file) {
+    if (qed_opts->backing_file) {
         header.features |= QED_F_BACKING_FILE;
         header.backing_filename_offset = sizeof(le_header);
         header.backing_filename_size = strlen(qed_opts->backing_file);
@@ -696,18 +736,18 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
     }
 
     qed_header_cpu_to_le(&header, &le_header);
-    ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header), 0);
+    ret = blk_co_pwrite(blk, 0, sizeof(le_header), &le_header, 0);
     if (ret < 0) {
         goto out;
     }
-    ret = blk_pwrite(blk, sizeof(le_header), qed_opts->backing_file,
-                     header.backing_filename_size, 0);
+    ret = blk_co_pwrite(blk, sizeof(le_header), header.backing_filename_size,
+                     qed_opts->backing_file, 0);
     if (ret < 0) {
         goto out;
     }
 
     l1_table = g_malloc0(l1_size);
-    ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size, 0);
+    ret = blk_co_pwrite(blk, header.l1_table_offset, l1_size, l1_table, 0);
     if (ret < 0) {
         goto out;
     }
@@ -715,15 +755,14 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
     ret = 0; /* success */
 out:
     g_free(l1_table);
-    blk_unref(blk);
-    bdrv_unref(bs);
+    blk_co_unref(blk);
+    bdrv_co_unref(bs);
     return ret;
 }
 
-static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv,
-                                                const char *filename,
-                                                QemuOpts *opts,
-                                                Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename,
+                        QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
     QDict *qdict;
@@ -748,13 +787,13 @@ static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv,
     }
 
     /* Create and open the file (protocol layer) */
-    ret = bdrv_create_file(filename, opts, errp);
+    ret = bdrv_co_create_file(filename, opts, errp);
     if (ret < 0) {
         goto fail;
     }
 
-    bs = bdrv_open(filename, NULL, NULL,
-                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs = bdrv_co_open(filename, NULL, NULL,
+                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (bs == NULL) {
         ret = -EIO;
         goto fail;
@@ -787,16 +826,15 @@ static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv,
 
 fail:
     qobject_unref(qdict);
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
     qapi_free_BlockdevCreateOptions(create_options);
     return ret;
 }
 
-static int coroutine_fn bdrv_qed_co_block_status(BlockDriverState *bs,
-                                                 bool want_zero,
-                                                 int64_t pos, int64_t bytes,
-                                                 int64_t *pnum, int64_t *map,
-                                                 BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_block_status(BlockDriverState *bs, bool want_zero, int64_t pos,
+                         int64_t bytes, int64_t *pnum, int64_t *map,
+                         BlockDriverState **file)
 {
     BDRVQEDState *s = bs->opaque;
     size_t len = MIN(bytes, SIZE_MAX);
@@ -849,11 +887,11 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
  * This function reads qiov->size bytes starting at pos from the backing file.
  * If there is no backing file then zeroes are read.
  */
-static int coroutine_fn qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
-                                              QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+qed_read_backing_file(BDRVQEDState *s, uint64_t pos, QEMUIOVector *qiov)
 {
     if (s->bs->backing) {
-        BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
+        BLKDBG_CO_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
         return bdrv_co_preadv(s->bs->backing, pos, qiov->size, qiov, 0);
     }
     qemu_iovec_memset(qiov, 0, 0, qiov->size);
@@ -868,9 +906,9 @@ static int coroutine_fn qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
  * @len:        Number of bytes
  * @offset:     Byte offset in image file
  */
-static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s,
-                                                   uint64_t pos, uint64_t len,
-                                                   uint64_t offset)
+static int coroutine_fn GRAPH_RDLOCK
+qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, uint64_t len,
+                           uint64_t offset)
 {
     QEMUIOVector qiov;
     int ret;
@@ -888,7 +926,7 @@ static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s,
         goto out;
     }
 
-    BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
+    BLKDBG_CO_EVENT(s->bs->file, BLKDBG_COW_WRITE);
     ret = bdrv_co_pwritev(s->bs->file, offset, qiov.size, &qiov, 0);
     if (ret < 0) {
         goto out;
@@ -963,7 +1001,7 @@ static void coroutine_fn qed_aio_complete(QEDAIOCB *acb)
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK qed_aio_write_l1_update(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
     CachedL2Table *l2_table = acb->request.l2_table;
@@ -993,7 +1031,8 @@ static int coroutine_fn qed_aio_write_l1_update(QEDAIOCB *acb)
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
 {
     BDRVQEDState *s = acb_to_s(acb);
     bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
@@ -1031,7 +1070,7 @@ static int coroutine_fn qed_aio_write_l2_update(QEDAIOCB *acb, uint64_t offset)
  *
  * Called with table_lock *not* held.
  */
-static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK qed_aio_write_main(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
     uint64_t offset = acb->cur_cluster +
@@ -1039,7 +1078,7 @@ static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
 
     trace_qed_aio_write_main(s, acb, 0, offset, acb->cur_qiov.size);
 
-    BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
+    BLKDBG_CO_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
     return bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size,
                            &acb->cur_qiov, 0);
 }
@@ -1049,7 +1088,7 @@ static int coroutine_fn qed_aio_write_main(QEDAIOCB *acb)
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_aio_write_cow(QEDAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK qed_aio_write_cow(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
     uint64_t start, len, offset;
@@ -1107,7 +1146,7 @@ out:
 /**
  * Check if the QED_F_NEED_CHECK bit should be set during allocating write
  */
-static bool qed_should_set_need_check(BDRVQEDState *s)
+static bool GRAPH_RDLOCK qed_should_set_need_check(BDRVQEDState *s)
 {
     /* The flush before L2 update path ensures consistency */
     if (s->bs->backing) {
@@ -1127,7 +1166,8 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
     BDRVQEDState *s = acb_to_s(acb);
     int ret;
@@ -1190,8 +1230,8 @@ static int coroutine_fn qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset,
-                                              size_t len)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
 {
     BDRVQEDState *s = acb_to_s(acb);
     int r;
@@ -1233,8 +1273,8 @@ out:
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
-                                           uint64_t offset, size_t len)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_write_data(void *opaque, int ret, uint64_t offset, size_t len)
 {
     QEDAIOCB *acb = opaque;
 
@@ -1266,8 +1306,8 @@ static int coroutine_fn qed_aio_write_data(void *opaque, int ret,
  *
  * Called with table_lock held.
  */
-static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
-                                          uint64_t offset, size_t len)
+static int coroutine_fn GRAPH_RDLOCK
+qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len)
 {
     QEDAIOCB *acb = opaque;
     BDRVQEDState *s = acb_to_s(acb);
@@ -1292,7 +1332,7 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
     } else if (ret != QED_CLUSTER_FOUND) {
         r = qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov);
     } else {
-        BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+        BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO);
         r = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
                            &acb->cur_qiov, 0);
     }
@@ -1304,7 +1344,7 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret,
 /**
  * Begin next I/O or complete the request
  */
-static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK qed_aio_next_io(QEDAIOCB *acb)
 {
     BDRVQEDState *s = acb_to_s(acb);
     uint64_t offset;
@@ -1349,9 +1389,9 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
     return ret;
 }
 
-static int coroutine_fn qed_co_request(BlockDriverState *bs, int64_t sector_num,
-                                       QEMUIOVector *qiov, int nb_sectors,
-                                       int flags)
+static int coroutine_fn GRAPH_RDLOCK
+qed_co_request(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov,
+               int nb_sectors, int flags)
 {
     QEDAIOCB acb = {
         .bs         = bs,
@@ -1368,25 +1408,23 @@ static int coroutine_fn qed_co_request(BlockDriverState *bs, int64_t sector_num,
     return qed_aio_next_io(&acb);
 }
 
-static int coroutine_fn bdrv_qed_co_readv(BlockDriverState *bs,
-                                          int64_t sector_num, int nb_sectors,
-                                          QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                  QEMUIOVector *qiov)
 {
     return qed_co_request(bs, sector_num, qiov, nb_sectors, 0);
 }
 
-static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs,
-                                           int64_t sector_num, int nb_sectors,
-                                           QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                   QEMUIOVector *qiov, int flags)
 {
-    assert(!flags);
     return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE);
 }
 
-static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
-                                                  int64_t offset,
-                                                  int bytes,
-                                                  BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          BdrvRequestFlags flags)
 {
     BDRVQEDState *s = bs->opaque;
 
@@ -1396,6 +1434,12 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
      */
     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);
 
+    /*
+     * QED is not prepared for 63bit write-zero requests, so rely on
+     * max_pwrite_zeroes.
+     */
+    assert(bytes <= INT_MAX);
+
     /* Fall back if the request is not aligned */
     if (qed_offset_into_cluster(s, offset) ||
         qed_offset_into_cluster(s, bytes)) {
@@ -1407,12 +1451,10 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
                           QED_AIOCB_WRITE | QED_AIOCB_ZERO);
 }
 
-static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs,
-                                             int64_t offset,
-                                             bool exact,
-                                             PreallocMode prealloc,
-                                             BdrvRequestFlags flags,
-                                             Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
+                     PreallocMode prealloc, BdrvRequestFlags flags,
+                     Error **errp)
 {
     BDRVQEDState *s = bs->opaque;
     uint64_t old_image_size;
@@ -1445,13 +1487,14 @@ static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs,
     return ret;
 }
 
-static int64_t bdrv_qed_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn bdrv_qed_co_getlength(BlockDriverState *bs)
 {
     BDRVQEDState *s = bs->opaque;
     return s->header.image_size;
 }
 
-static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+static int coroutine_fn
+bdrv_qed_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     BDRVQEDState *s = bs->opaque;
 
@@ -1461,9 +1504,9 @@ static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
     return 0;
 }
 
-static int bdrv_qed_change_backing_file(BlockDriverState *bs,
-                                        const char *backing_file,
-                                        const char *backing_fmt)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
+                                const char *backing_fmt)
 {
     BDRVQEDState *s = bs->opaque;
     QEDHeader new_header, le_header;
@@ -1525,7 +1568,7 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
     }
 
     /* Write new header */
-    ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
+    ret = bdrv_co_pwrite_sync(bs->file, 0, buffer_len, buffer, 0);
     g_free(buffer);
     if (ret == 0) {
         memcpy(&s->header, &new_header, sizeof(new_header));
@@ -1533,32 +1576,26 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
     return ret;
 }
 
-static void coroutine_fn bdrv_qed_co_invalidate_cache(BlockDriverState *bs,
-                                                      Error **errp)
+static void coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_invalidate_cache(BlockDriverState *bs, Error **errp)
 {
     BDRVQEDState *s = bs->opaque;
-    Error *local_err = NULL;
     int ret;
 
-    bdrv_qed_close(bs);
+    bdrv_qed_do_close(bs);
 
     bdrv_qed_init_state(bs);
     qemu_co_mutex_lock(&s->table_lock);
-    ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err);
+    ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, errp);
     qemu_co_mutex_unlock(&s->table_lock);
-    if (local_err) {
-        error_propagate_prepend(errp, local_err,
-                                "Could not reopen qed layer: ");
-        return;
-    } else if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not reopen qed layer");
-        return;
+    if (ret < 0) {
+        error_prepend(errp, "Could not reopen qed layer: ");
     }
 }
 
-static int coroutine_fn bdrv_qed_co_check(BlockDriverState *bs,
-                                          BdrvCheckResult *result,
-                                          BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_qed_co_check(BlockDriverState *bs, BdrvCheckResult *result,
+                  BdrvCheckMode fix)
 {
     BDRVQEDState *s = bs->opaque;
     int ret;
@@ -1605,34 +1642,34 @@ static QemuOptsList qed_create_opts = {
 };
 
 static BlockDriver bdrv_qed = {
-    .format_name              = "qed",
-    .instance_size            = sizeof(BDRVQEDState),
-    .create_opts              = &qed_create_opts,
-    .is_format                = true,
-    .supports_backing         = true,
-
-    .bdrv_probe               = bdrv_qed_probe,
-    .bdrv_open                = bdrv_qed_open,
-    .bdrv_close               = bdrv_qed_close,
-    .bdrv_reopen_prepare      = bdrv_qed_reopen_prepare,
-    .bdrv_child_perm          = bdrv_default_perms,
-    .bdrv_co_create           = bdrv_qed_co_create,
-    .bdrv_co_create_opts      = bdrv_qed_co_create_opts,
-    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
-    .bdrv_co_block_status     = bdrv_qed_co_block_status,
-    .bdrv_co_readv            = bdrv_qed_co_readv,
-    .bdrv_co_writev           = bdrv_qed_co_writev,
-    .bdrv_co_pwrite_zeroes    = bdrv_qed_co_pwrite_zeroes,
-    .bdrv_co_truncate         = bdrv_qed_co_truncate,
-    .bdrv_getlength           = bdrv_qed_getlength,
-    .bdrv_get_info            = bdrv_qed_get_info,
-    .bdrv_refresh_limits      = bdrv_qed_refresh_limits,
-    .bdrv_change_backing_file = bdrv_qed_change_backing_file,
-    .bdrv_co_invalidate_cache = bdrv_qed_co_invalidate_cache,
-    .bdrv_co_check            = bdrv_qed_co_check,
-    .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
-    .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
-    .bdrv_co_drain_begin      = bdrv_qed_co_drain_begin,
+    .format_name                    = "qed",
+    .instance_size                  = sizeof(BDRVQEDState),
+    .create_opts                    = &qed_create_opts,
+    .is_format                      = true,
+    .supports_backing               = true,
+
+    .bdrv_probe                     = bdrv_qed_probe,
+    .bdrv_open                      = bdrv_qed_open,
+    .bdrv_close                     = bdrv_qed_close,
+    .bdrv_reopen_prepare            = bdrv_qed_reopen_prepare,
+    .bdrv_child_perm                = bdrv_default_perms,
+    .bdrv_co_create                 = bdrv_qed_co_create,
+    .bdrv_co_create_opts            = bdrv_qed_co_create_opts,
+    .bdrv_has_zero_init             = bdrv_has_zero_init_1,
+    .bdrv_co_block_status           = bdrv_qed_co_block_status,
+    .bdrv_co_readv                  = bdrv_qed_co_readv,
+    .bdrv_co_writev                 = bdrv_qed_co_writev,
+    .bdrv_co_pwrite_zeroes          = bdrv_qed_co_pwrite_zeroes,
+    .bdrv_co_truncate               = bdrv_qed_co_truncate,
+    .bdrv_co_getlength              = bdrv_qed_co_getlength,
+    .bdrv_co_get_info               = bdrv_qed_co_get_info,
+    .bdrv_refresh_limits            = bdrv_qed_refresh_limits,
+    .bdrv_co_change_backing_file    = bdrv_qed_co_change_backing_file,
+    .bdrv_co_invalidate_cache       = bdrv_qed_co_invalidate_cache,
+    .bdrv_co_check                  = bdrv_qed_co_check,
+    .bdrv_detach_aio_context        = bdrv_qed_detach_aio_context,
+    .bdrv_attach_aio_context        = bdrv_qed_attach_aio_context,
+    .bdrv_drain_begin               = bdrv_qed_drain_begin,
 };
 
 static void bdrv_qed_init(void)
index 3d12bf78d4128356e62b9772ab8695f41f997d9b..26d4bf038c7a90473f2f47b7e597e0507df836d5 100644 (file)
@@ -185,7 +185,7 @@ enum {
 /**
  * Header functions
  */
-int qed_write_header_sync(BDRVQEDState *s);
+int GRAPH_RDLOCK qed_write_header_sync(BDRVQEDState *s);
 
 /**
  * L2 cache functions
@@ -200,33 +200,40 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
 /**
  * Table I/O functions
  */
-int coroutine_fn qed_read_l1_table_sync(BDRVQEDState *s);
-int coroutine_fn qed_write_l1_table(BDRVQEDState *s, unsigned int index,
-                                    unsigned int n);
-int coroutine_fn qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
-                                         unsigned int n);
-int coroutine_fn qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
-                                        uint64_t offset);
-int coroutine_fn qed_read_l2_table(BDRVQEDState *s, QEDRequest *request,
-                                   uint64_t offset);
-int coroutine_fn qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
-                                    unsigned int index, unsigned int n,
-                                    bool flush);
-int coroutine_fn qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
-                                         unsigned int index, unsigned int n,
-                                         bool flush);
+int coroutine_fn GRAPH_RDLOCK qed_read_l1_table_sync(BDRVQEDState *s);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, unsigned int n);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, unsigned int index,
+                   unsigned int n, bool flush);
+
+int coroutine_fn GRAPH_RDLOCK
+qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
+                        unsigned int index, unsigned int n, bool flush);
 
 /**
  * Cluster functions
  */
-int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
-                                  uint64_t pos, size_t *len,
-                                  uint64_t *img_offset);
+int coroutine_fn GRAPH_RDLOCK
+qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
+                 size_t *len, uint64_t *img_offset);
 
 /**
  * Consistency check
  */
-int coroutine_fn qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
+int coroutine_fn GRAPH_RDLOCK
+qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
 
 QEDTable *qed_alloc_table(BDRVQEDState *s);
 
index b10fc2089e5e9e2d9dabd379e398d1b8aa9862fb..505b8b3e18e71f7082d14d205d3c5ee6d5251829 100644 (file)
@@ -17,7 +17,9 @@
 #include "qemu/cutils.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
+#include "qemu/memalign.h"
 #include "block/block_int.h"
+#include "block/coroutines.h"
 #include "block/qdict.h"
 #include "qapi/error.h"
 #include "qapi/qapi-events-block.h"
@@ -159,11 +161,10 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
     return a->l == b->l;
 }
 
-static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
-                                   QEMUIOVector *qiov,
-                                   uint64_t offset,
-                                   uint64_t bytes,
-                                   int flags)
+static QuorumAIOCB *coroutine_fn quorum_aio_get(BlockDriverState *bs,
+                                                QEMUIOVector *qiov,
+                                                uint64_t offset, uint64_t bytes,
+                                                int flags)
 {
     BDRVQuorumState *s = bs->opaque;
     QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
@@ -201,11 +202,11 @@ static void quorum_report_bad(QuorumOpType type, uint64_t offset,
         msg = strerror(-ret);
     }
 
-    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, start_sector,
+    qapi_event_send_quorum_report_bad(type, msg, node_name, start_sector,
                                       end_sector - start_sector);
 }
 
-static void quorum_report_failure(QuorumAIOCB *acb)
+static void GRAPH_RDLOCK quorum_report_failure(QuorumAIOCB *acb)
 {
     const char *reference = bdrv_get_device_or_node_name(acb->bs);
     int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE;
@@ -218,7 +219,7 @@ static void quorum_report_failure(QuorumAIOCB *acb)
 
 static int quorum_vote_error(QuorumAIOCB *acb);
 
-static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
+static bool GRAPH_RDLOCK quorum_has_too_much_io_failed(QuorumAIOCB *acb)
 {
     BDRVQuorumState *s = acb->bs->opaque;
 
@@ -231,8 +232,6 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
     return false;
 }
 
-static int read_fifo_child(QuorumAIOCB *acb);
-
 static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
 {
     int i;
@@ -271,7 +270,11 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
     }
 }
 
-static void quorum_rewrite_entry(void *opaque)
+/*
+ * This function can count as GRAPH_RDLOCK because read_quorum_children() holds
+ * the graph lock and keeps it until this coroutine has terminated.
+ */
+static void coroutine_fn GRAPH_RDLOCK quorum_rewrite_entry(void *opaque)
 {
     QuorumCo *co = opaque;
     QuorumAIOCB *acb = co->acb;
@@ -291,8 +294,8 @@ static void quorum_rewrite_entry(void *opaque)
     }
 }
 
-static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
-                                        QuorumVoteValue *value)
+static bool coroutine_fn GRAPH_RDLOCK
+quorum_rewrite_bad_versions(QuorumAIOCB *acb, QuorumVoteValue *value)
 {
     QuorumVoteVersion *version;
     QuorumVoteItem *item;
@@ -492,7 +495,7 @@ static int quorum_vote_error(QuorumAIOCB *acb)
     return ret;
 }
 
-static void quorum_vote(QuorumAIOCB *acb)
+static void coroutine_fn GRAPH_RDLOCK quorum_vote(QuorumAIOCB *acb)
 {
     bool quorum = true;
     int i, j, ret;
@@ -572,7 +575,11 @@ free_exit:
     quorum_free_vote_list(&acb->votes);
 }
 
-static void read_quorum_children_entry(void *opaque)
+/*
+ * This function can count as GRAPH_RDLOCK because read_quorum_children() holds
+ * the graph lock and keeps it until this coroutine has terminated.
+ */
+static void coroutine_fn GRAPH_RDLOCK read_quorum_children_entry(void *opaque)
 {
     QuorumCo *co = opaque;
     QuorumAIOCB *acb = co->acb;
@@ -600,7 +607,7 @@ static void read_quorum_children_entry(void *opaque)
     }
 }
 
-static int read_quorum_children(QuorumAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK read_quorum_children(QuorumAIOCB *acb)
 {
     BDRVQuorumState *s = acb->bs->opaque;
     int i;
@@ -641,7 +648,7 @@ static int read_quorum_children(QuorumAIOCB *acb)
     return acb->vote_ret;
 }
 
-static int read_fifo_child(QuorumAIOCB *acb)
+static int coroutine_fn GRAPH_RDLOCK read_fifo_child(QuorumAIOCB *acb)
 {
     BDRVQuorumState *s = acb->bs->opaque;
     int n, ret;
@@ -662,8 +669,9 @@ static int read_fifo_child(QuorumAIOCB *acb)
     return ret;
 }
 
-static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
-                            uint64_t bytes, QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+quorum_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                 QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVQuorumState *s = bs->opaque;
     QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
@@ -682,7 +690,11 @@ static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
     return ret;
 }
 
-static void write_quorum_entry(void *opaque)
+/*
+ * This function can count as GRAPH_RDLOCK because quorum_co_pwritev() holds the
+ * graph lock and keeps it until this coroutine has terminated.
+ */
+static void coroutine_fn GRAPH_RDLOCK write_quorum_entry(void *opaque)
 {
     QuorumCo *co = opaque;
     QuorumAIOCB *acb = co->acb;
@@ -691,8 +703,13 @@ static void write_quorum_entry(void *opaque)
     QuorumChildRequest *sacb = &acb->qcrs[i];
 
     sacb->bs = s->children[i]->bs;
-    sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
-                                acb->qiov, acb->flags);
+    if (acb->flags & BDRV_REQ_ZERO_WRITE) {
+        sacb->ret = bdrv_co_pwrite_zeroes(s->children[i], acb->offset,
+                                          acb->bytes, acb->flags);
+    } else {
+        sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
+                                    acb->qiov, acb->flags);
+    }
     if (sacb->ret == 0) {
         acb->success_count++;
     } else {
@@ -708,8 +725,9 @@ static void write_quorum_entry(void *opaque)
     }
 }
 
-static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
-                             uint64_t bytes, QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+quorum_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                  QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVQuorumState *s = bs->opaque;
     QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
@@ -738,19 +756,28 @@ static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
     return ret;
 }
 
-static int64_t quorum_getlength(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK
+quorum_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                        BdrvRequestFlags flags)
+{
+    return quorum_co_pwritev(bs, offset, bytes, NULL,
+                             flags | BDRV_REQ_ZERO_WRITE);
+}
+
+static int64_t coroutine_fn GRAPH_RDLOCK
+quorum_co_getlength(BlockDriverState *bs)
 {
     BDRVQuorumState *s = bs->opaque;
     int64_t result;
     int i;
 
     /* check that all file have the same length */
-    result = bdrv_getlength(s->children[0]->bs);
+    result = bdrv_co_getlength(s->children[0]->bs);
     if (result < 0) {
         return result;
     }
     for (i = 1; i < s->num_children; i++) {
-        int64_t value = bdrv_getlength(s->children[i]->bs);
+        int64_t value = bdrv_co_getlength(s->children[i]->bs);
         if (value < 0) {
             return value;
         }
@@ -762,7 +789,7 @@ static int64_t quorum_getlength(BlockDriverState *bs)
     return result;
 }
 
-static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
+static coroutine_fn GRAPH_RDLOCK int quorum_co_flush(BlockDriverState *bs)
 {
     BDRVQuorumState *s = bs->opaque;
     QuorumVoteVersion *winner = NULL;
@@ -798,8 +825,8 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
     return result;
 }
 
-static bool quorum_recurse_can_replace(BlockDriverState *bs,
-                                       BlockDriverState *to_replace)
+static bool GRAPH_RDLOCK
+quorum_recurse_can_replace(BlockDriverState *bs, BlockDriverState *to_replace)
 {
     BDRVQuorumState *s = bs->opaque;
     int i;
@@ -856,7 +883,7 @@ static int quorum_valid_threshold(int threshold, int num_children, Error **errp)
 
     if (threshold < 1) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
-                   "vote-threshold", "value >= 1");
+                   "vote-threshold", "value >= 1");
         return -ERANGE;
     }
 
@@ -896,11 +923,25 @@ static QemuOptsList quorum_runtime_opts = {
     },
 };
 
+static void quorum_refresh_flags(BlockDriverState *bs)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i;
+
+    bs->supported_zero_flags =
+        BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
+
+    for (i = 0; i < s->num_children; i++) {
+        bs->supported_zero_flags &= s->children[i]->bs->supported_zero_flags;
+    }
+
+    bs->supported_zero_flags |= BDRV_REQ_WRITE_UNCHANGED;
+}
+
 static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
                        Error **errp)
 {
     BDRVQuorumState *s = bs->opaque;
-    Error *local_err = NULL;
     QemuOpts *opts = NULL;
     const char *pattern_str;
     bool *opened;
@@ -978,9 +1019,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
 
         s->children[i] = bdrv_open_child(NULL, options, indexstr, bs,
                                          &child_of_bds, BDRV_CHILD_DATA, false,
-                                         &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
+                                         errp);
+        if (!s->children[i]) {
             ret = -EINVAL;
             goto close_exit;
         }
@@ -990,18 +1030,21 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
     s->next_child_index = s->num_children;
 
     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+    quorum_refresh_flags(bs);
 
     g_free(opened);
     goto exit;
 
 close_exit:
     /* cleanup on error */
+    bdrv_graph_wrlock(NULL);
     for (i = 0; i < s->num_children; i++) {
         if (!opened[i]) {
             continue;
         }
         bdrv_unref_child(bs, s->children[i]);
     }
+    bdrv_graph_wrunlock(NULL);
     g_free(s->children);
     g_free(opened);
 exit:
@@ -1014,15 +1057,17 @@ static void quorum_close(BlockDriverState *bs)
     BDRVQuorumState *s = bs->opaque;
     int i;
 
+    bdrv_graph_wrlock(NULL);
     for (i = 0; i < s->num_children; i++) {
         bdrv_unref_child(bs, s->children[i]);
     }
+    bdrv_graph_wrunlock(NULL);
 
     g_free(s->children);
 }
 
-static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
-                             Error **errp)
+static void GRAPH_WRLOCK
+quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, Error **errp)
 {
     BDRVQuorumState *s = bs->opaque;
     BdrvChild *child;
@@ -1048,8 +1093,6 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
     }
     s->next_child_index++;
 
-    bdrv_drained_begin(bs);
-
     /* We can safely add the child now */
     bdrv_ref(child_bs);
 
@@ -1057,17 +1100,15 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
                               BDRV_CHILD_DATA, errp);
     if (child == NULL) {
         s->next_child_index--;
-        goto out;
+        return;
     }
     s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
     s->children[s->num_children++] = child;
-
-out:
-    bdrv_drained_end(bs);
+    quorum_refresh_flags(bs);
 }
 
-static void quorum_del_child(BlockDriverState *bs, BdrvChild *child,
-                             Error **errp)
+static void GRAPH_WRLOCK
+quorum_del_child(BlockDriverState *bs, BdrvChild *child, Error **errp)
 {
     BDRVQuorumState *s = bs->opaque;
     char indexstr[INDEXSTR_LEN];
@@ -1097,15 +1138,14 @@ static void quorum_del_child(BlockDriverState *bs, BdrvChild *child,
         s->next_child_index--;
     }
 
-    bdrv_drained_begin(bs);
-
     /* We can safely remove this child now */
     memmove(&s->children[i], &s->children[i + 1],
             (s->num_children - i - 1) * sizeof(BdrvChild *));
     s->children = g_renew(BdrvChild *, s->children, --s->num_children);
+
     bdrv_unref_child(bs, child);
 
-    bdrv_drained_end(bs);
+    quorum_refresh_flags(bs);
 }
 
 static void quorum_gather_child_options(BlockDriverState *bs, QDict *target,
@@ -1179,6 +1219,55 @@ static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c,
              | DEFAULT_PERM_UNCHANGED;
 }
 
+/*
+ * Each one of the children can report different status flags even
+ * when they contain the same data, so what this function does is
+ * return BDRV_BLOCK_ZERO if *all* children agree that a certain
+ * region contains zeroes, and BDRV_BLOCK_DATA otherwise.
+ */
+static int coroutine_fn GRAPH_RDLOCK
+quorum_co_block_status(BlockDriverState *bs, bool want_zero,
+                       int64_t offset, int64_t count,
+                       int64_t *pnum, int64_t *map, BlockDriverState **file)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i, ret;
+    int64_t pnum_zero = count;
+    int64_t pnum_data = 0;
+
+    for (i = 0; i < s->num_children; i++) {
+        int64_t bytes;
+        ret = bdrv_co_common_block_status_above(s->children[i]->bs, NULL, false,
+                                                want_zero, offset, count,
+                                                &bytes, NULL, NULL, NULL);
+        if (ret < 0) {
+            quorum_report_bad(QUORUM_OP_TYPE_READ, offset, count,
+                              s->children[i]->bs->node_name, ret);
+            pnum_data = count;
+            break;
+        }
+        /*
+         * Even if all children agree about whether there are zeroes
+         * or not at @offset they might disagree on the size, so use
+         * the smallest when reporting BDRV_BLOCK_ZERO and the largest
+         * when reporting BDRV_BLOCK_DATA.
+         */
+        if (ret & BDRV_BLOCK_ZERO) {
+            pnum_zero = MIN(pnum_zero, bytes);
+        } else {
+            pnum_data = MAX(pnum_data, bytes);
+        }
+    }
+
+    if (pnum_data) {
+        *pnum = pnum_data;
+        return BDRV_BLOCK_DATA;
+    } else {
+        *pnum = pnum_zero;
+        return BDRV_BLOCK_ZERO;
+    }
+}
+
 static const char *const quorum_strong_runtime_opts[] = {
     QUORUM_OPT_VOTE_THRESHOLD,
     QUORUM_OPT_BLKVERIFY,
@@ -1197,13 +1286,15 @@ static BlockDriver bdrv_quorum = {
     .bdrv_close                         = quorum_close,
     .bdrv_gather_child_options          = quorum_gather_child_options,
     .bdrv_dirname                       = quorum_dirname,
+    .bdrv_co_block_status               = quorum_co_block_status,
 
-    .bdrv_co_flush_to_disk              = quorum_co_flush,
+    .bdrv_co_flush                      = quorum_co_flush,
 
-    .bdrv_getlength                     = quorum_getlength,
+    .bdrv_co_getlength                  = quorum_co_getlength,
 
     .bdrv_co_preadv                     = quorum_co_preadv,
     .bdrv_co_pwritev                    = quorum_co_pwritev,
+    .bdrv_co_pwrite_zeroes              = quorum_co_pwrite_zeroes,
 
     .bdrv_add_child                     = quorum_add_child,
     .bdrv_del_child                     = quorum_del_child,
index 42ec50802bcdbbec43e9a8997956f7c7d13c8faf..1111dffd54f7da49bffa43d6177208b96e929aba 100644 (file)
  */
 
 #include "qemu/osdep.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "qapi/error.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
+#include "qemu/memalign.h"
 
 typedef struct BDRVRawState {
     uint64_t offset;
@@ -93,9 +95,9 @@ end:
     return ret;
 }
 
-static int raw_apply_options(BlockDriverState *bs, BDRVRawState *s,
-                             uint64_t offset, bool has_size, uint64_t size,
-                             Error **errp)
+static int GRAPH_RDLOCK
+raw_apply_options(BlockDriverState *bs, BDRVRawState *s, uint64_t offset,
+                  bool has_size, uint64_t size, Error **errp)
 {
     int64_t real_size = 0;
 
@@ -143,6 +145,9 @@ static int raw_reopen_prepare(BDRVReopenState *reopen_state,
     uint64_t offset, size;
     int ret;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     assert(reopen_state != NULL);
     assert(reopen_state->bs != NULL);
 
@@ -181,8 +186,8 @@ static void raw_reopen_abort(BDRVReopenState *state)
 }
 
 /* Check and adjust the offset, against 'offset' and 'size' options. */
-static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset,
-                                    uint64_t bytes, bool is_write)
+static inline int raw_adjust_offset(BlockDriverState *bs, int64_t *offset,
+                                    int64_t bytes, bool is_write)
 {
     BDRVRawState *s = bs->opaque;
 
@@ -201,9 +206,9 @@ static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset,
     return 0;
 }
 
-static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
-                                      uint64_t bytes, QEMUIOVector *qiov,
-                                      int flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+              QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     int ret;
 
@@ -212,13 +217,13 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
         return ret;
     }
 
-    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+    BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO);
     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
 
-static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
-                                       uint64_t bytes, QEMUIOVector *qiov,
-                                       int flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+               QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     void *buf = NULL;
     BlockDriver *drv;
@@ -257,6 +262,8 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
         qemu_iovec_add(&local_qiov, buf, 512);
         qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
         qiov = &local_qiov;
+
+        flags &= ~BDRV_REQ_REGISTERED_BUF;
     }
 
     ret = raw_adjust_offset(bs, &offset, bytes, true);
@@ -264,7 +271,7 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
         goto fail;
     }
 
-    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+    BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
     ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 
 fail:
@@ -275,11 +282,10 @@ fail:
     return ret;
 }
 
-static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
-                                            bool want_zero, int64_t offset,
-                                            int64_t bytes, int64_t *pnum,
-                                            int64_t *map,
-                                            BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
+                    int64_t bytes, int64_t *pnum, int64_t *map,
+                    BlockDriverState **file)
 {
     BDRVRawState *s = bs->opaque;
     *pnum = bytes;
@@ -288,39 +294,62 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
     return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
 }
 
-static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
-                                             int64_t offset, int bytes,
-                                             BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     BdrvRequestFlags flags)
 {
     int ret;
 
-    ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
+    ret = raw_adjust_offset(bs, &offset, bytes, true);
     if (ret) {
         return ret;
     }
     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
-static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
-                                        int64_t offset, int bytes)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     int ret;
 
-    ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
+    ret = raw_adjust_offset(bs, &offset, bytes, true);
     if (ret) {
         return ret;
     }
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
 
-static int64_t raw_getlength(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_zone_report(BlockDriverState *bs, int64_t offset,
+                   unsigned int *nr_zones,
+                   BlockZoneDescriptor *zones)
+{
+    return bdrv_co_zone_report(bs->file->bs, offset, nr_zones, zones);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
+                 int64_t offset, int64_t len)
+{
+    return bdrv_co_zone_mgmt(bs->file->bs, op, offset, len);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_zone_append(BlockDriverState *bs,int64_t *offset, QEMUIOVector *qiov,
+                   BdrvRequestFlags flags)
+{
+    return bdrv_co_zone_append(bs->file->bs, offset, qiov, flags);
+}
+
+static int64_t coroutine_fn GRAPH_RDLOCK
+raw_co_getlength(BlockDriverState *bs)
 {
     int64_t len;
     BDRVRawState *s = bs->opaque;
 
     /* Update size. It should not change unless the file was externally
      * modified. */
-    len = bdrv_getlength(bs->file->bs);
+    len = bdrv_co_getlength(bs->file->bs);
     if (len < 0) {
         return len;
     }
@@ -364,13 +393,16 @@ static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs,
     return info;
 }
 
-static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
-    return bdrv_get_info(bs->file->bs, bdi);
+    return bdrv_co_get_info(bs->file->bs, bdi);
 }
 
-static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
+static void GRAPH_RDLOCK raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
+    bs->bl.has_variable_length = bs->file->bs->bl.has_variable_length;
+
     if (bs->probed) {
         /* To make it easier to protect the first sector, any probed
          * image is restricted to read-modify-write on sub-sector
@@ -379,9 +411,9 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
     }
 }
 
-static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
-                                        bool exact, PreallocMode prealloc,
-                                        BdrvRequestFlags flags, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
+                PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
 {
     BDRVRawState *s = bs->opaque;
 
@@ -400,17 +432,20 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
     return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
 }
 
-static void raw_eject(BlockDriverState *bs, bool eject_flag)
+static void coroutine_fn GRAPH_RDLOCK
+raw_co_eject(BlockDriverState *bs, bool eject_flag)
 {
-    bdrv_eject(bs->file->bs, eject_flag);
+    bdrv_co_eject(bs->file->bs, eject_flag);
 }
 
-static void raw_lock_medium(BlockDriverState *bs, bool locked)
+static void coroutine_fn GRAPH_RDLOCK
+raw_co_lock_medium(BlockDriverState *bs, bool locked)
 {
-    bdrv_lock_medium(bs->file->bs, locked);
+    bdrv_co_lock_medium(bs->file->bs, locked);
 }
 
-static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
     BDRVRawState *s = bs->opaque;
     if (s->offset || s->has_size) {
@@ -419,28 +454,30 @@ static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
     return bdrv_co_ioctl(bs->file->bs, req, buf);
 }
 
-static int raw_has_zero_init(BlockDriverState *bs)
+static int GRAPH_RDLOCK raw_has_zero_init(BlockDriverState *bs)
 {
     return bdrv_has_zero_init(bs->file->bs);
 }
 
-static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
-                                           const char *filename,
-                                           QemuOpts *opts,
-                                           Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+raw_co_create_opts(BlockDriver *drv, const char *filename,
+                   QemuOpts *opts, Error **errp)
 {
-    return bdrv_create_file(filename, opts, errp);
+    return bdrv_co_create_file(filename, opts, errp);
 }
 
 static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
     BDRVRawState *s = bs->opaque;
+    AioContext *ctx;
     bool has_size;
     uint64_t offset, size;
     BdrvChildRole file_role;
     int ret;
 
+    GLOBAL_STATE_CODE();
+
     ret = raw_read_options(options, &offset, &has_size, &size, errp);
     if (ret < 0) {
         return ret;
@@ -456,13 +493,15 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
         file_role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
     }
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               file_role, false, errp);
+    bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
+                    file_role, false, errp);
+
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
     if (!bs->file) {
         return -EINVAL;
     }
 
-    bs->sg = bs->file->bs->sg;
+    bs->sg = bdrv_is_sg(bs->file->bs);
     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
         (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
     bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
@@ -483,12 +522,16 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                 bs->file->bs->filename);
     }
 
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
     ret = raw_apply_options(bs, s, offset, has_size, size, errp);
+    aio_context_release(ctx);
+
     if (ret < 0) {
         return ret;
     }
 
-    if (bs->sg && (s->offset || s->has_size)) {
+    if (bdrv_is_sg(bs) && (s->offset || s->has_size)) {
         error_setg(errp, "Cannot use offset/size with SCSI generic devices");
         return -EINVAL;
     }
@@ -504,7 +547,8 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
     return 1;
 }
 
-static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
+static int GRAPH_RDLOCK
+raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 {
     BDRVRawState *s = bs->opaque;
     int ret;
@@ -521,7 +565,8 @@ static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
     return 0;
 }
 
-static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
+static int GRAPH_RDLOCK
+raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
 {
     BDRVRawState *s = bs->opaque;
     if (s->offset || s->has_size) {
@@ -530,14 +575,12 @@ static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
     return bdrv_probe_geometry(bs->file->bs, geo);
 }
 
-static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
-                                               BdrvChild *src,
-                                               uint64_t src_offset,
-                                               BdrvChild *dst,
-                                               uint64_t dst_offset,
-                                               uint64_t bytes,
-                                               BdrvRequestFlags read_flags,
-                                               BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_copy_range_from(BlockDriverState *bs,
+                       BdrvChild *src, int64_t src_offset,
+                       BdrvChild *dst, int64_t dst_offset,
+                       int64_t bytes, BdrvRequestFlags read_flags,
+                       BdrvRequestFlags write_flags)
 {
     int ret;
 
@@ -549,14 +592,12 @@ static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
                                    bytes, read_flags, write_flags);
 }
 
-static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
-                                             BdrvChild *src,
-                                             uint64_t src_offset,
-                                             BdrvChild *dst,
-                                             uint64_t dst_offset,
-                                             uint64_t bytes,
-                                             BdrvRequestFlags read_flags,
-                                             BdrvRequestFlags write_flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_copy_range_to(BlockDriverState *bs,
+                     BdrvChild *src, int64_t src_offset,
+                     BdrvChild *dst, int64_t dst_offset,
+                     int64_t bytes, BdrvRequestFlags read_flags,
+                     BdrvRequestFlags write_flags)
 {
     int ret;
 
@@ -575,39 +616,67 @@ static const char *const raw_strong_runtime_opts[] = {
     NULL
 };
 
+static void GRAPH_RDLOCK raw_cancel_in_flight(BlockDriverState *bs)
+{
+    bdrv_cancel_in_flight(bs->file->bs);
+}
+
+static void raw_child_perm(BlockDriverState *bs, BdrvChild *c,
+                           BdrvChildRole role,
+                           BlockReopenQueue *reopen_queue,
+                           uint64_t parent_perm, uint64_t parent_shared,
+                           uint64_t *nperm, uint64_t *nshared)
+{
+    bdrv_default_perms(bs, c, role, reopen_queue, parent_perm,
+                       parent_shared, nperm, nshared);
+
+    /*
+     * bdrv_default_perms() may add WRITE and/or RESIZE (see comment in
+     * bdrv_default_perms_for_storage() for an explanation) but we only need
+     * them if they are in parent_perm. Drop WRITE and RESIZE whenever possible
+     * to avoid permission conflicts.
+     */
+    *nperm &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+    *nperm |= parent_perm & (BLK_PERM_WRITE | BLK_PERM_RESIZE);
+}
+
 BlockDriver bdrv_raw = {
     .format_name          = "raw",
     .instance_size        = sizeof(BDRVRawState),
+    .supports_zoned_children = true,
     .bdrv_probe           = &raw_probe,
     .bdrv_reopen_prepare  = &raw_reopen_prepare,
     .bdrv_reopen_commit   = &raw_reopen_commit,
     .bdrv_reopen_abort    = &raw_reopen_abort,
     .bdrv_open            = &raw_open,
-    .bdrv_child_perm      = bdrv_default_perms,
+    .bdrv_child_perm      = raw_child_perm,
     .bdrv_co_create_opts  = &raw_co_create_opts,
     .bdrv_co_preadv       = &raw_co_preadv,
     .bdrv_co_pwritev      = &raw_co_pwritev,
     .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
     .bdrv_co_pdiscard     = &raw_co_pdiscard,
+    .bdrv_co_zone_report  = &raw_co_zone_report,
+    .bdrv_co_zone_mgmt  = &raw_co_zone_mgmt,
+    .bdrv_co_zone_append = &raw_co_zone_append,
     .bdrv_co_block_status = &raw_co_block_status,
     .bdrv_co_copy_range_from = &raw_co_copy_range_from,
     .bdrv_co_copy_range_to  = &raw_co_copy_range_to,
     .bdrv_co_truncate     = &raw_co_truncate,
-    .bdrv_getlength       = &raw_getlength,
+    .bdrv_co_getlength    = &raw_co_getlength,
     .is_format            = true,
-    .has_variable_length  = true,
     .bdrv_measure         = &raw_measure,
-    .bdrv_get_info        = &raw_get_info,
+    .bdrv_co_get_info     = &raw_co_get_info,
     .bdrv_refresh_limits  = &raw_refresh_limits,
     .bdrv_probe_blocksizes = &raw_probe_blocksizes,
     .bdrv_probe_geometry  = &raw_probe_geometry,
-    .bdrv_eject           = &raw_eject,
-    .bdrv_lock_medium     = &raw_lock_medium,
+    .bdrv_co_eject        = &raw_co_eject,
+    .bdrv_co_lock_medium  = &raw_co_lock_medium,
     .bdrv_co_ioctl        = &raw_co_ioctl,
     .create_opts          = &raw_create_opts,
     .bdrv_has_zero_init   = &raw_has_zero_init,
     .strong_runtime_opts  = raw_strong_runtime_opts,
     .mutable_opts         = mutable_opts,
+    .bdrv_cancel_in_flight = raw_cancel_in_flight,
 };
 
 static void bdrv_raw_init(void)
index 318e2826fc4180a1ad6837c8115002288d67cfd9..84bb2fa5d7128c646a6a05c9c253f9a8ab451afb 100644 (file)
@@ -18,6 +18,7 @@
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "block/qdict.h"
 #include "crypto/secret.h"
  * leading "\".
  */
 
-/* rbd_aio_discard added in 0.1.2 */
-#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
-#define LIBRBD_SUPPORTS_DISCARD
-#else
-#undef LIBRBD_SUPPORTS_DISCARD
-#endif
-
 #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
 
 #define RBD_MAX_SNAPS 100
 
-/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
-#ifdef LIBRBD_SUPPORTS_IOVEC
-#define LIBRBD_USE_IOVEC 1
-#else
-#define LIBRBD_USE_IOVEC 0
-#endif
+#define RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN 8
+
+static const char rbd_luks_header_verification[
+        RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {
+    'L', 'U', 'K', 'S', 0xBA, 0xBE, 0, 1
+};
+
+static const char rbd_luks2_header_verification[
+        RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {
+    'L', 'U', 'K', 'S', 0xBA, 0xBE, 0, 2
+};
+
+static const char rbd_layered_luks_header_verification[
+        RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {
+    'R', 'B', 'D', 'L', 0xBA, 0xBE, 0, 1
+};
+
+static const char rbd_layered_luks2_header_verification[
+        RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {
+    'R', 'B', 'D', 'L', 0xBA, 0xBE, 0, 2
+};
 
 typedef enum {
     RBD_AIO_READ,
     RBD_AIO_WRITE,
     RBD_AIO_DISCARD,
-    RBD_AIO_FLUSH
+    RBD_AIO_FLUSH,
+    RBD_AIO_WRITE_ZEROES
 } RBDAIOCmd;
 
-typedef struct RBDAIOCB {
-    BlockAIOCB common;
-    int64_t ret;
-    QEMUIOVector *qiov;
-    char *bounce;
-    RBDAIOCmd cmd;
-    int error;
-    struct BDRVRBDState *s;
-} RBDAIOCB;
-
-typedef struct RADOSCB {
-    RBDAIOCB *acb;
-    struct BDRVRBDState *s;
-    int64_t size;
-    char *buf;
-    int64_t ret;
-} RADOSCB;
-
 typedef struct BDRVRBDState {
     rados_t cluster;
     rados_ioctx_t io_ctx;
@@ -106,28 +98,52 @@ typedef struct BDRVRBDState {
     char *snap;
     char *namespace;
     uint64_t image_size;
+    uint64_t object_size;
 } BDRVRBDState;
 
+typedef struct RBDTask {
+    BlockDriverState *bs;
+    Coroutine *co;
+    bool complete;
+    int64_t ret;
+} RBDTask;
+
+typedef struct RBDDiffIterateReq {
+    uint64_t offs;
+    uint64_t bytes;
+    bool exists;
+} RBDDiffIterateReq;
+
 static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
                             BlockdevOptionsRbd *opts, bool cache,
                             const char *keypairs, const char *secretid,
                             Error **errp);
 
+static char *qemu_rbd_strchr(char *src, char delim)
+{
+    char *p;
+
+    for (p = src; *p; ++p) {
+        if (*p == delim) {
+            return p;
+        }
+        if (*p == '\\' && p[1] != '\0') {
+            ++p;
+        }
+    }
+
+    return NULL;
+}
+
+
 static char *qemu_rbd_next_tok(char *src, char delim, char **p)
 {
     char *end;
 
     *p = NULL;
 
-    for (end = src; *end; ++end) {
-        if (*end == delim) {
-            break;
-        }
-        if (*end == '\\' && end[1] != '\0') {
-            end++;
-        }
-    }
-    if (*end == delim) {
+    end = qemu_rbd_strchr(src, delim);
+    if (end) {
         *p = end + 1;
         *end = '\0';
     }
@@ -171,7 +187,7 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
     qemu_rbd_unescape(found_str);
     qdict_put_str(options, "pool", found_str);
 
-    if (strchr(p, '@')) {
+    if (qemu_rbd_strchr(p, '@')) {
         image_name = qemu_rbd_next_tok(p, '@', &p);
 
         found_str = qemu_rbd_next_tok(p, ':', &p);
@@ -181,7 +197,7 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
         image_name = qemu_rbd_next_tok(p, ':', &p);
     }
     /* Check for namespace in the image_name */
-    if (strchr(image_name, '/')) {
+    if (qemu_rbd_strchr(image_name, '/')) {
         found_str = qemu_rbd_next_tok(image_name, '/', &image_name);
         qemu_rbd_unescape(found_str);
         qdict_put_str(options, "namespace", found_str);
@@ -232,7 +248,7 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
 
     if (keypairs) {
         qdict_put(options, "=keyvalue-pairs",
-                  qobject_to_json(QOBJECT(keypairs)));
+                  qstring_from_gstring(qobject_to_json(QOBJECT(keypairs))));
     }
 
 done:
@@ -241,14 +257,6 @@ done:
     return;
 }
 
-
-static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-    /* XXX Does RBD support AIO on less than 512-byte alignment? */
-    bs->bl.request_alignment = 512;
-}
-
-
 static int qemu_rbd_set_auth(rados_t cluster, BlockdevOptionsRbd *opts,
                              Error **errp)
 {
@@ -330,17 +338,340 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
     return ret;
 }
 
-static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION
+static int qemu_rbd_convert_luks_options(
+        RbdEncryptionOptionsLUKSBase *luks_opts,
+        char **passphrase,
+        size_t *passphrase_len,
+        Error **errp)
+{
+    return qcrypto_secret_lookup(luks_opts->key_secret, (uint8_t **)passphrase,
+                                 passphrase_len, errp);
+}
+
+static int qemu_rbd_convert_luks_create_options(
+        RbdEncryptionCreateOptionsLUKSBase *luks_opts,
+        rbd_encryption_algorithm_t *alg,
+        char **passphrase,
+        size_t *passphrase_len,
+        Error **errp)
 {
-    if (LIBRBD_USE_IOVEC) {
-        RBDAIOCB *acb = rcb->acb;
-        iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
-                   acb->qiov->size - offs);
+    int r = 0;
+
+    r = qemu_rbd_convert_luks_options(
+            qapi_RbdEncryptionCreateOptionsLUKSBase_base(luks_opts),
+            passphrase, passphrase_len, errp);
+    if (r < 0) {
+        return r;
+    }
+
+    if (luks_opts->has_cipher_alg) {
+        switch (luks_opts->cipher_alg) {
+            case QCRYPTO_CIPHER_ALG_AES_128: {
+                *alg = RBD_ENCRYPTION_ALGORITHM_AES128;
+                break;
+            }
+            case QCRYPTO_CIPHER_ALG_AES_256: {
+                *alg = RBD_ENCRYPTION_ALGORITHM_AES256;
+                break;
+            }
+            default: {
+                r = -ENOTSUP;
+                error_setg_errno(errp, -r, "unknown encryption algorithm: %u",
+                                 luks_opts->cipher_alg);
+                return r;
+            }
+        }
     } else {
-        memset(rcb->buf + offs, 0, rcb->size - offs);
+        /* default alg */
+        *alg = RBD_ENCRYPTION_ALGORITHM_AES256;
     }
+
+    return 0;
 }
 
+static int qemu_rbd_encryption_format(rbd_image_t image,
+                                      RbdEncryptionCreateOptions *encrypt,
+                                      Error **errp)
+{
+    int r = 0;
+    g_autofree char *passphrase = NULL;
+    rbd_encryption_format_t format;
+    rbd_encryption_options_t opts;
+    rbd_encryption_luks1_format_options_t luks_opts;
+    rbd_encryption_luks2_format_options_t luks2_opts;
+    size_t opts_size;
+    uint64_t raw_size, effective_size;
+
+    r = rbd_get_size(image, &raw_size);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "cannot get raw image size");
+        return r;
+    }
+
+    switch (encrypt->format) {
+        case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS: {
+            memset(&luks_opts, 0, sizeof(luks_opts));
+            format = RBD_ENCRYPTION_FORMAT_LUKS1;
+            opts = &luks_opts;
+            opts_size = sizeof(luks_opts);
+            r = qemu_rbd_convert_luks_create_options(
+                    qapi_RbdEncryptionCreateOptionsLUKS_base(&encrypt->u.luks),
+                    &luks_opts.alg, &passphrase, &luks_opts.passphrase_size,
+                    errp);
+            if (r < 0) {
+                return r;
+            }
+            luks_opts.passphrase = passphrase;
+            break;
+        }
+        case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2: {
+            memset(&luks2_opts, 0, sizeof(luks2_opts));
+            format = RBD_ENCRYPTION_FORMAT_LUKS2;
+            opts = &luks2_opts;
+            opts_size = sizeof(luks2_opts);
+            r = qemu_rbd_convert_luks_create_options(
+                    qapi_RbdEncryptionCreateOptionsLUKS2_base(
+                            &encrypt->u.luks2),
+                    &luks2_opts.alg, &passphrase, &luks2_opts.passphrase_size,
+                    errp);
+            if (r < 0) {
+                return r;
+            }
+            luks2_opts.passphrase = passphrase;
+            break;
+        }
+        default: {
+            r = -ENOTSUP;
+            error_setg_errno(
+                    errp, -r, "unknown image encryption format: %u",
+                    encrypt->format);
+            return r;
+        }
+    }
+
+    r = rbd_encryption_format(image, format, opts, opts_size);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "encryption format fail");
+        return r;
+    }
+
+    r = rbd_get_size(image, &effective_size);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "cannot get effective image size");
+        return r;
+    }
+
+    r = rbd_resize(image, raw_size + (raw_size - effective_size));
+    if (r < 0) {
+        error_setg_errno(errp, -r, "cannot resize image after format");
+        return r;
+    }
+
+    return 0;
+}
+
+static int qemu_rbd_encryption_load(rbd_image_t image,
+                                    RbdEncryptionOptions *encrypt,
+                                    Error **errp)
+{
+    int r = 0;
+    g_autofree char *passphrase = NULL;
+    rbd_encryption_luks1_format_options_t luks_opts;
+    rbd_encryption_luks2_format_options_t luks2_opts;
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2
+    rbd_encryption_luks_format_options_t luks_any_opts;
+#endif
+    rbd_encryption_format_t format;
+    rbd_encryption_options_t opts;
+    size_t opts_size;
+
+    switch (encrypt->format) {
+        case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS: {
+            memset(&luks_opts, 0, sizeof(luks_opts));
+            format = RBD_ENCRYPTION_FORMAT_LUKS1;
+            opts = &luks_opts;
+            opts_size = sizeof(luks_opts);
+            r = qemu_rbd_convert_luks_options(
+                    qapi_RbdEncryptionOptionsLUKS_base(&encrypt->u.luks),
+                    &passphrase, &luks_opts.passphrase_size, errp);
+            if (r < 0) {
+                return r;
+            }
+            luks_opts.passphrase = passphrase;
+            break;
+        }
+        case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2: {
+            memset(&luks2_opts, 0, sizeof(luks2_opts));
+            format = RBD_ENCRYPTION_FORMAT_LUKS2;
+            opts = &luks2_opts;
+            opts_size = sizeof(luks2_opts);
+            r = qemu_rbd_convert_luks_options(
+                    qapi_RbdEncryptionOptionsLUKS2_base(&encrypt->u.luks2),
+                    &passphrase, &luks2_opts.passphrase_size, errp);
+            if (r < 0) {
+                return r;
+            }
+            luks2_opts.passphrase = passphrase;
+            break;
+        }
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2
+        case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS_ANY: {
+            memset(&luks_any_opts, 0, sizeof(luks_any_opts));
+            format = RBD_ENCRYPTION_FORMAT_LUKS;
+            opts = &luks_any_opts;
+            opts_size = sizeof(luks_any_opts);
+            r = qemu_rbd_convert_luks_options(
+                    qapi_RbdEncryptionOptionsLUKSAny_base(&encrypt->u.luks_any),
+                    &passphrase, &luks_any_opts.passphrase_size, errp);
+            if (r < 0) {
+                return r;
+            }
+            luks_any_opts.passphrase = passphrase;
+            break;
+        }
+#endif
+        default: {
+            r = -ENOTSUP;
+            error_setg_errno(
+                    errp, -r, "unknown image encryption format: %u",
+                    encrypt->format);
+            return r;
+        }
+    }
+
+    r = rbd_encryption_load(image, format, opts, opts_size);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "encryption load fail");
+        return r;
+    }
+
+    return 0;
+}
+
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2
+static int qemu_rbd_encryption_load2(rbd_image_t image,
+                                     RbdEncryptionOptions *encrypt,
+                                     Error **errp)
+{
+    int r = 0;
+    int encrypt_count = 1;
+    int i;
+    RbdEncryptionOptions *curr_encrypt;
+    rbd_encryption_spec_t *specs;
+    rbd_encryption_luks1_format_options_t *luks_opts;
+    rbd_encryption_luks2_format_options_t *luks2_opts;
+    rbd_encryption_luks_format_options_t *luks_any_opts;
+
+    /* count encryption options */
+    for (curr_encrypt = encrypt->parent; curr_encrypt;
+         curr_encrypt = curr_encrypt->parent) {
+        ++encrypt_count;
+    }
+
+    specs = g_new0(rbd_encryption_spec_t, encrypt_count);
+
+    curr_encrypt = encrypt;
+    for (i = 0; i < encrypt_count; ++i) {
+        switch (curr_encrypt->format) {
+            case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS: {
+                specs[i].format = RBD_ENCRYPTION_FORMAT_LUKS1;
+
+                luks_opts = g_new0(rbd_encryption_luks1_format_options_t, 1);
+                specs[i].opts = luks_opts;
+                specs[i].opts_size = sizeof(*luks_opts);
+
+                r = qemu_rbd_convert_luks_options(
+                        qapi_RbdEncryptionOptionsLUKS_base(
+                                &curr_encrypt->u.luks),
+                        (char **)&luks_opts->passphrase,
+                        &luks_opts->passphrase_size,
+                        errp);
+                break;
+            }
+            case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2: {
+                specs[i].format = RBD_ENCRYPTION_FORMAT_LUKS2;
+
+                luks2_opts = g_new0(rbd_encryption_luks2_format_options_t, 1);
+                specs[i].opts = luks2_opts;
+                specs[i].opts_size = sizeof(*luks2_opts);
+
+                r = qemu_rbd_convert_luks_options(
+                        qapi_RbdEncryptionOptionsLUKS2_base(
+                                &curr_encrypt->u.luks2),
+                        (char **)&luks2_opts->passphrase,
+                        &luks2_opts->passphrase_size,
+                        errp);
+                break;
+            }
+            case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS_ANY: {
+                specs[i].format = RBD_ENCRYPTION_FORMAT_LUKS;
+
+                luks_any_opts = g_new0(rbd_encryption_luks_format_options_t, 1);
+                specs[i].opts = luks_any_opts;
+                specs[i].opts_size = sizeof(*luks_any_opts);
+
+                r = qemu_rbd_convert_luks_options(
+                        qapi_RbdEncryptionOptionsLUKSAny_base(
+                                &curr_encrypt->u.luks_any),
+                        (char **)&luks_any_opts->passphrase,
+                        &luks_any_opts->passphrase_size,
+                        errp);
+                break;
+            }
+            default: {
+                r = -ENOTSUP;
+                error_setg_errno(
+                        errp, -r, "unknown image encryption format: %u",
+                        curr_encrypt->format);
+            }
+        }
+
+        if (r < 0) {
+            goto exit;
+        }
+
+        curr_encrypt = curr_encrypt->parent;
+    }
+
+    r = rbd_encryption_load2(image, specs, encrypt_count);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "layered encryption load fail");
+        goto exit;
+    }
+
+exit:
+    for (i = 0; i < encrypt_count; ++i) {
+        if (!specs[i].opts) {
+            break;
+        }
+
+        switch (specs[i].format) {
+            case RBD_ENCRYPTION_FORMAT_LUKS1: {
+                luks_opts = specs[i].opts;
+                g_free((void *)luks_opts->passphrase);
+                break;
+            }
+            case RBD_ENCRYPTION_FORMAT_LUKS2: {
+                luks2_opts = specs[i].opts;
+                g_free((void *)luks2_opts->passphrase);
+                break;
+            }
+            case RBD_ENCRYPTION_FORMAT_LUKS: {
+                luks_any_opts = specs[i].opts;
+                g_free((void *)luks_any_opts->passphrase);
+                break;
+            }
+        }
+
+        g_free(specs[i].opts);
+    }
+    g_free(specs);
+    return r;
+}
+#endif
+#endif
+
 /* FIXME Deprecate and remove keypairs or make it available in QMP. */
 static int qemu_rbd_do_create(BlockdevCreateOptions *options,
                               const char *keypairs, const char *password_secret,
@@ -353,11 +684,18 @@ static int qemu_rbd_do_create(BlockdevCreateOptions *options,
     int ret;
 
     assert(options->driver == BLOCKDEV_DRIVER_RBD);
-    if (opts->location->has_snapshot) {
+    if (opts->location->snapshot) {
         error_setg(errp, "Can't use snapshot name for image creation");
         return -EINVAL;
     }
 
+#ifndef LIBRBD_SUPPORTS_ENCRYPTION
+    if (opts->encrypt) {
+        error_setg(errp, "RBD library does not support image encryption");
+        return -ENOTSUP;
+    }
+#endif
+
     if (opts->has_cluster_size) {
         int64_t objsize = opts->cluster_size;
         if ((objsize - 1) & objsize) {    /* not a power of 2? */
@@ -383,6 +721,28 @@ static int qemu_rbd_do_create(BlockdevCreateOptions *options,
         goto out;
     }
 
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION
+    if (opts->encrypt) {
+        rbd_image_t image;
+
+        ret = rbd_open(io_ctx, opts->location->image, &image, NULL);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret,
+                             "error opening image '%s' for encryption format",
+                             opts->location->image);
+            goto out;
+        }
+
+        ret = qemu_rbd_encryption_format(image, opts->encrypt, errp);
+        rbd_close(image);
+        if (ret < 0) {
+            /* encryption format fail, try removing the image */
+            rbd_remove(io_ctx, opts->location->image);
+            goto out;
+        }
+    }
+#endif
+
     ret = 0;
 out:
     rados_ioctx_destroy(io_ctx);
@@ -395,6 +755,43 @@ static int qemu_rbd_co_create(BlockdevCreateOptions *options, Error **errp)
     return qemu_rbd_do_create(options, NULL, NULL, errp);
 }
 
+static int qemu_rbd_extract_encryption_create_options(
+        QemuOpts *opts,
+        RbdEncryptionCreateOptions **spec,
+        Error **errp)
+{
+    QDict *opts_qdict;
+    QDict *encrypt_qdict;
+    Visitor *v;
+    int ret = 0;
+
+    opts_qdict = qemu_opts_to_qdict(opts, NULL);
+    qdict_extract_subqdict(opts_qdict, &encrypt_qdict, "encrypt.");
+    qobject_unref(opts_qdict);
+    if (!qdict_size(encrypt_qdict)) {
+        *spec = NULL;
+        goto exit;
+    }
+
+    /* Convert options into a QAPI object */
+    v = qobject_input_visitor_new_flat_confused(encrypt_qdict, errp);
+    if (!v) {
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    visit_type_RbdEncryptionCreateOptions(v, NULL, spec, errp);
+    visit_free(v);
+    if (!*spec) {
+        ret = -EINVAL;
+        goto exit;
+    }
+
+exit:
+    qobject_unref(encrypt_qdict);
+    return ret;
+}
+
 static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv,
                                                 const char *filename,
                                                 QemuOpts *opts,
@@ -403,6 +800,7 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv,
     BlockdevCreateOptions *create_options;
     BlockdevCreateOptionsRbd *rbd_opts;
     BlockdevOptionsRbd *loc;
+    RbdEncryptionCreateOptions *encrypt = NULL;
     Error *local_err = NULL;
     const char *keypairs, *password_secret;
     QDict *options = NULL;
@@ -431,6 +829,12 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv,
         goto exit;
     }
 
+    ret = qemu_rbd_extract_encryption_create_options(opts, &encrypt, errp);
+    if (ret < 0) {
+        goto exit;
+    }
+    rbd_opts->encrypt     = encrypt;
+
     /*
      * Caution: while qdict_get_try_str() is fine, getting non-string
      * types would require more care.  When @options come from -blockdev
@@ -440,11 +844,8 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv,
     loc = rbd_opts->location;
     loc->pool        = g_strdup(qdict_get_try_str(options, "pool"));
     loc->conf        = g_strdup(qdict_get_try_str(options, "conf"));
-    loc->has_conf    = !!loc->conf;
     loc->user        = g_strdup(qdict_get_try_str(options, "user"));
-    loc->has_user    = !!loc->user;
     loc->q_namespace = g_strdup(qdict_get_try_str(options, "namespace"));
-    loc->has_q_namespace = !!loc->q_namespace;
     loc->image       = g_strdup(qdict_get_try_str(options, "image"));
     keypairs         = qdict_get_try_str(options, "=keyvalue-pairs");
 
@@ -459,53 +860,6 @@ exit:
     return ret;
 }
 
-/*
- * This aio completion is being called from rbd_finish_bh() and runs in qemu
- * BH context.
- */
-static void qemu_rbd_complete_aio(RADOSCB *rcb)
-{
-    RBDAIOCB *acb = rcb->acb;
-    int64_t r;
-
-    r = rcb->ret;
-
-    if (acb->cmd != RBD_AIO_READ) {
-        if (r < 0) {
-            acb->ret = r;
-            acb->error = 1;
-        } else if (!acb->error) {
-            acb->ret = rcb->size;
-        }
-    } else {
-        if (r < 0) {
-            qemu_rbd_memset(rcb, 0);
-            acb->ret = r;
-            acb->error = 1;
-        } else if (r < rcb->size) {
-            qemu_rbd_memset(rcb, r);
-            if (!acb->error) {
-                acb->ret = rcb->size;
-            }
-        } else if (!acb->error) {
-            acb->ret = r;
-        }
-    }
-
-    g_free(rcb);
-
-    if (!LIBRBD_USE_IOVEC) {
-        if (acb->cmd == RBD_AIO_READ) {
-            qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
-        }
-        qemu_vfree(acb->bounce);
-    }
-
-    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
-
-    qemu_aio_unref(acb);
-}
-
 static char *qemu_rbd_mon_host(BlockdevOptionsRbd *opts, Error **errp)
 {
     const char **vals;
@@ -557,7 +911,6 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
             return -EINVAL;
         }
         opts->key_secret = g_strdup(secretid);
-        opts->has_key_secret = true;
     }
 
     mon_host = qemu_rbd_mon_host(opts, &local_err);
@@ -575,7 +928,7 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
 
     /* try default location when conf=NULL, but ignore failure */
     r = rados_conf_read_file(*cluster, opts->conf);
-    if (opts->has_conf && r < 0) {
+    if (opts->conf && r < 0) {
         error_setg_errno(errp, -r, "error reading conf file %s", opts->conf);
         goto failed_shutdown;
     }
@@ -621,6 +974,26 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
         error_setg_errno(errp, -r, "error opening pool %s", opts->pool);
         goto failed_shutdown;
     }
+
+#ifdef HAVE_RBD_NAMESPACE_EXISTS
+    if (opts->q_namespace && strlen(opts->q_namespace) > 0) {
+        bool exists;
+
+        r = rbd_namespace_exists(*io_ctx, opts->q_namespace, &exists);
+        if (r < 0) {
+            error_setg_errno(errp, -r, "error checking namespace");
+            goto failed_ioctx_destroy;
+        }
+
+        if (!exists) {
+            error_setg(errp, "namespace '%s' does not exist",
+                       opts->q_namespace);
+            r = -ENOENT;
+            goto failed_ioctx_destroy;
+        }
+    }
+#endif
+
     /*
      * Set the namespace after opening the io context on the pool,
      * if nspace == NULL or if nspace == "", it is just as we did nothing
@@ -630,6 +1003,10 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
     r = 0;
     goto out;
 
+#ifdef HAVE_RBD_NAMESPACE_EXISTS
+failed_ioctx_destroy:
+    rados_ioctx_destroy(*io_ctx);
+#endif
 failed_shutdown:
     rados_shutdown(*cluster);
 out:
@@ -692,6 +1069,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
     const QDictEntry *e;
     Error *local_err = NULL;
     char *keypairs, *secretid;
+    rbd_image_info_t info;
     int r;
 
     keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs"));
@@ -756,30 +1134,60 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
         goto failed_open;
     }
 
-    r = rbd_get_size(s->image, &s->image_size);
+    if (opts->encrypt) {
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION
+        if (opts->encrypt->parent) {
+#ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2
+            r = qemu_rbd_encryption_load2(s->image, opts->encrypt, errp);
+#else
+            r = -ENOTSUP;
+            error_setg(errp, "RBD library does not support layered encryption");
+#endif
+        } else {
+            r = qemu_rbd_encryption_load(s->image, opts->encrypt, errp);
+        }
+        if (r < 0) {
+            goto failed_post_open;
+        }
+#else
+        r = -ENOTSUP;
+        error_setg(errp, "RBD library does not support image encryption");
+        goto failed_post_open;
+#endif
+    }
+
+    r = rbd_stat(s->image, &info, sizeof(info));
     if (r < 0) {
-        error_setg_errno(errp, -r, "error getting image size from %s",
+        error_setg_errno(errp, -r, "error getting image info from %s",
                          s->image_name);
-        rbd_close(s->image);
-        goto failed_open;
+        goto failed_post_open;
     }
+    s->image_size = info.size;
+    s->object_size = info.obj_size;
 
     /* If we are using an rbd snapshot, we must be r/o, otherwise
      * leave as-is */
     if (s->snap != NULL) {
+        bdrv_graph_rdlock_main_loop();
         r = bdrv_apply_auto_read_only(bs, "rbd snapshots are read-only", errp);
+        bdrv_graph_rdunlock_main_loop();
         if (r < 0) {
-            rbd_close(s->image);
-            goto failed_open;
+            goto failed_post_open;
         }
     }
 
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
+#endif
+
     /* When extending regular files, we get zeros from the OS */
     bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
 
     r = 0;
     goto out;
 
+failed_post_open:
+    rbd_close(s->image);
 failed_open:
     rados_ioctx_destroy(s->io_ctx);
     g_free(s->snap);
@@ -802,6 +1210,8 @@ static int qemu_rbd_reopen_prepare(BDRVReopenState *state,
     BDRVRBDState *s = state->bs->opaque;
     int ret = 0;
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (s->snap && state->flags & BDRV_O_RDWR) {
         error_setg(errp,
                    "Cannot change node '%s' to r/w when using RBD snapshot",
@@ -839,229 +1249,369 @@ static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size)
     return 0;
 }
 
-static const AIOCBInfo rbd_aiocb_info = {
-    .aiocb_size = sizeof(RBDAIOCB),
-};
-
-static void rbd_finish_bh(void *opaque)
+static void qemu_rbd_finish_bh(void *opaque)
 {
-    RADOSCB *rcb = opaque;
-    qemu_rbd_complete_aio(rcb);
+    RBDTask *task = opaque;
+    task->complete = true;
+    aio_co_wake(task->co);
 }
 
 /*
- * This is the callback function for rbd_aio_read and _write
+ * This is the completion callback function for all rbd aio calls
+ * started from qemu_rbd_start_co().
  *
  * Note: this function is being called from a non qemu thread so
  * we need to be careful about what we do here. Generally we only
  * schedule a BH, and do the rest of the io completion handling
- * from rbd_finish_bh() which runs in a qemu context.
+ * from qemu_rbd_finish_bh() which runs in a qemu context.
  */
-static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
+static void qemu_rbd_completion_cb(rbd_completion_t c, RBDTask *task)
 {
-    RBDAIOCB *acb = rcb->acb;
-
-    rcb->ret = rbd_aio_get_return_value(c);
+    task->ret = rbd_aio_get_return_value(c);
     rbd_aio_release(c);
-
-    replay_bh_schedule_oneshot_event(bdrv_get_aio_context(acb->common.bs),
-                                     rbd_finish_bh, rcb);
-}
-
-static int rbd_aio_discard_wrapper(rbd_image_t image,
-                                   uint64_t off,
-                                   uint64_t len,
-                                   rbd_completion_t comp)
-{
-#ifdef LIBRBD_SUPPORTS_DISCARD
-    return rbd_aio_discard(image, off, len, comp);
-#else
-    return -ENOTSUP;
-#endif
-}
-
-static int rbd_aio_flush_wrapper(rbd_image_t image,
-                                 rbd_completion_t comp)
-{
-#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
-    return rbd_aio_flush(image, comp);
-#else
-    return -ENOTSUP;
-#endif
+    aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs),
+                            qemu_rbd_finish_bh, task);
 }
 
-static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
-                                 int64_t off,
-                                 QEMUIOVector *qiov,
-                                 int64_t size,
-                                 BlockCompletionFunc *cb,
-                                 void *opaque,
-                                 RBDAIOCmd cmd)
+static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
+                                          uint64_t offset,
+                                          uint64_t bytes,
+                                          QEMUIOVector *qiov,
+                                          int flags,
+                                          RBDAIOCmd cmd)
 {
-    RBDAIOCB *acb;
-    RADOSCB *rcb = NULL;
+    BDRVRBDState *s = bs->opaque;
+    RBDTask task = { .bs = bs, .co = qemu_coroutine_self() };
     rbd_completion_t c;
     int r;
 
-    BDRVRBDState *s = bs->opaque;
-
-    acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
-    acb->cmd = cmd;
-    acb->qiov = qiov;
-    assert(!qiov || qiov->size == size);
+    assert(!qiov || qiov->size == bytes);
 
-    rcb = g_new(RADOSCB, 1);
-
-    if (!LIBRBD_USE_IOVEC) {
-        if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
-            acb->bounce = NULL;
-        } else {
-            acb->bounce = qemu_try_blockalign(bs, qiov->size);
-            if (acb->bounce == NULL) {
-                goto failed;
-            }
-        }
-        if (cmd == RBD_AIO_WRITE) {
-            qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
-        }
-        rcb->buf = acb->bounce;
-    }
-
-    acb->ret = 0;
-    acb->error = 0;
-    acb->s = s;
-
-    rcb->acb = acb;
-    rcb->s = acb->s;
-    rcb->size = size;
-    r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
-    if (r < 0) {
-        goto failed;
-    }
-
-    switch (cmd) {
-    case RBD_AIO_WRITE: {
+    if (cmd == RBD_AIO_WRITE || cmd == RBD_AIO_WRITE_ZEROES) {
         /*
          * RBD APIs don't allow us to write more than actual size, so in order
          * to support growing images, we resize the image before write
          * operations that exceed the current size.
          */
-        if (off + size > s->image_size) {
-            r = qemu_rbd_resize(bs, off + size);
+        if (offset + bytes > s->image_size) {
+            r = qemu_rbd_resize(bs, offset + bytes);
             if (r < 0) {
-                goto failed_completion;
+                return r;
             }
         }
-#ifdef LIBRBD_SUPPORTS_IOVEC
-            r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
-#else
-            r = rbd_aio_write(s->image, off, size, rcb->buf, c);
-#endif
-        break;
     }
+
+    r = rbd_aio_create_completion(&task,
+                                  (rbd_callback_t) qemu_rbd_completion_cb, &c);
+    if (r < 0) {
+        return r;
+    }
+
+    switch (cmd) {
     case RBD_AIO_READ:
-#ifdef LIBRBD_SUPPORTS_IOVEC
-            r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
-#else
-            r = rbd_aio_read(s->image, off, size, rcb->buf, c);
-#endif
+        r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, offset, c);
+        break;
+    case RBD_AIO_WRITE:
+        r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, offset, c);
         break;
     case RBD_AIO_DISCARD:
-        r = rbd_aio_discard_wrapper(s->image, off, size, c);
+        r = rbd_aio_discard(s->image, offset, bytes, c);
         break;
     case RBD_AIO_FLUSH:
-        r = rbd_aio_flush_wrapper(s->image, c);
+        r = rbd_aio_flush(s->image, c);
         break;
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+    case RBD_AIO_WRITE_ZEROES: {
+        int zero_flags = 0;
+#ifdef RBD_WRITE_ZEROES_FLAG_THICK_PROVISION
+        if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+            zero_flags = RBD_WRITE_ZEROES_FLAG_THICK_PROVISION;
+        }
+#endif
+        r = rbd_aio_write_zeroes(s->image, offset, bytes, c, zero_flags, 0);
+        break;
+    }
+#endif
     default:
         r = -EINVAL;
     }
 
     if (r < 0) {
-        goto failed_completion;
+        error_report("rbd request failed early: cmd %d offset %" PRIu64
+                     " bytes %" PRIu64 " flags %d r %d (%s)", cmd, offset,
+                     bytes, flags, r, strerror(-r));
+        rbd_aio_release(c);
+        return r;
     }
-    return &acb->common;
 
-failed_completion:
-    rbd_aio_release(c);
-failed:
-    g_free(rcb);
-    if (!LIBRBD_USE_IOVEC) {
-        qemu_vfree(acb->bounce);
+    while (!task.complete) {
+        qemu_coroutine_yield();
     }
 
-    qemu_aio_unref(acb);
-    return NULL;
+    if (task.ret < 0) {
+        error_report("rbd request failed: cmd %d offset %" PRIu64 " bytes %"
+                     PRIu64 " flags %d task.ret %" PRIi64 " (%s)", cmd, offset,
+                     bytes, flags, task.ret, strerror(-task.ret));
+        return task.ret;
+    }
+
+    /* zero pad short reads */
+    if (cmd == RBD_AIO_READ && task.ret < qiov->size) {
+        qemu_iovec_memset(qiov, task.ret, 0, qiov->size - task.ret);
+    }
+
+    return 0;
 }
 
-static BlockAIOCB *qemu_rbd_aio_preadv(BlockDriverState *bs,
-                                       uint64_t offset, uint64_t bytes,
-                                       QEMUIOVector *qiov, int flags,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
+static int
+coroutine_fn qemu_rbd_co_preadv(BlockDriverState *bs, int64_t offset,
+                                int64_t bytes, QEMUIOVector *qiov,
+                                BdrvRequestFlags flags)
 {
-    return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
-                         RBD_AIO_READ);
+    return qemu_rbd_start_co(bs, offset, bytes, qiov, flags, RBD_AIO_READ);
 }
 
-static BlockAIOCB *qemu_rbd_aio_pwritev(BlockDriverState *bs,
-                                        uint64_t offset, uint64_t bytes,
-                                        QEMUIOVector *qiov, int flags,
-                                        BlockCompletionFunc *cb,
-                                        void *opaque)
+static int
+coroutine_fn qemu_rbd_co_pwritev(BlockDriverState *bs, int64_t offset,
+                                 int64_t bytes, QEMUIOVector *qiov,
+                                 BdrvRequestFlags flags)
 {
-    return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
-                         RBD_AIO_WRITE);
+    return qemu_rbd_start_co(bs, offset, bytes, qiov, flags, RBD_AIO_WRITE);
 }
 
-#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
-static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque)
+static int coroutine_fn qemu_rbd_co_flush(BlockDriverState *bs)
 {
-    return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
+    return qemu_rbd_start_co(bs, 0, 0, NULL, 0, RBD_AIO_FLUSH);
 }
 
-#else
+static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs,
+                                             int64_t offset, int64_t bytes)
+{
+    return qemu_rbd_start_co(bs, offset, bytes, NULL, 0, RBD_AIO_DISCARD);
+}
+
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+static int
+coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+                                       int64_t bytes, BdrvRequestFlags flags)
+{
+    return qemu_rbd_start_co(bs, offset, bytes, NULL, flags,
+                             RBD_AIO_WRITE_ZEROES);
+}
+#endif
 
-static int qemu_rbd_co_flush(BlockDriverState *bs)
+static int coroutine_fn
+qemu_rbd_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
-#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1)
-    /* rbd_flush added in 0.1.1 */
     BDRVRBDState *s = bs->opaque;
-    return rbd_flush(s->image);
-#else
+    bdi->cluster_size = s->object_size;
     return 0;
-#endif
 }
-#endif
 
-static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
+static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
+                                                     Error **errp)
 {
     BDRVRBDState *s = bs->opaque;
-    rbd_image_info_t info;
+    ImageInfoSpecific *spec_info;
+    char buf[RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {0};
     int r;
 
-    r = rbd_stat(s->image, &info, sizeof(info));
-    if (r < 0) {
-        return r;
+    if (s->image_size >= RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) {
+        r = rbd_read(s->image, 0,
+                     RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN, buf);
+        if (r < 0) {
+            error_setg_errno(errp, -r, "cannot read image start for probe");
+            return NULL;
+        }
     }
 
-    bdi->cluster_size = info.obj_size;
+    spec_info = g_new(ImageInfoSpecific, 1);
+    *spec_info = (ImageInfoSpecific){
+        .type  = IMAGE_INFO_SPECIFIC_KIND_RBD,
+        .u.rbd.data = g_new0(ImageInfoSpecificRbd, 1),
+    };
+
+    if (memcmp(buf, rbd_luks_header_verification,
+               RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
+        spec_info->u.rbd.data->encryption_format =
+                RBD_IMAGE_ENCRYPTION_FORMAT_LUKS;
+        spec_info->u.rbd.data->has_encryption_format = true;
+    } else if (memcmp(buf, rbd_luks2_header_verification,
+               RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
+        spec_info->u.rbd.data->encryption_format =
+                RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2;
+        spec_info->u.rbd.data->has_encryption_format = true;
+    } else if (memcmp(buf, rbd_layered_luks_header_verification,
+               RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
+        spec_info->u.rbd.data->encryption_format =
+                RBD_IMAGE_ENCRYPTION_FORMAT_LUKS;
+        spec_info->u.rbd.data->has_encryption_format = true;
+    } else if (memcmp(buf, rbd_layered_luks2_header_verification,
+               RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
+        spec_info->u.rbd.data->encryption_format =
+                RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2;
+        spec_info->u.rbd.data->has_encryption_format = true;
+    } else {
+        spec_info->u.rbd.data->has_encryption_format = false;
+    }
+
+    return spec_info;
+}
+
+/*
+ * rbd_diff_iterate2 allows to interrupt the exection by returning a negative
+ * value in the callback routine. Choose a value that does not conflict with
+ * an existing exitcode and return it if we want to prematurely stop the
+ * execution because we detected a change in the allocation status.
+ */
+#define QEMU_RBD_EXIT_DIFF_ITERATE2 -9000
+
+static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
+                                    int exists, void *opaque)
+{
+    RBDDiffIterateReq *req = opaque;
+
+    assert(req->offs + req->bytes <= offs);
+
+    /* treat a hole like an unallocated area and bail out */
+    if (!exists) {
+        return 0;
+    }
+
+    if (!req->exists && offs > req->offs) {
+        /*
+         * we started in an unallocated area and hit the first allocated
+         * block. req->bytes must be set to the length of the unallocated area
+         * before the allocated area. stop further processing.
+         */
+        req->bytes = offs - req->offs;
+        return QEMU_RBD_EXIT_DIFF_ITERATE2;
+    }
+
+    if (req->exists && offs > req->offs + req->bytes) {
+        /*
+         * we started in an allocated area and jumped over an unallocated area,
+         * req->bytes contains the length of the allocated area before the
+         * unallocated area. stop further processing.
+         */
+        return QEMU_RBD_EXIT_DIFF_ITERATE2;
+    }
+
+    req->bytes += len;
+    req->exists = true;
+
     return 0;
 }
 
-static int64_t qemu_rbd_getlength(BlockDriverState *bs)
+static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
+                                                 bool want_zero, int64_t offset,
+                                                 int64_t bytes, int64_t *pnum,
+                                                 int64_t *map,
+                                                 BlockDriverState **file)
+{
+    BDRVRBDState *s = bs->opaque;
+    int status, r;
+    RBDDiffIterateReq req = { .offs = offset };
+    uint64_t features, flags;
+    uint64_t head = 0;
+
+    assert(offset + bytes <= s->image_size);
+
+    /* default to all sectors allocated */
+    status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+    *map = offset;
+    *file = bs;
+    *pnum = bytes;
+
+    /* check if RBD image supports fast-diff */
+    r = rbd_get_features(s->image, &features);
+    if (r < 0) {
+        return status;
+    }
+    if (!(features & RBD_FEATURE_FAST_DIFF)) {
+        return status;
+    }
+
+    /* check if RBD fast-diff result is valid */
+    r = rbd_get_flags(s->image, &flags);
+    if (r < 0) {
+        return status;
+    }
+    if (flags & RBD_FLAG_FAST_DIFF_INVALID) {
+        return status;
+    }
+
+#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0)
+    /*
+     * librbd had a bug until early 2022 that affected all versions of ceph that
+     * supported fast-diff. This bug results in reporting of incorrect offsets
+     * if the offset parameter to rbd_diff_iterate2 is not object aligned.
+     * Work around this bug by rounding down the offset to object boundaries.
+     * This is OK because we call rbd_diff_iterate2 with whole_object = true.
+     * However, this workaround only works for non cloned images with default
+     * striping.
+     *
+     * See: https://tracker.ceph.com/issues/53784
+     */
+
+    /* check if RBD image has non-default striping enabled */
+    if (features & RBD_FEATURE_STRIPINGV2) {
+        return status;
+    }
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+    /*
+     * check if RBD image is a clone (= has a parent).
+     *
+     * rbd_get_parent_info is deprecated from Nautilus onwards, but the
+     * replacement rbd_get_parent is not present in Luminous and Mimic.
+     */
+    if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) {
+        return status;
+    }
+#pragma GCC diagnostic pop
+
+    head = req.offs & (s->object_size - 1);
+    req.offs -= head;
+    bytes += head;
+#endif
+
+    r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true,
+                          qemu_rbd_diff_iterate_cb, &req);
+    if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
+        return status;
+    }
+    assert(req.bytes <= bytes);
+    if (!req.exists) {
+        if (r == 0) {
+            /*
+             * rbd_diff_iterate2 does not invoke callbacks for unallocated
+             * areas. This here catches the case where no callback was
+             * invoked at all (req.bytes == 0).
+             */
+            assert(req.bytes == 0);
+            req.bytes = bytes;
+        }
+        status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
+    }
+
+    assert(req.bytes > head);
+    *pnum = req.bytes - head;
+    return status;
+}
+
+static int64_t coroutine_fn qemu_rbd_co_getlength(BlockDriverState *bs)
 {
     BDRVRBDState *s = bs->opaque;
-    rbd_image_info_t info;
     int r;
 
-    r = rbd_stat(s->image, &info, sizeof(info));
+    r = rbd_get_size(s->image, &s->image_size);
     if (r < 0) {
         return r;
     }
 
-    return info.size;
+    return s->image_size;
 }
 
 static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs,
@@ -1200,19 +1750,6 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
     return snap_count;
 }
 
-#ifdef LIBRBD_SUPPORTS_DISCARD
-static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
-                                         int64_t offset,
-                                         int bytes,
-                                         BlockCompletionFunc *cb,
-                                         void *opaque)
-{
-    return rbd_start_aio(bs, offset, NULL, bytes, cb, opaque,
-                         RBD_AIO_DISCARD);
-}
-#endif
-
-#ifdef LIBRBD_SUPPORTS_INVALIDATE
 static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs,
                                                       Error **errp)
 {
@@ -1222,7 +1759,6 @@ static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs,
         error_setg_errno(errp, -r, "Failed to invalidate the cache");
     }
 }
-#endif
 
 static QemuOptsList qemu_rbd_create_opts = {
     .name = "rbd-create-opts",
@@ -1243,6 +1779,22 @@ static QemuOptsList qemu_rbd_create_opts = {
             .type = QEMU_OPT_STRING,
             .help = "ID of secret providing the password",
         },
+        {
+            .name = "encrypt.format",
+            .type = QEMU_OPT_STRING,
+            .help = "Encrypt the image, format choices: 'luks', 'luks2'",
+        },
+        {
+            .name = "encrypt.cipher-alg",
+            .type = QEMU_OPT_STRING,
+            .help = "Name of encryption cipher algorithm"
+                    " (allowed values: aes-128, aes-256)",
+        },
+        {
+            .name = "encrypt.key-secret",
+            .type = QEMU_OPT_STRING,
+            .help = "ID of secret providing LUKS passphrase",
+        },
         { /* end of list */ }
     }
 };
@@ -1264,39 +1816,33 @@ static BlockDriver bdrv_rbd = {
     .format_name            = "rbd",
     .instance_size          = sizeof(BDRVRBDState),
     .bdrv_parse_filename    = qemu_rbd_parse_filename,
-    .bdrv_refresh_limits    = qemu_rbd_refresh_limits,
     .bdrv_file_open         = qemu_rbd_open,
     .bdrv_close             = qemu_rbd_close,
     .bdrv_reopen_prepare    = qemu_rbd_reopen_prepare,
     .bdrv_co_create         = qemu_rbd_co_create,
     .bdrv_co_create_opts    = qemu_rbd_co_create_opts,
     .bdrv_has_zero_init     = bdrv_has_zero_init_1,
-    .bdrv_get_info          = qemu_rbd_getinfo,
+    .bdrv_co_get_info       = qemu_rbd_co_get_info,
+    .bdrv_get_specific_info = qemu_rbd_get_specific_info,
     .create_opts            = &qemu_rbd_create_opts,
-    .bdrv_getlength         = qemu_rbd_getlength,
+    .bdrv_co_getlength      = qemu_rbd_co_getlength,
     .bdrv_co_truncate       = qemu_rbd_co_truncate,
     .protocol_name          = "rbd",
 
-    .bdrv_aio_preadv        = qemu_rbd_aio_preadv,
-    .bdrv_aio_pwritev       = qemu_rbd_aio_pwritev,
-
-#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
-    .bdrv_aio_flush         = qemu_rbd_aio_flush,
-#else
+    .bdrv_co_preadv         = qemu_rbd_co_preadv,
+    .bdrv_co_pwritev        = qemu_rbd_co_pwritev,
     .bdrv_co_flush_to_disk  = qemu_rbd_co_flush,
+    .bdrv_co_pdiscard       = qemu_rbd_co_pdiscard,
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+    .bdrv_co_pwrite_zeroes  = qemu_rbd_co_pwrite_zeroes,
 #endif
-
-#ifdef LIBRBD_SUPPORTS_DISCARD
-    .bdrv_aio_pdiscard      = qemu_rbd_aio_pdiscard,
-#endif
+    .bdrv_co_block_status   = qemu_rbd_co_block_status,
 
     .bdrv_snapshot_create   = qemu_rbd_snap_create,
     .bdrv_snapshot_delete   = qemu_rbd_snap_remove,
     .bdrv_snapshot_list     = qemu_rbd_snap_list,
     .bdrv_snapshot_goto     = qemu_rbd_snap_rollback,
-#ifdef LIBRBD_SUPPORTS_INVALIDATE
     .bdrv_co_invalidate_cache = qemu_rbd_co_invalidate_cache,
-#endif
 
     .strong_runtime_opts    = qemu_rbd_strong_runtime_opts,
 };
index 0c70215784da9cd4516919f25928f55a06679c41..5ded5f1ca94ba7e893c30d7d525931028f65b816 100644 (file)
@@ -22,7 +22,7 @@
 #include "sysemu/block-backend.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qdict.h"
-#include "replication.h"
+#include "block/replication.h"
 
 typedef enum {
     BLOCK_REPLICATION_NONE,             /* block replication is not started */
@@ -35,7 +35,6 @@ typedef enum {
 typedef struct BDRVReplicationState {
     ReplicationMode mode;
     ReplicationStage stage;
-    BdrvChild *active_disk;
     BlockJob *commit_job;
     BdrvChild *hidden_disk;
     BdrvChild *secondary_disk;
@@ -89,11 +88,9 @@ static int replication_open(BlockDriverState *bs, QDict *options,
     const char *mode;
     const char *top_id;
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
     }
 
     ret = -EINVAL;
@@ -143,6 +140,7 @@ static void replication_close(BlockDriverState *bs)
 {
     BDRVReplicationState *s = bs->opaque;
     Job *commit_job;
+    GLOBAL_STATE_CODE();
 
     if (s->stage == BLOCK_REPLICATION_RUNNING) {
         replication_stop(s->rs, false, NULL);
@@ -150,7 +148,7 @@ static void replication_close(BlockDriverState *bs)
     if (s->stage == BLOCK_REPLICATION_FAILOVER) {
         commit_job = &s->commit_job->job;
         assert(commit_job->aio_context == qemu_get_current_aio_context());
-        job_cancel_sync(commit_job);
+        job_cancel_sync(commit_job, false);
     }
 
     if (s->mode == REPLICATION_MODE_SECONDARY) {
@@ -166,7 +164,12 @@ static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
                                    uint64_t perm, uint64_t shared,
                                    uint64_t *nperm, uint64_t *nshared)
 {
-    *nperm = BLK_PERM_CONSISTENT_READ;
+    if (role & BDRV_CHILD_PRIMARY) {
+        *nperm = BLK_PERM_CONSISTENT_READ;
+    } else {
+        *nperm = 0;
+    }
+
     if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) {
         *nperm |= BLK_PERM_WRITE;
     }
@@ -176,9 +179,10 @@ static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
     return;
 }
 
-static int64_t replication_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+replication_co_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file->bs);
+    return bdrv_co_getlength(bs->file->bs);
 }
 
 static int replication_get_io_status(BDRVReplicationState *s)
@@ -217,10 +221,9 @@ static int replication_return_value(BDRVReplicationState *s, int ret)
     return ret;
 }
 
-static coroutine_fn int replication_co_readv(BlockDriverState *bs,
-                                             int64_t sector_num,
-                                             int remaining_sectors,
-                                             QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+replication_co_readv(BlockDriverState *bs, int64_t sector_num,
+                     int remaining_sectors, QEMUIOVector *qiov)
 {
     BDRVReplicationState *s = bs->opaque;
     int ret;
@@ -241,11 +244,9 @@ static coroutine_fn int replication_co_readv(BlockDriverState *bs,
     return replication_return_value(s, ret);
 }
 
-static coroutine_fn int replication_co_writev(BlockDriverState *bs,
-                                              int64_t sector_num,
-                                              int remaining_sectors,
-                                              QEMUIOVector *qiov,
-                                              int flags)
+static int coroutine_fn GRAPH_RDLOCK
+replication_co_writev(BlockDriverState *bs, int64_t sector_num,
+                      int remaining_sectors, QEMUIOVector *qiov, int flags)
 {
     BDRVReplicationState *s = bs->opaque;
     QEMUIOVector hd_qiov;
@@ -256,7 +257,6 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
     int ret;
     int64_t n;
 
-    assert(!flags);
     ret = replication_get_io_status(s);
     if (ret < 0) {
         goto out;
@@ -276,10 +276,10 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
     while (remaining_sectors > 0) {
         int64_t count;
 
-        ret = bdrv_is_allocated_above(top->bs, base->bs, false,
-                                      sector_num * BDRV_SECTOR_SIZE,
-                                      remaining_sectors * BDRV_SECTOR_SIZE,
-                                      &count);
+        ret = bdrv_co_is_allocated_above(top->bs, base->bs, false,
+                                         sector_num * BDRV_SECTOR_SIZE,
+                                         remaining_sectors * BDRV_SECTOR_SIZE,
+                                         &count);
         if (ret < 0) {
             goto out1;
         }
@@ -307,11 +307,16 @@ out:
     return ret;
 }
 
-static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
+static void GRAPH_UNLOCKED
+secondary_do_checkpoint(BlockDriverState *bs, Error **errp)
 {
+    BDRVReplicationState *s = bs->opaque;
+    BdrvChild *active_disk;
     Error *local_err = NULL;
     int ret;
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (!s->backup_job) {
         error_setg(errp, "Backup job was cancelled unexpectedly");
         return;
@@ -323,13 +328,14 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
         return;
     }
 
-    if (!s->active_disk->bs->drv) {
+    active_disk = bs->file;
+    if (!active_disk->bs->drv) {
         error_setg(errp, "Active disk %s is ejected",
-                   s->active_disk->bs->node_name);
+                   active_disk->bs->node_name);
         return;
     }
 
-    ret = bdrv_make_empty(s->active_disk, errp);
+    ret = bdrv_make_empty(active_disk, errp);
     if (ret < 0) {
         return;
     }
@@ -340,17 +346,7 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
         return;
     }
 
-    BlockBackend *blk = blk_new(qemu_get_current_aio_context(),
-                                BLK_PERM_WRITE, BLK_PERM_ALL);
-    blk_insert_bs(blk, s->hidden_disk->bs, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        blk_unref(blk);
-        return;
-    }
-
-    ret = blk_make_empty(blk, errp);
-    blk_unref(blk);
+    ret = bdrv_make_empty(s->hidden_disk, errp);
     if (ret < 0) {
         return;
     }
@@ -365,36 +361,48 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
                                 Error **errp)
 {
     BDRVReplicationState *s = bs->opaque;
+    BdrvChild *hidden_disk, *secondary_disk;
     BlockReopenQueue *reopen_queue = NULL;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    /*
+     * s->hidden_disk and s->secondary_disk may not be set yet, as they will
+     * only be set after the children are writable.
+     */
+    hidden_disk = bs->file->bs->backing;
+    secondary_disk = hidden_disk->bs->backing;
+
     if (writable) {
-        s->orig_hidden_read_only = bdrv_is_read_only(s->hidden_disk->bs);
-        s->orig_secondary_read_only = bdrv_is_read_only(s->secondary_disk->bs);
+        s->orig_hidden_read_only = bdrv_is_read_only(hidden_disk->bs);
+        s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs);
     }
 
-    bdrv_subtree_drained_begin(s->hidden_disk->bs);
-    bdrv_subtree_drained_begin(s->secondary_disk->bs);
-
     if (s->orig_hidden_read_only) {
         QDict *opts = qdict_new();
         qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
-        reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs,
+        reopen_queue = bdrv_reopen_queue(reopen_queue, hidden_disk->bs,
                                          opts, true);
     }
 
     if (s->orig_secondary_read_only) {
         QDict *opts = qdict_new();
         qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
-        reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs,
+        reopen_queue = bdrv_reopen_queue(reopen_queue, secondary_disk->bs,
                                          opts, true);
     }
 
     if (reopen_queue) {
+        AioContext *ctx = bdrv_get_aio_context(bs);
+        if (ctx != qemu_get_aio_context()) {
+            aio_context_release(ctx);
+        }
         bdrv_reopen_multiple(reopen_queue, errp);
+        if (ctx != qemu_get_aio_context()) {
+            aio_context_acquire(ctx);
+        }
     }
-
-    bdrv_subtree_drained_end(s->hidden_disk->bs);
-    bdrv_subtree_drained_end(s->secondary_disk->bs);
 }
 
 static void backup_job_cleanup(BlockDriverState *bs)
@@ -426,7 +434,8 @@ static void backup_job_completed(void *opaque, int ret)
     backup_job_cleanup(bs);
 }
 
-static bool check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs)
+static bool GRAPH_RDLOCK
+check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs)
 {
     BdrvChild *child;
 
@@ -451,9 +460,13 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     BlockDriverState *bs = rs->opaque;
     BDRVReplicationState *s;
     BlockDriverState *top_bs;
+    BdrvChild *active_disk, *hidden_disk, *secondary_disk;
     int64_t active_length, hidden_length, disk_length;
     AioContext *aio_context;
     Error *local_err = NULL;
+    BackupPerf perf = { .use_copy_range = true, .max_workers = 1 };
+
+    GLOBAL_STATE_CODE();
 
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
@@ -487,32 +500,36 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     case REPLICATION_MODE_PRIMARY:
         break;
     case REPLICATION_MODE_SECONDARY:
-        s->active_disk = bs->file;
-        if (!s->active_disk || !s->active_disk->bs ||
-                                    !s->active_disk->bs->backing) {
+        bdrv_graph_rdlock_main_loop();
+        active_disk = bs->file;
+        if (!active_disk || !active_disk->bs || !active_disk->bs->backing) {
             error_setg(errp, "Active disk doesn't have backing file");
+            bdrv_graph_rdunlock_main_loop();
             aio_context_release(aio_context);
             return;
         }
 
-        s->hidden_disk = s->active_disk->bs->backing;
-        if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) {
+        hidden_disk = active_disk->bs->backing;
+        if (!hidden_disk->bs || !hidden_disk->bs->backing) {
             error_setg(errp, "Hidden disk doesn't have backing file");
+            bdrv_graph_rdunlock_main_loop();
             aio_context_release(aio_context);
             return;
         }
 
-        s->secondary_disk = s->hidden_disk->bs->backing;
-        if (!s->secondary_disk->bs || !bdrv_has_blk(s->secondary_disk->bs)) {
+        secondary_disk = hidden_disk->bs->backing;
+        if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) {
             error_setg(errp, "The secondary disk doesn't have block backend");
+            bdrv_graph_rdunlock_main_loop();
             aio_context_release(aio_context);
             return;
         }
+        bdrv_graph_rdunlock_main_loop();
 
         /* verify the length */
-        active_length = bdrv_getlength(s->active_disk->bs);
-        hidden_length = bdrv_getlength(s->hidden_disk->bs);
-        disk_length = bdrv_getlength(s->secondary_disk->bs);
+        active_length = bdrv_getlength(active_disk->bs);
+        hidden_length = bdrv_getlength(hidden_disk->bs);
+        disk_length = bdrv_getlength(secondary_disk->bs);
         if (active_length < 0 || hidden_length < 0 || disk_length < 0 ||
             active_length != hidden_length || hidden_length != disk_length) {
             error_setg(errp, "Active disk, hidden disk, secondary disk's length"
@@ -522,15 +539,18 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
         }
 
         /* Must be true, or the bdrv_getlength() calls would have failed */
-        assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv);
+        assert(active_disk->bs->drv && hidden_disk->bs->drv);
 
-        if (!s->active_disk->bs->drv->bdrv_make_empty ||
-            !s->hidden_disk->bs->drv->bdrv_make_empty) {
+        bdrv_graph_rdlock_main_loop();
+        if (!active_disk->bs->drv->bdrv_make_empty ||
+            !hidden_disk->bs->drv->bdrv_make_empty) {
             error_setg(errp,
                        "Active disk or hidden disk doesn't support make_empty");
             aio_context_release(aio_context);
+            bdrv_graph_rdunlock_main_loop();
             return;
         }
+        bdrv_graph_rdunlock_main_loop();
 
         /* reopen the backing file in r/w mode */
         reopen_backing_file(bs, true, &local_err);
@@ -540,6 +560,30 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
             return;
         }
 
+        bdrv_graph_wrlock(bs);
+
+        bdrv_ref(hidden_disk->bs);
+        s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk",
+                                           &child_of_bds, BDRV_CHILD_DATA,
+                                           &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            bdrv_graph_wrunlock(bs);
+            aio_context_release(aio_context);
+            return;
+        }
+
+        bdrv_ref(secondary_disk->bs);
+        s->secondary_disk = bdrv_attach_child(bs, secondary_disk->bs,
+                                              "secondary disk", &child_of_bds,
+                                              BDRV_CHILD_DATA, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            bdrv_graph_wrunlock(bs);
+            aio_context_release(aio_context);
+            return;
+        }
+
         /* start backup job now */
         error_setg(&s->blocker,
                    "Block device is in use by internal backup job");
@@ -548,6 +592,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
         if (!top_bs || !bdrv_is_root_node(top_bs) ||
             !check_top_bs(top_bs, bs)) {
             error_setg(errp, "No top_bs or it is invalid");
+            bdrv_graph_wrunlock(bs);
             reopen_backing_file(bs, false, NULL);
             aio_context_release(aio_context);
             return;
@@ -555,9 +600,12 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
         bdrv_op_block_all(top_bs, s->blocker);
         bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker);
 
+        bdrv_graph_wrunlock(bs);
+
         s->backup_job = backup_job_create(
                                 NULL, s->secondary_disk->bs, s->hidden_disk->bs,
                                 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
+                                &perf,
                                 BLOCKDEV_ON_ERROR_REPORT,
                                 BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
                                 backup_job_completed, bs, NULL, &local_err);
@@ -577,7 +625,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     s->stage = BLOCK_REPLICATION_RUNNING;
 
     if (s->mode == REPLICATION_MODE_SECONDARY) {
-        secondary_do_checkpoint(s, errp);
+        secondary_do_checkpoint(bs, errp);
     }
 
     s->error = 0;
@@ -606,7 +654,7 @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
     }
 
     if (s->mode == REPLICATION_MODE_SECONDARY) {
-        secondary_do_checkpoint(s, errp);
+        secondary_do_checkpoint(bs, errp);
     }
     aio_context_release(aio_context);
 }
@@ -643,9 +691,13 @@ static void replication_done(void *opaque, int ret)
     if (ret == 0) {
         s->stage = BLOCK_REPLICATION_DONE;
 
-        s->active_disk = NULL;
+        bdrv_graph_wrlock(NULL);
+        bdrv_unref_child(bs, s->secondary_disk);
         s->secondary_disk = NULL;
+        bdrv_unref_child(bs, s->hidden_disk);
         s->hidden_disk = NULL;
+        bdrv_graph_wrunlock(NULL);
+
         s->error = 0;
     } else {
         s->stage = BLOCK_REPLICATION_FAILOVER_FAILED;
@@ -692,21 +744,25 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
          * disk, secondary disk in backup_job_completed().
          */
         if (s->backup_job) {
-            job_cancel_sync(&s->backup_job->job);
+            aio_context_release(aio_context);
+            job_cancel_sync(&s->backup_job->job, true);
+            aio_context_acquire(aio_context);
         }
 
         if (!failover) {
-            secondary_do_checkpoint(s, errp);
+            secondary_do_checkpoint(bs, errp);
             s->stage = BLOCK_REPLICATION_DONE;
             aio_context_release(aio_context);
             return;
         }
 
+        bdrv_graph_rdlock_main_loop();
         s->stage = BLOCK_REPLICATION_FAILOVER;
         s->commit_job = commit_active_start(
-                            NULL, s->active_disk->bs, s->secondary_disk->bs,
+                            NULL, bs->file->bs, s->secondary_disk->bs,
                             JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
                             NULL, replication_done, bs, true, errp);
+        bdrv_graph_rdunlock_main_loop();
         break;
     default:
         aio_context_release(aio_context);
@@ -730,13 +786,12 @@ static BlockDriver bdrv_replication = {
     .bdrv_close                 = replication_close,
     .bdrv_child_perm            = replication_child_perm,
 
-    .bdrv_getlength             = replication_getlength,
+    .bdrv_co_getlength          = replication_co_getlength,
     .bdrv_co_readv              = replication_co_readv,
     .bdrv_co_writev             = replication_co_writev,
 
     .is_filter                  = true,
 
-    .has_variable_length        = true,
     .strong_runtime_opts        = replication_strong_runtime_opts,
 };
 
diff --git a/block/reqlist.c b/block/reqlist.c
new file mode 100644 (file)
index 0000000..08cb57c
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * reqlist API
+ *
+ * Copyright (C) 2013 Proxmox Server Solutions
+ * Copyright (c) 2021 Virtuozzo International GmbH.
+ *
+ * Authors:
+ *  Dietmar Maurer (dietmar@proxmox.com)
+ *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/range.h"
+
+#include "block/reqlist.h"
+
+void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset,
+                      int64_t bytes)
+{
+    assert(!reqlist_find_conflict(reqs, offset, bytes));
+
+    *req = (BlockReq) {
+        .offset = offset,
+        .bytes = bytes,
+    };
+    qemu_co_queue_init(&req->wait_queue);
+    QLIST_INSERT_HEAD(reqs, req, list);
+}
+
+BlockReq *reqlist_find_conflict(BlockReqList *reqs, int64_t offset,
+                                int64_t bytes)
+{
+    BlockReq *r;
+
+    QLIST_FOREACH(r, reqs, list) {
+        if (ranges_overlap(offset, bytes, r->offset, r->bytes)) {
+            return r;
+        }
+    }
+
+    return NULL;
+}
+
+bool coroutine_fn reqlist_wait_one(BlockReqList *reqs, int64_t offset,
+                                   int64_t bytes, CoMutex *lock)
+{
+    BlockReq *r = reqlist_find_conflict(reqs, offset, bytes);
+
+    if (!r) {
+        return false;
+    }
+
+    qemu_co_queue_wait(&r->wait_queue, lock);
+
+    return true;
+}
+
+void coroutine_fn reqlist_wait_all(BlockReqList *reqs, int64_t offset,
+                                   int64_t bytes, CoMutex *lock)
+{
+    while (reqlist_wait_one(reqs, offset, bytes, lock)) {
+        /* continue */
+    }
+}
+
+void coroutine_fn reqlist_shrink_req(BlockReq *req, int64_t new_bytes)
+{
+    if (new_bytes == req->bytes) {
+        return;
+    }
+
+    assert(new_bytes > 0 && new_bytes < req->bytes);
+
+    req->bytes = new_bytes;
+    qemu_co_queue_restart_all(&req->wait_queue);
+}
+
+void coroutine_fn reqlist_remove_req(BlockReq *req)
+{
+    QLIST_REMOVE(req, list);
+    qemu_co_queue_restart_all(&req->wait_queue);
+}
diff --git a/block/sheepdog.c b/block/sheepdog.c
deleted file mode 100644 (file)
index a45c738..0000000
+++ /dev/null
@@ -1,3356 +0,0 @@
-/*
- * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "qapi/error.h"
-#include "qapi/qapi-visit-sockets.h"
-#include "qapi/qapi-visit-block-core.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qobject-input-visitor.h"
-#include "qapi/qobject-output-visitor.h"
-#include "qemu/uri.h"
-#include "qemu/error-report.h"
-#include "qemu/main-loop.h"
-#include "qemu/module.h"
-#include "qemu/option.h"
-#include "qemu/sockets.h"
-#include "block/block_int.h"
-#include "block/qdict.h"
-#include "sysemu/block-backend.h"
-#include "qemu/bitops.h"
-#include "qemu/cutils.h"
-#include "trace.h"
-
-#define SD_PROTO_VER 0x01
-
-#define SD_DEFAULT_ADDR "localhost"
-#define SD_DEFAULT_PORT 7000
-
-#define SD_OP_CREATE_AND_WRITE_OBJ  0x01
-#define SD_OP_READ_OBJ       0x02
-#define SD_OP_WRITE_OBJ      0x03
-/* 0x04 is used internally by Sheepdog */
-
-#define SD_OP_NEW_VDI        0x11
-#define SD_OP_LOCK_VDI       0x12
-#define SD_OP_RELEASE_VDI    0x13
-#define SD_OP_GET_VDI_INFO   0x14
-#define SD_OP_READ_VDIS      0x15
-#define SD_OP_FLUSH_VDI      0x16
-#define SD_OP_DEL_VDI        0x17
-#define SD_OP_GET_CLUSTER_DEFAULT   0x18
-
-#define SD_FLAG_CMD_WRITE    0x01
-#define SD_FLAG_CMD_COW      0x02
-#define SD_FLAG_CMD_CACHE    0x04 /* Writeback mode for cache */
-#define SD_FLAG_CMD_DIRECT   0x08 /* Don't use cache */
-
-#define SD_RES_SUCCESS       0x00 /* Success */
-#define SD_RES_UNKNOWN       0x01 /* Unknown error */
-#define SD_RES_NO_OBJ        0x02 /* No object found */
-#define SD_RES_EIO           0x03 /* I/O error */
-#define SD_RES_VDI_EXIST     0x04 /* Vdi exists already */
-#define SD_RES_INVALID_PARMS 0x05 /* Invalid parameters */
-#define SD_RES_SYSTEM_ERROR  0x06 /* System error */
-#define SD_RES_VDI_LOCKED    0x07 /* Vdi is locked */
-#define SD_RES_NO_VDI        0x08 /* No vdi found */
-#define SD_RES_NO_BASE_VDI   0x09 /* No base vdi found */
-#define SD_RES_VDI_READ      0x0A /* Cannot read requested vdi */
-#define SD_RES_VDI_WRITE     0x0B /* Cannot write requested vdi */
-#define SD_RES_BASE_VDI_READ 0x0C /* Cannot read base vdi */
-#define SD_RES_BASE_VDI_WRITE   0x0D /* Cannot write base vdi */
-#define SD_RES_NO_TAG        0x0E /* Requested tag is not found */
-#define SD_RES_STARTUP       0x0F /* Sheepdog is on starting up */
-#define SD_RES_VDI_NOT_LOCKED   0x10 /* Vdi is not locked */
-#define SD_RES_SHUTDOWN      0x11 /* Sheepdog is shutting down */
-#define SD_RES_NO_MEM        0x12 /* Cannot allocate memory */
-#define SD_RES_FULL_VDI      0x13 /* we already have the maximum vdis */
-#define SD_RES_VER_MISMATCH  0x14 /* Protocol version mismatch */
-#define SD_RES_NO_SPACE      0x15 /* Server has no room for new objects */
-#define SD_RES_WAIT_FOR_FORMAT  0x16 /* Waiting for a format operation */
-#define SD_RES_WAIT_FOR_JOIN    0x17 /* Waiting for other nodes joining */
-#define SD_RES_JOIN_FAILED   0x18 /* Target node had failed to join sheepdog */
-#define SD_RES_HALT          0x19 /* Sheepdog is stopped serving IO request */
-#define SD_RES_READONLY      0x1A /* Object is read-only */
-
-/*
- * Object ID rules
- *
- *  0 - 19 (20 bits): data object space
- * 20 - 31 (12 bits): reserved data object space
- * 32 - 55 (24 bits): vdi object space
- * 56 - 59 ( 4 bits): reserved vdi object space
- * 60 - 63 ( 4 bits): object type identifier space
- */
-
-#define VDI_SPACE_SHIFT   32
-#define VDI_BIT (UINT64_C(1) << 63)
-#define VMSTATE_BIT (UINT64_C(1) << 62)
-#define MAX_DATA_OBJS (UINT64_C(1) << 20)
-#define MAX_CHILDREN 1024
-#define SD_MAX_VDI_LEN 256
-#define SD_MAX_VDI_TAG_LEN 256
-#define SD_NR_VDIS   (1U << 24)
-#define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22)
-#define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS)
-#define SD_DEFAULT_BLOCK_SIZE_SHIFT 22
-/*
- * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and
- * (SD_EC_MAX_STRIP - 1) for parity strips
- *
- * SD_MAX_COPIES is sum of number of data strips and parity strips.
- */
-#define SD_EC_MAX_STRIP 16
-#define SD_MAX_COPIES (SD_EC_MAX_STRIP * 2 - 1)
-
-#define SD_INODE_SIZE (sizeof(SheepdogInode))
-#define CURRENT_VDI_ID 0
-
-#define LOCK_TYPE_NORMAL 0
-#define LOCK_TYPE_SHARED 1      /* for iSCSI multipath */
-
-typedef struct SheepdogReq {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint32_t opcode_specific[8];
-} SheepdogReq;
-
-typedef struct SheepdogRsp {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint32_t result;
-    uint32_t opcode_specific[7];
-} SheepdogRsp;
-
-typedef struct SheepdogObjReq {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint64_t oid;
-    uint64_t cow_oid;
-    uint8_t copies;
-    uint8_t copy_policy;
-    uint8_t reserved[6];
-    uint64_t offset;
-} SheepdogObjReq;
-
-typedef struct SheepdogObjRsp {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint32_t result;
-    uint8_t copies;
-    uint8_t copy_policy;
-    uint8_t reserved[2];
-    uint32_t pad[6];
-} SheepdogObjRsp;
-
-typedef struct SheepdogVdiReq {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint64_t vdi_size;
-    uint32_t base_vdi_id;
-    uint8_t copies;
-    uint8_t copy_policy;
-    uint8_t store_policy;
-    uint8_t block_size_shift;
-    uint32_t snapid;
-    uint32_t type;
-    uint32_t pad[2];
-} SheepdogVdiReq;
-
-typedef struct SheepdogVdiRsp {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint32_t result;
-    uint32_t rsvd;
-    uint32_t vdi_id;
-    uint32_t pad[5];
-} SheepdogVdiRsp;
-
-typedef struct SheepdogClusterRsp {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint32_t result;
-    uint8_t nr_copies;
-    uint8_t copy_policy;
-    uint8_t block_size_shift;
-    uint8_t __pad1;
-    uint32_t __pad2[6];
-} SheepdogClusterRsp;
-
-typedef struct SheepdogInode {
-    char name[SD_MAX_VDI_LEN];
-    char tag[SD_MAX_VDI_TAG_LEN];
-    uint64_t ctime;
-    uint64_t snap_ctime;
-    uint64_t vm_clock_nsec;
-    uint64_t vdi_size;
-    uint64_t vm_state_size;
-    uint16_t copy_policy;
-    uint8_t nr_copies;
-    uint8_t block_size_shift;
-    uint32_t snap_id;
-    uint32_t vdi_id;
-    uint32_t parent_vdi_id;
-    uint32_t child_vdi_id[MAX_CHILDREN];
-    uint32_t data_vdi_id[MAX_DATA_OBJS];
-} SheepdogInode;
-
-#define SD_INODE_HEADER_SIZE offsetof(SheepdogInode, data_vdi_id)
-
-/*
- * 64 bit FNV-1a non-zero initial basis
- */
-#define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL)
-
-static void deprecation_warning(void)
-{
-    static bool warned;
-
-    if (!warned) {
-        warn_report("the sheepdog block driver is deprecated");
-        warned = true;
-    }
-}
-
-/*
- * 64 bit Fowler/Noll/Vo FNV-1a hash code
- */
-static inline uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
-{
-    unsigned char *bp = buf;
-    unsigned char *be = bp + len;
-    while (bp < be) {
-        hval ^= (uint64_t) *bp++;
-        hval += (hval << 1) + (hval << 4) + (hval << 5) +
-            (hval << 7) + (hval << 8) + (hval << 40);
-    }
-    return hval;
-}
-
-static inline bool is_data_obj_writable(SheepdogInode *inode, unsigned int idx)
-{
-    return inode->vdi_id == inode->data_vdi_id[idx];
-}
-
-static inline bool is_data_obj(uint64_t oid)
-{
-    return !(VDI_BIT & oid);
-}
-
-static inline uint64_t data_oid_to_idx(uint64_t oid)
-{
-    return oid & (MAX_DATA_OBJS - 1);
-}
-
-static inline uint32_t oid_to_vid(uint64_t oid)
-{
-    return (oid & ~VDI_BIT) >> VDI_SPACE_SHIFT;
-}
-
-static inline uint64_t vid_to_vdi_oid(uint32_t vid)
-{
-    return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT);
-}
-
-static inline uint64_t vid_to_vmstate_oid(uint32_t vid, uint32_t idx)
-{
-    return VMSTATE_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
-}
-
-static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx)
-{
-    return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
-}
-
-static inline bool is_snapshot(struct SheepdogInode *inode)
-{
-    return !!inode->snap_ctime;
-}
-
-static inline size_t count_data_objs(const struct SheepdogInode *inode)
-{
-    return DIV_ROUND_UP(inode->vdi_size,
-                        (1UL << inode->block_size_shift));
-}
-
-typedef struct SheepdogAIOCB SheepdogAIOCB;
-typedef struct BDRVSheepdogState BDRVSheepdogState;
-
-typedef struct AIOReq {
-    SheepdogAIOCB *aiocb;
-    unsigned int iov_offset;
-
-    uint64_t oid;
-    uint64_t base_oid;
-    uint64_t offset;
-    unsigned int data_len;
-    uint8_t flags;
-    uint32_t id;
-    bool create;
-
-    QLIST_ENTRY(AIOReq) aio_siblings;
-} AIOReq;
-
-enum AIOCBState {
-    AIOCB_WRITE_UDATA,
-    AIOCB_READ_UDATA,
-    AIOCB_FLUSH_CACHE,
-    AIOCB_DISCARD_OBJ,
-};
-
-#define AIOCBOverlapping(x, y)                                 \
-    (!(x->max_affect_data_idx < y->min_affect_data_idx          \
-       || y->max_affect_data_idx < x->min_affect_data_idx))
-
-struct SheepdogAIOCB {
-    BDRVSheepdogState *s;
-
-    QEMUIOVector *qiov;
-
-    int64_t sector_num;
-    int nb_sectors;
-
-    int ret;
-    enum AIOCBState aiocb_type;
-
-    Coroutine *coroutine;
-    int nr_pending;
-
-    uint32_t min_affect_data_idx;
-    uint32_t max_affect_data_idx;
-
-    /*
-     * The difference between affect_data_idx and dirty_data_idx:
-     * affect_data_idx represents range of index of all request types.
-     * dirty_data_idx represents range of index updated by COW requests.
-     * dirty_data_idx is used for updating an inode object.
-     */
-    uint32_t min_dirty_data_idx;
-    uint32_t max_dirty_data_idx;
-
-    QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings;
-};
-
-struct BDRVSheepdogState {
-    BlockDriverState *bs;
-    AioContext *aio_context;
-
-    SheepdogInode inode;
-
-    char name[SD_MAX_VDI_LEN];
-    bool is_snapshot;
-    uint32_t cache_flags;
-    bool discard_supported;
-
-    SocketAddress *addr;
-    int fd;
-
-    CoMutex lock;
-    Coroutine *co_send;
-    Coroutine *co_recv;
-
-    uint32_t aioreq_seq_num;
-
-    /* Every aio request must be linked to either of these queues. */
-    QLIST_HEAD(, AIOReq) inflight_aio_head;
-    QLIST_HEAD(, AIOReq) failed_aio_head;
-
-    CoMutex queue_lock;
-    CoQueue overlapping_queue;
-    QLIST_HEAD(, SheepdogAIOCB) inflight_aiocb_head;
-};
-
-typedef struct BDRVSheepdogReopenState {
-    int fd;
-    int cache_flags;
-} BDRVSheepdogReopenState;
-
-static const char *sd_strerror(int err)
-{
-    int i;
-
-    static const struct {
-        int err;
-        const char *desc;
-    } errors[] = {
-        {SD_RES_SUCCESS, "Success"},
-        {SD_RES_UNKNOWN, "Unknown error"},
-        {SD_RES_NO_OBJ, "No object found"},
-        {SD_RES_EIO, "I/O error"},
-        {SD_RES_VDI_EXIST, "VDI exists already"},
-        {SD_RES_INVALID_PARMS, "Invalid parameters"},
-        {SD_RES_SYSTEM_ERROR, "System error"},
-        {SD_RES_VDI_LOCKED, "VDI is already locked"},
-        {SD_RES_NO_VDI, "No vdi found"},
-        {SD_RES_NO_BASE_VDI, "No base VDI found"},
-        {SD_RES_VDI_READ, "Failed read the requested VDI"},
-        {SD_RES_VDI_WRITE, "Failed to write the requested VDI"},
-        {SD_RES_BASE_VDI_READ, "Failed to read the base VDI"},
-        {SD_RES_BASE_VDI_WRITE, "Failed to write the base VDI"},
-        {SD_RES_NO_TAG, "Failed to find the requested tag"},
-        {SD_RES_STARTUP, "The system is still booting"},
-        {SD_RES_VDI_NOT_LOCKED, "VDI isn't locked"},
-        {SD_RES_SHUTDOWN, "The system is shutting down"},
-        {SD_RES_NO_MEM, "Out of memory on the server"},
-        {SD_RES_FULL_VDI, "We already have the maximum vdis"},
-        {SD_RES_VER_MISMATCH, "Protocol version mismatch"},
-        {SD_RES_NO_SPACE, "Server has no space for new objects"},
-        {SD_RES_WAIT_FOR_FORMAT, "Sheepdog is waiting for a format operation"},
-        {SD_RES_WAIT_FOR_JOIN, "Sheepdog is waiting for other nodes joining"},
-        {SD_RES_JOIN_FAILED, "Target node had failed to join sheepdog"},
-        {SD_RES_HALT, "Sheepdog is stopped serving IO request"},
-        {SD_RES_READONLY, "Object is read-only"},
-    };
-
-    for (i = 0; i < ARRAY_SIZE(errors); ++i) {
-        if (errors[i].err == err) {
-            return errors[i].desc;
-        }
-    }
-
-    return "Invalid error code";
-}
-
-/*
- * Sheepdog I/O handling:
- *
- * 1. In sd_co_rw_vector, we send the I/O requests to the server and
- *    link the requests to the inflight_list in the
- *    BDRVSheepdogState.  The function yields while waiting for
- *    receiving the response.
- *
- * 2. We receive the response in aio_read_response, the fd handler to
- *    the sheepdog connection.  We switch back to sd_co_readv/sd_writev
- *    after all the requests belonging to the AIOCB are finished.  If
- *    needed, sd_co_writev will send another requests for the vdi object.
- */
-
-static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
-                                    uint64_t oid, unsigned int data_len,
-                                    uint64_t offset, uint8_t flags, bool create,
-                                    uint64_t base_oid, unsigned int iov_offset)
-{
-    AIOReq *aio_req;
-
-    aio_req = g_malloc(sizeof(*aio_req));
-    aio_req->aiocb = acb;
-    aio_req->iov_offset = iov_offset;
-    aio_req->oid = oid;
-    aio_req->base_oid = base_oid;
-    aio_req->offset = offset;
-    aio_req->data_len = data_len;
-    aio_req->flags = flags;
-    aio_req->id = s->aioreq_seq_num++;
-    aio_req->create = create;
-
-    acb->nr_pending++;
-    return aio_req;
-}
-
-static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
-{
-    SheepdogAIOCB *cb;
-
-retry:
-    QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
-        if (AIOCBOverlapping(acb, cb)) {
-            qemu_co_queue_wait(&s->overlapping_queue, &s->queue_lock);
-            goto retry;
-        }
-    }
-}
-
-static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
-                         QEMUIOVector *qiov, int64_t sector_num, int nb_sectors,
-                         int type)
-{
-    uint32_t object_size;
-
-    object_size = (UINT32_C(1) << s->inode.block_size_shift);
-
-    acb->s = s;
-
-    acb->qiov = qiov;
-
-    acb->sector_num = sector_num;
-    acb->nb_sectors = nb_sectors;
-
-    acb->coroutine = qemu_coroutine_self();
-    acb->ret = 0;
-    acb->nr_pending = 0;
-
-    acb->min_affect_data_idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size;
-    acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE +
-                              acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size;
-
-    acb->min_dirty_data_idx = UINT32_MAX;
-    acb->max_dirty_data_idx = 0;
-    acb->aiocb_type = type;
-
-    if (type == AIOCB_FLUSH_CACHE) {
-        return;
-    }
-
-    qemu_co_mutex_lock(&s->queue_lock);
-    wait_for_overlapping_aiocb(s, acb);
-    QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings);
-    qemu_co_mutex_unlock(&s->queue_lock);
-}
-
-static SocketAddress *sd_server_config(QDict *options, Error **errp)
-{
-    QDict *server = NULL;
-    Visitor *iv = NULL;
-    SocketAddress *saddr = NULL;
-
-    qdict_extract_subqdict(options, &server, "server.");
-
-    iv = qobject_input_visitor_new_flat_confused(server, errp);
-    if (!iv) {
-        goto done;
-    }
-
-    if (!visit_type_SocketAddress(iv, NULL, &saddr, errp)) {
-        goto done;
-    }
-
-done:
-    visit_free(iv);
-    qobject_unref(server);
-    return saddr;
-}
-
-/* Return -EIO in case of error, file descriptor on success */
-static int connect_to_sdog(BDRVSheepdogState *s, Error **errp)
-{
-    int fd;
-
-    fd = socket_connect(s->addr, errp);
-
-    if (s->addr->type == SOCKET_ADDRESS_TYPE_INET && fd >= 0) {
-        int ret = socket_set_nodelay(fd);
-        if (ret < 0) {
-            warn_report("can't set TCP_NODELAY: %s", strerror(errno));
-        }
-    }
-
-    if (fd >= 0) {
-        qemu_set_nonblock(fd);
-    } else {
-        fd = -EIO;
-    }
-
-    return fd;
-}
-
-/* Return 0 on success and -errno in case of error */
-static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
-                                    unsigned int *wlen)
-{
-    int ret;
-
-    ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
-    if (ret != sizeof(*hdr)) {
-        error_report("failed to send a req, %s", strerror(errno));
-        return -errno;
-    }
-
-    ret = qemu_co_send(sockfd, data, *wlen);
-    if (ret != *wlen) {
-        error_report("failed to send a req, %s", strerror(errno));
-        return -errno;
-    }
-
-    return ret;
-}
-
-typedef struct SheepdogReqCo {
-    int sockfd;
-    BlockDriverState *bs;
-    AioContext *aio_context;
-    SheepdogReq *hdr;
-    void *data;
-    unsigned int *wlen;
-    unsigned int *rlen;
-    int ret;
-    bool finished;
-    Coroutine *co;
-} SheepdogReqCo;
-
-static void restart_co_req(void *opaque)
-{
-    SheepdogReqCo *srco = opaque;
-
-    aio_co_wake(srco->co);
-}
-
-static coroutine_fn void do_co_req(void *opaque)
-{
-    int ret;
-    SheepdogReqCo *srco = opaque;
-    int sockfd = srco->sockfd;
-    SheepdogReq *hdr = srco->hdr;
-    void *data = srco->data;
-    unsigned int *wlen = srco->wlen;
-    unsigned int *rlen = srco->rlen;
-
-    srco->co = qemu_coroutine_self();
-    aio_set_fd_handler(srco->aio_context, sockfd, false,
-                       NULL, restart_co_req, NULL, srco);
-
-    ret = send_co_req(sockfd, hdr, data, wlen);
-    if (ret < 0) {
-        goto out;
-    }
-
-    aio_set_fd_handler(srco->aio_context, sockfd, false,
-                       restart_co_req, NULL, NULL, srco);
-
-    ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
-    if (ret != sizeof(*hdr)) {
-        error_report("failed to get a rsp, %s", strerror(errno));
-        ret = -errno;
-        goto out;
-    }
-
-    if (*rlen > hdr->data_length) {
-        *rlen = hdr->data_length;
-    }
-
-    if (*rlen) {
-        ret = qemu_co_recv(sockfd, data, *rlen);
-        if (ret != *rlen) {
-            error_report("failed to get the data, %s", strerror(errno));
-            ret = -errno;
-            goto out;
-        }
-    }
-    ret = 0;
-out:
-    /* there is at most one request for this sockfd, so it is safe to
-     * set each handler to NULL. */
-    aio_set_fd_handler(srco->aio_context, sockfd, false,
-                       NULL, NULL, NULL, NULL);
-
-    srco->co = NULL;
-    srco->ret = ret;
-    /* Set srco->finished before reading bs->wakeup.  */
-    qatomic_mb_set(&srco->finished, true);
-    if (srco->bs) {
-        bdrv_wakeup(srco->bs);
-    }
-}
-
-/*
- * Send the request to the sheep in a synchronous manner.
- *
- * Return 0 on success, -errno in case of error.
- */
-static int do_req(int sockfd, BlockDriverState *bs, SheepdogReq *hdr,
-                  void *data, unsigned int *wlen, unsigned int *rlen)
-{
-    Coroutine *co;
-    SheepdogReqCo srco = {
-        .sockfd = sockfd,
-        .aio_context = bs ? bdrv_get_aio_context(bs) : qemu_get_aio_context(),
-        .bs = bs,
-        .hdr = hdr,
-        .data = data,
-        .wlen = wlen,
-        .rlen = rlen,
-        .ret = 0,
-        .finished = false,
-    };
-
-    if (qemu_in_coroutine()) {
-        do_co_req(&srco);
-    } else {
-        co = qemu_coroutine_create(do_co_req, &srco);
-        if (bs) {
-            bdrv_coroutine_enter(bs, co);
-            BDRV_POLL_WHILE(bs, !srco.finished);
-        } else {
-            qemu_coroutine_enter(co);
-            while (!srco.finished) {
-                aio_poll(qemu_get_aio_context(), true);
-            }
-        }
-    }
-
-    return srco.ret;
-}
-
-static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
-                                         struct iovec *iov, int niov,
-                                         enum AIOCBState aiocb_type);
-static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
-static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag);
-static int get_sheep_fd(BDRVSheepdogState *s, Error **errp);
-static void co_write_request(void *opaque);
-
-static coroutine_fn void reconnect_to_sdog(void *opaque)
-{
-    BDRVSheepdogState *s = opaque;
-    AIOReq *aio_req, *next;
-
-    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
-                       NULL, NULL, NULL);
-    close(s->fd);
-    s->fd = -1;
-
-    /* Wait for outstanding write requests to be completed. */
-    while (s->co_send != NULL) {
-        co_write_request(opaque);
-    }
-
-    /* Try to reconnect the sheepdog server every one second. */
-    while (s->fd < 0) {
-        Error *local_err = NULL;
-        s->fd = get_sheep_fd(s, &local_err);
-        if (s->fd < 0) {
-            trace_sheepdog_reconnect_to_sdog();
-            error_report_err(local_err);
-            qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, NANOSECONDS_PER_SECOND);
-        }
-    };
-
-    /*
-     * Now we have to resend all the request in the inflight queue.  However,
-     * resend_aioreq() can yield and newly created requests can be added to the
-     * inflight queue before the coroutine is resumed.  To avoid mixing them, we
-     * have to move all the inflight requests to the failed queue before
-     * resend_aioreq() is called.
-     */
-    qemu_co_mutex_lock(&s->queue_lock);
-    QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) {
-        QLIST_REMOVE(aio_req, aio_siblings);
-        QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings);
-    }
-
-    /* Resend all the failed aio requests. */
-    while (!QLIST_EMPTY(&s->failed_aio_head)) {
-        aio_req = QLIST_FIRST(&s->failed_aio_head);
-        QLIST_REMOVE(aio_req, aio_siblings);
-        qemu_co_mutex_unlock(&s->queue_lock);
-        resend_aioreq(s, aio_req);
-        qemu_co_mutex_lock(&s->queue_lock);
-    }
-    qemu_co_mutex_unlock(&s->queue_lock);
-}
-
-/*
- * Receive responses of the I/O requests.
- *
- * This function is registered as a fd handler, and called from the
- * main loop when s->fd is ready for reading responses.
- */
-static void coroutine_fn aio_read_response(void *opaque)
-{
-    SheepdogObjRsp rsp;
-    BDRVSheepdogState *s = opaque;
-    int fd = s->fd;
-    int ret;
-    AIOReq *aio_req = NULL;
-    SheepdogAIOCB *acb;
-    uint64_t idx;
-
-    /* read a header */
-    ret = qemu_co_recv(fd, &rsp, sizeof(rsp));
-    if (ret != sizeof(rsp)) {
-        error_report("failed to get the header, %s", strerror(errno));
-        goto err;
-    }
-
-    /* find the right aio_req from the inflight aio list */
-    QLIST_FOREACH(aio_req, &s->inflight_aio_head, aio_siblings) {
-        if (aio_req->id == rsp.id) {
-            break;
-        }
-    }
-    if (!aio_req) {
-        error_report("cannot find aio_req %x", rsp.id);
-        goto err;
-    }
-
-    acb = aio_req->aiocb;
-
-    switch (acb->aiocb_type) {
-    case AIOCB_WRITE_UDATA:
-        if (!is_data_obj(aio_req->oid)) {
-            break;
-        }
-        idx = data_oid_to_idx(aio_req->oid);
-
-        if (aio_req->create) {
-            /*
-             * If the object is newly created one, we need to update
-             * the vdi object (metadata object).  min_dirty_data_idx
-             * and max_dirty_data_idx are changed to include updated
-             * index between them.
-             */
-            if (rsp.result == SD_RES_SUCCESS) {
-                s->inode.data_vdi_id[idx] = s->inode.vdi_id;
-                acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx);
-                acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx);
-            }
-        }
-        break;
-    case AIOCB_READ_UDATA:
-        ret = qemu_co_recvv(fd, acb->qiov->iov, acb->qiov->niov,
-                            aio_req->iov_offset, rsp.data_length);
-        if (ret != rsp.data_length) {
-            error_report("failed to get the data, %s", strerror(errno));
-            goto err;
-        }
-        break;
-    case AIOCB_FLUSH_CACHE:
-        if (rsp.result == SD_RES_INVALID_PARMS) {
-            trace_sheepdog_aio_read_response();
-            s->cache_flags = SD_FLAG_CMD_DIRECT;
-            rsp.result = SD_RES_SUCCESS;
-        }
-        break;
-    case AIOCB_DISCARD_OBJ:
-        switch (rsp.result) {
-        case SD_RES_INVALID_PARMS:
-            error_report("server doesn't support discard command");
-            rsp.result = SD_RES_SUCCESS;
-            s->discard_supported = false;
-            break;
-        default:
-            break;
-        }
-    }
-
-    /* No more data for this aio_req (reload_inode below uses its own file
-     * descriptor handler which doesn't use co_recv).
-    */
-    s->co_recv = NULL;
-
-    qemu_co_mutex_lock(&s->queue_lock);
-    QLIST_REMOVE(aio_req, aio_siblings);
-    qemu_co_mutex_unlock(&s->queue_lock);
-
-    switch (rsp.result) {
-    case SD_RES_SUCCESS:
-        break;
-    case SD_RES_READONLY:
-        if (s->inode.vdi_id == oid_to_vid(aio_req->oid)) {
-            ret = reload_inode(s, 0, "");
-            if (ret < 0) {
-                goto err;
-            }
-        }
-        if (is_data_obj(aio_req->oid)) {
-            aio_req->oid = vid_to_data_oid(s->inode.vdi_id,
-                                           data_oid_to_idx(aio_req->oid));
-        } else {
-            aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id);
-        }
-        resend_aioreq(s, aio_req);
-        return;
-    default:
-        acb->ret = -EIO;
-        error_report("%s", sd_strerror(rsp.result));
-        break;
-    }
-
-    g_free(aio_req);
-
-    if (!--acb->nr_pending) {
-        /*
-         * We've finished all requests which belong to the AIOCB, so
-         * we can switch back to sd_co_readv/writev now.
-         */
-        aio_co_wake(acb->coroutine);
-    }
-
-    return;
-
-err:
-    reconnect_to_sdog(opaque);
-}
-
-static void co_read_response(void *opaque)
-{
-    BDRVSheepdogState *s = opaque;
-
-    if (!s->co_recv) {
-        s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
-    }
-
-    aio_co_enter(s->aio_context, s->co_recv);
-}
-
-static void co_write_request(void *opaque)
-{
-    BDRVSheepdogState *s = opaque;
-
-    aio_co_wake(s->co_send);
-}
-
-/*
- * Return a socket descriptor to read/write objects.
- *
- * We cannot use this descriptor for other operations because
- * the block driver may be on waiting response from the server.
- */
-static int get_sheep_fd(BDRVSheepdogState *s, Error **errp)
-{
-    int fd;
-
-    fd = connect_to_sdog(s, errp);
-    if (fd < 0) {
-        return fd;
-    }
-
-    aio_set_fd_handler(s->aio_context, fd, false,
-                       co_read_response, NULL, NULL, s);
-    return fd;
-}
-
-/*
- * Parse numeric snapshot ID in @str
- * If @str can't be parsed as number, return false.
- * Else, if the number is zero or too large, set *@snapid to zero and
- * return true.
- * Else, set *@snapid to the number and return true.
- */
-static bool sd_parse_snapid(const char *str, uint32_t *snapid)
-{
-    unsigned long ul;
-    int ret;
-
-    ret = qemu_strtoul(str, NULL, 10, &ul);
-    if (ret == -ERANGE) {
-        ul = ret = 0;
-    }
-    if (ret) {
-        return false;
-    }
-    if (ul > UINT32_MAX) {
-        ul = 0;
-    }
-
-    *snapid = ul;
-    return true;
-}
-
-static bool sd_parse_snapid_or_tag(const char *str,
-                                   uint32_t *snapid, char tag[])
-{
-    if (!sd_parse_snapid(str, snapid)) {
-        *snapid = 0;
-        if (g_strlcpy(tag, str, SD_MAX_VDI_TAG_LEN) >= SD_MAX_VDI_TAG_LEN) {
-            return false;
-        }
-    } else if (!*snapid) {
-        return false;
-    } else {
-        tag[0] = 0;
-    }
-    return true;
-}
-
-typedef struct {
-    const char *path;           /* non-null iff transport is tcp */
-    const char *host;           /* valid when transport is tcp */
-    int port;                   /* valid when transport is tcp */
-    char vdi[SD_MAX_VDI_LEN];
-    char tag[SD_MAX_VDI_TAG_LEN];
-    uint32_t snap_id;
-    /* Remainder is only for sd_config_done() */
-    URI *uri;
-    QueryParams *qp;
-} SheepdogConfig;
-
-static void sd_config_done(SheepdogConfig *cfg)
-{
-    if (cfg->qp) {
-        query_params_free(cfg->qp);
-    }
-    uri_free(cfg->uri);
-}
-
-static void sd_parse_uri(SheepdogConfig *cfg, const char *filename,
-                         Error **errp)
-{
-    Error *err = NULL;
-    QueryParams *qp = NULL;
-    bool is_unix;
-    URI *uri;
-
-    memset(cfg, 0, sizeof(*cfg));
-
-    cfg->uri = uri = uri_parse(filename);
-    if (!uri) {
-        error_setg(&err, "invalid URI '%s'", filename);
-        goto out;
-    }
-
-    /* transport */
-    if (!g_strcmp0(uri->scheme, "sheepdog")) {
-        is_unix = false;
-    } else if (!g_strcmp0(uri->scheme, "sheepdog+tcp")) {
-        is_unix = false;
-    } else if (!g_strcmp0(uri->scheme, "sheepdog+unix")) {
-        is_unix = true;
-    } else {
-        error_setg(&err, "URI scheme must be 'sheepdog', 'sheepdog+tcp',"
-                   " or 'sheepdog+unix'");
-        goto out;
-    }
-
-    if (uri->path == NULL || !strcmp(uri->path, "/")) {
-        error_setg(&err, "missing file path in URI");
-        goto out;
-    }
-    if (g_strlcpy(cfg->vdi, uri->path + 1, SD_MAX_VDI_LEN)
-        >= SD_MAX_VDI_LEN) {
-        error_setg(&err, "VDI name is too long");
-        goto out;
-    }
-
-    cfg->qp = qp = query_params_parse(uri->query);
-
-    if (is_unix) {
-        /* sheepdog+unix:///vdiname?socket=path */
-        if (uri->server || uri->port) {
-            error_setg(&err, "URI scheme %s doesn't accept a server address",
-                       uri->scheme);
-            goto out;
-        }
-        if (!qp->n) {
-            error_setg(&err,
-                       "URI scheme %s requires query parameter 'socket'",
-                       uri->scheme);
-            goto out;
-        }
-        if (qp->n != 1 || strcmp(qp->p[0].name, "socket")) {
-            error_setg(&err, "unexpected query parameters");
-            goto out;
-        }
-        cfg->path = qp->p[0].value;
-    } else {
-        /* sheepdog[+tcp]://[host:port]/vdiname */
-        if (qp->n) {
-            error_setg(&err, "unexpected query parameters");
-            goto out;
-        }
-        cfg->host = uri->server;
-        cfg->port = uri->port;
-    }
-
-    /* snapshot tag */
-    if (uri->fragment) {
-        if (!sd_parse_snapid_or_tag(uri->fragment,
-                                    &cfg->snap_id, cfg->tag)) {
-            error_setg(&err, "'%s' is not a valid snapshot ID",
-                       uri->fragment);
-            goto out;
-        }
-    } else {
-        cfg->snap_id = CURRENT_VDI_ID; /* search current vdi */
-    }
-
-out:
-    if (err) {
-        error_propagate(errp, err);
-        sd_config_done(cfg);
-    }
-}
-
-/*
- * Parse a filename (old syntax)
- *
- * filename must be one of the following formats:
- *   1. [vdiname]
- *   2. [vdiname]:[snapid]
- *   3. [vdiname]:[tag]
- *   4. [hostname]:[port]:[vdiname]
- *   5. [hostname]:[port]:[vdiname]:[snapid]
- *   6. [hostname]:[port]:[vdiname]:[tag]
- *
- * You can boot from the snapshot images by specifying `snapid` or
- * `tag'.
- *
- * You can run VMs outside the Sheepdog cluster by specifying
- * `hostname' and `port' (experimental).
- */
-static void parse_vdiname(SheepdogConfig *cfg, const char *filename,
-                          Error **errp)
-{
-    Error *err = NULL;
-    char *p, *q, *uri;
-    const char *host_spec, *vdi_spec;
-    int nr_sep;
-
-    strstart(filename, "sheepdog:", &filename);
-    p = q = g_strdup(filename);
-
-    /* count the number of separators */
-    nr_sep = 0;
-    while (*p) {
-        if (*p == ':') {
-            nr_sep++;
-        }
-        p++;
-    }
-    p = q;
-
-    /* use the first two tokens as host_spec. */
-    if (nr_sep >= 2) {
-        host_spec = p;
-        p = strchr(p, ':');
-        p++;
-        p = strchr(p, ':');
-        *p++ = '\0';
-    } else {
-        host_spec = "";
-    }
-
-    vdi_spec = p;
-
-    p = strchr(vdi_spec, ':');
-    if (p) {
-        *p++ = '#';
-    }
-
-    uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec);
-
-    /*
-     * FIXME We to escape URI meta-characters, e.g. "x?y=z"
-     * produces "sheepdog://x?y=z".  Because of that ...
-     */
-    sd_parse_uri(cfg, uri, &err);
-    if (err) {
-        /*
-         * ... this can fail, but the error message is misleading.
-         * Replace it by the traditional useless one until the
-         * escaping is fixed.
-         */
-        error_free(err);
-        error_setg(errp, "Can't parse filename");
-    }
-
-    g_free(q);
-    g_free(uri);
-}
-
-static void sd_parse_filename(const char *filename, QDict *options,
-                              Error **errp)
-{
-    Error *err = NULL;
-    SheepdogConfig cfg;
-    char buf[32];
-
-    if (strstr(filename, "://")) {
-        sd_parse_uri(&cfg, filename, &err);
-    } else {
-        parse_vdiname(&cfg, filename, &err);
-    }
-    if (err) {
-        error_propagate(errp, err);
-        return;
-    }
-
-    if (cfg.path) {
-        qdict_set_default_str(options, "server.path", cfg.path);
-        qdict_set_default_str(options, "server.type", "unix");
-    } else {
-        qdict_set_default_str(options, "server.type", "inet");
-        qdict_set_default_str(options, "server.host",
-                              cfg.host ?: SD_DEFAULT_ADDR);
-        snprintf(buf, sizeof(buf), "%d", cfg.port ?: SD_DEFAULT_PORT);
-        qdict_set_default_str(options, "server.port", buf);
-    }
-    qdict_set_default_str(options, "vdi", cfg.vdi);
-    qdict_set_default_str(options, "tag", cfg.tag);
-    if (cfg.snap_id) {
-        snprintf(buf, sizeof(buf), "%d", cfg.snap_id);
-        qdict_set_default_str(options, "snap-id", buf);
-    }
-
-    sd_config_done(&cfg);
-}
-
-static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
-                         uint32_t snapid, const char *tag, uint32_t *vid,
-                         bool lock, Error **errp)
-{
-    int ret, fd;
-    SheepdogVdiReq hdr;
-    SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
-    unsigned int wlen, rlen = 0;
-    char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN] QEMU_NONSTRING;
-
-    fd = connect_to_sdog(s, errp);
-    if (fd < 0) {
-        return fd;
-    }
-
-    /* This pair of strncpy calls ensures that the buffer is zero-filled,
-     * which is desirable since we'll soon be sending those bytes, and
-     * don't want the send_req to read uninitialized data.
-     */
-    strncpy(buf, filename, SD_MAX_VDI_LEN);
-    strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN);
-
-    memset(&hdr, 0, sizeof(hdr));
-    if (lock) {
-        hdr.opcode = SD_OP_LOCK_VDI;
-        hdr.type = LOCK_TYPE_NORMAL;
-    } else {
-        hdr.opcode = SD_OP_GET_VDI_INFO;
-    }
-    wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN;
-    hdr.proto_ver = SD_PROTO_VER;
-    hdr.data_length = wlen;
-    hdr.snapid = snapid;
-    hdr.flags = SD_FLAG_CMD_WRITE;
-
-    ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
-    if (ret) {
-        error_setg_errno(errp, -ret, "cannot get vdi info");
-        goto out;
-    }
-
-    if (rsp->result != SD_RES_SUCCESS) {
-        error_setg(errp, "cannot get vdi info, %s, %s %" PRIu32 " %s",
-                   sd_strerror(rsp->result), filename, snapid, tag);
-        if (rsp->result == SD_RES_NO_VDI) {
-            ret = -ENOENT;
-        } else if (rsp->result == SD_RES_VDI_LOCKED) {
-            ret = -EBUSY;
-        } else {
-            ret = -EIO;
-        }
-        goto out;
-    }
-    *vid = rsp->vdi_id;
-
-    ret = 0;
-out:
-    closesocket(fd);
-    return ret;
-}
-
-static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
-                                         struct iovec *iov, int niov,
-                                         enum AIOCBState aiocb_type)
-{
-    int nr_copies = s->inode.nr_copies;
-    SheepdogObjReq hdr;
-    unsigned int wlen = 0;
-    int ret;
-    uint64_t oid = aio_req->oid;
-    unsigned int datalen = aio_req->data_len;
-    uint64_t offset = aio_req->offset;
-    uint8_t flags = aio_req->flags;
-    uint64_t old_oid = aio_req->base_oid;
-    bool create = aio_req->create;
-
-    qemu_co_mutex_lock(&s->queue_lock);
-    QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-    qemu_co_mutex_unlock(&s->queue_lock);
-
-    if (!nr_copies) {
-        error_report("bug");
-    }
-
-    memset(&hdr, 0, sizeof(hdr));
-
-    switch (aiocb_type) {
-    case AIOCB_FLUSH_CACHE:
-        hdr.opcode = SD_OP_FLUSH_VDI;
-        break;
-    case AIOCB_READ_UDATA:
-        hdr.opcode = SD_OP_READ_OBJ;
-        hdr.flags = flags;
-        break;
-    case AIOCB_WRITE_UDATA:
-        if (create) {
-            hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ;
-        } else {
-            hdr.opcode = SD_OP_WRITE_OBJ;
-        }
-        wlen = datalen;
-        hdr.flags = SD_FLAG_CMD_WRITE | flags;
-        break;
-    case AIOCB_DISCARD_OBJ:
-        hdr.opcode = SD_OP_WRITE_OBJ;
-        hdr.flags = SD_FLAG_CMD_WRITE | flags;
-        s->inode.data_vdi_id[data_oid_to_idx(oid)] = 0;
-        offset = offsetof(SheepdogInode,
-                          data_vdi_id[data_oid_to_idx(oid)]);
-        oid = vid_to_vdi_oid(s->inode.vdi_id);
-        wlen = datalen = sizeof(uint32_t);
-        break;
-    }
-
-    if (s->cache_flags) {
-        hdr.flags |= s->cache_flags;
-    }
-
-    hdr.oid = oid;
-    hdr.cow_oid = old_oid;
-    hdr.copies = s->inode.nr_copies;
-
-    hdr.data_length = datalen;
-    hdr.offset = offset;
-
-    hdr.id = aio_req->id;
-
-    qemu_co_mutex_lock(&s->lock);
-    s->co_send = qemu_coroutine_self();
-    aio_set_fd_handler(s->aio_context, s->fd, false,
-                       co_read_response, co_write_request, NULL, s);
-    socket_set_cork(s->fd, 1);
-
-    /* send a header */
-    ret = qemu_co_send(s->fd, &hdr, sizeof(hdr));
-    if (ret != sizeof(hdr)) {
-        error_report("failed to send a req, %s", strerror(errno));
-        goto out;
-    }
-
-    if (wlen) {
-        ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen);
-        if (ret != wlen) {
-            error_report("failed to send a data, %s", strerror(errno));
-        }
-    }
-out:
-    socket_set_cork(s->fd, 0);
-    aio_set_fd_handler(s->aio_context, s->fd, false,
-                       co_read_response, NULL, NULL, s);
-    s->co_send = NULL;
-    qemu_co_mutex_unlock(&s->lock);
-}
-
-static int read_write_object(int fd, BlockDriverState *bs, char *buf,
-                             uint64_t oid, uint8_t copies,
-                             unsigned int datalen, uint64_t offset,
-                             bool write, bool create, uint32_t cache_flags)
-{
-    SheepdogObjReq hdr;
-    SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
-    unsigned int wlen, rlen;
-    int ret;
-
-    memset(&hdr, 0, sizeof(hdr));
-
-    if (write) {
-        wlen = datalen;
-        rlen = 0;
-        hdr.flags = SD_FLAG_CMD_WRITE;
-        if (create) {
-            hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ;
-        } else {
-            hdr.opcode = SD_OP_WRITE_OBJ;
-        }
-    } else {
-        wlen = 0;
-        rlen = datalen;
-        hdr.opcode = SD_OP_READ_OBJ;
-    }
-
-    hdr.flags |= cache_flags;
-
-    hdr.oid = oid;
-    hdr.data_length = datalen;
-    hdr.offset = offset;
-    hdr.copies = copies;
-
-    ret = do_req(fd, bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
-    if (ret) {
-        error_report("failed to send a request to the sheep");
-        return ret;
-    }
-
-    switch (rsp->result) {
-    case SD_RES_SUCCESS:
-        return 0;
-    default:
-        error_report("%s", sd_strerror(rsp->result));
-        return -EIO;
-    }
-}
-
-static int read_object(int fd, BlockDriverState *bs, char *buf,
-                       uint64_t oid, uint8_t copies,
-                       unsigned int datalen, uint64_t offset,
-                       uint32_t cache_flags)
-{
-    return read_write_object(fd, bs, buf, oid, copies,
-                             datalen, offset, false,
-                             false, cache_flags);
-}
-
-static int write_object(int fd, BlockDriverState *bs, char *buf,
-                        uint64_t oid, uint8_t copies,
-                        unsigned int datalen, uint64_t offset, bool create,
-                        uint32_t cache_flags)
-{
-    return read_write_object(fd, bs, buf, oid, copies,
-                             datalen, offset, true,
-                             create, cache_flags);
-}
-
-/* update inode with the latest state */
-static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)
-{
-    Error *local_err = NULL;
-    SheepdogInode *inode;
-    int ret = 0, fd;
-    uint32_t vid = 0;
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        return -EIO;
-    }
-
-    inode = g_malloc(SD_INODE_HEADER_SIZE);
-
-    ret = find_vdi_name(s, s->name, snapid, tag, &vid, false, &local_err);
-    if (ret) {
-        error_report_err(local_err);
-        goto out;
-    }
-
-    ret = read_object(fd, s->bs, (char *)inode, vid_to_vdi_oid(vid),
-                      s->inode.nr_copies, SD_INODE_HEADER_SIZE, 0,
-                      s->cache_flags);
-    if (ret < 0) {
-        goto out;
-    }
-
-    if (inode->vdi_id != s->inode.vdi_id) {
-        memcpy(&s->inode, inode, SD_INODE_HEADER_SIZE);
-    }
-
-out:
-    g_free(inode);
-    closesocket(fd);
-
-    return ret;
-}
-
-static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
-{
-    SheepdogAIOCB *acb = aio_req->aiocb;
-
-    aio_req->create = false;
-
-    /* check whether this request becomes a CoW one */
-    if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) {
-        int idx = data_oid_to_idx(aio_req->oid);
-
-        if (is_data_obj_writable(&s->inode, idx)) {
-            goto out;
-        }
-
-        if (s->inode.data_vdi_id[idx]) {
-            aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx);
-            aio_req->flags |= SD_FLAG_CMD_COW;
-        }
-        aio_req->create = true;
-    }
-out:
-    if (is_data_obj(aio_req->oid)) {
-        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
-                        acb->aiocb_type);
-    } else {
-        struct iovec iov;
-        iov.iov_base = &s->inode;
-        iov.iov_len = sizeof(s->inode);
-        add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
-    }
-}
-
-static void sd_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVSheepdogState *s = bs->opaque;
-
-    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
-                       NULL, NULL, NULL);
-}
-
-static void sd_attach_aio_context(BlockDriverState *bs,
-                                  AioContext *new_context)
-{
-    BDRVSheepdogState *s = bs->opaque;
-
-    s->aio_context = new_context;
-    aio_set_fd_handler(new_context, s->fd, false,
-                       co_read_response, NULL, NULL, s);
-}
-
-static QemuOptsList runtime_opts = {
-    .name = "sheepdog",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "vdi",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "snap-id",
-            .type = QEMU_OPT_NUMBER,
-        },
-        {
-            .name = "tag",
-            .type = QEMU_OPT_STRING,
-        },
-        { /* end of list */ }
-    },
-};
-
-static int sd_open(BlockDriverState *bs, QDict *options, int flags,
-                   Error **errp)
-{
-    int ret, fd;
-    uint32_t vid = 0;
-    BDRVSheepdogState *s = bs->opaque;
-    const char *vdi, *snap_id_str, *tag;
-    uint64_t snap_id;
-    char *buf = NULL;
-    QemuOpts *opts;
-
-    deprecation_warning();
-
-    s->bs = bs;
-    s->aio_context = bdrv_get_aio_context(bs);
-
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
-        ret = -EINVAL;
-        goto err_no_fd;
-    }
-
-    s->addr = sd_server_config(options, errp);
-    if (!s->addr) {
-        ret = -EINVAL;
-        goto err_no_fd;
-    }
-
-    vdi = qemu_opt_get(opts, "vdi");
-    snap_id_str = qemu_opt_get(opts, "snap-id");
-    snap_id = qemu_opt_get_number(opts, "snap-id", CURRENT_VDI_ID);
-    tag = qemu_opt_get(opts, "tag");
-
-    if (!vdi) {
-        error_setg(errp, "parameter 'vdi' is missing");
-        ret = -EINVAL;
-        goto err_no_fd;
-    }
-    if (strlen(vdi) >= SD_MAX_VDI_LEN) {
-        error_setg(errp, "value of parameter 'vdi' is too long");
-        ret = -EINVAL;
-        goto err_no_fd;
-    }
-
-    if (snap_id > UINT32_MAX) {
-        snap_id = 0;
-    }
-    if (snap_id_str && !snap_id) {
-        error_setg(errp, "'snap-id=%s' is not a valid snapshot ID",
-                   snap_id_str);
-        ret = -EINVAL;
-        goto err_no_fd;
-    }
-
-    if (!tag) {
-        tag = "";
-    }
-    if (strlen(tag) >= SD_MAX_VDI_TAG_LEN) {
-        error_setg(errp, "value of parameter 'tag' is too long");
-        ret = -EINVAL;
-        goto err_no_fd;
-    }
-
-    QLIST_INIT(&s->inflight_aio_head);
-    QLIST_INIT(&s->failed_aio_head);
-    QLIST_INIT(&s->inflight_aiocb_head);
-
-    s->fd = get_sheep_fd(s, errp);
-    if (s->fd < 0) {
-        ret = s->fd;
-        goto err_no_fd;
-    }
-
-    ret = find_vdi_name(s, vdi, (uint32_t)snap_id, tag, &vid, true, errp);
-    if (ret) {
-        goto err;
-    }
-
-    /*
-     * QEMU block layer emulates writethrough cache as 'writeback + flush', so
-     * we always set SD_FLAG_CMD_CACHE (writeback cache) as default.
-     */
-    s->cache_flags = SD_FLAG_CMD_CACHE;
-    if (flags & BDRV_O_NOCACHE) {
-        s->cache_flags = SD_FLAG_CMD_DIRECT;
-    }
-    s->discard_supported = true;
-
-    if (snap_id || tag[0]) {
-        trace_sheepdog_open(vid);
-        s->is_snapshot = true;
-    }
-
-    fd = connect_to_sdog(s, errp);
-    if (fd < 0) {
-        ret = fd;
-        goto err;
-    }
-
-    buf = g_malloc(SD_INODE_SIZE);
-    ret = read_object(fd, s->bs, buf, vid_to_vdi_oid(vid),
-                      0, SD_INODE_SIZE, 0, s->cache_flags);
-
-    closesocket(fd);
-
-    if (ret) {
-        error_setg(errp, "Can't read snapshot inode");
-        goto err;
-    }
-
-    memcpy(&s->inode, buf, sizeof(s->inode));
-
-    bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
-    bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
-    pstrcpy(s->name, sizeof(s->name), vdi);
-    qemu_co_mutex_init(&s->lock);
-    qemu_co_mutex_init(&s->queue_lock);
-    qemu_co_queue_init(&s->overlapping_queue);
-    qemu_opts_del(opts);
-    g_free(buf);
-    return 0;
-
-err:
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
-                       false, NULL, NULL, NULL, NULL);
-    closesocket(s->fd);
-err_no_fd:
-    qemu_opts_del(opts);
-    g_free(buf);
-    return ret;
-}
-
-static int sd_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue,
-                             Error **errp)
-{
-    BDRVSheepdogState *s = state->bs->opaque;
-    BDRVSheepdogReopenState *re_s;
-    int ret = 0;
-
-    re_s = state->opaque = g_new0(BDRVSheepdogReopenState, 1);
-
-    re_s->cache_flags = SD_FLAG_CMD_CACHE;
-    if (state->flags & BDRV_O_NOCACHE) {
-        re_s->cache_flags = SD_FLAG_CMD_DIRECT;
-    }
-
-    re_s->fd = get_sheep_fd(s, errp);
-    if (re_s->fd < 0) {
-        ret = re_s->fd;
-        return ret;
-    }
-
-    return ret;
-}
-
-static void sd_reopen_commit(BDRVReopenState *state)
-{
-    BDRVSheepdogReopenState *re_s = state->opaque;
-    BDRVSheepdogState *s = state->bs->opaque;
-
-    if (s->fd) {
-        aio_set_fd_handler(s->aio_context, s->fd, false,
-                           NULL, NULL, NULL, NULL);
-        closesocket(s->fd);
-    }
-
-    s->fd = re_s->fd;
-    s->cache_flags = re_s->cache_flags;
-
-    g_free(state->opaque);
-    state->opaque = NULL;
-
-    return;
-}
-
-static void sd_reopen_abort(BDRVReopenState *state)
-{
-    BDRVSheepdogReopenState *re_s = state->opaque;
-    BDRVSheepdogState *s = state->bs->opaque;
-
-    if (re_s == NULL) {
-        return;
-    }
-
-    if (re_s->fd) {
-        aio_set_fd_handler(s->aio_context, re_s->fd, false,
-                           NULL, NULL, NULL, NULL);
-        closesocket(re_s->fd);
-    }
-
-    g_free(state->opaque);
-    state->opaque = NULL;
-
-    return;
-}
-
-static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
-                        Error **errp)
-{
-    SheepdogVdiReq hdr;
-    SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
-    int fd, ret;
-    unsigned int wlen, rlen = 0;
-    char buf[SD_MAX_VDI_LEN];
-
-    fd = connect_to_sdog(s, errp);
-    if (fd < 0) {
-        return fd;
-    }
-
-    /* FIXME: would it be better to fail (e.g., return -EIO) when filename
-     * does not fit in buf?  For now, just truncate and avoid buffer overrun.
-     */
-    memset(buf, 0, sizeof(buf));
-    pstrcpy(buf, sizeof(buf), s->name);
-
-    memset(&hdr, 0, sizeof(hdr));
-    hdr.opcode = SD_OP_NEW_VDI;
-    hdr.base_vdi_id = s->inode.vdi_id;
-
-    wlen = SD_MAX_VDI_LEN;
-
-    hdr.flags = SD_FLAG_CMD_WRITE;
-    hdr.snapid = snapshot;
-
-    hdr.data_length = wlen;
-    hdr.vdi_size = s->inode.vdi_size;
-    hdr.copy_policy = s->inode.copy_policy;
-    hdr.copies = s->inode.nr_copies;
-    hdr.block_size_shift = s->inode.block_size_shift;
-
-    ret = do_req(fd, NULL, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
-
-    closesocket(fd);
-
-    if (ret) {
-        error_setg_errno(errp, -ret, "create failed");
-        return ret;
-    }
-
-    if (rsp->result != SD_RES_SUCCESS) {
-        error_setg(errp, "%s, %s", sd_strerror(rsp->result), s->inode.name);
-        return -EIO;
-    }
-
-    if (vdi_id) {
-        *vdi_id = rsp->vdi_id;
-    }
-
-    return 0;
-}
-
-static int sd_prealloc(BlockDriverState *bs, int64_t old_size, int64_t new_size,
-                       Error **errp)
-{
-    BlockBackend *blk = NULL;
-    BDRVSheepdogState *base = bs->opaque;
-    unsigned long buf_size;
-    uint32_t idx, max_idx;
-    uint32_t object_size;
-    void *buf = NULL;
-    int ret;
-
-    blk = blk_new_with_bs(bs,
-                          BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
-                          BLK_PERM_ALL, errp);
-
-    if (!blk) {
-        ret = -EPERM;
-        goto out_with_err_set;
-    }
-
-    blk_set_allow_write_beyond_eof(blk, true);
-
-    object_size = (UINT32_C(1) << base->inode.block_size_shift);
-    buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
-    buf = g_malloc0(buf_size);
-
-    max_idx = DIV_ROUND_UP(new_size, buf_size);
-
-    for (idx = old_size / buf_size; idx < max_idx; idx++) {
-        /*
-         * The created image can be a cloned image, so we need to read
-         * a data from the source image.
-         */
-        ret = blk_pread(blk, idx * buf_size, buf, buf_size);
-        if (ret < 0) {
-            goto out;
-        }
-        ret = blk_pwrite(blk, idx * buf_size, buf, buf_size, 0);
-        if (ret < 0) {
-            goto out;
-        }
-    }
-
-    ret = 0;
-out:
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Can't pre-allocate");
-    }
-out_with_err_set:
-    blk_unref(blk);
-    g_free(buf);
-
-    return ret;
-}
-
-static int sd_create_prealloc(BlockdevOptionsSheepdog *location, int64_t size,
-                              Error **errp)
-{
-    BlockDriverState *bs;
-    Visitor *v;
-    QObject *obj = NULL;
-    QDict *qdict;
-    int ret;
-
-    v = qobject_output_visitor_new(&obj);
-    visit_type_BlockdevOptionsSheepdog(v, NULL, &location, &error_abort);
-    visit_free(v);
-
-    qdict = qobject_to(QDict, obj);
-    qdict_flatten(qdict);
-
-    qdict_put_str(qdict, "driver", "sheepdog");
-
-    bs = bdrv_open(NULL, NULL, qdict, BDRV_O_PROTOCOL | BDRV_O_RDWR, errp);
-    if (bs == NULL) {
-        ret = -EIO;
-        goto fail;
-    }
-
-    ret = sd_prealloc(bs, 0, size, errp);
-fail:
-    bdrv_unref(bs);
-    qobject_unref(qdict);
-    return ret;
-}
-
-static int parse_redundancy(BDRVSheepdogState *s, SheepdogRedundancy *opt)
-{
-    struct SheepdogInode *inode = &s->inode;
-
-    switch (opt->type) {
-    case SHEEPDOG_REDUNDANCY_TYPE_FULL:
-        if (opt->u.full.copies > SD_MAX_COPIES || opt->u.full.copies < 1) {
-            return -EINVAL;
-        }
-        inode->copy_policy = 0;
-        inode->nr_copies = opt->u.full.copies;
-        return 0;
-
-    case SHEEPDOG_REDUNDANCY_TYPE_ERASURE_CODED:
-    {
-        int64_t copy = opt->u.erasure_coded.data_strips;
-        int64_t parity = opt->u.erasure_coded.parity_strips;
-
-        if (copy != 2 && copy != 4 && copy != 8 && copy != 16) {
-            return -EINVAL;
-        }
-
-        if (parity >= SD_EC_MAX_STRIP || parity < 1) {
-            return -EINVAL;
-        }
-
-        /*
-         * 4 bits for parity and 4 bits for data.
-         * We have to compress upper data bits because it can't represent 16
-         */
-        inode->copy_policy = ((copy / 2) << 4) + parity;
-        inode->nr_copies = copy + parity;
-        return 0;
-    }
-
-    default:
-        g_assert_not_reached();
-    }
-
-    return -EINVAL;
-}
-
-/*
- * Sheepdog support two kinds of redundancy, full replication and erasure
- * coding.
- *
- * # create a fully replicated vdi with x copies
- * -o redundancy=x (1 <= x <= SD_MAX_COPIES)
- *
- * # create a erasure coded vdi with x data strips and y parity strips
- * -o redundancy=x:y (x must be one of {2,4,8,16} and 1 <= y < SD_EC_MAX_STRIP)
- */
-static SheepdogRedundancy *parse_redundancy_str(const char *opt)
-{
-    SheepdogRedundancy *redundancy;
-    const char *n1, *n2;
-    long copy, parity;
-    char p[10];
-    int ret;
-
-    pstrcpy(p, sizeof(p), opt);
-    n1 = strtok(p, ":");
-    n2 = strtok(NULL, ":");
-
-    if (!n1) {
-        return NULL;
-    }
-
-    ret = qemu_strtol(n1, NULL, 10, &copy);
-    if (ret < 0) {
-        return NULL;
-    }
-
-    redundancy = g_new0(SheepdogRedundancy, 1);
-    if (!n2) {
-        *redundancy = (SheepdogRedundancy) {
-            .type               = SHEEPDOG_REDUNDANCY_TYPE_FULL,
-            .u.full.copies      = copy,
-        };
-    } else {
-        ret = qemu_strtol(n2, NULL, 10, &parity);
-        if (ret < 0) {
-            g_free(redundancy);
-            return NULL;
-        }
-
-        *redundancy = (SheepdogRedundancy) {
-            .type               = SHEEPDOG_REDUNDANCY_TYPE_ERASURE_CODED,
-            .u.erasure_coded    = {
-                .data_strips    = copy,
-                .parity_strips  = parity,
-            },
-        };
-    }
-
-    return redundancy;
-}
-
-static int parse_block_size_shift(BDRVSheepdogState *s,
-                                  BlockdevCreateOptionsSheepdog *opts)
-{
-    struct SheepdogInode *inode = &s->inode;
-    uint64_t object_size;
-    int obj_order;
-
-    if (opts->has_object_size) {
-        object_size = opts->object_size;
-
-        if ((object_size - 1) & object_size) {    /* not a power of 2? */
-            return -EINVAL;
-        }
-        obj_order = ctz32(object_size);
-        if (obj_order < 20 || obj_order > 31) {
-            return -EINVAL;
-        }
-        inode->block_size_shift = (uint8_t)obj_order;
-    }
-
-    return 0;
-}
-
-static int sd_co_create(BlockdevCreateOptions *options, Error **errp)
-{
-    BlockdevCreateOptionsSheepdog *opts = &options->u.sheepdog;
-    int ret = 0;
-    uint32_t vid = 0;
-    char *backing_file = NULL;
-    char *buf = NULL;
-    BDRVSheepdogState *s;
-    uint64_t max_vdi_size;
-    bool prealloc = false;
-
-    assert(options->driver == BLOCKDEV_DRIVER_SHEEPDOG);
-
-    deprecation_warning();
-
-    s = g_new0(BDRVSheepdogState, 1);
-
-    /* Steal SocketAddress from QAPI, set NULL to prevent double free */
-    s->addr = opts->location->server;
-    opts->location->server = NULL;
-
-    if (strlen(opts->location->vdi) >= sizeof(s->name)) {
-        error_setg(errp, "'vdi' string too long");
-        ret = -EINVAL;
-        goto out;
-    }
-    pstrcpy(s->name, sizeof(s->name), opts->location->vdi);
-
-    s->inode.vdi_size = opts->size;
-    backing_file = opts->backing_file;
-
-    if (!opts->has_preallocation) {
-        opts->preallocation = PREALLOC_MODE_OFF;
-    }
-    switch (opts->preallocation) {
-    case PREALLOC_MODE_OFF:
-        prealloc = false;
-        break;
-    case PREALLOC_MODE_FULL:
-        prealloc = true;
-        break;
-    default:
-        error_setg(errp, "Preallocation mode not supported for Sheepdog");
-        ret = -EINVAL;
-        goto out;
-    }
-
-    if (opts->has_redundancy) {
-        ret = parse_redundancy(s, opts->redundancy);
-        if (ret < 0) {
-            error_setg(errp, "Invalid redundancy mode");
-            goto out;
-        }
-    }
-    ret = parse_block_size_shift(s, opts);
-    if (ret < 0) {
-        error_setg(errp, "Invalid object_size."
-                         " obect_size needs to be power of 2"
-                         " and be limited from 2^20 to 2^31");
-        goto out;
-    }
-
-    if (opts->has_backing_file) {
-        BlockBackend *blk;
-        BDRVSheepdogState *base;
-        BlockDriver *drv;
-
-        /* Currently, only Sheepdog backing image is supported. */
-        drv = bdrv_find_protocol(opts->backing_file, true, NULL);
-        if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) {
-            error_setg(errp, "backing_file must be a sheepdog image");
-            ret = -EINVAL;
-            goto out;
-        }
-
-        blk = blk_new_open(opts->backing_file, NULL, NULL,
-                           BDRV_O_PROTOCOL, errp);
-        if (blk == NULL) {
-            ret = -EIO;
-            goto out;
-        }
-
-        base = blk_bs(blk)->opaque;
-
-        if (!is_snapshot(&base->inode)) {
-            error_setg(errp, "cannot clone from a non snapshot vdi");
-            blk_unref(blk);
-            ret = -EINVAL;
-            goto out;
-        }
-        s->inode.vdi_id = base->inode.vdi_id;
-        blk_unref(blk);
-    }
-
-    s->aio_context = qemu_get_aio_context();
-
-    /* if block_size_shift is not specified, get cluster default value */
-    if (s->inode.block_size_shift == 0) {
-        SheepdogVdiReq hdr;
-        SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)&hdr;
-        int fd;
-        unsigned int wlen = 0, rlen = 0;
-
-        fd = connect_to_sdog(s, errp);
-        if (fd < 0) {
-            ret = fd;
-            goto out;
-        }
-
-        memset(&hdr, 0, sizeof(hdr));
-        hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT;
-        hdr.proto_ver = SD_PROTO_VER;
-
-        ret = do_req(fd, NULL, (SheepdogReq *)&hdr,
-                     NULL, &wlen, &rlen);
-        closesocket(fd);
-        if (ret) {
-            error_setg_errno(errp, -ret, "failed to get cluster default");
-            goto out;
-        }
-        if (rsp->result == SD_RES_SUCCESS) {
-            s->inode.block_size_shift = rsp->block_size_shift;
-        } else {
-            s->inode.block_size_shift = SD_DEFAULT_BLOCK_SIZE_SHIFT;
-        }
-    }
-
-    max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
-
-    if (s->inode.vdi_size > max_vdi_size) {
-        error_setg(errp, "An image is too large."
-                         " The maximum image size is %"PRIu64 "GB",
-                         max_vdi_size / 1024 / 1024 / 1024);
-        ret = -EINVAL;
-        goto out;
-    }
-
-    ret = do_sd_create(s, &vid, 0, errp);
-    if (ret) {
-        goto out;
-    }
-
-    if (prealloc) {
-        ret = sd_create_prealloc(opts->location, opts->size, errp);
-    }
-out:
-    g_free(backing_file);
-    g_free(buf);
-    g_free(s->addr);
-    g_free(s);
-    return ret;
-}
-
-static int coroutine_fn sd_co_create_opts(BlockDriver *drv,
-                                          const char *filename,
-                                          QemuOpts *opts,
-                                          Error **errp)
-{
-    BlockdevCreateOptions *create_options = NULL;
-    QDict *qdict = NULL, *location_qdict;
-    Visitor *v;
-    char *redundancy = NULL;
-    Error *local_err = NULL;
-    int ret;
-    char *backing_fmt = NULL;
-
-    redundancy = qemu_opt_get_del(opts, BLOCK_OPT_REDUNDANCY);
-    backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
-
-    if (backing_fmt && strcmp(backing_fmt, "sheepdog") != 0) {
-        error_setg(errp, "backing_file must be a sheepdog image");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    qdict = qemu_opts_to_qdict(opts, NULL);
-    qdict_put_str(qdict, "driver", "sheepdog");
-
-    location_qdict = qdict_new();
-    qdict_put(qdict, "location", location_qdict);
-
-    sd_parse_filename(filename, location_qdict, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    qdict_flatten(qdict);
-
-    /* Change legacy command line options into QMP ones */
-    static const QDictRenames opt_renames[] = {
-        { BLOCK_OPT_BACKING_FILE,       "backing-file" },
-        { BLOCK_OPT_OBJECT_SIZE,        "object-size" },
-        { NULL, NULL },
-    };
-
-    if (!qdict_rename_keys(qdict, opt_renames, errp)) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* Get the QAPI object */
-    v = qobject_input_visitor_new_flat_confused(qdict, errp);
-    if (!v) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp);
-    visit_free(v);
-    if (!create_options) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    assert(create_options->driver == BLOCKDEV_DRIVER_SHEEPDOG);
-    create_options->u.sheepdog.size =
-        ROUND_UP(create_options->u.sheepdog.size, BDRV_SECTOR_SIZE);
-
-    if (redundancy) {
-        create_options->u.sheepdog.has_redundancy = true;
-        create_options->u.sheepdog.redundancy =
-            parse_redundancy_str(redundancy);
-        if (create_options->u.sheepdog.redundancy == NULL) {
-            error_setg(errp, "Invalid redundancy mode");
-            ret = -EINVAL;
-            goto fail;
-        }
-    }
-
-    ret = sd_co_create(create_options, errp);
-fail:
-    qapi_free_BlockdevCreateOptions(create_options);
-    qobject_unref(qdict);
-    g_free(redundancy);
-    g_free(backing_fmt);
-    return ret;
-}
-
-static void sd_close(BlockDriverState *bs)
-{
-    Error *local_err = NULL;
-    BDRVSheepdogState *s = bs->opaque;
-    SheepdogVdiReq hdr;
-    SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
-    unsigned int wlen, rlen = 0;
-    int fd, ret;
-
-    trace_sheepdog_close(s->name);
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        return;
-    }
-
-    memset(&hdr, 0, sizeof(hdr));
-
-    hdr.opcode = SD_OP_RELEASE_VDI;
-    hdr.type = LOCK_TYPE_NORMAL;
-    hdr.base_vdi_id = s->inode.vdi_id;
-    wlen = strlen(s->name) + 1;
-    hdr.data_length = wlen;
-    hdr.flags = SD_FLAG_CMD_WRITE;
-
-    ret = do_req(fd, s->bs, (SheepdogReq *)&hdr,
-                 s->name, &wlen, &rlen);
-
-    closesocket(fd);
-
-    if (!ret && rsp->result != SD_RES_SUCCESS &&
-        rsp->result != SD_RES_VDI_NOT_LOCKED) {
-        error_report("%s, %s", sd_strerror(rsp->result), s->name);
-    }
-
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
-                       false, NULL, NULL, NULL, NULL);
-    closesocket(s->fd);
-    qapi_free_SocketAddress(s->addr);
-}
-
-static int64_t sd_getlength(BlockDriverState *bs)
-{
-    BDRVSheepdogState *s = bs->opaque;
-
-    return s->inode.vdi_size;
-}
-
-static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset,
-                                       bool exact, PreallocMode prealloc,
-                                       BdrvRequestFlags flags, Error **errp)
-{
-    BDRVSheepdogState *s = bs->opaque;
-    int ret, fd;
-    unsigned int datalen;
-    uint64_t max_vdi_size;
-    int64_t old_size = s->inode.vdi_size;
-
-    if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_FULL) {
-        error_setg(errp, "Unsupported preallocation mode '%s'",
-                   PreallocMode_str(prealloc));
-        return -ENOTSUP;
-    }
-
-    max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
-    if (offset < old_size) {
-        error_setg(errp, "shrinking is not supported");
-        return -EINVAL;
-    } else if (offset > max_vdi_size) {
-        error_setg(errp, "too big image size");
-        return -EINVAL;
-    }
-
-    fd = connect_to_sdog(s, errp);
-    if (fd < 0) {
-        return fd;
-    }
-
-    /* we don't need to update entire object */
-    datalen = SD_INODE_HEADER_SIZE;
-    s->inode.vdi_size = offset;
-    ret = write_object(fd, s->bs, (char *)&s->inode,
-                       vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
-                       datalen, 0, false, s->cache_flags);
-    close(fd);
-
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "failed to update an inode");
-        return ret;
-    }
-
-    if (prealloc == PREALLOC_MODE_FULL) {
-        ret = sd_prealloc(bs, old_size, offset, errp);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    return 0;
-}
-
-/*
- * This function is called after writing data objects.  If we need to
- * update metadata, this sends a write request to the vdi object.
- */
-static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
-{
-    BDRVSheepdogState *s = acb->s;
-    struct iovec iov;
-    AIOReq *aio_req;
-    uint32_t offset, data_len, mn, mx;
-
-    mn = acb->min_dirty_data_idx;
-    mx = acb->max_dirty_data_idx;
-    if (mn <= mx) {
-        /* we need to update the vdi object. */
-        ++acb->nr_pending;
-        offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) +
-            mn * sizeof(s->inode.data_vdi_id[0]);
-        data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]);
-
-        acb->min_dirty_data_idx = UINT32_MAX;
-        acb->max_dirty_data_idx = 0;
-
-        iov.iov_base = &s->inode;
-        iov.iov_len = sizeof(s->inode);
-        aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
-                                data_len, offset, 0, false, 0, offset);
-        add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
-        if (--acb->nr_pending) {
-            qemu_coroutine_yield();
-        }
-    }
-}
-
-/* Delete current working VDI on the snapshot chain */
-static bool sd_delete(BDRVSheepdogState *s)
-{
-    Error *local_err = NULL;
-    unsigned int wlen = SD_MAX_VDI_LEN, rlen = 0;
-    SheepdogVdiReq hdr = {
-        .opcode = SD_OP_DEL_VDI,
-        .base_vdi_id = s->inode.vdi_id,
-        .data_length = wlen,
-        .flags = SD_FLAG_CMD_WRITE,
-    };
-    SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
-    int fd, ret;
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        return false;
-    }
-
-    ret = do_req(fd, s->bs, (SheepdogReq *)&hdr,
-                 s->name, &wlen, &rlen);
-    closesocket(fd);
-    if (ret) {
-        return false;
-    }
-    switch (rsp->result) {
-    case SD_RES_NO_VDI:
-        error_report("%s was already deleted", s->name);
-        /* fall through */
-    case SD_RES_SUCCESS:
-        break;
-    default:
-        error_report("%s, %s", sd_strerror(rsp->result), s->name);
-        return false;
-    }
-
-    return true;
-}
-
-/*
- * Create a writable VDI from a snapshot
- */
-static int sd_create_branch(BDRVSheepdogState *s)
-{
-    Error *local_err = NULL;
-    int ret, fd;
-    uint32_t vid;
-    char *buf;
-    bool deleted;
-
-    trace_sheepdog_create_branch_snapshot(s->inode.vdi_id);
-
-    buf = g_malloc(SD_INODE_SIZE);
-
-    /*
-     * Even If deletion fails, we will just create extra snapshot based on
-     * the working VDI which was supposed to be deleted. So no need to
-     * false bail out.
-     */
-    deleted = sd_delete(s);
-    ret = do_sd_create(s, &vid, !deleted, &local_err);
-    if (ret) {
-        error_report_err(local_err);
-        goto out;
-    }
-
-    trace_sheepdog_create_branch_created(vid);
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        ret = fd;
-        goto out;
-    }
-
-    ret = read_object(fd, s->bs, buf, vid_to_vdi_oid(vid),
-                      s->inode.nr_copies, SD_INODE_SIZE, 0, s->cache_flags);
-
-    closesocket(fd);
-
-    if (ret < 0) {
-        goto out;
-    }
-
-    memcpy(&s->inode, buf, sizeof(s->inode));
-
-    s->is_snapshot = false;
-    ret = 0;
-    trace_sheepdog_create_branch_new(s->inode.vdi_id);
-
-out:
-    g_free(buf);
-
-    return ret;
-}
-
-/*
- * Send I/O requests to the server.
- *
- * This function sends requests to the server, links the requests to
- * the inflight_list in BDRVSheepdogState, and exits without
- * waiting the response.  The responses are received in the
- * `aio_read_response' function which is called from the main loop as
- * a fd handler.
- *
- * Returns 1 when we need to wait a response, 0 when there is no sent
- * request and -errno in error cases.
- */
-static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb)
-{
-    int ret = 0;
-    unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE;
-    unsigned long idx;
-    uint32_t object_size;
-    uint64_t oid;
-    uint64_t offset;
-    BDRVSheepdogState *s = acb->s;
-    SheepdogInode *inode = &s->inode;
-    AIOReq *aio_req;
-
-    if (acb->aiocb_type == AIOCB_WRITE_UDATA && s->is_snapshot) {
-        /*
-         * In the case we open the snapshot VDI, Sheepdog creates the
-         * writable VDI when we do a write operation first.
-         */
-        ret = sd_create_branch(s);
-        if (ret) {
-            acb->ret = -EIO;
-            return;
-        }
-    }
-
-    object_size = (UINT32_C(1) << inode->block_size_shift);
-    idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size;
-    offset = (acb->sector_num * BDRV_SECTOR_SIZE) % object_size;
-
-    /*
-     * Make sure we don't free the aiocb before we are done with all requests.
-     * This additional reference is dropped at the end of this function.
-     */
-    acb->nr_pending++;
-
-    while (done != total) {
-        uint8_t flags = 0;
-        uint64_t old_oid = 0;
-        bool create = false;
-
-        oid = vid_to_data_oid(inode->data_vdi_id[idx], idx);
-
-        len = MIN(total - done, object_size - offset);
-
-        switch (acb->aiocb_type) {
-        case AIOCB_READ_UDATA:
-            if (!inode->data_vdi_id[idx]) {
-                qemu_iovec_memset(acb->qiov, done, 0, len);
-                goto done;
-            }
-            break;
-        case AIOCB_WRITE_UDATA:
-            if (!inode->data_vdi_id[idx]) {
-                create = true;
-            } else if (!is_data_obj_writable(inode, idx)) {
-                /* Copy-On-Write */
-                create = true;
-                old_oid = oid;
-                flags = SD_FLAG_CMD_COW;
-            }
-            break;
-        case AIOCB_DISCARD_OBJ:
-            /*
-             * We discard the object only when the whole object is
-             * 1) allocated 2) trimmed. Otherwise, simply skip it.
-             */
-            if (len != object_size || inode->data_vdi_id[idx] == 0) {
-                goto done;
-            }
-            break;
-        default:
-            break;
-        }
-
-        if (create) {
-            trace_sheepdog_co_rw_vector_update(inode->vdi_id, oid,
-                                  vid_to_data_oid(inode->data_vdi_id[idx], idx),
-                                  idx);
-            oid = vid_to_data_oid(inode->vdi_id, idx);
-            trace_sheepdog_co_rw_vector_new(oid);
-        }
-
-        aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create,
-                                old_oid,
-                                acb->aiocb_type == AIOCB_DISCARD_OBJ ?
-                                0 : done);
-        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
-                        acb->aiocb_type);
-    done:
-        offset = 0;
-        idx++;
-        done += len;
-    }
-    if (--acb->nr_pending) {
-        qemu_coroutine_yield();
-    }
-}
-
-static void sd_aio_complete(SheepdogAIOCB *acb)
-{
-    BDRVSheepdogState *s;
-    if (acb->aiocb_type == AIOCB_FLUSH_CACHE) {
-        return;
-    }
-
-    s = acb->s;
-    qemu_co_mutex_lock(&s->queue_lock);
-    QLIST_REMOVE(acb, aiocb_siblings);
-    qemu_co_queue_restart_all(&s->overlapping_queue);
-    qemu_co_mutex_unlock(&s->queue_lock);
-}
-
-static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
-                                     int nb_sectors, QEMUIOVector *qiov,
-                                     int flags)
-{
-    SheepdogAIOCB acb;
-    int ret;
-    int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE;
-    BDRVSheepdogState *s = bs->opaque;
-
-    assert(!flags);
-    if (offset > s->inode.vdi_size) {
-        ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, 0, NULL);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_WRITE_UDATA);
-    sd_co_rw_vector(&acb);
-    sd_write_done(&acb);
-    sd_aio_complete(&acb);
-
-    return acb.ret;
-}
-
-static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
-                       int nb_sectors, QEMUIOVector *qiov)
-{
-    SheepdogAIOCB acb;
-    BDRVSheepdogState *s = bs->opaque;
-
-    sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_READ_UDATA);
-    sd_co_rw_vector(&acb);
-    sd_aio_complete(&acb);
-
-    return acb.ret;
-}
-
-static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
-{
-    BDRVSheepdogState *s = bs->opaque;
-    SheepdogAIOCB acb;
-    AIOReq *aio_req;
-
-    if (s->cache_flags != SD_FLAG_CMD_CACHE) {
-        return 0;
-    }
-
-    sd_aio_setup(&acb, s, NULL, 0, 0, AIOCB_FLUSH_CACHE);
-
-    acb.nr_pending++;
-    aio_req = alloc_aio_req(s, &acb, vid_to_vdi_oid(s->inode.vdi_id),
-                            0, 0, 0, false, 0, 0);
-    add_aio_request(s, aio_req, NULL, 0, acb.aiocb_type);
-
-    if (--acb.nr_pending) {
-        qemu_coroutine_yield();
-    }
-
-    sd_aio_complete(&acb);
-    return acb.ret;
-}
-
-static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
-{
-    Error *local_err = NULL;
-    BDRVSheepdogState *s = bs->opaque;
-    int ret, fd;
-    uint32_t new_vid;
-    SheepdogInode *inode;
-    unsigned int datalen;
-
-    trace_sheepdog_snapshot_create_info(sn_info->name, sn_info->id_str, s->name,
-                                        sn_info->vm_state_size, s->is_snapshot);
-
-    if (s->is_snapshot) {
-        error_report("You can't create a snapshot of a snapshot VDI, "
-                     "%s (%" PRIu32 ").", s->name, s->inode.vdi_id);
-
-        return -EINVAL;
-    }
-
-    trace_sheepdog_snapshot_create(sn_info->name, sn_info->id_str);
-
-    s->inode.vm_state_size = sn_info->vm_state_size;
-    s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
-    /* It appears that inode.tag does not require a NUL terminator,
-     * which means this use of strncpy is ok.
-     */
-    strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag));
-    /* we don't need to update entire object */
-    datalen = SD_INODE_HEADER_SIZE;
-    inode = g_malloc(datalen);
-
-    /* refresh inode. */
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        ret = fd;
-        goto cleanup;
-    }
-
-    ret = write_object(fd, s->bs, (char *)&s->inode,
-                       vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
-                       datalen, 0, false, s->cache_flags);
-    if (ret < 0) {
-        error_report("failed to write snapshot's inode.");
-        goto cleanup;
-    }
-
-    ret = do_sd_create(s, &new_vid, 1, &local_err);
-    if (ret < 0) {
-        error_reportf_err(local_err,
-                          "failed to create inode for snapshot: ");
-        goto cleanup;
-    }
-
-    ret = read_object(fd, s->bs, (char *)inode,
-                      vid_to_vdi_oid(new_vid), s->inode.nr_copies, datalen, 0,
-                      s->cache_flags);
-
-    if (ret < 0) {
-        error_report("failed to read new inode info. %s", strerror(errno));
-        goto cleanup;
-    }
-
-    memcpy(&s->inode, inode, datalen);
-    trace_sheepdog_snapshot_create_inode(s->inode.name, s->inode.snap_id,
-                                         s->inode.vdi_id);
-
-cleanup:
-    g_free(inode);
-    closesocket(fd);
-    return ret;
-}
-
-/*
- * We implement rollback(loadvm) operation to the specified snapshot by
- * 1) switch to the snapshot
- * 2) rely on sd_create_branch to delete working VDI and
- * 3) create a new working VDI based on the specified snapshot
- */
-static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
-{
-    BDRVSheepdogState *s = bs->opaque;
-    BDRVSheepdogState *old_s;
-    char tag[SD_MAX_VDI_TAG_LEN];
-    uint32_t snapid = 0;
-    int ret;
-
-    if (!sd_parse_snapid_or_tag(snapshot_id, &snapid, tag)) {
-        return -EINVAL;
-    }
-
-    old_s = g_new(BDRVSheepdogState, 1);
-
-    memcpy(old_s, s, sizeof(BDRVSheepdogState));
-
-    ret = reload_inode(s, snapid, tag);
-    if (ret) {
-        goto out;
-    }
-
-    ret = sd_create_branch(s);
-    if (ret) {
-        goto out;
-    }
-
-    g_free(old_s);
-
-    return 0;
-out:
-    /* recover bdrv_sd_state */
-    memcpy(s, old_s, sizeof(BDRVSheepdogState));
-    g_free(old_s);
-
-    error_report("failed to open. recover old bdrv_sd_state.");
-
-    return ret;
-}
-
-#define NR_BATCHED_DISCARD 128
-
-static int remove_objects(BDRVSheepdogState *s, Error **errp)
-{
-    int fd, i = 0, nr_objs = 0;
-    int ret;
-    SheepdogInode *inode = &s->inode;
-
-    fd = connect_to_sdog(s, errp);
-    if (fd < 0) {
-        return fd;
-    }
-
-    nr_objs = count_data_objs(inode);
-    while (i < nr_objs) {
-        int start_idx, nr_filled_idx;
-
-        while (i < nr_objs && !inode->data_vdi_id[i]) {
-            i++;
-        }
-        start_idx = i;
-
-        nr_filled_idx = 0;
-        while (i < nr_objs && nr_filled_idx < NR_BATCHED_DISCARD) {
-            if (inode->data_vdi_id[i]) {
-                inode->data_vdi_id[i] = 0;
-                nr_filled_idx++;
-            }
-
-            i++;
-        }
-
-        ret = write_object(fd, s->bs,
-                           (char *)&inode->data_vdi_id[start_idx],
-                           vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies,
-                           (i - start_idx) * sizeof(uint32_t),
-                           offsetof(struct SheepdogInode,
-                                    data_vdi_id[start_idx]),
-                           false, s->cache_flags);
-        if (ret < 0) {
-            error_setg(errp, "Failed to discard snapshot inode");
-            goto out;
-        }
-    }
-
-    ret = 0;
-out:
-    closesocket(fd);
-    return ret;
-}
-
-static int sd_snapshot_delete(BlockDriverState *bs,
-                              const char *snapshot_id,
-                              const char *name,
-                              Error **errp)
-{
-    /*
-     * FIXME should delete the snapshot matching both @snapshot_id and
-     * @name, but @name not used here
-     */
-    unsigned long snap_id = 0;
-    char snap_tag[SD_MAX_VDI_TAG_LEN];
-    int fd, ret;
-    char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
-    BDRVSheepdogState *s = bs->opaque;
-    unsigned int wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN, rlen = 0;
-    uint32_t vid;
-    SheepdogVdiReq hdr = {
-        .opcode = SD_OP_DEL_VDI,
-        .data_length = wlen,
-        .flags = SD_FLAG_CMD_WRITE,
-    };
-    SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
-
-    ret = remove_objects(s, errp);
-    if (ret) {
-        return ret;
-    }
-
-    memset(buf, 0, sizeof(buf));
-    memset(snap_tag, 0, sizeof(snap_tag));
-    pstrcpy(buf, SD_MAX_VDI_LEN, s->name);
-    /* TODO Use sd_parse_snapid() once this mess is cleaned up */
-    ret = qemu_strtoul(snapshot_id, NULL, 10, &snap_id);
-    if (ret || snap_id > UINT32_MAX) {
-        /*
-         * FIXME Since qemu_strtoul() returns -EINVAL when
-         * @snapshot_id is null, @snapshot_id is mandatory.  Correct
-         * would be to require at least one of @snapshot_id and @name.
-         */
-        error_setg(errp, "Invalid snapshot ID: %s",
-                         snapshot_id ? snapshot_id : "<null>");
-        return -EINVAL;
-    }
-
-    if (snap_id) {
-        hdr.snapid = (uint32_t) snap_id;
-    } else {
-        /* FIXME I suspect we should use @name here */
-        /* FIXME don't truncate silently */
-        pstrcpy(snap_tag, sizeof(snap_tag), snapshot_id);
-        pstrcpy(buf + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag);
-    }
-
-    ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true, errp);
-    if (ret) {
-        return ret;
-    }
-
-    fd = connect_to_sdog(s, errp);
-    if (fd < 0) {
-        return fd;
-    }
-
-    ret = do_req(fd, s->bs, (SheepdogReq *)&hdr,
-                 buf, &wlen, &rlen);
-    closesocket(fd);
-    if (ret) {
-        error_setg_errno(errp, -ret, "Couldn't send request to server");
-        return ret;
-    }
-
-    switch (rsp->result) {
-    case SD_RES_NO_VDI:
-        error_setg(errp, "Can't find the snapshot");
-        return -ENOENT;
-    case SD_RES_SUCCESS:
-        break;
-    default:
-        error_setg(errp, "%s", sd_strerror(rsp->result));
-        return -EIO;
-    }
-
-    return 0;
-}
-
-static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
-{
-    Error *local_err = NULL;
-    BDRVSheepdogState *s = bs->opaque;
-    SheepdogReq req;
-    int fd, nr = 1024, ret, max = BITS_TO_LONGS(SD_NR_VDIS) * sizeof(long);
-    QEMUSnapshotInfo *sn_tab = NULL;
-    unsigned wlen, rlen;
-    int found = 0;
-    SheepdogInode *inode;
-    unsigned long *vdi_inuse;
-    unsigned int start_nr;
-    uint64_t hval;
-    uint32_t vid;
-
-    vdi_inuse = g_malloc(max);
-    inode = g_malloc(SD_INODE_HEADER_SIZE);
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        ret = fd;
-        goto out;
-    }
-
-    rlen = max;
-    wlen = 0;
-
-    memset(&req, 0, sizeof(req));
-
-    req.opcode = SD_OP_READ_VDIS;
-    req.data_length = max;
-
-    ret = do_req(fd, s->bs, &req, vdi_inuse, &wlen, &rlen);
-
-    closesocket(fd);
-    if (ret) {
-        goto out;
-    }
-
-    sn_tab = g_new0(QEMUSnapshotInfo, nr);
-
-    /* calculate a vdi id with hash function */
-    hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT);
-    start_nr = hval & (SD_NR_VDIS - 1);
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        ret = fd;
-        goto out;
-    }
-
-    for (vid = start_nr; found < nr; vid = (vid + 1) % SD_NR_VDIS) {
-        if (!test_bit(vid, vdi_inuse)) {
-            break;
-        }
-
-        /* we don't need to read entire object */
-        ret = read_object(fd, s->bs, (char *)inode,
-                          vid_to_vdi_oid(vid),
-                          0, SD_INODE_HEADER_SIZE, 0,
-                          s->cache_flags);
-
-        if (ret) {
-            continue;
-        }
-
-        if (!strcmp(inode->name, s->name) && is_snapshot(inode)) {
-            sn_tab[found].date_sec = inode->snap_ctime >> 32;
-            sn_tab[found].date_nsec = inode->snap_ctime & 0xffffffff;
-            sn_tab[found].vm_state_size = inode->vm_state_size;
-            sn_tab[found].vm_clock_nsec = inode->vm_clock_nsec;
-
-            snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str),
-                     "%" PRIu32, inode->snap_id);
-            pstrcpy(sn_tab[found].name,
-                    MIN(sizeof(sn_tab[found].name), sizeof(inode->tag)),
-                    inode->tag);
-            found++;
-        }
-    }
-
-    closesocket(fd);
-out:
-    *psn_tab = sn_tab;
-
-    g_free(vdi_inuse);
-    g_free(inode);
-
-    if (ret < 0) {
-        return ret;
-    }
-
-    return found;
-}
-
-static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
-                                int64_t pos, int size, int load)
-{
-    Error *local_err = NULL;
-    bool create;
-    int fd, ret = 0, remaining = size;
-    unsigned int data_len;
-    uint64_t vmstate_oid;
-    uint64_t offset;
-    uint32_t vdi_index;
-    uint32_t vdi_id = load ? s->inode.parent_vdi_id : s->inode.vdi_id;
-    uint32_t object_size = (UINT32_C(1) << s->inode.block_size_shift);
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        return fd;
-    }
-
-    while (remaining) {
-        vdi_index = pos / object_size;
-        offset = pos % object_size;
-
-        data_len = MIN(remaining, object_size - offset);
-
-        vmstate_oid = vid_to_vmstate_oid(vdi_id, vdi_index);
-
-        create = (offset == 0);
-        if (load) {
-            ret = read_object(fd, s->bs, (char *)data, vmstate_oid,
-                              s->inode.nr_copies, data_len, offset,
-                              s->cache_flags);
-        } else {
-            ret = write_object(fd, s->bs, (char *)data, vmstate_oid,
-                               s->inode.nr_copies, data_len, offset, create,
-                               s->cache_flags);
-        }
-
-        if (ret < 0) {
-            error_report("failed to save vmstate %s", strerror(errno));
-            goto cleanup;
-        }
-
-        pos += data_len;
-        data += data_len;
-        remaining -= data_len;
-    }
-    ret = size;
-cleanup:
-    closesocket(fd);
-    return ret;
-}
-
-static int sd_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
-                           int64_t pos)
-{
-    BDRVSheepdogState *s = bs->opaque;
-    void *buf;
-    int ret;
-
-    buf = qemu_blockalign(bs, qiov->size);
-    qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
-    ret = do_load_save_vmstate(s, (uint8_t *) buf, pos, qiov->size, 0);
-    qemu_vfree(buf);
-
-    return ret;
-}
-
-static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
-                           int64_t pos)
-{
-    BDRVSheepdogState *s = bs->opaque;
-    void *buf;
-    int ret;
-
-    buf = qemu_blockalign(bs, qiov->size);
-    ret = do_load_save_vmstate(s, buf, pos, qiov->size, 1);
-    qemu_iovec_from_buf(qiov, 0, buf, qiov->size);
-    qemu_vfree(buf);
-
-    return ret;
-}
-
-
-static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
-                                      int bytes)
-{
-    SheepdogAIOCB acb;
-    BDRVSheepdogState *s = bs->opaque;
-    QEMUIOVector discard_iov;
-    struct iovec iov;
-    uint32_t zero = 0;
-
-    if (!s->discard_supported) {
-        return 0;
-    }
-
-    memset(&discard_iov, 0, sizeof(discard_iov));
-    memset(&iov, 0, sizeof(iov));
-    iov.iov_base = &zero;
-    iov.iov_len = sizeof(zero);
-    discard_iov.iov = &iov;
-    discard_iov.niov = 1;
-    if (!QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)) {
-        return -ENOTSUP;
-    }
-    sd_aio_setup(&acb, s, &discard_iov, offset >> BDRV_SECTOR_BITS,
-                 bytes >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ);
-    sd_co_rw_vector(&acb);
-    sd_aio_complete(&acb);
-
-    return acb.ret;
-}
-
-static coroutine_fn int
-sd_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
-                   int64_t bytes, int64_t *pnum, int64_t *map,
-                   BlockDriverState **file)
-{
-    BDRVSheepdogState *s = bs->opaque;
-    SheepdogInode *inode = &s->inode;
-    uint32_t object_size = (UINT32_C(1) << inode->block_size_shift);
-    unsigned long start = offset / object_size,
-                  end = DIV_ROUND_UP(offset + bytes, object_size);
-    unsigned long idx;
-    *map = offset;
-    int ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
-
-    for (idx = start; idx < end; idx++) {
-        if (inode->data_vdi_id[idx] == 0) {
-            break;
-        }
-    }
-    if (idx == start) {
-        /* Get the longest length of unallocated sectors */
-        ret = 0;
-        for (idx = start + 1; idx < end; idx++) {
-            if (inode->data_vdi_id[idx] != 0) {
-                break;
-            }
-        }
-    }
-
-    *pnum = (idx - start) * object_size;
-    if (*pnum > bytes) {
-        *pnum = bytes;
-    }
-    if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
-        *file = bs;
-    }
-    return ret;
-}
-
-static int64_t sd_get_allocated_file_size(BlockDriverState *bs)
-{
-    BDRVSheepdogState *s = bs->opaque;
-    SheepdogInode *inode = &s->inode;
-    uint32_t object_size = (UINT32_C(1) << inode->block_size_shift);
-    unsigned long i, last = DIV_ROUND_UP(inode->vdi_size, object_size);
-    uint64_t size = 0;
-
-    for (i = 0; i < last; i++) {
-        if (inode->data_vdi_id[i] == 0) {
-            continue;
-        }
-        size += object_size;
-    }
-    return size;
-}
-
-static QemuOptsList sd_create_opts = {
-    .name = "sheepdog-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(sd_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_BACKING_FILE,
-            .type = QEMU_OPT_STRING,
-            .help = "File name of a base image"
-        },
-        {
-            .name = BLOCK_OPT_BACKING_FMT,
-            .type = QEMU_OPT_STRING,
-            .help = "Must be 'sheepdog' if present",
-        },
-        {
-            .name = BLOCK_OPT_PREALLOC,
-            .type = QEMU_OPT_STRING,
-            .help = "Preallocation mode (allowed values: off, full)"
-        },
-        {
-            .name = BLOCK_OPT_REDUNDANCY,
-            .type = QEMU_OPT_STRING,
-            .help = "Redundancy of the image"
-        },
-        {
-            .name = BLOCK_OPT_OBJECT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Object size of the image"
-        },
-        { /* end of list */ }
-    }
-};
-
-static const char *const sd_strong_runtime_opts[] = {
-    "vdi",
-    "snap-id",
-    "tag",
-    "server.",
-
-    NULL
-};
-
-static BlockDriver bdrv_sheepdog = {
-    .format_name                  = "sheepdog",
-    .protocol_name                = "sheepdog",
-    .instance_size                = sizeof(BDRVSheepdogState),
-    .bdrv_parse_filename          = sd_parse_filename,
-    .bdrv_file_open               = sd_open,
-    .bdrv_reopen_prepare          = sd_reopen_prepare,
-    .bdrv_reopen_commit           = sd_reopen_commit,
-    .bdrv_reopen_abort            = sd_reopen_abort,
-    .bdrv_close                   = sd_close,
-    .bdrv_co_create               = sd_co_create,
-    .bdrv_co_create_opts          = sd_co_create_opts,
-    .bdrv_has_zero_init           = bdrv_has_zero_init_1,
-    .bdrv_getlength               = sd_getlength,
-    .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
-    .bdrv_co_truncate             = sd_co_truncate,
-
-    .bdrv_co_readv                = sd_co_readv,
-    .bdrv_co_writev               = sd_co_writev,
-    .bdrv_co_flush_to_disk        = sd_co_flush_to_disk,
-    .bdrv_co_pdiscard             = sd_co_pdiscard,
-    .bdrv_co_block_status         = sd_co_block_status,
-
-    .bdrv_snapshot_create         = sd_snapshot_create,
-    .bdrv_snapshot_goto           = sd_snapshot_goto,
-    .bdrv_snapshot_delete         = sd_snapshot_delete,
-    .bdrv_snapshot_list           = sd_snapshot_list,
-
-    .bdrv_save_vmstate            = sd_save_vmstate,
-    .bdrv_load_vmstate            = sd_load_vmstate,
-
-    .bdrv_detach_aio_context      = sd_detach_aio_context,
-    .bdrv_attach_aio_context      = sd_attach_aio_context,
-
-    .create_opts                  = &sd_create_opts,
-    .strong_runtime_opts          = sd_strong_runtime_opts,
-};
-
-static BlockDriver bdrv_sheepdog_tcp = {
-    .format_name                  = "sheepdog",
-    .protocol_name                = "sheepdog+tcp",
-    .instance_size                = sizeof(BDRVSheepdogState),
-    .bdrv_parse_filename          = sd_parse_filename,
-    .bdrv_file_open               = sd_open,
-    .bdrv_reopen_prepare          = sd_reopen_prepare,
-    .bdrv_reopen_commit           = sd_reopen_commit,
-    .bdrv_reopen_abort            = sd_reopen_abort,
-    .bdrv_close                   = sd_close,
-    .bdrv_co_create               = sd_co_create,
-    .bdrv_co_create_opts          = sd_co_create_opts,
-    .bdrv_has_zero_init           = bdrv_has_zero_init_1,
-    .bdrv_getlength               = sd_getlength,
-    .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
-    .bdrv_co_truncate             = sd_co_truncate,
-
-    .bdrv_co_readv                = sd_co_readv,
-    .bdrv_co_writev               = sd_co_writev,
-    .bdrv_co_flush_to_disk        = sd_co_flush_to_disk,
-    .bdrv_co_pdiscard             = sd_co_pdiscard,
-    .bdrv_co_block_status         = sd_co_block_status,
-
-    .bdrv_snapshot_create         = sd_snapshot_create,
-    .bdrv_snapshot_goto           = sd_snapshot_goto,
-    .bdrv_snapshot_delete         = sd_snapshot_delete,
-    .bdrv_snapshot_list           = sd_snapshot_list,
-
-    .bdrv_save_vmstate            = sd_save_vmstate,
-    .bdrv_load_vmstate            = sd_load_vmstate,
-
-    .bdrv_detach_aio_context      = sd_detach_aio_context,
-    .bdrv_attach_aio_context      = sd_attach_aio_context,
-
-    .create_opts                  = &sd_create_opts,
-    .strong_runtime_opts          = sd_strong_runtime_opts,
-};
-
-static BlockDriver bdrv_sheepdog_unix = {
-    .format_name                  = "sheepdog",
-    .protocol_name                = "sheepdog+unix",
-    .instance_size                = sizeof(BDRVSheepdogState),
-    .bdrv_parse_filename          = sd_parse_filename,
-    .bdrv_file_open               = sd_open,
-    .bdrv_reopen_prepare          = sd_reopen_prepare,
-    .bdrv_reopen_commit           = sd_reopen_commit,
-    .bdrv_reopen_abort            = sd_reopen_abort,
-    .bdrv_close                   = sd_close,
-    .bdrv_co_create               = sd_co_create,
-    .bdrv_co_create_opts          = sd_co_create_opts,
-    .bdrv_has_zero_init           = bdrv_has_zero_init_1,
-    .bdrv_getlength               = sd_getlength,
-    .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
-    .bdrv_co_truncate             = sd_co_truncate,
-
-    .bdrv_co_readv                = sd_co_readv,
-    .bdrv_co_writev               = sd_co_writev,
-    .bdrv_co_flush_to_disk        = sd_co_flush_to_disk,
-    .bdrv_co_pdiscard             = sd_co_pdiscard,
-    .bdrv_co_block_status         = sd_co_block_status,
-
-    .bdrv_snapshot_create         = sd_snapshot_create,
-    .bdrv_snapshot_goto           = sd_snapshot_goto,
-    .bdrv_snapshot_delete         = sd_snapshot_delete,
-    .bdrv_snapshot_list           = sd_snapshot_list,
-
-    .bdrv_save_vmstate            = sd_save_vmstate,
-    .bdrv_load_vmstate            = sd_load_vmstate,
-
-    .bdrv_detach_aio_context      = sd_detach_aio_context,
-    .bdrv_attach_aio_context      = sd_attach_aio_context,
-
-    .create_opts                  = &sd_create_opts,
-    .strong_runtime_opts          = sd_strong_runtime_opts,
-};
-
-static void bdrv_sheepdog_init(void)
-{
-    bdrv_register(&bdrv_sheepdog);
-    bdrv_register(&bdrv_sheepdog_tcp);
-    bdrv_register(&bdrv_sheepdog_unix);
-}
-block_init(bdrv_sheepdog_init);
diff --git a/block/snapshot-access.c b/block/snapshot-access.c
new file mode 100644 (file)
index 0000000..84d0d13
--- /dev/null
@@ -0,0 +1,135 @@
+/*
+ * snapshot_access block driver
+ *
+ * Copyright (c) 2022 Virtuozzo International GmbH.
+ *
+ * Author:
+ *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+
+#include "sysemu/block-backend.h"
+#include "qemu/cutils.h"
+#include "block/block_int.h"
+
+static int coroutine_fn GRAPH_RDLOCK
+snapshot_access_co_preadv_part(BlockDriverState *bs,
+                               int64_t offset, int64_t bytes,
+                               QEMUIOVector *qiov, size_t qiov_offset,
+                               BdrvRequestFlags flags)
+{
+    if (flags) {
+        return -ENOTSUP;
+    }
+
+    return bdrv_co_preadv_snapshot(bs->file, offset, bytes, qiov, qiov_offset);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+snapshot_access_co_block_status(BlockDriverState *bs,
+                                bool want_zero, int64_t offset,
+                                int64_t bytes, int64_t *pnum,
+                                int64_t *map, BlockDriverState **file)
+{
+    return bdrv_co_snapshot_block_status(bs->file->bs, want_zero, offset,
+                                         bytes, pnum, map, file);
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+snapshot_access_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+    return bdrv_co_pdiscard_snapshot(bs->file->bs, offset, bytes);
+}
+
+static int coroutine_fn
+snapshot_access_co_pwrite_zeroes(BlockDriverState *bs,
+                                 int64_t offset, int64_t bytes,
+                                 BdrvRequestFlags flags)
+{
+    return -ENOTSUP;
+}
+
+static coroutine_fn int
+snapshot_access_co_pwritev_part(BlockDriverState *bs,
+                                int64_t offset, int64_t bytes,
+                                QEMUIOVector *qiov, size_t qiov_offset,
+                                BdrvRequestFlags flags)
+{
+    return -ENOTSUP;
+}
+
+
+static void GRAPH_RDLOCK snapshot_access_refresh_filename(BlockDriverState *bs)
+{
+    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
+            bs->file->bs->filename);
+}
+
+static int snapshot_access_open(BlockDriverState *bs, QDict *options, int flags,
+                                Error **errp)
+{
+    bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
+                    BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY,
+                    false, errp);
+
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    if (!bs->file) {
+        return -EINVAL;
+    }
+
+    bs->total_sectors = bs->file->bs->total_sectors;
+
+    return 0;
+}
+
+static void snapshot_access_child_perm(BlockDriverState *bs, BdrvChild *c,
+                                BdrvChildRole role,
+                                BlockReopenQueue *reopen_queue,
+                                uint64_t perm, uint64_t shared,
+                                uint64_t *nperm, uint64_t *nshared)
+{
+    /*
+     * Currently, we don't need any permissions. If bs->file provides
+     * snapshot-access API, we can use it.
+     */
+    *nperm = 0;
+    *nshared = BLK_PERM_ALL;
+}
+
+static BlockDriver bdrv_snapshot_access_drv = {
+    .format_name = "snapshot-access",
+
+    .bdrv_open                  = snapshot_access_open,
+
+    .bdrv_co_preadv_part        = snapshot_access_co_preadv_part,
+    .bdrv_co_pwritev_part       = snapshot_access_co_pwritev_part,
+    .bdrv_co_pwrite_zeroes      = snapshot_access_co_pwrite_zeroes,
+    .bdrv_co_pdiscard           = snapshot_access_co_pdiscard,
+    .bdrv_co_block_status       = snapshot_access_co_block_status,
+
+    .bdrv_refresh_filename      = snapshot_access_refresh_filename,
+
+    .bdrv_child_perm            = snapshot_access_child_perm,
+};
+
+static void snapshot_access_init(void)
+{
+    bdrv_register(&bdrv_snapshot_access_drv);
+}
+
+block_init(snapshot_access_init);
index a2bf3a54eb08f09e2c0fa837b1282906bae410d2..4d0d5d65ecd64088007922a87773e38b6fe175ad 100644 (file)
@@ -57,6 +57,8 @@ int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
     QEMUSnapshotInfo *sn_tab, *sn;
     int nb_sns, i, ret;
 
+    GLOBAL_STATE_CODE();
+
     ret = -ENOENT;
     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
     if (nb_sns < 0) {
@@ -105,6 +107,7 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
     bool ret = false;
 
     assert(id || name);
+    GLOBAL_STATE_CODE();
 
     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
     if (nb_sns < 0) {
@@ -148,41 +151,33 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
 }
 
 /**
- * Return a pointer to the child BDS pointer to which we can fall
+ * Return a pointer to child of given BDS to which we can fall
  * back if the given BDS does not support snapshots.
  * Return NULL if there is no BDS to (safely) fall back to.
- *
- * We need to return an indirect pointer because bdrv_snapshot_goto()
- * has to modify the BdrvChild pointer.
  */
-static BdrvChild **bdrv_snapshot_fallback_ptr(BlockDriverState *bs)
+static BdrvChild * GRAPH_RDLOCK
+bdrv_snapshot_fallback_child(BlockDriverState *bs)
 {
-    BdrvChild **fallback;
+    BdrvChild *fallback = bdrv_primary_child(bs);
     BdrvChild *child;
 
-    /*
-     * The only BdrvChild pointers that are safe to modify (and which
-     * we can thus return a reference to) are bs->file and
-     * bs->backing.
-     */
-    fallback = &bs->file;
-    if (!*fallback && bs->drv && bs->drv->is_filter) {
-        fallback = &bs->backing;
-    }
+    GLOBAL_STATE_CODE();
+    assert_bdrv_graph_readable();
 
-    if (!*fallback) {
+    /* We allow fallback only to primary child */
+    if (!fallback) {
         return NULL;
     }
 
     /*
      * Check that there are no other children that would need to be
      * snapshotted.  If there are, it is not safe to fall back to
-     * *fallback.
+     * fallback.
      */
     QLIST_FOREACH(child, &bs->children, next) {
         if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
                            BDRV_CHILD_FILTERED) &&
-            child != *fallback)
+            child != fallback)
         {
             return NULL;
         }
@@ -191,16 +186,20 @@ static BdrvChild **bdrv_snapshot_fallback_ptr(BlockDriverState *bs)
     return fallback;
 }
 
-static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs)
+static BlockDriverState * GRAPH_RDLOCK
+bdrv_snapshot_fallback(BlockDriverState *bs)
 {
-    BdrvChild **child_ptr = bdrv_snapshot_fallback_ptr(bs);
-    return child_ptr ? (*child_ptr)->bs : NULL;
+    GLOBAL_STATE_CODE();
+    return child_bs(bdrv_snapshot_fallback_child(bs));
 }
 
 int bdrv_can_snapshot(BlockDriverState *bs)
 {
     BlockDriver *drv = bs->drv;
-    if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
+
+    GLOBAL_STATE_CODE();
+
+    if (!drv || !bdrv_is_inserted(bs) || !bdrv_is_writable(bs)) {
         return 0;
     }
 
@@ -220,6 +219,9 @@ int bdrv_snapshot_create(BlockDriverState *bs,
 {
     BlockDriver *drv = bs->drv;
     BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
+
+    GLOBAL_STATE_CODE();
+
     if (!drv) {
         return -ENOMEDIUM;
     }
@@ -237,9 +239,11 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
                        Error **errp)
 {
     BlockDriver *drv = bs->drv;
-    BdrvChild **fallback_ptr;
+    BdrvChild *fallback;
     int ret, open_ret;
 
+    GLOBAL_STATE_CODE();
+
     if (!drv) {
         error_setg(errp, "Block driver is closed");
         return -ENOMEDIUM;
@@ -258,13 +262,16 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
         return ret;
     }
 
-    fallback_ptr = bdrv_snapshot_fallback_ptr(bs);
-    if (fallback_ptr) {
+    bdrv_graph_rdlock_main_loop();
+    fallback = bdrv_snapshot_fallback_child(bs);
+    bdrv_graph_rdunlock_main_loop();
+
+    if (fallback) {
         QDict *options;
         QDict *file_options;
         Error *local_err = NULL;
-        BlockDriverState *fallback_bs = (*fallback_ptr)->bs;
-        char *subqdict_prefix = g_strdup_printf("%s.", (*fallback_ptr)->name);
+        BlockDriverState *fallback_bs = fallback->bs;
+        char *subqdict_prefix = g_strdup_printf("%s.", fallback->name);
 
         options = qdict_clone_shallow(bs->options);
 
@@ -275,15 +282,19 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
         qobject_unref(file_options);
         g_free(subqdict_prefix);
 
-        qdict_put_str(options, (*fallback_ptr)->name,
+        /* Force .bdrv_open() below to re-attach fallback_bs on fallback */
+        qdict_put_str(options, fallback->name,
                       bdrv_get_node_name(fallback_bs));
 
+        /* Now close bs, apply the snapshot on fallback_bs, and re-open bs */
         if (drv->bdrv_close) {
             drv->bdrv_close(bs);
         }
 
-        bdrv_unref_child(bs, *fallback_ptr);
-        *fallback_ptr = NULL;
+        /* .bdrv_open() will re-attach it */
+        bdrv_graph_wrlock(NULL);
+        bdrv_unref_child(bs, fallback);
+        bdrv_graph_wrunlock(NULL);
 
         ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp);
         open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err);
@@ -296,7 +307,16 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
             return ret < 0 ? ret : open_ret;
         }
 
-        assert(fallback_bs == (*fallback_ptr)->bs);
+        /*
+         * fallback was a primary child. It was closed above and set to NULL,
+         * but the .bdrv_open() call has opened it again, because we set the
+         * respective option (with the qdict_put_str() call above).
+         * Assert that .bdrv_open() has attached the right BDS as primary child.
+         */
+        bdrv_graph_rdlock_main_loop();
+        assert(bdrv_primary_bs(bs) == fallback_bs);
+        bdrv_graph_rdunlock_main_loop();
+
         bdrv_unref(fallback_bs);
         return ret;
     }
@@ -336,6 +356,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
     BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
     int ret;
 
+    GLOBAL_STATE_CODE();
+
     if (!drv) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
         return -ENOMEDIUM;
@@ -366,8 +388,12 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
 int bdrv_snapshot_list(BlockDriverState *bs,
                        QEMUSnapshotInfo **psn_info)
 {
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD();
+
     BlockDriver *drv = bs->drv;
     BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
+
     if (!drv) {
         return -ENOMEDIUM;
     }
@@ -407,6 +433,9 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
 {
     BlockDriver *drv = bs->drv;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (!drv) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
         return -ENOMEDIUM;
@@ -415,7 +444,7 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
         error_setg(errp, "snapshot_id and name are both NULL");
         return -EINVAL;
     }
-    if (!bs->read_only) {
+    if (!bdrv_is_read_only(bs)) {
         error_setg(errp, "Device is not readonly");
         return -EINVAL;
     }
@@ -435,6 +464,8 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
     int ret;
     Error *local_err = NULL;
 
+    GLOBAL_STATE_CODE();
+
     ret = bdrv_snapshot_load_tmp(bs, id_or_name, NULL, &local_err);
     if (ret == -ENOENT || ret == -EINVAL) {
         error_free(local_err);
@@ -447,8 +478,46 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
     return ret;
 }
 
-static bool bdrv_all_snapshots_includes_bs(BlockDriverState *bs)
+
+static int GRAPH_RDLOCK
+bdrv_all_get_snapshot_devices(bool has_devices, strList *devices,
+                              GList **all_bdrvs, Error **errp)
+{
+    g_autoptr(GList) bdrvs = NULL;
+
+    if (has_devices) {
+        if (!devices) {
+            error_setg(errp, "At least one device is required for snapshot");
+            return -1;
+        }
+
+        while (devices) {
+            BlockDriverState *bs = bdrv_find_node(devices->value);
+            if (!bs) {
+                error_setg(errp, "No block device node '%s'", devices->value);
+                return -1;
+            }
+            bdrvs = g_list_append(bdrvs, bs);
+            devices = devices->next;
+        }
+    } else {
+        BlockDriverState *bs;
+        BdrvNextIterator it;
+        for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+            bdrvs = g_list_append(bdrvs, bs);
+        }
+    }
+
+    *all_bdrvs = g_steal_pointer(&bdrvs);
+    return 0;
+}
+
+
+static bool GRAPH_RDLOCK bdrv_all_snapshots_includes_bs(BlockDriverState *bs)
 {
+    GLOBAL_STATE_CODE();
+    assert_bdrv_graph_readable();
+
     if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
         return false;
     }
@@ -462,44 +531,65 @@ static bool bdrv_all_snapshots_includes_bs(BlockDriverState *bs)
  * These functions will properly handle dataplane (take aio_context_acquire
  * when appropriate for appropriate block drivers) */
 
-bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
+bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
+                           Error **errp)
 {
-    bool ok = true;
-    BlockDriverState *bs;
-    BdrvNextIterator it;
+    g_autoptr(GList) bdrvs = NULL;
+    GList *iterbdrvs;
+
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+    if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
+        return false;
+    }
+
+    iterbdrvs = bdrvs;
+    while (iterbdrvs) {
+        BlockDriverState *bs = iterbdrvs->data;
         AioContext *ctx = bdrv_get_aio_context(bs);
+        bool ok = true;
 
         aio_context_acquire(ctx);
-        if (bdrv_all_snapshots_includes_bs(bs)) {
+        if (devices || bdrv_all_snapshots_includes_bs(bs)) {
             ok = bdrv_can_snapshot(bs);
         }
         aio_context_release(ctx);
         if (!ok) {
-            bdrv_next_cleanup(&it);
-            goto fail;
+            error_setg(errp, "Device '%s' is writable but does not support "
+                       "snapshots", bdrv_get_device_or_node_name(bs));
+            return false;
         }
+
+        iterbdrvs = iterbdrvs->next;
     }
 
-fail:
-    *first_bad_bs = bs;
-    return ok;
+    return true;
 }
 
-int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
+int bdrv_all_delete_snapshot(const char *name,
+                             bool has_devices, strList *devices,
                              Error **errp)
 {
-    int ret = 0;
-    BlockDriverState *bs;
-    BdrvNextIterator it;
-    QEMUSnapshotInfo sn1, *snapshot = &sn1;
+    g_autoptr(GList) bdrvs = NULL;
+    GList *iterbdrvs;
 
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
+        return -1;
+    }
+
+    iterbdrvs = bdrvs;
+    while (iterbdrvs) {
+        BlockDriverState *bs = iterbdrvs->data;
         AioContext *ctx = bdrv_get_aio_context(bs);
+        QEMUSnapshotInfo sn1, *snapshot = &sn1;
+        int ret = 0;
 
         aio_context_acquire(ctx);
-        if (bdrv_all_snapshots_includes_bs(bs) &&
+        if ((devices || bdrv_all_snapshots_includes_bs(bs)) &&
             bdrv_snapshot_find(bs, snapshot, name) >= 0)
         {
             ret = bdrv_snapshot_delete(bs, snapshot->id_str,
@@ -507,118 +597,201 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
         }
         aio_context_release(ctx);
         if (ret < 0) {
-            bdrv_next_cleanup(&it);
-            goto fail;
+            error_prepend(errp, "Could not delete snapshot '%s' on '%s': ",
+                          name, bdrv_get_device_or_node_name(bs));
+            return -1;
         }
+
+        iterbdrvs = iterbdrvs->next;
     }
 
-fail:
-    *first_bad_bs = bs;
-    return ret;
+    return 0;
 }
 
 
-int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs,
+int bdrv_all_goto_snapshot(const char *name,
+                           bool has_devices, strList *devices,
                            Error **errp)
 {
-    int ret = 0;
-    BlockDriverState *bs;
-    BdrvNextIterator it;
+    g_autoptr(GList) bdrvs = NULL;
+    GList *iterbdrvs;
+    int ret;
+
+    GLOBAL_STATE_CODE();
+
+    bdrv_graph_rdlock_main_loop();
+    ret = bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp);
+    bdrv_graph_rdunlock_main_loop();
+
+    if (ret < 0) {
+        return -1;
+    }
 
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+    iterbdrvs = bdrvs;
+    while (iterbdrvs) {
+        BlockDriverState *bs = iterbdrvs->data;
         AioContext *ctx = bdrv_get_aio_context(bs);
+        bool all_snapshots_includes_bs;
 
         aio_context_acquire(ctx);
-        if (bdrv_all_snapshots_includes_bs(bs)) {
-            ret = bdrv_snapshot_goto(bs, name, errp);
-        }
+        bdrv_graph_rdlock_main_loop();
+        all_snapshots_includes_bs = bdrv_all_snapshots_includes_bs(bs);
+        bdrv_graph_rdunlock_main_loop();
+
+        ret = (devices || all_snapshots_includes_bs) ?
+              bdrv_snapshot_goto(bs, name, errp) : 0;
         aio_context_release(ctx);
         if (ret < 0) {
-            bdrv_next_cleanup(&it);
-            goto fail;
+            bdrv_graph_rdlock_main_loop();
+            error_prepend(errp, "Could not load snapshot '%s' on '%s': ",
+                          name, bdrv_get_device_or_node_name(bs));
+            bdrv_graph_rdunlock_main_loop();
+            return -1;
         }
+
+        iterbdrvs = iterbdrvs->next;
     }
 
-fail:
-    *first_bad_bs = bs;
-    return ret;
+    return 0;
 }
 
-int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
+int bdrv_all_has_snapshot(const char *name,
+                          bool has_devices, strList *devices,
+                          Error **errp)
 {
-    QEMUSnapshotInfo sn;
-    int err = 0;
-    BlockDriverState *bs;
-    BdrvNextIterator it;
+    g_autoptr(GList) bdrvs = NULL;
+    GList *iterbdrvs;
+
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+    if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
+        return -1;
+    }
+
+    iterbdrvs = bdrvs;
+    while (iterbdrvs) {
+        BlockDriverState *bs = iterbdrvs->data;
         AioContext *ctx = bdrv_get_aio_context(bs);
+        QEMUSnapshotInfo sn;
+        int ret = 0;
 
         aio_context_acquire(ctx);
-        if (bdrv_all_snapshots_includes_bs(bs)) {
-            err = bdrv_snapshot_find(bs, &sn, name);
+        if (devices || bdrv_all_snapshots_includes_bs(bs)) {
+            ret = bdrv_snapshot_find(bs, &sn, name);
         }
         aio_context_release(ctx);
-        if (err < 0) {
-            bdrv_next_cleanup(&it);
-            goto fail;
+        if (ret < 0) {
+            if (ret == -ENOENT) {
+                return 0;
+            } else {
+                error_setg_errno(errp, errno,
+                                 "Could not check snapshot '%s' on '%s'",
+                                 name, bdrv_get_device_or_node_name(bs));
+                return -1;
+            }
         }
+
+        iterbdrvs = iterbdrvs->next;
     }
 
-fail:
-    *first_bad_bs = bs;
-    return err;
+    return 1;
 }
 
 int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
                              BlockDriverState *vm_state_bs,
                              uint64_t vm_state_size,
-                             BlockDriverState **first_bad_bs)
+                             bool has_devices, strList *devices,
+                             Error **errp)
 {
-    int err = 0;
-    BlockDriverState *bs;
-    BdrvNextIterator it;
+    g_autoptr(GList) bdrvs = NULL;
+    GList *iterbdrvs;
+
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
+        return -1;
+    }
 
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+    iterbdrvs = bdrvs;
+    while (iterbdrvs) {
+        BlockDriverState *bs = iterbdrvs->data;
         AioContext *ctx = bdrv_get_aio_context(bs);
+        int ret = 0;
 
         aio_context_acquire(ctx);
         if (bs == vm_state_bs) {
             sn->vm_state_size = vm_state_size;
-            err = bdrv_snapshot_create(bs, sn);
-        } else if (bdrv_all_snapshots_includes_bs(bs)) {
+            ret = bdrv_snapshot_create(bs, sn);
+        } else if (devices || bdrv_all_snapshots_includes_bs(bs)) {
             sn->vm_state_size = 0;
-            err = bdrv_snapshot_create(bs, sn);
+            ret = bdrv_snapshot_create(bs, sn);
         }
         aio_context_release(ctx);
-        if (err < 0) {
-            bdrv_next_cleanup(&it);
-            goto fail;
+        if (ret < 0) {
+            error_setg(errp, "Could not create snapshot '%s' on '%s'",
+                       sn->name, bdrv_get_device_or_node_name(bs));
+            return -1;
         }
+
+        iterbdrvs = iterbdrvs->next;
     }
 
-fail:
-    *first_bad_bs = bs;
-    return err;
+    return 0;
 }
 
-BlockDriverState *bdrv_all_find_vmstate_bs(void)
+
+BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs,
+                                           bool has_devices, strList *devices,
+                                           Error **errp)
 {
-    BlockDriverState *bs;
-    BdrvNextIterator it;
+    g_autoptr(GList) bdrvs = NULL;
+    GList *iterbdrvs;
+
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+    if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
+        return NULL;
+    }
+
+    iterbdrvs = bdrvs;
+    while (iterbdrvs) {
+        BlockDriverState *bs = iterbdrvs->data;
         AioContext *ctx = bdrv_get_aio_context(bs);
-        bool found;
+        bool found = false;
 
         aio_context_acquire(ctx);
-        found = bdrv_all_snapshots_includes_bs(bs) && bdrv_can_snapshot(bs);
+        found = (devices || bdrv_all_snapshots_includes_bs(bs)) &&
+            bdrv_can_snapshot(bs);
         aio_context_release(ctx);
 
-        if (found) {
-            bdrv_next_cleanup(&it);
-            break;
+        if (vmstate_bs) {
+            if (g_str_equal(vmstate_bs,
+                            bdrv_get_node_name(bs))) {
+                if (found) {
+                    return bs;
+                } else {
+                    error_setg(errp,
+                               "vmstate block device '%s' does not support snapshots",
+                               vmstate_bs);
+                    return NULL;
+                }
+            }
+        } else if (found) {
+            return bs;
         }
+
+        iterbdrvs = iterbdrvs->next;
+    }
+
+    if (vmstate_bs) {
+        error_setg(errp,
+                   "vmstate block device '%s' does not exist", vmstate_bs);
+    } else {
+        error_setg(errp,
+                   "no block device can store vmstate for snapshot");
     }
-    return bs;
+    return NULL;
 }
index ebe3d8b631ffc91eeaf66e2627301597db83d23e..2748253d4a3644261d866761fa1d436b7416b6a8 100644 (file)
@@ -27,6 +27,7 @@
 #include <libssh/libssh.h>
 #include <libssh/sftp.h>
 
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "block/qdict.h"
 #include "qapi/error.h"
@@ -108,7 +109,7 @@ static void ssh_state_free(BDRVSSHState *s)
     }
 }
 
-static void GCC_FMT_ATTR(3, 4)
+static void G_GNUC_PRINTF(3, 4)
 session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
 {
     va_list args;
@@ -133,7 +134,7 @@ session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
     g_free(msg);
 }
 
-static void GCC_FMT_ATTR(3, 4)
+static void G_GNUC_PRINTF(3, 4)
 sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
 {
     va_list args;
@@ -237,9 +238,7 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
     return 0;
 
  err:
-    if (uri) {
-      uri_free(uri);
-    }
+    uri_free(uri);
     return -EINVAL;
 }
 
@@ -277,7 +276,6 @@ static void ssh_parse_filename(const char *filename, QDict *options,
 static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp)
 {
     int ret;
-#ifdef HAVE_LIBSSH_0_8
     enum ssh_known_hosts_e state;
     int r;
     ssh_key pubkey;
@@ -343,46 +341,6 @@ static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp)
         error_setg(errp, "error while checking for known server (%d)", state);
         goto out;
     }
-#else /* !HAVE_LIBSSH_0_8 */
-    int state;
-
-    state = ssh_is_server_known(s->session);
-    trace_ssh_server_status(state);
-
-    switch (state) {
-    case SSH_SERVER_KNOWN_OK:
-        /* OK */
-        trace_ssh_check_host_key_knownhosts();
-        break;
-    case SSH_SERVER_KNOWN_CHANGED:
-        ret = -EINVAL;
-        error_setg(errp,
-                   "host key does not match the one in known_hosts; this "
-                   "may be a possible attack");
-        goto out;
-    case SSH_SERVER_FOUND_OTHER:
-        ret = -EINVAL;
-        error_setg(errp,
-                   "host key for this server not found, another type exists");
-        goto out;
-    case SSH_SERVER_FILE_NOT_FOUND:
-        ret = -ENOENT;
-        error_setg(errp, "known_hosts file not found");
-        goto out;
-    case SSH_SERVER_NOT_KNOWN:
-        ret = -EINVAL;
-        error_setg(errp, "no host key was found in known_hosts");
-        goto out;
-    case SSH_SERVER_ERROR:
-        ret = -EINVAL;
-        error_setg(errp, "server error");
-        goto out;
-    default:
-        ret = -EINVAL;
-        error_setg(errp, "error while checking for known server (%d)", state);
-        goto out;
-    }
-#endif /* !HAVE_LIBSSH_0_8 */
 
     /* known_hosts checking successful. */
     ret = 0;
@@ -429,25 +387,37 @@ static int compare_fingerprint(const unsigned char *fingerprint, size_t len,
     return *host_key_check - '\0';
 }
 
+static char *format_fingerprint(const unsigned char *fingerprint, size_t len)
+{
+    static const char *hex = "0123456789abcdef";
+    char *ret = g_new0(char, (len * 2) + 1);
+    for (size_t i = 0; i < len; i++) {
+        ret[i * 2] = hex[((fingerprint[i] >> 4) & 0xf)];
+        ret[(i * 2) + 1] = hex[(fingerprint[i] & 0xf)];
+    }
+    ret[len * 2] = '\0';
+    return ret;
+}
+
 static int
 check_host_key_hash(BDRVSSHState *s, const char *hash,
-                    enum ssh_publickey_hash_type type, Error **errp)
+                    enum ssh_publickey_hash_type type, const char *typestr,
+                    Error **errp)
 {
     int r;
     ssh_key pubkey;
     unsigned char *server_hash;
     size_t server_hash_len;
+    const char *keytype;
 
-#ifdef HAVE_LIBSSH_0_8
     r = ssh_get_server_publickey(s->session, &pubkey);
-#else
-    r = ssh_get_publickey(s->session, &pubkey);
-#endif
     if (r != SSH_OK) {
         session_error_setg(errp, s, "failed to read remote host key");
         return -EINVAL;
     }
 
+    keytype = ssh_key_type_to_char(ssh_key_type(pubkey));
+
     r = ssh_get_publickey_hash(pubkey, type, &server_hash, &server_hash_len);
     ssh_key_free(pubkey);
     if (r != 0) {
@@ -457,12 +427,16 @@ check_host_key_hash(BDRVSSHState *s, const char *hash,
     }
 
     r = compare_fingerprint(server_hash, server_hash_len, hash);
-    ssh_clean_pubkey_hash(&server_hash);
     if (r != 0) {
-        error_setg(errp, "remote host key does not match host_key_check '%s'",
-                   hash);
+        g_autofree char *server_fp = format_fingerprint(server_hash,
+                                                        server_hash_len);
+        error_setg(errp, "remote host %s key fingerprint '%s:%s' "
+                   "does not match host_key_check '%s:%s'",
+                   keytype, typestr, server_fp, typestr, hash);
+        ssh_clean_pubkey_hash(&server_hash);
         return -EPERM;
     }
+    ssh_clean_pubkey_hash(&server_hash);
 
     return 0;
 }
@@ -483,10 +457,16 @@ static int check_host_key(BDRVSSHState *s, SshHostKeyCheck *hkc, Error **errp)
     case SSH_HOST_KEY_CHECK_MODE_HASH:
         if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_MD5) {
             return check_host_key_hash(s, hkc->u.hash.hash,
-                                       SSH_PUBLICKEY_HASH_MD5, errp);
+                                       SSH_PUBLICKEY_HASH_MD5, "md5",
+                                       errp);
         } else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA1) {
             return check_host_key_hash(s, hkc->u.hash.hash,
-                                       SSH_PUBLICKEY_HASH_SHA1, errp);
+                                       SSH_PUBLICKEY_HASH_SHA1, "sha1",
+                                       errp);
+        } else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA256) {
+            return check_host_key_hash(s, hkc->u.hash.hash,
+                                       SSH_PUBLICKEY_HASH_SHA256, "sha256",
+                                       errp);
         }
         g_assert_not_reached();
         break;
@@ -600,6 +580,11 @@ static bool ssh_process_legacy_options(QDict *output_opts,
             qdict_put_str(output_opts, "host-key-check.type", "sha1");
             qdict_put_str(output_opts, "host-key-check.hash",
                           &host_key_check[5]);
+        } else if (strncmp(host_key_check, "sha256:", 7) == 0) {
+            qdict_put_str(output_opts, "host-key-check.mode", "hash");
+            qdict_put_str(output_opts, "host-key-check.type", "sha256");
+            qdict_put_str(output_opts, "host-key-check.hash",
+                          &host_key_check[7]);
         } else if (strcmp(host_key_check, "yes") == 0) {
             qdict_put_str(output_opts, "host-key-check.mode", "known_hosts");
         } else {
@@ -659,7 +644,7 @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
     unsigned int port = 0;
     int new_sock = -1;
 
-    if (opts->has_user) {
+    if (opts->user) {
         s->user = g_strdup(opts->user);
     } else {
         s->user = g_strdup(g_get_user_name());
@@ -1034,7 +1019,7 @@ static void restart_coroutine(void *opaque)
     AioContext *ctx = bdrv_get_aio_context(bs);
 
     trace_ssh_restart_coroutine(restart->co);
-    aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL, NULL);
+    aio_set_fd_handler(ctx, s->sock, NULL, NULL, NULL, NULL, NULL);
 
     aio_co_wake(restart->co);
 }
@@ -1064,7 +1049,7 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
     trace_ssh_co_yield(s->sock, rd_handler, wr_handler);
 
     aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       false, rd_handler, wr_handler, NULL, &restart);
+                       rd_handler, wr_handler, NULL, NULL, &restart);
     qemu_coroutine_yield();
     trace_ssh_co_yield_back(s->sock);
 }
@@ -1145,9 +1130,9 @@ static coroutine_fn int ssh_co_readv(BlockDriverState *bs,
     return ret;
 }
 
-static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
-                     int64_t offset, size_t size,
-                     QEMUIOVector *qiov)
+static coroutine_fn int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
+                                  int64_t offset, size_t size,
+                                  QEMUIOVector *qiov)
 {
     ssize_t r;
     size_t written;
@@ -1212,7 +1197,6 @@ static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
     BDRVSSHState *s = bs->opaque;
     int ret;
 
-    assert(!flags);
     qemu_co_mutex_lock(&s->lock);
     ret = ssh_write(s, bs, sector_num * BDRV_SECTOR_SIZE,
                     nb_sectors * BDRV_SECTOR_SIZE, qiov);
@@ -1233,8 +1217,6 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
     }
 }
 
-#ifdef HAVE_LIBSSH_0_8
-
 static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
 {
     int r;
@@ -1271,19 +1253,7 @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
     return ret;
 }
 
-#else /* !HAVE_LIBSSH_0_8 */
-
-static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
-{
-    BDRVSSHState *s = bs->opaque;
-
-    unsafe_flush_warning(s, "libssh >= 0.8.0");
-    return 0;
-}
-
-#endif /* !HAVE_LIBSSH_0_8 */
-
-static int64_t ssh_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn ssh_co_getlength(BlockDriverState *bs)
 {
     BDRVSSHState *s = bs->opaque;
     int64_t length;
@@ -1394,7 +1364,7 @@ static BlockDriver bdrv_ssh = {
     .bdrv_has_zero_init           = ssh_has_zero_init,
     .bdrv_co_readv                = ssh_co_readv,
     .bdrv_co_writev               = ssh_co_writev,
-    .bdrv_getlength               = ssh_getlength,
+    .bdrv_co_getlength            = ssh_co_getlength,
     .bdrv_co_truncate             = ssh_co_truncate,
     .bdrv_co_flush_to_disk        = ssh_co_flush,
     .bdrv_refresh_filename        = ssh_refresh_filename,
index 236384f2f739125b8e1f2a4632fa1c99699a78ca..01fe7c0f166123748ba1d7e6e64c438e732ee322 100644 (file)
 #include "block/block_int.h"
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
 
 enum {
     /*
@@ -31,12 +32,14 @@ enum {
 
 typedef struct StreamBlockJob {
     BlockJob common;
+    BlockBackend *blk;
     BlockDriverState *base_overlay; /* COW overlay (stream from this) */
     BlockDriverState *above_base;   /* Node directly above the base */
+    BlockDriverState *cor_filter_bs;
+    BlockDriverState *target_bs;
     BlockdevOnError on_error;
     char *backing_file_str;
     bool bs_read_only;
-    bool chain_frozen;
 } StreamBlockJob;
 
 static int coroutine_fn stream_populate(BlockBackend *blk,
@@ -44,63 +47,100 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
 {
     assert(bytes < SIZE_MAX);
 
-    return blk_co_preadv(blk, offset, bytes, NULL,
-                         BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
-}
-
-static void stream_abort(Job *job)
-{
-    StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-
-    if (s->chain_frozen) {
-        BlockJob *bjob = &s->common;
-        bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
-    }
+    return blk_co_preadv(blk, offset, bytes, NULL, BDRV_REQ_PREFETCH);
 }
 
 static int stream_prepare(Job *job)
 {
     StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-    BlockJob *bjob = &s->common;
-    BlockDriverState *bs = blk_bs(bjob->blk);
-    BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-    BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+    BlockDriverState *unfiltered_bs;
+    BlockDriverState *unfiltered_bs_cow;
+    BlockDriverState *base;
+    BlockDriverState *unfiltered_base;
     Error *local_err = NULL;
     int ret = 0;
 
-    bdrv_unfreeze_backing_chain(bs, s->above_base);
-    s->chain_frozen = false;
+    GLOBAL_STATE_CODE();
+
+    bdrv_graph_rdlock_main_loop();
+    unfiltered_bs = bdrv_skip_filters(s->target_bs);
+    unfiltered_bs_cow = bdrv_cow_bs(unfiltered_bs);
+    bdrv_graph_rdunlock_main_loop();
+
+    /* We should drop filter at this point, as filter hold the backing chain */
+    bdrv_cor_filter_drop(s->cor_filter_bs);
+    s->cor_filter_bs = NULL;
 
-    if (bdrv_cow_child(unfiltered_bs)) {
+    /*
+     * bdrv_set_backing_hd() requires that the unfiltered_bs and the COW child
+     * of unfiltered_bs is drained. Drain already here and use
+     * bdrv_set_backing_hd_drained() instead because the polling during
+     * drained_begin() might change the graph, and if we do this only later, we
+     * may end up working with the wrong base node (or it might even have gone
+     * away by the time we want to use it).
+     */
+    bdrv_drained_begin(unfiltered_bs);
+    if (unfiltered_bs_cow) {
+        bdrv_ref(unfiltered_bs_cow);
+        bdrv_drained_begin(unfiltered_bs_cow);
+    }
+
+    bdrv_graph_rdlock_main_loop();
+    base = bdrv_filter_or_cow_bs(s->above_base);
+    unfiltered_base = bdrv_skip_filters(base);
+    bdrv_graph_rdunlock_main_loop();
+
+    if (unfiltered_bs_cow) {
         const char *base_id = NULL, *base_fmt = NULL;
-        if (base) {
-            base_id = s->backing_file_str;
-            if (base->drv) {
-                base_fmt = base->drv->format_name;
+        if (unfiltered_base) {
+            base_id = s->backing_file_str ?: unfiltered_base->filename;
+            if (unfiltered_base->drv) {
+                base_fmt = unfiltered_base->drv->format_name;
             }
         }
-        bdrv_set_backing_hd(unfiltered_bs, base, &local_err);
+
+        bdrv_graph_wrlock(s->target_bs);
+        bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err);
+        bdrv_graph_wrunlock(s->target_bs);
+
+        /*
+         * This call will do I/O, so the graph can change again from here on.
+         * We have already completed the graph change, so we are not in danger
+         * of operating on the wrong node any more if this happens.
+         */
         ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false);
         if (local_err) {
             error_report_err(local_err);
-            return -EPERM;
+            ret = -EPERM;
+            goto out;
         }
     }
 
+out:
+    if (unfiltered_bs_cow) {
+        bdrv_drained_end(unfiltered_bs_cow);
+        bdrv_unref(unfiltered_bs_cow);
+    }
+    bdrv_drained_end(unfiltered_bs);
     return ret;
 }
 
 static void stream_clean(Job *job)
 {
     StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-    BlockJob *bjob = &s->common;
-    BlockDriverState *bs = blk_bs(bjob->blk);
+
+    if (s->cor_filter_bs) {
+        bdrv_cor_filter_drop(s->cor_filter_bs);
+        s->cor_filter_bs = NULL;
+    }
+
+    blk_unref(s->blk);
+    s->blk = NULL;
 
     /* Reopen the image back in read-only mode if necessary */
     if (s->bs_read_only) {
         /* Give up write permissions before making it read-only */
-        blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
-        bdrv_reopen_set_read_only(bs, true, NULL);
+        bdrv_reopen_set_read_only(s->target_bs, true, NULL);
     }
 
     g_free(s->backing_file_str);
@@ -109,36 +149,26 @@ static void stream_clean(Job *job)
 static int coroutine_fn stream_run(Job *job, Error **errp)
 {
     StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-    BlockBackend *blk = s->common.blk;
-    BlockDriverState *bs = blk_bs(blk);
-    BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-    bool enable_cor = !bdrv_cow_child(s->base_overlay);
+    BlockDriverState *unfiltered_bs;
     int64_t len;
     int64_t offset = 0;
-    uint64_t delay_ns = 0;
     int error = 0;
     int64_t n = 0; /* bytes */
 
-    if (unfiltered_bs == s->base_overlay) {
-        /* Nothing to stream */
-        return 0;
-    }
+    WITH_GRAPH_RDLOCK_GUARD() {
+        unfiltered_bs = bdrv_skip_filters(s->target_bs);
+        if (unfiltered_bs == s->base_overlay) {
+            /* Nothing to stream */
+            return 0;
+        }
 
-    len = bdrv_getlength(bs);
-    if (len < 0) {
-        return len;
+        len = bdrv_co_getlength(s->target_bs);
+        if (len < 0) {
+            return len;
+        }
     }
     job_progress_set_remaining(&s->common.job, len);
 
-    /* Turn on copy-on-read for the whole block device so that guest read
-     * requests help us make progress.  Only do this when copying the entire
-     * backing chain since the copy-on-read operation does not take base into
-     * account.
-     */
-    if (enable_cor) {
-        bdrv_enable_copy_on_read(bs);
-    }
-
     for ( ; offset < len; offset += n) {
         bool copy;
         int ret;
@@ -146,32 +176,36 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
         /* Note that even when no rate limit is applied we need to yield
          * with no pending I/O here so that bdrv_drain_all() returns.
          */
-        job_sleep_ns(&s->common.job, delay_ns);
+        block_job_ratelimit_sleep(&s->common);
         if (job_is_cancelled(&s->common.job)) {
             break;
         }
 
         copy = false;
 
-        ret = bdrv_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, &n);
-        if (ret == 1) {
-            /* Allocated in the top, no need to copy.  */
-        } else if (ret >= 0) {
-            /* Copy if allocated in the intermediate images.  Limit to the
-             * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE).  */
-            ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
-                                          s->base_overlay, true,
-                                          offset, n, &n);
-            /* Finish early if end of backing file has been reached */
-            if (ret == 0 && n == 0) {
-                n = len - offset;
+        WITH_GRAPH_RDLOCK_GUARD() {
+            ret = bdrv_co_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, &n);
+            if (ret == 1) {
+                /* Allocated in the top, no need to copy.  */
+            } else if (ret >= 0) {
+                /*
+                 * Copy if allocated in the intermediate images.  Limit to the
+                 * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE).
+                 */
+                ret = bdrv_co_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
+                                                 s->base_overlay, true,
+                                                 offset, n, &n);
+                /* Finish early if end of backing file has been reached */
+                if (ret == 0 && n == 0) {
+                    n = len - offset;
+                }
+
+                copy = (ret > 0);
             }
-
-            copy = (ret > 0);
         }
         trace_stream_one_iteration(s, offset, n, ret);
         if (copy) {
-            ret = stream_populate(blk, offset, n);
+            ret = stream_populate(s->blk, offset, n);
         }
         if (ret < 0) {
             BlockErrorAction action =
@@ -191,16 +225,10 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
         /* Publish progress */
         job_progress_update(&s->common.job, n);
         if (copy) {
-            delay_ns = block_job_ratelimit_get_delay(&s->common, n);
-        } else {
-            delay_ns = 0;
+            block_job_ratelimit_processed_bytes(&s->common, n);
         }
     }
 
-    if (enable_cor) {
-        bdrv_disable_copy_on_read(bs);
-    }
-
     /* Do not remove the backing file if an error was there but ignored. */
     return error;
 }
@@ -212,7 +240,6 @@ static const BlockJobDriver stream_job_driver = {
         .free          = block_job_free,
         .run           = stream_run,
         .prepare       = stream_prepare,
-        .abort         = stream_abort,
         .clean         = stream_clean,
         .user_resume   = block_job_user_resume,
     },
@@ -220,59 +247,132 @@ static const BlockJobDriver stream_job_driver = {
 
 void stream_start(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *base, const char *backing_file_str,
+                  BlockDriverState *bottom,
                   int creation_flags, int64_t speed,
-                  BlockdevOnError on_error, Error **errp)
+                  BlockdevOnError on_error,
+                  const char *filter_node_name,
+                  Error **errp)
 {
-    StreamBlockJob *s;
+    StreamBlockJob *s = NULL;
     BlockDriverState *iter;
     bool bs_read_only;
     int basic_flags = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
-    BlockDriverState *base_overlay = bdrv_find_overlay(bs, base);
+    BlockDriverState *base_overlay;
+    BlockDriverState *cor_filter_bs = NULL;
     BlockDriverState *above_base;
+    QDict *opts;
+    int ret;
 
-    if (!base_overlay) {
-        error_setg(errp, "'%s' is not in the backing chain of '%s'",
-                   base->node_name, bs->node_name);
-        return;
-    }
+    GLOBAL_STATE_CODE();
 
-    /*
-     * Find the node directly above @base.  @base_overlay is a COW overlay, so
-     * it must have a bdrv_cow_child(), but it is the immediate overlay of
-     * @base, so between the two there can only be filters.
-     */
-    above_base = base_overlay;
-    if (bdrv_cow_bs(above_base) != base) {
-        above_base = bdrv_cow_bs(above_base);
-        while (bdrv_filter_bs(above_base) != base) {
-            above_base = bdrv_filter_bs(above_base);
+    assert(!(base && bottom));
+    assert(!(backing_file_str && bottom));
+
+    bdrv_graph_rdlock_main_loop();
+
+    if (bottom) {
+        /*
+         * New simple interface. The code is written in terms of old interface
+         * with @base parameter (still, it doesn't freeze link to base, so in
+         * this mean old code is correct for new interface). So, for now, just
+         * emulate base_overlay and above_base. Still, when old interface
+         * finally removed, we should refactor code to use only "bottom", but
+         * not "*base*" things.
+         */
+        assert(!bottom->drv->is_filter);
+        base_overlay = above_base = bottom;
+    } else {
+        base_overlay = bdrv_find_overlay(bs, base);
+        if (!base_overlay) {
+            error_setg(errp, "'%s' is not in the backing chain of '%s'",
+                       base->node_name, bs->node_name);
+            goto out_rdlock;
         }
-    }
 
-    if (bdrv_freeze_backing_chain(bs, above_base, errp) < 0) {
-        return;
+        /*
+         * Find the node directly above @base.  @base_overlay is a COW overlay,
+         * so it must have a bdrv_cow_child(), but it is the immediate overlay
+         * of @base, so between the two there can only be filters.
+         */
+        above_base = base_overlay;
+        if (bdrv_cow_bs(above_base) != base) {
+            above_base = bdrv_cow_bs(above_base);
+            while (bdrv_filter_bs(above_base) != base) {
+                above_base = bdrv_filter_bs(above_base);
+            }
+        }
     }
 
     /* Make sure that the image is opened in read-write mode */
     bs_read_only = bdrv_is_read_only(bs);
     if (bs_read_only) {
-        if (bdrv_reopen_set_read_only(bs, false, errp) != 0) {
-            bs_read_only = false;
-            goto fail;
+        /* Hold the chain during reopen */
+        if (bdrv_freeze_backing_chain(bs, above_base, errp) < 0) {
+            goto out_rdlock;
+        }
+
+        ret = bdrv_reopen_set_read_only(bs, false, errp);
+
+        /* failure, or cor-filter will hold the chain */
+        bdrv_unfreeze_backing_chain(bs, above_base);
+
+        if (ret < 0) {
+            goto out_rdlock;
         }
     }
 
-    /* Prevent concurrent jobs trying to modify the graph structure here, we
-     * already have our own plans. Also don't allow resize as the image size is
-     * queried only at the job start and then cached. */
-    s = block_job_create(job_id, &stream_job_driver, NULL, bs,
-                         basic_flags | BLK_PERM_GRAPH_MOD,
-                         basic_flags | BLK_PERM_WRITE,
+    bdrv_graph_rdunlock_main_loop();
+
+    opts = qdict_new();
+
+    qdict_put_str(opts, "driver", "copy-on-read");
+    qdict_put_str(opts, "file", bdrv_get_node_name(bs));
+    /* Pass the base_overlay node name as 'bottom' to COR driver */
+    qdict_put_str(opts, "bottom", base_overlay->node_name);
+    if (filter_node_name) {
+        qdict_put_str(opts, "node-name", filter_node_name);
+    }
+
+    cor_filter_bs = bdrv_insert_node(bs, opts, BDRV_O_RDWR, errp);
+    if (!cor_filter_bs) {
+        goto fail;
+    }
+
+    if (!filter_node_name) {
+        cor_filter_bs->implicit = true;
+    }
+
+    s = block_job_create(job_id, &stream_job_driver, NULL, cor_filter_bs,
+                         0, BLK_PERM_ALL,
                          speed, creation_flags, NULL, NULL, errp);
     if (!s) {
         goto fail;
     }
 
+    s->blk = blk_new_with_bs(cor_filter_bs, BLK_PERM_CONSISTENT_READ,
+                             basic_flags | BLK_PERM_WRITE, errp);
+    if (!s->blk) {
+        goto fail;
+    }
+    /*
+     * Disable request queuing in the BlockBackend to avoid deadlocks on drain:
+     * The job reports that it's busy until it reaches a pause point.
+     */
+    blk_set_disable_request_queuing(s->blk, true);
+    blk_set_allow_aio_context_change(s->blk, true);
+
+    /*
+     * Prevent concurrent jobs trying to modify the graph structure here, we
+     * already have our own plans. Also don't allow resize as the image size is
+     * queried only at the job start and then cached.
+     */
+    bdrv_graph_wrlock(bs);
+    if (block_job_add_bdrv(&s->common, "active node", bs, 0,
+                           basic_flags | BLK_PERM_WRITE, errp)) {
+        bdrv_graph_wrunlock(bs);
+        goto fail;
+    }
+
     /* Block all intermediate nodes between bs and base, because they will
      * disappear from the chain after this operation. The streaming job reads
      * every block only once, assuming that it doesn't change, so forbid writes
@@ -286,15 +386,21 @@ void stream_start(const char *job_id, BlockDriverState *bs,
     for (iter = bdrv_filter_or_cow_bs(bs); iter != base;
          iter = bdrv_filter_or_cow_bs(iter))
     {
-        block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
-                           basic_flags, &error_abort);
+        ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
+                                 basic_flags, errp);
+        if (ret < 0) {
+            bdrv_graph_wrunlock(bs);
+            goto fail;
+        }
     }
+    bdrv_graph_wrunlock(bs);
 
     s->base_overlay = base_overlay;
     s->above_base = above_base;
     s->backing_file_str = g_strdup(backing_file_str);
+    s->cor_filter_bs = cor_filter_bs;
+    s->target_bs = bs;
     s->bs_read_only = bs_read_only;
-    s->chain_frozen = true;
 
     s->on_error = on_error;
     trace_stream_start(bs, base, s);
@@ -302,8 +408,17 @@ void stream_start(const char *job_id, BlockDriverState *bs,
     return;
 
 fail:
+    if (s) {
+        job_early_fail(&s->common.job);
+    }
+    if (cor_filter_bs) {
+        bdrv_cor_filter_drop(cor_filter_bs);
+    }
     if (bs_read_only) {
         bdrv_reopen_set_read_only(bs, true, NULL);
     }
-    bdrv_unfreeze_backing_chain(bs, above_base);
+    return;
+
+out_rdlock:
+    bdrv_graph_rdunlock_main_loop();
 }
index e2f2813c0fcda64a6ec8a2d7e20f2652651d645a..f5c0fac5814be07423233690764a35d402b6114a 100644 (file)
@@ -37,7 +37,7 @@
 
 static void throttle_group_obj_init(Object *obj);
 static void throttle_group_obj_complete(UserCreatable *obj, Error **errp);
-static void timer_cb(ThrottleGroupMember *tgm, bool is_write);
+static void timer_cb(ThrottleGroupMember *tgm, ThrottleDirection direction);
 
 /* The ThrottleGroup structure (with its ThrottleState) is shared
  * among different ThrottleGroupMembers and it's independent from
@@ -73,8 +73,8 @@ struct ThrottleGroup {
     QemuMutex lock; /* This lock protects the following four fields */
     ThrottleState ts;
     QLIST_HEAD(, ThrottleGroupMember) head;
-    ThrottleGroupMember *tokens[2];
-    bool any_timer_armed[2];
+    ThrottleGroupMember *tokens[THROTTLE_MAX];
+    bool any_timer_armed[THROTTLE_MAX];
     QEMUClockType clock_type;
 
     /* This field is protected by the global QEMU mutex */
@@ -197,13 +197,13 @@ static ThrottleGroupMember *throttle_group_next_tgm(ThrottleGroupMember *tgm)
  * This assumes that tg->lock is held.
  *
  * @tgm:        the ThrottleGroupMember
- * @is_write:   the type of operation (read/write)
+ * @direction:  the ThrottleDirection
  * @ret:        whether the ThrottleGroupMember has pending requests.
  */
 static inline bool tgm_has_pending_reqs(ThrottleGroupMember *tgm,
-                                        bool is_write)
+                                        ThrottleDirection direction)
 {
-    return tgm->pending_reqs[is_write];
+    return tgm->pending_reqs[direction];
 }
 
 /* Return the next ThrottleGroupMember in the round-robin sequence with pending
@@ -212,12 +212,12 @@ static inline bool tgm_has_pending_reqs(ThrottleGroupMember *tgm,
  * This assumes that tg->lock is held.
  *
  * @tgm:       the current ThrottleGroupMember
- * @is_write:  the type of operation (read/write)
+ * @direction: the ThrottleDirection
  * @ret:       the next ThrottleGroupMember with pending requests, or tgm if
  *             there is none.
  */
 static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
-                                                bool is_write)
+                                                ThrottleDirection direction)
 {
     ThrottleState *ts = tgm->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
@@ -227,16 +227,16 @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
      * it's being drained. Skip the round-robin search and return tgm
      * immediately if it has pending requests. Otherwise we could be
      * forcing it to wait for other member's throttled requests. */
-    if (tgm_has_pending_reqs(tgm, is_write) &&
+    if (tgm_has_pending_reqs(tgm, direction) &&
         qatomic_read(&tgm->io_limits_disabled)) {
         return tgm;
     }
 
-    start = token = tg->tokens[is_write];
+    start = token = tg->tokens[direction];
 
     /* get next bs round in round robin style */
     token = throttle_group_next_tgm(token);
-    while (token != start && !tgm_has_pending_reqs(token, is_write)) {
+    while (token != start && !tgm_has_pending_reqs(token, direction)) {
         token = throttle_group_next_tgm(token);
     }
 
@@ -244,12 +244,12 @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
      * then decide the token is the current tgm because chances are
      * the current tgm got the current request queued.
      */
-    if (token == start && !tgm_has_pending_reqs(token, is_write)) {
+    if (token == start && !tgm_has_pending_reqs(token, direction)) {
         token = tgm;
     }
 
     /* Either we return the original TGM, or one with pending requests */
-    assert(token == tgm || tgm_has_pending_reqs(token, is_write));
+    assert(token == tgm || tgm_has_pending_reqs(token, direction));
 
     return token;
 }
@@ -261,11 +261,11 @@ static ThrottleGroupMember *next_throttle_token(ThrottleGroupMember *tgm,
  * This assumes that tg->lock is held.
  *
  * @tgm:        the current ThrottleGroupMember
- * @is_write:   the type of operation (read/write)
+ * @direction:  the ThrottleDirection
  * @ret:        whether the I/O request needs to be throttled or not
  */
 static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
-                                          bool is_write)
+                                          ThrottleDirection direction)
 {
     ThrottleState *ts = tgm->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
@@ -277,16 +277,16 @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
     }
 
     /* Check if any of the timers in this group is already armed */
-    if (tg->any_timer_armed[is_write]) {
+    if (tg->any_timer_armed[direction]) {
         return true;
     }
 
-    must_wait = throttle_schedule_timer(ts, tt, is_write);
+    must_wait = throttle_schedule_timer(ts, tt, direction);
 
     /* If a timer just got armed, set tgm as the current token */
     if (must_wait) {
-        tg->tokens[is_write] = tgm;
-        tg->any_timer_armed[is_write] = true;
+        tg->tokens[direction] = tgm;
+        tg->any_timer_armed[direction] = true;
     }
 
     return must_wait;
@@ -296,15 +296,15 @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
  * any request was actually pending.
  *
  * @tgm:       the current ThrottleGroupMember
- * @is_write:  the type of operation (read/write)
+ * @direction: the ThrottleDirection
  */
 static bool coroutine_fn throttle_group_co_restart_queue(ThrottleGroupMember *tgm,
-                                                         bool is_write)
+                                                         ThrottleDirection direction)
 {
     bool ret;
 
     qemu_co_mutex_lock(&tgm->throttled_reqs_lock);
-    ret = qemu_co_queue_next(&tgm->throttled_reqs[is_write]);
+    ret = qemu_co_queue_next(&tgm->throttled_reqs[direction]);
     qemu_co_mutex_unlock(&tgm->throttled_reqs_lock);
 
     return ret;
@@ -315,9 +315,10 @@ static bool coroutine_fn throttle_group_co_restart_queue(ThrottleGroupMember *tg
  * This assumes that tg->lock is held.
  *
  * @tgm:       the current ThrottleGroupMember
- * @is_write:  the type of operation (read/write)
+ * @direction: the ThrottleDirection
  */
-static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write)
+static void coroutine_mixed_fn schedule_next_request(ThrottleGroupMember *tgm,
+                                                     ThrottleDirection direction)
 {
     ThrottleState *ts = tgm->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
@@ -325,27 +326,27 @@ static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write)
     ThrottleGroupMember *token;
 
     /* Check if there's any pending request to schedule next */
-    token = next_throttle_token(tgm, is_write);
-    if (!tgm_has_pending_reqs(token, is_write)) {
+    token = next_throttle_token(tgm, direction);
+    if (!tgm_has_pending_reqs(token, direction)) {
         return;
     }
 
     /* Set a timer for the request if it needs to be throttled */
-    must_wait = throttle_group_schedule_timer(token, is_write);
+    must_wait = throttle_group_schedule_timer(token, direction);
 
     /* If it doesn't have to wait, queue it for immediate execution */
     if (!must_wait) {
         /* Give preference to requests from the current tgm */
         if (qemu_in_coroutine() &&
-            throttle_group_co_restart_queue(tgm, is_write)) {
+            throttle_group_co_restart_queue(tgm, direction)) {
             token = tgm;
         } else {
             ThrottleTimers *tt = &token->throttle_timers;
             int64_t now = qemu_clock_get_ns(tg->clock_type);
-            timer_mod(tt->timers[is_write], now);
-            tg->any_timer_armed[is_write] = true;
+            timer_mod(tt->timers[direction], now);
+            tg->any_timer_armed[direction] = true;
         }
-        tg->tokens[is_write] = token;
+        tg->tokens[direction] = token;
     }
 }
 
@@ -355,45 +356,49 @@ static void schedule_next_request(ThrottleGroupMember *tgm, bool is_write)
  *
  * @tgm:       the current ThrottleGroupMember
  * @bytes:     the number of bytes for this I/O
- * @is_write:  the type of operation (read/write)
+ * @direction: the ThrottleDirection
  */
 void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm,
-                                                        unsigned int bytes,
-                                                        bool is_write)
+                                                        int64_t bytes,
+                                                        ThrottleDirection direction)
 {
     bool must_wait;
     ThrottleGroupMember *token;
     ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts);
+
+    assert(bytes >= 0);
+    assert(direction < THROTTLE_MAX);
+
     qemu_mutex_lock(&tg->lock);
 
     /* First we check if this I/O has to be throttled. */
-    token = next_throttle_token(tgm, is_write);
-    must_wait = throttle_group_schedule_timer(token, is_write);
+    token = next_throttle_token(tgm, direction);
+    must_wait = throttle_group_schedule_timer(token, direction);
 
     /* Wait if there's a timer set or queued requests of this type */
-    if (must_wait || tgm->pending_reqs[is_write]) {
-        tgm->pending_reqs[is_write]++;
+    if (must_wait || tgm->pending_reqs[direction]) {
+        tgm->pending_reqs[direction]++;
         qemu_mutex_unlock(&tg->lock);
         qemu_co_mutex_lock(&tgm->throttled_reqs_lock);
-        qemu_co_queue_wait(&tgm->throttled_reqs[is_write],
+        qemu_co_queue_wait(&tgm->throttled_reqs[direction],
                            &tgm->throttled_reqs_lock);
         qemu_co_mutex_unlock(&tgm->throttled_reqs_lock);
         qemu_mutex_lock(&tg->lock);
-        tgm->pending_reqs[is_write]--;
+        tgm->pending_reqs[direction]--;
     }
 
     /* The I/O will be executed, so do the accounting */
-    throttle_account(tgm->throttle_state, is_write, bytes);
+    throttle_account(tgm->throttle_state, direction, bytes);
 
     /* Schedule the next request */
-    schedule_next_request(tgm, is_write);
+    schedule_next_request(tgm, direction);
 
     qemu_mutex_unlock(&tg->lock);
 }
 
 typedef struct {
     ThrottleGroupMember *tgm;
-    bool is_write;
+    ThrottleDirection direction;
 } RestartData;
 
 static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
@@ -402,16 +407,16 @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
     ThrottleGroupMember *tgm = data->tgm;
     ThrottleState *ts = tgm->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
-    bool is_write = data->is_write;
+    ThrottleDirection direction = data->direction;
     bool empty_queue;
 
-    empty_queue = !throttle_group_co_restart_queue(tgm, is_write);
+    empty_queue = !throttle_group_co_restart_queue(tgm, direction);
 
     /* If the request queue was empty then we have to take care of
      * scheduling the next one */
     if (empty_queue) {
         qemu_mutex_lock(&tg->lock);
-        schedule_next_request(tgm, is_write);
+        schedule_next_request(tgm, direction);
         qemu_mutex_unlock(&tg->lock);
     }
 
@@ -421,18 +426,19 @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
     aio_wait_kick();
 }
 
-static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write)
+static void throttle_group_restart_queue(ThrottleGroupMember *tgm,
+                                        ThrottleDirection direction)
 {
     Coroutine *co;
     RestartData *rd = g_new0(RestartData, 1);
 
     rd->tgm = tgm;
-    rd->is_write = is_write;
+    rd->direction = direction;
 
     /* This function is called when a timer is fired or when
      * throttle_group_restart_tgm() is called. Either way, there can
      * be no timer pending on this tgm at this point */
-    assert(!timer_pending(tgm->throttle_timers.timers[is_write]));
+    assert(!timer_pending(tgm->throttle_timers.timers[direction]));
 
     qatomic_inc(&tgm->restart_pending);
 
@@ -442,18 +448,18 @@ static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write
 
 void throttle_group_restart_tgm(ThrottleGroupMember *tgm)
 {
-    int i;
+    ThrottleDirection dir;
 
     if (tgm->throttle_state) {
-        for (i = 0; i < 2; i++) {
-            QEMUTimer *t = tgm->throttle_timers.timers[i];
+        for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+            QEMUTimer *t = tgm->throttle_timers.timers[dir];
             if (timer_pending(t)) {
                 /* If there's a pending timer on this tgm, fire it now */
                 timer_del(t);
-                timer_cb(tgm, i);
+                timer_cb(tgm, dir);
             } else {
                 /* Else run the next request from the queue manually */
-                throttle_group_restart_queue(tgm, i);
+                throttle_group_restart_queue(tgm, dir);
             }
         }
     }
@@ -497,30 +503,30 @@ void throttle_group_get_config(ThrottleGroupMember *tgm, ThrottleConfig *cfg)
  * because it had been throttled.
  *
  * @tgm:       the ThrottleGroupMember whose request had been throttled
- * @is_write:  the type of operation (read/write)
+ * @direction: the ThrottleDirection
  */
-static void timer_cb(ThrottleGroupMember *tgm, bool is_write)
+static void timer_cb(ThrottleGroupMember *tgm, ThrottleDirection direction)
 {
     ThrottleState *ts = tgm->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
 
     /* The timer has just been fired, so we can update the flag */
     qemu_mutex_lock(&tg->lock);
-    tg->any_timer_armed[is_write] = false;
+    tg->any_timer_armed[direction] = false;
     qemu_mutex_unlock(&tg->lock);
 
     /* Run the request that was waiting for this timer */
-    throttle_group_restart_queue(tgm, is_write);
+    throttle_group_restart_queue(tgm, direction);
 }
 
 static void read_timer_cb(void *opaque)
 {
-    timer_cb(opaque, false);
+    timer_cb(opaque, THROTTLE_READ);
 }
 
 static void write_timer_cb(void *opaque)
 {
-    timer_cb(opaque, true);
+    timer_cb(opaque, THROTTLE_WRITE);
 }
 
 /* Register a ThrottleGroupMember from the throttling group, also initializing
@@ -538,7 +544,7 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
                                  const char *groupname,
                                  AioContext *ctx)
 {
-    int i;
+    ThrottleDirection dir;
     ThrottleState *ts = throttle_group_incref(groupname);
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
 
@@ -546,12 +552,13 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
     tgm->aio_context = ctx;
     qatomic_set(&tgm->restart_pending, 0);
 
-    qemu_mutex_lock(&tg->lock);
+    QEMU_LOCK_GUARD(&tg->lock);
     /* If the ThrottleGroup is new set this ThrottleGroupMember as the token */
-    for (i = 0; i < 2; i++) {
-        if (!tg->tokens[i]) {
-            tg->tokens[i] = tgm;
+    for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+        if (!tg->tokens[dir]) {
+            tg->tokens[dir] = tgm;
         }
+        qemu_co_queue_init(&tgm->throttled_reqs[dir]);
     }
 
     QLIST_INSERT_HEAD(&tg->head, tgm, round_robin);
@@ -563,10 +570,6 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm,
                          write_timer_cb,
                          tgm);
     qemu_co_mutex_init(&tgm->throttled_reqs_lock);
-    qemu_co_queue_init(&tgm->throttled_reqs[0]);
-    qemu_co_queue_init(&tgm->throttled_reqs[1]);
-
-    qemu_mutex_unlock(&tg->lock);
 }
 
 /* Unregister a ThrottleGroupMember from its group, removing it from the list,
@@ -584,7 +587,7 @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm)
     ThrottleState *ts = tgm->throttle_state;
     ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
     ThrottleGroupMember *token;
-    int i;
+    ThrottleDirection dir;
 
     if (!ts) {
         /* Discard already unregistered tgm */
@@ -594,25 +597,25 @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm)
     /* Wait for throttle_group_restart_queue_entry() coroutines to finish */
     AIO_WAIT_WHILE(tgm->aio_context, qatomic_read(&tgm->restart_pending) > 0);
 
-    qemu_mutex_lock(&tg->lock);
-    for (i = 0; i < 2; i++) {
-        assert(tgm->pending_reqs[i] == 0);
-        assert(qemu_co_queue_empty(&tgm->throttled_reqs[i]));
-        assert(!timer_pending(tgm->throttle_timers.timers[i]));
-        if (tg->tokens[i] == tgm) {
-            token = throttle_group_next_tgm(tgm);
-            /* Take care of the case where this is the last tgm in the group */
-            if (token == tgm) {
-                token = NULL;
+    WITH_QEMU_LOCK_GUARD(&tg->lock) {
+        for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+            assert(tgm->pending_reqs[dir] == 0);
+            assert(qemu_co_queue_empty(&tgm->throttled_reqs[dir]));
+            assert(!timer_pending(tgm->throttle_timers.timers[dir]));
+            if (tg->tokens[dir] == tgm) {
+                token = throttle_group_next_tgm(tgm);
+                /* Take care of the case where this is the last tgm in the group */
+                if (token == tgm) {
+                    token = NULL;
+                }
+                tg->tokens[dir] = token;
             }
-            tg->tokens[i] = token;
         }
-    }
 
-    /* remove the current tgm from the list */
-    QLIST_REMOVE(tgm, round_robin);
-    throttle_timers_destroy(&tgm->throttle_timers);
-    qemu_mutex_unlock(&tg->lock);
+        /* remove the current tgm from the list */
+        QLIST_REMOVE(tgm, round_robin);
+        throttle_timers_destroy(&tgm->throttle_timers);
+    }
 
     throttle_group_unref(&tg->ts);
     tgm->throttle_state = NULL;
@@ -630,22 +633,23 @@ void throttle_group_detach_aio_context(ThrottleGroupMember *tgm)
 {
     ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts);
     ThrottleTimers *tt = &tgm->throttle_timers;
-    int i;
+    ThrottleDirection dir;
 
     /* Requests must have been drained */
-    assert(tgm->pending_reqs[0] == 0 && tgm->pending_reqs[1] == 0);
-    assert(qemu_co_queue_empty(&tgm->throttled_reqs[0]));
-    assert(qemu_co_queue_empty(&tgm->throttled_reqs[1]));
+    for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+        assert(tgm->pending_reqs[dir] == 0);
+        assert(qemu_co_queue_empty(&tgm->throttled_reqs[dir]));
+    }
 
     /* Kick off next ThrottleGroupMember, if necessary */
-    qemu_mutex_lock(&tg->lock);
-    for (i = 0; i < 2; i++) {
-        if (timer_pending(tt->timers[i])) {
-            tg->any_timer_armed[i] = false;
-            schedule_next_request(tgm, i);
+    WITH_QEMU_LOCK_GUARD(&tg->lock) {
+        for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+            if (timer_pending(tt->timers[dir])) {
+                tg->any_timer_armed[dir] = false;
+                schedule_next_request(tgm, dir);
+            }
         }
     }
-    qemu_mutex_unlock(&tg->lock);
 
     throttle_timers_detach_aio_context(tt);
     tgm->aio_context = NULL;
index b685166ad4a940a059f06269daddd726b08038ad..97972d1f15e9ae58780ee9ac6298ec60754d1a24 100644 (file)
@@ -18,6 +18,8 @@
  */
 
 #include "qemu/osdep.h"
+#include "block/block-io.h"
+#include "block/block_int.h"
 #include "block/throttle-groups.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
@@ -78,12 +80,13 @@ static int throttle_open(BlockDriverState *bs, QDict *options,
     char *group;
     int ret;
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-                               false, errp);
-    if (!bs->file) {
-        return -EINVAL;
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
     }
+
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     bs->supported_write_flags = bs->file->bs->supported_write_flags |
                                 BDRV_REQ_WRITE_UNCHANGED;
     bs->supported_zero_flags = bs->file->bs->supported_zero_flags |
@@ -106,61 +109,61 @@ static void throttle_close(BlockDriverState *bs)
 }
 
 
-static int64_t throttle_getlength(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+throttle_co_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file->bs);
+    return bdrv_co_getlength(bs->file->bs);
 }
 
-static int coroutine_fn throttle_co_preadv(BlockDriverState *bs,
-                                           uint64_t offset, uint64_t bytes,
-                                           QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+throttle_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                   QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
 
     ThrottleGroupMember *tgm = bs->opaque;
-    throttle_group_co_io_limits_intercept(tgm, bytes, false);
+    throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_READ);
 
     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
 
-static int coroutine_fn throttle_co_pwritev(BlockDriverState *bs,
-                                            uint64_t offset, uint64_t bytes,
-                                            QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+throttle_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                    QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     ThrottleGroupMember *tgm = bs->opaque;
-    throttle_group_co_io_limits_intercept(tgm, bytes, true);
+    throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE);
 
     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 }
 
-static int coroutine_fn throttle_co_pwrite_zeroes(BlockDriverState *bs,
-                                                  int64_t offset, int bytes,
-                                                  BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+throttle_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          BdrvRequestFlags flags)
 {
     ThrottleGroupMember *tgm = bs->opaque;
-    throttle_group_co_io_limits_intercept(tgm, bytes, true);
+    throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE);
 
     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
-static int coroutine_fn throttle_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int bytes)
+static int coroutine_fn GRAPH_RDLOCK
+throttle_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     ThrottleGroupMember *tgm = bs->opaque;
-    throttle_group_co_io_limits_intercept(tgm, bytes, true);
+    throttle_group_co_io_limits_intercept(tgm, bytes, THROTTLE_WRITE);
 
     return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
 
-static int coroutine_fn throttle_co_pwritev_compressed(BlockDriverState *bs,
-                                                       uint64_t offset,
-                                                       uint64_t bytes,
-                                                       QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+throttle_co_pwritev_compressed(BlockDriverState *bs, int64_t offset,
+                               int64_t bytes, QEMUIOVector *qiov)
 {
     return throttle_co_pwritev(bs, offset, bytes, qiov,
                                BDRV_REQ_WRITE_COMPRESSED);
 }
 
-static int throttle_co_flush(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK throttle_co_flush(BlockDriverState *bs)
 {
     return bdrv_co_flush(bs->file->bs);
 }
@@ -214,7 +217,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state)
     reopen_state->opaque = NULL;
 }
 
-static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
+static void throttle_drain_begin(BlockDriverState *bs)
 {
     ThrottleGroupMember *tgm = bs->opaque;
     if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) {
@@ -222,7 +225,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
     }
 }
 
-static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs)
+static void throttle_drain_end(BlockDriverState *bs)
 {
     ThrottleGroupMember *tgm = bs->opaque;
     assert(tgm->io_limits_disabled);
@@ -245,7 +248,7 @@ static BlockDriver bdrv_throttle = {
 
     .bdrv_child_perm                    =   bdrv_default_perms,
 
-    .bdrv_getlength                     =   throttle_getlength,
+    .bdrv_co_getlength                  =   throttle_co_getlength,
 
     .bdrv_co_preadv                     =   throttle_co_preadv,
     .bdrv_co_pwritev                    =   throttle_co_pwritev,
@@ -261,8 +264,8 @@ static BlockDriver bdrv_throttle = {
     .bdrv_reopen_commit                 =   throttle_reopen_commit,
     .bdrv_reopen_abort                  =   throttle_reopen_abort,
 
-    .bdrv_co_drain_begin                =   throttle_co_drain_begin,
-    .bdrv_co_drain_end                  =   throttle_co_drain_end,
+    .bdrv_drain_begin                   =   throttle_drain_begin,
+    .bdrv_drain_end                     =   throttle_drain_end,
 
     .is_filter                          =   true,
     .strong_runtime_opts                =   throttle_strong_runtime_opts,
index 8368f4acb0bba73b1b407c722483d36adba89549..8e789e1f12fe5781868b43b041acbe773c95fbd0 100644 (file)
@@ -1,22 +1,22 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 
 # ../block.c
 bdrv_open_common(void *bs, const char *filename, int flags, const char *format_name) "bs %p filename \"%s\" flags 0x%x format_name \"%s\""
 bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
 
 # block-backend.c
-blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x"
-blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x"
+blk_co_preadv(void *blk, void *bs, int64_t offset, int64_t bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %" PRId64 " flags 0x%x"
+blk_co_pwritev(void *blk, void *bs, int64_t offset, int64_t bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %" PRId64 " flags 0x%x"
 blk_root_attach(void *child, void *blk, void *bs) "child %p blk %p bs %p"
 blk_root_detach(void *child, void *blk, void *bs) "child %p blk %p bs %p"
 
 # io.c
-bdrv_co_preadv(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x"
-bdrv_co_pwritev(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x"
-bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags 0x%x"
-bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, int64_t cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %"PRId64
-bdrv_co_copy_range_from(void *src, uint64_t src_offset, void *dst, uint64_t dst_offset, uint64_t bytes, int read_flags, int write_flags) "src %p offset %"PRIu64" dst %p offset %"PRIu64" bytes %"PRIu64" rw flags 0x%x 0x%x"
-bdrv_co_copy_range_to(void *src, uint64_t src_offset, void *dst, uint64_t dst_offset, uint64_t bytes, int read_flags, int write_flags) "src %p offset %"PRIu64" dst %p offset %"PRIu64" bytes %"PRIu64" rw flags 0x%x 0x%x"
+bdrv_co_preadv_part(void *bs, int64_t offset, int64_t bytes, unsigned int flags) "bs %p offset %" PRId64 " bytes %" PRId64 " flags 0x%x"
+bdrv_co_pwritev_part(void *bs, int64_t offset, int64_t bytes, unsigned int flags) "bs %p offset %" PRId64 " bytes %" PRId64 " flags 0x%x"
+bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int64_t bytes, int flags) "bs %p offset %" PRId64 " bytes %" PRId64 " flags 0x%x"
+bdrv_co_do_copy_on_readv(void *bs, int64_t offset, int64_t bytes, int64_t cluster_offset, int64_t cluster_bytes) "bs %p offset %" PRId64 " bytes %" PRId64 " cluster_offset %" PRId64 " cluster_bytes %" PRId64
+bdrv_co_copy_range_from(void *src, int64_t src_offset, void *dst, int64_t dst_offset, int64_t bytes, int read_flags, int write_flags) "src %p offset %" PRId64 " dst %p offset %" PRId64 " bytes %" PRId64 " rw flags 0x%x 0x%x"
+bdrv_co_copy_range_to(void *src, int64_t src_offset, void *dst, int64_t dst_offset, int64_t bytes, int read_flags, int write_flags) "src %p offset %" PRId64 " dst %p offset %" PRId64 " bytes %" PRId64 " rw flags 0x%x 0x%x"
 
 # stream.c
 stream_one_iteration(void *s, int64_t offset, uint64_t bytes, int is_allocated) "s %p offset %" PRId64 " bytes %" PRIu64 " is_allocated %d"
@@ -64,9 +64,8 @@ file_paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "
 # io_uring.c
 luring_init_state(void *s, size_t size) "s %p size %zu"
 luring_cleanup_state(void *s) "%p freed"
-luring_io_plug(void *s) "LuringState %p plug"
-luring_io_unplug(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d"
-luring_do_submit(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d"
+luring_unplug_fn(void *s, int blocked, int queued, int inflight) "LuringState %p blocked %d queued %d inflight %d"
+luring_do_submit(void *s, int blocked, int queued, int inflight) "LuringState %p blocked %d queued %d inflight %d"
 luring_do_submit_done(void *s, int ret) "LuringState %p submitted to kernel %d"
 luring_co_submit(void *bs, void *s, void *luringcb, int fd, uint64_t offset, size_t nbytes, int type) "bs %p s %p luringcb %p fd %d offset %" PRId64 " nbytes %zd type %d"
 luring_process_completion(void *s, void *aiocb, int ret) "LuringState %p luringcb %p ret %d"
@@ -75,13 +74,13 @@ luring_resubmit_short_read(void *s, void *luringcb, int nread) "LuringState %p l
 
 # qcow2.c
 qcow2_add_task(void *co, void *bs, void *pool, const char *action, int cluster_type, uint64_t host_offset, uint64_t offset, uint64_t bytes, void *qiov, size_t qiov_offset) "co %p bs %p pool %p: %s: cluster_type %d file_cluster_offset %" PRIu64 " offset %" PRIu64 " bytes %" PRIu64 " qiov %p qiov_offset %zu"
-qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d"
+qcow2_writev_start_req(void *co, int64_t offset, int64_t bytes) "co %p offset 0x%" PRIx64 " bytes %" PRId64
 qcow2_writev_done_req(void *co, int ret) "co %p ret %d"
 qcow2_writev_start_part(void *co) "co %p"
 qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d"
 qcow2_writev_data(void *co, uint64_t offset) "co %p offset 0x%" PRIx64
-qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d"
-qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d"
+qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int64_t bytes) "co %p offset 0x%" PRIx64 " bytes %" PRId64
+qcow2_pwrite_zeroes(void *co, int64_t offset, int64_t bytes) "co %p offset 0x%" PRIx64 " bytes %" PRId64
 qcow2_skip_cow(void *co, uint64_t offset, int nb_clusters) "co %p offset 0x%" PRIx64 " nb_clusters %d"
 
 # qcow2-cluster.c
@@ -136,11 +135,11 @@ qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
 # nvme.c
 nvme_controller_capability_raw(uint64_t value) "0x%08"PRIx64
 nvme_controller_capability(const char *desc, uint64_t value) "%s: %"PRIu64
+nvme_controller_spec_version(uint32_t mjr, uint32_t mnr, uint32_t ter) "Specification supported: %u.%u.%u"
 nvme_kick(void *s, unsigned q_index) "s %p q #%u"
 nvme_dma_flush_queue_wait(void *s) "s %p"
 nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x"
 nvme_process_completion(void *s, unsigned q_index, int inflight) "s %p q #%u inflight %d"
-nvme_process_completion_queue_plugged(void *s, unsigned q_index) "s %p q #%u"
 nvme_complete_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d"
 nvme_submit_command(void *s, unsigned q_index, int cid) "s %p q #%u cid %d"
 nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x"
@@ -151,12 +150,12 @@ nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p off
 nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x"
 nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset 0x%"PRIx64" bytes %"PRId64" niov %d is_write %d"
 nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" ret %d"
-nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64""
-nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d"
+nvme_dsm(void *s, int64_t offset, int64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64""
+nvme_dsm_done(void *s, int64_t offset, int64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d"
 nvme_dma_map_flush(void *s) "s %p"
 nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u"
-nvme_create_queue_pair(unsigned q_index, void *q, unsigned size, void *aio_context, int fd) "index %u q %p size %u aioctx %p fd %d"
-nvme_free_queue_pair(unsigned q_index, void *q) "index %u q %p"
+nvme_create_queue_pair(unsigned q_index, void *q, size_t size, void *aio_context, int fd) "index %u q %p size %zu aioctx %p fd %d"
+nvme_free_queue_pair(unsigned q_index, void *q, void *cq, void *sq) "index %u q %p cq %p sq %p"
 nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d"
 nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64
 nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d"
@@ -167,10 +166,13 @@ iscsi_xcopy(void *src_lun, uint64_t src_off, void *dst_lun, uint64_t dst_off, ui
 # nbd.c
 nbd_parse_blockstatus_compliance(const char *err) "ignoring extra data from non-compliant server: %s"
 nbd_structured_read_compliance(const char *type) "server sent non-compliant unaligned read %s chunk"
+nbd_extended_headers_compliance(const char *type) "server sent non-compliant %s chunk not matching choice of extended headers"
 nbd_read_reply_entry_fail(int ret, const char *err) "ret = %d, err: %s"
-nbd_co_request_fail(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name, int ret, const char *err) "Request failed { .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) } ret = %d, err: %s"
+nbd_co_request_fail(uint64_t from, uint64_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name, int ret, const char *err) "Request failed { .from = %" PRIu64", .len = %" PRIu64 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) } ret = %d, err: %s"
 nbd_client_handshake(const char *export_name) "export '%s'"
 nbd_client_handshake_success(const char *export_name) "export '%s'"
+nbd_reconnect_attempt(unsigned in_flight) "in_flight %u"
+nbd_reconnect_attempt_result(int ret, unsigned in_flight) "ret %d in_flight %u"
 
 # ssh.c
 ssh_restart_coroutine(void *co) "co=%p"
@@ -205,20 +207,11 @@ file_copy_file_range(void *bs, int src, int64_t src_off, int dst, int64_t dst_of
 file_FindEjectableOpticalMedia(const char *media) "Matching using %s"
 file_setup_cdrom(const char *partition) "Using %s as optical disc"
 file_hdev_is_sg(int type, int version) "SG device found: type=%d, version=%d"
-
-# sheepdog.c
-sheepdog_reconnect_to_sdog(void) "Wait for connection to be established"
-sheepdog_aio_read_response(void) "disable cache since the server doesn't support it"
-sheepdog_open(uint32_t vid) "0x%" PRIx32 " snapshot inode was open"
-sheepdog_close(const char *name) "%s"
-sheepdog_create_branch_snapshot(uint32_t vdi) "0x%" PRIx32 " is snapshot"
-sheepdog_create_branch_created(uint32_t vdi) "0x%" PRIx32 " is created"
-sheepdog_create_branch_new(uint32_t vdi) "0x%" PRIx32 " was newly created"
-sheepdog_co_rw_vector_update(uint32_t vdi, uint64_t oid, uint64_t data, long idx) "update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld"
-sheepdog_co_rw_vector_new(uint64_t oid) "new oid 0x%" PRIx64
-sheepdog_snapshot_create_info(const char *sn_name, const char *id, const char *name, int64_t size, int is_snapshot) "sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " " "is_snapshot %d"
-sheepdog_snapshot_create(const char *sn_name, const char *id) "%s %s"
-sheepdog_snapshot_create_inode(const char *name, uint32_t snap, uint32_t vdi) "s->inode: name %s snap_id 0x%" PRIx32 " vdi 0x%" PRIx32
+file_flush_fdatasync_failed(int err) "errno %d"
+zbd_zone_report(void *bs, unsigned int nr_zones, int64_t sector) "bs %p report %d zones starting at sector offset 0x%" PRIx64 ""
+zbd_zone_mgmt(void *bs, const char *op_name, int64_t sector, int64_t len) "bs %p %s starts at sector offset 0x%" PRIx64 " over a range of 0x%" PRIx64 " sectors"
+zbd_zone_append(void *bs, int64_t sector) "bs %p append at sector offset 0x%" PRIx64 ""
+zbd_zone_append_complete(void *bs, int64_t sector) "bs %p returns append sector 0x%" PRIx64 ""
 
 # ssh.c
 sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)"
index 5627e7d764aeeccba87e569f8209caf82dccaa35..3b57becb9fe0d3f3cc0d31e254d82d5fb98e2c4c 100644 (file)
@@ -64,6 +64,7 @@
 #include "qemu/coroutine.h"
 #include "qemu/cutils.h"
 #include "qemu/uuid.h"
+#include "qemu/memalign.h"
 
 /* Code configuration options. */
 
@@ -238,7 +239,7 @@ static void vdi_header_to_le(VdiHeader *header)
 
 static void vdi_header_print(VdiHeader *header)
 {
-    char uuidstr[37];
+    char uuidstr[UUID_STR_LEN];
     QemuUUID uuid;
     logout("text        %s", header->text);
     logout("signature   0x%08x\n", header->signature);
@@ -326,9 +327,10 @@ static int coroutine_fn vdi_co_check(BlockDriverState *bs, BdrvCheckResult *res,
     return 0;
 }
 
-static int vdi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+static int coroutine_fn
+vdi_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
-    /* TODO: vdi_get_info would be needed for machine snapshots.
+    /* TODO: vdi_co_get_info would be needed for machine snapshots.
        vm_state_offset is still missing. */
     BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
     logout("\n");
@@ -376,15 +378,16 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
     int ret;
     QemuUUID uuid_link, uuid_parent;
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_IMAGE, false, errp);
-    if (!bs->file) {
-        return -EINVAL;
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
     }
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     logout("\n");
 
-    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+    ret = bdrv_pread(bs->file, 0, sizeof(header), &header, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -484,8 +487,8 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    ret = bdrv_pread(bs->file, header.offset_bmap, s->bmap,
-                     bmap_size * SECTOR_SIZE);
+    ret = bdrv_pread(bs->file, header.offset_bmap, bmap_size * SECTOR_SIZE,
+                     s->bmap, 0);
     if (ret < 0) {
         goto fail_free_bmap;
     }
@@ -494,9 +497,9 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
     error_setg(&s->migration_blocker, "The vdi format used by node '%s' "
                "does not support live migration",
                bdrv_get_device_or_node_name(bs));
-    ret = migrate_add_blocker(s->migration_blocker, errp);
+
+    ret = migrate_add_blocker_normal(&s->migration_blocker, errp);
     if (ret < 0) {
-        error_free(s->migration_blocker);
         goto fail_free_bmap;
     }
 
@@ -517,11 +520,10 @@ static int vdi_reopen_prepare(BDRVReopenState *state,
     return 0;
 }
 
-static int coroutine_fn vdi_co_block_status(BlockDriverState *bs,
-                                            bool want_zero,
-                                            int64_t offset, int64_t bytes,
-                                            int64_t *pnum, int64_t *map,
-                                            BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+vdi_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
+                    int64_t bytes, int64_t *pnum, int64_t *map,
+                    BlockDriverState **file)
 {
     BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
     size_t bmap_index = offset / s->block_size;
@@ -543,9 +545,9 @@ static int coroutine_fn vdi_co_block_status(BlockDriverState *bs,
         (s->header.image_type == VDI_TYPE_STATIC ? BDRV_BLOCK_RECURSE : 0);
 }
 
-static int coroutine_fn
-vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-              QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+vdi_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+              QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVVdiState *s = bs->opaque;
     QEMUIOVector local_qiov;
@@ -599,9 +601,9 @@ vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     return ret;
 }
 
-static int coroutine_fn
-vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-               QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+vdi_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+               QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVVdiState *s = bs->opaque;
     QEMUIOVector local_qiov;
@@ -633,7 +635,6 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
         bmap_entry = le32_to_cpu(s->bmap[block_index]);
         if (!VDI_IS_ALLOCATED(bmap_entry)) {
             /* Allocate new block and write to it. */
-            uint64_t data_offset;
             qemu_co_rwlock_upgrade(&s->bmap_lock);
             bmap_entry = le32_to_cpu(s->bmap[block_index]);
             if (VDI_IS_ALLOCATED(bmap_entry)) {
@@ -663,7 +664,8 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
              * so this full-cluster write does not overlap a partial write
              * of the same cluster, issued from the "else" branch.
              */
-            ret = bdrv_pwrite(bs->file, data_offset, block, s->block_size);
+            ret = bdrv_co_pwrite(bs->file, data_offset, s->block_size, block,
+                                 0);
             qemu_co_rwlock_unlock(&s->bmap_lock);
         } else {
 nonallocating_write:
@@ -690,23 +692,26 @@ nonallocating_write:
 
     logout("finished data write\n");
     if (ret < 0) {
+        g_free(block);
         return ret;
     }
 
     if (block) {
         /* One or more new blocks were allocated. */
-        VdiHeader *header = (VdiHeader *) block;
+        VdiHeader *header;
         uint8_t *base;
-        uint64_t offset;
+        uint64_t bmap_offset;
         uint32_t n_sectors;
 
+        g_free(block);
+        header = g_malloc(sizeof(*header));
+
         logout("now writing modified header\n");
         assert(VDI_IS_ALLOCATED(bmap_first));
         *header = s->header;
         vdi_header_to_le(header);
-        ret = bdrv_pwrite(bs->file, 0, block, sizeof(VdiHeader));
-        g_free(block);
-        block = NULL;
+        ret = bdrv_co_pwrite(bs->file, 0, sizeof(*header), header, 0);
+        g_free(header);
 
         if (ret < 0) {
             return ret;
@@ -718,19 +723,20 @@ nonallocating_write:
         bmap_first /= (SECTOR_SIZE / sizeof(uint32_t));
         bmap_last /= (SECTOR_SIZE / sizeof(uint32_t));
         n_sectors = bmap_last - bmap_first + 1;
-        offset = s->bmap_sector + bmap_first;
+        bmap_offset = s->bmap_sector + bmap_first;
         base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE;
         logout("will write %u block map sectors starting from entry %u\n",
                n_sectors, bmap_first);
-        ret = bdrv_pwrite(bs->file, offset * SECTOR_SIZE, base,
-                          n_sectors * SECTOR_SIZE);
+        ret = bdrv_co_pwrite(bs->file, bmap_offset * SECTOR_SIZE,
+                             n_sectors * SECTOR_SIZE, base, 0);
     }
 
-    return ret < 0 ? ret : 0;
+    return ret;
 }
 
-static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
-                                         size_t block_size, Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vdi_co_do_create(BlockdevCreateOptions *create_options, size_t block_size,
+                 Error **errp)
 {
     BlockdevCreateOptionsVdi *vdi_opts;
     int ret = 0;
@@ -795,14 +801,14 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
     }
 
     /* Create BlockBackend to write to the image */
-    bs_file = bdrv_open_blockdev_ref(vdi_opts->file, errp);
+    bs_file = bdrv_co_open_blockdev_ref(vdi_opts->file, errp);
     if (!bs_file) {
         ret = -EIO;
         goto exit;
     }
 
-    blk = blk_new_with_bs(bs_file, BLK_PERM_WRITE | BLK_PERM_RESIZE,
-                          BLK_PERM_ALL, errp);
+    blk = blk_co_new_with_bs(bs_file, BLK_PERM_WRITE | BLK_PERM_RESIZE,
+                             BLK_PERM_ALL, errp);
     if (!blk) {
         ret = -EPERM;
         goto exit;
@@ -841,7 +847,7 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
         vdi_header_print(&header);
     }
     vdi_header_to_le(&header);
-    ret = blk_pwrite(blk, offset, &header, sizeof(header), 0);
+    ret = blk_co_pwrite(blk, offset, sizeof(header), &header, 0);
     if (ret < 0) {
         error_setg(errp, "Error writing header");
         goto exit;
@@ -862,7 +868,7 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
                 bmap[i] = VDI_UNALLOCATED;
             }
         }
-        ret = blk_pwrite(blk, offset, bmap, bmap_size, 0);
+        ret = blk_co_pwrite(blk, offset, bmap_size, bmap, 0);
         if (ret < 0) {
             error_setg(errp, "Error writing bmap");
             goto exit;
@@ -871,8 +877,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
     }
 
     if (image_type == VDI_TYPE_STATIC) {
-        ret = blk_truncate(blk, offset + blocks * block_size, false,
-                           PREALLOC_MODE_OFF, 0, errp);
+        ret = blk_co_truncate(blk, offset + blocks * block_size, false,
+                              PREALLOC_MODE_OFF, 0, errp);
         if (ret < 0) {
             error_prepend(errp, "Failed to statically allocate file");
             goto exit;
@@ -881,22 +887,21 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
 
     ret = 0;
 exit:
-    blk_unref(blk);
-    bdrv_unref(bs_file);
+    blk_co_unref(blk);
+    bdrv_co_unref(bs_file);
     g_free(bmap);
     return ret;
 }
 
-static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options,
-                                      Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vdi_co_create(BlockdevCreateOptions *create_options, Error **errp)
 {
     return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp);
 }
 
-static int coroutine_fn vdi_co_create_opts(BlockDriver *drv,
-                                           const char *filename,
-                                           QemuOpts *opts,
-                                           Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vdi_co_create_opts(BlockDriver *drv, const char *filename,
+                   QemuOpts *opts, Error **errp)
 {
     QDict *qdict = NULL;
     BlockdevCreateOptions *create_options = NULL;
@@ -930,13 +935,13 @@ static int coroutine_fn vdi_co_create_opts(BlockDriver *drv,
     qdict = qemu_opts_to_qdict_filtered(opts, NULL, &vdi_create_opts, true);
 
     /* Create and open the file (protocol layer) */
-    ret = bdrv_create_file(filename, opts, errp);
+    ret = bdrv_co_create_file(filename, opts, errp);
     if (ret < 0) {
         goto done;
     }
 
-    bs_file = bdrv_open(filename, NULL, NULL,
-                        BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs_file = bdrv_co_open(filename, NULL, NULL,
+                           BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (!bs_file) {
         ret = -EIO;
         goto done;
@@ -971,7 +976,7 @@ static int coroutine_fn vdi_co_create_opts(BlockDriver *drv,
 done:
     qobject_unref(qdict);
     qapi_free_BlockdevCreateOptions(create_options);
-    bdrv_unref(bs_file);
+    bdrv_co_unref(bs_file);
     return ret;
 }
 
@@ -981,11 +986,10 @@ static void vdi_close(BlockDriverState *bs)
 
     qemu_vfree(s->bmap);
 
-    migrate_del_blocker(s->migration_blocker);
-    error_free(s->migration_blocker);
+    migrate_del_blocker(&s->migration_blocker);
 }
 
-static int vdi_has_zero_init(BlockDriverState *bs)
+static int GRAPH_RDLOCK vdi_has_zero_init(BlockDriverState *bs)
 {
     BDRVVdiState *s = bs->opaque;
 
@@ -1045,7 +1049,7 @@ static BlockDriver bdrv_vdi = {
     .bdrv_co_pwritev    = vdi_co_pwritev,
 #endif
 
-    .bdrv_get_info = vdi_get_info,
+    .bdrv_co_get_info = vdi_co_get_info,
 
     .is_format = true,
     .create_opts = &vdi_create_opts,
index 404fb5f3cb035ea2f6cfd2f1dc5b513adc724987..4385a2d4f622e94e1d548b42dfba8c50040d45e9 100644 (file)
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "qemu/error-report.h"
 #include "qemu/bswap.h"
+#include "qemu/memalign.h"
 #include "vhdx.h"
 
 
@@ -53,8 +55,9 @@ static const MSGUID zero_guid = { 0 };
 
 /* Allow peeking at the hdr entry at the beginning of the current
  * read index, without advancing the read index */
-static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
-                             VHDXLogEntryHeader *hdr)
+static int GRAPH_RDLOCK
+vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
+                  VHDXLogEntryHeader *hdr)
 {
     int ret = 0;
     uint64_t offset;
@@ -83,7 +86,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
 
     offset = log->offset + read;
 
-    ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader));
+    ret = bdrv_pread(bs->file, offset, sizeof(VHDXLogEntryHeader), hdr, 0);
     if (ret < 0) {
         goto exit;
     }
@@ -105,7 +108,7 @@ static int vhdx_log_inc_idx(uint32_t idx, uint64_t length)
 
 
 /* Reset the log to empty */
-static void vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s)
+static void GRAPH_RDLOCK vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s)
 {
     MSGUID guid = { 0 };
     s->log.read = s->log.write = 0;
@@ -125,9 +128,10 @@ static void vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s)
  * not modified.
  *
  * 0 is returned on success, -errno otherwise.  */
-static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
-                                 uint32_t *sectors_read, void *buffer,
-                                 uint32_t num_sectors, bool peek)
+static int GRAPH_RDLOCK
+vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
+                      uint32_t *sectors_read, void *buffer,
+                      uint32_t num_sectors, bool peek)
 {
     int ret = 0;
     uint64_t offset;
@@ -143,7 +147,7 @@ static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
         }
         offset = log->offset + read;
 
-        ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE);
+        ret = bdrv_pread(bs->file, offset, VHDX_LOG_SECTOR_SIZE, buffer, 0);
         if (ret < 0) {
             goto exit;
         }
@@ -167,9 +171,10 @@ exit:
  * It is assumed that 'buffer' is at least 4096*num_sectors large.
  *
  * 0 is returned on success, -errno otherwise */
-static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
-                                  uint32_t *sectors_written, void *buffer,
-                                  uint32_t num_sectors)
+static int coroutine_fn GRAPH_RDLOCK
+vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
+                       uint32_t *sectors_written, void *buffer,
+                       uint32_t num_sectors)
 {
     int ret = 0;
     uint64_t offset;
@@ -193,8 +198,7 @@ static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
             /* full */
             break;
         }
-        ret = bdrv_pwrite(bs->file, offset, buffer_tmp,
-                          VHDX_LOG_SECTOR_SIZE);
+        ret = bdrv_co_pwrite(bs->file, offset, VHDX_LOG_SECTOR_SIZE, buffer_tmp, 0);
         if (ret < 0) {
             goto exit;
         }
@@ -331,9 +335,9 @@ static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
  * will allocate all the space for buffer, which must be NULL when
  * passed into this function. Each descriptor will also be validated,
  * and error returned if any are invalid. */
-static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
-                              VHDXLogEntries *log, VHDXLogDescEntries **buffer,
-                              bool convert_endian)
+static int GRAPH_RDLOCK
+vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s, VHDXLogEntries *log,
+                   VHDXLogDescEntries **buffer, bool convert_endian)
 {
     int ret = 0;
     uint32_t desc_sectors;
@@ -410,8 +414,9 @@ exit:
  * For a zero descriptor, it may describe multiple sectors to fill with zeroes.
  * In this case, it should be noted that zeroes are written to disk, and the
  * image file is not extended as a sparse file.  */
-static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
-                               VHDXLogDataSector *data)
+static int GRAPH_RDLOCK
+vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
+                    VHDXLogDataSector *data)
 {
     int ret = 0;
     uint64_t seq, file_offset;
@@ -465,8 +470,8 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
 
     /* count is only > 1 if we are writing zeroes */
     for (i = 0; i < count; i++) {
-        ret = bdrv_pwrite_sync(bs->file, file_offset, buffer,
-                               VHDX_LOG_SECTOR_SIZE);
+        ret = bdrv_pwrite_sync(bs->file, file_offset, VHDX_LOG_SECTOR_SIZE,
+                               buffer, 0);
         if (ret < 0) {
             goto exit;
         }
@@ -482,8 +487,8 @@ exit:
  * file, and then set the log to 'empty' status once complete.
  *
  * The log entries should be validate prior to flushing */
-static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
-                          VHDXLogSequence *logs)
+static int GRAPH_RDLOCK
+vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, VHDXLogSequence *logs)
 {
     int ret = 0;
     int i;
@@ -582,9 +587,10 @@ exit:
     return ret;
 }
 
-static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
-                                   VHDXLogEntries *log, uint64_t seq,
-                                   bool *valid, VHDXLogEntryHeader *entry)
+static int GRAPH_RDLOCK
+vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
+                        VHDXLogEntries *log, uint64_t seq,
+                        bool *valid, VHDXLogEntryHeader *entry)
 {
     int ret = 0;
     VHDXLogEntryHeader hdr;
@@ -661,8 +667,8 @@ free_and_exit:
 /* Search through the log circular buffer, and find the valid, active
  * log sequence, if any exists
  * */
-static int vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s,
-                           VHDXLogSequence *logs)
+static int GRAPH_RDLOCK
+vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s, VHDXLogSequence *logs)
 {
     int ret = 0;
     uint32_t tail;
@@ -801,7 +807,7 @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
     }
 
     if (logs.valid) {
-        if (bs->read_only) {
+        if (bdrv_is_read_only(bs)) {
             bdrv_refresh_filename(bs);
             ret = -EPERM;
             error_setg(errp,
@@ -851,8 +857,9 @@ static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
 }
 
 
-static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
-                          void *data, uint32_t length, uint64_t offset)
+static int coroutine_fn GRAPH_RDLOCK
+vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
+               void *data, uint32_t length, uint64_t offset)
 {
     int ret = 0;
     void *buffer = NULL;
@@ -922,7 +929,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
 
     sectors += partial_sectors;
 
-    file_length = bdrv_getlength(bs->file->bs);
+    file_length = bdrv_co_getlength(bs->file->bs);
     if (file_length < 0) {
         ret = file_length;
         goto exit;
@@ -969,8 +976,8 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
 
         if (i == 0 && leading_length) {
             /* partial sector at the front of the buffer */
-            ret = bdrv_pread(bs->file, file_offset, merged_sector,
-                             VHDX_LOG_SECTOR_SIZE);
+            ret = bdrv_co_pread(bs->file, file_offset, VHDX_LOG_SECTOR_SIZE,
+                                merged_sector, 0);
             if (ret < 0) {
                 goto exit;
             }
@@ -979,10 +986,9 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
             sector_write = merged_sector;
         } else if (i == sectors - 1 && trailing_length) {
             /* partial sector at the end of the buffer */
-            ret = bdrv_pread(bs->file,
-                            file_offset,
-                            merged_sector + trailing_length,
-                            VHDX_LOG_SECTOR_SIZE - trailing_length);
+            ret = bdrv_co_pread(bs->file, file_offset + trailing_length,
+                                VHDX_LOG_SECTOR_SIZE - trailing_length,
+                                merged_sector + trailing_length, 0);
             if (ret < 0) {
                 goto exit;
             }
@@ -1035,8 +1041,9 @@ exit:
 }
 
 /* Perform a log write, and then immediately flush the entire log */
-int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
-                             void *data, uint32_t length, uint64_t offset)
+int coroutine_fn
+vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
+                         void *data, uint32_t length, uint64_t offset)
 {
     int ret = 0;
     VHDXLogSequence logs = { .valid = true,
@@ -1046,7 +1053,7 @@ int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
 
     /* Make sure data written (new and/or changed blocks) is stable
      * on disk, before creating log entry */
-    ret = bdrv_flush(bs);
+    ret = bdrv_co_flush(bs);
     if (ret < 0) {
         goto exit;
     }
@@ -1058,7 +1065,7 @@ int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
     logs.log = s->log;
 
     /* Make sure log is stable on disk */
-    ret = bdrv_flush(bs);
+    ret = bdrv_co_flush(bs);
     if (ret < 0) {
         goto exit;
     }
index 356ec4c455a42be6e5f68b62c712536e9f2f6310..5aa1a13506268c84f4a9a6cbf2dcdfae6a5d94b5 100644 (file)
@@ -25,6 +25,7 @@
 #include "qemu/crc32c.h"
 #include "qemu/bswap.h"
 #include "qemu/error-report.h"
+#include "qemu/memalign.h"
 #include "vhdx.h"
 #include "migration/blocker.h"
 #include "qemu/uuid.h"
@@ -325,7 +326,7 @@ static int vhdx_write_header(BdrvChild *file, VHDXHeader *hdr,
     buffer = qemu_blockalign(bs_file, VHDX_HEADER_SIZE);
     if (read) {
         /* if true, we can't assume the extra reserved bytes are 0 */
-        ret = bdrv_pread(file, offset, buffer, VHDX_HEADER_SIZE);
+        ret = bdrv_pread(file, offset, VHDX_HEADER_SIZE, buffer, 0);
         if (ret < 0) {
             goto exit;
         }
@@ -339,7 +340,7 @@ static int vhdx_write_header(BdrvChild *file, VHDXHeader *hdr,
     vhdx_header_le_export(hdr, header_le);
     vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
                          offsetof(VHDXHeader, checksum));
-    ret = bdrv_pwrite_sync(file, offset, header_le, sizeof(VHDXHeader));
+    ret = bdrv_pwrite_sync(file, offset, sizeof(VHDXHeader), header_le, 0);
 
 exit:
     qemu_vfree(buffer);
@@ -352,8 +353,9 @@ exit:
  *
  *  - non-current header is updated with largest sequence number
  */
-static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
-                              bool generate_data_write_guid, MSGUID *log_guid)
+static int GRAPH_RDLOCK
+vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
+                   bool generate_data_write_guid, MSGUID *log_guid)
 {
     int ret = 0;
     int hdr_idx = 0;
@@ -415,8 +417,8 @@ int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s,
 }
 
 /* opens the specified header block from the VHDX file header section */
-static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
-                              Error **errp)
+static void GRAPH_RDLOCK
+vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s, Error **errp)
 {
     int ret;
     VHDXHeader *header1;
@@ -439,8 +441,8 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
     /* We have to read the whole VHDX_HEADER_SIZE instead of
      * sizeof(VHDXHeader), because the checksum is over the whole
      * region */
-    ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer,
-                     VHDX_HEADER_SIZE);
+    ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, VHDX_HEADER_SIZE, buffer,
+                     0);
     if (ret < 0) {
         goto fail;
     }
@@ -456,8 +458,8 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
         }
     }
 
-    ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer,
-                     VHDX_HEADER_SIZE);
+    ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, VHDX_HEADER_SIZE, buffer,
+                     0);
     if (ret < 0) {
         goto fail;
     }
@@ -516,7 +518,8 @@ exit:
 }
 
 
-static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
+static int GRAPH_RDLOCK
+vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
 {
     int ret = 0;
     uint8_t *buffer;
@@ -530,8 +533,8 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
      * whole block */
     buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);
 
-    ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
-                     VHDX_HEADER_BLOCK_SIZE);
+    ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET,
+                     VHDX_HEADER_BLOCK_SIZE, buffer, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -633,7 +636,8 @@ fail:
  * Also, if the File Parameters indicate this is a differencing file,
  * we must also look for the Parent Locator metadata item.
  */
-static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
+static int GRAPH_RDLOCK
+vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
 {
     int ret = 0;
     uint8_t *buffer;
@@ -643,8 +647,8 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
 
     buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
 
-    ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
-                     VHDX_METADATA_TABLE_MAX_SIZE);
+    ret = bdrv_pread(bs->file, s->metadata_rt.file_offset,
+                     VHDX_METADATA_TABLE_MAX_SIZE, buffer, 0);
     if (ret < 0) {
         goto exit;
     }
@@ -749,8 +753,9 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
     ret = bdrv_pread(bs->file,
                      s->metadata_entries.file_parameters_entry.offset
                                          + s->metadata_rt.file_offset,
+                     sizeof(s->params),
                      &s->params,
-                     sizeof(s->params));
+                     0);
 
     if (ret < 0) {
         goto exit;
@@ -784,24 +789,27 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
     ret = bdrv_pread(bs->file,
                      s->metadata_entries.virtual_disk_size_entry.offset
                                            + s->metadata_rt.file_offset,
+                     sizeof(uint64_t),
                      &s->virtual_disk_size,
-                     sizeof(uint64_t));
+                     0);
     if (ret < 0) {
         goto exit;
     }
     ret = bdrv_pread(bs->file,
                      s->metadata_entries.logical_sector_size_entry.offset
                                              + s->metadata_rt.file_offset,
+                     sizeof(uint32_t),
                      &s->logical_sector_size,
-                     sizeof(uint32_t));
+                     0);
     if (ret < 0) {
         goto exit;
     }
     ret = bdrv_pread(bs->file,
                      s->metadata_entries.phys_sector_size_entry.offset
                                           + s->metadata_rt.file_offset,
+                     sizeof(uint32_t),
                      &s->physical_sector_size,
-                     sizeof(uint32_t));
+                     0);
     if (ret < 0) {
         goto exit;
     }
@@ -880,7 +888,8 @@ static void vhdx_calc_bat_entries(BDRVVHDXState *s)
 
 }
 
-static int vhdx_check_bat_entries(BlockDriverState *bs, int *errcnt)
+static int coroutine_mixed_fn GRAPH_RDLOCK
+vhdx_check_bat_entries(BlockDriverState *bs, int *errcnt)
 {
     BDRVVHDXState *s = bs->opaque;
     int64_t image_file_size = bdrv_getlength(bs->file->bs);
@@ -980,8 +989,7 @@ static void vhdx_close(BlockDriverState *bs)
     s->bat = NULL;
     qemu_vfree(s->parent_entries);
     s->parent_entries = NULL;
-    migrate_del_blocker(s->migration_blocker);
-    error_free(s->migration_blocker);
+    migrate_del_blocker(&s->migration_blocker);
     qemu_vfree(s->log.hdr);
     s->log.hdr = NULL;
     vhdx_region_unregister_all(s);
@@ -996,12 +1004,15 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
     uint64_t signature;
     Error *local_err = NULL;
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_IMAGE, false, errp);
-    if (!bs->file) {
-        return -EINVAL;
+    GLOBAL_STATE_CODE();
+
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
     }
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     s->bat = NULL;
     s->first_visible_write = true;
 
@@ -1009,7 +1020,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
     QLIST_INIT(&s->regions);
 
     /* validate the file signature */
-    ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
+    ret = bdrv_pread(bs->file, 0, sizeof(uint64_t), &signature, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -1068,12 +1079,12 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
+    ret = bdrv_pread(bs->file, s->bat_offset, s->bat_rt.length, s->bat, 0);
     if (ret < 0) {
         goto fail;
     }
 
-    /* endian convert populated BAT field entires */
+    /* endian convert populated BAT field entries */
     for (i = 0; i < s->bat_entries; i++) {
         s->bat[i] = le64_to_cpu(s->bat[i]);
     }
@@ -1089,9 +1100,8 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
     error_setg(&s->migration_blocker, "The vhdx format used by node '%s' "
                "does not support live migration",
                bdrv_get_device_or_node_name(bs));
-    ret = migrate_add_blocker(s->migration_blocker, errp);
+    ret = migrate_add_blocker_normal(&s->migration_blocker, errp);
     if (ret < 0) {
-        error_free(s->migration_blocker);
         goto fail;
     }
 
@@ -1157,7 +1167,8 @@ static void vhdx_block_translate(BDRVVHDXState *s, int64_t sector_num,
 }
 
 
-static int vhdx_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+static int coroutine_fn
+vhdx_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     BDRVVHDXState *s = bs->opaque;
 
@@ -1167,8 +1178,9 @@ static int vhdx_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 }
 
 
-static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
-                                      int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+vhdx_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+              QEMUIOVector *qiov)
 {
     BDRVVHDXState *s = bs->opaque;
     int ret = 0;
@@ -1244,12 +1256,13 @@ exit:
  *
  * Returns the file offset start of the new payload block
  */
-static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
-                               uint64_t *new_offset, bool *need_zero)
+static int coroutine_fn GRAPH_RDLOCK
+vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
+                    uint64_t *new_offset, bool *need_zero)
 {
     int64_t current_len;
 
-    current_len = bdrv_getlength(bs->file->bs);
+    current_len = bdrv_co_getlength(bs->file->bs);
     if (current_len < 0) {
         return current_len;
     }
@@ -1265,16 +1278,16 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
     if (*need_zero) {
         int ret;
 
-        ret = bdrv_truncate(bs->file, *new_offset + s->block_size, false,
-                            PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE, NULL);
+        ret = bdrv_co_truncate(bs->file, *new_offset + s->block_size, false,
+                               PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE, NULL);
         if (ret != -ENOTSUP) {
             *need_zero = false;
             return ret;
         }
     }
 
-    return bdrv_truncate(bs->file, *new_offset + s->block_size, false,
-                         PREALLOC_MODE_OFF, 0, NULL);
+    return bdrv_co_truncate(bs->file, *new_offset + s->block_size, false,
+                            PREALLOC_MODE_OFF, 0, NULL);
 }
 
 /*
@@ -1319,9 +1332,9 @@ int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s)
     return ret;
 }
 
-static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
-                                       int nb_sectors, QEMUIOVector *qiov,
-                                       int flags)
+static int coroutine_fn GRAPH_RDLOCK
+vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+               QEMUIOVector *qiov, int flags)
 {
     int ret = -ENOTSUP;
     BDRVVHDXState *s = bs->opaque;
@@ -1337,7 +1350,6 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
     uint64_t bat_prior_offset = 0;
     bool bat_update = false;
 
-    assert(!flags);
     qemu_iovec_init(&hd_qiov, qiov->niov);
 
     qemu_co_mutex_lock(&s->lock);
@@ -1501,14 +1513,17 @@ exit:
  * There are 2 headers, and the highest sequence number will represent
  * the active header
  */
-static int vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size,
-                                   uint32_t log_size)
+static int coroutine_fn GRAPH_UNLOCKED
+vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size,
+                        uint32_t log_size)
 {
     BlockDriverState *bs = blk_bs(blk);
     BdrvChild *child;
     int ret = 0;
     VHDXHeader *hdr = NULL;
 
+    GRAPH_RDLOCK_GUARD();
+
     hdr = g_new0(VHDXHeader, 1);
 
     hdr->signature       = VHDX_HEADER_SIGNATURE;
@@ -1564,12 +1579,10 @@ exit:
  * The first 64KB of the Metadata section is reserved for the metadata
  * header and entries; beyond that, the metadata items themselves reside.
  */
-static int vhdx_create_new_metadata(BlockBackend *blk,
-                                    uint64_t image_size,
-                                    uint32_t block_size,
-                                    uint32_t sector_size,
-                                    uint64_t metadata_offset,
-                                    VHDXImageType type)
+static int coroutine_fn
+vhdx_create_new_metadata(BlockBackend *blk, uint64_t image_size,
+                         uint32_t block_size, uint32_t sector_size,
+                         uint64_t metadata_offset, VHDXImageType type)
 {
     int ret = 0;
     uint32_t offset = 0;
@@ -1660,13 +1673,13 @@ static int vhdx_create_new_metadata(BlockBackend *blk,
                                    VHDX_META_FLAGS_IS_VIRTUAL_DISK;
     vhdx_metadata_entry_le_export(&md_table_entry[4]);
 
-    ret = blk_pwrite(blk, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE, 0);
+    ret = blk_co_pwrite(blk, metadata_offset, VHDX_HEADER_BLOCK_SIZE, buffer, 0);
     if (ret < 0) {
         goto exit;
     }
 
-    ret = blk_pwrite(blk, metadata_offset + (64 * KiB), entry_buffer,
-                     VHDX_METADATA_ENTRY_BUFFER_SIZE, 0);
+    ret = blk_co_pwrite(blk, metadata_offset + (64 * KiB),
+                        VHDX_METADATA_ENTRY_BUFFER_SIZE, entry_buffer, 0);
     if (ret < 0) {
         goto exit;
     }
@@ -1686,10 +1699,11 @@ exit:
  *  Fixed images: default state of the BAT is fully populated, with
  *                file offsets and state PAYLOAD_BLOCK_FULLY_PRESENT.
  */
-static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
-                           uint64_t image_size, VHDXImageType type,
-                           bool use_zero_blocks, uint64_t file_offset,
-                           uint32_t length, Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
+                uint64_t image_size, VHDXImageType type,
+                bool use_zero_blocks, uint64_t file_offset,
+                uint32_t length, Error **errp)
 {
     int ret = 0;
     uint64_t data_file_offset;
@@ -1698,6 +1712,7 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
     uint64_t unused;
     int block_state;
     VHDXSectorInfo sinfo;
+    bool has_zero_init;
 
     assert(s->bat == NULL);
 
@@ -1710,14 +1725,14 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
     if (type == VHDX_TYPE_DYNAMIC) {
         /* All zeroes, so we can just extend the file - the end of the BAT
          * is the furthest thing we have written yet */
-        ret = blk_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF,
-                           0, errp);
+        ret = blk_co_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF,
+                              0, errp);
         if (ret < 0) {
             goto exit;
         }
     } else if (type == VHDX_TYPE_FIXED) {
-        ret = blk_truncate(blk, data_file_offset + image_size, false,
-                           PREALLOC_MODE_OFF, 0, errp);
+        ret = blk_co_truncate(blk, data_file_offset + image_size, false,
+                              PREALLOC_MODE_OFF, 0, errp);
         if (ret < 0) {
             goto exit;
         }
@@ -1727,9 +1742,13 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
         goto exit;
     }
 
+    bdrv_graph_co_rdlock();
+    has_zero_init = bdrv_has_zero_init(blk_bs(blk));
+    bdrv_graph_co_rdunlock();
+
     if (type == VHDX_TYPE_FIXED ||
                 use_zero_blocks ||
-                bdrv_has_zero_init(blk_bs(blk)) == 0) {
+                has_zero_init == 0) {
         /* for a fixed file, the default BAT entry is not zero */
         s->bat = g_try_malloc0(length);
         if (length && s->bat == NULL) {
@@ -1751,7 +1770,7 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
             s->bat[sinfo.bat_idx] = cpu_to_le64(s->bat[sinfo.bat_idx]);
             sector_num += s->sectors_per_block;
         }
-        ret = blk_pwrite(blk, file_offset, s->bat, length, 0);
+        ret = blk_co_pwrite(blk, file_offset, length, s->bat, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Failed to write the BAT");
             goto exit;
@@ -1772,15 +1791,12 @@ exit:
  * to create the BAT itself, we will also cause the BAT to be
  * created.
  */
-static int vhdx_create_new_region_table(BlockBackend *blk,
-                                        uint64_t image_size,
-                                        uint32_t block_size,
-                                        uint32_t sector_size,
-                                        uint32_t log_size,
-                                        bool use_zero_blocks,
-                                        VHDXImageType type,
-                                        uint64_t *metadata_offset,
-                                        Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vhdx_create_new_region_table(BlockBackend *blk, uint64_t image_size,
+                             uint32_t block_size, uint32_t sector_size,
+                             uint32_t log_size, bool use_zero_blocks,
+                             VHDXImageType type, uint64_t *metadata_offset,
+                             Error **errp)
 {
     int ret = 0;
     uint32_t offset = 0;
@@ -1855,15 +1871,15 @@ static int vhdx_create_new_region_table(BlockBackend *blk,
     }
 
     /* Now write out the region headers to disk */
-    ret = blk_pwrite(blk, VHDX_REGION_TABLE_OFFSET, buffer,
-                     VHDX_HEADER_BLOCK_SIZE, 0);
+    ret = blk_co_pwrite(blk, VHDX_REGION_TABLE_OFFSET, VHDX_HEADER_BLOCK_SIZE,
+                        buffer, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Failed to write first region table");
         goto exit;
     }
 
-    ret = blk_pwrite(blk, VHDX_REGION_TABLE2_OFFSET, buffer,
-                     VHDX_HEADER_BLOCK_SIZE, 0);
+    ret = blk_co_pwrite(blk, VHDX_REGION_TABLE2_OFFSET, VHDX_HEADER_BLOCK_SIZE,
+                        buffer, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Failed to write second region table");
         goto exit;
@@ -1892,8 +1908,8 @@ exit:
  *    .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------.
  *   1MB
  */
-static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,
-                                       Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vhdx_co_create(BlockdevCreateOptions *opts, Error **errp)
 {
     BlockdevCreateOptionsVhdx *vhdx_opts;
     BlockBackend *blk = NULL;
@@ -1987,13 +2003,13 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,
     }
 
     /* Create BlockBackend to write to the image */
-    bs = bdrv_open_blockdev_ref(vhdx_opts->file, errp);
+    bs = bdrv_co_open_blockdev_ref(vhdx_opts->file, errp);
     if (bs == NULL) {
         return -EIO;
     }
 
-    blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
-                          errp);
+    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+                             errp);
     if (!blk) {
         ret = -EPERM;
         goto delete_and_exit;
@@ -2007,15 +2023,15 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,
     creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
                               &creator_items, NULL);
     signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
-    ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature),
-                     0);
+    ret = blk_co_pwrite(blk, VHDX_FILE_ID_OFFSET, sizeof(signature), &signature,
+                        0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Failed to write file signature");
         goto delete_and_exit;
     }
     if (creator) {
-        ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
-                         creator, creator_items * sizeof(gunichar2), 0);
+        ret = blk_co_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
+                            creator_items * sizeof(gunichar2), creator, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Failed to write creator field");
             goto delete_and_exit;
@@ -2048,16 +2064,15 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts,
 
     ret = 0;
 delete_and_exit:
-    blk_unref(blk);
-    bdrv_unref(bs);
+    blk_co_unref(blk);
+    bdrv_co_unref(bs);
     g_free(creator);
     return ret;
 }
 
-static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv,
-                                            const char *filename,
-                                            QemuOpts *opts,
-                                            Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vhdx_co_create_opts(BlockDriver *drv, const char *filename,
+                    QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
     QDict *qdict;
@@ -2081,13 +2096,13 @@ static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv,
     }
 
     /* Create and open the file (protocol layer) */
-    ret = bdrv_create_file(filename, opts, errp);
+    ret = bdrv_co_create_file(filename, opts, errp);
     if (ret < 0) {
         goto fail;
     }
 
-    bs = bdrv_open(filename, NULL, NULL,
-                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs = bdrv_co_open(filename, NULL, NULL,
+                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (bs == NULL) {
         ret = -EIO;
         goto fail;
@@ -2140,7 +2155,7 @@ static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv,
 
 fail:
     qobject_unref(qdict);
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
     qapi_free_BlockdevCreateOptions(create_options);
     return ret;
 }
@@ -2152,9 +2167,9 @@ fail:
  * r/w and any log has already been replayed, so there is nothing (currently)
  * for us to do here
  */
-static int coroutine_fn vhdx_co_check(BlockDriverState *bs,
-                                      BdrvCheckResult *result,
-                                      BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+vhdx_co_check(BlockDriverState *bs, BdrvCheckResult *result,
+              BdrvCheckMode fix)
 {
     BDRVVHDXState *s = bs->opaque;
 
@@ -2167,7 +2182,7 @@ static int coroutine_fn vhdx_co_check(BlockDriverState *bs,
     return 0;
 }
 
-static int vhdx_has_zero_init(BlockDriverState *bs)
+static int GRAPH_RDLOCK vhdx_has_zero_init(BlockDriverState *bs)
 {
     BDRVVHDXState *s = bs->opaque;
     int state;
@@ -2242,7 +2257,7 @@ static BlockDriver bdrv_vhdx = {
     .bdrv_co_writev         = vhdx_co_writev,
     .bdrv_co_create         = vhdx_co_create,
     .bdrv_co_create_opts    = vhdx_co_create_opts,
-    .bdrv_get_info          = vhdx_get_info,
+    .bdrv_co_get_info       = vhdx_co_get_info,
     .bdrv_co_check          = vhdx_co_check,
     .bdrv_has_zero_init     = vhdx_has_zero_init,
 
index 0b74924cee987ca57132bf90eeee0cd197278069..c6dd4d6040e8c08992c2b155c66e2897045aa2cb 100644 (file)
@@ -212,7 +212,7 @@ typedef struct QEMU_PACKED VHDXLogDataSector {
     uint32_t    sequence_high;          /* 4 MSB of 8 byte sequence_number */
     uint8_t     data[4084];             /* raw data, bytes 8-4091 (inclusive).
                                            see the data descriptor field for the
-                                           other mising bytes */
+                                           other missing bytes */
     uint32_t    sequence_low;           /* 4 LSB of 8 byte sequence_number */
 } VHDXLogDataSector;
 
@@ -257,7 +257,7 @@ typedef struct QEMU_PACKED VHDXMetadataTableHeader {
 
 #define VHDX_META_FLAGS_IS_USER         0x01    /* max 1024 entries */
 #define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02    /* virtual disk metadata if set,
-                                                   otherwise file metdata */
+                                                   otherwise file metadata */
 #define VHDX_META_FLAGS_IS_REQUIRED     0x04    /* parse must understand this
                                                    entry to open the file */
 typedef struct QEMU_PACKED VHDXMetadataTableEntry {
@@ -401,8 +401,9 @@ typedef struct BDRVVHDXState {
 
 void vhdx_guid_generate(MSGUID *guid);
 
-int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw,
-                        MSGUID *log_guid);
+int GRAPH_RDLOCK
+vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw,
+                    MSGUID *log_guid);
 
 uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset);
 uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
@@ -410,11 +411,13 @@ uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
 
 bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
 
-int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
-                   Error **errp);
+int GRAPH_RDLOCK
+vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
+               Error **errp);
 
-int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
-                             void *data, uint32_t length, uint64_t offset);
+int coroutine_fn GRAPH_RDLOCK
+vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
+                         void *data, uint32_t length, uint64_t offset);
 
 static inline void leguid_to_cpus(MSGUID *guid)
 {
@@ -446,6 +449,8 @@ void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr);
 void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr);
 void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e);
 void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e);
-int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s);
+
+int GRAPH_RDLOCK
+vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s);
 
 #endif
index a00dc00eb47a1d478e8c56a644bed04931fe12cb..d6971c706750a5c18cbd71c1e38a179ab15e61bb 100644 (file)
@@ -33,6 +33,7 @@
 #include "qemu/module.h"
 #include "qemu/option.h"
 #include "qemu/bswap.h"
+#include "qemu/memalign.h"
 #include "migration/blocker.h"
 #include "qemu/cutils.h"
 #include <zlib.h>
@@ -60,6 +61,7 @@
 #define VMDK_ZEROED  (-3)
 
 #define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain"
+#define BLOCK_OPT_TOOLSVERSION "toolsversion"
 
 typedef struct {
     uint32_t version;
@@ -176,6 +178,10 @@ typedef struct BDRVVmdkState {
     char *create_type;
 } BDRVVmdkState;
 
+typedef struct BDRVVmdkReopenState {
+    bool *extents_using_bs_file;
+} BDRVVmdkReopenState;
+
 typedef struct VmdkMetaData {
     unsigned int l1_index;
     unsigned int l2_index;
@@ -266,6 +272,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *e;
 
+    bdrv_graph_wrlock(NULL);
     for (i = 0; i < s->num_extents; i++) {
         e = &s->extents[i];
         g_free(e->l1_table);
@@ -276,6 +283,8 @@ static void vmdk_free_extents(BlockDriverState *bs)
             bdrv_unref_child(bs, e->file);
         }
     }
+    bdrv_graph_wrunlock(NULL);
+
     g_free(s->extents);
 }
 
@@ -291,7 +300,8 @@ static void vmdk_free_last_extent(BlockDriverState *bs)
 }
 
 /* Return -ve errno, or 0 on success and write CID into *pcid. */
-static int vmdk_read_cid(BlockDriverState *bs, int parent, uint32_t *pcid)
+static int GRAPH_RDLOCK
+vmdk_read_cid(BlockDriverState *bs, int parent, uint32_t *pcid)
 {
     char *desc;
     uint32_t cid;
@@ -301,7 +311,7 @@ static int vmdk_read_cid(BlockDriverState *bs, int parent, uint32_t *pcid)
     int ret;
 
     desc = g_malloc0(DESC_SIZE);
-    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_pread(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
     if (ret < 0) {
         goto out;
     }
@@ -333,36 +343,49 @@ out:
     return ret;
 }
 
-static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
 {
     char *desc, *tmp_desc;
     char *p_name, *tmp_str;
     BDRVVmdkState *s = bs->opaque;
     int ret = 0;
 
-    desc = g_malloc0(DESC_SIZE);
-    tmp_desc = g_malloc0(DESC_SIZE);
-    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+    size_t desc_buf_size;
+
+    if (s->desc_offset == 0) {
+        desc_buf_size = bdrv_getlength(bs->file->bs);
+        if (desc_buf_size > 16ULL << 20) {
+            error_report("VMDK description file too big");
+            return -EFBIG;
+        }
+    } else {
+        desc_buf_size = DESC_SIZE;
+    }
+
+    desc = g_malloc0(desc_buf_size);
+    tmp_desc = g_malloc0(desc_buf_size);
+    ret = bdrv_co_pread(bs->file, s->desc_offset, desc_buf_size, desc, 0);
     if (ret < 0) {
         goto out;
     }
 
-    desc[DESC_SIZE - 1] = '\0';
+    desc[desc_buf_size - 1] = '\0';
     tmp_str = strstr(desc, "parentCID");
     if (tmp_str == NULL) {
         ret = -EINVAL;
         goto out;
     }
 
-    pstrcpy(tmp_desc, DESC_SIZE, tmp_str);
+    pstrcpy(tmp_desc, desc_buf_size, tmp_str);
     p_name = strstr(desc, "CID");
     if (p_name != NULL) {
         p_name += sizeof("CID");
-        snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid);
-        pstrcat(desc, DESC_SIZE, tmp_desc);
+        snprintf(p_name, desc_buf_size - (p_name - desc), "%" PRIx32 "\n", cid);
+        pstrcat(desc, desc_buf_size, tmp_desc);
     }
 
-    ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_co_pwrite_sync(bs->file, s->desc_offset, desc_buf_size, desc, 0);
 
 out:
     g_free(desc);
@@ -370,7 +393,7 @@ out:
     return ret;
 }
 
-static int vmdk_is_cid_valid(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK vmdk_is_cid_valid(BlockDriverState *bs)
 {
     BDRVVmdkState *s = bs->opaque;
     uint32_t cur_pcid;
@@ -398,16 +421,70 @@ static int vmdk_is_cid_valid(BlockDriverState *bs)
     return 1;
 }
 
-/* We have nothing to do for VMDK reopen, stubs just return success */
 static int vmdk_reopen_prepare(BDRVReopenState *state,
                                BlockReopenQueue *queue, Error **errp)
 {
+    BDRVVmdkState *s;
+    BDRVVmdkReopenState *rs;
+    int i;
+
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     assert(state != NULL);
     assert(state->bs != NULL);
+    assert(state->opaque == NULL);
+
+    s = state->bs->opaque;
+
+    rs = g_new0(BDRVVmdkReopenState, 1);
+    state->opaque = rs;
+
+    /*
+     * Check whether there are any extents stored in bs->file; if bs->file
+     * changes, we will need to update their .file pointers to follow suit
+     */
+    rs->extents_using_bs_file = g_new(bool, s->num_extents);
+    for (i = 0; i < s->num_extents; i++) {
+        rs->extents_using_bs_file[i] = s->extents[i].file == state->bs->file;
+    }
+
     return 0;
 }
 
-static int vmdk_parent_open(BlockDriverState *bs)
+static void vmdk_reopen_clean(BDRVReopenState *state)
+{
+    BDRVVmdkReopenState *rs = state->opaque;
+
+    g_free(rs->extents_using_bs_file);
+    g_free(rs);
+    state->opaque = NULL;
+}
+
+static void vmdk_reopen_commit(BDRVReopenState *state)
+{
+    BDRVVmdkState *s = state->bs->opaque;
+    BDRVVmdkReopenState *rs = state->opaque;
+    int i;
+
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    for (i = 0; i < s->num_extents; i++) {
+        if (rs->extents_using_bs_file[i]) {
+            s->extents[i].file = state->bs->file;
+        }
+    }
+
+    vmdk_reopen_clean(state);
+}
+
+static void vmdk_reopen_abort(BDRVReopenState *state)
+{
+    vmdk_reopen_clean(state);
+}
+
+static int GRAPH_RDLOCK vmdk_parent_open(BlockDriverState *bs)
 {
     char *p_name;
     char *desc;
@@ -415,11 +492,10 @@ static int vmdk_parent_open(BlockDriverState *bs)
     int ret;
 
     desc = g_malloc0(DESC_SIZE + 1);
-    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_pread(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
     if (ret < 0) {
         goto out;
     }
-    ret = 0;
 
     p_name = strstr(desc, "parentFileNameHint");
     if (p_name != NULL) {
@@ -521,8 +597,8 @@ static int vmdk_add_extent(BlockDriverState *bs,
     return 0;
 }
 
-static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
-                            Error **errp)
+static int GRAPH_RDLOCK
+vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, Error **errp)
 {
     int ret;
     size_t l1_size;
@@ -535,10 +611,8 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
         return -ENOMEM;
     }
 
-    ret = bdrv_pread(extent->file,
-                     extent->l1_table_offset,
-                     extent->l1_table,
-                     l1_size);
+    ret = bdrv_pread(extent->file, extent->l1_table_offset, l1_size,
+                     extent->l1_table, 0);
     if (ret < 0) {
         bdrv_refresh_filename(extent->file->bs);
         error_setg_errno(errp, -ret,
@@ -562,10 +636,8 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
             ret = -ENOMEM;
             goto fail_l1;
         }
-        ret = bdrv_pread(extent->file,
-                         extent->l1_backup_table_offset,
-                         extent->l1_backup_table,
-                         l1_size);
+        ret = bdrv_pread(extent->file, extent->l1_backup_table_offset,
+                         l1_size, extent->l1_backup_table, 0);
         if (ret < 0) {
             bdrv_refresh_filename(extent->file->bs);
             error_setg_errno(errp, -ret,
@@ -588,16 +660,16 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
     return ret;
 }
 
-static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
-                                 BdrvChild *file,
-                                 int flags, Error **errp)
+static int GRAPH_RDLOCK
+vmdk_open_vmfs_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
+                      Error **errp)
 {
     int ret;
     uint32_t magic;
     VMDK3Header header;
     VmdkExtent *extent = NULL;
 
-    ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
+    ret = bdrv_pread(file, sizeof(magic), sizeof(header), &header, 0);
     if (ret < 0) {
         bdrv_refresh_filename(file->bs);
         error_setg_errno(errp, -ret,
@@ -744,9 +816,9 @@ static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header,
     return 0;
 }
 
-static int vmdk_open_se_sparse(BlockDriverState *bs,
-                               BdrvChild *file,
-                               int flags, Error **errp)
+static int GRAPH_RDLOCK
+vmdk_open_se_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
+                    Error **errp)
 {
     int ret;
     VMDKSESparseConstHeader const_header;
@@ -761,7 +833,7 @@ static int vmdk_open_se_sparse(BlockDriverState *bs,
 
     assert(sizeof(const_header) == SECTOR_SIZE);
 
-    ret = bdrv_pread(file, 0, &const_header, sizeof(const_header));
+    ret = bdrv_pread(file, 0, sizeof(const_header), &const_header, 0);
     if (ret < 0) {
         bdrv_refresh_filename(file->bs);
         error_setg_errno(errp, -ret,
@@ -778,9 +850,8 @@ static int vmdk_open_se_sparse(BlockDriverState *bs,
 
     assert(sizeof(volatile_header) == SECTOR_SIZE);
 
-    ret = bdrv_pread(file,
-                     const_header.volatile_header_offset * SECTOR_SIZE,
-                     &volatile_header, sizeof(volatile_header));
+    ret = bdrv_pread(file, const_header.volatile_header_offset * SECTOR_SIZE,
+                     sizeof(volatile_header), &volatile_header, 0);
     if (ret < 0) {
         bdrv_refresh_filename(file->bs);
         error_setg_errno(errp, -ret,
@@ -850,20 +921,20 @@ static char *vmdk_read_desc(BdrvChild *file, uint64_t desc_offset, Error **errp)
     size = MIN(size, (1 << 20) - 1);  /* avoid unbounded allocation */
     buf = g_malloc(size + 1);
 
-    ret = bdrv_pread(file, desc_offset, buf, size);
+    ret = bdrv_pread(file, desc_offset, size, buf, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not read from file");
         g_free(buf);
         return NULL;
     }
-    buf[ret] = 0;
+    buf[size] = 0;
 
     return buf;
 }
 
-static int vmdk_open_vmdk4(BlockDriverState *bs,
-                           BdrvChild *file,
-                           int flags, QDict *options, Error **errp)
+static int GRAPH_RDLOCK
+vmdk_open_vmdk4(BlockDriverState *bs, BdrvChild *file, int flags,
+                QDict *options, Error **errp)
 {
     int ret;
     uint32_t magic;
@@ -874,7 +945,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
     int64_t l1_backup_offset = 0;
     bool compressed;
 
-    ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
+    ret = bdrv_pread(file, sizeof(magic), sizeof(header), &header, 0);
     if (ret < 0) {
         bdrv_refresh_filename(file->bs);
         error_setg_errno(errp, -ret,
@@ -925,9 +996,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
             } QEMU_PACKED eos_marker;
         } QEMU_PACKED footer;
 
-        ret = bdrv_pread(file,
-            bs->file->bs->total_sectors * 512 - 1536,
-            &footer, sizeof(footer));
+        ret = bdrv_pread(file, bs->file->bs->total_sectors * 512 - 1536,
+                         sizeof(footer), &footer, 0);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Failed to read footer");
             return ret;
@@ -1044,8 +1114,9 @@ static int vmdk_parse_description(const char *desc, const char *opt_name,
 }
 
 /* Open an extent file and append to bs array */
-static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
-                            char *buf, QDict *options, Error **errp)
+static int GRAPH_RDLOCK
+vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
+                 char *buf, QDict *options, Error **errp)
 {
     uint32_t magic;
 
@@ -1072,8 +1143,9 @@ static const char *next_line(const char *s)
     return s;
 }
 
-static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
-                              QDict *options, Error **errp)
+static int GRAPH_RDLOCK
+vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
+                   Error **errp)
 {
     int ret;
     int matches;
@@ -1092,6 +1164,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
     char extent_opt_prefix[32];
     Error *local_err = NULL;
 
+    GLOBAL_STATE_CODE();
+
     for (p = desc; *p; p = next_line(p)) {
         /* parse extent line in one of below formats:
          *
@@ -1159,7 +1233,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
                                       bs, &child_of_bds, extent_role, false,
                                       &local_err);
         g_free(extent_path);
-        if (local_err) {
+        if (!extent_file) {
             error_propagate(errp, local_err);
             ret = -EINVAL;
             goto out;
@@ -1172,7 +1246,11 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
             ret = vmdk_add_extent(bs, extent_file, true, sectors,
                             0, 0, 0, 0, 0, &extent, errp);
             if (ret < 0) {
+                bdrv_graph_rdunlock_main_loop();
+                bdrv_graph_wrlock(NULL);
                 bdrv_unref_child(bs, extent_file);
+                bdrv_graph_wrunlock(NULL);
+                bdrv_graph_rdlock_main_loop();
                 goto out;
             }
             extent->flat_start_offset = flat_offset << 9;
@@ -1187,20 +1265,32 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
             }
             g_free(buf);
             if (ret) {
+                bdrv_graph_rdunlock_main_loop();
+                bdrv_graph_wrlock(NULL);
                 bdrv_unref_child(bs, extent_file);
+                bdrv_graph_wrunlock(NULL);
+                bdrv_graph_rdlock_main_loop();
                 goto out;
             }
             extent = &s->extents[s->num_extents - 1];
         } else if (!strcmp(type, "SESPARSE")) {
             ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
             if (ret) {
+                bdrv_graph_rdunlock_main_loop();
+                bdrv_graph_wrlock(NULL);
                 bdrv_unref_child(bs, extent_file);
+                bdrv_graph_wrunlock(NULL);
+                bdrv_graph_rdlock_main_loop();
                 goto out;
             }
             extent = &s->extents[s->num_extents - 1];
         } else {
             error_setg(errp, "Unsupported extent type '%s'", type);
+            bdrv_graph_rdunlock_main_loop();
+            bdrv_graph_wrlock(NULL);
             bdrv_unref_child(bs, extent_file);
+            bdrv_graph_wrunlock(NULL);
+            bdrv_graph_rdlock_main_loop();
             ret = -ENOTSUP;
             goto out;
         }
@@ -1224,8 +1314,9 @@ out:
     return ret;
 }
 
-static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
-                               QDict *options, Error **errp)
+static int GRAPH_RDLOCK
+vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, QDict *options,
+                    Error **errp)
 {
     int ret;
     char ct[128];
@@ -1261,10 +1352,11 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
     BDRVVmdkState *s = bs->opaque;
     uint32_t magic;
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_IMAGE, false, errp);
-    if (!bs->file) {
-        return -EINVAL;
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
     }
 
     buf = vmdk_read_desc(bs->file, 0, errp);
@@ -1313,9 +1405,8 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
     error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
                "does not support live migration",
                bdrv_get_device_or_node_name(bs));
-    ret = migrate_add_blocker(s->migration_blocker, errp);
+    ret = migrate_add_blocker_normal(&s->migration_blocker, errp);
     if (ret < 0) {
-        error_free(s->migration_blocker);
         goto fail;
     }
 
@@ -1357,13 +1448,11 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
  * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave
  * it for call to write user data in the request.
  */
-static int get_whole_cluster(BlockDriverState *bs,
-                             VmdkExtent *extent,
-                             uint64_t cluster_offset,
-                             uint64_t offset,
-                             uint64_t skip_start_bytes,
-                             uint64_t skip_end_bytes,
-                             bool zeroed)
+static int coroutine_fn GRAPH_RDLOCK
+get_whole_cluster(BlockDriverState *bs, VmdkExtent *extent,
+                  uint64_t cluster_offset, uint64_t offset,
+                  uint64_t skip_start_bytes, uint64_t skip_end_bytes,
+                  bool zeroed)
 {
     int ret = VMDK_OK;
     int64_t cluster_bytes;
@@ -1393,17 +1482,17 @@ static int get_whole_cluster(BlockDriverState *bs,
     if (skip_start_bytes > 0) {
         if (copy_from_backing) {
             /* qcow2 emits this on bs->file instead of bs->backing */
-            BLKDBG_EVENT(extent->file, BLKDBG_COW_READ);
-            ret = bdrv_pread(bs->backing, offset, whole_grain,
-                             skip_start_bytes);
+            BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_READ);
+            ret = bdrv_co_pread(bs->backing, offset, skip_start_bytes,
+                                whole_grain, 0);
             if (ret < 0) {
                 ret = VMDK_ERROR;
                 goto exit;
             }
         }
-        BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE);
-        ret = bdrv_pwrite(extent->file, cluster_offset, whole_grain,
-                          skip_start_bytes);
+        BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_WRITE);
+        ret = bdrv_co_pwrite(extent->file, cluster_offset, skip_start_bytes,
+                             whole_grain, 0);
         if (ret < 0) {
             ret = VMDK_ERROR;
             goto exit;
@@ -1413,19 +1502,19 @@ static int get_whole_cluster(BlockDriverState *bs,
     if (skip_end_bytes < cluster_bytes) {
         if (copy_from_backing) {
             /* qcow2 emits this on bs->file instead of bs->backing */
-            BLKDBG_EVENT(extent->file, BLKDBG_COW_READ);
-            ret = bdrv_pread(bs->backing, offset + skip_end_bytes,
-                             whole_grain + skip_end_bytes,
-                             cluster_bytes - skip_end_bytes);
+            BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_READ);
+            ret = bdrv_co_pread(bs->backing, offset + skip_end_bytes,
+                                cluster_bytes - skip_end_bytes,
+                                whole_grain + skip_end_bytes, 0);
             if (ret < 0) {
                 ret = VMDK_ERROR;
                 goto exit;
             }
         }
-        BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE);
-        ret = bdrv_pwrite(extent->file, cluster_offset + skip_end_bytes,
-                          whole_grain + skip_end_bytes,
-                          cluster_bytes - skip_end_bytes);
+        BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_WRITE);
+        ret = bdrv_co_pwrite(extent->file, cluster_offset + skip_end_bytes,
+                             cluster_bytes - skip_end_bytes,
+                             whole_grain + skip_end_bytes, 0);
         if (ret < 0) {
             ret = VMDK_ERROR;
             goto exit;
@@ -1438,29 +1527,29 @@ exit:
     return ret;
 }
 
-static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
-                         uint32_t offset)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, uint32_t offset)
 {
     offset = cpu_to_le32(offset);
     /* update L2 table */
-    BLKDBG_EVENT(extent->file, BLKDBG_L2_UPDATE);
-    if (bdrv_pwrite(extent->file,
-                ((int64_t)m_data->l2_offset * 512)
-                    + (m_data->l2_index * sizeof(offset)),
-                &offset, sizeof(offset)) < 0) {
+    BLKDBG_CO_EVENT(extent->file, BLKDBG_L2_UPDATE);
+    if (bdrv_co_pwrite(extent->file,
+                       ((int64_t)m_data->l2_offset * 512)
+                           + (m_data->l2_index * sizeof(offset)),
+                       sizeof(offset), &offset, 0) < 0) {
         return VMDK_ERROR;
     }
     /* update backup L2 table */
     if (extent->l1_backup_table_offset != 0) {
         m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
-        if (bdrv_pwrite(extent->file,
-                    ((int64_t)m_data->l2_offset * 512)
-                        + (m_data->l2_index * sizeof(offset)),
-                    &offset, sizeof(offset)) < 0) {
+        if (bdrv_co_pwrite(extent->file,
+                           ((int64_t)m_data->l2_offset * 512)
+                               + (m_data->l2_index * sizeof(offset)),
+                           sizeof(offset), &offset, 0) < 0) {
             return VMDK_ERROR;
         }
     }
-    if (bdrv_flush(extent->file->bs) < 0) {
+    if (bdrv_co_flush(extent->file->bs) < 0) {
         return VMDK_ERROR;
     }
     if (m_data->l2_cache_entry) {
@@ -1490,14 +1579,11 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
  *          VMDK_UNALLOC if cluster is not mapped and @allocate is false.
  *          VMDK_ERROR if failed.
  */
-static int get_cluster_offset(BlockDriverState *bs,
-                              VmdkExtent *extent,
-                              VmdkMetaData *m_data,
-                              uint64_t offset,
-                              bool allocate,
-                              uint64_t *cluster_offset,
-                              uint64_t skip_start_bytes,
-                              uint64_t skip_end_bytes)
+static int coroutine_fn GRAPH_RDLOCK
+get_cluster_offset(BlockDriverState *bs, VmdkExtent *extent,
+                   VmdkMetaData *m_data, uint64_t offset, bool allocate,
+                   uint64_t *cluster_offset, uint64_t skip_start_bytes,
+                   uint64_t skip_end_bytes)
 {
     unsigned int l1_index, l2_offset, l2_index;
     int min_index, i, j;
@@ -1576,12 +1662,12 @@ static int get_cluster_offset(BlockDriverState *bs,
         }
     }
     l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
-    BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD);
-    if (bdrv_pread(extent->file,
+    BLKDBG_CO_EVENT(extent->file, BLKDBG_L2_LOAD);
+    if (bdrv_co_pread(extent->file,
                 (int64_t)l2_offset * 512,
-                l2_table,
-                l2_size_bytes
-            ) != l2_size_bytes) {
+                l2_size_bytes,
+                l2_table, 0
+            ) < 0) {
         return VMDK_ERROR;
     }
 
@@ -1690,11 +1776,10 @@ static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent,
     return extent_relative_offset % cluster_size;
 }
 
-static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs,
-                                             bool want_zero,
-                                             int64_t offset, int64_t bytes,
-                                             int64_t *pnum, int64_t *map,
-                                             BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_block_status(BlockDriverState *bs, bool want_zero,
+                     int64_t offset, int64_t bytes, int64_t *pnum,
+                     int64_t *map, BlockDriverState **file)
 {
     BDRVVmdkState *s = bs->opaque;
     int64_t index_in_cluster, n, ret;
@@ -1729,6 +1814,8 @@ static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs,
             if (extent->flat) {
                 ret |= BDRV_BLOCK_RECURSE;
             }
+        } else {
+            ret |= BDRV_BLOCK_COMPRESSED;
         }
         *file = extent->file->bs;
         break;
@@ -1739,10 +1826,11 @@ static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs,
     return ret;
 }
 
-static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
-                            int64_t offset_in_cluster, QEMUIOVector *qiov,
-                            uint64_t qiov_offset, uint64_t n_bytes,
-                            uint64_t offset)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
+                  int64_t offset_in_cluster, QEMUIOVector *qiov,
+                  uint64_t qiov_offset, uint64_t n_bytes,
+                  uint64_t offset)
 {
     int ret;
     VmdkGrainMarker *data = NULL;
@@ -1787,12 +1875,12 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
         n_bytes = buf_len + sizeof(VmdkGrainMarker);
         qemu_iovec_init_buf(&local_qiov, data, n_bytes);
 
-        BLKDBG_EVENT(extent->file, BLKDBG_WRITE_COMPRESSED);
+        BLKDBG_CO_EVENT(extent->file, BLKDBG_WRITE_COMPRESSED);
     } else {
         qemu_iovec_init(&local_qiov, qiov->niov);
         qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes);
 
-        BLKDBG_EVENT(extent->file, BLKDBG_WRITE_AIO);
+        BLKDBG_CO_EVENT(extent->file, BLKDBG_WRITE_AIO);
     }
 
     write_offset = cluster_offset + offset_in_cluster;
@@ -1820,9 +1908,9 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
     return ret;
 }
 
-static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
-                            int64_t offset_in_cluster, QEMUIOVector *qiov,
-                            int bytes)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
+                 int64_t offset_in_cluster, QEMUIOVector *qiov, int bytes)
 {
     int ret;
     int cluster_bytes, buf_bytes;
@@ -1834,7 +1922,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
 
 
     if (!extent->compressed) {
-        BLKDBG_EVENT(extent->file, BLKDBG_READ_AIO);
+        BLKDBG_CO_EVENT(extent->file, BLKDBG_READ_AIO);
         ret = bdrv_co_preadv(extent->file,
                              cluster_offset + offset_in_cluster, bytes,
                              qiov, 0);
@@ -1848,10 +1936,9 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
     buf_bytes = cluster_bytes * 2;
     cluster_buf = g_malloc(buf_bytes);
     uncomp_buf = g_malloc(cluster_bytes);
-    BLKDBG_EVENT(extent->file, BLKDBG_READ_COMPRESSED);
-    ret = bdrv_pread(extent->file,
-                cluster_offset,
-                cluster_buf, buf_bytes);
+    BLKDBG_CO_EVENT(extent->file, BLKDBG_READ_COMPRESSED);
+    ret = bdrv_co_pread(extent->file, cluster_offset, buf_bytes, cluster_buf,
+                        0);
     if (ret < 0) {
         goto out;
     }
@@ -1887,9 +1974,9 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
     return ret;
 }
 
-static int coroutine_fn
-vmdk_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-               QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+               QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVVmdkState *s = bs->opaque;
     int ret;
@@ -1927,7 +2014,7 @@ vmdk_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
                 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
 
                 /* qcow2 emits this on bs->file instead of bs->backing */
-                BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
+                BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                 ret = bdrv_co_preadv(bs->backing, offset, n_bytes,
                                      &local_qiov, 0);
                 if (ret < 0) {
@@ -1969,9 +2056,9 @@ fail:
  *
  * Returns: error code with 0 for success.
  */
-static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
-                       uint64_t bytes, QEMUIOVector *qiov,
-                       bool zeroed, bool zero_dry_run)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+             QEMUIOVector *qiov, bool zeroed, bool zero_dry_run)
 {
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *extent = NULL;
@@ -2067,9 +2154,9 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
     return 0;
 }
 
-static int coroutine_fn
-vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     int ret;
     BDRVVmdkState *s = bs->opaque;
@@ -2079,9 +2166,9 @@ vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     return ret;
 }
 
-static int coroutine_fn
-vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
-                           uint64_t bytes, QEMUIOVector *qiov)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                           QEMUIOVector *qiov)
 {
     if (bytes == 0) {
         /* The caller will write bytes 0 to signal EOF.
@@ -2091,13 +2178,13 @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
         int64_t length;
 
         for (i = 0; i < s->num_extents; i++) {
-            length = bdrv_getlength(s->extents[i].file->bs);
+            length = bdrv_co_getlength(s->extents[i].file->bs);
             if (length < 0) {
                 return length;
             }
             length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE);
-            ret = bdrv_truncate(s->extents[i].file, length, false,
-                                PREALLOC_MODE_OFF, 0, NULL);
+            ret = bdrv_co_truncate(s->extents[i].file, length, false,
+                                   PREALLOC_MODE_OFF, 0, NULL);
             if (ret < 0) {
                 return ret;
             }
@@ -2107,10 +2194,9 @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
     return vmdk_co_pwritev(bs, offset, bytes, qiov, 0);
 }
 
-static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs,
-                                              int64_t offset,
-                                              int bytes,
-                                              BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                      BdrvRequestFlags flags)
 {
     int ret;
     BDRVVmdkState *s = bs->opaque;
@@ -2126,10 +2212,9 @@ static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs,
     return ret;
 }
 
-static int vmdk_init_extent(BlockBackend *blk,
-                            int64_t filesize, bool flat,
-                            bool compress, bool zeroed_grain,
-                            Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vmdk_init_extent(BlockBackend *blk, int64_t filesize, bool flat, bool compress,
+                 bool zeroed_grain, Error **errp)
 {
     int ret, i;
     VMDK4Header header;
@@ -2138,7 +2223,7 @@ static int vmdk_init_extent(BlockBackend *blk,
     int gd_buf_size;
 
     if (flat) {
-        ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp);
+        ret = blk_co_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp);
         goto exit;
     }
     magic = cpu_to_be32(VMDK4_MAGIC);
@@ -2190,19 +2275,19 @@ static int vmdk_init_extent(BlockBackend *blk,
     header.check_bytes[3] = 0xa;
 
     /* write all the data */
-    ret = blk_pwrite(blk, 0, &magic, sizeof(magic), 0);
+    ret = blk_co_pwrite(blk, 0, sizeof(magic), &magic, 0);
     if (ret < 0) {
         error_setg(errp, QERR_IO_ERROR);
         goto exit;
     }
-    ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header), 0);
+    ret = blk_co_pwrite(blk, sizeof(magic), sizeof(header), &header, 0);
     if (ret < 0) {
         error_setg(errp, QERR_IO_ERROR);
         goto exit;
     }
 
-    ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false,
-                       PREALLOC_MODE_OFF, 0, errp);
+    ret = blk_co_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false,
+                          PREALLOC_MODE_OFF, 0, errp);
     if (ret < 0) {
         goto exit;
     }
@@ -2214,8 +2299,8 @@ static int vmdk_init_extent(BlockBackend *blk,
          i < gt_count; i++, tmp += gt_size) {
         gd_buf[i] = cpu_to_le32(tmp);
     }
-    ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
-                     gd_buf, gd_buf_size, 0);
+    ret = blk_co_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
+                        gd_buf_size, gd_buf, 0);
     if (ret < 0) {
         error_setg(errp, QERR_IO_ERROR);
         goto exit;
@@ -2226,8 +2311,8 @@ static int vmdk_init_extent(BlockBackend *blk,
          i < gt_count; i++, tmp += gt_size) {
         gd_buf[i] = cpu_to_le32(tmp);
     }
-    ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
-                     gd_buf, gd_buf_size, 0);
+    ret = blk_co_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
+                        gd_buf_size, gd_buf, 0);
     if (ret < 0) {
         error_setg(errp, QERR_IO_ERROR);
     }
@@ -2238,22 +2323,22 @@ exit:
     return ret;
 }
 
-static int vmdk_create_extent(const char *filename, int64_t filesize,
-                              bool flat, bool compress, bool zeroed_grain,
-                              BlockBackend **pbb,
-                              QemuOpts *opts, Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vmdk_create_extent(const char *filename, int64_t filesize, bool flat,
+                   bool compress, bool zeroed_grain, BlockBackend **pbb,
+                   QemuOpts *opts, Error **errp)
 {
     int ret;
     BlockBackend *blk = NULL;
 
-    ret = bdrv_create_file(filename, opts, errp);
+    ret = bdrv_co_create_file(filename, opts, errp);
     if (ret < 0) {
         goto exit;
     }
 
-    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-                       errp);
+    blk = blk_co_new_open(filename, NULL, NULL,
+                          BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+                          errp);
     if (blk == NULL) {
         ret = -EIO;
         goto exit;
@@ -2267,7 +2352,7 @@ exit:
         if (pbb) {
             *pbb = blk;
         } else {
-            blk_unref(blk);
+            blk_co_unref(blk);
             blk = NULL;
         }
     }
@@ -2319,14 +2404,10 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
  *           non-split format.
  * idx >= 1: get the n-th extent if in a split subformat
  */
-typedef BlockBackend *(*vmdk_create_extent_fn)(int64_t size,
-                                               int idx,
-                                               bool flat,
-                                               bool split,
-                                               bool compress,
-                                               bool zeroed_grain,
-                                               void *opaque,
-                                               Error **errp);
+typedef BlockBackend * coroutine_fn GRAPH_UNLOCKED_PTR
+    (*vmdk_create_extent_fn)(int64_t size, int idx, bool flat, bool split,
+                             bool compress, bool zeroed_grain, void *opaque,
+                             Error **errp);
 
 static void vmdk_desc_add_extent(GString *desc,
                                  const char *extent_line_fmt,
@@ -2339,16 +2420,18 @@ static void vmdk_desc_add_extent(GString *desc,
     g_free(basename);
 }
 
-static int coroutine_fn vmdk_co_do_create(int64_t size,
-                                          BlockdevVmdkSubformat subformat,
-                                          BlockdevVmdkAdapterType adapter_type,
-                                          const char *backing_file,
-                                          const char *hw_version,
-                                          bool compat6,
-                                          bool zeroed_grain,
-                                          vmdk_create_extent_fn extent_fn,
-                                          void *opaque,
-                                          Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vmdk_co_do_create(int64_t size,
+                  BlockdevVmdkSubformat subformat,
+                  BlockdevVmdkAdapterType adapter_type,
+                  const char *backing_file,
+                  const char *hw_version,
+                  const char *toolsversion,
+                  bool compat6,
+                  bool zeroed_grain,
+                  vmdk_create_extent_fn extent_fn,
+                  void *opaque,
+                  Error **errp)
 {
     int extent_idx;
     BlockBackend *blk = NULL;
@@ -2384,7 +2467,8 @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
         "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
         "ddb.geometry.heads = \"%" PRIu32 "\"\n"
         "ddb.geometry.sectors = \"63\"\n"
-        "ddb.adapterType = \"%s\"\n";
+        "ddb.adapterType = \"%s\"\n"
+        "ddb.toolsVersion = \"%s\"\n";
 
     ext_desc_lines = g_string_new(NULL);
 
@@ -2401,6 +2485,9 @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
     if (!hw_version) {
         hw_version = "4";
     }
+    if (!toolsversion) {
+        toolsversion = "2147483647";
+    }
 
     if (adapter_type != BLOCKDEV_VMDK_ADAPTER_TYPE_IDE) {
         /* that's the number of heads with which vmware operates when
@@ -2465,8 +2552,8 @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
         }
         assert(full_backing);
 
-        backing = blk_new_open(full_backing, NULL, NULL,
-                               BDRV_O_NO_BACKING, errp);
+        backing = blk_co_new_open(full_backing, NULL, NULL,
+                                  BDRV_O_NO_BACKING, errp);
         g_free(full_backing);
         if (backing == NULL) {
             ret = -EIO;
@@ -2475,12 +2562,15 @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
         if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) {
             error_setg(errp, "Invalid backing file format: %s. Must be vmdk",
                        blk_bs(backing)->drv->format_name);
-            blk_unref(backing);
+            blk_co_unref(backing);
             ret = -EINVAL;
             goto exit;
         }
+
+        bdrv_graph_co_rdlock();
         ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid);
-        blk_unref(backing);
+        bdrv_graph_co_rdunlock();
+        blk_co_unref(backing);
         if (ret) {
             error_setg(errp, "Failed to read parent CID");
             goto exit;
@@ -2501,14 +2591,14 @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
                              blk_bs(extent_blk)->filename);
         created_size += cur_size;
         extent_idx++;
-        blk_unref(extent_blk);
+        blk_co_unref(extent_blk);
     }
 
     /* Check whether we got excess extents */
     extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain,
                            opaque, NULL);
     if (extent_blk) {
-        blk_unref(extent_blk);
+        blk_co_unref(extent_blk);
         error_setg(errp, "List of extents contains unused extents");
         ret = -EINVAL;
         goto exit;
@@ -2525,14 +2615,15 @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
                            size /
                                (int64_t)(63 * number_heads * BDRV_SECTOR_SIZE),
                            number_heads,
-                           BlockdevVmdkAdapterType_str(adapter_type));
+                           BlockdevVmdkAdapterType_str(adapter_type),
+                           toolsversion);
     desc_len = strlen(desc);
     /* the descriptor offset = 0x200 */
     if (!split && !flat) {
         desc_offset = 0x200;
     }
 
-    ret = blk_pwrite(blk, desc_offset, desc, desc_len, 0);
+    ret = blk_co_pwrite(blk, desc_offset, desc_len, desc, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not write description");
         goto exit;
@@ -2540,7 +2631,7 @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
     /* bdrv_pwrite write padding zeros to align to sector, we don't need that
      * for description file */
     if (desc_offset == 0) {
-        ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp);
+        ret = blk_co_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp);
         if (ret < 0) {
             goto exit;
         }
@@ -2548,7 +2639,7 @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
     ret = 0;
 exit:
     if (blk) {
-        blk_unref(blk);
+        blk_co_unref(blk);
     }
     g_free(desc);
     g_free(parent_desc_line);
@@ -2563,10 +2654,10 @@ typedef struct {
     QemuOpts *opts;
 } VMDKCreateOptsData;
 
-static BlockBackend *vmdk_co_create_opts_cb(int64_t size, int idx,
-                                            bool flat, bool split, bool compress,
-                                            bool zeroed_grain, void *opaque,
-                                            Error **errp)
+static BlockBackend * coroutine_fn GRAPH_UNLOCKED
+vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split,
+                       bool compress, bool zeroed_grain, void *opaque,
+                       Error **errp)
 {
     BlockBackend *blk = NULL;
     BlockDriverState *bs = NULL;
@@ -2599,16 +2690,15 @@ static BlockBackend *vmdk_co_create_opts_cb(int64_t size, int idx,
                            errp)) {
         goto exit;
     }
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
 exit:
     g_free(ext_filename);
     return blk;
 }
 
-static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv,
-                                            const char *filename,
-                                            QemuOpts *opts,
-                                            Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vmdk_co_create_opts(BlockDriver *drv, const char *filename,
+                    QemuOpts *opts, Error **errp)
 {
     Error *local_err = NULL;
     char *desc = NULL;
@@ -2617,6 +2707,7 @@ static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv,
     BlockdevVmdkAdapterType adapter_type_enum;
     char *backing_file = NULL;
     char *hw_version = NULL;
+    char *toolsversion = NULL;
     char *fmt = NULL;
     BlockdevVmdkSubformat subformat;
     int ret = 0;
@@ -2649,6 +2740,7 @@ static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv,
     adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE);
     backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
     hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION);
+    toolsversion = qemu_opt_get_del(opts, BLOCK_OPT_TOOLSVERSION);
     compat6 = qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false);
     if (strcmp(hw_version, "undefined") == 0) {
         g_free(hw_version);
@@ -2692,14 +2784,15 @@ static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv,
         .opts = opts,
     };
     ret = vmdk_co_do_create(total_size, subformat, adapter_type_enum,
-                            backing_file, hw_version, compat6, zeroed_grain,
-                            vmdk_co_create_opts_cb, &data, errp);
+                            backing_file, hw_version, toolsversion, compat6,
+                            zeroed_grain, vmdk_co_create_opts_cb, &data, errp);
 
 exit:
     g_free(backing_fmt);
     g_free(adapter_type);
     g_free(backing_file);
     g_free(hw_version);
+    g_free(toolsversion);
     g_free(fmt);
     g_free(desc);
     g_free(path);
@@ -2712,10 +2805,9 @@ exit:
     return ret;
 }
 
-static BlockBackend *vmdk_co_create_cb(int64_t size, int idx,
-                                       bool flat, bool split, bool compress,
-                                       bool zeroed_grain, void *opaque,
-                                       Error **errp)
+static BlockBackend * coroutine_fn GRAPH_UNLOCKED
+vmdk_co_create_cb(int64_t size, int idx, bool flat, bool split, bool compress,
+                  bool zeroed_grain, void *opaque, Error **errp)
 {
     int ret;
     BlockDriverState *bs;
@@ -2723,7 +2815,7 @@ static BlockBackend *vmdk_co_create_cb(int64_t size, int idx,
     BlockdevCreateOptionsVmdk *opts = opaque;
 
     if (idx == 0) {
-        bs = bdrv_open_blockdev_ref(opts->file, errp);
+        bs = bdrv_co_open_blockdev_ref(opts->file, errp);
     } else {
         int i;
         BlockdevRefList *list = opts->extents;
@@ -2738,34 +2830,35 @@ static BlockBackend *vmdk_co_create_cb(int64_t size, int idx,
             error_setg(errp, "Extent [%d] not specified", idx - 1);
             return NULL;
         }
-        bs = bdrv_open_blockdev_ref(list->value, errp);
+        bs = bdrv_co_open_blockdev_ref(list->value, errp);
     }
     if (!bs) {
         return NULL;
     }
-    blk = blk_new_with_bs(bs,
-                          BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
-                          BLK_PERM_ALL, errp);
+    blk = blk_co_new_with_bs(bs,
+                             BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
+                                BLK_PERM_RESIZE,
+                             BLK_PERM_ALL,
+                             errp);
     if (!blk) {
         return NULL;
     }
     blk_set_allow_write_beyond_eof(blk, true);
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
 
     if (size != -1) {
         ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp);
         if (ret) {
-            blk_unref(blk);
+            blk_co_unref(blk);
             blk = NULL;
         }
     }
     return blk;
 }
 
-static int coroutine_fn vmdk_co_create(BlockdevCreateOptions *create_options,
-                                       Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vmdk_co_create(BlockdevCreateOptions *create_options, Error **errp)
 {
-    int ret;
     BlockdevCreateOptionsVmdk *opts;
 
     opts = &create_options->u.vmdk;
@@ -2773,23 +2866,19 @@ static int coroutine_fn vmdk_co_create(BlockdevCreateOptions *create_options,
     /* Validate options */
     if (!QEMU_IS_ALIGNED(opts->size, BDRV_SECTOR_SIZE)) {
         error_setg(errp, "Image size must be a multiple of 512 bytes");
-        ret = -EINVAL;
-        goto out;
+        return -EINVAL;
     }
 
-    ret = vmdk_co_do_create(opts->size,
-                            opts->subformat,
-                            opts->adapter_type,
-                            opts->backing_file,
-                            opts->hwversion,
-                            false,
-                            opts->zeroed_grain,
-                            vmdk_co_create_cb,
-                            opts, errp);
-    return ret;
-
-out:
-    return ret;
+    return vmdk_co_do_create(opts->size,
+                             opts->subformat,
+                             opts->adapter_type,
+                             opts->backing_file,
+                             opts->hwversion,
+                             opts->toolsversion,
+                             false,
+                             opts->zeroed_grain,
+                             vmdk_co_create_cb,
+                             opts, errp);
 }
 
 static void vmdk_close(BlockDriverState *bs)
@@ -2799,18 +2888,18 @@ static void vmdk_close(BlockDriverState *bs)
     vmdk_free_extents(bs);
     g_free(s->create_type);
 
-    migrate_del_blocker(s->migration_blocker);
-    error_free(s->migration_blocker);
+    migrate_del_blocker(&s->migration_blocker);
 }
 
-static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
+static int64_t coroutine_fn GRAPH_RDLOCK
+vmdk_co_get_allocated_file_size(BlockDriverState *bs)
 {
     int i;
     int64_t ret = 0;
     int64_t r;
     BDRVVmdkState *s = bs->opaque;
 
-    ret = bdrv_get_allocated_file_size(bs->file->bs);
+    ret = bdrv_co_get_allocated_file_size(bs->file->bs);
     if (ret < 0) {
         return ret;
     }
@@ -2818,7 +2907,7 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
         if (s->extents[i].file == bs->file) {
             continue;
         }
-        r = bdrv_get_allocated_file_size(s->extents[i].file->bs);
+        r = bdrv_co_get_allocated_file_size(s->extents[i].file->bs);
         if (r < 0) {
             return r;
         }
@@ -2827,7 +2916,7 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
     return ret;
 }
 
-static int vmdk_has_zero_init(BlockDriverState *bs)
+static int GRAPH_RDLOCK vmdk_has_zero_init(BlockDriverState *bs)
 {
     int i;
     BDRVVmdkState *s = bs->opaque;
@@ -2844,12 +2933,12 @@ static int vmdk_has_zero_init(BlockDriverState *bs)
     return 1;
 }
 
-static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
+static VmdkExtentInfo * GRAPH_RDLOCK vmdk_get_extent_info(VmdkExtent *extent)
 {
-    ImageInfo *info = g_new0(ImageInfo, 1);
+    VmdkExtentInfo *info = g_new0(VmdkExtentInfo, 1);
 
     bdrv_refresh_filename(extent->file->bs);
-    *info = (ImageInfo){
+    *info = (VmdkExtentInfo){
         .filename         = g_strdup(extent->file->bs->filename),
         .format           = g_strdup(extent->type),
         .virtual_size     = extent->sectors * BDRV_SECTOR_SIZE,
@@ -2862,14 +2951,13 @@ static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
     return info;
 }
 
-static int coroutine_fn vmdk_co_check(BlockDriverState *bs,
-                                      BdrvCheckResult *result,
-                                      BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+vmdk_co_check(BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix)
 {
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *extent = NULL;
     int64_t sector_num = 0;
-    int64_t total_sectors = bdrv_nb_sectors(bs);
+    int64_t total_sectors = bdrv_co_nb_sectors(bs);
     int ret;
     uint64_t cluster_offset;
 
@@ -2899,7 +2987,7 @@ static int coroutine_fn vmdk_co_check(BlockDriverState *bs,
             break;
         }
         if (ret == VMDK_OK) {
-            int64_t extent_len = bdrv_getlength(extent->file->bs);
+            int64_t extent_len = bdrv_co_getlength(extent->file->bs);
             if (extent_len < 0) {
                 fprintf(stderr,
                         "ERROR: could not get extent file length for sector %"
@@ -2922,13 +3010,13 @@ static int coroutine_fn vmdk_co_check(BlockDriverState *bs,
     return ret;
 }
 
-static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs,
-                                                 Error **errp)
+static ImageInfoSpecific * GRAPH_RDLOCK
+vmdk_get_specific_info(BlockDriverState *bs, Error **errp)
 {
     int i;
     BDRVVmdkState *s = bs->opaque;
     ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1);
-    ImageInfoList **next;
+    VmdkExtentInfoList **tail;
 
     *spec_info = (ImageInfoSpecific){
         .type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
@@ -2943,12 +3031,9 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs,
         .parent_cid = s->parent_cid,
     };
 
-    next = &spec_info->u.vmdk.data->extents;
+    tail = &spec_info->u.vmdk.data->extents;
     for (i = 0; i < s->num_extents; i++) {
-        *next = g_new0(ImageInfoList, 1);
-        (*next)->value = vmdk_get_extent_info(&s->extents[i]);
-        (*next)->next = NULL;
-        next = &(*next)->next;
+        QAPI_LIST_APPEND(tail, vmdk_get_extent_info(&s->extents[i]));
     }
 
     return spec_info;
@@ -2961,7 +3046,8 @@ static bool vmdk_extents_type_eq(const VmdkExtent *a, const VmdkExtent *b)
            (a->flat || a->cluster_sectors == b->cluster_sectors);
 }
 
-static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+static int coroutine_fn
+vmdk_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     int i;
     BDRVVmdkState *s = bs->opaque;
@@ -2980,8 +3066,9 @@ static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
     return 0;
 }
 
-static void vmdk_gather_child_options(BlockDriverState *bs, QDict *target,
-                                      bool backing_overridden)
+static void GRAPH_RDLOCK
+vmdk_gather_child_options(BlockDriverState *bs, QDict *target,
+                          bool backing_overridden)
 {
     /* No children but file and backing can be explicitly specified (TODO) */
     qdict_put(target, "file",
@@ -3034,6 +3121,11 @@ static QemuOptsList vmdk_create_opts = {
             .help = "VMDK hardware version",
             .def_value_str = "undefined"
         },
+        {
+            .name = BLOCK_OPT_TOOLSVERSION,
+            .type = QEMU_OPT_STRING,
+            .help = "VMware guest tools version",
+        },
         {
             .name = BLOCK_OPT_SUBFMT,
             .type = QEMU_OPT_STRING,
@@ -3058,6 +3150,8 @@ static BlockDriver bdrv_vmdk = {
     .bdrv_open                    = vmdk_open,
     .bdrv_co_check                = vmdk_co_check,
     .bdrv_reopen_prepare          = vmdk_reopen_prepare,
+    .bdrv_reopen_commit           = vmdk_reopen_commit,
+    .bdrv_reopen_abort            = vmdk_reopen_abort,
     .bdrv_child_perm              = bdrv_default_perms,
     .bdrv_co_preadv               = vmdk_co_preadv,
     .bdrv_co_pwritev              = vmdk_co_pwritev,
@@ -3067,11 +3161,11 @@ static BlockDriver bdrv_vmdk = {
     .bdrv_co_create_opts          = vmdk_co_create_opts,
     .bdrv_co_create               = vmdk_co_create,
     .bdrv_co_block_status         = vmdk_co_block_status,
-    .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
+    .bdrv_co_get_allocated_file_size = vmdk_co_get_allocated_file_size,
     .bdrv_has_zero_init           = vmdk_has_zero_init,
     .bdrv_get_specific_info       = vmdk_get_specific_info,
     .bdrv_refresh_limits          = vmdk_refresh_limits,
-    .bdrv_get_info                = vmdk_get_info,
+    .bdrv_co_get_info             = vmdk_co_get_info,
     .bdrv_gather_child_options    = vmdk_gather_child_options,
 
     .is_format                    = true,
index 1ab55f92877f864cfea5a9fe537df086d954bd00..d95a204612b7d8982a3fe35e2097a27fb8548d4a 100644 (file)
 #include "migration/blocker.h"
 #include "qemu/bswap.h"
 #include "qemu/uuid.h"
+#include "qemu/memalign.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qapi-visit-block-core.h"
 
 /**************************************************************/
 
-#define HEADER_SIZE 512
-
 //#define CACHE
 
 enum vhd_type {
@@ -95,8 +94,11 @@ typedef struct vhd_footer {
     QemuUUID    uuid;
 
     uint8_t     in_saved_state;
+    uint8_t     reserved[427];
 } QEMU_PACKED VHDFooter;
 
+QEMU_BUILD_BUG_ON(sizeof(VHDFooter) != 512);
+
 typedef struct vhd_dyndisk_header {
     char        magic[8]; /* "cxsparse" */
 
@@ -127,11 +129,14 @@ typedef struct vhd_dyndisk_header {
         uint32_t    reserved;
         uint64_t    data_offset;
     } parent_locator[8];
+    uint8_t     reserved2[256];
 } QEMU_PACKED VHDDynDiskHeader;
 
+QEMU_BUILD_BUG_ON(sizeof(VHDDynDiskHeader) != 1024);
+
 typedef struct BDRVVPCState {
     CoMutex lock;
-    uint8_t footer_buf[HEADER_SIZE];
+    VHDFooter footer;
     uint64_t free_data_block_offset;
     int max_table_entries;
     uint32_t *pagetable;
@@ -172,8 +177,9 @@ static QemuOptsList vpc_runtime_opts = {
 
 static QemuOptsList vpc_create_opts;
 
-static uint32_t vpc_checksum(uint8_t *buf, size_t size)
+static uint32_t vpc_checksum(void *p, size_t size)
 {
+    uint8_t *buf = p;
     uint32_t res = 0;
     int i;
 
@@ -216,11 +222,10 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     BDRVVPCState *s = bs->opaque;
     int i;
     VHDFooter *footer;
-    VHDDynDiskHeader *dyndisk_header;
     QemuOpts *opts = NULL;
     Error *local_err = NULL;
     bool use_chs;
-    uint8_t buf[HEADER_SIZE];
+    VHDDynDiskHeader dyndisk_header;
     uint32_t checksum;
     uint64_t computed_size;
     uint64_t pagetable_size;
@@ -228,12 +233,13 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     int ret;
     int64_t bs_size;
 
-    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-                               BDRV_CHILD_IMAGE, false, errp);
-    if (!bs->file) {
-        return -EINVAL;
+    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+    if (ret < 0) {
+        return ret;
     }
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
     if (!qemu_opts_absorb_qdict(opts, options, errp)) {
         ret = -EINVAL;
@@ -247,32 +253,33 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
+    ret = bdrv_pread(bs->file, 0, sizeof(s->footer), &s->footer, 0);
     if (ret < 0) {
         error_setg(errp, "Unable to read VHD header");
         goto fail;
     }
 
-    footer = (VHDFooter *) s->footer_buf;
+    footer = &s->footer;
     if (strncmp(footer->creator, "conectix", 8)) {
         int64_t offset = bdrv_getlength(bs->file->bs);
         if (offset < 0) {
             ret = offset;
             error_setg(errp, "Invalid file size");
             goto fail;
-        } else if (offset < HEADER_SIZE) {
+        } else if (offset < sizeof(*footer)) {
             ret = -EINVAL;
             error_setg(errp, "File too small for a VHD header");
             goto fail;
         }
 
         /* If a fixed disk, the footer is found only at the end of the file */
-        ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
-                         HEADER_SIZE);
+        ret = bdrv_pread(bs->file, offset - sizeof(*footer), sizeof(*footer),
+                         footer, 0);
         if (ret < 0) {
             goto fail;
         }
-        if (strncmp(footer->creator, "conectix", 8)) {
+        if (strncmp(footer->creator, "conectix", 8) ||
+            be32_to_cpu(footer->type) != VHD_FIXED) {
             error_setg(errp, "invalid VPC image");
             ret = -EINVAL;
             goto fail;
@@ -282,7 +289,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
 
     checksum = be32_to_cpu(footer->checksum);
     footer->checksum = 0;
-    if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum) {
+    if (vpc_checksum(footer, sizeof(*footer)) != checksum) {
         error_setg(errp, "Incorrect header checksum");
         ret = -EINVAL;
         goto fail;
@@ -340,22 +347,20 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     if (disk_type == VHD_DYNAMIC) {
-        ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
-                         HEADER_SIZE);
+        ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset),
+                         sizeof(dyndisk_header), &dyndisk_header, 0);
         if (ret < 0) {
             error_setg(errp, "Error reading dynamic VHD header");
             goto fail;
         }
 
-        dyndisk_header = (VHDDynDiskHeader *) buf;
-
-        if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
+        if (strncmp(dyndisk_header.magic, "cxsparse", 8)) {
             error_setg(errp, "Invalid header magic");
             ret = -EINVAL;
             goto fail;
         }
 
-        s->block_size = be32_to_cpu(dyndisk_header->block_size);
+        s->block_size = be32_to_cpu(dyndisk_header.block_size);
         if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
             error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
             ret = -EINVAL;
@@ -363,7 +368,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
         }
         s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
 
-        s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
+        s->max_table_entries = be32_to_cpu(dyndisk_header.max_table_entries);
 
         if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
             error_setg(errp, "Too many blocks");
@@ -395,10 +400,10 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
             goto fail;
         }
 
-        s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
+        s->bat_offset = be64_to_cpu(dyndisk_header.table_offset);
 
-        ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
-                         pagetable_size);
+        ret = bdrv_pread(bs->file, s->bat_offset, pagetable_size,
+                         s->pagetable, 0);
         if (ret < 0) {
             error_setg(errp, "Error reading pagetable");
             goto fail;
@@ -446,9 +451,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
                "does not support live migration",
                bdrv_get_device_or_node_name(bs));
-    ret = migrate_add_blocker(s->migration_blocker, errp);
+
+    ret = migrate_add_blocker_normal(&s->migration_blocker, errp);
     if (ret < 0) {
-        error_free(s->migration_blocker);
         goto fail;
     }
 
@@ -483,8 +488,8 @@ static int vpc_reopen_prepare(BDRVReopenState *state,
  * operation (the block bitmaps is updated then), 0 otherwise.
  * If write is true then err must not be NULL.
  */
-static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
-                                       bool write, int *err)
+static int64_t coroutine_fn GRAPH_RDLOCK
+get_image_offset(BlockDriverState *bs, uint64_t offset, bool write, int *err)
 {
     BDRVVPCState *s = bs->opaque;
     uint64_t bitmap_offset, block_offset;
@@ -507,12 +512,12 @@ static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
        miss sparse read optimization, but it's not a problem in terms of
        correctness. */
     if (write && (s->last_bitmap_offset != bitmap_offset)) {
-        uint8_t bitmap[s->bitmap_size];
+        g_autofree uint8_t *bitmap = g_malloc(s->bitmap_size);
         int r;
 
         s->last_bitmap_offset = bitmap_offset;
         memset(bitmap, 0xff, s->bitmap_size);
-        r = bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+        r = bdrv_co_pwrite_sync(bs->file, bitmap_offset, s->bitmap_size, bitmap, 0);
         if (r < 0) {
             *err = r;
             return -2;
@@ -528,13 +533,13 @@ static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
  *
  * Returns 0 on success and < 0 on error
  */
-static int rewrite_footer(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK rewrite_footer(BlockDriverState *bs)
 {
     int ret;
     BDRVVPCState *s = bs->opaque;
     int64_t offset = s->free_data_block_offset;
 
-    ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
+    ret = bdrv_co_pwrite_sync(bs->file, offset, sizeof(s->footer), &s->footer, 0);
     if (ret < 0)
         return ret;
 
@@ -548,13 +553,14 @@ static int rewrite_footer(BlockDriverState *bs)
  *
  * Returns the sectors' offset in the image file on success and < 0 on error
  */
-static int64_t alloc_block(BlockDriverState *bs, int64_t offset)
+static int64_t coroutine_fn GRAPH_RDLOCK
+alloc_block(BlockDriverState *bs, int64_t offset)
 {
     BDRVVPCState *s = bs->opaque;
     int64_t bat_offset;
     uint32_t index, bat_value;
     int ret;
-    uint8_t bitmap[s->bitmap_size];
+    g_autofree uint8_t *bitmap = g_malloc(s->bitmap_size);
 
     /* Check if sector_num is valid */
     if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
@@ -568,8 +574,8 @@ static int64_t alloc_block(BlockDriverState *bs, int64_t offset)
 
     /* Initialize the block's bitmap */
     memset(bitmap, 0xff, s->bitmap_size);
-    ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
-        s->bitmap_size);
+    ret = bdrv_co_pwrite_sync(bs->file, s->free_data_block_offset,
+                              s->bitmap_size, bitmap, 0);
     if (ret < 0) {
         return ret;
     }
@@ -583,7 +589,7 @@ static int64_t alloc_block(BlockDriverState *bs, int64_t offset)
     /* Write BAT entry to disk */
     bat_offset = s->bat_offset + (4 * index);
     bat_value = cpu_to_be32(s->pagetable[index]);
-    ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
+    ret = bdrv_co_pwrite_sync(bs->file, bat_offset, 4, &bat_value, 0);
     if (ret < 0)
         goto fail;
 
@@ -594,31 +600,30 @@ fail:
     return ret;
 }
 
-static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+static int coroutine_fn
+vpc_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
-    VHDFooter *footer = (VHDFooter *) s->footer_buf;
 
-    if (be32_to_cpu(footer->type) != VHD_FIXED) {
+    if (be32_to_cpu(s->footer.type) != VHD_FIXED) {
         bdi->cluster_size = s->block_size;
     }
 
     return 0;
 }
 
-static int coroutine_fn
-vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-              QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+vpc_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+              QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVVPCState *s = bs->opaque;
     int ret;
     int64_t image_offset;
     int64_t n_bytes;
     int64_t bytes_done = 0;
-    VHDFooter *footer = (VHDFooter *) s->footer_buf;
     QEMUIOVector local_qiov;
 
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    if (be32_to_cpu(s->footer.type) == VHD_FIXED) {
         return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0);
     }
 
@@ -657,19 +662,18 @@ fail:
     return ret;
 }
 
-static int coroutine_fn
-vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-               QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+vpc_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+               QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     BDRVVPCState *s = bs->opaque;
     int64_t image_offset;
     int64_t n_bytes;
     int64_t bytes_done = 0;
     int ret = 0;
-    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
     QEMUIOVector local_qiov;
 
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    if (be32_to_cpu(s->footer.type) == VHD_FIXED) {
         return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0);
     }
 
@@ -716,20 +720,19 @@ fail:
     return ret;
 }
 
-static int coroutine_fn vpc_co_block_status(BlockDriverState *bs,
-                                            bool want_zero,
-                                            int64_t offset, int64_t bytes,
-                                            int64_t *pnum, int64_t *map,
-                                            BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+vpc_co_block_status(BlockDriverState *bs, bool want_zero,
+                    int64_t offset, int64_t bytes,
+                    int64_t *pnum, int64_t *map,
+                    BlockDriverState **file)
 {
     BDRVVPCState *s = bs->opaque;
-    VHDFooter *footer = (VHDFooter*) s->footer_buf;
     int64_t image_offset;
     bool allocated;
     int ret;
     int64_t n;
 
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    if (be32_to_cpu(s->footer.type) == VHD_FIXED) {
         *pnum = bytes;
         *map = offset;
         *file = bs->file->bs;
@@ -819,11 +822,11 @@ static int calculate_geometry(int64_t total_sectors, uint16_t *cyls,
     return 0;
 }
 
-static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
-                               int64_t total_sectors)
+static int coroutine_fn create_dynamic_disk(BlockBackend *blk, VHDFooter *footer,
+                                            int64_t total_sectors)
 {
-    VHDDynDiskHeader *dyndisk_header =
-        (VHDDynDiskHeader *) buf;
+    VHDDynDiskHeader dyndisk_header;
+    uint8_t bat_sector[512];
     size_t block_size, num_bat_entries;
     int i;
     int ret;
@@ -833,13 +836,13 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
     block_size = 0x200000;
     num_bat_entries = DIV_ROUND_UP(total_sectors, block_size / 512);
 
-    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
+    ret = blk_co_pwrite(blk, offset, sizeof(*footer), footer, 0);
     if (ret < 0) {
         goto fail;
     }
 
     offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
-    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
+    ret = blk_co_pwrite(blk, offset, sizeof(*footer), footer, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -847,9 +850,9 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
     /* Write the initial BAT */
     offset = 3 * 512;
 
-    memset(buf, 0xFF, 512);
+    memset(bat_sector, 0xFF, 512);
     for (i = 0; i < DIV_ROUND_UP(num_bat_entries * 4, 512); i++) {
-        ret = blk_pwrite(blk, offset, buf, 512, 0);
+        ret = blk_co_pwrite(blk, offset, 512, bat_sector, 0);
         if (ret < 0) {
             goto fail;
         }
@@ -857,26 +860,27 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
     }
 
     /* Prepare the Dynamic Disk Header */
-    memset(buf, 0, 1024);
+    memset(&dyndisk_header, 0, sizeof(dyndisk_header));
 
-    memcpy(dyndisk_header->magic, "cxsparse", 8);
+    memcpy(dyndisk_header.magic, "cxsparse", 8);
 
     /*
      * Note: The spec is actually wrong here for data_offset, it says
      * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
      */
-    dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
-    dyndisk_header->table_offset = cpu_to_be64(3 * 512);
-    dyndisk_header->version = cpu_to_be32(0x00010000);
-    dyndisk_header->block_size = cpu_to_be32(block_size);
-    dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
+    dyndisk_header.data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
+    dyndisk_header.table_offset = cpu_to_be64(3 * 512);
+    dyndisk_header.version = cpu_to_be32(0x00010000);
+    dyndisk_header.block_size = cpu_to_be32(block_size);
+    dyndisk_header.max_table_entries = cpu_to_be32(num_bat_entries);
 
-    dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
+    dyndisk_header.checksum = cpu_to_be32(
+        vpc_checksum(&dyndisk_header, sizeof(dyndisk_header)));
 
     /* Write the header */
     offset = 512;
 
-    ret = blk_pwrite(blk, offset, buf, 1024, 0);
+    ret = blk_co_pwrite(blk, offset, sizeof(dyndisk_header), &dyndisk_header, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -886,20 +890,21 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
     return ret;
 }
 
-static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
-                             int64_t total_size, Error **errp)
+static int coroutine_fn create_fixed_disk(BlockBackend *blk, VHDFooter *footer,
+                                          int64_t total_size, Error **errp)
 {
     int ret;
 
     /* Add footer to total size */
-    total_size += HEADER_SIZE;
+    total_size += sizeof(*footer);
 
-    ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp);
+    ret = blk_co_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp);
     if (ret < 0) {
         return ret;
     }
 
-    ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
+    ret = blk_co_pwrite(blk, total_size - sizeof(*footer), sizeof(*footer),
+                        footer, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Unable to write VHD header");
         return ret;
@@ -964,15 +969,14 @@ static int calculate_rounded_image_size(BlockdevCreateOptionsVpc *vpc_opts,
     return 0;
 }
 
-static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
-                                      Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vpc_co_create(BlockdevCreateOptions *opts, Error **errp)
 {
     BlockdevCreateOptionsVpc *vpc_opts;
     BlockBackend *blk = NULL;
     BlockDriverState *bs = NULL;
 
-    uint8_t buf[1024];
-    VHDFooter *footer = (VHDFooter *) buf;
+    VHDFooter footer;
     uint16_t cyls = 0;
     uint8_t heads = 0;
     uint8_t secs_per_cyl = 0;
@@ -1003,13 +1007,13 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
     }
 
     /* Create BlockBackend to write to the image */
-    bs = bdrv_open_blockdev_ref(vpc_opts->file, errp);
+    bs = bdrv_co_open_blockdev_ref(vpc_opts->file, errp);
     if (bs == NULL) {
         return -EIO;
     }
 
-    blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
-                          errp);
+    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
+                             errp);
     if (!blk) {
         ret = -EPERM;
         goto out;
@@ -1035,60 +1039,59 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
     }
 
     /* Prepare the Hard Disk Footer */
-    memset(buf, 0, 1024);
+    memset(&footer, 0, sizeof(footer));
 
-    memcpy(footer->creator, "conectix", 8);
+    memcpy(footer.creator, "conectix", 8);
     if (vpc_opts->force_size) {
-        memcpy(footer->creator_app, "qem2", 4);
+        memcpy(footer.creator_app, "qem2", 4);
     } else {
-        memcpy(footer->creator_app, "qemu", 4);
+        memcpy(footer.creator_app, "qemu", 4);
     }
-    memcpy(footer->creator_os, "Wi2k", 4);
+    memcpy(footer.creator_os, "Wi2k", 4);
 
-    footer->features = cpu_to_be32(0x02);
-    footer->version = cpu_to_be32(0x00010000);
+    footer.features = cpu_to_be32(0x02);
+    footer.version = cpu_to_be32(0x00010000);
     if (disk_type == VHD_DYNAMIC) {
-        footer->data_offset = cpu_to_be64(HEADER_SIZE);
+        footer.data_offset = cpu_to_be64(sizeof(footer));
     } else {
-        footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
+        footer.data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
     }
-    footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
+    footer.timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
 
     /* Version of Virtual PC 2007 */
-    footer->major = cpu_to_be16(0x0005);
-    footer->minor = cpu_to_be16(0x0003);
-    footer->orig_size = cpu_to_be64(total_size);
-    footer->current_size = cpu_to_be64(total_size);
-    footer->cyls = cpu_to_be16(cyls);
-    footer->heads = heads;
-    footer->secs_per_cyl = secs_per_cyl;
+    footer.major = cpu_to_be16(0x0005);
+    footer.minor = cpu_to_be16(0x0003);
+    footer.orig_size = cpu_to_be64(total_size);
+    footer.current_size = cpu_to_be64(total_size);
+    footer.cyls = cpu_to_be16(cyls);
+    footer.heads = heads;
+    footer.secs_per_cyl = secs_per_cyl;
 
-    footer->type = cpu_to_be32(disk_type);
+    footer.type = cpu_to_be32(disk_type);
 
     qemu_uuid_generate(&uuid);
-    footer->uuid = uuid;
+    footer.uuid = uuid;
 
-    footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
+    footer.checksum = cpu_to_be32(vpc_checksum(&footer, sizeof(footer)));
 
     if (disk_type == VHD_DYNAMIC) {
-        ret = create_dynamic_disk(blk, buf, total_sectors);
+        ret = create_dynamic_disk(blk, &footer, total_sectors);
         if (ret < 0) {
             error_setg(errp, "Unable to create or write VHD header");
         }
     } else {
-        ret = create_fixed_disk(blk, buf, total_size, errp);
+        ret = create_fixed_disk(blk, &footer, total_size, errp);
     }
 
 out:
-    blk_unref(blk);
-    bdrv_unref(bs);
+    blk_co_unref(blk);
+    bdrv_co_unref(bs);
     return ret;
 }
 
-static int coroutine_fn vpc_co_create_opts(BlockDriver *drv,
-                                           const char *filename,
-                                           QemuOpts *opts,
-                                           Error **errp)
+static int coroutine_fn GRAPH_UNLOCKED
+vpc_co_create_opts(BlockDriver *drv, const char *filename,
+                   QemuOpts *opts, Error **errp)
 {
     BlockdevCreateOptions *create_options = NULL;
     QDict *qdict;
@@ -1110,13 +1113,13 @@ static int coroutine_fn vpc_co_create_opts(BlockDriver *drv,
     }
 
     /* Create and open the file (protocol layer) */
-    ret = bdrv_create_file(filename, opts, errp);
+    ret = bdrv_co_create_file(filename, opts, errp);
     if (ret < 0) {
         goto fail;
     }
 
-    bs = bdrv_open(filename, NULL, NULL,
-                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
+    bs = bdrv_co_open(filename, NULL, NULL,
+                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
     if (bs == NULL) {
         ret = -EIO;
         goto fail;
@@ -1161,18 +1164,17 @@ static int coroutine_fn vpc_co_create_opts(BlockDriver *drv,
 
 fail:
     qobject_unref(qdict);
-    bdrv_unref(bs);
+    bdrv_co_unref(bs);
     qapi_free_BlockdevCreateOptions(create_options);
     return ret;
 }
 
 
-static int vpc_has_zero_init(BlockDriverState *bs)
+static int GRAPH_RDLOCK vpc_has_zero_init(BlockDriverState *bs)
 {
     BDRVVPCState *s = bs->opaque;
-    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    if (be32_to_cpu(s->footer.type) == VHD_FIXED) {
         return bdrv_has_zero_init(bs->file->bs);
     } else {
         return 1;
@@ -1187,8 +1189,7 @@ static void vpc_close(BlockDriverState *bs)
     g_free(s->pageentry_u8);
 #endif
 
-    migrate_del_blocker(s->migration_blocker);
-    error_free(s->migration_blocker);
+    migrate_del_blocker(&s->migration_blocker);
 }
 
 static QemuOptsList vpc_create_opts = {
@@ -1240,7 +1241,7 @@ static BlockDriver bdrv_vpc = {
     .bdrv_co_pwritev            = vpc_co_pwritev,
     .bdrv_co_block_status       = vpc_co_block_status,
 
-    .bdrv_get_info          = vpc_get_info,
+    .bdrv_co_get_info       = vpc_co_get_info,
 
     .is_format              = true,
     .create_opts            = &vpc_create_opts,
index 54807f82ca1f362e956cccddae85cb73362fc5b7..9d050ba3aea08eeac3d284e7d1e41545ff5b9f9f 100644 (file)
@@ -25,7 +25,9 @@
 
 #include "qemu/osdep.h"
 #include <dirent.h>
+#include <glib/gstdio.h>
 #include "qapi/error.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "block/qdict.h"
 #include "qemu/module.h"
@@ -499,7 +501,7 @@ static bool valid_filename(const unsigned char *name)
               (c >= 'A' && c <= 'Z') ||
               (c >= 'a' && c <= 'z') ||
               c > 127 ||
-              strchr("$%'-_@~`!(){}^#&.+,;=[]", c) != NULL))
+              strchr(" $%'-_@~`!(){}^#&.+,;=[]", c) != NULL))
         {
             return false;
         }
@@ -775,7 +777,6 @@ static int read_directory(BDRVVVFATState* s, int mapping_index)
     while((entry=readdir(dir))) {
         unsigned int length=strlen(dirname)+2+strlen(entry->d_name);
         char* buffer;
-        direntry_t* direntry;
         struct stat st;
         int is_dot=!strcmp(entry->d_name,".");
         int is_dotdot=!strcmp(entry->d_name,"..");
@@ -855,7 +856,7 @@ static int read_directory(BDRVVVFATState* s, int mapping_index)
 
     /* fill with zeroes up to the end of the cluster */
     while(s->directory.next%(0x10*s->sectors_per_cluster)) {
-        direntry_t* direntry=array_get_next(&(s->directory));
+        direntry = array_get_next(&(s->directory));
         memset(direntry,0,sizeof(direntry_t));
     }
 
@@ -882,7 +883,7 @@ static int read_directory(BDRVVVFATState* s, int mapping_index)
     return 0;
 }
 
-static inline uint32_t sector2cluster(BDRVVVFATState* s,off_t sector_num)
+static inline int32_t sector2cluster(BDRVVVFATState* s,off_t sector_num)
 {
     return (sector_num - s->offset_to_root_dir) / s->sectors_per_cluster;
 }
@@ -1051,7 +1052,7 @@ static BDRVVVFATState *vvv = NULL;
 #endif
 
 static int enable_write_target(BlockDriverState *bs, Error **errp);
-static int is_consistent(BDRVVVFATState *s);
+static int coroutine_fn is_consistent(BDRVVVFATState *s);
 
 static QemuOptsList runtime_opts = {
     .name = "vvfat",
@@ -1143,6 +1144,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
     QemuOpts *opts;
     int ret;
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
 #ifdef DEBUG
     vvv = s;
 #endif
@@ -1230,6 +1233,7 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
                  dirname, cyls, heads, secs));
 
     s->sector_count = cyls * heads * secs - s->offset_to_bootsector;
+    bs->total_sectors = cyls * heads * secs;
 
     if (qemu_opt_get_bool(opts, "rw", false)) {
         if (!bdrv_is_read_only(bs)) {
@@ -1250,8 +1254,6 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
         }
     }
 
-    bs->total_sectors = cyls * heads * secs;
-
     if (init_directories(s, dirname, heads, secs, errp)) {
         ret = -EIO;
         goto fail;
@@ -1266,9 +1268,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
                    "The vvfat (rw) format used by node '%s' "
                    "does not support live migration",
                    bdrv_get_device_or_node_name(bs));
-        ret = migrate_add_blocker(s->migration_blocker, errp);
+        ret = migrate_add_blocker_normal(&s->migration_blocker, errp);
         if (ret < 0) {
-            error_free(s->migration_blocker);
             goto fail;
         }
     }
@@ -1279,8 +1280,18 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
 
     qemu_co_mutex_init(&s->lock);
 
-    ret = 0;
+    qemu_opts_del(opts);
+
+    return 0;
+
 fail:
+    g_free(s->qcow_filename);
+    s->qcow_filename = NULL;
+    g_free(s->cluster_buffer);
+    s->cluster_buffer = NULL;
+    g_free(s->used_clusters);
+    s->used_clusters = NULL;
+
     qemu_opts_del(opts);
     return ret;
 }
@@ -1458,8 +1469,8 @@ static void print_mapping(const mapping_t* mapping)
 }
 #endif
 
-static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
+static int coroutine_fn GRAPH_RDLOCK
+vvfat_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors)
 {
     BDRVVVFATState *s = bs->opaque;
     int i;
@@ -1470,8 +1481,8 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
         if (s->qcow) {
             int64_t n;
             int ret;
-            ret = bdrv_is_allocated(s->qcow->bs, sector_num * BDRV_SECTOR_SIZE,
-                                    (nb_sectors - i) * BDRV_SECTOR_SIZE, &n);
+            ret = bdrv_co_is_allocated(s->qcow->bs, sector_num * BDRV_SECTOR_SIZE,
+                                       (nb_sectors - i) * BDRV_SECTOR_SIZE, &n);
             if (ret < 0) {
                 return ret;
             }
@@ -1479,8 +1490,8 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
                 DLOG(fprintf(stderr, "sectors %" PRId64 "+%" PRId64
                              " allocated\n", sector_num,
                              n >> BDRV_SECTOR_BITS));
-                if (bdrv_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE,
-                               buf + i * 0x200, n) < 0) {
+                if (bdrv_co_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE, n,
+                                  buf + i * 0x200, 0) < 0) {
                     return -1;
                 }
                 i += (n >> BDRV_SECTOR_BITS) - 1;
@@ -1521,9 +1532,9 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
     return 0;
 }
 
-static int coroutine_fn
-vvfat_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+vvfat_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     int ret;
     BDRVVVFATState *s = bs->opaque;
@@ -1785,8 +1796,8 @@ static inline uint32_t modified_fat_get(BDRVVVFATState* s,
     }
 }
 
-static inline bool cluster_was_modified(BDRVVVFATState *s,
-                                        uint32_t cluster_num)
+static inline bool coroutine_fn GRAPH_RDLOCK
+cluster_was_modified(BDRVVVFATState *s, uint32_t cluster_num)
 {
     int was_modified = 0;
     int i;
@@ -1796,10 +1807,10 @@ static inline bool cluster_was_modified(BDRVVVFATState *s,
     }
 
     for (i = 0; !was_modified && i < s->sectors_per_cluster; i++) {
-        was_modified = bdrv_is_allocated(s->qcow->bs,
-                                         (cluster2sector(s, cluster_num) +
-                                          i) * BDRV_SECTOR_SIZE,
-                                         BDRV_SECTOR_SIZE, NULL);
+        was_modified = bdrv_co_is_allocated(s->qcow->bs,
+                                            (cluster2sector(s, cluster_num) +
+                                             i) * BDRV_SECTOR_SIZE,
+                                            BDRV_SECTOR_SIZE, NULL);
     }
 
     /*
@@ -1841,8 +1852,8 @@ typedef enum {
  * Further, the files/directories handled by this function are
  * assumed to be *not* deleted (and *only* those).
  */
-static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
-        direntry_t* direntry, const char* path)
+static uint32_t coroutine_fn GRAPH_RDLOCK
+get_cluster_count_for_direntry(BDRVVVFATState* s, direntry_t* direntry, const char* path)
 {
     /*
      * This is a little bit tricky:
@@ -1951,25 +1962,26 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
                  * This is horribly inefficient, but that is okay, since
                  * it is rarely executed, if at all.
                  */
-                int64_t offset = cluster2sector(s, cluster_num);
+                int64_t offs = cluster2sector(s, cluster_num);
 
                 vvfat_close_current_file(s);
                 for (i = 0; i < s->sectors_per_cluster; i++) {
                     int res;
 
-                    res = bdrv_is_allocated(s->qcow->bs,
-                                            (offset + i) * BDRV_SECTOR_SIZE,
-                                            BDRV_SECTOR_SIZE, NULL);
+                    res = bdrv_co_is_allocated(s->qcow->bs,
+                                               (offs + i) * BDRV_SECTOR_SIZE,
+                                               BDRV_SECTOR_SIZE, NULL);
                     if (res < 0) {
                         return -1;
                     }
                     if (!res) {
-                        res = vvfat_read(s->bs, offset, s->cluster_buffer, 1);
+                        res = vvfat_read(s->bs, offs, s->cluster_buffer, 1);
                         if (res) {
                             return -1;
                         }
-                        res = bdrv_pwrite(s->qcow, offset * BDRV_SECTOR_SIZE,
-                                          s->cluster_buffer, BDRV_SECTOR_SIZE);
+                        res = bdrv_co_pwrite(s->qcow, offs * BDRV_SECTOR_SIZE,
+                                             BDRV_SECTOR_SIZE, s->cluster_buffer,
+                                             0);
                         if (res < 0) {
                             return -2;
                         }
@@ -1999,8 +2011,8 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
  * It returns 0 upon inconsistency or error, and the number of clusters
  * used by the directory, its subdirectories and their files.
  */
-static int check_directory_consistency(BDRVVVFATState *s,
-        int cluster_num, const char* path)
+static int coroutine_fn GRAPH_RDLOCK
+check_directory_consistency(BDRVVVFATState *s, int cluster_num, const char* path)
 {
     int ret = 0;
     unsigned char* cluster = g_malloc(s->cluster_size);
@@ -2126,7 +2138,8 @@ DLOG(fprintf(stderr, "check direntry %d:\n", i); print_direntry(direntries + i))
 }
 
 /* returns 1 on success */
-static int is_consistent(BDRVVVFATState* s)
+static int coroutine_fn GRAPH_RDLOCK
+is_consistent(BDRVVVFATState* s)
 {
     int i, check;
     int used_clusters_count = 0;
@@ -2402,8 +2415,8 @@ static int commit_mappings(BDRVVVFATState* s,
     return 0;
 }
 
-static int commit_direntries(BDRVVVFATState* s,
-        int dir_index, int parent_mapping_index)
+static int coroutine_fn GRAPH_RDLOCK
+commit_direntries(BDRVVVFATState* s, int dir_index, int parent_mapping_index)
 {
     direntry_t* direntry = array_get(&(s->directory), dir_index);
     uint32_t first_cluster = dir_index == 0 ? 0 : begin_of_direntry(direntry);
@@ -2454,8 +2467,9 @@ static int commit_direntries(BDRVVVFATState* s,
 
     for (c = first_cluster; !fat_eof(s, c); c = modified_fat_get(s, c)) {
         direntry_t *first_direntry;
-        void* direntry = array_get(&(s->directory), current_dir_index);
-        int ret = vvfat_read(s->bs, cluster2sector(s, c), direntry,
+
+        direntry = array_get(&(s->directory), current_dir_index);
+        ret = vvfat_read(s->bs, cluster2sector(s, c), (uint8_t *)direntry,
                 s->sectors_per_cluster);
         if (ret)
             return ret;
@@ -2492,8 +2506,8 @@ static int commit_direntries(BDRVVVFATState* s,
 
 /* commit one file (adjust contents, adjust mapping),
    return first_mapping_index */
-static int commit_one_file(BDRVVVFATState* s,
-        int dir_index, uint32_t offset)
+static int coroutine_fn GRAPH_RDLOCK
+commit_one_file(BDRVVVFATState* s, int dir_index, uint32_t offset)
 {
     direntry_t* direntry = array_get(&(s->directory), dir_index);
     uint32_t c = begin_of_direntry(direntry);
@@ -2677,12 +2691,12 @@ static int handle_renames_and_mkdirs(BDRVVVFATState* s)
                 direntry_t* direntry = array_get(&(s->directory),
                         mapping->info.dir.first_dir_index);
                 uint32_t c = mapping->begin;
-                int i = 0;
+                int j = 0;
 
                 /* recurse */
                 while (!fat_eof(s, c)) {
                     do {
-                        direntry_t* d = direntry + i;
+                        direntry_t *d = direntry + j;
 
                         if (is_file(d) || (is_directory(d) && !is_dot(d))) {
                             int l;
@@ -2703,8 +2717,8 @@ static int handle_renames_and_mkdirs(BDRVVVFATState* s)
 
                             schedule_rename(s, m->begin, new_path);
                         }
-                        i++;
-                    } while((i % (0x10 * s->sectors_per_cluster)) != 0);
+                        j++;
+                    } while (j % (0x10 * s->sectors_per_cluster) != 0);
                     c = fat_get(s, c);
                 }
             }
@@ -2716,13 +2730,9 @@ static int handle_renames_and_mkdirs(BDRVVVFATState* s)
             mapping_t* mapping;
             int j, parent_path_len;
 
-#ifdef __MINGW32__
-            if (mkdir(commit->path))
-                return -5;
-#else
-            if (mkdir(commit->path, 0755))
+            if (g_mkdir(commit->path, 0755)) {
                 return -5;
-#endif
+            }
 
             mapping = insert_mapping(s, commit->param.mkdir.cluster,
                     commit->param.mkdir.cluster + 1);
@@ -2762,7 +2772,7 @@ static int handle_renames_and_mkdirs(BDRVVVFATState* s)
 /*
  * TODO: make sure that the short name is not matching *another* file
  */
-static int handle_commits(BDRVVVFATState* s)
+static int coroutine_fn GRAPH_RDLOCK handle_commits(BDRVVVFATState* s)
 {
     int i, fail = 0;
 
@@ -2776,13 +2786,10 @@ static int handle_commits(BDRVVVFATState* s)
             fail = -2;
             break;
         case ACTION_WRITEOUT: {
-#ifndef NDEBUG
-            /* these variables are only used by assert() below */
             direntry_t* entry = array_get(&(s->directory),
                     commit->param.writeout.dir_index);
             uint32_t begin = begin_of_direntry(entry);
             mapping_t* mapping = find_mapping_for_cluster(s, begin);
-#endif
 
             assert(mapping);
             assert(mapping->begin == begin);
@@ -2798,16 +2805,16 @@ static int handle_commits(BDRVVVFATState* s)
             int begin = commit->param.new_file.first_cluster;
             mapping_t* mapping = find_mapping_for_cluster(s, begin);
             direntry_t* entry;
-            int i;
+            int j;
 
             /* find direntry */
-            for (i = 0; i < s->directory.next; i++) {
-                entry = array_get(&(s->directory), i);
+            for (j = 0; j < s->directory.next; j++) {
+                entry = array_get(&(s->directory), j);
                 if (is_file(entry) && begin_of_direntry(entry) == begin)
                     break;
             }
 
-            if (i >= s->directory.next) {
+            if (j >= s->directory.next) {
                 fail = -6;
                 continue;
             }
@@ -2827,8 +2834,9 @@ static int handle_commits(BDRVVVFATState* s)
             mapping->mode = MODE_NORMAL;
             mapping->info.file.offset = 0;
 
-            if (commit_one_file(s, i, 0))
+            if (commit_one_file(s, j, 0)) {
                 fail = -7;
+            }
 
             break;
         }
@@ -2908,7 +2916,7 @@ static int handle_deletes(BDRVVVFATState* s)
  * - recurse direntries from root (using bs->bdrv_pread)
  * - delete files corresponding to mappings marked as deleted
  */
-static int do_commit(BDRVVVFATState* s)
+static int coroutine_fn GRAPH_RDLOCK do_commit(BDRVVVFATState* s)
 {
     int ret = 0;
 
@@ -2958,7 +2966,7 @@ DLOG(checkpoint());
     return 0;
 }
 
-static int try_commit(BDRVVVFATState* s)
+static int coroutine_fn GRAPH_RDLOCK try_commit(BDRVVVFATState* s)
 {
     vvfat_close_current_file(s);
 DLOG(checkpoint());
@@ -2967,11 +2975,13 @@ DLOG(checkpoint());
     return do_commit(s);
 }
 
-static int vvfat_write(BlockDriverState *bs, int64_t sector_num,
-                    const uint8_t *buf, int nb_sectors)
+static int coroutine_fn GRAPH_RDLOCK
+vvfat_write(BlockDriverState *bs, int64_t sector_num,
+            const uint8_t *buf, int nb_sectors)
 {
     BDRVVVFATState *s = bs->opaque;
     int i, ret;
+    int first_cluster, last_cluster;
 
 DLOG(checkpoint());
 
@@ -2982,17 +2992,52 @@ DLOG(checkpoint());
 
     vvfat_close_current_file(s);
 
+    if (sector_num == s->offset_to_bootsector && nb_sectors == 1) {
+        /*
+         * Write on bootsector. Allow only changing the reserved1 field,
+         * used to mark volume dirtiness
+         */
+        unsigned char *bootsector = s->first_sectors
+                                    + s->offset_to_bootsector * 0x200;
+        /*
+         * LATER TODO: if FAT32, this is wrong (see init_directories(),
+         * which always creates a FAT16 bootsector)
+         */
+        const int reserved1_offset = offsetof(bootsector_t, u.fat16.reserved1);
+
+        for (i = 0; i < 0x200; i++) {
+            if (i != reserved1_offset && bootsector[i] != buf[i]) {
+                fprintf(stderr, "Tried to write to protected bootsector\n");
+                return -1;
+            }
+        }
+
+        /* Update bootsector with the only updatable byte, and return success */
+        bootsector[reserved1_offset] = buf[reserved1_offset];
+        return 0;
+    }
+
     /*
      * Some sanity checks:
      * - do not allow writing to the boot sector
      */
-
     if (sector_num < s->offset_to_fat)
         return -1;
 
-    for (i = sector2cluster(s, sector_num);
-            i <= sector2cluster(s, sector_num + nb_sectors - 1);) {
-        mapping_t* mapping = find_mapping_for_cluster(s, i);
+    /*
+     * Values will be negative for writes to the FAT, which is located before
+     * the root directory.
+     */
+    first_cluster = sector2cluster(s, sector_num);
+    last_cluster = sector2cluster(s, sector_num + nb_sectors - 1);
+
+    for (i = first_cluster; i <= last_cluster;) {
+        mapping_t *mapping = NULL;
+
+        if (i >= 0) {
+            mapping = find_mapping_for_cluster(s, i);
+        }
+
         if (mapping) {
             if (mapping->read_only) {
                 fprintf(stderr, "Tried to write to write-protected file %s\n",
@@ -3032,25 +3077,27 @@ DLOG(checkpoint());
                 }
             }
             i = mapping->end;
-        } else
+        } else {
             i++;
+        }
     }
 
     /*
      * Use qcow backend. Commit later.
      */
 DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors));
-    ret = bdrv_pwrite(s->qcow, sector_num * BDRV_SECTOR_SIZE, buf,
-                      nb_sectors * BDRV_SECTOR_SIZE);
+    ret = bdrv_co_pwrite(s->qcow, sector_num * BDRV_SECTOR_SIZE,
+                         nb_sectors * BDRV_SECTOR_SIZE, buf, 0);
     if (ret < 0) {
         fprintf(stderr, "Error writing to qcow backend\n");
         return ret;
     }
 
-    for (i = sector2cluster(s, sector_num);
-            i <= sector2cluster(s, sector_num + nb_sectors - 1); i++)
-        if (i >= 0)
+    for (i = first_cluster; i <= last_cluster; i++) {
+        if (i >= 0) {
             s->used_clusters[i] |= USED_ALLOCATED;
+        }
+    }
 
 DLOG(checkpoint());
     /* TODO: add timeout */
@@ -3060,9 +3107,9 @@ DLOG(checkpoint());
     return 0;
 }
 
-static int coroutine_fn
-vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                 QEMUIOVector *qiov, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+vvfat_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                 QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
     int ret;
     BDRVVVFATState *s = bs->opaque;
@@ -3098,26 +3145,6 @@ static int coroutine_fn vvfat_co_block_status(BlockDriverState *bs,
     return BDRV_BLOCK_DATA;
 }
 
-static int coroutine_fn
-write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-                    QEMUIOVector *qiov, int flags)
-{
-    int ret;
-
-    BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
-    qemu_co_mutex_lock(&s->lock);
-    ret = try_commit(s);
-    qemu_co_mutex_unlock(&s->lock);
-
-    return ret;
-}
-
-static BlockDriver vvfat_write_target = {
-    .format_name        = "vvfat_write_target",
-    .instance_size      = sizeof(void*),
-    .bdrv_co_pwritev    = write_target_commit,
-};
-
 static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format,
                                int *child_flags, QDict *child_options,
                                int parent_flags, QDict *parent_options)
@@ -3127,29 +3154,24 @@ static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format,
     qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
 }
 
-static const BdrvChildClass child_vvfat_qcow = {
-    .parent_is_bds      = true,
-    .inherit_options    = vvfat_qcow_options,
-};
+static BdrvChildClass child_vvfat_qcow;
 
 static int enable_write_target(BlockDriverState *bs, Error **errp)
 {
     BDRVVVFATState *s = bs->opaque;
     BlockDriver *bdrv_qcow = NULL;
-    BlockDriverState *backing;
     QemuOpts *opts = NULL;
     int ret;
     int size = sector2cluster(s, s->sector_count);
     QDict *options;
 
-    s->used_clusters = calloc(size, 1);
+    s->used_clusters = g_malloc0(size);
 
     array_init(&(s->commits), sizeof(commit_t));
 
-    s->qcow_filename = g_malloc(PATH_MAX);
-    ret = get_tmp_filename(s->qcow_filename, PATH_MAX);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "can't create temporary file");
+    s->qcow_filename = create_tmp_file(errp);
+    if (!s->qcow_filename) {
+        ret = -ENOENT;
         goto err;
     }
 
@@ -3161,8 +3183,8 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
     }
 
     opts = qemu_opts_create(bdrv_qcow->create_opts, NULL, 0, &error_abort);
-    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s->sector_count * 512,
-                        &error_abort);
+    qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
+                        bs->total_sectors * BDRV_SECTOR_SIZE, &error_abort);
     qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, "fat:", &error_abort);
 
     ret = bdrv_create(bdrv_qcow, s->qcow_filename, opts, errp);
@@ -3187,18 +3209,9 @@ static int enable_write_target(BlockDriverState *bs, Error **errp)
     unlink(s->qcow_filename);
 #endif
 
-    backing = bdrv_new_open_driver(&vvfat_write_target, NULL, BDRV_O_ALLOW_RDWR,
-                                   &error_abort);
-    *(void**) backing->opaque = s;
-
-    bdrv_set_backing_hd(s->bs, backing, &error_abort);
-    bdrv_unref(backing);
-
     return 0;
 
 err:
-    g_free(s->qcow_filename);
-    s->qcow_filename = NULL;
     return ret;
 }
 
@@ -3208,20 +3221,10 @@ static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c,
                              uint64_t perm, uint64_t shared,
                              uint64_t *nperm, uint64_t *nshared)
 {
-    BDRVVVFATState *s = bs->opaque;
-
-    assert(c == s->qcow || (role & BDRV_CHILD_COW));
-
-    if (c == s->qcow) {
-        /* This is a private node, nobody should try to attach to it */
-        *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
-        *nshared = BLK_PERM_WRITE_UNCHANGED;
-    } else {
-        /* The backing file is there so 'commit' can use it. vvfat doesn't
-         * access it in any way. */
-        *nperm = 0;
-        *nshared = BLK_PERM_ALL;
-    }
+    assert(role & BDRV_CHILD_DATA);
+    /* This is a private node, nobody should try to attach to it */
+    *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
+    *nshared = BLK_PERM_WRITE_UNCHANGED;
 }
 
 static void vvfat_close(BlockDriverState *bs)
@@ -3235,8 +3238,7 @@ static void vvfat_close(BlockDriverState *bs)
     g_free(s->cluster_buffer);
 
     if (s->qcow) {
-        migrate_del_blocker(s->migration_blocker);
-        error_free(s->migration_blocker);
+        migrate_del_blocker(&s->migration_blocker);
     }
 }
 
@@ -3270,6 +3272,8 @@ static BlockDriver bdrv_vvfat = {
 
 static void bdrv_vvfat_init(void)
 {
+    child_vvfat_qcow = child_of_bds;
+    child_vvfat_qcow.inherit_options = vvfat_qcow_options;
     bdrv_register(&bdrv_vvfat);
 }
 
index b7221a272f82686fbe87267218c80ac6919bb4b5..6327861e1d22995f1efe68c746bfde8c0a8601e7 100644 (file)
 
 #include "qemu/osdep.h"
 #include "qemu/timer.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
 #include "block/aio.h"
 #include "block/raw-aio.h"
 #include "qemu/event_notifier.h"
 #include "qemu/iov.h"
+#include "qemu/memalign.h"
 #include <windows.h>
 #include <winioctl.h>
 
@@ -172,7 +174,7 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
 void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
                                   AioContext *old_context)
 {
-    aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL);
+    aio_set_event_notifier(old_context, &aio->e, NULL, NULL, NULL);
     aio->aio_ctx = NULL;
 }
 
@@ -180,8 +182,8 @@ void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
                                   AioContext *new_context)
 {
     aio->aio_ctx = new_context;
-    aio_set_event_notifier(new_context, &aio->e, false,
-                           win32_aio_completion_cb, NULL);
+    aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb,
+                           NULL, NULL);
 }
 
 QEMUWin32AIOState *win32_aio_init(void)
index 85b78dc2a9bd590d54746f02f188814f8e9dbcca..76d8885677e266af57574020c403ee4901e9134c 100644 (file)
  */
 
 #include "qemu/osdep.h"
+#include "block/block-io.h"
 #include "block/block_int.h"
-#include "qemu/coroutine.h"
 #include "block/write-threshold.h"
-#include "qemu/notify.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-block-core.h"
 #include "qapi/qapi-events-block-core.h"
@@ -24,82 +23,9 @@ uint64_t bdrv_write_threshold_get(const BlockDriverState *bs)
     return bs->write_threshold_offset;
 }
 
-bool bdrv_write_threshold_is_set(const BlockDriverState *bs)
-{
-    return bs->write_threshold_offset > 0;
-}
-
-static void write_threshold_disable(BlockDriverState *bs)
-{
-    if (bdrv_write_threshold_is_set(bs)) {
-        notifier_with_return_remove(&bs->write_threshold_notifier);
-        bs->write_threshold_offset = 0;
-    }
-}
-
-uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs,
-                                       const BdrvTrackedRequest *req)
-{
-    if (bdrv_write_threshold_is_set(bs)) {
-        if (req->offset > bs->write_threshold_offset) {
-            return (req->offset - bs->write_threshold_offset) + req->bytes;
-        }
-        if ((req->offset + req->bytes) > bs->write_threshold_offset) {
-            return (req->offset + req->bytes) - bs->write_threshold_offset;
-        }
-    }
-    return 0;
-}
-
-static int coroutine_fn before_write_notify(NotifierWithReturn *notifier,
-                                            void *opaque)
-{
-    BdrvTrackedRequest *req = opaque;
-    BlockDriverState *bs = req->bs;
-    uint64_t amount = 0;
-
-    amount = bdrv_write_threshold_exceeded(bs, req);
-    if (amount > 0) {
-        qapi_event_send_block_write_threshold(
-            bs->node_name,
-            amount,
-            bs->write_threshold_offset);
-
-        /* autodisable to avoid flooding the monitor */
-        write_threshold_disable(bs);
-    }
-
-    return 0; /* should always let other notifiers run */
-}
-
-static void write_threshold_register_notifier(BlockDriverState *bs)
-{
-    bs->write_threshold_notifier.notify = before_write_notify;
-    bdrv_add_before_write_notifier(bs, &bs->write_threshold_notifier);
-}
-
-static void write_threshold_update(BlockDriverState *bs,
-                                   int64_t threshold_bytes)
-{
-    bs->write_threshold_offset = threshold_bytes;
-}
-
 void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes)
 {
-    if (bdrv_write_threshold_is_set(bs)) {
-        if (threshold_bytes > 0) {
-            write_threshold_update(bs, threshold_bytes);
-        } else {
-            write_threshold_disable(bs);
-        }
-    } else {
-        if (threshold_bytes > 0) {
-            /* avoid multiple registration */
-            write_threshold_register_notifier(bs);
-            write_threshold_update(bs, threshold_bytes);
-        }
-        /* discard bogus disable request */
-    }
+    bs->write_threshold_offset = threshold_bytes;
 }
 
 void qmp_block_set_write_threshold(const char *node_name,
@@ -122,3 +48,17 @@ void qmp_block_set_write_threshold(const char *node_name,
 
     aio_context_release(aio_context);
 }
+
+void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset,
+                                      int64_t bytes)
+{
+    int64_t end = offset + bytes;
+    uint64_t wtr = bs->write_threshold_offset;
+
+    if (wtr > 0 && end > wtr) {
+        qapi_event_send_block_write_threshold(bs->node_name, end - wtr, wtr);
+
+        /* autodisable to avoid flooding the monitor */
+        bdrv_write_threshold_set(bs, 0);
+    }
+}
index b264620b98d8c024b147872ce089e5371d66dd1d..213012435f4616f7c84bc436c10dab158994a368 100644 (file)
@@ -30,18 +30,23 @@ typedef struct NBDServerData {
 } NBDServerData;
 
 static NBDServerData *nbd_server;
-static bool is_qemu_nbd;
+static int qemu_nbd_connections = -1; /* Non-negative if this is qemu-nbd */
 
 static void nbd_update_server_watch(NBDServerData *s);
 
-void nbd_server_is_qemu_nbd(bool value)
+void nbd_server_is_qemu_nbd(int max_connections)
 {
-    is_qemu_nbd = value;
+    qemu_nbd_connections = max_connections;
 }
 
 bool nbd_server_is_running(void)
 {
-    return nbd_server || is_qemu_nbd;
+    return nbd_server || qemu_nbd_connections >= 0;
+}
+
+int nbd_server_max_connections(void)
+{
+    return nbd_server ? nbd_server->max_connections : qemu_nbd_connections;
 }
 
 static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
@@ -108,9 +113,9 @@ static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
         return NULL;
     }
 
-    if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
-        error_setg(errp,
-                   "Expecting TLS credentials with a server endpoint");
+    if (!qcrypto_tls_creds_check_endpoint(creds,
+                                          QCRYPTO_TLS_CREDS_ENDPOINT_SERVER,
+                                          errp)) {
         return NULL;
     }
     object_ref(obj);
@@ -148,12 +153,6 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds,
         if (!nbd_server->tlscreds) {
             goto error;
         }
-
-        /* TODO SOCKET_ADDRESS_TYPE_FD where fd has AF_INET or AF_INET6 */
-        if (addr->type != SOCKET_ADDRESS_TYPE_INET) {
-            error_setg(errp, "TLS is only supported with IPv4/IPv6");
-            goto error;
-        }
     }
 
     nbd_server->tlsauthz = g_strdup(tls_authz);
@@ -174,8 +173,8 @@ void nbd_server_start_options(NbdServerOptions *arg, Error **errp)
 }
 
 void qmp_nbd_server_start(SocketAddressLegacy *addr,
-                          bool has_tls_creds, const char *tls_creds,
-                          bool has_tls_authz, const char *tls_authz,
+                          const char *tls_creds,
+                          const char *tls_authz,
                           bool has_max_connections, uint32_t max_connections,
                           Error **errp)
 {
@@ -201,8 +200,7 @@ void qmp_nbd_server_add(NbdServerAddOptions *arg, Error **errp)
      * block-export-add would default to the node-name, but we may have to use
      * the device name as a default here for compatibility.
      */
-    if (!arg->has_name) {
-        arg->has_name = true;
+    if (!arg->name) {
         arg->name = g_strdup(arg->device);
     }
 
@@ -216,9 +214,15 @@ void qmp_nbd_server_add(NbdServerAddOptions *arg, Error **errp)
     };
     QAPI_CLONE_MEMBERS(BlockExportOptionsNbdBase, &export_opts->u.nbd,
                        qapi_NbdServerAddOptions_base(arg));
-    if (arg->has_bitmap) {
+    if (arg->bitmap) {
+        BlockDirtyBitmapOrStr *el = g_new(BlockDirtyBitmapOrStr, 1);
+
+        *el = (BlockDirtyBitmapOrStr) {
+            .type = QTYPE_QSTRING,
+            .u.local = g_strdup(arg->bitmap),
+        };
         export_opts->u.nbd.has_bitmaps = true;
-        QAPI_LIST_PREPEND(export_opts->u.nbd.bitmaps, g_strdup(arg->bitmap));
+        QAPI_LIST_PREPEND(export_opts->u.nbd.bitmaps, el);
     }
 
     /*
index e4dfa65aa444346c3c09dbc6d1c5c811a12ac18f..1e49304745d0a005acc49edc5b2e63c8e6c8a3af 100644 (file)
@@ -35,6 +35,7 @@
 #include "sysemu/blockdev.h"
 #include "hw/block/block.h"
 #include "block/blockjob.h"
+#include "block/dirty-bitmap.h"
 #include "block/qdict.h"
 #include "block/throttle-groups.h"
 #include "monitor/monitor.h"
@@ -56,8 +57,6 @@
 #include "sysemu/iothread.h"
 #include "block/block_int.h"
 #include "block/trace.h"
-#include "sysemu/arch_init.h"
-#include "sysemu/qtest.h"
 #include "sysemu/runstate.h"
 #include "sysemu/replay.h"
 #include "qemu/cutils.h"
 #include "qemu/main-loop.h"
 #include "qemu/throttle-options.h"
 
+/* Protected by BQL */
 QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
     QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
 
 void bdrv_set_monitor_owned(BlockDriverState *bs)
 {
+    GLOBAL_STATE_CODE();
     QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
 }
 
@@ -113,6 +114,8 @@ void override_max_devs(BlockInterfaceType type, int max_devs)
     BlockBackend *blk;
     DriveInfo *dinfo;
 
+    GLOBAL_STATE_CODE();
+
     if (max_devs <= 0) {
         return;
     }
@@ -142,20 +145,30 @@ void blockdev_mark_auto_del(BlockBackend *blk)
     DriveInfo *dinfo = blk_legacy_dinfo(blk);
     BlockJob *job;
 
+    GLOBAL_STATE_CODE();
+
     if (!dinfo) {
         return;
     }
 
-    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
-        if (block_job_has_bdrv(job, blk_bs(blk))) {
-            AioContext *aio_context = job->job.aio_context;
-            aio_context_acquire(aio_context);
-
-            job_cancel(&job->job, false);
+    JOB_LOCK_GUARD();
 
-            aio_context_release(aio_context);
+    do {
+        job = block_job_next_locked(NULL);
+        while (job && (job->job.cancelled ||
+                       job->job.deferred_to_main_loop ||
+                       !block_job_has_bdrv(job, blk_bs(blk))))
+        {
+            job = block_job_next_locked(job);
         }
-    }
+        if (job) {
+            /*
+             * This drops the job lock temporarily and polls, so we need to
+             * restart processing the list from the start after this.
+             */
+            job_cancel_locked(&job->job, false);
+        }
+    } while (job);
 
     dinfo->auto_del = 1;
 }
@@ -163,6 +176,7 @@ void blockdev_mark_auto_del(BlockBackend *blk)
 void blockdev_auto_del(BlockBackend *blk)
 {
     DriveInfo *dinfo = blk_legacy_dinfo(blk);
+    GLOBAL_STATE_CODE();
 
     if (dinfo && dinfo->auto_del) {
         monitor_remove_blk(blk);
@@ -170,23 +184,6 @@ void blockdev_auto_del(BlockBackend *blk)
     }
 }
 
-/**
- * Returns the current mapping of how many units per bus
- * a particular interface can support.
- *
- *  A positive integer indicates n units per bus.
- *  0 implies the mapping has not been established.
- * -1 indicates an invalid BlockInterfaceType was given.
- */
-int drive_get_max_devs(BlockInterfaceType type)
-{
-    if (type >= IF_IDE && type < IF_COUNT) {
-        return if_max_devs[type];
-    }
-
-    return -1;
-}
-
 static int drive_index_to_bus_id(BlockInterfaceType type, int index)
 {
     int max_devs = if_max_devs[type];
@@ -199,17 +196,14 @@ static int drive_index_to_unit_id(BlockInterfaceType type, int index)
     return max_devs ? index % max_devs : index;
 }
 
-QemuOpts *drive_def(const char *optstr)
-{
-    return qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
-}
-
 QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
                     const char *optstr)
 {
     QemuOpts *opts;
 
-    opts = drive_def(optstr);
+    GLOBAL_STATE_CODE();
+
+    opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
     if (!opts) {
         return NULL;
     }
@@ -229,6 +223,8 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
     BlockBackend *blk;
     DriveInfo *dinfo;
 
+    GLOBAL_STATE_CODE();
+
     for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
         dinfo = blk_legacy_dinfo(blk);
         if (dinfo && dinfo->type == type
@@ -240,19 +236,10 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
     return NULL;
 }
 
-void drive_mark_claimed_by_board(void)
-{
-    BlockBackend *blk;
-    DriveInfo *dinfo;
-
-    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
-        dinfo = blk_legacy_dinfo(blk);
-        if (dinfo && blk_get_attached_dev(blk)) {
-            dinfo->claimed_by_board = true;
-        }
-    }
-}
-
+/*
+ * Check board claimed all -drive that are meant to be claimed.
+ * Fatal error if any remain unclaimed.
+ */
 void drive_check_orphaned(void)
 {
     BlockBackend *blk;
@@ -260,9 +247,21 @@ void drive_check_orphaned(void)
     Location loc;
     bool orphans = false;
 
+    GLOBAL_STATE_CODE();
+
     for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
         dinfo = blk_legacy_dinfo(blk);
-        if (dinfo->is_default || dinfo->type == IF_NONE) {
+        /*
+         * Ignore default drives, because we create certain default
+         * drives unconditionally, then leave them unclaimed.  Not the
+         * users fault.
+         * Ignore IF_VIRTIO or IF_XEN, because it gets desugared into
+         * -device, so we can leave failing to -device.
+         * Ignore IF_NONE, because leaving unclaimed IF_NONE remains
+         * available for device_add is a feature.
+         */
+        if (dinfo->is_default || dinfo->type == IF_VIRTIO
+            || dinfo->type == IF_XEN || dinfo->type == IF_NONE) {
             continue;
         }
         if (!blk_get_attached_dev(blk)) {
@@ -273,14 +272,6 @@ void drive_check_orphaned(void)
                          if_name[dinfo->type], dinfo->bus, dinfo->unit);
             loc_pop(&loc);
             orphans = true;
-            continue;
-        }
-        if (!dinfo->claimed_by_board && dinfo->type != IF_VIRTIO) {
-            loc_push_none(&loc);
-            qemu_opts_loc_restore(dinfo->opts);
-            warn_report("bogus if=%s is deprecated, use if=none",
-                        if_name[dinfo->type]);
-            loc_pop(&loc);
         }
     }
 
@@ -291,6 +282,7 @@ void drive_check_orphaned(void)
 
 DriveInfo *drive_get_by_index(BlockInterfaceType type, int index)
 {
+    GLOBAL_STATE_CODE();
     return drive_get(type,
                      drive_index_to_bus_id(type, index),
                      drive_index_to_unit_id(type, index));
@@ -302,6 +294,8 @@ int drive_get_max_bus(BlockInterfaceType type)
     BlockBackend *blk;
     DriveInfo *dinfo;
 
+    GLOBAL_STATE_CODE();
+
     max_bus = -1;
     for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
         dinfo = blk_legacy_dinfo(blk);
@@ -312,16 +306,6 @@ int drive_get_max_bus(BlockInterfaceType type)
     return max_bus;
 }
 
-/* Get a block device.  This should only be used for single-drive devices
-   (e.g. SD/Floppy/MTD).  Multi-disk devices (scsi/ide) should use the
-   appropriate bus.  */
-DriveInfo *drive_get_next(BlockInterfaceType type)
-{
-    static int next_block_unit[IF_COUNT];
-
-    return drive_get(type, 0, next_block_unit[type]++);
-}
-
 static void bdrv_format_print(void *opaque, const char *name)
 {
     qemu_printf(" %s", name);
@@ -357,10 +341,10 @@ static bool parse_stats_intervals(BlockAcctStats *stats, QList *intervals,
         switch (qobject_type(entry->value)) {
 
         case QTYPE_QSTRING: {
-            unsigned long long length;
+            uint64_t length;
             const char *str = qstring_get_str(qobject_to(QString,
                                                          entry->value));
-            if (parse_uint_full(str, &length, 10) == 0 &&
+            if (parse_uint_full(str, 10, &length) == 0 &&
                 length > 0 && length <= UINT_MAX) {
                 block_acct_add_interval(stats, (unsigned) length);
             } else {
@@ -480,6 +464,17 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
     }
 }
 
+static OnOffAuto account_get_opt(QemuOpts *opts, const char *name)
+{
+    if (!qemu_opt_find(opts, name)) {
+        return ON_OFF_AUTO_AUTO;
+    }
+    if (qemu_opt_get_bool(opts, name, true)) {
+        return ON_OFF_AUTO_ON;
+    }
+    return ON_OFF_AUTO_OFF;
+}
+
 /* Takes the ownership of bs_opts */
 static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
                                    Error **errp)
@@ -487,7 +482,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
     const char *buf;
     int bdrv_flags = 0;
     int on_read_error, on_write_error;
-    bool account_invalid, account_failed;
+    OnOffAuto account_invalid, account_failed;
     bool writethrough, read_only;
     BlockBackend *blk;
     BlockDriverState *bs;
@@ -521,8 +516,8 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
     /* extract parameters */
     snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
 
-    account_invalid = qemu_opt_get_bool(opts, "stats-account-invalid", true);
-    account_failed = qemu_opt_get_bool(opts, "stats-account-failed", true);
+    account_invalid = account_get_opt(opts, "stats-account-invalid");
+    account_failed = account_get_opt(opts, "stats-account-failed");
 
     writethrough = !qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true);
 
@@ -591,8 +586,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
 
         blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
         blk_rs = blk_get_root_state(blk);
-        blk_rs->open_flags    = bdrv_flags;
-        blk_rs->read_only     = read_only;
+        blk_rs->open_flags    = bdrv_flags | (read_only ? 0 : BDRV_O_RDWR);
         blk_rs->detect_zeroes = detect_zeroes;
 
         qobject_unref(bs_opts);
@@ -668,8 +662,10 @@ err_no_opts:
 /* Takes the ownership of bs_opts */
 BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
 {
+    BlockDriverState *bs;
     int bdrv_flags = 0;
 
+    GLOBAL_STATE_CODE();
     /* bdrv_open() defaults to the values in bdrv_flags (for compatibility
      * with other callers) rather than what we want as the real defaults.
      * Apply the defaults here instead. */
@@ -681,13 +677,18 @@ BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
         bdrv_flags |= BDRV_O_INACTIVE;
     }
 
-    return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
+    aio_context_acquire(qemu_get_aio_context());
+    bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
+    aio_context_release(qemu_get_aio_context());
+
+    return bs;
 }
 
 void blockdev_close_all_bdrv_states(void)
 {
     BlockDriverState *bs, *next_bs;
 
+    GLOBAL_STATE_CODE();
     QTAILQ_FOREACH_SAFE(bs, &monitor_bdrv_states, monitor_list, next_bs) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
@@ -700,6 +701,7 @@ void blockdev_close_all_bdrv_states(void)
 /* Iterates over the list of monitor-owned BlockDriverStates */
 BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs)
 {
+    GLOBAL_STATE_CODE();
     return bs ? QTAILQ_NEXT(bs, monitor_list)
               : QTAILQ_FIRST(&monitor_bdrv_states);
 }
@@ -796,6 +798,8 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type,
     const char *filename;
     int i;
 
+    GLOBAL_STATE_CODE();
+
     /* Change legacy command line options into QMP ones */
     static const struct {
         const char *from;
@@ -970,11 +974,16 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type,
         QemuOpts *devopts;
         devopts = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
                                    &error_abort);
-        if (arch_type == QEMU_ARCH_S390X) {
-            qemu_opt_set(devopts, "driver", "virtio-blk-ccw", &error_abort);
-        } else {
-            qemu_opt_set(devopts, "driver", "virtio-blk-pci", &error_abort);
-        }
+        qemu_opt_set(devopts, "driver", "virtio-blk", &error_abort);
+        qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"),
+                     &error_abort);
+    } else if (type == IF_XEN) {
+        QemuOpts *devopts;
+        devopts = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
+                                   &error_abort);
+        qemu_opt_set(devopts, "driver",
+                     (media == MEDIA_CDROM) ? "xen-cdrom" : "xen-disk",
+                     &error_abort);
         qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"),
                      &error_abort);
     }
@@ -1039,6 +1048,9 @@ fail:
 static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp)
 {
     BlockDriverState *bs;
+    AioContext *aio_context;
+
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     bs = bdrv_lookup_bs(name, name, errp);
     if (bs == NULL) {
@@ -1050,11 +1062,16 @@ static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp)
         return NULL;
     }
 
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
     if (!bdrv_is_inserted(bs)) {
         error_setg(errp, "Device has no medium");
-        return NULL;
+        bs = NULL;
     }
 
+    aio_context_release(aio_context);
+
     return bs;
 }
 
@@ -1064,26 +1081,20 @@ static void blockdev_do_action(TransactionAction *action, Error **errp)
 
     list.value = action;
     list.next = NULL;
-    qmp_transaction(&list, false, NULL, errp);
+    qmp_transaction(&list, NULL, errp);
 }
 
-void qmp_blockdev_snapshot_sync(bool has_device, const char *device,
-                                bool has_node_name, const char *node_name,
+void qmp_blockdev_snapshot_sync(const char *device, const char *node_name,
                                 const char *snapshot_file,
-                                bool has_snapshot_node_name,
                                 const char *snapshot_node_name,
-                                bool has_format, const char *format,
+                                const char *format,
                                 bool has_mode, NewImageMode mode, Error **errp)
 {
     BlockdevSnapshotSync snapshot = {
-        .has_device = has_device,
         .device = (char *) device,
-        .has_node_name = has_node_name,
         .node_name = (char *) node_name,
         .snapshot_file = (char *) snapshot_file,
-        .has_snapshot_node_name = has_snapshot_node_name,
         .snapshot_node_name = (char *) snapshot_node_name,
-        .has_format = has_format,
         .format = (char *) format,
         .has_mode = has_mode,
         .mode = mode,
@@ -1125,9 +1136,7 @@ void qmp_blockdev_snapshot_internal_sync(const char *device,
 }
 
 SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
-                                                         bool has_id,
                                                          const char *id,
-                                                         bool has_name,
                                                          const char *name,
                                                          Error **errp)
 {
@@ -1138,6 +1147,9 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
     SnapshotInfo *info = NULL;
     int ret;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     bs = qmp_get_root_bs(device, errp);
     if (!bs) {
         return NULL;
@@ -1145,14 +1157,6 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
-    if (!has_id) {
-        id = NULL;
-    }
-
-    if (!has_name) {
-        name = NULL;
-    }
-
     if (!id && !name) {
         error_setg(errp, "Name or id must be provided");
         goto out_aio_context;
@@ -1203,77 +1207,22 @@ out_aio_context:
     return NULL;
 }
 
-/* New and old BlockDriverState structs for atomic group operations */
-
-typedef struct BlkActionState BlkActionState;
-
-/**
- * BlkActionOps:
- * Table of operations that define an Action.
- *
- * @instance_size: Size of state struct, in bytes.
- * @prepare: Prepare the work, must NOT be NULL.
- * @commit: Commit the changes, can be NULL.
- * @abort: Abort the changes on fail, can be NULL.
- * @clean: Clean up resources after all transaction actions have called
- *         commit() or abort(). Can be NULL.
- *
- * Only prepare() may fail. In a single transaction, only one of commit() or
- * abort() will be called. clean() will always be called if it is present.
- */
-typedef struct BlkActionOps {
-    size_t instance_size;
-    void (*prepare)(BlkActionState *common, Error **errp);
-    void (*commit)(BlkActionState *common);
-    void (*abort)(BlkActionState *common);
-    void (*clean)(BlkActionState *common);
-} BlkActionOps;
-
-/**
- * BlkActionState:
- * Describes one Action's state within a Transaction.
- *
- * @action: QAPI-defined enum identifying which Action to perform.
- * @ops: Table of ActionOps this Action can perform.
- * @block_job_txn: Transaction which this action belongs to.
- * @entry: List membership for all Actions in this Transaction.
- *
- * This structure must be arranged as first member in a subclassed type,
- * assuming that the compiler will also arrange it to the same offsets as the
- * base class.
- */
-struct BlkActionState {
-    TransactionAction *action;
-    const BlkActionOps *ops;
-    JobTxn *block_job_txn;
-    TransactionProperties *txn_props;
-    QTAILQ_ENTRY(BlkActionState) entry;
-};
-
 /* internal snapshot private data */
 typedef struct InternalSnapshotState {
-    BlkActionState common;
     BlockDriverState *bs;
     QEMUSnapshotInfo sn;
     bool created;
 } InternalSnapshotState;
 
+static void internal_snapshot_abort(void *opaque);
+static void internal_snapshot_clean(void *opaque);
+TransactionActionDrv internal_snapshot_drv = {
+    .abort = internal_snapshot_abort,
+    .clean = internal_snapshot_clean,
+};
 
-static int action_check_completion_mode(BlkActionState *s, Error **errp)
-{
-    if (s->txn_props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
-        error_setg(errp,
-                   "Action '%s' does not support Transaction property "
-                   "completion-mode = %s",
-                   TransactionActionKind_str(s->action->type),
-                   ActionCompletionMode_str(s->txn_props->completion_mode));
-        return -1;
-    }
-    return 0;
-}
-
-static void internal_snapshot_prepare(BlkActionState *common,
-                                      Error **errp)
+static void internal_snapshot_action(BlockdevSnapshotInternal *internal,
+                                     Transaction *tran, Error **errp)
 {
     Error *local_err = NULL;
     const char *device;
@@ -1281,26 +1230,19 @@ static void internal_snapshot_prepare(BlkActionState *common,
     BlockDriverState *bs;
     QEMUSnapshotInfo old_sn, *sn;
     bool ret;
-    qemu_timeval tv;
-    BlockdevSnapshotInternal *internal;
-    InternalSnapshotState *state;
+    int64_t rt;
+    InternalSnapshotState *state = g_new0(InternalSnapshotState, 1);
     AioContext *aio_context;
     int ret1;
 
-    g_assert(common->action->type ==
-             TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC);
-    internal = common->action->u.blockdev_snapshot_internal_sync.data;
-    state = DO_UPCAST(InternalSnapshotState, common, common);
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    tran_add(tran, &internal_snapshot_drv, state);
 
-    /* 1. parse input */
     device = internal->device;
     name = internal->name;
 
-    /* 2. check for validation */
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
-
     bs = qmp_get_root_bs(device, errp);
     if (!bs) {
         return;
@@ -1351,9 +1293,9 @@ static void internal_snapshot_prepare(BlkActionState *common,
     /* 3. take the snapshot */
     sn = &state->sn;
     pstrcpy(sn->name, sizeof(sn->name), name);
-    qemu_gettimeofday(&tv);
-    sn->date_sec = tv.tv_sec;
-    sn->date_nsec = tv.tv_usec * 1000;
+    rt = g_get_real_time();
+    sn->date_sec = rt / G_USEC_PER_SEC;
+    sn->date_nsec = (rt % G_USEC_PER_SEC) * 1000;
     sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
     if (replay_mode != REPLAY_MODE_NONE) {
         sn->icount = replay_get_current_icount();
@@ -1376,15 +1318,17 @@ out:
     aio_context_release(aio_context);
 }
 
-static void internal_snapshot_abort(BlkActionState *common)
+static void internal_snapshot_abort(void *opaque)
 {
-    InternalSnapshotState *state =
-                             DO_UPCAST(InternalSnapshotState, common, common);
+    InternalSnapshotState *state = opaque;
     BlockDriverState *bs = state->bs;
     QEMUSnapshotInfo *sn = &state->sn;
     AioContext *aio_context;
     Error *local_error = NULL;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (!state->created) {
         return;
     }
@@ -1403,10 +1347,9 @@ static void internal_snapshot_abort(BlkActionState *common)
     aio_context_release(aio_context);
 }
 
-static void internal_snapshot_clean(BlkActionState *common)
+static void internal_snapshot_clean(void *opaque)
 {
-    InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState,
-                                             common, common);
+    g_autofree InternalSnapshotState *state = opaque;
     AioContext *aio_context;
 
     if (!state->bs) {
@@ -1423,15 +1366,24 @@ static void internal_snapshot_clean(BlkActionState *common)
 
 /* external snapshot private data */
 typedef struct ExternalSnapshotState {
-    BlkActionState common;
     BlockDriverState *old_bs;
     BlockDriverState *new_bs;
     bool overlay_appended;
 } ExternalSnapshotState;
 
-static void external_snapshot_prepare(BlkActionState *common,
-                                      Error **errp)
+static void external_snapshot_commit(void *opaque);
+static void external_snapshot_abort(void *opaque);
+static void external_snapshot_clean(void *opaque);
+TransactionActionDrv external_snapshot_drv = {
+    .commit = external_snapshot_commit,
+    .abort = external_snapshot_abort,
+    .clean = external_snapshot_clean,
+};
+
+static void external_snapshot_action(TransactionAction *action,
+                                     Transaction *tran, Error **errp)
 {
+    int ret;
     int flags = 0;
     QDict *options = NULL;
     Error *local_err = NULL;
@@ -1442,12 +1394,15 @@ static void external_snapshot_prepare(BlkActionState *common,
     const char *snapshot_ref;
     /* File name of the new image (for 'blockdev-snapshot-sync') */
     const char *new_image_file;
-    ExternalSnapshotState *state =
-                             DO_UPCAST(ExternalSnapshotState, common, common);
-    TransactionAction *action = common->action;
+    ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1);
     AioContext *aio_context;
     uint64_t perm, shared;
 
+    /* TODO We'll eventually have to take a writer lock in this function */
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+    tran_add(tran, &external_snapshot_drv, state);
+
     /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar
      * purpose but a different set of parameters */
     switch (action->type) {
@@ -1463,8 +1418,8 @@ static void external_snapshot_prepare(BlkActionState *common,
     case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
         {
             BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data;
-            device = s->has_device ? s->device : NULL;
-            node_name = s->has_node_name ? s->node_name : NULL;
+            device = s->device;
+            node_name = s->node_name;
             new_image_file = s->snapshot_file;
             snapshot_ref = NULL;
         }
@@ -1474,9 +1429,6 @@ static void external_snapshot_prepare(BlkActionState *common,
     }
 
     /* start processing */
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
 
     state->old_bs = bdrv_lookup_bs(device, node_name, errp);
     if (!state->old_bs) {
@@ -1508,19 +1460,18 @@ static void external_snapshot_prepare(BlkActionState *common,
 
     if (action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC) {
         BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data;
-        const char *format = s->has_format ? s->format : "qcow2";
+        const char *format = s->format ?: "qcow2";
         enum NewImageMode mode;
-        const char *snapshot_node_name =
-            s->has_snapshot_node_name ? s->snapshot_node_name : NULL;
+        const char *snapshot_node_name = s->snapshot_node_name;
 
         if (node_name && !snapshot_node_name) {
-            error_setg(errp, "New overlay node name missing");
+            error_setg(errp, "New overlay node-name missing");
             goto out;
         }
 
         if (snapshot_node_name &&
             bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) {
-            error_setg(errp, "New overlay node name already in use");
+            error_setg(errp, "New overlay node-name already in use");
             goto out;
         }
 
@@ -1537,10 +1488,14 @@ static void external_snapshot_prepare(BlkActionState *common,
                 goto out;
             }
             bdrv_refresh_filename(state->old_bs);
+
+            aio_context_release(aio_context);
             bdrv_img_create(new_image_file, format,
                             state->old_bs->filename,
                             state->old_bs->drv->format_name,
                             NULL, size, flags, false, &local_err);
+            aio_context_acquire(aio_context);
+
             if (local_err) {
                 error_propagate(errp, local_err);
                 goto out;
@@ -1553,14 +1508,20 @@ static void external_snapshot_prepare(BlkActionState *common,
         }
         qdict_put_str(options, "driver", format);
     }
+    aio_context_release(aio_context);
 
+    aio_context_acquire(qemu_get_aio_context());
     state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags,
                               errp);
+    aio_context_release(qemu_get_aio_context());
+
     /* We will manually add the backing_hd field to the bs later */
     if (!state->new_bs) {
-        goto out;
+        return;
     }
 
+    aio_context_acquire(aio_context);
+
     /*
      * Allow attaching a backing file to an overlay that's already in use only
      * if the parents don't assume that they are already seeing a valid image.
@@ -1587,13 +1548,8 @@ static void external_snapshot_prepare(BlkActionState *common,
         goto out;
     }
 
-    /* This removes our old bs and adds the new bs. This is an operation that
-     * can fail, so we need to do it in .prepare; undoing it for abort is
-     * always possible. */
-    bdrv_ref(state->new_bs);
-    bdrv_append(state->new_bs, state->old_bs, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    ret = bdrv_append(state->new_bs, state->old_bs, errp);
+    if (ret < 0) {
         goto out;
     }
     state->overlay_appended = true;
@@ -1602,10 +1558,9 @@ out:
     aio_context_release(aio_context);
 }
 
-static void external_snapshot_commit(BlkActionState *common)
+static void external_snapshot_commit(void *opaque)
 {
-    ExternalSnapshotState *state =
-                             DO_UPCAST(ExternalSnapshotState, common, common);
+    ExternalSnapshotState *state = opaque;
     AioContext *aio_context;
 
     aio_context = bdrv_get_aio_context(state->old_bs);
@@ -1621,10 +1576,9 @@ static void external_snapshot_commit(BlkActionState *common)
     aio_context_release(aio_context);
 }
 
-static void external_snapshot_abort(BlkActionState *common)
+static void external_snapshot_abort(void *opaque)
 {
-    ExternalSnapshotState *state =
-                             DO_UPCAST(ExternalSnapshotState, common, common);
+    ExternalSnapshotState *state = opaque;
     if (state->new_bs) {
         if (state->overlay_appended) {
             AioContext *aio_context;
@@ -1648,15 +1602,20 @@ static void external_snapshot_abort(BlkActionState *common)
                 aio_context_release(aio_context);
                 aio_context_acquire(tmp_context);
 
-                ret = bdrv_try_set_aio_context(state->old_bs,
-                                               aio_context, NULL);
+                ret = bdrv_try_change_aio_context(state->old_bs,
+                                                  aio_context, NULL, NULL);
                 assert(ret == 0);
 
                 aio_context_release(tmp_context);
                 aio_context_acquire(aio_context);
             }
 
+            bdrv_drained_begin(state->new_bs);
+            bdrv_graph_wrlock(state->old_bs);
             bdrv_replace_node(state->new_bs, state->old_bs, &error_abort);
+            bdrv_graph_wrunlock(state->old_bs);
+            bdrv_drained_end(state->new_bs);
+
             bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */
 
             aio_context_release(aio_context);
@@ -1664,10 +1623,9 @@ static void external_snapshot_abort(BlkActionState *common)
     }
 }
 
-static void external_snapshot_clean(BlkActionState *common)
+static void external_snapshot_clean(void *opaque)
 {
-    ExternalSnapshotState *state =
-                             DO_UPCAST(ExternalSnapshotState, common, common);
+    g_autofree ExternalSnapshotState *state = opaque;
     AioContext *aio_context;
 
     if (!state->old_bs) {
@@ -1684,7 +1642,6 @@ static void external_snapshot_clean(BlkActionState *common)
 }
 
 typedef struct DriveBackupState {
-    BlkActionState common;
     BlockDriverState *bs;
     BlockJob *job;
 } DriveBackupState;
@@ -1695,15 +1652,26 @@ static BlockJob *do_backup_common(BackupCommon *backup,
                                   AioContext *aio_context,
                                   JobTxn *txn, Error **errp);
 
-static void drive_backup_prepare(BlkActionState *common, Error **errp)
+static void drive_backup_commit(void *opaque);
+static void drive_backup_abort(void *opaque);
+static void drive_backup_clean(void *opaque);
+TransactionActionDrv drive_backup_drv = {
+    .commit = drive_backup_commit,
+    .abort = drive_backup_abort,
+    .clean = drive_backup_clean,
+};
+
+static void drive_backup_action(DriveBackup *backup,
+                                JobTxn *block_job_txn,
+                                Transaction *tran, Error **errp)
 {
-    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
-    DriveBackup *backup;
+    DriveBackupState *state = g_new0(DriveBackupState, 1);
     BlockDriverState *bs;
     BlockDriverState *target_bs;
     BlockDriverState *source = NULL;
     AioContext *aio_context;
     AioContext *old_context;
+    const char *format;
     QDict *options;
     Error *local_err = NULL;
     int flags;
@@ -1711,8 +1679,9 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
     bool set_backing_hd = false;
     int ret;
 
-    assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
-    backup = common->action->u.drive_backup.data;
+    GLOBAL_STATE_CODE();
+
+    tran_add(tran, &drive_backup_drv, state);
 
     if (!backup->has_mode) {
         backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
@@ -1731,16 +1700,19 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
+    state->bs = bs;
     /* Paired with .clean() */
     bdrv_drained_begin(bs);
 
-    if (!backup->has_format) {
-        backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ?
-                         NULL : (char *) bs->drv->format_name;
+    format = backup->format;
+    if (!format && backup->mode != NEW_IMAGE_MODE_EXISTING) {
+        format = bs->drv->format_name;
     }
 
     /* Early check to avoid creating target */
+    bdrv_graph_rdlock_main_loop();
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
+        bdrv_graph_rdunlock_main_loop();
         goto out;
     }
 
@@ -1767,6 +1739,7 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
         flags |= BDRV_O_NO_BACKING;
         set_backing_hd = true;
     }
+    bdrv_graph_rdunlock_main_loop();
 
     size = bdrv_getlength(bs);
     if (size < 0) {
@@ -1775,19 +1748,22 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
     }
 
     if (backup->mode != NEW_IMAGE_MODE_EXISTING) {
-        assert(backup->format);
+        assert(format);
         if (source) {
             /* Implicit filters should not appear in the filename */
-            BlockDriverState *explicit_backing =
-                bdrv_skip_implicit_filters(source);
+            BlockDriverState *explicit_backing;
 
+            bdrv_graph_rdlock_main_loop();
+            explicit_backing = bdrv_skip_implicit_filters(source);
             bdrv_refresh_filename(explicit_backing);
-            bdrv_img_create(backup->target, backup->format,
+            bdrv_graph_rdunlock_main_loop();
+
+            bdrv_img_create(backup->target, format,
                             explicit_backing->filename,
                             explicit_backing->drv->format_name, NULL,
                             size, flags, false, &local_err);
         } else {
-            bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL,
+            bdrv_img_create(backup->target, format, NULL, NULL, NULL,
                             size, flags, false, &local_err);
         }
     }
@@ -1800,21 +1776,24 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
     options = qdict_new();
     qdict_put_str(options, "discard", "unmap");
     qdict_put_str(options, "detect-zeroes", "unmap");
-    if (backup->format) {
-        qdict_put_str(options, "driver", backup->format);
+    if (format) {
+        qdict_put_str(options, "driver", format);
     }
+    aio_context_release(aio_context);
 
+    aio_context_acquire(qemu_get_aio_context());
     target_bs = bdrv_open(backup->target, NULL, options, flags, errp);
+    aio_context_release(qemu_get_aio_context());
+
     if (!target_bs) {
-        goto out;
+        return;
     }
 
-    /* Honor bdrv_try_set_aio_context() context acquisition requirements. */
+    /* Honor bdrv_try_change_aio_context() context acquisition requirements. */
     old_context = bdrv_get_aio_context(target_bs);
-    aio_context_release(aio_context);
     aio_context_acquire(old_context);
 
-    ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
+    ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp);
     if (ret < 0) {
         bdrv_unref(target_bs);
         aio_context_release(old_context);
@@ -1825,17 +1804,14 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
     aio_context_acquire(aio_context);
 
     if (set_backing_hd) {
-        bdrv_set_backing_hd(target_bs, source, &local_err);
-        if (local_err) {
+        if (bdrv_set_backing_hd(target_bs, source, errp) < 0) {
             goto unref;
         }
     }
 
-    state->bs = bs;
-
     state->job = do_backup_common(qapi_DriveBackup_base(backup),
                                   bs, target_bs, aio_context,
-                                  common->block_job_txn, errp);
+                                  block_job_txn, errp);
 
 unref:
     bdrv_unref(target_bs);
@@ -1843,9 +1819,9 @@ out:
     aio_context_release(aio_context);
 }
 
-static void drive_backup_commit(BlkActionState *common)
+static void drive_backup_commit(void *opaque)
 {
-    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+    DriveBackupState *state = opaque;
     AioContext *aio_context;
 
     aio_context = bdrv_get_aio_context(state->bs);
@@ -1857,25 +1833,18 @@ static void drive_backup_commit(BlkActionState *common)
     aio_context_release(aio_context);
 }
 
-static void drive_backup_abort(BlkActionState *common)
+static void drive_backup_abort(void *opaque)
 {
-    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+    DriveBackupState *state = opaque;
 
     if (state->job) {
-        AioContext *aio_context;
-
-        aio_context = bdrv_get_aio_context(state->bs);
-        aio_context_acquire(aio_context);
-
-        job_cancel_sync(&state->job->job);
-
-        aio_context_release(aio_context);
+        job_cancel_sync(&state->job->job, true);
     }
 }
 
-static void drive_backup_clean(BlkActionState *common)
+static void drive_backup_clean(void *opaque)
 {
-    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+    g_autofree DriveBackupState *state = opaque;
     AioContext *aio_context;
 
     if (!state->bs) {
@@ -1891,23 +1860,31 @@ static void drive_backup_clean(BlkActionState *common)
 }
 
 typedef struct BlockdevBackupState {
-    BlkActionState common;
     BlockDriverState *bs;
     BlockJob *job;
 } BlockdevBackupState;
 
-static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
+static void blockdev_backup_commit(void *opaque);
+static void blockdev_backup_abort(void *opaque);
+static void blockdev_backup_clean(void *opaque);
+TransactionActionDrv blockdev_backup_drv = {
+    .commit = blockdev_backup_commit,
+    .abort = blockdev_backup_abort,
+    .clean = blockdev_backup_clean,
+};
+
+static void blockdev_backup_action(BlockdevBackup *backup,
+                                   JobTxn *block_job_txn,
+                                   Transaction *tran, Error **errp)
 {
-    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
-    BlockdevBackup *backup;
+    BlockdevBackupState *state = g_new0(BlockdevBackupState, 1);
     BlockDriverState *bs;
     BlockDriverState *target_bs;
     AioContext *aio_context;
     AioContext *old_context;
     int ret;
 
-    assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
-    backup = common->action->u.blockdev_backup.data;
+    tran_add(tran, &blockdev_backup_drv, state);
 
     bs = bdrv_lookup_bs(backup->device, backup->device, errp);
     if (!bs) {
@@ -1919,12 +1896,12 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
         return;
     }
 
-    /* Honor bdrv_try_set_aio_context() context acquisition requirements. */
+    /* Honor bdrv_try_change_aio_context() context acquisition requirements. */
     aio_context = bdrv_get_aio_context(bs);
     old_context = bdrv_get_aio_context(target_bs);
     aio_context_acquire(old_context);
 
-    ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
+    ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp);
     if (ret < 0) {
         aio_context_release(old_context);
         return;
@@ -1939,14 +1916,14 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
 
     state->job = do_backup_common(qapi_BlockdevBackup_base(backup),
                                   bs, target_bs, aio_context,
-                                  common->block_job_txn, errp);
+                                  block_job_txn, errp);
 
     aio_context_release(aio_context);
 }
 
-static void blockdev_backup_commit(BlkActionState *common)
+static void blockdev_backup_commit(void *opaque)
 {
-    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+    BlockdevBackupState *state = opaque;
     AioContext *aio_context;
 
     aio_context = bdrv_get_aio_context(state->bs);
@@ -1958,25 +1935,18 @@ static void blockdev_backup_commit(BlkActionState *common)
     aio_context_release(aio_context);
 }
 
-static void blockdev_backup_abort(BlkActionState *common)
+static void blockdev_backup_abort(void *opaque)
 {
-    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+    BlockdevBackupState *state = opaque;
 
     if (state->job) {
-        AioContext *aio_context;
-
-        aio_context = bdrv_get_aio_context(state->bs);
-        aio_context_acquire(aio_context);
-
-        job_cancel_sync(&state->job->job);
-
-        aio_context_release(aio_context);
+        job_cancel_sync(&state->job->job, true);
     }
 }
 
-static void blockdev_backup_clean(BlkActionState *common)
+static void blockdev_backup_clean(void *opaque)
 {
-    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+    g_autofree BlockdevBackupState *state = opaque;
     AioContext *aio_context;
 
     if (!state->bs) {
@@ -1992,27 +1962,26 @@ static void blockdev_backup_clean(BlkActionState *common)
 }
 
 typedef struct BlockDirtyBitmapState {
-    BlkActionState common;
     BdrvDirtyBitmap *bitmap;
     BlockDriverState *bs;
     HBitmap *backup;
-    bool prepared;
     bool was_enabled;
 } BlockDirtyBitmapState;
 
-static void block_dirty_bitmap_add_prepare(BlkActionState *common,
-                                           Error **errp)
+static void block_dirty_bitmap_add_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_add_drv = {
+    .abort = block_dirty_bitmap_add_abort,
+    .clean = g_free,
+};
+
+static void block_dirty_bitmap_add_action(BlockDirtyBitmapAdd *action,
+                                          Transaction *tran, Error **errp)
 {
     Error *local_err = NULL;
-    BlockDirtyBitmapAdd *action;
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
 
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
+    tran_add(tran, &block_dirty_bitmap_add_drv, state);
 
-    action = common->action->u.block_dirty_bitmap_add.data;
     /* AIO context taken and released within qmp_block_dirty_bitmap_add */
     qmp_block_dirty_bitmap_add(action->node, action->name,
                                action->has_granularity, action->granularity,
@@ -2021,39 +1990,37 @@ static void block_dirty_bitmap_add_prepare(BlkActionState *common,
                                &local_err);
 
     if (!local_err) {
-        state->prepared = true;
+        state->bitmap = block_dirty_bitmap_lookup(action->node, action->name,
+                                                  NULL, &error_abort);
     } else {
         error_propagate(errp, local_err);
     }
 }
 
-static void block_dirty_bitmap_add_abort(BlkActionState *common)
+static void block_dirty_bitmap_add_abort(void *opaque)
 {
-    BlockDirtyBitmapAdd *action;
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = opaque;
 
-    action = common->action->u.block_dirty_bitmap_add.data;
-    /* Should not be able to fail: IF the bitmap was added via .prepare(),
-     * then the node reference and bitmap name must have been valid.
-     */
-    if (state->prepared) {
-        qmp_block_dirty_bitmap_remove(action->node, action->name, &error_abort);
+    if (state->bitmap) {
+        bdrv_release_dirty_bitmap(state->bitmap);
     }
 }
 
-static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
-                                             Error **errp)
+static void block_dirty_bitmap_restore(void *opaque);
+static void block_dirty_bitmap_free_backup(void *opaque);
+TransactionActionDrv block_dirty_bitmap_clear_drv = {
+    .abort = block_dirty_bitmap_restore,
+    .commit = block_dirty_bitmap_free_backup,
+    .clean = g_free,
+};
+
+static void block_dirty_bitmap_clear_action(BlockDirtyBitmap *action,
+                                            Transaction *tran, Error **errp)
 {
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
-    BlockDirtyBitmap *action;
+    BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
 
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
+    tran_add(tran, &block_dirty_bitmap_clear_drv, state);
 
-    action = common->action->u.block_dirty_bitmap_clear.data;
     state->bitmap = block_dirty_bitmap_lookup(action->node,
                                               action->name,
                                               &state->bs,
@@ -2069,36 +2036,35 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
     bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
 }
 
-static void block_dirty_bitmap_restore(BlkActionState *common)
+static void block_dirty_bitmap_restore(void *opaque)
 {
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = opaque;
 
     if (state->backup) {
         bdrv_restore_dirty_bitmap(state->bitmap, state->backup);
     }
 }
 
-static void block_dirty_bitmap_free_backup(BlkActionState *common)
+static void block_dirty_bitmap_free_backup(void *opaque)
 {
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = opaque;
 
     hbitmap_free(state->backup);
 }
 
-static void block_dirty_bitmap_enable_prepare(BlkActionState *common,
-                                              Error **errp)
+static void block_dirty_bitmap_enable_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_enable_drv = {
+    .abort = block_dirty_bitmap_enable_abort,
+    .clean = g_free,
+};
+
+static void block_dirty_bitmap_enable_action(BlockDirtyBitmap *action,
+                                             Transaction *tran, Error **errp)
 {
-    BlockDirtyBitmap *action;
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
 
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
+    tran_add(tran, &block_dirty_bitmap_enable_drv, state);
 
-    action = common->action->u.block_dirty_bitmap_enable.data;
     state->bitmap = block_dirty_bitmap_lookup(action->node,
                                               action->name,
                                               NULL,
@@ -2115,28 +2081,28 @@ static void block_dirty_bitmap_enable_prepare(BlkActionState *common,
     bdrv_enable_dirty_bitmap(state->bitmap);
 }
 
-static void block_dirty_bitmap_enable_abort(BlkActionState *common)
+static void block_dirty_bitmap_enable_abort(void *opaque)
 {
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = opaque;
 
     if (!state->was_enabled) {
         bdrv_disable_dirty_bitmap(state->bitmap);
     }
 }
 
-static void block_dirty_bitmap_disable_prepare(BlkActionState *common,
-                                               Error **errp)
+static void block_dirty_bitmap_disable_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_disable_drv = {
+    .abort = block_dirty_bitmap_disable_abort,
+    .clean = g_free,
+};
+
+static void block_dirty_bitmap_disable_action(BlockDirtyBitmap *action,
+                                              Transaction *tran, Error **errp)
 {
-    BlockDirtyBitmap *action;
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
 
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
+    tran_add(tran, &block_dirty_bitmap_disable_drv, state);
 
-    action = common->action->u.block_dirty_bitmap_disable.data;
     state->bitmap = block_dirty_bitmap_lookup(action->node,
                                               action->name,
                                               NULL,
@@ -2153,46 +2119,48 @@ static void block_dirty_bitmap_disable_prepare(BlkActionState *common,
     bdrv_disable_dirty_bitmap(state->bitmap);
 }
 
-static void block_dirty_bitmap_disable_abort(BlkActionState *common)
+static void block_dirty_bitmap_disable_abort(void *opaque)
 {
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = opaque;
 
     if (state->was_enabled) {
         bdrv_enable_dirty_bitmap(state->bitmap);
     }
 }
 
-static void block_dirty_bitmap_merge_prepare(BlkActionState *common,
-                                             Error **errp)
-{
-    BlockDirtyBitmapMerge *action;
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+TransactionActionDrv block_dirty_bitmap_merge_drv = {
+    .commit = block_dirty_bitmap_free_backup,
+    .abort = block_dirty_bitmap_restore,
+    .clean = g_free,
+};
 
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
+static void block_dirty_bitmap_merge_action(BlockDirtyBitmapMerge *action,
+                                            Transaction *tran, Error **errp)
+{
+    BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
 
-    action = common->action->u.block_dirty_bitmap_merge.data;
+    tran_add(tran, &block_dirty_bitmap_merge_drv, state);
 
     state->bitmap = block_dirty_bitmap_merge(action->node, action->target,
                                              action->bitmaps, &state->backup,
                                              errp);
 }
 
-static void block_dirty_bitmap_remove_prepare(BlkActionState *common,
-                                              Error **errp)
+static void block_dirty_bitmap_remove_commit(void *opaque);
+static void block_dirty_bitmap_remove_abort(void *opaque);
+TransactionActionDrv block_dirty_bitmap_remove_drv = {
+    .commit = block_dirty_bitmap_remove_commit,
+    .abort = block_dirty_bitmap_remove_abort,
+    .clean = g_free,
+};
+
+static void block_dirty_bitmap_remove_action(BlockDirtyBitmap *action,
+                                             Transaction *tran, Error **errp)
 {
-    BlockDirtyBitmap *action;
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1);
 
-    if (action_check_completion_mode(common, errp) < 0) {
-        return;
-    }
+    tran_add(tran, &block_dirty_bitmap_remove_drv, state);
 
-    action = common->action->u.block_dirty_bitmap_remove.data;
 
     state->bitmap = block_dirty_bitmap_remove(action->node, action->name,
                                               false, &state->bs, errp);
@@ -2202,10 +2170,9 @@ static void block_dirty_bitmap_remove_prepare(BlkActionState *common,
     }
 }
 
-static void block_dirty_bitmap_remove_abort(BlkActionState *common)
+static void block_dirty_bitmap_remove_abort(void *opaque)
 {
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = opaque;
 
     if (state->bitmap) {
         bdrv_dirty_bitmap_skip_store(state->bitmap, false);
@@ -2213,217 +2180,159 @@ static void block_dirty_bitmap_remove_abort(BlkActionState *common)
     }
 }
 
-static void block_dirty_bitmap_remove_commit(BlkActionState *common)
+static void block_dirty_bitmap_remove_commit(void *opaque)
 {
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
+    BlockDirtyBitmapState *state = opaque;
 
     bdrv_dirty_bitmap_set_busy(state->bitmap, false);
     bdrv_release_dirty_bitmap(state->bitmap);
 }
 
-static void abort_prepare(BlkActionState *common, Error **errp)
+static void abort_commit(void *opaque);
+TransactionActionDrv abort_drv = {
+    .commit = abort_commit,
+};
+
+static void abort_action(Transaction *tran, Error **errp)
 {
+    tran_add(tran, &abort_drv, NULL);
     error_setg(errp, "Transaction aborted using Abort action");
 }
 
-static void abort_commit(BlkActionState *common)
+static void abort_commit(void *opaque)
 {
     g_assert_not_reached(); /* this action never succeeds */
 }
 
-static const BlkActionOps actions[] = {
-    [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = {
-        .instance_size = sizeof(ExternalSnapshotState),
-        .prepare  = external_snapshot_prepare,
-        .commit   = external_snapshot_commit,
-        .abort = external_snapshot_abort,
-        .clean = external_snapshot_clean,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = {
-        .instance_size = sizeof(ExternalSnapshotState),
-        .prepare  = external_snapshot_prepare,
-        .commit   = external_snapshot_commit,
-        .abort = external_snapshot_abort,
-        .clean = external_snapshot_clean,
-    },
-    [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
-        .instance_size = sizeof(DriveBackupState),
-        .prepare = drive_backup_prepare,
-        .commit = drive_backup_commit,
-        .abort = drive_backup_abort,
-        .clean = drive_backup_clean,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
-        .instance_size = sizeof(BlockdevBackupState),
-        .prepare = blockdev_backup_prepare,
-        .commit = blockdev_backup_commit,
-        .abort = blockdev_backup_abort,
-        .clean = blockdev_backup_clean,
-    },
-    [TRANSACTION_ACTION_KIND_ABORT] = {
-        .instance_size = sizeof(BlkActionState),
-        .prepare = abort_prepare,
-        .commit = abort_commit,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC] = {
-        .instance_size = sizeof(InternalSnapshotState),
-        .prepare  = internal_snapshot_prepare,
-        .abort = internal_snapshot_abort,
-        .clean = internal_snapshot_clean,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD] = {
-        .instance_size = sizeof(BlockDirtyBitmapState),
-        .prepare = block_dirty_bitmap_add_prepare,
-        .abort = block_dirty_bitmap_add_abort,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = {
-        .instance_size = sizeof(BlockDirtyBitmapState),
-        .prepare = block_dirty_bitmap_clear_prepare,
-        .commit = block_dirty_bitmap_free_backup,
-        .abort = block_dirty_bitmap_restore,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE] = {
-        .instance_size = sizeof(BlockDirtyBitmapState),
-        .prepare = block_dirty_bitmap_enable_prepare,
-        .abort = block_dirty_bitmap_enable_abort,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE] = {
-        .instance_size = sizeof(BlockDirtyBitmapState),
-        .prepare = block_dirty_bitmap_disable_prepare,
-        .abort = block_dirty_bitmap_disable_abort,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE] = {
-        .instance_size = sizeof(BlockDirtyBitmapState),
-        .prepare = block_dirty_bitmap_merge_prepare,
-        .commit = block_dirty_bitmap_free_backup,
-        .abort = block_dirty_bitmap_restore,
-    },
-    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE] = {
-        .instance_size = sizeof(BlockDirtyBitmapState),
-        .prepare = block_dirty_bitmap_remove_prepare,
-        .commit = block_dirty_bitmap_remove_commit,
-        .abort = block_dirty_bitmap_remove_abort,
-    },
-    /* Where are transactions for MIRROR, COMMIT and STREAM?
+static void transaction_action(TransactionAction *act, JobTxn *block_job_txn,
+                               Transaction *tran, Error **errp)
+{
+    switch (act->type) {
+    case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT:
+    case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
+        external_snapshot_action(act, tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_DRIVE_BACKUP:
+        drive_backup_action(act->u.drive_backup.data,
+                            block_job_txn, tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP:
+        blockdev_backup_action(act->u.blockdev_backup.data,
+                               block_job_txn, tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_ABORT:
+        abort_action(tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC:
+        internal_snapshot_action(act->u.blockdev_snapshot_internal_sync.data,
+                                 tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD:
+        block_dirty_bitmap_add_action(act->u.block_dirty_bitmap_add.data,
+                                      tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR:
+        block_dirty_bitmap_clear_action(act->u.block_dirty_bitmap_clear.data,
+                                        tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE:
+        block_dirty_bitmap_enable_action(act->u.block_dirty_bitmap_enable.data,
+                                         tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE:
+        block_dirty_bitmap_disable_action(
+                act->u.block_dirty_bitmap_disable.data, tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE:
+        block_dirty_bitmap_merge_action(act->u.block_dirty_bitmap_merge.data,
+                                        tran, errp);
+        return;
+    case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE:
+        block_dirty_bitmap_remove_action(act->u.block_dirty_bitmap_remove.data,
+                                         tran, errp);
+        return;
+    /*
+     * Where are transactions for MIRROR, COMMIT and STREAM?
      * Although these blockjobs use transaction callbacks like the backup job,
      * these jobs do not necessarily adhere to transaction semantics.
      * These jobs may not fully undo all of their actions on abort, nor do they
      * necessarily work in transactions with more than one job in them.
      */
-};
-
-/**
- * Allocate a TransactionProperties structure if necessary, and fill
- * that structure with desired defaults if they are unset.
- */
-static TransactionProperties *get_transaction_properties(
-    TransactionProperties *props)
-{
-    if (!props) {
-        props = g_new0(TransactionProperties, 1);
-    }
-
-    if (!props->has_completion_mode) {
-        props->has_completion_mode = true;
-        props->completion_mode = ACTION_COMPLETION_MODE_INDIVIDUAL;
-    }
-
-    return props;
+    case TRANSACTION_ACTION_KIND__MAX:
+    default:
+        g_assert_not_reached();
+    };
 }
 
+
 /*
  * 'Atomic' group operations.  The operations are performed as a set, and if
  * any fail then we roll back all operations in the group.
+ *
+ * Always run under BQL.
  */
-void qmp_transaction(TransactionActionList *dev_list,
-                     bool has_props,
-                     struct TransactionProperties *props,
+void qmp_transaction(TransactionActionList *actions,
+                     struct TransactionProperties *properties,
                      Error **errp)
 {
-    TransactionActionList *dev_entry = dev_list;
+    TransactionActionList *act;
     JobTxn *block_job_txn = NULL;
-    BlkActionState *state, *next;
     Error *local_err = NULL;
+    Transaction *tran;
+    ActionCompletionMode comp_mode =
+        properties ? properties->completion_mode :
+        ACTION_COMPLETION_MODE_INDIVIDUAL;
 
-    QTAILQ_HEAD(, BlkActionState) snap_bdrv_states;
-    QTAILQ_INIT(&snap_bdrv_states);
+    GLOBAL_STATE_CODE();
 
     /* Does this transaction get canceled as a group on failure?
      * If not, we don't really need to make a JobTxn.
      */
-    props = get_transaction_properties(props);
-    if (props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
+    if (comp_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
+        for (act = actions; act; act = act->next) {
+            TransactionActionKind type = act->value->type;
+
+            if (type != TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP &&
+                type != TRANSACTION_ACTION_KIND_DRIVE_BACKUP)
+            {
+                error_setg(errp,
+                           "Action '%s' does not support transaction property "
+                           "completion-mode = %s",
+                           TransactionActionKind_str(type),
+                           ActionCompletionMode_str(comp_mode));
+                return;
+            }
+        }
+
         block_job_txn = job_txn_new();
     }
 
     /* drain all i/o before any operations */
     bdrv_drain_all();
 
-    /* We don't do anything in this loop that commits us to the operations */
-    while (NULL != dev_entry) {
-        TransactionAction *dev_info = NULL;
-        const BlkActionOps *ops;
-
-        dev_info = dev_entry->value;
-        dev_entry = dev_entry->next;
-
-        assert(dev_info->type < ARRAY_SIZE(actions));
-
-        ops = &actions[dev_info->type];
-        assert(ops->instance_size > 0);
-
-        state = g_malloc0(ops->instance_size);
-        state->ops = ops;
-        state->action = dev_info;
-        state->block_job_txn = block_job_txn;
-        state->txn_props = props;
-        QTAILQ_INSERT_TAIL(&snap_bdrv_states, state, entry);
+    tran = tran_new();
 
-        state->ops->prepare(state, &local_err);
+    /* We don't do anything in this loop that commits us to the operations */
+    for (act = actions; act; act = act->next) {
+        transaction_action(act->value, block_job_txn, tran, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
             goto delete_and_fail;
         }
     }
 
-    QTAILQ_FOREACH(state, &snap_bdrv_states, entry) {
-        if (state->ops->commit) {
-            state->ops->commit(state);
-        }
-    }
+    tran_commit(tran);
 
     /* success */
     goto exit;
 
 delete_and_fail:
     /* failure, and it is all-or-none; roll back all operations */
-    QTAILQ_FOREACH_REVERSE(state, &snap_bdrv_states, entry) {
-        if (state->ops->abort) {
-            state->ops->abort(state);
-        }
-    }
+    tran_abort(tran);
 exit:
-    QTAILQ_FOREACH_SAFE(state, &snap_bdrv_states, entry, next) {
-        if (state->ops->clean) {
-            state->ops->clean(state);
-        }
-        g_free(state);
-    }
-    if (!has_props) {
-        qapi_free_TransactionProperties(props);
-    }
     job_txn_unref(block_job_txn);
 }
 
-void qmp_block_passwd(bool has_device, const char *device,
-                      bool has_node_name, const char *node_name,
-                      const char *password, Error **errp)
-{
-    error_setg(errp,
-               "Setting block passwords directly is no longer supported");
-}
-
 BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node,
                                                               const char *name,
                                                               Error **errp)
@@ -2449,8 +2358,7 @@ BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node,
     return ret;
 }
 
-void coroutine_fn qmp_block_resize(bool has_device, const char *device,
-                                   bool has_node_name, const char *node_name,
+void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
                                    int64_t size, Error **errp)
 {
     Error *local_err = NULL;
@@ -2458,9 +2366,7 @@ void coroutine_fn qmp_block_resize(bool has_device, const char *device,
     BlockDriverState *bs;
     AioContext *old_ctx;
 
-    bs = bdrv_lookup_bs(has_device ? device : NULL,
-                        has_node_name ? node_name : NULL,
-                        &local_err);
+    bs = bdrv_lookup_bs(device, node_name, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
@@ -2471,12 +2377,15 @@ void coroutine_fn qmp_block_resize(bool has_device, const char *device,
         return;
     }
 
+    bdrv_graph_co_rdlock();
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
         error_setg(errp, QERR_DEVICE_IN_USE, device);
+        bdrv_graph_co_rdunlock();
         return;
     }
+    bdrv_graph_co_rdunlock();
 
-    blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
+    blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
     if (!blk) {
         return;
     }
@@ -2486,32 +2395,55 @@ void coroutine_fn qmp_block_resize(bool has_device, const char *device,
     bdrv_co_unlock(bs);
 
     old_ctx = bdrv_co_enter(bs);
-    blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
+    blk_co_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
     bdrv_co_leave(bs, old_ctx);
 
     bdrv_co_lock(bs);
     bdrv_drained_end(bs);
-    blk_unref(blk);
     bdrv_co_unlock(bs);
+
+    blk_co_unref(blk);
 }
 
-void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
-                      bool has_base, const char *base,
-                      bool has_base_node, const char *base_node,
-                      bool has_backing_file, const char *backing_file,
+void qmp_block_stream(const char *job_id, const char *device,
+                      const char *base,
+                      const char *base_node,
+                      const char *backing_file,
+                      const char *bottom,
                       bool has_speed, int64_t speed,
                       bool has_on_error, BlockdevOnError on_error,
+                      const char *filter_node_name,
                       bool has_auto_finalize, bool auto_finalize,
                       bool has_auto_dismiss, bool auto_dismiss,
                       Error **errp)
 {
-    BlockDriverState *bs, *iter;
+    BlockDriverState *bs, *iter, *iter_end;
     BlockDriverState *base_bs = NULL;
+    BlockDriverState *bottom_bs = NULL;
     AioContext *aio_context;
     Error *local_err = NULL;
-    const char *base_name = NULL;
     int job_flags = JOB_DEFAULT;
 
+    GLOBAL_STATE_CODE();
+
+    if (base && base_node) {
+        error_setg(errp, "'base' and 'base-node' cannot be specified "
+                   "at the same time");
+        return;
+    }
+
+    if (base && bottom) {
+        error_setg(errp, "'base' and 'bottom' cannot be specified "
+                   "at the same time");
+        return;
+    }
+
+    if (bottom && base_node) {
+        error_setg(errp, "'bottom' and 'base-node' cannot be specified "
+                   "at the same time");
+        return;
+    }
+
     if (!has_on_error) {
         on_error = BLOCKDEV_ON_ERROR_REPORT;
     }
@@ -2524,57 +2456,74 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
-    if (has_base && has_base_node) {
-        error_setg(errp, "'base' and 'base-node' cannot be specified "
-                   "at the same time");
-        goto out;
-    }
-
-    if (has_base) {
+    bdrv_graph_rdlock_main_loop();
+    if (base) {
         base_bs = bdrv_find_backing_image(bs, base);
         if (base_bs == NULL) {
-            error_setg(errp, QERR_BASE_NOT_FOUND, base);
-            goto out;
+            error_setg(errp, "Can't find '%s' in the backing chain", base);
+            goto out_rdlock;
         }
         assert(bdrv_get_aio_context(base_bs) == aio_context);
-        base_name = base;
     }
 
-    if (has_base_node) {
+    if (base_node) {
         base_bs = bdrv_lookup_bs(NULL, base_node, errp);
         if (!base_bs) {
-            goto out;
+            goto out_rdlock;
         }
         if (bs == base_bs || !bdrv_chain_contains(bs, base_bs)) {
             error_setg(errp, "Node '%s' is not a backing image of '%s'",
                        base_node, device);
-            goto out;
+            goto out_rdlock;
         }
         assert(bdrv_get_aio_context(base_bs) == aio_context);
+
         bdrv_refresh_filename(base_bs);
-        base_name = base_bs->filename;
     }
 
-    /* Check for op blockers in the whole chain between bs and base */
-    for (iter = bs; iter && iter != base_bs;
+    if (bottom) {
+        bottom_bs = bdrv_lookup_bs(NULL, bottom, errp);
+        if (!bottom_bs) {
+            goto out_rdlock;
+        }
+        if (!bottom_bs->drv) {
+            error_setg(errp, "Node '%s' is not open", bottom);
+            goto out_rdlock;
+        }
+        if (bottom_bs->drv->is_filter) {
+            error_setg(errp, "Node '%s' is a filter, use a non-filter node "
+                       "as 'bottom'", bottom);
+            goto out_rdlock;
+        }
+        if (!bdrv_chain_contains(bs, bottom_bs)) {
+            error_setg(errp, "Node '%s' is not in a chain starting from '%s'",
+                       bottom, device);
+            goto out_rdlock;
+        }
+        assert(bdrv_get_aio_context(bottom_bs) == aio_context);
+    }
+
+    /*
+     * Check for op blockers in the whole chain between bs and base (or bottom)
+     */
+    iter_end = bottom ? bdrv_filter_or_cow_bs(bottom_bs) : base_bs;
+    for (iter = bs; iter && iter != iter_end;
          iter = bdrv_filter_or_cow_bs(iter))
     {
         if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) {
-            goto out;
+            goto out_rdlock;
         }
     }
+    bdrv_graph_rdunlock_main_loop();
 
     /* if we are streaming the entire chain, the result will have no backing
      * file, and specifying one is therefore an error */
-    if (base_bs == NULL && has_backing_file) {
+    if (!base_bs && backing_file) {
         error_setg(errp, "backing file specified, but streaming the "
                          "entire chain");
         goto out;
     }
 
-    /* backing_file string overrides base bs filename */
-    base_name = has_backing_file ? backing_file : base_name;
-
     if (has_auto_finalize && !auto_finalize) {
         job_flags |= JOB_MANUAL_FINALIZE;
     }
@@ -2582,8 +2531,9 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
         job_flags |= JOB_MANUAL_DISMISS;
     }
 
-    stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
-                 job_flags, has_speed ? speed : 0, on_error, &local_err);
+    stream_start(job_id, bs, base_bs, backing_file,
+                 bottom_bs, job_flags, has_speed ? speed : 0, on_error,
+                 filter_node_name, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         goto out;
@@ -2593,17 +2543,22 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
 
 out:
     aio_context_release(aio_context);
+    return;
+
+out_rdlock:
+    bdrv_graph_rdunlock_main_loop();
+    aio_context_release(aio_context);
 }
 
-void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
-                      bool has_base_node, const char *base_node,
-                      bool has_base, const char *base,
-                      bool has_top_node, const char *top_node,
-                      bool has_top, const char *top,
-                      bool has_backing_file, const char *backing_file,
+void qmp_block_commit(const char *job_id, const char *device,
+                      const char *base_node,
+                      const char *base,
+                      const char *top_node,
+                      const char *top,
+                      const char *backing_file,
                       bool has_speed, int64_t speed,
                       bool has_on_error, BlockdevOnError on_error,
-                      bool has_filter_node_name, const char *filter_node_name,
+                      const char *filter_node_name,
                       bool has_auto_finalize, bool auto_finalize,
                       bool has_auto_dismiss, bool auto_dismiss,
                       Error **errp)
@@ -2616,15 +2571,15 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
     int job_flags = JOB_DEFAULT;
     uint64_t top_perm, top_shared;
 
+    /* TODO We'll eventually have to take a writer lock in this function */
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (!has_speed) {
         speed = 0;
     }
     if (!has_on_error) {
         on_error = BLOCKDEV_ON_ERROR_REPORT;
     }
-    if (!has_filter_node_name) {
-        filter_node_name = NULL;
-    }
     if (has_auto_finalize && !auto_finalize) {
         job_flags |= JOB_MANUAL_FINALIZE;
     }
@@ -2660,10 +2615,10 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
     /* default top_bs is the active layer */
     top_bs = bs;
 
-    if (has_top_node && has_top) {
+    if (top_node && top) {
         error_setg(errp, "'top-node' and 'top' are mutually exclusive");
         goto out;
-    } else if (has_top_node) {
+    } else if (top_node) {
         top_bs = bdrv_lookup_bs(NULL, top_node, errp);
         if (top_bs == NULL) {
             goto out;
@@ -2673,7 +2628,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
                        top_node);
             goto out;
         }
-    } else if (has_top && top) {
+    } else if (top) {
         /* This strcmp() is just a shortcut, there is no need to
          * refresh @bs's filename.  If it mismatches,
          * bdrv_find_backing_image() will do the refresh and may still
@@ -2690,10 +2645,10 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
 
     assert(bdrv_get_aio_context(top_bs) == aio_context);
 
-    if (has_base_node && has_base) {
+    if (base_node && base) {
         error_setg(errp, "'base-node' and 'base' are mutually exclusive");
         goto out;
-    } else if (has_base_node) {
+    } else if (base_node) {
         base_bs = bdrv_lookup_bs(NULL, base_node, errp);
         if (base_bs == NULL) {
             goto out;
@@ -2703,15 +2658,18 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
                        base_node);
             goto out;
         }
-    } else if (has_base && base) {
+    } else if (base) {
         base_bs = bdrv_find_backing_image(top_bs, base);
+        if (base_bs == NULL) {
+            error_setg(errp, "Can't find '%s' in the backing chain", base);
+            goto out;
+        }
     } else {
         base_bs = bdrv_find_base(top_bs);
-    }
-
-    if (base_bs == NULL) {
-        error_setg(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL");
-        goto out;
+        if (base_bs == NULL) {
+            error_setg(errp, "There is no backimg image");
+            goto out;
+        }
     }
 
     assert(bdrv_get_aio_context(base_bs) == aio_context);
@@ -2742,7 +2700,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
     if (top_perm & BLK_PERM_WRITE ||
         bdrv_skip_filters(top_bs) == bdrv_skip_filters(bs))
     {
-        if (has_backing_file) {
+        if (backing_file) {
             if (bdrv_skip_filters(top_bs) == bdrv_skip_filters(bs)) {
                 error_setg(errp, "'backing-file' specified,"
                                  " but 'top' is the active layer");
@@ -2752,7 +2710,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
             }
             goto out;
         }
-        if (!has_job_id) {
+        if (!job_id) {
             /*
              * Emulate here what block_job_create() does, because it
              * is possible that @bs != @top_bs (the block job should
@@ -2768,8 +2726,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
         if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
             goto out;
         }
-        commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, job_flags,
-                     speed, on_error, has_backing_file ? backing_file : NULL,
+        commit_start(job_id, bs, base_bs, top_bs, job_flags,
+                     speed, on_error, backing_file,
                      filter_node_name, &local_err);
     }
     if (local_err != NULL) {
@@ -2790,6 +2748,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
 {
     BlockJob *job = NULL;
     BdrvDirtyBitmap *bmap = NULL;
+    BackupPerf perf = { .max_workers = 64 };
     int job_flags = JOB_DEFAULT;
 
     if (!backup->has_speed) {
@@ -2801,9 +2760,6 @@ static BlockJob *do_backup_common(BackupCommon *backup,
     if (!backup->has_on_target_error) {
         backup->on_target_error = BLOCKDEV_ON_ERROR_REPORT;
     }
-    if (!backup->has_job_id) {
-        backup->job_id = NULL;
-    }
     if (!backup->has_auto_finalize) {
         backup->auto_finalize = true;
     }
@@ -2814,10 +2770,22 @@ static BlockJob *do_backup_common(BackupCommon *backup,
         backup->compress = false;
     }
 
+    if (backup->x_perf) {
+        if (backup->x_perf->has_use_copy_range) {
+            perf.use_copy_range = backup->x_perf->use_copy_range;
+        }
+        if (backup->x_perf->has_max_workers) {
+            perf.max_workers = backup->x_perf->max_workers;
+        }
+        if (backup->x_perf->has_max_chunk) {
+            perf.max_chunk = backup->x_perf->max_chunk;
+        }
+    }
+
     if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) ||
         (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) {
         /* done before desugaring 'incremental' to print the right message */
-        if (!backup->has_bitmap) {
+        if (!backup->bitmap) {
             error_setg(errp, "must provide a valid bitmap name for "
                        "'%s' sync mode", MirrorSyncMode_str(backup->sync));
             return NULL;
@@ -2838,7 +2806,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
         backup->bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS;
     }
 
-    if (backup->has_bitmap) {
+    if (backup->bitmap) {
         bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
         if (!bmap) {
             error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap);
@@ -2871,7 +2839,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
         }
     }
 
-    if (!backup->has_bitmap && backup->has_bitmap_mode) {
+    if (!backup->bitmap && backup->has_bitmap_mode) {
         error_setg(errp, "Cannot specify bitmap sync mode without a bitmap");
         return NULL;
     }
@@ -2887,6 +2855,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
                             backup->sync, bmap, backup->bitmap_mode,
                             backup->compress,
                             backup->filter_node_name,
+                            &perf,
                             backup->on_source_error,
                             backup->on_target_error,
                             job_flags, NULL, NULL, txn, errp);
@@ -2902,9 +2871,9 @@ void qmp_drive_backup(DriveBackup *backup, Error **errp)
     blockdev_do_action(&action, errp);
 }
 
-BlockDeviceInfoList *qmp_query_named_block_nodes(bool has_flat,
-                                                 bool flat,
-                                                 Error **errp)
+BlockDeviceInfoList *coroutine_fn qmp_query_named_block_nodes(bool has_flat,
+                                                              bool flat,
+                                                              Error **errp)
 {
     bool return_flat = has_flat && flat;
 
@@ -2913,6 +2882,8 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(bool has_flat,
 
 XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp)
 {
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     return bdrv_get_xdbg_block_graph(errp);
 }
 
@@ -2930,7 +2901,7 @@ void qmp_blockdev_backup(BlockdevBackup *backup, Error **errp)
  **/
 static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
                                    BlockDriverState *target,
-                                   bool has_replaces, const char *replaces,
+                                   const char *replaces,
                                    enum MirrorSyncMode sync,
                                    BlockMirrorBackingMode backing_mode,
                                    bool zero_target,
@@ -2942,7 +2913,6 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
                                    bool has_on_target_error,
                                    BlockdevOnError on_target_error,
                                    bool has_unmap, bool unmap,
-                                   bool has_filter_node_name,
                                    const char *filter_node_name,
                                    bool has_copy_mode, MirrorCopyMode copy_mode,
                                    bool has_auto_finalize, bool auto_finalize,
@@ -2952,6 +2922,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
     BlockDriverState *unfiltered_bs;
     int job_flags = JOB_DEFAULT;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (!has_speed) {
         speed = 0;
     }
@@ -2970,9 +2943,6 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
     if (!has_unmap) {
         unmap = true;
     }
-    if (!has_filter_node_name) {
-        filter_node_name = NULL;
-    }
     if (!has_copy_mode) {
         copy_mode = MIRROR_COPY_MODE_BACKGROUND;
     }
@@ -2990,7 +2960,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
     }
     if (granularity & (granularity - 1)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
-                   "power of 2");
+                   "power of 2");
         return;
     }
 
@@ -3005,17 +2975,17 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
         sync = MIRROR_SYNC_MODE_FULL;
     }
 
-    if (!has_replaces) {
+    if (!replaces) {
         /* We want to mirror from @bs, but keep implicit filters on top */
         unfiltered_bs = bdrv_skip_implicit_filters(bs);
         if (unfiltered_bs != bs) {
             replaces = unfiltered_bs->node_name;
-            has_replaces = true;
         }
     }
 
-    if (has_replaces) {
+    if (replaces) {
         BlockDriverState *to_replace_bs;
+        AioContext *aio_context;
         AioContext *replace_aio_context;
         int64_t bs_size, replace_size;
 
@@ -3030,10 +3000,19 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
             return;
         }
 
+        aio_context = bdrv_get_aio_context(bs);
         replace_aio_context = bdrv_get_aio_context(to_replace_bs);
-        aio_context_acquire(replace_aio_context);
+        /*
+         * bdrv_getlength() is a co-wrapper and uses AIO_WAIT_WHILE. Be sure not
+         * to acquire the same AioContext twice.
+         */
+        if (replace_aio_context != aio_context) {
+            aio_context_acquire(replace_aio_context);
+        }
         replace_size = bdrv_getlength(to_replace_bs);
-        aio_context_release(replace_aio_context);
+        if (replace_aio_context != aio_context) {
+            aio_context_release(replace_aio_context);
+        }
 
         if (replace_size < 0) {
             error_setg_errno(errp, -replace_size,
@@ -3051,7 +3030,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
      * and will allow to check whether the node still exist at mirror completion
      */
     mirror_start(job_id, bs, target,
-                 has_replaces ? replaces : NULL, job_flags,
+                 replaces, job_flags,
                  speed, granularity, buf_size, sync, backing_mode, zero_target,
                  on_source_error, on_target_error, unmap, filter_node_name,
                  copy_mode, errp);
@@ -3078,7 +3057,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
     }
 
     /* Early check to avoid creating target */
+    bdrv_graph_rdlock_main_loop();
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR_SOURCE, errp)) {
+        bdrv_graph_rdunlock_main_loop();
         return;
     }
 
@@ -3089,7 +3070,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
         arg->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
     }
 
-    if (!arg->has_format) {
+    if (!arg->format) {
         format = (arg->mode == NEW_IMAGE_MODE_EXISTING
                   ? NULL : bs->drv->format_name);
     }
@@ -3102,6 +3083,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
     if (arg->sync == MIRROR_SYNC_MODE_NONE) {
         target_backing_bs = bs;
     }
+    bdrv_graph_rdunlock_main_loop();
 
     size = bdrv_getlength(bs);
     if (size < 0) {
@@ -3109,8 +3091,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
         goto out;
     }
 
-    if (arg->has_replaces) {
-        if (!arg->has_node_name) {
+    if (arg->replaces) {
+        if (!arg->node_name) {
             error_setg(errp, "a node-name must be provided when replacing a"
                              " named node of the graph");
             goto out;
@@ -3134,16 +3116,21 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
         bdrv_img_create(arg->target, format,
                         NULL, NULL, NULL, size, flags, false, &local_err);
     } else {
-        /* Implicit filters should not appear in the filename */
-        BlockDriverState *explicit_backing =
-            bdrv_skip_implicit_filters(target_backing_bs);
+        BlockDriverState *explicit_backing;
 
         switch (arg->mode) {
         case NEW_IMAGE_MODE_EXISTING:
             break;
         case NEW_IMAGE_MODE_ABSOLUTE_PATHS:
-            /* create new image with backing file */
+            /*
+             * Create new image with backing file.
+             * Implicit filters should not appear in the filename.
+             */
+            bdrv_graph_rdlock_main_loop();
+            explicit_backing = bdrv_skip_implicit_filters(target_backing_bs);
             bdrv_refresh_filename(explicit_backing);
+            bdrv_graph_rdunlock_main_loop();
+
             bdrv_img_create(arg->target, format,
                             explicit_backing->filename,
                             explicit_backing->drv->format_name,
@@ -3160,32 +3147,37 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
     }
 
     options = qdict_new();
-    if (arg->has_node_name) {
+    if (arg->node_name) {
         qdict_put_str(options, "node-name", arg->node_name);
     }
     if (format) {
         qdict_put_str(options, "driver", format);
     }
+    aio_context_release(aio_context);
 
     /* Mirroring takes care of copy-on-write using the source's backing
      * file.
      */
+    aio_context_acquire(qemu_get_aio_context());
     target_bs = bdrv_open(arg->target, NULL, options, flags, errp);
+    aio_context_release(qemu_get_aio_context());
+
     if (!target_bs) {
-        goto out;
+        return;
     }
 
+    bdrv_graph_rdlock_main_loop();
     zero_target = (arg->sync == MIRROR_SYNC_MODE_FULL &&
                    (arg->mode == NEW_IMAGE_MODE_EXISTING ||
                     !bdrv_has_zero_init(target_bs)));
+    bdrv_graph_rdunlock_main_loop();
 
 
-    /* Honor bdrv_try_set_aio_context() context acquisition requirements. */
+    /* Honor bdrv_try_change_aio_context() context acquisition requirements. */
     old_context = bdrv_get_aio_context(target_bs);
-    aio_context_release(aio_context);
     aio_context_acquire(old_context);
 
-    ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
+    ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp);
     if (ret < 0) {
         bdrv_unref(target_bs);
         aio_context_release(old_context);
@@ -3195,8 +3187,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
     aio_context_release(old_context);
     aio_context_acquire(aio_context);
 
-    blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
-                           arg->has_replaces, arg->replaces, arg->sync,
+    blockdev_mirror_common(arg->job_id, bs, target_bs,
+                           arg->replaces, arg->sync,
                            backing_mode, zero_target,
                            arg->has_speed, arg->speed,
                            arg->has_granularity, arg->granularity,
@@ -3204,7 +3196,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
                            arg->has_on_source_error, arg->on_source_error,
                            arg->has_on_target_error, arg->on_target_error,
                            arg->has_unmap, arg->unmap,
-                           false, NULL,
+                           NULL,
                            arg->has_copy_mode, arg->copy_mode,
                            arg->has_auto_finalize, arg->auto_finalize,
                            arg->has_auto_dismiss, arg->auto_dismiss,
@@ -3214,9 +3206,9 @@ out:
     aio_context_release(aio_context);
 }
 
-void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
+void qmp_blockdev_mirror(const char *job_id,
                          const char *device, const char *target,
-                         bool has_replaces, const char *replaces,
+                         const char *replaces,
                          MirrorSyncMode sync,
                          bool has_speed, int64_t speed,
                          bool has_granularity, uint32_t granularity,
@@ -3225,7 +3217,6 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
                          BlockdevOnError on_source_error,
                          bool has_on_target_error,
                          BlockdevOnError on_target_error,
-                         bool has_filter_node_name,
                          const char *filter_node_name,
                          bool has_copy_mode, MirrorCopyMode copy_mode,
                          bool has_auto_finalize, bool auto_finalize,
@@ -3252,12 +3243,12 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
 
     zero_target = (sync == MIRROR_SYNC_MODE_FULL);
 
-    /* Honor bdrv_try_set_aio_context() context acquisition requirements. */
+    /* Honor bdrv_try_change_aio_context() context acquisition requirements. */
     old_context = bdrv_get_aio_context(target_bs);
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(old_context);
 
-    ret = bdrv_try_set_aio_context(target_bs, aio_context, errp);
+    ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp);
 
     aio_context_release(old_context);
     aio_context_acquire(aio_context);
@@ -3266,15 +3257,14 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
         goto out;
     }
 
-    blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs,
-                           has_replaces, replaces, sync, backing_mode,
+    blockdev_mirror_common(job_id, bs, target_bs,
+                           replaces, sync, backing_mode,
                            zero_target, has_speed, speed,
                            has_granularity, granularity,
                            has_buf_size, buf_size,
                            has_on_source_error, on_source_error,
                            has_on_target_error, on_target_error,
-                           true, true,
-                           has_filter_node_name, filter_node_name,
+                           true, true, filter_node_name,
                            has_copy_mode, copy_mode,
                            has_auto_finalize, auto_finalize,
                            has_auto_dismiss, auto_dismiss,
@@ -3283,17 +3273,16 @@ out:
     aio_context_release(aio_context);
 }
 
-/* Get a block job using its ID and acquire its AioContext */
-static BlockJob *find_block_job(const char *id, AioContext **aio_context,
-                                Error **errp)
+/*
+ * Get a block job using its ID. Called with job_mutex held.
+ */
+static BlockJob *find_block_job_locked(const char *id, Error **errp)
 {
     BlockJob *job;
 
     assert(id != NULL);
 
-    *aio_context = NULL;
-
-    job = block_job_get(id);
+    job = block_job_get_locked(id);
 
     if (!job) {
         error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
@@ -3301,30 +3290,30 @@ static BlockJob *find_block_job(const char *id, AioContext **aio_context,
         return NULL;
     }
 
-    *aio_context = blk_get_aio_context(job->blk);
-    aio_context_acquire(*aio_context);
-
     return job;
 }
 
 void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp)
 {
-    AioContext *aio_context;
-    BlockJob *job = find_block_job(device, &aio_context, errp);
+    BlockJob *job;
+
+    JOB_LOCK_GUARD();
+    job = find_block_job_locked(device, errp);
 
     if (!job) {
         return;
     }
 
-    block_job_set_speed(job, speed, errp);
-    aio_context_release(aio_context);
+    block_job_set_speed_locked(job, speed, errp);
 }
 
 void qmp_block_job_cancel(const char *device,
                           bool has_force, bool force, Error **errp)
 {
-    AioContext *aio_context;
-    BlockJob *job = find_block_job(device, &aio_context, errp);
+    BlockJob *job;
+
+    JOB_LOCK_GUARD();
+    job = find_block_job_locked(device, errp);
 
     if (!job) {
         return;
@@ -3334,97 +3323,108 @@ void qmp_block_job_cancel(const char *device,
         force = false;
     }
 
-    if (job_user_paused(&job->job) && !force) {
+    if (job_user_paused_locked(&job->job) && !force) {
         error_setg(errp, "The block job for device '%s' is currently paused",
                    device);
-        goto out;
+        return;
     }
 
     trace_qmp_block_job_cancel(job);
-    job_user_cancel(&job->job, force, errp);
-out:
-    aio_context_release(aio_context);
+    job_user_cancel_locked(&job->job, force, errp);
 }
 
 void qmp_block_job_pause(const char *device, Error **errp)
 {
-    AioContext *aio_context;
-    BlockJob *job = find_block_job(device, &aio_context, errp);
+    BlockJob *job;
+
+    JOB_LOCK_GUARD();
+    job = find_block_job_locked(device, errp);
 
     if (!job) {
         return;
     }
 
     trace_qmp_block_job_pause(job);
-    job_user_pause(&job->job, errp);
-    aio_context_release(aio_context);
+    job_user_pause_locked(&job->job, errp);
 }
 
 void qmp_block_job_resume(const char *device, Error **errp)
 {
-    AioContext *aio_context;
-    BlockJob *job = find_block_job(device, &aio_context, errp);
+    BlockJob *job;
+
+    JOB_LOCK_GUARD();
+    job = find_block_job_locked(device, errp);
 
     if (!job) {
         return;
     }
 
     trace_qmp_block_job_resume(job);
-    job_user_resume(&job->job, errp);
-    aio_context_release(aio_context);
+    job_user_resume_locked(&job->job, errp);
 }
 
 void qmp_block_job_complete(const char *device, Error **errp)
 {
-    AioContext *aio_context;
-    BlockJob *job = find_block_job(device, &aio_context, errp);
+    BlockJob *job;
+
+    JOB_LOCK_GUARD();
+    job = find_block_job_locked(device, errp);
 
     if (!job) {
         return;
     }
 
     trace_qmp_block_job_complete(job);
-    job_complete(&job->job, errp);
-    aio_context_release(aio_context);
+    job_complete_locked(&job->job, errp);
 }
 
 void qmp_block_job_finalize(const char *id, Error **errp)
 {
-    AioContext *aio_context;
-    BlockJob *job = find_block_job(id, &aio_context, errp);
+    BlockJob *job;
+
+    JOB_LOCK_GUARD();
+    job = find_block_job_locked(id, errp);
 
     if (!job) {
         return;
     }
 
     trace_qmp_block_job_finalize(job);
-    job_ref(&job->job);
-    job_finalize(&job->job, errp);
+    job_ref_locked(&job->job);
+    job_finalize_locked(&job->job, errp);
 
-    /*
-     * Job's context might have changed via job_finalize (and job_txn_apply
-     * automatically acquires the new one), so make sure we release the correct
-     * one.
-     */
-    aio_context = blk_get_aio_context(job->blk);
-    job_unref(&job->job);
-    aio_context_release(aio_context);
+    job_unref_locked(&job->job);
 }
 
 void qmp_block_job_dismiss(const char *id, Error **errp)
 {
-    AioContext *aio_context;
-    BlockJob *bjob = find_block_job(id, &aio_context, errp);
+    BlockJob *bjob;
     Job *job;
 
+    JOB_LOCK_GUARD();
+    bjob = find_block_job_locked(id, errp);
+
     if (!bjob) {
         return;
     }
 
     trace_qmp_block_job_dismiss(bjob);
     job = &bjob->job;
-    job_dismiss(&job, errp);
-    aio_context_release(aio_context);
+    job_dismiss_locked(&job, errp);
+}
+
+void qmp_block_job_change(BlockJobChangeOptions *opts, Error **errp)
+{
+    BlockJob *job;
+
+    JOB_LOCK_GUARD();
+    job = find_block_job_locked(opts->id, errp);
+
+    if (!job) {
+        return;
+    }
+
+    block_job_change_locked(job, opts, errp);
 }
 
 void qmp_change_backing_file(const char *device,
@@ -3447,35 +3447,38 @@ void qmp_change_backing_file(const char *device,
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
+    bdrv_graph_rdlock_main_loop();
+
     image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
-        goto out;
+        goto out_rdlock;
     }
 
     if (!image_bs) {
         error_setg(errp, "image file not found");
-        goto out;
+        goto out_rdlock;
     }
 
     if (bdrv_find_base(image_bs) == image_bs) {
         error_setg(errp, "not allowing backing file change on an image "
                          "without a backing file");
-        goto out;
+        goto out_rdlock;
     }
 
     /* even though we are not necessarily operating on bs, we need it to
      * determine if block ops are currently prohibited on the chain */
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_CHANGE, errp)) {
-        goto out;
+        goto out_rdlock;
     }
 
     /* final sanity check */
     if (!bdrv_chain_contains(bs, image_bs)) {
         error_setg(errp, "'%s' and image file are not in the same chain",
                    device);
-        goto out;
+        goto out_rdlock;
     }
+    bdrv_graph_rdunlock_main_loop();
 
     /* if not r/w, reopen to make r/w */
     ro = bdrv_is_read_only(image_bs);
@@ -3503,6 +3506,11 @@ void qmp_change_backing_file(const char *device,
 
 out:
     aio_context_release(aio_context);
+    return;
+
+out_rdlock:
+    bdrv_graph_rdunlock_main_loop();
+    aio_context_release(aio_context);
 }
 
 void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
@@ -3534,45 +3542,56 @@ fail:
     visit_free(v);
 }
 
-void qmp_x_blockdev_reopen(BlockdevOptions *options, Error **errp)
+void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
 {
-    BlockDriverState *bs;
-    AioContext *ctx;
-    QObject *obj;
-    Visitor *v = qobject_output_visitor_new(&obj);
-    BlockReopenQueue *queue;
-    QDict *qdict;
+    BlockReopenQueue *queue = NULL;
 
-    /* Check for the selected node name */
-    if (!options->has_node_name) {
-        error_setg(errp, "Node name not specified");
-        goto fail;
-    }
+    /* Add each one of the BDS that we want to reopen to the queue */
+    for (; reopen_list != NULL; reopen_list = reopen_list->next) {
+        BlockdevOptions *options = reopen_list->value;
+        BlockDriverState *bs;
+        AioContext *ctx;
+        QObject *obj;
+        Visitor *v;
+        QDict *qdict;
 
-    bs = bdrv_find_node(options->node_name);
-    if (!bs) {
-        error_setg(errp, "Cannot find node named '%s'", options->node_name);
-        goto fail;
-    }
+        /* Check for the selected node name */
+        if (!options->node_name) {
+            error_setg(errp, "node-name not specified");
+            goto fail;
+        }
 
-    /* Put all options in a QDict and flatten it */
-    visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
-    visit_complete(v, &obj);
-    qdict = qobject_to(QDict, obj);
+        bs = bdrv_find_node(options->node_name);
+        if (!bs) {
+            error_setg(errp, "Failed to find node with node-name='%s'",
+                       options->node_name);
+            goto fail;
+        }
 
-    qdict_flatten(qdict);
+        /* Put all options in a QDict and flatten it */
+        v = qobject_output_visitor_new(&obj);
+        visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
+        visit_complete(v, &obj);
+        visit_free(v);
+
+        qdict = qobject_to(QDict, obj);
+
+        qdict_flatten(qdict);
+
+        ctx = bdrv_get_aio_context(bs);
+        aio_context_acquire(ctx);
+
+        queue = bdrv_reopen_queue(queue, bs, qdict, false);
+
+        aio_context_release(ctx);
+    }
 
     /* Perform the reopen operation */
-    ctx = bdrv_get_aio_context(bs);
-    aio_context_acquire(ctx);
-    bdrv_subtree_drained_begin(bs);
-    queue = bdrv_reopen_queue(NULL, bs, qdict, false);
     bdrv_reopen_multiple(queue, errp);
-    bdrv_subtree_drained_end(bs);
-    aio_context_release(ctx);
+    queue = NULL;
 
 fail:
-    visit_free(v);
+    bdrv_reopen_queue_free(queue);
 }
 
 void qmp_blockdev_del(const char *node_name, Error **errp)
@@ -3580,9 +3599,12 @@ void qmp_blockdev_del(const char *node_name, Error **errp)
     AioContext *aio_context;
     BlockDriverState *bs;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     bs = bdrv_find_node(node_name);
     if (!bs) {
-        error_setg(errp, "Cannot find node %s", node_name);
+        error_setg(errp, "Failed to find node with node-name='%s'", node_name);
         return;
     }
     if (bdrv_has_blk(bs)) {
@@ -3615,8 +3637,8 @@ out:
     aio_context_release(aio_context);
 }
 
-static BdrvChild *bdrv_find_child(BlockDriverState *parent_bs,
-                                  const char *child_name)
+static BdrvChild * GRAPH_RDLOCK
+bdrv_find_child(BlockDriverState *parent_bs, const char *child_name)
 {
     BdrvChild *child;
 
@@ -3629,71 +3651,71 @@ static BdrvChild *bdrv_find_child(BlockDriverState *parent_bs,
     return NULL;
 }
 
-void qmp_x_blockdev_change(const char *parent, bool has_child,
-                           const char *child, bool has_node,
+void qmp_x_blockdev_change(const char *parent, const char *child,
                            const char *node, Error **errp)
 {
     BlockDriverState *parent_bs, *new_bs = NULL;
     BdrvChild *p_child;
 
+    bdrv_graph_wrlock(NULL);
+
     parent_bs = bdrv_lookup_bs(parent, parent, errp);
     if (!parent_bs) {
-        return;
+        goto out;
     }
 
-    if (has_child == has_node) {
-        if (has_child) {
+    if (!child == !node) {
+        if (child) {
             error_setg(errp, "The parameters child and node are in conflict");
         } else {
             error_setg(errp, "Either child or node must be specified");
         }
-        return;
+        goto out;
     }
 
-    if (has_child) {
+    if (child) {
         p_child = bdrv_find_child(parent_bs, child);
         if (!p_child) {
             error_setg(errp, "Node '%s' does not have child '%s'",
                        parent, child);
-            return;
+            goto out;
         }
         bdrv_del_child(parent_bs, p_child, errp);
     }
 
-    if (has_node) {
+    if (node) {
         new_bs = bdrv_find_node(node);
         if (!new_bs) {
             error_setg(errp, "Node '%s' not found", node);
-            return;
+            goto out;
         }
         bdrv_add_child(parent_bs, new_bs, errp);
     }
+
+out:
+    bdrv_graph_wrunlock(NULL);
 }
 
 BlockJobInfoList *qmp_query_block_jobs(Error **errp)
 {
-    BlockJobInfoList *head = NULL, **p_next = &head;
+    BlockJobInfoList *head = NULL, **tail = &head;
     BlockJob *job;
 
-    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
-        BlockJobInfoList *elem;
-        AioContext *aio_context;
+    JOB_LOCK_GUARD();
+
+    for (job = block_job_next_locked(NULL); job;
+         job = block_job_next_locked(job)) {
+        BlockJobInfo *value;
 
         if (block_job_is_internal(job)) {
             continue;
         }
-        elem = g_new0(BlockJobInfoList, 1);
-        aio_context = blk_get_aio_context(job->blk);
-        aio_context_acquire(aio_context);
-        elem->value = block_job_query(job, errp);
-        aio_context_release(aio_context);
-        if (!elem->value) {
-            g_free(elem);
+        value = block_job_query_locked(job, errp);
+        if (!value) {
             qapi_free_BlockJobInfoList(head);
             return NULL;
         }
-        *p_next = elem;
-        p_next = &elem->next;
+        QAPI_LIST_APPEND(tail, value);
     }
 
     return head;
@@ -3706,9 +3728,11 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
     AioContext *new_context;
     BlockDriverState *bs;
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     bs = bdrv_find_node(node_name);
     if (!bs) {
-        error_setg(errp, "Cannot find node %s", node_name);
+        error_setg(errp, "Failed to find node with node-name='%s'", node_name);
         return;
     }
 
@@ -3735,7 +3759,7 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
     old_context = bdrv_get_aio_context(bs);
     aio_context_acquire(old_context);
 
-    bdrv_try_set_aio_context(bs, new_context, errp);
+    bdrv_try_change_aio_context(bs, new_context, NULL, errp);
 
     aio_context_release(old_context);
 }
index 62f1a537399f07a88ebf520eb560a0cc3da7592c..b7a29052b94fcb1a8a2ffb33ac8ac23962bd5c7d 100644 (file)
@@ -24,6 +24,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "block/aio-wait.h"
 #include "block/block.h"
 #include "block/blockjob_int.h"
 #include "block/block_int.h"
 #include "qapi/error.h"
 #include "qapi/qapi-events-block-core.h"
 #include "qapi/qmp/qerror.h"
-#include "qemu/coroutine.h"
 #include "qemu/main-loop.h"
 #include "qemu/timer.h"
 
-/*
- * The block job API is composed of two categories of functions.
- *
- * The first includes functions used by the monitor.  The monitor is
- * peculiar in that it accesses the block job list with block_job_get, and
- * therefore needs consistency across block_job_get and the actual operation
- * (e.g. block_job_set_speed).  The consistency is achieved with
- * aio_context_acquire/release.  These functions are declared in blockjob.h.
- *
- * The second includes functions used by the block job drivers and sometimes
- * by the core block layer.  These do not care about locking, because the
- * whole coroutine runs under the AioContext lock, and are declared in
- * blockjob_int.h.
- */
-
 static bool is_block_job(Job *job)
 {
     return job_type(job) == JOB_TYPE_BACKUP ||
@@ -59,20 +44,22 @@ static bool is_block_job(Job *job)
            job_type(job) == JOB_TYPE_STREAM;
 }
 
-BlockJob *block_job_next(BlockJob *bjob)
+BlockJob *block_job_next_locked(BlockJob *bjob)
 {
     Job *job = bjob ? &bjob->job : NULL;
+    GLOBAL_STATE_CODE();
 
     do {
-        job = job_next(job);
+        job = job_next_locked(job);
     } while (job && !is_block_job(job));
 
     return job ? container_of(job, BlockJob, job) : NULL;
 }
 
-BlockJob *block_job_get(const char *id)
+BlockJob *block_job_get_locked(const char *id)
 {
-    Job *job = job_get(id);
+    Job *job = job_get_locked(id);
+    GLOBAL_STATE_CODE();
 
     if (job && is_block_job(job)) {
         return container_of(job, BlockJob, job);
@@ -81,12 +68,19 @@ BlockJob *block_job_get(const char *id)
     }
 }
 
+BlockJob *block_job_get(const char *id)
+{
+    JOB_LOCK_GUARD();
+    return block_job_get_locked(id);
+}
+
 void block_job_free(Job *job)
 {
     BlockJob *bjob = container_of(job, BlockJob, job);
+    GLOBAL_STATE_CODE();
 
     block_job_remove_all_bdrv(bjob);
-    blk_unref(bjob->blk);
+    ratelimit_destroy(&bjob->limit);
     error_free(bjob->blocker);
 }
 
@@ -111,8 +105,10 @@ static bool child_job_drained_poll(BdrvChild *c)
     /* An inactive or completed job doesn't have any pending requests. Jobs
      * with !job->busy are either already paused or have a pause point after
      * being reentered, so no job driver code will run before they pause. */
-    if (!job->busy || job_is_completed(job)) {
-        return false;
+    WITH_JOB_LOCK_GUARD() {
+        if (!job->busy || job_is_completed_locked(job)) {
+            return false;
+        }
     }
 
     /* Otherwise, assume that it isn't fully stopped yet, but allow the job to
@@ -124,43 +120,63 @@ static bool child_job_drained_poll(BdrvChild *c)
     }
 }
 
-static void child_job_drained_end(BdrvChild *c, int *drained_end_counter)
+static void child_job_drained_end(BdrvChild *c)
 {
     BlockJob *job = c->opaque;
     job_resume(&job->job);
 }
 
-static bool child_job_can_set_aio_ctx(BdrvChild *c, AioContext *ctx,
-                                      GSList **ignore, Error **errp)
+typedef struct BdrvStateChildJobContext {
+    AioContext *new_ctx;
+    BlockJob *job;
+} BdrvStateChildJobContext;
+
+static void child_job_set_aio_ctx_commit(void *opaque)
+{
+    BdrvStateChildJobContext *s = opaque;
+    BlockJob *job = s->job;
+
+    job_set_aio_context(&job->job, s->new_ctx);
+}
+
+static TransactionActionDrv change_child_job_context = {
+    .commit = child_job_set_aio_ctx_commit,
+    .clean = g_free,
+};
+
+static bool child_job_change_aio_ctx(BdrvChild *c, AioContext *ctx,
+                                     GHashTable *visited, Transaction *tran,
+                                     Error **errp)
 {
     BlockJob *job = c->opaque;
+    BdrvStateChildJobContext *s;
     GSList *l;
 
     for (l = job->nodes; l; l = l->next) {
         BdrvChild *sibling = l->data;
-        if (!bdrv_child_can_set_aio_context(sibling, ctx, ignore, errp)) {
+        if (!bdrv_child_change_aio_context(sibling, ctx, visited,
+                                           tran, errp)) {
             return false;
         }
     }
+
+    s = g_new(BdrvStateChildJobContext, 1);
+    *s = (BdrvStateChildJobContext) {
+        .new_ctx = ctx,
+        .job = job,
+    };
+
+    tran_add(tran, &change_child_job_context, s);
     return true;
 }
 
-static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx,
-                                  GSList **ignore)
+static AioContext *child_job_get_parent_aio_context(BdrvChild *c)
 {
     BlockJob *job = c->opaque;
-    GSList *l;
-
-    for (l = job->nodes; l; l = l->next) {
-        BdrvChild *sibling = l->data;
-        if (g_slist_find(*ignore, sibling)) {
-            continue;
-        }
-        *ignore = g_slist_prepend(*ignore, sibling);
-        bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore);
-    }
+    IO_CODE();
+    JOB_LOCK_GUARD();
 
-    job->job.aio_context = ctx;
+    return job->job.aio_context;
 }
 
 static const BdrvChildClass child_job = {
@@ -168,19 +184,23 @@ static const BdrvChildClass child_job = {
     .drained_begin      = child_job_drained_begin,
     .drained_poll       = child_job_drained_poll,
     .drained_end        = child_job_drained_end,
-    .can_set_aio_ctx    = child_job_can_set_aio_ctx,
-    .set_aio_ctx        = child_job_set_aio_ctx,
+    .change_aio_ctx     = child_job_change_aio_ctx,
     .stay_at_node       = true,
+    .get_parent_aio_context = child_job_get_parent_aio_context,
 };
 
 void block_job_remove_all_bdrv(BlockJob *job)
 {
+    GLOBAL_STATE_CODE();
     /*
      * bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(),
      * which will also traverse job->nodes, so consume the list one by
      * one to make sure that such a concurrent access does not attempt
      * to process an already freed BdrvChild.
      */
+    aio_context_release(job->job.aio_context);
+    bdrv_graph_wrlock(NULL);
+    aio_context_acquire(job->job.aio_context);
     while (job->nodes) {
         GSList *l = job->nodes;
         BdrvChild *c = l->data;
@@ -192,11 +212,13 @@ void block_job_remove_all_bdrv(BlockJob *job)
 
         g_slist_free_1(l);
     }
+    bdrv_graph_wrunlock_ctx(job->job.aio_context);
 }
 
 bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs)
 {
     GSList *el;
+    GLOBAL_STATE_CODE();
 
     for (el = job->nodes; el; el = el->next) {
         BdrvChild *c = el->data;
@@ -212,20 +234,27 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
                        uint64_t perm, uint64_t shared_perm, Error **errp)
 {
     BdrvChild *c;
+    AioContext *ctx = bdrv_get_aio_context(bs);
     bool need_context_ops;
+    GLOBAL_STATE_CODE();
 
     bdrv_ref(bs);
 
-    need_context_ops = bdrv_get_aio_context(bs) != job->job.aio_context;
+    need_context_ops = ctx != job->job.aio_context;
 
-    if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) {
-        aio_context_release(job->job.aio_context);
+    if (need_context_ops) {
+        if (job->job.aio_context != qemu_get_aio_context()) {
+            aio_context_release(job->job.aio_context);
+        }
+        aio_context_acquire(ctx);
     }
-    c = bdrv_root_attach_child(bs, name, &child_job, 0,
-                               job->job.aio_context, perm, shared_perm, job,
+    c = bdrv_root_attach_child(bs, name, &child_job, 0, perm, shared_perm, job,
                                errp);
-    if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) {
-        aio_context_acquire(job->job.aio_context);
+    if (need_context_ops) {
+        aio_context_release(ctx);
+        if (job->job.aio_context != qemu_get_aio_context()) {
+            aio_context_acquire(job->job.aio_context);
+        }
     }
     if (c == NULL) {
         return -EPERM;
@@ -237,7 +266,8 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
     return 0;
 }
 
-static void block_job_on_idle(Notifier *n, void *opaque)
+/* Called with job_mutex lock held. */
+static void block_job_on_idle_locked(Notifier *n, void *opaque)
 {
     aio_wait_kick();
 }
@@ -258,66 +288,136 @@ static bool job_timer_pending(Job *job)
     return timer_pending(&job->sleep_timer);
 }
 
-void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
+bool block_job_set_speed_locked(BlockJob *job, int64_t speed, Error **errp)
 {
+    const BlockJobDriver *drv = block_job_driver(job);
     int64_t old_speed = job->speed;
 
-    if (job_apply_verb(&job->job, JOB_VERB_SET_SPEED, errp)) {
-        return;
+    GLOBAL_STATE_CODE();
+
+    if (job_apply_verb_locked(&job->job, JOB_VERB_SET_SPEED, errp) < 0) {
+        return false;
     }
     if (speed < 0) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "speed",
                    "a non-negative value");
-        return;
+        return false;
     }
 
     ratelimit_set_speed(&job->limit, speed, BLOCK_JOB_SLICE_TIME);
 
     job->speed = speed;
+
+    if (drv->set_speed) {
+        job_unlock();
+        drv->set_speed(job, speed);
+        job_lock();
+    }
+
     if (speed && speed <= old_speed) {
-        return;
+        return true;
     }
 
     /* kick only if a timer is pending */
-    job_enter_cond(&job->job, job_timer_pending);
+    job_enter_cond_locked(&job->job, job_timer_pending);
+
+    return true;
+}
+
+static bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    JOB_LOCK_GUARD();
+    return block_job_set_speed_locked(job, speed, errp);
 }
 
-int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n)
+void block_job_change_locked(BlockJob *job, BlockJobChangeOptions *opts,
+                             Error **errp)
 {
-    if (!job->speed) {
-        return 0;
+    const BlockJobDriver *drv = block_job_driver(job);
+
+    GLOBAL_STATE_CODE();
+
+    if (job_apply_verb_locked(&job->job, JOB_VERB_CHANGE, errp)) {
+        return;
+    }
+
+    if (drv->change) {
+        job_unlock();
+        drv->change(job, opts, errp);
+        job_lock();
+    } else {
+        error_setg(errp, "Job type does not support change");
     }
+}
 
-    return ratelimit_calculate_delay(&job->limit, n);
+void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n)
+{
+    IO_CODE();
+    ratelimit_calculate_delay(&job->limit, n);
 }
 
-BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
+void block_job_ratelimit_sleep(BlockJob *job)
+{
+    uint64_t delay_ns;
+
+    /*
+     * Sleep at least once. If the job is reentered early, keep waiting until
+     * we've waited for the full time that is necessary to keep the job at the
+     * right speed.
+     *
+     * Make sure to recalculate the delay after each (possibly interrupted)
+     * sleep because the speed can change while the job has yielded.
+     */
+    do {
+        delay_ns = ratelimit_calculate_delay(&job->limit, 0);
+        job_sleep_ns(&job->job, delay_ns);
+    } while (delay_ns && !job_is_cancelled(&job->job));
+}
+
+BlockJobInfo *block_job_query_locked(BlockJob *job, Error **errp)
 {
     BlockJobInfo *info;
+    uint64_t progress_current, progress_total;
+    const BlockJobDriver *drv = block_job_driver(job);
+
+    GLOBAL_STATE_CODE();
 
     if (block_job_is_internal(job)) {
         error_setg(errp, "Cannot query QEMU internal jobs");
         return NULL;
     }
+
+    progress_get_snapshot(&job->job.progress, &progress_current,
+                          &progress_total);
+
     info = g_new0(BlockJobInfo, 1);
-    info->type      = g_strdup(job_type_str(&job->job));
+    info->type      = job_type(&job->job);
     info->device    = g_strdup(job->job.id);
-    info->busy      = qatomic_read(&job->job.busy);
+    info->busy      = job->job.busy;
     info->paused    = job->job.pause_count > 0;
-    info->offset    = job->job.progress.current;
-    info->len       = job->job.progress.total;
+    info->offset    = progress_current;
+    info->len       = progress_total;
     info->speed     = job->speed;
     info->io_status = job->iostatus;
-    info->ready     = job_is_ready(&job->job),
+    info->ready     = job_is_ready_locked(&job->job),
     info->status    = job->job.status;
     info->auto_finalize = job->job.auto_finalize;
     info->auto_dismiss  = job->job.auto_dismiss;
-    info->has_error = job->job.ret != 0;
-    info->error     = job->job.ret ? g_strdup(strerror(-job->job.ret)) : NULL;
+    if (job->job.ret) {
+        info->error = job->job.err ?
+                        g_strdup(error_get_pretty(job->job.err)) :
+                        g_strdup(strerror(-job->job.ret));
+    }
+    if (drv->query) {
+        job_unlock();
+        drv->query(job, info);
+        job_lock();
+    }
     return info;
 }
 
-static void block_job_iostatus_set_err(BlockJob *job, int error)
+/* Called with job lock held */
+static void block_job_iostatus_set_err_locked(BlockJob *job, int error)
 {
     if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
         job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
@@ -325,44 +425,54 @@ static void block_job_iostatus_set_err(BlockJob *job, int error)
     }
 }
 
-static void block_job_event_cancelled(Notifier *n, void *opaque)
+/* Called with job_mutex lock held. */
+static void block_job_event_cancelled_locked(Notifier *n, void *opaque)
 {
     BlockJob *job = opaque;
+    uint64_t progress_current, progress_total;
 
     if (block_job_is_internal(job)) {
         return;
     }
 
+    progress_get_snapshot(&job->job.progress, &progress_current,
+                          &progress_total);
+
     qapi_event_send_block_job_cancelled(job_type(&job->job),
                                         job->job.id,
-                                        job->job.progress.total,
-                                        job->job.progress.current,
+                                        progress_total,
+                                        progress_current,
                                         job->speed);
 }
 
-static void block_job_event_completed(Notifier *n, void *opaque)
+/* Called with job_mutex lock held. */
+static void block_job_event_completed_locked(Notifier *n, void *opaque)
 {
     BlockJob *job = opaque;
     const char *msg = NULL;
+    uint64_t progress_current, progress_total;
 
     if (block_job_is_internal(job)) {
         return;
     }
 
     if (job->job.ret < 0) {
-        msg = strerror(-job->job.ret);
+        msg = error_get_pretty(job->job.err);
     }
 
+    progress_get_snapshot(&job->job.progress, &progress_current,
+                          &progress_total);
+
     qapi_event_send_block_job_completed(job_type(&job->job),
                                         job->job.id,
-                                        job->job.progress.total,
-                                        job->job.progress.current,
+                                        progress_total,
+                                        progress_current,
                                         job->speed,
-                                        !!msg,
                                         msg);
 }
 
-static void block_job_event_pending(Notifier *n, void *opaque)
+/* Called with job_mutex lock held. */
+static void block_job_event_pending_locked(Notifier *n, void *opaque)
 {
     BlockJob *job = opaque;
 
@@ -374,48 +484,46 @@ static void block_job_event_pending(Notifier *n, void *opaque)
                                       job->job.id);
 }
 
-static void block_job_event_ready(Notifier *n, void *opaque)
+/* Called with job_mutex lock held. */
+static void block_job_event_ready_locked(Notifier *n, void *opaque)
 {
     BlockJob *job = opaque;
+    uint64_t progress_current, progress_total;
 
     if (block_job_is_internal(job)) {
         return;
     }
 
+    progress_get_snapshot(&job->job.progress, &progress_current,
+                          &progress_total);
+
     qapi_event_send_block_job_ready(job_type(&job->job),
                                     job->job.id,
-                                    job->job.progress.total,
-                                    job->job.progress.current,
+                                    progress_total,
+                                    progress_current,
                                     job->speed);
 }
 
 
-/*
- * API for block job drivers and the block layer.  These functions are
- * declared in blockjob_int.h.
- */
-
 void *block_job_create(const char *job_id, const BlockJobDriver *driver,
                        JobTxn *txn, BlockDriverState *bs, uint64_t perm,
                        uint64_t shared_perm, int64_t speed, int flags,
                        BlockCompletionFunc *cb, void *opaque, Error **errp)
 {
-    BlockBackend *blk;
     BlockJob *job;
+    int ret;
+    GLOBAL_STATE_CODE();
+
+    bdrv_graph_wrlock(bs);
 
     if (job_id == NULL && !(flags & JOB_INTERNAL)) {
         job_id = bdrv_get_device_name(bs);
     }
 
-    blk = blk_new_with_bs(bs, perm, shared_perm, errp);
-    if (!blk) {
-        return NULL;
-    }
-
-    job = job_create(job_id, &driver->job_driver, txn, blk_get_aio_context(blk),
+    job = job_create(job_id, &driver->job_driver, txn, bdrv_get_aio_context(bs),
                      flags, cb, opaque, errp);
     if (job == NULL) {
-        blk_unref(blk);
+        bdrv_graph_wrunlock(bs);
         return NULL;
     }
 
@@ -423,50 +531,50 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
     assert(job->job.driver->free == &block_job_free);
     assert(job->job.driver->user_resume == &block_job_user_resume);
 
-    job->blk = blk;
-
-    job->finalize_cancelled_notifier.notify = block_job_event_cancelled;
-    job->finalize_completed_notifier.notify = block_job_event_completed;
-    job->pending_notifier.notify = block_job_event_pending;
-    job->ready_notifier.notify = block_job_event_ready;
-    job->idle_notifier.notify = block_job_on_idle;
-
-    notifier_list_add(&job->job.on_finalize_cancelled,
-                      &job->finalize_cancelled_notifier);
-    notifier_list_add(&job->job.on_finalize_completed,
-                      &job->finalize_completed_notifier);
-    notifier_list_add(&job->job.on_pending, &job->pending_notifier);
-    notifier_list_add(&job->job.on_ready, &job->ready_notifier);
-    notifier_list_add(&job->job.on_idle, &job->idle_notifier);
+    ratelimit_init(&job->limit);
+
+    job->finalize_cancelled_notifier.notify = block_job_event_cancelled_locked;
+    job->finalize_completed_notifier.notify = block_job_event_completed_locked;
+    job->pending_notifier.notify = block_job_event_pending_locked;
+    job->ready_notifier.notify = block_job_event_ready_locked;
+    job->idle_notifier.notify = block_job_on_idle_locked;
+
+    WITH_JOB_LOCK_GUARD() {
+        notifier_list_add(&job->job.on_finalize_cancelled,
+                          &job->finalize_cancelled_notifier);
+        notifier_list_add(&job->job.on_finalize_completed,
+                          &job->finalize_completed_notifier);
+        notifier_list_add(&job->job.on_pending, &job->pending_notifier);
+        notifier_list_add(&job->job.on_ready, &job->ready_notifier);
+        notifier_list_add(&job->job.on_idle, &job->idle_notifier);
+    }
 
     error_setg(&job->blocker, "block device is in use by block job: %s",
                job_type_str(&job->job));
-    block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
-
-    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
 
-    /* Disable request queuing in the BlockBackend to avoid deadlocks on drain:
-     * The job reports that it's busy until it reaches a pause point. */
-    blk_set_disable_request_queuing(blk, true);
-    blk_set_allow_aio_context_change(blk, true);
+    ret = block_job_add_bdrv(job, "main node", bs, perm, shared_perm, errp);
+    if (ret < 0) {
+        goto fail;
+    }
 
-    /* Only set speed when necessary to avoid NotSupported error */
-    if (speed != 0) {
-        Error *local_err = NULL;
+    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
 
-        block_job_set_speed(job, speed, &local_err);
-        if (local_err) {
-            job_early_fail(&job->job);
-            error_propagate(errp, local_err);
-            return NULL;
-        }
+    if (!block_job_set_speed(job, speed, errp)) {
+        goto fail;
     }
 
+    bdrv_graph_wrunlock(bs);
     return job;
+
+fail:
+    bdrv_graph_wrunlock(bs);
+    job_early_fail(&job->job);
+    return NULL;
 }
 
-void block_job_iostatus_reset(BlockJob *job)
+void block_job_iostatus_reset_locked(BlockJob *job)
 {
+    GLOBAL_STATE_CODE();
     if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
         return;
     }
@@ -474,9 +582,16 @@ void block_job_iostatus_reset(BlockJob *job)
     job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
 }
 
+static void block_job_iostatus_reset(BlockJob *job)
+{
+    JOB_LOCK_GUARD();
+    block_job_iostatus_reset_locked(job);
+}
+
 void block_job_user_resume(Job *job)
 {
     BlockJob *bjob = container_of(job, BlockJob, job);
+    GLOBAL_STATE_CODE();
     block_job_iostatus_reset(bjob);
 }
 
@@ -484,6 +599,7 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
                                         int is_read, int error)
 {
     BlockErrorAction action;
+    IO_CODE();
 
     switch (on_err) {
     case BLOCKDEV_ON_ERROR_ENOSPC:
@@ -510,12 +626,23 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
                                         action);
     }
     if (action == BLOCK_ERROR_ACTION_STOP) {
-        if (!job->job.user_paused) {
-            job_pause(&job->job);
-            /* make the pause user visible, which will be resumed from QMP. */
-            job->job.user_paused = true;
+        WITH_JOB_LOCK_GUARD() {
+            if (!job->job.user_paused) {
+                job_pause_locked(&job->job);
+                /*
+                 * make the pause user visible, which will be
+                 * resumed from QMP.
+                 */
+                job->job.user_paused = true;
+            }
+            block_job_iostatus_set_err_locked(job, error);
         }
-        block_job_iostatus_set_err(job, error);
     }
     return action;
 }
+
+AioContext *block_job_get_aio_context(BlockJob *job)
+{
+    GLOBAL_STATE_CODE();
+    return job->job.aio_context;
+}
index 9d64e368de876bc36566a8c263a3b63f33e42e94..2280399ef5a576c933329f9eba0e51eca573700b 100755 (executable)
--- a/configure
+++ b/configure
@@ -4,9 +4,8 @@
 #
 
 # Unset some variables known to interfere with behavior of common tools,
-# just as autoconf does.
-CLICOLOR_FORCE= GREP_OPTIONS=
-unset CLICOLOR_FORCE GREP_OPTIONS
+# just as autoconf does.  Unlike autoconf, we assume that unset exists.
+unset CLICOLOR_FORCE GREP_OPTIONS BASH_ENV ENV MAIL MAILPATH CDPATH
 
 # Don't allow CCACHE, if present, to use cached results of compile tests!
 export CCACHE_RECACHE=yes
@@ -31,19 +30,18 @@ then
         fi
     fi
 
-    mkdir build
-    touch $MARKER
+    if ! mkdir build || ! touch $MARKER
+    then
+        echo "ERROR: Could not create ./build directory. Check the permissions on"
+        echo "your source directory, or try doing an out-of-tree build."
+        exit 1
+    fi
 
     cat > GNUmakefile <<'EOF'
 # This file is auto-generated by configure to support in-source tree
 # 'make' command invocation
 
-ifeq ($(MAKECMDGOALS),)
-recurse: all
-endif
-
-.NOTPARALLEL: %
-%: force
+build:
        @echo 'changing dir to build for $(MAKE) "$(MAKECMDGOALS)"...'
        @$(MAKE) -C build -f Makefile $(MAKECMDGOALS)
        @if test "$(MAKECMDGOALS)" = "distclean" && \
@@ -51,13 +49,14 @@ endif
        then \
            rm -rf build GNUmakefile ; \
        fi
-force: ;
-.PHONY: force
+%: build
+       @
+.PHONY: build
 GNUmakefile: ;
 
 EOF
     cd build
-    exec $source_path/configure "$@"
+    exec "$source_path/configure" "$@"
 fi
 
 # Temporary directory used for files created while
@@ -67,8 +66,7 @@ fi
 # it when configure exits.)
 TMPDIR1="config-temp"
 rm -rf "${TMPDIR1}"
-mkdir -p "${TMPDIR1}"
-if [ $? -ne 0 ]; then
+if ! mkdir -p "${TMPDIR1}"; then
     echo "ERROR: failed to create temporary directory"
     exit 1
 fi
@@ -76,20 +74,23 @@ fi
 TMPB="qemu-conf"
 TMPC="${TMPDIR1}/${TMPB}.c"
 TMPO="${TMPDIR1}/${TMPB}.o"
-TMPCXX="${TMPDIR1}/${TMPB}.cxx"
 TMPE="${TMPDIR1}/${TMPB}.exe"
-TMPTXT="${TMPDIR1}/${TMPB}.txt"
 
 rm -f config.log
 
 # Print a helpful header at the top of config.log
 echo "# QEMU configure log $(date)" >> config.log
 printf "# Configured with:" >> config.log
-printf " '%s'" "$0" "$@" >> config.log
-echo >> config.log
-echo "#" >> config.log
+# repeat the invocation to log and stdout for CI
+invoke=$(printf " '%s'" "$0" "$@")
+test -n "$GITLAB_CI" && echo "configuring with: $invoke"
+{ echo "$invoke"; echo; echo "#"; } >> config.log
+
+quote_sh() {
+    printf "%s" "$1" | sed "s,','\\\\'',g; s,.*,'&',"
+}
 
-print_error() {
+error_exit() {
     (echo
     echo "ERROR: $1"
     while test -n "$2"; do
@@ -97,87 +98,37 @@ print_error() {
         shift
     done
     echo) >&2
-}
-
-error_exit() {
-    print_error "$@"
     exit 1
 }
 
 do_compiler() {
-    # Run the compiler, capturing its output to the log. First argument
-    # is compiler binary to execute.
-    local compiler="$1"
-    shift
-    if test -n "$BASH_VERSION"; then eval '
-        echo >>config.log "
+  # Run the compiler, capturing its output to the log. First argument
+  # is compiler binary to execute.
+  compiler="$1"
+  shift
+  if test -n "$BASH_VERSION"; then eval '
+      echo >>config.log "
 funcs: ${FUNCNAME[*]}
 lines: ${BASH_LINENO[*]}"
-    '; fi
-    echo $compiler "$@" >> config.log
-    $compiler "$@" >> config.log 2>&1 || return $?
-    # Test passed. If this is an --enable-werror build, rerun
-    # the test with -Werror and bail out if it fails. This
-    # makes warning-generating-errors in configure test code
-    # obvious to developers.
-    if test "$werror" != "yes"; then
-        return 0
-    fi
-    # Don't bother rerunning the compile if we were already using -Werror
-    case "$*" in
-        *-Werror*)
-           return 0
-        ;;
-    esac
-    echo $compiler -Werror "$@" >> config.log
-    $compiler -Werror "$@" >> config.log 2>&1 && return $?
-    error_exit "configure test passed without -Werror but failed with -Werror." \
-        "This is probably a bug in the configure script. The failing command" \
-        "will be at the bottom of config.log." \
-        "You can run configure with --disable-werror to bypass this check."
+  '; fi
+  echo $compiler "$@" >> config.log
+  $compiler "$@" >> config.log 2>&1 || return $?
 }
 
 do_cc() {
-    do_compiler "$cc" "$@"
-}
-
-do_cxx() {
-    do_compiler "$cxx" "$@"
-}
-
-# Append $2 to the variable named $1, with space separation
-add_to() {
-    eval $1=\${$1:+\"\$$1 \"}\$2
-}
-
-update_cxxflags() {
-    # Set QEMU_CXXFLAGS from QEMU_CFLAGS by filtering out those
-    # options which some versions of GCC's C++ compiler complain about
-    # because they only make sense for C programs.
-    QEMU_CXXFLAGS="$QEMU_CXXFLAGS -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS"
-    CONFIGURE_CXXFLAGS=$(echo "$CONFIGURE_CFLAGS" | sed s/-std=gnu99/-std=gnu++11/)
-    for arg in $QEMU_CFLAGS; do
-        case $arg in
-            -Wstrict-prototypes|-Wmissing-prototypes|-Wnested-externs|\
-            -Wold-style-declaration|-Wold-style-definition|-Wredundant-decls)
-                ;;
-            *)
-                QEMU_CXXFLAGS=${QEMU_CXXFLAGS:+$QEMU_CXXFLAGS }$arg
-                ;;
-        esac
-    done
+    do_compiler "$cc" $CPU_CFLAGS "$@"
 }
 
 compile_object() {
   local_cflags="$1"
-  do_cc $CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags -c -o $TMPO $TMPC
+  do_cc $CFLAGS $EXTRA_CFLAGS $local_cflags -c -o $TMPO $TMPC
 }
 
 compile_prog() {
   local_cflags="$1"
   local_ldflags="$2"
-  do_cc $CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags -o $TMPE $TMPC \
-      $LDFLAGS $CONFIGURE_LDFLAGS $QEMU_LDFLAGS $local_ldflags
+  do_cc $CFLAGS $EXTRA_CFLAGS $local_cflags -o $TMPE $TMPC \
+      $LDFLAGS $EXTRA_LDFLAGS $local_ldflags
 }
 
 # symbolically link $1 to $2.  Portable version of "ln -sf".
@@ -194,8 +145,8 @@ has() {
 }
 
 version_ge () {
-    local_ver1=`echo $1 | tr . ' '`
-    local_ver2=`echo $2 | tr . ' '`
+    local_ver1=$(expr "$1" : '\([0-9.]*\)' | tr . ' ')
+    local_ver2=$(echo "$2" | tr . ' ')
     while true; do
         set x $local_ver1
         local_first=${2-0}
@@ -212,75 +163,24 @@ version_ge () {
     done
 }
 
-have_backend () {
-    echo "$trace_backends" | grep "$1" >/dev/null
-}
-
-glob() {
-    eval test -z '"${1#'"$2"'}"'
-}
-
-ld_has() {
-    $ld --help 2>/dev/null | grep ".$1" >/dev/null 2>&1
-}
-
 if printf %s\\n "$source_path" "$PWD" | grep -q "[[:space:]:]";
 then
   error_exit "main directory cannot contain spaces nor colons"
 fi
 
+# parse CC options first; some compiler tests are used to establish
+# some defaults, based on the host environment
+
 # default parameters
+container_engine="auto"
 cpu=""
-iasl="iasl"
-interp_prefix="/usr/gnemul/qemu-%M"
-static="no"
+cross_compile="no"
 cross_prefix=""
-audio_drv_list=""
-block_drv_rw_whitelist=""
-block_drv_ro_whitelist=""
 host_cc="cc"
-audio_win_int=""
-libs_qga=""
-debug_info="yes"
-stack_protector=""
-safe_stack=""
-use_containers="yes"
-gdb_bin=$(command -v "gdb-multiarch" || command -v "gdb")
-glib_has_gslice="no"
-
-if test -e "$source_path/.git"
-then
-    git_update=yes
-    git_submodules="ui/keycodemapdb"
-    git_submodules="$git_submodules tests/fp/berkeley-testfloat-3"
-    git_submodules="$git_submodules tests/fp/berkeley-softfloat-3"
-else
-    git_update=no
-    git_submodules=""
-
-    # if ! test -f "$source_path/ui/keycodemapdb/README"
-    # then
-    #     echo
-    #     echo "ERROR: missing file $source_path/ui/keycodemapdb/README"
-    #     echo
-    #     echo "This is not a GIT checkout but module content appears to"
-    #     echo "be missing. Do not use 'git archive' or GitHub download links"
-    #     echo "to acquire QEMU source archives. Non-GIT builds are only"
-    #     echo "supported with source archives linked from:"
-    #     echo
-    #     echo "  https://www.qemu.org/download/#source"
-    #     echo
-    #     echo "Developers working with GIT can use scripts/archive-source.sh"
-    #     echo "if they need to create valid source archives."
-    #     echo
-    #     exit 1
-    # fi
-fi
-git="git"
-
-# Don't accept a target_list environment variable.
-unset target_list
-unset target_list_exclude
+EXTRA_CFLAGS=""
+EXTRA_CXXFLAGS=""
+EXTRA_OBJCFLAGS=""
+EXTRA_LDFLAGS=""
 
 # Default value for a variable defining feature "foo".
 #  * foo="no"  feature will only be used if --enable-foo arg is given
@@ -293,214 +193,90 @@ unset target_list_exclude
 # Always add --enable-foo and --disable-foo command line args.
 # Distributions want to ensure that several features are compiled in, and it
 # is impossible without a --enable-foo that exits if a feature is not found.
+default_feature=""
 
-brlapi=""
-curl=""
-iconv="auto"
-curses="auto"
-docs="auto"
-fdt="auto"
-netmap="no"
-sdl="auto"
-sdl_image="auto"
-virtiofsd="auto"
-virtfs=""
-libudev="auto"
-mpath="auto"
-vnc="enabled"
-sparse="auto"
-vde=""
-vnc_sasl="auto"
-vnc_jpeg="auto"
-vnc_png="auto"
-xkbcommon="auto"
-xen=""
-xen_ctrl_version=""
-xen_pci_passthrough="auto"
-linux_aio=""
-linux_io_uring=""
-cap_ng=""
-attr=""
-libattr=""
-xfs=""
-tcg="enabled"
-membarrier=""
-vhost_net=""
-vhost_crypto=""
-vhost_scsi=""
-vhost_vsock=""
-vhost_user="no"
-vhost_user_blk_server="auto"
-vhost_user_fs=""
-kvm="auto"
-hax="auto"
-hvf="auto"
-whpx="auto"
-rdma=""
-pvrdma=""
-gprof="no"
-debug_tcg="no"
-debug="no"
-sanitizers="no"
-tsan="no"
-fortify_source=""
-strip_opt="yes"
-tcg_interpreter="no"
-bigendian="no"
-mingw32="no"
-gcov="no"
-EXESUF=""
-HOST_DSOSUF=".so"
-modules="no"
-module_upgrades="no"
-prefix="/usr/local"
-qemu_suffix="qemu"
-slirp="auto"
-oss_lib=""
-bsd="no"
-linux="no"
-solaris="no"
-profiler="no"
-cocoa="auto"
-softmmu="yes"
-linux_user="no"
-bsd_user="no"
-blobs="true"
-pkgversion=""
-pie=""
-qom_cast_debug="yes"
-trace_backends="log"
-trace_file="trace"
-spice=""
-rbd=""
-smartcard=""
-u2f="auto"
-libusb=""
-usb_redir=""
-opengl=""
-opengl_dmabuf="no"
-cpuid_h="no"
-avx2_opt=""
-capstone="auto"
-lzo=""
-snappy=""
-bzip2=""
-lzfse=""
-zstd=""
-guest_agent=""
-guest_agent_with_vss="no"
-guest_agent_ntddscsi="no"
-guest_agent_msi=""
-vss_win32_sdk=""
-win_sdk="no"
-want_tools=""
-libiscsi=""
-libnfs=""
-coroutine=""
-coroutine_pool=""
-debug_stack_usage="no"
-crypto_afalg="no"
-seccomp=""
-glusterfs=""
-glusterfs_xlator_opt="no"
-glusterfs_discard="no"
-glusterfs_fallocate="no"
-glusterfs_zerofill="no"
-glusterfs_ftruncate_has_stat="no"
-glusterfs_iocb_has_stat="no"
-gtk=""
-gtk_gl="no"
-tls_priority="NORMAL"
-gnutls=""
-nettle=""
-nettle_xts="no"
-gcrypt=""
-gcrypt_hmac="no"
-gcrypt_xts="no"
-qemu_private_xts="yes"
-auth_pam=""
-vte=""
-virglrenderer=""
-tpm=""
-libssh=""
-live_block_migration="yes"
-numa=""
-tcmalloc="no"
-jemalloc="no"
-replication="yes"
-bochs="yes"
-cloop="yes"
-dmg="yes"
-qcow1="yes"
-vdi="yes"
-vvfat="yes"
-qed="yes"
-parallels="yes"
-sheepdog="no"
-libxml2=""
-debug_mutex="no"
-libpmem=""
-default_devices="yes"
-plugins="no"
-fuzzing="no"
-rng_none="no"
-secret_keyring=""
-libdaxctl=""
-meson=""
-ninja=""
-skip_meson=no
-gettext=""
-
-bogus_os="no"
-malloc_trim="auto"
-
-# parse CC options first
 for opt do
   optarg=$(expr "x$opt" : 'x[^=]*=\(.*\)')
   case "$opt" in
   --cross-prefix=*) cross_prefix="$optarg"
+                    cross_compile="yes"
   ;;
   --cc=*) CC="$optarg"
   ;;
   --cxx=*) CXX="$optarg"
   ;;
-  --cpu=*) cpu="$optarg"
-  ;;
-  --extra-cflags=*) QEMU_CFLAGS="$QEMU_CFLAGS $optarg"
-                    QEMU_LDFLAGS="$QEMU_LDFLAGS $optarg"
+  --objcc=*) objcc="$optarg"
   ;;
-  --extra-cxxflags=*) QEMU_CXXFLAGS="$QEMU_CXXFLAGS $optarg"
+  --cpu=*) cpu="$optarg"
   ;;
-  --extra-ldflags=*) QEMU_LDFLAGS="$QEMU_LDFLAGS $optarg"
-                     EXTRA_LDFLAGS="$optarg"
+  --extra-cflags=*)
+    EXTRA_CFLAGS="$EXTRA_CFLAGS $optarg"
+    EXTRA_CXXFLAGS="$EXTRA_CXXFLAGS $optarg"
+    EXTRA_OBJCFLAGS="$EXTRA_OBJCFLAGS $optarg"
+    ;;
+  --extra-cxxflags=*) EXTRA_CXXFLAGS="$EXTRA_CXXFLAGS $optarg"
   ;;
-  --enable-debug-info) debug_info="yes"
+  --extra-objcflags=*) EXTRA_OBJCFLAGS="$EXTRA_OBJCFLAGS $optarg"
   ;;
-  --disable-debug-info) debug_info="no"
+  --extra-ldflags=*) EXTRA_LDFLAGS="$EXTRA_LDFLAGS $optarg"
   ;;
   --cross-cc-*[!a-zA-Z0-9_-]*=*) error_exit "Passed bad --cross-cc-FOO option"
   ;;
-  --cross-cc-cflags-*) cc_arch=${opt#--cross-cc-flags-}; cc_arch=${cc_arch%%=*}
+  --cross-cc-cflags-*) cc_arch=${opt#--cross-cc-cflags-}; cc_arch=${cc_arch%%=*}
                       eval "cross_cc_cflags_${cc_arch}=\$optarg"
-                      cross_cc_vars="$cross_cc_vars cross_cc_cflags_${cc_arch}"
   ;;
   --cross-cc-*) cc_arch=${opt#--cross-cc-}; cc_arch=${cc_arch%%=*}
-                cc_archs="$cc_archs $cc_arch"
                 eval "cross_cc_${cc_arch}=\$optarg"
-                cross_cc_vars="$cross_cc_vars cross_cc_${cc_arch}"
+  ;;
+  --cross-prefix-*[!a-zA-Z0-9_-]*=*) error_exit "Passed bad --cross-prefix-FOO option"
+  ;;
+  --cross-prefix-*) cc_arch=${opt#--cross-prefix-}; cc_arch=${cc_arch%%=*}
+                    eval "cross_prefix_${cc_arch}=\$optarg"
+  ;;
+  --without-default-features) default_feature="no"
   ;;
   esac
 done
-# OS specific
-# Using uname is really, really broken.  Once we have the right set of checks
-# we can eliminate its usage altogether.
+
+default_cflags='-O2 -g'
+git_submodules_action="update"
+docs="auto"
+EXESUF=""
+system="yes"
+linux_user=""
+bsd_user=""
+plugins="$default_feature"
+subdirs=""
+ninja=""
+python=
+download="enabled"
+skip_meson=no
+use_containers="yes"
+gdb_bin=$(command -v "gdb-multiarch" || command -v "gdb")
+gdb_arches=""
+
+# Don't accept a target_list environment variable.
+unset target_list
+unset target_list_exclude
+
+# The following Meson options are handled manually (still they
+# are included in the automatically generated help message)
+# because they automatically enable/disable other options
+tcg="auto"
+cfi="false"
+
+# Meson has PIE as a boolean rather than enabled/disabled/auto,
+# and we also need to check for -static-pie before Meson runs
+# which requires knowing whether --static is enabled.
+pie=""
+static="no"
 
 # Preferred compiler:
 #  ${CC} (if set)
 #  ${cross_prefix}gcc (if cross-prefix specified)
 #  system compiler
 if test -z "${CC}${cross_prefix}"; then
-  cc="$host_cc"
+  cc="cc"
 else
   cc="${CC-${cross_prefix}gcc}"
 fi
@@ -511,41 +287,36 @@ else
   cxx="${CXX-${cross_prefix}g++}"
 fi
 
+# Preferred ObjC compiler:
+# $objcc (if set, i.e. via --objcc option)
+# ${cross_prefix}clang (if cross-prefix specified)
+# clang (if available)
+# $cc
+if test -z "${objcc}${cross_prefix}"; then
+  if has clang; then
+    objcc=clang
+  else
+    objcc="$cc"
+  fi
+else
+  objcc="${objcc-${cross_prefix}clang}"
+fi
+
 ar="${AR-${cross_prefix}ar}"
 as="${AS-${cross_prefix}as}"
 ccas="${CCAS-$cc}"
-cpp="${CPP-$cc -E}"
+dlltool="${DLLTOOL-${cross_prefix}dlltool}"
 objcopy="${OBJCOPY-${cross_prefix}objcopy}"
 ld="${LD-${cross_prefix}ld}"
 ranlib="${RANLIB-${cross_prefix}ranlib}"
 nm="${NM-${cross_prefix}nm}"
 strip="${STRIP-${cross_prefix}strip}"
+widl="${WIDL-${cross_prefix}widl}"
 windres="${WINDRES-${cross_prefix}windres}"
-pkg_config_exe="${PKG_CONFIG-${cross_prefix}pkg-config}"
-query_pkg_config() {
-    "${pkg_config_exe}" ${QEMU_PKG_CONFIG_FLAGS} "$@"
-}
-pkg_config=query_pkg_config
+windmc="${WINDMC-${cross_prefix}windmc}"
+pkg_config="${PKG_CONFIG-${cross_prefix}pkg-config}"
 sdl2_config="${SDL2_CONFIG-${cross_prefix}sdl2-config}"
 
-# If the user hasn't specified ARFLAGS, default to 'rv', just as make does.
-ARFLAGS="${ARFLAGS-rv}"
-
-# default flags for all hosts
-# We use -fwrapv to tell the compiler that we require a C dialect where
-# left shift of signed integers is well defined and has the expected
-# 2s-complement style results. (Both clang and gcc agree that it
-# provides these semantics.)
-QEMU_CFLAGS="-fno-strict-aliasing -fno-common -fwrapv $QEMU_CFLAGS"
-QEMU_CFLAGS="-Wundef -Wwrite-strings -Wmissing-prototypes $QEMU_CFLAGS"
-QEMU_CFLAGS="-Wstrict-prototypes -Wredundant-decls $QEMU_CFLAGS"
-QEMU_CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE $QEMU_CFLAGS"
-
-# Flags that are needed during configure but later taken care of by Meson
-CONFIGURE_CFLAGS="-std=gnu99 -Wall"
-CONFIGURE_LDFLAGS=
-
-
 check_define() {
 cat > $TMPC <<EOF
 #if !defined($1)
@@ -556,79 +327,39 @@ EOF
   compile_object
 }
 
-check_include() {
-cat > $TMPC <<EOF
-#include <$1>
-int main(void) { return 0; }
-EOF
-  compile_object
-}
-
 write_c_skeleton() {
     cat > $TMPC <<EOF
 int main(void) { return 0; }
 EOF
 }
 
-write_c_fuzzer_skeleton() {
-    cat > $TMPC <<EOF
-#include <stdint.h>
-#include <sys/types.h>
-int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
-int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { return 0; }
-EOF
-}
-
 if check_define __linux__ ; then
-  targetos="Linux"
+  targetos=linux
 elif check_define _WIN32 ; then
-  targetos='MINGW32'
+  targetos=windows
 elif check_define __OpenBSD__ ; then
-  targetos='OpenBSD'
+  targetos=openbsd
 elif check_define __sun__ ; then
-  targetos='SunOS'
+  targetos=sunos
 elif check_define __HAIKU__ ; then
-  targetos='Haiku'
+  targetos=haiku
 elif check_define __FreeBSD__ ; then
-  targetos='FreeBSD'
+  targetos=freebsd
 elif check_define __FreeBSD_kernel__ && check_define __GLIBC__; then
-  targetos='GNU/kFreeBSD'
+  targetos=gnu/kfreebsd
 elif check_define __DragonFly__ ; then
-  targetos='DragonFly'
+  targetos=dragonfly
 elif check_define __NetBSD__; then
-  targetos='NetBSD'
+  targetos=netbsd
 elif check_define __APPLE__; then
-  targetos='Darwin'
+  targetos=darwin
 else
   # This is a fatal error, but don't report it yet, because we
   # might be going to just print the --help text, or it might
   # be the result of a missing compiler.
-  targetos='bogus'
-  bogus_os='yes'
+  targetos=bogus
 fi
 
-# Some host OSes need non-standard checks for which CPU to use.
-# Note that these checks are broken for cross-compilation: if you're
-# cross-compiling to one of these OSes then you'll need to specify
-# the correct CPU with the --cpu option.
-case $targetos in
-Darwin)
-  # on Leopard most of the system is 32-bit, so we have to ask the kernel if we can
-  # run 64-bit userspace code.
-  # If the user didn't specify a CPU explicitly and the kernel says this is
-  # 64 bit hw, then assume x86_64. Otherwise fall through to the usual detection code.
-  if test -z "$cpu" && test "$(sysctl -n hw.optional.x86_64)" = "1"; then
-    cpu="x86_64"
-  fi
-  HOST_DSOSUF=".dylib"
-  ;;
-SunOS)
-  # $(uname -m) returns i86pc even on an x86_64 box, so default based on isainfo
-  if test -z "$cpu" && test "$(isainfo -k)" = "amd64"; then
-    cpu="x86_64"
-  fi
-esac
-
 if test ! -z "$cpu" ; then
   # command line argument
   :
@@ -674,155 +405,162 @@ elif check_define __arm__ ; then
   cpu="arm"
 elif check_define __aarch64__ ; then
   cpu="aarch64"
+elif check_define __loongarch64 ; then
+  cpu="loongarch64"
 else
+  # Using uname is really broken, but it is just a fallback for architectures
+  # that are going to use TCI anyway
   cpu=$(uname -m)
+  echo "WARNING: unrecognized host CPU, proceeding with 'uname -m' output '$cpu'"
 fi
 
-ARCH=
-# Normalise host CPU name and set ARCH.
+# Normalise host CPU name to the values used by Meson cross files and in source
+# directories, and set multilib cflags.  The canonicalization isn't really
+# necessary, because the architectures that we check for should not hit the
+# 'uname -m' case, but better safe than sorry in case --cpu= is used.
+#
 # Note that this case should only have supported host CPUs, not guests.
+# Please keep it sorted and synchronized with meson.build's host_arch.
+host_arch=
+linux_arch=
 case "$cpu" in
-  ppc|ppc64|s390x|sparc64|x32|riscv32|riscv64)
-  ;;
-  ppc64le)
-    ARCH="ppc64"
-  ;;
-  i386|i486|i586|i686|i86pc|BePC)
-    cpu="i386"
-  ;;
-  x86_64|amd64)
-    cpu="x86_64"
-  ;;
-  armv*b|armv*l|arm)
-    cpu="arm"
-  ;;
   aarch64)
-    cpu="aarch64"
-  ;;
+    host_arch=aarch64
+    linux_arch=arm64
+    ;;
+
+  armv*b|armv*l|arm)
+    cpu=arm
+    host_arch=arm
+    linux_arch=arm
+    ;;
+
+  i386|i486|i586|i686)
+    cpu="i386"
+    host_arch=i386
+    linux_arch=x86
+    CPU_CFLAGS="-m32"
+    ;;
+
+  loongarch*)
+    cpu=loongarch64
+    host_arch=loongarch64
+    ;;
+
+  mips64*)
+    cpu=mips64
+    host_arch=mips
+    linux_arch=mips
+    ;;
   mips*)
-    cpu="mips"
-  ;;
-  sparc|sun4[cdmuv])
-    cpu="sparc"
-  ;;
-  *)
-    # This will result in either an error or falling back to TCI later
-    ARCH=unknown
-  ;;
-esac
-if test -z "$ARCH"; then
-  ARCH="$cpu"
-fi
+    cpu=mips
+    host_arch=mips
+    linux_arch=mips
+    ;;
 
-# OS specific
+  ppc)
+    host_arch=ppc
+    linux_arch=powerpc
+    CPU_CFLAGS="-m32"
+    ;;
+  ppc64)
+    host_arch=ppc64
+    linux_arch=powerpc
+    CPU_CFLAGS="-m64 -mbig-endian"
+    ;;
+  ppc64le)
+    cpu=ppc64
+    host_arch=ppc64
+    linux_arch=powerpc
+    CPU_CFLAGS="-m64 -mlittle-endian"
+    ;;
 
-case $targetos in
-MINGW32*)
-  mingw32="yes"
-  audio_possible_drivers="dsound sdl"
-  if check_include dsound.h; then
-    audio_drv_list="dsound"
-  else
-    audio_drv_list=""
-  fi
-  supported_os="yes"
-  pie="no"
-;;
-GNU/kFreeBSD)
-  bsd="yes"
-  audio_drv_list="oss try-sdl"
-  audio_possible_drivers="oss sdl pa"
-;;
-FreeBSD)
-  bsd="yes"
-  make="${MAKE-gmake}"
-  audio_drv_list="oss try-sdl"
-  audio_possible_drivers="oss sdl pa"
-  # needed for kinfo_getvmmap(3) in libutil.h
-  netmap=""  # enable netmap autodetect
-;;
-DragonFly)
-  bsd="yes"
-  make="${MAKE-gmake}"
-  audio_drv_list="oss try-sdl"
-  audio_possible_drivers="oss sdl pa"
-;;
-NetBSD)
-  bsd="yes"
-  make="${MAKE-gmake}"
-  audio_drv_list="oss try-sdl"
-  audio_possible_drivers="oss sdl"
-  oss_lib="-lossaudio"
-;;
-OpenBSD)
-  bsd="yes"
-  make="${MAKE-gmake}"
-  audio_drv_list="try-sdl"
-  audio_possible_drivers="sdl"
-;;
-Darwin)
-  bsd="yes"
-  darwin="yes"
-  if [ "$cpu" = "x86_64" ] ; then
-    QEMU_CFLAGS="-arch x86_64 $QEMU_CFLAGS"
-    QEMU_LDFLAGS="-arch x86_64 $QEMU_LDFLAGS"
-  fi
-  cocoa="enabled"
-  audio_drv_list="coreaudio try-sdl"
-  audio_possible_drivers="coreaudio sdl"
-  QEMU_LDFLAGS="-framework CoreFoundation -framework IOKit $QEMU_LDFLAGS"
-  # Disable attempts to use ObjectiveC features in os/object.h since they
-  # won't work when we're compiling with gcc as a C compiler.
-  QEMU_CFLAGS="-DOS_OBJECT_USE_OBJC=0 $QEMU_CFLAGS"
-;;
-SunOS)
-  solaris="yes"
-  make="${MAKE-gmake}"
-  smbd="${SMBD-/usr/sfw/sbin/smbd}"
-  if test -f /usr/include/sys/soundcard.h ; then
-    audio_drv_list="oss try-sdl"
-  fi
-  audio_possible_drivers="oss sdl"
-# needed for CMSG_ macros in sys/socket.h
-  QEMU_CFLAGS="-D_XOPEN_SOURCE=600 $QEMU_CFLAGS"
-# needed for TIOCWIN* defines in termios.h
-  QEMU_CFLAGS="-D__EXTENSIONS__ $QEMU_CFLAGS"
-;;
-Haiku)
-  haiku="yes"
-  QEMU_CFLAGS="-DB_USE_POSITIVE_POSIX_ERRORS -D_BSD_SOURCE $QEMU_CFLAGS"
-;;
-Linux)
-  audio_drv_list="try-pa oss"
-  audio_possible_drivers="oss alsa sdl pa"
-  linux="yes"
-  linux_user="yes"
-  vhost_user="yes"
-;;
-esac
+  riscv32 | riscv64)
+    host_arch=riscv
+    linux_arch=riscv
+    ;;
 
-if [ "$bsd" = "yes" ] ; then
-  if [ "$darwin" != "yes" ] ; then
-    bsd_user="yes"
-  fi
-fi
+  s390)
+    linux_arch=s390
+    CPU_CFLAGS="-m31"
+    ;;
+  s390x)
+    host_arch=s390x
+    linux_arch=s390
+    CPU_CFLAGS="-m64"
+    ;;
 
-: ${make=${MAKE-make}}
+  sparc|sun4[cdmuv])
+    cpu=sparc
+    CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc"
+    ;;
+  sparc64)
+    host_arch=sparc64
+    CPU_CFLAGS="-m64 -mcpu=ultrasparc"
+    ;;
 
-# We prefer python 3.x. A bare 'python' is traditionally
-# python 2.x, but some distros have it as python 3.x, so
-# we check that too
-python=
-explicit_python=no
-for binary in "${PYTHON-python3}" python
-do
-    if has "$binary"
-    then
-        python=$(command -v "$binary")
-        break
-    fi
-done
+  x32)
+    cpu="x86_64"
+    host_arch=x86_64
+    linux_arch=x86
+    CPU_CFLAGS="-mx32"
+    ;;
+  x86_64|amd64)
+    cpu="x86_64"
+    host_arch=x86_64
+    linux_arch=x86
+    # ??? Only extremely old AMD cpus do not have cmpxchg16b.
+    # If we truly care, we should simply detect this case at
+    # runtime and generate the fallback to serial emulation.
+    CPU_CFLAGS="-m64 -mcx16"
+    ;;
+esac
 
+#if test -n "$host_arch" && {
+#    ! test -d "$source_path/linux-user/include/host/$host_arch" ||
+#    ! test -d "$source_path/common-user/host/$host_arch"; }; then
+#    error_exit "linux-user/include/host/$host_arch does not exist." \
+#       "This is a bug in the configure script, please report it."
+#fi
+#if test -n "$linux_arch" && ! test -d "$source_path/linux-headers/asm-$linux_arch"; then
+#    error_exit "linux-headers/asm-$linux_arch does not exist." \
+#       "This is a bug in the configure script, please report it."
+#fi
+
+check_py_version() {
+    # We require python >= 3.8.
+    # NB: a True python conditional creates a non-zero return code (Failure)
+    "$1" -c 'import sys; sys.exit(sys.version_info < (3,8))'
+}
+
+first_python=
+if test -z "${PYTHON}"; then
+    # A bare 'python' is traditionally python 2.x, but some distros
+    # have it as python 3.x, so check in both places.
+    for binary in python3 python python3.12 python3.11 \
+                          python3.10 python3.9 python3.8; do
+        if has "$binary"; then
+            python=$(command -v "$binary")
+            if check_py_version "$python"; then
+                # This one is good.
+                first_python=
+                break
+            else
+                first_python=$python
+            fi
+        fi
+    done
+else
+    # Same as above, but only check the environment variable.
+    has "${PYTHON}" || error_exit "The PYTHON environment variable does not point to an executable"
+    python=$(command -v "$PYTHON")
+    if check_py_version "$python"; then
+        # This one is good.
+        first_python=
+    else
+        first_python=$first_python
+    fi
+fi
 
 # Check for ancillary tools used in testing
 genisoimage=
@@ -835,38 +573,58 @@ do
     fi
 done
 
-: ${smbd=${SMBD-/usr/sbin/smbd}}
-
-# Default objcc to clang if available, otherwise use CC
-if has clang; then
-  objcc=clang
-else
-  objcc="$cc"
-fi
-
-if test "$mingw32" = "yes" ; then
+if test "$targetos" = "windows" ; then
   EXESUF=".exe"
-  HOST_DSOSUF=".dll"
-  # MinGW needs -mthreads for TLS and macro _MT.
-  CONFIGURE_CFLAGS="-mthreads $CONFIGURE_CFLAGS"
-  write_c_skeleton;
-  prefix="/qemu"
-  qemu_suffix=""
-  libs_qga="-lws2_32 -lwinmm -lpowrprof -lwtsapi32 -lwininet -liphlpapi -lnetapi32 $libs_qga"
 fi
 
-werror=""
+meson_option_build_array() {
+  printf '['
+  (if test "$targetos" = windows; then
+    IFS=\;
+  else
+    IFS=:
+  fi
+  for e in $1; do
+    printf '"""'
+    # backslash escape any '\' and '"' characters
+    printf "%s" "$e" | sed -e 's/\([\"]\)/\\\1/g'
+    printf '""",'
+  done)
+  printf ']\n'
+}
+
+. "$source_path/scripts/meson-buildoptions.sh"
+
+meson_options=
+meson_option_add() {
+  local arg
+  for arg; do
+    meson_options="$meson_options $(quote_sh "$arg")"
+  done
+}
+meson_option_parse() {
+  meson_options="$meson_options $(_meson_option_parse "$@")"
+  if test $? -eq 1; then
+    echo "ERROR: unknown option $1"
+    echo "Try '$0 --help' for more information"
+    exit 1
+  fi
+}
+
+meson_add_machine_file() {
+  if test "$cross_compile" = "yes"; then
+    meson_option_add --cross-file "$1"
+  else
+    meson_option_add --native-file "$1"
+  fi
+}
 
 for opt do
   optarg=$(expr "x$opt" : 'x[^=]*=\(.*\)')
   case "$opt" in
   --help|-h) show_help=yes
   ;;
-  --version|-V) exec cat $source_path/VERSION
-  ;;
-  --prefix=*) prefix="$optarg"
-  ;;
-  --interp-prefix=*) interp_prefix="$optarg"
+  --version|-V) exec cat "$source_path/VERSION"
   ;;
   --cross-prefix=*)
   ;;
@@ -876,47 +634,33 @@ for opt do
   ;;
   --cxx=*)
   ;;
-  --iasl=*) iasl="$optarg"
-  ;;
-  --objcc=*) objcc="$optarg"
+  --objcc=*)
   ;;
-  --make=*) make="$optarg"
+  --make=*)
   ;;
   --install=*)
   ;;
-  --python=*) python="$optarg" ; explicit_python=yes
-  ;;
-  --sphinx-build=*) sphinx_build="$optarg"
+  --python=*) python="$optarg"
   ;;
   --skip-meson) skip_meson=yes
   ;;
-  --meson=*) meson="$optarg"
-  ;;
   --ninja=*) ninja="$optarg"
   ;;
-  --smbd=*) smbd="$optarg"
-  ;;
   --extra-cflags=*)
   ;;
   --extra-cxxflags=*)
   ;;
-  --extra-ldflags=*)
-  ;;
-  --enable-debug-info)
+  --extra-objcflags=*)
   ;;
-  --disable-debug-info)
+  --extra-ldflags=*)
   ;;
   --cross-cc-*)
   ;;
-  --enable-modules)
-      modules="yes"
-  ;;
-  --disable-modules)
-      modules="no"
+  --cross-prefix-*)
   ;;
-  --disable-module-upgrades) module_upgrades="no"
+  --enable-docs) docs=enabled
   ;;
-  --enable-module-upgrades) module_upgrades="yes"
+  --disable-docs) docs=disabled
   ;;
   --cpu=*)
   ;;
@@ -930,48 +674,25 @@ for opt do
                        error_exit "Can't mix --target-list-exclude with --target-list"
                    fi
   ;;
-  --enable-trace-backends=*) trace_backends="$optarg"
+  --with-default-devices) meson_option_add -Ddefault_devices=true
   ;;
-  # XXX: backwards compatibility
-  --enable-trace-backend=*) trace_backends="$optarg"
+  --without-default-devices) meson_option_add -Ddefault_devices=false
   ;;
-  --with-trace-file=*) trace_file="$optarg"
+  --with-devices-*[!a-zA-Z0-9_-]*=*) error_exit "Passed bad --with-devices-FOO option"
   ;;
-  --with-default-devices) default_devices="yes"
+  --with-devices-*) device_arch=${opt#--with-devices-};
+                    device_arch=${device_arch%%=*}
+                    cf=$source_path/configs/devices/$device_arch-softmmu/$optarg.mak
+                    if test -f "$cf"; then
+                        device_archs="$device_archs $device_arch"
+                        eval "devices_${device_arch}=\$optarg"
+                    else
+                        error_exit "File $cf does not exist"
+                    fi
   ;;
-  --without-default-devices) default_devices="no"
+  --without-default-features) # processed above
   ;;
-  --enable-gprof) gprof="yes"
-  ;;
-  --enable-gcov) gcov="yes"
-  ;;
-  --static)
-    static="yes"
-    QEMU_PKG_CONFIG_FLAGS="--static $QEMU_PKG_CONFIG_FLAGS"
-  ;;
-  --mandir=*) mandir="$optarg"
-  ;;
-  --bindir=*) bindir="$optarg"
-  ;;
-  --libdir=*) libdir="$optarg"
-  ;;
-  --libexecdir=*) libexecdir="$optarg"
-  ;;
-  --includedir=*) includedir="$optarg"
-  ;;
-  --datadir=*) datadir="$optarg"
-  ;;
-  --with-suffix=*) qemu_suffix="$optarg"
-  ;;
-  --docdir=*) docdir="$optarg"
-  ;;
-  --localedir=*) localedir="$optarg"
-  ;;
-  --sysconfdir=*) sysconfdir="$optarg"
-  ;;
-  --localstatedir=*) local_statedir="$optarg"
-  ;;
-  --firmwarepath=*) firmwarepath="$optarg"
+  --static) static="yes"
   ;;
   --host=*|--build=*|\
   --disable-dependency-tracking|\
@@ -983,6001 +704,1097 @@ for opt do
     # configure to be used by RPM and similar macros that set
     # lots of directory switches by default.
   ;;
-  --disable-sdl) sdl="disabled"
-  ;;
-  --enable-sdl) sdl="enabled"
-  ;;
-  --disable-sdl-image) sdl_image="disabled"
-  ;;
-  --enable-sdl-image) sdl_image="enabled"
-  ;;
-  --disable-qom-cast-debug) qom_cast_debug="no"
-  ;;
-  --enable-qom-cast-debug) qom_cast_debug="yes"
-  ;;
-  --disable-virtfs) virtfs="no"
-  ;;
-  --enable-virtfs) virtfs="yes"
-  ;;
-  --disable-libudev) libudev="disabled"
-  ;;
-  --enable-libudev) libudev="enabled"
-  ;;
-  --disable-virtiofsd) virtiofsd="disabled"
-  ;;
-  --enable-virtiofsd) virtiofsd="enabled"
-  ;;
-  --disable-mpath) mpath="disabled"
-  ;;
-  --enable-mpath) mpath="enabled"
+  --enable-debug)
+      # Enable debugging options that aren't excessively noisy
+      meson_option_parse --enable-debug-tcg ""
+      meson_option_parse --enable-debug-graph-lock ""
+      meson_option_parse --enable-debug-mutex ""
+      meson_option_add -Doptimization=0
+      default_cflags='-O0 -g'
   ;;
-  --disable-vnc) vnc="disabled"
+  --disable-tcg) tcg="disabled"
   ;;
-  --enable-vnc) vnc="enabled"
+  --enable-tcg) tcg="enabled"
   ;;
-  --disable-gettext) gettext="false"
+  --disable-system) system="no"
   ;;
-  --enable-gettext) gettext="true"
+  --enable-system) system="yes"
   ;;
-  --oss-lib=*) oss_lib="$optarg"
+  --disable-user)
+      linux_user="no" ;
+      bsd_user="no" ;
   ;;
-  --audio-drv-list=*) audio_drv_list="$optarg"
+  --enable-user) ;;
+  --disable-linux-user) linux_user="no"
   ;;
-  --block-drv-rw-whitelist=*|--block-drv-whitelist=*) block_drv_rw_whitelist=$(echo "$optarg" | sed -e 's/,/ /g')
+  --enable-linux-user) linux_user="yes"
   ;;
-  --block-drv-ro-whitelist=*) block_drv_ro_whitelist=$(echo "$optarg" | sed -e 's/,/ /g')
+  --disable-bsd-user) bsd_user="no"
   ;;
-  --enable-debug-tcg) debug_tcg="yes"
+  --enable-bsd-user) bsd_user="yes"
   ;;
-  --disable-debug-tcg) debug_tcg="no"
+  --enable-pie) pie="yes"
   ;;
-  --enable-debug)
-      # Enable debugging options that aren't excessively noisy
-      debug_tcg="yes"
-      debug_mutex="yes"
-      debug="yes"
-      strip_opt="no"
-      fortify_source="no"
+  --disable-pie) pie="no"
   ;;
-  --enable-sanitizers) sanitizers="yes"
+  --enable-cfi) cfi=true
   ;;
-  --disable-sanitizers) sanitizers="no"
+  --disable-cfi) cfi=false
   ;;
-  --enable-tsan) tsan="yes"
+  --disable-download) download="disabled"; git_submodules_action=validate;
   ;;
-  --disable-tsan) tsan="no"
+  --enable-download) download="enabled"; git_submodules_action=update;
   ;;
-  --enable-sparse) sparse="enabled"
+  --enable-plugins) plugins="yes"
   ;;
-  --disable-sparse) sparse="disabled"
+  --disable-plugins) plugins="no"
   ;;
-  --disable-strip) strip_opt="no"
+  --enable-containers) use_containers="yes"
   ;;
-  --disable-vnc-sasl) vnc_sasl="disabled"
+  --disable-containers) use_containers="no"
   ;;
-  --enable-vnc-sasl) vnc_sasl="enabled"
+  --container-engine=*) container_engine="$optarg"
   ;;
-  --disable-vnc-jpeg) vnc_jpeg="disabled"
+  --gdb=*) gdb_bin="$optarg"
   ;;
-  --enable-vnc-jpeg) vnc_jpeg="enabled"
+  # everything else has the same name in configure and meson
+  --*) meson_option_parse "$opt" "$optarg"
   ;;
-  --disable-vnc-png) vnc_png="disabled"
+  # Pass through -Dxxxx options to meson
+  -D*) meson_options="$meson_options $opt"
   ;;
-  --enable-vnc-png) vnc_png="enabled"
-  ;;
-  --disable-slirp) slirp="disabled"
-  ;;
-  --enable-slirp=git) slirp="internal"
-  ;;
-  --enable-slirp=system) slirp="system"
-  ;;
-  --disable-vde) vde="no"
-  ;;
-  --enable-vde) vde="yes"
-  ;;
-  --disable-netmap) netmap="no"
-  ;;
-  --enable-netmap) netmap="yes"
-  ;;
-  --disable-xen) xen="disabled"
-  ;;
-  --enable-xen) xen="enabled"
-  ;;
-  --disable-xen-pci-passthrough) xen_pci_passthrough="disabled"
-  ;;
-  --enable-xen-pci-passthrough) xen_pci_passthrough="enabled"
-  ;;
-  --disable-brlapi) brlapi="no"
-  ;;
-  --enable-brlapi) brlapi="yes"
-  ;;
-  --disable-kvm) kvm="disabled"
-  ;;
-  --enable-kvm) kvm="enabled"
-  ;;
-  --disable-hax) hax="disabled"
-  ;;
-  --enable-hax) hax="enabled"
-  ;;
-  --disable-hvf) hvf="disabled"
-  ;;
-  --enable-hvf) hvf="enabled"
-  ;;
-  --disable-whpx) whpx="disabled"
-  ;;
-  --enable-whpx) whpx="enabled"
-  ;;
-  --disable-tcg-interpreter) tcg_interpreter="no"
-  ;;
-  --enable-tcg-interpreter) tcg_interpreter="yes"
-  ;;
-  --disable-cap-ng)  cap_ng="no"
-  ;;
-  --enable-cap-ng) cap_ng="yes"
-  ;;
-  --disable-tcg) tcg="disabled"
-  ;;
-  --enable-tcg) tcg="enabled"
-  ;;
-  --disable-malloc-trim) malloc_trim="disabled"
-  ;;
-  --enable-malloc-trim) malloc_trim="enabled"
-  ;;
-  --disable-spice) spice="no"
-  ;;
-  --enable-spice) spice="yes"
-  ;;
-  --disable-libiscsi) libiscsi="no"
-  ;;
-  --enable-libiscsi) libiscsi="yes"
-  ;;
-  --disable-libnfs) libnfs="no"
-  ;;
-  --enable-libnfs) libnfs="yes"
-  ;;
-  --enable-profiler) profiler="yes"
-  ;;
-  --disable-cocoa) cocoa="disabled"
-  ;;
-  --enable-cocoa)
-      cocoa="enabled" ;
-      audio_drv_list="coreaudio $(echo $audio_drv_list | sed s,coreaudio,,g)"
-  ;;
-  --disable-system) softmmu="no"
-  ;;
-  --enable-system) softmmu="yes"
-  ;;
-  --disable-user)
-      linux_user="no" ;
-      bsd_user="no" ;
-  ;;
-  --enable-user) ;;
-  --disable-linux-user) linux_user="no"
-  ;;
-  --enable-linux-user) linux_user="yes"
-  ;;
-  --disable-bsd-user) bsd_user="no"
-  ;;
-  --enable-bsd-user) bsd_user="yes"
-  ;;
-  --enable-pie) pie="yes"
-  ;;
-  --disable-pie) pie="no"
-  ;;
-  --enable-werror) werror="yes"
-  ;;
-  --disable-werror) werror="no"
-  ;;
-  --enable-stack-protector) stack_protector="yes"
-  ;;
-  --disable-stack-protector) stack_protector="no"
-  ;;
-  --enable-safe-stack) safe_stack="yes"
-  ;;
-  --disable-safe-stack) safe_stack="no"
-  ;;
-  --disable-curses) curses="disabled"
-  ;;
-  --enable-curses) curses="enabled"
-  ;;
-  --disable-iconv) iconv="disabled"
-  ;;
-  --enable-iconv) iconv="enabled"
-  ;;
-  --disable-curl) curl="no"
-  ;;
-  --enable-curl) curl="yes"
-  ;;
-  --disable-fdt) fdt="disabled"
-  ;;
-  --enable-fdt) fdt="enabled"
-  ;;
-  --enable-fdt=git) fdt="internal"
-  ;;
-  --enable-fdt=system) fdt="system"
-  ;;
-  --disable-linux-aio) linux_aio="no"
-  ;;
-  --enable-linux-aio) linux_aio="yes"
-  ;;
-  --disable-linux-io-uring) linux_io_uring="no"
-  ;;
-  --enable-linux-io-uring) linux_io_uring="yes"
-  ;;
-  --disable-attr) attr="no"
-  ;;
-  --enable-attr) attr="yes"
-  ;;
-  --disable-membarrier) membarrier="no"
-  ;;
-  --enable-membarrier) membarrier="yes"
-  ;;
-  --disable-blobs) blobs="false"
-  ;;
-  --with-pkgversion=*) pkgversion="$optarg"
-  ;;
-  --with-coroutine=*) coroutine="$optarg"
-  ;;
-  --disable-coroutine-pool) coroutine_pool="no"
-  ;;
-  --enable-coroutine-pool) coroutine_pool="yes"
-  ;;
-  --enable-debug-stack-usage) debug_stack_usage="yes"
-  ;;
-  --enable-crypto-afalg) crypto_afalg="yes"
-  ;;
-  --disable-crypto-afalg) crypto_afalg="no"
-  ;;
-  --disable-docs) docs="disabled"
-  ;;
-  --enable-docs) docs="enabled"
-  ;;
-  --disable-vhost-net) vhost_net="no"
-  ;;
-  --enable-vhost-net) vhost_net="yes"
-  ;;
-  --disable-vhost-crypto) vhost_crypto="no"
-  ;;
-  --enable-vhost-crypto) vhost_crypto="yes"
-  ;;
-  --disable-vhost-scsi) vhost_scsi="no"
-  ;;
-  --enable-vhost-scsi) vhost_scsi="yes"
-  ;;
-  --disable-vhost-vsock) vhost_vsock="no"
-  ;;
-  --enable-vhost-vsock) vhost_vsock="yes"
-  ;;
-  --disable-vhost-user-blk-server) vhost_user_blk_server="disabled"
-  ;;
-  --enable-vhost-user-blk-server) vhost_user_blk_server="enabled"
-  ;;
-  --disable-vhost-user-fs) vhost_user_fs="no"
-  ;;
-  --enable-vhost-user-fs) vhost_user_fs="yes"
-  ;;
-  --disable-opengl) opengl="no"
-  ;;
-  --enable-opengl) opengl="yes"
-  ;;
-  --disable-rbd) rbd="no"
-  ;;
-  --enable-rbd) rbd="yes"
-  ;;
-  --disable-xfsctl) xfs="no"
-  ;;
-  --enable-xfsctl) xfs="yes"
-  ;;
-  --disable-smartcard) smartcard="no"
-  ;;
-  --enable-smartcard) smartcard="yes"
-  ;;
-  --disable-u2f) u2f="disabled"
-  ;;
-  --enable-u2f) u2f="enabled"
-  ;;
-  --disable-libusb) libusb="no"
-  ;;
-  --enable-libusb) libusb="yes"
-  ;;
-  --disable-usb-redir) usb_redir="no"
-  ;;
-  --enable-usb-redir) usb_redir="yes"
-  ;;
-  --disable-zlib-test)
-  ;;
-  --disable-lzo) lzo="no"
-  ;;
-  --enable-lzo) lzo="yes"
-  ;;
-  --disable-snappy) snappy="no"
-  ;;
-  --enable-snappy) snappy="yes"
-  ;;
-  --disable-bzip2) bzip2="no"
-  ;;
-  --enable-bzip2) bzip2="yes"
-  ;;
-  --enable-lzfse) lzfse="yes"
-  ;;
-  --disable-lzfse) lzfse="no"
-  ;;
-  --disable-zstd) zstd="no"
-  ;;
-  --enable-zstd) zstd="yes"
-  ;;
-  --enable-guest-agent) guest_agent="yes"
-  ;;
-  --disable-guest-agent) guest_agent="no"
-  ;;
-  --enable-guest-agent-msi) guest_agent_msi="yes"
-  ;;
-  --disable-guest-agent-msi) guest_agent_msi="no"
-  ;;
-  --with-vss-sdk) vss_win32_sdk=""
-  ;;
-  --with-vss-sdk=*) vss_win32_sdk="$optarg"
-  ;;
-  --without-vss-sdk) vss_win32_sdk="no"
-  ;;
-  --with-win-sdk) win_sdk=""
-  ;;
-  --with-win-sdk=*) win_sdk="$optarg"
-  ;;
-  --without-win-sdk) win_sdk="no"
-  ;;
-  --enable-tools) want_tools="yes"
-  ;;
-  --disable-tools) want_tools="no"
-  ;;
-  --enable-seccomp) seccomp="yes"
-  ;;
-  --disable-seccomp) seccomp="no"
-  ;;
-  --disable-glusterfs) glusterfs="no"
-  ;;
-  --disable-avx2) avx2_opt="no"
-  ;;
-  --enable-avx2) avx2_opt="yes"
-  ;;
-  --disable-avx512f) avx512f_opt="no"
-  ;;
-  --enable-avx512f) avx512f_opt="yes"
-  ;;
-
-  --enable-glusterfs) glusterfs="yes"
-  ;;
-  --disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane)
-      echo "$0: $opt is obsolete, virtio-blk data-plane is always on" >&2
-  ;;
-  --enable-vhdx|--disable-vhdx)
-      echo "$0: $opt is obsolete, VHDX driver is always built" >&2
-  ;;
-  --enable-uuid|--disable-uuid)
-      echo "$0: $opt is obsolete, UUID support is always built" >&2
-  ;;
-  --disable-gtk) gtk="no"
-  ;;
-  --enable-gtk) gtk="yes"
-  ;;
-  --tls-priority=*) tls_priority="$optarg"
-  ;;
-  --disable-gnutls) gnutls="no"
-  ;;
-  --enable-gnutls) gnutls="yes"
-  ;;
-  --disable-nettle) nettle="no"
-  ;;
-  --enable-nettle) nettle="yes"
-  ;;
-  --disable-gcrypt) gcrypt="no"
-  ;;
-  --enable-gcrypt) gcrypt="yes"
-  ;;
-  --disable-auth-pam) auth_pam="no"
-  ;;
-  --enable-auth-pam) auth_pam="yes"
-  ;;
-  --enable-rdma) rdma="yes"
-  ;;
-  --disable-rdma) rdma="no"
-  ;;
-  --enable-pvrdma) pvrdma="yes"
-  ;;
-  --disable-pvrdma) pvrdma="no"
-  ;;
-  --disable-vte) vte="no"
-  ;;
-  --enable-vte) vte="yes"
-  ;;
-  --disable-virglrenderer) virglrenderer="no"
-  ;;
-  --enable-virglrenderer) virglrenderer="yes"
-  ;;
-  --disable-tpm) tpm="no"
-  ;;
-  --enable-tpm) tpm="yes"
-  ;;
-  --disable-libssh) libssh="no"
-  ;;
-  --enable-libssh) libssh="yes"
-  ;;
-  --disable-live-block-migration) live_block_migration="no"
-  ;;
-  --enable-live-block-migration) live_block_migration="yes"
-  ;;
-  --disable-numa) numa="no"
-  ;;
-  --enable-numa) numa="yes"
-  ;;
-  --disable-libxml2) libxml2="no"
-  ;;
-  --enable-libxml2) libxml2="yes"
-  ;;
-  --disable-tcmalloc) tcmalloc="no"
-  ;;
-  --enable-tcmalloc) tcmalloc="yes"
-  ;;
-  --disable-jemalloc) jemalloc="no"
-  ;;
-  --enable-jemalloc) jemalloc="yes"
-  ;;
-  --disable-replication) replication="no"
-  ;;
-  --enable-replication) replication="yes"
-  ;;
-  --disable-bochs) bochs="no"
-  ;;
-  --enable-bochs) bochs="yes"
-  ;;
-  --disable-cloop) cloop="no"
-  ;;
-  --enable-cloop) cloop="yes"
-  ;;
-  --disable-dmg) dmg="no"
-  ;;
-  --enable-dmg) dmg="yes"
-  ;;
-  --disable-qcow1) qcow1="no"
-  ;;
-  --enable-qcow1) qcow1="yes"
-  ;;
-  --disable-vdi) vdi="no"
-  ;;
-  --enable-vdi) vdi="yes"
-  ;;
-  --disable-vvfat) vvfat="no"
-  ;;
-  --enable-vvfat) vvfat="yes"
-  ;;
-  --disable-qed) qed="no"
-  ;;
-  --enable-qed) qed="yes"
-  ;;
-  --disable-parallels) parallels="no"
-  ;;
-  --enable-parallels) parallels="yes"
-  ;;
-  --disable-sheepdog) sheepdog="no"
-  ;;
-  --enable-sheepdog) sheepdog="yes"
-  ;;
-  --disable-vhost-user) vhost_user="no"
-  ;;
-  --enable-vhost-user) vhost_user="yes"
-  ;;
-  --disable-vhost-vdpa) vhost_vdpa="no"
-  ;;
-  --enable-vhost-vdpa) vhost_vdpa="yes"
-  ;;
-  --disable-vhost-kernel) vhost_kernel="no"
-  ;;
-  --enable-vhost-kernel) vhost_kernel="yes"
-  ;;
-  --disable-capstone) capstone="disabled"
-  ;;
-  --enable-capstone) capstone="enabled"
-  ;;
-  --enable-capstone=git) capstone="internal"
-  ;;
-  --enable-capstone=system) capstone="system"
-  ;;
-  --with-git=*) git="$optarg"
-  ;;
-  --enable-git-update) git_update=yes
-  ;;
-  --disable-git-update) git_update=no
-  ;;
-  --enable-debug-mutex) debug_mutex=yes
-  ;;
-  --disable-debug-mutex) debug_mutex=no
-  ;;
-  --enable-libpmem) libpmem=yes
-  ;;
-  --disable-libpmem) libpmem=no
-  ;;
-  --enable-xkbcommon) xkbcommon="enabled"
-  ;;
-  --disable-xkbcommon) xkbcommon="disabled"
-  ;;
-  --enable-plugins) plugins="yes"
-  ;;
-  --disable-plugins) plugins="no"
-  ;;
-  --enable-containers) use_containers="yes"
-  ;;
-  --disable-containers) use_containers="no"
-  ;;
-  --enable-fuzzing) fuzzing=yes
-  ;;
-  --disable-fuzzing) fuzzing=no
-  ;;
-  --gdb=*) gdb_bin="$optarg"
-  ;;
-  --enable-rng-none) rng_none=yes
-  ;;
-  --disable-rng-none) rng_none=no
-  ;;
-  --enable-keyring) secret_keyring="yes"
-  ;;
-  --disable-keyring) secret_keyring="no"
-  ;;
-  --enable-libdaxctl) libdaxctl=yes
-  ;;
-  --disable-libdaxctl) libdaxctl=no
-  ;;
-  *)
-      echo "ERROR: unknown option $opt"
-      echo "Try '$0 --help' for more information"
-      exit 1
-  ;;
-  esac
-done
-
-libdir="${libdir:-$prefix/lib}"
-libexecdir="${libexecdir:-$prefix/libexec}"
-includedir="${includedir:-$prefix/include}"
-
-if test "$mingw32" = "yes" ; then
-    mandir="$prefix"
-    datadir="$prefix"
-    docdir="$prefix"
-    bindir="$prefix"
-    sysconfdir="$prefix"
-    local_statedir="$prefix"
-else
-    mandir="${mandir:-$prefix/share/man}"
-    datadir="${datadir:-$prefix/share}"
-    docdir="${docdir:-$prefix/share/doc}"
-    bindir="${bindir:-$prefix/bin}"
-    sysconfdir="${sysconfdir:-$prefix/etc}"
-    local_statedir="${local_statedir:-$prefix/var}"
-fi
-firmwarepath="${firmwarepath:-$datadir/qemu-firmware}"
-localedir="${localedir:-$datadir/locale}"
-
-case "$cpu" in
-    ppc)
-           CPU_CFLAGS="-m32"
-           QEMU_LDFLAGS="-m32 $QEMU_LDFLAGS"
-           ;;
-    ppc64)
-           CPU_CFLAGS="-m64"
-           QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS"
-           ;;
-    sparc)
-           CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc"
-           QEMU_LDFLAGS="-m32 -mv8plus $QEMU_LDFLAGS"
-           ;;
-    sparc64)
-           CPU_CFLAGS="-m64 -mcpu=ultrasparc"
-           QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS"
-           ;;
-    s390)
-           CPU_CFLAGS="-m31"
-           QEMU_LDFLAGS="-m31 $QEMU_LDFLAGS"
-           ;;
-    s390x)
-           CPU_CFLAGS="-m64"
-           QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS"
-           ;;
-    i386)
-           CPU_CFLAGS="-m32"
-           QEMU_LDFLAGS="-m32 $QEMU_LDFLAGS"
-           ;;
-    x86_64)
-           # ??? Only extremely old AMD cpus do not have cmpxchg16b.
-           # If we truly care, we should simply detect this case at
-           # runtime and generate the fallback to serial emulation.
-           CPU_CFLAGS="-m64 -mcx16"
-           QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS"
-           ;;
-    x32)
-           CPU_CFLAGS="-mx32"
-           QEMU_LDFLAGS="-mx32 $QEMU_LDFLAGS"
-           ;;
-    # No special flags required for other host CPUs
-esac
-
-eval "cross_cc_${cpu}=\$host_cc"
-cross_cc_vars="$cross_cc_vars cross_cc_${cpu}"
-QEMU_CFLAGS="$CPU_CFLAGS $QEMU_CFLAGS"
-
-# For user-mode emulation the host arch has to be one we explicitly
-# support, even if we're using TCI.
-if [ "$ARCH" = "unknown" ]; then
-  bsd_user="no"
-  linux_user="no"
-fi
-
-default_target_list=""
-deprecated_targets_list=ppc64abi32-linux-user,tilegx-linux-user,lm32-softmmu,unicore32-softmmu
-deprecated_features=""
-mak_wilds=""
-
-if [ "$softmmu" = "yes" ]; then
-    mak_wilds="${mak_wilds} $source_path/default-configs/targets/*-softmmu.mak"
-fi
-if [ "$linux_user" = "yes" ]; then
-    mak_wilds="${mak_wilds} $source_path/default-configs/targets/*-linux-user.mak"
-fi
-if [ "$bsd_user" = "yes" ]; then
-    mak_wilds="${mak_wilds} $source_path/default-configs/targets/*-bsd-user.mak"
-fi
-
-# If the user doesn't explicitly specify a deprecated target we will
-# skip it.
-if test -z "$target_list"; then
-    if test -z "$target_list_exclude"; then
-        target_list_exclude="$deprecated_targets_list"
-    else
-        target_list_exclude="$target_list_exclude,$deprecated_targets_list"
-    fi
-fi
-
-for config in $mak_wilds; do
-    target="$(basename "$config" .mak)"
-    if echo "$target_list_exclude" | grep -vq "$target"; then
-        default_target_list="${default_target_list} $target"
-    fi
-done
-
-# Enumerate public trace backends for --help output
-trace_backend_list=$(echo $(grep -le '^PUBLIC = True$' "$source_path"/scripts/tracetool/backend/*.py | sed -e 's/^.*\/\(.*\)\.py$/\1/'))
-
-if test x"$show_help" = x"yes" ; then
-cat << EOF
-
-Usage: configure [options]
-Options: [defaults in brackets after descriptions]
-
-Standard options:
-  --help                   print this message
-  --prefix=PREFIX          install in PREFIX [$prefix]
-  --interp-prefix=PREFIX   where to find shared libraries, etc.
-                           use %M for cpu name [$interp_prefix]
-  --target-list=LIST       set target list (default: build all non-deprecated)
-$(echo Available targets: $default_target_list | \
-  fold -s -w 53 | sed -e 's/^/                           /')
-$(echo Deprecated targets: $deprecated_targets_list | \
-  fold -s -w 53 | sed -e 's/^/                           /')
-  --target-list-exclude=LIST exclude a set of targets from the default target-list
-
-Advanced options (experts only):
-  --cross-prefix=PREFIX    use PREFIX for compile tools [$cross_prefix]
-  --cc=CC                  use C compiler CC [$cc]
-  --iasl=IASL              use ACPI compiler IASL [$iasl]
-  --host-cc=CC             use C compiler CC [$host_cc] for code run at
-                           build time
-  --cxx=CXX                use C++ compiler CXX [$cxx]
-  --objcc=OBJCC            use Objective-C compiler OBJCC [$objcc]
-  --extra-cflags=CFLAGS    append extra C compiler flags QEMU_CFLAGS
-  --extra-cxxflags=CXXFLAGS append extra C++ compiler flags QEMU_CXXFLAGS
-  --extra-ldflags=LDFLAGS  append extra linker flags LDFLAGS
-  --cross-cc-ARCH=CC       use compiler when building ARCH guest test cases
-  --cross-cc-flags-ARCH=   use compiler flags when building ARCH guest tests
-  --make=MAKE              use specified make [$make]
-  --python=PYTHON          use specified python [$python]
-  --sphinx-build=SPHINX    use specified sphinx-build [$sphinx_build]
-  --meson=MESON            use specified meson [$meson]
-  --ninja=NINJA            use specified ninja [$ninja]
-  --smbd=SMBD              use specified smbd [$smbd]
-  --with-git=GIT           use specified git [$git]
-  --static                 enable static build [$static]
-  --mandir=PATH            install man pages in PATH
-  --datadir=PATH           install firmware in PATH/$qemu_suffix
-  --localedir=PATH         install translation in PATH/$qemu_suffix
-  --docdir=PATH            install documentation in PATH/$qemu_suffix
-  --bindir=PATH            install binaries in PATH
-  --libdir=PATH            install libraries in PATH
-  --libexecdir=PATH        install helper binaries in PATH
-  --sysconfdir=PATH        install config in PATH/$qemu_suffix
-  --localstatedir=PATH     install local state in PATH (set at runtime on win32)
-  --firmwarepath=PATH      search PATH for firmware files
-  --efi-aarch64=PATH       PATH of efi file to use for aarch64 VMs.
-  --with-suffix=SUFFIX     suffix for QEMU data inside datadir/libdir/sysconfdir/docdir [$qemu_suffix]
-  --with-pkgversion=VERS   use specified string as sub-version of the package
-  --enable-debug           enable common debug build options
-  --enable-sanitizers      enable default sanitizers
-  --enable-tsan            enable thread sanitizer
-  --disable-strip          disable stripping binaries
-  --disable-werror         disable compilation abort on warning
-  --disable-stack-protector disable compiler-provided stack protection
-  --audio-drv-list=LIST    set audio drivers list:
-                           Available drivers: $audio_possible_drivers
-  --block-drv-whitelist=L  Same as --block-drv-rw-whitelist=L
-  --block-drv-rw-whitelist=L
-                           set block driver read-write whitelist
-                           (affects only QEMU, not qemu-img)
-  --block-drv-ro-whitelist=L
-                           set block driver read-only whitelist
-                           (affects only QEMU, not qemu-img)
-  --enable-trace-backends=B Set trace backend
-                           Available backends: $trace_backend_list
-  --with-trace-file=NAME   Full PATH,NAME of file to store traces
-                           Default:trace-<pid>
-  --disable-slirp          disable SLIRP userspace network connectivity
-  --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI)
-  --enable-malloc-trim     enable libc malloc_trim() for memory optimization
-  --oss-lib                path to OSS library
-  --cpu=CPU                Build for host CPU [$cpu]
-  --with-coroutine=BACKEND coroutine backend. Supported options:
-                           ucontext, sigaltstack, windows
-  --enable-gcov            enable test coverage analysis with gcov
-  --disable-blobs          disable installing provided firmware blobs
-  --with-vss-sdk=SDK-path  enable Windows VSS support in QEMU Guest Agent
-  --with-win-sdk=SDK-path  path to Windows Platform SDK (to build VSS .tlb)
-  --tls-priority           default TLS protocol/cipher priority string
-  --enable-gprof           QEMU profiling with gprof
-  --enable-profiler        profiler support
-  --enable-debug-stack-usage
-                           track the maximum stack usage of stacks created by qemu_alloc_stack
-  --enable-plugins
-                           enable plugins via shared library loading
-  --disable-containers     don't use containers for cross-building
-  --gdb=GDB-path           gdb to use for gdbstub tests [$gdb_bin]
-
-Optional features, enabled with --enable-FEATURE and
-disabled with --disable-FEATURE, default is enabled if available:
-
-  system          all system emulation targets
-  user            supported user emulation targets
-  linux-user      all linux usermode emulation targets
-  bsd-user        all BSD usermode emulation targets
-  docs            build documentation
-  guest-agent     build the QEMU Guest Agent
-  guest-agent-msi build guest agent Windows MSI installation package
-  pie             Position Independent Executables
-  modules         modules support (non-Windows)
-  module-upgrades try to load modules from alternate paths for upgrades
-  debug-tcg       TCG debugging (default is disabled)
-  debug-info      debugging information
-  sparse          sparse checker
-  safe-stack      SafeStack Stack Smash Protection. Depends on
-                  clang/llvm >= 3.7 and requires coroutine backend ucontext.
-
-  gnutls          GNUTLS cryptography support
-  nettle          nettle cryptography support
-  gcrypt          libgcrypt cryptography support
-  auth-pam        PAM access control
-  sdl             SDL UI
-  sdl-image       SDL Image support for icons
-  gtk             gtk UI
-  vte             vte support for the gtk UI
-  curses          curses UI
-  iconv           font glyph conversion support
-  vnc             VNC UI support
-  vnc-sasl        SASL encryption for VNC server
-  vnc-jpeg        JPEG lossy compression for VNC server
-  vnc-png         PNG compression for VNC server
-  cocoa           Cocoa UI (Mac OS X only)
-  virtfs          VirtFS
-  virtiofsd       build virtiofs daemon (virtiofsd)
-  libudev         Use libudev to enumerate host devices
-  mpath           Multipath persistent reservation passthrough
-  xen             xen backend driver support
-  xen-pci-passthrough    PCI passthrough support for Xen
-  brlapi          BrlAPI (Braile)
-  curl            curl connectivity
-  membarrier      membarrier system call (for Linux 4.14+ or Windows)
-  fdt             fdt device tree
-  kvm             KVM acceleration support
-  hax             HAX acceleration support
-  hvf             Hypervisor.framework acceleration support
-  whpx            Windows Hypervisor Platform acceleration support
-  rdma            Enable RDMA-based migration
-  pvrdma          Enable PVRDMA support
-  vde             support for vde network
-  netmap          support for netmap network
-  linux-aio       Linux AIO support
-  linux-io-uring  Linux io_uring support
-  cap-ng          libcap-ng support
-  attr            attr and xattr support
-  vhost-net       vhost-net kernel acceleration support
-  vhost-vsock     virtio sockets device support
-  vhost-scsi      vhost-scsi kernel target support
-  vhost-crypto    vhost-user-crypto backend support
-  vhost-kernel    vhost kernel backend support
-  vhost-user      vhost-user backend support
-  vhost-user-blk-server    vhost-user-blk server support
-  vhost-vdpa      vhost-vdpa kernel backend support
-  spice           spice
-  rbd             rados block device (rbd)
-  libiscsi        iscsi support
-  libnfs          nfs support
-  smartcard       smartcard support (libcacard)
-  u2f             U2F support (u2f-emu)
-  libusb          libusb (for usb passthrough)
-  live-block-migration   Block migration in the main migration stream
-  usb-redir       usb network redirection support
-  lzo             support of lzo compression library
-  snappy          support of snappy compression library
-  bzip2           support of bzip2 compression library
-                  (for reading bzip2-compressed dmg images)
-  lzfse           support of lzfse compression library
-                  (for reading lzfse-compressed dmg images)
-  zstd            support for zstd compression library
-                  (for migration compression and qcow2 cluster compression)
-  seccomp         seccomp support
-  coroutine-pool  coroutine freelist (better performance)
-  glusterfs       GlusterFS backend
-  tpm             TPM support
-  libssh          ssh block device support
-  numa            libnuma support
-  libxml2         for Parallels image format
-  tcmalloc        tcmalloc support
-  jemalloc        jemalloc support
-  avx2            AVX2 optimization support
-  avx512f         AVX512F optimization support
-  replication     replication support
-  opengl          opengl support
-  virglrenderer   virgl rendering support
-  xfsctl          xfsctl support
-  qom-cast-debug  cast debugging support
-  tools           build qemu-io, qemu-nbd and qemu-img tools
-  bochs           bochs image format support
-  cloop           cloop image format support
-  dmg             dmg image format support
-  qcow1           qcow v1 image format support
-  vdi             vdi image format support
-  vvfat           vvfat image format support
-  qed             qed image format support
-  parallels       parallels image format support
-  sheepdog        sheepdog block driver support (deprecated)
-  crypto-afalg    Linux AF_ALG crypto backend driver
-  capstone        capstone disassembler support
-  debug-mutex     mutex debugging support
-  libpmem         libpmem support
-  xkbcommon       xkbcommon support
-  rng-none        dummy RNG, avoid using /dev/(u)random and getrandom()
-  libdaxctl       libdaxctl support
-
-NOTE: The object files are built at the place where configure is launched
-EOF
-exit 0
-fi
-
-# Remove old dependency files to make sure that they get properly regenerated
-rm -f */config-devices.mak.d
-
-if test -z "$python"
-then
-    error_exit "Python not found. Use --python=/path/to/python"
-fi
-
-# Note that if the Python conditional here evaluates True we will exit
-# with status 1 which is a shell 'false' value.
-if ! $python -c 'import sys; sys.exit(sys.version_info < (3,6))'; then
-  error_exit "Cannot use '$python', Python >= 3.6 is required." \
-      "Use --python=/path/to/python to specify a supported Python."
-fi
-
-# Preserve python version since some functionality is dependent on it
-python_version=$($python -c 'import sys; print("%d.%d.%d" % (sys.version_info[0], sys.version_info[1], sys.version_info[2]))' 2>/dev/null)
-
-# Suppress writing compiled files
-python="$python -B"
-
-if test -z "$meson"; then
-    if test "$explicit_python" = no && has meson && version_ge "$(meson --version)" 0.55.3; then
-        meson=meson
-    elif test -e "${source_path}/.git" && test $git_update = 'yes' ; then
-        meson=git
-    elif test -e "${source_path}/meson/meson.py" ; then
-        meson=internal
-    else
-        if test "$explicit_python" = yes; then
-            error_exit "--python requires using QEMU's embedded Meson distribution, but it was not found."
-        else
-            error_exit "Meson not found.  Use --meson=/path/to/meson"
-        fi
-    fi
-else
-    # Meson uses its own Python interpreter to invoke other Python scripts,
-    # but the user wants to use the one they specified with --python.
-    #
-    # We do not want to override the distro Python interpreter (and sometimes
-    # cannot: for example in Homebrew /usr/bin/meson is a bash script), so
-    # just require --meson=git|internal together with --python.
-    if test "$explicit_python" = yes; then
-        case "$meson" in
-            git | internal) ;;
-            *) error_exit "--python requires using QEMU's embedded Meson distribution." ;;
-        esac
-    fi
-fi
-
-if test "$meson" = git; then
-    git_submodules="${git_submodules} meson"
-fi
-
-case "$meson" in
-    git | internal)
-        meson="$python ${source_path}/meson/meson.py"
-        ;;
-    *) meson=$(command -v "$meson") ;;
-esac
-
-# Probe for ninja
-
-if test -z "$ninja"; then
-    for c in ninja ninja-build samu; do
-        if has $c; then
-            ninja=$(command -v "$c")
-            break
-        fi
-    done
-    if test -z "$ninja"; then
-      error_exit "Cannot find Ninja"
-    fi
-fi
-
-# Check that the C compiler works. Doing this here before testing
-# the host CPU ensures that we had a valid CC to autodetect the
-# $cpu var (and we should bail right here if that's not the case).
-# It also allows the help message to be printed without a CC.
-write_c_skeleton;
-if compile_object ; then
-  : C compiler works ok
-else
-    error_exit "\"$cc\" either does not exist or does not work"
-fi
-if ! compile_prog ; then
-    error_exit "\"$cc\" cannot build an executable (is your linker broken?)"
-fi
-
-# Consult white-list to determine whether to enable werror
-# by default.  Only enable by default for git builds
-if test -z "$werror" ; then
-    if test -e "$source_path/.git" && \
-        { test "$linux" = "yes" || test "$mingw32" = "yes"; }; then
-        werror="yes"
-    else
-        werror="no"
-    fi
-fi
-
-if test "$bogus_os" = "yes"; then
-    # Now that we know that we're not printing the help and that
-    # the compiler works (so the results of the check_defines we used
-    # to identify the OS are reliable), if we didn't recognize the
-    # host OS we should stop now.
-    error_exit "Unrecognized host OS (uname -s reports '$(uname -s)')"
-fi
-
-# Check whether the compiler matches our minimum requirements:
-cat > $TMPC << EOF
-#if defined(__clang_major__) && defined(__clang_minor__)
-# ifdef __apple_build_version__
-#  if __clang_major__ < 5 || (__clang_major__ == 5 && __clang_minor__ < 1)
-#   error You need at least XCode Clang v5.1 to compile QEMU
-#  endif
-# else
-#  if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ < 4)
-#   error You need at least Clang v3.4 to compile QEMU
-#  endif
-# endif
-#elif defined(__GNUC__) && defined(__GNUC_MINOR__)
-# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
-#  error You need at least GCC v4.8 to compile QEMU
-# endif
-#else
-# error You either need GCC or Clang to compiler QEMU
-#endif
-int main (void) { return 0; }
-EOF
-if ! compile_prog "" "" ; then
-    error_exit "You need at least GCC v4.8 or Clang v3.4 (or XCode Clang v5.1)"
-fi
-
-# Accumulate -Wfoo and -Wno-bar separately.
-# We will list all of the enable flags first, and the disable flags second.
-# Note that we do not add -Werror, because that would enable it for all
-# configure tests. If a configure test failed due to -Werror this would
-# just silently disable some features, so it's too error prone.
-
-warn_flags=
-add_to warn_flags -Wold-style-declaration
-add_to warn_flags -Wold-style-definition
-add_to warn_flags -Wtype-limits
-add_to warn_flags -Wformat-security
-add_to warn_flags -Wformat-y2k
-add_to warn_flags -Winit-self
-add_to warn_flags -Wignored-qualifiers
-add_to warn_flags -Wempty-body
-add_to warn_flags -Wnested-externs
-add_to warn_flags -Wendif-labels
-add_to warn_flags -Wexpansion-to-defined
-
-nowarn_flags=
-add_to nowarn_flags -Wno-initializer-overrides
-add_to nowarn_flags -Wno-missing-include-dirs
-add_to nowarn_flags -Wno-shift-negative-value
-add_to nowarn_flags -Wno-string-plus-int
-add_to nowarn_flags -Wno-typedef-redefinition
-add_to nowarn_flags -Wno-tautological-type-limit-compare
-add_to nowarn_flags -Wno-psabi
-
-gcc_flags="$warn_flags $nowarn_flags"
-
-cc_has_warning_flag() {
-    write_c_skeleton;
-
-    # Use the positive sense of the flag when testing for -Wno-wombat
-    # support (gcc will happily accept the -Wno- form of unknown
-    # warning options).
-    optflag="$(echo $1 | sed -e 's/^-Wno-/-W/')"
-    compile_prog "-Werror $optflag" ""
-}
-
-for flag in $gcc_flags; do
-    if cc_has_warning_flag $flag ; then
-        QEMU_CFLAGS="$QEMU_CFLAGS $flag"
-    fi
-done
-
-if test "$stack_protector" != "no"; then
-  cat > $TMPC << EOF
-int main(int argc, char *argv[])
-{
-    char arr[64], *p = arr, *c = argv[0];
-    while (*c) {
-        *p++ = *c++;
-    }
-    return 0;
-}
-EOF
-  gcc_flags="-fstack-protector-strong -fstack-protector-all"
-  sp_on=0
-  for flag in $gcc_flags; do
-    # We need to check both a compile and a link, since some compiler
-    # setups fail only on a .c->.o compile and some only at link time
-    if compile_object "-Werror $flag" &&
-       compile_prog "-Werror $flag" ""; then
-      QEMU_CFLAGS="$QEMU_CFLAGS $flag"
-      QEMU_LDFLAGS="$QEMU_LDFLAGS $flag"
-      sp_on=1
-      break
-    fi
-  done
-  if test "$stack_protector" = yes; then
-    if test $sp_on = 0; then
-      error_exit "Stack protector not supported"
-    fi
-  fi
-fi
-
-# Disable -Wmissing-braces on older compilers that warn even for
-# the "universal" C zero initializer {0}.
-cat > $TMPC << EOF
-struct {
-  int a[2];
-} x = {0};
-EOF
-if compile_object "-Werror" "" ; then
-  :
-else
-  QEMU_CFLAGS="$QEMU_CFLAGS -Wno-missing-braces"
-fi
-
-# Our module code doesn't support Windows
-if test "$modules" = "yes" && test "$mingw32" = "yes" ; then
-  error_exit "Modules are not available for Windows"
-fi
-
-# module_upgrades is only reasonable if modules are enabled
-if test "$modules" = "no" && test "$module_upgrades" = "yes" ; then
-  error_exit "Can't enable module-upgrades as Modules are not enabled"
-fi
-
-# Static linking is not possible with modules or PIE
-if test "$static" = "yes" ; then
-  if test "$modules" = "yes" ; then
-    error_exit "static and modules are mutually incompatible"
-  fi
-fi
-
-# Unconditional check for compiler __thread support
-  cat > $TMPC << EOF
-static __thread int tls_var;
-int main(void) { return tls_var; }
-EOF
-
-if ! compile_prog "-Werror" "" ; then
-    error_exit "Your compiler does not support the __thread specifier for " \
-       "Thread-Local Storage (TLS). Please upgrade to a version that does."
-fi
-
-cat > $TMPC << EOF
-
-#ifdef __linux__
-#  define THREAD __thread
-#else
-#  define THREAD
-#endif
-static THREAD int tls_var;
-int main(void) { return tls_var; }
-EOF
-
-# Check we support --no-pie first; we will need this for building ROMs.
-if compile_prog "-Werror -fno-pie" "-no-pie"; then
-  CFLAGS_NOPIE="-fno-pie"
-fi
-
-if test "$static" = "yes"; then
-  if test "$pie" != "no" && compile_prog "-Werror -fPIE -DPIE" "-static-pie"; then
-    CONFIGURE_CFLAGS="-fPIE -DPIE $CONFIGURE_CFLAGS"
-    QEMU_LDFLAGS="-static-pie $QEMU_LDFLAGS"
-    pie="yes"
-  elif test "$pie" = "yes"; then
-    error_exit "-static-pie not available due to missing toolchain support"
-  else
-    QEMU_LDFLAGS="-static $QEMU_LDFLAGS"
-    pie="no"
-  fi
-elif test "$pie" = "no"; then
-  CONFIGURE_CFLAGS="$CFLAGS_NOPIE $CONFIGURE_CFLAGS"
-elif compile_prog "-Werror -fPIE -DPIE" "-pie"; then
-  CONFIGURE_CFLAGS="-fPIE -DPIE $CONFIGURE_CFLAGS"
-  CONFIGURE_LDFLAGS="-pie $CONFIGURE_LDFLAGS"
-  pie="yes"
-elif test "$pie" = "yes"; then
-  error_exit "PIE not available due to missing toolchain support"
-else
-  echo "Disabling PIE due to missing toolchain support"
-  pie="no"
-fi
-
-# Detect support for PT_GNU_RELRO + DT_BIND_NOW.
-# The combination is known as "full relro", because .got.plt is read-only too.
-if compile_prog "" "-Wl,-z,relro -Wl,-z,now" ; then
-  QEMU_LDFLAGS="-Wl,-z,relro -Wl,-z,now $QEMU_LDFLAGS"
-fi
-
-##########################################
-# __sync_fetch_and_and requires at least -march=i486. Many toolchains
-# use i686 as default anyway, but for those that don't, an explicit
-# specification is necessary
-
-if test "$cpu" = "i386"; then
-  cat > $TMPC << EOF
-static int sfaa(int *ptr)
-{
-  return __sync_fetch_and_and(ptr, 0);
-}
-
-int main(void)
-{
-  int val = 42;
-  val = __sync_val_compare_and_swap(&val, 0, 1);
-  sfaa(&val);
-  return val;
-}
-EOF
-  if ! compile_prog "" "" ; then
-    QEMU_CFLAGS="-march=i486 $QEMU_CFLAGS"
-  fi
-fi
-
-#########################################
-# Solaris specific configure tool chain decisions
-
-if test "$solaris" = "yes" ; then
-  if has ar; then
-    :
-  else
-    if test -f /usr/ccs/bin/ar ; then
-      error_exit "No path includes ar" \
-          "Add /usr/ccs/bin to your path and rerun configure"
-    fi
-    error_exit "No path includes ar"
-  fi
-fi
-
-if test -z "${target_list+xxx}" ; then
-    default_targets=yes
-    for target in $default_target_list; do
-        target_list="$target_list $target"
-    done
-    target_list="${target_list# }"
-else
-    default_targets=no
-    target_list=$(echo "$target_list" | sed -e 's/,/ /g')
-    for target in $target_list; do
-        # Check that we recognised the target name; this allows a more
-        # friendly error message than if we let it fall through.
-        case " $default_target_list " in
-            *" $target "*)
-                ;;
-            *)
-                error_exit "Unknown target name '$target'"
-                ;;
-        esac
-    done
-fi
-
-for target in $target_list; do
-    # if a deprecated target is enabled we note it here
-    if echo "$deprecated_targets_list" | grep -q "$target"; then
-        add_to deprecated_features $target
-    fi
-done
-
-# see if system emulation was really requested
-case " $target_list " in
-  *"-softmmu "*) softmmu=yes
-  ;;
-  *) softmmu=no
-  ;;
-esac
-
-feature_not_found() {
-  feature=$1
-  remedy=$2
-
-  error_exit "User requested feature $feature" \
-      "configure was not able to find it." \
-      "$remedy"
-}
-
-# ---
-# big/little endian test
-cat > $TMPC << EOF
-short big_endian[] = { 0x4269, 0x4765, 0x4e64, 0x4961, 0x4e00, 0, };
-short little_endian[] = { 0x694c, 0x7454, 0x654c, 0x6e45, 0x6944, 0x6e41, 0, };
-extern int foo(short *, short *);
-int main(int argc, char *argv[]) {
-    return foo(big_endian, little_endian);
-}
-EOF
-
-if compile_object ; then
-    if strings -a $TMPO | grep -q BiGeNdIaN ; then
-        bigendian="yes"
-    elif strings -a $TMPO | grep -q LiTtLeEnDiAn ; then
-        bigendian="no"
-    else
-        echo big/little test failed
-    fi
-else
-    echo big/little test failed
-fi
-
-##########################################
-# system tools
-if test -z "$want_tools"; then
-    if test "$softmmu" = "no"; then
-        want_tools=no
-    else
-        want_tools=yes
-    fi
-fi
-
-##########################################
-# cocoa implies not SDL or GTK
-# (the cocoa UI code currently assumes it is always the active UI
-# and doesn't interact well with other UI frontend code)
-if test "$cocoa" = "enabled"; then
-    if test "$sdl" = "enabled"; then
-        error_exit "Cocoa and SDL UIs cannot both be enabled at once"
-    fi
-    if test "$gtk" = "yes"; then
-        error_exit "Cocoa and GTK UIs cannot both be enabled at once"
-    fi
-    gtk=no
-    sdl=disabled
-fi
-
-# Some versions of Mac OS X incorrectly define SIZE_MAX
-cat > $TMPC << EOF
-#include <stdint.h>
-#include <stdio.h>
-int main(int argc, char *argv[]) {
-    return printf("%zu", SIZE_MAX);
-}
-EOF
-have_broken_size_max=no
-if ! compile_object -Werror ; then
-    have_broken_size_max=yes
-fi
-
-##########################################
-# L2TPV3 probe
-
-cat > $TMPC <<EOF
-#include <sys/socket.h>
-#include <linux/ip.h>
-int main(void) { return sizeof(struct mmsghdr); }
-EOF
-if compile_prog "" "" ; then
-  l2tpv3=yes
-else
-  l2tpv3=no
-fi
-
-if check_include "pty.h" ; then
-  pty_h=yes
-else
-  pty_h=no
-fi
-
-cat > $TMPC <<EOF
-#include <sys/mman.h>
-int main(int argc, char *argv[]) {
-    return mlockall(MCL_FUTURE);
-}
-EOF
-if compile_prog "" "" ; then
-  have_mlockall=yes
-else
-  have_mlockall=no
-fi
-
-#########################################
-# vhost interdependencies and host support
-
-# vhost backends
-if test "$vhost_user" = "yes" && test "$linux" != "yes"; then
-  error_exit "vhost-user is only available on Linux"
-fi
-test "$vhost_vdpa" = "" && vhost_vdpa=$linux
-if test "$vhost_vdpa" = "yes" && test "$linux" != "yes"; then
-  error_exit "vhost-vdpa is only available on Linux"
-fi
-test "$vhost_kernel" = "" && vhost_kernel=$linux
-if test "$vhost_kernel" = "yes" && test "$linux" != "yes"; then
-  error_exit "vhost-kernel is only available on Linux"
-fi
-
-# vhost-kernel devices
-test "$vhost_scsi" = "" && vhost_scsi=$vhost_kernel
-if test "$vhost_scsi" = "yes" && test "$vhost_kernel" != "yes"; then
-  error_exit "--enable-vhost-scsi requires --enable-vhost-kernel"
-fi
-test "$vhost_vsock" = "" && vhost_vsock=$vhost_kernel
-if test "$vhost_vsock" = "yes" && test "$vhost_kernel" != "yes"; then
-  error_exit "--enable-vhost-vsock requires --enable-vhost-kernel"
-fi
-
-# vhost-user backends
-test "$vhost_net_user" = "" && vhost_net_user=$vhost_user
-if test "$vhost_net_user" = "yes" && test "$vhost_user" = "no"; then
-  error_exit "--enable-vhost-net-user requires --enable-vhost-user"
-fi
-test "$vhost_crypto" = "" && vhost_crypto=$vhost_user
-if test "$vhost_crypto" = "yes" && test "$vhost_user" = "no"; then
-  error_exit "--enable-vhost-crypto requires --enable-vhost-user"
-fi
-test "$vhost_user_fs" = "" && vhost_user_fs=$vhost_user
-if test "$vhost_user_fs" = "yes" && test "$vhost_user" = "no"; then
-  error_exit "--enable-vhost-user-fs requires --enable-vhost-user"
-fi
-#vhost-vdpa backends
-test "$vhost_net_vdpa" = "" && vhost_net_vdpa=$vhost_vdpa
-if test "$vhost_net_vdpa" = "yes" && test "$vhost_vdpa" = "no"; then
-  error_exit "--enable-vhost-net-vdpa requires --enable-vhost-vdpa"
-fi
-
-# OR the vhost-kernel, vhost-vdpa and vhost-user values for simplicity
-if test "$vhost_net" = ""; then
-  test "$vhost_net_user" = "yes" && vhost_net=yes
-  test "$vhost_net_vdpa" = "yes" && vhost_net=yes
-  test "$vhost_kernel" = "yes" && vhost_net=yes
-fi
-
-##########################################
-# pkg-config probe
-
-if ! has "$pkg_config_exe"; then
-  error_exit "pkg-config binary '$pkg_config_exe' not found"
-fi
-
-##########################################
-# NPTL probe
-
-if test "$linux_user" = "yes"; then
-  cat > $TMPC <<EOF
-#include <sched.h>
-#include <linux/futex.h>
-int main(void) {
-#if !defined(CLONE_SETTLS) || !defined(FUTEX_WAIT)
-#error bork
-#endif
-  return 0;
-}
-EOF
-  if ! compile_object ; then
-    feature_not_found "nptl" "Install glibc and linux kernel headers."
-  fi
-fi
-
-##########################################
-# lzo check
-
-if test "$lzo" != "no" ; then
-    cat > $TMPC << EOF
-#include <lzo/lzo1x.h>
-int main(void) { lzo_version(); return 0; }
-EOF
-    if compile_prog "" "-llzo2" ; then
-        lzo_libs="-llzo2"
-        lzo="yes"
-    else
-        if test "$lzo" = "yes"; then
-            feature_not_found "liblzo2" "Install liblzo2 devel"
-        fi
-        lzo="no"
-    fi
-fi
-
-##########################################
-# snappy check
-
-if test "$snappy" != "no" ; then
-    cat > $TMPC << EOF
-#include <snappy-c.h>
-int main(void) { snappy_max_compressed_length(4096); return 0; }
-EOF
-    if compile_prog "" "-lsnappy" ; then
-        snappy_libs='-lsnappy'
-        snappy="yes"
-    else
-        if test "$snappy" = "yes"; then
-            feature_not_found "libsnappy" "Install libsnappy devel"
-        fi
-        snappy="no"
-    fi
-fi
-
-##########################################
-# bzip2 check
-
-if test "$bzip2" != "no" ; then
-    cat > $TMPC << EOF
-#include <bzlib.h>
-int main(void) { BZ2_bzlibVersion(); return 0; }
-EOF
-    if compile_prog "" "-lbz2" ; then
-        bzip2="yes"
-    else
-        if test "$bzip2" = "yes"; then
-            feature_not_found "libbzip2" "Install libbzip2 devel"
-        fi
-        bzip2="no"
-    fi
-fi
-
-##########################################
-# lzfse check
-
-if test "$lzfse" != "no" ; then
-    cat > $TMPC << EOF
-#include <lzfse.h>
-int main(void) { lzfse_decode_scratch_size(); return 0; }
-EOF
-    if compile_prog "" "-llzfse" ; then
-        lzfse="yes"
-    else
-        if test "$lzfse" = "yes"; then
-            feature_not_found "lzfse" "Install lzfse devel"
-        fi
-        lzfse="no"
-    fi
-fi
-
-##########################################
-# zstd check
-
-if test "$zstd" != "no" ; then
-    libzstd_minver="1.4.0"
-    if $pkg_config --atleast-version=$libzstd_minver libzstd ; then
-        zstd_cflags="$($pkg_config --cflags libzstd)"
-        zstd_libs="$($pkg_config --libs libzstd)"
-        zstd="yes"
-    else
-        if test "$zstd" = "yes" ; then
-            feature_not_found "libzstd" "Install libzstd devel"
-        fi
-        zstd="no"
-    fi
-fi
-
-##########################################
-# libseccomp check
-
-if test "$seccomp" != "no" ; then
-    libseccomp_minver="2.3.0"
-    if $pkg_config --atleast-version=$libseccomp_minver libseccomp ; then
-        seccomp_cflags="$($pkg_config --cflags libseccomp)"
-        seccomp_libs="$($pkg_config --libs libseccomp)"
-        seccomp="yes"
-    else
-        if test "$seccomp" = "yes" ; then
-            feature_not_found "libseccomp" \
-                 "Install libseccomp devel >= $libseccomp_minver"
-        fi
-        seccomp="no"
-    fi
-fi
-
-##########################################
-# xen probe
-
-if test "$xen" != "disabled" ; then
-  # Check whether Xen library path is specified via --extra-ldflags to avoid
-  # overriding this setting with pkg-config output. If not, try pkg-config
-  # to obtain all needed flags.
-
-  if ! echo $EXTRA_LDFLAGS | grep tools/libxc > /dev/null && \
-     $pkg_config --exists xencontrol ; then
-    xen_ctrl_version="$(printf '%d%02d%02d' \
-      $($pkg_config --modversion xencontrol | sed 's/\./ /g') )"
-    xen=enabled
-    xen_pc="xencontrol xenstore xenforeignmemory xengnttab"
-    xen_pc="$xen_pc xenevtchn xendevicemodel"
-    if $pkg_config --exists xentoolcore; then
-      xen_pc="$xen_pc xentoolcore"
-    fi
-    xen_cflags="$($pkg_config --cflags $xen_pc)"
-    xen_libs="$($pkg_config --libs $xen_pc)"
-  else
-
-    xen_libs="-lxenstore -lxenctrl"
-    xen_stable_libs="-lxenforeignmemory -lxengnttab -lxenevtchn"
-
-    # First we test whether Xen headers and libraries are available.
-    # If no, we are done and there is no Xen support.
-    # If yes, more tests are run to detect the Xen version.
-
-    # Xen (any)
-    cat > $TMPC <<EOF
-#include <xenctrl.h>
-int main(void) {
-  return 0;
-}
-EOF
-    if ! compile_prog "" "$xen_libs" ; then
-      # Xen not found
-      if test "$xen" = "enabled" ; then
-        feature_not_found "xen" "Install xen devel"
-      fi
-      xen=disabled
-
-    # Xen unstable
-    elif
-        cat > $TMPC <<EOF &&
-#undef XC_WANT_COMPAT_DEVICEMODEL_API
-#define __XEN_TOOLS__
-#include <xendevicemodel.h>
-#include <xenforeignmemory.h>
-int main(void) {
-  xendevicemodel_handle *xd;
-  xenforeignmemory_handle *xfmem;
-
-  xd = xendevicemodel_open(0, 0);
-  xendevicemodel_pin_memory_cacheattr(xd, 0, 0, 0, 0);
-
-  xfmem = xenforeignmemory_open(0, 0);
-  xenforeignmemory_map_resource(xfmem, 0, 0, 0, 0, 0, NULL, 0, 0);
-
-  return 0;
-}
-EOF
-        compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs -lxentoolcore"
-      then
-      xen_stable_libs="-lxendevicemodel $xen_stable_libs -lxentoolcore"
-      xen_ctrl_version=41100
-      xen=enabled
-    elif
-        cat > $TMPC <<EOF &&
-#undef XC_WANT_COMPAT_MAP_FOREIGN_API
-#include <xenforeignmemory.h>
-#include <xentoolcore.h>
-int main(void) {
-  xenforeignmemory_handle *xfmem;
-
-  xfmem = xenforeignmemory_open(0, 0);
-  xenforeignmemory_map2(xfmem, 0, 0, 0, 0, 0, 0, 0);
-  xentoolcore_restrict_all(0);
-
-  return 0;
-}
-EOF
-        compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs -lxentoolcore"
-      then
-      xen_stable_libs="-lxendevicemodel $xen_stable_libs -lxentoolcore"
-      xen_ctrl_version=41000
-      xen=enabled
-    elif
-        cat > $TMPC <<EOF &&
-#undef XC_WANT_COMPAT_DEVICEMODEL_API
-#define __XEN_TOOLS__
-#include <xendevicemodel.h>
-int main(void) {
-  xendevicemodel_handle *xd;
-
-  xd = xendevicemodel_open(0, 0);
-  xendevicemodel_close(xd);
-
-  return 0;
-}
-EOF
-        compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs"
-      then
-      xen_stable_libs="-lxendevicemodel $xen_stable_libs"
-      xen_ctrl_version=40900
-      xen=enabled
-    elif
-        cat > $TMPC <<EOF &&
-/*
- * If we have stable libs the we don't want the libxc compat
- * layers, regardless of what CFLAGS we may have been given.
- *
- * Also, check if xengnttab_grant_copy_segment_t is defined and
- * grant copy operation is implemented.
- */
-#undef XC_WANT_COMPAT_EVTCHN_API
-#undef XC_WANT_COMPAT_GNTTAB_API
-#undef XC_WANT_COMPAT_MAP_FOREIGN_API
-#include <xenctrl.h>
-#include <xenstore.h>
-#include <xenevtchn.h>
-#include <xengnttab.h>
-#include <xenforeignmemory.h>
-#include <stdint.h>
-#include <xen/hvm/hvm_info_table.h>
-#if !defined(HVM_MAX_VCPUS)
-# error HVM_MAX_VCPUS not defined
-#endif
-int main(void) {
-  xc_interface *xc = NULL;
-  xenforeignmemory_handle *xfmem;
-  xenevtchn_handle *xe;
-  xengnttab_handle *xg;
-  xengnttab_grant_copy_segment_t* seg = NULL;
-
-  xs_daemon_open();
-
-  xc = xc_interface_open(0, 0, 0);
-  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
-  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
-  xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
-  xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL);
-
-  xfmem = xenforeignmemory_open(0, 0);
-  xenforeignmemory_map(xfmem, 0, 0, 0, 0, 0);
-
-  xe = xenevtchn_open(0, 0);
-  xenevtchn_fd(xe);
-
-  xg = xengnttab_open(0, 0);
-  xengnttab_grant_copy(xg, 0, seg);
-
-  return 0;
-}
-EOF
-        compile_prog "" "$xen_libs $xen_stable_libs"
-      then
-      xen_ctrl_version=40800
-      xen=enabled
-    elif
-        cat > $TMPC <<EOF &&
-/*
- * If we have stable libs the we don't want the libxc compat
- * layers, regardless of what CFLAGS we may have been given.
- */
-#undef XC_WANT_COMPAT_EVTCHN_API
-#undef XC_WANT_COMPAT_GNTTAB_API
-#undef XC_WANT_COMPAT_MAP_FOREIGN_API
-#include <xenctrl.h>
-#include <xenstore.h>
-#include <xenevtchn.h>
-#include <xengnttab.h>
-#include <xenforeignmemory.h>
-#include <stdint.h>
-#include <xen/hvm/hvm_info_table.h>
-#if !defined(HVM_MAX_VCPUS)
-# error HVM_MAX_VCPUS not defined
-#endif
-int main(void) {
-  xc_interface *xc = NULL;
-  xenforeignmemory_handle *xfmem;
-  xenevtchn_handle *xe;
-  xengnttab_handle *xg;
-
-  xs_daemon_open();
-
-  xc = xc_interface_open(0, 0, 0);
-  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
-  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
-  xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
-  xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL);
-
-  xfmem = xenforeignmemory_open(0, 0);
-  xenforeignmemory_map(xfmem, 0, 0, 0, 0, 0);
-
-  xe = xenevtchn_open(0, 0);
-  xenevtchn_fd(xe);
-
-  xg = xengnttab_open(0, 0);
-  xengnttab_map_grant_ref(xg, 0, 0, 0);
-
-  return 0;
-}
-EOF
-        compile_prog "" "$xen_libs $xen_stable_libs"
-      then
-      xen_ctrl_version=40701
-      xen=enabled
-
-    # Xen 4.6
-    elif
-        cat > $TMPC <<EOF &&
-#include <xenctrl.h>
-#include <xenstore.h>
-#include <stdint.h>
-#include <xen/hvm/hvm_info_table.h>
-#if !defined(HVM_MAX_VCPUS)
-# error HVM_MAX_VCPUS not defined
-#endif
-int main(void) {
-  xc_interface *xc;
-  xs_daemon_open();
-  xc = xc_interface_open(0, 0, 0);
-  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
-  xc_gnttab_open(NULL, 0);
-  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
-  xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
-  xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL);
-  xc_reserved_device_memory_map(xc, 0, 0, 0, 0, NULL, 0);
-  return 0;
-}
-EOF
-        compile_prog "" "$xen_libs"
-      then
-      xen_ctrl_version=40600
-      xen=enabled
-
-    # Xen 4.5
-    elif
-        cat > $TMPC <<EOF &&
-#include <xenctrl.h>
-#include <xenstore.h>
-#include <stdint.h>
-#include <xen/hvm/hvm_info_table.h>
-#if !defined(HVM_MAX_VCPUS)
-# error HVM_MAX_VCPUS not defined
-#endif
-int main(void) {
-  xc_interface *xc;
-  xs_daemon_open();
-  xc = xc_interface_open(0, 0, 0);
-  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
-  xc_gnttab_open(NULL, 0);
-  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
-  xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
-  xc_hvm_create_ioreq_server(xc, 0, 0, NULL);
-  return 0;
-}
-EOF
-        compile_prog "" "$xen_libs"
-      then
-      xen_ctrl_version=40500
-      xen=enabled
-
-    elif
-        cat > $TMPC <<EOF &&
-#include <xenctrl.h>
-#include <xenstore.h>
-#include <stdint.h>
-#include <xen/hvm/hvm_info_table.h>
-#if !defined(HVM_MAX_VCPUS)
-# error HVM_MAX_VCPUS not defined
-#endif
-int main(void) {
-  xc_interface *xc;
-  xs_daemon_open();
-  xc = xc_interface_open(0, 0, 0);
-  xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
-  xc_gnttab_open(NULL, 0);
-  xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
-  xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
-  return 0;
-}
-EOF
-        compile_prog "" "$xen_libs"
-      then
-      xen_ctrl_version=40200
-      xen=enabled
-
-    else
-      if test "$xen" = "enabled" ; then
-        feature_not_found "xen (unsupported version)" \
-                          "Install a supported xen (xen 4.2 or newer)"
-      fi
-      xen=disabled
-    fi
-
-    if test "$xen" = enabled; then
-      if test $xen_ctrl_version -ge 40701  ; then
-        xen_libs="$xen_libs $xen_stable_libs "
-      fi
-    fi
-  fi
-fi
-
-if test "$xen_pci_passthrough" != "disabled"; then
-  if test "$xen" = "enabled" && test "$linux" = "yes"; then
-    xen_pci_passthrough=enabled
-  else
-    if test "$xen_pci_passthrough" = "enabled"; then
-      error_exit "User requested feature Xen PCI Passthrough" \
-          " but this feature requires /sys from Linux"
-    fi
-    xen_pci_passthrough=disabled
-  fi
-fi
-
-##########################################
-# gettext probe
-if test "$gettext" != "false" ; then
-  if has xgettext; then
-    gettext=true
-  else
-    if test "$gettext" = "true" ; then
-      feature_not_found "gettext" "Install xgettext binary"
-    fi
-    gettext=false
-  fi
-fi
-
-##########################################
-# X11 probe
-if $pkg_config --exists "x11"; then
-    have_x11=yes
-    x11_cflags=$($pkg_config --cflags x11)
-    x11_libs=$($pkg_config --libs x11)
-fi
-
-##########################################
-# GTK probe
-
-if test "$gtk" != "no"; then
-    gtkpackage="gtk+-3.0"
-    gtkx11package="gtk+-x11-3.0"
-    gtkversion="3.22.0"
-    if $pkg_config --exists "$gtkpackage >= $gtkversion"; then
-        gtk_cflags=$($pkg_config --cflags $gtkpackage)
-        gtk_libs=$($pkg_config --libs $gtkpackage)
-        gtk_version=$($pkg_config --modversion $gtkpackage)
-        if $pkg_config --exists "$gtkx11package >= $gtkversion"; then
-            need_x11=yes
-            gtk_cflags="$gtk_cflags $x11_cflags"
-            gtk_libs="$gtk_libs $x11_libs"
-        fi
-        gtk="yes"
-    elif test "$gtk" = "yes"; then
-        feature_not_found "gtk" "Install gtk3-devel"
-    else
-        gtk="no"
-    fi
-fi
-
-
-##########################################
-# GNUTLS probe
-
-if test "$gnutls" != "no"; then
-    pass="no"
-    if $pkg_config --exists "gnutls >= 3.1.18"; then
-        gnutls_cflags=$($pkg_config --cflags gnutls)
-        gnutls_libs=$($pkg_config --libs gnutls)
-        # Packaging for the static libraries is not always correct.
-        # At least ubuntu 18.04 ships only shared libraries.
-        write_c_skeleton
-        if compile_prog "" "$gnutls_libs" ; then
-            pass="yes"
-        fi
-    fi
-    if test "$pass" = "no" && test "$gnutls" = "yes"; then
-       feature_not_found "gnutls" "Install gnutls devel >= 3.1.18"
-    else
-        gnutls="$pass"
-    fi
-fi
-
-
-# If user didn't give a --disable/enable-gcrypt flag,
-# then mark as disabled if user requested nettle
-# explicitly
-if test -z "$gcrypt"
-then
-    if test "$nettle" = "yes"
-    then
-        gcrypt="no"
-    fi
-fi
-
-# If user didn't give a --disable/enable-nettle flag,
-# then mark as disabled if user requested gcrypt
-# explicitly
-if test -z "$nettle"
-then
-    if test "$gcrypt" = "yes"
-    then
-        nettle="no"
-    fi
-fi
-
-has_libgcrypt() {
-    if ! has "libgcrypt-config"
-    then
-       return 1
-    fi
-
-    if test -n "$cross_prefix"
-    then
-       host=$(libgcrypt-config --host)
-       if test "$host-" != $cross_prefix
-       then
-           return 1
-       fi
-    fi
-
-    maj=`libgcrypt-config --version | awk -F . '{print $1}'`
-    min=`libgcrypt-config --version | awk -F . '{print $2}'`
-
-    if test $maj != 1 || test $min -lt 5
-    then
-       return 1
-    fi
-
-    return 0
-}
-
-
-if test "$nettle" != "no"; then
-    pass="no"
-    if $pkg_config --exists "nettle >= 2.7.1"; then
-        nettle_cflags=$($pkg_config --cflags nettle)
-        nettle_libs=$($pkg_config --libs nettle)
-        nettle_version=$($pkg_config --modversion nettle)
-        # Link test to make sure the given libraries work (e.g for static).
-        write_c_skeleton
-        if compile_prog "" "$nettle_libs" ; then
-            if test -z "$gcrypt"; then
-               gcrypt="no"
-            fi
-            pass="yes"
-        fi
-    fi
-    if test "$pass" = "yes"
-    then
-        cat > $TMPC << EOF
-#include <nettle/xts.h>
-int main(void) {
-  return 0;
-}
-EOF
-        if compile_prog "$nettle_cflags" "$nettle_libs" ; then
-            nettle_xts=yes
-            qemu_private_xts=no
-        fi
-    fi
-    if test "$pass" = "no" && test "$nettle" = "yes"; then
-        feature_not_found "nettle" "Install nettle devel >= 2.7.1"
-    else
-        nettle="$pass"
-    fi
-fi
-
-if test "$gcrypt" != "no"; then
-    pass="no"
-    if has_libgcrypt; then
-        gcrypt_cflags=$(libgcrypt-config --cflags)
-        gcrypt_libs=$(libgcrypt-config --libs)
-        # Debian has removed -lgpg-error from libgcrypt-config
-        # as it "spreads unnecessary dependencies" which in
-        # turn breaks static builds...
-        if test "$static" = "yes"
-        then
-            gcrypt_libs="$gcrypt_libs -lgpg-error"
-        fi
-
-        # Link test to make sure the given libraries work (e.g for static).
-        write_c_skeleton
-        if compile_prog "" "$gcrypt_libs" ; then
-            pass="yes"
-        fi
-    fi
-    if test "$pass" = "yes"; then
-        gcrypt="yes"
-        cat > $TMPC << EOF
-#include <gcrypt.h>
-int main(void) {
-  gcry_mac_hd_t handle;
-  gcry_mac_open(&handle, GCRY_MAC_HMAC_MD5,
-                GCRY_MAC_FLAG_SECURE, NULL);
-  return 0;
-}
-EOF
-        if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then
-            gcrypt_hmac=yes
-        fi
-        cat > $TMPC << EOF
-#include <gcrypt.h>
-int main(void) {
-  gcry_cipher_hd_t handle;
-  gcry_cipher_open(&handle, GCRY_CIPHER_AES, GCRY_CIPHER_MODE_XTS, 0);
-  return 0;
-}
-EOF
-        if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then
-            gcrypt_xts=yes
-            qemu_private_xts=no
-        fi
-    elif test "$gcrypt" = "yes"; then
-        feature_not_found "gcrypt" "Install gcrypt devel >= 1.5.0"
-    else
-        gcrypt="no"
-    fi
-fi
-
-
-if test "$gcrypt" = "yes" && test "$nettle" = "yes"
-then
-    error_exit "Only one of gcrypt & nettle can be enabled"
-fi
-
-##########################################
-# libtasn1 - only for the TLS creds/session test suite
-
-tasn1=yes
-tasn1_cflags=""
-tasn1_libs=""
-if $pkg_config --exists "libtasn1"; then
-    tasn1_cflags=$($pkg_config --cflags libtasn1)
-    tasn1_libs=$($pkg_config --libs libtasn1)
-else
-    tasn1=no
-fi
-
-
-##########################################
-# PAM probe
-
-if test "$auth_pam" != "no"; then
-    cat > $TMPC <<EOF
-#include <security/pam_appl.h>
-#include <stdio.h>
-int main(void) {
-   const char *service_name = "qemu";
-   const char *user = "frank";
-   const struct pam_conv pam_conv = { 0 };
-   pam_handle_t *pamh = NULL;
-   pam_start(service_name, user, &pam_conv, &pamh);
-   return 0;
-}
-EOF
-    if compile_prog "" "-lpam" ; then
-        auth_pam=yes
-    else
-        if test "$auth_pam" = "yes"; then
-            feature_not_found "PAM" "Install PAM development package"
-        else
-            auth_pam=no
-        fi
-    fi
-fi
-
-##########################################
-# getifaddrs (for tests/test-io-channel-socket )
-
-have_ifaddrs_h=yes
-if ! check_include "ifaddrs.h" ; then
-  have_ifaddrs_h=no
-fi
-
-#########################################
-# libdrm check
-have_drm_h=no
-if check_include "libdrm/drm.h" ; then
-    have_drm_h=yes
-fi
-
-#########################################
-# sys/signal.h check
-have_sys_signal_h=no
-if check_include "sys/signal.h" ; then
-  have_sys_signal_h=yes
-fi
-
-##########################################
-# VTE probe
-
-if test "$vte" != "no"; then
-    vteminversion="0.32.0"
-    if $pkg_config --exists "vte-2.91"; then
-      vtepackage="vte-2.91"
-    else
-      vtepackage="vte-2.90"
-    fi
-    if $pkg_config --exists "$vtepackage >= $vteminversion"; then
-        vte_cflags=$($pkg_config --cflags $vtepackage)
-        vte_libs=$($pkg_config --libs $vtepackage)
-        vteversion=$($pkg_config --modversion $vtepackage)
-        vte="yes"
-    elif test "$vte" = "yes"; then
-        feature_not_found "vte" "Install libvte-2.90/2.91 devel"
-    else
-        vte="no"
-    fi
-fi
-
-##########################################
-# RDMA needs OpenFabrics libraries
-if test "$rdma" != "no" ; then
-  cat > $TMPC <<EOF
-#include <rdma/rdma_cma.h>
-int main(void) { return 0; }
-EOF
-  rdma_libs="-lrdmacm -libverbs -libumad"
-  if compile_prog "" "$rdma_libs" ; then
-    rdma="yes"
-  else
-    if test "$rdma" = "yes" ; then
-        error_exit \
-            " OpenFabrics librdmacm/libibverbs/libibumad not present." \
-            " Your options:" \
-            "  (1) Fast: Install infiniband packages (devel) from your distro." \
-            "  (2) Cleanest: Install libraries from www.openfabrics.org" \
-            "  (3) Also: Install softiwarp if you don't have RDMA hardware"
-    fi
-    rdma="no"
-  fi
-fi
-
-##########################################
-# PVRDMA detection
-
-cat > $TMPC <<EOF &&
-#include <sys/mman.h>
-
-int
-main(void)
-{
-    char buf = 0;
-    void *addr = &buf;
-    addr = mremap(addr, 0, 1, MREMAP_MAYMOVE | MREMAP_FIXED);
-
-    return 0;
-}
-EOF
-
-if test "$rdma" = "yes" ; then
-    case "$pvrdma" in
-    "")
-        if compile_prog "" ""; then
-            pvrdma="yes"
-        else
-            pvrdma="no"
-        fi
-        ;;
-    "yes")
-        if ! compile_prog "" ""; then
-            error_exit "PVRDMA is not supported since mremap is not implemented"
-        fi
-        pvrdma="yes"
-        ;;
-    "no")
-        pvrdma="no"
-        ;;
-    esac
-else
-    if test "$pvrdma" = "yes" ; then
-        error_exit "PVRDMA requires rdma suppport"
-    fi
-    pvrdma="no"
-fi
-
-# Let's see if enhanced reg_mr is supported
-if test "$pvrdma" = "yes" ; then
-
-cat > $TMPC <<EOF &&
-#include <infiniband/verbs.h>
-
-int
-main(void)
-{
-    struct ibv_mr *mr;
-    struct ibv_pd *pd = NULL;
-    size_t length = 10;
-    uint64_t iova = 0;
-    int access = 0;
-    void *addr = NULL;
-
-    mr = ibv_reg_mr_iova(pd, addr, length, iova, access);
-
-    ibv_dereg_mr(mr);
-
-    return 0;
-}
-EOF
-    if ! compile_prog "" "-libverbs"; then
-        QEMU_CFLAGS="$QEMU_CFLAGS -DLEGACY_RDMA_REG_MR"
-    fi
-fi
-
-##########################################
-# xfsctl() probe, used for file-posix.c
-if test "$xfs" != "no" ; then
-  cat > $TMPC << EOF
-#include <stddef.h>  /* NULL */
-#include <xfs/xfs.h>
-int main(void)
-{
-    xfsctl(NULL, 0, 0, NULL);
-    return 0;
-}
-EOF
-  if compile_prog "" "" ; then
-    xfs="yes"
-  else
-    if test "$xfs" = "yes" ; then
-      feature_not_found "xfs" "Install xfsprogs/xfslibs devel"
-    fi
-    xfs=no
-  fi
-fi
-
-##########################################
-# vde libraries probe
-if test "$vde" != "no" ; then
-  vde_libs="-lvdeplug"
-  cat > $TMPC << EOF
-#include <libvdeplug.h>
-int main(void)
-{
-    struct vde_open_args a = {0, 0, 0};
-    char s[] = "";
-    vde_open(s, s, &a);
-    return 0;
-}
-EOF
-  if compile_prog "" "$vde_libs" ; then
-    vde=yes
-  else
-    if test "$vde" = "yes" ; then
-      feature_not_found "vde" "Install vde (Virtual Distributed Ethernet) devel"
-    fi
-    vde=no
-  fi
-fi
-
-##########################################
-# netmap support probe
-# Apart from looking for netmap headers, we make sure that the host API version
-# supports the netmap backend (>=11). The upper bound (15) is meant to simulate
-# a minor/major version number. Minor new features will be marked with values up
-# to 15, and if something happens that requires a change to the backend we will
-# move above 15, submit the backend fixes and modify this two bounds.
-if test "$netmap" != "no" ; then
-  cat > $TMPC << EOF
-#include <inttypes.h>
-#include <net/if.h>
-#include <net/netmap.h>
-#include <net/netmap_user.h>
-#if (NETMAP_API < 11) || (NETMAP_API > 15)
-#error
-#endif
-int main(void) { return 0; }
-EOF
-  if compile_prog "" "" ; then
-    netmap=yes
-  else
-    if test "$netmap" = "yes" ; then
-      feature_not_found "netmap"
-    fi
-    netmap=no
-  fi
-fi
-
-##########################################
-# libcap-ng library probe
-if test "$cap_ng" != "no" ; then
-  cap_libs="-lcap-ng"
-  cat > $TMPC << EOF
-#include <cap-ng.h>
-int main(void)
-{
-    capng_capability_to_name(CAPNG_EFFECTIVE);
-    return 0;
-}
-EOF
-  if compile_prog "" "$cap_libs" ; then
-    cap_ng=yes
-  else
-    if test "$cap_ng" = "yes" ; then
-      feature_not_found "cap_ng" "Install libcap-ng devel"
-    fi
-    cap_ng=no
-  fi
-fi
-
-##########################################
-# Sound support libraries probe
-
-audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/,/ /g')
-for drv in $audio_drv_list; do
-    case $drv in
-    alsa | try-alsa)
-    if $pkg_config alsa --exists; then
-        alsa_libs=$($pkg_config alsa --libs)
-        alsa_cflags=$($pkg_config alsa --cflags)
-        alsa=yes
-        if test "$drv" = "try-alsa"; then
-            audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-alsa/alsa/')
-        fi
-    else
-        if test "$drv" = "try-alsa"; then
-            audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-alsa//')
-        else
-            error_exit "$drv check failed" \
-                "Make sure to have the $drv libs and headers installed."
-        fi
-    fi
-    ;;
-
-    pa | try-pa)
-    if $pkg_config libpulse --exists; then
-        libpulse=yes
-        pulse_libs=$($pkg_config libpulse --libs)
-        pulse_cflags=$($pkg_config libpulse --cflags)
-        if test "$drv" = "try-pa"; then
-            audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-pa/pa/')
-        fi
-    else
-        if test "$drv" = "try-pa"; then
-            audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-pa//')
-        else
-            error_exit "$drv check failed" \
-                "Make sure to have the $drv libs and headers installed."
-        fi
-    fi
-    ;;
-
-    sdl)
-    if test "$sdl" = "no"; then
-        error_exit "sdl not found or disabled, can not use sdl audio driver"
-    fi
-    ;;
-
-    try-sdl)
-    if test "$sdl" = "no"; then
-        audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-sdl//')
-    else
-        audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-sdl/sdl/')
-    fi
-    ;;
-
-    coreaudio)
-      coreaudio_libs="-framework CoreAudio"
-    ;;
-
-    dsound)
-      dsound_libs="-lole32 -ldxguid"
-      audio_win_int="yes"
-    ;;
-
-    oss)
-      oss_libs="$oss_lib"
-    ;;
-
-    jack | try-jack)
-    if $pkg_config jack --exists; then
-        libjack=yes
-        jack_libs=$($pkg_config jack --libs)
-        if test "$drv" = "try-jack"; then
-            audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-jack/jack/')
-        fi
-    else
-        if test "$drv" = "try-jack"; then
-            audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-jack//')
-        else
-            error_exit "$drv check failed" \
-                "Make sure to have the $drv libs and headers installed."
-        fi
-    fi
-    ;;
-
-    *)
-    echo "$audio_possible_drivers" | grep -q "\<$drv\>" || {
-        error_exit "Unknown driver '$drv' selected" \
-            "Possible drivers are: $audio_possible_drivers"
-    }
-    ;;
-    esac
-done
-
-##########################################
-# BrlAPI probe
-
-if test "$brlapi" != "no" ; then
-  brlapi_libs="-lbrlapi"
-  cat > $TMPC << EOF
-#include <brlapi.h>
-#include <stddef.h>
-int main( void ) { return brlapi__openConnection (NULL, NULL, NULL); }
-EOF
-  if compile_prog "" "$brlapi_libs" ; then
-    brlapi=yes
-  else
-    if test "$brlapi" = "yes" ; then
-      feature_not_found "brlapi" "Install brlapi devel"
-    fi
-    brlapi=no
-  fi
-fi
-
-##########################################
-# curl probe
-if test "$curl" != "no" ; then
-  if $pkg_config libcurl --exists; then
-    curlconfig="$pkg_config libcurl"
-  else
-    curlconfig=curl-config
-  fi
-  cat > $TMPC << EOF
-#include <curl/curl.h>
-int main(void) { curl_easy_init(); curl_multi_setopt(0, 0, 0); return 0; }
-EOF
-  curl_cflags=$($curlconfig --cflags 2>/dev/null)
-  curl_libs=$($curlconfig --libs 2>/dev/null)
-  if compile_prog "$curl_cflags" "$curl_libs" ; then
-    curl=yes
-  else
-    if test "$curl" = "yes" ; then
-      feature_not_found "curl" "Install libcurl devel"
-    fi
-    curl=no
-  fi
-fi # test "$curl"
-
-##########################################
-# glib support probe
-
-glib_req_ver=2.48
-glib_modules=gthread-2.0
-if test "$modules" = yes; then
-    glib_modules="$glib_modules gmodule-export-2.0"
-fi
-if test "$plugins" = yes; then
-    glib_modules="$glib_modules gmodule-2.0"
-fi
-
-for i in $glib_modules; do
-    if $pkg_config --atleast-version=$glib_req_ver $i; then
-        glib_cflags=$($pkg_config --cflags $i)
-        glib_libs=$($pkg_config --libs $i)
-    else
-        error_exit "glib-$glib_req_ver $i is required to compile QEMU"
-    fi
-done
-
-# Check whether glib has gslice, which we have to avoid for correctness.
-# TODO: remove this check and the corresponding workaround (qtree) when
-# the minimum supported glib is >= $glib_dropped_gslice_version.
-glib_dropped_gslice_version=2.75.3
-for i in $glib_modules; do
-    if ! $pkg_config --atleast-version=$glib_dropped_gslice_version $i; then
-        glib_has_gslice="yes"
-       break
-    fi
-done
-
-# This workaround is required due to a bug in pkg-config file for glib as it
-# doesn't define GLIB_STATIC_COMPILATION for pkg-config --static
-
-if test "$static" = yes && test "$mingw32" = yes; then
-    glib_cflags="-DGLIB_STATIC_COMPILATION $glib_cflags"
-fi
-
-if $pkg_config --atleast-version=$glib_req_ver gio-2.0; then
-    gio_cflags=$($pkg_config --cflags gio-2.0)
-    gio_libs=$($pkg_config --libs gio-2.0)
-    gdbus_codegen=$($pkg_config --variable=gdbus_codegen gio-2.0)
-    if [ ! -x "$gdbus_codegen" ]; then
-        gdbus_codegen=
-    fi
-    # Check that the libraries actually work -- Ubuntu 18.04 ships
-    # with pkg-config --static --libs data for gio-2.0 that is missing
-    # -lblkid and will give a link error.
-    cat > $TMPC <<EOF
-#include <gio/gio.h>
-int main(void)
-{
-    g_dbus_proxy_new_sync(0, 0, 0, 0, 0, 0, 0, 0);
-    return 0;
-}
-EOF
-    if compile_prog "$gio_cflags" "$gio_libs" ; then
-        gio=yes
-    else
-        gio=no
-    fi
-else
-    gio=no
-fi
-
-if $pkg_config --atleast-version=$glib_req_ver gio-unix-2.0; then
-    gio_cflags="$gio_cflags $($pkg_config --cflags gio-unix-2.0)"
-    gio_libs="$gio_libs $($pkg_config --libs gio-unix-2.0)"
-fi
-
-# Sanity check that the current size_t matches the
-# size that glib thinks it should be. This catches
-# problems on multi-arch where people try to build
-# 32-bit QEMU while pointing at 64-bit glib headers
-cat > $TMPC <<EOF
-#include <glib.h>
-#include <unistd.h>
-
-#define QEMU_BUILD_BUG_ON(x) \
-  typedef char qemu_build_bug_on[(x)?-1:1] __attribute__((unused));
-
-int main(void) {
-   QEMU_BUILD_BUG_ON(sizeof(size_t) != GLIB_SIZEOF_SIZE_T);
-   return 0;
-}
-EOF
-
-if ! compile_prog "$glib_cflags" "$glib_libs" ; then
-    error_exit "sizeof(size_t) doesn't match GLIB_SIZEOF_SIZE_T."\
-               "You probably need to set PKG_CONFIG_LIBDIR"\
-              "to point to the right pkg-config files for your"\
-              "build target"
-fi
-
-# Silence clang 3.5.0 warnings about glib attribute __alloc_size__ usage
-cat > $TMPC << EOF
-#include <glib.h>
-int main(void) { return 0; }
-EOF
-if ! compile_prog "$glib_cflags -Werror" "$glib_libs" ; then
-    if cc_has_warning_flag "-Wno-unknown-attributes"; then
-        glib_cflags="-Wno-unknown-attributes $glib_cflags"
-        CONFIGURE_CFLAGS="-Wno-unknown-attributes $CONFIGURE_CFLAGS"
-    fi
-fi
-
-# Silence clang warnings triggered by glib < 2.57.2
-cat > $TMPC << EOF
-#include <glib.h>
-typedef struct Foo {
-    int i;
-} Foo;
-static void foo_free(Foo *f)
-{
-    g_free(f);
-}
-G_DEFINE_AUTOPTR_CLEANUP_FUNC(Foo, foo_free);
-int main(void) { return 0; }
-EOF
-if ! compile_prog "$glib_cflags -Werror" "$glib_libs" ; then
-    if cc_has_warning_flag "-Wno-unused-function"; then
-        glib_cflags="$glib_cflags -Wno-unused-function"
-        CONFIGURE_CFLAGS="$CONFIGURE_CFLAGS -Wno-unused-function"
-    fi
-fi
-
-##########################################
-# SHA command probe for modules
-if test "$modules" = yes; then
-    shacmd_probe="sha1sum sha1 shasum"
-    for c in $shacmd_probe; do
-        if has $c; then
-            shacmd="$c"
-            break
-        fi
-    done
-    if test "$shacmd" = ""; then
-        error_exit "one of the checksum commands is required to enable modules: $shacmd_probe"
-    fi
-fi
-
-##########################################
-# pthread probe
-PTHREADLIBS_LIST="-pthread -lpthread -lpthreadGC2"
-
-pthread=no
-cat > $TMPC << EOF
-#include <pthread.h>
-static void *f(void *p) { return NULL; }
-int main(void) {
-  pthread_t thread;
-  pthread_create(&thread, 0, f, 0);
-  return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  pthread=yes
-else
-  for pthread_lib in $PTHREADLIBS_LIST; do
-    if compile_prog "" "$pthread_lib" ; then
-      pthread=yes
-      found=no
-      for lib_entry in $LIBS; do
-        if test "$lib_entry" = "$pthread_lib"; then
-          found=yes
-          break
-        fi
-      done
-      if test "$found" = "no"; then
-        LIBS="$pthread_lib $LIBS"
-      fi
-      break
-    fi
-  done
-fi
-
-if test "$mingw32" != yes && test "$pthread" = no; then
-  error_exit "pthread check failed" \
-      "Make sure to have the pthread libs and headers installed."
-fi
-
-# check for pthread_setname_np with thread id
-pthread_setname_np_w_tid=no
-cat > $TMPC << EOF
-#include <pthread.h>
-
-static void *f(void *p) { return NULL; }
-int main(void)
-{
-    pthread_t thread;
-    pthread_create(&thread, 0, f, 0);
-    pthread_setname_np(thread, "QEMU");
-    return 0;
-}
-EOF
-if compile_prog "" "$pthread_lib" ; then
-  pthread_setname_np_w_tid=yes
-fi
-
-# check for pthread_setname_np without thread id
-pthread_setname_np_wo_tid=no
-cat > $TMPC << EOF
-#include <pthread.h>
-
-static void *f(void *p) { pthread_setname_np("QEMU"); return NULL; }
-int main(void)
-{
-    pthread_t thread;
-    pthread_create(&thread, 0, f, 0);
-    return 0;
-}
-EOF
-if compile_prog "" "$pthread_lib" ; then
-  pthread_setname_np_wo_tid=yes
-fi
-
-##########################################
-# rbd probe
-if test "$rbd" != "no" ; then
-  cat > $TMPC <<EOF
-#include <stdio.h>
-#include <rbd/librbd.h>
-int main(void) {
-    rados_t cluster;
-    rados_create(&cluster, NULL);
-    return 0;
-}
-EOF
-  rbd_libs="-lrbd -lrados"
-  if compile_prog "" "$rbd_libs" ; then
-    rbd=yes
-  else
-    if test "$rbd" = "yes" ; then
-      feature_not_found "rados block device" "Install librbd/ceph devel"
-    fi
-    rbd=no
-  fi
-fi
-
-##########################################
-# libssh probe
-if test "$libssh" != "no" ; then
-  if $pkg_config --exists libssh; then
-    libssh_cflags=$($pkg_config libssh --cflags)
-    libssh_libs=$($pkg_config libssh --libs)
-    libssh=yes
-  else
-    if test "$libssh" = "yes" ; then
-      error_exit "libssh required for --enable-libssh"
-    fi
-    libssh=no
-  fi
-fi
-
-##########################################
-# Check for libssh 0.8
-# This is done like this instead of using the LIBSSH_VERSION_* and
-# SSH_VERSION_* macros because some distributions in the past shipped
-# snapshots of the future 0.8 from Git, and those snapshots did not
-# have updated version numbers (still referring to 0.7.0).
-
-if test "$libssh" = "yes"; then
-  cat > $TMPC <<EOF
-#include <libssh/libssh.h>
-int main(void) { return ssh_get_server_publickey(NULL, NULL); }
-EOF
-  if compile_prog "$libssh_cflags" "$libssh_libs"; then
-    libssh_cflags="-DHAVE_LIBSSH_0_8 $libssh_cflags"
-  fi
-fi
-
-##########################################
-# linux-aio probe
-
-if test "$linux_aio" != "no" ; then
-  cat > $TMPC <<EOF
-#include <libaio.h>
-#include <sys/eventfd.h>
-#include <stddef.h>
-int main(void) { io_setup(0, NULL); io_set_eventfd(NULL, 0); eventfd(0, 0); return 0; }
-EOF
-  if compile_prog "" "-laio" ; then
-    linux_aio=yes
-  else
-    if test "$linux_aio" = "yes" ; then
-      feature_not_found "linux AIO" "Install libaio devel"
-    fi
-    linux_aio=no
-  fi
-fi
-##########################################
-# linux-io-uring probe
-
-if test "$linux_io_uring" != "no" ; then
-  if $pkg_config liburing; then
-    linux_io_uring_cflags=$($pkg_config --cflags liburing)
-    linux_io_uring_libs=$($pkg_config --libs liburing)
-    linux_io_uring=yes
-  else
-    if test "$linux_io_uring" = "yes" ; then
-      feature_not_found "linux io_uring" "Install liburing devel"
-    fi
-    linux_io_uring=no
-  fi
-fi
-
-##########################################
-# TPM emulation is only on POSIX
-
-if test "$tpm" = ""; then
-  if test "$mingw32" = "yes"; then
-    tpm=no
-  else
-    tpm=yes
-  fi
-elif test "$tpm" = "yes"; then
-  if test "$mingw32" = "yes" ; then
-    error_exit "TPM emulation only available on POSIX systems"
-  fi
-fi
-
-##########################################
-# attr probe
-
-libattr_libs=
-if test "$attr" != "no" ; then
-  cat > $TMPC <<EOF
-#include <stdio.h>
-#include <sys/types.h>
-#ifdef CONFIG_LIBATTR
-#include <attr/xattr.h>
-#else
-#include <sys/xattr.h>
-#endif
-int main(void) { getxattr(NULL, NULL, NULL, 0); setxattr(NULL, NULL, NULL, 0, 0); return 0; }
-EOF
-  if compile_prog "" "" ; then
-    attr=yes
-  # Older distros have <attr/xattr.h>, and need -lattr:
-  elif compile_prog "-DCONFIG_LIBATTR" "-lattr" ; then
-    attr=yes
-    libattr_libs="-lattr"
-    libattr=yes
-  else
-    if test "$attr" = "yes" ; then
-      feature_not_found "ATTR" "Install libc6 or libattr devel"
-    fi
-    attr=no
-  fi
-fi
-
-##########################################
-# iovec probe
-cat > $TMPC <<EOF
-#include <sys/types.h>
-#include <sys/uio.h>
-#include <unistd.h>
-int main(void) { return sizeof(struct iovec); }
-EOF
-iovec=no
-if compile_prog "" "" ; then
-  iovec=yes
-fi
-
-##########################################
-# preadv probe
-cat > $TMPC <<EOF
-#include <sys/types.h>
-#include <sys/uio.h>
-#include <unistd.h>
-int main(void) { return preadv(0, 0, 0, 0); }
-EOF
-preadv=no
-if compile_prog "" "" ; then
-  preadv=yes
-fi
-
-##########################################
-# fdt probe
-
-case "$fdt" in
-  auto | enabled | internal)
-    # Simpler to always update submodule, even if not needed.
-    if test -e "${source_path}/.git" && test $git_update = 'yes' ; then
-      git_submodules="${git_submodules} dtc"
-    fi
-    ;;
-esac
-
-##########################################
-# opengl probe (for sdl2, gtk, milkymist-tmu2)
-
-gbm="no"
-if $pkg_config gbm; then
-    gbm_cflags="$($pkg_config --cflags gbm)"
-    gbm_libs="$($pkg_config --libs gbm)"
-    gbm="yes"
-fi
-
-if test "$opengl" != "no" ; then
-  opengl_pkgs="epoxy gbm"
-  if $pkg_config $opengl_pkgs; then
-    opengl_cflags="$($pkg_config --cflags $opengl_pkgs)"
-    opengl_libs="$($pkg_config --libs $opengl_pkgs)"
-    opengl=yes
-    if test "$gtk" = "yes" && $pkg_config --exists "$gtkpackage >= 3.16"; then
-        gtk_gl="yes"
-    fi
-  else
-    if test "$opengl" = "yes" ; then
-      feature_not_found "opengl" "Please install opengl (mesa) devel pkgs: $opengl_pkgs"
-    fi
-    opengl_cflags=""
-    opengl_libs=""
-    opengl=no
-  fi
-fi
-
-if test "$opengl" = "yes"; then
-  cat > $TMPC << EOF
-#include <epoxy/egl.h>
-#ifndef EGL_MESA_image_dma_buf_export
-# error mesa/epoxy lacks support for dmabufs (mesa 10.6+)
-#endif
-int main(void) { return 0; }
-EOF
-  if compile_prog "" "" ; then
-    opengl_dmabuf=yes
-  fi
-fi
-
-if test "$opengl" = "yes" && test "$have_x11" = "yes"; then
-  for target in $target_list; do
-    case $target in
-      lm32-softmmu) # milkymist-tmu2 requires X11 and OpenGL
-        need_x11=yes
-      ;;
-    esac
-  done
-fi
-
-##########################################
-# libxml2 probe
-if test "$libxml2" != "no" ; then
-    if $pkg_config --exists libxml-2.0; then
-        libxml2="yes"
-        libxml2_cflags=$($pkg_config --cflags libxml-2.0)
-        libxml2_libs=$($pkg_config --libs libxml-2.0)
-    else
-        if test "$libxml2" = "yes"; then
-            feature_not_found "libxml2" "Install libxml2 devel"
-        fi
-        libxml2="no"
-    fi
-fi
-
-##########################################
-# glusterfs probe
-if test "$glusterfs" != "no" ; then
-  if $pkg_config --atleast-version=3 glusterfs-api; then
-    glusterfs="yes"
-    glusterfs_cflags=$($pkg_config --cflags glusterfs-api)
-    glusterfs_libs=$($pkg_config --libs glusterfs-api)
-    if $pkg_config --atleast-version=4 glusterfs-api; then
-      glusterfs_xlator_opt="yes"
-    fi
-    if $pkg_config --atleast-version=5 glusterfs-api; then
-      glusterfs_discard="yes"
-    fi
-    if $pkg_config --atleast-version=6 glusterfs-api; then
-      glusterfs_fallocate="yes"
-      glusterfs_zerofill="yes"
-    fi
-    cat > $TMPC << EOF
-#include <glusterfs/api/glfs.h>
-
-int
-main(void)
-{
-       /* new glfs_ftruncate() passes two additional args */
-       return glfs_ftruncate(NULL, 0, NULL, NULL);
-}
-EOF
-    if compile_prog "$glusterfs_cflags" "$glusterfs_libs" ; then
-      glusterfs_ftruncate_has_stat="yes"
-    fi
-    cat > $TMPC << EOF
-#include <glusterfs/api/glfs.h>
-
-/* new glfs_io_cbk() passes two additional glfs_stat structs */
-static void
-glusterfs_iocb(glfs_fd_t *fd, ssize_t ret, struct glfs_stat *prestat, struct glfs_stat *poststat, void *data)
-{}
-
-int
-main(void)
-{
-       glfs_io_cbk iocb = &glusterfs_iocb;
-       iocb(NULL, 0 , NULL, NULL, NULL);
-       return 0;
-}
-EOF
-    if compile_prog "$glusterfs_cflags" "$glusterfs_libs" ; then
-      glusterfs_iocb_has_stat="yes"
-    fi
-  else
-    if test "$glusterfs" = "yes" ; then
-      feature_not_found "GlusterFS backend support" \
-          "Install glusterfs-api devel >= 3"
-    fi
-    glusterfs="no"
-  fi
-fi
-
-# Check for inotify functions when we are building linux-user
-# emulator.  This is done because older glibc versions don't
-# have syscall stubs for these implemented.  In that case we
-# don't provide them even if kernel supports them.
-#
-inotify=no
-cat > $TMPC << EOF
-#include <sys/inotify.h>
-
-int
-main(void)
-{
-       /* try to start inotify */
-       return inotify_init();
-}
-EOF
-if compile_prog "" "" ; then
-  inotify=yes
-fi
-
-inotify1=no
-cat > $TMPC << EOF
-#include <sys/inotify.h>
-
-int
-main(void)
-{
-    /* try to start inotify */
-    return inotify_init1(0);
-}
-EOF
-if compile_prog "" "" ; then
-  inotify1=yes
-fi
-
-# check if pipe2 is there
-pipe2=no
-cat > $TMPC << EOF
-#include <unistd.h>
-#include <fcntl.h>
-
-int main(void)
-{
-    int pipefd[2];
-    return pipe2(pipefd, O_CLOEXEC);
-}
-EOF
-if compile_prog "" "" ; then
-  pipe2=yes
-fi
-
-# check if accept4 is there
-accept4=no
-cat > $TMPC << EOF
-#include <sys/socket.h>
-#include <stddef.h>
-
-int main(void)
-{
-    accept4(0, NULL, NULL, SOCK_CLOEXEC);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  accept4=yes
-fi
-
-# check if tee/splice is there. vmsplice was added same time.
-splice=no
-cat > $TMPC << EOF
-#include <unistd.h>
-#include <fcntl.h>
-#include <limits.h>
-
-int main(void)
-{
-    int len, fd = 0;
-    len = tee(STDIN_FILENO, STDOUT_FILENO, INT_MAX, SPLICE_F_NONBLOCK);
-    splice(STDIN_FILENO, NULL, fd, NULL, len, SPLICE_F_MOVE);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  splice=yes
-fi
-
-##########################################
-# libnuma probe
-
-if test "$numa" != "no" ; then
-  cat > $TMPC << EOF
-#include <numa.h>
-int main(void) { return numa_available(); }
-EOF
-
-  if compile_prog "" "-lnuma" ; then
-    numa=yes
-    numa_libs="-lnuma"
-  else
-    if test "$numa" = "yes" ; then
-      feature_not_found "numa" "install numactl devel"
-    fi
-    numa=no
-  fi
-fi
-
-malloc=system
-if test "$tcmalloc" = "yes" && test "$jemalloc" = "yes" ; then
-    echo "ERROR: tcmalloc && jemalloc can't be used at the same time"
-    exit 1
-elif test "$tcmalloc" = "yes" ; then
-    malloc=tcmalloc
-elif test "$jemalloc" = "yes" ; then
-    malloc=jemalloc
-fi
-
-##########################################
-# signalfd probe
-signalfd="no"
-cat > $TMPC << EOF
-#include <unistd.h>
-#include <sys/syscall.h>
-#include <signal.h>
-int main(void) { return syscall(SYS_signalfd, -1, NULL, _NSIG / 8); }
-EOF
-
-if compile_prog "" "" ; then
-  signalfd=yes
-fi
-
-# check if optreset global is declared by <getopt.h>
-optreset="no"
-cat > $TMPC << EOF
-#include <getopt.h>
-int main(void) { return optreset; }
-EOF
-
-if compile_prog "" "" ; then
-  optreset=yes
-fi
-
-# check if eventfd is supported
-eventfd=no
-cat > $TMPC << EOF
-#include <sys/eventfd.h>
-
-int main(void)
-{
-    return eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-}
-EOF
-if compile_prog "" "" ; then
-  eventfd=yes
-fi
-
-# check if memfd is supported
-memfd=no
-cat > $TMPC << EOF
-#include <sys/mman.h>
-
-int main(void)
-{
-    return memfd_create("foo", MFD_ALLOW_SEALING);
-}
-EOF
-if compile_prog "" "" ; then
-  memfd=yes
-fi
-
-# check for usbfs
-have_usbfs=no
-if test "$linux_user" = "yes"; then
-  cat > $TMPC << EOF
-#include <linux/usbdevice_fs.h>
-
-#ifndef USBDEVFS_GET_CAPABILITIES
-#error "USBDEVFS_GET_CAPABILITIES undefined"
-#endif
-
-#ifndef USBDEVFS_DISCONNECT_CLAIM
-#error "USBDEVFS_DISCONNECT_CLAIM undefined"
-#endif
-
-int main(void)
-{
-    return 0;
-}
-EOF
-  if compile_prog "" ""; then
-    have_usbfs=yes
-  fi
-fi
-
-# check for fallocate
-fallocate=no
-cat > $TMPC << EOF
-#include <fcntl.h>
-
-int main(void)
-{
-    fallocate(0, 0, 0, 0);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  fallocate=yes
-fi
-
-# check for fallocate hole punching
-fallocate_punch_hole=no
-cat > $TMPC << EOF
-#include <fcntl.h>
-#include <linux/falloc.h>
-
-int main(void)
-{
-    fallocate(0, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 0);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  fallocate_punch_hole=yes
-fi
-
-# check that fallocate supports range zeroing inside the file
-fallocate_zero_range=no
-cat > $TMPC << EOF
-#include <fcntl.h>
-#include <linux/falloc.h>
-
-int main(void)
-{
-    fallocate(0, FALLOC_FL_ZERO_RANGE, 0, 0);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  fallocate_zero_range=yes
-fi
-
-# check for posix_fallocate
-posix_fallocate=no
-cat > $TMPC << EOF
-#include <fcntl.h>
-
-int main(void)
-{
-    posix_fallocate(0, 0, 0);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-    posix_fallocate=yes
-fi
-
-# check for sync_file_range
-sync_file_range=no
-cat > $TMPC << EOF
-#include <fcntl.h>
-
-int main(void)
-{
-    sync_file_range(0, 0, 0, 0);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  sync_file_range=yes
-fi
-
-# check for linux/fiemap.h and FS_IOC_FIEMAP
-fiemap=no
-cat > $TMPC << EOF
-#include <sys/ioctl.h>
-#include <linux/fs.h>
-#include <linux/fiemap.h>
-
-int main(void)
-{
-    ioctl(0, FS_IOC_FIEMAP, 0);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  fiemap=yes
-fi
-
-# check for dup3
-dup3=no
-cat > $TMPC << EOF
-#include <unistd.h>
-
-int main(void)
-{
-    dup3(0, 0, 0);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  dup3=yes
-fi
-
-# check for ppoll support
-ppoll=no
-cat > $TMPC << EOF
-#include <poll.h>
-
-int main(void)
-{
-    struct pollfd pfd = { .fd = 0, .events = 0, .revents = 0 };
-    ppoll(&pfd, 1, 0, 0);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  ppoll=yes
-fi
-
-# check for prctl(PR_SET_TIMERSLACK , ... ) support
-prctl_pr_set_timerslack=no
-cat > $TMPC << EOF
-#include <sys/prctl.h>
-
-int main(void)
-{
-    prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  prctl_pr_set_timerslack=yes
-fi
-
-# check for epoll support
-epoll=no
-cat > $TMPC << EOF
-#include <sys/epoll.h>
-
-int main(void)
-{
-    epoll_create(0);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  epoll=yes
-fi
-
-# epoll_create1 is a later addition
-# so we must check separately for its presence
-epoll_create1=no
-cat > $TMPC << EOF
-#include <sys/epoll.h>
-
-int main(void)
-{
-    /* Note that we use epoll_create1 as a value, not as
-     * a function being called. This is necessary so that on
-     * old SPARC glibc versions where the function was present in
-     * the library but not declared in the header file we will
-     * fail the configure check. (Otherwise we will get a compiler
-     * warning but not an error, and will proceed to fail the
-     * qemu compile where we compile with -Werror.)
-     */
-    return (int)(uintptr_t)&epoll_create1;
-}
-EOF
-if compile_prog "" "" ; then
-  epoll_create1=yes
-fi
-
-# check for sendfile support
-sendfile=no
-cat > $TMPC << EOF
-#include <sys/sendfile.h>
-
-int main(void)
-{
-    return sendfile(0, 0, 0, 0);
-}
-EOF
-if compile_prog "" "" ; then
-  sendfile=yes
-fi
-
-# check for timerfd support (glibc 2.8 and newer)
-timerfd=no
-cat > $TMPC << EOF
-#include <sys/timerfd.h>
-
-int main(void)
-{
-    return(timerfd_create(CLOCK_REALTIME, 0));
-}
-EOF
-if compile_prog "" "" ; then
-  timerfd=yes
-fi
-
-# check for setns and unshare support
-setns=no
-cat > $TMPC << EOF
-#include <sched.h>
-
-int main(void)
-{
-    int ret;
-    ret = setns(0, 0);
-    ret = unshare(0);
-    return ret;
-}
-EOF
-if compile_prog "" "" ; then
-  setns=yes
-fi
-
-# clock_adjtime probe
-clock_adjtime=no
-cat > $TMPC <<EOF
-#include <time.h>
-
-int main(void)
-{
-    return clock_adjtime(0, 0);
-}
-EOF
-clock_adjtime=no
-if compile_prog "" "" ; then
-  clock_adjtime=yes
-fi
-
-# syncfs probe
-syncfs=no
-cat > $TMPC <<EOF
-#include <unistd.h>
-
-int main(void)
-{
-    return syncfs(0);
-}
-EOF
-syncfs=no
-if compile_prog "" "" ; then
-  syncfs=yes
-fi
-
-# check for kcov support (kernel must be 4.4+, compiled with certain options)
-kcov=no
-if check_include sys/kcov.h ; then
-    kcov=yes
-fi
-
-# check for btrfs filesystem support (kernel must be 3.9+)
-btrfs=no
-if check_include linux/btrfs.h ; then
-    btrfs=yes
-fi
-
-# Search for bswap_32 function
-byteswap_h=no
-cat > $TMPC << EOF
-#include <byteswap.h>
-int main(void) { return bswap_32(0); }
-EOF
-if compile_prog "" "" ; then
-  byteswap_h=yes
-fi
-
-# Search for bswap32 function
-bswap_h=no
-cat > $TMPC << EOF
-#include <sys/endian.h>
-#include <sys/types.h>
-#include <machine/bswap.h>
-int main(void) { return bswap32(0); }
-EOF
-if compile_prog "" "" ; then
-  bswap_h=yes
-fi
-
-##########################################
-# Do we have libiscsi >= 1.9.0
-if test "$libiscsi" != "no" ; then
-  if $pkg_config --atleast-version=1.9.0 libiscsi; then
-    libiscsi="yes"
-    libiscsi_cflags=$($pkg_config --cflags libiscsi)
-    libiscsi_libs=$($pkg_config --libs libiscsi)
-  else
-    if test "$libiscsi" = "yes" ; then
-      feature_not_found "libiscsi" "Install libiscsi >= 1.9.0"
-    fi
-    libiscsi="no"
-  fi
-fi
-
-##########################################
-# Do we need librt
-# uClibc provides 2 versions of clock_gettime(), one with realtime
-# support and one without. This means that the clock_gettime() don't
-# need -lrt. We still need it for timer_create() so we check for this
-# function in addition.
-cat > $TMPC <<EOF
-#include <signal.h>
-#include <time.h>
-int main(void) {
-  timer_create(CLOCK_REALTIME, NULL, NULL);
-  return clock_gettime(CLOCK_REALTIME, NULL);
-}
-EOF
-
-if compile_prog "" "" ; then
-  :
-# we need pthread for static linking. use previous pthread test result
-elif compile_prog "" "$pthread_lib -lrt" ; then
-  LIBS="$LIBS -lrt"
-fi
-
-# Check whether we have openpty() in either libc or libutil
-cat > $TMPC << EOF
-extern int openpty(int *am, int *as, char *name, void *termp, void *winp);
-int main(void) { return openpty(0, 0, 0, 0, 0); }
-EOF
-
-have_openpty="no"
-if compile_prog "" "" ; then
-  have_openpty="yes"
-else
-  if compile_prog "" "-lutil" ; then
-    have_openpty="yes"
-  fi
-fi
-
-##########################################
-# spice probe
-if test "$spice" != "no" ; then
-  cat > $TMPC << EOF
-#include <spice.h>
-int main(void) { spice_server_new(); return 0; }
-EOF
-  spice_cflags=$($pkg_config --cflags spice-protocol spice-server 2>/dev/null)
-  spice_libs=$($pkg_config --libs spice-protocol spice-server 2>/dev/null)
-  if $pkg_config --atleast-version=0.12.5 spice-server && \
-     $pkg_config --atleast-version=0.12.3 spice-protocol && \
-     compile_prog "$spice_cflags" "$spice_libs" ; then
-    spice="yes"
-  else
-    if test "$spice" = "yes" ; then
-      feature_not_found "spice" \
-          "Install spice-server(>=0.12.5) and spice-protocol(>=0.12.3) devel"
-    fi
-    spice="no"
-  fi
-fi
-
-# check for smartcard support
-if test "$smartcard" != "no"; then
-    if $pkg_config --atleast-version=2.5.1 libcacard; then
-        libcacard_cflags=$($pkg_config --cflags libcacard)
-        libcacard_libs=$($pkg_config --libs libcacard)
-        smartcard="yes"
-    else
-        if test "$smartcard" = "yes"; then
-            feature_not_found "smartcard" "Install libcacard devel"
-        fi
-        smartcard="no"
-    fi
-fi
-
-# check for libusb
-if test "$libusb" != "no" ; then
-    if $pkg_config --atleast-version=1.0.13 libusb-1.0; then
-        libusb="yes"
-        libusb_cflags=$($pkg_config --cflags libusb-1.0)
-        libusb_libs=$($pkg_config --libs libusb-1.0)
-    else
-        if test "$libusb" = "yes"; then
-            feature_not_found "libusb" "Install libusb devel >= 1.0.13"
-        fi
-        libusb="no"
-    fi
-fi
-
-# check for usbredirparser for usb network redirection support
-if test "$usb_redir" != "no" ; then
-    if $pkg_config --atleast-version=0.6 libusbredirparser-0.5; then
-        usb_redir="yes"
-        usb_redir_cflags=$($pkg_config --cflags libusbredirparser-0.5)
-        usb_redir_libs=$($pkg_config --libs libusbredirparser-0.5)
-    else
-        if test "$usb_redir" = "yes"; then
-            feature_not_found "usb-redir" "Install usbredir devel"
-        fi
-        usb_redir="no"
-    fi
-fi
-
-##########################################
-# check if we have VSS SDK headers for win
-
-if test "$mingw32" = "yes" && test "$guest_agent" != "no" && \
-        test "$vss_win32_sdk" != "no" ; then
-  case "$vss_win32_sdk" in
-    "")   vss_win32_include="-isystem $source_path" ;;
-    *\ *) # The SDK is installed in "Program Files" by default, but we cannot
-          # handle path with spaces. So we symlink the headers into ".sdk/vss".
-          vss_win32_include="-isystem $source_path/.sdk/vss"
-         symlink "$vss_win32_sdk/inc" "$source_path/.sdk/vss/inc"
-         ;;
-    *)    vss_win32_include="-isystem $vss_win32_sdk"
-  esac
-  cat > $TMPC << EOF
-#define __MIDL_user_allocate_free_DEFINED__
-#include <inc/win2003/vss.h>
-int main(void) { return VSS_CTX_BACKUP; }
-EOF
-  if compile_prog "$vss_win32_include" "" ; then
-    guest_agent_with_vss="yes"
-    QEMU_CFLAGS="$QEMU_CFLAGS $vss_win32_include"
-    libs_qga="-lole32 -loleaut32 -lshlwapi -lstdc++ -Wl,--enable-stdcall-fixup $libs_qga"
-    qga_vss_provider="qga/vss-win32/qga-vss.dll qga/vss-win32/qga-vss.tlb"
-  else
-    if test "$vss_win32_sdk" != "" ; then
-      echo "ERROR: Please download and install Microsoft VSS SDK:"
-      echo "ERROR:   http://www.microsoft.com/en-us/download/details.aspx?id=23490"
-      echo "ERROR: On POSIX-systems, you can extract the SDK headers by:"
-      echo "ERROR:   scripts/extract-vsssdk-headers setup.exe"
-      echo "ERROR: The headers are extracted in the directory \`inc'."
-      feature_not_found "VSS support"
-    fi
-    guest_agent_with_vss="no"
-  fi
-fi
-
-##########################################
-# lookup Windows platform SDK (if not specified)
-# The SDK is needed only to build .tlb (type library) file of guest agent
-# VSS provider from the source. It is usually unnecessary because the
-# pre-compiled .tlb file is included.
-
-if test "$mingw32" = "yes" && test "$guest_agent" != "no" && \
-        test "$guest_agent_with_vss" = "yes" ; then
-  if test -z "$win_sdk"; then
-    programfiles="$PROGRAMFILES"
-    test -n "$PROGRAMW6432" && programfiles="$PROGRAMW6432"
-    if test -n "$programfiles"; then
-      win_sdk=$(ls -d "$programfiles/Microsoft SDKs/Windows/v"* | tail -1) 2>/dev/null
-    else
-      feature_not_found "Windows SDK"
-    fi
-  elif test "$win_sdk" = "no"; then
-    win_sdk=""
-  fi
-fi
-
-##########################################
-# check if mingw environment provides a recent ntddscsi.h
-if test "$mingw32" = "yes" && test "$guest_agent" != "no"; then
-  cat > $TMPC << EOF
-#include <windows.h>
-#include <ntddscsi.h>
-int main(void) {
-#if !defined(IOCTL_SCSI_GET_ADDRESS)
-#error Missing required ioctl definitions
-#endif
-  SCSI_ADDRESS addr = { .Lun = 0, .TargetId = 0, .PathId = 0 };
-  return addr.Lun;
-}
-EOF
-  if compile_prog "" "" ; then
-    guest_agent_ntddscsi=yes
-    libs_qga="-lsetupapi -lcfgmgr32 $libs_qga"
-  fi
-fi
-
-##########################################
-# virgl renderer probe
-
-if test "$virglrenderer" != "no" ; then
-  cat > $TMPC << EOF
-#include <virglrenderer.h>
-int main(void) { virgl_renderer_poll(); return 0; }
-EOF
-  virgl_cflags=$($pkg_config --cflags virglrenderer 2>/dev/null)
-  virgl_libs=$($pkg_config --libs virglrenderer 2>/dev/null)
-  virgl_version=$($pkg_config --modversion virglrenderer 2>/dev/null)
-  if $pkg_config virglrenderer >/dev/null 2>&1 && \
-     compile_prog "$virgl_cflags" "$virgl_libs" ; then
-    virglrenderer="yes"
-  else
-    if test "$virglrenderer" = "yes" ; then
-      feature_not_found "virglrenderer"
-    fi
-    virglrenderer="no"
-  fi
-fi
-
-##########################################
-# capstone
-
-case "$capstone" in
-  auto | enabled | internal)
-    # Simpler to always update submodule, even if not needed.
-    if test -e "${source_path}/.git" && test $git_update = 'yes' ; then
-      git_submodules="${git_submodules} capstone"
-    fi
-    ;;
-esac
-
-##########################################
-# check if we have fdatasync
-
-fdatasync=no
-cat > $TMPC << EOF
-#include <unistd.h>
-int main(void) {
-#if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0
-return fdatasync(0);
-#else
-#error Not supported
-#endif
-}
-EOF
-if compile_prog "" "" ; then
-    fdatasync=yes
-fi
-
-##########################################
-# check if we have madvise
-
-madvise=no
-cat > $TMPC << EOF
-#include <sys/types.h>
-#include <sys/mman.h>
-#include <stddef.h>
-int main(void) { return madvise(NULL, 0, MADV_DONTNEED); }
-EOF
-if compile_prog "" "" ; then
-    madvise=yes
-fi
-
-##########################################
-# check if we have posix_madvise
-
-posix_madvise=no
-cat > $TMPC << EOF
-#include <sys/mman.h>
-#include <stddef.h>
-int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); }
-EOF
-if compile_prog "" "" ; then
-    posix_madvise=yes
-fi
-
-##########################################
-# check if we have posix_memalign()
-
-posix_memalign=no
-cat > $TMPC << EOF
-#include <stdlib.h>
-int main(void) {
-    void *p;
-    return posix_memalign(&p, 8, 8);
-}
-EOF
-if compile_prog "" "" ; then
-    posix_memalign=yes
-fi
-
-##########################################
-# check if we have posix_syslog
-
-posix_syslog=no
-cat > $TMPC << EOF
-#include <syslog.h>
-int main(void) { openlog("qemu", LOG_PID, LOG_DAEMON); syslog(LOG_INFO, "configure"); return 0; }
-EOF
-if compile_prog "" "" ; then
-    posix_syslog=yes
-fi
-
-##########################################
-# check if we have sem_timedwait
-
-sem_timedwait=no
-cat > $TMPC << EOF
-#include <semaphore.h>
-int main(void) { sem_t s; struct timespec t = {0}; return sem_timedwait(&s, &t); }
-EOF
-if compile_prog "" "" ; then
-    sem_timedwait=yes
-fi
-
-##########################################
-# check if we have strchrnul
-
-strchrnul=no
-cat > $TMPC << EOF
-#include <string.h>
-int main(void);
-// Use a haystack that the compiler shouldn't be able to constant fold
-char *haystack = (char*)&main;
-int main(void) { return strchrnul(haystack, 'x') != &haystack[6]; }
-EOF
-if compile_prog "" "" ; then
-    strchrnul=yes
-fi
-
-#########################################
-# check if we have st_atim
-
-st_atim=no
-cat > $TMPC << EOF
-#include <sys/stat.h>
-#include <stddef.h>
-int main(void) { return offsetof(struct stat, st_atim); }
-EOF
-if compile_prog "" "" ; then
-    st_atim=yes
-fi
-
-##########################################
-# check if trace backend exists
-
-$python "$source_path/scripts/tracetool.py" "--backends=$trace_backends" --check-backends  > /dev/null 2> /dev/null
-if test "$?" -ne 0 ; then
-  error_exit "invalid trace backends" \
-      "Please choose supported trace backends."
-fi
-
-##########################################
-# For 'ust' backend, test if ust headers are present
-if have_backend "ust"; then
-  cat > $TMPC << EOF
-#include <lttng/tracepoint.h>
-int main(void) { return 0; }
-EOF
-  if compile_prog "" "-Wl,--no-as-needed -ldl" ; then
-    if $pkg_config lttng-ust --exists; then
-      lttng_ust_libs=$($pkg_config --libs lttng-ust)
-    else
-      lttng_ust_libs="-llttng-ust -ldl"
-    fi
-    if $pkg_config liburcu-bp --exists; then
-      urcu_bp_libs=$($pkg_config --libs liburcu-bp)
-    else
-      urcu_bp_libs="-lurcu-bp"
-    fi
-  else
-    error_exit "Trace backend 'ust' missing lttng-ust header files"
-  fi
-fi
-
-##########################################
-# For 'dtrace' backend, test if 'dtrace' command is present
-if have_backend "dtrace"; then
-  if ! has 'dtrace' ; then
-    error_exit "dtrace command is not found in PATH $PATH"
-  fi
-  trace_backend_stap="no"
-  if has 'stap' ; then
-    trace_backend_stap="yes"
-
-    # Workaround to avoid dtrace(1) producing a file with 'hidden' symbol
-    # visibility. Define STAP_SDT_V2 to produce 'default' symbol visibility
-    # instead. QEMU --enable-modules depends on this because the SystemTap
-    # semaphores are linked into the main binary and not the module's shared
-    # object.
-    QEMU_CFLAGS="$QEMU_CFLAGS -DSTAP_SDT_V2"
-  fi
-fi
-
-##########################################
-# check and set a backend for coroutine
-
-# We prefer ucontext, but it's not always possible. The fallback
-# is sigcontext. On Windows the only valid backend is the Windows
-# specific one.
-
-ucontext_works=no
-if test "$darwin" != "yes"; then
-  cat > $TMPC << EOF
-#include <ucontext.h>
-#ifdef __stub_makecontext
-#error Ignoring glibc stub makecontext which will always fail
-#endif
-int main(void) { makecontext(0, 0, 0); return 0; }
-EOF
-  if compile_prog "" "" ; then
-    ucontext_works=yes
-  fi
-fi
-
-if test "$coroutine" = ""; then
-  if test "$mingw32" = "yes"; then
-    coroutine=win32
-  elif test "$ucontext_works" = "yes"; then
-    coroutine=ucontext
-  else
-    coroutine=sigaltstack
-  fi
-else
-  case $coroutine in
-  windows)
-    if test "$mingw32" != "yes"; then
-      error_exit "'windows' coroutine backend only valid for Windows"
-    fi
-    # Unfortunately the user visible backend name doesn't match the
-    # coroutine-*.c filename for this case, so we have to adjust it here.
-    coroutine=win32
-    ;;
-  ucontext)
-    if test "$ucontext_works" != "yes"; then
-      feature_not_found "ucontext"
-    fi
-    ;;
-  sigaltstack)
-    if test "$mingw32" = "yes"; then
-      error_exit "only the 'windows' coroutine backend is valid for Windows"
-    fi
-    ;;
-  *)
-    error_exit "unknown coroutine backend $coroutine"
-    ;;
-  esac
-fi
-
-if test "$coroutine_pool" = ""; then
-  coroutine_pool=yes
-fi
-
-if test "$debug_stack_usage" = "yes"; then
-  if test "$coroutine_pool" = "yes"; then
-    echo "WARN: disabling coroutine pool for stack usage debugging"
-    coroutine_pool=no
-  fi
-fi
-
-##################################################
-# SafeStack
-
-
-if test "$safe_stack" = "yes"; then
-cat > $TMPC << EOF
-int main(int argc, char *argv[])
-{
-#if ! __has_feature(safe_stack)
-#error SafeStack Disabled
-#endif
-    return 0;
-}
-EOF
-  flag="-fsanitize=safe-stack"
-  # Check that safe-stack is supported and enabled.
-  if compile_prog "-Werror $flag" "$flag"; then
-    # Flag needed both at compilation and at linking
-    QEMU_CFLAGS="$QEMU_CFLAGS $flag"
-    QEMU_LDFLAGS="$QEMU_LDFLAGS $flag"
-  else
-    error_exit "SafeStack not supported by your compiler"
-  fi
-  if test "$coroutine" != "ucontext"; then
-    error_exit "SafeStack is only supported by the coroutine backend ucontext"
-  fi
-else
-cat > $TMPC << EOF
-int main(int argc, char *argv[])
-{
-#if defined(__has_feature)
-#if __has_feature(safe_stack)
-#error SafeStack Enabled
-#endif
-#endif
-    return 0;
-}
-EOF
-if test "$safe_stack" = "no"; then
-  # Make sure that safe-stack is disabled
-  if ! compile_prog "-Werror" ""; then
-    # SafeStack was already enabled, try to explicitly remove the feature
-    flag="-fno-sanitize=safe-stack"
-    if ! compile_prog "-Werror $flag" "$flag"; then
-      error_exit "Configure cannot disable SafeStack"
-    fi
-    QEMU_CFLAGS="$QEMU_CFLAGS $flag"
-    QEMU_LDFLAGS="$QEMU_LDFLAGS $flag"
-  fi
-else # "$safe_stack" = ""
-  # Set safe_stack to yes or no based on pre-existing flags
-  if compile_prog "-Werror" ""; then
-    safe_stack="no"
-  else
-    safe_stack="yes"
-    if test "$coroutine" != "ucontext"; then
-      error_exit "SafeStack is only supported by the coroutine backend ucontext"
-    fi
-  fi
-fi
-fi
-
-##########################################
-# check if we have open_by_handle_at
-
-open_by_handle_at=no
-cat > $TMPC << EOF
-#include <fcntl.h>
-#if !defined(AT_EMPTY_PATH)
-# error missing definition
-#else
-int main(void) { struct file_handle fh; return open_by_handle_at(0, &fh, 0); }
-#endif
-EOF
-if compile_prog "" "" ; then
-    open_by_handle_at=yes
-fi
-
-########################################
-# check if we have linux/magic.h
-
-linux_magic_h=no
-cat > $TMPC << EOF
-#include <linux/magic.h>
-int main(void) {
-  return 0;
-}
-EOF
-if compile_prog "" "" ; then
-    linux_magic_h=yes
-fi
-
-########################################
-# check if we have valgrind/valgrind.h
-
-valgrind_h=no
-cat > $TMPC << EOF
-#include <valgrind/valgrind.h>
-int main(void) {
-  return 0;
-}
-EOF
-if compile_prog "" "" ; then
-    valgrind_h=yes
-fi
-
-########################################
-# check if environ is declared
-
-has_environ=no
-cat > $TMPC << EOF
-#include <unistd.h>
-int main(void) {
-    environ = 0;
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-    has_environ=yes
-fi
-
-########################################
-# check if cpuid.h is usable.
-
-cat > $TMPC << EOF
-#include <cpuid.h>
-int main(void) {
-    unsigned a, b, c, d;
-    int max = __get_cpuid_max(0, 0);
-
-    if (max >= 1) {
-        __cpuid(1, a, b, c, d);
-    }
-
-    if (max >= 7) {
-        __cpuid_count(7, 0, a, b, c, d);
-    }
-
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-    cpuid_h=yes
-fi
-
-##########################################
-# avx2 optimization requirement check
-#
-# There is no point enabling this if cpuid.h is not usable,
-# since we won't be able to select the new routines.
-
-if test "$cpuid_h" = "yes" && test "$avx2_opt" != "no"; then
-  cat > $TMPC << EOF
-#pragma GCC push_options
-#pragma GCC target("avx2")
-#include <cpuid.h>
-#include <immintrin.h>
-static int bar(void *a) {
-    __m256i x = *(__m256i *)a;
-    return _mm256_testz_si256(x, x);
-}
-int main(int argc, char *argv[]) { return bar(argv[0]); }
-EOF
-  if compile_object "" ; then
-    avx2_opt="yes"
-  else
-    avx2_opt="no"
-  fi
-fi
-
-##########################################
-# avx512f optimization requirement check
-#
-# There is no point enabling this if cpuid.h is not usable,
-# since we won't be able to select the new routines.
-# by default, it is turned off.
-# if user explicitly want to enable it, check environment
-
-if test "$cpuid_h" = "yes" && test "$avx512f_opt" = "yes"; then
-  cat > $TMPC << EOF
-#pragma GCC push_options
-#pragma GCC target("avx512f")
-#include <cpuid.h>
-#include <immintrin.h>
-static int bar(void *a) {
-    __m512i x = *(__m512i *)a;
-    return _mm512_test_epi64_mask(x, x);
-}
-int main(int argc, char *argv[])
-{
-       return bar(argv[0]);
-}
-EOF
-  if ! compile_object "" ; then
-    avx512f_opt="no"
-  fi
-else
-  avx512f_opt="no"
-fi
-
-########################################
-# check if __[u]int128_t is usable.
-
-int128=no
-cat > $TMPC << EOF
-__int128_t a;
-__uint128_t b;
-int main (void) {
-  a = a + b;
-  b = a * b;
-  a = a * a;
-  return 0;
-}
-EOF
-if compile_prog "" "" ; then
-    int128=yes
-fi
-
-#########################################
-# See if 128-bit atomic operations are supported.
-
-atomic128=no
-if test "$int128" = "yes"; then
-  cat > $TMPC << EOF
-int main(void)
-{
-  unsigned __int128 x = 0, y = 0;
-  y = __atomic_load_16(&x, 0);
-  __atomic_store_16(&x, y, 0);
-  __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0);
-  return 0;
-}
-EOF
-  if compile_prog "" "" ; then
-    atomic128=yes
-  fi
-fi
-
-cmpxchg128=no
-if test "$int128" = yes && test "$atomic128" = no; then
-  cat > $TMPC << EOF
-int main(void)
-{
-  unsigned __int128 x = 0, y = 0;
-  __sync_val_compare_and_swap_16(&x, y, x);
-  return 0;
-}
-EOF
-  if compile_prog "" "" ; then
-    cmpxchg128=yes
-  fi
-fi
-
-#########################################
-# See if 64-bit atomic operations are supported.
-# Note that without __atomic builtins, we can only
-# assume atomic loads/stores max at pointer size.
-
-cat > $TMPC << EOF
-#include <stdint.h>
-int main(void)
-{
-  uint64_t x = 0, y = 0;
-#ifdef __ATOMIC_RELAXED
-  y = __atomic_load_n(&x, __ATOMIC_RELAXED);
-  __atomic_store_n(&x, y, __ATOMIC_RELAXED);
-  __atomic_compare_exchange_n(&x, &y, x, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-  __atomic_exchange_n(&x, y, __ATOMIC_RELAXED);
-  __atomic_fetch_add(&x, y, __ATOMIC_RELAXED);
-#else
-  typedef char is_host64[sizeof(void *) >= sizeof(uint64_t) ? 1 : -1];
-  __sync_lock_test_and_set(&x, y);
-  __sync_val_compare_and_swap(&x, y, 0);
-  __sync_fetch_and_add(&x, y);
-#endif
-  return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  atomic64=yes
-fi
-
-#########################################
-# See if --dynamic-list is supported by the linker
-ld_dynamic_list="no"
-if test "$static" = "no" ; then
-    cat > $TMPTXT <<EOF
-{
-  foo;
-};
-EOF
-
-    cat > $TMPC <<EOF
-#include <stdio.h>
-void foo(void);
-
-void foo(void)
-{
-  printf("foo\n");
-}
-
-int main(void)
-{
-  foo();
-  return 0;
-}
-EOF
-
-    if compile_prog "" "-Wl,--dynamic-list=$TMPTXT" ; then
-        ld_dynamic_list="yes"
-    fi
-fi
-
-#########################################
-# See if -exported_symbols_list is supported by the linker
-
-ld_exported_symbols_list="no"
-if test "$static" = "no" ; then
-    cat > $TMPTXT <<EOF
-  _foo
-EOF
-
-    if compile_prog "" "-Wl,-exported_symbols_list,$TMPTXT" ; then
-        ld_exported_symbols_list="yes"
-    fi
-fi
-
-if  test "$plugins" = "yes" &&
-    test "$ld_dynamic_list" = "no" &&
-    test "$ld_exported_symbols_list" = "no" ; then
-  error_exit \
-      "Plugin support requires dynamic linking and specifying a set of symbols " \
-      "that are exported to plugins. Unfortunately your linker doesn't " \
-      "support the flag (--dynamic-list or -exported_symbols_list) used " \
-      "for this purpose. You can't build with --static."
-fi
-
-########################################
-# See if __attribute__((alias)) is supported.
-# This false for Xcode 9, but has been remedied for Xcode 10.
-# Unfortunately, travis uses Xcode 9 by default.
-
-attralias=no
-cat > $TMPC << EOF
-int x = 1;
-extern const int y __attribute__((alias("x")));
-int main(void) { return 0; }
-EOF
-if compile_prog "" "" ; then
-    attralias=yes
-fi
-
-########################################
-# check if getauxval is available.
-
-getauxval=no
-cat > $TMPC << EOF
-#include <sys/auxv.h>
-int main(void) {
-  return getauxval(AT_HWCAP) == 0;
-}
-EOF
-if compile_prog "" "" ; then
-    getauxval=yes
-fi
-
-########################################
-# check if ccache is interfering with
-# semantic analysis of macros
-
-unset CCACHE_CPP2
-ccache_cpp2=no
-cat > $TMPC << EOF
-static const int Z = 1;
-#define fn() ({ Z; })
-#define TAUT(X) ((X) == Z)
-#define PAREN(X, Y) (X == Y)
-#define ID(X) (X)
-int main(int argc, char *argv[])
-{
-    int x = 0, y = 0;
-    x = ID(x);
-    x = fn();
-    fn();
-    if (PAREN(x, y)) return 0;
-    if (TAUT(Z)) return 0;
-    return 0;
-}
-EOF
-
-if ! compile_object "-Werror"; then
-    ccache_cpp2=yes
-fi
-
-#################################################
-# clang does not support glibc + FORTIFY_SOURCE.
-
-if test "$fortify_source" != "no"; then
-  if echo | $cc -dM -E - | grep __clang__ > /dev/null 2>&1 ; then
-    fortify_source="no";
-  elif test -n "$cxx" && has $cxx &&
-       echo | $cxx -dM -E - | grep __clang__ >/dev/null 2>&1 ; then
-    fortify_source="no";
-  else
-    fortify_source="yes"
-  fi
-fi
-
-###############################################
-# Check if copy_file_range is provided by glibc
-have_copy_file_range=no
-cat > $TMPC << EOF
-#include <unistd.h>
-int main(void) {
-  copy_file_range(0, NULL, 0, NULL, 0, 0);
-  return 0;
-}
-EOF
-if compile_prog "" "" ; then
-    have_copy_file_range=yes
-fi
-
-##########################################
-# check if struct fsxattr is available via linux/fs.h
-
-have_fsxattr=no
-cat > $TMPC << EOF
-#include <linux/fs.h>
-struct fsxattr foo;
-int main(void) {
-  return 0;
-}
-EOF
-if compile_prog "" "" ; then
-    have_fsxattr=yes
-fi
-
-##########################################
-# check for usable membarrier system call
-if test "$membarrier" = "yes"; then
-    have_membarrier=no
-    if test "$mingw32" = "yes" ; then
-        have_membarrier=yes
-    elif test "$linux" = "yes" ; then
-        cat > $TMPC << EOF
-    #include <linux/membarrier.h>
-    #include <sys/syscall.h>
-    #include <unistd.h>
-    #include <stdlib.h>
-    int main(void) {
-        syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0);
-        syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0);
-       exit(0);
-    }
-EOF
-        if compile_prog "" "" ; then
-            have_membarrier=yes
-        fi
-    fi
-    if test "$have_membarrier" = "no"; then
-      feature_not_found "membarrier" "membarrier system call not available"
-    fi
-else
-    # Do not enable it by default even for Mingw32, because it doesn't
-    # work on Wine.
-    membarrier=no
-fi
-
-##########################################
-# check if rtnetlink.h exists and is useful
-have_rtnetlink=no
-cat > $TMPC << EOF
-#include <linux/rtnetlink.h>
-int main(void) {
-  return IFLA_PROTO_DOWN;
-}
-EOF
-if compile_prog "" "" ; then
-    have_rtnetlink=yes
-fi
-
-##########################################
-# check for usable AF_VSOCK environment
-have_af_vsock=no
-cat > $TMPC << EOF
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#if !defined(AF_VSOCK)
-# error missing AF_VSOCK flag
-#endif
-#include <linux/vm_sockets.h>
-int main(void) {
-    int sock, ret;
-    struct sockaddr_vm svm;
-    socklen_t len = sizeof(svm);
-    sock = socket(AF_VSOCK, SOCK_STREAM, 0);
-    ret = getpeername(sock, (struct sockaddr *)&svm, &len);
-    if ((ret == -1) && (errno == ENOTCONN)) {
-        return 0;
-    }
-    return -1;
-}
-EOF
-if compile_prog "" "" ; then
-    have_af_vsock=yes
-fi
-
-##########################################
-# check for usable AF_ALG environment
-have_afalg=no
-cat > $TMPC << EOF
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <linux/if_alg.h>
-int main(void) {
-    int sock;
-    sock = socket(AF_ALG, SOCK_SEQPACKET, 0);
-    return sock;
-}
-EOF
-if compile_prog "" "" ; then
-    have_afalg=yes
-fi
-if test "$crypto_afalg" = "yes"
-then
-    if test "$have_afalg" != "yes"
-    then
-       error_exit "AF_ALG requested but could not be detected"
-    fi
-fi
-
-
-#################################################
-# check for sysmacros.h
-
-have_sysmacros=no
-cat > $TMPC << EOF
-#include <sys/sysmacros.h>
-int main(void) {
-    return makedev(0, 0);
-}
-EOF
-if compile_prog "" "" ; then
-    have_sysmacros=yes
-fi
-
-##########################################
-# check for _Static_assert()
-
-have_static_assert=no
-cat > $TMPC << EOF
-_Static_assert(1, "success");
-int main(void) {
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-    have_static_assert=yes
-fi
-
-##########################################
-# check for utmpx.h, it is missing e.g. on OpenBSD
-
-have_utmpx=no
-cat > $TMPC << EOF
-#include <utmpx.h>
-struct utmpx user_info;
-int main(void) {
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-    have_utmpx=yes
-fi
-
-##########################################
-# check for getrandom()
-
-have_getrandom=no
-cat > $TMPC << EOF
-#include <sys/random.h>
-int main(void) {
-    return getrandom(0, 0, GRND_NONBLOCK);
-}
-EOF
-if compile_prog "" "" ; then
-    have_getrandom=yes
-fi
-
-##########################################
-# checks for sanitizers
-
-have_asan=no
-have_ubsan=no
-have_asan_iface_h=no
-have_asan_iface_fiber=no
-
-if test "$sanitizers" = "yes" ; then
-  write_c_skeleton
-  if compile_prog "$CPU_CFLAGS -Werror -fsanitize=address" ""; then
-      have_asan=yes
-  fi
-
-  # we could use a simple skeleton for flags checks, but this also
-  # detect the static linking issue of ubsan, see also:
-  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84285
-  cat > $TMPC << EOF
-#include <stdlib.h>
-int main(void) {
-    void *tmp = malloc(10);
-    if (tmp != NULL) {
-        return *(int *)(tmp + 2);
-    }
-    return 1;
-}
-EOF
-  if compile_prog "$CPU_CFLAGS -Werror -fsanitize=undefined" ""; then
-      have_ubsan=yes
-  fi
-
-  if check_include "sanitizer/asan_interface.h" ; then
-      have_asan_iface_h=yes
-  fi
-
-  cat > $TMPC << EOF
-#include <sanitizer/asan_interface.h>
-int main(void) {
-  __sanitizer_start_switch_fiber(0, 0, 0);
-  return 0;
-}
-EOF
-  if compile_prog "$CPU_CFLAGS -Werror -fsanitize=address" "" ; then
-      have_asan_iface_fiber=yes
-  fi
-fi
-
-##########################################
-# checks for fuzzer
-if test "$fuzzing" = "yes" && test -z "${LIB_FUZZING_ENGINE+xxx}"; then
-  write_c_fuzzer_skeleton
-  if compile_prog "$CPU_CFLAGS -Werror -fsanitize=fuzzer" ""; then
-    have_fuzzer=yes
-  else
-    error_exit "Your compiler doesn't support -fsanitize=fuzzer"
-    exit 1
-  fi
-fi
-
-# Thread sanitizer is, for now, much noisier than the other sanitizers;
-# keep it separate until that is not the case.
-if test "$tsan" = "yes" && test "$sanitizers" = "yes"; then
-  error_exit "TSAN is not supported with other sanitiziers."
-fi
-have_tsan=no
-have_tsan_iface_fiber=no
-if test "$tsan" = "yes" ; then
-  write_c_skeleton
-  if compile_prog "$CPU_CFLAGS -Werror -fsanitize=thread" "" ; then
-      have_tsan=yes
-  fi
-  cat > $TMPC << EOF
-#include <sanitizer/tsan_interface.h>
-int main(void) {
-  __tsan_create_fiber(0);
-  return 0;
-}
-EOF
-  if compile_prog "$CPU_CFLAGS -Werror -fsanitize=thread" "" ; then
-      have_tsan_iface_fiber=yes
-  fi
-fi
-
-##########################################
-# check for libpmem
-
-if test "$libpmem" != "no"; then
-       if $pkg_config --exists "libpmem"; then
-               libpmem="yes"
-               libpmem_libs=$($pkg_config --libs libpmem)
-               libpmem_cflags=$($pkg_config --cflags libpmem)
-       else
-               if test "$libpmem" = "yes" ; then
-                       feature_not_found "libpmem" "Install nvml or pmdk"
-               fi
-               libpmem="no"
-       fi
-fi
-
-##########################################
-# check for libdaxctl
-
-if test "$libdaxctl" != "no"; then
-       if $pkg_config --atleast-version=57 "libdaxctl"; then
-               libdaxctl="yes"
-               libdaxctl_libs=$($pkg_config --libs libdaxctl)
-               libdaxctl_cflags=$($pkg_config --cflags libdaxctl)
-       else
-               if test "$libdaxctl" = "yes" ; then
-                       feature_not_found "libdaxctl" "Install libdaxctl"
-               fi
-               libdaxctl="no"
-       fi
-fi
-
-##########################################
-# check for slirp
-
-case "$slirp" in
-  auto | enabled | internal)
-    # Simpler to always update submodule, even if not needed.
-    if test -e "${source_path}/.git" && test $git_update = 'yes' ; then
-      git_submodules="${git_submodules} slirp"
-    fi
-    ;;
-esac
-
-##########################################
-# check for usable __NR_keyctl syscall
-
-if test "$linux" = "yes" ; then
-
-    have_keyring=no
-    cat > $TMPC << EOF
-#include <errno.h>
-#include <asm/unistd.h>
-#include <linux/keyctl.h>
-#include <unistd.h>
-int main(void) {
-    return syscall(__NR_keyctl, KEYCTL_READ, 0, NULL, NULL, 0);
-}
-EOF
-    if compile_prog "" "" ; then
-        have_keyring=yes
-    fi
-fi
-if test "$secret_keyring" != "no"
-then
-    if test "$have_keyring" = "yes"
-    then
-       secret_keyring=yes
-    else
-       if test "$secret_keyring" = "yes"
-       then
-           error_exit "syscall __NR_keyctl requested, \
-but not implemented on your system"
-       else
-           secret_keyring=no
-       fi
-    fi
-fi
-
-##########################################
-# End of CC checks
-# After here, no more $cc or $ld runs
-
-write_c_skeleton
-
-if test "$gcov" = "yes" ; then
-  :
-elif test "$fortify_source" = "yes" ; then
-  QEMU_CFLAGS="-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 $QEMU_CFLAGS"
-  debug=no
-fi
-
-case "$ARCH" in
-alpha)
-  # Ensure there's only a single GP
-  QEMU_CFLAGS="-msmall-data $QEMU_CFLAGS"
-;;
-esac
-
-if test "$gprof" = "yes" ; then
-  QEMU_CFLAGS="-p $QEMU_CFLAGS"
-  QEMU_LDFLAGS="-p $QEMU_LDFLAGS"
-fi
-
-if test "$have_asan" = "yes"; then
-  QEMU_CFLAGS="-fsanitize=address $QEMU_CFLAGS"
-  QEMU_LDFLAGS="-fsanitize=address $QEMU_LDFLAGS"
-  if test "$have_asan_iface_h" = "no" ; then
-      echo "ASAN build enabled, but ASAN header missing." \
-           "Without code annotation, the report may be inferior."
-  elif test "$have_asan_iface_fiber" = "no" ; then
-      echo "ASAN build enabled, but ASAN header is too old." \
-           "Without code annotation, the report may be inferior."
-  fi
-fi
-if test "$have_tsan" = "yes" ; then
-  if test "$have_tsan_iface_fiber" = "yes" ; then
-    QEMU_CFLAGS="-fsanitize=thread $QEMU_CFLAGS"
-    QEMU_LDFLAGS="-fsanitize=thread $QEMU_LDFLAGS"
-  else
-    error_exit "Cannot enable TSAN due to missing fiber annotation interface."
-  fi
-elif test "$tsan" = "yes" ; then
-  error_exit "Cannot enable TSAN due to missing sanitize thread interface."
-fi
-if test "$have_ubsan" = "yes"; then
-  QEMU_CFLAGS="-fsanitize=undefined $QEMU_CFLAGS"
-  QEMU_LDFLAGS="-fsanitize=undefined $QEMU_LDFLAGS"
-fi
-
-##########################################
-# Do we have libnfs
-if test "$libnfs" != "no" ; then
-  if $pkg_config --atleast-version=1.9.3 libnfs; then
-    libnfs="yes"
-    libnfs_libs=$($pkg_config --libs libnfs)
-  else
-    if test "$libnfs" = "yes" ; then
-      feature_not_found "libnfs" "Install libnfs devel >= 1.9.3"
-    fi
-    libnfs="no"
-  fi
-fi
-
-##########################################
-
-# Exclude --warn-common with TSan to suppress warnings from the TSan libraries.
-if test "$solaris" = "no" && test "$tsan" = "no"; then
-    if $ld --version 2>/dev/null | grep "GNU ld" >/dev/null 2>/dev/null ; then
-        QEMU_LDFLAGS="-Wl,--warn-common $QEMU_LDFLAGS"
-    fi
-fi
-
-# Use ASLR, no-SEH and DEP if available
-if test "$mingw32" = "yes" ; then
-    flags="--no-seh --nxcompat"
-
-    # Disable ASLR for debug builds to allow debugging with gdb
-    if test "$debug" = "no" ; then
-        flags="--dynamicbase $flags"
-    fi
-
-    for flag in $flags; do
-        if ld_has $flag ; then
-            QEMU_LDFLAGS="-Wl,$flag $QEMU_LDFLAGS"
-        fi
-    done
-fi
-
-# We can only support ivshmem if we have eventfd
-if [ "$eventfd" = "yes" ]; then
-  ivshmem=yes
-fi
-
-if test "$softmmu" = yes ; then
-  if test "$linux" = yes; then
-    if test "$virtfs" != no && test "$cap_ng" = yes && test "$attr" = yes ; then
-      virtfs=yes
-    else
-      if test "$virtfs" = yes; then
-        error_exit "VirtFS requires libcap-ng devel and libattr devel"
-      fi
-      virtfs=no
-    fi
-  else
-    if test "$virtfs" = yes; then
-      error_exit "VirtFS is supported only on Linux"
-    fi
-    virtfs=no
-  fi
-fi
-
-# Probe for guest agent support/options
-
-if [ "$guest_agent" != "no" ]; then
-  if [ "$softmmu" = no -a "$want_tools" = no ] ; then
-      guest_agent=no
-  elif [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" -o "$mingw32" = "yes" ] ; then
-      guest_agent=yes
-  elif [ "$guest_agent" != yes ]; then
-      guest_agent=no
-  else
-      error_exit "Guest agent is not supported on this platform"
-  fi
-fi
-
-# Guest agent Window MSI  package
-
-if test "$guest_agent" != yes; then
-  if test "$guest_agent_msi" = yes; then
-    error_exit "MSI guest agent package requires guest agent enabled"
-  fi
-  guest_agent_msi=no
-elif test "$mingw32" != "yes"; then
-  if test "$guest_agent_msi" = "yes"; then
-    error_exit "MSI guest agent package is available only for MinGW Windows cross-compilation"
-  fi
-  guest_agent_msi=no
-elif ! has wixl; then
-  if test "$guest_agent_msi" = "yes"; then
-    error_exit "MSI guest agent package requires wixl tool installed ( usually from msitools package )"
-  fi
-  guest_agent_msi=no
-else
-  # we support qemu-ga, mingw32, and wixl: default to MSI enabled if it wasn't
-  # disabled explicitly
-  if test "$guest_agent_msi" != "no"; then
-    guest_agent_msi=yes
-  fi
-fi
-
-if test "$guest_agent_msi" = "yes"; then
-  if test "$guest_agent_with_vss" = "yes"; then
-    QEMU_GA_MSI_WITH_VSS="-D InstallVss"
-  fi
-
-  if test "$QEMU_GA_MANUFACTURER" = ""; then
-    QEMU_GA_MANUFACTURER=QEMU
-  fi
-
-  if test "$QEMU_GA_DISTRO" = ""; then
-    QEMU_GA_DISTRO=Linux
-  fi
-
-  if test "$QEMU_GA_VERSION" = ""; then
-      QEMU_GA_VERSION=$(cat $source_path/VERSION)
-  fi
-
-  QEMU_GA_MSI_MINGW_DLL_PATH="-D Mingw_dlls=$($pkg_config --variable=prefix glib-2.0)/bin"
-
-  case "$cpu" in
-  x86_64)
-    QEMU_GA_MSI_ARCH="-a x64 -D Arch=64"
-    ;;
-  i386)
-    QEMU_GA_MSI_ARCH="-D Arch=32"
-    ;;
-  *)
-    error_exit "CPU $cpu not supported for building installation package"
-    ;;
-  esac
-fi
-
-# Mac OS X ships with a broken assembler
-roms=
-if { test "$cpu" = "i386" || test "$cpu" = "x86_64"; } && \
-        test "$targetos" != "Darwin" && test "$targetos" != "SunOS" && \
-        test "$targetos" != "Haiku" && test "$softmmu" = yes ; then
-    # Different host OS linkers have different ideas about the name of the ELF
-    # emulation. Linux and OpenBSD/amd64 use 'elf_i386'; FreeBSD uses the _fbsd
-    # variant; OpenBSD/i386 uses the _obsd variant; and Windows uses i386pe.
-    for emu in elf_i386 elf_i386_fbsd elf_i386_obsd i386pe; do
-        if "$ld" -verbose 2>&1 | grep -q "^[[:space:]]*$emu[[:space:]]*$"; then
-            ld_i386_emulation="$emu"
-            roms="optionrom"
-            break
-        fi
-    done
-fi
-
-# Only build s390-ccw bios if we're on s390x and the compiler has -march=z900
-if test "$cpu" = "s390x" && test "$softmmu" = yes ; then
-  write_c_skeleton
-  if compile_prog "-march=z900" ""; then
-    roms="$roms s390-ccw"
-    # SLOF is required for building the s390-ccw firmware on s390x,
-    # since it is using the libnet code from SLOF for network booting.
-    if test -e "${source_path}/.git" ; then
-      git_submodules="${git_submodules} roms/SLOF"
-    fi
-  fi
-fi
-
-# Check that the C++ compiler exists and works with the C compiler.
-# All the QEMU_CXXFLAGS are based on QEMU_CFLAGS. Keep this at the end to don't miss any other that could be added.
-if has $cxx; then
-    cat > $TMPC <<EOF
-int c_function(void);
-int main(void) { return c_function(); }
-EOF
-
-    compile_object
-
-    cat > $TMPCXX <<EOF
-extern "C" {
-   int c_function(void);
-}
-int c_function(void) { return 42; }
-EOF
-
-    update_cxxflags
-
-    if do_cxx $CXXFLAGS $CONFIGURE_CXXFLAGS $QEMU_CXXFLAGS -o $TMPE $TMPCXX $TMPO $QEMU_LDFLAGS; then
-        # C++ compiler $cxx works ok with C compiler $cc
-        :
-    else
-        echo "C++ compiler $cxx does not work with C compiler $cc"
-        echo "Disabling C++ specific optional code"
-        cxx=
-    fi
-else
-    echo "No C++ compiler available; disabling C++ specific optional code"
-    cxx=
-fi
-
-if test $git_update = 'yes' ; then
-    (cd "${source_path}" && GIT="$git" "./scripts/git-submodule.sh" update "$git_submodules")
-fi
-
-config_host_mak="config-host.mak"
-
-echo "# Automatically generated by configure - do not modify" > $config_host_mak
-echo >> $config_host_mak
+  esac
+done
 
-echo all: >> $config_host_mak
-echo "GIT=$git" >> $config_host_mak
-echo "GIT_SUBMODULES=$git_submodules" >> $config_host_mak
-echo "GIT_UPDATE=$git_update" >> $config_host_mak
+if ! test -e "$source_path/.git"
+then
+    git_submodules_action="validate"
+fi
+
+# if ! test -f "$source_path/subprojects/keycodemapdb/README" \
+#     && test "$download" = disabled
+# then
+#     echo
+#     echo "ERROR: missing subprojects"
+#     echo
+#     if test -e "$source_path/.git"; then
+#         echo "--disable-download specified but subprojects were not"
+#         echo 'checked out.  Please invoke "meson subprojects download"'
+#         echo "before configuring QEMU, or remove --disable-download"
+#         echo "from the command line."
+#     else
+#         echo "This is not a GIT checkout but subproject content appears to"
+#         echo "be missing. Do not use 'git archive' or GitHub download links"
+#         echo "to acquire QEMU source archives. Non-GIT builds are only"
+#         echo "supported with source archives linked from:"
+#         echo
+#         echo "  https://www.qemu.org/download/#source"
+#         echo
+#         echo "Developers working with GIT can use scripts/archive-source.sh"
+#         echo "if they need to create valid source archives."
+#     fi
+#     echo
+#     exit 1
+# fi
 
-echo "ARCH=$ARCH" >> $config_host_mak
+default_target_list=""
+mak_wilds=""
 
-if test "$default_devices" = "yes" ; then
-  echo "CONFIG_MINIKCONF_MODE=--defconfig" >> $config_host_mak
+if [ -n "$host_arch" ] && [ -d "$source_path/common-user/host/$host_arch" ]; then
+    if [ "$linux_user" != no ]; then
+        if [ "$targetos" = linux ]; then
+            linux_user=yes
+        elif [ "$linux_user" = yes ]; then
+            error_exit "linux-user not supported on this architecture"
+        fi
+        if [ "$linux_user" = "yes" ]; then
+            mak_wilds="${mak_wilds} $source_path/configs/targets/*-linux-user.mak"
+        fi
+    fi
+    if [ "$bsd_user" != no ]; then
+        if [ "$bsd_user" = "" ]; then
+            test $targetos = freebsd && bsd_user=yes
+        fi
+        if [ "$bsd_user" = yes ] && ! [ -d "$source_path/bsd-user/$targetos" ]; then
+            error_exit "bsd-user not supported on this host OS"
+        fi
+        if [ "$bsd_user" = "yes" ]; then
+            mak_wilds="${mak_wilds} $source_path/configs/targets/*-bsd-user.mak"
+        fi
+    fi
 else
-  echo "CONFIG_MINIKCONF_MODE=--allnoconfig" >> $config_host_mak
-fi
-if test "$debug_tcg" = "yes" ; then
-  echo "CONFIG_DEBUG_TCG=y" >> $config_host_mak
-fi
-if test "$strip_opt" = "yes" ; then
-  echo "STRIP=${strip}" >> $config_host_mak
-fi
-if test "$bigendian" = "yes" ; then
-  echo "HOST_WORDS_BIGENDIAN=y" >> $config_host_mak
+    if [ "$linux_user" = yes ] || [ "$bsd_user" = yes ]; then
+        error_exit "user mode emulation not supported on this architecture"
+    fi
 fi
-if test "$mingw32" = "yes" ; then
-  echo "CONFIG_WIN32=y" >> $config_host_mak
-  rc_version=$(cat $source_path/VERSION)
-  version_major=${rc_version%%.*}
-  rc_version=${rc_version#*.}
-  version_minor=${rc_version%%.*}
-  rc_version=${rc_version#*.}
-  version_subminor=${rc_version%%.*}
-  version_micro=0
-  echo "CONFIG_FILEVERSION=$version_major,$version_minor,$version_subminor,$version_micro" >> $config_host_mak
-  echo "CONFIG_PRODUCTVERSION=$version_major,$version_minor,$version_subminor,$version_micro" >> $config_host_mak
-  if test "$guest_agent_with_vss" = "yes" ; then
-    echo "CONFIG_QGA_VSS=y" >> $config_host_mak
-    echo "QGA_VSS_PROVIDER=$qga_vss_provider" >> $config_host_mak
-    echo "WIN_SDK=\"$win_sdk\"" >> $config_host_mak
-  fi
-  if test "$guest_agent_ntddscsi" = "yes" ; then
-    echo "CONFIG_QGA_NTDDSCSI=y" >> $config_host_mak
-  fi
-  if test "$guest_agent_msi" = "yes"; then
-    echo "CONFIG_QGA_MSI=y" >> $config_host_mak
-    echo "QEMU_GA_MSI_MINGW_DLL_PATH=${QEMU_GA_MSI_MINGW_DLL_PATH}" >> $config_host_mak
-    echo "QEMU_GA_MSI_WITH_VSS=${QEMU_GA_MSI_WITH_VSS}" >> $config_host_mak
-    echo "QEMU_GA_MSI_ARCH=${QEMU_GA_MSI_ARCH}" >> $config_host_mak
-    echo "QEMU_GA_MANUFACTURER=${QEMU_GA_MANUFACTURER}" >> $config_host_mak
-    echo "QEMU_GA_DISTRO=${QEMU_GA_DISTRO}" >> $config_host_mak
-    echo "QEMU_GA_VERSION=${QEMU_GA_VERSION}" >> $config_host_mak
-  fi
-else
-  echo "CONFIG_POSIX=y" >> $config_host_mak
+if [ "$system" = "yes" ]; then
+    mak_wilds="${mak_wilds} $source_path/configs/targets/*-softmmu.mak"
 fi
 
-if test "$linux" = "yes" ; then
-  echo "CONFIG_LINUX=y" >> $config_host_mak
-fi
+for config in $mak_wilds; do
+    target="$(basename "$config" .mak)"
+    if echo "$target_list_exclude" | grep -vq "$target"; then
+        default_target_list="${default_target_list} $target"
+    fi
+done
 
-if test "$darwin" = "yes" ; then
-  echo "CONFIG_DARWIN=y" >> $config_host_mak
-fi
+if test x"$show_help" = x"yes" ; then
+cat << EOF
 
-if test "$solaris" = "yes" ; then
-  echo "CONFIG_SOLARIS=y" >> $config_host_mak
-fi
-if test "$haiku" = "yes" ; then
-  echo "CONFIG_HAIKU=y" >> $config_host_mak
-fi
-if test "$static" = "yes" ; then
-  echo "CONFIG_STATIC=y" >> $config_host_mak
-fi
-if test "$profiler" = "yes" ; then
-  echo "CONFIG_PROFILER=y" >> $config_host_mak
-fi
-if test "$want_tools" = "yes" ; then
-  echo "CONFIG_TOOLS=y" >> $config_host_mak
-fi
-if test "$guest_agent" = "yes" ; then
-  echo "CONFIG_GUEST_AGENT=y" >> $config_host_mak
-fi
-echo "CONFIG_SMBD_COMMAND=\"$smbd\"" >> $config_host_mak
-if test "$vde" = "yes" ; then
-  echo "CONFIG_VDE=y" >> $config_host_mak
-  echo "VDE_LIBS=$vde_libs" >> $config_host_mak
-fi
-if test "$netmap" = "yes" ; then
-  echo "CONFIG_NETMAP=y" >> $config_host_mak
-fi
-if test "$l2tpv3" = "yes" ; then
-  echo "CONFIG_L2TPV3=y" >> $config_host_mak
-fi
-if test "$gprof" = "yes" ; then
-  echo "CONFIG_GPROF=y" >> $config_host_mak
-fi
-if test "$cap_ng" = "yes" ; then
-  echo "CONFIG_LIBCAP_NG=y" >> $config_host_mak
-  echo "LIBCAP_NG_LIBS=$cap_libs" >> $config_host_mak
-fi
-echo "CONFIG_AUDIO_DRIVERS=$audio_drv_list" >> $config_host_mak
-for drv in $audio_drv_list; do
-    def=CONFIG_AUDIO_$(echo $drv | LC_ALL=C tr '[a-z]' '[A-Z]')
-    echo "$def=y" >> $config_host_mak
-done
-if test "$alsa" = "yes" ; then
-    echo "CONFIG_ALSA=y" >> $config_host_mak
-fi
-echo "ALSA_LIBS=$alsa_libs" >> $config_host_mak
-echo "ALSA_CFLAGS=$alsa_cflags" >> $config_host_mak
-if test "$libpulse" = "yes" ; then
-    echo "CONFIG_LIBPULSE=y" >> $config_host_mak
-fi
-echo "PULSE_LIBS=$pulse_libs" >> $config_host_mak
-echo "PULSE_CFLAGS=$pulse_cflags" >> $config_host_mak
-echo "COREAUDIO_LIBS=$coreaudio_libs" >> $config_host_mak
-echo "DSOUND_LIBS=$dsound_libs" >> $config_host_mak
-echo "OSS_LIBS=$oss_libs" >> $config_host_mak
-if test "$libjack" = "yes" ; then
-    echo "CONFIG_LIBJACK=y" >> $config_host_mak
-fi
-echo "JACK_LIBS=$jack_libs" >> $config_host_mak
-if test "$audio_win_int" = "yes" ; then
-  echo "CONFIG_AUDIO_WIN_INT=y" >> $config_host_mak
-fi
-echo "CONFIG_BDRV_RW_WHITELIST=$block_drv_rw_whitelist" >> $config_host_mak
-echo "CONFIG_BDRV_RO_WHITELIST=$block_drv_ro_whitelist" >> $config_host_mak
-if test "$xfs" = "yes" ; then
-  echo "CONFIG_XFS=y" >> $config_host_mak
-fi
-qemu_version=$(head $source_path/VERSION)
-echo "PKGVERSION=$pkgversion" >>$config_host_mak
-echo "SRC_PATH=$source_path" >> $config_host_mak
-echo "TARGET_DIRS=$target_list" >> $config_host_mak
-if test "$modules" = "yes"; then
-  # $shacmd can generate a hash started with digit, which the compiler doesn't
-  # like as an symbol. So prefix it with an underscore
-  echo "CONFIG_STAMP=_$( (echo $qemu_version; cat $0) | $shacmd - | cut -f1 -d\ )" >> $config_host_mak
-  echo "CONFIG_MODULES=y" >> $config_host_mak
-fi
-if test "$module_upgrades" = "yes"; then
-  echo "CONFIG_MODULE_UPGRADES=y" >> $config_host_mak
-fi
-if test "$have_x11" = "yes" && test "$need_x11" = "yes"; then
-  echo "CONFIG_X11=y" >> $config_host_mak
-  echo "X11_CFLAGS=$x11_cflags" >> $config_host_mak
-  echo "X11_LIBS=$x11_libs" >> $config_host_mak
-fi
-if test "$pipe2" = "yes" ; then
-  echo "CONFIG_PIPE2=y" >> $config_host_mak
-fi
-if test "$accept4" = "yes" ; then
-  echo "CONFIG_ACCEPT4=y" >> $config_host_mak
-fi
-if test "$splice" = "yes" ; then
-  echo "CONFIG_SPLICE=y" >> $config_host_mak
-fi
-if test "$eventfd" = "yes" ; then
-  echo "CONFIG_EVENTFD=y" >> $config_host_mak
-fi
-if test "$memfd" = "yes" ; then
-  echo "CONFIG_MEMFD=y" >> $config_host_mak
-fi
-if test "$have_usbfs" = "yes" ; then
-  echo "CONFIG_USBFS=y" >> $config_host_mak
-fi
-if test "$fallocate" = "yes" ; then
-  echo "CONFIG_FALLOCATE=y" >> $config_host_mak
-fi
-if test "$fallocate_punch_hole" = "yes" ; then
-  echo "CONFIG_FALLOCATE_PUNCH_HOLE=y" >> $config_host_mak
-fi
-if test "$fallocate_zero_range" = "yes" ; then
-  echo "CONFIG_FALLOCATE_ZERO_RANGE=y" >> $config_host_mak
-fi
-if test "$posix_fallocate" = "yes" ; then
-  echo "CONFIG_POSIX_FALLOCATE=y" >> $config_host_mak
-fi
-if test "$sync_file_range" = "yes" ; then
-  echo "CONFIG_SYNC_FILE_RANGE=y" >> $config_host_mak
-fi
-if test "$fiemap" = "yes" ; then
-  echo "CONFIG_FIEMAP=y" >> $config_host_mak
-fi
-if test "$dup3" = "yes" ; then
-  echo "CONFIG_DUP3=y" >> $config_host_mak
-fi
-if test "$ppoll" = "yes" ; then
-  echo "CONFIG_PPOLL=y" >> $config_host_mak
-fi
-if test "$prctl_pr_set_timerslack" = "yes" ; then
-  echo "CONFIG_PRCTL_PR_SET_TIMERSLACK=y" >> $config_host_mak
-fi
-if test "$epoll" = "yes" ; then
-  echo "CONFIG_EPOLL=y" >> $config_host_mak
-fi
-if test "$epoll_create1" = "yes" ; then
-  echo "CONFIG_EPOLL_CREATE1=y" >> $config_host_mak
-fi
-if test "$sendfile" = "yes" ; then
-  echo "CONFIG_SENDFILE=y" >> $config_host_mak
-fi
-if test "$timerfd" = "yes" ; then
-  echo "CONFIG_TIMERFD=y" >> $config_host_mak
-fi
-if test "$setns" = "yes" ; then
-  echo "CONFIG_SETNS=y" >> $config_host_mak
-fi
-if test "$clock_adjtime" = "yes" ; then
-  echo "CONFIG_CLOCK_ADJTIME=y" >> $config_host_mak
-fi
-if test "$syncfs" = "yes" ; then
-  echo "CONFIG_SYNCFS=y" >> $config_host_mak
-fi
-if test "$kcov" = "yes" ; then
-  echo "CONFIG_KCOV=y" >> $config_host_mak
-fi
-if test "$btrfs" = "yes" ; then
-  echo "CONFIG_BTRFS=y" >> $config_host_mak
-fi
-if test "$inotify" = "yes" ; then
-  echo "CONFIG_INOTIFY=y" >> $config_host_mak
-fi
-if test "$inotify1" = "yes" ; then
-  echo "CONFIG_INOTIFY1=y" >> $config_host_mak
-fi
-if test "$sem_timedwait" = "yes" ; then
-  echo "CONFIG_SEM_TIMEDWAIT=y" >> $config_host_mak
-fi
-if test "$strchrnul" = "yes" ; then
-  echo "HAVE_STRCHRNUL=y" >> $config_host_mak
-fi
-if test "$st_atim" = "yes" ; then
-  echo "HAVE_STRUCT_STAT_ST_ATIM=y" >> $config_host_mak
-fi
-if test "$byteswap_h" = "yes" ; then
-  echo "CONFIG_BYTESWAP_H=y" >> $config_host_mak
-fi
-if test "$bswap_h" = "yes" ; then
-  echo "CONFIG_MACHINE_BSWAP_H=y" >> $config_host_mak
-fi
-if test "$curl" = "yes" ; then
-  echo "CONFIG_CURL=y" >> $config_host_mak
-  echo "CURL_CFLAGS=$curl_cflags" >> $config_host_mak
-  echo "CURL_LIBS=$curl_libs" >> $config_host_mak
-fi
-if test "$brlapi" = "yes" ; then
-  echo "CONFIG_BRLAPI=y" >> $config_host_mak
-  echo "BRLAPI_LIBS=$brlapi_libs" >> $config_host_mak
-fi
-if test "$gtk" = "yes" ; then
-  echo "CONFIG_GTK=y" >> $config_host_mak
-  echo "GTK_CFLAGS=$gtk_cflags" >> $config_host_mak
-  echo "GTK_LIBS=$gtk_libs" >> $config_host_mak
-  if test "$gtk_gl" = "yes" ; then
-    echo "CONFIG_GTK_GL=y" >> $config_host_mak
-  fi
-fi
-if test "$gio" = "yes" ; then
-    echo "CONFIG_GIO=y" >> $config_host_mak
-    echo "GIO_CFLAGS=$gio_cflags" >> $config_host_mak
-    echo "GIO_LIBS=$gio_libs" >> $config_host_mak
-    echo "GDBUS_CODEGEN=$gdbus_codegen" >> $config_host_mak
-fi
-echo "CONFIG_TLS_PRIORITY=\"$tls_priority\"" >> $config_host_mak
-if test "$gnutls" = "yes" ; then
-  echo "CONFIG_GNUTLS=y" >> $config_host_mak
-  echo "GNUTLS_CFLAGS=$gnutls_cflags" >> $config_host_mak
-  echo "GNUTLS_LIBS=$gnutls_libs" >> $config_host_mak
-fi
-if test "$gcrypt" = "yes" ; then
-  echo "CONFIG_GCRYPT=y" >> $config_host_mak
-  if test "$gcrypt_hmac" = "yes" ; then
-    echo "CONFIG_GCRYPT_HMAC=y" >> $config_host_mak
-  fi
-  echo "GCRYPT_CFLAGS=$gcrypt_cflags" >> $config_host_mak
-  echo "GCRYPT_LIBS=$gcrypt_libs" >> $config_host_mak
-fi
-if test "$nettle" = "yes" ; then
-  echo "CONFIG_NETTLE=y" >> $config_host_mak
-  echo "CONFIG_NETTLE_VERSION_MAJOR=${nettle_version%%.*}" >> $config_host_mak
-  echo "NETTLE_CFLAGS=$nettle_cflags" >> $config_host_mak
-  echo "NETTLE_LIBS=$nettle_libs" >> $config_host_mak
-fi
-if test "$qemu_private_xts" = "yes" ; then
-  echo "CONFIG_QEMU_PRIVATE_XTS=y" >> $config_host_mak
-fi
-if test "$tasn1" = "yes" ; then
-  echo "CONFIG_TASN1=y" >> $config_host_mak
-fi
-if test "$auth_pam" = "yes" ; then
-    echo "CONFIG_AUTH_PAM=y" >> $config_host_mak
-fi
-if test "$have_ifaddrs_h" = "yes" ; then
-    echo "HAVE_IFADDRS_H=y" >> $config_host_mak
-fi
-if test "$have_drm_h" = "yes" ; then
-  echo "HAVE_DRM_H=y" >> $config_host_mak
-fi
-if test "$have_broken_size_max" = "yes" ; then
-    echo "HAVE_BROKEN_SIZE_MAX=y" >> $config_host_mak
-fi
-if test "$have_openpty" = "yes" ; then
-    echo "HAVE_OPENPTY=y" >> $config_host_mak
-fi
-if test "$have_sys_signal_h" = "yes" ; then
-    echo "HAVE_SYS_SIGNAL_H=y" >> $config_host_mak
-fi
+Usage: configure [options]
+Options: [defaults in brackets after descriptions]
 
-# Work around a system header bug with some kernel/XFS header
-# versions where they both try to define 'struct fsxattr':
-# xfs headers will not try to redefine structs from linux headers
-# if this macro is set.
-if test "$have_fsxattr" = "yes" ; then
-    echo "HAVE_FSXATTR=y" >> $config_host_mak
-fi
-if test "$have_copy_file_range" = "yes" ; then
-    echo "HAVE_COPY_FILE_RANGE=y" >> $config_host_mak
-fi
-if test "$vte" = "yes" ; then
-  echo "CONFIG_VTE=y" >> $config_host_mak
-  echo "VTE_CFLAGS=$vte_cflags" >> $config_host_mak
-  echo "VTE_LIBS=$vte_libs" >> $config_host_mak
-fi
-if test "$virglrenderer" = "yes" ; then
-  echo "CONFIG_VIRGL=y" >> $config_host_mak
-  echo "VIRGL_CFLAGS=$virgl_cflags" >> $config_host_mak
-  echo "VIRGL_LIBS=$virgl_libs" >> $config_host_mak
-fi
-if test "$xen" = "enabled" ; then
-  echo "CONFIG_XEN_BACKEND=y" >> $config_host_mak
-  echo "CONFIG_XEN_CTRL_INTERFACE_VERSION=$xen_ctrl_version" >> $config_host_mak
-  echo "XEN_CFLAGS=$xen_cflags" >> $config_host_mak
-  echo "XEN_LIBS=$xen_libs" >> $config_host_mak
-fi
-if test "$linux_aio" = "yes" ; then
-  echo "CONFIG_LINUX_AIO=y" >> $config_host_mak
-fi
-if test "$linux_io_uring" = "yes" ; then
-  echo "CONFIG_LINUX_IO_URING=y" >> $config_host_mak
-  echo "LINUX_IO_URING_CFLAGS=$linux_io_uring_cflags" >> $config_host_mak
-  echo "LINUX_IO_URING_LIBS=$linux_io_uring_libs" >> $config_host_mak
-fi
-if test "$attr" = "yes" ; then
-  echo "CONFIG_ATTR=y" >> $config_host_mak
-  echo "LIBATTR_LIBS=$libattr_libs" >> $config_host_mak
-fi
-if test "$libattr" = "yes" ; then
-  echo "CONFIG_LIBATTR=y" >> $config_host_mak
-fi
-if test "$virtfs" = "yes" ; then
-  echo "CONFIG_VIRTFS=y" >> $config_host_mak
-fi
-if test "$vhost_scsi" = "yes" ; then
-  echo "CONFIG_VHOST_SCSI=y" >> $config_host_mak
-fi
-if test "$vhost_net" = "yes" ; then
-  echo "CONFIG_VHOST_NET=y" >> $config_host_mak
-fi
-if test "$vhost_net_user" = "yes" ; then
-  echo "CONFIG_VHOST_NET_USER=y" >> $config_host_mak
-fi
-if test "$vhost_net_vdpa" = "yes" ; then
-  echo "CONFIG_VHOST_NET_VDPA=y" >> $config_host_mak
-fi
-if test "$vhost_crypto" = "yes" ; then
-  echo "CONFIG_VHOST_CRYPTO=y" >> $config_host_mak
-fi
-if test "$vhost_vsock" = "yes" ; then
-  echo "CONFIG_VHOST_VSOCK=y" >> $config_host_mak
-  if test "$vhost_user" = "yes" ; then
-    echo "CONFIG_VHOST_USER_VSOCK=y" >> $config_host_mak
-  fi
-fi
-if test "$vhost_kernel" = "yes" ; then
-  echo "CONFIG_VHOST_KERNEL=y" >> $config_host_mak
-fi
-if test "$vhost_user" = "yes" ; then
-  echo "CONFIG_VHOST_USER=y" >> $config_host_mak
-fi
-if test "$vhost_vdpa" = "yes" ; then
-  echo "CONFIG_VHOST_VDPA=y" >> $config_host_mak
-fi
-if test "$vhost_user_fs" = "yes" ; then
-  echo "CONFIG_VHOST_USER_FS=y" >> $config_host_mak
-fi
-if test "$iovec" = "yes" ; then
-  echo "CONFIG_IOVEC=y" >> $config_host_mak
-fi
-if test "$preadv" = "yes" ; then
-  echo "CONFIG_PREADV=y" >> $config_host_mak
-fi
-if test "$membarrier" = "yes" ; then
-  echo "CONFIG_MEMBARRIER=y" >> $config_host_mak
-fi
-if test "$signalfd" = "yes" ; then
-  echo "CONFIG_SIGNALFD=y" >> $config_host_mak
-fi
-if test "$optreset" = "yes" ; then
-  echo "HAVE_OPTRESET=y" >> $config_host_mak
-fi
-if test "$tcg" = "enabled"; then
-  if test "$tcg_interpreter" = "yes" ; then
-    echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak
-  fi
-fi
-if test "$fdatasync" = "yes" ; then
-  echo "CONFIG_FDATASYNC=y" >> $config_host_mak
-fi
-if test "$madvise" = "yes" ; then
-  echo "CONFIG_MADVISE=y" >> $config_host_mak
-fi
-if test "$posix_madvise" = "yes" ; then
-  echo "CONFIG_POSIX_MADVISE=y" >> $config_host_mak
-fi
-if test "$posix_memalign" = "yes" ; then
-  echo "CONFIG_POSIX_MEMALIGN=y" >> $config_host_mak
-fi
-if test "$spice" = "yes" ; then
-  echo "CONFIG_SPICE=y" >> $config_host_mak
-  echo "SPICE_CFLAGS=$spice_cflags" >> $config_host_mak
-  echo "SPICE_LIBS=$spice_libs" >> $config_host_mak
-fi
+Standard options:
+  --help                   print this message
+  --target-list=LIST       set target list (default: build all)
+$(echo Available targets: $default_target_list | \
+  fold -s -w 53 | sed -e 's/^/                           /')
+  --target-list-exclude=LIST exclude a set of targets from the default target-list
 
-if test "$smartcard" = "yes" ; then
-  echo "CONFIG_SMARTCARD=y" >> $config_host_mak
-  echo "SMARTCARD_CFLAGS=$libcacard_cflags" >> $config_host_mak
-  echo "SMARTCARD_LIBS=$libcacard_libs" >> $config_host_mak
-fi
+Advanced options (experts only):
+  -Dmesonoptname=val       passthrough option to meson unmodified
+  --cross-prefix=PREFIX    use PREFIX for compile tools, PREFIX can be blank [$cross_prefix]
+  --cc=CC                  use C compiler CC [$cc]
+  --host-cc=CC             when cross compiling, use C compiler CC for code run
+                           at build time [$host_cc]
+  --cxx=CXX                use C++ compiler CXX [$cxx]
+  --objcc=OBJCC            use Objective-C compiler OBJCC [$objcc]
+  --extra-cflags=CFLAGS    append extra C compiler flags CFLAGS
+  --extra-cxxflags=CXXFLAGS append extra C++ compiler flags CXXFLAGS
+  --extra-objcflags=OBJCFLAGS append extra Objective C compiler flags OBJCFLAGS
+  --extra-ldflags=LDFLAGS  append extra linker flags LDFLAGS
+  --cross-cc-ARCH=CC       use compiler when building ARCH guest test cases
+  --cross-cc-cflags-ARCH=  use compiler flags when building ARCH guest tests
+  --cross-prefix-ARCH=PREFIX cross compiler prefix when building ARCH guest test cases
+  --python=PYTHON          use specified python [$python]
+  --ninja=NINJA            use specified ninja [$ninja]
+  --static                 enable static build [$static]
+  --without-default-features default all --enable-* options to "disabled"
+  --without-default-devices  do not include any device that is not needed to
+                           start the emulator (only use if you are including
+                           desired devices in configs/devices/)
+  --with-devices-ARCH=NAME override default configs/devices
+  --enable-debug           enable common debug build options
+  --cpu=CPU                Build for host CPU [$cpu]
+  --disable-containers     don't use containers for cross-building
+  --container-engine=TYPE  which container engine to use [$container_engine]
+  --gdb=GDB-path           gdb to use for gdbstub tests [$gdb_bin]
+EOF
+  meson_options_help
+cat << EOF
+  system          all system emulation targets
+  user            supported user emulation targets
+  linux-user      all linux usermode emulation targets
+  bsd-user        all BSD usermode emulation targets
+  pie             Position Independent Executables
 
-if test "$libusb" = "yes" ; then
-  echo "CONFIG_USB_LIBUSB=y" >> $config_host_mak
-  echo "LIBUSB_CFLAGS=$libusb_cflags" >> $config_host_mak
-  echo "LIBUSB_LIBS=$libusb_libs" >> $config_host_mak
+NOTE: The object files are built at the place where configure is launched
+EOF
+exit 0
 fi
 
-if test "$usb_redir" = "yes" ; then
-  echo "CONFIG_USB_REDIR=y" >> $config_host_mak
-  echo "USB_REDIR_CFLAGS=$usb_redir_cflags" >> $config_host_mak
-  echo "USB_REDIR_LIBS=$usb_redir_libs" >> $config_host_mak
-fi
+# Remove old dependency files to make sure that they get properly regenerated
+rm -f ./*/config-devices.mak.d
 
-if test "$opengl" = "yes" ; then
-  echo "CONFIG_OPENGL=y" >> $config_host_mak
-  echo "OPENGL_CFLAGS=$opengl_cflags" >> $config_host_mak
-  echo "OPENGL_LIBS=$opengl_libs" >> $config_host_mak
-  if test "$opengl_dmabuf" = "yes" ; then
-    echo "CONFIG_OPENGL_DMABUF=y" >> $config_host_mak
-  fi
+if test -z "$python"
+then
+    # If first_python is set, there was a binary somewhere even though
+    # it was not suitable.  Use it for the error message.
+    if test -n "$first_python"; then
+        error_exit "Cannot use '$first_python', Python >= 3.8 is required." \
+            "Use --python=/path/to/python to specify a supported Python."
+    else
+        error_exit "Python not found. Use --python=/path/to/python"
+    fi
 fi
 
-if test "$gbm" = "yes" ; then
-    echo "CONFIG_GBM=y" >> $config_host_mak
-    echo "GBM_LIBS=$gbm_libs" >> $config_host_mak
-    echo "GBM_CFLAGS=$gbm_cflags" >> $config_host_mak
+if ! check_py_version "$python"; then
+  error_exit "Cannot use '$python', Python >= 3.8 is required." \
+             "Use --python=/path/to/python to specify a supported Python." \
+             "Maybe try:" \
+             "  openSUSE Leap 15.3+: zypper install python39" \
+             "  CentOS 8: dnf install python38"
 fi
 
+# Resolve PATH
+python="$(command -v "$python")"
 
-if test "$avx2_opt" = "yes" ; then
-  echo "CONFIG_AVX2_OPT=y" >> $config_host_mak
-fi
+# Create a Python virtual environment using our configured python.
+# The stdout of this script will be the location of a symlink that
+# points to the configured Python.
+# Entry point scripts for pip, meson, and sphinx are generated if those
+# packages are present.
 
-if test "$avx512f_opt" = "yes" ; then
-  echo "CONFIG_AVX512F_OPT=y" >> $config_host_mak
-fi
+# Defaults assumed for now:
+# - venv is cleared if it exists already;
+# - venv is allowed to use system packages;
+# - all setup can be performed offline;
+# - missing packages may be fetched from PyPI,
+#   unless --disable-download is passed.
+# - pip is not installed into the venv when possible,
+#   but ensurepip is called as a fallback when necessary.
 
-if test "$lzo" = "yes" ; then
-  echo "CONFIG_LZO=y" >> $config_host_mak
-  echo "LZO_LIBS=$lzo_libs" >> $config_host_mak
-fi
+echo "python determined to be '$python'"
+echo "python version: $($python --version)"
 
-if test "$snappy" = "yes" ; then
-  echo "CONFIG_SNAPPY=y" >> $config_host_mak
-  echo "SNAPPY_LIBS=$snappy_libs" >> $config_host_mak
+python="$($python -B "${source_path}/python/scripts/mkvenv.py" create pyvenv)"
+if test "$?" -ne 0 ; then
+    error_exit "python venv creation failed"
 fi
 
-if test "$bzip2" = "yes" ; then
-  echo "CONFIG_BZIP2=y" >> $config_host_mak
-  echo "BZIP2_LIBS=-lbz2" >> $config_host_mak
-fi
+# Suppress writing compiled files
+python="$python -B"
+mkvenv="$python ${source_path}/python/scripts/mkvenv.py"
 
-if test "$lzfse" = "yes" ; then
-  echo "CONFIG_LZFSE=y" >> $config_host_mak
-  echo "LZFSE_LIBS=-llzfse" >> $config_host_mak
-fi
+# Finish preparing the virtual environment using vendored .whl files
 
-if test "$zstd" = "yes" ; then
-  echo "CONFIG_ZSTD=y" >> $config_host_mak
-  echo "ZSTD_CFLAGS=$zstd_cflags" >> $config_host_mak
-  echo "ZSTD_LIBS=$zstd_libs" >> $config_host_mak
+if $python -c 'import sys; sys.exit(sys.version_info >= (3,11))'; then
+    $mkvenv ensure --dir "${source_path}/python/wheels" \
+        'tomli>=1.2.0' || exit 1
 fi
+$mkvenv ensuregroup --dir "${source_path}/python/wheels" \
+     ${source_path}/pythondeps.toml meson || exit 1
 
-if test "$libiscsi" = "yes" ; then
-  echo "CONFIG_LIBISCSI=y" >> $config_host_mak
-  echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak
-  echo "LIBISCSI_LIBS=$libiscsi_libs" >> $config_host_mak
-fi
+# At this point, we expect Meson to be installed and available.
+# We expect mkvenv or pip to have created pyvenv/bin/meson for us.
+# We ignore PATH completely here: we want to use the venv's Meson
+# *exclusively*.
 
-if test "$libnfs" = "yes" ; then
-  echo "CONFIG_LIBNFS=y" >> $config_host_mak
-  echo "LIBNFS_LIBS=$libnfs_libs" >> $config_host_mak
-fi
+meson="$(cd pyvenv/bin; pwd)/meson"
 
-if test "$seccomp" = "yes"; then
-  echo "CONFIG_SECCOMP=y" >> $config_host_mak
-  echo "SECCOMP_CFLAGS=$seccomp_cflags" >> $config_host_mak
-  echo "SECCOMP_LIBS=$seccomp_libs" >> $config_host_mak
-fi
+# Conditionally ensure Sphinx is installed.
 
-# XXX: suppress that
-if [ "$bsd" = "yes" ] ; then
-  echo "CONFIG_BSD=y" >> $config_host_mak
+mkvenv_online_flag=""
+if test "$download" = "enabled" ; then
+    mkvenv_online_flag=" --online"
 fi
 
-if test "$qom_cast_debug" = "yes" ; then
-  echo "CONFIG_QOM_CAST_DEBUG=y" >> $config_host_mak
-fi
-if test "$rbd" = "yes" ; then
-  echo "CONFIG_RBD=y" >> $config_host_mak
-  echo "RBD_LIBS=$rbd_libs" >> $config_host_mak
+if test "$docs" != "disabled" ; then
+    if ! $mkvenv ensuregroup \
+         $(test "$docs" = "enabled" && echo "$mkvenv_online_flag") \
+         ${source_path}/pythondeps.toml docs;
+    then
+        if test "$docs" = "enabled" ; then
+            exit 1
+        fi
+        echo "Sphinx not found/usable, disabling docs."
+        docs=disabled
+    else
+        docs=enabled
+    fi
 fi
 
-echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
-if test "$coroutine_pool" = "yes" ; then
-  echo "CONFIG_COROUTINE_POOL=1" >> $config_host_mak
-else
-  echo "CONFIG_COROUTINE_POOL=0" >> $config_host_mak
-fi
+# Probe for ninja
 
-if test "$debug_stack_usage" = "yes" ; then
-  echo "CONFIG_DEBUG_STACK_USAGE=y" >> $config_host_mak
+if test -z "$ninja"; then
+    for c in ninja ninja-build samu; do
+        if has $c; then
+            ninja=$(command -v "$c")
+            break
+        fi
+    done
+    if test -z "$ninja"; then
+      error_exit "Cannot find Ninja"
+    fi
 fi
 
-if test "$crypto_afalg" = "yes" ; then
-  echo "CONFIG_AF_ALG=y" >> $config_host_mak
+if test "$targetos" = "bogus"; then
+    # Now that we know that we're not printing the help and that
+    # the compiler works (so the results of the check_defines we used
+    # to identify the OS are reliable), if we didn't recognize the
+    # host OS we should stop now.
+    error_exit "Unrecognized host OS (uname -s reports '$(uname -s)')"
 fi
 
-if test "$open_by_handle_at" = "yes" ; then
-  echo "CONFIG_OPEN_BY_HANDLE=y" >> $config_host_mak
+# test for any invalid configuration combinations
+if test "$targetos" = "windows" && ! has "$dlltool"; then
+  if test "$plugins" = "yes"; then
+    error_exit "TCG plugins requires dlltool to build on Windows platforms"
+  fi
+  plugins="no"
 fi
-
-if test "$linux_magic_h" = "yes" ; then
-  echo "CONFIG_LINUX_MAGIC_H=y" >> $config_host_mak
+if test "$tcg" = "disabled" ; then
+  if test "$plugins" = "yes"; then
+    error_exit "Can't enable plugins on non-TCG builds"
+  fi
+  plugins="no"
 fi
-
-if test "$valgrind_h" = "yes" ; then
-  echo "CONFIG_VALGRIND_H=y" >> $config_host_mak
+if test "$static" = "yes" ; then
+  if test "$plugins" = "yes"; then
+    error_exit "static and plugins are mutually incompatible"
+  fi
+  plugins="no"
 fi
-
-if test "$have_asan_iface_fiber" = "yes" ; then
-    echo "CONFIG_ASAN_IFACE_FIBER=y" >> $config_host_mak
+if test "$plugins" != "no"; then
+  plugins=yes
+  subdirs="$subdirs contrib/plugins"
 fi
 
-if test "$have_tsan" = "yes" && test "$have_tsan_iface_fiber" = "yes" ; then
-    echo "CONFIG_TSAN=y" >> $config_host_mak
-fi
+cat > $TMPC << EOF
 
-if test "$has_environ" = "yes" ; then
-  echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak
-fi
+#ifdef __linux__
+#  define THREAD __thread
+#else
+#  define THREAD
+#endif
+static THREAD int tls_var;
+int main(void) { return tls_var; }
+EOF
 
-if test "$cpuid_h" = "yes" ; then
-  echo "CONFIG_CPUID_H=y" >> $config_host_mak
+if test "$targetos" = windows || test "$targetos" = haiku; then
+  if test "$pie" = "yes"; then
+    error_exit "PIE not available due to missing OS support"
+  fi
+  pie=no
 fi
 
-if test "$int128" = "yes" ; then
-  echo "CONFIG_INT128=y" >> $config_host_mak
+if test "$pie" != "no"; then
+  if test "$static" = "yes"; then
+    pie_ldflags=-static-pie
+  else
+    pie_ldflags=-pie
+  fi
+  if compile_prog "-Werror -fPIE -DPIE" "$pie_ldflags"; then
+    pie="yes"
+  elif test "$pie" = "yes"; then
+    error_exit "-static-pie not available due to missing toolchain support"
+  else
+    echo "Disabling PIE due to missing toolchain support"
+    pie="no"
+  fi
 fi
 
-if test "$atomic128" = "yes" ; then
-  echo "CONFIG_ATOMIC128=y" >> $config_host_mak
-fi
+##########################################
 
-if test "$cmpxchg128" = "yes" ; then
-  echo "CONFIG_CMPXCHG128=y" >> $config_host_mak
+if test -z "${target_list+xxx}" ; then
+    default_targets=yes
+    for target in $default_target_list; do
+        target_list="$target_list $target"
+    done
+    target_list="${target_list# }"
+else
+    default_targets=no
+    target_list=$(echo "$target_list" | sed -e 's/,/ /g')
+    for target in $target_list; do
+        # Check that we recognised the target name; this allows a more
+        # friendly error message than if we let it fall through.
+        case " $default_target_list " in
+            *" $target "*)
+                ;;
+            *)
+                error_exit "Unknown target name '$target'"
+                ;;
+        esac
+    done
 fi
 
-if test "$atomic64" = "yes" ; then
-  echo "CONFIG_ATOMIC64=y" >> $config_host_mak
+if test "$tcg" = "auto"; then
+  if test -z "$target_list"; then
+    tcg="disabled"
+  else
+    tcg="enabled"
+  fi
 fi
 
-if test "$attralias" = "yes" ; then
-  echo "CONFIG_ATTRIBUTE_ALIAS=y" >> $config_host_mak
-fi
+#########################################
+# gdb test
 
-if test "$getauxval" = "yes" ; then
-  echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
+if test -n "$gdb_bin"; then
+    gdb_version=$($gdb_bin --version | head -n 1)
+    if version_ge ${gdb_version##* } 9.1; then
+        gdb_arches=$($python "$source_path/scripts/probe-gdb-support.py" $gdb_bin)
+    else
+        gdb_bin=""
+    fi
 fi
 
-if test "$glusterfs" = "yes" ; then
-  echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
-  echo "GLUSTERFS_CFLAGS=$glusterfs_cflags" >> $config_host_mak
-  echo "GLUSTERFS_LIBS=$glusterfs_libs" >> $config_host_mak
-fi
+##########################################
+# big/little endian test
+cat > $TMPC << EOF
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# error LITTLE
+#endif
+int main(void) { return 0; }
+EOF
 
-if test "$glusterfs_xlator_opt" = "yes" ; then
-  echo "CONFIG_GLUSTERFS_XLATOR_OPT=y" >> $config_host_mak
-fi
+if ! compile_prog ; then
+  bigendian="no"
+else
+  cat > $TMPC << EOF
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# error BIG
+#endif
+int main(void) { return 0; }
+EOF
 
-if test "$glusterfs_discard" = "yes" ; then
-  echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak
+  if ! compile_prog ; then
+    bigendian="yes"
+  else
+    echo big/little test failed
+    exit 1
+  fi
 fi
 
-if test "$glusterfs_fallocate" = "yes" ; then
-  echo "CONFIG_GLUSTERFS_FALLOCATE=y" >> $config_host_mak
-fi
+##########################################
+# functions to probe cross compilers
 
-if test "$glusterfs_zerofill" = "yes" ; then
-  echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak
-fi
+container="no"
+runc=""
+if test $use_containers = "yes" && (has "docker" || has "podman"); then
+    case $($python "$source_path"/tests/docker/docker.py --engine "$container_engine" probe) in
+        *docker) container=docker ;;
+        podman) container=podman ;;
+        no) container=no ;;
+    esac
+    if test "$container" != "no"; then
+        docker_py="$python $source_path/tests/docker/docker.py --engine $container"
+        runc=$container
+    fi
+fi
+
+# cross compilers defaults, can be overridden with --cross-cc-ARCH
+: ${cross_prefix_aarch64="aarch64-linux-gnu-"}
+: ${cross_prefix_aarch64_be="$cross_prefix_aarch64"}
+: ${cross_prefix_alpha="alpha-linux-gnu-"}
+: ${cross_prefix_arm="arm-linux-gnueabihf-"}
+: ${cross_prefix_armeb="$cross_prefix_arm"}
+: ${cross_prefix_hexagon="hexagon-unknown-linux-musl-"}
+: ${cross_prefix_loongarch64="loongarch64-unknown-linux-gnu-"}
+: ${cross_prefix_hppa="hppa-linux-gnu-"}
+: ${cross_prefix_i386="i686-linux-gnu-"}
+: ${cross_prefix_m68k="m68k-linux-gnu-"}
+: ${cross_prefix_microblaze="microblaze-linux-musl-"}
+: ${cross_prefix_mips64el="mips64el-linux-gnuabi64-"}
+: ${cross_prefix_mips64="mips64-linux-gnuabi64-"}
+: ${cross_prefix_mipsel="mipsel-linux-gnu-"}
+: ${cross_prefix_mips="mips-linux-gnu-"}
+: ${cross_prefix_nios2="nios2-linux-gnu-"}
+: ${cross_prefix_ppc="powerpc-linux-gnu-"}
+: ${cross_prefix_ppc64="powerpc64-linux-gnu-"}
+: ${cross_prefix_ppc64le="$cross_prefix_ppc64"}
+: ${cross_prefix_riscv64="riscv64-linux-gnu-"}
+: ${cross_prefix_s390x="s390x-linux-gnu-"}
+: ${cross_prefix_sh4="sh4-linux-gnu-"}
+: ${cross_prefix_sparc64="sparc64-linux-gnu-"}
+: ${cross_prefix_sparc="$cross_prefix_sparc64"}
+: ${cross_prefix_tricore="tricore-"}
+: ${cross_prefix_x86_64="x86_64-linux-gnu-"}
+
+: ${cross_cc_aarch64_be="$cross_cc_aarch64"}
+: ${cross_cc_cflags_aarch64_be="-mbig-endian"}
+: ${cross_cc_armeb="$cross_cc_arm"}
+: ${cross_cc_cflags_armeb="-mbig-endian"}
+: ${cross_cc_hexagon="hexagon-unknown-linux-musl-clang"}
+: ${cross_cc_cflags_hexagon="-mv73 -O2 -static"}
+: ${cross_cc_cflags_i386="-m32"}
+: ${cross_cc_cflags_ppc="-m32 -mbig-endian"}
+: ${cross_cc_cflags_ppc64="-m64 -mbig-endian"}
+: ${cross_cc_ppc64le="$cross_cc_ppc64"}
+: ${cross_cc_cflags_ppc64le="-m64 -mlittle-endian"}
+: ${cross_cc_cflags_sparc64="-m64 -mcpu=ultrasparc"}
+: ${cross_cc_sparc="$cross_cc_sparc64"}
+: ${cross_cc_cflags_sparc="-m32 -mcpu=supersparc"}
+: ${cross_cc_cflags_x86_64="-m64"}
+
+compute_target_variable() {
+  eval "$2="
+  if eval test -n "\"\${cross_prefix_$1}\""; then
+    if eval has "\"\${cross_prefix_$1}\$3\""; then
+      eval "$2=\"\${cross_prefix_$1}\$3\""
+    fi
+  fi
+}
+
+have_target() {
+  for i; do
+    case " $target_list " in
+      *" $i "*) return 0;;
+      *) ;;
+    esac
+  done
+  return 1
+}
 
-if test "$glusterfs_ftruncate_has_stat" = "yes" ; then
-  echo "CONFIG_GLUSTERFS_FTRUNCATE_HAS_STAT=y" >> $config_host_mak
-fi
+# probe_target_compiler TARGET
+#
+# Look for a compiler for the given target, either native or cross.
+# Set variables target_* if a compiler is found, and container_cross_*
+# if a Docker-based cross-compiler image is known for the target.
+# Set got_cross_cc to yes/no depending on whether a non-container-based
+# compiler was found.
+#
+# If TARGET is a user-mode emulation target, also set build_static to
+# "y" if static linking is possible.
+#
+probe_target_compiler() {
+  # reset all output variables
+  got_cross_cc=no
+  container_image=
+  container_hosts=
+  container_cross_cc=
+  container_cross_ar=
+  container_cross_as=
+  container_cross_ld=
+  container_cross_nm=
+  container_cross_objcopy=
+  container_cross_ranlib=
+  container_cross_strip=
+
+  target_arch=${1%%-*}
+  case $target_arch in
+    aarch64) container_hosts="x86_64 aarch64" ;;
+    alpha) container_hosts=x86_64 ;;
+    arm) container_hosts="x86_64 aarch64" ;;
+    cris) container_hosts=x86_64 ;;
+    hexagon) container_hosts=x86_64 ;;
+    hppa) container_hosts=x86_64 ;;
+    i386) container_hosts=x86_64 ;;
+    loongarch64) container_hosts=x86_64 ;;
+    m68k) container_hosts=x86_64 ;;
+    microblaze) container_hosts=x86_64 ;;
+    mips64el) container_hosts=x86_64 ;;
+    mips64) container_hosts=x86_64 ;;
+    mipsel) container_hosts=x86_64 ;;
+    mips) container_hosts=x86_64 ;;
+    nios2) container_hosts=x86_64 ;;
+    ppc) container_hosts=x86_64 ;;
+    ppc64|ppc64le) container_hosts=x86_64 ;;
+    riscv64) container_hosts=x86_64 ;;
+    s390x) container_hosts=x86_64 ;;
+    sh4) container_hosts=x86_64 ;;
+    sparc64) container_hosts=x86_64 ;;
+    tricore) container_hosts=x86_64 ;;
+    x86_64) container_hosts="aarch64 ppc64le x86_64" ;;
+    xtensa*) container_hosts=x86_64 ;;
+  esac
 
-if test "$glusterfs_iocb_has_stat" = "yes" ; then
-  echo "CONFIG_GLUSTERFS_IOCB_HAS_STAT=y" >> $config_host_mak
-fi
+  for host in $container_hosts; do
+    test "$container" != no || continue
+    test "$host" = "$cpu" || continue
+    case $target_arch in
+      aarch64)
+        # We don't have any bigendian build tools so we only use this for AArch64
+        container_image=debian-arm64-cross
+        container_cross_prefix=aarch64-linux-gnu-
+        container_cross_cc=${container_cross_prefix}gcc
+        ;;
+      alpha)
+        container_image=debian-legacy-test-cross
+        container_cross_prefix=alpha-linux-gnu-
+        container_cross_cc=${container_cross_prefix}gcc
+        ;;
+      arm)
+        # We don't have any bigendian build tools so we only use this for ARM
+        container_image=debian-armhf-cross
+        container_cross_prefix=arm-linux-gnueabihf-
+        ;;
+      cris)
+        container_image=fedora-cris-cross
+        container_cross_prefix=cris-linux-gnu-
+        ;;
+      hexagon)
+        container_image=debian-hexagon-cross
+        container_cross_prefix=hexagon-unknown-linux-musl-
+        container_cross_cc=${container_cross_prefix}clang
+        ;;
+      hppa)
+        container_image=debian-all-test-cross
+        container_cross_prefix=hppa-linux-gnu-
+        container_cross_cc=${container_cross_prefix}gcc
+        ;;
+      i386)
+        container_image=debian-i686-cross
+        container_cross_prefix=i686-linux-gnu-
+        ;;
+      loongarch64)
+        container_image=debian-loongarch-cross
+        container_cross_prefix=loongarch64-unknown-linux-gnu-
+        ;;
+      m68k)
+        container_image=debian-all-test-cross
+        container_cross_prefix=m68k-linux-gnu-
+        container_cross_cc=${container_cross_prefix}gcc
+        ;;
+      microblaze)
+        container_image=debian-microblaze-cross
+        container_cross_prefix=microblaze-linux-musl-
+        ;;
+      mips64el)
+        container_image=debian-mips64el-cross
+        container_cross_prefix=mips64el-linux-gnuabi64-
+        ;;
+      mips64)
+        container_image=debian-all-test-cross
+        container_cross_prefix=mips64-linux-gnuabi64-
+        ;;
+      mips)
+        container_image=debian-all-test-cross
+        container_cross_prefix=mips-linux-gnu-
+        ;;
+      nios2)
+        container_image=debian-nios2-cross
+        container_cross_prefix=nios2-linux-gnu-
+        ;;
+      ppc)
+        container_image=debian-all-test-cross
+        container_cross_prefix=powerpc-linux-gnu-
+        container_cross_cc=${container_cross_prefix}gcc
+        ;;
+      ppc64|ppc64le)
+        container_image=debian-all-test-cross
+        container_cross_prefix=powerpc${target_arch#ppc}-linux-gnu-
+        ;;
+      riscv64)
+        container_image=debian-all-test-cross
+        container_cross_prefix=riscv64-linux-gnu-
+        ;;
+      sh4)
+        container_image=debian-legacy-test-cross
+        container_cross_prefix=sh4-linux-gnu-
+        ;;
+      sparc64)
+        container_image=debian-all-test-cross
+        container_cross_prefix=sparc64-linux-gnu-
+        ;;
+      tricore)
+        container_image=debian-tricore-cross
+        container_cross_prefix=tricore-
+        ;;
+      x86_64)
+        container_image=debian-amd64-cross
+        container_cross_prefix=x86_64-linux-gnu-
+        ;;
+      xtensa*)
+        container_image=debian-xtensa-cross
 
-if test "$libssh" = "yes" ; then
-  echo "CONFIG_LIBSSH=y" >> $config_host_mak
-  echo "LIBSSH_CFLAGS=$libssh_cflags" >> $config_host_mak
-  echo "LIBSSH_LIBS=$libssh_libs" >> $config_host_mak
-fi
+        # default to the dc232b cpu
+        container_cross_prefix=/opt/2020.07/xtensa-dc232b-elf/bin/xtensa-dc232b-elf-
+        ;;
+      *)
+        # Debian and GNU architecture names usually match
+        container_image=debian-$target_arch-cross
+        container_cross_prefix=$target_arch-linux-gnu-
+        ;;
+    esac
+    : ${container_cross_cc:=${container_cross_prefix}gcc}
+    : ${container_cross_ar:=${container_cross_prefix}ar}
+    : ${container_cross_as:=${container_cross_prefix}as}
+    : ${container_cross_ld:=${container_cross_prefix}ld}
+    : ${container_cross_nm:=${container_cross_prefix}nm}
+    : ${container_cross_objcopy:=${container_cross_prefix}objcopy}
+    : ${container_cross_ranlib:=${container_cross_prefix}ranlib}
+    : ${container_cross_strip:=${container_cross_prefix}strip}
+  done
 
-if test "$live_block_migration" = "yes" ; then
-  echo "CONFIG_LIVE_BLOCK_MIGRATION=y" >> $config_host_mak
-fi
+  try=cross
+  # For softmmu/roms also look for a bi-endian or multilib-enabled host compiler
+  if [ "${1%softmmu}" != "$1" ] || test "$target_arch" = "$cpu"; then
+      case "$target_arch:$cpu" in
+        aarch64_be:aarch64 | \
+        armeb:arm | \
+        i386:x86_64 | \
+        mips*:mips64 | \
+        ppc*:ppc64 | \
+        sparc:sparc64 | \
+        "$cpu:$cpu")
+        try='native cross' ;;
+      esac
+  fi
+  eval "target_cflags=\${cross_cc_cflags_$target_arch}"
+  for thistry in $try; do
+    case $thistry in
+    native)
+      target_cc=$cc
+      target_ccas=$ccas
+      target_ar=$ar
+      target_as=$as
+      target_ld=$ld
+      target_nm=$nm
+      target_objcopy=$objcopy
+      target_ranlib=$ranlib
+      target_strip=$strip
+      ;;
+    cross)
+      target_cc=
+      if eval test -n "\"\${cross_cc_$target_arch}\""; then
+        if eval has "\"\${cross_cc_$target_arch}\""; then
+          eval "target_cc=\"\${cross_cc_$target_arch}\""
+        fi
+      else
+        compute_target_variable $target_arch target_cc gcc
+      fi
+      target_ccas=$target_cc
+      compute_target_variable $target_arch target_ar ar
+      compute_target_variable $target_arch target_as as
+      compute_target_variable $target_arch target_ld ld
+      compute_target_variable $target_arch target_nm nm
+      compute_target_variable $target_arch target_objcopy objcopy
+      compute_target_variable $target_arch target_ranlib ranlib
+      compute_target_variable $target_arch target_strip strip
+      ;;
+    esac
 
-if test "$tpm" = "yes"; then
-  echo 'CONFIG_TPM=y' >> $config_host_mak
-fi
+    if test -n "$target_cc"; then
+      case $target_arch in
+        i386|x86_64)
+          if $target_cc --version | grep -qi "clang"; then
+            continue
+          fi
+          ;;
+      esac
+    elif test -n "$target_as" && test -n "$target_ld"; then
+      # Special handling for assembler only targets
+      case $target in
+        tricore-softmmu)
+          build_static=
+          got_cross_cc=yes
+          break
+          ;;
+        *)
+          continue
+          ;;
+      esac
+    else
+      continue
+    fi
 
-echo "TRACE_BACKENDS=$trace_backends" >> $config_host_mak
-if have_backend "nop"; then
-  echo "CONFIG_TRACE_NOP=y" >> $config_host_mak
-fi
-if have_backend "simple"; then
-  echo "CONFIG_TRACE_SIMPLE=y" >> $config_host_mak
-  # Set the appropriate trace file.
-  trace_file="\"$trace_file-\" FMT_pid"
-fi
-if have_backend "log"; then
-  echo "CONFIG_TRACE_LOG=y" >> $config_host_mak
-fi
-if have_backend "ust"; then
-  echo "CONFIG_TRACE_UST=y" >> $config_host_mak
-  echo "LTTNG_UST_LIBS=$lttng_ust_libs" >> $config_host_mak
-  echo "URCU_BP_LIBS=$urcu_bp_libs" >> $config_host_mak
-fi
-if have_backend "dtrace"; then
-  echo "CONFIG_TRACE_DTRACE=y" >> $config_host_mak
-  if test "$trace_backend_stap" = "yes" ; then
-    echo "CONFIG_TRACE_SYSTEMTAP=y" >> $config_host_mak
-  fi
-fi
-if have_backend "ftrace"; then
-  if test "$linux" = "yes" ; then
-    echo "CONFIG_TRACE_FTRACE=y" >> $config_host_mak
-  else
-    feature_not_found "ftrace(trace backend)" "ftrace requires Linux"
-  fi
-fi
-if have_backend "syslog"; then
-  if test "$posix_syslog" = "yes" ; then
-    echo "CONFIG_TRACE_SYSLOG=y" >> $config_host_mak
+    write_c_skeleton
+    case $1 in
+      *-softmmu)
+        if do_compiler "$target_cc" $target_cflags -o $TMPO -c $TMPC &&
+          do_compiler "$target_cc" $target_cflags -r -nostdlib -o "${TMPDIR1}/${TMPB}2.o" "$TMPO" -lgcc; then
+          got_cross_cc=yes
+          break
+        fi
+        ;;
+      *)
+        if do_compiler "$target_cc" $target_cflags -o $TMPE $TMPC -static ; then
+          build_static=y
+          got_cross_cc=yes
+          break
+        fi
+        if do_compiler "$target_cc" $target_cflags -o $TMPE $TMPC ; then
+          build_static=
+          got_cross_cc=yes
+          break
+        fi
+        ;;
+    esac
+  done
+  if test $got_cross_cc != yes; then
+    build_static=
+    target_cc=
+    target_ccas=
+    target_ar=
+    target_as=
+    target_ld=
+    target_nm=
+    target_objcopy=
+    target_ranlib=
+    target_strip=
+  fi
+  test -n "$target_cc"
+}
+
+write_target_makefile() {
+  echo "EXTRA_CFLAGS=$target_cflags"
+  if test -z "$target_cc" && test -z "$target_as"; then
+    test -z "$container_image" && error_exit "Internal error: could not find cross compiler for $1?"
+    echo "$1: docker-image-$container_image" >> Makefile.prereqs
+    if test -n "$container_cross_cc"; then
+      echo "CC=$docker_py cc --cc $container_cross_cc -i qemu/$container_image -s $source_path --"
+      echo "CCAS=$docker_py cc --cc $container_cross_cc -i qemu/$container_image -s $source_path --"
+    fi
+    echo "AR=$docker_py cc --cc $container_cross_ar -i qemu/$container_image -s $source_path --"
+    echo "AS=$docker_py cc --cc $container_cross_as -i qemu/$container_image -s $source_path --"
+    echo "LD=$docker_py cc --cc $container_cross_ld -i qemu/$container_image -s $source_path --"
+    echo "NM=$docker_py cc --cc $container_cross_nm -i qemu/$container_image -s $source_path --"
+    echo "OBJCOPY=$docker_py cc --cc $container_cross_objcopy -i qemu/$container_image -s $source_path --"
+    echo "RANLIB=$docker_py cc --cc $container_cross_ranlib -i qemu/$container_image -s $source_path --"
+    echo "STRIP=$docker_py cc --cc $container_cross_strip -i qemu/$container_image -s $source_path --"
   else
-    feature_not_found "syslog(trace backend)" "syslog not available"
+    if test -n "$target_cc"; then
+      echo "CC=$target_cc"
+      echo "CCAS=$target_ccas"
+    fi
+    if test -n "$target_ar"; then
+      echo "AR=$target_ar"
+    fi
+    if test -n "$target_as"; then
+      echo "AS=$target_as"
+    fi
+    if test -n "$target_ld"; then
+      echo "LD=$target_ld"
+    fi
+    if test -n "$target_nm"; then
+      echo "NM=$target_nm"
+    fi
+    if test -n "$target_objcopy"; then
+      echo "OBJCOPY=$target_objcopy"
+    fi
+    if test -n "$target_ranlib"; then
+      echo "RANLIB=$target_ranlib"
+    fi
+    if test -n "$target_strip"; then
+      echo "STRIP=$target_strip"
+    fi
   fi
-fi
-echo "CONFIG_TRACE_FILE=$trace_file" >> $config_host_mak
-
-if test "$rdma" = "yes" ; then
-  echo "CONFIG_RDMA=y" >> $config_host_mak
-  echo "RDMA_LIBS=$rdma_libs" >> $config_host_mak
-fi
-
-if test "$pvrdma" = "yes" ; then
-  echo "CONFIG_PVRDMA=y" >> $config_host_mak
-fi
-
-if test "$have_rtnetlink" = "yes" ; then
-  echo "CONFIG_RTNETLINK=y" >> $config_host_mak
-fi
-
-if test "$libxml2" = "yes" ; then
-  echo "CONFIG_LIBXML2=y" >> $config_host_mak
-  echo "LIBXML2_CFLAGS=$libxml2_cflags" >> $config_host_mak
-  echo "LIBXML2_LIBS=$libxml2_libs" >> $config_host_mak
-fi
-
-if test "$replication" = "yes" ; then
-  echo "CONFIG_REPLICATION=y" >> $config_host_mak
-fi
+}
 
-if test "$have_af_vsock" = "yes" ; then
-  echo "CONFIG_AF_VSOCK=y" >> $config_host_mak
-fi
+#######################################
+# cross-compiled firmware targets
 
-if test "$have_sysmacros" = "yes" ; then
-  echo "CONFIG_SYSMACROS=y" >> $config_host_mak
-fi
+# Set up build tree symlinks that point back into the source tree
+# (these can be both files and directories).
+# Caution: avoid adding files or directories here using wildcards. This
+# will result in problems later if a new file matching the wildcard is
+# added to the source tree -- nothing will cause configure to be rerun
+# so the build tree will be missing the link back to the new file, and
+# tests might fail. Prefer to keep the relevant files in their own
+# directory and symlink the directory instead.
+LINKS="Makefile"
+LINKS="$LINKS docs/config"
+LINKS="$LINKS pc-bios/optionrom/Makefile"
+LINKS="$LINKS pc-bios/s390-ccw/Makefile"
+LINKS="$LINKS pc-bios/vof/Makefile"
+LINKS="$LINKS .gdbinit scripts" # scripts needed by relative path in .gdbinit
+LINKS="$LINKS tests/avocado tests/data"
+LINKS="$LINKS tests/qemu-iotests/check"
+LINKS="$LINKS python"
+LINKS="$LINKS contrib/plugins/Makefile "
+for f in $LINKS ; do
+    if [ -e "$source_path/$f" ]; then
+        symlink "$source_path/$f" "$f"
+    fi
+done
 
-if test "$have_static_assert" = "yes" ; then
-  echo "CONFIG_STATIC_ASSERT=y" >> $config_host_mak
-fi
+echo "# Automatically generated by configure - do not modify" > Makefile.prereqs
 
-if test "$have_utmpx" = "yes" ; then
-  echo "HAVE_UTMPX=y" >> $config_host_mak
-fi
-if test "$have_getrandom" = "yes" ; then
-  echo "CONFIG_GETRANDOM=y" >> $config_host_mak
-fi
-if test "$ivshmem" = "yes" ; then
-  echo "CONFIG_IVSHMEM=y" >> $config_host_mak
-fi
-if test "$debug_mutex" = "yes" ; then
-  echo "CONFIG_DEBUG_MUTEX=y" >> $config_host_mak
+# Mac OS X ships with a broken assembler
+if have_target i386-softmmu x86_64-softmmu && \
+        test "$targetos" != "darwin" && test "$targetos" != "sunos" && \
+        test "$targetos" != "haiku" && \
+        probe_target_compiler i386-softmmu; then
+    subdirs="$subdirs pc-bios/optionrom"
+    config_mak=pc-bios/optionrom/config.mak
+    echo "# Automatically generated by configure - do not modify" > $config_mak
+    echo "TOPSRC_DIR=$source_path" >> $config_mak
+    write_target_makefile >> $config_mak
 fi
 
-# Hold two types of flag:
-#   CONFIG_THREAD_SETNAME_BYTHREAD  - we've got a way of setting the name on
-#                                     a thread we have a handle to
-#   CONFIG_PTHREAD_SETNAME_NP_W_TID - A way of doing it on a particular
-#                                     platform
-if test "$pthread_setname_np_w_tid" = "yes" ; then
-  echo "CONFIG_THREAD_SETNAME_BYTHREAD=y" >> $config_host_mak
-  echo "CONFIG_PTHREAD_SETNAME_NP_W_TID=y" >> $config_host_mak
-elif test "$pthread_setname_np_wo_tid" = "yes" ; then
-  echo "CONFIG_THREAD_SETNAME_BYTHREAD=y" >> $config_host_mak
-  echo "CONFIG_PTHREAD_SETNAME_NP_WO_TID=y" >> $config_host_mak
+if have_target ppc-softmmu ppc64-softmmu && \
+        probe_target_compiler ppc-softmmu; then
+    subdirs="$subdirs pc-bios/vof"
+    config_mak=pc-bios/vof/config.mak
+    echo "# Automatically generated by configure - do not modify" > $config_mak
+    echo "SRC_DIR=$source_path/pc-bios/vof" >> $config_mak
+    write_target_makefile >> $config_mak
 fi
 
-if test "$libpmem" = "yes" ; then
-  echo "CONFIG_LIBPMEM=y" >> $config_host_mak
-  echo "LIBPMEM_LIBS=$libpmem_libs" >> $config_host_mak
-  echo "LIBPMEM_CFLAGS=$libpmem_cflags" >> $config_host_mak
+# Only build s390-ccw bios if the compiler has -march=z900 or -march=z10
+# (which is the lowest architecture level that Clang supports)
+if have_target s390x-softmmu && probe_target_compiler s390x-softmmu && \
+    GIT=git "$source_path/scripts/git-submodule.sh" "$git_submodules_action" roms/SLOF >> config.log 2>&1; then
+  write_c_skeleton
+  do_compiler "$target_cc" $target_cc_cflags -march=z900 -o $TMPO -c $TMPC
+  has_z900=$?
+  if [ $has_z900 = 0 ] || do_compiler "$target_cc" $target_cc_cflags -march=z10 -msoft-float -Werror -o $TMPO -c $TMPC; then
+    if [ $has_z900 != 0 ]; then
+      echo "WARNING: Your compiler does not support the z900!"
+      echo "         The s390-ccw bios will only work with guest CPUs >= z10."
+    fi
+    subdirs="$subdirs pc-bios/s390-ccw"
+    config_mak=pc-bios/s390-ccw/config-host.mak
+    echo "# Automatically generated by configure - do not modify" > $config_mak
+    echo "SRC_PATH=$source_path/pc-bios/s390-ccw" >> $config_mak
+    echo "GIT_SUBMODULES_ACTION=$git_submodules_action" >> $config_mak
+    write_target_makefile >> $config_mak
+  fi
 fi
 
-if test "$libdaxctl" = "yes" ; then
-  echo "CONFIG_LIBDAXCTL=y" >> $config_host_mak
-  echo "LIBDAXCTL_LIBS=$libdaxctl_libs" >> $config_host_mak
-fi
+#######################################
+# generate config-host.mak
 
-if test "$bochs" = "yes" ; then
-  echo "CONFIG_BOCHS=y" >> $config_host_mak
-fi
-if test "$cloop" = "yes" ; then
-  echo "CONFIG_CLOOP=y" >> $config_host_mak
-fi
-if test "$dmg" = "yes" ; then
-  echo "CONFIG_DMG=y" >> $config_host_mak
-fi
-if test "$qcow1" = "yes" ; then
-  echo "CONFIG_QCOW1=y" >> $config_host_mak
-fi
-if test "$vdi" = "yes" ; then
-  echo "CONFIG_VDI=y" >> $config_host_mak
-fi
-if test "$vvfat" = "yes" ; then
-  echo "CONFIG_VVFAT=y" >> $config_host_mak
-fi
-if test "$qed" = "yes" ; then
-  echo "CONFIG_QED=y" >> $config_host_mak
-fi
-if test "$parallels" = "yes" ; then
-  echo "CONFIG_PARALLELS=y" >> $config_host_mak
-fi
-if test "$sheepdog" = "yes" ; then
-  add_to deprecated_features "sheepdog"
-  echo "CONFIG_SHEEPDOG=y" >> $config_host_mak
-fi
-if test "$pty_h" = "yes" ; then
-  echo "HAVE_PTY_H=y" >> $config_host_mak
-fi
-if test "$have_mlockall" = "yes" ; then
-  echo "HAVE_MLOCKALL=y" >> $config_host_mak
-fi
-if test "$fuzzing" = "yes" ; then
-  # If LIB_FUZZING_ENGINE is set, assume we are running on OSS-Fuzz, and the
-  # needed CFLAGS have already been provided
-  if test -z "${LIB_FUZZING_ENGINE+xxx}" ; then
-    QEMU_CFLAGS="$QEMU_CFLAGS -fsanitize=fuzzer-no-link"
-    FUZZ_EXE_LDFLAGS="-fsanitize=fuzzer"
-  else
-    FUZZ_EXE_LDFLAGS="$LIB_FUZZING_ENGINE"
-  fi
-fi
+config_host_mak="config-host.mak"
 
-if test "$plugins" = "yes" ; then
-    echo "CONFIG_PLUGIN=y" >> $config_host_mak
-    # Copy the export object list to the build dir
-    if test "$ld_dynamic_list" = "yes" ; then
-       echo "CONFIG_HAS_LD_DYNAMIC_LIST=yes" >> $config_host_mak
-       ld_symbols=qemu-plugins-ld.symbols
-       cp "$source_path/plugins/qemu-plugins.symbols" $ld_symbols
-    elif test "$ld_exported_symbols_list" = "yes" ; then
-       echo "CONFIG_HAS_LD_EXPORTED_SYMBOLS_LIST=yes" >> $config_host_mak
-       ld64_symbols=qemu-plugins-ld64.symbols
-       echo "# Automatically generated by configure - do not modify" > $ld64_symbols
-       grep 'qemu_' "$source_path/plugins/qemu-plugins.symbols" | sed 's/;//g' | \
-           sed -E 's/^[[:space:]]*(.*)/_\1/' >> $ld64_symbols
-    else
-       error_exit \
-           "If \$plugins=yes, either \$ld_dynamic_list or " \
-           "\$ld_exported_symbols_list should have been set to 'yes'."
-    fi
-fi
+echo "# Automatically generated by configure - do not modify" > $config_host_mak
+echo >> $config_host_mak
 
-if test -n "$gdb_bin" ; then
-    echo "HAVE_GDB_BIN=$gdb_bin" >> $config_host_mak
-fi
+echo all: >> $config_host_mak
 
-if test "$secret_keyring" = "yes" ; then
-  echo "CONFIG_SECRET_KEYRING=y" >> $config_host_mak
+echo "SRC_PATH=$source_path" >> $config_host_mak
+echo "TARGET_DIRS=$target_list" >> $config_host_mak
+echo "GDB=$gdb_bin" >> $config_host_mak
+if test "$container" != no; then
+    echo "RUNC=$runc" >> $config_host_mak
 fi
-
-echo "ROMS=$roms" >> $config_host_mak
-echo "MAKE=$make" >> $config_host_mak
+echo "SUBDIRS=$subdirs" >> $config_host_mak
 echo "PYTHON=$python" >> $config_host_mak
+echo "MKVENV_ENSUREGROUP=$mkvenv ensuregroup $mkvenv_online_flag" >> $config_host_mak
 echo "GENISOIMAGE=$genisoimage" >> $config_host_mak
 echo "MESON=$meson" >> $config_host_mak
 echo "NINJA=$ninja" >> $config_host_mak
-echo "CC=$cc" >> $config_host_mak
-if $iasl -h > /dev/null 2>&1; then
-  echo "CONFIG_IASL=$iasl" >> $config_host_mak
-fi
-echo "CXX=$cxx" >> $config_host_mak
-echo "OBJCC=$objcc" >> $config_host_mak
-echo "AR=$ar" >> $config_host_mak
-echo "ARFLAGS=$ARFLAGS" >> $config_host_mak
-echo "AS=$as" >> $config_host_mak
-echo "CCAS=$ccas" >> $config_host_mak
-echo "CPP=$cpp" >> $config_host_mak
-echo "OBJCOPY=$objcopy" >> $config_host_mak
-echo "LD=$ld" >> $config_host_mak
-echo "RANLIB=$ranlib" >> $config_host_mak
-echo "NM=$nm" >> $config_host_mak
-echo "PKG_CONFIG=$pkg_config_exe" >> $config_host_mak
-echo "WINDRES=$windres" >> $config_host_mak
-echo "CFLAGS_NOPIE=$CFLAGS_NOPIE" >> $config_host_mak
-echo "QEMU_CFLAGS=$QEMU_CFLAGS" >> $config_host_mak
-echo "QEMU_CXXFLAGS=$QEMU_CXXFLAGS" >> $config_host_mak
-echo "GLIB_CFLAGS=$glib_cflags" >> $config_host_mak
-echo "GLIB_LIBS=$glib_libs" >> $config_host_mak
-if test "$glib_has_gslice" = "yes" ; then
-    echo "HAVE_GLIB_WITH_SLICE_ALLOCATOR=y" >> $config_host_mak
-fi
-echo "QEMU_LDFLAGS=$QEMU_LDFLAGS" >> $config_host_mak
-echo "LD_I386_EMULATION=$ld_i386_emulation" >> $config_host_mak
 echo "EXESUF=$EXESUF" >> $config_host_mak
-echo "HOST_DSOSUF=$HOST_DSOSUF" >> $config_host_mak
-echo "LIBS_QGA=$libs_qga" >> $config_host_mak
-echo "TASN1_LIBS=$tasn1_libs" >> $config_host_mak
-echo "TASN1_CFLAGS=$tasn1_cflags" >> $config_host_mak
-if test "$gcov" = "yes" ; then
-  echo "CONFIG_GCOV=y" >> $config_host_mak
-fi
 
-if test "$fuzzing" != "no"; then
-    echo "CONFIG_FUZZ=y" >> $config_host_mak
-fi
-echo "FUZZ_EXE_LDFLAGS=$FUZZ_EXE_LDFLAGS" >> $config_host_mak
-
-if test "$rng_none" = "yes"; then
-  echo "CONFIG_RNG_NONE=y" >> $config_host_mak
-fi
-
-# use included Linux headers
-if test "$linux" = "yes" ; then
-  mkdir -p linux-headers
-  case "$cpu" in
-  i386|x86_64|x32)
-    linux_arch=x86
-    ;;
-  ppc|ppc64|ppc64le)
-    linux_arch=powerpc
-    ;;
-  s390x)
-    linux_arch=s390
-    ;;
-  aarch64)
-    linux_arch=arm64
-    ;;
-  mips64)
-    linux_arch=mips
-    ;;
-  *)
-    # For most CPUs the kernel architecture name and QEMU CPU name match.
-    linux_arch="$cpu"
-    ;;
-  esac
-    # For non-KVM architectures we will not have asm headers
-    if [ -e "$source_path/linux-headers/asm-$linux_arch" ]; then
-      symlink "$source_path/linux-headers/asm-$linux_arch" linux-headers/asm
-    fi
+# use included Linux headers for KVM architectures
+if test "$targetos" = "linux" && test -n "$linux_arch"; then
+  symlink "$source_path/linux-headers/asm-$linux_arch" linux-headers/asm
 fi
 
 for target in $target_list; do
     target_dir="$target"
-    target_name=$(echo $target | cut -d '-' -f 1)
-    mkdir -p $target_dir
+    target_name=$(echo $target | cut -d '-' -f 1)$EXESUF
     case $target in
         *-user) symlink "../qemu-$target_name" "$target_dir/qemu-$target_name" ;;
         *) symlink "../qemu-system-$target_name" "$target_dir/qemu-system-$target_name" ;;
     esac
 done
 
-echo "CONFIG_QEMU_INTERP_PREFIX=$interp_prefix" | sed 's/%M/@0@/' >> $config_host_mak
 if test "$default_targets" = "yes"; then
   echo "CONFIG_DEFAULT_TARGETS=y" >> $config_host_mak
 fi
 
-if test "$numa" = "yes"; then
-  echo "CONFIG_NUMA=y" >> $config_host_mak
-  echo "NUMA_LIBS=$numa_libs" >> $config_host_mak
+# contrib/plugins configuration
+#echo "# Automatically generated by configure - do not modify" > contrib/plugins/$config_host_mak
+#echo "SRC_PATH=$source_path/contrib/plugins" >> contrib/plugins/$config_host_mak
+#echo "PKG_CONFIG=${pkg_config}" >> contrib/plugins/$config_host_mak
+#echo "CC=$cc $CPU_CFLAGS" >> contrib/plugins/$config_host_mak
+#echo "CFLAGS=${CFLAGS-$default_cflags} $EXTRA_CFLAGS" >> contrib/plugins/$config_host_mak
+if test "$targetos" = windows; then
+  echo "DLLTOOL=$dlltool" >> contrib/plugins/$config_host_mak
 fi
-
-if test "$ccache_cpp2" = "yes"; then
-  echo "export CCACHE_CPP2=y" >> $config_host_mak
+if test "$targetos" = darwin; then
+  echo "CONFIG_DARWIN=y" >> contrib/plugins/$config_host_mak
+fi
+if test "$targetos" = windows; then
+  echo "CONFIG_WIN32=y" >> contrib/plugins/$config_host_mak
 fi
 
-if test "$safe_stack" = "yes"; then
-  echo "CONFIG_SAFESTACK=y" >> $config_host_mak
+# tests/tcg configuration
+(config_host_mak=tests/tcg/config-host.mak
+mkdir -p tests/tcg
+echo "# Automatically generated by configure - do not modify" > $config_host_mak
+echo "SRC_PATH=$source_path" >> $config_host_mak
+if test "$plugins" = "yes" ; then
+    echo "CONFIG_PLUGIN=y" >> tests/tcg/$config_host_mak
 fi
 
-# If we're using a separate build tree, set it up now.
-# DIRS are directories which we simply mkdir in the build tree;
-# LINKS are things to symlink back into the source tree
-# (these can be both files and directories).
-# Caution: do not add files or directories here using wildcards. This
-# will result in problems later if a new file matching the wildcard is
-# added to the source tree -- nothing will cause configure to be rerun
-# so the build tree will be missing the link back to the new file, and
-# tests might fail. Prefer to keep the relevant files in their own
-# directory and symlink the directory instead.
-# UNLINK is used to remove symlinks from older development versions
-# that might get into the way when doing "git update" without doing
-# a "make distclean" in between.
-DIRS="tests tests/tcg tests/tcg/lm32 tests/qapi-schema tests/qtest/libqos"
-DIRS="$DIRS tests/qtest tests/qemu-iotests tests/vm tests/fp tests/qgraph"
-DIRS="$DIRS docs docs/interop fsdev scsi"
-DIRS="$DIRS pc-bios/optionrom pc-bios/s390-ccw"
-DIRS="$DIRS roms/seabios"
-DIRS="$DIRS contrib/plugins/"
-LINKS="Makefile"
-LINKS="$LINKS tests/tcg/lm32/Makefile"
-LINKS="$LINKS tests/tcg/Makefile.target"
-LINKS="$LINKS pc-bios/optionrom/Makefile"
-LINKS="$LINKS pc-bios/s390-ccw/Makefile"
-LINKS="$LINKS roms/seabios/Makefile"
-LINKS="$LINKS pc-bios/qemu-icon.bmp"
-LINKS="$LINKS .gdbinit scripts" # scripts needed by relative path in .gdbinit
-LINKS="$LINKS tests/acceptance tests/data"
-LINKS="$LINKS tests/qemu-iotests/check"
-LINKS="$LINKS python"
-LINKS="$LINKS contrib/plugins/Makefile "
-UNLINK="pc-bios/keymaps"
-for bios_file in \
-    $source_path/pc-bios/*.bin \
-    $source_path/pc-bios/*.elf \
-    $source_path/pc-bios/*.lid \
-    $source_path/pc-bios/*.rom \
-    $source_path/pc-bios/*.dtb \
-    $source_path/pc-bios/*.img \
-    $source_path/pc-bios/openbios-* \
-    $source_path/pc-bios/u-boot.* \
-    $source_path/pc-bios/edk2-*.fd.bz2 \
-    $source_path/pc-bios/palcode-*
-do
-    LINKS="$LINKS pc-bios/$(basename $bios_file)"
-done
-mkdir -p $DIRS
-for f in $LINKS ; do
-    if [ -e "$source_path/$f" ]; then
-        symlink "$source_path/$f" "$f"
-    fi
-done
-for f in $UNLINK ; do
-    if [ -L "$f" ]; then
-        rm -f "$f"
-    fi
-done
+tcg_tests_targets=
+for target in $target_list; do
+  arch=${target%%-*}
 
-(for i in $cross_cc_vars; do
-  export $i
-done
-export target_list source_path use_containers
-#$source_path/tests/tcg/configure.sh)
-)
+  case $target in
+    xtensa*-linux-user)
+      # the toolchain is not complete with headers, only build system tests
+      continue
+      ;;
+    *-softmmu)
+      test -f "$source_path/tests/tcg/$arch/Makefile.softmmu-target" || continue
+      qemu="qemu-system-$arch"
+      ;;
+    *-linux-user|*-bsd-user)
+      qemu="qemu-$arch"
+      ;;
+  esac
 
-# temporary config to build submodules
-for rom in seabios; do
-    config_mak=roms/$rom/config.mak
-    echo "# Automatically generated by configure - do not modify" > $config_mak
-    echo "SRC_PATH=$source_path/roms/$rom" >> $config_mak
-    echo "AS=$as" >> $config_mak
-    echo "CCAS=$ccas" >> $config_mak
-    echo "CC=$cc" >> $config_mak
-    echo "BCC=bcc" >> $config_mak
-    echo "CPP=$cpp" >> $config_mak
-    echo "OBJCOPY=objcopy" >> $config_mak
-    echo "IASL=$iasl" >> $config_mak
-    echo "LD=$ld" >> $config_mak
-    echo "RANLIB=$ranlib" >> $config_mak
-done
+  if probe_target_compiler $target || test -n "$container_image"; then
+      test -n "$container_image" && build_static=y
+      mkdir -p "tests/tcg/$target"
+      config_target_mak=tests/tcg/$target/config-target.mak
+      ln -sf "$source_path/tests/tcg/Makefile.target" "tests/tcg/$target/Makefile"
+      echo "# Automatically generated by configure - do not modify" > "$config_target_mak"
+      echo "TARGET_NAME=$arch" >> "$config_target_mak"
+      echo "TARGET=$target" >> "$config_target_mak"
+      write_target_makefile "build-tcg-tests-$target" >> "$config_target_mak"
+      echo "BUILD_STATIC=$build_static" >> "$config_target_mak"
+      echo "QEMU=$PWD/$qemu" >> "$config_target_mak"
+
+      # will GDB work with these binaries?
+      if test "${gdb_arches#*$arch}" != "$gdb_arches"; then
+          echo "GDB=$gdb_bin" >> $config_target_mak
+      fi
 
-# set up qemu-iotests in this build directory
-iotests_common_env="tests/qemu-iotests/common.env"
+      echo "run-tcg-tests-$target: $qemu\$(EXESUF)" >> Makefile.prereqs
+      tcg_tests_targets="$tcg_tests_targets $target"
+  fi
+done
 
-echo "# Automatically generated by configure - do not modify" > "$iotests_common_env"
-echo >> "$iotests_common_env"
-echo "export PYTHON='$python'" >> "$iotests_common_env"
+if test "$tcg" = "enabled"; then
+    echo "TCG_TESTS_TARGETS=$tcg_tests_targets" >> config-host.mak
+fi
+)
 
 if test "$skip_meson" = no; then
-cross="config-meson.cross.new"
-meson_quote() {
+  cross="config-meson.cross.new"
+  meson_quote() {
+    test $# = 0 && return
     echo "'$(echo $* | sed "s/ /','/g")'"
-}
+  }
 
-echo "# Automatically generated by configure - do not modify" > $cross
-echo "[properties]" >> $cross
-test -z "$cxx" && echo "link_language = 'c'" >> $cross
-echo "[built-in options]" >> $cross
-echo "c_args = [${CFLAGS:+$(meson_quote $CFLAGS)}]" >> $cross
-echo "cpp_args = [${CXXFLAGS:+$(meson_quote $CXXFLAGS)}]" >> $cross
-echo "c_link_args = [${LDFLAGS:+$(meson_quote $LDFLAGS)}]" >> $cross
-echo "cpp_link_args = [${LDFLAGS:+$(meson_quote $LDFLAGS)}]" >> $cross
-echo "[binaries]" >> $cross
-echo "c = [$(meson_quote $cc)]" >> $cross
-test -n "$cxx" && echo "cpp = [$(meson_quote $cxx)]" >> $cross
-echo "ar = [$(meson_quote $ar)]" >> $cross
-echo "nm = [$(meson_quote $nm)]" >> $cross
-echo "pkgconfig = [$(meson_quote $pkg_config_exe)]" >> $cross
-echo "ranlib = [$(meson_quote $ranlib)]" >> $cross
-if has $sdl2_config; then
-  echo "sdl2-config = [$(meson_quote $sdl2_config)]" >> $cross
-fi
-echo "strip = [$(meson_quote $strip)]" >> $cross
-echo "windres = [$(meson_quote $windres)]" >> $cross
-if test -n "$cross_prefix"; then
-    cross_arg="--cross-file config-meson.cross"
+  echo "# Automatically generated by configure - do not modify" > $cross
+  echo "[properties]" >> $cross
+
+  # unroll any custom device configs
+  for a in $device_archs; do
+      eval "c=\$devices_${a}"
+      echo "${a}-softmmu = '$c'" >> $cross
+  done
+
+  echo "[built-in options]" >> $cross
+  echo "c_args = [$(meson_quote $CFLAGS $EXTRA_CFLAGS)]" >> $cross
+  echo "cpp_args = [$(meson_quote $CXXFLAGS $EXTRA_CXXFLAGS)]" >> $cross
+  test -n "$objcc" && echo "objc_args = [$(meson_quote $OBJCFLAGS $EXTRA_OBJCFLAGS)]" >> $cross
+  echo "c_link_args = [$(meson_quote $CFLAGS $LDFLAGS $EXTRA_CFLAGS $EXTRA_LDFLAGS)]" >> $cross
+  echo "cpp_link_args = [$(meson_quote $CXXFLAGS $LDFLAGS $EXTRA_CXXFLAGS $EXTRA_LDFLAGS)]" >> $cross
+
+  # Only enable by default for git builds and on select OSes
+  echo "# environment defaults, can still be overridden on " >> $cross
+  echo "# the command line" >> $cross
+  if test -e "$source_path/.git" && \
+      { test "$targetos" = linux || test "$targetos" = "windows"; }; then
+      echo 'werror = true' >> $cross
+  fi
+  echo "[project options]" >> $cross
+  if test "$SMBD" != ''; then
+    echo "smbd = $(meson_quote "$SMBD")" >> $cross
+  fi
+  if test "${QEMU_GA_MANUFACTURER}" != ''; then
+    echo "qemu_ga_manufacturer = $(meson_quote "${QEMU_GA_MANUFACTURER}")" >> $cross
+  fi
+  if test "${QEMU_GA_DISTRO}" != ''; then
+    echo "qemu_ga_distro = $(meson_quote "${QEMU_GA_DISTRO}")" >> $cross
+  fi
+  if test "${QEMU_GA_VERSION}" != ''; then
+    echo "qemu_ga_version = $(meson_quote "${QEMU_GA_VERSION}")" >> $cross
+  fi
+
+  echo >> $cross
+  echo "[binaries]" >> $cross
+  echo "c = [$(meson_quote $cc $CPU_CFLAGS)]" >> $cross
+  test -n "$cxx" && echo "cpp = [$(meson_quote $cxx $CPU_CFLAGS)]" >> $cross
+  test -n "$objcc" && echo "objc = [$(meson_quote $objcc $CPU_CFLAGS)]" >> $cross
+  echo "ar = [$(meson_quote $ar)]" >> $cross
+  echo "dlltool = [$(meson_quote $dlltool)]" >> $cross
+  echo "nm = [$(meson_quote $nm)]" >> $cross
+  echo "pkgconfig = [$(meson_quote $pkg_config)]" >> $cross
+  echo "pkg-config = [$(meson_quote $pkg_config)]" >> $cross
+  echo "ranlib = [$(meson_quote $ranlib)]" >> $cross
+  if has $sdl2_config; then
+    echo "sdl2-config = [$(meson_quote $sdl2_config)]" >> $cross
+  fi
+  echo "strip = [$(meson_quote $strip)]" >> $cross
+  echo "widl = [$(meson_quote $widl)]" >> $cross
+  echo "windres = [$(meson_quote $windres)]" >> $cross
+  echo "windmc = [$(meson_quote $windmc)]" >> $cross
+  if test "$cross_compile" = "yes"; then
     echo "[host_machine]" >> $cross
-    if test "$mingw32" = "yes" ; then
-        echo "system = 'windows'" >> $cross
-    fi
-    if test "$linux" = "yes" ; then
-        echo "system = 'linux'" >> $cross
-    fi
-    case "$ARCH" in
-        i386|x86_64)
+    echo "system = '$targetos'" >> $cross
+    case "$cpu" in
+        i386)
             echo "cpu_family = 'x86'" >> $cross
             ;;
-        ppc64le)
-            echo "cpu_family = 'ppc64'" >> $cross
-            ;;
         *)
-            echo "cpu_family = '$ARCH'" >> $cross
+            echo "cpu_family = '$cpu'" >> $cross
             ;;
     esac
     echo "cpu = '$cpu'" >> $cross
@@ -6986,59 +1803,50 @@ if test -n "$cross_prefix"; then
     else
         echo "endian = 'little'" >> $cross
     fi
-else
-    cross_arg="--native-file config-meson.cross"
-fi
-mv $cross config-meson.cross
 
-rm -rf meson-private meson-info meson-logs
-unset staticpic
-if ! version_ge "$($meson --version)" 0.56.0; then
-  staticpic=$(if test "$pie" = yes; then echo true; else echo false; fi)
-fi
-NINJA=$ninja $meson setup \
-        --prefix "$prefix" \
-        --libdir "$libdir" \
-        --libexecdir "$libexecdir" \
-        --bindir "$bindir" \
-        --includedir "$includedir" \
-        --datadir "$datadir" \
-        --mandir "$mandir" \
-        --sysconfdir "$sysconfdir" \
-        --localedir "$localedir" \
-        --localstatedir "$local_statedir" \
-        -Ddocdir="$docdir" \
-        -Dqemu_firmwarepath="$firmwarepath" \
-        -Dqemu_suffix="$qemu_suffix" \
-        -Doptimization=$(if test "$debug" = yes; then echo 0; else echo 2; fi) \
-        -Ddebug=$(if test "$debug_info" = yes; then echo true; else echo false; fi) \
-        -Dwerror=$(if test "$werror" = yes; then echo true; else echo false; fi) \
-        -Dstrip=$(if test "$strip_opt" = yes; then echo true; else echo false; fi) \
-        -Db_pie=$(if test "$pie" = yes; then echo true; else echo false; fi) \
-        ${staticpic:+-Db_staticpic=$staticpic} \
-        -Db_coverage=$(if test "$gcov" = yes; then echo true; else echo false; fi) \
-        -Dmalloc=$malloc -Dmalloc_trim=$malloc_trim -Dsparse=$sparse \
-        -Dkvm=$kvm -Dhax=$hax -Dwhpx=$whpx -Dhvf=$hvf \
-        -Dxen=$xen -Dxen_pci_passthrough=$xen_pci_passthrough -Dtcg=$tcg \
-        -Dcocoa=$cocoa -Dmpath=$mpath -Dsdl=$sdl -Dsdl_image=$sdl_image \
-        -Dvnc=$vnc -Dvnc_sasl=$vnc_sasl -Dvnc_jpeg=$vnc_jpeg -Dvnc_png=$vnc_png \
-        -Dgettext=$gettext -Dxkbcommon=$xkbcommon -Du2f=$u2f -Dvirtiofsd=$virtiofsd \
-        -Dcapstone=$capstone -Dslirp=$slirp -Dfdt=$fdt \
-        -Diconv=$iconv -Dcurses=$curses -Dlibudev=$libudev\
-        -Ddocs=$docs -Dsphinx_build=$sphinx_build -Dinstall_blobs=$blobs \
-        -Dvhost_user_blk_server=$vhost_user_blk_server \
-        $cross_arg \
-        "$PWD" "$source_path"
-
-if test "$?" -ne 0 ; then
-    error_exit "meson setup failed"
-fi
-fi
-
-if test -n "${deprecated_features}"; then
-    echo "Warning, deprecated features enabled."
-    echo "Please see docs/system/deprecated.rst"
-    echo "  features: ${deprecated_features}"
+    native="config-meson.native.new"
+    echo "# Automatically generated by configure - do not modify" > $native
+    echo "[binaries]" >> $native
+    echo "c = [$(meson_quote $host_cc)]" >> $native
+    mv $native config-meson.native
+    meson_option_add --native-file
+    meson_option_add config-meson.native
+  fi
+  mv $cross config-meson.cross
+  meson_add_machine_file config-meson.cross
+  if test -f "$source_path/configs/meson/$targetos.txt"; then
+    meson_add_machine_file $source_path/configs/meson/$targetos.txt
+  fi
+
+  rm -rf meson-private meson-info meson-logs
+
+  test "$download" = "disabled" && meson_option_add "--wrap-mode=nodownload"
+  test "$default_feature" = no && meson_option_add -Dauto_features=disabled
+  test "$static" = yes && meson_option_add -Dprefer_static=true
+  test "$pie" = no && meson_option_add -Db_pie=false
+
+  # QEMU options
+  test "$cfi" != false && meson_option_add "-Dcfi=$cfi" "-Db_lto=$cfi"
+  test "$docs" != auto && meson_option_add "-Ddocs=$docs"
+  test -n "${LIB_FUZZING_ENGINE+xxx}" && meson_option_add "-Dfuzzing_engine=$LIB_FUZZING_ENGINE"
+  test "$plugins" = yes && meson_option_add "-Dplugins=true"
+  test "$tcg" != enabled && meson_option_add "-Dtcg=$tcg"
+  run_meson() {
+    NINJA=$ninja $meson setup "$@" "$PWD" "$source_path"
+  }
+  eval run_meson $meson_options
+  if test "$?" -ne 0 ; then
+      error_exit "meson setup failed"
+  fi
+  echo "$meson" > build.ninja.stamp
+else
+  if test -f meson-private/cmd_line.txt; then
+    # Adjust old command line options that were removed
+    # sed -i is not portable
+    perl -i -ne '
+      /^sphinx_build/ && next;
+      print;' meson-private/cmd_line.txt
+  fi
 fi
 
 # Save the configure command line for later reuse.
@@ -7069,28 +1877,34 @@ preserve_env() {
 preserve_env AR
 preserve_env AS
 preserve_env CC
-preserve_env CPP
+preserve_env CFLAGS
 preserve_env CXX
-preserve_env INSTALL
+preserve_env CXXFLAGS
+preserve_env DLLTOOL
 preserve_env LD
+preserve_env LDFLAGS
 preserve_env LD_LIBRARY_PATH
-preserve_env LIBTOOL
-preserve_env MAKE
 preserve_env NM
+preserve_env OBJCFLAGS
 preserve_env OBJCOPY
 preserve_env PATH
 preserve_env PKG_CONFIG
 preserve_env PKG_CONFIG_LIBDIR
 preserve_env PKG_CONFIG_PATH
 preserve_env PYTHON
+preserve_env QEMU_GA_MANUFACTURER
+preserve_env QEMU_GA_DISTRO
+preserve_env QEMU_GA_VERSION
 preserve_env SDL2_CONFIG
 preserve_env SMBD
 preserve_env STRIP
+preserve_env WIDL
 preserve_env WINDRES
+preserve_env WINDMC
 
 printf "exec" >>config.status
 for i in "$0" "$@"; do
-  test "$i" = --skip-meson || printf " '%s'" "$i" >>config.status
+  test "$i" = --skip-meson || printf " %s" "$(quote_sh "$i")" >>config.status
 done
 echo ' "$@"' >>config.status
 chmod +x config.status
index 159800df652d14d30f8ccbb3f66ae524c6f623b7..df4362ac6022eac2d736940f9a42a62561410ec8 100644 (file)
  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include "qemu/osdep.h"
+#include "qemu/bswap.h"
+#include "qemu/bitops.h"
 #include "crypto/aes.h"
+#include "crypto/aes-round.h"
 
 typedef uint32_t u32;
 typedef uint8_t u8;
@@ -108,277 +111,151 @@ const uint8_t AES_isbox[256] = {
     0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D,
 };
 
-const uint8_t AES_shifts[16] = {
-    0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
-};
+/* AES ShiftRows, for complete unrolling. */
+#define AES_SH(X)   (((X) * 5) & 15)
 
-const uint8_t AES_ishifts[16] = {
-    0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
-};
+/* AES InvShiftRows, for complete unrolling. */
+#define AES_ISH(X)  (((X) * 13) & 15)
 
-/* AES_imc[x][0] = [x].[0e, 09, 0d, 0b]; */
-/* AES_imc[x][1] = [x].[0b, 0e, 09, 0d]; */
-/* AES_imc[x][2] = [x].[0d, 0b, 0e, 09]; */
-/* AES_imc[x][3] = [x].[09, 0d, 0b, 0e]; */
-const uint32_t AES_imc[256][4] = {
-    { 0x00000000, 0x00000000, 0x00000000, 0x00000000, }, /* x=00 */
-    { 0x0E090D0B, 0x0B0E090D, 0x0D0B0E09, 0x090D0B0E, }, /* x=01 */
-    { 0x1C121A16, 0x161C121A, 0x1A161C12, 0x121A161C, }, /* x=02 */
-    { 0x121B171D, 0x1D121B17, 0x171D121B, 0x1B171D12, }, /* x=03 */
-    { 0x3824342C, 0x2C382434, 0x342C3824, 0x24342C38, }, /* x=04 */
-    { 0x362D3927, 0x27362D39, 0x3927362D, 0x2D392736, }, /* x=05 */
-    { 0x24362E3A, 0x3A24362E, 0x2E3A2436, 0x362E3A24, }, /* x=06 */
-    { 0x2A3F2331, 0x312A3F23, 0x23312A3F, 0x3F23312A, }, /* x=07 */
-    { 0x70486858, 0x58704868, 0x68587048, 0x48685870, }, /* x=08 */
-    { 0x7E416553, 0x537E4165, 0x65537E41, 0x4165537E, }, /* x=09 */
-    { 0x6C5A724E, 0x4E6C5A72, 0x724E6C5A, 0x5A724E6C, }, /* x=0A */
-    { 0x62537F45, 0x4562537F, 0x7F456253, 0x537F4562, }, /* x=0B */
-    { 0x486C5C74, 0x74486C5C, 0x5C74486C, 0x6C5C7448, }, /* x=0C */
-    { 0x4665517F, 0x7F466551, 0x517F4665, 0x65517F46, }, /* x=0D */
-    { 0x547E4662, 0x62547E46, 0x4662547E, 0x7E466254, }, /* x=0E */
-    { 0x5A774B69, 0x695A774B, 0x4B695A77, 0x774B695A, }, /* x=0F */
-    { 0xE090D0B0, 0xB0E090D0, 0xD0B0E090, 0x90D0B0E0, }, /* x=10 */
-    { 0xEE99DDBB, 0xBBEE99DD, 0xDDBBEE99, 0x99DDBBEE, }, /* x=11 */
-    { 0xFC82CAA6, 0xA6FC82CA, 0xCAA6FC82, 0x82CAA6FC, }, /* x=12 */
-    { 0xF28BC7AD, 0xADF28BC7, 0xC7ADF28B, 0x8BC7ADF2, }, /* x=13 */
-    { 0xD8B4E49C, 0x9CD8B4E4, 0xE49CD8B4, 0xB4E49CD8, }, /* x=14 */
-    { 0xD6BDE997, 0x97D6BDE9, 0xE997D6BD, 0xBDE997D6, }, /* x=15 */
-    { 0xC4A6FE8A, 0x8AC4A6FE, 0xFE8AC4A6, 0xA6FE8AC4, }, /* x=16 */
-    { 0xCAAFF381, 0x81CAAFF3, 0xF381CAAF, 0xAFF381CA, }, /* x=17 */
-    { 0x90D8B8E8, 0xE890D8B8, 0xB8E890D8, 0xD8B8E890, }, /* x=18 */
-    { 0x9ED1B5E3, 0xE39ED1B5, 0xB5E39ED1, 0xD1B5E39E, }, /* x=19 */
-    { 0x8CCAA2FE, 0xFE8CCAA2, 0xA2FE8CCA, 0xCAA2FE8C, }, /* x=1A */
-    { 0x82C3AFF5, 0xF582C3AF, 0xAFF582C3, 0xC3AFF582, }, /* x=1B */
-    { 0xA8FC8CC4, 0xC4A8FC8C, 0x8CC4A8FC, 0xFC8CC4A8, }, /* x=1C */
-    { 0xA6F581CF, 0xCFA6F581, 0x81CFA6F5, 0xF581CFA6, }, /* x=1D */
-    { 0xB4EE96D2, 0xD2B4EE96, 0x96D2B4EE, 0xEE96D2B4, }, /* x=1E */
-    { 0xBAE79BD9, 0xD9BAE79B, 0x9BD9BAE7, 0xE79BD9BA, }, /* x=1F */
-    { 0xDB3BBB7B, 0x7BDB3BBB, 0xBB7BDB3B, 0x3BBB7BDB, }, /* x=20 */
-    { 0xD532B670, 0x70D532B6, 0xB670D532, 0x32B670D5, }, /* x=21 */
-    { 0xC729A16D, 0x6DC729A1, 0xA16DC729, 0x29A16DC7, }, /* x=22 */
-    { 0xC920AC66, 0x66C920AC, 0xAC66C920, 0x20AC66C9, }, /* x=23 */
-    { 0xE31F8F57, 0x57E31F8F, 0x8F57E31F, 0x1F8F57E3, }, /* x=24 */
-    { 0xED16825C, 0x5CED1682, 0x825CED16, 0x16825CED, }, /* x=25 */
-    { 0xFF0D9541, 0x41FF0D95, 0x9541FF0D, 0x0D9541FF, }, /* x=26 */
-    { 0xF104984A, 0x4AF10498, 0x984AF104, 0x04984AF1, }, /* x=27 */
-    { 0xAB73D323, 0x23AB73D3, 0xD323AB73, 0x73D323AB, }, /* x=28 */
-    { 0xA57ADE28, 0x28A57ADE, 0xDE28A57A, 0x7ADE28A5, }, /* x=29 */
-    { 0xB761C935, 0x35B761C9, 0xC935B761, 0x61C935B7, }, /* x=2A */
-    { 0xB968C43E, 0x3EB968C4, 0xC43EB968, 0x68C43EB9, }, /* x=2B */
-    { 0x9357E70F, 0x0F9357E7, 0xE70F9357, 0x57E70F93, }, /* x=2C */
-    { 0x9D5EEA04, 0x049D5EEA, 0xEA049D5E, 0x5EEA049D, }, /* x=2D */
-    { 0x8F45FD19, 0x198F45FD, 0xFD198F45, 0x45FD198F, }, /* x=2E */
-    { 0x814CF012, 0x12814CF0, 0xF012814C, 0x4CF01281, }, /* x=2F */
-    { 0x3BAB6BCB, 0xCB3BAB6B, 0x6BCB3BAB, 0xAB6BCB3B, }, /* x=30 */
-    { 0x35A266C0, 0xC035A266, 0x66C035A2, 0xA266C035, }, /* x=31 */
-    { 0x27B971DD, 0xDD27B971, 0x71DD27B9, 0xB971DD27, }, /* x=32 */
-    { 0x29B07CD6, 0xD629B07C, 0x7CD629B0, 0xB07CD629, }, /* x=33 */
-    { 0x038F5FE7, 0xE7038F5F, 0x5FE7038F, 0x8F5FE703, }, /* x=34 */
-    { 0x0D8652EC, 0xEC0D8652, 0x52EC0D86, 0x8652EC0D, }, /* x=35 */
-    { 0x1F9D45F1, 0xF11F9D45, 0x45F11F9D, 0x9D45F11F, }, /* x=36 */
-    { 0x119448FA, 0xFA119448, 0x48FA1194, 0x9448FA11, }, /* x=37 */
-    { 0x4BE30393, 0x934BE303, 0x03934BE3, 0xE303934B, }, /* x=38 */
-    { 0x45EA0E98, 0x9845EA0E, 0x0E9845EA, 0xEA0E9845, }, /* x=39 */
-    { 0x57F11985, 0x8557F119, 0x198557F1, 0xF1198557, }, /* x=3A */
-    { 0x59F8148E, 0x8E59F814, 0x148E59F8, 0xF8148E59, }, /* x=3B */
-    { 0x73C737BF, 0xBF73C737, 0x37BF73C7, 0xC737BF73, }, /* x=3C */
-    { 0x7DCE3AB4, 0xB47DCE3A, 0x3AB47DCE, 0xCE3AB47D, }, /* x=3D */
-    { 0x6FD52DA9, 0xA96FD52D, 0x2DA96FD5, 0xD52DA96F, }, /* x=3E */
-    { 0x61DC20A2, 0xA261DC20, 0x20A261DC, 0xDC20A261, }, /* x=3F */
-    { 0xAD766DF6, 0xF6AD766D, 0x6DF6AD76, 0x766DF6AD, }, /* x=40 */
-    { 0xA37F60FD, 0xFDA37F60, 0x60FDA37F, 0x7F60FDA3, }, /* x=41 */
-    { 0xB16477E0, 0xE0B16477, 0x77E0B164, 0x6477E0B1, }, /* x=42 */
-    { 0xBF6D7AEB, 0xEBBF6D7A, 0x7AEBBF6D, 0x6D7AEBBF, }, /* x=43 */
-    { 0x955259DA, 0xDA955259, 0x59DA9552, 0x5259DA95, }, /* x=44 */
-    { 0x9B5B54D1, 0xD19B5B54, 0x54D19B5B, 0x5B54D19B, }, /* x=45 */
-    { 0x894043CC, 0xCC894043, 0x43CC8940, 0x4043CC89, }, /* x=46 */
-    { 0x87494EC7, 0xC787494E, 0x4EC78749, 0x494EC787, }, /* x=47 */
-    { 0xDD3E05AE, 0xAEDD3E05, 0x05AEDD3E, 0x3E05AEDD, }, /* x=48 */
-    { 0xD33708A5, 0xA5D33708, 0x08A5D337, 0x3708A5D3, }, /* x=49 */
-    { 0xC12C1FB8, 0xB8C12C1F, 0x1FB8C12C, 0x2C1FB8C1, }, /* x=4A */
-    { 0xCF2512B3, 0xB3CF2512, 0x12B3CF25, 0x2512B3CF, }, /* x=4B */
-    { 0xE51A3182, 0x82E51A31, 0x3182E51A, 0x1A3182E5, }, /* x=4C */
-    { 0xEB133C89, 0x89EB133C, 0x3C89EB13, 0x133C89EB, }, /* x=4D */
-    { 0xF9082B94, 0x94F9082B, 0x2B94F908, 0x082B94F9, }, /* x=4E */
-    { 0xF701269F, 0x9FF70126, 0x269FF701, 0x01269FF7, }, /* x=4F */
-    { 0x4DE6BD46, 0x464DE6BD, 0xBD464DE6, 0xE6BD464D, }, /* x=50 */
-    { 0x43EFB04D, 0x4D43EFB0, 0xB04D43EF, 0xEFB04D43, }, /* x=51 */
-    { 0x51F4A750, 0x5051F4A7, 0xA75051F4, 0xF4A75051, }, /* x=52 */
-    { 0x5FFDAA5B, 0x5B5FFDAA, 0xAA5B5FFD, 0xFDAA5B5F, }, /* x=53 */
-    { 0x75C2896A, 0x6A75C289, 0x896A75C2, 0xC2896A75, }, /* x=54 */
-    { 0x7BCB8461, 0x617BCB84, 0x84617BCB, 0xCB84617B, }, /* x=55 */
-    { 0x69D0937C, 0x7C69D093, 0x937C69D0, 0xD0937C69, }, /* x=56 */
-    { 0x67D99E77, 0x7767D99E, 0x9E7767D9, 0xD99E7767, }, /* x=57 */
-    { 0x3DAED51E, 0x1E3DAED5, 0xD51E3DAE, 0xAED51E3D, }, /* x=58 */
-    { 0x33A7D815, 0x1533A7D8, 0xD81533A7, 0xA7D81533, }, /* x=59 */
-    { 0x21BCCF08, 0x0821BCCF, 0xCF0821BC, 0xBCCF0821, }, /* x=5A */
-    { 0x2FB5C203, 0x032FB5C2, 0xC2032FB5, 0xB5C2032F, }, /* x=5B */
-    { 0x058AE132, 0x32058AE1, 0xE132058A, 0x8AE13205, }, /* x=5C */
-    { 0x0B83EC39, 0x390B83EC, 0xEC390B83, 0x83EC390B, }, /* x=5D */
-    { 0x1998FB24, 0x241998FB, 0xFB241998, 0x98FB2419, }, /* x=5E */
-    { 0x1791F62F, 0x2F1791F6, 0xF62F1791, 0x91F62F17, }, /* x=5F */
-    { 0x764DD68D, 0x8D764DD6, 0xD68D764D, 0x4DD68D76, }, /* x=60 */
-    { 0x7844DB86, 0x867844DB, 0xDB867844, 0x44DB8678, }, /* x=61 */
-    { 0x6A5FCC9B, 0x9B6A5FCC, 0xCC9B6A5F, 0x5FCC9B6A, }, /* x=62 */
-    { 0x6456C190, 0x906456C1, 0xC1906456, 0x56C19064, }, /* x=63 */
-    { 0x4E69E2A1, 0xA14E69E2, 0xE2A14E69, 0x69E2A14E, }, /* x=64 */
-    { 0x4060EFAA, 0xAA4060EF, 0xEFAA4060, 0x60EFAA40, }, /* x=65 */
-    { 0x527BF8B7, 0xB7527BF8, 0xF8B7527B, 0x7BF8B752, }, /* x=66 */
-    { 0x5C72F5BC, 0xBC5C72F5, 0xF5BC5C72, 0x72F5BC5C, }, /* x=67 */
-    { 0x0605BED5, 0xD50605BE, 0xBED50605, 0x05BED506, }, /* x=68 */
-    { 0x080CB3DE, 0xDE080CB3, 0xB3DE080C, 0x0CB3DE08, }, /* x=69 */
-    { 0x1A17A4C3, 0xC31A17A4, 0xA4C31A17, 0x17A4C31A, }, /* x=6A */
-    { 0x141EA9C8, 0xC8141EA9, 0xA9C8141E, 0x1EA9C814, }, /* x=6B */
-    { 0x3E218AF9, 0xF93E218A, 0x8AF93E21, 0x218AF93E, }, /* x=6C */
-    { 0x302887F2, 0xF2302887, 0x87F23028, 0x2887F230, }, /* x=6D */
-    { 0x223390EF, 0xEF223390, 0x90EF2233, 0x3390EF22, }, /* x=6E */
-    { 0x2C3A9DE4, 0xE42C3A9D, 0x9DE42C3A, 0x3A9DE42C, }, /* x=6F */
-    { 0x96DD063D, 0x3D96DD06, 0x063D96DD, 0xDD063D96, }, /* x=70 */
-    { 0x98D40B36, 0x3698D40B, 0x0B3698D4, 0xD40B3698, }, /* x=71 */
-    { 0x8ACF1C2B, 0x2B8ACF1C, 0x1C2B8ACF, 0xCF1C2B8A, }, /* x=72 */
-    { 0x84C61120, 0x2084C611, 0x112084C6, 0xC6112084, }, /* x=73 */
-    { 0xAEF93211, 0x11AEF932, 0x3211AEF9, 0xF93211AE, }, /* x=74 */
-    { 0xA0F03F1A, 0x1AA0F03F, 0x3F1AA0F0, 0xF03F1AA0, }, /* x=75 */
-    { 0xB2EB2807, 0x07B2EB28, 0x2807B2EB, 0xEB2807B2, }, /* x=76 */
-    { 0xBCE2250C, 0x0CBCE225, 0x250CBCE2, 0xE2250CBC, }, /* x=77 */
-    { 0xE6956E65, 0x65E6956E, 0x6E65E695, 0x956E65E6, }, /* x=78 */
-    { 0xE89C636E, 0x6EE89C63, 0x636EE89C, 0x9C636EE8, }, /* x=79 */
-    { 0xFA877473, 0x73FA8774, 0x7473FA87, 0x877473FA, }, /* x=7A */
-    { 0xF48E7978, 0x78F48E79, 0x7978F48E, 0x8E7978F4, }, /* x=7B */
-    { 0xDEB15A49, 0x49DEB15A, 0x5A49DEB1, 0xB15A49DE, }, /* x=7C */
-    { 0xD0B85742, 0x42D0B857, 0x5742D0B8, 0xB85742D0, }, /* x=7D */
-    { 0xC2A3405F, 0x5FC2A340, 0x405FC2A3, 0xA3405FC2, }, /* x=7E */
-    { 0xCCAA4D54, 0x54CCAA4D, 0x4D54CCAA, 0xAA4D54CC, }, /* x=7F */
-    { 0x41ECDAF7, 0xF741ECDA, 0xDAF741EC, 0xECDAF741, }, /* x=80 */
-    { 0x4FE5D7FC, 0xFC4FE5D7, 0xD7FC4FE5, 0xE5D7FC4F, }, /* x=81 */
-    { 0x5DFEC0E1, 0xE15DFEC0, 0xC0E15DFE, 0xFEC0E15D, }, /* x=82 */
-    { 0x53F7CDEA, 0xEA53F7CD, 0xCDEA53F7, 0xF7CDEA53, }, /* x=83 */
-    { 0x79C8EEDB, 0xDB79C8EE, 0xEEDB79C8, 0xC8EEDB79, }, /* x=84 */
-    { 0x77C1E3D0, 0xD077C1E3, 0xE3D077C1, 0xC1E3D077, }, /* x=85 */
-    { 0x65DAF4CD, 0xCD65DAF4, 0xF4CD65DA, 0xDAF4CD65, }, /* x=86 */
-    { 0x6BD3F9C6, 0xC66BD3F9, 0xF9C66BD3, 0xD3F9C66B, }, /* x=87 */
-    { 0x31A4B2AF, 0xAF31A4B2, 0xB2AF31A4, 0xA4B2AF31, }, /* x=88 */
-    { 0x3FADBFA4, 0xA43FADBF, 0xBFA43FAD, 0xADBFA43F, }, /* x=89 */
-    { 0x2DB6A8B9, 0xB92DB6A8, 0xA8B92DB6, 0xB6A8B92D, }, /* x=8A */
-    { 0x23BFA5B2, 0xB223BFA5, 0xA5B223BF, 0xBFA5B223, }, /* x=8B */
-    { 0x09808683, 0x83098086, 0x86830980, 0x80868309, }, /* x=8C */
-    { 0x07898B88, 0x8807898B, 0x8B880789, 0x898B8807, }, /* x=8D */
-    { 0x15929C95, 0x9515929C, 0x9C951592, 0x929C9515, }, /* x=8E */
-    { 0x1B9B919E, 0x9E1B9B91, 0x919E1B9B, 0x9B919E1B, }, /* x=8F */
-    { 0xA17C0A47, 0x47A17C0A, 0x0A47A17C, 0x7C0A47A1, }, /* x=90 */
-    { 0xAF75074C, 0x4CAF7507, 0x074CAF75, 0x75074CAF, }, /* x=91 */
-    { 0xBD6E1051, 0x51BD6E10, 0x1051BD6E, 0x6E1051BD, }, /* x=92 */
-    { 0xB3671D5A, 0x5AB3671D, 0x1D5AB367, 0x671D5AB3, }, /* x=93 */
-    { 0x99583E6B, 0x6B99583E, 0x3E6B9958, 0x583E6B99, }, /* x=94 */
-    { 0x97513360, 0x60975133, 0x33609751, 0x51336097, }, /* x=95 */
-    { 0x854A247D, 0x7D854A24, 0x247D854A, 0x4A247D85, }, /* x=96 */
-    { 0x8B432976, 0x768B4329, 0x29768B43, 0x4329768B, }, /* x=97 */
-    { 0xD134621F, 0x1FD13462, 0x621FD134, 0x34621FD1, }, /* x=98 */
-    { 0xDF3D6F14, 0x14DF3D6F, 0x6F14DF3D, 0x3D6F14DF, }, /* x=99 */
-    { 0xCD267809, 0x09CD2678, 0x7809CD26, 0x267809CD, }, /* x=9A */
-    { 0xC32F7502, 0x02C32F75, 0x7502C32F, 0x2F7502C3, }, /* x=9B */
-    { 0xE9105633, 0x33E91056, 0x5633E910, 0x105633E9, }, /* x=9C */
-    { 0xE7195B38, 0x38E7195B, 0x5B38E719, 0x195B38E7, }, /* x=9D */
-    { 0xF5024C25, 0x25F5024C, 0x4C25F502, 0x024C25F5, }, /* x=9E */
-    { 0xFB0B412E, 0x2EFB0B41, 0x412EFB0B, 0x0B412EFB, }, /* x=9F */
-    { 0x9AD7618C, 0x8C9AD761, 0x618C9AD7, 0xD7618C9A, }, /* x=A0 */
-    { 0x94DE6C87, 0x8794DE6C, 0x6C8794DE, 0xDE6C8794, }, /* x=A1 */
-    { 0x86C57B9A, 0x9A86C57B, 0x7B9A86C5, 0xC57B9A86, }, /* x=A2 */
-    { 0x88CC7691, 0x9188CC76, 0x769188CC, 0xCC769188, }, /* x=A3 */
-    { 0xA2F355A0, 0xA0A2F355, 0x55A0A2F3, 0xF355A0A2, }, /* x=A4 */
-    { 0xACFA58AB, 0xABACFA58, 0x58ABACFA, 0xFA58ABAC, }, /* x=A5 */
-    { 0xBEE14FB6, 0xB6BEE14F, 0x4FB6BEE1, 0xE14FB6BE, }, /* x=A6 */
-    { 0xB0E842BD, 0xBDB0E842, 0x42BDB0E8, 0xE842BDB0, }, /* x=A7 */
-    { 0xEA9F09D4, 0xD4EA9F09, 0x09D4EA9F, 0x9F09D4EA, }, /* x=A8 */
-    { 0xE49604DF, 0xDFE49604, 0x04DFE496, 0x9604DFE4, }, /* x=A9 */
-    { 0xF68D13C2, 0xC2F68D13, 0x13C2F68D, 0x8D13C2F6, }, /* x=AA */
-    { 0xF8841EC9, 0xC9F8841E, 0x1EC9F884, 0x841EC9F8, }, /* x=AB */
-    { 0xD2BB3DF8, 0xF8D2BB3D, 0x3DF8D2BB, 0xBB3DF8D2, }, /* x=AC */
-    { 0xDCB230F3, 0xF3DCB230, 0x30F3DCB2, 0xB230F3DC, }, /* x=AD */
-    { 0xCEA927EE, 0xEECEA927, 0x27EECEA9, 0xA927EECE, }, /* x=AE */
-    { 0xC0A02AE5, 0xE5C0A02A, 0x2AE5C0A0, 0xA02AE5C0, }, /* x=AF */
-    { 0x7A47B13C, 0x3C7A47B1, 0xB13C7A47, 0x47B13C7A, }, /* x=B0 */
-    { 0x744EBC37, 0x37744EBC, 0xBC37744E, 0x4EBC3774, }, /* x=B1 */
-    { 0x6655AB2A, 0x2A6655AB, 0xAB2A6655, 0x55AB2A66, }, /* x=B2 */
-    { 0x685CA621, 0x21685CA6, 0xA621685C, 0x5CA62168, }, /* x=B3 */
-    { 0x42638510, 0x10426385, 0x85104263, 0x63851042, }, /* x=B4 */
-    { 0x4C6A881B, 0x1B4C6A88, 0x881B4C6A, 0x6A881B4C, }, /* x=B5 */
-    { 0x5E719F06, 0x065E719F, 0x9F065E71, 0x719F065E, }, /* x=B6 */
-    { 0x5078920D, 0x0D507892, 0x920D5078, 0x78920D50, }, /* x=B7 */
-    { 0x0A0FD964, 0x640A0FD9, 0xD9640A0F, 0x0FD9640A, }, /* x=B8 */
-    { 0x0406D46F, 0x6F0406D4, 0xD46F0406, 0x06D46F04, }, /* x=B9 */
-    { 0x161DC372, 0x72161DC3, 0xC372161D, 0x1DC37216, }, /* x=BA */
-    { 0x1814CE79, 0x791814CE, 0xCE791814, 0x14CE7918, }, /* x=BB */
-    { 0x322BED48, 0x48322BED, 0xED48322B, 0x2BED4832, }, /* x=BC */
-    { 0x3C22E043, 0x433C22E0, 0xE0433C22, 0x22E0433C, }, /* x=BD */
-    { 0x2E39F75E, 0x5E2E39F7, 0xF75E2E39, 0x39F75E2E, }, /* x=BE */
-    { 0x2030FA55, 0x552030FA, 0xFA552030, 0x30FA5520, }, /* x=BF */
-    { 0xEC9AB701, 0x01EC9AB7, 0xB701EC9A, 0x9AB701EC, }, /* x=C0 */
-    { 0xE293BA0A, 0x0AE293BA, 0xBA0AE293, 0x93BA0AE2, }, /* x=C1 */
-    { 0xF088AD17, 0x17F088AD, 0xAD17F088, 0x88AD17F0, }, /* x=C2 */
-    { 0xFE81A01C, 0x1CFE81A0, 0xA01CFE81, 0x81A01CFE, }, /* x=C3 */
-    { 0xD4BE832D, 0x2DD4BE83, 0x832DD4BE, 0xBE832DD4, }, /* x=C4 */
-    { 0xDAB78E26, 0x26DAB78E, 0x8E26DAB7, 0xB78E26DA, }, /* x=C5 */
-    { 0xC8AC993B, 0x3BC8AC99, 0x993BC8AC, 0xAC993BC8, }, /* x=C6 */
-    { 0xC6A59430, 0x30C6A594, 0x9430C6A5, 0xA59430C6, }, /* x=C7 */
-    { 0x9CD2DF59, 0x599CD2DF, 0xDF599CD2, 0xD2DF599C, }, /* x=C8 */
-    { 0x92DBD252, 0x5292DBD2, 0xD25292DB, 0xDBD25292, }, /* x=C9 */
-    { 0x80C0C54F, 0x4F80C0C5, 0xC54F80C0, 0xC0C54F80, }, /* x=CA */
-    { 0x8EC9C844, 0x448EC9C8, 0xC8448EC9, 0xC9C8448E, }, /* x=CB */
-    { 0xA4F6EB75, 0x75A4F6EB, 0xEB75A4F6, 0xF6EB75A4, }, /* x=CC */
-    { 0xAAFFE67E, 0x7EAAFFE6, 0xE67EAAFF, 0xFFE67EAA, }, /* x=CD */
-    { 0xB8E4F163, 0x63B8E4F1, 0xF163B8E4, 0xE4F163B8, }, /* x=CE */
-    { 0xB6EDFC68, 0x68B6EDFC, 0xFC68B6ED, 0xEDFC68B6, }, /* x=CF */
-    { 0x0C0A67B1, 0xB10C0A67, 0x67B10C0A, 0x0A67B10C, }, /* x=D0 */
-    { 0x02036ABA, 0xBA02036A, 0x6ABA0203, 0x036ABA02, }, /* x=D1 */
-    { 0x10187DA7, 0xA710187D, 0x7DA71018, 0x187DA710, }, /* x=D2 */
-    { 0x1E1170AC, 0xAC1E1170, 0x70AC1E11, 0x1170AC1E, }, /* x=D3 */
-    { 0x342E539D, 0x9D342E53, 0x539D342E, 0x2E539D34, }, /* x=D4 */
-    { 0x3A275E96, 0x963A275E, 0x5E963A27, 0x275E963A, }, /* x=D5 */
-    { 0x283C498B, 0x8B283C49, 0x498B283C, 0x3C498B28, }, /* x=D6 */
-    { 0x26354480, 0x80263544, 0x44802635, 0x35448026, }, /* x=D7 */
-    { 0x7C420FE9, 0xE97C420F, 0x0FE97C42, 0x420FE97C, }, /* x=D8 */
-    { 0x724B02E2, 0xE2724B02, 0x02E2724B, 0x4B02E272, }, /* x=D9 */
-    { 0x605015FF, 0xFF605015, 0x15FF6050, 0x5015FF60, }, /* x=DA */
-    { 0x6E5918F4, 0xF46E5918, 0x18F46E59, 0x5918F46E, }, /* x=DB */
-    { 0x44663BC5, 0xC544663B, 0x3BC54466, 0x663BC544, }, /* x=DC */
-    { 0x4A6F36CE, 0xCE4A6F36, 0x36CE4A6F, 0x6F36CE4A, }, /* x=DD */
-    { 0x587421D3, 0xD3587421, 0x21D35874, 0x7421D358, }, /* x=DE */
-    { 0x567D2CD8, 0xD8567D2C, 0x2CD8567D, 0x7D2CD856, }, /* x=DF */
-    { 0x37A10C7A, 0x7A37A10C, 0x0C7A37A1, 0xA10C7A37, }, /* x=E0 */
-    { 0x39A80171, 0x7139A801, 0x017139A8, 0xA8017139, }, /* x=E1 */
-    { 0x2BB3166C, 0x6C2BB316, 0x166C2BB3, 0xB3166C2B, }, /* x=E2 */
-    { 0x25BA1B67, 0x6725BA1B, 0x1B6725BA, 0xBA1B6725, }, /* x=E3 */
-    { 0x0F853856, 0x560F8538, 0x38560F85, 0x8538560F, }, /* x=E4 */
-    { 0x018C355D, 0x5D018C35, 0x355D018C, 0x8C355D01, }, /* x=E5 */
-    { 0x13972240, 0x40139722, 0x22401397, 0x97224013, }, /* x=E6 */
-    { 0x1D9E2F4B, 0x4B1D9E2F, 0x2F4B1D9E, 0x9E2F4B1D, }, /* x=E7 */
-    { 0x47E96422, 0x2247E964, 0x642247E9, 0xE9642247, }, /* x=E8 */
-    { 0x49E06929, 0x2949E069, 0x692949E0, 0xE0692949, }, /* x=E9 */
-    { 0x5BFB7E34, 0x345BFB7E, 0x7E345BFB, 0xFB7E345B, }, /* x=EA */
-    { 0x55F2733F, 0x3F55F273, 0x733F55F2, 0xF2733F55, }, /* x=EB */
-    { 0x7FCD500E, 0x0E7FCD50, 0x500E7FCD, 0xCD500E7F, }, /* x=EC */
-    { 0x71C45D05, 0x0571C45D, 0x5D0571C4, 0xC45D0571, }, /* x=ED */
-    { 0x63DF4A18, 0x1863DF4A, 0x4A1863DF, 0xDF4A1863, }, /* x=EE */
-    { 0x6DD64713, 0x136DD647, 0x47136DD6, 0xD647136D, }, /* x=EF */
-    { 0xD731DCCA, 0xCAD731DC, 0xDCCAD731, 0x31DCCAD7, }, /* x=F0 */
-    { 0xD938D1C1, 0xC1D938D1, 0xD1C1D938, 0x38D1C1D9, }, /* x=F1 */
-    { 0xCB23C6DC, 0xDCCB23C6, 0xC6DCCB23, 0x23C6DCCB, }, /* x=F2 */
-    { 0xC52ACBD7, 0xD7C52ACB, 0xCBD7C52A, 0x2ACBD7C5, }, /* x=F3 */
-    { 0xEF15E8E6, 0xE6EF15E8, 0xE8E6EF15, 0x15E8E6EF, }, /* x=F4 */
-    { 0xE11CE5ED, 0xEDE11CE5, 0xE5EDE11C, 0x1CE5EDE1, }, /* x=F5 */
-    { 0xF307F2F0, 0xF0F307F2, 0xF2F0F307, 0x07F2F0F3, }, /* x=F6 */
-    { 0xFD0EFFFB, 0xFBFD0EFF, 0xFFFBFD0E, 0x0EFFFBFD, }, /* x=F7 */
-    { 0xA779B492, 0x92A779B4, 0xB492A779, 0x79B492A7, }, /* x=F8 */
-    { 0xA970B999, 0x99A970B9, 0xB999A970, 0x70B999A9, }, /* x=F9 */
-    { 0xBB6BAE84, 0x84BB6BAE, 0xAE84BB6B, 0x6BAE84BB, }, /* x=FA */
-    { 0xB562A38F, 0x8FB562A3, 0xA38FB562, 0x62A38FB5, }, /* x=FB */
-    { 0x9F5D80BE, 0xBE9F5D80, 0x80BE9F5D, 0x5D80BE9F, }, /* x=FC */
-    { 0x91548DB5, 0xB591548D, 0x8DB59154, 0x548DB591, }, /* x=FD */
-    { 0x834F9AA8, 0xA8834F9A, 0x9AA8834F, 0x4F9AA883, }, /* x=FE */
-    { 0x8D4697A3, 0xA38D4697, 0x97A38D46, 0x4697A38D, }, /* x=FF */
+/*
+ * MixColumns lookup table, for use with rot32.
+ */
+static const uint32_t AES_mc_rot[256] = {
+    0x00000000, 0x03010102, 0x06020204, 0x05030306,
+    0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
+    0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
+    0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
+    0x30101020, 0x33111122, 0x36121224, 0x35131326,
+    0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
+    0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
+    0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
+    0x60202040, 0x63212142, 0x66222244, 0x65232346,
+    0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
+    0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
+    0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
+    0x50303060, 0x53313162, 0x56323264, 0x55333366,
+    0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
+    0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
+    0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
+    0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
+    0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
+    0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
+    0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
+    0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
+    0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
+    0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
+    0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
+    0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
+    0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
+    0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
+    0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
+    0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
+    0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
+    0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
+    0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
+    0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
+    0x97848413, 0x94858511, 0x91868617, 0x92878715,
+    0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
+    0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
+    0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
+    0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
+    0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
+    0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
+    0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
+    0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
+    0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
+    0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
+    0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
+    0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
+    0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
+    0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
+    0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
+    0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
+    0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
+    0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
+    0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
+    0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
+    0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
+    0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
+    0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
+    0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
+    0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
+    0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
+    0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
+    0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
+    0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
+    0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
 };
 
+/*
+ * Inverse MixColumns lookup table, for use with rot32.
+ */
+static const uint32_t AES_imc_rot[256] = {
+    0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
+    0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
+    0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
+    0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
+    0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
+    0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
+    0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
+    0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
+    0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
+    0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
+    0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
+    0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
+    0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
+    0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
+    0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
+    0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
+    0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
+    0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
+    0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
+    0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
+    0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
+    0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
+    0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
+    0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
+    0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
+    0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
+    0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
+    0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
+    0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
+    0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
+    0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
+    0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
+    0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
+    0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
+    0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
+    0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
+    0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
+    0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
+    0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
+    0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
+    0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
+    0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
+    0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
+    0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
+    0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
+    0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
+    0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
+    0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
+    0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
+    0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
+    0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
+    0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
+    0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
+    0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
+    0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
+    0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
+    0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
+    0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
+    0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
+    0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
+    0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
+    0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
+    0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
+    0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
+};
 
 
 /*
@@ -461,7 +338,8 @@ const uint32_t AES_Te0[256] = {
     0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
     0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
 };
-const uint32_t AES_Te1[256] = {
+
+static const uint32_t AES_Te1[256] = {
     0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
     0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
     0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
@@ -527,7 +405,8 @@ const uint32_t AES_Te1[256] = {
     0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
     0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
 };
-const uint32_t AES_Te2[256] = {
+
+static const uint32_t AES_Te2[256] = {
     0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
     0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
     0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
@@ -593,8 +472,8 @@ const uint32_t AES_Te2[256] = {
     0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
     0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
 };
-const uint32_t AES_Te3[256] = {
 
+static const uint32_t AES_Te3[256] = {
     0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
     0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
     0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
@@ -660,7 +539,8 @@ const uint32_t AES_Te3[256] = {
     0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
     0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
 };
-const uint32_t AES_Te4[256] = {
+
+static const uint32_t AES_Te4[256] = {
     0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
     0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
     0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
@@ -726,6 +606,7 @@ const uint32_t AES_Te4[256] = {
     0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
     0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
 };
+
 const uint32_t AES_Td0[256] = {
     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
@@ -792,7 +673,8 @@ const uint32_t AES_Td0[256] = {
     0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
     0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
 };
-const uint32_t AES_Td1[256] = {
+
+static const uint32_t AES_Td1[256] = {
     0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
     0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
     0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
@@ -858,7 +740,8 @@ const uint32_t AES_Td1[256] = {
     0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
     0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
 };
-const uint32_t AES_Td2[256] = {
+
+static const uint32_t AES_Td2[256] = {
     0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
     0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
     0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
@@ -925,7 +808,8 @@ const uint32_t AES_Td2[256] = {
     0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
     0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
 };
-const uint32_t AES_Td3[256] = {
+
+static const uint32_t AES_Td3[256] = {
     0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
     0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
     0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
@@ -991,7 +875,8 @@ const uint32_t AES_Td3[256] = {
     0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
     0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
 };
-const uint32_t AES_Td4[256] = {
+
+static const uint32_t AES_Td4[256] = {
     0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
     0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
     0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
@@ -1057,12 +942,351 @@ const uint32_t AES_Td4[256] = {
     0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
     0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
 };
+
 static const u32 rcon[] = {
         0x01000000, 0x02000000, 0x04000000, 0x08000000,
         0x10000000, 0x20000000, 0x40000000, 0x80000000,
         0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
 };
 
+/*
+ * Perform MixColumns.
+ */
+static inline void
+aesenc_MC_swap(AESState *r, const AESState *st, bool swap)
+{
+    int swap_b = swap * 0xf;
+    int swap_w = swap * 0x3;
+    bool be = HOST_BIG_ENDIAN ^ swap;
+    uint32_t t;
+
+    /* Note that AES_mc_rot is encoded for little-endian. */
+    t = (      AES_mc_rot[st->b[swap_b ^ 0x0]] ^
+         rol32(AES_mc_rot[st->b[swap_b ^ 0x1]], 8) ^
+         rol32(AES_mc_rot[st->b[swap_b ^ 0x2]], 16) ^
+         rol32(AES_mc_rot[st->b[swap_b ^ 0x3]], 24));
+    if (be) {
+        t = bswap32(t);
+    }
+    r->w[swap_w ^ 0] = t;
+
+    t = (      AES_mc_rot[st->b[swap_b ^ 0x4]] ^
+         rol32(AES_mc_rot[st->b[swap_b ^ 0x5]], 8) ^
+         rol32(AES_mc_rot[st->b[swap_b ^ 0x6]], 16) ^
+         rol32(AES_mc_rot[st->b[swap_b ^ 0x7]], 24));
+    if (be) {
+        t = bswap32(t);
+    }
+    r->w[swap_w ^ 1] = t;
+
+    t = (      AES_mc_rot[st->b[swap_b ^ 0x8]] ^
+         rol32(AES_mc_rot[st->b[swap_b ^ 0x9]], 8) ^
+         rol32(AES_mc_rot[st->b[swap_b ^ 0xA]], 16) ^
+         rol32(AES_mc_rot[st->b[swap_b ^ 0xB]], 24));
+    if (be) {
+        t = bswap32(t);
+    }
+    r->w[swap_w ^ 2] = t;
+
+    t = (      AES_mc_rot[st->b[swap_b ^ 0xC]] ^
+         rol32(AES_mc_rot[st->b[swap_b ^ 0xD]], 8) ^
+         rol32(AES_mc_rot[st->b[swap_b ^ 0xE]], 16) ^
+         rol32(AES_mc_rot[st->b[swap_b ^ 0xF]], 24));
+    if (be) {
+        t = bswap32(t);
+    }
+    r->w[swap_w ^ 3] = t;
+}
+
+void aesenc_MC_gen(AESState *r, const AESState *st)
+{
+    aesenc_MC_swap(r, st, false);
+}
+
+void aesenc_MC_genrev(AESState *r, const AESState *st)
+{
+    aesenc_MC_swap(r, st, true);
+}
+
+/*
+ * Perform SubBytes + ShiftRows + AddRoundKey.
+ */
+static inline void
+aesenc_SB_SR_AK_swap(AESState *ret, const AESState *st,
+                     const AESState *rk, bool swap)
+{
+    const int swap_b = swap ? 15 : 0;
+    AESState t;
+
+    t.b[swap_b ^ 0x0] = AES_sbox[st->b[swap_b ^ AES_SH(0x0)]];
+    t.b[swap_b ^ 0x1] = AES_sbox[st->b[swap_b ^ AES_SH(0x1)]];
+    t.b[swap_b ^ 0x2] = AES_sbox[st->b[swap_b ^ AES_SH(0x2)]];
+    t.b[swap_b ^ 0x3] = AES_sbox[st->b[swap_b ^ AES_SH(0x3)]];
+    t.b[swap_b ^ 0x4] = AES_sbox[st->b[swap_b ^ AES_SH(0x4)]];
+    t.b[swap_b ^ 0x5] = AES_sbox[st->b[swap_b ^ AES_SH(0x5)]];
+    t.b[swap_b ^ 0x6] = AES_sbox[st->b[swap_b ^ AES_SH(0x6)]];
+    t.b[swap_b ^ 0x7] = AES_sbox[st->b[swap_b ^ AES_SH(0x7)]];
+    t.b[swap_b ^ 0x8] = AES_sbox[st->b[swap_b ^ AES_SH(0x8)]];
+    t.b[swap_b ^ 0x9] = AES_sbox[st->b[swap_b ^ AES_SH(0x9)]];
+    t.b[swap_b ^ 0xa] = AES_sbox[st->b[swap_b ^ AES_SH(0xA)]];
+    t.b[swap_b ^ 0xb] = AES_sbox[st->b[swap_b ^ AES_SH(0xB)]];
+    t.b[swap_b ^ 0xc] = AES_sbox[st->b[swap_b ^ AES_SH(0xC)]];
+    t.b[swap_b ^ 0xd] = AES_sbox[st->b[swap_b ^ AES_SH(0xD)]];
+    t.b[swap_b ^ 0xe] = AES_sbox[st->b[swap_b ^ AES_SH(0xE)]];
+    t.b[swap_b ^ 0xf] = AES_sbox[st->b[swap_b ^ AES_SH(0xF)]];
+
+    /*
+     * Perform the AddRoundKey with generic vectors.
+     * This may be expanded to either host integer or host vector code.
+     * The key and output endianness match, so no bswap required.
+     */
+    ret->v = t.v ^ rk->v;
+}
+
+void aesenc_SB_SR_AK_gen(AESState *r, const AESState *s, const AESState *k)
+{
+    aesenc_SB_SR_AK_swap(r, s, k, false);
+}
+
+void aesenc_SB_SR_AK_genrev(AESState *r, const AESState *s, const AESState *k)
+{
+    aesenc_SB_SR_AK_swap(r, s, k, true);
+}
+
+/*
+ * Perform SubBytes + ShiftRows + MixColumns + AddRoundKey.
+ */
+static inline void
+aesenc_SB_SR_MC_AK_swap(AESState *r, const AESState *st,
+                        const AESState *rk, bool swap)
+{
+    int swap_b = swap * 0xf;
+    int swap_w = swap * 0x3;
+    bool be = HOST_BIG_ENDIAN ^ swap;
+    uint32_t w0, w1, w2, w3;
+
+    w0 = (AES_Te0[st->b[swap_b ^ AES_SH(0x0)]] ^
+          AES_Te1[st->b[swap_b ^ AES_SH(0x1)]] ^
+          AES_Te2[st->b[swap_b ^ AES_SH(0x2)]] ^
+          AES_Te3[st->b[swap_b ^ AES_SH(0x3)]]);
+
+    w1 = (AES_Te0[st->b[swap_b ^ AES_SH(0x4)]] ^
+          AES_Te1[st->b[swap_b ^ AES_SH(0x5)]] ^
+          AES_Te2[st->b[swap_b ^ AES_SH(0x6)]] ^
+          AES_Te3[st->b[swap_b ^ AES_SH(0x7)]]);
+
+    w2 = (AES_Te0[st->b[swap_b ^ AES_SH(0x8)]] ^
+          AES_Te1[st->b[swap_b ^ AES_SH(0x9)]] ^
+          AES_Te2[st->b[swap_b ^ AES_SH(0xA)]] ^
+          AES_Te3[st->b[swap_b ^ AES_SH(0xB)]]);
+
+    w3 = (AES_Te0[st->b[swap_b ^ AES_SH(0xC)]] ^
+          AES_Te1[st->b[swap_b ^ AES_SH(0xD)]] ^
+          AES_Te2[st->b[swap_b ^ AES_SH(0xE)]] ^
+          AES_Te3[st->b[swap_b ^ AES_SH(0xF)]]);
+
+    /* Note that AES_TeX is encoded for big-endian. */
+    if (!be) {
+        w0 = bswap32(w0);
+        w1 = bswap32(w1);
+        w2 = bswap32(w2);
+        w3 = bswap32(w3);
+    }
+
+    r->w[swap_w ^ 0] = rk->w[swap_w ^ 0] ^ w0;
+    r->w[swap_w ^ 1] = rk->w[swap_w ^ 1] ^ w1;
+    r->w[swap_w ^ 2] = rk->w[swap_w ^ 2] ^ w2;
+    r->w[swap_w ^ 3] = rk->w[swap_w ^ 3] ^ w3;
+}
+
+void aesenc_SB_SR_MC_AK_gen(AESState *r, const AESState *st,
+                            const AESState *rk)
+{
+    aesenc_SB_SR_MC_AK_swap(r, st, rk, false);
+}
+
+void aesenc_SB_SR_MC_AK_genrev(AESState *r, const AESState *st,
+                               const AESState *rk)
+{
+    aesenc_SB_SR_MC_AK_swap(r, st, rk, true);
+}
+
+/*
+ * Perform InvMixColumns.
+ */
+static inline void
+aesdec_IMC_swap(AESState *r, const AESState *st, bool swap)
+{
+    int swap_b = swap * 0xf;
+    int swap_w = swap * 0x3;
+    bool be = HOST_BIG_ENDIAN ^ swap;
+    uint32_t t;
+
+    /* Note that AES_imc_rot is encoded for little-endian. */
+    t = (      AES_imc_rot[st->b[swap_b ^ 0x0]] ^
+         rol32(AES_imc_rot[st->b[swap_b ^ 0x1]], 8) ^
+         rol32(AES_imc_rot[st->b[swap_b ^ 0x2]], 16) ^
+         rol32(AES_imc_rot[st->b[swap_b ^ 0x3]], 24));
+    if (be) {
+        t = bswap32(t);
+    }
+    r->w[swap_w ^ 0] = t;
+
+    t = (      AES_imc_rot[st->b[swap_b ^ 0x4]] ^
+         rol32(AES_imc_rot[st->b[swap_b ^ 0x5]], 8) ^
+         rol32(AES_imc_rot[st->b[swap_b ^ 0x6]], 16) ^
+         rol32(AES_imc_rot[st->b[swap_b ^ 0x7]], 24));
+    if (be) {
+        t = bswap32(t);
+    }
+    r->w[swap_w ^ 1] = t;
+
+    t = (      AES_imc_rot[st->b[swap_b ^ 0x8]] ^
+         rol32(AES_imc_rot[st->b[swap_b ^ 0x9]], 8) ^
+         rol32(AES_imc_rot[st->b[swap_b ^ 0xA]], 16) ^
+         rol32(AES_imc_rot[st->b[swap_b ^ 0xB]], 24));
+    if (be) {
+        t = bswap32(t);
+    }
+    r->w[swap_w ^ 2] = t;
+
+    t = (      AES_imc_rot[st->b[swap_b ^ 0xC]] ^
+         rol32(AES_imc_rot[st->b[swap_b ^ 0xD]], 8) ^
+         rol32(AES_imc_rot[st->b[swap_b ^ 0xE]], 16) ^
+         rol32(AES_imc_rot[st->b[swap_b ^ 0xF]], 24));
+    if (be) {
+        t = bswap32(t);
+    }
+    r->w[swap_w ^ 3] = t;
+}
+
+void aesdec_IMC_gen(AESState *r, const AESState *st)
+{
+    aesdec_IMC_swap(r, st, false);
+}
+
+void aesdec_IMC_genrev(AESState *r, const AESState *st)
+{
+    aesdec_IMC_swap(r, st, true);
+}
+
+/*
+ * Perform InvSubBytes + InvShiftRows + AddRoundKey.
+ */
+static inline void
+aesdec_ISB_ISR_AK_swap(AESState *ret, const AESState *st,
+                       const AESState *rk, bool swap)
+{
+    const int swap_b = swap ? 15 : 0;
+    AESState t;
+
+    t.b[swap_b ^ 0x0] = AES_isbox[st->b[swap_b ^ AES_ISH(0x0)]];
+    t.b[swap_b ^ 0x1] = AES_isbox[st->b[swap_b ^ AES_ISH(0x1)]];
+    t.b[swap_b ^ 0x2] = AES_isbox[st->b[swap_b ^ AES_ISH(0x2)]];
+    t.b[swap_b ^ 0x3] = AES_isbox[st->b[swap_b ^ AES_ISH(0x3)]];
+    t.b[swap_b ^ 0x4] = AES_isbox[st->b[swap_b ^ AES_ISH(0x4)]];
+    t.b[swap_b ^ 0x5] = AES_isbox[st->b[swap_b ^ AES_ISH(0x5)]];
+    t.b[swap_b ^ 0x6] = AES_isbox[st->b[swap_b ^ AES_ISH(0x6)]];
+    t.b[swap_b ^ 0x7] = AES_isbox[st->b[swap_b ^ AES_ISH(0x7)]];
+    t.b[swap_b ^ 0x8] = AES_isbox[st->b[swap_b ^ AES_ISH(0x8)]];
+    t.b[swap_b ^ 0x9] = AES_isbox[st->b[swap_b ^ AES_ISH(0x9)]];
+    t.b[swap_b ^ 0xa] = AES_isbox[st->b[swap_b ^ AES_ISH(0xA)]];
+    t.b[swap_b ^ 0xb] = AES_isbox[st->b[swap_b ^ AES_ISH(0xB)]];
+    t.b[swap_b ^ 0xc] = AES_isbox[st->b[swap_b ^ AES_ISH(0xC)]];
+    t.b[swap_b ^ 0xd] = AES_isbox[st->b[swap_b ^ AES_ISH(0xD)]];
+    t.b[swap_b ^ 0xe] = AES_isbox[st->b[swap_b ^ AES_ISH(0xE)]];
+    t.b[swap_b ^ 0xf] = AES_isbox[st->b[swap_b ^ AES_ISH(0xF)]];
+
+    /*
+     * Perform the AddRoundKey with generic vectors.
+     * This may be expanded to either host integer or host vector code.
+     * The key and output endianness match, so no bswap required.
+     */
+    ret->v = t.v ^ rk->v;
+}
+
+void aesdec_ISB_ISR_AK_gen(AESState *r, const AESState *s, const AESState *k)
+{
+    aesdec_ISB_ISR_AK_swap(r, s, k, false);
+}
+
+void aesdec_ISB_ISR_AK_genrev(AESState *r, const AESState *s, const AESState *k)
+{
+    aesdec_ISB_ISR_AK_swap(r, s, k, true);
+}
+
+/*
+ * Perform InvSubBytes + InvShiftRows + InvMixColumns + AddRoundKey.
+ */
+static inline void
+aesdec_ISB_ISR_IMC_AK_swap(AESState *r, const AESState *st,
+                           const AESState *rk, bool swap)
+{
+    int swap_b = swap * 0xf;
+    int swap_w = swap * 0x3;
+    bool be = HOST_BIG_ENDIAN ^ swap;
+    uint32_t w0, w1, w2, w3;
+
+    w0 = (AES_Td0[st->b[swap_b ^ AES_ISH(0x0)]] ^
+          AES_Td1[st->b[swap_b ^ AES_ISH(0x1)]] ^
+          AES_Td2[st->b[swap_b ^ AES_ISH(0x2)]] ^
+          AES_Td3[st->b[swap_b ^ AES_ISH(0x3)]]);
+
+    w1 = (AES_Td0[st->b[swap_b ^ AES_ISH(0x4)]] ^
+          AES_Td1[st->b[swap_b ^ AES_ISH(0x5)]] ^
+          AES_Td2[st->b[swap_b ^ AES_ISH(0x6)]] ^
+          AES_Td3[st->b[swap_b ^ AES_ISH(0x7)]]);
+
+    w2 = (AES_Td0[st->b[swap_b ^ AES_ISH(0x8)]] ^
+          AES_Td1[st->b[swap_b ^ AES_ISH(0x9)]] ^
+          AES_Td2[st->b[swap_b ^ AES_ISH(0xA)]] ^
+          AES_Td3[st->b[swap_b ^ AES_ISH(0xB)]]);
+
+    w3 = (AES_Td0[st->b[swap_b ^ AES_ISH(0xC)]] ^
+          AES_Td1[st->b[swap_b ^ AES_ISH(0xD)]] ^
+          AES_Td2[st->b[swap_b ^ AES_ISH(0xE)]] ^
+          AES_Td3[st->b[swap_b ^ AES_ISH(0xF)]]);
+
+    /* Note that AES_TdX is encoded for big-endian. */
+    if (!be) {
+        w0 = bswap32(w0);
+        w1 = bswap32(w1);
+        w2 = bswap32(w2);
+        w3 = bswap32(w3);
+    }
+
+    r->w[swap_w ^ 0] = rk->w[swap_w ^ 0] ^ w0;
+    r->w[swap_w ^ 1] = rk->w[swap_w ^ 1] ^ w1;
+    r->w[swap_w ^ 2] = rk->w[swap_w ^ 2] ^ w2;
+    r->w[swap_w ^ 3] = rk->w[swap_w ^ 3] ^ w3;
+}
+
+void aesdec_ISB_ISR_IMC_AK_gen(AESState *r, const AESState *st,
+                               const AESState *rk)
+{
+    aesdec_ISB_ISR_IMC_AK_swap(r, st, rk, false);
+}
+
+void aesdec_ISB_ISR_IMC_AK_genrev(AESState *r, const AESState *st,
+                                  const AESState *rk)
+{
+    aesdec_ISB_ISR_IMC_AK_swap(r, st, rk, true);
+}
+
+void aesdec_ISB_ISR_AK_IMC_gen(AESState *ret, const AESState *st,
+                               const AESState *rk)
+{
+    aesdec_ISB_ISR_AK_gen(ret, st, rk);
+    aesdec_IMC_gen(ret, ret);
+}
+
+void aesdec_ISB_ISR_AK_IMC_genrev(AESState *ret, const AESState *st,
+                                  const AESState *rk)
+{
+    aesdec_ISB_ISR_AK_genrev(ret, st, rk);
+    aesdec_IMC_genrev(ret, ret);
+}
+
 /**
  * Expand the cipher key into the encryption key schedule.
  */
@@ -1080,9 +1304,9 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
 
         rk = key->rd_key;
 
-        if (bits==128)
+        if (bits == 128)
                 key->rounds = 10;
-        else if (bits==192)
+        else if (bits == 192)
                 key->rounds = 12;
         else
                 key->rounds = 14;
@@ -1182,7 +1406,7 @@ int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
         rk = key->rd_key;
 
         /* invert the order of the round keys: */
-        for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
+        for (i = 0, j = 4 * (key->rounds); i < j; i += 4, j -= 4) {
                 temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
                 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
                 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
index 10046bb0ae96bdef922e2ddba5452ac7b536d835..52a491dbb515a250c996baa66f40f493970c648f 100644 (file)
@@ -59,7 +59,7 @@ qcrypto_afalg_socket_bind(const char *type, const char *name,
 
     if (bind(sbind, (const struct sockaddr *)&salg, sizeof(salg)) != 0) {
         error_setg_errno(errp, errno, "Failed to bind socket");
-        closesocket(sbind);
+        close(sbind);
         return -1;
     }
 
@@ -73,7 +73,7 @@ qcrypto_afalg_comm_alloc(const char *type, const char *name,
     QCryptoAFAlg *afalg;
 
     afalg = g_new0(QCryptoAFAlg, 1);
-    /* initilize crypto API socket */
+    /* initialize crypto API socket */
     afalg->opfd = -1;
     afalg->tfmfd = qcrypto_afalg_socket_bind(type, name, errp);
     if (afalg->tfmfd == -1) {
@@ -105,11 +105,11 @@ void qcrypto_afalg_comm_free(QCryptoAFAlg *afalg)
     }
 
     if (afalg->tfmfd != -1) {
-        closesocket(afalg->tfmfd);
+        close(afalg->tfmfd);
     }
 
     if (afalg->opfd != -1) {
-        closesocket(afalg->opfd);
+        close(afalg->opfd);
     }
 
     g_free(afalg);
diff --git a/crypto/akcipher-gcrypt.c.inc b/crypto/akcipher-gcrypt.c.inc
new file mode 100644 (file)
index 0000000..abb1fb2
--- /dev/null
@@ -0,0 +1,595 @@
+/*
+ * QEMU Crypto akcipher algorithms
+ *
+ * Copyright (c) 2022 Bytedance
+ * Author: lei he <helei.sig11@bytedance.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <gcrypt.h>
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "crypto/akcipher.h"
+#include "crypto/random.h"
+#include "qapi/error.h"
+#include "sysemu/cryptodev.h"
+#include "rsakey.h"
+
+typedef struct QCryptoGcryptRSA {
+    QCryptoAkCipher akcipher;
+    gcry_sexp_t key;
+    QCryptoRSAPaddingAlgorithm padding_alg;
+    QCryptoHashAlgorithm hash_alg;
+} QCryptoGcryptRSA;
+
+static void qcrypto_gcrypt_rsa_free(QCryptoAkCipher *akcipher)
+{
+    QCryptoGcryptRSA *rsa = (QCryptoGcryptRSA *)akcipher;
+    if (!rsa) {
+        return;
+    }
+
+    gcry_sexp_release(rsa->key);
+    g_free(rsa);
+}
+
+static QCryptoGcryptRSA *qcrypto_gcrypt_rsa_new(
+    const QCryptoAkCipherOptionsRSA *opt,
+    QCryptoAkCipherKeyType type,
+    const uint8_t *key,  size_t keylen,
+    Error **errp);
+
+QCryptoAkCipher *qcrypto_akcipher_new(const QCryptoAkCipherOptions *opts,
+                                      QCryptoAkCipherKeyType type,
+                                      const uint8_t *key, size_t keylen,
+                                      Error **errp)
+{
+    switch (opts->alg) {
+    case QCRYPTO_AKCIPHER_ALG_RSA:
+        return (QCryptoAkCipher *)qcrypto_gcrypt_rsa_new(
+            &opts->u.rsa, type, key, keylen, errp);
+
+    default:
+        error_setg(errp, "Unsupported algorithm: %u", opts->alg);
+        return NULL;
+    }
+
+    return NULL;
+}
+
+static void qcrypto_gcrypt_set_rsa_size(QCryptoAkCipher *akcipher, gcry_mpi_t n)
+{
+    size_t key_size = (gcry_mpi_get_nbits(n) + 7) / 8;
+    akcipher->max_plaintext_len = key_size;
+    akcipher->max_ciphertext_len = key_size;
+    akcipher->max_dgst_len = key_size;
+    akcipher->max_signature_len = key_size;
+}
+
+static int qcrypto_gcrypt_parse_rsa_private_key(
+    QCryptoGcryptRSA *rsa,
+    const uint8_t *key, size_t keylen, Error **errp)
+{
+    g_autoptr(QCryptoAkCipherRSAKey) rsa_key = qcrypto_akcipher_rsakey_parse(
+        QCRYPTO_AKCIPHER_KEY_TYPE_PRIVATE, key, keylen, errp);
+    gcry_mpi_t n = NULL, e = NULL, d = NULL, p = NULL, q = NULL, u = NULL;
+    bool compute_mul_inv = false;
+    int ret = -1;
+    gcry_error_t err;
+
+    if (!rsa_key) {
+        return ret;
+    }
+
+    err = gcry_mpi_scan(&n, GCRYMPI_FMT_STD,
+                        rsa_key->n.data, rsa_key->n.len, NULL);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to parse RSA parameter n: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    err = gcry_mpi_scan(&e, GCRYMPI_FMT_STD,
+                        rsa_key->e.data, rsa_key->e.len, NULL);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to parse RSA parameter e: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    err = gcry_mpi_scan(&d, GCRYMPI_FMT_STD,
+                        rsa_key->d.data, rsa_key->d.len, NULL);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to parse RSA parameter d: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    err = gcry_mpi_scan(&p, GCRYMPI_FMT_STD,
+                        rsa_key->p.data, rsa_key->p.len, NULL);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to parse RSA parameter p: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    err = gcry_mpi_scan(&q, GCRYMPI_FMT_STD,
+                        rsa_key->q.data, rsa_key->q.len, NULL);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to parse RSA parameter q: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    if (gcry_mpi_cmp_ui(p, 0) > 0 && gcry_mpi_cmp_ui(q, 0) > 0) {
+        compute_mul_inv = true;
+
+        u = gcry_mpi_new(0);
+        if (gcry_mpi_cmp(p, q) > 0) {
+            gcry_mpi_swap(p, q);
+        }
+        gcry_mpi_invm(u, p, q);
+    }
+
+    if (compute_mul_inv) {
+        err = gcry_sexp_build(&rsa->key, NULL,
+            "(private-key (rsa (n %m) (e %m) (d %m) (p %m) (q %m) (u %m)))",
+            n, e, d, p, q, u);
+    } else {
+        err = gcry_sexp_build(&rsa->key, NULL,
+            "(private-key (rsa (n %m) (e %m) (d %m)))", n, e, d);
+    }
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to build RSA private key: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+    qcrypto_gcrypt_set_rsa_size((QCryptoAkCipher *)rsa,  n);
+    ret = 0;
+
+cleanup:
+    gcry_mpi_release(n);
+    gcry_mpi_release(e);
+    gcry_mpi_release(d);
+    gcry_mpi_release(p);
+    gcry_mpi_release(q);
+    gcry_mpi_release(u);
+    return ret;
+}
+
+static int qcrypto_gcrypt_parse_rsa_public_key(QCryptoGcryptRSA *rsa,
+                                               const uint8_t *key,
+                                               size_t keylen,
+                                               Error **errp)
+{
+
+    g_autoptr(QCryptoAkCipherRSAKey) rsa_key = qcrypto_akcipher_rsakey_parse(
+        QCRYPTO_AKCIPHER_KEY_TYPE_PUBLIC, key, keylen, errp);
+    gcry_mpi_t n = NULL, e = NULL;
+    int ret = -1;
+    gcry_error_t err;
+
+    if (!rsa_key) {
+        return ret;
+    }
+
+    err = gcry_mpi_scan(&n, GCRYMPI_FMT_STD,
+                        rsa_key->n.data, rsa_key->n.len, NULL);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to parse RSA parameter n: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    err = gcry_mpi_scan(&e, GCRYMPI_FMT_STD,
+                        rsa_key->e.data, rsa_key->e.len, NULL);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to parse RSA parameter e: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    err = gcry_sexp_build(&rsa->key, NULL,
+                          "(public-key (rsa (n %m) (e %m)))", n, e);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to build RSA public key: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+    qcrypto_gcrypt_set_rsa_size((QCryptoAkCipher *)rsa, n);
+    ret = 0;
+
+cleanup:
+    gcry_mpi_release(n);
+    gcry_mpi_release(e);
+    return ret;
+}
+
+static int qcrypto_gcrypt_rsa_encrypt(QCryptoAkCipher *akcipher,
+                                      const void *in, size_t in_len,
+                                      void *out, size_t out_len,
+                                      Error **errp)
+{
+    QCryptoGcryptRSA *rsa = (QCryptoGcryptRSA *)akcipher;
+    int ret = -1;
+    gcry_sexp_t data_sexp = NULL, cipher_sexp = NULL;
+    gcry_sexp_t cipher_sexp_item = NULL;
+    gcry_mpi_t cipher_mpi = NULL;
+    const char *result;
+    gcry_error_t err;
+    size_t actual_len;
+
+    if (in_len > akcipher->max_plaintext_len) {
+        error_setg(errp, "Plaintext length is greater than key size: %d",
+                   akcipher->max_plaintext_len);
+        return ret;
+    }
+
+    err = gcry_sexp_build(&data_sexp, NULL,
+                          "(data (flags %s) (value %b))",
+                          QCryptoRSAPaddingAlgorithm_str(rsa->padding_alg),
+                          in_len, in);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to build plaintext: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    err = gcry_pk_encrypt(&cipher_sexp, data_sexp, rsa->key);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to encrypt: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    /* S-expression of cipher: (enc-val (rsa (a a-mpi))) */
+    cipher_sexp_item = gcry_sexp_find_token(cipher_sexp, "a", 0);
+    if (!cipher_sexp_item || gcry_sexp_length(cipher_sexp_item) != 2) {
+        error_setg(errp, "Invalid ciphertext result");
+        goto cleanup;
+    }
+
+    if (rsa->padding_alg == QCRYPTO_RSA_PADDING_ALG_RAW) {
+        cipher_mpi = gcry_sexp_nth_mpi(cipher_sexp_item, 1, GCRYMPI_FMT_USG);
+        if (!cipher_mpi) {
+            error_setg(errp, "Invalid ciphertext result");
+            goto cleanup;
+        }
+        err = gcry_mpi_print(GCRYMPI_FMT_USG, out, out_len,
+                             &actual_len, cipher_mpi);
+        if (gcry_err_code(err) != 0) {
+            error_setg(errp, "Failed to print MPI: %s/%s",
+                       gcry_strsource(err), gcry_strerror(err));
+            goto cleanup;
+        }
+
+        if (actual_len > out_len) {
+            error_setg(errp, "Ciphertext buffer length is too small");
+            goto cleanup;
+        }
+
+        /* We always padding leading-zeros for RSA-RAW */
+        if (actual_len < out_len) {
+            memmove((uint8_t *)out + (out_len - actual_len), out, actual_len);
+            memset(out, 0, out_len - actual_len);
+        }
+        ret = out_len;
+
+    } else {
+        result = gcry_sexp_nth_data(cipher_sexp_item, 1, &actual_len);
+        if (!result) {
+            error_setg(errp, "Invalid ciphertext result");
+            goto cleanup;
+        }
+        if (actual_len > out_len) {
+            error_setg(errp, "Ciphertext buffer length is too small");
+            goto cleanup;
+        }
+        memcpy(out, result, actual_len);
+        ret = actual_len;
+    }
+
+cleanup:
+    gcry_sexp_release(data_sexp);
+    gcry_sexp_release(cipher_sexp);
+    gcry_sexp_release(cipher_sexp_item);
+    gcry_mpi_release(cipher_mpi);
+    return ret;
+}
+
+static int qcrypto_gcrypt_rsa_decrypt(QCryptoAkCipher *akcipher,
+                                      const void *in, size_t in_len,
+                                      void *out, size_t out_len,
+                                      Error **errp)
+{
+    QCryptoGcryptRSA *rsa = (QCryptoGcryptRSA *)akcipher;
+    int ret = -1;
+    gcry_sexp_t data_sexp = NULL, cipher_sexp = NULL;
+    gcry_mpi_t data_mpi = NULL;
+    gcry_error_t err;
+    size_t actual_len;
+    const char *result;
+
+    if (in_len > akcipher->max_ciphertext_len) {
+        error_setg(errp, "Ciphertext length is greater than key size: %d",
+                   akcipher->max_ciphertext_len);
+        return ret;
+    }
+
+    err = gcry_sexp_build(&cipher_sexp, NULL,
+                          "(enc-val (flags %s) (rsa (a %b) ))",
+                          QCryptoRSAPaddingAlgorithm_str(rsa->padding_alg),
+                          in_len, in);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to build ciphertext: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    err = gcry_pk_decrypt(&data_sexp, cipher_sexp, rsa->key);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to decrypt: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    /* S-expression of plaintext: (value plaintext) */
+    if (rsa->padding_alg == QCRYPTO_RSA_PADDING_ALG_RAW) {
+        data_mpi = gcry_sexp_nth_mpi(data_sexp, 1, GCRYMPI_FMT_USG);
+        if (!data_mpi) {
+            error_setg(errp, "Invalid plaintext result");
+            goto cleanup;
+        }
+        err = gcry_mpi_print(GCRYMPI_FMT_USG, out, out_len,
+                             &actual_len, data_mpi);
+        if (gcry_err_code(err) != 0) {
+            error_setg(errp, "Failed to print MPI: %s/%s",
+                       gcry_strsource(err), gcry_strerror(err));
+            goto cleanup;
+        }
+        if (actual_len > out_len) {
+            error_setg(errp, "Plaintext buffer length is too small");
+            goto cleanup;
+        }
+        /* We always padding leading-zeros for RSA-RAW */
+        if (actual_len < out_len) {
+            memmove((uint8_t *)out + (out_len - actual_len), out, actual_len);
+            memset(out, 0, out_len - actual_len);
+        }
+        ret = out_len;
+    } else {
+        result = gcry_sexp_nth_data(data_sexp, 1, &actual_len);
+        if (!result) {
+            error_setg(errp, "Invalid plaintext result");
+            goto cleanup;
+        }
+        if (actual_len > out_len) {
+            error_setg(errp, "Plaintext buffer length is too small");
+            goto cleanup;
+        }
+        memcpy(out, result, actual_len);
+        ret = actual_len;
+    }
+
+cleanup:
+    gcry_sexp_release(cipher_sexp);
+    gcry_sexp_release(data_sexp);
+    gcry_mpi_release(data_mpi);
+    return ret;
+}
+
+static int qcrypto_gcrypt_rsa_sign(QCryptoAkCipher *akcipher,
+                                   const void *in, size_t in_len,
+                                   void *out, size_t out_len, Error **errp)
+{
+    QCryptoGcryptRSA *rsa = (QCryptoGcryptRSA *)akcipher;
+    int ret = -1;
+    gcry_sexp_t dgst_sexp = NULL, sig_sexp = NULL;
+    gcry_sexp_t sig_sexp_item = NULL;
+    const char *result;
+    gcry_error_t err;
+    size_t actual_len;
+
+    if (in_len > akcipher->max_dgst_len) {
+        error_setg(errp, "Data length is greater than key size: %d",
+                   akcipher->max_dgst_len);
+        return ret;
+    }
+
+    if (rsa->padding_alg != QCRYPTO_RSA_PADDING_ALG_PKCS1) {
+        error_setg(errp, "Invalid padding %u", rsa->padding_alg);
+        return ret;
+    }
+
+    err = gcry_sexp_build(&dgst_sexp, NULL,
+                          "(data (flags pkcs1) (hash %s %b))",
+                          QCryptoHashAlgorithm_str(rsa->hash_alg),
+                          in_len, in);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to build dgst: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    err = gcry_pk_sign(&sig_sexp, dgst_sexp, rsa->key);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to make signature: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    /* S-expression of signature: (sig-val (rsa (s s-mpi))) */
+    sig_sexp_item = gcry_sexp_find_token(sig_sexp, "s", 0);
+    if (!sig_sexp_item || gcry_sexp_length(sig_sexp_item) != 2) {
+        error_setg(errp, "Invalid signature result");
+        goto cleanup;
+    }
+
+    result = gcry_sexp_nth_data(sig_sexp_item, 1, &actual_len);
+    if (!result) {
+        error_setg(errp, "Invalid signature result");
+        goto cleanup;
+    }
+
+    if (actual_len > out_len) {
+        error_setg(errp, "Signature buffer length is too small");
+        goto cleanup;
+    }
+    memcpy(out, result, actual_len);
+    ret = actual_len;
+
+cleanup:
+    gcry_sexp_release(dgst_sexp);
+    gcry_sexp_release(sig_sexp);
+    gcry_sexp_release(sig_sexp_item);
+
+    return ret;
+}
+
+static int qcrypto_gcrypt_rsa_verify(QCryptoAkCipher *akcipher,
+                                     const void *in, size_t in_len,
+                                     const void *in2, size_t in2_len,
+                                     Error **errp)
+{
+    QCryptoGcryptRSA *rsa = (QCryptoGcryptRSA *)akcipher;
+    int ret = -1;
+    gcry_sexp_t sig_sexp = NULL, dgst_sexp = NULL;
+    gcry_error_t err;
+
+    if (in_len > akcipher->max_signature_len) {
+        error_setg(errp, "Signature length is greater than key size: %d",
+                   akcipher->max_signature_len);
+        return ret;
+    }
+
+    if (in2_len > akcipher->max_dgst_len) {
+        error_setg(errp, "Data length is greater than key size: %d",
+                   akcipher->max_dgst_len);
+        return ret;
+    }
+
+    if (rsa->padding_alg != QCRYPTO_RSA_PADDING_ALG_PKCS1) {
+        error_setg(errp, "Invalid padding %u", rsa->padding_alg);
+        return ret;
+    }
+
+    err = gcry_sexp_build(&sig_sexp, NULL,
+                          "(sig-val (rsa (s %b)))", in_len, in);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to build signature: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    err = gcry_sexp_build(&dgst_sexp, NULL,
+                          "(data (flags pkcs1) (hash %s %b))",
+                          QCryptoHashAlgorithm_str(rsa->hash_alg),
+                          in2_len, in2);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to build dgst: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+
+    err = gcry_pk_verify(sig_sexp, dgst_sexp, rsa->key);
+    if (gcry_err_code(err) != 0) {
+        error_setg(errp, "Failed to verify signature: %s/%s",
+                   gcry_strsource(err), gcry_strerror(err));
+        goto cleanup;
+    }
+    ret = 0;
+
+cleanup:
+    gcry_sexp_release(dgst_sexp);
+    gcry_sexp_release(sig_sexp);
+
+    return ret;
+}
+
+QCryptoAkCipherDriver gcrypt_rsa = {
+    .encrypt = qcrypto_gcrypt_rsa_encrypt,
+    .decrypt = qcrypto_gcrypt_rsa_decrypt,
+    .sign = qcrypto_gcrypt_rsa_sign,
+    .verify = qcrypto_gcrypt_rsa_verify,
+    .free = qcrypto_gcrypt_rsa_free,
+};
+
+static QCryptoGcryptRSA *qcrypto_gcrypt_rsa_new(
+    const QCryptoAkCipherOptionsRSA *opt,
+    QCryptoAkCipherKeyType type,
+    const uint8_t *key, size_t keylen,
+    Error **errp)
+{
+    QCryptoGcryptRSA *rsa = g_new0(QCryptoGcryptRSA, 1);
+    rsa->padding_alg = opt->padding_alg;
+    rsa->hash_alg = opt->hash_alg;
+    rsa->akcipher.driver = &gcrypt_rsa;
+
+    switch (type) {
+    case QCRYPTO_AKCIPHER_KEY_TYPE_PRIVATE:
+        if (qcrypto_gcrypt_parse_rsa_private_key(rsa, key, keylen, errp) != 0) {
+            goto error;
+        }
+        break;
+
+    case QCRYPTO_AKCIPHER_KEY_TYPE_PUBLIC:
+        if (qcrypto_gcrypt_parse_rsa_public_key(rsa, key, keylen, errp) != 0) {
+            goto error;
+        }
+        break;
+
+    default:
+        error_setg(errp, "Unknown akcipher key type %d", type);
+        goto error;
+    }
+
+    return rsa;
+
+error:
+    qcrypto_gcrypt_rsa_free((QCryptoAkCipher *)rsa);
+    return NULL;
+}
+
+
+bool qcrypto_akcipher_supports(QCryptoAkCipherOptions *opts)
+{
+    switch (opts->alg) {
+    case QCRYPTO_AKCIPHER_ALG_RSA:
+        switch (opts->u.rsa.padding_alg) {
+        case QCRYPTO_RSA_PADDING_ALG_RAW:
+            return true;
+
+        case QCRYPTO_RSA_PADDING_ALG_PKCS1:
+            switch (opts->u.rsa.hash_alg) {
+            case QCRYPTO_HASH_ALG_MD5:
+            case QCRYPTO_HASH_ALG_SHA1:
+            case QCRYPTO_HASH_ALG_SHA256:
+            case QCRYPTO_HASH_ALG_SHA512:
+                return true;
+
+            default:
+                return false;
+            }
+
+        default:
+            return false;
+        }
+
+    default:
+        return true;
+    }
+}
diff --git a/crypto/akcipher-nettle.c.inc b/crypto/akcipher-nettle.c.inc
new file mode 100644 (file)
index 0000000..02699e6
--- /dev/null
@@ -0,0 +1,451 @@
+/*
+ * QEMU Crypto akcipher algorithms
+ *
+ * Copyright (c) 2022 Bytedance
+ * Author: lei he <helei.sig11@bytedance.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <nettle/rsa.h>
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "crypto/akcipher.h"
+#include "crypto/random.h"
+#include "qapi/error.h"
+#include "sysemu/cryptodev.h"
+#include "rsakey.h"
+
+typedef struct QCryptoNettleRSA {
+    QCryptoAkCipher akcipher;
+    struct rsa_public_key pub;
+    struct rsa_private_key priv;
+    QCryptoRSAPaddingAlgorithm padding_alg;
+    QCryptoHashAlgorithm hash_alg;
+} QCryptoNettleRSA;
+
+static void qcrypto_nettle_rsa_free(QCryptoAkCipher *akcipher)
+{
+    QCryptoNettleRSA *rsa = (QCryptoNettleRSA *)akcipher;
+    if (!rsa) {
+        return;
+    }
+
+    rsa_public_key_clear(&rsa->pub);
+    rsa_private_key_clear(&rsa->priv);
+    g_free(rsa);
+}
+
+static QCryptoAkCipher *qcrypto_nettle_rsa_new(
+    const QCryptoAkCipherOptionsRSA *opt,
+    QCryptoAkCipherKeyType type,
+    const uint8_t *key,  size_t keylen,
+    Error **errp);
+
+QCryptoAkCipher *qcrypto_akcipher_new(const QCryptoAkCipherOptions *opts,
+                                      QCryptoAkCipherKeyType type,
+                                      const uint8_t *key, size_t keylen,
+                                      Error **errp)
+{
+    switch (opts->alg) {
+    case QCRYPTO_AKCIPHER_ALG_RSA:
+        return qcrypto_nettle_rsa_new(&opts->u.rsa, type, key, keylen, errp);
+
+    default:
+        error_setg(errp, "Unsupported algorithm: %u", opts->alg);
+        return NULL;
+    }
+
+    return NULL;
+}
+
+static void qcrypto_nettle_rsa_set_akcipher_size(QCryptoAkCipher *akcipher,
+                                                 int key_size)
+{
+    akcipher->max_plaintext_len = key_size;
+    akcipher->max_ciphertext_len = key_size;
+    akcipher->max_signature_len = key_size;
+    akcipher->max_dgst_len = key_size;
+}
+
+static int qcrypt_nettle_parse_rsa_private_key(QCryptoNettleRSA *rsa,
+                                               const uint8_t *key,
+                                               size_t keylen,
+                                               Error **errp)
+{
+    g_autoptr(QCryptoAkCipherRSAKey) rsa_key = qcrypto_akcipher_rsakey_parse(
+        QCRYPTO_AKCIPHER_KEY_TYPE_PRIVATE, key, keylen, errp);
+
+    if (!rsa_key) {
+        return -1;
+    }
+
+    nettle_mpz_init_set_str_256_u(rsa->pub.n, rsa_key->n.len, rsa_key->n.data);
+    nettle_mpz_init_set_str_256_u(rsa->pub.e, rsa_key->e.len, rsa_key->e.data);
+    nettle_mpz_init_set_str_256_u(rsa->priv.d, rsa_key->d.len, rsa_key->d.data);
+    nettle_mpz_init_set_str_256_u(rsa->priv.p, rsa_key->p.len, rsa_key->p.data);
+    nettle_mpz_init_set_str_256_u(rsa->priv.q, rsa_key->q.len, rsa_key->q.data);
+    nettle_mpz_init_set_str_256_u(rsa->priv.a, rsa_key->dp.len,
+                                  rsa_key->dp.data);
+    nettle_mpz_init_set_str_256_u(rsa->priv.b, rsa_key->dq.len,
+                                  rsa_key->dq.data);
+    nettle_mpz_init_set_str_256_u(rsa->priv.c, rsa_key->u.len, rsa_key->u.data);
+
+    if (!rsa_public_key_prepare(&rsa->pub)) {
+        error_setg(errp, "Failed to check RSA key");
+        return -1;
+    }
+
+    /**
+     * Since in the kernel's unit test, the p, q, a, b, c of some
+     * private keys is 0, only the simplest length check is done here
+     */
+    if (rsa_key->p.len > 1 &&
+        rsa_key->q.len > 1 &&
+        rsa_key->dp.len > 1 &&
+        rsa_key->dq.len > 1 &&
+        rsa_key->u.len > 1) {
+        if (!rsa_private_key_prepare(&rsa->priv)) {
+            error_setg(errp, "Failed to check RSA key");
+            return -1;
+        }
+    } else {
+        rsa->priv.size = rsa->pub.size;
+    }
+    qcrypto_nettle_rsa_set_akcipher_size(
+        (QCryptoAkCipher *)rsa, rsa->priv.size);
+
+    return 0;
+}
+
+static int qcrypt_nettle_parse_rsa_public_key(QCryptoNettleRSA *rsa,
+                                              const uint8_t *key,
+                                              size_t keylen,
+                                              Error **errp)
+{
+    g_autoptr(QCryptoAkCipherRSAKey) rsa_key = qcrypto_akcipher_rsakey_parse(
+        QCRYPTO_AKCIPHER_KEY_TYPE_PUBLIC, key, keylen, errp);
+
+    if (!rsa_key) {
+        return -1;
+    }
+    nettle_mpz_init_set_str_256_u(rsa->pub.n, rsa_key->n.len, rsa_key->n.data);
+    nettle_mpz_init_set_str_256_u(rsa->pub.e, rsa_key->e.len, rsa_key->e.data);
+
+    if (!rsa_public_key_prepare(&rsa->pub)) {
+        error_setg(errp, "Failed to check RSA key");
+        return -1;
+    }
+    qcrypto_nettle_rsa_set_akcipher_size(
+        (QCryptoAkCipher *)rsa, rsa->pub.size);
+
+    return 0;
+}
+
+static void wrap_nettle_random_func(void *ctx, size_t len, uint8_t *out)
+{
+    qcrypto_random_bytes(out, len, &error_abort);
+}
+
+static int qcrypto_nettle_rsa_encrypt(QCryptoAkCipher *akcipher,
+                                      const void *data, size_t data_len,
+                                      void *enc, size_t enc_len,
+                                      Error **errp)
+{
+
+    QCryptoNettleRSA *rsa = (QCryptoNettleRSA *)akcipher;
+    mpz_t c;
+    int ret = -1;
+
+    if (data_len > rsa->pub.size) {
+        error_setg(errp, "Plaintext length %zu is greater than key size: %zu",
+                   data_len, rsa->pub.size);
+        return ret;
+    }
+
+    if (enc_len < rsa->pub.size) {
+        error_setg(errp, "Ciphertext buffer length %zu is less than "
+                         "key size: %zu", enc_len, rsa->pub.size);
+        return ret;
+    }
+
+    /* Nettle do not support RSA encryption without any padding */
+    switch (rsa->padding_alg) {
+    case QCRYPTO_RSA_PADDING_ALG_RAW:
+        error_setg(errp, "RSA with raw padding is not supported");
+        break;
+
+    case QCRYPTO_RSA_PADDING_ALG_PKCS1:
+        mpz_init(c);
+        if (rsa_encrypt(&rsa->pub, NULL, wrap_nettle_random_func,
+                        data_len, (uint8_t *)data, c) != 1) {
+            error_setg(errp, "Failed to encrypt");
+        } else {
+            nettle_mpz_get_str_256(enc_len, (uint8_t *)enc, c);
+            ret = nettle_mpz_sizeinbase_256_u(c);
+        }
+        mpz_clear(c);
+        break;
+
+    default:
+        error_setg(errp, "Unknown padding");
+    }
+
+    return ret;
+}
+
+static int qcrypto_nettle_rsa_decrypt(QCryptoAkCipher *akcipher,
+                                      const void *enc, size_t enc_len,
+                                      void *data, size_t data_len,
+                                      Error **errp)
+{
+    QCryptoNettleRSA *rsa = (QCryptoNettleRSA *)akcipher;
+    mpz_t c;
+    int ret = -1;
+
+    if (enc_len > rsa->priv.size) {
+        error_setg(errp, "Ciphertext length %zu is greater than key size: %zu",
+                   enc_len, rsa->priv.size);
+        return ret;
+    }
+
+    switch (rsa->padding_alg) {
+    case QCRYPTO_RSA_PADDING_ALG_RAW:
+        error_setg(errp, "RSA with raw padding is not supported");
+        break;
+
+    case QCRYPTO_RSA_PADDING_ALG_PKCS1:
+        nettle_mpz_init_set_str_256_u(c, enc_len, enc);
+        if (!rsa_decrypt(&rsa->priv, &data_len, (uint8_t *)data, c)) {
+            error_setg(errp, "Failed to decrypt");
+        } else {
+            ret = data_len;
+        }
+
+        mpz_clear(c);
+        break;
+
+    default:
+        error_setg(errp, "Unknown padding algorithm: %d", rsa->padding_alg);
+    }
+
+    return ret;
+}
+
+static int qcrypto_nettle_rsa_sign(QCryptoAkCipher *akcipher,
+                                   const void *data, size_t data_len,
+                                   void *sig, size_t sig_len, Error **errp)
+{
+    QCryptoNettleRSA *rsa = (QCryptoNettleRSA *)akcipher;
+    int ret = -1, rv;
+    mpz_t s;
+
+    /**
+     * The RSA algorithm cannot be used for signature/verification
+     * without padding.
+     */
+    if (rsa->padding_alg == QCRYPTO_RSA_PADDING_ALG_RAW) {
+        error_setg(errp, "Try to make signature without padding");
+        return ret;
+    }
+
+    if (data_len > rsa->priv.size) {
+        error_setg(errp, "Data length %zu is greater than key size: %zu",
+                   data_len, rsa->priv.size);
+        return ret;
+    }
+
+    if (sig_len < rsa->priv.size) {
+        error_setg(errp, "Signature buffer length %zu is less than "
+                         "key size: %zu", sig_len, rsa->priv.size);
+        return ret;
+    }
+
+    mpz_init(s);
+    switch (rsa->hash_alg) {
+    case QCRYPTO_HASH_ALG_MD5:
+        rv = rsa_md5_sign_digest(&rsa->priv, data, s);
+        break;
+
+    case QCRYPTO_HASH_ALG_SHA1:
+        rv = rsa_sha1_sign_digest(&rsa->priv, data, s);
+        break;
+
+    case QCRYPTO_HASH_ALG_SHA256:
+        rv = rsa_sha256_sign_digest(&rsa->priv, data, s);
+        break;
+
+    case QCRYPTO_HASH_ALG_SHA512:
+        rv = rsa_sha512_sign_digest(&rsa->priv, data, s);
+        break;
+
+    default:
+        error_setg(errp, "Unknown hash algorithm: %d", rsa->hash_alg);
+        goto cleanup;
+    }
+
+    if (rv != 1) {
+        error_setg(errp, "Failed to make signature");
+        goto cleanup;
+    }
+    nettle_mpz_get_str_256(sig_len, (uint8_t *)sig, s);
+    ret = nettle_mpz_sizeinbase_256_u(s);
+
+cleanup:
+    mpz_clear(s);
+
+    return ret;
+}
+
+static int qcrypto_nettle_rsa_verify(QCryptoAkCipher *akcipher,
+                                     const void *sig, size_t sig_len,
+                                     const void *data, size_t data_len,
+                                     Error **errp)
+{
+    QCryptoNettleRSA *rsa = (QCryptoNettleRSA *)akcipher;
+
+    int ret = -1, rv;
+    mpz_t s;
+
+    /**
+     * The RSA algorithm cannot be used for signature/verification
+     * without padding.
+     */
+    if (rsa->padding_alg == QCRYPTO_RSA_PADDING_ALG_RAW) {
+        error_setg(errp, "Try to verify signature without padding");
+        return ret;
+    }
+    if (data_len > rsa->pub.size) {
+        error_setg(errp, "Data length %zu is greater than key size: %zu",
+                   data_len, rsa->pub.size);
+        return ret;
+    }
+    if (sig_len < rsa->pub.size) {
+        error_setg(errp, "Signature length %zu is greater than key size: %zu",
+                   sig_len, rsa->pub.size);
+        return ret;
+    }
+
+    nettle_mpz_init_set_str_256_u(s, sig_len, sig);
+    switch (rsa->hash_alg) {
+    case QCRYPTO_HASH_ALG_MD5:
+        rv = rsa_md5_verify_digest(&rsa->pub, data, s);
+        break;
+
+    case QCRYPTO_HASH_ALG_SHA1:
+        rv = rsa_sha1_verify_digest(&rsa->pub, data, s);
+        break;
+
+    case QCRYPTO_HASH_ALG_SHA256:
+        rv = rsa_sha256_verify_digest(&rsa->pub, data, s);
+        break;
+
+    case QCRYPTO_HASH_ALG_SHA512:
+        rv = rsa_sha512_verify_digest(&rsa->pub, data, s);
+        break;
+
+    default:
+        error_setg(errp, "Unsupported hash algorithm: %d", rsa->hash_alg);
+        goto cleanup;
+    }
+
+    if (rv != 1) {
+        error_setg(errp, "Failed to verify signature");
+        goto cleanup;
+    }
+    ret = 0;
+
+cleanup:
+    mpz_clear(s);
+
+    return ret;
+}
+
+QCryptoAkCipherDriver nettle_rsa = {
+    .encrypt = qcrypto_nettle_rsa_encrypt,
+    .decrypt = qcrypto_nettle_rsa_decrypt,
+    .sign = qcrypto_nettle_rsa_sign,
+    .verify = qcrypto_nettle_rsa_verify,
+    .free = qcrypto_nettle_rsa_free,
+};
+
+static QCryptoAkCipher *qcrypto_nettle_rsa_new(
+    const QCryptoAkCipherOptionsRSA *opt,
+    QCryptoAkCipherKeyType type,
+    const uint8_t *key, size_t keylen,
+    Error **errp)
+{
+    QCryptoNettleRSA *rsa = g_new0(QCryptoNettleRSA, 1);
+
+    rsa->padding_alg = opt->padding_alg;
+    rsa->hash_alg = opt->hash_alg;
+    rsa->akcipher.driver = &nettle_rsa;
+    rsa_public_key_init(&rsa->pub);
+    rsa_private_key_init(&rsa->priv);
+
+    switch (type) {
+    case QCRYPTO_AKCIPHER_KEY_TYPE_PRIVATE:
+        if (qcrypt_nettle_parse_rsa_private_key(rsa, key, keylen, errp) != 0) {
+            goto error;
+        }
+        break;
+
+    case QCRYPTO_AKCIPHER_KEY_TYPE_PUBLIC:
+        if (qcrypt_nettle_parse_rsa_public_key(rsa, key, keylen, errp) != 0) {
+            goto error;
+        }
+        break;
+
+    default:
+        error_setg(errp, "Unknown akcipher key type %d", type);
+        goto error;
+    }
+
+    return (QCryptoAkCipher *)rsa;
+
+error:
+    qcrypto_nettle_rsa_free((QCryptoAkCipher *)rsa);
+    return NULL;
+}
+
+
+bool qcrypto_akcipher_supports(QCryptoAkCipherOptions *opts)
+{
+    switch (opts->alg) {
+    case QCRYPTO_AKCIPHER_ALG_RSA:
+        switch (opts->u.rsa.padding_alg) {
+        case QCRYPTO_RSA_PADDING_ALG_PKCS1:
+            switch (opts->u.rsa.hash_alg) {
+            case QCRYPTO_HASH_ALG_MD5:
+            case QCRYPTO_HASH_ALG_SHA1:
+            case QCRYPTO_HASH_ALG_SHA256:
+            case QCRYPTO_HASH_ALG_SHA512:
+                return true;
+
+            default:
+                return false;
+            }
+
+        case QCRYPTO_RSA_PADDING_ALG_RAW:
+        default:
+            return false;
+        }
+        break;
+
+    default:
+        return false;
+    }
+}
diff --git a/crypto/akcipher.c b/crypto/akcipher.c
new file mode 100644 (file)
index 0000000..e4bbc6e
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * QEMU Crypto akcipher algorithms
+ *
+ * Copyright (c) 2022 Bytedance
+ * Author: zhenwei pi <pizhenwei@bytedance.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "crypto/akcipher.h"
+#include "akcipherpriv.h"
+#include "der.h"
+#include "rsakey.h"
+
+#if defined(CONFIG_GCRYPT)
+#include "akcipher-gcrypt.c.inc"
+#elif defined(CONFIG_NETTLE) && defined(CONFIG_HOGWEED)
+#include "akcipher-nettle.c.inc"
+#else
+QCryptoAkCipher *qcrypto_akcipher_new(const QCryptoAkCipherOptions *opts,
+                                      QCryptoAkCipherKeyType type,
+                                      const uint8_t *key, size_t keylen,
+                                      Error **errp)
+{
+    QCryptoAkCipher *akcipher = NULL;
+
+    return akcipher;
+}
+
+bool qcrypto_akcipher_supports(QCryptoAkCipherOptions *opts)
+{
+    return false;
+}
+#endif
+
+int qcrypto_akcipher_encrypt(QCryptoAkCipher *akcipher,
+                             const void *in, size_t in_len,
+                             void *out, size_t out_len, Error **errp)
+{
+    const QCryptoAkCipherDriver *drv = akcipher->driver;
+
+    return drv->encrypt(akcipher, in, in_len, out, out_len, errp);
+}
+
+int qcrypto_akcipher_decrypt(QCryptoAkCipher *akcipher,
+                             const void *in, size_t in_len,
+                             void *out, size_t out_len, Error **errp)
+{
+    const QCryptoAkCipherDriver *drv = akcipher->driver;
+
+    return drv->decrypt(akcipher, in, in_len, out, out_len, errp);
+}
+
+int qcrypto_akcipher_sign(QCryptoAkCipher *akcipher,
+                          const void *in, size_t in_len,
+                          void *out, size_t out_len, Error **errp)
+{
+    const QCryptoAkCipherDriver *drv = akcipher->driver;
+
+    return drv->sign(akcipher, in, in_len, out, out_len, errp);
+}
+
+int qcrypto_akcipher_verify(QCryptoAkCipher *akcipher,
+                            const void *in, size_t in_len,
+                            const void *in2, size_t in2_len, Error **errp)
+{
+    const QCryptoAkCipherDriver *drv = akcipher->driver;
+
+    return drv->verify(akcipher, in, in_len, in2, in2_len, errp);
+}
+
+int qcrypto_akcipher_max_plaintext_len(QCryptoAkCipher *akcipher)
+{
+    return akcipher->max_plaintext_len;
+}
+
+int qcrypto_akcipher_max_ciphertext_len(QCryptoAkCipher *akcipher)
+{
+    return akcipher->max_ciphertext_len;
+}
+
+int qcrypto_akcipher_max_signature_len(QCryptoAkCipher *akcipher)
+{
+    return akcipher->max_signature_len;
+}
+
+int qcrypto_akcipher_max_dgst_len(QCryptoAkCipher *akcipher)
+{
+    return akcipher->max_dgst_len;
+}
+
+void qcrypto_akcipher_free(QCryptoAkCipher *akcipher)
+{
+    const QCryptoAkCipherDriver *drv = akcipher->driver;
+
+    drv->free(akcipher);
+}
+
+int qcrypto_akcipher_export_p8info(const QCryptoAkCipherOptions *opts,
+                                   uint8_t *key, size_t keylen,
+                                   uint8_t **dst, size_t *dst_len,
+                                   Error **errp)
+{
+    switch (opts->alg) {
+    case QCRYPTO_AKCIPHER_ALG_RSA:
+        qcrypto_akcipher_rsakey_export_p8info(key, keylen, dst, dst_len);
+        return 0;
+
+    default:
+        error_setg(errp, "Unsupported algorithm: %u", opts->alg);
+        return -1;
+    }
+}
diff --git a/crypto/akcipherpriv.h b/crypto/akcipherpriv.h
new file mode 100644 (file)
index 0000000..739f639
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * QEMU Crypto asymmetric algorithms
+ *
+ * Copyright (c) 2022 Bytedance
+ * Author: zhenwei pi <pizhenwei@bytedance.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QCRYPTO_AKCIPHERPRIV_H
+#define QCRYPTO_AKCIPHERPRIV_H
+
+#include "qapi/qapi-types-crypto.h"
+
+typedef struct QCryptoAkCipherDriver QCryptoAkCipherDriver;
+
+struct QCryptoAkCipher {
+    QCryptoAkCipherAlgorithm alg;
+    QCryptoAkCipherKeyType type;
+    int max_plaintext_len;
+    int max_ciphertext_len;
+    int max_signature_len;
+    int max_dgst_len;
+    QCryptoAkCipherDriver *driver;
+};
+
+struct QCryptoAkCipherDriver {
+    int (*encrypt)(QCryptoAkCipher *akcipher,
+                   const void *in, size_t in_len,
+                   void *out, size_t out_len, Error **errp);
+    int (*decrypt)(QCryptoAkCipher *akcipher,
+                   const void *out, size_t out_len,
+                   void *in, size_t in_len, Error **errp);
+    int (*sign)(QCryptoAkCipher *akcipher,
+                const void *in, size_t in_len,
+                void *out, size_t out_len, Error **errp);
+    int (*verify)(QCryptoAkCipher *akcipher,
+                  const void *in, size_t in_len,
+                  const void *in2, size_t in2_len, Error **errp);
+    void (*free)(QCryptoAkCipher *akcipher);
+};
+
+#endif /* QCRYPTO_AKCIPHER_H */
diff --git a/crypto/block-luks-priv.h b/crypto/block-luks-priv.h
new file mode 100644 (file)
index 0000000..8fc967a
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+ * QEMU Crypto block device encryption LUKS format
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qapi/error.h"
+#include "qemu/bswap.h"
+
+#include "block-luks.h"
+
+#include "crypto/hash.h"
+#include "crypto/afsplit.h"
+#include "crypto/pbkdf.h"
+#include "crypto/secret.h"
+#include "crypto/random.h"
+#include "qemu/uuid.h"
+
+#include "qemu/bitmap.h"
+
+/*
+ * Reference for the LUKS format implemented here is
+ *
+ *   docs/on-disk-format.pdf
+ *
+ * in 'cryptsetup' package source code
+ *
+ * This file implements the 1.2.1 specification, dated
+ * Oct 16, 2011.
+ */
+
+typedef struct QCryptoBlockLUKSHeader QCryptoBlockLUKSHeader;
+typedef struct QCryptoBlockLUKSKeySlot QCryptoBlockLUKSKeySlot;
+
+
+/* The following constants are all defined by the LUKS spec */
+#define QCRYPTO_BLOCK_LUKS_VERSION 1
+
+#define QCRYPTO_BLOCK_LUKS_MAGIC_LEN 6
+#define QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN 32
+#define QCRYPTO_BLOCK_LUKS_CIPHER_MODE_LEN 32
+#define QCRYPTO_BLOCK_LUKS_HASH_SPEC_LEN 32
+#define QCRYPTO_BLOCK_LUKS_DIGEST_LEN 20
+#define QCRYPTO_BLOCK_LUKS_SALT_LEN 32
+#define QCRYPTO_BLOCK_LUKS_UUID_LEN 40
+#define QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS 8
+#define QCRYPTO_BLOCK_LUKS_STRIPES 4000
+#define QCRYPTO_BLOCK_LUKS_MIN_SLOT_KEY_ITERS 1000
+#define QCRYPTO_BLOCK_LUKS_MIN_MASTER_KEY_ITERS 1000
+#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET 4096
+
+#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_DISABLED 0x0000DEAD
+#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED 0x00AC71F3
+
+#define QCRYPTO_BLOCK_LUKS_SECTOR_SIZE 512LL
+
+#define QCRYPTO_BLOCK_LUKS_DEFAULT_ITER_TIME_MS 2000
+#define QCRYPTO_BLOCK_LUKS_ERASE_ITERATIONS 40
+
+static const char qcrypto_block_luks_magic[QCRYPTO_BLOCK_LUKS_MAGIC_LEN] = {
+    'L', 'U', 'K', 'S', 0xBA, 0xBE
+};
+
+/*
+ * This struct is written to disk in big-endian format,
+ * but operated upon in native-endian format.
+ */
+struct QCryptoBlockLUKSKeySlot {
+    /* state of keyslot, enabled/disable */
+    uint32_t active;
+    /* iterations for PBKDF2 */
+    uint32_t iterations;
+    /* salt for PBKDF2 */
+    uint8_t salt[QCRYPTO_BLOCK_LUKS_SALT_LEN];
+    /* start sector of key material */
+    uint32_t key_offset_sector;
+    /* number of anti-forensic stripes */
+    uint32_t stripes;
+};
+
+/*
+ * This struct is written to disk in big-endian format,
+ * but operated upon in native-endian format.
+ */
+struct QCryptoBlockLUKSHeader {
+    /* 'L', 'U', 'K', 'S', '0xBA', '0xBE' */
+    char magic[QCRYPTO_BLOCK_LUKS_MAGIC_LEN];
+
+    /* LUKS version, currently 1 */
+    uint16_t version;
+
+    /* cipher name specification (aes, etc) */
+    char cipher_name[QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN];
+
+    /* cipher mode specification (cbc-plain, xts-essiv:sha256, etc) */
+    char cipher_mode[QCRYPTO_BLOCK_LUKS_CIPHER_MODE_LEN];
+
+    /* hash specification (sha256, etc) */
+    char hash_spec[QCRYPTO_BLOCK_LUKS_HASH_SPEC_LEN];
+
+    /* start offset of the volume data (in 512 byte sectors) */
+    uint32_t payload_offset_sector;
+
+    /* Number of key bytes */
+    uint32_t master_key_len;
+
+    /* master key checksum after PBKDF2 */
+    uint8_t master_key_digest[QCRYPTO_BLOCK_LUKS_DIGEST_LEN];
+
+    /* salt for master key PBKDF2 */
+    uint8_t master_key_salt[QCRYPTO_BLOCK_LUKS_SALT_LEN];
+
+    /* iterations for master key PBKDF2 */
+    uint32_t master_key_iterations;
+
+    /* UUID of the partition in standard ASCII representation */
+    uint8_t uuid[QCRYPTO_BLOCK_LUKS_UUID_LEN];
+
+    /* key slots */
+    QCryptoBlockLUKSKeySlot key_slots[QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS];
+};
+
+
+void
+qcrypto_block_luks_to_disk_endian(QCryptoBlockLUKSHeader *hdr);
+void
+qcrypto_block_luks_from_disk_endian(QCryptoBlockLUKSHeader *hdr);
index 564caa10949bcbbf7de8f2cc04f81fc636372969..fb01ec38bbf00e29cadae5deeee1e4304d9a1e85 100644 (file)
@@ -23,6 +23,7 @@
 #include "qemu/bswap.h"
 
 #include "block-luks.h"
+#include "block-luks-priv.h"
 
 #include "crypto/hash.h"
 #include "crypto/afsplit.h"
@@ -31,7 +32,6 @@
 #include "crypto/random.h"
 #include "qemu/uuid.h"
 
-#include "qemu/coroutine.h"
 #include "qemu/bitmap.h"
 
 /*
  */
 
 typedef struct QCryptoBlockLUKS QCryptoBlockLUKS;
-typedef struct QCryptoBlockLUKSHeader QCryptoBlockLUKSHeader;
-typedef struct QCryptoBlockLUKSKeySlot QCryptoBlockLUKSKeySlot;
-
-
-/* The following constants are all defined by the LUKS spec */
-#define QCRYPTO_BLOCK_LUKS_VERSION 1
-
-#define QCRYPTO_BLOCK_LUKS_MAGIC_LEN 6
-#define QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN 32
-#define QCRYPTO_BLOCK_LUKS_CIPHER_MODE_LEN 32
-#define QCRYPTO_BLOCK_LUKS_HASH_SPEC_LEN 32
-#define QCRYPTO_BLOCK_LUKS_DIGEST_LEN 20
-#define QCRYPTO_BLOCK_LUKS_SALT_LEN 32
-#define QCRYPTO_BLOCK_LUKS_UUID_LEN 40
-#define QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS 8
-#define QCRYPTO_BLOCK_LUKS_STRIPES 4000
-#define QCRYPTO_BLOCK_LUKS_MIN_SLOT_KEY_ITERS 1000
-#define QCRYPTO_BLOCK_LUKS_MIN_MASTER_KEY_ITERS 1000
-#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET 4096
-
-#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_DISABLED 0x0000DEAD
-#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED 0x00AC71F3
-
-#define QCRYPTO_BLOCK_LUKS_SECTOR_SIZE 512LL
-
-#define QCRYPTO_BLOCK_LUKS_DEFAULT_ITER_TIME_MS 2000
-#define QCRYPTO_BLOCK_LUKS_ERASE_ITERATIONS 40
-
-static const char qcrypto_block_luks_magic[QCRYPTO_BLOCK_LUKS_MAGIC_LEN] = {
-    'L', 'U', 'K', 'S', 0xBA, 0xBE
-};
 
 typedef struct QCryptoBlockLUKSNameMap QCryptoBlockLUKSNameMap;
 struct QCryptoBlockLUKSNameMap {
@@ -134,69 +103,7 @@ qcrypto_block_luks_cipher_name_map[] = {
     { "twofish", qcrypto_block_luks_cipher_size_map_twofish },
 };
 
-
-/*
- * This struct is written to disk in big-endian format,
- * but operated upon in native-endian format.
- */
-struct QCryptoBlockLUKSKeySlot {
-    /* state of keyslot, enabled/disable */
-    uint32_t active;
-    /* iterations for PBKDF2 */
-    uint32_t iterations;
-    /* salt for PBKDF2 */
-    uint8_t salt[QCRYPTO_BLOCK_LUKS_SALT_LEN];
-    /* start sector of key material */
-    uint32_t key_offset_sector;
-    /* number of anti-forensic stripes */
-    uint32_t stripes;
-};
-
 QEMU_BUILD_BUG_ON(sizeof(struct QCryptoBlockLUKSKeySlot) != 48);
-
-
-/*
- * This struct is written to disk in big-endian format,
- * but operated upon in native-endian format.
- */
-struct QCryptoBlockLUKSHeader {
-    /* 'L', 'U', 'K', 'S', '0xBA', '0xBE' */
-    char magic[QCRYPTO_BLOCK_LUKS_MAGIC_LEN];
-
-    /* LUKS version, currently 1 */
-    uint16_t version;
-
-    /* cipher name specification (aes, etc) */
-    char cipher_name[QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN];
-
-    /* cipher mode specification (cbc-plain, xts-essiv:sha256, etc) */
-    char cipher_mode[QCRYPTO_BLOCK_LUKS_CIPHER_MODE_LEN];
-
-    /* hash specification (sha256, etc) */
-    char hash_spec[QCRYPTO_BLOCK_LUKS_HASH_SPEC_LEN];
-
-    /* start offset of the volume data (in 512 byte sectors) */
-    uint32_t payload_offset_sector;
-
-    /* Number of key bytes */
-    uint32_t master_key_len;
-
-    /* master key checksum after PBKDF2 */
-    uint8_t master_key_digest[QCRYPTO_BLOCK_LUKS_DIGEST_LEN];
-
-    /* salt for master key PBKDF2 */
-    uint8_t master_key_salt[QCRYPTO_BLOCK_LUKS_SALT_LEN];
-
-    /* iterations for master key PBKDF2 */
-    uint32_t master_key_iterations;
-
-    /* UUID of the partition in standard ASCII representation */
-    uint8_t uuid[QCRYPTO_BLOCK_LUKS_UUID_LEN];
-
-    /* key slots */
-    QCryptoBlockLUKSKeySlot key_slots[QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS];
-};
-
 QEMU_BUILD_BUG_ON(sizeof(struct QCryptoBlockLUKSHeader) != 592);
 
 
@@ -254,7 +161,7 @@ static int qcrypto_block_luks_cipher_name_lookup(const char *name,
         }
     }
 
-    error_setg(errp, "Algorithm %s with key size %d bytes not supported",
+    error_setg(errp, "Algorithm '%s' with key size %d bytes not supported",
                name, key_bytes);
     return 0;
 }
@@ -290,7 +197,7 @@ static int qcrypto_block_luks_name_lookup(const char *name,
     int ret = qapi_enum_parse(map, name, -1, NULL);
 
     if (ret < 0) {
-        error_setg(errp, "%s %s not supported", type, name);
+        error_setg(errp, "%s '%s' not supported", type, name);
         return 0;
     }
     return ret;
@@ -337,7 +244,7 @@ qcrypto_block_luks_has_format(const uint8_t *buf,
  *
  * When calculating ESSIV IVs, the cipher length used by ESSIV
  * may be different from the cipher length used for the block
- * encryption, becauses dm-crypt uses the hash digest length
+ * encryption, because dm-crypt uses the hash digest length
  * as the key size. ie, if you have AES 128 as the block cipher
  * and SHA 256 as ESSIV hash, then ESSIV will use AES 256 as
  * the cipher since that gets a key length matching the digest
@@ -440,8 +347,53 @@ qcrypto_block_luks_splitkeylen_sectors(const QCryptoBlockLUKS *luks,
     return ROUND_UP(splitkeylen_sectors, header_sectors);
 }
 
+
+void
+qcrypto_block_luks_to_disk_endian(QCryptoBlockLUKSHeader *hdr)
+{
+    size_t i;
+
+    /*
+     * Everything on disk uses Big Endian (tm), so flip header fields
+     * before writing them
+     */
+    cpu_to_be16s(&hdr->version);
+    cpu_to_be32s(&hdr->payload_offset_sector);
+    cpu_to_be32s(&hdr->master_key_len);
+    cpu_to_be32s(&hdr->master_key_iterations);
+
+    for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) {
+        cpu_to_be32s(&hdr->key_slots[i].active);
+        cpu_to_be32s(&hdr->key_slots[i].iterations);
+        cpu_to_be32s(&hdr->key_slots[i].key_offset_sector);
+        cpu_to_be32s(&hdr->key_slots[i].stripes);
+    }
+}
+
+void
+qcrypto_block_luks_from_disk_endian(QCryptoBlockLUKSHeader *hdr)
+{
+    size_t i;
+
+    /*
+     * The header is always stored in big-endian format, so
+     * convert everything to native
+     */
+    be16_to_cpus(&hdr->version);
+    be32_to_cpus(&hdr->payload_offset_sector);
+    be32_to_cpus(&hdr->master_key_len);
+    be32_to_cpus(&hdr->master_key_iterations);
+
+    for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) {
+        be32_to_cpus(&hdr->key_slots[i].active);
+        be32_to_cpus(&hdr->key_slots[i].iterations);
+        be32_to_cpus(&hdr->key_slots[i].key_offset_sector);
+        be32_to_cpus(&hdr->key_slots[i].stripes);
+    }
+}
+
 /*
- * Stores the main LUKS header, taking care of endianess
+ * Stores the main LUKS header, taking care of endianness
  */
 static int
 qcrypto_block_luks_store_header(QCryptoBlock *block,
@@ -451,28 +403,13 @@ qcrypto_block_luks_store_header(QCryptoBlock *block,
 {
     const QCryptoBlockLUKS *luks = block->opaque;
     Error *local_err = NULL;
-    size_t i;
     g_autofree QCryptoBlockLUKSHeader *hdr_copy = NULL;
 
     /* Create a copy of the header */
     hdr_copy = g_new0(QCryptoBlockLUKSHeader, 1);
     memcpy(hdr_copy, &luks->header, sizeof(QCryptoBlockLUKSHeader));
 
-    /*
-     * Everything on disk uses Big Endian (tm), so flip header fields
-     * before writing them
-     */
-    cpu_to_be16s(&hdr_copy->version);
-    cpu_to_be32s(&hdr_copy->payload_offset_sector);
-    cpu_to_be32s(&hdr_copy->master_key_len);
-    cpu_to_be32s(&hdr_copy->master_key_iterations);
-
-    for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) {
-        cpu_to_be32s(&hdr_copy->key_slots[i].active);
-        cpu_to_be32s(&hdr_copy->key_slots[i].iterations);
-        cpu_to_be32s(&hdr_copy->key_slots[i].key_offset_sector);
-        cpu_to_be32s(&hdr_copy->key_slots[i].stripes);
-    }
+    qcrypto_block_luks_to_disk_endian(hdr_copy);
 
     /* Write out the partition header and key slot headers */
     writefunc(block, 0, (const uint8_t *)hdr_copy, sizeof(*hdr_copy),
@@ -486,7 +423,7 @@ qcrypto_block_luks_store_header(QCryptoBlock *block,
 }
 
 /*
- * Loads the main LUKS header,and byteswaps it to native endianess
+ * Loads the main LUKS header, and byteswaps it to native endianness
  * And run basic sanity checks on it
  */
 static int
@@ -495,8 +432,7 @@ qcrypto_block_luks_load_header(QCryptoBlock *block,
                                 void *opaque,
                                 Error **errp)
 {
-    ssize_t rv;
-    size_t i;
+    int rv;
     QCryptoBlockLUKS *luks = block->opaque;
 
     /*
@@ -512,21 +448,7 @@ qcrypto_block_luks_load_header(QCryptoBlock *block,
         return rv;
     }
 
-    /*
-     * The header is always stored in big-endian format, so
-     * convert everything to native
-     */
-    be16_to_cpus(&luks->header.version);
-    be32_to_cpus(&luks->header.payload_offset_sector);
-    be32_to_cpus(&luks->header.master_key_len);
-    be32_to_cpus(&luks->header.master_key_iterations);
-
-    for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) {
-        be32_to_cpus(&luks->header.key_slots[i].active);
-        be32_to_cpus(&luks->header.key_slots[i].iterations);
-        be32_to_cpus(&luks->header.key_slots[i].key_offset_sector);
-        be32_to_cpus(&luks->header.key_slots[i].stripes);
-    }
+    qcrypto_block_luks_from_disk_endian(&luks->header);
 
     return 0;
 }
@@ -554,6 +476,36 @@ qcrypto_block_luks_check_header(const QCryptoBlockLUKS *luks, Error **errp)
         return -1;
     }
 
+    if (!memchr(luks->header.cipher_name, '\0',
+                sizeof(luks->header.cipher_name))) {
+        error_setg(errp, "LUKS header cipher name is not NUL terminated");
+        return -1;
+    }
+
+    if (!memchr(luks->header.cipher_mode, '\0',
+                sizeof(luks->header.cipher_mode))) {
+        error_setg(errp, "LUKS header cipher mode is not NUL terminated");
+        return -1;
+    }
+
+    if (!memchr(luks->header.hash_spec, '\0',
+                sizeof(luks->header.hash_spec))) {
+        error_setg(errp, "LUKS header hash spec is not NUL terminated");
+        return -1;
+    }
+
+    if (luks->header.payload_offset_sector <
+        DIV_ROUND_UP(QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET,
+                     QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)) {
+        error_setg(errp, "LUKS payload is overlapping with the header");
+        return -1;
+    }
+
+    if (luks->header.master_key_iterations == 0) {
+        error_setg(errp, "LUKS key iteration count is zero");
+        return -1;
+    }
+
     /* Check all keyslots for corruption  */
     for (i = 0 ; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS ; i++) {
 
@@ -564,8 +516,9 @@ qcrypto_block_luks_check_header(const QCryptoBlockLUKS *luks, Error **errp)
                                                    header_sectors,
                                                    slot1->stripes);
 
-        if (slot1->stripes == 0) {
-            error_setg(errp, "Keyslot %zu is corrupted (stripes == 0)", i);
+        if (slot1->stripes != QCRYPTO_BLOCK_LUKS_STRIPES) {
+            error_setg(errp, "Keyslot %zu is corrupted (stripes %d != %d)",
+                       i, slot1->stripes, QCRYPTO_BLOCK_LUKS_STRIPES);
             return -1;
         }
 
@@ -576,6 +529,20 @@ qcrypto_block_luks_check_header(const QCryptoBlockLUKS *luks, Error **errp)
             return -1;
         }
 
+        if (slot1->active == QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED &&
+            slot1->iterations == 0) {
+            error_setg(errp, "Keyslot %zu iteration count is zero", i);
+            return -1;
+        }
+
+        if (start1 < DIV_ROUND_UP(QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET,
+                                  QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)) {
+            error_setg(errp,
+                       "Keyslot %zu is overlapping with the LUKS header",
+                       i);
+            return -1;
+        }
+
         if (start1 + len1 > luks->header.payload_offset_sector) {
             error_setg(errp,
                        "Keyslot %zu is overlapping with the encrypted payload",
@@ -624,7 +591,7 @@ qcrypto_block_luks_parse_header(QCryptoBlockLUKS *luks, Error **errp)
      */
     ivgen_name = strchr(cipher_mode, '-');
     if (!ivgen_name) {
-        error_setg(errp, "Unexpected cipher mode string format %s",
+        error_setg(errp, "Unexpected cipher mode string format '%s'",
                    luks->header.cipher_mode);
         return -1;
     }
@@ -739,14 +706,14 @@ qcrypto_block_luks_store_key(QCryptoBlock *block,
 
     assert(slot_idx < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS);
     slot = &luks->header.key_slots[slot_idx];
+    splitkeylen = luks->header.master_key_len * slot->stripes;
+
     if (qcrypto_random_bytes(slot->salt,
                              QCRYPTO_BLOCK_LUKS_SALT_LEN,
                              errp) < 0) {
         goto cleanup;
     }
 
-    splitkeylen = luks->header.master_key_len * slot->stripes;
-
     /*
      * Determine how many iterations are required to
      * hash the user password while consuming 1 second of compute
@@ -856,7 +823,7 @@ qcrypto_block_luks_store_key(QCryptoBlock *block,
                   QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
                   splitkey, splitkeylen,
                   opaque,
-                  errp) != splitkeylen) {
+                  errp) < 0) {
         goto cleanup;
     }
 
@@ -903,7 +870,7 @@ qcrypto_block_luks_load_key(QCryptoBlock *block,
     g_autofree uint8_t *splitkey = NULL;
     size_t splitkeylen;
     g_autofree uint8_t *possiblekey = NULL;
-    ssize_t rv;
+    int rv;
     g_autoptr(QCryptoCipher) cipher = NULL;
     uint8_t keydigest[QCRYPTO_BLOCK_LUKS_DIGEST_LEN];
     g_autoptr(QCryptoIVGen) ivgen = NULL;
@@ -1193,7 +1160,7 @@ qcrypto_block_luks_erase_key(QCryptoBlock *block,
                       garbagesplitkey,
                       splitkeylen,
                       opaque,
-                      &local_err) != splitkeylen) {
+                      &local_err) < 0) {
             error_propagate(errp, local_err);
             return -1;
         }
@@ -1629,13 +1596,13 @@ qcrypto_block_luks_amend_add_keyslot(QCryptoBlock *block,
     g_autofree char *new_password = NULL;
     g_autofree uint8_t *master_key = NULL;
 
-    char *secret = opts_luks->has_secret ? opts_luks->secret : luks->secret;
+    char *secret = opts_luks->secret ?: luks->secret;
 
-    if (!opts_luks->has_new_secret) {
+    if (!opts_luks->new_secret) {
         error_setg(errp, "'new-secret' is required to activate a keyslot");
         return -1;
     }
-    if (opts_luks->has_old_secret) {
+    if (opts_luks->old_secret) {
         error_setg(errp,
                    "'old-secret' must not be given when activating keyslots");
         return -1;
@@ -1709,7 +1676,7 @@ qcrypto_block_luks_amend_erase_keyslots(QCryptoBlock *block,
     g_autofree uint8_t *tmpkey = NULL;
     g_autofree char *old_password = NULL;
 
-    if (opts_luks->has_new_secret) {
+    if (opts_luks->new_secret) {
         error_setg(errp,
                    "'new-secret' must not be given when erasing keyslots");
         return -1;
@@ -1719,14 +1686,14 @@ qcrypto_block_luks_amend_erase_keyslots(QCryptoBlock *block,
                    "'iter-time' must not be given when erasing keyslots");
         return -1;
     }
-    if (opts_luks->has_secret) {
+    if (opts_luks->secret) {
         error_setg(errp,
                    "'secret' must not be given when erasing keyslots");
         return -1;
     }
 
     /* Load the old password if given */
-    if (opts_luks->has_old_secret) {
+    if (opts_luks->old_secret) {
         old_password = qcrypto_secret_lookup_as_utf8(opts_luks->old_secret,
                                                      errp);
         if (!old_password) {
@@ -1751,7 +1718,7 @@ qcrypto_block_luks_amend_erase_keyslots(QCryptoBlock *block,
             return -1;
         }
 
-        if (opts_luks->has_old_secret) {
+        if (opts_luks->old_secret) {
             int rv = qcrypto_block_luks_load_key(block,
                                                  keyslot,
                                                  old_password,
@@ -1793,7 +1760,7 @@ qcrypto_block_luks_amend_erase_keyslots(QCryptoBlock *block,
         }
 
     /* Erase all keyslots that match the given old password */
-    } else if (opts_luks->has_old_secret) {
+    } else if (opts_luks->old_secret) {
 
         unsigned long slots_to_erase_bitmap = 0;
         size_t i;
@@ -1885,7 +1852,7 @@ static int qcrypto_block_luks_get_info(QCryptoBlock *block,
 {
     QCryptoBlockLUKS *luks = block->opaque;
     QCryptoBlockInfoLUKSSlot *slot;
-    QCryptoBlockInfoLUKSSlotList *slots = NULL, **prev = &info->u.luks.slots;
+    QCryptoBlockInfoLUKSSlotList **tail = &info->u.luks.slots;
     size_t i;
 
     info->u.luks.cipher_alg = luks->cipher_alg;
@@ -1902,10 +1869,7 @@ static int qcrypto_block_luks_get_info(QCryptoBlock *block,
                                   sizeof(luks->header.uuid));
 
     for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) {
-        slots = g_new0(QCryptoBlockInfoLUKSSlotList, 1);
-        *prev = slots;
-
-        slots->value = slot = g_new0(QCryptoBlockInfoLUKSSlot, 1);
+        slot = g_new0(QCryptoBlockInfoLUKSSlot, 1);
         slot->active = luks->header.key_slots[i].active ==
             QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED;
         slot->key_offset = luks->header.key_slots[i].key_offset_sector
@@ -1917,7 +1881,7 @@ static int qcrypto_block_luks_get_info(QCryptoBlock *block,
             slot->stripes = luks->header.key_slots[i].stripes;
         }
 
-        prev = &slots->next;
+        QAPI_LIST_APPEND(tail, slot);
     }
 
     return 0;
index eb057948b591626bb7a84fdc60006d4eebb2a176..7bb4b74a37cdeeb04fc2f65cccb89fda7dd66c6f 100644 (file)
@@ -115,7 +115,7 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
 }
 
 
-static ssize_t qcrypto_block_headerlen_hdr_init_func(QCryptoBlock *block,
+static int qcrypto_block_headerlen_hdr_init_func(QCryptoBlock *block,
         size_t headerlen, void *opaque, Error **errp)
 {
     size_t *headerlenp = opaque;
@@ -126,12 +126,12 @@ static ssize_t qcrypto_block_headerlen_hdr_init_func(QCryptoBlock *block,
 }
 
 
-static ssize_t qcrypto_block_headerlen_hdr_write_func(QCryptoBlock *block,
+static int qcrypto_block_headerlen_hdr_write_func(QCryptoBlock *block,
         size_t offset, const uint8_t *buf, size_t buflen,
         void *opaque, Error **errp)
 {
     /* Discard the bytes, we're not actually writing to an image */
-    return buflen;
+    return 0;
 }
 
 
index 052355a8a9219be6bca07ce10ee5b282af7665a6..3df8fc54c051e3d82e5dbbb38ee2a4a5d6d2952c 100644 (file)
@@ -12,7 +12,6 @@
  */
 #include "qemu/osdep.h"
 #include "qemu/sockets.h"
-#include "qemu-common.h"
 #include "qapi/error.h"
 #include "crypto/cipher.h"
 #include "cipherpriv.h"
@@ -84,8 +83,8 @@ qcrypto_afalg_cipher_ctx_new(QCryptoCipherAlgorithm alg,
     g_free(name);
 
     /* setkey */
-    if (qemu_setsockopt(afalg->tfmfd, SOL_ALG, ALG_SET_KEY, key,
-                        nkey) != 0) {
+    if (setsockopt(afalg->tfmfd, SOL_ALG, ALG_SET_KEY, key,
+                   nkey) != 0) {
         error_setg_errno(errp, errno, "Set key failed");
         qcrypto_afalg_comm_free(afalg);
         return NULL;
index 7597cf4a10fd208664c7082240bba49f4fc38a0d..b409089095c452faa05e5d94807c2f726ce28f02 100644 (file)
@@ -19,8 +19,6 @@
  */
 
 #include "crypto/aes.h"
-#include "crypto/desrfb.h"
-#include "crypto/xts.h"
 
 typedef struct QCryptoCipherBuiltinAESContext QCryptoCipherBuiltinAESContext;
 struct QCryptoCipherBuiltinAESContext {
@@ -32,7 +30,6 @@ typedef struct QCryptoCipherBuiltinAES QCryptoCipherBuiltinAES;
 struct QCryptoCipherBuiltinAES {
     QCryptoCipher base;
     QCryptoCipherBuiltinAESContext key;
-    QCryptoCipherBuiltinAESContext key_tweak;
     uint8_t iv[AES_BLOCK_SIZE];
 };
 
@@ -194,39 +191,6 @@ static int qcrypto_cipher_aes_decrypt_cbc(QCryptoCipher *cipher,
     return 0;
 }
 
-static int qcrypto_cipher_aes_encrypt_xts(QCryptoCipher *cipher,
-                                          const void *in, void *out,
-                                          size_t len, Error **errp)
-{
-    QCryptoCipherBuiltinAES *ctx
-        = container_of(cipher, QCryptoCipherBuiltinAES, base);
-
-    if (!qcrypto_length_check(len, AES_BLOCK_SIZE, errp)) {
-        return -1;
-    }
-    xts_encrypt(&ctx->key, &ctx->key_tweak,
-                do_aes_encrypt_ecb, do_aes_decrypt_ecb,
-                ctx->iv, len, out, in);
-    return 0;
-}
-
-static int qcrypto_cipher_aes_decrypt_xts(QCryptoCipher *cipher,
-                                          const void *in, void *out,
-                                          size_t len, Error **errp)
-{
-    QCryptoCipherBuiltinAES *ctx
-        = container_of(cipher, QCryptoCipherBuiltinAES, base);
-
-    if (!qcrypto_length_check(len, AES_BLOCK_SIZE, errp)) {
-        return -1;
-    }
-    xts_decrypt(&ctx->key, &ctx->key_tweak,
-                do_aes_encrypt_ecb, do_aes_decrypt_ecb,
-                ctx->iv, len, out, in);
-    return 0;
-}
-
-
 static int qcrypto_cipher_aes_setiv(QCryptoCipher *cipher, const uint8_t *iv,
                              size_t niv, Error **errp)
 {
@@ -257,84 +221,16 @@ static const struct QCryptoCipherDriver qcrypto_cipher_aes_driver_cbc = {
     .cipher_free = qcrypto_cipher_ctx_free,
 };
 
-static const struct QCryptoCipherDriver qcrypto_cipher_aes_driver_xts = {
-    .cipher_encrypt = qcrypto_cipher_aes_encrypt_xts,
-    .cipher_decrypt = qcrypto_cipher_aes_decrypt_xts,
-    .cipher_setiv = qcrypto_cipher_aes_setiv,
-    .cipher_free = qcrypto_cipher_ctx_free,
-};
-
-
-typedef struct QCryptoCipherBuiltinDESRFB QCryptoCipherBuiltinDESRFB;
-struct QCryptoCipherBuiltinDESRFB {
-    QCryptoCipher base;
-
-    /* C.f. alg_key_len[QCRYPTO_CIPHER_ALG_DES_RFB] */
-    uint8_t key[8];
-};
-
-static int qcrypto_cipher_encrypt_des_rfb(QCryptoCipher *cipher,
-                                          const void *in, void *out,
-                                          size_t len, Error **errp)
-{
-    QCryptoCipherBuiltinDESRFB *ctx
-        = container_of(cipher, QCryptoCipherBuiltinDESRFB, base);
-    size_t i;
-
-    if (!qcrypto_length_check(len, 8, errp)) {
-        return -1;
-    }
-
-    deskey(ctx->key, EN0);
-
-    for (i = 0; i < len; i += 8) {
-        des((void *)in + i, out + i);
-    }
-
-    return 0;
-}
-
-static int qcrypto_cipher_decrypt_des_rfb(QCryptoCipher *cipher,
-                                          const void *in, void *out,
-                                          size_t len, Error **errp)
-{
-    QCryptoCipherBuiltinDESRFB *ctx
-        = container_of(cipher, QCryptoCipherBuiltinDESRFB, base);
-    size_t i;
-
-    if (!qcrypto_length_check(len, 8, errp)) {
-        return -1;
-    }
-
-    deskey(ctx->key, DE1);
-
-    for (i = 0; i < len; i += 8) {
-        des((void *)in + i, out + i);
-    }
-
-    return 0;
-}
-
-static const struct QCryptoCipherDriver qcrypto_cipher_des_rfb_driver = {
-    .cipher_encrypt = qcrypto_cipher_encrypt_des_rfb,
-    .cipher_decrypt = qcrypto_cipher_decrypt_des_rfb,
-    .cipher_setiv = qcrypto_cipher_no_setiv,
-    .cipher_free = qcrypto_cipher_ctx_free,
-};
-
 bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg,
                              QCryptoCipherMode mode)
 {
     switch (alg) {
-    case QCRYPTO_CIPHER_ALG_DES_RFB:
-        return mode == QCRYPTO_CIPHER_MODE_ECB;
     case QCRYPTO_CIPHER_ALG_AES_128:
     case QCRYPTO_CIPHER_ALG_AES_192:
     case QCRYPTO_CIPHER_ALG_AES_256:
         switch (mode) {
         case QCRYPTO_CIPHER_MODE_ECB:
         case QCRYPTO_CIPHER_MODE_CBC:
-        case QCRYPTO_CIPHER_MODE_XTS:
             return true;
         default:
             return false;
@@ -356,18 +252,6 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
     }
 
     switch (alg) {
-    case QCRYPTO_CIPHER_ALG_DES_RFB:
-        if (mode == QCRYPTO_CIPHER_MODE_ECB) {
-            QCryptoCipherBuiltinDESRFB *ctx;
-
-            ctx = g_new0(QCryptoCipherBuiltinDESRFB, 1);
-            ctx->base.driver = &qcrypto_cipher_des_rfb_driver;
-            memcpy(ctx->key, key, sizeof(ctx->key));
-
-            return &ctx->base;
-        }
-        goto bad_mode;
-
     case QCRYPTO_CIPHER_ALG_AES_128:
     case QCRYPTO_CIPHER_ALG_AES_192:
     case QCRYPTO_CIPHER_ALG_AES_256:
@@ -382,9 +266,6 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
             case QCRYPTO_CIPHER_MODE_CBC:
                 drv = &qcrypto_cipher_aes_driver_cbc;
                 break;
-            case QCRYPTO_CIPHER_MODE_XTS:
-                drv = &qcrypto_cipher_aes_driver_xts;
-                break;
             default:
                 goto bad_mode;
             }
@@ -392,19 +273,6 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
             ctx = g_new0(QCryptoCipherBuiltinAES, 1);
             ctx->base.driver = drv;
 
-            if (mode == QCRYPTO_CIPHER_MODE_XTS) {
-                nkey /= 2;
-                if (AES_set_encrypt_key(key + nkey, nkey * 8,
-                                        &ctx->key_tweak.enc)) {
-                    error_setg(errp, "Failed to set encryption key");
-                    goto error;
-                }
-                if (AES_set_decrypt_key(key + nkey, nkey * 8,
-                                        &ctx->key_tweak.dec)) {
-                    error_setg(errp, "Failed to set decryption key");
-                    goto error;
-                }
-            }
             if (AES_set_encrypt_key(key, nkey * 8, &ctx->key.enc)) {
                 error_setg(errp, "Failed to set encryption key");
                 goto error;
index 42d4137534f271147b4cd0cec8e556ef5904676a..a6a0117717f5bc1061e91612981ee26eaa8a1d3b 100644 (file)
  *
  */
 
-#ifdef CONFIG_QEMU_PRIVATE_XTS
-#include "crypto/xts.h"
-#endif
-
 #include <gcrypt.h>
 
 bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg,
                              QCryptoCipherMode mode)
 {
     switch (alg) {
-    case QCRYPTO_CIPHER_ALG_DES_RFB:
+    case QCRYPTO_CIPHER_ALG_DES:
     case QCRYPTO_CIPHER_ALG_3DES:
     case QCRYPTO_CIPHER_ALG_AES_128:
     case QCRYPTO_CIPHER_ALG_AES_192:
@@ -59,10 +55,6 @@ typedef struct QCryptoCipherGcrypt {
     QCryptoCipher base;
     gcry_cipher_hd_t handle;
     size_t blocksize;
-#ifdef CONFIG_QEMU_PRIVATE_XTS
-    gcry_cipher_hd_t tweakhandle;
-    uint8_t iv[XTS_BLOCK_SIZE];
-#endif
 } QCryptoCipherGcrypt;
 
 
@@ -178,90 +170,6 @@ static const struct QCryptoCipherDriver qcrypto_gcrypt_ctr_driver = {
     .cipher_free = qcrypto_gcrypt_ctx_free,
 };
 
-#ifdef CONFIG_QEMU_PRIVATE_XTS
-static void qcrypto_gcrypt_xts_ctx_free(QCryptoCipher *cipher)
-{
-    QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base);
-
-    gcry_cipher_close(ctx->tweakhandle);
-    qcrypto_gcrypt_ctx_free(cipher);
-}
-
-static void qcrypto_gcrypt_xts_wrape(const void *ctx, size_t length,
-                                     uint8_t *dst, const uint8_t *src)
-{
-    gcry_error_t err;
-    err = gcry_cipher_encrypt((gcry_cipher_hd_t)ctx, dst, length, src, length);
-    g_assert(err == 0);
-}
-
-static void qcrypto_gcrypt_xts_wrapd(const void *ctx, size_t length,
-                                     uint8_t *dst, const uint8_t *src)
-{
-    gcry_error_t err;
-    err = gcry_cipher_decrypt((gcry_cipher_hd_t)ctx, dst, length, src, length);
-    g_assert(err == 0);
-}
-
-static int qcrypto_gcrypt_xts_encrypt(QCryptoCipher *cipher, const void *in,
-                                      void *out, size_t len, Error **errp)
-{
-    QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base);
-
-    if (len & (ctx->blocksize - 1)) {
-        error_setg(errp, "Length %zu must be a multiple of block size %zu",
-                   len, ctx->blocksize);
-        return -1;
-    }
-
-    xts_encrypt(ctx->handle, ctx->tweakhandle,
-                qcrypto_gcrypt_xts_wrape, qcrypto_gcrypt_xts_wrapd,
-                ctx->iv, len, out, in);
-    return 0;
-}
-
-static int qcrypto_gcrypt_xts_decrypt(QCryptoCipher *cipher, const void *in,
-                                      void *out, size_t len, Error **errp)
-{
-    QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base);
-
-    if (len & (ctx->blocksize - 1)) {
-        error_setg(errp, "Length %zu must be a multiple of block size %zu",
-                   len, ctx->blocksize);
-        return -1;
-    }
-
-    xts_decrypt(ctx->handle, ctx->tweakhandle,
-                qcrypto_gcrypt_xts_wrape, qcrypto_gcrypt_xts_wrapd,
-                ctx->iv, len, out, in);
-    return 0;
-}
-
-static int qcrypto_gcrypt_xts_setiv(QCryptoCipher *cipher,
-                                    const uint8_t *iv, size_t niv,
-                                    Error **errp)
-{
-    QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base);
-
-    if (niv != ctx->blocksize) {
-        error_setg(errp, "Expected IV size %zu not %zu",
-                   ctx->blocksize, niv);
-        return -1;
-    }
-
-    memcpy(ctx->iv, iv, niv);
-    return 0;
-}
-
-static const struct QCryptoCipherDriver qcrypto_gcrypt_xts_driver = {
-    .cipher_encrypt = qcrypto_gcrypt_xts_encrypt,
-    .cipher_decrypt = qcrypto_gcrypt_xts_decrypt,
-    .cipher_setiv = qcrypto_gcrypt_xts_setiv,
-    .cipher_free = qcrypto_gcrypt_xts_ctx_free,
-};
-#endif /* CONFIG_QEMU_PRIVATE_XTS */
-
-
 static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
                                              QCryptoCipherMode mode,
                                              const uint8_t *key,
@@ -278,7 +186,7 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
     }
 
     switch (alg) {
-    case QCRYPTO_CIPHER_ALG_DES_RFB:
+    case QCRYPTO_CIPHER_ALG_DES:
         gcryalg = GCRY_CIPHER_DES;
         break;
     case QCRYPTO_CIPHER_ALG_3DES:
@@ -323,12 +231,7 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
         gcrymode = GCRY_CIPHER_MODE_ECB;
         break;
     case QCRYPTO_CIPHER_MODE_XTS:
-#ifdef CONFIG_QEMU_PRIVATE_XTS
-        drv = &qcrypto_gcrypt_xts_driver;
-        gcrymode = GCRY_CIPHER_MODE_ECB;
-#else
         gcrymode = GCRY_CIPHER_MODE_XTS;
-#endif
         break;
     case QCRYPTO_CIPHER_MODE_CBC:
         gcrymode = GCRY_CIPHER_MODE_CBC;
@@ -354,44 +257,7 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
     }
     ctx->blocksize = gcry_cipher_get_algo_blklen(gcryalg);
 
-#ifdef CONFIG_QEMU_PRIVATE_XTS
-    if (mode == QCRYPTO_CIPHER_MODE_XTS) {
-        if (ctx->blocksize != XTS_BLOCK_SIZE) {
-            error_setg(errp,
-                       "Cipher block size %zu must equal XTS block size %d",
-                      ctx->blocksize, XTS_BLOCK_SIZE);
-            goto error;
-        }
-        err = gcry_cipher_open(&ctx->tweakhandle, gcryalg, gcrymode, 0);
-        if (err != 0) {
-            error_setg(errp, "Cannot initialize cipher: %s",
-                       gcry_strerror(err));
-            goto error;
-        }
-    }
-#endif
-
-    if (alg == QCRYPTO_CIPHER_ALG_DES_RFB) {
-        /* We're using standard DES cipher from gcrypt, so we need
-         * to munge the key so that the results are the same as the
-         * bizarre RFB variant of DES :-)
-         */
-        uint8_t *rfbkey = qcrypto_cipher_munge_des_rfb_key(key, nkey);
-        err = gcry_cipher_setkey(ctx->handle, rfbkey, nkey);
-        g_free(rfbkey);
-    } else {
-#ifdef CONFIG_QEMU_PRIVATE_XTS
-        if (mode == QCRYPTO_CIPHER_MODE_XTS) {
-            nkey /= 2;
-            err = gcry_cipher_setkey(ctx->tweakhandle, key + nkey, nkey);
-            if (err != 0) {
-                error_setg(errp, "Cannot set key: %s", gcry_strerror(err));
-                goto error;
-            }
-        }
-#endif
-        err = gcry_cipher_setkey(ctx->handle, key, nkey);
-    }
+    err = gcry_cipher_setkey(ctx->handle, key, nkey);
     if (err != 0) {
         error_setg(errp, "Cannot set key: %s", gcry_strerror(err));
         goto error;
@@ -400,9 +266,6 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
     return &ctx->base;
 
  error:
-#ifdef CONFIG_QEMU_PRIVATE_XTS
-    gcry_cipher_close(ctx->tweakhandle);
-#endif
     gcry_cipher_close(ctx->handle);
     g_free(ctx);
     return NULL;
diff --git a/crypto/cipher-gnutls.c.inc b/crypto/cipher-gnutls.c.inc
new file mode 100644 (file)
index 0000000..d3e231c
--- /dev/null
@@ -0,0 +1,335 @@
+/*
+ * QEMU Crypto cipher gnutls algorithms
+ *
+ * Copyright (c) 2021 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "cipherpriv.h"
+
+#include <gnutls/crypto.h>
+
+#if GNUTLS_VERSION_NUMBER >= 0x030608
+#define QEMU_GNUTLS_XTS
+#endif
+
+bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg,
+                             QCryptoCipherMode mode)
+{
+
+    switch (mode) {
+    case QCRYPTO_CIPHER_MODE_ECB:
+    case QCRYPTO_CIPHER_MODE_CBC:
+        switch (alg) {
+        case QCRYPTO_CIPHER_ALG_AES_128:
+        case QCRYPTO_CIPHER_ALG_AES_192:
+        case QCRYPTO_CIPHER_ALG_AES_256:
+        case QCRYPTO_CIPHER_ALG_DES:
+        case QCRYPTO_CIPHER_ALG_3DES:
+            return true;
+        default:
+            return false;
+        }
+#ifdef QEMU_GNUTLS_XTS
+    case QCRYPTO_CIPHER_MODE_XTS:
+        switch (alg) {
+        case QCRYPTO_CIPHER_ALG_AES_128:
+        case QCRYPTO_CIPHER_ALG_AES_256:
+            return true;
+        default:
+            return false;
+        }
+#endif
+    default:
+        return false;
+    }
+}
+
+typedef struct QCryptoCipherGnutls QCryptoCipherGnutls;
+struct QCryptoCipherGnutls {
+    QCryptoCipher base;
+    gnutls_cipher_hd_t handle; /* XTS & CBC mode */
+    gnutls_cipher_algorithm_t galg; /* ECB mode */
+    guint8 *key; /* ECB mode */
+    size_t nkey; /* ECB mode */
+    size_t blocksize;
+};
+
+
+static void
+qcrypto_gnutls_cipher_free(QCryptoCipher *cipher)
+{
+    QCryptoCipherGnutls *ctx = container_of(cipher, QCryptoCipherGnutls, base);
+
+    g_free(ctx->key);
+    if (ctx->handle) {
+        gnutls_cipher_deinit(ctx->handle);
+    }
+    g_free(ctx);
+}
+
+
+static int
+qcrypto_gnutls_cipher_encrypt(QCryptoCipher *cipher,
+                              const void *in,
+                              void *out,
+                              size_t len,
+                              Error **errp)
+{
+    QCryptoCipherGnutls *ctx = container_of(cipher, QCryptoCipherGnutls, base);
+    int err;
+
+    if (len % ctx->blocksize) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, ctx->blocksize);
+        return -1;
+    }
+
+    if (ctx->handle) { /* CBC / XTS mode */
+        err = gnutls_cipher_encrypt2(ctx->handle,
+                                     in, len,
+                                     out, len);
+        if (err != 0) {
+            error_setg(errp, "Cannot encrypt data: %s",
+                       gnutls_strerror(err));
+            return -1;
+        }
+    } else { /* ECB mode very inefficiently faked with CBC */
+        g_autofree unsigned char *iv = g_new0(unsigned char, ctx->blocksize);
+        while (len) {
+            gnutls_cipher_hd_t handle;
+            gnutls_datum_t gkey = { (unsigned char *)ctx->key, ctx->nkey };
+            err = gnutls_cipher_init(&handle, ctx->galg, &gkey, NULL);
+            if (err != 0) {
+                error_setg(errp, "Cannot initialize cipher: %s",
+                           gnutls_strerror(err));
+                return -1;
+            }
+
+            gnutls_cipher_set_iv(handle, iv, ctx->blocksize);
+
+            err = gnutls_cipher_encrypt2(handle,
+                                         in, ctx->blocksize,
+                                         out, ctx->blocksize);
+            if (err != 0) {
+                gnutls_cipher_deinit(handle);
+                error_setg(errp, "Cannot encrypt data: %s",
+                           gnutls_strerror(err));
+                return -1;
+            }
+            gnutls_cipher_deinit(handle);
+
+            len -= ctx->blocksize;
+            in += ctx->blocksize;
+            out += ctx->blocksize;
+        }
+    }
+
+    return 0;
+}
+
+
+static int
+qcrypto_gnutls_cipher_decrypt(QCryptoCipher *cipher,
+                              const void *in,
+                              void *out,
+                              size_t len,
+                              Error **errp)
+{
+    QCryptoCipherGnutls *ctx = container_of(cipher, QCryptoCipherGnutls, base);
+    int err;
+
+    if (len % ctx->blocksize) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, ctx->blocksize);
+        return -1;
+    }
+
+    if (ctx->handle) { /* CBC / XTS mode */
+        err = gnutls_cipher_decrypt2(ctx->handle,
+                                     in, len,
+                                     out, len);
+
+        if (err != 0) {
+            error_setg(errp, "Cannot decrypt data: %s",
+                       gnutls_strerror(err));
+            return -1;
+        }
+    } else { /* ECB mode very inefficiently faked with CBC */
+        g_autofree unsigned char *iv = g_new0(unsigned char, ctx->blocksize);
+        while (len) {
+            gnutls_cipher_hd_t handle;
+            gnutls_datum_t gkey = { (unsigned char *)ctx->key, ctx->nkey };
+            err = gnutls_cipher_init(&handle, ctx->galg, &gkey, NULL);
+            if (err != 0) {
+                error_setg(errp, "Cannot initialize cipher: %s",
+                           gnutls_strerror(err));
+                return -1;
+            }
+
+            gnutls_cipher_set_iv(handle, iv, ctx->blocksize);
+
+            err = gnutls_cipher_decrypt2(handle,
+                                         in, ctx->blocksize,
+                                         out, ctx->blocksize);
+            if (err != 0) {
+                gnutls_cipher_deinit(handle);
+                error_setg(errp, "Cannot encrypt data: %s",
+                           gnutls_strerror(err));
+                return -1;
+            }
+            gnutls_cipher_deinit(handle);
+
+            len -= ctx->blocksize;
+            in += ctx->blocksize;
+            out += ctx->blocksize;
+        }
+    }
+
+    return 0;
+}
+
+static int
+qcrypto_gnutls_cipher_setiv(QCryptoCipher *cipher,
+                            const uint8_t *iv, size_t niv,
+                            Error **errp)
+{
+    QCryptoCipherGnutls *ctx = container_of(cipher, QCryptoCipherGnutls, base);
+
+    if (niv != ctx->blocksize) {
+        error_setg(errp, "Expected IV size %zu not %zu",
+                   ctx->blocksize, niv);
+        return -1;
+    }
+
+    gnutls_cipher_set_iv(ctx->handle, (unsigned char *)iv, niv);
+
+    return 0;
+}
+
+
+static struct QCryptoCipherDriver gnutls_driver = {
+    .cipher_encrypt = qcrypto_gnutls_cipher_encrypt,
+    .cipher_decrypt = qcrypto_gnutls_cipher_decrypt,
+    .cipher_setiv = qcrypto_gnutls_cipher_setiv,
+    .cipher_free = qcrypto_gnutls_cipher_free,
+};
+
+static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
+                                             QCryptoCipherMode mode,
+                                             const uint8_t *key,
+                                             size_t nkey,
+                                             Error **errp)
+{
+    QCryptoCipherGnutls *ctx;
+    gnutls_datum_t gkey = { (unsigned char *)key, nkey };
+    gnutls_cipher_algorithm_t galg = GNUTLS_CIPHER_UNKNOWN;
+    int err;
+
+    switch (mode) {
+#ifdef QEMU_GNUTLS_XTS
+    case QCRYPTO_CIPHER_MODE_XTS:
+        switch (alg) {
+        case QCRYPTO_CIPHER_ALG_AES_128:
+            galg = GNUTLS_CIPHER_AES_128_XTS;
+            break;
+        case QCRYPTO_CIPHER_ALG_AES_256:
+            galg = GNUTLS_CIPHER_AES_256_XTS;
+            break;
+        default:
+            break;
+        }
+        break;
+#endif
+
+    case QCRYPTO_CIPHER_MODE_ECB:
+    case QCRYPTO_CIPHER_MODE_CBC:
+        switch (alg) {
+        case QCRYPTO_CIPHER_ALG_AES_128:
+            galg = GNUTLS_CIPHER_AES_128_CBC;
+            break;
+        case QCRYPTO_CIPHER_ALG_AES_192:
+            galg = GNUTLS_CIPHER_AES_192_CBC;
+            break;
+        case QCRYPTO_CIPHER_ALG_AES_256:
+            galg = GNUTLS_CIPHER_AES_256_CBC;
+            break;
+        case QCRYPTO_CIPHER_ALG_DES:
+            galg = GNUTLS_CIPHER_DES_CBC;
+            break;
+        case QCRYPTO_CIPHER_ALG_3DES:
+            galg = GNUTLS_CIPHER_3DES_CBC;
+            break;
+        default:
+            break;
+        }
+        break;
+    default:
+        break;
+    }
+
+    if (galg == GNUTLS_CIPHER_UNKNOWN) {
+        error_setg(errp, "Unsupported cipher algorithm %s with %s mode",
+                   QCryptoCipherAlgorithm_str(alg),
+                   QCryptoCipherMode_str(mode));
+        return NULL;
+    }
+
+    if (!qcrypto_cipher_validate_key_length(alg, mode, nkey, errp)) {
+        return NULL;
+    }
+
+    ctx = g_new0(QCryptoCipherGnutls, 1);
+    ctx->base.driver = &gnutls_driver;
+
+    if (mode == QCRYPTO_CIPHER_MODE_ECB) {
+        ctx->key = g_new0(guint8, nkey);
+        memcpy(ctx->key, key, nkey);
+        ctx->nkey = nkey;
+        ctx->galg = galg;
+    } else {
+        err = gnutls_cipher_init(&ctx->handle, galg, &gkey, NULL);
+        if (err != 0) {
+            error_setg(errp, "Cannot initialize cipher: %s",
+                       gnutls_strerror(err));
+            goto error;
+        }
+    }
+
+    if (alg == QCRYPTO_CIPHER_ALG_DES ||
+        alg == QCRYPTO_CIPHER_ALG_3DES)
+        ctx->blocksize = 8;
+    else
+        ctx->blocksize = 16;
+
+    /*
+     * Our API contract for requires iv to be optional
+     * but nettle gets unhappy when called by gnutls
+     * in this case, so we just force set a default
+     * all-zeros IV, to match behaviour of other backends.
+     */
+    if (mode != QCRYPTO_CIPHER_MODE_ECB) {
+        g_autofree unsigned char *iv = g_new0(unsigned char, ctx->blocksize);
+        gnutls_cipher_set_iv(ctx->handle, iv, ctx->blocksize);
+    }
+
+    return &ctx->base;
+
+ error:
+    qcrypto_gnutls_cipher_free(&ctx->base);
+    return NULL;
+}
index cac771e4ff7bd93a055a968aa5ad4cd0750ec9b5..24cc61f87bfc4ae7ab8ff523a4105a67bace67a1 100644 (file)
 #include <nettle/xts.h>
 #endif
 
-typedef void (*QCryptoCipherNettleFuncWrapper)(const void *ctx,
-                                               size_t length,
-                                               uint8_t *dst,
-                                               const uint8_t *src);
-
-#if CONFIG_NETTLE_VERSION_MAJOR < 3
-typedef nettle_crypt_func * QCryptoCipherNettleFuncNative;
-typedef void *       cipher_ctx_t;
-typedef unsigned     cipher_length_t;
-#define CONST_CTX
-
-#define cast5_set_key cast128_set_key
-
-#define aes128_ctx aes_ctx
-#define aes192_ctx aes_ctx
-#define aes256_ctx aes_ctx
-#define aes128_set_encrypt_key(c, k) \
-    aes_set_encrypt_key(c, 16, k)
-#define aes192_set_encrypt_key(c, k) \
-    aes_set_encrypt_key(c, 24, k)
-#define aes256_set_encrypt_key(c, k) \
-    aes_set_encrypt_key(c, 32, k)
-#define aes128_set_decrypt_key(c, k) \
-    aes_set_decrypt_key(c, 16, k)
-#define aes192_set_decrypt_key(c, k) \
-    aes_set_decrypt_key(c, 24, k)
-#define aes256_set_decrypt_key(c, k) \
-    aes_set_decrypt_key(c, 32, k)
-#define aes128_encrypt aes_encrypt
-#define aes192_encrypt aes_encrypt
-#define aes256_encrypt aes_encrypt
-#define aes128_decrypt aes_decrypt
-#define aes192_decrypt aes_decrypt
-#define aes256_decrypt aes_decrypt
-#else
-typedef nettle_cipher_func * QCryptoCipherNettleFuncNative;
-typedef const void * cipher_ctx_t;
-typedef size_t       cipher_length_t;
-#define CONST_CTX    const
-#endif
-
 static inline bool qcrypto_length_check(size_t len, size_t blocksize,
                                         Error **errp)
 {
@@ -197,12 +156,12 @@ static const struct QCryptoCipherDriver NAME##_driver_ctr = {           \
 static void NAME##_xts_wrape(const void *ctx, size_t length,            \
                              uint8_t *dst, const uint8_t *src)          \
 {                                                                       \
-    ENCRYPT((cipher_ctx_t)ctx, length, dst, src);                       \
+    ENCRYPT((const void *)ctx, length, dst, src);                       \
 }                                                                       \
 static void NAME##_xts_wrapd(const void *ctx, size_t length,            \
                              uint8_t *dst, const uint8_t *src)          \
 {                                                                       \
-    DECRYPT((cipher_ctx_t)ctx, length, dst, src);                       \
+    DECRYPT((const void *)ctx, length, dst, src);                       \
 }                                                                       \
 static int NAME##_encrypt_xts(QCryptoCipher *cipher, const void *in,    \
                               void *out, size_t len, Error **errp)      \
@@ -276,25 +235,25 @@ static const struct QCryptoCipherDriver NAME##_driver_xts = {   \
     DEFINE_XTS(NAME, TYPE, BLEN, ENCRYPT, DECRYPT)
 
 
-typedef struct QCryptoNettleDESRFB {
+typedef struct QCryptoNettleDES {
     QCryptoCipher base;
     struct des_ctx key;
     uint8_t iv[DES_BLOCK_SIZE];
-} QCryptoNettleDESRFB;
+} QCryptoNettleDES;
 
-static void des_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void des_encrypt_native(const void *ctx, size_t length,
                                uint8_t *dst, const uint8_t *src)
 {
     des_encrypt(ctx, length, dst, src);
 }
 
-static void des_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void des_decrypt_native(const void *ctx, size_t length,
                                uint8_t *dst, const uint8_t *src)
 {
     des_decrypt(ctx, length, dst, src);
 }
 
-DEFINE_ECB_CBC_CTR(qcrypto_nettle_des_rfb, QCryptoNettleDESRFB,
+DEFINE_ECB_CBC_CTR(qcrypto_nettle_des, QCryptoNettleDES,
                    DES_BLOCK_SIZE, des_encrypt_native, des_decrypt_native)
 
 
@@ -304,13 +263,13 @@ typedef struct QCryptoNettleDES3 {
     uint8_t iv[DES3_BLOCK_SIZE];
 } QCryptoNettleDES3;
 
-static void des3_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void des3_encrypt_native(const void *ctx, size_t length,
                                 uint8_t *dst, const uint8_t *src)
 {
     des3_encrypt(ctx, length, dst, src);
 }
 
-static void des3_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void des3_decrypt_native(const void *ctx, size_t length,
                                 uint8_t *dst, const uint8_t *src)
 {
     des3_decrypt(ctx, length, dst, src);
@@ -327,17 +286,17 @@ typedef struct QCryptoNettleAES128 {
     struct aes128_ctx key[2], key_xts[2];
 } QCryptoNettleAES128;
 
-static void aes128_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void aes128_encrypt_native(const void *ctx, size_t length,
                                   uint8_t *dst, const uint8_t *src)
 {
-    CONST_CTX struct aes128_ctx *keys = ctx;
+    const struct aes128_ctx *keys = ctx;
     aes128_encrypt(&keys[0], length, dst, src);
 }
 
-static void aes128_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void aes128_decrypt_native(const void *ctx, size_t length,
                                   uint8_t *dst, const uint8_t *src)
 {
-    CONST_CTX struct aes128_ctx *keys = ctx;
+    const struct aes128_ctx *keys = ctx;
     aes128_decrypt(&keys[1], length, dst, src);
 }
 
@@ -353,17 +312,17 @@ typedef struct QCryptoNettleAES192 {
     struct aes192_ctx key[2], key_xts[2];
 } QCryptoNettleAES192;
 
-static void aes192_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void aes192_encrypt_native(const void *ctx, size_t length,
                                   uint8_t *dst, const uint8_t *src)
 {
-    CONST_CTX struct aes192_ctx *keys = ctx;
+    const struct aes192_ctx *keys = ctx;
     aes192_encrypt(&keys[0], length, dst, src);
 }
 
-static void aes192_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void aes192_decrypt_native(const void *ctx, size_t length,
                                   uint8_t *dst, const uint8_t *src)
 {
-    CONST_CTX struct aes192_ctx *keys = ctx;
+    const struct aes192_ctx *keys = ctx;
     aes192_decrypt(&keys[1], length, dst, src);
 }
 
@@ -379,17 +338,17 @@ typedef struct QCryptoNettleAES256 {
     struct aes256_ctx key[2], key_xts[2];
 } QCryptoNettleAES256;
 
-static void aes256_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void aes256_encrypt_native(const void *ctx, size_t length,
                                   uint8_t *dst, const uint8_t *src)
 {
-    CONST_CTX struct aes256_ctx *keys = ctx;
+    const struct aes256_ctx *keys = ctx;
     aes256_encrypt(&keys[0], length, dst, src);
 }
 
-static void aes256_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
-                               uint8_t *dst, const uint8_t *src)
+static void aes256_decrypt_native(const void *ctx, size_t length,
+                                  uint8_t *dst, const uint8_t *src)
 {
-    CONST_CTX struct aes256_ctx *keys = ctx;
+    const struct aes256_ctx *keys = ctx;
     aes256_decrypt(&keys[1], length, dst, src);
 }
 
@@ -404,13 +363,13 @@ typedef struct QCryptoNettleCAST128 {
     struct cast128_ctx key, key_xts;
 } QCryptoNettleCAST128;
 
-static void cast128_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void cast128_encrypt_native(const void *ctx, size_t length,
                                    uint8_t *dst, const uint8_t *src)
 {
     cast128_encrypt(ctx, length, dst, src);
 }
 
-static void cast128_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void cast128_decrypt_native(const void *ctx, size_t length,
                                    uint8_t *dst, const uint8_t *src)
 {
     cast128_decrypt(ctx, length, dst, src);
@@ -428,13 +387,13 @@ typedef struct QCryptoNettleSerpent {
 } QCryptoNettleSerpent;
 
 
-static void serpent_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void serpent_encrypt_native(const void *ctx, size_t length,
                                    uint8_t *dst, const uint8_t *src)
 {
     serpent_encrypt(ctx, length, dst, src);
 }
 
-static void serpent_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void serpent_decrypt_native(const void *ctx, size_t length,
                                    uint8_t *dst, const uint8_t *src)
 {
     serpent_decrypt(ctx, length, dst, src);
@@ -451,13 +410,13 @@ typedef struct QCryptoNettleTwofish {
     struct twofish_ctx key, key_xts;
 } QCryptoNettleTwofish;
 
-static void twofish_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void twofish_encrypt_native(const void *ctx, size_t length,
                                    uint8_t *dst, const uint8_t *src)
 {
     twofish_encrypt(ctx, length, dst, src);
 }
 
-static void twofish_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+static void twofish_decrypt_native(const void *ctx, size_t length,
                                    uint8_t *dst, const uint8_t *src)
 {
     twofish_decrypt(ctx, length, dst, src);
@@ -472,7 +431,7 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg,
                              QCryptoCipherMode mode)
 {
     switch (alg) {
-    case QCRYPTO_CIPHER_ALG_DES_RFB:
+    case QCRYPTO_CIPHER_ALG_DES:
     case QCRYPTO_CIPHER_ALG_3DES:
     case QCRYPTO_CIPHER_ALG_AES_128:
     case QCRYPTO_CIPHER_ALG_AES_192:
@@ -521,32 +480,28 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
     }
 
     switch (alg) {
-    case QCRYPTO_CIPHER_ALG_DES_RFB:
+    case QCRYPTO_CIPHER_ALG_DES:
         {
-            QCryptoNettleDESRFB *ctx;
+            QCryptoNettleDES *ctx;
             const QCryptoCipherDriver *drv;
-            uint8_t *rfbkey;
 
             switch (mode) {
             case QCRYPTO_CIPHER_MODE_ECB:
-                drv = &qcrypto_nettle_des_rfb_driver_ecb;
+                drv = &qcrypto_nettle_des_driver_ecb;
                 break;
             case QCRYPTO_CIPHER_MODE_CBC:
-                drv = &qcrypto_nettle_des_rfb_driver_cbc;
+                drv = &qcrypto_nettle_des_driver_cbc;
                 break;
             case QCRYPTO_CIPHER_MODE_CTR:
-                drv = &qcrypto_nettle_des_rfb_driver_ctr;
+                drv = &qcrypto_nettle_des_driver_ctr;
                 break;
             default:
                 goto bad_cipher_mode;
             }
 
-            ctx = g_new0(QCryptoNettleDESRFB, 1);
+            ctx = g_new0(QCryptoNettleDES, 1);
             ctx->base.driver = drv;
-
-            rfbkey = qcrypto_cipher_munge_des_rfb_key(key, nkey);
-            des_set_key(&ctx->key, rfbkey);
-            g_free(rfbkey);
+            des_set_key(&ctx->key, key);
 
             return &ctx->base;
         }
index 068b2fb867c626e446d1016c136226764044bd62..74b09a5b261bae00c01afa788dbca99017f4110e 100644 (file)
@@ -29,7 +29,7 @@ static const size_t alg_key_len[QCRYPTO_CIPHER_ALG__MAX] = {
     [QCRYPTO_CIPHER_ALG_AES_128] = 16,
     [QCRYPTO_CIPHER_ALG_AES_192] = 24,
     [QCRYPTO_CIPHER_ALG_AES_256] = 32,
-    [QCRYPTO_CIPHER_ALG_DES_RFB] = 8,
+    [QCRYPTO_CIPHER_ALG_DES] = 8,
     [QCRYPTO_CIPHER_ALG_3DES] = 24,
     [QCRYPTO_CIPHER_ALG_CAST5_128] = 16,
     [QCRYPTO_CIPHER_ALG_SERPENT_128] = 16,
@@ -44,7 +44,7 @@ static const size_t alg_block_len[QCRYPTO_CIPHER_ALG__MAX] = {
     [QCRYPTO_CIPHER_ALG_AES_128] = 16,
     [QCRYPTO_CIPHER_ALG_AES_192] = 16,
     [QCRYPTO_CIPHER_ALG_AES_256] = 16,
-    [QCRYPTO_CIPHER_ALG_DES_RFB] = 8,
+    [QCRYPTO_CIPHER_ALG_DES] = 8,
     [QCRYPTO_CIPHER_ALG_3DES] = 8,
     [QCRYPTO_CIPHER_ALG_CAST5_128] = 8,
     [QCRYPTO_CIPHER_ALG_SERPENT_128] = 16,
@@ -107,9 +107,9 @@ qcrypto_cipher_validate_key_length(QCryptoCipherAlgorithm alg,
     }
 
     if (mode == QCRYPTO_CIPHER_MODE_XTS) {
-        if (alg == QCRYPTO_CIPHER_ALG_DES_RFB
-                || alg == QCRYPTO_CIPHER_ALG_3DES) {
-            error_setg(errp, "XTS mode not compatible with DES-RFB/3DES");
+        if (alg == QCRYPTO_CIPHER_ALG_DES ||
+            alg == QCRYPTO_CIPHER_ALG_3DES) {
+            error_setg(errp, "XTS mode not compatible with DES/3DES");
             return false;
         }
         if (nkey % 2) {
@@ -132,28 +132,12 @@ qcrypto_cipher_validate_key_length(QCryptoCipherAlgorithm alg,
     return true;
 }
 
-#if defined(CONFIG_GCRYPT) || defined(CONFIG_NETTLE)
-static uint8_t *
-qcrypto_cipher_munge_des_rfb_key(const uint8_t *key,
-                                 size_t nkey)
-{
-    uint8_t *ret = g_new0(uint8_t, nkey);
-    size_t i;
-    for (i = 0; i < nkey; i++) {
-        uint8_t r = key[i];
-        r = (r & 0xf0) >> 4 | (r & 0x0f) << 4;
-        r = (r & 0xcc) >> 2 | (r & 0x33) << 2;
-        r = (r & 0xaa) >> 1 | (r & 0x55) << 1;
-        ret[i] = r;
-    }
-    return ret;
-}
-#endif /* CONFIG_GCRYPT || CONFIG_NETTLE */
-
 #ifdef CONFIG_GCRYPT
 #include "cipher-gcrypt.c.inc"
 #elif defined CONFIG_NETTLE
 #include "cipher-nettle.c.inc"
+#elif defined CONFIG_GNUTLS_CRYPTO
+#include "cipher-gnutls.c.inc"
 #else
 #include "cipher-builtin.c.inc"
 #endif
diff --git a/crypto/clmul.c b/crypto/clmul.c
new file mode 100644 (file)
index 0000000..9e3e61a
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * Carry-less multiply operations.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#include "qemu/osdep.h"
+#include "crypto/clmul.h"
+
+uint64_t clmul_8x8_low(uint64_t n, uint64_t m)
+{
+    uint64_t r = 0;
+
+    for (int i = 0; i < 8; ++i) {
+        uint64_t mask = (n & 0x0101010101010101ull) * 0xff;
+        r ^= m & mask;
+        m = (m << 1) & 0xfefefefefefefefeull;
+        n >>= 1;
+    }
+    return r;
+}
+
+static uint64_t clmul_8x4_even_int(uint64_t n, uint64_t m)
+{
+    uint64_t r = 0;
+
+    for (int i = 0; i < 8; ++i) {
+        uint64_t mask = (n & 0x0001000100010001ull) * 0xffff;
+        r ^= m & mask;
+        n >>= 1;
+        m <<= 1;
+    }
+    return r;
+}
+
+uint64_t clmul_8x4_even(uint64_t n, uint64_t m)
+{
+    n &= 0x00ff00ff00ff00ffull;
+    m &= 0x00ff00ff00ff00ffull;
+    return clmul_8x4_even_int(n, m);
+}
+
+uint64_t clmul_8x4_odd(uint64_t n, uint64_t m)
+{
+    return clmul_8x4_even(n >> 8, m >> 8);
+}
+
+static uint64_t unpack_8_to_16(uint64_t x)
+{
+    return  (x & 0x000000ff)
+         | ((x & 0x0000ff00) << 8)
+         | ((x & 0x00ff0000) << 16)
+         | ((x & 0xff000000) << 24);
+}
+
+uint64_t clmul_8x4_packed(uint32_t n, uint32_t m)
+{
+    return clmul_8x4_even_int(unpack_8_to_16(n), unpack_8_to_16(m));
+}
+
+uint64_t clmul_16x2_even(uint64_t n, uint64_t m)
+{
+    uint64_t r = 0;
+
+    n &= 0x0000ffff0000ffffull;
+    m &= 0x0000ffff0000ffffull;
+
+    for (int i = 0; i < 16; ++i) {
+        uint64_t mask = (n & 0x0000000100000001ull) * 0xffffffffull;
+        r ^= m & mask;
+        n >>= 1;
+        m <<= 1;
+    }
+    return r;
+}
+
+uint64_t clmul_16x2_odd(uint64_t n, uint64_t m)
+{
+    return clmul_16x2_even(n >> 16, m >> 16);
+}
+
+uint64_t clmul_32(uint32_t n, uint32_t m32)
+{
+    uint64_t r = 0;
+    uint64_t m = m32;
+
+    for (int i = 0; i < 32; ++i) {
+        r ^= n & 1 ? m : 0;
+        n >>= 1;
+        m <<= 1;
+    }
+    return r;
+}
+
+Int128 clmul_64_gen(uint64_t n, uint64_t m)
+{
+    uint64_t rl = 0, rh = 0;
+
+    /* Bit 0 can only influence the low 64-bit result.  */
+    if (n & 1) {
+        rl = m;
+    }
+
+    for (int i = 1; i < 64; ++i) {
+        uint64_t mask = -((n >> i) & 1);
+        rl ^= (m << i) & mask;
+        rh ^= (m >> (64 - i)) & mask;
+    }
+    return int128_make128(rl, rh);
+}
diff --git a/crypto/der.c b/crypto/der.c
new file mode 100644 (file)
index 0000000..ebbecfc
--- /dev/null
@@ -0,0 +1,452 @@
+/*
+ * QEMU Crypto ASN.1 DER decoder
+ *
+ * Copyright (c) 2022 Bytedance
+ * Author: lei he <helei.sig11@bytedance.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "crypto/der.h"
+
+typedef struct QCryptoDerEncodeNode {
+    uint8_t tag;
+    struct QCryptoDerEncodeNode *parent;
+    struct QCryptoDerEncodeNode *next;
+    /* for constructed type, data is null */
+    const uint8_t *data;
+    size_t dlen;
+} QCryptoDerEncodeNode;
+
+typedef struct QCryptoEncodeContext {
+    QCryptoDerEncodeNode root;
+    QCryptoDerEncodeNode *current_parent;
+    QCryptoDerEncodeNode *tail;
+} QCryptoEncodeContext;
+
+enum QCryptoDERTypeTag {
+    QCRYPTO_DER_TYPE_TAG_BOOL = 0x1,
+    QCRYPTO_DER_TYPE_TAG_INT = 0x2,
+    QCRYPTO_DER_TYPE_TAG_BIT_STR = 0x3,
+    QCRYPTO_DER_TYPE_TAG_OCT_STR = 0x4,
+    QCRYPTO_DER_TYPE_TAG_NULL = 0x5,
+    QCRYPTO_DER_TYPE_TAG_OID = 0x6,
+    QCRYPTO_DER_TYPE_TAG_SEQ = 0x10,
+    QCRYPTO_DER_TYPE_TAG_SET = 0x11,
+};
+
+enum QCryptoDERTagClass {
+    QCRYPTO_DER_TAG_CLASS_UNIV = 0x0,
+    QCRYPTO_DER_TAG_CLASS_APPL = 0x1,
+    QCRYPTO_DER_TAG_CLASS_CONT = 0x2,
+    QCRYPTO_DER_TAG_CLASS_PRIV = 0x3,
+};
+
+enum QCryptoDERTagEnc {
+    QCRYPTO_DER_TAG_ENC_PRIM = 0x0,
+    QCRYPTO_DER_TAG_ENC_CONS = 0x1,
+};
+
+#define QCRYPTO_DER_TAG_ENC_MASK 0x20
+#define QCRYPTO_DER_TAG_ENC_SHIFT 5
+
+#define QCRYPTO_DER_TAG_CLASS_MASK 0xc0
+#define QCRYPTO_DER_TAG_CLASS_SHIFT 6
+
+#define QCRYPTO_DER_TAG_VAL_MASK 0x1f
+#define QCRYPTO_DER_SHORT_LEN_MASK 0x80
+
+#define QCRYPTO_DER_TAG(class, enc, val)           \
+    (((class) << QCRYPTO_DER_TAG_CLASS_SHIFT) |    \
+     ((enc) << QCRYPTO_DER_TAG_ENC_SHIFT) | (val))
+
+/**
+ * qcrypto_der_encode_length:
+ * @src_len: the length of source data
+ * @dst: destination to save the encoded 'length', if dst is NULL, only compute
+ * the expected buffer size in bytes.
+ * @dst_len: output parameter, indicates how many bytes wrote.
+ *
+ * Encode the 'length' part of TLV tuple.
+ */
+static void qcrypto_der_encode_length(size_t src_len,
+                                      uint8_t *dst, size_t *dst_len)
+{
+    size_t max_length = 0xFF;
+    uint8_t length_bytes = 0, header_byte;
+
+    if (src_len < QCRYPTO_DER_SHORT_LEN_MASK) {
+        header_byte = src_len;
+        *dst_len = 1;
+    } else {
+        for (length_bytes = 1; max_length < src_len; length_bytes++) {
+            max_length = (max_length << 8) + max_length;
+        }
+        header_byte = length_bytes;
+        header_byte |= QCRYPTO_DER_SHORT_LEN_MASK;
+        *dst_len = length_bytes + 1;
+    }
+    if (!dst) {
+        return;
+    }
+    *dst++ = header_byte;
+    /* Bigendian length bytes */
+    for (; length_bytes > 0; length_bytes--) {
+        *dst++ = ((src_len >> (length_bytes - 1) * 8) & 0xFF);
+    }
+}
+
+static uint8_t qcrypto_der_peek_byte(const uint8_t **data, size_t *dlen)
+{
+    return **data;
+}
+
+static void qcrypto_der_cut_nbytes(const uint8_t **data,
+                                   size_t *dlen,
+                                   size_t nbytes)
+{
+    *data += nbytes;
+    *dlen -= nbytes;
+}
+
+static uint8_t qcrypto_der_cut_byte(const uint8_t **data, size_t *dlen)
+{
+    uint8_t val = qcrypto_der_peek_byte(data, dlen);
+
+    qcrypto_der_cut_nbytes(data, dlen, 1);
+
+    return val;
+}
+
+static int qcrypto_der_invoke_callback(QCryptoDERDecodeCb cb, void *ctx,
+                                       const uint8_t *value, size_t vlen,
+                                       Error **errp)
+{
+    if (!cb) {
+        return 0;
+    }
+
+    return cb(ctx, value, vlen, errp);
+}
+
+static int qcrypto_der_extract_definite_data(const uint8_t **data, size_t *dlen,
+                                             QCryptoDERDecodeCb cb, void *ctx,
+                                             Error **errp)
+{
+    const uint8_t *value;
+    size_t vlen = 0;
+    uint8_t byte_count = qcrypto_der_cut_byte(data, dlen);
+
+    /* short format of definite-length */
+    if (!(byte_count & QCRYPTO_DER_SHORT_LEN_MASK)) {
+        if (byte_count > *dlen) {
+            error_setg(errp, "Invalid content length: %u", byte_count);
+            return -1;
+        }
+
+        value = *data;
+        vlen = byte_count;
+        qcrypto_der_cut_nbytes(data, dlen, vlen);
+
+        if (qcrypto_der_invoke_callback(cb, ctx, value, vlen, errp) != 0) {
+            return -1;
+        }
+        return vlen;
+    }
+
+    /* Ignore highest bit */
+    byte_count &= ~QCRYPTO_DER_SHORT_LEN_MASK;
+
+    /*
+     * size_t is enough to store the value of length, although the DER
+     * encoding standard supports larger length.
+     */
+    if (byte_count > sizeof(size_t)) {
+        error_setg(errp, "Invalid byte count of content length: %u",
+                   byte_count);
+        return -1;
+    }
+
+    if (byte_count > *dlen) {
+        error_setg(errp, "Invalid content length: %u", byte_count);
+        return -1;
+    }
+    while (byte_count--) {
+        vlen <<= 8;
+        vlen += qcrypto_der_cut_byte(data, dlen);
+    }
+
+    if (vlen > *dlen) {
+        error_setg(errp, "Invalid content length: %zu", vlen);
+        return -1;
+    }
+
+    value = *data;
+    qcrypto_der_cut_nbytes(data, dlen, vlen);
+
+    if (qcrypto_der_invoke_callback(cb, ctx, value, vlen, errp) != 0) {
+        return -1;
+    }
+    return vlen;
+}
+
+static int qcrypto_der_extract_data(const uint8_t **data, size_t *dlen,
+                                    QCryptoDERDecodeCb cb, void *ctx,
+                                    Error **errp)
+{
+    uint8_t val;
+    if (*dlen < 1) {
+        error_setg(errp, "Need more data");
+        return -1;
+    }
+    val = qcrypto_der_peek_byte(data, dlen);
+
+    /* must use definite length format */
+    if (val == QCRYPTO_DER_SHORT_LEN_MASK) {
+        error_setg(errp, "Only definite length format is allowed");
+        return -1;
+    }
+
+    return qcrypto_der_extract_definite_data(data, dlen, cb, ctx, errp);
+}
+
+static int qcrypto_der_decode_tlv(const uint8_t expected_tag,
+                                  const uint8_t **data, size_t *dlen,
+                                  QCryptoDERDecodeCb cb,
+                                  void *ctx, Error **errp)
+{
+    const uint8_t *saved_data = *data;
+    size_t saved_dlen = *dlen;
+    uint8_t tag;
+    int data_length;
+
+    if (*dlen < 1) {
+        error_setg(errp, "Need more data");
+        return -1;
+    }
+    tag = qcrypto_der_cut_byte(data, dlen);
+    if (tag != expected_tag) {
+        error_setg(errp, "Unexpected tag: expected: %u, actual: %u",
+                   expected_tag, tag);
+        goto error;
+    }
+
+    data_length = qcrypto_der_extract_data(data, dlen, cb, ctx, errp);
+    if (data_length < 0) {
+        goto error;
+    }
+    return data_length;
+
+error:
+    *data = saved_data;
+    *dlen = saved_dlen;
+    return -1;
+}
+
+int qcrypto_der_decode_int(const uint8_t **data, size_t *dlen,
+                           QCryptoDERDecodeCb cb, void *ctx, Error **errp)
+{
+    const uint8_t tag = QCRYPTO_DER_TAG(QCRYPTO_DER_TAG_CLASS_UNIV,
+                                        QCRYPTO_DER_TAG_ENC_PRIM,
+                                        QCRYPTO_DER_TYPE_TAG_INT);
+    return qcrypto_der_decode_tlv(tag, data, dlen, cb, ctx, errp);
+}
+
+int qcrypto_der_decode_seq(const uint8_t **data, size_t *dlen,
+                           QCryptoDERDecodeCb cb, void *ctx, Error **errp)
+{
+    uint8_t tag = QCRYPTO_DER_TAG(QCRYPTO_DER_TAG_CLASS_UNIV,
+                                  QCRYPTO_DER_TAG_ENC_CONS,
+                                  QCRYPTO_DER_TYPE_TAG_SEQ);
+    return qcrypto_der_decode_tlv(tag, data, dlen, cb, ctx, errp);
+}
+
+int qcrypto_der_decode_octet_str(const uint8_t **data, size_t *dlen,
+                                 QCryptoDERDecodeCb cb, void *ctx, Error **errp)
+{
+    uint8_t tag = QCRYPTO_DER_TAG(QCRYPTO_DER_TAG_CLASS_UNIV,
+                                  QCRYPTO_DER_TAG_ENC_PRIM,
+                                  QCRYPTO_DER_TYPE_TAG_OCT_STR);
+    return qcrypto_der_decode_tlv(tag, data, dlen, cb, ctx, errp);
+}
+
+int qcrypto_der_decode_bit_str(const uint8_t **data, size_t *dlen,
+                               QCryptoDERDecodeCb cb, void *ctx, Error **errp)
+{
+    uint8_t tag = QCRYPTO_DER_TAG(QCRYPTO_DER_TAG_CLASS_UNIV,
+                                  QCRYPTO_DER_TAG_ENC_PRIM,
+                                  QCRYPTO_DER_TYPE_TAG_BIT_STR);
+    return qcrypto_der_decode_tlv(tag, data, dlen, cb, ctx, errp);
+}
+
+int qcrypto_der_decode_oid(const uint8_t **data, size_t *dlen,
+                           QCryptoDERDecodeCb cb, void *ctx, Error **errp)
+{
+    uint8_t tag = QCRYPTO_DER_TAG(QCRYPTO_DER_TAG_CLASS_UNIV,
+                                  QCRYPTO_DER_TAG_ENC_PRIM,
+                                  QCRYPTO_DER_TYPE_TAG_OID);
+    return qcrypto_der_decode_tlv(tag, data, dlen, cb, ctx, errp);
+}
+
+int qcrypto_der_decode_ctx_tag(const uint8_t **data, size_t *dlen, int tag_id,
+                               QCryptoDERDecodeCb cb, void *ctx, Error **errp)
+{
+    uint8_t tag = QCRYPTO_DER_TAG(QCRYPTO_DER_TAG_CLASS_CONT,
+                                  QCRYPTO_DER_TAG_ENC_CONS,
+                                  tag_id);
+    return qcrypto_der_decode_tlv(tag, data, dlen, cb, ctx, errp);
+}
+
+static void qcrypto_der_encode_prim(QCryptoEncodeContext *ctx, uint8_t tag,
+                                    const uint8_t *data, size_t dlen)
+{
+    QCryptoDerEncodeNode *node = g_new0(QCryptoDerEncodeNode, 1);
+    size_t nbytes_len;
+
+    node->tag = tag;
+    node->data = data;
+    node->dlen = dlen;
+    node->parent = ctx->current_parent;
+
+    qcrypto_der_encode_length(dlen, NULL, &nbytes_len);
+    /* 1 byte for Tag, nbyte_len for Length, and dlen for Value */
+    node->parent->dlen += 1 + nbytes_len + dlen;
+
+    ctx->tail->next = node;
+    ctx->tail = node;
+}
+
+QCryptoEncodeContext *qcrypto_der_encode_ctx_new(void)
+{
+    QCryptoEncodeContext *ctx = g_new0(QCryptoEncodeContext, 1);
+    ctx->current_parent = &ctx->root;
+    ctx->tail = &ctx->root;
+    return ctx;
+}
+
+static void qcrypto_der_encode_cons_begin(QCryptoEncodeContext *ctx,
+                                          uint8_t tag)
+{
+    QCryptoDerEncodeNode *node = g_new0(QCryptoDerEncodeNode, 1);
+
+    node->tag = tag;
+    node->parent = ctx->current_parent;
+    ctx->current_parent = node;
+    ctx->tail->next = node;
+    ctx->tail = node;
+}
+
+static void qcrypto_der_encode_cons_end(QCryptoEncodeContext *ctx)
+{
+    QCryptoDerEncodeNode *cons_node = ctx->current_parent;
+    size_t nbytes_len;
+
+    qcrypto_der_encode_length(cons_node->dlen, NULL, &nbytes_len);
+    /* 1 byte for Tag, nbyte_len for Length, and dlen for Value */
+    cons_node->parent->dlen += 1 + nbytes_len + cons_node->dlen;
+    ctx->current_parent = cons_node->parent;
+}
+
+void qcrypto_der_encode_seq_begin(QCryptoEncodeContext *ctx)
+{
+    uint8_t tag = QCRYPTO_DER_TAG(QCRYPTO_DER_TAG_CLASS_UNIV,
+                                  QCRYPTO_DER_TAG_ENC_CONS,
+                                  QCRYPTO_DER_TYPE_TAG_SEQ);
+    qcrypto_der_encode_cons_begin(ctx, tag);
+}
+
+void qcrypto_der_encode_seq_end(QCryptoEncodeContext *ctx)
+{
+    qcrypto_der_encode_cons_end(ctx);
+}
+
+void qcrypto_der_encode_oid(QCryptoEncodeContext *ctx,
+                            const uint8_t *src, size_t src_len)
+{
+    uint8_t tag = QCRYPTO_DER_TAG(QCRYPTO_DER_TAG_CLASS_UNIV,
+                                  QCRYPTO_DER_TAG_ENC_PRIM,
+                                  QCRYPTO_DER_TYPE_TAG_OID);
+    qcrypto_der_encode_prim(ctx, tag, src, src_len);
+}
+
+void qcrypto_der_encode_int(QCryptoEncodeContext *ctx,
+                            const uint8_t *src, size_t src_len)
+{
+    uint8_t tag = QCRYPTO_DER_TAG(QCRYPTO_DER_TAG_CLASS_UNIV,
+                                  QCRYPTO_DER_TAG_ENC_PRIM,
+                                  QCRYPTO_DER_TYPE_TAG_INT);
+    qcrypto_der_encode_prim(ctx, tag, src, src_len);
+}
+
+void qcrypto_der_encode_null(QCryptoEncodeContext *ctx)
+{
+    uint8_t tag = QCRYPTO_DER_TAG(QCRYPTO_DER_TAG_CLASS_UNIV,
+                                  QCRYPTO_DER_TAG_ENC_PRIM,
+                                  QCRYPTO_DER_TYPE_TAG_NULL);
+    qcrypto_der_encode_prim(ctx, tag, NULL, 0);
+}
+
+void qcrypto_der_encode_octet_str(QCryptoEncodeContext *ctx,
+                                  const uint8_t *src, size_t src_len)
+{
+    uint8_t tag = QCRYPTO_DER_TAG(QCRYPTO_DER_TAG_CLASS_UNIV,
+                                  QCRYPTO_DER_TAG_ENC_PRIM,
+                                  QCRYPTO_DER_TYPE_TAG_OCT_STR);
+    qcrypto_der_encode_prim(ctx, tag, src, src_len);
+}
+
+void qcrypto_der_encode_octet_str_begin(QCryptoEncodeContext *ctx)
+{
+    uint8_t tag = QCRYPTO_DER_TAG(QCRYPTO_DER_TAG_CLASS_UNIV,
+                                  QCRYPTO_DER_TAG_ENC_PRIM,
+                                  QCRYPTO_DER_TYPE_TAG_OCT_STR);
+    qcrypto_der_encode_cons_begin(ctx, tag);
+}
+
+void qcrypto_der_encode_octet_str_end(QCryptoEncodeContext *ctx)
+{
+    qcrypto_der_encode_cons_end(ctx);
+}
+
+size_t qcrypto_der_encode_ctx_buffer_len(QCryptoEncodeContext *ctx)
+{
+    return ctx->root.dlen;
+}
+
+void qcrypto_der_encode_ctx_flush_and_free(QCryptoEncodeContext *ctx,
+                                           uint8_t *dst)
+{
+    QCryptoDerEncodeNode *node, *prev;
+    size_t len;
+
+    for (prev = &ctx->root;
+         (node = prev->next) && (prev->next = node->next, 1);) {
+        /* Tag */
+        *dst++ = node->tag;
+
+        /* Length */
+        qcrypto_der_encode_length(node->dlen, dst, &len);
+        dst += len;
+
+        /* Value */
+        if (node->data) {
+            memcpy(dst, node->data, node->dlen);
+            dst += node->dlen;
+        }
+        g_free(node);
+    }
+    g_free(ctx);
+}
diff --git a/crypto/der.h b/crypto/der.h
new file mode 100644 (file)
index 0000000..f4ba6da
--- /dev/null
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2022 Bytedance
+ * Author: lei he <helei.sig11@bytedance.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QCRYPTO_ASN1_DECODER_H
+#define QCRYPTO_ASN1_DECODER_H
+
+#include "qapi/error.h"
+
+typedef struct QCryptoEncodeContext QCryptoEncodeContext;
+
+/* rsaEncryption: 1.2.840.113549.1.1.1 */
+#define QCRYPTO_OID_rsaEncryption "\x2A\x86\x48\x86\xF7\x0D\x01\x01\x01"
+
+/* Simple decoder used to parse DER encoded rsa keys. */
+
+/**
+ *  @opaque: user context.
+ *  @value: the starting address of |value| part of 'Tag-Length-Value' pattern.
+ *  @vlen: length of the |value|.
+ *  Returns: 0 for success, any other value is considered an error.
+ */
+typedef int (*QCryptoDERDecodeCb) (void *opaque, const uint8_t *value,
+                                   size_t vlen, Error **errp);
+
+/**
+ * qcrypto_der_decode_int:
+ * @data: pointer to address of input data
+ * @dlen: pointer to length of input data
+ * @cb: callback invoked when decode succeed, if cb equals NULL, no
+ * callback will be invoked
+ * @opaque: parameter passed to cb
+ *
+ * Decode integer from DER-encoded data.
+ *
+ * Returns: On success, *data points to rest data, and *dlen
+ * will be set to the rest length of data, if cb is not NULL, must
+ * return 0 to make decode success, at last, the length of the data
+ * part of the decoded INTEGER will be returned. Otherwise, -1 is
+ * returned and the valued of *data and *dlen keep unchanged.
+ */
+int qcrypto_der_decode_int(const uint8_t **data,
+                           size_t *dlen,
+                           QCryptoDERDecodeCb cb,
+                           void *opaque,
+                           Error **errp);
+/**
+ * qcrypto_der_decode_seq:
+ *
+ * Decode sequence from DER-encoded data, similar with der_decode_int.
+ *
+ * @data: pointer to address of input data
+ * @dlen: pointer to length of input data
+ * @cb: callback invoked when decode succeed, if cb equals NULL, no
+ * callback will be invoked
+ * @opaque: parameter passed to cb
+ *
+ * Returns: On success, *data points to rest data, and *dlen
+ * will be set to the rest length of data, if cb is not NULL, must
+ * return 0 to make decode success, at last, the length of the data
+ * part of the decoded SEQUENCE will be returned. Otherwise, -1 is
+ * returned and the valued of *data and *dlen keep unchanged.
+ */
+int qcrypto_der_decode_seq(const uint8_t **data,
+                           size_t *dlen,
+                           QCryptoDERDecodeCb cb,
+                           void *opaque,
+                           Error **errp);
+
+/**
+ * qcrypto_der_decode_oid:
+ *
+ * Decode OID from DER-encoded data, similar with der_decode_int.
+ *
+ * @data: pointer to address of input data
+ * @dlen: pointer to length of input data
+ * @cb: callback invoked when decode succeed, if cb equals NULL, no
+ * callback will be invoked
+ * @opaque: parameter passed to cb
+ *
+ * Returns: On success, *data points to rest data, and *dlen
+ * will be set to the rest length of data, if cb is not NULL, must
+ * return 0 to make decode success, at last, the length of the data
+ * part of the decoded OID will be returned. Otherwise, -1 is
+ * returned and the valued of *data and *dlen keep unchanged.
+ */
+int qcrypto_der_decode_oid(const uint8_t **data,
+                           size_t *dlen,
+                           QCryptoDERDecodeCb cb,
+                           void *opaque,
+                           Error **errp);
+
+/**
+ * qcrypto_der_decode_octet_str:
+ *
+ * Decode OCTET STRING from DER-encoded data, similar with der_decode_int.
+ *
+ * @data: pointer to address of input data
+ * @dlen: pointer to length of input data
+ * @cb: callback invoked when decode succeed, if cb equals NULL, no
+ * callback will be invoked
+ * @opaque: parameter passed to cb
+ *
+ * Returns: On success, *data points to rest data, and *dlen
+ * will be set to the rest length of data, if cb is not NULL, must
+ * return 0 to make decode success, at last, the length of the data
+ * part of the decoded OCTET STRING will be returned. Otherwise, -1 is
+ * returned and the valued of *data and *dlen keep unchanged.
+ */
+int qcrypto_der_decode_octet_str(const uint8_t **data,
+                                 size_t *dlen,
+                                 QCryptoDERDecodeCb cb,
+                                 void *opaque,
+                                 Error **errp);
+
+/**
+ * qcrypto_der_decode_bit_str:
+ *
+ * Decode BIT STRING from DER-encoded data, similar with der_decode_int.
+ *
+ * @data: pointer to address of input data
+ * @dlen: pointer to length of input data
+ * @cb: callback invoked when decode succeed, if cb equals NULL, no
+ * callback will be invoked
+ * @opaque: parameter passed to cb
+ *
+ * Returns: On success, *data points to rest data, and *dlen
+ * will be set to the rest length of data, if cb is not NULL, must
+ * return 0 to make decode success, at last, the length of the data
+ * part of the decoded BIT STRING will be returned. Otherwise, -1 is
+ * returned and the valued of *data and *dlen keep unchanged.
+ */
+int qcrypto_der_decode_bit_str(const uint8_t **data,
+                               size_t *dlen,
+                               QCryptoDERDecodeCb cb,
+                               void *opaque,
+                               Error **errp);
+
+
+/**
+ * qcrypto_der_decode_ctx_tag:
+ *
+ * Decode context specific tag
+ *
+ * @data: pointer to address of input data
+ * @dlen: pointer to length of input data
+ * @tag: expected value of context specific tag
+ * @cb: callback invoked when decode succeed, if cb equals NULL, no
+ * callback will be invoked
+ * @opaque: parameter passed to cb
+ *
+ * Returns: On success, *data points to rest data, and *dlen
+ * will be set to the rest length of data, if cb is not NULL, must
+ * return 0 to make decode success, at last, the length of the data
+ * part of the decoded BIT STRING will be returned. Otherwise, -1 is
+ * returned and the valued of *data and *dlen keep unchanged.
+ */
+int qcrypto_der_decode_ctx_tag(const uint8_t **data,
+                               size_t *dlen, int tag_id,
+                               QCryptoDERDecodeCb cb,
+                               void *opaque,
+                               Error **errp);
+
+/**
+ * qcrypto_der_encode_ctx_new:
+ *
+ * Allocate a context used for der encoding.
+ */
+QCryptoEncodeContext *qcrypto_der_encode_ctx_new(void);
+
+/**
+ * qcrypto_der_encode_seq_begin:
+ * @ctx: the encode context.
+ *
+ * Start encoding a SEQUENCE for ctx.
+ *
+ */
+void qcrypto_der_encode_seq_begin(QCryptoEncodeContext *ctx);
+
+/**
+ * qcrypto_der_encode_seq_begin:
+ * @ctx: the encode context.
+ *
+ * Finish uencoding a SEQUENCE for ctx.
+ *
+ */
+void qcrypto_der_encode_seq_end(QCryptoEncodeContext *ctx);
+
+
+/**
+ * qcrypto_der_encode_oid:
+ * @ctx: the encode context.
+ * @src: the source data of oid, note it should be already encoded, this
+ * function only add tag and length part for it.
+ *
+ * Encode an oid into ctx.
+ */
+void qcrypto_der_encode_oid(QCryptoEncodeContext *ctx,
+                            const uint8_t *src, size_t src_len);
+
+/**
+ * qcrypto_der_encode_int:
+ * @ctx: the encode context.
+ * @src: the source data of integer, note it should be already encoded, this
+ * function only add tag and length part for it.
+ *
+ * Encode an integer into ctx.
+ */
+void qcrypto_der_encode_int(QCryptoEncodeContext *ctx,
+                            const uint8_t *src, size_t src_len);
+
+/**
+ * qcrypto_der_encode_null:
+ * @ctx: the encode context.
+ *
+ * Encode a null into ctx.
+ */
+void qcrypto_der_encode_null(QCryptoEncodeContext *ctx);
+
+/**
+ * qcrypto_der_encode_octet_str:
+ * @ctx: the encode context.
+ * @src: the source data of the octet string.
+ *
+ * Encode a octet string into ctx.
+ */
+void qcrypto_der_encode_octet_str(QCryptoEncodeContext *ctx,
+                                  const uint8_t *src, size_t src_len);
+
+/**
+ * qcrypto_der_encode_octet_str_begin:
+ * @ctx: the encode context.
+ *
+ * Start encoding a octet string, All fields between
+ * qcrypto_der_encode_octet_str_begin and qcrypto_der_encode_octet_str_end
+ * are encoded as an octet string. This is useful when we need to encode a
+ * encoded SEQUENCE as OCTET STRING.
+ */
+void qcrypto_der_encode_octet_str_begin(QCryptoEncodeContext *ctx);
+
+/**
+ * qcrypto_der_encode_octet_str_end:
+ * @ctx: the encode context.
+ *
+ * Finish encoding a octet string, All fields between
+ * qcrypto_der_encode_octet_str_begin and qcrypto_der_encode_octet_str_end
+ * are encoded as an octet string. This is useful when we need to encode a
+ * encoded SEQUENCE as OCTET STRING.
+ */
+void qcrypto_der_encode_octet_str_end(QCryptoEncodeContext *ctx);
+
+/**
+ * qcrypto_der_encode_ctx_buffer_len:
+ * @ctx: the encode context.
+ *
+ * Compute the expected buffer size to save all encoded things.
+ */
+size_t qcrypto_der_encode_ctx_buffer_len(QCryptoEncodeContext *ctx);
+
+/**
+ * qcrypto_der_encode_ctx_flush_and_free:
+ * @ctx: the encode context.
+ * @dst: the destination to save the encoded data, the length of dst should
+ * not less than qcrypto_der_encode_cxt_buffer_len
+ *
+ * Flush all encoded data into dst, then free ctx.
+ */
+void qcrypto_der_encode_ctx_flush_and_free(QCryptoEncodeContext *ctx,
+                                           uint8_t *dst);
+
+#endif  /* QCRYPTO_ASN1_DECODER_H */
diff --git a/crypto/desrfb.c b/crypto/desrfb.c
deleted file mode 100644 (file)
index 3274c36..0000000
+++ /dev/null
@@ -1,416 +0,0 @@
-/*
- * This is D3DES (V5.09) by Richard Outerbridge with the double and
- * triple-length support removed for use in VNC.  Also the bytebit[] array
- * has been reversed so that the most significant bit in each byte of the
- * key is ignored, not the least significant.
- *
- * These changes are:
- *  Copyright (C) 1999 AT&T Laboratories Cambridge.  All Rights Reserved.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-
-/* D3DES (V5.09) -
- *
- * A portable, public domain, version of the Data Encryption Standard.
- *
- * Written with Symantec's THINK (Lightspeed) C by Richard Outerbridge.
- * Thanks to: Dan Hoey for his excellent Initial and Inverse permutation
- * code;  Jim Gillogly & Phil Karn for the DES key schedule code; Dennis
- * Ferguson, Eric Young and Dana How for comparing notes; and Ray Lau,
- * for humouring me on.
- *
- * Copyright (c) 1988,1989,1990,1991,1992 by Richard Outerbridge.
- * (GEnie : OUTER; CIS : [71755,204]) Graven Imagery, 1992.
- */
-
-#include "qemu/osdep.h"
-#include "crypto/desrfb.h"
-
-static void scrunch(unsigned char *, unsigned long *);
-static void unscrun(unsigned long *, unsigned char *);
-static void desfunc(unsigned long *, unsigned long *);
-static void cookey(unsigned long *);
-
-static unsigned long KnL[32] = { 0L };
-
-static const unsigned short bytebit[8] = {
-        01, 02, 04, 010, 020, 040, 0100, 0200 };
-
-static const unsigned long bigbyte[24] = {
-        0x800000L,     0x400000L,      0x200000L,      0x100000L,
-        0x80000L,      0x40000L,       0x20000L,       0x10000L,
-        0x8000L,       0x4000L,        0x2000L,        0x1000L,
-        0x800L,        0x400L,         0x200L,         0x100L,
-        0x80L,         0x40L,          0x20L,          0x10L,
-        0x8L,          0x4L,           0x2L,           0x1L    };
-
-/* Use the key schedule specified in the Standard (ANSI X3.92-1981). */
-
-static const unsigned char pc1[56] = {
-        56, 48, 40, 32, 24, 16,  8,     0, 57, 49, 41, 33, 25, 17,
-         9,  1, 58, 50, 42, 34, 26,    18, 10,  2, 59, 51, 43, 35,
-        62, 54, 46, 38, 30, 22, 14,     6, 61, 53, 45, 37, 29, 21,
-        13,  5, 60, 52, 44, 36, 28,    20, 12,  4, 27, 19, 11,  3 };
-
-static const unsigned char totrot[16] = {
-        1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28 };
-
-static const unsigned char pc2[48] = {
-        13, 16, 10, 23,  0,  4,  2, 27, 14,  5, 20,  9,
-        22, 18, 11,  3, 25,  7, 15,  6, 26, 19, 12,  1,
-        40, 51, 30, 36, 46, 54, 29, 39, 50, 44, 32, 47,
-        43, 48, 38, 55, 33, 52, 45, 41, 49, 35, 28, 31 };
-
-/* Thanks to James Gillogly & Phil Karn! */
-void deskey(unsigned char *key, int edf)
-{
-        register int i, j, l, m, n;
-        unsigned char pc1m[56], pcr[56];
-        unsigned long kn[32];
-
-        for ( j = 0; j < 56; j++ ) {
-                l = pc1[j];
-                m = l & 07;
-                pc1m[j] = (key[l >> 3] & bytebit[m]) ? 1 : 0;
-                }
-        for( i = 0; i < 16; i++ ) {
-                if( edf == DE1 ) m = (15 - i) << 1;
-                else m = i << 1;
-                n = m + 1;
-                kn[m] = kn[n] = 0L;
-                for( j = 0; j < 28; j++ ) {
-                        l = j + totrot[i];
-                        if( l < 28 ) pcr[j] = pc1m[l];
-                        else pcr[j] = pc1m[l - 28];
-                        }
-                for( j = 28; j < 56; j++ ) {
-                    l = j + totrot[i];
-                    if( l < 56 ) pcr[j] = pc1m[l];
-                    else pcr[j] = pc1m[l - 28];
-                    }
-                for( j = 0; j < 24; j++ ) {
-                        if( pcr[pc2[j]] ) kn[m] |= bigbyte[j];
-                        if( pcr[pc2[j+24]] ) kn[n] |= bigbyte[j];
-                        }
-                }
-        cookey(kn);
-        return;
-        }
-
-static void cookey(register unsigned long *raw1)
-{
-        register unsigned long *cook, *raw0;
-        unsigned long dough[32];
-        register int i;
-
-        cook = dough;
-        for( i = 0; i < 16; i++, raw1++ ) {
-                raw0 = raw1++;
-                *cook   = (*raw0 & 0x00fc0000L) << 6;
-                *cook  |= (*raw0 & 0x00000fc0L) << 10;
-                *cook  |= (*raw1 & 0x00fc0000L) >> 10;
-                *cook++ |= (*raw1 & 0x00000fc0L) >> 6;
-                *cook   = (*raw0 & 0x0003f000L) << 12;
-                *cook  |= (*raw0 & 0x0000003fL) << 16;
-                *cook  |= (*raw1 & 0x0003f000L) >> 4;
-                *cook++ |= (*raw1 & 0x0000003fL);
-                }
-        usekey(dough);
-        return;
-        }
-
-void usekey(register unsigned long *from)
-{
-        register unsigned long *to, *endp;
-
-        to = KnL, endp = &KnL[32];
-        while( to < endp ) *to++ = *from++;
-        return;
-        }
-
-void des(unsigned char *inblock, unsigned char *outblock)
-{
-        unsigned long work[2];
-
-        scrunch(inblock, work);
-        desfunc(work, KnL);
-        unscrun(work, outblock);
-        return;
-        }
-
-static void scrunch(register unsigned char *outof, register unsigned long *into)
-{
-        *into   = (*outof++ & 0xffL) << 24;
-        *into  |= (*outof++ & 0xffL) << 16;
-        *into  |= (*outof++ & 0xffL) << 8;
-        *into++ |= (*outof++ & 0xffL);
-        *into   = (*outof++ & 0xffL) << 24;
-        *into  |= (*outof++ & 0xffL) << 16;
-        *into  |= (*outof++ & 0xffL) << 8;
-        *into  |= (*outof   & 0xffL);
-        return;
-        }
-
-static void unscrun(register unsigned long *outof, register unsigned char *into)
-{
-        *into++ = (unsigned char)((*outof >> 24) & 0xffL);
-        *into++ = (unsigned char)((*outof >> 16) & 0xffL);
-        *into++ = (unsigned char)((*outof >>  8) & 0xffL);
-        *into++ = (unsigned char)(*outof++      & 0xffL);
-        *into++ = (unsigned char)((*outof >> 24) & 0xffL);
-        *into++ = (unsigned char)((*outof >> 16) & 0xffL);
-        *into++ = (unsigned char)((*outof >>  8) & 0xffL);
-        *into  =  (unsigned char)(*outof        & 0xffL);
-        return;
-        }
-
-static const unsigned long SP1[64] = {
-        0x01010400L, 0x00000000L, 0x00010000L, 0x01010404L,
-        0x01010004L, 0x00010404L, 0x00000004L, 0x00010000L,
-        0x00000400L, 0x01010400L, 0x01010404L, 0x00000400L,
-        0x01000404L, 0x01010004L, 0x01000000L, 0x00000004L,
-        0x00000404L, 0x01000400L, 0x01000400L, 0x00010400L,
-        0x00010400L, 0x01010000L, 0x01010000L, 0x01000404L,
-        0x00010004L, 0x01000004L, 0x01000004L, 0x00010004L,
-        0x00000000L, 0x00000404L, 0x00010404L, 0x01000000L,
-        0x00010000L, 0x01010404L, 0x00000004L, 0x01010000L,
-        0x01010400L, 0x01000000L, 0x01000000L, 0x00000400L,
-        0x01010004L, 0x00010000L, 0x00010400L, 0x01000004L,
-        0x00000400L, 0x00000004L, 0x01000404L, 0x00010404L,
-        0x01010404L, 0x00010004L, 0x01010000L, 0x01000404L,
-        0x01000004L, 0x00000404L, 0x00010404L, 0x01010400L,
-        0x00000404L, 0x01000400L, 0x01000400L, 0x00000000L,
-        0x00010004L, 0x00010400L, 0x00000000L, 0x01010004L };
-
-static const unsigned long SP2[64] = {
-        0x80108020L, 0x80008000L, 0x00008000L, 0x00108020L,
-        0x00100000L, 0x00000020L, 0x80100020L, 0x80008020L,
-        0x80000020L, 0x80108020L, 0x80108000L, 0x80000000L,
-        0x80008000L, 0x00100000L, 0x00000020L, 0x80100020L,
-        0x00108000L, 0x00100020L, 0x80008020L, 0x00000000L,
-        0x80000000L, 0x00008000L, 0x00108020L, 0x80100000L,
-        0x00100020L, 0x80000020L, 0x00000000L, 0x00108000L,
-        0x00008020L, 0x80108000L, 0x80100000L, 0x00008020L,
-        0x00000000L, 0x00108020L, 0x80100020L, 0x00100000L,
-        0x80008020L, 0x80100000L, 0x80108000L, 0x00008000L,
-        0x80100000L, 0x80008000L, 0x00000020L, 0x80108020L,
-        0x00108020L, 0x00000020L, 0x00008000L, 0x80000000L,
-        0x00008020L, 0x80108000L, 0x00100000L, 0x80000020L,
-        0x00100020L, 0x80008020L, 0x80000020L, 0x00100020L,
-        0x00108000L, 0x00000000L, 0x80008000L, 0x00008020L,
-        0x80000000L, 0x80100020L, 0x80108020L, 0x00108000L };
-
-static const unsigned long SP3[64] = {
-        0x00000208L, 0x08020200L, 0x00000000L, 0x08020008L,
-        0x08000200L, 0x00000000L, 0x00020208L, 0x08000200L,
-        0x00020008L, 0x08000008L, 0x08000008L, 0x00020000L,
-        0x08020208L, 0x00020008L, 0x08020000L, 0x00000208L,
-        0x08000000L, 0x00000008L, 0x08020200L, 0x00000200L,
-        0x00020200L, 0x08020000L, 0x08020008L, 0x00020208L,
-        0x08000208L, 0x00020200L, 0x00020000L, 0x08000208L,
-        0x00000008L, 0x08020208L, 0x00000200L, 0x08000000L,
-        0x08020200L, 0x08000000L, 0x00020008L, 0x00000208L,
-        0x00020000L, 0x08020200L, 0x08000200L, 0x00000000L,
-        0x00000200L, 0x00020008L, 0x08020208L, 0x08000200L,
-        0x08000008L, 0x00000200L, 0x00000000L, 0x08020008L,
-        0x08000208L, 0x00020000L, 0x08000000L, 0x08020208L,
-        0x00000008L, 0x00020208L, 0x00020200L, 0x08000008L,
-        0x08020000L, 0x08000208L, 0x00000208L, 0x08020000L,
-        0x00020208L, 0x00000008L, 0x08020008L, 0x00020200L };
-
-static const unsigned long SP4[64] = {
-        0x00802001L, 0x00002081L, 0x00002081L, 0x00000080L,
-        0x00802080L, 0x00800081L, 0x00800001L, 0x00002001L,
-        0x00000000L, 0x00802000L, 0x00802000L, 0x00802081L,
-        0x00000081L, 0x00000000L, 0x00800080L, 0x00800001L,
-        0x00000001L, 0x00002000L, 0x00800000L, 0x00802001L,
-        0x00000080L, 0x00800000L, 0x00002001L, 0x00002080L,
-        0x00800081L, 0x00000001L, 0x00002080L, 0x00800080L,
-        0x00002000L, 0x00802080L, 0x00802081L, 0x00000081L,
-        0x00800080L, 0x00800001L, 0x00802000L, 0x00802081L,
-        0x00000081L, 0x00000000L, 0x00000000L, 0x00802000L,
-        0x00002080L, 0x00800080L, 0x00800081L, 0x00000001L,
-        0x00802001L, 0x00002081L, 0x00002081L, 0x00000080L,
-        0x00802081L, 0x00000081L, 0x00000001L, 0x00002000L,
-        0x00800001L, 0x00002001L, 0x00802080L, 0x00800081L,
-        0x00002001L, 0x00002080L, 0x00800000L, 0x00802001L,
-        0x00000080L, 0x00800000L, 0x00002000L, 0x00802080L };
-
-static const unsigned long SP5[64] = {
-        0x00000100L, 0x02080100L, 0x02080000L, 0x42000100L,
-        0x00080000L, 0x00000100L, 0x40000000L, 0x02080000L,
-        0x40080100L, 0x00080000L, 0x02000100L, 0x40080100L,
-        0x42000100L, 0x42080000L, 0x00080100L, 0x40000000L,
-        0x02000000L, 0x40080000L, 0x40080000L, 0x00000000L,
-        0x40000100L, 0x42080100L, 0x42080100L, 0x02000100L,
-        0x42080000L, 0x40000100L, 0x00000000L, 0x42000000L,
-        0x02080100L, 0x02000000L, 0x42000000L, 0x00080100L,
-        0x00080000L, 0x42000100L, 0x00000100L, 0x02000000L,
-        0x40000000L, 0x02080000L, 0x42000100L, 0x40080100L,
-        0x02000100L, 0x40000000L, 0x42080000L, 0x02080100L,
-        0x40080100L, 0x00000100L, 0x02000000L, 0x42080000L,
-        0x42080100L, 0x00080100L, 0x42000000L, 0x42080100L,
-        0x02080000L, 0x00000000L, 0x40080000L, 0x42000000L,
-        0x00080100L, 0x02000100L, 0x40000100L, 0x00080000L,
-        0x00000000L, 0x40080000L, 0x02080100L, 0x40000100L };
-
-static const unsigned long SP6[64] = {
-        0x20000010L, 0x20400000L, 0x00004000L, 0x20404010L,
-        0x20400000L, 0x00000010L, 0x20404010L, 0x00400000L,
-        0x20004000L, 0x00404010L, 0x00400000L, 0x20000010L,
-        0x00400010L, 0x20004000L, 0x20000000L, 0x00004010L,
-        0x00000000L, 0x00400010L, 0x20004010L, 0x00004000L,
-        0x00404000L, 0x20004010L, 0x00000010L, 0x20400010L,
-        0x20400010L, 0x00000000L, 0x00404010L, 0x20404000L,
-        0x00004010L, 0x00404000L, 0x20404000L, 0x20000000L,
-        0x20004000L, 0x00000010L, 0x20400010L, 0x00404000L,
-        0x20404010L, 0x00400000L, 0x00004010L, 0x20000010L,
-        0x00400000L, 0x20004000L, 0x20000000L, 0x00004010L,
-        0x20000010L, 0x20404010L, 0x00404000L, 0x20400000L,
-        0x00404010L, 0x20404000L, 0x00000000L, 0x20400010L,
-        0x00000010L, 0x00004000L, 0x20400000L, 0x00404010L,
-        0x00004000L, 0x00400010L, 0x20004010L, 0x00000000L,
-        0x20404000L, 0x20000000L, 0x00400010L, 0x20004010L };
-
-static const unsigned long SP7[64] = {
-        0x00200000L, 0x04200002L, 0x04000802L, 0x00000000L,
-        0x00000800L, 0x04000802L, 0x00200802L, 0x04200800L,
-        0x04200802L, 0x00200000L, 0x00000000L, 0x04000002L,
-        0x00000002L, 0x04000000L, 0x04200002L, 0x00000802L,
-        0x04000800L, 0x00200802L, 0x00200002L, 0x04000800L,
-        0x04000002L, 0x04200000L, 0x04200800L, 0x00200002L,
-        0x04200000L, 0x00000800L, 0x00000802L, 0x04200802L,
-        0x00200800L, 0x00000002L, 0x04000000L, 0x00200800L,
-        0x04000000L, 0x00200800L, 0x00200000L, 0x04000802L,
-        0x04000802L, 0x04200002L, 0x04200002L, 0x00000002L,
-        0x00200002L, 0x04000000L, 0x04000800L, 0x00200000L,
-        0x04200800L, 0x00000802L, 0x00200802L, 0x04200800L,
-        0x00000802L, 0x04000002L, 0x04200802L, 0x04200000L,
-        0x00200800L, 0x00000000L, 0x00000002L, 0x04200802L,
-        0x00000000L, 0x00200802L, 0x04200000L, 0x00000800L,
-        0x04000002L, 0x04000800L, 0x00000800L, 0x00200002L };
-
-static const unsigned long SP8[64] = {
-        0x10001040L, 0x00001000L, 0x00040000L, 0x10041040L,
-        0x10000000L, 0x10001040L, 0x00000040L, 0x10000000L,
-        0x00040040L, 0x10040000L, 0x10041040L, 0x00041000L,
-        0x10041000L, 0x00041040L, 0x00001000L, 0x00000040L,
-        0x10040000L, 0x10000040L, 0x10001000L, 0x00001040L,
-        0x00041000L, 0x00040040L, 0x10040040L, 0x10041000L,
-        0x00001040L, 0x00000000L, 0x00000000L, 0x10040040L,
-        0x10000040L, 0x10001000L, 0x00041040L, 0x00040000L,
-        0x00041040L, 0x00040000L, 0x10041000L, 0x00001000L,
-        0x00000040L, 0x10040040L, 0x00001000L, 0x00041040L,
-        0x10001000L, 0x00000040L, 0x10000040L, 0x10040000L,
-        0x10040040L, 0x10000000L, 0x00040000L, 0x10001040L,
-        0x00000000L, 0x10041040L, 0x00040040L, 0x10000040L,
-        0x10040000L, 0x10001000L, 0x10001040L, 0x00000000L,
-        0x10041040L, 0x00041000L, 0x00041000L, 0x00001040L,
-        0x00001040L, 0x00040040L, 0x10000000L, 0x10041000L };
-
-static void desfunc(register unsigned long *block, register unsigned long *keys)
-{
-        register unsigned long fval, work, right, leftt;
-        register int round;
-
-        leftt = block[0];
-        right = block[1];
-        work = ((leftt >> 4) ^ right) & 0x0f0f0f0fL;
-        right ^= work;
-        leftt ^= (work << 4);
-        work = ((leftt >> 16) ^ right) & 0x0000ffffL;
-        right ^= work;
-        leftt ^= (work << 16);
-        work = ((right >> 2) ^ leftt) & 0x33333333L;
-        leftt ^= work;
-        right ^= (work << 2);
-        work = ((right >> 8) ^ leftt) & 0x00ff00ffL;
-        leftt ^= work;
-        right ^= (work << 8);
-        right = ((right << 1) | ((right >> 31) & 1L)) & 0xffffffffL;
-        work = (leftt ^ right) & 0xaaaaaaaaL;
-        leftt ^= work;
-        right ^= work;
-        leftt = ((leftt << 1) | ((leftt >> 31) & 1L)) & 0xffffffffL;
-
-        for( round = 0; round < 8; round++ ) {
-                work  = (right << 28) | (right >> 4);
-                work ^= *keys++;
-                fval  = SP7[ work               & 0x3fL];
-                fval |= SP5[(work >>  8) & 0x3fL];
-                fval |= SP3[(work >> 16) & 0x3fL];
-                fval |= SP1[(work >> 24) & 0x3fL];
-                work  = right ^ *keys++;
-                fval |= SP8[ work               & 0x3fL];
-                fval |= SP6[(work >>  8) & 0x3fL];
-                fval |= SP4[(work >> 16) & 0x3fL];
-                fval |= SP2[(work >> 24) & 0x3fL];
-                leftt ^= fval;
-                work  = (leftt << 28) | (leftt >> 4);
-                work ^= *keys++;
-                fval  = SP7[ work               & 0x3fL];
-                fval |= SP5[(work >>  8) & 0x3fL];
-                fval |= SP3[(work >> 16) & 0x3fL];
-                fval |= SP1[(work >> 24) & 0x3fL];
-                work  = leftt ^ *keys++;
-                fval |= SP8[ work               & 0x3fL];
-                fval |= SP6[(work >>  8) & 0x3fL];
-                fval |= SP4[(work >> 16) & 0x3fL];
-                fval |= SP2[(work >> 24) & 0x3fL];
-                right ^= fval;
-                }
-
-        right = (right << 31) | (right >> 1);
-        work = (leftt ^ right) & 0xaaaaaaaaL;
-        leftt ^= work;
-        right ^= work;
-        leftt = (leftt << 31) | (leftt >> 1);
-        work = ((leftt >> 8) ^ right) & 0x00ff00ffL;
-        right ^= work;
-        leftt ^= (work << 8);
-        work = ((leftt >> 2) ^ right) & 0x33333333L;
-        right ^= work;
-        leftt ^= (work << 2);
-        work = ((right >> 16) ^ leftt) & 0x0000ffffL;
-        leftt ^= work;
-        right ^= (work << 16);
-        work = ((right >> 4) ^ leftt) & 0x0f0f0f0fL;
-        leftt ^= work;
-        right ^= (work << 4);
-        *block++ = right;
-        *block = leftt;
-        return;
-        }
-
-/* Validation sets:
- *
- * Single-length key, single-length plaintext -
- * Key   : 0123 4567 89ab cdef
- * Plain  : 0123 4567 89ab cde7
- * Cipher : c957 4425 6a5e d31d
- *
- * Double-length key, single-length plaintext -
- * Key   : 0123 4567 89ab cdef fedc ba98 7654 3210
- * Plain  : 0123 4567 89ab cde7
- * Cipher : 7f1d 0a77 826b 8aff
- *
- * Double-length key, double-length plaintext -
- * Key   : 0123 4567 89ab cdef fedc ba98 7654 3210
- * Plain  : 0123 4567 89ab cdef 0123 4567 89ab cdff
- * Cipher : 27a0 8440 406a df60 278f 47cf 42d6 15d7
- *
- * Triple-length key, single-length plaintext -
- * Key   : 0123 4567 89ab cdef fedc ba98 7654 3210 89ab cdef 0123 4567
- * Plain  : 0123 4567 89ab cde7
- * Cipher : de0b 7c06 ae5e 0ed5
- *
- * Triple-length key, double-length plaintext -
- * Key   : 0123 4567 89ab cdef fedc ba98 7654 3210 89ab cdef 0123 4567
- * Plain  : 0123 4567 89ab cdef 0123 4567 89ab cdff
- * Cipher : ad0d 1b30 ac17 cf07 0ed1 1c63 81e4 4de5
- *
- * d3des V5.0a rwo 9208.07 18:44 Graven Imagery
- **********************************************************************/
index cf34c694af6f57d92cd22f5f77ee01313d3666aa..3ebea392920d50da04109c0aa4a538891e861d21 100644 (file)
@@ -13,7 +13,6 @@
 #include "qemu/osdep.h"
 #include "qemu/iov.h"
 #include "qemu/sockets.h"
-#include "qemu-common.h"
 #include "qapi/error.h"
 #include "crypto/hash.h"
 #include "crypto/hmac.h"
@@ -88,8 +87,8 @@ qcrypto_afalg_hash_hmac_ctx_new(QCryptoHashAlgorithm alg,
 
     /* HMAC needs setkey */
     if (is_hmac) {
-        if (qemu_setsockopt(afalg->tfmfd, SOL_ALG, ALG_SET_KEY,
-                            key, nkey) != 0) {
+        if (setsockopt(afalg->tfmfd, SOL_ALG, ALG_SET_KEY,
+                       key, nkey) != 0) {
             error_setg_errno(errp, errno, "Set hmac key failed");
             qcrypto_afalg_comm_free(afalg);
             return NULL;
diff --git a/crypto/hash-gnutls.c b/crypto/hash-gnutls.c
new file mode 100644 (file)
index 0000000..17911ac
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * QEMU Crypto hash algorithms
+ *
+ * Copyright (c) 2021 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <gnutls/crypto.h>
+#include "qapi/error.h"
+#include "crypto/hash.h"
+#include "hashpriv.h"
+
+
+static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = {
+    [QCRYPTO_HASH_ALG_MD5] = GNUTLS_DIG_MD5,
+    [QCRYPTO_HASH_ALG_SHA1] = GNUTLS_DIG_SHA1,
+    [QCRYPTO_HASH_ALG_SHA224] = GNUTLS_DIG_SHA224,
+    [QCRYPTO_HASH_ALG_SHA256] = GNUTLS_DIG_SHA256,
+    [QCRYPTO_HASH_ALG_SHA384] = GNUTLS_DIG_SHA384,
+    [QCRYPTO_HASH_ALG_SHA512] = GNUTLS_DIG_SHA512,
+    [QCRYPTO_HASH_ALG_RIPEMD160] = GNUTLS_DIG_RMD160,
+};
+
+gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
+{
+    size_t i;
+    const gnutls_digest_algorithm_t *algs;
+    if (alg >= G_N_ELEMENTS(qcrypto_hash_alg_map) ||
+        qcrypto_hash_alg_map[alg] == GNUTLS_DIG_UNKNOWN) {
+        return false;
+    }
+    algs = gnutls_digest_list();
+    for (i = 0; algs[i] != GNUTLS_DIG_UNKNOWN; i++) {
+        if (algs[i] == qcrypto_hash_alg_map[alg]) {
+            return true;
+        }
+    }
+    return false;
+}
+
+
+static int
+qcrypto_gnutls_hash_bytesv(QCryptoHashAlgorithm alg,
+                           const struct iovec *iov,
+                           size_t niov,
+                           uint8_t **result,
+                           size_t *resultlen,
+                           Error **errp)
+{
+    int i, ret;
+    gnutls_hash_hd_t hash;
+
+    if (!qcrypto_hash_supports(alg)) {
+        error_setg(errp,
+                   "Unknown hash algorithm %d",
+                   alg);
+        return -1;
+    }
+
+    ret = gnutls_hash_get_len(qcrypto_hash_alg_map[alg]);
+    if (*resultlen == 0) {
+        *resultlen = ret;
+        *result = g_new0(uint8_t, *resultlen);
+    } else if (*resultlen != ret) {
+        error_setg(errp,
+                   "Result buffer size %zu is smaller than hash %d",
+                   *resultlen, ret);
+        return -1;
+    }
+
+    ret = gnutls_hash_init(&hash, qcrypto_hash_alg_map[alg]);
+    if (ret < 0) {
+        error_setg(errp,
+                   "Unable to initialize hash algorithm: %s",
+                   gnutls_strerror(ret));
+        return -1;
+    }
+
+    for (i = 0; i < niov; i++) {
+        gnutls_hash(hash, iov[i].iov_base, iov[i].iov_len);
+    }
+
+    gnutls_hash_deinit(hash, *result);
+    return 0;
+}
+
+
+QCryptoHashDriver qcrypto_hash_lib_driver = {
+    .hash_bytesv = qcrypto_gnutls_hash_bytesv,
+};
index 2a6ee7c7d53fc8693a4881ce2edb66a4540199d6..1ca1a4106283f2bc3f11d362433aac69d9737c81 100644 (file)
 #include <nettle/sha.h>
 #include <nettle/ripemd160.h>
 
-#if CONFIG_NETTLE_VERSION_MAJOR < 3
-typedef unsigned int     hash_length_t;
-#else
-typedef size_t       hash_length_t;
-#endif
-
 typedef void (*qcrypto_nettle_init)(void *ctx);
 typedef void (*qcrypto_nettle_write)(void *ctx,
-                                     hash_length_t len,
+                                     size_t len,
                                      const uint8_t *buf);
 typedef void (*qcrypto_nettle_result)(void *ctx,
-                                      hash_length_t len,
+                                      size_t len,
                                       uint8_t *buf);
 
 union qcrypto_hash_ctx {
diff --git a/crypto/hmac-gnutls.c b/crypto/hmac-gnutls.c
new file mode 100644 (file)
index 0000000..24db383
--- /dev/null
@@ -0,0 +1,139 @@
+/*
+ * QEMU Crypto hmac algorithms
+ *
+ * Copyright (c) 2021 Red Hat, Inc.
+ *
+ * Derived from hmac-gcrypt.c:
+ *
+ *   Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <gnutls/crypto.h>
+
+#include "qapi/error.h"
+#include "crypto/hmac.h"
+#include "hmacpriv.h"
+
+static int qcrypto_hmac_alg_map[QCRYPTO_HASH_ALG__MAX] = {
+    [QCRYPTO_HASH_ALG_MD5] = GNUTLS_MAC_MD5,
+    [QCRYPTO_HASH_ALG_SHA1] = GNUTLS_MAC_SHA1,
+    [QCRYPTO_HASH_ALG_SHA224] = GNUTLS_MAC_SHA224,
+    [QCRYPTO_HASH_ALG_SHA256] = GNUTLS_MAC_SHA256,
+    [QCRYPTO_HASH_ALG_SHA384] = GNUTLS_MAC_SHA384,
+    [QCRYPTO_HASH_ALG_SHA512] = GNUTLS_MAC_SHA512,
+    [QCRYPTO_HASH_ALG_RIPEMD160] = GNUTLS_MAC_RMD160,
+};
+
+typedef struct QCryptoHmacGnutls QCryptoHmacGnutls;
+struct QCryptoHmacGnutls {
+    gnutls_hmac_hd_t handle;
+};
+
+bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg)
+{
+    size_t i;
+    const gnutls_digest_algorithm_t *algs;
+    if (alg >= G_N_ELEMENTS(qcrypto_hmac_alg_map) ||
+        qcrypto_hmac_alg_map[alg] == GNUTLS_DIG_UNKNOWN) {
+        return false;
+    }
+    algs = gnutls_digest_list();
+    for (i = 0; algs[i] != GNUTLS_DIG_UNKNOWN; i++) {
+        if (algs[i] == qcrypto_hmac_alg_map[alg]) {
+            return true;
+        }
+    }
+    return false;
+}
+
+void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                           const uint8_t *key, size_t nkey,
+                           Error **errp)
+{
+    QCryptoHmacGnutls *ctx;
+    int err;
+
+    if (!qcrypto_hmac_supports(alg)) {
+        error_setg(errp, "Unsupported hmac algorithm %s",
+                   QCryptoHashAlgorithm_str(alg));
+        return NULL;
+    }
+
+    ctx = g_new0(QCryptoHmacGnutls, 1);
+
+    err = gnutls_hmac_init(&ctx->handle,
+                           qcrypto_hmac_alg_map[alg],
+                           (const void *)key, nkey);
+    if (err != 0) {
+        error_setg(errp, "Cannot initialize hmac: %s",
+                   gnutls_strerror(err));
+        goto error;
+    }
+
+    return ctx;
+
+error:
+    g_free(ctx);
+    return NULL;
+}
+
+static void
+qcrypto_gnutls_hmac_ctx_free(QCryptoHmac *hmac)
+{
+    QCryptoHmacGnutls *ctx;
+
+    ctx = hmac->opaque;
+    gnutls_hmac_deinit(ctx->handle, NULL);
+
+    g_free(ctx);
+}
+
+static int
+qcrypto_gnutls_hmac_bytesv(QCryptoHmac *hmac,
+                           const struct iovec *iov,
+                           size_t niov,
+                           uint8_t **result,
+                           size_t *resultlen,
+                           Error **errp)
+{
+    QCryptoHmacGnutls *ctx;
+    uint32_t ret;
+    int i;
+
+    ctx = hmac->opaque;
+
+    for (i = 0; i < niov; i++) {
+        gnutls_hmac(ctx->handle, iov[i].iov_base, iov[i].iov_len);
+    }
+
+    ret = gnutls_hmac_get_len(qcrypto_hmac_alg_map[hmac->alg]);
+    if (ret <= 0) {
+        error_setg(errp, "Unable to get hmac length: %s",
+                   gnutls_strerror(ret));
+        return -1;
+    }
+
+    if (*resultlen == 0) {
+        *resultlen = ret;
+        *result = g_new0(uint8_t, *resultlen);
+    } else if (*resultlen != ret) {
+        error_setg(errp, "Result buffer size %zu is smaller than hmac %d",
+                   *resultlen, ret);
+        return -1;
+    }
+
+    gnutls_hmac_output(ctx->handle, *result);
+
+    return 0;
+}
+
+QCryptoHmacDriver qcrypto_hmac_lib_driver = {
+    .hmac_bytesv = qcrypto_gnutls_hmac_bytesv,
+    .hmac_free = qcrypto_gnutls_hmac_ctx_free,
+};
index 1152b741fdcbadace957d4ecf7df7a93cae9328c..1ad6c4f253069223b6a479bca25b25e858030918 100644 (file)
 #include "hmacpriv.h"
 #include <nettle/hmac.h>
 
-#if CONFIG_NETTLE_VERSION_MAJOR < 3
-typedef unsigned int hmac_length_t;
-#else
-typedef size_t hmac_length_t;
-#endif
-
 typedef void (*qcrypto_nettle_hmac_setkey)(void *ctx,
-                                           hmac_length_t key_length,
+                                           size_t key_length,
                                            const uint8_t *key);
 
 typedef void (*qcrypto_nettle_hmac_update)(void *ctx,
-                                           hmac_length_t length,
+                                           size_t length,
                                            const uint8_t *data);
 
 typedef void (*qcrypto_nettle_hmac_digest)(void *ctx,
-                                           hmac_length_t length,
+                                           size_t length,
                                            uint8_t *digest);
 
 typedef struct QCryptoHmacNettle QCryptoHmacNettle;
index 4387ca2587f956289c6b45843872621a9a4c3587..62dfe8257afd3d3c6926cb0d3e6482784cef2bc9 100644 (file)
@@ -28,19 +28,18 @@ struct QCryptoHmacDriver {
     void (*hmac_free)(QCryptoHmac *hmac);
 };
 
-extern void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg,
-                                  const uint8_t *key, size_t nkey,
-                                  Error **errp);
+void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                           const uint8_t *key, size_t nkey,
+                           Error **errp);
 extern QCryptoHmacDriver qcrypto_hmac_lib_driver;
 
 #ifdef CONFIG_AF_ALG
 
 #include "afalgpriv.h"
 
-extern QCryptoAFAlg *
-qcrypto_afalg_hmac_ctx_new(QCryptoHashAlgorithm alg,
-                           const uint8_t *key, size_t nkey,
-                           Error **errp);
+QCryptoAFAlg *qcrypto_afalg_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                                         const uint8_t *key, size_t nkey,
+                                         Error **errp);
 extern QCryptoHmacDriver qcrypto_hmac_afalg_driver;
 
 #endif
index ea233b9192ab070a208c8786c931f5b7036d4376..fb7f1bff1053526a8b7c1f097ada5951f907ab8a 100644 (file)
 #include "crypto/random.h"
 
 /* #define DEBUG_GNUTLS */
-
-/*
- * We need to init gcrypt threading if
- *
- *   - gcrypt < 1.6.0
- *
- */
-
-#if (defined(CONFIG_GCRYPT) &&                  \
-     (GCRYPT_VERSION_NUMBER < 0x010600))
-#define QCRYPTO_INIT_GCRYPT_THREADS
-#else
-#undef QCRYPTO_INIT_GCRYPT_THREADS
-#endif
-
 #ifdef DEBUG_GNUTLS
 static void qcrypto_gnutls_log(int level, const char *str)
 {
@@ -57,55 +42,8 @@ static void qcrypto_gnutls_log(int level, const char *str)
 }
 #endif
 
-#ifdef QCRYPTO_INIT_GCRYPT_THREADS
-static int qcrypto_gcrypt_mutex_init(void **priv)
-{                                                                             \
-    QemuMutex *lock = NULL;
-    lock = g_new0(QemuMutex, 1);
-    qemu_mutex_init(lock);
-    *priv = lock;
-    return 0;
-}
-
-static int qcrypto_gcrypt_mutex_destroy(void **priv)
-{
-    QemuMutex *lock = *priv;
-    qemu_mutex_destroy(lock);
-    g_free(lock);
-    return 0;
-}
-
-static int qcrypto_gcrypt_mutex_lock(void **priv)
-{
-    QemuMutex *lock = *priv;
-    qemu_mutex_lock(lock);
-    return 0;
-}
-
-static int qcrypto_gcrypt_mutex_unlock(void **priv)
-{
-    QemuMutex *lock = *priv;
-    qemu_mutex_unlock(lock);
-    return 0;
-}
-
-static struct gcry_thread_cbs qcrypto_gcrypt_thread_impl = {
-    (GCRY_THREAD_OPTION_PTHREAD | (GCRY_THREAD_OPTION_VERSION << 8)),
-    NULL,
-    qcrypto_gcrypt_mutex_init,
-    qcrypto_gcrypt_mutex_destroy,
-    qcrypto_gcrypt_mutex_lock,
-    qcrypto_gcrypt_mutex_unlock,
-    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
-};
-#endif /* QCRYPTO_INIT_GCRYPT */
-
 int qcrypto_init(Error **errp)
 {
-#ifdef QCRYPTO_INIT_GCRYPT_THREADS
-    gcry_control(GCRYCTL_SET_THREAD_CBS, &qcrypto_gcrypt_thread_impl);
-#endif /* QCRYPTO_INIT_GCRYPT_THREADS */
-
 #ifdef CONFIG_GNUTLS
     int ret;
     ret = gnutls_global_init();
index 43db898809a02a6ecca49e236f66a408c2f647a5..ebae6acd04055873f96fd2c4aeff331baaf90770 100644 (file)
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#ifndef QCRYPTO_IVGEN_PLAIN_H__
-#define QCRYPTO_IVGEN_PLAIN_H__
+#ifndef QCRYPTO_IVGEN_PLAIN_H
+#define QCRYPTO_IVGEN_PLAIN_H
 
 #include "ivgenpriv.h"
 
 extern struct QCryptoIVGenDriver qcrypto_ivgen_plain;
 
-#endif /* QCRYPTO_IVGEN_PLAIN_H__ */
+#endif /* QCRYPTO_IVGEN_PLAIN_H */
index 7f37b5d33546902208b20cf32d7e85371d3ccbf6..c46f9c22a7feec92346f99e32c8cde02fd7444c1 100644 (file)
@@ -1,11 +1,12 @@
 crypto_ss.add(genh)
 crypto_ss.add(files(
   'afsplit.c',
+  'akcipher.c',
   'block-luks.c',
   'block-qcow.c',
   'block.c',
   'cipher.c',
-  'desrfb.c',
+  'der.c',
   'hash.c',
   'hmac.c',
   'ivgen-essiv.c',
@@ -20,54 +21,51 @@ crypto_ss.add(files(
   'tlscredspsk.c',
   'tlscredsx509.c',
   'tlssession.c',
+  'rsakey.c',
 ))
 
-if 'CONFIG_NETTLE' in config_host
-  crypto_ss.add(files('hash-nettle.c', 'hmac-nettle.c', 'pbkdf-nettle.c'))
-elif 'CONFIG_GCRYPT' in config_host
-  crypto_ss.add(files('hash-gcrypt.c', 'pbkdf-gcrypt.c'))
-  if 'CONFIG_GCRYPT_HMAC' in config_host
-    crypto_ss.add(files('hmac-gcrypt.c'))
-  else
-    crypto_ss.add(files('hmac-glib.c'))
+if nettle.found()
+  crypto_ss.add(nettle, files('hash-nettle.c', 'hmac-nettle.c', 'pbkdf-nettle.c'))
+  if hogweed.found()
+    crypto_ss.add(gmp, hogweed)
   endif
+  if xts == 'private'
+    crypto_ss.add(files('xts.c'))
+  endif
+elif gcrypt.found()
+  crypto_ss.add(gcrypt, files('hash-gcrypt.c', 'hmac-gcrypt.c', 'pbkdf-gcrypt.c'))
+elif gnutls_crypto.found()
+  crypto_ss.add(gnutls, files('hash-gnutls.c', 'hmac-gnutls.c', 'pbkdf-gnutls.c'))
 else
   crypto_ss.add(files('hash-glib.c', 'hmac-glib.c', 'pbkdf-stub.c'))
 endif
 
-crypto_ss.add(when: 'CONFIG_SECRET_KEYRING', if_true: files('secret_keyring.c'))
-crypto_ss.add(when: 'CONFIG_QEMU_PRIVATE_XTS', if_true: files('xts.c'))
-crypto_ss.add(when: 'CONFIG_AF_ALG', if_true: files('afalg.c', 'cipher-afalg.c', 'hash-afalg.c'))
-crypto_ss.add(when: 'CONFIG_GNUTLS', if_true: files('tls-cipher-suites.c'))
-
-if 'CONFIG_NETTLE' in config_host
-  crypto_ss.add(nettle)
-elif 'CONFIG_GCRYPT' in config_host
-  crypto_ss.add(gcrypt)
+if have_keyring
+  crypto_ss.add(files('secret_keyring.c'))
 endif
-
-if 'CONFIG_GNUTLS' in config_host
-  crypto_ss.add(gnutls)
+if have_afalg
+  crypto_ss.add(if_true: files('afalg.c', 'cipher-afalg.c', 'hash-afalg.c'))
 endif
 
+system_ss.add(when: gnutls, if_true: files('tls-cipher-suites.c'))
 
-util_ss.add(files('aes.c'))
-util_ss.add(files('init.c'))
+util_ss.add(files(
+  'aes.c',
+  'clmul.c',
+  'init.c',
+  'sm4.c',
+))
+if gnutls.found()
+  util_ss.add(gnutls)
+endif
 
-if 'CONFIG_GCRYPT' in config_host
-  util_ss.add(files('random-gcrypt.c'))
-elif 'CONFIG_GNUTLS' in config_host
-  util_ss.add(files('random-gnutls.c'))
-elif 'CONFIG_RNG_NONE' in config_host
+if gcrypt.found()
+  util_ss.add(gcrypt, files('random-gcrypt.c'))
+elif gnutls.found()
+  util_ss.add(gnutls, files('random-gnutls.c'))
+elif get_option('rng_none')
   util_ss.add(files('random-none.c'))
 else
   util_ss.add(files('random-platform.c'))
 endif
 
-if 'CONFIG_GCRYPT' in config_host
-  util_ss.add(gcrypt)
-endif
-
-if 'CONFIG_GNUTLS' in config_host
-  util_ss.add(gnutls)
-endif
diff --git a/crypto/pbkdf-gnutls.c b/crypto/pbkdf-gnutls.c
new file mode 100644 (file)
index 0000000..2dfbbd3
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * QEMU Crypto PBKDF support (Password-Based Key Derivation Function)
+ *
+ * Copyright (c) 2021 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <gnutls/crypto.h>
+#include "qapi/error.h"
+#include "crypto/pbkdf.h"
+
+bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash)
+{
+    switch (hash) {
+    case QCRYPTO_HASH_ALG_MD5:
+    case QCRYPTO_HASH_ALG_SHA1:
+    case QCRYPTO_HASH_ALG_SHA224:
+    case QCRYPTO_HASH_ALG_SHA256:
+    case QCRYPTO_HASH_ALG_SHA384:
+    case QCRYPTO_HASH_ALG_SHA512:
+    case QCRYPTO_HASH_ALG_RIPEMD160:
+        return true;
+    default:
+        return false;
+    }
+}
+
+int qcrypto_pbkdf2(QCryptoHashAlgorithm hash,
+                   const uint8_t *key, size_t nkey,
+                   const uint8_t *salt, size_t nsalt,
+                   uint64_t iterations,
+                   uint8_t *out, size_t nout,
+                   Error **errp)
+{
+    static const int hash_map[QCRYPTO_HASH_ALG__MAX] = {
+        [QCRYPTO_HASH_ALG_MD5] = GNUTLS_DIG_MD5,
+        [QCRYPTO_HASH_ALG_SHA1] = GNUTLS_DIG_SHA1,
+        [QCRYPTO_HASH_ALG_SHA224] = GNUTLS_DIG_SHA224,
+        [QCRYPTO_HASH_ALG_SHA256] = GNUTLS_DIG_SHA256,
+        [QCRYPTO_HASH_ALG_SHA384] = GNUTLS_DIG_SHA384,
+        [QCRYPTO_HASH_ALG_SHA512] = GNUTLS_DIG_SHA512,
+        [QCRYPTO_HASH_ALG_RIPEMD160] = GNUTLS_DIG_RMD160,
+    };
+    int ret;
+    const gnutls_datum_t gkey = { (unsigned char *)key, nkey };
+    const gnutls_datum_t gsalt = { (unsigned char *)salt, nsalt };
+
+    if (iterations > ULONG_MAX) {
+        error_setg_errno(errp, ERANGE,
+                         "PBKDF iterations %llu must be less than %lu",
+                         (long long unsigned)iterations, ULONG_MAX);
+        return -1;
+    }
+
+    if (hash >= G_N_ELEMENTS(hash_map) ||
+        hash_map[hash] == GNUTLS_DIG_UNKNOWN) {
+        error_setg_errno(errp, ENOSYS,
+                         "PBKDF does not support hash algorithm %s",
+                         QCryptoHashAlgorithm_str(hash));
+        return -1;
+    }
+
+    ret = gnutls_pbkdf2(hash_map[hash],
+                        &gkey,
+                        &gsalt,
+                        iterations,
+                        out,
+                        nout);
+    if (ret != 0) {
+        error_setg(errp, "Cannot derive password: %s",
+                   gnutls_strerror(ret));
+        return -1;
+    }
+
+    return 0;
+}
index 3775ddc6c5a2c50d744023e50e16b4eb87def975..8d198c152cf2ee06ea340d6b9adea4cbfd001136 100644 (file)
 #ifndef _WIN32
 #include <sys/resource.h>
 #endif
+#ifdef CONFIG_DARWIN
+#include <mach/mach_init.h>
+#include <mach/thread_act.h>
+#include <mach/mach_port.h>
+#endif
 
 
 static int qcrypto_pbkdf2_get_thread_cpu(unsigned long long *val_ms,
@@ -45,6 +50,24 @@ static int qcrypto_pbkdf2_get_thread_cpu(unsigned long long *val_ms,
     /* QuadPart is units of 100ns and we want ms as unit */
     *val_ms = thread_time.QuadPart / 10000ll;
     return 0;
+#elif defined(CONFIG_DARWIN)
+    mach_port_t thread;
+    kern_return_t kr;
+    mach_msg_type_number_t count;
+    thread_basic_info_data_t info;
+
+    thread = mach_thread_self();
+    count = THREAD_BASIC_INFO_COUNT;
+    kr = thread_info(thread, THREAD_BASIC_INFO, (thread_info_t)&info, &count);
+    mach_port_deallocate(mach_task_self(), thread);
+    if (kr != KERN_SUCCESS || (info.flags & TH_FLAGS_IDLE) != 0) {
+        error_setg_errno(errp, errno, "Unable to get thread CPU usage");
+        return -1;
+    }
+
+    *val_ms = ((info.user_time.seconds * 1000ll) +
+               (info.user_time.microseconds / 1000));
+    return 0;
 #elif defined(RUSAGE_THREAD)
     struct rusage ru;
     if (getrusage(RUSAGE_THREAD, &ru) < 0) {
diff --git a/crypto/rsakey-builtin.c.inc b/crypto/rsakey-builtin.c.inc
new file mode 100644 (file)
index 0000000..46cc7af
--- /dev/null
@@ -0,0 +1,196 @@
+/*
+ * QEMU Crypto akcipher algorithms
+ *
+ * Copyright (c) 2022 Bytedance
+ * Author: lei he <helei.sig11@bytedance.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "der.h"
+#include "rsakey.h"
+
+static int extract_mpi(void *ctx, const uint8_t *value,
+                       size_t vlen, Error **errp)
+{
+    QCryptoAkCipherMPI *mpi = (QCryptoAkCipherMPI *)ctx;
+    if (vlen == 0) {
+        error_setg(errp, "Empty mpi field");
+        return -1;
+    }
+    mpi->data = g_memdup2(value, vlen);
+    mpi->len = vlen;
+    return 0;
+}
+
+static int extract_version(void *ctx, const uint8_t *value,
+                           size_t vlen, Error **errp)
+{
+    uint8_t *version = (uint8_t *)ctx;
+    if (vlen != 1 || *value > 1) {
+        error_setg(errp, "Invalid rsakey version");
+        return -1;
+    }
+    *version = *value;
+    return 0;
+}
+
+static int extract_seq_content(void *ctx, const uint8_t *value,
+                               size_t vlen, Error **errp)
+{
+    const uint8_t **content = (const uint8_t **)ctx;
+    if (vlen == 0) {
+        error_setg(errp, "Empty sequence");
+        return -1;
+    }
+    *content = value;
+    return 0;
+}
+
+/**
+ *
+ *        RsaPubKey ::= SEQUENCE {
+ *             n           INTEGER
+ *             e           INTEGER
+ *         }
+ */
+static QCryptoAkCipherRSAKey *qcrypto_builtin_rsa_public_key_parse(
+    const uint8_t *key, size_t keylen, Error **errp)
+{
+    QCryptoAkCipherRSAKey *rsa = g_new0(QCryptoAkCipherRSAKey, 1);
+    const uint8_t *seq;
+    size_t seq_length;
+    int decode_ret;
+
+    decode_ret = qcrypto_der_decode_seq(&key, &keylen,
+                                        extract_seq_content, &seq, errp);
+    if (decode_ret < 0 || keylen != 0) {
+        goto error;
+    }
+    seq_length = decode_ret;
+
+    if (qcrypto_der_decode_int(&seq, &seq_length, extract_mpi,
+                               &rsa->n, errp) < 0 ||
+        qcrypto_der_decode_int(&seq, &seq_length, extract_mpi,
+                               &rsa->e, errp) < 0) {
+        goto error;
+    }
+    if (seq_length != 0) {
+        error_setg(errp, "Invalid RSA public key");
+        goto error;
+    }
+
+    return rsa;
+
+error:
+    qcrypto_akcipher_rsakey_free(rsa);
+    return NULL;
+}
+
+/**
+ *        RsaPrivKey ::= SEQUENCE {
+ *             version     INTEGER
+ *             n           INTEGER
+ *             e           INTEGER
+ *             d           INTEGER
+ *             p           INTEGER
+ *             q           INTEGER
+ *             dp          INTEGER
+ *             dq          INTEGER
+ *             u           INTEGER
+ *       otherPrimeInfos   OtherPrimeInfos OPTIONAL
+ *         }
+ */
+static QCryptoAkCipherRSAKey *qcrypto_builtin_rsa_private_key_parse(
+    const uint8_t *key, size_t keylen, Error **errp)
+{
+    QCryptoAkCipherRSAKey *rsa = g_new0(QCryptoAkCipherRSAKey, 1);
+    uint8_t version;
+    const uint8_t *seq;
+    int decode_ret;
+    size_t seq_length;
+
+    decode_ret = qcrypto_der_decode_seq(&key, &keylen, extract_seq_content,
+                                        &seq, errp);
+    if (decode_ret < 0 || keylen != 0) {
+        goto error;
+    }
+    seq_length = decode_ret;
+
+    decode_ret = qcrypto_der_decode_int(&seq, &seq_length, extract_version,
+                                        &version, errp);
+
+    if (qcrypto_der_decode_int(&seq, &seq_length, extract_mpi,
+                               &rsa->n, errp) < 0 ||
+        qcrypto_der_decode_int(&seq, &seq_length, extract_mpi,
+                               &rsa->e, errp) < 0 ||
+        qcrypto_der_decode_int(&seq, &seq_length, extract_mpi,
+                               &rsa->d, errp) < 0 ||
+        qcrypto_der_decode_int(&seq, &seq_length, extract_mpi, &rsa->p,
+                               errp) < 0 ||
+        qcrypto_der_decode_int(&seq, &seq_length, extract_mpi, &rsa->q,
+                               errp) < 0 ||
+        qcrypto_der_decode_int(&seq, &seq_length, extract_mpi, &rsa->dp,
+                               errp) < 0 ||
+        qcrypto_der_decode_int(&seq, &seq_length, extract_mpi, &rsa->dq,
+                               errp) < 0 ||
+        qcrypto_der_decode_int(&seq, &seq_length, extract_mpi, &rsa->u,
+                               errp) < 0) {
+        goto error;
+    }
+
+    /**
+     * According to the standard, otherPrimeInfos must be present for version 1.
+     * There is no strict verification here, this is to be compatible with
+     * the unit test of the kernel. TODO: remove this until linux kernel's
+     * unit-test is fixed.
+     */
+    if (version == 1 && seq_length != 0) {
+        if (qcrypto_der_decode_seq(&seq, &seq_length, NULL, NULL, errp) < 0) {
+            goto error;
+        }
+        if (seq_length != 0) {
+            goto error;
+        }
+        return rsa;
+    }
+    if (seq_length != 0) {
+        error_setg(errp, "Invalid RSA private key");
+        goto error;
+    }
+
+    return rsa;
+
+error:
+    qcrypto_akcipher_rsakey_free(rsa);
+    return NULL;
+}
+
+QCryptoAkCipherRSAKey *qcrypto_akcipher_rsakey_parse(
+    QCryptoAkCipherKeyType type, const uint8_t *key,
+    size_t keylen, Error **errp)
+{
+    switch (type) {
+    case QCRYPTO_AKCIPHER_KEY_TYPE_PRIVATE:
+        return qcrypto_builtin_rsa_private_key_parse(key, keylen, errp);
+
+    case QCRYPTO_AKCIPHER_KEY_TYPE_PUBLIC:
+        return qcrypto_builtin_rsa_public_key_parse(key, keylen, errp);
+
+    default:
+        error_setg(errp, "Unknown key type: %d", type);
+        return NULL;
+    }
+}
diff --git a/crypto/rsakey-nettle.c.inc b/crypto/rsakey-nettle.c.inc
new file mode 100644 (file)
index 0000000..cc49872
--- /dev/null
@@ -0,0 +1,158 @@
+/*
+ * QEMU Crypto akcipher algorithms
+ *
+ * Copyright (c) 2022 Bytedance
+ * Author: lei he <helei.sig11@bytedance.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <nettle/asn1.h>
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "rsakey.h"
+
+static bool DumpMPI(struct asn1_der_iterator *i, QCryptoAkCipherMPI *mpi)
+{
+    mpi->data = g_memdup2(i->data, i->length);
+    mpi->len = i->length;
+    return true;
+}
+
+static bool GetMPI(struct asn1_der_iterator *i, QCryptoAkCipherMPI *mpi)
+{
+    if (asn1_der_iterator_next(i) != ASN1_ITERATOR_PRIMITIVE ||
+        i->type != ASN1_INTEGER) {
+        return false;
+    }
+    return DumpMPI(i, mpi);
+}
+
+/**
+ *        RsaPrivKey ::= SEQUENCE {
+ *             version     INTEGER
+ *             n           INTEGER
+ *             e           INTEGER
+ *             d           INTEGER
+ *             p           INTEGER
+ *             q           INTEGER
+ *             dp          INTEGER
+ *             dq          INTEGER
+ *             u           INTEGER
+ *       otherPrimeInfos   OtherPrimeInfos OPTIONAL
+ *         }
+ */
+static QCryptoAkCipherRSAKey *qcrypto_nettle_rsa_private_key_parse(
+    const uint8_t *key, size_t keylen, Error **errp)
+{
+    QCryptoAkCipherRSAKey *rsa = g_new0(QCryptoAkCipherRSAKey, 1);
+    struct asn1_der_iterator i;
+    uint32_t version;
+    int tag;
+
+    /* Parse entire struct */
+    if (asn1_der_iterator_first(&i, keylen, key) != ASN1_ITERATOR_CONSTRUCTED ||
+        i.type != ASN1_SEQUENCE ||
+        asn1_der_decode_constructed_last(&i) != ASN1_ITERATOR_PRIMITIVE ||
+        i.type != ASN1_INTEGER ||
+        !asn1_der_get_uint32(&i, &version) ||
+        version > 1 ||
+        !GetMPI(&i, &rsa->n) ||
+        !GetMPI(&i, &rsa->e) ||
+        !GetMPI(&i, &rsa->d) ||
+        !GetMPI(&i, &rsa->p) ||
+        !GetMPI(&i, &rsa->q) ||
+        !GetMPI(&i, &rsa->dp) ||
+        !GetMPI(&i, &rsa->dq) ||
+        !GetMPI(&i, &rsa->u)) {
+        goto error;
+    }
+
+    if (version == 1) {
+        tag = asn1_der_iterator_next(&i);
+        /**
+         * According to the standard otherPrimeInfos must be present for
+         * version 1. There is no strict verification here, this is to be
+         * compatible with the unit test of the kernel. TODO: remove this
+         * until linux-kernel's unit-test is fixed;
+         */
+        if (tag == ASN1_ITERATOR_END) {
+            return rsa;
+        }
+        if (tag != ASN1_ITERATOR_CONSTRUCTED ||
+            i.type != ASN1_SEQUENCE) {
+            goto error;
+        }
+    }
+
+    if (asn1_der_iterator_next(&i) != ASN1_ITERATOR_END) {
+        goto error;
+    }
+
+    return rsa;
+
+error:
+    error_setg(errp, "Failed to parse RSA private key");
+    qcrypto_akcipher_rsakey_free(rsa);
+    return NULL;
+}
+
+/**
+ *        RsaPubKey ::= SEQUENCE {
+ *             n           INTEGER
+ *             e           INTEGER
+ *         }
+ */
+static QCryptoAkCipherRSAKey *qcrypto_nettle_rsa_public_key_parse(
+    const uint8_t *key, size_t keylen, Error **errp)
+{
+
+    QCryptoAkCipherRSAKey *rsa = g_new0(QCryptoAkCipherRSAKey, 1);
+    struct asn1_der_iterator i;
+
+    if (asn1_der_iterator_first(&i, keylen, key) != ASN1_ITERATOR_CONSTRUCTED ||
+        i.type != ASN1_SEQUENCE ||
+        asn1_der_decode_constructed_last(&i) != ASN1_ITERATOR_PRIMITIVE ||
+        !DumpMPI(&i, &rsa->n) ||
+        !GetMPI(&i, &rsa->e) ||
+        asn1_der_iterator_next(&i) != ASN1_ITERATOR_END) {
+        goto error;
+    }
+
+    return rsa;
+
+error:
+    error_setg(errp, "Failed to parse RSA public key");
+    qcrypto_akcipher_rsakey_free(rsa);
+    return NULL;
+}
+
+QCryptoAkCipherRSAKey *qcrypto_akcipher_rsakey_parse(
+    QCryptoAkCipherKeyType type, const uint8_t *key,
+    size_t keylen, Error **errp)
+{
+    switch (type) {
+    case QCRYPTO_AKCIPHER_KEY_TYPE_PRIVATE:
+        return qcrypto_nettle_rsa_private_key_parse(key, keylen, errp);
+
+    case QCRYPTO_AKCIPHER_KEY_TYPE_PUBLIC:
+        return qcrypto_nettle_rsa_public_key_parse(key, keylen, errp);
+
+    default:
+        error_setg(errp, "Unknown key type: %d", type);
+        return NULL;
+    }
+}
diff --git a/crypto/rsakey.c b/crypto/rsakey.c
new file mode 100644 (file)
index 0000000..7d6f273
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * QEMU Crypto RSA key parser
+ *
+ * Copyright (c) 2022 Bytedance
+ * Author: lei he <helei.sig11@bytedance.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "der.h"
+#include "rsakey.h"
+
+void qcrypto_akcipher_rsakey_free(QCryptoAkCipherRSAKey *rsa_key)
+{
+    if (!rsa_key) {
+        return;
+    }
+    g_free(rsa_key->n.data);
+    g_free(rsa_key->e.data);
+    g_free(rsa_key->d.data);
+    g_free(rsa_key->p.data);
+    g_free(rsa_key->q.data);
+    g_free(rsa_key->dp.data);
+    g_free(rsa_key->dq.data);
+    g_free(rsa_key->u.data);
+    g_free(rsa_key);
+}
+
+/**
+ * PKCS#8 private key info for RSA
+ *
+ * PrivateKeyInfo ::= SEQUENCE {
+ * version         INTEGER,
+ * privateKeyAlgorithm PrivateKeyAlgorithmIdentifier,
+ * privateKey      OCTET STRING,
+ * attributes      [0] IMPLICIT Attributes OPTIONAL
+ * }
+ */
+void qcrypto_akcipher_rsakey_export_p8info(const uint8_t *key,
+                                           size_t keylen,
+                                           uint8_t **dst,
+                                           size_t *dlen)
+{
+    QCryptoEncodeContext *ctx = qcrypto_der_encode_ctx_new();
+    uint8_t version = 0;
+
+    qcrypto_der_encode_seq_begin(ctx);
+
+    /* version */
+    qcrypto_der_encode_int(ctx, &version, sizeof(version));
+
+    /* algorithm identifier */
+    qcrypto_der_encode_seq_begin(ctx);
+    qcrypto_der_encode_oid(ctx, (uint8_t *)QCRYPTO_OID_rsaEncryption,
+                           sizeof(QCRYPTO_OID_rsaEncryption) - 1);
+    qcrypto_der_encode_null(ctx);
+    qcrypto_der_encode_seq_end(ctx);
+
+    /* RSA private key */
+    qcrypto_der_encode_octet_str(ctx, key, keylen);
+
+    qcrypto_der_encode_seq_end(ctx);
+
+    *dlen = qcrypto_der_encode_ctx_buffer_len(ctx);
+    *dst = g_malloc(*dlen);
+    qcrypto_der_encode_ctx_flush_and_free(ctx, *dst);
+}
+
+#if defined(CONFIG_NETTLE) && defined(CONFIG_HOGWEED)
+#include "rsakey-nettle.c.inc"
+#else
+#include "rsakey-builtin.c.inc"
+#endif
diff --git a/crypto/rsakey.h b/crypto/rsakey.h
new file mode 100644 (file)
index 0000000..00b3ecc
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * QEMU Crypto RSA key parser
+ *
+ * Copyright (c) 2022 Bytedance
+ * Author: lei he <helei.sig11@bytedance.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QCRYPTO_RSAKEY_H
+#define QCRYPTO_RSAKEY_H
+
+#include "qemu/host-utils.h"
+#include "crypto/akcipher.h"
+
+typedef struct QCryptoAkCipherRSAKey QCryptoAkCipherRSAKey;
+typedef struct QCryptoAkCipherMPI QCryptoAkCipherMPI;
+
+/**
+ * Multiple precious integer, encoded as two' complement,
+ * copied directly from DER encoded ASN.1 structures.
+ */
+struct QCryptoAkCipherMPI {
+    uint8_t *data;
+    size_t len;
+};
+
+/* See rfc2437: https://datatracker.ietf.org/doc/html/rfc2437 */
+struct QCryptoAkCipherRSAKey {
+    /* The modulus */
+    QCryptoAkCipherMPI n;
+    /* The public exponent */
+    QCryptoAkCipherMPI e;
+    /* The private exponent */
+    QCryptoAkCipherMPI d;
+    /* The first factor */
+    QCryptoAkCipherMPI p;
+    /* The second factor */
+    QCryptoAkCipherMPI q;
+    /* The first factor's exponent */
+    QCryptoAkCipherMPI dp;
+    /* The second factor's exponent */
+    QCryptoAkCipherMPI dq;
+    /* The CRT coefficient */
+    QCryptoAkCipherMPI u;
+};
+
+/**
+ * Parse DER encoded ASN.1 RSA keys, expected ASN.1 schemas:
+ *        RsaPrivKey ::= SEQUENCE {
+ *             version     INTEGER
+ *             n           INTEGER
+ *             e           INTEGER
+ *             d           INTEGER
+ *             p           INTEGER
+ *             q           INTEGER
+ *             dp          INTEGER
+ *             dq          INTEGER
+ *             u           INTEGER
+ *       otherPrimeInfos   OtherPrimeInfos OPTIONAL
+ *         }
+ *
+ *        RsaPubKey ::= SEQUENCE {
+ *             n           INTEGER
+ *             e           INTEGER
+ *         }
+ *
+ * Returns: On success QCryptoAkCipherRSAKey is returned, otherwise returns NULL
+ */
+QCryptoAkCipherRSAKey *qcrypto_akcipher_rsakey_parse(
+    QCryptoAkCipherKeyType type,
+    const uint8_t *key, size_t keylen, Error **errp);
+
+/**
+ * qcrypto_akcipher_rsakey_export_as_p8info:
+ *
+ * Export RSA private key to PKCS#8 private key info.
+ */
+void qcrypto_akcipher_rsakey_export_p8info(const uint8_t *key,
+                                           size_t keylen,
+                                           uint8_t **dst,
+                                           size_t *dlen);
+
+void qcrypto_akcipher_rsakey_free(QCryptoAkCipherRSAKey *key);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoAkCipherRSAKey,
+                              qcrypto_akcipher_rsakey_free);
+
+#endif
index 281cb81f0f082bcfc3883ab0c6fc03e94cf2875d..44eaff16f60809771ed27ed26a48cf0f555a9895 100644 (file)
@@ -107,13 +107,6 @@ qcrypto_secret_prop_get_file(Object *obj,
 }
 
 
-static void
-qcrypto_secret_complete(UserCreatable *uc, Error **errp)
-{
-    object_property_set_bool(OBJECT(uc), "loaded", true, errp);
-}
-
-
 static void
 qcrypto_secret_finalize(Object *obj)
 {
@@ -129,9 +122,6 @@ qcrypto_secret_class_init(ObjectClass *oc, void *data)
     QCryptoSecretCommonClass *sic = QCRYPTO_SECRET_COMMON_CLASS(oc);
     sic->load_data = qcrypto_secret_load_data;
 
-    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
-    ucc->complete = qcrypto_secret_complete;
-
     object_class_property_add_str(oc, "data",
                                   qcrypto_secret_prop_get_data,
                                   qcrypto_secret_prop_set_data);
@@ -148,10 +138,6 @@ static const TypeInfo qcrypto_secret_info = {
     .instance_finalize = qcrypto_secret_finalize,
     .class_size = sizeof(QCryptoSecretClass),
     .class_init = qcrypto_secret_class_init,
-    .interfaces = (InterfaceInfo[]) {
-        { TYPE_USER_CREATABLE },
-        { }
-    }
 };
 
 
index b03d530867ebc8e00918193870d7615b6bfd7c80..3441c44ca89e220e0a05e5b042b6ddf7016d619b 100644 (file)
@@ -138,36 +138,44 @@ static void qcrypto_secret_decode(const uint8_t *input,
 
 
 static void
-qcrypto_secret_prop_set_loaded(Object *obj,
-                               bool value,
-                               Error **errp)
+qcrypto_secret_complete(UserCreatable *uc, Error **errp)
 {
-    QCryptoSecretCommon *secret = QCRYPTO_SECRET_COMMON(obj);
+    QCryptoSecretCommon *secret = QCRYPTO_SECRET_COMMON(uc);
     QCryptoSecretCommonClass *sec_class
-                                = QCRYPTO_SECRET_COMMON_GET_CLASS(obj);
-
-    if (value) {
-        Error *local_err = NULL;
-        uint8_t *input = NULL;
-        size_t inputlen = 0;
-        uint8_t *output = NULL;
-        size_t outputlen = 0;
-
-        if (sec_class->load_data) {
-            sec_class->load_data(secret, &input, &inputlen, &local_err);
-            if (local_err) {
-                error_propagate(errp, local_err);
-                return;
-            }
-        } else {
-            error_setg(errp, "%s provides no 'load_data' method'",
-                             object_get_typename(obj));
+                                = QCRYPTO_SECRET_COMMON_GET_CLASS(uc);
+
+    Error *local_err = NULL;
+    uint8_t *input = NULL;
+    size_t inputlen = 0;
+    uint8_t *output = NULL;
+    size_t outputlen = 0;
+
+    if (sec_class->load_data) {
+        sec_class->load_data(secret, &input, &inputlen, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
             return;
         }
+    } else {
+        error_setg(errp, "%s provides no 'load_data' method'",
+                         object_get_typename(OBJECT(uc)));
+        return;
+    }
 
-        if (secret->keyid) {
-            qcrypto_secret_decrypt(secret, input, inputlen,
-                                   &output, &outputlen, &local_err);
+    if (secret->keyid) {
+        qcrypto_secret_decrypt(secret, input, inputlen,
+                               &output, &outputlen, &local_err);
+        g_free(input);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+        input = output;
+        inputlen = outputlen;
+    } else {
+        if (secret->format == QCRYPTO_SECRET_FORMAT_BASE64) {
+            qcrypto_secret_decode(input, inputlen,
+                                  &output, &outputlen, &local_err);
             g_free(input);
             if (local_err) {
                 error_propagate(errp, local_err);
@@ -175,26 +183,11 @@ qcrypto_secret_prop_set_loaded(Object *obj,
             }
             input = output;
             inputlen = outputlen;
-        } else {
-            if (secret->format == QCRYPTO_SECRET_FORMAT_BASE64) {
-                qcrypto_secret_decode(input, inputlen,
-                                      &output, &outputlen, &local_err);
-                g_free(input);
-                if (local_err) {
-                    error_propagate(errp, local_err);
-                    return;
-                }
-                input = output;
-                inputlen = outputlen;
-            }
         }
-
-        secret->rawdata = input;
-        secret->rawlen = inputlen;
-    } else {
-        g_free(secret->rawdata);
-        secret->rawlen = 0;
     }
+
+    secret->rawdata = input;
+    secret->rawlen = inputlen;
 }
 
 
@@ -281,9 +274,13 @@ qcrypto_secret_finalize(Object *obj)
 static void
 qcrypto_secret_class_init(ObjectClass *oc, void *data)
 {
+    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+
+    ucc->complete = qcrypto_secret_complete;
+
     object_class_property_add_bool(oc, "loaded",
                                    qcrypto_secret_prop_get_loaded,
-                                   qcrypto_secret_prop_set_loaded);
+                                   NULL);
     object_class_property_add_enum(oc, "format",
                                    "QCryptoSecretFormat",
                                    &QCryptoSecretFormat_lookup,
@@ -390,6 +387,10 @@ static const TypeInfo qcrypto_secret_info = {
     .class_size = sizeof(QCryptoSecretCommonClass),
     .class_init = qcrypto_secret_class_init,
     .abstract = true,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
 };
 
 
index 10d8bc48a0f551d320a7101d92d5fe7ac2e759f0..1b7edec84a0a1d79ec8f0eaac2ccd0e9e998a8e0 100644 (file)
@@ -102,22 +102,12 @@ qcrypto_secret_prop_get_key(Object *obj, Visitor *v,
 }
 
 
-static void
-qcrypto_secret_keyring_complete(UserCreatable *uc, Error **errp)
-{
-    object_property_set_bool(OBJECT(uc), "loaded", true, errp);
-}
-
-
 static void
 qcrypto_secret_keyring_class_init(ObjectClass *oc, void *data)
 {
     QCryptoSecretCommonClass *sic = QCRYPTO_SECRET_COMMON_CLASS(oc);
     sic->load_data = qcrypto_secret_keyring_load_data;
 
-    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
-    ucc->complete = qcrypto_secret_keyring_complete;
-
     object_class_property_add(oc, "serial", "int32_t",
                                   qcrypto_secret_prop_get_key,
                                   qcrypto_secret_prop_set_key,
@@ -130,10 +120,6 @@ static const TypeInfo qcrypto_secret_info = {
     .name = TYPE_QCRYPTO_SECRET_KEYRING,
     .instance_size = sizeof(QCryptoSecretKeyring),
     .class_init = qcrypto_secret_keyring_class_init,
-    .interfaces = (InterfaceInfo[]) {
-        { TYPE_USER_CREATABLE },
-        { }
-    }
 };
 
 
diff --git a/crypto/sm4.c b/crypto/sm4.c
new file mode 100644 (file)
index 0000000..2987306
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * QEMU crypto sm4 support
+ *
+ * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "crypto/sm4.h"
+
+uint8_t const sm4_sbox[] = {
+    0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
+    0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
+    0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
+    0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+    0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
+    0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
+    0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
+    0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
+    0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
+    0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
+    0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
+    0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
+    0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
+    0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
+    0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
+    0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
+    0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
+    0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
+    0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
+    0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
+    0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
+    0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
+    0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
+    0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
+    0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
+    0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
+    0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
+    0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
+    0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
+    0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
+    0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
+    0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
+};
+
+uint32_t const sm4_ck[] = {
+    0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+    0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+    0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+    0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+    0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+    0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+    0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+    0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+};
index 55fb5f7c19d107a152039a0d90f22474c4a11567..d0df4badc0fe1730dc9d58dcc1f0aad3c0de6563 100644 (file)
 #include "crypto/tlscreds.h"
 #include "crypto/tls-cipher-suites.h"
 #include "hw/nvram/fw_cfg.h"
+#include "tlscredspriv.h"
 #include "trace.h"
 
+struct QCryptoTLSCipherSuites {
+    /* <private> */
+    QCryptoTLSCreds parent_obj;
+    /* <public> */
+};
+
 /*
  * IANA registered TLS ciphers:
  * https://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml#tls-parameters-4
@@ -45,7 +52,6 @@ GByteArray *qcrypto_tls_cipher_suites_get_data(QCryptoTLSCipherSuites *obj,
     byte_array = g_byte_array_new();
 
     for (i = 0;; i++) {
-        int ret;
         unsigned idx;
         const char *name;
         IANA_TLS_CIPHER cipher;
index b68735f06fe35a75605533ae1f81392c9559f7de..084ce0d51ae75449f7c37d628457c2a21f907064 100644 (file)
@@ -20,6 +20,7 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "qapi-types-crypto.h"
 #include "qemu/module.h"
 #include "tlscredspriv.h"
 #include "trace.h"
@@ -259,6 +260,17 @@ qcrypto_tls_creds_finalize(Object *obj)
     g_free(creds->priority);
 }
 
+bool qcrypto_tls_creds_check_endpoint(QCryptoTLSCreds *creds,
+                                      QCryptoTLSCredsEndpoint endpoint,
+                                      Error **errp)
+{
+    if (creds->endpoint != endpoint) {
+        error_setg(errp, "Expected TLS credentials for a %s endpoint",
+                   QCryptoTLSCredsEndpoint_str(endpoint));
+        return false;
+    }
+    return true;
+}
 
 static const TypeInfo qcrypto_tls_creds_info = {
     .parent = TYPE_OBJECT,
index 30275b684772ef6436a7d5c01ab25b6145c4bf29..c0d23a0ef3c07a4ef3ba676ac94b8143e3aeb0b1 100644 (file)
@@ -29,6 +29,8 @@
 
 #ifdef CONFIG_GNUTLS
 
+#include <gnutls/gnutls.h>
+
 
 static int
 qcrypto_tls_creds_anon_load(QCryptoTLSCredsAnon *creds,
@@ -117,17 +119,11 @@ qcrypto_tls_creds_anon_unload(QCryptoTLSCredsAnon *creds G_GNUC_UNUSED)
 
 
 static void
-qcrypto_tls_creds_anon_prop_set_loaded(Object *obj,
-                                       bool value,
-                                       Error **errp)
+qcrypto_tls_creds_anon_complete(UserCreatable *uc, Error **errp)
 {
-    QCryptoTLSCredsAnon *creds = QCRYPTO_TLS_CREDS_ANON(obj);
+    QCryptoTLSCredsAnon *creds = QCRYPTO_TLS_CREDS_ANON(uc);
 
-    if (value) {
-        qcrypto_tls_creds_anon_load(creds, errp);
-    } else {
-        qcrypto_tls_creds_anon_unload(creds);
-    }
+    qcrypto_tls_creds_anon_load(creds, errp);
 }
 
 
@@ -162,13 +158,6 @@ qcrypto_tls_creds_anon_prop_get_loaded(Object *obj G_GNUC_UNUSED,
 #endif /* ! CONFIG_GNUTLS */
 
 
-static void
-qcrypto_tls_creds_anon_complete(UserCreatable *uc, Error **errp)
-{
-    object_property_set_bool(OBJECT(uc), "loaded", true, errp);
-}
-
-
 static void
 qcrypto_tls_creds_anon_finalize(Object *obj)
 {
@@ -187,7 +176,7 @@ qcrypto_tls_creds_anon_class_init(ObjectClass *oc, void *data)
 
     object_class_property_add_bool(oc, "loaded",
                                    qcrypto_tls_creds_anon_prop_get_loaded,
-                                   qcrypto_tls_creds_anon_prop_set_loaded);
+                                   NULL);
 }
 
 
index 39f1a91c4135aa41b8a5f4f4e4fa3a1ddb5794e9..df9815a28633e78bbd5d0d789be301bc6005d2da 100644 (file)
 
 #include "crypto/tlscreds.h"
 
+#ifdef CONFIG_GNUTLS
+#include <gnutls/gnutls.h>
+#endif
+
+struct QCryptoTLSCreds {
+    Object parent_obj;
+    char *dir;
+    QCryptoTLSCredsEndpoint endpoint;
+#ifdef CONFIG_GNUTLS
+    gnutls_dh_params_t dh_params;
+#endif
+    bool verifyPeer;
+    char *priority;
+};
+
+struct QCryptoTLSCredsAnon {
+    QCryptoTLSCreds parent_obj;
+#ifdef CONFIG_GNUTLS
+    union {
+        gnutls_anon_server_credentials_t server;
+        gnutls_anon_client_credentials_t client;
+    } data;
+#endif
+};
+
+struct QCryptoTLSCredsPSK {
+    QCryptoTLSCreds parent_obj;
+    char *username;
+#ifdef CONFIG_GNUTLS
+    union {
+        gnutls_psk_server_credentials_t server;
+        gnutls_psk_client_credentials_t client;
+    } data;
+#endif
+};
+
+struct QCryptoTLSCredsX509 {
+    QCryptoTLSCreds parent_obj;
+#ifdef CONFIG_GNUTLS
+    gnutls_certificate_credentials_t data;
+#endif
+    bool sanityCheck;
+    char *passwordid;
+};
+
 #ifdef CONFIG_GNUTLS
 
 int qcrypto_tls_creds_get_path(QCryptoTLSCreds *creds,
index e26807b899e9f2723be43b0144a5e81d887dad1e..546cad1c5a49d25d20de84ca4613681e612c5810 100644 (file)
@@ -29,6 +29,8 @@
 
 #ifdef CONFIG_GNUTLS
 
+#include <gnutls/gnutls.h>
+
 static int
 lookup_key(const char *pskfile, const char *username, gnutls_datum_t *key,
            Error **errp)
@@ -107,7 +109,12 @@ qcrypto_tls_creds_psk_load(QCryptoTLSCredsPSK *creds,
             goto cleanup;
         }
 
-        gnutls_psk_set_server_credentials_file(creds->data.server, pskfile);
+        ret = gnutls_psk_set_server_credentials_file(creds->data.server, pskfile);
+        if (ret < 0) {
+            error_setg(errp, "Cannot set PSK server credentials: %s",
+                       gnutls_strerror(ret));
+            goto cleanup;
+        }
         gnutls_psk_set_server_dh_params(creds->data.server,
                                         creds->parent_obj.dh_params);
     } else {
@@ -133,8 +140,13 @@ qcrypto_tls_creds_psk_load(QCryptoTLSCredsPSK *creds,
             goto cleanup;
         }
 
-        gnutls_psk_set_client_credentials(creds->data.client,
-                                          username, &key, GNUTLS_PSK_KEY_HEX);
+        ret = gnutls_psk_set_client_credentials(creds->data.client,
+                                                username, &key, GNUTLS_PSK_KEY_HEX);
+        if (ret < 0) {
+            error_setg(errp, "Cannot set PSK client credentials: %s",
+                       gnutls_strerror(ret));
+            goto cleanup;
+        }
     }
 
     rv = 0;
@@ -186,17 +198,11 @@ qcrypto_tls_creds_psk_unload(QCryptoTLSCredsPSK *creds G_GNUC_UNUSED)
 
 
 static void
-qcrypto_tls_creds_psk_prop_set_loaded(Object *obj,
-                                      bool value,
-                                      Error **errp)
+qcrypto_tls_creds_psk_complete(UserCreatable *uc, Error **errp)
 {
-    QCryptoTLSCredsPSK *creds = QCRYPTO_TLS_CREDS_PSK(obj);
+    QCryptoTLSCredsPSK *creds = QCRYPTO_TLS_CREDS_PSK(uc);
 
-    if (value) {
-        qcrypto_tls_creds_psk_load(creds, errp);
-    } else {
-        qcrypto_tls_creds_psk_unload(creds);
-    }
+    qcrypto_tls_creds_psk_load(creds, errp);
 }
 
 
@@ -231,13 +237,6 @@ qcrypto_tls_creds_psk_prop_get_loaded(Object *obj G_GNUC_UNUSED,
 #endif /* ! CONFIG_GNUTLS */
 
 
-static void
-qcrypto_tls_creds_psk_complete(UserCreatable *uc, Error **errp)
-{
-    object_property_set_bool(OBJECT(uc), "loaded", true, errp);
-}
-
-
 static void
 qcrypto_tls_creds_psk_finalize(Object *obj)
 {
@@ -275,7 +274,7 @@ qcrypto_tls_creds_psk_class_init(ObjectClass *oc, void *data)
 
     object_class_property_add_bool(oc, "loaded",
                                    qcrypto_tls_creds_psk_prop_get_loaded,
-                                   qcrypto_tls_creds_psk_prop_set_loaded);
+                                   NULL);
     object_class_property_add_str(oc, "username",
                                   qcrypto_tls_creds_psk_prop_get_username,
                                   qcrypto_tls_creds_psk_prop_set_username);
index dd7267ccdb0c5e686f53552a5732ca6c7d3ae0d6..d14313925dd8bbdc97c5347a9d45f2c1e0eb52ba 100644 (file)
@@ -30,6 +30,7 @@
 
 #ifdef CONFIG_GNUTLS
 
+#include <gnutls/gnutls.h>
 #include <gnutls/x509.h>
 
 
@@ -143,7 +144,7 @@ qcrypto_tls_creds_check_cert_key_usage(QCryptoTLSCredsX509 *creds,
     if (status < 0) {
         if (status == GNUTLS_E_REQUESTED_DATA_NOT_AVAILABLE) {
             usage = isCA ? GNUTLS_KEY_KEY_CERT_SIGN :
-                GNUTLS_KEY_DIGITAL_SIGNATURE|GNUTLS_KEY_KEY_ENCIPHERMENT;
+                GNUTLS_KEY_DIGITAL_SIGNATURE | GNUTLS_KEY_KEY_ENCIPHERMENT;
         } else {
             error_setg(errp,
                        "Unable to query certificate %s key usage: %s",
@@ -354,11 +355,9 @@ qcrypto_tls_creds_check_cert_pair(gnutls_x509_crt_t cert,
             reason = "The certificate has been revoked";
         }
 
-#ifndef GNUTLS_1_0_COMPAT
         if (status & GNUTLS_CERT_INSECURE_ALGORITHM) {
             reason = "The certificate uses an insecure algorithm";
         }
-#endif
 
         error_setg(errp,
                    "Our own certificate %s failed validation against %s: %s",
@@ -688,17 +687,11 @@ qcrypto_tls_creds_x509_unload(QCryptoTLSCredsX509 *creds G_GNUC_UNUSED)
 
 
 static void
-qcrypto_tls_creds_x509_prop_set_loaded(Object *obj,
-                                       bool value,
-                                       Error **errp)
+qcrypto_tls_creds_x509_complete(UserCreatable *uc, Error **errp)
 {
-    QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(obj);
+    QCryptoTLSCredsX509 *creds = QCRYPTO_TLS_CREDS_X509(uc);
 
-    if (value) {
-        qcrypto_tls_creds_x509_load(creds, errp);
-    } else {
-        qcrypto_tls_creds_x509_unload(creds);
-    }
+    qcrypto_tls_creds_x509_load(creds, errp);
 }
 
 
@@ -771,13 +764,51 @@ qcrypto_tls_creds_x509_prop_get_sanity(Object *obj,
 }
 
 
-static void
-qcrypto_tls_creds_x509_complete(UserCreatable *uc, Error **errp)
+#ifdef CONFIG_GNUTLS
+
+
+static bool
+qcrypto_tls_creds_x509_reload(QCryptoTLSCreds *creds, Error **errp)
+{
+    QCryptoTLSCredsX509 *x509_creds = QCRYPTO_TLS_CREDS_X509(creds);
+    Error *local_err = NULL;
+    gnutls_certificate_credentials_t creds_data = x509_creds->data;
+    gnutls_dh_params_t creds_dh_params = x509_creds->parent_obj.dh_params;
+
+    x509_creds->data = NULL;
+    x509_creds->parent_obj.dh_params = NULL;
+    qcrypto_tls_creds_x509_load(x509_creds, &local_err);
+    if (local_err) {
+        qcrypto_tls_creds_x509_unload(x509_creds);
+        x509_creds->data = creds_data;
+        x509_creds->parent_obj.dh_params = creds_dh_params;
+        error_propagate(errp, local_err);
+        return false;
+    }
+
+    if (creds_data) {
+        gnutls_certificate_free_credentials(creds_data);
+    }
+    if (creds_dh_params) {
+        gnutls_dh_params_deinit(creds_dh_params);
+    }
+    return true;
+}
+
+
+#else /* ! CONFIG_GNUTLS */
+
+
+static bool
+qcrypto_tls_creds_x509_reload(QCryptoTLSCreds *creds, Error **errp)
 {
-    object_property_set_bool(OBJECT(uc), "loaded", true, errp);
+    return false;
 }
 
 
+#endif /* ! CONFIG_GNUTLS */
+
+
 static void
 qcrypto_tls_creds_x509_init(Object *obj)
 {
@@ -801,12 +832,15 @@ static void
 qcrypto_tls_creds_x509_class_init(ObjectClass *oc, void *data)
 {
     UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+    QCryptoTLSCredsClass *ctcc = QCRYPTO_TLS_CREDS_CLASS(oc);
+
+    ctcc->reload = qcrypto_tls_creds_x509_reload;
 
     ucc->complete = qcrypto_tls_creds_x509_complete;
 
     object_class_property_add_bool(oc, "loaded",
                                    qcrypto_tls_creds_x509_prop_get_loaded,
-                                   qcrypto_tls_creds_x509_prop_set_loaded);
+                                   NULL);
     object_class_property_add_bool(oc, "sanity-check",
                                    qcrypto_tls_creds_x509_prop_get_sanity,
                                    qcrypto_tls_creds_x509_prop_set_sanity);
index 33203e8ca711d6944b6f60fe7ccef7ae43a49afb..1e98f44e0d172fefa57e8d5f128b88f2f02eb5c7 100644 (file)
@@ -25,6 +25,7 @@
 #include "crypto/tlscredsx509.h"
 #include "qapi/error.h"
 #include "authz/base.h"
+#include "tlscredspriv.h"
 #include "trace.h"
 
 #ifdef CONFIG_GNUTLS
@@ -372,6 +373,12 @@ qcrypto_tls_session_check_certificate(QCryptoTLSSession *session,
                                session->hostname);
                     goto error;
                 }
+            } else {
+                if (session->creds->endpoint ==
+                    QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
+                    error_setg(errp, "No hostname for certificate validation");
+                    goto error;
+                }
             }
         }
 
@@ -486,6 +493,13 @@ qcrypto_tls_session_read(QCryptoTLSSession *session,
 }
 
 
+size_t
+qcrypto_tls_session_check_pending(QCryptoTLSSession *session)
+{
+    return gnutls_record_check_pending(session->handle);
+}
+
+
 int
 qcrypto_tls_session_handshake(QCryptoTLSSession *session,
                               Error **errp)
@@ -608,6 +622,13 @@ qcrypto_tls_session_read(QCryptoTLSSession *sess,
 }
 
 
+size_t
+qcrypto_tls_session_check_pending(QCryptoTLSSession *session)
+{
+    return 0;
+}
+
+
 int
 qcrypto_tls_session_handshake(QCryptoTLSSession *sess,
                               Error **errp)
index 798b6067ab0f3a3be76f1a670da3deca00aa833b..bccd0bbf291a265516270dec586a1ece5cb5eaa3 100644 (file)
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 
 # tlscreds.c
 qcrypto_tls_creds_load_dh(void *creds, const char *filename) "TLS creds load DH creds=%p filename=%s"
diff --git a/event-loop-base.c b/event-loop-base.c
new file mode 100644 (file)
index 0000000..d5be4dc
--- /dev/null
@@ -0,0 +1,140 @@
+/*
+ * QEMU event-loop base
+ *
+ * Copyright (C) 2022 Red Hat Inc
+ *
+ * Authors:
+ *  Stefan Hajnoczi <stefanha@redhat.com>
+ *  Nicolas Saenz Julienne <nsaenzju@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qom/object_interfaces.h"
+#include "qapi/error.h"
+#include "block/thread-pool.h"
+#include "sysemu/event-loop-base.h"
+
+typedef struct {
+    const char *name;
+    ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */
+} EventLoopBaseParamInfo;
+
+static void event_loop_base_instance_init(Object *obj)
+{
+    EventLoopBase *base = EVENT_LOOP_BASE(obj);
+
+    base->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT;
+}
+
+static EventLoopBaseParamInfo aio_max_batch_info = {
+    "aio-max-batch", offsetof(EventLoopBase, aio_max_batch),
+};
+static EventLoopBaseParamInfo thread_pool_min_info = {
+    "thread-pool-min", offsetof(EventLoopBase, thread_pool_min),
+};
+static EventLoopBaseParamInfo thread_pool_max_info = {
+    "thread-pool-max", offsetof(EventLoopBase, thread_pool_max),
+};
+
+static void event_loop_base_get_param(Object *obj, Visitor *v,
+        const char *name, void *opaque, Error **errp)
+{
+    EventLoopBase *event_loop_base = EVENT_LOOP_BASE(obj);
+    EventLoopBaseParamInfo *info = opaque;
+    int64_t *field = (void *)event_loop_base + info->offset;
+
+    visit_type_int64(v, name, field, errp);
+}
+
+static void event_loop_base_set_param(Object *obj, Visitor *v,
+        const char *name, void *opaque, Error **errp)
+{
+    EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(obj);
+    EventLoopBase *base = EVENT_LOOP_BASE(obj);
+    EventLoopBaseParamInfo *info = opaque;
+    int64_t *field = (void *)base + info->offset;
+    int64_t value;
+
+    if (!visit_type_int64(v, name, &value, errp)) {
+        return;
+    }
+
+    if (value < 0) {
+        error_setg(errp, "%s value must be in range [0, %" PRId64 "]",
+                   info->name, INT64_MAX);
+        return;
+    }
+
+    *field = value;
+
+    if (bc->update_params) {
+        bc->update_params(base, errp);
+    }
+
+    return;
+}
+
+static void event_loop_base_complete(UserCreatable *uc, Error **errp)
+{
+    EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc);
+    EventLoopBase *base = EVENT_LOOP_BASE(uc);
+
+    if (bc->init) {
+        bc->init(base, errp);
+    }
+}
+
+static bool event_loop_base_can_be_deleted(UserCreatable *uc)
+{
+    EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc);
+    EventLoopBase *backend = EVENT_LOOP_BASE(uc);
+
+    if (bc->can_be_deleted) {
+        return bc->can_be_deleted(backend);
+    }
+
+    return true;
+}
+
+static void event_loop_base_class_init(ObjectClass *klass, void *class_data)
+{
+    UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
+    ucc->complete = event_loop_base_complete;
+    ucc->can_be_deleted = event_loop_base_can_be_deleted;
+
+    object_class_property_add(klass, "aio-max-batch", "int",
+                              event_loop_base_get_param,
+                              event_loop_base_set_param,
+                              NULL, &aio_max_batch_info);
+    object_class_property_add(klass, "thread-pool-min", "int",
+                              event_loop_base_get_param,
+                              event_loop_base_set_param,
+                              NULL, &thread_pool_min_info);
+    object_class_property_add(klass, "thread-pool-max", "int",
+                              event_loop_base_get_param,
+                              event_loop_base_set_param,
+                              NULL, &thread_pool_max_info);
+}
+
+static const TypeInfo event_loop_base_info = {
+    .name = TYPE_EVENT_LOOP_BASE,
+    .parent = TYPE_OBJECT,
+    .instance_size = sizeof(EventLoopBase),
+    .instance_init = event_loop_base_instance_init,
+    .class_size = sizeof(EventLoopBaseClass),
+    .class_init = event_loop_base_class_init,
+    .abstract = true,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+static void register_types(void)
+{
+    type_register_static(&event_loop_base_info);
+}
+type_init(register_types);
diff --git a/host/include/aarch64/host/atomic128-cas.h b/host/include/aarch64/host/atomic128-cas.h
new file mode 100644 (file)
index 0000000..5863010
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Compare-and-swap for 128-bit atomic operations, AArch64 version.
+ *
+ * Copyright (C) 2018, 2023 Linaro, Ltd.
+ *
+ * See docs/devel/atomics.rst for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef AARCH64_ATOMIC128_CAS_H
+#define AARCH64_ATOMIC128_CAS_H
+
+/* Through gcc 10, aarch64 has no support for 128-bit atomics.  */
+#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
+#include "host/include/generic/host/atomic128-cas.h"
+#else
+static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
+{
+    uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp);
+    uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
+    uint64_t oldl, oldh;
+    uint32_t tmp;
+
+    asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
+        "cmp %[oldl], %[cmpl]\n\t"
+        "ccmp %[oldh], %[cmph], #0, eq\n\t"
+        "b.ne 1f\n\t"
+        "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
+        "cbnz %w[tmp], 0b\n"
+        "1:"
+        : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
+          [oldl] "=&r"(oldl), [oldh] "=&r"(oldh)
+        : [cmpl] "r"(cmpl), [cmph] "r"(cmph),
+          [newl] "r"(newl), [newh] "r"(newh)
+        : "memory", "cc");
+
+    return int128_make128(oldl, oldh);
+}
+
+# define CONFIG_CMPXCHG128 1
+# define HAVE_CMPXCHG128 1
+#endif
+
+#endif /* AARCH64_ATOMIC128_CAS_H */
diff --git a/host/include/aarch64/host/atomic128-ldst.h b/host/include/aarch64/host/atomic128-ldst.h
new file mode 100644 (file)
index 0000000..a08f62c
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Load/store for 128-bit atomic operations, AArch64 version.
+ *
+ * Copyright (C) 2018, 2023 Linaro, Ltd.
+ *
+ * See docs/devel/atomics.rst for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef AARCH64_ATOMIC128_LDST_H
+#define AARCH64_ATOMIC128_LDST_H
+
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+/*
+ * Through gcc 10, aarch64 has no support for 128-bit atomics.
+ * Through clang 16, without -march=armv8.4-a, __atomic_load_16
+ * is incorrectly expanded to a read-write operation.
+ *
+ * Anyway, this method allows runtime detection of FEAT_LSE2.
+ */
+
+#define HAVE_ATOMIC128_RO (cpuinfo & CPUINFO_LSE2)
+#define HAVE_ATOMIC128_RW 1
+
+static inline Int128 atomic16_read_ro(const Int128 *ptr)
+{
+    uint64_t l, h;
+
+    tcg_debug_assert(HAVE_ATOMIC128_RO);
+    /* With FEAT_LSE2, 16-byte aligned LDP is atomic. */
+    asm("ldp %[l], %[h], %[mem]"
+        : [l] "=r"(l), [h] "=r"(h) : [mem] "m"(*ptr));
+
+    return int128_make128(l, h);
+}
+
+static inline Int128 atomic16_read_rw(Int128 *ptr)
+{
+    uint64_t l, h;
+    uint32_t tmp;
+
+    if (cpuinfo & CPUINFO_LSE2) {
+        /* With FEAT_LSE2, 16-byte aligned LDP is atomic. */
+        asm("ldp %[l], %[h], %[mem]"
+            : [l] "=r"(l), [h] "=r"(h) : [mem] "m"(*ptr));
+    } else {
+        /* The load must be paired with the store to guarantee not tearing.  */
+        asm("0: ldxp %[l], %[h], %[mem]\n\t"
+            "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
+            "cbnz %w[tmp], 0b"
+            : [mem] "+m"(*ptr), [tmp] "=&r"(tmp), [l] "=&r"(l), [h] "=&r"(h));
+    }
+
+    return int128_make128(l, h);
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    uint64_t l = int128_getlo(val), h = int128_gethi(val);
+    uint64_t t1, t2;
+
+    if (cpuinfo & CPUINFO_LSE2) {
+        /* With FEAT_LSE2, 16-byte aligned STP is atomic. */
+        asm("stp %[l], %[h], %[mem]"
+            : [mem] "=m"(*ptr) : [l] "r"(l), [h] "r"(h));
+    } else {
+        /* Load into temporaries to acquire the exclusive access lock.  */
+        asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
+            "stxp %w[t1], %[l], %[h], %[mem]\n\t"
+            "cbnz %w[t1], 0b"
+            : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
+            : [l] "r"(l), [h] "r"(h));
+    }
+}
+
+#endif /* AARCH64_ATOMIC128_LDST_H */
diff --git a/host/include/aarch64/host/cpuinfo.h b/host/include/aarch64/host/cpuinfo.h
new file mode 100644 (file)
index 0000000..fe67153
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Host specific cpu identification for AArch64.
+ */
+
+#ifndef HOST_CPUINFO_H
+#define HOST_CPUINFO_H
+
+#define CPUINFO_ALWAYS          (1u << 0)  /* so cpuinfo is nonzero */
+#define CPUINFO_LSE             (1u << 1)
+#define CPUINFO_LSE2            (1u << 2)
+#define CPUINFO_AES             (1u << 3)
+#define CPUINFO_PMULL           (1u << 4)
+#define CPUINFO_BTI             (1u << 5)
+
+/* Initialized with a constructor. */
+extern unsigned cpuinfo;
+
+/*
+ * We cannot rely on constructor ordering, so other constructors must
+ * use the function interface rather than the variable above.
+ */
+unsigned cpuinfo_init(void);
+
+#endif /* HOST_CPUINFO_H */
diff --git a/host/include/aarch64/host/crypto/aes-round.h b/host/include/aarch64/host/crypto/aes-round.h
new file mode 100644 (file)
index 0000000..8b5f88d
--- /dev/null
@@ -0,0 +1,205 @@
+/*
+ * AArch64 specific aes acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef AARCH64_HOST_CRYPTO_AES_ROUND_H
+#define AARCH64_HOST_CRYPTO_AES_ROUND_H
+
+#include "host/cpuinfo.h"
+#include <arm_neon.h>
+
+#ifdef __ARM_FEATURE_AES
+# define HAVE_AES_ACCEL  true
+#else
+# define HAVE_AES_ACCEL  likely(cpuinfo & CPUINFO_AES)
+#endif
+#if !defined(__ARM_FEATURE_AES) && defined(CONFIG_ARM_AES_BUILTIN)
+# define ATTR_AES_ACCEL  __attribute__((target("+crypto")))
+#else
+# define ATTR_AES_ACCEL
+#endif
+
+static inline uint8x16_t aes_accel_bswap(uint8x16_t x)
+{
+    return vqtbl1q_u8(x, (uint8x16_t){ 15, 14, 13, 12, 11, 10, 9, 8,
+                                        7,  6,  5,  4,  3,  2, 1, 0, });
+}
+
+#ifdef CONFIG_ARM_AES_BUILTIN
+# define aes_accel_aesd            vaesdq_u8
+# define aes_accel_aese            vaeseq_u8
+# define aes_accel_aesmc           vaesmcq_u8
+# define aes_accel_aesimc          vaesimcq_u8
+# define aes_accel_aesd_imc(S, K)  vaesimcq_u8(vaesdq_u8(S, K))
+# define aes_accel_aese_mc(S, K)   vaesmcq_u8(vaeseq_u8(S, K))
+#else
+static inline uint8x16_t aes_accel_aesd(uint8x16_t d, uint8x16_t k)
+{
+    asm(".arch_extension aes\n\t"
+        "aesd %0.16b, %1.16b" : "+w"(d) : "w"(k));
+    return d;
+}
+
+static inline uint8x16_t aes_accel_aese(uint8x16_t d, uint8x16_t k)
+{
+    asm(".arch_extension aes\n\t"
+        "aese %0.16b, %1.16b" : "+w"(d) : "w"(k));
+    return d;
+}
+
+static inline uint8x16_t aes_accel_aesmc(uint8x16_t d)
+{
+    asm(".arch_extension aes\n\t"
+        "aesmc %0.16b, %1.16b" : "=w"(d) : "w"(d));
+    return d;
+}
+
+static inline uint8x16_t aes_accel_aesimc(uint8x16_t d)
+{
+    asm(".arch_extension aes\n\t"
+        "aesimc %0.16b, %1.16b" : "=w"(d) : "w"(d));
+    return d;
+}
+
+/* Most CPUs fuse AESD+AESIMC in the execution pipeline. */
+static inline uint8x16_t aes_accel_aesd_imc(uint8x16_t d, uint8x16_t k)
+{
+    asm(".arch_extension aes\n\t"
+        "aesd %0.16b, %1.16b\n\t"
+        "aesimc %0.16b, %0.16b" : "+w"(d) : "w"(k));
+    return d;
+}
+
+/* Most CPUs fuse AESE+AESMC in the execution pipeline. */
+static inline uint8x16_t aes_accel_aese_mc(uint8x16_t d, uint8x16_t k)
+{
+    asm(".arch_extension aes\n\t"
+        "aese %0.16b, %1.16b\n\t"
+        "aesmc %0.16b, %0.16b" : "+w"(d) : "w"(k));
+    return d;
+}
+#endif /* CONFIG_ARM_AES_BUILTIN */
+
+static inline void ATTR_AES_ACCEL
+aesenc_MC_accel(AESState *ret, const AESState *st, bool be)
+{
+    uint8x16_t t = (uint8x16_t)st->v;
+
+    if (be) {
+        t = aes_accel_bswap(t);
+        t = aes_accel_aesmc(t);
+        t = aes_accel_bswap(t);
+    } else {
+        t = aes_accel_aesmc(t);
+    }
+    ret->v = (AESStateVec)t;
+}
+
+static inline void ATTR_AES_ACCEL
+aesenc_SB_SR_AK_accel(AESState *ret, const AESState *st,
+                      const AESState *rk, bool be)
+{
+    uint8x16_t t = (uint8x16_t)st->v;
+    uint8x16_t z = { };
+
+    if (be) {
+        t = aes_accel_bswap(t);
+        t = aes_accel_aese(t, z);
+        t = aes_accel_bswap(t);
+    } else {
+        t = aes_accel_aese(t, z);
+    }
+    ret->v = (AESStateVec)t ^ rk->v;
+}
+
+static inline void ATTR_AES_ACCEL
+aesenc_SB_SR_MC_AK_accel(AESState *ret, const AESState *st,
+                         const AESState *rk, bool be)
+{
+    uint8x16_t t = (uint8x16_t)st->v;
+    uint8x16_t z = { };
+
+    if (be) {
+        t = aes_accel_bswap(t);
+        t = aes_accel_aese_mc(t, z);
+        t = aes_accel_bswap(t);
+    } else {
+        t = aes_accel_aese_mc(t, z);
+    }
+    ret->v = (AESStateVec)t ^ rk->v;
+}
+
+static inline void ATTR_AES_ACCEL
+aesdec_IMC_accel(AESState *ret, const AESState *st, bool be)
+{
+    uint8x16_t t = (uint8x16_t)st->v;
+
+    if (be) {
+        t = aes_accel_bswap(t);
+        t = aes_accel_aesimc(t);
+        t = aes_accel_bswap(t);
+    } else {
+        t = aes_accel_aesimc(t);
+    }
+    ret->v = (AESStateVec)t;
+}
+
+static inline void ATTR_AES_ACCEL
+aesdec_ISB_ISR_AK_accel(AESState *ret, const AESState *st,
+                        const AESState *rk, bool be)
+{
+    uint8x16_t t = (uint8x16_t)st->v;
+    uint8x16_t z = { };
+
+    if (be) {
+        t = aes_accel_bswap(t);
+        t = aes_accel_aesd(t, z);
+        t = aes_accel_bswap(t);
+    } else {
+        t = aes_accel_aesd(t, z);
+    }
+    ret->v = (AESStateVec)t ^ rk->v;
+}
+
+static inline void ATTR_AES_ACCEL
+aesdec_ISB_ISR_AK_IMC_accel(AESState *ret, const AESState *st,
+                            const AESState *rk, bool be)
+{
+    uint8x16_t t = (uint8x16_t)st->v;
+    uint8x16_t k = (uint8x16_t)rk->v;
+    uint8x16_t z = { };
+
+    if (be) {
+        t = aes_accel_bswap(t);
+        k = aes_accel_bswap(k);
+        t = aes_accel_aesd(t, z);
+        t ^= k;
+        t = aes_accel_aesimc(t);
+        t = aes_accel_bswap(t);
+    } else {
+        t = aes_accel_aesd(t, z);
+        t ^= k;
+        t = aes_accel_aesimc(t);
+    }
+    ret->v = (AESStateVec)t;
+}
+
+static inline void ATTR_AES_ACCEL
+aesdec_ISB_ISR_IMC_AK_accel(AESState *ret, const AESState *st,
+                            const AESState *rk, bool be)
+{
+    uint8x16_t t = (uint8x16_t)st->v;
+    uint8x16_t z = { };
+
+    if (be) {
+        t = aes_accel_bswap(t);
+        t = aes_accel_aesd_imc(t, z);
+        t = aes_accel_bswap(t);
+    } else {
+        t = aes_accel_aesd_imc(t, z);
+    }
+    ret->v = (AESStateVec)t ^ rk->v;
+}
+
+#endif /* AARCH64_HOST_CRYPTO_AES_ROUND_H */
diff --git a/host/include/aarch64/host/crypto/clmul.h b/host/include/aarch64/host/crypto/clmul.h
new file mode 100644 (file)
index 0000000..bb516d8
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * AArch64 specific clmul acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef AARCH64_HOST_CRYPTO_CLMUL_H
+#define AARCH64_HOST_CRYPTO_CLMUL_H
+
+#include "host/cpuinfo.h"
+#include <arm_neon.h>
+
+/*
+ * 64x64->128 pmull is available with FEAT_PMULL.
+ * Both FEAT_AES and FEAT_PMULL are covered under the same macro.
+ */
+#ifdef __ARM_FEATURE_AES
+# define HAVE_CLMUL_ACCEL  true
+#else
+# define HAVE_CLMUL_ACCEL  likely(cpuinfo & CPUINFO_PMULL)
+#endif
+#if !defined(__ARM_FEATURE_AES) && defined(CONFIG_ARM_AES_BUILTIN)
+# define ATTR_CLMUL_ACCEL  __attribute__((target("+crypto")))
+#else
+# define ATTR_CLMUL_ACCEL
+#endif
+
+static inline Int128 ATTR_CLMUL_ACCEL
+clmul_64_accel(uint64_t n, uint64_t m)
+{
+    union { poly128_t v; Int128 s; } u;
+
+#ifdef CONFIG_ARM_AES_BUILTIN
+    u.v = vmull_p64((poly64_t)n, (poly64_t)m);
+#else
+    asm(".arch_extension aes\n\t"
+        "pmull %0.1q, %1.1d, %2.1d" : "=w"(u.v) : "w"(n), "w"(m));
+#endif
+    return u.s;
+}
+
+#endif /* AARCH64_HOST_CRYPTO_CLMUL_H */
diff --git a/host/include/aarch64/host/load-extract-al16-al8.h b/host/include/aarch64/host/load-extract-al16-al8.h
new file mode 100644 (file)
index 0000000..bd677c5
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic extract 64 from 128-bit, AArch64 version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef AARCH64_LOAD_EXTRACT_AL16_AL8_H
+#define AARCH64_LOAD_EXTRACT_AL16_AL8_H
+
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+/**
+ * load_atom_extract_al16_or_al8:
+ * @pv: host address
+ * @s: object size in bytes, @s <= 8.
+ *
+ * Load @s bytes from @pv, when pv % s != 0.  If [p, p+s-1] does not
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
+ * otherwise the access must be 8-byte atomic.
+ */
+static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
+{
+    uintptr_t pi = (uintptr_t)pv;
+    __int128_t *ptr_align = (__int128_t *)(pi & ~7);
+    int shr = (pi & 7) * 8;
+    uint64_t l, h;
+
+    /*
+     * With FEAT_LSE2, LDP is single-copy atomic if 16-byte aligned
+     * and single-copy atomic on the parts if 8-byte aligned.
+     * All we need do is align the pointer mod 8.
+     */
+    tcg_debug_assert(HAVE_ATOMIC128_RO);
+    asm("ldp %0, %1, %2" : "=r"(l), "=r"(h) : "m"(*ptr_align));
+    return (l >> shr) | (h << (-shr & 63));
+}
+
+#endif /* AARCH64_LOAD_EXTRACT_AL16_AL8_H */
diff --git a/host/include/aarch64/host/store-insert-al16.h b/host/include/aarch64/host/store-insert-al16.h
new file mode 100644 (file)
index 0000000..1943155
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic store insert into 128-bit, AArch64 version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef AARCH64_STORE_INSERT_AL16_H
+#define AARCH64_STORE_INSERT_AL16_H
+
+/**
+ * store_atom_insert_al16:
+ * @p: host address
+ * @val: shifted value to store
+ * @msk: mask for value to store
+ *
+ * Atomically store @val to @p masked by @msk.
+ */
+static inline void ATTRIBUTE_ATOMIC128_OPT
+store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
+{
+    /*
+     * GCC only implements __sync* primitives for int128 on aarch64.
+     * We can do better without the barriers, and integrating the
+     * arithmetic into the load-exclusive/store-conditional pair.
+     */
+    uint64_t tl, th, vl, vh, ml, mh;
+    uint32_t fail;
+
+    qemu_build_assert(!HOST_BIG_ENDIAN);
+    vl = int128_getlo(val);
+    vh = int128_gethi(val);
+    ml = int128_getlo(msk);
+    mh = int128_gethi(msk);
+
+    asm("0: ldxp %[l], %[h], %[mem]\n\t"
+        "bic %[l], %[l], %[ml]\n\t"
+        "bic %[h], %[h], %[mh]\n\t"
+        "orr %[l], %[l], %[vl]\n\t"
+        "orr %[h], %[h], %[vh]\n\t"
+        "stxp %w[f], %[l], %[h], %[mem]\n\t"
+        "cbnz %w[f], 0b\n"
+        : [mem] "+Q"(*ps), [f] "=&r"(fail), [l] "=&r"(tl), [h] "=&r"(th)
+        : [vl] "r"(vl), [vh] "r"(vh), [ml] "r"(ml), [mh] "r"(mh));
+}
+
+#endif /* AARCH64_STORE_INSERT_AL16_H */
diff --git a/host/include/generic/host/atomic128-cas.h b/host/include/generic/host/atomic128-cas.h
new file mode 100644 (file)
index 0000000..6b40cc2
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Compare-and-swap for 128-bit atomic operations, generic version.
+ *
+ * Copyright (C) 2018, 2023 Linaro, Ltd.
+ *
+ * See docs/devel/atomics.rst for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef HOST_ATOMIC128_CAS_H
+#define HOST_ATOMIC128_CAS_H
+
+#if defined(CONFIG_ATOMIC128)
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
+{
+    __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128Alias r, c, n;
+
+    c.s = cmp;
+    n.s = new;
+    r.i = qatomic_cmpxchg__nocheck(ptr_align, c.i, n.i);
+    return r.s;
+}
+# define HAVE_CMPXCHG128 1
+#elif defined(CONFIG_CMPXCHG128)
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
+{
+    Int128Aligned *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128Alias r, c, n;
+
+    c.s = cmp;
+    n.s = new;
+    r.i = __sync_val_compare_and_swap_16(ptr_align, c.i, n.i);
+    return r.s;
+}
+# define HAVE_CMPXCHG128 1
+#else
+/* Fallback definition that must be optimized away, or error.  */
+Int128 QEMU_ERROR("unsupported atomic")
+    atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new);
+# define HAVE_CMPXCHG128 0
+#endif
+
+#endif /* HOST_ATOMIC128_CAS_H */
diff --git a/host/include/generic/host/atomic128-ldst.h b/host/include/generic/host/atomic128-ldst.h
new file mode 100644 (file)
index 0000000..691e6a8
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Load/store for 128-bit atomic operations, generic version.
+ *
+ * Copyright (C) 2018, 2023 Linaro, Ltd.
+ *
+ * See docs/devel/atomics.rst for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef HOST_ATOMIC128_LDST_H
+#define HOST_ATOMIC128_LDST_H
+
+#if defined(CONFIG_ATOMIC128)
+# define HAVE_ATOMIC128_RO 1
+# define HAVE_ATOMIC128_RW 1
+
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_read_ro(const Int128 *ptr)
+{
+    const __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128Alias r;
+
+    r.i = qatomic_read__nocheck(ptr_align);
+    return r.s;
+}
+
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_read_rw(Int128 *ptr)
+{
+    return atomic16_read_ro(ptr);
+}
+
+static inline void ATTRIBUTE_ATOMIC128_OPT
+atomic16_set(Int128 *ptr, Int128 val)
+{
+    __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128Alias v;
+
+    v.s = val;
+    qatomic_set__nocheck(ptr_align, v.i);
+}
+
+#elif defined(CONFIG_CMPXCHG128)
+# define HAVE_ATOMIC128_RO 0
+# define HAVE_ATOMIC128_RW 1
+
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read_ro(const Int128 *ptr);
+
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_read_rw(Int128 *ptr)
+{
+    /* Maybe replace 0 with 0, returning the old value.  */
+    Int128 z = int128_make64(0);
+    return atomic16_cmpxchg(ptr, z, z);
+}
+
+static inline void ATTRIBUTE_ATOMIC128_OPT
+atomic16_set(Int128 *ptr, Int128 val)
+{
+    Int128Aligned *ptr_align = __builtin_assume_aligned(ptr, 16);
+    __int128_t old;
+    Int128Alias new;
+
+    new.s = val;
+    do {
+        old = *ptr_align;
+    } while (!__sync_bool_compare_and_swap_16(ptr_align, old, new.i));
+}
+
+#else
+# define HAVE_ATOMIC128_RO 0
+# define HAVE_ATOMIC128_RW 0
+
+/* Fallback definitions that must be optimized away, or error.  */
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read_ro(const Int128 *ptr);
+Int128 QEMU_ERROR("unsupported atomic") atomic16_read_rw(Int128 *ptr);
+void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val);
+#endif
+
+#endif /* HOST_ATOMIC128_LDST_H */
diff --git a/host/include/generic/host/cpuinfo.h b/host/include/generic/host/cpuinfo.h
new file mode 100644 (file)
index 0000000..67ad410
--- /dev/null
@@ -0,0 +1,4 @@
+/*
+ * No host specific cpu identification.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
diff --git a/host/include/generic/host/crypto/aes-round.h b/host/include/generic/host/crypto/aes-round.h
new file mode 100644 (file)
index 0000000..1b9720f
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * No host specific aes acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef GENERIC_HOST_CRYPTO_AES_ROUND_H
+#define GENERIC_HOST_CRYPTO_AES_ROUND_H
+
+#define HAVE_AES_ACCEL  false
+#define ATTR_AES_ACCEL
+
+void aesenc_MC_accel(AESState *, const AESState *, bool)
+    QEMU_ERROR("unsupported accel");
+void aesenc_SB_SR_AK_accel(AESState *, const AESState *,
+                           const AESState *, bool)
+    QEMU_ERROR("unsupported accel");
+void aesenc_SB_SR_MC_AK_accel(AESState *, const AESState *,
+                              const AESState *, bool)
+    QEMU_ERROR("unsupported accel");
+
+void aesdec_IMC_accel(AESState *, const AESState *, bool)
+    QEMU_ERROR("unsupported accel");
+void aesdec_ISB_ISR_AK_accel(AESState *, const AESState *,
+                             const AESState *, bool)
+    QEMU_ERROR("unsupported accel");
+void aesdec_ISB_ISR_AK_IMC_accel(AESState *, const AESState *,
+                                 const AESState *, bool)
+    QEMU_ERROR("unsupported accel");
+void aesdec_ISB_ISR_IMC_AK_accel(AESState *, const AESState *,
+                                 const AESState *, bool)
+    QEMU_ERROR("unsupported accel");
+
+#endif /* GENERIC_HOST_CRYPTO_AES_ROUND_H */
diff --git a/host/include/generic/host/crypto/clmul.h b/host/include/generic/host/crypto/clmul.h
new file mode 100644 (file)
index 0000000..915bfb8
--- /dev/null
@@ -0,0 +1,15 @@
+/*
+ * No host specific carry-less multiply acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef GENERIC_HOST_CRYPTO_CLMUL_H
+#define GENERIC_HOST_CRYPTO_CLMUL_H
+
+#define HAVE_CLMUL_ACCEL  false
+#define ATTR_CLMUL_ACCEL
+
+Int128 clmul_64_accel(uint64_t, uint64_t)
+    QEMU_ERROR("unsupported accel");
+
+#endif /* GENERIC_HOST_CRYPTO_CLMUL_H */
diff --git a/host/include/generic/host/load-extract-al16-al8.h b/host/include/generic/host/load-extract-al16-al8.h
new file mode 100644 (file)
index 0000000..d955561
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic extract 64 from 128-bit, generic version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef HOST_LOAD_EXTRACT_AL16_AL8_H
+#define HOST_LOAD_EXTRACT_AL16_AL8_H
+
+/**
+ * load_atom_extract_al16_or_al8:
+ * @pv: host address
+ * @s: object size in bytes, @s <= 8.
+ *
+ * Load @s bytes from @pv, when pv % s != 0.  If [p, p+s-1] does not
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
+ * otherwise the access must be 8-byte atomic.
+ */
+static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
+load_atom_extract_al16_or_al8(void *pv, int s)
+{
+    uintptr_t pi = (uintptr_t)pv;
+    int o = pi & 7;
+    int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
+    Int128 r;
+
+    pv = (void *)(pi & ~7);
+    if (pi & 8) {
+        uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
+        uint64_t a = qatomic_read__nocheck(p8);
+        uint64_t b = qatomic_read__nocheck(p8 + 1);
+
+        if (HOST_BIG_ENDIAN) {
+            r = int128_make128(b, a);
+        } else {
+            r = int128_make128(a, b);
+        }
+    } else {
+        r = atomic16_read_ro(pv);
+    }
+    return int128_getlo(int128_urshift(r, shr));
+}
+
+#endif /* HOST_LOAD_EXTRACT_AL16_AL8_H */
diff --git a/host/include/generic/host/store-insert-al16.h b/host/include/generic/host/store-insert-al16.h
new file mode 100644 (file)
index 0000000..4a16621
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic store insert into 128-bit, generic version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef HOST_STORE_INSERT_AL16_H
+#define HOST_STORE_INSERT_AL16_H
+
+/**
+ * store_atom_insert_al16:
+ * @p: host address
+ * @val: shifted value to store
+ * @msk: mask for value to store
+ *
+ * Atomically store @val to @p masked by @msk.
+ */
+static inline void ATTRIBUTE_ATOMIC128_OPT
+store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
+{
+#if defined(CONFIG_ATOMIC128)
+    __uint128_t *pu;
+    Int128Alias old, new;
+
+    /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
+    pu = __builtin_assume_aligned(ps, 16);
+    old.u = *pu;
+    msk = int128_not(msk);
+    do {
+        new.s = int128_and(old.s, msk);
+        new.s = int128_or(new.s, val);
+    } while (!__atomic_compare_exchange_n(pu, &old.u, new.u, true,
+                                          __ATOMIC_RELAXED, __ATOMIC_RELAXED));
+#else
+    Int128 old, new, cmp;
+
+    ps = __builtin_assume_aligned(ps, 16);
+    old = *ps;
+    msk = int128_not(msk);
+    do {
+        cmp = old;
+        new = int128_and(old, msk);
+        new = int128_or(new, val);
+        old = atomic16_cmpxchg(ps, cmp, new);
+    } while (int128_ne(cmp, old));
+#endif
+}
+
+#endif /* HOST_STORE_INSERT_AL16_H */
diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h
new file mode 100644 (file)
index 0000000..b89e6d2
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Host specific cpu identification for x86.
+ */
+
+#ifndef HOST_CPUINFO_H
+#define HOST_CPUINFO_H
+
+/* Digested version of <cpuid.h> */
+
+#define CPUINFO_ALWAYS          (1u << 0)  /* so cpuinfo is nonzero */
+#define CPUINFO_CMOV            (1u << 1)
+#define CPUINFO_MOVBE           (1u << 2)
+#define CPUINFO_LZCNT           (1u << 3)
+#define CPUINFO_POPCNT          (1u << 4)
+#define CPUINFO_BMI1            (1u << 5)
+#define CPUINFO_BMI2            (1u << 6)
+#define CPUINFO_SSE2            (1u << 7)
+#define CPUINFO_SSE4            (1u << 8)
+#define CPUINFO_AVX1            (1u << 9)
+#define CPUINFO_AVX2            (1u << 10)
+#define CPUINFO_AVX512F         (1u << 11)
+#define CPUINFO_AVX512VL        (1u << 12)
+#define CPUINFO_AVX512BW        (1u << 13)
+#define CPUINFO_AVX512DQ        (1u << 14)
+#define CPUINFO_AVX512VBMI2     (1u << 15)
+#define CPUINFO_ATOMIC_VMOVDQA  (1u << 16)
+#define CPUINFO_ATOMIC_VMOVDQU  (1u << 17)
+#define CPUINFO_AES             (1u << 18)
+#define CPUINFO_PCLMUL          (1u << 19)
+
+/* Initialized with a constructor. */
+extern unsigned cpuinfo;
+
+/*
+ * We cannot rely on constructor ordering, so other constructors must
+ * use the function interface rather than the variable above.
+ */
+unsigned cpuinfo_init(void);
+
+#endif /* HOST_CPUINFO_H */
diff --git a/host/include/i386/host/crypto/aes-round.h b/host/include/i386/host/crypto/aes-round.h
new file mode 100644 (file)
index 0000000..59a6413
--- /dev/null
@@ -0,0 +1,152 @@
+/*
+ * x86 specific aes acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef X86_HOST_CRYPTO_AES_ROUND_H
+#define X86_HOST_CRYPTO_AES_ROUND_H
+
+#include "host/cpuinfo.h"
+#include <immintrin.h>
+
+#if defined(__AES__) && defined(__SSSE3__)
+# define HAVE_AES_ACCEL  true
+# define ATTR_AES_ACCEL
+#else
+# define HAVE_AES_ACCEL  likely(cpuinfo & CPUINFO_AES)
+# define ATTR_AES_ACCEL  __attribute__((target("aes,ssse3")))
+#endif
+
+static inline __m128i ATTR_AES_ACCEL
+aes_accel_bswap(__m128i x)
+{
+    return _mm_shuffle_epi8(x, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8,
+                                            9, 10, 11, 12, 13, 14, 15));
+}
+
+static inline void ATTR_AES_ACCEL
+aesenc_MC_accel(AESState *ret, const AESState *st, bool be)
+{
+    __m128i t = (__m128i)st->v;
+    __m128i z = _mm_setzero_si128();
+
+    if (be) {
+        t = aes_accel_bswap(t);
+        t = _mm_aesdeclast_si128(t, z);
+        t = _mm_aesenc_si128(t, z);
+        t = aes_accel_bswap(t);
+    } else {
+        t = _mm_aesdeclast_si128(t, z);
+        t = _mm_aesenc_si128(t, z);
+    }
+    ret->v = (AESStateVec)t;
+}
+
+static inline void ATTR_AES_ACCEL
+aesenc_SB_SR_AK_accel(AESState *ret, const AESState *st,
+                      const AESState *rk, bool be)
+{
+    __m128i t = (__m128i)st->v;
+    __m128i k = (__m128i)rk->v;
+
+    if (be) {
+        t = aes_accel_bswap(t);
+        k = aes_accel_bswap(k);
+        t = _mm_aesenclast_si128(t, k);
+        t = aes_accel_bswap(t);
+    } else {
+        t = _mm_aesenclast_si128(t, k);
+    }
+    ret->v = (AESStateVec)t;
+}
+
+static inline void ATTR_AES_ACCEL
+aesenc_SB_SR_MC_AK_accel(AESState *ret, const AESState *st,
+                         const AESState *rk, bool be)
+{
+    __m128i t = (__m128i)st->v;
+    __m128i k = (__m128i)rk->v;
+
+    if (be) {
+        t = aes_accel_bswap(t);
+        k = aes_accel_bswap(k);
+        t = _mm_aesenc_si128(t, k);
+        t = aes_accel_bswap(t);
+    } else {
+        t = _mm_aesenc_si128(t, k);
+    }
+    ret->v = (AESStateVec)t;
+}
+
+static inline void ATTR_AES_ACCEL
+aesdec_IMC_accel(AESState *ret, const AESState *st, bool be)
+{
+    __m128i t = (__m128i)st->v;
+
+    if (be) {
+        t = aes_accel_bswap(t);
+        t = _mm_aesimc_si128(t);
+        t = aes_accel_bswap(t);
+    } else {
+        t = _mm_aesimc_si128(t);
+    }
+    ret->v = (AESStateVec)t;
+}
+
+static inline void ATTR_AES_ACCEL
+aesdec_ISB_ISR_AK_accel(AESState *ret, const AESState *st,
+                        const AESState *rk, bool be)
+{
+    __m128i t = (__m128i)st->v;
+    __m128i k = (__m128i)rk->v;
+
+    if (be) {
+        t = aes_accel_bswap(t);
+        k = aes_accel_bswap(k);
+        t = _mm_aesdeclast_si128(t, k);
+        t = aes_accel_bswap(t);
+    } else {
+        t = _mm_aesdeclast_si128(t, k);
+    }
+    ret->v = (AESStateVec)t;
+}
+
+static inline void ATTR_AES_ACCEL
+aesdec_ISB_ISR_AK_IMC_accel(AESState *ret, const AESState *st,
+                            const AESState *rk, bool be)
+{
+    __m128i t = (__m128i)st->v;
+    __m128i k = (__m128i)rk->v;
+
+    if (be) {
+        t = aes_accel_bswap(t);
+        k = aes_accel_bswap(k);
+        t = _mm_aesdeclast_si128(t, k);
+        t = _mm_aesimc_si128(t);
+        t = aes_accel_bswap(t);
+    } else {
+        t = _mm_aesdeclast_si128(t, k);
+        t = _mm_aesimc_si128(t);
+    }
+    ret->v = (AESStateVec)t;
+}
+
+static inline void ATTR_AES_ACCEL
+aesdec_ISB_ISR_IMC_AK_accel(AESState *ret, const AESState *st,
+                            const AESState *rk, bool be)
+{
+    __m128i t = (__m128i)st->v;
+    __m128i k = (__m128i)rk->v;
+
+    if (be) {
+        t = aes_accel_bswap(t);
+        k = aes_accel_bswap(k);
+        t = _mm_aesdec_si128(t, k);
+        t = aes_accel_bswap(t);
+    } else {
+        t = _mm_aesdec_si128(t, k);
+    }
+    ret->v = (AESStateVec)t;
+}
+
+#endif /* X86_HOST_CRYPTO_AES_ROUND_H */
diff --git a/host/include/i386/host/crypto/clmul.h b/host/include/i386/host/crypto/clmul.h
new file mode 100644 (file)
index 0000000..dc3c814
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * x86 specific clmul acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef X86_HOST_CRYPTO_CLMUL_H
+#define X86_HOST_CRYPTO_CLMUL_H
+
+#include "host/cpuinfo.h"
+#include <immintrin.h>
+
+#if defined(__PCLMUL__)
+# define HAVE_CLMUL_ACCEL  true
+# define ATTR_CLMUL_ACCEL
+#else
+# define HAVE_CLMUL_ACCEL  likely(cpuinfo & CPUINFO_PCLMUL)
+# define ATTR_CLMUL_ACCEL  __attribute__((target("pclmul")))
+#endif
+
+static inline Int128 ATTR_CLMUL_ACCEL
+clmul_64_accel(uint64_t n, uint64_t m)
+{
+    union { __m128i v; Int128 s; } u;
+
+    u.v = _mm_clmulepi64_si128(_mm_set_epi64x(0, n), _mm_set_epi64x(0, m), 0);
+    return u.s;
+}
+
+#endif /* X86_HOST_CRYPTO_CLMUL_H */
diff --git a/host/include/loongarch64/host/atomic128-ldst.h b/host/include/loongarch64/host/atomic128-ldst.h
new file mode 100644 (file)
index 0000000..9a4a8f8
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Load/store for 128-bit atomic operations, LoongArch version.
+ *
+ * See docs/devel/atomics.rst for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef LOONGARCH_ATOMIC128_LDST_H
+#define LOONGARCH_ATOMIC128_LDST_H
+
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+#define HAVE_ATOMIC128_RO  likely(cpuinfo & CPUINFO_LSX)
+#define HAVE_ATOMIC128_RW  HAVE_ATOMIC128_RO
+
+/*
+ * As of gcc 13 and clang 16, there is no compiler support for LSX at all.
+ * Use inline assembly throughout.
+ */
+
+static inline Int128 atomic16_read_ro(const Int128 *ptr)
+{
+    uint64_t l, h;
+
+    tcg_debug_assert(HAVE_ATOMIC128_RO);
+    asm("vld $vr0, %2, 0\n\t"
+        "vpickve2gr.d %0, $vr0, 0\n\t"
+        "vpickve2gr.d %1, $vr0, 1"
+       : "=r"(l), "=r"(h) : "r"(ptr), "m"(*ptr) : "f0");
+
+    return int128_make128(l, h);
+}
+
+static inline Int128 atomic16_read_rw(Int128 *ptr)
+{
+    return atomic16_read_ro(ptr);
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    uint64_t l = int128_getlo(val), h = int128_gethi(val);
+
+    tcg_debug_assert(HAVE_ATOMIC128_RW);
+    asm("vinsgr2vr.d $vr0, %1, 0\n\t"
+        "vinsgr2vr.d $vr0, %2, 1\n\t"
+        "vst $vr0, %3, 0"
+       : "=m"(*ptr) : "r"(l), "r"(h), "r"(ptr) : "f0");
+}
+
+#endif /* LOONGARCH_ATOMIC128_LDST_H */
diff --git a/host/include/loongarch64/host/cpuinfo.h b/host/include/loongarch64/host/cpuinfo.h
new file mode 100644 (file)
index 0000000..fab664a
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Host specific cpu identification for LoongArch
+ */
+
+#ifndef HOST_CPUINFO_H
+#define HOST_CPUINFO_H
+
+#define CPUINFO_ALWAYS          (1u << 0)  /* so cpuinfo is nonzero */
+#define CPUINFO_LSX             (1u << 1)
+
+/* Initialized with a constructor. */
+extern unsigned cpuinfo;
+
+/*
+ * We cannot rely on constructor ordering, so other constructors must
+ * use the function interface rather than the variable above.
+ */
+unsigned cpuinfo_init(void);
+
+#endif /* HOST_CPUINFO_H */
diff --git a/host/include/loongarch64/host/load-extract-al16-al8.h b/host/include/loongarch64/host/load-extract-al16-al8.h
new file mode 100644 (file)
index 0000000..d1fb59d
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic extract 64 from 128-bit, LoongArch version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef LOONGARCH_LOAD_EXTRACT_AL16_AL8_H
+#define LOONGARCH_LOAD_EXTRACT_AL16_AL8_H
+
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+/**
+ * load_atom_extract_al16_or_al8:
+ * @pv: host address
+ * @s: object size in bytes, @s <= 8.
+ *
+ * Load @s bytes from @pv, when pv % s != 0.  If [p, p+s-1] does not
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
+ * otherwise the access must be 8-byte atomic.
+ */
+static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
+{
+    uintptr_t pi = (uintptr_t)pv;
+    Int128 *ptr_align = (Int128 *)(pi & ~7);
+    int shr = (pi & 7) * 8;
+    uint64_t l, h;
+
+    tcg_debug_assert(HAVE_ATOMIC128_RO);
+    asm("vld $vr0, %2, 0\n\t"
+        "vpickve2gr.d %0, $vr0, 0\n\t"
+        "vpickve2gr.d %1, $vr0, 1"
+       : "=r"(l), "=r"(h) : "r"(ptr_align), "m"(*ptr_align) : "f0");
+
+    return (l >> shr) | (h << (-shr & 63));
+}
+
+#endif /* LOONGARCH_LOAD_EXTRACT_AL16_AL8_H */
diff --git a/host/include/loongarch64/host/store-insert-al16.h b/host/include/loongarch64/host/store-insert-al16.h
new file mode 100644 (file)
index 0000000..919fd8d
--- /dev/null
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic store insert into 128-bit, LoongArch version.
+ */
+
+#ifndef LOONGARCH_STORE_INSERT_AL16_H
+#define LOONGARCH_STORE_INSERT_AL16_H
+
+void store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
+    QEMU_ERROR("unsupported atomic");
+
+#endif /* LOONGARCH_STORE_INSERT_AL16_H */
diff --git a/host/include/ppc/host/cpuinfo.h b/host/include/ppc/host/cpuinfo.h
new file mode 100644 (file)
index 0000000..38b8eab
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Host specific cpu identification for ppc.
+ */
+
+#ifndef HOST_CPUINFO_H
+#define HOST_CPUINFO_H
+
+/* Digested version of <cpuid.h> */
+
+#define CPUINFO_ALWAYS          (1u << 0)  /* so cpuinfo is nonzero */
+#define CPUINFO_V2_06           (1u << 1)
+#define CPUINFO_V2_07           (1u << 2)
+#define CPUINFO_V3_0            (1u << 3)
+#define CPUINFO_V3_1            (1u << 4)
+#define CPUINFO_ISEL            (1u << 5)
+#define CPUINFO_ALTIVEC         (1u << 6)
+#define CPUINFO_VSX             (1u << 7)
+#define CPUINFO_CRYPTO          (1u << 8)
+
+/* Initialized with a constructor. */
+extern unsigned cpuinfo;
+
+/*
+ * We cannot rely on constructor ordering, so other constructors must
+ * use the function interface rather than the variable above.
+ */
+unsigned cpuinfo_init(void);
+
+#endif /* HOST_CPUINFO_H */
diff --git a/host/include/ppc/host/crypto/aes-round.h b/host/include/ppc/host/crypto/aes-round.h
new file mode 100644 (file)
index 0000000..8062d2a
--- /dev/null
@@ -0,0 +1,182 @@
+/*
+ * Power v2.07 specific aes acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef PPC_HOST_CRYPTO_AES_ROUND_H
+#define PPC_HOST_CRYPTO_AES_ROUND_H
+
+#ifdef __ALTIVEC__
+#include "host/cpuinfo.h"
+
+#ifdef __CRYPTO__
+# define HAVE_AES_ACCEL  true
+#else
+# define HAVE_AES_ACCEL  likely(cpuinfo & CPUINFO_CRYPTO)
+#endif
+#define ATTR_AES_ACCEL
+
+/*
+ * While there is <altivec.h>, both gcc and clang "aid" with the
+ * endianness issues in different ways. Just use inline asm instead.
+ */
+
+/* Bytes in memory are host-endian; bytes in register are @be. */
+static inline AESStateVec aes_accel_ld(const AESState *p, bool be)
+{
+    AESStateVec r;
+
+    if (be) {
+        asm("lvx %0, 0, %1" : "=v"(r) : "r"(p), "m"(*p));
+    } else if (HOST_BIG_ENDIAN) {
+        AESStateVec rev = {
+            15, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3,  2,  1,  0,
+        };
+        asm("lvx %0, 0, %1\n\t"
+            "vperm %0, %0, %0, %2"
+            : "=v"(r) : "r"(p), "v"(rev), "m"(*p));
+    } else {
+#ifdef __POWER9_VECTOR__
+        asm("lxvb16x %x0, 0, %1" : "=v"(r) : "r"(p), "m"(*p));
+#else
+        asm("lxvd2x %x0, 0, %1\n\t"
+            "xxpermdi %x0, %x0, %x0, 2"
+            : "=v"(r) : "r"(p), "m"(*p));
+#endif
+    }
+    return r;
+}
+
+static void aes_accel_st(AESState *p, AESStateVec r, bool be)
+{
+    if (be) {
+        asm("stvx %1, 0, %2" : "=m"(*p) : "v"(r), "r"(p));
+    } else if (HOST_BIG_ENDIAN) {
+        AESStateVec rev = {
+            15, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3,  2,  1,  0,
+        };
+        asm("vperm %1, %1, %1, %2\n\t"
+            "stvx %1, 0, %3"
+            : "=m"(*p), "+v"(r) : "v"(rev), "r"(p));
+    } else {
+#ifdef __POWER9_VECTOR__
+        asm("stxvb16x %x1, 0, %2" : "=m"(*p) : "v"(r), "r"(p));
+#else
+        asm("xxpermdi %x1, %x1, %x1, 2\n\t"
+            "stxvd2x %x1, 0, %2"
+            : "=m"(*p), "+v"(r) : "r"(p));
+#endif
+    }
+}
+
+static inline AESStateVec aes_accel_vcipher(AESStateVec d, AESStateVec k)
+{
+    asm("vcipher %0, %0, %1" : "+v"(d) : "v"(k));
+    return d;
+}
+
+static inline AESStateVec aes_accel_vncipher(AESStateVec d, AESStateVec k)
+{
+    asm("vncipher %0, %0, %1" : "+v"(d) : "v"(k));
+    return d;
+}
+
+static inline AESStateVec aes_accel_vcipherlast(AESStateVec d, AESStateVec k)
+{
+    asm("vcipherlast %0, %0, %1" : "+v"(d) : "v"(k));
+    return d;
+}
+
+static inline AESStateVec aes_accel_vncipherlast(AESStateVec d, AESStateVec k)
+{
+    asm("vncipherlast %0, %0, %1" : "+v"(d) : "v"(k));
+    return d;
+}
+
+static inline void
+aesenc_MC_accel(AESState *ret, const AESState *st, bool be)
+{
+    AESStateVec t, z = { };
+
+    t = aes_accel_ld(st, be);
+    t = aes_accel_vncipherlast(t, z);
+    t = aes_accel_vcipher(t, z);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesenc_SB_SR_AK_accel(AESState *ret, const AESState *st,
+                      const AESState *rk, bool be)
+{
+    AESStateVec t, k;
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vcipherlast(t, k);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesenc_SB_SR_MC_AK_accel(AESState *ret, const AESState *st,
+                         const AESState *rk, bool be)
+{
+    AESStateVec t, k;
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vcipher(t, k);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesdec_IMC_accel(AESState *ret, const AESState *st, bool be)
+{
+    AESStateVec t, z = { };
+
+    t = aes_accel_ld(st, be);
+    t = aes_accel_vcipherlast(t, z);
+    t = aes_accel_vncipher(t, z);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesdec_ISB_ISR_AK_accel(AESState *ret, const AESState *st,
+                        const AESState *rk, bool be)
+{
+    AESStateVec t, k;
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vncipherlast(t, k);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesdec_ISB_ISR_AK_IMC_accel(AESState *ret, const AESState *st,
+                            const AESState *rk, bool be)
+{
+    AESStateVec t, k;
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vncipher(t, k);
+    aes_accel_st(ret, t, be);
+}
+
+static inline void
+aesdec_ISB_ISR_IMC_AK_accel(AESState *ret, const AESState *st,
+                            const AESState *rk, bool be)
+{
+    AESStateVec t, k, z = { };
+
+    t = aes_accel_ld(st, be);
+    k = aes_accel_ld(rk, be);
+    t = aes_accel_vncipher(t, z);
+    aes_accel_st(ret, t ^ k, be);
+}
+#else
+/* Without ALTIVEC, we can't even write inline assembly. */
+#include "host/include/generic/host/crypto/aes-round.h"
+#endif
+
+#endif /* PPC_HOST_CRYPTO_AES_ROUND_H */
diff --git a/host/include/ppc64/host/cpuinfo.h b/host/include/ppc64/host/cpuinfo.h
new file mode 100644 (file)
index 0000000..2f036a0
--- /dev/null
@@ -0,0 +1 @@
+#include "host/include/ppc/host/cpuinfo.h"
diff --git a/host/include/ppc64/host/crypto/aes-round.h b/host/include/ppc64/host/crypto/aes-round.h
new file mode 100644 (file)
index 0000000..5eeba6d
--- /dev/null
@@ -0,0 +1 @@
+#include "host/include/ppc/host/crypto/aes-round.h"
diff --git a/host/include/x86_64/host/atomic128-ldst.h b/host/include/x86_64/host/atomic128-ldst.h
new file mode 100644 (file)
index 0000000..8d6f909
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Load/store for 128-bit atomic operations, x86_64 version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ *
+ * See docs/devel/atomics.rst for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef X86_64_ATOMIC128_LDST_H
+#define X86_64_ATOMIC128_LDST_H
+
+#ifdef CONFIG_INT128_TYPE
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+#include <immintrin.h>
+
+typedef union {
+    __m128i v;
+    __int128_t i;
+    Int128 s;
+} X86Int128Union;
+
+/*
+ * Through clang 16, with -mcx16, __atomic_load_n is incorrectly
+ * expanded to a read-write operation: lock cmpxchg16b.
+ */
+
+#define HAVE_ATOMIC128_RO  likely(cpuinfo & CPUINFO_ATOMIC_VMOVDQA)
+#define HAVE_ATOMIC128_RW  1
+
+static inline Int128 atomic16_read_ro(const Int128 *ptr)
+{
+    X86Int128Union r;
+
+    tcg_debug_assert(HAVE_ATOMIC128_RO);
+    asm("vmovdqa %1, %0" : "=x" (r.v) : "m" (*ptr));
+
+    return r.s;
+}
+
+static inline Int128 atomic16_read_rw(Int128 *ptr)
+{
+    __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    X86Int128Union r;
+
+    if (HAVE_ATOMIC128_RO) {
+        asm("vmovdqa %1, %0" : "=x" (r.v) : "m" (*ptr_align));
+    } else {
+        r.i = __sync_val_compare_and_swap_16(ptr_align, 0, 0);
+    }
+    return r.s;
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    X86Int128Union new = { .s = val };
+
+    if (HAVE_ATOMIC128_RO) {
+        asm("vmovdqa %1, %0" : "=m"(*ptr_align) : "x" (new.v));
+    } else {
+        __int128_t old;
+        do {
+            old = *ptr_align;
+        } while (!__sync_bool_compare_and_swap_16(ptr_align, old, new.i));
+    }
+}
+#else
+/* Provide QEMU_ERROR stubs. */
+#include "host/include/generic/host/atomic128-ldst.h"
+#endif
+
+#endif /* X86_64_ATOMIC128_LDST_H */
diff --git a/host/include/x86_64/host/cpuinfo.h b/host/include/x86_64/host/cpuinfo.h
new file mode 100644 (file)
index 0000000..67debab
--- /dev/null
@@ -0,0 +1 @@
+#include "host/include/i386/host/cpuinfo.h"
diff --git a/host/include/x86_64/host/crypto/aes-round.h b/host/include/x86_64/host/crypto/aes-round.h
new file mode 100644 (file)
index 0000000..2773cc9
--- /dev/null
@@ -0,0 +1 @@
+#include "host/include/i386/host/crypto/aes-round.h"
diff --git a/host/include/x86_64/host/crypto/clmul.h b/host/include/x86_64/host/crypto/clmul.h
new file mode 100644 (file)
index 0000000..f25eced
--- /dev/null
@@ -0,0 +1 @@
+#include "host/include/i386/host/crypto/clmul.h"
diff --git a/host/include/x86_64/host/load-extract-al16-al8.h b/host/include/x86_64/host/load-extract-al16-al8.h
new file mode 100644 (file)
index 0000000..baa506b
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic extract 64 from 128-bit, x86_64 version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef X86_64_LOAD_EXTRACT_AL16_AL8_H
+#define X86_64_LOAD_EXTRACT_AL16_AL8_H
+
+#ifdef CONFIG_INT128_TYPE
+#include "host/atomic128-ldst.h"
+
+/**
+ * load_atom_extract_al16_or_al8:
+ * @pv: host address
+ * @s: object size in bytes, @s <= 8.
+ *
+ * Load @s bytes from @pv, when pv % s != 0.  If [p, p+s-1] does not
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
+ * otherwise the access must be 8-byte atomic.
+ */
+static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
+load_atom_extract_al16_or_al8(void *pv, int s)
+{
+    uintptr_t pi = (uintptr_t)pv;
+    __int128_t *ptr_align = (__int128_t *)(pi & ~7);
+    int shr = (pi & 7) * 8;
+    X86Int128Union r;
+
+    /*
+     * ptr_align % 16 is now only 0 or 8.
+     * If the host supports atomic loads with VMOVDQU, then always use that,
+     * making the branch highly predictable.  Otherwise we must use VMOVDQA
+     * when ptr_align % 16 == 0 for 16-byte atomicity.
+     */
+    if ((cpuinfo & CPUINFO_ATOMIC_VMOVDQU) || (pi & 8)) {
+        asm("vmovdqu %1, %0" : "=x" (r.v) : "m" (*ptr_align));
+    } else {
+        asm("vmovdqa %1, %0" : "=x" (r.v) : "m" (*ptr_align));
+    }
+    return int128_getlo(int128_urshift(r.s, shr));
+}
+#else
+/* Fallback definition that must be optimized away, or error.  */
+uint64_t QEMU_ERROR("unsupported atomic")
+    load_atom_extract_al16_or_al8(void *pv, int s);
+#endif
+
+#endif /* X86_64_LOAD_EXTRACT_AL16_AL8_H */
index 0a1e5bddd3e51cdd5f2fbc929faedc2df7112472..0b7fe721984bb86e571f71c5c17bbfdace26dc73 100644 (file)
@@ -73,7 +73,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(QAuthZListFile,
  * The object can be created on the command line using
  *
  *   -object authz-list-file,id=authz0,\
- *           filename=/etc/qemu/myvm-vnc.acl,refresh=yes
+ *           filename=/etc/qemu/myvm-vnc.acl,refresh=on
  *
  */
 struct QAuthZListFile {
index 878b4c358150ae6560d99745e5169627c947550f..a59e39f49d92f44b94cd86e1a86ac27f7e9affb7 100644 (file)
@@ -27,7 +27,7 @@
 
 #include "qemu/timed-average.h"
 #include "qemu/thread.h"
-#include "qapi/qapi-builtin-types.h"
+#include "qapi/qapi-types-common.h"
 
 typedef struct BlockAcctTimedStats BlockAcctTimedStats;
 typedef struct BlockAcctStats BlockAcctStats;
@@ -37,6 +37,7 @@ enum BlockAcctType {
     BLOCK_ACCT_READ,
     BLOCK_ACCT_WRITE,
     BLOCK_ACCT_FLUSH,
+    BLOCK_ACCT_ZONE_APPEND,
     BLOCK_ACCT_UNMAP,
     BLOCK_MAX_IOTYPE,
 };
@@ -100,8 +101,8 @@ typedef struct BlockAcctCookie {
 } BlockAcctCookie;
 
 void block_acct_init(BlockAcctStats *stats);
-void block_acct_setup(BlockAcctStats *stats, bool account_invalid,
-                     bool account_failed);
+void block_acct_setup(BlockAcctStats *stats, enum OnOffAuto account_invalid,
+                      enum OnOffAuto account_failed);
 void block_acct_cleanup(BlockAcctStats *stats);
 void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length);
 BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
index b39eefb38d1607e881b4020e384b644d8849ff1d..5449b6d7428df3e9604c880b630c6f0565bb1dc2 100644 (file)
@@ -59,10 +59,13 @@ typedef struct {
 extern AioWait global_aio_wait;
 
 /**
- * AIO_WAIT_WHILE:
+ * AIO_WAIT_WHILE_INTERNAL:
  * @ctx: the aio context, or NULL if multiple aio contexts (for which the
  *       caller does not hold a lock) are involved in the polling condition.
  * @cond: wait while this conditional expression is true
+ * @unlock: whether to unlock and then lock again @ctx. This applies
+ * only when waiting for another AioContext from the main loop.
+ * Otherwise it's ignored.
  *
  * Wait while a condition is true.  Use this to implement synchronous
  * operations that require event loop activity.
@@ -75,12 +78,14 @@ extern AioWait global_aio_wait;
  * wait on conditions between two IOThreads since that could lead to deadlock,
  * go via the main loop instead.
  */
-#define AIO_WAIT_WHILE(ctx, cond) ({                               \
+#define AIO_WAIT_WHILE_INTERNAL(ctx, cond, unlock) ({              \
     bool waited_ = false;                                          \
     AioWait *wait_ = &global_aio_wait;                             \
     AioContext *ctx_ = (ctx);                                      \
     /* Increment wait_->num_waiters before evaluating cond. */     \
     qatomic_inc(&wait_->num_waiters);                              \
+    /* Paired with smp_mb in aio_wait_kick(). */                   \
+    smp_mb__after_rmw();                                           \
     if (ctx_ && in_aio_context_home_thread(ctx_)) {                \
         while ((cond)) {                                           \
             aio_poll(ctx_, true);                                  \
@@ -90,11 +95,11 @@ extern AioWait global_aio_wait;
         assert(qemu_get_current_aio_context() ==                   \
                qemu_get_aio_context());                            \
         while ((cond)) {                                           \
-            if (ctx_) {                                            \
+            if (unlock && ctx_) {                                  \
                 aio_context_release(ctx_);                         \
             }                                                      \
             aio_poll(qemu_get_aio_context(), true);                \
-            if (ctx_) {                                            \
+            if (unlock && ctx_) {                                  \
                 aio_context_acquire(ctx_);                         \
             }                                                      \
             waited_ = true;                                        \
@@ -103,6 +108,12 @@ extern AioWait global_aio_wait;
     qatomic_dec(&wait_->num_waiters);                              \
     waited_; })
 
+#define AIO_WAIT_WHILE(ctx, cond)                                  \
+    AIO_WAIT_WHILE_INTERNAL(ctx, cond, true)
+
+#define AIO_WAIT_WHILE_UNLOCKED(ctx, cond)                         \
+    AIO_WAIT_WHILE_INTERNAL(ctx, cond, false)
+
 /**
  * aio_wait_kick:
  * Wake up the main thread if it is waiting on AIO_WAIT_WHILE().  During
@@ -120,7 +131,7 @@ void aio_wait_kick(void);
  *
  * Run a BH in @ctx and wait for it to complete.
  *
- * Must be called from the main loop thread with @ctx acquired exactly once.
+ * Must be called from the main loop thread without @ctx acquired.
  * Note that main loop event processing may occur.
  */
 void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
index 5f342267d5cea100def1d01f378e7956bfa2d7f5..795a375ff2440e17ee30e9963e8250e2a47bd947 100644 (file)
 #ifdef CONFIG_LINUX_IO_URING
 #include <liburing.h>
 #endif
-#include "qemu/coroutine.h"
+#include "qemu/coroutine-core.h"
 #include "qemu/queue.h"
 #include "qemu/event_notifier.h"
 #include "qemu/thread.h"
 #include "qemu/timer.h"
+#include "block/graph-lock.h"
+#include "hw/qdev-core.h"
+
 
 typedef struct BlockAIOCB BlockAIOCB;
 typedef void BlockCompletionFunc(void *opaque, int ret);
 
 typedef struct AIOCBInfo {
     void (*cancel_async)(BlockAIOCB *acb);
-    AioContext *(*get_aio_context)(BlockAIOCB *acb);
     size_t aiocb_size;
 } AIOCBInfo;
 
@@ -51,7 +53,6 @@ typedef void QEMUBHFunc(void *opaque);
 typedef bool AioPollFn(void *opaque);
 typedef void IOHandler(void *opaque);
 
-struct Coroutine;
 struct ThreadPool;
 struct LinuxAioState;
 struct LuringState;
@@ -127,6 +128,14 @@ struct AioContext {
     /* Used by AioContext users to protect from multi-threaded access.  */
     QemuRecMutex lock;
 
+    /*
+     * Keep track of readers and writers of the block layer graph.
+     * This is essential to avoid performing additions and removal
+     * of nodes and edges from block graph while some
+     * other thread is traversing it.
+     */
+    BdrvGraphRWlock *bdrv_graph;
+
     /* The list of registered AIO handlers.  Protected by ctx->list_lock. */
     AioHandlerList aio_handlers;
 
@@ -192,23 +201,17 @@ struct AioContext {
     QSLIST_HEAD(, Coroutine) scheduled_coroutines;
     QEMUBH *co_schedule_bh;
 
+    int thread_pool_min;
+    int thread_pool_max;
     /* Thread pool for performing work and receiving completion callbacks.
      * Has its own locking.
      */
     struct ThreadPool *thread_pool;
 
 #ifdef CONFIG_LINUX_AIO
-    /*
-     * State for native Linux AIO.  Uses aio_context_acquire/release for
-     * locking.
-     */
     struct LinuxAioState *linux_aio;
 #endif
 #ifdef CONFIG_LINUX_IO_URING
-    /*
-     * State for Linux io_uring.  Uses aio_context_acquire/release for
-     * locking.
-     */
     struct LuringState *linux_io_uring;
 
     /* State for file descriptor monitoring using Linux io_uring */
@@ -221,8 +224,6 @@ struct AioContext {
      */
     QEMUTimerListGroup tlg;
 
-    int external_disable_cnt;
-
     /* Number of AioHandlers without .io_poll() */
     int poll_disable_cnt;
 
@@ -232,6 +233,9 @@ struct AioContext {
     int64_t poll_grow;      /* polling time growth factor */
     int64_t poll_shrink;    /* polling time shrink factor */
 
+    /* AIO engine parameters */
+    int64_t aio_max_batch;  /* maximum number of requests in a batch */
+
     /*
      * List of handlers participating in userspace polling.  Protected by
      * ctx->list_lock.  Iterated and modified mostly by the event loop thread
@@ -291,20 +295,57 @@ void aio_context_acquire(AioContext *ctx);
 /* Relinquish ownership of the AioContext. */
 void aio_context_release(AioContext *ctx);
 
+/**
+ * aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will
+ * run only once and as soon as possible.
+ *
+ * @name: A human-readable identifier for debugging purposes.
+ */
+void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
+                                  const char *name);
+
 /**
  * aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run
  * only once and as soon as possible.
+ *
+ * A convenience wrapper for aio_bh_schedule_oneshot_full() that uses cb as the
+ * name string.
  */
-void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+#define aio_bh_schedule_oneshot(ctx, cb, opaque) \
+    aio_bh_schedule_oneshot_full((ctx), (cb), (opaque), (stringify(cb)))
 
 /**
- * aio_bh_new: Allocate a new bottom half structure.
+ * aio_bh_new_full: Allocate a new bottom half structure.
  *
  * Bottom halves are lightweight callbacks whose invocation is guaranteed
  * to be wait-free, thread-safe and signal-safe.  The #QEMUBH structure
  * is opaque and must be allocated prior to its use.
+ *
+ * @name: A human-readable identifier for debugging purposes.
+ * @reentrancy_guard: A guard set when entering a cb to prevent
+ * device-reentrancy issues
+ */
+QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
+                        const char *name, MemReentrancyGuard *reentrancy_guard);
+
+/**
+ * aio_bh_new: Allocate a new bottom half structure
+ *
+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name
+ * string.
+ */
+#define aio_bh_new(ctx, cb, opaque) \
+    aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL)
+
+/**
+ * aio_bh_new_guarded: Allocate a new bottom half structure with a
+ * reentrancy_guard
+ *
+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name
+ * string.
  */
-QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+#define aio_bh_new_guarded(ctx, cb, opaque, guard) \
+    aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard)
 
 /**
  * aio_notify: Force processing of pending events.
@@ -426,7 +467,7 @@ void aio_dispatch(AioContext *ctx);
  * or more AIO events have completed, to ensure something has moved
  * before returning.
  */
-bool aio_poll(AioContext *ctx, bool blocking);
+bool no_coroutine_fn aio_poll(AioContext *ctx, bool blocking);
 
 /* Register a file descriptor and associated callbacks.  Behaves very similarly
  * to qemu_set_fd_handler.  Unlike qemu_set_fd_handler, these callbacks will
@@ -437,20 +478,12 @@ bool aio_poll(AioContext *ctx, bool blocking);
  */
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
-                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         AioPollFn *io_poll,
+                        IOHandler *io_poll_ready,
                         void *opaque);
 
-/* Set polling begin/end callbacks for a file descriptor that has already been
- * registered with aio_set_fd_handler.  Do nothing if the file descriptor is
- * not registered.
- */
-void aio_set_fd_poll(AioContext *ctx, int fd,
-                     IOHandler *io_poll_begin,
-                     IOHandler *io_poll_end);
-
 /* Register an event notifier and associated callbacks.  Behaves very similarly
  * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
  * will be invoked when using aio_poll().
@@ -460,13 +493,18 @@ void aio_set_fd_poll(AioContext *ctx, int fd,
  */
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *notifier,
-                            bool is_external,
                             EventNotifierHandler *io_read,
-                            AioPollFn *io_poll);
+                            AioPollFn *io_poll,
+                            EventNotifierHandler *io_poll_ready);
 
-/* Set polling begin/end callbacks for an event notifier that has already been
+/*
+ * Set polling begin/end callbacks for an event notifier that has already been
  * registered with aio_set_event_notifier.  Do nothing if the event notifier is
  * not registered.
+ *
+ * Note that if the io_poll_end() callback (or the entire notifier) is removed
+ * during polling, it will not be called, so an io_poll_begin() is not
+ * necessarily always followed by an io_poll_end().
  */
 void aio_set_event_notifier_poll(AioContext *ctx,
                                  EventNotifier *notifier,
@@ -588,59 +626,6 @@ static inline void aio_timer_init(AioContext *ctx,
  */
 int64_t aio_compute_timeout(AioContext *ctx);
 
-/**
- * aio_disable_external:
- * @ctx: the aio context
- *
- * Disable the further processing of external clients.
- */
-static inline void aio_disable_external(AioContext *ctx)
-{
-    qatomic_inc(&ctx->external_disable_cnt);
-}
-
-/**
- * aio_enable_external:
- * @ctx: the aio context
- *
- * Enable the processing of external clients.
- */
-static inline void aio_enable_external(AioContext *ctx)
-{
-    int old;
-
-    old = qatomic_fetch_dec(&ctx->external_disable_cnt);
-    assert(old > 0);
-    if (old == 1) {
-        /* Kick event loop so it re-arms file descriptors */
-        aio_notify(ctx);
-    }
-}
-
-/**
- * aio_external_disabled:
- * @ctx: the aio context
- *
- * Return true if the external clients are disabled.
- */
-static inline bool aio_external_disabled(AioContext *ctx)
-{
-    return qatomic_read(&ctx->external_disable_cnt);
-}
-
-/**
- * aio_node_check:
- * @ctx: the aio context
- * @is_external: Whether or not the checked node is an external event source.
- *
- * Check if the node's is_external flag is okay to be polled by the ctx at this
- * moment. True means green light.
- */
-static inline bool aio_node_check(AioContext *ctx, bool is_external)
-{
-    return !is_external || !qatomic_read(&ctx->external_disable_cnt);
-}
-
 /**
  * aio_co_schedule:
  * @ctx: the aio context
@@ -653,7 +638,7 @@ static inline bool aio_node_check(AioContext *ctx, bool is_external)
  * is the context in which the coroutine is running (i.e. the value of
  * qemu_get_current_aio_context() from the coroutine itself).
  */
-void aio_co_schedule(AioContext *ctx, struct Coroutine *co);
+void aio_co_schedule(AioContext *ctx, Coroutine *co);
 
 /**
  * aio_co_reschedule_self:
@@ -676,7 +661,7 @@ void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx);
  * context.  The coroutine must not be entered by anyone else while
  * aio_co_wake() is active.
  */
-void aio_co_wake(struct Coroutine *co);
+void aio_co_wake(Coroutine *co);
 
 /**
  * aio_co_enter:
@@ -685,16 +670,19 @@ void aio_co_wake(struct Coroutine *co);
  *
  * Enter a coroutine in the specified AioContext.
  */
-void aio_co_enter(AioContext *ctx, struct Coroutine *co);
+void aio_co_enter(AioContext *ctx, Coroutine *co);
 
 /**
  * Return the AioContext whose event loop runs in the current thread.
  *
  * If called from an IOThread this will be the IOThread's AioContext.  If
- * called from another thread it will be the main loop AioContext.
+ * called from the main thread or with the "big QEMU lock" taken it
+ * will be the main loop AioContext.
  */
 AioContext *qemu_get_current_aio_context(void);
 
+void qemu_set_current_aio_context(AioContext *ctx);
+
 /**
  * aio_context_setup:
  * @ctx: the aio context
@@ -727,4 +715,21 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                  int64_t grow, int64_t shrink,
                                  Error **errp);
 
+/**
+ * aio_context_set_aio_params:
+ * @ctx: the aio context
+ * @max_batch: maximum number of requests in a batch, 0 means that the
+ *             engine will use its default
+ */
+void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
+                                Error **errp);
+
+/**
+ * aio_context_set_thread_pool_params:
+ * @ctx: the aio context
+ * @min: min number of threads to have readily available in the thread pool
+ * @min: max number of threads the thread pool can contain
+ */
+void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min,
+                                        int64_t max, Error **errp);
 #endif
index 50bc1e18177132da28002e4cba20c3404d92850b..18a9c41f4e6c78bb32dedb48bbf3d97438104c7c 100644 (file)
@@ -25,8 +25,6 @@
 #ifndef BLOCK_AIO_TASK_H
 #define BLOCK_AIO_TASK_H
 
-#include "qemu/coroutine.h"
-
 typedef struct AioTaskPool AioTaskPool;
 typedef struct AioTask AioTask;
 typedef int coroutine_fn (*AioTaskFunc)(AioTask *task);
diff --git a/include/block/block-common.h b/include/block/block-common.h
new file mode 100644 (file)
index 0000000..d759956
--- /dev/null
@@ -0,0 +1,568 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_COMMON_H
+#define BLOCK_COMMON_H
+
+#include "qapi/qapi-types-block-core.h"
+#include "qemu/queue.h"
+
+/*
+ * co_wrapper{*}: Function specifiers used by block-coroutine-wrapper.py
+ *
+ * Function specifiers, which do nothing but mark functions to be
+ * generated by scripts/block-coroutine-wrapper.py
+ *
+ * Usage: read docs/devel/block-coroutine-wrapper.rst
+ *
+ * There are 4 kind of specifiers:
+ * - co_wrapper functions can be called by only non-coroutine context, because
+ *   they always generate a new coroutine.
+ * - co_wrapper_mixed functions can be called by both coroutine and
+ *   non-coroutine context.
+ * - co_wrapper_bdrv_rdlock are co_wrapper functions but automatically take and
+ *   release the graph rdlock when creating a new coroutine
+ * - co_wrapper_mixed_bdrv_rdlock are co_wrapper_mixed functions but
+ *   automatically take and release the graph rdlock when creating a new
+ *   coroutine.
+ *
+ * These functions should not be called from a coroutine_fn; instead,
+ * call the wrapped function directly.
+ */
+#define co_wrapper                     no_coroutine_fn
+#define co_wrapper_mixed               no_coroutine_fn coroutine_mixed_fn
+#define co_wrapper_bdrv_rdlock         no_coroutine_fn
+#define co_wrapper_mixed_bdrv_rdlock   no_coroutine_fn coroutine_mixed_fn
+
+/*
+ * no_co_wrapper: Function specifier used by block-coroutine-wrapper.py
+ *
+ * Function specifier which does nothing but mark functions to be generated by
+ * scripts/block-coroutine-wrapper.py.
+ *
+ * A no_co_wrapper function declaration creates a coroutine_fn wrapper around
+ * functions that must not be called in coroutine context. It achieves this by
+ * scheduling a BH in the bottom half that runs the respective non-coroutine
+ * function. The coroutine yields after scheduling the BH and is reentered when
+ * the wrapped function returns.
+ *
+ * A no_co_wrapper_bdrv_rdlock function is a no_co_wrapper function that
+ * automatically takes the graph rdlock when calling the wrapped function. In
+ * the same way, no_co_wrapper_bdrv_wrlock functions automatically take the
+ * graph wrlock.
+ *
+ * If the first parameter of the function is a BlockDriverState, BdrvChild or
+ * BlockBackend pointer, the AioContext lock for it is taken in the wrapper.
+ */
+#define no_co_wrapper
+#define no_co_wrapper_bdrv_rdlock
+#define no_co_wrapper_bdrv_wrlock
+
+#include "block/blockjob.h"
+
+/* block.c */
+typedef struct BlockDriver BlockDriver;
+typedef struct BdrvChild BdrvChild;
+typedef struct BdrvChildClass BdrvChildClass;
+
+typedef enum BlockZoneOp {
+    BLK_ZO_OPEN,
+    BLK_ZO_CLOSE,
+    BLK_ZO_FINISH,
+    BLK_ZO_RESET,
+} BlockZoneOp;
+
+typedef enum BlockZoneModel {
+    BLK_Z_NONE = 0x0, /* Regular block device */
+    BLK_Z_HM = 0x1, /* Host-managed zoned block device */
+    BLK_Z_HA = 0x2, /* Host-aware zoned block device */
+} BlockZoneModel;
+
+typedef enum BlockZoneState {
+    BLK_ZS_NOT_WP = 0x0,
+    BLK_ZS_EMPTY = 0x1,
+    BLK_ZS_IOPEN = 0x2,
+    BLK_ZS_EOPEN = 0x3,
+    BLK_ZS_CLOSED = 0x4,
+    BLK_ZS_RDONLY = 0xD,
+    BLK_ZS_FULL = 0xE,
+    BLK_ZS_OFFLINE = 0xF,
+} BlockZoneState;
+
+typedef enum BlockZoneType {
+    BLK_ZT_CONV = 0x1, /* Conventional random writes supported */
+    BLK_ZT_SWR = 0x2, /* Sequential writes required */
+    BLK_ZT_SWP = 0x3, /* Sequential writes preferred */
+} BlockZoneType;
+
+/*
+ * Zone descriptor data structure.
+ * Provides information on a zone with all position and size values in bytes.
+ */
+typedef struct BlockZoneDescriptor {
+    uint64_t start;
+    uint64_t length;
+    uint64_t cap;
+    uint64_t wp;
+    BlockZoneType type;
+    BlockZoneState state;
+} BlockZoneDescriptor;
+
+/*
+ * Track write pointers of a zone in bytes.
+ */
+typedef struct BlockZoneWps {
+    CoMutex colock;
+    uint64_t wp[];
+} BlockZoneWps;
+
+typedef struct BlockDriverInfo {
+    /* in bytes, 0 if irrelevant */
+    int cluster_size;
+    /*
+     * A fraction of cluster_size, if supported (currently QCOW2 only); if
+     * disabled or unsupported, set equal to cluster_size.
+     */
+    int subcluster_size;
+    /* offset at which the VM state can be saved (0 if not possible) */
+    int64_t vm_state_offset;
+    bool is_dirty;
+    /*
+     * True if this block driver only supports compressed writes
+     */
+    bool needs_compressed_writes;
+} BlockDriverInfo;
+
+typedef struct BlockFragInfo {
+    uint64_t allocated_clusters;
+    uint64_t total_clusters;
+    uint64_t fragmented_clusters;
+    uint64_t compressed_clusters;
+} BlockFragInfo;
+
+typedef enum {
+    BDRV_REQ_COPY_ON_READ       = 0x1,
+    BDRV_REQ_ZERO_WRITE         = 0x2,
+
+    /*
+     * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
+     * that the block driver should unmap (discard) blocks if it is guaranteed
+     * that the result will read back as zeroes. The flag is only passed to the
+     * driver if the block device is opened with BDRV_O_UNMAP.
+     */
+    BDRV_REQ_MAY_UNMAP          = 0x4,
+
+    /*
+     * An optimization hint when all QEMUIOVector elements are within
+     * previously registered bdrv_register_buf() memory ranges.
+     *
+     * Code that replaces the user's QEMUIOVector elements with bounce buffers
+     * must take care to clear this flag.
+     */
+    BDRV_REQ_REGISTERED_BUF     = 0x8,
+
+    BDRV_REQ_FUA                = 0x10,
+    BDRV_REQ_WRITE_COMPRESSED   = 0x20,
+
+    /*
+     * Signifies that this write request will not change the visible disk
+     * content.
+     */
+    BDRV_REQ_WRITE_UNCHANGED    = 0x40,
+
+    /*
+     * Forces request serialisation. Use only with write requests.
+     */
+    BDRV_REQ_SERIALISING        = 0x80,
+
+    /*
+     * Execute the request only if the operation can be offloaded or otherwise
+     * be executed efficiently, but return an error instead of using a slow
+     * fallback.
+     */
+    BDRV_REQ_NO_FALLBACK        = 0x100,
+
+    /*
+     * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
+     * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
+     * filter is involved), in which case it signals that the COR operation
+     * need not read the data into memory (qiov) but only ensure they are
+     * copied to the top layer (i.e., that COR operation is done).
+     */
+    BDRV_REQ_PREFETCH  = 0x200,
+
+    /*
+     * If we need to wait for other requests, just fail immediately. Used
+     * only together with BDRV_REQ_SERIALISING. Used only with requests aligned
+     * to request_alignment (corresponding assertions are in block/io.c).
+     */
+    BDRV_REQ_NO_WAIT = 0x400,
+
+    /* Mask of valid flags */
+    BDRV_REQ_MASK               = 0x7ff,
+} BdrvRequestFlags;
+
+#define BDRV_O_NO_SHARE    0x0001 /* don't share permissions */
+#define BDRV_O_RDWR        0x0002
+#define BDRV_O_RESIZE      0x0004 /* request permission for resizing the node */
+#define BDRV_O_SNAPSHOT    0x0008 /* open the file read only and save
+                                     writes in a snapshot */
+#define BDRV_O_TEMPORARY   0x0010 /* delete the file after use */
+#define BDRV_O_NOCACHE     0x0020 /* do not use the host page cache */
+#define BDRV_O_NATIVE_AIO  0x0080 /* use native AIO instead of the
+                                     thread pool */
+#define BDRV_O_NO_BACKING  0x0100 /* don't open the backing file */
+#define BDRV_O_NO_FLUSH    0x0200 /* disable flushing on this disk */
+#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
+#define BDRV_O_INACTIVE    0x0800  /* consistency hint for migration handoff */
+#define BDRV_O_CHECK       0x1000  /* open solely for consistency check */
+#define BDRV_O_ALLOW_RDWR  0x2000  /* allow reopen to change from r/o to r/w */
+#define BDRV_O_UNMAP       0x4000  /* execute guest UNMAP/TRIM operations */
+#define BDRV_O_PROTOCOL    0x8000  /* if no block driver is explicitly given:
+                                      select an appropriate protocol driver,
+                                      ignoring the format layer */
+#define BDRV_O_NO_IO       0x10000 /* don't initialize for I/O */
+#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening
+                                      read-write fails */
+#define BDRV_O_IO_URING    0x40000 /* use io_uring instead of the thread pool */
+
+#define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
+
+
+/* Option names of options parsed by the block layer */
+
+#define BDRV_OPT_CACHE_WB       "cache.writeback"
+#define BDRV_OPT_CACHE_DIRECT   "cache.direct"
+#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
+#define BDRV_OPT_READ_ONLY      "read-only"
+#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
+#define BDRV_OPT_DISCARD        "discard"
+#define BDRV_OPT_FORCE_SHARE    "force-share"
+
+
+#define BDRV_SECTOR_BITS   9
+#define BDRV_SECTOR_SIZE   (1ULL << BDRV_SECTOR_BITS)
+
+/*
+ * Get the first most significant bit of wp. If it is zero, then
+ * the zone type is SWR.
+ */
+#define BDRV_ZT_IS_CONV(wp)    (wp & (1ULL << 63))
+
+#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \
+                                           INT_MAX >> BDRV_SECTOR_BITS)
+#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
+
+/*
+ * We want allow aligning requests and disk length up to any 32bit alignment
+ * and don't afraid of overflow.
+ * To achieve it, and in the same time use some pretty number as maximum disk
+ * size, let's define maximum "length" (a limit for any offset/bytes request and
+ * for disk size) to be the greatest power of 2 less than INT64_MAX.
+ */
+#define BDRV_MAX_ALIGNMENT (1L << 30)
+#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT))
+
+/*
+ * Allocation status flags for bdrv_block_status() and friends.
+ *
+ * Public flags:
+ * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
+ * BDRV_BLOCK_ZERO: offset reads as zero
+ * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
+ * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
+ *                       layer rather than any backing, set by block layer
+ * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
+ *                 layer, set by block layer
+ * BDRV_BLOCK_COMPRESSED: the underlying data is compressed; only valid for
+ *                        the formats supporting compression: qcow, qcow2
+ *
+ * Internal flags:
+ * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
+ *                 that the block layer recompute the answer from the returned
+ *                 BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
+ * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
+ *                     zeroes in file child of current block node inside
+ *                     returned region. Only valid together with both
+ *                     BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
+ *                     appear with BDRV_BLOCK_ZERO.
+ *
+ * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
+ * host offset within the returned BDS that is allocated for the
+ * corresponding raw guest data.  However, whether that offset
+ * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
+ *
+ * DATA ZERO OFFSET_VALID
+ *  t    t        t       sectors read as zero, returned file is zero at offset
+ *  t    f        t       sectors read as valid from file at offset
+ *  f    t        t       sectors preallocated, read as zero, returned file not
+ *                        necessarily zero at offset
+ *  f    f        t       sectors preallocated but read from backing_hd,
+ *                        returned file contains garbage at offset
+ *  t    t        f       sectors preallocated, read as zero, unknown offset
+ *  t    f        f       sectors read from unknown file or offset
+ *  f    t        f       not allocated or unknown offset, read as zero
+ *  f    f        f       not allocated or unknown offset, read from backing_hd
+ */
+#define BDRV_BLOCK_DATA         0x01
+#define BDRV_BLOCK_ZERO         0x02
+#define BDRV_BLOCK_OFFSET_VALID 0x04
+#define BDRV_BLOCK_RAW          0x08
+#define BDRV_BLOCK_ALLOCATED    0x10
+#define BDRV_BLOCK_EOF          0x20
+#define BDRV_BLOCK_RECURSE      0x40
+#define BDRV_BLOCK_COMPRESSED   0x80
+
+typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
+
+typedef struct BDRVReopenState {
+    BlockDriverState *bs;
+    int flags;
+    BlockdevDetectZeroesOptions detect_zeroes;
+    bool backing_missing;
+    BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
+    BlockDriverState *old_file_bs; /* keep pointer for permissions update */
+    QDict *options;
+    QDict *explicit_options;
+    void *opaque;
+} BDRVReopenState;
+
+/*
+ * Block operation types
+ */
+typedef enum BlockOpType {
+    BLOCK_OP_TYPE_BACKUP_SOURCE,
+    BLOCK_OP_TYPE_BACKUP_TARGET,
+    BLOCK_OP_TYPE_CHANGE,
+    BLOCK_OP_TYPE_COMMIT_SOURCE,
+    BLOCK_OP_TYPE_COMMIT_TARGET,
+    BLOCK_OP_TYPE_DATAPLANE,
+    BLOCK_OP_TYPE_DRIVE_DEL,
+    BLOCK_OP_TYPE_EJECT,
+    BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
+    BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
+    BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
+    BLOCK_OP_TYPE_MIRROR_SOURCE,
+    BLOCK_OP_TYPE_MIRROR_TARGET,
+    BLOCK_OP_TYPE_RESIZE,
+    BLOCK_OP_TYPE_STREAM,
+    BLOCK_OP_TYPE_REPLACE,
+    BLOCK_OP_TYPE_MAX,
+} BlockOpType;
+
+/* Block node permission constants */
+enum {
+    /**
+     * A user that has the "permission" of consistent reads is guaranteed that
+     * their view of the contents of the block device is complete and
+     * self-consistent, representing the contents of a disk at a specific
+     * point.
+     *
+     * For most block devices (including their backing files) this is true, but
+     * the property cannot be maintained in a few situations like for
+     * intermediate nodes of a commit block job.
+     */
+    BLK_PERM_CONSISTENT_READ    = 0x01,
+
+    /** This permission is required to change the visible disk contents. */
+    BLK_PERM_WRITE              = 0x02,
+
+    /**
+     * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
+     * required for writes to the block node when the caller promises that
+     * the visible disk content doesn't change.
+     *
+     * As the BLK_PERM_WRITE permission is strictly stronger, either is
+     * sufficient to perform an unchanging write.
+     */
+    BLK_PERM_WRITE_UNCHANGED    = 0x04,
+
+    /** This permission is required to change the size of a block node. */
+    BLK_PERM_RESIZE             = 0x08,
+
+    /**
+     * There was a now-removed bit BLK_PERM_GRAPH_MOD, with value of 0x10. QEMU
+     * 6.1 and earlier may still lock the corresponding byte in block/file-posix
+     * locking.  So, implementing some new permission should be very careful to
+     * not interfere with this old unused thing.
+     */
+
+    BLK_PERM_ALL                = 0x0f,
+
+    DEFAULT_PERM_PASSTHROUGH    = BLK_PERM_CONSISTENT_READ
+                                 | BLK_PERM_WRITE
+                                 | BLK_PERM_WRITE_UNCHANGED
+                                 | BLK_PERM_RESIZE,
+
+    DEFAULT_PERM_UNCHANGED      = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
+};
+
+/*
+ * Flags that parent nodes assign to child nodes to specify what kind of
+ * role(s) they take.
+ *
+ * At least one of DATA, METADATA, FILTERED, or COW must be set for
+ * every child.
+ *
+ *
+ * = Connection with bs->children, bs->file and bs->backing fields =
+ *
+ * 1. Filters
+ *
+ * Filter drivers have drv->is_filter = true.
+ *
+ * Filter node has exactly one FILTERED|PRIMARY child, and may have other
+ * children which must not have these bits (one example is the
+ * copy-before-write filter, which also has its target DATA child).
+ *
+ * Filter nodes never have COW children.
+ *
+ * For most filters, the filtered child is linked in bs->file, bs->backing is
+ * NULL.  For some filters (as an exception), it is the other way around; those
+ * drivers will have drv->filtered_child_is_backing set to true (see that
+ * field’s documentation for what drivers this concerns)
+ *
+ * 2. "raw" driver (block/raw-format.c)
+ *
+ * Formally it's not a filter (drv->is_filter = false)
+ *
+ * bs->backing is always NULL
+ *
+ * Only has one child, linked in bs->file. Its role is either FILTERED|PRIMARY
+ * (like filter) or DATA|PRIMARY depending on options.
+ *
+ * 3. Other drivers
+ *
+ * Don't have any FILTERED children.
+ *
+ * May have at most one COW child. In this case it's linked in bs->backing.
+ * Otherwise bs->backing is NULL. COW child is never PRIMARY.
+ *
+ * May have at most one PRIMARY child. In this case it's linked in bs->file.
+ * Otherwise bs->file is NULL.
+ *
+ * May also have some other children that don't have the PRIMARY or COW bit set.
+ */
+enum BdrvChildRoleBits {
+    /*
+     * This child stores data.
+     * Any node may have an arbitrary number of such children.
+     */
+    BDRV_CHILD_DATA         = (1 << 0),
+
+    /*
+     * This child stores metadata.
+     * Any node may have an arbitrary number of metadata-storing
+     * children.
+     */
+    BDRV_CHILD_METADATA     = (1 << 1),
+
+    /*
+     * A child that always presents exactly the same visible data as
+     * the parent, e.g. by virtue of the parent forwarding all reads
+     * and writes.
+     * This flag is mutually exclusive with DATA, METADATA, and COW.
+     * Any node may have at most one filtered child at a time.
+     */
+    BDRV_CHILD_FILTERED     = (1 << 2),
+
+    /*
+     * Child from which to read all data that isn't allocated in the
+     * parent (i.e., the backing child); such data is copied to the
+     * parent through COW (and optionally COR).
+     * This field is mutually exclusive with DATA, METADATA, and
+     * FILTERED.
+     * Any node may have at most one such backing child at a time.
+     */
+    BDRV_CHILD_COW          = (1 << 3),
+
+    /*
+     * The primary child.  For most drivers, this is the child whose
+     * filename applies best to the parent node.
+     * Any node may have at most one primary child at a time.
+     */
+    BDRV_CHILD_PRIMARY      = (1 << 4),
+
+    /* Useful combination of flags */
+    BDRV_CHILD_IMAGE        = BDRV_CHILD_DATA
+                              | BDRV_CHILD_METADATA
+                              | BDRV_CHILD_PRIMARY,
+};
+
+/* Mask of BdrvChildRoleBits values */
+typedef unsigned int BdrvChildRole;
+
+typedef struct BdrvCheckResult {
+    int corruptions;
+    int leaks;
+    int check_errors;
+    int corruptions_fixed;
+    int leaks_fixed;
+    int64_t image_end_offset;
+    BlockFragInfo bfi;
+} BdrvCheckResult;
+
+typedef enum {
+    BDRV_FIX_LEAKS    = 1,
+    BDRV_FIX_ERRORS   = 2,
+} BdrvCheckMode;
+
+typedef struct BlockSizes {
+    uint32_t phys;
+    uint32_t log;
+} BlockSizes;
+
+typedef struct HDGeometry {
+    uint32_t heads;
+    uint32_t sectors;
+    uint32_t cylinders;
+} HDGeometry;
+
+/*
+ * Common functions that are neither I/O nor Global State.
+ *
+ * These functions must never call any function from other categories
+ * (I/O, "I/O or GS", Global State) except this one, but can be invoked by
+ * all of them.
+ */
+
+char *bdrv_perm_names(uint64_t perm);
+uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm);
+
+void bdrv_init_with_whitelist(void);
+bool bdrv_uses_whitelist(void);
+int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);
+
+int bdrv_parse_aio(const char *mode, int *flags);
+int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
+int bdrv_parse_discard_flags(const char *mode, int *flags);
+
+int path_has_protocol(const char *path);
+int path_is_absolute(const char *path);
+char *path_combine(const char *base_path, const char *filename);
+
+char *bdrv_get_full_backing_filename_from_filename(const char *backed,
+                                                   const char *backing,
+                                                   Error **errp);
+
+#endif /* BLOCK_COMMON_H */
index aac85e14880a430a0d37c6ac0ed570bdaaf9a327..0700953ab8fd775b7d44acfac5278540cdea1c6e 100644 (file)
 #ifndef BLOCK_COPY_H
 #define BLOCK_COPY_H
 
-#include "block/block.h"
-#include "qemu/co-shared-resource.h"
+#include "block/block-common.h"
+#include "qemu/progress_meter.h"
 
-typedef void (*ProgressBytesCallbackFunc)(int64_t bytes, void *opaque);
+/* All APIs are thread-safe */
+
+typedef void (*BlockCopyAsyncCallbackFunc)(void *opaque);
 typedef struct BlockCopyState BlockCopyState;
+typedef struct BlockCopyCallState BlockCopyCallState;
 
 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-                                     int64_t cluster_size,
-                                     BdrvRequestFlags write_flags,
+                                     const BdrvDirtyBitmap *bitmap,
                                      Error **errp);
 
-void block_copy_set_progress_callback(
-        BlockCopyState *s,
-        ProgressBytesCallbackFunc progress_bytes_callback,
-        void *progress_opaque);
-
+/* Function should be called prior any actual copy request */
+void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
+                              bool compress);
 void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm);
 
 void block_copy_state_free(BlockCopyState *s);
 
-int64_t block_copy_reset_unallocated(BlockCopyState *s,
-                                     int64_t offset, int64_t *count);
+void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes);
+
+int64_t coroutine_fn GRAPH_RDLOCK
+block_copy_reset_unallocated(BlockCopyState *s, int64_t offset, int64_t *count);
 
 int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
-                            bool *error_is_read);
+                            bool ignore_ratelimit, uint64_t timeout_ns,
+                            BlockCopyAsyncCallbackFunc cb,
+                            void *cb_opaque);
+
+/*
+ * Run block-copy in a coroutine, create corresponding BlockCopyCallState
+ * object and return pointer to it. Never returns NULL.
+ *
+ * Caller is responsible to call block_copy_call_free() to free
+ * BlockCopyCallState object.
+ *
+ * @max_workers means maximum of parallel coroutines to execute sub-requests,
+ * must be > 0.
+ *
+ * @max_chunk means maximum length for one IO operation. Zero means unlimited.
+ */
+BlockCopyCallState *block_copy_async(BlockCopyState *s,
+                                     int64_t offset, int64_t bytes,
+                                     int max_workers, int64_t max_chunk,
+                                     BlockCopyAsyncCallbackFunc cb,
+                                     void *cb_opaque);
+
+/*
+ * Free finished BlockCopyCallState. Trying to free running
+ * block-copy will crash.
+ */
+void block_copy_call_free(BlockCopyCallState *call_state);
+
+/*
+ * Note, that block-copy call is marked finished prior to calling
+ * the callback.
+ */
+bool block_copy_call_finished(BlockCopyCallState *call_state);
+bool block_copy_call_succeeded(BlockCopyCallState *call_state);
+bool block_copy_call_failed(BlockCopyCallState *call_state);
+bool block_copy_call_cancelled(BlockCopyCallState *call_state);
+int block_copy_call_status(BlockCopyCallState *call_state, bool *error_is_read);
+
+void block_copy_set_speed(BlockCopyState *s, uint64_t speed);
+void block_copy_kick(BlockCopyCallState *call_state);
+
+/*
+ * Cancel running block-copy call.
+ *
+ * Cancel leaves block-copy state valid: dirty bits are correct and you may use
+ * cancel + <run block_copy with same parameters> to emulate pause/resume.
+ *
+ * Note also, that the cancel is async: it only marks block-copy call to be
+ * cancelled. So, the call may be cancelled (block_copy_call_cancelled() reports
+ * true) but not yet finished (block_copy_call_finished() reports false).
+ */
+void block_copy_call_cancel(BlockCopyCallState *call_state);
 
 BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s);
+int64_t block_copy_cluster_size(BlockCopyState *s);
 void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip);
 
 #endif /* BLOCK_COPY_H */
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
new file mode 100644 (file)
index 0000000..6b21fbc
--- /dev/null
@@ -0,0 +1,319 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_GLOBAL_STATE_H
+#define BLOCK_GLOBAL_STATE_H
+
+#include "block/block-common.h"
+#include "qemu/coroutine.h"
+#include "qemu/transactions.h"
+
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * If a function modifies the graph, it also uses drain and/or
+ * aio_context_acquire/release to be sure it has unique access.
+ * aio_context locking is needed together with BQL because of
+ * the thread-safe I/O API that concurrently runs and accesses
+ * the graph without the BQL.
+ *
+ * It is important to note that not all of these functions are
+ * necessarily limited to running under the BQL, but they would
+ * require additional auditing and many small thread-safety changes
+ * to move them into the I/O API. Often it's not worth doing that
+ * work since the APIs are only used with the BQL held at the
+ * moment, so they have been placed in the GS API (for now).
+ *
+ * These functions can call any function from this and other categories
+ * (I/O, "I/O or GS", Common), but must be invoked only by other GS APIs.
+ *
+ * All functions in this header must use the macro
+ * GLOBAL_STATE_CODE();
+ * to catch when they are accidentally called without the BQL.
+ */
+
+void bdrv_init(void);
+BlockDriver *bdrv_find_protocol(const char *filename,
+                                bool allow_protocol_prefix,
+                                Error **errp);
+BlockDriver *bdrv_find_format(const char *format_name);
+
+int coroutine_fn GRAPH_UNLOCKED
+bdrv_co_create(BlockDriver *drv, const char *filename, QemuOpts *opts,
+               Error **errp);
+
+int co_wrapper bdrv_create(BlockDriver *drv, const char *filename,
+                           QemuOpts *opts, Error **errp);
+
+int coroutine_fn GRAPH_UNLOCKED
+bdrv_co_create_file(const char *filename, QemuOpts *opts, Error **errp);
+
+BlockDriverState *bdrv_new(void);
+int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
+                Error **errp);
+
+int GRAPH_WRLOCK
+bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp);
+
+int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
+                          Error **errp);
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+                                   int flags, Error **errp);
+int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
+
+BdrvChild * no_coroutine_fn
+bdrv_open_child(const char *filename, QDict *options, const char *bdref_key,
+                BlockDriverState *parent, const BdrvChildClass *child_class,
+                BdrvChildRole child_role, bool allow_none, Error **errp);
+
+BdrvChild * coroutine_fn no_co_wrapper
+bdrv_co_open_child(const char *filename, QDict *options, const char *bdref_key,
+                BlockDriverState *parent, const BdrvChildClass *child_class,
+                BdrvChildRole child_role, bool allow_none, Error **errp);
+
+int bdrv_open_file_child(const char *filename,
+                         QDict *options, const char *bdref_key,
+                         BlockDriverState *parent, Error **errp);
+
+BlockDriverState * no_coroutine_fn
+bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
+
+BlockDriverState * coroutine_fn no_co_wrapper
+bdrv_co_open_blockdev_ref(BlockdevRef *ref, Error **errp);
+
+int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+                        Error **errp);
+int GRAPH_WRLOCK
+bdrv_set_backing_hd_drained(BlockDriverState *bs, BlockDriverState *backing_hd,
+                            Error **errp);
+
+int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
+                           const char *bdref_key, Error **errp);
+
+BlockDriverState * no_coroutine_fn
+bdrv_open(const char *filename, const char *reference, QDict *options,
+          int flags, Error **errp);
+
+BlockDriverState * coroutine_fn no_co_wrapper
+bdrv_co_open(const char *filename, const char *reference,
+             QDict *options, int flags, Error **errp);
+
+BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
+                                            const char *node_name,
+                                            QDict *options, int flags,
+                                            Error **errp);
+BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
+                                       int flags, Error **errp);
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+                                    BlockDriverState *bs, QDict *options,
+                                    bool keep_old_opts);
+void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
+                Error **errp);
+int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
+                              Error **errp);
+BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
+                                          const char *backing_file);
+void GRAPH_RDLOCK bdrv_refresh_filename(BlockDriverState *bs);
+
+void GRAPH_RDLOCK
+bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp);
+
+int bdrv_commit(BlockDriverState *bs);
+int GRAPH_RDLOCK bdrv_make_empty(BdrvChild *c, Error **errp);
+
+void bdrv_register(BlockDriver *bdrv);
+int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
+                           const char *backing_file_str);
+
+BlockDriverState * GRAPH_RDLOCK
+bdrv_find_overlay(BlockDriverState *active, BlockDriverState *bs);
+
+BlockDriverState * GRAPH_RDLOCK bdrv_find_base(BlockDriverState *bs);
+
+int GRAPH_RDLOCK
+bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
+                          Error **errp);
+void GRAPH_RDLOCK
+bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base);
+
+/*
+ * The units of offset and total_work_size may be chosen arbitrarily by the
+ * block driver; total_work_size may change during the course of the amendment
+ * operation
+ */
+typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
+                                      int64_t total_work_size, void *opaque);
+int GRAPH_RDLOCK
+bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
+                   BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+                   bool force, Error **errp);
+
+/* check if a named node can be replaced when doing drive-mirror */
+BlockDriverState * GRAPH_RDLOCK
+check_to_replace_node(BlockDriverState *parent_bs, const char *node_name,
+                      Error **errp);
+
+int no_coroutine_fn GRAPH_RDLOCK
+bdrv_activate(BlockDriverState *bs, Error **errp);
+
+int coroutine_fn no_co_wrapper_bdrv_rdlock
+bdrv_co_activate(BlockDriverState *bs, Error **errp);
+
+void bdrv_activate_all(Error **errp);
+int bdrv_inactivate_all(void);
+
+int bdrv_flush_all(void);
+void bdrv_close_all(void);
+void bdrv_drain_all_begin(void);
+void bdrv_drain_all_begin_nopoll(void);
+void bdrv_drain_all_end(void);
+void bdrv_drain_all(void);
+
+void bdrv_aio_cancel(BlockAIOCB *acb);
+
+int bdrv_has_zero_init_1(BlockDriverState *bs);
+int coroutine_mixed_fn GRAPH_RDLOCK bdrv_has_zero_init(BlockDriverState *bs);
+BlockDriverState *bdrv_find_node(const char *node_name);
+BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp);
+XDbgBlockGraph * GRAPH_RDLOCK bdrv_get_xdbg_block_graph(Error **errp);
+BlockDriverState *bdrv_lookup_bs(const char *device,
+                                 const char *node_name,
+                                 Error **errp);
+bool GRAPH_RDLOCK
+bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
+
+BlockDriverState *bdrv_next_node(BlockDriverState *bs);
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
+
+typedef struct BdrvNextIterator {
+    enum {
+        BDRV_NEXT_BACKEND_ROOTS,
+        BDRV_NEXT_MONITOR_OWNED,
+    } phase;
+    BlockBackend *blk;
+    BlockDriverState *bs;
+} BdrvNextIterator;
+
+BlockDriverState * GRAPH_RDLOCK bdrv_first(BdrvNextIterator *it);
+BlockDriverState * GRAPH_RDLOCK bdrv_next(BdrvNextIterator *it);
+void bdrv_next_cleanup(BdrvNextIterator *it);
+
+BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
+void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
+                         void *opaque, bool read_only);
+
+char * GRAPH_RDLOCK
+bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp);
+
+char * GRAPH_RDLOCK bdrv_dirname(BlockDriverState *bs, Error **errp);
+
+void bdrv_img_create(const char *filename, const char *fmt,
+                     const char *base_filename, const char *base_fmt,
+                     char *options, uint64_t img_size, int flags,
+                     bool quiet, Error **errp);
+
+void bdrv_ref(BlockDriverState *bs);
+void no_coroutine_fn bdrv_unref(BlockDriverState *bs);
+void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs);
+void GRAPH_WRLOCK bdrv_schedule_unref(BlockDriverState *bs);
+
+void GRAPH_WRLOCK
+bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
+
+void coroutine_fn no_co_wrapper_bdrv_wrlock
+bdrv_co_unref_child(BlockDriverState *parent, BdrvChild *child);
+
+BdrvChild * GRAPH_WRLOCK
+bdrv_attach_child(BlockDriverState *parent_bs,
+                  BlockDriverState *child_bs,
+                  const char *child_name,
+                  const BdrvChildClass *child_class,
+                  BdrvChildRole child_role,
+                  Error **errp);
+
+bool GRAPH_RDLOCK
+bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
+
+void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
+void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason);
+void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
+void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
+bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
+
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+                           const char *tag);
+int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
+
+/**
+ * Locks the AioContext of @bs if it's not the current AioContext. This avoids
+ * double locking which could lead to deadlocks: This is a coroutine_fn, so we
+ * know we already own the lock of the current AioContext.
+ *
+ * May only be called in the main thread.
+ */
+void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
+
+/**
+ * Unlocks the AioContext of @bs if it's not the current AioContext.
+ */
+void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
+
+bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
+                                   GHashTable *visited, Transaction *tran,
+                                   Error **errp);
+int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
+                                BdrvChild *ignore_child, Error **errp);
+
+int GRAPH_RDLOCK bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
+int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
+
+void GRAPH_WRLOCK
+bdrv_add_child(BlockDriverState *parent, BlockDriverState *child, Error **errp);
+
+void GRAPH_WRLOCK
+bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
+
+/**
+ *
+ * bdrv_register_buf/bdrv_unregister_buf:
+ *
+ * Register/unregister a buffer for I/O. For example, VFIO drivers are
+ * interested to know the memory areas that would later be used for I/O, so
+ * that they can prepare IOMMU mapping etc., to get better performance.
+ *
+ * Buffers must not overlap and they must be unregistered with the same <host,
+ * size> values that they were registered with.
+ *
+ * Returns: true on success, false on failure
+ */
+bool bdrv_register_buf(BlockDriverState *bs, void *host, size_t size,
+                       Error **errp);
+void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size);
+
+void bdrv_cancel_in_flight(BlockDriverState *bs);
+
+#endif /* BLOCK_GLOBAL_STATE_H */
index 3412e108ca226ee69578d29bc4c6b9e2c30a3139..6d9152318fe0ed2d9c017290722d73716c4e91e3 100644 (file)
  * the COPYING file in the top-level directory.
  */
 
-#ifndef BLOCK_HMP_COMMANDS_H
-#define BLOCK_HMP_COMMANDS_H
+#ifndef BLOCK_BLOCK_HMP_CMDS_H
+#define BLOCK_BLOCK_HMP_CMDS_H
+
+#include "qemu/coroutine.h"
 
 void hmp_drive_add(Monitor *mon, const QDict *qdict);
 
@@ -38,7 +40,7 @@ void hmp_nbd_server_add(Monitor *mon, const QDict *qdict);
 void hmp_nbd_server_remove(Monitor *mon, const QDict *qdict);
 void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict);
 
-void hmp_block_resize(Monitor *mon, const QDict *qdict);
+void coroutine_fn hmp_block_resize(Monitor *mon, const QDict *qdict);
 void hmp_block_stream(Monitor *mon, const QDict *qdict);
 void hmp_block_passwd(Monitor *mon, const QDict *qdict);
 void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict);
@@ -46,7 +48,7 @@ void hmp_eject(Monitor *mon, const QDict *qdict);
 
 void hmp_qemu_io(Monitor *mon, const QDict *qdict);
 
-void hmp_info_block(Monitor *mon, const QDict *qdict);
+void coroutine_fn hmp_info_block(Monitor *mon, const QDict *qdict);
 void hmp_info_blockstats(Monitor *mon, const QDict *qdict);
 void hmp_info_block_jobs(Monitor *mon, const QDict *qdict);
 void hmp_info_snapshots(Monitor *mon, const QDict *qdict);
diff --git a/include/block/block-io.h b/include/block/block-io.h
new file mode 100644 (file)
index 0000000..f8729cc
--- /dev/null
@@ -0,0 +1,457 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_IO_H
+#define BLOCK_IO_H
+
+#include "block/aio-wait.h"
+#include "block/block-common.h"
+#include "qemu/coroutine.h"
+#include "qemu/iov.h"
+
+/*
+ * I/O API functions. These functions are thread-safe, and therefore
+ * can run in any thread as long as the thread has called
+ * aio_context_acquire/release().
+ *
+ * These functions can only call functions from I/O and Common categories,
+ * but can be invoked by GS, "I/O or GS" and I/O APIs.
+ *
+ * All functions in this category must use the macro
+ * IO_CODE();
+ * to catch when they are accidentally called by the wrong API.
+ */
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int64_t bytes,
+                   BdrvRequestFlags flags);
+
+int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_pread(BdrvChild *child, int64_t offset, int64_t bytes, void *buf,
+           BdrvRequestFlags flags);
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_pwrite(BdrvChild *child, int64_t offset,int64_t bytes,
+            const void *buf, BdrvRequestFlags flags);
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
+                 const void *buf, BdrvRequestFlags flags);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
+                    const void *buf, BdrvRequestFlags flags);
+
+/*
+ * Efficiently zero a region of the disk image.  Note that this is a regular
+ * I/O request like read or write and should have a reasonable size.  This
+ * function is not suitable for zeroing the entire image in a single request
+ * because it may allocate memory for the entire region.
+ */
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, int64_t bytes,
+                      BdrvRequestFlags flags);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
+                 PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
+
+int64_t coroutine_fn GRAPH_RDLOCK bdrv_co_nb_sectors(BlockDriverState *bs);
+int64_t coroutine_mixed_fn bdrv_nb_sectors(BlockDriverState *bs);
+
+int64_t coroutine_fn GRAPH_RDLOCK bdrv_co_getlength(BlockDriverState *bs);
+int64_t co_wrapper_mixed_bdrv_rdlock bdrv_getlength(BlockDriverState *bs);
+
+int64_t coroutine_fn GRAPH_RDLOCK
+bdrv_co_get_allocated_file_size(BlockDriverState *bs);
+
+int64_t co_wrapper_bdrv_rdlock
+bdrv_get_allocated_file_size(BlockDriverState *bs);
+
+BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
+                               BlockDriverState *in_bs, Error **errp);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
+
+void coroutine_fn GRAPH_RDLOCK
+bdrv_co_delete_file_noerr(BlockDriverState *bs);
+
+
+/* async block I/O */
+void bdrv_aio_cancel_async(BlockAIOCB *acb);
+
+/* sg packet commands */
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
+
+/* Ensure contents are flushed to disk.  */
+int coroutine_fn GRAPH_RDLOCK bdrv_co_flush(BlockDriverState *bs);
+
+int coroutine_fn GRAPH_RDLOCK bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
+                                               int64_t bytes);
+
+/* Report zone information of zone block device. */
+int coroutine_fn GRAPH_RDLOCK bdrv_co_zone_report(BlockDriverState *bs,
+                                                  int64_t offset,
+                                                  unsigned int *nr_zones,
+                                                  BlockZoneDescriptor *zones);
+int coroutine_fn GRAPH_RDLOCK bdrv_co_zone_mgmt(BlockDriverState *bs,
+                                                BlockZoneOp op,
+                                                int64_t offset, int64_t len);
+int coroutine_fn GRAPH_RDLOCK bdrv_co_zone_append(BlockDriverState *bs,
+                                                  int64_t *offset,
+                                                  QEMUIOVector *qiov,
+                                                  BdrvRequestFlags flags);
+
+bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     int64_t *pnum, int64_t *map, BlockDriverState **file);
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                  int64_t *pnum, int64_t *map, BlockDriverState **file);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_block_status_above(BlockDriverState *bs, BlockDriverState *base,
+                           int64_t offset, int64_t bytes, int64_t *pnum,
+                           int64_t *map, BlockDriverState **file);
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
+                        int64_t offset, int64_t bytes, int64_t *pnum,
+                        int64_t *map, BlockDriverState **file);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                     int64_t *pnum);
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
+                  int64_t bytes, int64_t *pnum);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
+                           bool include_base, int64_t offset, int64_t bytes,
+                           int64_t *pnum);
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_is_allocated_above(BlockDriverState *bs, BlockDriverState *base,
+                        bool include_base, int64_t offset,
+                        int64_t bytes, int64_t *pnum);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes);
+
+int GRAPH_RDLOCK
+bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
+                          Error **errp);
+
+bool bdrv_is_read_only(BlockDriverState *bs);
+bool bdrv_is_writable(BlockDriverState *bs);
+bool bdrv_is_sg(BlockDriverState *bs);
+int bdrv_get_flags(BlockDriverState *bs);
+
+bool coroutine_fn GRAPH_RDLOCK bdrv_co_is_inserted(BlockDriverState *bs);
+bool co_wrapper_bdrv_rdlock bdrv_is_inserted(BlockDriverState *bs);
+
+void coroutine_fn GRAPH_RDLOCK
+bdrv_co_lock_medium(BlockDriverState *bs, bool locked);
+
+void coroutine_fn GRAPH_RDLOCK
+bdrv_co_eject(BlockDriverState *bs, bool eject_flag);
+
+const char *bdrv_get_format_name(BlockDriverState *bs);
+
+bool GRAPH_RDLOCK bdrv_supports_compressed_writes(BlockDriverState *bs);
+const char *bdrv_get_node_name(const BlockDriverState *bs);
+
+const char * GRAPH_RDLOCK
+bdrv_get_device_name(const BlockDriverState *bs);
+
+const char * GRAPH_RDLOCK
+bdrv_get_device_or_node_name(const BlockDriverState *bs);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+
+ImageInfoSpecific * GRAPH_RDLOCK
+bdrv_get_specific_info(BlockDriverState *bs, Error **errp);
+
+BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
+void bdrv_round_to_subclusters(BlockDriverState *bs,
+                               int64_t offset, int64_t bytes,
+                               int64_t *cluster_offset,
+                               int64_t *cluster_bytes);
+
+void bdrv_get_backing_filename(BlockDriverState *bs,
+                               char *filename, int filename_size);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
+                            const char *backing_fmt, bool warn);
+
+int co_wrapper_bdrv_rdlock
+bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
+                         const char *backing_fmt, bool warn);
+
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+                      int64_t pos, int size);
+
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+                      int64_t pos, int size);
+
+/*
+ * Returns the alignment in bytes that is required so that no bounce buffer
+ * is required throughout the stack
+ */
+size_t bdrv_min_mem_align(BlockDriverState *bs);
+/* Returns optimal alignment in bytes for bounce buffer */
+size_t bdrv_opt_mem_align(BlockDriverState *bs);
+void *qemu_blockalign(BlockDriverState *bs, size_t size);
+void *qemu_blockalign0(BlockDriverState *bs, size_t size);
+void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
+void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
+
+void bdrv_enable_copy_on_read(BlockDriverState *bs);
+void bdrv_disable_copy_on_read(BlockDriverState *bs);
+
+void coroutine_fn GRAPH_RDLOCK
+bdrv_co_debug_event(BlockDriverState *bs, BlkdebugEvent event);
+
+void co_wrapper_mixed_bdrv_rdlock
+bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event);
+
+#define BLKDBG_CO_EVENT(child, evt) \
+    do { \
+        if (child) { \
+            bdrv_co_debug_event(child->bs, evt); \
+        } \
+    } while (0)
+
+#define BLKDBG_EVENT(child, evt) \
+    do { \
+        if (child) { \
+            bdrv_debug_event(child->bs, evt); \
+        } \
+    } while (0)
+
+/**
+ * bdrv_get_aio_context:
+ *
+ * Returns: the currently bound #AioContext
+ */
+AioContext *bdrv_get_aio_context(BlockDriverState *bs);
+
+AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
+
+/**
+ * Move the current coroutine to the AioContext of @bs and return the old
+ * AioContext of the coroutine. Increase bs->in_flight so that draining @bs
+ * will wait for the operation to proceed until the corresponding
+ * bdrv_co_leave().
+ *
+ * Consequently, you can't call drain inside a bdrv_co_enter/leave() section as
+ * this will deadlock.
+ */
+AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs);
+
+/**
+ * Ends a section started by bdrv_co_enter(). Move the current coroutine back
+ * to old_ctx and decrease bs->in_flight again.
+ */
+void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx);
+
+AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
+
+bool coroutine_fn GRAPH_RDLOCK
+bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+                                   uint32_t granularity, Error **errp);
+bool co_wrapper_bdrv_rdlock
+bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+                                uint32_t granularity, Error **errp);
+
+/**
+ *
+ * bdrv_co_copy_range:
+ *
+ * Do offloaded copy between two children. If the operation is not implemented
+ * by the driver, or if the backend storage doesn't support it, a negative
+ * error code will be returned.
+ *
+ * Note: block layer doesn't emulate or fallback to a bounce buffer approach
+ * because usually the caller shouldn't attempt offloaded copy any more (e.g.
+ * calling copy_file_range(2)) after the first error, thus it should fall back
+ * to a read+write path in the caller level.
+ *
+ * @src: Source child to copy data from
+ * @src_offset: offset in @src image to read data
+ * @dst: Destination child to copy data to
+ * @dst_offset: offset in @dst image to write data
+ * @bytes: number of bytes to copy
+ * @flags: request flags. Supported flags:
+ *         BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
+ *                               write on @dst as if bdrv_co_pwrite_zeroes is
+ *                               called. Used to simplify caller code, or
+ *                               during BlockDriver.bdrv_co_copy_range_from()
+ *                               recursion.
+ *         BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
+ *                                   requests currently in flight.
+ *
+ * Returns: 0 if succeeded; negative error code if failed.
+ **/
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
+                   BdrvChild *dst, int64_t dst_offset,
+                   int64_t bytes, BdrvRequestFlags read_flags,
+                   BdrvRequestFlags write_flags);
+
+/*
+ * "I/O or GS" API functions. These functions can run without
+ * the BQL, but only in one specific iothread/main loop.
+ *
+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which
+ * requires the caller to be either in the main thread and hold
+ * the BlockdriverState (bs) AioContext lock, or directly in the
+ * home thread that runs the bs AioContext. Calling them from
+ * another thread in another AioContext would cause deadlocks.
+ *
+ * Therefore, these functions are not proper I/O, because they
+ * can't run in *any* iothreads, but only in a specific one.
+ *
+ * These functions can call any function from I/O, Common and this
+ * categories, but must be invoked only by other "I/O or GS" and GS APIs.
+ *
+ * All functions in this category must use the macro
+ * IO_OR_GS_CODE();
+ * to catch when they are accidentally called by the wrong API.
+ */
+
+#define BDRV_POLL_WHILE(bs, cond) ({                       \
+    BlockDriverState *bs_ = (bs);                          \
+    IO_OR_GS_CODE();                                       \
+    AIO_WAIT_WHILE(bdrv_get_aio_context(bs_),              \
+                   cond); })
+
+void bdrv_drain(BlockDriverState *bs);
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
+              PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
+
+/* Invalidate any cached metadata used by image formats */
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
+
+int co_wrapper_mixed_bdrv_rdlock bdrv_flush(BlockDriverState *bs);
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+
+/**
+ * bdrv_parent_drained_begin_single:
+ *
+ * Begin a quiesced section for the parent of @c.
+ */
+void GRAPH_RDLOCK bdrv_parent_drained_begin_single(BdrvChild *c);
+
+/**
+ * bdrv_parent_drained_poll_single:
+ *
+ * Returns true if there is any pending activity to cease before @c can be
+ * called quiesced, false otherwise.
+ */
+bool GRAPH_RDLOCK bdrv_parent_drained_poll_single(BdrvChild *c);
+
+/**
+ * bdrv_parent_drained_end_single:
+ *
+ * End a quiesced section for the parent of @c.
+ */
+void GRAPH_RDLOCK bdrv_parent_drained_end_single(BdrvChild *c);
+
+/**
+ * bdrv_drain_poll:
+ *
+ * Poll for pending requests in @bs and its parents (except for @ignore_parent).
+ *
+ * If @ignore_bds_parents is true, parents that are BlockDriverStates must
+ * ignore the drain request because they will be drained separately (used for
+ * drain_all).
+ *
+ * This is part of bdrv_drained_begin.
+ */
+bool GRAPH_RDLOCK
+bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
+                bool ignore_bds_parents);
+
+/**
+ * bdrv_drained_begin:
+ *
+ * Begin a quiesced section for exclusive access to the BDS, by disabling
+ * external request sources including NBD server, block jobs, and device model.
+ *
+ * This function can only be invoked by the main loop or a coroutine
+ * (regardless of the AioContext where it is running).
+ * If the coroutine is running in an Iothread AioContext, this function will
+ * just schedule a BH to run in the main loop.
+ * However, it cannot be directly called by an Iothread.
+ *
+ * This function can be recursive.
+ */
+void bdrv_drained_begin(BlockDriverState *bs);
+
+/**
+ * bdrv_do_drained_begin_quiesce:
+ *
+ * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
+ * running requests to complete.
+ */
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent);
+
+/**
+ * bdrv_drained_end:
+ *
+ * End a quiescent section started by bdrv_drained_begin().
+ *
+ * This function can only be invoked by the main loop or a coroutine
+ * (regardless of the AioContext where it is running).
+ * If the coroutine is running in an Iothread AioContext, this function will
+ * just schedule a BH to run in the main loop.
+ * However, it cannot be directly called by an Iothread.
+ */
+void bdrv_drained_end(BlockDriverState *bs);
+
+#endif /* BLOCK_IO_H */
index c9d7c58765aebcc1ae8875ebee19ea891b343e2c..e2c647de279b3dfa612e3ca4561f8c3a751d2453 100644 (file)
-#ifndef BLOCK_H
-#define BLOCK_H
-
-#include "block/aio.h"
-#include "block/aio-wait.h"
-#include "qemu/iov.h"
-#include "qemu/coroutine.h"
-#include "block/accounting.h"
-#include "block/dirty-bitmap.h"
-#include "block/blockjob.h"
-#include "qemu/hbitmap.h"
-
 /*
- * generated_co_wrapper
- *
- * Function specifier, which does nothing but mark functions to be
- * generated by scripts/block-coroutine-wrapper.py
- *
- * Read more in docs/devel/block-coroutine-wrapper.rst
- */
-#define generated_co_wrapper
-
-/* block.c */
-typedef struct BlockDriver BlockDriver;
-typedef struct BdrvChild BdrvChild;
-typedef struct BdrvChildClass BdrvChildClass;
-
-typedef struct BlockDriverInfo {
-    /* in bytes, 0 if irrelevant */
-    int cluster_size;
-    /* offset at which the VM state can be saved (0 if not possible) */
-    int64_t vm_state_offset;
-    bool is_dirty;
-    /*
-     * True if this block driver only supports compressed writes
-     */
-    bool needs_compressed_writes;
-} BlockDriverInfo;
-
-typedef struct BlockFragInfo {
-    uint64_t allocated_clusters;
-    uint64_t total_clusters;
-    uint64_t fragmented_clusters;
-    uint64_t compressed_clusters;
-} BlockFragInfo;
-
-typedef enum {
-    BDRV_REQ_COPY_ON_READ       = 0x1,
-    BDRV_REQ_ZERO_WRITE         = 0x2,
-
-    /*
-     * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
-     * that the block driver should unmap (discard) blocks if it is guaranteed
-     * that the result will read back as zeroes. The flag is only passed to the
-     * driver if the block device is opened with BDRV_O_UNMAP.
-     */
-    BDRV_REQ_MAY_UNMAP          = 0x4,
-
-    BDRV_REQ_FUA                = 0x10,
-    BDRV_REQ_WRITE_COMPRESSED   = 0x20,
-
-    /* Signifies that this write request will not change the visible disk
-     * content. */
-    BDRV_REQ_WRITE_UNCHANGED    = 0x40,
-
-    /*
-     * BDRV_REQ_SERIALISING forces request serialisation for writes.
-     * It is used to ensure that writes to the backing file of a backup process
-     * target cannot race with a read of the backup target that defers to the
-     * backing file.
-     *
-     * Note, that BDRV_REQ_SERIALISING is _not_ opposite in meaning to
-     * BDRV_REQ_NO_SERIALISING. A more descriptive name for the latter might be
-     * _DO_NOT_WAIT_FOR_SERIALISING, except that is too long.
-     */
-    BDRV_REQ_SERIALISING        = 0x80,
-
-    /* Execute the request only if the operation can be offloaded or otherwise
-     * be executed efficiently, but return an error instead of using a slow
-     * fallback. */
-    BDRV_REQ_NO_FALLBACK        = 0x100,
-
-    /*
-     * BDRV_REQ_PREFETCH may be used only together with BDRV_REQ_COPY_ON_READ
-     * on read request and means that caller doesn't really need data to be
-     * written to qiov parameter which may be NULL.
-     */
-    BDRV_REQ_PREFETCH  = 0x200,
-    /* Mask of valid flags */
-    BDRV_REQ_MASK               = 0x3ff,
-} BdrvRequestFlags;
-
-typedef struct BlockSizes {
-    uint32_t phys;
-    uint32_t log;
-} BlockSizes;
-
-typedef struct HDGeometry {
-    uint32_t heads;
-    uint32_t sectors;
-    uint32_t cylinders;
-} HDGeometry;
-
-#define BDRV_O_RDWR        0x0002
-#define BDRV_O_RESIZE      0x0004 /* request permission for resizing the node */
-#define BDRV_O_SNAPSHOT    0x0008 /* open the file read only and save writes in a snapshot */
-#define BDRV_O_TEMPORARY   0x0010 /* delete the file after use */
-#define BDRV_O_NOCACHE     0x0020 /* do not use the host page cache */
-#define BDRV_O_NATIVE_AIO  0x0080 /* use native AIO instead of the thread pool */
-#define BDRV_O_NO_BACKING  0x0100 /* don't open the backing file */
-#define BDRV_O_NO_FLUSH    0x0200 /* disable flushing on this disk */
-#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
-#define BDRV_O_INACTIVE    0x0800  /* consistency hint for migration handoff */
-#define BDRV_O_CHECK       0x1000  /* open solely for consistency check */
-#define BDRV_O_ALLOW_RDWR  0x2000  /* allow reopen to change from r/o to r/w */
-#define BDRV_O_UNMAP       0x4000  /* execute guest UNMAP/TRIM operations */
-#define BDRV_O_PROTOCOL    0x8000  /* if no block driver is explicitly given:
-                                      select an appropriate protocol driver,
-                                      ignoring the format layer */
-#define BDRV_O_NO_IO       0x10000 /* don't initialize for I/O */
-#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening read-write fails */
-#define BDRV_O_IO_URING    0x40000 /* use io_uring instead of the thread pool */
-
-#define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
-
-
-/* Option names of options parsed by the block layer */
-
-#define BDRV_OPT_CACHE_WB       "cache.writeback"
-#define BDRV_OPT_CACHE_DIRECT   "cache.direct"
-#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
-#define BDRV_OPT_READ_ONLY      "read-only"
-#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
-#define BDRV_OPT_DISCARD        "discard"
-#define BDRV_OPT_FORCE_SHARE    "force-share"
-
-
-#define BDRV_SECTOR_BITS   9
-#define BDRV_SECTOR_SIZE   (1ULL << BDRV_SECTOR_BITS)
-
-#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \
-                                           INT_MAX >> BDRV_SECTOR_BITS)
-#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
-
-/*
- * Allocation status flags for bdrv_block_status() and friends.
- *
- * Public flags:
- * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
- * BDRV_BLOCK_ZERO: offset reads as zero
- * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
- * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
- *                       layer rather than any backing, set by block layer
- * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
- *                 layer, set by block layer
- *
- * Internal flags:
- * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
- *                 that the block layer recompute the answer from the returned
- *                 BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
- * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
- *                     zeroes in file child of current block node inside
- *                     returned region. Only valid together with both
- *                     BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
- *                     appear with BDRV_BLOCK_ZERO.
- *
- * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
- * host offset within the returned BDS that is allocated for the
- * corresponding raw guest data.  However, whether that offset
- * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
- *
- * DATA ZERO OFFSET_VALID
- *  t    t        t       sectors read as zero, returned file is zero at offset
- *  t    f        t       sectors read as valid from file at offset
- *  f    t        t       sectors preallocated, read as zero, returned file not
- *                        necessarily zero at offset
- *  f    f        t       sectors preallocated but read from backing_hd,
- *                        returned file contains garbage at offset
- *  t    t        f       sectors preallocated, read as zero, unknown offset
- *  t    f        f       sectors read from unknown file or offset
- *  f    t        f       not allocated or unknown offset, read as zero
- *  f    f        f       not allocated or unknown offset, read from backing_hd
- */
-#define BDRV_BLOCK_DATA         0x01
-#define BDRV_BLOCK_ZERO         0x02
-#define BDRV_BLOCK_OFFSET_VALID 0x04
-#define BDRV_BLOCK_RAW          0x08
-#define BDRV_BLOCK_ALLOCATED    0x10
-#define BDRV_BLOCK_EOF          0x20
-#define BDRV_BLOCK_RECURSE      0x40
-
-typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
-
-typedef struct BDRVReopenState {
-    BlockDriverState *bs;
-    int flags;
-    BlockdevDetectZeroesOptions detect_zeroes;
-    bool backing_missing;
-    bool replace_backing_bs;  /* new_backing_bs is ignored if this is false */
-    BlockDriverState *new_backing_bs; /* If NULL then detach the current bs */
-    uint64_t perm, shared_perm;
-    QDict *options;
-    QDict *explicit_options;
-    void *opaque;
-} BDRVReopenState;
-
-/*
- * Block operation types
- */
-typedef enum BlockOpType {
-    BLOCK_OP_TYPE_BACKUP_SOURCE,
-    BLOCK_OP_TYPE_BACKUP_TARGET,
-    BLOCK_OP_TYPE_CHANGE,
-    BLOCK_OP_TYPE_COMMIT_SOURCE,
-    BLOCK_OP_TYPE_COMMIT_TARGET,
-    BLOCK_OP_TYPE_DATAPLANE,
-    BLOCK_OP_TYPE_DRIVE_DEL,
-    BLOCK_OP_TYPE_EJECT,
-    BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
-    BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
-    BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
-    BLOCK_OP_TYPE_MIRROR_SOURCE,
-    BLOCK_OP_TYPE_MIRROR_TARGET,
-    BLOCK_OP_TYPE_RESIZE,
-    BLOCK_OP_TYPE_STREAM,
-    BLOCK_OP_TYPE_REPLACE,
-    BLOCK_OP_TYPE_MAX,
-} BlockOpType;
-
-/* Block node permission constants */
-enum {
-    /**
-     * A user that has the "permission" of consistent reads is guaranteed that
-     * their view of the contents of the block device is complete and
-     * self-consistent, representing the contents of a disk at a specific
-     * point.
-     *
-     * For most block devices (including their backing files) this is true, but
-     * the property cannot be maintained in a few situations like for
-     * intermediate nodes of a commit block job.
-     */
-    BLK_PERM_CONSISTENT_READ    = 0x01,
-
-    /** This permission is required to change the visible disk contents. */
-    BLK_PERM_WRITE              = 0x02,
-
-    /**
-     * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
-     * required for writes to the block node when the caller promises that
-     * the visible disk content doesn't change.
-     *
-     * As the BLK_PERM_WRITE permission is strictly stronger, either is
-     * sufficient to perform an unchanging write.
-     */
-    BLK_PERM_WRITE_UNCHANGED    = 0x04,
-
-    /** This permission is required to change the size of a block node. */
-    BLK_PERM_RESIZE             = 0x08,
-
-    /**
-     * This permission is required to change the node that this BdrvChild
-     * points to.
-     */
-    BLK_PERM_GRAPH_MOD          = 0x10,
-
-    BLK_PERM_ALL                = 0x1f,
-
-    DEFAULT_PERM_PASSTHROUGH    = BLK_PERM_CONSISTENT_READ
-                                 | BLK_PERM_WRITE
-                                 | BLK_PERM_WRITE_UNCHANGED
-                                 | BLK_PERM_RESIZE,
-
-    DEFAULT_PERM_UNCHANGED      = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
-};
-
-/*
- * Flags that parent nodes assign to child nodes to specify what kind of
- * role(s) they take.
- *
- * At least one of DATA, METADATA, FILTERED, or COW must be set for
- * every child.
- */
-enum BdrvChildRoleBits {
-    /*
-     * This child stores data.
-     * Any node may have an arbitrary number of such children.
-     */
-    BDRV_CHILD_DATA         = (1 << 0),
-
-    /*
-     * This child stores metadata.
-     * Any node may have an arbitrary number of metadata-storing
-     * children.
-     */
-    BDRV_CHILD_METADATA     = (1 << 1),
-
-    /*
-     * A child that always presents exactly the same visible data as
-     * the parent, e.g. by virtue of the parent forwarding all reads
-     * and writes.
-     * This flag is mutually exclusive with DATA, METADATA, and COW.
-     * Any node may have at most one filtered child at a time.
-     */
-    BDRV_CHILD_FILTERED     = (1 << 2),
-
-    /*
-     * Child from which to read all data that isn't allocated in the
-     * parent (i.e., the backing child); such data is copied to the
-     * parent through COW (and optionally COR).
-     * This field is mutually exclusive with DATA, METADATA, and
-     * FILTERED.
-     * Any node may have at most one such backing child at a time.
-     */
-    BDRV_CHILD_COW          = (1 << 3),
-
-    /*
-     * The primary child.  For most drivers, this is the child whose
-     * filename applies best to the parent node.
-     * Any node may have at most one primary child at a time.
-     */
-    BDRV_CHILD_PRIMARY      = (1 << 4),
-
-    /* Useful combination of flags */
-    BDRV_CHILD_IMAGE        = BDRV_CHILD_DATA
-                              | BDRV_CHILD_METADATA
-                              | BDRV_CHILD_PRIMARY,
-};
-
-/* Mask of BdrvChildRoleBits values */
-typedef unsigned int BdrvChildRole;
-
-char *bdrv_perm_names(uint64_t perm);
-uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm);
-
-/* disk I/O throttling */
-void bdrv_init(void);
-void bdrv_init_with_whitelist(void);
-bool bdrv_uses_whitelist(void);
-int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);
-BlockDriver *bdrv_find_protocol(const char *filename,
-                                bool allow_protocol_prefix,
-                                Error **errp);
-BlockDriver *bdrv_find_format(const char *format_name);
-int bdrv_create(BlockDriver *drv, const char* filename,
-                QemuOpts *opts, Error **errp);
-int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
-
-BlockDriverState *bdrv_new(void);
-void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
-                 Error **errp);
-void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
-                       Error **errp);
-
-int bdrv_parse_aio(const char *mode, int *flags);
-int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
-int bdrv_parse_discard_flags(const char *mode, int *flags);
-BdrvChild *bdrv_open_child(const char *filename,
-                           QDict *options, const char *bdref_key,
-                           BlockDriverState* parent,
-                           const BdrvChildClass *child_class,
-                           BdrvChildRole child_role,
-                           bool allow_none, Error **errp);
-BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
-void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
-                         Error **errp);
-int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
-                           const char *bdref_key, Error **errp);
-BlockDriverState *bdrv_open(const char *filename, const char *reference,
-                            QDict *options, int flags, Error **errp);
-BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
-                                       int flags, Error **errp);
-BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
-                                    BlockDriverState *bs, QDict *options,
-                                    bool keep_old_opts);
-int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
-int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
-                              Error **errp);
-int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
-                        BlockReopenQueue *queue, Error **errp);
-void bdrv_reopen_commit(BDRVReopenState *reopen_state);
-void bdrv_reopen_abort(BDRVReopenState *reopen_state);
-int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
-                       int bytes, BdrvRequestFlags flags);
-int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
-int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes);
-int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes);
-int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
-                     const void *buf, int count);
-/*
- * Efficiently zero a region of the disk image.  Note that this is a regular
- * I/O request like read or write and should have a reasonable size.  This
- * function is not suitable for zeroing the entire image in a single request
- * because it may allocate memory for the entire region.
- */
-int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
-                                       int bytes, BdrvRequestFlags flags);
-BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
-    const char *backing_file);
-void bdrv_refresh_filename(BlockDriverState *bs);
-
-int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
-                                  PreallocMode prealloc, BdrvRequestFlags flags,
-                                  Error **errp);
-int generated_co_wrapper
-bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
-              PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
-
-int64_t bdrv_nb_sectors(BlockDriverState *bs);
-int64_t bdrv_getlength(BlockDriverState *bs);
-int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
-BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
-                               BlockDriverState *in_bs, Error **errp);
-void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
-void bdrv_refresh_limits(BlockDriverState *bs, Error **errp);
-int bdrv_commit(BlockDriverState *bs);
-int bdrv_make_empty(BdrvChild *c, Error **errp);
-int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
-                             const char *backing_fmt, bool warn);
-void bdrv_register(BlockDriver *bdrv);
-int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
-                           const char *backing_file_str);
-BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
-                                    BlockDriverState *bs);
-BlockDriverState *bdrv_find_base(BlockDriverState *bs);
-bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
-                                  Error **errp);
-int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
-                              Error **errp);
-void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base);
-int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
-
-
-typedef struct BdrvCheckResult {
-    int corruptions;
-    int leaks;
-    int check_errors;
-    int corruptions_fixed;
-    int leaks_fixed;
-    int64_t image_end_offset;
-    BlockFragInfo bfi;
-} BdrvCheckResult;
-
-typedef enum {
-    BDRV_FIX_LEAKS    = 1,
-    BDRV_FIX_ERRORS   = 2,
-} BdrvCheckMode;
-
-int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
-                                    BdrvCheckMode fix);
-
-/* The units of offset and total_work_size may be chosen arbitrarily by the
- * block driver; total_work_size may change during the course of the amendment
- * operation */
-typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
-                                      int64_t total_work_size, void *opaque);
-int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
-                       BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
-                       bool force,
-                       Error **errp);
-
-/* check if a named node can be replaced when doing drive-mirror */
-BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
-                                        const char *node_name, Error **errp);
-
-/* async block I/O */
-void bdrv_aio_cancel(BlockAIOCB *acb);
-void bdrv_aio_cancel_async(BlockAIOCB *acb);
-
-/* sg packet commands */
-int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
-
-/* Invalidate any cached metadata used by image formats */
-int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs,
-                                               Error **errp);
-void bdrv_invalidate_cache_all(Error **errp);
-int bdrv_inactivate_all(void);
-
-/* Ensure contents are flushed to disk.  */
-int generated_co_wrapper bdrv_flush(BlockDriverState *bs);
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
-int bdrv_flush_all(void);
-void bdrv_close_all(void);
-void bdrv_drain(BlockDriverState *bs);
-void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
-void bdrv_drain_all_begin(void);
-void bdrv_drain_all_end(void);
-void bdrv_drain_all(void);
-
-#define BDRV_POLL_WHILE(bs, cond) ({                       \
-    BlockDriverState *bs_ = (bs);                          \
-    AIO_WAIT_WHILE(bdrv_get_aio_context(bs_),              \
-                   cond); })
-
-int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset,
-                                       int64_t bytes);
-int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
-int bdrv_has_zero_init_1(BlockDriverState *bs);
-int bdrv_has_zero_init(BlockDriverState *bs);
-bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
-int bdrv_block_status(BlockDriverState *bs, int64_t offset,
-                      int64_t bytes, int64_t *pnum, int64_t *map,
-                      BlockDriverState **file);
-int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
-                            int64_t offset, int64_t bytes, int64_t *pnum,
-                            int64_t *map, BlockDriverState **file);
-int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                      int64_t *pnum);
-int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
-                            bool include_base, int64_t offset, int64_t bytes,
-                            int64_t *pnum);
-int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
-                                      int64_t bytes);
-
-bool bdrv_is_read_only(BlockDriverState *bs);
-int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
-                           bool ignore_allow_rdw, Error **errp);
-int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
-                              Error **errp);
-bool bdrv_is_writable(BlockDriverState *bs);
-bool bdrv_is_sg(BlockDriverState *bs);
-bool bdrv_is_inserted(BlockDriverState *bs);
-void bdrv_lock_medium(BlockDriverState *bs, bool locked);
-void bdrv_eject(BlockDriverState *bs, bool eject_flag);
-const char *bdrv_get_format_name(BlockDriverState *bs);
-BlockDriverState *bdrv_find_node(const char *node_name);
-BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp);
-XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp);
-BlockDriverState *bdrv_lookup_bs(const char *device,
-                                 const char *node_name,
-                                 Error **errp);
-bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
-BlockDriverState *bdrv_next_node(BlockDriverState *bs);
-BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
-
-typedef struct BdrvNextIterator {
-    enum {
-        BDRV_NEXT_BACKEND_ROOTS,
-        BDRV_NEXT_MONITOR_OWNED,
-    } phase;
-    BlockBackend *blk;
-    BlockDriverState *bs;
-} BdrvNextIterator;
-
-BlockDriverState *bdrv_first(BdrvNextIterator *it);
-BlockDriverState *bdrv_next(BdrvNextIterator *it);
-void bdrv_next_cleanup(BdrvNextIterator *it);
-
-BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
-bool bdrv_supports_compressed_writes(BlockDriverState *bs);
-void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
-                         void *opaque, bool read_only);
-const char *bdrv_get_node_name(const BlockDriverState *bs);
-const char *bdrv_get_device_name(const BlockDriverState *bs);
-const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
-int bdrv_get_flags(BlockDriverState *bs);
-int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
-ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
-                                          Error **errp);
-BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
-void bdrv_round_to_clusters(BlockDriverState *bs,
-                            int64_t offset, int64_t bytes,
-                            int64_t *cluster_offset,
-                            int64_t *cluster_bytes);
-
-void bdrv_get_backing_filename(BlockDriverState *bs,
-                               char *filename, int filename_size);
-char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp);
-char *bdrv_get_full_backing_filename_from_filename(const char *backed,
-                                                   const char *backing,
-                                                   Error **errp);
-char *bdrv_dirname(BlockDriverState *bs, Error **errp);
-
-int path_has_protocol(const char *path);
-int path_is_absolute(const char *path);
-char *path_combine(const char *base_path, const char *filename);
-
-int generated_co_wrapper
-bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
-int generated_co_wrapper
-bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
-int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
-                      int64_t pos, int size);
-
-int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
-                      int64_t pos, int size);
-
-void bdrv_img_create(const char *filename, const char *fmt,
-                     const char *base_filename, const char *base_fmt,
-                     char *options, uint64_t img_size, int flags,
-                     bool quiet, Error **errp);
-
-/* Returns the alignment in bytes that is required so that no bounce buffer
- * is required throughout the stack */
-size_t bdrv_min_mem_align(BlockDriverState *bs);
-/* Returns optimal alignment in bytes for bounce buffer */
-size_t bdrv_opt_mem_align(BlockDriverState *bs);
-void *qemu_blockalign(BlockDriverState *bs, size_t size);
-void *qemu_blockalign0(BlockDriverState *bs, size_t size);
-void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
-void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
-
-void bdrv_enable_copy_on_read(BlockDriverState *bs);
-void bdrv_disable_copy_on_read(BlockDriverState *bs);
-
-void bdrv_ref(BlockDriverState *bs);
-void bdrv_unref(BlockDriverState *bs);
-void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
-BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
-                             BlockDriverState *child_bs,
-                             const char *child_name,
-                             const BdrvChildClass *child_class,
-                             BdrvChildRole child_role,
-                             Error **errp);
-
-bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
-void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
-void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason);
-void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
-void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
-bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
-
-#define BLKDBG_EVENT(child, evt) \
-    do { \
-        if (child) { \
-            bdrv_debug_event(child->bs, evt); \
-        } \
-    } while (0)
-
-void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event);
-
-int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
-                           const char *tag);
-int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
-int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
-bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
-
-/**
- * bdrv_get_aio_context:
+ * QEMU System Emulator block driver
  *
- * Returns: the currently bound #AioContext
- */
-AioContext *bdrv_get_aio_context(BlockDriverState *bs);
-
-/**
- * Move the current coroutine to the AioContext of @bs and return the old
- * AioContext of the coroutine. Increase bs->in_flight so that draining @bs
- * will wait for the operation to proceed until the corresponding
- * bdrv_co_leave().
+ * Copyright (c) 2003 Fabrice Bellard
  *
- * Consequently, you can't call drain inside a bdrv_co_enter/leave() section as
- * this will deadlock.
- */
-AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs);
-
-/**
- * Ends a section started by bdrv_co_enter(). Move the current coroutine back
- * to old_ctx and decrease bs->in_flight again.
- */
-void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx);
-
-/**
- * Locks the AioContext of @bs if it's not the current AioContext. This avoids
- * double locking which could lead to deadlocks: This is a coroutine_fn, so we
- * know we already own the lock of the current AioContext.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
  *
- * May only be called in the main thread.
- */
-void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
-
-/**
- * Unlocks the AioContext of @bs if it's not the current AioContext.
- */
-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
-
-/**
- * Transfer control to @co in the aio context of @bs
- */
-void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co);
-
-void bdrv_set_aio_context_ignore(BlockDriverState *bs,
-                                 AioContext *new_context, GSList **ignore);
-int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
-                             Error **errp);
-int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
-                                   BdrvChild *ignore_child, Error **errp);
-bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
-                                    GSList **ignore, Error **errp);
-bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
-                              GSList **ignore, Error **errp);
-int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
-int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
-
-void bdrv_io_plug(BlockDriverState *bs);
-void bdrv_io_unplug(BlockDriverState *bs);
-
-/**
- * bdrv_parent_drained_begin_single:
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
  *
- * Begin a quiesced section for the parent of @c. If @poll is true, wait for
- * any pending activity to cease.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
  */
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
-
-/**
- * bdrv_parent_drained_end_single:
- *
- * End a quiesced section for the parent of @c.
- *
- * This polls @bs's AioContext until all scheduled sub-drained_ends
- * have settled, which may result in graph changes.
- */
-void bdrv_parent_drained_end_single(BdrvChild *c);
-
-/**
- * bdrv_drain_poll:
- *
- * Poll for pending requests in @bs, its parents (except for @ignore_parent),
- * and if @recursive is true its children as well (used for subtree drain).
- *
- * If @ignore_bds_parents is true, parents that are BlockDriverStates must
- * ignore the drain request because they will be drained separately (used for
- * drain_all).
- *
- * This is part of bdrv_drained_begin.
- */
-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
-                     BdrvChild *ignore_parent, bool ignore_bds_parents);
-
-/**
- * bdrv_drained_begin:
- *
- * Begin a quiesced section for exclusive access to the BDS, by disabling
- * external request sources including NBD server and device model. Note that
- * this doesn't block timers or coroutines from submitting more requests, which
- * means block_job_pause is still necessary.
- *
- * This function can be recursive.
- */
-void bdrv_drained_begin(BlockDriverState *bs);
-
-/**
- * bdrv_do_drained_begin_quiesce:
- *
- * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
- * running requests to complete.
- */
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
-                                   BdrvChild *parent, bool ignore_bds_parents);
-
-/**
- * Like bdrv_drained_begin, but recursively begins a quiesced section for
- * exclusive access to all child nodes as well.
- */
-void bdrv_subtree_drained_begin(BlockDriverState *bs);
-
-/**
- * bdrv_drained_end:
- *
- * End a quiescent section started by bdrv_drained_begin().
- *
- * This polls @bs's AioContext until all scheduled sub-drained_ends
- * have settled.  On one hand, that may result in graph changes.  On
- * the other, this requires that the caller either runs in the main
- * loop; or that all involved nodes (@bs and all of its parents) are
- * in the caller's AioContext.
- */
-void bdrv_drained_end(BlockDriverState *bs);
-
-/**
- * bdrv_drained_end_no_poll:
- *
- * Same as bdrv_drained_end(), but do not poll for the subgraph to
- * actually become unquiesced.  Therefore, no graph changes will occur
- * with this function.
- *
- * *drained_end_counter is incremented for every background operation
- * that is scheduled, and will be decremented for every operation once
- * it settles.  The caller must poll until it reaches 0.  The counter
- * should be accessed using atomic operations only.
- */
-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
-
-/**
- * End a quiescent section started by bdrv_subtree_drained_begin().
- */
-void bdrv_subtree_drained_end(BlockDriverState *bs);
+#ifndef BLOCK_H
+#define BLOCK_H
 
-void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
-                    Error **errp);
-void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
+#include "block/block-global-state.h"
+#include "block/block-io.h"
 
-bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
-                                     uint32_t granularity, Error **errp);
-/**
- *
- * bdrv_register_buf/bdrv_unregister_buf:
- *
- * Register/unregister a buffer for I/O. For example, VFIO drivers are
- * interested to know the memory areas that would later be used for I/O, so
- * that they can prepare IOMMU mapping etc., to get better performance.
- */
-void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
-void bdrv_unregister_buf(BlockDriverState *bs, void *host);
+/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
 
-/**
- *
- * bdrv_co_copy_range:
- *
- * Do offloaded copy between two children. If the operation is not implemented
- * by the driver, or if the backend storage doesn't support it, a negative
- * error code will be returned.
- *
- * Note: block layer doesn't emulate or fallback to a bounce buffer approach
- * because usually the caller shouldn't attempt offloaded copy any more (e.g.
- * calling copy_file_range(2)) after the first error, thus it should fall back
- * to a read+write path in the caller level.
- *
- * @src: Source child to copy data from
- * @src_offset: offset in @src image to read data
- * @dst: Destination child to copy data to
- * @dst_offset: offset in @dst image to write data
- * @bytes: number of bytes to copy
- * @flags: request flags. Supported flags:
- *         BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
- *                               write on @dst as if bdrv_co_pwrite_zeroes is
- *                               called. Used to simplify caller code, or
- *                               during BlockDriver.bdrv_co_copy_range_from()
- *                               recursion.
- *         BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
- *                                   requests currently in flight.
- *
- * Returns: 0 if succeeded; negative error code if failed.
- **/
-int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
-                                    BdrvChild *dst, uint64_t dst_offset,
-                                    uint64_t bytes, BdrvRequestFlags read_flags,
-                                    BdrvRequestFlags write_flags);
-#endif
+#endif /* BLOCK_H */
index 157596c2963fea03a57bf7dc2ab69fb2d8191210..4d4d5ba1538c9dcd3db07b81716a38695610f96a 100644 (file)
@@ -18,7 +18,7 @@
 #ifndef BLOCK_BACKUP_H
 #define BLOCK_BACKUP_H
 
-#include "block/block_int.h"
+#include "block/blockjob.h"
 
 void backup_do_checkpoint(BlockJob *job, Error **errp);
 
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
new file mode 100644 (file)
index 0000000..4e31d16
--- /dev/null
@@ -0,0 +1,1321 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_COMMON_H
+#define BLOCK_INT_COMMON_H
+
+#include "block/aio.h"
+#include "block/block-common.h"
+#include "block/block-global-state.h"
+#include "block/snapshot.h"
+#include "qemu/iov.h"
+#include "qemu/rcu.h"
+#include "qemu/stats64.h"
+
+#define BLOCK_FLAG_LAZY_REFCOUNTS   8
+
+#define BLOCK_OPT_SIZE              "size"
+#define BLOCK_OPT_ENCRYPT           "encryption"
+#define BLOCK_OPT_ENCRYPT_FORMAT    "encrypt.format"
+#define BLOCK_OPT_COMPAT6           "compat6"
+#define BLOCK_OPT_HWVERSION         "hwversion"
+#define BLOCK_OPT_BACKING_FILE      "backing_file"
+#define BLOCK_OPT_BACKING_FMT       "backing_fmt"
+#define BLOCK_OPT_CLUSTER_SIZE      "cluster_size"
+#define BLOCK_OPT_TABLE_SIZE        "table_size"
+#define BLOCK_OPT_PREALLOC          "preallocation"
+#define BLOCK_OPT_SUBFMT            "subformat"
+#define BLOCK_OPT_COMPAT_LEVEL      "compat"
+#define BLOCK_OPT_LAZY_REFCOUNTS    "lazy_refcounts"
+#define BLOCK_OPT_ADAPTER_TYPE      "adapter_type"
+#define BLOCK_OPT_REDUNDANCY        "redundancy"
+#define BLOCK_OPT_NOCOW             "nocow"
+#define BLOCK_OPT_EXTENT_SIZE_HINT  "extent_size_hint"
+#define BLOCK_OPT_OBJECT_SIZE       "object_size"
+#define BLOCK_OPT_REFCOUNT_BITS     "refcount_bits"
+#define BLOCK_OPT_DATA_FILE         "data_file"
+#define BLOCK_OPT_DATA_FILE_RAW     "data_file_raw"
+#define BLOCK_OPT_COMPRESSION_TYPE  "compression_type"
+#define BLOCK_OPT_EXTL2             "extended_l2"
+
+#define BLOCK_PROBE_BUF_SIZE        512
+
+enum BdrvTrackedRequestType {
+    BDRV_TRACKED_READ,
+    BDRV_TRACKED_WRITE,
+    BDRV_TRACKED_DISCARD,
+    BDRV_TRACKED_TRUNCATE,
+};
+
+/*
+ * That is not quite good that BdrvTrackedRequest structure is public,
+ * as block/io.c is very careful about incoming offset/bytes being
+ * correct. Be sure to assert bdrv_check_request() succeeded after any
+ * modification of BdrvTrackedRequest object out of block/io.c
+ */
+typedef struct BdrvTrackedRequest {
+    BlockDriverState *bs;
+    int64_t offset;
+    int64_t bytes;
+    enum BdrvTrackedRequestType type;
+
+    bool serialising;
+    int64_t overlap_offset;
+    int64_t overlap_bytes;
+
+    QLIST_ENTRY(BdrvTrackedRequest) list;
+    Coroutine *co; /* owner, used for deadlock detection */
+    CoQueue wait_queue; /* coroutines blocked on this request */
+
+    struct BdrvTrackedRequest *waiting_for;
+} BdrvTrackedRequest;
+
+
+struct BlockDriver {
+    /*
+     * These fields are initialized when this object is created,
+     * and are never changed afterwards.
+     */
+
+    const char *format_name;
+    int instance_size;
+
+    /*
+     * Set to true if the BlockDriver is a block filter. Block filters pass
+     * certain callbacks that refer to data (see block.c) to their bs->file
+     * or bs->backing (whichever one exists) if the driver doesn't implement
+     * them. Drivers that do not wish to forward must implement them and return
+     * -ENOTSUP.
+     * Note that filters are not allowed to modify data.
+     *
+     * Filters generally cannot have more than a single filtered child,
+     * because the data they present must at all times be the same as
+     * that on their filtered child.  That would be impossible to
+     * achieve for multiple filtered children.
+     * (And this filtered child must then be bs->file or bs->backing.)
+     */
+    bool is_filter;
+    /*
+     * Only make sense for filter drivers, for others must be false.
+     * If true, filtered child is bs->backing. Otherwise it's bs->file.
+     * Two internal filters use bs->backing as filtered child and has this
+     * field set to true: mirror_top and commit_top. There also two such test
+     * filters in tests/unit/test-bdrv-graph-mod.c.
+     *
+     * Never create any more such filters!
+     *
+     * TODO: imagine how to deprecate this behavior and make all filters work
+     * similarly using bs->file as filtered child.
+     */
+    bool filtered_child_is_backing;
+
+    /*
+     * Set to true if the BlockDriver is a format driver.  Format nodes
+     * generally do not expect their children to be other format nodes
+     * (except for backing files), and so format probing is disabled
+     * on those children.
+     */
+    bool is_format;
+
+    /*
+     * Set to true if the BlockDriver supports zoned children.
+     */
+    bool supports_zoned_children;
+
+    /*
+     * Drivers not implementing bdrv_parse_filename nor bdrv_open should have
+     * this field set to true, except ones that are defined only by their
+     * child's bs.
+     * An example of the last type will be the quorum block driver.
+     */
+    bool bdrv_needs_filename;
+
+    /*
+     * Set if a driver can support backing files. This also implies the
+     * following semantics:
+     *
+     *  - Return status 0 of .bdrv_co_block_status means that corresponding
+     *    blocks are not allocated in this layer of backing-chain
+     *  - For such (unallocated) blocks, read will:
+     *    - fill buffer with zeros if there is no backing file
+     *    - read from the backing file otherwise, where the block layer
+     *      takes care of reading zeros beyond EOF if backing file is short
+     */
+    bool supports_backing;
+
+    /*
+     * Drivers setting this field must be able to work with just a plain
+     * filename with '<protocol_name>:' as a prefix, and no other options.
+     * Options may be extracted from the filename by implementing
+     * bdrv_parse_filename.
+     */
+    const char *protocol_name;
+
+    /* List of options for creating images, terminated by name == NULL */
+    QemuOptsList *create_opts;
+
+    /* List of options for image amend */
+    QemuOptsList *amend_opts;
+
+    /*
+     * If this driver supports reopening images this contains a
+     * NULL-terminated list of the runtime options that can be
+     * modified. If an option in this list is unspecified during
+     * reopen then it _must_ be reset to its default value or return
+     * an error.
+     */
+    const char *const *mutable_opts;
+
+    /*
+     * Pointer to a NULL-terminated array of names of strong options
+     * that can be specified for bdrv_open(). A strong option is one
+     * that changes the data of a BDS.
+     * If this pointer is NULL, the array is considered empty.
+     * "filename" and "driver" are always considered strong.
+     */
+    const char *const *strong_runtime_opts;
+
+
+    /*
+     * Global state (GS) API. These functions run under the BQL.
+     *
+     * See include/block/block-global-state.h for more information about
+     * the GS API.
+     */
+
+    /*
+     * This function is invoked under BQL before .bdrv_co_amend()
+     * (which in contrast does not necessarily run under the BQL)
+     * to allow driver-specific initialization code that requires
+     * the BQL, like setting up specific permission flags.
+     */
+    int GRAPH_RDLOCK_PTR (*bdrv_amend_pre_run)(
+        BlockDriverState *bs, Error **errp);
+    /*
+     * This function is invoked under BQL after .bdrv_co_amend()
+     * to allow cleaning up what was done in .bdrv_amend_pre_run().
+     */
+    void GRAPH_RDLOCK_PTR (*bdrv_amend_clean)(BlockDriverState *bs);
+
+    /*
+     * Return true if @to_replace can be replaced by a BDS with the
+     * same data as @bs without it affecting @bs's behavior (that is,
+     * without it being visible to @bs's parents).
+     */
+    bool GRAPH_RDLOCK_PTR (*bdrv_recurse_can_replace)(
+        BlockDriverState *bs, BlockDriverState *to_replace);
+
+    int (*bdrv_probe_device)(const char *filename);
+
+    /*
+     * Any driver implementing this callback is expected to be able to handle
+     * NULL file names in its .bdrv_open() implementation.
+     */
+    void (*bdrv_parse_filename)(const char *filename, QDict *options,
+                                Error **errp);
+
+    /* For handling image reopen for split or non-split files. */
+    int GRAPH_UNLOCKED_PTR (*bdrv_reopen_prepare)(
+        BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp);
+    void GRAPH_UNLOCKED_PTR (*bdrv_reopen_commit)(
+        BDRVReopenState *reopen_state);
+    void GRAPH_UNLOCKED_PTR (*bdrv_reopen_commit_post)(
+        BDRVReopenState *reopen_state);
+    void GRAPH_UNLOCKED_PTR (*bdrv_reopen_abort)(
+        BDRVReopenState *reopen_state);
+    void (*bdrv_join_options)(QDict *options, QDict *old_options);
+
+    int GRAPH_UNLOCKED_PTR (*bdrv_open)(
+        BlockDriverState *bs, QDict *options, int flags, Error **errp);
+
+    /* Protocol drivers should implement this instead of bdrv_open */
+    int GRAPH_UNLOCKED_PTR (*bdrv_file_open)(
+        BlockDriverState *bs, QDict *options, int flags, Error **errp);
+    void (*bdrv_close)(BlockDriverState *bs);
+
+    int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create)(
+        BlockdevCreateOptions *opts, Error **errp);
+
+    int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create_opts)(
+        BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp);
+
+    int GRAPH_RDLOCK_PTR (*bdrv_amend_options)(
+        BlockDriverState *bs, QemuOpts *opts,
+        BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+        bool force, Error **errp);
+
+    int GRAPH_RDLOCK_PTR (*bdrv_make_empty)(BlockDriverState *bs);
+
+    /*
+     * Refreshes the bs->exact_filename field. If that is impossible,
+     * bs->exact_filename has to be left empty.
+     */
+    void GRAPH_RDLOCK_PTR (*bdrv_refresh_filename)(BlockDriverState *bs);
+
+    /*
+     * Gathers the open options for all children into @target.
+     * A simple format driver (without backing file support) might
+     * implement this function like this:
+     *
+     *     QINCREF(bs->file->bs->full_open_options);
+     *     qdict_put(target, "file", bs->file->bs->full_open_options);
+     *
+     * If not specified, the generic implementation will simply put
+     * all children's options under their respective name.
+     *
+     * @backing_overridden is true when bs->backing seems not to be
+     * the child that would result from opening bs->backing_file.
+     * Therefore, if it is true, the backing child's options should be
+     * gathered; otherwise, there is no need since the backing child
+     * is the one implied by the image header.
+     *
+     * Note that ideally this function would not be needed.  Every
+     * block driver which implements it is probably doing something
+     * shady regarding its runtime option structure.
+     */
+    void GRAPH_RDLOCK_PTR (*bdrv_gather_child_options)(
+        BlockDriverState *bs, QDict *target, bool backing_overridden);
+
+    /*
+     * Returns an allocated string which is the directory name of this BDS: It
+     * will be used to make relative filenames absolute by prepending this
+     * function's return value to them.
+     */
+    char * GRAPH_RDLOCK_PTR (*bdrv_dirname)(BlockDriverState *bs, Error **errp);
+
+    /*
+     * This informs the driver that we are no longer interested in the result
+     * of in-flight requests, so don't waste the time if possible.
+     *
+     * One example usage is to avoid waiting for an nbd target node reconnect
+     * timeout during job-cancel with force=true.
+     */
+    void GRAPH_RDLOCK_PTR (*bdrv_cancel_in_flight)(BlockDriverState *bs);
+
+    int GRAPH_RDLOCK_PTR (*bdrv_inactivate)(BlockDriverState *bs);
+
+    int GRAPH_RDLOCK_PTR (*bdrv_snapshot_create)(
+        BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
+
+    int GRAPH_UNLOCKED_PTR (*bdrv_snapshot_goto)(
+        BlockDriverState *bs, const char *snapshot_id);
+
+    int GRAPH_RDLOCK_PTR (*bdrv_snapshot_delete)(
+        BlockDriverState *bs, const char *snapshot_id, const char *name,
+        Error **errp);
+
+    int GRAPH_RDLOCK_PTR (*bdrv_snapshot_list)(
+        BlockDriverState *bs, QEMUSnapshotInfo **psn_info);
+
+    int GRAPH_RDLOCK_PTR (*bdrv_snapshot_load_tmp)(
+        BlockDriverState *bs, const char *snapshot_id, const char *name,
+        Error **errp);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_change_backing_file)(
+        BlockDriverState *bs, const char *backing_file,
+        const char *backing_fmt);
+
+    /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
+    int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
+        const char *tag);
+    int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
+        const char *tag);
+    int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
+    bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
+
+    void GRAPH_RDLOCK_PTR (*bdrv_refresh_limits)(
+        BlockDriverState *bs, Error **errp);
+
+    /*
+     * Returns 1 if newly created images are guaranteed to contain only
+     * zeros, 0 otherwise.
+     */
+    int GRAPH_RDLOCK_PTR (*bdrv_has_zero_init)(BlockDriverState *bs);
+
+    /*
+     * Remove fd handlers, timers, and other event loop callbacks so the event
+     * loop is no longer in use.  Called with no in-flight requests and in
+     * depth-first traversal order with parents before child nodes.
+     */
+    void (*bdrv_detach_aio_context)(BlockDriverState *bs);
+
+    /*
+     * Add fd handlers, timers, and other event loop callbacks so I/O requests
+     * can be processed again.  Called with no in-flight requests and in
+     * depth-first traversal order with child nodes before parent nodes.
+     */
+    void (*bdrv_attach_aio_context)(BlockDriverState *bs,
+                                    AioContext *new_context);
+
+    /**
+     * bdrv_drain_begin is called if implemented in the beginning of a
+     * drain operation to drain and stop any internal sources of requests in
+     * the driver.
+     * bdrv_drain_end is called if implemented at the end of the drain.
+     *
+     * They should be used by the driver to e.g. manage scheduled I/O
+     * requests, or toggle an internal state. After the end of the drain new
+     * requests will continue normally.
+     *
+     * Implementations of both functions must not call aio_poll().
+     */
+    void (*bdrv_drain_begin)(BlockDriverState *bs);
+    void (*bdrv_drain_end)(BlockDriverState *bs);
+
+    /**
+     * Try to get @bs's logical and physical block size.
+     * On success, store them in @bsz and return zero.
+     * On failure, return negative errno.
+     */
+    int GRAPH_RDLOCK_PTR (*bdrv_probe_blocksizes)(
+        BlockDriverState *bs, BlockSizes *bsz);
+    /**
+     * Try to get @bs's geometry (cyls, heads, sectors)
+     * On success, store them in @geo and return 0.
+     * On failure return -errno.
+     * Only drivers that want to override guest geometry implement this
+     * callback; see hd_geometry_guess().
+     */
+    int GRAPH_RDLOCK_PTR (*bdrv_probe_geometry)(
+        BlockDriverState *bs, HDGeometry *geo);
+
+    void GRAPH_WRLOCK_PTR (*bdrv_add_child)(
+        BlockDriverState *parent, BlockDriverState *child, Error **errp);
+
+    void GRAPH_WRLOCK_PTR (*bdrv_del_child)(
+        BlockDriverState *parent, BdrvChild *child, Error **errp);
+
+    /**
+     * Informs the block driver that a permission change is intended. The
+     * driver checks whether the change is permissible and may take other
+     * preparations for the change (e.g. get file system locks). This operation
+     * is always followed either by a call to either .bdrv_set_perm or
+     * .bdrv_abort_perm_update.
+     *
+     * Checks whether the requested set of cumulative permissions in @perm
+     * can be granted for accessing @bs and whether no other users are using
+     * permissions other than those given in @shared (both arguments take
+     * BLK_PERM_* bitmasks).
+     *
+     * If both conditions are met, 0 is returned. Otherwise, -errno is returned
+     * and errp is set to an error describing the conflict.
+     */
+    int GRAPH_RDLOCK_PTR (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
+                                            uint64_t shared, Error **errp);
+
+    /**
+     * Called to inform the driver that the set of cumulative set of used
+     * permissions for @bs has changed to @perm, and the set of shareable
+     * permission to @shared. The driver can use this to propagate changes to
+     * its children (i.e. request permissions only if a parent actually needs
+     * them).
+     *
+     * This function is only invoked after bdrv_check_perm(), so block drivers
+     * may rely on preparations made in their .bdrv_check_perm implementation.
+     */
+    void GRAPH_RDLOCK_PTR (*bdrv_set_perm)(
+        BlockDriverState *bs, uint64_t perm, uint64_t shared);
+
+    /*
+     * Called to inform the driver that after a previous bdrv_check_perm()
+     * call, the permission update is not performed and any preparations made
+     * for it (e.g. taken file locks) need to be undone.
+     *
+     * This function can be called even for nodes that never saw a
+     * bdrv_check_perm() call. It is a no-op then.
+     */
+    void GRAPH_RDLOCK_PTR (*bdrv_abort_perm_update)(BlockDriverState *bs);
+
+    /**
+     * Returns in @nperm and @nshared the permissions that the driver for @bs
+     * needs on its child @c, based on the cumulative permissions requested by
+     * the parents in @parent_perm and @parent_shared.
+     *
+     * If @c is NULL, return the permissions for attaching a new child for the
+     * given @child_class and @role.
+     *
+     * If @reopen_queue is non-NULL, don't return the currently needed
+     * permissions, but those that will be needed after applying the
+     * @reopen_queue.
+     */
+     void GRAPH_RDLOCK_PTR (*bdrv_child_perm)(
+        BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+        BlockReopenQueue *reopen_queue,
+        uint64_t parent_perm, uint64_t parent_shared,
+        uint64_t *nperm, uint64_t *nshared);
+
+    /**
+     * Register/unregister a buffer for I/O. For example, when the driver is
+     * interested to know the memory areas that will later be used in iovs, so
+     * that it can do IOMMU mapping with VFIO etc., in order to get better
+     * performance. In the case of VFIO drivers, this callback is used to do
+     * DMA mapping for hot buffers.
+     *
+     * Returns: true on success, false on failure
+     */
+    bool GRAPH_RDLOCK_PTR (*bdrv_register_buf)(
+        BlockDriverState *bs, void *host, size_t size, Error **errp);
+    void GRAPH_RDLOCK_PTR (*bdrv_unregister_buf)(
+        BlockDriverState *bs, void *host, size_t size);
+
+    /*
+     * This field is modified only under the BQL, and is part of
+     * the global state.
+     */
+    QLIST_ENTRY(BlockDriver) list;
+
+    /*
+     * I/O API functions. These functions are thread-safe.
+     *
+     * See include/block/block-io.h for more information about
+     * the I/O API.
+     */
+
+    int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_amend)(
+        BlockDriverState *bs, BlockdevAmendOptions *opts, bool force,
+        Error **errp);
+
+    /* aio */
+    BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_preadv)(BlockDriverState *bs,
+        int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+        BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
+
+    BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_pwritev)(BlockDriverState *bs,
+        int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+        BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
+
+    BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_flush)(
+        BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque);
+
+    BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_pdiscard)(
+        BlockDriverState *bs, int64_t offset, int bytes,
+        BlockCompletionFunc *cb, void *opaque);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_readv)(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+
+    /**
+     * @offset: position in bytes to read at
+     * @bytes: number of bytes to read
+     * @qiov: the buffers to fill with read data
+     * @flags: currently unused, always 0
+     *
+     * @offset and @bytes will be a multiple of 'request_alignment',
+     * but the length of individual @qiov elements does not have to
+     * be a multiple.
+     *
+     * @bytes will always equal the total size of @qiov, and will be
+     * no larger than 'max_transfer'.
+     *
+     * The buffer in @qiov may point directly to guest memory.
+     */
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_preadv)(BlockDriverState *bs,
+        int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+        BdrvRequestFlags flags);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_preadv_part)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes,
+        QEMUIOVector *qiov, size_t qiov_offset,
+        BdrvRequestFlags flags);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_writev)(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+        int flags);
+    /**
+     * @offset: position in bytes to write at
+     * @bytes: number of bytes to write
+     * @qiov: the buffers containing data to write
+     * @flags: zero or more bits allowed by 'supported_write_flags'
+     *
+     * @offset and @bytes will be a multiple of 'request_alignment',
+     * but the length of individual @qiov elements does not have to
+     * be a multiple.
+     *
+     * @bytes will always equal the total size of @qiov, and will be
+     * no larger than 'max_transfer'.
+     *
+     * The buffer in @qiov may point directly to guest memory.
+     */
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+        BdrvRequestFlags flags);
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev_part)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+        size_t qiov_offset, BdrvRequestFlags flags);
+
+    /*
+     * Efficiently zero a region of the disk image.  Typically an image format
+     * would use a compact metadata representation to implement this.  This
+     * function pointer may be NULL or return -ENOSUP and .bdrv_co_writev()
+     * will be called instead.
+     */
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwrite_zeroes)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes,
+        BdrvRequestFlags flags);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pdiscard)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes);
+
+    /*
+     * Map [offset, offset + nbytes) range onto a child of @bs to copy from,
+     * and invoke bdrv_co_copy_range_from(child, ...), or invoke
+     * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
+     *
+     * See the comment of bdrv_co_copy_range for the parameter and return value
+     * semantics.
+     */
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_copy_range_from)(
+        BlockDriverState *bs, BdrvChild *src, int64_t offset,
+        BdrvChild *dst, int64_t dst_offset, int64_t bytes,
+        BdrvRequestFlags read_flags, BdrvRequestFlags write_flags);
+
+    /*
+     * Map [offset, offset + nbytes) range onto a child of bs to copy data to,
+     * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy
+     * operation if @bs is the leaf and @src has the same BlockDriver.  Return
+     * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
+     *
+     * See the comment of bdrv_co_copy_range for the parameter and return value
+     * semantics.
+     */
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_copy_range_to)(
+        BlockDriverState *bs, BdrvChild *src, int64_t src_offset,
+        BdrvChild *dst, int64_t dst_offset, int64_t bytes,
+        BdrvRequestFlags read_flags, BdrvRequestFlags write_flags);
+
+    /*
+     * Building block for bdrv_block_status[_above] and
+     * bdrv_is_allocated[_above].  The driver should answer only
+     * according to the current layer, and should only need to set
+     * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
+     * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
+     * layer, the result should be 0 (and not BDRV_BLOCK_ZERO).  See
+     * block.h for the overall meaning of the bits.  As a hint, the
+     * flag want_zero is true if the caller cares more about precise
+     * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
+     * overall allocation (favor larger *pnum, perhaps by reporting
+     * _DATA instead of _ZERO).  The block layer guarantees input
+     * clamped to bdrv_getlength() and aligned to request_alignment,
+     * as well as non-NULL pnum, map, and file; in turn, the driver
+     * must return an error or set pnum to an aligned non-zero value.
+     *
+     * Note that @bytes is just a hint on how big of a region the
+     * caller wants to inspect.  It is not a limit on *pnum.
+     * Implementations are free to return larger values of *pnum if
+     * doing so does not incur a performance penalty.
+     *
+     * block/io.c's bdrv_co_block_status() will utilize an unclamped
+     * *pnum value for the block-status cache on protocol nodes, prior
+     * to clamping *pnum for return to its caller.
+     */
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_block_status)(
+        BlockDriverState *bs,
+        bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
+        int64_t *map, BlockDriverState **file);
+
+    /*
+     * Snapshot-access API.
+     *
+     * Block-driver may provide snapshot-access API: special functions to access
+     * some internal "snapshot". The functions are similar with normal
+     * read/block_status/discard handler, but don't have any specific handling
+     * in generic block-layer: no serializing, no alignment, no tracked
+     * requests. So, block-driver that realizes these APIs is fully responsible
+     * for synchronization between snapshot-access API and normal IO requests.
+     *
+     * TODO: To be able to support qcow2's internal snapshots, this API will
+     * need to be extended to:
+     * - be able to select a specific snapshot
+     * - receive the snapshot's actual length (which may differ from bs's
+     *   length)
+     */
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_preadv_snapshot)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes,
+        QEMUIOVector *qiov, size_t qiov_offset);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_snapshot_block_status)(
+        BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
+        int64_t *pnum, int64_t *map, BlockDriverState **file);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pdiscard_snapshot)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes);
+
+    /*
+     * Invalidate any cached meta-data.
+     */
+    void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_invalidate_cache)(
+        BlockDriverState *bs, Error **errp);
+
+    /*
+     * Flushes all data for all layers by calling bdrv_co_flush for underlying
+     * layers, if needed. This function is needed for deterministic
+     * synchronization of the flush finishing callback.
+     */
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_flush)(BlockDriverState *bs);
+
+    /* Delete a created file. */
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_delete_file)(
+        BlockDriverState *bs, Error **errp);
+
+    /*
+     * Flushes all data that was already written to the OS all the way down to
+     * the disk (for example file-posix.c calls fsync()).
+     */
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_flush_to_disk)(
+        BlockDriverState *bs);
+
+    /*
+     * Flushes all internal caches to the OS. The data may still sit in a
+     * writeback cache of the host OS, but it will survive a crash of the qemu
+     * process.
+     */
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_flush_to_os)(
+        BlockDriverState *bs);
+
+    /*
+     * Truncate @bs to @offset bytes using the given @prealloc mode
+     * when growing.  Modes other than PREALLOC_MODE_OFF should be
+     * rejected when shrinking @bs.
+     *
+     * If @exact is true, @bs must be resized to exactly @offset.
+     * Otherwise, it is sufficient for @bs (if it is a host block
+     * device and thus there is no way to resize it) to be at least
+     * @offset bytes in length.
+     *
+     * If @exact is true and this function fails but would succeed
+     * with @exact = false, it should return -ENOTSUP.
+     */
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_truncate)(
+        BlockDriverState *bs, int64_t offset, bool exact,
+        PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
+
+    int64_t coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_getlength)(
+        BlockDriverState *bs);
+
+    int64_t coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_get_allocated_file_size)(
+        BlockDriverState *bs);
+
+    BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs,
+                                      Error **errp);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev_compressed)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes,
+        QEMUIOVector *qiov);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pwritev_compressed_part)(
+        BlockDriverState *bs, int64_t offset, int64_t bytes,
+        QEMUIOVector *qiov, size_t qiov_offset);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_get_info)(
+        BlockDriverState *bs, BlockDriverInfo *bdi);
+
+    ImageInfoSpecific * GRAPH_RDLOCK_PTR (*bdrv_get_specific_info)(
+        BlockDriverState *bs, Error **errp);
+    BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_save_vmstate)(
+        BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_load_vmstate)(
+        BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+
+    int coroutine_fn (*bdrv_co_zone_report)(BlockDriverState *bs,
+            int64_t offset, unsigned int *nr_zones,
+            BlockZoneDescriptor *zones);
+    int coroutine_fn (*bdrv_co_zone_mgmt)(BlockDriverState *bs, BlockZoneOp op,
+            int64_t offset, int64_t len);
+    int coroutine_fn (*bdrv_co_zone_append)(BlockDriverState *bs,
+            int64_t *offset, QEMUIOVector *qiov,
+            BdrvRequestFlags flags);
+
+    /* removable device specific */
+    bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_is_inserted)(
+        BlockDriverState *bs);
+    void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_eject)(
+        BlockDriverState *bs, bool eject_flag);
+    void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_lock_medium)(
+        BlockDriverState *bs, bool locked);
+
+    /* to control generic scsi devices */
+    BlockAIOCB *coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_aio_ioctl)(
+        BlockDriverState *bs, unsigned long int req, void *buf,
+        BlockCompletionFunc *cb, void *opaque);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_ioctl)(
+        BlockDriverState *bs, unsigned long int req, void *buf);
+
+    /*
+     * Returns 0 for completed check, -errno for internal errors.
+     * The check results are stored in result.
+     */
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_check)(
+        BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix);
+
+    void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_debug_event)(
+        BlockDriverState *bs, BlkdebugEvent event);
+
+    bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
+
+    bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_can_store_new_dirty_bitmap)(
+        BlockDriverState *bs, const char *name, uint32_t granularity,
+        Error **errp);
+
+    int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_remove_persistent_dirty_bitmap)(
+        BlockDriverState *bs, const char *name, Error **errp);
+};
+
+static inline bool TSA_NO_TSA block_driver_can_compress(BlockDriver *drv)
+{
+    return drv->bdrv_co_pwritev_compressed ||
+           drv->bdrv_co_pwritev_compressed_part;
+}
+
+typedef struct BlockLimits {
+    /*
+     * Alignment requirement, in bytes, for offset/length of I/O
+     * requests. Must be a power of 2 less than INT_MAX; defaults to
+     * 1 for drivers with modern byte interfaces, and to 512
+     * otherwise.
+     */
+    uint32_t request_alignment;
+
+    /*
+     * Maximum number of bytes that can be discarded at once. Must be multiple
+     * of pdiscard_alignment, but need not be power of 2. May be 0 if no
+     * inherent 64-bit limit.
+     */
+    int64_t max_pdiscard;
+
+    /*
+     * Optimal alignment for discard requests in bytes. A power of 2
+     * is best but not mandatory.  Must be a multiple of
+     * bl.request_alignment, and must be less than max_pdiscard if
+     * that is set. May be 0 if bl.request_alignment is good enough
+     */
+    uint32_t pdiscard_alignment;
+
+    /*
+     * Maximum number of bytes that can zeroized at once. Must be multiple of
+     * pwrite_zeroes_alignment. 0 means no limit.
+     */
+    int64_t max_pwrite_zeroes;
+
+    /*
+     * Optimal alignment for write zeroes requests in bytes. A power
+     * of 2 is best but not mandatory.  Must be a multiple of
+     * bl.request_alignment, and must be less than max_pwrite_zeroes
+     * if that is set. May be 0 if bl.request_alignment is good
+     * enough
+     */
+    uint32_t pwrite_zeroes_alignment;
+
+    /*
+     * Optimal transfer length in bytes.  A power of 2 is best but not
+     * mandatory.  Must be a multiple of bl.request_alignment, or 0 if
+     * no preferred size
+     */
+    uint32_t opt_transfer;
+
+    /*
+     * Maximal transfer length in bytes.  Need not be power of 2, but
+     * must be multiple of opt_transfer and bl.request_alignment, or 0
+     * for no 32-bit limit.  For now, anything larger than INT_MAX is
+     * clamped down.
+     */
+    uint32_t max_transfer;
+
+    /*
+     * Maximal hardware transfer length in bytes.  Applies whenever
+     * transfers to the device bypass the kernel I/O scheduler, for
+     * example with SG_IO.  If larger than max_transfer or if zero,
+     * blk_get_max_hw_transfer will fall back to max_transfer.
+     */
+    uint64_t max_hw_transfer;
+
+    /*
+     * Maximal number of scatter/gather elements allowed by the hardware.
+     * Applies whenever transfers to the device bypass the kernel I/O
+     * scheduler, for example with SG_IO.  If larger than max_iov
+     * or if zero, blk_get_max_hw_iov will fall back to max_iov.
+     */
+    int max_hw_iov;
+
+
+    /* memory alignment, in bytes so that no bounce buffer is needed */
+    size_t min_mem_alignment;
+
+    /* memory alignment, in bytes, for bounce buffer */
+    size_t opt_mem_alignment;
+
+    /* maximum number of iovec elements */
+    int max_iov;
+
+    /*
+     * true if the length of the underlying file can change, and QEMU
+     * is expected to adjust automatically.  Mostly for CD-ROM drives,
+     * whose length is zero when the tray is empty (they don't need
+     * an explicit monitor command to load the disk inside the guest).
+     */
+    bool has_variable_length;
+
+    /* device zone model */
+    BlockZoneModel zoned;
+
+    /* zone size expressed in bytes */
+    uint32_t zone_size;
+
+    /* total number of zones */
+    uint32_t nr_zones;
+
+    /* maximum sectors of a zone append write operation */
+    uint32_t max_append_sectors;
+
+    /* maximum number of open zones */
+    uint32_t max_open_zones;
+
+    /* maximum number of active zones */
+    uint32_t max_active_zones;
+
+    uint32_t write_granularity;
+} BlockLimits;
+
+typedef struct BdrvOpBlocker BdrvOpBlocker;
+
+typedef struct BdrvAioNotifier {
+    void (*attached_aio_context)(AioContext *new_context, void *opaque);
+    void (*detach_aio_context)(void *opaque);
+
+    void *opaque;
+    bool deleted;
+
+    QLIST_ENTRY(BdrvAioNotifier) list;
+} BdrvAioNotifier;
+
+struct BdrvChildClass {
+    /*
+     * If true, bdrv_replace_node() doesn't change the node this BdrvChild
+     * points to.
+     */
+    bool stay_at_node;
+
+    /*
+     * If true, the parent is a BlockDriverState and bdrv_next_all_states()
+     * will return it. This information is used for drain_all, where every node
+     * will be drained separately, so the drain only needs to be propagated to
+     * non-BDS parents.
+     */
+    bool parent_is_bds;
+
+    /*
+     * Global state (GS) API. These functions run under the BQL.
+     *
+     * See include/block/block-global-state.h for more information about
+     * the GS API.
+     */
+    void (*inherit_options)(BdrvChildRole role, bool parent_is_format,
+                            int *child_flags, QDict *child_options,
+                            int parent_flags, QDict *parent_options);
+    void (*change_media)(BdrvChild *child, bool load);
+
+    /*
+     * Returns a malloced string that describes the parent of the child for a
+     * human reader. This could be a node-name, BlockBackend name, qdev ID or
+     * QOM path of the device owning the BlockBackend, job type and ID etc. The
+     * caller is responsible for freeing the memory.
+     */
+    char *(*get_parent_desc)(BdrvChild *child);
+
+    /*
+     * Notifies the parent that the child has been activated/inactivated (e.g.
+     * when migration is completing) and it can start/stop requesting
+     * permissions and doing I/O on it.
+     */
+    void GRAPH_RDLOCK_PTR (*activate)(BdrvChild *child, Error **errp);
+    int GRAPH_RDLOCK_PTR (*inactivate)(BdrvChild *child);
+
+    void GRAPH_WRLOCK_PTR (*attach)(BdrvChild *child);
+    void GRAPH_WRLOCK_PTR (*detach)(BdrvChild *child);
+
+    /*
+     * If this pair of functions is implemented, the parent doesn't issue new
+     * requests after returning from .drained_begin() until .drained_end() is
+     * called.
+     *
+     * These functions must not change the graph (and therefore also must not
+     * call aio_poll(), which could change the graph indirectly).
+     *
+     * Note that this can be nested. If drained_begin() was called twice, new
+     * I/O is allowed only after drained_end() was called twice, too.
+     */
+    void GRAPH_RDLOCK_PTR (*drained_begin)(BdrvChild *child);
+    void GRAPH_RDLOCK_PTR (*drained_end)(BdrvChild *child);
+
+    /*
+     * Returns whether the parent has pending requests for the child. This
+     * callback is polled after .drained_begin() has been called until all
+     * activity on the child has stopped.
+     */
+    bool GRAPH_RDLOCK_PTR (*drained_poll)(BdrvChild *child);
+
+    /*
+     * Notifies the parent that the filename of its child has changed (e.g.
+     * because the direct child was removed from the backing chain), so that it
+     * can update its reference.
+     */
+    int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
+                           const char *filename, Error **errp);
+
+    bool (*change_aio_ctx)(BdrvChild *child, AioContext *ctx,
+                           GHashTable *visited, Transaction *tran,
+                           Error **errp);
+
+    /*
+     * I/O API functions. These functions are thread-safe.
+     *
+     * See include/block/block-io.h for more information about
+     * the I/O API.
+     */
+
+    void (*resize)(BdrvChild *child);
+
+    /*
+     * Returns a name that is supposedly more useful for human users than the
+     * node name for identifying the node in question (in particular, a BB
+     * name), or NULL if the parent can't provide a better name.
+     */
+    const char *(*get_name)(BdrvChild *child);
+
+    AioContext *(*get_parent_aio_context)(BdrvChild *child);
+};
+
+extern const BdrvChildClass child_of_bds;
+
+struct BdrvChild {
+    BlockDriverState *bs;
+    char *name;
+    const BdrvChildClass *klass;
+    BdrvChildRole role;
+    void *opaque;
+
+    /**
+     * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask)
+     */
+    uint64_t perm;
+
+    /**
+     * Permissions that can still be granted to other users of @bs while this
+     * BdrvChild is still attached to it. (BLK_PERM_* bitmask)
+     */
+    uint64_t shared_perm;
+
+    /*
+     * This link is frozen: the child can neither be replaced nor
+     * detached from the parent.
+     */
+    bool frozen;
+
+    /*
+     * True if the parent of this child has been drained by this BdrvChild
+     * (through klass->drained_*).
+     *
+     * It is generally true if bs->quiesce_counter > 0. It may differ while the
+     * child is entering or leaving a drained section.
+     */
+    bool quiesced_parent;
+
+    QLIST_ENTRY(BdrvChild GRAPH_RDLOCK_PTR) next;
+    QLIST_ENTRY(BdrvChild GRAPH_RDLOCK_PTR) next_parent;
+};
+
+/*
+ * Allows bdrv_co_block_status() to cache one data region for a
+ * protocol node.
+ *
+ * @valid: Whether the cache is valid (should be accessed with atomic
+ *         functions so this can be reset by RCU readers)
+ * @data_start: Offset where we know (or strongly assume) is data
+ * @data_end: Offset where the data region ends (which is not necessarily
+ *            the start of a zeroed region)
+ */
+typedef struct BdrvBlockStatusCache {
+    struct rcu_head rcu;
+
+    bool valid;
+    int64_t data_start;
+    int64_t data_end;
+} BdrvBlockStatusCache;
+
+struct BlockDriverState {
+    /*
+     * Protected by big QEMU lock or read-only after opening.  No special
+     * locking needed during I/O...
+     */
+    int open_flags; /* flags used to open the file, re-used for re-open */
+    bool encrypted; /* if true, the media is encrypted */
+    bool sg;        /* if true, the device is a /dev/sg* */
+    bool probed;    /* if true, format was probed rather than specified */
+    bool force_share; /* if true, always allow all shared permissions */
+    bool implicit;  /* if true, this filter node was automatically inserted */
+
+    BlockDriver *drv; /* NULL means no media */
+    void *opaque;
+
+    AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
+    /*
+     * long-running tasks intended to always use the same AioContext as this
+     * BDS may register themselves in this list to be notified of changes
+     * regarding this BDS's context
+     */
+    QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
+    bool walking_aio_notifiers; /* to make removal during iteration safe */
+
+    char filename[PATH_MAX];
+    /*
+     * If not empty, this image is a diff in relation to backing_file.
+     * Note that this is the name given in the image header and
+     * therefore may or may not be equal to .backing->bs->filename.
+     * If this field contains a relative path, it is to be resolved
+     * relatively to the overlay's location.
+     */
+    char backing_file[PATH_MAX];
+    /*
+     * The backing filename indicated by the image header.  Contrary
+     * to backing_file, if we ever open this file, auto_backing_file
+     * is replaced by the resulting BDS's filename (i.e. after a
+     * bdrv_refresh_filename() run).
+     */
+    char auto_backing_file[PATH_MAX];
+    char backing_format[16]; /* if non-zero and backing_file exists */
+
+    QDict *full_open_options;
+    char exact_filename[PATH_MAX];
+
+    /* I/O Limits */
+    BlockLimits bl;
+
+    /*
+     * Flags honored during pread
+     */
+    BdrvRequestFlags supported_read_flags;
+    /*
+     * Flags honored during pwrite (so far: BDRV_REQ_FUA,
+     * BDRV_REQ_WRITE_UNCHANGED).
+     * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
+     * writes will be issued as normal writes without the flag set.
+     * This is important to note for drivers that do not explicitly
+     * request a WRITE permission for their children and instead take
+     * the same permissions as their parent did (this is commonly what
+     * block filters do).  Such drivers have to be aware that the
+     * parent may have taken a WRITE_UNCHANGED permission only and is
+     * issuing such requests.  Drivers either must make sure that
+     * these requests do not result in plain WRITE accesses (usually
+     * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding
+     * every incoming write request as-is, including potentially that
+     * flag), or they have to explicitly take the WRITE permission for
+     * their children.
+     */
+    BdrvRequestFlags supported_write_flags;
+    /*
+     * Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
+     * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED)
+     */
+    BdrvRequestFlags supported_zero_flags;
+    /*
+     * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
+     *
+     * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure
+     * that any added space reads as all zeros. If this can't be guaranteed,
+     * the operation must fail.
+     */
+    BdrvRequestFlags supported_truncate_flags;
+
+    /* the following member gives a name to every node on the bs graph. */
+    char node_name[32];
+    /* element of the list of named nodes building the graph */
+    QTAILQ_ENTRY(BlockDriverState) node_list;
+    /* element of the list of all BlockDriverStates (all_bdrv_states) */
+    QTAILQ_ENTRY(BlockDriverState) bs_list;
+    /* element of the list of monitor-owned BDS */
+    QTAILQ_ENTRY(BlockDriverState) monitor_list;
+    int refcnt;
+
+    /* operation blockers. Protected by BQL. */
+    QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];
+
+    /*
+     * The node that this node inherited default options from (and a reopen on
+     * which can affect this node by changing these defaults). This is always a
+     * parent node of this node.
+     */
+    BlockDriverState *inherits_from;
+
+    /*
+     * @backing and @file are some of @children or NULL. All these three fields
+     * (@file, @backing and @children) are modified only in
+     * bdrv_child_cb_attach() and bdrv_child_cb_detach().
+     *
+     * See also comment in include/block/block.h, to learn how backing and file
+     * are connected with BdrvChildRole.
+     */
+    QLIST_HEAD(, BdrvChild GRAPH_RDLOCK_PTR) children;
+    BdrvChild * GRAPH_RDLOCK_PTR backing;
+    BdrvChild * GRAPH_RDLOCK_PTR file;
+
+    QLIST_HEAD(, BdrvChild GRAPH_RDLOCK_PTR) parents;
+
+    QDict *options;
+    QDict *explicit_options;
+    BlockdevDetectZeroesOptions detect_zeroes;
+
+    /* The error object in use for blocking operations on backing_hd */
+    Error *backing_blocker;
+
+    /* Protected by AioContext lock */
+
+    /*
+     * If we are reading a disk image, give its size in sectors.
+     * Generally read-only; it is written to by load_snapshot and
+     * save_snaphost, but the block layer is quiescent during those.
+     */
+    int64_t total_sectors;
+
+    /* threshold limit for writes, in bytes. "High water mark". */
+    uint64_t write_threshold_offset;
+
+    /*
+     * Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
+     * Reading from the list can be done with either the BQL or the
+     * dirty_bitmap_mutex.  Modifying a bitmap only requires
+     * dirty_bitmap_mutex.
+     */
+    QemuMutex dirty_bitmap_mutex;
+    QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
+
+    /* Offset after the highest byte written to */
+    Stat64 wr_highest_offset;
+
+    /*
+     * If true, copy read backing sectors into image.  Can be >1 if more
+     * than one client has requested copy-on-read.  Accessed with atomic
+     * ops.
+     */
+    int copy_on_read;
+
+    /*
+     * number of in-flight requests; overall and serialising.
+     * Accessed with atomic ops.
+     */
+    unsigned int in_flight;
+    unsigned int serialising_in_flight;
+
+    /* do we need to tell the quest if we have a volatile write cache? */
+    int enable_write_cache;
+
+    /* Accessed with atomic ops.  */
+    int quiesce_counter;
+
+    unsigned int write_gen;               /* Current data generation */
+
+    /* Protected by reqs_lock.  */
+    QemuMutex reqs_lock;
+    QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+    CoQueue flush_queue;                  /* Serializing flush queue */
+    bool active_flush_req;                /* Flush request in flight? */
+
+    /* Only read/written by whoever has set active_flush_req to true.  */
+    unsigned int flushed_gen;             /* Flushed write generation */
+
+    /* BdrvChild links to this node may never be frozen */
+    bool never_freeze;
+
+    /* Lock for block-status cache RCU writers */
+    CoMutex bsc_modify_lock;
+    /* Always non-NULL, but must only be dereferenced under an RCU read guard */
+    BdrvBlockStatusCache *block_status_cache;
+
+    /* array of write pointers' location of each zone in the zoned device. */
+    BlockZoneWps *wps;
+};
+
+struct BlockBackendRootState {
+    int open_flags;
+    BlockdevDetectZeroesOptions detect_zeroes;
+};
+
+typedef enum BlockMirrorBackingMode {
+    /*
+     * Reuse the existing backing chain from the source for the target.
+     * - sync=full: Set backing BDS to NULL.
+     * - sync=top:  Use source's backing BDS.
+     * - sync=none: Use source as the backing BDS.
+     */
+    MIRROR_SOURCE_BACKING_CHAIN,
+
+    /* Open the target's backing chain completely anew */
+    MIRROR_OPEN_BACKING_CHAIN,
+
+    /* Do not change the target's backing BDS after job completion */
+    MIRROR_LEAVE_BACKING_CHAIN,
+} BlockMirrorBackingMode;
+
+
+/*
+ * Essential block drivers which must always be statically linked into qemu, and
+ * which therefore can be accessed without using bdrv_find_format()
+ */
+extern BlockDriver bdrv_file;
+extern BlockDriver bdrv_raw;
+extern BlockDriver bdrv_qcow2;
+
+extern unsigned int bdrv_drain_all_count;
+extern QemuOptsList bdrv_create_opts_simple;
+
+/*
+ * Common functions that are neither I/O nor Global State.
+ *
+ * See include/block/block-common.h for more information about
+ * the Common API.
+ */
+
+static inline BlockDriverState *child_bs(BdrvChild *child)
+{
+    return child ? child->bs : NULL;
+}
+
+int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp);
+char *create_tmp_file(Error **errp);
+void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
+                                      QDict *options);
+
+
+int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
+                            QEMUIOVector *qiov, size_t qiov_offset,
+                            Error **errp);
+
+#ifdef _WIN32
+int is_windows_drive(const char *filename);
+#endif
+
+#endif /* BLOCK_INT_COMMON_H */
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
new file mode 100644 (file)
index 0000000..ef31c58
--- /dev/null
@@ -0,0 +1,320 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef BLOCK_INT_GLOBAL_STATE_H
+#define BLOCK_INT_GLOBAL_STATE_H
+
+#include "block/blockjob.h"
+#include "block/block_int-common.h"
+#include "qemu/hbitmap.h"
+#include "qemu/main-loop.h"
+
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+/**
+ * stream_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Block device to operate on.
+ * @base: Block device that will become the new base, or %NULL to
+ * flatten the whole backing file chain onto @bs.
+ * @backing_file_str: The file name that will be written to @bs as the
+ * the new backing file if the job completes. Ignored if @base is %NULL.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ *                  See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ *                    driver that the stream job inserts into the graph above
+ *                    @bs. NULL means that a node name should be autogenerated.
+ * @errp: Error object.
+ *
+ * Start a streaming operation on @bs.  Clusters that are unallocated
+ * in @bs, but allocated in any image between @base and @bs (both
+ * exclusive) will be written to @bs.  At the end of a successful
+ * streaming job, the backing file of @bs will be changed to
+ * @backing_file_str in the written image and to @base in the live
+ * BlockDriverState.
+ */
+void stream_start(const char *job_id, BlockDriverState *bs,
+                  BlockDriverState *base, const char *backing_file_str,
+                  BlockDriverState *bottom,
+                  int creation_flags, int64_t speed,
+                  BlockdevOnError on_error,
+                  const char *filter_node_name,
+                  Error **errp);
+
+/**
+ * commit_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Active block device.
+ * @top: Top block device to be committed.
+ * @base: Block device that will be written into, and become the new top.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ *                  See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @backing_file_str: String to use as the backing file in @top's overlay
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @top. NULL means
+ * that a node name should be autogenerated.
+ * @errp: Error object.
+ *
+ */
+void commit_start(const char *job_id, BlockDriverState *bs,
+                  BlockDriverState *base, BlockDriverState *top,
+                  int creation_flags, int64_t speed,
+                  BlockdevOnError on_error, const char *backing_file_str,
+                  const char *filter_node_name, Error **errp);
+/**
+ * commit_active_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Active block device to be committed.
+ * @base: Block device that will be written into, and become the new top.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ *                  See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @bs. NULL means that
+ * a node name should be autogenerated.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @auto_complete: Auto complete the job.
+ * @errp: Error object.
+ *
+ */
+BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
+                              BlockDriverState *base, int creation_flags,
+                              int64_t speed, BlockdevOnError on_error,
+                              const char *filter_node_name,
+                              BlockCompletionFunc *cb, void *opaque,
+                              bool auto_complete, Error **errp);
+/*
+ * mirror_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @replaces: Block graph node name to replace once the mirror is done. Can
+ *            only be used when full mirroring is selected.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ *                  See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @granularity: The chosen granularity for the dirty bitmap.
+ * @buf_size: The amount of data that can be in flight at one time.
+ * @mode: Whether to collapse all images in the chain to the target.
+ * @backing_mode: How to establish the target's backing chain after completion.
+ * @zero_target: Whether the target should be explicitly zero-initialized
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @unmap: Whether to unmap target where source sectors only contain zeroes.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the mirror job inserts into the graph above @bs. NULL means that
+ * a node name should be autogenerated.
+ * @copy_mode: When to trigger writes to the target.
+ * @errp: Error object.
+ *
+ * Start a mirroring operation on @bs.  Clusters that are allocated
+ * in @bs will be written to @target until the job is cancelled or
+ * manually completed.  At the end of a successful mirroring job,
+ * @bs will be switched to read from @target.
+ */
+void mirror_start(const char *job_id, BlockDriverState *bs,
+                  BlockDriverState *target, const char *replaces,
+                  int creation_flags, int64_t speed,
+                  uint32_t granularity, int64_t buf_size,
+                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
+                  bool zero_target,
+                  BlockdevOnError on_source_error,
+                  BlockdevOnError on_target_error,
+                  bool unmap, const char *filter_node_name,
+                  MirrorCopyMode copy_mode, Error **errp);
+
+/*
+ * backup_job_create:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @sync_mode: What parts of the disk image should be copied to the destination.
+ * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental'
+ * @bitmap_mode: The bitmap synchronization policy to use.
+ * @perf: Performance options. All actual fields assumed to be present,
+ *        all ".has_*" fields are ignored.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ *                  See @BlockJobCreateFlags
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @txn: Transaction that this job is part of (may be NULL).
+ *
+ * Create a backup operation on @bs.  Clusters in @bs are written to @target
+ * until the job is cancelled or manually completed.
+ */
+BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+                            BlockDriverState *target, int64_t speed,
+                            MirrorSyncMode sync_mode,
+                            BdrvDirtyBitmap *sync_bitmap,
+                            BitmapSyncMode bitmap_mode,
+                            bool compress,
+                            const char *filter_node_name,
+                            BackupPerf *perf,
+                            BlockdevOnError on_source_error,
+                            BlockdevOnError on_target_error,
+                            int creation_flags,
+                            BlockCompletionFunc *cb, void *opaque,
+                            JobTxn *txn, Error **errp);
+
+BdrvChild * GRAPH_WRLOCK
+bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name,
+                       const BdrvChildClass *child_class,
+                       BdrvChildRole child_role,
+                       uint64_t perm, uint64_t shared_perm,
+                       void *opaque, Error **errp);
+
+void GRAPH_WRLOCK bdrv_root_unref_child(BdrvChild *child);
+
+void GRAPH_RDLOCK bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
+                                           uint64_t *shared_perm);
+
+/**
+ * Sets a BdrvChild's permissions.  Avoid if the parent is a BDS; use
+ * bdrv_child_refresh_perms() instead and make the parent's
+ * .bdrv_child_perm() implementation return the correct values.
+ */
+int GRAPH_RDLOCK
+bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+                        Error **errp);
+
+/**
+ * Calls bs->drv->bdrv_child_perm() and updates the child's permission
+ * masks with the result.
+ * Drivers should invoke this function whenever an event occurs that
+ * makes their .bdrv_child_perm() implementation return different
+ * values than before, but which will not result in the block layer
+ * automatically refreshing the permissions.
+ */
+int GRAPH_RDLOCK
+bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp);
+
+bool GRAPH_RDLOCK bdrv_recurse_can_replace(BlockDriverState *bs,
+                                           BlockDriverState *to_replace);
+
+/*
+ * Default implementation for BlockDriver.bdrv_child_perm() that can
+ * be used by block filters and image formats, as long as they use the
+ * child_of_bds child class and set an appropriate BdrvChildRole.
+ */
+void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
+                        BdrvChildRole role, BlockReopenQueue *reopen_queue,
+                        uint64_t perm, uint64_t shared,
+                        uint64_t *nperm, uint64_t *nshared);
+
+void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
+bool blk_dev_has_removable_media(BlockBackend *blk);
+void blk_dev_eject_request(BlockBackend *blk, bool force);
+bool blk_dev_is_medium_locked(BlockBackend *blk);
+
+void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup);
+
+void bdrv_set_monitor_owned(BlockDriverState *bs);
+
+void blockdev_close_all_bdrv_states(void);
+
+BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp);
+
+/**
+ * Simple implementation of bdrv_co_create_opts for protocol drivers
+ * which only support creation via opening a file
+ * (usually existing raw storage device)
+ */
+int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
+                                            const char *filename,
+                                            QemuOpts *opts,
+                                            Error **errp);
+
+BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
+                                           const char *name,
+                                           BlockDriverState **pbs,
+                                           Error **errp);
+BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
+                                          BlockDirtyBitmapOrStrList *bms,
+                                          HBitmap **backup, Error **errp);
+BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
+                                           bool release,
+                                           BlockDriverState **bitmap_bs,
+                                           Error **errp);
+
+
+BlockDriverState * GRAPH_RDLOCK
+bdrv_skip_implicit_filters(BlockDriverState *bs);
+
+/**
+ * bdrv_add_aio_context_notifier:
+ *
+ * If a long-running job intends to be always run in the same AioContext as a
+ * certain BDS, it may use this function to be notified of changes regarding the
+ * association of the BDS to an AioContext.
+ *
+ * attached_aio_context() is called after the target BDS has been attached to a
+ * new AioContext; detach_aio_context() is called before the target BDS is being
+ * detached from its old AioContext.
+ */
+void bdrv_add_aio_context_notifier(BlockDriverState *bs,
+        void (*attached_aio_context)(AioContext *new_context, void *opaque),
+        void (*detach_aio_context)(void *opaque), void *opaque);
+
+/**
+ * bdrv_remove_aio_context_notifier:
+ *
+ * Unsubscribe of change notifications regarding the BDS's AioContext. The
+ * parameters given here have to be the same as those given to
+ * bdrv_add_aio_context_notifier().
+ */
+void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
+                                      void (*aio_context_attached)(AioContext *,
+                                                                   void *),
+                                      void (*aio_context_detached)(void *),
+                                      void *opaque);
+
+/**
+ * End all quiescent sections started by bdrv_drain_all_begin(). This is
+ * needed when deleting a BDS before bdrv_drain_all_end() is called.
+ *
+ * NOTE: this is an internal helper for bdrv_close() *only*. No one else
+ * should call it.
+ */
+void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
+
+#endif /* BLOCK_INT_GLOBAL_STATE_H */
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
new file mode 100644 (file)
index 0000000..4a7cf2b
--- /dev/null
@@ -0,0 +1,194 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_IO_H
+#define BLOCK_INT_IO_H
+
+#include "block/block_int-common.h"
+#include "qemu/hbitmap.h"
+#include "qemu/main-loop.h"
+
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+int coroutine_fn GRAPH_RDLOCK bdrv_co_preadv_snapshot(BdrvChild *child,
+    int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset);
+int coroutine_fn GRAPH_RDLOCK bdrv_co_snapshot_block_status(
+    BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
+    int64_t *pnum, int64_t *map, BlockDriverState **file);
+int coroutine_fn GRAPH_RDLOCK bdrv_co_pdiscard_snapshot(BlockDriverState *bs,
+    int64_t offset, int64_t bytes);
+
+
+int coroutine_fn GRAPH_RDLOCK bdrv_co_preadv(BdrvChild *child,
+    int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+    BdrvRequestFlags flags);
+int coroutine_fn GRAPH_RDLOCK bdrv_co_preadv_part(BdrvChild *child,
+    int64_t offset, int64_t bytes,
+    QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
+int coroutine_fn GRAPH_RDLOCK bdrv_co_pwritev(BdrvChild *child,
+    int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+    BdrvRequestFlags flags);
+int coroutine_fn GRAPH_RDLOCK bdrv_co_pwritev_part(BdrvChild *child,
+    int64_t offset, int64_t bytes,
+    QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
+
+static inline int coroutine_fn GRAPH_RDLOCK bdrv_co_pread(BdrvChild *child,
+    int64_t offset, int64_t bytes, void *buf, BdrvRequestFlags flags)
+{
+    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+    IO_CODE();
+    assert_bdrv_graph_readable();
+
+    return bdrv_co_preadv(child, offset, bytes, &qiov, flags);
+}
+
+static inline int coroutine_fn GRAPH_RDLOCK bdrv_co_pwrite(BdrvChild *child,
+    int64_t offset, int64_t bytes, const void *buf, BdrvRequestFlags flags)
+{
+    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+    IO_CODE();
+    assert_bdrv_graph_readable();
+
+    return bdrv_co_pwritev(child, offset, bytes, &qiov, flags);
+}
+
+void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
+                                                uint64_t align);
+BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs);
+
+BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
+                            const char *filename);
+
+/**
+ * bdrv_wakeup:
+ * @bs: The BlockDriverState for which an I/O operation has been completed.
+ *
+ * Wake up the main thread if it is waiting on BDRV_POLL_WHILE.  During
+ * synchronous I/O on a BlockDriverState that is attached to another
+ * I/O thread, the main thread lets the I/O thread's event loop run,
+ * waiting for the I/O operation to complete.  A bdrv_wakeup will wake
+ * up the main thread if necessary.
+ *
+ * Manual calls to bdrv_wakeup are rarely necessary, because
+ * bdrv_dec_in_flight already calls it.
+ */
+void bdrv_wakeup(BlockDriverState *bs);
+
+const char * GRAPH_RDLOCK bdrv_get_parent_name(const BlockDriverState *bs);
+bool blk_dev_has_tray(BlockBackend *blk);
+bool blk_dev_is_tray_open(BlockBackend *blk);
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
+
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
+void bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
+                                      const BdrvDirtyBitmap *src,
+                                      HBitmap **backup, bool lock);
+
+void bdrv_inc_in_flight(BlockDriverState *bs);
+void bdrv_dec_in_flight(BlockDriverState *bs);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
+                        BdrvChild *dst, int64_t dst_offset,
+                        int64_t bytes, BdrvRequestFlags read_flags,
+                        BdrvRequestFlags write_flags);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
+                      BdrvChild *dst, int64_t dst_offset,
+                      int64_t bytes, BdrvRequestFlags read_flags,
+                      BdrvRequestFlags write_flags);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_refresh_total_sectors(BlockDriverState *bs, int64_t hint);
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_refresh_total_sectors(BlockDriverState *bs, int64_t hint);
+
+BdrvChild * GRAPH_RDLOCK bdrv_cow_child(BlockDriverState *bs);
+BdrvChild * GRAPH_RDLOCK bdrv_filter_child(BlockDriverState *bs);
+BdrvChild * GRAPH_RDLOCK bdrv_filter_or_cow_child(BlockDriverState *bs);
+BdrvChild * GRAPH_RDLOCK bdrv_primary_child(BlockDriverState *bs);
+BlockDriverState * GRAPH_RDLOCK bdrv_skip_filters(BlockDriverState *bs);
+BlockDriverState * GRAPH_RDLOCK bdrv_backing_chain_next(BlockDriverState *bs);
+
+static inline BlockDriverState * GRAPH_RDLOCK
+bdrv_cow_bs(BlockDriverState *bs)
+{
+    IO_CODE();
+    return child_bs(bdrv_cow_child(bs));
+}
+
+static inline BlockDriverState * GRAPH_RDLOCK
+bdrv_filter_bs(BlockDriverState *bs)
+{
+    IO_CODE();
+    return child_bs(bdrv_filter_child(bs));
+}
+
+static inline BlockDriverState * GRAPH_RDLOCK
+bdrv_filter_or_cow_bs(BlockDriverState *bs)
+{
+    IO_CODE();
+    return child_bs(bdrv_filter_or_cow_child(bs));
+}
+
+static inline BlockDriverState * GRAPH_RDLOCK
+bdrv_primary_bs(BlockDriverState *bs)
+{
+    IO_CODE();
+    return child_bs(bdrv_primary_child(bs));
+}
+
+/**
+ * Check whether the given offset is in the cached block-status data
+ * region.
+ *
+ * If it is, and @pnum is not NULL, *pnum is set to
+ * `bsc.data_end - offset`, i.e. how many bytes, starting from
+ * @offset, are data (according to the cache).
+ * Otherwise, *pnum is not touched.
+ */
+bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum);
+
+/**
+ * If [offset, offset + bytes) overlaps with the currently cached
+ * block-status region, invalidate the cache.
+ *
+ * (To be used by I/O paths that cause data regions to be zero or
+ * holes.)
+ */
+void bdrv_bsc_invalidate_range(BlockDriverState *bs,
+                               int64_t offset, int64_t bytes);
+
+/**
+ * Mark the range [offset, offset + bytes) as a data region.
+ */
+void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
+
+#endif /* BLOCK_INT_IO_H */
index 95d9333be14f220f96754c3cdd737ca092b3dbf9..567a178e13d10cffae10caae932b8f5012230a18 100644 (file)
 #ifndef BLOCK_INT_H
 #define BLOCK_INT_H
 
-#include "block/accounting.h"
-#include "block/block.h"
-#include "block/aio-wait.h"
-#include "qemu/queue.h"
-#include "qemu/coroutine.h"
-#include "qemu/stats64.h"
-#include "qemu/timer.h"
-#include "qemu/hbitmap.h"
-#include "block/snapshot.h"
-#include "qemu/throttle.h"
+#include "block/block_int-global-state.h"
+#include "block/block_int-io.h"
+#include "block/graph-lock.h"
 
-#define BLOCK_FLAG_LAZY_REFCOUNTS   8
-
-#define BLOCK_OPT_SIZE              "size"
-#define BLOCK_OPT_ENCRYPT           "encryption"
-#define BLOCK_OPT_ENCRYPT_FORMAT    "encrypt.format"
-#define BLOCK_OPT_COMPAT6           "compat6"
-#define BLOCK_OPT_HWVERSION         "hwversion"
-#define BLOCK_OPT_BACKING_FILE      "backing_file"
-#define BLOCK_OPT_BACKING_FMT       "backing_fmt"
-#define BLOCK_OPT_CLUSTER_SIZE      "cluster_size"
-#define BLOCK_OPT_TABLE_SIZE        "table_size"
-#define BLOCK_OPT_PREALLOC          "preallocation"
-#define BLOCK_OPT_SUBFMT            "subformat"
-#define BLOCK_OPT_COMPAT_LEVEL      "compat"
-#define BLOCK_OPT_LAZY_REFCOUNTS    "lazy_refcounts"
-#define BLOCK_OPT_ADAPTER_TYPE      "adapter_type"
-#define BLOCK_OPT_REDUNDANCY        "redundancy"
-#define BLOCK_OPT_NOCOW             "nocow"
-#define BLOCK_OPT_EXTENT_SIZE_HINT  "extent_size_hint"
-#define BLOCK_OPT_OBJECT_SIZE       "object_size"
-#define BLOCK_OPT_REFCOUNT_BITS     "refcount_bits"
-#define BLOCK_OPT_DATA_FILE         "data_file"
-#define BLOCK_OPT_DATA_FILE_RAW     "data_file_raw"
-#define BLOCK_OPT_COMPRESSION_TYPE  "compression_type"
-#define BLOCK_OPT_EXTL2             "extended_l2"
-
-#define BLOCK_PROBE_BUF_SIZE        512
-
-enum BdrvTrackedRequestType {
-    BDRV_TRACKED_READ,
-    BDRV_TRACKED_WRITE,
-    BDRV_TRACKED_DISCARD,
-    BDRV_TRACKED_TRUNCATE,
-};
-
-typedef struct BdrvTrackedRequest {
-    BlockDriverState *bs;
-    int64_t offset;
-    uint64_t bytes;
-    enum BdrvTrackedRequestType type;
-
-    bool serialising;
-    int64_t overlap_offset;
-    uint64_t overlap_bytes;
-
-    QLIST_ENTRY(BdrvTrackedRequest) list;
-    Coroutine *co; /* owner, used for deadlock detection */
-    CoQueue wait_queue; /* coroutines blocked on this request */
-
-    struct BdrvTrackedRequest *waiting_for;
-} BdrvTrackedRequest;
-
-struct BlockDriver {
-    const char *format_name;
-    int instance_size;
-
-    /* set to true if the BlockDriver is a block filter. Block filters pass
-     * certain callbacks that refer to data (see block.c) to their bs->file
-     * or bs->backing (whichever one exists) if the driver doesn't implement
-     * them. Drivers that do not wish to forward must implement them and return
-     * -ENOTSUP.
-     * Note that filters are not allowed to modify data.
-     *
-     * Filters generally cannot have more than a single filtered child,
-     * because the data they present must at all times be the same as
-     * that on their filtered child.  That would be impossible to
-     * achieve for multiple filtered children.
-     * (And this filtered child must then be bs->file or bs->backing.)
-     */
-    bool is_filter;
-    /*
-     * Set to true if the BlockDriver is a format driver.  Format nodes
-     * generally do not expect their children to be other format nodes
-     * (except for backing files), and so format probing is disabled
-     * on those children.
-     */
-    bool is_format;
-    /*
-     * Return true if @to_replace can be replaced by a BDS with the
-     * same data as @bs without it affecting @bs's behavior (that is,
-     * without it being visible to @bs's parents).
-     */
-    bool (*bdrv_recurse_can_replace)(BlockDriverState *bs,
-                                     BlockDriverState *to_replace);
-
-    int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
-    int (*bdrv_probe_device)(const char *filename);
-
-    /* Any driver implementing this callback is expected to be able to handle
-     * NULL file names in its .bdrv_open() implementation */
-    void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
-    /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have
-     * this field set to true, except ones that are defined only by their
-     * child's bs.
-     * An example of the last type will be the quorum block driver.
-     */
-    bool bdrv_needs_filename;
-
-    /*
-     * Set if a driver can support backing files. This also implies the
-     * following semantics:
-     *
-     *  - Return status 0 of .bdrv_co_block_status means that corresponding
-     *    blocks are not allocated in this layer of backing-chain
-     *  - For such (unallocated) blocks, read will:
-     *    - fill buffer with zeros if there is no backing file
-     *    - read from the backing file otherwise, where the block layer
-     *      takes care of reading zeros beyond EOF if backing file is short
-     */
-    bool supports_backing;
-
-    /* For handling image reopen for split or non-split files */
-    int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
-                               BlockReopenQueue *queue, Error **errp);
-    void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
-    void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state);
-    void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
-    void (*bdrv_join_options)(QDict *options, QDict *old_options);
-
-    int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
-                     Error **errp);
-
-    /* Protocol drivers should implement this instead of bdrv_open */
-    int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
-                          Error **errp);
-    void (*bdrv_close)(BlockDriverState *bs);
-
-
-    int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts,
-                                       Error **errp);
-    int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv,
-                                            const char *filename,
-                                            QemuOpts *opts,
-                                            Error **errp);
-
-    int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs,
-                                      BlockdevAmendOptions *opts,
-                                      bool force,
-                                      Error **errp);
-
-    int (*bdrv_amend_options)(BlockDriverState *bs,
-                              QemuOpts *opts,
-                              BlockDriverAmendStatusCB *status_cb,
-                              void *cb_opaque,
-                              bool force,
-                              Error **errp);
-
-    int (*bdrv_make_empty)(BlockDriverState *bs);
-
-    /*
-     * Refreshes the bs->exact_filename field. If that is impossible,
-     * bs->exact_filename has to be left empty.
-     */
-    void (*bdrv_refresh_filename)(BlockDriverState *bs);
-
-    /*
-     * Gathers the open options for all children into @target.
-     * A simple format driver (without backing file support) might
-     * implement this function like this:
-     *
-     *     QINCREF(bs->file->bs->full_open_options);
-     *     qdict_put(target, "file", bs->file->bs->full_open_options);
-     *
-     * If not specified, the generic implementation will simply put
-     * all children's options under their respective name.
-     *
-     * @backing_overridden is true when bs->backing seems not to be
-     * the child that would result from opening bs->backing_file.
-     * Therefore, if it is true, the backing child's options should be
-     * gathered; otherwise, there is no need since the backing child
-     * is the one implied by the image header.
-     *
-     * Note that ideally this function would not be needed.  Every
-     * block driver which implements it is probably doing something
-     * shady regarding its runtime option structure.
-     */
-    void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target,
-                                      bool backing_overridden);
-
-    /*
-     * Returns an allocated string which is the directory name of this BDS: It
-     * will be used to make relative filenames absolute by prepending this
-     * function's return value to them.
-     */
-    char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp);
-
-    /* aio */
-    BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs,
-        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags,
-        BlockCompletionFunc *cb, void *opaque);
-    BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs,
-        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags,
-        BlockCompletionFunc *cb, void *opaque);
-    BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
-        BlockCompletionFunc *cb, void *opaque);
-    BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
-        int64_t offset, int bytes,
-        BlockCompletionFunc *cb, void *opaque);
-
-    int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
-
-    /**
-     * @offset: position in bytes to read at
-     * @bytes: number of bytes to read
-     * @qiov: the buffers to fill with read data
-     * @flags: currently unused, always 0
-     *
-     * @offset and @bytes will be a multiple of 'request_alignment',
-     * but the length of individual @qiov elements does not have to
-     * be a multiple.
-     *
-     * @bytes will always equal the total size of @qiov, and will be
-     * no larger than 'max_transfer'.
-     *
-     * The buffer in @qiov may point directly to guest memory.
-     */
-    int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
-        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
-    int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs,
-        uint64_t offset, uint64_t bytes,
-        QEMUIOVector *qiov, size_t qiov_offset, int flags);
-    int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
-    /**
-     * @offset: position in bytes to write at
-     * @bytes: number of bytes to write
-     * @qiov: the buffers containing data to write
-     * @flags: zero or more bits allowed by 'supported_write_flags'
-     *
-     * @offset and @bytes will be a multiple of 'request_alignment',
-     * but the length of individual @qiov elements does not have to
-     * be a multiple.
-     *
-     * @bytes will always equal the total size of @qiov, and will be
-     * no larger than 'max_transfer'.
-     *
-     * The buffer in @qiov may point directly to guest memory.
-     */
-    int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
-        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
-    int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs,
-        uint64_t offset, uint64_t bytes,
-        QEMUIOVector *qiov, size_t qiov_offset, int flags);
-
-    /*
-     * Efficiently zero a region of the disk image.  Typically an image format
-     * would use a compact metadata representation to implement this.  This
-     * function pointer may be NULL or return -ENOSUP and .bdrv_co_writev()
-     * will be called instead.
-     */
-    int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
-        int64_t offset, int bytes, BdrvRequestFlags flags);
-    int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
-        int64_t offset, int bytes);
-
-    /* Map [offset, offset + nbytes) range onto a child of @bs to copy from,
-     * and invoke bdrv_co_copy_range_from(child, ...), or invoke
-     * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
-     *
-     * See the comment of bdrv_co_copy_range for the parameter and return value
-     * semantics.
-     */
-    int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
-                                                BdrvChild *src,
-                                                uint64_t offset,
-                                                BdrvChild *dst,
-                                                uint64_t dst_offset,
-                                                uint64_t bytes,
-                                                BdrvRequestFlags read_flags,
-                                                BdrvRequestFlags write_flags);
-
-    /* Map [offset, offset + nbytes) range onto a child of bs to copy data to,
-     * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy
-     * operation if @bs is the leaf and @src has the same BlockDriver.  Return
-     * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
-     *
-     * See the comment of bdrv_co_copy_range for the parameter and return value
-     * semantics.
-     */
-    int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
-                                              BdrvChild *src,
-                                              uint64_t src_offset,
-                                              BdrvChild *dst,
-                                              uint64_t dst_offset,
-                                              uint64_t bytes,
-                                              BdrvRequestFlags read_flags,
-                                              BdrvRequestFlags write_flags);
-
-    /*
-     * Building block for bdrv_block_status[_above] and
-     * bdrv_is_allocated[_above].  The driver should answer only
-     * according to the current layer, and should only need to set
-     * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
-     * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
-     * layer, the result should be 0 (and not BDRV_BLOCK_ZERO).  See
-     * block.h for the overall meaning of the bits.  As a hint, the
-     * flag want_zero is true if the caller cares more about precise
-     * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
-     * overall allocation (favor larger *pnum, perhaps by reporting
-     * _DATA instead of _ZERO).  The block layer guarantees input
-     * clamped to bdrv_getlength() and aligned to request_alignment,
-     * as well as non-NULL pnum, map, and file; in turn, the driver
-     * must return an error or set pnum to an aligned non-zero value.
-     */
-    int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
-        bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
-        int64_t *map, BlockDriverState **file);
-
-    /*
-     * Invalidate any cached meta-data.
-     */
-    void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs,
-                                                  Error **errp);
-    int (*bdrv_inactivate)(BlockDriverState *bs);
-
-    /*
-     * Flushes all data for all layers by calling bdrv_co_flush for underlying
-     * layers, if needed. This function is needed for deterministic
-     * synchronization of the flush finishing callback.
-     */
-    int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
-
-    /* Delete a created file. */
-    int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs,
-                                            Error **errp);
-
-    /*
-     * Flushes all data that was already written to the OS all the way down to
-     * the disk (for example file-posix.c calls fsync()).
-     */
-    int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
-
-    /*
-     * Flushes all internal caches to the OS. The data may still sit in a
-     * writeback cache of the host OS, but it will survive a crash of the qemu
-     * process.
-     */
-    int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
-
-    /*
-     * Drivers setting this field must be able to work with just a plain
-     * filename with '<protocol_name>:' as a prefix, and no other options.
-     * Options may be extracted from the filename by implementing
-     * bdrv_parse_filename.
-     */
-    const char *protocol_name;
-
-    /*
-     * Truncate @bs to @offset bytes using the given @prealloc mode
-     * when growing.  Modes other than PREALLOC_MODE_OFF should be
-     * rejected when shrinking @bs.
-     *
-     * If @exact is true, @bs must be resized to exactly @offset.
-     * Otherwise, it is sufficient for @bs (if it is a host block
-     * device and thus there is no way to resize it) to be at least
-     * @offset bytes in length.
-     *
-     * If @exact is true and this function fails but would succeed
-     * with @exact = false, it should return -ENOTSUP.
-     */
-    int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
-                                         bool exact, PreallocMode prealloc,
-                                         BdrvRequestFlags flags, Error **errp);
-
-    int64_t (*bdrv_getlength)(BlockDriverState *bs);
-    bool has_variable_length;
-    int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
-    BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs,
-                                      Error **errp);
-
-    int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
-        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov);
-    int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs,
-        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
-        size_t qiov_offset);
-
-    int (*bdrv_snapshot_create)(BlockDriverState *bs,
-                                QEMUSnapshotInfo *sn_info);
-    int (*bdrv_snapshot_goto)(BlockDriverState *bs,
-                              const char *snapshot_id);
-    int (*bdrv_snapshot_delete)(BlockDriverState *bs,
-                                const char *snapshot_id,
-                                const char *name,
-                                Error **errp);
-    int (*bdrv_snapshot_list)(BlockDriverState *bs,
-                              QEMUSnapshotInfo **psn_info);
-    int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
-                                  const char *snapshot_id,
-                                  const char *name,
-                                  Error **errp);
-    int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
-    ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs,
-                                                 Error **errp);
-    BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs);
-
-    int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
-                                          QEMUIOVector *qiov,
-                                          int64_t pos);
-    int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
-                                          QEMUIOVector *qiov,
-                                          int64_t pos);
-
-    int (*bdrv_change_backing_file)(BlockDriverState *bs,
-        const char *backing_file, const char *backing_fmt);
-
-    /* removable device specific */
-    bool (*bdrv_is_inserted)(BlockDriverState *bs);
-    void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
-    void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
-
-    /* to control generic scsi devices */
-    BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
-        unsigned long int req, void *buf,
-        BlockCompletionFunc *cb, void *opaque);
-    int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs,
-                                      unsigned long int req, void *buf);
-
-    /* List of options for creating images, terminated by name == NULL */
-    QemuOptsList *create_opts;
-
-    /* List of options for image amend */
-    QemuOptsList *amend_opts;
-
-    /*
-     * If this driver supports reopening images this contains a
-     * NULL-terminated list of the runtime options that can be
-     * modified. If an option in this list is unspecified during
-     * reopen then it _must_ be reset to its default value or return
-     * an error.
-     */
-    const char *const *mutable_opts;
-
-    /*
-     * Returns 0 for completed check, -errno for internal errors.
-     * The check results are stored in result.
-     */
-    int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs,
-                                      BdrvCheckResult *result,
-                                      BdrvCheckMode fix);
-
-    void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
-
-    /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
-    int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
-        const char *tag);
-    int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
-        const char *tag);
-    int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
-    bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
-
-    void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp);
-
-    /*
-     * Returns 1 if newly created images are guaranteed to contain only
-     * zeros, 0 otherwise.
-     */
-    int (*bdrv_has_zero_init)(BlockDriverState *bs);
-
-    /* Remove fd handlers, timers, and other event loop callbacks so the event
-     * loop is no longer in use.  Called with no in-flight requests and in
-     * depth-first traversal order with parents before child nodes.
-     */
-    void (*bdrv_detach_aio_context)(BlockDriverState *bs);
-
-    /* Add fd handlers, timers, and other event loop callbacks so I/O requests
-     * can be processed again.  Called with no in-flight requests and in
-     * depth-first traversal order with child nodes before parent nodes.
-     */
-    void (*bdrv_attach_aio_context)(BlockDriverState *bs,
-                                    AioContext *new_context);
-
-    /* io queue for linux-aio */
-    void (*bdrv_io_plug)(BlockDriverState *bs);
-    void (*bdrv_io_unplug)(BlockDriverState *bs);
-
-    /**
-     * Try to get @bs's logical and physical block size.
-     * On success, store them in @bsz and return zero.
-     * On failure, return negative errno.
-     */
-    int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz);
-    /**
-     * Try to get @bs's geometry (cyls, heads, sectors)
-     * On success, store them in @geo and return 0.
-     * On failure return -errno.
-     * Only drivers that want to override guest geometry implement this
-     * callback; see hd_geometry_guess().
-     */
-    int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
-
-    /**
-     * bdrv_co_drain_begin is called if implemented in the beginning of a
-     * drain operation to drain and stop any internal sources of requests in
-     * the driver.
-     * bdrv_co_drain_end is called if implemented at the end of the drain.
-     *
-     * They should be used by the driver to e.g. manage scheduled I/O
-     * requests, or toggle an internal state. After the end of the drain new
-     * requests will continue normally.
-     */
-    void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
-    void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
-
-    void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
-                           Error **errp);
-    void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
-                           Error **errp);
-
-    /**
-     * Informs the block driver that a permission change is intended. The
-     * driver checks whether the change is permissible and may take other
-     * preparations for the change (e.g. get file system locks). This operation
-     * is always followed either by a call to either .bdrv_set_perm or
-     * .bdrv_abort_perm_update.
-     *
-     * Checks whether the requested set of cumulative permissions in @perm
-     * can be granted for accessing @bs and whether no other users are using
-     * permissions other than those given in @shared (both arguments take
-     * BLK_PERM_* bitmasks).
-     *
-     * If both conditions are met, 0 is returned. Otherwise, -errno is returned
-     * and errp is set to an error describing the conflict.
-     */
-    int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
-                           uint64_t shared, Error **errp);
-
-    /**
-     * Called to inform the driver that the set of cumulative set of used
-     * permissions for @bs has changed to @perm, and the set of sharable
-     * permission to @shared. The driver can use this to propagate changes to
-     * its children (i.e. request permissions only if a parent actually needs
-     * them).
-     *
-     * This function is only invoked after bdrv_check_perm(), so block drivers
-     * may rely on preparations made in their .bdrv_check_perm implementation.
-     */
-    void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared);
-
-    /*
-     * Called to inform the driver that after a previous bdrv_check_perm()
-     * call, the permission update is not performed and any preparations made
-     * for it (e.g. taken file locks) need to be undone.
-     *
-     * This function can be called even for nodes that never saw a
-     * bdrv_check_perm() call. It is a no-op then.
-     */
-    void (*bdrv_abort_perm_update)(BlockDriverState *bs);
-
-    /**
-     * Returns in @nperm and @nshared the permissions that the driver for @bs
-     * needs on its child @c, based on the cumulative permissions requested by
-     * the parents in @parent_perm and @parent_shared.
-     *
-     * If @c is NULL, return the permissions for attaching a new child for the
-     * given @child_class and @role.
-     *
-     * If @reopen_queue is non-NULL, don't return the currently needed
-     * permissions, but those that will be needed after applying the
-     * @reopen_queue.
-     */
-     void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c,
-                             BdrvChildRole role,
-                             BlockReopenQueue *reopen_queue,
-                             uint64_t parent_perm, uint64_t parent_shared,
-                             uint64_t *nperm, uint64_t *nshared);
-
-    bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
-    bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs,
-                                               const char *name,
-                                               uint32_t granularity,
-                                               Error **errp);
-    int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs,
-                                                  const char *name,
-                                                  Error **errp);
-
-    /**
-     * Register/unregister a buffer for I/O. For example, when the driver is
-     * interested to know the memory areas that will later be used in iovs, so
-     * that it can do IOMMU mapping with VFIO etc., in order to get better
-     * performance. In the case of VFIO drivers, this callback is used to do
-     * DMA mapping for hot buffers.
-     */
-    void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
-    void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
-    QLIST_ENTRY(BlockDriver) list;
-
-    /* Pointer to a NULL-terminated array of names of strong options
-     * that can be specified for bdrv_open(). A strong option is one
-     * that changes the data of a BDS.
-     * If this pointer is NULL, the array is considered empty.
-     * "filename" and "driver" are always considered strong. */
-    const char *const *strong_runtime_opts;
-};
-
-static inline bool block_driver_can_compress(BlockDriver *drv)
-{
-    return drv->bdrv_co_pwritev_compressed ||
-           drv->bdrv_co_pwritev_compressed_part;
-}
-
-typedef struct BlockLimits {
-    /* Alignment requirement, in bytes, for offset/length of I/O
-     * requests. Must be a power of 2 less than INT_MAX; defaults to
-     * 1 for drivers with modern byte interfaces, and to 512
-     * otherwise. */
-    uint32_t request_alignment;
-
-    /* Maximum number of bytes that can be discarded at once (since it
-     * is signed, it must be < 2G, if set). Must be multiple of
-     * pdiscard_alignment, but need not be power of 2. May be 0 if no
-     * inherent 32-bit limit */
-    int32_t max_pdiscard;
-
-    /* Optimal alignment for discard requests in bytes. A power of 2
-     * is best but not mandatory.  Must be a multiple of
-     * bl.request_alignment, and must be less than max_pdiscard if
-     * that is set. May be 0 if bl.request_alignment is good enough */
-    uint32_t pdiscard_alignment;
-
-    /* Maximum number of bytes that can zeroized at once (since it is
-     * signed, it must be < 2G, if set). Must be multiple of
-     * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */
-    int32_t max_pwrite_zeroes;
-
-    /* Optimal alignment for write zeroes requests in bytes. A power
-     * of 2 is best but not mandatory.  Must be a multiple of
-     * bl.request_alignment, and must be less than max_pwrite_zeroes
-     * if that is set. May be 0 if bl.request_alignment is good
-     * enough */
-    uint32_t pwrite_zeroes_alignment;
-
-    /* Optimal transfer length in bytes.  A power of 2 is best but not
-     * mandatory.  Must be a multiple of bl.request_alignment, or 0 if
-     * no preferred size */
-    uint32_t opt_transfer;
-
-    /* Maximal transfer length in bytes.  Need not be power of 2, but
-     * must be multiple of opt_transfer and bl.request_alignment, or 0
-     * for no 32-bit limit.  For now, anything larger than INT_MAX is
-     * clamped down. */
-    uint32_t max_transfer;
-
-    /* memory alignment, in bytes so that no bounce buffer is needed */
-    size_t min_mem_alignment;
-
-    /* memory alignment, in bytes, for bounce buffer */
-    size_t opt_mem_alignment;
-
-    /* maximum number of iovec elements */
-    int max_iov;
-} BlockLimits;
-
-typedef struct BdrvOpBlocker BdrvOpBlocker;
-
-typedef struct BdrvAioNotifier {
-    void (*attached_aio_context)(AioContext *new_context, void *opaque);
-    void (*detach_aio_context)(void *opaque);
-
-    void *opaque;
-    bool deleted;
-
-    QLIST_ENTRY(BdrvAioNotifier) list;
-} BdrvAioNotifier;
-
-struct BdrvChildClass {
-    /* If true, bdrv_replace_node() doesn't change the node this BdrvChild
-     * points to. */
-    bool stay_at_node;
-
-    /* If true, the parent is a BlockDriverState and bdrv_next_all_states()
-     * will return it. This information is used for drain_all, where every node
-     * will be drained separately, so the drain only needs to be propagated to
-     * non-BDS parents. */
-    bool parent_is_bds;
-
-    void (*inherit_options)(BdrvChildRole role, bool parent_is_format,
-                            int *child_flags, QDict *child_options,
-                            int parent_flags, QDict *parent_options);
-
-    void (*change_media)(BdrvChild *child, bool load);
-    void (*resize)(BdrvChild *child);
-
-    /* Returns a name that is supposedly more useful for human users than the
-     * node name for identifying the node in question (in particular, a BB
-     * name), or NULL if the parent can't provide a better name. */
-    const char *(*get_name)(BdrvChild *child);
-
-    /* Returns a malloced string that describes the parent of the child for a
-     * human reader. This could be a node-name, BlockBackend name, qdev ID or
-     * QOM path of the device owning the BlockBackend, job type and ID etc. The
-     * caller is responsible for freeing the memory. */
-    char *(*get_parent_desc)(BdrvChild *child);
-
-    /*
-     * If this pair of functions is implemented, the parent doesn't issue new
-     * requests after returning from .drained_begin() until .drained_end() is
-     * called.
-     *
-     * These functions must not change the graph (and therefore also must not
-     * call aio_poll(), which could change the graph indirectly).
-     *
-     * If drained_end() schedules background operations, it must atomically
-     * increment *drained_end_counter for each such operation and atomically
-     * decrement it once the operation has settled.
-     *
-     * Note that this can be nested. If drained_begin() was called twice, new
-     * I/O is allowed only after drained_end() was called twice, too.
-     */
-    void (*drained_begin)(BdrvChild *child);
-    void (*drained_end)(BdrvChild *child, int *drained_end_counter);
-
-    /*
-     * Returns whether the parent has pending requests for the child. This
-     * callback is polled after .drained_begin() has been called until all
-     * activity on the child has stopped.
-     */
-    bool (*drained_poll)(BdrvChild *child);
-
-    /* Notifies the parent that the child has been activated/inactivated (e.g.
-     * when migration is completing) and it can start/stop requesting
-     * permissions and doing I/O on it. */
-    void (*activate)(BdrvChild *child, Error **errp);
-    int (*inactivate)(BdrvChild *child);
-
-    void (*attach)(BdrvChild *child);
-    void (*detach)(BdrvChild *child);
-
-    /* Notifies the parent that the filename of its child has changed (e.g.
-     * because the direct child was removed from the backing chain), so that it
-     * can update its reference. */
-    int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
-                           const char *filename, Error **errp);
-
-    bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx,
-                            GSList **ignore, Error **errp);
-    void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore);
-};
-
-extern const BdrvChildClass child_of_bds;
-
-struct BdrvChild {
-    BlockDriverState *bs;
-    char *name;
-    const BdrvChildClass *klass;
-    BdrvChildRole role;
-    void *opaque;
-
-    /**
-     * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask)
-     */
-    uint64_t perm;
-
-    /**
-     * Permissions that can still be granted to other users of @bs while this
-     * BdrvChild is still attached to it. (BLK_PERM_* bitmask)
-     */
-    uint64_t shared_perm;
-
-    /* backup of permissions during permission update procedure */
-    bool has_backup_perm;
-    uint64_t backup_perm;
-    uint64_t backup_shared_perm;
-
-    /*
-     * This link is frozen: the child can neither be replaced nor
-     * detached from the parent.
-     */
-    bool frozen;
-
-    /*
-     * How many times the parent of this child has been drained
-     * (through klass->drained_*).
-     * Usually, this is equal to bs->quiesce_counter (potentially
-     * reduced by bdrv_drain_all_count).  It may differ while the
-     * child is entering or leaving a drained section.
-     */
-    int parent_quiesce_counter;
-
-    QLIST_ENTRY(BdrvChild) next;
-    QLIST_ENTRY(BdrvChild) next_parent;
-};
-
-/*
- * Note: the function bdrv_append() copies and swaps contents of
- * BlockDriverStates, so if you add new fields to this struct, please
- * inspect bdrv_append() to determine if the new fields need to be
- * copied as well.
- */
-struct BlockDriverState {
-    /* Protected by big QEMU lock or read-only after opening.  No special
-     * locking needed during I/O...
-     */
-    int open_flags; /* flags used to open the file, re-used for re-open */
-    bool read_only; /* if true, the media is read only */
-    bool encrypted; /* if true, the media is encrypted */
-    bool sg;        /* if true, the device is a /dev/sg* */
-    bool probed;    /* if true, format was probed rather than specified */
-    bool force_share; /* if true, always allow all shared permissions */
-    bool implicit;  /* if true, this filter node was automatically inserted */
-
-    BlockDriver *drv; /* NULL means no media */
-    void *opaque;
-
-    AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
-    /* long-running tasks intended to always use the same AioContext as this
-     * BDS may register themselves in this list to be notified of changes
-     * regarding this BDS's context */
-    QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
-    bool walking_aio_notifiers; /* to make removal during iteration safe */
-
-    char filename[PATH_MAX];
-    /*
-     * If not empty, this image is a diff in relation to backing_file.
-     * Note that this is the name given in the image header and
-     * therefore may or may not be equal to .backing->bs->filename.
-     * If this field contains a relative path, it is to be resolved
-     * relatively to the overlay's location.
-     */
-    char backing_file[PATH_MAX];
-    /*
-     * The backing filename indicated by the image header.  Contrary
-     * to backing_file, if we ever open this file, auto_backing_file
-     * is replaced by the resulting BDS's filename (i.e. after a
-     * bdrv_refresh_filename() run).
-     */
-    char auto_backing_file[PATH_MAX];
-    char backing_format[16]; /* if non-zero and backing_file exists */
-
-    QDict *full_open_options;
-    char exact_filename[PATH_MAX];
-
-    BdrvChild *backing;
-    BdrvChild *file;
-
-    /* I/O Limits */
-    BlockLimits bl;
-
-    /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
-     * BDRV_REQ_WRITE_UNCHANGED).
-     * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
-     * writes will be issued as normal writes without the flag set.
-     * This is important to note for drivers that do not explicitly
-     * request a WRITE permission for their children and instead take
-     * the same permissions as their parent did (this is commonly what
-     * block filters do).  Such drivers have to be aware that the
-     * parent may have taken a WRITE_UNCHANGED permission only and is
-     * issuing such requests.  Drivers either must make sure that
-     * these requests do not result in plain WRITE accesses (usually
-     * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding
-     * every incoming write request as-is, including potentially that
-     * flag), or they have to explicitly take the WRITE permission for
-     * their children. */
-    unsigned int supported_write_flags;
-    /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
-     * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */
-    unsigned int supported_zero_flags;
-    /*
-     * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
-     *
-     * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure
-     * that any added space reads as all zeros. If this can't be guaranteed,
-     * the operation must fail.
-     */
-    unsigned int supported_truncate_flags;
-
-    /* the following member gives a name to every node on the bs graph. */
-    char node_name[32];
-    /* element of the list of named nodes building the graph */
-    QTAILQ_ENTRY(BlockDriverState) node_list;
-    /* element of the list of all BlockDriverStates (all_bdrv_states) */
-    QTAILQ_ENTRY(BlockDriverState) bs_list;
-    /* element of the list of monitor-owned BDS */
-    QTAILQ_ENTRY(BlockDriverState) monitor_list;
-    int refcnt;
-
-    /* operation blockers */
-    QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];
-
-    /* The node that this node inherited default options from (and a reopen on
-     * which can affect this node by changing these defaults). This is always a
-     * parent node of this node. */
-    BlockDriverState *inherits_from;
-    QLIST_HEAD(, BdrvChild) children;
-    QLIST_HEAD(, BdrvChild) parents;
-
-    QDict *options;
-    QDict *explicit_options;
-    BlockdevDetectZeroesOptions detect_zeroes;
-
-    /* The error object in use for blocking operations on backing_hd */
-    Error *backing_blocker;
-
-    /* Protected by AioContext lock */
-
-    /* If we are reading a disk image, give its size in sectors.
-     * Generally read-only; it is written to by load_snapshot and
-     * save_snaphost, but the block layer is quiescent during those.
-     */
-    int64_t total_sectors;
-
-    /* Callback before write request is processed */
-    NotifierWithReturnList before_write_notifiers;
-
-    /* threshold limit for writes, in bytes. "High water mark". */
-    uint64_t write_threshold_offset;
-    NotifierWithReturn write_threshold_notifier;
-
-    /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
-     * Reading from the list can be done with either the BQL or the
-     * dirty_bitmap_mutex.  Modifying a bitmap only requires
-     * dirty_bitmap_mutex.  */
-    QemuMutex dirty_bitmap_mutex;
-    QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
-
-    /* Offset after the highest byte written to */
-    Stat64 wr_highest_offset;
-
-    /* If true, copy read backing sectors into image.  Can be >1 if more
-     * than one client has requested copy-on-read.  Accessed with atomic
-     * ops.
-     */
-    int copy_on_read;
-
-    /* number of in-flight requests; overall and serialising.
-     * Accessed with atomic ops.
-     */
-    unsigned int in_flight;
-    unsigned int serialising_in_flight;
-
-    /* counter for nested bdrv_io_plug.
-     * Accessed with atomic ops.
-    */
-    unsigned io_plugged;
-
-    /* do we need to tell the quest if we have a volatile write cache? */
-    int enable_write_cache;
-
-    /* Accessed with atomic ops.  */
-    int quiesce_counter;
-    int recursive_quiesce_counter;
-
-    unsigned int write_gen;               /* Current data generation */
-
-    /* Protected by reqs_lock.  */
-    CoMutex reqs_lock;
-    QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
-    CoQueue flush_queue;                  /* Serializing flush queue */
-    bool active_flush_req;                /* Flush request in flight? */
-
-    /* Only read/written by whoever has set active_flush_req to true.  */
-    unsigned int flushed_gen;             /* Flushed write generation */
-
-    /* BdrvChild links to this node may never be frozen */
-    bool never_freeze;
-};
-
-struct BlockBackendRootState {
-    int open_flags;
-    bool read_only;
-    BlockdevDetectZeroesOptions detect_zeroes;
-};
-
-typedef enum BlockMirrorBackingMode {
-    /* Reuse the existing backing chain from the source for the target.
-     * - sync=full: Set backing BDS to NULL.
-     * - sync=top:  Use source's backing BDS.
-     * - sync=none: Use source as the backing BDS. */
-    MIRROR_SOURCE_BACKING_CHAIN,
-
-    /* Open the target's backing chain completely anew */
-    MIRROR_OPEN_BACKING_CHAIN,
-
-    /* Do not change the target's backing BDS after job completion */
-    MIRROR_LEAVE_BACKING_CHAIN,
-} BlockMirrorBackingMode;
-
-
-/* Essential block drivers which must always be statically linked into qemu, and
- * which therefore can be accessed without using bdrv_find_format() */
-extern BlockDriver bdrv_file;
-extern BlockDriver bdrv_raw;
-extern BlockDriver bdrv_qcow2;
-
-int coroutine_fn bdrv_co_preadv(BdrvChild *child,
-    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
-    BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
-    int64_t offset, unsigned int bytes,
-    QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
-    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
-    BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
-    int64_t offset, unsigned int bytes,
-    QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
-
-static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
-    int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
-{
-    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
-    return bdrv_co_preadv(child, offset, bytes, &qiov, flags);
-}
-
-static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
-    int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
-{
-    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
-    return bdrv_co_pwritev(child, offset, bytes, &qiov, flags);
-}
-
-extern unsigned int bdrv_drain_all_count;
-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
-
-bool coroutine_fn bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align);
-BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs);
-
-int get_tmp_filename(char *filename, int size);
-BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
-                            const char *filename);
-
-void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
-                                      QDict *options);
-
-bool bdrv_backing_overridden(BlockDriverState *bs);
-
-
-/**
- * bdrv_add_before_write_notifier:
- *
- * Register a callback that is invoked before write requests are processed but
- * after any throttling or waiting for overlapping requests.
- */
-void bdrv_add_before_write_notifier(BlockDriverState *bs,
-                                    NotifierWithReturn *notifier);
-
-/**
- * bdrv_add_aio_context_notifier:
- *
- * If a long-running job intends to be always run in the same AioContext as a
- * certain BDS, it may use this function to be notified of changes regarding the
- * association of the BDS to an AioContext.
- *
- * attached_aio_context() is called after the target BDS has been attached to a
- * new AioContext; detach_aio_context() is called before the target BDS is being
- * detached from its old AioContext.
- */
-void bdrv_add_aio_context_notifier(BlockDriverState *bs,
-        void (*attached_aio_context)(AioContext *new_context, void *opaque),
-        void (*detach_aio_context)(void *opaque), void *opaque);
-
-/**
- * bdrv_remove_aio_context_notifier:
- *
- * Unsubscribe of change notifications regarding the BDS's AioContext. The
- * parameters given here have to be the same as those given to
- * bdrv_add_aio_context_notifier().
- */
-void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
-                                      void (*aio_context_attached)(AioContext *,
-                                                                   void *),
-                                      void (*aio_context_detached)(void *),
-                                      void *opaque);
-
-/**
- * bdrv_wakeup:
- * @bs: The BlockDriverState for which an I/O operation has been completed.
- *
- * Wake up the main thread if it is waiting on BDRV_POLL_WHILE.  During
- * synchronous I/O on a BlockDriverState that is attached to another
- * I/O thread, the main thread lets the I/O thread's event loop run,
- * waiting for the I/O operation to complete.  A bdrv_wakeup will wake
- * up the main thread if necessary.
- *
- * Manual calls to bdrv_wakeup are rarely necessary, because
- * bdrv_dec_in_flight already calls it.
- */
-void bdrv_wakeup(BlockDriverState *bs);
-
-#ifdef _WIN32
-int is_windows_drive(const char *filename);
-#endif
-
-/**
- * stream_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Block device to operate on.
- * @base: Block device that will become the new base, or %NULL to
- * flatten the whole backing file chain onto @bs.
- * @backing_file_str: The file name that will be written to @bs as the
- * the new backing file if the job completes. Ignored if @base is %NULL.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- *                  See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @errp: Error object.
- *
- * Start a streaming operation on @bs.  Clusters that are unallocated
- * in @bs, but allocated in any image between @base and @bs (both
- * exclusive) will be written to @bs.  At the end of a successful
- * streaming job, the backing file of @bs will be changed to
- * @backing_file_str in the written image and to @base in the live
- * BlockDriverState.
- */
-void stream_start(const char *job_id, BlockDriverState *bs,
-                  BlockDriverState *base, const char *backing_file_str,
-                  int creation_flags, int64_t speed,
-                  BlockdevOnError on_error, Error **errp);
-
-/**
- * commit_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Active block device.
- * @top: Top block device to be committed.
- * @base: Block device that will be written into, and become the new top.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- *                  See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @backing_file_str: String to use as the backing file in @top's overlay
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the commit job inserts into the graph above @top. NULL means
- * that a node name should be autogenerated.
- * @errp: Error object.
- *
- */
-void commit_start(const char *job_id, BlockDriverState *bs,
-                  BlockDriverState *base, BlockDriverState *top,
-                  int creation_flags, int64_t speed,
-                  BlockdevOnError on_error, const char *backing_file_str,
-                  const char *filter_node_name, Error **errp);
-/**
- * commit_active_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Active block device to be committed.
- * @base: Block device that will be written into, and become the new top.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- *                  See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the commit job inserts into the graph above @bs. NULL means that
- * a node name should be autogenerated.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @auto_complete: Auto complete the job.
- * @errp: Error object.
- *
- */
-BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
-                              BlockDriverState *base, int creation_flags,
-                              int64_t speed, BlockdevOnError on_error,
-                              const char *filter_node_name,
-                              BlockCompletionFunc *cb, void *opaque,
-                              bool auto_complete, Error **errp);
-/*
- * mirror_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Block device to operate on.
- * @target: Block device to write to.
- * @replaces: Block graph node name to replace once the mirror is done. Can
- *            only be used when full mirroring is selected.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- *                  See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @granularity: The chosen granularity for the dirty bitmap.
- * @buf_size: The amount of data that can be in flight at one time.
- * @mode: Whether to collapse all images in the chain to the target.
- * @backing_mode: How to establish the target's backing chain after completion.
- * @zero_target: Whether the target should be explicitly zero-initialized
- * @on_source_error: The action to take upon error reading from the source.
- * @on_target_error: The action to take upon error writing to the target.
- * @unmap: Whether to unmap target where source sectors only contain zeroes.
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the mirror job inserts into the graph above @bs. NULL means that
- * a node name should be autogenerated.
- * @copy_mode: When to trigger writes to the target.
- * @errp: Error object.
- *
- * Start a mirroring operation on @bs.  Clusters that are allocated
- * in @bs will be written to @target until the job is cancelled or
- * manually completed.  At the end of a successful mirroring job,
- * @bs will be switched to read from @target.
- */
-void mirror_start(const char *job_id, BlockDriverState *bs,
-                  BlockDriverState *target, const char *replaces,
-                  int creation_flags, int64_t speed,
-                  uint32_t granularity, int64_t buf_size,
-                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
-                  bool zero_target,
-                  BlockdevOnError on_source_error,
-                  BlockdevOnError on_target_error,
-                  bool unmap, const char *filter_node_name,
-                  MirrorCopyMode copy_mode, Error **errp);
-
-/*
- * backup_job_create:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Block device to operate on.
- * @target: Block device to write to.
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @sync_mode: What parts of the disk image should be copied to the destination.
- * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental'
- * @bitmap_mode: The bitmap synchronization policy to use.
- * @on_source_error: The action to take upon error reading from the source.
- * @on_target_error: The action to take upon error writing to the target.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- *                  See @BlockJobCreateFlags
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @txn: Transaction that this job is part of (may be NULL).
- *
- * Create a backup operation on @bs.  Clusters in @bs are written to @target
- * until the job is cancelled or manually completed.
- */
-BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
-                            BlockDriverState *target, int64_t speed,
-                            MirrorSyncMode sync_mode,
-                            BdrvDirtyBitmap *sync_bitmap,
-                            BitmapSyncMode bitmap_mode,
-                            bool compress,
-                            const char *filter_node_name,
-                            BlockdevOnError on_source_error,
-                            BlockdevOnError on_target_error,
-                            int creation_flags,
-                            BlockCompletionFunc *cb, void *opaque,
-                            JobTxn *txn, Error **errp);
-
-BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
-                                  const char *child_name,
-                                  const BdrvChildClass *child_class,
-                                  BdrvChildRole child_role,
-                                  AioContext *ctx,
-                                  uint64_t perm, uint64_t shared_perm,
-                                  void *opaque, Error **errp);
-void bdrv_root_unref_child(BdrvChild *child);
-
-void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
-                              uint64_t *shared_perm);
-
-/**
- * Sets a BdrvChild's permissions.  Avoid if the parent is a BDS; use
- * bdrv_child_refresh_perms() instead and make the parent's
- * .bdrv_child_perm() implementation return the correct values.
- */
-int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
-                            Error **errp);
-
-/**
- * Calls bs->drv->bdrv_child_perm() and updates the child's permission
- * masks with the result.
- * Drivers should invoke this function whenever an event occurs that
- * makes their .bdrv_child_perm() implementation return different
- * values than before, but which will not result in the block layer
- * automatically refreshing the permissions.
- */
-int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp);
-
-bool bdrv_recurse_can_replace(BlockDriverState *bs,
-                              BlockDriverState *to_replace);
-
-/*
- * Default implementation for BlockDriver.bdrv_child_perm() that can
- * be used by block filters and image formats, as long as they use the
- * child_of_bds child class and set an appropriate BdrvChildRole.
- */
-void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
-                        BdrvChildRole role, BlockReopenQueue *reopen_queue,
-                        uint64_t perm, uint64_t shared,
-                        uint64_t *nperm, uint64_t *nshared);
-
-const char *bdrv_get_parent_name(const BlockDriverState *bs);
-void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
-bool blk_dev_has_removable_media(BlockBackend *blk);
-bool blk_dev_has_tray(BlockBackend *blk);
-void blk_dev_eject_request(BlockBackend *blk, bool force);
-bool blk_dev_is_tray_open(BlockBackend *blk);
-bool blk_dev_is_medium_locked(BlockBackend *blk);
-
-void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
-
-void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
-void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup);
-bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
-                                      const BdrvDirtyBitmap *src,
-                                      HBitmap **backup, bool lock);
-
-void bdrv_inc_in_flight(BlockDriverState *bs);
-void bdrv_dec_in_flight(BlockDriverState *bs);
-
-void blockdev_close_all_bdrv_states(void);
-
-int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
-                                         BdrvChild *dst, uint64_t dst_offset,
-                                         uint64_t bytes,
-                                         BdrvRequestFlags read_flags,
-                                         BdrvRequestFlags write_flags);
-int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
-                                       BdrvChild *dst, uint64_t dst_offset,
-                                       uint64_t bytes,
-                                       BdrvRequestFlags read_flags,
-                                       BdrvRequestFlags write_flags);
-
-int refresh_total_sectors(BlockDriverState *bs, int64_t hint);
-
-void bdrv_set_monitor_owned(BlockDriverState *bs);
-BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp);
-
-/**
- * Simple implementation of bdrv_co_create_opts for protocol drivers
- * which only support creation via opening a file
- * (usually existing raw storage device)
- */
-int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
-                                            const char *filename,
-                                            QemuOpts *opts,
-                                            Error **errp);
-extern QemuOptsList bdrv_create_opts_simple;
-
-BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
-                                           const char *name,
-                                           BlockDriverState **pbs,
-                                           Error **errp);
-BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
-                                          BlockDirtyBitmapMergeSourceList *bms,
-                                          HBitmap **backup, Error **errp);
-BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
-                                           bool release,
-                                           BlockDriverState **bitmap_bs,
-                                           Error **errp);
-
-BdrvChild *bdrv_cow_child(BlockDriverState *bs);
-BdrvChild *bdrv_filter_child(BlockDriverState *bs);
-BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs);
-BdrvChild *bdrv_primary_child(BlockDriverState *bs);
-BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs);
-BlockDriverState *bdrv_skip_filters(BlockDriverState *bs);
-BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs);
-
-static inline BlockDriverState *child_bs(BdrvChild *child)
-{
-    return child ? child->bs : NULL;
-}
-
-static inline BlockDriverState *bdrv_cow_bs(BlockDriverState *bs)
-{
-    return child_bs(bdrv_cow_child(bs));
-}
-
-static inline BlockDriverState *bdrv_filter_bs(BlockDriverState *bs)
-{
-    return child_bs(bdrv_filter_child(bs));
-}
-
-static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs)
-{
-    return child_bs(bdrv_filter_or_cow_child(bs));
-}
-
-static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs)
-{
-    return child_bs(bdrv_primary_child(bs));
-}
-
-/**
- * End all quiescent sections started by bdrv_drain_all_begin(). This is
- * needed when deleting a BDS before bdrv_drain_all_end() is called.
- *
- * NOTE: this is an internal helper for bdrv_close() *only*. No one else
- * should call it.
- */
-void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
+/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
 
 #endif /* BLOCK_INT_H */
index 35faa3aa2687e2975fe54bd3b1a92882df375fee..e594c10d2319982ef4bfbf584f08be3aada39dc0 100644 (file)
@@ -26,8 +26,8 @@
 #ifndef BLOCKJOB_H
 #define BLOCKJOB_H
 
+#include "qapi/qapi-types-block-core.h"
 #include "qemu/job.h"
-#include "block/block.h"
 #include "qemu/ratelimit.h"
 
 #define BLOCK_JOB_SLICE_TIME 100000000ULL /* ns */
@@ -40,24 +40,38 @@ typedef struct BlockJobDriver BlockJobDriver;
  * Long-running operation on a BlockDriverState.
  */
 typedef struct BlockJob {
-    /** Data belonging to the generic Job infrastructure */
+    /**
+     * Data belonging to the generic Job infrastructure.
+     * Protected by job mutex.
+     */
     Job job;
 
-    /** The block device on which the job is operating.  */
-    BlockBackend *blk;
-
-    /** Status that is published by the query-block-jobs QMP API */
+    /**
+     * Status that is published by the query-block-jobs QMP API.
+     * Protected by job mutex.
+     */
     BlockDeviceIoStatus iostatus;
 
-    /** Speed that was set with @block_job_set_speed.  */
+    /**
+     * Speed that was set with @block_job_set_speed.
+     * Always modified and read under QEMU global mutex (GLOBAL_STATE_CODE).
+     */
     int64_t speed;
 
-    /** Rate limiting data structure for implementing @speed. */
+    /**
+     * Rate limiting data structure for implementing @speed.
+     * RateLimit API is thread-safe.
+     */
     RateLimit limit;
 
-    /** Block other operations when block job is running */
+    /**
+     * Block other operations when block job is running.
+     * Always modified and read under QEMU global mutex (GLOBAL_STATE_CODE).
+     */
     Error *blocker;
 
+    /** All notifiers are set once in block_job_create() and never modified. */
+
     /** Called when a cancelled job is finalised. */
     Notifier finalize_cancelled_notifier;
 
@@ -73,20 +87,31 @@ typedef struct BlockJob {
     /** Called when the job coroutine yields or terminates */
     Notifier idle_notifier;
 
-    /** BlockDriverStates that are involved in this block job */
+    /**
+     * BlockDriverStates that are involved in this block job.
+     * Always modified and read under QEMU global mutex (GLOBAL_STATE_CODE).
+     */
     GSList *nodes;
 } BlockJob;
 
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
 /**
- * block_job_next:
+ * block_job_next_locked:
  * @job: A block job, or %NULL.
  *
  * Get the next element from the list of block jobs after @job, or the
  * first one if @job is %NULL.
  *
  * Returns the requested job, or %NULL if there are no more jobs left.
+ * Called with job lock held.
  */
-BlockJob *block_job_next(BlockJob *job);
+BlockJob *block_job_next_locked(BlockJob *job);
 
 /**
  * block_job_get:
@@ -95,9 +120,13 @@ BlockJob *block_job_next(BlockJob *job);
  * Get the block job identified by @id (which must not be %NULL).
  *
  * Returns the requested job, or %NULL if it doesn't exist.
+ * Called with job lock *not* held.
  */
 BlockJob *block_job_get(const char *id);
 
+/* Same as block_job_get(), but called with job lock held. */
+BlockJob *block_job_get_locked(const char *id);
+
 /**
  * block_job_add_bdrv:
  * @job: A block job
@@ -109,8 +138,9 @@ BlockJob *block_job_get(const char *id);
  * @job. This means that all operations will be blocked on @bs while
  * @job exists.
  */
-int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
-                       uint64_t perm, uint64_t shared_perm, Error **errp);
+int GRAPH_WRLOCK
+block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
+                   uint64_t perm, uint64_t shared_perm, Error **errp);
 
 /**
  * block_job_remove_all_bdrv:
@@ -131,32 +161,64 @@ void block_job_remove_all_bdrv(BlockJob *job);
 bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs);
 
 /**
- * block_job_set_speed:
+ * block_job_set_speed_locked:
  * @job: The job to set the speed for.
  * @speed: The new value
  * @errp: Error object.
  *
  * Set a rate-limiting parameter for the job; the actual meaning may
  * vary depending on the job type.
+ *
+ * Called with job lock held, but might release it temporarily.
+ */
+bool block_job_set_speed_locked(BlockJob *job, int64_t speed, Error **errp);
+
+/**
+ * block_job_change_locked:
+ * @job: The job to change.
+ * @opts: The new options.
+ * @errp: Error object.
+ *
+ * Change the job according to opts.
  */
-void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
+void block_job_change_locked(BlockJob *job, BlockJobChangeOptions *opts,
+                             Error **errp);
 
 /**
- * block_job_query:
+ * block_job_query_locked:
  * @job: The job to get information about.
  *
  * Return information about a job.
+ *
+ * Called with job lock held.
  */
-BlockJobInfo *block_job_query(BlockJob *job, Error **errp);
+BlockJobInfo *block_job_query_locked(BlockJob *job, Error **errp);
 
 /**
- * block_job_iostatus_reset:
+ * block_job_iostatus_reset_locked:
  * @job: The job whose I/O status should be reset.
  *
  * Reset I/O status on @job and on BlockDriverState objects it uses,
  * other than job->blk.
+ *
+ * Called with job lock held.
+ */
+void block_job_iostatus_reset_locked(BlockJob *job);
+
+/*
+ * block_job_get_aio_context:
+ *
+ * Returns aio context associated with a block job.
+ */
+AioContext *block_job_get_aio_context(BlockJob *job);
+
+
+/*
+ * Common functions that are neither I/O nor Global State.
+ *
+ * See include/block/block-common.h for more information about
+ * the Common API.
  */
-void block_job_iostatus_reset(BlockJob *job);
 
 /**
  * block_job_is_internal:
index e2824a36a842205c7dd8a3e3bc146b8058636376..4c3d2e25a209b578cf292fc205dbc202c88f901a 100644 (file)
@@ -27,7 +27,6 @@
 #define BLOCKJOB_INT_H
 
 #include "block/blockjob.h"
-#include "block/block.h"
 
 /**
  * BlockJobDriver:
@@ -38,6 +37,13 @@ struct BlockJobDriver {
     /** Generic JobDriver callbacks and settings */
     JobDriver job_driver;
 
+    /*
+     * I/O API functions. These functions are thread-safe.
+     *
+     * See include/block/block-io.h for more information about
+     * the I/O API.
+     */
+
     /*
      * Returns whether the job has pending requests for the child or will
      * submit new requests before the next pause point. This callback is polled
@@ -46,14 +52,42 @@ struct BlockJobDriver {
      */
     bool (*drained_poll)(BlockJob *job);
 
+    /*
+     * Global state (GS) API. These functions run under the BQL.
+     *
+     * See include/block/block-global-state.h for more information about
+     * the GS API.
+     */
+
     /*
      * If the callback is not NULL, it will be invoked before the job is
      * resumed in a new AioContext.  This is the place to move any resources
      * besides job->blk to the new AioContext.
      */
     void (*attached_aio_context)(BlockJob *job, AioContext *new_context);
+
+    void (*set_speed)(BlockJob *job, int64_t speed);
+
+    /*
+     * Change the @job's options according to @opts.
+     *
+     * Note that this can already be called before the job coroutine is running.
+     */
+    void (*change)(BlockJob *job, BlockJobChangeOptions *opts, Error **errp);
+
+    /*
+     * Query information specific to this kind of block job.
+     */
+    void (*query)(BlockJob *job, BlockJobInfo *info);
 };
 
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
 /**
  * block_job_create:
  * @job_id: The id of the newly-created job, or %NULL to have one
@@ -77,10 +111,11 @@ struct BlockJobDriver {
  * This function is not part of the public job interface; it should be
  * called from a wrapper that is specific to the job type.
  */
-void *block_job_create(const char *job_id, const BlockJobDriver *driver,
-                       JobTxn *txn, BlockDriverState *bs, uint64_t perm,
-                       uint64_t shared_perm, int64_t speed, int flags,
-                       BlockCompletionFunc *cb, void *opaque, Error **errp);
+void * GRAPH_UNLOCKED
+block_job_create(const char *job_id, const BlockJobDriver *driver,
+                 JobTxn *txn, BlockDriverState *bs, uint64_t perm,
+                 uint64_t shared_perm, int64_t speed, int flags,
+                 BlockCompletionFunc *cb, void *opaque, Error **errp);
 
 /**
  * block_job_free:
@@ -96,13 +131,26 @@ void block_job_free(Job *job);
  */
 void block_job_user_resume(Job *job);
 
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
 /**
- * block_job_ratelimit_get_delay:
+ * block_job_ratelimit_processed_bytes:
  *
- * Calculate and return delay for the next request in ns. See the documentation
- * of ratelimit_calculate_delay() for details.
+ * To be called after some work has been done. Adjusts the delay for the next
+ * request. See the documentation of ratelimit_calculate_delay() for details.
+ */
+void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n);
+
+/**
+ * Put the job to sleep (assuming that it wasn't canceled) to throttle it to the
+ * right speed according to its rate limiting.
  */
-int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n);
+void block_job_ratelimit_sleep(BlockJob *job);
 
 /**
  * block_job_error_action:
index 36e8da4fc2e34b6a3c2dda5f545ec260c5206060..fa956debfb2883e721142fafe5c2f1941fca621c 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef BLOCK_DIRTY_BITMAP_H
 #define BLOCK_DIRTY_BITMAP_H
 
+#include "block/block-common.h"
 #include "qapi/qapi-types-block-core.h"
 #include "qemu/hbitmap.h"
 
@@ -34,8 +35,14 @@ int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, uint32_t flags,
                             Error **errp);
 void bdrv_release_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
-int bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
-                                        Error **errp);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
+                                       Error **errp);
+int co_wrapper_bdrv_rdlock
+bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
+                                    Error **errp);
+
 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 void bdrv_enable_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap);
@@ -46,7 +53,6 @@ bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_has_successor(BdrvDirtyBitmap *bitmap);
 const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap);
 int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap);
-DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                            int64_t offset, int64_t bytes);
 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
@@ -57,6 +63,8 @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter);
 uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap,
                                               uint64_t offset, uint64_t bytes);
 uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap);
+uint64_t bdrv_dirty_bitmap_serialization_coverage(int serialized_chunk_size,
+        const BdrvDirtyBitmap *bitmap);
 void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap,
                                       uint8_t *buf, uint64_t offset,
                                       uint64_t bytes);
@@ -76,7 +84,7 @@ void bdrv_dirty_bitmap_set_persistence(BdrvDirtyBitmap *bitmap,
                                        bool persistent);
 void bdrv_dirty_bitmap_set_inconsistent(BdrvDirtyBitmap *bitmap);
 void bdrv_dirty_bitmap_set_busy(BdrvDirtyBitmap *bitmap, bool busy);
-void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
+bool bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
                              HBitmap **backup, Error **errp);
 void bdrv_dirty_bitmap_skip_store(BdrvDirtyBitmap *bitmap, bool skip);
 bool bdrv_dirty_bitmap_get(BdrvDirtyBitmap *bitmap, int64_t offset);
@@ -114,6 +122,8 @@ int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, int64_t offset,
 bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap *bitmap,
         int64_t start, int64_t end, int64_t max_dirty_count,
         int64_t *dirty_start, int64_t *dirty_count);
+bool bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap, int64_t offset,
+                              int64_t bytes, int64_t *count);
 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
                                                   Error **errp);
 
index 7feb02e10d471806a17837d8e602e7616bd2a590..f2fe0f8078f32c6b4719d8d003044ebadc2993e2 100644 (file)
@@ -57,6 +57,8 @@ struct BlockExport {
      * Reference count for this block export. This includes strong references
      * both from the owner (qemu-nbd or the monitor) and clients connected to
      * the export.
+     *
+     * Use atomics to access this field.
      */
     int refcount;
 
diff --git a/include/block/fuse.h b/include/block/fuse.h
new file mode 100644 (file)
index 0000000..ffa91fe
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Present a block device as a raw image through FUSE
+ *
+ * Copyright (c) 2020 Max Reitz <mreitz@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 or later of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef BLOCK_FUSE_H
+#define BLOCK_FUSE_H
+
+#ifdef CONFIG_FUSE
+
+#include "block/export.h"
+
+extern const BlockExportDriver blk_exp_fuse;
+
+#endif /* CONFIG_FUSE */
+
+#endif
diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h
new file mode 100644 (file)
index 0000000..22b5db1
--- /dev/null
@@ -0,0 +1,295 @@
+/*
+ * Graph lock: rwlock to protect block layer graph manipulations (add/remove
+ * edges and nodes)
+ *
+ *  Copyright (c) 2022 Red Hat
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef GRAPH_LOCK_H
+#define GRAPH_LOCK_H
+
+#include "qemu/clang-tsa.h"
+
+/**
+ * Graph Lock API
+ * This API provides a rwlock used to protect block layer
+ * graph modifications like edge (BdrvChild) and node (BlockDriverState)
+ * addition and removal.
+ * Currently we have 1 writer only, the Main loop, and many
+ * readers, mostly coroutines running in other AioContext thus other threads.
+ *
+ * We distinguish between writer (main loop, under BQL) that modifies the
+ * graph, and readers (all other coroutines running in various AioContext),
+ * that go through the graph edges, reading
+ * BlockDriverState ->parents and->children.
+ *
+ * The writer (main loop)  has an "exclusive" access, so it first waits for
+ * current read to finish, and then prevents incoming ones from
+ * entering while it has the exclusive access.
+ *
+ * The readers (coroutines in multiple AioContext) are free to
+ * access the graph as long the writer is not modifying the graph.
+ * In case it is, they go in a CoQueue and sleep until the writer
+ * is done.
+ *
+ * If a coroutine changes AioContext, the counter in the original and new
+ * AioContext are left intact, since the writer does not care where is the
+ * reader, but only if there is one.
+ * As a result, some AioContexts might have a negative reader count, to
+ * balance the positive count of the AioContext that took the lock.
+ * This also means that when an AioContext is deleted it may have a nonzero
+ * reader count. In that case we transfer the count to a global shared counter
+ * so that the writer is always aware of all readers.
+ */
+typedef struct BdrvGraphRWlock BdrvGraphRWlock;
+
+/* Dummy lock object to use for Thread Safety Analysis (TSA) */
+typedef struct TSA_CAPABILITY("mutex") BdrvGraphLock {
+} BdrvGraphLock;
+
+extern BdrvGraphLock graph_lock;
+
+/*
+ * clang doesn't check consistency in locking annotations between forward
+ * declarations and the function definition. Having the annotation on the
+ * definition, but not the declaration in a header file, may give the reader
+ * a false sense of security because the condition actually remains unchecked
+ * for callers in other source files.
+ *
+ * Therefore, as a convention, for public functions, GRAPH_RDLOCK and
+ * GRAPH_WRLOCK annotations should be present only in the header file.
+ */
+#define GRAPH_WRLOCK TSA_REQUIRES(graph_lock)
+#define GRAPH_RDLOCK TSA_REQUIRES_SHARED(graph_lock)
+#define GRAPH_UNLOCKED TSA_EXCLUDES(graph_lock)
+
+/*
+ * TSA annotations are not part of function types, so checks are defeated when
+ * using a function pointer. As a workaround, annotate function pointers with
+ * this macro that will require that the lock is at least taken while reading
+ * the pointer. In most cases this is equivalent to actually protecting the
+ * function call.
+ */
+#define GRAPH_RDLOCK_PTR TSA_GUARDED_BY(graph_lock)
+#define GRAPH_WRLOCK_PTR TSA_GUARDED_BY(graph_lock)
+#define GRAPH_UNLOCKED_PTR
+
+/*
+ * register_aiocontext:
+ * Add AioContext @ctx to the list of AioContext.
+ * This list is used to obtain the total number of readers
+ * currently running the graph.
+ */
+void register_aiocontext(AioContext *ctx);
+
+/*
+ * unregister_aiocontext:
+ * Removes AioContext @ctx to the list of AioContext.
+ */
+void unregister_aiocontext(AioContext *ctx);
+
+/*
+ * bdrv_graph_wrlock:
+ * Start an exclusive write operation to modify the graph. This means we are
+ * adding or removing an edge or a node in the block layer graph. Nobody else
+ * is allowed to access the graph.
+ *
+ * Must only be called from outside bdrv_graph_co_rdlock.
+ *
+ * The wrlock can only be taken from the main loop, with BQL held, as only the
+ * main loop is allowed to modify the graph.
+ *
+ * If @bs is non-NULL, its AioContext is temporarily released.
+ *
+ * This function polls. Callers must not hold the lock of any AioContext other
+ * than the current one and the one of @bs.
+ */
+void no_coroutine_fn TSA_ACQUIRE(graph_lock) TSA_NO_TSA
+bdrv_graph_wrlock(BlockDriverState *bs);
+
+/*
+ * bdrv_graph_wrunlock:
+ * Write finished, reset global has_writer to 0 and restart
+ * all readers that are waiting.
+ *
+ * If @bs is non-NULL, its AioContext is temporarily released.
+ */
+void no_coroutine_fn TSA_RELEASE(graph_lock) TSA_NO_TSA
+bdrv_graph_wrunlock(BlockDriverState *bs);
+
+/*
+ * bdrv_graph_wrunlock_ctx:
+ * Write finished, reset global has_writer to 0 and restart
+ * all readers that are waiting.
+ *
+ * If @ctx is non-NULL, its lock is temporarily released.
+ */
+void no_coroutine_fn TSA_RELEASE(graph_lock) TSA_NO_TSA
+bdrv_graph_wrunlock_ctx(AioContext *ctx);
+
+/*
+ * bdrv_graph_co_rdlock:
+ * Read the bs graph. This usually means traversing all nodes in
+ * the graph, therefore it can't happen while another thread is
+ * modifying it.
+ * Increases the reader counter of the current aiocontext,
+ * and if has_writer is set, it means that the writer is modifying
+ * the graph, therefore wait in a coroutine queue.
+ * The writer will then wake this coroutine once it is done.
+ *
+ * This lock should be taken from Iothreads (IO_CODE() class of functions)
+ * because it signals the writer that there are some
+ * readers currently running, or waits until the current
+ * write is finished before continuing.
+ * Calling this function from the Main Loop with BQL held
+ * is not necessary, since the Main Loop itself is the only
+ * writer, thus won't be able to read and write at the same time.
+ * The only exception to that is when we can't take the lock in the
+ * function/coroutine itself, and need to delegate the caller (usually main
+ * loop) to take it and wait that the coroutine ends, so that
+ * we always signal that a reader is running.
+ */
+void coroutine_fn TSA_ACQUIRE_SHARED(graph_lock) TSA_NO_TSA
+bdrv_graph_co_rdlock(void);
+
+/*
+ * bdrv_graph_rdunlock:
+ * Read terminated, decrease the count of readers in the current aiocontext.
+ * If the writer is waiting for reads to finish (has_writer == 1), signal
+ * the writer that we are done via aio_wait_kick() to let it continue.
+ */
+void coroutine_fn TSA_RELEASE_SHARED(graph_lock) TSA_NO_TSA
+bdrv_graph_co_rdunlock(void);
+
+/*
+ * bdrv_graph_rd{un}lock_main_loop:
+ * Just a placeholder to mark where the graph rdlock should be taken
+ * in the main loop. It is just asserting that we are not
+ * in a coroutine and in GLOBAL_STATE_CODE.
+ */
+void TSA_ACQUIRE_SHARED(graph_lock) TSA_NO_TSA
+bdrv_graph_rdlock_main_loop(void);
+
+void TSA_RELEASE_SHARED(graph_lock) TSA_NO_TSA
+bdrv_graph_rdunlock_main_loop(void);
+
+/*
+ * assert_bdrv_graph_readable:
+ * Make sure that the reader is either the main loop,
+ * or there is at least a reader helding the rdlock.
+ * In this way an incoming writer is aware of the read and waits.
+ */
+void GRAPH_RDLOCK assert_bdrv_graph_readable(void);
+
+/*
+ * assert_bdrv_graph_writable:
+ * Make sure that the writer is the main loop and has set @has_writer,
+ * so that incoming readers will pause.
+ */
+void GRAPH_WRLOCK assert_bdrv_graph_writable(void);
+
+/*
+ * Calling this function tells TSA that we know that the lock is effectively
+ * taken even though we cannot prove it (yet) with GRAPH_RDLOCK. This can be
+ * useful in intermediate stages of a conversion to using the GRAPH_RDLOCK
+ * macro.
+ */
+static inline void TSA_ASSERT_SHARED(graph_lock) TSA_NO_TSA
+assume_graph_lock(void)
+{
+}
+
+typedef struct GraphLockable { } GraphLockable;
+
+/*
+ * In C, compound literals have the lifetime of an automatic variable.
+ * In C++ it would be different, but then C++ wouldn't need QemuLockable
+ * either...
+ */
+#define GML_OBJ_() (&(GraphLockable) { })
+
+/*
+ * This is not marked as TSA_ACQUIRE_SHARED() because TSA doesn't understand the
+ * cleanup attribute and would therefore complain that the graph is never
+ * unlocked. TSA_ASSERT_SHARED() makes sure that the following calls know that
+ * we hold the lock while unlocking is left unchecked.
+ */
+static inline GraphLockable * TSA_ASSERT_SHARED(graph_lock) TSA_NO_TSA coroutine_fn
+graph_lockable_auto_lock(GraphLockable *x)
+{
+    bdrv_graph_co_rdlock();
+    return x;
+}
+
+static inline void TSA_NO_TSA coroutine_fn
+graph_lockable_auto_unlock(GraphLockable *x)
+{
+    bdrv_graph_co_rdunlock();
+}
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(GraphLockable, graph_lockable_auto_unlock)
+
+#define WITH_GRAPH_RDLOCK_GUARD_(var)                                         \
+    for (g_autoptr(GraphLockable) var = graph_lockable_auto_lock(GML_OBJ_()); \
+         var;                                                                 \
+         graph_lockable_auto_unlock(var), var = NULL)
+
+#define WITH_GRAPH_RDLOCK_GUARD() \
+    WITH_GRAPH_RDLOCK_GUARD_(glue(graph_lockable_auto, __COUNTER__))
+
+#define GRAPH_RDLOCK_GUARD(x)                                       \
+    g_autoptr(GraphLockable)                                        \
+    glue(graph_lockable_auto, __COUNTER__) G_GNUC_UNUSED =          \
+            graph_lockable_auto_lock(GML_OBJ_())
+
+
+typedef struct GraphLockableMainloop { } GraphLockableMainloop;
+
+/*
+ * In C, compound literals have the lifetime of an automatic variable.
+ * In C++ it would be different, but then C++ wouldn't need QemuLockable
+ * either...
+ */
+#define GMLML_OBJ_() (&(GraphLockableMainloop) { })
+
+/*
+ * This is not marked as TSA_ACQUIRE_SHARED() because TSA doesn't understand the
+ * cleanup attribute and would therefore complain that the graph is never
+ * unlocked. TSA_ASSERT_SHARED() makes sure that the following calls know that
+ * we hold the lock while unlocking is left unchecked.
+ */
+static inline GraphLockableMainloop * TSA_ASSERT_SHARED(graph_lock) TSA_NO_TSA
+graph_lockable_auto_lock_mainloop(GraphLockableMainloop *x)
+{
+    bdrv_graph_rdlock_main_loop();
+    return x;
+}
+
+static inline void TSA_NO_TSA
+graph_lockable_auto_unlock_mainloop(GraphLockableMainloop *x)
+{
+    bdrv_graph_rdunlock_main_loop();
+}
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(GraphLockableMainloop,
+                              graph_lockable_auto_unlock_mainloop)
+
+#define GRAPH_RDLOCK_GUARD_MAINLOOP(x)                              \
+    g_autoptr(GraphLockableMainloop)                                \
+    glue(graph_lockable_auto, __COUNTER__) G_GNUC_UNUSED =          \
+            graph_lockable_auto_lock_mainloop(GMLML_OBJ_())
+
+#endif /* GRAPH_LOCK_H */
+
index 4a52a43ef59882c8c31a06f1400212aa2722d155..4e7bd6342f9cc39378e913cb47969331ca462ee0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  Copyright (C) 2016-2020 Red Hat, Inc.
+ *  Copyright Red Hat
  *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
  *
  *  Network Block Device
 #include "io/channel-socket.h"
 #include "crypto/tlscreds.h"
 #include "qapi/error.h"
+#include "qemu/bswap.h"
+
+typedef struct NBDExport NBDExport;
+typedef struct NBDClient NBDClient;
+typedef struct NBDClientConnection NBDClientConnection;
+typedef struct NBDMetaContexts NBDMetaContexts;
 
 extern const BlockExportDriver blk_exp_nbd;
 
 /* Handshake phase structs - this struct is passed on the wire */
 
-struct NBDOption {
+typedef struct NBDOption {
     uint64_t magic; /* NBD_OPTS_MAGIC */
     uint32_t option; /* NBD_OPT_* */
     uint32_t length;
-} QEMU_PACKED;
-typedef struct NBDOption NBDOption;
+} QEMU_PACKED NBDOption;
 
-struct NBDOptionReply {
+typedef struct NBDOptionReply {
     uint64_t magic; /* NBD_REP_MAGIC */
     uint32_t option; /* NBD_OPT_* */
     uint32_t type; /* NBD_REP_* */
     uint32_t length;
-} QEMU_PACKED;
-typedef struct NBDOptionReply NBDOptionReply;
+} QEMU_PACKED NBDOptionReply;
 
 typedef struct NBDOptionReplyMetaContext {
     NBDOptionReply h; /* h.type = NBD_REP_META_CONTEXT, h.length > 4 */
@@ -50,24 +54,36 @@ typedef struct NBDOptionReplyMetaContext {
     /* metadata context name follows */
 } QEMU_PACKED NBDOptionReplyMetaContext;
 
-/* Transmission phase structs
- *
- * Note: these are _NOT_ the same as the network representation of an NBD
- * request and reply!
+/* Track results of negotiation */
+typedef enum NBDMode {
+    /* Keep this list in a continuum of increasing features. */
+    NBD_MODE_OLDSTYLE,     /* server lacks newstyle negotiation */
+    NBD_MODE_EXPORT_NAME,  /* newstyle but only OPT_EXPORT_NAME safe */
+    NBD_MODE_SIMPLE,       /* newstyle but only simple replies */
+    NBD_MODE_STRUCTURED,   /* newstyle, structured replies enabled */
+    NBD_MODE_EXTENDED,     /* newstyle, extended headers enabled */
+} NBDMode;
+
+/* Transmission phase structs */
+
+/*
+ * Note: NBDRequest is _NOT_ the same as the network representation of an NBD
+ * request!
  */
-struct NBDRequest {
-    uint64_t handle;
-    uint64_t from;
-    uint32_t len;
+typedef struct NBDRequest {
+    uint64_t cookie;
+    uint64_t from;  /* Offset touched by the command */
+    uint64_t len;   /* Effect length; 32 bit limit without extended headers */
     uint16_t flags; /* NBD_CMD_FLAG_* */
-    uint16_t type; /* NBD_CMD_* */
-};
-typedef struct NBDRequest NBDRequest;
+    uint16_t type;  /* NBD_CMD_* */
+    NBDMode mode;   /* Determines which network representation to use */
+    NBDMetaContexts *contexts; /* Used by NBD_CMD_BLOCK_STATUS */
+} NBDRequest;
 
 typedef struct NBDSimpleReply {
     uint32_t magic;  /* NBD_SIMPLE_REPLY_MAGIC */
     uint32_t error;
-    uint64_t handle;
+    uint64_t cookie;
 } QEMU_PACKED NBDSimpleReply;
 
 /* Header of all structured replies */
@@ -75,57 +91,94 @@ typedef struct NBDStructuredReplyChunk {
     uint32_t magic;  /* NBD_STRUCTURED_REPLY_MAGIC */
     uint16_t flags;  /* combination of NBD_REPLY_FLAG_* */
     uint16_t type;   /* NBD_REPLY_TYPE_* */
-    uint64_t handle; /* request handle */
+    uint64_t cookie; /* request handle */
     uint32_t length; /* length of payload */
 } QEMU_PACKED NBDStructuredReplyChunk;
 
+typedef struct NBDExtendedReplyChunk {
+    uint32_t magic;  /* NBD_EXTENDED_REPLY_MAGIC */
+    uint16_t flags;  /* combination of NBD_REPLY_FLAG_* */
+    uint16_t type;   /* NBD_REPLY_TYPE_* */
+    uint64_t cookie; /* request handle */
+    uint64_t offset; /* request offset */
+    uint64_t length; /* length of payload */
+} QEMU_PACKED NBDExtendedReplyChunk;
+
 typedef union NBDReply {
     NBDSimpleReply simple;
     NBDStructuredReplyChunk structured;
+    NBDExtendedReplyChunk extended;
     struct {
-        /* @magic and @handle fields have the same offset and size both in
-         * simple reply and structured reply chunk, so let them be accessible
-         * without ".simple." or ".structured." specification
+        /*
+         * @magic and @cookie fields have the same offset and size in all
+         * forms of replies, so let them be accessible without ".simple.",
+         * ".structured.", or ".extended." specifications.
          */
         uint32_t magic;
         uint32_t _skip;
-        uint64_t handle;
-    } QEMU_PACKED;
+        uint64_t cookie;
+    };
 } NBDReply;
+QEMU_BUILD_BUG_ON(offsetof(NBDReply, simple.cookie) !=
+                  offsetof(NBDReply, cookie));
+QEMU_BUILD_BUG_ON(offsetof(NBDReply, structured.cookie) !=
+                  offsetof(NBDReply, cookie));
+QEMU_BUILD_BUG_ON(offsetof(NBDReply, extended.cookie) !=
+                  offsetof(NBDReply, cookie));
 
 /* Header of chunk for NBD_REPLY_TYPE_OFFSET_DATA */
 typedef struct NBDStructuredReadData {
-    NBDStructuredReplyChunk h; /* h.length >= 9 */
+    /* header's .length >= 9 */
     uint64_t offset;
     /* At least one byte of data payload follows, calculated from h.length */
 } QEMU_PACKED NBDStructuredReadData;
 
 /* Complete chunk for NBD_REPLY_TYPE_OFFSET_HOLE */
 typedef struct NBDStructuredReadHole {
-    NBDStructuredReplyChunk h; /* h.length == 12 */
+    /* header's length == 12 */
     uint64_t offset;
     uint32_t length;
 } QEMU_PACKED NBDStructuredReadHole;
 
 /* Header of all NBD_REPLY_TYPE_ERROR* errors */
 typedef struct NBDStructuredError {
-    NBDStructuredReplyChunk h; /* h.length >= 6 */
+    /* header's length >= 6 */
     uint32_t error;
     uint16_t message_length;
 } QEMU_PACKED NBDStructuredError;
 
 /* Header of NBD_REPLY_TYPE_BLOCK_STATUS */
 typedef struct NBDStructuredMeta {
-    NBDStructuredReplyChunk h; /* h.length >= 12 (at least one extent) */
+    /* header's length >= 12 (at least one extent) */
     uint32_t context_id;
-    /* extents follows */
+    /* NBDExtent32 extents[] follows, array length implied by header */
 } QEMU_PACKED NBDStructuredMeta;
 
-/* Extent chunk for NBD_REPLY_TYPE_BLOCK_STATUS */
-typedef struct NBDExtent {
+/* Extent array element for NBD_REPLY_TYPE_BLOCK_STATUS */
+typedef struct NBDExtent32 {
     uint32_t length;
     uint32_t flags; /* NBD_STATE_* */
-} QEMU_PACKED NBDExtent;
+} QEMU_PACKED NBDExtent32;
+
+/* Header of NBD_REPLY_TYPE_BLOCK_STATUS_EXT */
+typedef struct NBDExtendedMeta {
+    /* header's length >= 24 (at least one extent) */
+    uint32_t context_id;
+    uint32_t count; /* header length must be count * 16 + 8 */
+    /* NBDExtent64 extents[count] follows */
+} QEMU_PACKED NBDExtendedMeta;
+
+/* Extent array element for NBD_REPLY_TYPE_BLOCK_STATUS_EXT */
+typedef struct NBDExtent64 {
+    uint64_t length;
+    uint64_t flags; /* NBD_STATE_* */
+} QEMU_PACKED NBDExtent64;
+
+/* Client payload for limiting NBD_CMD_BLOCK_STATUS reply */
+typedef struct NBDBlockStatusPayload {
+    uint64_t effect_length;
+    /* uint32_t ids[] follows, array length implied by header */
+} QEMU_PACKED NBDBlockStatusPayload;
 
 /* Transmission (export) flags: sent from server to client during handshake,
    but describe what will happen during transmission */
@@ -143,20 +196,22 @@ enum {
     NBD_FLAG_SEND_RESIZE_BIT        =  9, /* Send resize */
     NBD_FLAG_SEND_CACHE_BIT         = 10, /* Send CACHE (prefetch) */
     NBD_FLAG_SEND_FAST_ZERO_BIT     = 11, /* FAST_ZERO flag for WRITE_ZEROES */
+    NBD_FLAG_BLOCK_STAT_PAYLOAD_BIT = 12, /* PAYLOAD flag for BLOCK_STATUS */
 };
 
-#define NBD_FLAG_HAS_FLAGS         (1 << NBD_FLAG_HAS_FLAGS_BIT)
-#define NBD_FLAG_READ_ONLY         (1 << NBD_FLAG_READ_ONLY_BIT)
-#define NBD_FLAG_SEND_FLUSH        (1 << NBD_FLAG_SEND_FLUSH_BIT)
-#define NBD_FLAG_SEND_FUA          (1 << NBD_FLAG_SEND_FUA_BIT)
-#define NBD_FLAG_ROTATIONAL        (1 << NBD_FLAG_ROTATIONAL_BIT)
-#define NBD_FLAG_SEND_TRIM         (1 << NBD_FLAG_SEND_TRIM_BIT)
-#define NBD_FLAG_SEND_WRITE_ZEROES (1 << NBD_FLAG_SEND_WRITE_ZEROES_BIT)
-#define NBD_FLAG_SEND_DF           (1 << NBD_FLAG_SEND_DF_BIT)
-#define NBD_FLAG_CAN_MULTI_CONN    (1 << NBD_FLAG_CAN_MULTI_CONN_BIT)
-#define NBD_FLAG_SEND_RESIZE       (1 << NBD_FLAG_SEND_RESIZE_BIT)
-#define NBD_FLAG_SEND_CACHE        (1 << NBD_FLAG_SEND_CACHE_BIT)
-#define NBD_FLAG_SEND_FAST_ZERO    (1 << NBD_FLAG_SEND_FAST_ZERO_BIT)
+#define NBD_FLAG_HAS_FLAGS          (1 << NBD_FLAG_HAS_FLAGS_BIT)
+#define NBD_FLAG_READ_ONLY          (1 << NBD_FLAG_READ_ONLY_BIT)
+#define NBD_FLAG_SEND_FLUSH         (1 << NBD_FLAG_SEND_FLUSH_BIT)
+#define NBD_FLAG_SEND_FUA           (1 << NBD_FLAG_SEND_FUA_BIT)
+#define NBD_FLAG_ROTATIONAL         (1 << NBD_FLAG_ROTATIONAL_BIT)
+#define NBD_FLAG_SEND_TRIM          (1 << NBD_FLAG_SEND_TRIM_BIT)
+#define NBD_FLAG_SEND_WRITE_ZEROES  (1 << NBD_FLAG_SEND_WRITE_ZEROES_BIT)
+#define NBD_FLAG_SEND_DF            (1 << NBD_FLAG_SEND_DF_BIT)
+#define NBD_FLAG_CAN_MULTI_CONN     (1 << NBD_FLAG_CAN_MULTI_CONN_BIT)
+#define NBD_FLAG_SEND_RESIZE        (1 << NBD_FLAG_SEND_RESIZE_BIT)
+#define NBD_FLAG_SEND_CACHE         (1 << NBD_FLAG_SEND_CACHE_BIT)
+#define NBD_FLAG_SEND_FAST_ZERO     (1 << NBD_FLAG_SEND_FAST_ZERO_BIT)
+#define NBD_FLAG_BLOCK_STAT_PAYLOAD (1 << NBD_FLAG_BLOCK_STAT_PAYLOAD_BIT)
 
 /* New-style handshake (global) flags, sent from server to client, and
    control what will happen during handshake phase. */
@@ -179,6 +234,7 @@ enum {
 #define NBD_OPT_STRUCTURED_REPLY  (8)
 #define NBD_OPT_LIST_META_CONTEXT (9)
 #define NBD_OPT_SET_META_CONTEXT  (10)
+#define NBD_OPT_EXTENDED_HEADERS  (11)
 
 /* Option reply types. */
 #define NBD_REP_ERR(value) ((UINT32_C(1) << 31) | (value))
@@ -196,6 +252,8 @@ enum {
 #define NBD_REP_ERR_UNKNOWN         NBD_REP_ERR(6)  /* Export unknown */
 #define NBD_REP_ERR_SHUTDOWN        NBD_REP_ERR(7)  /* Server shutting down */
 #define NBD_REP_ERR_BLOCK_SIZE_REQD NBD_REP_ERR(8)  /* Need INFO_BLOCK_SIZE */
+#define NBD_REP_ERR_TOO_BIG         NBD_REP_ERR(9)  /* Payload size overflow */
+#define NBD_REP_ERR_EXT_HEADER_REQD NBD_REP_ERR(10) /* Need extended headers */
 
 /* Info types, used during NBD_REP_INFO */
 #define NBD_INFO_EXPORT         0
@@ -204,12 +262,14 @@ enum {
 #define NBD_INFO_BLOCK_SIZE     3
 
 /* Request flags, sent from client to server during transmission phase */
-#define NBD_CMD_FLAG_FUA        (1 << 0) /* 'force unit access' during write */
-#define NBD_CMD_FLAG_NO_HOLE    (1 << 1) /* don't punch hole on zero run */
-#define NBD_CMD_FLAG_DF         (1 << 2) /* don't fragment structured read */
-#define NBD_CMD_FLAG_REQ_ONE    (1 << 3) /* only one extent in BLOCK_STATUS
-                                          * reply chunk */
-#define NBD_CMD_FLAG_FAST_ZERO  (1 << 4) /* fail if WRITE_ZEROES is not fast */
+#define NBD_CMD_FLAG_FUA         (1 << 0) /* 'force unit access' during write */
+#define NBD_CMD_FLAG_NO_HOLE     (1 << 1) /* don't punch hole on zero run */
+#define NBD_CMD_FLAG_DF          (1 << 2) /* don't fragment structured read */
+#define NBD_CMD_FLAG_REQ_ONE     (1 << 3) \
+    /* only one extent in BLOCK_STATUS reply chunk */
+#define NBD_CMD_FLAG_FAST_ZERO   (1 << 4) /* fail if WRITE_ZEROES is not fast */
+#define NBD_CMD_FLAG_PAYLOAD_LEN (1 << 5) \
+    /* length describes payload, not effect; only with ext header */
 
 /* Supported request types */
 enum {
@@ -235,22 +295,31 @@ enum {
  */
 #define NBD_MAX_STRING_SIZE 4096
 
-/* Two types of reply structures */
+/* Two types of request structures, a given client will only use 1 */
+#define NBD_REQUEST_MAGIC           0x25609513
+#define NBD_EXTENDED_REQUEST_MAGIC  0x21e41c71
+
+/*
+ * Three types of reply structures, but what a client expects depends
+ * on NBD_OPT_STRUCTURED_REPLY and NBD_OPT_EXTENDED_HEADERS.
+ */
 #define NBD_SIMPLE_REPLY_MAGIC      0x67446698
 #define NBD_STRUCTURED_REPLY_MAGIC  0x668e33ef
+#define NBD_EXTENDED_REPLY_MAGIC    0x6e8a278c
 
-/* Structured reply flags */
+/* Chunk reply flags (for structured and extended replies) */
 #define NBD_REPLY_FLAG_DONE          (1 << 0) /* This reply-chunk is last */
 
-/* Structured reply types */
+/* Chunk reply types */
 #define NBD_REPLY_ERR(value)         ((1 << 15) | (value))
 
-#define NBD_REPLY_TYPE_NONE          0
-#define NBD_REPLY_TYPE_OFFSET_DATA   1
-#define NBD_REPLY_TYPE_OFFSET_HOLE   2
-#define NBD_REPLY_TYPE_BLOCK_STATUS  5
-#define NBD_REPLY_TYPE_ERROR         NBD_REPLY_ERR(1)
-#define NBD_REPLY_TYPE_ERROR_OFFSET  NBD_REPLY_ERR(2)
+#define NBD_REPLY_TYPE_NONE              0
+#define NBD_REPLY_TYPE_OFFSET_DATA       1
+#define NBD_REPLY_TYPE_OFFSET_HOLE       2
+#define NBD_REPLY_TYPE_BLOCK_STATUS      5
+#define NBD_REPLY_TYPE_BLOCK_STATUS_EXT  6
+#define NBD_REPLY_TYPE_ERROR             NBD_REPLY_ERR(1)
+#define NBD_REPLY_TYPE_ERROR_OFFSET      NBD_REPLY_ERR(2)
 
 /* Extent flags for base:allocation in NBD_REPLY_TYPE_BLOCK_STATUS */
 #define NBD_STATE_HOLE (1 << 0)
@@ -281,7 +350,7 @@ static inline bool nbd_reply_type_is_error(int type)
 #define NBD_ESHUTDOWN  108
 
 /* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */
-struct NBDExportInfo {
+typedef struct NBDExportInfo {
     /* Set by client before nbd_receive_negotiate() */
     bool request_sizes;
     char *x_dirty_bitmap;
@@ -292,7 +361,7 @@ struct NBDExportInfo {
 
     /* In-out fields, set by client before nbd_receive_negotiate() and
      * updated by server results during nbd_receive_negotiate() */
-    bool structured_reply;
+    NBDMode mode; /* input maximum mode tolerated; output actual mode chosen */
     bool base_allocation; /* base:allocation context for NBD_CMD_BLOCK_STATUS */
 
     /* Set by server results during nbd_receive_negotiate() and
@@ -309,11 +378,9 @@ struct NBDExportInfo {
     char *description;
     int n_contexts;
     char **contexts;
-};
-typedef struct NBDExportInfo NBDExportInfo;
+} NBDExportInfo;
 
-int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
-                          QCryptoTLSCreds *tlscreds,
+int nbd_receive_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
                           const char *hostname, QIOChannel **outioc,
                           NBDExportInfo *info, Error **errp);
 void nbd_free_export_list(NBDExportInfo *info, int count);
@@ -324,14 +391,12 @@ int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
              Error **errp);
 int nbd_send_request(QIOChannel *ioc, NBDRequest *request);
 int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
-                                   NBDReply *reply, Error **errp);
+                                   NBDReply *reply, NBDMode mode,
+                                   Error **errp);
 int nbd_client(int fd);
 int nbd_disconnect(int fd);
 int nbd_errno_to_system_errno(int err);
 
-typedef struct NBDExport NBDExport;
-typedef struct NBDClient NBDClient;
-
 void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk);
 
 AioContext *nbd_export_aio_context(NBDExport *exp);
@@ -344,8 +409,9 @@ void nbd_client_new(QIOChannelSocket *sioc,
 void nbd_client_get(NBDClient *client);
 void nbd_client_put(NBDClient *client);
 
-void nbd_server_is_qemu_nbd(bool value);
+void nbd_server_is_qemu_nbd(int max_connections);
 bool nbd_server_is_running(void);
+int nbd_server_max_connections(void);
 void nbd_server_start(SocketAddress *addr, const char *tls_creds,
                       const char *tls_authz, uint32_t max_connections,
                       Error **errp);
@@ -364,7 +430,7 @@ static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
         if (desc) {
             error_prepend(errp, "Failed to read %s: ", desc);
         }
-        return -1;
+        return ret;
     }
 
     return 0;
@@ -375,8 +441,9 @@ static inline int nbd_read##bits(QIOChannel *ioc,                       \
                                  uint##bits##_t *val,                   \
                                  const char *desc, Error **errp)        \
 {                                                                       \
-    if (nbd_read(ioc, val, sizeof(*val), desc, errp) < 0) {             \
-        return -1;                                                      \
+    int ret = nbd_read(ioc, val, sizeof(*val), desc, errp);             \
+    if (ret < 0) {                                                      \
+        return ret;                                                     \
     }                                                                   \
     *val = be##bits##_to_cpu(*val);                                     \
     return 0;                                                           \
@@ -404,5 +471,23 @@ const char *nbd_rep_lookup(uint32_t rep);
 const char *nbd_info_lookup(uint16_t info);
 const char *nbd_cmd_lookup(uint16_t info);
 const char *nbd_err_lookup(int err);
+const char *nbd_mode_lookup(NBDMode mode);
+
+/* nbd/client-connection.c */
+void nbd_client_connection_enable_retry(NBDClientConnection *conn);
+
+NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
+                                               bool do_negotiation,
+                                               const char *export_name,
+                                               const char *x_dirty_bitmap,
+                                               QCryptoTLSCreds *tlscreds,
+                                               const char *tlshostname);
+void nbd_client_connection_release(NBDClientConnection *conn);
+
+QIOChannel *coroutine_fn
+nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info,
+                            bool blocking, Error **errp);
+
+void nbd_co_establish_connection_cancel(NBDClientConnection *conn);
 
 #endif
index 3e02d9ca98438a50eda6dd38c581f88cacfc99e6..bb231d0b9ad0486bf32ab1023f952f5bd2057a75 100644 (file)
@@ -1,30 +1,83 @@
 #ifndef BLOCK_NVME_H
 #define BLOCK_NVME_H
 
+#include "hw/registerfields.h"
+
 typedef struct QEMU_PACKED NvmeBar {
     uint64_t    cap;
     uint32_t    vs;
     uint32_t    intms;
     uint32_t    intmc;
     uint32_t    cc;
-    uint32_t    rsvd1;
+    uint8_t     rsvd24[4];
     uint32_t    csts;
-    uint32_t    nssrc;
+    uint32_t    nssr;
     uint32_t    aqa;
     uint64_t    asq;
     uint64_t    acq;
     uint32_t    cmbloc;
     uint32_t    cmbsz;
-    uint8_t     padding[3520]; /* not used by QEMU */
+    uint32_t    bpinfo;
+    uint32_t    bprsel;
+    uint64_t    bpmbl;
+    uint64_t    cmbmsc;
+    uint32_t    cmbsts;
+    uint8_t     rsvd92[3492];
     uint32_t    pmrcap;
     uint32_t    pmrctl;
     uint32_t    pmrsts;
     uint32_t    pmrebs;
     uint32_t    pmrswtp;
-    uint64_t    pmrmsc;
-    uint8_t     reserved[484];
+    uint32_t    pmrmscl;
+    uint32_t    pmrmscu;
+    uint8_t     css[484];
 } NvmeBar;
 
+enum NvmeBarRegs {
+    NVME_REG_CAP     = offsetof(NvmeBar, cap),
+    NVME_REG_VS      = offsetof(NvmeBar, vs),
+    NVME_REG_INTMS   = offsetof(NvmeBar, intms),
+    NVME_REG_INTMC   = offsetof(NvmeBar, intmc),
+    NVME_REG_CC      = offsetof(NvmeBar, cc),
+    NVME_REG_CSTS    = offsetof(NvmeBar, csts),
+    NVME_REG_NSSR    = offsetof(NvmeBar, nssr),
+    NVME_REG_AQA     = offsetof(NvmeBar, aqa),
+    NVME_REG_ASQ     = offsetof(NvmeBar, asq),
+    NVME_REG_ACQ     = offsetof(NvmeBar, acq),
+    NVME_REG_CMBLOC  = offsetof(NvmeBar, cmbloc),
+    NVME_REG_CMBSZ   = offsetof(NvmeBar, cmbsz),
+    NVME_REG_BPINFO  = offsetof(NvmeBar, bpinfo),
+    NVME_REG_BPRSEL  = offsetof(NvmeBar, bprsel),
+    NVME_REG_BPMBL   = offsetof(NvmeBar, bpmbl),
+    NVME_REG_CMBMSC  = offsetof(NvmeBar, cmbmsc),
+    NVME_REG_CMBSTS  = offsetof(NvmeBar, cmbsts),
+    NVME_REG_PMRCAP  = offsetof(NvmeBar, pmrcap),
+    NVME_REG_PMRCTL  = offsetof(NvmeBar, pmrctl),
+    NVME_REG_PMRSTS  = offsetof(NvmeBar, pmrsts),
+    NVME_REG_PMREBS  = offsetof(NvmeBar, pmrebs),
+    NVME_REG_PMRSWTP = offsetof(NvmeBar, pmrswtp),
+    NVME_REG_PMRMSCL = offsetof(NvmeBar, pmrmscl),
+    NVME_REG_PMRMSCU = offsetof(NvmeBar, pmrmscu),
+};
+
+typedef struct QEMU_PACKED NvmeEndGrpLog {
+    uint8_t  critical_warning;
+    uint8_t  rsvd[2];
+    uint8_t  avail_spare;
+    uint8_t  avail_spare_thres;
+    uint8_t  percet_used;
+    uint8_t  rsvd1[26];
+    uint64_t end_estimate[2];
+    uint64_t data_units_read[2];
+    uint64_t data_units_written[2];
+    uint64_t media_units_written[2];
+    uint64_t host_read_commands[2];
+    uint64_t host_write_commands[2];
+    uint64_t media_integrity_errors[2];
+    uint64_t no_err_info_log_entries[2];
+    uint8_t rsvd2[352];
+} NvmeEndGrpLog;
+
 enum NvmeCapShift {
     CAP_MQES_SHIFT     = 0,
     CAP_CQR_SHIFT      = 16,
@@ -35,7 +88,8 @@ enum NvmeCapShift {
     CAP_CSS_SHIFT      = 37,
     CAP_MPSMIN_SHIFT   = 48,
     CAP_MPSMAX_SHIFT   = 52,
-    CAP_PMR_SHIFT      = 56,
+    CAP_PMRS_SHIFT     = 56,
+    CAP_CMBS_SHIFT     = 57,
 };
 
 enum NvmeCapMask {
@@ -48,7 +102,8 @@ enum NvmeCapMask {
     CAP_CSS_MASK       = 0xff,
     CAP_MPSMIN_MASK    = 0xf,
     CAP_MPSMAX_MASK    = 0xf,
-    CAP_PMR_MASK       = 0x1,
+    CAP_PMRS_MASK      = 0x1,
+    CAP_CMBS_MASK      = 0x1,
 };
 
 #define NVME_CAP_MQES(cap)  (((cap) >> CAP_MQES_SHIFT)   & CAP_MQES_MASK)
@@ -60,30 +115,35 @@ enum NvmeCapMask {
 #define NVME_CAP_CSS(cap)   (((cap) >> CAP_CSS_SHIFT)    & CAP_CSS_MASK)
 #define NVME_CAP_MPSMIN(cap)(((cap) >> CAP_MPSMIN_SHIFT) & CAP_MPSMIN_MASK)
 #define NVME_CAP_MPSMAX(cap)(((cap) >> CAP_MPSMAX_SHIFT) & CAP_MPSMAX_MASK)
-
-#define NVME_CAP_SET_MQES(cap, val)   (cap |= (uint64_t)(val & CAP_MQES_MASK)  \
-                                                           << CAP_MQES_SHIFT)
-#define NVME_CAP_SET_CQR(cap, val)    (cap |= (uint64_t)(val & CAP_CQR_MASK)   \
-                                                           << CAP_CQR_SHIFT)
-#define NVME_CAP_SET_AMS(cap, val)    (cap |= (uint64_t)(val & CAP_AMS_MASK)   \
-                                                           << CAP_AMS_SHIFT)
-#define NVME_CAP_SET_TO(cap, val)     (cap |= (uint64_t)(val & CAP_TO_MASK)    \
-                                                           << CAP_TO_SHIFT)
-#define NVME_CAP_SET_DSTRD(cap, val)  (cap |= (uint64_t)(val & CAP_DSTRD_MASK) \
-                                                           << CAP_DSTRD_SHIFT)
-#define NVME_CAP_SET_NSSRS(cap, val)  (cap |= (uint64_t)(val & CAP_NSSRS_MASK) \
-                                                           << CAP_NSSRS_SHIFT)
-#define NVME_CAP_SET_CSS(cap, val)    (cap |= (uint64_t)(val & CAP_CSS_MASK)   \
-                                                           << CAP_CSS_SHIFT)
-#define NVME_CAP_SET_MPSMIN(cap, val) (cap |= (uint64_t)(val & CAP_MPSMIN_MASK)\
-                                                           << CAP_MPSMIN_SHIFT)
-#define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\
-                                                            << CAP_MPSMAX_SHIFT)
-#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMR_MASK)\
-                                                            << CAP_PMR_SHIFT)
+#define NVME_CAP_PMRS(cap)  (((cap) >> CAP_PMRS_SHIFT)   & CAP_PMRS_MASK)
+#define NVME_CAP_CMBS(cap)  (((cap) >> CAP_CMBS_SHIFT)   & CAP_CMBS_MASK)
+
+#define NVME_CAP_SET_MQES(cap, val)   \
+    ((cap) |= (uint64_t)((val) & CAP_MQES_MASK)   << CAP_MQES_SHIFT)
+#define NVME_CAP_SET_CQR(cap, val)    \
+    ((cap) |= (uint64_t)((val) & CAP_CQR_MASK)    << CAP_CQR_SHIFT)
+#define NVME_CAP_SET_AMS(cap, val)    \
+    ((cap) |= (uint64_t)((val) & CAP_AMS_MASK)    << CAP_AMS_SHIFT)
+#define NVME_CAP_SET_TO(cap, val)     \
+    ((cap) |= (uint64_t)((val) & CAP_TO_MASK)     << CAP_TO_SHIFT)
+#define NVME_CAP_SET_DSTRD(cap, val)  \
+    ((cap) |= (uint64_t)((val) & CAP_DSTRD_MASK)  << CAP_DSTRD_SHIFT)
+#define NVME_CAP_SET_NSSRS(cap, val)  \
+    ((cap) |= (uint64_t)((val) & CAP_NSSRS_MASK)  << CAP_NSSRS_SHIFT)
+#define NVME_CAP_SET_CSS(cap, val)    \
+    ((cap) |= (uint64_t)((val) & CAP_CSS_MASK)    << CAP_CSS_SHIFT)
+#define NVME_CAP_SET_MPSMIN(cap, val) \
+    ((cap) |= (uint64_t)((val) & CAP_MPSMIN_MASK) << CAP_MPSMIN_SHIFT)
+#define NVME_CAP_SET_MPSMAX(cap, val) \
+    ((cap) |= (uint64_t)((val) & CAP_MPSMAX_MASK) << CAP_MPSMAX_SHIFT)
+#define NVME_CAP_SET_PMRS(cap, val)   \
+    ((cap) |= (uint64_t)((val) & CAP_PMRS_MASK)   << CAP_PMRS_SHIFT)
+#define NVME_CAP_SET_CMBS(cap, val)   \
+    ((cap) |= (uint64_t)((val) & CAP_CMBS_MASK)   << CAP_CMBS_SHIFT)
 
 enum NvmeCapCss {
     NVME_CAP_CSS_NVM        = 1 << 0,
+    NVME_CAP_CSS_CSI_SUPP   = 1 << 6,
     NVME_CAP_CSS_ADMIN_ONLY = 1 << 7,
 };
 
@@ -117,9 +177,25 @@ enum NvmeCcMask {
 
 enum NvmeCcCss {
     NVME_CC_CSS_NVM        = 0x0,
+    NVME_CC_CSS_CSI        = 0x6,
     NVME_CC_CSS_ADMIN_ONLY = 0x7,
 };
 
+#define NVME_SET_CC_EN(cc, val)     \
+    (cc |= (uint32_t)((val) & CC_EN_MASK) << CC_EN_SHIFT)
+#define NVME_SET_CC_CSS(cc, val)    \
+    (cc |= (uint32_t)((val) & CC_CSS_MASK) << CC_CSS_SHIFT)
+#define NVME_SET_CC_MPS(cc, val)    \
+    (cc |= (uint32_t)((val) & CC_MPS_MASK) << CC_MPS_SHIFT)
+#define NVME_SET_CC_AMS(cc, val)    \
+    (cc |= (uint32_t)((val) & CC_AMS_MASK) << CC_AMS_SHIFT)
+#define NVME_SET_CC_SHN(cc, val)    \
+    (cc |= (uint32_t)((val) & CC_SHN_MASK) << CC_SHN_SHIFT)
+#define NVME_SET_CC_IOSQES(cc, val) \
+    (cc |= (uint32_t)((val) & CC_IOSQES_MASK) << CC_IOSQES_SHIFT)
+#define NVME_SET_CC_IOCQES(cc, val) \
+    (cc |= (uint32_t)((val) & CC_IOCQES_MASK) << CC_IOCQES_SHIFT)
+
 enum NvmeCstsShift {
     CSTS_RDY_SHIFT      = 0,
     CSTS_CFS_SHIFT      = 1,
@@ -162,25 +238,64 @@ enum NvmeAqaMask {
 #define NVME_AQA_ACQS(aqa) ((aqa >> AQA_ACQS_SHIFT) & AQA_ACQS_MASK)
 
 enum NvmeCmblocShift {
-    CMBLOC_BIR_SHIFT  = 0,
-    CMBLOC_OFST_SHIFT = 12,
+    CMBLOC_BIR_SHIFT     = 0,
+    CMBLOC_CQMMS_SHIFT   = 3,
+    CMBLOC_CQPDS_SHIFT   = 4,
+    CMBLOC_CDPMLS_SHIFT  = 5,
+    CMBLOC_CDPCILS_SHIFT = 6,
+    CMBLOC_CDMMMS_SHIFT  = 7,
+    CMBLOC_CQDA_SHIFT    = 8,
+    CMBLOC_OFST_SHIFT    = 12,
 };
 
 enum NvmeCmblocMask {
-    CMBLOC_BIR_MASK  = 0x7,
-    CMBLOC_OFST_MASK = 0xfffff,
+    CMBLOC_BIR_MASK     = 0x7,
+    CMBLOC_CQMMS_MASK   = 0x1,
+    CMBLOC_CQPDS_MASK   = 0x1,
+    CMBLOC_CDPMLS_MASK  = 0x1,
+    CMBLOC_CDPCILS_MASK = 0x1,
+    CMBLOC_CDMMMS_MASK  = 0x1,
+    CMBLOC_CQDA_MASK    = 0x1,
+    CMBLOC_OFST_MASK    = 0xfffff,
 };
 
-#define NVME_CMBLOC_BIR(cmbloc) ((cmbloc >> CMBLOC_BIR_SHIFT)  & \
-                                 CMBLOC_BIR_MASK)
-#define NVME_CMBLOC_OFST(cmbloc)((cmbloc >> CMBLOC_OFST_SHIFT) & \
-                                 CMBLOC_OFST_MASK)
-
-#define NVME_CMBLOC_SET_BIR(cmbloc, val)  \
+#define NVME_CMBLOC_BIR(cmbloc) \
+    ((cmbloc >> CMBLOC_BIR_SHIFT) & CMBLOC_BIR_MASK)
+#define NVME_CMBLOC_CQMMS(cmbloc) \
+    ((cmbloc >> CMBLOC_CQMMS_SHIFT) & CMBLOC_CQMMS_MASK)
+#define NVME_CMBLOC_CQPDS(cmbloc) \
+    ((cmbloc >> CMBLOC_CQPDS_SHIFT) & CMBLOC_CQPDS_MASK)
+#define NVME_CMBLOC_CDPMLS(cmbloc) \
+    ((cmbloc >> CMBLOC_CDPMLS_SHIFT) & CMBLOC_CDPMLS_MASK)
+#define NVME_CMBLOC_CDPCILS(cmbloc) \
+    ((cmbloc >> CMBLOC_CDPCILS_SHIFT) & CMBLOC_CDPCILS_MASK)
+#define NVME_CMBLOC_CDMMMS(cmbloc) \
+    ((cmbloc >> CMBLOC_CDMMMS_SHIFT) & CMBLOC_CDMMMS_MASK)
+#define NVME_CMBLOC_CQDA(cmbloc) \
+    ((cmbloc >> CMBLOC_CQDA_SHIFT) & CMBLOC_CQDA_MASK)
+#define NVME_CMBLOC_OFST(cmbloc) \
+    ((cmbloc >> CMBLOC_OFST_SHIFT) & CMBLOC_OFST_MASK)
+
+#define NVME_CMBLOC_SET_BIR(cmbloc, val) \
     (cmbloc |= (uint64_t)(val & CMBLOC_BIR_MASK) << CMBLOC_BIR_SHIFT)
+#define NVME_CMBLOC_SET_CQMMS(cmbloc, val) \
+    (cmbloc |= (uint64_t)(val & CMBLOC_CQMMS_MASK) << CMBLOC_CQMMS_SHIFT)
+#define NVME_CMBLOC_SET_CQPDS(cmbloc, val) \
+    (cmbloc |= (uint64_t)(val & CMBLOC_CQPDS_MASK) << CMBLOC_CQPDS_SHIFT)
+#define NVME_CMBLOC_SET_CDPMLS(cmbloc, val) \
+    (cmbloc |= (uint64_t)(val & CMBLOC_CDPMLS_MASK) << CMBLOC_CDPMLS_SHIFT)
+#define NVME_CMBLOC_SET_CDPCILS(cmbloc, val) \
+    (cmbloc |= (uint64_t)(val & CMBLOC_CDPCILS_MASK) << CMBLOC_CDPCILS_SHIFT)
+#define NVME_CMBLOC_SET_CDMMMS(cmbloc, val) \
+    (cmbloc |= (uint64_t)(val & CMBLOC_CDMMMS_MASK) << CMBLOC_CDMMMS_SHIFT)
+#define NVME_CMBLOC_SET_CQDA(cmbloc, val) \
+    (cmbloc |= (uint64_t)(val & CMBLOC_CQDA_MASK) << CMBLOC_CQDA_SHIFT)
 #define NVME_CMBLOC_SET_OFST(cmbloc, val) \
     (cmbloc |= (uint64_t)(val & CMBLOC_OFST_MASK) << CMBLOC_OFST_SHIFT)
 
+#define NVME_CMBMSMC_SET_CRE (cmbmsc, val) \
+    (cmbmsc |= (uint64_t)(val & CMBLOC_OFST_MASK) << CMBMSC_CRE_SHIFT)
+
 enum NvmeCmbszShift {
     CMBSZ_SQS_SHIFT   = 0,
     CMBSZ_CQS_SHIFT   = 1,
@@ -227,6 +342,46 @@ enum NvmeCmbszMask {
 #define NVME_CMBSZ_GETSIZE(cmbsz) \
     (NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz))))
 
+enum NvmeCmbmscShift {
+    CMBMSC_CRE_SHIFT  = 0,
+    CMBMSC_CMSE_SHIFT = 1,
+    CMBMSC_CBA_SHIFT  = 12,
+};
+
+enum NvmeCmbmscMask {
+    CMBMSC_CRE_MASK  = 0x1,
+    CMBMSC_CMSE_MASK = 0x1,
+    CMBMSC_CBA_MASK  = ((1ULL << 52) - 1),
+};
+
+#define NVME_CMBMSC_CRE(cmbmsc) \
+    ((cmbmsc >> CMBMSC_CRE_SHIFT)  & CMBMSC_CRE_MASK)
+#define NVME_CMBMSC_CMSE(cmbmsc) \
+    ((cmbmsc >> CMBMSC_CMSE_SHIFT) & CMBMSC_CMSE_MASK)
+#define NVME_CMBMSC_CBA(cmbmsc) \
+    ((cmbmsc >> CMBMSC_CBA_SHIFT) & CMBMSC_CBA_MASK)
+
+
+#define NVME_CMBMSC_SET_CRE(cmbmsc, val)  \
+    (cmbmsc |= (uint64_t)(val & CMBMSC_CRE_MASK) << CMBMSC_CRE_SHIFT)
+#define NVME_CMBMSC_SET_CMSE(cmbmsc, val) \
+    (cmbmsc |= (uint64_t)(val & CMBMSC_CMSE_MASK) << CMBMSC_CMSE_SHIFT)
+#define NVME_CMBMSC_SET_CBA(cmbmsc, val) \
+    (cmbmsc |= (uint64_t)(val & CMBMSC_CBA_MASK) << CMBMSC_CBA_SHIFT)
+
+enum NvmeCmbstsShift {
+    CMBSTS_CBAI_SHIFT = 0,
+};
+enum NvmeCmbstsMask {
+    CMBSTS_CBAI_MASK = 0x1,
+};
+
+#define NVME_CMBSTS_CBAI(cmbsts) \
+    ((cmbsts >> CMBSTS_CBAI_SHIFT) & CMBSTS_CBAI_MASK)
+
+#define NVME_CMBSTS_SET_CBAI(cmbsts, val)  \
+    (cmbsts |= (uint64_t)(val & CMBSTS_CBAI_MASK) << CMBSTS_CBAI_SHIFT)
+
 enum NvmePmrcapShift {
     PMRCAP_RDS_SHIFT      = 3,
     PMRCAP_WDS_SHIFT      = 4,
@@ -368,25 +523,25 @@ enum NvmePmrswtpMask {
 #define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val)   \
     (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT)
 
-enum NvmePmrmscShift {
-    PMRMSC_CMSE_SHIFT   = 1,
-    PMRMSC_CBA_SHIFT    = 12,
+enum NvmePmrmsclShift {
+    PMRMSCL_CMSE_SHIFT   = 1,
+    PMRMSCL_CBA_SHIFT    = 12,
 };
 
-enum NvmePmrmscMask {
-    PMRMSC_CMSE_MASK   = 0x1,
-    PMRMSC_CBA_MASK    = 0xfffffffffffff,
+enum NvmePmrmsclMask {
+    PMRMSCL_CMSE_MASK   = 0x1,
+    PMRMSCL_CBA_MASK    = 0xfffff,
 };
 
-#define NVME_PMRMSC_CMSE(pmrmsc)    \
-    ((pmrmsc >> PMRMSC_CMSE_SHIFT)   & PMRMSC_CMSE_MASK)
-#define NVME_PMRMSC_CBA(pmrmsc)     \
-    ((pmrmsc >> PMRMSC_CBA_SHIFT)   & PMRMSC_CBA_MASK)
+#define NVME_PMRMSCL_CMSE(pmrmscl)    \
+    ((pmrmscl >> PMRMSCL_CMSE_SHIFT)   & PMRMSCL_CMSE_MASK)
+#define NVME_PMRMSCL_CBA(pmrmscl)     \
+    ((pmrmscl >> PMRMSCL_CBA_SHIFT)   & PMRMSCL_CBA_MASK)
 
-#define NVME_PMRMSC_SET_CMSE(pmrmsc, val)   \
-    (pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT)
-#define NVME_PMRMSC_SET_CBA(pmrmsc, val)   \
-    (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT)
+#define NVME_PMRMSCL_SET_CMSE(pmrmscl, val)   \
+    (pmrmscl |= (uint32_t)(val & PMRMSCL_CMSE_MASK) << PMRMSCL_CMSE_SHIFT)
+#define NVME_PMRMSCL_SET_CBA(pmrmscl, val)   \
+    (pmrmscl |= (uint32_t)(val & PMRMSCL_CBA_MASK) << PMRMSCL_CBA_SHIFT)
 
 enum NvmeSglDescriptorType {
     NVME_SGL_DESCR_TYPE_DATA_BLOCK          = 0x0,
@@ -459,6 +614,11 @@ enum NvmeAdminCommands {
     NVME_ADM_CMD_ASYNC_EV_REQ   = 0x0c,
     NVME_ADM_CMD_ACTIVATE_FW    = 0x10,
     NVME_ADM_CMD_DOWNLOAD_FW    = 0x11,
+    NVME_ADM_CMD_NS_ATTACHMENT  = 0x15,
+    NVME_ADM_CMD_DIRECTIVE_SEND = 0x19,
+    NVME_ADM_CMD_VIRT_MNGMT     = 0x1c,
+    NVME_ADM_CMD_DIRECTIVE_RECV = 0x1a,
+    NVME_ADM_CMD_DBBUF_CONFIG   = 0x7c,
     NVME_ADM_CMD_FORMAT_NVM     = 0x80,
     NVME_ADM_CMD_SECURITY_SEND  = 0x81,
     NVME_ADM_CMD_SECURITY_RECV  = 0x82,
@@ -472,6 +632,13 @@ enum NvmeIoCommands {
     NVME_CMD_COMPARE            = 0x05,
     NVME_CMD_WRITE_ZEROES       = 0x08,
     NVME_CMD_DSM                = 0x09,
+    NVME_CMD_VERIFY             = 0x0c,
+    NVME_CMD_IO_MGMT_RECV       = 0x12,
+    NVME_CMD_COPY               = 0x19,
+    NVME_CMD_IO_MGMT_SEND       = 0x1d,
+    NVME_CMD_ZONE_MGMT_SEND     = 0x79,
+    NVME_CMD_ZONE_MGMT_RECV     = 0x7a,
+    NVME_CMD_ZONE_APPEND        = 0x7d,
 };
 
 typedef struct QEMU_PACKED NvmeDeleteQ {
@@ -540,8 +707,13 @@ typedef struct QEMU_PACKED NvmeIdentify {
     uint64_t    rsvd2[2];
     uint64_t    prp1;
     uint64_t    prp2;
-    uint32_t    cns;
-    uint32_t    rsvd11[5];
+    uint8_t     cns;
+    uint8_t     rsvd10;
+    uint16_t    ctrlid;
+    uint16_t    nvmsetid;
+    uint8_t     rsvd11;
+    uint8_t     csi;
+    uint32_t    rsvd12[4];
 } NvmeIdentify;
 
 typedef struct QEMU_PACKED NvmeRwCmd {
@@ -549,13 +721,16 @@ typedef struct QEMU_PACKED NvmeRwCmd {
     uint8_t     flags;
     uint16_t    cid;
     uint32_t    nsid;
-    uint64_t    rsvd2;
+    uint32_t    cdw2;
+    uint32_t    cdw3;
     uint64_t    mptr;
     NvmeCmdDptr dptr;
     uint64_t    slba;
     uint16_t    nlb;
     uint16_t    control;
-    uint32_t    dsmgmt;
+    uint8_t     dsmgmt;
+    uint8_t     rsvd;
+    uint16_t    dspec;
     uint32_t    reftag;
     uint16_t    apptag;
     uint16_t    appmask;
@@ -579,10 +754,22 @@ enum {
     NVME_RW_DSM_LATENCY_LOW     = 3 << 4,
     NVME_RW_DSM_SEQ_REQ         = 1 << 6,
     NVME_RW_DSM_COMPRESSED      = 1 << 7,
+    NVME_RW_PIREMAP             = 1 << 9,
     NVME_RW_PRINFO_PRACT        = 1 << 13,
     NVME_RW_PRINFO_PRCHK_GUARD  = 1 << 12,
     NVME_RW_PRINFO_PRCHK_APP    = 1 << 11,
     NVME_RW_PRINFO_PRCHK_REF    = 1 << 10,
+    NVME_RW_PRINFO_PRCHK_MASK   = 7 << 10,
+};
+
+#define NVME_RW_PRINFO(control) ((control >> 10) & 0xf)
+
+enum {
+    NVME_PRINFO_PRACT       = 1 << 3,
+    NVME_PRINFO_PRCHK_GUARD = 1 << 2,
+    NVME_PRINFO_PRCHK_APP   = 1 << 1,
+    NVME_PRINFO_PRCHK_REF   = 1 << 0,
+    NVME_PRINFO_PRCHK_MASK  = 7 << 0,
 };
 
 typedef struct QEMU_PACKED NvmeDsmCmd {
@@ -609,9 +796,54 @@ typedef struct QEMU_PACKED NvmeDsmRange {
     uint64_t    slba;
 } NvmeDsmRange;
 
+enum {
+    NVME_COPY_FORMAT_0 = 0x0,
+    NVME_COPY_FORMAT_1 = 0x1,
+};
+
+typedef struct QEMU_PACKED NvmeCopyCmd {
+    uint8_t     opcode;
+    uint8_t     flags;
+    uint16_t    cid;
+    uint32_t    nsid;
+    uint32_t    cdw2;
+    uint32_t    cdw3;
+    uint32_t    rsvd2[2];
+    NvmeCmdDptr dptr;
+    uint64_t    sdlba;
+    uint8_t     nr;
+    uint8_t     control[3];
+    uint16_t    rsvd13;
+    uint16_t    dspec;
+    uint32_t    reftag;
+    uint16_t    apptag;
+    uint16_t    appmask;
+} NvmeCopyCmd;
+
+typedef struct QEMU_PACKED NvmeCopySourceRangeFormat0 {
+    uint8_t  rsvd0[8];
+    uint64_t slba;
+    uint16_t nlb;
+    uint8_t  rsvd18[6];
+    uint32_t reftag;
+    uint16_t apptag;
+    uint16_t appmask;
+} NvmeCopySourceRangeFormat0;
+
+typedef struct QEMU_PACKED NvmeCopySourceRangeFormat1 {
+    uint8_t  rsvd0[8];
+    uint64_t slba;
+    uint16_t nlb;
+    uint8_t  rsvd18[8];
+    uint8_t  sr[10];
+    uint16_t apptag;
+    uint16_t appmask;
+} NvmeCopySourceRangeFormat1;
+
 enum NvmeAsyncEventRequest {
     NVME_AER_TYPE_ERROR                     = 0,
     NVME_AER_TYPE_SMART                     = 1,
+    NVME_AER_TYPE_NOTICE                    = 2,
     NVME_AER_TYPE_IO_SPECIFIC               = 6,
     NVME_AER_TYPE_VENDOR_SPECIFIC           = 7,
     NVME_AER_INFO_ERR_INVALID_DB_REGISTER   = 0,
@@ -623,6 +855,7 @@ enum NvmeAsyncEventRequest {
     NVME_AER_INFO_SMART_RELIABILITY         = 0,
     NVME_AER_INFO_SMART_TEMP_THRESH         = 1,
     NVME_AER_INFO_SMART_SPARE_THRESH        = 2,
+    NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED    = 0,
 };
 
 typedef struct QEMU_PACKED NvmeAerResult {
@@ -632,9 +865,13 @@ typedef struct QEMU_PACKED NvmeAerResult {
     uint8_t resv;
 } NvmeAerResult;
 
+typedef struct QEMU_PACKED NvmeZonedResult {
+    uint64_t slba;
+} NvmeZonedResult;
+
 typedef struct QEMU_PACKED NvmeCqe {
     uint32_t    result;
-    uint32_t    rsvd;
+    uint32_t    dw1;
     uint16_t    sq_head;
     uint16_t    sq_id;
     uint16_t    cid;
@@ -662,10 +899,15 @@ enum NvmeStatusCodes {
     NVME_SGL_DESCR_TYPE_INVALID = 0x0011,
     NVME_INVALID_USE_OF_CMB     = 0x0012,
     NVME_INVALID_PRP_OFFSET     = 0x0013,
+    NVME_CMD_SET_CMB_REJECTED   = 0x002b,
+    NVME_INVALID_CMD_SET        = 0x002c,
+    NVME_FDP_DISABLED           = 0x0029,
+    NVME_INVALID_PHID_LIST      = 0x002a,
     NVME_LBA_RANGE              = 0x0080,
     NVME_CAP_EXCEEDED           = 0x0081,
     NVME_NS_NOT_READY           = 0x0082,
     NVME_NS_RESV_CONFLICT       = 0x0083,
+    NVME_FORMAT_IN_PROGRESS     = 0x0084,
     NVME_INVALID_CQID           = 0x0100,
     NVME_INVALID_QID            = 0x0101,
     NVME_MAX_QSIZE_EXCEEDED     = 0x0102,
@@ -683,9 +925,28 @@ enum NvmeStatusCodes {
     NVME_FEAT_NOT_CHANGEABLE    = 0x010e,
     NVME_FEAT_NOT_NS_SPEC       = 0x010f,
     NVME_FW_REQ_SUSYSTEM_RESET  = 0x0110,
+    NVME_NS_ALREADY_ATTACHED    = 0x0118,
+    NVME_NS_PRIVATE             = 0x0119,
+    NVME_NS_NOT_ATTACHED        = 0x011a,
+    NVME_NS_CTRL_LIST_INVALID   = 0x011c,
+    NVME_INVALID_CTRL_ID        = 0x011f,
+    NVME_INVALID_SEC_CTRL_STATE = 0x0120,
+    NVME_INVALID_NUM_RESOURCES  = 0x0121,
+    NVME_INVALID_RESOURCE_ID    = 0x0122,
     NVME_CONFLICTING_ATTRS      = 0x0180,
     NVME_INVALID_PROT_INFO      = 0x0181,
     NVME_WRITE_TO_RO            = 0x0182,
+    NVME_CMD_SIZE_LIMIT         = 0x0183,
+    NVME_INVALID_ZONE_OP        = 0x01b6,
+    NVME_NOZRWA                 = 0x01b7,
+    NVME_ZONE_BOUNDARY_ERROR    = 0x01b8,
+    NVME_ZONE_FULL              = 0x01b9,
+    NVME_ZONE_READ_ONLY         = 0x01ba,
+    NVME_ZONE_OFFLINE           = 0x01bb,
+    NVME_ZONE_INVALID_WRITE     = 0x01bc,
+    NVME_ZONE_TOO_MANY_ACTIVE   = 0x01bd,
+    NVME_ZONE_TOO_MANY_OPEN     = 0x01be,
+    NVME_ZONE_INVAL_TRANSITION  = 0x01bf,
     NVME_WRITE_FAULT            = 0x0280,
     NVME_UNRECOVERED_READ       = 0x0281,
     NVME_E2E_GUARD_ERROR        = 0x0282,
@@ -693,6 +954,8 @@ enum NvmeStatusCodes {
     NVME_E2E_REF_ERROR          = 0x0284,
     NVME_CMP_FAILURE            = 0x0285,
     NVME_ACCESS_DENIED          = 0x0286,
+    NVME_DULB                   = 0x0287,
+    NVME_E2E_STORAGE_TAG_ERROR  = 0x0288,
     NVME_MORE                   = 0x2000,
     NVME_DNR                    = 0x4000,
     NVME_NO_COMPLETE            = 0xffff,
@@ -743,18 +1006,43 @@ typedef struct QEMU_PACKED NvmeSmartLog {
     uint8_t     reserved2[320];
 } NvmeSmartLog;
 
+#define NVME_SMART_WARN_MAX     6
 enum NvmeSmartWarn {
     NVME_SMART_SPARE                  = 1 << 0,
     NVME_SMART_TEMPERATURE            = 1 << 1,
     NVME_SMART_RELIABILITY            = 1 << 2,
     NVME_SMART_MEDIA_READ_ONLY        = 1 << 3,
     NVME_SMART_FAILED_VOLATILE_MEDIA  = 1 << 4,
+    NVME_SMART_PMR_UNRELIABLE         = 1 << 5,
+};
+
+typedef struct NvmeEffectsLog {
+    uint32_t    acs[256];
+    uint32_t    iocs[256];
+    uint8_t     resv[2048];
+} NvmeEffectsLog;
+
+enum {
+    NVME_CMD_EFF_CSUPP      = 1 << 0,
+    NVME_CMD_EFF_LBCC       = 1 << 1,
+    NVME_CMD_EFF_NCC        = 1 << 2,
+    NVME_CMD_EFF_NIC        = 1 << 3,
+    NVME_CMD_EFF_CCC        = 1 << 4,
+    NVME_CMD_EFF_CSE_MASK   = 3 << 16,
+    NVME_CMD_EFF_UUID_SEL   = 1 << 19,
 };
 
 enum NvmeLogIdentifier {
-    NVME_LOG_ERROR_INFO     = 0x01,
-    NVME_LOG_SMART_INFO     = 0x02,
-    NVME_LOG_FW_SLOT_INFO   = 0x03,
+    NVME_LOG_ERROR_INFO                 = 0x01,
+    NVME_LOG_SMART_INFO                 = 0x02,
+    NVME_LOG_FW_SLOT_INFO               = 0x03,
+    NVME_LOG_CHANGED_NSLIST             = 0x04,
+    NVME_LOG_CMD_EFFECTS                = 0x05,
+    NVME_LOG_ENDGRP                     = 0x09,
+    NVME_LOG_FDP_CONFS                  = 0x20,
+    NVME_LOG_FDP_RUH_USAGE              = 0x21,
+    NVME_LOG_FDP_STATS                  = 0x22,
+    NVME_LOG_FDP_EVENTS                 = 0x23,
 };
 
 typedef struct QEMU_PACKED NvmePSD {
@@ -769,13 +1057,26 @@ typedef struct QEMU_PACKED NvmePSD {
     uint8_t     resv[16];
 } NvmePSD;
 
+#define NVME_CONTROLLER_LIST_SIZE 2048
 #define NVME_IDENTIFY_DATA_SIZE 4096
 
-enum {
-    NVME_ID_CNS_NS             = 0x0,
-    NVME_ID_CNS_CTRL           = 0x1,
-    NVME_ID_CNS_NS_ACTIVE_LIST = 0x2,
-    NVME_ID_CNS_NS_DESCR_LIST  = 0x3,
+enum NvmeIdCns {
+    NVME_ID_CNS_NS                    = 0x00,
+    NVME_ID_CNS_CTRL                  = 0x01,
+    NVME_ID_CNS_NS_ACTIVE_LIST        = 0x02,
+    NVME_ID_CNS_NS_DESCR_LIST         = 0x03,
+    NVME_ID_CNS_CS_NS                 = 0x05,
+    NVME_ID_CNS_CS_CTRL               = 0x06,
+    NVME_ID_CNS_CS_NS_ACTIVE_LIST     = 0x07,
+    NVME_ID_CNS_NS_PRESENT_LIST       = 0x10,
+    NVME_ID_CNS_NS_PRESENT            = 0x11,
+    NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12,
+    NVME_ID_CNS_CTRL_LIST             = 0x13,
+    NVME_ID_CNS_PRIMARY_CTRL_CAP      = 0x14,
+    NVME_ID_CNS_SECONDARY_CTRL_LIST   = 0x15,
+    NVME_ID_CNS_CS_NS_PRESENT_LIST    = 0x1a,
+    NVME_ID_CNS_CS_NS_PRESENT         = 0x1b,
+    NVME_ID_CNS_IO_COMMAND_SET        = 0x1c,
 };
 
 typedef struct QEMU_PACKED NvmeIdCtrl {
@@ -794,7 +1095,8 @@ typedef struct QEMU_PACKED NvmeIdCtrl {
     uint32_t    rtd3e;
     uint32_t    oaes;
     uint32_t    ctratt;
-    uint8_t     rsvd100[12];
+    uint8_t     rsvd100[11];
+    uint8_t     cntrltype;
     uint8_t     fguid[16];
     uint8_t     rsvd128[128];
     uint16_t    oacs;
@@ -822,7 +1124,10 @@ typedef struct QEMU_PACKED NvmeIdCtrl {
     uint16_t    mntmt;
     uint16_t    mxtmt;
     uint32_t    sanicap;
-    uint8_t     rsvd332[180];
+    uint8_t     rsvd332[6];
+    uint16_t    nsetidmax;
+    uint16_t    endgidmax;
+    uint8_t     rsvd342[170];
     uint8_t     sqes;
     uint8_t     cqes;
     uint16_t    maxcmd;
@@ -836,7 +1141,7 @@ typedef struct QEMU_PACKED NvmeIdCtrl {
     uint8_t     nvscc;
     uint8_t     rsvd531;
     uint16_t    acwu;
-    uint8_t     rsvd534[2];
+    uint16_t    ocfs;
     uint32_t    sgls;
     uint8_t     rsvd540[228];
     uint8_t     subnqn[256];
@@ -845,10 +1150,38 @@ typedef struct QEMU_PACKED NvmeIdCtrl {
     uint8_t     vs[1024];
 } NvmeIdCtrl;
 
+typedef struct NvmeIdCtrlZoned {
+    uint8_t     zasl;
+    uint8_t     rsvd1[4095];
+} NvmeIdCtrlZoned;
+
+typedef struct NvmeIdCtrlNvm {
+    uint8_t     vsl;
+    uint8_t     wzsl;
+    uint8_t     wusl;
+    uint8_t     dmrl;
+    uint32_t    dmrsl;
+    uint64_t    dmsl;
+    uint8_t     rsvd16[4080];
+} NvmeIdCtrlNvm;
+
+enum NvmeIdCtrlOaes {
+    NVME_OAES_NS_ATTR   = 1 << 8,
+};
+
+enum NvmeIdCtrlCtratt {
+    NVME_CTRATT_ENDGRPS = 1 <<  4,
+    NVME_CTRATT_ELBAS   = 1 << 15,
+    NVME_CTRATT_FDPS    = 1 << 19,
+};
+
 enum NvmeIdCtrlOacs {
-    NVME_OACS_SECURITY  = 1 << 0,
-    NVME_OACS_FORMAT    = 1 << 1,
-    NVME_OACS_FW        = 1 << 2,
+    NVME_OACS_SECURITY      = 1 << 0,
+    NVME_OACS_FORMAT        = 1 << 1,
+    NVME_OACS_FW            = 1 << 2,
+    NVME_OACS_NS_MGMT       = 1 << 3,
+    NVME_OACS_DIRECTIVES    = 1 << 5,
+    NVME_OACS_DBBUF         = 1 << 8,
 };
 
 enum NvmeIdCtrlOncs {
@@ -859,6 +1192,21 @@ enum NvmeIdCtrlOncs {
     NVME_ONCS_FEATURES      = 1 << 4,
     NVME_ONCS_RESRVATIONS   = 1 << 5,
     NVME_ONCS_TIMESTAMP     = 1 << 6,
+    NVME_ONCS_VERIFY        = 1 << 7,
+    NVME_ONCS_COPY          = 1 << 8,
+};
+
+enum NvmeIdCtrlOcfs {
+    NVME_OCFS_COPY_FORMAT_0 = 1 << NVME_COPY_FORMAT_0,
+    NVME_OCFS_COPY_FORMAT_1 = 1 << NVME_COPY_FORMAT_1,
+};
+
+enum NvmeIdctrlVwc {
+    NVME_VWC_PRESENT                    = 1 << 0,
+    NVME_VWC_NSID_BROADCAST_NO_SUPPORT  = 0 << 1,
+    NVME_VWC_NSID_BROADCAST_RESERVED    = 1 << 1,
+    NVME_VWC_NSID_BROADCAST_CTRL_SPEC   = 2 << 1,
+    NVME_VWC_NSID_BROADCAST_SUPPORT     = 3 << 1,
 };
 
 enum NvmeIdCtrlFrmw {
@@ -867,9 +1215,19 @@ enum NvmeIdCtrlFrmw {
 
 enum NvmeIdCtrlLpa {
     NVME_LPA_NS_SMART = 1 << 0,
+    NVME_LPA_CSE      = 1 << 1,
     NVME_LPA_EXTENDED = 1 << 2,
 };
 
+enum NvmeIdCtrlCmic {
+    NVME_CMIC_MULTI_CTRL    = 1 << 1,
+};
+
+enum NvmeNsAttachmentOperation {
+    NVME_NS_ATTACHMENT_ATTACH = 0x0,
+    NVME_NS_ATTACHMENT_DETACH = 0x1,
+};
+
 #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf)
 #define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf)
 #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf)
@@ -908,6 +1266,10 @@ enum NvmeIdCtrlLpa {
 #define NVME_AEC_SMART(aec)         (aec & 0xff)
 #define NVME_AEC_NS_ATTR(aec)       ((aec >> 8) & 0x1)
 #define NVME_AEC_FW_ACTIVATION(aec) ((aec >> 9) & 0x1)
+#define NVME_AEC_ENDGRP_NOTICE(aec) ((aec >> 14) & 0x1)
+
+#define NVME_ERR_REC_TLER(err_rec)  (err_rec & 0xffff)
+#define NVME_ERR_REC_DULBE(err_rec) (err_rec & 0x10000)
 
 enum NvmeFeatureIds {
     NVME_ARBITRATION                = 0x1,
@@ -922,6 +1284,10 @@ enum NvmeFeatureIds {
     NVME_WRITE_ATOMICITY            = 0xa,
     NVME_ASYNCHRONOUS_EVENT_CONF    = 0xb,
     NVME_TIMESTAMP                  = 0xe,
+    NVME_HOST_BEHAVIOR_SUPPORT      = 0x16,
+    NVME_COMMAND_SET_PROFILE        = 0x19,
+    NVME_FDP_MODE                   = 0x1d,
+    NVME_FDP_EVENTS                 = 0x1e,
     NVME_SOFTWARE_PROGRESS_MARKER   = 0x80,
     NVME_FID_MAX                    = 0x100,
 };
@@ -962,13 +1328,27 @@ typedef struct QEMU_PACKED NvmeRangeType {
     uint8_t     rsvd48[16];
 } NvmeRangeType;
 
+typedef struct NvmeHostBehaviorSupport {
+    uint8_t     acre;
+    uint8_t     etdas;
+    uint8_t     lbafee;
+    uint8_t     rsvd3[509];
+} NvmeHostBehaviorSupport;
+
 typedef struct QEMU_PACKED NvmeLBAF {
     uint16_t    ms;
     uint8_t     ds;
     uint8_t     rp;
 } NvmeLBAF;
 
+typedef struct QEMU_PACKED NvmeLBAFE {
+    uint64_t    zsze;
+    uint8_t     zdes;
+    uint8_t     rsvd9[7];
+} NvmeLBAFE;
+
 #define NVME_NSID_BROADCAST 0xffffffff
+#define NVME_MAX_NLBAF 64
 
 typedef struct QEMU_PACKED NvmeIdNs {
     uint64_t    nsze;
@@ -992,30 +1372,90 @@ typedef struct QEMU_PACKED NvmeIdNs {
     uint16_t    nabspf;
     uint16_t    noiob;
     uint8_t     nvmcap[16];
-    uint8_t     rsvd64[40];
+    uint16_t    npwg;
+    uint16_t    npwa;
+    uint16_t    npdg;
+    uint16_t    npda;
+    uint16_t    nows;
+    uint16_t    mssrl;
+    uint32_t    mcl;
+    uint8_t     msrc;
+    uint8_t     rsvd81[18];
+    uint8_t     nsattr;
+    uint16_t    nvmsetid;
+    uint16_t    endgid;
     uint8_t     nguid[16];
     uint64_t    eui64;
-    NvmeLBAF    lbaf[16];
-    uint8_t     rsvd192[192];
+    NvmeLBAF    lbaf[NVME_MAX_NLBAF];
     uint8_t     vs[3712];
 } NvmeIdNs;
 
+#define NVME_ID_NS_NVM_ELBAF_PIF(elbaf) (((elbaf) >> 7) & 0x3)
+
+typedef struct QEMU_PACKED NvmeIdNsNvm {
+    uint64_t    lbstm;
+    uint8_t     pic;
+    uint8_t     rsvd9[3];
+    uint32_t    elbaf[NVME_MAX_NLBAF];
+    uint8_t     rsvd268[3828];
+} NvmeIdNsNvm;
+
 typedef struct QEMU_PACKED NvmeIdNsDescr {
     uint8_t nidt;
     uint8_t nidl;
     uint8_t rsvd2[2];
 } NvmeIdNsDescr;
 
-enum {
-    NVME_NIDT_EUI64_LEN =  8,
-    NVME_NIDT_NGUID_LEN = 16,
-    NVME_NIDT_UUID_LEN  = 16,
+enum NvmeNsIdentifierLength {
+    NVME_NIDL_EUI64             = 8,
+    NVME_NIDL_NGUID             = 16,
+    NVME_NIDL_UUID              = 16,
+    NVME_NIDL_CSI               = 1,
 };
 
 enum NvmeNsIdentifierType {
-    NVME_NIDT_EUI64 = 0x1,
-    NVME_NIDT_NGUID = 0x2,
-    NVME_NIDT_UUID  = 0x3,
+    NVME_NIDT_EUI64             = 0x01,
+    NVME_NIDT_NGUID             = 0x02,
+    NVME_NIDT_UUID              = 0x03,
+    NVME_NIDT_CSI               = 0x04,
+};
+
+enum NvmeIdNsNmic {
+    NVME_NMIC_NS_SHARED         = 1 << 0,
+};
+
+enum NvmeCsi {
+    NVME_CSI_NVM                = 0x00,
+    NVME_CSI_ZONED              = 0x02,
+};
+
+#define NVME_SET_CSI(vec, csi) (vec |= (uint8_t)(1 << (csi)))
+
+typedef struct QEMU_PACKED NvmeIdNsZoned {
+    uint16_t    zoc;
+    uint16_t    ozcs;
+    uint32_t    mar;
+    uint32_t    mor;
+    uint32_t    rrl;
+    uint32_t    frl;
+    uint8_t     rsvd12[24];
+    uint32_t    numzrwa;
+    uint16_t    zrwafg;
+    uint16_t    zrwas;
+    uint8_t     zrwacap;
+    uint8_t     rsvd53[2763];
+    NvmeLBAFE   lbafe[16];
+    uint8_t     rsvd3072[768];
+    uint8_t     vs[256];
+} NvmeIdNsZoned;
+
+enum NvmeIdNsZonedOzcs {
+    NVME_ID_NS_ZONED_OZCS_RAZB    = 1 << 0,
+    NVME_ID_NS_ZONED_OZCS_ZRWASUP = 1 << 1,
+};
+
+enum NvmeIdNsZonedZrwacap {
+    NVME_ID_NS_ZONED_ZRWACAP_EXPFLUSHSUP = 1 << 0,
 };
 
 /*Deallocate Logical Block Features*/
@@ -1029,6 +1469,7 @@ enum NvmeNsIdentifierType {
 
 
 #define NVME_ID_NS_NSFEAT_THIN(nsfeat)      ((nsfeat & 0x1))
+#define NVME_ID_NS_NSFEAT_DULBE(nsfeat)     ((nsfeat >> 2) & 0x1)
 #define NVME_ID_NS_FLBAS_EXTENDED(flbas)    ((flbas >> 4) & 0x1)
 #define NVME_ID_NS_FLBAS_INDEX(flbas)       ((flbas & 0xf))
 #define NVME_ID_NS_MC_SEPARATE(mc)          ((mc >> 1) & 0x1)
@@ -1041,20 +1482,358 @@ enum NvmeNsIdentifierType {
 #define NVME_ID_NS_DPC_TYPE_MASK            0x7
 
 enum NvmeIdNsDps {
-    DPS_TYPE_NONE   = 0,
-    DPS_TYPE_1      = 1,
-    DPS_TYPE_2      = 2,
-    DPS_TYPE_3      = 3,
-    DPS_TYPE_MASK   = 0x7,
-    DPS_FIRST_EIGHT = 8,
+    NVME_ID_NS_DPS_TYPE_NONE   = 0,
+    NVME_ID_NS_DPS_TYPE_1      = 1,
+    NVME_ID_NS_DPS_TYPE_2      = 2,
+    NVME_ID_NS_DPS_TYPE_3      = 3,
+    NVME_ID_NS_DPS_TYPE_MASK   = 0x7,
+    NVME_ID_NS_DPS_FIRST_EIGHT = 8,
+};
+
+enum NvmeIdNsFlbas {
+    NVME_ID_NS_FLBAS_EXTENDED = 1 << 4,
+};
+
+enum NvmeIdNsMc {
+    NVME_ID_NS_MC_EXTENDED = 1 << 0,
+    NVME_ID_NS_MC_SEPARATE = 1 << 1,
+};
+
+#define NVME_ID_NS_DPS_TYPE(dps) (dps & NVME_ID_NS_DPS_TYPE_MASK)
+
+enum NvmePIFormat {
+    NVME_PI_GUARD_16                 = 0,
+    NVME_PI_GUARD_64                 = 2,
+};
+
+typedef union NvmeDifTuple {
+    struct {
+        uint16_t guard;
+        uint16_t apptag;
+        uint32_t reftag;
+    } g16;
+
+    struct {
+        uint64_t guard;
+        uint16_t apptag;
+        uint8_t  sr[6];
+    } g64;
+} NvmeDifTuple;
+
+enum NvmeZoneAttr {
+    NVME_ZA_FINISHED_BY_CTLR         = 1 << 0,
+    NVME_ZA_FINISH_RECOMMENDED       = 1 << 1,
+    NVME_ZA_RESET_RECOMMENDED        = 1 << 2,
+    NVME_ZA_ZRWA_VALID               = 1 << 3,
+    NVME_ZA_ZD_EXT_VALID             = 1 << 7,
+};
+
+typedef struct QEMU_PACKED NvmeZoneReportHeader {
+    uint64_t    nr_zones;
+    uint8_t     rsvd[56];
+} NvmeZoneReportHeader;
+
+enum NvmeZoneReceiveAction {
+    NVME_ZONE_REPORT                 = 0,
+    NVME_ZONE_REPORT_EXTENDED        = 1,
+};
+
+enum NvmeZoneReportType {
+    NVME_ZONE_REPORT_ALL             = 0,
+    NVME_ZONE_REPORT_EMPTY           = 1,
+    NVME_ZONE_REPORT_IMPLICITLY_OPEN = 2,
+    NVME_ZONE_REPORT_EXPLICITLY_OPEN = 3,
+    NVME_ZONE_REPORT_CLOSED          = 4,
+    NVME_ZONE_REPORT_FULL            = 5,
+    NVME_ZONE_REPORT_READ_ONLY       = 6,
+    NVME_ZONE_REPORT_OFFLINE         = 7,
+};
+
+enum NvmeZoneType {
+    NVME_ZONE_TYPE_RESERVED          = 0x00,
+    NVME_ZONE_TYPE_SEQ_WRITE         = 0x02,
+};
+
+typedef struct QEMU_PACKED NvmeZoneSendCmd {
+    uint8_t     opcode;
+    uint8_t     flags;
+    uint16_t    cid;
+    uint32_t    nsid;
+    uint32_t    rsvd8[4];
+    NvmeCmdDptr dptr;
+    uint64_t    slba;
+    uint32_t    rsvd48;
+    uint8_t     zsa;
+    uint8_t     zsflags;
+    uint8_t     rsvd54[2];
+    uint32_t    rsvd56[2];
+} NvmeZoneSendCmd;
+
+enum NvmeZoneSendAction {
+    NVME_ZONE_ACTION_RSD             = 0x00,
+    NVME_ZONE_ACTION_CLOSE           = 0x01,
+    NVME_ZONE_ACTION_FINISH          = 0x02,
+    NVME_ZONE_ACTION_OPEN            = 0x03,
+    NVME_ZONE_ACTION_RESET           = 0x04,
+    NVME_ZONE_ACTION_OFFLINE         = 0x05,
+    NVME_ZONE_ACTION_SET_ZD_EXT      = 0x10,
+    NVME_ZONE_ACTION_ZRWA_FLUSH      = 0x11,
+};
+
+enum {
+    NVME_ZSFLAG_SELECT_ALL = 1 << 0,
+    NVME_ZSFLAG_ZRWA_ALLOC = 1 << 1,
+};
+
+typedef struct QEMU_PACKED NvmeZoneDescr {
+    uint8_t     zt;
+    uint8_t     zs;
+    uint8_t     za;
+    uint8_t     rsvd3[5];
+    uint64_t    zcap;
+    uint64_t    zslba;
+    uint64_t    wp;
+    uint8_t     rsvd32[32];
+} NvmeZoneDescr;
+
+typedef enum NvmeZoneState {
+    NVME_ZONE_STATE_RESERVED         = 0x00,
+    NVME_ZONE_STATE_EMPTY            = 0x01,
+    NVME_ZONE_STATE_IMPLICITLY_OPEN  = 0x02,
+    NVME_ZONE_STATE_EXPLICITLY_OPEN  = 0x03,
+    NVME_ZONE_STATE_CLOSED           = 0x04,
+    NVME_ZONE_STATE_READ_ONLY        = 0x0d,
+    NVME_ZONE_STATE_FULL             = 0x0e,
+    NVME_ZONE_STATE_OFFLINE          = 0x0f,
+} NvmeZoneState;
+
+typedef struct QEMU_PACKED NvmePriCtrlCap {
+    uint16_t    cntlid;
+    uint16_t    portid;
+    uint8_t     crt;
+    uint8_t     rsvd5[27];
+    uint32_t    vqfrt;
+    uint32_t    vqrfa;
+    uint16_t    vqrfap;
+    uint16_t    vqprt;
+    uint16_t    vqfrsm;
+    uint16_t    vqgran;
+    uint8_t     rsvd48[16];
+    uint32_t    vifrt;
+    uint32_t    virfa;
+    uint16_t    virfap;
+    uint16_t    viprt;
+    uint16_t    vifrsm;
+    uint16_t    vigran;
+    uint8_t     rsvd80[4016];
+} NvmePriCtrlCap;
+
+typedef enum NvmePriCtrlCapCrt {
+    NVME_CRT_VQ             = 1 << 0,
+    NVME_CRT_VI             = 1 << 1,
+} NvmePriCtrlCapCrt;
+
+typedef struct QEMU_PACKED NvmeSecCtrlEntry {
+    uint16_t    scid;
+    uint16_t    pcid;
+    uint8_t     scs;
+    uint8_t     rsvd5[3];
+    uint16_t    vfn;
+    uint16_t    nvq;
+    uint16_t    nvi;
+    uint8_t     rsvd14[18];
+} NvmeSecCtrlEntry;
+
+typedef struct QEMU_PACKED NvmeSecCtrlList {
+    uint8_t             numcntl;
+    uint8_t             rsvd1[31];
+    NvmeSecCtrlEntry    sec[127];
+} NvmeSecCtrlList;
+
+typedef enum NvmeVirtMngmtAction {
+    NVME_VIRT_MNGMT_ACTION_PRM_ALLOC    = 0x01,
+    NVME_VIRT_MNGMT_ACTION_SEC_OFFLINE  = 0x07,
+    NVME_VIRT_MNGMT_ACTION_SEC_ASSIGN   = 0x08,
+    NVME_VIRT_MNGMT_ACTION_SEC_ONLINE   = 0x09,
+} NvmeVirtMngmtAction;
+
+typedef enum NvmeVirtualResourceType {
+    NVME_VIRT_RES_QUEUE         = 0x00,
+    NVME_VIRT_RES_INTERRUPT     = 0x01,
+} NvmeVirtualResourceType;
+
+typedef struct NvmeDirectiveIdentify {
+    uint8_t supported;
+    uint8_t unused1[31];
+    uint8_t enabled;
+    uint8_t unused33[31];
+    uint8_t persistent;
+    uint8_t unused65[31];
+    uint8_t rsvd64[4000];
+} NvmeDirectiveIdentify;
+
+enum NvmeDirectiveTypes {
+    NVME_DIRECTIVE_IDENTIFY       = 0x0,
+    NVME_DIRECTIVE_DATA_PLACEMENT = 0x2,
+};
+
+enum NvmeDirectiveOperations {
+    NVME_DIRECTIVE_RETURN_PARAMS = 0x1,
+};
+
+typedef struct QEMU_PACKED NvmeFdpConfsHdr {
+    uint16_t num_confs;
+    uint8_t  version;
+    uint8_t  rsvd3;
+    uint32_t size;
+    uint8_t  rsvd8[8];
+} NvmeFdpConfsHdr;
+
+REG8(FDPA, 0x0)
+    FIELD(FDPA, RGIF, 0, 4)
+    FIELD(FDPA, VWC, 4, 1)
+    FIELD(FDPA, VALID, 7, 1);
+
+typedef struct QEMU_PACKED NvmeFdpDescrHdr {
+    uint16_t descr_size;
+    uint8_t  fdpa;
+    uint8_t  vss;
+    uint32_t nrg;
+    uint16_t nruh;
+    uint16_t maxpids;
+    uint32_t nnss;
+    uint64_t runs;
+    uint32_t erutl;
+    uint8_t  rsvd28[36];
+} NvmeFdpDescrHdr;
+
+enum NvmeRuhType {
+    NVME_RUHT_INITIALLY_ISOLATED = 1,
+    NVME_RUHT_PERSISTENTLY_ISOLATED = 2,
+};
+
+typedef struct QEMU_PACKED NvmeRuhDescr {
+    uint8_t ruht;
+    uint8_t rsvd1[3];
+} NvmeRuhDescr;
+
+typedef struct QEMU_PACKED NvmeRuhuLog {
+    uint16_t nruh;
+    uint8_t  rsvd2[6];
+} NvmeRuhuLog;
+
+enum NvmeRuhAttributes {
+    NVME_RUHA_UNUSED = 0,
+    NVME_RUHA_HOST = 1,
+    NVME_RUHA_CTRL = 2,
+};
+
+typedef struct QEMU_PACKED NvmeRuhuDescr {
+    uint8_t ruha;
+    uint8_t rsvd1[7];
+} NvmeRuhuDescr;
+
+typedef struct QEMU_PACKED NvmeFdpStatsLog {
+    uint64_t hbmw[2];
+    uint64_t mbmw[2];
+    uint64_t mbe[2];
+    uint8_t  rsvd48[16];
+} NvmeFdpStatsLog;
+
+typedef struct QEMU_PACKED NvmeFdpEventsLog {
+    uint32_t num_events;
+    uint8_t  rsvd4[60];
+} NvmeFdpEventsLog;
+
+enum NvmeFdpEventType {
+    FDP_EVT_RU_NOT_FULLY_WRITTEN = 0x0,
+    FDP_EVT_RU_ATL_EXCEEDED = 0x1,
+    FDP_EVT_CTRL_RESET_RUH = 0x2,
+    FDP_EVT_INVALID_PID = 0x3,
+    FDP_EVT_MEDIA_REALLOC = 0x80,
+    FDP_EVT_RUH_IMPLICIT_RU_CHANGE = 0x81,
+};
+
+enum NvmeFdpEventFlags {
+    FDPEF_PIV = 1 << 0,
+    FDPEF_NSIDV = 1 << 1,
+    FDPEF_LV = 1 << 2,
+};
+
+typedef struct QEMU_PACKED NvmeFdpEvent {
+    uint8_t  type;
+    uint8_t  flags;
+    uint16_t pid;
+    uint64_t timestamp;
+    uint32_t nsid;
+    uint64_t type_specific[2];
+    uint16_t rgid;
+    uint8_t  ruhid;
+    uint8_t  rsvd35[5];
+    uint64_t vendor[3];
+} NvmeFdpEvent;
+
+typedef struct QEMU_PACKED NvmePhidList {
+    uint16_t nnruhd;
+    uint8_t  rsvd2[6];
+} NvmePhidList;
+
+typedef struct QEMU_PACKED NvmePhidDescr {
+    uint8_t  ruht;
+    uint8_t  rsvd1;
+    uint16_t ruhid;
+} NvmePhidDescr;
+
+REG32(FEAT_FDP, 0x0)
+    FIELD(FEAT_FDP, FDPE, 0, 1)
+    FIELD(FEAT_FDP, CONF_NDX, 8, 8);
+
+typedef struct QEMU_PACKED NvmeFdpEventDescr {
+    uint8_t evt;
+    uint8_t evta;
+} NvmeFdpEventDescr;
+
+REG32(NVME_IOMR, 0x0)
+    FIELD(NVME_IOMR, MO, 0, 8)
+    FIELD(NVME_IOMR, MOS, 16, 16);
+
+enum NvmeIomr2Mo {
+    NVME_IOMR_MO_NOP = 0x0,
+    NVME_IOMR_MO_RUH_STATUS = 0x1,
+    NVME_IOMR_MO_VENDOR_SPECIFIC = 0x255,
+};
+
+typedef struct QEMU_PACKED NvmeRuhStatus {
+    uint8_t  rsvd0[14];
+    uint16_t nruhsd;
+} NvmeRuhStatus;
+
+typedef struct QEMU_PACKED NvmeRuhStatusDescr {
+    uint16_t pid;
+    uint16_t ruhid;
+    uint32_t earutr;
+    uint64_t ruamw;
+    uint8_t  rsvd16[16];
+} NvmeRuhStatusDescr;
+
+REG32(NVME_IOMS, 0x0)
+    FIELD(NVME_IOMS, MO, 0, 8)
+    FIELD(NVME_IOMS, MOS, 16, 16);
+
+enum NvmeIoms2Mo {
+    NVME_IOMS_MO_NOP = 0x0,
+    NVME_IOMS_MO_RUH_UPDATE = 0x1,
 };
 
 static inline void _nvme_check_size(void)
 {
     QEMU_BUILD_BUG_ON(sizeof(NvmeBar) != 4096);
     QEMU_BUILD_BUG_ON(sizeof(NvmeAerResult) != 4);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeZonedResult) != 8);
     QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16);
     QEMU_BUILD_BUG_ON(sizeof(NvmeDsmRange) != 16);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeCopySourceRangeFormat0) != 32);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeCopySourceRangeFormat1) != 40);
     QEMU_BUILD_BUG_ON(sizeof(NvmeCmd) != 64);
     QEMU_BUILD_BUG_ON(sizeof(NvmeDeleteQ) != 64);
     QEMU_BUILD_BUG_ON(sizeof(NvmeCreateCq) != 64);
@@ -1062,13 +1841,29 @@ static inline void _nvme_check_size(void)
     QEMU_BUILD_BUG_ON(sizeof(NvmeIdentify) != 64);
     QEMU_BUILD_BUG_ON(sizeof(NvmeRwCmd) != 64);
     QEMU_BUILD_BUG_ON(sizeof(NvmeDsmCmd) != 64);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeCopyCmd) != 64);
     QEMU_BUILD_BUG_ON(sizeof(NvmeRangeType) != 64);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeHostBehaviorSupport) != 512);
     QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64);
     QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512);
     QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeEffectsLog) != 4096);
     QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrlZoned) != 4096);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrlNvm) != 4096);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeLBAF) != 4);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeLBAFE) != 16);
     QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsNvm) != 4096);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsZoned) != 4096);
     QEMU_BUILD_BUG_ON(sizeof(NvmeSglDescriptor) != 16);
     QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsDescr) != 4);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescr) != 64);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeDifTuple) != 16);
+    QEMU_BUILD_BUG_ON(sizeof(NvmePriCtrlCap) != 4096);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlEntry) != 32);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlList) != 4096);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeEndGrpLog) != 512);
+    QEMU_BUILD_BUG_ON(sizeof(NvmeDirectiveIdentify) != 4096);
 }
 #endif
index 22c7807c89a90749eb187859ef6e7e1ed214cf87..b842f19f93e3204586613c9022b77b7f45da1fdd 100644 (file)
 #ifndef BLOCK_QAPI_H
 #define BLOCK_QAPI_H
 
-#include "block/block.h"
+#include "block/block-common.h"
+#include "block/graph-lock.h"
 #include "block/snapshot.h"
+#include "qapi/qapi-types-block-core.h"
 
-BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
-                                        BlockDriverState *bs,
-                                        bool flat,
-                                        Error **errp);
-int bdrv_query_snapshot_info_list(BlockDriverState *bs,
-                                  SnapshotInfoList **p_list,
-                                  Error **errp);
-void bdrv_query_image_info(BlockDriverState *bs,
-                           ImageInfo **p_info,
-                           Error **errp);
+BlockDeviceInfo *coroutine_fn GRAPH_RDLOCK
+bdrv_block_device_info(BlockBackend *blk, BlockDriverState *bs, bool flat,
+                       Error **errp);
+int GRAPH_RDLOCK
+bdrv_query_snapshot_info_list(BlockDriverState *bs,
+                              SnapshotInfoList **p_list,
+                              Error **errp);
+void GRAPH_RDLOCK
+bdrv_query_image_info(BlockDriverState *bs, ImageInfo **p_info, bool flat,
+                      bool skip_implicit_filters, Error **errp);
+void coroutine_fn GRAPH_RDLOCK
+bdrv_co_query_block_graph_info(BlockDriverState *bs, BlockGraphInfo **p_info,
+                               Error **errp);
+void co_wrapper_bdrv_rdlock
+bdrv_query_block_graph_info(BlockDriverState *bs, BlockGraphInfo **p_info,
+                            Error **errp);
 
 void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
-void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec);
-void bdrv_image_info_dump(ImageInfo *info);
+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
+                                   const char *prefix,
+                                   int indentation);
+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation, bool protocol);
 #endif
index d8cb502d7db3d687eb4701804db0562f4ca05b66..b4c28d96a9e5ffe19e030c35e75cd61316152038 100644 (file)
@@ -12,6 +12,9 @@
 
 #include "qapi/qmp/qdict.h"
 
+QObject *qdict_crumple(const QDict *src, Error **errp);
+void qdict_flatten(QDict *qdict);
+
 void qdict_copy_default(QDict *dst, QDict *src, const char *key);
 void qdict_set_default_str(QDict *dst, const char *key, const char *val);
 
@@ -20,8 +23,6 @@ void qdict_join(QDict *dest, QDict *src, bool overwrite);
 void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start);
 void qdict_array_split(QDict *src, QList **dst);
 int qdict_array_entries(QDict *src, const char *subqdict);
-QObject *qdict_crumple(const QDict *src, Error **errp);
-void qdict_flatten(QDict *qdict);
 
 typedef struct QDictRenames {
     const char *from;
index 251b10d273326b6478e5f6622694a0b2afcf80ab..1f2c6784610645cc54e1adb09be2eed0a88b2b3c 100644 (file)
@@ -17,7 +17,6 @@
 #define QEMU_RAW_AIO_H
 
 #include "block/aio.h"
-#include "qemu/coroutine.h"
 #include "qemu/iov.h"
 
 /* AIO request types */
 #define QEMU_AIO_WRITE_ZEROES 0x0020
 #define QEMU_AIO_COPY_RANGE   0x0040
 #define QEMU_AIO_TRUNCATE     0x0080
+#define QEMU_AIO_ZONE_REPORT  0x0100
+#define QEMU_AIO_ZONE_MGMT    0x0200
+#define QEMU_AIO_ZONE_APPEND  0x0400
+#define QEMU_AIO_FSTAT        0x0800
 #define QEMU_AIO_TYPE_MASK \
         (QEMU_AIO_READ | \
          QEMU_AIO_WRITE | \
          QEMU_AIO_DISCARD | \
          QEMU_AIO_WRITE_ZEROES | \
          QEMU_AIO_COPY_RANGE | \
-         QEMU_AIO_TRUNCATE)
+         QEMU_AIO_TRUNCATE | \
+         QEMU_AIO_ZONE_REPORT | \
+         QEMU_AIO_ZONE_MGMT | \
+         QEMU_AIO_ZONE_APPEND | \
+         QEMU_AIO_FSTAT)
 
 /* AIO flags */
 #define QEMU_AIO_MISALIGNED   0x1000
 typedef struct LinuxAioState LinuxAioState;
 LinuxAioState *laio_init(Error **errp);
 void laio_cleanup(LinuxAioState *s);
-int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
-                                uint64_t offset, QEMUIOVector *qiov, int type);
+
+/* laio_co_submit: submit I/O requests in the thread's current AioContext. */
+int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
+                                int type, uint64_t dev_max_batch);
+
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
-void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
-void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s);
 #endif
 /* io_uring.c - Linux io_uring implementation */
 #ifdef CONFIG_LINUX_IO_URING
 typedef struct LuringState LuringState;
 LuringState *luring_init(Error **errp);
 void luring_cleanup(LuringState *s);
-int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,
-                                uint64_t offset, QEMUIOVector *qiov, int type);
+
+/* luring_co_submit: submit I/O requests in the thread's current AioContext. */
+int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
+                                  QEMUIOVector *qiov, int type);
 void luring_detach_aio_context(LuringState *s, AioContext *old_context);
 void luring_attach_aio_context(LuringState *s, AioContext *new_context);
-void luring_io_plug(BlockDriverState *bs, LuringState *s);
-void luring_io_unplug(BlockDriverState *bs, LuringState *s);
 #endif
 
 #ifdef _WIN32
diff --git a/include/block/replication.h b/include/block/replication.h
new file mode 100644 (file)
index 0000000..21931b4
--- /dev/null
@@ -0,0 +1,175 @@
+/*
+ * Replication filter
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 Intel Corporation
+ * Copyright (c) 2016 FUJITSU LIMITED
+ *
+ * Author:
+ *   Changlong Xie <xiecl.fnst@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef REPLICATION_H
+#define REPLICATION_H
+
+#include "qapi/qapi-types-block-core.h"
+#include "qemu/module.h"
+#include "qemu/queue.h"
+
+typedef struct ReplicationOps ReplicationOps;
+typedef struct ReplicationState ReplicationState;
+
+/**
+ * SECTION:block/replication.h
+ * @title:Base Replication System
+ * @short_description: interfaces for handling replication
+ *
+ * The Replication Model provides a framework for handling Replication
+ *
+ * <example>
+ *   <title>How to use replication interfaces</title>
+ *   <programlisting>
+ * #include "block/replication.h"
+ *
+ * typedef struct BDRVReplicationState {
+ *     ReplicationState *rs;
+ * } BDRVReplicationState;
+ *
+ * static void replication_start(ReplicationState *rs, ReplicationMode mode,
+ *                               Error **errp);
+ * static void replication_do_checkpoint(ReplicationState *rs, Error **errp);
+ * static void replication_get_error(ReplicationState *rs, Error **errp);
+ * static void replication_stop(ReplicationState *rs, bool failover,
+ *                              Error **errp);
+ *
+ * static ReplicationOps replication_ops = {
+ *     .start = replication_start,
+ *     .checkpoint = replication_do_checkpoint,
+ *     .get_error = replication_get_error,
+ *     .stop = replication_stop,
+ * }
+ *
+ * static int replication_open(BlockDriverState *bs, QDict *options,
+ *                             int flags, Error **errp)
+ * {
+ *     BDRVReplicationState *s = bs->opaque;
+ *     s->rs = replication_new(bs, &replication_ops);
+ *     return 0;
+ * }
+ *
+ * static void replication_close(BlockDriverState *bs)
+ * {
+ *     BDRVReplicationState *s = bs->opaque;
+ *     replication_remove(s->rs);
+ * }
+ *
+ * BlockDriver bdrv_replication = {
+ *     .format_name                = "replication",
+ *     .instance_size              = sizeof(BDRVReplicationState),
+ *
+ *     .bdrv_open                  = replication_open,
+ *     .bdrv_close                 = replication_close,
+ * };
+ *
+ * static void bdrv_replication_init(void)
+ * {
+ *     bdrv_register(&bdrv_replication);
+ * }
+ *
+ * block_init(bdrv_replication_init);
+ *   </programlisting>
+ * </example>
+ *
+ * We create an example about how to use replication interfaces in above.
+ * Then in migration, we can use replication_(start/stop/do_checkpoint/
+ * get_error)_all to handle all replication operations.
+ */
+
+/**
+ * ReplicationState:
+ * @opaque: opaque pointer value passed to this ReplicationState
+ * @ops: replication operation of this ReplicationState
+ * @node: node that we will insert into @replication_states QLIST
+ */
+struct ReplicationState {
+    void *opaque;
+    ReplicationOps *ops;
+    QLIST_ENTRY(ReplicationState) node;
+};
+
+/**
+ * ReplicationOps:
+ * @start: callback to start replication
+ * @stop: callback to stop replication
+ * @checkpoint: callback to do checkpoint
+ * @get_error: callback to check if error occurred during replication
+ */
+struct ReplicationOps {
+    void (*start)(ReplicationState *rs, ReplicationMode mode, Error **errp);
+    void (*stop)(ReplicationState *rs, bool failover, Error **errp);
+    void (*checkpoint)(ReplicationState *rs, Error **errp);
+    void (*get_error)(ReplicationState *rs, Error **errp);
+};
+
+/**
+ * replication_new:
+ * @opaque: opaque pointer value passed to ReplicationState
+ * @ops: replication operation of the new relevant ReplicationState
+ *
+ * Called to create a new ReplicationState instance, and then insert it
+ * into @replication_states QLIST
+ *
+ * Returns: the new ReplicationState instance
+ */
+ReplicationState *replication_new(void *opaque, ReplicationOps *ops);
+
+/**
+ * replication_remove:
+ * @rs: the ReplicationState instance to remove
+ *
+ * Called to remove a ReplicationState instance, and then delete it from
+ * @replication_states QLIST
+ */
+void replication_remove(ReplicationState *rs);
+
+/**
+ * replication_start_all:
+ * @mode: replication mode that could be "primary" or "secondary"
+ * @errp: returns an error if this function fails
+ *
+ * Start replication, called in migration/checkpoint thread
+ *
+ * Note: the caller of the function MUST make sure vm stopped
+ */
+void replication_start_all(ReplicationMode mode, Error **errp);
+
+/**
+ * replication_do_checkpoint_all:
+ * @errp: returns an error if this function fails
+ *
+ * This interface is called after all VM state is transferred to Secondary QEMU
+ */
+void replication_do_checkpoint_all(Error **errp);
+
+/**
+ * replication_get_error_all:
+ * @errp: returns an error if this function fails
+ *
+ * This interface is called to check if error occurred during replication
+ */
+void replication_get_error_all(Error **errp);
+
+/**
+ * replication_stop_all:
+ * @failover: boolean value that indicates if we need do failover or not
+ * @errp: returns an error if this function fails
+ *
+ * It is called on failover. The vm should be stopped before calling it, if you
+ * use this API to shutdown the guest, or other things except failover
+ */
+void replication_stop_all(bool failover, Error **errp);
+
+#endif /* REPLICATION_H */
diff --git a/include/block/reqlist.h b/include/block/reqlist.h
new file mode 100644 (file)
index 0000000..5253497
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * reqlist API
+ *
+ * Copyright (C) 2013 Proxmox Server Solutions
+ * Copyright (c) 2021 Virtuozzo International GmbH.
+ *
+ * Authors:
+ *  Dietmar Maurer (dietmar@proxmox.com)
+ *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef REQLIST_H
+#define REQLIST_H
+
+#include "qemu/coroutine.h"
+
+/*
+ * The API is not thread-safe and shouldn't be. The struct is public to be part
+ * of other structures and protected by third-party locks, see
+ * block/block-copy.c for example.
+ */
+
+typedef struct BlockReq {
+    int64_t offset;
+    int64_t bytes;
+
+    CoQueue wait_queue; /* coroutines blocked on this req */
+    QLIST_ENTRY(BlockReq) list;
+} BlockReq;
+
+typedef QLIST_HEAD(, BlockReq) BlockReqList;
+
+/*
+ * Initialize new request and add it to the list. Caller must be sure that
+ * there are no conflicting requests in the list.
+ */
+void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset,
+                      int64_t bytes);
+/* Search for request in the list intersecting with @offset/@bytes area. */
+BlockReq *reqlist_find_conflict(BlockReqList *reqs, int64_t offset,
+                                int64_t bytes);
+
+/*
+ * If there are no intersecting requests return false. Otherwise, wait for the
+ * first found intersecting request to finish and return true.
+ *
+ * @lock is passed to qemu_co_queue_wait()
+ * False return value proves that lock was released at no point.
+ */
+bool coroutine_fn reqlist_wait_one(BlockReqList *reqs, int64_t offset,
+                                   int64_t bytes, CoMutex *lock);
+
+/*
+ * Wait for all intersecting requests. It just calls reqlist_wait_one() in a
+ * loop, caller is responsible to stop producing new requests in this region
+ * in parallel, otherwise reqlist_wait_all() may never return.
+ */
+void coroutine_fn reqlist_wait_all(BlockReqList *reqs, int64_t offset,
+                                   int64_t bytes, CoMutex *lock);
+
+/*
+ * Shrink request and wake all waiting coroutines (maybe some of them are not
+ * intersecting with shrunk request).
+ */
+void coroutine_fn reqlist_shrink_req(BlockReq *req, int64_t new_bytes);
+
+/*
+ * Remove request and wake all waiting coroutines. Do not release any memory.
+ */
+void coroutine_fn reqlist_remove_req(BlockReq *req);
+
+#endif /* REQLIST_H */
index b0fe42993dccf8d587159cbd6c9824faea6c071e..d49c5599d9bc6ca6916ad3ee218083bb3661c504 100644 (file)
@@ -25,7 +25,8 @@
 #ifndef SNAPSHOT_H
 #define SNAPSHOT_H
 
-
+#include "block/graph-lock.h"
+#include "qapi/qapi-builtin-types.h"
 
 #define SNAPSHOT_OPT_BASE       "snapshot."
 #define SNAPSHOT_OPT_ID         "snapshot.id"
@@ -45,6 +46,13 @@ typedef struct QEMUSnapshotInfo {
     uint64_t icount; /* record/replay step */
 } QEMUSnapshotInfo;
 
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
 int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
                        const char *name);
 bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
@@ -52,16 +60,19 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
                                        const char *name,
                                        QEMUSnapshotInfo *sn_info,
                                        Error **errp);
-int bdrv_can_snapshot(BlockDriverState *bs);
-int bdrv_snapshot_create(BlockDriverState *bs,
-                         QEMUSnapshotInfo *sn_info);
-int bdrv_snapshot_goto(BlockDriverState *bs,
-                       const char *snapshot_id,
-                       Error **errp);
-int bdrv_snapshot_delete(BlockDriverState *bs,
-                         const char *snapshot_id,
-                         const char *name,
-                         Error **errp);
+
+int GRAPH_RDLOCK bdrv_can_snapshot(BlockDriverState *bs);
+
+int GRAPH_RDLOCK
+bdrv_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
+
+int GRAPH_UNLOCKED
+bdrv_snapshot_goto(BlockDriverState *bs, const char *snapshot_id, Error **errp);
+
+int GRAPH_RDLOCK
+bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id,
+                     const char *name, Error **errp);
+
 int bdrv_snapshot_list(BlockDriverState *bs,
                        QEMUSnapshotInfo **psn_info);
 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
@@ -73,21 +84,32 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
                                          Error **errp);
 
 
-/* Group operations. All block drivers are involved.
+/*
+ * Group operations. All block drivers are involved.
  * These functions will properly handle dataplane (take aio_context_acquire
- * when appropriate for appropriate block drivers */
+ * when appropriate for appropriate block drivers
+ */
 
-bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs);
-int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bsd_bs,
+bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
+                           Error **errp);
+int bdrv_all_delete_snapshot(const char *name,
+                             bool has_devices, strList *devices,
                              Error **errp);
-int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs,
+int bdrv_all_goto_snapshot(const char *name,
+                           bool has_devices, strList *devices,
                            Error **errp);
-int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs);
+int bdrv_all_has_snapshot(const char *name,
+                          bool has_devices, strList *devices,
+                          Error **errp);
 int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
                              BlockDriverState *vm_state_bs,
                              uint64_t vm_state_size,
-                             BlockDriverState **first_bad_bs);
+                             bool has_devices,
+                             strList *devices,
+                             Error **errp);
 
-BlockDriverState *bdrv_all_find_vmstate_bs(void);
+BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs,
+                                           bool has_devices, strList *devices,
+                                           Error **errp);
 
 #endif
index 7dd7d730a004af57dfeeae4c58ac21afe984cde1..948ff5f30c31de452361578dec9fa252817b4a69 100644 (file)
@@ -18,7 +18,9 @@
 #ifndef QEMU_THREAD_POOL_H
 #define QEMU_THREAD_POOL_H
 
-#include "block/block.h"
+#include "block/aio.h"
+
+#define THREAD_POOL_MAX_THREADS_DEFAULT         64
 
 typedef int ThreadPoolFunc(void *opaque);
 
@@ -27,11 +29,15 @@ typedef struct ThreadPool ThreadPool;
 ThreadPool *thread_pool_new(struct AioContext *ctx);
 void thread_pool_free(ThreadPool *pool);
 
-BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
-        ThreadPoolFunc *func, void *arg,
-        BlockCompletionFunc *cb, void *opaque);
-int coroutine_fn thread_pool_submit_co(ThreadPool *pool,
-        ThreadPoolFunc *func, void *arg);
-void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg);
+/*
+ * thread_pool_submit* API: submit I/O requests in the thread's
+ * current AioContext.
+ */
+BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
+                                   BlockCompletionFunc *cb, void *opaque);
+int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg);
+void thread_pool_submit(ThreadPoolFunc *func, void *arg);
+
+void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx);
 
 #endif
index 8bf7d233fae57b1114aef45530837aa36a75666e..e98f71c4db4fad21716309c46a2c530619b5f657 100644 (file)
@@ -25,6 +25,7 @@
 #ifndef THROTTLE_GROUPS_H
 #define THROTTLE_GROUPS_H
 
+#include "qemu/coroutine.h"
 #include "qemu/throttle.h"
 #include "block/block_int.h"
 #include "qom/object.h"
@@ -37,7 +38,7 @@ typedef struct ThrottleGroupMember {
     AioContext   *aio_context;
     /* throttled_reqs_lock protects the CoQueues for throttled requests.  */
     CoMutex      throttled_reqs_lock;
-    CoQueue      throttled_reqs[2];
+    CoQueue      throttled_reqs[THROTTLE_MAX];
 
     /* Nonzero if the I/O limits are currently being ignored; generally
      * it is zero.  Accessed with atomic operations.
@@ -54,7 +55,7 @@ typedef struct ThrottleGroupMember {
      * throttle_state tells us if I/O limits are configured. */
     ThrottleState *throttle_state;
     ThrottleTimers throttle_timers;
-    unsigned       pending_reqs[2];
+    unsigned       pending_reqs[THROTTLE_MAX];
     QLIST_ENTRY(ThrottleGroupMember) round_robin;
 
 } ThrottleGroupMember;
@@ -77,8 +78,8 @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm);
 void throttle_group_restart_tgm(ThrottleGroupMember *tgm);
 
 void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm,
-                                                        unsigned int bytes,
-                                                        bool is_write);
+                                                        int64_t bytes,
+                                                        ThrottleDirection direction);
 void throttle_group_attach_aio_context(ThrottleGroupMember *tgm,
                                        AioContext *new_context);
 void throttle_group_detach_aio_context(ThrottleGroupMember *tgm);
diff --git a/include/block/ufs.h b/include/block/ufs.h
new file mode 100644 (file)
index 0000000..d61598b
--- /dev/null
@@ -0,0 +1,1090 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef BLOCK_UFS_H
+#define BLOCK_UFS_H
+
+#include "hw/registerfields.h"
+
+typedef struct QEMU_PACKED UfsReg {
+    uint32_t cap;
+    uint32_t rsvd0;
+    uint32_t ver;
+    uint32_t rsvd1;
+    uint32_t hcpid;
+    uint32_t hcmid;
+    uint32_t ahit;
+    uint32_t rsvd2;
+    uint32_t is;
+    uint32_t ie;
+    uint32_t rsvd3[2];
+    uint32_t hcs;
+    uint32_t hce;
+    uint32_t uecpa;
+    uint32_t uecdl;
+    uint32_t uecn;
+    uint32_t uect;
+    uint32_t uecdme;
+    uint32_t utriacr;
+    uint32_t utrlba;
+    uint32_t utrlbau;
+    uint32_t utrldbr;
+    uint32_t utrlclr;
+    uint32_t utrlrsr;
+    uint32_t utrlcnr;
+    uint32_t rsvd4[2];
+    uint32_t utmrlba;
+    uint32_t utmrlbau;
+    uint32_t utmrldbr;
+    uint32_t utmrlclr;
+    uint32_t utmrlrsr;
+    uint32_t rsvd5[3];
+    uint32_t uiccmd;
+    uint32_t ucmdarg1;
+    uint32_t ucmdarg2;
+    uint32_t ucmdarg3;
+    uint32_t rsvd6[4];
+    uint32_t rsvd7[4];
+    uint32_t rsvd8[16];
+    uint32_t ccap;
+} UfsReg;
+
+REG32(CAP, offsetof(UfsReg, cap))
+    FIELD(CAP, NUTRS, 0, 5)
+    FIELD(CAP, RTT, 8, 8)
+    FIELD(CAP, NUTMRS, 16, 3)
+    FIELD(CAP, AUTOH8, 23, 1)
+    FIELD(CAP, 64AS, 24, 1)
+    FIELD(CAP, OODDS, 25, 1)
+    FIELD(CAP, UICDMETMS, 26, 1)
+    FIELD(CAP, CS, 28, 1)
+REG32(VER, offsetof(UfsReg, ver))
+REG32(HCPID, offsetof(UfsReg, hcpid))
+REG32(HCMID, offsetof(UfsReg, hcmid))
+REG32(AHIT, offsetof(UfsReg, ahit))
+REG32(IS, offsetof(UfsReg, is))
+    FIELD(IS, UTRCS, 0, 1)
+    FIELD(IS, UDEPRI, 1, 1)
+    FIELD(IS, UE, 2, 1)
+    FIELD(IS, UTMS, 3, 1)
+    FIELD(IS, UPMS, 4, 1)
+    FIELD(IS, UHXS, 5, 1)
+    FIELD(IS, UHES, 6, 1)
+    FIELD(IS, ULLS, 7, 1)
+    FIELD(IS, ULSS, 8, 1)
+    FIELD(IS, UTMRCS, 9, 1)
+    FIELD(IS, UCCS, 10, 1)
+    FIELD(IS, DFES, 11, 1)
+    FIELD(IS, UTPES, 12, 1)
+    FIELD(IS, HCFES, 16, 1)
+    FIELD(IS, SBFES, 17, 1)
+    FIELD(IS, CEFES, 18, 1)
+REG32(IE, offsetof(UfsReg, ie))
+    FIELD(IE, UTRCE, 0, 1)
+    FIELD(IE, UDEPRIE, 1, 1)
+    FIELD(IE, UEE, 2, 1)
+    FIELD(IE, UTMSE, 3, 1)
+    FIELD(IE, UPMSE, 4, 1)
+    FIELD(IE, UHXSE, 5, 1)
+    FIELD(IE, UHESE, 6, 1)
+    FIELD(IE, ULLSE, 7, 1)
+    FIELD(IE, ULSSE, 8, 1)
+    FIELD(IE, UTMRCE, 9, 1)
+    FIELD(IE, UCCE, 10, 1)
+    FIELD(IE, DFEE, 11, 1)
+    FIELD(IE, UTPEE, 12, 1)
+    FIELD(IE, HCFEE, 16, 1)
+    FIELD(IE, SBFEE, 17, 1)
+    FIELD(IE, CEFEE, 18, 1)
+REG32(HCS, offsetof(UfsReg, hcs))
+    FIELD(HCS, DP, 0, 1)
+    FIELD(HCS, UTRLRDY, 1, 1)
+    FIELD(HCS, UTMRLRDY, 2, 1)
+    FIELD(HCS, UCRDY, 3, 1)
+    FIELD(HCS, UPMCRS, 8, 3)
+REG32(HCE, offsetof(UfsReg, hce))
+    FIELD(HCE, HCE, 0, 1)
+    FIELD(HCE, CGE, 1, 1)
+REG32(UECPA, offsetof(UfsReg, uecpa))
+REG32(UECDL, offsetof(UfsReg, uecdl))
+REG32(UECN, offsetof(UfsReg, uecn))
+REG32(UECT, offsetof(UfsReg, uect))
+REG32(UECDME, offsetof(UfsReg, uecdme))
+REG32(UTRIACR, offsetof(UfsReg, utriacr))
+REG32(UTRLBA, offsetof(UfsReg, utrlba))
+    FIELD(UTRLBA, UTRLBA, 10, 22)
+REG32(UTRLBAU, offsetof(UfsReg, utrlbau))
+REG32(UTRLDBR, offsetof(UfsReg, utrldbr))
+REG32(UTRLCLR, offsetof(UfsReg, utrlclr))
+REG32(UTRLRSR, offsetof(UfsReg, utrlrsr))
+REG32(UTRLCNR, offsetof(UfsReg, utrlcnr))
+REG32(UTMRLBA, offsetof(UfsReg, utmrlba))
+    FIELD(UTMRLBA, UTMRLBA, 10, 22)
+REG32(UTMRLBAU, offsetof(UfsReg, utmrlbau))
+REG32(UTMRLDBR, offsetof(UfsReg, utmrldbr))
+REG32(UTMRLCLR, offsetof(UfsReg, utmrlclr))
+REG32(UTMRLRSR, offsetof(UfsReg, utmrlrsr))
+REG32(UICCMD, offsetof(UfsReg, uiccmd))
+REG32(UCMDARG1, offsetof(UfsReg, ucmdarg1))
+REG32(UCMDARG2, offsetof(UfsReg, ucmdarg2))
+REG32(UCMDARG3, offsetof(UfsReg, ucmdarg3))
+REG32(CCAP, offsetof(UfsReg, ccap))
+
+#define UFS_INTR_MASK                                    \
+    ((1 << R_IS_CEFES_SHIFT) | (1 << R_IS_SBFES_SHIFT) | \
+     (1 << R_IS_HCFES_SHIFT) | (1 << R_IS_UTPES_SHIFT) | \
+     (1 << R_IS_DFES_SHIFT) | (1 << R_IS_UCCS_SHIFT) |   \
+     (1 << R_IS_UTMRCS_SHIFT) | (1 << R_IS_ULSS_SHIFT) | \
+     (1 << R_IS_ULLS_SHIFT) | (1 << R_IS_UHES_SHIFT) |   \
+     (1 << R_IS_UHXS_SHIFT) | (1 << R_IS_UPMS_SHIFT) |   \
+     (1 << R_IS_UTMS_SHIFT) | (1 << R_IS_UE_SHIFT) |     \
+     (1 << R_IS_UDEPRI_SHIFT) | (1 << R_IS_UTRCS_SHIFT))
+
+#define UFS_UPIU_HEADER_TRANSACTION_TYPE_SHIFT 24
+#define UFS_UPIU_HEADER_TRANSACTION_TYPE_MASK 0xff
+#define UFS_UPIU_HEADER_TRANSACTION_TYPE(dword0)                       \
+    ((be32_to_cpu(dword0) >> UFS_UPIU_HEADER_TRANSACTION_TYPE_SHIFT) & \
+     UFS_UPIU_HEADER_TRANSACTION_TYPE_MASK)
+
+#define UFS_UPIU_HEADER_QUERY_FUNC_SHIFT 16
+#define UFS_UPIU_HEADER_QUERY_FUNC_MASK 0xff
+#define UFS_UPIU_HEADER_QUERY_FUNC(dword1)                       \
+    ((be32_to_cpu(dword1) >> UFS_UPIU_HEADER_QUERY_FUNC_SHIFT) & \
+     UFS_UPIU_HEADER_QUERY_FUNC_MASK)
+
+#define UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH_SHIFT 0
+#define UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH_MASK 0xffff
+#define UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH(dword2)                       \
+    ((be32_to_cpu(dword2) >> UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH_SHIFT) & \
+     UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH_MASK)
+
+typedef struct QEMU_PACKED DeviceDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint8_t device;
+    uint8_t device_class;
+    uint8_t device_sub_class;
+    uint8_t protocol;
+    uint8_t number_lu;
+    uint8_t number_wlu;
+    uint8_t boot_enable;
+    uint8_t descr_access_en;
+    uint8_t init_power_mode;
+    uint8_t high_priority_lun;
+    uint8_t secure_removal_type;
+    uint8_t security_lu;
+    uint8_t background_ops_term_lat;
+    uint8_t init_active_icc_level;
+    uint16_t spec_version;
+    uint16_t manufacture_date;
+    uint8_t manufacturer_name;
+    uint8_t product_name;
+    uint8_t serial_number;
+    uint8_t oem_id;
+    uint16_t manufacturer_id;
+    uint8_t ud_0_base_offset;
+    uint8_t ud_config_p_length;
+    uint8_t device_rtt_cap;
+    uint16_t periodic_rtc_update;
+    uint8_t ufs_features_support;
+    uint8_t ffu_timeout;
+    uint8_t queue_depth;
+    uint16_t device_version;
+    uint8_t num_secure_wp_area;
+    uint32_t psa_max_data_size;
+    uint8_t psa_state_timeout;
+    uint8_t product_revision_level;
+    uint8_t reserved[36];
+    uint32_t extended_ufs_features_support;
+    uint8_t write_booster_buffer_preserve_user_space_en;
+    uint8_t write_booster_buffer_type;
+    uint32_t num_shared_write_booster_buffer_alloc_units;
+} DeviceDescriptor;
+
+typedef struct QEMU_PACKED GeometryDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint8_t media_technology;
+    uint8_t reserved;
+    uint64_t total_raw_device_capacity;
+    uint8_t max_number_lu;
+    uint32_t segment_size;
+    uint8_t allocation_unit_size;
+    uint8_t min_addr_block_size;
+    uint8_t optimal_read_block_size;
+    uint8_t optimal_write_block_size;
+    uint8_t max_in_buffer_size;
+    uint8_t max_out_buffer_size;
+    uint8_t rpmb_read_write_size;
+    uint8_t dynamic_capacity_resource_policy;
+    uint8_t data_ordering;
+    uint8_t max_context_id_number;
+    uint8_t sys_data_tag_unit_size;
+    uint8_t sys_data_tag_res_size;
+    uint8_t supported_sec_r_types;
+    uint16_t supported_memory_types;
+    uint32_t system_code_max_n_alloc_u;
+    uint16_t system_code_cap_adj_fac;
+    uint32_t non_persist_max_n_alloc_u;
+    uint16_t non_persist_cap_adj_fac;
+    uint32_t enhanced_1_max_n_alloc_u;
+    uint16_t enhanced_1_cap_adj_fac;
+    uint32_t enhanced_2_max_n_alloc_u;
+    uint16_t enhanced_2_cap_adj_fac;
+    uint32_t enhanced_3_max_n_alloc_u;
+    uint16_t enhanced_3_cap_adj_fac;
+    uint32_t enhanced_4_max_n_alloc_u;
+    uint16_t enhanced_4_cap_adj_fac;
+    uint32_t optimal_logical_block_size;
+    uint8_t reserved2[7];
+    uint32_t write_booster_buffer_max_n_alloc_units;
+    uint8_t device_max_write_booster_l_us;
+    uint8_t write_booster_buffer_cap_adj_fac;
+    uint8_t supported_write_booster_buffer_user_space_reduction_types;
+    uint8_t supported_write_booster_buffer_types;
+} GeometryDescriptor;
+
+#define UFS_GEOMETRY_CAPACITY_SHIFT 9
+
+typedef struct QEMU_PACKED UnitDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint8_t unit_index;
+    uint8_t lu_enable;
+    uint8_t boot_lun_id;
+    uint8_t lu_write_protect;
+    uint8_t lu_queue_depth;
+    uint8_t psa_sensitive;
+    uint8_t memory_type;
+    uint8_t data_reliability;
+    uint8_t logical_block_size;
+    uint64_t logical_block_count;
+    uint32_t erase_block_size;
+    uint8_t provisioning_type;
+    uint64_t phy_mem_resource_count;
+    uint16_t context_capabilities;
+    uint8_t large_unit_granularity_m1;
+    uint8_t reserved[6];
+    uint32_t lu_num_write_booster_buffer_alloc_units;
+} UnitDescriptor;
+
+typedef struct QEMU_PACKED RpmbUnitDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint8_t unit_index;
+    uint8_t lu_enable;
+    uint8_t boot_lun_id;
+    uint8_t lu_write_protect;
+    uint8_t lu_queue_depth;
+    uint8_t psa_sensitive;
+    uint8_t memory_type;
+    uint8_t reserved;
+    uint8_t logical_block_size;
+    uint64_t logical_block_count;
+    uint32_t erase_block_size;
+    uint8_t provisioning_type;
+    uint64_t phy_mem_resource_count;
+    uint8_t reserved2[3];
+} RpmbUnitDescriptor;
+
+typedef struct QEMU_PACKED PowerParametersDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint16_t active_icc_levels_vcc[16];
+    uint16_t active_icc_levels_vccq[16];
+    uint16_t active_icc_levels_vccq_2[16];
+} PowerParametersDescriptor;
+
+typedef struct QEMU_PACKED InterconnectDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint16_t bcd_unipro_version;
+    uint16_t bcd_mphy_version;
+} InterconnectDescriptor;
+
+typedef struct QEMU_PACKED StringDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint16_t UC[126];
+} StringDescriptor;
+
+typedef struct QEMU_PACKED DeviceHealthDescriptor {
+    uint8_t length;
+    uint8_t descriptor_idn;
+    uint8_t pre_eol_info;
+    uint8_t device_life_time_est_a;
+    uint8_t device_life_time_est_b;
+    uint8_t vendor_prop_info[32];
+    uint32_t refresh_total_count;
+    uint32_t refresh_progress;
+} DeviceHealthDescriptor;
+
+typedef struct QEMU_PACKED Flags {
+    uint8_t reserved;
+    uint8_t device_init;
+    uint8_t permanent_wp_en;
+    uint8_t power_on_wp_en;
+    uint8_t background_ops_en;
+    uint8_t device_life_span_mode_en;
+    uint8_t purge_enable;
+    uint8_t refresh_enable;
+    uint8_t phy_resource_removal;
+    uint8_t busy_rtc;
+    uint8_t reserved2;
+    uint8_t permanently_disable_fw_update;
+    uint8_t reserved3[2];
+    uint8_t wb_en;
+    uint8_t wb_buffer_flush_en;
+    uint8_t wb_buffer_flush_during_hibernate;
+    uint8_t reserved4[2];
+} Flags;
+
+typedef struct Attributes {
+    uint8_t boot_lun_en;
+    uint8_t reserved;
+    uint8_t current_power_mode;
+    uint8_t active_icc_level;
+    uint8_t out_of_order_data_en;
+    uint8_t background_op_status;
+    uint8_t purge_status;
+    uint8_t max_data_in_size;
+    uint8_t max_data_out_size;
+    uint32_t dyn_cap_needed;
+    uint8_t ref_clk_freq;
+    uint8_t config_descr_lock;
+    uint8_t max_num_of_rtt;
+    uint16_t exception_event_control;
+    uint16_t exception_event_status;
+    uint32_t seconds_passed;
+    uint16_t context_conf;
+    uint8_t device_ffu_status;
+    uint8_t psa_state;
+    uint32_t psa_data_size;
+    uint8_t ref_clk_gating_wait_time;
+    uint8_t device_case_rough_temperaure;
+    uint8_t device_too_high_temp_boundary;
+    uint8_t device_too_low_temp_boundary;
+    uint8_t throttling_status;
+    uint8_t wb_buffer_flush_status;
+    uint8_t available_wb_buffer_size;
+    uint8_t wb_buffer_life_time_est;
+    uint32_t current_wb_buffer_size;
+    uint8_t refresh_status;
+    uint8_t refresh_freq;
+    uint8_t refresh_unit;
+    uint8_t refresh_method;
+} Attributes;
+
+#define UFS_TRANSACTION_SPECIFIC_FIELD_SIZE 20
+#define UFS_MAX_QUERY_DATA_SIZE 256
+
+/* Command response result code */
+typedef enum CommandRespCode {
+    UFS_COMMAND_RESULT_SUCCESS = 0x00,
+    UFS_COMMAND_RESULT_FAIL = 0x01,
+} CommandRespCode;
+
+enum {
+    UFS_UPIU_FLAG_UNDERFLOW = 0x20,
+    UFS_UPIU_FLAG_OVERFLOW = 0x40,
+};
+
+typedef struct QEMU_PACKED UtpUpiuHeader {
+    uint8_t trans_type;
+    uint8_t flags;
+    uint8_t lun;
+    uint8_t task_tag;
+    uint8_t iid_cmd_set_type;
+    uint8_t query_func;
+    uint8_t response;
+    uint8_t scsi_status;
+    uint8_t ehs_len;
+    uint8_t device_inf;
+    uint16_t data_segment_length;
+} UtpUpiuHeader;
+
+/*
+ * The code below is copied from the linux kernel
+ * ("include/uapi/scsi/scsi_bsg_ufs.h") and modified to fit the qemu style.
+ */
+
+typedef struct QEMU_PACKED UtpUpiuQuery {
+    uint8_t opcode;
+    uint8_t idn;
+    uint8_t index;
+    uint8_t selector;
+    uint16_t reserved_osf;
+    uint16_t length;
+    uint32_t value;
+    uint32_t reserved[2];
+    /* EHS length should be 0. We don't have to worry about EHS area. */
+    uint8_t data[UFS_MAX_QUERY_DATA_SIZE];
+} UtpUpiuQuery;
+
+#define UFS_CDB_SIZE 16
+
+/*
+ * struct UtpUpiuCmd - Command UPIU structure
+ * @data_transfer_len: Data Transfer Length DW-3
+ * @cdb: Command Descriptor Block CDB DW-4 to DW-7
+ */
+typedef struct QEMU_PACKED UtpUpiuCmd {
+    uint32_t exp_data_transfer_len;
+    uint8_t cdb[UFS_CDB_SIZE];
+} UtpUpiuCmd;
+
+/*
+ * struct UtpUpiuReq - general upiu request structure
+ * @header:UPIU header structure DW-0 to DW-2
+ * @sc: fields structure for scsi command DW-3 to DW-7
+ * @qr: fields structure for query request DW-3 to DW-7
+ * @uc: use utp_upiu_query to host the 4 dwords of uic command
+ */
+typedef struct QEMU_PACKED UtpUpiuReq {
+    UtpUpiuHeader header;
+    union {
+        UtpUpiuCmd sc;
+        UtpUpiuQuery qr;
+    };
+} UtpUpiuReq;
+
+/*
+ * The code below is copied from the linux kernel ("include/ufs/ufshci.h") and
+ * modified to fit the qemu style.
+ */
+
+enum {
+    UFS_PWR_OK = 0x0,
+    UFS_PWR_LOCAL = 0x01,
+    UFS_PWR_REMOTE = 0x02,
+    UFS_PWR_BUSY = 0x03,
+    UFS_PWR_ERROR_CAP = 0x04,
+    UFS_PWR_FATAL_ERROR = 0x05,
+};
+
+/* UIC Commands */
+enum uic_cmd_dme {
+    UFS_UIC_CMD_DME_GET = 0x01,
+    UFS_UIC_CMD_DME_SET = 0x02,
+    UFS_UIC_CMD_DME_PEER_GET = 0x03,
+    UFS_UIC_CMD_DME_PEER_SET = 0x04,
+    UFS_UIC_CMD_DME_POWERON = 0x10,
+    UFS_UIC_CMD_DME_POWEROFF = 0x11,
+    UFS_UIC_CMD_DME_ENABLE = 0x12,
+    UFS_UIC_CMD_DME_RESET = 0x14,
+    UFS_UIC_CMD_DME_END_PT_RST = 0x15,
+    UFS_UIC_CMD_DME_LINK_STARTUP = 0x16,
+    UFS_UIC_CMD_DME_HIBER_ENTER = 0x17,
+    UFS_UIC_CMD_DME_HIBER_EXIT = 0x18,
+    UFS_UIC_CMD_DME_TEST_MODE = 0x1A,
+};
+
+/* UIC Config result code / Generic error code */
+enum {
+    UFS_UIC_CMD_RESULT_SUCCESS = 0x00,
+    UFS_UIC_CMD_RESULT_INVALID_ATTR = 0x01,
+    UFS_UIC_CMD_RESULT_FAILURE = 0x01,
+    UFS_UIC_CMD_RESULT_INVALID_ATTR_VALUE = 0x02,
+    UFS_UIC_CMD_RESULT_READ_ONLY_ATTR = 0x03,
+    UFS_UIC_CMD_RESULT_WRITE_ONLY_ATTR = 0x04,
+    UFS_UIC_CMD_RESULT_BAD_INDEX = 0x05,
+    UFS_UIC_CMD_RESULT_LOCKED_ATTR = 0x06,
+    UFS_UIC_CMD_RESULT_BAD_TEST_FEATURE_INDEX = 0x07,
+    UFS_UIC_CMD_RESULT_PEER_COMM_FAILURE = 0x08,
+    UFS_UIC_CMD_RESULT_BUSY = 0x09,
+    UFS_UIC_CMD_RESULT_DME_FAILURE = 0x0A,
+};
+
+#define UFS_MASK_UIC_COMMAND_RESULT 0xFF
+
+/*
+ * Request Descriptor Definitions
+ */
+
+/* Transfer request command type */
+enum {
+    UFS_UTP_CMD_TYPE_SCSI = 0x0,
+    UFS_UTP_CMD_TYPE_UFS = 0x1,
+    UFS_UTP_CMD_TYPE_DEV_MANAGE = 0x2,
+};
+
+/* To accommodate UFS2.0 required Command type */
+enum {
+    UFS_UTP_CMD_TYPE_UFS_STORAGE = 0x1,
+};
+
+enum {
+    UFS_UTP_SCSI_COMMAND = 0x00000000,
+    UFS_UTP_NATIVE_UFS_COMMAND = 0x10000000,
+    UFS_UTP_DEVICE_MANAGEMENT_FUNCTION = 0x20000000,
+    UFS_UTP_REQ_DESC_INT_CMD = 0x01000000,
+    UFS_UTP_REQ_DESC_CRYPTO_ENABLE_CMD = 0x00800000,
+};
+
+/* UTP Transfer Request Data Direction (DD) */
+enum {
+    UFS_UTP_NO_DATA_TRANSFER = 0x00000000,
+    UFS_UTP_HOST_TO_DEVICE = 0x02000000,
+    UFS_UTP_DEVICE_TO_HOST = 0x04000000,
+};
+
+/* Overall command status values */
+enum UtpOcsCodes {
+    UFS_OCS_SUCCESS = 0x0,
+    UFS_OCS_INVALID_CMD_TABLE_ATTR = 0x1,
+    UFS_OCS_INVALID_PRDT_ATTR = 0x2,
+    UFS_OCS_MISMATCH_DATA_BUF_SIZE = 0x3,
+    UFS_OCS_MISMATCH_RESP_UPIU_SIZE = 0x4,
+    UFS_OCS_PEER_COMM_FAILURE = 0x5,
+    UFS_OCS_ABORTED = 0x6,
+    UFS_OCS_FATAL_ERROR = 0x7,
+    UFS_OCS_DEVICE_FATAL_ERROR = 0x8,
+    UFS_OCS_INVALID_CRYPTO_CONFIG = 0x9,
+    UFS_OCS_GENERAL_CRYPTO_ERROR = 0xa,
+    UFS_OCS_INVALID_COMMAND_STATUS = 0xf,
+};
+
+enum {
+    UFS_MASK_OCS = 0x0F,
+};
+
+/*
+ * struct UfshcdSgEntry - UFSHCI PRD Entry
+ * @addr: Physical address; DW-0 and DW-1.
+ * @reserved: Reserved for future use DW-2
+ * @size: size of physical segment DW-3
+ */
+typedef struct QEMU_PACKED UfshcdSgEntry {
+    uint64_t addr;
+    uint32_t reserved;
+    uint32_t size;
+    /*
+     * followed by variant-specific fields if
+     * CONFIG_SCSI_UFS_VARIABLE_SG_ENTRY_SIZE has been defined.
+     */
+} UfshcdSgEntry;
+
+/*
+ * struct RequestDescHeader - Descriptor Header common to both UTRD and UTMRD
+ * @dword0: Descriptor Header DW0
+ * @dword1: Descriptor Header DW1
+ * @dword2: Descriptor Header DW2
+ * @dword3: Descriptor Header DW3
+ */
+typedef struct QEMU_PACKED RequestDescHeader {
+    uint32_t dword_0;
+    uint32_t dword_1;
+    uint32_t dword_2;
+    uint32_t dword_3;
+} RequestDescHeader;
+
+/*
+ * struct UtpTransferReqDesc - UTP Transfer Request Descriptor (UTRD)
+ * @header: UTRD header DW-0 to DW-3
+ * @command_desc_base_addr_lo: UCD base address low DW-4
+ * @command_desc_base_addr_hi: UCD base address high DW-5
+ * @response_upiu_length: response UPIU length DW-6
+ * @response_upiu_offset: response UPIU offset DW-6
+ * @prd_table_length: Physical region descriptor length DW-7
+ * @prd_table_offset: Physical region descriptor offset DW-7
+ */
+typedef struct QEMU_PACKED UtpTransferReqDesc {
+    /* DW 0-3 */
+    RequestDescHeader header;
+
+    /* DW 4-5*/
+    uint32_t command_desc_base_addr_lo;
+    uint32_t command_desc_base_addr_hi;
+
+    /* DW 6 */
+    uint16_t response_upiu_length;
+    uint16_t response_upiu_offset;
+
+    /* DW 7 */
+    uint16_t prd_table_length;
+    uint16_t prd_table_offset;
+} UtpTransferReqDesc;
+
+/*
+ * UTMRD structure.
+ */
+typedef struct QEMU_PACKED UtpTaskReqDesc {
+    /* DW 0-3 */
+    RequestDescHeader header;
+
+    /* DW 4-11 - Task request UPIU structure */
+    struct {
+        UtpUpiuHeader req_header;
+        uint32_t input_param1;
+        uint32_t input_param2;
+        uint32_t input_param3;
+        uint32_t reserved1[2];
+    } upiu_req;
+
+    /* DW 12-19 - Task Management Response UPIU structure */
+    struct {
+        UtpUpiuHeader rsp_header;
+        uint32_t output_param1;
+        uint32_t output_param2;
+        uint32_t reserved2[3];
+    } upiu_rsp;
+} UtpTaskReqDesc;
+
+/*
+ * The code below is copied from the linux kernel ("include/ufs/ufs.h") and
+ * modified to fit the qemu style.
+ */
+
+#define UFS_GENERAL_UPIU_REQUEST_SIZE (sizeof(UtpUpiuReq))
+#define UFS_QUERY_DESC_MAX_SIZE 255
+#define UFS_QUERY_DESC_MIN_SIZE 2
+#define UFS_QUERY_DESC_HDR_SIZE 2
+#define UFS_QUERY_OSF_SIZE (GENERAL_UPIU_REQUEST_SIZE - (sizeof(UtpUpiuHeader)))
+#define UFS_SENSE_SIZE 18
+
+/*
+ * UFS device may have standard LUs and LUN id could be from 0x00 to
+ * 0x7F. Standard LUs use "Peripheral Device Addressing Format".
+ * UFS device may also have the Well Known LUs (also referred as W-LU)
+ * which again could be from 0x00 to 0x7F. For W-LUs, device only use
+ * the "Extended Addressing Format" which means the W-LUNs would be
+ * from 0xc100 (SCSI_W_LUN_BASE) onwards.
+ * This means max. LUN number reported from UFS device could be 0xC17F.
+ */
+#define UFS_UPIU_MAX_UNIT_NUM_ID 0x7F
+#define UFS_UPIU_WLUN_ID (1 << 7)
+
+/* WriteBooster buffer is available only for the logical unit from 0 to 7 */
+#define UFS_UPIU_MAX_WB_LUN_ID 8
+
+/*
+ * WriteBooster buffer lifetime has a limit set by vendor.
+ * If it is over the limit, WriteBooster feature will be disabled.
+ */
+#define UFS_WB_EXCEED_LIFETIME 0x0B
+
+/*
+ * In UFS Spec, the Extra Header Segment (EHS) starts from byte 32 in UPIU
+ * request/response packet
+ */
+#define UFS_EHS_OFFSET_IN_RESPONSE 32
+
+/* Well known logical unit id in LUN field of UPIU */
+enum {
+    UFS_UPIU_REPORT_LUNS_WLUN = 0x81,
+    UFS_UPIU_UFS_DEVICE_WLUN = 0xD0,
+    UFS_UPIU_BOOT_WLUN = 0xB0,
+    UFS_UPIU_RPMB_WLUN = 0xC4,
+};
+
+/*
+ * UFS Protocol Information Unit related definitions
+ */
+
+/* Task management functions */
+enum {
+    UFS_ABORT_TASK = 0x01,
+    UFS_ABORT_TASK_SET = 0x02,
+    UFS_CLEAR_TASK_SET = 0x04,
+    UFS_LOGICAL_RESET = 0x08,
+    UFS_QUERY_TASK = 0x80,
+    UFS_QUERY_TASK_SET = 0x81,
+};
+
+/* UTP UPIU Transaction Codes Initiator to Target */
+enum {
+    UFS_UPIU_TRANSACTION_NOP_OUT = 0x00,
+    UFS_UPIU_TRANSACTION_COMMAND = 0x01,
+    UFS_UPIU_TRANSACTION_DATA_OUT = 0x02,
+    UFS_UPIU_TRANSACTION_TASK_REQ = 0x04,
+    UFS_UPIU_TRANSACTION_QUERY_REQ = 0x16,
+};
+
+/* UTP UPIU Transaction Codes Target to Initiator */
+enum {
+    UFS_UPIU_TRANSACTION_NOP_IN = 0x20,
+    UFS_UPIU_TRANSACTION_RESPONSE = 0x21,
+    UFS_UPIU_TRANSACTION_DATA_IN = 0x22,
+    UFS_UPIU_TRANSACTION_TASK_RSP = 0x24,
+    UFS_UPIU_TRANSACTION_READY_XFER = 0x31,
+    UFS_UPIU_TRANSACTION_QUERY_RSP = 0x36,
+    UFS_UPIU_TRANSACTION_REJECT_UPIU = 0x3F,
+};
+
+/* UPIU Read/Write flags */
+enum {
+    UFS_UPIU_CMD_FLAGS_NONE = 0x00,
+    UFS_UPIU_CMD_FLAGS_WRITE = 0x20,
+    UFS_UPIU_CMD_FLAGS_READ = 0x40,
+};
+
+/* UPIU Task Attributes */
+enum {
+    UFS_UPIU_TASK_ATTR_SIMPLE = 0x00,
+    UFS_UPIU_TASK_ATTR_ORDERED = 0x01,
+    UFS_UPIU_TASK_ATTR_HEADQ = 0x02,
+    UFS_UPIU_TASK_ATTR_ACA = 0x03,
+};
+
+/* UPIU Query request function */
+enum {
+    UFS_UPIU_QUERY_FUNC_STANDARD_READ_REQUEST = 0x01,
+    UFS_UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST = 0x81,
+};
+
+/* Flag idn for Query Requests*/
+enum flag_idn {
+    UFS_QUERY_FLAG_IDN_FDEVICEINIT = 0x01,
+    UFS_QUERY_FLAG_IDN_PERMANENT_WPE = 0x02,
+    UFS_QUERY_FLAG_IDN_PWR_ON_WPE = 0x03,
+    UFS_QUERY_FLAG_IDN_BKOPS_EN = 0x04,
+    UFS_QUERY_FLAG_IDN_LIFE_SPAN_MODE_ENABLE = 0x05,
+    UFS_QUERY_FLAG_IDN_PURGE_ENABLE = 0x06,
+    UFS_QUERY_FLAG_IDN_REFRESH_ENABLE = 0x07,
+    UFS_QUERY_FLAG_IDN_FPHYRESOURCEREMOVAL = 0x08,
+    UFS_QUERY_FLAG_IDN_BUSY_RTC = 0x09,
+    UFS_QUERY_FLAG_IDN_RESERVED3 = 0x0A,
+    UFS_QUERY_FLAG_IDN_PERMANENTLY_DISABLE_FW_UPDATE = 0x0B,
+    UFS_QUERY_FLAG_IDN_WB_EN = 0x0E,
+    UFS_QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN = 0x0F,
+    UFS_QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8 = 0x10,
+    UFS_QUERY_FLAG_IDN_HPB_RESET = 0x11,
+    UFS_QUERY_FLAG_IDN_HPB_EN = 0x12,
+    UFS_QUERY_FLAG_IDN_COUNT,
+};
+
+/* Attribute idn for Query requests */
+enum attr_idn {
+    UFS_QUERY_ATTR_IDN_BOOT_LU_EN = 0x00,
+    UFS_QUERY_ATTR_IDN_MAX_HPB_SINGLE_CMD = 0x01,
+    UFS_QUERY_ATTR_IDN_POWER_MODE = 0x02,
+    UFS_QUERY_ATTR_IDN_ACTIVE_ICC_LVL = 0x03,
+    UFS_QUERY_ATTR_IDN_OOO_DATA_EN = 0x04,
+    UFS_QUERY_ATTR_IDN_BKOPS_STATUS = 0x05,
+    UFS_QUERY_ATTR_IDN_PURGE_STATUS = 0x06,
+    UFS_QUERY_ATTR_IDN_MAX_DATA_IN = 0x07,
+    UFS_QUERY_ATTR_IDN_MAX_DATA_OUT = 0x08,
+    UFS_QUERY_ATTR_IDN_DYN_CAP_NEEDED = 0x09,
+    UFS_QUERY_ATTR_IDN_REF_CLK_FREQ = 0x0A,
+    UFS_QUERY_ATTR_IDN_CONF_DESC_LOCK = 0x0B,
+    UFS_QUERY_ATTR_IDN_MAX_NUM_OF_RTT = 0x0C,
+    UFS_QUERY_ATTR_IDN_EE_CONTROL = 0x0D,
+    UFS_QUERY_ATTR_IDN_EE_STATUS = 0x0E,
+    UFS_QUERY_ATTR_IDN_SECONDS_PASSED = 0x0F,
+    UFS_QUERY_ATTR_IDN_CNTX_CONF = 0x10,
+    UFS_QUERY_ATTR_IDN_CORR_PRG_BLK_NUM = 0x11,
+    UFS_QUERY_ATTR_IDN_RESERVED2 = 0x12,
+    UFS_QUERY_ATTR_IDN_RESERVED3 = 0x13,
+    UFS_QUERY_ATTR_IDN_FFU_STATUS = 0x14,
+    UFS_QUERY_ATTR_IDN_PSA_STATE = 0x15,
+    UFS_QUERY_ATTR_IDN_PSA_DATA_SIZE = 0x16,
+    UFS_QUERY_ATTR_IDN_REF_CLK_GATING_WAIT_TIME = 0x17,
+    UFS_QUERY_ATTR_IDN_CASE_ROUGH_TEMP = 0x18,
+    UFS_QUERY_ATTR_IDN_HIGH_TEMP_BOUND = 0x19,
+    UFS_QUERY_ATTR_IDN_LOW_TEMP_BOUND = 0x1A,
+    UFS_QUERY_ATTR_IDN_THROTTLING_STATUS = 0x1B,
+    UFS_QUERY_ATTR_IDN_WB_FLUSH_STATUS = 0x1C,
+    UFS_QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE = 0x1D,
+    UFS_QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST = 0x1E,
+    UFS_QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE = 0x1F,
+    UFS_QUERY_ATTR_IDN_REFRESH_STATUS = 0x2C,
+    UFS_QUERY_ATTR_IDN_REFRESH_FREQ = 0x2D,
+    UFS_QUERY_ATTR_IDN_REFRESH_UNIT = 0x2E,
+    UFS_QUERY_ATTR_IDN_COUNT,
+};
+
+/* Descriptor idn for Query requests */
+enum desc_idn {
+    UFS_QUERY_DESC_IDN_DEVICE = 0x0,
+    UFS_QUERY_DESC_IDN_CONFIGURATION = 0x1,
+    UFS_QUERY_DESC_IDN_UNIT = 0x2,
+    UFS_QUERY_DESC_IDN_RFU_0 = 0x3,
+    UFS_QUERY_DESC_IDN_INTERCONNECT = 0x4,
+    UFS_QUERY_DESC_IDN_STRING = 0x5,
+    UFS_QUERY_DESC_IDN_RFU_1 = 0x6,
+    UFS_QUERY_DESC_IDN_GEOMETRY = 0x7,
+    UFS_QUERY_DESC_IDN_POWER = 0x8,
+    UFS_QUERY_DESC_IDN_HEALTH = 0x9,
+    UFS_QUERY_DESC_IDN_MAX,
+};
+
+enum desc_header_offset {
+    UFS_QUERY_DESC_LENGTH_OFFSET = 0x00,
+    UFS_QUERY_DESC_DESC_TYPE_OFFSET = 0x01,
+};
+
+/* Unit descriptor parameters offsets in bytes*/
+enum unit_desc_param {
+    UFS_UNIT_DESC_PARAM_LEN = 0x0,
+    UFS_UNIT_DESC_PARAM_TYPE = 0x1,
+    UFS_UNIT_DESC_PARAM_UNIT_INDEX = 0x2,
+    UFS_UNIT_DESC_PARAM_LU_ENABLE = 0x3,
+    UFS_UNIT_DESC_PARAM_BOOT_LUN_ID = 0x4,
+    UFS_UNIT_DESC_PARAM_LU_WR_PROTECT = 0x5,
+    UFS_UNIT_DESC_PARAM_LU_Q_DEPTH = 0x6,
+    UFS_UNIT_DESC_PARAM_PSA_SENSITIVE = 0x7,
+    UFS_UNIT_DESC_PARAM_MEM_TYPE = 0x8,
+    UFS_UNIT_DESC_PARAM_DATA_RELIABILITY = 0x9,
+    UFS_UNIT_DESC_PARAM_LOGICAL_BLK_SIZE = 0xA,
+    UFS_UNIT_DESC_PARAM_LOGICAL_BLK_COUNT = 0xB,
+    UFS_UNIT_DESC_PARAM_ERASE_BLK_SIZE = 0x13,
+    UFS_UNIT_DESC_PARAM_PROVISIONING_TYPE = 0x17,
+    UFS_UNIT_DESC_PARAM_PHY_MEM_RSRC_CNT = 0x18,
+    UFS_UNIT_DESC_PARAM_CTX_CAPABILITIES = 0x20,
+    UFS_UNIT_DESC_PARAM_LARGE_UNIT_SIZE_M1 = 0x22,
+    UFS_UNIT_DESC_PARAM_HPB_LU_MAX_ACTIVE_RGNS = 0x23,
+    UFS_UNIT_DESC_PARAM_HPB_PIN_RGN_START_OFF = 0x25,
+    UFS_UNIT_DESC_PARAM_HPB_NUM_PIN_RGNS = 0x27,
+    UFS_UNIT_DESC_PARAM_WB_BUF_ALLOC_UNITS = 0x29,
+};
+
+/* RPMB Unit descriptor parameters offsets in bytes*/
+enum rpmb_unit_desc_param {
+    UFS_RPMB_UNIT_DESC_PARAM_LEN = 0x0,
+    UFS_RPMB_UNIT_DESC_PARAM_TYPE = 0x1,
+    UFS_RPMB_UNIT_DESC_PARAM_UNIT_INDEX = 0x2,
+    UFS_RPMB_UNIT_DESC_PARAM_LU_ENABLE = 0x3,
+    UFS_RPMB_UNIT_DESC_PARAM_BOOT_LUN_ID = 0x4,
+    UFS_RPMB_UNIT_DESC_PARAM_LU_WR_PROTECT = 0x5,
+    UFS_RPMB_UNIT_DESC_PARAM_LU_Q_DEPTH = 0x6,
+    UFS_RPMB_UNIT_DESC_PARAM_PSA_SENSITIVE = 0x7,
+    UFS_RPMB_UNIT_DESC_PARAM_MEM_TYPE = 0x8,
+    UFS_RPMB_UNIT_DESC_PARAM_REGION_EN = 0x9,
+    UFS_RPMB_UNIT_DESC_PARAM_LOGICAL_BLK_SIZE = 0xA,
+    UFS_RPMB_UNIT_DESC_PARAM_LOGICAL_BLK_COUNT = 0xB,
+    UFS_RPMB_UNIT_DESC_PARAM_REGION0_SIZE = 0x13,
+    UFS_RPMB_UNIT_DESC_PARAM_REGION1_SIZE = 0x14,
+    UFS_RPMB_UNIT_DESC_PARAM_REGION2_SIZE = 0x15,
+    UFS_RPMB_UNIT_DESC_PARAM_REGION3_SIZE = 0x16,
+    UFS_RPMB_UNIT_DESC_PARAM_PROVISIONING_TYPE = 0x17,
+    UFS_RPMB_UNIT_DESC_PARAM_PHY_MEM_RSRC_CNT = 0x18,
+};
+
+/* Device descriptor parameters offsets in bytes*/
+enum device_desc_param {
+    UFS_DEVICE_DESC_PARAM_LEN = 0x0,
+    UFS_DEVICE_DESC_PARAM_TYPE = 0x1,
+    UFS_DEVICE_DESC_PARAM_DEVICE_TYPE = 0x2,
+    UFS_DEVICE_DESC_PARAM_DEVICE_CLASS = 0x3,
+    UFS_DEVICE_DESC_PARAM_DEVICE_SUB_CLASS = 0x4,
+    UFS_DEVICE_DESC_PARAM_PRTCL = 0x5,
+    UFS_DEVICE_DESC_PARAM_NUM_LU = 0x6,
+    UFS_DEVICE_DESC_PARAM_NUM_WLU = 0x7,
+    UFS_DEVICE_DESC_PARAM_BOOT_ENBL = 0x8,
+    UFS_DEVICE_DESC_PARAM_DESC_ACCSS_ENBL = 0x9,
+    UFS_DEVICE_DESC_PARAM_INIT_PWR_MODE = 0xA,
+    UFS_DEVICE_DESC_PARAM_HIGH_PR_LUN = 0xB,
+    UFS_DEVICE_DESC_PARAM_SEC_RMV_TYPE = 0xC,
+    UFS_DEVICE_DESC_PARAM_SEC_LU = 0xD,
+    UFS_DEVICE_DESC_PARAM_BKOP_TERM_LT = 0xE,
+    UFS_DEVICE_DESC_PARAM_ACTVE_ICC_LVL = 0xF,
+    UFS_DEVICE_DESC_PARAM_SPEC_VER = 0x10,
+    UFS_DEVICE_DESC_PARAM_MANF_DATE = 0x12,
+    UFS_DEVICE_DESC_PARAM_MANF_NAME = 0x14,
+    UFS_DEVICE_DESC_PARAM_PRDCT_NAME = 0x15,
+    UFS_DEVICE_DESC_PARAM_SN = 0x16,
+    UFS_DEVICE_DESC_PARAM_OEM_ID = 0x17,
+    UFS_DEVICE_DESC_PARAM_MANF_ID = 0x18,
+    UFS_DEVICE_DESC_PARAM_UD_OFFSET = 0x1A,
+    UFS_DEVICE_DESC_PARAM_UD_LEN = 0x1B,
+    UFS_DEVICE_DESC_PARAM_RTT_CAP = 0x1C,
+    UFS_DEVICE_DESC_PARAM_FRQ_RTC = 0x1D,
+    UFS_DEVICE_DESC_PARAM_UFS_FEAT = 0x1F,
+    UFS_DEVICE_DESC_PARAM_FFU_TMT = 0x20,
+    UFS_DEVICE_DESC_PARAM_Q_DPTH = 0x21,
+    UFS_DEVICE_DESC_PARAM_DEV_VER = 0x22,
+    UFS_DEVICE_DESC_PARAM_NUM_SEC_WPA = 0x24,
+    UFS_DEVICE_DESC_PARAM_PSA_MAX_DATA = 0x25,
+    UFS_DEVICE_DESC_PARAM_PSA_TMT = 0x29,
+    UFS_DEVICE_DESC_PARAM_PRDCT_REV = 0x2A,
+    UFS_DEVICE_DESC_PARAM_HPB_VER = 0x40,
+    UFS_DEVICE_DESC_PARAM_HPB_CONTROL = 0x42,
+    UFS_DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP = 0x4F,
+    UFS_DEVICE_DESC_PARAM_WB_PRESRV_USRSPC_EN = 0x53,
+    UFS_DEVICE_DESC_PARAM_WB_TYPE = 0x54,
+    UFS_DEVICE_DESC_PARAM_WB_SHARED_ALLOC_UNITS = 0x55,
+};
+
+/* Interconnect descriptor parameters offsets in bytes*/
+enum interconnect_desc_param {
+    UFS_INTERCONNECT_DESC_PARAM_LEN = 0x0,
+    UFS_INTERCONNECT_DESC_PARAM_TYPE = 0x1,
+    UFS_INTERCONNECT_DESC_PARAM_UNIPRO_VER = 0x2,
+    UFS_INTERCONNECT_DESC_PARAM_MPHY_VER = 0x4,
+};
+
+/* Geometry descriptor parameters offsets in bytes*/
+enum geometry_desc_param {
+    UFS_GEOMETRY_DESC_PARAM_LEN = 0x0,
+    UFS_GEOMETRY_DESC_PARAM_TYPE = 0x1,
+    UFS_GEOMETRY_DESC_PARAM_DEV_CAP = 0x4,
+    UFS_GEOMETRY_DESC_PARAM_MAX_NUM_LUN = 0xC,
+    UFS_GEOMETRY_DESC_PARAM_SEG_SIZE = 0xD,
+    UFS_GEOMETRY_DESC_PARAM_ALLOC_UNIT_SIZE = 0x11,
+    UFS_GEOMETRY_DESC_PARAM_MIN_BLK_SIZE = 0x12,
+    UFS_GEOMETRY_DESC_PARAM_OPT_RD_BLK_SIZE = 0x13,
+    UFS_GEOMETRY_DESC_PARAM_OPT_WR_BLK_SIZE = 0x14,
+    UFS_GEOMETRY_DESC_PARAM_MAX_IN_BUF_SIZE = 0x15,
+    UFS_GEOMETRY_DESC_PARAM_MAX_OUT_BUF_SIZE = 0x16,
+    UFS_GEOMETRY_DESC_PARAM_RPMB_RW_SIZE = 0x17,
+    UFS_GEOMETRY_DESC_PARAM_DYN_CAP_RSRC_PLC = 0x18,
+    UFS_GEOMETRY_DESC_PARAM_DATA_ORDER = 0x19,
+    UFS_GEOMETRY_DESC_PARAM_MAX_NUM_CTX = 0x1A,
+    UFS_GEOMETRY_DESC_PARAM_TAG_UNIT_SIZE = 0x1B,
+    UFS_GEOMETRY_DESC_PARAM_TAG_RSRC_SIZE = 0x1C,
+    UFS_GEOMETRY_DESC_PARAM_SEC_RM_TYPES = 0x1D,
+    UFS_GEOMETRY_DESC_PARAM_MEM_TYPES = 0x1E,
+    UFS_GEOMETRY_DESC_PARAM_SCM_MAX_NUM_UNITS = 0x20,
+    UFS_GEOMETRY_DESC_PARAM_SCM_CAP_ADJ_FCTR = 0x24,
+    UFS_GEOMETRY_DESC_PARAM_NPM_MAX_NUM_UNITS = 0x26,
+    UFS_GEOMETRY_DESC_PARAM_NPM_CAP_ADJ_FCTR = 0x2A,
+    UFS_GEOMETRY_DESC_PARAM_ENM1_MAX_NUM_UNITS = 0x2C,
+    UFS_GEOMETRY_DESC_PARAM_ENM1_CAP_ADJ_FCTR = 0x30,
+    UFS_GEOMETRY_DESC_PARAM_ENM2_MAX_NUM_UNITS = 0x32,
+    UFS_GEOMETRY_DESC_PARAM_ENM2_CAP_ADJ_FCTR = 0x36,
+    UFS_GEOMETRY_DESC_PARAM_ENM3_MAX_NUM_UNITS = 0x38,
+    UFS_GEOMETRY_DESC_PARAM_ENM3_CAP_ADJ_FCTR = 0x3C,
+    UFS_GEOMETRY_DESC_PARAM_ENM4_MAX_NUM_UNITS = 0x3E,
+    UFS_GEOMETRY_DESC_PARAM_ENM4_CAP_ADJ_FCTR = 0x42,
+    UFS_GEOMETRY_DESC_PARAM_OPT_LOG_BLK_SIZE = 0x44,
+    UFS_GEOMETRY_DESC_PARAM_HPB_REGION_SIZE = 0x48,
+    UFS_GEOMETRY_DESC_PARAM_HPB_NUMBER_LU = 0x49,
+    UFS_GEOMETRY_DESC_PARAM_HPB_SUBREGION_SIZE = 0x4A,
+    UFS_GEOMETRY_DESC_PARAM_HPB_MAX_ACTIVE_REGS = 0x4B,
+    UFS_GEOMETRY_DESC_PARAM_WB_MAX_ALLOC_UNITS = 0x4F,
+    UFS_GEOMETRY_DESC_PARAM_WB_MAX_WB_LUNS = 0x53,
+    UFS_GEOMETRY_DESC_PARAM_WB_BUFF_CAP_ADJ = 0x54,
+    UFS_GEOMETRY_DESC_PARAM_WB_SUP_RED_TYPE = 0x55,
+    UFS_GEOMETRY_DESC_PARAM_WB_SUP_WB_TYPE = 0x56,
+};
+
+/* Health descriptor parameters offsets in bytes*/
+enum health_desc_param {
+    UFS_HEALTH_DESC_PARAM_LEN = 0x0,
+    UFS_HEALTH_DESC_PARAM_TYPE = 0x1,
+    UFS_HEALTH_DESC_PARAM_EOL_INFO = 0x2,
+    UFS_HEALTH_DESC_PARAM_LIFE_TIME_EST_A = 0x3,
+    UFS_HEALTH_DESC_PARAM_LIFE_TIME_EST_B = 0x4,
+};
+
+/* WriteBooster buffer mode */
+enum {
+    UFS_WB_BUF_MODE_LU_DEDICATED = 0x0,
+    UFS_WB_BUF_MODE_SHARED = 0x1,
+};
+
+/*
+ * Logical Unit Write Protect
+ * 00h: LU not write protected
+ * 01h: LU write protected when fPowerOnWPEn =1
+ * 02h: LU permanently write protected when fPermanentWPEn =1
+ */
+enum ufs_lu_wp_type {
+    UFS_LU_NO_WP = 0x00,
+    UFS_LU_POWER_ON_WP = 0x01,
+    UFS_LU_PERM_WP = 0x02,
+};
+
+/* UTP QUERY Transaction Specific Fields OpCode */
+enum query_opcode {
+    UFS_UPIU_QUERY_OPCODE_NOP = 0x0,
+    UFS_UPIU_QUERY_OPCODE_READ_DESC = 0x1,
+    UFS_UPIU_QUERY_OPCODE_WRITE_DESC = 0x2,
+    UFS_UPIU_QUERY_OPCODE_READ_ATTR = 0x3,
+    UFS_UPIU_QUERY_OPCODE_WRITE_ATTR = 0x4,
+    UFS_UPIU_QUERY_OPCODE_READ_FLAG = 0x5,
+    UFS_UPIU_QUERY_OPCODE_SET_FLAG = 0x6,
+    UFS_UPIU_QUERY_OPCODE_CLEAR_FLAG = 0x7,
+    UFS_UPIU_QUERY_OPCODE_TOGGLE_FLAG = 0x8,
+};
+
+/* Query response result code */
+typedef enum QueryRespCode {
+    UFS_QUERY_RESULT_SUCCESS = 0x00,
+    UFS_QUERY_RESULT_NOT_READABLE = 0xF6,
+    UFS_QUERY_RESULT_NOT_WRITEABLE = 0xF7,
+    UFS_QUERY_RESULT_ALREADY_WRITTEN = 0xF8,
+    UFS_QUERY_RESULT_INVALID_LENGTH = 0xF9,
+    UFS_QUERY_RESULT_INVALID_VALUE = 0xFA,
+    UFS_QUERY_RESULT_INVALID_SELECTOR = 0xFB,
+    UFS_QUERY_RESULT_INVALID_INDEX = 0xFC,
+    UFS_QUERY_RESULT_INVALID_IDN = 0xFD,
+    UFS_QUERY_RESULT_INVALID_OPCODE = 0xFE,
+    UFS_QUERY_RESULT_GENERAL_FAILURE = 0xFF,
+} QueryRespCode;
+
+/* UTP Transfer Request Command Type (CT) */
+enum {
+    UFS_UPIU_COMMAND_SET_TYPE_SCSI = 0x0,
+    UFS_UPIU_COMMAND_SET_TYPE_UFS = 0x1,
+    UFS_UPIU_COMMAND_SET_TYPE_QUERY = 0x2,
+};
+
+/* Task management service response */
+enum {
+    UFS_UPIU_TASK_MANAGEMENT_FUNC_COMPL = 0x00,
+    UFS_UPIU_TASK_MANAGEMENT_FUNC_NOT_SUPPORTED = 0x04,
+    UFS_UPIU_TASK_MANAGEMENT_FUNC_SUCCEEDED = 0x08,
+    UFS_UPIU_TASK_MANAGEMENT_FUNC_FAILED = 0x05,
+    UFS_UPIU_INCORRECT_LOGICAL_UNIT_NO = 0x09,
+};
+
+/* UFS device power modes */
+enum ufs_dev_pwr_mode {
+    UFS_ACTIVE_PWR_MODE = 1,
+    UFS_SLEEP_PWR_MODE = 2,
+    UFS_POWERDOWN_PWR_MODE = 3,
+    UFS_DEEPSLEEP_PWR_MODE = 4,
+};
+
+/*
+ * struct UtpCmdRsp - Response UPIU structure
+ * @residual_transfer_count: Residual transfer count DW-3
+ * @reserved: Reserved double words DW-4 to DW-7
+ * @sense_data_len: Sense data length DW-8 U16
+ * @sense_data: Sense data field DW-8 to DW-12
+ */
+typedef struct QEMU_PACKED UtpCmdRsp {
+    uint32_t residual_transfer_count;
+    uint32_t reserved[4];
+    uint16_t sense_data_len;
+    uint8_t sense_data[UFS_SENSE_SIZE];
+} UtpCmdRsp;
+
+/*
+ * struct UtpUpiuRsp - general upiu response structure
+ * @header: UPIU header structure DW-0 to DW-2
+ * @sr: fields structure for scsi command DW-3 to DW-12
+ * @qr: fields structure for query request DW-3 to DW-7
+ */
+typedef struct QEMU_PACKED UtpUpiuRsp {
+    UtpUpiuHeader header;
+    union {
+        UtpCmdRsp sr;
+        UtpUpiuQuery qr;
+    };
+} UtpUpiuRsp;
+
+static inline void _ufs_check_size(void)
+{
+    QEMU_BUILD_BUG_ON(sizeof(UfsReg) != 0x104);
+    QEMU_BUILD_BUG_ON(sizeof(DeviceDescriptor) != 89);
+    QEMU_BUILD_BUG_ON(sizeof(GeometryDescriptor) != 87);
+    QEMU_BUILD_BUG_ON(sizeof(UnitDescriptor) != 45);
+    QEMU_BUILD_BUG_ON(sizeof(RpmbUnitDescriptor) != 35);
+    QEMU_BUILD_BUG_ON(sizeof(PowerParametersDescriptor) != 98);
+    QEMU_BUILD_BUG_ON(sizeof(InterconnectDescriptor) != 6);
+    QEMU_BUILD_BUG_ON(sizeof(StringDescriptor) != 254);
+    QEMU_BUILD_BUG_ON(sizeof(DeviceHealthDescriptor) != 45);
+    QEMU_BUILD_BUG_ON(sizeof(Flags) != 0x13);
+    QEMU_BUILD_BUG_ON(sizeof(UtpUpiuHeader) != 12);
+    QEMU_BUILD_BUG_ON(sizeof(UtpUpiuQuery) != 276);
+    QEMU_BUILD_BUG_ON(sizeof(UtpUpiuCmd) != 20);
+    QEMU_BUILD_BUG_ON(sizeof(UtpUpiuReq) != 288);
+    QEMU_BUILD_BUG_ON(sizeof(UfshcdSgEntry) != 16);
+    QEMU_BUILD_BUG_ON(sizeof(RequestDescHeader) != 16);
+    QEMU_BUILD_BUG_ON(sizeof(UtpTransferReqDesc) != 32);
+    QEMU_BUILD_BUG_ON(sizeof(UtpTaskReqDesc) != 80);
+    QEMU_BUILD_BUG_ON(sizeof(UtpCmdRsp) != 40);
+    QEMU_BUILD_BUG_ON(sizeof(UtpUpiuRsp) != 288);
+}
+#endif
index c646f267a45c7139854013e612e0a43aa82b52fc..63d1583887aa5ce2f14f4f2d2f514b0f87aae555 100644 (file)
@@ -13,8 +13,6 @@
 #ifndef BLOCK_WRITE_THRESHOLD_H
 #define BLOCK_WRITE_THRESHOLD_H
 
-#include "block/block_int.h"
-
 /*
  * bdrv_write_threshold_set:
  *
@@ -36,27 +34,12 @@ void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes);
 uint64_t bdrv_write_threshold_get(const BlockDriverState *bs);
 
 /*
- * bdrv_write_threshold_is_set
- *
- * Tell if a write threshold is set for a given BDS.
- */
-bool bdrv_write_threshold_is_set(const BlockDriverState *bs);
-
-/*
- * bdrv_write_threshold_exceeded
- *
- * Return the extent of a write request that exceeded the threshold,
- * or zero if the request is below the threshold.
- * Return zero also if the threshold was not set.
- *
- * NOTE: here we assume the following holds for each request this code
- * deals with:
- *
- * assert((req->offset + req->bytes) <= UINT64_MAX)
+ * bdrv_write_threshold_check_write
  *
- * Please not there is *not* an actual C assert().
+ * Check whether the specified request exceeds the write threshold.
+ * If so, send a corresponding event and disable write threshold checking.
  */
-uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs,
-                                       const BdrvTrackedRequest *req);
+void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset,
+                                      int64_t bytes);
 
 #endif
index a5538433649488772f8dd7e8995d2f383506515b..0ff6f875116d921c9333de45fa5c9fb5fbfdd67f 100644 (file)
@@ -78,7 +78,7 @@ bool qemu_chr_fe_backend_open(CharBackend *be);
  *             is not supported and will not be attempted
  * @opaque: an opaque pointer for the callbacks
  * @context: a main loop context or NULL for the default
- * @set_open: whether to call qemu_chr_fe_set_open() implicitely when
+ * @set_open: whether to call qemu_chr_fe_set_open() implicitly when
  * any of the handler is non-NULL
  * @sync_state: whether to issue event callback with updated state
  *
@@ -138,7 +138,7 @@ void qemu_chr_fe_disconnect(CharBackend *be);
 /**
  * qemu_chr_fe_wait_connected:
  *
- * Wait for characted backend to be connected, return < 0 on error or
+ * Wait for character backend to be connected, return < 0 on error or
  * if no associated Chardev.
  */
 int qemu_chr_fe_wait_connected(CharBackend *be, Error **errp);
@@ -172,7 +172,24 @@ void qemu_chr_fe_set_open(CharBackend *be, int fe_open);
  * Chardev.
  */
 void qemu_chr_fe_printf(CharBackend *be, const char *fmt, ...)
-    GCC_FMT_ATTR(2, 3);
+    G_GNUC_PRINTF(2, 3);
+
+
+/**
+ * FEWatchFunc: a #GSourceFunc called when any conditions requested by
+ *              qemu_chr_fe_add_watch() is satisfied.
+ * @do_not_use: depending on the underlying chardev, a GIOChannel or a
+ *              QIOChannel. DO NOT USE!
+ * @cond: bitwise combination of conditions watched and satisfied
+ *              before calling this callback.
+ * @data: user data passed at creation to qemu_chr_fe_add_watch(). Can
+ *              be NULL.
+ *
+ * Returns: G_SOURCE_REMOVE if the GSource should be removed from the
+ *              main loop, or G_SOURCE_CONTINUE to leave the GSource in
+ *              the main loop.
+ */
+typedef gboolean (*FEWatchFunc)(void *do_not_use, GIOCondition condition, void *data);
 
 /**
  * qemu_chr_fe_add_watch:
@@ -188,10 +205,13 @@ void qemu_chr_fe_printf(CharBackend *be, const char *fmt, ...)
  * Note that you are responsible to update the front-end sources if
  * you are switching the main context with qemu_chr_fe_set_handlers().
  *
+ * Warning: DO NOT use the first callback argument (it may be either
+ * a GIOChannel or a QIOChannel, depending on the underlying chardev)
+ *
  * Returns: the source tag
  */
 guint qemu_chr_fe_add_watch(CharBackend *be, GIOCondition cond,
-                            GIOFunc func, void *user_data);
+                            FEWatchFunc func, void *user_data);
 
 /**
  * qemu_chr_fe_write:
diff --git a/include/chardev/char-socket.h b/include/chardev/char-socket.h
new file mode 100644 (file)
index 0000000..0708ca6
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef CHAR_SOCKET_H
+#define CHAR_SOCKET_H
+
+#include "io/channel-socket.h"
+#include "io/channel-tls.h"
+#include "io/net-listener.h"
+#include "chardev/char.h"
+#include "qom/object.h"
+
+#define TCP_MAX_FDS 16
+
+typedef struct {
+    char buf[21];
+    size_t buflen;
+} TCPChardevTelnetInit;
+
+typedef enum {
+    TCP_CHARDEV_STATE_DISCONNECTED,
+    TCP_CHARDEV_STATE_CONNECTING,
+    TCP_CHARDEV_STATE_CONNECTED,
+} TCPChardevState;
+
+typedef ChardevClass SocketChardevClass;
+
+struct SocketChardev {
+    Chardev parent;
+    QIOChannel *ioc; /* Client I/O channel */
+    QIOChannelSocket *sioc; /* Client master channel */
+    QIONetListener *listener;
+    GSource *hup_source;
+    QCryptoTLSCreds *tls_creds;
+    char *tls_authz;
+    TCPChardevState state;
+    int max_size;
+    int do_telnetopt;
+    int do_nodelay;
+    int *read_msgfds;
+    size_t read_msgfds_num;
+    int *write_msgfds;
+    size_t write_msgfds_num;
+    bool registered_yank;
+
+    SocketAddress *addr;
+    bool is_listen;
+    bool is_telnet;
+    bool is_tn3270;
+    GSource *telnet_source;
+    TCPChardevTelnetInit *telnet_init;
+
+    bool is_websock;
+
+    GSource *reconnect_timer;
+    int64_t reconnect_time;
+    bool connect_err_reported;
+
+    QIOTask *connect_task;
+};
+typedef struct SocketChardev SocketChardev;
+
+DECLARE_INSTANCE_CHECKER(SocketChardev, SOCKET_CHARDEV,
+                         TYPE_CHARDEV_SOCKET)
+
+#endif /* CHAR_SOCKET_H */
index db42f0a8c6310ac22c7c059f33a4a4470dde4111..01df55f9e8c8da5dfd40cafa195cfa606e172fb7 100644 (file)
@@ -65,6 +65,8 @@ struct Chardev {
     char *filename;
     int logfd;
     int be_open;
+    /* used to coordinate the chardev-change special-case: */
+    bool handover_yank_instance;
     GSource *gsource;
     GMainContext *gcontext;
     DECLARE_BITMAP(features, QEMU_CHAR_FEATURE_LAST);
@@ -184,7 +186,7 @@ int qemu_chr_be_can_write(Chardev *s);
  * the caller should call @qemu_chr_be_can_write to determine how much data
  * the front end can currently accept.
  */
-void qemu_chr_be_write(Chardev *s, uint8_t *buf, int len);
+void qemu_chr_be_write(Chardev *s, const uint8_t *buf, int len);
 
 /**
  * qemu_chr_be_write_impl:
@@ -193,7 +195,7 @@ void qemu_chr_be_write(Chardev *s, uint8_t *buf, int len);
  *
  * Implementation of back end writing. Used by replay module.
  */
-void qemu_chr_be_write_impl(Chardev *s, uint8_t *buf, int len);
+void qemu_chr_be_write_impl(Chardev *s, const uint8_t *buf, int len);
 
 /**
  * qemu_chr_be_update_read_handlers:
@@ -251,27 +253,59 @@ struct ChardevClass {
     ObjectClass parent_class;
 
     bool internal; /* TODO: eventually use TYPE_USER_CREATABLE */
+    bool supports_yank;
+
+    /* parse command line options and populate QAPI @backend */
     void (*parse)(QemuOpts *opts, ChardevBackend *backend, Error **errp);
 
+    /* called after construction, open/starts the backend */
     void (*open)(Chardev *chr, ChardevBackend *backend,
                  bool *be_opened, Error **errp);
 
+    /* write buf to the backend */
     int (*chr_write)(Chardev *s, const uint8_t *buf, int len);
+
+    /*
+     * Read from the backend (blocking). A typical front-end will instead rely
+     * on chr_can_read/chr_read being called when polling/looping.
+     */
     int (*chr_sync_read)(Chardev *s, const uint8_t *buf, int len);
+
+    /* create a watch on the backend */
     GSource *(*chr_add_watch)(Chardev *s, GIOCondition cond);
+
+    /* update the backend internal sources */
     void (*chr_update_read_handler)(Chardev *s);
+
+    /* send an ioctl to the backend */
     int (*chr_ioctl)(Chardev *s, int cmd, void *arg);
+
+    /* get ancillary-received fds during last read */
     int (*get_msgfds)(Chardev *s, int* fds, int num);
+
+    /* set ancillary fds to be sent with next write */
     int (*set_msgfds)(Chardev *s, int *fds, int num);
+
+    /* accept the given fd */
     int (*chr_add_client)(Chardev *chr, int fd);
+
+    /* wait for a connection */
     int (*chr_wait_connected)(Chardev *chr, Error **errp);
+
+    /* disconnect a connection */
     void (*chr_disconnect)(Chardev *chr);
+
+    /* called by frontend when it can read */
     void (*chr_accept_input)(Chardev *chr);
+
+    /* set terminal echo */
     void (*chr_set_echo)(Chardev *chr, bool echo);
+
+    /* notify the backend of frontend open state */
     void (*chr_set_fe_open)(Chardev *chr, int fe_open);
+
+    /* handle various events */
     void (*chr_be_event)(Chardev *s, QEMUChrEvent event);
-    /* Return 0 if succeeded, 1 if failed */
-    int (*chr_machine_done)(Chardev *chr);
 };
 
 Chardev *qemu_chardev_new(const char *id, const char *typename,
@@ -283,7 +317,7 @@ extern int term_escape_char;
 GSource *qemu_chr_timeout_add_ms(Chardev *chr, guint ms,
                                  GSourceFunc func, void *private);
 
-/* console.c */
-void qemu_chr_parse_vc(QemuOpts *opts, ChardevBackend *backend, Error **errp);
+void suspend_mux_open(void);
+void resume_mux_open(void);
 
 #endif
diff --git a/include/crypto/aes-round.h b/include/crypto/aes-round.h
new file mode 100644 (file)
index 0000000..854fb09
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+ * AES round fragments, generic version
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef CRYPTO_AES_ROUND_H
+#define CRYPTO_AES_ROUND_H
+
+/* Hosts with acceleration will usually need a 16-byte vector type. */
+typedef uint8_t AESStateVec __attribute__((vector_size(16)));
+
+typedef union {
+    uint8_t b[16];
+    uint32_t w[4];
+    uint64_t d[2];
+    AESStateVec v;
+} AESState;
+
+#include "host/crypto/aes-round.h"
+
+/*
+ * Perform MixColumns.
+ */
+
+void aesenc_MC_gen(AESState *ret, const AESState *st);
+void aesenc_MC_genrev(AESState *ret, const AESState *st);
+
+static inline void aesenc_MC(AESState *r, const AESState *st, bool be)
+{
+    if (HAVE_AES_ACCEL) {
+        aesenc_MC_accel(r, st, be);
+    } else if (HOST_BIG_ENDIAN == be) {
+        aesenc_MC_gen(r, st);
+    } else {
+        aesenc_MC_genrev(r, st);
+    }
+}
+
+/*
+ * Perform SubBytes + ShiftRows + AddRoundKey.
+ */
+
+void aesenc_SB_SR_AK_gen(AESState *ret, const AESState *st,
+                         const AESState *rk);
+void aesenc_SB_SR_AK_genrev(AESState *ret, const AESState *st,
+                            const AESState *rk);
+
+static inline void aesenc_SB_SR_AK(AESState *r, const AESState *st,
+                                   const AESState *rk, bool be)
+{
+    if (HAVE_AES_ACCEL) {
+        aesenc_SB_SR_AK_accel(r, st, rk, be);
+    } else if (HOST_BIG_ENDIAN == be) {
+        aesenc_SB_SR_AK_gen(r, st, rk);
+    } else {
+        aesenc_SB_SR_AK_genrev(r, st, rk);
+    }
+}
+
+/*
+ * Perform SubBytes + ShiftRows + MixColumns + AddRoundKey.
+ */
+
+void aesenc_SB_SR_MC_AK_gen(AESState *ret, const AESState *st,
+                            const AESState *rk);
+void aesenc_SB_SR_MC_AK_genrev(AESState *ret, const AESState *st,
+                               const AESState *rk);
+
+static inline void aesenc_SB_SR_MC_AK(AESState *r, const AESState *st,
+                                      const AESState *rk, bool be)
+{
+    if (HAVE_AES_ACCEL) {
+        aesenc_SB_SR_MC_AK_accel(r, st, rk, be);
+    } else if (HOST_BIG_ENDIAN == be) {
+        aesenc_SB_SR_MC_AK_gen(r, st, rk);
+    } else {
+        aesenc_SB_SR_MC_AK_genrev(r, st, rk);
+    }
+}
+
+/*
+ * Perform InvMixColumns.
+ */
+
+void aesdec_IMC_gen(AESState *ret, const AESState *st);
+void aesdec_IMC_genrev(AESState *ret, const AESState *st);
+
+static inline void aesdec_IMC(AESState *r, const AESState *st, bool be)
+{
+    if (HAVE_AES_ACCEL) {
+        aesdec_IMC_accel(r, st, be);
+    } else if (HOST_BIG_ENDIAN == be) {
+        aesdec_IMC_gen(r, st);
+    } else {
+        aesdec_IMC_genrev(r, st);
+    }
+}
+
+/*
+ * Perform InvSubBytes + InvShiftRows + AddRoundKey.
+ */
+
+void aesdec_ISB_ISR_AK_gen(AESState *ret, const AESState *st,
+                           const AESState *rk);
+void aesdec_ISB_ISR_AK_genrev(AESState *ret, const AESState *st,
+                              const AESState *rk);
+
+static inline void aesdec_ISB_ISR_AK(AESState *r, const AESState *st,
+                                     const AESState *rk, bool be)
+{
+    if (HAVE_AES_ACCEL) {
+        aesdec_ISB_ISR_AK_accel(r, st, rk, be);
+    } else if (HOST_BIG_ENDIAN == be) {
+        aesdec_ISB_ISR_AK_gen(r, st, rk);
+    } else {
+        aesdec_ISB_ISR_AK_genrev(r, st, rk);
+    }
+}
+
+/*
+ * Perform InvSubBytes + InvShiftRows + AddRoundKey + InvMixColumns.
+ */
+
+void aesdec_ISB_ISR_AK_IMC_gen(AESState *ret, const AESState *st,
+                               const AESState *rk);
+void aesdec_ISB_ISR_AK_IMC_genrev(AESState *ret, const AESState *st,
+                                  const AESState *rk);
+
+static inline void aesdec_ISB_ISR_AK_IMC(AESState *r, const AESState *st,
+                                         const AESState *rk, bool be)
+{
+    if (HAVE_AES_ACCEL) {
+        aesdec_ISB_ISR_AK_IMC_accel(r, st, rk, be);
+    } else if (HOST_BIG_ENDIAN == be) {
+        aesdec_ISB_ISR_AK_IMC_gen(r, st, rk);
+    } else {
+        aesdec_ISB_ISR_AK_IMC_genrev(r, st, rk);
+    }
+}
+
+/*
+ * Perform InvSubBytes + InvShiftRows + InvMixColumns + AddRoundKey.
+ */
+
+void aesdec_ISB_ISR_IMC_AK_gen(AESState *ret, const AESState *st,
+                               const AESState *rk);
+void aesdec_ISB_ISR_IMC_AK_genrev(AESState *ret, const AESState *st,
+                                  const AESState *rk);
+
+static inline void aesdec_ISB_ISR_IMC_AK(AESState *r, const AESState *st,
+                                         const AESState *rk, bool be)
+{
+    if (HAVE_AES_ACCEL) {
+        aesdec_ISB_ISR_IMC_AK_accel(r, st, rk, be);
+    } else if (HOST_BIG_ENDIAN == be) {
+        aesdec_ISB_ISR_IMC_AK_gen(r, st, rk);
+    } else {
+        aesdec_ISB_ISR_IMC_AK_genrev(r, st, rk);
+    }
+}
+
+#endif /* CRYPTO_AES_ROUND_H */
index ba297d6a73e7d9075da383a9902d5f8ec43de34b..381f24c9022d2aa8a0ea0a350bc7b6cc6dc7e4e4 100644 (file)
@@ -18,46 +18,23 @@ typedef struct aes_key_st AES_KEY;
 #define AES_decrypt QEMU_AES_decrypt
 
 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
-       AES_KEY *key);
+                        AES_KEY *key);
 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
-       AES_KEY *key);
+                        AES_KEY *key);
 
 void AES_encrypt(const unsigned char *in, unsigned char *out,
-       const AES_KEY *key);
+                 const AES_KEY *key);
 void AES_decrypt(const unsigned char *in, unsigned char *out,
-       const AES_KEY *key);
+                 const AES_KEY *key);
 
 extern const uint8_t AES_sbox[256];
 extern const uint8_t AES_isbox[256];
 
-/* AES ShiftRows and InvShiftRows */
-extern const uint8_t AES_shifts[16];
-extern const uint8_t AES_ishifts[16];
-
-/* AES InvMixColumns */
-/* AES_imc[x][0] = [x].[0e, 09, 0d, 0b]; */
-/* AES_imc[x][1] = [x].[0b, 0e, 09, 0d]; */
-/* AES_imc[x][2] = [x].[0d, 0b, 0e, 09]; */
-/* AES_imc[x][3] = [x].[09, 0d, 0b, 0e]; */
-extern const uint32_t AES_imc[256][4];
-
 /*
 AES_Te0[x] = S [x].[02, 01, 01, 03];
-AES_Te1[x] = S [x].[03, 02, 01, 01];
-AES_Te2[x] = S [x].[01, 03, 02, 01];
-AES_Te3[x] = S [x].[01, 01, 03, 02];
-AES_Te4[x] = S [x].[01, 01, 01, 01];
-
 AES_Td0[x] = Si[x].[0e, 09, 0d, 0b];
-AES_Td1[x] = Si[x].[0b, 0e, 09, 0d];
-AES_Td2[x] = Si[x].[0d, 0b, 0e, 09];
-AES_Td3[x] = Si[x].[09, 0d, 0b, 0e];
-AES_Td4[x] = Si[x].[01, 01, 01, 01];
 */
 
-extern const uint32_t AES_Te0[256], AES_Te1[256], AES_Te2[256],
-                      AES_Te3[256], AES_Te4[256];
-extern const uint32_t AES_Td0[256], AES_Td1[256], AES_Td2[256],
-                      AES_Td3[256], AES_Td4[256];
+extern const uint32_t AES_Te0[256], AES_Td0[256];
 
 #endif
diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h
new file mode 100644 (file)
index 0000000..8756105
--- /dev/null
@@ -0,0 +1,179 @@
+/*
+ * QEMU Crypto asymmetric algorithms
+ *
+ * Copyright (c) 2022 Bytedance
+ * Author: zhenwei pi <pizhenwei@bytedance.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QCRYPTO_AKCIPHER_H
+#define QCRYPTO_AKCIPHER_H
+
+#include "qapi/qapi-types-crypto.h"
+
+typedef struct QCryptoAkCipher QCryptoAkCipher;
+
+/**
+ * qcrypto_akcipher_supports:
+ * @opts: the asymmetric key algorithm and related options
+ *
+ * Determine if asymmetric key cipher described with @opts is
+ * supported by the current configured build
+ *
+ * Returns: true if it is supported, false otherwise.
+ */
+bool qcrypto_akcipher_supports(QCryptoAkCipherOptions *opts);
+
+/**
+ * qcrypto_akcipher_new:
+ * @opts: specify the algorithm and the related arguments
+ * @type: private or public key type
+ * @key: buffer to store the key
+ * @key_len: the length of key buffer
+ * @errp: error pointer
+ *
+ * Create akcipher context
+ *
+ * Returns: On success, a new QCryptoAkCipher initialized with @opt
+ * is created and returned, otherwise NULL is returned.
+ */
+
+QCryptoAkCipher *qcrypto_akcipher_new(const QCryptoAkCipherOptions *opts,
+                                      QCryptoAkCipherKeyType type,
+                                      const uint8_t *key, size_t key_len,
+                                      Error **errp);
+
+/**
+ * qcrypto_akcipher_encrypt:
+ * @akcipher: akcipher context
+ * @in: plaintext pending to be encrypted
+ * @in_len: length of plaintext, less or equal to the size reported
+ *          by a call to qcrypto_akcipher_max_plaintext_len()
+ * @out: buffer to store the ciphertext
+ * @out_len: length of ciphertext, less or equal to the size reported
+ *           by a call to qcrypto_akcipher_max_ciphertext_len()
+ * @errp: error pointer
+ *
+ * Encrypt @in and write ciphertext into @out
+ *
+ * Returns: length of ciphertext if encrypt succeed,
+ *          otherwise -1 is returned
+ */
+int qcrypto_akcipher_encrypt(QCryptoAkCipher *akcipher,
+                             const void *in, size_t in_len,
+                             void *out, size_t out_len, Error **errp);
+
+/**
+ * qcrypto_akcipher_decrypt:
+ * @akcipher: akcipher context
+ * @in: ciphertext to be decrypted
+ * @in_len: the length of ciphertext, less or equal to the size reported
+ *          by a call to qcrypto_akcipher_max_ciphertext_len()
+ * @out: buffer to store the plaintext
+ * @out_len: length of the plaintext buffer, less or equal to the size
+ *           reported by a call to qcrypto_akcipher_max_plaintext_len()
+ * @errp: error pointer
+ *
+ * Decrypt @in and write plaintext into @out
+ *
+ * Returns: length of plaintext if decrypt succeed,
+ *          otherwise -1 is returned
+ */
+int qcrypto_akcipher_decrypt(QCryptoAkCipher *akcipher,
+                             const void *in, size_t in_len,
+                             void *out, size_t out_len, Error **errp);
+
+/**
+ * qcrypto_akcipher_sign:
+ * @akcipher: akcipher context
+ * @in: data to be signed
+ * @in_len: the length of data, less or equal to the size reported
+ *          by a call to qcrypto_akcipher_max_dgst_len()
+ * @out: buffer to store the signature
+ * @out_len: length of the signature buffer, less or equal to the size
+ *           by a call to qcrypto_akcipher_max_signature_len()
+ * @errp: error pointer
+ *
+ * Generate signature for @in, write into @out
+ *
+ * Returns: length of signature if succeed,
+ *          otherwise -1 is returned
+ */
+int qcrypto_akcipher_sign(QCryptoAkCipher *akcipher,
+                          const void *in, size_t in_len,
+                          void *out, size_t out_len, Error **errp);
+
+/**
+ * qcrypto_akcipher_verify:
+ * @akcipher: akcipher context
+ * @in: pointer to the signature
+ * @in_len: length of signature, ess or equal to the size reported
+ *          by a call to qcrypto_akcipher_max_signature_len()
+ * @in2: pointer to original data
+ * @in2_len: the length of original data, less or equal to the size
+ *           by a call to qcrypto_akcipher_max_dgst_len()
+ * @errp: error pointer
+ *
+ * Verify @in and @in2 match or not
+ *
+ * Returns: 0 for succeed,
+ *          otherwise -1 is returned
+ */
+int qcrypto_akcipher_verify(QCryptoAkCipher *akcipher,
+                            const void *in, size_t in_len,
+                            const void *in2, size_t in2_len, Error **errp);
+
+int qcrypto_akcipher_max_plaintext_len(QCryptoAkCipher *akcipher);
+
+int qcrypto_akcipher_max_ciphertext_len(QCryptoAkCipher *akcipher);
+
+int qcrypto_akcipher_max_signature_len(QCryptoAkCipher *akcipher);
+
+int qcrypto_akcipher_max_dgst_len(QCryptoAkCipher *akcipher);
+
+/**
+ * qcrypto_akcipher_free:
+ * @akcipher: akcipher context
+ *
+ * Free the akcipher context
+ *
+ */
+void qcrypto_akcipher_free(QCryptoAkCipher *akcipher);
+
+/**
+ * qcrypto_akcipher_export_p8info:
+ * @opts: the options of the akcipher to be exported.
+ * @key: the original key of the akcipher to be exported.
+ * @keylen: length of the 'key'
+ * @dst: output parameter, if export succeed, *dst is set to the
+ * PKCS#8 encoded private key, caller MUST free this key with
+ * g_free after use.
+ * @dst_len: output parameter, indicates the length of PKCS#8 encoded
+ * key.
+ *
+ * Export the akcipher into DER encoded pkcs#8 private key info, expects
+ * |key| stores a valid asymmetric PRIVATE key.
+ *
+ * Returns: 0 for succeed, otherwise -1 is returned.
+ */
+int qcrypto_akcipher_export_p8info(const QCryptoAkCipherOptions *opts,
+                                   uint8_t *key, size_t keylen,
+                                   uint8_t **dst, size_t *dst_len,
+                                   Error **errp);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoAkCipher, qcrypto_akcipher_free)
+
+#endif /* QCRYPTO_AKCIPHER_H */
index 7a65e8e402a4e72ad1e6874f1e4b8d3ad0408b2e..4f63a37872773b5fe39d2023f101de09e0f20519 100644 (file)
@@ -29,24 +29,24 @@ typedef struct QCryptoBlock QCryptoBlock;
 /* See also QCryptoBlockFormat, QCryptoBlockCreateOptions
  * and QCryptoBlockOpenOptions in qapi/crypto.json */
 
-typedef ssize_t (*QCryptoBlockReadFunc)(QCryptoBlock *block,
-                                        size_t offset,
-                                        uint8_t *buf,
-                                        size_t buflen,
-                                        void *opaque,
-                                        Error **errp);
+typedef int (*QCryptoBlockReadFunc)(QCryptoBlock *block,
+                                    size_t offset,
+                                    uint8_t *buf,
+                                    size_t buflen,
+                                    void *opaque,
+                                    Error **errp);
 
-typedef ssize_t (*QCryptoBlockInitFunc)(QCryptoBlock *block,
-                                        size_t headerlen,
-                                        void *opaque,
-                                        Error **errp);
+typedef int (*QCryptoBlockInitFunc)(QCryptoBlock *block,
+                                    size_t headerlen,
+                                    void *opaque,
+                                    Error **errp);
 
-typedef ssize_t (*QCryptoBlockWriteFunc)(QCryptoBlock *block,
-                                         size_t offset,
-                                         const uint8_t *buf,
-                                         size_t buflen,
-                                         void *opaque,
-                                         Error **errp);
+typedef int (*QCryptoBlockWriteFunc)(QCryptoBlock *block,
+                                     size_t offset,
+                                     const uint8_t *buf,
+                                     size_t buflen,
+                                     void *opaque,
+                                     Error **errp);
 
 /**
  * qcrypto_block_has_format:
diff --git a/include/crypto/clmul.h b/include/crypto/clmul.h
new file mode 100644 (file)
index 0000000..446931f
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Carry-less multiply operations.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef CRYPTO_CLMUL_H
+#define CRYPTO_CLMUL_H
+
+#include "qemu/int128.h"
+#include "host/crypto/clmul.h"
+
+/**
+ * clmul_8x8_low:
+ *
+ * Perform eight 8x8->8 carry-less multiplies.
+ */
+uint64_t clmul_8x8_low(uint64_t, uint64_t);
+
+/**
+ * clmul_8x4_even:
+ *
+ * Perform four 8x8->16 carry-less multiplies.
+ * The odd bytes of the inputs are ignored.
+ */
+uint64_t clmul_8x4_even(uint64_t, uint64_t);
+
+/**
+ * clmul_8x4_odd:
+ *
+ * Perform four 8x8->16 carry-less multiplies.
+ * The even bytes of the inputs are ignored.
+ */
+uint64_t clmul_8x4_odd(uint64_t, uint64_t);
+
+/**
+ * clmul_8x4_packed:
+ *
+ * Perform four 8x8->16 carry-less multiplies.
+ */
+uint64_t clmul_8x4_packed(uint32_t, uint32_t);
+
+/**
+ * clmul_16x2_even:
+ *
+ * Perform two 16x16->32 carry-less multiplies.
+ * The odd words of the inputs are ignored.
+ */
+uint64_t clmul_16x2_even(uint64_t, uint64_t);
+
+/**
+ * clmul_16x2_odd:
+ *
+ * Perform two 16x16->32 carry-less multiplies.
+ * The even words of the inputs are ignored.
+ */
+uint64_t clmul_16x2_odd(uint64_t, uint64_t);
+
+/**
+ * clmul_32:
+ *
+ * Perform a 32x32->64 carry-less multiply.
+ */
+uint64_t clmul_32(uint32_t, uint32_t);
+
+/**
+ * clmul_64:
+ *
+ * Perform a 64x64->128 carry-less multiply.
+ */
+Int128 clmul_64_gen(uint64_t, uint64_t);
+
+static inline Int128 clmul_64(uint64_t a, uint64_t b)
+{
+    if (HAVE_CLMUL_ACCEL) {
+        return clmul_64_accel(a, b);
+    } else {
+        return clmul_64_gen(a, b);
+    }
+}
+
+#endif /* CRYPTO_CLMUL_H */
index 7ca596c38764691e11e4033456b052c55c8474b6..af8d12234da3258577e976a0f886f10ff91a3167 100644 (file)
 
 /* d3des.h -
  *
- *     Headers and defines for d3des.c
- *     Graven Imagery, 1992.
+ * Headers and defines for d3des.c
+ * Graven Imagery, 1992.
  *
  * Copyright (c) 1988,1989,1990,1991,1992 by Richard Outerbridge
- *     (GEnie : OUTER; CIS : [71755,204])
+ * (GEnie : OUTER; CIS : [71755,204])
  */
 
-#define EN0    0       /* MODE == encrypt */
-#define DE1    1       /* MODE == decrypt */
+#define EN0 0   /* MODE == encrypt */
+#define DE1 1   /* MODE == decrypt */
 
 void deskey(unsigned char *, int);
-/*                   hexkey[8]     MODE
+/*          hexkey[8]        MODE
  * Sets the internal key register according to the hexadecimal
  * key contained in the 8 bytes of hexkey, according to the DES,
  * for encryption or decryption according to MODE.
  */
 
 void usekey(unsigned long *);
-/*                 cookedkey[32]
+/*          cookedkey[32]
  * Loads the internal key register with the data in cookedkey.
  */
 
 void des(unsigned char *, unsigned char *);
-/*                 from[8]           to[8]
+/*       from[8]          to[8]
  * Encrypts/Decrypts (according to the key currently loaded in the
  * internal key register) one block of eight bytes at address 'from'
  * into the block at address 'to'.  They can be the same.
index e41521519c727aa051c78ef64a408a6245eaa126..a09d5732daf81d3d0cf6881e3c7c3bb8b1f7e49a 100644 (file)
@@ -32,7 +32,7 @@
  * sector.
  *
  * <example>
- *   <title>Encrypting block data with initialiation vectors</title>
+ *   <title>Encrypting block data with initialization vectors</title>
  *   <programlisting>
  * uint8_t *data = ....data to encrypt...
  * size_t ndata = XXX;
@@ -147,7 +147,7 @@ QCryptoIVGen *qcrypto_ivgen_new(QCryptoIVGenAlgorithm alg,
  * @niv: the number of bytes in @iv
  * @errp: pointer to a NULL-initialized error object
  *
- * Calculate a new initialiation vector for the data
+ * Calculate a new initialization vector for the data
  * to be stored in sector @sector. The IV will be
  * written into the buffer @iv of size @niv.
  *
index 42c7ff7af65217653266723088976b38ba95b4a6..a0a22e1abd276b86e1180d04b9b2c860df81f295 100644 (file)
@@ -48,13 +48,11 @@ struct QCryptoSecretCommonClass {
 };
 
 
-extern int qcrypto_secret_lookup(const char *secretid,
-                                 uint8_t **data,
-                                 size_t *datalen,
-                                 Error **errp);
-extern char *qcrypto_secret_lookup_as_utf8(const char *secretid,
-                                           Error **errp);
-extern char *qcrypto_secret_lookup_as_base64(const char *secretid,
-                                             Error **errp);
+int qcrypto_secret_lookup(const char *secretid,
+                          uint8_t **data,
+                          size_t *datalen,
+                          Error **errp);
+char *qcrypto_secret_lookup_as_utf8(const char *secretid, Error **errp);
+char *qcrypto_secret_lookup_as_base64(const char *secretid, Error **errp);
 
 #endif /* QCRYPTO_SECRET_COMMON_H */
diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h
new file mode 100644 (file)
index 0000000..382b26d
--- /dev/null
@@ -0,0 +1,15 @@
+#ifndef QEMU_SM4_H
+#define QEMU_SM4_H
+
+extern const uint8_t sm4_sbox[256];
+extern const uint32_t sm4_ck[32];
+
+static inline uint32_t sm4_subword(uint32_t word)
+{
+    return sm4_sbox[word & 0xff] |
+           sm4_sbox[(word >> 8) & 0xff] << 8 |
+           sm4_sbox[(word >> 16) & 0xff] << 16 |
+           sm4_sbox[(word >> 24) & 0xff] << 24;
+}
+
+#endif
index bb9ee53e03a1dfc2edf39ac09033c261e382853b..3bd2003f32d1f498bc37fca0a55cd79f60b0cf68 100644 (file)
@@ -8,8 +8,8 @@
  * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
-#ifndef QCRYPTO_TLSCIPHERSUITES_H
-#define QCRYPTO_TLSCIPHERSUITES_H
+#ifndef QCRYPTO_TLS_CIPHER_SUITES_H
+#define QCRYPTO_TLS_CIPHER_SUITES_H
 
 #include "qom/object.h"
 #include "crypto/tlscreds.h"
@@ -19,12 +19,6 @@ typedef struct QCryptoTLSCipherSuites QCryptoTLSCipherSuites;
 DECLARE_INSTANCE_CHECKER(QCryptoTLSCipherSuites, QCRYPTO_TLS_CIPHER_SUITES,
                          TYPE_QCRYPTO_TLS_CIPHER_SUITES)
 
-struct QCryptoTLSCipherSuites {
-    /* <private> */
-    QCryptoTLSCreds parent_obj;
-    /* <public> */
-};
-
 /**
   * qcrypto_tls_cipher_suites_get_data:
   * @obj: pointer to a TLS cipher suites object
@@ -37,4 +31,4 @@ struct QCryptoTLSCipherSuites {
 GByteArray *qcrypto_tls_cipher_suites_get_data(QCryptoTLSCipherSuites *obj,
                                                Error **errp);
 
-#endif /* QCRYPTO_TLSCIPHERSUITES_H */
+#endif /* QCRYPTO_TLS_CIPHER_SUITES_H */
index 079e37604784959bb2c7a97e7ffe07782228d426..2a8a8570109488b8787e00cf2936eda9f53bd963 100644 (file)
 #include "qapi/qapi-types-crypto.h"
 #include "qom/object.h"
 
-#ifdef CONFIG_GNUTLS
-#include <gnutls/gnutls.h>
-#endif
-
 #define TYPE_QCRYPTO_TLS_CREDS "tls-creds"
 typedef struct QCryptoTLSCreds QCryptoTLSCreds;
-DECLARE_INSTANCE_CHECKER(QCryptoTLSCreds, QCRYPTO_TLS_CREDS,
-                         TYPE_QCRYPTO_TLS_CREDS)
-
 typedef struct QCryptoTLSCredsClass QCryptoTLSCredsClass;
+DECLARE_OBJ_CHECKERS(QCryptoTLSCreds, QCryptoTLSCredsClass, QCRYPTO_TLS_CREDS,
+                     TYPE_QCRYPTO_TLS_CREDS)
+
 
 #define QCRYPTO_TLS_CREDS_DH_PARAMS "dh-params.pem"
 
 
+typedef bool (*CryptoTLSCredsReload)(QCryptoTLSCreds *, Error **);
 /**
  * QCryptoTLSCreds:
  *
@@ -47,21 +44,24 @@ typedef struct QCryptoTLSCredsClass QCryptoTLSCredsClass;
  * certificate credentials.
  */
 
-struct QCryptoTLSCreds {
-    Object parent_obj;
-    char *dir;
-    QCryptoTLSCredsEndpoint endpoint;
-#ifdef CONFIG_GNUTLS
-    gnutls_dh_params_t dh_params;
-#endif
-    bool verifyPeer;
-    char *priority;
-};
-
-
 struct QCryptoTLSCredsClass {
     ObjectClass parent_class;
+    CryptoTLSCredsReload reload;
 };
 
+/**
+ * qcrypto_tls_creds_check_endpoint:
+ * @creds: pointer to a TLS credentials object
+ * @endpoint: type of network endpoint that will be using the credentials
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Check whether the credentials is setup according to
+ * the type of @endpoint argument.
+ *
+ * Returns true if the credentials is setup for the endpoint, false otherwise
+ */
+bool qcrypto_tls_creds_check_endpoint(QCryptoTLSCreds *creds,
+                                      QCryptoTLSCredsEndpoint endpoint,
+                                      Error **errp);
 
 #endif /* QCRYPTO_TLSCREDS_H */
index 3f464a380958266afb47785c00615247b2f4bd1c..bd3023f9ea746a152b75f11fda48cb4d10a521d8 100644 (file)
@@ -92,18 +92,6 @@ typedef struct QCryptoTLSCredsAnonClass QCryptoTLSCredsAnonClass;
  *
  */
 
-
-struct QCryptoTLSCredsAnon {
-    QCryptoTLSCreds parent_obj;
-#ifdef CONFIG_GNUTLS
-    union {
-        gnutls_anon_server_credentials_t server;
-        gnutls_anon_client_credentials_t client;
-    } data;
-#endif
-};
-
-
 struct QCryptoTLSCredsAnonClass {
     QCryptoTLSCredsClass parent_class;
 };
index d7e6bdb5edf40ebf47add117679a0069aac62c76..bcd07dc4f620aa98f22805bff195598ab533e3f1 100644 (file)
@@ -87,18 +87,6 @@ typedef struct QCryptoTLSCredsPSKClass QCryptoTLSCredsPSKClass;
  * The PSK file can be created and managed using psktool.
  */
 
-struct QCryptoTLSCredsPSK {
-    QCryptoTLSCreds parent_obj;
-    char *username;
-#ifdef CONFIG_GNUTLS
-    union {
-        gnutls_psk_server_credentials_t server;
-        gnutls_psk_client_credentials_t client;
-    } data;
-#endif
-};
-
-
 struct QCryptoTLSCredsPSKClass {
     QCryptoTLSCredsClass parent_class;
 };
index c6d89b78819f441a107033865af4f000856d16cb..c4daba21a6babff35d85f172ca3cb47197169097 100644 (file)
@@ -96,16 +96,6 @@ typedef struct QCryptoTLSCredsX509Class QCryptoTLSCredsX509Class;
  *
  */
 
-struct QCryptoTLSCredsX509 {
-    QCryptoTLSCreds parent_obj;
-#ifdef CONFIG_GNUTLS
-    gnutls_certificate_credentials_t data;
-#endif
-    bool sanityCheck;
-    char *passwordid;
-};
-
-
 struct QCryptoTLSCredsX509Class {
     QCryptoTLSCredsClass parent_class;
 };
index 15b9cef086ccb483af2da68b10978b882498f0b0..571049bd0ec05540d2311038072299f6205608b6 100644 (file)
@@ -248,6 +248,17 @@ ssize_t qcrypto_tls_session_read(QCryptoTLSSession *sess,
                                  char *buf,
                                  size_t len);
 
+/**
+ * qcrypto_tls_session_check_pending:
+ * @sess: the TLS session object
+ *
+ * Check if there are unread data in the TLS buffers that have
+ * already been read from the underlying data source.
+ *
+ * Returns: the number of bytes available or zero
+ */
+size_t qcrypto_tls_session_check_pending(QCryptoTLSSession *sess);
+
 /**
  * qcrypto_tls_session_handshake:
  * @sess: the TLS session object
index 2164762b4670a7c1038eaefc99a0ded6c6781cb4..2324f6b1a467d6897bb6c729cb1152c2c6eb9248 100644 (file)
@@ -9,6 +9,8 @@
 #ifndef DISAS_DIS_ASM_H
 #define DISAS_DIS_ASM_H
 
+#include "qemu/bswap.h"
+
 typedef void *PTR;
 typedef uint64_t bfd_vma;
 typedef int64_t bfd_signed_vma;
@@ -186,20 +188,20 @@ enum bfd_architecture
 #define bfd_mach_alpha_ev5  0x20
 #define bfd_mach_alpha_ev6  0x30
   bfd_arch_arm,        /* Advanced Risc Machines ARM */
-#define bfd_mach_arm_unknown   0
-#define bfd_mach_arm_2         1
-#define bfd_mach_arm_2a                2
-#define bfd_mach_arm_3         3
-#define bfd_mach_arm_3M        4
-#define bfd_mach_arm_4                 5
-#define bfd_mach_arm_4T        6
-#define bfd_mach_arm_5                 7
-#define bfd_mach_arm_5T                8
-#define bfd_mach_arm_5TE       9
-#define bfd_mach_arm_XScale    10
-#define bfd_mach_arm_ep9312    11
-#define bfd_mach_arm_iWMMXt    12
-#define bfd_mach_arm_iWMMXt2   13
+#define bfd_mach_arm_unknown  0
+#define bfd_mach_arm_2        1
+#define bfd_mach_arm_2a       2
+#define bfd_mach_arm_3        3
+#define bfd_mach_arm_3M       4
+#define bfd_mach_arm_4        5
+#define bfd_mach_arm_4T       6
+#define bfd_mach_arm_5        7
+#define bfd_mach_arm_5T       8
+#define bfd_mach_arm_5TE      9
+#define bfd_mach_arm_XScale   10
+#define bfd_mach_arm_ep9312   11
+#define bfd_mach_arm_iWMMXt   12
+#define bfd_mach_arm_iWMMXt2  13
   bfd_arch_ns32k,      /* National Semiconductors ns32000 */
   bfd_arch_w65,        /* WDC 65816 */
   bfd_arch_tic30,      /* Texas Instruments TMS320C30 */
@@ -239,16 +241,15 @@ enum bfd_architecture
   bfd_arch_ia64,      /* HP/Intel ia64 */
 #define bfd_mach_ia64_elf64    64
 #define bfd_mach_ia64_elf32    32
-  bfd_arch_nios2,      /* Nios II */
+  bfd_arch_nios2,      /* Nios II */
 #define bfd_mach_nios2          0
 #define bfd_mach_nios2r1        1
 #define bfd_mach_nios2r2        2
-  bfd_arch_lm32,       /* Lattice Mico32 */
-#define bfd_mach_lm32 1
   bfd_arch_rx,       /* Renesas RX */
 #define bfd_mach_rx            0x75
 #define bfd_mach_rx_v2         0x76
 #define bfd_mach_rx_v3         0x77
+  bfd_arch_loongarch,
   bfd_arch_last
   };
 #define bfd_mach_s390_31 31
@@ -265,17 +266,17 @@ typedef struct symbol_cache_entry
 } asymbol;
 
 typedef int (*fprintf_function)(FILE *f, const char *fmt, ...)
-    GCC_FMT_ATTR(2, 3);
+    G_GNUC_PRINTF(2, 3);
 
 enum dis_insn_type {
-  dis_noninsn,                 /* Not a valid instruction */
-  dis_nonbranch,               /* Not a branch instruction */
-  dis_branch,                  /* Unconditional branch */
-  dis_condbranch,              /* Conditional branch */
-  dis_jsr,                     /* Jump to subroutine */
-  dis_condjsr,                 /* Conditional jump to subroutine */
-  dis_dref,                    /* Data reference instruction */
-  dis_dref2                    /* Two data references in instruction */
+  dis_noninsn,          /* Not a valid instruction */
+  dis_nonbranch,        /* Not a branch instruction */
+  dis_branch,           /* Unconditional branch */
+  dis_condbranch,       /* Conditional branch */
+  dis_jsr,              /* Jump to subroutine */
+  dis_condjsr,          /* Conditional jump to subroutine */
+  dis_dref,             /* Data reference instruction */
+  dis_dref2             /* Two data references in instruction */
 };
 
 /* This struct is passed into the instruction decoding routine,
@@ -318,8 +319,8 @@ typedef struct disassemble_info {
      The top 16 bits are reserved for public use (and are documented here).
      The bottom 16 bits are for the internal use of the disassembler.  */
   unsigned long flags;
-#define INSN_HAS_RELOC 0x80000000
-#define INSN_ARM_BE32  0x00010000
+#define INSN_HAS_RELOC  0x80000000
+#define INSN_ARM_BE32   0x00010000
   PTR private_data;
 
   /* Function used to get bytes to disassemble.  MEMADDR is the
@@ -329,7 +330,7 @@ typedef struct disassemble_info {
      Returns an errno value or 0 for success.  */
   int (*read_memory_func)
     (bfd_vma memaddr, bfd_byte *myaddr, int length,
-            struct disassemble_info *info);
+        struct disassemble_info *info);
 
   /* Function which should be called if we get an error that we can't
      recover from.  STATUS is the errno value from read_memory_func and
@@ -358,7 +359,7 @@ typedef struct disassemble_info {
     (bfd_vma addr, struct disassemble_info * info);
 
   /* These are for buffer_read_memory.  */
-  bfd_byte *buffer;
+  const bfd_byte *buffer;
   bfd_vma buffer_vma;
   int buffer_length;
 
@@ -383,20 +384,20 @@ typedef struct disassemble_info {
      To determine whether this decoder supports this information, set
      insn_info_valid to 0, decode an instruction, then check it.  */
 
-  char insn_info_valid;                /* Branch info has been set. */
-  char branch_delay_insns;     /* How many sequential insn's will run before
-                                  a branch takes effect.  (0 = normal) */
-  char data_size;              /* Size of data reference in insn, in bytes */
-  enum dis_insn_type insn_type;        /* Type of instruction */
-  bfd_vma target;              /* Target address of branch or dref, if known;
-                                  zero if unknown.  */
-  bfd_vma target2;             /* Second target address for dref2 */
+  char insn_info_valid;         /* Branch info has been set. */
+  char branch_delay_insns;      /* How many sequential insn's will run before
+                                   a branch takes effect.  (0 = normal) */
+  char data_size;               /* Size of data reference in insn, in bytes */
+  enum dis_insn_type insn_type; /* Type of instruction */
+  bfd_vma target;               /* Target address of branch or dref, if known;
+                                   zero if unknown.  */
+  bfd_vma target2;              /* Second target address for dref2 */
 
   /* Command line options specific to the target disassembler.  */
   char * disassembler_options;
 
   /* Field intended to be used by targets in any way they deem suitable.  */
-  int64_t target_info;
+  void *target_info;
 
   /* Options for Capstone disassembly.  */
   int cap_arch;
@@ -414,7 +415,6 @@ int print_insn_tci(bfd_vma, disassemble_info*);
 int print_insn_big_mips         (bfd_vma, disassemble_info*);
 int print_insn_little_mips      (bfd_vma, disassemble_info*);
 int print_insn_nanomips         (bfd_vma, disassemble_info*);
-int print_insn_i386             (bfd_vma, disassemble_info*);
 int print_insn_m68k             (bfd_vma, disassemble_info*);
 int print_insn_z8001            (bfd_vma, disassemble_info*);
 int print_insn_z8002            (bfd_vma, disassemble_info*);
@@ -425,7 +425,6 @@ int print_insn_h8500            (bfd_vma, disassemble_info*);
 int print_insn_arm_a64          (bfd_vma, disassemble_info*);
 int print_insn_alpha            (bfd_vma, disassemble_info*);
 disassembler_ftype arc_get_disassembler (int, int);
-int print_insn_arm              (bfd_vma, disassemble_info*);
 int print_insn_sparc            (bfd_vma, disassemble_info*);
 int print_insn_big_a29k         (bfd_vma, disassemble_info*);
 int print_insn_little_a29k      (bfd_vma, disassemble_info*);
@@ -437,7 +436,6 @@ int print_insn_m32r             (bfd_vma, disassemble_info*);
 int print_insn_m88k             (bfd_vma, disassemble_info*);
 int print_insn_mn10200          (bfd_vma, disassemble_info*);
 int print_insn_mn10300          (bfd_vma, disassemble_info*);
-int print_insn_moxie            (bfd_vma, disassemble_info*);
 int print_insn_ns32k            (bfd_vma, disassemble_info*);
 int print_insn_big_powerpc      (bfd_vma, disassemble_info*);
 int print_insn_little_powerpc   (bfd_vma, disassemble_info*);
@@ -446,23 +444,22 @@ int print_insn_w65              (bfd_vma, disassemble_info*);
 int print_insn_d10v             (bfd_vma, disassemble_info*);
 int print_insn_v850             (bfd_vma, disassemble_info*);
 int print_insn_tic30            (bfd_vma, disassemble_info*);
-int print_insn_ppc              (bfd_vma, disassemble_info*);
-int print_insn_s390             (bfd_vma, disassemble_info*);
 int print_insn_crisv32          (bfd_vma, disassemble_info*);
 int print_insn_crisv10          (bfd_vma, disassemble_info*);
 int print_insn_microblaze       (bfd_vma, disassemble_info*);
 int print_insn_ia64             (bfd_vma, disassemble_info*);
-int print_insn_lm32             (bfd_vma, disassemble_info*);
-int print_insn_big_nios2        (bfd_vma, disassemble_info*);
-int print_insn_little_nios2     (bfd_vma, disassemble_info*);
+int print_insn_nios2(bfd_vma, disassemble_info*);
 int print_insn_xtensa           (bfd_vma, disassemble_info*);
 int print_insn_riscv32          (bfd_vma, disassemble_info*);
 int print_insn_riscv64          (bfd_vma, disassemble_info*);
+int print_insn_riscv128         (bfd_vma, disassemble_info*);
 int print_insn_rx(bfd_vma, disassemble_info *);
+int print_insn_hexagon(bfd_vma, disassemble_info *);
+int print_insn_loongarch(bfd_vma, disassemble_info *);
 
 #ifdef CONFIG_CAPSTONE
 bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size);
-bool cap_disas_host(disassemble_info *info, void *code, size_t size);
+bool cap_disas_host(disassemble_info *info, const void *code, size_t size);
 bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count);
 bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size);
 #else
@@ -478,8 +475,6 @@ bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size);
 
 /* from libbfd */
 
-#include "qemu/bswap.h"
-
 static inline bfd_vma bfd_getl64(const bfd_byte *addr)
 {
     return ldq_le_p(addr);
index 36c33f6f194c2541d5deeb07252cfe3ae9078ee4..176775eff772cdffff86033d44b1ae638b2da54f 100644 (file)
@@ -1,34 +1,23 @@
 #ifndef QEMU_DISAS_H
 #define QEMU_DISAS_H
 
-#include "exec/hwaddr.h"
-
-#ifdef NEED_CPU_H
-#include "cpu.h"
-
 /* Disassemble this for me please... (debugging). */
-void disas(FILE *out, void *code, unsigned long size);
-void target_disas(FILE *out, CPUState *cpu, target_ulong code,
-                  target_ulong size);
+void disas(FILE *out, const void *code, size_t size);
+void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size);
 
-void monitor_disas(Monitor *mon, CPUState *cpu,
-                   target_ulong pc, int nb_insn, int is_physical);
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
+                   int nb_insn, bool is_physical);
 
 char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size);
 
 /* Look up symbol for debugging purpose.  Returns "" if unknown. */
-const char *lookup_symbol(target_ulong orig_addr);
-#endif
+const char *lookup_symbol(uint64_t orig_addr);
 
 struct syminfo;
 struct elf32_sym;
 struct elf64_sym;
 
-#if defined(CONFIG_USER_ONLY)
-typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong orig_addr);
-#else
-typedef const char *(*lookup_symbol_t)(struct syminfo *s, hwaddr orig_addr);
-#endif
+typedef const char *(*lookup_symbol_t)(struct syminfo *s, uint64_t orig_addr);
 
 struct syminfo {
     lookup_symbol_t lookup_symbol;
index 7a418ee559349365fe18bf0924a031df963c599f..e7259ec366fca166e35fbd9b725c3a7a20501f19 100644 (file)
@@ -11,26 +11,27 @@ typedef uint32_t Elf32_Word;
 /* 64-bit ELF base types. */
 typedef uint64_t Elf64_Addr;
 typedef uint16_t Elf64_Half;
-typedef int16_t         Elf64_SHalf;
+typedef int16_t  Elf64_SHalf;
 typedef uint64_t Elf64_Off;
-typedef int32_t         Elf64_Sword;
+typedef int32_t  Elf64_Sword;
 typedef uint32_t Elf64_Word;
 typedef uint64_t Elf64_Xword;
 typedef int64_t  Elf64_Sxword;
 
 /* These constants are for the segment types stored in the image headers */
-#define PT_NULL    0
-#define PT_LOAD    1
-#define PT_DYNAMIC 2
-#define PT_INTERP  3
-#define PT_NOTE    4
-#define PT_SHLIB   5
-#define PT_PHDR    6
-#define PT_LOOS    0x60000000
-#define PT_HIOS    0x6fffffff
-#define PT_LOPROC  0x70000000
-#define PT_HIPROC  0x7fffffff
-
+#define PT_NULL           0
+#define PT_LOAD           1
+#define PT_DYNAMIC        2
+#define PT_INTERP         3
+#define PT_NOTE           4
+#define PT_SHLIB          5
+#define PT_PHDR           6
+#define PT_LOOS           0x60000000
+#define PT_HIOS           0x6fffffff
+#define PT_LOPROC         0x70000000
+#define PT_HIPROC         0x7fffffff
+
+#define PT_GNU_STACK      (PT_LOOS + 0x474e551)
 #define PT_GNU_PROPERTY   (PT_LOOS + 0x474e553)
 
 #define PT_MIPS_REGINFO   0x70000000
@@ -40,34 +41,34 @@ typedef int64_t  Elf64_Sxword;
 
 /* Flags in the e_flags field of the header */
 /* MIPS architecture level. */
-#define EF_MIPS_ARCH            0xf0000000
+#define EF_MIPS_ARCH          0xf0000000
 
 /* Legal values for MIPS architecture level.  */
-#define EF_MIPS_ARCH_1         0x00000000      /* -mips1 code.  */
-#define EF_MIPS_ARCH_2         0x10000000      /* -mips2 code.  */
-#define EF_MIPS_ARCH_3         0x20000000      /* -mips3 code.  */
-#define EF_MIPS_ARCH_4         0x30000000      /* -mips4 code.  */
-#define EF_MIPS_ARCH_5         0x40000000      /* -mips5 code.  */
-#define EF_MIPS_ARCH_32                0x50000000      /* MIPS32 code.  */
-#define EF_MIPS_ARCH_64                0x60000000      /* MIPS64 code.  */
-#define EF_MIPS_ARCH_32R2       0x70000000      /* MIPS32r2 code.  */
-#define EF_MIPS_ARCH_64R2       0x80000000      /* MIPS64r2 code.  */
-#define EF_MIPS_ARCH_32R6       0x90000000      /* MIPS32r6 code.  */
-#define EF_MIPS_ARCH_64R6       0xa0000000      /* MIPS64r6 code.  */
+#define EF_MIPS_ARCH_1        0x00000000      /* -mips1 code.  */
+#define EF_MIPS_ARCH_2        0x10000000      /* -mips2 code.  */
+#define EF_MIPS_ARCH_3        0x20000000      /* -mips3 code.  */
+#define EF_MIPS_ARCH_4        0x30000000      /* -mips4 code.  */
+#define EF_MIPS_ARCH_5        0x40000000      /* -mips5 code.  */
+#define EF_MIPS_ARCH_32       0x50000000      /* MIPS32 code.  */
+#define EF_MIPS_ARCH_64       0x60000000      /* MIPS64 code.  */
+#define EF_MIPS_ARCH_32R2     0x70000000      /* MIPS32r2 code.  */
+#define EF_MIPS_ARCH_64R2     0x80000000      /* MIPS64r2 code.  */
+#define EF_MIPS_ARCH_32R6     0x90000000      /* MIPS32r6 code.  */
+#define EF_MIPS_ARCH_64R6     0xa0000000      /* MIPS64r6 code.  */
 
 /* The ABI of a file. */
-#define EF_MIPS_ABI_O32                0x00001000      /* O32 ABI.  */
-#define EF_MIPS_ABI_O64                0x00002000      /* O32 extended for 64 bit.  */
-
-#define EF_MIPS_NOREORDER 0x00000001
-#define EF_MIPS_PIC       0x00000002
-#define EF_MIPS_CPIC      0x00000004
-#define EF_MIPS_ABI2           0x00000020
-#define EF_MIPS_OPTIONS_FIRST  0x00000080
-#define EF_MIPS_32BITMODE      0x00000100
-#define EF_MIPS_ABI            0x0000f000
-#define EF_MIPS_FP64      0x00000200
-#define EF_MIPS_NAN2008   0x00000400
+#define EF_MIPS_ABI_O32       0x00001000      /* O32 ABI.  */
+#define EF_MIPS_ABI_O64       0x00002000      /* O32 extended for 64 bit.  */
+
+#define EF_MIPS_NOREORDER     0x00000001
+#define EF_MIPS_PIC           0x00000002
+#define EF_MIPS_CPIC          0x00000004
+#define EF_MIPS_ABI2          0x00000020
+#define EF_MIPS_OPTIONS_FIRST 0x00000080
+#define EF_MIPS_32BITMODE     0x00000100
+#define EF_MIPS_ABI           0x0000f000
+#define EF_MIPS_FP64          0x00000200
+#define EF_MIPS_NAN2008       0x00000400
 
 /* MIPS machine variant */
 #define EF_MIPS_MACH_NONE     0x00000000  /* A standard MIPS implementation  */
@@ -128,165 +129,162 @@ typedef struct mips_elf_abiflags_v0 {
 #define ET_HIPROC 0xffff
 
 /* These constants define the various ELF target machines */
-#define EM_NONE  0
-#define EM_M32   1
-#define EM_SPARC 2
-#define EM_386   3
-#define EM_68K   4
-#define EM_88K   5
-#define EM_486   6   /* Perhaps disused */
-#define EM_860   7
+#define EM_NONE             0
+#define EM_M32              1
+#define EM_SPARC            2
+#define EM_386              3
+#define EM_68K              4
+#define EM_88K              5
+#define EM_486              6   /* Perhaps disused */
+#define EM_860              7
+
+#define EM_MIPS             8   /* MIPS R3000 (officially, big-endian only) */
 
-#define EM_MIPS                8       /* MIPS R3000 (officially, big-endian only) */
+#define EM_MIPS_RS4_BE      10  /* MIPS R4000 big-endian */
 
-#define EM_MIPS_RS4_BE 10      /* MIPS R4000 big-endian */
+#define EM_PARISC           15  /* HPPA */
 
-#define EM_PARISC      15      /* HPPA */
+#define EM_SPARC32PLUS      18  /* Sun's "v8plus" */
 
-#define EM_SPARC32PLUS 18      /* Sun's "v8plus" */
+#define EM_PPC              20  /* PowerPC */
+#define EM_PPC64            21  /* PowerPC64 */
 
-#define EM_PPC        20       /* PowerPC */
-#define EM_PPC64       21       /* PowerPC64 */
+#define EM_ARM              40  /* ARM */
 
-#define EM_ARM         40              /* ARM */
+#define EM_SH               42  /* SuperH */
 
-#define EM_SH         42       /* SuperH */
+#define EM_SPARCV9          43  /* SPARC v9 64-bit */
 
-#define EM_SPARCV9     43      /* SPARC v9 64-bit */
+#define EM_TRICORE          44  /* Infineon TriCore */
 
-#define EM_TRICORE      44      /* Infineon TriCore */
+#define EM_IA_64            50  /* HP/Intel IA-64 */
 
-#define EM_IA_64       50      /* HP/Intel IA-64 */
+#define EM_X86_64           62  /* AMD x86-64 */
 
-#define EM_X86_64      62      /* AMD x86-64 */
+#define EM_S390             22  /* IBM S/390 */
 
-#define EM_S390                22      /* IBM S/390 */
+#define EM_CRIS             76  /* Axis Communications 32-bit embedded processor */
 
-#define EM_CRIS         76      /* Axis Communications 32-bit embedded processor */
+#define EM_AVR              83  /* AVR 8-bit microcontroller */
 
-#define EM_AVR          83      /* AVR 8-bit microcontroller */
+#define EM_V850             87  /* NEC v850 */
 
-#define EM_V850                87      /* NEC v850 */
+#define EM_H8_300H          47  /* Hitachi H8/300H */
+#define EM_H8S              48  /* Hitachi H8S     */
+#define EM_LATTICEMICO32    138 /* LatticeMico32 */
 
-#define EM_H8_300H      47      /* Hitachi H8/300H */
-#define EM_H8S          48      /* Hitachi H8S     */
-#define EM_LATTICEMICO32 138    /* LatticeMico32 */
+#define EM_OPENRISC         92  /* OpenCores OpenRISC */
 
-#define EM_OPENRISC     92        /* OpenCores OpenRISC */
+#define EM_HEXAGON          164 /* Qualcomm Hexagon */
 
-#define EM_UNICORE32    110     /* UniCore32 */
+#define EM_RX               173 /* Renesas RX family */
 
-#define EM_RX           173     /* Renesas RX family */
+#define EM_RISCV            243 /* RISC-V */
 
-#define EM_RISCV        243     /* RISC-V */
+#define EM_NANOMIPS         249 /* Wave Computing nanoMIPS */
 
-#define EM_NANOMIPS     249     /* Wave Computing nanoMIPS */
+#define EM_LOONGARCH        258 /* LoongArch */
 
 /*
  * This is an interim value that we will use until the committee comes
  * up with a final number.
  */
-#define EM_ALPHA       0x9026
+#define EM_ALPHA            0x9026
 
 /* Bogus old v850 magic number, used by old tools.  */
-#define EM_CYGNUS_V850 0x9080
+#define EM_CYGNUS_V850      0x9080
 
 /*
  * This is the old interim value for S/390 architecture
  */
-#define EM_S390_OLD     0xA390
-
-#define EM_ALTERA_NIOS2 113     /* Altera Nios II soft-core processor */
-
-#define EM_MICROBLAZE      189
-#define EM_MICROBLAZE_OLD  0xBAAB
+#define EM_S390_OLD         0xA390
 
-#define EM_XTENSA   94      /* Tensilica Xtensa */
+#define EM_ALTERA_NIOS2     113 /* Altera Nios II soft-core processor */
 
-#define EM_AARCH64  183
+#define EM_MICROBLAZE       189
+#define EM_MICROBLAZE_OLD   0xBAAB
 
-#define EM_TILEGX   191 /* TILE-Gx */
+#define EM_XTENSA           94  /* Tensilica Xtensa */
 
-#define EM_MOXIE           223     /* Moxie processor family */
-#define EM_MOXIE_OLD       0xFEED
+#define EM_AARCH64          183
 
-#define EF_AVR_MACH     0x7F       /* Mask for AVR e_flags to get core type */
+#define EF_AVR_MACH         0x7F /* Mask for AVR e_flags to get core type */
 
 /* This is the info that is needed to parse the dynamic section of the file */
-#define DT_NULL                0
-#define DT_NEEDED      1
-#define DT_PLTRELSZ    2
-#define DT_PLTGOT      3
-#define DT_HASH                4
-#define DT_STRTAB      5
-#define DT_SYMTAB      6
-#define DT_RELA                7
-#define DT_RELASZ      8
-#define DT_RELAENT     9
-#define DT_STRSZ       10
-#define DT_SYMENT      11
-#define DT_INIT                12
-#define DT_FINI                13
-#define DT_SONAME      14
-#define DT_RPATH       15
-#define DT_SYMBOLIC    16
-#define DT_REL         17
-#define DT_RELSZ       18
-#define DT_RELENT      19
-#define DT_PLTREL      20
-#define DT_DEBUG       21
-#define DT_TEXTREL     22
-#define DT_JMPREL      23
-#define DT_BINDNOW     24
-#define DT_INIT_ARRAY  25
-#define DT_FINI_ARRAY  26
-#define DT_INIT_ARRAYSZ        27
-#define DT_FINI_ARRAYSZ        28
-#define DT_RUNPATH     29
-#define DT_FLAGS       30
-#define DT_LOOS                0x6000000d
-#define DT_HIOS                0x6ffff000
-#define DT_LOPROC      0x70000000
-#define DT_HIPROC      0x7fffffff
+#define DT_NULL         0
+#define DT_NEEDED       1
+#define DT_PLTRELSZ     2
+#define DT_PLTGOT       3
+#define DT_HASH         4
+#define DT_STRTAB       5
+#define DT_SYMTAB       6
+#define DT_RELA         7
+#define DT_RELASZ       8
+#define DT_RELAENT      9
+#define DT_STRSZ        10
+#define DT_SYMENT       11
+#define DT_INIT         12
+#define DT_FINI         13
+#define DT_SONAME       14
+#define DT_RPATH        15
+#define DT_SYMBOLIC     16
+#define DT_REL          17
+#define DT_RELSZ        18
+#define DT_RELENT       19
+#define DT_PLTREL       20
+#define DT_DEBUG        21
+#define DT_TEXTREL      22
+#define DT_JMPREL       23
+#define DT_BINDNOW      24
+#define DT_INIT_ARRAY   25
+#define DT_FINI_ARRAY   26
+#define DT_INIT_ARRAYSZ 27
+#define DT_FINI_ARRAYSZ 28
+#define DT_RUNPATH      29
+#define DT_FLAGS        30
+#define DT_LOOS         0x6000000d
+#define DT_HIOS         0x6ffff000
+#define DT_LOPROC       0x70000000
+#define DT_HIPROC       0x7fffffff
 
 /* DT_ entries which fall between DT_VALRNGLO and DT_VALRNDHI use
    the d_val field of the Elf*_Dyn structure.  I.e. they contain scalars.  */
-#define DT_VALRNGLO    0x6ffffd00
-#define DT_VALRNGHI    0x6ffffdff
+#define DT_VALRNGLO     0x6ffffd00
+#define DT_VALRNGHI     0x6ffffdff
 
 /* DT_ entries which fall between DT_ADDRRNGLO and DT_ADDRRNGHI use
    the d_ptr field of the Elf*_Dyn structure.  I.e. they contain pointers.  */
-#define DT_ADDRRNGLO   0x6ffffe00
-#define DT_ADDRRNGHI   0x6ffffeff
-
-#define        DT_VERSYM       0x6ffffff0
-#define DT_RELACOUNT   0x6ffffff9
-#define DT_RELCOUNT    0x6ffffffa
-#define DT_FLAGS_1     0x6ffffffb
-#define DT_VERDEF      0x6ffffffc
-#define DT_VERDEFNUM   0x6ffffffd
-#define DT_VERNEED     0x6ffffffe
-#define DT_VERNEEDNUM  0x6fffffff
-
-#define DT_MIPS_RLD_VERSION    0x70000001
-#define DT_MIPS_TIME_STAMP     0x70000002
-#define DT_MIPS_ICHECKSUM      0x70000003
-#define DT_MIPS_IVERSION       0x70000004
-#define DT_MIPS_FLAGS          0x70000005
-  #define RHF_NONE               0
-  #define RHF_HARDWAY            1
-  #define RHF_NOTPOT             2
-#define DT_MIPS_BASE_ADDRESS   0x70000006
-#define DT_MIPS_CONFLICT       0x70000008
-#define DT_MIPS_LIBLIST                0x70000009
-#define DT_MIPS_LOCAL_GOTNO    0x7000000a
-#define DT_MIPS_CONFLICTNO     0x7000000b
-#define DT_MIPS_LIBLISTNO      0x70000010
-#define DT_MIPS_SYMTABNO       0x70000011
-#define DT_MIPS_UNREFEXTNO     0x70000012
-#define DT_MIPS_GOTSYM         0x70000013
-#define DT_MIPS_HIPAGENO       0x70000014
-#define DT_MIPS_RLD_MAP                0x70000016
+#define DT_ADDRRNGLO    0x6ffffe00
+#define DT_ADDRRNGHI    0x6ffffeff
+
+#define DT_VERSYM       0x6ffffff0
+#define DT_RELACOUNT    0x6ffffff9
+#define DT_RELCOUNT     0x6ffffffa
+#define DT_FLAGS_1      0x6ffffffb
+#define DT_VERDEF       0x6ffffffc
+#define DT_VERDEFNUM    0x6ffffffd
+#define DT_VERNEED      0x6ffffffe
+#define DT_VERNEEDNUM   0x6fffffff
+
+#define DT_MIPS_RLD_VERSION     0x70000001
+#define DT_MIPS_TIME_STAMP      0x70000002
+#define DT_MIPS_ICHECKSUM       0x70000003
+#define DT_MIPS_IVERSION        0x70000004
+#define DT_MIPS_FLAGS           0x70000005
+  #define RHF_NONE              0
+  #define RHF_HARDWAY           1
+  #define RHF_NOTPOT            2
+#define DT_MIPS_BASE_ADDRESS    0x70000006
+#define DT_MIPS_CONFLICT        0x70000008
+#define DT_MIPS_LIBLIST         0x70000009
+#define DT_MIPS_LOCAL_GOTNO     0x7000000a
+#define DT_MIPS_CONFLICTNO      0x7000000b
+#define DT_MIPS_LIBLISTNO       0x70000010
+#define DT_MIPS_SYMTABNO        0x70000011
+#define DT_MIPS_UNREFEXTNO      0x70000012
+#define DT_MIPS_GOTSYM          0x70000013
+#define DT_MIPS_HIPAGENO        0x70000014
+#define DT_MIPS_RLD_MAP         0x70000016
 
 /* This info is needed when parsing the symbol table */
 #define STB_LOCAL  0
@@ -299,61 +297,61 @@ typedef struct mips_elf_abiflags_v0 {
 #define STT_SECTION 3
 #define STT_FILE    4
 
-#define ELF_ST_BIND(x)         ((x) >> 4)
-#define ELF_ST_TYPE(x)         (((unsigned int) x) & 0xf)
+#define ELF_ST_BIND(x)          ((x) >> 4)
+#define ELF_ST_TYPE(x)          (((unsigned int) x) & 0xf)
 #define ELF_ST_INFO(bind, type) (((bind) << 4) | ((type) & 0xf))
-#define ELF32_ST_BIND(x)       ELF_ST_BIND(x)
-#define ELF32_ST_TYPE(x)       ELF_ST_TYPE(x)
-#define ELF64_ST_BIND(x)       ELF_ST_BIND(x)
-#define ELF64_ST_TYPE(x)       ELF_ST_TYPE(x)
+#define ELF32_ST_BIND(x)        ELF_ST_BIND(x)
+#define ELF32_ST_TYPE(x)        ELF_ST_TYPE(x)
+#define ELF64_ST_BIND(x)        ELF_ST_BIND(x)
+#define ELF64_ST_TYPE(x)        ELF_ST_TYPE(x)
 
 /* Symbolic values for the entries in the auxiliary table
    put on the initial stack */
-#define AT_NULL      /* end of vector */
-#define AT_IGNORE    /* entry should be ignored */
-#define AT_EXECFD    /* file descriptor of program */
-#define AT_PHDR      /* program headers for program */
-#define AT_PHENT     /* size of program header entry */
-#define AT_PHNUM     /* number of program headers */
-#define AT_PAGESZ    /* system page size */
-#define AT_BASE      /* base address of interpreter */
-#define AT_FLAGS     /* flags */
-#define AT_ENTRY     /* entry point of program */
-#define AT_NOTELF 10   /* program is not ELF */
-#define AT_UID    11   /* real uid */
-#define AT_EUID   12   /* effective uid */
-#define AT_GID    13   /* real gid */
-#define AT_EGID   14   /* effective gid */
-#define AT_PLATFORM 15  /* string identifying CPU for optimizations */
-#define AT_HWCAP  16    /* arch dependent hints at CPU capabilities */
-#define AT_CLKTCK 17   /* frequency at which times() increments */
-#define AT_FPUCW  18   /* info about fpu initialization by kernel */
-#define AT_DCACHEBSIZE 19      /* data cache block size */
-#define AT_ICACHEBSIZE 20      /* instruction cache block size */
-#define AT_UCACHEBSIZE 21      /* unified cache block size */
-#define AT_IGNOREPPC   22      /* ppc only; entry should be ignored */
-#define AT_SECURE      23      /* boolean, was exec suid-like? */
-#define AT_BASE_PLATFORM 24    /* string identifying real platforms */
-#define AT_RANDOM      25      /* address of 16 random bytes */
-#define AT_HWCAP2       26      /* extension of AT_HWCAP */
-#define AT_EXECFN      31      /* filename of the executable */
-#define AT_SYSINFO     32      /* address of kernel entry point */
-#define AT_SYSINFO_EHDR        33      /* address of kernel vdso */
-#define AT_L1I_CACHESHAPE 34   /* shapes of the caches: */
-#define AT_L1D_CACHESHAPE 35   /*   bits 0-3: cache associativity.  */
-#define AT_L2_CACHESHAPE  36   /*   bits 4-7: log2 of line size.  */
-#define AT_L3_CACHESHAPE  37   /*   val&~255: cache size.  */
+#define AT_NULL             0   /* end of vector */
+#define AT_IGNORE           1   /* entry should be ignored */
+#define AT_EXECFD           2   /* file descriptor of program */
+#define AT_PHDR             3   /* program headers for program */
+#define AT_PHENT            4   /* size of program header entry */
+#define AT_PHNUM            5   /* number of program headers */
+#define AT_PAGESZ           6   /* system page size */
+#define AT_BASE             7   /* base address of interpreter */
+#define AT_FLAGS            8   /* flags */
+#define AT_ENTRY            9   /* entry point of program */
+#define AT_NOTELF           10  /* program is not ELF */
+#define AT_UID              11  /* real uid */
+#define AT_EUID             12  /* effective uid */
+#define AT_GID              13  /* real gid */
+#define AT_EGID             14  /* effective gid */
+#define AT_PLATFORM         15  /* string identifying CPU for optimizations */
+#define AT_HWCAP            16  /* arch dependent hints at CPU capabilities */
+#define AT_CLKTCK           17  /* frequency at which times() increments */
+#define AT_FPUCW            18  /* info about fpu initialization by kernel */
+#define AT_DCACHEBSIZE      19  /* data cache block size */
+#define AT_ICACHEBSIZE      20  /* instruction cache block size */
+#define AT_UCACHEBSIZE      21  /* unified cache block size */
+#define AT_IGNOREPPC        22  /* ppc only; entry should be ignored */
+#define AT_SECURE           23  /* boolean, was exec suid-like? */
+#define AT_BASE_PLATFORM    24  /* string identifying real platforms */
+#define AT_RANDOM           25  /* address of 16 random bytes */
+#define AT_HWCAP2           26  /* extension of AT_HWCAP */
+#define AT_EXECFN           31  /* filename of the executable */
+#define AT_SYSINFO          32  /* address of kernel entry point */
+#define AT_SYSINFO_EHDR     33  /* address of kernel vdso */
+#define AT_L1I_CACHESHAPE   34  /* shapes of the caches: */
+#define AT_L1D_CACHESHAPE   35  /*   bits 0-3: cache associativity.  */
+#define AT_L2_CACHESHAPE    36  /*   bits 4-7: log2 of line size.  */
+#define AT_L3_CACHESHAPE    37  /*   val&~255: cache size.  */
 
 typedef struct dynamic{
   Elf32_Sword d_tag;
   union{
-    Elf32_Sword        d_val;
-    Elf32_Addr d_ptr;
+    Elf32_Sword d_val;
+    Elf32_Addr d_ptr;
   } d_un;
 } Elf32_Dyn;
 
 typedef struct {
-  Elf64_Sxword d_tag;          /* entry tag value */
+  Elf64_Sxword d_tag;   /* entry tag value */
   union {
     Elf64_Xword d_val;
     Elf64_Addr d_ptr;
@@ -364,72 +362,72 @@ typedef struct {
 #define ELF32_R_SYM(x) ((x) >> 8)
 #define ELF32_R_TYPE(x) ((x) & 0xff)
 
-#define ELF64_R_SYM(i)                 ((i) >> 32)
-#define ELF64_R_TYPE(i)                        ((i) & 0xffffffff)
+#define ELF64_R_SYM(i)                  ((i) >> 32)
+#define ELF64_R_TYPE(i)                 ((i) & 0xffffffff)
 #define ELF64_R_TYPE_DATA(i)            (((ELF64_R_TYPE(i) >> 8) ^ 0x00800000) - 0x00800000)
 
-#define R_386_NONE     0
-#define R_386_32       1
-#define R_386_PC32     2
-#define R_386_GOT32    3
-#define R_386_PLT32    4
-#define R_386_COPY     5
-#define R_386_GLOB_DAT 6
-#define R_386_JMP_SLOT 7
-#define R_386_RELATIVE 8
-#define R_386_GOTOFF   9
-#define R_386_GOTPC    10
-#define R_386_NUM      11
+#define R_386_NONE      0
+#define R_386_32        1
+#define R_386_PC32      2
+#define R_386_GOT32     3
+#define R_386_PLT32     4
+#define R_386_COPY      5
+#define R_386_GLOB_DAT  6
+#define R_386_JMP_SLOT  7
+#define R_386_RELATIVE  8
+#define R_386_GOTOFF    9
+#define R_386_GOTPC     10
+#define R_386_NUM       11
 /* Not a dynamic reloc, so not included in R_386_NUM.  Used in TCG.  */
-#define R_386_PC8      23
-
-#define R_MIPS_NONE            0
-#define R_MIPS_16              1
-#define R_MIPS_32              2
-#define R_MIPS_REL32           3
-#define R_MIPS_26              4
-#define R_MIPS_HI16            5
-#define R_MIPS_LO16            6
-#define R_MIPS_GPREL16         7
-#define R_MIPS_LITERAL         8
-#define R_MIPS_GOT16           9
-#define R_MIPS_PC16            10
-#define R_MIPS_CALL16          11
-#define R_MIPS_GPREL32         12
+#define R_386_PC8       23
+
+#define R_MIPS_NONE     0
+#define R_MIPS_16       1
+#define R_MIPS_32       2
+#define R_MIPS_REL32    3
+#define R_MIPS_26       4
+#define R_MIPS_HI16     5
+#define R_MIPS_LO16     6
+#define R_MIPS_GPREL16  7
+#define R_MIPS_LITERAL  8
+#define R_MIPS_GOT16    9
+#define R_MIPS_PC16     10
+#define R_MIPS_CALL16   11
+#define R_MIPS_GPREL32  12
 /* The remaining relocs are defined on Irix, although they are not
    in the MIPS ELF ABI.  */
-#define R_MIPS_UNUSED1         13
-#define R_MIPS_UNUSED2         14
-#define R_MIPS_UNUSED3         15
-#define R_MIPS_SHIFT5          16
-#define R_MIPS_SHIFT6          17
-#define R_MIPS_64              18
-#define R_MIPS_GOT_DISP                19
-#define R_MIPS_GOT_PAGE                20
-#define R_MIPS_GOT_OFST                21
+#define R_MIPS_UNUSED1  13
+#define R_MIPS_UNUSED2  14
+#define R_MIPS_UNUSED3  15
+#define R_MIPS_SHIFT5   16
+#define R_MIPS_SHIFT6   17
+#define R_MIPS_64       18
+#define R_MIPS_GOT_DISP 19
+#define R_MIPS_GOT_PAGE 20
+#define R_MIPS_GOT_OFST 21
 /*
  * The following two relocation types are specified in the MIPS ABI
  * conformance guide version 1.2 but not yet in the psABI.
  */
-#define R_MIPS_GOTHI16         22
-#define R_MIPS_GOTLO16         23
-#define R_MIPS_SUB             24
-#define R_MIPS_INSERT_A                25
-#define R_MIPS_INSERT_B                26
-#define R_MIPS_DELETE          27
-#define R_MIPS_HIGHER          28
-#define R_MIPS_HIGHEST         29
+#define R_MIPS_GOTHI16  22
+#define R_MIPS_GOTLO16  23
+#define R_MIPS_SUB      24
+#define R_MIPS_INSERT_A 25
+#define R_MIPS_INSERT_B 26
+#define R_MIPS_DELETE   27
+#define R_MIPS_HIGHER   28
+#define R_MIPS_HIGHEST  29
 /*
  * The following two relocation types are specified in the MIPS ABI
  * conformance guide version 1.2 but not yet in the psABI.
  */
-#define R_MIPS_CALLHI16                30
-#define R_MIPS_CALLLO16                31
+#define R_MIPS_CALLHI16 30
+#define R_MIPS_CALLLO16 31
 /*
  * This range is reserved for vendor specific relocations.
  */
-#define R_MIPS_LOVENDOR                100
-#define R_MIPS_HIVENDOR                127
+#define R_MIPS_LOVENDOR 100
+#define R_MIPS_HIVENDOR 127
 
 
 /* SUN SPARC specific definitions.  */
@@ -450,48 +448,48 @@ typedef struct {
 /*
  * Sparc ELF relocation types
  */
-#define        R_SPARC_NONE            0
-#define        R_SPARC_8               1
-#define        R_SPARC_16              2
-#define        R_SPARC_32              3
-#define        R_SPARC_DISP8           4
-#define        R_SPARC_DISP16          5
-#define        R_SPARC_DISP32          6
-#define        R_SPARC_WDISP30         7
-#define        R_SPARC_WDISP22         8
-#define        R_SPARC_HI22            9
-#define        R_SPARC_22              10
-#define        R_SPARC_13              11
-#define        R_SPARC_LO10            12
-#define        R_SPARC_GOT10           13
-#define        R_SPARC_GOT13           14
-#define        R_SPARC_GOT22           15
-#define        R_SPARC_PC10            16
-#define        R_SPARC_PC22            17
-#define        R_SPARC_WPLT30          18
-#define        R_SPARC_COPY            19
-#define        R_SPARC_GLOB_DAT        20
-#define        R_SPARC_JMP_SLOT        21
-#define        R_SPARC_RELATIVE        22
-#define        R_SPARC_UA32            23
-#define R_SPARC_PLT32          24
-#define R_SPARC_HIPLT22                25
-#define R_SPARC_LOPLT10                26
-#define R_SPARC_PCPLT32                27
-#define R_SPARC_PCPLT22                28
-#define R_SPARC_PCPLT10                29
-#define R_SPARC_10             30
-#define R_SPARC_11             31
-#define R_SPARC_64             32
-#define R_SPARC_OLO10           33
-#define R_SPARC_HH22            34
-#define R_SPARC_HM10            35
-#define R_SPARC_LM22            36
-#define R_SPARC_WDISP16                40
-#define R_SPARC_WDISP19                41
-#define R_SPARC_7              43
-#define R_SPARC_5              44
-#define R_SPARC_6              45
+#define R_SPARC_NONE        0
+#define R_SPARC_8           1
+#define R_SPARC_16          2
+#define R_SPARC_32          3
+#define R_SPARC_DISP8       4
+#define R_SPARC_DISP16      5
+#define R_SPARC_DISP32      6
+#define R_SPARC_WDISP30     7
+#define R_SPARC_WDISP22     8
+#define R_SPARC_HI22        9
+#define R_SPARC_22          10
+#define R_SPARC_13          11
+#define R_SPARC_LO10        12
+#define R_SPARC_GOT10       13
+#define R_SPARC_GOT13       14
+#define R_SPARC_GOT22       15
+#define R_SPARC_PC10        16
+#define R_SPARC_PC22        17
+#define R_SPARC_WPLT30      18
+#define R_SPARC_COPY        19
+#define R_SPARC_GLOB_DAT    20
+#define R_SPARC_JMP_SLOT    21
+#define R_SPARC_RELATIVE    22
+#define R_SPARC_UA32        23
+#define R_SPARC_PLT32       24
+#define R_SPARC_HIPLT22     25
+#define R_SPARC_LOPLT10     26
+#define R_SPARC_PCPLT32     27
+#define R_SPARC_PCPLT22     28
+#define R_SPARC_PCPLT10     29
+#define R_SPARC_10          30
+#define R_SPARC_11          31
+#define R_SPARC_64          32
+#define R_SPARC_OLO10       33
+#define R_SPARC_HH22        34
+#define R_SPARC_HM10        35
+#define R_SPARC_LM22        36
+#define R_SPARC_WDISP16     40
+#define R_SPARC_WDISP19     41
+#define R_SPARC_7           43
+#define R_SPARC_5           44
+#define R_SPARC_6           45
 
 /* Bits present in AT_HWCAP for ARM.  */
 
@@ -598,18 +596,53 @@ typedef struct {
 
 /* Bits present in AT_HWCAP for s390.  */
 
-#define HWCAP_S390_ESAN3        1
-#define HWCAP_S390_ZARCH        2
-#define HWCAP_S390_STFLE        4
-#define HWCAP_S390_MSA          8
-#define HWCAP_S390_LDISP        16
-#define HWCAP_S390_EIMM         32
-#define HWCAP_S390_DFP          64
-#define HWCAP_S390_HPAGE        128
-#define HWCAP_S390_ETF3EH       256
-#define HWCAP_S390_HIGH_GPRS    512
-#define HWCAP_S390_TE           1024
-#define HWCAP_S390_VXRS         2048
+#define HWCAP_S390_NR_ESAN3      0
+#define HWCAP_S390_NR_ZARCH      1
+#define HWCAP_S390_NR_STFLE      2
+#define HWCAP_S390_NR_MSA        3
+#define HWCAP_S390_NR_LDISP      4
+#define HWCAP_S390_NR_EIMM       5
+#define HWCAP_S390_NR_DFP        6
+#define HWCAP_S390_NR_HPAGE      7
+#define HWCAP_S390_NR_ETF3EH     8
+#define HWCAP_S390_NR_HIGH_GPRS  9
+#define HWCAP_S390_NR_TE        10
+#define HWCAP_S390_NR_VXRS      11
+#define HWCAP_S390_NR_VXRS_BCD  12
+#define HWCAP_S390_NR_VXRS_EXT  13
+#define HWCAP_S390_NR_GS        14
+#define HWCAP_S390_NR_VXRS_EXT2 15
+#define HWCAP_S390_NR_VXRS_PDE  16
+#define HWCAP_S390_NR_SORT      17
+#define HWCAP_S390_NR_DFLT      18
+#define HWCAP_S390_NR_VXRS_PDE2 19
+#define HWCAP_S390_NR_NNPA      20
+#define HWCAP_S390_NR_PCI_MIO   21
+#define HWCAP_S390_NR_SIE       22
+
+#define HWCAP_S390_ESAN3     (1 << HWCAP_S390_NR_ESAN3)
+#define HWCAP_S390_ZARCH     (1 << HWCAP_S390_NR_ZARCH)
+#define HWCAP_S390_STFLE     (1 << HWCAP_S390_NR_STFLE)
+#define HWCAP_S390_MSA       (1 << HWCAP_S390_NR_MSA)
+#define HWCAP_S390_LDISP     (1 << HWCAP_S390_NR_LDISP)
+#define HWCAP_S390_EIMM      (1 << HWCAP_S390_NR_EIMM)
+#define HWCAP_S390_DFP       (1 << HWCAP_S390_NR_DFP)
+#define HWCAP_S390_HPAGE     (1 << HWCAP_S390_NR_HPAGE)
+#define HWCAP_S390_ETF3EH    (1 << HWCAP_S390_NR_ETF3EH)
+#define HWCAP_S390_HIGH_GPRS (1 << HWCAP_S390_NR_HIGH_GPRS)
+#define HWCAP_S390_TE        (1 << HWCAP_S390_NR_TE)
+#define HWCAP_S390_VXRS      (1 << HWCAP_S390_NR_VXRS)
+#define HWCAP_S390_VXRS_BCD  (1 << HWCAP_S390_NR_VXRS_BCD)
+#define HWCAP_S390_VXRS_EXT  (1 << HWCAP_S390_NR_VXRS_EXT)
+#define HWCAP_S390_GS        (1 << HWCAP_S390_NR_GS)
+#define HWCAP_S390_VXRS_EXT2 (1 << HWCAP_S390_NR_VXRS_EXT2)
+#define HWCAP_S390_VXRS_PDE  (1 << HWCAP_S390_NR_VXRS_PDE)
+#define HWCAP_S390_SORT      (1 << HWCAP_S390_NR_SORT)
+#define HWCAP_S390_DFLT      (1 << HWCAP_S390_NR_DFLT)
+#define HWCAP_S390_VXRS_PDE2 (1 << HWCAP_S390_NR_VXRS_PDE2)
+#define HWCAP_S390_NNPA      (1 << HWCAP_S390_NR_NNPA)
+#define HWCAP_S390_PCI_MIO   (1 << HWCAP_S390_NR_PCI_MIO)
+#define HWCAP_S390_SIE       (1 << HWCAP_S390_NR_SIE)
 
 /* M68K specific definitions. */
 /* We use the top 24 bits to encode information about the
@@ -642,29 +675,29 @@ typedef struct {
 /*
  * 68k ELF relocation types
  */
-#define R_68K_NONE     0
-#define R_68K_32       1
-#define R_68K_16       2
-#define R_68K_8                3
-#define R_68K_PC32     4
-#define R_68K_PC16     5
-#define R_68K_PC8      6
-#define R_68K_GOT32    7
-#define R_68K_GOT16    8
-#define R_68K_GOT8     9
-#define R_68K_GOT32O   10
-#define R_68K_GOT16O   11
-#define R_68K_GOT8O    12
-#define R_68K_PLT32    13
-#define R_68K_PLT16    14
-#define R_68K_PLT8     15
-#define R_68K_PLT32O   16
-#define R_68K_PLT16O   17
-#define R_68K_PLT8O    18
-#define R_68K_COPY     19
-#define R_68K_GLOB_DAT 20
-#define R_68K_JMP_SLOT 21
-#define R_68K_RELATIVE 22
+#define R_68K_NONE      0
+#define R_68K_32        1
+#define R_68K_16        2
+#define R_68K_8         3
+#define R_68K_PC32      4
+#define R_68K_PC16      5
+#define R_68K_PC8       6
+#define R_68K_GOT32     7
+#define R_68K_GOT16     8
+#define R_68K_GOT8      9
+#define R_68K_GOT32O    10
+#define R_68K_GOT16O    11
+#define R_68K_GOT8O     12
+#define R_68K_PLT32     13
+#define R_68K_PLT16     14
+#define R_68K_PLT8      15
+#define R_68K_PLT32O    16
+#define R_68K_PLT16O    17
+#define R_68K_PLT8O     18
+#define R_68K_COPY      19
+#define R_68K_GLOB_DAT  20
+#define R_68K_JMP_SLOT  21
+#define R_68K_RELATIVE  22
 
 /*
  * Alpha ELF relocation types
@@ -688,7 +721,7 @@ typedef struct {
 #define R_ALPHA_GLOB_DAT        25      /* Create GOT entry */
 #define R_ALPHA_JMP_SLOT        26      /* Create PLT entry */
 #define R_ALPHA_RELATIVE        27      /* Adjust by program base */
-#define R_ALPHA_BRSGP          28
+#define R_ALPHA_BRSGP           28
 #define R_ALPHA_TLSGD           29
 #define R_ALPHA_TLS_LDM         30
 #define R_ALPHA_DTPMOD64        31
@@ -703,78 +736,78 @@ typedef struct {
 #define R_ALPHA_TPRELLO         40
 #define R_ALPHA_TPREL16         41
 
-#define SHF_ALPHA_GPREL                0x10000000
+#define SHF_ALPHA_GPREL         0x10000000
 
 
 /* PowerPC specific definitions.  */
 
 /* Processor specific flags for the ELF header e_flags field.  */
-#define EF_PPC64_ABI           0x3
+#define EF_PPC64_ABI            0x3
 
 /* PowerPC relocations defined by the ABIs */
-#define R_PPC_NONE             0
-#define R_PPC_ADDR32           1       /* 32bit absolute address */
-#define R_PPC_ADDR24           2       /* 26bit address, 2 bits ignored.  */
-#define R_PPC_ADDR16           3       /* 16bit absolute address */
-#define R_PPC_ADDR16_LO                4       /* lower 16bit of absolute address */
-#define R_PPC_ADDR16_HI                5       /* high 16bit of absolute address */
-#define R_PPC_ADDR16_HA                6       /* adjusted high 16bit */
-#define R_PPC_ADDR14           7       /* 16bit address, 2 bits ignored */
-#define R_PPC_ADDR14_BRTAKEN   8
-#define R_PPC_ADDR14_BRNTAKEN  9
-#define R_PPC_REL24            10      /* PC relative 26 bit */
-#define R_PPC_REL14            11      /* PC relative 16 bit */
-#define R_PPC_REL14_BRTAKEN    12
-#define R_PPC_REL14_BRNTAKEN   13
-#define R_PPC_GOT16            14
-#define R_PPC_GOT16_LO         15
-#define R_PPC_GOT16_HI         16
-#define R_PPC_GOT16_HA         17
-#define R_PPC_PLTREL24         18
-#define R_PPC_COPY             19
-#define R_PPC_GLOB_DAT         20
-#define R_PPC_JMP_SLOT         21
-#define R_PPC_RELATIVE         22
-#define R_PPC_LOCAL24PC                23
-#define R_PPC_UADDR32          24
-#define R_PPC_UADDR16          25
-#define R_PPC_REL32            26
-#define R_PPC_PLT32            27
-#define R_PPC_PLTREL32         28
-#define R_PPC_PLT16_LO         29
-#define R_PPC_PLT16_HI         30
-#define R_PPC_PLT16_HA         31
-#define R_PPC_SDAREL16         32
-#define R_PPC_SECTOFF          33
-#define R_PPC_SECTOFF_LO       34
-#define R_PPC_SECTOFF_HI       35
-#define R_PPC_SECTOFF_HA       36
+#define R_PPC_NONE              0
+#define R_PPC_ADDR32            1   /* 32bit absolute address */
+#define R_PPC_ADDR24            2   /* 26bit address, 2 bits ignored.  */
+#define R_PPC_ADDR16            3   /* 16bit absolute address */
+#define R_PPC_ADDR16_LO         4   /* lower 16bit of absolute address */
+#define R_PPC_ADDR16_HI         5   /* high 16bit of absolute address */
+#define R_PPC_ADDR16_HA         6   /* adjusted high 16bit */
+#define R_PPC_ADDR14            7   /* 16bit address, 2 bits ignored */
+#define R_PPC_ADDR14_BRTAKEN    8
+#define R_PPC_ADDR14_BRNTAKEN   9
+#define R_PPC_REL24             10  /* PC relative 26 bit */
+#define R_PPC_REL14             11  /* PC relative 16 bit */
+#define R_PPC_REL14_BRTAKEN     12
+#define R_PPC_REL14_BRNTAKEN    13
+#define R_PPC_GOT16             14
+#define R_PPC_GOT16_LO          15
+#define R_PPC_GOT16_HI          16
+#define R_PPC_GOT16_HA          17
+#define R_PPC_PLTREL24          18
+#define R_PPC_COPY              19
+#define R_PPC_GLOB_DAT          20
+#define R_PPC_JMP_SLOT          21
+#define R_PPC_RELATIVE          22
+#define R_PPC_LOCAL24PC         23
+#define R_PPC_UADDR32           24
+#define R_PPC_UADDR16           25
+#define R_PPC_REL32             26
+#define R_PPC_PLT32             27
+#define R_PPC_PLTREL32          28
+#define R_PPC_PLT16_LO          29
+#define R_PPC_PLT16_HI          30
+#define R_PPC_PLT16_HA          31
+#define R_PPC_SDAREL16          32
+#define R_PPC_SECTOFF           33
+#define R_PPC_SECTOFF_LO        34
+#define R_PPC_SECTOFF_HI        35
+#define R_PPC_SECTOFF_HA        36
 /* Keep this the last entry.  */
 #ifndef R_PPC_NUM
-#define R_PPC_NUM              37
+#define R_PPC_NUM               37
 #endif
 
 /* ARM specific declarations */
 
 /* Processor specific flags for the ELF header e_flags field.  */
-#define EF_ARM_RELEXEC     0x01
-#define EF_ARM_HASENTRY    0x02
-#define EF_ARM_INTERWORK   0x04
-#define EF_ARM_APCS_26     0x08
-#define EF_ARM_APCS_FLOAT  0x10
-#define EF_ARM_PIC         0x20
-#define EF_ALIGN8          0x40                /* 8-bit structure alignment is in use */
-#define EF_NEW_ABI         0x80
-#define EF_OLD_ABI         0x100
-#define EF_ARM_SOFT_FLOAT  0x200
-#define EF_ARM_VFP_FLOAT   0x400
+#define EF_ARM_RELEXEC        0x01
+#define EF_ARM_HASENTRY       0x02
+#define EF_ARM_INTERWORK      0x04
+#define EF_ARM_APCS_26        0x08
+#define EF_ARM_APCS_FLOAT     0x10
+#define EF_ARM_PIC            0x20
+#define EF_ALIGN8             0x40     /* 8-bit structure alignment is in use */
+#define EF_NEW_ABI            0x80
+#define EF_OLD_ABI            0x100
+#define EF_ARM_SOFT_FLOAT     0x200
+#define EF_ARM_VFP_FLOAT      0x400
 #define EF_ARM_MAVERICK_FLOAT 0x800
 
 /* Other constants defined in the ARM ELF spec. version B-01.  */
-#define EF_ARM_SYMSARESORTED 0x04       /* NB conflicts with EF_INTERWORK */
-#define EF_ARM_DYNSYMSUSESEGIDX 0x08    /* NB conflicts with EF_APCS26 */
-#define EF_ARM_MAPSYMSFIRST 0x10        /* NB conflicts with EF_APCS_FLOAT */
-#define EF_ARM_EABIMASK      0xFF000000
+#define EF_ARM_SYMSARESORTED    0x04   /* NB conflicts with EF_INTERWORK */
+#define EF_ARM_DYNSYMSUSESEGIDX 0x08   /* NB conflicts with EF_APCS26 */
+#define EF_ARM_MAPSYMSFIRST     0x10   /* NB conflicts with EF_APCS_FLOAT */
+#define EF_ARM_EABIMASK         0xFF000000
 
 /* Constants defined in AAELF.  */
 #define EF_ARM_BE8          0x00800000
@@ -801,46 +834,46 @@ typedef struct {
                                            addressed by the static base */
 
 /* ARM relocs.  */
-#define R_ARM_NONE             0       /* No reloc */
-#define R_ARM_PC24             1       /* PC relative 26 bit branch */
-#define R_ARM_ABS32            2       /* Direct 32 bit  */
-#define R_ARM_REL32            3       /* PC relative 32 bit */
-#define R_ARM_PC13             4
-#define R_ARM_ABS16            5       /* Direct 16 bit */
-#define R_ARM_ABS12            6       /* Direct 12 bit */
-#define R_ARM_THM_ABS5         7
-#define R_ARM_ABS8             8       /* Direct 8 bit */
-#define R_ARM_SBREL32          9
-#define R_ARM_THM_PC22         10
-#define R_ARM_THM_PC8          11
-#define R_ARM_AMP_VCALL9       12
-#define R_ARM_SWI24            13
-#define R_ARM_THM_SWI8         14
-#define R_ARM_XPC25            15
-#define R_ARM_THM_XPC22                16
-#define R_ARM_COPY             20      /* Copy symbol at runtime */
-#define R_ARM_GLOB_DAT         21      /* Create GOT entry */
-#define R_ARM_JUMP_SLOT                22      /* Create PLT entry */
-#define R_ARM_RELATIVE         23      /* Adjust by program base */
-#define R_ARM_GOTOFF           24      /* 32 bit offset to GOT */
-#define R_ARM_GOTPC            25      /* 32 bit PC relative offset to GOT */
-#define R_ARM_GOT32            26      /* 32 bit GOT entry */
-#define R_ARM_PLT32            27      /* 32 bit PLT address */
+#define R_ARM_NONE              0   /* No reloc */
+#define R_ARM_PC24              1   /* PC relative 26 bit branch */
+#define R_ARM_ABS32             2   /* Direct 32 bit  */
+#define R_ARM_REL32             3   /* PC relative 32 bit */
+#define R_ARM_PC13              4
+#define R_ARM_ABS16             5   /* Direct 16 bit */
+#define R_ARM_ABS12             6   /* Direct 12 bit */
+#define R_ARM_THM_ABS5          7
+#define R_ARM_ABS8              8   /* Direct 8 bit */
+#define R_ARM_SBREL32           9
+#define R_ARM_THM_PC22          10
+#define R_ARM_THM_PC8           11
+#define R_ARM_AMP_VCALL9        12
+#define R_ARM_SWI24             13
+#define R_ARM_THM_SWI8          14
+#define R_ARM_XPC25             15
+#define R_ARM_THM_XPC22         16
+#define R_ARM_COPY              20  /* Copy symbol at runtime */
+#define R_ARM_GLOB_DAT          21  /* Create GOT entry */
+#define R_ARM_JUMP_SLOT         22  /* Create PLT entry */
+#define R_ARM_RELATIVE          23  /* Adjust by program base */
+#define R_ARM_GOTOFF            24  /* 32 bit offset to GOT */
+#define R_ARM_GOTPC             25  /* 32 bit PC relative offset to GOT */
+#define R_ARM_GOT32             26  /* 32 bit GOT entry */
+#define R_ARM_PLT32             27  /* 32 bit PLT address */
 #define R_ARM_CALL              28
 #define R_ARM_JUMP24            29
-#define R_ARM_GNU_VTENTRY      100
-#define R_ARM_GNU_VTINHERIT    101
-#define R_ARM_THM_PC11         102     /* thumb unconditional branch */
-#define R_ARM_THM_PC9          103     /* thumb conditional branch */
-#define R_ARM_RXPC25           249
-#define R_ARM_RSBREL32         250
-#define R_ARM_THM_RPC22                251
-#define R_ARM_RREL32           252
-#define R_ARM_RABS22           253
-#define R_ARM_RPC24            254
-#define R_ARM_RBASE            255
+#define R_ARM_GNU_VTENTRY       100
+#define R_ARM_GNU_VTINHERIT     101
+#define R_ARM_THM_PC11          102 /* thumb unconditional branch */
+#define R_ARM_THM_PC9           103 /* thumb conditional branch */
+#define R_ARM_RXPC25            249
+#define R_ARM_RSBREL32          250
+#define R_ARM_THM_RPC22         251
+#define R_ARM_RREL32            252
+#define R_ARM_RABS22            253
+#define R_ARM_RPC24             254
+#define R_ARM_RBASE             255
 /* Keep this the last entry.  */
-#define R_ARM_NUM              256
+#define R_ARM_NUM               256
 
 /* ARM Aarch64 relocation types */
 #define R_AARCH64_NONE                256 /* also accepts R_ARM_NONE (0) */
@@ -970,385 +1003,385 @@ typedef struct {
 #define R_AARCH64_TLS_TPREL32  1033
 
 /* s390 relocations defined by the ABIs */
-#define R_390_NONE             0       /* No reloc.  */
-#define R_390_8                        1       /* Direct 8 bit.  */
-#define R_390_12               2       /* Direct 12 bit.  */
-#define R_390_16               3       /* Direct 16 bit.  */
-#define R_390_32               4       /* Direct 32 bit.  */
-#define R_390_PC32             5       /* PC relative 32 bit.  */
-#define R_390_GOT12            6       /* 12 bit GOT offset.  */
-#define R_390_GOT32            7       /* 32 bit GOT offset.  */
-#define R_390_PLT32            8       /* 32 bit PC relative PLT address.  */
-#define R_390_COPY             9       /* Copy symbol at runtime.  */
-#define R_390_GLOB_DAT         10      /* Create GOT entry.  */
-#define R_390_JMP_SLOT         11      /* Create PLT entry.  */
-#define R_390_RELATIVE         12      /* Adjust by program base.  */
-#define R_390_GOTOFF32         13      /* 32 bit offset to GOT.         */
-#define R_390_GOTPC            14      /* 32 bit PC rel. offset to GOT.  */
-#define R_390_GOT16            15      /* 16 bit GOT offset.  */
-#define R_390_PC16             16      /* PC relative 16 bit.  */
-#define R_390_PC16DBL          17      /* PC relative 16 bit shifted by 1.  */
-#define R_390_PLT16DBL         18      /* 16 bit PC rel. PLT shifted by 1.  */
-#define R_390_PC32DBL          19      /* PC relative 32 bit shifted by 1.  */
-#define R_390_PLT32DBL         20      /* 32 bit PC rel. PLT shifted by 1.  */
-#define R_390_GOTPCDBL         21      /* 32 bit PC rel. GOT shifted by 1.  */
-#define R_390_64               22      /* Direct 64 bit.  */
-#define R_390_PC64             23      /* PC relative 64 bit.  */
-#define R_390_GOT64            24      /* 64 bit GOT offset.  */
-#define R_390_PLT64            25      /* 64 bit PC relative PLT address.  */
-#define R_390_GOTENT           26      /* 32 bit PC rel. to GOT entry >> 1. */
-#define R_390_GOTOFF16         27      /* 16 bit offset to GOT. */
-#define R_390_GOTOFF64         28      /* 64 bit offset to GOT. */
-#define R_390_GOTPLT12         29      /* 12 bit offset to jump slot.  */
-#define R_390_GOTPLT16         30      /* 16 bit offset to jump slot.  */
-#define R_390_GOTPLT32         31      /* 32 bit offset to jump slot.  */
-#define R_390_GOTPLT64         32      /* 64 bit offset to jump slot.  */
-#define R_390_GOTPLTENT                33      /* 32 bit rel. offset to jump slot.  */
-#define R_390_PLTOFF16         34      /* 16 bit offset from GOT to PLT. */
-#define R_390_PLTOFF32         35      /* 32 bit offset from GOT to PLT. */
-#define R_390_PLTOFF64         36      /* 16 bit offset from GOT to PLT. */
-#define R_390_TLS_LOAD         37      /* Tag for load insn in TLS code. */
-#define R_390_TLS_GDCALL       38      /* Tag for function call in general
-                                           dynamic TLS code.  */
-#define R_390_TLS_LDCALL       39      /* Tag for function call in local
-                                           dynamic TLS code.  */
-#define R_390_TLS_GD32         40      /* Direct 32 bit for general dynamic
-                                           thread local data.  */
-#define R_390_TLS_GD64         41      /* Direct 64 bit for general dynamic
-                                           thread local data.  */
-#define R_390_TLS_GOTIE12      42      /* 12 bit GOT offset for static TLS
-                                           block offset.  */
-#define R_390_TLS_GOTIE32      43      /* 32 bit GOT offset for static TLS
-                                           block offset.  */
-#define R_390_TLS_GOTIE64      44      /* 64 bit GOT offset for static TLS
-                                           block offset.  */
-#define R_390_TLS_LDM32                45      /* Direct 32 bit for local dynamic
-                                           thread local data in LD code.  */
-#define R_390_TLS_LDM64                46      /* Direct 64 bit for local dynamic
-                                           thread local data in LD code.  */
-#define R_390_TLS_IE32         47      /* 32 bit address of GOT entry for
-                                           negated static TLS block offset.  */
-#define R_390_TLS_IE64         48      /* 64 bit address of GOT entry for
-                                           negated static TLS block offset.  */
-#define R_390_TLS_IEENT                49      /* 32 bit rel. offset to GOT entry for
-                                           negated static TLS block offset.  */
-#define R_390_TLS_LE32         50      /* 32 bit negated offset relative to
-                                           static TLS block.  */
-#define R_390_TLS_LE64         51      /* 64 bit negated offset relative to
-                                           static TLS block.  */
-#define R_390_TLS_LDO32                52      /* 32 bit offset relative to TLS
-                                           block.  */
-#define R_390_TLS_LDO64                53      /* 64 bit offset relative to TLS
-                                           block.  */
-#define R_390_TLS_DTPMOD       54      /* ID of module containing symbol.  */
-#define R_390_TLS_DTPOFF       55      /* Offset in TLS block.  */
-#define R_390_TLS_TPOFF                56      /* Negate offset in static TLS
+#define R_390_NONE              0   /* No reloc.  */
+#define R_390_8                 1   /* Direct 8 bit.  */
+#define R_390_12                2   /* Direct 12 bit.  */
+#define R_390_16                3   /* Direct 16 bit.  */
+#define R_390_32                4   /* Direct 32 bit.  */
+#define R_390_PC32              5   /* PC relative 32 bit.  */
+#define R_390_GOT12             6   /* 12 bit GOT offset.  */
+#define R_390_GOT32             7   /* 32 bit GOT offset.  */
+#define R_390_PLT32             8   /* 32 bit PC relative PLT address.  */
+#define R_390_COPY              9   /* Copy symbol at runtime.  */
+#define R_390_GLOB_DAT          10  /* Create GOT entry.  */
+#define R_390_JMP_SLOT          11  /* Create PLT entry.  */
+#define R_390_RELATIVE          12  /* Adjust by program base.  */
+#define R_390_GOTOFF32          13  /* 32 bit offset to GOT.  */
+#define R_390_GOTPC             14  /* 32 bit PC rel. offset to GOT.  */
+#define R_390_GOT16             15  /* 16 bit GOT offset.  */
+#define R_390_PC16              16  /* PC relative 16 bit.  */
+#define R_390_PC16DBL           17  /* PC relative 16 bit shifted by 1.  */
+#define R_390_PLT16DBL          18  /* 16 bit PC rel. PLT shifted by 1.  */
+#define R_390_PC32DBL           19  /* PC relative 32 bit shifted by 1.  */
+#define R_390_PLT32DBL          20  /* 32 bit PC rel. PLT shifted by 1.  */
+#define R_390_GOTPCDBL          21  /* 32 bit PC rel. GOT shifted by 1.  */
+#define R_390_64                22  /* Direct 64 bit.  */
+#define R_390_PC64              23  /* PC relative 64 bit.  */
+#define R_390_GOT64             24  /* 64 bit GOT offset.  */
+#define R_390_PLT64             25  /* 64 bit PC relative PLT address.  */
+#define R_390_GOTENT            26  /* 32 bit PC rel. to GOT entry >> 1. */
+#define R_390_GOTOFF16          27  /* 16 bit offset to GOT. */
+#define R_390_GOTOFF64          28  /* 64 bit offset to GOT. */
+#define R_390_GOTPLT12          29  /* 12 bit offset to jump slot.  */
+#define R_390_GOTPLT16          30  /* 16 bit offset to jump slot.  */
+#define R_390_GOTPLT32          31  /* 32 bit offset to jump slot.  */
+#define R_390_GOTPLT64          32  /* 64 bit offset to jump slot.  */
+#define R_390_GOTPLTENT         33  /* 32 bit rel. offset to jump slot.  */
+#define R_390_PLTOFF16          34  /* 16 bit offset from GOT to PLT. */
+#define R_390_PLTOFF32          35  /* 32 bit offset from GOT to PLT. */
+#define R_390_PLTOFF64          36  /* 16 bit offset from GOT to PLT. */
+#define R_390_TLS_LOAD          37  /* Tag for load insn in TLS code. */
+#define R_390_TLS_GDCALL        38  /* Tag for function call in general
+                                               dynamic TLS code.  */
+#define R_390_TLS_LDCALL        39  /* Tag for function call in local
+                                               dynamic TLS code.  */
+#define R_390_TLS_GD32          40  /* Direct 32 bit for general dynamic
+                                               thread local data.  */
+#define R_390_TLS_GD64          41  /* Direct 64 bit for general dynamic
+                                               thread local data.  */
+#define R_390_TLS_GOTIE12       42  /* 12 bit GOT offset for static TLS
+                                               block offset.  */
+#define R_390_TLS_GOTIE32       43  /* 32 bit GOT offset for static TLS
+                                               block offset.  */
+#define R_390_TLS_GOTIE64       44  /* 64 bit GOT offset for static TLS
+                                               block offset.  */
+#define R_390_TLS_LDM32         45  /* Direct 32 bit for local dynamic
+                                               thread local data in LD code.  */
+#define R_390_TLS_LDM64         46  /* Direct 64 bit for local dynamic
+                                               thread local data in LD code.  */
+#define R_390_TLS_IE32          47  /* 32 bit address of GOT entry for
+                                               negated static TLS block offset.  */
+#define R_390_TLS_IE64          48  /* 64 bit address of GOT entry for
+                                               negated static TLS block offset.  */
+#define R_390_TLS_IEENT         49  /* 32 bit rel. offset to GOT entry for
+                                               negated static TLS block offset.  */
+#define R_390_TLS_LE32          50  /* 32 bit negated offset relative to
+                                               static TLS block.  */
+#define R_390_TLS_LE64          51  /* 64 bit negated offset relative to
+                                               static TLS block.  */
+#define R_390_TLS_LDO32         52  /* 32 bit offset relative to TLS
+                                               block.  */
+#define R_390_TLS_LDO64         53  /* 64 bit offset relative to TLS
+                                               block.  */
+#define R_390_TLS_DTPMOD        54  /* ID of module containing symbol.  */
+#define R_390_TLS_DTPOFF        55  /* Offset in TLS block.  */
+#define R_390_TLS_TPOFF         56  /* Negate offset in static TLS
                                            block.  */
 #define R_390_20                57
 /* Keep this the last entry.  */
 #define R_390_NUM               58
 
 /* x86-64 relocation types */
-#define R_X86_64_NONE          0       /* No reloc */
-#define R_X86_64_64            1       /* Direct 64 bit  */
-#define R_X86_64_PC32          2       /* PC relative 32 bit signed */
-#define R_X86_64_GOT32         3       /* 32 bit GOT entry */
-#define R_X86_64_PLT32         4       /* 32 bit PLT address */
-#define R_X86_64_COPY          5       /* Copy symbol at runtime */
-#define R_X86_64_GLOB_DAT      6       /* Create GOT entry */
-#define R_X86_64_JUMP_SLOT     7       /* Create PLT entry */
-#define R_X86_64_RELATIVE      8       /* Adjust by program base */
-#define R_X86_64_GOTPCREL      9       /* 32 bit signed pc relative
+#define R_X86_64_NONE       0   /* No reloc */
+#define R_X86_64_64         1   /* Direct 64 bit  */
+#define R_X86_64_PC32       2   /* PC relative 32 bit signed */
+#define R_X86_64_GOT32      3   /* 32 bit GOT entry */
+#define R_X86_64_PLT32      4   /* 32 bit PLT address */
+#define R_X86_64_COPY       5   /* Copy symbol at runtime */
+#define R_X86_64_GLOB_DAT   6   /* Create GOT entry */
+#define R_X86_64_JUMP_SLOT  7   /* Create PLT entry */
+#define R_X86_64_RELATIVE   8   /* Adjust by program base */
+#define R_X86_64_GOTPCREL   9   /* 32 bit signed pc relative
                                            offset to GOT */
-#define R_X86_64_32            10      /* Direct 32 bit zero extended */
-#define R_X86_64_32S           11      /* Direct 32 bit sign extended */
-#define R_X86_64_16            12      /* Direct 16 bit zero extended */
-#define R_X86_64_PC16          13      /* 16 bit sign extended pc relative */
-#define R_X86_64_8             14      /* Direct 8 bit sign extended  */
-#define R_X86_64_PC8           15      /* 8 bit sign extended pc relative */
+#define R_X86_64_32         10  /* Direct 32 bit zero extended */
+#define R_X86_64_32S        11  /* Direct 32 bit sign extended */
+#define R_X86_64_16         12  /* Direct 16 bit zero extended */
+#define R_X86_64_PC16       13  /* 16 bit sign extended pc relative */
+#define R_X86_64_8          14  /* Direct 8 bit sign extended  */
+#define R_X86_64_PC8        15  /* 8 bit sign extended pc relative */
 
-#define R_X86_64_NUM           16
+#define R_X86_64_NUM        16
 
 /* Legal values for e_flags field of Elf64_Ehdr.  */
 
-#define EF_ALPHA_32BIT         1       /* All addresses are below 2GB */
+#define EF_ALPHA_32BIT      1   /* All addresses are below 2GB */
 
 /* HPPA specific definitions.  */
 
 /* Legal values for e_flags field of Elf32_Ehdr.  */
 
-#define EF_PARISC_TRAPNIL      0x00010000 /* Trap nil pointer dereference.  */
-#define EF_PARISC_EXT          0x00020000 /* Program uses arch. extensions. */
-#define EF_PARISC_LSB          0x00040000 /* Program expects little endian. */
-#define EF_PARISC_WIDE         0x00080000 /* Program expects wide mode.  */
-#define EF_PARISC_NO_KABP      0x00100000 /* No kernel assisted branch
+#define EF_PARISC_TRAPNIL   0x00010000 /* Trap nil pointer dereference.  */
+#define EF_PARISC_EXT       0x00020000 /* Program uses arch. extensions. */
+#define EF_PARISC_LSB       0x00040000 /* Program expects little endian. */
+#define EF_PARISC_WIDE      0x00080000 /* Program expects wide mode.  */
+#define EF_PARISC_NO_KABP   0x00100000 /* No kernel assisted branch
                                               prediction.  */
-#define EF_PARISC_LAZYSWAP     0x00400000 /* Allow lazy swapping.  */
-#define EF_PARISC_ARCH         0x0000ffff /* Architecture version.  */
+#define EF_PARISC_LAZYSWAP  0x00400000 /* Allow lazy swapping.  */
+#define EF_PARISC_ARCH      0x0000ffff /* Architecture version.  */
 
 /* Defined values for `e_flags & EF_PARISC_ARCH' are:  */
 
-#define EFA_PARISC_1_0             0x020b /* PA-RISC 1.0 big-endian.  */
-#define EFA_PARISC_1_1             0x0210 /* PA-RISC 1.1 big-endian.  */
-#define EFA_PARISC_2_0             0x0214 /* PA-RISC 2.0 big-endian.  */
+#define EFA_PARISC_1_0      0x020b /* PA-RISC 1.0 big-endian.  */
+#define EFA_PARISC_1_1      0x0210 /* PA-RISC 1.1 big-endian.  */
+#define EFA_PARISC_2_0      0x0214 /* PA-RISC 2.0 big-endian.  */
 
-/* Additional section indeces.  */
+/* Additional section indices.  */
 
-#define SHN_PARISC_ANSI_COMMON 0xff00     /* Section for tenatively declared
+#define SHN_PARISC_ANSI_COMMON  0xff00   /* Section for tentatively declared
                                               symbols in ANSI C.  */
-#define SHN_PARISC_HUGE_COMMON 0xff01     /* Common blocks in huge model.  */
+#define SHN_PARISC_HUGE_COMMON  0xff01   /* Common blocks in huge model.  */
 
 /* Legal values for sh_type field of Elf32_Shdr.  */
 
-#define SHT_PARISC_EXT         0x70000000 /* Contains product specific ext. */
-#define SHT_PARISC_UNWIND      0x70000001 /* Unwind information.  */
-#define SHT_PARISC_DOC         0x70000002 /* Debug info for optimized code. */
+#define SHT_PARISC_EXT      0x70000000 /* Contains product specific ext. */
+#define SHT_PARISC_UNWIND   0x70000001 /* Unwind information.  */
+#define SHT_PARISC_DOC      0x70000002 /* Debug info for optimized code. */
 
 /* Legal values for sh_flags field of Elf32_Shdr.  */
 
-#define SHF_PARISC_SHORT       0x20000000 /* Section with short addressing. */
-#define SHF_PARISC_HUGE                0x40000000 /* Section far from gp.  */
-#define SHF_PARISC_SBP         0x80000000 /* Static branch prediction code. */
+#define SHF_PARISC_SHORT    0x20000000 /* Section with short addressing. */
+#define SHF_PARISC_HUGE     0x40000000 /* Section far from gp.  */
+#define SHF_PARISC_SBP      0x80000000 /* Static branch prediction code. */
 
 /* Legal values for ST_TYPE subfield of st_info (symbol type).  */
 
-#define STT_PARISC_MILLICODE   13      /* Millicode function entry point.  */
+#define STT_PARISC_MILLICODE    13  /* Millicode function entry point.  */
 
-#define STT_HP_OPAQUE          (STT_LOOS + 0x1)
-#define STT_HP_STUB            (STT_LOOS + 0x2)
+#define STT_HP_OPAQUE           (STT_LOOS + 0x1)
+#define STT_HP_STUB             (STT_LOOS + 0x2)
 
 /* HPPA relocs.  */
 
-#define R_PARISC_NONE          0       /* No reloc.  */
-#define R_PARISC_DIR32         1       /* Direct 32-bit reference.  */
-#define R_PARISC_DIR21L                2       /* Left 21 bits of eff. address.  */
-#define R_PARISC_DIR17R                3       /* Right 17 bits of eff. address.  */
-#define R_PARISC_DIR17F                4       /* 17 bits of eff. address.  */
-#define R_PARISC_DIR14R                6       /* Right 14 bits of eff. address.  */
-#define R_PARISC_PCREL32       9       /* 32-bit rel. address.  */
-#define R_PARISC_PCREL21L      10      /* Left 21 bits of rel. address.  */
-#define R_PARISC_PCREL17R      11      /* Right 17 bits of rel. address.  */
-#define R_PARISC_PCREL17F      12      /* 17 bits of rel. address.  */
-#define R_PARISC_PCREL14R      14      /* Right 14 bits of rel. address.  */
-#define R_PARISC_DPREL21L      18      /* Left 21 bits of rel. address.  */
-#define R_PARISC_DPREL14R      22      /* Right 14 bits of rel. address.  */
-#define R_PARISC_GPREL21L      26      /* GP-relative, left 21 bits.  */
-#define R_PARISC_GPREL14R      30      /* GP-relative, right 14 bits.  */
-#define R_PARISC_LTOFF21L      34      /* LT-relative, left 21 bits.  */
-#define R_PARISC_LTOFF14R      38      /* LT-relative, right 14 bits.  */
-#define R_PARISC_SECREL32      41      /* 32 bits section rel. address.  */
-#define R_PARISC_SEGBASE       48      /* No relocation, set segment base.  */
-#define R_PARISC_SEGREL32      49      /* 32 bits segment rel. address.  */
-#define R_PARISC_PLTOFF21L     50      /* PLT rel. address, left 21 bits.  */
-#define R_PARISC_PLTOFF14R     54      /* PLT rel. address, right 14 bits.  */
-#define R_PARISC_LTOFF_FPTR32  57      /* 32 bits LT-rel. function pointer. */
-#define R_PARISC_LTOFF_FPTR21L 58      /* LT-rel. fct ptr, left 21 bits. */
-#define R_PARISC_LTOFF_FPTR14R 62      /* LT-rel. fct ptr, right 14 bits. */
-#define R_PARISC_FPTR64                64      /* 64 bits function address.  */
-#define R_PARISC_PLABEL32      65      /* 32 bits function address.  */
-#define R_PARISC_PCREL64       72      /* 64 bits PC-rel. address.  */
-#define R_PARISC_PCREL22F      74      /* 22 bits PC-rel. address.  */
-#define R_PARISC_PCREL14WR     75      /* PC-rel. address, right 14 bits.  */
-#define R_PARISC_PCREL14DR     76      /* PC rel. address, right 14 bits.  */
-#define R_PARISC_PCREL16F      77      /* 16 bits PC-rel. address.  */
-#define R_PARISC_PCREL16WF     78      /* 16 bits PC-rel. address.  */
-#define R_PARISC_PCREL16DF     79      /* 16 bits PC-rel. address.  */
-#define R_PARISC_DIR64         80      /* 64 bits of eff. address.  */
-#define R_PARISC_DIR14WR       83      /* 14 bits of eff. address.  */
-#define R_PARISC_DIR14DR       84      /* 14 bits of eff. address.  */
-#define R_PARISC_DIR16F                85      /* 16 bits of eff. address.  */
-#define R_PARISC_DIR16WF       86      /* 16 bits of eff. address.  */
-#define R_PARISC_DIR16DF       87      /* 16 bits of eff. address.  */
-#define R_PARISC_GPREL64       88      /* 64 bits of GP-rel. address.  */
-#define R_PARISC_GPREL14WR     91      /* GP-rel. address, right 14 bits.  */
-#define R_PARISC_GPREL14DR     92      /* GP-rel. address, right 14 bits.  */
-#define R_PARISC_GPREL16F      93      /* 16 bits GP-rel. address.  */
-#define R_PARISC_GPREL16WF     94      /* 16 bits GP-rel. address.  */
-#define R_PARISC_GPREL16DF     95      /* 16 bits GP-rel. address.  */
-#define R_PARISC_LTOFF64       96      /* 64 bits LT-rel. address.  */
-#define R_PARISC_LTOFF14WR     99      /* LT-rel. address, right 14 bits.  */
-#define R_PARISC_LTOFF14DR     100     /* LT-rel. address, right 14 bits.  */
-#define R_PARISC_LTOFF16F      101     /* 16 bits LT-rel. address.  */
-#define R_PARISC_LTOFF16WF     102     /* 16 bits LT-rel. address.  */
-#define R_PARISC_LTOFF16DF     103     /* 16 bits LT-rel. address.  */
-#define R_PARISC_SECREL64      104     /* 64 bits section rel. address.  */
-#define R_PARISC_SEGREL64      112     /* 64 bits segment rel. address.  */
-#define R_PARISC_PLTOFF14WR    115     /* PLT-rel. address, right 14 bits.  */
-#define R_PARISC_PLTOFF14DR    116     /* PLT-rel. address, right 14 bits.  */
-#define R_PARISC_PLTOFF16F     117     /* 16 bits LT-rel. address.  */
-#define R_PARISC_PLTOFF16WF    118     /* 16 bits PLT-rel. address.  */
-#define R_PARISC_PLTOFF16DF    119     /* 16 bits PLT-rel. address.  */
-#define R_PARISC_LTOFF_FPTR64  120     /* 64 bits LT-rel. function ptr.  */
-#define R_PARISC_LTOFF_FPTR14WR        123     /* LT-rel. fct. ptr., right 14 bits. */
-#define R_PARISC_LTOFF_FPTR14DR        124     /* LT-rel. fct. ptr., right 14 bits. */
-#define R_PARISC_LTOFF_FPTR16F 125     /* 16 bits LT-rel. function ptr.  */
-#define R_PARISC_LTOFF_FPTR16WF        126     /* 16 bits LT-rel. function ptr.  */
-#define R_PARISC_LTOFF_FPTR16DF        127     /* 16 bits LT-rel. function ptr.  */
-#define R_PARISC_LORESERVE     128
-#define R_PARISC_COPY          128     /* Copy relocation.  */
-#define R_PARISC_IPLT          129     /* Dynamic reloc, imported PLT */
-#define R_PARISC_EPLT          130     /* Dynamic reloc, exported PLT */
-#define R_PARISC_TPREL32       153     /* 32 bits TP-rel. address.  */
-#define R_PARISC_TPREL21L      154     /* TP-rel. address, left 21 bits.  */
-#define R_PARISC_TPREL14R      158     /* TP-rel. address, right 14 bits.  */
-#define R_PARISC_LTOFF_TP21L   162     /* LT-TP-rel. address, left 21 bits. */
-#define R_PARISC_LTOFF_TP14R   166     /* LT-TP-rel. address, right 14 bits.*/
-#define R_PARISC_LTOFF_TP14F   167     /* 14 bits LT-TP-rel. address.  */
-#define R_PARISC_TPREL64       216     /* 64 bits TP-rel. address.  */
-#define R_PARISC_TPREL14WR     219     /* TP-rel. address, right 14 bits.  */
-#define R_PARISC_TPREL14DR     220     /* TP-rel. address, right 14 bits.  */
-#define R_PARISC_TPREL16F      221     /* 16 bits TP-rel. address.  */
-#define R_PARISC_TPREL16WF     222     /* 16 bits TP-rel. address.  */
-#define R_PARISC_TPREL16DF     223     /* 16 bits TP-rel. address.  */
-#define R_PARISC_LTOFF_TP64    224     /* 64 bits LT-TP-rel. address.  */
-#define R_PARISC_LTOFF_TP14WR  227     /* LT-TP-rel. address, right 14 bits.*/
-#define R_PARISC_LTOFF_TP14DR  228     /* LT-TP-rel. address, right 14 bits.*/
-#define R_PARISC_LTOFF_TP16F   229     /* 16 bits LT-TP-rel. address.  */
-#define R_PARISC_LTOFF_TP16WF  230     /* 16 bits LT-TP-rel. address.  */
-#define R_PARISC_LTOFF_TP16DF  231     /* 16 bits LT-TP-rel. address.  */
-#define R_PARISC_HIRESERVE     255
+#define R_PARISC_NONE           0   /* No reloc.  */
+#define R_PARISC_DIR32          1   /* Direct 32-bit reference.  */
+#define R_PARISC_DIR21L         2   /* Left 21 bits of eff. address.  */
+#define R_PARISC_DIR17R         3   /* Right 17 bits of eff. address.  */
+#define R_PARISC_DIR17F         4   /* 17 bits of eff. address.  */
+#define R_PARISC_DIR14R         6   /* Right 14 bits of eff. address.  */
+#define R_PARISC_PCREL32        9   /* 32-bit rel. address.  */
+#define R_PARISC_PCREL21L       10  /* Left 21 bits of rel. address.  */
+#define R_PARISC_PCREL17R       11  /* Right 17 bits of rel. address.  */
+#define R_PARISC_PCREL17F       12  /* 17 bits of rel. address.  */
+#define R_PARISC_PCREL14R       14  /* Right 14 bits of rel. address.  */
+#define R_PARISC_DPREL21L       18  /* Left 21 bits of rel. address.  */
+#define R_PARISC_DPREL14R       22  /* Right 14 bits of rel. address.  */
+#define R_PARISC_GPREL21L       26  /* GP-relative, left 21 bits.  */
+#define R_PARISC_GPREL14R       30  /* GP-relative, right 14 bits.  */
+#define R_PARISC_LTOFF21L       34  /* LT-relative, left 21 bits.  */
+#define R_PARISC_LTOFF14R       38  /* LT-relative, right 14 bits.  */
+#define R_PARISC_SECREL32       41  /* 32 bits section rel. address.  */
+#define R_PARISC_SEGBASE        48  /* No relocation, set segment base.  */
+#define R_PARISC_SEGREL32       49  /* 32 bits segment rel. address.  */
+#define R_PARISC_PLTOFF21L      50  /* PLT rel. address, left 21 bits.  */
+#define R_PARISC_PLTOFF14R      54  /* PLT rel. address, right 14 bits.  */
+#define R_PARISC_LTOFF_FPTR32   57  /* 32 bits LT-rel. function pointer. */
+#define R_PARISC_LTOFF_FPTR21L  58  /* LT-rel. fct ptr, left 21 bits. */
+#define R_PARISC_LTOFF_FPTR14R  62  /* LT-rel. fct ptr, right 14 bits. */
+#define R_PARISC_FPTR64         64  /* 64 bits function address.  */
+#define R_PARISC_PLABEL32       65  /* 32 bits function address.  */
+#define R_PARISC_PCREL64        72  /* 64 bits PC-rel. address.  */
+#define R_PARISC_PCREL22F       74  /* 22 bits PC-rel. address.  */
+#define R_PARISC_PCREL14WR      75  /* PC-rel. address, right 14 bits.  */
+#define R_PARISC_PCREL14DR      76  /* PC rel. address, right 14 bits.  */
+#define R_PARISC_PCREL16F       77  /* 16 bits PC-rel. address.  */
+#define R_PARISC_PCREL16WF      78  /* 16 bits PC-rel. address.  */
+#define R_PARISC_PCREL16DF      79  /* 16 bits PC-rel. address.  */
+#define R_PARISC_DIR64          80  /* 64 bits of eff. address.  */
+#define R_PARISC_DIR14WR        83  /* 14 bits of eff. address.  */
+#define R_PARISC_DIR14DR        84  /* 14 bits of eff. address.  */
+#define R_PARISC_DIR16F         85  /* 16 bits of eff. address.  */
+#define R_PARISC_DIR16WF        86  /* 16 bits of eff. address.  */
+#define R_PARISC_DIR16DF        87  /* 16 bits of eff. address.  */
+#define R_PARISC_GPREL64        88  /* 64 bits of GP-rel. address.  */
+#define R_PARISC_GPREL14WR      91  /* GP-rel. address, right 14 bits.  */
+#define R_PARISC_GPREL14DR      92  /* GP-rel. address, right 14 bits.  */
+#define R_PARISC_GPREL16F       93  /* 16 bits GP-rel. address.  */
+#define R_PARISC_GPREL16WF      94  /* 16 bits GP-rel. address.  */
+#define R_PARISC_GPREL16DF      95  /* 16 bits GP-rel. address.  */
+#define R_PARISC_LTOFF64        96  /* 64 bits LT-rel. address.  */
+#define R_PARISC_LTOFF14WR      99  /* LT-rel. address, right 14 bits.  */
+#define R_PARISC_LTOFF14DR      100 /* LT-rel. address, right 14 bits.  */
+#define R_PARISC_LTOFF16F       101 /* 16 bits LT-rel. address.  */
+#define R_PARISC_LTOFF16WF      102 /* 16 bits LT-rel. address.  */
+#define R_PARISC_LTOFF16DF      103 /* 16 bits LT-rel. address.  */
+#define R_PARISC_SECREL64       104 /* 64 bits section rel. address.  */
+#define R_PARISC_SEGREL64       112 /* 64 bits segment rel. address.  */
+#define R_PARISC_PLTOFF14WR     115 /* PLT-rel. address, right 14 bits.  */
+#define R_PARISC_PLTOFF14DR     116 /* PLT-rel. address, right 14 bits.  */
+#define R_PARISC_PLTOFF16F      117 /* 16 bits LT-rel. address.  */
+#define R_PARISC_PLTOFF16WF     118 /* 16 bits PLT-rel. address.  */
+#define R_PARISC_PLTOFF16DF     119 /* 16 bits PLT-rel. address.  */
+#define R_PARISC_LTOFF_FPTR64   120 /* 64 bits LT-rel. function ptr.  */
+#define R_PARISC_LTOFF_FPTR14WR 123 /* LT-rel. fct. ptr., right 14 bits. */
+#define R_PARISC_LTOFF_FPTR14DR 124 /* LT-rel. fct. ptr., right 14 bits. */
+#define R_PARISC_LTOFF_FPTR16F  125 /* 16 bits LT-rel. function ptr.  */
+#define R_PARISC_LTOFF_FPTR16WF 126 /* 16 bits LT-rel. function ptr.  */
+#define R_PARISC_LTOFF_FPTR16DF 127 /* 16 bits LT-rel. function ptr.  */
+#define R_PARISC_LORESERVE      128
+#define R_PARISC_COPY           128 /* Copy relocation.  */
+#define R_PARISC_IPLT           129 /* Dynamic reloc, imported PLT */
+#define R_PARISC_EPLT           130 /* Dynamic reloc, exported PLT */
+#define R_PARISC_TPREL32        153 /* 32 bits TP-rel. address.  */
+#define R_PARISC_TPREL21L       154 /* TP-rel. address, left 21 bits.  */
+#define R_PARISC_TPREL14R       158 /* TP-rel. address, right 14 bits.  */
+#define R_PARISC_LTOFF_TP21L    162 /* LT-TP-rel. address, left 21 bits. */
+#define R_PARISC_LTOFF_TP14R    166 /* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP14F    167 /* 14 bits LT-TP-rel. address.  */
+#define R_PARISC_TPREL64        216 /* 64 bits TP-rel. address.  */
+#define R_PARISC_TPREL14WR      219 /* TP-rel. address, right 14 bits.  */
+#define R_PARISC_TPREL14DR      220 /* TP-rel. address, right 14 bits.  */
+#define R_PARISC_TPREL16F       221 /* 16 bits TP-rel. address.  */
+#define R_PARISC_TPREL16WF      222 /* 16 bits TP-rel. address.  */
+#define R_PARISC_TPREL16DF      223 /* 16 bits TP-rel. address.  */
+#define R_PARISC_LTOFF_TP64     224 /* 64 bits LT-TP-rel. address.  */
+#define R_PARISC_LTOFF_TP14WR   227 /* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP14DR   228 /* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP16F    229 /* 16 bits LT-TP-rel. address.  */
+#define R_PARISC_LTOFF_TP16WF   230 /* 16 bits LT-TP-rel. address.  */
+#define R_PARISC_LTOFF_TP16DF   231 /* 16 bits LT-TP-rel. address.  */
+#define R_PARISC_HIRESERVE      255
 
 /* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr.  */
 
-#define PT_HP_TLS              (PT_LOOS + 0x0)
-#define PT_HP_CORE_NONE                (PT_LOOS + 0x1)
-#define PT_HP_CORE_VERSION     (PT_LOOS + 0x2)
-#define PT_HP_CORE_KERNEL      (PT_LOOS + 0x3)
-#define PT_HP_CORE_COMM                (PT_LOOS + 0x4)
-#define PT_HP_CORE_PROC                (PT_LOOS + 0x5)
-#define PT_HP_CORE_LOADABLE    (PT_LOOS + 0x6)
-#define PT_HP_CORE_STACK       (PT_LOOS + 0x7)
-#define PT_HP_CORE_SHM         (PT_LOOS + 0x8)
-#define PT_HP_CORE_MMF         (PT_LOOS + 0x9)
-#define PT_HP_PARALLEL         (PT_LOOS + 0x10)
-#define PT_HP_FASTBIND         (PT_LOOS + 0x11)
-#define PT_HP_OPT_ANNOT                (PT_LOOS + 0x12)
-#define PT_HP_HSL_ANNOT                (PT_LOOS + 0x13)
-#define PT_HP_STACK            (PT_LOOS + 0x14)
-
-#define PT_PARISC_ARCHEXT      0x70000000
-#define PT_PARISC_UNWIND       0x70000001
+#define PT_HP_TLS               (PT_LOOS + 0x0)
+#define PT_HP_CORE_NONE         (PT_LOOS + 0x1)
+#define PT_HP_CORE_VERSION      (PT_LOOS + 0x2)
+#define PT_HP_CORE_KERNEL       (PT_LOOS + 0x3)
+#define PT_HP_CORE_COMM         (PT_LOOS + 0x4)
+#define PT_HP_CORE_PROC         (PT_LOOS + 0x5)
+#define PT_HP_CORE_LOADABLE     (PT_LOOS + 0x6)
+#define PT_HP_CORE_STACK        (PT_LOOS + 0x7)
+#define PT_HP_CORE_SHM          (PT_LOOS + 0x8)
+#define PT_HP_CORE_MMF          (PT_LOOS + 0x9)
+#define PT_HP_PARALLEL          (PT_LOOS + 0x10)
+#define PT_HP_FASTBIND          (PT_LOOS + 0x11)
+#define PT_HP_OPT_ANNOT         (PT_LOOS + 0x12)
+#define PT_HP_HSL_ANNOT         (PT_LOOS + 0x13)
+#define PT_HP_STACK             (PT_LOOS + 0x14)
+
+#define PT_PARISC_ARCHEXT       0x70000000
+#define PT_PARISC_UNWIND        0x70000001
 
 /* Legal values for p_flags field of Elf32_Phdr/Elf64_Phdr.  */
 
-#define PF_PARISC_SBP          0x08000000
+#define PF_PARISC_SBP           0x08000000
 
-#define PF_HP_PAGE_SIZE                0x00100000
-#define PF_HP_FAR_SHARED       0x00200000
-#define PF_HP_NEAR_SHARED      0x00400000
-#define PF_HP_CODE             0x01000000
-#define PF_HP_MODIFY           0x02000000
-#define PF_HP_LAZYSWAP         0x04000000
-#define PF_HP_SBP              0x08000000
+#define PF_HP_PAGE_SIZE         0x00100000
+#define PF_HP_FAR_SHARED        0x00200000
+#define PF_HP_NEAR_SHARED       0x00400000
+#define PF_HP_CODE              0x01000000
+#define PF_HP_MODIFY            0x02000000
+#define PF_HP_LAZYSWAP          0x04000000
+#define PF_HP_SBP               0x08000000
 
 /* IA-64 specific declarations.  */
 
 /* Processor specific flags for the Ehdr e_flags field.  */
-#define EF_IA_64_MASKOS                0x0000000f      /* os-specific flags */
-#define EF_IA_64_ABI64         0x00000010      /* 64-bit ABI */
-#define EF_IA_64_ARCH          0xff000000      /* arch. version mask */
+#define EF_IA_64_MASKOS         0x0000000f  /* os-specific flags */
+#define EF_IA_64_ABI64          0x00000010  /* 64-bit ABI */
+#define EF_IA_64_ARCH           0xff000000  /* arch. version mask */
 
 /* Processor specific values for the Phdr p_type field.  */
-#define PT_IA_64_ARCHEXT       (PT_LOPROC + 0) /* arch extension bits */
-#define PT_IA_64_UNWIND                (PT_LOPROC + 1) /* ia64 unwind bits */
+#define PT_IA_64_ARCHEXT        (PT_LOPROC + 0) /* arch extension bits */
+#define PT_IA_64_UNWIND         (PT_LOPROC + 1) /* ia64 unwind bits */
 
 /* Processor specific flags for the Phdr p_flags field.  */
-#define PF_IA_64_NORECOV       0x80000000      /* spec insns w/o recovery */
+#define PF_IA_64_NORECOV        0x80000000  /* spec insns w/o recovery */
 
 /* Processor specific values for the Shdr sh_type field.  */
-#define SHT_IA_64_EXT          (SHT_LOPROC + 0) /* extension bits */
-#define SHT_IA_64_UNWIND       (SHT_LOPROC + 1) /* unwind bits */
+#define SHT_IA_64_EXT           (SHT_LOPROC + 0) /* extension bits */
+#define SHT_IA_64_UNWIND        (SHT_LOPROC + 1) /* unwind bits */
 
 /* Processor specific flags for the Shdr sh_flags field.  */
-#define SHF_IA_64_SHORT                0x10000000      /* section near gp */
-#define SHF_IA_64_NORECOV      0x20000000      /* spec insns w/o recovery */
+#define SHF_IA_64_SHORT         0x10000000  /* section near gp */
+#define SHF_IA_64_NORECOV       0x20000000  /* spec insns w/o recovery */
 
 /* Processor specific values for the Dyn d_tag field.  */
-#define DT_IA_64_PLT_RESERVE   (DT_LOPROC + 0)
-#define DT_IA_64_NUM           1
+#define DT_IA_64_PLT_RESERVE    (DT_LOPROC + 0)
+#define DT_IA_64_NUM            1
 
 /* IA-64 relocations.  */
-#define R_IA64_NONE            0x00    /* none */
-#define R_IA64_IMM14           0x21    /* symbol + addend, add imm14 */
-#define R_IA64_IMM22           0x22    /* symbol + addend, add imm22 */
-#define R_IA64_IMM64           0x23    /* symbol + addend, mov imm64 */
-#define R_IA64_DIR32MSB                0x24    /* symbol + addend, data4 MSB */
-#define R_IA64_DIR32LSB                0x25    /* symbol + addend, data4 LSB */
-#define R_IA64_DIR64MSB                0x26    /* symbol + addend, data8 MSB */
-#define R_IA64_DIR64LSB                0x27    /* symbol + addend, data8 LSB */
-#define R_IA64_GPREL22         0x2a    /* @gprel(sym + add), add imm22 */
-#define R_IA64_GPREL64I                0x2b    /* @gprel(sym + add), mov imm64 */
-#define R_IA64_GPREL32MSB      0x2c    /* @gprel(sym + add), data4 MSB */
-#define R_IA64_GPREL32LSB      0x2d    /* @gprel(sym + add), data4 LSB */
-#define R_IA64_GPREL64MSB      0x2e    /* @gprel(sym + add), data8 MSB */
-#define R_IA64_GPREL64LSB      0x2f    /* @gprel(sym + add), data8 LSB */
-#define R_IA64_LTOFF22         0x32    /* @ltoff(sym + add), add imm22 */
-#define R_IA64_LTOFF64I                0x33    /* @ltoff(sym + add), mov imm64 */
-#define R_IA64_PLTOFF22                0x3a    /* @pltoff(sym + add), add imm22 */
-#define R_IA64_PLTOFF64I       0x3b    /* @pltoff(sym + add), mov imm64 */
-#define R_IA64_PLTOFF64MSB     0x3e    /* @pltoff(sym + add), data8 MSB */
-#define R_IA64_PLTOFF64LSB     0x3f    /* @pltoff(sym + add), data8 LSB */
-#define R_IA64_FPTR64I         0x43    /* @fptr(sym + add), mov imm64 */
-#define R_IA64_FPTR32MSB       0x44    /* @fptr(sym + add), data4 MSB */
-#define R_IA64_FPTR32LSB       0x45    /* @fptr(sym + add), data4 LSB */
-#define R_IA64_FPTR64MSB       0x46    /* @fptr(sym + add), data8 MSB */
-#define R_IA64_FPTR64LSB       0x47    /* @fptr(sym + add), data8 LSB */
-#define R_IA64_PCREL60B                0x48    /* @pcrel(sym + add), brl */
-#define R_IA64_PCREL21B                0x49    /* @pcrel(sym + add), ptb, call */
-#define R_IA64_PCREL21M                0x4a    /* @pcrel(sym + add), chk.s */
-#define R_IA64_PCREL21F                0x4b    /* @pcrel(sym + add), fchkf */
-#define R_IA64_PCREL32MSB      0x4c    /* @pcrel(sym + add), data4 MSB */
-#define R_IA64_PCREL32LSB      0x4d    /* @pcrel(sym + add), data4 LSB */
-#define R_IA64_PCREL64MSB      0x4e    /* @pcrel(sym + add), data8 MSB */
-#define R_IA64_PCREL64LSB      0x4f    /* @pcrel(sym + add), data8 LSB */
-#define R_IA64_LTOFF_FPTR22    0x52    /* @ltoff(@fptr(s+a)), imm22 */
-#define R_IA64_LTOFF_FPTR64I   0x53    /* @ltoff(@fptr(s+a)), imm64 */
-#define R_IA64_LTOFF_FPTR32MSB 0x54    /* @ltoff(@fptr(s+a)), data4 MSB */
-#define R_IA64_LTOFF_FPTR32LSB 0x55    /* @ltoff(@fptr(s+a)), data4 LSB */
-#define R_IA64_LTOFF_FPTR64MSB 0x56    /* @ltoff(@fptr(s+a)), data8 MSB */
-#define R_IA64_LTOFF_FPTR64LSB 0x57    /* @ltoff(@fptr(s+a)), data8 LSB */
-#define R_IA64_SEGREL32MSB     0x5c    /* @segrel(sym + add), data4 MSB */
-#define R_IA64_SEGREL32LSB     0x5d    /* @segrel(sym + add), data4 LSB */
-#define R_IA64_SEGREL64MSB     0x5e    /* @segrel(sym + add), data8 MSB */
-#define R_IA64_SEGREL64LSB     0x5f    /* @segrel(sym + add), data8 LSB */
-#define R_IA64_SECREL32MSB     0x64    /* @secrel(sym + add), data4 MSB */
-#define R_IA64_SECREL32LSB     0x65    /* @secrel(sym + add), data4 LSB */
-#define R_IA64_SECREL64MSB     0x66    /* @secrel(sym + add), data8 MSB */
-#define R_IA64_SECREL64LSB     0x67    /* @secrel(sym + add), data8 LSB */
-#define R_IA64_REL32MSB                0x6c    /* data 4 + REL */
-#define R_IA64_REL32LSB                0x6d    /* data 4 + REL */
-#define R_IA64_REL64MSB                0x6e    /* data 8 + REL */
-#define R_IA64_REL64LSB                0x6f    /* data 8 + REL */
-#define R_IA64_LTV32MSB                0x74    /* symbol + addend, data4 MSB */
-#define R_IA64_LTV32LSB                0x75    /* symbol + addend, data4 LSB */
-#define R_IA64_LTV64MSB                0x76    /* symbol + addend, data8 MSB */
-#define R_IA64_LTV64LSB                0x77    /* symbol + addend, data8 LSB */
-#define R_IA64_PCREL21BI       0x79    /* @pcrel(sym + add), 21bit inst */
-#define R_IA64_PCREL22         0x7a    /* @pcrel(sym + add), 22bit inst */
-#define R_IA64_PCREL64I                0x7b    /* @pcrel(sym + add), 64bit inst */
-#define R_IA64_IPLTMSB         0x80    /* dynamic reloc, imported PLT, MSB */
-#define R_IA64_IPLTLSB         0x81    /* dynamic reloc, imported PLT, LSB */
-#define R_IA64_COPY            0x84    /* copy relocation */
-#define R_IA64_SUB             0x85    /* Addend and symbol difference */
-#define R_IA64_LTOFF22X                0x86    /* LTOFF22, relaxable.  */
-#define R_IA64_LDXMOV          0x87    /* Use of LTOFF22X.  */
-#define R_IA64_TPREL14         0x91    /* @tprel(sym + add), imm14 */
-#define R_IA64_TPREL22         0x92    /* @tprel(sym + add), imm22 */
-#define R_IA64_TPREL64I                0x93    /* @tprel(sym + add), imm64 */
-#define R_IA64_TPREL64MSB      0x96    /* @tprel(sym + add), data8 MSB */
-#define R_IA64_TPREL64LSB      0x97    /* @tprel(sym + add), data8 LSB */
-#define R_IA64_LTOFF_TPREL22   0x9a    /* @ltoff(@tprel(s+a)), imm2 */
-#define R_IA64_DTPMOD64MSB     0xa6    /* @dtpmod(sym + add), data8 MSB */
-#define R_IA64_DTPMOD64LSB     0xa7    /* @dtpmod(sym + add), data8 LSB */
-#define R_IA64_LTOFF_DTPMOD22  0xaa    /* @ltoff(@dtpmod(sym + add)), imm22 */
-#define R_IA64_DTPREL14                0xb1    /* @dtprel(sym + add), imm14 */
-#define R_IA64_DTPREL22                0xb2    /* @dtprel(sym + add), imm22 */
-#define R_IA64_DTPREL64I       0xb3    /* @dtprel(sym + add), imm64 */
-#define R_IA64_DTPREL32MSB     0xb4    /* @dtprel(sym + add), data4 MSB */
-#define R_IA64_DTPREL32LSB     0xb5    /* @dtprel(sym + add), data4 LSB */
-#define R_IA64_DTPREL64MSB     0xb6    /* @dtprel(sym + add), data8 MSB */
-#define R_IA64_DTPREL64LSB     0xb7    /* @dtprel(sym + add), data8 LSB */
-#define R_IA64_LTOFF_DTPREL22  0xba    /* @ltoff(@dtprel(s+a)), imm22 */
+#define R_IA64_NONE             0x00    /* none */
+#define R_IA64_IMM14            0x21    /* symbol + addend, add imm14 */
+#define R_IA64_IMM22            0x22    /* symbol + addend, add imm22 */
+#define R_IA64_IMM64            0x23    /* symbol + addend, mov imm64 */
+#define R_IA64_DIR32MSB         0x24    /* symbol + addend, data4 MSB */
+#define R_IA64_DIR32LSB         0x25    /* symbol + addend, data4 LSB */
+#define R_IA64_DIR64MSB         0x26    /* symbol + addend, data8 MSB */
+#define R_IA64_DIR64LSB         0x27    /* symbol + addend, data8 LSB */
+#define R_IA64_GPREL22          0x2a    /* @gprel(sym + add), add imm22 */
+#define R_IA64_GPREL64I         0x2b    /* @gprel(sym + add), mov imm64 */
+#define R_IA64_GPREL32MSB       0x2c    /* @gprel(sym + add), data4 MSB */
+#define R_IA64_GPREL32LSB       0x2d    /* @gprel(sym + add), data4 LSB */
+#define R_IA64_GPREL64MSB       0x2e    /* @gprel(sym + add), data8 MSB */
+#define R_IA64_GPREL64LSB       0x2f    /* @gprel(sym + add), data8 LSB */
+#define R_IA64_LTOFF22          0x32    /* @ltoff(sym + add), add imm22 */
+#define R_IA64_LTOFF64I         0x33    /* @ltoff(sym + add), mov imm64 */
+#define R_IA64_PLTOFF22         0x3a    /* @pltoff(sym + add), add imm22 */
+#define R_IA64_PLTOFF64I        0x3b    /* @pltoff(sym + add), mov imm64 */
+#define R_IA64_PLTOFF64MSB      0x3e    /* @pltoff(sym + add), data8 MSB */
+#define R_IA64_PLTOFF64LSB      0x3f    /* @pltoff(sym + add), data8 LSB */
+#define R_IA64_FPTR64I          0x43    /* @fptr(sym + add), mov imm64 */
+#define R_IA64_FPTR32MSB        0x44    /* @fptr(sym + add), data4 MSB */
+#define R_IA64_FPTR32LSB        0x45    /* @fptr(sym + add), data4 LSB */
+#define R_IA64_FPTR64MSB        0x46    /* @fptr(sym + add), data8 MSB */
+#define R_IA64_FPTR64LSB        0x47    /* @fptr(sym + add), data8 LSB */
+#define R_IA64_PCREL60B         0x48    /* @pcrel(sym + add), brl */
+#define R_IA64_PCREL21B         0x49    /* @pcrel(sym + add), ptb, call */
+#define R_IA64_PCREL21M         0x4a    /* @pcrel(sym + add), chk.s */
+#define R_IA64_PCREL21F         0x4b    /* @pcrel(sym + add), fchkf */
+#define R_IA64_PCREL32MSB       0x4c    /* @pcrel(sym + add), data4 MSB */
+#define R_IA64_PCREL32LSB       0x4d    /* @pcrel(sym + add), data4 LSB */
+#define R_IA64_PCREL64MSB       0x4e    /* @pcrel(sym + add), data8 MSB */
+#define R_IA64_PCREL64LSB       0x4f    /* @pcrel(sym + add), data8 LSB */
+#define R_IA64_LTOFF_FPTR22     0x52    /* @ltoff(@fptr(s+a)), imm22 */
+#define R_IA64_LTOFF_FPTR64I    0x53    /* @ltoff(@fptr(s+a)), imm64 */
+#define R_IA64_LTOFF_FPTR32MSB  0x54    /* @ltoff(@fptr(s+a)), data4 MSB */
+#define R_IA64_LTOFF_FPTR32LSB  0x55    /* @ltoff(@fptr(s+a)), data4 LSB */
+#define R_IA64_LTOFF_FPTR64MSB  0x56    /* @ltoff(@fptr(s+a)), data8 MSB */
+#define R_IA64_LTOFF_FPTR64LSB  0x57    /* @ltoff(@fptr(s+a)), data8 LSB */
+#define R_IA64_SEGREL32MSB      0x5c    /* @segrel(sym + add), data4 MSB */
+#define R_IA64_SEGREL32LSB      0x5d    /* @segrel(sym + add), data4 LSB */
+#define R_IA64_SEGREL64MSB      0x5e    /* @segrel(sym + add), data8 MSB */
+#define R_IA64_SEGREL64LSB      0x5f    /* @segrel(sym + add), data8 LSB */
+#define R_IA64_SECREL32MSB      0x64    /* @secrel(sym + add), data4 MSB */
+#define R_IA64_SECREL32LSB      0x65    /* @secrel(sym + add), data4 LSB */
+#define R_IA64_SECREL64MSB      0x66    /* @secrel(sym + add), data8 MSB */
+#define R_IA64_SECREL64LSB      0x67    /* @secrel(sym + add), data8 LSB */
+#define R_IA64_REL32MSB         0x6c    /* data 4 + REL */
+#define R_IA64_REL32LSB         0x6d    /* data 4 + REL */
+#define R_IA64_REL64MSB         0x6e    /* data 8 + REL */
+#define R_IA64_REL64LSB         0x6f    /* data 8 + REL */
+#define R_IA64_LTV32MSB         0x74    /* symbol + addend, data4 MSB */
+#define R_IA64_LTV32LSB         0x75    /* symbol + addend, data4 LSB */
+#define R_IA64_LTV64MSB         0x76    /* symbol + addend, data8 MSB */
+#define R_IA64_LTV64LSB         0x77    /* symbol + addend, data8 LSB */
+#define R_IA64_PCREL21BI        0x79    /* @pcrel(sym + add), 21bit inst */
+#define R_IA64_PCREL22          0x7a    /* @pcrel(sym + add), 22bit inst */
+#define R_IA64_PCREL64I         0x7b    /* @pcrel(sym + add), 64bit inst */
+#define R_IA64_IPLTMSB          0x80    /* dynamic reloc, imported PLT, MSB */
+#define R_IA64_IPLTLSB          0x81    /* dynamic reloc, imported PLT, LSB */
+#define R_IA64_COPY             0x84    /* copy relocation */
+#define R_IA64_SUB              0x85    /* Addend and symbol difference */
+#define R_IA64_LTOFF22X         0x86    /* LTOFF22, relaxable.  */
+#define R_IA64_LDXMOV           0x87    /* Use of LTOFF22X.  */
+#define R_IA64_TPREL14          0x91    /* @tprel(sym + add), imm14 */
+#define R_IA64_TPREL22          0x92    /* @tprel(sym + add), imm22 */
+#define R_IA64_TPREL64I         0x93    /* @tprel(sym + add), imm64 */
+#define R_IA64_TPREL64MSB       0x96    /* @tprel(sym + add), data8 MSB */
+#define R_IA64_TPREL64LSB       0x97    /* @tprel(sym + add), data8 LSB */
+#define R_IA64_LTOFF_TPREL22    0x9a    /* @ltoff(@tprel(s+a)), imm2 */
+#define R_IA64_DTPMOD64MSB      0xa6    /* @dtpmod(sym + add), data8 MSB */
+#define R_IA64_DTPMOD64LSB      0xa7    /* @dtpmod(sym + add), data8 LSB */
+#define R_IA64_LTOFF_DTPMOD22   0xaa    /* @ltoff(@dtpmod(sym + add)), imm22 */
+#define R_IA64_DTPREL14         0xb1    /* @dtprel(sym + add), imm14 */
+#define R_IA64_DTPREL22         0xb2    /* @dtprel(sym + add), imm22 */
+#define R_IA64_DTPREL64I        0xb3    /* @dtprel(sym + add), imm64 */
+#define R_IA64_DTPREL32MSB      0xb4    /* @dtprel(sym + add), data4 MSB */
+#define R_IA64_DTPREL32LSB      0xb5    /* @dtprel(sym + add), data4 LSB */
+#define R_IA64_DTPREL64MSB      0xb6    /* @dtprel(sym + add), data8 MSB */
+#define R_IA64_DTPREL64LSB      0xb7    /* @dtprel(sym + add), data8 LSB */
+#define R_IA64_LTOFF_DTPREL22   0xba    /* @ltoff(@dtprel(s+a)), imm22 */
 
 /* RISC-V relocations.  */
 #define R_RISCV_NONE          0
@@ -1416,47 +1449,47 @@ typedef struct {
 #define EF_RISCV_TSO              0x0010
 
 typedef struct elf32_rel {
-  Elf32_Addr   r_offset;
-  Elf32_Word   r_info;
+  Elf32_Addr r_offset;
+  Elf32_Word r_info;
 } Elf32_Rel;
 
 typedef struct elf64_rel {
-  Elf64_Addr r_offset; /* Location at which to apply the action */
-  Elf64_Xword r_info;  /* index and type of relocation */
+  Elf64_Addr r_offset;      /* Location at which to apply the action */
+  Elf64_Xword r_info;       /* index and type of relocation */
 } Elf64_Rel;
 
 typedef struct elf32_rela{
-  Elf32_Addr   r_offset;
-  Elf32_Word   r_info;
-  Elf32_Sword  r_addend;
+  Elf32_Addr r_offset;
+  Elf32_Word r_info;
+  Elf32_Sword r_addend;
 } Elf32_Rela;
 
 typedef struct elf64_rela {
-  Elf64_Addr r_offset; /* Location at which to apply the action */
-  Elf64_Xword r_info;  /* index and type of relocation */
-  Elf64_Sxword r_addend;       /* Constant addend used to compute value */
+  Elf64_Addr r_offset;      /* Location at which to apply the action */
+  Elf64_Xword r_info;       /* index and type of relocation */
+  Elf64_Sxword r_addend;    /* Constant addend used to compute value */
 } Elf64_Rela;
 
 typedef struct elf32_sym{
-  Elf32_Word   st_name;
-  Elf32_Addr   st_value;
-  Elf32_Word   st_size;
-  unsigned char        st_info;
-  unsigned char        st_other;
-  Elf32_Half   st_shndx;
+  Elf32_Word st_name;
+  Elf32_Addr st_value;
+  Elf32_Word st_size;
+  unsigned char st_info;
+  unsigned char st_other;
+  Elf32_Half st_shndx;
 } Elf32_Sym;
 
 typedef struct elf64_sym {
-  Elf64_Word st_name;          /* Symbol name, index in string tbl */
-  unsigned char        st_info;        /* Type and binding attributes */
-  unsigned char        st_other;       /* No defined meaning, 0 */
-  Elf64_Half st_shndx;         /* Associated section index */
-  Elf64_Addr st_value;         /* Value of the symbol */
-  Elf64_Xword st_size;         /* Associated symbol size */
+  Elf64_Word st_name;       /* Symbol name, index in string tbl */
+  unsigned char st_info;    /* Type and binding attributes */
+  unsigned char st_other;   /* No defined meaning, 0 */
+  Elf64_Half st_shndx;      /* Associated section index */
+  Elf64_Addr st_value;      /* Value of the symbol */
+  Elf64_Xword st_size;      /* Associated symbol size */
 } Elf64_Sym;
 
 
-#define EI_NIDENT      16
+#define EI_NIDENT       16
 
 /* Special value for e_phnum.  This indicates that the real number of
    program headers is too large to fit into e_phnum.  Instead the real
@@ -1464,30 +1497,30 @@ typedef struct elf64_sym {
 #define PN_XNUM         0xffff
 
 typedef struct elf32_hdr{
-  unsigned char        e_ident[EI_NIDENT];
-  Elf32_Half   e_type;
-  Elf32_Half   e_machine;
-  Elf32_Word   e_version;
-  Elf32_Addr   e_entry;  /* Entry point */
-  Elf32_Off    e_phoff;
-  Elf32_Off    e_shoff;
-  Elf32_Word   e_flags;
-  Elf32_Half   e_ehsize;
-  Elf32_Half   e_phentsize;
-  Elf32_Half   e_phnum;
-  Elf32_Half   e_shentsize;
-  Elf32_Half   e_shnum;
-  Elf32_Half   e_shstrndx;
+  unsigned char e_ident[EI_NIDENT];
+  Elf32_Half e_type;
+  Elf32_Half e_machine;
+  Elf32_Word e_version;
+  Elf32_Addr e_entry;  /* Entry point */
+  Elf32_Off e_phoff;
+  Elf32_Off e_shoff;
+  Elf32_Word e_flags;
+  Elf32_Half e_ehsize;
+  Elf32_Half e_phentsize;
+  Elf32_Half e_phnum;
+  Elf32_Half e_shentsize;
+  Elf32_Half e_shnum;
+  Elf32_Half e_shstrndx;
 } Elf32_Ehdr;
 
 typedef struct elf64_hdr {
-  unsigned char        e_ident[16];            /* ELF "magic number" */
+  unsigned char e_ident[16];    /* ELF "magic number" */
   Elf64_Half e_type;
   Elf64_Half e_machine;
   Elf64_Word e_version;
-  Elf64_Addr e_entry;          /* Entry point virtual address */
-  Elf64_Off e_phoff;           /* Program header table file offset */
-  Elf64_Off e_shoff;           /* Section header table file offset */
+  Elf64_Addr e_entry;           /* Entry point virtual address */
+  Elf64_Off e_phoff;            /* Program header table file offset */
+  Elf64_Off e_shoff;            /* Section header table file offset */
   Elf64_Word e_flags;
   Elf64_Half e_ehsize;
   Elf64_Half e_phentsize;
@@ -1499,107 +1532,107 @@ typedef struct elf64_hdr {
 
 /* These constants define the permissions on sections in the program
    header, p_flags. */
-#define PF_R           0x4
-#define PF_W           0x2
-#define PF_X           0x1
+#define PF_R 0x4
+#define PF_W 0x2
+#define PF_X 0x1
 
 typedef struct elf32_phdr{
-  Elf32_Word   p_type;
-  Elf32_Off    p_offset;
-  Elf32_Addr   p_vaddr;
-  Elf32_Addr   p_paddr;
-  Elf32_Word   p_filesz;
-  Elf32_Word   p_memsz;
-  Elf32_Word   p_flags;
-  Elf32_Word   p_align;
+  Elf32_Word p_type;
+  Elf32_Off p_offset;
+  Elf32_Addr p_vaddr;
+  Elf32_Addr p_paddr;
+  Elf32_Word p_filesz;
+  Elf32_Word p_memsz;
+  Elf32_Word p_flags;
+  Elf32_Word p_align;
 } Elf32_Phdr;
 
 typedef struct elf64_phdr {
   Elf64_Word p_type;
   Elf64_Word p_flags;
-  Elf64_Off p_offset;          /* Segment file offset */
-  Elf64_Addr p_vaddr;          /* Segment virtual address */
-  Elf64_Addr p_paddr;          /* Segment physical address */
-  Elf64_Xword p_filesz;                /* Segment size in file */
-  Elf64_Xword p_memsz;         /* Segment size in memory */
-  Elf64_Xword p_align;         /* Segment alignment, file & memory */
+  Elf64_Off p_offset;       /* Segment file offset */
+  Elf64_Addr p_vaddr;       /* Segment virtual address */
+  Elf64_Addr p_paddr;       /* Segment physical address */
+  Elf64_Xword p_filesz;     /* Segment size in file */
+  Elf64_Xword p_memsz;      /* Segment size in memory */
+  Elf64_Xword p_align;      /* Segment alignment, file & memory */
 } Elf64_Phdr;
 
 /* sh_type */
-#define SHT_NULL       0
-#define SHT_PROGBITS   1
-#define SHT_SYMTAB     2
-#define SHT_STRTAB     3
-#define SHT_RELA       4
-#define SHT_HASH       5
-#define SHT_DYNAMIC    6
-#define SHT_NOTE       7
-#define SHT_NOBITS     8
-#define SHT_REL                9
-#define SHT_SHLIB      10
-#define SHT_DYNSYM     11
-#define SHT_NUM                12
-#define SHT_LOPROC     0x70000000
-#define SHT_HIPROC     0x7fffffff
-#define SHT_LOUSER     0x80000000
-#define SHT_HIUSER     0xffffffff
-#define SHT_MIPS_LIST          0x70000000
-#define SHT_MIPS_CONFLICT      0x70000002
-#define SHT_MIPS_GPTAB         0x70000003
-#define SHT_MIPS_UCODE         0x70000004
+#define SHT_NULL            0
+#define SHT_PROGBITS        1
+#define SHT_SYMTAB          2
+#define SHT_STRTAB          3
+#define SHT_RELA            4
+#define SHT_HASH            5
+#define SHT_DYNAMIC         6
+#define SHT_NOTE            7
+#define SHT_NOBITS          8
+#define SHT_REL             9
+#define SHT_SHLIB           10
+#define SHT_DYNSYM          11
+#define SHT_NUM             12
+#define SHT_LOPROC          0x70000000
+#define SHT_HIPROC          0x7fffffff
+#define SHT_LOUSER          0x80000000
+#define SHT_HIUSER          0xffffffff
+#define SHT_MIPS_LIST       0x70000000
+#define SHT_MIPS_CONFLICT   0x70000002
+#define SHT_MIPS_GPTAB      0x70000003
+#define SHT_MIPS_UCODE      0x70000004
 
 /* sh_flags */
-#define SHF_WRITE      0x1
-#define SHF_ALLOC      0x2
-#define SHF_EXECINSTR  0x4
-#define SHF_MASKPROC   0xf0000000
-#define SHF_MIPS_GPREL 0x10000000
+#define SHF_WRITE       0x1
+#define SHF_ALLOC       0x2
+#define SHF_EXECINSTR   0x4
+#define SHF_MASKPROC    0xf0000000
+#define SHF_MIPS_GPREL  0x10000000
 
 /* special section indexes */
-#define SHN_UNDEF      0
-#define SHN_LORESERVE  0xff00
-#define SHN_LOPROC     0xff00
-#define SHN_HIPROC     0xff1f
-#define SHN_ABS                0xfff1
-#define SHN_COMMON     0xfff2
-#define SHN_HIRESERVE  0xffff
-#define SHN_MIPS_ACCOMON       0xff00
+#define SHN_UNDEF           0
+#define SHN_LORESERVE       0xff00
+#define SHN_LOPROC          0xff00
+#define SHN_HIPROC          0xff1f
+#define SHN_ABS             0xfff1
+#define SHN_COMMON          0xfff2
+#define SHN_HIRESERVE       0xffff
+#define SHN_MIPS_ACCOMON    0xff00
 
 typedef struct elf32_shdr {
-  Elf32_Word   sh_name;
-  Elf32_Word   sh_type;
-  Elf32_Word   sh_flags;
-  Elf32_Addr   sh_addr;
-  Elf32_Off    sh_offset;
-  Elf32_Word   sh_size;
-  Elf32_Word   sh_link;
-  Elf32_Word   sh_info;
-  Elf32_Word   sh_addralign;
-  Elf32_Word   sh_entsize;
+  Elf32_Word sh_name;
+  Elf32_Word sh_type;
+  Elf32_Word sh_flags;
+  Elf32_Addr sh_addr;
+  Elf32_Off sh_offset;
+  Elf32_Word sh_size;
+  Elf32_Word sh_link;
+  Elf32_Word sh_info;
+  Elf32_Word sh_addralign;
+  Elf32_Word sh_entsize;
 } Elf32_Shdr;
 
 typedef struct elf64_shdr {
-  Elf64_Word sh_name;          /* Section name, index in string tbl */
-  Elf64_Word sh_type;          /* Type of section */
-  Elf64_Xword sh_flags;                /* Miscellaneous section attributes */
-  Elf64_Addr sh_addr;          /* Section virtual addr at execution */
-  Elf64_Off sh_offset;         /* Section file offset */
-  Elf64_Xword sh_size;         /* Size of section in bytes */
-  Elf64_Word sh_link;          /* Index of another section */
-  Elf64_Word sh_info;          /* Additional section information */
-  Elf64_Xword sh_addralign;    /* Section alignment */
-  Elf64_Xword sh_entsize;      /* Entry size if section holds table */
+  Elf64_Word sh_name;       /* Section name, index in string tbl */
+  Elf64_Word sh_type;       /* Type of section */
+  Elf64_Xword sh_flags;     /* Miscellaneous section attributes */
+  Elf64_Addr sh_addr;       /* Section virtual addr at execution */
+  Elf64_Off sh_offset;      /* Section file offset */
+  Elf64_Xword sh_size;      /* Size of section in bytes */
+  Elf64_Word sh_link;       /* Index of another section */
+  Elf64_Word sh_info;       /* Additional section information */
+  Elf64_Xword sh_addralign; /* Section alignment */
+  Elf64_Xword sh_entsize;   /* Entry size if section holds table */
 } Elf64_Shdr;
 
-#define        EI_MAG0         0               /* e_ident[] indexes */
-#define        EI_MAG1         1
-#define        EI_MAG2         2
-#define        EI_MAG3         3
-#define        EI_CLASS        4
-#define        EI_DATA         5
-#define        EI_VERSION      6
-#define        EI_OSABI        7
-#define        EI_PAD          8
+#define EI_MAG0     0       /* e_ident[] indexes */
+#define EI_MAG1     1
+#define EI_MAG2     2
+#define EI_MAG3     3
+#define EI_CLASS    4
+#define EI_DATA     5
+#define EI_VERSION  6
+#define EI_OSABI    7
+#define EI_PAD      8
 
 #define ELFOSABI_NONE           0       /* UNIX System V ABI */
 #define ELFOSABI_SYSV           0       /* Alias.  */
@@ -1614,54 +1647,57 @@ typedef struct elf64_shdr {
 #define ELFOSABI_MODESTO        11      /* Novell Modesto.  */
 #define ELFOSABI_OPENBSD        12      /* OpenBSD.  */
 #define ELFOSABI_ARM_FDPIC      65      /* ARM FDPIC */
+#define ELFOSABI_XTENSA_FDPIC   65      /* Xtensa FDPIC */
 #define ELFOSABI_ARM            97      /* ARM */
 #define ELFOSABI_STANDALONE     255     /* Standalone (embedded) application */
 
-#define        ELFMAG0         0x7f            /* EI_MAG */
-#define        ELFMAG1         'E'
-#define        ELFMAG2         'L'
-#define        ELFMAG3         'F'
-#define        ELFMAG          "\177ELF"
-#define        SELFMAG         4
+#define ELFMAG0         0x7f        /* EI_MAG */
+#define ELFMAG1         'E'
+#define ELFMAG2         'L'
+#define ELFMAG3         'F'
+#define ELFMAG          "\177ELF"
+#define SELFMAG         4
 
-#define        ELFCLASSNONE    0               /* EI_CLASS */
-#define        ELFCLASS32      1
-#define        ELFCLASS64      2
-#define        ELFCLASSNUM     3
+#define ELFCLASSNONE    0   /* EI_CLASS */
+#define ELFCLASS32      1
+#define ELFCLASS64      2
+#define ELFCLASSNUM     3
 
-#define ELFDATANONE    0               /* e_ident[EI_DATA] */
-#define ELFDATA2LSB    1
-#define ELFDATA2MSB    2
+#define ELFDATANONE     0   /* e_ident[EI_DATA] */
+#define ELFDATA2LSB     1
+#define ELFDATA2MSB     2
 
-#define EV_NONE                0               /* e_version, EI_VERSION */
-#define EV_CURRENT     1
-#define EV_NUM         2
+#define EV_NONE         0   /* e_version, EI_VERSION */
+#define EV_CURRENT      1
+#define EV_NUM          2
 
 /* Notes used in ET_CORE */
-#define NT_PRSTATUS    1
-#define NT_FPREGSET     2
-#define NT_PRFPREG     2
-#define NT_PRPSINFO    3
-#define NT_TASKSTRUCT  4
-#define NT_AUXV                6
-#define NT_PRXFPREG     0x46e62b7f      /* copied from gdb5.1/include/elf/common.h */
-#define NT_S390_GS_CB   0x30b           /* s390 guarded storage registers */
-#define NT_S390_VXRS_HIGH 0x30a         /* s390 vector registers 16-31 */
-#define NT_S390_VXRS_LOW  0x309         /* s390 vector registers 0-15 (lower half) */
-#define NT_S390_PREFIX  0x305           /* s390 prefix register */
-#define NT_S390_CTRS    0x304           /* s390 control registers */
-#define NT_S390_TODPREG 0x303           /* s390 TOD programmable register */
-#define NT_S390_TODCMP  0x302           /* s390 TOD clock comparator register */
-#define NT_S390_TIMER   0x301           /* s390 timer register */
-#define NT_PPC_VMX       0x100          /* PowerPC Altivec/VMX registers */
-#define NT_PPC_SPE       0x101          /* PowerPC SPE/EVR registers */
-#define NT_PPC_VSX       0x102          /* PowerPC VSX registers */
-#define NT_ARM_VFP      0x400           /* ARM VFP/NEON registers */
-#define NT_ARM_TLS      0x401           /* ARM TLS register */
-#define NT_ARM_HW_BREAK 0x402           /* ARM hardware breakpoint registers */
-#define NT_ARM_HW_WATCH 0x403           /* ARM hardware watchpoint registers */
-#define NT_ARM_SYSTEM_CALL      0x404   /* ARM system call number */
-#define NT_ARM_SVE      0x405           /* ARM Scalable Vector Extension regs */
+#define NT_PRSTATUS         1
+#define NT_FPREGSET         2
+#define NT_PRFPREG          2
+#define NT_PRPSINFO         3
+#define NT_TASKSTRUCT       4
+#define NT_AUXV             6
+#define NT_PRXFPREG         0x46e62b7f  /* copied from gdb5.1/include/elf/common.h */
+#define NT_S390_PV_CPU_DATA 0x30e       /* s390 protvirt cpu dump data */
+#define NT_S390_RI_CB       0x30d       /* s390 runtime instrumentation */
+#define NT_S390_GS_CB       0x30b       /* s390 guarded storage registers */
+#define NT_S390_VXRS_HIGH   0x30a       /* s390 vector registers 16-31 */
+#define NT_S390_VXRS_LOW    0x309       /* s390 vector registers 0-15 (lower half) */
+#define NT_S390_PREFIX      0x305       /* s390 prefix register */
+#define NT_S390_CTRS        0x304       /* s390 control registers */
+#define NT_S390_TODPREG     0x303       /* s390 TOD programmable register */
+#define NT_S390_TODCMP      0x302       /* s390 TOD clock comparator register */
+#define NT_S390_TIMER       0x301       /* s390 timer register */
+#define NT_PPC_VMX          0x100       /* PowerPC Altivec/VMX registers */
+#define NT_PPC_SPE          0x101       /* PowerPC SPE/EVR registers */
+#define NT_PPC_VSX          0x102       /* PowerPC VSX registers */
+#define NT_ARM_VFP          0x400       /* ARM VFP/NEON registers */
+#define NT_ARM_TLS          0x401       /* ARM TLS register */
+#define NT_ARM_HW_BREAK     0x402       /* ARM hardware breakpoint registers */
+#define NT_ARM_HW_WATCH     0x403       /* ARM hardware watchpoint registers */
+#define NT_ARM_SYSTEM_CALL  0x404       /* ARM system call number */
+#define NT_ARM_SVE          0x405       /* ARM Scalable Vector Extension regs */
 
 /* Defined note types for GNU systems.  */
 
@@ -1694,16 +1730,16 @@ typedef struct elf64_shdr {
 
 /* Note header in a PT_NOTE section */
 typedef struct elf32_note {
-  Elf32_Word   n_namesz;       /* Name size */
-  Elf32_Word   n_descsz;       /* Content size */
-  Elf32_Word   n_type;         /* Content type */
+  Elf32_Word n_namesz;   /* Name size */
+  Elf32_Word n_descsz;   /* Content size */
+  Elf32_Word n_type;     /* Content type */
 } Elf32_Nhdr;
 
 /* Note header in a PT_NOTE section */
 typedef struct elf64_note {
-  Elf64_Word n_namesz; /* Name size */
-  Elf64_Word n_descsz; /* Content size */
-  Elf64_Word n_type;   /* Content type */
+  Elf64_Word n_namesz;  /* Name size */
+  Elf64_Word n_descsz;  /* Content size */
+  Elf64_Word n_type;    /* Content type */
 } Elf64_Nhdr;
 
 
@@ -1728,13 +1764,13 @@ struct elf32_fdpic_loadmap {
 #ifdef ELF_CLASS
 #if ELF_CLASS == ELFCLASS32
 
-#define elfhdr         elf32_hdr
-#define elf_phdr       elf32_phdr
-#define elf_note       elf32_note
-#define elf_shdr       elf32_shdr
-#define elf_sym                elf32_sym
-#define elf_addr_t     Elf32_Off
-#define elf_rela  elf32_rela
+#define elfhdr      elf32_hdr
+#define elf_phdr    elf32_phdr
+#define elf_note    elf32_note
+#define elf_shdr    elf32_shdr
+#define elf_sym     elf32_sym
+#define elf_addr_t  Elf32_Off
+#define elf_rela    elf32_rela
 
 #ifdef ELF_USES_RELOCA
 # define ELF_RELOC      Elf32_Rela
@@ -1744,13 +1780,13 @@ struct elf32_fdpic_loadmap {
 
 #else
 
-#define elfhdr         elf64_hdr
-#define elf_phdr       elf64_phdr
-#define elf_note       elf64_note
-#define elf_shdr       elf64_shdr
-#define elf_sym                elf64_sym
-#define elf_addr_t     Elf64_Off
-#define elf_rela  elf64_rela
+#define elfhdr      elf64_hdr
+#define elf_phdr    elf64_phdr
+#define elf_note    elf64_note
+#define elf_shdr    elf64_shdr
+#define elf_sym     elf64_sym
+#define elf_addr_t  Elf64_Off
+#define elf_rela    elf64_rela
 
 #ifdef ELF_USES_RELOCA
 # define ELF_RELOC      Elf64_Rela
index db8bfa9a92ba65d929b2da38010f8e8e7f4b7a62..0d0aa61d68966a56e65e35cc6f38687b7e16816f 100644 (file)
@@ -19,8 +19,6 @@
  * you're one of them.
  */
 
-#include "exec/memory.h"
-
 #ifndef CONFIG_USER_ONLY
 
 /* Get the root memory region.  This interface should only be used temporarily
diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h
new file mode 100644 (file)
index 0000000..ba2dd4b
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * QEMU Confidential Guest support
+ *   This interface describes the common pieces between various
+ *   schemes for protecting guest memory or other state against a
+ *   compromised hypervisor.  This includes memory encryption (AMD's
+ *   SEV and Intel's MKTME) or special protection modes (PEF on POWER,
+ *   or PV on s390x).
+ *
+ * Copyright Red Hat.
+ *
+ * Authors:
+ *  David Gibson <david@gibson.dropbear.id.au>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ *
+ */
+#ifndef QEMU_CONFIDENTIAL_GUEST_SUPPORT_H
+#define QEMU_CONFIDENTIAL_GUEST_SUPPORT_H
+
+#ifndef CONFIG_USER_ONLY
+
+#include "qom/object.h"
+
+#define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support"
+OBJECT_DECLARE_SIMPLE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT)
+
+struct ConfidentialGuestSupport {
+    Object parent;
+
+    /*
+     * ready: flag set by CGS initialization code once it's ready to
+     *        start executing instructions in a potentially-secure
+     *        guest
+     *
+     * The definition here is a bit fuzzy, because this is essentially
+     * part of a self-sanity-check, rather than a strict mechanism.
+     *
+     * It's not feasible to have a single point in the common machine
+     * init path to configure confidential guest support, because
+     * different mechanisms have different interdependencies requiring
+     * initialization in different places, often in arch or machine
+     * type specific code.  It's also usually not possible to check
+     * for invalid configurations until that initialization code.
+     * That means it would be very easy to have a bug allowing CGS
+     * init to be bypassed entirely in certain configurations.
+     *
+     * Silently ignoring a requested security feature would be bad, so
+     * to avoid that we check late in init that this 'ready' flag is
+     * set if CGS was requested.  If the CGS init hasn't happened, and
+     * so 'ready' is not set, we'll abort.
+     */
+    bool ready;
+};
+
+typedef struct ConfidentialGuestSupportClass {
+    ObjectClass parent;
+} ConfidentialGuestSupportClass;
+
+#endif /* !CONFIG_USER_ONLY */
+
+#endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */
index 4b5408c34158c073c3016115a6e42f3c20078fe0..5340907cfd051c21c3cf08006b7c91bac695ff8f 100644 (file)
 
 #include "exec/cpu-common.h"
 #include "exec/memory.h"
+#include "exec/tswap.h"
 #include "qemu/thread.h"
 #include "hw/core/cpu.h"
 #include "qemu/rcu.h"
 
-#define EXCP_INTERRUPT         0x10000 /* async interruption */
-#define EXCP_HLT        0x10001 /* hlt instruction reached */
-#define EXCP_DEBUG      0x10002 /* cpu stopped after a breakpoint or singlestep */
-#define EXCP_HALTED     0x10003 /* cpu is halted (waiting for external event) */
-#define EXCP_YIELD      0x10004 /* cpu wants to yield timeslice to another */
-#define EXCP_ATOMIC     0x10005 /* stop-the-world and emulate atomic */
-
 /* some important defines:
  *
- * HOST_WORDS_BIGENDIAN : if defined, the host cpu is big endian and
+ * HOST_BIG_ENDIAN : whether the host cpu is big endian and
  * otherwise little endian.
  *
- * TARGET_WORDS_BIGENDIAN : same for target cpu
+ * TARGET_BIG_ENDIAN : same for the target cpu
  */
 
-#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
 #define BSWAP_NEEDED
 #endif
 
-#ifdef BSWAP_NEEDED
-
-static inline uint16_t tswap16(uint16_t s)
-{
-    return bswap16(s);
-}
-
-static inline uint32_t tswap32(uint32_t s)
-{
-    return bswap32(s);
-}
-
-static inline uint64_t tswap64(uint64_t s)
-{
-    return bswap64(s);
-}
-
-static inline void tswap16s(uint16_t *s)
-{
-    *s = bswap16(*s);
-}
-
-static inline void tswap32s(uint32_t *s)
-{
-    *s = bswap32(*s);
-}
-
-static inline void tswap64s(uint64_t *s)
-{
-    *s = bswap64(*s);
-}
-
-#else
-
-static inline uint16_t tswap16(uint16_t s)
-{
-    return s;
-}
-
-static inline uint32_t tswap32(uint32_t s)
-{
-    return s;
-}
-
-static inline uint64_t tswap64(uint64_t s)
-{
-    return s;
-}
-
-static inline void tswap16s(uint16_t *s)
-{
-}
-
-static inline void tswap32s(uint32_t *s)
-{
-}
-
-static inline void tswap64s(uint64_t *s)
-{
-}
-
-#endif
-
 #if TARGET_LONG_SIZE == 4
 #define tswapl(s) tswap32(s)
 #define tswapls(s) tswap32s((uint32_t *)(s))
@@ -120,18 +51,14 @@ static inline void tswap64s(uint64_t *s)
 /* Target-endianness CPU memory access functions. These fit into the
  * {ld,st}{type}{sign}{size}{endian}_p naming scheme described in bswap.h.
  */
-#if defined(TARGET_WORDS_BIGENDIAN)
+#if TARGET_BIG_ENDIAN
 #define lduw_p(p) lduw_be_p(p)
 #define ldsw_p(p) ldsw_be_p(p)
 #define ldl_p(p) ldl_be_p(p)
 #define ldq_p(p) ldq_be_p(p)
-#define ldfl_p(p) ldfl_be_p(p)
-#define ldfq_p(p) ldfq_be_p(p)
 #define stw_p(p, v) stw_be_p(p, v)
 #define stl_p(p, v) stl_be_p(p, v)
 #define stq_p(p, v) stq_be_p(p, v)
-#define stfl_p(p, v) stfl_be_p(p, v)
-#define stfq_p(p, v) stfq_be_p(p, v)
 #define ldn_p(p, sz) ldn_be_p(p, sz)
 #define stn_p(p, sz, v) stn_be_p(p, sz, v)
 #else
@@ -139,13 +66,9 @@ static inline void tswap64s(uint64_t *s)
 #define ldsw_p(p) ldsw_le_p(p)
 #define ldl_p(p) ldl_le_p(p)
 #define ldq_p(p) ldq_le_p(p)
-#define ldfl_p(p) ldfl_le_p(p)
-#define ldfq_p(p) ldfq_le_p(p)
 #define stw_p(p, v) stw_le_p(p, v)
 #define stl_p(p, v) stl_le_p(p, v)
 #define stq_p(p, v) stq_le_p(p, v)
-#define stfl_p(p, v) stfl_le_p(p, v)
-#define stfq_p(p, v) stfq_le_p(p, v)
 #define ldn_p(p, sz) ldn_le_p(p, sz)
 #define stn_p(p, sz, v) stn_le_p(p, sz, v)
 #endif
@@ -154,12 +77,18 @@ static inline void tswap64s(uint64_t *s)
 
 #if defined(CONFIG_USER_ONLY)
 #include "exec/user/abitypes.h"
+#include "exec/user/guest-base.h"
 
-/* On some host systems the guest address space is reserved on the host.
- * This allows the guest address space to be offset to a convenient location.
- */
-extern unsigned long guest_base;
 extern bool have_guest_base;
+
+/*
+ * If non-zero, the guest virtual address space is a contiguous subset
+ * of the host virtual address space, i.e. '-R reserved_va' is in effect
+ * either from the command-line or by default.  The value is the last
+ * byte of the guest address space e.g. UINT32_MAX.
+ *
+ * If zero, the host and guest virtual address spaces are intermingled.
+ */
 extern unsigned long reserved_va;
 
 /*
@@ -179,7 +108,7 @@ extern unsigned long reserved_va;
 #define GUEST_ADDR_MAX_                                                 \
     ((MIN_CONST(TARGET_VIRT_ADDR_SPACE_BITS, TARGET_ABI_BITS) <= 32) ?  \
      UINT32_MAX : ~0ul)
-#define GUEST_ADDR_MAX    (reserved_va ? reserved_va - 1 : GUEST_ADDR_MAX_)
+#define GUEST_ADDR_MAX    (reserved_va ? : GUEST_ADDR_MAX_)
 
 #else
 
@@ -223,22 +152,15 @@ static inline void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val
 /* page related stuff */
 
 #ifdef TARGET_PAGE_BITS_VARY
-typedef struct {
-    bool decided;
-    int bits;
-    target_long mask;
-} TargetPageBits;
-#if defined(CONFIG_ATTRIBUTE_ALIAS) || !defined(IN_EXEC_VARY)
+# include "exec/page-vary.h"
 extern const TargetPageBits target_page;
-#else
-extern TargetPageBits target_page;
-#endif
 #ifdef CONFIG_DEBUG_TCG
 #define TARGET_PAGE_BITS   ({ assert(target_page.decided); target_page.bits; })
-#define TARGET_PAGE_MASK   ({ assert(target_page.decided); target_page.mask; })
+#define TARGET_PAGE_MASK   ({ assert(target_page.decided); \
+                              (target_long)target_page.mask; })
 #else
 #define TARGET_PAGE_BITS   target_page.bits
-#define TARGET_PAGE_MASK   target_page.mask
+#define TARGET_PAGE_MASK   ((target_long)target_page.mask)
 #endif
 #define TARGET_PAGE_SIZE   (-(int)TARGET_PAGE_MASK)
 #else
@@ -249,33 +171,39 @@ extern TargetPageBits target_page;
 
 #define TARGET_PAGE_ALIGN(addr) ROUND_UP((addr), TARGET_PAGE_SIZE)
 
-/* Using intptr_t ensures that qemu_*_page_mask is sign-extended even
- * when intptr_t is 32-bit and we are aligning a long long.
- */
-extern uintptr_t qemu_host_page_size;
-extern intptr_t qemu_host_page_mask;
-
-#define HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_host_page_size)
-#define REAL_HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_real_host_page_size)
-
 /* same as PROT_xxx */
 #define PAGE_READ      0x0001
 #define PAGE_WRITE     0x0002
 #define PAGE_EXEC      0x0004
 #define PAGE_BITS      (PAGE_READ | PAGE_WRITE | PAGE_EXEC)
 #define PAGE_VALID     0x0008
-/* original state of the write flag (used when tracking self-modifying
-   code */
+/*
+ * Original state of the write flag (used when tracking self-modifying code)
+ */
 #define PAGE_WRITE_ORG 0x0010
-/* Invalidate the TLB entry immediately, helpful for s390x
- * Low-Address-Protection. Used with PAGE_WRITE in tlb_set_page_with_attrs() */
-#define PAGE_WRITE_INV 0x0040
+/*
+ * Invalidate the TLB entry immediately, helpful for s390x
+ * Low-Address-Protection. Used with PAGE_WRITE in tlb_set_page_with_attrs()
+ */
+#define PAGE_WRITE_INV 0x0020
+/* For use with page_set_flags: page is being replaced; target_data cleared. */
+#define PAGE_RESET     0x0040
+/* For linux-user, indicates that the page is MAP_ANON. */
+#define PAGE_ANON      0x0080
+
 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
 /* FIXME: Code that sets/uses this is broken and needs to go away.  */
-#define PAGE_RESERVED  0x0020
+#define PAGE_RESERVED  0x0100
 #endif
 /* Target-specific bits that will be used via page_get_flags().  */
-#define PAGE_TARGET_1  0x0080
+#define PAGE_TARGET_1  0x0200
+#define PAGE_TARGET_2  0x0400
+
+/*
+ * For linux-user, indicates that the page is mapped with the same semantics
+ * in both guest and host.
+ */
+#define PAGE_PASSTHROUGH 0x0800
 
 #if defined(CONFIG_USER_ONLY)
 void page_dump(FILE *f);
@@ -285,8 +213,61 @@ typedef int (*walk_memory_regions_fn)(void *, target_ulong,
 int walk_memory_regions(void *, walk_memory_regions_fn);
 
 int page_get_flags(target_ulong address);
-void page_set_flags(target_ulong start, target_ulong end, int flags);
-int page_check_range(target_ulong start, target_ulong len, int flags);
+void page_set_flags(target_ulong start, target_ulong last, int flags);
+void page_reset_target_data(target_ulong start, target_ulong last);
+
+/**
+ * page_check_range
+ * @start: first byte of range
+ * @len: length of range
+ * @flags: flags required for each page
+ *
+ * Return true if every page in [@start, @start+@len) has @flags set.
+ * Return false if any page is unmapped.  Thus testing flags == 0 is
+ * equivalent to testing for flags == PAGE_VALID.
+ */
+bool page_check_range(target_ulong start, target_ulong last, int flags);
+
+/**
+ * page_check_range_empty:
+ * @start: first byte of range
+ * @last: last byte of range
+ * Context: holding mmap lock
+ *
+ * Return true if the entire range [@start, @last] is unmapped.
+ * The memory lock must be held so that the caller will can ensure
+ * the result stays true until a new mapping can be installed.
+ */
+bool page_check_range_empty(target_ulong start, target_ulong last);
+
+/**
+ * page_find_range_empty
+ * @min: first byte of search range
+ * @max: last byte of search range
+ * @len: size of the hole required
+ * @align: alignment of the hole required (power of 2)
+ *
+ * If there is a range [x, x+@len) within [@min, @max] such that
+ * x % @align == 0, then return x.  Otherwise return -1.
+ * The memory lock must be held, as the caller will want to ensure
+ * the returned range stays empty until a new mapping can be installed.
+ */
+target_ulong page_find_range_empty(target_ulong min, target_ulong max,
+                                   target_ulong len, target_ulong align);
+
+/**
+ * page_get_target_data(address)
+ * @address: guest virtual address
+ *
+ * Return TARGET_PAGE_DATA_SIZE bytes of out-of-band data to associate
+ * with the guest page at @address, allocating it if necessary.  The
+ * caller should already have verified that the address is valid.
+ *
+ * The memory will be freed when the guest page is deallocated,
+ * e.g. with the munmap system call.
+ */
+void *page_get_target_data(target_ulong address)
+    __attribute__((returns_nonnull));
 #endif
 
 CPUArchState *cpu_copy(CPUArchState *env);
@@ -351,7 +332,7 @@ CPUArchState *cpu_copy(CPUArchState *env);
  * be signaled by probe_access_flags().
  */
 #define TLB_INVALID_MASK    (1 << (TARGET_PAGE_BITS_MIN - 1))
-#define TLB_MMIO            0
+#define TLB_MMIO            (1 << (TARGET_PAGE_BITS_MIN - 2))
 #define TLB_WATCHPOINT      0
 
 #else
@@ -364,6 +345,9 @@ CPUArchState *cpu_copy(CPUArchState *env);
  *
  * Use TARGET_PAGE_BITS_MIN so that these bits are constant
  * when TARGET_PAGE_BITS_VARY is in effect.
+ *
+ * The count, if not the placement of these bits is known
+ * to tcg/tcg-op-ldst.c, check_max_alignment().
  */
 /* Zero if TLB entry is valid.  */
 #define TLB_INVALID_MASK    (1 << (TARGET_PAGE_BITS_MIN - 1))
@@ -372,19 +356,32 @@ CPUArchState *cpu_copy(CPUArchState *env);
 #define TLB_NOTDIRTY        (1 << (TARGET_PAGE_BITS_MIN - 2))
 /* Set if TLB entry is an IO callback.  */
 #define TLB_MMIO            (1 << (TARGET_PAGE_BITS_MIN - 3))
-/* Set if TLB entry contains a watchpoint.  */
-#define TLB_WATCHPOINT      (1 << (TARGET_PAGE_BITS_MIN - 4))
-/* Set if TLB entry requires byte swap.  */
-#define TLB_BSWAP           (1 << (TARGET_PAGE_BITS_MIN - 5))
 /* Set if TLB entry writes ignored.  */
-#define TLB_DISCARD_WRITE   (1 << (TARGET_PAGE_BITS_MIN - 6))
+#define TLB_DISCARD_WRITE   (1 << (TARGET_PAGE_BITS_MIN - 4))
+/* Set if the slow path must be used; more flags in CPUTLBEntryFull. */
+#define TLB_FORCE_SLOW      (1 << (TARGET_PAGE_BITS_MIN - 5))
 
-/* Use this mask to check interception with an alignment mask
+/*
+ * Use this mask to check interception with an alignment mask
  * in a TCG backend.
  */
 #define TLB_FLAGS_MASK \
     (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \
-    | TLB_WATCHPOINT | TLB_BSWAP | TLB_DISCARD_WRITE)
+    | TLB_FORCE_SLOW | TLB_DISCARD_WRITE)
+
+/*
+ * Flags stored in CPUTLBEntryFull.slow_flags[x].
+ * TLB_FORCE_SLOW must be set in CPUTLBEntry.addr_idx[x].
+ */
+/* Set if TLB entry requires byte swap.  */
+#define TLB_BSWAP            (1 << 0)
+/* Set if TLB entry contains a watchpoint.  */
+#define TLB_WATCHPOINT       (1 << 1)
+
+#define TLB_SLOW_FLAGS_MASK  (TLB_BSWAP | TLB_WATCHPOINT)
+
+/* The two sets of flags must not overlap. */
+QEMU_BUILD_BUG_ON(TLB_FLAGS_MASK & TLB_SLOW_FLAGS_MASK);
 
 /**
  * tlb_hit_page: return true if page aligned @addr is a hit against the
@@ -393,7 +390,7 @@ CPUArchState *cpu_copy(CPUArchState *env);
  * @addr: virtual address to test (must be page aligned)
  * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value)
  */
-static inline bool tlb_hit_page(target_ulong tlb_addr, target_ulong addr)
+static inline bool tlb_hit_page(uint64_t tlb_addr, vaddr addr)
 {
     return addr == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK));
 }
@@ -404,36 +401,19 @@ static inline bool tlb_hit_page(target_ulong tlb_addr, target_ulong addr)
  * @addr: virtual address to test (need not be page aligned)
  * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value)
  */
-static inline bool tlb_hit(target_ulong tlb_addr, target_ulong addr)
+static inline bool tlb_hit(uint64_t tlb_addr, vaddr addr)
 {
     return tlb_hit_page(tlb_addr, addr & TARGET_PAGE_MASK);
 }
 
-#ifdef CONFIG_TCG
-void dump_drift_info(void);
-void dump_exec_info(void);
-void dump_opcount_info(void);
-#endif /* CONFIG_TCG */
-
 #endif /* !CONFIG_USER_ONLY */
 
-/* Returns: 0 on success, -1 on error */
-int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
-                        void *ptr, target_ulong len, bool is_write);
-
+/* accel/tcg/cpu-exec.c */
 int cpu_exec(CPUState *cpu);
 
-/**
- * cpu_set_cpustate_pointers(cpu)
- * @cpu: The cpu object
- *
- * Set the generic pointers in CPUState into the outer object.
- */
-static inline void cpu_set_cpustate_pointers(ArchCPU *cpu)
-{
-    cpu->parent_obj.env_ptr = &cpu->env;
-    cpu->parent_obj.icount_decr_ptr = &cpu->neg.icount_decr;
-}
+/* Validate correct placement of CPUArchState. */
+QEMU_BUILD_BUG_ON(offsetof(ArchCPU, parent_obj) != 0);
+QEMU_BUILD_BUG_ON(offsetof(ArchCPU, env) != sizeof(CPUState));
 
 /**
  * env_archcpu(env)
@@ -443,7 +423,7 @@ static inline void cpu_set_cpustate_pointers(ArchCPU *cpu)
  */
 static inline ArchCPU *env_archcpu(CPUArchState *env)
 {
-    return container_of(env, ArchCPU, env);
+    return (void *)env - sizeof(CPUState);
 }
 
 /**
@@ -454,42 +434,7 @@ static inline ArchCPU *env_archcpu(CPUArchState *env)
  */
 static inline CPUState *env_cpu(CPUArchState *env)
 {
-    return &env_archcpu(env)->parent_obj;
-}
-
-/**
- * env_neg(env)
- * @env: The architecture environment
- *
- * Return the CPUNegativeOffsetState associated with the environment.
- */
-static inline CPUNegativeOffsetState *env_neg(CPUArchState *env)
-{
-    ArchCPU *arch_cpu = container_of(env, ArchCPU, env);
-    return &arch_cpu->neg;
-}
-
-/**
- * cpu_neg(cpu)
- * @cpu: The generic CPUState
- *
- * Return the CPUNegativeOffsetState associated with the cpu.
- */
-static inline CPUNegativeOffsetState *cpu_neg(CPUState *cpu)
-{
-    ArchCPU *arch_cpu = container_of(cpu, ArchCPU, parent_obj);
-    return &arch_cpu->neg;
-}
-
-/**
- * env_tlb(env)
- * @env: The architecture environment
- *
- * Return the CPUTLB state associated with the environment.
- */
-static inline CPUTLB *env_tlb(CPUArchState *env)
-{
-    return &env_neg(env)->tlb;
+    return (void *)env - sizeof(CPUState);
 }
 
 #endif /* CPU_ALL_H */
index 19805ed6db2ea6b697863be3ba6e157e22b0465e..41115d8919407109d83b9e633d0aacfa18dd03ac 100644 (file)
@@ -7,12 +7,43 @@
 #include "exec/hwaddr.h"
 #endif
 
+#define EXCP_INTERRUPT  0x10000 /* async interruption */
+#define EXCP_HLT        0x10001 /* hlt instruction reached */
+#define EXCP_DEBUG      0x10002 /* cpu stopped after a breakpoint or singlestep */
+#define EXCP_HALTED     0x10003 /* cpu is halted (waiting for external event) */
+#define EXCP_YIELD      0x10004 /* cpu wants to yield timeslice to another */
+#define EXCP_ATOMIC     0x10005 /* stop-the-world and emulate atomic */
+
+/**
+ * vaddr:
+ * Type wide enough to contain any #target_ulong virtual address.
+ */
+typedef uint64_t vaddr;
+#define VADDR_PRId PRId64
+#define VADDR_PRIu PRIu64
+#define VADDR_PRIo PRIo64
+#define VADDR_PRIx PRIx64
+#define VADDR_PRIX PRIX64
+#define VADDR_MAX UINT64_MAX
+
+void cpu_exec_init_all(void);
+void cpu_exec_step_atomic(CPUState *cpu);
+
+/* Using intptr_t ensures that qemu_*_page_mask is sign-extended even
+ * when intptr_t is 32-bit and we are aligning a long long.
+ */
+extern uintptr_t qemu_host_page_size;
+extern intptr_t qemu_host_page_mask;
+
+#define HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_host_page_size)
+#define REAL_HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_real_host_page_size())
+
 /* The CPU list lock nests outside page_(un)lock or mmap_(un)lock */
+extern QemuMutex qemu_cpu_list_lock;
 void qemu_init_cpu_list(void);
 void cpu_list_lock(void);
 void cpu_list_unlock(void);
-
-void tcg_flush_softmmu_tlb(CPUState *cs);
+unsigned int cpu_list_generation_id_get(void);
 
 void tcg_iommu_init_notifier_list(CPUState *cpu);
 void tcg_iommu_free_notifier_list(CPUState *cpu);
@@ -25,7 +56,7 @@ enum device_endian {
     DEVICE_LITTLE_ENDIAN,
 };
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
 #define DEVICE_HOST_ENDIAN DEVICE_BIG_ENDIAN
 #else
 #define DEVICE_HOST_ENDIAN DEVICE_LITTLE_ENDIAN
@@ -42,14 +73,28 @@ typedef uintptr_t ram_addr_t;
 #  define RAM_ADDR_FMT "%" PRIxPTR
 #endif
 
-extern ram_addr_t ram_size;
-
 /* memory API */
 
 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
 /* This should not be used by devices.  */
 ram_addr_t qemu_ram_addr_from_host(void *ptr);
+ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
 RAMBlock *qemu_ram_block_by_name(const char *name);
+
+/*
+ * Translates a host ptr back to a RAMBlock and an offset in that RAMBlock.
+ *
+ * @ptr: The host pointer to translate.
+ * @round_offset: Whether to round the result offset down to a target page
+ * @offset: Will be set to the offset within the returned RAMBlock.
+ *
+ * Returns: RAMBlock (or NULL if not found)
+ *
+ * By the time this function returns, the returned pointer is not protected
+ * by RCU anymore.  If the caller is not within an RCU critical section and
+ * does not hold the iothread lock, it must have other means of protecting the
+ * pointer, such as a reference to the memory region that owns the RAMBlock.
+ */
 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
                                    ram_addr_t *offset);
 ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host);
@@ -59,16 +104,42 @@ const char *qemu_ram_get_idstr(RAMBlock *rb);
 void *qemu_ram_get_host_addr(RAMBlock *rb);
 ram_addr_t qemu_ram_get_offset(RAMBlock *rb);
 ram_addr_t qemu_ram_get_used_length(RAMBlock *rb);
+ram_addr_t qemu_ram_get_max_length(RAMBlock *rb);
 bool qemu_ram_is_shared(RAMBlock *rb);
+bool qemu_ram_is_noreserve(RAMBlock *rb);
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
 void qemu_ram_set_uf_zeroable(RAMBlock *rb);
 bool qemu_ram_is_migratable(RAMBlock *rb);
 void qemu_ram_set_migratable(RAMBlock *rb);
 void qemu_ram_unset_migratable(RAMBlock *rb);
+bool qemu_ram_is_named_file(RAMBlock *rb);
+int qemu_ram_get_fd(RAMBlock *rb);
 
 size_t qemu_ram_pagesize(RAMBlock *block);
 size_t qemu_ram_pagesize_largest(void);
 
+/**
+ * cpu_address_space_init:
+ * @cpu: CPU to add this address space to
+ * @asidx: integer index of this address space
+ * @prefix: prefix to be used as name of address space
+ * @mr: the root memory region of address space
+ *
+ * Add the specified address space to the CPU's cpu_ases list.
+ * The address space added with @asidx 0 is the one used for the
+ * convenience pointer cpu->as.
+ * The target-specific code which registers ASes is responsible
+ * for defining what semantics address space 0, 1, 2, etc have.
+ *
+ * Before the first call to this function, the caller must set
+ * cpu->num_ases to the total number of address spaces it needs
+ * to support.
+ *
+ * Note that with KVM only one address space is supported.
+ */
+void cpu_address_space_init(CPUState *cpu, int asidx,
+                            const char *prefix, MemoryRegion *mr);
+
 void cpu_physical_memory_rw(hwaddr addr, void *buf,
                             hwaddr len, bool is_write);
 static inline void cpu_physical_memory_read(hwaddr addr,
@@ -107,4 +178,43 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
 
 #endif
 
+/* Returns: 0 on success, -1 on error */
+int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
+                        void *ptr, size_t len, bool is_write);
+
+/* vl.c */
+void list_cpus(void);
+
+#ifdef CONFIG_TCG
+/**
+ * cpu_unwind_state_data:
+ * @cpu: the cpu context
+ * @host_pc: the host pc within the translation
+ * @data: output data
+ *
+ * Attempt to load the the unwind state for a host pc occurring in
+ * translated code.  If @host_pc is not in translated code, the
+ * function returns false; otherwise @data is loaded.
+ * This is the same unwind info as given to restore_state_to_opc.
+ */
+bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data);
+
+/**
+ * cpu_restore_state:
+ * @cpu: the cpu context
+ * @host_pc: the host pc within the translation
+ * @return: true if state was restored, false otherwise
+ *
+ * Attempt to restore the state for a fault occurring in translated
+ * code. If @host_pc is not in translated code no state is
+ * restored and the function returns false.
+ */
+bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc);
+
+G_NORETURN void cpu_loop_exit_noexc(CPUState *cpu);
+G_NORETURN void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc);
+#endif /* CONFIG_TCG */
+G_NORETURN void cpu_loop_exit(CPUState *cpu);
+G_NORETURN void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc);
+
 #endif /* CPU_COMMON_H */
index d1f5e3fc3d874dca3738766b4eecd63aecfb3091..3915438b835919bca649089dd0fedea03c9452b6 100644 (file)
@@ -25,9 +25,6 @@
 
 #include "qemu/host-utils.h"
 #include "qemu/thread.h"
-#ifdef CONFIG_TCG
-#include "tcg-target.h"
-#endif
 #ifndef CONFIG_USER_ONLY
 #include "exec/hwaddr.h"
 #endif
@@ -39,9 +36,6 @@
 #ifndef TARGET_LONG_BITS
 # error TARGET_LONG_BITS must be defined in cpu-param.h
 #endif
-#ifndef NB_MMU_MODES
-# error NB_MMU_MODES must be defined in cpu-param.h
-#endif
 #ifndef TARGET_PHYS_ADDR_SPACE_BITS
 # error TARGET_PHYS_ADDR_SPACE_BITS must be defined in cpu-param.h
 #endif
 # endif
 #endif
 
-#define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8)
-
-/* target_ulong is the type of a virtual address */
-#if TARGET_LONG_SIZE == 4
-typedef int32_t target_long;
-typedef uint32_t target_ulong;
-#define TARGET_FMT_lx "%08x"
-#define TARGET_FMT_ld "%d"
-#define TARGET_FMT_lu "%u"
-#elif TARGET_LONG_SIZE == 8
-typedef int64_t target_long;
-typedef uint64_t target_ulong;
-#define TARGET_FMT_lx "%016" PRIx64
-#define TARGET_FMT_ld "%" PRId64
-#define TARGET_FMT_lu "%" PRIu64
-#else
-#error TARGET_LONG_SIZE undefined
-#endif
-
-#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
-
-/* use a fully associative victim tlb of 8 entries */
-#define CPU_VTLB_SIZE 8
-
-#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
-#define CPU_TLB_ENTRY_BITS 4
-#else
-#define CPU_TLB_ENTRY_BITS 5
-#endif
+#include "exec/target_long.h"
 
+#if defined(CONFIG_SOFTMMU) && defined(CONFIG_TCG)
 #define CPU_TLB_DYN_MIN_BITS 6
 #define CPU_TLB_DYN_DEFAULT_BITS 8
 
@@ -111,138 +78,6 @@ typedef uint64_t target_ulong;
 #  endif
 # endif
 
-typedef struct CPUTLBEntry {
-    /* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address
-       bit TARGET_PAGE_BITS-1..4  : Nonzero for accesses that should not
-                                    go directly to ram.
-       bit 3                      : indicates that the entry is invalid
-       bit 2..0                   : zero
-    */
-    union {
-        struct {
-            target_ulong addr_read;
-            target_ulong addr_write;
-            target_ulong addr_code;
-            /* Addend to virtual address to get host address.  IO accesses
-               use the corresponding iotlb value.  */
-            uintptr_t addend;
-        };
-        /* padding to get a power of two size */
-        uint8_t dummy[1 << CPU_TLB_ENTRY_BITS];
-    };
-} CPUTLBEntry;
-
-QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
-
-/* The IOTLB is not accessed directly inline by generated TCG code,
- * so the CPUIOTLBEntry layout is not as critical as that of the
- * CPUTLBEntry. (This is also why we don't want to combine the two
- * structs into one.)
- */
-typedef struct CPUIOTLBEntry {
-    /*
-     * @addr contains:
-     *  - in the lower TARGET_PAGE_BITS, a physical section number
-     *  - with the lower TARGET_PAGE_BITS masked off, an offset which
-     *    must be added to the virtual address to obtain:
-     *     + the ram_addr_t of the target RAM (if the physical section
-     *       number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM)
-     *     + the offset within the target MemoryRegion (otherwise)
-     */
-    hwaddr addr;
-    MemTxAttrs attrs;
-} CPUIOTLBEntry;
-
-/*
- * Data elements that are per MMU mode, minus the bits accessed by
- * the TCG fast path.
- */
-typedef struct CPUTLBDesc {
-    /*
-     * Describe a region covering all of the large pages allocated
-     * into the tlb.  When any page within this region is flushed,
-     * we must flush the entire tlb.  The region is matched if
-     * (addr & large_page_mask) == large_page_addr.
-     */
-    target_ulong large_page_addr;
-    target_ulong large_page_mask;
-    /* host time (in ns) at the beginning of the time window */
-    int64_t window_begin_ns;
-    /* maximum number of entries observed in the window */
-    size_t window_max_entries;
-    size_t n_used_entries;
-    /* The next index to use in the tlb victim table.  */
-    size_t vindex;
-    /* The tlb victim table, in two parts.  */
-    CPUTLBEntry vtable[CPU_VTLB_SIZE];
-    CPUIOTLBEntry viotlb[CPU_VTLB_SIZE];
-    /* The iotlb.  */
-    CPUIOTLBEntry *iotlb;
-} CPUTLBDesc;
-
-/*
- * Data elements that are per MMU mode, accessed by the fast path.
- * The structure is aligned to aid loading the pair with one insn.
- */
-typedef struct CPUTLBDescFast {
-    /* Contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */
-    uintptr_t mask;
-    /* The array of tlb entries itself. */
-    CPUTLBEntry *table;
-} CPUTLBDescFast QEMU_ALIGNED(2 * sizeof(void *));
-
-/*
- * Data elements that are shared between all MMU modes.
- */
-typedef struct CPUTLBCommon {
-    /* Serialize updates to f.table and d.vtable, and others as noted. */
-    QemuSpin lock;
-    /*
-     * Within dirty, for each bit N, modifications have been made to
-     * mmu_idx N since the last time that mmu_idx was flushed.
-     * Protected by tlb_c.lock.
-     */
-    uint16_t dirty;
-    /*
-     * Statistics.  These are not lock protected, but are read and
-     * written atomically.  This allows the monitor to print a snapshot
-     * of the stats without interfering with the cpu.
-     */
-    size_t full_flush_count;
-    size_t part_flush_count;
-    size_t elide_flush_count;
-} CPUTLBCommon;
-
-/*
- * The entire softmmu tlb, for all MMU modes.
- * The meaning of each of the MMU modes is defined in the target code.
- * Since this is placed within CPUNegativeOffsetState, the smallest
- * negative offsets are at the end of the struct.
- */
-
-typedef struct CPUTLB {
-    CPUTLBCommon c;
-    CPUTLBDesc d[NB_MMU_MODES];
-    CPUTLBDescFast f[NB_MMU_MODES];
-} CPUTLB;
-
-/* This will be used by TCG backends to compute offsets.  */
-#define TLB_MASK_TABLE_OFS(IDX) \
-    ((int)offsetof(ArchCPU, neg.tlb.f[IDX]) - (int)offsetof(ArchCPU, env))
-
-#else
-
-typedef struct CPUTLB { } CPUTLB;
-
-#endif  /* !CONFIG_USER_ONLY && CONFIG_TCG */
-
-/*
- * This structure must be placed in ArchCPU immediately
- * before CPUArchState, as a field named "neg".
- */
-typedef struct CPUNegativeOffsetState {
-    CPUTLB tlb;
-    IcountDecr icount_decr;
-} CPUNegativeOffsetState;
+#endif /* CONFIG_SOFTMMU && CONFIG_TCG */
 
 #endif
index ef54cb7e1f87fa03eb58b136f17c41d76cc1f5f3..6061e33ac95cfa6964a2c7f844afe849c3e43766 100644 (file)
  * load:  cpu_ld{sign}{size}{end}_{mmusuffix}(env, ptr)
  *        cpu_ld{sign}{size}{end}_{mmusuffix}_ra(env, ptr, retaddr)
  *        cpu_ld{sign}{size}{end}_mmuidx_ra(env, ptr, mmu_idx, retaddr)
+ *        cpu_ld{sign}{size}{end}_mmu(env, ptr, oi, retaddr)
  *
  * store: cpu_st{size}{end}_{mmusuffix}(env, ptr, val)
  *        cpu_st{size}{end}_{mmusuffix}_ra(env, ptr, val, retaddr)
  *        cpu_st{size}{end}_mmuidx_ra(env, ptr, val, mmu_idx, retaddr)
+ *        cpu_st{size}{end}_mmu(env, ptr, val, oi, retaddr)
  *
  * sign is:
  * (empty): for 32 and 64 bit sizes
  * The "mmuidx" suffix carries an extra mmu_idx argument that specifies
  * the index to use; the "data" and "code" suffixes take the index from
  * cpu_mmu_index().
+ *
+ * The "mmu" suffix carries the full MemOpIdx, with both mmu_idx and the
+ * MemOp including alignment requirements.  The alignment will be enforced.
  */
 #ifndef CPU_LDST_H
 #define CPU_LDST_H
 
+#include "exec/memopidx.h"
+#include "qemu/int128.h"
+#include "cpu.h"
+
 #if defined(CONFIG_USER_ONLY)
 /* sparc32plus has 64bit long but 32bit space address
  * this can make bad result with g2h() and h2g()
@@ -69,23 +78,40 @@ typedef uint64_t abi_ptr;
 #define TARGET_ABI_FMT_ptr "%"PRIx64
 #endif
 
+#ifndef TARGET_TAGGED_ADDRESSES
+static inline abi_ptr cpu_untagged_addr(CPUState *cs, abi_ptr x)
+{
+    return x;
+}
+#endif
+
 /* All direct uses of g2h and h2g need to go away for usermode softmmu.  */
-#define g2h(x) ((void *)((unsigned long)(abi_ptr)(x) + guest_base))
+static inline void *g2h_untagged(abi_ptr x)
+{
+    return (void *)((uintptr_t)(x) + guest_base);
+}
 
-#if HOST_LONG_BITS <= TARGET_VIRT_ADDR_SPACE_BITS
-#define guest_addr_valid(x) (1)
-#else
-#define guest_addr_valid(x) ((x) <= GUEST_ADDR_MAX)
-#endif
-#define h2g_valid(x) guest_addr_valid((unsigned long)(x) - guest_base)
+static inline void *g2h(CPUState *cs, abi_ptr x)
+{
+    return g2h_untagged(cpu_untagged_addr(cs, x));
+}
 
-static inline int guest_range_valid(unsigned long start, unsigned long len)
+static inline bool guest_addr_valid_untagged(abi_ulong x)
+{
+    return x <= GUEST_ADDR_MAX;
+}
+
+static inline bool guest_range_valid_untagged(abi_ulong start, abi_ulong len)
 {
     return len - 1 <= GUEST_ADDR_MAX && start <= GUEST_ADDR_MAX - len + 1;
 }
 
+#define h2g_valid(x) \
+    (HOST_LONG_BITS <= TARGET_VIRT_ADDR_SPACE_BITS || \
+     (uintptr_t)(x) - guest_base <= GUEST_ADDR_MAX)
+
 #define h2g_nocheck(x) ({ \
-    unsigned long __ret = (unsigned long)(x) - guest_base; \
+    uintptr_t __ret = (uintptr_t)(x) - guest_base; \
     (abi_ptr)__ret; \
 })
 
@@ -96,17 +122,15 @@ static inline int guest_range_valid(unsigned long start, unsigned long len)
 })
 #else
 typedef target_ulong abi_ptr;
-#define TARGET_ABI_FMT_ptr TARGET_ABI_FMT_lx
+#define TARGET_ABI_FMT_ptr TARGET_FMT_lx
 #endif
 
 uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr);
 int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr);
-
 uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr ptr);
 int cpu_ldsw_be_data(CPUArchState *env, abi_ptr ptr);
 uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr);
 uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr);
-
 uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr ptr);
 int cpu_ldsw_le_data(CPUArchState *env, abi_ptr ptr);
 uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr);
@@ -114,37 +138,31 @@ uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr ptr);
 
 uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
 int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
-
 uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
 int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
 uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
 uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
-
 uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
 int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
 uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
 uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
 
 void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
-
 void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
 void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
 void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val);
-
 void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
 void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
 void cpu_stq_le_data(CPUArchState *env, abi_ptr ptr, uint64_t val);
 
 void cpu_stb_data_ra(CPUArchState *env, abi_ptr ptr,
                      uint32_t val, uintptr_t ra);
-
 void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr ptr,
                         uint32_t val, uintptr_t ra);
 void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr ptr,
                         uint32_t val, uintptr_t ra);
 void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr ptr,
                         uint64_t val, uintptr_t ra);
-
 void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr ptr,
                         uint32_t val, uintptr_t ra);
 void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr ptr,
@@ -152,6 +170,136 @@ void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr ptr,
 void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr ptr,
                         uint64_t val, uintptr_t ra);
 
+uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr ptr,
+                            int mmu_idx, uintptr_t ra);
+int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr ptr,
+                       int mmu_idx, uintptr_t ra);
+uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr,
+                               int mmu_idx, uintptr_t ra);
+int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr,
+                          int mmu_idx, uintptr_t ra);
+uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr,
+                              int mmu_idx, uintptr_t ra);
+uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr,
+                              int mmu_idx, uintptr_t ra);
+uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr,
+                               int mmu_idx, uintptr_t ra);
+int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr,
+                          int mmu_idx, uintptr_t ra);
+uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr,
+                              int mmu_idx, uintptr_t ra);
+uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr,
+                              int mmu_idx, uintptr_t ra);
+
+void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val,
+                       int mmu_idx, uintptr_t ra);
+void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val,
+                          int mmu_idx, uintptr_t ra);
+void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val,
+                          int mmu_idx, uintptr_t ra);
+void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint64_t val,
+                          int mmu_idx, uintptr_t ra);
+void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val,
+                          int mmu_idx, uintptr_t ra);
+void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val,
+                          int mmu_idx, uintptr_t ra);
+void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint64_t val,
+                          int mmu_idx, uintptr_t ra);
+
+uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
+uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
+uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
+uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra);
+Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra);
+
+void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val,
+                 MemOpIdx oi, uintptr_t ra);
+void cpu_stw_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
+                 MemOpIdx oi, uintptr_t ra);
+void cpu_stl_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
+                 MemOpIdx oi, uintptr_t ra);
+void cpu_stq_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
+                 MemOpIdx oi, uintptr_t ra);
+void cpu_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
+                  MemOpIdx oi, uintptr_t ra);
+
+uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, abi_ptr addr,
+                                 uint32_t cmpv, uint32_t newv,
+                                 MemOpIdx oi, uintptr_t retaddr);
+uint32_t cpu_atomic_cmpxchgw_le_mmu(CPUArchState *env, abi_ptr addr,
+                                    uint32_t cmpv, uint32_t newv,
+                                    MemOpIdx oi, uintptr_t retaddr);
+uint32_t cpu_atomic_cmpxchgl_le_mmu(CPUArchState *env, abi_ptr addr,
+                                    uint32_t cmpv, uint32_t newv,
+                                    MemOpIdx oi, uintptr_t retaddr);
+uint64_t cpu_atomic_cmpxchgq_le_mmu(CPUArchState *env, abi_ptr addr,
+                                    uint64_t cmpv, uint64_t newv,
+                                    MemOpIdx oi, uintptr_t retaddr);
+uint32_t cpu_atomic_cmpxchgw_be_mmu(CPUArchState *env, abi_ptr addr,
+                                    uint32_t cmpv, uint32_t newv,
+                                    MemOpIdx oi, uintptr_t retaddr);
+uint32_t cpu_atomic_cmpxchgl_be_mmu(CPUArchState *env, abi_ptr addr,
+                                    uint32_t cmpv, uint32_t newv,
+                                    MemOpIdx oi, uintptr_t retaddr);
+uint64_t cpu_atomic_cmpxchgq_be_mmu(CPUArchState *env, abi_ptr addr,
+                                    uint64_t cmpv, uint64_t newv,
+                                    MemOpIdx oi, uintptr_t retaddr);
+
+#define GEN_ATOMIC_HELPER(NAME, TYPE, SUFFIX)   \
+TYPE cpu_atomic_ ## NAME ## SUFFIX ## _mmu      \
+    (CPUArchState *env, abi_ptr addr, TYPE val, \
+     MemOpIdx oi, uintptr_t retaddr);
+
+#ifdef CONFIG_ATOMIC64
+#define GEN_ATOMIC_HELPER_ALL(NAME)          \
+    GEN_ATOMIC_HELPER(NAME, uint32_t, b)     \
+    GEN_ATOMIC_HELPER(NAME, uint32_t, w_le)  \
+    GEN_ATOMIC_HELPER(NAME, uint32_t, w_be)  \
+    GEN_ATOMIC_HELPER(NAME, uint32_t, l_le)  \
+    GEN_ATOMIC_HELPER(NAME, uint32_t, l_be)  \
+    GEN_ATOMIC_HELPER(NAME, uint64_t, q_le)  \
+    GEN_ATOMIC_HELPER(NAME, uint64_t, q_be)
+#else
+#define GEN_ATOMIC_HELPER_ALL(NAME)          \
+    GEN_ATOMIC_HELPER(NAME, uint32_t, b)     \
+    GEN_ATOMIC_HELPER(NAME, uint32_t, w_le)  \
+    GEN_ATOMIC_HELPER(NAME, uint32_t, w_be)  \
+    GEN_ATOMIC_HELPER(NAME, uint32_t, l_le)  \
+    GEN_ATOMIC_HELPER(NAME, uint32_t, l_be)
+#endif
+
+GEN_ATOMIC_HELPER_ALL(fetch_add)
+GEN_ATOMIC_HELPER_ALL(fetch_sub)
+GEN_ATOMIC_HELPER_ALL(fetch_and)
+GEN_ATOMIC_HELPER_ALL(fetch_or)
+GEN_ATOMIC_HELPER_ALL(fetch_xor)
+GEN_ATOMIC_HELPER_ALL(fetch_smin)
+GEN_ATOMIC_HELPER_ALL(fetch_umin)
+GEN_ATOMIC_HELPER_ALL(fetch_smax)
+GEN_ATOMIC_HELPER_ALL(fetch_umax)
+
+GEN_ATOMIC_HELPER_ALL(add_fetch)
+GEN_ATOMIC_HELPER_ALL(sub_fetch)
+GEN_ATOMIC_HELPER_ALL(and_fetch)
+GEN_ATOMIC_HELPER_ALL(or_fetch)
+GEN_ATOMIC_HELPER_ALL(xor_fetch)
+GEN_ATOMIC_HELPER_ALL(smin_fetch)
+GEN_ATOMIC_HELPER_ALL(umin_fetch)
+GEN_ATOMIC_HELPER_ALL(smax_fetch)
+GEN_ATOMIC_HELPER_ALL(umax_fetch)
+
+GEN_ATOMIC_HELPER_ALL(xchg)
+
+#undef GEN_ATOMIC_HELPER_ALL
+#undef GEN_ATOMIC_HELPER
+
+Int128 cpu_atomic_cmpxchgo_le_mmu(CPUArchState *env, abi_ptr addr,
+                                  Int128 cmpv, Int128 newv,
+                                  MemOpIdx oi, uintptr_t retaddr);
+Int128 cpu_atomic_cmpxchgo_be_mmu(CPUArchState *env, abi_ptr addr,
+                                  Int128 cmpv, Int128 newv,
+                                  MemOpIdx oi, uintptr_t retaddr);
+
 #if defined(CONFIG_USER_ONLY)
 
 extern __thread uintptr_t helper_retaddr;
@@ -176,192 +324,61 @@ static inline void clear_helper_retaddr(void)
     helper_retaddr = 0;
 }
 
-/*
- * Provide the same *_mmuidx_ra interface as for softmmu.
- * The mmu_idx argument is ignored.
- */
-
-static inline uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                          int mmu_idx, uintptr_t ra)
-{
-    return cpu_ldub_data_ra(env, addr, ra);
-}
-
-static inline int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                     int mmu_idx, uintptr_t ra)
-{
-    return cpu_ldsb_data_ra(env, addr, ra);
-}
-
-static inline uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                             int mmu_idx, uintptr_t ra)
-{
-    return cpu_lduw_be_data_ra(env, addr, ra);
-}
-
-static inline int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                        int mmu_idx, uintptr_t ra)
-{
-    return cpu_ldsw_be_data_ra(env, addr, ra);
-}
-
-static inline uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                            int mmu_idx, uintptr_t ra)
-{
-    return cpu_ldl_be_data_ra(env, addr, ra);
-}
-
-static inline uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                            int mmu_idx, uintptr_t ra)
-{
-    return cpu_ldq_be_data_ra(env, addr, ra);
-}
-
-static inline uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                             int mmu_idx, uintptr_t ra)
-{
-    return cpu_lduw_le_data_ra(env, addr, ra);
-}
-
-static inline int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                        int mmu_idx, uintptr_t ra)
-{
-    return cpu_ldsw_le_data_ra(env, addr, ra);
-}
-
-static inline uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                            int mmu_idx, uintptr_t ra)
-{
-    return cpu_ldl_le_data_ra(env, addr, ra);
-}
-
-static inline uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                            int mmu_idx, uintptr_t ra)
-{
-    return cpu_ldq_le_data_ra(env, addr, ra);
-}
-
-static inline void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                     uint32_t val, int mmu_idx, uintptr_t ra)
-{
-    cpu_stb_data_ra(env, addr, val, ra);
-}
-
-static inline void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                        uint32_t val, int mmu_idx,
-                                        uintptr_t ra)
-{
-    cpu_stw_be_data_ra(env, addr, val, ra);
-}
-
-static inline void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                        uint32_t val, int mmu_idx,
-                                        uintptr_t ra)
-{
-    cpu_stl_be_data_ra(env, addr, val, ra);
-}
-
-static inline void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                        uint64_t val, int mmu_idx,
-                                        uintptr_t ra)
-{
-    cpu_stq_be_data_ra(env, addr, val, ra);
-}
-
-static inline void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                        uint32_t val, int mmu_idx,
-                                        uintptr_t ra)
-{
-    cpu_stw_le_data_ra(env, addr, val, ra);
-}
-
-static inline void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                        uint32_t val, int mmu_idx,
-                                        uintptr_t ra)
-{
-    cpu_stl_le_data_ra(env, addr, val, ra);
-}
-
-static inline void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                                        uint64_t val, int mmu_idx,
-                                        uintptr_t ra)
-{
-    cpu_stq_le_data_ra(env, addr, val, ra);
-}
-
 #else
 
-/* Needed for TCG_OVERSIZED_GUEST */
-#include "tcg/tcg.h"
+#include "tcg/oversized-guest.h"
 
-static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
+static inline uint64_t tlb_read_idx(const CPUTLBEntry *entry,
+                                    MMUAccessType access_type)
 {
-#if TCG_OVERSIZED_GUEST
-    return entry->addr_write;
+    /* Do not rearrange the CPUTLBEntry structure members. */
+    QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) !=
+                      MMU_DATA_LOAD * sizeof(uint64_t));
+    QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) !=
+                      MMU_DATA_STORE * sizeof(uint64_t));
+    QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) !=
+                      MMU_INST_FETCH * sizeof(uint64_t));
+
+#if TARGET_LONG_BITS == 32
+    /* Use qatomic_read, in case of addr_write; only care about low bits. */
+    const uint32_t *ptr = (uint32_t *)&entry->addr_idx[access_type];
+    ptr += HOST_BIG_ENDIAN;
+    return qatomic_read(ptr);
 #else
-    return qatomic_read(&entry->addr_write);
+    const uint64_t *ptr = &entry->addr_idx[access_type];
+# if TCG_OVERSIZED_GUEST
+    return *ptr;
+# else
+    /* ofs might correspond to .addr_write, so use qatomic_read */
+    return qatomic_read(ptr);
+# endif
 #endif
 }
 
+static inline uint64_t tlb_addr_write(const CPUTLBEntry *entry)
+{
+    return tlb_read_idx(entry, MMU_DATA_STORE);
+}
+
 /* Find the TLB index corresponding to the mmu_idx + address pair.  */
-static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
-                                  target_ulong addr)
+static inline uintptr_t tlb_index(CPUState *cpu, uintptr_t mmu_idx,
+                                  vaddr addr)
 {
-    uintptr_t size_mask = env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS;
+    uintptr_t size_mask = cpu->neg.tlb.f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS;
 
     return (addr >> TARGET_PAGE_BITS) & size_mask;
 }
 
 /* Find the TLB entry corresponding to the mmu_idx + address pair.  */
-static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
-                                     target_ulong addr)
+static inline CPUTLBEntry *tlb_entry(CPUState *cpu, uintptr_t mmu_idx,
+                                     vaddr addr)
 {
-    return &env_tlb(env)->f[mmu_idx].table[tlb_index(env, mmu_idx, addr)];
+    return &cpu->neg.tlb.f[mmu_idx].table[tlb_index(cpu, mmu_idx, addr)];
 }
 
-uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                            int mmu_idx, uintptr_t ra);
-int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                       int mmu_idx, uintptr_t ra);
-
-uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                               int mmu_idx, uintptr_t ra);
-int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                          int mmu_idx, uintptr_t ra);
-uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                              int mmu_idx, uintptr_t ra);
-uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                              int mmu_idx, uintptr_t ra);
-
-uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                               int mmu_idx, uintptr_t ra);
-int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                          int mmu_idx, uintptr_t ra);
-uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                              int mmu_idx, uintptr_t ra);
-uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
-                              int mmu_idx, uintptr_t ra);
-
-void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
-                       int mmu_idx, uintptr_t retaddr);
-
-void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
-                          int mmu_idx, uintptr_t retaddr);
-void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
-                          int mmu_idx, uintptr_t retaddr);
-void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
-                          int mmu_idx, uintptr_t retaddr);
-
-void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
-                          int mmu_idx, uintptr_t retaddr);
-void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
-                          int mmu_idx, uintptr_t retaddr);
-void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
-                          int mmu_idx, uintptr_t retaddr);
-
 #endif /* defined(CONFIG_USER_ONLY) */
 
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
 # define cpu_lduw_data        cpu_lduw_be_data
 # define cpu_ldsw_data        cpu_ldsw_be_data
 # define cpu_ldl_data         cpu_ldl_be_data
@@ -407,6 +424,15 @@ void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
 # define cpu_stq_mmuidx_ra    cpu_stq_le_mmuidx_ra
 #endif
 
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
+                         MemOpIdx oi, uintptr_t ra);
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
+                          MemOpIdx oi, uintptr_t ra);
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
+                          MemOpIdx oi, uintptr_t ra);
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
+                          MemOpIdx oi, uintptr_t ra);
+
 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr);
 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr);
 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr);
@@ -439,7 +465,7 @@ static inline int cpu_ldsw_code(CPUArchState *env, abi_ptr addr)
 static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
                                       MMUAccessType access_type, int mmu_idx)
 {
-    return g2h(addr);
+    return g2h(env_cpu(env), addr);
 }
 #else
 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
index 19b16e58f84e4b93c6cfc771c51afad8a0527657..6da1462c4f8966daed60045a2cc45cbd02ee3d6f 100644 (file)
@@ -26,6 +26,5 @@
 /* cputlb.c */
 void tlb_protect_code(ram_addr_t ram_addr);
 void tlb_unprotect_code(ram_addr_t ram_addr);
-void tlb_flush_counts(size_t *full, size_t *part, size_t *elide);
 #endif
 #endif
index 94fe05daaa189af514e7e01f37483aa58171d900..ee90ef122bc1e00b9c9748e966d715e07a7e11f7 100644 (file)
 #define EXEC_ALL_H
 
 #include "cpu.h"
-#include "exec/tb-context.h"
-#ifdef CONFIG_TCG
-#include "exec/cpu_ldst.h"
-#endif
-#include "sysemu/cpu-timers.h"
-
-/* allow to see translation results - the slowdown should be negligible, so we leave it */
-#define DEBUG_DISAS
-
-/* Page tracking code uses ram addresses in system mode, and virtual
-   addresses in userspace mode.  Define tb_page_addr_t to be an appropriate
-   type.  */
 #if defined(CONFIG_USER_ONLY)
-typedef abi_ulong tb_page_addr_t;
-#define TB_PAGE_ADDR_FMT TARGET_ABI_FMT_lx
-#else
-typedef ram_addr_t tb_page_addr_t;
-#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT
+#include "exec/cpu_ldst.h"
 #endif
-
-#include "qemu/log.h"
-
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns);
-void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb,
-                          target_ulong *data);
-
-void cpu_gen_init(void);
-
-/**
- * cpu_restore_state:
- * @cpu: the vCPU state is to be restore to
- * @searched_pc: the host PC the fault occurred at
- * @will_exit: true if the TB executed will be interrupted after some
-               cpu adjustments. Required for maintaining the correct
-               icount valus
- * @return: true if state was restored, false otherwise
- *
- * Attempt to restore the state for a fault occurring in translated
- * code. If the searched_pc is not in translated code no state is
- * restored and the function returns false.
- */
-bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc, bool will_exit);
-
-void QEMU_NORETURN cpu_loop_exit_noexc(CPUState *cpu);
-void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
-TranslationBlock *tb_gen_code(CPUState *cpu,
-                              target_ulong pc, target_ulong cs_base,
-                              uint32_t flags,
-                              int cflags);
-
-void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
-void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc);
-void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc);
+#include "exec/translation-block.h"
+#include "qemu/clang-tsa.h"
 
 /**
  * cpu_loop_exit_requested:
@@ -89,34 +41,9 @@ void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc);
  */
 static inline bool cpu_loop_exit_requested(CPUState *cpu)
 {
-    return (int32_t)qatomic_read(&cpu_neg(cpu)->icount_decr.u32) < 0;
+    return (int32_t)qatomic_read(&cpu->neg.icount_decr.u32) < 0;
 }
 
-#if !defined(CONFIG_USER_ONLY)
-void cpu_reloading_memory_map(void);
-/**
- * cpu_address_space_init:
- * @cpu: CPU to add this address space to
- * @asidx: integer index of this address space
- * @prefix: prefix to be used as name of address space
- * @mr: the root memory region of address space
- *
- * Add the specified address space to the CPU's cpu_ases list.
- * The address space added with @asidx 0 is the one used for the
- * convenience pointer cpu->as.
- * The target-specific code which registers ASes is responsible
- * for defining what semantics address space 0, 1, 2, etc have.
- *
- * Before the first call to this function, the caller must set
- * cpu->num_ases to the total number of address spaces it needs
- * to support.
- *
- * Note that with KVM only one address space is supported.
- */
-void cpu_address_space_init(CPUState *cpu, int asidx,
-                            const char *prefix, MemoryRegion *mr);
-#endif
-
 #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
 /* cputlb.c */
 /**
@@ -137,7 +64,7 @@ void tlb_destroy(CPUState *cpu);
  * Flush one page from the TLB of the specified CPU, for all
  * MMU indexes.
  */
-void tlb_flush_page(CPUState *cpu, target_ulong addr);
+void tlb_flush_page(CPUState *cpu, vaddr addr);
 /**
  * tlb_flush_page_all_cpus:
  * @cpu: src CPU of the flush
@@ -146,7 +73,7 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr);
  * Flush one page from the TLB of the specified CPU, for all
  * MMU indexes.
  */
-void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr);
+void tlb_flush_page_all_cpus(CPUState *src, vaddr addr);
 /**
  * tlb_flush_page_all_cpus_synced:
  * @cpu: src CPU of the flush
@@ -158,7 +85,7 @@ void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr);
  * the source vCPUs safe work is complete. This will depend on when
  * the guests translation ends the TB.
  */
-void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr);
+void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr);
 /**
  * tlb_flush:
  * @cpu: CPU whose TLB should be flushed
@@ -193,7 +120,7 @@ void tlb_flush_all_cpus_synced(CPUState *src_cpu);
  * Flush one page from the TLB of the specified CPU, for the specified
  * MMU indexes.
  */
-void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr,
+void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr,
                               uint16_t idxmap);
 /**
  * tlb_flush_page_by_mmuidx_all_cpus:
@@ -204,7 +131,7 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr,
  * Flush one page from the TLB of all CPUs, for the specified
  * MMU indexes.
  */
-void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
+void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr,
                                        uint16_t idxmap);
 /**
  * tlb_flush_page_by_mmuidx_all_cpus_synced:
@@ -218,7 +145,7 @@ void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
  * complete once  the source vCPUs safe work is complete. This will
  * depend on when the guests translation ends the TB.
  */
-void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr,
+void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr,
                                               uint16_t idxmap);
 /**
  * tlb_flush_by_mmuidx:
@@ -261,19 +188,66 @@ void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap);
  *
  * Similar to tlb_flush_page_mask, but with a bitmap of indexes.
  */
-void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr,
+void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
                                    uint16_t idxmap, unsigned bits);
 
 /* Similarly, with broadcast and syncing. */
-void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
+void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr,
                                             uint16_t idxmap, unsigned bits);
 void tlb_flush_page_bits_by_mmuidx_all_cpus_synced
-    (CPUState *cpu, target_ulong addr, uint16_t idxmap, unsigned bits);
+    (CPUState *cpu, vaddr addr, uint16_t idxmap, unsigned bits);
+
+/**
+ * tlb_flush_range_by_mmuidx
+ * @cpu: CPU whose TLB should be flushed
+ * @addr: virtual address of the start of the range to be flushed
+ * @len: length of range to be flushed
+ * @idxmap: bitmap of mmu indexes to flush
+ * @bits: number of significant bits in address
+ *
+ * For each mmuidx in @idxmap, flush all pages within [@addr,@addr+@len),
+ * comparing only the low @bits worth of each virtual page.
+ */
+void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
+                               vaddr len, uint16_t idxmap,
+                               unsigned bits);
+
+/* Similarly, with broadcast and syncing. */
+void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr,
+                                        vaddr len, uint16_t idxmap,
+                                        unsigned bits);
+void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
+                                               vaddr addr,
+                                               vaddr len,
+                                               uint16_t idxmap,
+                                               unsigned bits);
+
+/**
+ * tlb_set_page_full:
+ * @cpu: CPU context
+ * @mmu_idx: mmu index of the tlb to modify
+ * @addr: virtual address of the entry to add
+ * @full: the details of the tlb entry
+ *
+ * Add an entry to @cpu tlb index @mmu_idx.  All of the fields of
+ * @full must be filled, except for xlat_section, and constitute
+ * the complete description of the translated page.
+ *
+ * This is generally called by the target tlb_fill function after
+ * having performed a successful page table walk to find the physical
+ * address and attributes for the translation.
+ *
+ * At most one entry for a given virtual address is permitted. Only a
+ * single TARGET_PAGE_SIZE region is mapped; @full->lg_page_size is only
+ * used by tlb_flush_page.
+ */
+void tlb_set_page_full(CPUState *cpu, int mmu_idx, vaddr addr,
+                       CPUTLBEntryFull *full);
 
 /**
  * tlb_set_page_with_attrs:
  * @cpu: CPU to add this TLB entry for
- * @vaddr: virtual address of page to add entry for
+ * @addr: virtual address of page to add entry for
  * @paddr: physical address of the page
  * @attrs: memory transaction attributes
  * @prot: access permissions (PAGE_READ/PAGE_WRITE/PAGE_EXEC bits)
@@ -281,7 +255,7 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced
  * @size: size of the page in bytes
  *
  * Add an entry to this CPU's TLB (a mapping from virtual address
- * @vaddr to physical address @paddr) with the specified memory
+ * @addr to physical address @paddr) with the specified memory
  * transaction attributes. This is generally called by the target CPU
  * specific code after it has been called through the tlb_fill()
  * entry point and performed a successful page table walk to find
@@ -292,18 +266,18 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced
  * single TARGET_PAGE_SIZE region is mapped; the supplied @size is only
  * used by tlb_flush_page.
  */
-void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
+void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr,
                              hwaddr paddr, MemTxAttrs attrs,
-                             int prot, int mmu_idx, target_ulong size);
+                             int prot, int mmu_idx, vaddr size);
 /* tlb_set_page:
  *
  * This function is equivalent to calling tlb_set_page_with_attrs()
  * with an @attrs argument of MEMTXATTRS_UNSPECIFIED. It's provided
  * as a convenience for CPUs which don't use memory transaction attributes.
  */
-void tlb_set_page(CPUState *cpu, target_ulong vaddr,
+void tlb_set_page(CPUState *cpu, vaddr addr,
                   hwaddr paddr, int prot,
-                  int mmu_idx, target_ulong size);
+                  int mmu_idx, vaddr size);
 #else
 static inline void tlb_init(CPUState *cpu)
 {
@@ -311,14 +285,13 @@ static inline void tlb_init(CPUState *cpu)
 static inline void tlb_destroy(CPUState *cpu)
 {
 }
-static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
+static inline void tlb_flush_page(CPUState *cpu, vaddr addr)
 {
 }
-static inline void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
+static inline void tlb_flush_page_all_cpus(CPUState *src, vaddr addr)
 {
 }
-static inline void tlb_flush_page_all_cpus_synced(CPUState *src,
-                                                  target_ulong addr)
+static inline void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr)
 {
 }
 static inline void tlb_flush(CPUState *cpu)
@@ -331,7 +304,7 @@ static inline void tlb_flush_all_cpus_synced(CPUState *src_cpu)
 {
 }
 static inline void tlb_flush_page_by_mmuidx(CPUState *cpu,
-                                            target_ulong addr, uint16_t idxmap)
+                                            vaddr addr, uint16_t idxmap)
 {
 }
 
@@ -339,12 +312,12 @@ static inline void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
 {
 }
 static inline void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu,
-                                                     target_ulong addr,
+                                                     vaddr addr,
                                                      uint16_t idxmap)
 {
 }
 static inline void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu,
-                                                            target_ulong addr,
+                                                            vaddr addr,
                                                             uint16_t idxmap)
 {
 }
@@ -357,22 +330,41 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
 {
 }
 static inline void tlb_flush_page_bits_by_mmuidx(CPUState *cpu,
-                                                 target_ulong addr,
+                                                 vaddr addr,
                                                  uint16_t idxmap,
                                                  unsigned bits)
 {
 }
 static inline void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu,
-                                                          target_ulong addr,
+                                                          vaddr addr,
                                                           uint16_t idxmap,
                                                           unsigned bits)
 {
 }
 static inline void
-tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr,
+tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr,
                                               uint16_t idxmap, unsigned bits)
 {
 }
+static inline void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
+                                             vaddr len, uint16_t idxmap,
+                                             unsigned bits)
+{
+}
+static inline void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu,
+                                                      vaddr addr,
+                                                      vaddr len,
+                                                      uint16_t idxmap,
+                                                      unsigned bits)
+{
+}
+static inline void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
+                                                             vaddr addr,
+                                                             vaddr len,
+                                                             uint16_t idxmap,
+                                                             unsigned bits)
+{
+}
 #endif
 /**
  * probe_access:
@@ -391,16 +383,16 @@ tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr,
  * Finally, return the host address for a page that is backed by RAM,
  * or NULL if the page requires I/O.
  */
-void *probe_access(CPUArchState *env, target_ulong addr, int size,
+void *probe_access(CPUArchState *env, vaddr addr, int size,
                    MMUAccessType access_type, int mmu_idx, uintptr_t retaddr);
 
-static inline void *probe_write(CPUArchState *env, target_ulong addr, int size,
+static inline void *probe_write(CPUArchState *env, vaddr addr, int size,
                                 int mmu_idx, uintptr_t retaddr)
 {
     return probe_access(env, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
 }
 
-static inline void *probe_read(CPUArchState *env, target_ulong addr, int size,
+static inline void *probe_read(CPUArchState *env, vaddr addr, int size,
                                int mmu_idx, uintptr_t retaddr)
 {
     return probe_access(env, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
@@ -410,6 +402,7 @@ static inline void *probe_read(CPUArchState *env, target_ulong addr, int size,
  * probe_access_flags:
  * @env: CPUArchState
  * @addr: guest virtual address to look up
+ * @size: size of the access
  * @access_type: read, write or execute permission
  * @mmu_idx: MMU index to use for lookup
  * @nonfault: suppress the fault
@@ -424,135 +417,119 @@ static inline void *probe_read(CPUArchState *env, target_ulong addr, int size,
  * Do handle clean pages, so exclude TLB_NOTDIRY from the returned flags.
  * For simplicity, all "mmio-like" flags are folded to TLB_MMIO.
  */
-int probe_access_flags(CPUArchState *env, target_ulong addr,
+int probe_access_flags(CPUArchState *env, vaddr addr, int size,
                        MMUAccessType access_type, int mmu_idx,
                        bool nonfault, void **phost, uintptr_t retaddr);
 
-#define CODE_GEN_ALIGN           16 /* must be >= of the size of a icache line */
+#ifndef CONFIG_USER_ONLY
+/**
+ * probe_access_full:
+ * Like probe_access_flags, except also return into @pfull.
+ *
+ * The CPUTLBEntryFull structure returned via @pfull is transient
+ * and must be consumed or copied immediately, before any further
+ * access or changes to TLB @mmu_idx.
+ */
+int probe_access_full(CPUArchState *env, vaddr addr, int size,
+                      MMUAccessType access_type, int mmu_idx,
+                      bool nonfault, void **phost,
+                      CPUTLBEntryFull **pfull, uintptr_t retaddr);
+
+/**
+ * probe_access_mmu() - Like probe_access_full except cannot fault and
+ * doesn't trigger instrumentation.
+ *
+ * @env: CPUArchState
+ * @vaddr: virtual address to probe
+ * @size: size of the probe
+ * @access_type: read, write or execute permission
+ * @mmu_idx: softmmu index
+ * @phost: ptr to return value host address or NULL
+ * @pfull: ptr to return value CPUTLBEntryFull structure or NULL
+ *
+ * The CPUTLBEntryFull structure returned via @pfull is transient
+ * and must be consumed or copied immediately, before any further
+ * access or changes to TLB @mmu_idx.
+ *
+ * Returns: TLB flags as per probe_access_flags()
+ */
+int probe_access_full_mmu(CPUArchState *env, vaddr addr, int size,
+                          MMUAccessType access_type, int mmu_idx,
+                          void **phost, CPUTLBEntryFull **pfull);
+
+#endif
 
-/* Estimated block size for TB allocation.  */
-/* ??? The following is based on a 2015 survey of x86_64 host output.
-   Better would seem to be some sort of dynamically sized TB array,
-   adapting to the block sizes actually being produced.  */
-#if defined(CONFIG_SOFTMMU)
-#define CODE_GEN_AVG_BLOCK_SIZE 400
+/* Hide the qatomic_read to make code a little easier on the eyes */
+static inline uint32_t tb_cflags(const TranslationBlock *tb)
+{
+    return qatomic_read(&tb->cflags);
+}
+
+static inline tb_page_addr_t tb_page_addr0(const TranslationBlock *tb)
+{
+#ifdef CONFIG_USER_ONLY
+    return tb->itree.start;
 #else
-#define CODE_GEN_AVG_BLOCK_SIZE 150
+    return tb->page_addr[0];
 #endif
+}
 
-/*
- * Translation Cache-related fields of a TB.
- * This struct exists just for convenience; we keep track of TB's in a binary
- * search tree, and the only fields needed to compare TB's in the tree are
- * @ptr and @size.
- * Note: the address of search data can be obtained by adding @size to @ptr.
- */
-struct tb_tc {
-    void *ptr;    /* pointer to the translated code */
-    size_t size;
-};
-
-struct TranslationBlock {
-    target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
-    target_ulong cs_base; /* CS base for this block */
-    uint32_t flags; /* flags defining in which context the code was generated */
-    uint16_t size;      /* size of target code for this block (1 <=
-                           size <= TARGET_PAGE_SIZE) */
-    uint16_t icount;
-    uint32_t cflags;    /* compile flags */
-#define CF_COUNT_MASK  0x00007fff
-#define CF_LAST_IO     0x00008000 /* Last insn may be an IO access.  */
-#define CF_NOCACHE     0x00010000 /* To be freed after execution */
-#define CF_USE_ICOUNT  0x00020000
-#define CF_INVALID     0x00040000 /* TB is stale. Set with @jmp_lock held */
-#define CF_PARALLEL    0x00080000 /* Generate code for a parallel context */
-#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
-#define CF_CLUSTER_SHIFT 24
-/* cflags' mask for hashing/comparison */
-#define CF_HASH_MASK   \
-    (CF_COUNT_MASK | CF_LAST_IO | CF_USE_ICOUNT | CF_PARALLEL | CF_CLUSTER_MASK)
-
-    /* Per-vCPU dynamic tracing state used to generate this TB */
-    uint32_t trace_vcpu_dstate;
-
-    struct tb_tc tc;
-
-    /* original tb when cflags has CF_NOCACHE */
-    struct TranslationBlock *orig_tb;
-    /* first and second physical page containing code. The lower bit
-       of the pointer tells the index in page_next[].
-       The list is protected by the TB's page('s) lock(s) */
-    uintptr_t page_next[2];
-    tb_page_addr_t page_addr[2];
-
-    /* jmp_lock placed here to fill a 4-byte hole. Its documentation is below */
-    QemuSpin jmp_lock;
-
-    /* The following data are used to directly call another TB from
-     * the code of this one. This can be done either by emitting direct or
-     * indirect native jump instructions. These jumps are reset so that the TB
-     * just continues its execution. The TB can be linked to another one by
-     * setting one of the jump targets (or patching the jump instruction). Only
-     * two of such jumps are supported.
-     */
-    uint16_t jmp_reset_offset[2]; /* offset of original jump target */
-#define TB_JMP_RESET_OFFSET_INVALID 0xffff /* indicates no jump generated */
-    uintptr_t jmp_target_arg[2];  /* target address or offset */
+static inline tb_page_addr_t tb_page_addr1(const TranslationBlock *tb)
+{
+#ifdef CONFIG_USER_ONLY
+    tb_page_addr_t next = tb->itree.last & TARGET_PAGE_MASK;
+    return next == (tb->itree.start & TARGET_PAGE_MASK) ? -1 : next;
+#else
+    return tb->page_addr[1];
+#endif
+}
 
+static inline void tb_set_page_addr0(TranslationBlock *tb,
+                                     tb_page_addr_t addr)
+{
+#ifdef CONFIG_USER_ONLY
+    tb->itree.start = addr;
     /*
-     * Each TB has a NULL-terminated list (jmp_list_head) of incoming jumps.
-     * Each TB can have two outgoing jumps, and therefore can participate
-     * in two lists. The list entries are kept in jmp_list_next[2]. The least
-     * significant bit (LSB) of the pointers in these lists is used to encode
-     * which of the two list entries is to be used in the pointed TB.
-     *
-     * List traversals are protected by jmp_lock. The destination TB of each
-     * outgoing jump is kept in jmp_dest[] so that the appropriate jmp_lock
-     * can be acquired from any origin TB.
-     *
-     * jmp_dest[] are tagged pointers as well. The LSB is set when the TB is
-     * being invalidated, so that no further outgoing jumps from it can be set.
-     *
-     * jmp_lock also protects the CF_INVALID cflag; a jump must not be chained
-     * to a destination TB that has CF_INVALID set.
+     * To begin, we record an interval of one byte.  When the translation
+     * loop encounters a second page, the interval will be extended to
+     * include the first byte of the second page, which is sufficient to
+     * allow tb_page_addr1() above to work properly.  The final corrected
+     * interval will be set by tb_page_add() from tb->size before the
+     * node is added to the interval tree.
      */
-    uintptr_t jmp_list_head;
-    uintptr_t jmp_list_next[2];
-    uintptr_t jmp_dest[2];
-};
-
-extern bool parallel_cpus;
+    tb->itree.last = addr;
+#else
+    tb->page_addr[0] = addr;
+#endif
+}
 
-/* Hide the qatomic_read to make code a little easier on the eyes */
-static inline uint32_t tb_cflags(const TranslationBlock *tb)
+static inline void tb_set_page_addr1(TranslationBlock *tb,
+                                     tb_page_addr_t addr)
 {
-    return qatomic_read(&tb->cflags);
+#ifdef CONFIG_USER_ONLY
+    /* Extend the interval to the first byte of the second page.  See above. */
+    tb->itree.last = addr;
+#else
+    tb->page_addr[1] = addr;
+#endif
 }
 
 /* current cflags for hashing/comparison */
-static inline uint32_t curr_cflags(void)
-{
-    return (parallel_cpus ? CF_PARALLEL : 0)
-         | (icount_enabled() ? CF_USE_ICOUNT : 0);
-}
+uint32_t curr_cflags(CPUState *cpu);
 
 /* TranslationBlock invalidate API */
 #if defined(CONFIG_USER_ONLY)
-void tb_invalidate_phys_addr(target_ulong addr);
-void tb_invalidate_phys_range(target_ulong start, target_ulong end);
+void tb_invalidate_phys_addr(hwaddr addr);
 #else
 void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs);
 #endif
-void tb_flush(CPUState *cpu);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
-TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
-                                   target_ulong cs_base, uint32_t flags,
-                                   uint32_t cf_mask);
+void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last);
 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
 
 /* GETPC is the true target of the return instruction that we'll execute.  */
 #if defined(CONFIG_TCG_INTERPRETER)
-extern uintptr_t tci_tb_ptr;
+extern __thread uintptr_t tci_tb_ptr;
 # define GETPC() tci_tb_ptr
 #else
 # define GETPC() \
@@ -568,14 +545,6 @@ extern uintptr_t tci_tb_ptr;
    smaller than 4 bytes, so we don't worry about special-casing this.  */
 #define GETPC_ADJ   2
 
-#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_DEBUG_TCG)
-void assert_no_pages_locked(void);
-#else
-static inline void assert_no_pages_locked(void)
-{
-}
-#endif
-
 #if !defined(CONFIG_USER_ONLY)
 
 /**
@@ -591,49 +560,25 @@ struct MemoryRegionSection *iotlb_to_section(CPUState *cpu,
                                              hwaddr index, MemTxAttrs attrs);
 #endif
 
-#if defined(CONFIG_USER_ONLY)
-void mmap_lock(void);
-void mmap_unlock(void);
-bool have_mmap_lock(void);
-
 /**
- * get_page_addr_code() - user-mode version
+ * get_page_addr_code_hostp()
  * @env: CPUArchState
  * @addr: guest virtual address of guest code
  *
- * Returns @addr.
- */
-static inline tb_page_addr_t get_page_addr_code(CPUArchState *env,
-                                                target_ulong addr)
-{
-    return addr;
-}
-
-/**
- * get_page_addr_code_hostp() - user-mode version
- * @env: CPUArchState
- * @addr: guest virtual address of guest code
+ * See get_page_addr_code() (full-system version) for documentation on the
+ * return value.
  *
- * Returns @addr.
+ * Sets *@hostp (when @hostp is non-NULL) as follows.
+ * If the return value is -1, sets *@hostp to NULL. Otherwise, sets *@hostp
+ * to the host address where @addr's content is kept.
  *
- * If @hostp is non-NULL, sets *@hostp to the host address where @addr's content
- * is kept.
+ * Note: this function can trigger an exception.
  */
-static inline tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env,
-                                                      target_ulong addr,
-                                                      void **hostp)
-{
-    if (hostp) {
-        *hostp = g2h(addr);
-    }
-    return addr;
-}
-#else
-static inline void mmap_lock(void) {}
-static inline void mmap_unlock(void) {}
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
+                                        void **hostp);
 
 /**
- * get_page_addr_code() - full-system version
+ * get_page_addr_code()
  * @env: CPUArchState
  * @addr: guest virtual address of guest code
  *
@@ -643,30 +588,84 @@ static inline void mmap_unlock(void) {}
  *
  * Note: this function can trigger an exception.
  */
-tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr);
+static inline tb_page_addr_t get_page_addr_code(CPUArchState *env,
+                                                vaddr addr)
+{
+    return get_page_addr_code_hostp(env, addr, NULL);
+}
+
+#if defined(CONFIG_USER_ONLY)
+void TSA_NO_TSA mmap_lock(void);
+void TSA_NO_TSA mmap_unlock(void);
+bool have_mmap_lock(void);
+
+static inline void mmap_unlock_guard(void *unused)
+{
+    mmap_unlock();
+}
+
+#define WITH_MMAP_LOCK_GUARD()                                            \
+    for (int _mmap_lock_iter __attribute__((cleanup(mmap_unlock_guard)))  \
+         = (mmap_lock(), 0); _mmap_lock_iter == 0; _mmap_lock_iter = 1)
 
 /**
- * get_page_addr_code_hostp() - full-system version
- * @env: CPUArchState
- * @addr: guest virtual address of guest code
+ * adjust_signal_pc:
+ * @pc: raw pc from the host signal ucontext_t.
+ * @is_write: host memory operation was write, or read-modify-write.
  *
- * See get_page_addr_code() (full-system version) for documentation on the
- * return value.
- *
- * Sets *@hostp (when @hostp is non-NULL) as follows.
- * If the return value is -1, sets *@hostp to NULL. Otherwise, sets *@hostp
- * to the host address where @addr's content is kept.
+ * Alter @pc as required for unwinding.  Return the type of the
+ * guest memory access -- host reads may be for guest execution.
+ */
+MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write);
+
+/**
+ * handle_sigsegv_accerr_write:
+ * @cpu: the cpu context
+ * @old_set: the sigset_t from the signal ucontext_t
+ * @host_pc: the host pc, adjusted for the signal
+ * @host_addr: the host address of the fault
  *
- * Note: this function can trigger an exception.
+ * Return true if the write fault has been handled, and should be re-tried.
  */
-tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
-                                        void **hostp);
+bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
+                                 uintptr_t host_pc, abi_ptr guest_addr);
 
-void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
-void tlb_set_dirty(CPUState *cpu, target_ulong vaddr);
+/**
+ * cpu_loop_exit_sigsegv:
+ * @cpu: the cpu context
+ * @addr: the guest address of the fault
+ * @access_type: access was read/write/execute
+ * @maperr: true for invalid page, false for permission fault
+ * @ra: host pc for unwinding
+ *
+ * Use the TCGCPUOps hook to record cpu state, do guest operating system
+ * specific things to raise SIGSEGV, and jump to the main cpu loop.
+ */
+G_NORETURN void cpu_loop_exit_sigsegv(CPUState *cpu, target_ulong addr,
+                                      MMUAccessType access_type,
+                                      bool maperr, uintptr_t ra);
 
-/* exec.c */
-void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr);
+/**
+ * cpu_loop_exit_sigbus:
+ * @cpu: the cpu context
+ * @addr: the guest address of the alignment fault
+ * @access_type: access was read/write/execute
+ * @ra: host pc for unwinding
+ *
+ * Use the TCGCPUOps hook to record cpu state, do guest operating system
+ * specific things to raise SIGBUS, and jump to the main cpu loop.
+ */
+G_NORETURN void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr,
+                                     MMUAccessType access_type,
+                                     uintptr_t ra);
+
+#else
+static inline void mmap_lock(void) {}
+static inline void mmap_unlock(void) {}
+#define WITH_MMAP_LOCK_GUARD()
+
+void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
+void tlb_set_dirty(CPUState *cpu, vaddr addr);
 
 MemoryRegionSection *
 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
@@ -676,7 +675,4 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                        MemoryRegionSection *section);
 #endif
 
-/* vl.c */
-extern int singlestep;
-
 #endif
index 94d8f83e92daddb2591d7c8244eb3175da8e5a37..d8a3c56fa2b8f10c261d2d46f146a72202e557c3 100644 (file)
 #define GDB_WATCHPOINT_READ      3
 #define GDB_WATCHPOINT_ACCESS    4
 
-#ifdef NEED_CPU_H
-#include "cpu.h"
+typedef struct GDBFeature {
+    const char *xmlname;
+    const char *xml;
+    int num_regs;
+} GDBFeature;
+
+typedef struct GDBFeatureBuilder {
+    GDBFeature *feature;
+    GPtrArray *xml;
+    int base_reg;
+} GDBFeatureBuilder;
 
-typedef void (*gdb_syscall_complete_cb)(CPUState *cpu,
-                                        target_ulong ret, target_ulong err);
 
-/**
- * gdb_do_syscall:
- * @cb: function to call when the system call has completed
- * @fmt: gdb syscall format string
- * ...: list of arguments to interpolate into @fmt
- *
- * Send a GDB syscall request. This function will return immediately;
- * the callback function will be called later when the remote system
- * call has completed.
- *
- * @fmt should be in the 'call-id,parameter,parameter...' format documented
- * for the F request packet in the GDB remote protocol. A limited set of
- * printf-style format specifiers is supported:
- *   %x  - target_ulong argument printed in hex
- *   %lx - 64-bit argument printed in hex
- *   %s  - string pointer (target_ulong) and length (int) pair
- */
-void gdb_do_syscall(gdb_syscall_complete_cb cb, const char *fmt, ...);
-/**
- * gdb_do_syscallv:
- * @cb: function to call when the system call has completed
- * @fmt: gdb syscall format string
- * @va: arguments to interpolate into @fmt
- *
- * As gdb_do_syscall, but taking a va_list rather than a variable
- * argument list.
- */
-void gdb_do_syscallv(gdb_syscall_complete_cb cb, const char *fmt, va_list va);
-int use_gdb_syscalls(void);
-void gdb_set_stop_cpu(CPUState *cpu);
-void gdb_exit(CPUArchState *, int);
-#ifdef CONFIG_USER_ONLY
-/**
- * gdb_handlesig: yield control to gdb
- * @cpu: CPU
- * @sig: if non-zero, the signal number which caused us to stop
- *
- * This function yields control to gdb, when a user-mode-only target
- * needs to stop execution. If @sig is non-zero, then we will send a
- * stop packet to tell gdb that we have stopped because of this signal.
- *
- * This function will block (handling protocol requests from gdb)
- * until gdb tells us to continue target execution. When it does
- * return, the return value is a signal to deliver to the target,
- * or 0 if no signal should be delivered, ie the signal that caused
- * us to stop should be ignored.
- */
-int gdb_handlesig(CPUState *, int);
-void gdb_signalled(CPUArchState *, int);
-void gdbserver_fork(CPUState *);
-#endif
 /* Get or set a register.  Returns the size of the register.  */
 typedef int (*gdb_get_reg_cb)(CPUArchState *env, GByteArray *buf, int reg);
 typedef int (*gdb_set_reg_cb)(CPUArchState *env, uint8_t *buf, int reg);
-void gdb_register_coprocessor(CPUState *cpu,
-                              gdb_get_reg_cb get_reg, gdb_set_reg_cb set_reg,
-                              int num_regs, const char *xml, int g_pos);
-
-/*
- * The GDB remote protocol transfers values in target byte order. As
- * the gdbstub may be batching up several register values we always
- * append to the array.
- */
-
-static inline int gdb_get_reg8(GByteArray *buf, uint8_t val)
-{
-    g_byte_array_append(buf, &val, 1);
-    return 1;
-}
-
-static inline int gdb_get_reg16(GByteArray *buf, uint16_t val)
-{
-    uint16_t to_word = tswap16(val);
-    g_byte_array_append(buf, (uint8_t *) &to_word, 2);
-    return 2;
-}
-
-static inline int gdb_get_reg32(GByteArray *buf, uint32_t val)
-{
-    uint32_t to_long = tswap32(val);
-    g_byte_array_append(buf, (uint8_t *) &to_long, 4);
-    return 4;
-}
-
-static inline int gdb_get_reg64(GByteArray *buf, uint64_t val)
-{
-    uint64_t to_quad = tswap64(val);
-    g_byte_array_append(buf, (uint8_t *) &to_quad, 8);
-    return 8;
-}
-
-static inline int gdb_get_reg128(GByteArray *buf, uint64_t val_hi,
-                                 uint64_t val_lo)
-{
-    uint64_t to_quad;
-#ifdef TARGET_WORDS_BIGENDIAN
-    to_quad = tswap64(val_hi);
-    g_byte_array_append(buf, (uint8_t *) &to_quad, 8);
-    to_quad = tswap64(val_lo);
-    g_byte_array_append(buf, (uint8_t *) &to_quad, 8);
-#else
-    to_quad = tswap64(val_lo);
-    g_byte_array_append(buf, (uint8_t *) &to_quad, 8);
-    to_quad = tswap64(val_hi);
-    g_byte_array_append(buf, (uint8_t *) &to_quad, 8);
-#endif
-    return 16;
-}
-
-static inline int gdb_get_float32(GByteArray *array, float32 val)
-{
-    uint8_t buf[sizeof(CPU_FloatU)];
-
-    stfl_p(buf, val);
-    g_byte_array_append(array, buf, sizeof(buf));
-
-    return sizeof(buf);
-}
-
-static inline int gdb_get_float64(GByteArray *array, float64 val)
-{
-    uint8_t buf[sizeof(CPU_DoubleU)];
-
-    stfq_p(buf, val);
-    g_byte_array_append(array, buf, sizeof(buf));
-
-    return sizeof(buf);
-}
-
-static inline int gdb_get_zeroes(GByteArray *array, size_t len)
-{
-    guint oldlen = array->len;
-    g_byte_array_set_size(array, oldlen + len);
-    memset(array->data + oldlen, 0, len);
-
-    return len;
-}
 
 /**
- * gdb_get_reg_ptr: get pointer to start of last element
- * @len: length of element
- *
- * This is a helper function to extract the pointer to the last
- * element for additional processing. Some front-ends do additional
- * dynamic swapping of the elements based on CPU state.
+ * gdb_register_coprocessor() - register a supplemental set of registers
+ * @cpu - the CPU associated with registers
+ * @get_reg - get function (gdb reading)
+ * @set_reg - set function (gdb modifying)
+ * @num_regs - number of registers in set
+ * @xml - xml name of set
+ * @gpos - non-zero to append to "general" register set at @gpos
  */
-static inline uint8_t * gdb_get_reg_ptr(GByteArray *buf, int len)
-{
-    return buf->data + buf->len - len;
-}
-
-#if TARGET_LONG_BITS == 64
-#define gdb_get_regl(buf, val) gdb_get_reg64(buf, val)
-#define ldtul_p(addr) ldq_p(addr)
-#else
-#define gdb_get_regl(buf, val) gdb_get_reg32(buf, val)
-#define ldtul_p(addr) ldl_p(addr)
-#endif
-
-#endif
+void gdb_register_coprocessor(CPUState *cpu,
+                              gdb_get_reg_cb get_reg, gdb_set_reg_cb set_reg,
+                              int num_regs, const char *xml, int g_pos);
 
 /**
  * gdbserver_start: start the gdb server
@@ -187,17 +50,61 @@ static inline uint8_t * gdb_get_reg_ptr(GByteArray *buf, int len)
  */
 int gdbserver_start(const char *port_or_device);
 
-void gdbserver_cleanup(void);
+/**
+ * gdb_feature_builder_init() - Initialize GDBFeatureBuilder.
+ * @builder: The builder to be initialized.
+ * @feature: The feature to be filled.
+ * @name: The name of the feature.
+ * @xmlname: The name of the XML.
+ * @base_reg: The base number of the register ID.
+ */
+void gdb_feature_builder_init(GDBFeatureBuilder *builder, GDBFeature *feature,
+                              const char *name, const char *xmlname,
+                              int base_reg);
 
 /**
- * gdb_has_xml:
- * This is an ugly hack to cope with both new and old gdb.
- * If gdb sends qXfer:features:read then assume we're talking to a newish
- * gdb that understands target descriptions.
+ * gdb_feature_builder_append_tag() - Append a tag.
+ * @builder: The builder.
+ * @format: The format of the tag.
+ * @...: The values to be formatted.
  */
-extern bool gdb_has_xml;
+void G_GNUC_PRINTF(2, 3)
+gdb_feature_builder_append_tag(const GDBFeatureBuilder *builder,
+                               const char *format, ...);
+
+/**
+ * gdb_feature_builder_append_reg() - Append a register.
+ * @builder: The builder.
+ * @name: The register's name; it must be unique within a CPU.
+ * @bitsize: The register's size, in bits.
+ * @regnum: The offset of the register's number in the feature.
+ * @type: The type of the register.
+ * @group: The register group to which this register belongs; it can be NULL.
+ */
+void gdb_feature_builder_append_reg(const GDBFeatureBuilder *builder,
+                                    const char *name,
+                                    int bitsize,
+                                    int regnum,
+                                    const char *type,
+                                    const char *group);
+
+/**
+ * gdb_feature_builder_end() - End building GDBFeature.
+ * @builder: The builder.
+ */
+void gdb_feature_builder_end(const GDBFeatureBuilder *builder);
+
+/**
+ * gdb_find_static_feature() - Find a static feature.
+ * @xmlname: The name of the XML.
+ *
+ * Return: The static feature.
+ */
+const GDBFeature *gdb_find_static_feature(const char *xmlname);
+
+void gdb_set_stop_cpu(CPUState *cpu);
 
-/* in gdbstub-xml.c, generated by scripts/feature_to_c.sh */
-extern const char *const xml_builtin[][2];
+/* in gdbstub-xml.c, generated by scripts/feature_to_c.py */
+extern const GDBFeature gdb_static_features[];
 
 #endif
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
deleted file mode 100644 (file)
index 822c43c..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-#ifndef GEN_ICOUNT_H
-#define GEN_ICOUNT_H
-
-#include "qemu/timer.h"
-
-/* Helpers for instruction counting code generation.  */
-
-static TCGOp *icount_start_insn;
-
-static inline void gen_io_start(void)
-{
-    TCGv_i32 tmp = tcg_const_i32(1);
-    tcg_gen_st_i32(tmp, cpu_env,
-                   offsetof(ArchCPU, parent_obj.can_do_io) -
-                   offsetof(ArchCPU, env));
-    tcg_temp_free_i32(tmp);
-}
-
-/*
- * cpu->can_do_io is cleared automatically at the beginning of
- * each translation block.  The cost is minimal and only paid
- * for -icount, plus it would be very easy to forget doing it
- * in the translator.  Therefore, backends only need to call
- * gen_io_start.
- */
-static inline void gen_io_end(void)
-{
-    TCGv_i32 tmp = tcg_const_i32(0);
-    tcg_gen_st_i32(tmp, cpu_env,
-                   offsetof(ArchCPU, parent_obj.can_do_io) -
-                   offsetof(ArchCPU, env));
-    tcg_temp_free_i32(tmp);
-}
-
-static inline void gen_tb_start(TranslationBlock *tb)
-{
-    TCGv_i32 count, imm;
-
-    tcg_ctx->exitreq_label = gen_new_label();
-    if (tb_cflags(tb) & CF_USE_ICOUNT) {
-        count = tcg_temp_local_new_i32();
-    } else {
-        count = tcg_temp_new_i32();
-    }
-
-    tcg_gen_ld_i32(count, cpu_env,
-                   offsetof(ArchCPU, neg.icount_decr.u32) -
-                   offsetof(ArchCPU, env));
-
-    if (tb_cflags(tb) & CF_USE_ICOUNT) {
-        imm = tcg_temp_new_i32();
-        /* We emit a movi with a dummy immediate argument. Keep the insn index
-         * of the movi so that we later (when we know the actual insn count)
-         * can update the immediate argument with the actual insn count.  */
-        tcg_gen_movi_i32(imm, 0xdeadbeef);
-        icount_start_insn = tcg_last_op();
-
-        tcg_gen_sub_i32(count, count, imm);
-        tcg_temp_free_i32(imm);
-    }
-
-    tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label);
-
-    if (tb_cflags(tb) & CF_USE_ICOUNT) {
-        tcg_gen_st16_i32(count, cpu_env,
-                         offsetof(ArchCPU, neg.icount_decr.u16.low) -
-                         offsetof(ArchCPU, env));
-        gen_io_end();
-    }
-
-    tcg_temp_free_i32(count);
-}
-
-static inline void gen_tb_end(TranslationBlock *tb, int num_insns)
-{
-    if (tb_cflags(tb) & CF_USE_ICOUNT) {
-        /* Update the num_insn immediate parameter now that we know
-         * the actual insn count.  */
-        tcg_set_insn_param(icount_start_insn, 1, num_insns);
-    }
-
-    gen_set_label(tcg_ctx->exitreq_label);
-    tcg_gen_exit_tb(tb, TB_EXIT_REQUESTED);
-}
-
-#endif
diff --git a/include/exec/helper-gen-common.h b/include/exec/helper-gen-common.h
new file mode 100644 (file)
index 0000000..5d6d78a
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Helper file for declaring TCG helper functions.
+ * This one expands generation functions for tcg opcodes.
+ */
+
+#ifndef HELPER_GEN_COMMON_H
+#define HELPER_GEN_COMMON_H
+
+#define HELPER_H "accel/tcg/tcg-runtime.h"
+#include "exec/helper-gen.h.inc"
+#undef  HELPER_H
+
+#define HELPER_H "accel/tcg/plugin-helpers.h"
+#include "exec/helper-gen.h.inc"
+#undef  HELPER_H
+
+#endif /* HELPER_GEN_COMMON_H */
index 29c02f85dcc3b6358d2bd696dccbc7b09e0c7817..f7ec15569973d7885fc2be36913e697e77c59b34 100644 (file)
@@ -1,97 +1,16 @@
-/* Helper file for declaring TCG helper functions.
-   This one expands generation functions for tcg opcodes.  */
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Helper file for declaring TCG helper functions.
+ * This one expands generation functions for tcg opcodes.
+ */
 
 #ifndef HELPER_GEN_H
 #define HELPER_GEN_H
 
-#include "exec/helper-head.h"
+#include "exec/helper-gen-common.h"
 
-#define DEF_HELPER_FLAGS_0(name, flags, ret)                            \
-static inline void glue(gen_helper_, name)(dh_retvar_decl0(ret))        \
-{                                                                       \
-  tcg_gen_callN(HELPER(name), dh_retvar(ret), 0, NULL);                 \
-}
-
-#define DEF_HELPER_FLAGS_1(name, flags, ret, t1)                        \
-static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
-    dh_arg_decl(t1, 1))                                                 \
-{                                                                       \
-  TCGTemp *args[1] = { dh_arg(t1, 1) };                                 \
-  tcg_gen_callN(HELPER(name), dh_retvar(ret), 1, args);                 \
-}
-
-#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2)                    \
-static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
-    dh_arg_decl(t1, 1), dh_arg_decl(t2, 2))                             \
-{                                                                       \
-  TCGTemp *args[2] = { dh_arg(t1, 1), dh_arg(t2, 2) };                  \
-  tcg_gen_callN(HELPER(name), dh_retvar(ret), 2, args);                 \
-}
-
-#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3)                \
-static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
-    dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3))         \
-{                                                                       \
-  TCGTemp *args[3] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3) };   \
-  tcg_gen_callN(HELPER(name), dh_retvar(ret), 3, args);                 \
-}
-
-#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4)            \
-static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
-    dh_arg_decl(t1, 1), dh_arg_decl(t2, 2),                             \
-    dh_arg_decl(t3, 3), dh_arg_decl(t4, 4))                             \
-{                                                                       \
-  TCGTemp *args[4] = { dh_arg(t1, 1), dh_arg(t2, 2),                    \
-                     dh_arg(t3, 3), dh_arg(t4, 4) };                    \
-  tcg_gen_callN(HELPER(name), dh_retvar(ret), 4, args);                 \
-}
-
-#define DEF_HELPER_FLAGS_5(name, flags, ret, t1, t2, t3, t4, t5)        \
-static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
-    dh_arg_decl(t1, 1),  dh_arg_decl(t2, 2), dh_arg_decl(t3, 3),        \
-    dh_arg_decl(t4, 4), dh_arg_decl(t5, 5))                             \
-{                                                                       \
-  TCGTemp *args[5] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3),     \
-                     dh_arg(t4, 4), dh_arg(t5, 5) };                    \
-  tcg_gen_callN(HELPER(name), dh_retvar(ret), 5, args);                 \
-}
-
-#define DEF_HELPER_FLAGS_6(name, flags, ret, t1, t2, t3, t4, t5, t6)    \
-static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
-    dh_arg_decl(t1, 1),  dh_arg_decl(t2, 2), dh_arg_decl(t3, 3),        \
-    dh_arg_decl(t4, 4), dh_arg_decl(t5, 5), dh_arg_decl(t6, 6))         \
-{                                                                       \
-  TCGTemp *args[6] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3),     \
-                     dh_arg(t4, 4), dh_arg(t5, 5), dh_arg(t6, 6) };     \
-  tcg_gen_callN(HELPER(name), dh_retvar(ret), 6, args);                 \
-}
-
-#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7)\
-static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
-    dh_arg_decl(t1, 1),  dh_arg_decl(t2, 2), dh_arg_decl(t3, 3),        \
-    dh_arg_decl(t4, 4), dh_arg_decl(t5, 5), dh_arg_decl(t6, 6),         \
-    dh_arg_decl(t7, 7))                                                 \
-{                                                                       \
-  TCGTemp *args[7] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3),     \
-                     dh_arg(t4, 4), dh_arg(t5, 5), dh_arg(t6, 6),       \
-                     dh_arg(t7, 7) };                                   \
-  tcg_gen_callN(HELPER(name), dh_retvar(ret), 7, args);                 \
-}
-
-#include "helper.h"
-#include "trace/generated-helpers.h"
-#include "trace/generated-helpers-wrappers.h"
-#include "tcg-runtime.h"
-#include "plugin-helpers.h"
-
-#undef DEF_HELPER_FLAGS_0
-#undef DEF_HELPER_FLAGS_1
-#undef DEF_HELPER_FLAGS_2
-#undef DEF_HELPER_FLAGS_3
-#undef DEF_HELPER_FLAGS_4
-#undef DEF_HELPER_FLAGS_5
-#undef DEF_HELPER_FLAGS_6
-#undef DEF_HELPER_FLAGS_7
-#undef GEN_HELPER
+#define HELPER_H "helper.h"
+#include "exec/helper-gen.h.inc"
+#undef  HELPER_H
 
 #endif /* HELPER_GEN_H */
diff --git a/include/exec/helper-gen.h.inc b/include/exec/helper-gen.h.inc
new file mode 100644 (file)
index 0000000..c009641
--- /dev/null
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Helper file for declaring TCG helper functions.
+ * This one expands generation functions for tcg opcodes.
+ * Define HELPER_H for the header file to be expanded,
+ * and static inline to change from global file scope.
+ */
+
+#include "tcg/tcg.h"
+#include "tcg/helper-info.h"
+#include "exec/helper-head.h"
+
+#define DEF_HELPER_FLAGS_0(name, flags, ret)                            \
+extern TCGHelperInfo glue(helper_info_, name);                          \
+static inline void glue(gen_helper_, name)(dh_retvar_decl0(ret))        \
+{                                                                       \
+    tcg_gen_call0(&glue(helper_info_, name), dh_retvar(ret));           \
+}
+
+#define DEF_HELPER_FLAGS_1(name, flags, ret, t1)                        \
+extern TCGHelperInfo glue(helper_info_, name);                          \
+static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
+    dh_arg_decl(t1, 1))                                                 \
+{                                                                       \
+    tcg_gen_call1(&glue(helper_info_, name), dh_retvar(ret),            \
+                  dh_arg(t1, 1));                                       \
+}
+
+#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2)                    \
+extern TCGHelperInfo glue(helper_info_, name);                          \
+static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
+    dh_arg_decl(t1, 1), dh_arg_decl(t2, 2))                             \
+{                                                                       \
+    tcg_gen_call2(&glue(helper_info_, name), dh_retvar(ret),            \
+                  dh_arg(t1, 1), dh_arg(t2, 2));                        \
+}
+
+#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3)                \
+extern TCGHelperInfo glue(helper_info_, name);                          \
+static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
+    dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3))         \
+{                                                                       \
+    tcg_gen_call3(&glue(helper_info_, name), dh_retvar(ret),            \
+                  dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3));         \
+}
+
+#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4)            \
+extern TCGHelperInfo glue(helper_info_, name);                          \
+static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
+    dh_arg_decl(t1, 1), dh_arg_decl(t2, 2),                             \
+    dh_arg_decl(t3, 3), dh_arg_decl(t4, 4))                             \
+{                                                                       \
+    tcg_gen_call4(&glue(helper_info_, name), dh_retvar(ret),            \
+                  dh_arg(t1, 1), dh_arg(t2, 2),                         \
+                  dh_arg(t3, 3), dh_arg(t4, 4));                        \
+}
+
+#define DEF_HELPER_FLAGS_5(name, flags, ret, t1, t2, t3, t4, t5)        \
+extern TCGHelperInfo glue(helper_info_, name);                          \
+static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
+    dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3),         \
+    dh_arg_decl(t4, 4), dh_arg_decl(t5, 5))                             \
+{                                                                       \
+    tcg_gen_call5(&glue(helper_info_, name), dh_retvar(ret),            \
+                  dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3),          \
+                  dh_arg(t4, 4), dh_arg(t5, 5));                        \
+}
+
+#define DEF_HELPER_FLAGS_6(name, flags, ret, t1, t2, t3, t4, t5, t6)    \
+extern TCGHelperInfo glue(helper_info_, name);                          \
+static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
+    dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3),         \
+    dh_arg_decl(t4, 4), dh_arg_decl(t5, 5), dh_arg_decl(t6, 6))         \
+{                                                                       \
+    tcg_gen_call6(&glue(helper_info_, name), dh_retvar(ret),            \
+                  dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3),          \
+                  dh_arg(t4, 4), dh_arg(t5, 5), dh_arg(t6, 6));         \
+}
+
+#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7)\
+extern TCGHelperInfo glue(helper_info_, name);                          \
+static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
+    dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3),         \
+    dh_arg_decl(t4, 4), dh_arg_decl(t5, 5), dh_arg_decl(t6, 6),         \
+    dh_arg_decl(t7, 7))                                                 \
+{                                                                       \
+    tcg_gen_call7(&glue(helper_info_, name), dh_retvar(ret),            \
+                  dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3),          \
+                  dh_arg(t4, 4), dh_arg(t5, 5), dh_arg(t6, 6),          \
+                  dh_arg(t7, 7));                                       \
+}
+
+#include HELPER_H
+
+#undef DEF_HELPER_FLAGS_0
+#undef DEF_HELPER_FLAGS_1
+#undef DEF_HELPER_FLAGS_2
+#undef DEF_HELPER_FLAGS_3
+#undef DEF_HELPER_FLAGS_4
+#undef DEF_HELPER_FLAGS_5
+#undef DEF_HELPER_FLAGS_6
+#undef DEF_HELPER_FLAGS_7
index 3094c7946daf592c67cd5c4c10d6bbf7b3837e75..28ceab0a461326b7636a86ff4ec8806d2548fa69 100644 (file)
@@ -1,23 +1,13 @@
-/* Helper file for declaring TCG helper functions.
-   Used by other helper files.
-
-   Targets should use DEF_HELPER_N and DEF_HELPER_FLAGS_N to declare helper
-   functions.  Names should be specified without the helper_ prefix, and
-   the return and argument types specified.  3 basic types are understood
-   (i32, i64 and ptr).  Additional aliases are provided for convenience and
-   to match the types used by the C helper implementation.
-
-   The target helper.h should be included in all files that use/define
-   helper functions.  THis will ensure that function prototypes are
-   consistent.  In addition it should be included an extra two times for
-   helper.c, defining:
-    GEN_HELPER 1 to produce op generation functions (gen_helper_*)
-    GEN_HELPER 2 to do runtime registration helper functions.
+/*
+ * Helper file for declaring TCG helper functions.
+ * Used by other helper files.
  */
 
 #ifndef EXEC_HELPER_HEAD_H
 #define EXEC_HELPER_HEAD_H
 
+#include "fpu/softfloat-types.h"
+
 #define HELPER(name) glue(helper_, name)
 
 /* Some types that make sense in C, but not for TCG.  */
 #define dh_alias_int i32
 #define dh_alias_i64 i64
 #define dh_alias_s64 i64
+#define dh_alias_i128 i128
 #define dh_alias_f16 i32
 #define dh_alias_f32 i32
 #define dh_alias_f64 i64
 #define dh_alias_ptr ptr
 #define dh_alias_cptr ptr
+#define dh_alias_env ptr
 #define dh_alias_void void
 #define dh_alias_noreturn noreturn
 #define dh_alias(t) glue(dh_alias_, t)
 #define dh_ctype_int int
 #define dh_ctype_i64 uint64_t
 #define dh_ctype_s64 int64_t
+#define dh_ctype_i128 Int128
 #define dh_ctype_f16 uint32_t
 #define dh_ctype_f32 float32
 #define dh_ctype_f64 float64
 #define dh_ctype_ptr void *
 #define dh_ctype_cptr const void *
+#define dh_ctype_env CPUArchState *
 #define dh_ctype_void void
-#define dh_ctype_noreturn void QEMU_NORETURN
+#define dh_ctype_noreturn G_NORETURN void
 #define dh_ctype(t) dh_ctype_##t
 
 #ifdef NEED_CPU_H
 # ifdef TARGET_LONG_BITS
 #  if TARGET_LONG_BITS == 32
 #   define dh_alias_tl i32
+#   define dh_typecode_tl dh_typecode_i32
 #  else
 #   define dh_alias_tl i64
+#   define dh_typecode_tl dh_typecode_i64
 #  endif
 # endif
-# define dh_alias_env ptr
 # define dh_ctype_tl target_ulong
-# define dh_ctype_env CPUArchState *
 #endif
 
 /* We can't use glue() here because it falls foul of C preprocessor
@@ -68,6 +62,7 @@
 #define dh_retvar_decl0_noreturn void
 #define dh_retvar_decl0_i32 TCGv_i32 retval
 #define dh_retvar_decl0_i64 TCGv_i64 retval
+#define dh_retval_decl0_i128 TCGv_i128 retval
 #define dh_retvar_decl0_ptr TCGv_ptr retval
 #define dh_retvar_decl0(t) glue(dh_retvar_decl0_, dh_alias(t))
 
@@ -75,6 +70,7 @@
 #define dh_retvar_decl_noreturn
 #define dh_retvar_decl_i32 TCGv_i32 retval,
 #define dh_retvar_decl_i64 TCGv_i64 retval,
+#define dh_retvar_decl_i128 TCGv_i128 retval,
 #define dh_retvar_decl_ptr TCGv_ptr retval,
 #define dh_retvar_decl(t) glue(dh_retvar_decl_, dh_alias(t))
 
 #define dh_retvar_noreturn NULL
 #define dh_retvar_i32 tcgv_i32_temp(retval)
 #define dh_retvar_i64 tcgv_i64_temp(retval)
+#define dh_retvar_i128 tcgv_i128_temp(retval)
 #define dh_retvar_ptr tcgv_ptr_temp(retval)
 #define dh_retvar(t) glue(dh_retvar_, dh_alias(t))
 
-#define dh_is_64bit_void 0
-#define dh_is_64bit_noreturn 0
-#define dh_is_64bit_i32 0
-#define dh_is_64bit_i64 1
-#define dh_is_64bit_ptr (sizeof(void *) == 8)
-#define dh_is_64bit_cptr dh_is_64bit_ptr
-#define dh_is_64bit(t) glue(dh_is_64bit_, dh_alias(t))
-
-#define dh_is_signed_void 0
-#define dh_is_signed_noreturn 0
-#define dh_is_signed_i32 0
-#define dh_is_signed_s32 1
-#define dh_is_signed_i64 0
-#define dh_is_signed_s64 1
-#define dh_is_signed_f16 0
-#define dh_is_signed_f32 0
-#define dh_is_signed_f64 0
-#define dh_is_signed_tl  0
-#define dh_is_signed_int 1
-/* ??? This is highly specific to the host cpu.  There are even special
-   extension instructions that may be required, e.g. ia64's addp4.  But
-   for now we don't support any 64-bit targets with 32-bit pointers.  */
-#define dh_is_signed_ptr 0
-#define dh_is_signed_cptr dh_is_signed_ptr
-#define dh_is_signed_env dh_is_signed_ptr
-#define dh_is_signed(t) dh_is_signed_##t
+#define dh_typecode_void 0
+#define dh_typecode_noreturn 0
+#define dh_typecode_i32 2
+#define dh_typecode_s32 3
+#define dh_typecode_i64 4
+#define dh_typecode_s64 5
+#define dh_typecode_ptr 6
+#define dh_typecode_i128 7
+#define dh_typecode_int dh_typecode_s32
+#define dh_typecode_f16 dh_typecode_i32
+#define dh_typecode_f32 dh_typecode_i32
+#define dh_typecode_f64 dh_typecode_i64
+#define dh_typecode_cptr dh_typecode_ptr
+#define dh_typecode_env dh_typecode_ptr
+#define dh_typecode(t) dh_typecode_##t
 
 #define dh_callflag_i32  0
-#define dh_callflag_s32  0
-#define dh_callflag_int  0
 #define dh_callflag_i64  0
-#define dh_callflag_s64  0
-#define dh_callflag_f16  0
-#define dh_callflag_f32  0
-#define dh_callflag_f64  0
+#define dh_callflag_i128 0
 #define dh_callflag_ptr  0
-#define dh_callflag_cptr dh_callflag_ptr
 #define dh_callflag_void 0
 #define dh_callflag_noreturn TCG_CALL_NO_RETURN
 #define dh_callflag(t) glue(dh_callflag_, dh_alias(t))
 
-#define dh_sizemask(t, n) \
-  ((dh_is_64bit(t) << (n*2)) | (dh_is_signed(t) << (n*2+1)))
+#define dh_typemask(t, n)  (dh_typecode(t) << (n * 3))
 
 #define dh_arg(t, n) \
   glue(glue(tcgv_, dh_alias(t)), _temp)(glue(arg, n))
 #define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \
     DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7)
 
-/* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */
+/* MAX_CALL_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */
 
 #endif /* EXEC_HELPER_HEAD_H */
diff --git a/include/exec/helper-info.c.inc b/include/exec/helper-info.c.inc
new file mode 100644 (file)
index 0000000..530d2e6
--- /dev/null
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Helper file for declaring TCG helper functions.
+ * This one expands info structures for tcg helpers.
+ * Define HELPER_H for the header file to be expanded.
+ */
+
+#include "tcg/tcg.h"
+#include "tcg/helper-info.h"
+#include "exec/helper-head.h"
+
+/*
+ * Need one more level of indirection before stringification
+ * to get all the macros expanded first.
+ */
+#define str(s) #s
+
+#define DEF_HELPER_FLAGS_0(NAME, FLAGS, RET)                            \
+    TCGHelperInfo glue(helper_info_, NAME) = {                          \
+        .func = HELPER(NAME), .name = str(NAME),                        \
+        .flags = FLAGS | dh_callflag(RET),                              \
+        .typemask = dh_typemask(RET, 0)                                 \
+    };
+
+#define DEF_HELPER_FLAGS_1(NAME, FLAGS, RET, T1)                        \
+    TCGHelperInfo glue(helper_info_, NAME) = {                          \
+        .func = HELPER(NAME), .name = str(NAME),                        \
+        .flags = FLAGS | dh_callflag(RET),                              \
+        .typemask = dh_typemask(RET, 0) | dh_typemask(T1, 1)            \
+    };
+
+#define DEF_HELPER_FLAGS_2(NAME, FLAGS, RET, T1, T2)                    \
+    TCGHelperInfo glue(helper_info_, NAME) = {                          \
+        .func = HELPER(NAME), .name = str(NAME),                        \
+        .flags = FLAGS | dh_callflag(RET),                              \
+        .typemask = dh_typemask(RET, 0) | dh_typemask(T1, 1)            \
+                  | dh_typemask(T2, 2)                                  \
+    };
+
+#define DEF_HELPER_FLAGS_3(NAME, FLAGS, RET, T1, T2, T3)                \
+    TCGHelperInfo glue(helper_info_, NAME) = {                          \
+        .func = HELPER(NAME), .name = str(NAME),                        \
+        .flags = FLAGS | dh_callflag(RET),                              \
+        .typemask = dh_typemask(RET, 0) | dh_typemask(T1, 1)            \
+                  | dh_typemask(T2, 2) | dh_typemask(T3, 3)             \
+    };
+
+#define DEF_HELPER_FLAGS_4(NAME, FLAGS, RET, T1, T2, T3, T4)            \
+    TCGHelperInfo glue(helper_info_, NAME) = {                          \
+        .func = HELPER(NAME), .name = str(NAME),                        \
+        .flags = FLAGS | dh_callflag(RET),                              \
+        .typemask = dh_typemask(RET, 0) | dh_typemask(T1, 1)            \
+                  | dh_typemask(T2, 2) | dh_typemask(T3, 3)             \
+                  | dh_typemask(T4, 4)                                  \
+    };
+
+#define DEF_HELPER_FLAGS_5(NAME, FLAGS, RET, T1, T2, T3, T4, T5)        \
+    TCGHelperInfo glue(helper_info_, NAME) = {                          \
+        .func = HELPER(NAME), .name = str(NAME),                        \
+        .flags = FLAGS | dh_callflag(RET),                              \
+        .typemask = dh_typemask(RET, 0) | dh_typemask(T1, 1)            \
+                  | dh_typemask(T2, 2) | dh_typemask(T3, 3)             \
+                  | dh_typemask(T4, 4) | dh_typemask(T5, 5)             \
+    };
+
+#define DEF_HELPER_FLAGS_6(NAME, FLAGS, RET, T1, T2, T3, T4, T5, T6)    \
+    TCGHelperInfo glue(helper_info_, NAME) = {                          \
+        .func = HELPER(NAME), .name = str(NAME),                        \
+        .flags = FLAGS | dh_callflag(RET),                              \
+        .typemask = dh_typemask(RET, 0) | dh_typemask(T1, 1)            \
+                  | dh_typemask(T2, 2) | dh_typemask(T3, 3)             \
+                  | dh_typemask(T4, 4) | dh_typemask(T5, 5)             \
+                  | dh_typemask(T6, 6)                                  \
+    };
+
+#define DEF_HELPER_FLAGS_7(NAME, FLAGS, RET, T1, T2, T3, T4, T5, T6, T7) \
+    TCGHelperInfo glue(helper_info_, NAME) = {                          \
+        .func = HELPER(NAME), .name = str(NAME),                        \
+        .flags = FLAGS | dh_callflag(RET),                              \
+        .typemask = dh_typemask(RET, 0) | dh_typemask(T1, 1)            \
+                  | dh_typemask(T2, 2) | dh_typemask(T3, 3)             \
+                  | dh_typemask(T4, 4) | dh_typemask(T5, 5)             \
+                  | dh_typemask(T6, 6) | dh_typemask(T7, 7)             \
+    };
+
+#include HELPER_H
+
+#undef str
+#undef DEF_HELPER_FLAGS_0
+#undef DEF_HELPER_FLAGS_1
+#undef DEF_HELPER_FLAGS_2
+#undef DEF_HELPER_FLAGS_3
+#undef DEF_HELPER_FLAGS_4
+#undef DEF_HELPER_FLAGS_5
+#undef DEF_HELPER_FLAGS_6
+#undef DEF_HELPER_FLAGS_7
diff --git a/include/exec/helper-proto-common.h b/include/exec/helper-proto-common.h
new file mode 100644 (file)
index 0000000..8b67170
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Helper file for declaring TCG helper functions.
+ * This one expands prototypes for the helper functions.
+ */
+
+#ifndef HELPER_PROTO_COMMON_H
+#define HELPER_PROTO_COMMON_H
+
+#include "qemu/atomic128.h"  /* for HAVE_CMPXCHG128 */
+
+#define HELPER_H "accel/tcg/tcg-runtime.h"
+#include "exec/helper-proto.h.inc"
+#undef  HELPER_H
+
+#define HELPER_H "accel/tcg/plugin-helpers.h"
+#include "exec/helper-proto.h.inc"
+#undef  HELPER_H
+
+#endif /* HELPER_PROTO_COMMON_H */
index 659f9298e8fe2935cd3ea9931d448dd9b6f88c69..6935cb4f16f8e2a5372ddf99c58d8fdff4e6ea4b 100644 (file)
@@ -1,56 +1,16 @@
-/* Helper file for declaring TCG helper functions.
-   This one expands prototypes for the helper functions.  */
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Helper file for declaring TCG helper functions.
+ * This one expands prototypes for the helper functions.
+ */
 
 #ifndef HELPER_PROTO_H
 #define HELPER_PROTO_H
 
-#include "exec/helper-head.h"
+#include "exec/helper-proto-common.h"
 
-#define DEF_HELPER_FLAGS_0(name, flags, ret) \
-dh_ctype(ret) HELPER(name) (void);
-
-#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1));
-
-#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2));
-
-#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3));
-
-#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-                                   dh_ctype(t4));
-
-#define DEF_HELPER_FLAGS_5(name, flags, ret, t1, t2, t3, t4, t5) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-                            dh_ctype(t4), dh_ctype(t5));
-
-#define DEF_HELPER_FLAGS_6(name, flags, ret, t1, t2, t3, t4, t5, t6) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-                            dh_ctype(t4), dh_ctype(t5), dh_ctype(t6));
-
-#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-                            dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \
-                            dh_ctype(t7));
-
-#define IN_HELPER_PROTO
-
-#include "helper.h"
-#include "trace/generated-helpers.h"
-#include "tcg-runtime.h"
-#include "plugin-helpers.h"
-
-#undef IN_HELPER_PROTO
-
-#undef DEF_HELPER_FLAGS_0
-#undef DEF_HELPER_FLAGS_1
-#undef DEF_HELPER_FLAGS_2
-#undef DEF_HELPER_FLAGS_3
-#undef DEF_HELPER_FLAGS_4
-#undef DEF_HELPER_FLAGS_5
-#undef DEF_HELPER_FLAGS_6
-#undef DEF_HELPER_FLAGS_7
+#define HELPER_H "helper.h"
+#include "exec/helper-proto.h.inc"
+#undef  HELPER_H
 
 #endif /* HELPER_PROTO_H */
diff --git a/include/exec/helper-proto.h.inc b/include/exec/helper-proto.h.inc
new file mode 100644 (file)
index 0000000..c3aa666
--- /dev/null
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Helper file for declaring TCG helper functions.
+ * This one expands prototypes for the helper functions.
+ * Define HELPER_H for the header file to be expanded.
+ */
+
+#include "exec/helper-head.h"
+
+/*
+ * Work around an issue with --enable-lto, in which GCC's ipa-split pass
+ * decides to split out the noreturn code paths that raise an exception,
+ * taking the __builtin_return_address() along into the new function,
+ * where it no longer computes a value that returns to TCG generated code.
+ * Despite the name, the noinline attribute affects splitter, so this
+ * prevents the optimization in question.  Given that helpers should not
+ * otherwise be called directly, this should not have any other visible effect.
+ *
+ * See https://gitlab.com/qemu-project/qemu/-/issues/1454
+ */
+#define DEF_HELPER_ATTR  __attribute__((noinline))
+
+#define DEF_HELPER_FLAGS_0(name, flags, ret) \
+dh_ctype(ret) HELPER(name) (void) DEF_HELPER_ATTR;
+
+#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
+dh_ctype(ret) HELPER(name) (dh_ctype(t1)) DEF_HELPER_ATTR;
+
+#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2)) DEF_HELPER_ATTR;
+
+#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), \
+                            dh_ctype(t3)) DEF_HELPER_ATTR;
+
+#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
+                            dh_ctype(t4)) DEF_HELPER_ATTR;
+
+#define DEF_HELPER_FLAGS_5(name, flags, ret, t1, t2, t3, t4, t5) \
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
+                            dh_ctype(t4), dh_ctype(t5)) DEF_HELPER_ATTR;
+
+#define DEF_HELPER_FLAGS_6(name, flags, ret, t1, t2, t3, t4, t5, t6) \
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
+                            dh_ctype(t4), dh_ctype(t5), \
+                            dh_ctype(t6)) DEF_HELPER_ATTR;
+
+#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7) \
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
+                            dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \
+                            dh_ctype(t7)) DEF_HELPER_ATTR;
+
+#define IN_HELPER_PROTO
+
+#include HELPER_H
+
+#undef IN_HELPER_PROTO
+
+#undef DEF_HELPER_FLAGS_0
+#undef DEF_HELPER_FLAGS_1
+#undef DEF_HELPER_FLAGS_2
+#undef DEF_HELPER_FLAGS_3
+#undef DEF_HELPER_FLAGS_4
+#undef DEF_HELPER_FLAGS_5
+#undef DEF_HELPER_FLAGS_6
+#undef DEF_HELPER_FLAGS_7
+#undef DEF_HELPER_ATTR
diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h
deleted file mode 100644 (file)
index 2787050..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-/* Helper file for declaring TCG helper functions.
-   This one defines data structures private to tcg.c.  */
-
-#ifndef HELPER_TCG_H
-#define HELPER_TCG_H
-
-#include "exec/helper-head.h"
-
-/* Need one more level of indirection before stringification
-   to get all the macros expanded first.  */
-#define str(s) #s
-
-#define DEF_HELPER_FLAGS_0(NAME, FLAGS, ret) \
-  { .func = HELPER(NAME), .name = str(NAME), \
-    .flags = FLAGS | dh_callflag(ret), \
-    .sizemask = dh_sizemask(ret, 0) },
-
-#define DEF_HELPER_FLAGS_1(NAME, FLAGS, ret, t1) \
-  { .func = HELPER(NAME), .name = str(NAME), \
-    .flags = FLAGS | dh_callflag(ret), \
-    .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) },
-
-#define DEF_HELPER_FLAGS_2(NAME, FLAGS, ret, t1, t2) \
-  { .func = HELPER(NAME), .name = str(NAME), \
-    .flags = FLAGS | dh_callflag(ret), \
-    .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
-    | dh_sizemask(t2, 2) },
-
-#define DEF_HELPER_FLAGS_3(NAME, FLAGS, ret, t1, t2, t3) \
-  { .func = HELPER(NAME), .name = str(NAME), \
-    .flags = FLAGS | dh_callflag(ret), \
-    .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
-    | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) },
-
-#define DEF_HELPER_FLAGS_4(NAME, FLAGS, ret, t1, t2, t3, t4) \
-  { .func = HELPER(NAME), .name = str(NAME), \
-    .flags = FLAGS | dh_callflag(ret), \
-    .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
-    | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) },
-
-#define DEF_HELPER_FLAGS_5(NAME, FLAGS, ret, t1, t2, t3, t4, t5) \
-  { .func = HELPER(NAME), .name = str(NAME), \
-    .flags = FLAGS | dh_callflag(ret), \
-    .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
-    | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \
-    | dh_sizemask(t5, 5) },
-
-#define DEF_HELPER_FLAGS_6(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6) \
-  { .func = HELPER(NAME), .name = str(NAME), \
-    .flags = FLAGS | dh_callflag(ret), \
-    .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
-    | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \
-    | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) },
-
-#define DEF_HELPER_FLAGS_7(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6, t7) \
-  { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \
-    .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
-    | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \
-    | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) | dh_sizemask(t7, 7) },
-
-#include "helper.h"
-#include "trace/generated-helpers.h"
-#include "tcg-runtime.h"
-#include "plugin-helpers.h"
-
-#undef str
-#undef DEF_HELPER_FLAGS_0
-#undef DEF_HELPER_FLAGS_1
-#undef DEF_HELPER_FLAGS_2
-#undef DEF_HELPER_FLAGS_3
-#undef DEF_HELPER_FLAGS_4
-#undef DEF_HELPER_FLAGS_5
-#undef DEF_HELPER_FLAGS_6
-#undef DEF_HELPER_FLAGS_7
-
-#endif /* HELPER_TCG_H */
index 8f16d179a88599dcd8b82aaafbc80193f4d3f846..50fbb2d96c86401bc3780a4f314b8cf5b8f36694 100644 (file)
@@ -10,7 +10,7 @@
 
 typedef uint64_t hwaddr;
 #define HWADDR_MAX UINT64_MAX
-#define TARGET_FMT_plx "%016" PRIx64
+#define HWADDR_FMT_plx "%016" PRIx64
 #define HWADDR_PRId PRId64
 #define HWADDR_PRIi PRIi64
 #define HWADDR_PRIo PRIo64
index e02fff5de165c979a46769acf1ac04c8cf9f9475..4a7375a45fb5520936ef3ff6493605e1b3b97e76 100644 (file)
  */
 static inline void log_cpu_state(CPUState *cpu, int flags)
 {
-    QemuLogFile *logfile;
-
-    if (qemu_log_enabled()) {
-        rcu_read_lock();
-        logfile = qatomic_rcu_read(&qemu_logfile);
-        if (logfile) {
-            cpu_dump_state(cpu, logfile->fd, flags);
-        }
-        rcu_read_unlock();
+    FILE *f = qemu_log_trylock();
+    if (f) {
+        cpu_dump_state(cpu, f, flags);
+        qemu_log_unlock(f);
     }
 }
 
@@ -42,43 +37,4 @@ static inline void log_cpu_state_mask(int mask, CPUState *cpu, int flags)
     }
 }
 
-#ifdef NEED_CPU_H
-/* disas() and target_disas() to qemu_logfile: */
-static inline void log_target_disas(CPUState *cpu, target_ulong start,
-                                    target_ulong len)
-{
-    QemuLogFile *logfile;
-    rcu_read_lock();
-    logfile = qatomic_rcu_read(&qemu_logfile);
-    if (logfile) {
-        target_disas(logfile->fd, cpu, start, len);
-    }
-    rcu_read_unlock();
-}
-
-static inline void log_disas(void *code, unsigned long size)
-{
-    QemuLogFile *logfile;
-    rcu_read_lock();
-    logfile = qatomic_rcu_read(&qemu_logfile);
-    if (logfile) {
-        disas(logfile->fd, code, size);
-    }
-    rcu_read_unlock();
-}
-
-#if defined(CONFIG_USER_ONLY)
-/* page_dump() output to the log file: */
-static inline void log_page_dump(const char *operation)
-{
-    FILE *logfile = qemu_log_lock();
-    if (logfile) {
-        qemu_log("page layout changed following %s\n", operation);
-        page_dump(logfile);
-    }
-    qemu_log_unlock(logfile);
-}
-#endif
-#endif
-
 #endif
index 95f2d20d55b8266903eb430084a2f8b7c39759e7..d04170aa27ae4c816395ccce275a2e2a0dfcd002 100644 (file)
@@ -29,12 +29,27 @@ typedef struct MemTxAttrs {
      * "didn't specify" if necessary.
      */
     unsigned int unspecified:1;
-    /* ARM/AMBA: TrustZone Secure access
+    /*
+     * ARM/AMBA: TrustZone Secure access
      * x86: System Management Mode access
      */
     unsigned int secure:1;
+    /*
+     * ARM: ArmSecuritySpace.  This partially overlaps secure, but it is
+     * easier to have both fields to assist code that does not understand
+     * ARMv9 RME, or no specific knowledge of ARM at all (e.g. pflash).
+     */
+    unsigned int space:2;
     /* Memory access is usermode (unprivileged) */
     unsigned int user:1;
+    /*
+     * Bus interconnect and peripherals can access anything (memories,
+     * devices) by default. By setting the 'memory' bit, bus transaction
+     * are restricted to "normal" memories (per the AMBA documentation)
+     * versus devices. Access to devices will be logged and rejected
+     * (see MEMTX_ACCESS_ERROR).
+     */
+    unsigned int memory:1;
     /* Requester ID (for MSI for example) */
     unsigned int requester_id:16;
     /* Invert endianness for this page */
@@ -66,6 +81,7 @@ typedef struct MemTxAttrs {
 #define MEMTX_OK 0
 #define MEMTX_ERROR             (1U << 0) /* device returned an error */
 #define MEMTX_DECODE_ERROR      (1U << 1) /* nothing at that address */
+#define MEMTX_ACCESS_ERROR      (1U << 2) /* access denied */
 typedef uint32_t MemTxResult;
 
 #endif
index 529d07b02dd4e870cc491ce45c09cd4e58121719..a86dc6743a620f021c0a34370a2affbdad25ae99 100644 (file)
@@ -19,12 +19,16 @@ typedef enum MemOp {
     MO_16    = 1,
     MO_32    = 2,
     MO_64    = 3,
-    MO_SIZE  = 3,   /* Mask for the above.  */
+    MO_128   = 4,
+    MO_256   = 5,
+    MO_512   = 6,
+    MO_1024  = 7,
+    MO_SIZE  = 0x07,   /* Mask for the above.  */
 
-    MO_SIGN  = 4,   /* Sign-extended, otherwise zero-extended.  */
+    MO_SIGN  = 0x08,   /* Sign-extended, otherwise zero-extended.  */
 
-    MO_BSWAP = 8,   /* Host reverse endian.  */
-#ifdef HOST_WORDS_BIGENDIAN
+    MO_BSWAP = 0x10,   /* Host reverse endian.  */
+#if HOST_BIG_ENDIAN
     MO_LE    = MO_BSWAP,
     MO_BE    = 0,
 #else
@@ -32,7 +36,7 @@ typedef enum MemOp {
     MO_BE    = MO_BSWAP,
 #endif
 #ifdef NEED_CPU_H
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
     MO_TE    = MO_BE,
 #else
     MO_TE    = MO_LE,
@@ -43,8 +47,6 @@ typedef enum MemOp {
      * MO_UNALN accesses are never checked for alignment.
      * MO_ALIGN accesses will result in a call to the CPU's
      * do_unaligned_access hook if the guest address is not aligned.
-     * The default depends on whether the target CPU defines
-     * TARGET_ALIGNED_ONLY.
      *
      * Some architectures (e.g. ARMv8) need the address which is aligned
      * to a size more than the size of the memory access.
@@ -59,51 +61,88 @@ typedef enum MemOp {
      * - an alignment to a specified size, which may be more or less than
      *   the access size (MO_ALIGN_x where 'x' is a size in bytes);
      */
-    MO_ASHIFT = 4,
-    MO_AMASK = 7 << MO_ASHIFT,
-#ifdef NEED_CPU_H
-#ifdef TARGET_ALIGNED_ONLY
-    MO_ALIGN = 0,
-    MO_UNALN = MO_AMASK,
-#else
-    MO_ALIGN = MO_AMASK,
-    MO_UNALN = 0,
-#endif
-#endif
+    MO_ASHIFT = 5,
+    MO_AMASK = 0x7 << MO_ASHIFT,
+    MO_UNALN    = 0,
     MO_ALIGN_2  = 1 << MO_ASHIFT,
     MO_ALIGN_4  = 2 << MO_ASHIFT,
     MO_ALIGN_8  = 3 << MO_ASHIFT,
     MO_ALIGN_16 = 4 << MO_ASHIFT,
     MO_ALIGN_32 = 5 << MO_ASHIFT,
     MO_ALIGN_64 = 6 << MO_ASHIFT,
+    MO_ALIGN    = MO_AMASK,
+
+    /*
+     * MO_ATOM_* describes the atomicity requirements of the operation:
+     * MO_ATOM_IFALIGN: the operation must be single-copy atomic if it
+     *    is aligned; if unaligned there is no atomicity.
+     * MO_ATOM_IFALIGN_PAIR: the entire operation may be considered to
+     *    be a pair of half-sized operations which are packed together
+     *    for convenience, with single-copy atomicity on each half if
+     *    the half is aligned.
+     *    This is the atomicity e.g. of Arm pre-FEAT_LSE2 LDP.
+     * MO_ATOM_WITHIN16: the operation is single-copy atomic, even if it
+     *    is unaligned, so long as it does not cross a 16-byte boundary;
+     *    if it crosses a 16-byte boundary there is no atomicity.
+     *    This is the atomicity e.g. of Arm FEAT_LSE2 LDR.
+     * MO_ATOM_WITHIN16_PAIR: the entire operation is single-copy atomic,
+     *    if it happens to be within a 16-byte boundary, otherwise it
+     *    devolves to a pair of half-sized MO_ATOM_WITHIN16 operations.
+     *    Depending on alignment, one or both will be single-copy atomic.
+     *    This is the atomicity e.g. of Arm FEAT_LSE2 LDP.
+     * MO_ATOM_SUBALIGN: the operation is single-copy atomic by parts
+     *    by the alignment.  E.g. if the address is 0 mod 4, then each
+     *    4-byte subobject is single-copy atomic.
+     *    This is the atomicity e.g. of IBM Power.
+     * MO_ATOM_NONE: the operation has no atomicity requirements.
+     *
+     * Note the default (i.e. 0) value is single-copy atomic to the
+     * size of the operation, if aligned.  This retains the behaviour
+     * from before this field was introduced.
+     */
+    MO_ATOM_SHIFT         = 8,
+    MO_ATOM_IFALIGN       = 0 << MO_ATOM_SHIFT,
+    MO_ATOM_IFALIGN_PAIR  = 1 << MO_ATOM_SHIFT,
+    MO_ATOM_WITHIN16      = 2 << MO_ATOM_SHIFT,
+    MO_ATOM_WITHIN16_PAIR = 3 << MO_ATOM_SHIFT,
+    MO_ATOM_SUBALIGN      = 4 << MO_ATOM_SHIFT,
+    MO_ATOM_NONE          = 5 << MO_ATOM_SHIFT,
+    MO_ATOM_MASK          = 7 << MO_ATOM_SHIFT,
 
     /* Combinations of the above, for ease of use.  */
     MO_UB    = MO_8,
     MO_UW    = MO_16,
     MO_UL    = MO_32,
+    MO_UQ    = MO_64,
+    MO_UO    = MO_128,
     MO_SB    = MO_SIGN | MO_8,
     MO_SW    = MO_SIGN | MO_16,
     MO_SL    = MO_SIGN | MO_32,
-    MO_Q     = MO_64,
+    MO_SQ    = MO_SIGN | MO_64,
+    MO_SO    = MO_SIGN | MO_128,
 
     MO_LEUW  = MO_LE | MO_UW,
     MO_LEUL  = MO_LE | MO_UL,
+    MO_LEUQ  = MO_LE | MO_UQ,
     MO_LESW  = MO_LE | MO_SW,
     MO_LESL  = MO_LE | MO_SL,
-    MO_LEQ   = MO_LE | MO_Q,
+    MO_LESQ  = MO_LE | MO_SQ,
 
     MO_BEUW  = MO_BE | MO_UW,
     MO_BEUL  = MO_BE | MO_UL,
+    MO_BEUQ  = MO_BE | MO_UQ,
     MO_BESW  = MO_BE | MO_SW,
     MO_BESL  = MO_BE | MO_SL,
-    MO_BEQ   = MO_BE | MO_Q,
+    MO_BESQ  = MO_BE | MO_SQ,
 
 #ifdef NEED_CPU_H
     MO_TEUW  = MO_TE | MO_UW,
     MO_TEUL  = MO_TE | MO_UL,
+    MO_TEUQ  = MO_TE | MO_UQ,
+    MO_TEUO  = MO_TE | MO_UO,
     MO_TESW  = MO_TE | MO_SW,
     MO_TESL  = MO_TE | MO_SL,
-    MO_TEQ   = MO_TE | MO_Q,
+    MO_TESQ  = MO_TE | MO_SQ,
 #endif
 
     MO_SSIZE = MO_SIZE | MO_SIGN,
diff --git a/include/exec/memopidx.h b/include/exec/memopidx.h
new file mode 100644 (file)
index 0000000..eb7f159
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Combine the MemOp and mmu_idx parameters into a single value.
+ *
+ * Authors:
+ *  Richard Henderson <rth@twiddle.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef EXEC_MEMOPIDX_H
+#define EXEC_MEMOPIDX_H
+
+#include "exec/memop.h"
+
+typedef uint32_t MemOpIdx;
+
+/**
+ * make_memop_idx
+ * @op: memory operation
+ * @idx: mmu index
+ *
+ * Encode these values into a single parameter.
+ */
+static inline MemOpIdx make_memop_idx(MemOp op, unsigned idx)
+{
+#ifdef CONFIG_DEBUG_TCG
+    assert(idx <= 15);
+#endif
+    return (op << 4) | idx;
+}
+
+/**
+ * get_memop
+ * @oi: combined op/idx parameter
+ *
+ * Extract the memory operation from the combined value.
+ */
+static inline MemOp get_memop(MemOpIdx oi)
+{
+    return oi >> 4;
+}
+
+/**
+ * get_mmuidx
+ * @oi: combined op/idx parameter
+ *
+ * Extract the mmu index from the combined value.
+ */
+static inline unsigned get_mmuidx(MemOpIdx oi)
+{
+    return oi & 15;
+}
+
+#endif
index 9fcc2af25c884d76ede07cee426452728c80ac67..100c1237ac21520294070c4585d4f4806ddd24b4 100644 (file)
@@ -38,10 +38,6 @@ void flatview_unref(FlatView *view);
 
 extern const MemoryRegionOps unassigned_mem_ops;
 
-bool memory_region_access_valid(MemoryRegion *mr, hwaddr addr,
-                                unsigned size, bool is_write,
-                                MemTxAttrs attrs);
-
 void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section);
 AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv);
 void address_space_dispatch_compact(AddressSpaceDispatch *d);
index 0f3e6bcd5e74a9969913bccd1d93a073f190e55b..831f7c996d9da49cdf9884fdeffa32959865cb07 100644 (file)
@@ -24,6 +24,7 @@
 #include "qemu/bswap.h"
 #include "qemu/queue.h"
 #include "qemu/int128.h"
+#include "qemu/range.h"
 #include "qemu/notify.h"
 #include "qom/object.h"
 #include "qemu/rcu.h"
 #define MAX_PHYS_ADDR_SPACE_BITS 62
 #define MAX_PHYS_ADDR            (((hwaddr)1 << MAX_PHYS_ADDR_SPACE_BITS) - 1)
 
-#define TYPE_MEMORY_REGION "qemu:memory-region"
+#define TYPE_MEMORY_REGION "memory-region"
 DECLARE_INSTANCE_CHECKER(MemoryRegion, MEMORY_REGION,
                          TYPE_MEMORY_REGION)
 
-#define TYPE_IOMMU_MEMORY_REGION "qemu:iommu-memory-region"
+#define TYPE_IOMMU_MEMORY_REGION "iommu-memory-region"
 typedef struct IOMMUMemoryRegionClass IOMMUMemoryRegionClass;
 DECLARE_OBJ_CHECKERS(IOMMUMemoryRegion, IOMMUMemoryRegionClass,
                      IOMMU_MEMORY_REGION, TYPE_IOMMU_MEMORY_REGION)
 
+#define TYPE_RAM_DISCARD_MANAGER "qemu:ram-discard-manager"
+typedef struct RamDiscardManagerClass RamDiscardManagerClass;
+typedef struct RamDiscardManager RamDiscardManager;
+DECLARE_OBJ_CHECKERS(RamDiscardManager, RamDiscardManagerClass,
+                     RAM_DISCARD_MANAGER, TYPE_RAM_DISCARD_MANAGER);
+
 #ifdef CONFIG_FUZZ
 void fuzz_dma_read_cb(size_t addr,
                       size_t len,
-                      MemoryRegion *mr,
-                      bool is_write);
+                      MemoryRegion *mr);
 #else
 static inline void fuzz_dma_read_cb(size_t addr,
                                     size_t len,
-                                    MemoryRegion *mr,
-                                    bool is_write)
+                                    MemoryRegion *mr)
 {
     /* Do Nothing */
 }
 #endif
 
-extern bool global_dirty_log;
+/* Possible bits for global_dirty_log_{start|stop} */
+
+/* Dirty tracking enabled because migration is running */
+#define GLOBAL_DIRTY_MIGRATION  (1U << 0)
+
+/* Dirty tracking enabled because measuring dirty rate */
+#define GLOBAL_DIRTY_DIRTY_RATE (1U << 1)
+
+/* Dirty tracking enabled because dirty limit */
+#define GLOBAL_DIRTY_LIMIT      (1U << 2)
+
+#define GLOBAL_DIRTY_MASK  (0x7)
+
+extern unsigned int global_dirty_tracking;
 
 typedef struct MemoryRegionOps MemoryRegionOps;
 
 struct ReservedRegion {
-    hwaddr low;
-    hwaddr high;
+    Range range;
     unsigned type;
 };
 
+/**
+ * struct MemoryRegionSection: describes a fragment of a #MemoryRegion
+ *
+ * @mr: the region, or %NULL if empty
+ * @fv: the flat view of the address space the region is mapped in
+ * @offset_within_region: the beginning of the section, relative to @mr's start
+ * @size: the size of the section; will not exceed @mr's boundaries
+ * @offset_within_address_space: the address of the first byte of the section
+ *     relative to the region's address space
+ * @readonly: writes to this section are ignored
+ * @nonvolatile: this section is non-volatile
+ * @unmergeable: this section should not get merged with adjacent sections
+ */
+struct MemoryRegionSection {
+    Int128 size;
+    MemoryRegion *mr;
+    FlatView *fv;
+    hwaddr offset_within_region;
+    hwaddr offset_within_address_space;
+    bool readonly;
+    bool nonvolatile;
+    bool unmergeable;
+};
+
 typedef struct IOMMUTLBEntry IOMMUTLBEntry;
 
 /* See address_space_translate: bit 0 is read, bit 1 is write.  */
@@ -90,6 +131,32 @@ struct IOMMUTLBEntry {
 /*
  * Bitmap for different IOMMUNotifier capabilities. Each notifier can
  * register with one or multiple IOMMU Notifier capability bit(s).
+ *
+ * Normally there're two use cases for the notifiers:
+ *
+ *   (1) When the device needs accurate synchronizations of the vIOMMU page
+ *       tables, it needs to register with both MAP|UNMAP notifies (which
+ *       is defined as IOMMU_NOTIFIER_IOTLB_EVENTS below).
+ *
+ *       Regarding to accurate synchronization, it's when the notified
+ *       device maintains a shadow page table and must be notified on each
+ *       guest MAP (page table entry creation) and UNMAP (invalidation)
+ *       events (e.g. VFIO). Both notifications must be accurate so that
+ *       the shadow page table is fully in sync with the guest view.
+ *
+ *   (2) When the device doesn't need accurate synchronizations of the
+ *       vIOMMU page tables, it needs to register only with UNMAP or
+ *       DEVIOTLB_UNMAP notifies.
+ *
+ *       It's when the device maintains a cache of IOMMU translations
+ *       (IOTLB) and is able to fill that cache by requesting translations
+ *       from the vIOMMU through a protocol similar to ATS (Address
+ *       Translation Service).
+ *
+ *       Note that in this mode the vIOMMU will not maintain a shadowed
+ *       page table for the address space, and the UNMAP messages can cover
+ *       more than the pages that used to get mapped.  The IOMMU notifiee
+ *       should be able to take care of over-sized invalidations.
  */
 typedef enum {
     IOMMU_NOTIFIER_NONE = 0,
@@ -97,9 +164,14 @@ typedef enum {
     IOMMU_NOTIFIER_UNMAP = 0x1,
     /* Notify entry changes (newly created entries) */
     IOMMU_NOTIFIER_MAP = 0x2,
+    /* Notify changes on device IOTLB entries */
+    IOMMU_NOTIFIER_DEVIOTLB_UNMAP = 0x04,
 } IOMMUNotifierFlag;
 
-#define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP)
+#define IOMMU_NOTIFIER_IOTLB_EVENTS (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP)
+#define IOMMU_NOTIFIER_DEVIOTLB_EVENTS IOMMU_NOTIFIER_DEVIOTLB_UNMAP
+#define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_IOTLB_EVENTS | \
+                            IOMMU_NOTIFIER_DEVIOTLB_EVENTS)
 
 struct IOMMUNotifier;
 typedef void (*IOMMUNotify)(struct IOMMUNotifier *notifier,
@@ -116,6 +188,11 @@ struct IOMMUNotifier {
 };
 typedef struct IOMMUNotifier IOMMUNotifier;
 
+typedef struct IOMMUTLBEvent {
+    IOMMUNotifierFlag type;
+    IOMMUTLBEntry entry;
+} IOMMUTLBEvent;
+
 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
 #define RAM_PREALLOC   (1 << 0)
 
@@ -123,7 +200,7 @@ typedef struct IOMMUNotifier IOMMUNotifier;
 #define RAM_SHARED     (1 << 1)
 
 /* Only a portion of RAM (used_length) is actually used, and migrated.
- * This used_length size can change across reboots.
+ * Resizing RAM while migrating can result in the migration being canceled.
  */
 #define RAM_RESIZEABLE (1 << 2)
 
@@ -139,6 +216,33 @@ typedef struct IOMMUNotifier IOMMUNotifier;
 /* RAM is a persistent kind memory */
 #define RAM_PMEM (1 << 5)
 
+
+/*
+ * UFFDIO_WRITEPROTECT is used on this RAMBlock to
+ * support 'write-tracking' migration type.
+ * Implies ram_state->ram_wt_enabled.
+ */
+#define RAM_UF_WRITEPROTECT (1 << 6)
+
+/*
+ * RAM is mmap-ed with MAP_NORESERVE. When set, reserving swap space (or huge
+ * pages if applicable) is skipped: will bail out if not supported. When not
+ * set, the OS will do the reservation, if supported for the memory type.
+ */
+#define RAM_NORESERVE (1 << 7)
+
+/* RAM that isn't accessible through normal means. */
+#define RAM_PROTECTED (1 << 8)
+
+/* RAM is an mmap-ed named file */
+#define RAM_NAMED_FILE (1 << 9)
+
+/* RAM is mmap-ed read-only */
+#define RAM_READONLY (1 << 10)
+
+/* RAM FD is opened read-only */
+#define RAM_READONLY_FD (1 << 11)
+
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                        IOMMUNotifierFlag flags,
                                        hwaddr start, hwaddr end,
@@ -236,7 +340,7 @@ enum IOMMUMemoryRegionAttr {
  * The IOMMU implementation must use the IOMMU notifier infrastructure
  * to report whenever mappings are changed, by calling
  * memory_region_notify_iommu() (or, if necessary, by calling
- * memory_region_notify_one() for each registered notifier).
+ * memory_region_notify_iommu_one() for each registered notifier).
  *
  * Conceptually an IOMMU provides a mapping from input address
  * to an output TLB entry. If the IOMMU is aware of memory transaction
@@ -423,8 +527,254 @@ struct IOMMUMemoryRegionClass {
      int (*iommu_set_page_size_mask)(IOMMUMemoryRegion *iommu,
                                      uint64_t page_size_mask,
                                      Error **errp);
+    /**
+     * @iommu_set_iova_ranges:
+     *
+     * Propagate information about the usable IOVA ranges for a given IOMMU
+     * memory region. Used for example to propagate host physical device
+     * reserved memory region constraints to the virtual IOMMU.
+     *
+     * Optional method: if this method is not provided, then the default IOVA
+     * aperture is used.
+     *
+     * @iommu: the IOMMUMemoryRegion
+     *
+     * @iova_ranges: list of ordered IOVA ranges (at least one range)
+     *
+     * Returns 0 on success, or a negative error. In case of failure, the error
+     * object must be created.
+     */
+     int (*iommu_set_iova_ranges)(IOMMUMemoryRegion *iommu,
+                                  GList *iova_ranges,
+                                  Error **errp);
+};
+
+typedef struct RamDiscardListener RamDiscardListener;
+typedef int (*NotifyRamPopulate)(RamDiscardListener *rdl,
+                                 MemoryRegionSection *section);
+typedef void (*NotifyRamDiscard)(RamDiscardListener *rdl,
+                                 MemoryRegionSection *section);
+
+struct RamDiscardListener {
+    /*
+     * @notify_populate:
+     *
+     * Notification that previously discarded memory is about to get populated.
+     * Listeners are able to object. If any listener objects, already
+     * successfully notified listeners are notified about a discard again.
+     *
+     * @rdl: the #RamDiscardListener getting notified
+     * @section: the #MemoryRegionSection to get populated. The section
+     *           is aligned within the memory region to the minimum granularity
+     *           unless it would exceed the registered section.
+     *
+     * Returns 0 on success. If the notification is rejected by the listener,
+     * an error is returned.
+     */
+    NotifyRamPopulate notify_populate;
+
+    /*
+     * @notify_discard:
+     *
+     * Notification that previously populated memory was discarded successfully
+     * and listeners should drop all references to such memory and prevent
+     * new population (e.g., unmap).
+     *
+     * @rdl: the #RamDiscardListener getting notified
+     * @section: the #MemoryRegionSection to get populated. The section
+     *           is aligned within the memory region to the minimum granularity
+     *           unless it would exceed the registered section.
+     */
+    NotifyRamDiscard notify_discard;
+
+    /*
+     * @double_discard_supported:
+     *
+     * The listener suppors getting @notify_discard notifications that span
+     * already discarded parts.
+     */
+    bool double_discard_supported;
+
+    MemoryRegionSection *section;
+    QLIST_ENTRY(RamDiscardListener) next;
 };
 
+static inline void ram_discard_listener_init(RamDiscardListener *rdl,
+                                             NotifyRamPopulate populate_fn,
+                                             NotifyRamDiscard discard_fn,
+                                             bool double_discard_supported)
+{
+    rdl->notify_populate = populate_fn;
+    rdl->notify_discard = discard_fn;
+    rdl->double_discard_supported = double_discard_supported;
+}
+
+typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque);
+typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque);
+
+/*
+ * RamDiscardManagerClass:
+ *
+ * A #RamDiscardManager coordinates which parts of specific RAM #MemoryRegion
+ * regions are currently populated to be used/accessed by the VM, notifying
+ * after parts were discarded (freeing up memory) and before parts will be
+ * populated (consuming memory), to be used/accessed by the VM.
+ *
+ * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the
+ * #MemoryRegion isn't mapped into an address space yet (either directly
+ * or via an alias); it cannot change while the #MemoryRegion is
+ * mapped into an address space.
+ *
+ * The #RamDiscardManager is intended to be used by technologies that are
+ * incompatible with discarding of RAM (e.g., VFIO, which may pin all
+ * memory inside a #MemoryRegion), and require proper coordination to only
+ * map the currently populated parts, to hinder parts that are expected to
+ * remain discarded from silently getting populated and consuming memory.
+ * Technologies that support discarding of RAM don't have to bother and can
+ * simply map the whole #MemoryRegion.
+ *
+ * An example #RamDiscardManager is virtio-mem, which logically (un)plugs
+ * memory within an assigned RAM #MemoryRegion, coordinated with the VM.
+ * Logically unplugging memory consists of discarding RAM. The VM agreed to not
+ * access unplugged (discarded) memory - especially via DMA. virtio-mem will
+ * properly coordinate with listeners before memory is plugged (populated),
+ * and after memory is unplugged (discarded).
+ *
+ * Listeners are called in multiples of the minimum granularity (unless it
+ * would exceed the registered range) and changes are aligned to the minimum
+ * granularity within the #MemoryRegion. Listeners have to prepare for memory
+ * becoming discarded in a different granularity than it was populated and the
+ * other way around.
+ */
+struct RamDiscardManagerClass {
+    /* private */
+    InterfaceClass parent_class;
+
+    /* public */
+
+    /**
+     * @get_min_granularity:
+     *
+     * Get the minimum granularity in which listeners will get notified
+     * about changes within the #MemoryRegion via the #RamDiscardManager.
+     *
+     * @rdm: the #RamDiscardManager
+     * @mr: the #MemoryRegion
+     *
+     * Returns the minimum granularity.
+     */
+    uint64_t (*get_min_granularity)(const RamDiscardManager *rdm,
+                                    const MemoryRegion *mr);
+
+    /**
+     * @is_populated:
+     *
+     * Check whether the given #MemoryRegionSection is completely populated
+     * (i.e., no parts are currently discarded) via the #RamDiscardManager.
+     * There are no alignment requirements.
+     *
+     * @rdm: the #RamDiscardManager
+     * @section: the #MemoryRegionSection
+     *
+     * Returns whether the given range is completely populated.
+     */
+    bool (*is_populated)(const RamDiscardManager *rdm,
+                         const MemoryRegionSection *section);
+
+    /**
+     * @replay_populated:
+     *
+     * Call the #ReplayRamPopulate callback for all populated parts within the
+     * #MemoryRegionSection via the #RamDiscardManager.
+     *
+     * In case any call fails, no further calls are made.
+     *
+     * @rdm: the #RamDiscardManager
+     * @section: the #MemoryRegionSection
+     * @replay_fn: the #ReplayRamPopulate callback
+     * @opaque: pointer to forward to the callback
+     *
+     * Returns 0 on success, or a negative error if any notification failed.
+     */
+    int (*replay_populated)(const RamDiscardManager *rdm,
+                            MemoryRegionSection *section,
+                            ReplayRamPopulate replay_fn, void *opaque);
+
+    /**
+     * @replay_discarded:
+     *
+     * Call the #ReplayRamDiscard callback for all discarded parts within the
+     * #MemoryRegionSection via the #RamDiscardManager.
+     *
+     * @rdm: the #RamDiscardManager
+     * @section: the #MemoryRegionSection
+     * @replay_fn: the #ReplayRamDiscard callback
+     * @opaque: pointer to forward to the callback
+     */
+    void (*replay_discarded)(const RamDiscardManager *rdm,
+                             MemoryRegionSection *section,
+                             ReplayRamDiscard replay_fn, void *opaque);
+
+    /**
+     * @register_listener:
+     *
+     * Register a #RamDiscardListener for the given #MemoryRegionSection and
+     * immediately notify the #RamDiscardListener about all populated parts
+     * within the #MemoryRegionSection via the #RamDiscardManager.
+     *
+     * In case any notification fails, no further notifications are triggered
+     * and an error is logged.
+     *
+     * @rdm: the #RamDiscardManager
+     * @rdl: the #RamDiscardListener
+     * @section: the #MemoryRegionSection
+     */
+    void (*register_listener)(RamDiscardManager *rdm,
+                              RamDiscardListener *rdl,
+                              MemoryRegionSection *section);
+
+    /**
+     * @unregister_listener:
+     *
+     * Unregister a previously registered #RamDiscardListener via the
+     * #RamDiscardManager after notifying the #RamDiscardListener about all
+     * populated parts becoming unpopulated within the registered
+     * #MemoryRegionSection.
+     *
+     * @rdm: the #RamDiscardManager
+     * @rdl: the #RamDiscardListener
+     */
+    void (*unregister_listener)(RamDiscardManager *rdm,
+                                RamDiscardListener *rdl);
+};
+
+uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
+                                                 const MemoryRegion *mr);
+
+bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
+                                      const MemoryRegionSection *section);
+
+int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
+                                         MemoryRegionSection *section,
+                                         ReplayRamPopulate replay_fn,
+                                         void *opaque);
+
+void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
+                                          MemoryRegionSection *section,
+                                          ReplayRamDiscard replay_fn,
+                                          void *opaque);
+
+void ram_discard_manager_register_listener(RamDiscardManager *rdm,
+                                           RamDiscardListener *rdl,
+                                           MemoryRegionSection *section);
+
+void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
+                                             RamDiscardListener *rdl);
+
+bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
+                          ram_addr_t *ram_addr, bool *read_only,
+                          bool *mr_has_discard_manager);
+
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
 typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
 
@@ -445,14 +795,18 @@ struct MemoryRegion {
     bool nonvolatile;
     bool rom_device;
     bool flush_coalesced_mmio;
+    bool unmergeable;
     uint8_t dirty_log_mask;
     bool is_iommu;
     RAMBlock *ram_block;
     Object *owner;
+    /* owner as TYPE_DEVICE. Used for re-entrancy checks in MR access hotpath */
+    DeviceState *dev;
 
     const MemoryRegionOps *ops;
     void *opaque;
     MemoryRegion *container;
+    int mapped_via_alias; /* Mapped via an alias, container might be NULL */
     Int128 size;
     hwaddr addr;
     void (*destructor)(MemoryRegion *mr);
@@ -471,6 +825,10 @@ struct MemoryRegion {
     const char *name;
     unsigned ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
+    RamDiscardManager *rdm; /* Only for RAM */
+
+    /* For devices designed to perform re-entrant IO into their own IO MRs */
+    bool disable_reentrancy_guard;
 };
 
 struct IOMMUMemoryRegion {
@@ -483,6 +841,10 @@ struct IOMMUMemoryRegion {
 #define IOMMU_NOTIFIER_FOREACH(n, mr) \
     QLIST_FOREACH((n), &(mr)->iommu_notify, node)
 
+#define MEMORY_LISTENER_PRIORITY_MIN            0
+#define MEMORY_LISTENER_PRIORITY_ACCEL          10
+#define MEMORY_LISTENER_PRIORITY_DEV_BACKEND    10
+
 /**
  * struct MemoryListener: callbacks structure for updates to the physical memory map
  *
@@ -555,7 +917,7 @@ struct MemoryListener {
      * @log_start:
      *
      * Called during an address space update transaction, after
-     * one of #MemoryListener.region_add(),#MemoryListener.region_del() or
+     * one of #MemoryListener.region_add(), #MemoryListener.region_del() or
      * #MemoryListener.region_nop(), if dirty memory logging clients have
      * become active since the last transaction.
      *
@@ -600,6 +962,21 @@ struct MemoryListener {
      */
     void (*log_sync)(MemoryListener *listener, MemoryRegionSection *section);
 
+    /**
+     * @log_sync_global:
+     *
+     * This is the global version of @log_sync when the listener does
+     * not have a way to synchronize the log with finer granularity.
+     * When the listener registers with @log_sync_global defined, then
+     * its @log_sync must be NULL.  Vice versa.
+     *
+     * @listener: The #MemoryListener.
+     * @last_stage: The last stage to synchronize the log during migration.
+     * The caller should guarantee that the synchronization with true for
+     * @last_stage is triggered for once after all VCPUs have been stopped.
+     */
+    void (*log_sync_global)(MemoryListener *listener, bool last_stage);
+
     /**
      * @log_clear:
      *
@@ -715,6 +1092,14 @@ struct MemoryListener {
      */
     unsigned priority;
 
+    /**
+     * @name:
+     *
+     * Name of the listener.  It can be used in contexts where we'd like to
+     * identify one memory listener with the rest.
+     */
+    const char *name;
+
     /* private: */
     AddressSpace *address_space;
     QTAILQ_ENTRY(MemoryListener) link;
@@ -734,6 +1119,7 @@ struct AddressSpace {
     struct FlatView *current_map;
 
     int ioeventfd_nb;
+    int ioeventfd_notifiers;
     struct MemoryRegionIoeventfd *ioeventfds;
     QTAILQ_HEAD(, MemoryListener) listeners;
     QTAILQ_ENTRY(AddressSpace) address_spaces_link;
@@ -760,33 +1146,35 @@ static inline FlatView *address_space_to_flatview(AddressSpace *as)
     return qatomic_rcu_read(&as->current_map);
 }
 
-typedef int (*flatview_cb)(Int128 start,
-                           Int128 len,
-                           const MemoryRegion*, void*);
-
-void flatview_for_each_range(FlatView *fv, flatview_cb cb , void *opaque);
+/**
+ * typedef flatview_cb: callback for flatview_for_each_range()
+ *
+ * @start: start address of the range within the FlatView
+ * @len: length of the range in bytes
+ * @mr: MemoryRegion covering this range
+ * @offset_in_region: offset of the first byte of the range within @mr
+ * @opaque: data pointer passed to flatview_for_each_range()
+ *
+ * Returns: true to stop the iteration, false to keep going.
+ */
+typedef bool (*flatview_cb)(Int128 start,
+                            Int128 len,
+                            const MemoryRegion *mr,
+                            hwaddr offset_in_region,
+                            void *opaque);
 
 /**
- * struct MemoryRegionSection: describes a fragment of a #MemoryRegion
+ * flatview_for_each_range: Iterate through a FlatView
+ * @fv: the FlatView to iterate through
+ * @cb: function to call for each range
+ * @opaque: opaque data pointer to pass to @cb
  *
- * @mr: the region, or %NULL if empty
- * @fv: the flat view of the address space the region is mapped in
- * @offset_within_region: the beginning of the section, relative to @mr's start
- * @size: the size of the section; will not exceed @mr's boundaries
- * @offset_within_address_space: the address of the first byte of the section
- *     relative to the region's address space
- * @readonly: writes to this section are ignored
- * @nonvolatile: this section is non-volatile
+ * A FlatView is made up of a list of non-overlapping ranges, each of
+ * which is a slice of a MemoryRegion. This function iterates through
+ * each range in @fv, calling @cb. The callback function can terminate
+ * iteration early by returning 'true'.
  */
-struct MemoryRegionSection {
-    Int128 size;
-    MemoryRegion *mr;
-    FlatView *fv;
-    hwaddr offset_within_region;
-    hwaddr offset_within_address_space;
-    bool readonly;
-    bool nonvolatile;
-};
+void flatview_for_each_range(FlatView *fv, flatview_cb cb, void *opaque);
 
 static inline bool MemoryRegionSection_eq(MemoryRegionSection *a,
                                           MemoryRegionSection *b)
@@ -800,6 +1188,26 @@ static inline bool MemoryRegionSection_eq(MemoryRegionSection *a,
            a->nonvolatile == b->nonvolatile;
 }
 
+/**
+ * memory_region_section_new_copy: Copy a memory region section
+ *
+ * Allocate memory for a new copy, copy the memory region section, and
+ * properly take a reference on all relevant members.
+ *
+ * @s: the #MemoryRegionSection to copy
+ */
+MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s);
+
+/**
+ * memory_region_section_new_copy: Free a copied memory region section
+ *
+ * Free a copy of a memory section created via memory_region_section_new_copy().
+ * properly dropping references on all relevant members.
+ *
+ * @s: the #MemoryRegionSection to copy
+ */
+void memory_region_section_free_copy(MemoryRegionSection *s);
+
 /**
  * memory_region_init: Initialize a memory region
  *
@@ -812,7 +1220,7 @@ static inline bool MemoryRegionSection_eq(MemoryRegionSection *a,
  * @size: size of the region; any subregions beyond this size will be clipped
  */
 void memory_region_init(MemoryRegion *mr,
-                        struct Object *owner,
+                        Object *owner,
                         const char *name,
                         uint64_t size);
 
@@ -860,7 +1268,7 @@ void memory_region_unref(MemoryRegion *mr);
  * @size: size of the region.
  */
 void memory_region_init_io(MemoryRegion *mr,
-                           struct Object *owner,
+                           Object *owner,
                            const MemoryRegionOps *ops,
                            void *opaque,
                            const char *name,
@@ -882,40 +1290,42 @@ void memory_region_init_io(MemoryRegion *mr,
  * RAM memory region to be migrated; that is the responsibility of the caller.
  */
 void memory_region_init_ram_nomigrate(MemoryRegion *mr,
-                                      struct Object *owner,
+                                      Object *owner,
                                       const char *name,
                                       uint64_t size,
                                       Error **errp);
 
 /**
- * memory_region_init_ram_shared_nomigrate:  Initialize RAM memory region.
- *                                           Accesses into the region will
- *                                           modify memory directly.
+ * memory_region_init_ram_flags_nomigrate:  Initialize RAM memory region.
+ *                                          Accesses into the region will
+ *                                          modify memory directly.
  *
  * @mr: the #MemoryRegion to be initialized.
  * @owner: the object that tracks the region's reference count
  * @name: Region name, becomes part of RAMBlock name used in migration stream
  *        must be unique within any device
  * @size: size of the region.
- * @share: allow remapping RAM to different addresses
+ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE.
  * @errp: pointer to Error*, to store an error if it happens.
  *
- * Note that this function is similar to memory_region_init_ram_nomigrate.
- * The only difference is part of the RAM region can be remapped.
+ * Note that this function does not do anything to cause the data in the
+ * RAM memory region to be migrated; that is the responsibility of the caller.
  */
-void memory_region_init_ram_shared_nomigrate(MemoryRegion *mr,
-                                             struct Object *owner,
-                                             const char *name,
-                                             uint64_t size,
-                                             bool share,
-                                             Error **errp);
+void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr,
+                                            Object *owner,
+                                            const char *name,
+                                            uint64_t size,
+                                            uint32_t ram_flags,
+                                            Error **errp);
 
 /**
- * memory_region_init_resizeable_ram:  Initialize memory region with resizeable
+ * memory_region_init_resizeable_ram:  Initialize memory region with resizable
  *                                     RAM.  Accesses into the region will
  *                                     modify memory directly.  Only an initial
  *                                     portion of this RAM is actually used.
- *                                     The used size can change across reboots.
+ *                                     Changing the size while migrating
+ *                                     can result in the migration being
+ *                                     canceled.
  *
  * @mr: the #MemoryRegion to be initialized.
  * @owner: the object that tracks the region's reference count
@@ -930,7 +1340,7 @@ void memory_region_init_ram_shared_nomigrate(MemoryRegion *mr,
  * RAM memory region to be migrated; that is the responsibility of the caller.
  */
 void memory_region_init_resizeable_ram(MemoryRegion *mr,
-                                       struct Object *owner,
+                                       Object *owner,
                                        const char *name,
                                        uint64_t size,
                                        uint64_t max_size,
@@ -951,23 +1361,24 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
  * @size: size of the region.
  * @align: alignment of the region base address; if 0, the default alignment
  *         (getpagesize()) will be used.
- * @ram_flags: Memory region features:
- *             - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag
- *             - RAM_PMEM: the memory is persistent memory
- *             Other bits are ignored now.
+ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
+ *             RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
+ *             RAM_READONLY_FD
  * @path: the path in which to allocate the RAM.
+ * @offset: offset within the file referenced by path
  * @errp: pointer to Error*, to store an error if it happens.
  *
  * Note that this function does not do anything to cause the data in the
  * RAM memory region to be migrated; that is the responsibility of the caller.
  */
 void memory_region_init_ram_from_file(MemoryRegion *mr,
-                                      struct Object *owner,
+                                      Object *owner,
                                       const char *name,
                                       uint64_t size,
                                       uint64_t align,
                                       uint32_t ram_flags,
                                       const char *path,
+                                      ram_addr_t offset,
                                       Error **errp);
 
 /**
@@ -978,19 +1389,23 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
  * @owner: the object that tracks the region's reference count
  * @name: the name of the region.
  * @size: size of the region.
- * @share: %true if memory must be mmaped with the MAP_SHARED flag
+ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
+ *             RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
+ *             RAM_READONLY_FD
  * @fd: the fd to mmap.
+ * @offset: offset within the file referenced by fd
  * @errp: pointer to Error*, to store an error if it happens.
  *
  * Note that this function does not do anything to cause the data in the
  * RAM memory region to be migrated; that is the responsibility of the caller.
  */
 void memory_region_init_ram_from_fd(MemoryRegion *mr,
-                                    struct Object *owner,
+                                    Object *owner,
                                     const char *name,
                                     uint64_t size,
-                                    bool share,
+                                    uint32_t ram_flags,
                                     int fd,
+                                    ram_addr_t offset,
                                     Error **errp);
 #endif
 
@@ -1010,7 +1425,7 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
  * RAM memory region to be migrated; that is the responsibility of the caller.
  */
 void memory_region_init_ram_ptr(MemoryRegion *mr,
-                                struct Object *owner,
+                                Object *owner,
                                 const char *name,
                                 uint64_t size,
                                 void *ptr);
@@ -1038,7 +1453,7 @@ void memory_region_init_ram_ptr(MemoryRegion *mr,
  * (For RAM device memory regions, migrating the contents rarely makes sense.)
  */
 void memory_region_init_ram_device_ptr(MemoryRegion *mr,
-                                       struct Object *owner,
+                                       Object *owner,
                                        const char *name,
                                        uint64_t size,
                                        void *ptr);
@@ -1056,7 +1471,7 @@ void memory_region_init_ram_device_ptr(MemoryRegion *mr,
  * @size: size of the region.
  */
 void memory_region_init_alias(MemoryRegion *mr,
-                              struct Object *owner,
+                              Object *owner,
                               const char *name,
                               MemoryRegion *orig,
                               hwaddr offset,
@@ -1081,7 +1496,7 @@ void memory_region_init_alias(MemoryRegion *mr,
  * @errp: pointer to Error*, to store an error if it happens.
  */
 void memory_region_init_rom_nomigrate(MemoryRegion *mr,
-                                      struct Object *owner,
+                                      Object *owner,
                                       const char *name,
                                       uint64_t size,
                                       Error **errp);
@@ -1104,7 +1519,7 @@ void memory_region_init_rom_nomigrate(MemoryRegion *mr,
  * @errp: pointer to Error*, to store an error if it happens.
  */
 void memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
-                                             struct Object *owner,
+                                             Object *owner,
                                              const MemoryRegionOps *ops,
                                              void *opaque,
                                              const char *name,
@@ -1163,7 +1578,7 @@ void memory_region_init_iommu(void *_iommu_mr,
  * If you pass a non-NULL non-device @owner then we will assert.
  */
 void memory_region_init_ram(MemoryRegion *mr,
-                            struct Object *owner,
+                            Object *owner,
                             const char *name,
                             uint64_t size,
                             Error **errp);
@@ -1190,7 +1605,7 @@ void memory_region_init_ram(MemoryRegion *mr,
  * @errp: pointer to Error*, to store an error if it happens.
  */
 void memory_region_init_rom(MemoryRegion *mr,
-                            struct Object *owner,
+                            Object *owner,
                             const char *name,
                             uint64_t size,
                             Error **errp);
@@ -1221,7 +1636,7 @@ void memory_region_init_rom(MemoryRegion *mr,
  * @errp: pointer to Error*, to store an error if it happens.
  */
 void memory_region_init_rom_device(MemoryRegion *mr,
-                                   struct Object *owner,
+                                   Object *owner,
                                    const MemoryRegionOps *ops,
                                    void *opaque,
                                    const char *name,
@@ -1234,7 +1649,7 @@ void memory_region_init_rom_device(MemoryRegion *mr,
  *
  * @mr: the memory region being queried.
  */
-struct Object *memory_region_owner(MemoryRegion *mr);
+Object *memory_region_owner(MemoryRegion *mr);
 
 /**
  * memory_region_size: get a memory region's size.
@@ -1277,6 +1692,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr)
     return mr->rom_device && mr->romd_mode;
 }
 
+/**
+ * memory_region_is_protected: check whether a memory region is protected
+ *
+ * Returns %true if a memory region is protected RAM and cannot be accessed
+ * via standard mechanisms, e.g. DMA.
+ *
+ * @mr: the memory region being queried
+ */
+bool memory_region_is_protected(MemoryRegion *mr);
+
 /**
  * memory_region_get_iommu: check whether a memory region is an iommu
  *
@@ -1326,39 +1751,43 @@ uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *iommu_mr);
 /**
  * memory_region_notify_iommu: notify a change in an IOMMU translation entry.
  *
- * The notification type will be decided by entry.perm bits:
- *
- * - For UNMAP (cache invalidation) notifies: set entry.perm to IOMMU_NONE.
- * - For MAP (newly added entry) notifies: set entry.perm to the
- *   permission of the page (which is definitely !IOMMU_NONE).
- *
  * Note: for any IOMMU implementation, an in-place mapping change
  * should be notified with an UNMAP followed by a MAP.
  *
  * @iommu_mr: the memory region that was changed
  * @iommu_idx: the IOMMU index for the translation table which has changed
- * @entry: the new entry in the IOMMU translation table.  The entry
- *         replaces all old entries for the same virtual I/O address range.
- *         Deleted entries have .@perm == 0.
+ * @event: TLB event with the new entry in the IOMMU translation table.
+ *         The entry replaces all old entries for the same virtual I/O address
+ *         range.
  */
 void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr,
                                 int iommu_idx,
-                                IOMMUTLBEntry entry);
+                                IOMMUTLBEvent event);
 
 /**
- * memory_region_notify_one: notify a change in an IOMMU translation
+ * memory_region_notify_iommu_one: notify a change in an IOMMU translation
  *                           entry to a single notifier
  *
  * This works just like memory_region_notify_iommu(), but it only
  * notifies a specific notifier, not all of them.
  *
  * @notifier: the notifier to be notified
- * @entry: the new entry in the IOMMU translation table.  The entry
- *         replaces all old entries for the same virtual I/O address range.
- *         Deleted entries have .@perm == 0.
+ * @event: TLB event with the new entry in the IOMMU translation table.
+ *         The entry replaces all old entries for the same virtual I/O address
+ *         range.
+ */
+void memory_region_notify_iommu_one(IOMMUNotifier *notifier,
+                                    IOMMUTLBEvent *event);
+
+/**
+ * memory_region_unmap_iommu_notifier_range: notify a unmap for an IOMMU
+ *                                           translation that covers the
+ *                                           range of a notifier
+ *
+ * @notifier: the notifier to be notified
  */
-void memory_region_notify_one(IOMMUNotifier *notifier,
-                              IOMMUTLBEntry *entry);
+void memory_region_unmap_iommu_notifier_range(IOMMUNotifier *notifier);
+
 
 /**
  * memory_region_register_iommu_notifier: register a notifier for changes to
@@ -1447,6 +1876,18 @@ int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
                                            uint64_t page_size_mask,
                                            Error **errp);
 
+/**
+ * memory_region_iommu_set_iova_ranges - Set the usable IOVA ranges
+ * for a given IOMMU MR region
+ *
+ * @iommu: IOMMU memory region
+ * @iova_ranges: list of ordered IOVA ranges (at least one range)
+ * @errp: pointer to Error*, to store an error if it happens.
+ */
+int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu,
+                                        GList *iova_ranges,
+                                        Error **errp);
+
 /**
  * memory_region_name: get a memory region's name
  *
@@ -1548,8 +1989,8 @@ void *memory_region_get_ram_ptr(MemoryRegion *mr);
 
 /* memory_region_ram_resize: Resize a RAM region.
  *
- * Only legal before guest might have detected the memory size: e.g. on
- * incoming migration, or right after reset.
+ * Resizing RAM while migrating can result in the migration being canceled.
+ * Care has to be taken if the guest might have already detected the memory.
  *
  * @mr: a memory region created with @memory_region_init_resizeable_ram.
  * @newsize: the new size the region
@@ -1629,7 +2070,7 @@ void memory_region_clear_dirty_bitmap(MemoryRegion *mr, hwaddr start,
  * querying the same page multiple times, which is especially useful for
  * display updates where the scanlines often are not page aligned.
  *
- * The dirty bitmap region which gets copyed into the snapshot (and
+ * The dirty bitmap region which gets copied into the snapshot (and
  * cleared afterwards) can be larger than requested.  The boundaries
  * are rounded up/down so complete bitmap longs (covering 64 pages on
  * 64bit hosts) can be copied over into the bitmap snapshot.  Which
@@ -1945,6 +2386,25 @@ void memory_region_set_size(MemoryRegion *mr, uint64_t size);
 void memory_region_set_alias_offset(MemoryRegion *mr,
                                     hwaddr offset);
 
+/*
+ * memory_region_set_unmergeable: Set a memory region unmergeable
+ *
+ * Mark a memory region unmergeable, resulting in the memory region (or
+ * everything contained in a memory region container) not getting merged when
+ * simplifying the address space and notifying memory listeners. Consequently,
+ * memory listeners will never get notified about ranges that are larger than
+ * the original memory regions.
+ *
+ * This is primarily useful when multiple aliases to a RAM memory region are
+ * mapped into a memory region container, and updates (e.g., enable/disable or
+ * map/unmap) of individual memory region aliases are not supposed to affect
+ * other memory regions in the same container.
+ *
+ * @mr: the #MemoryRegion to be updated
+ * @unmergeable: whether to mark the #MemoryRegion unmergeable
+ */
+void memory_region_set_unmergeable(MemoryRegion *mr, bool unmergeable);
+
 /**
  * memory_region_present: checks if an address relative to a @container
  * translates into #MemoryRegion within @container
@@ -1959,12 +2419,48 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr);
 
 /**
  * memory_region_is_mapped: returns true if #MemoryRegion is mapped
- * into any address space.
+ * into another memory region, which does not necessarily imply that it is
+ * mapped into an address space.
  *
  * @mr: a #MemoryRegion which should be checked if it's mapped
  */
 bool memory_region_is_mapped(MemoryRegion *mr);
 
+/**
+ * memory_region_get_ram_discard_manager: get the #RamDiscardManager for a
+ * #MemoryRegion
+ *
+ * The #RamDiscardManager cannot change while a memory region is mapped.
+ *
+ * @mr: the #MemoryRegion
+ */
+RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr);
+
+/**
+ * memory_region_has_ram_discard_manager: check whether a #MemoryRegion has a
+ * #RamDiscardManager assigned
+ *
+ * @mr: the #MemoryRegion
+ */
+static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr)
+{
+    return !!memory_region_get_ram_discard_manager(mr);
+}
+
+/**
+ * memory_region_set_ram_discard_manager: set the #RamDiscardManager for a
+ * #MemoryRegion
+ *
+ * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion
+ * that does not cover RAM, or a #MemoryRegion that already has a
+ * #RamDiscardManager assigned.
+ *
+ * @mr: the #MemoryRegion
+ * @rdm: #RamDiscardManager to set
+ */
+void memory_region_set_ram_discard_manager(MemoryRegion *mr,
+                                           RamDiscardManager *rdm);
+
 /**
  * memory_region_find: translate an address/size relative to a
  * MemoryRegion into a #MemoryRegionSection.
@@ -2000,8 +2496,10 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
  * memory_global_dirty_log_sync: synchronize the dirty log for all memory
  *
  * Synchronizes the dirty page log for all address spaces.
+ *
+ * @last_stage: whether this is the last stage of live migration
  */
-void memory_global_dirty_log_sync(void);
+void memory_global_dirty_log_sync(bool last_stage);
 
 /**
  * memory_global_dirty_log_sync: synchronize the dirty log for all memory
@@ -2047,16 +2545,24 @@ void memory_listener_unregister(MemoryListener *listener);
 
 /**
  * memory_global_dirty_log_start: begin dirty logging for all regions
+ *
+ * @flags: purpose of starting dirty log, migration or dirty rate
  */
-void memory_global_dirty_log_start(void);
+void memory_global_dirty_log_start(unsigned int flags);
 
 /**
  * memory_global_dirty_log_stop: end dirty logging for all regions
+ *
+ * @flags: purpose of stopping dirty log, migration or dirty rate
  */
-void memory_global_dirty_log_stop(void);
+void memory_global_dirty_log_stop(unsigned int flags);
 
 void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled);
 
+bool memory_region_access_valid(MemoryRegion *mr, hwaddr addr,
+                                unsigned size, bool is_write,
+                                MemTxAttrs attrs);
+
 /**
  * memory_region_dispatch_read: perform a read directly to the specified
  * MemoryRegion.
@@ -2220,9 +2726,6 @@ struct MemoryRegionCache {
     bool is_write;
 };
 
-#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .mrs.mr = NULL })
-
-
 /* address_space_ld*_cached: load from a cached #MemoryRegion
  * address_space_st*_cached: store into a cached #MemoryRegion
  *
@@ -2265,7 +2768,7 @@ static inline uint8_t address_space_ldub_cached(MemoryRegionCache *cache,
 }
 
 static inline void address_space_stb_cached(MemoryRegionCache *cache,
-    hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result)
+    hwaddr addr, uint8_t val, MemTxAttrs attrs, MemTxResult *result)
 {
     assert(addr < cache->len);
     if (likely(cache->ptr)) {
@@ -2311,6 +2814,21 @@ int64_t address_space_cache_init(MemoryRegionCache *cache,
                                  hwaddr len,
                                  bool is_write);
 
+/**
+ * address_space_cache_init_empty: Initialize empty #MemoryRegionCache
+ *
+ * @cache: The #MemoryRegionCache to operate on.
+ *
+ * Initializes #MemoryRegionCache structure without memory region attached.
+ * Cache initialized this way can only be safely destroyed, but not used.
+ */
+static inline void address_space_cache_init_empty(MemoryRegionCache *cache)
+{
+    cache->mrs.mr = NULL;
+    /* There is no real need to initialize fv, but it makes Coverity happy. */
+    cache->fv = NULL;
+}
+
 /**
  * address_space_cache_invalidate: complete a write to a #MemoryRegionCache
  *
@@ -2432,6 +2950,9 @@ MemTxResult address_space_write_cached_slow(MemoryRegionCache *cache,
                                             hwaddr addr, const void *buf,
                                             hwaddr len);
 
+int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr);
+bool prepare_mmio_access(MemoryRegion *mr);
+
 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
 {
     if (is_write) {
@@ -2500,7 +3021,7 @@ address_space_read_cached(MemoryRegionCache *cache, hwaddr addr,
                           void *buf, hwaddr len)
 {
     assert(addr < cache->len && len <= cache->len - addr);
-    fuzz_dma_read_cb(cache->xlat + addr, len, cache->mrs.mr, false);
+    fuzz_dma_read_cb(cache->xlat + addr, len, cache->mrs.mr);
     if (likely(cache->ptr)) {
         memcpy(buf, cache->ptr + addr, len);
         return MEMTX_OK;
@@ -2530,6 +3051,22 @@ address_space_write_cached(MemoryRegionCache *cache, hwaddr addr,
     }
 }
 
+/**
+ * address_space_set: Fill address space with a constant byte.
+ *
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @c: constant byte to fill the memory
+ * @len: the number of bytes to fill with the constant byte
+ * @attrs: memory transaction attributes
+ */
+MemTxResult address_space_set(AddressSpace *as, hwaddr addr,
+                              uint8_t c, hwaddr len, MemTxAttrs attrs);
+
 #ifdef NEED_CPU_H
 /* enum device_endian to MemOp.  */
 static inline MemOp devend_memop(enum device_endian end)
@@ -2537,7 +3074,7 @@ static inline MemOp devend_memop(enum device_endian end)
     QEMU_BUILD_BUG_ON(DEVICE_HOST_ENDIAN != DEVICE_LITTLE_ENDIAN &&
                       DEVICE_HOST_ENDIAN != DEVICE_BIG_ENDIAN);
 
-#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
     /* Swap if non-host endianness or native (target) endianness */
     return (end == DEVICE_HOST_ENDIAN) ? 0 : MO_BSWAP;
 #else
@@ -2573,6 +3110,12 @@ static inline MemOp devend_memop(enum device_endian end)
  */
 int ram_block_discard_disable(bool state);
 
+/*
+ * See ram_block_discard_disable(): only disable uncoordinated discards,
+ * keeping coordinated discards (via the RamDiscardManager) enabled.
+ */
+int ram_block_uncoordinated_discard_disable(bool state);
+
 /*
  * Inhibit technologies that disable discarding of pages in RAM blocks.
  *
@@ -2582,12 +3125,20 @@ int ram_block_discard_disable(bool state);
 int ram_block_discard_require(bool state);
 
 /*
- * Test if discarding of memory in ram blocks is disabled.
+ * See ram_block_discard_require(): only inhibit technologies that disable
+ * uncoordinated discarding of pages in RAM blocks, allowing co-existance with
+ * technologies that only inhibit uncoordinated discards (via the
+ * RamDiscardManager).
+ */
+int ram_block_coordinated_discard_require(bool state);
+
+/*
+ * Test if any discarding of memory in ram blocks is disabled.
  */
 bool ram_block_discard_is_disabled(void);
 
 /*
- * Test if discarding of memory in ram blocks is required to work reliably.
+ * Test if any discarding of memory in ram blocks is required to work reliably.
  */
 bool ram_block_discard_is_required(void);
 
index 46e6c220d3539f66a3b12adf26dd5addaec3e8d6..92ad74e95608b4c853acdbfdb2aacc6d51dd6507 100644 (file)
  */
 
 #ifdef TARGET_ENDIANNESS
-extern uint32_t glue(address_space_lduw, SUFFIX)(ARG1_DECL,
+uint16_t glue(address_space_lduw, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint32_t glue(address_space_ldl, SUFFIX)(ARG1_DECL,
+uint32_t glue(address_space_ldl, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint64_t glue(address_space_ldq, SUFFIX)(ARG1_DECL,
+uint64_t glue(address_space_ldq, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL,
+void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stw, SUFFIX)(ARG1_DECL,
+void glue(address_space_stw, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result);
+void glue(address_space_stl, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stl, SUFFIX)(ARG1_DECL,
-    hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stq, SUFFIX)(ARG1_DECL,
+void glue(address_space_stq, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result);
 #else
-extern uint32_t glue(address_space_ldub, SUFFIX)(ARG1_DECL,
+uint8_t glue(address_space_ldub, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint32_t glue(address_space_lduw_le, SUFFIX)(ARG1_DECL,
+uint16_t glue(address_space_lduw_le, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint32_t glue(address_space_lduw_be, SUFFIX)(ARG1_DECL,
+uint16_t glue(address_space_lduw_be, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint32_t glue(address_space_ldl_le, SUFFIX)(ARG1_DECL,
+uint32_t glue(address_space_ldl_le, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint32_t glue(address_space_ldl_be, SUFFIX)(ARG1_DECL,
+uint32_t glue(address_space_ldl_be, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint64_t glue(address_space_ldq_le, SUFFIX)(ARG1_DECL,
+uint64_t glue(address_space_ldq_le, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern uint64_t glue(address_space_ldq_be, SUFFIX)(ARG1_DECL,
+uint64_t glue(address_space_ldq_be, SUFFIX)(ARG1_DECL,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stb, SUFFIX)(ARG1_DECL,
-    hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stw_le, SUFFIX)(ARG1_DECL,
-    hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stw_be, SUFFIX)(ARG1_DECL,
-    hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stl_le, SUFFIX)(ARG1_DECL,
+void glue(address_space_stb, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint8_t val, MemTxAttrs attrs, MemTxResult *result);
+void glue(address_space_stw_le, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result);
+void glue(address_space_stw_be, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result);
+void glue(address_space_stl_le, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stl_be, SUFFIX)(ARG1_DECL,
+void glue(address_space_stl_be, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stq_le, SUFFIX)(ARG1_DECL,
+void glue(address_space_stq_le, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result);
-extern void glue(address_space_stq_be, SUFFIX)(ARG1_DECL,
+void glue(address_space_stq_be, SUFFIX)(ARG1_DECL,
     hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result);
 #endif
 
index 01efad62ded602e9bc744691586beb62821c3145..d7834f852c47c6f4b2d55427ffd10b82aadd69ab 100644 (file)
 #define LD_P(size) \
     glue(glue(ld, size), glue(ENDIANNESS, _p))
 
+static inline uint16_t ADDRESS_SPACE_LD_CACHED(uw)(MemoryRegionCache *cache,
+    hwaddr addr, MemTxAttrs attrs, MemTxResult *result)
+{
+    assert(addr < cache->len && 2 <= cache->len - addr);
+    fuzz_dma_read_cb(cache->xlat + addr, 2, cache->mrs.mr);
+    if (likely(cache->ptr)) {
+        return LD_P(uw)(cache->ptr + addr);
+    } else {
+        return ADDRESS_SPACE_LD_CACHED_SLOW(uw)(cache, addr, attrs, result);
+    }
+}
+
 static inline uint32_t ADDRESS_SPACE_LD_CACHED(l)(MemoryRegionCache *cache,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result)
 {
     assert(addr < cache->len && 4 <= cache->len - addr);
-    fuzz_dma_read_cb(cache->xlat + addr, 4, cache->mrs.mr, false);
+    fuzz_dma_read_cb(cache->xlat + addr, 4, cache->mrs.mr);
     if (likely(cache->ptr)) {
         return LD_P(l)(cache->ptr + addr);
     } else {
@@ -40,7 +52,7 @@ static inline uint64_t ADDRESS_SPACE_LD_CACHED(q)(MemoryRegionCache *cache,
     hwaddr addr, MemTxAttrs attrs, MemTxResult *result)
 {
     assert(addr < cache->len && 8 <= cache->len - addr);
-    fuzz_dma_read_cb(cache->xlat + addr, 8, cache->mrs.mr, false);
+    fuzz_dma_read_cb(cache->xlat + addr, 8, cache->mrs.mr);
     if (likely(cache->ptr)) {
         return LD_P(q)(cache->ptr + addr);
     } else {
@@ -48,18 +60,6 @@ static inline uint64_t ADDRESS_SPACE_LD_CACHED(q)(MemoryRegionCache *cache,
     }
 }
 
-static inline uint32_t ADDRESS_SPACE_LD_CACHED(uw)(MemoryRegionCache *cache,
-    hwaddr addr, MemTxAttrs attrs, MemTxResult *result)
-{
-    assert(addr < cache->len && 2 <= cache->len - addr);
-    fuzz_dma_read_cb(cache->xlat + addr, 2, cache->mrs.mr, false);
-    if (likely(cache->ptr)) {
-        return LD_P(uw)(cache->ptr + addr);
-    } else {
-        return ADDRESS_SPACE_LD_CACHED_SLOW(uw)(cache, addr, attrs, result);
-    }
-}
-
 #undef ADDRESS_SPACE_LD_CACHED
 #undef ADDRESS_SPACE_LD_CACHED_SLOW
 #undef LD_P
@@ -71,25 +71,25 @@ static inline uint32_t ADDRESS_SPACE_LD_CACHED(uw)(MemoryRegionCache *cache,
 #define ST_P(size) \
     glue(glue(st, size), glue(ENDIANNESS, _p))
 
-static inline void ADDRESS_SPACE_ST_CACHED(l)(MemoryRegionCache *cache,
-    hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result)
+static inline void ADDRESS_SPACE_ST_CACHED(w)(MemoryRegionCache *cache,
+    hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result)
 {
-    assert(addr < cache->len && 4 <= cache->len - addr);
+    assert(addr < cache->len && 2 <= cache->len - addr);
     if (likely(cache->ptr)) {
-        ST_P(l)(cache->ptr + addr, val);
+        ST_P(w)(cache->ptr + addr, val);
     } else {
-        ADDRESS_SPACE_ST_CACHED_SLOW(l)(cache, addr, val, attrs, result);
+        ADDRESS_SPACE_ST_CACHED_SLOW(w)(cache, addr, val, attrs, result);
     }
 }
 
-static inline void ADDRESS_SPACE_ST_CACHED(w)(MemoryRegionCache *cache,
+static inline void ADDRESS_SPACE_ST_CACHED(l)(MemoryRegionCache *cache,
     hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result)
 {
-    assert(addr < cache->len && 2 <= cache->len - addr);
+    assert(addr < cache->len && 4 <= cache->len - addr);
     if (likely(cache->ptr)) {
-        ST_P(w)(cache->ptr + addr, val);
+        ST_P(l)(cache->ptr + addr, val);
     } else {
-        ADDRESS_SPACE_ST_CACHED_SLOW(w)(cache, addr, val, attrs, result);
+        ADDRESS_SPACE_ST_CACHED_SLOW(l)(cache, addr, val, attrs, result);
     }
 }
 
index b9dd53c389d7baf7a35e97f05ec071f264af728f..ecd678610d1855eb91dfc060e639b8659f576f45 100644 (file)
  */
 
 #ifdef TARGET_ENDIANNESS
+static inline uint16_t glue(lduw_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+{
+    return glue(address_space_lduw, SUFFIX)(ARG1, addr,
+                                            MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
 static inline uint32_t glue(ldl_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
 {
     return glue(address_space_ldl, SUFFIX)(ARG1, addr,
@@ -32,10 +38,10 @@ static inline uint64_t glue(ldq_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
                                            MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
-static inline uint32_t glue(lduw_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+static inline void glue(stw_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint16_t val)
 {
-    return glue(address_space_lduw, SUFFIX)(ARG1, addr,
-                                            MEMTXATTRS_UNSPECIFIED, NULL);
+    glue(address_space_stw, SUFFIX)(ARG1, addr, val,
+                                    MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 static inline void glue(stl_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
@@ -44,18 +50,30 @@ static inline void glue(stl_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
                                     MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
-static inline void glue(stw_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
-{
-    glue(address_space_stw, SUFFIX)(ARG1, addr, val,
-                                    MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
 static inline void glue(stq_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val)
 {
     glue(address_space_stq, SUFFIX)(ARG1, addr, val,
                                     MEMTXATTRS_UNSPECIFIED, NULL);
 }
 #else
+static inline uint8_t glue(ldub_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+{
+    return glue(address_space_ldub, SUFFIX)(ARG1, addr,
+                                            MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+static inline uint16_t glue(lduw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+{
+    return glue(address_space_lduw_le, SUFFIX)(ARG1, addr,
+                                               MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+static inline uint16_t glue(lduw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+{
+    return glue(address_space_lduw_be, SUFFIX)(ARG1, addr,
+                                               MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
 static inline uint32_t glue(ldl_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
 {
     return glue(address_space_ldl_le, SUFFIX)(ARG1, addr,
@@ -80,22 +98,22 @@ static inline uint64_t glue(ldq_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
                                               MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
-static inline uint32_t glue(ldub_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+static inline void glue(stb_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint8_t val)
 {
-    return glue(address_space_ldub, SUFFIX)(ARG1, addr,
-                                            MEMTXATTRS_UNSPECIFIED, NULL);
+    glue(address_space_stb, SUFFIX)(ARG1, addr, val,
+                                    MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
-static inline uint32_t glue(lduw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+static inline void glue(stw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint16_t val)
 {
-    return glue(address_space_lduw_le, SUFFIX)(ARG1, addr,
-                                               MEMTXATTRS_UNSPECIFIED, NULL);
+    glue(address_space_stw_le, SUFFIX)(ARG1, addr, val,
+                                       MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
-static inline uint32_t glue(lduw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+static inline void glue(stw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint16_t val)
 {
-    return glue(address_space_lduw_be, SUFFIX)(ARG1, addr,
-                                               MEMTXATTRS_UNSPECIFIED, NULL);
+    glue(address_space_stw_be, SUFFIX)(ARG1, addr, val,
+                                       MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 static inline void glue(stl_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
@@ -110,24 +128,6 @@ static inline void glue(stl_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t va
                                        MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
-static inline void glue(stb_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
-{
-    glue(address_space_stb, SUFFIX)(ARG1, addr, val,
-                                    MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-static inline void glue(stw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
-{
-    glue(address_space_stw_le, SUFFIX)(ARG1, addr, val,
-                                       MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-static inline void glue(stw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
-{
-    glue(address_space_stw_be, SUFFIX)(ARG1, addr, val,
-                                       MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
 static inline void glue(stq_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val)
 {
     glue(address_space_stq_le, SUFFIX)(ARG1, addr, val,
diff --git a/include/exec/page-vary.h b/include/exec/page-vary.h
new file mode 100644 (file)
index 0000000..54ddde3
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Definitions for cpus with variable page sizes.
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef EXEC_PAGE_VARY_H
+#define EXEC_PAGE_VARY_H
+
+typedef struct {
+    bool decided;
+    int bits;
+    uint64_t mask;
+} TargetPageBits;
+
+#ifdef IN_PAGE_VARY
+bool set_preferred_target_page_bits_common(int bits);
+void finalize_target_page_bits_common(int min);
+#endif
+
+/**
+ * set_preferred_target_page_bits:
+ * @bits: number of bits needed to represent an address within the page
+ *
+ * Set the preferred target page size (the actual target page
+ * size may be smaller than any given CPU's preference).
+ * Returns true on success, false on failure (which can only happen
+ * if this is called after the system has already finalized its
+ * choice of page size and the requested page size is smaller than that).
+ */
+bool set_preferred_target_page_bits(int bits);
+
+/**
+ * finalize_target_page_bits:
+ * Commit the final value set by set_preferred_target_page_bits.
+ */
+void finalize_target_page_bits(void);
+
+#endif /* EXEC_PAGE_VARY_H */
index 4834a9e2f40a9fbf9f3ea0d7d3786063c0af5ed0..c4552b5061646bf550f1c1c58857a544fc027e38 100644 (file)
 #ifndef QEMU_PLUGIN_GEN_H
 #define QEMU_PLUGIN_GEN_H
 
-#include "qemu/plugin.h"
 #include "tcg/tcg.h"
 
 struct DisasContextBase;
 
 #ifdef CONFIG_PLUGIN
 
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb);
-void plugin_gen_tb_end(CPUState *cpu);
+bool plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db,
+                         bool supress);
+void plugin_gen_tb_end(CPUState *cpu, size_t num_insns);
 void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
 void plugin_gen_insn_end(void);
 
 void plugin_gen_disable_mem_helpers(void);
-void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info);
-
-static inline void plugin_insn_append(const void *from, size_t size)
-{
-    struct qemu_plugin_insn *insn = tcg_ctx->plugin_insn;
-
-    if (insn == NULL) {
-        return;
-    }
-
-    insn->data = g_byte_array_append(insn->data, from, size);
-}
+void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info);
 
 #else /* !CONFIG_PLUGIN */
 
-static inline
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb)
+static inline bool
+plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db, bool sup)
 {
     return false;
 }
@@ -53,16 +42,13 @@ void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db)
 static inline void plugin_gen_insn_end(void)
 { }
 
-static inline void plugin_gen_tb_end(CPUState *cpu)
+static inline void plugin_gen_tb_end(CPUState *cpu, size_t num_insns)
 { }
 
 static inline void plugin_gen_disable_mem_helpers(void)
 { }
 
-static inline void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
-{ }
-
-static inline void plugin_insn_append(const void *from, size_t size)
+static inline void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info)
 { }
 
 #endif /* CONFIG_PLUGIN */
index 7b9ac361dc9ac3e9672042f2081d15946857727b..1ea5633eb33f83f582b25c516652582341971fc4 100644 (file)
@@ -3,7 +3,8 @@
 
 #ifndef HW_POISON_H
 #define HW_POISON_H
-#ifdef __GNUC__
+
+#include "config-poison.h"
 
 #pragma GCC poison TARGET_I386
 #pragma GCC poison TARGET_X86_64
@@ -11,8 +12,9 @@
 #pragma GCC poison TARGET_ALPHA
 #pragma GCC poison TARGET_ARM
 #pragma GCC poison TARGET_CRIS
+#pragma GCC poison TARGET_HEXAGON
 #pragma GCC poison TARGET_HPPA
-#pragma GCC poison TARGET_LM32
+#pragma GCC poison TARGET_LOONGARCH64
 #pragma GCC poison TARGET_M68K
 #pragma GCC poison TARGET_MICROBLAZE
 #pragma GCC poison TARGET_MIPS
@@ -20,7 +22,6 @@
 #pragma GCC poison TARGET_ABI_MIPSO32
 #pragma GCC poison TARGET_MIPS64
 #pragma GCC poison TARGET_ABI_MIPSN64
-#pragma GCC poison TARGET_MOXIE
 #pragma GCC poison TARGET_NIOS2
 #pragma GCC poison TARGET_OPENRISC
 #pragma GCC poison TARGET_PPC
 #pragma GCC poison TARGET_SH4
 #pragma GCC poison TARGET_SPARC
 #pragma GCC poison TARGET_SPARC64
-#pragma GCC poison TARGET_TILEGX
 #pragma GCC poison TARGET_TRICORE
-#pragma GCC poison TARGET_UNICORE32
 #pragma GCC poison TARGET_XTENSA
 
-#pragma GCC poison TARGET_ALIGNED_ONLY
 #pragma GCC poison TARGET_HAS_BFLT
 #pragma GCC poison TARGET_NAME
 #pragma GCC poison TARGET_SUPPORTS_MTTCG
-#pragma GCC poison TARGET_WORDS_BIGENDIAN
+#pragma GCC poison TARGET_BIG_ENDIAN
 #pragma GCC poison BSWAP_NEEDED
 
 #pragma GCC poison TARGET_LONG_BITS
@@ -53,8 +51,6 @@
 #pragma GCC poison TARGET_PAGE_BITS
 #pragma GCC poison TARGET_PAGE_ALIGN
 
-#pragma GCC poison CPUArchState
-
 #pragma GCC poison CPU_INTERRUPT_HARD
 #pragma GCC poison CPU_INTERRUPT_EXITTB
 #pragma GCC poison CPU_INTERRUPT_HALT
 #pragma GCC poison CPU_INTERRUPT_TGT_INT_2
 
 #pragma GCC poison CONFIG_ALPHA_DIS
-#pragma GCC poison CONFIG_ARM_A64_DIS
-#pragma GCC poison CONFIG_ARM_DIS
 #pragma GCC poison CONFIG_CRIS_DIS
 #pragma GCC poison CONFIG_HPPA_DIS
 #pragma GCC poison CONFIG_I386_DIS
-#pragma GCC poison CONFIG_LM32_DIS
+#pragma GCC poison CONFIG_HEXAGON_DIS
+#pragma GCC poison CONFIG_LOONGARCH_DIS
 #pragma GCC poison CONFIG_M68K_DIS
 #pragma GCC poison CONFIG_MICROBLAZE_DIS
 #pragma GCC poison CONFIG_MIPS_DIS
-#pragma GCC poison CONFIG_NANOMIPS_DIS
-#pragma GCC poison CONFIG_MOXIE_DIS
 #pragma GCC poison CONFIG_NIOS2_DIS
 #pragma GCC poison CONFIG_PPC_DIS
 #pragma GCC poison CONFIG_RISCV_DIS
 #pragma GCC poison CONFIG_SPARC_DIS
 #pragma GCC poison CONFIG_XTENSA_DIS
 
+#pragma GCC poison CONFIG_HVF
 #pragma GCC poison CONFIG_LINUX_USER
 #pragma GCC poison CONFIG_KVM
-#pragma GCC poison CONFIG_SOFTMMU
+#pragma GCC poison CONFIG_WHPX
+#pragma GCC poison CONFIG_XEN
 
 #endif
-#endif
index c6d2ef1d07e67b16995e0d61982f1a4e585627ce..90676093f5d50c3e4fe6afc55e3ed26587eaf47e 100644 (file)
@@ -26,6 +26,8 @@
 #include "exec/ramlist.h"
 #include "exec/ramblock.h"
 
+extern uint64_t total_dirty_pages;
+
 /**
  * clear_bmap_size: calculate clear bitmap size
  *
@@ -40,7 +42,8 @@ static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
 }
 
 /**
- * clear_bmap_set: set clear bitmap for the page range
+ * clear_bmap_set: set clear bitmap for the page range.  Must be with
+ * bitmap_mutex held.
  *
  * @rb: the ramblock to operate on
  * @start: the start page number
@@ -53,12 +56,12 @@ static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
 {
     uint8_t shift = rb->clear_bmap_shift;
 
-    bitmap_set_atomic(rb->clear_bmap, start >> shift,
-                      clear_bmap_size(npages, shift));
+    bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift));
 }
 
 /**
- * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set
+ * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set.
+ * Must be with bitmap_mutex held.
  *
  * @rb: the ramblock to operate on
  * @page: the page number to check
@@ -69,7 +72,7 @@ static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
 {
     uint8_t shift = rb->clear_bmap_shift;
 
-    return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1);
+    return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1);
 }
 
 static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
@@ -104,12 +107,11 @@ long qemu_maxrampagesize(void);
  * Parameters:
  *  @size: the size in bytes of the ram block
  *  @mr: the memory region where the ram block is
- *  @ram_flags: specify the properties of the ram block, which can be one
- *              or bit-or of following values
- *              - RAM_SHARED: mmap the backing file or device with MAP_SHARED
- *              - RAM_PMEM: the backend @mem_path or @fd is persistent memory
- *              Other bits are ignored.
+ *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
+ *              RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
+ *              RAM_READONLY_FD
  *  @mem_path or @fd: specify the backing file or device
+ *  @offset: Offset into target file
  *  @errp: pointer to Error*, to store an error if it happens
  *
  * Return:
@@ -118,14 +120,14 @@ long qemu_maxrampagesize(void);
  */
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    uint32_t ram_flags, const char *mem_path,
-                                   Error **errp);
+                                   off_t offset, Error **errp);
 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
-                                 uint32_t ram_flags, int fd,
+                                 uint32_t ram_flags, int fd, off_t offset,
                                  Error **errp);
 
 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp);
-RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
+RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr,
                          Error **errp);
 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                     void (*resized)(const char*,
@@ -147,8 +149,6 @@ static inline void qemu_ram_block_writeback(RAMBlock *block)
 #define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
 #define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))
 
-void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end);
-
 static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                  ram_addr_t length,
                                                  unsigned client)
@@ -334,16 +334,25 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
 }
 
 #if !defined(_WIN32)
-static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
-                                                          ram_addr_t start,
-                                                          ram_addr_t pages)
+
+/*
+ * Contrary to cpu_physical_memory_sync_dirty_bitmap() this function returns
+ * the number of dirty pages in @bitmap passed as argument. On the other hand,
+ * cpu_physical_memory_sync_dirty_bitmap() returns newly dirtied pages that
+ * weren't set in the global migration bitmap.
+ */
+static inline
+uint64_t cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
+                                                ram_addr_t start,
+                                                ram_addr_t pages)
 {
     unsigned long i, j;
-    unsigned long page_number, c;
+    unsigned long page_number, c, nbits;
     hwaddr addr;
     ram_addr_t ram_addr;
+    uint64_t num_dirty = 0;
     unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
-    unsigned long hpratio = qemu_real_host_page_size / TARGET_PAGE_SIZE;
+    unsigned long hpratio = qemu_real_host_page_size() / TARGET_PAGE_SIZE;
     unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
 
     /* start address is aligned at the start of a word? */
@@ -369,14 +378,21 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                 if (bitmap[k]) {
                     unsigned long temp = leul_to_cpu(bitmap[k]);
 
+                    nbits = ctpopl(temp);
                     qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);
 
-                    if (global_dirty_log) {
+                    if (global_dirty_tracking) {
                         qatomic_or(
                                 &blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                                 temp);
+                        if (unlikely(
+                            global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
+                            total_dirty_pages += nbits;
+                        }
                     }
 
+                    num_dirty += nbits;
+
                     if (tcg_enabled()) {
                         qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
                                    temp);
@@ -394,7 +410,7 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
     } else {
         uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;
 
-        if (!global_dirty_log) {
+        if (!global_dirty_tracking) {
             clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
         }
 
@@ -405,6 +421,11 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
         for (i = 0; i < len; i++) {
             if (bitmap[i] != 0) {
                 c = leul_to_cpu(bitmap[i]);
+                nbits = ctpopl(c);
+                if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
+                    total_dirty_pages += nbits;
+                }
+                num_dirty += nbits;
                 do {
                     j = ctzl(c);
                     c &= ~(1ul << j);
@@ -417,6 +438,8 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
             }
         }
     }
+
+    return num_dirty;
 }
 #endif /* not _WIN32 */
 
index 07d50864d862686298efa4b11066676895d341ee..69c6a53902939a2de7b38284baff5ed4a86daa63 100644 (file)
@@ -21,6 +21,8 @@
 
 #ifndef CONFIG_USER_ONLY
 #include "cpu-common.h"
+#include "qemu/rcu.h"
+#include "exec/ramlist.h"
 
 struct RAMBlock {
     struct rcu_head rcu;
@@ -38,6 +40,7 @@ struct RAMBlock {
     QLIST_ENTRY(RAMBlock) next;
     QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
     int fd;
+    uint64_t fd_offset;
     size_t page_size;
     /* dirty bitmap used during migration */
     unsigned long *bmap;
@@ -51,6 +54,9 @@ struct RAMBlock {
      * and split clearing of dirty bitmap on the remote node (e.g.,
      * KVM).  The bitmap will be set only when doing global sync.
      *
+     * It is only used during src side of ram migration, and it is
+     * protected by the global ram_state.bitmap_mutex.
+     *
      * NOTE: this bitmap is different comparing to the other bitmaps
      * in that one bit can represent multiple guest pages (which is
      * decided by the `clear_bmap_shift' variable below).  On
@@ -59,6 +65,16 @@ struct RAMBlock {
      */
     unsigned long *clear_bmap;
     uint8_t clear_bmap_shift;
+
+    /*
+     * RAM block length that corresponds to the used_length on the migration
+     * source (after RAM block sizes were synchronized). Especially, after
+     * starting to run the guest, used_length and postcopy_length can differ.
+     * Used to register/unregister uffd handlers and as the size of the received
+     * bitmap. Receiving any page beyond this length will bail out, as it
+     * could not have been valid on the source.
+     */
+    ram_addr_t postcopy_length;
 };
 #endif
 #endif
index 26704aa3b0d75f6c823037fbe2c5b7fedfdd9cbb..2ad2a81accfb6fc4ae4534e009d72be9cc558909 100644 (file)
@@ -65,16 +65,21 @@ void qemu_mutex_lock_ramlist(void);
 void qemu_mutex_unlock_ramlist(void);
 
 struct RAMBlockNotifier {
-    void (*ram_block_added)(RAMBlockNotifier *n, void *host, size_t size);
-    void (*ram_block_removed)(RAMBlockNotifier *n, void *host, size_t size);
+    void (*ram_block_added)(RAMBlockNotifier *n, void *host, size_t size,
+                            size_t max_size);
+    void (*ram_block_removed)(RAMBlockNotifier *n, void *host, size_t size,
+                              size_t max_size);
+    void (*ram_block_resized)(RAMBlockNotifier *n, void *host, size_t old_size,
+                              size_t new_size);
     QLIST_ENTRY(RAMBlockNotifier) next;
 };
 
 void ram_block_notifier_add(RAMBlockNotifier *n);
 void ram_block_notifier_remove(RAMBlockNotifier *n);
-void ram_block_notify_add(void *host, size_t size);
-void ram_block_notify_remove(void *host, size_t size);
+void ram_block_notify_add(void *host, size_t size, size_t max_size);
+void ram_block_notify_remove(void *host, size_t size, size_t max_size);
+void ram_block_notify_resize(void *host, size_t old_size, size_t new_size);
 
-void ram_block_dump(Monitor *mon);
+GString *ram_block_format(void);
 
 #endif /* RAMLIST_H */
diff --git a/include/exec/replay-core.h b/include/exec/replay-core.h
new file mode 100644 (file)
index 0000000..244c77a
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * QEMU replay core API
+ *
+ * Copyright (c) 2010-2015 Institute for System Programming
+ *                         of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef EXEC_REPLAY_H
+#define EXEC_REPLAY_H
+
+#include "qapi/qapi-types-replay.h"
+
+extern ReplayMode replay_mode;
+
+/* Replay process control functions */
+
+/* Enables recording or saving event log with specified parameters */
+void replay_configure(struct QemuOpts *opts);
+/* Initializes timers used for snapshotting and enables events recording */
+void replay_start(void);
+/* Closes replay log file and frees other resources. */
+void replay_finish(void);
+/* Adds replay blocker with the specified error description */
+void replay_add_blocker(const char *feature);
+/* Returns name of the replay log file */
+const char *replay_get_filename(void);
+
+/*
+ * Start making one step in backward direction.
+ * Used by gdbstub for backwards debugging.
+ * Returns true on success.
+ */
+bool replay_reverse_step(void);
+/*
+ * Start searching the last breakpoint/watchpoint.
+ * Used by gdbstub for backwards debugging.
+ * Returns true if the process successfully started.
+ */
+bool replay_reverse_continue(void);
+/*
+ * Returns true if replay module is processing
+ * reverse_continue or reverse_step request
+ */
+bool replay_running_debug(void);
+/* Called in reverse debugging mode to collect breakpoint information */
+void replay_breakpoint(void);
+/* Called when gdb is attached to gdbstub */
+void replay_gdb_attached(void);
+
+/* Interrupts and exceptions */
+
+/* Called by exception handler to write or read exception processing events */
+bool replay_exception(void);
+/*
+ * Used to determine that exception is pending.
+ * Does not proceed to the next event in the log.
+ */
+bool replay_has_exception(void);
+/*
+ * Called by interrupt handlers to write or read interrupt processing events.
+ * Returns true if interrupt should be processed.
+ */
+bool replay_interrupt(void);
+/*
+ * Tries to read interrupt event from the file.
+ * Returns true, when interrupt request is pending.
+ */
+bool replay_has_interrupt(void);
+
+/* Processing data from random generators */
+
+/* Saves the values from the random number generator */
+void replay_save_random(int ret, void *buf, size_t len);
+/* Loads the saved values for the random number generator */
+int replay_read_random(void *buf, size_t len);
+
+#endif
diff --git a/include/exec/softmmu-semi.h b/include/exec/softmmu-semi.h
deleted file mode 100644 (file)
index fbcae88..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Helper routines to provide target memory access for semihosting
- * syscalls in system emulation mode.
- *
- * Copyright (c) 2007 CodeSourcery.
- *
- * This code is licensed under the GPL
- */
-
-#ifndef SOFTMMU_SEMI_H
-#define SOFTMMU_SEMI_H
-
-#include "cpu.h"
-
-static inline uint64_t softmmu_tget64(CPUArchState *env, target_ulong addr)
-{
-    uint64_t val;
-
-    cpu_memory_rw_debug(env_cpu(env), addr, (uint8_t *)&val, 8, 0);
-    return tswap64(val);
-}
-
-static inline uint32_t softmmu_tget32(CPUArchState *env, target_ulong addr)
-{
-    uint32_t val;
-
-    cpu_memory_rw_debug(env_cpu(env), addr, (uint8_t *)&val, 4, 0);
-    return tswap32(val);
-}
-
-static inline uint32_t softmmu_tget8(CPUArchState *env, target_ulong addr)
-{
-    uint8_t val;
-
-    cpu_memory_rw_debug(env_cpu(env), addr, &val, 1, 0);
-    return val;
-}
-
-#define get_user_u64(arg, p) ({ arg = softmmu_tget64(env, p); 0; })
-#define get_user_u32(arg, p) ({ arg = softmmu_tget32(env, p) ; 0; })
-#define get_user_u8(arg, p) ({ arg = softmmu_tget8(env, p) ; 0; })
-#define get_user_ual(arg, p) get_user_u32(arg, p)
-
-static inline void softmmu_tput64(CPUArchState *env,
-                                  target_ulong addr, uint64_t val)
-{
-    val = tswap64(val);
-    cpu_memory_rw_debug(env_cpu(env), addr, (uint8_t *)&val, 8, 1);
-}
-
-static inline void softmmu_tput32(CPUArchState *env,
-                                  target_ulong addr, uint32_t val)
-{
-    val = tswap32(val);
-    cpu_memory_rw_debug(env_cpu(env), addr, (uint8_t *)&val, 4, 1);
-}
-#define put_user_u64(arg, p) ({ softmmu_tput64(env, p, arg) ; 0; })
-#define put_user_u32(arg, p) ({ softmmu_tput32(env, p, arg) ; 0; })
-#define put_user_ual(arg, p) put_user_u32(arg, p)
-
-static void *softmmu_lock_user(CPUArchState *env,
-                               target_ulong addr, target_ulong len, int copy)
-{
-    uint8_t *p;
-    /* TODO: Make this something that isn't fixed size.  */
-    p = malloc(len);
-    if (p && copy) {
-        cpu_memory_rw_debug(env_cpu(env), addr, p, len, 0);
-    }
-    return p;
-}
-#define lock_user(type, p, len, copy) softmmu_lock_user(env, p, len, copy)
-static char *softmmu_lock_user_string(CPUArchState *env, target_ulong addr)
-{
-    char *p;
-    char *s;
-    uint8_t c;
-    /* TODO: Make this something that isn't fixed size.  */
-    s = p = malloc(1024);
-    if (!s) {
-        return NULL;
-    }
-    do {
-        cpu_memory_rw_debug(env_cpu(env), addr, &c, 1, 0);
-        addr++;
-        *(p++) = c;
-    } while (c);
-    return s;
-}
-#define lock_user_string(p) softmmu_lock_user_string(env, p)
-static void softmmu_unlock_user(CPUArchState *env, void *p, target_ulong addr,
-                                target_ulong len)
-{
-    if (len) {
-        cpu_memory_rw_debug(env_cpu(env), addr, p, len, 1);
-    }
-    free(p);
-}
-#define unlock_user(s, args, len) softmmu_unlock_user(env, s, args, len)
-
-#endif
diff --git a/include/exec/target_long.h b/include/exec/target_long.h
new file mode 100644 (file)
index 0000000..3cd8e26
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Target Long Definitions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2023 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef _TARGET_LONG_H_
+#define _TARGET_LONG_H_
+
+/*
+ * Usually this should only be included via cpu-defs.h however for
+ * certain cases where we want to build only two versions of a binary
+ * object we can include directly. However the build-system must
+ * ensure TARGET_LONG_BITS is defined directly.
+ */
+#ifndef TARGET_LONG_BITS
+#error TARGET_LONG_BITS not defined
+#endif
+
+#define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8)
+
+/* target_ulong is the type of a virtual address */
+#if TARGET_LONG_SIZE == 4
+typedef int32_t target_long;
+typedef uint32_t target_ulong;
+#define TARGET_FMT_lx "%08x"
+#define TARGET_FMT_ld "%d"
+#define TARGET_FMT_lu "%u"
+#define MO_TL MO_32
+#elif TARGET_LONG_SIZE == 8
+typedef int64_t target_long;
+typedef uint64_t target_ulong;
+#define TARGET_FMT_lx "%016" PRIx64
+#define TARGET_FMT_ld "%" PRId64
+#define TARGET_FMT_lu "%" PRIu64
+#define MO_TL MO_64
+#else
+#error TARGET_LONG_SIZE undefined
+#endif
+
+#endif /* _TARGET_LONG_H_ */
index 96726c36a416379ecbaf7855479f1a7393063cb2..98ffbb5c2396f97d2d3cc06c915f6daa170a75ed 100644 (file)
@@ -15,7 +15,9 @@
 #define EXEC_TARGET_PAGE_H
 
 size_t qemu_target_page_size(void);
+int qemu_target_page_mask(void);
 int qemu_target_page_bits(void);
 int qemu_target_page_bits_min(void);
 
+size_t qemu_target_pages_to_MiB(size_t pages);
 #endif
diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
deleted file mode 100644 (file)
index ec4c13b..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Internal structs that QEMU exports to TCG
- *
- *  Copyright (c) 2003 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef QEMU_TB_CONTEXT_H
-#define QEMU_TB_CONTEXT_H
-
-#include "qemu/thread.h"
-#include "qemu/qht.h"
-
-#define CODE_GEN_HTABLE_BITS     15
-#define CODE_GEN_HTABLE_SIZE     (1 << CODE_GEN_HTABLE_BITS)
-
-typedef struct TranslationBlock TranslationBlock;
-typedef struct TBContext TBContext;
-
-struct TBContext {
-
-    struct qht htable;
-
-    /* statistics */
-    unsigned tb_flush_count;
-};
-
-extern TBContext tb_ctx;
-
-#endif
diff --git a/include/exec/tb-flush.h b/include/exec/tb-flush.h
new file mode 100644 (file)
index 0000000..142c240
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * tb-flush prototype for use by the rest of the system.
+ *
+ * Copyright (c) 2022 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef _TB_FLUSH_H_
+#define _TB_FLUSH_H_
+
+/**
+ * tb_flush() - flush all translation blocks
+ * @cs: CPUState (must be valid, but treated as anonymous pointer)
+ *
+ * Used to flush all the translation blocks in the system. Sometimes
+ * it is simpler to flush everything than work out which individual
+ * translations are now invalid and ensure they are not called
+ * anymore.
+ *
+ * tb_flush() takes care of running the flush in an exclusive context
+ * if it is not already running in one. This means no guest code will
+ * run until this complete.
+ */
+void tb_flush(CPUState *cs);
+
+void tcg_flush_jmp_cache(CPUState *cs);
+
+#endif /* _TB_FLUSH_H_ */
diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
deleted file mode 100644 (file)
index 0a273d9..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * internal execution defines for qemu
- *
- *  Copyright (c) 2003 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef EXEC_TB_HASH_H
-#define EXEC_TB_HASH_H
-
-#include "exec/cpu-defs.h"
-#include "exec/exec-all.h"
-#include "qemu/xxhash.h"
-
-#ifdef CONFIG_SOFTMMU
-
-/* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for
-   addresses on the same page.  The top bits are the same.  This allows
-   TLB invalidation to quickly clear a subset of the hash table.  */
-#define TB_JMP_PAGE_BITS (TB_JMP_CACHE_BITS / 2)
-#define TB_JMP_PAGE_SIZE (1 << TB_JMP_PAGE_BITS)
-#define TB_JMP_ADDR_MASK (TB_JMP_PAGE_SIZE - 1)
-#define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE)
-
-static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc)
-{
-    target_ulong tmp;
-    tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS));
-    return (tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK;
-}
-
-static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
-{
-    target_ulong tmp;
-    tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS));
-    return (((tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK)
-           | (tmp & TB_JMP_ADDR_MASK));
-}
-
-#else
-
-/* In user-mode we can get better hashing because we do not have a TLB */
-static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
-{
-    return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1);
-}
-
-#endif /* CONFIG_SOFTMMU */
-
-static inline
-uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
-                      uint32_t cf_mask, uint32_t trace_vcpu_dstate)
-{
-    return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
-}
-
-#endif
diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h
deleted file mode 100644 (file)
index 9cf475b..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
- *
- * License: GNU GPL, version 2 or later.
- *   See the COPYING file in the top-level directory.
- */
-#ifndef EXEC_TB_LOOKUP_H
-#define EXEC_TB_LOOKUP_H
-
-#ifdef NEED_CPU_H
-#include "cpu.h"
-#else
-#include "exec/poison.h"
-#endif
-
-#include "exec/exec-all.h"
-#include "exec/tb-hash.h"
-
-/* Might cause an exception, so have a longjmp destination ready */
-static inline TranslationBlock *
-tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base,
-                     uint32_t *flags, uint32_t cf_mask)
-{
-    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
-    TranslationBlock *tb;
-    uint32_t hash;
-
-    cpu_get_tb_cpu_state(env, pc, cs_base, flags);
-    hash = tb_jmp_cache_hash_func(*pc);
-    tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
-
-    cf_mask &= ~CF_CLUSTER_MASK;
-    cf_mask |= cpu->cluster_index << CF_CLUSTER_SHIFT;
-
-    if (likely(tb &&
-               tb->pc == *pc &&
-               tb->cs_base == *cs_base &&
-               tb->flags == *flags &&
-               tb->trace_vcpu_dstate == *cpu->trace_dstate &&
-               (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) {
-        return tb;
-    }
-    tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags, cf_mask);
-    if (tb == NULL) {
-        return NULL;
-    }
-    qatomic_set(&cpu->tb_jmp_cache[hash], tb);
-    return tb;
-}
-
-#endif /* EXEC_TB_LOOKUP_H */
diff --git a/include/exec/tlb-common.h b/include/exec/tlb-common.h
new file mode 100644 (file)
index 0000000..dc5a5fa
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Common definitions for the softmmu tlb
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef EXEC_TLB_COMMON_H
+#define EXEC_TLB_COMMON_H 1
+
+#define CPU_TLB_ENTRY_BITS 5
+
+/* Minimalized TLB entry for use by TCG fast path. */
+typedef union CPUTLBEntry {
+    struct {
+        uint64_t addr_read;
+        uint64_t addr_write;
+        uint64_t addr_code;
+        /*
+         * Addend to virtual address to get host address.  IO accesses
+         * use the corresponding iotlb value.
+         */
+        uintptr_t addend;
+    };
+    /*
+     * Padding to get a power of two size, as well as index
+     * access to addr_{read,write,code}.
+     */
+    uint64_t addr_idx[(1 << CPU_TLB_ENTRY_BITS) / sizeof(uint64_t)];
+} CPUTLBEntry;
+
+QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
+
+/*
+ * Data elements that are per MMU mode, accessed by the fast path.
+ * The structure is aligned to aid loading the pair with one insn.
+ */
+typedef struct CPUTLBDescFast {
+    /* Contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */
+    uintptr_t mask;
+    /* The array of tlb entries itself. */
+    CPUTLBEntry *table;
+} CPUTLBDescFast QEMU_ALIGNED(2 * sizeof(void *));
+
+#endif /* EXEC_TLB_COMMON_H */
diff --git a/include/exec/translate-all.h b/include/exec/translate-all.h
new file mode 100644 (file)
index 0000000..88602ae
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ *  Translated block handling
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef TRANSLATE_ALL_H
+#define TRANSLATE_ALL_H
+
+#include "exec/exec-all.h"
+
+
+/* translate-all.c */
+void tb_invalidate_phys_page(tb_page_addr_t addr);
+void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr);
+
+#ifdef CONFIG_USER_ONLY
+void page_protect(tb_page_addr_t page_addr);
+int page_unprotect(target_ulong address, uintptr_t pc);
+#endif
+
+#endif /* TRANSLATE_ALL_H */
diff --git a/include/exec/translation-block.h b/include/exec/translation-block.h
new file mode 100644 (file)
index 0000000..e2b26e1
--- /dev/null
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Definition of TranslationBlock.
+ *  Copyright (c) 2003 Fabrice Bellard
+ */
+
+#ifndef EXEC_TRANSLATION_BLOCK_H
+#define EXEC_TRANSLATION_BLOCK_H
+
+#include "qemu/thread.h"
+#include "exec/cpu-common.h"
+#ifdef CONFIG_USER_ONLY
+#include "qemu/interval-tree.h"
+#endif
+
+/*
+ * Page tracking code uses ram addresses in system mode, and virtual
+ * addresses in userspace mode.  Define tb_page_addr_t to be an
+ * appropriate type.
+ */
+#if defined(CONFIG_USER_ONLY)
+typedef vaddr tb_page_addr_t;
+#define TB_PAGE_ADDR_FMT "%" VADDR_PRIx
+#else
+typedef ram_addr_t tb_page_addr_t;
+#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT
+#endif
+
+/*
+ * Translation Cache-related fields of a TB.
+ * This struct exists just for convenience; we keep track of TB's in a binary
+ * search tree, and the only fields needed to compare TB's in the tree are
+ * @ptr and @size.
+ * Note: the address of search data can be obtained by adding @size to @ptr.
+ */
+struct tb_tc {
+    const void *ptr;    /* pointer to the translated code */
+    size_t size;
+};
+
+struct TranslationBlock {
+    /*
+     * Guest PC corresponding to this block.  This must be the true
+     * virtual address.  Therefore e.g. x86 stores EIP + CS_BASE, and
+     * targets like Arm, MIPS, HP-PA, which reuse low bits for ISA or
+     * privilege, must store those bits elsewhere.
+     *
+     * If CF_PCREL, the opcodes for the TranslationBlock are written
+     * such that the TB is associated only with the physical page and
+     * may be run in any virtual address context.  In this case, PC
+     * must always be taken from ENV in a target-specific manner.
+     * Unwind information is taken as offsets from the page, to be
+     * deposited into the "current" PC.
+     */
+    vaddr pc;
+
+    /*
+     * Target-specific data associated with the TranslationBlock, e.g.:
+     * x86: the original user, the Code Segment virtual base,
+     * arm: an extension of tb->flags,
+     * s390x: instruction data for EXECUTE,
+     * sparc: the next pc of the instruction queue (for delay slots).
+     */
+    uint64_t cs_base;
+
+    uint32_t flags; /* flags defining in which context the code was generated */
+    uint32_t cflags;    /* compile flags */
+
+/* Note that TCG_MAX_INSNS is 512; we validate this match elsewhere. */
+#define CF_COUNT_MASK    0x000001ff
+#define CF_NO_GOTO_TB    0x00000200 /* Do not chain with goto_tb */
+#define CF_NO_GOTO_PTR   0x00000400 /* Do not chain with goto_ptr */
+#define CF_SINGLE_STEP   0x00000800 /* gdbstub single-step in effect */
+#define CF_MEMI_ONLY     0x00001000 /* Only instrument memory ops */
+#define CF_USE_ICOUNT    0x00002000
+#define CF_INVALID       0x00004000 /* TB is stale. Set with @jmp_lock held */
+#define CF_PARALLEL      0x00008000 /* Generate code for a parallel context */
+#define CF_NOIRQ         0x00010000 /* Generate an uninterruptible TB */
+#define CF_PCREL         0x00020000 /* Opcodes in TB are PC-relative */
+#define CF_CLUSTER_MASK  0xff000000 /* Top 8 bits are cluster ID */
+#define CF_CLUSTER_SHIFT 24
+
+    /*
+     * Above fields used for comparing
+     */
+
+    /* size of target code for this block (1 <= size <= TARGET_PAGE_SIZE) */
+    uint16_t size;
+    uint16_t icount;
+
+    struct tb_tc tc;
+
+    /*
+     * Track tb_page_addr_t intervals that intersect this TB.
+     * For user-only, the virtual addresses are always contiguous,
+     * and we use a unified interval tree.  For system, we use a
+     * linked list headed in each PageDesc.  Within the list, the lsb
+     * of the previous pointer tells the index of page_next[], and the
+     * list is protected by the PageDesc lock(s).
+     */
+#ifdef CONFIG_USER_ONLY
+    IntervalTreeNode itree;
+#else
+    uintptr_t page_next[2];
+    tb_page_addr_t page_addr[2];
+#endif
+
+    /* jmp_lock placed here to fill a 4-byte hole. Its documentation is below */
+    QemuSpin jmp_lock;
+
+    /* The following data are used to directly call another TB from
+     * the code of this one. This can be done either by emitting direct or
+     * indirect native jump instructions. These jumps are reset so that the TB
+     * just continues its execution. The TB can be linked to another one by
+     * setting one of the jump targets (or patching the jump instruction). Only
+     * two of such jumps are supported.
+     */
+#define TB_JMP_OFFSET_INVALID 0xffff /* indicates no jump generated */
+    uint16_t jmp_reset_offset[2]; /* offset of original jump target */
+    uint16_t jmp_insn_offset[2];  /* offset of direct jump insn */
+    uintptr_t jmp_target_addr[2]; /* target address */
+
+    /*
+     * Each TB has a NULL-terminated list (jmp_list_head) of incoming jumps.
+     * Each TB can have two outgoing jumps, and therefore can participate
+     * in two lists. The list entries are kept in jmp_list_next[2]. The least
+     * significant bit (LSB) of the pointers in these lists is used to encode
+     * which of the two list entries is to be used in the pointed TB.
+     *
+     * List traversals are protected by jmp_lock. The destination TB of each
+     * outgoing jump is kept in jmp_dest[] so that the appropriate jmp_lock
+     * can be acquired from any origin TB.
+     *
+     * jmp_dest[] are tagged pointers as well. The LSB is set when the TB is
+     * being invalidated, so that no further outgoing jumps from it can be set.
+     *
+     * jmp_lock also protects the CF_INVALID cflag; a jump must not be chained
+     * to a destination TB that has CF_INVALID set.
+     */
+    uintptr_t jmp_list_head;
+    uintptr_t jmp_list_next[2];
+    uintptr_t jmp_dest[2];
+};
+
+/* The alignment given to TranslationBlock during allocation. */
+#define CODE_GEN_ALIGN  16
+
+#endif /* EXEC_TRANSLATION_BLOCK_H */
index 638e1529c588e2867638756e0621830ac746d564..6d3f59d095104a0f3a4ab1b1b75f5e9917394d0b 100644 (file)
  * member in your target-specific DisasContext.
  */
 
-
 #include "qemu/bswap.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
-#include "exec/plugin-gen.h"
-#include "tcg/tcg.h"
+#include "exec/cpu_ldst.h"     /* for abi_ptr */
 
+/**
+ * gen_intermediate_code
+ * @cpu: cpu context
+ * @tb: translation block
+ * @max_insns: max number of instructions to translate
+ * @pc: guest virtual program counter address
+ * @host_pc: host physical program counter address
+ *
+ * This function must be provided by the target, which should create
+ * the target-specific DisasContext, and then invoke translator_loop.
+ */
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
+                           target_ulong pc, void *host_pc);
 
 /**
  * DisasJumpType:
@@ -63,6 +72,8 @@ typedef enum DisasJumpType {
  * @num_insns: Number of translated instructions (including current).
  * @max_insns: Maximum number of instructions to be translated in this TB.
  * @singlestep_enabled: "Hardware" single stepping enabled.
+ * @saved_can_do_io: Known value of cpu->neg.can_do_io, or -1 for unknown.
+ * @plugin_enabled: TCG plugin enabled in this TB.
  *
  * Architecture-agnostic disassembly context.
  */
@@ -74,6 +85,9 @@ typedef struct DisasContextBase {
     int num_insns;
     int max_insns;
     bool singlestep_enabled;
+    int8_t saved_can_do_io;
+    bool plugin_enabled;
+    void *host_addr[2];
 } DisasContextBase;
 
 /**
@@ -89,15 +103,6 @@ typedef struct DisasContextBase {
  * @insn_start:
  *      Emit the tcg_gen_insn_start opcode.
  *
- * @breakpoint_check:
- *      When called, the breakpoint has already been checked to match the PC,
- *      but the target may decide the breakpoint missed the address
- *      (e.g., due to conditions encoded in their flags).  Return true to
- *      indicate that the breakpoint did hit, in which case no more breakpoints
- *      are checked.  If the breakpoint did hit, emit any code required to
- *      signal the exception, and set db->is_jmp as necessary to terminate
- *      the main loop.
- *
  * @translate_insn:
  *      Disassemble one instruction and set db->pc_next for the start
  *      of the following instruction.  Set db->is_jmp as necessary to
@@ -113,20 +118,20 @@ typedef struct TranslatorOps {
     void (*init_disas_context)(DisasContextBase *db, CPUState *cpu);
     void (*tb_start)(DisasContextBase *db, CPUState *cpu);
     void (*insn_start)(DisasContextBase *db, CPUState *cpu);
-    bool (*breakpoint_check)(DisasContextBase *db, CPUState *cpu,
-                             const CPUBreakpoint *bp);
     void (*translate_insn)(DisasContextBase *db, CPUState *cpu);
     void (*tb_stop)(DisasContextBase *db, CPUState *cpu);
-    void (*disas_log)(const DisasContextBase *db, CPUState *cpu);
+    void (*disas_log)(const DisasContextBase *db, CPUState *cpu, FILE *f);
 } TranslatorOps;
 
 /**
  * translator_loop:
- * @ops: Target-specific operations.
- * @db: Disassembly context.
  * @cpu: Target vCPU.
  * @tb: Translation block.
  * @max_insns: Maximum number of insns to translate.
+ * @pc: guest virtual program counter address
+ * @host_pc: host physical program counter address
+ * @ops: Target-specific operations.
+ * @db: Disassembly context.
  *
  * Generic translator loop.
  *
@@ -140,10 +145,29 @@ typedef struct TranslatorOps {
  * - When single-stepping is enabled (system-wide or on the current vCPU).
  * - When too many instructions have been translated.
  */
-void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
-                     CPUState *cpu, TranslationBlock *tb, int max_insns);
+void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
+                     vaddr pc, void *host_pc, const TranslatorOps *ops,
+                     DisasContextBase *db);
 
-void translator_loop_temp_check(DisasContextBase *db);
+/**
+ * translator_use_goto_tb
+ * @db: Disassembly context
+ * @dest: target pc of the goto
+ *
+ * Return true if goto_tb is allowed between the current TB
+ * and the destination PC.
+ */
+bool translator_use_goto_tb(DisasContextBase *db, vaddr dest);
+
+/**
+ * translator_io_start
+ * @db: Disassembly context
+ *
+ * If icount is enabled, set cpu->can_do_io, adjust db->is_jmp to
+ * DISAS_TOO_MANY if it is still DISAS_NEXT, and return true.
+ * Otherwise return false.
+ */
+bool translator_io_start(DisasContextBase *db);
 
 /*
  * Translator Load Functions
@@ -156,28 +180,64 @@ void translator_loop_temp_check(DisasContextBase *db);
  * the relevant information at translation time.
  */
 
-#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn)             \
-    static inline type                                                  \
-    fullname ## _swap(CPUArchState *env, abi_ptr pc, bool do_swap)      \
-    {                                                                   \
-        type ret = load_fn(env, pc);                                    \
-        if (do_swap) {                                                  \
-            ret = swap_fn(ret);                                         \
-        }                                                               \
-        plugin_insn_append(&ret, sizeof(ret));                          \
-        return ret;                                                     \
-    }                                                                   \
-                                                                        \
-    static inline type fullname(CPUArchState *env, abi_ptr pc)          \
-    {                                                                   \
-        return fullname ## _swap(env, pc, false);                       \
+uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
+uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
+uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
+uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
+
+static inline uint16_t
+translator_lduw_swap(CPUArchState *env, DisasContextBase *db,
+                     abi_ptr pc, bool do_swap)
+{
+    uint16_t ret = translator_lduw(env, db, pc);
+    if (do_swap) {
+        ret = bswap16(ret);
+    }
+    return ret;
+}
+
+static inline uint32_t
+translator_ldl_swap(CPUArchState *env, DisasContextBase *db,
+                    abi_ptr pc, bool do_swap)
+{
+    uint32_t ret = translator_ldl(env, db, pc);
+    if (do_swap) {
+        ret = bswap32(ret);
+    }
+    return ret;
+}
+
+static inline uint64_t
+translator_ldq_swap(CPUArchState *env, DisasContextBase *db,
+                    abi_ptr pc, bool do_swap)
+{
+    uint64_t ret = translator_ldq(env, db, pc);
+    if (do_swap) {
+        ret = bswap64(ret);
     }
+    return ret;
+}
 
-GEN_TRANSLATOR_LD(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */)
-GEN_TRANSLATOR_LD(translator_ldsw, int16_t, cpu_ldsw_code, bswap16)
-GEN_TRANSLATOR_LD(translator_lduw, uint16_t, cpu_lduw_code, bswap16)
-GEN_TRANSLATOR_LD(translator_ldl, uint32_t, cpu_ldl_code, bswap32)
-GEN_TRANSLATOR_LD(translator_ldq, uint64_t, cpu_ldq_code, bswap64)
-#undef GEN_TRANSLATOR_LD
+/**
+ * translator_fake_ldb - fake instruction load
+ * @insn8: byte of instruction
+ * @pc: program counter of instruction
+ *
+ * This is a special case helper used where the instruction we are
+ * about to translate comes from somewhere else (e.g. being
+ * re-synthesised for s390x "ex"). It ensures we update other areas of
+ * the translator with details of the executed instruction.
+ */
+void translator_fake_ldb(uint8_t insn8, abi_ptr pc);
+
+/*
+ * Return whether addr is on the same page as where disassembly started.
+ * Translators can use this to enforce the rule that only single-insn
+ * translation blocks are allowed to cross page boundaries.
+ */
+static inline bool is_same_page(const DisasContextBase *db, target_ulong addr)
+{
+    return ((addr ^ db->pc_first) & TARGET_PAGE_MASK) == 0;
+}
 
-#endif  /* EXEC__TRANSLATOR_H */
+#endif /* EXEC__TRANSLATOR_H */
diff --git a/include/exec/tswap.h b/include/exec/tswap.h
new file mode 100644 (file)
index 0000000..68944a8
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Macros for swapping a value if the endianness is different
+ * between the target and the host.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef TSWAP_H
+#define TSWAP_H
+
+#include "hw/core/cpu.h"
+#include "qemu/bswap.h"
+
+/*
+ * If we're in target-specific code, we can hard-code the swapping
+ * condition, otherwise we have to do (slower) run-time checks.
+ */
+#ifdef NEED_CPU_H
+#define target_needs_bswap()  (HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN)
+#else
+#define target_needs_bswap()  (target_words_bigendian() != HOST_BIG_ENDIAN)
+#endif
+
+static inline uint16_t tswap16(uint16_t s)
+{
+    if (target_needs_bswap()) {
+        return bswap16(s);
+    } else {
+        return s;
+    }
+}
+
+static inline uint32_t tswap32(uint32_t s)
+{
+    if (target_needs_bswap()) {
+        return bswap32(s);
+    } else {
+        return s;
+    }
+}
+
+static inline uint64_t tswap64(uint64_t s)
+{
+    if (target_needs_bswap()) {
+        return bswap64(s);
+    } else {
+        return s;
+    }
+}
+
+static inline void tswap16s(uint16_t *s)
+{
+    if (target_needs_bswap()) {
+        *s = bswap16(*s);
+    }
+}
+
+static inline void tswap32s(uint32_t *s)
+{
+    if (target_needs_bswap()) {
+        *s = bswap32(*s);
+    }
+}
+
+static inline void tswap64s(uint64_t *s)
+{
+    if (target_needs_bswap()) {
+        *s = bswap64(*s);
+    }
+}
+
+#endif  /* TSWAP_H */
index 743b8bb9eac1263c68562d35de4d45a72afa3f7a..6178453d94130ac5fdc72d9bab698f1c81cda7ac 100644 (file)
 #define ABI_LLONG_ALIGNMENT 2
 #endif
 
-#if (defined(TARGET_I386) && !defined(TARGET_X86_64)) || defined(TARGET_SH4)
+#ifdef TARGET_CRIS
+#define ABI_SHORT_ALIGNMENT 1
+#define ABI_INT_ALIGNMENT 1
+#define ABI_LONG_ALIGNMENT 1
+#define ABI_LLONG_ALIGNMENT 1
+#endif
+
+#if (defined(TARGET_I386) && !defined(TARGET_X86_64)) \
+    || defined(TARGET_SH4) \
+    || defined(TARGET_OPENRISC) \
+    || defined(TARGET_MICROBLAZE) \
+    || defined(TARGET_NIOS2)
 #define ABI_LLONG_ALIGNMENT 4
 #endif
 
diff --git a/include/exec/user/guest-base.h b/include/exec/user/guest-base.h
new file mode 100644 (file)
index 0000000..afe2ab7
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Declaration of guest_base.
+ *  Copyright (c) 2003 Fabrice Bellard
+ */
+
+#ifndef EXEC_USER_GUEST_BASE_H
+#define EXEC_USER_GUEST_BASE_H
+
+extern uintptr_t guest_base;
+
+#endif
index 300a840d58d31b3abe5841930eedcd39647c86b6..2ebfecf58eb2f62da322a595ea79f7c951565b96 100644 (file)
@@ -111,8 +111,7 @@ static inline int thunk_type_size(const argtype *type_ptr, int is_host)
         if (is_host) {
 #if defined(HOST_X86_64)
             return 8;
-#elif defined(HOST_ALPHA) || defined(HOST_IA64) || defined(HOST_MIPS) || \
-      defined(HOST_PARISC) || defined(HOST_SPARC64)
+#elif defined(HOST_MIPS) || defined(HOST_SPARC64)
             return 4;
 #elif defined(HOST_PPC)
             return sizeof(void *);
@@ -193,10 +192,17 @@ static inline int thunk_type_align(const argtype *type_ptr, int is_host)
     }
 }
 
-unsigned int target_to_host_bitmask(unsigned int target_mask,
-                                    const bitmask_transtbl * trans_tbl);
-unsigned int host_to_target_bitmask(unsigned int host_mask,
-                                    const bitmask_transtbl * trans_tbl);
+unsigned int target_to_host_bitmask_len(unsigned int target_mask,
+                                        const bitmask_transtbl *trans_tbl,
+                                        size_t trans_len);
+unsigned int host_to_target_bitmask_len(unsigned int host_mask,
+                                        const bitmask_transtbl * trans_tbl,
+                                        size_t trans_len);
+
+#define target_to_host_bitmask(M, T) \
+    target_to_host_bitmask_len(M, T, ARRAY_SIZE(T))
+#define host_to_target_bitmask(M, T) \
+    host_to_target_bitmask_len(M, T, ARRAY_SIZE(T))
 
 void thunk_init(unsigned int max_structs);
 
index 2f0674fbddec92ea36eb6fbf418614e78060833a..94cbe073ec5bb5d0cb3430691b32fe14576843fa 100644 (file)
@@ -48,8 +48,8 @@ this code that are retained.
 ===============================================================================
 */
 
-#ifndef _SOFTFLOAT_HELPERS_H_
-#define _SOFTFLOAT_HELPERS_H_
+#ifndef SOFTFLOAT_HELPERS_H
+#define SOFTFLOAT_HELPERS_H
 
 #include "fpu/softfloat-types.h"
 
@@ -69,7 +69,7 @@ static inline void set_float_exception_flags(int val, float_status *status)
     status->float_exception_flags = val;
 }
 
-static inline void set_floatx80_rounding_precision(int val,
+static inline void set_floatx80_rounding_precision(FloatX80RoundPrec val,
                                                    float_status *status)
 {
     status->floatx80_rounding_precision = val;
@@ -120,7 +120,8 @@ static inline int get_float_exception_flags(float_status *status)
     return status->float_exception_flags;
 }
 
-static inline int get_floatx80_rounding_precision(float_status *status)
+static inline FloatX80RoundPrec
+get_floatx80_rounding_precision(float_status *status)
 {
     return status->floatx80_rounding_precision;
 }
@@ -140,4 +141,4 @@ static inline bool get_default_nan_mode(float_status *status)
     return status->default_nan_mode;
 }
 
-#endif /* _SOFTFLOAT_HELPERS_H_ */
+#endif /* SOFTFLOAT_HELPERS_H */
index a35ec2893a2c966a84759a0bfa83db97cdcda2f4..f35cdbfa636b52f15548536d13e0b13220ddb8fc 100644 (file)
@@ -8,7 +8,6 @@
  * so some portions are provided under:
  *  the SoftFloat-2a license
  *  the BSD license
- *  GPL-v2-or-later
  *
  * Any future contributions to this file after December 1st 2014 will be
  * taken to be licensed under the Softfloat-2a license unless specifically
@@ -75,14 +74,47 @@ this code that are retained.
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/* Portions of this work are licensed under the terms of the GNU GPL,
- * version 2 or later. See the COPYING file in the top-level directory.
- */
-
 #ifndef FPU_SOFTFLOAT_MACROS_H
 #define FPU_SOFTFLOAT_MACROS_H
 
 #include "fpu/softfloat-types.h"
+#include "qemu/host-utils.h"
+
+/**
+ * shl_double: double-word merging left shift
+ * @l: left or most-significant word
+ * @r: right or least-significant word
+ * @c: shift count
+ *
+ * Shift @l left by @c bits, shifting in bits from @r.
+ */
+static inline uint64_t shl_double(uint64_t l, uint64_t r, int c)
+{
+#if defined(__x86_64__)
+    asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c));
+    return l;
+#else
+    return c ? (l << c) | (r >> (64 - c)) : l;
+#endif
+}
+
+/**
+ * shr_double: double-word merging right shift
+ * @l: left or most-significant word
+ * @r: right or least-significant word
+ * @c: shift count
+ *
+ * Shift @r right by @c bits, shifting in bits from @l.
+ */
+static inline uint64_t shr_double(uint64_t l, uint64_t r, int c)
+{
+#if defined(__x86_64__)
+    asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c));
+    return r;
+#else
+    return c ? (r >> c) | (l << (64 - c)) : r;
+#endif
+}
 
 /*----------------------------------------------------------------------------
 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
@@ -403,16 +435,12 @@ static inline void
 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- add128(
-     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
+                          uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
-    uint64_t z1;
-
-    z1 = a1 + b1;
-    *z1Ptr = z1;
-    *z0Ptr = a0 + b0 + ( z1 < a1 );
-
+    bool c = 0;
+    *z1Ptr = uadd64_carry(a1, b1, &c);
+    *z0Ptr = uadd64_carry(a0, b0, &c);
 }
 
 /*----------------------------------------------------------------------------
@@ -423,34 +451,14 @@ static inline void
 | `z1Ptr', and `z2Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- add192(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t a2,
-     uint64_t b0,
-     uint64_t b1,
-     uint64_t b2,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr
- )
+static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2,
+                          uint64_t b0, uint64_t b1, uint64_t b2,
+                          uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
 {
-    uint64_t z0, z1, z2;
-    int8_t carry0, carry1;
-
-    z2 = a2 + b2;
-    carry1 = ( z2 < a2 );
-    z1 = a1 + b1;
-    carry0 = ( z1 < a1 );
-    z0 = a0 + b0;
-    z1 += carry1;
-    z0 += ( z1 < carry1 );
-    z0 += carry0;
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
+    bool c = 0;
+    *z2Ptr = uadd64_carry(a2, b2, &c);
+    *z1Ptr = uadd64_carry(a1, b1, &c);
+    *z0Ptr = uadd64_carry(a0, b0, &c);
 }
 
 /*----------------------------------------------------------------------------
@@ -461,14 +469,12 @@ static inline void
 | `z1Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- sub128(
-     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
+                          uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
-
-    *z1Ptr = a1 - b1;
-    *z0Ptr = a0 - b0 - ( a1 < b1 );
-
+    bool c = 0;
+    *z1Ptr = usub64_borrow(a1, b1, &c);
+    *z0Ptr = usub64_borrow(a0, b0, &c);
 }
 
 /*----------------------------------------------------------------------------
@@ -479,34 +485,14 @@ static inline void
 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- sub192(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t a2,
-     uint64_t b0,
-     uint64_t b1,
-     uint64_t b2,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr
- )
+static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2,
+                          uint64_t b0, uint64_t b1, uint64_t b2,
+                          uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
 {
-    uint64_t z0, z1, z2;
-    int8_t borrow0, borrow1;
-
-    z2 = a2 - b2;
-    borrow1 = ( a2 < b2 );
-    z1 = a1 - b1;
-    borrow0 = ( a1 < b1 );
-    z0 = a0 - b0;
-    z0 -= ( z1 < borrow1 );
-    z1 -= borrow1;
-    z0 -= borrow0;
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
+    bool c = 0;
+    *z2Ptr = usub64_borrow(a2, b2, &c);
+    *z1Ptr = usub64_borrow(a1, b1, &c);
+    *z0Ptr = usub64_borrow(a0, b0, &c);
 }
 
 /*----------------------------------------------------------------------------
@@ -515,27 +501,10 @@ static inline void
 | `z0Ptr' and `z1Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void
+mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
-    uint32_t aHigh, aLow, bHigh, bLow;
-    uint64_t z0, zMiddleA, zMiddleB, z1;
-
-    aLow = a;
-    aHigh = a>>32;
-    bLow = b;
-    bHigh = b>>32;
-    z1 = ( (uint64_t) aLow ) * bLow;
-    zMiddleA = ( (uint64_t) aLow ) * bHigh;
-    zMiddleB = ( (uint64_t) aHigh ) * bLow;
-    z0 = ( (uint64_t) aHigh ) * bHigh;
-    zMiddleA += zMiddleB;
-    z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
-    zMiddleA <<= 32;
-    z1 += zMiddleA;
-    z0 += ( z1 < zMiddleA );
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
+    mulu64(z1Ptr, z0Ptr, a, b);
 }
 
 /*----------------------------------------------------------------------------
@@ -546,24 +515,14 @@ static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t
 *----------------------------------------------------------------------------*/
 
 static inline void
- mul128By64To192(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t b,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr
- )
+mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b,
+                uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
 {
-    uint64_t z0, z1, z2, more1;
-
-    mul64To128( a1, b, &z1, &z2 );
-    mul64To128( a0, b, &z0, &more1 );
-    add128( z0, more1, 0, z1, &z0, &z1 );
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
+    uint64_t z0, z1, m1;
 
+    mul64To128(a1, b, &m1, z2Ptr);
+    mul64To128(a0, b, &z0, &z1);
+    add128(z0, z1, 0, m1, z0Ptr, z1Ptr);
 }
 
 /*----------------------------------------------------------------------------
@@ -573,34 +532,21 @@ static inline void
 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- mul128To256(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t b0,
-     uint64_t b1,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr,
-     uint64_t *z3Ptr
- )
+static inline void mul128To256(uint64_t a0, uint64_t a1,
+                               uint64_t b0, uint64_t b1,
+                               uint64_t *z0Ptr, uint64_t *z1Ptr,
+                               uint64_t *z2Ptr, uint64_t *z3Ptr)
 {
-    uint64_t z0, z1, z2, z3;
-    uint64_t more1, more2;
-
-    mul64To128( a1, b1, &z2, &z3 );
-    mul64To128( a1, b0, &z1, &more2 );
-    add128( z1, more2, 0, z2, &z1, &z2 );
-    mul64To128( a0, b0, &z0, &more1 );
-    add128( z0, more1, 0, z1, &z0, &z1 );
-    mul64To128( a0, b1, &more1, &more2 );
-    add128( more1, more2, 0, z2, &more1, &z2 );
-    add128( z0, z1, 0, more1, &z0, &z1 );
-    *z3Ptr = z3;
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
+    uint64_t z0, z1, z2;
+    uint64_t m0, m1, m2, n1, n2;
 
+    mul64To128(a1, b0, &m1, &m2);
+    mul64To128(a0, b1, &n1, &n2);
+    mul64To128(a1, b1, &z2, z3Ptr);
+    mul64To128(a0, b0, &z0, &z1);
+
+    add192( 0, m1, m2,  0, n1, n2, &m0, &m1, &m2);
+    add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr);
 }
 
 /*----------------------------------------------------------------------------
@@ -634,83 +580,6 @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
 
 }
 
-/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
- * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
- *
- * Licensed under the GPLv2/LGPLv3
- */
-static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
-                                  uint64_t n0, uint64_t d)
-{
-#if defined(__x86_64__)
-    uint64_t q;
-    asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
-    return q;
-#elif defined(__s390x__) && !defined(__clang__)
-    /* Need to use a TImode type to get an even register pair for DLGR.  */
-    unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
-    asm("dlgr %0, %1" : "+r"(n) : "r"(d));
-    *r = n >> 64;
-    return n;
-#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
-    /* From Power ISA 2.06, programming note for divdeu.  */
-    uint64_t q1, q2, Q, r1, r2, R;
-    asm("divdeu %0,%2,%4; divdu %1,%3,%4"
-        : "=&r"(q1), "=r"(q2)
-        : "r"(n1), "r"(n0), "r"(d));
-    r1 = -(q1 * d);         /* low part of (n1<<64) - (q1 * d) */
-    r2 = n0 - (q2 * d);
-    Q = q1 + q2;
-    R = r1 + r2;
-    if (R >= d || R < r2) { /* overflow implies R > d */
-        Q += 1;
-        R -= d;
-    }
-    *r = R;
-    return Q;
-#else
-    uint64_t d0, d1, q0, q1, r1, r0, m;
-
-    d0 = (uint32_t)d;
-    d1 = d >> 32;
-
-    r1 = n1 % d1;
-    q1 = n1 / d1;
-    m = q1 * d0;
-    r1 = (r1 << 32) | (n0 >> 32);
-    if (r1 < m) {
-        q1 -= 1;
-        r1 += d;
-        if (r1 >= d) {
-            if (r1 < m) {
-                q1 -= 1;
-                r1 += d;
-            }
-        }
-    }
-    r1 -= m;
-
-    r0 = r1 % d1;
-    q0 = r1 / d1;
-    m = q0 * d0;
-    r0 = (r0 << 32) | (uint32_t)n0;
-    if (r0 < m) {
-        q0 -= 1;
-        r0 += d;
-        if (r0 >= d) {
-            if (r0 < m) {
-                q0 -= 1;
-                r0 += d;
-            }
-        }
-    }
-    r0 -= m;
-
-    *r = r0;
-    return (q1 << 32) | q0;
-#endif
-}
-
 /*----------------------------------------------------------------------------
 | Returns an approximation to the square root of the 32-bit significand given
 | by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
@@ -794,4 +663,38 @@ static inline bool ne128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
     return a0 != b0 || a1 != b1;
 }
 
+/*
+ * Similarly, comparisons of 192-bit values.
+ */
+
+static inline bool eq192(uint64_t a0, uint64_t a1, uint64_t a2,
+                         uint64_t b0, uint64_t b1, uint64_t b2)
+{
+    return ((a0 ^ b0) | (a1 ^ b1) | (a2 ^ b2)) == 0;
+}
+
+static inline bool le192(uint64_t a0, uint64_t a1, uint64_t a2,
+                         uint64_t b0, uint64_t b1, uint64_t b2)
+{
+    if (a0 != b0) {
+        return a0 < b0;
+    }
+    if (a1 != b1) {
+        return a1 < b1;
+    }
+    return a2 <= b2;
+}
+
+static inline bool lt192(uint64_t a0, uint64_t a1, uint64_t a2,
+                         uint64_t b0, uint64_t b1, uint64_t b2)
+{
+    if (a0 != b0) {
+        return a0 < b0;
+    }
+    if (a1 != b1) {
+        return a1 < b1;
+    }
+    return a2 < b2;
+}
+
 #endif
index 8a3f20fae9e0716cf2cf7fd1a675fec90924cc0c..0884ec4ef7a4bf5a8b42790421964f31e3e2ffa8 100644 (file)
@@ -103,7 +103,7 @@ typedef struct {
 #define make_floatx80(exp, mant) ((floatx80) { mant, exp })
 #define make_floatx80_init(exp, mant) { .low = mant, .high = exp }
 typedef struct {
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     uint64_t high, low;
 #else
     uint64_t low, high;
@@ -134,8 +134,10 @@ typedef enum __attribute__((__packed__)) {
     float_round_up           = 2,
     float_round_to_zero      = 3,
     float_round_ties_away    = 4,
-    /* Not an IEEE rounding mode: round to the closest odd mantissa value */
+    /* Not an IEEE rounding mode: round to closest odd, overflow to max */
     float_round_to_odd       = 5,
+    /* Not an IEEE rounding mode: round to closest odd, overflow to inf */
+    float_round_to_odd_inf   = 6,
 } FloatRoundMode;
 
 /*
@@ -143,15 +145,30 @@ typedef enum __attribute__((__packed__)) {
  */
 
 enum {
-    float_flag_invalid   =  1,
-    float_flag_divbyzero =  4,
-    float_flag_overflow  =  8,
-    float_flag_underflow = 16,
-    float_flag_inexact   = 32,
-    float_flag_input_denormal = 64,
-    float_flag_output_denormal = 128
+    float_flag_invalid         = 0x0001,
+    float_flag_divbyzero       = 0x0002,
+    float_flag_overflow        = 0x0004,
+    float_flag_underflow       = 0x0008,
+    float_flag_inexact         = 0x0010,
+    float_flag_input_denormal  = 0x0020,
+    float_flag_output_denormal = 0x0040,
+    float_flag_invalid_isi     = 0x0080,  /* inf - inf */
+    float_flag_invalid_imz     = 0x0100,  /* inf * 0 */
+    float_flag_invalid_idi     = 0x0200,  /* inf / inf */
+    float_flag_invalid_zdz     = 0x0400,  /* 0 / 0 */
+    float_flag_invalid_sqrt    = 0x0800,  /* sqrt(-x) */
+    float_flag_invalid_cvti    = 0x1000,  /* non-nan to integer */
+    float_flag_invalid_snan    = 0x2000,  /* any operand was snan */
 };
 
+/*
+ * Rounding precision for floatx80.
+ */
+typedef enum __attribute__((__packed__)) {
+    floatx80_precision_x,
+    floatx80_precision_d,
+    floatx80_precision_s,
+} FloatX80RoundPrec;
 
 /*
  * Floating Point Status. Individual architectures may maintain
@@ -161,9 +178,9 @@ enum {
  */
 
 typedef struct float_status {
+    uint16_t float_exception_flags;
     FloatRoundMode float_rounding_mode;
-    uint8_t     float_exception_flags;
-    signed char floatx80_rounding_precision;
+    FloatX80RoundPrec floatx80_rounding_precision;
     bool tininess_before_rounding;
     /* should denormalised results go to zero and set the inexact flag? */
     bool flush_to_zero;
@@ -178,6 +195,10 @@ typedef struct float_status {
     bool snan_bit_is_one;
     bool use_first_nan;
     bool no_signaling_nans;
+    /* should overflowed results subtract re_bias to its exponent? */
+    bool rebias_overflow;
+    /* should underflowed results add re_bias to its exponent? */
+    bool rebias_underflow;
 } float_status;
 
 #endif /* SOFTFLOAT_TYPES_H */
index 78ad5ca738bb7ced65adfdf3bd5f1c92d18fab3d..eb64075b9cb4d21ac1a6194af5e8127aa2a5ad26 100644 (file)
@@ -95,12 +95,16 @@ typedef enum {
 
 #include "fpu/softfloat-types.h"
 #include "fpu/softfloat-helpers.h"
+#include "qemu/int128.h"
 
 /*----------------------------------------------------------------------------
 | Routine to raise any or all of the software IEC/IEEE floating-point
 | exception flags.
 *----------------------------------------------------------------------------*/
-void float_raise(uint8_t flags, float_status *status);
+static inline void float_raise(uint16_t flags, float_status *status)
+{
+    status->float_exception_flags |= flags;
+}
 
 /*----------------------------------------------------------------------------
 | If `a' is denormal and we are in flush-to-zero mode then set the
@@ -179,7 +183,9 @@ floatx80 int64_to_floatx80(int64_t, float_status *status);
 
 float128 int32_to_float128(int32_t, float_status *status);
 float128 int64_to_float128(int64_t, float_status *status);
+float128 int128_to_float128(Int128, float_status *status);
 float128 uint64_to_float128(uint64_t, float_status *status);
+float128 uint128_to_float128(Int128, float_status *status);
 
 /*----------------------------------------------------------------------------
 | Software half-precision conversion routines.
@@ -240,6 +246,8 @@ float16 float16_minnum(float16, float16, float_status *status);
 float16 float16_maxnum(float16, float16, float_status *status);
 float16 float16_minnummag(float16, float16, float_status *status);
 float16 float16_maxnummag(float16, float16, float_status *status);
+float16 float16_minimum_number(float16, float16, float_status *status);
+float16 float16_maximum_number(float16, float16, float_status *status);
 float16 float16_sqrt(float16, float_status *status);
 FloatRelation float16_compare(float16, float16, float_status *status);
 FloatRelation float16_compare_quiet(float16, float16, float_status *status);
@@ -358,6 +366,8 @@ float32 bfloat16_to_float32(bfloat16, float_status *status);
 bfloat16 float64_to_bfloat16(float64 a, float_status *status);
 float64 bfloat16_to_float64(bfloat16 a, float_status *status);
 
+int8_t bfloat16_to_int8_scalbn(bfloat16, FloatRoundMode,
+                               int, float_status *status);
 int16_t bfloat16_to_int16_scalbn(bfloat16, FloatRoundMode,
                                  int, float_status *status);
 int32_t bfloat16_to_int32_scalbn(bfloat16, FloatRoundMode,
@@ -365,14 +375,18 @@ int32_t bfloat16_to_int32_scalbn(bfloat16, FloatRoundMode,
 int64_t bfloat16_to_int64_scalbn(bfloat16, FloatRoundMode,
                                  int, float_status *status);
 
+int8_t bfloat16_to_int8(bfloat16, float_status *status);
 int16_t bfloat16_to_int16(bfloat16, float_status *status);
 int32_t bfloat16_to_int32(bfloat16, float_status *status);
 int64_t bfloat16_to_int64(bfloat16, float_status *status);
 
+int8_t bfloat16_to_int8_round_to_zero(bfloat16, float_status *status);
 int16_t bfloat16_to_int16_round_to_zero(bfloat16, float_status *status);
 int32_t bfloat16_to_int32_round_to_zero(bfloat16, float_status *status);
 int64_t bfloat16_to_int64_round_to_zero(bfloat16, float_status *status);
 
+uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode,
+                                 int, float_status *status);
 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode,
                                    int, float_status *status);
 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode,
@@ -380,24 +394,30 @@ uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode,
 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode,
                                    int, float_status *status);
 
+uint8_t bfloat16_to_uint8(bfloat16 a, float_status *status);
 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *status);
 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *status);
 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *status);
 
+uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *status);
 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *status);
 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *status);
 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *status);
 
+bfloat16 int8_to_bfloat16_scalbn(int8_t a, int, float_status *status);
 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int, float_status *status);
 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int, float_status *status);
 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int, float_status *status);
+bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int, float_status *status);
 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int, float_status *status);
 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int, float_status *status);
 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int, float_status *status);
 
+bfloat16 int8_to_bfloat16(int8_t a, float_status *status);
 bfloat16 int16_to_bfloat16(int16_t a, float_status *status);
 bfloat16 int32_to_bfloat16(int32_t a, float_status *status);
 bfloat16 int64_to_bfloat16(int64_t a, float_status *status);
+bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status);
 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status);
 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status);
 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status);
@@ -419,6 +439,8 @@ bfloat16 bfloat16_minnum(bfloat16, bfloat16, float_status *status);
 bfloat16 bfloat16_maxnum(bfloat16, bfloat16, float_status *status);
 bfloat16 bfloat16_minnummag(bfloat16, bfloat16, float_status *status);
 bfloat16 bfloat16_maxnummag(bfloat16, bfloat16, float_status *status);
+bfloat16 bfloat16_minimum_number(bfloat16, bfloat16, float_status *status);
+bfloat16 bfloat16_maximum_number(bfloat16, bfloat16, float_status *status);
 bfloat16 bfloat16_sqrt(bfloat16, float_status *status);
 FloatRelation bfloat16_compare(bfloat16, bfloat16, float_status *status);
 FloatRelation bfloat16_compare_quiet(bfloat16, bfloat16, float_status *status);
@@ -586,6 +608,8 @@ float32 float32_minnum(float32, float32, float_status *status);
 float32 float32_maxnum(float32, float32, float_status *status);
 float32 float32_minnummag(float32, float32, float_status *status);
 float32 float32_maxnummag(float32, float32, float_status *status);
+float32 float32_minimum_number(float32, float32, float_status *status);
+float32 float32_maximum_number(float32, float32, float_status *status);
 bool float32_is_quiet_nan(float32, float_status *status);
 bool float32_is_signaling_nan(float32, float_status *status);
 float32 float32_silence_nan(float32, float_status *status);
@@ -739,6 +763,9 @@ int16_t float64_to_int16_round_to_zero(float64, float_status *status);
 int32_t float64_to_int32_round_to_zero(float64, float_status *status);
 int64_t float64_to_int64_round_to_zero(float64, float_status *status);
 
+int32_t float64_to_int32_modulo(float64, FloatRoundMode, float_status *status);
+int64_t float64_to_int64_modulo(float64, FloatRoundMode, float_status *status);
+
 uint16_t float64_to_uint16_scalbn(float64, FloatRoundMode, int, float_status *);
 uint32_t float64_to_uint32_scalbn(float64, FloatRoundMode, int, float_status *);
 uint64_t float64_to_uint64_scalbn(float64, FloatRoundMode, int, float_status *);
@@ -775,6 +802,8 @@ float64 float64_minnum(float64, float64, float_status *status);
 float64 float64_maxnum(float64, float64, float_status *status);
 float64 float64_minnummag(float64, float64, float_status *status);
 float64 float64_maxnummag(float64, float64, float_status *status);
+float64 float64_minimum_number(float64, float64, float_status *status);
+float64 float64_maximum_number(float64, float64, float_status *status);
 bool float64_is_quiet_nan(float64 a, float_status *status);
 bool float64_is_signaling_nan(float64, float_status *status);
 float64 float64_silence_nan(float64, float_status *status);
@@ -897,6 +926,18 @@ static inline bool float64_unordered_quiet(float64 a, float64 b,
 *----------------------------------------------------------------------------*/
 float64 float64_default_nan(float_status *status);
 
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations, rounding to single precision,
+| returning a result in double precision, with only one rounding step.
+*----------------------------------------------------------------------------*/
+
+float64 float64r32_add(float64, float64, float_status *status);
+float64 float64r32_sub(float64, float64, float_status *status);
+float64 float64r32_mul(float64, float64, float_status *status);
+float64 float64r32_div(float64, float64, float_status *status);
+float64 float64r32_muladd(float64, float64, float64, int, float_status *status);
+float64 float64r32_sqrt(float64, float_status *status);
+
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE extended double-precision conversion routines.
 *----------------------------------------------------------------------------*/
@@ -1149,7 +1190,7 @@ floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status);
 | Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
-floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
+floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                               int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                               float_status *status);
 
@@ -1162,7 +1203,7 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
 | normalized.
 *----------------------------------------------------------------------------*/
 
-floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
+floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
                                        bool zSign, int32_t zExp,
                                        uint64_t zSig0, uint64_t zSig1,
                                        float_status *status);
@@ -1178,9 +1219,13 @@ floatx80 floatx80_default_nan(float_status *status);
 int32_t float128_to_int32(float128, float_status *status);
 int32_t float128_to_int32_round_to_zero(float128, float_status *status);
 int64_t float128_to_int64(float128, float_status *status);
+Int128 float128_to_int128(float128, float_status *status);
 int64_t float128_to_int64_round_to_zero(float128, float_status *status);
+Int128 float128_to_int128_round_to_zero(float128, float_status *status);
 uint64_t float128_to_uint64(float128, float_status *status);
+Int128 float128_to_uint128(float128, float_status *status);
 uint64_t float128_to_uint64_round_to_zero(float128, float_status *status);
+Int128 float128_to_uint128_round_to_zero(float128, float_status *status);
 uint32_t float128_to_uint32(float128, float_status *status);
 uint32_t float128_to_uint32_round_to_zero(float128, float_status *status);
 float32 float128_to_float32(float128, float_status *status);
@@ -1194,11 +1239,21 @@ float128 float128_round_to_int(float128, float_status *status);
 float128 float128_add(float128, float128, float_status *status);
 float128 float128_sub(float128, float128, float_status *status);
 float128 float128_mul(float128, float128, float_status *status);
+float128 float128_muladd(float128, float128, float128, int,
+                         float_status *status);
 float128 float128_div(float128, float128, float_status *status);
 float128 float128_rem(float128, float128, float_status *status);
 float128 float128_sqrt(float128, float_status *status);
 FloatRelation float128_compare(float128, float128, float_status *status);
 FloatRelation float128_compare_quiet(float128, float128, float_status *status);
+float128 float128_min(float128, float128, float_status *status);
+float128 float128_max(float128, float128, float_status *status);
+float128 float128_minnum(float128, float128, float_status *status);
+float128 float128_maxnum(float128, float128, float_status *status);
+float128 float128_minnummag(float128, float128, float_status *status);
+float128 float128_maxnummag(float128, float128, float_status *status);
+float128 float128_minimum_number(float128, float128, float_status *status);
+float128 float128_maximum_number(float128, float128, float_status *status);
 bool float128_is_quiet_nan(float128, float_status *status);
 bool float128_is_signaling_nan(float128, float_status *status);
 float128 float128_silence_nan(float128, float_status *status);
diff --git a/include/gdbstub/helpers.h b/include/gdbstub/helpers.h
new file mode 100644 (file)
index 0000000..c573aef
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * gdbstub helpers
+ *
+ * These are all used by the various frontends and have to be host
+ * aware to ensure things are store in target order.
+ *
+ * Copyright (c) 2022 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef _GDBSTUB_HELPERS_H_
+#define _GDBSTUB_HELPERS_H_
+
+#ifdef NEED_CPU_H
+#include "cpu.h"
+
+/*
+ * The GDB remote protocol transfers values in target byte order. As
+ * the gdbstub may be batching up several register values we always
+ * append to the array.
+ */
+
+static inline int gdb_get_reg8(GByteArray *buf, uint8_t val)
+{
+    g_byte_array_append(buf, &val, 1);
+    return 1;
+}
+
+static inline int gdb_get_reg16(GByteArray *buf, uint16_t val)
+{
+    uint16_t to_word = tswap16(val);
+    g_byte_array_append(buf, (uint8_t *) &to_word, 2);
+    return 2;
+}
+
+static inline int gdb_get_reg32(GByteArray *buf, uint32_t val)
+{
+    uint32_t to_long = tswap32(val);
+    g_byte_array_append(buf, (uint8_t *) &to_long, 4);
+    return 4;
+}
+
+static inline int gdb_get_reg64(GByteArray *buf, uint64_t val)
+{
+    uint64_t to_quad = tswap64(val);
+    g_byte_array_append(buf, (uint8_t *) &to_quad, 8);
+    return 8;
+}
+
+static inline int gdb_get_reg128(GByteArray *buf, uint64_t val_hi,
+                                 uint64_t val_lo)
+{
+    uint64_t to_quad;
+#if TARGET_BIG_ENDIAN
+    to_quad = tswap64(val_hi);
+    g_byte_array_append(buf, (uint8_t *) &to_quad, 8);
+    to_quad = tswap64(val_lo);
+    g_byte_array_append(buf, (uint8_t *) &to_quad, 8);
+#else
+    to_quad = tswap64(val_lo);
+    g_byte_array_append(buf, (uint8_t *) &to_quad, 8);
+    to_quad = tswap64(val_hi);
+    g_byte_array_append(buf, (uint8_t *) &to_quad, 8);
+#endif
+    return 16;
+}
+
+static inline int gdb_get_zeroes(GByteArray *array, size_t len)
+{
+    guint oldlen = array->len;
+    g_byte_array_set_size(array, oldlen + len);
+    memset(array->data + oldlen, 0, len);
+
+    return len;
+}
+
+/**
+ * gdb_get_reg_ptr: get pointer to start of last element
+ * @len: length of element
+ *
+ * This is a helper function to extract the pointer to the last
+ * element for additional processing. Some front-ends do additional
+ * dynamic swapping of the elements based on CPU state.
+ */
+static inline uint8_t *gdb_get_reg_ptr(GByteArray *buf, int len)
+{
+    return buf->data + buf->len - len;
+}
+
+#if TARGET_LONG_BITS == 64
+#define gdb_get_regl(buf, val) gdb_get_reg64(buf, val)
+#define ldtul_p(addr) ldq_p(addr)
+#else
+#define gdb_get_regl(buf, val) gdb_get_reg32(buf, val)
+#define ldtul_p(addr) ldl_p(addr)
+#endif
+
+#else
+#error "gdbstub helpers should only be included by target specific code"
+#endif
+
+#endif /* _GDBSTUB_HELPERS_H_ */
diff --git a/include/gdbstub/syscalls.h b/include/gdbstub/syscalls.h
new file mode 100644 (file)
index 0000000..54ff724
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * GDB Syscall support
+ *
+ * Copyright (c) 2023 Linaro Ltd
+ *
+ * SPDX-License-Identifier: LGPL-2.0+
+ */
+
+#ifndef _SYSCALLS_H_
+#define _SYSCALLS_H_
+
+/* For gdb file i/o remote protocol open flags. */
+#define GDB_O_RDONLY  0
+#define GDB_O_WRONLY  1
+#define GDB_O_RDWR    2
+#define GDB_O_APPEND  8
+#define GDB_O_CREAT   0x200
+#define GDB_O_TRUNC   0x400
+#define GDB_O_EXCL    0x800
+
+/* For gdb file i/o remote protocol errno values */
+#define GDB_EPERM           1
+#define GDB_ENOENT          2
+#define GDB_EINTR           4
+#define GDB_EBADF           9
+#define GDB_EACCES         13
+#define GDB_EFAULT         14
+#define GDB_EBUSY          16
+#define GDB_EEXIST         17
+#define GDB_ENODEV         19
+#define GDB_ENOTDIR        20
+#define GDB_EISDIR         21
+#define GDB_EINVAL         22
+#define GDB_ENFILE         23
+#define GDB_EMFILE         24
+#define GDB_EFBIG          27
+#define GDB_ENOSPC         28
+#define GDB_ESPIPE         29
+#define GDB_EROFS          30
+#define GDB_ENAMETOOLONG   91
+#define GDB_EUNKNOWN       9999
+
+/* For gdb file i/o remote protocol lseek whence. */
+#define GDB_SEEK_SET  0
+#define GDB_SEEK_CUR  1
+#define GDB_SEEK_END  2
+
+/* For gdb file i/o stat/fstat. */
+typedef uint32_t gdb_mode_t;
+typedef uint32_t gdb_time_t;
+
+struct gdb_stat {
+  uint32_t    gdb_st_dev;     /* device */
+  uint32_t    gdb_st_ino;     /* inode */
+  gdb_mode_t  gdb_st_mode;    /* protection */
+  uint32_t    gdb_st_nlink;   /* number of hard links */
+  uint32_t    gdb_st_uid;     /* user ID of owner */
+  uint32_t    gdb_st_gid;     /* group ID of owner */
+  uint32_t    gdb_st_rdev;    /* device type (if inode device) */
+  uint64_t    gdb_st_size;    /* total size, in bytes */
+  uint64_t    gdb_st_blksize; /* blocksize for filesystem I/O */
+  uint64_t    gdb_st_blocks;  /* number of blocks allocated */
+  gdb_time_t  gdb_st_atime;   /* time of last access */
+  gdb_time_t  gdb_st_mtime;   /* time of last modification */
+  gdb_time_t  gdb_st_ctime;   /* time of last change */
+} QEMU_PACKED;
+
+struct gdb_timeval {
+  gdb_time_t tv_sec;  /* second */
+  uint64_t tv_usec;   /* microsecond */
+} QEMU_PACKED;
+
+typedef void (*gdb_syscall_complete_cb)(CPUState *cpu, uint64_t ret, int err);
+
+/**
+ * gdb_do_syscall:
+ * @cb: function to call when the system call has completed
+ * @fmt: gdb syscall format string
+ * ...: list of arguments to interpolate into @fmt
+ *
+ * Send a GDB syscall request. This function will return immediately;
+ * the callback function will be called later when the remote system
+ * call has completed.
+ *
+ * @fmt should be in the 'call-id,parameter,parameter...' format documented
+ * for the F request packet in the GDB remote protocol. A limited set of
+ * printf-style format specifiers is supported:
+ *   %x  - target_ulong argument printed in hex
+ *   %lx - 64-bit argument printed in hex
+ *   %s  - string pointer (target_ulong) and length (int) pair
+ */
+void gdb_do_syscall(gdb_syscall_complete_cb cb, const char *fmt, ...);
+
+/**
+ * use_gdb_syscalls() - report if GDB should be used for syscalls
+ *
+ * This is mostly driven by the semihosting mode the user configures
+ * but assuming GDB is allowed by that we report true if GDB is
+ * connected to the stub.
+ */
+int use_gdb_syscalls(void);
+
+/**
+ * gdb_exit: exit gdb session, reporting inferior status
+ * @code: exit code reported
+ *
+ * This closes the session and sends a final packet to GDB reporting
+ * the exit status of the program. It also cleans up any connections
+ * detritus before returning.
+ */
+void gdb_exit(int code);
+
+/**
+ * gdb_qemu_exit: ask qemu to exit
+ * @code: exit code reported
+ *
+ * This requests qemu to exit. This function is allowed to return as
+ * the exit request might be processed asynchronously by qemu backend.
+ */
+void gdb_qemu_exit(int code);
+
+#endif /* _SYSCALLS_H_ */
diff --git a/include/gdbstub/user.h b/include/gdbstub/user.h
new file mode 100644 (file)
index 0000000..d392e51
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * gdbstub user-mode only APIs
+ *
+ * Copyright (c) 2022 Linaro Ltd
+ *
+ * SPDX-License-Identifier: LGPL-2.0+
+ */
+
+#ifndef GDBSTUB_USER_H
+#define GDBSTUB_USER_H
+
+/**
+ * gdb_handlesig() - yield control to gdb
+ * @cpu: CPU
+ * @sig: if non-zero, the signal number which caused us to stop
+ *
+ * This function yields control to gdb, when a user-mode-only target
+ * needs to stop execution. If @sig is non-zero, then we will send a
+ * stop packet to tell gdb that we have stopped because of this signal.
+ *
+ * This function will block (handling protocol requests from gdb)
+ * until gdb tells us to continue target execution. When it does
+ * return, the return value is a signal to deliver to the target,
+ * or 0 if no signal should be delivered, ie the signal that caused
+ * us to stop should be ignored.
+ */
+int gdb_handlesig(CPUState *, int);
+
+/**
+ * gdb_signalled() - inform remote gdb of sig exit
+ * @as: current CPUArchState
+ * @sig: signal number
+ */
+void gdb_signalled(CPUArchState *as, int sig);
+
+/**
+ * gdbserver_fork() - disable gdb stub for child processes.
+ * @cs: CPU
+ */
+void gdbserver_fork(CPUState *cs);
+
+
+#endif /* GDBSTUB_USER_H */
index 695a96f7ea66fd7910f3f83a935e2bd93642c4f6..43a562974d80e525d4146aaaaea9830b6bc5178f 100644 (file)
 /* Ask for warnings for anything that was marked deprecated in
  * the defined version, or before. It is a candidate for rewrite.
  */
-#define GLIB_VERSION_MIN_REQUIRED GLIB_VERSION_2_48
+#define GLIB_VERSION_MIN_REQUIRED GLIB_VERSION_2_56
 
 /* Ask for warnings if code tries to use function that did not
  * exist in the defined version. These risk breaking builds
  */
-#define GLIB_VERSION_MAX_ALLOWED GLIB_VERSION_2_48
+#define GLIB_VERSION_MAX_ALLOWED GLIB_VERSION_2_56
 
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
@@ -46,9 +46,9 @@
  *    int g_foo(const char *wibble)
  *
  * We must define a static inline function with the same signature that does
- * what we need, but with a "_qemu" suffix e.g.
+ * what we need, but with a "_compat" suffix e.g.
  *
- * static inline void g_foo_qemu(const char *wibble)
+ * static inline void g_foo_compat(const char *wibble)
  * {
  *     #if GLIB_CHECK_VERSION(X, Y, 0)
  *        g_foo(wibble)
  * ensuring this wrapper function impl doesn't trigger the compiler warning
  * about using too new glib APIs. Finally we can do
  *
- *   #define g_foo(a) g_foo_qemu(a)
+ *   #define g_foo(a) g_foo_compat(a)
  *
  * So now the code elsewhere in QEMU, which *does* have the
  * -Wdeprecated-declarations warning active, can call g_foo(...) as normal,
  * without generating warnings.
  */
 
-#if defined(_WIN32) && !GLIB_CHECK_VERSION(2, 50, 0)
 /*
- * g_poll has a problem on Windows when using
- * timeouts < 10ms, so use wrapper.
+ * g_memdup2_qemu:
+ * @mem: (nullable): the memory to copy.
+ * @byte_size: the number of bytes to copy.
+ *
+ * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
+ * from @mem. If @mem is %NULL it returns %NULL.
+ *
+ * This replaces g_memdup(), which was prone to integer overflows when
+ * converting the argument from a #gsize to a #guint.
+ *
+ * This static inline version is a backport of the new public API from
+ * GLib 2.68, kept internal to GLib for backport to older stable releases.
+ * See https://gitlab.gnome.org/GNOME/glib/-/issues/2319.
+ *
+ * Returns: (nullable): a pointer to the newly-allocated copy of the memory,
+ *          or %NULL if @mem is %NULL.
  */
-#define g_poll(fds, nfds, timeout) g_poll_fixed(fds, nfds, timeout)
-gint g_poll_fixed(GPollFD *fds, guint nfds, gint timeout);
+static inline gpointer g_memdup2_qemu(gconstpointer mem, gsize byte_size)
+{
+#if GLIB_CHECK_VERSION(2, 68, 0)
+    return g_memdup2(mem, byte_size);
+#else
+    gpointer new_mem;
+
+    if (mem && byte_size != 0) {
+        new_mem = g_malloc(byte_size);
+        memcpy(new_mem, mem, byte_size);
+    } else {
+        new_mem = NULL;
+    }
+
+    return new_mem;
 #endif
+}
+#define g_memdup2(m, s) g_memdup2_qemu(m, s)
 
 #if defined(G_OS_UNIX)
 /*
@@ -100,6 +128,27 @@ g_unix_get_passwd_entry_qemu(const gchar *user_name, GError **error)
 }
 #endif /* G_OS_UNIX */
 
+static inline bool
+qemu_g_test_slow(void)
+{
+    static int cached = -1;
+    if (cached == -1) {
+        cached = g_test_slow() || getenv("G_TEST_SLOW") != NULL;
+    }
+    return cached;
+}
+
+#undef g_test_slow
+#undef g_test_thorough
+#undef g_test_quick
+#define g_test_slow() qemu_g_test_slow()
+#define g_test_thorough() qemu_g_test_slow()
+#define g_test_quick() (!qemu_g_test_slow())
+
 #pragma GCC diagnostic pop
 
+#ifndef G_NORETURN
+#define G_NORETURN G_GNUC_NORETURN
+#endif
+
 #endif
index 38a42f409a24c53c0f5f47cde5cd1749434974c9..2b42e4192bf62e01d34d8c148b86eb8b2322187f 100644 (file)
@@ -41,38 +41,13 @@ enum {
 };
 
 typedef struct AcpiRsdpData {
-    uint8_t oem_id[6] QEMU_NONSTRING; /* OEM identification */
+    char *oem_id;                     /* OEM identification */
     uint8_t revision;                 /* Must be 0 for 1.0, 2 for 2.0 */
 
     unsigned *rsdt_tbl_offset;
     unsigned *xsdt_tbl_offset;
 } AcpiRsdpData;
 
-/* Table structure from Linux kernel (the ACPI tables are under the
-   BSD license) */
-
-
-#define ACPI_TABLE_HEADER_DEF   /* ACPI common table header */ \
-    uint32_t signature;          /* ACPI signature (4 ASCII characters) */ \
-    uint32_t length;                 /* Length of table, in bytes, including header */ \
-    uint8_t  revision;               /* ACPI Specification minor version # */ \
-    uint8_t  checksum;               /* To make sum of entire table == 0 */ \
-    uint8_t  oem_id[6] \
-                 QEMU_NONSTRING;     /* OEM identification */ \
-    uint8_t  oem_table_id[8] \
-                 QEMU_NONSTRING;     /* OEM table identification */ \
-    uint32_t oem_revision;           /* OEM revision number */ \
-    uint8_t  asl_compiler_id[4] \
-                 QEMU_NONSTRING;     /* ASL compiler vendor ID */ \
-    uint32_t asl_compiler_revision;  /* ASL compiler revision number */
-
-
-/* ACPI common table header */
-struct AcpiTableHeader {
-    ACPI_TABLE_HEADER_DEF
-} QEMU_PACKED;
-typedef struct AcpiTableHeader AcpiTableHeader;
-
 struct AcpiGenericAddress {
     uint8_t space_id;        /* Address space where struct or register exists */
     uint8_t bit_width;       /* Size in bits of given register */
@@ -80,7 +55,7 @@ struct AcpiGenericAddress {
     uint8_t access_width;    /* ACPI 3.0: Minimum Access size (ACPI 3.0),
                                 ACPI 2.0: Reserved, Table 5-1 */
     uint64_t address;        /* 64-bit address of struct or register */
-} QEMU_PACKED;
+};
 
 typedef struct AcpiFadtData {
     struct AcpiGenericAddress pm1a_cnt;   /* PM1a_CNT_BLK */
@@ -102,6 +77,7 @@ typedef struct AcpiFadtData {
     uint16_t plvl2_lat;        /* P_LVL2_LAT */
     uint16_t plvl3_lat;        /* P_LVL3_LAT */
     uint16_t arm_boot_arch;    /* ARM_BOOT_ARCH */
+    uint16_t iapc_boot_arch;   /* IAPC_BOOT_ARCH */
     uint8_t minor_ver;         /* FADT Minor Version */
 
     /*
@@ -117,505 +93,4 @@ typedef struct AcpiFadtData {
 #define ACPI_FADT_ARM_PSCI_COMPLIANT  (1 << 0)
 #define ACPI_FADT_ARM_PSCI_USE_HVC    (1 << 1)
 
-/*
- * Serial Port Console Redirection Table (SPCR), Rev. 1.02
- *
- * For .interface_type see Debug Port Table 2 (DBG2) serial port
- * subtypes in Table 3, Rev. May 22, 2012
- */
-struct AcpiSerialPortConsoleRedirection {
-    ACPI_TABLE_HEADER_DEF
-    uint8_t  interface_type;
-    uint8_t  reserved1[3];
-    struct AcpiGenericAddress base_address;
-    uint8_t  interrupt_types;
-    uint8_t  irq;
-    uint32_t gsi;
-    uint8_t  baud;
-    uint8_t  parity;
-    uint8_t  stopbits;
-    uint8_t  flowctrl;
-    uint8_t  term_type;
-    uint8_t  reserved2;
-    uint16_t pci_device_id;
-    uint16_t pci_vendor_id;
-    uint8_t  pci_bus;
-    uint8_t  pci_slot;
-    uint8_t  pci_func;
-    uint32_t pci_flags;
-    uint8_t  pci_seg;
-    uint32_t reserved3;
-} QEMU_PACKED;
-typedef struct AcpiSerialPortConsoleRedirection
-               AcpiSerialPortConsoleRedirection;
-
-/*
- * ACPI 1.0 Root System Description Table (RSDT)
- */
-struct AcpiRsdtDescriptorRev1 {
-    ACPI_TABLE_HEADER_DEF       /* ACPI common table header */
-    uint32_t table_offset_entry[];  /* Array of pointers to other */
-    /* ACPI tables */
-} QEMU_PACKED;
-typedef struct AcpiRsdtDescriptorRev1 AcpiRsdtDescriptorRev1;
-
-/*
- * ACPI 2.0 eXtended System Description Table (XSDT)
- */
-struct AcpiXsdtDescriptorRev2 {
-    ACPI_TABLE_HEADER_DEF       /* ACPI common table header */
-    uint64_t table_offset_entry[];  /* Array of pointers to other */
-    /* ACPI tables */
-} QEMU_PACKED;
-typedef struct AcpiXsdtDescriptorRev2 AcpiXsdtDescriptorRev2;
-
-/*
- * ACPI 1.0 Firmware ACPI Control Structure (FACS)
- */
-struct AcpiFacsDescriptorRev1 {
-    uint32_t signature;           /* ACPI Signature */
-    uint32_t length;                 /* Length of structure, in bytes */
-    uint32_t hardware_signature;     /* Hardware configuration signature */
-    uint32_t firmware_waking_vector; /* ACPI OS waking vector */
-    uint32_t global_lock;            /* Global Lock */
-    uint32_t flags;
-    uint8_t  resverved3 [40];        /* Reserved - must be zero */
-} QEMU_PACKED;
-typedef struct AcpiFacsDescriptorRev1 AcpiFacsDescriptorRev1;
-
-/*
- * Differentiated System Description Table (DSDT)
- */
-
-/*
- * MADT values and structures
- */
-
-/* Values for MADT PCATCompat */
-
-#define ACPI_DUAL_PIC                0
-#define ACPI_MULTIPLE_APIC           1
-
-/* Master MADT */
-
-struct AcpiMultipleApicTable {
-    ACPI_TABLE_HEADER_DEF     /* ACPI common table header */
-    uint32_t local_apic_address;     /* Physical address of local APIC */
-    uint32_t flags;
-} QEMU_PACKED;
-typedef struct AcpiMultipleApicTable AcpiMultipleApicTable;
-
-/* Values for Type in APIC sub-headers */
-
-#define ACPI_APIC_PROCESSOR          0
-#define ACPI_APIC_IO                 1
-#define ACPI_APIC_XRUPT_OVERRIDE     2
-#define ACPI_APIC_NMI                3
-#define ACPI_APIC_LOCAL_NMI          4
-#define ACPI_APIC_ADDRESS_OVERRIDE   5
-#define ACPI_APIC_IO_SAPIC           6
-#define ACPI_APIC_LOCAL_SAPIC        7
-#define ACPI_APIC_XRUPT_SOURCE       8
-#define ACPI_APIC_LOCAL_X2APIC       9
-#define ACPI_APIC_LOCAL_X2APIC_NMI      10
-#define ACPI_APIC_GENERIC_CPU_INTERFACE 11
-#define ACPI_APIC_GENERIC_DISTRIBUTOR   12
-#define ACPI_APIC_GENERIC_MSI_FRAME     13
-#define ACPI_APIC_GENERIC_REDISTRIBUTOR 14
-#define ACPI_APIC_GENERIC_TRANSLATOR    15
-#define ACPI_APIC_RESERVED              16   /* 16 and greater are reserved */
-
-/*
- * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
- */
-#define ACPI_SUB_HEADER_DEF   /* Common ACPI sub-structure header */\
-    uint8_t  type;                               \
-    uint8_t  length;
-
-/* Sub-structures for MADT */
-
-struct AcpiMadtProcessorApic {
-    ACPI_SUB_HEADER_DEF
-    uint8_t  processor_id;           /* ACPI processor id */
-    uint8_t  local_apic_id;          /* Processor's local APIC id */
-    uint32_t flags;
-} QEMU_PACKED;
-typedef struct AcpiMadtProcessorApic AcpiMadtProcessorApic;
-
-struct AcpiMadtIoApic {
-    ACPI_SUB_HEADER_DEF
-    uint8_t  io_apic_id;             /* I/O APIC ID */
-    uint8_t  reserved;               /* Reserved - must be zero */
-    uint32_t address;                /* APIC physical address */
-    uint32_t interrupt;              /* Global system interrupt where INTI
-                                 * lines start */
-} QEMU_PACKED;
-typedef struct AcpiMadtIoApic AcpiMadtIoApic;
-
-struct AcpiMadtIntsrcovr {
-    ACPI_SUB_HEADER_DEF
-    uint8_t  bus;
-    uint8_t  source;
-    uint32_t gsi;
-    uint16_t flags;
-} QEMU_PACKED;
-typedef struct AcpiMadtIntsrcovr AcpiMadtIntsrcovr;
-
-struct AcpiMadtLocalNmi {
-    ACPI_SUB_HEADER_DEF
-    uint8_t  processor_id;           /* ACPI processor id */
-    uint16_t flags;                  /* MPS INTI flags */
-    uint8_t  lint;                   /* Local APIC LINT# */
-} QEMU_PACKED;
-typedef struct AcpiMadtLocalNmi AcpiMadtLocalNmi;
-
-struct AcpiMadtProcessorX2Apic {
-    ACPI_SUB_HEADER_DEF
-    uint16_t reserved;
-    uint32_t x2apic_id;              /* Processor's local x2APIC ID */
-    uint32_t flags;
-    uint32_t uid;                    /* Processor object _UID */
-} QEMU_PACKED;
-typedef struct AcpiMadtProcessorX2Apic AcpiMadtProcessorX2Apic;
-
-struct AcpiMadtLocalX2ApicNmi {
-    ACPI_SUB_HEADER_DEF
-    uint16_t flags;                  /* MPS INTI flags */
-    uint32_t uid;                    /* Processor object _UID */
-    uint8_t  lint;                   /* Local APIC LINT# */
-    uint8_t  reserved[3];            /* Local APIC LINT# */
-} QEMU_PACKED;
-typedef struct AcpiMadtLocalX2ApicNmi AcpiMadtLocalX2ApicNmi;
-
-struct AcpiMadtGenericCpuInterface {
-    ACPI_SUB_HEADER_DEF
-    uint16_t reserved;
-    uint32_t cpu_interface_number;
-    uint32_t uid;
-    uint32_t flags;
-    uint32_t parking_version;
-    uint32_t performance_interrupt;
-    uint64_t parked_address;
-    uint64_t base_address;
-    uint64_t gicv_base_address;
-    uint64_t gich_base_address;
-    uint32_t vgic_interrupt;
-    uint64_t gicr_base_address;
-    uint64_t arm_mpidr;
-} QEMU_PACKED;
-
-typedef struct AcpiMadtGenericCpuInterface AcpiMadtGenericCpuInterface;
-
-/* GICC CPU Interface Flags */
-#define ACPI_MADT_GICC_ENABLED 1
-
-struct AcpiMadtGenericDistributor {
-    ACPI_SUB_HEADER_DEF
-    uint16_t reserved;
-    uint32_t gic_id;
-    uint64_t base_address;
-    uint32_t global_irq_base;
-    /* ACPI 5.1 Errata 1228 Present GIC version in MADT table */
-    uint8_t version;
-    uint8_t reserved2[3];
-} QEMU_PACKED;
-
-typedef struct AcpiMadtGenericDistributor AcpiMadtGenericDistributor;
-
-struct AcpiMadtGenericMsiFrame {
-    ACPI_SUB_HEADER_DEF
-    uint16_t reserved;
-    uint32_t gic_msi_frame_id;
-    uint64_t base_address;
-    uint32_t flags;
-    uint16_t spi_count;
-    uint16_t spi_base;
-} QEMU_PACKED;
-
-typedef struct AcpiMadtGenericMsiFrame AcpiMadtGenericMsiFrame;
-
-struct AcpiMadtGenericRedistributor {
-    ACPI_SUB_HEADER_DEF
-    uint16_t reserved;
-    uint64_t base_address;
-    uint32_t range_length;
-} QEMU_PACKED;
-
-typedef struct AcpiMadtGenericRedistributor AcpiMadtGenericRedistributor;
-
-struct AcpiMadtGenericTranslator {
-    ACPI_SUB_HEADER_DEF
-    uint16_t reserved;
-    uint32_t translation_id;
-    uint64_t base_address;
-    uint32_t reserved2;
-} QEMU_PACKED;
-
-typedef struct AcpiMadtGenericTranslator AcpiMadtGenericTranslator;
-
-/*
- * Generic Timer Description Table (GTDT)
- */
-#define ACPI_GTDT_INTERRUPT_MODE_LEVEL    (0 << 0)
-#define ACPI_GTDT_INTERRUPT_MODE_EDGE     (1 << 0)
-#define ACPI_GTDT_CAP_ALWAYS_ON           (1 << 2)
-
-struct AcpiGenericTimerTable {
-    ACPI_TABLE_HEADER_DEF
-    uint64_t counter_block_addresss;
-    uint32_t reserved;
-    uint32_t secure_el1_interrupt;
-    uint32_t secure_el1_flags;
-    uint32_t non_secure_el1_interrupt;
-    uint32_t non_secure_el1_flags;
-    uint32_t virtual_timer_interrupt;
-    uint32_t virtual_timer_flags;
-    uint32_t non_secure_el2_interrupt;
-    uint32_t non_secure_el2_flags;
-    uint64_t counter_read_block_address;
-    uint32_t platform_timer_count;
-    uint32_t platform_timer_offset;
-} QEMU_PACKED;
-typedef struct AcpiGenericTimerTable AcpiGenericTimerTable;
-
-/*
- * HPET Description Table
- */
-struct Acpi20Hpet {
-    ACPI_TABLE_HEADER_DEF                    /* ACPI common table header */
-    uint32_t           timer_block_id;
-    struct AcpiGenericAddress addr;
-    uint8_t            hpet_number;
-    uint16_t           min_tick;
-    uint8_t            page_protect;
-} QEMU_PACKED;
-typedef struct Acpi20Hpet Acpi20Hpet;
-
-/*
- * SRAT (NUMA topology description) table
- */
-
-struct AcpiSystemResourceAffinityTable {
-    ACPI_TABLE_HEADER_DEF
-    uint32_t    reserved1;
-    uint32_t    reserved2[2];
-} QEMU_PACKED;
-typedef struct AcpiSystemResourceAffinityTable AcpiSystemResourceAffinityTable;
-
-#define ACPI_SRAT_PROCESSOR_APIC     0
-#define ACPI_SRAT_MEMORY             1
-#define ACPI_SRAT_PROCESSOR_x2APIC   2
-#define ACPI_SRAT_PROCESSOR_GICC     3
-
-struct AcpiSratProcessorAffinity {
-    ACPI_SUB_HEADER_DEF
-    uint8_t     proximity_lo;
-    uint8_t     local_apic_id;
-    uint32_t    flags;
-    uint8_t     local_sapic_eid;
-    uint8_t     proximity_hi[3];
-    uint32_t    reserved;
-} QEMU_PACKED;
-typedef struct AcpiSratProcessorAffinity AcpiSratProcessorAffinity;
-
-struct AcpiSratProcessorX2ApicAffinity {
-    ACPI_SUB_HEADER_DEF
-    uint16_t    reserved;
-    uint32_t    proximity_domain;
-    uint32_t    x2apic_id;
-    uint32_t    flags;
-    uint32_t    clk_domain;
-    uint32_t    reserved2;
-} QEMU_PACKED;
-typedef struct AcpiSratProcessorX2ApicAffinity AcpiSratProcessorX2ApicAffinity;
-
-struct AcpiSratMemoryAffinity {
-    ACPI_SUB_HEADER_DEF
-    uint32_t    proximity;
-    uint16_t    reserved1;
-    uint64_t    base_addr;
-    uint64_t    range_length;
-    uint32_t    reserved2;
-    uint32_t    flags;
-    uint32_t    reserved3[2];
-} QEMU_PACKED;
-typedef struct AcpiSratMemoryAffinity AcpiSratMemoryAffinity;
-
-struct AcpiSratProcessorGiccAffinity {
-    ACPI_SUB_HEADER_DEF
-    uint32_t    proximity;
-    uint32_t    acpi_processor_uid;
-    uint32_t    flags;
-    uint32_t    clock_domain;
-} QEMU_PACKED;
-
-typedef struct AcpiSratProcessorGiccAffinity AcpiSratProcessorGiccAffinity;
-
-/*
- * TCPA Description Table
- *
- * Following Level 00, Rev 00.37 of specs:
- * http://www.trustedcomputinggroup.org/resources/tcg_acpi_specification
- */
-struct Acpi20Tcpa {
-    ACPI_TABLE_HEADER_DEF                    /* ACPI common table header */
-    uint16_t platform_class;
-    uint32_t log_area_minimum_length;
-    uint64_t log_area_start_address;
-} QEMU_PACKED;
-typedef struct Acpi20Tcpa Acpi20Tcpa;
-
-/* DMAR - DMA Remapping table r2.2 */
-struct AcpiTableDmar {
-    ACPI_TABLE_HEADER_DEF
-    uint8_t host_address_width; /* Maximum DMA physical addressability */
-    uint8_t flags;
-    uint8_t reserved[10];
-} QEMU_PACKED;
-typedef struct AcpiTableDmar AcpiTableDmar;
-
-/* Masks for Flags field above */
-#define ACPI_DMAR_INTR_REMAP        1
-#define ACPI_DMAR_X2APIC_OPT_OUT    (1 << 1)
-
-/* Values for sub-structure type for DMAR */
-enum {
-    ACPI_DMAR_TYPE_HARDWARE_UNIT = 0,       /* DRHD */
-    ACPI_DMAR_TYPE_RESERVED_MEMORY = 1,     /* RMRR */
-    ACPI_DMAR_TYPE_ATSR = 2,                /* ATSR */
-    ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3,   /* RHSR */
-    ACPI_DMAR_TYPE_ANDD = 4,                /* ANDD */
-    ACPI_DMAR_TYPE_RESERVED = 5             /* Reserved for furture use */
-};
-
-/*
- * Sub-structures for DMAR
- */
-
-/* Device scope structure for DRHD. */
-struct AcpiDmarDeviceScope {
-    uint8_t entry_type;
-    uint8_t length;
-    uint16_t reserved;
-    uint8_t enumeration_id;
-    uint8_t bus;
-    struct {
-        uint8_t device;
-        uint8_t function;
-    } path[];
-} QEMU_PACKED;
-typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope;
-
-/* Type 0: Hardware Unit Definition */
-struct AcpiDmarHardwareUnit {
-    uint16_t type;
-    uint16_t length;
-    uint8_t flags;
-    uint8_t reserved;
-    uint16_t pci_segment;   /* The PCI Segment associated with this unit */
-    uint64_t address;   /* Base address of remapping hardware register-set */
-    AcpiDmarDeviceScope scope[];
-} QEMU_PACKED;
-typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit;
-
-/* Type 2: Root Port ATS Capability Reporting Structure */
-struct AcpiDmarRootPortATS {
-    uint16_t type;
-    uint16_t length;
-    uint8_t flags;
-    uint8_t reserved;
-    uint16_t pci_segment;
-    AcpiDmarDeviceScope scope[];
-} QEMU_PACKED;
-typedef struct AcpiDmarRootPortATS AcpiDmarRootPortATS;
-
-/* Masks for Flags field above */
-#define ACPI_DMAR_INCLUDE_PCI_ALL   1
-#define ACPI_DMAR_ATSR_ALL_PORTS    1
-
-/*
- * Input Output Remapping Table (IORT)
- * Conforms to "IO Remapping Table System Software on ARM Platforms",
- * Document number: ARM DEN 0049B, October 2015
- */
-
-struct AcpiIortTable {
-    ACPI_TABLE_HEADER_DEF     /* ACPI common table header */
-    uint32_t node_count;
-    uint32_t node_offset;
-    uint32_t reserved;
-} QEMU_PACKED;
-typedef struct AcpiIortTable AcpiIortTable;
-
-/*
- * IORT node types
- */
-
-#define ACPI_IORT_NODE_HEADER_DEF   /* Node format common fields */ \
-    uint8_t  type;          \
-    uint16_t length;        \
-    uint8_t  revision;      \
-    uint32_t reserved;      \
-    uint32_t mapping_count; \
-    uint32_t mapping_offset;
-
-/* Values for node Type above */
-enum {
-        ACPI_IORT_NODE_ITS_GROUP = 0x00,
-        ACPI_IORT_NODE_NAMED_COMPONENT = 0x01,
-        ACPI_IORT_NODE_PCI_ROOT_COMPLEX = 0x02,
-        ACPI_IORT_NODE_SMMU = 0x03,
-        ACPI_IORT_NODE_SMMU_V3 = 0x04
-};
-
-struct AcpiIortIdMapping {
-    uint32_t input_base;
-    uint32_t id_count;
-    uint32_t output_base;
-    uint32_t output_reference;
-    uint32_t flags;
-} QEMU_PACKED;
-typedef struct AcpiIortIdMapping AcpiIortIdMapping;
-
-struct AcpiIortMemoryAccess {
-    uint32_t cache_coherency;
-    uint8_t  hints;
-    uint16_t reserved;
-    uint8_t  memory_flags;
-} QEMU_PACKED;
-typedef struct AcpiIortMemoryAccess AcpiIortMemoryAccess;
-
-struct AcpiIortItsGroup {
-    ACPI_IORT_NODE_HEADER_DEF
-    uint32_t its_count;
-    uint32_t identifiers[];
-} QEMU_PACKED;
-typedef struct AcpiIortItsGroup AcpiIortItsGroup;
-
-#define ACPI_IORT_SMMU_V3_COHACC_OVERRIDE 1
-
-struct AcpiIortSmmu3 {
-    ACPI_IORT_NODE_HEADER_DEF
-    uint64_t base_address;
-    uint32_t flags;
-    uint32_t reserved2;
-    uint64_t vatos_address;
-    uint32_t model;
-    uint32_t event_gsiv;
-    uint32_t pri_gsiv;
-    uint32_t gerr_gsiv;
-    uint32_t sync_gsiv;
-    AcpiIortIdMapping id_mapping_array[];
-} QEMU_PACKED;
-typedef struct AcpiIortSmmu3 AcpiIortSmmu3;
-
-struct AcpiIortRC {
-    ACPI_IORT_NODE_HEADER_DEF
-    AcpiIortMemoryAccess memory_properties;
-    uint32_t ats_attribute;
-    uint32_t pci_segment_number;
-    AcpiIortIdMapping id_mapping_array[];
-} QEMU_PACKED;
-typedef struct AcpiIortRC AcpiIortRC;
-
 #endif
index 22b0b65bb2bdea46098172c412bb58adf214a370..e0e51e85b418fce46c100d0e5293b0ba07313b8b 100644 (file)
@@ -47,6 +47,8 @@
 #define ACPI_PM_PROP_PM_IO_BASE "pm_io_base"
 #define ACPI_PM_PROP_GPE0_BLK "gpe0_blk"
 #define ACPI_PM_PROP_GPE0_BLK_LEN "gpe0_blk_len"
+#define ACPI_PM_PROP_ACPI_PCIHP_BRIDGE "acpi-pci-hotplug-with-bridge-support"
+#define ACPI_PM_PROP_ACPI_PCI_ROOTHP "acpi-root-pci-hotplug"
 
 /* PM Timer ticks per second (HZ) */
 #define PM_TIMER_FREQUENCY  3579545
@@ -64,7 +66,7 @@
 #define ACPI_BITMASK_POWER_BUTTON_STATUS        0x0100
 #define ACPI_BITMASK_SLEEP_BUTTON_STATUS        0x0200
 #define ACPI_BITMASK_RT_CLOCK_STATUS            0x0400
-#define ACPI_BITMASK_PCIEXP_WAKE_STATUS         0x4000 /* ACPI 3.0 */
+#define ACPI_BITMASK_PCIEXP_WAKE_STATUS         0x4000  /* ACPI 3.0 */
 #define ACPI_BITMASK_WAKE_STATUS                0x8000
 
 #define ACPI_BITMASK_ALL_FIXED_STATUS           (\
@@ -82,7 +84,7 @@
 #define ACPI_BITMASK_POWER_BUTTON_ENABLE        0x0100
 #define ACPI_BITMASK_SLEEP_BUTTON_ENABLE        0x0200
 #define ACPI_BITMASK_RT_CLOCK_ENABLE            0x0400
-#define ACPI_BITMASK_PCIEXP_WAKE_DISABLE        0x4000 /* ACPI 3.0 */
+#define ACPI_BITMASK_PCIEXP_WAKE_DISABLE        0x4000  /* ACPI 3.0 */
 
 #define ACPI_BITMASK_PM1_COMMON_ENABLED         ( \
         ACPI_BITMASK_RT_CLOCK_ENABLE        | \
@@ -128,6 +130,7 @@ struct ACPIPM1CNT {
     MemoryRegion io;
     uint16_t cnt;
     uint8_t s4_val;
+    bool acpi_only;
 };
 
 struct ACPIGPE {
@@ -163,7 +166,8 @@ void acpi_pm1_evt_init(ACPIREGS *ar, acpi_update_sci_fn update_sci,
 
 /* PM1a_CNT: piix and ich9 don't implement PM1b CNT. */
 void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent,
-                       bool disable_s3, bool disable_s4, uint8_t s4_val);
+                       bool disable_s3, bool disable_s4, uint8_t s4_val,
+                       bool acpi_only);
 void acpi_pm1_cnt_update(ACPIREGS *ar,
                          bool sci_enable, bool sci_disable);
 void acpi_pm1_cnt_reset(ACPIREGS *ar);
diff --git a/include/hw/acpi/acpi_aml_interface.h b/include/hw/acpi/acpi_aml_interface.h
new file mode 100644 (file)
index 0000000..11748a8
--- /dev/null
@@ -0,0 +1,52 @@
+#ifndef ACPI_AML_INTERFACE_H
+#define ACPI_AML_INTERFACE_H
+
+#include "qom/object.h"
+#include "hw/acpi/aml-build.h"
+#include "hw/qdev-core.h"
+
+#define TYPE_ACPI_DEV_AML_IF "acpi-dev-aml-interface"
+typedef struct AcpiDevAmlIfClass AcpiDevAmlIfClass;
+DECLARE_CLASS_CHECKERS(AcpiDevAmlIfClass, ACPI_DEV_AML_IF, TYPE_ACPI_DEV_AML_IF)
+#define ACPI_DEV_AML_IF(obj) \
+     INTERFACE_CHECK(AcpiDevAmlIf, (obj), TYPE_ACPI_DEV_AML_IF)
+
+typedef struct AcpiDevAmlIf AcpiDevAmlIf;
+typedef void (*dev_aml_fn)(AcpiDevAmlIf *adev, Aml *scope);
+
+/**
+ * AcpiDevAmlIfClass:
+ *
+ * build_dev_aml: adds device specific AML blob to provided scope
+ *
+ * Interface is designed for providing generic callback that builds device
+ * specific AML blob.
+ */
+struct AcpiDevAmlIfClass {
+    /* <private> */
+    InterfaceClass parent_class;
+
+    /* <public> */
+    dev_aml_fn build_dev_aml;
+};
+
+static inline dev_aml_fn get_dev_aml_func(DeviceState *dev)
+{
+    if (object_dynamic_cast(OBJECT(dev), TYPE_ACPI_DEV_AML_IF)) {
+        AcpiDevAmlIfClass *klass = ACPI_DEV_AML_IF_GET_CLASS(dev);
+        return klass->build_dev_aml;
+    }
+    return NULL;
+}
+
+static inline void call_dev_aml_func(DeviceState *dev, Aml *scope)
+{
+    dev_aml_fn fn = get_dev_aml_func(dev);
+    if (fn) {
+        fn(ACPI_DEV_AML_IF(dev), scope);
+    }
+}
+
+void qbus_build_aml(BusState *bus, Aml *scope);
+
+#endif
index 769ff55c7ee88e6a0566ad3f81ee542176880395..68d9d15f50aa7edcac9e23c235f7cf8503e861cd 100644 (file)
@@ -3,7 +3,6 @@
 
 #include "qapi/qapi-types-acpi.h"
 #include "qom/object.h"
-#include "hw/boards.h"
 #include "hw/qdev-core.h"
 
 /* These values are part of guest ABI, and can not be changed */
@@ -52,7 +51,5 @@ struct AcpiDeviceIfClass {
     /* <public> */
     void (*ospm_status)(AcpiDeviceIf *adev, ACPIOSTInfoList ***list);
     void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev);
-    void (*madt_cpu)(AcpiDeviceIf *adev, int uid,
-                     const CPUArchIdList *apic_ids, GArray *entry);
 };
 #endif
index fe0055fffb51709eb8e0d4631ee6b5cdb62f371b..ff2a310270a44f25821f324479a1390379b9d894 100644 (file)
@@ -4,11 +4,8 @@
 #include "hw/acpi/acpi-defs.h"
 #include "hw/acpi/bios-linker-loader.h"
 
-/* Reserve RAM space for tables: add another order of magnitude. */
-#define ACPI_BUILD_TABLE_MAX_SIZE         0x200000
-
 #define ACPI_BUILD_APPNAME6 "BOCHS "
-#define ACPI_BUILD_APPNAME4 "BXPC"
+#define ACPI_BUILD_APPNAME8 "BXPC    "
 
 #define ACPI_BUILD_TABLE_FILE "etc/acpi/tables"
 #define ACPI_BUILD_RSDP_FILE "etc/acpi/rsdp"
@@ -224,6 +221,20 @@ struct AcpiBuildTables {
     BIOSLinker *linker;
 } AcpiBuildTables;
 
+typedef
+struct CrsRangeEntry {
+    uint64_t base;
+    uint64_t limit;
+} CrsRangeEntry;
+
+typedef
+struct CrsRangeSet {
+    GPtrArray *io_ranges;
+    GPtrArray *mem_ranges;
+    GPtrArray *mem_64bit_ranges;
+} CrsRangeSet;
+
+
 /*
  * ACPI 5.0: 6.4.3.8.2 Serial Bus Connection Descriptors
  * Serial Bus Type
@@ -266,7 +277,7 @@ void free_aml_allocator(void);
  * @child: element that is copied into @parent_ctx context
  *
  * Joins Aml elements together and helps to construct AML tables
- * Examle of usage:
+ * Example of usage:
  *   Aml *table = aml_def_block("SSDT", ...);
  *   Aml *sb = aml_scope("\\_SB");
  *   Aml *dev = aml_device("PCI0");
@@ -278,7 +289,7 @@ void free_aml_allocator(void);
 void aml_append(Aml *parent_ctx, Aml *child);
 
 /* non block AML object primitives */
-Aml *aml_name(const char *name_format, ...) GCC_FMT_ATTR(1, 2);
+Aml *aml_name(const char *name_format, ...) G_GNUC_PRINTF(1, 2);
 Aml *aml_name_decl(const char *name, Aml *val);
 Aml *aml_debug(void);
 Aml *aml_return(Aml *val);
@@ -287,6 +298,7 @@ Aml *aml_arg(int pos);
 Aml *aml_to_integer(Aml *arg);
 Aml *aml_to_hexstring(Aml *src, Aml *dst);
 Aml *aml_to_buffer(Aml *src, Aml *dst);
+Aml *aml_to_decimalstring(Aml *src, Aml *dst);
 Aml *aml_store(Aml *val, Aml *target);
 Aml *aml_and(Aml *arg1, Aml *arg2, Aml *dst);
 Aml *aml_or(Aml *arg1, Aml *arg2, Aml *dst);
@@ -309,6 +321,8 @@ Aml *aml_call3(const char *method, Aml *arg1, Aml *arg2, Aml *arg3);
 Aml *aml_call4(const char *method, Aml *arg1, Aml *arg2, Aml *arg3, Aml *arg4);
 Aml *aml_call5(const char *method, Aml *arg1, Aml *arg2, Aml *arg3, Aml *arg4,
                Aml *arg5);
+Aml *aml_call6(const char *method, Aml *arg1, Aml *arg2, Aml *arg3, Aml *arg4,
+               Aml *arg5, Aml *arg6);
 Aml *aml_gpio_int(AmlConsumerAndProducer con_and_pro,
                   AmlLevelAndEdge edge_level,
                   AmlActiveHighAndLow active_level, AmlShared shared,
@@ -330,13 +344,13 @@ Aml *aml_irq_no_flags(uint8_t irq);
 Aml *aml_named_field(const char *name, unsigned length);
 Aml *aml_reserved_field(unsigned length);
 Aml *aml_local(int num);
-Aml *aml_string(const char *name_format, ...) GCC_FMT_ATTR(1, 2);
+Aml *aml_string(const char *name_format, ...) G_GNUC_PRINTF(1, 2);
 Aml *aml_lnot(Aml *arg);
 Aml *aml_equal(Aml *arg1, Aml *arg2);
 Aml *aml_lgreater(Aml *arg1, Aml *arg2);
 Aml *aml_lgreater_equal(Aml *arg1, Aml *arg2);
 Aml *aml_processor(uint8_t proc_id, uint32_t pblk_addr, uint8_t pblk_len,
-                   const char *name_format, ...) GCC_FMT_ATTR(4, 5);
+                   const char *name_format, ...) G_GNUC_PRINTF(4, 5);
 Aml *aml_eisaid(const char *str);
 Aml *aml_word_bus_number(AmlMinFixed min_fixed, AmlMaxFixed max_fixed,
                          AmlDecode dec, uint16_t addr_gran,
@@ -370,8 +384,8 @@ Aml *aml_sleep(uint64_t msec);
 Aml *aml_i2c_serial_bus_device(uint16_t address, const char *resource_source);
 
 /* Block AML object primitives */
-Aml *aml_scope(const char *name_format, ...) GCC_FMT_ATTR(1, 2);
-Aml *aml_device(const char *name_format, ...) GCC_FMT_ATTR(1, 2);
+Aml *aml_scope(const char *name_format, ...) G_GNUC_PRINTF(1, 2);
+Aml *aml_device(const char *name_format, ...) G_GNUC_PRINTF(1, 2);
 Aml *aml_method(const char *name, int arg_count, AmlSerializeFlag sflag);
 Aml *aml_if(Aml *predicate);
 Aml *aml_else(void);
@@ -399,10 +413,37 @@ Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target);
 Aml *aml_object_type(Aml *object);
 
 void build_append_int_noprefix(GArray *table, uint64_t value, int size);
-void
-build_header(BIOSLinker *linker, GArray *table_data,
-             AcpiTableHeader *h, const char *sig, int len, uint8_t rev,
-             const char *oem_id, const char *oem_table_id);
+
+typedef struct AcpiTable {
+    const char *sig;
+    const uint8_t rev;
+    const char *oem_id;
+    const char *oem_table_id;
+    /* private vars tracking table state */
+    GArray *array;
+    unsigned table_offset;
+} AcpiTable;
+
+/**
+ * acpi_table_begin:
+ * initializes table header and keeps track of
+ * table data/offsets
+ * @desc: ACPI table descriptor
+ * @array: blob where the ACPI table will be composed/stored.
+ */
+void acpi_table_begin(AcpiTable *desc, GArray *array);
+
+/**
+ * acpi_table_end:
+ * sets actual table length and tells bios loader
+ * where table is for the later initialization on
+ * guest side.
+ * @linker: reference to BIOSLinker object to use for the table
+ * @table: ACPI table descriptor that was used with @acpi_table_begin
+ * counterpart
+ */
+void acpi_table_end(BIOSLinker *linker, AcpiTable *table);
+
 void *acpi_data_push(GArray *table_data, unsigned size);
 unsigned acpi_data_len(GArray *table);
 void acpi_add_table(GArray *table_offsets, GArray *table_data);
@@ -419,7 +460,7 @@ build_xsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets,
 
 int
 build_append_named_dword(GArray *array, const char *name_format, ...)
-GCC_FMT_ATTR(2, 3);
+G_GNUC_PRINTF(2, 3);
 
 void build_append_gas(GArray *table, AmlAddressSpace as,
                       uint8_t bit_width, uint8_t bit_offset,
@@ -432,13 +473,28 @@ build_append_gas_from_struct(GArray *table, const struct AcpiGenericAddress *s)
                      s->access_width, s->address);
 }
 
-void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base,
+void crs_range_insert(GPtrArray *ranges, uint64_t base, uint64_t limit);
+void crs_replace_with_free_ranges(GPtrArray *ranges,
+                                         uint64_t start, uint64_t end);
+void crs_range_set_init(CrsRangeSet *range_set);
+void crs_range_set_free(CrsRangeSet *range_set);
+
+Aml *build_crs(PCIHostState *host, CrsRangeSet *range_set, uint32_t io_offset,
+               uint32_t mmio32_offset, uint64_t mmio64_offset,
+               uint16_t bus_nr_offset);
+
+void build_srat_memory(GArray *table_data, uint64_t base,
                        uint64_t len, int node, MemoryAffinityFlags flags);
 
-void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms);
+void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms,
+                const char *oem_id, const char *oem_table_id);
+
+void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
+                const char *oem_id, const char *oem_table_id);
 
 void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f,
                 const char *oem_id, const char *oem_table_id);
 
-void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog);
+void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog,
+                const char *oem_id, const char *oem_table_id);
 #endif
index 0eeedaa491c14cca4926a388a5461917bb8bcb10..bc901660fb6fd6f585c553636c1fcb183d2c1276 100644 (file)
@@ -15,6 +15,7 @@
 #include "hw/qdev-core.h"
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/aml-build.h"
+#include "hw/boards.h"
 #include "hw/hotplug.h"
 
 typedef struct AcpiCpuStatus {
@@ -22,6 +23,7 @@ typedef struct AcpiCpuStatus {
     uint64_t arch_id;
     bool is_inserting;
     bool is_removing;
+    bool fw_remove;
     uint32_t ost_event;
     uint32_t ost_status;
 } AcpiCpuStatus;
@@ -50,11 +52,15 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner,
 typedef struct CPUHotplugFeatures {
     bool acpi_1_compatible;
     bool has_legacy_cphp;
+    bool fw_unplugs_cpu;
     const char *smi_path;
 } CPUHotplugFeatures;
 
+typedef void (*build_madt_cpu_fn)(int uid, const CPUArchIdList *apic_ids,
+                                  GArray *entry, bool force_enabled);
+
 void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
-                    hwaddr io_base,
+                    build_madt_cpu_fn build_madt_cpu, hwaddr io_base,
                     const char *res_root,
                     const char *event_handler_method);
 
diff --git a/include/hw/acpi/cxl.h b/include/hw/acpi/cxl.h
new file mode 100644 (file)
index 0000000..8f22c71
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2020 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_ACPI_CXL_H
+#define HW_ACPI_CXL_H
+
+#include "hw/acpi/bios-linker-loader.h"
+#include "hw/cxl/cxl.h"
+
+void cxl_build_cedt(GArray *table_offsets, GArray *table_data,
+                    BIOSLinker *linker, const char *oem_id,
+                    const char *oem_table_id, CXLState *cxl_state);
+void build_cxl_osc_method(Aml *dev);
+void build_cxl_dsm_method(Aml *dev);
+
+#endif
diff --git a/include/hw/acpi/erst.h b/include/hw/acpi/erst.h
new file mode 100644 (file)
index 0000000..b2ff663
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * ACPI Error Record Serialization Table, ERST, Implementation
+ *
+ * ACPI ERST introduced in ACPI 4.0, June 16, 2009.
+ * ACPI Platform Error Interfaces : Error Serialization
+ *
+ * Copyright (c) 2021 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef HW_ACPI_ERST_H
+#define HW_ACPI_ERST_H
+
+#include "hw/acpi/bios-linker-loader.h"
+#include "qom/object.h"
+
+void build_erst(GArray *table_data, BIOSLinker *linker, Object *erst_dev,
+                const char *oem_id, const char *oem_table_id);
+
+#define TYPE_ACPI_ERST "acpi-erst"
+
+/* returns NULL unless there is exactly one device */
+static inline Object *find_erst_dev(void)
+{
+    return object_resolve_path_type("", TYPE_ACPI_ERST, NULL);
+}
+#endif
index 6bed92e8fc50ecbb257123b5fce4c58768296e48..ba84ce02147726309c489256f69d7b9be9ef5a7b 100644 (file)
@@ -56,8 +56,8 @@
  *
  */
 
-#ifndef HW_ACPI_GED_H
-#define HW_ACPI_GED_H
+#ifndef HW_ACPI_GENERIC_EVENT_DEVICE_H
+#define HW_ACPI_GENERIC_EVENT_DEVICE_H
 
 #include "hw/sysbus.h"
 #include "hw/acpi/memory_hotplug.h"
 #define TYPE_ACPI_GED "acpi-ged"
 OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED)
 
-#define TYPE_ACPI_GED_X86 "acpi-ged-x86"
-#define ACPI_GED_X86(obj) \
-    OBJECT_CHECK(AcpiGedX86State, (obj), TYPE_ACPI_GED_X86)
-
 #define ACPI_GED_EVT_SEL_OFFSET    0x0
 #define ACPI_GED_EVT_SEL_LEN       0x4
 
index 4ad025e09aeb1a6dbc86e1100c9b7a6d4fbd83f4..674f6958e905fa2ea179747d722a0c72d5eb5a98 100644 (file)
@@ -64,11 +64,21 @@ enum {
 
 typedef struct AcpiGhesState {
     uint64_t ghes_addr_le;
+    bool present; /* True if GHES is present at all on this board */
 } AcpiGhesState;
 
 void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker);
-void acpi_build_hest(GArray *table_data, BIOSLinker *linker);
+void acpi_build_hest(GArray *table_data, BIOSLinker *linker,
+                     const char *oem_id, const char *oem_table_id);
 void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
                           GArray *hardware_errors);
 int acpi_ghes_record_errors(uint8_t notify, uint64_t error_physical_addr);
+
+/**
+ * acpi_ghes_present: Report whether ACPI GHES table is present
+ *
+ * Returns: true if the system has an ACPI GHES table and it is
+ * safe to call acpi_ghes_record_errors() to record a memory error.
+ */
+bool acpi_ghes_present(void);
 #endif
index 54571c77e07c32d76904653c7a0389d9688b3d92..2faf7f0caeba3230cffe5401910dd41eeeaab6d6 100644 (file)
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/cpu_hotplug.h"
 #include "hw/acpi/cpu.h"
+#include "hw/acpi/pcihp.h"
 #include "hw/acpi/memory_hotplug.h"
 #include "hw/acpi/acpi_dev_interface.h"
-#include "hw/acpi/tco.h"
+#include "hw/acpi/ich9_tco.h"
+
+#define ACPI_PCIHP_ADDR_ICH9 0x0cc0
 
 typedef struct ICH9LPCPMRegs {
     /*
@@ -53,21 +56,23 @@ typedef struct ICH9LPCPMRegs {
     AcpiCpuHotplug gpe_cpu;
     CPUHotplugState cpuhp_state;
 
+    bool keep_pci_slot_hpc;
+    bool use_acpi_hotplug_bridge;
+    AcpiPciHpState acpi_pci_hotplug;
     MemHotplugState acpi_memory_hotplug;
 
     uint8_t disable_s3;
     uint8_t disable_s4;
     uint8_t s4_val;
-    uint8_t smm_enabled;
+    bool smm_enabled;
+    bool smm_compat;
     bool enable_tco;
     TCOIORegs tco_regs;
 } ICH9LPCPMRegs;
 
 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco"
 
-void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,
-                  bool smm_enabled,
-                  qemu_irq sci_irq);
+void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, qemu_irq sci_irq);
 
 void ich9_pm_iospace_update(ICH9LPCPMRegs *pm, uint32_t pm_io_base);
 extern const VMStateDescription vmstate_ich9_pm;
@@ -82,6 +87,7 @@ void ich9_pm_device_unplug_request_cb(HotplugHandler *hotplug_dev,
                                       DeviceState *dev, Error **errp);
 void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
                               Error **errp);
+bool ich9_pm_is_hotpluggable_bus(HotplugHandler *hotplug_dev, BusState *bus);
 
 void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list);
 #endif /* HW_ACPI_ICH9_H */
diff --git a/include/hw/acpi/ich9_tco.h b/include/hw/acpi/ich9_tco.h
new file mode 100644 (file)
index 0000000..c4393ca
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * QEMU ICH9 TCO emulation (total cost of ownership)
+ *
+ * Copyright (c) 2015 Paulo Alcantara <pcacjr@zytor.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_ACPI_TCO_H
+#define HW_ACPI_TCO_H
+
+#include "exec/memory.h"
+
+/* As per ICH9 spec, the internal timer has an error of ~0.6s on every tick */
+#define TCO_TICK_NSEC 600000000LL
+
+/* TCO I/O register offsets */
+enum {
+    TCO_RLD           = 0x00,
+    TCO_DAT_IN        = 0x02,
+    TCO_DAT_OUT       = 0x03,
+    TCO1_STS          = 0x04,
+    TCO2_STS          = 0x06,
+    TCO1_CNT          = 0x08,
+    TCO2_CNT          = 0x0a,
+    TCO_MESSAGE1      = 0x0c,
+    TCO_MESSAGE2      = 0x0d,
+    TCO_WDCNT         = 0x0e,
+    SW_IRQ_GEN        = 0x10,
+    TCO_TMR           = 0x12,
+};
+
+/* TCO I/O register control/status bits */
+enum {
+    SW_TCO_SMI           = 1 << 1,
+    TCO_INT_STS          = 1 << 2,
+    TCO_LOCK             = 1 << 12,
+    TCO_TMR_HLT          = 1 << 11,
+    TCO_TIMEOUT          = 1 << 3,
+    TCO_SECOND_TO_STS    = 1 << 1,
+    TCO_BOOT_STS         = 1 << 2,
+};
+
+/* TCO I/O registers mask bits */
+enum {
+    TCO_RLD_MASK     = 0x3ff,
+    TCO1_STS_MASK    = 0xe870,
+    TCO2_STS_MASK    = 0xfff8,
+    TCO1_CNT_MASK    = 0xfeff,
+    TCO_TMR_MASK     = 0x3ff,
+};
+
+typedef struct TCOIORegs {
+    struct {
+        uint16_t rld;
+        uint8_t din;
+        uint8_t dout;
+        uint16_t sts1;
+        uint16_t sts2;
+        uint16_t cnt1;
+        uint16_t cnt2;
+        uint8_t msg1;
+        uint8_t msg2;
+        uint8_t wdcnt;
+        uint16_t tmr;
+    } tco;
+    uint8_t sw_irq_gen;
+
+    QEMUTimer *tco_timer;
+    int64_t expire_time;
+    uint8_t timeouts_no;
+
+    MemoryRegion io;
+} TCOIORegs;
+
+/* tco.c */
+void acpi_pm_tco_init(TCOIORegs *tr, MemoryRegion *parent);
+
+extern const VMStateDescription vmstate_tco_io_sts;
+
+#endif /* HW_ACPI_TCO_H */
index c14ad682ac95586ff6a6cc81986fe5dca8b369c8..6c8079c97a0f49ca65b94d1cd84ae7beda70c915 100644 (file)
@@ -9,13 +9,8 @@
 #ifndef HW_ACPI_IPMI_H
 #define HW_ACPI_IPMI_H
 
-#include "hw/acpi/aml-build.h"
+#include "hw/acpi/acpi_aml_interface.h"
 
-/*
- * Add ACPI IPMI entries for all registered IPMI devices whose parent
- * bus matches the given bus.  The resource is the ACPI resource that
- * contains the IPMI device, this is required for the I2C CRS.
- */
-void build_acpi_ipmi_devices(Aml *table, BusState *bus, const char *resource);
+void build_ipmi_dev_aml(AcpiDevAmlIf *adev, Aml *scope);
 
 #endif /* HW_ACPI_IPMI_H */
index 31bc9191c3d388ae3f4b93cf834d634c41a2a3c1..8a654248e9c67dd8970a5b7c8266abf6137ad336 100644 (file)
@@ -13,7 +13,7 @@
 #define PC_HOTPLUG_H
 
 /*
- * ONLY DEFINEs are permited in this file since it's shared
+ * ONLY DEFINEs are permitted in this file since it's shared
  * between C and ASL code.
  */
 
index bf2a3ed0bab96e37207b77a9dd5e110f948b62ba..467a99461cd8075bc0d707bb7727e755a13adf5f 100644 (file)
 #define HW_ACPI_PCI_H
 
 #include "hw/acpi/bios-linker-loader.h"
+#include "hw/acpi/acpi_aml_interface.h"
 
 typedef struct AcpiMcfgInfo {
     uint64_t base;
     uint32_t size;
 } AcpiMcfgInfo;
 
-void build_mcfg(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info);
+void build_mcfg(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info,
+                const char *oem_id, const char *oem_table_id);
+Aml *aml_pci_device_dsm(void);
+
+void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus);
+void build_pci_bridge_aml(AcpiDevAmlIf *adev, Aml *scope);
 #endif
index dfd375820f87e97caeba6f9764b4cba01b7ce377..ac21a95913c49a2284886ec27a70e4f7defb6bdc 100644 (file)
@@ -46,16 +46,19 @@ typedef struct AcpiPciHpPciStatus {
 typedef struct AcpiPciHpState {
     AcpiPciHpPciStatus acpi_pcihp_pci_status[ACPI_PCIHP_MAX_HOTPLUG_BUS];
     uint32_t hotplug_select;
+    uint32_t acpi_index;
     PCIBus *root;
     MemoryRegion io;
-    bool legacy_piix;
     uint16_t io_base;
     uint16_t io_len;
+    bool use_acpi_hotplug_bridge;
+    bool use_acpi_root_pci_hotplug;
 } AcpiPciHpState;
 
 void acpi_pcihp_init(Object *owner, AcpiPciHpState *, PCIBus *root,
-                     MemoryRegion *address_space_io, bool bridges_enabled);
+                     MemoryRegion *io, uint16_t io_base);
 
+bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus);
 void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev,
                                    DeviceState *dev, Error **errp);
 void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
@@ -67,17 +70,21 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
                                          Error **errp);
 
 /* Called on reset */
-void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off);
+void acpi_pcihp_reset(AcpiPciHpState *s);
+
+void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus);
 
 extern const VMStateDescription vmstate_acpi_pcihp_pci_status;
 
-#define VMSTATE_PCI_HOTPLUG(pcihp, state, test_pcihp) \
+#define VMSTATE_PCI_HOTPLUG(pcihp, state, test_pcihp, test_acpi_index) \
         VMSTATE_UINT32_TEST(pcihp.hotplug_select, state, \
                             test_pcihp), \
         VMSTATE_STRUCT_ARRAY_TEST(pcihp.acpi_pcihp_pci_status, state, \
                                   ACPI_PCIHP_MAX_HOTPLUG_BUS, \
                                   test_pcihp, 1, \
                                   vmstate_acpi_pcihp_pci_status, \
-                                  AcpiPciHpPciStatus)
+                                  AcpiPciHpPciStatus), \
+        VMSTATE_UINT32_TEST(pcihp.acpi_index, state, \
+                            test_acpi_index)
 
 #endif
diff --git a/include/hw/acpi/piix4.h b/include/hw/acpi/piix4.h
new file mode 100644 (file)
index 0000000..eb1c122
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * ACPI implementation
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#ifndef HW_ACPI_PIIX4_H
+#define HW_ACPI_PIIX4_H
+
+#include "hw/pci/pci_device.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/cpu_hotplug.h"
+#include "hw/acpi/memory_hotplug.h"
+#include "hw/acpi/pcihp.h"
+#include "hw/i2c/pm_smbus.h"
+#include "hw/isa/apm.h"
+
+#define TYPE_PIIX4_PM "PIIX4_PM"
+OBJECT_DECLARE_SIMPLE_TYPE(PIIX4PMState, PIIX4_PM)
+
+struct PIIX4PMState {
+    /*< private >*/
+    PCIDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion io;
+    uint32_t io_base;
+
+    MemoryRegion io_gpe;
+    ACPIREGS ar;
+
+    APMState apm;
+
+    PMSMBus smb;
+    uint32_t smb_io_base;
+
+    qemu_irq irq;
+    qemu_irq smi_irq;
+    bool smm_enabled;
+    bool smm_compat;
+    Notifier machine_ready;
+    Notifier powerdown_notifier;
+
+    AcpiPciHpState acpi_pci_hotplug;
+    bool not_migrate_acpi_index;
+
+    uint8_t disable_s3;
+    uint8_t disable_s4;
+    uint8_t s4_val;
+
+    bool cpu_hotplug_legacy;
+    AcpiCpuHotplug gpe_cpu;
+    CPUHotplugState cpuhp_state;
+
+    MemHotplugState acpi_memory_hotplug;
+};
+
+#endif
diff --git a/include/hw/acpi/tco.h b/include/hw/acpi/tco.h
deleted file mode 100644 (file)
index a1e0da8..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * QEMU ICH9 TCO emulation
- *
- * Copyright (c) 2015 Paulo Alcantara <pcacjr@zytor.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#ifndef HW_ACPI_TCO_H
-#define HW_ACPI_TCO_H
-
-#include "exec/memory.h"
-
-/* As per ICH9 spec, the internal timer has an error of ~0.6s on every tick */
-#define TCO_TICK_NSEC 600000000LL
-
-/* TCO I/O register offsets */
-enum {
-    TCO_RLD           = 0x00,
-    TCO_DAT_IN        = 0x02,
-    TCO_DAT_OUT       = 0x03,
-    TCO1_STS          = 0x04,
-    TCO2_STS          = 0x06,
-    TCO1_CNT          = 0x08,
-    TCO2_CNT          = 0x0a,
-    TCO_MESSAGE1      = 0x0c,
-    TCO_MESSAGE2      = 0x0d,
-    TCO_WDCNT         = 0x0e,
-    SW_IRQ_GEN        = 0x10,
-    TCO_TMR           = 0x12,
-};
-
-/* TCO I/O register control/status bits */
-enum {
-    SW_TCO_SMI           = 1 << 1,
-    TCO_INT_STS          = 1 << 2,
-    TCO_LOCK             = 1 << 12,
-    TCO_TMR_HLT          = 1 << 11,
-    TCO_TIMEOUT          = 1 << 3,
-    TCO_SECOND_TO_STS    = 1 << 1,
-    TCO_BOOT_STS         = 1 << 2,
-};
-
-/* TCO I/O registers mask bits */
-enum {
-    TCO_RLD_MASK     = 0x3ff,
-    TCO1_STS_MASK    = 0xe870,
-    TCO2_STS_MASK    = 0xfff8,
-    TCO1_CNT_MASK    = 0xfeff,
-    TCO_TMR_MASK     = 0x3ff,
-};
-
-typedef struct TCOIORegs {
-    struct {
-        uint16_t rld;
-        uint8_t din;
-        uint8_t dout;
-        uint16_t sts1;
-        uint16_t sts2;
-        uint16_t cnt1;
-        uint16_t cnt2;
-        uint8_t msg1;
-        uint8_t msg2;
-        uint8_t wdcnt;
-        uint16_t tmr;
-    } tco;
-    uint8_t sw_irq_gen;
-
-    QEMUTimer *tco_timer;
-    int64_t expire_time;
-    uint8_t timeouts_no;
-
-    MemoryRegion io;
-} TCOIORegs;
-
-/* tco.c */
-void acpi_pm_tco_init(TCOIORegs *tr, MemoryRegion *parent);
-
-extern const VMStateDescription vmstate_tco_io_sts;
-
-#endif /* HW_ACPI_TCO_H */
index 1a2a57a21f0573f9a753a06f8e2001975dfca5d2..579c45f5bafed9e271fc243943ac0cb2bf811b73 100644 (file)
@@ -21,6 +21,8 @@
 #include "hw/acpi/aml-build.h"
 #include "sysemu/tpm.h"
 
+#ifdef CONFIG_TPM
+
 #define TPM_TIS_ADDR_BASE           0xFED40000
 #define TPM_TIS_ADDR_SIZE           0x5000
 
@@ -91,6 +93,7 @@
 #define TPM_TIS_CAP_DATA_TRANSFER_64B    (3 << 9)
 #define TPM_TIS_CAP_DATA_TRANSFER_LEGACY (0 << 9)
 #define TPM_TIS_CAP_BURST_COUNT_DYNAMIC  (0 << 8)
+#define TPM_TIS_CAP_BURST_COUNT_STATIC   (1 << 8)
 #define TPM_TIS_CAP_INTERRUPT_LOW_LEVEL  (1 << 4) /* support is mandatory */
 #define TPM_TIS_CAPABILITIES_SUPPORTED1_3 \
     (TPM_TIS_CAP_INTERRUPT_LOW_LEVEL | \
@@ -207,6 +210,48 @@ REG32(CRB_DATA_BUFFER, 0x80)
 #define TPM_PPI_FUNC_ALLOWED_USR_NOT_REQ (4 << 0)
 #define TPM_PPI_FUNC_MASK                (7 << 0)
 
+/* TPM TIS I2C registers */
+#define TPM_I2C_REG_LOC_SEL              0x00
+#define TPM_I2C_REG_ACCESS               0x04
+#define TPM_I2C_REG_INT_ENABLE           0x08
+#define TPM_I2C_REG_INT_CAPABILITY       0x14
+#define TPM_I2C_REG_STS                  0x18
+#define TPM_I2C_REG_DATA_FIFO            0x24
+#define TPM_I2C_REG_INTF_CAPABILITY      0x30
+#define TPM_I2C_REG_I2C_DEV_ADDRESS      0x38
+#define TPM_I2C_REG_DATA_CSUM_ENABLE     0x40
+#define TPM_I2C_REG_DATA_CSUM_GET        0x44
+#define TPM_I2C_REG_DID_VID              0x48
+#define TPM_I2C_REG_RID                  0x4c
+#define TPM_I2C_REG_UNKNOWN              0xff
+
+/* I2C specific interface capabilities */
+#define TPM_I2C_CAP_INTERFACE_TYPE     (0x2 << 0)       /* FIFO interface */
+#define TPM_I2C_CAP_INTERFACE_VER      (0x0 << 4)       /* TCG I2C intf 1.0 */
+#define TPM_I2C_CAP_TPM2_FAMILY        (0x1 << 7)       /* TPM 2.0 family. */
+#define TPM_I2C_CAP_DEV_ADDR_CHANGE    (0x0 << 27)      /* No dev addr chng */
+#define TPM_I2C_CAP_BURST_COUNT_STATIC (0x1 << 29)      /* Burst count static */
+#define TPM_I2C_CAP_LOCALITY_CAP       (0x1 << 25)      /* 0-5 locality */
+#define TPM_I2C_CAP_BUS_SPEED          (3   << 21)      /* std and fast mode */
+
+/*
+ * TPM_I2C_STS masks for read/writing bits from/to TIS
+ * TPM_STS mask for read bits 31:26 must be zero
+ */
+#define TPM_I2C_STS_READ_MASK          0x00ffffdd
+#define TPM_I2C_STS_WRITE_MASK         0x03000062
+
+/* Checksum enabled. */
+#define TPM_DATA_CSUM_ENABLED     0x1
+
+/*
+ * TPM_I2C_INT_ENABLE mask. Linux kernel does not support
+ * interrupts hence setting it to 0.
+ */
+#define TPM_I2C_INT_ENABLE_MASK   0x0
+
 void tpm_build_ppi_acpi(TPMIf *tpm, Aml *dev);
 
+#endif /* CONFIG_TPM */
+
 #endif /* HW_ACPI_TPM_H */
index 140b4de6034c9e8c1f9d9f8c18eacdd583129a3d..0022df027d9db83382b0d1fe66ac2581c23c7541 100644 (file)
@@ -4,6 +4,5 @@
 #include "hw/nvram/fw_cfg.h"
 
 MemoryRegion *acpi_add_rom_blob(FWCfgCallback update, void *opaque,
-                                GArray *blob, const char *name,
-                                uint64_t max_size);
+                                GArray *blob, const char *name);
 #endif
index cb4ad37fc5b35f08f9b261443a28878b46fbfb23..fb135d5bcbeefa9858a1f6450631e734cb2f5b3c 100644 (file)
@@ -13,7 +13,7 @@
 
 #define VMGENID_FW_CFG_SIZE      4096 /* Occupy a page of memory */
 #define VMGENID_GUID_OFFSET      40   /* allow space for
-                                       * OVMF SDT Header Probe Supressor
+                                       * OVMF SDT Header Probe Suppressor
                                        */
 
 OBJECT_DECLARE_SIMPLE_TYPE(VmGenIdState, VMGENID)
@@ -31,7 +31,7 @@ static inline Object *find_vmgenid_dev(void)
 }
 
 void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid,
-                        BIOSLinker *linker);
+                        BIOSLinker *linker, const char *oem_id);
 void vmgenid_add_fw_cfg(VmGenIdState *vms, FWCfgState *s, GArray *guid);
 
 #endif
diff --git a/include/hw/adc/aspeed_adc.h b/include/hw/adc/aspeed_adc.h
new file mode 100644 (file)
index 0000000..ff1d06e
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Aspeed ADC
+ *
+ * Copyright 2017-2021 IBM Corp.
+ *
+ * Andrew Jeffery <andrew@aj.id.au>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_ADC_ASPEED_ADC_H
+#define HW_ADC_ASPEED_ADC_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_ASPEED_ADC "aspeed.adc"
+#define TYPE_ASPEED_2400_ADC TYPE_ASPEED_ADC "-ast2400"
+#define TYPE_ASPEED_2500_ADC TYPE_ASPEED_ADC "-ast2500"
+#define TYPE_ASPEED_2600_ADC TYPE_ASPEED_ADC "-ast2600"
+#define TYPE_ASPEED_1030_ADC TYPE_ASPEED_ADC "-ast1030"
+OBJECT_DECLARE_TYPE(AspeedADCState, AspeedADCClass, ASPEED_ADC)
+
+#define TYPE_ASPEED_ADC_ENGINE "aspeed.adc.engine"
+OBJECT_DECLARE_SIMPLE_TYPE(AspeedADCEngineState, ASPEED_ADC_ENGINE)
+
+#define ASPEED_ADC_NR_CHANNELS 16
+#define ASPEED_ADC_NR_REGS     (0xD0 >> 2)
+
+struct AspeedADCEngineState {
+    /* <private> */
+    SysBusDevice parent;
+
+    MemoryRegion mmio;
+    qemu_irq irq;
+    uint32_t engine_id;
+    uint32_t nr_channels;
+    uint32_t regs[ASPEED_ADC_NR_REGS];
+};
+
+struct AspeedADCState {
+    /* <private> */
+    SysBusDevice parent;
+
+    MemoryRegion mmio;
+    qemu_irq irq;
+
+    AspeedADCEngineState engines[2];
+};
+
+struct AspeedADCClass {
+    SysBusDeviceClass parent_class;
+
+    uint32_t nr_engines;
+};
+
+#endif /* HW_ADC_ASPEED_ADC_H */
diff --git a/include/hw/adc/max111x.h b/include/hw/adc/max111x.h
new file mode 100644 (file)
index 0000000..beff59c
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Maxim MAX1110/1111 ADC chip emulation.
+ *
+ * Copyright (c) 2006 Openedhand Ltd.
+ * Written by Andrzej Zaborowski <balrog@zabor.org>
+ *
+ * This code is licensed under the GNU GPLv2.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#ifndef HW_MISC_MAX111X_H
+#define HW_MISC_MAX111X_H
+
+#include "hw/ssi/ssi.h"
+#include "qom/object.h"
+
+/*
+ * This is a model of the Maxim MAX1110/1111 ADC chip, which for QEMU
+ * is an SSI slave device. It has either 4 (max1110) or 8 (max1111)
+ * 8-bit ADC channels.
+ *
+ * QEMU interface:
+ *  + GPIO inputs 0..3 (for max1110) or 0..7 (for max1111): set the value
+ *    of each ADC input, as an unsigned 8-bit value
+ *  + GPIO output 0: interrupt line
+ *  + Properties "input0" to "input3" (max1110) or "input0" to "input7"
+ *    (max1111): initial reset values for ADC inputs.
+ *
+ * Known bugs:
+ *  + the interrupt line is not correctly implemented, and will never
+ *    be lowered once it has been asserted.
+ */
+struct MAX111xState {
+    SSIPeripheral parent_obj;
+
+    qemu_irq interrupt;
+    /* Values of inputs at system reset (settable by QOM property) */
+    uint8_t reset_input[8];
+
+    uint8_t tb1, rb2, rb3;
+    int cycle;
+
+    uint8_t input[8];
+    int inputs, com;
+};
+
+#define TYPE_MAX_111X "max111x"
+
+OBJECT_DECLARE_SIMPLE_TYPE(MAX111xState, MAX_111X)
+
+#define TYPE_MAX_1110 "max1110"
+#define TYPE_MAX_1111 "max1111"
+
+#endif
diff --git a/include/hw/adc/npcm7xx_adc.h b/include/hw/adc/npcm7xx_adc.h
new file mode 100644 (file)
index 0000000..93330a4
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Nuvoton NPCM7xx ADC Module
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM7XX_ADC_H
+#define NPCM7XX_ADC_H
+
+#include "hw/clock.h"
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+
+#define NPCM7XX_ADC_NUM_INPUTS      8
+/**
+ * This value should not be changed unless write_adc_calibration function in
+ * hw/arm/npcm7xx.c is also changed.
+ */
+#define NPCM7XX_ADC_NUM_CALIB       2
+
+/**
+ * struct NPCM7xxADCState - Analog to Digital Converter Module device state.
+ * @parent: System bus device.
+ * @iomem: Memory region through which registers are accessed.
+ * @conv_timer: The timer counts down remaining cycles for the conversion.
+ * @irq: GIC interrupt line to fire on expiration (if enabled).
+ * @con: The Control Register.
+ * @data: The Data Buffer.
+ * @clock: The ADC Clock.
+ * @adci: The input voltage in units of uV. 1uv = 1e-6V.
+ * @vref: The external reference voltage.
+ * @iref: The internal reference voltage, initialized at launch time.
+ * @rv: The calibrated output values of 0.5V and 1.5V for the ADC.
+ */
+struct NPCM7xxADCState {
+    SysBusDevice parent;
+
+    MemoryRegion iomem;
+
+    QEMUTimer    conv_timer;
+
+    qemu_irq     irq;
+    uint32_t     con;
+    uint32_t     data;
+    Clock       *clock;
+
+    /* Voltages are in unit of uV. 1V = 1000000uV. */
+    uint32_t     adci[NPCM7XX_ADC_NUM_INPUTS];
+    uint32_t     vref;
+    uint32_t     iref;
+
+    uint16_t     calibration_r_values[NPCM7XX_ADC_NUM_CALIB];
+};
+
+#define TYPE_NPCM7XX_ADC "npcm7xx-adc"
+OBJECT_DECLARE_SIMPLE_TYPE(NPCM7xxADCState, NPCM7XX_ADC)
+
+#endif /* NPCM7XX_ADC_H */
diff --git a/include/hw/adc/stm32f2xx_adc.h b/include/hw/adc/stm32f2xx_adc.h
new file mode 100644 (file)
index 0000000..42b4898
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * STM32F2XX ADC
+ *
+ * Copyright (c) 2014 Alistair Francis <alistair@alistair23.me>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_STM32F2XX_ADC_H
+#define HW_STM32F2XX_ADC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define ADC_SR    0x00
+#define ADC_CR1   0x04
+#define ADC_CR2   0x08
+#define ADC_SMPR1 0x0C
+#define ADC_SMPR2 0x10
+#define ADC_JOFR1 0x14
+#define ADC_JOFR2 0x18
+#define ADC_JOFR3 0x1C
+#define ADC_JOFR4 0x20
+#define ADC_HTR   0x24
+#define ADC_LTR   0x28
+#define ADC_SQR1  0x2C
+#define ADC_SQR2  0x30
+#define ADC_SQR3  0x34
+#define ADC_JSQR  0x38
+#define ADC_JDR1  0x3C
+#define ADC_JDR2  0x40
+#define ADC_JDR3  0x44
+#define ADC_JDR4  0x48
+#define ADC_DR    0x4C
+
+#define ADC_CR2_ADON    0x01
+#define ADC_CR2_CONT    0x02
+#define ADC_CR2_ALIGN   0x800
+#define ADC_CR2_SWSTART 0x40000000
+
+#define ADC_CR1_RES 0x3000000
+
+#define ADC_COMMON_ADDRESS 0x100
+
+#define TYPE_STM32F2XX_ADC "stm32f2xx-adc"
+OBJECT_DECLARE_SIMPLE_TYPE(STM32F2XXADCState, STM32F2XX_ADC)
+
+struct STM32F2XXADCState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion mmio;
+
+    uint32_t adc_sr;
+    uint32_t adc_cr1;
+    uint32_t adc_cr2;
+    uint32_t adc_smpr1;
+    uint32_t adc_smpr2;
+    uint32_t adc_jofr[4];
+    uint32_t adc_htr;
+    uint32_t adc_ltr;
+    uint32_t adc_sqr1;
+    uint32_t adc_sqr2;
+    uint32_t adc_sqr3;
+    uint32_t adc_jsqr;
+    uint32_t adc_jdr[4];
+    uint32_t adc_dr;
+
+    qemu_irq irq;
+};
+
+#endif /* HW_STM32F2XX_ADC_H */
diff --git a/include/hw/adc/zynq-xadc.h b/include/hw/adc/zynq-xadc.h
new file mode 100644 (file)
index 0000000..c10cc4c
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Device model for Zynq ADC controller
+ *
+ * Copyright (c) 2015 Guenter Roeck <linux@roeck-us.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef ZYNQ_XADC_H
+#define ZYNQ_XADC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define ZYNQ_XADC_MMIO_SIZE     0x0020
+#define ZYNQ_XADC_NUM_IO_REGS   (ZYNQ_XADC_MMIO_SIZE / 4)
+#define ZYNQ_XADC_NUM_ADC_REGS  128
+#define ZYNQ_XADC_FIFO_DEPTH    15
+
+#define TYPE_ZYNQ_XADC          "xlnx-zynq-xadc"
+OBJECT_DECLARE_SIMPLE_TYPE(ZynqXADCState, ZYNQ_XADC)
+
+struct ZynqXADCState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+
+    uint32_t regs[ZYNQ_XADC_NUM_IO_REGS];
+    uint16_t xadc_regs[ZYNQ_XADC_NUM_ADC_REGS];
+    uint16_t xadc_read_reg_previous;
+    uint16_t xadc_dfifo[ZYNQ_XADC_FIFO_DEPTH];
+    uint16_t xadc_dfifo_entries;
+
+    qemu_irq irq;
+};
+
+#endif /* ZYNQ_XADC_H */
diff --git a/include/hw/arm/allwinner-a10.h b/include/hw/arm/allwinner-a10.h
new file mode 100644 (file)
index 0000000..2eb83a1
--- /dev/null
@@ -0,0 +1,70 @@
+#ifndef HW_ARM_ALLWINNER_A10_H
+#define HW_ARM_ALLWINNER_A10_H
+
+#include "hw/timer/allwinner-a10-pit.h"
+#include "hw/intc/allwinner-a10-pic.h"
+#include "hw/net/allwinner_emac.h"
+#include "hw/sd/allwinner-sdhost.h"
+#include "hw/ide/ahci.h"
+#include "hw/usb/hcd-ohci.h"
+#include "hw/usb/hcd-ehci.h"
+#include "hw/rtc/allwinner-rtc.h"
+#include "hw/misc/allwinner-a10-ccm.h"
+#include "hw/misc/allwinner-a10-dramc.h"
+#include "hw/i2c/allwinner-i2c.h"
+#include "hw/watchdog/allwinner-wdt.h"
+#include "sysemu/block-backend.h"
+
+#include "target/arm/cpu.h"
+#include "qom/object.h"
+
+
+#define AW_A10_SDRAM_BASE       0x40000000
+
+#define AW_A10_NUM_USB          2
+
+#define TYPE_AW_A10 "allwinner-a10"
+OBJECT_DECLARE_SIMPLE_TYPE(AwA10State, AW_A10)
+
+struct AwA10State {
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
+
+    ARMCPU cpu;
+    AwA10ClockCtlState ccm;
+    AwA10DramControllerState dramc;
+    AwA10PITState timer;
+    AwA10PICState intc;
+    AwEmacState emac;
+    AllwinnerAHCIState sata;
+    AwSdHostState mmc0;
+    AWI2CState i2c0;
+    AwRtcState rtc;
+    AwWdtState wdt;
+    MemoryRegion sram_a;
+    EHCISysBusState ehci[AW_A10_NUM_USB];
+    OHCISysBusState ohci[AW_A10_NUM_USB];
+};
+
+/**
+ * Emulate Boot ROM firmware setup functionality.
+ *
+ * A real Allwinner A10 SoC contains a Boot ROM
+ * which is the first code that runs right after
+ * the SoC is powered on. The Boot ROM is responsible
+ * for loading user code (e.g. a bootloader) from any
+ * of the supported external devices and writing the
+ * downloaded code to internal SRAM. After loading the SoC
+ * begins executing the code written to SRAM.
+ *
+ * This function emulates the Boot ROM by copying 32 KiB
+ * of data at offset 8 KiB from the given block device and writes it to
+ * the start of the first internal SRAM memory.
+ *
+ * @s: Allwinner A10 state object pointer
+ * @blk: Block backend device object pointer
+ */
+void allwinner_a10_bootrom_setup(AwA10State *s, BlockBackend *blk);
+
+#endif
diff --git a/include/hw/arm/allwinner-h3.h b/include/hw/arm/allwinner-h3.h
new file mode 100644 (file)
index 0000000..24ba4e1
--- /dev/null
@@ -0,0 +1,172 @@
+/*
+ * Allwinner H3 System on Chip emulation
+ *
+ * Copyright (C) 2019 Niek Linnenbank <nieklinnenbank@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * The Allwinner H3 is a System on Chip containing four ARM Cortex-A7
+ * processor cores. Features and specifications include DDR2/DDR3 memory,
+ * SD/MMC storage cards, 10/100/1000Mbit Ethernet, USB 2.0, HDMI and
+ * various I/O modules.
+ *
+ * This implementation is based on the following datasheet:
+ *
+ *   https://linux-sunxi.org/File:Allwinner_H3_Datasheet_V1.2.pdf
+ *
+ * The latest datasheet and more info can be found on the Linux Sunxi wiki:
+ *
+ *   https://linux-sunxi.org/H3
+ */
+
+#ifndef HW_ARM_ALLWINNER_H3_H
+#define HW_ARM_ALLWINNER_H3_H
+
+#include "qom/object.h"
+#include "hw/timer/allwinner-a10-pit.h"
+#include "hw/intc/arm_gic.h"
+#include "hw/misc/allwinner-h3-ccu.h"
+#include "hw/misc/allwinner-cpucfg.h"
+#include "hw/misc/allwinner-h3-dramc.h"
+#include "hw/misc/allwinner-h3-sysctrl.h"
+#include "hw/misc/allwinner-sid.h"
+#include "hw/sd/allwinner-sdhost.h"
+#include "hw/net/allwinner-sun8i-emac.h"
+#include "hw/rtc/allwinner-rtc.h"
+#include "hw/i2c/allwinner-i2c.h"
+#include "hw/watchdog/allwinner-wdt.h"
+#include "target/arm/cpu.h"
+#include "sysemu/block-backend.h"
+
+/**
+ * Allwinner H3 device list
+ *
+ * This enumeration is can be used refer to a particular device in the
+ * Allwinner H3 SoC. For example, the physical memory base address for
+ * each device can be found in the AwH3State object in the memmap member
+ * using the device enum value as index.
+ *
+ * @see AwH3State
+ */
+enum {
+    AW_H3_DEV_SRAM_A1,
+    AW_H3_DEV_SRAM_A2,
+    AW_H3_DEV_SRAM_C,
+    AW_H3_DEV_SYSCTRL,
+    AW_H3_DEV_MMC0,
+    AW_H3_DEV_SID,
+    AW_H3_DEV_EHCI0,
+    AW_H3_DEV_OHCI0,
+    AW_H3_DEV_EHCI1,
+    AW_H3_DEV_OHCI1,
+    AW_H3_DEV_EHCI2,
+    AW_H3_DEV_OHCI2,
+    AW_H3_DEV_EHCI3,
+    AW_H3_DEV_OHCI3,
+    AW_H3_DEV_CCU,
+    AW_H3_DEV_PIT,
+    AW_H3_DEV_UART0,
+    AW_H3_DEV_UART1,
+    AW_H3_DEV_UART2,
+    AW_H3_DEV_UART3,
+    AW_H3_DEV_EMAC,
+    AW_H3_DEV_TWI0,
+    AW_H3_DEV_TWI1,
+    AW_H3_DEV_TWI2,
+    AW_H3_DEV_DRAMCOM,
+    AW_H3_DEV_DRAMCTL,
+    AW_H3_DEV_DRAMPHY,
+    AW_H3_DEV_GIC_DIST,
+    AW_H3_DEV_GIC_CPU,
+    AW_H3_DEV_GIC_HYP,
+    AW_H3_DEV_GIC_VCPU,
+    AW_H3_DEV_RTC,
+    AW_H3_DEV_CPUCFG,
+    AW_H3_DEV_R_TWI,
+    AW_H3_DEV_SDRAM,
+    AW_H3_DEV_WDT
+};
+
+/** Total number of CPU cores in the H3 SoC */
+#define AW_H3_NUM_CPUS      (4)
+
+/**
+ * Allwinner H3 object model
+ * @{
+ */
+
+/** Object type for the Allwinner H3 SoC */
+#define TYPE_AW_H3 "allwinner-h3"
+
+/** Convert input object to Allwinner H3 state object */
+OBJECT_DECLARE_SIMPLE_TYPE(AwH3State, AW_H3)
+
+/** @} */
+
+/**
+ * Allwinner H3 object
+ *
+ * This struct contains the state of all the devices
+ * which are currently emulated by the H3 SoC code.
+ */
+struct AwH3State {
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
+
+    ARMCPU cpus[AW_H3_NUM_CPUS];
+    const hwaddr *memmap;
+    AwA10PITState timer;
+    AwH3ClockCtlState ccu;
+    AwCpuCfgState cpucfg;
+    AwH3DramCtlState dramc;
+    AwH3SysCtrlState sysctrl;
+    AwSidState sid;
+    AwSdHostState mmc0;
+    AWI2CState i2c0;
+    AWI2CState i2c1;
+    AWI2CState i2c2;
+    AWI2CState r_twi;
+    AwSun8iEmacState emac;
+    AwRtcState rtc;
+    AwWdtState wdt;
+    GICState gic;
+    MemoryRegion sram_a1;
+    MemoryRegion sram_a2;
+    MemoryRegion sram_c;
+};
+
+/**
+ * Emulate Boot ROM firmware setup functionality.
+ *
+ * A real Allwinner H3 SoC contains a Boot ROM
+ * which is the first code that runs right after
+ * the SoC is powered on. The Boot ROM is responsible
+ * for loading user code (e.g. a bootloader) from any
+ * of the supported external devices and writing the
+ * downloaded code to internal SRAM. After loading the SoC
+ * begins executing the code written to SRAM.
+ *
+ * This function emulates the Boot ROM by copying 32 KiB
+ * of data from the given block device and writes it to
+ * the start of the first internal SRAM memory.
+ *
+ * @s: Allwinner H3 state object pointer
+ * @blk: Block backend device object pointer
+ */
+void allwinner_h3_bootrom_setup(AwH3State *s, BlockBackend *blk);
+
+#endif /* HW_ARM_ALLWINNER_H3_H */
diff --git a/include/hw/arm/allwinner-r40.h b/include/hw/arm/allwinner-r40.h
new file mode 100644 (file)
index 0000000..6e1ac9d
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * Allwinner R40/A40i/T3 System on Chip emulation
+ *
+ * Copyright (C) 2023 qianfan Zhao <qianfanguijin@163.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_ARM_ALLWINNER_R40_H
+#define HW_ARM_ALLWINNER_R40_H
+
+#include "qom/object.h"
+#include "hw/timer/allwinner-a10-pit.h"
+#include "hw/intc/arm_gic.h"
+#include "hw/sd/allwinner-sdhost.h"
+#include "hw/misc/allwinner-r40-ccu.h"
+#include "hw/misc/allwinner-r40-dramc.h"
+#include "hw/misc/allwinner-sramc.h"
+#include "hw/i2c/allwinner-i2c.h"
+#include "hw/net/allwinner_emac.h"
+#include "hw/net/allwinner-sun8i-emac.h"
+#include "target/arm/cpu.h"
+#include "sysemu/block-backend.h"
+
+enum {
+    AW_R40_DEV_SRAM_A1,
+    AW_R40_DEV_SRAM_A2,
+    AW_R40_DEV_SRAM_A3,
+    AW_R40_DEV_SRAM_A4,
+    AW_R40_DEV_SRAMC,
+    AW_R40_DEV_EMAC,
+    AW_R40_DEV_MMC0,
+    AW_R40_DEV_MMC1,
+    AW_R40_DEV_MMC2,
+    AW_R40_DEV_MMC3,
+    AW_R40_DEV_CCU,
+    AW_R40_DEV_PIT,
+    AW_R40_DEV_UART0,
+    AW_R40_DEV_UART1,
+    AW_R40_DEV_UART2,
+    AW_R40_DEV_UART3,
+    AW_R40_DEV_UART4,
+    AW_R40_DEV_UART5,
+    AW_R40_DEV_UART6,
+    AW_R40_DEV_UART7,
+    AW_R40_DEV_TWI0,
+    AW_R40_DEV_GMAC,
+    AW_R40_DEV_GIC_DIST,
+    AW_R40_DEV_GIC_CPU,
+    AW_R40_DEV_GIC_HYP,
+    AW_R40_DEV_GIC_VCPU,
+    AW_R40_DEV_SDRAM,
+    AW_R40_DEV_DRAMCOM,
+    AW_R40_DEV_DRAMCTL,
+    AW_R40_DEV_DRAMPHY,
+};
+
+#define AW_R40_NUM_CPUS      (4)
+
+/**
+ * Allwinner R40 object model
+ * @{
+ */
+
+/** Object type for the Allwinner R40 SoC */
+#define TYPE_AW_R40 "allwinner-r40"
+
+/** Convert input object to Allwinner R40 state object */
+OBJECT_DECLARE_SIMPLE_TYPE(AwR40State, AW_R40)
+
+/** @} */
+
+/**
+ * Allwinner R40 object
+ *
+ * This struct contains the state of all the devices
+ * which are currently emulated by the R40 SoC code.
+ */
+#define AW_R40_NUM_MMCS         4
+#define AW_R40_NUM_UARTS        8
+
+struct AwR40State {
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
+
+    /** Physical base address for start of RAM */
+    hwaddr ram_addr;
+
+    /** Total RAM size in megabytes */
+    uint32_t ram_size;
+
+    ARMCPU cpus[AW_R40_NUM_CPUS];
+    const hwaddr *memmap;
+    AwSRAMCState sramc;
+    AwA10PITState timer;
+    AwSdHostState mmc[AW_R40_NUM_MMCS];
+    AwR40ClockCtlState ccu;
+    AwR40DramCtlState dramc;
+    AWI2CState i2c0;
+    AwEmacState emac;
+    AwSun8iEmacState gmac;
+    GICState gic;
+    MemoryRegion sram_a1;
+    MemoryRegion sram_a2;
+    MemoryRegion sram_a3;
+    MemoryRegion sram_a4;
+};
+
+/**
+ * Emulate Boot ROM firmware setup functionality.
+ *
+ * A real Allwinner R40 SoC contains a Boot ROM
+ * which is the first code that runs right after
+ * the SoC is powered on. The Boot ROM is responsible
+ * for loading user code (e.g. a bootloader) from any
+ * of the supported external devices and writing the
+ * downloaded code to internal SRAM. After loading the SoC
+ * begins executing the code written to SRAM.
+ *
+ * This function emulates the Boot ROM by copying 32 KiB
+ * of data from the given block device and writes it to
+ * the start of the first internal SRAM memory.
+ *
+ * @s: Allwinner R40 state object pointer
+ * @blk: Block backend device object pointer
+ * @unit: the mmc control's unit
+ */
+bool allwinner_r40_bootrom_setup(AwR40State *s, BlockBackend *blk, int unit);
+
+#endif /* HW_ARM_ALLWINNER_R40_H */
diff --git a/include/hw/arm/armsse-version.h b/include/hw/arm/armsse-version.h
new file mode 100644 (file)
index 0000000..60780fa
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * ARM SSE (Subsystems for Embedded): IoTKit, SSE-200
+ *
+ * Copyright (c) 2020 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+#ifndef ARMSSE_VERSION_H
+#define ARMSSE_VERSION_H
+
+
+/*
+ * Define an enumeration of the possible values of the sse-version
+ * property implemented by various sub-devices of the SSE, and
+ * a validation function that checks that a valid value has been passed.
+ * These are arbitrary QEMU-internal values (nobody should be creating
+ * the sub-devices of the SSE except for the SSE object itself), but
+ * we pick obvious numbers for the benefit of people debugging with gdb.
+ */
+enum {
+    ARMSSE_IOTKIT = 0,
+    ARMSSE_SSE200 = 200,
+    ARMSSE_SSE300 = 300,
+};
+
+static inline bool armsse_version_valid(uint32_t sse_version)
+{
+    switch (sse_version) {
+    case ARMSSE_IOTKIT:
+    case ARMSSE_SSE200:
+    case ARMSSE_SSE300:
+        return true;
+    default:
+        return false;
+    }
+}
+
+#endif
diff --git a/include/hw/arm/armsse.h b/include/hw/arm/armsse.h
new file mode 100644 (file)
index 0000000..88b3b75
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * ARM SSE (Subsystems for Embedded): IoTKit, SSE-200
+ *
+ * Copyright (c) 2018 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+/*
+ * This is a model of the Arm "Subsystems for Embedded" family of
+ * hardware, which include the IoT Kit and the SSE-050, SSE-100 and
+ * SSE-200. Currently we model:
+ *  - the Arm IoT Kit which is documented in
+ *    https://developer.arm.com/documentation/ecm0601256/latest
+ *  - the SSE-200 which is documented in
+ *    https://developer.arm.com/documentation/101104/latest/
+ *
+ * The IoTKit contains:
+ *  a Cortex-M33
+ *  the IDAU
+ *  some timers and watchdogs
+ *  two peripheral protection controllers
+ *  a memory protection controller
+ *  a security controller
+ *  a bus fabric which arranges that some parts of the address
+ *  space are secure and non-secure aliases of each other
+ * The SSE-200 additionally contains:
+ *  a second Cortex-M33
+ *  two Message Handling Units (MHUs)
+ *  an optional CryptoCell (which we do not model)
+ *  more SRAM banks with associated MPCs
+ *  multiple Power Policy Units (PPUs)
+ *  a control interface for an icache for each CPU
+ *  per-CPU identity and control register blocks
+ *
+ * QEMU interface:
+ *  + Clock input "MAINCLK": clock for CPUs and most peripherals
+ *  + Clock input "S32KCLK": slow 32KHz clock used for a few peripherals
+ *  + QOM property "memory" is a MemoryRegion containing the devices provided
+ *    by the board model.
+ *  + QOM property "EXP_NUMIRQ" sets the number of expansion interrupts.
+ *    (In hardware, the SSE-200 permits the number of expansion interrupts
+ *    for the two CPUs to be configured separately, but we restrict it to
+ *    being the same for both, to avoid having to have separate Property
+ *    lists for different variants. This restriction can be relaxed later
+ *    if necessary.)
+ *  + QOM property "SRAM_ADDR_WIDTH" sets the number of bits used for the
+ *    address of each SRAM bank (and thus the total amount of internal SRAM)
+ *  + QOM property "init-svtor" sets the initial value of the CPU SVTOR register
+ *    (where it expects to load the PC and SP from the vector table on reset)
+ *  + QOM properties "CPU0_FPU", "CPU0_DSP", "CPU1_FPU" and "CPU1_DSP" which
+ *    set whether the CPUs have the FPU and DSP features present. The default
+ *    (matching the hardware) is that for CPU0 in an IoTKit and CPU1 in an
+ *    SSE-200 both are present; CPU0 in an SSE-200 has neither.
+ *    Since the IoTKit has only one CPU, it does not have the CPU1_* properties.
+ *  + QOM properties "CPU0_MPU_NS", "CPU0_MPU_S", "CPU1_MPU_NS" and "CPU1_MPU_S"
+ *    which set the number of MPU regions on the CPUs. If there is only one
+ *    CPU the CPU1 properties are not present.
+ *  + Named GPIO inputs "EXP_IRQ" 0..n are the expansion interrupts for CPU 0,
+ *    which are wired to its NVIC lines 32 .. n+32
+ *  + Named GPIO inputs "EXP_CPU1_IRQ" 0..n are the expansion interrupts for
+ *    CPU 1, which are wired to its NVIC lines 32 .. n+32
+ *  + sysbus MMIO region 0 is the "AHB Slave Expansion" which allows
+ *    bus master devices in the board model to make transactions into
+ *    all the devices and memory areas in the IoTKit
+ * Controlling up to 4 AHB expansion PPBs which a system using the IoTKit
+ * might provide:
+ *  + named GPIO outputs apb_ppcexp{0,1,2,3}_nonsec[0..15]
+ *  + named GPIO outputs apb_ppcexp{0,1,2,3}_ap[0..15]
+ *  + named GPIO outputs apb_ppcexp{0,1,2,3}_irq_enable
+ *  + named GPIO outputs apb_ppcexp{0,1,2,3}_irq_clear
+ *  + named GPIO inputs apb_ppcexp{0,1,2,3}_irq_status
+ * Controlling each of the 4 expansion AHB PPCs which a system using the IoTKit
+ * might provide:
+ *  + named GPIO outputs ahb_ppcexp{0,1,2,3}_nonsec[0..15]
+ *  + named GPIO outputs ahb_ppcexp{0,1,2,3}_ap[0..15]
+ *  + named GPIO outputs ahb_ppcexp{0,1,2,3}_irq_enable
+ *  + named GPIO outputs ahb_ppcexp{0,1,2,3}_irq_clear
+ *  + named GPIO inputs ahb_ppcexp{0,1,2,3}_irq_status
+ * Controlling each of the 16 expansion MPCs which a system using the IoTKit
+ * might provide:
+ *  + named GPIO inputs mpcexp_status[0..15]
+ * Controlling each of the 16 expansion MSCs which a system using the IoTKit
+ * might provide:
+ *  + named GPIO inputs mscexp_status[0..15]
+ *  + named GPIO outputs mscexp_clear[0..15]
+ *  + named GPIO outputs mscexp_ns[0..15]
+ */
+
+#ifndef ARMSSE_H
+#define ARMSSE_H
+
+#include "hw/sysbus.h"
+#include "hw/arm/armv7m.h"
+#include "hw/misc/iotkit-secctl.h"
+#include "hw/misc/tz-ppc.h"
+#include "hw/misc/tz-mpc.h"
+#include "hw/timer/cmsdk-apb-timer.h"
+#include "hw/timer/cmsdk-apb-dualtimer.h"
+#include "hw/timer/sse-counter.h"
+#include "hw/timer/sse-timer.h"
+#include "hw/watchdog/cmsdk-apb-watchdog.h"
+#include "hw/misc/iotkit-sysctl.h"
+#include "hw/misc/iotkit-sysinfo.h"
+#include "hw/misc/armsse-cpuid.h"
+#include "hw/misc/armsse-mhu.h"
+#include "hw/misc/armsse-cpu-pwrctrl.h"
+#include "hw/misc/unimp.h"
+#include "hw/or-irq.h"
+#include "hw/clock.h"
+#include "hw/core/split-irq.h"
+#include "hw/cpu/cluster.h"
+#include "qom/object.h"
+
+#define TYPE_ARM_SSE "arm-sse"
+OBJECT_DECLARE_TYPE(ARMSSE, ARMSSEClass,
+                    ARM_SSE)
+
+/*
+ * These type names are for specific IoTKit subsystems; other than
+ * instantiating them, code using these devices should always handle
+ * them via the ARMSSE base class, so they have no IOTKIT() etc macros.
+ */
+#define TYPE_IOTKIT "iotkit"
+#define TYPE_SSE200 "sse-200"
+#define TYPE_SSE300 "sse-300"
+
+/* We have an IRQ splitter and an OR gate input for each external PPC
+ * and the 2 internal PPCs
+ */
+#define NUM_INTERNAL_PPCS 2
+#define NUM_EXTERNAL_PPCS (IOTS_NUM_AHB_EXP_PPC + IOTS_NUM_APB_EXP_PPC)
+#define NUM_PPCS (NUM_EXTERNAL_PPCS + NUM_INTERNAL_PPCS)
+
+#define MAX_SRAM_BANKS 4
+#if MAX_SRAM_BANKS > IOTS_NUM_MPC
+#error Too many SRAM banks
+#endif
+
+#define SSE_MAX_CPUS 2
+
+#define NUM_PPUS 8
+
+/* Number of CPU IRQs used by the SSE itself */
+#define NUM_SSE_IRQS 32
+
+struct ARMSSE {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    ARMv7MState armv7m[SSE_MAX_CPUS];
+    CPUClusterState cluster[SSE_MAX_CPUS];
+    IoTKitSecCtl secctl;
+    TZPPC apb_ppc[NUM_INTERNAL_PPCS];
+    TZMPC mpc[IOTS_NUM_MPC];
+    CMSDKAPBTimer timer[3];
+    OrIRQState ppc_irq_orgate;
+    SplitIRQ sec_resp_splitter;
+    SplitIRQ ppc_irq_splitter[NUM_PPCS];
+    SplitIRQ mpc_irq_splitter[IOTS_NUM_EXP_MPC + IOTS_NUM_MPC];
+    OrIRQState mpc_irq_orgate;
+    OrIRQState nmi_orgate;
+
+    SplitIRQ cpu_irq_splitter[NUM_SSE_IRQS];
+
+    CMSDKAPBDualTimer dualtimer;
+
+    CMSDKAPBWatchdog cmsdk_watchdog[3];
+
+    SSECounter sse_counter;
+    SSETimer sse_timer[4];
+
+    IoTKitSysCtl sysctl;
+    IoTKitSysCtl sysinfo;
+
+    ARMSSEMHU mhu[2];
+    UnimplementedDeviceState unimp[NUM_PPUS];
+    UnimplementedDeviceState cachectrl[SSE_MAX_CPUS];
+    UnimplementedDeviceState cpusecctrl[SSE_MAX_CPUS];
+
+    ARMSSECPUID cpuid[SSE_MAX_CPUS];
+
+    ARMSSECPUPwrCtrl cpu_pwrctrl[SSE_MAX_CPUS];
+
+    /*
+     * 'container' holds all devices seen by all CPUs.
+     * 'cpu_container[i]' is the view that CPU i has: this has the
+     * per-CPU devices of that CPU, plus as the background 'container'
+     * (or an alias of it, since we can only use it directly once).
+     * container_alias[i] is the alias of 'container' used by CPU i+1;
+     * CPU 0 can use 'container' directly.
+     */
+    MemoryRegion container;
+    MemoryRegion container_alias[SSE_MAX_CPUS - 1];
+    MemoryRegion cpu_container[SSE_MAX_CPUS];
+    MemoryRegion alias1;
+    MemoryRegion alias2;
+    MemoryRegion alias3[SSE_MAX_CPUS];
+    MemoryRegion sram[MAX_SRAM_BANKS];
+    MemoryRegion itcm;
+    MemoryRegion dtcm;
+
+    qemu_irq *exp_irqs[SSE_MAX_CPUS];
+    qemu_irq ppc0_irq;
+    qemu_irq ppc1_irq;
+    qemu_irq sec_resp_cfg;
+    qemu_irq sec_resp_cfg_in;
+    qemu_irq nsc_cfg_in;
+
+    qemu_irq irq_status_in[NUM_EXTERNAL_PPCS];
+    qemu_irq mpcexp_status_in[IOTS_NUM_EXP_MPC];
+
+    uint32_t nsccfg;
+
+    Clock *mainclk;
+    Clock *s32kclk;
+
+    /* Properties */
+    MemoryRegion *board_memory;
+    uint32_t exp_numirq;
+    uint32_t sram_addr_width;
+    uint32_t init_svtor;
+    uint32_t cpu_mpu_ns[SSE_MAX_CPUS];
+    uint32_t cpu_mpu_s[SSE_MAX_CPUS];
+    bool cpu_fpu[SSE_MAX_CPUS];
+    bool cpu_dsp[SSE_MAX_CPUS];
+};
+
+typedef struct ARMSSEInfo ARMSSEInfo;
+
+struct ARMSSEClass {
+    SysBusDeviceClass parent_class;
+    const ARMSSEInfo *info;
+};
+
+
+#endif
diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h
new file mode 100644 (file)
index 0000000..e2cebbd
--- /dev/null
@@ -0,0 +1,112 @@
+/*
+ * ARMv7M CPU object
+ *
+ * Copyright (c) 2017 Linaro Ltd
+ * Written by Peter Maydell <peter.maydell@linaro.org>
+ *
+ * This code is licensed under the GPL version 2 or later.
+ */
+
+#ifndef HW_ARM_ARMV7M_H
+#define HW_ARM_ARMV7M_H
+
+#include "hw/sysbus.h"
+#include "hw/intc/armv7m_nvic.h"
+#include "hw/misc/armv7m_ras.h"
+#include "target/arm/idau.h"
+#include "qom/object.h"
+#include "hw/clock.h"
+
+#define TYPE_BITBAND "ARM-bitband-memory"
+OBJECT_DECLARE_SIMPLE_TYPE(BitBandState, BITBAND)
+
+struct BitBandState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    AddressSpace source_as;
+    MemoryRegion iomem;
+    uint32_t base;
+    MemoryRegion *source_memory;
+};
+
+#define TYPE_ARMV7M "armv7m"
+OBJECT_DECLARE_SIMPLE_TYPE(ARMv7MState, ARMV7M)
+
+#define ARMV7M_NUM_BITBANDS 2
+
+/* ARMv7M container object.
+ * + Unnamed GPIO input lines: external IRQ lines for the NVIC
+ * + Named GPIO output SYSRESETREQ: signalled for guest AIRCR.SYSRESETREQ.
+ *   If this GPIO is not wired up then the NVIC will default to performing
+ *   a qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET).
+ * + Property "cpu-type": CPU type to instantiate
+ * + Property "num-irq": number of external IRQ lines
+ * + Property "memory": MemoryRegion defining the physical address space
+ *   that CPU accesses see. (The NVIC, bitbanding and other CPU-internal
+ *   devices will be automatically layered on top of this view.)
+ * + Property "idau": IDAU interface (forwarded to CPU object)
+ * + Property "init-svtor": secure VTOR reset value (forwarded to CPU object)
+ * + Property "init-nsvtor": non-secure VTOR reset value (forwarded to CPU object)
+ * + Property "vfp": enable VFP (forwarded to CPU object)
+ * + Property "dsp": enable DSP (forwarded to CPU object)
+ * + Property "enable-bitband": expose bitbanded IO
+ * + Property "mpu-ns-regions": number of Non-Secure MPU regions (forwarded
+ *   to CPU object pmsav7-dregion property; default is whatever the default
+ *   for the CPU is)
+ * + Property "mpu-s-regions": number of Secure MPU regions (default is
+ *   whatever the default for the CPU is; must currently be set to the same
+ *   value as mpu-ns-regions if the CPU implements the Security Extension)
+ * + Clock input "refclk" is the external reference clock for the systick timers
+ * + Clock input "cpuclk" is the main CPU clock
+ */
+struct ARMv7MState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+    NVICState nvic;
+    BitBandState bitband[ARMV7M_NUM_BITBANDS];
+    ARMCPU *cpu;
+    ARMv7MRAS ras;
+    SysTickState systick[M_REG_NUM_BANKS];
+
+    /* MemoryRegion we pass to the CPU, with our devices layered on
+     * top of the ones the board provides in board_memory.
+     */
+    MemoryRegion container;
+    /*
+     * MemoryRegion which passes the transaction to either the S or the
+     * NS systick device depending on the transaction attributes
+     */
+    MemoryRegion systickmem;
+    /*
+     * MemoryRegion which enforces the S/NS handling of the systick
+     * device NS alias region and passes the transaction to the
+     * NS systick device if appropriate.
+     */
+    MemoryRegion systick_ns_mem;
+    /* Ditto, for the sysregs region provided by the NVIC */
+    MemoryRegion sysreg_ns_mem;
+    /* MR providing default PPB behaviour */
+    MemoryRegion defaultmem;
+
+    Clock *refclk;
+    Clock *cpuclk;
+
+    /* Properties */
+    char *cpu_type;
+    /* MemoryRegion the board provides to us (with its devices, RAM, etc) */
+    MemoryRegion *board_memory;
+    Object *idau;
+    uint32_t init_svtor;
+    uint32_t init_nsvtor;
+    uint32_t mpu_ns_regions;
+    uint32_t mpu_s_regions;
+    bool enable_bitband;
+    bool start_powered_off;
+    bool vfp;
+    bool dsp;
+};
+
+#endif
diff --git a/include/hw/arm/aspeed.h b/include/hw/arm/aspeed.h
new file mode 100644 (file)
index 0000000..cbeacb2
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Aspeed Machines
+ *
+ * Copyright 2018 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+#ifndef ARM_ASPEED_H
+#define ARM_ASPEED_H
+
+#include "hw/boards.h"
+#include "qom/object.h"
+
+typedef struct AspeedMachineState AspeedMachineState;
+
+#define TYPE_ASPEED_MACHINE       MACHINE_TYPE_NAME("aspeed")
+typedef struct AspeedMachineClass AspeedMachineClass;
+DECLARE_OBJ_CHECKERS(AspeedMachineState, AspeedMachineClass,
+                     ASPEED_MACHINE, TYPE_ASPEED_MACHINE)
+
+#define ASPEED_MAC0_ON   (1 << 0)
+#define ASPEED_MAC1_ON   (1 << 1)
+#define ASPEED_MAC2_ON   (1 << 2)
+#define ASPEED_MAC3_ON   (1 << 3)
+
+
+struct AspeedMachineClass {
+    MachineClass parent_obj;
+
+    const char *name;
+    const char *desc;
+    const char *soc_name;
+    uint32_t hw_strap1;
+    uint32_t hw_strap2;
+    const char *fmc_model;
+    const char *spi_model;
+    uint32_t num_cs;
+    uint32_t macs_mask;
+    void (*i2c_init)(AspeedMachineState *bmc);
+    uint32_t uart_default;
+};
+
+
+#endif
diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h
new file mode 100644 (file)
index 0000000..cb832bc
--- /dev/null
@@ -0,0 +1,232 @@
+/*
+ * ASPEED SoC family
+ *
+ * Andrew Jeffery <andrew@aj.id.au>
+ *
+ * Copyright 2016 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef ASPEED_SOC_H
+#define ASPEED_SOC_H
+
+#include "hw/cpu/a15mpcore.h"
+#include "hw/arm/armv7m.h"
+#include "hw/intc/aspeed_vic.h"
+#include "hw/misc/aspeed_scu.h"
+#include "hw/adc/aspeed_adc.h"
+#include "hw/misc/aspeed_sdmc.h"
+#include "hw/misc/aspeed_xdma.h"
+#include "hw/timer/aspeed_timer.h"
+#include "hw/rtc/aspeed_rtc.h"
+#include "hw/i2c/aspeed_i2c.h"
+#include "hw/misc/aspeed_i3c.h"
+#include "hw/ssi/aspeed_smc.h"
+#include "hw/misc/aspeed_hace.h"
+#include "hw/misc/aspeed_sbc.h"
+#include "hw/watchdog/wdt_aspeed.h"
+#include "hw/net/ftgmac100.h"
+#include "target/arm/cpu.h"
+#include "hw/gpio/aspeed_gpio.h"
+#include "hw/sd/aspeed_sdhci.h"
+#include "hw/usb/hcd-ehci.h"
+#include "qom/object.h"
+#include "hw/misc/aspeed_lpc.h"
+#include "hw/misc/unimp.h"
+#include "hw/misc/aspeed_peci.h"
+#include "hw/char/serial.h"
+
+#define ASPEED_SPIS_NUM  2
+#define ASPEED_EHCIS_NUM 2
+#define ASPEED_WDTS_NUM  4
+#define ASPEED_CPUS_NUM  2
+#define ASPEED_MACS_NUM  4
+#define ASPEED_UARTS_NUM 13
+#define ASPEED_JTAG_NUM  2
+
+struct AspeedSoCState {
+    DeviceState parent;
+
+    MemoryRegion *memory;
+    MemoryRegion *dram_mr;
+    MemoryRegion dram_container;
+    MemoryRegion sram;
+    MemoryRegion spi_boot_container;
+    MemoryRegion spi_boot;
+    AspeedRtcState rtc;
+    AspeedTimerCtrlState timerctrl;
+    AspeedI2CState i2c;
+    AspeedI3CState i3c;
+    AspeedSCUState scu;
+    AspeedHACEState hace;
+    AspeedXDMAState xdma;
+    AspeedADCState adc;
+    AspeedSMCState fmc;
+    AspeedSMCState spi[ASPEED_SPIS_NUM];
+    EHCISysBusState ehci[ASPEED_EHCIS_NUM];
+    AspeedSBCState sbc;
+    MemoryRegion secsram;
+    UnimplementedDeviceState sbc_unimplemented;
+    AspeedSDMCState sdmc;
+    AspeedWDTState wdt[ASPEED_WDTS_NUM];
+    FTGMAC100State ftgmac100[ASPEED_MACS_NUM];
+    AspeedMiiState mii[ASPEED_MACS_NUM];
+    AspeedGPIOState gpio;
+    AspeedGPIOState gpio_1_8v;
+    AspeedSDHCIState sdhci;
+    AspeedSDHCIState emmc;
+    AspeedLPCState lpc;
+    AspeedPECIState peci;
+    SerialMM uart[ASPEED_UARTS_NUM];
+    Clock *sysclk;
+    UnimplementedDeviceState iomem;
+    UnimplementedDeviceState video;
+    UnimplementedDeviceState emmc_boot_controller;
+    UnimplementedDeviceState dpmcu;
+    UnimplementedDeviceState pwm;
+    UnimplementedDeviceState espi;
+    UnimplementedDeviceState udc;
+    UnimplementedDeviceState sgpiom;
+    UnimplementedDeviceState jtag[ASPEED_JTAG_NUM];
+};
+
+#define TYPE_ASPEED_SOC "aspeed-soc"
+OBJECT_DECLARE_TYPE(AspeedSoCState, AspeedSoCClass, ASPEED_SOC)
+
+struct Aspeed2400SoCState {
+    AspeedSoCState parent;
+
+    ARMCPU cpu[ASPEED_CPUS_NUM];
+    AspeedVICState vic;
+};
+
+#define TYPE_ASPEED2400_SOC "aspeed2400-soc"
+OBJECT_DECLARE_SIMPLE_TYPE(Aspeed2400SoCState, ASPEED2400_SOC)
+
+struct Aspeed2600SoCState {
+    AspeedSoCState parent;
+
+    A15MPPrivState a7mpcore;
+    ARMCPU cpu[ASPEED_CPUS_NUM]; /* XXX belong to a7mpcore */
+};
+
+#define TYPE_ASPEED2600_SOC "aspeed2600-soc"
+OBJECT_DECLARE_SIMPLE_TYPE(Aspeed2600SoCState, ASPEED2600_SOC)
+
+struct Aspeed10x0SoCState {
+    AspeedSoCState parent;
+
+    ARMv7MState armv7m;
+};
+
+#define TYPE_ASPEED10X0_SOC "aspeed10x0-soc"
+OBJECT_DECLARE_SIMPLE_TYPE(Aspeed10x0SoCState, ASPEED10X0_SOC)
+
+struct AspeedSoCClass {
+    DeviceClass parent_class;
+
+    const char *name;
+    const char *cpu_type;
+    uint32_t silicon_rev;
+    uint64_t sram_size;
+    uint64_t secsram_size;
+    int spis_num;
+    int ehcis_num;
+    int wdts_num;
+    int macs_num;
+    int uarts_num;
+    const int *irqmap;
+    const hwaddr *memmap;
+    uint32_t num_cpus;
+    qemu_irq (*get_irq)(AspeedSoCState *s, int dev);
+};
+
+
+enum {
+    ASPEED_DEV_SPI_BOOT,
+    ASPEED_DEV_IOMEM,
+    ASPEED_DEV_UART1,
+    ASPEED_DEV_UART2,
+    ASPEED_DEV_UART3,
+    ASPEED_DEV_UART4,
+    ASPEED_DEV_UART5,
+    ASPEED_DEV_UART6,
+    ASPEED_DEV_UART7,
+    ASPEED_DEV_UART8,
+    ASPEED_DEV_UART9,
+    ASPEED_DEV_UART10,
+    ASPEED_DEV_UART11,
+    ASPEED_DEV_UART12,
+    ASPEED_DEV_UART13,
+    ASPEED_DEV_VUART,
+    ASPEED_DEV_FMC,
+    ASPEED_DEV_SPI1,
+    ASPEED_DEV_SPI2,
+    ASPEED_DEV_EHCI1,
+    ASPEED_DEV_EHCI2,
+    ASPEED_DEV_VIC,
+    ASPEED_DEV_SDMC,
+    ASPEED_DEV_SCU,
+    ASPEED_DEV_ADC,
+    ASPEED_DEV_SBC,
+    ASPEED_DEV_SECSRAM,
+    ASPEED_DEV_EMMC_BC,
+    ASPEED_DEV_VIDEO,
+    ASPEED_DEV_SRAM,
+    ASPEED_DEV_SDHCI,
+    ASPEED_DEV_GPIO,
+    ASPEED_DEV_GPIO_1_8V,
+    ASPEED_DEV_RTC,
+    ASPEED_DEV_TIMER1,
+    ASPEED_DEV_TIMER2,
+    ASPEED_DEV_TIMER3,
+    ASPEED_DEV_TIMER4,
+    ASPEED_DEV_TIMER5,
+    ASPEED_DEV_TIMER6,
+    ASPEED_DEV_TIMER7,
+    ASPEED_DEV_TIMER8,
+    ASPEED_DEV_WDT,
+    ASPEED_DEV_PWM,
+    ASPEED_DEV_LPC,
+    ASPEED_DEV_IBT,
+    ASPEED_DEV_I2C,
+    ASPEED_DEV_PECI,
+    ASPEED_DEV_ETH1,
+    ASPEED_DEV_ETH2,
+    ASPEED_DEV_ETH3,
+    ASPEED_DEV_ETH4,
+    ASPEED_DEV_MII1,
+    ASPEED_DEV_MII2,
+    ASPEED_DEV_MII3,
+    ASPEED_DEV_MII4,
+    ASPEED_DEV_SDRAM,
+    ASPEED_DEV_XDMA,
+    ASPEED_DEV_EMMC,
+    ASPEED_DEV_KCS,
+    ASPEED_DEV_HACE,
+    ASPEED_DEV_DPMCU,
+    ASPEED_DEV_DP,
+    ASPEED_DEV_I3C,
+    ASPEED_DEV_ESPI,
+    ASPEED_DEV_UDC,
+    ASPEED_DEV_SGPIOM,
+    ASPEED_DEV_JTAG0,
+    ASPEED_DEV_JTAG1,
+};
+
+#define ASPEED_SOC_SPI_BOOT_ADDR 0x0
+
+qemu_irq aspeed_soc_get_irq(AspeedSoCState *s, int dev);
+bool aspeed_soc_uart_realize(AspeedSoCState *s, Error **errp);
+void aspeed_soc_uart_set_chr(AspeedSoCState *s, int dev, Chardev *chr);
+bool aspeed_soc_dram_init(AspeedSoCState *s, Error **errp);
+void aspeed_mmio_map(AspeedSoCState *s, SysBusDevice *dev, int n, hwaddr addr);
+void aspeed_mmio_map_unimplemented(AspeedSoCState *s, SysBusDevice *dev,
+                                   const char *name, hwaddr addr,
+                                   uint64_t size);
+void aspeed_board_init_flashes(AspeedSMCState *s, const char *flashtype,
+                               unsigned int count, int unit0);
+
+#endif /* ASPEED_SOC_H */
diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h
new file mode 100644 (file)
index 0000000..d724a2f
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Raspberry Pi emulation (c) 2012 Gregory Estrade
+ * Upstreaming code cleanup [including bcm2835_*] (c) 2013 Jan Petrous
+ *
+ * Rasperry Pi 2 emulation and refactoring Copyright (c) 2015, Microsoft
+ * Written by Andrew Baumann
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_PERIPHERALS_H
+#define BCM2835_PERIPHERALS_H
+
+#include "hw/sysbus.h"
+#include "hw/char/pl011.h"
+#include "hw/char/bcm2835_aux.h"
+#include "hw/display/bcm2835_fb.h"
+#include "hw/dma/bcm2835_dma.h"
+#include "hw/or-irq.h"
+#include "hw/intc/bcm2835_ic.h"
+#include "hw/misc/bcm2835_property.h"
+#include "hw/misc/bcm2835_rng.h"
+#include "hw/misc/bcm2835_mbox.h"
+#include "hw/misc/bcm2835_mphi.h"
+#include "hw/misc/bcm2835_thermal.h"
+#include "hw/misc/bcm2835_cprman.h"
+#include "hw/misc/bcm2835_powermgt.h"
+#include "hw/sd/sdhci.h"
+#include "hw/sd/bcm2835_sdhost.h"
+#include "hw/gpio/bcm2835_gpio.h"
+#include "hw/timer/bcm2835_systmr.h"
+#include "hw/usb/hcd-dwc2.h"
+#include "hw/misc/unimp.h"
+#include "qom/object.h"
+
+#define TYPE_BCM2835_PERIPHERALS "bcm2835-peripherals"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835PeripheralState, BCM2835_PERIPHERALS)
+
+struct BCM2835PeripheralState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion peri_mr, peri_mr_alias, gpu_bus_mr, mbox_mr;
+    MemoryRegion ram_alias[4];
+    qemu_irq irq, fiq;
+
+    BCM2835SystemTimerState systmr;
+    BCM2835MphiState mphi;
+    UnimplementedDeviceState txp;
+    UnimplementedDeviceState armtmr;
+    BCM2835PowerMgtState powermgt;
+    BCM2835CprmanState cprman;
+    PL011State uart0;
+    BCM2835AuxState aux;
+    BCM2835FBState fb;
+    BCM2835DMAState dma;
+    OrIRQState orgated_dma_irq;
+    BCM2835ICState ic;
+    BCM2835PropertyState property;
+    BCM2835RngState rng;
+    BCM2835MboxState mboxes;
+    SDHCIState sdhci;
+    BCM2835SDHostState sdhost;
+    BCM2835GpioState gpio;
+    Bcm2835ThermalState thermal;
+    UnimplementedDeviceState i2s;
+    UnimplementedDeviceState spi[1];
+    UnimplementedDeviceState i2c[3];
+    UnimplementedDeviceState otp;
+    UnimplementedDeviceState dbus;
+    UnimplementedDeviceState ave0;
+    UnimplementedDeviceState v3d;
+    UnimplementedDeviceState bscsl;
+    UnimplementedDeviceState smi;
+    DWC2State dwc2;
+    UnimplementedDeviceState sdramc;
+};
+
+#endif /* BCM2835_PERIPHERALS_H */
diff --git a/include/hw/arm/bcm2836.h b/include/hw/arm/bcm2836.h
new file mode 100644 (file)
index 0000000..6f90cab
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Raspberry Pi emulation (c) 2012 Gregory Estrade
+ * Upstreaming code cleanup [including bcm2835_*] (c) 2013 Jan Petrous
+ *
+ * Rasperry Pi 2 emulation and refactoring Copyright (c) 2015, Microsoft
+ * Written by Andrew Baumann
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2836_H
+#define BCM2836_H
+
+#include "hw/arm/bcm2835_peripherals.h"
+#include "hw/intc/bcm2836_control.h"
+#include "target/arm/cpu.h"
+#include "qom/object.h"
+
+#define TYPE_BCM283X "bcm283x"
+OBJECT_DECLARE_TYPE(BCM283XState, BCM283XClass, BCM283X)
+
+#define BCM283X_NCPUS 4
+
+/* These type names are for specific SoCs; other than instantiating
+ * them, code using these devices should always handle them via the
+ * BCM283x base class, so they have no BCM2836(obj) etc macros.
+ */
+#define TYPE_BCM2835 "bcm2835"
+#define TYPE_BCM2836 "bcm2836"
+#define TYPE_BCM2837 "bcm2837"
+
+struct BCM283XState {
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
+
+    uint32_t enabled_cpus;
+
+    struct {
+        ARMCPU core;
+    } cpu[BCM283X_NCPUS];
+    BCM2836ControlState control;
+    BCM2835PeripheralState peripherals;
+};
+
+#endif /* BCM2836_H */
diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h
new file mode 100644 (file)
index 0000000..80c492d
--- /dev/null
@@ -0,0 +1,235 @@
+/*
+ * ARM kernel loader.
+ *
+ * Copyright (c) 2006 CodeSourcery.
+ * Written by Paul Brook
+ *
+ * This code is licensed under the LGPL.
+ *
+ */
+
+#ifndef HW_ARM_BOOT_H
+#define HW_ARM_BOOT_H
+
+#include "target/arm/cpu-qom.h"
+#include "qemu/notify.h"
+
+typedef enum {
+    ARM_ENDIANNESS_UNKNOWN = 0,
+    ARM_ENDIANNESS_LE,
+    ARM_ENDIANNESS_BE8,
+    ARM_ENDIANNESS_BE32,
+} arm_endianness;
+
+/**
+ * armv7m_load_kernel:
+ * @cpu: CPU
+ * @kernel_filename: file to load
+ * @mem_base: base address to load image at (should be where the
+ *            CPU expects to find its vector table on reset)
+ * @mem_size: mem_size: maximum image size to load
+ *
+ * Load the guest image for an ARMv7M system. This must be called by
+ * any ARMv7M board. (This is necessary to ensure that the CPU resets
+ * correctly on system reset, as well as for kernel loading.)
+ */
+void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename,
+                        hwaddr mem_base, int mem_size);
+
+/* arm_boot.c */
+struct arm_boot_info {
+    uint64_t ram_size;
+    const char *kernel_filename;
+    const char *kernel_cmdline;
+    const char *initrd_filename;
+    const char *dtb_filename;
+    hwaddr loader_start;
+    hwaddr dtb_start;
+    hwaddr dtb_limit;
+    /* If set to True, arm_load_kernel() will not load DTB.
+     * It allows board to load DTB manually later.
+     * (default: False)
+     */
+    bool skip_dtb_autoload;
+    /* multicore boards that use the default secondary core boot functions
+     * need to put the address of the secondary boot code, the boot reg,
+     * and the GIC address in the next 3 values, respectively. boards that
+     * have their own boot functions can use these values as they want.
+     */
+    hwaddr smp_loader_start;
+    hwaddr smp_bootreg_addr;
+    hwaddr gic_cpu_if_addr;
+    int board_id;
+    /* ARM machines that support the ARM Security Extensions use this field to
+     * control whether Linux is booted as secure(true) or non-secure(false).
+     */
+    bool secure_boot;
+    int (*atag_board)(const struct arm_boot_info *info, void *p);
+    /* multicore boards that use the default secondary core boot functions
+     * can ignore these two function calls. If the default functions won't
+     * work, then write_secondary_boot() should write a suitable blob of
+     * code mimicking the secondary CPU startup process used by the board's
+     * boot loader/boot ROM code, and secondary_cpu_reset_hook() should
+     * perform any necessary CPU reset handling and set the PC for the
+     * secondary CPUs to point at this boot blob.
+     *
+     * These hooks won't be called if secondary CPUs are booting via
+     * emulated PSCI (see psci_conduit below).
+     */
+    void (*write_secondary_boot)(ARMCPU *cpu,
+                                 const struct arm_boot_info *info);
+    void (*secondary_cpu_reset_hook)(ARMCPU *cpu,
+                                     const struct arm_boot_info *info);
+    /* if a board is able to create a dtb without a dtb file then it
+     * sets get_dtb. This will only be used if no dtb file is provided
+     * by the user. On success, sets *size to the length of the created
+     * dtb, and returns a pointer to it. (The caller must free this memory
+     * with g_free() when it has finished with it.) On failure, returns NULL.
+     */
+    void *(*get_dtb)(const struct arm_boot_info *info, int *size);
+    /* if a board needs to be able to modify a device tree provided by
+     * the user it should implement this hook.
+     */
+    void (*modify_dtb)(const struct arm_boot_info *info, void *fdt);
+    /*
+     * If a board wants to use the QEMU emulated-firmware PSCI support,
+     * it should set this to QEMU_PSCI_CONDUIT_HVC or QEMU_PSCI_CONDUIT_SMC
+     * as appropriate. arm_load_kernel() will set the psci-conduit and
+     * start-powered-off properties on the CPUs accordingly.
+     * Note that if the guest image is started at the same exception level
+     * as the conduit specifies calls should go to (eg guest firmware booted
+     * to EL3) then PSCI will not be enabled.
+     */
+    int psci_conduit;
+    /* Used internally by arm_boot.c */
+    int is_linux;
+    hwaddr initrd_start;
+    hwaddr initrd_size;
+    hwaddr entry;
+
+    /* Boot firmware has been loaded, typically at address 0, with -bios or
+     * -pflash. It also implies that fw_cfg_find() will succeed.
+     */
+    bool firmware_loaded;
+
+    /* Address at which board specific loader/setup code exists. If enabled,
+     * this code-blob will run before anything else. It must return to the
+     * caller via the link register. There is no stack set up. Enabled by
+     * defining write_board_setup, which is responsible for loading the blob
+     * to the specified address.
+     */
+    hwaddr board_setup_addr;
+    void (*write_board_setup)(ARMCPU *cpu,
+                              const struct arm_boot_info *info);
+
+    /*
+     * If set, the board specific loader/setup blob will be run from secure
+     * mode, regardless of secure_boot. The blob becomes responsible for
+     * changing to non-secure state if implementing a non-secure boot,
+     * including setting up EL3/Secure registers such as the NSACR as
+     * required by the Linux booting ABI before the switch to non-secure.
+     */
+    bool secure_board_setup;
+
+    arm_endianness endianness;
+};
+
+/**
+ * arm_load_kernel - Loads memory with everything needed to boot
+ *
+ * @cpu: handle to the first CPU object
+ * @info: handle to the boot info struct
+ * Registers a machine init done notifier that copies to memory
+ * everything needed to boot, depending on machine and user options:
+ * kernel image, boot loaders, initrd, dtb. Also registers the CPU
+ * reset handler.
+ *
+ * In case the machine file supports the platform bus device and its
+ * dynamically instantiable sysbus devices, this function must be called
+ * before sysbus-fdt arm_register_platform_bus_fdt_creator. Indeed the
+ * machine init done notifiers are called in registration reverse order.
+ */
+void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info);
+
+AddressSpace *arm_boot_address_space(ARMCPU *cpu,
+                                     const struct arm_boot_info *info);
+
+/**
+ * arm_load_dtb() - load a device tree binary image into memory
+ * @addr:       the address to load the image at
+ * @binfo:      struct describing the boot environment
+ * @addr_limit: upper limit of the available memory area at @addr
+ * @as:         address space to load image to
+ *
+ * Load a device tree supplied by the machine or by the user  with the
+ * '-dtb' command line option, and put it at offset @addr in target
+ * memory.
+ *
+ * If @addr_limit contains a meaningful value (i.e., it is strictly greater
+ * than @addr), the device tree is only loaded if its size does not exceed
+ * the limit.
+ *
+ * Returns: the size of the device tree image on success,
+ *          0 if the image size exceeds the limit,
+ *          -1 on errors.
+ *
+ * Note: Must not be called unless have_dtb(binfo) is true.
+ */
+int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
+                 hwaddr addr_limit, AddressSpace *as, MachineState *ms);
+
+/* Write a secure board setup routine with a dummy handler for SMCs */
+void arm_write_secure_board_setup_dummy_smc(ARMCPU *cpu,
+                                            const struct arm_boot_info *info,
+                                            hwaddr mvbar_addr);
+
+typedef enum {
+    FIXUP_NONE = 0,     /* do nothing */
+    FIXUP_TERMINATOR,   /* end of insns */
+    FIXUP_BOARDID,      /* overwrite with board ID number */
+    FIXUP_BOARD_SETUP,  /* overwrite with board specific setup code address */
+    FIXUP_ARGPTR_LO,    /* overwrite with pointer to kernel args */
+    FIXUP_ARGPTR_HI,    /* overwrite with pointer to kernel args (high half) */
+    FIXUP_ENTRYPOINT_LO, /* overwrite with kernel entry point */
+    FIXUP_ENTRYPOINT_HI, /* overwrite with kernel entry point (high half) */
+    FIXUP_GIC_CPU_IF,   /* overwrite with GIC CPU interface address */
+    FIXUP_BOOTREG,      /* overwrite with boot register address */
+    FIXUP_DSB,          /* overwrite with correct DSB insn for cpu */
+    FIXUP_MAX,
+} FixupType;
+
+typedef struct ARMInsnFixup {
+    uint32_t insn;
+    FixupType fixup;
+} ARMInsnFixup;
+
+/**
+ * arm_write_bootloader - write a bootloader to guest memory
+ * @name: name of the bootloader blob
+ * @as: AddressSpace to write the bootloader
+ * @addr: guest address to write it
+ * @insns: the blob to be loaded
+ * @fixupcontext: context to be used for any fixups in @insns
+ *
+ * Write a bootloader to guest memory at address @addr in the address
+ * space @as. @name is the name to use for the resulting ROM blob, so
+ * it should be unique in the system and reasonably identifiable for debugging.
+ *
+ * @insns must be an array of ARMInsnFixup structs, each of which has
+ * one 32-bit value to be written to the guest memory, and a fixup to be
+ * applied to the value. FIXUP_NONE (do nothing) is value 0, so effectively
+ * the fixup is optional when writing a struct initializer.
+ * The final entry in the array must be { 0, FIXUP_TERMINATOR }.
+ *
+ * All other supported fixup types have the semantics "ignore insn
+ * and instead use the value from the array element @fixupcontext[fixup]".
+ * The caller should therefore provide @fixupcontext as an array of
+ * size FIXUP_MAX whose elements have been initialized for at least
+ * the entries that @insns refers to.
+ */
+void arm_write_bootloader(const char *name,
+                          AddressSpace *as, hwaddr addr,
+                          const ARMInsnFixup *insns,
+                          const uint32_t *fixupcontext);
+
+#endif /* HW_ARM_BOOT_H */
diff --git a/include/hw/arm/bsa.h b/include/hw/arm/bsa.h
new file mode 100644 (file)
index 0000000..8eaab60
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Common definitions for Arm Base System Architecture (BSA) platforms.
+ *
+ * Copyright (c) 2015 Linaro Limited
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QEMU_ARM_BSA_H
+#define QEMU_ARM_BSA_H
+
+/* These are architectural INTID values */
+#define VIRTUAL_PMU_IRQ            23
+#define ARCH_GIC_MAINT_IRQ         25
+#define ARCH_TIMER_NS_EL2_IRQ      26
+#define ARCH_TIMER_VIRT_IRQ        27
+#define ARCH_TIMER_NS_EL2_VIRT_IRQ 28
+#define ARCH_TIMER_S_EL1_IRQ       29
+#define ARCH_TIMER_NS_EL1_IRQ      30
+
+#define INTID_TO_PPI(irq) ((irq) - 16)
+
+#endif /* QEMU_ARM_BSA_H */
diff --git a/include/hw/arm/digic.h b/include/hw/arm/digic.h
new file mode 100644 (file)
index 0000000..8f2735c
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Misc Canon DIGIC declarations.
+ *
+ * Copyright (C) 2013 Antony Pavlov <antonynpavlov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef HW_ARM_DIGIC_H
+#define HW_ARM_DIGIC_H
+
+#include "cpu.h"
+#include "hw/timer/digic-timer.h"
+#include "hw/char/digic-uart.h"
+#include "qom/object.h"
+
+#define TYPE_DIGIC "digic"
+
+OBJECT_DECLARE_SIMPLE_TYPE(DigicState, DIGIC)
+
+#define DIGIC4_NB_TIMERS 3
+
+struct DigicState {
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
+
+    ARMCPU cpu;
+
+    DigicTimerState timer[DIGIC4_NB_TIMERS];
+    DigicUartState uart;
+};
+
+#endif /* HW_ARM_DIGIC_H */
diff --git a/include/hw/arm/exynos4210.h b/include/hw/arm/exynos4210.h
new file mode 100644 (file)
index 0000000..d33fe38
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ *  Samsung exynos4210 SoC emulation
+ *
+ *  Copyright (c) 2011 Samsung Electronics Co., Ltd. All rights reserved.
+ *    Maksim Kozlov <m.kozlov@samsung.com>
+ *    Evgeny Voevodin <e.voevodin@samsung.com>
+ *    Igor Mitsyanko <i.mitsyanko@samsung.com>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef EXYNOS4210_H
+#define EXYNOS4210_H
+
+#include "hw/or-irq.h"
+#include "hw/sysbus.h"
+#include "hw/cpu/a9mpcore.h"
+#include "hw/intc/exynos4210_gic.h"
+#include "hw/intc/exynos4210_combiner.h"
+#include "hw/core/split-irq.h"
+#include "hw/arm/boot.h"
+#include "qom/object.h"
+
+#define EXYNOS4210_NCPUS                    2
+
+#define EXYNOS4210_DRAM0_BASE_ADDR          0x40000000
+#define EXYNOS4210_DRAM1_BASE_ADDR          0xa0000000
+#define EXYNOS4210_DRAM_MAX_SIZE            0x60000000  /* 1.5 GB */
+
+#define EXYNOS4210_IROM_BASE_ADDR           0x00000000
+#define EXYNOS4210_IROM_SIZE                0x00010000  /* 64 KB */
+#define EXYNOS4210_IROM_MIRROR_BASE_ADDR    0x02000000
+#define EXYNOS4210_IROM_MIRROR_SIZE         0x00010000  /* 64 KB */
+
+#define EXYNOS4210_IRAM_BASE_ADDR           0x02020000
+#define EXYNOS4210_IRAM_SIZE                0x00020000  /* 128 KB */
+
+/* Secondary CPU startup code is in IROM memory */
+#define EXYNOS4210_SMP_BOOT_ADDR            EXYNOS4210_IROM_BASE_ADDR
+#define EXYNOS4210_SMP_BOOT_SIZE            0x1000
+#define EXYNOS4210_BASE_BOOT_ADDR           EXYNOS4210_DRAM0_BASE_ADDR
+/* Secondary CPU polling address to get loader start from */
+#define EXYNOS4210_SECOND_CPU_BOOTREG       0x10020814
+
+#define EXYNOS4210_SMP_PRIVATE_BASE_ADDR    0x10500000
+#define EXYNOS4210_L2X0_BASE_ADDR           0x10502000
+
+/*
+ * exynos4210 IRQ subsystem stub definitions.
+ */
+#define EXYNOS4210_IRQ_GATE_NINPUTS 2 /* Internal and External GIC */
+
+#define EXYNOS4210_MAX_INT_COMBINER_OUT_IRQ  64
+#define EXYNOS4210_MAX_EXT_COMBINER_OUT_IRQ  16
+#define EXYNOS4210_MAX_INT_COMBINER_IN_IRQ   \
+    (EXYNOS4210_MAX_INT_COMBINER_OUT_IRQ * 8)
+#define EXYNOS4210_MAX_EXT_COMBINER_IN_IRQ   \
+    (EXYNOS4210_MAX_EXT_COMBINER_OUT_IRQ * 8)
+
+#define EXYNOS4210_I2C_NUMBER               9
+
+#define EXYNOS4210_NUM_DMA      3
+
+/*
+ * We need one splitter for every external combiner input, plus
+ * one for every non-zero entry in combiner_grp_to_gic_id[],
+ * minus one for every external combiner ID in second or later
+ * places in a combinermap[] line.
+ * We'll assert in exynos4210_init_board_irqs() if this is wrong.
+ */
+#define EXYNOS4210_NUM_SPLITTERS (EXYNOS4210_MAX_EXT_COMBINER_IN_IRQ + 38)
+
+struct Exynos4210State {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+    ARMCPU *cpu[EXYNOS4210_NCPUS];
+    qemu_irq irq_table[EXYNOS4210_MAX_INT_COMBINER_IN_IRQ];
+
+    MemoryRegion chipid_mem;
+    MemoryRegion iram_mem;
+    MemoryRegion irom_mem;
+    MemoryRegion irom_alias_mem;
+    MemoryRegion boot_secondary;
+    MemoryRegion bootreg_mem;
+    I2CBus *i2c_if[EXYNOS4210_I2C_NUMBER];
+    OrIRQState pl330_irq_orgate[EXYNOS4210_NUM_DMA];
+    OrIRQState cpu_irq_orgate[EXYNOS4210_NCPUS];
+    A9MPPrivState a9mpcore;
+    Exynos4210GicState ext_gic;
+    Exynos4210CombinerState int_combiner;
+    Exynos4210CombinerState ext_combiner;
+    SplitIRQ splitter[EXYNOS4210_NUM_SPLITTERS];
+};
+
+#define TYPE_EXYNOS4210_SOC "exynos4210"
+OBJECT_DECLARE_SIMPLE_TYPE(Exynos4210State, EXYNOS4210_SOC)
+
+void exynos4210_write_secondary(ARMCPU *cpu,
+        const struct arm_boot_info *info);
+
+/* Get IRQ number from exynos4210 IRQ subsystem stub.
+ * To identify IRQ source use internal combiner group and bit number
+ *  grp - group number
+ *  bit - bit number inside group */
+uint32_t exynos4210_get_irq(uint32_t grp, uint32_t bit);
+
+/*
+ * exynos4210 UART
+ */
+DeviceState *exynos4210_uart_create(hwaddr addr,
+                                    int fifo_size,
+                                    int channel,
+                                    Chardev *chr,
+                                    qemu_irq irq);
+
+#endif /* EXYNOS4210_H */
diff --git a/include/hw/arm/fdt.h b/include/hw/arm/fdt.h
new file mode 100644 (file)
index 0000000..c3d5015
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ *
+ * Copyright (c) 2015 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Define macros useful when building ARM device tree nodes
+ */
+
+#ifndef QEMU_ARM_FDT_H
+#define QEMU_ARM_FDT_H
+
+#define GIC_FDT_IRQ_TYPE_SPI 0
+#define GIC_FDT_IRQ_TYPE_PPI 1
+
+#define GIC_FDT_IRQ_FLAGS_EDGE_LO_HI 1
+#define GIC_FDT_IRQ_FLAGS_EDGE_HI_LO 2
+#define GIC_FDT_IRQ_FLAGS_LEVEL_HI 4
+#define GIC_FDT_IRQ_FLAGS_LEVEL_LO 8
+
+#define GIC_FDT_IRQ_PPI_CPU_START 8
+#define GIC_FDT_IRQ_PPI_CPU_WIDTH 8
+
+#endif
diff --git a/include/hw/arm/fsl-imx25.h b/include/hw/arm/fsl-imx25.h
new file mode 100644 (file)
index 0000000..df2f839
--- /dev/null
@@ -0,0 +1,278 @@
+/*
+ * Freescale i.MX25 SoC emulation
+ *
+ * Copyright (C) 2015 Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef FSL_IMX25_H
+#define FSL_IMX25_H
+
+#include "hw/intc/imx_avic.h"
+#include "hw/misc/imx25_ccm.h"
+#include "hw/char/imx_serial.h"
+#include "hw/timer/imx_gpt.h"
+#include "hw/timer/imx_epit.h"
+#include "hw/net/imx_fec.h"
+#include "hw/misc/imx_rngc.h"
+#include "hw/i2c/imx_i2c.h"
+#include "hw/gpio/imx_gpio.h"
+#include "hw/sd/sdhci.h"
+#include "hw/usb/chipidea.h"
+#include "hw/watchdog/wdt_imx2.h"
+#include "exec/memory.h"
+#include "target/arm/cpu.h"
+#include "qom/object.h"
+
+#define TYPE_FSL_IMX25 "fsl-imx25"
+OBJECT_DECLARE_SIMPLE_TYPE(FslIMX25State, FSL_IMX25)
+
+#define FSL_IMX25_NUM_UARTS 5
+#define FSL_IMX25_NUM_GPTS 4
+#define FSL_IMX25_NUM_EPITS 2
+#define FSL_IMX25_NUM_I2CS 3
+#define FSL_IMX25_NUM_GPIOS 4
+#define FSL_IMX25_NUM_ESDHCS 2
+#define FSL_IMX25_NUM_USBS 2
+
+struct FslIMX25State {
+    /*< private >*/
+    DeviceState parent_obj;
+
+    /*< public >*/
+    ARMCPU         cpu;
+    IMXAVICState   avic;
+    IMX25CCMState  ccm;
+    IMXSerialState uart[FSL_IMX25_NUM_UARTS];
+    IMXGPTState    gpt[FSL_IMX25_NUM_GPTS];
+    IMXEPITState   epit[FSL_IMX25_NUM_EPITS];
+    IMXFECState    fec;
+    IMXRNGCState   rngc;
+    IMXI2CState    i2c[FSL_IMX25_NUM_I2CS];
+    IMXGPIOState   gpio[FSL_IMX25_NUM_GPIOS];
+    SDHCIState     esdhc[FSL_IMX25_NUM_ESDHCS];
+    ChipideaState  usb[FSL_IMX25_NUM_USBS];
+    IMX2WdtState   wdt;
+    MemoryRegion   rom[2];
+    MemoryRegion   iram;
+    MemoryRegion   iram_alias;
+    uint32_t       phy_num;
+};
+
+/**
+ * i.MX25 memory map
+ ****************************************************************
+ * 0x0000_0000 0x0000_3FFF 16 Kbytes    ROM (36 Kbytes)
+ * 0x0000_4000 0x0040_3FFF 4 Mbytes     Reserved
+ * 0x0040_4000 0x0040_8FFF 20 Kbytes    ROM (36 Kbytes)
+ * 0x0040_9000 0x0FFF_FFFF 252 Mbytes (minus 36 Kbytes) Reserved
+ * 0x1000_0000 0x1FFF_FFFF 256 Mbytes   Reserved
+ * 0x2000_0000 0x2FFF_FFFF 256 Mbytes   Reserved
+ * 0x3000_0000 0x3FFF_FFFF 256 Mbytes   Reserved
+ * 0x4000_0000 0x43EF_FFFF 63 Mbytes    Reserved
+ * 0x43F0_0000 0x43F0_3FFF 16 Kbytes    AIPS A control registers
+ * 0x43F0_4000 0x43F0_7FFF 16 Kbytes    ARM926 platform MAX
+ * 0x43F0_8000 0x43F0_BFFF 16 Kbytes    ARM926 platform CLKCTL
+ * 0x43F0_C000 0x43F0_FFFF 16 Kbytes    ARM926 platform ETB registers
+ * 0x43F1_0000 0x43F1_3FFF 16 Kbytes    ARM926 platform ETB memory
+ * 0x43F1_4000 0x43F1_7FFF 16 Kbytes    ARM926 platform AAPE registers
+ * 0x43F1_8000 0x43F7_FFFF 416 Kbytes   Reserved
+ * 0x43F8_0000 0x43F8_3FFF 16 Kbytes    I2C-1
+ * 0x43F8_4000 0x43F8_7FFF 16 Kbytes    I2C-3
+ * 0x43F8_8000 0x43F8_BFFF 16 Kbytes    CAN-1
+ * 0x43F8_C000 0x43F8_FFFF 16 Kbytes    CAN-2
+ * 0x43F9_0000 0x43F9_3FFF 16 Kbytes    UART-1
+ * 0x43F9_4000 0x43F9_7FFF 16 Kbytes    UART-2
+ * 0x43F9_8000 0x43F9_BFFF 16 Kbytes    I2C-2
+ * 0x43F9_C000 0x43F9_FFFF 16 Kbytes    1-Wire
+ * 0x43FA_0000 0x43FA_3FFF 16 Kbytes    ATA (CPU side)
+ * 0x43FA_4000 0x43FA_7FFF 16 Kbytes    CSPI-1
+ * 0x43FA_8000 0x43FA_BFFF 16 Kbytes    KPP
+ * 0x43FA_C000 0x43FA_FFFF 16 Kbytes    IOMUXC
+ * 0x43FB_0000 0x43FB_3FFF 16 Kbytes    AUDMUX
+ * 0x43FB_4000 0x43FB_7FFF 16 Kbytes    Reserved
+ * 0x43FB_8000 0x43FB_BFFF 16 Kbytes    ECT (IP BUS A)
+ * 0x43FB_C000 0x43FB_FFFF 16 Kbytes    ECT (IP BUS B)
+ * 0x43FC_0000 0x43FF_FFFF 256 Kbytes   Reserved AIPS A off-platform slots
+ * 0x4400_0000 0x4FFF_FFFF 192 Mbytes   Reserved
+ * 0x5000_0000 0x5000_3FFF 16 Kbytes    SPBA base address
+ * 0x5000_4000 0x5000_7FFF 16 Kbytes    CSPI-3
+ * 0x5000_8000 0x5000_BFFF 16 Kbytes    UART-4
+ * 0x5000_C000 0x5000_FFFF 16 Kbytes    UART-3
+ * 0x5001_0000 0x5001_3FFF 16 Kbytes    CSPI-2
+ * 0x5001_4000 0x5001_7FFF 16 Kbytes    SSI-2
+ * 0x5001_C000 0x5001_FFFF 16 Kbytes    Reserved
+ * 0x5002_0000 0x5002_3FFF 16 Kbytes    ATA
+ * 0x5002_4000 0x5002_7FFF 16 Kbytes    SIM-1
+ * 0x5002_8000 0x5002_BFFF 16 Kbytes    SIM-2
+ * 0x5002_C000 0x5002_FFFF 16 Kbytes    UART-5
+ * 0x5003_0000 0x5003_3FFF 16 Kbytes    TSC
+ * 0x5003_4000 0x5003_7FFF 16 Kbytes    SSI-1
+ * 0x5003_8000 0x5003_BFFF 16 Kbytes    FEC
+ * 0x5003_C000 0x5003_FFFF 16 Kbytes    SPBA registers
+ * 0x5004_0000 0x51FF_FFFF 32 Mbytes (minus 256 Kbytes)
+ * 0x5200_0000 0x53EF_FFFF 31 Mbytes    Reserved
+ * 0x53F0_0000 0x53F0_3FFF 16 Kbytes    AIPS B control registers
+ * 0x53F0_4000 0x53F7_FFFF 496 Kbytes   Reserved
+ * 0x53F8_0000 0x53F8_3FFF 16 Kbytes    CCM
+ * 0x53F8_4000 0x53F8_7FFF 16 Kbytes    GPT-4
+ * 0x53F8_8000 0x53F8_BFFF 16 Kbytes    GPT-3
+ * 0x53F8_C000 0x53F8_FFFF 16 Kbytes    GPT-2
+ * 0x53F9_0000 0x53F9_3FFF 16 Kbytes    GPT-1
+ * 0x53F9_4000 0x53F9_7FFF 16 Kbytes    EPIT-1
+ * 0x53F9_8000 0x53F9_BFFF 16 Kbytes    EPIT-2
+ * 0x53F9_C000 0x53F9_FFFF 16 Kbytes    GPIO-4
+ * 0x53FA_0000 0x53FA_3FFF 16 Kbytes    PWM-2
+ * 0x53FA_4000 0x53FA_7FFF 16 Kbytes    GPIO-3
+ * 0x53FA_8000 0x53FA_BFFF 16 Kbytes    PWM-3
+ * 0x53FA_C000 0x53FA_FFFF 16 Kbytes    SCC
+ * 0x53FB_0000 0x53FB_3FFF 16 Kbytes    RNGB
+ * 0x53FB_4000 0x53FB_7FFF 16 Kbytes    eSDHC-1
+ * 0x53FB_8000 0x53FB_BFFF 16 Kbytes    eSDHC-2
+ * 0x53FB_C000 0x53FB_FFFF 16 Kbytes    LCDC
+ * 0x53FC_0000 0x53FC_3FFF 16 Kbytes    SLCDC
+ * 0x53FC_4000 0x53FC_7FFF 16 Kbytes    Reserved
+ * 0x53FC_8000 0x53FC_BFFF 16 Kbytes    PWM-4
+ * 0x53FC_C000 0x53FC_FFFF 16 Kbytes    GPIO-1
+ * 0x53FD_0000 0x53FD_3FFF 16 Kbytes    GPIO-2
+ * 0x53FD_4000 0x53FD_7FFF 16 Kbytes    SDMA
+ * 0x53FD_8000 0x53FD_BFFF 16 Kbytes    Reserved
+ * 0x53FD_C000 0x53FD_FFFF 16 Kbytes    WDOG
+ * 0x53FE_0000 0x53FE_3FFF 16 Kbytes    PWM-1
+ * 0x53FE_4000 0x53FE_7FFF 16 Kbytes    Reserved
+ * 0x53FE_8000 0x53FE_BFFF 16 Kbytes    Reserved
+ * 0x53FE_C000 0x53FE_FFFF 16 Kbytes    RTICv3
+ * 0x53FF_0000 0x53FF_3FFF 16 Kbytes    IIM
+ * 0x53FF_4000 0x53FF_7FFF 16 Kbytes    USB
+ * 0x53FF_8000 0x53FF_BFFF 16 Kbytes    CSI
+ * 0x53FF_C000 0x53FF_FFFF 16 Kbytes    DryIce
+ * 0x5400_0000 0x5FFF_FFFF 192 Mbytes   Reserved (aliased AIPS B slots)
+ * 0x6000_0000 0x67FF_FFFF 128 Mbytes   ARM926 platform ROMPATCH
+ * 0x6800_0000 0x6FFF_FFFF 128 Mbytes   ARM926 platform ASIC
+ * 0x7000_0000 0x77FF_FFFF 128 Mbytes   Reserved
+ * 0x7800_0000 0x7801_FFFF 128 Kbytes   RAM
+ * 0x7802_0000 0x7FFF_FFFF 128 Mbytes (minus 128 Kbytes)
+ * 0x8000_0000 0x8FFF_FFFF 256 Mbytes   SDRAM bank 0
+ * 0x9000_0000 0x9FFF_FFFF 256 Mbytes   SDRAM bank 1
+ * 0xA000_0000 0xA7FF_FFFF 128 Mbytes   WEIM CS0 (flash 128) 1
+ * 0xA800_0000 0xAFFF_FFFF 128 Mbytes   WEIM CS1 (flash 64) 1
+ * 0xB000_0000 0xB1FF_FFFF 32 Mbytes    WEIM CS2 (SRAM)
+ * 0xB200_0000 0xB3FF_FFFF 32 Mbytes    WEIM CS3 (SRAM)
+ * 0xB400_0000 0xB5FF_FFFF 32 Mbytes    WEIM CS4
+ * 0xB600_0000 0xB7FF_FFFF 32 Mbytes    Reserved
+ * 0xB800_0000 0xB800_0FFF 4 Kbytes     Reserved
+ * 0xB800_1000 0xB800_1FFF 4 Kbytes     SDRAM control registers
+ * 0xB800_2000 0xB800_2FFF 4 Kbytes     WEIM control registers
+ * 0xB800_3000 0xB800_3FFF 4 Kbytes     M3IF control registers
+ * 0xB800_4000 0xB800_4FFF 4 Kbytes     EMI control registers
+ * 0xB800_5000 0xBAFF_FFFF 32 Mbytes (minus 20 Kbytes)
+ * 0xBB00_0000 0xBB00_0FFF 4 Kbytes     NAND flash main area buffer
+ * 0xBB00_1000 0xBB00_11FF 512 B        NAND flash spare area buffer
+ * 0xBB00_1200 0xBB00_1DFF 3 Kbytes     Reserved
+ * 0xBB00_1E00 0xBB00_1FFF 512 B        NAND flash control registers
+ * 0xBB01_2000 0xBFFF_FFFF 96 Mbytes (minus 8 Kbytes) Reserved
+ * 0xC000_0000 0xFFFF_FFFF 1024 Mbytes  Reserved
+ */
+
+#define FSL_IMX25_ROM0_ADDR     0x00000000
+#define FSL_IMX25_ROM0_SIZE     0x4000
+#define FSL_IMX25_ROM1_ADDR     0x00404000
+#define FSL_IMX25_ROM1_SIZE     0x4000
+#define FSL_IMX25_I2C1_ADDR     0x43F80000
+#define FSL_IMX25_I2C1_SIZE     0x4000
+#define FSL_IMX25_I2C3_ADDR     0x43F84000
+#define FSL_IMX25_I2C3_SIZE     0x4000
+#define FSL_IMX25_UART1_ADDR    0x43F90000
+#define FSL_IMX25_UART1_SIZE    0x4000
+#define FSL_IMX25_UART2_ADDR    0x43F94000
+#define FSL_IMX25_UART2_SIZE    0x4000
+#define FSL_IMX25_I2C2_ADDR     0x43F98000
+#define FSL_IMX25_I2C2_SIZE     0x4000
+#define FSL_IMX25_UART4_ADDR    0x50008000
+#define FSL_IMX25_UART4_SIZE    0x4000
+#define FSL_IMX25_UART3_ADDR    0x5000C000
+#define FSL_IMX25_UART3_SIZE    0x4000
+#define FSL_IMX25_UART5_ADDR    0x5002C000
+#define FSL_IMX25_UART5_SIZE    0x4000
+#define FSL_IMX25_FEC_ADDR      0x50038000
+#define FSL_IMX25_CCM_ADDR      0x53F80000
+#define FSL_IMX25_CCM_SIZE      0x4000
+#define FSL_IMX25_GPT4_ADDR     0x53F84000
+#define FSL_IMX25_GPT4_SIZE     0x4000
+#define FSL_IMX25_GPT3_ADDR     0x53F88000
+#define FSL_IMX25_GPT3_SIZE     0x4000
+#define FSL_IMX25_GPT2_ADDR     0x53F8C000
+#define FSL_IMX25_GPT2_SIZE     0x4000
+#define FSL_IMX25_GPT1_ADDR     0x53F90000
+#define FSL_IMX25_GPT1_SIZE     0x4000
+#define FSL_IMX25_EPIT1_ADDR    0x53F94000
+#define FSL_IMX25_EPIT1_SIZE    0x4000
+#define FSL_IMX25_EPIT2_ADDR    0x53F98000
+#define FSL_IMX25_EPIT2_SIZE    0x4000
+#define FSL_IMX25_GPIO4_ADDR    0x53F9C000
+#define FSL_IMX25_GPIO4_SIZE    0x4000
+#define FSL_IMX25_GPIO3_ADDR    0x53FA4000
+#define FSL_IMX25_GPIO3_SIZE    0x4000
+#define FSL_IMX25_RNGC_ADDR     0x53FB0000
+#define FSL_IMX25_RNGC_SIZE     0x4000
+#define FSL_IMX25_ESDHC1_ADDR   0x53FB4000
+#define FSL_IMX25_ESDHC1_SIZE   0x4000
+#define FSL_IMX25_ESDHC2_ADDR   0x53FB8000
+#define FSL_IMX25_ESDHC2_SIZE   0x4000
+#define FSL_IMX25_GPIO1_ADDR    0x53FCC000
+#define FSL_IMX25_GPIO1_SIZE    0x4000
+#define FSL_IMX25_GPIO2_ADDR    0x53FD0000
+#define FSL_IMX25_GPIO2_SIZE    0x4000
+#define FSL_IMX25_WDT_ADDR      0x53FDC000
+#define FSL_IMX25_WDT_SIZE      0x4000
+#define FSL_IMX25_USB1_ADDR     0x53FF4000
+#define FSL_IMX25_USB1_SIZE     0x0200
+#define FSL_IMX25_USB2_ADDR     0x53FF4400
+#define FSL_IMX25_USB2_SIZE     0x0200
+#define FSL_IMX25_AVIC_ADDR     0x68000000
+#define FSL_IMX25_AVIC_SIZE     0x4000
+#define FSL_IMX25_IRAM_ADDR     0x78000000
+#define FSL_IMX25_IRAM_SIZE     0x20000
+#define FSL_IMX25_IRAM_ALIAS_ADDR     0x78020000
+#define FSL_IMX25_IRAM_ALIAS_SIZE     0x7FE0000
+#define FSL_IMX25_SDRAM0_ADDR   0x80000000
+#define FSL_IMX25_SDRAM0_SIZE   0x10000000
+#define FSL_IMX25_SDRAM1_ADDR   0x90000000
+#define FSL_IMX25_SDRAM1_SIZE   0x10000000
+
+#define FSL_IMX25_UART1_IRQ     45
+#define FSL_IMX25_UART2_IRQ     32
+#define FSL_IMX25_UART3_IRQ     18
+#define FSL_IMX25_UART4_IRQ     5
+#define FSL_IMX25_UART5_IRQ     40
+#define FSL_IMX25_GPT1_IRQ      54
+#define FSL_IMX25_GPT2_IRQ      53
+#define FSL_IMX25_GPT3_IRQ      29
+#define FSL_IMX25_GPT4_IRQ      1
+#define FSL_IMX25_EPIT1_IRQ     28
+#define FSL_IMX25_EPIT2_IRQ     27
+#define FSL_IMX25_FEC_IRQ       57
+#define FSL_IMX25_RNGC_IRQ      22
+#define FSL_IMX25_I2C1_IRQ      3
+#define FSL_IMX25_I2C2_IRQ      4
+#define FSL_IMX25_I2C3_IRQ      10
+#define FSL_IMX25_GPIO1_IRQ     52
+#define FSL_IMX25_GPIO2_IRQ     51
+#define FSL_IMX25_GPIO3_IRQ     16
+#define FSL_IMX25_GPIO4_IRQ     23
+#define FSL_IMX25_ESDHC1_IRQ    9
+#define FSL_IMX25_ESDHC2_IRQ    8
+#define FSL_IMX25_USB1_IRQ      37
+#define FSL_IMX25_USB2_IRQ      35
+#define FSL_IMX25_WDT_IRQ       55
+
+#endif /* FSL_IMX25_H */
diff --git a/include/hw/arm/fsl-imx31.h b/include/hw/arm/fsl-imx31.h
new file mode 100644 (file)
index 0000000..40c593a
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * Freescale i.MX31 SoC emulation
+ *
+ * Copyright (C) 2015 Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef FSL_IMX31_H
+#define FSL_IMX31_H
+
+#include "hw/intc/imx_avic.h"
+#include "hw/misc/imx31_ccm.h"
+#include "hw/char/imx_serial.h"
+#include "hw/timer/imx_gpt.h"
+#include "hw/timer/imx_epit.h"
+#include "hw/i2c/imx_i2c.h"
+#include "hw/gpio/imx_gpio.h"
+#include "hw/watchdog/wdt_imx2.h"
+#include "exec/memory.h"
+#include "target/arm/cpu.h"
+#include "qom/object.h"
+
+#define TYPE_FSL_IMX31 "fsl-imx31"
+OBJECT_DECLARE_SIMPLE_TYPE(FslIMX31State, FSL_IMX31)
+
+#define FSL_IMX31_NUM_UARTS 2
+#define FSL_IMX31_NUM_EPITS 2
+#define FSL_IMX31_NUM_I2CS 3
+#define FSL_IMX31_NUM_GPIOS 3
+
+struct FslIMX31State {
+    /*< private >*/
+    DeviceState parent_obj;
+
+    /*< public >*/
+    ARMCPU         cpu;
+    IMXAVICState   avic;
+    IMX31CCMState  ccm;
+    IMXSerialState uart[FSL_IMX31_NUM_UARTS];
+    IMXGPTState    gpt;
+    IMXEPITState   epit[FSL_IMX31_NUM_EPITS];
+    IMXI2CState    i2c[FSL_IMX31_NUM_I2CS];
+    IMXGPIOState   gpio[FSL_IMX31_NUM_GPIOS];
+    IMX2WdtState   wdt;
+    MemoryRegion   secure_rom;
+    MemoryRegion   rom;
+    MemoryRegion   iram;
+    MemoryRegion   iram_alias;
+};
+
+#define FSL_IMX31_SECURE_ROM_ADDR       0x00000000
+#define FSL_IMX31_SECURE_ROM_SIZE       0x4000
+#define FSL_IMX31_ROM_ADDR              0x00404000
+#define FSL_IMX31_ROM_SIZE              0x4000
+#define FSL_IMX31_IRAM_ALIAS_ADDR       0x10000000
+#define FSL_IMX31_IRAM_ALIAS_SIZE       0xFFC0000
+#define FSL_IMX31_IRAM_ADDR             0x1FFFC000
+#define FSL_IMX31_IRAM_SIZE             0x4000
+#define FSL_IMX31_I2C1_ADDR             0x43F80000
+#define FSL_IMX31_I2C1_SIZE             0x4000
+#define FSL_IMX31_I2C3_ADDR             0x43F84000
+#define FSL_IMX31_I2C3_SIZE             0x4000
+#define FSL_IMX31_UART1_ADDR            0x43F90000
+#define FSL_IMX31_UART1_SIZE            0x4000
+#define FSL_IMX31_UART2_ADDR            0x43F94000
+#define FSL_IMX31_UART2_SIZE            0x4000
+#define FSL_IMX31_I2C2_ADDR             0x43F98000
+#define FSL_IMX31_I2C2_SIZE             0x4000
+#define FSL_IMX31_CCM_ADDR              0x53F80000
+#define FSL_IMX31_CCM_SIZE              0x4000
+#define FSL_IMX31_GPT_ADDR              0x53F90000
+#define FSL_IMX31_GPT_SIZE              0x4000
+#define FSL_IMX31_EPIT1_ADDR            0x53F94000
+#define FSL_IMX31_EPIT1_SIZE            0x4000
+#define FSL_IMX31_EPIT2_ADDR            0x53F98000
+#define FSL_IMX31_EPIT2_SIZE            0x4000
+#define FSL_IMX31_GPIO3_ADDR            0x53FA4000
+#define FSL_IMX31_GPIO3_SIZE            0x4000
+#define FSL_IMX31_GPIO1_ADDR            0x53FCC000
+#define FSL_IMX31_GPIO1_SIZE            0x4000
+#define FSL_IMX31_GPIO2_ADDR            0x53FD0000
+#define FSL_IMX31_GPIO2_SIZE            0x4000
+#define FSL_IMX31_WDT_ADDR              0x53FDC000
+#define FSL_IMX31_WDT_SIZE              0x4000
+#define FSL_IMX31_AVIC_ADDR             0x68000000
+#define FSL_IMX31_AVIC_SIZE             0x100
+#define FSL_IMX31_SDRAM0_ADDR           0x80000000
+#define FSL_IMX31_SDRAM0_SIZE           0x10000000
+#define FSL_IMX31_SDRAM1_ADDR           0x90000000
+#define FSL_IMX31_SDRAM1_SIZE           0x10000000
+#define FSL_IMX31_FLASH0_ADDR           0xA0000000
+#define FSL_IMX31_FLASH0_SIZE           0x8000000
+#define FSL_IMX31_FLASH1_ADDR           0xA8000000
+#define FSL_IMX31_FLASH1_SIZE           0x8000000
+#define FSL_IMX31_CS2_ADDR              0xB0000000
+#define FSL_IMX31_CS2_SIZE              0x2000000
+#define FSL_IMX31_CS3_ADDR              0xB2000000
+#define FSL_IMX31_CS3_SIZE              0x2000000
+#define FSL_IMX31_CS4_ADDR              0xB4000000
+#define FSL_IMX31_CS4_SIZE              0x2000000
+#define FSL_IMX31_CS5_ADDR              0xB6000000
+#define FSL_IMX31_CS5_SIZE              0x2000000
+#define FSL_IMX31_NAND_ADDR             0xB8000000
+#define FSL_IMX31_NAND_SIZE             0x1000
+
+#define FSL_IMX31_EPIT2_IRQ             27
+#define FSL_IMX31_EPIT1_IRQ             28
+#define FSL_IMX31_GPT_IRQ               29
+#define FSL_IMX31_UART2_IRQ             32
+#define FSL_IMX31_UART1_IRQ             45
+#define FSL_IMX31_I2C1_IRQ              10
+#define FSL_IMX31_I2C2_IRQ              4
+#define FSL_IMX31_I2C3_IRQ              3
+#define FSL_IMX31_GPIO1_IRQ             52
+#define FSL_IMX31_GPIO2_IRQ             51
+#define FSL_IMX31_GPIO3_IRQ             56
+
+#endif /* FSL_IMX31_H */
diff --git a/include/hw/arm/fsl-imx6.h b/include/hw/arm/fsl-imx6.h
new file mode 100644 (file)
index 0000000..519b871
--- /dev/null
@@ -0,0 +1,465 @@
+/*
+ * Freescale i.MX31 SoC emulation
+ *
+ * Copyright (C) 2015 Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef FSL_IMX6_H
+#define FSL_IMX6_H
+
+#include "hw/cpu/a9mpcore.h"
+#include "hw/misc/imx6_ccm.h"
+#include "hw/misc/imx6_src.h"
+#include "hw/misc/imx7_snvs.h"
+#include "hw/watchdog/wdt_imx2.h"
+#include "hw/char/imx_serial.h"
+#include "hw/timer/imx_gpt.h"
+#include "hw/timer/imx_epit.h"
+#include "hw/i2c/imx_i2c.h"
+#include "hw/gpio/imx_gpio.h"
+#include "hw/sd/sdhci.h"
+#include "hw/ssi/imx_spi.h"
+#include "hw/net/imx_fec.h"
+#include "hw/usb/chipidea.h"
+#include "hw/usb/imx-usb-phy.h"
+#include "exec/memory.h"
+#include "cpu.h"
+#include "qom/object.h"
+
+#define TYPE_FSL_IMX6 "fsl-imx6"
+OBJECT_DECLARE_SIMPLE_TYPE(FslIMX6State, FSL_IMX6)
+
+#define FSL_IMX6_NUM_CPUS 4
+#define FSL_IMX6_NUM_UARTS 5
+#define FSL_IMX6_NUM_EPITS 2
+#define FSL_IMX6_NUM_I2CS 3
+#define FSL_IMX6_NUM_GPIOS 7
+#define FSL_IMX6_NUM_ESDHCS 4
+#define FSL_IMX6_NUM_ECSPIS 5
+#define FSL_IMX6_NUM_WDTS 2
+#define FSL_IMX6_NUM_USB_PHYS 2
+#define FSL_IMX6_NUM_USBS 4
+
+struct FslIMX6State {
+    /*< private >*/
+    DeviceState parent_obj;
+
+    /*< public >*/
+    ARMCPU         cpu[FSL_IMX6_NUM_CPUS];
+    A9MPPrivState  a9mpcore;
+    IMX6CCMState   ccm;
+    IMX6SRCState   src;
+    IMX7SNVSState  snvs;
+    IMXSerialState uart[FSL_IMX6_NUM_UARTS];
+    IMXGPTState    gpt;
+    IMXEPITState   epit[FSL_IMX6_NUM_EPITS];
+    IMXI2CState    i2c[FSL_IMX6_NUM_I2CS];
+    IMXGPIOState   gpio[FSL_IMX6_NUM_GPIOS];
+    SDHCIState     esdhc[FSL_IMX6_NUM_ESDHCS];
+    IMXSPIState    spi[FSL_IMX6_NUM_ECSPIS];
+    IMX2WdtState   wdt[FSL_IMX6_NUM_WDTS];
+    IMXUSBPHYState usbphy[FSL_IMX6_NUM_USB_PHYS];
+    ChipideaState  usb[FSL_IMX6_NUM_USBS];
+    IMXFECState    eth;
+    MemoryRegion   rom;
+    MemoryRegion   caam;
+    MemoryRegion   ocram;
+    MemoryRegion   ocram_alias;
+    uint32_t       phy_num;
+};
+
+
+#define FSL_IMX6_MMDC_ADDR 0x10000000
+#define FSL_IMX6_MMDC_SIZE 0xF0000000
+#define FSL_IMX6_EIM_MEM_ADDR 0x08000000
+#define FSL_IMX6_EIM_MEM_SIZE 0x8000000
+#define FSL_IMX6_IPU_2_ADDR 0x02800000
+#define FSL_IMX6_IPU_2_SIZE 0x400000
+#define FSL_IMX6_IPU_1_ADDR 0x02400000
+#define FSL_IMX6_IPU_1_SIZE 0x400000
+#define FSL_IMX6_MIPI_HSI_ADDR 0x02208000
+#define FSL_IMX6_MIPI_HSI_SIZE 0x4000
+#define FSL_IMX6_OPENVG_ADDR 0x02204000
+#define FSL_IMX6_OPENVG_SIZE 0x4000
+#define FSL_IMX6_SATA_ADDR 0x02200000
+#define FSL_IMX6_SATA_SIZE 0x4000
+#define FSL_IMX6_AIPS_2_ADDR 0x02100000
+#define FSL_IMX6_AIPS_2_SIZE 0x100000
+/* AIPS2 */
+#define FSL_IMX6_UART5_ADDR 0x021F4000
+#define FSL_IMX6_UART5_SIZE 0x4000
+#define FSL_IMX6_UART4_ADDR 0x021F0000
+#define FSL_IMX6_UART4_SIZE 0x4000
+#define FSL_IMX6_UART3_ADDR 0x021EC000
+#define FSL_IMX6_UART3_SIZE 0x4000
+#define FSL_IMX6_UART2_ADDR 0x021E8000
+#define FSL_IMX6_UART2_SIZE 0x4000
+#define FSL_IMX6_VDOA_ADDR 0x021E4000
+#define FSL_IMX6_VDOA_SIZE 0x4000
+#define FSL_IMX6_MIPI_DSI_ADDR 0x021E0000
+#define FSL_IMX6_MIPI_DSI_SIZE 0x4000
+#define FSL_IMX6_MIPI_CSI_ADDR 0x021DC000
+#define FSL_IMX6_MIPI_CSI_SIZE 0x4000
+#define FSL_IMX6_AUDMUX_ADDR 0x021D8000
+#define FSL_IMX6_AUDMUX_SIZE 0x4000
+#define FSL_IMX6_TZASC2_ADDR 0x021D4000
+#define FSL_IMX6_TZASC2_SIZE 0x4000
+#define FSL_IMX6_TZASC1_ADDR 0x021D0000
+#define FSL_IMX6_TZASC1_SIZE 0x4000
+#define FSL_IMX6_CSU_ADDR 0x021C0000
+#define FSL_IMX6_CSU_SIZE 0x4000
+#define FSL_IMX6_OCOTPCTRL_ADDR 0x021BC000
+#define FSL_IMX6_OCOTPCTRL_SIZE 0x4000
+#define FSL_IMX6_EIM_ADDR 0x021B8000
+#define FSL_IMX6_EIM_SIZE 0x4000
+#define FSL_IMX6_MMDC1_ADDR 0x021B4000
+#define FSL_IMX6_MMDC1_SIZE 0x4000
+#define FSL_IMX6_MMDC0_ADDR 0x021B0000
+#define FSL_IMX6_MMDC0_SIZE 0x4000
+#define FSL_IMX6_ROMCP_ADDR 0x021AC000
+#define FSL_IMX6_ROMCP_SIZE 0x4000
+#define FSL_IMX6_I2C3_ADDR 0x021A8000
+#define FSL_IMX6_I2C3_SIZE 0x4000
+#define FSL_IMX6_I2C2_ADDR 0x021A4000
+#define FSL_IMX6_I2C2_SIZE 0x4000
+#define FSL_IMX6_I2C1_ADDR 0x021A0000
+#define FSL_IMX6_I2C1_SIZE 0x4000
+#define FSL_IMX6_uSDHC4_ADDR 0x0219C000
+#define FSL_IMX6_uSDHC4_SIZE 0x4000
+#define FSL_IMX6_uSDHC3_ADDR 0x02198000
+#define FSL_IMX6_uSDHC3_SIZE 0x4000
+#define FSL_IMX6_uSDHC2_ADDR 0x02194000
+#define FSL_IMX6_uSDHC2_SIZE 0x4000
+#define FSL_IMX6_uSDHC1_ADDR 0x02190000
+#define FSL_IMX6_uSDHC1_SIZE 0x4000
+#define FSL_IMX6_MLB150_ADDR 0x0218C000
+#define FSL_IMX6_MLB150_SIZE 0x4000
+#define FSL_IMX6_ENET_ADDR 0x02188000
+#define FSL_IMX6_ENET_SIZE 0x4000
+#define FSL_IMX6_USBOH3_USB_ADDR 0x02184000
+#define FSL_IMX6_USBOH3_USB_SIZE 0x4000
+#define FSL_IMX6_AIPS2_CFG_ADDR 0x0217C000
+#define FSL_IMX6_AIPS2_CFG_SIZE 0x4000
+/* DAP */
+#define FSL_IMX6_PTF_CTRL_ADDR 0x02160000
+#define FSL_IMX6_PTF_CTRL_SIZE 0x1000
+#define FSL_IMX6_PTM3_ADDR 0x0215F000
+#define FSL_IMX6_PTM3_SIZE 0x1000
+#define FSL_IMX6_PTM2_ADDR 0x0215E000
+#define FSL_IMX6_PTM2_SIZE 0x1000
+#define FSL_IMX6_PTM1_ADDR 0x0215D000
+#define FSL_IMX6_PTM1_SIZE 0x1000
+#define FSL_IMX6_PTM0_ADDR 0x0215C000
+#define FSL_IMX6_PTM0_SIZE 0x1000
+#define FSL_IMX6_CTI3_ADDR 0x0215B000
+#define FSL_IMX6_CTI3_SIZE 0x1000
+#define FSL_IMX6_CTI2_ADDR 0x0215A000
+#define FSL_IMX6_CTI2_SIZE 0x1000
+#define FSL_IMX6_CTI1_ADDR 0x02159000
+#define FSL_IMX6_CTI1_SIZE 0x1000
+#define FSL_IMX6_CTI0_ADDR 0x02158000
+#define FSL_IMX6_CTI0_SIZE 0x1000
+#define FSL_IMX6_CPU3_PMU_ADDR 0x02157000
+#define FSL_IMX6_CPU3_PMU_SIZE 0x1000
+#define FSL_IMX6_CPU3_DEBUG_IF_ADDR 0x02156000
+#define FSL_IMX6_CPU3_DEBUG_IF_SIZE 0x1000
+#define FSL_IMX6_CPU2_PMU_ADDR 0x02155000
+#define FSL_IMX6_CPU2_PMU_SIZE 0x1000
+#define FSL_IMX6_CPU2_DEBUG_IF_ADDR 0x02154000
+#define FSL_IMX6_CPU2_DEBUG_IF_SIZE 0x1000
+#define FSL_IMX6_CPU1_PMU_ADDR 0x02153000
+#define FSL_IMX6_CPU1_PMU_SIZE 0x1000
+#define FSL_IMX6_CPU1_DEBUG_IF_ADDR 0x02152000
+#define FSL_IMX6_CPU1_DEBUG_IF_SIZE 0x1000
+#define FSL_IMX6_CPU0_PMU_ADDR 0x02151000
+#define FSL_IMX6_CPU0_PMU_SIZE 0x1000
+#define FSL_IMX6_CPU0_DEBUG_IF_ADDR 0x02150000
+#define FSL_IMX6_CPU0_DEBUG_IF_SIZE 0x1000
+#define FSL_IMX6_CA9_INTEG_ADDR 0x0214F000
+#define FSL_IMX6_CA9_INTEG_SIZE 0x1000
+#define FSL_IMX6_FUNNEL_ADDR 0x02144000
+#define FSL_IMX6_FUNNEL_SIZE 0x1000
+#define FSL_IMX6_TPIU_ADDR 0x02143000
+#define FSL_IMX6_TPIU_SIZE 0x1000
+#define FSL_IMX6_EXT_CTI_ADDR 0x02142000
+#define FSL_IMX6_EXT_CTI_SIZE 0x1000
+#define FSL_IMX6_ETB_ADDR 0x02141000
+#define FSL_IMX6_ETB_SIZE 0x1000
+#define FSL_IMX6_DAP_ROM_TABLE_ADDR 0x02140000
+#define FSL_IMX6_DAP_ROM_TABLE_SIZE 0x1000
+/* DAP end */
+#define FSL_IMX6_CAAM_ADDR 0x02100000
+#define FSL_IMX6_CAAM_SIZE 0x10000
+/* AIPS2 end */
+#define FSL_IMX6_AIPS_1_ADDR 0x02000000
+#define FSL_IMX6_AIPS_1_SIZE 0x100000
+/* AIPS1 */
+#define FSL_IMX6_SDMA_ADDR 0x020EC000
+#define FSL_IMX6_SDMA_SIZE 0x4000
+#define FSL_IMX6_DCIC2_ADDR 0x020E8000
+#define FSL_IMX6_DCIC2_SIZE 0x4000
+#define FSL_IMX6_DCIC1_ADDR 0x020E4000
+#define FSL_IMX6_DCIC1_SIZE 0x4000
+#define FSL_IMX6_IOMUXC_ADDR 0x020E0000
+#define FSL_IMX6_IOMUXC_SIZE 0x4000
+#define FSL_IMX6_PGCARM_ADDR 0x020DCA00
+#define FSL_IMX6_PGCARM_SIZE 0x20
+#define FSL_IMX6_PGCPU_ADDR 0x020DC260
+#define FSL_IMX6_PGCPU_SIZE 0x20
+#define FSL_IMX6_GPC_ADDR 0x020DC000
+#define FSL_IMX6_GPC_SIZE 0x4000
+#define FSL_IMX6_SRC_ADDR 0x020D8000
+#define FSL_IMX6_SRC_SIZE 0x4000
+#define FSL_IMX6_EPIT2_ADDR 0x020D4000
+#define FSL_IMX6_EPIT2_SIZE 0x4000
+#define FSL_IMX6_EPIT1_ADDR 0x020D0000
+#define FSL_IMX6_EPIT1_SIZE 0x4000
+#define FSL_IMX6_SNVSHP_ADDR 0x020CC000
+#define FSL_IMX6_SNVSHP_SIZE 0x4000
+#define FSL_IMX6_USBPHY2_ADDR 0x020CA000
+#define FSL_IMX6_USBPHY2_SIZE 0x1000
+#define FSL_IMX6_USBPHY1_ADDR 0x020C9000
+#define FSL_IMX6_USBPHY1_SIZE 0x1000
+#define FSL_IMX6_ANALOG_ADDR 0x020C8000
+#define FSL_IMX6_ANALOG_SIZE 0x1000
+#define FSL_IMX6_CCM_ADDR 0x020C4000
+#define FSL_IMX6_CCM_SIZE 0x4000
+#define FSL_IMX6_WDOG2_ADDR 0x020C0000
+#define FSL_IMX6_WDOG2_SIZE 0x4000
+#define FSL_IMX6_WDOG1_ADDR 0x020BC000
+#define FSL_IMX6_WDOG1_SIZE 0x4000
+#define FSL_IMX6_KPP_ADDR 0x020B8000
+#define FSL_IMX6_KPP_SIZE 0x4000
+#define FSL_IMX6_GPIO7_ADDR 0x020B4000
+#define FSL_IMX6_GPIO7_SIZE 0x4000
+#define FSL_IMX6_GPIO6_ADDR 0x020B0000
+#define FSL_IMX6_GPIO6_SIZE 0x4000
+#define FSL_IMX6_GPIO5_ADDR 0x020AC000
+#define FSL_IMX6_GPIO5_SIZE 0x4000
+#define FSL_IMX6_GPIO4_ADDR 0x020A8000
+#define FSL_IMX6_GPIO4_SIZE 0x4000
+#define FSL_IMX6_GPIO3_ADDR 0x020A4000
+#define FSL_IMX6_GPIO3_SIZE 0x4000
+#define FSL_IMX6_GPIO2_ADDR 0x020A0000
+#define FSL_IMX6_GPIO2_SIZE 0x4000
+#define FSL_IMX6_GPIO1_ADDR 0x0209C000
+#define FSL_IMX6_GPIO1_SIZE 0x4000
+#define FSL_IMX6_GPT_ADDR 0x02098000
+#define FSL_IMX6_GPT_SIZE 0x4000
+#define FSL_IMX6_CAN2_ADDR 0x02094000
+#define FSL_IMX6_CAN2_SIZE 0x4000
+#define FSL_IMX6_CAN1_ADDR 0x02090000
+#define FSL_IMX6_CAN1_SIZE 0x4000
+#define FSL_IMX6_PWM4_ADDR 0x0208C000
+#define FSL_IMX6_PWM4_SIZE 0x4000
+#define FSL_IMX6_PWM3_ADDR 0x02088000
+#define FSL_IMX6_PWM3_SIZE 0x4000
+#define FSL_IMX6_PWM2_ADDR 0x02084000
+#define FSL_IMX6_PWM2_SIZE 0x4000
+#define FSL_IMX6_PWM1_ADDR 0x02080000
+#define FSL_IMX6_PWM1_SIZE 0x4000
+#define FSL_IMX6_AIPS1_CFG_ADDR 0x0207C000
+#define FSL_IMX6_AIPS1_CFG_SIZE 0x4000
+#define FSL_IMX6_VPU_ADDR 0x02040000
+#define FSL_IMX6_VPU_SIZE 0x3C000
+#define FSL_IMX6_AIPS1_SPBA_ADDR 0x0203C000
+#define FSL_IMX6_AIPS1_SPBA_SIZE 0x4000
+#define FSL_IMX6_ASRC_ADDR 0x02034000
+#define FSL_IMX6_ASRC_SIZE 0x4000
+#define FSL_IMX6_SSI3_ADDR 0x02030000
+#define FSL_IMX6_SSI3_SIZE 0x4000
+#define FSL_IMX6_SSI2_ADDR 0x0202C000
+#define FSL_IMX6_SSI2_SIZE 0x4000
+#define FSL_IMX6_SSI1_ADDR 0x02028000
+#define FSL_IMX6_SSI1_SIZE 0x4000
+#define FSL_IMX6_ESAI_ADDR 0x02024000
+#define FSL_IMX6_ESAI_SIZE 0x4000
+#define FSL_IMX6_UART1_ADDR 0x02020000
+#define FSL_IMX6_UART1_SIZE 0x4000
+#define FSL_IMX6_eCSPI5_ADDR 0x02018000
+#define FSL_IMX6_eCSPI5_SIZE 0x4000
+#define FSL_IMX6_eCSPI4_ADDR 0x02014000
+#define FSL_IMX6_eCSPI4_SIZE 0x4000
+#define FSL_IMX6_eCSPI3_ADDR 0x02010000
+#define FSL_IMX6_eCSPI3_SIZE 0x4000
+#define FSL_IMX6_eCSPI2_ADDR 0x0200C000
+#define FSL_IMX6_eCSPI2_SIZE 0x4000
+#define FSL_IMX6_eCSPI1_ADDR 0x02008000
+#define FSL_IMX6_eCSPI1_SIZE 0x4000
+#define FSL_IMX6_SPDIF_ADDR 0x02004000
+#define FSL_IMX6_SPDIF_SIZE 0x4000
+/* AIPS1 end */
+#define FSL_IMX6_PCIe_REG_ADDR 0x01FFC000
+#define FSL_IMX6_PCIe_REG_SIZE 0x4000
+#define FSL_IMX6_PCIe_ADDR 0x01000000
+#define FSL_IMX6_PCIe_SIZE 0xFFC000
+#define FSL_IMX6_GPV_1_PL301_CFG_ADDR 0x00C00000
+#define FSL_IMX6_GPV_1_PL301_CFG_SIZE 0x100000
+#define FSL_IMX6_GPV_0_PL301_CFG_ADDR 0x00B00000
+#define FSL_IMX6_GPV_0_PL301_CFG_SIZE 0x100000
+#define FSL_IMX6_PL310_ADDR 0x00A02000
+#define FSL_IMX6_PL310_SIZE 0x1000
+#define FSL_IMX6_A9MPCORE_ADDR 0x00A00000
+#define FSL_IMX6_A9MPCORE_SIZE 0x2000
+#define FSL_IMX6_OCRAM_ALIAS_ADDR 0x00940000
+#define FSL_IMX6_OCRAM_ALIAS_SIZE 0xC0000
+#define FSL_IMX6_OCRAM_ADDR 0x00900000
+#define FSL_IMX6_OCRAM_SIZE 0x40000
+#define FSL_IMX6_GPV_4_PL301_CFG_ADDR 0x00800000
+#define FSL_IMX6_GPV_4_PL301_CFG_SIZE 0x100000
+#define FSL_IMX6_GPV_3_PL301_CFG_ADDR 0x00300000
+#define FSL_IMX6_GPV_3_PL301_CFG_SIZE 0x100000
+#define FSL_IMX6_GPV_2_PL301_CFG_ADDR 0x00200000
+#define FSL_IMX6_GPV_2_PL301_CFG_SIZE 0x100000
+#define FSL_IMX6_DTCP_ADDR 0x00138000
+#define FSL_IMX6_DTCP_SIZE 0x4000
+#define FSL_IMX6_GPU_2D_ADDR 0x00134000
+#define FSL_IMX6_GPU_2D_SIZE 0x4000
+#define FSL_IMX6_GPU_3D_ADDR 0x00130000
+#define FSL_IMX6_GPU_3D_SIZE 0x4000
+#define FSL_IMX6_HDMI_ADDR 0x00120000
+#define FSL_IMX6_HDMI_SIZE 0x9000
+#define FSL_IMX6_BCH_ADDR 0x00114000
+#define FSL_IMX6_BCH_SIZE 0x4000
+#define FSL_IMX6_GPMI_ADDR 0x00112000
+#define FSL_IMX6_GPMI_SIZE 0x2000
+#define FSL_IMX6_APBH_BRIDGE_DMA_ADDR 0x00110000
+#define FSL_IMX6_APBH_BRIDGE_DMA_SIZE 0x2000
+#define FSL_IMX6_CAAM_MEM_ADDR 0x00100000
+#define FSL_IMX6_CAAM_MEM_SIZE 0x4000
+#define FSL_IMX6_ROM_ADDR 0x00000000
+#define FSL_IMX6_ROM_SIZE 0x18000
+
+#define FSL_IMX6_IOMUXC_IRQ 0
+#define FSL_IMX6_DAP_IRQ 1
+#define FSL_IMX6_SDMA_IRQ 2
+#define FSL_IMX6_VPU_JPEG_IRQ 3
+#define FSL_IMX6_SNVS_PMIC_IRQ 4
+#define FSL_IMX6_IPU1_ERROR_IRQ 5
+#define FSL_IMX6_IPU1_SYNC_IRQ 6
+#define FSL_IMX6_IPU2_ERROR_IRQ 7
+#define FSL_IMX6_IPU2_SYNC_IRQ 8
+#define FSL_IMX6_GPU3D_IRQ 9
+#define FSL_IMX6_R2D_IRQ 10
+#define FSL_IMX6_V2D_IRQ 11
+#define FSL_IMX6_VPU_IRQ 12
+#define FSL_IMX6_APBH_BRIDGE_DMA_IRQ 13
+#define FSL_IMX6_EIM_IRQ 14
+#define FSL_IMX6_BCH_IRQ 15
+#define FSL_IMX6_GPMI_IRQ 16
+#define FSL_IMX6_DTCP_IRQ 17
+#define FSL_IMX6_VDOA_IRQ 18
+#define FSL_IMX6_SNVS_CONS_IRQ 19
+#define FSL_IMX6_SNVS_SEC_IRQ 20
+#define FSL_IMX6_CSU_IRQ 21
+#define FSL_IMX6_uSDHC1_IRQ 22
+#define FSL_IMX6_uSDHC2_IRQ 23
+#define FSL_IMX6_uSDHC3_IRQ 24
+#define FSL_IMX6_uSDHC4_IRQ 25
+#define FSL_IMX6_UART1_IRQ 26
+#define FSL_IMX6_UART2_IRQ 27
+#define FSL_IMX6_UART3_IRQ 28
+#define FSL_IMX6_UART4_IRQ 29
+#define FSL_IMX6_UART5_IRQ 30
+#define FSL_IMX6_ECSPI1_IRQ 31
+#define FSL_IMX6_ECSPI2_IRQ 32
+#define FSL_IMX6_ECSPI3_IRQ 33
+#define FSL_IMX6_ECSPI4_IRQ 34
+#define FSL_IMX6_ECSPI5_IRQ 35
+#define FSL_IMX6_I2C1_IRQ 36
+#define FSL_IMX6_I2C2_IRQ 37
+#define FSL_IMX6_I2C3_IRQ 38
+#define FSL_IMX6_SATA_IRQ 39
+#define FSL_IMX6_USB_HOST1_IRQ 40
+#define FSL_IMX6_USB_HOST2_IRQ 41
+#define FSL_IMX6_USB_HOST3_IRQ 42
+#define FSL_IMX6_USB_OTG_IRQ 43
+#define FSL_IMX6_USB_PHY_UTMI0_IRQ 44
+#define FSL_IMX6_USB_PHY_UTMI1_IRQ 45
+#define FSL_IMX6_SSI1_IRQ 46
+#define FSL_IMX6_SSI2_IRQ 47
+#define FSL_IMX6_SSI3_IRQ 48
+#define FSL_IMX6_TEMP_IRQ 49
+#define FSL_IMX6_ASRC_IRQ 50
+#define FSL_IMX6_ESAI_IRQ 51
+#define FSL_IMX6_SPDIF_IRQ 52
+#define FSL_IMX6_MLB150_IRQ 53
+#define FSL_IMX6_PMU1_IRQ 54
+#define FSL_IMX6_GPT_IRQ 55
+#define FSL_IMX6_EPIT1_IRQ 56
+#define FSL_IMX6_EPIT2_IRQ 57
+#define FSL_IMX6_GPIO1_INT7_IRQ 58
+#define FSL_IMX6_GPIO1_INT6_IRQ 59
+#define FSL_IMX6_GPIO1_INT5_IRQ 60
+#define FSL_IMX6_GPIO1_INT4_IRQ 61
+#define FSL_IMX6_GPIO1_INT3_IRQ 62
+#define FSL_IMX6_GPIO1_INT2_IRQ 63
+#define FSL_IMX6_GPIO1_INT1_IRQ 64
+#define FSL_IMX6_GPIO1_INT0_IRQ 65
+#define FSL_IMX6_GPIO1_LOW_IRQ 66
+#define FSL_IMX6_GPIO1_HIGH_IRQ 67
+#define FSL_IMX6_GPIO2_LOW_IRQ 68
+#define FSL_IMX6_GPIO2_HIGH_IRQ 69
+#define FSL_IMX6_GPIO3_LOW_IRQ 70
+#define FSL_IMX6_GPIO3_HIGH_IRQ 71
+#define FSL_IMX6_GPIO4_LOW_IRQ 72
+#define FSL_IMX6_GPIO4_HIGH_IRQ 73
+#define FSL_IMX6_GPIO5_LOW_IRQ 74
+#define FSL_IMX6_GPIO5_HIGH_IRQ 75
+#define FSL_IMX6_GPIO6_LOW_IRQ 76
+#define FSL_IMX6_GPIO6_HIGH_IRQ 77
+#define FSL_IMX6_GPIO7_LOW_IRQ 78
+#define FSL_IMX6_GPIO7_HIGH_IRQ 79
+#define FSL_IMX6_WDOG1_IRQ 80
+#define FSL_IMX6_WDOG2_IRQ 81
+#define FSL_IMX6_KPP_IRQ 82
+#define FSL_IMX6_PWM1_IRQ 83
+#define FSL_IMX6_PWM2_IRQ 84
+#define FSL_IMX6_PWM3_IRQ 85
+#define FSL_IMX6_PWM4_IRQ 86
+#define FSL_IMX6_CCM1_IRQ 87
+#define FSL_IMX6_CCM2_IRQ 88
+#define FSL_IMX6_GPC_IRQ 89
+#define FSL_IMX6_SRC_IRQ 91
+#define FSL_IMX6_CPU_L2_IRQ 92
+#define FSL_IMX6_CPU_PARITY_IRQ 93
+#define FSL_IMX6_CPU_PERF_IRQ 94
+#define FSL_IMX6_CPU_CTI_IRQ 95
+#define FSL_IMX6_SRC_COMB_IRQ 96
+#define FSL_IMX6_MIPI_CSI1_IRQ 100
+#define FSL_IMX6_MIPI_CSI2_IRQ 101
+#define FSL_IMX6_MIPI_DSI_IRQ 102
+#define FSL_IMX6_MIPI_HSI_IRQ 103
+#define FSL_IMX6_SJC_IRQ 104
+#define FSL_IMX6_CAAM0_IRQ 105
+#define FSL_IMX6_CAAM1_IRQ 106
+#define FSL_IMX6_ASC1_IRQ 108
+#define FSL_IMX6_ASC2_IRQ 109
+#define FSL_IMX6_FLEXCAN1_IRQ 110
+#define FSL_IMX6_FLEXCAN2_IRQ 111
+#define FSL_IMX6_HDMI_MASTER_IRQ 115
+#define FSL_IMX6_HDMI_CEC_IRQ 116
+#define FSL_IMX6_MLB150_LOW_IRQ 117
+#define FSL_IMX6_ENET_MAC_IRQ 118
+#define FSL_IMX6_ENET_MAC_1588_IRQ 119
+#define FSL_IMX6_PCIE1_IRQ 120
+#define FSL_IMX6_PCIE2_IRQ 121
+#define FSL_IMX6_PCIE3_IRQ 122
+#define FSL_IMX6_PCIE4_IRQ 123
+#define FSL_IMX6_DCIC1_IRQ 124
+#define FSL_IMX6_DCIC2_IRQ 125
+#define FSL_IMX6_MLB150_HIGH_IRQ 126
+#define FSL_IMX6_PMU2_IRQ 127
+#define FSL_IMX6_MAX_IRQ 128
+
+#endif /* FSL_IMX6_H */
diff --git a/include/hw/arm/fsl-imx6ul.h b/include/hw/arm/fsl-imx6ul.h
new file mode 100644 (file)
index 0000000..14390f6
--- /dev/null
@@ -0,0 +1,461 @@
+/*
+ * Copyright (c) 2018 Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * i.MX6ul SoC definitions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef FSL_IMX6UL_H
+#define FSL_IMX6UL_H
+
+#include "hw/cpu/a15mpcore.h"
+#include "hw/misc/imx6ul_ccm.h"
+#include "hw/misc/imx6_src.h"
+#include "hw/misc/imx7_snvs.h"
+#include "hw/intc/imx_gpcv2.h"
+#include "hw/watchdog/wdt_imx2.h"
+#include "hw/gpio/imx_gpio.h"
+#include "hw/char/imx_serial.h"
+#include "hw/timer/imx_gpt.h"
+#include "hw/timer/imx_epit.h"
+#include "hw/i2c/imx_i2c.h"
+#include "hw/sd/sdhci.h"
+#include "hw/ssi/imx_spi.h"
+#include "hw/net/imx_fec.h"
+#include "hw/usb/chipidea.h"
+#include "hw/usb/imx-usb-phy.h"
+#include "exec/memory.h"
+#include "cpu.h"
+#include "qom/object.h"
+#include "qemu/units.h"
+
+#define TYPE_FSL_IMX6UL "fsl-imx6ul"
+OBJECT_DECLARE_SIMPLE_TYPE(FslIMX6ULState, FSL_IMX6UL)
+
+enum FslIMX6ULConfiguration {
+    FSL_IMX6UL_NUM_CPUS         = 1,
+    FSL_IMX6UL_NUM_UARTS        = 8,
+    FSL_IMX6UL_NUM_ETHS         = 2,
+    FSL_IMX6UL_ETH_NUM_TX_RINGS = 2,
+    FSL_IMX6UL_NUM_USDHCS       = 2,
+    FSL_IMX6UL_NUM_WDTS         = 3,
+    FSL_IMX6UL_NUM_GPTS         = 2,
+    FSL_IMX6UL_NUM_EPITS        = 2,
+    FSL_IMX6UL_NUM_IOMUXCS      = 2,
+    FSL_IMX6UL_NUM_GPIOS        = 5,
+    FSL_IMX6UL_NUM_I2CS         = 4,
+    FSL_IMX6UL_NUM_ECSPIS       = 4,
+    FSL_IMX6UL_NUM_ADCS         = 2,
+    FSL_IMX6UL_NUM_USB_PHYS     = 2,
+    FSL_IMX6UL_NUM_USBS         = 2,
+    FSL_IMX6UL_NUM_SAIS         = 3,
+    FSL_IMX6UL_NUM_CANS         = 2,
+    FSL_IMX6UL_NUM_PWMS         = 8,
+};
+
+struct FslIMX6ULState {
+    /*< private >*/
+    DeviceState    parent_obj;
+
+    /*< public >*/
+    ARMCPU             cpu;
+    A15MPPrivState     a7mpcore;
+    IMXGPTState        gpt[FSL_IMX6UL_NUM_GPTS];
+    IMXEPITState       epit[FSL_IMX6UL_NUM_EPITS];
+    IMXGPIOState       gpio[FSL_IMX6UL_NUM_GPIOS];
+    IMX6ULCCMState     ccm;
+    IMX6SRCState       src;
+    IMX7SNVSState      snvs;
+    IMXGPCv2State      gpcv2;
+    IMXSPIState        spi[FSL_IMX6UL_NUM_ECSPIS];
+    IMXI2CState        i2c[FSL_IMX6UL_NUM_I2CS];
+    IMXSerialState     uart[FSL_IMX6UL_NUM_UARTS];
+    IMXFECState        eth[FSL_IMX6UL_NUM_ETHS];
+    SDHCIState         usdhc[FSL_IMX6UL_NUM_USDHCS];
+    IMX2WdtState       wdt[FSL_IMX6UL_NUM_WDTS];
+    IMXUSBPHYState     usbphy[FSL_IMX6UL_NUM_USB_PHYS];
+    ChipideaState      usb[FSL_IMX6UL_NUM_USBS];
+    MemoryRegion       rom;
+    MemoryRegion       caam;
+    MemoryRegion       ocram;
+    MemoryRegion       ocram_alias;
+
+    uint32_t           phy_num[FSL_IMX6UL_NUM_ETHS];
+    bool               phy_connected[FSL_IMX6UL_NUM_ETHS];
+};
+
+enum FslIMX6ULMemoryMap {
+    FSL_IMX6UL_MMDC_ADDR            = 0x80000000,
+    FSL_IMX6UL_MMDC_SIZE            = (2 * GiB),
+
+    FSL_IMX6UL_QSPI1_MEM_ADDR       = 0x60000000,
+    FSL_IMX6UL_QSPI1_MEM_SIZE       = (256 * MiB),
+
+    FSL_IMX6UL_EIM_ALIAS_ADDR       = 0x58000000,
+    FSL_IMX6UL_EIM_ALIAS_SIZE       = (128 * MiB),
+
+    FSL_IMX6UL_EIM_CS_ADDR          = 0x50000000,
+    FSL_IMX6UL_EIM_CS_SIZE          = (128 * MiB),
+
+    FSL_IMX6UL_AES_ENCRYPT_ADDR     = 0x10000000,
+    FSL_IMX6UL_AES_ENCRYPT_SIZE     = (1 * MiB),
+
+    FSL_IMX6UL_QSPI1_RX_ADDR        = 0x0C000000,
+    FSL_IMX6UL_QSPI1_RX_SIZE        = (32 * MiB),
+
+    /* AIPS-2 Begin */
+    FSL_IMX6UL_UART6_ADDR           = 0x021FC000,
+
+    FSL_IMX6UL_I2C4_ADDR            = 0x021F8000,
+
+    FSL_IMX6UL_UART5_ADDR           = 0x021F4000,
+    FSL_IMX6UL_UART4_ADDR           = 0x021F0000,
+    FSL_IMX6UL_UART3_ADDR           = 0x021EC000,
+    FSL_IMX6UL_UART2_ADDR           = 0x021E8000,
+
+    FSL_IMX6UL_WDOG3_ADDR           = 0x021E4000,
+
+    FSL_IMX6UL_QSPI_ADDR            = 0x021E0000,
+    FSL_IMX6UL_QSPI_SIZE            = 0x500,
+
+    FSL_IMX6UL_SYS_CNT_CTRL_ADDR    = 0x021DC000,
+    FSL_IMX6UL_SYS_CNT_CTRL_SIZE    = (16 * KiB),
+
+    FSL_IMX6UL_SYS_CNT_CMP_ADDR     = 0x021D8000,
+    FSL_IMX6UL_SYS_CNT_CMP_SIZE     = (16 * KiB),
+
+    FSL_IMX6UL_SYS_CNT_RD_ADDR      = 0x021D4000,
+    FSL_IMX6UL_SYS_CNT_RD_SIZE      = (16 * KiB),
+
+    FSL_IMX6UL_TZASC_ADDR           = 0x021D0000,
+    FSL_IMX6UL_TZASC_SIZE           = (16 * KiB),
+
+    FSL_IMX6UL_PXP_ADDR             = 0x021CC000,
+    FSL_IMX6UL_PXP_SIZE             = (16 * KiB),
+
+    FSL_IMX6UL_LCDIF_ADDR           = 0x021C8000,
+    FSL_IMX6UL_LCDIF_SIZE           = 0x100,
+
+    FSL_IMX6UL_CSI_ADDR             = 0x021C4000,
+    FSL_IMX6UL_CSI_SIZE             = 0x100,
+
+    FSL_IMX6UL_CSU_ADDR             = 0x021C0000,
+    FSL_IMX6UL_CSU_SIZE             = (16 * KiB),
+
+    FSL_IMX6UL_OCOTP_CTRL_ADDR      = 0x021BC000,
+    FSL_IMX6UL_OCOTP_CTRL_SIZE      = (4 * KiB),
+
+    FSL_IMX6UL_EIM_ADDR             = 0x021B8000,
+    FSL_IMX6UL_EIM_SIZE             = 0x100,
+
+    FSL_IMX6UL_SIM2_ADDR            = 0x021B4000,
+
+    FSL_IMX6UL_MMDC_CFG_ADDR        = 0x021B0000,
+    FSL_IMX6UL_MMDC_CFG_SIZE        = (4 * KiB),
+
+    FSL_IMX6UL_ROMCP_ADDR           = 0x021AC000,
+    FSL_IMX6UL_ROMCP_SIZE           = 0x300,
+
+    FSL_IMX6UL_I2C3_ADDR            = 0x021A8000,
+    FSL_IMX6UL_I2C2_ADDR            = 0x021A4000,
+    FSL_IMX6UL_I2C1_ADDR            = 0x021A0000,
+
+    FSL_IMX6UL_ADC2_ADDR            = 0x0219C000,
+    FSL_IMX6UL_ADC1_ADDR            = 0x02198000,
+    FSL_IMX6UL_ADCn_SIZE            = 0x100,
+
+    FSL_IMX6UL_USDHC2_ADDR          = 0x02194000,
+    FSL_IMX6UL_USDHC1_ADDR          = 0x02190000,
+
+    FSL_IMX6UL_SIM1_ADDR            = 0x0218C000,
+    FSL_IMX6UL_SIMn_SIZE            = (16 * KiB),
+
+    FSL_IMX6UL_ENET1_ADDR           = 0x02188000,
+
+    FSL_IMX6UL_USBO2_USBMISC_ADDR   = 0x02184800,
+    FSL_IMX6UL_USBO2_USB1_ADDR      = 0x02184000,
+    FSL_IMX6UL_USBO2_USB2_ADDR      = 0x02184200,
+
+    FSL_IMX6UL_USBO2_PL301_ADDR     = 0x02180000,
+    FSL_IMX6UL_USBO2_PL301_SIZE     = (16 * KiB),
+
+    FSL_IMX6UL_AIPS2_CFG_ADDR       = 0x0217C000,
+    FSL_IMX6UL_AIPS2_CFG_SIZE       = 0x100,
+
+    FSL_IMX6UL_CAAM_ADDR            = 0x02140000,
+    FSL_IMX6UL_CAAM_SIZE            = (16 * KiB),
+
+    FSL_IMX6UL_A7MPCORE_DAP_ADDR    = 0x02100000,
+    FSL_IMX6UL_A7MPCORE_DAP_SIZE    = (4 * KiB),
+    /* AIPS-2 End */
+
+    /* AIPS-1 Begin */
+    FSL_IMX6UL_PWM8_ADDR            = 0x020FC000,
+    FSL_IMX6UL_PWM7_ADDR            = 0x020F8000,
+    FSL_IMX6UL_PWM6_ADDR            = 0x020F4000,
+    FSL_IMX6UL_PWM5_ADDR            = 0x020F0000,
+
+    FSL_IMX6UL_SDMA_ADDR            = 0x020EC000,
+    FSL_IMX6UL_SDMA_SIZE            = 0x300,
+
+    FSL_IMX6UL_GPT2_ADDR            = 0x020E8000,
+
+    FSL_IMX6UL_IOMUXC_GPR_ADDR      = 0x020E4000,
+    FSL_IMX6UL_IOMUXC_GPR_SIZE      = 0x40,
+
+    FSL_IMX6UL_IOMUXC_ADDR          = 0x020E0000,
+    FSL_IMX6UL_IOMUXC_SIZE          = 0x700,
+
+    FSL_IMX6UL_GPC_ADDR             = 0x020DC000,
+
+    FSL_IMX6UL_SRC_ADDR             = 0x020D8000,
+
+    FSL_IMX6UL_EPIT2_ADDR           = 0x020D4000,
+    FSL_IMX6UL_EPIT1_ADDR           = 0x020D0000,
+
+    FSL_IMX6UL_SNVS_HP_ADDR         = 0x020CC000,
+
+    FSL_IMX6UL_USBPHY2_ADDR         = 0x020CA000,
+    FSL_IMX6UL_USBPHY1_ADDR         = 0x020C9000,
+
+    FSL_IMX6UL_ANALOG_ADDR          = 0x020C8000,
+    FSL_IMX6UL_ANALOG_SIZE          = 0x300,
+
+    FSL_IMX6UL_CCM_ADDR             = 0x020C4000,
+
+    FSL_IMX6UL_WDOG2_ADDR           = 0x020C0000,
+    FSL_IMX6UL_WDOG1_ADDR           = 0x020BC000,
+
+    FSL_IMX6UL_KPP_ADDR             = 0x020B8000,
+    FSL_IMX6UL_KPP_SIZE             = 0x10,
+
+    FSL_IMX6UL_ENET2_ADDR           = 0x020B4000,
+
+    FSL_IMX6UL_SNVS_LP_ADDR         = 0x020B0000,
+    FSL_IMX6UL_SNVS_LP_SIZE         = (16 * KiB),
+
+    FSL_IMX6UL_GPIO5_ADDR           = 0x020AC000,
+    FSL_IMX6UL_GPIO4_ADDR           = 0x020A8000,
+    FSL_IMX6UL_GPIO3_ADDR           = 0x020A4000,
+    FSL_IMX6UL_GPIO2_ADDR           = 0x020A0000,
+    FSL_IMX6UL_GPIO1_ADDR           = 0x0209C000,
+
+    FSL_IMX6UL_GPT1_ADDR            = 0x02098000,
+
+    FSL_IMX6UL_CAN2_ADDR            = 0x02094000,
+    FSL_IMX6UL_CAN1_ADDR            = 0x02090000,
+    FSL_IMX6UL_CANn_SIZE            = (4 * KiB),
+
+    FSL_IMX6UL_PWM4_ADDR            = 0x0208C000,
+    FSL_IMX6UL_PWM3_ADDR            = 0x02088000,
+    FSL_IMX6UL_PWM2_ADDR            = 0x02084000,
+    FSL_IMX6UL_PWM1_ADDR            = 0x02080000,
+    FSL_IMX6UL_PWMn_SIZE            = 0x20,
+
+    FSL_IMX6UL_AIPS1_CFG_ADDR       = 0x0207C000,
+    FSL_IMX6UL_AIPS1_CFG_SIZE       = (16 * KiB),
+
+    FSL_IMX6UL_BEE_ADDR             = 0x02044000,
+    FSL_IMX6UL_BEE_SIZE             = (16 * KiB),
+
+    FSL_IMX6UL_TOUCH_CTRL_ADDR      = 0x02040000,
+    FSL_IMX6UL_TOUCH_CTRL_SIZE      = 0x100,
+
+    FSL_IMX6UL_SPBA_ADDR            = 0x0203C000,
+    FSL_IMX6UL_SPBA_SIZE            = 0x100,
+
+    FSL_IMX6UL_ASRC_ADDR            = 0x02034000,
+    FSL_IMX6UL_ASRC_SIZE            = 0x100,
+
+    FSL_IMX6UL_SAI3_ADDR            = 0x02030000,
+    FSL_IMX6UL_SAI2_ADDR            = 0x0202C000,
+    FSL_IMX6UL_SAI1_ADDR            = 0x02028000,
+    FSL_IMX6UL_SAIn_SIZE            = 0x200,
+
+    FSL_IMX6UL_UART8_ADDR           = 0x02024000,
+    FSL_IMX6UL_UART1_ADDR           = 0x02020000,
+    FSL_IMX6UL_UART7_ADDR           = 0x02018000,
+
+    FSL_IMX6UL_ECSPI4_ADDR          = 0x02014000,
+    FSL_IMX6UL_ECSPI3_ADDR          = 0x02010000,
+    FSL_IMX6UL_ECSPI2_ADDR          = 0x0200C000,
+    FSL_IMX6UL_ECSPI1_ADDR          = 0x02008000,
+
+    FSL_IMX6UL_SPDIF_ADDR           = 0x02004000,
+    FSL_IMX6UL_SPDIF_SIZE           = 0x100,
+    /* AIPS-1 End */
+
+    FSL_IMX6UL_BCH_ADDR             = 0x01808000,
+    FSL_IMX6UL_BCH_SIZE             = 0x200,
+
+    FSL_IMX6UL_GPMI_ADDR            = 0x01806000,
+    FSL_IMX6UL_GPMI_SIZE            = 0x200,
+
+    FSL_IMX6UL_APBH_DMA_ADDR        = 0x01804000,
+    FSL_IMX6UL_APBH_DMA_SIZE        = (4 * KiB),
+
+    FSL_IMX6UL_A7MPCORE_ADDR        = 0x00A00000,
+
+    FSL_IMX6UL_OCRAM_ALIAS_ADDR     = 0x00920000,
+    FSL_IMX6UL_OCRAM_ALIAS_SIZE     = (384 * KiB),
+
+    FSL_IMX6UL_OCRAM_MEM_ADDR       = 0x00900000,
+    FSL_IMX6UL_OCRAM_MEM_SIZE       = (128 * KiB),
+
+    FSL_IMX6UL_CAAM_MEM_ADDR        = 0x00100000,
+    FSL_IMX6UL_CAAM_MEM_SIZE        = (32 * KiB),
+
+    FSL_IMX6UL_ROM_ADDR             = 0x00000000,
+    FSL_IMX6UL_ROM_SIZE             = (96 * KiB),
+};
+
+enum FslIMX6ULIRQs {
+    FSL_IMX6UL_IOMUXC_IRQ   = 0,
+    FSL_IMX6UL_DAP_IRQ      = 1,
+    FSL_IMX6UL_SDMA_IRQ     = 2,
+    FSL_IMX6UL_TSC_IRQ      = 3,
+    FSL_IMX6UL_SNVS_IRQ     = 4,
+    FSL_IMX6UL_LCDIF_IRQ    = 5,
+    FSL_IMX6UL_BEE_IRQ      = 6,
+    FSL_IMX6UL_CSI_IRQ      = 7,
+    FSL_IMX6UL_PXP_IRQ      = 8,
+    FSL_IMX6UL_SCTR1_IRQ    = 9,
+    FSL_IMX6UL_SCTR2_IRQ    = 10,
+    FSL_IMX6UL_WDOG3_IRQ    = 11,
+    FSL_IMX6UL_APBH_DMA_IRQ = 13,
+    FSL_IMX6UL_WEIM_IRQ     = 14,
+    FSL_IMX6UL_RAWNAND1_IRQ = 15,
+    FSL_IMX6UL_RAWNAND2_IRQ = 16,
+    FSL_IMX6UL_UART6_IRQ    = 17,
+    FSL_IMX6UL_SRTC_IRQ     = 19,
+    FSL_IMX6UL_SRTC_SEC_IRQ = 20,
+    FSL_IMX6UL_CSU_IRQ      = 21,
+    FSL_IMX6UL_USDHC1_IRQ   = 22,
+    FSL_IMX6UL_USDHC2_IRQ   = 23,
+    FSL_IMX6UL_SAI3_IRQ     = 24,
+    FSL_IMX6UL_SAI32_IRQ    = 25,
+
+    FSL_IMX6UL_UART1_IRQ    = 26,
+    FSL_IMX6UL_UART2_IRQ    = 27,
+    FSL_IMX6UL_UART3_IRQ    = 28,
+    FSL_IMX6UL_UART4_IRQ    = 29,
+    FSL_IMX6UL_UART5_IRQ    = 30,
+
+    FSL_IMX6UL_ECSPI1_IRQ   = 31,
+    FSL_IMX6UL_ECSPI2_IRQ   = 32,
+    FSL_IMX6UL_ECSPI3_IRQ   = 33,
+    FSL_IMX6UL_ECSPI4_IRQ   = 34,
+
+    FSL_IMX6UL_I2C4_IRQ     = 35,
+    FSL_IMX6UL_I2C1_IRQ     = 36,
+    FSL_IMX6UL_I2C2_IRQ     = 37,
+    FSL_IMX6UL_I2C3_IRQ     = 38,
+
+    FSL_IMX6UL_UART7_IRQ    = 39,
+    FSL_IMX6UL_UART8_IRQ    = 40,
+
+    FSL_IMX6UL_USB1_IRQ     = 43,
+    FSL_IMX6UL_USB2_IRQ     = 42,
+    FSL_IMX6UL_USB_PHY1_IRQ = 44,
+    FSL_IMX6UL_USB_PHY2_IRQ = 45,
+
+    FSL_IMX6UL_CAAM_JQ2_IRQ = 46,
+    FSL_IMX6UL_CAAM_ERR_IRQ = 47,
+    FSL_IMX6UL_CAAM_RTIC_IRQ = 48,
+    FSL_IMX6UL_TEMP_IRQ     = 49,
+    FSL_IMX6UL_ASRC_IRQ     = 50,
+    FSL_IMX6UL_SPDIF_IRQ    = 52,
+    FSL_IMX6UL_PMU_REG_IRQ  = 54,
+    FSL_IMX6UL_GPT1_IRQ     = 55,
+
+    FSL_IMX6UL_EPIT1_IRQ    = 56,
+    FSL_IMX6UL_EPIT2_IRQ    = 57,
+
+    FSL_IMX6UL_GPIO1_INT7_IRQ = 58,
+    FSL_IMX6UL_GPIO1_INT6_IRQ = 59,
+    FSL_IMX6UL_GPIO1_INT5_IRQ = 60,
+    FSL_IMX6UL_GPIO1_INT4_IRQ = 61,
+    FSL_IMX6UL_GPIO1_INT3_IRQ = 62,
+    FSL_IMX6UL_GPIO1_INT2_IRQ = 63,
+    FSL_IMX6UL_GPIO1_INT1_IRQ = 64,
+    FSL_IMX6UL_GPIO1_INT0_IRQ = 65,
+    FSL_IMX6UL_GPIO1_LOW_IRQ  = 66,
+    FSL_IMX6UL_GPIO1_HIGH_IRQ = 67,
+    FSL_IMX6UL_GPIO2_LOW_IRQ  = 68,
+    FSL_IMX6UL_GPIO2_HIGH_IRQ = 69,
+    FSL_IMX6UL_GPIO3_LOW_IRQ  = 70,
+    FSL_IMX6UL_GPIO3_HIGH_IRQ = 71,
+    FSL_IMX6UL_GPIO4_LOW_IRQ  = 72,
+    FSL_IMX6UL_GPIO4_HIGH_IRQ = 73,
+    FSL_IMX6UL_GPIO5_LOW_IRQ  = 74,
+    FSL_IMX6UL_GPIO5_HIGH_IRQ = 75,
+
+    FSL_IMX6UL_WDOG1_IRQ    = 80,
+    FSL_IMX6UL_WDOG2_IRQ    = 81,
+
+    FSL_IMX6UL_KPP_IRQ      = 82,
+
+    FSL_IMX6UL_PWM1_IRQ     = 83,
+    FSL_IMX6UL_PWM2_IRQ     = 84,
+    FSL_IMX6UL_PWM3_IRQ     = 85,
+    FSL_IMX6UL_PWM4_IRQ     = 86,
+
+    FSL_IMX6UL_CCM1_IRQ     = 87,
+    FSL_IMX6UL_CCM2_IRQ     = 88,
+
+    FSL_IMX6UL_GPC_IRQ      = 89,
+
+    FSL_IMX6UL_SRC_IRQ      = 91,
+
+    FSL_IMX6UL_CPU_PERF_IRQ = 94,
+    FSL_IMX6UL_CPU_CTI_IRQ  = 95,
+
+    FSL_IMX6UL_SRC_WDOG_IRQ = 96,
+
+    FSL_IMX6UL_SAI1_IRQ     = 97,
+    FSL_IMX6UL_SAI2_IRQ     = 98,
+
+    FSL_IMX6UL_ADC1_IRQ     = 100,
+    FSL_IMX6UL_ADC2_IRQ     = 101,
+
+    FSL_IMX6UL_SJC_IRQ      = 104,
+
+    FSL_IMX6UL_CAAM_RING0_IRQ = 105,
+    FSL_IMX6UL_CAAM_RING1_IRQ = 106,
+
+    FSL_IMX6UL_QSPI_IRQ     = 107,
+
+    FSL_IMX6UL_TZASC_IRQ    = 108,
+
+    FSL_IMX6UL_GPT2_IRQ     = 109,
+
+    FSL_IMX6UL_CAN1_IRQ     = 110,
+    FSL_IMX6UL_CAN2_IRQ     = 111,
+
+    FSL_IMX6UL_SIM1_IRQ     = 112,
+    FSL_IMX6UL_SIM2_IRQ     = 113,
+
+    FSL_IMX6UL_PWM5_IRQ     = 114,
+    FSL_IMX6UL_PWM6_IRQ     = 115,
+    FSL_IMX6UL_PWM7_IRQ     = 116,
+    FSL_IMX6UL_PWM8_IRQ     = 117,
+
+    FSL_IMX6UL_ENET1_IRQ    = 118,
+    FSL_IMX6UL_ENET1_TIMER_IRQ = 119,
+    FSL_IMX6UL_ENET2_IRQ    = 120,
+    FSL_IMX6UL_ENET2_TIMER_IRQ = 121,
+
+    FSL_IMX6UL_PMU_CORE_IRQ = 127,
+    FSL_IMX6UL_MAX_IRQ      = 128,
+};
+
+#endif /* FSL_IMX6UL_H */
diff --git a/include/hw/arm/fsl-imx7.h b/include/hw/arm/fsl-imx7.h
new file mode 100644 (file)
index 0000000..411fa1c
--- /dev/null
@@ -0,0 +1,440 @@
+/*
+ * Copyright (c) 2018, Impinj, Inc.
+ *
+ * i.MX7 SoC definitions
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef FSL_IMX7_H
+#define FSL_IMX7_H
+
+#include "hw/cpu/a15mpcore.h"
+#include "hw/intc/imx_gpcv2.h"
+#include "hw/misc/imx7_ccm.h"
+#include "hw/misc/imx7_snvs.h"
+#include "hw/misc/imx7_gpr.h"
+#include "hw/misc/imx7_src.h"
+#include "hw/watchdog/wdt_imx2.h"
+#include "hw/gpio/imx_gpio.h"
+#include "hw/char/imx_serial.h"
+#include "hw/timer/imx_gpt.h"
+#include "hw/timer/imx_epit.h"
+#include "hw/i2c/imx_i2c.h"
+#include "hw/sd/sdhci.h"
+#include "hw/ssi/imx_spi.h"
+#include "hw/net/imx_fec.h"
+#include "hw/pci-host/designware.h"
+#include "hw/usb/chipidea.h"
+#include "cpu.h"
+#include "qom/object.h"
+#include "qemu/units.h"
+
+#define TYPE_FSL_IMX7 "fsl-imx7"
+OBJECT_DECLARE_SIMPLE_TYPE(FslIMX7State, FSL_IMX7)
+
+enum FslIMX7Configuration {
+    FSL_IMX7_NUM_CPUS         = 2,
+    FSL_IMX7_NUM_UARTS        = 7,
+    FSL_IMX7_NUM_ETHS         = 2,
+    FSL_IMX7_ETH_NUM_TX_RINGS = 3,
+    FSL_IMX7_NUM_USDHCS       = 3,
+    FSL_IMX7_NUM_WDTS         = 4,
+    FSL_IMX7_NUM_GPTS         = 4,
+    FSL_IMX7_NUM_IOMUXCS      = 2,
+    FSL_IMX7_NUM_GPIOS        = 7,
+    FSL_IMX7_NUM_I2CS         = 4,
+    FSL_IMX7_NUM_ECSPIS       = 4,
+    FSL_IMX7_NUM_USBS         = 3,
+    FSL_IMX7_NUM_ADCS         = 2,
+    FSL_IMX7_NUM_SAIS         = 3,
+    FSL_IMX7_NUM_CANS         = 2,
+    FSL_IMX7_NUM_PWMS         = 4,
+};
+
+struct FslIMX7State {
+    /*< private >*/
+    DeviceState    parent_obj;
+
+    /*< public >*/
+    ARMCPU             cpu[FSL_IMX7_NUM_CPUS];
+    A15MPPrivState     a7mpcore;
+    IMXGPTState        gpt[FSL_IMX7_NUM_GPTS];
+    IMXGPIOState       gpio[FSL_IMX7_NUM_GPIOS];
+    IMX7CCMState       ccm;
+    IMX7AnalogState    analog;
+    IMX7SNVSState      snvs;
+    IMX7SRCState       src;
+    IMXGPCv2State      gpcv2;
+    IMXSPIState        spi[FSL_IMX7_NUM_ECSPIS];
+    IMXI2CState        i2c[FSL_IMX7_NUM_I2CS];
+    IMXSerialState     uart[FSL_IMX7_NUM_UARTS];
+    IMXFECState        eth[FSL_IMX7_NUM_ETHS];
+    SDHCIState         usdhc[FSL_IMX7_NUM_USDHCS];
+    IMX2WdtState       wdt[FSL_IMX7_NUM_WDTS];
+    IMX7GPRState       gpr;
+    ChipideaState      usb[FSL_IMX7_NUM_USBS];
+    DesignwarePCIEHost pcie;
+    MemoryRegion       rom;
+    MemoryRegion       caam;
+    MemoryRegion       ocram;
+    MemoryRegion       ocram_epdc;
+    MemoryRegion       ocram_pxp;
+    MemoryRegion       ocram_s;
+
+    uint32_t           phy_num[FSL_IMX7_NUM_ETHS];
+    bool               phy_connected[FSL_IMX7_NUM_ETHS];
+};
+
+enum FslIMX7MemoryMap {
+    FSL_IMX7_MMDC_ADDR            = 0x80000000,
+    FSL_IMX7_MMDC_SIZE            = (2 * GiB),
+
+    FSL_IMX7_QSPI1_MEM_ADDR       = 0x60000000,
+    FSL_IMX7_QSPI1_MEM_SIZE       = (256 * MiB),
+
+    FSL_IMX7_PCIE1_MEM_ADDR       = 0x40000000,
+    FSL_IMX7_PCIE1_MEM_SIZE       = (256 * MiB),
+
+    FSL_IMX7_QSPI1_RX_BUF_ADDR    = 0x34000000,
+    FSL_IMX7_QSPI1_RX_BUF_SIZE    = (32 * MiB),
+
+    /* PCIe Peripherals */
+    FSL_IMX7_PCIE_REG_ADDR        = 0x33800000,
+
+    /* MMAP Peripherals */
+    FSL_IMX7_DMA_APBH_ADDR        = 0x33000000,
+    FSL_IMX7_DMA_APBH_SIZE        = 0x8000,
+
+    /* GPV configuration */
+    FSL_IMX7_GPV6_ADDR            = 0x32600000,
+    FSL_IMX7_GPV5_ADDR            = 0x32500000,
+    FSL_IMX7_GPV4_ADDR            = 0x32400000,
+    FSL_IMX7_GPV3_ADDR            = 0x32300000,
+    FSL_IMX7_GPV2_ADDR            = 0x32200000,
+    FSL_IMX7_GPV1_ADDR            = 0x32100000,
+    FSL_IMX7_GPV0_ADDR            = 0x32000000,
+    FSL_IMX7_GPVn_SIZE            = (1 * MiB),
+
+    /* Arm Peripherals */
+    FSL_IMX7_A7MPCORE_ADDR        = 0x31000000,
+
+    /* AIPS-3 Begin */
+
+    FSL_IMX7_ENET2_ADDR           = 0x30BF0000,
+    FSL_IMX7_ENET1_ADDR           = 0x30BE0000,
+
+    FSL_IMX7_SDMA_ADDR            = 0x30BD0000,
+    FSL_IMX7_SDMA_SIZE            = (4 * KiB),
+
+    FSL_IMX7_EIM_ADDR             = 0x30BC0000,
+    FSL_IMX7_EIM_SIZE             = (4 * KiB),
+
+    FSL_IMX7_QSPI_ADDR            = 0x30BB0000,
+    FSL_IMX7_QSPI_SIZE            = 0x8000,
+
+    FSL_IMX7_SIM2_ADDR            = 0x30BA0000,
+    FSL_IMX7_SIM1_ADDR            = 0x30B90000,
+    FSL_IMX7_SIMn_SIZE            = (4 * KiB),
+
+    FSL_IMX7_USDHC3_ADDR          = 0x30B60000,
+    FSL_IMX7_USDHC2_ADDR          = 0x30B50000,
+    FSL_IMX7_USDHC1_ADDR          = 0x30B40000,
+
+    FSL_IMX7_USB3_ADDR            = 0x30B30000,
+    FSL_IMX7_USBMISC3_ADDR        = 0x30B30200,
+    FSL_IMX7_USB2_ADDR            = 0x30B20000,
+    FSL_IMX7_USBMISC2_ADDR        = 0x30B20200,
+    FSL_IMX7_USB1_ADDR            = 0x30B10000,
+    FSL_IMX7_USBMISC1_ADDR        = 0x30B10200,
+    FSL_IMX7_USBMISCn_SIZE        = 0x200,
+
+    FSL_IMX7_USB_PL301_ADDR       = 0x30AD0000,
+    FSL_IMX7_USB_PL301_SIZE       = (64 * KiB),
+
+    FSL_IMX7_SEMAPHORE_HS_ADDR    = 0x30AC0000,
+    FSL_IMX7_SEMAPHORE_HS_SIZE    = (64 * KiB),
+
+    FSL_IMX7_MUB_ADDR             = 0x30AB0000,
+    FSL_IMX7_MUA_ADDR             = 0x30AA0000,
+    FSL_IMX7_MUn_SIZE             = (KiB),
+
+    FSL_IMX7_UART7_ADDR           = 0x30A90000,
+    FSL_IMX7_UART6_ADDR           = 0x30A80000,
+    FSL_IMX7_UART5_ADDR           = 0x30A70000,
+    FSL_IMX7_UART4_ADDR           = 0x30A60000,
+
+    FSL_IMX7_I2C4_ADDR            = 0x30A50000,
+    FSL_IMX7_I2C3_ADDR            = 0x30A40000,
+    FSL_IMX7_I2C2_ADDR            = 0x30A30000,
+    FSL_IMX7_I2C1_ADDR            = 0x30A20000,
+
+    FSL_IMX7_CAN2_ADDR            = 0x30A10000,
+    FSL_IMX7_CAN1_ADDR            = 0x30A00000,
+    FSL_IMX7_CANn_SIZE            = (4 * KiB),
+
+    FSL_IMX7_AIPS3_CONF_ADDR      = 0x309F0000,
+    FSL_IMX7_AIPS3_CONF_SIZE      = (64 * KiB),
+
+    FSL_IMX7_CAAM_ADDR            = 0x30900000,
+    FSL_IMX7_CAAM_SIZE            = (256 * KiB),
+
+    FSL_IMX7_SPBA_ADDR            = 0x308F0000,
+    FSL_IMX7_SPBA_SIZE            = (4 * KiB),
+
+    FSL_IMX7_SAI3_ADDR            = 0x308C0000,
+    FSL_IMX7_SAI2_ADDR            = 0x308B0000,
+    FSL_IMX7_SAI1_ADDR            = 0x308A0000,
+    FSL_IMX7_SAIn_SIZE            = (4 * KiB),
+
+    FSL_IMX7_UART3_ADDR           = 0x30880000,
+    /*
+     * Some versions of the reference manual claim that UART2 is @
+     * 0x30870000, but experiments with HW + DT files in upstream
+     * Linux kernel show that not to be true and that block is
+     * actually located @ 0x30890000
+     */
+    FSL_IMX7_UART2_ADDR           = 0x30890000,
+    FSL_IMX7_UART1_ADDR           = 0x30860000,
+
+    FSL_IMX7_ECSPI3_ADDR          = 0x30840000,
+    FSL_IMX7_ECSPI2_ADDR          = 0x30830000,
+    FSL_IMX7_ECSPI1_ADDR          = 0x30820000,
+    FSL_IMX7_ECSPIn_SIZE          = (4 * KiB),
+
+    /* AIPS-3 End */
+
+    /* AIPS-2 Begin */
+
+    FSL_IMX7_AXI_DEBUG_MON_ADDR   = 0x307E0000,
+    FSL_IMX7_AXI_DEBUG_MON_SIZE   = (64 * KiB),
+
+    FSL_IMX7_PERFMON2_ADDR        = 0x307D0000,
+    FSL_IMX7_PERFMON1_ADDR        = 0x307C0000,
+    FSL_IMX7_PERFMONn_SIZE        = (64 * KiB),
+
+    FSL_IMX7_DDRC_ADDR            = 0x307A0000,
+    FSL_IMX7_DDRC_SIZE            = (4 * KiB),
+
+    FSL_IMX7_DDRC_PHY_ADDR        = 0x30790000,
+    FSL_IMX7_DDRC_PHY_SIZE        = (4 * KiB),
+
+    FSL_IMX7_TZASC_ADDR           = 0x30780000,
+    FSL_IMX7_TZASC_SIZE           = (64 * KiB),
+
+    FSL_IMX7_MIPI_DSI_ADDR        = 0x30760000,
+    FSL_IMX7_MIPI_DSI_SIZE        = (4 * KiB),
+
+    FSL_IMX7_MIPI_CSI_ADDR        = 0x30750000,
+    FSL_IMX7_MIPI_CSI_SIZE        = 0x4000,
+
+    FSL_IMX7_LCDIF_ADDR           = 0x30730000,
+    FSL_IMX7_LCDIF_SIZE           = 0x8000,
+
+    FSL_IMX7_CSI_ADDR             = 0x30710000,
+    FSL_IMX7_CSI_SIZE             = (4 * KiB),
+
+    FSL_IMX7_PXP_ADDR             = 0x30700000,
+    FSL_IMX7_PXP_SIZE             = 0x4000,
+
+    FSL_IMX7_EPDC_ADDR            = 0x306F0000,
+    FSL_IMX7_EPDC_SIZE            = (4 * KiB),
+
+    FSL_IMX7_PCIE_PHY_ADDR        = 0x306D0000,
+    FSL_IMX7_PCIE_PHY_SIZE        = (4 * KiB),
+
+    FSL_IMX7_SYSCNT_CTRL_ADDR     = 0x306C0000,
+    FSL_IMX7_SYSCNT_CMP_ADDR      = 0x306B0000,
+    FSL_IMX7_SYSCNT_RD_ADDR       = 0x306A0000,
+
+    FSL_IMX7_PWM4_ADDR            = 0x30690000,
+    FSL_IMX7_PWM3_ADDR            = 0x30680000,
+    FSL_IMX7_PWM2_ADDR            = 0x30670000,
+    FSL_IMX7_PWM1_ADDR            = 0x30660000,
+    FSL_IMX7_PWMn_SIZE            = (4 * KiB),
+
+    FSL_IMX7_FlEXTIMER2_ADDR      = 0x30650000,
+    FSL_IMX7_FlEXTIMER1_ADDR      = 0x30640000,
+    FSL_IMX7_FLEXTIMERn_SIZE      = (4 * KiB),
+
+    FSL_IMX7_ECSPI4_ADDR          = 0x30630000,
+
+    FSL_IMX7_ADC2_ADDR            = 0x30620000,
+    FSL_IMX7_ADC1_ADDR            = 0x30610000,
+    FSL_IMX7_ADCn_SIZE            = (4 * KiB),
+
+    FSL_IMX7_AIPS2_CONF_ADDR      = 0x305F0000,
+    FSL_IMX7_AIPS2_CONF_SIZE      = (64 * KiB),
+
+    /* AIPS-2 End */
+
+    /* AIPS-1 Begin */
+
+    FSL_IMX7_CSU_ADDR             = 0x303E0000,
+    FSL_IMX7_CSU_SIZE             = (64 * KiB),
+
+    FSL_IMX7_RDC_ADDR             = 0x303D0000,
+    FSL_IMX7_RDC_SIZE             = (4 * KiB),
+
+    FSL_IMX7_SEMAPHORE2_ADDR      = 0x303C0000,
+    FSL_IMX7_SEMAPHORE1_ADDR      = 0x303B0000,
+    FSL_IMX7_SEMAPHOREn_SIZE      = (4 * KiB),
+
+    FSL_IMX7_GPC_ADDR             = 0x303A0000,
+
+    FSL_IMX7_SRC_ADDR             = 0x30390000,
+
+    FSL_IMX7_CCM_ADDR             = 0x30380000,
+
+    FSL_IMX7_SNVS_HP_ADDR         = 0x30370000,
+
+    FSL_IMX7_ANALOG_ADDR          = 0x30360000,
+
+    FSL_IMX7_OCOTP_ADDR           = 0x30350000,
+    FSL_IMX7_OCOTP_SIZE           = 0x10000,
+
+    FSL_IMX7_IOMUXC_GPR_ADDR      = 0x30340000,
+    FSL_IMX7_IOMUXC_GPR_SIZE      = (4 * KiB),
+
+    FSL_IMX7_IOMUXC_ADDR          = 0x30330000,
+    FSL_IMX7_IOMUXC_SIZE          = (4 * KiB),
+
+    FSL_IMX7_KPP_ADDR             = 0x30320000,
+    FSL_IMX7_KPP_SIZE             = (4 * KiB),
+
+    FSL_IMX7_ROMCP_ADDR           = 0x30310000,
+    FSL_IMX7_ROMCP_SIZE           = (4 * KiB),
+
+    FSL_IMX7_GPT4_ADDR            = 0x30300000,
+    FSL_IMX7_GPT3_ADDR            = 0x302F0000,
+    FSL_IMX7_GPT2_ADDR            = 0x302E0000,
+    FSL_IMX7_GPT1_ADDR            = 0x302D0000,
+
+    FSL_IMX7_IOMUXC_LPSR_ADDR     = 0x302C0000,
+    FSL_IMX7_IOMUXC_LPSR_SIZE     = (4 * KiB),
+
+    FSL_IMX7_WDOG4_ADDR           = 0x302B0000,
+    FSL_IMX7_WDOG3_ADDR           = 0x302A0000,
+    FSL_IMX7_WDOG2_ADDR           = 0x30290000,
+    FSL_IMX7_WDOG1_ADDR           = 0x30280000,
+
+    FSL_IMX7_IOMUXC_LPSR_GPR_ADDR = 0x30270000,
+
+    FSL_IMX7_GPIO7_ADDR           = 0x30260000,
+    FSL_IMX7_GPIO6_ADDR           = 0x30250000,
+    FSL_IMX7_GPIO5_ADDR           = 0x30240000,
+    FSL_IMX7_GPIO4_ADDR           = 0x30230000,
+    FSL_IMX7_GPIO3_ADDR           = 0x30220000,
+    FSL_IMX7_GPIO2_ADDR           = 0x30210000,
+    FSL_IMX7_GPIO1_ADDR           = 0x30200000,
+
+    FSL_IMX7_AIPS1_CONF_ADDR      = 0x301F0000,
+    FSL_IMX7_AIPS1_CONF_SIZE      = (64 * KiB),
+
+    FSL_IMX7_A7MPCORE_DAP_ADDR    = 0x30000000,
+    FSL_IMX7_A7MPCORE_DAP_SIZE    = (1 * MiB),
+
+    /* AIPS-1 End */
+
+    FSL_IMX7_EIM_CS0_ADDR         = 0x28000000,
+    FSL_IMX7_EIM_CS0_SIZE         = (128 * MiB),
+
+    FSL_IMX7_OCRAM_PXP_ADDR       = 0x00940000,
+    FSL_IMX7_OCRAM_PXP_SIZE       = (32 * KiB),
+
+    FSL_IMX7_OCRAM_EPDC_ADDR      = 0x00920000,
+    FSL_IMX7_OCRAM_EPDC_SIZE      = (128 * KiB),
+
+    FSL_IMX7_OCRAM_MEM_ADDR       = 0x00900000,
+    FSL_IMX7_OCRAM_MEM_SIZE       = (128 * KiB),
+
+    FSL_IMX7_TCMU_ADDR            = 0x00800000,
+    FSL_IMX7_TCMU_SIZE            = (32 * KiB),
+
+    FSL_IMX7_TCML_ADDR            = 0x007F8000,
+    FSL_IMX7_TCML_SIZE            = (32 * KiB),
+
+    FSL_IMX7_OCRAM_S_ADDR         = 0x00180000,
+    FSL_IMX7_OCRAM_S_SIZE         = (32 * KiB),
+
+    FSL_IMX7_CAAM_MEM_ADDR        = 0x00100000,
+    FSL_IMX7_CAAM_MEM_SIZE        = (32 * KiB),
+
+    FSL_IMX7_ROM_ADDR             = 0x00000000,
+    FSL_IMX7_ROM_SIZE             = (96 * KiB),
+};
+
+enum FslIMX7IRQs {
+    FSL_IMX7_USDHC1_IRQ   = 22,
+    FSL_IMX7_USDHC2_IRQ   = 23,
+    FSL_IMX7_USDHC3_IRQ   = 24,
+
+    FSL_IMX7_UART1_IRQ    = 26,
+    FSL_IMX7_UART2_IRQ    = 27,
+    FSL_IMX7_UART3_IRQ    = 28,
+    FSL_IMX7_UART4_IRQ    = 29,
+    FSL_IMX7_UART5_IRQ    = 30,
+    FSL_IMX7_UART6_IRQ    = 16,
+
+    FSL_IMX7_ECSPI1_IRQ   = 31,
+    FSL_IMX7_ECSPI2_IRQ   = 32,
+    FSL_IMX7_ECSPI3_IRQ   = 33,
+    FSL_IMX7_ECSPI4_IRQ   = 34,
+
+    FSL_IMX7_I2C1_IRQ     = 35,
+    FSL_IMX7_I2C2_IRQ     = 36,
+    FSL_IMX7_I2C3_IRQ     = 37,
+    FSL_IMX7_I2C4_IRQ     = 38,
+
+    FSL_IMX7_USB1_IRQ     = 43,
+    FSL_IMX7_USB2_IRQ     = 42,
+    FSL_IMX7_USB3_IRQ     = 40,
+
+    FSL_IMX7_GPT1_IRQ     = 55,
+    FSL_IMX7_GPT2_IRQ     = 54,
+    FSL_IMX7_GPT3_IRQ     = 53,
+    FSL_IMX7_GPT4_IRQ     = 52,
+
+    FSL_IMX7_GPIO1_LOW_IRQ  = 64,
+    FSL_IMX7_GPIO1_HIGH_IRQ = 65,
+    FSL_IMX7_GPIO2_LOW_IRQ  = 66,
+    FSL_IMX7_GPIO2_HIGH_IRQ = 67,
+    FSL_IMX7_GPIO3_LOW_IRQ  = 68,
+    FSL_IMX7_GPIO3_HIGH_IRQ = 69,
+    FSL_IMX7_GPIO4_LOW_IRQ  = 70,
+    FSL_IMX7_GPIO4_HIGH_IRQ = 71,
+    FSL_IMX7_GPIO5_LOW_IRQ  = 72,
+    FSL_IMX7_GPIO5_HIGH_IRQ = 73,
+    FSL_IMX7_GPIO6_LOW_IRQ  = 74,
+    FSL_IMX7_GPIO6_HIGH_IRQ = 75,
+    FSL_IMX7_GPIO7_LOW_IRQ  = 76,
+    FSL_IMX7_GPIO7_HIGH_IRQ = 77,
+
+    FSL_IMX7_WDOG1_IRQ    = 78,
+    FSL_IMX7_WDOG2_IRQ    = 79,
+    FSL_IMX7_WDOG3_IRQ    = 10,
+    FSL_IMX7_WDOG4_IRQ    = 109,
+
+    FSL_IMX7_PCI_INTA_IRQ = 125,
+    FSL_IMX7_PCI_INTB_IRQ = 124,
+    FSL_IMX7_PCI_INTC_IRQ = 123,
+    FSL_IMX7_PCI_INTD_IRQ = 122,
+
+    FSL_IMX7_UART7_IRQ    = 126,
+
+#define FSL_IMX7_ENET_IRQ(i, n)  ((n) + ((i) ? 100 : 118))
+
+    FSL_IMX7_MAX_IRQ      = 128,
+};
+
+#endif /* FSL_IMX7_H */
diff --git a/include/hw/arm/linux-boot-if.h b/include/hw/arm/linux-boot-if.h
new file mode 100644 (file)
index 0000000..c85f33b
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * hw/arm/linux-boot-if.h : interface for devices which need to behave
+ * specially for direct boot of an ARM Linux kernel
+ */
+
+#ifndef HW_ARM_LINUX_BOOT_IF_H
+#define HW_ARM_LINUX_BOOT_IF_H
+
+#include "qom/object.h"
+
+#define TYPE_ARM_LINUX_BOOT_IF "arm-linux-boot-if"
+typedef struct ARMLinuxBootIfClass ARMLinuxBootIfClass;
+DECLARE_CLASS_CHECKERS(ARMLinuxBootIfClass, ARM_LINUX_BOOT_IF,
+                       TYPE_ARM_LINUX_BOOT_IF)
+#define ARM_LINUX_BOOT_IF(obj) \
+    INTERFACE_CHECK(ARMLinuxBootIf, (obj), TYPE_ARM_LINUX_BOOT_IF)
+
+typedef struct ARMLinuxBootIf ARMLinuxBootIf;
+
+struct ARMLinuxBootIfClass {
+    /*< private >*/
+    InterfaceClass parent_class;
+
+    /*< public >*/
+    /** arm_linux_init: configure the device for a direct boot
+     * of an ARM Linux kernel (so that device reset puts it into
+     * the state the kernel expects after firmware initialization,
+     * rather than the true hardware reset state). This callback is
+     * called once after machine construction is complete (before the
+     * first system reset).
+     *
+     * @obj: the object implementing this interface
+     * @secure_boot: true if we are booting Secure, false for NonSecure
+     * (or for a CPU which doesn't support TrustZone)
+     */
+    void (*arm_linux_init)(ARMLinuxBootIf *obj, bool secure_boot);
+};
+
+#endif
diff --git a/include/hw/arm/msf2-soc.h b/include/hw/arm/msf2-soc.h
new file mode 100644 (file)
index 0000000..ce417a6
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Microsemi Smartfusion2 SoC
+ *
+ * Copyright (c) 2017 Subbaraya Sundeep <sundeep.lkml@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_ARM_MSF2_SOC_H
+#define HW_ARM_MSF2_SOC_H
+
+#include "hw/arm/armv7m.h"
+#include "hw/timer/mss-timer.h"
+#include "hw/misc/msf2-sysreg.h"
+#include "hw/ssi/mss-spi.h"
+#include "hw/net/msf2-emac.h"
+#include "hw/clock.h"
+#include "qom/object.h"
+
+#define TYPE_MSF2_SOC     "msf2-soc"
+OBJECT_DECLARE_SIMPLE_TYPE(MSF2State, MSF2_SOC)
+
+#define MSF2_NUM_SPIS         2
+#define MSF2_NUM_UARTS        2
+
+/*
+ * System timer consists of two programmable 32-bit
+ * decrementing counters that generate individual interrupts to
+ * the Cortex-M3 processor
+ */
+#define MSF2_NUM_TIMERS       2
+
+struct MSF2State {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    ARMv7MState armv7m;
+
+    char *cpu_type;
+    char *part_name;
+    uint64_t envm_size;
+    uint64_t esram_size;
+
+    Clock *m3clk;
+    Clock *refclk;
+    uint8_t apb0div;
+    uint8_t apb1div;
+
+    MSF2SysregState sysreg;
+    MSSTimerState timer;
+    MSSSpiState spi[MSF2_NUM_SPIS];
+    MSF2EmacState emac;
+
+    MemoryRegion nvm;
+    MemoryRegion nvm_alias;
+    MemoryRegion sram;
+};
+
+#endif
diff --git a/include/hw/arm/npcm7xx.h b/include/hw/arm/npcm7xx.h
new file mode 100644 (file)
index 0000000..72c7722
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ * Nuvoton NPCM7xx SoC family.
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM7XX_H
+#define NPCM7XX_H
+
+#include "hw/boards.h"
+#include "hw/adc/npcm7xx_adc.h"
+#include "hw/core/split-irq.h"
+#include "hw/cpu/a9mpcore.h"
+#include "hw/gpio/npcm7xx_gpio.h"
+#include "hw/i2c/npcm7xx_smbus.h"
+#include "hw/mem/npcm7xx_mc.h"
+#include "hw/misc/npcm7xx_clk.h"
+#include "hw/misc/npcm7xx_gcr.h"
+#include "hw/misc/npcm7xx_mft.h"
+#include "hw/misc/npcm7xx_pwm.h"
+#include "hw/misc/npcm7xx_rng.h"
+#include "hw/net/npcm7xx_emc.h"
+#include "hw/nvram/npcm7xx_otp.h"
+#include "hw/timer/npcm7xx_timer.h"
+#include "hw/ssi/npcm7xx_fiu.h"
+#include "hw/ssi/npcm_pspi.h"
+#include "hw/usb/hcd-ehci.h"
+#include "hw/usb/hcd-ohci.h"
+#include "target/arm/cpu.h"
+#include "hw/sd/npcm7xx_sdhci.h"
+
+#define NPCM7XX_MAX_NUM_CPUS    (2)
+
+/* The first half of the address space is reserved for DDR4 DRAM. */
+#define NPCM7XX_DRAM_BA         (0x00000000)
+#define NPCM7XX_DRAM_SZ         (2 * GiB)
+
+/* Magic addresses for setting up direct kernel booting and SMP boot stubs. */
+#define NPCM7XX_LOADER_START            (0x00000000)  /* Start of SDRAM */
+#define NPCM7XX_SMP_LOADER_START        (0xffff0000)  /* Boot ROM */
+#define NPCM7XX_SMP_BOOTREG_ADDR        (0xf080013c)  /* GCR.SCRPAD */
+#define NPCM7XX_GIC_CPU_IF_ADDR         (0xf03fe100)  /* GIC within A9 */
+#define NPCM7XX_BOARD_SETUP_ADDR        (0xffff1000)  /* Boot ROM */
+
+#define NPCM7XX_NR_PWM_MODULES 2
+
+struct NPCM7xxMachine {
+    MachineState        parent;
+    /*
+     * PWM fan splitter. each splitter connects to one PWM output and
+     * multiple MFT inputs.
+     */
+    SplitIRQ            fan_splitter[NPCM7XX_NR_PWM_MODULES *
+                                     NPCM7XX_PWM_PER_MODULE];
+};
+
+#define TYPE_NPCM7XX_MACHINE MACHINE_TYPE_NAME("npcm7xx")
+OBJECT_DECLARE_SIMPLE_TYPE(NPCM7xxMachine, NPCM7XX_MACHINE)
+
+typedef struct NPCM7xxMachineClass {
+    MachineClass        parent;
+
+    const char          *soc_type;
+} NPCM7xxMachineClass;
+
+#define NPCM7XX_MACHINE_CLASS(klass)                                    \
+    OBJECT_CLASS_CHECK(NPCM7xxMachineClass, (klass), TYPE_NPCM7XX_MACHINE)
+#define NPCM7XX_MACHINE_GET_CLASS(obj)                                  \
+    OBJECT_GET_CLASS(NPCM7xxMachineClass, (obj), TYPE_NPCM7XX_MACHINE)
+
+struct NPCM7xxState {
+    DeviceState         parent;
+
+    ARMCPU              cpu[NPCM7XX_MAX_NUM_CPUS];
+    A9MPPrivState       a9mpcore;
+
+    MemoryRegion        sram;
+    MemoryRegion        irom;
+    MemoryRegion        ram3;
+    MemoryRegion        *dram;
+
+    NPCM7xxGCRState     gcr;
+    NPCM7xxCLKState     clk;
+    NPCM7xxTimerCtrlState tim[3];
+    NPCM7xxADCState     adc;
+    NPCM7xxPWMState     pwm[NPCM7XX_NR_PWM_MODULES];
+    NPCM7xxMFTState     mft[8];
+    NPCM7xxOTPState     key_storage;
+    NPCM7xxOTPState     fuse_array;
+    NPCM7xxMCState      mc;
+    NPCM7xxRNGState     rng;
+    NPCM7xxGPIOState    gpio[8];
+    NPCM7xxSMBusState   smbus[16];
+    EHCISysBusState     ehci;
+    OHCISysBusState     ohci;
+    NPCM7xxFIUState     fiu[2];
+    NPCM7xxEMCState     emc[2];
+    NPCM7xxSDHCIState   mmc;
+    NPCMPSPIState       pspi[2];
+};
+
+#define TYPE_NPCM7XX    "npcm7xx"
+OBJECT_DECLARE_TYPE(NPCM7xxState, NPCM7xxClass, NPCM7XX)
+
+#define TYPE_NPCM730    "npcm730"
+#define TYPE_NPCM750    "npcm750"
+
+typedef struct NPCM7xxClass {
+    DeviceClass         parent;
+
+    /* Bitmask of modules that are permanently disabled on this chip. */
+    uint32_t            disabled_modules;
+    /* Number of CPU cores enabled in this SoC class (may be 1 or 2). */
+    uint32_t            num_cpus;
+} NPCM7xxClass;
+
+/**
+ * npcm7xx_load_kernel - Loads memory with everything needed to boot
+ * @machine - The machine containing the SoC to be booted.
+ * @soc - The SoC containing the CPU to be booted.
+ *
+ * This will set up the ARM boot info structure for the specific NPCM7xx
+ * derivative and call arm_load_kernel() to set up loading of the kernel, etc.
+ * into memory, if requested by the user.
+ */
+void npcm7xx_load_kernel(MachineState *machine, NPCM7xxState *soc);
+
+#endif /* NPCM7XX_H */
diff --git a/include/hw/arm/nrf51.h b/include/hw/arm/nrf51.h
new file mode 100644 (file)
index 0000000..de836be
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Nordic Semiconductor nRF51 Series SOC Common Defines
+ *
+ * This file hosts generic defines used in various nRF51 peripheral devices.
+ *
+ * Reference Manual: http://infocenter.nordicsemi.com/pdf/nRF51_RM_v3.0.pdf
+ * Product Spec: http://infocenter.nordicsemi.com/pdf/nRF51822_PS_v3.1.pdf
+ *
+ * Copyright 2018 Steffen Görtz <contrib@steffen-goertz.de>
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef NRF51_H
+#define NRF51_H
+
+#define NRF51_FLASH_BASE      0x00000000
+#define NRF51_FICR_BASE       0x10000000
+#define NRF51_FICR_SIZE       0x00000100
+#define NRF51_UICR_BASE       0x10001000
+#define NRF51_SRAM_BASE       0x20000000
+
+#define NRF51_IOMEM_BASE      0x40000000
+#define NRF51_IOMEM_SIZE      0x20000000
+
+#define NRF51_PERIPHERAL_SIZE 0x00001000
+#define NRF51_UART_BASE       0x40002000
+#define NRF51_TWI_BASE        0x40003000
+#define NRF51_TIMER_BASE      0x40008000
+#define NRF51_RNG_BASE        0x4000D000
+#define NRF51_NVMC_BASE       0x4001E000
+#define NRF51_GPIO_BASE       0x50000000
+
+#define NRF51_PRIVATE_BASE    0xF0000000
+#define NRF51_PRIVATE_SIZE    0x10000000
+
+#define NRF51_PAGE_SIZE       1024
+
+/* Trigger */
+#define NRF51_TRIGGER_TASK 0x01
+
+/* Events */
+#define NRF51_EVENT_CLEAR  0x00
+
+#endif
diff --git a/include/hw/arm/nrf51_soc.h b/include/hw/arm/nrf51_soc.h
new file mode 100644 (file)
index 0000000..e52a56e
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Nordic Semiconductor nRF51  SoC
+ *
+ * Copyright 2018 Joel Stanley <joel@jms.id.au>
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef NRF51_SOC_H
+#define NRF51_SOC_H
+
+#include "hw/sysbus.h"
+#include "hw/arm/armv7m.h"
+#include "hw/char/nrf51_uart.h"
+#include "hw/misc/nrf51_rng.h"
+#include "hw/gpio/nrf51_gpio.h"
+#include "hw/nvram/nrf51_nvm.h"
+#include "hw/timer/nrf51_timer.h"
+#include "hw/clock.h"
+#include "qom/object.h"
+
+#define TYPE_NRF51_SOC "nrf51-soc"
+OBJECT_DECLARE_SIMPLE_TYPE(NRF51State, NRF51_SOC)
+
+#define NRF51_NUM_TIMERS 3
+
+struct NRF51State {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    ARMv7MState cpu;
+
+    NRF51UARTState uart;
+    NRF51RNGState rng;
+    NRF51NVMState nvm;
+    NRF51GPIOState gpio;
+    NRF51TimerState timer[NRF51_NUM_TIMERS];
+
+    MemoryRegion iomem;
+    MemoryRegion sram;
+    MemoryRegion flash;
+    MemoryRegion clock;
+    MemoryRegion twi;
+
+    uint32_t sram_size;
+    uint32_t flash_size;
+
+    MemoryRegion *board_memory;
+
+    MemoryRegion container;
+
+    Clock *sysclk;
+};
+
+#endif
diff --git a/include/hw/arm/omap.h b/include/hw/arm/omap.h
new file mode 100644 (file)
index 0000000..067e941
--- /dev/null
@@ -0,0 +1,1040 @@
+/*
+ * Texas Instruments OMAP processors.
+ *
+ * Copyright (C) 2006-2008 Andrzej Zaborowski  <balrog@zabor.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_ARM_OMAP_H
+#define HW_ARM_OMAP_H
+
+#include "exec/memory.h"
+#include "hw/input/tsc2xxx.h"
+#include "target/arm/cpu-qom.h"
+#include "qemu/log.h"
+#include "qom/object.h"
+
+# define OMAP_EMIFS_BASE       0x00000000
+# define OMAP2_Q0_BASE         0x00000000
+# define OMAP_CS0_BASE         0x00000000
+# define OMAP_CS1_BASE         0x04000000
+# define OMAP_CS2_BASE         0x08000000
+# define OMAP_CS3_BASE         0x0c000000
+# define OMAP_EMIFF_BASE       0x10000000
+# define OMAP_IMIF_BASE                0x20000000
+# define OMAP_LOCALBUS_BASE    0x30000000
+# define OMAP2_Q1_BASE         0x40000000
+# define OMAP2_L4_BASE         0x48000000
+# define OMAP2_SRAM_BASE       0x40200000
+# define OMAP2_L3_BASE         0x68000000
+# define OMAP2_Q2_BASE         0x80000000
+# define OMAP2_Q3_BASE         0xc0000000
+# define OMAP_MPUI_BASE                0xe1000000
+
+# define OMAP730_SRAM_SIZE     0x00032000
+# define OMAP15XX_SRAM_SIZE    0x00030000
+# define OMAP16XX_SRAM_SIZE    0x00004000
+# define OMAP1611_SRAM_SIZE    0x0003e800
+# define OMAP242X_SRAM_SIZE    0x000a0000
+# define OMAP243X_SRAM_SIZE    0x00010000
+# define OMAP_CS0_SIZE         0x04000000
+# define OMAP_CS1_SIZE         0x04000000
+# define OMAP_CS2_SIZE         0x04000000
+# define OMAP_CS3_SIZE         0x04000000
+
+/* omap_clk.c */
+struct omap_mpu_state_s;
+typedef struct clk *omap_clk;
+omap_clk omap_findclk(struct omap_mpu_state_s *mpu, const char *name);
+void omap_clk_init(struct omap_mpu_state_s *mpu);
+void omap_clk_adduser(struct clk *clk, qemu_irq user);
+void omap_clk_get(omap_clk clk);
+void omap_clk_put(omap_clk clk);
+void omap_clk_onoff(omap_clk clk, int on);
+void omap_clk_canidle(omap_clk clk, int can);
+void omap_clk_setrate(omap_clk clk, int divide, int multiply);
+int64_t omap_clk_getrate(omap_clk clk);
+void omap_clk_reparent(omap_clk clk, omap_clk parent);
+
+/* omap_intc.c */
+#define TYPE_OMAP_INTC "common-omap-intc"
+typedef struct OMAPIntcState OMAPIntcState;
+DECLARE_INSTANCE_CHECKER(OMAPIntcState, OMAP_INTC, TYPE_OMAP_INTC)
+
+
+/*
+ * TODO: Ideally we should have a clock framework that
+ * let us wire these clocks up with QOM properties or links.
+ *
+ * qdev should support a generic means of defining a 'port' with
+ * an arbitrary interface for connecting two devices. Then we
+ * could reframe the omap clock API in terms of clock ports,
+ * and get some type safety. For now the best qdev provides is
+ * passing an arbitrary pointer.
+ * (It's not possible to pass in the string which is the clock
+ * name, because this device does not have the necessary information
+ * (ie the struct omap_mpu_state_s*) to do the clockname to pointer
+ * translation.)
+ */
+void omap_intc_set_iclk(OMAPIntcState *intc, omap_clk clk);
+void omap_intc_set_fclk(OMAPIntcState *intc, omap_clk clk);
+
+/* omap_i2c.c */
+#define TYPE_OMAP_I2C "omap_i2c"
+OBJECT_DECLARE_SIMPLE_TYPE(OMAPI2CState, OMAP_I2C)
+
+
+/* TODO: clock framework (see above) */
+void omap_i2c_set_iclk(OMAPI2CState *i2c, omap_clk clk);
+void omap_i2c_set_fclk(OMAPI2CState *i2c, omap_clk clk);
+
+/* omap_gpio.c */
+#define TYPE_OMAP1_GPIO "omap-gpio"
+typedef struct Omap1GpioState Omap1GpioState;
+DECLARE_INSTANCE_CHECKER(Omap1GpioState, OMAP1_GPIO,
+                         TYPE_OMAP1_GPIO)
+
+#define TYPE_OMAP2_GPIO "omap2-gpio"
+typedef struct Omap2GpioState Omap2GpioState;
+DECLARE_INSTANCE_CHECKER(Omap2GpioState, OMAP2_GPIO,
+                         TYPE_OMAP2_GPIO)
+
+/* TODO: clock framework (see above) */
+void omap_gpio_set_clk(Omap1GpioState *gpio, omap_clk clk);
+
+void omap2_gpio_set_iclk(Omap2GpioState *gpio, omap_clk clk);
+void omap2_gpio_set_fclk(Omap2GpioState *gpio, uint8_t i, omap_clk clk);
+
+/* OMAP2 l4 Interconnect */
+struct omap_l4_s;
+struct omap_l4_region_s {
+    hwaddr offset;
+    size_t size;
+    int access;
+};
+struct omap_l4_agent_info_s {
+    int ta;
+    int region;
+    int regions;
+    int ta_region;
+};
+struct omap_target_agent_s {
+    MemoryRegion iomem;
+    struct omap_l4_s *bus;
+    int regions;
+    const struct omap_l4_region_s *start;
+    hwaddr base;
+    uint32_t component;
+    uint32_t control;
+    uint32_t status;
+};
+struct omap_l4_s *omap_l4_init(MemoryRegion *address_space,
+                               hwaddr base, int ta_num);
+
+struct omap_target_agent_s;
+struct omap_target_agent_s *omap_l4ta_get(
+    struct omap_l4_s *bus,
+    const struct omap_l4_region_s *regions,
+    const struct omap_l4_agent_info_s *agents,
+    int cs);
+hwaddr omap_l4_attach(struct omap_target_agent_s *ta,
+                                         int region, MemoryRegion *mr);
+hwaddr omap_l4_region_base(struct omap_target_agent_s *ta,
+                                       int region);
+hwaddr omap_l4_region_size(struct omap_target_agent_s *ta,
+                                       int region);
+
+/* OMAP2 SDRAM controller */
+struct omap_sdrc_s;
+struct omap_sdrc_s *omap_sdrc_init(MemoryRegion *sysmem,
+                                   hwaddr base);
+void omap_sdrc_reset(struct omap_sdrc_s *s);
+
+/* OMAP2 general purpose memory controller */
+struct omap_gpmc_s;
+struct omap_gpmc_s *omap_gpmc_init(struct omap_mpu_state_s *mpu,
+                                   hwaddr base,
+                                   qemu_irq irq, qemu_irq drq);
+void omap_gpmc_reset(struct omap_gpmc_s *s);
+void omap_gpmc_attach(struct omap_gpmc_s *s, int cs, MemoryRegion *iomem);
+void omap_gpmc_attach_nand(struct omap_gpmc_s *s, int cs, DeviceState *nand);
+
+/*
+ * Common IRQ numbers for level 1 interrupt handler
+ * See /usr/include/asm-arm/arch-omap/irqs.h in Linux.
+ */
+# define OMAP_INT_CAMERA               1
+# define OMAP_INT_FIQ                  3
+# define OMAP_INT_RTDX                 6
+# define OMAP_INT_DSP_MMU_ABORT                7
+# define OMAP_INT_HOST                 8
+# define OMAP_INT_ABORT                        9
+# define OMAP_INT_BRIDGE_PRIV          13
+# define OMAP_INT_GPIO_BANK1           14
+# define OMAP_INT_UART3                        15
+# define OMAP_INT_TIMER3               16
+# define OMAP_INT_DMA_CH0_6            19
+# define OMAP_INT_DMA_CH1_7            20
+# define OMAP_INT_DMA_CH2_8            21
+# define OMAP_INT_DMA_CH3              22
+# define OMAP_INT_DMA_CH4              23
+# define OMAP_INT_DMA_CH5              24
+# define OMAP_INT_DMA_LCD              25
+# define OMAP_INT_TIMER1               26
+# define OMAP_INT_WD_TIMER             27
+# define OMAP_INT_BRIDGE_PUB           28
+# define OMAP_INT_TIMER2               30
+# define OMAP_INT_LCD_CTRL             31
+
+/*
+ * Common OMAP-15xx IRQ numbers for level 1 interrupt handler
+ */
+# define OMAP_INT_15XX_IH2_IRQ         0
+# define OMAP_INT_15XX_LB_MMU          17
+# define OMAP_INT_15XX_LOCAL_BUS       29
+
+/*
+ * OMAP-1510 specific IRQ numbers for level 1 interrupt handler
+ */
+# define OMAP_INT_1510_SPI_TX          4
+# define OMAP_INT_1510_SPI_RX          5
+# define OMAP_INT_1510_DSP_MAILBOX1    10
+# define OMAP_INT_1510_DSP_MAILBOX2    11
+
+/*
+ * OMAP-310 specific IRQ numbers for level 1 interrupt handler
+ */
+# define OMAP_INT_310_McBSP2_TX                4
+# define OMAP_INT_310_McBSP2_RX                5
+# define OMAP_INT_310_HSB_MAILBOX1     12
+# define OMAP_INT_310_HSAB_MMU         18
+
+/*
+ * OMAP-1610 specific IRQ numbers for level 1 interrupt handler
+ */
+# define OMAP_INT_1610_IH2_IRQ         0
+# define OMAP_INT_1610_IH2_FIQ         2
+# define OMAP_INT_1610_McBSP2_TX       4
+# define OMAP_INT_1610_McBSP2_RX       5
+# define OMAP_INT_1610_DSP_MAILBOX1    10
+# define OMAP_INT_1610_DSP_MAILBOX2    11
+# define OMAP_INT_1610_LCD_LINE                12
+# define OMAP_INT_1610_GPTIMER1                17
+# define OMAP_INT_1610_GPTIMER2                18
+# define OMAP_INT_1610_SSR_FIFO_0      29
+
+/*
+ * OMAP-730 specific IRQ numbers for level 1 interrupt handler
+ */
+# define OMAP_INT_730_IH2_FIQ          0
+# define OMAP_INT_730_IH2_IRQ          1
+# define OMAP_INT_730_USB_NON_ISO      2
+# define OMAP_INT_730_USB_ISO          3
+# define OMAP_INT_730_ICR              4
+# define OMAP_INT_730_EAC              5
+# define OMAP_INT_730_GPIO_BANK1       6
+# define OMAP_INT_730_GPIO_BANK2       7
+# define OMAP_INT_730_GPIO_BANK3       8
+# define OMAP_INT_730_McBSP2TX         10
+# define OMAP_INT_730_McBSP2RX         11
+# define OMAP_INT_730_McBSP2RX_OVF     12
+# define OMAP_INT_730_LCD_LINE         14
+# define OMAP_INT_730_GSM_PROTECT      15
+# define OMAP_INT_730_TIMER3           16
+# define OMAP_INT_730_GPIO_BANK5       17
+# define OMAP_INT_730_GPIO_BANK6       18
+# define OMAP_INT_730_SPGIO_WR         29
+
+/*
+ * Common IRQ numbers for level 2 interrupt handler
+ */
+# define OMAP_INT_KEYBOARD             1
+# define OMAP_INT_uWireTX              2
+# define OMAP_INT_uWireRX              3
+# define OMAP_INT_I2C                  4
+# define OMAP_INT_MPUIO                        5
+# define OMAP_INT_USB_HHC_1            6
+# define OMAP_INT_McBSP3TX             10
+# define OMAP_INT_McBSP3RX             11
+# define OMAP_INT_McBSP1TX             12
+# define OMAP_INT_McBSP1RX             13
+# define OMAP_INT_UART1                        14
+# define OMAP_INT_UART2                        15
+# define OMAP_INT_USB_W2FC             20
+# define OMAP_INT_1WIRE                        21
+# define OMAP_INT_OS_TIMER             22
+# define OMAP_INT_OQN                  23
+# define OMAP_INT_GAUGE_32K            24
+# define OMAP_INT_RTC_TIMER            25
+# define OMAP_INT_RTC_ALARM            26
+# define OMAP_INT_DSP_MMU              28
+
+/*
+ * OMAP-1510 specific IRQ numbers for level 2 interrupt handler
+ */
+# define OMAP_INT_1510_BT_MCSI1TX      16
+# define OMAP_INT_1510_BT_MCSI1RX      17
+# define OMAP_INT_1510_SoSSI_MATCH     19
+# define OMAP_INT_1510_MEM_STICK       27
+# define OMAP_INT_1510_COM_SPI_RO      31
+
+/*
+ * OMAP-310 specific IRQ numbers for level 2 interrupt handler
+ */
+# define OMAP_INT_310_FAC              0
+# define OMAP_INT_310_USB_HHC_2                7
+# define OMAP_INT_310_MCSI1_FE         16
+# define OMAP_INT_310_MCSI2_FE         17
+# define OMAP_INT_310_USB_W2FC_ISO     29
+# define OMAP_INT_310_USB_W2FC_NON_ISO 30
+# define OMAP_INT_310_McBSP2RX_OF      31
+
+/*
+ * OMAP-1610 specific IRQ numbers for level 2 interrupt handler
+ */
+# define OMAP_INT_1610_FAC             0
+# define OMAP_INT_1610_USB_HHC_2       7
+# define OMAP_INT_1610_USB_OTG         8
+# define OMAP_INT_1610_SoSSI           9
+# define OMAP_INT_1610_BT_MCSI1TX      16
+# define OMAP_INT_1610_BT_MCSI1RX      17
+# define OMAP_INT_1610_SoSSI_MATCH     19
+# define OMAP_INT_1610_MEM_STICK       27
+# define OMAP_INT_1610_McBSP2RX_OF     31
+# define OMAP_INT_1610_STI             32
+# define OMAP_INT_1610_STI_WAKEUP      33
+# define OMAP_INT_1610_GPTIMER3                34
+# define OMAP_INT_1610_GPTIMER4                35
+# define OMAP_INT_1610_GPTIMER5                36
+# define OMAP_INT_1610_GPTIMER6                37
+# define OMAP_INT_1610_GPTIMER7                38
+# define OMAP_INT_1610_GPTIMER8                39
+# define OMAP_INT_1610_GPIO_BANK2      40
+# define OMAP_INT_1610_GPIO_BANK3      41
+# define OMAP_INT_1610_MMC2            42
+# define OMAP_INT_1610_CF              43
+# define OMAP_INT_1610_WAKE_UP_REQ     46
+# define OMAP_INT_1610_GPIO_BANK4      48
+# define OMAP_INT_1610_SPI             49
+# define OMAP_INT_1610_DMA_CH6         53
+# define OMAP_INT_1610_DMA_CH7         54
+# define OMAP_INT_1610_DMA_CH8         55
+# define OMAP_INT_1610_DMA_CH9         56
+# define OMAP_INT_1610_DMA_CH10                57
+# define OMAP_INT_1610_DMA_CH11                58
+# define OMAP_INT_1610_DMA_CH12                59
+# define OMAP_INT_1610_DMA_CH13                60
+# define OMAP_INT_1610_DMA_CH14                61
+# define OMAP_INT_1610_DMA_CH15                62
+# define OMAP_INT_1610_NAND            63
+
+/*
+ * OMAP-730 specific IRQ numbers for level 2 interrupt handler
+ */
+# define OMAP_INT_730_HW_ERRORS                0
+# define OMAP_INT_730_NFIQ_PWR_FAIL    1
+# define OMAP_INT_730_CFCD             2
+# define OMAP_INT_730_CFIREQ           3
+# define OMAP_INT_730_I2C              4
+# define OMAP_INT_730_PCC              5
+# define OMAP_INT_730_MPU_EXT_NIRQ     6
+# define OMAP_INT_730_SPI_100K_1       7
+# define OMAP_INT_730_SYREN_SPI                8
+# define OMAP_INT_730_VLYNQ            9
+# define OMAP_INT_730_GPIO_BANK4       10
+# define OMAP_INT_730_McBSP1TX         11
+# define OMAP_INT_730_McBSP1RX         12
+# define OMAP_INT_730_McBSP1RX_OF      13
+# define OMAP_INT_730_UART_MODEM_IRDA_2        14
+# define OMAP_INT_730_UART_MODEM_1     15
+# define OMAP_INT_730_MCSI             16
+# define OMAP_INT_730_uWireTX          17
+# define OMAP_INT_730_uWireRX          18
+# define OMAP_INT_730_SMC_CD           19
+# define OMAP_INT_730_SMC_IREQ         20
+# define OMAP_INT_730_HDQ_1WIRE                21
+# define OMAP_INT_730_TIMER32K         22
+# define OMAP_INT_730_MMC_SDIO         23
+# define OMAP_INT_730_UPLD             24
+# define OMAP_INT_730_USB_HHC_1                27
+# define OMAP_INT_730_USB_HHC_2                28
+# define OMAP_INT_730_USB_GENI         29
+# define OMAP_INT_730_USB_OTG          30
+# define OMAP_INT_730_CAMERA_IF                31
+# define OMAP_INT_730_RNG              32
+# define OMAP_INT_730_DUAL_MODE_TIMER  33
+# define OMAP_INT_730_DBB_RF_EN                34
+# define OMAP_INT_730_MPUIO_KEYPAD     35
+# define OMAP_INT_730_SHA1_MD5         36
+# define OMAP_INT_730_SPI_100K_2       37
+# define OMAP_INT_730_RNG_IDLE         38
+# define OMAP_INT_730_MPUIO            39
+# define OMAP_INT_730_LLPC_LCD_CTRL_OFF        40
+# define OMAP_INT_730_LLPC_OE_FALLING  41
+# define OMAP_INT_730_LLPC_OE_RISING   42
+# define OMAP_INT_730_LLPC_VSYNC       43
+# define OMAP_INT_730_WAKE_UP_REQ      46
+# define OMAP_INT_730_DMA_CH6          53
+# define OMAP_INT_730_DMA_CH7          54
+# define OMAP_INT_730_DMA_CH8          55
+# define OMAP_INT_730_DMA_CH9          56
+# define OMAP_INT_730_DMA_CH10         57
+# define OMAP_INT_730_DMA_CH11         58
+# define OMAP_INT_730_DMA_CH12         59
+# define OMAP_INT_730_DMA_CH13         60
+# define OMAP_INT_730_DMA_CH14         61
+# define OMAP_INT_730_DMA_CH15         62
+# define OMAP_INT_730_NAND             63
+
+/*
+ * OMAP-24xx common IRQ numbers
+ */
+# define OMAP_INT_24XX_STI             4
+# define OMAP_INT_24XX_SYS_NIRQ                7
+# define OMAP_INT_24XX_L3_IRQ          10
+# define OMAP_INT_24XX_PRCM_MPU_IRQ    11
+# define OMAP_INT_24XX_SDMA_IRQ0       12
+# define OMAP_INT_24XX_SDMA_IRQ1       13
+# define OMAP_INT_24XX_SDMA_IRQ2       14
+# define OMAP_INT_24XX_SDMA_IRQ3       15
+# define OMAP_INT_243X_MCBSP2_IRQ      16
+# define OMAP_INT_243X_MCBSP3_IRQ      17
+# define OMAP_INT_243X_MCBSP4_IRQ      18
+# define OMAP_INT_243X_MCBSP5_IRQ      19
+# define OMAP_INT_24XX_GPMC_IRQ                20
+# define OMAP_INT_24XX_GUFFAW_IRQ      21
+# define OMAP_INT_24XX_IVA_IRQ         22
+# define OMAP_INT_24XX_EAC_IRQ         23
+# define OMAP_INT_24XX_CAM_IRQ         24
+# define OMAP_INT_24XX_DSS_IRQ         25
+# define OMAP_INT_24XX_MAIL_U0_MPU     26
+# define OMAP_INT_24XX_DSP_UMA         27
+# define OMAP_INT_24XX_DSP_MMU         28
+# define OMAP_INT_24XX_GPIO_BANK1      29
+# define OMAP_INT_24XX_GPIO_BANK2      30
+# define OMAP_INT_24XX_GPIO_BANK3      31
+# define OMAP_INT_24XX_GPIO_BANK4      32
+# define OMAP_INT_243X_GPIO_BANK5      33
+# define OMAP_INT_24XX_MAIL_U3_MPU     34
+# define OMAP_INT_24XX_WDT3            35
+# define OMAP_INT_24XX_WDT4            36
+# define OMAP_INT_24XX_GPTIMER1                37
+# define OMAP_INT_24XX_GPTIMER2                38
+# define OMAP_INT_24XX_GPTIMER3                39
+# define OMAP_INT_24XX_GPTIMER4                40
+# define OMAP_INT_24XX_GPTIMER5                41
+# define OMAP_INT_24XX_GPTIMER6                42
+# define OMAP_INT_24XX_GPTIMER7                43
+# define OMAP_INT_24XX_GPTIMER8                44
+# define OMAP_INT_24XX_GPTIMER9                45
+# define OMAP_INT_24XX_GPTIMER10       46
+# define OMAP_INT_24XX_GPTIMER11       47
+# define OMAP_INT_24XX_GPTIMER12       48
+# define OMAP_INT_24XX_PKA_IRQ         50
+# define OMAP_INT_24XX_SHA1MD5_IRQ     51
+# define OMAP_INT_24XX_RNG_IRQ         52
+# define OMAP_INT_24XX_MG_IRQ          53
+# define OMAP_INT_24XX_I2C1_IRQ                56
+# define OMAP_INT_24XX_I2C2_IRQ                57
+# define OMAP_INT_24XX_MCBSP1_IRQ_TX   59
+# define OMAP_INT_24XX_MCBSP1_IRQ_RX   60
+# define OMAP_INT_24XX_MCBSP2_IRQ_TX   62
+# define OMAP_INT_24XX_MCBSP2_IRQ_RX   63
+# define OMAP_INT_243X_MCBSP1_IRQ      64
+# define OMAP_INT_24XX_MCSPI1_IRQ      65
+# define OMAP_INT_24XX_MCSPI2_IRQ      66
+# define OMAP_INT_24XX_SSI1_IRQ0       67
+# define OMAP_INT_24XX_SSI1_IRQ1       68
+# define OMAP_INT_24XX_SSI2_IRQ0       69
+# define OMAP_INT_24XX_SSI2_IRQ1       70
+# define OMAP_INT_24XX_SSI_GDD_IRQ     71
+# define OMAP_INT_24XX_UART1_IRQ       72
+# define OMAP_INT_24XX_UART2_IRQ       73
+# define OMAP_INT_24XX_UART3_IRQ       74
+# define OMAP_INT_24XX_USB_IRQ_GEN     75
+# define OMAP_INT_24XX_USB_IRQ_NISO    76
+# define OMAP_INT_24XX_USB_IRQ_ISO     77
+# define OMAP_INT_24XX_USB_IRQ_HGEN    78
+# define OMAP_INT_24XX_USB_IRQ_HSOF    79
+# define OMAP_INT_24XX_USB_IRQ_OTG     80
+# define OMAP_INT_24XX_VLYNQ_IRQ       81
+# define OMAP_INT_24XX_MMC_IRQ         83
+# define OMAP_INT_24XX_MS_IRQ          84
+# define OMAP_INT_24XX_FAC_IRQ         85
+# define OMAP_INT_24XX_MCSPI3_IRQ      91
+# define OMAP_INT_243X_HS_USB_MC       92
+# define OMAP_INT_243X_HS_USB_DMA      93
+# define OMAP_INT_243X_CARKIT          94
+# define OMAP_INT_34XX_GPTIMER12       95
+
+/* omap_dma.c */
+enum omap_dma_model {
+    omap_dma_3_0,
+    omap_dma_3_1,
+    omap_dma_3_2,
+    omap_dma_4,
+};
+
+struct soc_dma_s;
+struct soc_dma_s *omap_dma_init(hwaddr base, qemu_irq *irqs,
+                MemoryRegion *sysmem,
+                qemu_irq lcd_irq, struct omap_mpu_state_s *mpu, omap_clk clk,
+                enum omap_dma_model model);
+struct soc_dma_s *omap_dma4_init(hwaddr base, qemu_irq *irqs,
+                MemoryRegion *sysmem,
+                struct omap_mpu_state_s *mpu, int fifo,
+                int chans, omap_clk iclk, omap_clk fclk);
+void omap_dma_reset(struct soc_dma_s *s);
+
+struct dma_irq_map {
+    int ih;
+    int intr;
+};
+
+/* Only used in OMAP DMA 3.x gigacells */
+enum omap_dma_port {
+    emiff = 0,
+    emifs,
+    imif,      /* omap16xx: ocp_t1 */
+    tipb,
+    local,     /* omap16xx: ocp_t2 */
+    tipb_mpui,
+    __omap_dma_port_last,
+};
+
+typedef enum {
+    constant = 0,
+    post_incremented,
+    single_index,
+    double_index,
+} omap_dma_addressing_t;
+
+/* Only used in OMAP DMA 3.x gigacells */
+struct omap_dma_lcd_channel_s {
+    enum omap_dma_port src;
+    hwaddr src_f1_top;
+    hwaddr src_f1_bottom;
+    hwaddr src_f2_top;
+    hwaddr src_f2_bottom;
+
+    /* Used in OMAP DMA 3.2 gigacell */
+    unsigned char brust_f1;
+    unsigned char pack_f1;
+    unsigned char data_type_f1;
+    unsigned char brust_f2;
+    unsigned char pack_f2;
+    unsigned char data_type_f2;
+    unsigned char end_prog;
+    unsigned char repeat;
+    unsigned char auto_init;
+    unsigned char priority;
+    unsigned char fs;
+    unsigned char running;
+    unsigned char bs;
+    unsigned char omap_3_1_compatible_disable;
+    unsigned char dst;
+    unsigned char lch_type;
+    int16_t element_index_f1;
+    int16_t element_index_f2;
+    int32_t frame_index_f1;
+    int32_t frame_index_f2;
+    uint16_t elements_f1;
+    uint16_t frames_f1;
+    uint16_t elements_f2;
+    uint16_t frames_f2;
+    omap_dma_addressing_t mode_f1;
+    omap_dma_addressing_t mode_f2;
+
+    /* Destination port is fixed.  */
+    int interrupts;
+    int condition;
+    int dual;
+
+    int current_frame;
+    hwaddr phys_framebuffer[2];
+    qemu_irq irq;
+    struct omap_mpu_state_s *mpu;
+} *omap_dma_get_lcdch(struct soc_dma_s *s);
+
+/*
+ * DMA request numbers for OMAP1
+ * See /usr/include/asm-arm/arch-omap/dma.h in Linux.
+ */
+# define OMAP_DMA_NO_DEVICE            0
+# define OMAP_DMA_MCSI1_TX             1
+# define OMAP_DMA_MCSI1_RX             2
+# define OMAP_DMA_I2C_RX               3
+# define OMAP_DMA_I2C_TX               4
+# define OMAP_DMA_EXT_NDMA_REQ0                5
+# define OMAP_DMA_EXT_NDMA_REQ1                6
+# define OMAP_DMA_UWIRE_TX             7
+# define OMAP_DMA_MCBSP1_TX            8
+# define OMAP_DMA_MCBSP1_RX            9
+# define OMAP_DMA_MCBSP3_TX            10
+# define OMAP_DMA_MCBSP3_RX            11
+# define OMAP_DMA_UART1_TX             12
+# define OMAP_DMA_UART1_RX             13
+# define OMAP_DMA_UART2_TX             14
+# define OMAP_DMA_UART2_RX             15
+# define OMAP_DMA_MCBSP2_TX            16
+# define OMAP_DMA_MCBSP2_RX            17
+# define OMAP_DMA_UART3_TX             18
+# define OMAP_DMA_UART3_RX             19
+# define OMAP_DMA_CAMERA_IF_RX         20
+# define OMAP_DMA_MMC_TX               21
+# define OMAP_DMA_MMC_RX               22
+# define OMAP_DMA_NAND                 23      /* Not in OMAP310 */
+# define OMAP_DMA_IRQ_LCD_LINE         24      /* Not in OMAP310 */
+# define OMAP_DMA_MEMORY_STICK         25      /* Not in OMAP310 */
+# define OMAP_DMA_USB_W2FC_RX0         26
+# define OMAP_DMA_USB_W2FC_RX1         27
+# define OMAP_DMA_USB_W2FC_RX2         28
+# define OMAP_DMA_USB_W2FC_TX0         29
+# define OMAP_DMA_USB_W2FC_TX1         30
+# define OMAP_DMA_USB_W2FC_TX2         31
+
+/* These are only for 1610 */
+# define OMAP_DMA_CRYPTO_DES_IN                32
+# define OMAP_DMA_SPI_TX               33
+# define OMAP_DMA_SPI_RX               34
+# define OMAP_DMA_CRYPTO_HASH          35
+# define OMAP_DMA_CCP_ATTN             36
+# define OMAP_DMA_CCP_FIFO_NOT_EMPTY   37
+# define OMAP_DMA_CMT_APE_TX_CHAN_0    38
+# define OMAP_DMA_CMT_APE_RV_CHAN_0    39
+# define OMAP_DMA_CMT_APE_TX_CHAN_1    40
+# define OMAP_DMA_CMT_APE_RV_CHAN_1    41
+# define OMAP_DMA_CMT_APE_TX_CHAN_2    42
+# define OMAP_DMA_CMT_APE_RV_CHAN_2    43
+# define OMAP_DMA_CMT_APE_TX_CHAN_3    44
+# define OMAP_DMA_CMT_APE_RV_CHAN_3    45
+# define OMAP_DMA_CMT_APE_TX_CHAN_4    46
+# define OMAP_DMA_CMT_APE_RV_CHAN_4    47
+# define OMAP_DMA_CMT_APE_TX_CHAN_5    48
+# define OMAP_DMA_CMT_APE_RV_CHAN_5    49
+# define OMAP_DMA_CMT_APE_TX_CHAN_6    50
+# define OMAP_DMA_CMT_APE_RV_CHAN_6    51
+# define OMAP_DMA_CMT_APE_TX_CHAN_7    52
+# define OMAP_DMA_CMT_APE_RV_CHAN_7    53
+# define OMAP_DMA_MMC2_TX              54
+# define OMAP_DMA_MMC2_RX              55
+# define OMAP_DMA_CRYPTO_DES_OUT       56
+
+/*
+ * DMA request numbers for the OMAP2
+ */
+# define OMAP24XX_DMA_NO_DEVICE                0
+# define OMAP24XX_DMA_XTI_DMA          1       /* Not in OMAP2420 */
+# define OMAP24XX_DMA_EXT_DMAREQ0      2
+# define OMAP24XX_DMA_EXT_DMAREQ1      3
+# define OMAP24XX_DMA_GPMC             4
+# define OMAP24XX_DMA_GFX              5       /* Not in OMAP2420 */
+# define OMAP24XX_DMA_DSS              6
+# define OMAP24XX_DMA_VLYNQ_TX         7       /* Not in OMAP2420 */
+# define OMAP24XX_DMA_CWT              8       /* Not in OMAP2420 */
+# define OMAP24XX_DMA_AES_TX           9       /* Not in OMAP2420 */
+# define OMAP24XX_DMA_AES_RX           10      /* Not in OMAP2420 */
+# define OMAP24XX_DMA_DES_TX           11      /* Not in OMAP2420 */
+# define OMAP24XX_DMA_DES_RX           12      /* Not in OMAP2420 */
+# define OMAP24XX_DMA_SHA1MD5_RX       13      /* Not in OMAP2420 */
+# define OMAP24XX_DMA_EXT_DMAREQ2      14
+# define OMAP24XX_DMA_EXT_DMAREQ3      15
+# define OMAP24XX_DMA_EXT_DMAREQ4      16
+# define OMAP24XX_DMA_EAC_AC_RD                17
+# define OMAP24XX_DMA_EAC_AC_WR                18
+# define OMAP24XX_DMA_EAC_MD_UL_RD     19
+# define OMAP24XX_DMA_EAC_MD_UL_WR     20
+# define OMAP24XX_DMA_EAC_MD_DL_RD     21
+# define OMAP24XX_DMA_EAC_MD_DL_WR     22
+# define OMAP24XX_DMA_EAC_BT_UL_RD     23
+# define OMAP24XX_DMA_EAC_BT_UL_WR     24
+# define OMAP24XX_DMA_EAC_BT_DL_RD     25
+# define OMAP24XX_DMA_EAC_BT_DL_WR     26
+# define OMAP24XX_DMA_I2C1_TX          27
+# define OMAP24XX_DMA_I2C1_RX          28
+# define OMAP24XX_DMA_I2C2_TX          29
+# define OMAP24XX_DMA_I2C2_RX          30
+# define OMAP24XX_DMA_MCBSP1_TX                31
+# define OMAP24XX_DMA_MCBSP1_RX                32
+# define OMAP24XX_DMA_MCBSP2_TX                33
+# define OMAP24XX_DMA_MCBSP2_RX                34
+# define OMAP24XX_DMA_SPI1_TX0         35
+# define OMAP24XX_DMA_SPI1_RX0         36
+# define OMAP24XX_DMA_SPI1_TX1         37
+# define OMAP24XX_DMA_SPI1_RX1         38
+# define OMAP24XX_DMA_SPI1_TX2         39
+# define OMAP24XX_DMA_SPI1_RX2         40
+# define OMAP24XX_DMA_SPI1_TX3         41
+# define OMAP24XX_DMA_SPI1_RX3         42
+# define OMAP24XX_DMA_SPI2_TX0         43
+# define OMAP24XX_DMA_SPI2_RX0         44
+# define OMAP24XX_DMA_SPI2_TX1         45
+# define OMAP24XX_DMA_SPI2_RX1         46
+
+# define OMAP24XX_DMA_UART1_TX         49
+# define OMAP24XX_DMA_UART1_RX         50
+# define OMAP24XX_DMA_UART2_TX         51
+# define OMAP24XX_DMA_UART2_RX         52
+# define OMAP24XX_DMA_UART3_TX         53
+# define OMAP24XX_DMA_UART3_RX         54
+# define OMAP24XX_DMA_USB_W2FC_TX0     55
+# define OMAP24XX_DMA_USB_W2FC_RX0     56
+# define OMAP24XX_DMA_USB_W2FC_TX1     57
+# define OMAP24XX_DMA_USB_W2FC_RX1     58
+# define OMAP24XX_DMA_USB_W2FC_TX2     59
+# define OMAP24XX_DMA_USB_W2FC_RX2     60
+# define OMAP24XX_DMA_MMC1_TX          61
+# define OMAP24XX_DMA_MMC1_RX          62
+# define OMAP24XX_DMA_MS               63      /* Not in OMAP2420 */
+# define OMAP24XX_DMA_EXT_DMAREQ5      64
+
+/* omap[123].c */
+/* OMAP2 gp timer */
+struct omap_gp_timer_s;
+struct omap_gp_timer_s *omap_gp_timer_init(struct omap_target_agent_s *ta,
+                qemu_irq irq, omap_clk fclk, omap_clk iclk);
+void omap_gp_timer_reset(struct omap_gp_timer_s *s);
+
+/* OMAP2 sysctimer */
+struct omap_synctimer_s;
+struct omap_synctimer_s *omap_synctimer_init(struct omap_target_agent_s *ta,
+                struct omap_mpu_state_s *mpu, omap_clk fclk, omap_clk iclk);
+void omap_synctimer_reset(struct omap_synctimer_s *s);
+
+struct omap_uart_s;
+struct omap_uart_s *omap_uart_init(hwaddr base,
+                qemu_irq irq, omap_clk fclk, omap_clk iclk,
+                qemu_irq txdma, qemu_irq rxdma,
+                const char *label, Chardev *chr);
+struct omap_uart_s *omap2_uart_init(MemoryRegion *sysmem,
+                struct omap_target_agent_s *ta,
+                qemu_irq irq, omap_clk fclk, omap_clk iclk,
+                qemu_irq txdma, qemu_irq rxdma,
+                const char *label, Chardev *chr);
+void omap_uart_reset(struct omap_uart_s *s);
+
+struct omap_mpuio_s;
+qemu_irq *omap_mpuio_in_get(struct omap_mpuio_s *s);
+void omap_mpuio_out_set(struct omap_mpuio_s *s, int line, qemu_irq handler);
+void omap_mpuio_key(struct omap_mpuio_s *s, int row, int col, int down);
+
+struct omap_uwire_s;
+void omap_uwire_attach(struct omap_uwire_s *s,
+                uWireSlave *slave, int chipselect);
+
+/* OMAP2 spi */
+struct omap_mcspi_s;
+struct omap_mcspi_s *omap_mcspi_init(struct omap_target_agent_s *ta, int chnum,
+                qemu_irq irq, qemu_irq *drq, omap_clk fclk, omap_clk iclk);
+void omap_mcspi_attach(struct omap_mcspi_s *s,
+                uint32_t (*txrx)(void *opaque, uint32_t, int), void *opaque,
+                int chipselect);
+void omap_mcspi_reset(struct omap_mcspi_s *s);
+
+struct I2SCodec {
+    void *opaque;
+
+    /* The CPU can call this if it is generating the clock signal on the
+     * i2s port.  The CODEC can ignore it if it is set up as a clock
+     * master and generates its own clock.  */
+    void (*set_rate)(void *opaque, int in, int out);
+
+    void (*tx_swallow)(void *opaque);
+    qemu_irq rx_swallow;
+    qemu_irq tx_start;
+
+    int tx_rate;
+    int cts;
+    int rx_rate;
+    int rts;
+
+    struct i2s_fifo_s {
+        uint8_t *fifo;
+        int len;
+        int start;
+        int size;
+    } in, out;
+};
+struct omap_mcbsp_s;
+void omap_mcbsp_i2s_attach(struct omap_mcbsp_s *s, I2SCodec *slave);
+
+void omap_tap_init(struct omap_target_agent_s *ta,
+                struct omap_mpu_state_s *mpu);
+
+/* omap_lcdc.c */
+struct omap_lcd_panel_s;
+void omap_lcdc_reset(struct omap_lcd_panel_s *s);
+struct omap_lcd_panel_s *omap_lcdc_init(MemoryRegion *sysmem,
+                                        hwaddr base,
+                                        qemu_irq irq,
+                                        struct omap_dma_lcd_channel_s *dma,
+                                        omap_clk clk);
+
+/* omap_dss.c */
+struct rfbi_chip_s {
+    void *opaque;
+    void (*write)(void *opaque, int dc, uint16_t value);
+    void (*block)(void *opaque, int dc, void *buf, size_t len, int pitch);
+    uint16_t (*read)(void *opaque, int dc);
+};
+struct omap_dss_s;
+void omap_dss_reset(struct omap_dss_s *s);
+struct omap_dss_s *omap_dss_init(struct omap_target_agent_s *ta,
+                MemoryRegion *sysmem,
+                hwaddr l3_base,
+                qemu_irq irq, qemu_irq drq,
+                omap_clk fck1, omap_clk fck2, omap_clk ck54m,
+                omap_clk ick1, omap_clk ick2);
+void omap_rfbi_attach(struct omap_dss_s *s, int cs, struct rfbi_chip_s *chip);
+
+/* omap_mmc.c */
+struct omap_mmc_s;
+struct omap_mmc_s *omap_mmc_init(hwaddr base,
+                MemoryRegion *sysmem,
+                BlockBackend *blk,
+                qemu_irq irq, qemu_irq dma[], omap_clk clk);
+struct omap_mmc_s *omap2_mmc_init(struct omap_target_agent_s *ta,
+                BlockBackend *blk, qemu_irq irq, qemu_irq dma[],
+                omap_clk fclk, omap_clk iclk);
+void omap_mmc_reset(struct omap_mmc_s *s);
+void omap_mmc_handlers(struct omap_mmc_s *s, qemu_irq ro, qemu_irq cover);
+void omap_mmc_enable(struct omap_mmc_s *s, int enable);
+
+/* omap_i2c.c */
+I2CBus *omap_i2c_bus(DeviceState *omap_i2c);
+
+# define cpu_is_omap310(cpu)           (cpu->mpu_model == omap310)
+# define cpu_is_omap1510(cpu)          (cpu->mpu_model == omap1510)
+# define cpu_is_omap1610(cpu)          (cpu->mpu_model == omap1610)
+# define cpu_is_omap1710(cpu)          (cpu->mpu_model == omap1710)
+# define cpu_is_omap2410(cpu)          (cpu->mpu_model == omap2410)
+# define cpu_is_omap2420(cpu)          (cpu->mpu_model == omap2420)
+# define cpu_is_omap2430(cpu)          (cpu->mpu_model == omap2430)
+# define cpu_is_omap3430(cpu)          (cpu->mpu_model == omap3430)
+# define cpu_is_omap3630(cpu)           (cpu->mpu_model == omap3630)
+
+# define cpu_is_omap15xx(cpu)          \
+        (cpu_is_omap310(cpu) || cpu_is_omap1510(cpu))
+# define cpu_is_omap16xx(cpu)          \
+        (cpu_is_omap1610(cpu) || cpu_is_omap1710(cpu))
+# define cpu_is_omap24xx(cpu)          \
+        (cpu_is_omap2410(cpu) || cpu_is_omap2420(cpu) || cpu_is_omap2430(cpu))
+
+# define cpu_class_omap1(cpu)          \
+        (cpu_is_omap15xx(cpu) || cpu_is_omap16xx(cpu))
+# define cpu_class_omap2(cpu)          cpu_is_omap24xx(cpu)
+# define cpu_class_omap3(cpu) \
+        (cpu_is_omap3430(cpu) || cpu_is_omap3630(cpu))
+
+struct omap_mpu_state_s {
+    enum omap_mpu_model {
+        omap310,
+        omap1510,
+        omap1610,
+        omap1710,
+        omap2410,
+        omap2420,
+        omap2422,
+        omap2423,
+        omap2430,
+        omap3430,
+        omap3630,
+    } mpu_model;
+
+    ARMCPU *cpu;
+
+    qemu_irq *drq;
+
+    qemu_irq wakeup;
+
+    MemoryRegion ulpd_pm_iomem;
+    MemoryRegion pin_cfg_iomem;
+    MemoryRegion id_iomem;
+    MemoryRegion id_iomem_e18;
+    MemoryRegion id_iomem_ed4;
+    MemoryRegion id_iomem_e20;
+    MemoryRegion mpui_iomem;
+    MemoryRegion tcmi_iomem;
+    MemoryRegion clkm_iomem;
+    MemoryRegion clkdsp_iomem;
+    MemoryRegion mpui_io_iomem;
+    MemoryRegion tap_iomem;
+    MemoryRegion imif_ram;
+    MemoryRegion sram;
+
+    struct omap_dma_port_if_s {
+        uint32_t (*read[3])(struct omap_mpu_state_s *s,
+                        hwaddr offset);
+        void (*write[3])(struct omap_mpu_state_s *s,
+                        hwaddr offset, uint32_t value);
+        int (*addr_valid)(struct omap_mpu_state_s *s,
+                        hwaddr addr);
+    } port[__omap_dma_port_last];
+
+    uint64_t sdram_size;
+    unsigned long sram_size;
+
+    /* MPUI-TIPB peripherals */
+    struct omap_uart_s *uart[3];
+
+    DeviceState *gpio;
+
+    struct omap_mcbsp_s *mcbsp1;
+    struct omap_mcbsp_s *mcbsp3;
+
+    /* MPU public TIPB peripherals */
+    struct omap_32khz_timer_s *os_timer;
+
+    struct omap_mmc_s *mmc;
+
+    struct omap_mpuio_s *mpuio;
+
+    struct omap_uwire_s *microwire;
+
+    struct omap_pwl_s *pwl;
+    struct omap_pwt_s *pwt;
+    DeviceState *i2c[2];
+
+    struct omap_rtc_s *rtc;
+
+    struct omap_mcbsp_s *mcbsp2;
+
+    struct omap_lpg_s *led[2];
+
+    /* MPU private TIPB peripherals */
+    DeviceState *ih[2];
+
+    struct soc_dma_s *dma;
+
+    struct omap_mpu_timer_s *timer[3];
+    struct omap_watchdog_timer_s *wdt;
+
+    struct omap_lcd_panel_s *lcd;
+
+    uint32_t ulpd_pm_regs[21];
+    int64_t ulpd_gauge_start;
+
+    uint32_t func_mux_ctrl[14];
+    uint32_t comp_mode_ctrl[1];
+    uint32_t pull_dwn_ctrl[4];
+    uint32_t gate_inh_ctrl[1];
+    uint32_t voltage_ctrl[1];
+    uint32_t test_dbg_ctrl[1];
+    uint32_t mod_conf_ctrl[1];
+    int compat1509;
+
+    uint32_t mpui_ctrl;
+
+    struct omap_tipb_bridge_s *private_tipb;
+    struct omap_tipb_bridge_s *public_tipb;
+
+    uint32_t tcmi_regs[17];
+
+    struct dpll_ctl_s *dpll[3];
+
+    omap_clk clks;
+    struct {
+        int cold_start;
+        int clocking_scheme;
+        uint16_t arm_ckctl;
+        uint16_t arm_idlect1;
+        uint16_t arm_idlect2;
+        uint16_t arm_ewupct;
+        uint16_t arm_rstct1;
+        uint16_t arm_rstct2;
+        uint16_t arm_ckout1;
+        int dpll1_mode;
+        uint16_t dsp_idlect1;
+        uint16_t dsp_idlect2;
+        uint16_t dsp_rstct2;
+    } clkm;
+
+    /* OMAP2-only peripherals */
+    struct omap_l4_s *l4;
+
+    struct omap_gp_timer_s *gptimer[12];
+    struct omap_synctimer_s *synctimer;
+
+    struct omap_prcm_s *prcm;
+    struct omap_sdrc_s *sdrc;
+    struct omap_gpmc_s *gpmc;
+    struct omap_sysctl_s *sysc;
+
+    struct omap_mcspi_s *mcspi[2];
+
+    struct omap_dss_s *dss;
+
+    struct omap_eac_s *eac;
+};
+
+/* omap1.c */
+struct omap_mpu_state_s *omap310_mpu_init(MemoryRegion *sdram,
+                const char *core);
+
+/* omap2.c */
+struct omap_mpu_state_s *omap2420_mpu_init(MemoryRegion *sdram,
+                const char *core);
+
+uint32_t omap_badwidth_read8(void *opaque, hwaddr addr);
+void omap_badwidth_write8(void *opaque, hwaddr addr,
+                uint32_t value);
+uint32_t omap_badwidth_read16(void *opaque, hwaddr addr);
+void omap_badwidth_write16(void *opaque, hwaddr addr,
+                uint32_t value);
+uint32_t omap_badwidth_read32(void *opaque, hwaddr addr);
+void omap_badwidth_write32(void *opaque, hwaddr addr,
+                uint32_t value);
+
+void omap_mpu_wakeup(void *opaque, int irq, int req);
+
+# define OMAP_BAD_REG(paddr)           \
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad register %#08"HWADDR_PRIx"\n", \
+                      __func__, paddr)
+# define OMAP_RO_REG(paddr)            \
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Read-only register %#08" \
+                                       HWADDR_PRIx "\n", \
+                      __func__, paddr)
+
+/* OMAP-specific Linux bootloader tags for the ATAG_BOARD area
+   (Board-specifc tags are not here)  */
+#define OMAP_TAG_CLOCK         0x4f01
+#define OMAP_TAG_MMC           0x4f02
+#define OMAP_TAG_SERIAL_CONSOLE        0x4f03
+#define OMAP_TAG_USB           0x4f04
+#define OMAP_TAG_LCD           0x4f05
+#define OMAP_TAG_GPIO_SWITCH   0x4f06
+#define OMAP_TAG_UART          0x4f07
+#define OMAP_TAG_FBMEM         0x4f08
+#define OMAP_TAG_STI_CONSOLE   0x4f09
+#define OMAP_TAG_CAMERA_SENSOR 0x4f0a
+#define OMAP_TAG_PARTITION     0x4f0b
+#define OMAP_TAG_TEA5761       0x4f10
+#define OMAP_TAG_TMP105                0x4f11
+#define OMAP_TAG_BOOT_REASON   0x4f80
+#define OMAP_TAG_FLASH_PART_STR        0x4f81
+#define OMAP_TAG_VERSION_STR   0x4f82
+
+enum {
+    OMAP_GPIOSW_TYPE_COVER     = 0 << 4,
+    OMAP_GPIOSW_TYPE_CONNECTION        = 1 << 4,
+    OMAP_GPIOSW_TYPE_ACTIVITY  = 2 << 4,
+};
+
+#define OMAP_GPIOSW_INVERTED   0x0001
+#define OMAP_GPIOSW_OUTPUT     0x0002
+
+# define OMAP_MPUI_REG_MASK            0x000007ff
+
+#endif
diff --git a/include/hw/arm/primecell.h b/include/hw/arm/primecell.h
new file mode 100644 (file)
index 0000000..7337c3b
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef PRIMECELL_H
+#define PRIMECELL_H
+
+/* Declarations for ARM PrimeCell based periperals.  */
+/* Also includes some devices that are currently only used by the
+   ARM boards.  */
+
+/* arm_sysctl GPIO lines */
+#define ARM_SYSCTL_GPIO_MMC_WPROT 0
+#define ARM_SYSCTL_GPIO_MMC_CARDIN 1
+
+#endif
diff --git a/include/hw/arm/pxa.h b/include/hw/arm/pxa.h
new file mode 100644 (file)
index 0000000..4c6caee
--- /dev/null
@@ -0,0 +1,197 @@
+/*
+ * Intel XScale PXA255/270 processor support.
+ *
+ * Copyright (c) 2006 Openedhand Ltd.
+ * Written by Andrzej Zaborowski <balrog@zabor.org>
+ *
+ * This code is licensed under the GNU GPL v2.
+ */
+
+#ifndef PXA_H
+#define PXA_H
+
+#include "exec/memory.h"
+#include "target/arm/cpu-qom.h"
+#include "hw/pcmcia.h"
+#include "qom/object.h"
+
+/* Interrupt numbers */
+# define PXA2XX_PIC_SSP3       0
+# define PXA2XX_PIC_USBH2      2
+# define PXA2XX_PIC_USBH1      3
+# define PXA2XX_PIC_KEYPAD     4
+# define PXA2XX_PIC_PWRI2C     6
+# define PXA25X_PIC_HWUART     7
+# define PXA27X_PIC_OST_4_11   7
+# define PXA2XX_PIC_GPIO_0     8
+# define PXA2XX_PIC_GPIO_1     9
+# define PXA2XX_PIC_GPIO_X     10
+# define PXA2XX_PIC_I2S        13
+# define PXA26X_PIC_ASSP       15
+# define PXA25X_PIC_NSSP       16
+# define PXA27X_PIC_SSP2       16
+# define PXA2XX_PIC_LCD                17
+# define PXA2XX_PIC_I2C                18
+# define PXA2XX_PIC_ICP                19
+# define PXA2XX_PIC_STUART     20
+# define PXA2XX_PIC_BTUART     21
+# define PXA2XX_PIC_FFUART     22
+# define PXA2XX_PIC_MMC                23
+# define PXA2XX_PIC_SSP                24
+# define PXA2XX_PIC_DMA                25
+# define PXA2XX_PIC_OST_0      26
+# define PXA2XX_PIC_RTC1HZ     30
+# define PXA2XX_PIC_RTCALARM   31
+
+/* DMA requests */
+# define PXA2XX_RX_RQ_I2S      2
+# define PXA2XX_TX_RQ_I2S      3
+# define PXA2XX_RX_RQ_BTUART   4
+# define PXA2XX_TX_RQ_BTUART   5
+# define PXA2XX_RX_RQ_FFUART   6
+# define PXA2XX_TX_RQ_FFUART   7
+# define PXA2XX_RX_RQ_SSP1     13
+# define PXA2XX_TX_RQ_SSP1     14
+# define PXA2XX_RX_RQ_SSP2     15
+# define PXA2XX_TX_RQ_SSP2     16
+# define PXA2XX_RX_RQ_ICP      17
+# define PXA2XX_TX_RQ_ICP      18
+# define PXA2XX_RX_RQ_STUART   19
+# define PXA2XX_TX_RQ_STUART   20
+# define PXA2XX_RX_RQ_MMCI     21
+# define PXA2XX_TX_RQ_MMCI     22
+# define PXA2XX_USB_RQ(x)      ((x) + 24)
+# define PXA2XX_RX_RQ_SSP3     66
+# define PXA2XX_TX_RQ_SSP3     67
+
+# define PXA2XX_SDRAM_BASE     0xa0000000
+# define PXA2XX_INTERNAL_BASE  0x5c000000
+# define PXA2XX_INTERNAL_SIZE  0x40000
+
+/* pxa2xx_pic.c */
+DeviceState *pxa2xx_pic_init(hwaddr base, ARMCPU *cpu);
+
+/* pxa2xx_gpio.c */
+DeviceState *pxa2xx_gpio_init(hwaddr base,
+                              ARMCPU *cpu, DeviceState *pic, int lines);
+void pxa2xx_gpio_read_notifier(DeviceState *dev, qemu_irq handler);
+
+/* pxa2xx_dma.c */
+DeviceState *pxa255_dma_init(hwaddr base, qemu_irq irq);
+DeviceState *pxa27x_dma_init(hwaddr base, qemu_irq irq);
+
+/* pxa2xx_lcd.c */
+typedef struct PXA2xxLCDState PXA2xxLCDState;
+PXA2xxLCDState *pxa2xx_lcdc_init(MemoryRegion *sysmem,
+                hwaddr base, qemu_irq irq);
+void pxa2xx_lcd_vsync_notifier(PXA2xxLCDState *s, qemu_irq handler);
+
+/* pxa2xx_mmci.c */
+#define TYPE_PXA2XX_MMCI "pxa2xx-mmci"
+OBJECT_DECLARE_SIMPLE_TYPE(PXA2xxMMCIState, PXA2XX_MMCI)
+
+PXA2xxMMCIState *pxa2xx_mmci_init(MemoryRegion *sysmem,
+                hwaddr base,
+                qemu_irq irq, qemu_irq rx_dma, qemu_irq tx_dma);
+void pxa2xx_mmci_handlers(PXA2xxMMCIState *s, qemu_irq readonly,
+                qemu_irq coverswitch);
+
+/* pxa2xx_pcmcia.c */
+#define TYPE_PXA2XX_PCMCIA "pxa2xx-pcmcia"
+OBJECT_DECLARE_SIMPLE_TYPE(PXA2xxPCMCIAState, PXA2XX_PCMCIA)
+
+int pxa2xx_pcmcia_attach(void *opaque, PCMCIACardState *card);
+int pxa2xx_pcmcia_detach(void *opaque);
+void pxa2xx_pcmcia_set_irq_cb(void *opaque, qemu_irq irq, qemu_irq cd_irq);
+
+/* pxa2xx_keypad.c */
+struct  keymap {
+    int8_t column;
+    int8_t row;
+};
+typedef struct PXA2xxKeyPadState PXA2xxKeyPadState;
+PXA2xxKeyPadState *pxa27x_keypad_init(MemoryRegion *sysmem,
+                                      hwaddr base,
+                                      qemu_irq irq);
+void pxa27x_register_keypad(PXA2xxKeyPadState *kp,
+                            const struct keymap *map, int size);
+
+/* pxa2xx.c */
+#define TYPE_PXA2XX_I2C "pxa2xx_i2c"
+OBJECT_DECLARE_SIMPLE_TYPE(PXA2xxI2CState, PXA2XX_I2C)
+
+PXA2xxI2CState *pxa2xx_i2c_init(hwaddr base,
+                qemu_irq irq, uint32_t page_size);
+I2CBus *pxa2xx_i2c_bus(PXA2xxI2CState *s);
+
+typedef struct PXA2xxI2SState PXA2xxI2SState;
+
+#define TYPE_PXA2XX_FIR "pxa2xx-fir"
+OBJECT_DECLARE_SIMPLE_TYPE(PXA2xxFIrState, PXA2XX_FIR)
+
+typedef struct {
+    ARMCPU *cpu;
+    DeviceState *pic;
+    qemu_irq reset;
+    MemoryRegion sdram;
+    MemoryRegion internal;
+    MemoryRegion cm_iomem;
+    MemoryRegion mm_iomem;
+    MemoryRegion pm_iomem;
+    DeviceState *dma;
+    DeviceState *gpio;
+    PXA2xxLCDState *lcd;
+    SSIBus **ssp;
+    PXA2xxI2CState *i2c[2];
+    PXA2xxMMCIState *mmc;
+    PXA2xxPCMCIAState *pcmcia[2];
+    PXA2xxI2SState *i2s;
+    PXA2xxFIrState *fir;
+    PXA2xxKeyPadState *kp;
+
+    /* Power management */
+    hwaddr pm_base;
+    uint32_t pm_regs[0x40];
+
+    /* Clock management */
+    hwaddr cm_base;
+    uint32_t cm_regs[4];
+    uint32_t clkcfg;
+
+    /* Memory management */
+    hwaddr mm_base;
+    uint32_t mm_regs[0x1a];
+
+    /* Performance monitoring */
+    uint32_t pmnc;
+} PXA2xxState;
+
+struct PXA2xxI2SState {
+    MemoryRegion iomem;
+    qemu_irq irq;
+    qemu_irq rx_dma;
+    qemu_irq tx_dma;
+    void (*data_req)(void *, int, int);
+
+    uint32_t control[2];
+    uint32_t status;
+    uint32_t mask;
+    uint32_t clk;
+
+    int enable;
+    int rx_len;
+    int tx_len;
+    void (*codec_out)(void *, uint32_t);
+    uint32_t (*codec_in)(void *);
+    void *opaque;
+
+    int fifo_len;
+    uint32_t fifo[16];
+};
+
+# define PA_FMT                        "0x%08lx"
+
+PXA2xxState *pxa270_init(unsigned int sdram_size, const char *revision);
+PXA2xxState *pxa255_init(unsigned int sdram_size);
+
+#endif /* PXA_H */
diff --git a/include/hw/arm/raspberrypi-fw-defs.h b/include/hw/arm/raspberrypi-fw-defs.h
new file mode 100644 (file)
index 0000000..4551fe7
--- /dev/null
@@ -0,0 +1,163 @@
+/*
+ * Raspberry Pi firmware definitions
+ *
+ * Copyright (C) 2022  Auriga LLC, based on Linux kernel
+ *   `include/soc/bcm2835/raspberrypi-firmware.h` (Copyright © 2015 Broadcom)
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef INCLUDE_HW_MISC_RASPBERRYPI_FW_DEFS_H_
+#define INCLUDE_HW_MISC_RASPBERRYPI_FW_DEFS_H_
+
+#include "qemu/osdep.h"
+
+enum rpi_firmware_property_tag {
+    RPI_FWREQ_PROPERTY_END =                           0,
+    RPI_FWREQ_GET_FIRMWARE_REVISION =                  0x00000001,
+    RPI_FWREQ_GET_FIRMWARE_VARIANT =                   0x00000002,
+    RPI_FWREQ_GET_FIRMWARE_HASH =                      0x00000003,
+
+    RPI_FWREQ_SET_CURSOR_INFO =                        0x00008010,
+    RPI_FWREQ_SET_CURSOR_STATE =                       0x00008011,
+
+    RPI_FWREQ_GET_BOARD_MODEL =                        0x00010001,
+    RPI_FWREQ_GET_BOARD_REVISION =                     0x00010002,
+    RPI_FWREQ_GET_BOARD_MAC_ADDRESS =                  0x00010003,
+    RPI_FWREQ_GET_BOARD_SERIAL =                       0x00010004,
+    RPI_FWREQ_GET_ARM_MEMORY =                         0x00010005,
+    RPI_FWREQ_GET_VC_MEMORY =                          0x00010006,
+    RPI_FWREQ_GET_CLOCKS =                             0x00010007,
+    RPI_FWREQ_GET_POWER_STATE =                        0x00020001,
+    RPI_FWREQ_GET_TIMING =                             0x00020002,
+    RPI_FWREQ_SET_POWER_STATE =                        0x00028001,
+    RPI_FWREQ_GET_CLOCK_STATE =                        0x00030001,
+    RPI_FWREQ_GET_CLOCK_RATE =                         0x00030002,
+    RPI_FWREQ_GET_VOLTAGE =                            0x00030003,
+    RPI_FWREQ_GET_MAX_CLOCK_RATE =                     0x00030004,
+    RPI_FWREQ_GET_MAX_VOLTAGE =                        0x00030005,
+    RPI_FWREQ_GET_TEMPERATURE =                        0x00030006,
+    RPI_FWREQ_GET_MIN_CLOCK_RATE =                     0x00030007,
+    RPI_FWREQ_GET_MIN_VOLTAGE =                        0x00030008,
+    RPI_FWREQ_GET_TURBO =                              0x00030009,
+    RPI_FWREQ_GET_MAX_TEMPERATURE =                    0x0003000a,
+    RPI_FWREQ_GET_STC =                                0x0003000b,
+    RPI_FWREQ_ALLOCATE_MEMORY =                        0x0003000c,
+    RPI_FWREQ_LOCK_MEMORY =                            0x0003000d,
+    RPI_FWREQ_UNLOCK_MEMORY =                          0x0003000e,
+    RPI_FWREQ_RELEASE_MEMORY =                         0x0003000f,
+    RPI_FWREQ_EXECUTE_CODE =                           0x00030010,
+    RPI_FWREQ_EXECUTE_QPU =                            0x00030011,
+    RPI_FWREQ_SET_ENABLE_QPU =                         0x00030012,
+    RPI_FWREQ_GET_DISPMANX_RESOURCE_MEM_HANDLE =       0x00030014,
+    RPI_FWREQ_GET_EDID_BLOCK =                         0x00030020,
+    RPI_FWREQ_GET_CUSTOMER_OTP =                       0x00030021,
+    RPI_FWREQ_GET_EDID_BLOCK_DISPLAY =                 0x00030023,
+    RPI_FWREQ_GET_DOMAIN_STATE =                       0x00030030,
+    RPI_FWREQ_GET_THROTTLED =                          0x00030046,
+    RPI_FWREQ_GET_CLOCK_MEASURED =                     0x00030047,
+    RPI_FWREQ_NOTIFY_REBOOT =                          0x00030048,
+    RPI_FWREQ_SET_CLOCK_STATE =                        0x00038001,
+    RPI_FWREQ_SET_CLOCK_RATE =                         0x00038002,
+    RPI_FWREQ_SET_VOLTAGE =                            0x00038003,
+    RPI_FWREQ_SET_MAX_CLOCK_RATE =                     0x00038004,
+    RPI_FWREQ_SET_MIN_CLOCK_RATE =                     0x00038007,
+    RPI_FWREQ_SET_TURBO =                              0x00038009,
+    RPI_FWREQ_SET_CUSTOMER_OTP =                       0x00038021,
+    RPI_FWREQ_SET_DOMAIN_STATE =                       0x00038030,
+    RPI_FWREQ_GET_GPIO_STATE =                         0x00030041,
+    RPI_FWREQ_SET_GPIO_STATE =                         0x00038041,
+    RPI_FWREQ_SET_SDHOST_CLOCK =                       0x00038042,
+    RPI_FWREQ_GET_GPIO_CONFIG =                        0x00030043,
+    RPI_FWREQ_SET_GPIO_CONFIG =                        0x00038043,
+    RPI_FWREQ_GET_PERIPH_REG =                         0x00030045,
+    RPI_FWREQ_SET_PERIPH_REG =                         0x00038045,
+    RPI_FWREQ_GET_POE_HAT_VAL =                        0x00030049,
+    RPI_FWREQ_SET_POE_HAT_VAL =                        0x00038049,
+    RPI_FWREQ_SET_POE_HAT_VAL_OLD =                    0x00030050,
+    RPI_FWREQ_NOTIFY_XHCI_RESET =                      0x00030058,
+    RPI_FWREQ_GET_REBOOT_FLAGS =                       0x00030064,
+    RPI_FWREQ_SET_REBOOT_FLAGS =                       0x00038064,
+    RPI_FWREQ_NOTIFY_DISPLAY_DONE =                    0x00030066,
+
+    /* Dispmanx TAGS */
+    RPI_FWREQ_FRAMEBUFFER_ALLOCATE =                   0x00040001,
+    RPI_FWREQ_FRAMEBUFFER_BLANK =                      0x00040002,
+    RPI_FWREQ_FRAMEBUFFER_GET_PHYSICAL_WIDTH_HEIGHT =  0x00040003,
+    RPI_FWREQ_FRAMEBUFFER_GET_VIRTUAL_WIDTH_HEIGHT =   0x00040004,
+    RPI_FWREQ_FRAMEBUFFER_GET_DEPTH =                  0x00040005,
+    RPI_FWREQ_FRAMEBUFFER_GET_PIXEL_ORDER =            0x00040006,
+    RPI_FWREQ_FRAMEBUFFER_GET_ALPHA_MODE =             0x00040007,
+    RPI_FWREQ_FRAMEBUFFER_GET_PITCH =                  0x00040008,
+    RPI_FWREQ_FRAMEBUFFER_GET_VIRTUAL_OFFSET =         0x00040009,
+    RPI_FWREQ_FRAMEBUFFER_GET_OVERSCAN =               0x0004000a,
+    RPI_FWREQ_FRAMEBUFFER_GET_PALETTE =                0x0004000b,
+    RPI_FWREQ_FRAMEBUFFER_GET_LAYER =                  0x0004000c,
+    RPI_FWREQ_FRAMEBUFFER_GET_TRANSFORM =              0x0004000d,
+    RPI_FWREQ_FRAMEBUFFER_GET_VSYNC =                  0x0004000e,
+    RPI_FWREQ_FRAMEBUFFER_GET_TOUCHBUF =               0x0004000f,
+    RPI_FWREQ_FRAMEBUFFER_GET_GPIOVIRTBUF =            0x00040010,
+    RPI_FWREQ_FRAMEBUFFER_RELEASE =                    0x00048001,
+    RPI_FWREQ_FRAMEBUFFER_GET_DISPLAY_ID =             0x00040016,
+    RPI_FWREQ_FRAMEBUFFER_SET_DISPLAY_NUM =            0x00048013,
+    RPI_FWREQ_FRAMEBUFFER_GET_NUM_DISPLAYS =           0x00040013,
+    RPI_FWREQ_FRAMEBUFFER_GET_DISPLAY_SETTINGS =       0x00040014,
+    RPI_FWREQ_FRAMEBUFFER_TEST_PHYSICAL_WIDTH_HEIGHT = 0x00044003,
+    RPI_FWREQ_FRAMEBUFFER_TEST_VIRTUAL_WIDTH_HEIGHT =  0x00044004,
+    RPI_FWREQ_FRAMEBUFFER_TEST_DEPTH =                 0x00044005,
+    RPI_FWREQ_FRAMEBUFFER_TEST_PIXEL_ORDER =           0x00044006,
+    RPI_FWREQ_FRAMEBUFFER_TEST_ALPHA_MODE =            0x00044007,
+    RPI_FWREQ_FRAMEBUFFER_TEST_VIRTUAL_OFFSET =        0x00044009,
+    RPI_FWREQ_FRAMEBUFFER_TEST_OVERSCAN =              0x0004400a,
+    RPI_FWREQ_FRAMEBUFFER_TEST_PALETTE =               0x0004400b,
+    RPI_FWREQ_FRAMEBUFFER_TEST_LAYER =                 0x0004400c,
+    RPI_FWREQ_FRAMEBUFFER_TEST_TRANSFORM =             0x0004400d,
+    RPI_FWREQ_FRAMEBUFFER_TEST_VSYNC =                 0x0004400e,
+    RPI_FWREQ_FRAMEBUFFER_SET_PHYSICAL_WIDTH_HEIGHT =  0x00048003,
+    RPI_FWREQ_FRAMEBUFFER_SET_VIRTUAL_WIDTH_HEIGHT =   0x00048004,
+    RPI_FWREQ_FRAMEBUFFER_SET_DEPTH =                  0x00048005,
+    RPI_FWREQ_FRAMEBUFFER_SET_PIXEL_ORDER =            0x00048006,
+    RPI_FWREQ_FRAMEBUFFER_SET_ALPHA_MODE =             0x00048007,
+    RPI_FWREQ_FRAMEBUFFER_SET_PITCH =                  0x00048008,
+    RPI_FWREQ_FRAMEBUFFER_SET_VIRTUAL_OFFSET =         0x00048009,
+    RPI_FWREQ_FRAMEBUFFER_SET_OVERSCAN =               0x0004800a,
+    RPI_FWREQ_FRAMEBUFFER_SET_PALETTE =                0x0004800b,
+
+    RPI_FWREQ_FRAMEBUFFER_SET_TOUCHBUF =               0x0004801f,
+    RPI_FWREQ_FRAMEBUFFER_SET_GPIOVIRTBUF =            0x00048020,
+    RPI_FWREQ_FRAMEBUFFER_SET_VSYNC =                  0x0004800e,
+    RPI_FWREQ_FRAMEBUFFER_SET_LAYER =                  0x0004800c,
+    RPI_FWREQ_FRAMEBUFFER_SET_TRANSFORM =              0x0004800d,
+    RPI_FWREQ_FRAMEBUFFER_SET_BACKLIGHT =              0x0004800f,
+
+    RPI_FWREQ_VCHIQ_INIT =                             0x00048010,
+
+    RPI_FWREQ_SET_PLANE =                              0x00048015,
+    RPI_FWREQ_GET_DISPLAY_TIMING =                     0x00040017,
+    RPI_FWREQ_SET_TIMING =                             0x00048017,
+    RPI_FWREQ_GET_DISPLAY_CFG =                        0x00040018,
+    RPI_FWREQ_SET_DISPLAY_POWER =                      0x00048019,
+    RPI_FWREQ_GET_COMMAND_LINE =                       0x00050001,
+    RPI_FWREQ_GET_DMA_CHANNELS =                       0x00060001,
+};
+
+enum rpi_firmware_clk_id {
+    RPI_FIRMWARE_EMMC_CLK_ID = 1,
+    RPI_FIRMWARE_UART_CLK_ID,
+    RPI_FIRMWARE_ARM_CLK_ID,
+    RPI_FIRMWARE_CORE_CLK_ID,
+    RPI_FIRMWARE_V3D_CLK_ID,
+    RPI_FIRMWARE_H264_CLK_ID,
+    RPI_FIRMWARE_ISP_CLK_ID,
+    RPI_FIRMWARE_SDRAM_CLK_ID,
+    RPI_FIRMWARE_PIXEL_CLK_ID,
+    RPI_FIRMWARE_PWM_CLK_ID,
+    RPI_FIRMWARE_HEVC_CLK_ID,
+    RPI_FIRMWARE_EMMC2_CLK_ID,
+    RPI_FIRMWARE_M2MC_CLK_ID,
+    RPI_FIRMWARE_PIXEL_BVB_CLK_ID,
+    RPI_FIRMWARE_VEC_CLK_ID,
+    RPI_FIRMWARE_NUM_CLK_ID,
+};
+
+#endif /* INCLUDE_HW_MISC_RASPBERRYPI_FW_DEFS_H_ */
diff --git a/include/hw/arm/raspi_platform.h b/include/hw/arm/raspi_platform.h
new file mode 100644 (file)
index 0000000..ede98e6
--- /dev/null
@@ -0,0 +1,183 @@
+/*
+ * bcm2708 aka bcm2835/2836 aka Raspberry Pi/Pi2 SoC platform defines
+ *
+ * These definitions are derived from those in Raspbian Linux at
+ * arch/arm/mach-{bcm2708,bcm2709}/include/mach/platform.h
+ * where they carry the following notice:
+ *
+ * Copyright (C) 2010 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * Various undocumented addresses and names come from Herman Hermitage's VC4
+ * documentation:
+ * https://github.com/hermanhermitage/videocoreiv/wiki/MMIO-Register-map
+ */
+
+#ifndef HW_ARM_RASPI_PLATFORM_H
+#define HW_ARM_RASPI_PLATFORM_H
+
+#define MSYNC_OFFSET            0x0000   /* Multicore Sync Block */
+#define CCPT_OFFSET             0x1000   /* Compact Camera Port 2 TX */
+#define INTE_OFFSET             0x2000   /* VC Interrupt controller */
+#define ST_OFFSET               0x3000   /* System Timer */
+#define TXP_OFFSET              0x4000   /* Transposer */
+#define JPEG_OFFSET             0x5000
+#define MPHI_OFFSET             0x6000   /* Message-based Parallel Host Intf. */
+#define DMA_OFFSET              0x7000   /* DMA controller, channels 0-14 */
+#define ARBA_OFFSET             0x9000
+#define BRDG_OFFSET             0xa000
+#define ARM_OFFSET              0xB000   /* ARM control block */
+#define ARMCTRL_OFFSET          (ARM_OFFSET + 0x000)
+#define ARMCTRL_IC_OFFSET       (ARM_OFFSET + 0x200) /* Interrupt controller */
+#define ARMCTRL_TIMER0_1_OFFSET (ARM_OFFSET + 0x400) /* Timer 0 and 1 (SP804) */
+#define ARMCTRL_0_SBM_OFFSET    (ARM_OFFSET + 0x800) /* User 0 (ARM) Semaphores
+                                                      * Doorbells & Mailboxes */
+#define PM_OFFSET               0x100000 /* Power Management */
+#define CPRMAN_OFFSET           0x101000 /* Clock Management */
+#define AVS_OFFSET              0x103000 /* Audio Video Standard */
+#define RNG_OFFSET              0x104000
+#define GPIO_OFFSET             0x200000
+#define UART0_OFFSET            0x201000 /* PL011 */
+#define MMCI0_OFFSET            0x202000 /* Legacy MMC */
+#define I2S_OFFSET              0x203000 /* PCM */
+#define SPI0_OFFSET             0x204000 /* SPI master */
+#define BSC0_OFFSET             0x205000 /* BSC0 I2C/TWI */
+#define PIXV0_OFFSET            0x206000
+#define PIXV1_OFFSET            0x207000
+#define DPI_OFFSET              0x208000
+#define DSI0_OFFSET             0x209000 /* Display Serial Interface */
+#define PWM_OFFSET              0x20c000
+#define PERM_OFFSET             0x20d000
+#define TEC_OFFSET              0x20e000
+#define OTP_OFFSET              0x20f000
+#define SLIM_OFFSET             0x210000 /* SLIMbus */
+#define CPG_OFFSET              0x211000
+#define THERMAL_OFFSET          0x212000
+#define AVSP_OFFSET             0x213000
+#define BSC_SL_OFFSET           0x214000 /* SPI slave (bootrom) */
+#define AUX_OFFSET              0x215000 /* AUX: UART1/SPI1/SPI2 */
+#define EMMC1_OFFSET            0x300000
+#define EMMC2_OFFSET            0x340000
+#define HVS_OFFSET              0x400000
+#define SMI_OFFSET              0x600000
+#define DSI1_OFFSET             0x700000
+#define UCAM_OFFSET             0x800000
+#define CMI_OFFSET              0x802000
+#define BSC1_OFFSET             0x804000 /* BSC1 I2C/TWI */
+#define BSC2_OFFSET             0x805000 /* BSC2 I2C/TWI */
+#define VECA_OFFSET             0x806000
+#define PIXV2_OFFSET            0x807000
+#define HDMI_OFFSET             0x808000
+#define HDCP_OFFSET             0x809000
+#define ARBR0_OFFSET            0x80a000
+#define DBUS_OFFSET             0x900000
+#define AVE0_OFFSET             0x910000
+#define USB_OTG_OFFSET          0x980000 /* DTC_OTG USB controller */
+#define V3D_OFFSET              0xc00000
+#define SDRAMC_OFFSET           0xe00000
+#define L2CC_OFFSET             0xe01000 /* Level 2 Cache controller */
+#define L1CC_OFFSET             0xe02000 /* Level 1 Cache controller */
+#define ARBR1_OFFSET            0xe04000
+#define DMA15_OFFSET            0xE05000 /* DMA controller, channel 15 */
+#define DCRC_OFFSET             0xe07000
+#define AXIP_OFFSET             0xe08000
+
+/* GPU interrupts */
+#define INTERRUPT_TIMER0               0
+#define INTERRUPT_TIMER1               1
+#define INTERRUPT_TIMER2               2
+#define INTERRUPT_TIMER3               3
+#define INTERRUPT_CODEC0               4
+#define INTERRUPT_CODEC1               5
+#define INTERRUPT_CODEC2               6
+#define INTERRUPT_JPEG                 7
+#define INTERRUPT_ISP                  8
+#define INTERRUPT_USB                  9
+#define INTERRUPT_3D                   10
+#define INTERRUPT_TRANSPOSER           11
+#define INTERRUPT_MULTICORESYNC0       12
+#define INTERRUPT_MULTICORESYNC1       13
+#define INTERRUPT_MULTICORESYNC2       14
+#define INTERRUPT_MULTICORESYNC3       15
+#define INTERRUPT_DMA0                 16
+#define INTERRUPT_DMA1                 17
+#define INTERRUPT_DMA2                 18
+#define INTERRUPT_DMA3                 19
+#define INTERRUPT_DMA4                 20
+#define INTERRUPT_DMA5                 21
+#define INTERRUPT_DMA6                 22
+#define INTERRUPT_DMA7                 23
+#define INTERRUPT_DMA8                 24
+#define INTERRUPT_DMA9                 25
+#define INTERRUPT_DMA10                26
+#define INTERRUPT_DMA11                27
+#define INTERRUPT_DMA12                28
+#define INTERRUPT_AUX                  29
+#define INTERRUPT_ARM                  30
+#define INTERRUPT_VPUDMA               31
+#define INTERRUPT_HOSTPORT             32
+#define INTERRUPT_VIDEOSCALER          33
+#define INTERRUPT_CCP2TX               34
+#define INTERRUPT_SDC                  35
+#define INTERRUPT_DSI0                 36
+#define INTERRUPT_AVE                  37
+#define INTERRUPT_CAM0                 38
+#define INTERRUPT_CAM1                 39
+#define INTERRUPT_HDMI0                40
+#define INTERRUPT_HDMI1                41
+#define INTERRUPT_PIXELVALVE1          42
+#define INTERRUPT_I2CSPISLV            43
+#define INTERRUPT_DSI1                 44
+#define INTERRUPT_PWA0                 45
+#define INTERRUPT_PWA1                 46
+#define INTERRUPT_CPR                  47
+#define INTERRUPT_SMI                  48
+#define INTERRUPT_GPIO0                49
+#define INTERRUPT_GPIO1                50
+#define INTERRUPT_GPIO2                51
+#define INTERRUPT_GPIO3                52
+#define INTERRUPT_I2C                  53
+#define INTERRUPT_SPI                  54
+#define INTERRUPT_I2SPCM               55
+#define INTERRUPT_SDIO                 56
+#define INTERRUPT_UART0                57
+#define INTERRUPT_SLIMBUS              58
+#define INTERRUPT_VEC                  59
+#define INTERRUPT_CPG                  60
+#define INTERRUPT_RNG                  61
+#define INTERRUPT_ARASANSDIO           62
+#define INTERRUPT_AVSPMON              63
+
+/* ARM CPU IRQs use a private number space */
+#define INTERRUPT_ARM_TIMER            0
+#define INTERRUPT_ARM_MAILBOX          1
+#define INTERRUPT_ARM_DOORBELL_0       2
+#define INTERRUPT_ARM_DOORBELL_1       3
+#define INTERRUPT_VPU0_HALTED          4
+#define INTERRUPT_VPU1_HALTED          5
+#define INTERRUPT_ILLEGAL_TYPE0        6
+#define INTERRUPT_ILLEGAL_TYPE1        7
+
+/* Clock rates */
+#define RPI_FIRMWARE_EMMC_CLK_RATE    50000000
+#define RPI_FIRMWARE_UART_CLK_RATE    3000000
+/*
+ * TODO: this is really SoC-specific; we might want to
+ * set it per-SoC if it turns out any guests care.
+ */
+#define RPI_FIRMWARE_CORE_CLK_RATE    350000000
+#define RPI_FIRMWARE_DEFAULT_CLK_RATE 700000000
+
+#endif
diff --git a/include/hw/arm/sharpsl.h b/include/hw/arm/sharpsl.h
new file mode 100644 (file)
index 0000000..e986b28
--- /dev/null
@@ -0,0 +1,17 @@
+/*
+ * Common declarations for the Zaurii.
+ *
+ * This file is licensed under the GNU GPL.
+ */
+
+#ifndef QEMU_SHARPSL_H
+#define QEMU_SHARPSL_H
+
+#include "exec/hwaddr.h"
+
+/* zaurus.c */
+
+#define SL_PXA_PARAM_BASE      0xa0000a00
+void sl_bootparam_write(hwaddr ptr);
+
+#endif
diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
new file mode 100644 (file)
index 0000000..fd8d772
--- /dev/null
@@ -0,0 +1,203 @@
+/*
+ * ARM SMMU Support
+ *
+ * Copyright (C) 2015-2016 Broadcom Corporation
+ * Copyright (c) 2017 Red Hat, Inc.
+ * Written by Prem Mallappa, Eric Auger
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef HW_ARM_SMMU_COMMON_H
+#define HW_ARM_SMMU_COMMON_H
+
+#include "hw/sysbus.h"
+#include "hw/pci/pci.h"
+#include "qom/object.h"
+
+#define SMMU_PCI_BUS_MAX                    256
+#define SMMU_PCI_DEVFN_MAX                  256
+#define SMMU_PCI_DEVFN(sid)                 (sid & 0xFF)
+
+/* VMSAv8-64 Translation constants and functions */
+#define VMSA_LEVELS                         4
+#define VMSA_MAX_S2_CONCAT                  16
+
+#define VMSA_STRIDE(gran)                   ((gran) - VMSA_LEVELS + 1)
+#define VMSA_BIT_LVL(isz, strd, lvl)        ((isz) - (strd) * \
+                                             (VMSA_LEVELS - (lvl)))
+#define VMSA_IDXMSK(isz, strd, lvl)         ((1ULL << \
+                                             VMSA_BIT_LVL(isz, strd, lvl)) - 1)
+
+/*
+ * Page table walk error types
+ */
+typedef enum {
+    SMMU_PTW_ERR_NONE,
+    SMMU_PTW_ERR_WALK_EABT,   /* Translation walk external abort */
+    SMMU_PTW_ERR_TRANSLATION, /* Translation fault */
+    SMMU_PTW_ERR_ADDR_SIZE,   /* Address Size fault */
+    SMMU_PTW_ERR_ACCESS,      /* Access fault */
+    SMMU_PTW_ERR_PERMISSION,  /* Permission fault */
+} SMMUPTWEventType;
+
+typedef struct SMMUPTWEventInfo {
+    int stage;
+    SMMUPTWEventType type;
+    dma_addr_t addr; /* fetched address that induced an abort, if any */
+} SMMUPTWEventInfo;
+
+typedef struct SMMUTransTableInfo {
+    bool disabled;             /* is the translation table disabled? */
+    uint64_t ttb;              /* TT base address */
+    uint8_t tsz;               /* input range, ie. 2^(64 -tsz)*/
+    uint8_t granule_sz;        /* granule page shift */
+    bool had;                  /* hierarchical attribute disable */
+} SMMUTransTableInfo;
+
+typedef struct SMMUTLBEntry {
+    IOMMUTLBEntry entry;
+    uint8_t level;
+    uint8_t granule;
+} SMMUTLBEntry;
+
+/* Stage-2 configuration. */
+typedef struct SMMUS2Cfg {
+    uint8_t tsz;            /* Size of IPA input region (S2T0SZ) */
+    uint8_t sl0;            /* Start level of translation (S2SL0) */
+    bool affd;              /* AF Fault Disable (S2AFFD) */
+    bool record_faults;     /* Record fault events (S2R) */
+    uint8_t granule_sz;     /* Granule page shift (based on S2TG) */
+    uint8_t eff_ps;         /* Effective PA output range (based on S2PS) */
+    uint16_t vmid;          /* Virtual Machine ID (S2VMID) */
+    uint64_t vttb;          /* Address of translation table base (S2TTB) */
+} SMMUS2Cfg;
+
+/*
+ * Generic structure populated by derived SMMU devices
+ * after decoding the configuration information and used as
+ * input to the page table walk
+ */
+typedef struct SMMUTransCfg {
+    /* Shared fields between stage-1 and stage-2. */
+    int stage;                 /* translation stage */
+    bool disabled;             /* smmu is disabled */
+    bool bypassed;             /* translation is bypassed */
+    bool aborted;              /* translation is aborted */
+    uint32_t iotlb_hits;       /* counts IOTLB hits */
+    uint32_t iotlb_misses;     /* counts IOTLB misses*/
+    /* Used by stage-1 only. */
+    bool aa64;                 /* arch64 or aarch32 translation table */
+    bool record_faults;        /* record fault events */
+    uint64_t ttb;              /* TT base address */
+    uint8_t oas;               /* output address width */
+    uint8_t tbi;               /* Top Byte Ignore */
+    uint16_t asid;
+    SMMUTransTableInfo tt[2];
+    /* Used by stage-2 only. */
+    struct SMMUS2Cfg s2cfg;
+} SMMUTransCfg;
+
+typedef struct SMMUDevice {
+    void               *smmu;
+    PCIBus             *bus;
+    int                devfn;
+    IOMMUMemoryRegion  iommu;
+    AddressSpace       as;
+    uint32_t           cfg_cache_hits;
+    uint32_t           cfg_cache_misses;
+    QLIST_ENTRY(SMMUDevice) next;
+} SMMUDevice;
+
+typedef struct SMMUPciBus {
+    PCIBus       *bus;
+    SMMUDevice   *pbdev[]; /* Parent array is sparse, so dynamically alloc */
+} SMMUPciBus;
+
+typedef struct SMMUIOTLBKey {
+    uint64_t iova;
+    uint16_t asid;
+    uint16_t vmid;
+    uint8_t tg;
+    uint8_t level;
+} SMMUIOTLBKey;
+
+struct SMMUState {
+    /* <private> */
+    SysBusDevice  dev;
+    const char *mrtypename;
+    MemoryRegion iomem;
+
+    GHashTable *smmu_pcibus_by_busptr;
+    GHashTable *configs; /* cache for configuration data */
+    GHashTable *iotlb;
+    SMMUPciBus *smmu_pcibus_by_bus_num[SMMU_PCI_BUS_MAX];
+    PCIBus *pci_bus;
+    QLIST_HEAD(, SMMUDevice) devices_with_notifiers;
+    uint8_t bus_num;
+    PCIBus *primary_bus;
+};
+
+struct SMMUBaseClass {
+    /* <private> */
+    SysBusDeviceClass parent_class;
+
+    /*< public >*/
+
+    DeviceRealize parent_realize;
+
+};
+
+#define TYPE_ARM_SMMU "arm-smmu"
+OBJECT_DECLARE_TYPE(SMMUState, SMMUBaseClass, ARM_SMMU)
+
+/* Return the SMMUPciBus handle associated to a PCI bus number */
+SMMUPciBus *smmu_find_smmu_pcibus(SMMUState *s, uint8_t bus_num);
+
+/* Return the stream ID of an SMMU device */
+static inline uint16_t smmu_get_sid(SMMUDevice *sdev)
+{
+    return PCI_BUILD_BDF(pci_bus_num(sdev->bus), sdev->devfn);
+}
+
+/**
+ * smmu_ptw - Perform the page table walk for a given iova / access flags
+ * pair, according to @cfg translation config
+ */
+int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
+             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info);
+
+/**
+ * select_tt - compute which translation table shall be used according to
+ * the input iova and translation config and return the TT specific info
+ */
+SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova);
+
+/* Return the iommu mr associated to @sid, or NULL if none */
+IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid);
+
+#define SMMU_IOTLB_MAX_SIZE 256
+
+SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
+                                SMMUTransTableInfo *tt, hwaddr iova);
+void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *entry);
+SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint16_t vmid, uint64_t iova,
+                                uint8_t tg, uint8_t level);
+void smmu_iotlb_inv_all(SMMUState *s);
+void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid);
+void smmu_iotlb_inv_vmid(SMMUState *s, uint16_t vmid);
+void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova,
+                         uint8_t tg, uint64_t num_pages, uint8_t ttl);
+
+/* Unmap the range of all the notifiers registered to any IOMMU mr */
+void smmu_inv_notifiers_all(SMMUState *s);
+
+#endif /* HW_ARM_SMMU_COMMON_H */
diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
new file mode 100644 (file)
index 0000000..d183a62
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2014-2016 Broadcom Corporation
+ * Copyright (c) 2017 Red Hat, Inc.
+ * Written by Prem Mallappa, Eric Auger
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_ARM_SMMUV3_H
+#define HW_ARM_SMMUV3_H
+
+#include "hw/arm/smmu-common.h"
+#include "qom/object.h"
+
+#define TYPE_SMMUV3_IOMMU_MEMORY_REGION "smmuv3-iommu-memory-region"
+
+typedef struct SMMUQueue {
+     uint64_t base; /* base register */
+     uint32_t prod;
+     uint32_t cons;
+     uint8_t entry_size;
+     uint8_t log2size;
+} SMMUQueue;
+
+struct SMMUv3State {
+    SMMUState     smmu_state;
+
+    uint32_t features;
+    uint8_t sid_size;
+    uint8_t sid_split;
+
+    uint32_t idr[6];
+    uint32_t iidr;
+    uint32_t aidr;
+    uint32_t cr[3];
+    uint32_t cr0ack;
+    uint32_t statusr;
+    uint32_t gbpa;
+    uint32_t irq_ctrl;
+    uint32_t gerror;
+    uint32_t gerrorn;
+    uint64_t gerror_irq_cfg0;
+    uint32_t gerror_irq_cfg1;
+    uint32_t gerror_irq_cfg2;
+    uint64_t strtab_base;
+    uint32_t strtab_base_cfg;
+    uint64_t eventq_irq_cfg0;
+    uint32_t eventq_irq_cfg1;
+    uint32_t eventq_irq_cfg2;
+
+    SMMUQueue eventq, cmdq;
+
+    qemu_irq     irq[4];
+    QemuMutex mutex;
+    char *stage;
+};
+
+typedef enum {
+    SMMU_IRQ_EVTQ,
+    SMMU_IRQ_PRIQ,
+    SMMU_IRQ_CMD_SYNC,
+    SMMU_IRQ_GERROR,
+} SMMUIrq;
+
+struct SMMUv3Class {
+    /*< private >*/
+    SMMUBaseClass smmu_base_class;
+    /*< public >*/
+
+    DeviceRealize parent_realize;
+    ResettablePhases parent_phases;
+};
+
+#define TYPE_ARM_SMMUV3   "arm-smmuv3"
+OBJECT_DECLARE_TYPE(SMMUv3State, SMMUv3Class, ARM_SMMUV3)
+
+#define STAGE1_SUPPORTED(s)      FIELD_EX32(s->idr[0], IDR0, S1P)
+#define STAGE2_SUPPORTED(s)      FIELD_EX32(s->idr[0], IDR0, S2P)
+
+#endif
diff --git a/include/hw/arm/soc_dma.h b/include/hw/arm/soc_dma.h
new file mode 100644 (file)
index 0000000..e93a749
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * On-chip DMA controller framework.
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ * Written by Andrzej Zaborowski <andrew@openedhand.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SOC_DMA_H
+#define HW_SOC_DMA_H
+
+#include "exec/hwaddr.h"
+
+struct soc_dma_s;
+struct soc_dma_ch_s;
+typedef void (*soc_dma_io_t)(void *opaque, uint8_t *buf, int len);
+typedef void (*soc_dma_transfer_t)(struct soc_dma_ch_s *ch);
+
+enum soc_dma_port_type {
+    soc_dma_port_mem,
+    soc_dma_port_fifo,
+    soc_dma_port_other,
+};
+
+enum soc_dma_access_type {
+    soc_dma_access_const,
+    soc_dma_access_linear,
+    soc_dma_access_other,
+};
+
+struct soc_dma_ch_s {
+    /* Private */
+    struct soc_dma_s *dma;
+    int num;
+    QEMUTimer *timer;
+
+    /* Set by soc_dma.c */
+    int enable;
+    int update;
+
+    /* This should be set by dma->setup_fn().  */
+    int bytes;
+    /* Initialised by the DMA module, call soc_dma_ch_update after writing.  */
+    enum soc_dma_access_type type[2];
+    hwaddr vaddr[2];   /* Updated by .transfer_fn().  */
+    /* Private */
+    void *paddr[2];
+    soc_dma_io_t io_fn[2];
+    void *io_opaque[2];
+
+    int running;
+    soc_dma_transfer_t transfer_fn;
+
+    /* Set and used by the DMA module.  */
+    void *opaque;
+};
+
+struct soc_dma_s {
+    /* Following fields are set by the SoC DMA module and can be used
+     * by anybody.  */
+    uint64_t drqbmp;   /* Is zeroed by soc_dma_reset() */
+    qemu_irq *drq;
+    void *opaque;
+    int64_t freq;
+    soc_dma_transfer_t transfer_fn;
+    soc_dma_transfer_t setup_fn;
+    /* Set by soc_dma_init() for use by the DMA module.  */
+    struct soc_dma_ch_s *ch;
+};
+
+/* Call to activate or stop a DMA channel.  */
+void soc_dma_set_request(struct soc_dma_ch_s *ch, int level);
+/* Call after every write to one of the following fields and before
+ * calling soc_dma_set_request(ch, 1):
+ *   ch->type[0...1],
+ *   ch->vaddr[0...1],
+ *   ch->paddr[0...1],
+ * or after a soc_dma_port_add_fifo() or soc_dma_port_add_mem().  */
+void soc_dma_ch_update(struct soc_dma_ch_s *ch);
+
+/* The SoC should call this when the DMA module is being reset.  */
+void soc_dma_reset(struct soc_dma_s *s);
+struct soc_dma_s *soc_dma_init(int n);
+
+void soc_dma_port_add_fifo(struct soc_dma_s *dma, hwaddr virt_base,
+                soc_dma_io_t fn, void *opaque, int out);
+void soc_dma_port_add_mem(struct soc_dma_s *dma, uint8_t *phys_base,
+                hwaddr virt_base, size_t size);
+
+static inline void soc_dma_port_add_fifo_in(struct soc_dma_s *dma,
+                hwaddr virt_base, soc_dma_io_t fn, void *opaque)
+{
+    return soc_dma_port_add_fifo(dma, virt_base, fn, opaque, 0);
+}
+
+static inline void soc_dma_port_add_fifo_out(struct soc_dma_s *dma,
+                hwaddr virt_base, soc_dma_io_t fn, void *opaque)
+{
+    return soc_dma_port_add_fifo(dma, virt_base, fn, opaque, 1);
+}
+
+#endif
diff --git a/include/hw/arm/stm32f100_soc.h b/include/hw/arm/stm32f100_soc.h
new file mode 100644 (file)
index 0000000..a74d7b3
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * STM32F100 SoC
+ *
+ * Copyright (c) 2021 Alexandre Iooss <erdnaxe@crans.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_ARM_STM32F100_SOC_H
+#define HW_ARM_STM32F100_SOC_H
+
+#include "hw/char/stm32f2xx_usart.h"
+#include "hw/ssi/stm32f2xx_spi.h"
+#include "hw/arm/armv7m.h"
+#include "qom/object.h"
+#include "hw/clock.h"
+
+#define TYPE_STM32F100_SOC "stm32f100-soc"
+OBJECT_DECLARE_SIMPLE_TYPE(STM32F100State, STM32F100_SOC)
+
+#define STM_NUM_USARTS 3
+#define STM_NUM_SPIS 2
+
+#define FLASH_BASE_ADDRESS 0x08000000
+#define FLASH_SIZE (128 * 1024)
+#define SRAM_BASE_ADDRESS 0x20000000
+#define SRAM_SIZE (8 * 1024)
+
+struct STM32F100State {
+    SysBusDevice parent_obj;
+
+    ARMv7MState armv7m;
+
+    STM32F2XXUsartState usart[STM_NUM_USARTS];
+    STM32F2XXSPIState spi[STM_NUM_SPIS];
+
+    MemoryRegion sram;
+    MemoryRegion flash;
+    MemoryRegion flash_alias;
+
+    Clock *sysclk;
+    Clock *refclk;
+};
+
+#endif
diff --git a/include/hw/arm/stm32f205_soc.h b/include/hw/arm/stm32f205_soc.h
new file mode 100644 (file)
index 0000000..4f4c8bb
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * STM32F205 SoC
+ *
+ * Copyright (c) 2014 Alistair Francis <alistair@alistair23.me>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_ARM_STM32F205_SOC_H
+#define HW_ARM_STM32F205_SOC_H
+
+#include "hw/misc/stm32f2xx_syscfg.h"
+#include "hw/timer/stm32f2xx_timer.h"
+#include "hw/char/stm32f2xx_usart.h"
+#include "hw/adc/stm32f2xx_adc.h"
+#include "hw/or-irq.h"
+#include "hw/ssi/stm32f2xx_spi.h"
+#include "hw/arm/armv7m.h"
+#include "hw/clock.h"
+#include "qom/object.h"
+
+#define TYPE_STM32F205_SOC "stm32f205-soc"
+OBJECT_DECLARE_SIMPLE_TYPE(STM32F205State, STM32F205_SOC)
+
+#define STM_NUM_USARTS 6
+#define STM_NUM_TIMERS 4
+#define STM_NUM_ADCS 3
+#define STM_NUM_SPIS 3
+
+#define FLASH_BASE_ADDRESS 0x08000000
+#define FLASH_SIZE (1024 * 1024)
+#define SRAM_BASE_ADDRESS 0x20000000
+#define SRAM_SIZE (128 * 1024)
+
+struct STM32F205State {
+    SysBusDevice parent_obj;
+
+    ARMv7MState armv7m;
+
+    STM32F2XXSyscfgState syscfg;
+    STM32F2XXUsartState usart[STM_NUM_USARTS];
+    STM32F2XXTimerState timer[STM_NUM_TIMERS];
+    STM32F2XXADCState adc[STM_NUM_ADCS];
+    STM32F2XXSPIState spi[STM_NUM_SPIS];
+
+    OrIRQState *adc_irqs;
+
+    MemoryRegion sram;
+    MemoryRegion flash;
+    MemoryRegion flash_alias;
+
+    Clock *sysclk;
+    Clock *refclk;
+};
+
+#endif
diff --git a/include/hw/arm/stm32f405_soc.h b/include/hw/arm/stm32f405_soc.h
new file mode 100644 (file)
index 0000000..d15c03c
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * STM32F405 SoC
+ *
+ * Copyright (c) 2014 Alistair Francis <alistair@alistair23.me>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_ARM_STM32F405_SOC_H
+#define HW_ARM_STM32F405_SOC_H
+
+#include "hw/misc/stm32f4xx_syscfg.h"
+#include "hw/timer/stm32f2xx_timer.h"
+#include "hw/char/stm32f2xx_usart.h"
+#include "hw/adc/stm32f2xx_adc.h"
+#include "hw/misc/stm32f4xx_exti.h"
+#include "hw/or-irq.h"
+#include "hw/ssi/stm32f2xx_spi.h"
+#include "hw/arm/armv7m.h"
+#include "qom/object.h"
+
+#define TYPE_STM32F405_SOC "stm32f405-soc"
+OBJECT_DECLARE_SIMPLE_TYPE(STM32F405State, STM32F405_SOC)
+
+#define STM_NUM_USARTS 7
+#define STM_NUM_TIMERS 4
+#define STM_NUM_ADCS 6
+#define STM_NUM_SPIS 6
+
+#define FLASH_BASE_ADDRESS 0x08000000
+#define FLASH_SIZE (1024 * 1024)
+#define SRAM_BASE_ADDRESS 0x20000000
+#define SRAM_SIZE (128 * 1024)
+#define CCM_BASE_ADDRESS 0x10000000
+#define CCM_SIZE (64 * 1024)
+
+struct STM32F405State {
+    SysBusDevice parent_obj;
+
+    ARMv7MState armv7m;
+
+    STM32F4xxSyscfgState syscfg;
+    STM32F4xxExtiState exti;
+    STM32F2XXUsartState usart[STM_NUM_USARTS];
+    STM32F2XXTimerState timer[STM_NUM_TIMERS];
+    OrIRQState adc_irqs;
+    STM32F2XXADCState adc[STM_NUM_ADCS];
+    STM32F2XXSPIState spi[STM_NUM_SPIS];
+
+    MemoryRegion ccm;
+    MemoryRegion sram;
+    MemoryRegion flash;
+    MemoryRegion flash_alias;
+
+    Clock *sysclk;
+    Clock *refclk;
+};
+
+#endif
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
new file mode 100644 (file)
index 0000000..f692398
--- /dev/null
@@ -0,0 +1,210 @@
+/*
+ *
+ * Copyright (c) 2015 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Emulate a virtual board which works by passing Linux all the information
+ * it needs about what devices are present via the device tree.
+ * There are some restrictions about what we can do here:
+ *  + we can only present devices whose Linux drivers will work based
+ *    purely on the device tree with no platform data at all
+ *  + we want to present a very stripped-down minimalist platform,
+ *    both because this reduces the security attack surface from the guest
+ *    and also because it reduces our exposure to being broken when
+ *    the kernel updates its device tree bindings and requires further
+ *    information in a device binding that we aren't providing.
+ * This is essentially the same approach kvmtool uses.
+ */
+
+#ifndef QEMU_ARM_VIRT_H
+#define QEMU_ARM_VIRT_H
+
+#include "exec/hwaddr.h"
+#include "qemu/notify.h"
+#include "hw/boards.h"
+#include "hw/arm/boot.h"
+#include "hw/arm/bsa.h"
+#include "hw/block/flash.h"
+#include "sysemu/kvm.h"
+#include "hw/intc/arm_gicv3_common.h"
+#include "qom/object.h"
+
+#define NUM_GICV2M_SPIS       64
+#define NUM_VIRTIO_TRANSPORTS 32
+#define NUM_SMMU_IRQS          4
+
+/* See Linux kernel arch/arm64/include/asm/pvclock-abi.h */
+#define PVTIME_SIZE_PER_CPU 64
+
+enum {
+    VIRT_FLASH,
+    VIRT_MEM,
+    VIRT_CPUPERIPHS,
+    VIRT_GIC_DIST,
+    VIRT_GIC_CPU,
+    VIRT_GIC_V2M,
+    VIRT_GIC_HYP,
+    VIRT_GIC_VCPU,
+    VIRT_GIC_ITS,
+    VIRT_GIC_REDIST,
+    VIRT_SMMU,
+    VIRT_UART,
+    VIRT_MMIO,
+    VIRT_RTC,
+    VIRT_FW_CFG,
+    VIRT_PCIE,
+    VIRT_PCIE_MMIO,
+    VIRT_PCIE_PIO,
+    VIRT_PCIE_ECAM,
+    VIRT_PLATFORM_BUS,
+    VIRT_GPIO,
+    VIRT_SECURE_UART,
+    VIRT_SECURE_MEM,
+    VIRT_SECURE_GPIO,
+    VIRT_PCDIMM_ACPI,
+    VIRT_ACPI_GED,
+    VIRT_NVDIMM_ACPI,
+    VIRT_PVTIME,
+    VIRT_LOWMEMMAP_LAST,
+};
+
+/* indices of IO regions located after the RAM */
+enum {
+    VIRT_HIGH_GIC_REDIST2 =  VIRT_LOWMEMMAP_LAST,
+    VIRT_HIGH_PCIE_ECAM,
+    VIRT_HIGH_PCIE_MMIO,
+};
+
+typedef enum VirtIOMMUType {
+    VIRT_IOMMU_NONE,
+    VIRT_IOMMU_SMMUV3,
+    VIRT_IOMMU_VIRTIO,
+} VirtIOMMUType;
+
+typedef enum VirtMSIControllerType {
+    VIRT_MSI_CTRL_NONE,
+    VIRT_MSI_CTRL_GICV2M,
+    VIRT_MSI_CTRL_ITS,
+} VirtMSIControllerType;
+
+typedef enum VirtGICType {
+    VIRT_GIC_VERSION_MAX = 0,
+    VIRT_GIC_VERSION_HOST = 1,
+    /* The concrete GIC values have to match the GIC version number */
+    VIRT_GIC_VERSION_2 = 2,
+    VIRT_GIC_VERSION_3 = 3,
+    VIRT_GIC_VERSION_4 = 4,
+    VIRT_GIC_VERSION_NOSEL,
+} VirtGICType;
+
+#define VIRT_GIC_VERSION_2_MASK BIT(VIRT_GIC_VERSION_2)
+#define VIRT_GIC_VERSION_3_MASK BIT(VIRT_GIC_VERSION_3)
+#define VIRT_GIC_VERSION_4_MASK BIT(VIRT_GIC_VERSION_4)
+
+struct VirtMachineClass {
+    MachineClass parent;
+    bool disallow_affinity_adjustment;
+    bool no_its;
+    bool no_tcg_its;
+    bool no_pmu;
+    bool claim_edge_triggered_timers;
+    bool smbios_old_sys_ver;
+    bool no_highmem_compact;
+    bool no_highmem_ecam;
+    bool no_ged;   /* Machines < 4.2 have no support for ACPI GED device */
+    bool kvm_no_adjvtime;
+    bool no_kvm_steal_time;
+    bool acpi_expose_flash;
+    bool no_secure_gpio;
+    /* Machines < 6.2 have no support for describing cpu topology to guest */
+    bool no_cpu_topology;
+    bool no_tcg_lpa2;
+};
+
+struct VirtMachineState {
+    MachineState parent;
+    Notifier machine_done;
+    DeviceState *platform_bus_dev;
+    FWCfgState *fw_cfg;
+    PFlashCFI01 *flash[2];
+    bool secure;
+    bool highmem;
+    bool highmem_compact;
+    bool highmem_ecam;
+    bool highmem_mmio;
+    bool highmem_redists;
+    bool its;
+    bool tcg_its;
+    bool virt;
+    bool ras;
+    bool mte;
+    bool dtb_randomness;
+    OnOffAuto acpi;
+    VirtGICType gic_version;
+    VirtIOMMUType iommu;
+    bool default_bus_bypass_iommu;
+    VirtMSIControllerType msi_controller;
+    uint16_t virtio_iommu_bdf;
+    struct arm_boot_info bootinfo;
+    MemMapEntry *memmap;
+    char *pciehb_nodename;
+    const int *irqmap;
+    int fdt_size;
+    uint32_t clock_phandle;
+    uint32_t gic_phandle;
+    uint32_t msi_phandle;
+    uint32_t iommu_phandle;
+    int psci_conduit;
+    hwaddr highest_gpa;
+    DeviceState *gic;
+    DeviceState *acpi_dev;
+    Notifier powerdown_notifier;
+    PCIBus *bus;
+    char *oem_id;
+    char *oem_table_id;
+};
+
+#define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM)
+
+#define TYPE_VIRT_MACHINE   MACHINE_TYPE_NAME("virt")
+OBJECT_DECLARE_TYPE(VirtMachineState, VirtMachineClass, VIRT_MACHINE)
+
+void virt_acpi_setup(VirtMachineState *vms);
+bool virt_is_acpi_enabled(VirtMachineState *vms);
+
+/* Return number of redistributors that fit in the specified region */
+static uint32_t virt_redist_capacity(VirtMachineState *vms, int region)
+{
+    uint32_t redist_size;
+
+    if (vms->gic_version == VIRT_GIC_VERSION_3) {
+        redist_size = GICV3_REDIST_SIZE;
+    } else {
+        redist_size = GICV4_REDIST_SIZE;
+    }
+    return vms->memmap[region].size / redist_size;
+}
+
+/* Return the number of used redistributor regions  */
+static inline int virt_gicv3_redist_region_count(VirtMachineState *vms)
+{
+    uint32_t redist0_capacity = virt_redist_capacity(vms, VIRT_GIC_REDIST);
+
+    assert(vms->gic_version != VIRT_GIC_VERSION_2);
+
+    return (MACHINE(vms)->smp.cpus > redist0_capacity &&
+            vms->highmem_redists) ? 2 : 1;
+}
+
+#endif /* QEMU_ARM_VIRT_H */
diff --git a/include/hw/arm/xen_arch_hvm.h b/include/hw/arm/xen_arch_hvm.h
new file mode 100644 (file)
index 0000000..8fd645e
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef HW_XEN_ARCH_ARM_HVM_H
+#define HW_XEN_ARCH_ARM_HVM_H
+
+#include <xen/hvm/ioreq.h>
+void arch_handle_ioreq(XenIOState *state, ioreq_t *req);
+void arch_xen_set_memory(XenIOState *state,
+                         MemoryRegionSection *section,
+                         bool add);
+#endif
diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
new file mode 100644 (file)
index 0000000..b24fa64
--- /dev/null
@@ -0,0 +1,336 @@
+/*
+ * Model of the Xilinx Versal
+ *
+ * Copyright (c) 2018 Xilinx Inc.
+ * Written by Edgar E. Iglesias
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+#ifndef XLNX_VERSAL_H
+#define XLNX_VERSAL_H
+
+#include "hw/sysbus.h"
+#include "hw/cpu/cluster.h"
+#include "hw/or-irq.h"
+#include "hw/sd/sdhci.h"
+#include "hw/intc/arm_gicv3.h"
+#include "hw/char/pl011.h"
+#include "hw/dma/xlnx-zdma.h"
+#include "hw/net/cadence_gem.h"
+#include "hw/rtc/xlnx-zynqmp-rtc.h"
+#include "qom/object.h"
+#include "hw/usb/xlnx-usb-subsystem.h"
+#include "hw/misc/xlnx-versal-xramc.h"
+#include "hw/nvram/xlnx-bbram.h"
+#include "hw/nvram/xlnx-versal-efuse.h"
+#include "hw/ssi/xlnx-versal-ospi.h"
+#include "hw/dma/xlnx_csu_dma.h"
+#include "hw/misc/xlnx-versal-crl.h"
+#include "hw/misc/xlnx-versal-pmc-iou-slcr.h"
+#include "hw/misc/xlnx-versal-trng.h"
+#include "hw/net/xlnx-versal-canfd.h"
+#include "hw/misc/xlnx-versal-cfu.h"
+#include "hw/misc/xlnx-versal-cframe-reg.h"
+
+#define TYPE_XLNX_VERSAL "xlnx-versal"
+OBJECT_DECLARE_SIMPLE_TYPE(Versal, XLNX_VERSAL)
+
+#define XLNX_VERSAL_NR_ACPUS   2
+#define XLNX_VERSAL_NR_RCPUS   2
+#define XLNX_VERSAL_NR_UARTS   2
+#define XLNX_VERSAL_NR_GEMS    2
+#define XLNX_VERSAL_NR_ADMAS   8
+#define XLNX_VERSAL_NR_SDS     2
+#define XLNX_VERSAL_NR_XRAM    4
+#define XLNX_VERSAL_NR_IRQS    192
+#define XLNX_VERSAL_NR_CANFD   2
+#define XLNX_VERSAL_CANFD_REF_CLK (24 * 1000 * 1000)
+#define XLNX_VERSAL_NR_CFRAME  15
+
+struct Versal {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    struct {
+        struct {
+            MemoryRegion mr;
+            CPUClusterState cluster;
+            ARMCPU cpu[XLNX_VERSAL_NR_ACPUS];
+            GICv3State gic;
+        } apu;
+    } fpd;
+
+    MemoryRegion mr_ps;
+
+    struct {
+        /* 4 ranges to access DDR.  */
+        MemoryRegion mr_ddr_ranges[4];
+    } noc;
+
+    struct {
+        MemoryRegion mr_ocm;
+
+        struct {
+            PL011State uart[XLNX_VERSAL_NR_UARTS];
+            CadenceGEMState gem[XLNX_VERSAL_NR_GEMS];
+            XlnxZDMA adma[XLNX_VERSAL_NR_ADMAS];
+            VersalUsb2 usb;
+            CanBusState *canbus[XLNX_VERSAL_NR_CANFD];
+            XlnxVersalCANFDState canfd[XLNX_VERSAL_NR_CANFD];
+        } iou;
+
+        /* Real-time Processing Unit.  */
+        struct {
+            MemoryRegion mr;
+            MemoryRegion mr_ps_alias;
+
+            CPUClusterState cluster;
+            ARMCPU cpu[XLNX_VERSAL_NR_RCPUS];
+        } rpu;
+
+        struct {
+            OrIRQState irq_orgate;
+            XlnxXramCtrl ctrl[XLNX_VERSAL_NR_XRAM];
+        } xram;
+
+        XlnxVersalCRL crl;
+    } lpd;
+
+    /* The Platform Management Controller subsystem.  */
+    struct {
+        struct {
+            SDHCIState sd[XLNX_VERSAL_NR_SDS];
+            XlnxVersalPmcIouSlcr slcr;
+
+            struct {
+                XlnxVersalOspi ospi;
+                XlnxCSUDMA dma_src;
+                XlnxCSUDMA dma_dst;
+                MemoryRegion linear_mr;
+                OrIRQState irq_orgate;
+            } ospi;
+        } iou;
+
+        XlnxZynqMPRTC rtc;
+        XlnxVersalTRng trng;
+        XlnxBBRam bbram;
+        XlnxEFuse efuse;
+        XlnxVersalEFuseCtrl efuse_ctrl;
+        XlnxVersalEFuseCache efuse_cache;
+        XlnxVersalCFUAPB cfu_apb;
+        XlnxVersalCFUFDRO cfu_fdro;
+        XlnxVersalCFUSFR cfu_sfr;
+        XlnxVersalCFrameReg cframe[XLNX_VERSAL_NR_CFRAME];
+        XlnxVersalCFrameBcastReg cframe_bcast;
+
+        OrIRQState apb_irq_orgate;
+    } pmc;
+
+    struct {
+        MemoryRegion *mr_ddr;
+    } cfg;
+};
+
+/* Memory-map and IRQ definitions. Copied a subset from
+ * auto-generated files.  */
+
+#define VERSAL_GIC_MAINT_IRQ        9
+#define VERSAL_TIMER_VIRT_IRQ       11
+#define VERSAL_TIMER_S_EL1_IRQ      13
+#define VERSAL_TIMER_NS_EL1_IRQ     14
+#define VERSAL_TIMER_NS_EL2_IRQ     10
+
+#define VERSAL_CRL_IRQ             10
+#define VERSAL_UART0_IRQ_0         18
+#define VERSAL_UART1_IRQ_0         19
+#define VERSAL_CANFD0_IRQ_0        20
+#define VERSAL_CANFD1_IRQ_0        21
+#define VERSAL_USB0_IRQ_0          22
+#define VERSAL_GEM0_IRQ_0          56
+#define VERSAL_GEM0_WAKE_IRQ_0     57
+#define VERSAL_GEM1_IRQ_0          58
+#define VERSAL_GEM1_WAKE_IRQ_0     59
+#define VERSAL_ADMA_IRQ_0          60
+#define VERSAL_XRAM_IRQ_0          79
+#define VERSAL_CFU_IRQ_0           120
+#define VERSAL_PMC_APB_IRQ         121
+#define VERSAL_OSPI_IRQ            124
+#define VERSAL_SD0_IRQ_0           126
+#define VERSAL_EFUSE_IRQ           139
+#define VERSAL_TRNG_IRQ            141
+#define VERSAL_RTC_ALARM_IRQ       142
+#define VERSAL_RTC_SECONDS_IRQ     143
+
+/* Architecturally reserved IRQs suitable for virtualization.  */
+#define VERSAL_RSVD_IRQ_FIRST 111
+#define VERSAL_RSVD_IRQ_LAST  118
+
+#define MM_TOP_RSVD                 0xa0000000U
+#define MM_TOP_RSVD_SIZE            0x4000000
+#define MM_GIC_APU_DIST_MAIN        0xf9000000U
+#define MM_GIC_APU_DIST_MAIN_SIZE   0x10000
+#define MM_GIC_APU_REDIST_0         0xf9080000U
+#define MM_GIC_APU_REDIST_0_SIZE    0x80000
+
+#define MM_UART0                    0xff000000U
+#define MM_UART0_SIZE               0x10000
+#define MM_UART1                    0xff010000U
+#define MM_UART1_SIZE               0x10000
+
+#define MM_CANFD0                   0xff060000U
+#define MM_CANFD0_SIZE              0x10000
+#define MM_CANFD1                   0xff070000U
+#define MM_CANFD1_SIZE              0x10000
+
+#define MM_GEM0                     0xff0c0000U
+#define MM_GEM0_SIZE                0x10000
+#define MM_GEM1                     0xff0d0000U
+#define MM_GEM1_SIZE                0x10000
+
+#define MM_ADMA_CH0                 0xffa80000U
+#define MM_ADMA_CH0_SIZE            0x10000
+
+#define MM_OCM                      0xfffc0000U
+#define MM_OCM_SIZE                 0x40000
+
+#define MM_XRAM                     0xfe800000
+#define MM_XRAMC                    0xff8e0000
+#define MM_XRAMC_SIZE               0x10000
+
+#define MM_USB2_CTRL_REGS           0xFF9D0000
+#define MM_USB2_CTRL_REGS_SIZE      0x10000
+
+#define MM_USB_0                    0xFE200000
+#define MM_USB_0_SIZE               0x10000
+
+#define MM_TOP_DDR                  0x0
+#define MM_TOP_DDR_SIZE             0x80000000U
+#define MM_TOP_DDR_2                0x800000000ULL
+#define MM_TOP_DDR_2_SIZE           0x800000000ULL
+#define MM_TOP_DDR_3                0xc000000000ULL
+#define MM_TOP_DDR_3_SIZE           0x4000000000ULL
+#define MM_TOP_DDR_4                0x10000000000ULL
+#define MM_TOP_DDR_4_SIZE           0xb780000000ULL
+
+#define MM_PSM_START                0xffc80000U
+#define MM_PSM_END                  0xffcf0000U
+
+#define MM_CRL                      0xff5e0000U
+#define MM_CRL_SIZE                 0x300000
+#define MM_IOU_SCNTR                0xff130000U
+#define MM_IOU_SCNTR_SIZE           0x10000
+#define MM_IOU_SCNTRS               0xff140000U
+#define MM_IOU_SCNTRS_SIZE          0x10000
+#define MM_FPD_CRF                  0xfd1a0000U
+#define MM_FPD_CRF_SIZE             0x140000
+#define MM_FPD_FPD_APU              0xfd5c0000
+#define MM_FPD_FPD_APU_SIZE         0x100
+
+#define MM_PMC_PMC_IOU_SLCR         0xf1060000
+#define MM_PMC_PMC_IOU_SLCR_SIZE    0x10000
+
+#define MM_PMC_OSPI                 0xf1010000
+#define MM_PMC_OSPI_SIZE            0x10000
+
+#define MM_PMC_OSPI_DAC             0xc0000000
+#define MM_PMC_OSPI_DAC_SIZE        0x20000000
+
+#define MM_PMC_OSPI_DMA_DST         0xf1011800
+#define MM_PMC_OSPI_DMA_SRC         0xf1011000
+
+#define MM_PMC_SD0                  0xf1040000U
+#define MM_PMC_SD0_SIZE             0x10000
+#define MM_PMC_BBRAM_CTRL           0xf11f0000
+#define MM_PMC_BBRAM_CTRL_SIZE      0x00050
+#define MM_PMC_EFUSE_CTRL           0xf1240000
+#define MM_PMC_EFUSE_CTRL_SIZE      0x00104
+#define MM_PMC_EFUSE_CACHE          0xf1250000
+#define MM_PMC_EFUSE_CACHE_SIZE     0x00C00
+
+#define MM_PMC_CFU_APB              0xf12b0000
+#define MM_PMC_CFU_APB_SIZE         0x10000
+#define MM_PMC_CFU_STREAM           0xf12c0000
+#define MM_PMC_CFU_STREAM_SIZE      0x1000
+#define MM_PMC_CFU_SFR              0xf12c1000
+#define MM_PMC_CFU_SFR_SIZE         0x1000
+#define MM_PMC_CFU_FDRO             0xf12c2000
+#define MM_PMC_CFU_FDRO_SIZE        0x1000
+#define MM_PMC_CFU_STREAM_2         0xf1f80000
+#define MM_PMC_CFU_STREAM_2_SIZE    0x40000
+
+#define MM_PMC_CFRAME0_REG          0xf12d0000
+#define MM_PMC_CFRAME0_REG_SIZE     0x1000
+#define MM_PMC_CFRAME0_FDRI         0xf12d1000
+#define MM_PMC_CFRAME0_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME1_REG          0xf12d2000
+#define MM_PMC_CFRAME1_REG_SIZE     0x1000
+#define MM_PMC_CFRAME1_FDRI         0xf12d3000
+#define MM_PMC_CFRAME1_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME2_REG          0xf12d4000
+#define MM_PMC_CFRAME2_REG_SIZE     0x1000
+#define MM_PMC_CFRAME2_FDRI         0xf12d5000
+#define MM_PMC_CFRAME2_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME3_REG          0xf12d6000
+#define MM_PMC_CFRAME3_REG_SIZE     0x1000
+#define MM_PMC_CFRAME3_FDRI         0xf12d7000
+#define MM_PMC_CFRAME3_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME4_REG          0xf12d8000
+#define MM_PMC_CFRAME4_REG_SIZE     0x1000
+#define MM_PMC_CFRAME4_FDRI         0xf12d9000
+#define MM_PMC_CFRAME4_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME5_REG          0xf12da000
+#define MM_PMC_CFRAME5_REG_SIZE     0x1000
+#define MM_PMC_CFRAME5_FDRI         0xf12db000
+#define MM_PMC_CFRAME5_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME6_REG          0xf12dc000
+#define MM_PMC_CFRAME6_REG_SIZE     0x1000
+#define MM_PMC_CFRAME6_FDRI         0xf12dd000
+#define MM_PMC_CFRAME6_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME7_REG          0xf12de000
+#define MM_PMC_CFRAME7_REG_SIZE     0x1000
+#define MM_PMC_CFRAME7_FDRI         0xf12df000
+#define MM_PMC_CFRAME7_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME8_REG          0xf12e0000
+#define MM_PMC_CFRAME8_REG_SIZE     0x1000
+#define MM_PMC_CFRAME8_FDRI         0xf12e1000
+#define MM_PMC_CFRAME8_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME9_REG          0xf12e2000
+#define MM_PMC_CFRAME9_REG_SIZE     0x1000
+#define MM_PMC_CFRAME9_FDRI         0xf12e3000
+#define MM_PMC_CFRAME9_FDRI_SIZE    0x1000
+#define MM_PMC_CFRAME10_REG         0xf12e4000
+#define MM_PMC_CFRAME10_REG_SIZE    0x1000
+#define MM_PMC_CFRAME10_FDRI        0xf12e5000
+#define MM_PMC_CFRAME10_FDRI_SIZE   0x1000
+#define MM_PMC_CFRAME11_REG         0xf12e6000
+#define MM_PMC_CFRAME11_REG_SIZE    0x1000
+#define MM_PMC_CFRAME11_FDRI        0xf12e7000
+#define MM_PMC_CFRAME11_FDRI_SIZE   0x1000
+#define MM_PMC_CFRAME12_REG         0xf12e8000
+#define MM_PMC_CFRAME12_REG_SIZE    0x1000
+#define MM_PMC_CFRAME12_FDRI        0xf12e9000
+#define MM_PMC_CFRAME12_FDRI_SIZE   0x1000
+#define MM_PMC_CFRAME13_REG         0xf12ea000
+#define MM_PMC_CFRAME13_REG_SIZE    0x1000
+#define MM_PMC_CFRAME13_FDRI        0xf12eb000
+#define MM_PMC_CFRAME13_FDRI_SIZE   0x1000
+#define MM_PMC_CFRAME14_REG         0xf12ec000
+#define MM_PMC_CFRAME14_REG_SIZE    0x1000
+#define MM_PMC_CFRAME14_FDRI        0xf12ed000
+#define MM_PMC_CFRAME14_FDRI_SIZE   0x1000
+#define MM_PMC_CFRAME_BCAST_REG       0xf12ee000
+#define MM_PMC_CFRAME_BCAST_REG_SIZE  0x1000
+#define MM_PMC_CFRAME_BCAST_FDRI      0xf12ef000
+#define MM_PMC_CFRAME_BCAST_FDRI_SIZE 0x1000
+
+#define MM_PMC_CRP                  0xf1260000U
+#define MM_PMC_CRP_SIZE             0x10000
+#define MM_PMC_RTC                  0xf12a0000
+#define MM_PMC_RTC_SIZE             0x10000
+#define MM_PMC_TRNG                 0xf1230000
+#define MM_PMC_TRNG_SIZE            0x10000
+#endif
diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
new file mode 100644 (file)
index 0000000..96358d5
--- /dev/null
@@ -0,0 +1,150 @@
+/*
+ * Xilinx Zynq MPSoC emulation
+ *
+ * Copyright (C) 2015 Xilinx Inc
+ * Written by Peter Crosthwaite <peter.crosthwaite@xilinx.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef XLNX_ZYNQMP_H
+#define XLNX_ZYNQMP_H
+
+#include "hw/intc/arm_gic.h"
+#include "hw/net/cadence_gem.h"
+#include "hw/char/cadence_uart.h"
+#include "hw/net/xlnx-zynqmp-can.h"
+#include "hw/ide/ahci.h"
+#include "hw/sd/sdhci.h"
+#include "hw/ssi/xilinx_spips.h"
+#include "hw/dma/xlnx_dpdma.h"
+#include "hw/dma/xlnx-zdma.h"
+#include "hw/display/xlnx_dp.h"
+#include "hw/intc/xlnx-zynqmp-ipi.h"
+#include "hw/rtc/xlnx-zynqmp-rtc.h"
+#include "hw/cpu/cluster.h"
+#include "target/arm/cpu.h"
+#include "qom/object.h"
+#include "net/can_emu.h"
+#include "hw/dma/xlnx_csu_dma.h"
+#include "hw/nvram/xlnx-bbram.h"
+#include "hw/nvram/xlnx-zynqmp-efuse.h"
+#include "hw/or-irq.h"
+#include "hw/misc/xlnx-zynqmp-apu-ctrl.h"
+#include "hw/misc/xlnx-zynqmp-crf.h"
+#include "hw/timer/cadence_ttc.h"
+#include "hw/usb/hcd-dwc3.h"
+
+#define TYPE_XLNX_ZYNQMP "xlnx-zynqmp"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPState, XLNX_ZYNQMP)
+
+#define XLNX_ZYNQMP_NUM_APU_CPUS 4
+#define XLNX_ZYNQMP_NUM_RPU_CPUS 2
+#define XLNX_ZYNQMP_NUM_GEMS 4
+#define XLNX_ZYNQMP_NUM_UARTS 2
+#define XLNX_ZYNQMP_NUM_CAN 2
+#define XLNX_ZYNQMP_CAN_REF_CLK (24 * 1000 * 1000)
+#define XLNX_ZYNQMP_NUM_SDHCI 2
+#define XLNX_ZYNQMP_NUM_SPIS 2
+#define XLNX_ZYNQMP_NUM_GDMA_CH 8
+#define XLNX_ZYNQMP_NUM_ADMA_CH 8
+#define XLNX_ZYNQMP_NUM_USB 2
+
+#define XLNX_ZYNQMP_NUM_QSPI_BUS 2
+#define XLNX_ZYNQMP_NUM_QSPI_BUS_CS 2
+#define XLNX_ZYNQMP_NUM_QSPI_FLASH 4
+
+#define XLNX_ZYNQMP_NUM_OCM_BANKS 4
+#define XLNX_ZYNQMP_OCM_RAM_0_ADDRESS 0xFFFC0000
+#define XLNX_ZYNQMP_OCM_RAM_SIZE 0x10000
+
+#define XLNX_ZYNQMP_GIC_REGIONS 6
+
+/*
+ * ZynqMP maps the ARM GIC regions (GICC, GICD ...) at consecutive 64k offsets
+ * and under-decodes the 64k region. This mirrors the 4k regions to every 4k
+ * aligned address in the 64k region. To implement each GIC region needs a
+ * number of memory region aliases.
+ */
+
+#define XLNX_ZYNQMP_GIC_REGION_SIZE 0x1000
+#define XLNX_ZYNQMP_GIC_ALIASES     (0x10000 / XLNX_ZYNQMP_GIC_REGION_SIZE)
+
+#define XLNX_ZYNQMP_MAX_LOW_RAM_SIZE    0x80000000ull
+
+#define XLNX_ZYNQMP_MAX_HIGH_RAM_SIZE   0x800000000ull
+#define XLNX_ZYNQMP_HIGH_RAM_START      0x800000000ull
+
+#define XLNX_ZYNQMP_MAX_RAM_SIZE (XLNX_ZYNQMP_MAX_LOW_RAM_SIZE + \
+                                  XLNX_ZYNQMP_MAX_HIGH_RAM_SIZE)
+
+#define XLNX_ZYNQMP_NUM_TTC 4
+
+/*
+ * Unimplemented mmio regions needed to boot some images.
+ */
+#define XLNX_ZYNQMP_NUM_UNIMP_AREAS 1
+
+struct XlnxZynqMPState {
+    /*< private >*/
+    DeviceState parent_obj;
+
+    /*< public >*/
+    CPUClusterState apu_cluster;
+    CPUClusterState rpu_cluster;
+    ARMCPU apu_cpu[XLNX_ZYNQMP_NUM_APU_CPUS];
+    ARMCPU rpu_cpu[XLNX_ZYNQMP_NUM_RPU_CPUS];
+    GICState gic;
+    MemoryRegion gic_mr[XLNX_ZYNQMP_GIC_REGIONS][XLNX_ZYNQMP_GIC_ALIASES];
+
+    MemoryRegion ocm_ram[XLNX_ZYNQMP_NUM_OCM_BANKS];
+
+    MemoryRegion *ddr_ram;
+    MemoryRegion ddr_ram_low, ddr_ram_high;
+    XlnxBBRam bbram;
+    XlnxEFuse efuse;
+    XlnxZynqMPEFuse efuse_ctrl;
+
+    MemoryRegion mr_unimp[XLNX_ZYNQMP_NUM_UNIMP_AREAS];
+
+    CadenceGEMState gem[XLNX_ZYNQMP_NUM_GEMS];
+    CadenceUARTState uart[XLNX_ZYNQMP_NUM_UARTS];
+    XlnxZynqMPCANState can[XLNX_ZYNQMP_NUM_CAN];
+    SysbusAHCIState sata;
+    SDHCIState sdhci[XLNX_ZYNQMP_NUM_SDHCI];
+    XilinxSPIPS spi[XLNX_ZYNQMP_NUM_SPIS];
+    XlnxZynqMPQSPIPS qspi;
+    XlnxDPState dp;
+    XlnxDPDMAState dpdma;
+    XlnxZynqMPIPI ipi;
+    XlnxZynqMPRTC rtc;
+    XlnxZDMA gdma[XLNX_ZYNQMP_NUM_GDMA_CH];
+    XlnxZDMA adma[XLNX_ZYNQMP_NUM_ADMA_CH];
+    XlnxCSUDMA qspi_dma;
+    OrIRQState qspi_irq_orgate;
+    XlnxZynqMPAPUCtrl apu_ctrl;
+    XlnxZynqMPCRF crf;
+    CadenceTTCState ttc[XLNX_ZYNQMP_NUM_TTC];
+    USBDWC3 usb[XLNX_ZYNQMP_NUM_USB];
+
+    char *boot_cpu;
+    ARMCPU *boot_cpu_ptr;
+
+    /* Has the ARM Security extensions?  */
+    bool secure;
+    /* Has the ARM Virtualization extensions?  */
+    bool virt;
+
+    /* CAN bus. */
+    CanBusState *canbus[XLNX_ZYNQMP_NUM_CAN];
+};
+
+#endif
diff --git a/include/hw/audio/asc.h b/include/hw/audio/asc.h
new file mode 100644 (file)
index 0000000..4741f92
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * QEMU Apple Sound Chip emulation
+ *
+ * Apple Sound Chip (ASC) 344S0063
+ * Enhanced Apple Sound Chip (EASC) 343S1063
+ *
+ * Copyright (c) 2012-2018 Laurent Vivier <laurent@vivier.eu>
+ * Copyright (c) 2022 Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_AUDIO_ASC_H
+#define HW_AUDIO_ASC_H
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "audio/audio.h"
+
+#define ASC_FREQ 22257
+
+enum {
+    ASC_TYPE_ASC    = 0,  /* original discrete Apple Sound Chip */
+    ASC_TYPE_EASC   = 1   /* discrete Enhanced Apple Sound Chip */
+};
+
+#define ASC_FIFO_OFFSET    0x0
+#define ASC_FIFO_SIZE      0x400
+
+#define ASC_REG_OFFSET     0x800
+#define ASC_REG_SIZE       0x60
+
+#define ASC_EXTREG_OFFSET  0xf00
+#define ASC_EXTREG_SIZE    0x20
+
+typedef struct ASCFIFOState {
+    int index;
+
+    MemoryRegion mem_fifo;
+    uint8_t fifo[ASC_FIFO_SIZE];
+    uint8_t int_status;
+
+    int cnt;
+    int wptr;
+    int rptr;
+
+    MemoryRegion mem_extregs;
+    uint8_t extregs[ASC_EXTREG_SIZE];
+
+    int xa_cnt;
+    uint8_t xa_val;
+    uint8_t xa_flags;
+    int16_t xa_last[2];
+} ASCFIFOState;
+
+struct ASCState {
+    SysBusDevice parent_obj;
+
+    uint8_t type;
+    MemoryRegion asc;
+    MemoryRegion mem_fifo;
+    MemoryRegion mem_regs;
+    MemoryRegion mem_extregs;
+
+    QEMUSoundCard card;
+    SWVoiceOut *voice;
+    uint8_t *mixbuf;
+    int samples;
+    int shift;
+
+    uint8_t *silentbuf;
+
+    /* Time when we were last able to generate samples */
+    int64_t fifo_empty_ns;
+
+    qemu_irq irq;
+
+    ASCFIFOState fifos[2];
+
+    uint8_t regs[ASC_REG_SIZE];
+};
+
+#define TYPE_ASC "apple-sound-chip"
+OBJECT_DECLARE_SIMPLE_TYPE(ASCState, ASC)
+
+#endif
diff --git a/include/hw/audio/pcspk.h b/include/hw/audio/pcspk.h
new file mode 100644 (file)
index 0000000..6be75a6
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * QEMU PC speaker emulation
+ *
+ * Copyright (c) 2006 Joachim Henke
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_PCSPK_H
+#define HW_PCSPK_H
+
+#define TYPE_PC_SPEAKER "isa-pcspk"
+
+#endif /* HW_PCSPK_H */
diff --git a/include/hw/audio/soundhw.h b/include/hw/audio/soundhw.h
new file mode 100644 (file)
index 0000000..474c5ff
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef HW_SOUNDHW_H
+#define HW_SOUNDHW_H
+
+void pci_register_soundhw(const char *name, const char *descr,
+                          int (*init_pci)(PCIBus *bus, const char *audiodev));
+void deprecated_register_soundhw(const char *name, const char *descr,
+                                 int isa, const char *typename);
+
+void soundhw_init(void);
+void show_valid_soundhw(void);
+void select_soundhw(const char *name, const char *audiodev);
+
+#endif
diff --git a/include/hw/audio/virtio-snd.h b/include/hw/audio/virtio-snd.h
new file mode 100644 (file)
index 0000000..c3767f4
--- /dev/null
@@ -0,0 +1,235 @@
+/*
+ * VIRTIO Sound Device conforming to
+ *
+ * "Virtual I/O Device (VIRTIO) Version 1.2
+ * Committee Specification Draft 01
+ * 09 May 2022"
+ *
+ * Copyright (c) 2023 Emmanouil Pitsidianakis <manos.pitsidianakis@linaro.org>
+ * Copyright (C) 2019 OpenSynergy GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef QEMU_VIRTIO_SOUND_H
+#define QEMU_VIRTIO_SOUND_H
+
+#include "hw/virtio/virtio.h"
+#include "audio/audio.h"
+#include "standard-headers/linux/virtio_ids.h"
+#include "standard-headers/linux/virtio_snd.h"
+
+#define TYPE_VIRTIO_SND "virtio-sound-device"
+#define VIRTIO_SND(obj) \
+        OBJECT_CHECK(VirtIOSound, (obj), TYPE_VIRTIO_SND)
+
+/* CONFIGURATION SPACE */
+
+typedef struct virtio_snd_config virtio_snd_config;
+
+/* COMMON DEFINITIONS */
+
+/* common header for request/response*/
+typedef struct virtio_snd_hdr virtio_snd_hdr;
+
+/* event notification */
+typedef struct virtio_snd_event virtio_snd_event;
+
+/* common control request to query an item information */
+typedef struct virtio_snd_query_info virtio_snd_query_info;
+
+/* JACK CONTROL MESSAGES */
+
+typedef struct virtio_snd_jack_hdr virtio_snd_jack_hdr;
+
+/* jack information structure */
+typedef struct virtio_snd_jack_info virtio_snd_jack_info;
+
+/* jack remapping control request */
+typedef struct virtio_snd_jack_remap virtio_snd_jack_remap;
+
+/*
+ * PCM CONTROL MESSAGES
+ */
+typedef struct virtio_snd_pcm_hdr virtio_snd_pcm_hdr;
+
+/* PCM stream info structure */
+typedef struct virtio_snd_pcm_info virtio_snd_pcm_info;
+
+/* set PCM stream params */
+typedef struct virtio_snd_pcm_set_params virtio_snd_pcm_set_params;
+
+/* I/O request header */
+typedef struct virtio_snd_pcm_xfer virtio_snd_pcm_xfer;
+
+/* I/O request status */
+typedef struct virtio_snd_pcm_status virtio_snd_pcm_status;
+
+/* device structs */
+
+typedef struct VirtIOSound VirtIOSound;
+
+typedef struct VirtIOSoundPCMStream VirtIOSoundPCMStream;
+
+typedef struct virtio_snd_ctrl_command virtio_snd_ctrl_command;
+
+typedef struct VirtIOSoundPCM VirtIOSoundPCM;
+
+typedef struct VirtIOSoundPCMBuffer VirtIOSoundPCMBuffer;
+
+/*
+ * The VirtIO sound spec reuses layouts and values from the High Definition
+ * Audio spec (virtio/v1.2: 5.14 Sound Device). This struct handles each I/O
+ * message's buffer (virtio/v1.2: 5.14.6.8 PCM I/O Messages).
+ *
+ * In the case of TX (i.e. playback) buffers, we defer reading the raw PCM data
+ * from the virtqueue until QEMU's sound backsystem calls the output callback.
+ * This is tracked by the `bool populated;` field, which is set to true when
+ * data has been read into our own buffer for consumption.
+ *
+ * VirtIOSoundPCMBuffer has a dynamic size since it includes the raw PCM data
+ * in its allocation. It must be initialized and destroyed as follows:
+ *
+ *   size_t size = [[derived from owned VQ element descriptor sizes]];
+ *   buffer = g_malloc0(sizeof(VirtIOSoundPCMBuffer) + size);
+ *   buffer->elem = [[owned VQ element]];
+ *
+ *   [..]
+ *
+ *   g_free(buffer->elem);
+ *   g_free(buffer);
+ */
+struct VirtIOSoundPCMBuffer {
+    QSIMPLEQ_ENTRY(VirtIOSoundPCMBuffer) entry;
+    VirtQueueElement *elem;
+    VirtQueue *vq;
+    size_t size;
+    /*
+     * In TX / Plaback, `offset` represents the first unused position inside
+     * `data`. If `offset == size` then there are no unused data left.
+     */
+    uint64_t offset;
+    /* Used for the TX queue for lazy I/O copy from `elem` */
+    bool populated;
+    /*
+     * VirtIOSoundPCMBuffer is an unsized type because it ends with an array of
+     * bytes. The size of `data` is determined from the I/O message's read-only
+     * or write-only size when allocating VirtIOSoundPCMBuffer.
+     */
+    uint8_t data[];
+};
+
+struct VirtIOSoundPCM {
+    VirtIOSound *snd;
+    /*
+     * PCM parameters are a separate field instead of a VirtIOSoundPCMStream
+     * field, because the operation of PCM control requests is first
+     * VIRTIO_SND_R_PCM_SET_PARAMS and then VIRTIO_SND_R_PCM_PREPARE; this
+     * means that some times we get parameters without having an allocated
+     * stream yet.
+     */
+    virtio_snd_pcm_set_params *pcm_params;
+    VirtIOSoundPCMStream **streams;
+};
+
+struct VirtIOSoundPCMStream {
+    VirtIOSoundPCM *pcm;
+    virtio_snd_pcm_info info;
+    virtio_snd_pcm_set_params params;
+    uint32_t id;
+    /* channel position values (VIRTIO_SND_CHMAP_XXX) */
+    uint8_t positions[VIRTIO_SND_CHMAP_MAX_SIZE];
+    VirtIOSound *s;
+    bool flushing;
+    audsettings as;
+    union {
+        SWVoiceIn *in;
+        SWVoiceOut *out;
+    } voice;
+    QemuMutex queue_mutex;
+    bool active;
+    QSIMPLEQ_HEAD(, VirtIOSoundPCMBuffer) queue;
+    QSIMPLEQ_HEAD(, VirtIOSoundPCMBuffer) invalid;
+};
+
+/*
+ * PCM stream state machine.
+ * -------------------------
+ *
+ * 5.14.6.6.1 PCM Command Lifecycle
+ * ================================
+ *
+ * A PCM stream has the following command lifecycle:
+ * - `SET PARAMETERS`
+ *   The driver negotiates the stream parameters (format, transport, etc) with
+ *   the device.
+ *   Possible valid transitions: `SET PARAMETERS`, `PREPARE`.
+ * - `PREPARE`
+ *   The device prepares the stream (allocates resources, etc).
+ *   Possible valid transitions: `SET PARAMETERS`, `PREPARE`, `START`,
+ *   `RELEASE`. Output only: the driver transfers data for pre-buffing.
+ * - `START`
+ *   The device starts the stream (unmute, putting into running state, etc).
+ *   Possible valid transitions: `STOP`.
+ *   The driver transfers data to/from the stream.
+ * - `STOP`
+ *   The device stops the stream (mute, putting into non-running state, etc).
+ *   Possible valid transitions: `START`, `RELEASE`.
+ * - `RELEASE`
+ *   The device releases the stream (frees resources, etc).
+ *   Possible valid transitions: `SET PARAMETERS`, `PREPARE`.
+ *
+ * +---------------+ +---------+ +---------+ +-------+ +-------+
+ * | SetParameters | | Prepare | | Release | | Start | | Stop  |
+ * +---------------+ +---------+ +---------+ +-------+ +-------+
+ *         |-             |           |          |         |
+ *         ||             |           |          |         |
+ *         |<             |           |          |         |
+ *         |------------->|           |          |         |
+ *         |<-------------|           |          |         |
+ *         |              |-          |          |         |
+ *         |              ||          |          |         |
+ *         |              |<          |          |         |
+ *         |              |--------------------->|         |
+ *         |              |---------->|          |         |
+ *         |              |           |          |-------->|
+ *         |              |           |          |<--------|
+ *         |              |           |<-------------------|
+ *         |<-------------------------|          |         |
+ *         |              |<----------|          |         |
+ *
+ * CTRL in the VirtIOSound device
+ * ==============================
+ *
+ * The control messages that affect the state of a stream arrive in the
+ * `virtio_snd_handle_ctrl()` queue callback and are of type `struct
+ * virtio_snd_ctrl_command`. They are stored in a queue field in the device
+ * type, `VirtIOSound`. This allows deferring the CTRL request completion if
+ * it's not immediately possible due to locking/state reasons.
+ *
+ * The CTRL message is finally handled in `process_cmd()`.
+ */
+struct VirtIOSound {
+    VirtIODevice parent_obj;
+
+    VirtQueue *queues[VIRTIO_SND_VQ_MAX];
+    uint64_t features;
+    VirtIOSoundPCM *pcm;
+    QEMUSoundCard card;
+    VMChangeStateEntry *vmstate;
+    virtio_snd_config snd_conf;
+    QemuMutex cmdq_mutex;
+    QTAILQ_HEAD(, virtio_snd_ctrl_command) cmdq;
+    bool processing_cmdq;
+};
+
+struct virtio_snd_ctrl_command {
+    VirtQueueElement *elem;
+    VirtQueue *vq;
+    virtio_snd_hdr ctrl;
+    virtio_snd_hdr resp;
+    QTAILQ_ENTRY(virtio_snd_ctrl_command) next;
+};
+#endif
diff --git a/include/hw/audio/wm8750.h b/include/hw/audio/wm8750.h
new file mode 100644 (file)
index 0000000..f7bafd5
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef HW_DAC_WM8750_H
+#define HW_DAC_WM8750_H
+
+
+#define TYPE_WM8750 "wm8750"
+#define TYPE_MV88W8618_AUDIO "mv88w8618_audio"
+
+typedef void data_req_cb(void *opaque, int free_out, int free_in);
+
+void wm8750_data_req_set(DeviceState *dev, data_req_cb *data_req, void *opaque);
+void wm8750_dac_dat(void *opaque, uint32_t sample);
+uint32_t wm8750_adc_dat(void *opaque);
+void *wm8750_dac_buffer(void *opaque, int samples);
+void wm8750_dac_commit(void *opaque);
+void wm8750_set_bclk_in(void *opaque, int new_hz);
+
+#endif
index 1e8b6253dd348f8ef212103c012190fe90effe36..15fff6643502e51a0896189d12709e1622c2b339 100644 (file)
 
 #include "exec/hwaddr.h"
 #include "qapi/qapi-types-block-core.h"
+#include "hw/qdev-properties-system.h"
 
 /* Configuration */
 
 typedef struct BlockConf {
     BlockBackend *blk;
+    OnOffAuto backend_defaults;
     uint32_t physical_block_size;
     uint32_t logical_block_size;
     uint32_t min_io_size;
@@ -29,6 +31,7 @@ typedef struct BlockConf {
     uint32_t lcyls, lheads, lsecs;
     OnOffAuto wce;
     bool share_rw;
+    OnOffAuto account_invalid, account_failed;
     BlockdevOnError rerror;
     BlockdevOnError werror;
 } BlockConf;
@@ -47,6 +50,8 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
 }
 
 #define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf)                     \
+    DEFINE_PROP_ON_OFF_AUTO("backend_defaults", _state,                 \
+                            _conf.backend_defaults, ON_OFF_AUTO_AUTO),  \
     DEFINE_PROP_BLOCKSIZE("logical_block_size", _state,                 \
                           _conf.logical_block_size),                    \
     DEFINE_PROP_BLOCKSIZE("physical_block_size", _state,                \
@@ -57,7 +62,11 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
                        _conf.discard_granularity, -1),                  \
     DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce,           \
                             ON_OFF_AUTO_AUTO),                          \
-    DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false)
+    DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false),        \
+    DEFINE_PROP_ON_OFF_AUTO("account-invalid", _state,                  \
+                            _conf.account_invalid, ON_OFF_AUTO_AUTO),   \
+    DEFINE_PROP_ON_OFF_AUTO("account-failed", _state,                   \
+                            _conf.account_failed, ON_OFF_AUTO_AUTO)
 
 #define DEFINE_BLOCK_PROPERTIES(_state, _conf)                          \
     DEFINE_PROP_DRIVE("drive", _state, _conf.blk),                      \
index 1ecca7cac7f0ce7ae54b5c49d69b9e193dc6f04b..35248c08379d74b4e2d3c880719d11e485834911 100644 (file)
@@ -10,8 +10,7 @@
 #define TYPE_ISA_FDC "isa-fdc"
 
 void isa_fdc_init_drives(ISADevice *fdc, DriveInfo **fds);
-void fdctrl_init_sysbus(qemu_irq irq, int dma_chann,
-                        hwaddr mmio_base, DriveInfo **fds);
+void fdctrl_init_sysbus(qemu_irq irq, hwaddr mmio_base, DriveInfo **fds);
 void sun4m_fdctrl_init(qemu_irq irq, hwaddr io_base,
                        DriveInfo **fds, qemu_irq *fdc_tc);
 
index 7dde0adcee78c160a4a2b94e7d551a09132ef240..de93756cbe8f261edf0ff4b4cf2fa811a9c0463d 100644 (file)
@@ -53,27 +53,31 @@ void nand_setio(DeviceState *dev, uint32_t value);
 uint32_t nand_getio(DeviceState *dev);
 uint32_t nand_getbuswidth(DeviceState *dev);
 
-#define NAND_MFR_TOSHIBA       0x98
-#define NAND_MFR_SAMSUNG       0xec
-#define NAND_MFR_FUJITSU       0x04
-#define NAND_MFR_NATIONAL      0x8f
-#define NAND_MFR_RENESAS       0x07
-#define NAND_MFR_STMICRO       0x20
-#define NAND_MFR_HYNIX         0xad
-#define NAND_MFR_MICRON                0x2c
+#define NAND_MFR_TOSHIBA    0x98
+#define NAND_MFR_SAMSUNG    0xec
+#define NAND_MFR_FUJITSU    0x04
+#define NAND_MFR_NATIONAL   0x8f
+#define NAND_MFR_RENESAS    0x07
+#define NAND_MFR_STMICRO    0x20
+#define NAND_MFR_HYNIX      0xad
+#define NAND_MFR_MICRON     0x2c
 
 /* onenand.c */
 void *onenand_raw_otp(DeviceState *onenand_device);
 
 /* ecc.c */
 typedef struct {
-    uint8_t cp;                /* Column parity */
-    uint16_t lp[2];    /* Line parity */
+    uint8_t cp;     /* Column parity */
+    uint16_t lp[2]; /* Line parity */
     uint16_t count;
 } ECCState;
 
 uint8_t ecc_digest(ECCState *s, uint8_t sample);
 void ecc_reset(ECCState *s);
-extern VMStateDescription vmstate_ecc_state;
+extern const VMStateDescription vmstate_ecc_state;
+
+/* m25p80.c */
+
+BlockBackend *m25p80_get_blk(DeviceState *dev);
 
 #endif
index 5a49029543bfd8195c2fea9f93fbb7ccb83a3625..5f567e8d59579b54b76080eb56fad95f0d12f6e5 100644 (file)
@@ -11,7 +11,7 @@
 #ifndef SWIM_H
 #define SWIM_H
 
-#include "qemu/osdep.h"
+#include "hw/block/block.h"
 #include "hw/sysbus.h"
 #include "qom/object.h"
 
@@ -43,25 +43,22 @@ typedef struct FDrive {
 } FDrive;
 
 struct SWIMCtrl {
-    MemoryRegion iomem;
+    MemoryRegion swim;
+    MemoryRegion iwm;
+    MemoryRegion ism;
     FDrive drives[SWIM_MAX_FD];
     int mode;
     /* IWM mode */
     int iwm_switch;
-    uint16_t regs[8];
-#define IWM_PH0   0
-#define IWM_PH1   1
-#define IWM_PH2   2
-#define IWM_PH3   3
-#define IWM_MTR   4
-#define IWM_DRIVE 5
-#define IWM_Q6    6
-#define IWM_Q7    7
-    uint8_t iwm_data;
-    uint8_t iwm_mode;
+    uint8_t iwm_latches;
+    uint8_t iwmregs[8];
     /* SWIM mode */
+    uint8_t ismregs[16];
     uint8_t swim_phase;
     uint8_t swim_mode;
+    uint8_t swim_status;
+    uint8_t pram[16];
+    uint8_t pram_idx;
     SWIMBus bus;
 };
 
index a49e3a6b44816a63af432ff5c7d73f355b9fc7b7..da85f86efb9185df05a444bd22eeec0557a3e3c8 100644 (file)
@@ -6,7 +6,6 @@
 #include "exec/memory.h"
 #include "sysemu/hostmem.h"
 #include "sysemu/blockdev.h"
-#include "sysemu/accel.h"
 #include "qapi/qapi-types-machine.h"
 #include "qemu/module.h"
 #include "qom/object.h"
@@ -25,7 +24,8 @@ OBJECT_DECLARE_TYPE(MachineState, MachineClass, MACHINE)
 
 extern MachineState *current_machine;
 
-void machine_run_board_init(MachineState *machine);
+void machine_add_audiodev_property(MachineClass *mc);
+void machine_run_board_init(MachineState *machine, const char *mem_path, Error **errp);
 bool machine_usb(MachineState *machine);
 int machine_phandle_start(MachineState *machine);
 bool machine_dump_guest_core(MachineState *machine);
@@ -34,8 +34,67 @@ HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
 void machine_set_cpu_numa_node(MachineState *machine,
                                const CpuInstanceProperties *props,
                                Error **errp);
+void machine_parse_smp_config(MachineState *ms,
+                              const SMPConfiguration *config, Error **errp);
+unsigned int machine_topo_get_cores_per_socket(const MachineState *ms);
+unsigned int machine_topo_get_threads_per_socket(const MachineState *ms);
+void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size);
 
+/**
+ * machine_class_allow_dynamic_sysbus_dev: Add type to list of valid devices
+ * @mc: Machine class
+ * @type: type to allow (should be a subtype of TYPE_SYS_BUS_DEVICE)
+ *
+ * Add the QOM type @type to the list of devices of which are subtypes
+ * of TYPE_SYS_BUS_DEVICE but which are still permitted to be dynamically
+ * created (eg by the user on the command line with -device).
+ * By default if the user tries to create any devices on the command line
+ * that are subtypes of TYPE_SYS_BUS_DEVICE they will get an error message;
+ * for the special cases which are permitted for this machine model, the
+ * machine model class init code must call this function to add them
+ * to the list of specifically permitted devices.
+ */
 void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type);
+
+/**
+ * device_type_is_dynamic_sysbus: Check if type is an allowed sysbus device
+ * type for the machine class.
+ * @mc: Machine class
+ * @type: type to check (should be a subtype of TYPE_SYS_BUS_DEVICE)
+ *
+ * Returns: true if @type is a type in the machine's list of
+ * dynamically pluggable sysbus devices; otherwise false.
+ *
+ * Check if the QOM type @type is in the list of allowed sysbus device
+ * types (see machine_class_allowed_dynamic_sysbus_dev()).
+ * Note that if @type has a parent type in the list, it is allowed too.
+ */
+bool device_type_is_dynamic_sysbus(MachineClass *mc, const char *type);
+
+/**
+ * device_is_dynamic_sysbus: test whether device is a dynamic sysbus device
+ * @mc: Machine class
+ * @dev: device to check
+ *
+ * Returns: true if @dev is a sysbus device on the machine's list
+ * of dynamically pluggable sysbus devices; otherwise false.
+ *
+ * This function checks whether @dev is a valid dynamic sysbus device,
+ * by first confirming that it is a sysbus device and then checking it
+ * against the list of permitted dynamic sysbus devices which has been
+ * set up by the machine using machine_class_allow_dynamic_sysbus_dev().
+ *
+ * It is valid to call this with something that is not a subclass of
+ * TYPE_SYS_BUS_DEVICE; the function will return false in this case.
+ * This allows hotplug callback functions to be written as:
+ *     if (device_is_dynamic_sysbus(mc, dev)) {
+ *         handle dynamic sysbus case;
+ *     } else if (some other kind of hotplug) {
+ *         handle that;
+ *     }
+ */
+bool device_is_dynamic_sysbus(MachineClass *mc, DeviceState *dev);
+
 /*
  * Checks that backend isn't used, preps it for exclusive usage and
  * returns migratable MemoryRegion provided by backend.
@@ -69,6 +128,25 @@ typedef struct {
     CPUArchId cpus[];
 } CPUArchIdList;
 
+/**
+ * SMPCompatProps:
+ * @prefer_sockets - whether sockets are preferred over cores in smp parsing
+ * @dies_supported - whether dies are supported by the machine
+ * @clusters_supported - whether clusters are supported by the machine
+ * @has_clusters - whether clusters are explicitly specified in the user
+ *                 provided SMP configuration
+ * @books_supported - whether books are supported by the machine
+ * @drawers_supported - whether drawers are supported by the machine
+ */
+typedef struct {
+    bool prefer_sockets;
+    bool dies_supported;
+    bool clusters_supported;
+    bool has_clusters;
+    bool books_supported;
+    bool drawers_supported;
+} SMPCompatProps;
+
 /**
  * MachineClass:
  * @deprecation_reason: If set, the machine is marked as deprecated. The
@@ -85,7 +163,7 @@ typedef struct {
  *    any actions to be performed by hotplug handler.
  * @cpu_index_to_instance_props:
  *    used to provide @cpu_index to socket/core/thread number mapping, allowing
- *    legacy code to perform maping from cpu_index to topology properties
+ *    legacy code to perform mapping from cpu_index to topology properties
  *    Returns: tuple of socket/core/thread ids given cpu_index belongs to.
  *    used to provide @cpu_index to socket number mapping, allowing
  *    a machine to group CPU threads belonging to the same socket/package
@@ -127,12 +205,9 @@ typedef struct {
  * @kvm_type:
  *    Return the type of KVM corresponding to the kvm-type string option or
  *    computed based on other criteria such as the host kernel capabilities.
+ *    kvm-type may be NULL if it is not needed.
  * @numa_mem_supported:
  *    true if '--numa node.mem' option is supported and false otherwise
- * @smp_parse:
- *    The function pointer to hook different machine specific functions for
- *    parsing "smp-opts" from QemuOpts to MachineState::CpuTopology and more
- *    machine specific topology fields, such as smp_dies for PCMachine.
  * @hotplug_allowed:
  *    If the hook is provided, then it'll be called for each device
  *    hotplug to check whether the device hotplug is allowed.  Return
@@ -141,10 +216,10 @@ typedef struct {
  *    the rejection.  If the hook is not provided, all hotplug will be
  *    allowed.
  * @default_ram_id:
- *    Specifies inital RAM MemoryRegion name to be used for default backend
+ *    Specifies initial RAM MemoryRegion name to be used for default backend
  *    creation if user explicitly hasn't specified backend with "memory-backend"
  *    property.
- *    It also will be used as a way to optin into "-m" option support.
+ *    It also will be used as a way to option into "-m" option support.
  *    If it's not set by board, '-m' will be ignored and generic code will
  *    not create default RAM MemoryRegion.
  * @fixup_ram_size:
@@ -166,10 +241,9 @@ struct MachineClass {
     const char *deprecation_reason;
 
     void (*init)(MachineState *state);
-    void (*reset)(MachineState *state);
+    void (*reset)(MachineState *state, ShutdownCause reason);
     void (*wakeup)(MachineState *state);
     int (*kvm_type)(MachineState *machine, const char *arg);
-    void (*smp_parse)(MachineState *ms, QemuOpts *opts);
 
     BlockInterfaceType block_default_type;
     int units_per_default_bus;
@@ -187,6 +261,7 @@ struct MachineClass {
     const char *default_machine_opts;
     const char *default_boot_order;
     const char *default_display;
+    const char *default_nic;
     GPtrArray *compat_props;
     const char *hw_version;
     ram_addr_t default_ram_size;
@@ -198,7 +273,7 @@ struct MachineClass {
     bool has_hotpluggable_cpus;
     bool ignore_memory_transaction_failures;
     int numa_mem_align_shift;
-    const char **valid_cpu_types;
+    const char * const *valid_cpu_types;
     strList *allowed_dynamic_sysbus_devices;
     bool auto_enable_numa_with_memhp;
     bool auto_enable_numa_with_memdev;
@@ -207,6 +282,8 @@ struct MachineClass {
     bool nvdimm_supported;
     bool numa_mem_supported;
     bool auto_enable_numa;
+    bool cpu_cluster_has_numa_boundary;
+    SMPCompatProps smp_props;
     const char *default_ram_id;
 
     HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
@@ -224,26 +301,50 @@ struct MachineClass {
  * DeviceMemoryState:
  * @base: address in guest physical address space where the memory
  * address space for memory devices starts
- * @mr: address space container for memory devices
+ * @mr: memory region container for memory devices
+ * @as: address space for memory devices
+ * @listener: memory listener used to track used memslots in the address space
+ * @dimm_size: the sum of plugged DIMMs' sizes
+ * @used_region_size: the part of @mr already used by memory devices
+ * @required_memslots: the number of memslots required by memory devices
+ * @used_memslots: the number of memslots currently used by memory devices
+ * @memslot_auto_decision_active: whether any plugged memory device
+ *                                automatically decided to use more than
+ *                                one memslot
  */
 typedef struct DeviceMemoryState {
     hwaddr base;
     MemoryRegion mr;
+    AddressSpace as;
+    MemoryListener listener;
+    uint64_t dimm_size;
+    uint64_t used_region_size;
+    unsigned int required_memslots;
+    unsigned int used_memslots;
+    unsigned int memslot_auto_decision_active;
 } DeviceMemoryState;
 
 /**
  * CpuTopology:
  * @cpus: the number of present logical processors on the machine
- * @cores: the number of cores in one package
+ * @drawers: the number of drawers on the machine
+ * @books: the number of books in one drawer
+ * @sockets: the number of sockets in one book
+ * @dies: the number of dies in one socket
+ * @clusters: the number of clusters in one die
+ * @cores: the number of cores in one cluster
  * @threads: the number of threads in one core
- * @sockets: the number of sockets on the machine
  * @max_cpus: the maximum number of logical processors on the machine
  */
 typedef struct CpuTopology {
     unsigned int cpus;
+    unsigned int drawers;
+    unsigned int books;
+    unsigned int sockets;
+    unsigned int dies;
+    unsigned int clusters;
     unsigned int cores;
     unsigned int threads;
-    unsigned int sockets;
     unsigned int max_cpus;
 } CpuTopology;
 
@@ -253,10 +354,10 @@ typedef struct CpuTopology {
 struct MachineState {
     /*< private >*/
     Object parent_obj;
-    Notifier sysbus_notifier;
 
     /*< public >*/
 
+    void *fdt;
     char *dtb;
     char *dumpdtb;
     int phandle_start;
@@ -269,8 +370,8 @@ struct MachineState {
     bool iommu;
     bool suppress_vmdesc;
     bool enable_graphics;
-    char *memory_encryption;
-    char *ram_memdev_id;
+    ConfidentialGuestSupport *cgs;
+    HostMemoryBackend *memdev;
     /*
      * convenience alias to ram_memdev_id backend memory region
      * or to numa container memory region
@@ -278,10 +379,18 @@ struct MachineState {
     MemoryRegion *ram;
     DeviceMemoryState *device_memory;
 
+    /*
+     * Included in MachineState for simplicity, but not supported
+     * unless machine_add_audiodev_property is called.  Boards
+     * that have embedded audio devices can call it from the
+     * machine init function and forward the property to the device.
+     */
+    char *audiodev;
+
     ram_addr_t ram_size;
     ram_addr_t maxram_size;
     uint64_t   ram_slots;
-    const char *boot_order;
+    BootConfiguration boot_config;
     char *kernel_filename;
     char *kernel_cmdline;
     char *initrd_filename;
@@ -310,6 +419,33 @@ struct MachineState {
     } \
     type_init(machine_initfn##_register_types)
 
+extern GlobalProperty hw_compat_8_1[];
+extern const size_t hw_compat_8_1_len;
+
+extern GlobalProperty hw_compat_8_0[];
+extern const size_t hw_compat_8_0_len;
+
+extern GlobalProperty hw_compat_7_2[];
+extern const size_t hw_compat_7_2_len;
+
+extern GlobalProperty hw_compat_7_1[];
+extern const size_t hw_compat_7_1_len;
+
+extern GlobalProperty hw_compat_7_0[];
+extern const size_t hw_compat_7_0_len;
+
+extern GlobalProperty hw_compat_6_2[];
+extern const size_t hw_compat_6_2_len;
+
+extern GlobalProperty hw_compat_6_1[];
+extern const size_t hw_compat_6_1_len;
+
+extern GlobalProperty hw_compat_6_0[];
+extern const size_t hw_compat_6_0_len;
+
+extern GlobalProperty hw_compat_5_2[];
+extern const size_t hw_compat_5_2_len;
+
 extern GlobalProperty hw_compat_5_1[];
 extern const size_t hw_compat_5_1_len;
 
diff --git a/include/hw/char/avr_usart.h b/include/hw/char/avr_usart.h
new file mode 100644 (file)
index 0000000..0cc599e
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * AVR USART
+ *
+ * Copyright (c) 2018 University of Kent
+ * Author: Sarah Harris
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see
+ * <http://www.gnu.org/licenses/lgpl-2.1.html>
+ */
+
+#ifndef HW_CHAR_AVR_USART_H
+#define HW_CHAR_AVR_USART_H
+
+#include "hw/sysbus.h"
+#include "chardev/char-fe.h"
+#include "qom/object.h"
+
+/* Offsets of registers. */
+#define USART_DR   0x06
+#define USART_CSRA  0x00
+#define USART_CSRB  0x01
+#define USART_CSRC  0x02
+#define USART_BRRH 0x05
+#define USART_BRRL 0x04
+
+/* Relevant bits in registers. */
+#define USART_CSRA_RXC    (1 << 7)
+#define USART_CSRA_TXC    (1 << 6)
+#define USART_CSRA_DRE    (1 << 5)
+#define USART_CSRA_MPCM   (1 << 0)
+
+#define USART_CSRB_RXCIE  (1 << 7)
+#define USART_CSRB_TXCIE  (1 << 6)
+#define USART_CSRB_DREIE  (1 << 5)
+#define USART_CSRB_RXEN   (1 << 4)
+#define USART_CSRB_TXEN   (1 << 3)
+#define USART_CSRB_CSZ2   (1 << 2)
+#define USART_CSRB_RXB8   (1 << 1)
+#define USART_CSRB_TXB8   (1 << 0)
+
+#define USART_CSRC_MSEL1  (1 << 7)
+#define USART_CSRC_MSEL0  (1 << 6)
+#define USART_CSRC_PM1    (1 << 5)
+#define USART_CSRC_PM0    (1 << 4)
+#define USART_CSRC_CSZ1   (1 << 2)
+#define USART_CSRC_CSZ0   (1 << 1)
+
+#define TYPE_AVR_USART "avr-usart"
+OBJECT_DECLARE_SIMPLE_TYPE(AVRUsartState, AVR_USART)
+
+struct AVRUsartState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion mmio;
+
+    CharBackend chr;
+
+    bool enabled;
+
+    uint8_t data;
+    bool data_valid;
+    uint8_t char_mask;
+    /* Control and Status Registers */
+    uint8_t csra;
+    uint8_t csrb;
+    uint8_t csrc;
+    /* Baud Rate Registers (low/high byte) */
+    uint8_t brrh;
+    uint8_t brrl;
+
+    /* Receive Complete */
+    qemu_irq rxc_irq;
+    /* Transmit Complete */
+    qemu_irq txc_irq;
+    /* Data Register Empty */
+    qemu_irq dre_irq;
+};
+
+#endif /* HW_CHAR_AVR_USART_H */
diff --git a/include/hw/char/bcm2835_aux.h b/include/hw/char/bcm2835_aux.h
new file mode 100644 (file)
index 0000000..9e08179
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Rasperry Pi 2 emulation and refactoring Copyright (c) 2015, Microsoft
+ * Written by Andrew Baumann
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_AUX_H
+#define BCM2835_AUX_H
+
+#include "hw/sysbus.h"
+#include "chardev/char-fe.h"
+#include "qom/object.h"
+
+#define TYPE_BCM2835_AUX "bcm2835-aux"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835AuxState, BCM2835_AUX)
+
+#define BCM2835_AUX_RX_FIFO_LEN 8
+
+struct BCM2835AuxState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion iomem;
+    CharBackend chr;
+    qemu_irq irq;
+
+    uint8_t read_fifo[BCM2835_AUX_RX_FIFO_LEN];
+    uint8_t read_pos, read_count;
+    uint8_t ier, iir;
+};
+
+#endif
diff --git a/include/hw/char/cadence_uart.h b/include/hw/char/cadence_uart.h
new file mode 100644 (file)
index 0000000..e7f7cd8
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Device model for Cadence UART
+ *
+ * Copyright (c) 2010 Xilinx Inc.
+ * Copyright (c) 2012 Peter A.G. Crosthwaite (peter.crosthwaite@petalogix.com)
+ * Copyright (c) 2012 PetaLogix Pty Ltd.
+ * Written by Haibing Ma
+ *            M.Habib
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef CADENCE_UART_H
+#define CADENCE_UART_H
+
+#include "hw/qdev-properties.h"
+#include "hw/sysbus.h"
+#include "chardev/char-fe.h"
+#include "qapi/error.h"
+#include "qemu/timer.h"
+#include "qom/object.h"
+
+#define CADENCE_UART_RX_FIFO_SIZE           16
+#define CADENCE_UART_TX_FIFO_SIZE           16
+
+#define CADENCE_UART_R_MAX (0x48/4)
+
+#define TYPE_CADENCE_UART "cadence_uart"
+OBJECT_DECLARE_SIMPLE_TYPE(CadenceUARTState, CADENCE_UART)
+
+struct CadenceUARTState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    uint32_t r[CADENCE_UART_R_MAX];
+    uint8_t rx_fifo[CADENCE_UART_RX_FIFO_SIZE];
+    uint8_t tx_fifo[CADENCE_UART_TX_FIFO_SIZE];
+    uint32_t rx_wpos;
+    uint32_t rx_count;
+    uint32_t tx_count;
+    uint64_t char_tx_time;
+    CharBackend chr;
+    qemu_irq irq;
+    QEMUTimer *fifo_trigger_handle;
+    Clock *refclk;
+};
+
+#endif
diff --git a/include/hw/char/cmsdk-apb-uart.h b/include/hw/char/cmsdk-apb-uart.h
new file mode 100644 (file)
index 0000000..7de8f8d
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * ARM CMSDK APB UART emulation
+ *
+ * Copyright (c) 2017 Linaro Limited
+ * Written by Peter Maydell
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+#ifndef CMSDK_APB_UART_H
+#define CMSDK_APB_UART_H
+
+#include "hw/sysbus.h"
+#include "chardev/char-fe.h"
+#include "qom/object.h"
+
+#define TYPE_CMSDK_APB_UART "cmsdk-apb-uart"
+OBJECT_DECLARE_SIMPLE_TYPE(CMSDKAPBUART, CMSDK_APB_UART)
+
+struct CMSDKAPBUART {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    CharBackend chr;
+    qemu_irq txint;
+    qemu_irq rxint;
+    qemu_irq txovrint;
+    qemu_irq rxovrint;
+    qemu_irq uartint;
+    guint watch_tag;
+    uint32_t pclk_frq;
+
+    uint32_t state;
+    uint32_t ctrl;
+    uint32_t intstatus;
+    uint32_t bauddiv;
+    /* This UART has no FIFO, only a 1-character buffer for each of Tx and Rx */
+    uint8_t txbuf;
+    uint8_t rxbuf;
+};
+
+#endif
diff --git a/include/hw/char/digic-uart.h b/include/hw/char/digic-uart.h
new file mode 100644 (file)
index 0000000..f710a1a
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Canon DIGIC UART block declarations.
+ *
+ * Copyright (C) 2013 Antony Pavlov <antonynpavlov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef HW_CHAR_DIGIC_UART_H
+#define HW_CHAR_DIGIC_UART_H
+
+#include "hw/sysbus.h"
+#include "chardev/char-fe.h"
+#include "qom/object.h"
+
+#define TYPE_DIGIC_UART "digic-uart"
+OBJECT_DECLARE_SIMPLE_TYPE(DigicUartState, DIGIC_UART)
+
+enum {
+    R_TX = 0x00,
+    R_RX,
+    R_ST = (0x14 >> 2),
+    R_MAX
+};
+
+struct DigicUartState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion regs_region;
+    CharBackend chr;
+
+    uint32_t reg_rx;
+    uint32_t reg_st;
+};
+
+#endif /* HW_CHAR_DIGIC_UART_H */
diff --git a/include/hw/char/escc.h b/include/hw/char/escc.h
new file mode 100644 (file)
index 0000000..5669a5b
--- /dev/null
@@ -0,0 +1,62 @@
+#ifndef HW_ESCC_H
+#define HW_ESCC_H
+
+#include "chardev/char-fe.h"
+#include "chardev/char-serial.h"
+#include "hw/sysbus.h"
+#include "ui/input.h"
+#include "qom/object.h"
+
+/* escc.c */
+#define TYPE_ESCC "escc"
+#define ESCC_SIZE 4
+
+OBJECT_DECLARE_SIMPLE_TYPE(ESCCState, ESCC)
+
+typedef enum {
+    escc_chn_a, escc_chn_b,
+} ESCCChnID;
+
+typedef enum {
+    escc_serial, escc_kbd, escc_mouse,
+} ESCCChnType;
+
+#define ESCC_SERIO_QUEUE_SIZE 256
+
+typedef struct {
+    uint8_t data[ESCC_SERIO_QUEUE_SIZE];
+    int rptr, wptr, count;
+} ESCCSERIOQueue;
+
+#define ESCC_SERIAL_REGS 16
+typedef struct ESCCChannelState {
+    qemu_irq irq;
+    uint32_t rxint, txint, rxint_under_svc, txint_under_svc;
+    struct ESCCChannelState *otherchn;
+    uint32_t reg;
+    uint8_t wregs[ESCC_SERIAL_REGS], rregs[ESCC_SERIAL_REGS];
+    ESCCSERIOQueue queue;
+    CharBackend chr;
+    int e0_mode, led_mode, caps_lock_mode, num_lock_mode;
+    int disabled;
+    int clock;
+    uint32_t vmstate_dummy;
+    ESCCChnID chn; /* this channel, A (base+4) or B (base+0) */
+    ESCCChnType type;
+    uint8_t rx, tx;
+    QemuInputHandlerState *hs;
+    char *sunkbd_layout;
+} ESCCChannelState;
+
+struct ESCCState {
+    SysBusDevice parent_obj;
+
+    struct ESCCChannelState chn[2];
+    uint32_t it_shift;
+    bool bit_swap;
+    MemoryRegion mmio;
+    uint32_t disabled;
+    uint32_t frequency;
+};
+
+#endif
diff --git a/include/hw/char/goldfish_tty.h b/include/hw/char/goldfish_tty.h
new file mode 100644 (file)
index 0000000..d59733e
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Goldfish TTY
+ *
+ * (c) 2020 Laurent Vivier <laurent@vivier.eu>
+ *
+ */
+
+#ifndef HW_CHAR_GOLDFISH_TTY_H
+#define HW_CHAR_GOLDFISH_TTY_H
+
+#include "qemu/fifo8.h"
+#include "chardev/char-fe.h"
+#include "hw/sysbus.h"
+
+#define TYPE_GOLDFISH_TTY "goldfish_tty"
+OBJECT_DECLARE_SIMPLE_TYPE(GoldfishTTYState, GOLDFISH_TTY)
+
+#define GOLFISH_TTY_BUFFER_SIZE 128
+
+struct GoldfishTTYState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    qemu_irq irq;
+    CharBackend chr;
+
+    uint32_t data_len;
+    uint64_t data_ptr;
+    bool int_enabled;
+
+    Fifo8 rx_fifo;
+};
+
+#endif
diff --git a/include/hw/char/ibex_uart.h b/include/hw/char/ibex_uart.h
new file mode 100644 (file)
index 0000000..9deadf2
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * QEMU lowRISC Ibex UART device
+ *
+ * Copyright (c) 2020 Western Digital
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_IBEX_UART_H
+#define HW_IBEX_UART_H
+
+#include "hw/sysbus.h"
+#include "chardev/char-fe.h"
+#include "qemu/timer.h"
+#include "qom/object.h"
+
+#define IBEX_UART_TX_FIFO_SIZE 16
+#define IBEX_UART_CLOCK 50000000 /* 50MHz clock */
+
+#define TYPE_IBEX_UART "ibex-uart"
+OBJECT_DECLARE_SIMPLE_TYPE(IbexUartState, IBEX_UART)
+
+struct IbexUartState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion mmio;
+
+    uint8_t tx_fifo[IBEX_UART_TX_FIFO_SIZE];
+    uint32_t tx_level;
+
+    uint32_t rx_level;
+
+    QEMUTimer *fifo_trigger_handle;
+    uint64_t char_tx_time;
+
+    uint32_t uart_intr_state;
+    uint32_t uart_intr_enable;
+    uint32_t uart_ctrl;
+    uint32_t uart_status;
+    uint32_t uart_rdata;
+    uint32_t uart_fifo_ctrl;
+    uint32_t uart_fifo_status;
+    uint32_t uart_ovrd;
+    uint32_t uart_val;
+    uint32_t uart_timeout_ctrl;
+
+    Clock *f_clk;
+
+    CharBackend chr;
+    qemu_irq tx_watermark;
+    qemu_irq rx_watermark;
+    qemu_irq tx_empty;
+    qemu_irq rx_overflow;
+};
+#endif /* HW_IBEX_UART_H */
diff --git a/include/hw/char/imx_serial.h b/include/hw/char/imx_serial.h
new file mode 100644 (file)
index 0000000..b823f94
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ * Device model for i.MX UART
+ *
+ * Copyright (c) 2008 OKL
+ * Originally Written by Hans Jiang
+ * Copyright (c) 2011 NICTA Pty Ltd.
+ * Updated by Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef IMX_SERIAL_H
+#define IMX_SERIAL_H
+
+#include "hw/sysbus.h"
+#include "chardev/char-fe.h"
+#include "qom/object.h"
+
+#define TYPE_IMX_SERIAL "imx.serial"
+OBJECT_DECLARE_SIMPLE_TYPE(IMXSerialState, IMX_SERIAL)
+
+#define URXD_CHARRDY    (1<<15)   /* character read is valid */
+#define URXD_ERR        (1<<14)   /* Character has error */
+#define URXD_FRMERR     (1<<12)   /* Character has frame error */
+#define URXD_BRK        (1<<11)   /* Break received */
+
+#define USR1_PARTYER    (1<<15)   /* Parity Error */
+#define USR1_RTSS       (1<<14)   /* RTS pin status */
+#define USR1_TRDY       (1<<13)   /* Tx ready */
+#define USR1_RTSD       (1<<12)   /* RTS delta: pin changed state */
+#define USR1_ESCF       (1<<11)   /* Escape sequence interrupt */
+#define USR1_FRAMERR    (1<<10)   /* Framing error  */
+#define USR1_RRDY       (1<<9)    /* receiver ready */
+#define USR1_AGTIM      (1<<8)    /* Aging timer interrupt */
+#define USR1_DTRD       (1<<7)    /* DTR changed */
+#define USR1_RXDS       (1<<6)    /* Receiver is idle */
+#define USR1_AIRINT     (1<<5)    /* Aysnch IR interrupt */
+#define USR1_AWAKE      (1<<4)    /* Falling edge detected on RXd pin */
+
+#define USR2_ADET       (1<<15)   /* Autobaud complete */
+#define USR2_TXFE       (1<<14)   /* Transmit FIFO empty */
+#define USR2_DTRF       (1<<13)   /* DTR/DSR transition */
+#define USR2_IDLE       (1<<12)   /* UART has been idle for too long */
+#define USR2_ACST       (1<<11)   /* Autobaud counter stopped */
+#define USR2_RIDELT     (1<<10)   /* Ring Indicator delta */
+#define USR2_RIIN       (1<<9)    /* Ring Indicator Input */
+#define USR2_IRINT      (1<<8)    /* Serial Infrared Interrupt */
+#define USR2_WAKE       (1<<7)    /* Start bit detected */
+#define USR2_DCDDELT    (1<<6)    /* Data Carrier Detect delta */
+#define USR2_DCDIN      (1<<5)    /* Data Carrier Detect Input */
+#define USR2_RTSF       (1<<4)    /* RTS transition */
+#define USR2_TXDC       (1<<3)    /* Transmission complete */
+#define USR2_BRCD       (1<<2)    /* Break condition detected */
+#define USR2_ORE        (1<<1)    /* Overrun error */
+#define USR2_RDR        (1<<0)    /* Receive data ready */
+
+#define UCR1_TRDYEN     (1<<13)   /* Tx Ready Interrupt Enable */
+#define UCR1_RRDYEN     (1<<9)    /* Rx Ready Interrupt Enable */
+#define UCR1_TXMPTYEN   (1<<6)    /* Tx Empty Interrupt Enable */
+#define UCR1_UARTEN     (1<<0)    /* UART Enable */
+
+#define UCR2_TXEN       (1<<2)    /* Transmitter enable */
+#define UCR2_RXEN       (1<<1)    /* Receiver enable */
+#define UCR2_SRST       (1<<0)    /* Reset complete */
+
+#define UCR4_DREN       BIT(0)    /* Receive Data Ready interrupt enable */
+#define UCR4_TCEN       BIT(3)    /* TX complete interrupt enable */
+#define UCR4_WKEN       BIT(7)    /* WAKE interrupt enable */
+
+#define UTS1_TXEMPTY    (1<<6)
+#define UTS1_RXEMPTY    (1<<5)
+#define UTS1_TXFULL     (1<<4)
+#define UTS1_RXFULL     (1<<3)
+
+struct IMXSerialState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    int32_t readbuff;
+
+    uint32_t usr1;
+    uint32_t usr2;
+    uint32_t ucr1;
+    uint32_t ucr2;
+    uint32_t uts1;
+
+    /*
+     * The registers below are implemented just so that the
+     * guest OS sees what it has written
+     */
+    uint32_t onems;
+    uint32_t ufcr;
+    uint32_t ubmr;
+    uint32_t ubrc;
+    uint32_t ucr3;
+    uint32_t ucr4;
+
+    qemu_irq irq;
+    CharBackend chr;
+};
+
+#endif
diff --git a/include/hw/char/mchp_pfsoc_mmuart.h b/include/hw/char/mchp_pfsoc_mmuart.h
new file mode 100644 (file)
index 0000000..b0e14ca
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Microchip PolarFire SoC MMUART emulation
+ *
+ * Copyright (c) 2020 Wind River Systems, Inc.
+ *
+ * Author:
+ *   Bin Meng <bin.meng@windriver.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_MCHP_PFSOC_MMUART_H
+#define HW_MCHP_PFSOC_MMUART_H
+
+#include "hw/sysbus.h"
+#include "hw/char/serial.h"
+
+#define MCHP_PFSOC_MMUART_REG_COUNT 13
+
+#define TYPE_MCHP_PFSOC_UART "mchp.pfsoc.uart"
+OBJECT_DECLARE_SIMPLE_TYPE(MchpPfSoCMMUartState, MCHP_PFSOC_UART)
+
+typedef struct MchpPfSoCMMUartState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion container;
+    MemoryRegion iomem;
+
+    SerialMM serial_mm;
+
+    uint32_t reg[MCHP_PFSOC_MMUART_REG_COUNT];
+} MchpPfSoCMMUartState;
+
+/**
+ * mchp_pfsoc_mmuart_create - Create a Microchip PolarFire SoC MMUART
+ *
+ * This is a helper routine for board to create a MMUART device that is
+ * compatible with Microchip PolarFire SoC.
+ *
+ * @sysmem: system memory region to map
+ * @base: base address of the MMUART registers
+ * @irq: IRQ number of the MMUART device
+ * @chr: character device to associate to
+ *
+ * @return: a pointer to the device specific control structure
+ */
+MchpPfSoCMMUartState *mchp_pfsoc_mmuart_create(MemoryRegion *sysmem,
+    hwaddr base, qemu_irq irq, Chardev *chr);
+
+#endif /* HW_MCHP_PFSOC_MMUART_H */
diff --git a/include/hw/char/nrf51_uart.h b/include/hw/char/nrf51_uart.h
new file mode 100644 (file)
index 0000000..561b638
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * nRF51 SoC UART emulation
+ *
+ * Copyright (c) 2018 Julia Suvorova <jusual@mail.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+#ifndef NRF51_UART_H
+#define NRF51_UART_H
+
+#include "hw/sysbus.h"
+#include "chardev/char-fe.h"
+#include "hw/registerfields.h"
+#include "qom/object.h"
+
+#define UART_FIFO_LENGTH 6
+#define UART_SIZE 0x1000
+
+#define TYPE_NRF51_UART "nrf51_soc.uart"
+OBJECT_DECLARE_SIMPLE_TYPE(NRF51UARTState, NRF51_UART)
+
+REG32(UART_STARTRX, 0x000)
+REG32(UART_STOPRX, 0x004)
+REG32(UART_STARTTX, 0x008)
+REG32(UART_STOPTX, 0x00C)
+REG32(UART_SUSPEND, 0x01C)
+
+REG32(UART_CTS, 0x100)
+REG32(UART_NCTS, 0x104)
+REG32(UART_RXDRDY, 0x108)
+REG32(UART_TXDRDY, 0x11C)
+REG32(UART_ERROR, 0x124)
+REG32(UART_RXTO, 0x144)
+
+REG32(UART_INTEN, 0x300)
+    FIELD(UART_INTEN, CTS, 0, 1)
+    FIELD(UART_INTEN, NCTS, 1, 1)
+    FIELD(UART_INTEN, RXDRDY, 2, 1)
+    FIELD(UART_INTEN, TXDRDY, 7, 1)
+    FIELD(UART_INTEN, ERROR, 9, 1)
+    FIELD(UART_INTEN, RXTO, 17, 1)
+REG32(UART_INTENSET, 0x304)
+REG32(UART_INTENCLR, 0x308)
+REG32(UART_ERRORSRC, 0x480)
+REG32(UART_ENABLE, 0x500)
+REG32(UART_PSELRTS, 0x508)
+REG32(UART_PSELTXD, 0x50C)
+REG32(UART_PSELCTS, 0x510)
+REG32(UART_PSELRXD, 0x514)
+REG32(UART_RXD, 0x518)
+REG32(UART_TXD, 0x51C)
+REG32(UART_BAUDRATE, 0x524)
+REG32(UART_CONFIG, 0x56C)
+
+struct NRF51UARTState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    CharBackend chr;
+    qemu_irq irq;
+    guint watch_tag;
+
+    uint8_t rx_fifo[UART_FIFO_LENGTH];
+    unsigned int rx_fifo_pos;
+    unsigned int rx_fifo_len;
+
+    uint32_t reg[0x56C];
+
+    bool rx_started;
+    bool tx_started;
+    bool pending_tx_byte;
+    bool enabled;
+};
+
+#endif
diff --git a/include/hw/char/parallel-isa.h b/include/hw/char/parallel-isa.h
new file mode 100644 (file)
index 0000000..d24ccec
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * QEMU ISA Parallel PORT emulation
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ * Copyright (c) 2007 Marko Kohtala
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef HW_PARALLEL_ISA_H
+#define HW_PARALLEL_ISA_H
+
+#include "parallel.h"
+
+#include "hw/isa/isa.h"
+#include "qom/object.h"
+
+#define TYPE_ISA_PARALLEL "isa-parallel"
+OBJECT_DECLARE_SIMPLE_TYPE(ISAParallelState, ISA_PARALLEL)
+
+struct ISAParallelState {
+    ISADevice parent_obj;
+
+    uint32_t index;
+    uint32_t iobase;
+    uint32_t isairq;
+    ParallelState state;
+};
+
+#endif /* HW_PARALLEL_ISA_H */
diff --git a/include/hw/char/parallel.h b/include/hw/char/parallel.h
new file mode 100644 (file)
index 0000000..7b5a309
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef HW_PARALLEL_H
+#define HW_PARALLEL_H
+
+#include "exec/ioport.h"
+#include "exec/memory.h"
+#include "hw/isa/isa.h"
+#include "hw/irq.h"
+#include "chardev/char-fe.h"
+#include "chardev/char.h"
+
+typedef struct ParallelState {
+    MemoryRegion iomem;
+    uint8_t dataw;
+    uint8_t datar;
+    uint8_t status;
+    uint8_t control;
+    qemu_irq irq;
+    int irq_pending;
+    CharBackend chr;
+    int hw_driver;
+    int epp_timeout;
+    uint32_t last_read_offset; /* For debugging */
+    /* Memory-mapped interface */
+    int it_shift;
+    PortioList portio_list;
+} ParallelState;
+
+void parallel_hds_isa_init(ISABus *bus, int n);
+
+bool parallel_mm_init(MemoryRegion *address_space,
+                      hwaddr base, int it_shift, qemu_irq irq,
+                      Chardev *chr);
+
+#endif
diff --git a/include/hw/char/pl011.h b/include/hw/char/pl011.h
new file mode 100644 (file)
index 0000000..d853802
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_PL011_H
+#define HW_PL011_H
+
+#include "hw/sysbus.h"
+#include "chardev/char-fe.h"
+#include "qom/object.h"
+
+#define TYPE_PL011 "pl011"
+OBJECT_DECLARE_SIMPLE_TYPE(PL011State, PL011)
+
+/* This shares the same struct (and cast macro) as the base pl011 device */
+#define TYPE_PL011_LUMINARY "pl011_luminary"
+
+/* Depth of UART FIFO in bytes, when FIFO mode is enabled (else depth == 1) */
+#define PL011_FIFO_DEPTH 16
+
+struct PL011State {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    uint32_t readbuff;
+    uint32_t flags;
+    uint32_t lcr;
+    uint32_t rsr;
+    uint32_t cr;
+    uint32_t dmacr;
+    uint32_t int_enabled;
+    uint32_t int_level;
+    uint32_t read_fifo[PL011_FIFO_DEPTH];
+    uint32_t ilpr;
+    uint32_t ibrd;
+    uint32_t fbrd;
+    uint32_t ifl;
+    int read_pos;
+    int read_count;
+    int read_trigger;
+    CharBackend chr;
+    qemu_irq irq[6];
+    Clock *clk;
+    bool migrate_clk;
+    const unsigned char *id;
+};
+
+DeviceState *pl011_create(hwaddr addr, qemu_irq irq, Chardev *chr);
+
+#endif
diff --git a/include/hw/char/renesas_sci.h b/include/hw/char/renesas_sci.h
new file mode 100644 (file)
index 0000000..a4764e3
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Renesas Serial Communication Interface
+ *
+ * Copyright (c) 2018 Yoshinori Sato
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_CHAR_RENESAS_SCI_H
+#define HW_CHAR_RENESAS_SCI_H
+
+#include "chardev/char-fe.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_RENESAS_SCI "renesas-sci"
+typedef struct RSCIState RSCIState;
+DECLARE_INSTANCE_CHECKER(RSCIState, RSCI,
+                         TYPE_RENESAS_SCI)
+
+enum {
+    ERI = 0,
+    RXI = 1,
+    TXI = 2,
+    TEI = 3,
+    SCI_NR_IRQ = 4
+};
+
+struct RSCIState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion memory;
+    QEMUTimer timer;
+    CharBackend chr;
+    qemu_irq irq[SCI_NR_IRQ];
+
+    uint8_t smr;
+    uint8_t brr;
+    uint8_t scr;
+    uint8_t tdr;
+    uint8_t ssr;
+    uint8_t rdr;
+    uint8_t scmr;
+    uint8_t semr;
+
+    uint8_t read_ssr;
+    int64_t trtime;
+    int64_t rx_next;
+    uint64_t input_freq;
+};
+
+#endif
diff --git a/include/hw/char/riscv_htif.h b/include/hw/char/riscv_htif.h
new file mode 100644 (file)
index 0000000..df493fd
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * QEMU RISCV Host Target Interface (HTIF) Emulation
+ *
+ * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
+ * Copyright (c) 2017-2018 SiFive, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_HTIF_H
+#define HW_RISCV_HTIF_H
+
+#include "chardev/char.h"
+#include "chardev/char-fe.h"
+#include "exec/memory.h"
+
+#define TYPE_HTIF_UART "riscv.htif.uart"
+
+typedef struct HTIFState {
+    int allow_tohost;
+    int fromhost_inprogress;
+
+    uint64_t tohost;
+    uint64_t fromhost;
+    hwaddr tohost_offset;
+    hwaddr fromhost_offset;
+    MemoryRegion mmio;
+
+    CharBackend chr;
+    uint64_t pending_read;
+} HTIFState;
+
+extern const char *sig_file;
+extern uint8_t line_size;
+
+/* HTIF symbol callback */
+void htif_symbol_callback(const char *st_name, int st_info, uint64_t st_value,
+    uint64_t st_size);
+
+/* legacy pre qom */
+HTIFState *htif_mm_init(MemoryRegion *address_space, Chardev *chr,
+                        uint64_t nonelf_base, bool custom_base);
+
+#endif
diff --git a/include/hw/char/serial.h b/include/hw/char/serial.h
new file mode 100644 (file)
index 0000000..8ba7eca
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ * QEMU 16550A UART emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2008 Citrix Systems, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_SERIAL_H
+#define HW_SERIAL_H
+
+#include "chardev/char-fe.h"
+#include "exec/memory.h"
+#include "qemu/fifo8.h"
+#include "chardev/char.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define UART_FIFO_LENGTH    16      /* 16550A Fifo Length */
+
+struct SerialState {
+    DeviceState parent;
+
+    uint16_t divider;
+    uint8_t rbr; /* receive register */
+    uint8_t thr; /* transmit holding register */
+    uint8_t tsr; /* transmit shift register */
+    uint8_t ier;
+    uint8_t iir; /* read only */
+    uint8_t lcr;
+    uint8_t mcr;
+    uint8_t lsr; /* read only */
+    uint8_t msr; /* read only */
+    uint8_t scr;
+    uint8_t fcr;
+    uint8_t fcr_vmstate; /* we can't write directly this value
+                            it has side effects */
+    /* NOTE: this hidden state is necessary for tx irq generation as
+       it can be reset while reading iir */
+    int thr_ipending;
+    qemu_irq irq;
+    CharBackend chr;
+    int last_break_enable;
+    uint32_t baudbase;
+    uint32_t tsr_retry;
+    guint watch_tag;
+    bool wakeup;
+
+    /* Time when the last byte was successfully sent out of the tsr */
+    uint64_t last_xmit_ts;
+    Fifo8 recv_fifo;
+    Fifo8 xmit_fifo;
+    /* Interrupt trigger level for recv_fifo */
+    uint8_t recv_fifo_itl;
+
+    QEMUTimer *fifo_timeout_timer;
+    int timeout_ipending;           /* timeout interrupt pending state */
+
+    uint64_t char_transmit_time;    /* time to transmit a char in ticks */
+    int poll_msl;
+
+    QEMUTimer *modem_status_poll;
+    MemoryRegion io;
+};
+typedef struct SerialState SerialState;
+
+struct SerialMM {
+    SysBusDevice parent;
+
+    SerialState serial;
+
+    uint8_t regshift;
+    uint8_t endianness;
+};
+
+extern const VMStateDescription vmstate_serial;
+extern const MemoryRegionOps serial_io_ops;
+
+void serial_set_frequency(SerialState *s, uint32_t frequency);
+
+#define TYPE_SERIAL "serial"
+OBJECT_DECLARE_SIMPLE_TYPE(SerialState, SERIAL)
+
+#define TYPE_SERIAL_MM "serial-mm"
+OBJECT_DECLARE_SIMPLE_TYPE(SerialMM, SERIAL_MM)
+
+SerialMM *serial_mm_init(MemoryRegion *address_space,
+                         hwaddr base, int regshift,
+                         qemu_irq irq, int baudbase,
+                         Chardev *chr, enum device_endian end);
+
+/* serial-isa.c */
+
+#define MAX_ISA_SERIAL_PORTS 4
+
+#define TYPE_ISA_SERIAL "isa-serial"
+void serial_hds_isa_init(ISABus *bus, int from, int to);
+
+#endif
diff --git a/include/hw/char/shakti_uart.h b/include/hw/char/shakti_uart.h
new file mode 100644 (file)
index 0000000..526c408
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * SHAKTI UART
+ *
+ * Copyright (c) 2021 Vijai Kumar K <vijai@behindbytes.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_SHAKTI_UART_H
+#define HW_SHAKTI_UART_H
+
+#include "hw/sysbus.h"
+#include "chardev/char-fe.h"
+
+#define SHAKTI_UART_BAUD        0x00
+#define SHAKTI_UART_TX          0x04
+#define SHAKTI_UART_RX          0x08
+#define SHAKTI_UART_STATUS      0x0C
+#define SHAKTI_UART_DELAY       0x10
+#define SHAKTI_UART_CONTROL     0x14
+#define SHAKTI_UART_INT_EN      0x18
+#define SHAKTI_UART_IQ_CYCLES   0x1C
+#define SHAKTI_UART_RX_THRES    0x20
+
+#define SHAKTI_UART_STATUS_TX_EMPTY     (1 << 0)
+#define SHAKTI_UART_STATUS_TX_FULL      (1 << 1)
+#define SHAKTI_UART_STATUS_RX_NOT_EMPTY (1 << 2)
+#define SHAKTI_UART_STATUS_RX_FULL      (1 << 3)
+/* 9600 8N1 is the default setting */
+/* Reg value = (50000000 Hz)/(16 * 9600)*/
+#define SHAKTI_UART_BAUD_DEFAULT    0x0145
+#define SHAKTI_UART_CONTROL_DEFAULT 0x0100
+
+#define TYPE_SHAKTI_UART "shakti-uart"
+#define SHAKTI_UART(obj) \
+    OBJECT_CHECK(ShaktiUartState, (obj), TYPE_SHAKTI_UART)
+
+typedef struct {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion mmio;
+
+    uint32_t uart_baud;
+    uint32_t uart_tx;
+    uint32_t uart_rx;
+    uint32_t uart_status;
+    uint32_t uart_delay;
+    uint32_t uart_control;
+    uint32_t uart_interrupt;
+    uint32_t uart_iq_cycles;
+    uint32_t uart_rx_threshold;
+
+    CharBackend chr;
+} ShaktiUartState;
+
+#endif /* HW_SHAKTI_UART_H */
diff --git a/include/hw/char/sifive_uart.h b/include/hw/char/sifive_uart.h
new file mode 100644 (file)
index 0000000..7f6c79f
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * SiFive UART interface
+ *
+ * Copyright (c) 2016 Stefan O'Rear
+ * Copyright (c) 2017 SiFive, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SIFIVE_UART_H
+#define HW_SIFIVE_UART_H
+
+#include "chardev/char-fe.h"
+#include "hw/qdev-properties.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+enum {
+    SIFIVE_UART_TXFIFO        = 0,
+    SIFIVE_UART_RXFIFO        = 4,
+    SIFIVE_UART_TXCTRL        = 8,
+    SIFIVE_UART_TXMARK        = 10,
+    SIFIVE_UART_RXCTRL        = 12,
+    SIFIVE_UART_RXMARK        = 14,
+    SIFIVE_UART_IE            = 16,
+    SIFIVE_UART_IP            = 20,
+    SIFIVE_UART_DIV           = 24,
+    SIFIVE_UART_MAX           = 32
+};
+
+enum {
+    SIFIVE_UART_IE_TXWM       = 1, /* Transmit watermark interrupt enable */
+    SIFIVE_UART_IE_RXWM       = 2  /* Receive watermark interrupt enable */
+};
+
+enum {
+    SIFIVE_UART_IP_TXWM       = 1, /* Transmit watermark interrupt pending */
+    SIFIVE_UART_IP_RXWM       = 2  /* Receive watermark interrupt pending */
+};
+
+#define SIFIVE_UART_GET_TXCNT(txctrl)   ((txctrl >> 16) & 0x7)
+#define SIFIVE_UART_GET_RXCNT(rxctrl)   ((rxctrl >> 16) & 0x7)
+#define SIFIVE_UART_RX_FIFO_SIZE 8
+
+#define TYPE_SIFIVE_UART "riscv.sifive.uart"
+OBJECT_DECLARE_SIMPLE_TYPE(SiFiveUARTState, SIFIVE_UART)
+
+struct SiFiveUARTState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    qemu_irq irq;
+    MemoryRegion mmio;
+    CharBackend chr;
+    uint8_t rx_fifo[SIFIVE_UART_RX_FIFO_SIZE];
+    uint8_t rx_fifo_len;
+    uint32_t ie;
+    uint32_t ip;
+    uint32_t txctrl;
+    uint32_t rxctrl;
+    uint32_t div;
+};
+
+SiFiveUARTState *sifive_uart_create(MemoryRegion *address_space, hwaddr base,
+    Chardev *chr, qemu_irq irq);
+
+#endif
diff --git a/include/hw/char/stm32f2xx_usart.h b/include/hw/char/stm32f2xx_usart.h
new file mode 100644 (file)
index 0000000..fdfa742
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * STM32F2XX USART
+ *
+ * Copyright (c) 2014 Alistair Francis <alistair@alistair23.me>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_STM32F2XX_USART_H
+#define HW_STM32F2XX_USART_H
+
+#include "hw/sysbus.h"
+#include "chardev/char-fe.h"
+#include "qom/object.h"
+
+#define USART_SR   0x00
+#define USART_DR   0x04
+#define USART_BRR  0x08
+#define USART_CR1  0x0C
+#define USART_CR2  0x10
+#define USART_CR3  0x14
+#define USART_GTPR 0x18
+
+/*
+ * NB: The reset value mentioned in "24.6.1 Status register" seems bogus.
+ * Looking at "Table 98 USART register map and reset values", it seems it
+ * should be 0xc0, and that's how real hardware behaves.
+ */
+#define USART_SR_RESET (USART_SR_TXE | USART_SR_TC)
+
+#define USART_SR_TXE  (1 << 7)
+#define USART_SR_TC   (1 << 6)
+#define USART_SR_RXNE (1 << 5)
+
+#define USART_CR1_UE     (1 << 13)
+#define USART_CR1_TXEIE  (1 << 7)
+#define USART_CR1_TCEIE  (1 << 6)
+#define USART_CR1_RXNEIE (1 << 5)
+#define USART_CR1_TE     (1 << 3)
+#define USART_CR1_RE     (1 << 2)
+
+#define TYPE_STM32F2XX_USART "stm32f2xx-usart"
+OBJECT_DECLARE_SIMPLE_TYPE(STM32F2XXUsartState, STM32F2XX_USART)
+
+struct STM32F2XXUsartState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion mmio;
+
+    uint32_t usart_sr;
+    uint32_t usart_dr;
+    uint32_t usart_brr;
+    uint32_t usart_cr1;
+    uint32_t usart_cr2;
+    uint32_t usart_cr3;
+    uint32_t usart_gtpr;
+
+    CharBackend chr;
+    qemu_irq irq;
+};
+#endif /* HW_STM32F2XX_USART_H */
diff --git a/include/hw/char/xilinx_uartlite.h b/include/hw/char/xilinx_uartlite.h
new file mode 100644 (file)
index 0000000..36d4e84
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef XILINX_UARTLITE_H
+#define XILINX_UARTLITE_H
+
+#include "qom/object.h"
+
+#define TYPE_XILINX_UARTLITE "xlnx.xps-uartlite"
+OBJECT_DECLARE_SIMPLE_TYPE(XilinxUARTLite, XILINX_UARTLITE)
+
+#endif
index 81bcf3e505a02899faac2a2e503511b262e5eab9..bb12117f67b57b3dbe04d7365e339c0ed9885e5a 100644 (file)
 
 #include "qom/object.h"
 #include "qemu/queue.h"
+#include "qemu/host-utils.h"
+#include "qemu/bitops.h"
 
 #define TYPE_CLOCK "clock"
 OBJECT_DECLARE_SIMPLE_TYPE(Clock, CLOCK)
 
-typedef void ClockCallback(void *opaque);
+/*
+ * Argument to ClockCallback functions indicating why the callback
+ * has been called. A mask of these values logically ORed together
+ * is used to specify which events are interesting when the callback
+ * is registered, so these values must all be different bit values.
+ */
+typedef enum ClockEvent {
+    ClockUpdate = 1, /* Clock period has just updated */
+    ClockPreUpdate = 2, /* Clock period is about to update */
+} ClockEvent;
+
+typedef void ClockCallback(void *opaque, ClockEvent event);
 
 /*
  * clock store a value representing the clock's period in 2^-32ns unit.
@@ -38,7 +51,6 @@ typedef void ClockCallback(void *opaque);
  * macro helpers to convert to hertz / nanosecond
  */
 #define CLOCK_PERIOD_FROM_NS(ns) ((ns) * (CLOCK_PERIOD_1SEC / 1000000000llu))
-#define CLOCK_PERIOD_TO_NS(per) ((per) / (CLOCK_PERIOD_1SEC / 1000000000llu))
 #define CLOCK_PERIOD_FROM_HZ(hz) (((hz) != 0) ? CLOCK_PERIOD_1SEC / (hz) : 0u)
 #define CLOCK_PERIOD_TO_HZ(per) (((per) != 0) ? CLOCK_PERIOD_1SEC / (per) : 0u)
 
@@ -49,6 +61,7 @@ typedef void ClockCallback(void *opaque);
  * @canonical_path: clock path string cache (used for trace purpose)
  * @callback: called when clock changes
  * @callback_opaque: argument for @callback
+ * @callback_events: mask of events when callback should be called
  * @source: source (or parent in clock tree) of the clock
  * @children: list of clocks connected to this one (it is their source)
  * @sibling: structure used to form a clock list
@@ -66,6 +79,11 @@ struct Clock {
     char *canonical_path;
     ClockCallback *callback;
     void *callback_opaque;
+    unsigned int callback_events;
+
+    /* Ratio of the parent clock to run the child clocks at */
+    uint32_t multiplier;
+    uint32_t divider;
 
     /* Clocks are organized in a clock tree */
     Clock *source;
@@ -113,10 +131,15 @@ Clock *clock_new(Object *parent, const char *name);
  * @clk: the clock to register the callback into
  * @cb: the callback function
  * @opaque: the argument to the callback
+ * @events: the events the callback should be called for
+ *          (logical OR of ClockEvent enum values)
  *
  * Register a callback called on every clock update.
+ * Note that a clock has only one callback: you cannot register
+ * different callback functions for different events.
  */
-void clock_set_callback(Clock *clk, ClockCallback *cb, void *opaque);
+void clock_set_callback(Clock *clk, ClockCallback *cb,
+                        void *opaque, unsigned int events);
 
 /**
  * clock_clear_callback:
@@ -138,6 +161,21 @@ void clock_clear_callback(Clock *clk);
  */
 void clock_set_source(Clock *clk, Clock *src);
 
+/**
+ * clock_has_source:
+ * @clk: the clock
+ *
+ * Returns true if the clock has a source clock connected to it.
+ * This is useful for devices which have input clocks which must
+ * be connected by the board/SoC code which creates them. The
+ * device code can use this to check in its realize method that
+ * the clock has been connected.
+ */
+static inline bool clock_has_source(const Clock *clk)
+{
+    return clk->source != NULL;
+}
+
 /**
  * clock_set:
  * @clk: the clock to initialize.
@@ -166,7 +204,7 @@ static inline bool clock_set_ns(Clock *clk, unsigned ns)
  * Propagate the clock period that has been previously configured using
  * @clock_set(). This will update recursively all connected clocks.
  * It is an error to call this function on a clock which has a source.
- * Note: this function must not be called during device inititialization
+ * Note: this function must not be called during device initialization
  * or migration.
  */
 void clock_propagate(Clock *clk);
@@ -213,9 +251,81 @@ static inline unsigned clock_get_hz(Clock *clk)
     return CLOCK_PERIOD_TO_HZ(clock_get(clk));
 }
 
-static inline unsigned clock_get_ns(Clock *clk)
+/**
+ * clock_ticks_to_ns:
+ * @clk: the clock to query
+ * @ticks: number of ticks
+ *
+ * Returns the length of time in nanoseconds for this clock
+ * to tick @ticks times. Because a clock can have a period
+ * which is not a whole number of nanoseconds, it is important
+ * to use this function when calculating things like timer
+ * expiry deadlines, rather than attempting to obtain a "period
+ * in nanoseconds" value and then multiplying that by a number
+ * of ticks.
+ *
+ * The result could in theory be too large to fit in a 64-bit
+ * value if the number of ticks and the clock period are both
+ * large; to avoid overflow the result will be saturated to INT64_MAX
+ * (because this is the largest valid input to the QEMUTimer APIs).
+ * Since INT64_MAX nanoseconds is almost 300 years, anything with
+ * an expiry later than that is in the "will never happen" category
+ * and callers can reasonably not special-case the saturated result.
+ */
+static inline uint64_t clock_ticks_to_ns(const Clock *clk, uint64_t ticks)
+{
+    uint64_t ns_low, ns_high;
+
+    /*
+     * clk->period is the period in units of 2^-32 ns, so
+     * (clk->period * ticks) is the required length of time in those
+     * units, and we can convert to nanoseconds by multiplying by
+     * 2^32, which is the same as shifting the 128-bit multiplication
+     * result right by 32.
+     */
+    mulu64(&ns_low, &ns_high, clk->period, ticks);
+    if (ns_high & MAKE_64BIT_MASK(31, 33)) {
+        return INT64_MAX;
+    }
+    return ns_low >> 32 | ns_high << 32;
+}
+
+/**
+ * clock_ns_to_ticks:
+ * @clk: the clock to query
+ * @ns: duration in nanoseconds
+ *
+ * Returns the number of ticks this clock would make in the given
+ * number of nanoseconds. Because a clock can have a period which
+ * is not a whole number of nanoseconds, it is important to use this
+ * function rather than attempting to obtain a "period in nanoseconds"
+ * value and then dividing the duration by that value.
+ *
+ * If the clock is stopped (ie it has period zero), returns 0.
+ *
+ * For some inputs the result could overflow a 64-bit value (because
+ * the clock's period is short and the duration is long). In these
+ * cases we truncate the result to a 64-bit value. This is on the
+ * assumption that generally the result is going to be used to report
+ * a 32-bit or 64-bit guest register value, so wrapping either cannot
+ * happen or is the desired behaviour.
+ */
+static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
 {
-    return CLOCK_PERIOD_TO_NS(clock_get(clk));
+    /*
+     * ticks = duration_in_ns / period_in_ns
+     *       = ns / (period / 2^32)
+     *       = (ns * 2^32) / period
+     * The hi, lo inputs to divu128() are (ns << 32) as a 128 bit value.
+     */
+    uint64_t lo = ns << 32;
+    uint64_t hi = ns >> 32;
+    if (clk->period == 0) {
+        return 0;
+    }
+
+    divu128(&lo, &hi, clk->period);
+    return lo;
 }
 
 /**
@@ -229,4 +339,41 @@ static inline bool clock_is_enabled(const Clock *clk)
     return clock_get(clk) != 0;
 }
 
+/**
+ * clock_display_freq: return human-readable representation of clock frequency
+ * @clk: clock
+ *
+ * Return a string which has a human-readable representation of the
+ * clock's frequency, e.g. "33.3 MHz". This is intended for debug
+ * and display purposes.
+ *
+ * The caller is responsible for freeing the string with g_free().
+ */
+char *clock_display_freq(Clock *clk);
+
+/**
+ * clock_set_mul_div: set multiplier/divider for child clocks
+ * @clk: clock
+ * @multiplier: multiplier value
+ * @divider: divider value
+ *
+ * By default, a Clock's children will all run with the same period
+ * as their parent. This function allows you to adjust the multiplier
+ * and divider used to derive the child clock frequency.
+ * For example, setting a multiplier of 2 and a divider of 3
+ * will run child clocks with a period 2/3 of the parent clock,
+ * so if the parent clock is an 8MHz clock the children will
+ * be 12MHz.
+ *
+ * Setting the multiplier to 0 will stop the child clocks.
+ * Setting the divider to 0 is a programming error (diagnosed with
+ * an assertion failure).
+ * Setting a multiplier value that results in the child period
+ * overflowing is not diagnosed.
+ *
+ * Note that this function does not call clock_propagate(); the
+ * caller should do that if necessary.
+ */
+void clock_set_mul_div(Clock *clk, uint32_t multiplier, uint32_t divider);
+
 #endif /* QEMU_HW_CLOCK_H */
diff --git a/include/hw/core/accel-cpu.h b/include/hw/core/accel-cpu.h
new file mode 100644 (file)
index 0000000..24dad45
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Accelerator interface, specializes CPUClass
+ * This header is used only by target-specific code.
+ *
+ * Copyright 2021 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef ACCEL_CPU_H
+#define ACCEL_CPU_H
+
+/*
+ * This header is used to define new accelerator-specific target-specific
+ * accelerator cpu subclasses.
+ * It uses CPU_RESOLVING_TYPE, so this is clearly target-specific.
+ *
+ * Do not try to use for any other purpose than the implementation of new
+ * subclasses in target/, or the accel implementation itself in accel/
+ */
+
+#define TYPE_ACCEL_CPU "accel-" CPU_RESOLVING_TYPE
+#define ACCEL_CPU_NAME(name) (name "-" TYPE_ACCEL_CPU)
+typedef struct AccelCPUClass AccelCPUClass;
+DECLARE_CLASS_CHECKERS(AccelCPUClass, ACCEL_CPU, TYPE_ACCEL_CPU)
+
+typedef struct AccelCPUClass {
+    /*< private >*/
+    ObjectClass parent_class;
+    /*< public >*/
+
+    void (*cpu_class_init)(CPUClass *cc);
+    void (*cpu_instance_init)(CPUState *cpu);
+    bool (*cpu_target_realize)(CPUState *cpu, Error **errp);
+} AccelCPUClass;
+
+#endif /* ACCEL_CPU_H */
index 3d92c967fffaf348bec0e267398f3cf929744306..c0c8320413e5f66f3c026ec6b4b89374b9cde7b3 100644 (file)
 
 #include "hw/qdev-core.h"
 #include "disas/dis-asm.h"
+#include "exec/cpu-common.h"
 #include "exec/hwaddr.h"
 #include "exec/memattrs.h"
+#include "exec/tlb-common.h"
 #include "qapi/qapi-types-run-state.h"
 #include "qemu/bitmap.h"
 #include "qemu/rcu_queue.h"
 #include "qemu/queue.h"
 #include "qemu/thread.h"
-#include "qemu/plugin.h"
+#include "qemu/plugin-event.h"
 #include "qom/object.h"
 
 typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
                                      void *opaque);
 
-/**
- * vaddr:
- * Type wide enough to contain any #target_ulong virtual address.
- */
-typedef uint64_t vaddr;
-#define VADDR_PRId PRId64
-#define VADDR_PRIu PRIu64
-#define VADDR_PRIo PRIo64
-#define VADDR_PRIx PRIx64
-#define VADDR_PRIX PRIX64
-#define VADDR_MAX UINT64_MAX
-
 /**
  * SECTION:cpu
  * @section_id: QEMU-cpu
@@ -62,42 +52,66 @@ typedef uint64_t vaddr;
  */
 #define CPU(obj) ((CPUState *)(obj))
 
+/*
+ * The class checkers bring in CPU_GET_CLASS() which is potentially
+ * expensive given the eventual call to
+ * object_class_dynamic_cast_assert(). Because of this the CPUState
+ * has a cached value for the class in cs->cc which is set up in
+ * cpu_exec_realizefn() for use in hot code paths.
+ */
 typedef struct CPUClass CPUClass;
 DECLARE_CLASS_CHECKERS(CPUClass, CPU,
                        TYPE_CPU)
 
+/**
+ * OBJECT_DECLARE_CPU_TYPE:
+ * @CpuInstanceType: instance struct name
+ * @CpuClassType: class struct name
+ * @CPU_MODULE_OBJ_NAME: the CPU name in uppercase with underscore separators
+ *
+ * This macro is typically used in "cpu-qom.h" header file, and will:
+ *
+ *   - create the typedefs for the CPU object and class structs
+ *   - register the type for use with g_autoptr
+ *   - provide three standard type cast functions
+ *
+ * The object struct and class struct need to be declared manually.
+ */
+#define OBJECT_DECLARE_CPU_TYPE(CpuInstanceType, CpuClassType, CPU_MODULE_OBJ_NAME) \
+    typedef struct ArchCPU CpuInstanceType; \
+    OBJECT_DECLARE_TYPE(ArchCPU, CpuClassType, CPU_MODULE_OBJ_NAME);
+
 typedef enum MMUAccessType {
     MMU_DATA_LOAD  = 0,
     MMU_DATA_STORE = 1,
     MMU_INST_FETCH = 2
+#define MMU_ACCESS_COUNT 3
 } MMUAccessType;
 
 typedef struct CPUWatchpoint CPUWatchpoint;
 
-struct TranslationBlock;
+/* see tcg-cpu-ops.h */
+struct TCGCPUOps;
+
+/* see accel-cpu.h */
+struct AccelCPUClass;
+
+/* see sysemu-cpu-ops.h */
+struct SysemuCPUOps;
 
 /**
  * CPUClass:
  * @class_by_name: Callback to map -cpu command line model name to an
- * instantiatable CPU type.
+ *                 instantiatable CPU type.
  * @parse_features: Callback to parse command line arguments.
  * @reset_dump_flags: #CPUDumpFlags to use for reset logging.
  * @has_work: Callback for checking if there is work to do.
- * @do_interrupt: Callback for interrupt handling.
- * @do_unaligned_access: Callback for unaligned access handling, if
- * the target defines #TARGET_ALIGNED_ONLY.
- * @do_transaction_failed: Callback for handling failed memory transactions
- * (ie bus faults or external aborts; not MMU faults)
- * @virtio_is_big_endian: Callback to return %true if a CPU which supports
- * runtime configurable endianness is currently big-endian. Non-configurable
- * CPUs can use the default implementation of this method. This method should
- * not be used by any callers other than the pre-1.0 virtio devices.
  * @memory_rw_debug: Callback for GDB memory access.
  * @dump_state: Callback for dumping state.
- * @dump_statistics: Callback for dumping statistics.
+ * @query_cpu_fast:
+ *       Fill in target specific information for the "query-cpus-fast"
+ *       QAPI call.
  * @get_arch_id: Callback for getting architecture-dependent CPU ID.
- * @get_paging_enabled: Callback for inquiring whether paging is enabled.
- * @get_memory_mapping: Callback for obtaining the memory mappings.
  * @set_pc: Callback for setting the Program Counter register. This
  *       should have the semantics used by the target architecture when
  *       setting the PC from a source such as an ELF file entry point;
@@ -106,40 +120,13 @@ struct TranslationBlock;
  *       If the target behaviour here is anything other than "set
  *       the PC register to the value passed in" then the target must
  *       also implement the synchronize_from_tb hook.
- * @synchronize_from_tb: Callback for synchronizing state from a TCG
- *       #TranslationBlock. This is called when we abandon execution
- *       of a TB before starting it, and must set all parts of the CPU
- *       state which the previous TB in the chain may not have updated.
- *       This always includes at least the program counter; some targets
- *       will need to do more. If this hook is not implemented then the
- *       default is to call @set_pc(tb->pc).
- * @tlb_fill: Callback for handling a softmmu tlb miss or user-only
- *       address fault.  For system mode, if the access is valid, call
- *       tlb_set_page and return true; if the access is invalid, and
- *       probe is true, return false; otherwise raise an exception and
- *       do not return.  For user-only mode, always raise an exception
- *       and do not return.
- * @get_phys_page_debug: Callback for obtaining a physical address.
- * @get_phys_page_attrs_debug: Callback for obtaining a physical address and the
- *       associated memory transaction attributes to use for the access.
- *       CPUs which use memory transaction attributes should implement this
- *       instead of get_phys_page_debug.
- * @asidx_from_attrs: Callback to return the CPU AddressSpace to use for
- *       a memory access with the specified memory transaction attributes.
+ * @get_pc: Callback for getting the Program Counter register.
+ *       As above, with the semantics of the target architecture.
  * @gdb_read_register: Callback for letting GDB read a register.
  * @gdb_write_register: Callback for letting GDB write a register.
- * @debug_check_watchpoint: Callback: return true if the architectural
- *       watchpoint whose address has matched should really fire.
- * @debug_excp_handler: Callback for handling debug exceptions.
- * @write_elf64_note: Callback for writing a CPU-specific ELF note to a
- * 64-bit VM coredump.
- * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF
- * note to a 32-bit VM coredump.
- * @write_elf32_note: Callback for writing a CPU-specific ELF note to a
- * 32-bit VM coredump.
- * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF
- * note to a 32-bit VM coredump.
- * @vmsd: State description for migration.
+ * @gdb_adjust_breakpoint: Callback for adjusting the address of a
+ *       breakpoint.  Used by AVR to handle a gdb mis-feature with
+ *       its Harvard architecture split code and data.
  * @gdb_num_core_regs: Number of core registers accessible to GDB.
  * @gdb_core_xml_file: File name for core registers GDB XML description.
  * @gdb_stop_before_watchpoint: Indicates whether GDB expects the CPU to stop
@@ -149,9 +136,6 @@ struct TranslationBlock;
  * @gdb_get_dynamic_xml: Callback to return dynamically generated XML for the
  *   gdb stub. Returns a pointer to the XML contents for the specified XML file
  *   or NULL if the CPU doesn't have a dynamically generated content for it.
- * @cpu_exec_enter: Callback for cpu_exec preparation.
- * @cpu_exec_exit: Callback for cpu_exec cleanup.
- * @cpu_exec_interrupt: Callback for processing interrupts in cpu_exec.
  * @disas_set_info: Setup architecture specific components of disassembly info
  * @adjust_watchpoint_address: Perform a target-specific adjustment to an
  * address before attempting to match it against watchpoints.
@@ -168,67 +152,178 @@ struct CPUClass {
     ObjectClass *(*class_by_name)(const char *cpu_model);
     void (*parse_features)(const char *typename, char *str, Error **errp);
 
-    int reset_dump_flags;
     bool (*has_work)(CPUState *cpu);
-    void (*do_interrupt)(CPUState *cpu);
-    void (*do_unaligned_access)(CPUState *cpu, vaddr addr,
-                                MMUAccessType access_type,
-                                int mmu_idx, uintptr_t retaddr);
-    void (*do_transaction_failed)(CPUState *cpu, hwaddr physaddr, vaddr addr,
-                                  unsigned size, MMUAccessType access_type,
-                                  int mmu_idx, MemTxAttrs attrs,
-                                  MemTxResult response, uintptr_t retaddr);
-    bool (*virtio_is_big_endian)(CPUState *cpu);
     int (*memory_rw_debug)(CPUState *cpu, vaddr addr,
                            uint8_t *buf, int len, bool is_write);
     void (*dump_state)(CPUState *cpu, FILE *, int flags);
-    GuestPanicInformation* (*get_crash_info)(CPUState *cpu);
-    void (*dump_statistics)(CPUState *cpu, int flags);
+    void (*query_cpu_fast)(CPUState *cpu, CpuInfoFast *value);
     int64_t (*get_arch_id)(CPUState *cpu);
-    bool (*get_paging_enabled)(const CPUState *cpu);
-    void (*get_memory_mapping)(CPUState *cpu, MemoryMappingList *list,
-                               Error **errp);
     void (*set_pc)(CPUState *cpu, vaddr value);
-    void (*synchronize_from_tb)(CPUState *cpu, struct TranslationBlock *tb);
-    bool (*tlb_fill)(CPUState *cpu, vaddr address, int size,
-                     MMUAccessType access_type, int mmu_idx,
-                     bool probe, uintptr_t retaddr);
-    hwaddr (*get_phys_page_debug)(CPUState *cpu, vaddr addr);
-    hwaddr (*get_phys_page_attrs_debug)(CPUState *cpu, vaddr addr,
-                                        MemTxAttrs *attrs);
-    int (*asidx_from_attrs)(CPUState *cpu, MemTxAttrs attrs);
+    vaddr (*get_pc)(CPUState *cpu);
     int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg);
     int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg);
-    bool (*debug_check_watchpoint)(CPUState *cpu, CPUWatchpoint *wp);
-    void (*debug_excp_handler)(CPUState *cpu);
-
-    int (*write_elf64_note)(WriteCoreDumpFunction f, CPUState *cpu,
-                            int cpuid, void *opaque);
-    int (*write_elf64_qemunote)(WriteCoreDumpFunction f, CPUState *cpu,
-                                void *opaque);
-    int (*write_elf32_note)(WriteCoreDumpFunction f, CPUState *cpu,
-                            int cpuid, void *opaque);
-    int (*write_elf32_qemunote)(WriteCoreDumpFunction f, CPUState *cpu,
-                                void *opaque);
-
-    const VMStateDescription *vmsd;
+    vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr);
+
     const char *gdb_core_xml_file;
-    gchar * (*gdb_arch_name)(CPUState *cpu);
+    const gchar * (*gdb_arch_name)(CPUState *cpu);
     const char * (*gdb_get_dynamic_xml)(CPUState *cpu, const char *xmlname);
-    void (*cpu_exec_enter)(CPUState *cpu);
-    void (*cpu_exec_exit)(CPUState *cpu);
-    bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request);
 
     void (*disas_set_info)(CPUState *cpu, disassemble_info *info);
-    vaddr (*adjust_watchpoint_address)(CPUState *cpu, vaddr addr, int len);
-    void (*tcg_initialize)(void);
 
     const char *deprecation_note;
-    /* Keep non-pointer data at the end to minimize holes.  */
+    struct AccelCPUClass *accel_cpu;
+
+    /* when system emulation is not available, this pointer is NULL */
+    const struct SysemuCPUOps *sysemu_ops;
+
+    /* when TCG is not available, this pointer is NULL */
+    const struct TCGCPUOps *tcg_ops;
+
+    /*
+     * if not NULL, this is called in order for the CPUClass to initialize
+     * class data that depends on the accelerator, see accel/accel-common.c.
+     */
+    void (*init_accel_cpu)(struct AccelCPUClass *accel_cpu, CPUClass *cc);
+
+    /*
+     * Keep non-pointer data at the end to minimize holes.
+     */
+    int reset_dump_flags;
     int gdb_num_core_regs;
     bool gdb_stop_before_watchpoint;
 };
 
+/*
+ * Fix the number of mmu modes to 16, which is also the maximum
+ * supported by the softmmu tlb api.
+ */
+#define NB_MMU_MODES 16
+
+/* Use a fully associative victim tlb of 8 entries. */
+#define CPU_VTLB_SIZE 8
+
+/*
+ * The full TLB entry, which is not accessed by generated TCG code,
+ * so the layout is not as critical as that of CPUTLBEntry. This is
+ * also why we don't want to combine the two structs.
+ */
+typedef struct CPUTLBEntryFull {
+    /*
+     * @xlat_section contains:
+     *  - in the lower TARGET_PAGE_BITS, a physical section number
+     *  - with the lower TARGET_PAGE_BITS masked off, an offset which
+     *    must be added to the virtual address to obtain:
+     *     + the ram_addr_t of the target RAM (if the physical section
+     *       number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM)
+     *     + the offset within the target MemoryRegion (otherwise)
+     */
+    hwaddr xlat_section;
+
+    /*
+     * @phys_addr contains the physical address in the address space
+     * given by cpu_asidx_from_attrs(cpu, @attrs).
+     */
+    hwaddr phys_addr;
+
+    /* @attrs contains the memory transaction attributes for the page. */
+    MemTxAttrs attrs;
+
+    /* @prot contains the complete protections for the page. */
+    uint8_t prot;
+
+    /* @lg_page_size contains the log2 of the page size. */
+    uint8_t lg_page_size;
+
+    /*
+     * Additional tlb flags for use by the slow path. If non-zero,
+     * the corresponding CPUTLBEntry comparator must have TLB_FORCE_SLOW.
+     */
+    uint8_t slow_flags[MMU_ACCESS_COUNT];
+
+    /*
+     * Allow target-specific additions to this structure.
+     * This may be used to cache items from the guest cpu
+     * page tables for later use by the implementation.
+     */
+    union {
+        /*
+         * Cache the attrs and shareability fields from the page table entry.
+         *
+         * For ARMMMUIdx_Stage2*, pte_attrs is the S2 descriptor bits [5:2].
+         * Otherwise, pte_attrs is the same as the MAIR_EL1 8-bit format.
+         * For shareability and guarded, as in the SH and GP fields respectively
+         * of the VMSAv8-64 PTEs.
+         */
+        struct {
+            uint8_t pte_attrs;
+            uint8_t shareability;
+            bool guarded;
+        } arm;
+    } extra;
+} CPUTLBEntryFull;
+
+/*
+ * Data elements that are per MMU mode, minus the bits accessed by
+ * the TCG fast path.
+ */
+typedef struct CPUTLBDesc {
+    /*
+     * Describe a region covering all of the large pages allocated
+     * into the tlb.  When any page within this region is flushed,
+     * we must flush the entire tlb.  The region is matched if
+     * (addr & large_page_mask) == large_page_addr.
+     */
+    vaddr large_page_addr;
+    vaddr large_page_mask;
+    /* host time (in ns) at the beginning of the time window */
+    int64_t window_begin_ns;
+    /* maximum number of entries observed in the window */
+    size_t window_max_entries;
+    size_t n_used_entries;
+    /* The next index to use in the tlb victim table.  */
+    size_t vindex;
+    /* The tlb victim table, in two parts.  */
+    CPUTLBEntry vtable[CPU_VTLB_SIZE];
+    CPUTLBEntryFull vfulltlb[CPU_VTLB_SIZE];
+    CPUTLBEntryFull *fulltlb;
+} CPUTLBDesc;
+
+/*
+ * Data elements that are shared between all MMU modes.
+ */
+typedef struct CPUTLBCommon {
+    /* Serialize updates to f.table and d.vtable, and others as noted. */
+    QemuSpin lock;
+    /*
+     * Within dirty, for each bit N, modifications have been made to
+     * mmu_idx N since the last time that mmu_idx was flushed.
+     * Protected by tlb_c.lock.
+     */
+    uint16_t dirty;
+    /*
+     * Statistics.  These are not lock protected, but are read and
+     * written atomically.  This allows the monitor to print a snapshot
+     * of the stats without interfering with the cpu.
+     */
+    size_t full_flush_count;
+    size_t part_flush_count;
+    size_t elide_flush_count;
+} CPUTLBCommon;
+
+/*
+ * The entire softmmu tlb, for all MMU modes.
+ * The meaning of each of the MMU modes is defined in the target code.
+ * Since this is placed within CPUNegativeOffsetState, the smallest
+ * negative offsets are at the end of the struct.
+ */
+typedef struct CPUTLB {
+#ifdef CONFIG_TCG
+    CPUTLBCommon c;
+    CPUTLBDesc d[NB_MMU_MODES];
+    CPUTLBDescFast f[NB_MMU_MODES];
+#endif
+} CPUTLB;
+
 /*
  * Low 16 bits: number of cycles left, used only in icount mode.
  * High 16 bits: Set to -1 to force TCG to stop executing linked TBs
@@ -239,7 +334,7 @@ struct CPUClass {
 typedef union IcountDecr {
     uint32_t u32;
     struct {
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
         uint16_t high;
         uint16_t low;
 #else
@@ -249,6 +344,16 @@ typedef union IcountDecr {
     } u16;
 } IcountDecr;
 
+/*
+ * Elements of CPUState most efficiently accessed from CPUArchState,
+ * via small negative offsets.
+ */
+typedef struct CPUNegativeOffsetState {
+    CPUTLB tlb;
+    IcountDecr icount_decr;
+    bool can_do_io;
+} CPUNegativeOffsetState;
+
 typedef struct CPUBreakpoint {
     vaddr pc;
     int flags; /* BP_* */
@@ -264,26 +369,9 @@ struct CPUWatchpoint {
     QTAILQ_ENTRY(CPUWatchpoint) entry;
 };
 
-#ifdef CONFIG_PLUGIN
-/*
- * For plugins we sometime need to save the resolved iotlb data before
- * the memory regions get moved around  by io_writex.
- */
-typedef struct SavedIOTLB {
-    hwaddr addr;
-    MemoryRegionSection *section;
-    hwaddr mr_offset;
-} SavedIOTLB;
-#endif
-
 struct KVMState;
 struct kvm_run;
 
-struct hax_vcpu_state;
-
-#define TB_JMP_CACHE_BITS 12
-#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
-
 /* work queue */
 
 /* The union type allows passing of 64 bit target pointers on 32 bit
@@ -307,7 +395,6 @@ typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data);
 struct qemu_work_item;
 
 #define CPU_UNSET_NUMA_NODE_ID -1
-#define CPU_TRACE_DSTATE_MAX_EVENTS 32
 
 /**
  * CPUState:
@@ -317,8 +404,11 @@ struct qemu_work_item;
  *   to a cluster this will be UNASSIGNED_CLUSTER_INDEX; otherwise it will
  *   be the same as the cluster-id property of the CPU object's TYPE_CPU_CLUSTER
  *   QOM parent.
+ *   Under TCG this value is propagated to @tcg_cflags.
+ *   See TranslationBlock::TCG CF_CLUSTER_MASK.
+ * @tcg_cflags: Pre-computed cflags for this cpu.
  * @nr_cores: Number of cores within this CPU package.
- * @nr_threads: Number of threads within this CPU.
+ * @nr_threads: Number of threads within this CPU core.
  * @running: #true if CPU is currently running (lockless).
  * @has_waiter: #true if a CPU is currently waiting for the cpu_exec_end;
  * valid under cpu_list_lock.
@@ -331,22 +421,19 @@ struct qemu_work_item;
  * @crash_occurred: Indicates the OS reported a crash (panic) for this CPU
  * @singlestep_enabled: Flags for single-stepping.
  * @icount_extra: Instructions until next timer event.
- * @can_do_io: Nonzero if memory-mapped IO is safe. Deterministic execution
- * requires that IO only be performed on the last instruction of a TB
- * so that interrupts take effect immediately.
+ * @neg.can_do_io: True if memory-mapped IO is allowed.
  * @cpu_ases: Pointer to array of CPUAddressSpaces (which define the
  *            AddressSpaces this CPU has)
  * @num_ases: number of CPUAddressSpaces in @cpu_ases
  * @as: Pointer to the first AddressSpace, for the convenience of targets which
  *      only have a single AddressSpace
- * @env_ptr: Pointer to subclass-specific CPUArchState field.
- * @icount_decr_ptr: Pointer to IcountDecr field within subclass.
  * @gdb_regs: Additional GDB registers.
  * @gdb_num_regs: Number of total registers accessible to GDB.
  * @gdb_num_g_regs: Number of registers in GDB 'g' packets.
  * @next_cpu: Next CPU sharing TB cache.
  * @opaque: User data.
  * @mem_io_pc: Host Program Counter at which the memory was accessed.
+ * @accel: Pointer to accelerator specific state.
  * @kvm_fd: vCPU file descriptor for KVM.
  * @work_mutex: Lock to prevent multiple access to @work_list.
  * @work_list: List of pending asynchronous work.
@@ -357,12 +444,21 @@ struct qemu_work_item;
  * @ignore_memory_transaction_failures: Cached copy of the MachineState
  *    flag of the same name: allows the board to suppress calling of the
  *    CPU do_transaction_failed hook function.
+ * @kvm_dirty_gfns: Points to the KVM dirty ring for this CPU when KVM dirty
+ *    ring is enabled.
+ * @kvm_fetch_index: Keeps the index that we last fetched from the per-vCPU
+ *    dirty ring structure.
  *
  * State of one CPU core or thread.
+ *
+ * Align, in order to match possible alignment required by CPUArchState,
+ * and eliminate a hole between CPUState and CPUArchState within ArchCPU.
  */
 struct CPUState {
     /*< private >*/
     DeviceState parent_obj;
+    /* cache to avoid expensive CPU_GET_CLASS */
+    CPUClass *cc;
     /*< public >*/
 
     int nr_cores;
@@ -370,7 +466,7 @@ struct CPUState {
 
     struct QemuThread *thread;
 #ifdef _WIN32
-    HANDLE hThread;
+    QemuSemaphore sem;
 #endif
     int thread_id;
     bool running, has_waiter;
@@ -386,7 +482,7 @@ struct CPUState {
     bool unplug;
     bool crash_occurred;
     bool exit_request;
-    bool in_exclusive_context;
+    int exclusive_context_count;
     uint32_t cflags_next_tb;
     /* updates protected by BQL */
     uint32_t interrupt_request;
@@ -404,13 +500,9 @@ struct CPUState {
     AddressSpace *as;
     MemoryRegion *memory;
 
-    void *env_ptr; /* CPUArchState */
-    IcountDecr *icount_decr_ptr;
-
-    /* Accessed in parallel; all accesses must be atomic */
-    struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
+    CPUJumpCache *tb_jmp_cache;
 
-    struct GDBRegisterState *gdb_regs;
+    GArray *gdb_regs;
     int gdb_num_regs;
     int gdb_num_g_regs;
     QTAILQ_ENTRY(CPUState) node;
@@ -428,30 +520,33 @@ struct CPUState {
      */
     uintptr_t mem_io_pc;
 
+    /* Only used in KVM */
     int kvm_fd;
     struct KVMState *kvm_state;
     struct kvm_run *kvm_run;
+    struct kvm_dirty_gfn *kvm_dirty_gfns;
+    uint32_t kvm_fetch_index;
+    uint64_t dirty_pages;
+    int kvm_vcpu_stats_fd;
 
-    /* Used for events with 'vcpu' and *without* the 'disabled' properties */
-    DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
-    DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
+    /* Use by accel-block: CPU is executing an ioctl() */
+    QemuLockCnt in_ioctl_lock;
 
     DECLARE_BITMAP(plugin_mask, QEMU_PLUGIN_EV_MAX);
 
 #ifdef CONFIG_PLUGIN
     GArray *plugin_mem_cbs;
-    /* saved iotlb data from io_writex */
-    SavedIOTLB saved_iotlb;
 #endif
 
     /* TODO Move common fields from CPUArchState here. */
     int cpu_index;
     int cluster_index;
+    uint32_t tcg_cflags;
     uint32_t halted;
-    uint32_t can_do_io;
     int32_t exception_index;
 
-    /* shared by kvm, hax and hvf */
+    AccelCPUState *accel;
+    /* shared by kvm and hvf */
     bool vcpu_dirty;
 
     /* Used to keep track of an outstanding cpu throttle thread for migration
@@ -459,36 +554,48 @@ struct CPUState {
      */
     bool throttle_thread_scheduled;
 
-    bool ignore_memory_transaction_failures;
+    /*
+     * Sleep throttle_us_per_full microseconds once dirty ring is full
+     * if dirty page rate limit is enabled.
+     */
+    int64_t throttle_us_per_full;
 
-    struct hax_vcpu_state *hax_vcpu;
+    bool ignore_memory_transaction_failures;
 
-    int hvf_fd;
+    /* Used for user-only emulation of prctl(PR_SET_UNALIGN). */
+    bool prctl_unalign_sigbus;
 
     /* track IOMMUs whose translations we've cached in the TCG TLB */
     GArray *iommu_notifiers;
+
+    /*
+     * MUST BE LAST in order to minimize the displacement to CPUArchState.
+     */
+    char neg_align[-sizeof(CPUNegativeOffsetState) % 16] QEMU_ALIGNED(16);
+    CPUNegativeOffsetState neg;
 };
 
+/* Validate placement of CPUNegativeOffsetState. */
+QEMU_BUILD_BUG_ON(offsetof(CPUState, neg) !=
+                  sizeof(CPUState) - sizeof(CPUNegativeOffsetState));
+
+static inline CPUArchState *cpu_env(CPUState *cpu)
+{
+    /* We validate that CPUArchState follows CPUState in cpu-all.h. */
+    return (CPUArchState *)(cpu + 1);
+}
+
 typedef QTAILQ_HEAD(CPUTailQ, CPUState) CPUTailQ;
-extern CPUTailQ cpus;
+extern CPUTailQ cpus_queue;
 
-#define first_cpu        QTAILQ_FIRST_RCU(&cpus)
+#define first_cpu        QTAILQ_FIRST_RCU(&cpus_queue)
 #define CPU_NEXT(cpu)    QTAILQ_NEXT_RCU(cpu, node)
-#define CPU_FOREACH(cpu) QTAILQ_FOREACH_RCU(cpu, &cpus, node)
+#define CPU_FOREACH(cpu) QTAILQ_FOREACH_RCU(cpu, &cpus_queue, node)
 #define CPU_FOREACH_SAFE(cpu, next_cpu) \
-    QTAILQ_FOREACH_SAFE_RCU(cpu, &cpus, node, next_cpu)
+    QTAILQ_FOREACH_SAFE_RCU(cpu, &cpus_queue, node, next_cpu)
 
 extern __thread CPUState *current_cpu;
 
-static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
-{
-    unsigned int i;
-
-    for (i = 0; i < TB_JMP_CACHE_SIZE; i++) {
-        qatomic_set(&cpu->tb_jmp_cache[i], NULL);
-    }
-}
-
 /**
  * qemu_tcg_mttcg_enabled:
  * Check whether we are running MultiThread TCG or not.
@@ -511,8 +618,10 @@ bool cpu_paging_enabled(const CPUState *cpu);
  * @cpu: The CPU whose memory mappings are to be obtained.
  * @list: Where to write the memory mappings to.
  * @errp: Pointer for reporting an #Error.
+ *
+ * Returns: %true on success, %false otherwise.
  */
-void cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list,
+bool cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list,
                             Error **errp);
 
 #if !defined(CONFIG_USER_ONLY)
@@ -573,11 +682,13 @@ GuestPanicInformation *cpu_get_crash_info(CPUState *cpu);
  * @CPU_DUMP_CODE:
  * @CPU_DUMP_FPU: dump FPU register state, not just integer
  * @CPU_DUMP_CCOP: dump info about TCG QEMU's condition code optimization state
+ * @CPU_DUMP_VPU: dump VPU registers
  */
 enum CPUDumpFlags {
     CPU_DUMP_CODE = 0x00010000,
     CPU_DUMP_FPU  = 0x00020000,
     CPU_DUMP_CCOP = 0x00040000,
+    CPU_DUMP_VPU  = 0x00080000,
 };
 
 /**
@@ -589,16 +700,6 @@ enum CPUDumpFlags {
  */
 void cpu_dump_state(CPUState *cpu, FILE *f, int flags);
 
-/**
- * cpu_dump_statistics:
- * @cpu: The CPU whose state is to be dumped.
- * @flags: Flags what to dump.
- *
- * Dump CPU statistics to the current monitor if we have one, else to
- * stdout.
- */
-void cpu_dump_statistics(CPUState *cpu, int flags);
-
 #ifndef CONFIG_USER_ONLY
 /**
  * cpu_get_phys_page_attrs_debug:
@@ -613,18 +714,8 @@ void cpu_dump_statistics(CPUState *cpu, int flags);
  *
  * Returns: Corresponding physical page address or -1 if no page found.
  */
-static inline hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr,
-                                                   MemTxAttrs *attrs)
-{
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    if (cc->get_phys_page_attrs_debug) {
-        return cc->get_phys_page_attrs_debug(cpu, addr, attrs);
-    }
-    /* Fallback for CPUs which don't implement the _attrs_ hook */
-    *attrs = MEMTXATTRS_UNSPECIFIED;
-    return cc->get_phys_page_debug(cpu, addr);
-}
+hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr,
+                                     MemTxAttrs *attrs);
 
 /**
  * cpu_get_phys_page_debug:
@@ -636,12 +727,7 @@ static inline hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr,
  *
  * Returns: Corresponding physical page address or -1 if no page found.
  */
-static inline hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr)
-{
-    MemTxAttrs attrs = {};
-
-    return cpu_get_phys_page_attrs_debug(cpu, addr, &attrs);
-}
+hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
 
 /** cpu_asidx_from_attrs:
  * @cpu: CPU
@@ -650,17 +736,16 @@ static inline hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr)
  * Returns the address space index specifying the CPU AddressSpace
  * to use for a memory access with the given transaction attributes.
  */
-static inline int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs)
-{
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-    int ret = 0;
+int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs);
 
-    if (cc->asidx_from_attrs) {
-        ret = cc->asidx_from_attrs(cpu, attrs);
-        assert(ret < cpu->num_ases && ret >= 0);
-    }
-    return ret;
-}
+/**
+ * cpu_virtio_is_big_endian:
+ * @cpu: CPU
+
+ * Returns %true if a CPU which supports runtime configurable endianness
+ * is currently big-endian.
+ */
+bool cpu_virtio_is_big_endian(CPUState *cpu);
 
 #endif /* CONFIG_USER_ONLY */
 
@@ -687,9 +772,10 @@ void cpu_reset(CPUState *cpu);
  * @typename: The CPU base type.
  * @cpu_model: The model string without any parameters.
  *
- * Looks up a CPU #ObjectClass matching name @cpu_model.
+ * Looks up a concrete CPU #ObjectClass matching name @cpu_model.
  *
- * Returns: A #CPUClass or %NULL if not matching class is found.
+ * Returns: A concrete #CPUClass or %NULL if no matching class is found
+ *          or if the matching class is abstract.
  */
 ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model);
 
@@ -814,7 +900,7 @@ void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data
  */
 static inline bool cpu_in_exclusive_context(const CPUState *cpu)
 {
-    return cpu->in_exclusive_context;
+    return cpu->exclusive_context_count;
 }
 
 /**
@@ -857,36 +943,6 @@ CPUState *cpu_by_arch_id(int64_t id);
 
 void cpu_interrupt(CPUState *cpu, int mask);
 
-#ifdef NEED_CPU_H
-
-#ifdef CONFIG_SOFTMMU
-static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
-                                        MMUAccessType access_type,
-                                        int mmu_idx, uintptr_t retaddr)
-{
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    cc->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr);
-}
-
-static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
-                                          vaddr addr, unsigned size,
-                                          MMUAccessType access_type,
-                                          int mmu_idx, MemTxAttrs attrs,
-                                          MemTxResult response,
-                                          uintptr_t retaddr)
-{
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    if (!cpu->ignore_memory_transaction_failures && cc->do_transaction_failed) {
-        cc->do_transaction_failed(cpu, physaddr, addr, size, access_type,
-                                  mmu_idx, attrs, response, retaddr);
-    }
-}
-#endif
-
-#endif /* NEED_CPU_H */
-
 /**
  * cpu_set_pc:
  * @cpu: The CPU to set the program counter for.
@@ -1007,9 +1063,10 @@ void cpu_single_step(CPUState *cpu, int enabled);
 #define BP_GDB                0x10
 #define BP_CPU                0x20
 #define BP_ANY                (BP_GDB | BP_CPU)
-#define BP_WATCHPOINT_HIT_READ 0x40
-#define BP_WATCHPOINT_HIT_WRITE 0x80
-#define BP_WATCHPOINT_HIT (BP_WATCHPOINT_HIT_READ | BP_WATCHPOINT_HIT_WRITE)
+#define BP_HIT_SHIFT          6
+#define BP_WATCHPOINT_HIT_READ  (BP_MEM_READ << BP_HIT_SHIFT)
+#define BP_WATCHPOINT_HIT_WRITE (BP_MEM_WRITE << BP_HIT_SHIFT)
+#define BP_WATCHPOINT_HIT       (BP_MEM_ACCESS << BP_HIT_SHIFT)
 
 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                           CPUBreakpoint **breakpoint);
@@ -1032,7 +1089,7 @@ static inline bool cpu_breakpoint_test(CPUState *cpu, vaddr pc, int mask)
     return false;
 }
 
-#ifdef CONFIG_USER_ONLY
+#if defined(CONFIG_USER_ONLY)
 static inline int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                                         int flags, CPUWatchpoint **watchpoint)
 {
@@ -1053,17 +1110,6 @@ static inline void cpu_watchpoint_remove_by_ref(CPUState *cpu,
 static inline void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 {
 }
-
-static inline void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
-                                        MemTxAttrs atr, int fl, uintptr_t ra)
-{
-}
-
-static inline int cpu_watchpoint_address_matches(CPUState *cpu,
-                                                 vaddr addr, vaddr len)
-{
-    return 0;
-}
 #else
 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                           int flags, CPUWatchpoint **watchpoint);
@@ -1071,33 +1117,24 @@ int cpu_watchpoint_remove(CPUState *cpu, vaddr addr,
                           vaddr len, int flags);
 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint);
 void cpu_watchpoint_remove_all(CPUState *cpu, int mask);
+#endif
 
 /**
- * cpu_check_watchpoint:
- * @cpu: cpu context
- * @addr: guest virtual address
- * @len: access length
- * @attrs: memory access attributes
- * @flags: watchpoint access type
- * @ra: unwind return address
- *
- * Check for a watchpoint hit in [addr, addr+len) of the type
- * specified by @flags.  Exit via exception with a hit.
- */
-void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
-                          MemTxAttrs attrs, int flags, uintptr_t ra);
-
-/**
- * cpu_watchpoint_address_matches:
- * @cpu: cpu context
- * @addr: guest virtual address
- * @len: access length
+ * cpu_plugin_mem_cbs_enabled() - are plugin memory callbacks enabled?
+ * @cs: CPUState pointer
  *
- * Return the watchpoint flags that apply to [addr, addr+len).
- * If no watchpoint is registered for the range, the result is 0.
+ * The memory callbacks are installed if a plugin has instrumented an
+ * instruction for memory. This can be useful to know if you want to
+ * force a slow path for a series of memory accesses.
  */
-int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len);
+static inline bool cpu_plugin_mem_cbs_enabled(const CPUState *cpu)
+{
+#ifdef CONFIG_PLUGIN
+    return !!cpu->plugin_mem_cbs;
+#else
+    return false;
 #endif
+}
 
 /**
  * cpu_get_address_space:
@@ -1109,31 +1146,36 @@ int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len);
  */
 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx);
 
-void QEMU_NORETURN cpu_abort(CPUState *cpu, const char *fmt, ...)
-    GCC_FMT_ATTR(2, 3);
-extern Property cpu_common_props[];
+G_NORETURN void cpu_abort(CPUState *cpu, const char *fmt, ...)
+    G_GNUC_PRINTF(2, 3);
+
+/* $(top_srcdir)/cpu.c */
+void cpu_class_init_props(DeviceClass *dc);
 void cpu_exec_initfn(CPUState *cpu);
-void cpu_exec_realizefn(CPUState *cpu, Error **errp);
+bool cpu_exec_realizefn(CPUState *cpu, Error **errp);
 void cpu_exec_unrealizefn(CPUState *cpu);
+void cpu_exec_reset_hold(CPUState *cpu);
 
 /**
  * target_words_bigendian:
  * Returns true if the (default) endianness of the target is big endian,
  * false otherwise. Note that in target-specific code, you can use
- * TARGET_WORDS_BIGENDIAN directly instead. On the other hand, common
+ * TARGET_BIG_ENDIAN directly instead. On the other hand, common
  * code should normally never need to know about the endianness of the
  * target, so please do *not* use this function unless you know very well
  * what you are doing!
  */
 bool target_words_bigendian(void);
 
+const char *target_name(void);
+
+void page_size_init(void);
+
 #ifdef NEED_CPU_H
 
-#ifdef CONFIG_SOFTMMU
+#ifndef CONFIG_USER_ONLY
+
 extern const VMStateDescription vmstate_cpu_common;
-#else
-#define vmstate_cpu_common vmstate_dummy
-#endif
 
 #define VMSTATE_CPU() {                                                     \
     .name = "parent_obj",                                                   \
@@ -1142,6 +1184,7 @@ extern const VMStateDescription vmstate_cpu_common;
     .flags = VMS_STRUCT,                                                    \
     .offset = 0,                                                            \
 }
+#endif /* !CONFIG_USER_ONLY */
 
 #endif /* NEED_CPU_H */
 
diff --git a/include/hw/core/sysbus-fdt.h b/include/hw/core/sysbus-fdt.h
new file mode 100644 (file)
index 0000000..340c382
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Dynamic sysbus device tree node generation API
+ *
+ * Copyright Linaro Limited, 2014
+ *
+ * Authors:
+ *  Alex Graf <agraf@suse.de>
+ *  Eric Auger <eric.auger@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef HW_ARM_SYSBUS_FDT_H
+#define HW_ARM_SYSBUS_FDT_H
+
+#include "exec/hwaddr.h"
+
+/**
+ * platform_bus_add_all_fdt_nodes - create all the platform bus nodes
+ *
+ * builds the parent platform bus node and all the nodes of dynamic
+ * sysbus devices attached to it.
+ */
+void platform_bus_add_all_fdt_nodes(void *fdt, const char *intc, hwaddr addr,
+                                    hwaddr bus_size, int irq_start);
+#endif
diff --git a/include/hw/core/sysemu-cpu-ops.h b/include/hw/core/sysemu-cpu-ops.h
new file mode 100644 (file)
index 0000000..24d003f
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * CPU operations specific to system emulation
+ *
+ * Copyright (c) 2012 SUSE LINUX Products GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef SYSEMU_CPU_OPS_H
+#define SYSEMU_CPU_OPS_H
+
+#include "hw/core/cpu.h"
+
+/*
+ * struct SysemuCPUOps: System operations specific to a CPU class
+ */
+typedef struct SysemuCPUOps {
+    /**
+     * @get_memory_mapping: Callback for obtaining the memory mappings.
+     */
+    bool (*get_memory_mapping)(CPUState *cpu, MemoryMappingList *list,
+                               Error **errp);
+    /**
+     * @get_paging_enabled: Callback for inquiring whether paging is enabled.
+     */
+    bool (*get_paging_enabled)(const CPUState *cpu);
+    /**
+     * @get_phys_page_debug: Callback for obtaining a physical address.
+     */
+    hwaddr (*get_phys_page_debug)(CPUState *cpu, vaddr addr);
+    /**
+     * @get_phys_page_attrs_debug: Callback for obtaining a physical address
+     *       and the associated memory transaction attributes to use for the
+     *       access.
+     * CPUs which use memory transaction attributes should implement this
+     * instead of get_phys_page_debug.
+     */
+    hwaddr (*get_phys_page_attrs_debug)(CPUState *cpu, vaddr addr,
+                                        MemTxAttrs *attrs);
+    /**
+     * @asidx_from_attrs: Callback to return the CPU AddressSpace to use for
+     *       a memory access with the specified memory transaction attributes.
+     */
+    int (*asidx_from_attrs)(CPUState *cpu, MemTxAttrs attrs);
+    /**
+     * @get_crash_info: Callback for reporting guest crash information in
+     * GUEST_PANICKED events.
+     */
+    GuestPanicInformation* (*get_crash_info)(CPUState *cpu);
+    /**
+     * @write_elf32_note: Callback for writing a CPU-specific ELF note to a
+     * 32-bit VM coredump.
+     */
+    int (*write_elf32_note)(WriteCoreDumpFunction f, CPUState *cpu,
+                            int cpuid, DumpState *s);
+    /**
+     * @write_elf64_note: Callback for writing a CPU-specific ELF note to a
+     * 64-bit VM coredump.
+     */
+    int (*write_elf64_note)(WriteCoreDumpFunction f, CPUState *cpu,
+                            int cpuid, DumpState *s);
+    /**
+     * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF
+     * note to a 32-bit VM coredump.
+     */
+    int (*write_elf32_qemunote)(WriteCoreDumpFunction f, CPUState *cpu,
+                                DumpState *s);
+    /**
+     * @write_elf64_qemunote: Callback for writing a CPU- and QEMU-specific ELF
+     * note to a 64-bit VM coredump.
+     */
+    int (*write_elf64_qemunote)(WriteCoreDumpFunction f, CPUState *cpu,
+                                DumpState *s);
+    /**
+     * @virtio_is_big_endian: Callback to return %true if a CPU which supports
+     * runtime configurable endianness is currently big-endian.
+     * Non-configurable CPUs can use the default implementation of this method.
+     * This method should not be used by any callers other than the pre-1.0
+     * virtio devices.
+     */
+    bool (*virtio_is_big_endian)(CPUState *cpu);
+
+    /**
+     * @legacy_vmsd: Legacy state for migration.
+     *               Do not use in new targets, use #DeviceClass::vmsd instead.
+     */
+    const VMStateDescription *legacy_vmsd;
+
+} SysemuCPUOps;
+
+#endif /* SYSEMU_CPU_OPS_H */
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
new file mode 100644 (file)
index 0000000..479713a
--- /dev/null
@@ -0,0 +1,221 @@
+/*
+ * TCG CPU-specific operations
+ *
+ * Copyright 2021 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef TCG_CPU_OPS_H
+#define TCG_CPU_OPS_H
+
+#include "hw/core/cpu.h"
+
+struct TCGCPUOps {
+    /**
+     * @initialize: Initialize TCG state
+     *
+     * Called when the first CPU is realized.
+     */
+    void (*initialize)(void);
+    /**
+     * @synchronize_from_tb: Synchronize state from a TCG #TranslationBlock
+     *
+     * This is called when we abandon execution of a TB before starting it,
+     * and must set all parts of the CPU state which the previous TB in the
+     * chain may not have updated.
+     * By default, when this is NULL, a call is made to @set_pc(tb->pc).
+     *
+     * If more state needs to be restored, the target must implement a
+     * function to restore all the state, and register it here.
+     */
+    void (*synchronize_from_tb)(CPUState *cpu, const TranslationBlock *tb);
+    /**
+     * @restore_state_to_opc: Synchronize state from INDEX_op_start_insn
+     *
+     * This is called when we unwind state in the middle of a TB,
+     * usually before raising an exception.  Set all part of the CPU
+     * state which are tracked insn-by-insn in the target-specific
+     * arguments to start_insn, passed as @data.
+     */
+    void (*restore_state_to_opc)(CPUState *cpu, const TranslationBlock *tb,
+                                 const uint64_t *data);
+
+    /** @cpu_exec_enter: Callback for cpu_exec preparation */
+    void (*cpu_exec_enter)(CPUState *cpu);
+    /** @cpu_exec_exit: Callback for cpu_exec cleanup */
+    void (*cpu_exec_exit)(CPUState *cpu);
+    /** @debug_excp_handler: Callback for handling debug exceptions */
+    void (*debug_excp_handler)(CPUState *cpu);
+
+#ifdef NEED_CPU_H
+#if defined(CONFIG_USER_ONLY) && defined(TARGET_I386)
+    /**
+     * @fake_user_interrupt: Callback for 'fake exception' handling.
+     *
+     * Simulate 'fake exception' which will be handled outside the
+     * cpu execution loop (hack for x86 user mode).
+     */
+    void (*fake_user_interrupt)(CPUState *cpu);
+#else
+    /**
+     * @do_interrupt: Callback for interrupt handling.
+     */
+    void (*do_interrupt)(CPUState *cpu);
+#endif /* !CONFIG_USER_ONLY || !TARGET_I386 */
+#ifdef CONFIG_USER_ONLY
+    /**
+     * record_sigsegv:
+     * @cpu: cpu context
+     * @addr: faulting guest address
+     * @access_type: access was read/write/execute
+     * @maperr: true for invalid page, false for permission fault
+     * @ra: host pc for unwinding
+     *
+     * We are about to raise SIGSEGV with si_code set for @maperr,
+     * and si_addr set for @addr.  Record anything further needed
+     * for the signal ucontext_t.
+     *
+     * If the emulated kernel does not provide anything to the signal
+     * handler with anything besides the user context registers, and
+     * the siginfo_t, then this hook need do nothing and may be omitted.
+     * Otherwise, record the data and return; the caller will raise
+     * the signal, unwind the cpu state, and return to the main loop.
+     *
+     * If it is simpler to re-use the sysemu tlb_fill code, @ra is provided
+     * so that a "normal" cpu exception can be raised.  In this case,
+     * the signal must be raised by the architecture cpu_loop.
+     */
+    void (*record_sigsegv)(CPUState *cpu, vaddr addr,
+                           MMUAccessType access_type,
+                           bool maperr, uintptr_t ra);
+    /**
+     * record_sigbus:
+     * @cpu: cpu context
+     * @addr: misaligned guest address
+     * @access_type: access was read/write/execute
+     * @ra: host pc for unwinding
+     *
+     * We are about to raise SIGBUS with si_code BUS_ADRALN,
+     * and si_addr set for @addr.  Record anything further needed
+     * for the signal ucontext_t.
+     *
+     * If the emulated kernel does not provide the signal handler with
+     * anything besides the user context registers, and the siginfo_t,
+     * then this hook need do nothing and may be omitted.
+     * Otherwise, record the data and return; the caller will raise
+     * the signal, unwind the cpu state, and return to the main loop.
+     *
+     * If it is simpler to re-use the sysemu do_unaligned_access code,
+     * @ra is provided so that a "normal" cpu exception can be raised.
+     * In this case, the signal must be raised by the architecture cpu_loop.
+     */
+    void (*record_sigbus)(CPUState *cpu, vaddr addr,
+                          MMUAccessType access_type, uintptr_t ra);
+#else
+    /** @cpu_exec_interrupt: Callback for processing interrupts in cpu_exec */
+    bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request);
+    /**
+     * @tlb_fill: Handle a softmmu tlb miss
+     *
+     * If the access is valid, call tlb_set_page and return true;
+     * if the access is invalid and probe is true, return false;
+     * otherwise raise an exception and do not return.
+     */
+    bool (*tlb_fill)(CPUState *cpu, vaddr address, int size,
+                     MMUAccessType access_type, int mmu_idx,
+                     bool probe, uintptr_t retaddr);
+    /**
+     * @do_transaction_failed: Callback for handling failed memory transactions
+     * (ie bus faults or external aborts; not MMU faults)
+     */
+    void (*do_transaction_failed)(CPUState *cpu, hwaddr physaddr, vaddr addr,
+                                  unsigned size, MMUAccessType access_type,
+                                  int mmu_idx, MemTxAttrs attrs,
+                                  MemTxResult response, uintptr_t retaddr);
+    /**
+     * @do_unaligned_access: Callback for unaligned access handling
+     * The callback must exit via raising an exception.
+     */
+    G_NORETURN void (*do_unaligned_access)(CPUState *cpu, vaddr addr,
+                                           MMUAccessType access_type,
+                                           int mmu_idx, uintptr_t retaddr);
+
+    /**
+     * @adjust_watchpoint_address: hack for cpu_check_watchpoint used by ARM
+     */
+    vaddr (*adjust_watchpoint_address)(CPUState *cpu, vaddr addr, int len);
+
+    /**
+     * @debug_check_watchpoint: return true if the architectural
+     * watchpoint whose address has matched should really fire, used by ARM
+     * and RISC-V
+     */
+    bool (*debug_check_watchpoint)(CPUState *cpu, CPUWatchpoint *wp);
+
+    /**
+     * @debug_check_breakpoint: return true if the architectural
+     * breakpoint whose PC has matched should really fire.
+     */
+    bool (*debug_check_breakpoint)(CPUState *cpu);
+
+    /**
+     * @io_recompile_replay_branch: Callback for cpu_io_recompile.
+     *
+     * The cpu has been stopped, and cpu_restore_state_from_tb has been
+     * called.  If the faulting instruction is in a delay slot, and the
+     * target architecture requires re-execution of the branch, then
+     * adjust the cpu state as required and return true.
+     */
+    bool (*io_recompile_replay_branch)(CPUState *cpu,
+                                       const TranslationBlock *tb);
+#endif /* !CONFIG_USER_ONLY */
+#endif /* NEED_CPU_H */
+
+};
+
+#if defined(CONFIG_USER_ONLY)
+
+static inline void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
+                                        MemTxAttrs atr, int fl, uintptr_t ra)
+{
+}
+
+static inline int cpu_watchpoint_address_matches(CPUState *cpu,
+                                                 vaddr addr, vaddr len)
+{
+    return 0;
+}
+
+#else
+
+/**
+ * cpu_check_watchpoint:
+ * @cpu: cpu context
+ * @addr: guest virtual address
+ * @len: access length
+ * @attrs: memory access attributes
+ * @flags: watchpoint access type
+ * @ra: unwind return address
+ *
+ * Check for a watchpoint hit in [addr, addr+len) of the type
+ * specified by @flags.  Exit via exception with a hit.
+ */
+void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
+                          MemTxAttrs attrs, int flags, uintptr_t ra);
+
+/**
+ * cpu_watchpoint_address_matches:
+ * @cpu: cpu context
+ * @addr: guest virtual address
+ * @len: access length
+ *
+ * Return the watchpoint flags that apply to [addr, addr+len).
+ * If no watchpoint is registered for the range, the result is 0.
+ */
+int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len);
+
+#endif
+
+#endif /* TCG_CPU_OPS_H */
diff --git a/include/hw/cpu/a15mpcore.h b/include/hw/cpu/a15mpcore.h
new file mode 100644 (file)
index 0000000..75d39e5
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Cortex-A15MPCore internal peripheral emulation.
+ *
+ * Copyright (c) 2012 Linaro Limited.
+ * Written by Peter Maydell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef HW_CPU_A15MPCORE_H
+#define HW_CPU_A15MPCORE_H
+
+#include "hw/sysbus.h"
+#include "hw/intc/arm_gic.h"
+#include "qom/object.h"
+
+/* A15MP private memory region.  */
+
+#define TYPE_A15MPCORE_PRIV "a15mpcore_priv"
+OBJECT_DECLARE_SIMPLE_TYPE(A15MPPrivState, A15MPCORE_PRIV)
+
+struct A15MPPrivState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    uint32_t num_cpu;
+    uint32_t num_irq;
+    MemoryRegion container;
+
+    GICState gic;
+};
+
+#endif
diff --git a/include/hw/cpu/a9mpcore.h b/include/hw/cpu/a9mpcore.h
new file mode 100644 (file)
index 0000000..e0396ab
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Cortex-A9MPCore internal peripheral emulation.
+ *
+ * Copyright (c) 2009 CodeSourcery.
+ * Copyright (c) 2011 Linaro Limited.
+ * Written by Paul Brook, Peter Maydell.
+ *
+ * This code is licensed under the GPL.
+ */
+#ifndef HW_CPU_A9MPCORE_H
+#define HW_CPU_A9MPCORE_H
+
+#include "hw/sysbus.h"
+#include "hw/intc/arm_gic.h"
+#include "hw/misc/a9scu.h"
+#include "hw/timer/arm_mptimer.h"
+#include "hw/timer/a9gtimer.h"
+#include "qom/object.h"
+
+#define TYPE_A9MPCORE_PRIV "a9mpcore_priv"
+OBJECT_DECLARE_SIMPLE_TYPE(A9MPPrivState, A9MPCORE_PRIV)
+
+struct A9MPPrivState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    uint32_t num_cpu;
+    MemoryRegion container;
+    uint32_t num_irq;
+
+    A9SCUState scu;
+    GICState gic;
+    A9GTimerState gtimer;
+    ARMMPTimerState mptimer;
+    ARMMPTimerState wdt;
+};
+
+#endif
diff --git a/include/hw/cpu/arm11mpcore.h b/include/hw/cpu/arm11mpcore.h
new file mode 100644 (file)
index 0000000..2cac8c1
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * ARM11MPCore internal peripheral emulation.
+ *
+ * Copyright (c) 2006-2007 CodeSourcery.
+ * Written by Paul Brook
+ *
+ * This code is licensed under the GPL.
+ */
+
+#ifndef HW_CPU_ARM11MPCORE_H
+#define HW_CPU_ARM11MPCORE_H
+
+#include "hw/sysbus.h"
+#include "hw/misc/arm11scu.h"
+#include "hw/intc/arm_gic.h"
+#include "hw/timer/arm_mptimer.h"
+#include "qom/object.h"
+
+#define TYPE_ARM11MPCORE_PRIV "arm11mpcore_priv"
+OBJECT_DECLARE_SIMPLE_TYPE(ARM11MPCorePriveState, ARM11MPCORE_PRIV)
+
+struct ARM11MPCorePriveState {
+    SysBusDevice parent_obj;
+
+    uint32_t num_cpu;
+    MemoryRegion container;
+    uint32_t num_irq;
+
+    ARM11SCUState scu;
+    GICState gic;
+    ARMMPTimerState mptimer;
+    ARMMPTimerState wdtimer;
+};
+
+#endif
diff --git a/include/hw/cpu/cluster.h b/include/hw/cpu/cluster.h
new file mode 100644 (file)
index 0000000..53fbf36
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * QEMU CPU cluster
+ *
+ * Copyright (c) 2018 GreenSocs SAS
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see
+ * <http://www.gnu.org/licenses/gpl-2.0.html>
+ */
+#ifndef HW_CPU_CLUSTER_H
+#define HW_CPU_CLUSTER_H
+
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+/*
+ * CPU Cluster type
+ *
+ * A cluster is a group of CPUs which are all identical and have the same view
+ * of the rest of the system. It is mainly an internal QEMU representation and
+ * does not necessarily match with the notion of clusters on the real hardware.
+ *
+ * If CPUs are not identical (for example, Cortex-A53 and Cortex-A57 CPUs in an
+ * Arm big.LITTLE system) they should be in different clusters. If the CPUs do
+ * not have the same view of memory (for example the main CPU and a management
+ * controller processor) they should be in different clusters.
+ *
+ * A cluster is created by creating an object of TYPE_CPU_CLUSTER, and then
+ * adding the CPUs to it as QOM child objects (e.g. using the
+ * object_initialize_child() or object_property_add_child() functions).
+ * The CPUs may be either direct children of the cluster object, or indirect
+ * children (e.g. children of children of the cluster object).
+ *
+ * All CPUs must be added as children before the cluster is realized.
+ * (Regrettably QOM provides no way to prevent adding children to a realized
+ * object and no way for the parent to be notified when a new child is added
+ * to it, so this restriction is not checked for, but the system will not
+ * behave correctly if it is not adhered to. The cluster will assert that
+ * it contains at least one CPU, which should catch most inadvertent
+ * violations of this constraint.)
+ *
+ * A CPU which is not put into any cluster will be considered implicitly
+ * to be in a cluster with all the other "loose" CPUs, so all CPUs that are
+ * not assigned to clusters must be identical.
+ */
+
+#define TYPE_CPU_CLUSTER "cpu-cluster"
+OBJECT_DECLARE_SIMPLE_TYPE(CPUClusterState, CPU_CLUSTER)
+
+/*
+ * This limit is imposed by TCG, which puts the cluster ID into an
+ * 8 bit field (and uses all-1s for the default "not in any cluster").
+ */
+#define MAX_CLUSTERS 255
+
+/**
+ * CPUClusterState:
+ * @cluster_id: The cluster ID. This value is for internal use only and should
+ *   not be exposed directly to the user or to the guest.
+ *
+ * State of a CPU cluster.
+ */
+struct CPUClusterState {
+    /*< private >*/
+    DeviceState parent_obj;
+
+    /*< public >*/
+    uint32_t cluster_id;
+};
+
+#endif
diff --git a/include/hw/cpu/core.h b/include/hw/cpu/core.h
new file mode 100644 (file)
index 0000000..98ab916
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * CPU core abstract device
+ *
+ * Copyright (C) 2016 Bharata B Rao <bharata@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef HW_CPU_CORE_H
+#define HW_CPU_CORE_H
+
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+#define TYPE_CPU_CORE "cpu-core"
+
+OBJECT_DECLARE_SIMPLE_TYPE(CPUCore, CPU_CORE)
+
+struct CPUCore {
+    /*< private >*/
+    DeviceState parent_obj;
+
+    /*< public >*/
+    int core_id;
+    int nr_threads;
+};
+
+/* Note: topology field names need to be kept in sync with
+ * 'CpuInstanceProperties' */
+
+#define CPU_CORE_PROP_CORE_ID "core-id"
+
+#endif
diff --git a/include/hw/cris/etraxfs.h b/include/hw/cris/etraxfs.h
new file mode 100644 (file)
index 0000000..467b529
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * QEMU ETRAX System Emulator
+ *
+ * Copyright (c) 2008 Edgar E. Iglesias, Axis Communications AB.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_ETRAXFS_H
+#define HW_ETRAXFS_H
+
+#include "net/net.h"
+#include "hw/cris/etraxfs_dma.h"
+#include "hw/qdev-properties.h"
+#include "hw/sysbus.h"
+#include "qapi/error.h"
+
+DeviceState *etraxfs_eth_init(NICInfo *nd, hwaddr base, int phyaddr,
+                              struct etraxfs_dma_client *dma_out,
+                              struct etraxfs_dma_client *dma_in);
+
+static inline DeviceState *etraxfs_ser_create(hwaddr addr,
+                                              qemu_irq irq,
+                                              Chardev *chr)
+{
+    DeviceState *dev;
+    SysBusDevice *s;
+
+    dev = qdev_new("etraxfs-serial");
+    s = SYS_BUS_DEVICE(dev);
+    qdev_prop_set_chr(dev, "chardev", chr);
+    sysbus_realize_and_unref(s, &error_fatal);
+    sysbus_mmio_map(s, 0, addr);
+    sysbus_connect_irq(s, 0, irq);
+    return dev;
+}
+
+#endif
diff --git a/include/hw/cris/etraxfs_dma.h b/include/hw/cris/etraxfs_dma.h
new file mode 100644 (file)
index 0000000..095d76b
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef HW_ETRAXFS_DMA_H
+#define HW_ETRAXFS_DMA_H
+
+#include "exec/hwaddr.h"
+
+struct dma_context_metadata {
+       /* data descriptor md */
+       uint16_t metadata;
+};
+
+struct etraxfs_dma_client
+{
+       /* DMA controller. */
+       int channel;
+       void *ctrl;
+
+       /* client.  */
+       struct {
+               int (*push)(void *opaque, unsigned char *buf,
+                           int len, bool eop);
+               void (*pull)(void *opaque);
+               void (*metadata_push)(void *opaque,
+                                     const struct dma_context_metadata *md);
+               void *opaque;
+       } client;
+};
+
+void *etraxfs_dmac_init(hwaddr base, int nr_channels);
+void etraxfs_dmac_connect(void *opaque, int channel, qemu_irq *line,
+                         int input);
+void etraxfs_dmac_connect_client(void *opaque, int c, 
+                                struct etraxfs_dma_client *cl);
+int etraxfs_dmac_input(struct etraxfs_dma_client *client, 
+                      void *buf, int len, int eop);
+
+#endif
diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
new file mode 100644 (file)
index 0000000..75e47b6
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+ * QEMU CXL Support
+ *
+ * Copyright (c) 2020 Intel
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef CXL_H
+#define CXL_H
+
+
+#include "qapi/qapi-types-machine.h"
+#include "qapi/qapi-visit-machine.h"
+#include "hw/pci/pci_host.h"
+#include "cxl_pci.h"
+#include "cxl_component.h"
+#include "cxl_device.h"
+
+#define CXL_CACHE_LINE_SIZE 64
+#define CXL_COMPONENT_REG_BAR_IDX 0
+#define CXL_DEVICE_REG_BAR_IDX 2
+
+#define CXL_WINDOW_MAX 10
+
+typedef struct PXBCXLDev PXBCXLDev;
+
+typedef struct CXLFixedWindow {
+    uint64_t size;
+    char **targets;
+    PXBCXLDev *target_hbs[16];
+    uint8_t num_targets;
+    uint8_t enc_int_ways;
+    uint8_t enc_int_gran;
+    /* Todo: XOR based interleaving */
+    MemoryRegion mr;
+    hwaddr base;
+} CXLFixedWindow;
+
+typedef struct CXLState {
+    bool is_enabled;
+    MemoryRegion host_mr;
+    unsigned int next_mr_idx;
+    GList *fixed_windows;
+    CXLFixedMemoryWindowOptionsList *cfmw_list;
+} CXLState;
+
+struct CXLHost {
+    PCIHostState parent_obj;
+
+    CXLComponentState cxl_cstate;
+    bool passthrough;
+};
+
+#define TYPE_PXB_CXL_HOST "pxb-cxl-host"
+OBJECT_DECLARE_SIMPLE_TYPE(CXLHost, PXB_CXL_HOST)
+
+#define TYPE_CXL_USP "cxl-upstream"
+
+typedef struct CXLUpstreamPort CXLUpstreamPort;
+DECLARE_INSTANCE_CHECKER(CXLUpstreamPort, CXL_USP, TYPE_CXL_USP)
+CXLComponentState *cxl_usp_to_cstate(CXLUpstreamPort *usp);
+
+#define TYPE_CXL_DSP "cxl-downstream"
+
+typedef struct CXLDownstreamPort CXLDownstreamPort;
+DECLARE_INSTANCE_CHECKER(CXLDownstreamPort, CXL_DSP, TYPE_CXL_DSP)
+
+#endif
diff --git a/include/hw/cxl/cxl_cdat.h b/include/hw/cxl/cxl_cdat.h
new file mode 100644 (file)
index 0000000..7f67638
--- /dev/null
@@ -0,0 +1,167 @@
+/*
+ * CXL CDAT Structure
+ *
+ * Copyright (C) 2021 Avery Design Systems, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef CXL_CDAT_H
+#define CXL_CDAT_H
+
+#include "hw/cxl/cxl_pci.h"
+#include "hw/pci/pcie_doe.h"
+
+/*
+ * Reference:
+ *   Coherent Device Attribute Table (CDAT) Specification, Rev. 1.03, July. 2022
+ *   Compute Express Link (CXL) Specification, Rev. 3.0, Aug. 2022
+ */
+/* Table Access DOE - CXL r3.0 8.1.11 */
+#define CXL_DOE_TABLE_ACCESS      2
+#define CXL_DOE_PROTOCOL_CDAT     ((CXL_DOE_TABLE_ACCESS << 16) | CXL_VENDOR_ID)
+
+/* Read Entry - CXL r3.0 8.1.11.1 */
+#define CXL_DOE_TAB_TYPE_CDAT 0
+#define CXL_DOE_TAB_ENT_MAX 0xFFFF
+
+/* Read Entry Request - CXL r3.0 8.1.11.1 Table 8-13 */
+#define CXL_DOE_TAB_REQ 0
+typedef struct CDATReq {
+    DOEHeader header;
+    uint8_t req_code;
+    uint8_t table_type;
+    uint16_t entry_handle;
+} QEMU_PACKED CDATReq;
+
+/* Read Entry Response - CXL r3.0 8.1.11.1 Table 8-14 */
+#define CXL_DOE_TAB_RSP 0
+typedef struct CDATRsp {
+    DOEHeader header;
+    uint8_t rsp_code;
+    uint8_t table_type;
+    uint16_t entry_handle;
+} QEMU_PACKED CDATRsp;
+
+/* CDAT Table Format - CDAT Table 1 */
+#define CXL_CDAT_REV 2
+typedef struct CDATTableHeader {
+    uint32_t length;
+    uint8_t revision;
+    uint8_t checksum;
+    uint8_t reserved[6];
+    uint32_t sequence;
+} QEMU_PACKED CDATTableHeader;
+
+/* CDAT Structure Types - CDAT Table 2 */
+typedef enum {
+    CDAT_TYPE_DSMAS = 0,
+    CDAT_TYPE_DSLBIS = 1,
+    CDAT_TYPE_DSMSCIS = 2,
+    CDAT_TYPE_DSIS = 3,
+    CDAT_TYPE_DSEMTS = 4,
+    CDAT_TYPE_SSLBIS = 5,
+} CDATType;
+
+typedef struct CDATSubHeader {
+    uint8_t type;
+    uint8_t reserved;
+    uint16_t length;
+} CDATSubHeader;
+
+/* Device Scoped Memory Affinity Structure - CDAT Table 3 */
+typedef struct CDATDsmas {
+    CDATSubHeader header;
+    uint8_t DSMADhandle;
+    uint8_t flags;
+#define CDAT_DSMAS_FLAG_NV              (1 << 2)
+#define CDAT_DSMAS_FLAG_SHAREABLE       (1 << 3)
+#define CDAT_DSMAS_FLAG_HW_COHERENT     (1 << 4)
+#define CDAT_DSMAS_FLAG_DYNAMIC_CAP     (1 << 5)
+    uint16_t reserved;
+    uint64_t DPA_base;
+    uint64_t DPA_length;
+} QEMU_PACKED CDATDsmas;
+
+/* Device Scoped Latency and Bandwidth Information Structure - CDAT Table 5 */
+typedef struct CDATDslbis {
+    CDATSubHeader header;
+    uint8_t handle;
+    /* Definitions of these fields refer directly to HMAT fields */
+    uint8_t flags;
+    uint8_t data_type;
+    uint8_t reserved;
+    uint64_t entry_base_unit;
+    uint16_t entry[3];
+    uint16_t reserved2;
+} QEMU_PACKED CDATDslbis;
+
+/* Device Scoped Memory Side Cache Information Structure - CDAT Table 6 */
+typedef struct CDATDsmscis {
+    CDATSubHeader header;
+    uint8_t DSMAS_handle;
+    uint8_t reserved[3];
+    uint64_t memory_side_cache_size;
+    uint32_t cache_attributes;
+} QEMU_PACKED CDATDsmscis;
+
+/* Device Scoped Initiator Structure - CDAT Table 7 */
+typedef struct CDATDsis {
+    CDATSubHeader header;
+    uint8_t flags;
+    uint8_t handle;
+    uint16_t reserved;
+} QEMU_PACKED CDATDsis;
+
+/* Device Scoped EFI Memory Type Structure - CDAT Table 8 */
+typedef struct CDATDsemts {
+    CDATSubHeader header;
+    uint8_t DSMAS_handle;
+    uint8_t EFI_memory_type_attr;
+    uint16_t reserved;
+    uint64_t DPA_offset;
+    uint64_t DPA_length;
+} QEMU_PACKED CDATDsemts;
+
+/* Switch Scoped Latency and Bandwidth Information Structure - CDAT Table 9 */
+typedef struct CDATSslbisHeader {
+    CDATSubHeader header;
+    uint8_t data_type;
+    uint8_t reserved[3];
+    uint64_t entry_base_unit;
+} QEMU_PACKED CDATSslbisHeader;
+
+#define CDAT_PORT_ID_USP 0x100
+/* Switch Scoped Latency and Bandwidth Entry - CDAT Table 10 */
+typedef struct CDATSslbe {
+    uint16_t port_x_id;
+    uint16_t port_y_id;
+    uint16_t latency_bandwidth;
+    uint16_t reserved;
+} QEMU_PACKED CDATSslbe;
+
+typedef struct CDATSslbis {
+    CDATSslbisHeader sslbis_header;
+    CDATSslbe sslbe[];
+} QEMU_PACKED CDATSslbis;
+
+typedef struct CDATEntry {
+    void *base;
+    uint32_t length;
+} CDATEntry;
+
+typedef struct CDATObject {
+    CDATEntry *entry;
+    int entry_len;
+
+    int (*build_cdat_table)(CDATSubHeader ***cdat_table, void *priv);
+    void (*free_cdat_table)(CDATSubHeader **cdat_table, int num, void *priv);
+    bool to_update;
+    void *private;
+    char *filename;
+    uint8_t *buf;
+    struct CDATSubHeader **built_buf;
+    int built_buf_len;
+} CDATObject;
+#endif /* CXL_CDAT_H */
diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
new file mode 100644 (file)
index 0000000..5227a8e
--- /dev/null
@@ -0,0 +1,256 @@
+/*
+ * QEMU CXL Component
+ *
+ * Copyright (c) 2020 Intel
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef CXL_COMPONENT_H
+#define CXL_COMPONENT_H
+
+/* CXL 2.0 - 8.2.4 */
+#define CXL2_COMPONENT_IO_REGION_SIZE 0x1000
+#define CXL2_COMPONENT_CM_REGION_SIZE 0x1000
+#define CXL2_COMPONENT_BLOCK_SIZE 0x10000
+
+#include "qemu/range.h"
+#include "hw/cxl/cxl_cdat.h"
+#include "hw/register.h"
+#include "qapi/error.h"
+
+enum reg_type {
+    CXL2_DEVICE,
+    CXL2_TYPE3_DEVICE,
+    CXL2_LOGICAL_DEVICE,
+    CXL2_ROOT_PORT,
+    CXL2_UPSTREAM_PORT,
+    CXL2_DOWNSTREAM_PORT,
+    CXL3_SWITCH_MAILBOX_CCI,
+};
+
+/*
+ * Capability registers are defined at the top of the CXL.cache/mem region and
+ * are packed. For our purposes we will always define the caps in the same
+ * order.
+ * CXL 2.0 - 8.2.5 Table 142 for details.
+ */
+
+/* CXL 2.0 - 8.2.5.1 */
+REG32(CXL_CAPABILITY_HEADER, 0)
+    FIELD(CXL_CAPABILITY_HEADER, ID, 0, 16)
+    FIELD(CXL_CAPABILITY_HEADER, VERSION, 16, 4)
+    FIELD(CXL_CAPABILITY_HEADER, CACHE_MEM_VERSION, 20, 4)
+    FIELD(CXL_CAPABILITY_HEADER, ARRAY_SIZE, 24, 8)
+
+#define CXLx_CAPABILITY_HEADER(type, offset)                  \
+    REG32(CXL_##type##_CAPABILITY_HEADER, offset)             \
+        FIELD(CXL_##type##_CAPABILITY_HEADER, ID, 0, 16)      \
+        FIELD(CXL_##type##_CAPABILITY_HEADER, VERSION, 16, 4) \
+        FIELD(CXL_##type##_CAPABILITY_HEADER, PTR, 20, 12)
+CXLx_CAPABILITY_HEADER(RAS, 0x4)
+CXLx_CAPABILITY_HEADER(LINK, 0x8)
+CXLx_CAPABILITY_HEADER(HDM, 0xc)
+CXLx_CAPABILITY_HEADER(EXTSEC, 0x10)
+CXLx_CAPABILITY_HEADER(SNOOP, 0x14)
+
+/*
+ * Capability structures contain the actual registers that the CXL component
+ * implements. Some of these are specific to certain types of components, but
+ * this implementation leaves enough space regardless.
+ */
+/* 8.2.5.9 - CXL RAS Capability Structure */
+
+/* Give ample space for caps before this */
+#define CXL_RAS_REGISTERS_OFFSET 0x80
+#define CXL_RAS_REGISTERS_SIZE   0x58
+REG32(CXL_RAS_UNC_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET)
+#define CXL_RAS_UNC_ERR_CACHE_DATA_PARITY 0
+#define CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY 1
+#define CXL_RAS_UNC_ERR_CACHE_BE_PARITY 2
+#define CXL_RAS_UNC_ERR_CACHE_DATA_ECC 3
+#define CXL_RAS_UNC_ERR_MEM_DATA_PARITY 4
+#define CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY 5
+#define CXL_RAS_UNC_ERR_MEM_BE_PARITY 6
+#define CXL_RAS_UNC_ERR_MEM_DATA_ECC 7
+#define CXL_RAS_UNC_ERR_REINIT_THRESHOLD 8
+#define CXL_RAS_UNC_ERR_RSVD_ENCODING 9
+#define CXL_RAS_UNC_ERR_POISON_RECEIVED 10
+#define CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW 11
+#define CXL_RAS_UNC_ERR_INTERNAL 14
+#define CXL_RAS_UNC_ERR_CXL_IDE_TX 15
+#define CXL_RAS_UNC_ERR_CXL_IDE_RX 16
+#define CXL_RAS_UNC_ERR_CXL_UNUSED 63 /* Magic value */
+REG32(CXL_RAS_UNC_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x4)
+REG32(CXL_RAS_UNC_ERR_SEVERITY, CXL_RAS_REGISTERS_OFFSET + 0x8)
+REG32(CXL_RAS_COR_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET + 0xc)
+#define CXL_RAS_COR_ERR_CACHE_DATA_ECC 0
+#define CXL_RAS_COR_ERR_MEM_DATA_ECC 1
+#define CXL_RAS_COR_ERR_CRC_THRESHOLD 2
+#define CXL_RAS_COR_ERR_RETRY_THRESHOLD 3
+#define CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED 4
+#define CXL_RAS_COR_ERR_MEM_POISON_RECEIVED 5
+#define CXL_RAS_COR_ERR_PHYSICAL 6
+REG32(CXL_RAS_COR_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x10)
+REG32(CXL_RAS_ERR_CAP_CTRL, CXL_RAS_REGISTERS_OFFSET + 0x14)
+    FIELD(CXL_RAS_ERR_CAP_CTRL, FIRST_ERROR_POINTER, 0, 6)
+REG32(CXL_RAS_ERR_HEADER0, CXL_RAS_REGISTERS_OFFSET + 0x18)
+#define CXL_RAS_ERR_HEADER_NUM 32
+/* Offset 0x18 - 0x58 reserved for RAS logs */
+
+/* 8.2.5.10 - CXL Security Capability Structure */
+#define CXL_SEC_REGISTERS_OFFSET \
+    (CXL_RAS_REGISTERS_OFFSET + CXL_RAS_REGISTERS_SIZE)
+#define CXL_SEC_REGISTERS_SIZE   0 /* We don't implement 1.1 downstream ports */
+
+/* 8.2.5.11 - CXL Link Capability Structure */
+#define CXL_LINK_REGISTERS_OFFSET \
+    (CXL_SEC_REGISTERS_OFFSET + CXL_SEC_REGISTERS_SIZE)
+#define CXL_LINK_REGISTERS_SIZE   0x38
+
+/* 8.2.5.12 - CXL HDM Decoder Capability Structure */
+#define HDM_DECODE_MAX 10 /* 8.2.5.12.1 */
+#define CXL_HDM_REGISTERS_OFFSET \
+    (CXL_LINK_REGISTERS_OFFSET + CXL_LINK_REGISTERS_SIZE)
+#define CXL_HDM_REGISTERS_SIZE (0x10 + 0x20 * HDM_DECODE_MAX)
+#define HDM_DECODER_INIT(n)                                                    \
+  REG32(CXL_HDM_DECODER##n##_BASE_LO,                                          \
+        CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x10)                          \
+            FIELD(CXL_HDM_DECODER##n##_BASE_LO, L, 28, 4)                      \
+  REG32(CXL_HDM_DECODER##n##_BASE_HI,                                          \
+        CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x14)                          \
+  REG32(CXL_HDM_DECODER##n##_SIZE_LO,                                          \
+        CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x18)                          \
+  REG32(CXL_HDM_DECODER##n##_SIZE_HI,                                          \
+        CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x1C)                          \
+  REG32(CXL_HDM_DECODER##n##_CTRL,                                             \
+        CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x20)                          \
+            FIELD(CXL_HDM_DECODER##n##_CTRL, IG, 0, 4)                         \
+            FIELD(CXL_HDM_DECODER##n##_CTRL, IW, 4, 4)                         \
+            FIELD(CXL_HDM_DECODER##n##_CTRL, LOCK_ON_COMMIT, 8, 1)             \
+            FIELD(CXL_HDM_DECODER##n##_CTRL, COMMIT, 9, 1)                     \
+            FIELD(CXL_HDM_DECODER##n##_CTRL, COMMITTED, 10, 1)                 \
+            FIELD(CXL_HDM_DECODER##n##_CTRL, ERR, 11, 1)                       \
+            FIELD(CXL_HDM_DECODER##n##_CTRL, TYPE, 12, 1)                      \
+  REG32(CXL_HDM_DECODER##n##_TARGET_LIST_LO,                                   \
+        CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x24)                          \
+  REG32(CXL_HDM_DECODER##n##_TARGET_LIST_HI,                                   \
+        CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x28)                          \
+  REG32(CXL_HDM_DECODER##n##_DPA_SKIP_LO,                                      \
+        CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x24)                          \
+  REG32(CXL_HDM_DECODER##n##_DPA_SKIP_HI,                                      \
+        CXL_HDM_REGISTERS_OFFSET + (0x20 * n) + 0x28)
+
+REG32(CXL_HDM_DECODER_CAPABILITY, CXL_HDM_REGISTERS_OFFSET)
+    FIELD(CXL_HDM_DECODER_CAPABILITY, DECODER_COUNT, 0, 4)
+    FIELD(CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 4, 4)
+    FIELD(CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_256B, 8, 1)
+    FIELD(CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_4K, 9, 1)
+    FIELD(CXL_HDM_DECODER_CAPABILITY, POISON_ON_ERR_CAP, 10, 1)
+REG32(CXL_HDM_DECODER_GLOBAL_CONTROL, CXL_HDM_REGISTERS_OFFSET + 4)
+    FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, POISON_ON_ERR_EN, 0, 1)
+    FIELD(CXL_HDM_DECODER_GLOBAL_CONTROL, HDM_DECODER_ENABLE, 1, 1)
+
+/* Support 4 decoders at all levels of topology */
+#define CXL_HDM_DECODER_COUNT 4
+
+HDM_DECODER_INIT(0);
+HDM_DECODER_INIT(1);
+HDM_DECODER_INIT(2);
+HDM_DECODER_INIT(3);
+
+/* 8.2.5.13 - CXL Extended Security Capability Structure (Root complex only) */
+#define EXTSEC_ENTRY_MAX        256
+#define CXL_EXTSEC_REGISTERS_OFFSET \
+    (CXL_HDM_REGISTERS_OFFSET + CXL_HDM_REGISTERS_SIZE)
+#define CXL_EXTSEC_REGISTERS_SIZE   (8 * EXTSEC_ENTRY_MAX + 4)
+
+/* 8.2.5.14 - CXL IDE Capability Structure */
+#define CXL_IDE_REGISTERS_OFFSET \
+    (CXL_EXTSEC_REGISTERS_OFFSET + CXL_EXTSEC_REGISTERS_SIZE)
+#define CXL_IDE_REGISTERS_SIZE   0x20
+
+/* 8.2.5.15 - CXL Snoop Filter Capability Structure */
+#define CXL_SNOOP_REGISTERS_OFFSET \
+    (CXL_IDE_REGISTERS_OFFSET + CXL_IDE_REGISTERS_SIZE)
+#define CXL_SNOOP_REGISTERS_SIZE   0x8
+
+QEMU_BUILD_BUG_MSG((CXL_SNOOP_REGISTERS_OFFSET +
+                    CXL_SNOOP_REGISTERS_SIZE) >= 0x1000,
+                   "No space for registers");
+
+typedef struct component_registers {
+    /*
+     * Main memory region to be registered with QEMU core.
+     */
+    MemoryRegion component_registers;
+
+    /*
+     * 8.2.4 Table 141:
+     *   0x0000 - 0x0fff CXL.io registers
+     *   0x1000 - 0x1fff CXL.cache and CXL.mem
+     *   0x2000 - 0xdfff Implementation specific
+     *   0xe000 - 0xe3ff CXL ARB/MUX registers
+     *   0xe400 - 0xffff RSVD
+     */
+    uint32_t io_registers[CXL2_COMPONENT_IO_REGION_SIZE >> 2];
+    MemoryRegion io;
+
+    uint32_t cache_mem_registers[CXL2_COMPONENT_CM_REGION_SIZE >> 2];
+    uint32_t cache_mem_regs_write_mask[CXL2_COMPONENT_CM_REGION_SIZE >> 2];
+    MemoryRegion cache_mem;
+
+    MemoryRegion impl_specific;
+    MemoryRegion arb_mux;
+    MemoryRegion rsvd;
+
+    /* special_ops is used for any component that needs any specific handling */
+    MemoryRegionOps *special_ops;
+} ComponentRegisters;
+
+/*
+ * A CXL component represents all entities in a CXL hierarchy. This includes,
+ * host bridges, root ports, upstream/downstream switch ports, and devices
+ */
+typedef struct cxl_component {
+    ComponentRegisters crb;
+    union {
+        struct {
+            Range dvsecs[CXL20_MAX_DVSEC];
+            uint16_t dvsec_offset;
+            struct PCIDevice *pdev;
+        };
+    };
+
+    CDATObject cdat;
+} CXLComponentState;
+
+void cxl_component_register_block_init(Object *obj,
+                                       CXLComponentState *cxl_cstate,
+                                       const char *type);
+void cxl_component_register_init_common(uint32_t *reg_state,
+                                        uint32_t *write_msk,
+                                        enum reg_type type);
+
+void cxl_component_create_dvsec(CXLComponentState *cxl_cstate,
+                                enum reg_type cxl_dev_type, uint16_t length,
+                                uint16_t type, uint8_t rev, uint8_t *body);
+
+int cxl_decoder_count_enc(int count);
+int cxl_decoder_count_dec(int enc_cnt);
+
+uint8_t cxl_interleave_ways_enc(int iw, Error **errp);
+int cxl_interleave_ways_dec(uint8_t iw_enc, Error **errp);
+uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp);
+
+hwaddr cxl_decode_ig(int ig);
+
+CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb);
+bool cxl_get_hb_passthrough(PCIHostState *hb);
+
+void cxl_doe_cdat_init(CXLComponentState *cxl_cstate, Error **errp);
+void cxl_doe_cdat_release(CXLComponentState *cxl_cstate);
+void cxl_doe_cdat_update(CXLComponentState *cxl_cstate, Error **errp);
+
+#endif
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
new file mode 100644 (file)
index 0000000..31d2afc
--- /dev/null
@@ -0,0 +1,477 @@
+/*
+ * QEMU CXL Devices
+ *
+ * Copyright (c) 2020 Intel
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef CXL_DEVICE_H
+#define CXL_DEVICE_H
+
+#include "hw/cxl/cxl_component.h"
+#include "hw/pci/pci_device.h"
+#include "hw/register.h"
+#include "hw/cxl/cxl_events.h"
+
+/*
+ * The following is how a CXL device's Memory Device registers are laid out.
+ * The only requirement from the spec is that the capabilities array and the
+ * capability headers start at offset 0 and are contiguously packed. The headers
+ * themselves provide offsets to the register fields. For this emulation, the
+ * actual registers  * will start at offset 0x80 (m == 0x80). No secondary
+ * mailbox is implemented which means that the offset of the start of the
+ * mailbox payload (n) is given by
+ * n = m + sizeof(mailbox registers) + sizeof(device registers).
+ *
+ *                       +---------------------------------+
+ *                       |                                 |
+ *                       |    Memory Device Registers      |
+ *                       |                                 |
+ * n + PAYLOAD_SIZE_MAX  -----------------------------------
+ *                  ^    |                                 |
+ *                  |    |                                 |
+ *                  |    |                                 |
+ *                  |    |                                 |
+ *                  |    |                                 |
+ *                  |    |         Mailbox Payload         |
+ *                  |    |                                 |
+ *                  |    |                                 |
+ *                  |    |                                 |
+ *                  n    -----------------------------------
+ *                  ^    |       Mailbox Registers         |
+ *                  |    |                                 |
+ *                  |    -----------------------------------
+ *                  |    |                                 |
+ *                  |    |        Device Registers         |
+ *                  |    |                                 |
+ *                  m    ---------------------------------->
+ *                  ^    |  Memory Device Capability Header|
+ *                  |    -----------------------------------
+ *                  |    |     Mailbox Capability Header   |
+ *                  |    -----------------------------------
+ *                  |    |     Device Capability Header    |
+ *                  |    -----------------------------------
+ *                  |    |     Device Cap Array Register   |
+ *                  0    +---------------------------------+
+ *
+ */
+
+#define CXL_DEVICE_CAP_HDR1_OFFSET 0x10 /* Figure 138 */
+#define CXL_DEVICE_CAP_REG_SIZE 0x10 /* 8.2.8.2 */
+#define CXL_DEVICE_CAPS_MAX 4 /* 8.2.8.2.1 + 8.2.8.5 */
+#define CXL_CAPS_SIZE \
+    (CXL_DEVICE_CAP_REG_SIZE * (CXL_DEVICE_CAPS_MAX + 1)) /* +1 for header */
+
+#define CXL_DEVICE_STATUS_REGISTERS_OFFSET 0x80 /* Read comment above */
+#define CXL_DEVICE_STATUS_REGISTERS_LENGTH 0x8 /* 8.2.8.3.1 */
+
+#define CXL_MAILBOX_REGISTERS_OFFSET \
+    (CXL_DEVICE_STATUS_REGISTERS_OFFSET + CXL_DEVICE_STATUS_REGISTERS_LENGTH)
+#define CXL_MAILBOX_REGISTERS_SIZE 0x20 /* 8.2.8.4, Figure 139 */
+#define CXL_MAILBOX_PAYLOAD_SHIFT 11
+#define CXL_MAILBOX_MAX_PAYLOAD_SIZE (1 << CXL_MAILBOX_PAYLOAD_SHIFT)
+#define CXL_MAILBOX_REGISTERS_LENGTH \
+    (CXL_MAILBOX_REGISTERS_SIZE + CXL_MAILBOX_MAX_PAYLOAD_SIZE)
+
+#define CXL_MEMORY_DEVICE_REGISTERS_OFFSET \
+    (CXL_MAILBOX_REGISTERS_OFFSET + CXL_MAILBOX_REGISTERS_LENGTH)
+#define CXL_MEMORY_DEVICE_REGISTERS_LENGTH 0x8
+
+#define CXL_MMIO_SIZE                                                   \
+    (CXL_DEVICE_CAP_REG_SIZE + CXL_DEVICE_STATUS_REGISTERS_LENGTH +     \
+     CXL_MAILBOX_REGISTERS_LENGTH + CXL_MEMORY_DEVICE_REGISTERS_LENGTH)
+
+/* 8.2.8.4.5.1 Command Return Codes */
+typedef enum {
+    CXL_MBOX_SUCCESS = 0x0,
+    CXL_MBOX_BG_STARTED = 0x1,
+    CXL_MBOX_INVALID_INPUT = 0x2,
+    CXL_MBOX_UNSUPPORTED = 0x3,
+    CXL_MBOX_INTERNAL_ERROR = 0x4,
+    CXL_MBOX_RETRY_REQUIRED = 0x5,
+    CXL_MBOX_BUSY = 0x6,
+    CXL_MBOX_MEDIA_DISABLED = 0x7,
+    CXL_MBOX_FW_XFER_IN_PROGRESS = 0x8,
+    CXL_MBOX_FW_XFER_OUT_OF_ORDER = 0x9,
+    CXL_MBOX_FW_AUTH_FAILED = 0xa,
+    CXL_MBOX_FW_INVALID_SLOT = 0xb,
+    CXL_MBOX_FW_ROLLEDBACK = 0xc,
+    CXL_MBOX_FW_REST_REQD = 0xd,
+    CXL_MBOX_INVALID_HANDLE = 0xe,
+    CXL_MBOX_INVALID_PA = 0xf,
+    CXL_MBOX_INJECT_POISON_LIMIT = 0x10,
+    CXL_MBOX_PERMANENT_MEDIA_FAILURE = 0x11,
+    CXL_MBOX_ABORTED = 0x12,
+    CXL_MBOX_INVALID_SECURITY_STATE = 0x13,
+    CXL_MBOX_INCORRECT_PASSPHRASE = 0x14,
+    CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15,
+    CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16,
+    CXL_MBOX_MAX = 0x17
+} CXLRetCode;
+
+typedef struct CXLCCI CXLCCI;
+typedef struct cxl_device_state CXLDeviceState;
+struct cxl_cmd;
+typedef CXLRetCode (*opcode_handler)(const struct cxl_cmd *cmd,
+                                     uint8_t *payload_in, size_t len_in,
+                                     uint8_t *payload_out, size_t *len_out,
+                                     CXLCCI *cci);
+struct cxl_cmd {
+    const char *name;
+    opcode_handler handler;
+    ssize_t in;
+    uint16_t effect; /* Reported in CEL */
+};
+
+typedef struct CXLEvent {
+    CXLEventRecordRaw data;
+    QSIMPLEQ_ENTRY(CXLEvent) node;
+} CXLEvent;
+
+typedef struct CXLEventLog {
+    uint16_t next_handle;
+    uint16_t overflow_err_count;
+    uint64_t first_overflow_timestamp;
+    uint64_t last_overflow_timestamp;
+    bool irq_enabled;
+    int irq_vec;
+    QemuMutex lock;
+    QSIMPLEQ_HEAD(, CXLEvent) events;
+} CXLEventLog;
+
+typedef struct CXLCCI {
+    const struct cxl_cmd (*cxl_cmd_set)[256];
+    struct cel_log {
+        uint16_t opcode;
+        uint16_t effect;
+    } cel_log[1 << 16];
+    size_t cel_size;
+
+    /* background command handling (times in ms) */
+    struct {
+        uint16_t opcode;
+        uint16_t complete_pct;
+        uint16_t ret_code; /* Current value of retcode */
+        uint64_t starttime;
+        /* set by each bg cmd, cleared by the bg_timer when complete */
+        uint64_t runtime;
+        QEMUTimer *timer;
+    } bg;
+    size_t payload_max;
+    /* Pointer to device hosting the CCI */
+    DeviceState *d;
+    /* Pointer to the device hosting the protocol conversion */
+    DeviceState *intf;
+} CXLCCI;
+
+typedef struct cxl_device_state {
+    MemoryRegion device_registers;
+
+    /* mmio for device capabilities array - 8.2.8.2 */
+    struct {
+        MemoryRegion device;
+        union {
+            uint8_t dev_reg_state[CXL_DEVICE_STATUS_REGISTERS_LENGTH];
+            uint16_t dev_reg_state16[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 2];
+            uint32_t dev_reg_state32[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 4];
+            uint64_t dev_reg_state64[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 8];
+        };
+        uint64_t event_status;
+    };
+    MemoryRegion memory_device;
+    struct {
+        MemoryRegion caps;
+        union {
+            uint32_t caps_reg_state32[CXL_CAPS_SIZE / 4];
+            uint64_t caps_reg_state64[CXL_CAPS_SIZE / 8];
+        };
+    };
+
+    /* mmio for the mailbox registers 8.2.8.4 */
+    struct {
+        MemoryRegion mailbox;
+        uint16_t payload_size;
+        uint8_t mbox_msi_n;
+        union {
+            uint8_t mbox_reg_state[CXL_MAILBOX_REGISTERS_LENGTH];
+            uint16_t mbox_reg_state16[CXL_MAILBOX_REGISTERS_LENGTH / 2];
+            uint32_t mbox_reg_state32[CXL_MAILBOX_REGISTERS_LENGTH / 4];
+            uint64_t mbox_reg_state64[CXL_MAILBOX_REGISTERS_LENGTH / 8];
+        };
+    };
+
+    /* Stash the memory device status value */
+    uint64_t memdev_status;
+
+    struct {
+        bool set;
+        uint64_t last_set;
+        uint64_t host_set;
+    } timestamp;
+
+    /* memory region size, HDM */
+    uint64_t mem_size;
+    uint64_t pmem_size;
+    uint64_t vmem_size;
+
+    const struct cxl_cmd (*cxl_cmd_set)[256];
+    CXLEventLog event_logs[CXL_EVENT_TYPE_MAX];
+} CXLDeviceState;
+
+/* Initialize the register block for a device */
+void cxl_device_register_block_init(Object *obj, CXLDeviceState *dev,
+                                    CXLCCI *cci);
+
+typedef struct CXLType3Dev CXLType3Dev;
+typedef struct CSWMBCCIDev CSWMBCCIDev;
+/* Set up default values for the register block */
+void cxl_device_register_init_t3(CXLType3Dev *ct3d);
+void cxl_device_register_init_swcci(CSWMBCCIDev *sw);
+
+/*
+ * CXL 2.0 - 8.2.8.1 including errata F4
+ * Documented as a 128 bit register, but 64 bit accesses and the second
+ * 64 bits are currently reserved.
+ */
+REG64(CXL_DEV_CAP_ARRAY, 0)
+    FIELD(CXL_DEV_CAP_ARRAY, CAP_ID, 0, 16)
+    FIELD(CXL_DEV_CAP_ARRAY, CAP_VERSION, 16, 8)
+    FIELD(CXL_DEV_CAP_ARRAY, CAP_COUNT, 32, 16)
+
+void cxl_event_set_status(CXLDeviceState *cxl_dstate, CXLEventLogType log_type,
+                          bool available);
+
+/*
+ * Helper macro to initialize capability headers for CXL devices.
+ *
+ * In the 8.2.8.2, this is listed as a 128b register, but in 8.2.8, it says:
+ * > No registers defined in Section 8.2.8 are larger than 64-bits wide so that
+ * > is the maximum access size allowed for these registers. If this rule is not
+ * > followed, the behavior is undefined
+ *
+ * CXL 2.0 Errata F4 states further that the layouts in the specification are
+ * shown as greater than 128 bits, but implementations are expected to
+ * use any size of access up to 64 bits.
+ *
+ * Here we've chosen to make it 4 dwords. The spec allows any pow2 multiple
+ * access to be used for a register up to 64 bits.
+ */
+#define CXL_DEVICE_CAPABILITY_HEADER_REGISTER(n, offset)  \
+    REG32(CXL_DEV_##n##_CAP_HDR0, offset)                 \
+        FIELD(CXL_DEV_##n##_CAP_HDR0, CAP_ID, 0, 16)      \
+        FIELD(CXL_DEV_##n##_CAP_HDR0, CAP_VERSION, 16, 8) \
+    REG32(CXL_DEV_##n##_CAP_HDR1, offset + 4)             \
+        FIELD(CXL_DEV_##n##_CAP_HDR1, CAP_OFFSET, 0, 32)  \
+    REG32(CXL_DEV_##n##_CAP_HDR2, offset + 8)             \
+        FIELD(CXL_DEV_##n##_CAP_HDR2, CAP_LENGTH, 0, 32)
+
+CXL_DEVICE_CAPABILITY_HEADER_REGISTER(DEVICE_STATUS, CXL_DEVICE_CAP_HDR1_OFFSET)
+CXL_DEVICE_CAPABILITY_HEADER_REGISTER(MAILBOX, CXL_DEVICE_CAP_HDR1_OFFSET + \
+                                               CXL_DEVICE_CAP_REG_SIZE)
+CXL_DEVICE_CAPABILITY_HEADER_REGISTER(MEMORY_DEVICE,
+                                      CXL_DEVICE_CAP_HDR1_OFFSET +
+                                          CXL_DEVICE_CAP_REG_SIZE * 2)
+
+void cxl_initialize_mailbox_t3(CXLCCI *cci, DeviceState *d, size_t payload_max);
+void cxl_initialize_mailbox_swcci(CXLCCI *cci, DeviceState *intf,
+                                  DeviceState *d, size_t payload_max);
+void cxl_init_cci(CXLCCI *cci, size_t payload_max);
+int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd,
+                            size_t len_in, uint8_t *pl_in,
+                            size_t *len_out, uint8_t *pl_out,
+                            bool *bg_started);
+void cxl_initialize_t3_fm_owned_ld_mctpcci(CXLCCI *cci, DeviceState *d,
+                                           DeviceState *intf,
+                                           size_t payload_max);
+
+void cxl_initialize_t3_ld_cci(CXLCCI *cci, DeviceState *d,
+                              DeviceState *intf, size_t payload_max);
+
+#define cxl_device_cap_init(dstate, reg, cap_id, ver)                      \
+    do {                                                                   \
+        uint32_t *cap_hdrs = dstate->caps_reg_state32;                     \
+        int which = R_CXL_DEV_##reg##_CAP_HDR0;                            \
+        cap_hdrs[which] =                                                  \
+            FIELD_DP32(cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0,          \
+                       CAP_ID, cap_id);                                    \
+        cap_hdrs[which] = FIELD_DP32(                                      \
+            cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, CAP_VERSION, ver);  \
+        cap_hdrs[which + 1] =                                              \
+            FIELD_DP32(cap_hdrs[which + 1], CXL_DEV_##reg##_CAP_HDR1,      \
+                       CAP_OFFSET, CXL_##reg##_REGISTERS_OFFSET);          \
+        cap_hdrs[which + 2] =                                              \
+            FIELD_DP32(cap_hdrs[which + 2], CXL_DEV_##reg##_CAP_HDR2,      \
+                       CAP_LENGTH, CXL_##reg##_REGISTERS_LENGTH);          \
+    } while (0)
+
+/* CXL 3.0 8.2.8.3.1 Event Status Register */
+REG64(CXL_DEV_EVENT_STATUS, 0)
+    FIELD(CXL_DEV_EVENT_STATUS, EVENT_STATUS, 0, 32)
+
+/* CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register */
+REG32(CXL_DEV_MAILBOX_CAP, 0)
+    FIELD(CXL_DEV_MAILBOX_CAP, PAYLOAD_SIZE, 0, 5)
+    FIELD(CXL_DEV_MAILBOX_CAP, INT_CAP, 5, 1)
+    FIELD(CXL_DEV_MAILBOX_CAP, BG_INT_CAP, 6, 1)
+    FIELD(CXL_DEV_MAILBOX_CAP, MSI_N, 7, 4)
+
+/* CXL 2.0 8.2.8.4.4 Mailbox Control Register */
+REG32(CXL_DEV_MAILBOX_CTRL, 4)
+    FIELD(CXL_DEV_MAILBOX_CTRL, DOORBELL, 0, 1)
+    FIELD(CXL_DEV_MAILBOX_CTRL, INT_EN, 1, 1)
+    FIELD(CXL_DEV_MAILBOX_CTRL, BG_INT_EN, 2, 1)
+
+/* CXL 2.0 8.2.8.4.5 Command Register */
+REG64(CXL_DEV_MAILBOX_CMD, 8)
+    FIELD(CXL_DEV_MAILBOX_CMD, COMMAND, 0, 8)
+    FIELD(CXL_DEV_MAILBOX_CMD, COMMAND_SET, 8, 8)
+    FIELD(CXL_DEV_MAILBOX_CMD, LENGTH, 16, 20)
+
+/* CXL 2.0 8.2.8.4.6 Mailbox Status Register */
+REG64(CXL_DEV_MAILBOX_STS, 0x10)
+    FIELD(CXL_DEV_MAILBOX_STS, BG_OP, 0, 1)
+    FIELD(CXL_DEV_MAILBOX_STS, ERRNO, 32, 16)
+    FIELD(CXL_DEV_MAILBOX_STS, VENDOR_ERRNO, 48, 16)
+
+/* CXL 2.0 8.2.8.4.7 Background Command Status Register */
+REG64(CXL_DEV_BG_CMD_STS, 0x18)
+    FIELD(CXL_DEV_BG_CMD_STS, OP, 0, 16)
+    FIELD(CXL_DEV_BG_CMD_STS, PERCENTAGE_COMP, 16, 7)
+    FIELD(CXL_DEV_BG_CMD_STS, RET_CODE, 32, 16)
+    FIELD(CXL_DEV_BG_CMD_STS, VENDOR_RET_CODE, 48, 16)
+
+/* CXL 2.0 8.2.8.4.8 Command Payload Registers */
+REG32(CXL_DEV_CMD_PAYLOAD, 0x20)
+
+REG64(CXL_MEM_DEV_STS, 0)
+    FIELD(CXL_MEM_DEV_STS, FATAL, 0, 1)
+    FIELD(CXL_MEM_DEV_STS, FW_HALT, 1, 1)
+    FIELD(CXL_MEM_DEV_STS, MEDIA_STATUS, 2, 2)
+    FIELD(CXL_MEM_DEV_STS, MBOX_READY, 4, 1)
+    FIELD(CXL_MEM_DEV_STS, RESET_NEEDED, 5, 3)
+
+static inline void __toggle_media(CXLDeviceState *cxl_dstate, int val)
+{
+    uint64_t dev_status_reg;
+
+    dev_status_reg = cxl_dstate->memdev_status;
+    dev_status_reg = FIELD_DP64(dev_status_reg, CXL_MEM_DEV_STS, MEDIA_STATUS,
+                                val);
+    cxl_dstate->memdev_status = dev_status_reg;
+}
+#define cxl_dev_disable_media(cxlds)                    \
+        do { __toggle_media((cxlds), 0x3); } while (0)
+#define cxl_dev_enable_media(cxlds)                     \
+        do { __toggle_media((cxlds), 0x1); } while (0)
+
+static inline bool sanitize_running(CXLCCI *cci)
+{
+    return !!cci->bg.runtime && cci->bg.opcode == 0x4400;
+}
+
+typedef struct CXLError {
+    QTAILQ_ENTRY(CXLError) node;
+    int type; /* Error code as per FE definition */
+    uint32_t header[CXL_RAS_ERR_HEADER_NUM];
+} CXLError;
+
+typedef QTAILQ_HEAD(, CXLError) CXLErrorList;
+
+typedef struct CXLPoison {
+    uint64_t start, length;
+    uint8_t type;
+#define CXL_POISON_TYPE_EXTERNAL 0x1
+#define CXL_POISON_TYPE_INTERNAL 0x2
+#define CXL_POISON_TYPE_INJECTED 0x3
+    QLIST_ENTRY(CXLPoison) node;
+} CXLPoison;
+
+typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
+#define CXL_POISON_LIST_LIMIT 256
+
+struct CXLType3Dev {
+    /* Private */
+    PCIDevice parent_obj;
+
+    /* Properties */
+    HostMemoryBackend *hostmem; /* deprecated */
+    HostMemoryBackend *hostvmem;
+    HostMemoryBackend *hostpmem;
+    HostMemoryBackend *lsa;
+    uint64_t sn;
+
+    /* State */
+    AddressSpace hostvmem_as;
+    AddressSpace hostpmem_as;
+    CXLComponentState cxl_cstate;
+    CXLDeviceState cxl_dstate;
+    CXLCCI cci; /* Primary PCI mailbox CCI */
+    /* Always initialized as no way to know if a VDM might show up */
+    CXLCCI vdm_fm_owned_ld_mctp_cci;
+    CXLCCI ld0_cci;
+
+    /* DOE */
+    DOECap doe_cdat;
+
+    /* Error injection */
+    CXLErrorList error_list;
+
+    /* Poison Injection - cache */
+    CXLPoisonList poison_list;
+    unsigned int poison_list_cnt;
+    bool poison_list_overflowed;
+    uint64_t poison_list_overflow_ts;
+};
+
+#define TYPE_CXL_TYPE3 "cxl-type3"
+OBJECT_DECLARE_TYPE(CXLType3Dev, CXLType3Class, CXL_TYPE3)
+
+struct CXLType3Class {
+    /* Private */
+    PCIDeviceClass parent_class;
+
+    /* public */
+    uint64_t (*get_lsa_size)(CXLType3Dev *ct3d);
+
+    uint64_t (*get_lsa)(CXLType3Dev *ct3d, void *buf, uint64_t size,
+                        uint64_t offset);
+    void (*set_lsa)(CXLType3Dev *ct3d, const void *buf, uint64_t size,
+                    uint64_t offset);
+    bool (*set_cacheline)(CXLType3Dev *ct3d, uint64_t dpa_offset,
+                          uint8_t *data);
+};
+
+struct CSWMBCCIDev {
+    PCIDevice parent_obj;
+    PCIDevice *target;
+    CXLComponentState cxl_cstate;
+    CXLDeviceState cxl_dstate;
+    CXLCCI *cci;
+};
+
+#define TYPE_CXL_SWITCH_MAILBOX_CCI "cxl-switch-mailbox-cci"
+OBJECT_DECLARE_TYPE(CSWMBCCIDev, CSWMBCCIClass, CXL_SWITCH_MAILBOX_CCI)
+
+MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
+                           unsigned size, MemTxAttrs attrs);
+MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
+                            unsigned size, MemTxAttrs attrs);
+
+uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds);
+
+void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num);
+bool cxl_event_insert(CXLDeviceState *cxlds, CXLEventLogType log_type,
+                      CXLEventRecordRaw *event);
+CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl,
+                                 uint8_t log_type, int max_recs,
+                                 size_t *len);
+CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds,
+                                   CXLClearEventPayload *pl);
+
+void cxl_event_irq_assert(CXLType3Dev *ct3d);
+
+void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d);
+
+#endif
diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
new file mode 100644 (file)
index 0000000..d778487
--- /dev/null
@@ -0,0 +1,169 @@
+/*
+ * QEMU CXL Events
+ *
+ * Copyright (c) 2022 Intel
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef CXL_EVENTS_H
+#define CXL_EVENTS_H
+
+#include "qemu/uuid.h"
+
+/*
+ * CXL rev 3.0 section 8.2.9.2.2; Table 8-49
+ *
+ * Define these as the bit position for the event status register for ease of
+ * setting the status.
+ */
+typedef enum CXLEventLogType {
+    CXL_EVENT_TYPE_INFO          = 0,
+    CXL_EVENT_TYPE_WARN          = 1,
+    CXL_EVENT_TYPE_FAIL          = 2,
+    CXL_EVENT_TYPE_FATAL         = 3,
+    CXL_EVENT_TYPE_DYNAMIC_CAP   = 4,
+    CXL_EVENT_TYPE_MAX
+} CXLEventLogType;
+
+/*
+ * Common Event Record Format
+ * CXL rev 3.0 section 8.2.9.2.1; Table 8-42
+ */
+#define CXL_EVENT_REC_HDR_RES_LEN 0xf
+typedef struct CXLEventRecordHdr {
+    QemuUUID id;
+    uint8_t length;
+    uint8_t flags[3];
+    uint16_t handle;
+    uint16_t related_handle;
+    uint64_t timestamp;
+    uint8_t maint_op_class;
+    uint8_t reserved[CXL_EVENT_REC_HDR_RES_LEN];
+} QEMU_PACKED CXLEventRecordHdr;
+
+#define CXL_EVENT_RECORD_DATA_LENGTH 0x50
+typedef struct CXLEventRecordRaw {
+    CXLEventRecordHdr hdr;
+    uint8_t data[CXL_EVENT_RECORD_DATA_LENGTH];
+} QEMU_PACKED CXLEventRecordRaw;
+#define CXL_EVENT_RECORD_SIZE (sizeof(CXLEventRecordRaw))
+
+/*
+ * Get Event Records output payload
+ * CXL rev 3.0 section 8.2.9.2.2; Table 8-50
+ */
+#define CXL_GET_EVENT_FLAG_OVERFLOW     BIT(0)
+#define CXL_GET_EVENT_FLAG_MORE_RECORDS BIT(1)
+typedef struct CXLGetEventPayload {
+    uint8_t flags;
+    uint8_t reserved1;
+    uint16_t overflow_err_count;
+    uint64_t first_overflow_timestamp;
+    uint64_t last_overflow_timestamp;
+    uint16_t record_count;
+    uint8_t reserved2[0xa];
+    CXLEventRecordRaw records[];
+} QEMU_PACKED CXLGetEventPayload;
+#define CXL_EVENT_PAYLOAD_HDR_SIZE (sizeof(CXLGetEventPayload))
+
+/*
+ * Clear Event Records input payload
+ * CXL rev 3.0 section 8.2.9.2.3; Table 8-51
+ */
+typedef struct CXLClearEventPayload {
+    uint8_t event_log;      /* CXLEventLogType */
+    uint8_t clear_flags;
+    uint8_t nr_recs;
+    uint8_t reserved[3];
+    uint16_t handle[];
+} CXLClearEventPayload;
+
+/**
+ * Event Interrupt Policy
+ *
+ * CXL rev 3.0 section 8.2.9.2.4; Table 8-52
+ */
+typedef enum CXLEventIntMode {
+    CXL_INT_NONE     = 0x00,
+    CXL_INT_MSI_MSIX = 0x01,
+    CXL_INT_FW       = 0x02,
+    CXL_INT_RES      = 0x03,
+} CXLEventIntMode;
+#define CXL_EVENT_INT_MODE_MASK 0x3
+#define CXL_EVENT_INT_SETTING(vector) \
+    ((((uint8_t)vector & 0xf) << 4) | CXL_INT_MSI_MSIX)
+typedef struct CXLEventInterruptPolicy {
+    uint8_t info_settings;
+    uint8_t warn_settings;
+    uint8_t failure_settings;
+    uint8_t fatal_settings;
+    uint8_t dyn_cap_settings;
+} QEMU_PACKED CXLEventInterruptPolicy;
+/* DCD is optional but other fields are not */
+#define CXL_EVENT_INT_SETTING_MIN_LEN 4
+
+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CXL_EVENT_GEN_MED_COMP_ID_SIZE  0x10
+#define CXL_EVENT_GEN_MED_RES_SIZE      0x2e
+typedef struct CXLEventGenMedia {
+    CXLEventRecordHdr hdr;
+    uint64_t phys_addr;
+    uint8_t descriptor;
+    uint8_t type;
+    uint8_t transaction_type;
+    uint16_t validity_flags;
+    uint8_t channel;
+    uint8_t rank;
+    uint8_t device[3];
+    uint8_t component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
+    uint8_t reserved[CXL_EVENT_GEN_MED_RES_SIZE];
+} QEMU_PACKED CXLEventGenMedia;
+
+/*
+ * DRAM Event Record
+ * CXL Rev 3.0 Section 8.2.9.2.1.2: Table 8-44
+ * All fields little endian.
+ */
+typedef struct CXLEventDram {
+    CXLEventRecordHdr hdr;
+    uint64_t phys_addr;
+    uint8_t descriptor;
+    uint8_t type;
+    uint8_t transaction_type;
+    uint16_t validity_flags;
+    uint8_t channel;
+    uint8_t rank;
+    uint8_t nibble_mask[3];
+    uint8_t bank_group;
+    uint8_t bank;
+    uint8_t row[3];
+    uint16_t column;
+    uint64_t correction_mask[4];
+    uint8_t reserved[0x17];
+} QEMU_PACKED CXLEventDram;
+
+/*
+ * Memory Module Event Record
+ * CXL Rev 3.0 Section 8.2.9.2.1.3: Table 8-45
+ * All fields little endian.
+ */
+typedef struct CXLEventMemoryModule {
+    CXLEventRecordHdr hdr;
+    uint8_t type;
+    uint8_t health_status;
+    uint8_t media_status;
+    uint8_t additional_status;
+    uint8_t life_used;
+    int16_t temperature;
+    uint32_t dirty_shutdown_count;
+    uint32_t corrected_volatile_error_count;
+    uint32_t corrected_persistent_error_count;
+    uint8_t reserved[0x3d];
+} QEMU_PACKED CXLEventMemoryModule;
+
+#endif /* CXL_EVENTS_H */
diff --git a/include/hw/cxl/cxl_host.h b/include/hw/cxl/cxl_host.h
new file mode 100644 (file)
index 0000000..c9bc9c7
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * QEMU CXL Host Setup
+ *
+ * Copyright (c) 2022 Huawei
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "hw/cxl/cxl.h"
+#include "hw/boards.h"
+
+#ifndef CXL_HOST_H
+#define CXL_HOST_H
+
+void cxl_machine_init(Object *obj, CXLState *state);
+void cxl_fmws_link_targets(CXLState *stat, Error **errp);
+void cxl_hook_up_pxb_registers(PCIBus *bus, CXLState *state, Error **errp);
+
+extern const MemoryRegionOps cfmws_ops;
+
+#endif
diff --git a/include/hw/cxl/cxl_pci.h b/include/hw/cxl/cxl_pci.h
new file mode 100644 (file)
index 0000000..ddf01a5
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+ * QEMU CXL PCI interfaces
+ *
+ * Copyright (c) 2020 Intel
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef CXL_PCI_H
+#define CXL_PCI_H
+
+
+#define CXL_VENDOR_ID 0x1e98
+
+#define PCIE_DVSEC_HEADER1_OFFSET 0x4 /* Offset from start of extend cap */
+#define PCIE_DVSEC_ID_OFFSET 0x8
+
+#define PCIE_CXL_DEVICE_DVSEC_LENGTH 0x38
+#define PCIE_CXL1_DEVICE_DVSEC_REVID 0
+#define PCIE_CXL2_DEVICE_DVSEC_REVID 1
+
+#define EXTENSIONS_PORT_DVSEC_LENGTH 0x28
+#define EXTENSIONS_PORT_DVSEC_REVID 0
+
+#define GPF_PORT_DVSEC_LENGTH 0x10
+#define GPF_PORT_DVSEC_REVID  0
+
+#define GPF_DEVICE_DVSEC_LENGTH 0x10
+#define GPF_DEVICE_DVSEC_REVID 0
+
+#define PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0 0x14
+#define PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0  1
+
+#define REG_LOC_DVSEC_LENGTH 0x24
+#define REG_LOC_DVSEC_REVID  0
+
+enum {
+    PCIE_CXL_DEVICE_DVSEC      = 0,
+    NON_CXL_FUNCTION_MAP_DVSEC = 2,
+    EXTENSIONS_PORT_DVSEC      = 3,
+    GPF_PORT_DVSEC             = 4,
+    GPF_DEVICE_DVSEC           = 5,
+    PCIE_FLEXBUS_PORT_DVSEC    = 7,
+    REG_LOC_DVSEC              = 8,
+    MLD_DVSEC                  = 9,
+    CXL20_MAX_DVSEC
+};
+
+typedef struct DVSECHeader {
+    uint32_t cap_hdr;
+    uint32_t dv_hdr1;
+    uint16_t dv_hdr2;
+} QEMU_PACKED DVSECHeader;
+QEMU_BUILD_BUG_ON(sizeof(DVSECHeader) != 10);
+
+/*
+ * CXL 2.0 devices must implement certain DVSEC IDs, and can [optionally]
+ * implement others.
+ *
+ * CXL 2.0 Device: 0, [2], 5, 8
+ * CXL 2.0 RP: 3, 4, 7, 8
+ * CXL 2.0 Upstream Port: [2], 7, 8
+ * CXL 2.0 Downstream Port: 3, 4, 7, 8
+ */
+
+/* CXL 2.0 - 8.1.3 (ID 0001) */
+typedef struct CXLDVSECDevice {
+    DVSECHeader hdr;
+    uint16_t cap;
+    uint16_t ctrl;
+    uint16_t status;
+    uint16_t ctrl2;
+    uint16_t status2;
+    uint16_t lock;
+    uint16_t cap2;
+    uint32_t range1_size_hi;
+    uint32_t range1_size_lo;
+    uint32_t range1_base_hi;
+    uint32_t range1_base_lo;
+    uint32_t range2_size_hi;
+    uint32_t range2_size_lo;
+    uint32_t range2_base_hi;
+    uint32_t range2_base_lo;
+} CXLDVSECDevice;
+QEMU_BUILD_BUG_ON(sizeof(CXLDVSECDevice) != 0x38);
+
+/* CXL 2.0 - 8.1.5 (ID 0003) */
+typedef struct CXLDVSECPortExt {
+    DVSECHeader hdr;
+    uint16_t status;
+    uint16_t control;
+    uint8_t alt_bus_base;
+    uint8_t alt_bus_limit;
+    uint16_t alt_memory_base;
+    uint16_t alt_memory_limit;
+    uint16_t alt_prefetch_base;
+    uint16_t alt_prefetch_limit;
+    uint32_t alt_prefetch_base_high;
+    uint32_t alt_prefetch_limit_high;
+    uint32_t rcrb_base;
+    uint32_t rcrb_base_high;
+} CXLDVSECPortExt;
+QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortExt) != 0x28);
+
+#define PORT_CONTROL_OFFSET          0xc
+#define PORT_CONTROL_UNMASK_SBR      1
+#define PORT_CONTROL_ALT_MEMID_EN    4
+
+/* CXL 2.0 - 8.1.6 GPF DVSEC (ID 0004) */
+typedef struct CXLDVSECPortGPF {
+    DVSECHeader hdr;
+    uint16_t rsvd;
+    uint16_t phase1_ctrl;
+    uint16_t phase2_ctrl;
+} CXLDVSECPortGPF;
+QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortGPF) != 0x10);
+
+/* CXL 2.0 - 8.1.7 GPF DVSEC for CXL Device */
+typedef struct CXLDVSECDeviceGPF {
+    DVSECHeader hdr;
+    uint16_t phase2_duration;
+    uint32_t phase2_power;
+} CXLDVSECDeviceGPF;
+QEMU_BUILD_BUG_ON(sizeof(CXLDVSECDeviceGPF) != 0x10);
+
+/* CXL 2.0 - 8.1.8/8.2.1.3 Flex Bus DVSEC (ID 0007) */
+typedef struct CXLDVSECPortFlexBus {
+    DVSECHeader hdr;
+    uint16_t cap;
+    uint16_t ctrl;
+    uint16_t status;
+    uint32_t rcvd_mod_ts_data_phase1;
+} CXLDVSECPortFlexBus;
+QEMU_BUILD_BUG_ON(sizeof(CXLDVSECPortFlexBus) != 0x14);
+
+/* CXL 2.0 - 8.1.9 Register Locator DVSEC (ID 0008) */
+typedef struct CXLDVSECRegisterLocator {
+    DVSECHeader hdr;
+    uint16_t rsvd;
+    uint32_t reg0_base_lo;
+    uint32_t reg0_base_hi;
+    uint32_t reg1_base_lo;
+    uint32_t reg1_base_hi;
+    uint32_t reg2_base_lo;
+    uint32_t reg2_base_hi;
+} CXLDVSECRegisterLocator;
+QEMU_BUILD_BUG_ON(sizeof(CXLDVSECRegisterLocator) != 0x24);
+
+/* BAR Equivalence Indicator */
+#define BEI_BAR_10H 0
+#define BEI_BAR_14H 1
+#define BEI_BAR_18H 2
+#define BEI_BAR_1cH 3
+#define BEI_BAR_20H 4
+#define BEI_BAR_24H 5
+
+/* Register Block Identifier */
+#define RBI_EMPTY          0
+#define RBI_COMPONENT_REG  (1 << 8)
+#define RBI_BAR_VIRT_ACL   (2 << 8)
+#define RBI_CXL_DEVICE_REG (3 << 8)
+
+#endif
index 1f8fc9b37500a4c128b1e3159b23e67364b4c5ea..520f8ec20279e3adc9b3f247f48df6c1406d0d91 100644 (file)
@@ -11,6 +11,7 @@ typedef struct qemu_edid_info {
     uint32_t    prefy;
     uint32_t    maxx;
     uint32_t    maxy;
+    uint32_t    refresh_rate;
 } qemu_edid_info;
 
 void qemu_edid_generate(uint8_t *edid, size_t size,
@@ -21,10 +22,11 @@ void qemu_edid_region_io(MemoryRegion *region, Object *owner,
 
 uint32_t qemu_edid_dpi_to_mm(uint32_t dpi, uint32_t res);
 
-#define DEFINE_EDID_PROPERTIES(_state, _edid_info)              \
-    DEFINE_PROP_UINT32("xres", _state, _edid_info.prefx, 0),    \
-    DEFINE_PROP_UINT32("yres", _state, _edid_info.prefy, 0),    \
-    DEFINE_PROP_UINT32("xmax", _state, _edid_info.maxx, 0),     \
-    DEFINE_PROP_UINT32("ymax", _state, _edid_info.maxy, 0)
+#define DEFINE_EDID_PROPERTIES(_state, _edid_info)                         \
+    DEFINE_PROP_UINT32("xres", _state, _edid_info.prefx, 0),               \
+    DEFINE_PROP_UINT32("yres", _state, _edid_info.prefy, 0),               \
+    DEFINE_PROP_UINT32("xmax", _state, _edid_info.maxx, 0),                \
+    DEFINE_PROP_UINT32("ymax", _state, _edid_info.maxy, 0),                \
+    DEFINE_PROP_UINT32("refresh_rate", _state, _edid_info.refresh_rate, 0)
 
 #endif /* EDID_H */
index c133fa271efc49c78f6474f4de5c12beca31fb93..27cebefc9e712a55c3a05a3a362a5b0da7df1158 100644 (file)
 #ifndef MACFB_H
 #define MACFB_H
 
-#include "qemu/osdep.h"
 #include "exec/memory.h"
+#include "hw/irq.h"
+#include "hw/nubus/nubus.h"
+#include "hw/sysbus.h"
 #include "ui/console.h"
-#include "qom/object.h"
+#include "qemu/timer.h"
+
+typedef enum  {
+    MACFB_DISPLAY_APPLE_21_COLOR = 0,
+    MACFB_DISPLAY_APPLE_PORTRAIT = 1,
+    MACFB_DISPLAY_APPLE_12_RGB = 2,
+    MACFB_DISPLAY_APPLE_2PAGE_MONO = 3,
+    MACFB_DISPLAY_NTSC_UNDERSCAN = 4,
+    MACFB_DISPLAY_NTSC_OVERSCAN = 5,
+    MACFB_DISPLAY_APPLE_12_MONO = 6,
+    MACFB_DISPLAY_APPLE_13_RGB = 7,
+    MACFB_DISPLAY_16_COLOR = 8,
+    MACFB_DISPLAY_PAL1_UNDERSCAN = 9,
+    MACFB_DISPLAY_PAL1_OVERSCAN = 10,
+    MACFB_DISPLAY_PAL2_UNDERSCAN = 11,
+    MACFB_DISPLAY_PAL2_OVERSCAN = 12,
+    MACFB_DISPLAY_VGA = 13,
+    MACFB_DISPLAY_SVGA = 14,
+} MacfbDisplayType;
+
+typedef struct MacFbMode {
+    uint8_t type;
+    uint8_t depth;
+    uint32_t mode_ctrl1;
+    uint32_t mode_ctrl2;
+    uint32_t width;
+    uint32_t height;
+    uint32_t stride;
+    uint32_t offset;
+} MacFbMode;
+
+#define MACFB_CTRL_TOPADDR  0x200
+#define MACFB_NUM_REGS      (MACFB_CTRL_TOPADDR / sizeof(uint32_t))
 
 typedef struct MacfbState {
     MemoryRegion mem_vram;
@@ -29,6 +63,13 @@ typedef struct MacfbState {
     uint8_t color_palette[256 * 3];
     uint32_t width, height; /* in pixels */
     uint8_t depth;
+    uint8_t type;
+
+    uint32_t regs[MACFB_NUM_REGS];
+    MacFbMode *mode;
+
+    QEMUTimer *vbl_timer;
+    qemu_irq irq;
 } MacfbState;
 
 #define TYPE_MACFB "sysbus-macfb"
@@ -47,6 +88,7 @@ struct MacfbNubusDeviceClass {
     DeviceClass parent_class;
 
     DeviceRealize parent_realize;
+    DeviceUnrealize parent_unrealize;
 };
 
 
diff --git a/include/hw/display/milkymist_tmu2.h b/include/hw/display/milkymist_tmu2.h
deleted file mode 100644 (file)
index fdce953..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- *  QEMU model of the Milkymist texture mapping unit.
- *
- *  Copyright (c) 2010 Michael Walle <michael@walle.cc>
- *  Copyright (c) 2010 Sebastien Bourdeauducq
- *                       <sebastien.bourdeauducq@lekernel.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- *
- *
- * Specification available at:
- *   http://milkymist.walle.cc/socdoc/tmu2.pdf
- *
- */
-
-#ifndef HW_DISPLAY_MILKYMIST_TMU2_H
-#define HW_DISPLAY_MILKYMIST_TMU2_H
-
-#include "exec/hwaddr.h"
-#include "hw/qdev-core.h"
-
-#if defined(CONFIG_X11) && defined(CONFIG_OPENGL)
-DeviceState *milkymist_tmu2_create(hwaddr base, qemu_irq irq);
-#else
-static inline DeviceState *milkymist_tmu2_create(hwaddr base, qemu_irq irq)
-{
-    return NULL;
-}
-#endif
-
-#endif /* HW_DISPLAY_MILKYMIST_TMU2_H */
index b33a2c467b28824dd4aa392b9e3d76b8cdaa7673..a7e00191445e17522e978c50e135ad2c722dafc7 100644 (file)
@@ -1,11 +1,15 @@
 #ifndef RAMFB_H
 #define RAMFB_H
 
+#include "migration/vmstate.h"
+
 /* ramfb.c */
 typedef struct RAMFBState RAMFBState;
 void ramfb_display_update(QemuConsole *con, RAMFBState *s);
 RAMFBState *ramfb_setup(Error **errp);
 
+extern const VMStateDescription ramfb_vmstate;
+
 /* ramfb-standalone.c */
 #define TYPE_RAMFB_DEVICE "ramfb"
 
index ca0003dbfd92e9a39bdf400d7aee5dedbfaffd40..a79aa2909b25424b157becb9dd66e407dfddec9b 100644 (file)
@@ -9,7 +9,11 @@
 #ifndef QEMU_HW_DISPLAY_VGA_H
 #define QEMU_HW_DISPLAY_VGA_H
 
-#include "exec/hwaddr.h"
+/*
+ * modules can reference this symbol to avoid being loaded
+ * into system emulators without vga support
+ */
+extern bool have_vga;
 
 enum vga_retrace_method {
     VGA_RETRACE_DUMB,
@@ -18,8 +22,6 @@ enum vga_retrace_method {
 
 extern enum vga_retrace_method vga_retrace_method;
 
-int isa_vga_mm_init(hwaddr vram_base,
-                    hwaddr ctrl_base, int it_shift,
-                    MemoryRegion *address_space);
+#define TYPE_VGA_MMIO "vga-mmio"
 
 #endif
index 8ab4733bb85418f5908c16cb6441364a2483122d..e86a87f235e16c2623d200f3df18f53f0b3fee90 100644 (file)
 #include "hw/dma/xlnx_dpdma.h"
 #include "audio/audio.h"
 #include "qom/object.h"
+#include "hw/ptimer.h"
 
 #define AUD_CHBUF_MAX_DEPTH                 (32 * KiB)
 #define MAX_QEMU_BUFFER_SIZE                (4 * KiB)
 
-#define DP_CORE_REG_ARRAY_SIZE              (0x3AF >> 2)
+#define DP_CORE_REG_OFFSET                  (0x0000)
+#define DP_CORE_REG_ARRAY_SIZE              (0x3B0 >> 2)
+#define DP_AVBUF_REG_OFFSET                 (0xB000)
 #define DP_AVBUF_REG_ARRAY_SIZE             (0x238 >> 2)
-#define DP_VBLEND_REG_ARRAY_SIZE            (0x1DF >> 2)
+#define DP_VBLEND_REG_OFFSET                (0xA000)
+#define DP_VBLEND_REG_ARRAY_SIZE            (0x1E0 >> 2)
+#define DP_AUDIO_REG_OFFSET                 (0xC000)
 #define DP_AUDIO_REG_ARRAY_SIZE             (0x50 >> 2)
+#define DP_CONTAINER_SIZE                   (0xC050)
 
 struct PixmanPlane {
     pixman_format_code_t format;
@@ -102,6 +108,8 @@ struct XlnxDPState {
      */
     DPCDState *dpcd;
     I2CDDCState *edid;
+
+    ptimer_state *vblank;
 };
 
 #define TYPE_XLNX_DP "xlnx.v-dp"
diff --git a/include/hw/dma/bcm2835_dma.h b/include/hw/dma/bcm2835_dma.h
new file mode 100644 (file)
index 0000000..1d26b1d
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Raspberry Pi emulation (c) 2012 Gregory Estrade
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_DMA_H
+#define BCM2835_DMA_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+typedef struct {
+    uint32_t cs;
+    uint32_t conblk_ad;
+    uint32_t ti;
+    uint32_t source_ad;
+    uint32_t dest_ad;
+    uint32_t txfr_len;
+    uint32_t stride;
+    uint32_t nextconbk;
+    uint32_t debug;
+
+    qemu_irq irq;
+} BCM2835DMAChan;
+
+#define TYPE_BCM2835_DMA "bcm2835-dma"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835DMAState, BCM2835_DMA)
+
+#define BCM2835_DMA_NCHANS 16
+
+struct BCM2835DMAState {
+    /*< private >*/
+    SysBusDevice busdev;
+    /*< public >*/
+
+    MemoryRegion iomem0, iomem15;
+    MemoryRegion *dma_mr;
+    AddressSpace dma_as;
+
+    BCM2835DMAChan chan[BCM2835_DMA_NCHANS];
+    uint32_t int_status;
+    uint32_t enable;
+};
+
+#endif
diff --git a/include/hw/dma/i8257.h b/include/hw/dma/i8257.h
new file mode 100644 (file)
index 0000000..f652345
--- /dev/null
@@ -0,0 +1,50 @@
+#ifndef HW_I8257_H
+#define HW_I8257_H
+
+#include "hw/isa/isa.h"
+#include "exec/ioport.h"
+#include "qom/object.h"
+
+#define TYPE_I8257 "i8257"
+OBJECT_DECLARE_SIMPLE_TYPE(I8257State, I8257)
+
+typedef struct I8257Regs {
+    int now[2];
+    uint16_t base[2];
+    uint8_t mode;
+    uint8_t page;
+    uint8_t pageh;
+    uint8_t dack;
+    uint8_t eop;
+    IsaDmaTransferHandler transfer_handler;
+    void *opaque;
+} I8257Regs;
+
+struct I8257State {
+    /* <private> */
+    ISADevice parent_obj;
+
+    /* <public> */
+    int32_t base;
+    int32_t page_base;
+    int32_t pageh_base;
+    int32_t dshift;
+
+    uint8_t status;
+    uint8_t command;
+    uint8_t mask;
+    uint8_t flip_flop;
+    I8257Regs regs[4];
+    MemoryRegion channel_io;
+    MemoryRegion cont_io;
+
+    QEMUBH *dma_bh;
+    bool dma_bh_scheduled;
+    int running;
+    PortioList portio_page;
+    PortioList portio_pageh;
+};
+
+void i8257_dma_init(ISABus *bus, bool high_page_enable);
+
+#endif
diff --git a/include/hw/dma/pl080.h b/include/hw/dma/pl080.h
new file mode 100644 (file)
index 0000000..3c9659e
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * ARM PrimeCell PL080/PL081 DMA controller
+ *
+ * Copyright (c) 2006 CodeSourcery.
+ * Copyright (c) 2018 Linaro Limited
+ * Written by Paul Brook, Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+/*
+ * This is a model of the Arm PrimeCell PL080/PL081 DMA controller:
+ * The PL080 TRM is:
+ * https://developer.arm.com/documentation/ddi0196/latest
+ * and the PL081 TRM is:
+ * https://developer.arm.com/documentation/ddi0218/latest
+ *
+ * QEMU interface:
+ * + sysbus IRQ 0: DMACINTR combined interrupt line
+ * + sysbus IRQ 1: DMACINTERR error interrupt request
+ * + sysbus IRQ 2: DMACINTTC count interrupt request
+ * + sysbus MMIO region 0: MemoryRegion for the device's registers
+ * + QOM property "downstream": MemoryRegion defining where DMA
+ *   bus master transactions are made
+ */
+
+#ifndef HW_DMA_PL080_H
+#define HW_DMA_PL080_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define PL080_MAX_CHANNELS 8
+
+typedef struct {
+    uint32_t src;
+    uint32_t dest;
+    uint32_t lli;
+    uint32_t ctrl;
+    uint32_t conf;
+} pl080_channel;
+
+#define TYPE_PL080 "pl080"
+#define TYPE_PL081 "pl081"
+OBJECT_DECLARE_SIMPLE_TYPE(PL080State, PL080)
+
+struct PL080State {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    uint8_t tc_int;
+    uint8_t tc_mask;
+    uint8_t err_int;
+    uint8_t err_mask;
+    uint32_t conf;
+    uint32_t sync;
+    uint32_t req_single;
+    uint32_t req_burst;
+    pl080_channel chan[PL080_MAX_CHANNELS];
+    int nchannels;
+    /* Flag to avoid recursive DMA invocations.  */
+    int running;
+    qemu_irq irq;
+    qemu_irq interr;
+    qemu_irq inttc;
+
+    MemoryRegion *downstream;
+    AddressSpace downstream_as;
+};
+
+#endif
diff --git a/include/hw/dma/sifive_pdma.h b/include/hw/dma/sifive_pdma.h
new file mode 100644 (file)
index 0000000..8c6cfa7
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * SiFive Platform DMA emulation
+ *
+ * Copyright (c) 2020 Wind River Systems, Inc.
+ *
+ * Author:
+ *   Bin Meng <bin.meng@windriver.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef SIFIVE_PDMA_H
+#define SIFIVE_PDMA_H
+
+#include "hw/sysbus.h"
+
+struct sifive_pdma_chan {
+    uint32_t control;
+    uint32_t next_config;
+    uint64_t next_bytes;
+    uint64_t next_dst;
+    uint64_t next_src;
+    uint32_t exec_config;
+    uint64_t exec_bytes;
+    uint64_t exec_dst;
+    uint64_t exec_src;
+    int state;
+};
+
+#define SIFIVE_PDMA_CHANS           4
+#define SIFIVE_PDMA_IRQS            (SIFIVE_PDMA_CHANS * 2)
+#define SIFIVE_PDMA_REG_SIZE        0x100000
+#define SIFIVE_PDMA_CHAN_NO(reg)    ((reg & (SIFIVE_PDMA_REG_SIZE - 1)) >> 12)
+
+typedef struct SiFivePDMAState {
+    SysBusDevice parent;
+    MemoryRegion iomem;
+    qemu_irq irq[SIFIVE_PDMA_IRQS];
+
+    struct sifive_pdma_chan chan[SIFIVE_PDMA_CHANS];
+} SiFivePDMAState;
+
+#define TYPE_SIFIVE_PDMA    "sifive.pdma"
+
+#define SIFIVE_PDMA(obj)    \
+    OBJECT_CHECK(SiFivePDMAState, (obj), TYPE_SIFIVE_PDMA)
+
+#endif /* SIFIVE_PDMA_H */
diff --git a/include/hw/dma/xlnx-zdma.h b/include/hw/dma/xlnx-zdma.h
new file mode 100644 (file)
index 0000000..efc7521
--- /dev/null
@@ -0,0 +1,84 @@
+/*
+ * QEMU model of the ZynqMP generic DMA
+ *
+ * Copyright (c) 2014 Xilinx Inc.
+ * Copyright (c) 2018 FEIMTECH AB
+ *
+ * Written by Edgar E. Iglesias <edgar.iglesias@xilinx.com>,
+ *            Francisco Iglesias <francisco.iglesias@feimtech.se>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XLNX_ZDMA_H
+#define XLNX_ZDMA_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "sysemu/dma.h"
+#include "qom/object.h"
+
+#define ZDMA_R_MAX (0x204 / 4)
+
+typedef enum {
+    DISABLED = 0,
+    ENABLED = 1,
+    PAUSED = 2,
+} XlnxZDMAState;
+
+typedef union {
+    struct {
+        uint64_t addr;
+        uint32_t size;
+        uint32_t attr;
+    };
+    uint32_t words[4];
+} XlnxZDMADescr;
+
+struct XlnxZDMA {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+    MemTxAttrs attr;
+    MemoryRegion *dma_mr;
+    AddressSpace dma_as;
+    qemu_irq irq_zdma_ch_imr;
+
+    struct {
+        uint32_t bus_width;
+    } cfg;
+
+    XlnxZDMAState state;
+    bool error;
+
+    XlnxZDMADescr dsc_src;
+    XlnxZDMADescr dsc_dst;
+
+    uint32_t regs[ZDMA_R_MAX];
+    RegisterInfo regs_info[ZDMA_R_MAX];
+
+    /* We don't model the common bufs. Must be at least 16 bytes
+       to model write only mode.  */
+    uint8_t buf[2048];
+};
+
+#define TYPE_XLNX_ZDMA "xlnx.zdma"
+
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxZDMA, XLNX_ZDMA)
+
+#endif /* XLNX_ZDMA_H */
diff --git a/include/hw/dma/xlnx-zynq-devcfg.h b/include/hw/dma/xlnx-zynq-devcfg.h
new file mode 100644 (file)
index 0000000..e4cf085
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * QEMU model of the Xilinx Devcfg Interface
+ *
+ * (C) 2011 PetaLogix Pty Ltd
+ * (C) 2014 Xilinx Inc.
+ * Written by Peter Crosthwaite <peter.crosthwaite@xilinx.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XLNX_ZYNQ_DEVCFG_H
+#define XLNX_ZYNQ_DEVCFG_H
+
+#include "hw/register.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_XLNX_ZYNQ_DEVCFG "xlnx.ps7-dev-cfg"
+
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqDevcfg, XLNX_ZYNQ_DEVCFG)
+
+#define XLNX_ZYNQ_DEVCFG_R_MAX (0x100 / 4)
+
+#define XLNX_ZYNQ_DEVCFG_DMA_CMD_FIFO_LEN 10
+
+typedef struct XlnxZynqDevcfgDMACmd {
+    uint32_t src_addr;
+    uint32_t dest_addr;
+    uint32_t src_len;
+    uint32_t dest_len;
+} XlnxZynqDevcfgDMACmd;
+
+struct XlnxZynqDevcfg {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    qemu_irq irq;
+
+    XlnxZynqDevcfgDMACmd dma_cmd_fifo[XLNX_ZYNQ_DEVCFG_DMA_CMD_FIFO_LEN];
+    uint8_t dma_cmd_fifo_num;
+
+    uint32_t regs[XLNX_ZYNQ_DEVCFG_R_MAX];
+    RegisterInfo regs_info[XLNX_ZYNQ_DEVCFG_R_MAX];
+};
+
+#endif
diff --git a/include/hw/dma/xlnx_csu_dma.h b/include/hw/dma/xlnx_csu_dma.h
new file mode 100644 (file)
index 0000000..922ab80
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Xilinx Platform CSU Stream DMA emulation
+ *
+ * This implementation is based on
+ * https://github.com/Xilinx/qemu/blob/master/hw/dma/csu_stream_dma.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef XLNX_CSU_DMA_H
+#define XLNX_CSU_DMA_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "hw/ptimer.h"
+#include "hw/stream.h"
+
+#define TYPE_XLNX_CSU_DMA "xlnx.csu_dma"
+
+#define XLNX_CSU_DMA_R_MAX (0x2c / 4)
+
+typedef struct XlnxCSUDMA {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+    MemTxAttrs attr;
+    MemoryRegion *dma_mr;
+    AddressSpace dma_as;
+    qemu_irq irq;
+    StreamSink *tx_dev; /* Used as generic StreamSink */
+    ptimer_state *src_timer;
+
+    uint16_t width;
+    bool is_dst;
+    bool r_size_last_word;
+
+    StreamCanPushNotifyFn notify;
+    void *notify_opaque;
+
+    uint32_t regs[XLNX_CSU_DMA_R_MAX];
+    RegisterInfo regs_info[XLNX_CSU_DMA_R_MAX];
+} XlnxCSUDMA;
+
+OBJECT_DECLARE_TYPE(XlnxCSUDMA, XlnxCSUDMAClass, XLNX_CSU_DMA)
+
+struct XlnxCSUDMAClass {
+    SysBusDeviceClass parent_class;
+
+    /*
+     * read: Start a read transfer on a Xilinx CSU DMA engine
+     *
+     * @s: the Xilinx CSU DMA engine to start the transfer on
+     * @addr: the address to read
+     * @len: the number of bytes to read at 'addr'
+     *
+     * @return a MemTxResult indicating whether the operation succeeded ('len'
+     * bytes were read) or failed.
+     */
+    MemTxResult (*read)(XlnxCSUDMA *s, hwaddr addr, uint32_t len);
+};
+
+#endif
diff --git a/include/hw/dma/xlnx_dpdma.h b/include/hw/dma/xlnx_dpdma.h
new file mode 100644 (file)
index 0000000..40537a8
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * xlnx_dpdma.h
+ *
+ *  Copyright (C) 2015 : GreenSocs Ltd
+ *      http://www.greensocs.com/ , email: info@greensocs.com
+ *
+ *  Developed by :
+ *  Frederic Konrad   <fred.konrad@greensocs.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef XLNX_DPDMA_H
+#define XLNX_DPDMA_H
+
+#include "hw/sysbus.h"
+#include "ui/console.h"
+#include "sysemu/dma.h"
+#include "qom/object.h"
+
+#define XLNX_DPDMA_REG_ARRAY_SIZE (0x1000 >> 2)
+
+struct XlnxDPDMAState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+    MemoryRegion iomem;
+    uint32_t registers[XLNX_DPDMA_REG_ARRAY_SIZE];
+    uint8_t *data[6];
+    bool operation_finished[6];
+    qemu_irq irq;
+};
+
+
+#define TYPE_XLNX_DPDMA "xlnx.dpdma"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxDPDMAState, XLNX_DPDMA)
+
+/*
+ * xlnx_dpdma_start_operation: Start the operation on the specified channel. The
+ *                             DPDMA gets the current descriptor and retrieves
+ *                             data to the buffer specified by
+ *                             dpdma_set_host_data_location().
+ *
+ * Returns The number of bytes transferred by the DPDMA
+ *         or 0 if an error occurred.
+ *
+ * @s The DPDMA state.
+ * @channel The channel to start.
+ */
+size_t xlnx_dpdma_start_operation(XlnxDPDMAState *s, uint8_t channel,
+                                  bool one_desc);
+
+/*
+ * xlnx_dpdma_set_host_data_location: Set the location in the host memory where
+ *                                    to store the data out from the dma
+ *                                    channel.
+ *
+ * @s The DPDMA state.
+ * @channel The channel associated to the pointer.
+ * @p The buffer where to store the data.
+ */
+/* XXX: add a maximum size arg and send an interrupt in case of overflow. */
+void xlnx_dpdma_set_host_data_location(XlnxDPDMAState *s, uint8_t channel,
+                                       void *p);
+
+/*
+ * xlnx_dpdma_trigger_vsync_irq: Trigger a VSYNC IRQ when the display is
+ *                               updated.
+ *
+ * @s The DPDMA state.
+ */
+void xlnx_dpdma_trigger_vsync_irq(XlnxDPDMAState *s);
+
+#endif /* XLNX_DPDMA_H */
index 6fdff3dced59d13e10e40ad936c98917877a452d..9c35d1b9da6c36198b6f49e99db6eb49cdfae058 100644 (file)
@@ -1,30 +1,30 @@
 static void glue(bswap_ehdr, SZ)(struct elfhdr *ehdr)
 {
-    bswap16s(&ehdr->e_type);                   /* Object file type */
-    bswap16s(&ehdr->e_machine);                /* Architecture */
-    bswap32s(&ehdr->e_version);                /* Object file version */
-    bswapSZs(&ehdr->e_entry);          /* Entry point virtual address */
-    bswapSZs(&ehdr->e_phoff);          /* Program header table file offset */
-    bswapSZs(&ehdr->e_shoff);          /* Section header table file offset */
-    bswap32s(&ehdr->e_flags);          /* Processor-specific flags */
-    bswap16s(&ehdr->e_ehsize);         /* ELF header size in bytes */
-    bswap16s(&ehdr->e_phentsize);              /* Program header table entry size */
-    bswap16s(&ehdr->e_phnum);          /* Program header table entry count */
-    bswap16s(&ehdr->e_shentsize);              /* Section header table entry size */
-    bswap16s(&ehdr->e_shnum);          /* Section header table entry count */
-    bswap16s(&ehdr->e_shstrndx);               /* Section header string table index */
+    bswap16s(&ehdr->e_type);      /* Object file type */
+    bswap16s(&ehdr->e_machine);   /* Architecture */
+    bswap32s(&ehdr->e_version);   /* Object file version */
+    bswapSZs(&ehdr->e_entry);     /* Entry point virtual address */
+    bswapSZs(&ehdr->e_phoff);     /* Program header table file offset */
+    bswapSZs(&ehdr->e_shoff);     /* Section header table file offset */
+    bswap32s(&ehdr->e_flags);     /* Processor-specific flags */
+    bswap16s(&ehdr->e_ehsize);    /* ELF header size in bytes */
+    bswap16s(&ehdr->e_phentsize); /* Program header table entry size */
+    bswap16s(&ehdr->e_phnum);     /* Program header table entry count */
+    bswap16s(&ehdr->e_shentsize); /* Section header table entry size */
+    bswap16s(&ehdr->e_shnum);     /* Section header table entry count */
+    bswap16s(&ehdr->e_shstrndx);  /* Section header string table index */
 }
 
 static void glue(bswap_phdr, SZ)(struct elf_phdr *phdr)
 {
-    bswap32s(&phdr->p_type);                   /* Segment type */
-    bswapSZs(&phdr->p_offset);         /* Segment file offset */
-    bswapSZs(&phdr->p_vaddr);          /* Segment virtual address */
-    bswapSZs(&phdr->p_paddr);          /* Segment physical address */
-    bswapSZs(&phdr->p_filesz);         /* Segment size in file */
-    bswapSZs(&phdr->p_memsz);          /* Segment size in memory */
-    bswap32s(&phdr->p_flags);          /* Segment flags */
-    bswapSZs(&phdr->p_align);          /* Segment alignment */
+    bswap32s(&phdr->p_type);   /* Segment type */
+    bswapSZs(&phdr->p_offset); /* Segment file offset */
+    bswapSZs(&phdr->p_vaddr);  /* Segment virtual address */
+    bswapSZs(&phdr->p_paddr);  /* Segment physical address */
+    bswapSZs(&phdr->p_filesz); /* Segment size in file */
+    bswapSZs(&phdr->p_memsz);  /* Segment size in memory */
+    bswap32s(&phdr->p_flags);  /* Segment flags */
+    bswapSZs(&phdr->p_align);  /* Segment alignment */
 }
 
 static void glue(bswap_shdr, SZ)(struct elf_shdr *shdr)
@@ -117,7 +117,7 @@ static void glue(load_symbols, SZ)(struct elfhdr *ehdr, int fd, int must_swab,
     shdr_table = load_at(fd, ehdr->e_shoff,
                          sizeof(struct elf_shdr) * ehdr->e_shnum);
     if (!shdr_table) {
-        return ;
+        return;
     }
 
     if (must_swab) {
@@ -312,26 +312,26 @@ static struct elf_note *glue(get_elf_note_type, SZ)(struct elf_note *nhdr,
     return nhdr;
 }
 
-static int glue(load_elf, SZ)(const char *name, int fd,
-                              uint64_t (*elf_note_fn)(void *, void *, bool),
-                              uint64_t (*translate_fn)(void *, uint64_t),
-                              void *translate_opaque,
-                              int must_swab, uint64_t *pentry,
-                              uint64_t *lowaddr, uint64_t *highaddr,
-                              uint32_t *pflags, int elf_machine,
-                              int clear_lsb, int data_swab,
-                              AddressSpace *as, bool load_rom,
-                              symbol_fn_t sym_cb)
+static ssize_t glue(load_elf, SZ)(const char *name, int fd,
+                                  uint64_t (*elf_note_fn)(void *, void *, bool),
+                                  uint64_t (*translate_fn)(void *, uint64_t),
+                                  void *translate_opaque,
+                                  int must_swab, uint64_t *pentry,
+                                  uint64_t *lowaddr, uint64_t *highaddr,
+                                  uint32_t *pflags, int elf_machine,
+                                  int clear_lsb, int data_swab,
+                                  AddressSpace *as, bool load_rom,
+                                  symbol_fn_t sym_cb)
 {
     struct elfhdr ehdr;
     struct elf_phdr *phdr = NULL, *ph;
-    int size, i, total_size;
+    int size, i;
+    ssize_t total_size;
     elf_word mem_size, file_size, data_offset;
     uint64_t addr, low = (uint64_t)-1, high = 0;
     GMappedFile *mapped_file = NULL;
     uint8_t *data = NULL;
-    char label[128];
-    int ret = ELF_LOAD_FAILED;
+    ssize_t ret = ELF_LOAD_FAILED;
 
     if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr))
         goto fail;
@@ -369,14 +369,6 @@ static int glue(load_elf, SZ)(const char *name, int fd,
                 }
             }
             break;
-        case EM_MOXIE:
-            if (ehdr.e_machine != EM_MOXIE) {
-                if (ehdr.e_machine != EM_MOXIE_OLD) {
-                    ret = ELF_LOAD_WRONG_ARCH;
-                    goto fail;
-                }
-            }
-            break;
         case EM_MIPS:
         case EM_NANOMIPS:
             if ((ehdr.e_machine != EM_MIPS) &&
@@ -393,10 +385,11 @@ static int glue(load_elf, SZ)(const char *name, int fd,
     }
 
     if (pflags) {
-        *pflags = (elf_word)ehdr.e_flags;
+        *pflags = ehdr.e_flags;
+    }
+    if (pentry) {
+        *pentry = ehdr.e_entry;
     }
-    if (pentry)
-        *pentry = (uint64_t)(elf_sword)ehdr.e_entry;
 
     glue(load_symbols, SZ)(&ehdr, fd, must_swab, clear_lsb, sym_cb);
 
@@ -418,7 +411,7 @@ static int glue(load_elf, SZ)(const char *name, int fd,
 
     /*
      * Since we want to be able to modify the mapped buffer, we set the
-     * 'writeble' parameter to 'true'. Modifications to the buffer are not
+     * 'writable' parameter to 'true'. Modifications to the buffer are not
      * written back to the file.
      */
     mapped_file = g_mapped_file_new_from_fd(fd, true, NULL);
@@ -491,7 +484,7 @@ static int glue(load_elf, SZ)(const char *name, int fd,
                 }
             }
 
-            if (mem_size > INT_MAX - total_size) {
+            if (mem_size > SSIZE_MAX - total_size) {
                 ret = ELF_LOAD_TOO_BIG;
                 goto fail;
             }
@@ -507,7 +500,7 @@ static int glue(load_elf, SZ)(const char *name, int fd,
             }
 
             if (data_swab) {
-                int j;
+                elf_word j;
                 for (j = 0; j < file_size; j += (1 << data_swab)) {
                     uint8_t *dp = data + j;
                     switch (data_swab) {
@@ -544,7 +537,9 @@ static int glue(load_elf, SZ)(const char *name, int fd,
              */
             if (mem_size != 0) {
                 if (load_rom) {
-                    snprintf(label, sizeof(label), "phdr #%d: %s", i, name);
+                    g_autofree char *label =
+                        g_strdup_printf("%s ELF program header segment %d",
+                                        name, i);
 
                     /*
                      * rom_add_elf_program() takes its own reference to
@@ -561,6 +556,19 @@ static int glue(load_elf, SZ)(const char *name, int fd,
                     if (res != MEMTX_OK) {
                         goto fail;
                     }
+                    /*
+                     * We need to zero'ify the space that is not copied
+                     * from file
+                     */
+                    if (file_size < mem_size) {
+                        res = address_space_set(as ? as : &address_space_memory,
+                                                addr + file_size, 0,
+                                                mem_size - file_size,
+                                                MEMTXATTRS_UNSPECIFIED);
+                        if (res != MEMTX_OK) {
+                            goto fail;
+                        }
+                    }
                 }
             }
 
@@ -597,18 +605,18 @@ static int glue(load_elf, SZ)(const char *name, int fd,
             nhdr = glue(get_elf_note_type, SZ)(nhdr, file_size, ph->p_align,
                                                *(uint64_t *)translate_opaque);
             if (nhdr != NULL) {
-                bool is64 =
-                    sizeof(struct elf_note) == sizeof(struct elf64_note);
-                elf_note_fn((void *)nhdr, (void *)&ph->p_align, is64);
+                elf_note_fn((void *)nhdr, (void *)&ph->p_align, SZ == 64);
             }
             data = NULL;
         }
     }
 
-    if (lowaddr)
-        *lowaddr = (uint64_t)(elf_sword)low;
-    if (highaddr)
-        *highaddr = (uint64_t)(elf_sword)high;
+    if (lowaddr) {
+        *lowaddr = low;
+    }
+    if (highaddr) {
+        *highaddr = high;
+    }
     ret = total_size;
  fail:
     if (mapped_file) {
diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h
new file mode 100644 (file)
index 0000000..7f3259a
--- /dev/null
@@ -0,0 +1,305 @@
+#ifndef QEMU_SMBIOS_H
+#define QEMU_SMBIOS_H
+
+#include "qapi/qapi-types-machine.h"
+
+/*
+ * SMBIOS Support
+ *
+ * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors:
+ *  Alex Williamson <alex.williamson@hp.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+
+#define SMBIOS_MAX_TYPE 127
+#define offsetofend(TYPE, MEMBER) \
+       (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+
+/* memory area description, used by type 19 table */
+struct smbios_phys_mem_area {
+    uint64_t address;
+    uint64_t length;
+};
+
+/* SMBIOS Entry Point
+ * There are two types of entry points defined in the SMBIOS specification
+ * (see below). BIOS must place the entry point(s) at a 16-byte-aligned
+ * address between 0xf0000 and 0xfffff. Note that either entry point type
+ * can be used in a 64-bit target system, except that SMBIOS 2.1 entry point
+ * only allows the SMBIOS struct table to reside below 4GB address space.
+ */
+
+/* SMBIOS 2.1 (32-bit) Entry Point
+ *  - introduced since SMBIOS 2.1
+ *  - supports structure table below 4GB only
+ */
+struct smbios_21_entry_point {
+    uint8_t anchor_string[4];
+    uint8_t checksum;
+    uint8_t length;
+    uint8_t smbios_major_version;
+    uint8_t smbios_minor_version;
+    uint16_t max_structure_size;
+    uint8_t entry_point_revision;
+    uint8_t formatted_area[5];
+    uint8_t intermediate_anchor_string[5];
+    uint8_t intermediate_checksum;
+    uint16_t structure_table_length;
+    uint32_t structure_table_address;
+    uint16_t number_of_structures;
+    uint8_t smbios_bcd_revision;
+} QEMU_PACKED;
+
+/* SMBIOS 3.0 (64-bit) Entry Point
+ *  - introduced since SMBIOS 3.0
+ *  - supports structure table at 64-bit address space
+ */
+struct smbios_30_entry_point {
+    uint8_t anchor_string[5];
+    uint8_t checksum;
+    uint8_t length;
+    uint8_t smbios_major_version;
+    uint8_t smbios_minor_version;
+    uint8_t smbios_doc_rev;
+    uint8_t entry_point_revision;
+    uint8_t reserved;
+    uint32_t structure_table_max_size;
+    uint64_t structure_table_address;
+} QEMU_PACKED;
+
+typedef union {
+    struct smbios_21_entry_point ep21;
+    struct smbios_30_entry_point ep30;
+} QEMU_PACKED SmbiosEntryPoint;
+
+/* This goes at the beginning of every SMBIOS structure. */
+struct smbios_structure_header {
+    uint8_t type;
+    uint8_t length;
+    uint16_t handle;
+} QEMU_PACKED;
+
+/* SMBIOS type 0 - BIOS Information */
+struct smbios_type_0 {
+    struct smbios_structure_header header;
+    uint8_t vendor_str;
+    uint8_t bios_version_str;
+    uint16_t bios_starting_address_segment;
+    uint8_t bios_release_date_str;
+    uint8_t bios_rom_size;
+    uint64_t bios_characteristics;
+    uint8_t bios_characteristics_extension_bytes[2];
+    uint8_t system_bios_major_release;
+    uint8_t system_bios_minor_release;
+    uint8_t embedded_controller_major_release;
+    uint8_t embedded_controller_minor_release;
+} QEMU_PACKED;
+
+/* UUID encoding. The time_* fields are little-endian, as specified by SMBIOS
+ * version 2.6.
+ */
+struct smbios_uuid {
+    uint32_t time_low;
+    uint16_t time_mid;
+    uint16_t time_hi_and_version;
+    uint8_t clock_seq_hi_and_reserved;
+    uint8_t clock_seq_low;
+    uint8_t node[6];
+} QEMU_PACKED;
+
+/* SMBIOS type 1 - System Information */
+struct smbios_type_1 {
+    struct smbios_structure_header header;
+    uint8_t manufacturer_str;
+    uint8_t product_name_str;
+    uint8_t version_str;
+    uint8_t serial_number_str;
+    struct smbios_uuid uuid;
+    uint8_t wake_up_type;
+    uint8_t sku_number_str;
+    uint8_t family_str;
+} QEMU_PACKED;
+
+/* SMBIOS type 2 - Base Board */
+struct smbios_type_2 {
+    struct smbios_structure_header header;
+    uint8_t manufacturer_str;
+    uint8_t product_str;
+    uint8_t version_str;
+    uint8_t serial_number_str;
+    uint8_t asset_tag_number_str;
+    uint8_t feature_flags;
+    uint8_t location_str;
+    uint16_t chassis_handle;
+    uint8_t board_type;
+    uint8_t contained_element_count;
+    /* contained elements follow */
+} QEMU_PACKED;
+
+/* SMBIOS type 3 - System Enclosure (v2.7) */
+struct smbios_type_3 {
+    struct smbios_structure_header header;
+    uint8_t manufacturer_str;
+    uint8_t type;
+    uint8_t version_str;
+    uint8_t serial_number_str;
+    uint8_t asset_tag_number_str;
+    uint8_t boot_up_state;
+    uint8_t power_supply_state;
+    uint8_t thermal_state;
+    uint8_t security_status;
+    uint32_t oem_defined;
+    uint8_t height;
+    uint8_t number_of_power_cords;
+    uint8_t contained_element_count;
+    uint8_t contained_element_record_length;
+    uint8_t sku_number_str;
+    /* contained elements follow */
+} QEMU_PACKED;
+
+/* SMBIOS type 4 - Processor Information (v2.6) */
+struct smbios_type_4 {
+    struct smbios_structure_header header;
+    uint8_t socket_designation_str;
+    uint8_t processor_type;
+    uint8_t processor_family;
+    uint8_t processor_manufacturer_str;
+    uint32_t processor_id[2];
+    uint8_t processor_version_str;
+    uint8_t voltage;
+    uint16_t external_clock;
+    uint16_t max_speed;
+    uint16_t current_speed;
+    uint8_t status;
+    uint8_t processor_upgrade;
+    uint16_t l1_cache_handle;
+    uint16_t l2_cache_handle;
+    uint16_t l3_cache_handle;
+    uint8_t serial_number_str;
+    uint8_t asset_tag_number_str;
+    uint8_t part_number_str;
+    uint8_t core_count;
+    uint8_t core_enabled;
+    uint8_t thread_count;
+    uint16_t processor_characteristics;
+    uint16_t processor_family2;
+    /* SMBIOS spec 3.0.0, Table 21 */
+    uint16_t core_count2;
+    uint16_t core_enabled2;
+    uint16_t thread_count2;
+} QEMU_PACKED;
+
+typedef enum smbios_type_4_len_ver {
+    SMBIOS_TYPE_4_LEN_V28 = offsetofend(struct smbios_type_4,
+                                        processor_family2),
+    SMBIOS_TYPE_4_LEN_V30 = offsetofend(struct smbios_type_4, thread_count2),
+} smbios_type_4_len_ver;
+
+/* SMBIOS type 8 - Port Connector Information */
+struct smbios_type_8 {
+    struct smbios_structure_header header;
+    uint8_t internal_reference_str;
+    uint8_t internal_connector_type;
+    uint8_t external_reference_str;
+    uint8_t external_connector_type;
+    uint8_t port_type;
+} QEMU_PACKED;
+
+/* SMBIOS type 11 - OEM strings */
+struct smbios_type_11 {
+    struct smbios_structure_header header;
+    uint8_t count;
+} QEMU_PACKED;
+
+/* SMBIOS type 16 - Physical Memory Array (v2.7) */
+struct smbios_type_16 {
+    struct smbios_structure_header header;
+    uint8_t location;
+    uint8_t use;
+    uint8_t error_correction;
+    uint32_t maximum_capacity;
+    uint16_t memory_error_information_handle;
+    uint16_t number_of_memory_devices;
+    uint64_t extended_maximum_capacity;
+} QEMU_PACKED;
+
+/* SMBIOS type 17 - Memory Device (v2.8) */
+struct smbios_type_17 {
+    struct smbios_structure_header header;
+    uint16_t physical_memory_array_handle;
+    uint16_t memory_error_information_handle;
+    uint16_t total_width;
+    uint16_t data_width;
+    uint16_t size;
+    uint8_t form_factor;
+    uint8_t device_set;
+    uint8_t device_locator_str;
+    uint8_t bank_locator_str;
+    uint8_t memory_type;
+    uint16_t type_detail;
+    uint16_t speed;
+    uint8_t manufacturer_str;
+    uint8_t serial_number_str;
+    uint8_t asset_tag_number_str;
+    uint8_t part_number_str;
+    uint8_t attributes;
+    uint32_t extended_size;
+    uint16_t configured_clock_speed;
+    uint16_t minimum_voltage;
+    uint16_t maximum_voltage;
+    uint16_t configured_voltage;
+} QEMU_PACKED;
+
+/* SMBIOS type 19 - Memory Array Mapped Address (v2.7) */
+struct smbios_type_19 {
+    struct smbios_structure_header header;
+    uint32_t starting_address;
+    uint32_t ending_address;
+    uint16_t memory_array_handle;
+    uint8_t partition_width;
+    uint64_t extended_starting_address;
+    uint64_t extended_ending_address;
+} QEMU_PACKED;
+
+/* SMBIOS type 32 - System Boot Information */
+struct smbios_type_32 {
+    struct smbios_structure_header header;
+    uint8_t reserved[6];
+    uint8_t boot_status;
+} QEMU_PACKED;
+
+/* SMBIOS type 41 - Onboard Devices Extended Information */
+struct smbios_type_41 {
+    struct smbios_structure_header header;
+    uint8_t reference_designation_str;
+    uint8_t device_type;
+    uint8_t device_type_instance;
+    uint16_t segment_group_number;
+    uint8_t bus_number;
+    uint8_t device_number;
+} QEMU_PACKED;
+
+/* SMBIOS type 127 -- End-of-table */
+struct smbios_type_127 {
+    struct smbios_structure_header header;
+} QEMU_PACKED;
+
+void smbios_entry_add(QemuOpts *opts, Error **errp);
+void smbios_set_cpuid(uint32_t version, uint32_t features);
+void smbios_set_defaults(const char *manufacturer, const char *product,
+                         const char *version, bool legacy_mode,
+                         bool uuid_encoded, SmbiosEntryPointType ep_type);
+uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length);
+void smbios_get_tables(MachineState *ms,
+                       const struct smbios_phys_mem_area *mem_array,
+                       const unsigned int mem_array_size,
+                       uint8_t **tables, size_t *tables_len,
+                       uint8_t **anchor, size_t *anchor_len,
+                       Error **errp);
+#endif /* QEMU_SMBIOS_H */
diff --git a/include/hw/gpio/aspeed_gpio.h b/include/hw/gpio/aspeed_gpio.h
new file mode 100644 (file)
index 0000000..904eecf
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ *  ASPEED GPIO Controller
+ *
+ *  Copyright (C) 2017-2018 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef ASPEED_GPIO_H
+#define ASPEED_GPIO_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_ASPEED_GPIO "aspeed.gpio"
+OBJECT_DECLARE_TYPE(AspeedGPIOState, AspeedGPIOClass, ASPEED_GPIO)
+
+#define ASPEED_GPIO_MAX_NR_SETS 8
+#define ASPEED_GPIOS_PER_SET 32
+#define ASPEED_REGS_PER_BANK 14
+#define ASPEED_GPIO_MAX_NR_REGS (ASPEED_REGS_PER_BANK * ASPEED_GPIO_MAX_NR_SETS)
+#define ASPEED_GROUPS_PER_SET 4
+#define ASPEED_GPIO_NR_DEBOUNCE_REGS 3
+#define ASPEED_CHARS_PER_GROUP_LABEL 4
+
+typedef struct GPIOSets GPIOSets;
+
+typedef struct GPIOSetProperties {
+    uint32_t input;
+    uint32_t output;
+    char group_label[ASPEED_GROUPS_PER_SET][ASPEED_CHARS_PER_GROUP_LABEL];
+} GPIOSetProperties;
+
+enum GPIORegType {
+    gpio_not_a_reg,
+    gpio_reg_data_value,
+    gpio_reg_direction,
+    gpio_reg_int_enable,
+    gpio_reg_int_sens_0,
+    gpio_reg_int_sens_1,
+    gpio_reg_int_sens_2,
+    gpio_reg_int_status,
+    gpio_reg_reset_tolerant,
+    gpio_reg_debounce_1,
+    gpio_reg_debounce_2,
+    gpio_reg_cmd_source_0,
+    gpio_reg_cmd_source_1,
+    gpio_reg_data_read,
+    gpio_reg_input_mask,
+};
+
+/* GPIO index mode */
+enum GPIORegIndexType {
+    gpio_reg_idx_data = 0,
+    gpio_reg_idx_direction,
+    gpio_reg_idx_interrupt,
+    gpio_reg_idx_debounce,
+    gpio_reg_idx_tolerance,
+    gpio_reg_idx_cmd_src,
+    gpio_reg_idx_input_mask,
+    gpio_reg_idx_reserved,
+    gpio_reg_idx_new_w_cmd_src,
+    gpio_reg_idx_new_r_cmd_src,
+};
+
+typedef struct AspeedGPIOReg {
+    uint16_t set_idx;
+    enum GPIORegType type;
+} AspeedGPIOReg;
+
+struct AspeedGPIOClass {
+    SysBusDevice parent_obj;
+    const GPIOSetProperties *props;
+    uint32_t nr_gpio_pins;
+    uint32_t nr_gpio_sets;
+    const AspeedGPIOReg *reg_table;
+};
+
+struct AspeedGPIOState {
+    /* <private> */
+    SysBusDevice parent;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    int pending;
+    qemu_irq irq;
+    qemu_irq gpios[ASPEED_GPIO_MAX_NR_SETS][ASPEED_GPIOS_PER_SET];
+
+/* Parallel GPIO Registers */
+    uint32_t debounce_regs[ASPEED_GPIO_NR_DEBOUNCE_REGS];
+    struct GPIOSets {
+        uint32_t data_value; /* Reflects pin values */
+        uint32_t data_read; /* Contains last value written to data value */
+        uint32_t direction;
+        uint32_t int_enable;
+        uint32_t int_sens_0;
+        uint32_t int_sens_1;
+        uint32_t int_sens_2;
+        uint32_t int_status;
+        uint32_t reset_tol;
+        uint32_t cmd_source_0;
+        uint32_t cmd_source_1;
+        uint32_t debounce_1;
+        uint32_t debounce_2;
+        uint32_t input_mask;
+    } sets[ASPEED_GPIO_MAX_NR_SETS];
+};
+
+#endif /* ASPEED_GPIO_H */
diff --git a/include/hw/gpio/bcm2835_gpio.h b/include/hw/gpio/bcm2835_gpio.h
new file mode 100644 (file)
index 0000000..1c53a05
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Raspberry Pi (BCM2835) GPIO Controller
+ *
+ * Copyright (c) 2017 Antfield SAS
+ *
+ * Authors:
+ *  Clement Deschamps <clement.deschamps@antfield.fr>
+ *  Luc Michel <luc.michel@antfield.fr>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_GPIO_H
+#define BCM2835_GPIO_H
+
+#include "hw/sd/sd.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+struct BCM2835GpioState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+
+    /* SDBus selector */
+    SDBus sdbus;
+    SDBus *sdbus_sdhci;
+    SDBus *sdbus_sdhost;
+
+    uint8_t fsel[54];
+    uint32_t lev0, lev1;
+    uint8_t sd_fsel;
+    qemu_irq out[54];
+};
+
+#define TYPE_BCM2835_GPIO "bcm2835_gpio"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835GpioState, BCM2835_GPIO)
+
+#endif
diff --git a/include/hw/gpio/imx_gpio.h b/include/hw/gpio/imx_gpio.h
new file mode 100644 (file)
index 0000000..227860b
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * i.MX processors GPIO registers definition.
+ *
+ * Copyright (C) 2015 Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef IMX_GPIO_H
+#define IMX_GPIO_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_IMX_GPIO "imx.gpio"
+OBJECT_DECLARE_SIMPLE_TYPE(IMXGPIOState, IMX_GPIO)
+
+#define IMX_GPIO_MEM_SIZE 0x20
+
+/* i.MX GPIO memory map */
+#define DR_ADDR             0x00 /* DATA REGISTER */
+#define GDIR_ADDR           0x04 /* DIRECTION REGISTER */
+#define PSR_ADDR            0x08 /* PAD STATUS REGISTER */
+#define ICR1_ADDR           0x0c /* INTERRUPT CONFIGURATION REGISTER 1 */
+#define ICR2_ADDR           0x10 /* INTERRUPT CONFIGURATION REGISTER 2 */
+#define IMR_ADDR            0x14 /* INTERRUPT MASK REGISTER */
+#define ISR_ADDR            0x18 /* INTERRUPT STATUS REGISTER */
+#define EDGE_SEL_ADDR       0x1c /* EDGE SEL REGISTER */
+
+#define IMX_GPIO_PIN_COUNT 32
+
+struct IMXGPIOState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+
+    uint32_t dr;
+    uint32_t gdir;
+    uint32_t psr;
+    uint64_t icr;
+    uint32_t imr;
+    uint32_t isr;
+    bool has_edge_sel;
+    uint32_t edge_sel;
+    bool has_upper_pin_irq;
+
+    qemu_irq irq[2];
+    qemu_irq output[IMX_GPIO_PIN_COUNT];
+};
+
+#endif /* IMX_GPIO_H */
diff --git a/include/hw/gpio/npcm7xx_gpio.h b/include/hw/gpio/npcm7xx_gpio.h
new file mode 100644 (file)
index 0000000..b1d771b
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Nuvoton NPCM7xx General Purpose Input / Output (GPIO)
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#ifndef NPCM7XX_GPIO_H
+#define NPCM7XX_GPIO_H
+
+#include "exec/memory.h"
+#include "hw/sysbus.h"
+
+/* Number of pins managed by each controller. */
+#define NPCM7XX_GPIO_NR_PINS (32)
+
+/*
+ * Number of registers in our device state structure. Don't change this without
+ * incrementing the version_id in the vmstate.
+ */
+#define NPCM7XX_GPIO_NR_REGS (0x80 / sizeof(uint32_t))
+
+typedef struct NPCM7xxGPIOState {
+    SysBusDevice parent;
+
+    /* Properties to be defined by the SoC */
+    uint32_t reset_pu;
+    uint32_t reset_pd;
+    uint32_t reset_osrc;
+    uint32_t reset_odsc;
+
+    MemoryRegion mmio;
+
+    qemu_irq irq;
+    qemu_irq output[NPCM7XX_GPIO_NR_PINS];
+
+    uint32_t pin_level;
+    uint32_t ext_level;
+    uint32_t ext_driven;
+
+    uint32_t regs[NPCM7XX_GPIO_NR_REGS];
+} NPCM7xxGPIOState;
+
+#define TYPE_NPCM7XX_GPIO "npcm7xx-gpio"
+#define NPCM7XX_GPIO(obj) \
+    OBJECT_CHECK(NPCM7xxGPIOState, (obj), TYPE_NPCM7XX_GPIO)
+
+#endif /* NPCM7XX_GPIO_H */
diff --git a/include/hw/gpio/nrf51_gpio.h b/include/hw/gpio/nrf51_gpio.h
new file mode 100644 (file)
index 0000000..fcfa2ba
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ * nRF51 System-on-Chip general purpose input/output register definition
+ *
+ * QEMU interface:
+ * + sysbus MMIO regions 0: GPIO registers
+ * + Unnamed GPIO inputs 0-31: Set tri-state input level for GPIO pin.
+ *   Level -1: Externally Disconnected/Floating; Pull-up/down will be regarded
+ *   Level 0: Input externally driven LOW
+ *   Level 1: Input externally driven HIGH
+ * + Unnamed GPIO outputs 0-31:
+ *   Level -1: Disconnected/Floating
+ *   Level 0: Driven LOW
+ *   Level 1: Driven HIGH
+ *
+ * Accuracy of the peripheral model:
+ * + The nRF51 GPIO output driver supports two modes, standard and high-current
+ *   mode. These different drive modes are not modeled and handled the same.
+ * + Pin SENSEing is not modeled/implemented.
+ *
+ * Copyright 2018 Steffen Görtz <contrib@steffen-goertz.de>
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+#ifndef NRF51_GPIO_H
+#define NRF51_GPIO_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+#define TYPE_NRF51_GPIO "nrf51_soc.gpio"
+OBJECT_DECLARE_SIMPLE_TYPE(NRF51GPIOState, NRF51_GPIO)
+
+#define NRF51_GPIO_PINS 32
+
+#define NRF51_GPIO_SIZE 0x1000
+
+#define NRF51_GPIO_REG_OUT          0x504
+#define NRF51_GPIO_REG_OUTSET       0x508
+#define NRF51_GPIO_REG_OUTCLR       0x50C
+#define NRF51_GPIO_REG_IN           0x510
+#define NRF51_GPIO_REG_DIR          0x514
+#define NRF51_GPIO_REG_DIRSET       0x518
+#define NRF51_GPIO_REG_DIRCLR       0x51C
+#define NRF51_GPIO_REG_CNF_START    0x700
+#define NRF51_GPIO_REG_CNF_END      0x77C
+
+#define NRF51_GPIO_PULLDOWN 1
+#define NRF51_GPIO_PULLUP 3
+
+struct NRF51GPIOState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion mmio;
+    qemu_irq irq;
+
+    uint32_t out;
+    uint32_t in;
+    uint32_t in_mask;
+    uint32_t dir;
+    uint32_t cnf[NRF51_GPIO_PINS];
+
+    uint32_t old_out;
+    uint32_t old_out_connected;
+
+    qemu_irq output[NRF51_GPIO_PINS];
+    qemu_irq detect;
+};
+
+
+#endif
diff --git a/include/hw/gpio/sifive_gpio.h b/include/hw/gpio/sifive_gpio.h
new file mode 100644 (file)
index 0000000..fc53785
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * SiFive System-on-Chip general purpose input/output register definition
+ *
+ * Copyright 2019 AdaCore
+ *
+ * Base on nrf51_gpio.c:
+ *
+ * Copyright 2018 Steffen Görtz <contrib@steffen-goertz.de>
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef SIFIVE_GPIO_H
+#define SIFIVE_GPIO_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_SIFIVE_GPIO "sifive_soc.gpio"
+typedef struct SIFIVEGPIOState SIFIVEGPIOState;
+DECLARE_INSTANCE_CHECKER(SIFIVEGPIOState, SIFIVE_GPIO,
+                         TYPE_SIFIVE_GPIO)
+
+#define SIFIVE_GPIO_PINS 32
+
+#define SIFIVE_GPIO_SIZE 0x100
+
+#define SIFIVE_GPIO_REG_VALUE      0x000
+#define SIFIVE_GPIO_REG_INPUT_EN   0x004
+#define SIFIVE_GPIO_REG_OUTPUT_EN  0x008
+#define SIFIVE_GPIO_REG_PORT       0x00C
+#define SIFIVE_GPIO_REG_PUE        0x010
+#define SIFIVE_GPIO_REG_DS         0x014
+#define SIFIVE_GPIO_REG_RISE_IE    0x018
+#define SIFIVE_GPIO_REG_RISE_IP    0x01C
+#define SIFIVE_GPIO_REG_FALL_IE    0x020
+#define SIFIVE_GPIO_REG_FALL_IP    0x024
+#define SIFIVE_GPIO_REG_HIGH_IE    0x028
+#define SIFIVE_GPIO_REG_HIGH_IP    0x02C
+#define SIFIVE_GPIO_REG_LOW_IE     0x030
+#define SIFIVE_GPIO_REG_LOW_IP     0x034
+#define SIFIVE_GPIO_REG_IOF_EN     0x038
+#define SIFIVE_GPIO_REG_IOF_SEL    0x03C
+#define SIFIVE_GPIO_REG_OUT_XOR    0x040
+
+struct SIFIVEGPIOState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion mmio;
+
+    qemu_irq irq[SIFIVE_GPIO_PINS];
+    qemu_irq output[SIFIVE_GPIO_PINS];
+
+    uint32_t value;             /* Actual value of the pin */
+    uint32_t input_en;
+    uint32_t output_en;
+    uint32_t port;              /* Pin value requested by the user */
+    uint32_t pue;
+    uint32_t ds;
+    uint32_t rise_ie;
+    uint32_t rise_ip;
+    uint32_t fall_ie;
+    uint32_t fall_ip;
+    uint32_t high_ie;
+    uint32_t high_ip;
+    uint32_t low_ie;
+    uint32_t low_ip;
+    uint32_t iof_en;
+    uint32_t iof_sel;
+    uint32_t out_xor;
+    uint32_t in;
+    uint32_t in_mask;
+
+    /* config */
+    uint32_t ngpio;
+};
+
+#endif /* SIFIVE_GPIO_H */
index e15f59c8b3c7646073e4067f298dbc88119b3867..a9840ed485404f9bdf6531bc563ca46242dc89c8 100644 (file)
@@ -48,6 +48,7 @@ typedef void (*hotplug_fn)(HotplugHandler *plug_handler,
  * @unplug: unplug callback.
  *          Used for device removal with devices that implement
  *          asynchronous and synchronous (surprise) removal.
+ * @is_hotpluggable_bus: called to check if bus/its parent allow hotplug on bus
  */
 struct HotplugHandlerClass {
     /* <private> */
@@ -58,6 +59,7 @@ struct HotplugHandlerClass {
     hotplug_fn plug;
     hotplug_fn unplug_request;
     hotplug_fn unplug;
+    bool (*is_hotpluggable_bus)(HotplugHandler *plug_handler, BusState *bus);
 };
 
 /**
index fc5301f29372e699c7bcd0767ca008af47679c74..045c1c8b09b3a20257b38f3aa74228524f95d7a8 100644 (file)
@@ -5,6 +5,6 @@
 #error Cannot include hw/hw.h from user emulation
 #endif
 
-void QEMU_NORETURN hw_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+G_NORETURN void hw_error(const char *fmt, ...) G_GNUC_PRINTF(1, 2);
 
 #endif
diff --git a/include/hw/hyperv/dynmem-proto.h b/include/hw/hyperv/dynmem-proto.h
new file mode 100644 (file)
index 0000000..a657786
--- /dev/null
@@ -0,0 +1,423 @@
+#ifndef HW_HYPERV_DYNMEM_PROTO_H
+#define HW_HYPERV_DYNMEM_PROTO_H
+
+/*
+ * Hyper-V Dynamic Memory Protocol definitions
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * Based on drivers/hv/hv_balloon.c from Linux kernel:
+ * Copyright (c) 2012, Microsoft Corporation.
+ *
+ * Author: K. Y. Srinivasan <kys@microsoft.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ * Protocol versions. The low word is the minor version, the high word the major
+ * version.
+ *
+ * History:
+ * Initial version 1.0
+ * Changed to 0.1 on 2009/03/25
+ * Changes to 0.2 on 2009/05/14
+ * Changes to 0.3 on 2009/12/03
+ * Changed to 1.0 on 2011/04/05
+ * Changed to 2.0 on 2019/12/10
+ */
+
+#define DYNMEM_MAKE_VERSION(Major, Minor) ((uint32_t)(((Major) << 16) | (Minor)))
+#define DYNMEM_MAJOR_VERSION(Version) ((uint32_t)(Version) >> 16)
+#define DYNMEM_MINOR_VERSION(Version) ((uint32_t)(Version) & 0xff)
+
+enum {
+    DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3),
+    DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0),
+    DYNMEM_PROTOCOL_VERSION_3 = DYNMEM_MAKE_VERSION(2, 0),
+
+    DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1,
+    DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2,
+    DYNMEM_PROTOCOL_VERSION_WIN10 = DYNMEM_PROTOCOL_VERSION_3,
+
+    DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10
+};
+
+
+
+/*
+ * Message Types
+ */
+
+enum dm_message_type {
+    /*
+     * Version 0.3
+     */
+    DM_ERROR = 0,
+    DM_VERSION_REQUEST = 1,
+    DM_VERSION_RESPONSE = 2,
+    DM_CAPABILITIES_REPORT = 3,
+    DM_CAPABILITIES_RESPONSE = 4,
+    DM_STATUS_REPORT = 5,
+    DM_BALLOON_REQUEST = 6,
+    DM_BALLOON_RESPONSE = 7,
+    DM_UNBALLOON_REQUEST = 8,
+    DM_UNBALLOON_RESPONSE = 9,
+    DM_MEM_HOT_ADD_REQUEST = 10,
+    DM_MEM_HOT_ADD_RESPONSE = 11,
+    DM_VERSION_03_MAX = 11,
+    /*
+     * Version 1.0.
+     */
+    DM_INFO_MESSAGE = 12,
+    DM_VERSION_1_MAX = 12,
+
+    /*
+     * Version 2.0
+     */
+    DM_MEM_HOT_REMOVE_REQUEST = 13,
+    DM_MEM_HOT_REMOVE_RESPONSE = 14
+};
+
+
+/*
+ * Structures defining the dynamic memory management
+ * protocol.
+ */
+
+union dm_version {
+    struct {
+        uint16_t minor_version;
+        uint16_t major_version;
+    };
+    uint32_t version;
+} QEMU_PACKED;
+
+
+union dm_caps {
+    struct {
+        uint64_t balloon:1;
+        uint64_t hot_add:1;
+        /*
+         * To support guests that may have alignment
+         * limitations on hot-add, the guest can specify
+         * its alignment requirements; a value of n
+         * represents an alignment of 2^n in mega bytes.
+         */
+        uint64_t hot_add_alignment:4;
+        uint64_t hot_remove:1;
+        uint64_t reservedz:57;
+    } cap_bits;
+    uint64_t caps;
+} QEMU_PACKED;
+
+union dm_mem_page_range {
+    struct  {
+        /*
+         * The PFN number of the first page in the range.
+         * 40 bits is the architectural limit of a PFN
+         * number for AMD64.
+         */
+        uint64_t start_page:40;
+        /*
+         * The number of pages in the range.
+         */
+        uint64_t page_cnt:24;
+    } finfo;
+    uint64_t  page_range;
+} QEMU_PACKED;
+
+
+
+/*
+ * The header for all dynamic memory messages:
+ *
+ * type: Type of the message.
+ * size: Size of the message in bytes; including the header.
+ * trans_id: The guest is responsible for manufacturing this ID.
+ */
+
+struct dm_header {
+    uint16_t type;
+    uint16_t size;
+    uint32_t trans_id;
+} QEMU_PACKED;
+
+/*
+ * A generic message format for dynamic memory.
+ * Specific message formats are defined later in the file.
+ */
+
+struct dm_message {
+    struct dm_header hdr;
+    uint8_t data[]; /* enclosed message */
+} QEMU_PACKED;
+
+
+/*
+ * Specific message types supporting the dynamic memory protocol.
+ */
+
+/*
+ * Version negotiation message. Sent from the guest to the host.
+ * The guest is free to try different versions until the host
+ * accepts the version.
+ *
+ * dm_version: The protocol version requested.
+ * is_last_attempt: If TRUE, this is the last version guest will request.
+ * reservedz: Reserved field, set to zero.
+ */
+
+struct dm_version_request {
+    struct dm_header hdr;
+    union dm_version version;
+    uint32_t is_last_attempt:1;
+    uint32_t reservedz:31;
+} QEMU_PACKED;
+
+/*
+ * Version response message; Host to Guest and indicates
+ * if the host has accepted the version sent by the guest.
+ *
+ * is_accepted: If TRUE, host has accepted the version and the guest
+ * should proceed to the next stage of the protocol. FALSE indicates that
+ * guest should re-try with a different version.
+ *
+ * reservedz: Reserved field, set to zero.
+ */
+
+struct dm_version_response {
+    struct dm_header hdr;
+    uint64_t is_accepted:1;
+    uint64_t reservedz:63;
+} QEMU_PACKED;
+
+/*
+ * Message reporting capabilities. This is sent from the guest to the
+ * host.
+ */
+
+struct dm_capabilities {
+    struct dm_header hdr;
+    union dm_caps caps;
+    uint64_t min_page_cnt;
+    uint64_t max_page_number;
+} QEMU_PACKED;
+
+/*
+ * Response to the capabilities message. This is sent from the host to the
+ * guest. This message notifies if the host has accepted the guest's
+ * capabilities. If the host has not accepted, the guest must shutdown
+ * the service.
+ *
+ * is_accepted: Indicates if the host has accepted guest's capabilities.
+ * reservedz: Must be 0.
+ */
+
+struct dm_capabilities_resp_msg {
+    struct dm_header hdr;
+    uint64_t is_accepted:1;
+    uint64_t hot_remove:1;
+    uint64_t suppress_pressure_reports:1;
+    uint64_t reservedz:61;
+} QEMU_PACKED;
+
+/*
+ * This message is used to report memory pressure from the guest.
+ * This message is not part of any transaction and there is no
+ * response to this message.
+ *
+ * num_avail: Available memory in pages.
+ * num_committed: Committed memory in pages.
+ * page_file_size: The accumulated size of all page files
+ *                 in the system in pages.
+ * zero_free: The number of zero and free pages.
+ * page_file_writes: The writes to the page file in pages.
+ * io_diff: An indicator of file cache efficiency or page file activity,
+ *          calculated as File Cache Page Fault Count - Page Read Count.
+ *          This value is in pages.
+ *
+ * Some of these metrics are Windows specific and fortunately
+ * the algorithm on the host side that computes the guest memory
+ * pressure only uses num_committed value.
+ */
+
+struct dm_status {
+    struct dm_header hdr;
+    uint64_t num_avail;
+    uint64_t num_committed;
+    uint64_t page_file_size;
+    uint64_t zero_free;
+    uint32_t page_file_writes;
+    uint32_t io_diff;
+} QEMU_PACKED;
+
+
+/*
+ * Message to ask the guest to allocate memory - balloon up message.
+ * This message is sent from the host to the guest. The guest may not be
+ * able to allocate as much memory as requested.
+ *
+ * num_pages: number of pages to allocate.
+ */
+
+struct dm_balloon {
+    struct dm_header hdr;
+    uint32_t num_pages;
+    uint32_t reservedz;
+} QEMU_PACKED;
+
+
+/*
+ * Balloon response message; this message is sent from the guest
+ * to the host in response to the balloon message.
+ *
+ * reservedz: Reserved; must be set to zero.
+ * more_pages: If FALSE, this is the last message of the transaction.
+ * if TRUE there will be at least one more message from the guest.
+ *
+ * range_count: The number of ranges in the range array.
+ *
+ * range_array: An array of page ranges returned to the host.
+ *
+ */
+
+struct dm_balloon_response {
+    struct dm_header hdr;
+    uint32_t reservedz;
+    uint32_t more_pages:1;
+    uint32_t range_count:31;
+    union dm_mem_page_range range_array[];
+} QEMU_PACKED;
+
+/*
+ * Un-balloon message; this message is sent from the host
+ * to the guest to give guest more memory.
+ *
+ * more_pages: If FALSE, this is the last message of the transaction.
+ * if TRUE there will be at least one more message from the guest.
+ *
+ * reservedz: Reserved; must be set to zero.
+ *
+ * range_count: The number of ranges in the range array.
+ *
+ * range_array: An array of page ranges returned to the host.
+ *
+ */
+
+struct dm_unballoon_request {
+    struct dm_header hdr;
+    uint32_t more_pages:1;
+    uint32_t reservedz:31;
+    uint32_t range_count;
+    union dm_mem_page_range range_array[];
+} QEMU_PACKED;
+
+/*
+ * Un-balloon response message; this message is sent from the guest
+ * to the host in response to an unballoon request.
+ *
+ */
+
+struct dm_unballoon_response {
+    struct dm_header hdr;
+} QEMU_PACKED;
+
+
+/*
+ * Hot add request message. Message sent from the host to the guest.
+ *
+ * mem_range: Memory range to hot add.
+ *
+ */
+
+struct dm_hot_add {
+    struct dm_header hdr;
+    union dm_mem_page_range range;
+} QEMU_PACKED;
+
+/*
+ * Hot add response message.
+ * This message is sent by the guest to report the status of a hot add request.
+ * If page_count is less than the requested page count, then the host should
+ * assume all further hot add requests will fail, since this indicates that
+ * the guest has hit an upper physical memory barrier.
+ *
+ * Hot adds may also fail due to low resources; in this case, the guest must
+ * not complete this message until the hot add can succeed, and the host must
+ * not send a new hot add request until the response is sent.
+ * If VSC fails to hot add memory DYNMEM_NUMBER_OF_UNSUCCESSFUL_HOTADD_ATTEMPTS
+ * times it fails the request.
+ *
+ *
+ * page_count: number of pages that were successfully hot added.
+ *
+ * result: result of the operation 1: success, 0: failure.
+ *
+ */
+
+struct dm_hot_add_response {
+    struct dm_header hdr;
+    uint32_t page_count;
+    uint32_t result;
+} QEMU_PACKED;
+
+struct dm_hot_remove {
+    struct dm_header hdr;
+    uint32_t virtual_node;
+    uint32_t page_count;
+    uint32_t qos_flags;
+    uint32_t reservedZ;
+} QEMU_PACKED;
+
+struct dm_hot_remove_response {
+    struct dm_header hdr;
+    uint32_t result;
+    uint32_t range_count;
+    uint64_t more_pages:1;
+    uint64_t reservedz:63;
+    union dm_mem_page_range range_array[];
+} QEMU_PACKED;
+
+#define DM_REMOVE_QOS_LARGE (1 << 0)
+#define DM_REMOVE_QOS_LOCAL (1 << 1)
+#define DM_REMOVE_QOS_MASK (0x3)
+
+/*
+ * Types of information sent from host to the guest.
+ */
+
+enum dm_info_type {
+    INFO_TYPE_MAX_PAGE_CNT = 0,
+    MAX_INFO_TYPE
+};
+
+
+/*
+ * Header for the information message.
+ */
+
+struct dm_info_header {
+    enum dm_info_type type;
+    uint32_t data_size;
+    uint8_t  data[];
+} QEMU_PACKED;
+
+/*
+ * This message is sent from the host to the guest to pass
+ * some relevant information (win8 addition).
+ *
+ * reserved: no used.
+ * info_size: size of the information blob.
+ * info: information blob.
+ */
+
+struct dm_info_msg {
+    struct dm_header hdr;
+    uint32_t reserved;
+    uint32_t info_size;
+    uint8_t  info[];
+};
+
+#endif
diff --git a/include/hw/hyperv/hv-balloon.h b/include/hw/hyperv/hv-balloon.h
new file mode 100644 (file)
index 0000000..c1efe70
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * QEMU Hyper-V Dynamic Memory Protocol driver
+ *
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HV_BALLOON_H
+#define HW_HV_BALLOON_H
+
+#include "qom/object.h"
+
+#define TYPE_HV_BALLOON "hv-balloon"
+OBJECT_DECLARE_SIMPLE_TYPE(HvBalloon, HV_BALLOON)
+
+#endif
diff --git a/include/hw/hyperv/hyperv-proto.h b/include/hw/hyperv/hyperv-proto.h
new file mode 100644 (file)
index 0000000..4a22973
--- /dev/null
@@ -0,0 +1,182 @@
+/*
+ * Definitions for Hyper-V guest/hypervisor interaction
+ *
+ * Copyright (c) 2017-2018 Virtuozzo International GmbH.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HYPERV_HYPERV_PROTO_H
+#define HW_HYPERV_HYPERV_PROTO_H
+
+#include "qemu/bitmap.h"
+
+/*
+ * Hypercall status code
+ */
+#define HV_STATUS_SUCCESS                     0
+#define HV_STATUS_INVALID_HYPERCALL_CODE      2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT     3
+#define HV_STATUS_INVALID_ALIGNMENT           4
+#define HV_STATUS_INVALID_PARAMETER           5
+#define HV_STATUS_INSUFFICIENT_MEMORY         11
+#define HV_STATUS_INVALID_PORT_ID             17
+#define HV_STATUS_INVALID_CONNECTION_ID       18
+#define HV_STATUS_INSUFFICIENT_BUFFERS        19
+#define HV_STATUS_NOT_ACKNOWLEDGED            20
+#define HV_STATUS_NO_DATA                     27
+
+/*
+ * Hypercall numbers
+ */
+#define HV_POST_MESSAGE                       0x005c
+#define HV_SIGNAL_EVENT                       0x005d
+#define HV_POST_DEBUG_DATA                    0x0069
+#define HV_RETRIEVE_DEBUG_DATA                0x006a
+#define HV_RESET_DEBUG_SESSION                0x006b
+#define HV_HYPERCALL_FAST                     (1u << 16)
+
+/*
+ * Message size
+ */
+#define HV_MESSAGE_PAYLOAD_SIZE               240
+
+/*
+ * Message types
+ */
+#define HV_MESSAGE_NONE                       0x00000000
+#define HV_MESSAGE_VMBUS                      0x00000001
+#define HV_MESSAGE_UNMAPPED_GPA               0x80000000
+#define HV_MESSAGE_GPA_INTERCEPT              0x80000001
+#define HV_MESSAGE_TIMER_EXPIRED              0x80000010
+#define HV_MESSAGE_INVALID_VP_REGISTER_VALUE  0x80000020
+#define HV_MESSAGE_UNRECOVERABLE_EXCEPTION    0x80000021
+#define HV_MESSAGE_UNSUPPORTED_FEATURE        0x80000022
+#define HV_MESSAGE_EVENTLOG_BUFFERCOMPLETE    0x80000040
+#define HV_MESSAGE_X64_IOPORT_INTERCEPT       0x80010000
+#define HV_MESSAGE_X64_MSR_INTERCEPT          0x80010001
+#define HV_MESSAGE_X64_CPUID_INTERCEPT        0x80010002
+#define HV_MESSAGE_X64_EXCEPTION_INTERCEPT    0x80010003
+#define HV_MESSAGE_X64_APIC_EOI               0x80010004
+#define HV_MESSAGE_X64_LEGACY_FP_ERROR        0x80010005
+
+/*
+ * Message flags
+ */
+#define HV_MESSAGE_FLAG_PENDING               0x1
+
+/*
+ * Number of synthetic interrupts
+ */
+#define HV_SINT_COUNT                         16
+
+/*
+ * Event flags number per SINT
+ */
+#define HV_EVENT_FLAGS_COUNT                  (256 * 8)
+
+/*
+ * Connection id valid bits
+ */
+#define HV_CONNECTION_ID_MASK                 0x00ffffff
+
+/*
+ * Input structure for POST_MESSAGE hypercall
+ */
+struct hyperv_post_message_input {
+    uint32_t connection_id;
+    uint32_t _reserved;
+    uint32_t message_type;
+    uint32_t payload_size;
+    uint8_t  payload[HV_MESSAGE_PAYLOAD_SIZE];
+};
+
+/*
+ * Input structure for SIGNAL_EVENT hypercall
+ */
+struct hyperv_signal_event_input {
+    uint32_t connection_id;
+    uint16_t flag_number;
+    uint16_t _reserved_zero;
+};
+
+/*
+ * SynIC message structures
+ */
+struct hyperv_message_header {
+    uint32_t message_type;
+    uint8_t  payload_size;
+    uint8_t  message_flags; /* HV_MESSAGE_FLAG_XX */
+    uint8_t  _reserved[2];
+    uint64_t sender;
+};
+
+struct hyperv_message {
+    struct hyperv_message_header header;
+    uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE];
+};
+
+struct hyperv_message_page {
+    struct hyperv_message slot[HV_SINT_COUNT];
+};
+
+/*
+ * SynIC event flags structures
+ */
+struct hyperv_event_flags {
+    DECLARE_BITMAP(flags, HV_EVENT_FLAGS_COUNT);
+};
+
+struct hyperv_event_flags_page {
+    struct hyperv_event_flags slot[HV_SINT_COUNT];
+};
+
+/*
+ * Kernel debugger structures
+ */
+
+/* Options flags for hyperv_reset_debug_session */
+#define HV_DEBUG_PURGE_INCOMING_DATA        0x00000001
+#define HV_DEBUG_PURGE_OUTGOING_DATA        0x00000002
+struct hyperv_reset_debug_session_input {
+    uint32_t options;
+} __attribute__ ((__packed__));
+
+struct hyperv_reset_debug_session_output {
+    uint32_t host_ip;
+    uint32_t target_ip;
+    uint16_t host_port;
+    uint16_t target_port;
+    uint8_t host_mac[6];
+    uint8_t target_mac[6];
+} __attribute__ ((__packed__));
+
+/* Options for hyperv_post_debug_data */
+#define HV_DEBUG_POST_LOOP                  0x00000001
+
+struct hyperv_post_debug_data_input {
+    uint32_t count;
+    uint32_t options;
+    /*uint8_t data[HV_HYP_PAGE_SIZE - 2 * sizeof(uint32_t)];*/
+} __attribute__ ((__packed__));
+
+struct hyperv_post_debug_data_output {
+    uint32_t pending_count;
+} __attribute__ ((__packed__));
+
+/* Options for hyperv_retrieve_debug_data */
+#define HV_DEBUG_RETRIEVE_LOOP              0x00000001
+#define HV_DEBUG_RETRIEVE_TEST_ACTIVITY     0x00000002
+
+struct hyperv_retrieve_debug_data_input {
+    uint32_t count;
+    uint32_t options;
+    uint64_t timeout;
+} __attribute__ ((__packed__));
+
+struct hyperv_retrieve_debug_data_output {
+    uint32_t retrieved_count;
+    uint32_t remaining_count;
+} __attribute__ ((__packed__));
+#endif
diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h
new file mode 100644 (file)
index 0000000..015c352
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * Hyper-V guest/hypervisor interaction
+ *
+ * Copyright (c) 2015-2018 Virtuozzo International GmbH.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HYPERV_HYPERV_H
+#define HW_HYPERV_HYPERV_H
+
+#include "cpu-qom.h"
+#include "hw/hyperv/hyperv-proto.h"
+
+typedef struct HvSintRoute HvSintRoute;
+
+/*
+ * Callback executed in a bottom-half when the status of posting the message
+ * becomes known, before unblocking the connection for further messages
+ */
+typedef void (*HvSintMsgCb)(void *data, int status);
+
+HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
+                                   HvSintMsgCb cb, void *cb_data);
+void hyperv_sint_route_ref(HvSintRoute *sint_route);
+void hyperv_sint_route_unref(HvSintRoute *sint_route);
+
+int hyperv_sint_route_set_sint(HvSintRoute *sint_route);
+
+/*
+ * Submit a message to be posted in vcpu context.  If the submission succeeds,
+ * the status of posting the message is reported via the callback associated
+ * with the @sint_route; until then no more messages are accepted.
+ */
+int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *msg);
+/*
+ * Set event flag @eventno, and signal the SINT if the flag has changed.
+ */
+int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno);
+
+/*
+ * Handler for messages arriving from the guest via HV_POST_MESSAGE hypercall.
+ * Executed in vcpu context.
+ */
+typedef uint16_t (*HvMsgHandler)(const struct hyperv_post_message_input *msg,
+                                 void *data);
+/*
+ * Associate @handler with the message connection @conn_id, such that @handler
+ * is called with @data when the guest executes HV_POST_MESSAGE hypercall on
+ * @conn_id.  If @handler is NULL clear the association.
+ */
+int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data);
+/*
+ * Associate @notifier with the event connection @conn_id, such that @notifier
+ * is signaled when the guest executes HV_SIGNAL_EVENT hypercall on @conn_id.
+ * If @notifier is NULL clear the association.
+ */
+int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier);
+
+/*
+ * Process HV_POST_MESSAGE hypercall: parse the data in the guest memory as
+ * specified in @param, and call the HvMsgHandler associated with the
+ * connection on the message contained therein.
+ */
+uint16_t hyperv_hcall_post_message(uint64_t param, bool fast);
+/*
+ * Process HV_SIGNAL_EVENT hypercall: signal the EventNotifier associated with
+ * the connection as specified in @param.
+ */
+uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast);
+
+static inline uint32_t hyperv_vp_index(CPUState *cs)
+{
+    return cs->cpu_index;
+}
+
+void hyperv_synic_add(CPUState *cs);
+void hyperv_synic_reset(CPUState *cs);
+void hyperv_synic_update(CPUState *cs, bool enable,
+                         hwaddr msg_page_addr, hwaddr event_page_addr);
+bool hyperv_is_synic_enabled(void);
+
+/*
+ * Process HVCALL_RESET_DEBUG_SESSION hypercall.
+ */
+uint16_t hyperv_hcall_reset_dbg_session(uint64_t outgpa);
+/*
+ * Process HVCALL_RETREIVE_DEBUG_DATA hypercall.
+ */
+uint16_t hyperv_hcall_retreive_dbg_data(uint64_t ingpa, uint64_t outgpa,
+                                        bool fast);
+/*
+ * Process HVCALL_POST_DEBUG_DATA hypercall.
+ */
+uint16_t hyperv_hcall_post_dbg_data(uint64_t ingpa, uint64_t outgpa, bool fast);
+
+uint32_t hyperv_syndbg_send(uint64_t ingpa, uint32_t count);
+uint32_t hyperv_syndbg_recv(uint64_t ingpa, uint32_t count);
+void hyperv_syndbg_set_pending_page(uint64_t ingpa);
+uint64_t hyperv_syndbg_query_options(void);
+
+typedef enum HvSynthDbgMsgType {
+    HV_SYNDBG_MSG_CONNECTION_INFO,
+    HV_SYNDBG_MSG_SEND,
+    HV_SYNDBG_MSG_RECV,
+    HV_SYNDBG_MSG_SET_PENDING_PAGE,
+    HV_SYNDBG_MSG_QUERY_OPTIONS
+} HvDbgSynthMsgType;
+
+typedef struct HvSynDbgMsg {
+    HvDbgSynthMsgType type;
+    union {
+        struct {
+            uint32_t host_ip;
+            uint16_t host_port;
+        } connection_info;
+        struct {
+            uint64_t buf_gpa;
+            uint32_t count;
+            uint32_t pending_count;
+            bool is_raw;
+        } send;
+        struct {
+            uint64_t buf_gpa;
+            uint32_t count;
+            uint32_t options;
+            uint64_t timeout;
+            uint32_t retrieved_count;
+            bool is_raw;
+        } recv;
+        struct {
+            uint64_t buf_gpa;
+        } pending_page;
+        struct {
+            uint64_t options;
+        } query_options;
+    } u;
+} HvSynDbgMsg;
+typedef uint16_t (*HvSynDbgHandler)(void *context, HvSynDbgMsg *msg);
+void hyperv_set_syndbg_handler(HvSynDbgHandler handler, void *context);
+#endif
diff --git a/include/hw/hyperv/vmbus-bridge.h b/include/hw/hyperv/vmbus-bridge.h
new file mode 100644 (file)
index 0000000..1e54195
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * QEMU Hyper-V VMBus root bridge
+ *
+ * Copyright (c) 2017-2018 Virtuozzo International GmbH.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HYPERV_VMBUS_BRIDGE_H
+#define HW_HYPERV_VMBUS_BRIDGE_H
+
+#include "hw/sysbus.h"
+#include "hw/hyperv/vmbus.h"
+#include "qom/object.h"
+
+#define TYPE_VMBUS_BRIDGE "vmbus-bridge"
+
+struct VMBusBridge {
+    SysBusDevice parent_obj;
+
+    uint8_t irq;
+
+    VMBus *bus;
+};
+
+OBJECT_DECLARE_SIMPLE_TYPE(VMBusBridge, VMBUS_BRIDGE)
+
+static inline VMBusBridge *vmbus_bridge_find(void)
+{
+    return VMBUS_BRIDGE(object_resolve_path_type("", TYPE_VMBUS_BRIDGE, NULL));
+}
+
+#endif
diff --git a/include/hw/hyperv/vmbus-proto.h b/include/hw/hyperv/vmbus-proto.h
new file mode 100644 (file)
index 0000000..4628d3b
--- /dev/null
@@ -0,0 +1,222 @@
+/*
+ * QEMU Hyper-V VMBus support
+ *
+ * Copyright (c) 2017-2018 Virtuozzo International GmbH.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HYPERV_VMBUS_PROTO_H
+#define HW_HYPERV_VMBUS_PROTO_H
+
+#define VMBUS_VERSION_WS2008                    ((0 << 16) | (13))
+#define VMBUS_VERSION_WIN7                      ((1 << 16) | (1))
+#define VMBUS_VERSION_WIN8                      ((2 << 16) | (4))
+#define VMBUS_VERSION_WIN8_1                    ((3 << 16) | (0))
+#define VMBUS_VERSION_WIN10                     ((4 << 16) | (0))
+#define VMBUS_VERSION_INVAL                     -1
+#define VMBUS_VERSION_CURRENT                   VMBUS_VERSION_WIN10
+
+#define VMBUS_MESSAGE_CONNECTION_ID             1
+#define VMBUS_EVENT_CONNECTION_ID               2
+#define VMBUS_MONITOR_CONNECTION_ID             3
+#define VMBUS_SINT                              2
+
+#define VMBUS_MSG_INVALID               0
+#define VMBUS_MSG_OFFERCHANNEL          1
+#define VMBUS_MSG_RESCIND_CHANNELOFFER  2
+#define VMBUS_MSG_REQUESTOFFERS         3
+#define VMBUS_MSG_ALLOFFERS_DELIVERED   4
+#define VMBUS_MSG_OPENCHANNEL           5
+#define VMBUS_MSG_OPENCHANNEL_RESULT    6
+#define VMBUS_MSG_CLOSECHANNEL          7
+#define VMBUS_MSG_GPADL_HEADER          8
+#define VMBUS_MSG_GPADL_BODY            9
+#define VMBUS_MSG_GPADL_CREATED         10
+#define VMBUS_MSG_GPADL_TEARDOWN        11
+#define VMBUS_MSG_GPADL_TORNDOWN        12
+#define VMBUS_MSG_RELID_RELEASED        13
+#define VMBUS_MSG_INITIATE_CONTACT      14
+#define VMBUS_MSG_VERSION_RESPONSE      15
+#define VMBUS_MSG_UNLOAD                16
+#define VMBUS_MSG_UNLOAD_RESPONSE       17
+#define VMBUS_MSG_COUNT                 18
+
+#define VMBUS_MESSAGE_SIZE_ALIGN        sizeof(uint64_t)
+
+#define VMBUS_PACKET_INVALID                    0x0
+#define VMBUS_PACKET_SYNCH                      0x1
+#define VMBUS_PACKET_ADD_XFER_PAGESET           0x2
+#define VMBUS_PACKET_RM_XFER_PAGESET            0x3
+#define VMBUS_PACKET_ESTABLISH_GPADL            0x4
+#define VMBUS_PACKET_TEARDOWN_GPADL             0x5
+#define VMBUS_PACKET_DATA_INBAND                0x6
+#define VMBUS_PACKET_DATA_USING_XFER_PAGES      0x7
+#define VMBUS_PACKET_DATA_USING_GPADL           0x8
+#define VMBUS_PACKET_DATA_USING_GPA_DIRECT      0x9
+#define VMBUS_PACKET_CANCEL_REQUEST             0xa
+#define VMBUS_PACKET_COMP                       0xb
+#define VMBUS_PACKET_DATA_USING_ADDITIONAL_PKT  0xc
+#define VMBUS_PACKET_ADDITIONAL_DATA            0xd
+
+#define VMBUS_CHANNEL_USER_DATA_SIZE            120
+
+#define VMBUS_OFFER_MONITOR_ALLOCATED           0x1
+#define VMBUS_OFFER_INTERRUPT_DEDICATED         0x1
+
+#define VMBUS_RING_BUFFER_FEAT_PENDING_SZ       (1ul << 0)
+
+#define VMBUS_CHANNEL_ENUMERATE_DEVICE_INTERFACE      0x1
+#define VMBUS_CHANNEL_SERVER_SUPPORTS_TRANSFER_PAGES  0x2
+#define VMBUS_CHANNEL_SERVER_SUPPORTS_GPADLS          0x4
+#define VMBUS_CHANNEL_NAMED_PIPE_MODE                 0x10
+#define VMBUS_CHANNEL_LOOPBACK_OFFER                  0x100
+#define VMBUS_CHANNEL_PARENT_OFFER                    0x200
+#define VMBUS_CHANNEL_REQUEST_MONITORED_NOTIFICATION  0x400
+#define VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER            0x2000
+
+#define VMBUS_PACKET_FLAG_REQUEST_COMPLETION    1
+
+typedef struct vmbus_message_header {
+    uint32_t message_type;
+    uint32_t _padding;
+} vmbus_message_header;
+
+typedef struct vmbus_message_initiate_contact {
+    vmbus_message_header header;
+    uint32_t version_requested;
+    uint32_t target_vcpu;
+    uint64_t interrupt_page;
+    uint64_t monitor_page1;
+    uint64_t monitor_page2;
+} vmbus_message_initiate_contact;
+
+typedef struct vmbus_message_version_response {
+    vmbus_message_header header;
+    uint8_t version_supported;
+    uint8_t status;
+} vmbus_message_version_response;
+
+typedef struct vmbus_message_offer_channel {
+    vmbus_message_header header;
+    uint8_t  type_uuid[16];
+    uint8_t  instance_uuid[16];
+    uint64_t _reserved1;
+    uint64_t _reserved2;
+    uint16_t channel_flags;
+    uint16_t mmio_size_mb;
+    uint8_t  user_data[VMBUS_CHANNEL_USER_DATA_SIZE];
+    uint16_t sub_channel_index;
+    uint16_t _reserved3;
+    uint32_t child_relid;
+    uint8_t  monitor_id;
+    uint8_t  monitor_flags;
+    uint16_t interrupt_flags;
+    uint32_t connection_id;
+} vmbus_message_offer_channel;
+
+typedef struct vmbus_message_rescind_channel_offer {
+    vmbus_message_header header;
+    uint32_t child_relid;
+} vmbus_message_rescind_channel_offer;
+
+typedef struct vmbus_gpa_range {
+    uint32_t byte_count;
+    uint32_t byte_offset;
+    uint64_t pfn_array[];
+} vmbus_gpa_range;
+
+typedef struct vmbus_message_gpadl_header {
+    vmbus_message_header header;
+    uint32_t child_relid;
+    uint32_t gpadl_id;
+    uint16_t range_buflen;
+    uint16_t rangecount;
+    vmbus_gpa_range range[];
+} QEMU_PACKED vmbus_message_gpadl_header;
+
+typedef struct vmbus_message_gpadl_body {
+    vmbus_message_header header;
+    uint32_t message_number;
+    uint32_t gpadl_id;
+    uint64_t pfn_array[];
+} vmbus_message_gpadl_body;
+
+typedef struct vmbus_message_gpadl_created {
+    vmbus_message_header header;
+    uint32_t child_relid;
+    uint32_t gpadl_id;
+    uint32_t status;
+} vmbus_message_gpadl_created;
+
+typedef struct vmbus_message_gpadl_teardown {
+    vmbus_message_header header;
+    uint32_t child_relid;
+    uint32_t gpadl_id;
+} vmbus_message_gpadl_teardown;
+
+typedef struct vmbus_message_gpadl_torndown {
+    vmbus_message_header header;
+    uint32_t gpadl_id;
+} vmbus_message_gpadl_torndown;
+
+typedef struct vmbus_message_open_channel {
+    vmbus_message_header header;
+    uint32_t child_relid;
+    uint32_t open_id;
+    uint32_t ring_buffer_gpadl_id;
+    uint32_t target_vp;
+    uint32_t ring_buffer_offset;
+    uint8_t  user_data[VMBUS_CHANNEL_USER_DATA_SIZE];
+} vmbus_message_open_channel;
+
+typedef struct vmbus_message_open_result {
+    vmbus_message_header header;
+    uint32_t child_relid;
+    uint32_t open_id;
+    uint32_t status;
+} vmbus_message_open_result;
+
+typedef struct vmbus_message_close_channel {
+    vmbus_message_header header;
+    uint32_t child_relid;
+} vmbus_message_close_channel;
+
+typedef struct vmbus_ring_buffer {
+    uint32_t write_index;
+    uint32_t read_index;
+    uint32_t interrupt_mask;
+    uint32_t pending_send_sz;
+    uint32_t _reserved1[12];
+    uint32_t feature_bits;
+} vmbus_ring_buffer;
+
+typedef struct vmbus_packet_hdr {
+    uint16_t type;
+    uint16_t offset_qwords;
+    uint16_t len_qwords;
+    uint16_t flags;
+    uint64_t transaction_id;
+} vmbus_packet_hdr;
+
+typedef struct vmbus_pkt_gpa_direct {
+    uint32_t _reserved;
+    uint32_t rangecount;
+    vmbus_gpa_range range[];
+} vmbus_pkt_gpa_direct;
+
+typedef struct vmbus_xferpg_range {
+    uint32_t byte_count;
+    uint32_t byte_offset;
+} vmbus_xferpg_range;
+
+typedef struct vmbus_pkt_xferpg {
+    uint16_t buffer_id;
+    uint8_t sender_owns_set;
+    uint8_t _reserved;
+    uint32_t rangecount;
+    vmbus_xferpg_range range[];
+} vmbus_pkt_xferpg;
+
+#endif
diff --git a/include/hw/hyperv/vmbus.h b/include/hw/hyperv/vmbus.h
new file mode 100644 (file)
index 0000000..5c50585
--- /dev/null
@@ -0,0 +1,226 @@
+/*
+ * QEMU Hyper-V VMBus
+ *
+ * Copyright (c) 2017-2018 Virtuozzo International GmbH.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_HYPERV_VMBUS_H
+#define HW_HYPERV_VMBUS_H
+
+#include "sysemu/sysemu.h"
+#include "sysemu/dma.h"
+#include "hw/qdev-core.h"
+#include "migration/vmstate.h"
+#include "hw/hyperv/vmbus-proto.h"
+#include "qemu/uuid.h"
+#include "qom/object.h"
+
+#define TYPE_VMBUS_DEVICE "vmbus-dev"
+
+OBJECT_DECLARE_TYPE(VMBusDevice, VMBusDeviceClass,
+                    VMBUS_DEVICE)
+
+#define TYPE_VMBUS "vmbus"
+OBJECT_DECLARE_SIMPLE_TYPE(VMBus, VMBUS)
+
+/*
+ * Object wrapping a GPADL -- GPA Descriptor List -- an array of guest physical
+ * pages, to be used for various buffers shared between the host and the guest.
+ */
+typedef struct VMBusGpadl VMBusGpadl;
+/*
+ * VMBus channel -- a pair of ring buffers for either direction, placed within
+ * one GPADL, and the associated notification means.
+ */
+typedef struct VMBusChannel VMBusChannel;
+/*
+ * Base class for VMBus devices.  Includes one or more channels.  Identified by
+ * class GUID and instance GUID.
+ */
+
+typedef void(*VMBusChannelNotifyCb)(struct VMBusChannel *chan);
+
+struct VMBusDeviceClass {
+    DeviceClass parent;
+
+    QemuUUID classid;
+    QemuUUID instanceid;     /* Fixed UUID for singleton devices */
+    uint16_t channel_flags;
+    uint16_t mmio_size_mb;
+
+    /* Extensions to standard device callbacks */
+    void (*vmdev_realize)(VMBusDevice *vdev, Error **errp);
+    void (*vmdev_unrealize)(VMBusDevice *vdev);
+    void (*vmdev_reset)(VMBusDevice *vdev);
+    /*
+     * Calculate the number of channels based on the device properties.  Called
+     * at realize time.
+     **/
+    uint16_t (*num_channels)(VMBusDevice *vdev);
+    /*
+     * Device-specific actions to complete the otherwise successful process of
+     * opening a channel.
+     * Return 0 on success, -errno on failure.
+     */
+    int (*open_channel)(VMBusChannel *chan);
+    /*
+     * Device-specific actions to perform before closing a channel.
+     */
+    void (*close_channel)(VMBusChannel *chan);
+    /*
+     * Main device worker; invoked in response to notifications from either
+     * side, when there's work to do with the data in the channel ring buffers.
+     */
+    VMBusChannelNotifyCb chan_notify_cb;
+};
+
+struct VMBusDevice {
+    DeviceState parent;
+    QemuUUID instanceid;
+    uint16_t num_channels;
+    VMBusChannel *channels;
+    AddressSpace *dma_as;
+};
+
+extern const VMStateDescription vmstate_vmbus_dev;
+
+/*
+ * A unit of work parsed out of a message in the receive (i.e. guest->host)
+ * ring buffer of a channel.  It's supposed to be subclassed (through
+ * embedding) by the specific devices.
+ */
+typedef struct VMBusChanReq {
+    VMBusChannel *chan;
+    uint16_t pkt_type;
+    uint32_t msglen;
+    void *msg;
+    uint64_t transaction_id;
+    bool need_comp;
+    QEMUSGList sgl;
+} VMBusChanReq;
+
+VMBusDevice *vmbus_channel_device(VMBusChannel *chan);
+VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx);
+uint32_t vmbus_channel_idx(VMBusChannel *chan);
+bool vmbus_channel_is_open(VMBusChannel *chan);
+
+/*
+ * Notify (on guest's behalf) the host side of the channel that there's data in
+ * the ringbuffer to process.
+ */
+void vmbus_channel_notify_host(VMBusChannel *chan);
+
+/*
+ * Reserve space for a packet in the send (i.e. host->guest) ringbuffer.  If
+ * there isn't enough room, indicate that to the guest, to be notified when it
+ * becomes available.
+ * Return 0 on success, negative errno on failure.
+ * The ringbuffer indices are NOT updated, the requested space indicator may.
+ */
+int vmbus_channel_reserve(VMBusChannel *chan,
+                          uint32_t desclen, uint32_t msglen);
+
+/*
+ * Send a packet to the guest.  The space for the packet MUST be reserved
+ * first.
+ * Return total number of bytes placed in the send ringbuffer on success,
+ * negative errno on failure.
+ * The ringbuffer indices are updated on success, and the guest is signaled if
+ * needed.
+ */
+ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type,
+                           void *desc, uint32_t desclen,
+                           void *msg, uint32_t msglen,
+                           bool need_comp, uint64_t transaction_id);
+
+/*
+ * Prepare to fetch a batch of packets from the receive ring buffer.
+ * Return 0 on success, negative errno on failure.
+ */
+int vmbus_channel_recv_start(VMBusChannel *chan);
+
+/*
+ * Shortcut for a common case of sending a simple completion packet with no
+ * auxiliary descriptors.
+ */
+ssize_t vmbus_channel_send_completion(VMBusChanReq *req,
+                                      void *msg, uint32_t msglen);
+
+/*
+ * Peek at the receive (i.e. guest->host) ring buffer and extract a unit of
+ * work (a device-specific subclass of VMBusChanReq) from a packet if there's
+ * one.
+ * Return an allocated buffer, containing the request of @size with filled
+ * VMBusChanReq at the beginning, followed by the message payload, or NULL on
+ * failure.
+ * The ringbuffer indices are NOT updated, nor is the private copy of the read
+ * index.
+ */
+void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size);
+
+/*
+ * Update the private copy of the read index once the preceding peek is deemed
+ * successful.
+ * The ringbuffer indices are NOT updated.
+ */
+void vmbus_channel_recv_pop(VMBusChannel *chan);
+
+/*
+ * Propagate the private copy of the read index into the receive ring buffer,
+ * and thus complete the reception of a series of packets.  Notify guest if
+ * needed.
+ * Return the number of bytes popped off the receive ring buffer by the
+ * preceding recv_peek/recv_pop calls on success, negative errno on failure.
+ */
+ssize_t vmbus_channel_recv_done(VMBusChannel *chan);
+
+/*
+ * Free the request allocated by vmbus_channel_recv_peek, together with its
+ * fields.
+ */
+void vmbus_free_req(void *req);
+
+/*
+ * Find and reference a GPADL by @gpadl_id.
+ * If not found return NULL.
+ */
+VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id);
+
+/*
+ * Unreference @gpadl.  If the reference count drops to zero, free it.
+ * @gpadl may be NULL, in which case nothing is done.
+ */
+void vmbus_put_gpadl(VMBusGpadl *gpadl);
+
+/*
+ * Calculate total length in bytes of @gpadl.
+ * @gpadl must be valid.
+ */
+uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl);
+
+/*
+ * Copy data from @iov to @gpadl at offset @off.
+ * Return the number of bytes copied, or a negative status on failure.
+ */
+ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off,
+                           const struct iovec *iov, size_t iov_cnt);
+
+/*
+ * Map SGList contained in the request @req, at offset @off and no more than
+ * @len bytes, for io in direction @dir, and populate @iov with the mapped
+ * iovecs.
+ * Return the number of iovecs mapped, or negative status on failure.
+ */
+int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
+                  unsigned iov_cnt, size_t len, size_t off);
+
+/*
+ * Unmap *iov mapped with vmbus_map_sgl, marking the number of bytes @accessed.
+ */
+void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
+                     unsigned iov_cnt, size_t accessed);
+
+#endif
diff --git a/include/hw/i2c/allwinner-i2c.h b/include/hw/i2c/allwinner-i2c.h
new file mode 100644 (file)
index 0000000..0e325d2
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ *  Allwinner I2C Bus Serial Interface registers definition
+ *
+ *  Copyright (C) 2022 Strahinja Jankovic. <strahinja.p.jankovic@gmail.com>
+ *
+ *  This file is derived from IMX I2C controller,
+ *  by Jean-Christophe DUBOIS .
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef ALLWINNER_I2C_H
+#define ALLWINNER_I2C_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_AW_I2C "allwinner.i2c"
+
+/** Allwinner I2C sun6i family and newer (A31, H2+, H3, etc) */
+#define TYPE_AW_I2C_SUN6I    TYPE_AW_I2C "-sun6i"
+
+OBJECT_DECLARE_SIMPLE_TYPE(AWI2CState, AW_I2C)
+
+#define AW_I2C_MEM_SIZE         0x24
+
+struct AWI2CState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    I2CBus *bus;
+    qemu_irq irq;
+
+    uint8_t addr;
+    uint8_t xaddr;
+    uint8_t data;
+    uint8_t cntr;
+    uint8_t stat;
+    uint8_t ccr;
+    uint8_t srst;
+    uint8_t efr;
+    uint8_t lcr;
+
+    bool irq_clear_inverted;
+};
+
+#endif /* ALLWINNER_I2C_H */
diff --git a/include/hw/i2c/arm_sbcon_i2c.h b/include/hw/i2c/arm_sbcon_i2c.h
new file mode 100644 (file)
index 0000000..da9b5e8
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * ARM SBCon two-wire serial bus interface (I2C bitbang)
+ *   a.k.a.
+ * ARM Versatile I2C controller
+ *
+ * Copyright (c) 2006-2007 CodeSourcery.
+ * Copyright (c) 2012 Oskar Andero <oskar.andero@gmail.com>
+ * Copyright (C) 2020 Philippe Mathieu-Daudé <f4bug@amsat.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_I2C_ARM_SBCON_I2C_H
+#define HW_I2C_ARM_SBCON_I2C_H
+
+#include "hw/sysbus.h"
+#include "hw/i2c/bitbang_i2c.h"
+#include "qom/object.h"
+
+#define TYPE_ARM_SBCON_I2C "versatile_i2c"
+
+typedef struct ArmSbconI2CState ArmSbconI2CState;
+DECLARE_INSTANCE_CHECKER(ArmSbconI2CState, ARM_SBCON_I2C, TYPE_ARM_SBCON_I2C)
+
+struct ArmSbconI2CState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion iomem;
+    bitbang_i2c_interface bitbang;
+    int out;
+    int in;
+};
+
+#endif /* HW_I2C_ARM_SBCON_I2C_H */
diff --git a/include/hw/i2c/aspeed_i2c.h b/include/hw/i2c/aspeed_i2c.h
new file mode 100644 (file)
index 0000000..a064479
--- /dev/null
@@ -0,0 +1,389 @@
+/*
+ *  ASPEED AST2400 I2C Controller
+ *
+ *  Copyright (C) 2016 IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef ASPEED_I2C_H
+#define ASPEED_I2C_H
+
+#include "hw/i2c/i2c.h"
+#include "hw/sysbus.h"
+#include "hw/registerfields.h"
+#include "qom/object.h"
+
+#define TYPE_ASPEED_I2C "aspeed.i2c"
+#define TYPE_ASPEED_2400_I2C TYPE_ASPEED_I2C "-ast2400"
+#define TYPE_ASPEED_2500_I2C TYPE_ASPEED_I2C "-ast2500"
+#define TYPE_ASPEED_2600_I2C TYPE_ASPEED_I2C "-ast2600"
+#define TYPE_ASPEED_1030_I2C TYPE_ASPEED_I2C "-ast1030"
+OBJECT_DECLARE_TYPE(AspeedI2CState, AspeedI2CClass, ASPEED_I2C)
+
+#define ASPEED_I2C_NR_BUSSES 16
+#define ASPEED_I2C_MAX_POOL_SIZE 0x800
+#define ASPEED_I2C_OLD_NUM_REG 11
+#define ASPEED_I2C_NEW_NUM_REG 22
+
+#define A_I2CD_M_STOP_CMD       BIT(5)
+#define A_I2CD_M_RX_CMD         BIT(3)
+#define A_I2CD_M_TX_CMD         BIT(1)
+#define A_I2CD_M_START_CMD      BIT(0)
+
+#define A_I2CD_MASTER_EN        BIT(0)
+
+/* Tx State Machine */
+#define   I2CD_TX_STATE_MASK                  0xf
+#define     I2CD_IDLE                         0x0
+#define     I2CD_MACTIVE                      0x8
+#define     I2CD_MSTART                       0x9
+#define     I2CD_MSTARTR                      0xa
+#define     I2CD_MSTOP                        0xb
+#define     I2CD_MTXD                         0xc
+#define     I2CD_MRXACK                       0xd
+#define     I2CD_MRXD                         0xe
+#define     I2CD_MTXACK                       0xf
+#define     I2CD_SWAIT                        0x1
+#define     I2CD_SRXD                         0x4
+#define     I2CD_STXACK                       0x5
+#define     I2CD_STXD                         0x6
+#define     I2CD_SRXACK                       0x7
+#define     I2CD_RECOVER                      0x3
+
+/* I2C Global Register */
+REG32(I2C_CTRL_STATUS, 0x0) /* Device Interrupt Status */
+REG32(I2C_CTRL_ASSIGN, 0x8) /* Device Interrupt Target Assignment */
+REG32(I2C_CTRL_GLOBAL, 0xC) /* Global Control Register */
+    FIELD(I2C_CTRL_GLOBAL, REG_MODE, 2, 1)
+    FIELD(I2C_CTRL_GLOBAL, SRAM_EN, 0, 1)
+REG32(I2C_CTRL_NEW_CLK_DIVIDER, 0x10) /* New mode clock divider */
+
+/* I2C Old Mode Device (Bus) Register */
+REG32(I2CD_FUN_CTRL, 0x0) /* I2CD Function Control  */
+    FIELD(I2CD_FUN_CTRL, POOL_PAGE_SEL, 20, 3) /* AST2400 */
+    SHARED_FIELD(M_SDA_LOCK_EN, 16, 1)
+    SHARED_FIELD(MULTI_MASTER_DIS, 15, 1)
+    SHARED_FIELD(M_SCL_DRIVE_EN, 14, 1)
+    SHARED_FIELD(MSB_STS, 9, 1)
+    SHARED_FIELD(SDA_DRIVE_IT_EN, 8, 1)
+    SHARED_FIELD(M_SDA_DRIVE_IT_EN, 7, 1)
+    SHARED_FIELD(M_HIGH_SPEED_EN, 6, 1)
+    SHARED_FIELD(DEF_ADDR_EN, 5, 1)
+    SHARED_FIELD(DEF_ALERT_EN, 4, 1)
+    SHARED_FIELD(DEF_ARP_EN, 3, 1)
+    SHARED_FIELD(DEF_GCALL_EN, 2, 1)
+    SHARED_FIELD(SLAVE_EN, 1, 1)
+    SHARED_FIELD(MASTER_EN, 0, 1)
+REG32(I2CD_AC_TIMING1, 0x04) /* Clock and AC Timing Control #1 */
+REG32(I2CD_AC_TIMING2, 0x08) /* Clock and AC Timing Control #2 */
+REG32(I2CD_INTR_CTRL, 0x0C)  /* I2CD Interrupt Control */
+REG32(I2CD_INTR_STS, 0x10)   /* I2CD Interrupt Status */
+    SHARED_FIELD(SLAVE_ADDR_MATCH, 31, 1)    /* 0: addr1 1: addr2 */
+    SHARED_FIELD(SLAVE_ADDR_RX_PENDING, 29, 1)
+    SHARED_FIELD(SLAVE_INACTIVE_TIMEOUT, 15, 1)
+    SHARED_FIELD(SDA_DL_TIMEOUT, 14, 1)
+    SHARED_FIELD(BUS_RECOVER_DONE, 13, 1)
+    SHARED_FIELD(SMBUS_ALERT, 12, 1)                    /* Bus [0-3] only */
+    FIELD(I2CD_INTR_STS, SMBUS_ARP_ADDR, 11, 1)         /* Removed */
+    FIELD(I2CD_INTR_STS, SMBUS_DEV_ALERT_ADDR, 10, 1)   /* Removed */
+    FIELD(I2CD_INTR_STS, SMBUS_DEF_ADDR, 9, 1)          /* Removed */
+    FIELD(I2CD_INTR_STS, GCALL_ADDR, 8, 1)              /* Removed */
+    FIELD(I2CD_INTR_STS, SLAVE_ADDR_RX_MATCH, 7, 1)     /* use RX_DONE */
+    SHARED_FIELD(SCL_TIMEOUT, 6, 1)
+    SHARED_FIELD(ABNORMAL, 5, 1)
+    SHARED_FIELD(NORMAL_STOP, 4, 1)
+    SHARED_FIELD(ARBIT_LOSS, 3, 1)
+    SHARED_FIELD(RX_DONE, 2, 1)
+    SHARED_FIELD(TX_NAK, 1, 1)
+    SHARED_FIELD(TX_ACK, 0, 1)
+REG32(I2CD_CMD, 0x14) /* I2CD Command/Status */
+    SHARED_FIELD(SDA_OE, 28, 1)
+    SHARED_FIELD(SDA_O, 27, 1)
+    SHARED_FIELD(SCL_OE, 26, 1)
+    SHARED_FIELD(SCL_O, 25, 1)
+    SHARED_FIELD(TX_TIMING, 23, 2)
+    SHARED_FIELD(TX_STATE, 19, 4)
+    SHARED_FIELD(SCL_LINE_STS, 18, 1)
+    SHARED_FIELD(SDA_LINE_STS, 17, 1)
+    SHARED_FIELD(BUS_BUSY_STS, 16, 1)
+    SHARED_FIELD(SDA_OE_OUT_DIR, 15, 1)
+    SHARED_FIELD(SDA_O_OUT_DIR, 14, 1)
+    SHARED_FIELD(SCL_OE_OUT_DIR, 13, 1)
+    SHARED_FIELD(SCL_O_OUT_DIR, 12, 1)
+    SHARED_FIELD(BUS_RECOVER_CMD_EN, 11, 1)
+    SHARED_FIELD(S_ALT_EN, 10, 1)
+    /* Command Bits */
+    SHARED_FIELD(RX_DMA_EN, 9, 1)
+    SHARED_FIELD(TX_DMA_EN, 8, 1)
+    SHARED_FIELD(RX_BUFF_EN, 7, 1)
+    SHARED_FIELD(TX_BUFF_EN, 6, 1)
+    SHARED_FIELD(M_STOP_CMD, 5, 1)
+    SHARED_FIELD(M_S_RX_CMD_LAST, 4, 1)
+    SHARED_FIELD(M_RX_CMD, 3, 1)
+    SHARED_FIELD(S_TX_CMD, 2, 1)
+    SHARED_FIELD(M_TX_CMD, 1, 1)
+    SHARED_FIELD(M_START_CMD, 0, 1)
+REG32(I2CD_DEV_ADDR, 0x18) /* Slave Device Address */
+    SHARED_FIELD(SLAVE_DEV_ADDR1, 0, 7)
+REG32(I2CD_POOL_CTRL, 0x1C) /* Pool Buffer Control */
+    SHARED_FIELD(RX_COUNT, 24, 6)
+    SHARED_FIELD(RX_SIZE, 16, 5)
+    SHARED_FIELD(TX_COUNT, 8, 5)
+    FIELD(I2CD_POOL_CTRL, OFFSET, 2, 6) /* AST2400 */
+    SHARED_FIELD(BUF_ORGANIZATION, 0, 1) /* AST2600 */
+REG32(I2CD_BYTE_BUF, 0x20) /* Transmit/Receive Byte Buffer */
+    SHARED_FIELD(RX_BUF, 8, 8)
+    SHARED_FIELD(TX_BUF, 0, 8)
+REG32(I2CD_DMA_ADDR, 0x24) /* DMA Buffer Address */
+REG32(I2CD_DMA_LEN, 0x28) /* DMA Transfer Length < 4KB */
+
+/* I2C New Mode Device (Bus) Register */
+REG32(I2CC_FUN_CTRL, 0x0)
+    FIELD(I2CC_FUN_CTRL, RB_EARLY_DONE_EN, 22, 1)
+    FIELD(I2CC_FUN_CTRL, DMA_DIS_AUTO_RECOVER, 21, 1)
+    FIELD(I2CC_FUN_CTRL, S_SAVE_ADDR, 20, 1)
+    FIELD(I2CC_FUN_CTRL, M_PKT_RETRY_CNT, 18, 2)
+    /* 17:0 shared with I2CD_FUN_CTRL[17:0] */
+REG32(I2CC_AC_TIMING, 0x04)
+REG32(I2CC_MS_TXRX_BYTE_BUF, 0x08)
+    /* 31:16 shared with I2CD_CMD[31:16] */
+    /* 15:0  shared with I2CD_BYTE_BUF[15:0] */
+REG32(I2CC_POOL_CTRL, 0x0c)
+    /* 31:0 shared with I2CD_POOL_CTRL[31:0] */
+REG32(I2CM_INTR_CTRL, 0x10)
+REG32(I2CM_INTR_STS, 0x14)
+    FIELD(I2CM_INTR_STS, PKT_STATE, 28, 4)
+    FIELD(I2CM_INTR_STS, PKT_CMD_TIMEOUT, 18, 1)
+    FIELD(I2CM_INTR_STS, PKT_CMD_FAIL, 17, 1)
+    FIELD(I2CM_INTR_STS, PKT_CMD_DONE, 16, 1)
+    FIELD(I2CM_INTR_STS, BUS_RECOVER_FAIL, 15, 1)
+    /* 14:0 shared with I2CD_INTR_STS[14:0] */
+REG32(I2CM_CMD, 0x18)
+    FIELD(I2CM_CMD, W1_CTRL, 31, 1)
+    FIELD(I2CM_CMD, PKT_DEV_ADDR, 24, 7)
+    FIELD(I2CM_CMD, HS_MASTER_MODE_LSB, 17, 3)
+    FIELD(I2CM_CMD, PKT_OP_EN, 16, 1)
+    /* 15:0 shared with I2CD_CMD[15:0] */
+REG32(I2CM_DMA_LEN, 0x1c)
+    FIELD(I2CM_DMA_LEN, RX_BUF_LEN_W1T, 31, 1)
+    FIELD(I2CM_DMA_LEN, RX_BUF_LEN, 16, 11)
+    FIELD(I2CM_DMA_LEN, TX_BUF_LEN_W1T, 15, 1)
+    FIELD(I2CM_DMA_LEN, TX_BUF_LEN, 0, 11)
+REG32(I2CS_INTR_CTRL, 0x20)
+    FIELD(I2CS_INTR_CTRL, PKT_CMD_FAIL, 17, 1)
+    FIELD(I2CS_INTR_CTRL, PKT_CMD_DONE, 16, 1)
+REG32(I2CS_INTR_STS, 0x24)
+    /* 31:29 shared with I2CD_INTR_STS[31:29] */
+    FIELD(I2CS_INTR_STS, SLAVE_PARKING_STS, 24, 2)
+    FIELD(I2CS_INTR_STS, SLAVE_ADDR3_NAK, 22, 1)
+    FIELD(I2CS_INTR_STS, SLAVE_ADDR2_NAK, 21, 1)
+    FIELD(I2CS_INTR_STS, SLAVE_ADDR1_NAK, 20, 1)
+    FIELD(I2CS_INTR_STS, SLAVE_ADDR_INDICATOR, 18, 2)
+    FIELD(I2CS_INTR_STS, PKT_CMD_FAIL, 17, 1)
+    FIELD(I2CS_INTR_STS, PKT_CMD_DONE, 16, 1)
+    /* 14:0 shared with I2CD_INTR_STS[14:0] */
+    FIELD(I2CS_INTR_STS, SLAVE_ADDR_RX_MATCH, 7, 1)
+REG32(I2CS_CMD, 0x28)
+    FIELD(I2CS_CMD, W1_CTRL, 31, 1)
+    FIELD(I2CS_CMD, PKT_MODE_ACTIVE_ADDR, 17, 2)
+    FIELD(I2CS_CMD, PKT_MODE_EN, 16, 1)
+    FIELD(I2CS_CMD, AUTO_NAK_INACTIVE_ADDR, 15, 1)
+    FIELD(I2CS_CMD, AUTO_NAK_ACTIVE_ADDR, 14, 1)
+    /* 13:0 shared with I2CD_CMD[13:0] */
+REG32(I2CS_DMA_LEN, 0x2c)
+    FIELD(I2CS_DMA_LEN, RX_BUF_LEN_W1T, 31, 1)
+    FIELD(I2CS_DMA_LEN, RX_BUF_LEN, 16, 11)
+    FIELD(I2CS_DMA_LEN, TX_BUF_LEN_W1T, 15, 1)
+    FIELD(I2CS_DMA_LEN, TX_BUF_LEN, 0, 11)
+REG32(I2CM_DMA_TX_ADDR, 0x30)
+    FIELD(I2CM_DMA_TX_ADDR, ADDR, 0, 31)
+REG32(I2CM_DMA_RX_ADDR, 0x34)
+    FIELD(I2CM_DMA_RX_ADDR, ADDR, 0, 31)
+REG32(I2CS_DMA_TX_ADDR, 0x38)
+    FIELD(I2CS_DMA_TX_ADDR, ADDR, 0, 31)
+REG32(I2CS_DMA_RX_ADDR, 0x3c)
+    FIELD(I2CS_DMA_RX_ADDR, ADDR, 0, 31)
+REG32(I2CS_DEV_ADDR, 0x40)
+REG32(I2CM_DMA_LEN_STS, 0x48)
+    FIELD(I2CM_DMA_LEN_STS, RX_LEN, 16, 13)
+    FIELD(I2CM_DMA_LEN_STS, TX_LEN, 0, 13)
+REG32(I2CS_DMA_LEN_STS, 0x4c)
+    FIELD(I2CS_DMA_LEN_STS, RX_LEN, 16, 13)
+    FIELD(I2CS_DMA_LEN_STS, TX_LEN, 0, 13)
+REG32(I2CC_DMA_ADDR, 0x50)
+REG32(I2CC_DMA_LEN, 0x54)
+
+struct AspeedI2CState;
+
+#define TYPE_ASPEED_I2C_BUS "aspeed.i2c.bus"
+OBJECT_DECLARE_SIMPLE_TYPE(AspeedI2CBus, ASPEED_I2C_BUS)
+struct AspeedI2CBus {
+    SysBusDevice parent_obj;
+
+    struct AspeedI2CState *controller;
+
+    /* slave mode */
+    I2CSlave *slave;
+
+    MemoryRegion mr;
+
+    I2CBus *bus;
+    uint8_t id;
+    qemu_irq irq;
+
+    uint32_t regs[ASPEED_I2C_NEW_NUM_REG];
+};
+
+struct AspeedI2CState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    qemu_irq irq;
+
+    uint32_t intr_status;
+    uint32_t ctrl_global;
+    uint32_t new_clk_divider;
+    MemoryRegion pool_iomem;
+    uint8_t pool[ASPEED_I2C_MAX_POOL_SIZE];
+
+    AspeedI2CBus busses[ASPEED_I2C_NR_BUSSES];
+    MemoryRegion *dram_mr;
+    AddressSpace dram_as;
+};
+
+#define TYPE_ASPEED_I2C_BUS_SLAVE "aspeed.i2c.slave"
+OBJECT_DECLARE_SIMPLE_TYPE(AspeedI2CBusSlave, ASPEED_I2C_BUS_SLAVE)
+struct AspeedI2CBusSlave {
+    I2CSlave i2c;
+};
+
+struct AspeedI2CClass {
+    SysBusDeviceClass parent_class;
+
+    uint8_t num_busses;
+    uint8_t reg_size;
+    uint8_t gap;
+    qemu_irq (*bus_get_irq)(AspeedI2CBus *);
+
+    uint64_t pool_size;
+    hwaddr pool_base;
+    uint8_t *(*bus_pool_base)(AspeedI2CBus *);
+    bool check_sram;
+    bool has_dma;
+
+};
+
+static inline bool aspeed_i2c_is_new_mode(AspeedI2CState *s)
+{
+    return FIELD_EX32(s->ctrl_global, I2C_CTRL_GLOBAL, REG_MODE);
+}
+
+static inline bool aspeed_i2c_bus_pkt_mode_en(AspeedI2CBus *bus)
+{
+    if (aspeed_i2c_is_new_mode(bus->controller)) {
+        return ARRAY_FIELD_EX32(bus->regs, I2CM_CMD, PKT_OP_EN);
+    }
+    return false;
+}
+
+static inline uint32_t aspeed_i2c_bus_ctrl_offset(AspeedI2CBus *bus)
+{
+    if (aspeed_i2c_is_new_mode(bus->controller)) {
+        return R_I2CC_FUN_CTRL;
+    }
+    return R_I2CD_FUN_CTRL;
+}
+
+static inline uint32_t aspeed_i2c_bus_cmd_offset(AspeedI2CBus *bus)
+{
+    if (aspeed_i2c_is_new_mode(bus->controller)) {
+        return R_I2CM_CMD;
+    }
+    return R_I2CD_CMD;
+}
+
+static inline uint32_t aspeed_i2c_bus_dev_addr_offset(AspeedI2CBus *bus)
+{
+    if (aspeed_i2c_is_new_mode(bus->controller)) {
+        return R_I2CS_DEV_ADDR;
+    }
+    return R_I2CD_DEV_ADDR;
+}
+
+static inline uint32_t aspeed_i2c_bus_intr_ctrl_offset(AspeedI2CBus *bus)
+{
+    if (aspeed_i2c_is_new_mode(bus->controller)) {
+        return R_I2CM_INTR_CTRL;
+    }
+    return R_I2CD_INTR_CTRL;
+}
+
+static inline uint32_t aspeed_i2c_bus_intr_sts_offset(AspeedI2CBus *bus)
+{
+    if (aspeed_i2c_is_new_mode(bus->controller)) {
+        return R_I2CM_INTR_STS;
+    }
+    return R_I2CD_INTR_STS;
+}
+
+static inline uint32_t aspeed_i2c_bus_pool_ctrl_offset(AspeedI2CBus *bus)
+{
+    if (aspeed_i2c_is_new_mode(bus->controller)) {
+        return R_I2CC_POOL_CTRL;
+    }
+    return R_I2CD_POOL_CTRL;
+}
+
+static inline uint32_t aspeed_i2c_bus_byte_buf_offset(AspeedI2CBus *bus)
+{
+    if (aspeed_i2c_is_new_mode(bus->controller)) {
+        return R_I2CC_MS_TXRX_BYTE_BUF;
+    }
+    return R_I2CD_BYTE_BUF;
+}
+
+static inline uint32_t aspeed_i2c_bus_dma_len_offset(AspeedI2CBus *bus)
+{
+    if (aspeed_i2c_is_new_mode(bus->controller)) {
+        return R_I2CC_DMA_LEN;
+    }
+    return R_I2CD_DMA_LEN;
+}
+
+static inline uint32_t aspeed_i2c_bus_dma_addr_offset(AspeedI2CBus *bus)
+{
+    if (aspeed_i2c_is_new_mode(bus->controller)) {
+        return R_I2CC_DMA_ADDR;
+    }
+    return R_I2CD_DMA_ADDR;
+}
+
+static inline bool aspeed_i2c_bus_is_master(AspeedI2CBus *bus)
+{
+    return SHARED_ARRAY_FIELD_EX32(bus->regs, aspeed_i2c_bus_ctrl_offset(bus),
+                                   MASTER_EN);
+}
+
+static inline bool aspeed_i2c_bus_is_enabled(AspeedI2CBus *bus)
+{
+    uint32_t ctrl_reg = aspeed_i2c_bus_ctrl_offset(bus);
+    return SHARED_ARRAY_FIELD_EX32(bus->regs, ctrl_reg, MASTER_EN) ||
+           SHARED_ARRAY_FIELD_EX32(bus->regs, ctrl_reg, SLAVE_EN);
+}
+
+I2CBus *aspeed_i2c_get_bus(AspeedI2CState *s, int busnr);
+
+#endif /* ASPEED_I2C_H */
diff --git a/include/hw/i2c/bitbang_i2c.h b/include/hw/i2c/bitbang_i2c.h
new file mode 100644 (file)
index 0000000..a079e6d
--- /dev/null
@@ -0,0 +1,52 @@
+#ifndef BITBANG_I2C_H
+#define BITBANG_I2C_H
+
+#include "hw/i2c/i2c.h"
+
+#define TYPE_GPIO_I2C "gpio_i2c"
+
+typedef struct bitbang_i2c_interface bitbang_i2c_interface;
+
+#define BITBANG_I2C_SDA 0
+#define BITBANG_I2C_SCL 1
+
+typedef enum bitbang_i2c_state {
+    STOPPED = 0,
+    SENDING_BIT7,
+    SENDING_BIT6,
+    SENDING_BIT5,
+    SENDING_BIT4,
+    SENDING_BIT3,
+    SENDING_BIT2,
+    SENDING_BIT1,
+    SENDING_BIT0,
+    WAITING_FOR_ACK,
+    RECEIVING_BIT7,
+    RECEIVING_BIT6,
+    RECEIVING_BIT5,
+    RECEIVING_BIT4,
+    RECEIVING_BIT3,
+    RECEIVING_BIT2,
+    RECEIVING_BIT1,
+    RECEIVING_BIT0,
+    SENDING_ACK,
+    SENT_NACK
+} bitbang_i2c_state;
+
+struct bitbang_i2c_interface {
+    I2CBus *bus;
+    bitbang_i2c_state state;
+    int last_data;
+    int last_clock;
+    int device_out;
+    uint8_t buffer;
+    int current_addr;
+};
+
+/**
+ * bitbang_i2c_init: in-place initialize the bitbang_i2c_interface struct
+ */
+void bitbang_i2c_init(bitbang_i2c_interface *s, I2CBus *bus);
+int bitbang_i2c_set(bitbang_i2c_interface *i2c, int line, int level);
+
+#endif
diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h
new file mode 100644 (file)
index 0000000..2a3abac
--- /dev/null
@@ -0,0 +1,226 @@
+#ifndef QEMU_I2C_H
+#define QEMU_I2C_H
+
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+/* The QEMU I2C implementation only supports simple transfers that complete
+   immediately.  It does not support slave devices that need to be able to
+   defer their response (eg. CPU slave interfaces where the data is supplied
+   by the device driver in response to an interrupt).  */
+
+enum i2c_event {
+    I2C_START_RECV,
+    I2C_START_SEND,
+    I2C_START_SEND_ASYNC,
+    I2C_FINISH,
+    I2C_NACK /* Masker NACKed a receive byte.  */
+};
+
+typedef struct I2CNodeList I2CNodeList;
+
+#define TYPE_I2C_SLAVE "i2c-slave"
+OBJECT_DECLARE_TYPE(I2CSlave, I2CSlaveClass,
+                    I2C_SLAVE)
+
+struct I2CSlaveClass {
+    DeviceClass parent_class;
+
+    /* Master to slave. Returns non-zero for a NAK, 0 for success. */
+    int (*send)(I2CSlave *s, uint8_t data);
+
+    /* Master to slave (asynchronous). Receiving slave must call i2c_ack(). */
+    void (*send_async)(I2CSlave *s, uint8_t data);
+
+    /*
+     * Slave to master.  This cannot fail, the device should always
+     * return something here.
+     */
+    uint8_t (*recv)(I2CSlave *s);
+
+    /*
+     * Notify the slave of a bus state change.  For start event,
+     * returns non-zero to NAK an operation.  For other events the
+     * return code is not used and should be zero.
+     */
+    int (*event)(I2CSlave *s, enum i2c_event event);
+
+    /*
+     * Check if this device matches the address provided.  Returns bool of
+     * true if it matches (or broadcast), and updates the device list, false
+     * otherwise.
+     *
+     * If broadcast is true, match should add the device and return true.
+     */
+    bool (*match_and_add)(I2CSlave *candidate, uint8_t address, bool broadcast,
+                          I2CNodeList *current_devs);
+};
+
+struct I2CSlave {
+    DeviceState qdev;
+
+    /* Remaining fields for internal use by the I2C code.  */
+    uint8_t address;
+};
+
+#define TYPE_I2C_BUS "i2c-bus"
+OBJECT_DECLARE_SIMPLE_TYPE(I2CBus, I2C_BUS)
+
+typedef struct I2CNode I2CNode;
+
+struct I2CNode {
+    I2CSlave *elt;
+    QLIST_ENTRY(I2CNode) next;
+};
+
+typedef struct I2CPendingMaster I2CPendingMaster;
+
+struct I2CPendingMaster {
+    QEMUBH *bh;
+    QSIMPLEQ_ENTRY(I2CPendingMaster) entry;
+};
+
+typedef QLIST_HEAD(I2CNodeList, I2CNode) I2CNodeList;
+typedef QSIMPLEQ_HEAD(I2CPendingMasters, I2CPendingMaster) I2CPendingMasters;
+
+struct I2CBus {
+    BusState qbus;
+    I2CNodeList current_devs;
+    I2CPendingMasters pending_masters;
+    uint8_t saved_address;
+    bool broadcast;
+
+    /* Set from slave currently mastering the bus. */
+    QEMUBH *bh;
+};
+
+I2CBus *i2c_init_bus(DeviceState *parent, const char *name);
+int i2c_bus_busy(I2CBus *bus);
+
+/**
+ * i2c_start_transfer: start a transfer on an I2C bus.
+ *
+ * @bus: #I2CBus to be used
+ * @address: address of the slave
+ * @is_recv: indicates the transfer direction
+ *
+ * When @is_recv is a known boolean constant, use the
+ * i2c_start_recv() or i2c_start_send() helper instead.
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int i2c_start_transfer(I2CBus *bus, uint8_t address, bool is_recv);
+
+/**
+ * i2c_start_recv: start a 'receive' transfer on an I2C bus.
+ *
+ * @bus: #I2CBus to be used
+ * @address: address of the slave
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int i2c_start_recv(I2CBus *bus, uint8_t address);
+
+/**
+ * i2c_start_send: start a 'send' transfer on an I2C bus.
+ *
+ * @bus: #I2CBus to be used
+ * @address: address of the slave
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int i2c_start_send(I2CBus *bus, uint8_t address);
+
+/**
+ * i2c_start_send_async: start an asynchronous 'send' transfer on an I2C bus.
+ *
+ * @bus: #I2CBus to be used
+ * @address: address of the slave
+ *
+ * Return: 0 on success, -1 on error
+ */
+int i2c_start_send_async(I2CBus *bus, uint8_t address);
+
+void i2c_schedule_pending_master(I2CBus *bus);
+
+void i2c_end_transfer(I2CBus *bus);
+void i2c_nack(I2CBus *bus);
+void i2c_ack(I2CBus *bus);
+void i2c_bus_master(I2CBus *bus, QEMUBH *bh);
+void i2c_bus_release(I2CBus *bus);
+int i2c_send(I2CBus *bus, uint8_t data);
+int i2c_send_async(I2CBus *bus, uint8_t data);
+uint8_t i2c_recv(I2CBus *bus);
+bool i2c_scan_bus(I2CBus *bus, uint8_t address, bool broadcast,
+                  I2CNodeList *current_devs);
+
+/**
+ * Create an I2C slave device on the heap.
+ * @name: a device type name
+ * @addr: I2C address of the slave when put on a bus
+ *
+ * This only initializes the device state structure and allows
+ * properties to be set. Type @name must exist. The device still
+ * needs to be realized. See qdev-core.h.
+ */
+I2CSlave *i2c_slave_new(const char *name, uint8_t addr);
+
+/**
+ * Create and realize an I2C slave device on the heap.
+ * @bus: I2C bus to put it on
+ * @name: I2C slave device type name
+ * @addr: I2C address of the slave when put on a bus
+ *
+ * Create the device state structure, initialize it, put it on the
+ * specified @bus, and drop the reference to it (the device is realized).
+ */
+I2CSlave *i2c_slave_create_simple(I2CBus *bus, const char *name, uint8_t addr);
+
+/**
+ * Realize and drop a reference an I2C slave device
+ * @dev: I2C slave device to realize
+ * @bus: I2C bus to put it on
+ * @addr: I2C address of the slave on the bus
+ * @errp: pointer to NULL initialized error object
+ *
+ * Returns: %true on success, %false on failure.
+ *
+ * Call 'realize' on @dev, put it on the specified @bus, and drop the
+ * reference to it.
+ *
+ * This function is useful if you have created @dev via qdev_new(),
+ * i2c_slave_new() or i2c_slave_try_new() (which take a reference to
+ * the device it returns to you), so that you can set properties on it
+ * before realizing it. If you don't need to set properties then
+ * i2c_slave_create_simple() is probably better (as it does the create,
+ * init and realize in one step).
+ *
+ * If you are embedding the I2C slave into another QOM device and
+ * initialized it via some variant on object_initialize_child() then
+ * do not use this function, because that family of functions arrange
+ * for the only reference to the child device to be held by the parent
+ * via the child<> property, and so the reference-count-drop done here
+ * would be incorrect.  (Instead you would want i2c_slave_realize(),
+ * which doesn't currently exist but would be trivial to create if we
+ * had any code that wanted it.)
+ */
+bool i2c_slave_realize_and_unref(I2CSlave *dev, I2CBus *bus, Error **errp);
+
+/**
+ * Set the I2C bus address of a slave device
+ * @dev: I2C slave device
+ * @address: I2C address of the slave when put on a bus
+ */
+void i2c_slave_set_address(I2CSlave *dev, uint8_t address);
+
+extern const VMStateDescription vmstate_i2c_slave;
+
+#define VMSTATE_I2C_SLAVE(_field, _state) {                          \
+    .name       = (stringify(_field)),                               \
+    .size       = sizeof(I2CSlave),                                  \
+    .vmsd       = &vmstate_i2c_slave,                                \
+    .flags      = VMS_STRUCT,                                        \
+    .offset     = vmstate_offset_value(_state, _field, I2CSlave),    \
+}
+
+#endif
diff --git a/include/hw/i2c/i2c_mux_pca954x.h b/include/hw/i2c/i2c_mux_pca954x.h
new file mode 100644 (file)
index 0000000..3dd25ec
--- /dev/null
@@ -0,0 +1,19 @@
+#ifndef QEMU_I2C_MUX_PCA954X_H
+#define QEMU_I2C_MUX_PCA954X_H
+
+#include "hw/i2c/i2c.h"
+
+#define TYPE_PCA9546 "pca9546"
+#define TYPE_PCA9548 "pca9548"
+
+/**
+ * Retrieves the i2c bus associated with the specified channel on this i2c
+ * mux.
+ * @mux: an i2c mux device.
+ * @channel: the i2c channel requested
+ *
+ * Returns: a pointer to the associated i2c bus.
+ */
+I2CBus *pca954x_i2c_get_bus(I2CSlave *mux, uint8_t channel);
+
+#endif
diff --git a/include/hw/i2c/imx_i2c.h b/include/hw/i2c/imx_i2c.h
new file mode 100644 (file)
index 0000000..e4f9133
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ *  i.MX I2C Bus Serial Interface registers definition
+ *
+ *  Copyright (C) 2013 Jean-Christophe Dubois. <jcd@tribudubois.net>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef IMX_I2C_H
+#define IMX_I2C_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_IMX_I2C "imx.i2c"
+OBJECT_DECLARE_SIMPLE_TYPE(IMXI2CState, IMX_I2C)
+
+#define IMX_I2C_MEM_SIZE           0x14
+
+/* i.MX I2C memory map */
+#define IADR_ADDR                  0x00  /* address register */
+#define IFDR_ADDR                  0x04  /* frequency divider register */
+#define I2CR_ADDR                  0x08  /* control register */
+#define I2SR_ADDR                  0x0c  /* status register */
+#define I2DR_ADDR                  0x10  /* data register */
+
+#define IADR_MASK                  0xFE
+#define IADR_RESET                 0
+
+#define IFDR_MASK                  0x3F
+#define IFDR_RESET                 0
+
+#define I2CR_IEN                   (1 << 7)
+#define I2CR_IIEN                  (1 << 6)
+#define I2CR_MSTA                  (1 << 5)
+#define I2CR_MTX                   (1 << 4)
+#define I2CR_TXAK                  (1 << 3)
+#define I2CR_RSTA                  (1 << 2)
+#define I2CR_MASK                  0xFC
+#define I2CR_RESET                 0
+
+#define I2SR_ICF                   (1 << 7)
+#define I2SR_IAAF                  (1 << 6)
+#define I2SR_IBB                   (1 << 5)
+#define I2SR_IAL                   (1 << 4)
+#define I2SR_SRW                   (1 << 2)
+#define I2SR_IIF                   (1 << 1)
+#define I2SR_RXAK                  (1 << 0)
+#define I2SR_MASK                  0xE9
+#define I2SR_RESET                 0x81
+
+#define I2DR_MASK                  0xFF
+#define I2DR_RESET                 0
+
+#define ADDR_RESET                 0xFF00
+
+struct IMXI2CState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    I2CBus *bus;
+    qemu_irq irq;
+
+    uint16_t  address;
+
+    uint16_t iadr;
+    uint16_t ifdr;
+    uint16_t i2cr;
+    uint16_t i2sr;
+    uint16_t i2dr_read;
+    uint16_t i2dr_write;
+};
+
+#endif /* IMX_I2C_H */
diff --git a/include/hw/i2c/microbit_i2c.h b/include/hw/i2c/microbit_i2c.h
new file mode 100644 (file)
index 0000000..3c29e09
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Microbit stub for Nordic Semiconductor nRF51 SoC Two-Wire Interface
+ * http://infocenter.nordicsemi.com/pdf/nRF51_RM_v3.0.1.pdf
+ *
+ * Copyright 2019 Red Hat, Inc.
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef MICROBIT_I2C_H
+#define MICROBIT_I2C_H
+
+#include "hw/sysbus.h"
+#include "hw/arm/nrf51.h"
+#include "qom/object.h"
+
+#define NRF51_TWI_TASK_STARTRX 0x000
+#define NRF51_TWI_TASK_STARTTX 0x008
+#define NRF51_TWI_TASK_STOP 0x014
+#define NRF51_TWI_EVENT_STOPPED 0x104
+#define NRF51_TWI_EVENT_RXDREADY 0x108
+#define NRF51_TWI_EVENT_TXDSENT 0x11c
+#define NRF51_TWI_REG_ENABLE 0x500
+#define NRF51_TWI_REG_RXD 0x518
+#define NRF51_TWI_REG_TXD 0x51c
+#define NRF51_TWI_REG_ADDRESS 0x588
+
+#define TYPE_MICROBIT_I2C "microbit.i2c"
+OBJECT_DECLARE_SIMPLE_TYPE(MicrobitI2CState, MICROBIT_I2C)
+
+#define MICROBIT_I2C_NREGS (NRF51_PERIPHERAL_SIZE / sizeof(uint32_t))
+
+struct MicrobitI2CState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    uint32_t regs[MICROBIT_I2C_NREGS];
+    uint32_t read_idx;
+};
+
+#endif /* MICROBIT_I2C_H */
diff --git a/include/hw/i2c/npcm7xx_smbus.h b/include/hw/i2c/npcm7xx_smbus.h
new file mode 100644 (file)
index 0000000..dc45963
--- /dev/null
@@ -0,0 +1,112 @@
+/*
+ * Nuvoton NPCM7xx SMBus Module.
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM7XX_SMBUS_H
+#define NPCM7XX_SMBUS_H
+
+#include "exec/memory.h"
+#include "hw/i2c/i2c.h"
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+
+/*
+ * Number of addresses this module contains. Do not change this without
+ * incrementing the version_id in the vmstate.
+ */
+#define NPCM7XX_SMBUS_NR_ADDRS 10
+
+/* Size of the FIFO buffer. */
+#define NPCM7XX_SMBUS_FIFO_SIZE 16
+
+typedef enum NPCM7xxSMBusStatus {
+    NPCM7XX_SMBUS_STATUS_IDLE,
+    NPCM7XX_SMBUS_STATUS_SENDING,
+    NPCM7XX_SMBUS_STATUS_RECEIVING,
+    NPCM7XX_SMBUS_STATUS_NEGACK,
+    NPCM7XX_SMBUS_STATUS_STOPPING_LAST_RECEIVE,
+    NPCM7XX_SMBUS_STATUS_STOPPING_NEGACK,
+} NPCM7xxSMBusStatus;
+
+/*
+ * struct NPCM7xxSMBusState - System Management Bus device state.
+ * @bus: The underlying I2C Bus.
+ * @irq: GIC interrupt line to fire on events (if enabled).
+ * @sda: The serial data register.
+ * @st: The status register.
+ * @cst: The control status register.
+ * @cst2: The control status register 2.
+ * @cst3: The control status register 3.
+ * @ctl1: The control register 1.
+ * @ctl2: The control register 2.
+ * @ctl3: The control register 3.
+ * @ctl4: The control register 4.
+ * @ctl5: The control register 5.
+ * @addr: The SMBus module's own addresses on the I2C bus.
+ * @scllt: The SCL low time register.
+ * @sclht: The SCL high time register.
+ * @fif_ctl: The FIFO control register.
+ * @fif_cts: The FIFO control status register.
+ * @fair_per: The fair period register.
+ * @txf_ctl: The transmit FIFO control register.
+ * @t_out: The SMBus timeout register.
+ * @txf_sts: The transmit FIFO status register.
+ * @rxf_sts: The receive FIFO status register.
+ * @rxf_ctl: The receive FIFO control register.
+ * @rx_fifo: The FIFO buffer for receiving in FIFO mode.
+ * @rx_cur: The current position of rx_fifo.
+ * @status: The current status of the SMBus.
+ */
+struct NPCM7xxSMBusState {
+    SysBusDevice parent;
+
+    MemoryRegion iomem;
+
+    I2CBus      *bus;
+    qemu_irq     irq;
+
+    uint8_t      sda;
+    uint8_t      st;
+    uint8_t      cst;
+    uint8_t      cst2;
+    uint8_t      cst3;
+    uint8_t      ctl1;
+    uint8_t      ctl2;
+    uint8_t      ctl3;
+    uint8_t      ctl4;
+    uint8_t      ctl5;
+    uint8_t      addr[NPCM7XX_SMBUS_NR_ADDRS];
+
+    uint8_t      scllt;
+    uint8_t      sclht;
+
+    uint8_t      fif_ctl;
+    uint8_t      fif_cts;
+    uint8_t      fair_per;
+    uint8_t      txf_ctl;
+    uint8_t      t_out;
+    uint8_t      txf_sts;
+    uint8_t      rxf_sts;
+    uint8_t      rxf_ctl;
+
+    uint8_t      rx_fifo[NPCM7XX_SMBUS_FIFO_SIZE];
+    uint8_t      rx_cur;
+
+    NPCM7xxSMBusStatus status;
+};
+
+#define TYPE_NPCM7XX_SMBUS "npcm7xx-smbus"
+OBJECT_DECLARE_SIMPLE_TYPE(NPCM7xxSMBusState, NPCM7XX_SMBUS)
+
+#endif /* NPCM7XX_SMBUS_H */
diff --git a/include/hw/i2c/pm_smbus.h b/include/hw/i2c/pm_smbus.h
new file mode 100644 (file)
index 0000000..0d74207
--- /dev/null
@@ -0,0 +1,56 @@
+#ifndef PM_SMBUS_H
+#define PM_SMBUS_H
+
+#include "exec/memory.h"
+#include "hw/i2c/smbus_master.h"
+
+#define PM_SMBUS_MAX_MSG_SIZE 32
+
+typedef struct PMSMBus {
+    I2CBus *smbus;
+    MemoryRegion io;
+
+    uint8_t smb_stat;
+    uint8_t smb_ctl;
+    uint8_t smb_cmd;
+    uint8_t smb_addr;
+    uint8_t smb_data0;
+    uint8_t smb_data1;
+    uint8_t smb_data[PM_SMBUS_MAX_MSG_SIZE];
+    uint8_t smb_blkdata;
+    uint8_t smb_auxctl;
+    uint32_t smb_index;
+
+    /* Set by pm_smbus.c */
+    void (*reset)(struct PMSMBus *s);
+
+    /* Set by the user. */
+    bool i2c_enable;
+    void (*set_irq)(struct PMSMBus *s, bool enabled);
+    void *opaque;
+
+    /* Internally used by pm_smbus. */
+
+    /* Set on block transfers after the last byte has been read, so the
+       INTR bit can be set at the right time. */
+    bool op_done;
+
+    /* Set during an I2C block read, so we know how to handle data. */
+    bool in_i2c_block_read;
+
+    /* Used to work around a bug in AMIBIOS, see smb_transaction_start() */
+    bool start_transaction_on_status_read;
+} PMSMBus;
+
+void pm_smbus_init(DeviceState *parent, PMSMBus *smb, bool force_aux_blk);
+
+/*
+ * For backwards compatibility on migration, older versions don't have
+ * working migration for pm_smbus, this lets us ignore the migrations
+ * for older machine versions.
+ */
+bool pm_smbus_vmstate_needed(void);
+
+extern const VMStateDescription pmsmb_vmstate;
+
+#endif /* PM_SMBUS_H */
diff --git a/include/hw/i2c/pmbus_device.h b/include/hw/i2c/pmbus_device.h
new file mode 100644 (file)
index 0000000..f195c11
--- /dev/null
@@ -0,0 +1,564 @@
+/*
+ * QEMU PMBus device emulation
+ *
+ * Copyright 2021 Google LLC
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_PMBUS_DEVICE_H
+#define HW_PMBUS_DEVICE_H
+
+#include "qemu/bitops.h"
+#include "hw/i2c/smbus_slave.h"
+
+enum pmbus_registers {
+    PMBUS_PAGE                      = 0x00, /* R/W byte */
+    PMBUS_OPERATION                 = 0x01, /* R/W byte */
+    PMBUS_ON_OFF_CONFIG             = 0x02, /* R/W byte */
+    PMBUS_CLEAR_FAULTS              = 0x03, /* Send Byte */
+    PMBUS_PHASE                     = 0x04, /* R/W byte */
+    PMBUS_PAGE_PLUS_WRITE           = 0x05, /* Block Write-only */
+    PMBUS_PAGE_PLUS_READ            = 0x06, /* Block Read-only */
+    PMBUS_WRITE_PROTECT             = 0x10, /* R/W byte */
+    PMBUS_STORE_DEFAULT_ALL         = 0x11, /* Send Byte */
+    PMBUS_RESTORE_DEFAULT_ALL       = 0x12, /* Send Byte */
+    PMBUS_STORE_DEFAULT_CODE        = 0x13, /* Write-only Byte */
+    PMBUS_RESTORE_DEFAULT_CODE      = 0x14, /* Write-only Byte */
+    PMBUS_STORE_USER_ALL            = 0x15, /* Send Byte */
+    PMBUS_RESTORE_USER_ALL          = 0x16, /* Send Byte */
+    PMBUS_STORE_USER_CODE           = 0x17, /* Write-only Byte */
+    PMBUS_RESTORE_USER_CODE         = 0x18, /* Write-only Byte */
+    PMBUS_CAPABILITY                = 0x19, /* Read-Only byte */
+    PMBUS_QUERY                     = 0x1A, /* Write-Only */
+    PMBUS_SMBALERT_MASK             = 0x1B, /* Block read, Word write */
+    PMBUS_VOUT_MODE                 = 0x20, /* R/W byte */
+    PMBUS_VOUT_COMMAND              = 0x21, /* R/W word */
+    PMBUS_VOUT_TRIM                 = 0x22, /* R/W word */
+    PMBUS_VOUT_CAL_OFFSET           = 0x23, /* R/W word */
+    PMBUS_VOUT_MAX                  = 0x24, /* R/W word */
+    PMBUS_VOUT_MARGIN_HIGH          = 0x25, /* R/W word */
+    PMBUS_VOUT_MARGIN_LOW           = 0x26, /* R/W word */
+    PMBUS_VOUT_TRANSITION_RATE      = 0x27, /* R/W word */
+    PMBUS_VOUT_DROOP                = 0x28, /* R/W word */
+    PMBUS_VOUT_SCALE_LOOP           = 0x29, /* R/W word */
+    PMBUS_VOUT_SCALE_MONITOR        = 0x2A, /* R/W word */
+    PMBUS_VOUT_MIN                  = 0x2B, /* R/W word */
+    PMBUS_COEFFICIENTS              = 0x30, /* Read-only block 5 bytes */
+    PMBUS_POUT_MAX                  = 0x31, /* R/W word */
+    PMBUS_MAX_DUTY                  = 0x32, /* R/W word */
+    PMBUS_FREQUENCY_SWITCH          = 0x33, /* R/W word */
+    PMBUS_VIN_ON                    = 0x35, /* R/W word */
+    PMBUS_VIN_OFF                   = 0x36, /* R/W word */
+    PMBUS_INTERLEAVE                = 0x37, /* R/W word */
+    PMBUS_IOUT_CAL_GAIN             = 0x38, /* R/W word */
+    PMBUS_IOUT_CAL_OFFSET           = 0x39, /* R/W word */
+    PMBUS_FAN_CONFIG_1_2            = 0x3A, /* R/W byte */
+    PMBUS_FAN_COMMAND_1             = 0x3B, /* R/W word */
+    PMBUS_FAN_COMMAND_2             = 0x3C, /* R/W word */
+    PMBUS_FAN_CONFIG_3_4            = 0x3D, /* R/W byte */
+    PMBUS_FAN_COMMAND_3             = 0x3E, /* R/W word */
+    PMBUS_FAN_COMMAND_4             = 0x3F, /* R/W word */
+    PMBUS_VOUT_OV_FAULT_LIMIT       = 0x40, /* R/W word */
+    PMBUS_VOUT_OV_FAULT_RESPONSE    = 0x41, /* R/W byte */
+    PMBUS_VOUT_OV_WARN_LIMIT        = 0x42, /* R/W word */
+    PMBUS_VOUT_UV_WARN_LIMIT        = 0x43, /* R/W word */
+    PMBUS_VOUT_UV_FAULT_LIMIT       = 0x44, /* R/W word */
+    PMBUS_VOUT_UV_FAULT_RESPONSE    = 0x45, /* R/W byte */
+    PMBUS_IOUT_OC_FAULT_LIMIT       = 0x46, /* R/W word */
+    PMBUS_IOUT_OC_FAULT_RESPONSE    = 0x47, /* R/W byte */
+    PMBUS_IOUT_OC_LV_FAULT_LIMIT    = 0x48, /* R/W word */
+    PMBUS_IOUT_OC_LV_FAULT_RESPONSE = 0x49, /* R/W byte */
+    PMBUS_IOUT_OC_WARN_LIMIT        = 0x4A, /* R/W word */
+    PMBUS_IOUT_UC_FAULT_LIMIT       = 0x4B, /* R/W word */
+    PMBUS_IOUT_UC_FAULT_RESPONSE    = 0x4C, /* R/W byte */
+    PMBUS_OT_FAULT_LIMIT            = 0x4F, /* R/W word */
+    PMBUS_OT_FAULT_RESPONSE         = 0x50, /* R/W byte */
+    PMBUS_OT_WARN_LIMIT             = 0x51, /* R/W word */
+    PMBUS_UT_WARN_LIMIT             = 0x52, /* R/W word */
+    PMBUS_UT_FAULT_LIMIT            = 0x53, /* R/W word */
+    PMBUS_UT_FAULT_RESPONSE         = 0x54, /* R/W byte */
+    PMBUS_VIN_OV_FAULT_LIMIT        = 0x55, /* R/W word */
+    PMBUS_VIN_OV_FAULT_RESPONSE     = 0x56, /* R/W byte */
+    PMBUS_VIN_OV_WARN_LIMIT         = 0x57, /* R/W word */
+    PMBUS_VIN_UV_WARN_LIMIT         = 0x58, /* R/W word */
+    PMBUS_VIN_UV_FAULT_LIMIT        = 0x59, /* R/W word */
+    PMBUS_VIN_UV_FAULT_RESPONSE     = 0x5A, /* R/W byte */
+    PMBUS_IIN_OC_FAULT_LIMIT        = 0x5B, /* R/W word */
+    PMBUS_IIN_OC_FAULT_RESPONSE     = 0x5C, /* R/W byte */
+    PMBUS_IIN_OC_WARN_LIMIT         = 0x5D, /* R/W word */
+    PMBUS_POWER_GOOD_ON             = 0x5E, /* R/W word */
+    PMBUS_POWER_GOOD_OFF            = 0x5F, /* R/W word */
+    PMBUS_TON_DELAY                 = 0x60, /* R/W word */
+    PMBUS_TON_RISE                  = 0x61, /* R/W word */
+    PMBUS_TON_MAX_FAULT_LIMIT       = 0x62, /* R/W word */
+    PMBUS_TON_MAX_FAULT_RESPONSE    = 0x63, /* R/W byte */
+    PMBUS_TOFF_DELAY                = 0x64, /* R/W word */
+    PMBUS_TOFF_FALL                 = 0x65, /* R/W word */
+    PMBUS_TOFF_MAX_WARN_LIMIT       = 0x66, /* R/W word */
+    PMBUS_POUT_OP_FAULT_LIMIT       = 0x68, /* R/W word */
+    PMBUS_POUT_OP_FAULT_RESPONSE    = 0x69, /* R/W byte */
+    PMBUS_POUT_OP_WARN_LIMIT        = 0x6A, /* R/W word */
+    PMBUS_PIN_OP_WARN_LIMIT         = 0x6B, /* R/W word */
+    PMBUS_STATUS_BYTE               = 0x78, /* R/W byte */
+    PMBUS_STATUS_WORD               = 0x79, /* R/W word */
+    PMBUS_STATUS_VOUT               = 0x7A, /* R/W byte */
+    PMBUS_STATUS_IOUT               = 0x7B, /* R/W byte */
+    PMBUS_STATUS_INPUT              = 0x7C, /* R/W byte */
+    PMBUS_STATUS_TEMPERATURE        = 0x7D, /* R/W byte */
+    PMBUS_STATUS_CML                = 0x7E, /* R/W byte */
+    PMBUS_STATUS_OTHER              = 0x7F, /* R/W byte */
+    PMBUS_STATUS_MFR_SPECIFIC       = 0x80, /* R/W byte */
+    PMBUS_STATUS_FANS_1_2           = 0x81, /* R/W byte */
+    PMBUS_STATUS_FANS_3_4           = 0x82, /* R/W byte */
+    PMBUS_READ_EIN                  = 0x86, /* Read-Only block 5 bytes */
+    PMBUS_READ_EOUT                 = 0x87, /* Read-Only block 5 bytes */
+    PMBUS_READ_VIN                  = 0x88, /* Read-Only word */
+    PMBUS_READ_IIN                  = 0x89, /* Read-Only word */
+    PMBUS_READ_VCAP                 = 0x8A, /* Read-Only word */
+    PMBUS_READ_VOUT                 = 0x8B, /* Read-Only word */
+    PMBUS_READ_IOUT                 = 0x8C, /* Read-Only word */
+    PMBUS_READ_TEMPERATURE_1        = 0x8D, /* Read-Only word */
+    PMBUS_READ_TEMPERATURE_2        = 0x8E, /* Read-Only word */
+    PMBUS_READ_TEMPERATURE_3        = 0x8F, /* Read-Only word */
+    PMBUS_READ_FAN_SPEED_1          = 0x90, /* Read-Only word */
+    PMBUS_READ_FAN_SPEED_2          = 0x91, /* Read-Only word */
+    PMBUS_READ_FAN_SPEED_3          = 0x92, /* Read-Only word */
+    PMBUS_READ_FAN_SPEED_4          = 0x93, /* Read-Only word */
+    PMBUS_READ_DUTY_CYCLE           = 0x94, /* Read-Only word */
+    PMBUS_READ_FREQUENCY            = 0x95, /* Read-Only word */
+    PMBUS_READ_POUT                 = 0x96, /* Read-Only word */
+    PMBUS_READ_PIN                  = 0x97, /* Read-Only word */
+    PMBUS_REVISION                  = 0x98, /* Read-Only byte */
+    PMBUS_MFR_ID                    = 0x99, /* R/W block */
+    PMBUS_MFR_MODEL                 = 0x9A, /* R/W block */
+    PMBUS_MFR_REVISION              = 0x9B, /* R/W block */
+    PMBUS_MFR_LOCATION              = 0x9C, /* R/W block */
+    PMBUS_MFR_DATE                  = 0x9D, /* R/W block */
+    PMBUS_MFR_SERIAL                = 0x9E, /* R/W block */
+    PMBUS_APP_PROFILE_SUPPORT       = 0x9F, /* Read-Only block-read */
+    PMBUS_MFR_VIN_MIN               = 0xA0, /* Read-Only word */
+    PMBUS_MFR_VIN_MAX               = 0xA1, /* Read-Only word */
+    PMBUS_MFR_IIN_MAX               = 0xA2, /* Read-Only word */
+    PMBUS_MFR_PIN_MAX               = 0xA3, /* Read-Only word */
+    PMBUS_MFR_VOUT_MIN              = 0xA4, /* Read-Only word */
+    PMBUS_MFR_VOUT_MAX              = 0xA5, /* Read-Only word */
+    PMBUS_MFR_IOUT_MAX              = 0xA6, /* Read-Only word */
+    PMBUS_MFR_POUT_MAX              = 0xA7, /* Read-Only word */
+    PMBUS_MFR_TAMBIENT_MAX          = 0xA8, /* Read-Only word */
+    PMBUS_MFR_TAMBIENT_MIN          = 0xA9, /* Read-Only word */
+    PMBUS_MFR_EFFICIENCY_LL         = 0xAA, /* Read-Only block 14 bytes */
+    PMBUS_MFR_EFFICIENCY_HL         = 0xAB, /* Read-Only block 14 bytes */
+    PMBUS_MFR_PIN_ACCURACY          = 0xAC, /* Read-Only byte */
+    PMBUS_IC_DEVICE_ID              = 0xAD, /* Read-Only block-read */
+    PMBUS_IC_DEVICE_REV             = 0xAE, /* Read-Only block-read */
+    PMBUS_MFR_MAX_TEMP_1            = 0xC0, /* R/W word */
+    PMBUS_MFR_MAX_TEMP_2            = 0xC1, /* R/W word */
+    PMBUS_MFR_MAX_TEMP_3            = 0xC2, /* R/W word */
+    PMBUS_IDLE_STATE                = 0xFF,
+};
+
+/* STATUS_WORD */
+#define PB_STATUS_VOUT           BIT(15)
+#define PB_STATUS_IOUT_POUT      BIT(14)
+#define PB_STATUS_INPUT          BIT(13)
+#define PB_STATUS_WORD_MFR       BIT(12)
+#define PB_STATUS_POWER_GOOD_N   BIT(11)
+#define PB_STATUS_FAN            BIT(10)
+#define PB_STATUS_OTHER          BIT(9)
+#define PB_STATUS_UNKNOWN        BIT(8)
+/* STATUS_BYTE */
+#define PB_STATUS_BUSY           BIT(7)
+#define PB_STATUS_OFF            BIT(6)
+#define PB_STATUS_VOUT_OV        BIT(5)
+#define PB_STATUS_IOUT_OC        BIT(4)
+#define PB_STATUS_VIN_UV         BIT(3)
+#define PB_STATUS_TEMPERATURE    BIT(2)
+#define PB_STATUS_CML            BIT(1)
+#define PB_STATUS_NONE_ABOVE     BIT(0)
+
+/* STATUS_VOUT */
+#define PB_STATUS_VOUT_OV_FAULT         BIT(7) /* Output Overvoltage Fault */
+#define PB_STATUS_VOUT_OV_WARN          BIT(6) /* Output Overvoltage Warning */
+#define PB_STATUS_VOUT_UV_WARN          BIT(5) /* Output Undervoltage Warning */
+#define PB_STATUS_VOUT_UV_FAULT         BIT(4) /* Output Undervoltage Fault */
+#define PB_STATUS_VOUT_MAX              BIT(3)
+#define PB_STATUS_VOUT_TON_MAX_FAULT    BIT(2)
+#define PB_STATUS_VOUT_TOFF_MAX_WARN    BIT(1)
+
+/* STATUS_IOUT */
+#define PB_STATUS_IOUT_OC_FAULT    BIT(7) /* Output Overcurrent Fault */
+#define PB_STATUS_IOUT_OC_LV_FAULT BIT(6) /* Output OC And Low Voltage Fault */
+#define PB_STATUS_IOUT_OC_WARN     BIT(5) /* Output Overcurrent Warning */
+#define PB_STATUS_IOUT_UC_FAULT    BIT(4) /* Output Undercurrent Fault */
+#define PB_STATUS_CURR_SHARE       BIT(3) /* Current Share Fault */
+#define PB_STATUS_PWR_LIM_MODE     BIT(2) /* In Power Limiting Mode */
+#define PB_STATUS_POUT_OP_FAULT    BIT(1) /* Output Overpower Fault */
+#define PB_STATUS_POUT_OP_WARN     BIT(0) /* Output Overpower Warning */
+
+/* STATUS_INPUT */
+#define PB_STATUS_INPUT_VIN_OV_FAULT    BIT(7) /* Input Overvoltage Fault */
+#define PB_STATUS_INPUT_VIN_OV_WARN     BIT(6) /* Input Overvoltage Warning */
+#define PB_STATUS_INPUT_VIN_UV_WARN     BIT(5) /* Input Undervoltage Warning */
+#define PB_STATUS_INPUT_VIN_UV_FAULT    BIT(4) /* Input Undervoltage Fault */
+#define PB_STATUS_INPUT_IIN_OC_FAULT    BIT(2) /* Input Overcurrent Fault */
+#define PB_STATUS_INPUT_IIN_OC_WARN     BIT(1) /* Input Overcurrent Warning */
+#define PB_STATUS_INPUT_PIN_OP_WARN     BIT(0) /* Input Overpower Warning */
+
+/* STATUS_TEMPERATURE */
+#define PB_STATUS_OT_FAULT              BIT(7) /* Overtemperature Fault */
+#define PB_STATUS_OT_WARN               BIT(6) /* Overtemperature Warning */
+#define PB_STATUS_UT_WARN               BIT(5) /* Undertemperature Warning */
+#define PB_STATUS_UT_FAULT              BIT(4) /* Undertemperature Fault */
+
+/* STATUS_CML */
+#define PB_CML_FAULT_INVALID_CMD     BIT(7) /* Invalid/Unsupported Command */
+#define PB_CML_FAULT_INVALID_DATA    BIT(6) /* Invalid/Unsupported Data  */
+#define PB_CML_FAULT_PEC             BIT(5) /* Packet Error Check Failed */
+#define PB_CML_FAULT_MEMORY          BIT(4) /* Memory Fault Detected */
+#define PB_CML_FAULT_PROCESSOR       BIT(3) /* Processor Fault Detected */
+#define PB_CML_FAULT_OTHER_COMM      BIT(1) /* Other communication fault */
+#define PB_CML_FAULT_OTHER_MEM_LOGIC BIT(0) /* Other Memory Or Logic Fault */
+
+/* OPERATION*/
+#define PB_OP_ON                BIT(7) /* PSU is switched on */
+#define PB_OP_MARGIN_HIGH       BIT(5) /* PSU vout is set to margin high */
+#define PB_OP_MARGIN_LOW        BIT(4) /* PSU vout is set to margin low */
+
+/* PAGES */
+#define PB_MAX_PAGES            0x1F
+#define PB_ALL_PAGES            0xFF
+
+#define PMBUS_ERR_BYTE          0xFF
+
+#define TYPE_PMBUS_DEVICE "pmbus-device"
+OBJECT_DECLARE_TYPE(PMBusDevice, PMBusDeviceClass,
+                    PMBUS_DEVICE)
+
+/* flags */
+#define PB_HAS_COEFFICIENTS        BIT_ULL(9)
+#define PB_HAS_VIN                 BIT_ULL(10)
+#define PB_HAS_VOUT                BIT_ULL(11)
+#define PB_HAS_VOUT_MARGIN         BIT_ULL(12)
+#define PB_HAS_VIN_RATING          BIT_ULL(13)
+#define PB_HAS_VOUT_RATING         BIT_ULL(14)
+#define PB_HAS_VOUT_MODE           BIT_ULL(15)
+#define PB_HAS_VCAP                BIT_ULL(16)
+#define PB_HAS_IOUT                BIT_ULL(21)
+#define PB_HAS_IIN                 BIT_ULL(22)
+#define PB_HAS_IOUT_RATING         BIT_ULL(23)
+#define PB_HAS_IIN_RATING          BIT_ULL(24)
+#define PB_HAS_IOUT_GAIN           BIT_ULL(25)
+#define PB_HAS_POUT                BIT_ULL(30)
+#define PB_HAS_PIN                 BIT_ULL(31)
+#define PB_HAS_EIN                 BIT_ULL(32)
+#define PB_HAS_EOUT                BIT_ULL(33)
+#define PB_HAS_POUT_RATING         BIT_ULL(34)
+#define PB_HAS_PIN_RATING          BIT_ULL(35)
+#define PB_HAS_TEMPERATURE         BIT_ULL(40)
+#define PB_HAS_TEMP2               BIT_ULL(41)
+#define PB_HAS_TEMP3               BIT_ULL(42)
+#define PB_HAS_TEMP_RATING         BIT_ULL(43)
+#define PB_HAS_FAN                 BIT_ULL(44)
+#define PB_HAS_MFR_INFO            BIT_ULL(50)
+#define PB_HAS_STATUS_MFR_SPECIFIC BIT_ULL(51)
+
+struct PMBusDeviceClass {
+    SMBusDeviceClass parent_class;
+    uint8_t device_num_pages;
+
+    /**
+     * Implement quick_cmd, receive byte, and write_data to support non-standard
+     * PMBus functionality
+     */
+    void (*quick_cmd)(PMBusDevice *dev, uint8_t read);
+    int (*write_data)(PMBusDevice *dev, const uint8_t *buf, uint8_t len);
+    uint8_t (*receive_byte)(PMBusDevice *dev);
+};
+
+/*
+ * According to the spec, each page may offer the full range of PMBus commands
+ * available for each output or non-PMBus device.
+ * Therefore, we can't assume that any registers will always be the same across
+ * all pages.
+ * The page 0xFF is intended for writes to all pages
+ */
+typedef struct PMBusPage {
+    uint64_t page_flags;
+
+    uint8_t page;                      /* R/W byte */
+    uint8_t operation;                 /* R/W byte */
+    uint8_t on_off_config;             /* R/W byte */
+    uint8_t write_protect;             /* R/W byte */
+    uint8_t phase;                     /* R/W byte */
+    uint8_t vout_mode;                 /* R/W byte */
+    uint16_t vout_command;             /* R/W word */
+    uint16_t vout_trim;                /* R/W word */
+    uint16_t vout_cal_offset;          /* R/W word */
+    uint16_t vout_max;                 /* R/W word */
+    uint16_t vout_margin_high;         /* R/W word */
+    uint16_t vout_margin_low;          /* R/W word */
+    uint16_t vout_transition_rate;     /* R/W word */
+    uint16_t vout_droop;               /* R/W word */
+    uint16_t vout_scale_loop;          /* R/W word */
+    uint16_t vout_scale_monitor;       /* R/W word */
+    uint16_t vout_min;                 /* R/W word */
+    uint8_t coefficients[5];           /* Read-only block 5 bytes */
+    uint16_t pout_max;                 /* R/W word */
+    uint16_t max_duty;                 /* R/W word */
+    uint16_t frequency_switch;         /* R/W word */
+    uint16_t vin_on;                   /* R/W word */
+    uint16_t vin_off;                  /* R/W word */
+    uint16_t iout_cal_gain;            /* R/W word */
+    uint16_t iout_cal_offset;          /* R/W word */
+    uint8_t fan_config_1_2;            /* R/W byte */
+    uint16_t fan_command_1;            /* R/W word */
+    uint16_t fan_command_2;            /* R/W word */
+    uint8_t fan_config_3_4;            /* R/W byte */
+    uint16_t fan_command_3;            /* R/W word */
+    uint16_t fan_command_4;            /* R/W word */
+    uint16_t vout_ov_fault_limit;      /* R/W word */
+    uint8_t vout_ov_fault_response;    /* R/W byte */
+    uint16_t vout_ov_warn_limit;       /* R/W word */
+    uint16_t vout_uv_warn_limit;       /* R/W word */
+    uint16_t vout_uv_fault_limit;      /* R/W word */
+    uint8_t vout_uv_fault_response;    /* R/W byte */
+    uint16_t iout_oc_fault_limit;      /* R/W word */
+    uint8_t iout_oc_fault_response;    /* R/W byte */
+    uint16_t iout_oc_lv_fault_limit;   /* R/W word */
+    uint8_t iout_oc_lv_fault_response; /* R/W byte */
+    uint16_t iout_oc_warn_limit;       /* R/W word */
+    uint16_t iout_uc_fault_limit;      /* R/W word */
+    uint8_t iout_uc_fault_response;    /* R/W byte */
+    uint16_t ot_fault_limit;           /* R/W word */
+    uint8_t ot_fault_response;         /* R/W byte */
+    uint16_t ot_warn_limit;            /* R/W word */
+    uint16_t ut_warn_limit;            /* R/W word */
+    uint16_t ut_fault_limit;           /* R/W word */
+    uint8_t ut_fault_response;         /* R/W byte */
+    uint16_t vin_ov_fault_limit;       /* R/W word */
+    uint8_t vin_ov_fault_response;     /* R/W byte */
+    uint16_t vin_ov_warn_limit;        /* R/W word */
+    uint16_t vin_uv_warn_limit;        /* R/W word */
+    uint16_t vin_uv_fault_limit;       /* R/W word */
+    uint8_t vin_uv_fault_response;     /* R/W byte */
+    uint16_t iin_oc_fault_limit;       /* R/W word */
+    uint8_t iin_oc_fault_response;     /* R/W byte */
+    uint16_t iin_oc_warn_limit;        /* R/W word */
+    uint16_t power_good_on;            /* R/W word */
+    uint16_t power_good_off;           /* R/W word */
+    uint16_t ton_delay;                /* R/W word */
+    uint16_t ton_rise;                 /* R/W word */
+    uint16_t ton_max_fault_limit;      /* R/W word */
+    uint8_t ton_max_fault_response;    /* R/W byte */
+    uint16_t toff_delay;               /* R/W word */
+    uint16_t toff_fall;                /* R/W word */
+    uint16_t toff_max_warn_limit;      /* R/W word */
+    uint16_t pout_op_fault_limit;      /* R/W word */
+    uint8_t pout_op_fault_response;    /* R/W byte */
+    uint16_t pout_op_warn_limit;       /* R/W word */
+    uint16_t pin_op_warn_limit;        /* R/W word */
+    uint16_t status_word;              /* R/W word */
+    uint8_t status_vout;               /* R/W byte */
+    uint8_t status_iout;               /* R/W byte */
+    uint8_t status_input;              /* R/W byte */
+    uint8_t status_temperature;        /* R/W byte */
+    uint8_t status_cml;                /* R/W byte */
+    uint8_t status_other;              /* R/W byte */
+    uint8_t status_mfr_specific;       /* R/W byte */
+    uint8_t status_fans_1_2;           /* R/W byte */
+    uint8_t status_fans_3_4;           /* R/W byte */
+    uint8_t read_ein[5];               /* Read-Only block 5 bytes */
+    uint8_t read_eout[5];              /* Read-Only block 5 bytes */
+    uint16_t read_vin;                 /* Read-Only word */
+    uint16_t read_iin;                 /* Read-Only word */
+    uint16_t read_vcap;                /* Read-Only word */
+    uint16_t read_vout;                /* Read-Only word */
+    uint16_t read_iout;                /* Read-Only word */
+    uint16_t read_temperature_1;       /* Read-Only word */
+    uint16_t read_temperature_2;       /* Read-Only word */
+    uint16_t read_temperature_3;       /* Read-Only word */
+    uint16_t read_fan_speed_1;         /* Read-Only word */
+    uint16_t read_fan_speed_2;         /* Read-Only word */
+    uint16_t read_fan_speed_3;         /* Read-Only word */
+    uint16_t read_fan_speed_4;         /* Read-Only word */
+    uint16_t read_duty_cycle;          /* Read-Only word */
+    uint16_t read_frequency;           /* Read-Only word */
+    uint16_t read_pout;                /* Read-Only word */
+    uint16_t read_pin;                 /* Read-Only word */
+    uint8_t revision;                  /* Read-Only byte */
+    const char *mfr_id;                /* R/W block */
+    const char *mfr_model;             /* R/W block */
+    const char *mfr_revision;          /* R/W block */
+    const char *mfr_location;          /* R/W block */
+    const char *mfr_date;              /* R/W block */
+    const char *mfr_serial;            /* R/W block */
+    const char *app_profile_support;   /* Read-Only block-read */
+    uint16_t mfr_vin_min;              /* Read-Only word */
+    uint16_t mfr_vin_max;              /* Read-Only word */
+    uint16_t mfr_iin_max;              /* Read-Only word */
+    uint16_t mfr_pin_max;              /* Read-Only word */
+    uint16_t mfr_vout_min;             /* Read-Only word */
+    uint16_t mfr_vout_max;             /* Read-Only word */
+    uint16_t mfr_iout_max;             /* Read-Only word */
+    uint16_t mfr_pout_max;             /* Read-Only word */
+    uint16_t mfr_tambient_max;         /* Read-Only word */
+    uint16_t mfr_tambient_min;         /* Read-Only word */
+    uint8_t mfr_efficiency_ll[14];     /* Read-Only block 14 bytes */
+    uint8_t mfr_efficiency_hl[14];     /* Read-Only block 14 bytes */
+    uint8_t mfr_pin_accuracy;          /* Read-Only byte */
+    uint16_t mfr_max_temp_1;           /* R/W word */
+    uint16_t mfr_max_temp_2;           /* R/W word */
+    uint16_t mfr_max_temp_3;           /* R/W word */
+} PMBusPage;
+
+/* State */
+struct PMBusDevice {
+    SMBusDevice smb;
+
+    uint8_t num_pages;
+    uint8_t code;
+    uint8_t page;
+
+    /*
+     * PMBus registers are stored in a PMBusPage structure allocated by
+     * calling pmbus_pages_alloc()
+     */
+    PMBusPage *pages;
+    uint8_t capability;
+
+
+    int32_t in_buf_len;
+    uint8_t *in_buf;
+    int32_t out_buf_len;
+    uint8_t out_buf[SMBUS_DATA_MAX_LEN];
+};
+
+/**
+ * Direct mode coefficients
+ * @var m - mantissa
+ * @var b - offset
+ * @var R - exponent
+ */
+typedef struct PMBusCoefficients {
+    int32_t m;     /* mantissa */
+    int64_t b;     /* offset */
+    int32_t R;     /* exponent */
+} PMBusCoefficients;
+
+/**
+ * VOUT_Mode bit fields
+ */
+typedef struct PMBusVoutMode {
+    uint8_t  mode:3;
+    int8_t   exp:5;
+} PMBusVoutMode;
+
+/**
+ * Convert sensor values to direct mode format
+ *
+ * Y = (m * x - b) * 10^R
+ *
+ * @return uint16_t
+ */
+uint16_t pmbus_data2direct_mode(PMBusCoefficients c, uint32_t value);
+
+/**
+ * Convert direct mode formatted data into sensor reading
+ *
+ * X = (Y * 10^-R - b) / m
+ *
+ * @return uint32_t
+ */
+uint32_t pmbus_direct_mode2data(PMBusCoefficients c, uint16_t value);
+
+/**
+ * Convert sensor values to linear mode format
+ *
+ * L = D * 2^(-e)
+ *
+ * @return uint16
+ */
+uint16_t pmbus_data2linear_mode(uint16_t value, int exp);
+
+/**
+ * Convert linear mode formatted data into sensor reading
+ *
+ * D = L * 2^e
+ *
+ * @return uint16
+ */
+uint16_t pmbus_linear_mode2data(uint16_t value, int exp);
+
+/**
+ * @brief Send a block of data over PMBus
+ * Assumes that the bytes in the block are already ordered correctly,
+ * also assumes the length has been prepended to the block if necessary
+ *     | low_byte | ... | high_byte |
+ * @param state - maintains state of the PMBus device
+ * @param data - byte array to be sent by device
+ * @param len - number
+ */
+void pmbus_send(PMBusDevice *state, const uint8_t *data, uint16_t len);
+void pmbus_send8(PMBusDevice *state, uint8_t data);
+void pmbus_send16(PMBusDevice *state, uint16_t data);
+void pmbus_send32(PMBusDevice *state, uint32_t data);
+void pmbus_send64(PMBusDevice *state, uint64_t data);
+
+/**
+ * @brief Send a string over PMBus with length prepended.
+ * Length is calculated using str_len()
+ */
+void pmbus_send_string(PMBusDevice *state, const char *data);
+
+/**
+ * @brief Receive data sent with Block Write.
+ * @param dest - memory with enough capacity to receive the write
+ * @param len - the capacity of dest
+ */
+uint8_t pmbus_receive_block(PMBusDevice *pmdev, uint8_t *dest, size_t len);
+
+/**
+ * @brief Receive data over PMBus
+ * These methods help track how much data is being received over PMBus
+ * Log to GUEST_ERROR if too much or too little is sent.
+ */
+uint8_t pmbus_receive8(PMBusDevice *pmdev);
+uint16_t pmbus_receive16(PMBusDevice *pmdev);
+uint32_t pmbus_receive32(PMBusDevice *pmdev);
+uint64_t pmbus_receive64(PMBusDevice *pmdev);
+
+/**
+ * PMBus page config must be called before any page is first used.
+ * It will allocate memory for all the pages if needed.
+ * Passed in flags overwrite existing flags if any.
+ * @param page_index the page to which the flags are applied, setting page_index
+ * to 0xFF applies the passed in flags to all pages.
+ * @param flags
+ */
+int pmbus_page_config(PMBusDevice *pmdev, uint8_t page_index, uint64_t flags);
+
+/**
+ * Update the status registers when sensor values change.
+ * Useful if modifying sensors through qmp, this way status registers get
+ * updated
+ */
+void pmbus_check_limits(PMBusDevice *pmdev);
+
+/**
+ * Enter an idle state where only the PMBUS_ERR_BYTE will be returned
+ * indefinitely until a new command is issued.
+ */
+void pmbus_idle(PMBusDevice *pmdev);
+
+extern const VMStateDescription vmstate_pmbus_device;
+
+#define VMSTATE_PMBUS_DEVICE(_field, _state) {                       \
+    .name       = (stringify(_field)),                               \
+    .size       = sizeof(PMBusDevice),                               \
+    .vmsd       = &vmstate_pmbus_device,                             \
+    .flags      = VMS_STRUCT,                                        \
+    .offset     = vmstate_offset_value(_state, _field, PMBusDevice), \
+}
+
+#endif
diff --git a/include/hw/i2c/ppc4xx_i2c.h b/include/hw/i2c/ppc4xx_i2c.h
new file mode 100644 (file)
index 0000000..4e882fa
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * PPC4xx I2C controller emulation
+ *
+ * Copyright (c) 2007 Jocelyn Mayer
+ * Copyright (c) 2012 François Revol
+ * Copyright (c) 2016-2018 BALATON Zoltan
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef PPC4XX_I2C_H
+#define PPC4XX_I2C_H
+
+#include "hw/sysbus.h"
+#include "hw/i2c/bitbang_i2c.h"
+#include "qom/object.h"
+
+#define TYPE_PPC4xx_I2C "ppc4xx-i2c"
+OBJECT_DECLARE_SIMPLE_TYPE(PPC4xxI2CState, PPC4xx_I2C)
+
+struct PPC4xxI2CState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    I2CBus *bus;
+    qemu_irq irq;
+    MemoryRegion iomem;
+    bitbang_i2c_interface bitbang;
+    int mdidx;
+    uint8_t mdata[4];
+    uint8_t lmadr;
+    uint8_t hmadr;
+    uint8_t cntl;
+    uint8_t mdcntl;
+    uint8_t sts;
+    uint8_t extsts;
+    uint8_t lsadr;
+    uint8_t hsadr;
+    uint8_t clkdiv;
+    uint8_t intrmsk;
+    uint8_t xfrcnt;
+    uint8_t xtcntlss;
+    uint8_t directcntl;
+};
+
+#endif /* PPC4XX_I2C_H */
diff --git a/include/hw/i2c/smbus_eeprom.h b/include/hw/i2c/smbus_eeprom.h
new file mode 100644 (file)
index 0000000..68b0063
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * QEMU SMBus EEPROM API
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_SMBUS_EEPROM_H
+#define HW_SMBUS_EEPROM_H
+
+#include "exec/cpu-common.h"
+#include "hw/i2c/i2c.h"
+
+void smbus_eeprom_init_one(I2CBus *bus, uint8_t address, uint8_t *eeprom_buf);
+void smbus_eeprom_init(I2CBus *bus, int nb_eeprom,
+                       const uint8_t *eeprom_spd, int size);
+
+enum sdram_type { SDR = 0x4, DDR = 0x7, DDR2 = 0x8 };
+uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t size);
+
+#endif
diff --git a/include/hw/i2c/smbus_master.h b/include/hw/i2c/smbus_master.h
new file mode 100644 (file)
index 0000000..bb13bc4
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * QEMU SMBus host (master) API
+ *
+ * Copyright (c) 2007 Arastra, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_SMBUS_MASTER_H
+#define HW_SMBUS_MASTER_H
+
+#include "hw/i2c/i2c.h"
+
+/* Master device commands.  */
+int smbus_quick_command(I2CBus *bus, uint8_t addr, int read);
+int smbus_receive_byte(I2CBus *bus, uint8_t addr);
+int smbus_send_byte(I2CBus *bus, uint8_t addr, uint8_t data);
+int smbus_read_byte(I2CBus *bus, uint8_t addr, uint8_t command);
+int smbus_write_byte(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t data);
+int smbus_read_word(I2CBus *bus, uint8_t addr, uint8_t command);
+int smbus_write_word(I2CBus *bus, uint8_t addr, uint8_t command, uint16_t data);
+
+/*
+ * Do a block transfer from an I2C device.  If recv_len is set, then the
+ * first received byte is a length field and is used to know how much data
+ * to receive.  Otherwise receive "len" bytes.  If send_cmd is set, send
+ * the command byte first before receiving the data.
+ */
+int smbus_read_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data,
+                     int len, bool recv_len, bool send_cmd);
+
+/*
+ * Do a block transfer to an I2C device.  If send_len is set, send the
+ * "len" value before the data.
+ */
+int smbus_write_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data,
+                      int len, bool send_len);
+
+#endif
diff --git a/include/hw/i2c/smbus_slave.h b/include/hw/i2c/smbus_slave.h
new file mode 100644 (file)
index 0000000..86bfe0a
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * QEMU SMBus device (slave) API
+ *
+ * Copyright (c) 2007 Arastra, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_SMBUS_SLAVE_H
+#define HW_SMBUS_SLAVE_H
+
+#include "hw/i2c/i2c.h"
+#include "qom/object.h"
+
+#define TYPE_SMBUS_DEVICE "smbus-device"
+OBJECT_DECLARE_TYPE(SMBusDevice, SMBusDeviceClass,
+                    SMBUS_DEVICE)
+
+
+struct SMBusDeviceClass {
+    I2CSlaveClass parent_class;
+
+    /*
+     * An operation with no data, special in SMBus.
+     * This may be NULL, quick commands are ignore in that case.
+     */
+    void (*quick_cmd)(SMBusDevice *dev, uint8_t read);
+
+    /*
+     * We can't distinguish between a word write and a block write with
+     * length 1, so pass the whole data block including the length byte
+     * (if present).  The device is responsible figuring out what type of
+     * command this is.
+     * This may be NULL if no data is written to the device.  Writes
+     * will be ignore in that case.
+     */
+    int (*write_data)(SMBusDevice *dev, uint8_t *buf, uint8_t len);
+
+    /*
+     * Likewise we can't distinguish between different reads, or even know
+     * the length of the read until the read is complete, so read data a
+     * byte at a time.  The device is responsible for adding the length
+     * byte on block reads.  This call cannot fail, it should return
+     * something, preferably 0xff if nothing is available.
+     * This may be NULL if no data is read from the device.  Reads will
+     * return 0xff in that case.
+     */
+    uint8_t (*receive_byte)(SMBusDevice *dev);
+};
+
+#define SMBUS_DATA_MAX_LEN 34  /* command + len + 32 bytes of data.  */
+
+struct SMBusDevice {
+    /* The SMBus protocol is implemented on top of I2C.  */
+    I2CSlave i2c;
+
+    /* Remaining fields for internal use only.  */
+    int32_t mode;
+    int32_t data_len;
+    uint8_t data_buf[SMBUS_DATA_MAX_LEN];
+};
+
+extern const VMStateDescription vmstate_smbus_device;
+
+#define VMSTATE_SMBUS_DEVICE(_field, _state) {                       \
+    .name       = (stringify(_field)),                               \
+    .size       = sizeof(SMBusDevice),                               \
+    .vmsd       = &vmstate_smbus_device,                             \
+    .flags      = VMS_STRUCT,                                        \
+    .offset     = vmstate_offset_value(_state, _field, SMBusDevice), \
+}
+
+/*
+ * Users should call this in their .needed functions to know if the
+ * SMBus slave data needs to be transferred.
+ */
+bool smbus_vmstate_needed(SMBusDevice *dev);
+
+#endif
diff --git a/include/hw/i386/apic-msidef.h b/include/hw/i386/apic-msidef.h
new file mode 100644 (file)
index 0000000..420b411
--- /dev/null
@@ -0,0 +1,31 @@
+#ifndef HW_APIC_MSIDEF_H
+#define HW_APIC_MSIDEF_H
+
+/*
+ * Intel APIC constants: from include/asm/msidef.h
+ */
+
+/*
+ * Shifts for MSI data
+ */
+
+#define MSI_DATA_VECTOR_SHIFT           0
+#define  MSI_DATA_VECTOR_MASK           0x000000ff
+
+#define MSI_DATA_DELIVERY_MODE_SHIFT    8
+#define MSI_DATA_LEVEL_SHIFT            14
+#define MSI_DATA_TRIGGER_SHIFT          15
+
+/*
+ * Shift/mask fields for msi address
+ */
+
+#define MSI_ADDR_DEST_MODE_SHIFT        2
+
+#define MSI_ADDR_REDIRECTION_SHIFT      3
+
+#define MSI_ADDR_DEST_ID_SHIFT          12
+#define MSI_ADDR_DEST_IDX_SHIFT         4
+#define  MSI_ADDR_DEST_ID_MASK          0x000ff000
+
+#endif /* HW_APIC_MSIDEF_H */
diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h
new file mode 100644 (file)
index 0000000..bdc15a7
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef APIC_H
+#define APIC_H
+
+
+/* apic.c */
+void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
+                      uint8_t vector_num, uint8_t trigger_mode);
+int apic_accept_pic_intr(DeviceState *s);
+void apic_deliver_pic_intr(DeviceState *s, int level);
+void apic_deliver_nmi(DeviceState *d);
+int apic_get_interrupt(DeviceState *s);
+void cpu_set_apic_base(DeviceState *s, uint64_t val);
+uint64_t cpu_get_apic_base(DeviceState *s);
+void cpu_set_apic_tpr(DeviceState *s, uint8_t val);
+uint8_t cpu_get_apic_tpr(DeviceState *s);
+void apic_init_reset(DeviceState *s);
+void apic_sipi(DeviceState *s);
+void apic_poll_irq(DeviceState *d);
+void apic_designate_bsp(DeviceState *d, bool bsp);
+int apic_get_highest_priority_irr(DeviceState *dev);
+
+/* pc.c */
+DeviceState *cpu_get_current_apic(void);
+
+#endif
diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h
new file mode 100644 (file)
index 0000000..5f2ba24
--- /dev/null
@@ -0,0 +1,230 @@
+/*
+ *  APIC support - internal interfaces
+ *
+ *  Copyright (c) 2004-2005 Fabrice Bellard
+ *  Copyright (c) 2011      Jan Kiszka, Siemens AG
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#ifndef QEMU_APIC_INTERNAL_H
+#define QEMU_APIC_INTERNAL_H
+
+#include "cpu.h"
+#include "exec/memory.h"
+#include "qemu/timer.h"
+#include "target/i386/cpu-qom.h"
+#include "qom/object.h"
+
+/* APIC Local Vector Table */
+#define APIC_LVT_TIMER                  0
+#define APIC_LVT_THERMAL                1
+#define APIC_LVT_PERFORM                2
+#define APIC_LVT_LINT0                  3
+#define APIC_LVT_LINT1                  4
+#define APIC_LVT_ERROR                  5
+#define APIC_LVT_NB                     6
+
+/* APIC delivery modes */
+#define APIC_DM_FIXED                   0
+#define APIC_DM_LOWPRI                  1
+#define APIC_DM_SMI                     2
+#define APIC_DM_NMI                     4
+#define APIC_DM_INIT                    5
+#define APIC_DM_SIPI                    6
+#define APIC_DM_EXTINT                  7
+
+/* APIC destination mode */
+#define APIC_DESTMODE_FLAT              0xf
+#define APIC_DESTMODE_CLUSTER           1
+
+#define APIC_TRIGGER_EDGE               0
+#define APIC_TRIGGER_LEVEL              1
+
+#define APIC_VECTOR_MASK                0xff
+#define APIC_DCR_MASK                   0xf
+
+#define APIC_LVT_TIMER_SHIFT            17
+#define APIC_LVT_MASKED_SHIFT           16
+#define APIC_LVT_LEVEL_TRIGGER_SHIFT    15
+#define APIC_LVT_REMOTE_IRR_SHIFT       14
+#define APIC_LVT_INT_POLARITY_SHIFT     13
+#define APIC_LVT_DELIV_STS_SHIFT        12
+#define APIC_LVT_DELIV_MOD_SHIFT        8
+
+#define APIC_LVT_TIMER_TSCDEADLINE      (2 << APIC_LVT_TIMER_SHIFT)
+#define APIC_LVT_TIMER_PERIODIC         (1 << APIC_LVT_TIMER_SHIFT)
+#define APIC_LVT_MASKED                 (1 << APIC_LVT_MASKED_SHIFT)
+#define APIC_LVT_LEVEL_TRIGGER          (1 << APIC_LVT_LEVEL_TRIGGER_SHIFT)
+#define APIC_LVT_REMOTE_IRR             (1 << APIC_LVT_REMOTE_IRR_SHIFT)
+#define APIC_LVT_INT_POLARITY           (1 << APIC_LVT_INT_POLARITY_SHIFT)
+#define APIC_LVT_DELIV_STS              (1 << APIC_LVT_DELIV_STS_SHIFT)
+#define APIC_LVT_DELIV_MOD              (7 << APIC_LVT_DELIV_MOD_SHIFT)
+
+#define APIC_ESR_ILL_ADDRESS_SHIFT      7
+#define APIC_ESR_RECV_ILL_VECT_SHIFT    6
+#define APIC_ESR_SEND_ILL_VECT_SHIFT    5
+#define APIC_ESR_RECV_ACCEPT_SHIFT      3
+#define APIC_ESR_SEND_ACCEPT_SHIFT      2
+#define APIC_ESR_RECV_CHECK_SUM_SHIFT   1
+
+#define APIC_ESR_ILLEGAL_ADDRESS        (1 << APIC_ESR_ILL_ADDRESS_SHIFT)
+#define APIC_ESR_RECV_ILLEGAL_VECT      (1 << APIC_ESR_RECV_ILL_VECT_SHIFT)
+#define APIC_ESR_SEND_ILLEGAL_VECT      (1 << APIC_ESR_SEND_ILL_VECT_SHIFT)
+#define APIC_ESR_RECV_ACCEPT            (1 << APIC_ESR_RECV_ACCEPT_SHIFT)
+#define APIC_ESR_SEND_ACCEPT            (1 << APIC_ESR_SEND_ACCEPT_SHIFT)
+#define APIC_ESR_RECV_CHECK_SUM         (1 << APIC_ESR_RECV_CHECK_SUM_SHIFT)
+#define APIC_ESR_SEND_CHECK_SUM         1
+
+#define APIC_ICR_DEST_SHIFT             24
+#define APIC_ICR_DEST_SHORT_SHIFT       18
+#define APIC_ICR_TRIGGER_MOD_SHIFT      15
+#define APIC_ICR_LEVEL_SHIFT            14
+#define APIC_ICR_DELIV_STS_SHIFT        12
+#define APIC_ICR_DEST_MOD_SHIFT         11
+#define APIC_ICR_DELIV_MOD_SHIFT        8
+
+#define APIC_ICR_DEST_SHORT             (3 << APIC_ICR_DEST_SHORT_SHIFT)
+#define APIC_ICR_TRIGGER_MOD            (1 << APIC_ICR_TRIGGER_MOD_SHIFT)
+#define APIC_ICR_LEVEL                  (1 << APIC_ICR_LEVEL_SHIFT)
+#define APIC_ICR_DELIV_STS              (1 << APIC_ICR_DELIV_STS_SHIFT)
+#define APIC_ICR_DEST_MOD               (1 << APIC_ICR_DEST_MOD_SHIFT)
+#define APIC_ICR_DELIV_MOD              (7 << APIC_ICR_DELIV_MOD_SHIFT)
+
+#define APIC_PR_CLASS_SHIFT             4
+#define APIC_PR_SUB_CLASS               0xf
+
+#define APIC_LOGDEST_XAPIC_SHIFT        4
+#define APIC_LOGDEST_XAPIC_ID           0xf
+
+#define APIC_LOGDEST_X2APIC_SHIFT       16
+#define APIC_LOGDEST_X2APIC_ID          0xffff
+
+#define APIC_SPURIO_FOCUS_SHIFT         9
+#define APIC_SPURIO_ENABLED_SHIFT       8
+
+#define APIC_SPURIO_FOCUS               (1 << APIC_SPURIO_FOCUS_SHIFT)
+#define APIC_SPURIO_ENABLED             (1 << APIC_SPURIO_ENABLED_SHIFT)
+
+#define APIC_SV_DIRECTED_IO             (1 << 12)
+#define APIC_SV_ENABLE                  (1 << 8)
+
+#define VAPIC_ENABLE_BIT                0
+#define VAPIC_ENABLE_MASK               (1 << VAPIC_ENABLE_BIT)
+
+typedef struct APICCommonState APICCommonState;
+
+#define TYPE_APIC_COMMON "apic-common"
+typedef struct APICCommonClass APICCommonClass;
+DECLARE_OBJ_CHECKERS(APICCommonState, APICCommonClass,
+                     APIC_COMMON, TYPE_APIC_COMMON)
+
+struct APICCommonClass {
+    DeviceClass parent_class;
+
+    DeviceRealize realize;
+    DeviceUnrealize unrealize;
+    void (*set_base)(APICCommonState *s, uint64_t val);
+    void (*set_tpr)(APICCommonState *s, uint8_t val);
+    uint8_t (*get_tpr)(APICCommonState *s);
+    void (*enable_tpr_reporting)(APICCommonState *s, bool enable);
+    void (*vapic_base_update)(APICCommonState *s);
+    void (*external_nmi)(APICCommonState *s);
+    void (*pre_save)(APICCommonState *s);
+    void (*post_load)(APICCommonState *s);
+    void (*reset)(APICCommonState *s);
+    /* send_msi emulates an APIC bus and its proper place would be in a new
+     * device, but it's convenient to have it here for now.
+     */
+    void (*send_msi)(MSIMessage *msi);
+};
+
+struct APICCommonState {
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
+
+    MemoryRegion io_memory;
+    X86CPU *cpu;
+    uint32_t apicbase;
+    uint8_t id; /* legacy APIC ID */
+    uint32_t initial_apic_id;
+    uint8_t version;
+    uint8_t arb_id;
+    uint8_t tpr;
+    uint32_t spurious_vec;
+    uint8_t log_dest;
+    uint8_t dest_mode;
+    uint32_t isr[8];  /* in service register */
+    uint32_t tmr[8];  /* trigger mode register */
+    uint32_t irr[8]; /* interrupt request register */
+    uint32_t lvt[APIC_LVT_NB];
+    uint32_t esr; /* error register */
+    uint32_t icr[2];
+
+    uint32_t divide_conf;
+    int count_shift;
+    uint32_t initial_count;
+    int64_t initial_count_load_time;
+    int64_t next_time;
+    QEMUTimer *timer;
+    int64_t timer_expiry;
+    int sipi_vector;
+    int wait_for_sipi;
+
+    uint32_t vapic_control;
+    DeviceState *vapic;
+    hwaddr vapic_paddr; /* note: persistence via kvmvapic */
+    bool legacy_instance_id;
+};
+
+typedef struct VAPICState {
+    uint8_t tpr;
+    uint8_t isr;
+    uint8_t zero;
+    uint8_t irr;
+    uint8_t enabled;
+} QEMU_PACKED VAPICState;
+
+extern bool apic_report_tpr_access;
+
+bool apic_next_timer(APICCommonState *s, int64_t current_time);
+void apic_enable_tpr_access_reporting(DeviceState *d, bool enable);
+void apic_enable_vapic(DeviceState *d, hwaddr paddr);
+
+void vapic_report_tpr_access(DeviceState *dev, CPUState *cpu, target_ulong ip,
+                             TPRAccess access);
+
+int apic_get_ppr(APICCommonState *s);
+uint32_t apic_get_current_count(APICCommonState *s);
+
+static inline void apic_set_bit(uint32_t *tab, int index)
+{
+    int i, mask;
+    i = index >> 5;
+    mask = 1 << (index & 0x1f);
+    tab[i] |= mask;
+}
+
+static inline int apic_get_bit(uint32_t *tab, int index)
+{
+    int i, mask;
+    i = index >> 5;
+    mask = 1 << (index & 0x1f);
+    return !!(tab[i] & mask);
+}
+
+APICCommonClass *apic_get_class(Error **errp);
+
+#endif /* QEMU_APIC_INTERNAL_H */
diff --git a/include/hw/i386/hostmem-epc.h b/include/hw/i386/hostmem-epc.h
new file mode 100644 (file)
index 0000000..846c726
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * SGX EPC backend
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Authors:
+ *   Sean Christopherson <sean.j.christopherson@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_HOSTMEM_EPC_H
+#define QEMU_HOSTMEM_EPC_H
+
+#include "sysemu/hostmem.h"
+
+#define TYPE_MEMORY_BACKEND_EPC "memory-backend-epc"
+
+#define MEMORY_BACKEND_EPC(obj)                                        \
+    OBJECT_CHECK(HostMemoryBackendEpc, (obj), TYPE_MEMORY_BACKEND_EPC)
+
+typedef struct HostMemoryBackendEpc HostMemoryBackendEpc;
+
+struct HostMemoryBackendEpc {
+    HostMemoryBackend parent_obj;
+};
+
+#endif
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
new file mode 100644 (file)
index 0000000..7fa0a69
--- /dev/null
@@ -0,0 +1,320 @@
+/*
+ * QEMU emulation of an Intel IOMMU (VT-d)
+ *   (DMA Remapping device)
+ *
+ * Copyright (C) 2013 Knut Omang, Oracle <knut.omang@oracle.com>
+ * Copyright (C) 2014 Le Tan, <tamlokveer@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef INTEL_IOMMU_H
+#define INTEL_IOMMU_H
+
+#include "hw/i386/x86-iommu.h"
+#include "qemu/iova-tree.h"
+#include "qom/object.h"
+
+#define TYPE_INTEL_IOMMU_DEVICE "intel-iommu"
+OBJECT_DECLARE_SIMPLE_TYPE(IntelIOMMUState, INTEL_IOMMU_DEVICE)
+
+#define TYPE_INTEL_IOMMU_MEMORY_REGION "intel-iommu-iommu-memory-region"
+
+/* DMAR Hardware Unit Definition address (IOMMU unit) */
+#define Q35_HOST_BRIDGE_IOMMU_ADDR  0xfed90000ULL
+
+#define VTD_PCI_BUS_MAX             256
+#define VTD_PCI_SLOT_MAX            32
+#define VTD_PCI_FUNC_MAX            8
+#define VTD_PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)
+#define VTD_PCI_FUNC(devfn)         ((devfn) & 0x07)
+#define VTD_SID_TO_BUS(sid)         (((sid) >> 8) & 0xff)
+#define VTD_SID_TO_DEVFN(sid)       ((sid) & 0xff)
+
+#define DMAR_REG_SIZE               0x230
+#define VTD_HOST_AW_39BIT           39
+#define VTD_HOST_AW_48BIT           48
+#define VTD_HOST_ADDRESS_WIDTH      VTD_HOST_AW_39BIT
+#define VTD_HAW_MASK(aw)            ((1ULL << (aw)) - 1)
+
+#define DMAR_REPORT_F_INTR          (1)
+
+#define  VTD_MSI_ADDR_HI_MASK        (0xffffffff00000000ULL)
+#define  VTD_MSI_ADDR_HI_SHIFT       (32)
+#define  VTD_MSI_ADDR_LO_MASK        (0x00000000ffffffffULL)
+
+typedef struct VTDContextEntry VTDContextEntry;
+typedef struct VTDContextCacheEntry VTDContextCacheEntry;
+typedef struct VTDAddressSpace VTDAddressSpace;
+typedef struct VTDIOTLBEntry VTDIOTLBEntry;
+typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
+typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
+typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
+typedef struct VTDPASIDEntry VTDPASIDEntry;
+
+/* Context-Entry */
+struct VTDContextEntry {
+    union {
+        struct {
+            uint64_t lo;
+            uint64_t hi;
+        };
+        struct {
+            uint64_t val[4];
+        };
+    };
+};
+
+struct VTDContextCacheEntry {
+    /* The cache entry is obsolete if
+     * context_cache_gen!=IntelIOMMUState.context_cache_gen
+     */
+    uint32_t context_cache_gen;
+    struct VTDContextEntry context_entry;
+};
+
+/* PASID Directory Entry */
+struct VTDPASIDDirEntry {
+    uint64_t val;
+};
+
+/* PASID Table Entry */
+struct VTDPASIDEntry {
+    uint64_t val[8];
+};
+
+struct VTDAddressSpace {
+    PCIBus *bus;
+    uint8_t devfn;
+    uint32_t pasid;
+    AddressSpace as;
+    IOMMUMemoryRegion iommu;
+    MemoryRegion root;          /* The root container of the device */
+    MemoryRegion nodmar;        /* The alias of shared nodmar MR */
+    MemoryRegion iommu_ir;      /* Interrupt region: 0xfeeXXXXX */
+    MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */
+    IntelIOMMUState *iommu_state;
+    VTDContextCacheEntry context_cache_entry;
+    QLIST_ENTRY(VTDAddressSpace) next;
+    /* Superset of notifier flags that this address space has */
+    IOMMUNotifierFlag notifier_flags;
+    /*
+     * @iova_tree traces mapped IOVA ranges.
+     *
+     * The tree is not needed if no MAP notifier is registered with current
+     * VTD address space, because all guest invalidate commands can be
+     * directly passed to the IOMMU UNMAP notifiers without any further
+     * reshuffling.
+     *
+     * The tree OTOH is required for MAP typed iommu notifiers for a few
+     * reasons.
+     *
+     * Firstly, there's no way to identify whether an PSI (Page Selective
+     * Invalidations) or DSI (Domain Selective Invalidations) event is an
+     * MAP or UNMAP event within the message itself.  Without having prior
+     * knowledge of existing state vIOMMU doesn't know whether it should
+     * notify MAP or UNMAP for a PSI message it received when caching mode
+     * is enabled (for MAP notifiers).
+     *
+     * Secondly, PSI messages received from guest driver can be enlarged in
+     * range, covers but not limited to what the guest driver wanted to
+     * invalidate.  When the range to invalidates gets bigger than the
+     * limit of a PSI message, it can even become a DSI which will
+     * invalidate the whole domain.  If the vIOMMU directly notifies the
+     * registered device with the unmodified range, it may confuse the
+     * registered drivers (e.g. vfio-pci) on either:
+     *
+     *   (1) Trying to map the same region more than once (for
+     *       VFIO_IOMMU_MAP_DMA, -EEXIST will trigger), or,
+     *
+     *   (2) Trying to UNMAP a range that is still partially mapped.
+     *
+     * That accuracy is not required for UNMAP-only notifiers, but it is a
+     * must-to-have for notifiers registered with MAP events, because the
+     * vIOMMU needs to make sure the shadow page table is always in sync
+     * with the guest IOMMU pgtables for a device.
+     */
+    IOVATree *iova_tree;
+};
+
+struct VTDIOTLBEntry {
+    uint64_t gfn;
+    uint16_t domain_id;
+    uint32_t pasid;
+    uint64_t slpte;
+    uint64_t mask;
+    uint8_t access_flags;
+};
+
+/* VT-d Source-ID Qualifier types */
+enum {
+    VTD_SQ_FULL = 0x00,     /* Full SID verification */
+    VTD_SQ_IGN_3 = 0x01,    /* Ignore bit 3 */
+    VTD_SQ_IGN_2_3 = 0x02,  /* Ignore bits 2 & 3 */
+    VTD_SQ_IGN_1_3 = 0x03,  /* Ignore bits 1-3 */
+    VTD_SQ_MAX,
+};
+
+/* VT-d Source Validation Types */
+enum {
+    VTD_SVT_NONE = 0x00,    /* No validation */
+    VTD_SVT_ALL = 0x01,     /* Do full validation */
+    VTD_SVT_BUS = 0x02,     /* Validate bus range */
+    VTD_SVT_MAX,
+};
+
+/* Interrupt Remapping Table Entry Definition */
+union VTD_IR_TableEntry {
+    struct {
+#if HOST_BIG_ENDIAN
+        uint64_t dest_id:32;         /* Destination ID */
+        uint64_t __reserved_1:8;     /* Reserved 1 */
+        uint64_t vector:8;           /* Interrupt Vector */
+        uint64_t irte_mode:1;        /* IRTE Mode */
+        uint64_t __reserved_0:3;     /* Reserved 0 */
+        uint64_t __avail:4;          /* Available spaces for software */
+        uint64_t delivery_mode:3;    /* Delivery Mode */
+        uint64_t trigger_mode:1;     /* Trigger Mode */
+        uint64_t redir_hint:1;       /* Redirection Hint */
+        uint64_t dest_mode:1;        /* Destination Mode */
+        uint64_t fault_disable:1;    /* Fault Processing Disable */
+        uint64_t present:1;          /* Whether entry present/available */
+#else
+        uint64_t present:1;          /* Whether entry present/available */
+        uint64_t fault_disable:1;    /* Fault Processing Disable */
+        uint64_t dest_mode:1;        /* Destination Mode */
+        uint64_t redir_hint:1;       /* Redirection Hint */
+        uint64_t trigger_mode:1;     /* Trigger Mode */
+        uint64_t delivery_mode:3;    /* Delivery Mode */
+        uint64_t __avail:4;          /* Available spaces for software */
+        uint64_t __reserved_0:3;     /* Reserved 0 */
+        uint64_t irte_mode:1;        /* IRTE Mode */
+        uint64_t vector:8;           /* Interrupt Vector */
+        uint64_t __reserved_1:8;     /* Reserved 1 */
+        uint64_t dest_id:32;         /* Destination ID */
+#endif
+#if HOST_BIG_ENDIAN
+        uint64_t __reserved_2:44;    /* Reserved 2 */
+        uint64_t sid_vtype:2;        /* Source-ID Validation Type */
+        uint64_t sid_q:2;            /* Source-ID Qualifier */
+        uint64_t source_id:16;       /* Source-ID */
+#else
+        uint64_t source_id:16;       /* Source-ID */
+        uint64_t sid_q:2;            /* Source-ID Qualifier */
+        uint64_t sid_vtype:2;        /* Source-ID Validation Type */
+        uint64_t __reserved_2:44;    /* Reserved 2 */
+#endif
+    } QEMU_PACKED irte;
+    uint64_t data[2];
+};
+
+#define VTD_IR_INT_FORMAT_COMPAT     (0) /* Compatible Interrupt */
+#define VTD_IR_INT_FORMAT_REMAP      (1) /* Remappable Interrupt */
+
+/* Programming format for MSI/MSI-X addresses */
+union VTD_IR_MSIAddress {
+    struct {
+#if HOST_BIG_ENDIAN
+        uint32_t __head:12;          /* Should always be: 0x0fee */
+        uint32_t index_l:15;         /* Interrupt index bit 14-0 */
+        uint32_t int_mode:1;         /* Interrupt format */
+        uint32_t sub_valid:1;        /* SHV: Sub-Handle Valid bit */
+        uint32_t index_h:1;          /* Interrupt index bit 15 */
+        uint32_t __not_care:2;
+#else
+        uint32_t __not_care:2;
+        uint32_t index_h:1;          /* Interrupt index bit 15 */
+        uint32_t sub_valid:1;        /* SHV: Sub-Handle Valid bit */
+        uint32_t int_mode:1;         /* Interrupt format */
+        uint32_t index_l:15;         /* Interrupt index bit 14-0 */
+        uint32_t __head:12;          /* Should always be: 0x0fee */
+#endif
+    } QEMU_PACKED addr;
+    uint32_t data;
+};
+
+/* When IR is enabled, all MSI/MSI-X data bits should be zero */
+#define VTD_IR_MSI_DATA          (0)
+
+/* The iommu (DMAR) device state struct */
+struct IntelIOMMUState {
+    X86IOMMUState x86_iommu;
+    MemoryRegion csrmem;
+    MemoryRegion mr_nodmar;
+    MemoryRegion mr_ir;
+    MemoryRegion mr_sys_alias;
+    uint8_t csr[DMAR_REG_SIZE];     /* register values */
+    uint8_t wmask[DMAR_REG_SIZE];   /* R/W bytes */
+    uint8_t w1cmask[DMAR_REG_SIZE]; /* RW1C(Write 1 to Clear) bytes */
+    uint8_t womask[DMAR_REG_SIZE];  /* WO (write only - read returns 0) */
+    uint32_t version;
+
+    bool caching_mode;              /* RO - is cap CM enabled? */
+    bool scalable_mode;             /* RO - is Scalable Mode supported? */
+    bool snoop_control;             /* RO - is SNP filed supported? */
+
+    dma_addr_t root;                /* Current root table pointer */
+    bool root_scalable;             /* Type of root table (scalable or not) */
+    bool dmar_enabled;              /* Set if DMA remapping is enabled */
+
+    uint16_t iq_head;               /* Current invalidation queue head */
+    uint16_t iq_tail;               /* Current invalidation queue tail */
+    dma_addr_t iq;                  /* Current invalidation queue pointer */
+    uint16_t iq_size;               /* IQ Size in number of entries */
+    bool iq_dw;                     /* IQ descriptor width 256bit or not */
+    bool qi_enabled;                /* Set if the QI is enabled */
+    uint8_t iq_last_desc_type;      /* The type of last completed descriptor */
+
+    /* The index of the Fault Recording Register to be used next.
+     * Wraps around from N-1 to 0, where N is the number of FRCD_REG.
+     */
+    uint16_t next_frcd_reg;
+
+    uint64_t cap;                   /* The value of capability reg */
+    uint64_t ecap;                  /* The value of extended capability reg */
+
+    uint32_t context_cache_gen;     /* Should be in [1,MAX] */
+    GHashTable *iotlb;              /* IOTLB */
+
+    GHashTable *vtd_address_spaces;             /* VTD address spaces */
+    VTDAddressSpace *vtd_as_cache[VTD_PCI_BUS_MAX]; /* VTD address space cache */
+    /* list of registered notifiers */
+    QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
+
+    /* interrupt remapping */
+    bool intr_enabled;              /* Whether guest enabled IR */
+    dma_addr_t intr_root;           /* Interrupt remapping table pointer */
+    uint32_t intr_size;             /* Number of IR table entries */
+    bool intr_eime;                 /* Extended interrupt mode enabled */
+    OnOffAuto intr_eim;             /* Toggle for EIM cabability */
+    bool buggy_eim;                 /* Force buggy EIM unless eim=off */
+    uint8_t aw_bits;                /* Host/IOVA address width (in bits) */
+    bool dma_drain;                 /* Whether DMA r/w draining enabled */
+    bool dma_translation;           /* Whether DMA translation supported */
+    bool pasid;                     /* Whether to support PASID */
+
+    /*
+     * Protects IOMMU states in general.  Currently it protects the
+     * per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace.
+     */
+    QemuMutex iommu_lock;
+};
+
+/* Find the VTD Address space associated with the given bus pointer,
+ * create a new one if none exists
+ */
+VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
+                                 int devfn, unsigned int pasid);
+
+#endif
diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h
new file mode 100644 (file)
index 0000000..fad97a8
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2018 Intel Corporation
+ * Copyright (c) 2019 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_I386_MICROVM_H
+#define HW_I386_MICROVM_H
+
+#include "exec/hwaddr.h"
+#include "qemu/notify.h"
+
+#include "hw/boards.h"
+#include "hw/i386/x86.h"
+#include "hw/acpi/acpi_dev_interface.h"
+#include "hw/pci-host/gpex.h"
+#include "qom/object.h"
+
+/*
+ *  IRQ    |  pc        | microvm (acpi=on)
+ * --------+------------+------------------
+ *   0     |  pit       |
+ *   1     |  kbd       |
+ *   2     |  cascade   |
+ *   3     |  serial 1  |
+ *   4     |  serial 0  | serial
+ *   5     |  -         |
+ *   6     |  floppy    |
+ *   7     |  parallel  |
+ *   8     |  rtc       | rtc (rtc=on)
+ *   9     |  acpi      | acpi (ged)
+ *  10     |  pci lnk   | xhci (usb=on)
+ *  11     |  pci lnk   |
+ *  12     |  ps2       | pcie
+ *  13     |  fpu       | pcie
+ *  14     |  ide 0     | pcie
+ *  15     |  ide 1     | pcie
+ *  16-23  |  pci gsi   | virtio
+ */
+
+/* Platform virtio definitions */
+#define VIRTIO_MMIO_BASE      0xfeb00000
+#define VIRTIO_CMDLINE_MAXLEN 64
+
+#define GED_MMIO_BASE         0xfea00000
+#define GED_MMIO_BASE_MEMHP   (GED_MMIO_BASE + 0x100)
+#define GED_MMIO_BASE_REGS    (GED_MMIO_BASE + 0x200)
+#define GED_MMIO_IRQ          9
+
+#define MICROVM_XHCI_BASE     0xfe900000
+#define MICROVM_XHCI_IRQ      10
+
+#define PCIE_MMIO_BASE        0xc0000000
+#define PCIE_MMIO_SIZE        0x20000000
+#define PCIE_ECAM_BASE        0xe0000000
+#define PCIE_ECAM_SIZE        0x10000000
+
+/* Machine type options */
+#define MICROVM_MACHINE_RTC                 "rtc"
+#define MICROVM_MACHINE_PCIE                "pcie"
+#define MICROVM_MACHINE_IOAPIC2             "ioapic2"
+#define MICROVM_MACHINE_ISA_SERIAL          "isa-serial"
+#define MICROVM_MACHINE_OPTION_ROMS         "x-option-roms"
+#define MICROVM_MACHINE_AUTO_KERNEL_CMDLINE "auto-kernel-cmdline"
+
+struct MicrovmMachineClass {
+    X86MachineClass parent;
+    HotplugHandler *(*orig_hotplug_handler)(MachineState *machine,
+                                           DeviceState *dev);
+};
+
+struct MicrovmMachineState {
+    X86MachineState parent;
+
+    /* Machine type options */
+    OnOffAuto rtc;
+    OnOffAuto pcie;
+    OnOffAuto ioapic2;
+    bool isa_serial;
+    bool option_roms;
+    bool auto_kernel_cmdline;
+
+    /* Machine state */
+    uint32_t pcie_irq_base;
+    uint32_t virtio_irq_base;
+    uint32_t virtio_num_transports;
+    bool kernel_cmdline_fixed;
+    Notifier machine_done;
+    Notifier powerdown_req;
+    struct GPEXConfig gpex;
+
+    /* device tree */
+    void *fdt;
+    uint32_t ioapic_phandle[2];
+};
+
+#define TYPE_MICROVM_MACHINE   MACHINE_TYPE_NAME("microvm")
+OBJECT_DECLARE_TYPE(MicrovmMachineState, MicrovmMachineClass, MICROVM_MACHINE)
+
+#endif
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
new file mode 100644 (file)
index 0000000..a10ceea
--- /dev/null
@@ -0,0 +1,333 @@
+#ifndef HW_PC_H
+#define HW_PC_H
+
+#include "qemu/notify.h"
+#include "qapi/qapi-types-common.h"
+#include "qemu/uuid.h"
+#include "hw/boards.h"
+#include "hw/block/fdc.h"
+#include "hw/block/flash.h"
+#include "hw/i386/x86.h"
+
+#include "hw/hotplug.h"
+#include "qom/object.h"
+#include "hw/i386/sgx-epc.h"
+#include "hw/firmware/smbios.h"
+#include "hw/cxl/cxl.h"
+
+#define HPET_INTCAP "hpet-intcap"
+
+/**
+ * PCMachineState:
+ * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling
+ * @boot_cpus: number of present VCPUs
+ */
+typedef struct PCMachineState {
+    /*< private >*/
+    X86MachineState parent_obj;
+
+    /* <public> */
+
+    /* State for other subsystems/APIs: */
+    Notifier machine_done;
+
+    /* Pointers to devices and objects: */
+    PCIBus *bus;
+    BusState *xenbus;
+    I2CBus *smbus;
+    PFlashCFI01 *flash[2];
+    ISADevice *pcspk;
+    DeviceState *iommu;
+
+    /* Configuration options: */
+    uint64_t max_ram_below_4g;
+    OnOffAuto vmport;
+    SmbiosEntryPointType smbios_entry_point_type;
+    const char *south_bridge;
+
+    bool acpi_build_enabled;
+    bool smbus_enabled;
+    bool sata_enabled;
+    bool hpet_enabled;
+    bool i8042_enabled;
+    bool default_bus_bypass_iommu;
+    uint64_t max_fw_size;
+
+    /* ACPI Memory hotplug IO base address */
+    hwaddr memhp_io_base;
+
+    SGXEPCState sgx_epc;
+    CXLState cxl_devices_state;
+} PCMachineState;
+
+#define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
+#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
+#define PC_MACHINE_VMPORT           "vmport"
+#define PC_MACHINE_SMBUS            "smbus"
+#define PC_MACHINE_SATA             "sata"
+#define PC_MACHINE_I8042            "i8042"
+#define PC_MACHINE_MAX_FW_SIZE      "max-fw-size"
+#define PC_MACHINE_SMBIOS_EP        "smbios-entry-point-type"
+
+/**
+ * PCMachineClass:
+ *
+ * Compat fields:
+ *
+ * @enforce_aligned_dimm: check that DIMM's address/size is aligned by
+ *                        backend's alignment value if provided
+ * @acpi_data_size: Size of the chunk of memory at the top of RAM
+ *                  for the BIOS ACPI tables and other BIOS
+ *                  datastructures.
+ * @gigabyte_align: Make sure that guest addresses aligned at
+ *                  1Gbyte boundaries get mapped to host
+ *                  addresses aligned at 1Gbyte boundaries. This
+ *                  way we can use 1GByte pages in the host.
+ *
+ */
+struct PCMachineClass {
+    /*< private >*/
+    X86MachineClass parent_class;
+
+    /*< public >*/
+
+    /* Device configuration: */
+    bool pci_enabled;
+    bool kvmclock_enabled;
+    const char *default_south_bridge;
+
+    /* Compat options: */
+
+    /* Default CPU model version.  See x86_cpu_set_default_version(). */
+    int default_cpu_version;
+
+    /* ACPI compat: */
+    bool has_acpi_build;
+    bool rsdp_in_ram;
+    int legacy_acpi_table_size;
+    unsigned acpi_data_size;
+    int pci_root_uid;
+
+    /* SMBIOS compat: */
+    bool smbios_defaults;
+    bool smbios_legacy_mode;
+    bool smbios_uuid_encoded;
+    SmbiosEntryPointType default_smbios_ep_type;
+
+    /* RAM / address space compat: */
+    bool gigabyte_align;
+    bool has_reserved_memory;
+    bool enforce_aligned_dimm;
+    bool broken_reserved_end;
+    bool enforce_amd_1tb_hole;
+
+    /* generate legacy CPU hotplug AML */
+    bool legacy_cpu_hotplug;
+
+    /* use PVH to load kernels that support this feature */
+    bool pvh_enabled;
+
+    /* create kvmclock device even when KVM PV features are not exposed */
+    bool kvmclock_create_always;
+
+    /* resizable acpi blob compat */
+    bool resizable_acpi_blob;
+
+    /*
+     * whether the machine type implements broken 32-bit address space bound
+     * check for memory.
+     */
+    bool broken_32bit_mem_addr_check;
+};
+
+#define TYPE_PC_MACHINE "generic-pc-machine"
+OBJECT_DECLARE_TYPE(PCMachineState, PCMachineClass, PC_MACHINE)
+
+/* ioapic.c */
+
+GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled);
+
+/* pc.c */
+extern int fd_bootchk;
+
+void pc_acpi_smi_interrupt(void *opaque, int irq, int level);
+
+void pc_guest_info_init(PCMachineState *pcms);
+
+#define PCI_HOST_PROP_RAM_MEM          "ram-mem"
+#define PCI_HOST_PROP_PCI_MEM          "pci-mem"
+#define PCI_HOST_PROP_SYSTEM_MEM       "system-mem"
+#define PCI_HOST_PROP_IO_MEM           "io-mem"
+#define PCI_HOST_PROP_PCI_HOLE_START   "pci-hole-start"
+#define PCI_HOST_PROP_PCI_HOLE_END     "pci-hole-end"
+#define PCI_HOST_PROP_PCI_HOLE64_START "pci-hole64-start"
+#define PCI_HOST_PROP_PCI_HOLE64_END   "pci-hole64-end"
+#define PCI_HOST_PROP_PCI_HOLE64_SIZE  "pci-hole64-size"
+#define PCI_HOST_BELOW_4G_MEM_SIZE     "below-4g-mem-size"
+#define PCI_HOST_ABOVE_4G_MEM_SIZE     "above-4g-mem-size"
+
+
+void pc_pci_as_mapping_init(MemoryRegion *system_memory,
+                            MemoryRegion *pci_address_space);
+
+void xen_load_linux(PCMachineState *pcms);
+void pc_memory_init(PCMachineState *pcms,
+                    MemoryRegion *system_memory,
+                    MemoryRegion *rom_memory,
+                    uint64_t pci_hole64_size);
+uint64_t pc_pci_hole64_start(void);
+DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus);
+void pc_basic_device_init(struct PCMachineState *pcms,
+                          ISABus *isa_bus, qemu_irq *gsi,
+                          ISADevice *rtc_state,
+                          bool create_fdctrl,
+                          uint32_t hpet_irqs);
+void pc_cmos_init(PCMachineState *pcms,
+                  BusState *ide0, BusState *ide1,
+                  ISADevice *s);
+void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus,
+                 BusState *xen_bus);
+
+void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs);
+
+/* port92.c */
+#define PORT92_A20_LINE "a20"
+
+#define TYPE_PORT92 "port92"
+
+/* pc_sysfw.c */
+void pc_system_flash_create(PCMachineState *pcms);
+void pc_system_flash_cleanup_unused(PCMachineState *pcms);
+void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory);
+bool pc_system_ovmf_table_find(const char *entry, uint8_t **data,
+                               int *data_len);
+void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size);
+
+/* hw/i386/acpi-common.c */
+void pc_madt_cpu_entry(int uid, const CPUArchIdList *apic_ids,
+                       GArray *entry, bool force_enabled);
+
+/* sgx.c */
+void pc_machine_init_sgx_epc(PCMachineState *pcms);
+
+extern GlobalProperty pc_compat_8_1[];
+extern const size_t pc_compat_8_1_len;
+
+extern GlobalProperty pc_compat_8_0[];
+extern const size_t pc_compat_8_0_len;
+
+extern GlobalProperty pc_compat_7_2[];
+extern const size_t pc_compat_7_2_len;
+
+extern GlobalProperty pc_compat_7_1[];
+extern const size_t pc_compat_7_1_len;
+
+extern GlobalProperty pc_compat_7_0[];
+extern const size_t pc_compat_7_0_len;
+
+extern GlobalProperty pc_compat_6_2[];
+extern const size_t pc_compat_6_2_len;
+
+extern GlobalProperty pc_compat_6_1[];
+extern const size_t pc_compat_6_1_len;
+
+extern GlobalProperty pc_compat_6_0[];
+extern const size_t pc_compat_6_0_len;
+
+extern GlobalProperty pc_compat_5_2[];
+extern const size_t pc_compat_5_2_len;
+
+extern GlobalProperty pc_compat_5_1[];
+extern const size_t pc_compat_5_1_len;
+
+extern GlobalProperty pc_compat_5_0[];
+extern const size_t pc_compat_5_0_len;
+
+extern GlobalProperty pc_compat_4_2[];
+extern const size_t pc_compat_4_2_len;
+
+extern GlobalProperty pc_compat_4_1[];
+extern const size_t pc_compat_4_1_len;
+
+extern GlobalProperty pc_compat_4_0[];
+extern const size_t pc_compat_4_0_len;
+
+extern GlobalProperty pc_compat_3_1[];
+extern const size_t pc_compat_3_1_len;
+
+extern GlobalProperty pc_compat_3_0[];
+extern const size_t pc_compat_3_0_len;
+
+extern GlobalProperty pc_compat_2_12[];
+extern const size_t pc_compat_2_12_len;
+
+extern GlobalProperty pc_compat_2_11[];
+extern const size_t pc_compat_2_11_len;
+
+extern GlobalProperty pc_compat_2_10[];
+extern const size_t pc_compat_2_10_len;
+
+extern GlobalProperty pc_compat_2_9[];
+extern const size_t pc_compat_2_9_len;
+
+extern GlobalProperty pc_compat_2_8[];
+extern const size_t pc_compat_2_8_len;
+
+extern GlobalProperty pc_compat_2_7[];
+extern const size_t pc_compat_2_7_len;
+
+extern GlobalProperty pc_compat_2_6[];
+extern const size_t pc_compat_2_6_len;
+
+extern GlobalProperty pc_compat_2_5[];
+extern const size_t pc_compat_2_5_len;
+
+extern GlobalProperty pc_compat_2_4[];
+extern const size_t pc_compat_2_4_len;
+
+extern GlobalProperty pc_compat_2_3[];
+extern const size_t pc_compat_2_3_len;
+
+extern GlobalProperty pc_compat_2_2[];
+extern const size_t pc_compat_2_2_len;
+
+extern GlobalProperty pc_compat_2_1[];
+extern const size_t pc_compat_2_1_len;
+
+extern GlobalProperty pc_compat_2_0[];
+extern const size_t pc_compat_2_0_len;
+
+extern GlobalProperty pc_compat_1_7[];
+extern const size_t pc_compat_1_7_len;
+
+extern GlobalProperty pc_compat_1_6[];
+extern const size_t pc_compat_1_6_len;
+
+extern GlobalProperty pc_compat_1_5[];
+extern const size_t pc_compat_1_5_len;
+
+extern GlobalProperty pc_compat_1_4[];
+extern const size_t pc_compat_1_4_len;
+
+int pc_machine_kvm_type(MachineState *machine, const char *vm_type);
+
+#define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \
+    static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \
+    { \
+        MachineClass *mc = MACHINE_CLASS(oc); \
+        optsfn(mc); \
+        mc->init = initfn; \
+        mc->kvm_type = pc_machine_kvm_type; \
+    } \
+    static const TypeInfo pc_machine_type_##suffix = { \
+        .name       = namestr TYPE_MACHINE_SUFFIX, \
+        .parent     = TYPE_PC_MACHINE, \
+        .class_init = pc_machine_##suffix##_class_init, \
+    }; \
+    static void pc_machine_init_##suffix(void) \
+    { \
+        type_register(&pc_machine_type_##suffix); \
+    } \
+    type_init(pc_machine_init_##suffix)
+
+#endif
diff --git a/include/hw/i386/sgx-epc.h b/include/hw/i386/sgx-epc.h
new file mode 100644 (file)
index 0000000..3e00efd
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ * SGX EPC device
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Authors:
+ *   Sean Christopherson <sean.j.christopherson@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_SGX_EPC_H
+#define QEMU_SGX_EPC_H
+
+#include "hw/qdev-core.h"
+#include "hw/i386/hostmem-epc.h"
+
+#define TYPE_SGX_EPC "sgx-epc"
+#define SGX_EPC(obj) \
+    OBJECT_CHECK(SGXEPCDevice, (obj), TYPE_SGX_EPC)
+#define SGX_EPC_CLASS(oc) \
+    OBJECT_CLASS_CHECK(SGXEPCDeviceClass, (oc), TYPE_SGX_EPC)
+#define SGX_EPC_GET_CLASS(obj) \
+    OBJECT_GET_CLASS(SGXEPCDeviceClass, (obj), TYPE_SGX_EPC)
+
+#define SGX_EPC_ADDR_PROP "addr"
+#define SGX_EPC_SIZE_PROP "size"
+#define SGX_EPC_MEMDEV_PROP "memdev"
+#define SGX_EPC_NUMA_NODE_PROP "node"
+
+/**
+ * SGXEPCDevice:
+ * @addr: starting guest physical address, where @SGXEPCDevice is mapped.
+ *         Default value: 0, means that address is auto-allocated.
+ * @hostmem: host memory backend providing memory for @SGXEPCDevice
+ */
+typedef struct SGXEPCDevice {
+    /* private */
+    DeviceState parent_obj;
+
+    /* public */
+    uint64_t addr;
+    uint32_t node;
+    HostMemoryBackendEpc *hostmem;
+} SGXEPCDevice;
+
+/*
+ * @base: address in guest physical address space where EPC regions start
+ * @mr: address space container for memory devices
+ */
+typedef struct SGXEPCState {
+    uint64_t base;
+    uint64_t size;
+
+    MemoryRegion mr;
+
+    struct SGXEPCDevice **sections;
+    int nr_sections;
+} SGXEPCState;
+
+bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size);
+void sgx_epc_build_srat(GArray *table_data);
+
+static inline uint64_t sgx_epc_above_4g_end(SGXEPCState *sgx_epc)
+{
+    assert(sgx_epc != NULL && sgx_epc->base >= 0x100000000ULL);
+
+    return sgx_epc->base + sgx_epc->size;
+}
+
+#endif
diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h
new file mode 100644 (file)
index 0000000..d4eeb7a
--- /dev/null
@@ -0,0 +1,171 @@
+/*
+ *  x86 CPU topology data structures and functions
+ *
+ *  Copyright (c) 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef HW_I386_TOPOLOGY_H
+#define HW_I386_TOPOLOGY_H
+
+/*
+ * This file implements the APIC-ID-based CPU topology enumeration logic,
+ * documented at the following document:
+ *   Intel® 64 Architecture Processor Topology Enumeration
+ *   http://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration/
+ *
+ * This code should be compatible with AMD's "Extended Method" described at:
+ *   AMD CPUID Specification (Publication #25481)
+ *   Section 3: Multiple Core Calculation
+ * as long as:
+ *  nr_threads is set to 1;
+ *  OFFSET_IDX is assumed to be 0;
+ *  CPUID Fn8000_0008_ECX[ApicIdCoreIdSize[3:0]] is set to apicid_core_width().
+ */
+
+
+#include "qemu/bitops.h"
+
+/*
+ * APIC IDs can be 32-bit, but beware: APIC IDs > 255 require x2APIC support
+ */
+typedef uint32_t apic_id_t;
+
+typedef struct X86CPUTopoIDs {
+    unsigned pkg_id;
+    unsigned die_id;
+    unsigned core_id;
+    unsigned smt_id;
+} X86CPUTopoIDs;
+
+typedef struct X86CPUTopoInfo {
+    unsigned dies_per_pkg;
+    unsigned cores_per_die;
+    unsigned threads_per_core;
+} X86CPUTopoInfo;
+
+/* Return the bit width needed for 'count' IDs */
+static unsigned apicid_bitwidth_for_count(unsigned count)
+{
+    g_assert(count >= 1);
+    count -= 1;
+    return count ? 32 - clz32(count) : 0;
+}
+
+/* Bit width of the SMT_ID (thread ID) field on the APIC ID */
+static inline unsigned apicid_smt_width(X86CPUTopoInfo *topo_info)
+{
+    return apicid_bitwidth_for_count(topo_info->threads_per_core);
+}
+
+/* Bit width of the Core_ID field */
+static inline unsigned apicid_core_width(X86CPUTopoInfo *topo_info)
+{
+    return apicid_bitwidth_for_count(topo_info->cores_per_die);
+}
+
+/* Bit width of the Die_ID field */
+static inline unsigned apicid_die_width(X86CPUTopoInfo *topo_info)
+{
+    return apicid_bitwidth_for_count(topo_info->dies_per_pkg);
+}
+
+/* Bit offset of the Core_ID field */
+static inline unsigned apicid_core_offset(X86CPUTopoInfo *topo_info)
+{
+    return apicid_smt_width(topo_info);
+}
+
+/* Bit offset of the Die_ID field */
+static inline unsigned apicid_die_offset(X86CPUTopoInfo *topo_info)
+{
+    return apicid_core_offset(topo_info) + apicid_core_width(topo_info);
+}
+
+/* Bit offset of the Pkg_ID (socket ID) field */
+static inline unsigned apicid_pkg_offset(X86CPUTopoInfo *topo_info)
+{
+    return apicid_die_offset(topo_info) + apicid_die_width(topo_info);
+}
+
+/*
+ * Make APIC ID for the CPU based on Pkg_ID, Core_ID, SMT_ID
+ *
+ * The caller must make sure core_id < nr_cores and smt_id < nr_threads.
+ */
+static inline apic_id_t x86_apicid_from_topo_ids(X86CPUTopoInfo *topo_info,
+                                                 const X86CPUTopoIDs *topo_ids)
+{
+    return (topo_ids->pkg_id  << apicid_pkg_offset(topo_info)) |
+           (topo_ids->die_id  << apicid_die_offset(topo_info)) |
+           (topo_ids->core_id << apicid_core_offset(topo_info)) |
+           topo_ids->smt_id;
+}
+
+/*
+ * Calculate thread/core/package IDs for a specific topology,
+ * based on (contiguous) CPU index
+ */
+static inline void x86_topo_ids_from_idx(X86CPUTopoInfo *topo_info,
+                                         unsigned cpu_index,
+                                         X86CPUTopoIDs *topo_ids)
+{
+    unsigned nr_dies = topo_info->dies_per_pkg;
+    unsigned nr_cores = topo_info->cores_per_die;
+    unsigned nr_threads = topo_info->threads_per_core;
+
+    topo_ids->pkg_id = cpu_index / (nr_dies * nr_cores * nr_threads);
+    topo_ids->die_id = cpu_index / (nr_cores * nr_threads) % nr_dies;
+    topo_ids->core_id = cpu_index / nr_threads % nr_cores;
+    topo_ids->smt_id = cpu_index % nr_threads;
+}
+
+/*
+ * Calculate thread/core/package IDs for a specific topology,
+ * based on APIC ID
+ */
+static inline void x86_topo_ids_from_apicid(apic_id_t apicid,
+                                            X86CPUTopoInfo *topo_info,
+                                            X86CPUTopoIDs *topo_ids)
+{
+    topo_ids->smt_id = apicid &
+            ~(0xFFFFFFFFUL << apicid_smt_width(topo_info));
+    topo_ids->core_id =
+            (apicid >> apicid_core_offset(topo_info)) &
+            ~(0xFFFFFFFFUL << apicid_core_width(topo_info));
+    topo_ids->die_id =
+            (apicid >> apicid_die_offset(topo_info)) &
+            ~(0xFFFFFFFFUL << apicid_die_width(topo_info));
+    topo_ids->pkg_id = apicid >> apicid_pkg_offset(topo_info);
+}
+
+/*
+ * Make APIC ID for the CPU 'cpu_index'
+ *
+ * 'cpu_index' is a sequential, contiguous ID for the CPU.
+ */
+static inline apic_id_t x86_apicid_from_cpu_idx(X86CPUTopoInfo *topo_info,
+                                                unsigned cpu_index)
+{
+    X86CPUTopoIDs topo_ids;
+    x86_topo_ids_from_idx(topo_info, cpu_index, &topo_ids);
+    return x86_apicid_from_topo_ids(topo_info, &topo_ids);
+}
+
+#endif /* HW_I386_TOPOLOGY_H */
diff --git a/include/hw/i386/vmport.h b/include/hw/i386/vmport.h
new file mode 100644 (file)
index 0000000..8f5e27c
--- /dev/null
@@ -0,0 +1,28 @@
+#ifndef HW_VMPORT_H
+#define HW_VMPORT_H
+
+#include "hw/isa/isa.h"
+
+#define TYPE_VMPORT "vmport"
+typedef uint32_t VMPortReadFunc(void *opaque, uint32_t address);
+
+typedef enum {
+    VMPORT_CMD_GETVERSION       = 10,
+    VMPORT_CMD_GETBIOSUUID      = 19,
+    VMPORT_CMD_GETRAMSIZE       = 20,
+    VMPORT_CMD_VMMOUSE_DATA     = 39,
+    VMPORT_CMD_VMMOUSE_STATUS   = 40,
+    VMPORT_CMD_VMMOUSE_COMMAND  = 41,
+    VMPORT_CMD_GETHZ            = 45,
+    VMPORT_CMD_GET_VCPU_INFO    = 68,
+    VMPORT_ENTRIES
+} VMPortCommand;
+
+static inline void vmport_init(ISABus *bus)
+{
+    isa_create_simple(bus, TYPE_VMPORT);
+}
+
+void vmport_register(VMPortCommand command, VMPortReadFunc *func, void *opaque);
+
+#endif
diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
new file mode 100644 (file)
index 0000000..bfd2164
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+ * Common IOMMU interface for X86 platform
+ *
+ * Copyright (C) 2016 Peter Xu, Red Hat <peterx@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_I386_X86_IOMMU_H
+#define HW_I386_X86_IOMMU_H
+
+#include "hw/sysbus.h"
+#include "hw/pci/msi.h"
+#include "qom/object.h"
+
+#define  TYPE_X86_IOMMU_DEVICE  ("x86-iommu")
+OBJECT_DECLARE_TYPE(X86IOMMUState, X86IOMMUClass, X86_IOMMU_DEVICE)
+
+#define X86_IOMMU_SID_INVALID             (0xffff)
+
+typedef struct X86IOMMUIrq X86IOMMUIrq;
+typedef struct X86IOMMU_MSIMessage X86IOMMU_MSIMessage;
+
+struct X86IOMMUClass {
+    SysBusDeviceClass parent;
+    /* Intel/AMD specific realize() hook */
+    DeviceRealize realize;
+    /* MSI-based interrupt remapping */
+    int (*int_remap)(X86IOMMUState *iommu, MSIMessage *src,
+                     MSIMessage *dst, uint16_t sid);
+};
+
+/**
+ * iec_notify_fn - IEC (Interrupt Entry Cache) notifier hook,
+ *                 triggered when IR invalidation happens.
+ * @private: private data
+ * @global: whether this is a global IEC invalidation
+ * @index: IRTE index to invalidate (start from)
+ * @mask: invalidation mask
+ */
+typedef void (*iec_notify_fn)(void *private, bool global,
+                              uint32_t index, uint32_t mask);
+
+struct IEC_Notifier {
+    iec_notify_fn iec_notify;
+    void *private;
+    QLIST_ENTRY(IEC_Notifier) list;
+};
+typedef struct IEC_Notifier IEC_Notifier;
+
+struct X86IOMMUState {
+    SysBusDevice busdev;
+    OnOffAuto intr_supported;   /* Whether vIOMMU supports IR */
+    bool dt_supported;          /* Whether vIOMMU supports DT */
+    bool pt_supported;          /* Whether vIOMMU supports pass-through */
+    QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */
+};
+
+bool x86_iommu_ir_supported(X86IOMMUState *s);
+
+/* Generic IRQ entry information when interrupt remapping is enabled */
+struct X86IOMMUIrq {
+    /* Used by both IOAPIC/MSI interrupt remapping */
+    uint8_t trigger_mode;
+    uint8_t vector;
+    uint8_t delivery_mode;
+    uint32_t dest;
+    uint8_t dest_mode;
+
+    /* only used by MSI interrupt remapping */
+    uint8_t redir_hint;
+    uint8_t msi_addr_last_bits;
+};
+
+struct X86IOMMU_MSIMessage {
+    union {
+        struct {
+#if HOST_BIG_ENDIAN
+            uint64_t __addr_hi:32;
+            uint64_t __addr_head:12; /* 0xfee */
+            uint64_t dest:8;
+            uint64_t __reserved:8;
+            uint64_t redir_hint:1;
+            uint64_t dest_mode:1;
+            uint64_t __not_used:2;
+#else
+            uint64_t __not_used:2;
+            uint64_t dest_mode:1;
+            uint64_t redir_hint:1;
+            uint64_t __reserved:8;
+            uint64_t dest:8;
+            uint64_t __addr_head:12; /* 0xfee */
+            uint64_t __addr_hi:32;
+#endif
+        } QEMU_PACKED;
+        uint64_t msi_addr;
+    };
+    union {
+        struct {
+#if HOST_BIG_ENDIAN
+            uint32_t __resved1:16;
+            uint32_t trigger_mode:1;
+            uint32_t level:1;
+            uint32_t __resved:3;
+            uint32_t delivery_mode:3;
+            uint32_t vector:8;
+#else
+            uint32_t vector:8;
+            uint32_t delivery_mode:3;
+            uint32_t __resved:3;
+            uint32_t level:1;
+            uint32_t trigger_mode:1;
+            uint32_t __resved1:16;
+#endif
+        } QEMU_PACKED;
+        uint32_t msi_data;
+    };
+};
+
+/**
+ * x86_iommu_get_default - get default IOMMU device
+ * @return: pointer to default IOMMU device
+ */
+X86IOMMUState *x86_iommu_get_default(void);
+
+/**
+ * x86_iommu_iec_register_notifier - register IEC (Interrupt Entry
+ *                                   Cache) notifiers
+ * @iommu: IOMMU device to register
+ * @fn: IEC notifier hook function
+ * @data: notifier private data
+ */
+void x86_iommu_iec_register_notifier(X86IOMMUState *iommu,
+                                     iec_notify_fn fn, void *data);
+
+/**
+ * x86_iommu_iec_notify_all - Notify IEC invalidations
+ * @iommu: IOMMU device that sends the notification
+ * @global: whether this is a global invalidation. If true, @index
+ *          and @mask are undefined.
+ * @index: starting index of interrupt entry to invalidate
+ * @mask: index mask for the invalidation
+ */
+void x86_iommu_iec_notify_all(X86IOMMUState *iommu, bool global,
+                              uint32_t index, uint32_t mask);
+
+/**
+ * x86_iommu_irq_to_msi_message - Populate one MSIMessage from X86IOMMUIrq
+ * @X86IOMMUIrq: The IRQ information
+ * @out: Output MSI message
+ */
+void x86_iommu_irq_to_msi_message(X86IOMMUIrq *irq, MSIMessage *out);
+#endif
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
new file mode 100644 (file)
index 0000000..da19ae1
--- /dev/null
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2019 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_I386_X86_H
+#define HW_I386_X86_H
+
+#include "exec/hwaddr.h"
+
+#include "hw/boards.h"
+#include "hw/intc/ioapic.h"
+#include "hw/isa/isa.h"
+#include "qom/object.h"
+
+struct X86MachineClass {
+    /*< private >*/
+    MachineClass parent;
+
+    /*< public >*/
+
+    /* TSC rate migration: */
+    bool save_tsc_khz;
+    /* use DMA capable linuxboot option rom */
+    bool fwcfg_dma_enabled;
+};
+
+struct X86MachineState {
+    /*< private >*/
+    MachineState parent;
+
+    /*< public >*/
+
+    /* Pointers to devices and objects: */
+    ISADevice *rtc;
+    FWCfgState *fw_cfg;
+    qemu_irq *gsi;
+    DeviceState *ioapic2;
+    GMappedFile *initrd_mapped_file;
+    HotplugHandler *acpi_dev;
+
+    /* RAM information (sizes, addresses, configuration): */
+    ram_addr_t below_4g_mem_size, above_4g_mem_size;
+
+    /* Start address of the initial RAM above 4G */
+    uint64_t above_4g_mem_start;
+
+    /* CPU and apic information: */
+    bool apic_xrupt_override;
+    unsigned pci_irq_mask;
+    unsigned apic_id_limit;
+    uint16_t boot_cpus;
+    SgxEPCList *sgx_epc_list;
+
+    OnOffAuto smm;
+    OnOffAuto acpi;
+    OnOffAuto pit;
+    OnOffAuto pic;
+
+    char *oem_id;
+    char *oem_table_id;
+    /*
+     * Address space used by IOAPIC device. All IOAPIC interrupts
+     * will be translated to MSI messages in the address space.
+     */
+    AddressSpace *ioapic_as;
+
+    /*
+     * Ratelimit enforced on detected bus locks in guest.
+     * The default value of the bus_lock_ratelimit is 0 per second,
+     * which means no limitation on the guest's bus locks.
+     */
+    uint64_t bus_lock_ratelimit;
+};
+
+#define X86_MACHINE_SMM              "smm"
+#define X86_MACHINE_ACPI             "acpi"
+#define X86_MACHINE_PIT              "pit"
+#define X86_MACHINE_PIC              "pic"
+#define X86_MACHINE_OEM_ID           "x-oem-id"
+#define X86_MACHINE_OEM_TABLE_ID     "x-oem-table-id"
+#define X86_MACHINE_BUS_LOCK_RATELIMIT  "bus-lock-ratelimit"
+
+#define TYPE_X86_MACHINE   MACHINE_TYPE_NAME("x86")
+OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE)
+
+uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms,
+                                    unsigned int cpu_index);
+
+void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp);
+void x86_cpus_init(X86MachineState *pcms, int default_cpu_version);
+CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms,
+                                             unsigned cpu_index);
+int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx);
+const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms);
+CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx);
+void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count);
+void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
+                      DeviceState *dev, Error **errp);
+void x86_cpu_plug(HotplugHandler *hotplug_dev,
+                  DeviceState *dev, Error **errp);
+void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev,
+                               DeviceState *dev, Error **errp);
+void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev,
+                       DeviceState *dev, Error **errp);
+
+void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
+                       MemoryRegion *rom_memory, bool isapc_ram_fw);
+
+void x86_load_linux(X86MachineState *x86ms,
+                    FWCfgState *fw_cfg,
+                    int acpi_data_size,
+                    bool pvh_enabled);
+
+bool x86_machine_is_smm_enabled(const X86MachineState *x86ms);
+bool x86_machine_is_acpi_enabled(const X86MachineState *x86ms);
+
+/* Global System Interrupts */
+
+#define ACPI_BUILD_PCI_IRQS ((1<<5) | (1<<9) | (1<<10) | (1<<11))
+
+typedef struct GSIState {
+    qemu_irq i8259_irq[ISA_NUM_IRQS];
+    qemu_irq ioapic_irq[IOAPIC_NUM_PINS];
+    qemu_irq ioapic2_irq[IOAPIC_NUM_PINS];
+} GSIState;
+
+qemu_irq x86_allocate_cpu_irq(void);
+void gsi_handler(void *opaque, int n, int level);
+void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name);
+DeviceState *ioapic_init_secondary(GSIState *gsi_state);
+
+/* pc_sysfw.c */
+void x86_firmware_configure(void *ptr, int size);
+
+#endif
diff --git a/include/hw/i386/xen_arch_hvm.h b/include/hw/i386/xen_arch_hvm.h
new file mode 100644 (file)
index 0000000..1000f8f
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef HW_XEN_ARCH_I386_HVM_H
+#define HW_XEN_ARCH_I386_HVM_H
+
+#include <xen/hvm/ioreq.h>
+#include "hw/xen/xen-hvm-common.h"
+
+void arch_handle_ioreq(XenIOState *state, ioreq_t *req);
+void arch_xen_set_memory(XenIOState *state,
+                         MemoryRegionSection *section,
+                         bool add);
+#endif
index c5ce5da4f478d31f61297eb981fc711932aab7ac..db963bdb770697770aa0720d4f8594f4a6f21e6f 100644 (file)
@@ -1,23 +1,8 @@
 #ifndef HW_IDE_H
 #define HW_IDE_H
 
-#include "hw/isa/isa.h"
 #include "exec/memory.h"
 
-/* ide-isa.c */
-ISADevice *isa_ide_init(ISABus *bus, int iobase, int iobase2, int isairq,
-                        DriveInfo *hd0, DriveInfo *hd1);
-
-/* ide-pci.c */
-int pci_piix3_xen_ide_unplug(DeviceState *dev, bool aux);
-
-/* ide-mmio.c */
-void mmio_ide_init_drives(DeviceState *dev, DriveInfo *hd0, DriveInfo *hd1);
-
-int ide_get_geometry(BusState *bus, int unit,
-                     int16_t *cyls, int8_t *heads, int8_t *secs);
-int ide_get_bios_chs_trans(BusState *bus, int unit);
-
 /* ide/core.c */
 void ide_drive_get(DriveInfo **hd, int max_bus);
 
diff --git a/include/hw/ide/ahci.h b/include/hw/ide/ahci.h
new file mode 100644 (file)
index 0000000..210e5e7
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * QEMU AHCI Emulation
+ *
+ * Copyright (c) 2010 qiaochong@loongson.cn
+ * Copyright (c) 2010 Roland Elek <elek.roland@gmail.com>
+ * Copyright (c) 2010 Sebastian Herbszt <herbszt@gmx.de>
+ * Copyright (c) 2010 Alexander Graf <agraf@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef HW_IDE_AHCI_H
+#define HW_IDE_AHCI_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+typedef struct AHCIDevice AHCIDevice;
+
+typedef struct AHCIControlRegs {
+    uint32_t    cap;
+    uint32_t    ghc;
+    uint32_t    irqstatus;
+    uint32_t    impl;
+    uint32_t    version;
+} AHCIControlRegs;
+
+typedef struct AHCIState {
+    DeviceState *container;
+
+    AHCIDevice *dev;
+    AHCIControlRegs control_regs;
+    MemoryRegion mem;
+    MemoryRegion idp;       /* Index-Data Pair I/O port space */
+    unsigned idp_offset;    /* Offset of index in I/O port space */
+    uint32_t idp_index;     /* Current IDP index */
+    int32_t ports;
+    qemu_irq irq;
+    AddressSpace *as;
+} AHCIState;
+
+
+#define TYPE_ICH9_AHCI "ich9-ahci"
+OBJECT_DECLARE_SIMPLE_TYPE(AHCIPCIState, ICH9_AHCI)
+
+int32_t ahci_get_num_ports(PCIDevice *dev);
+void ahci_ide_create_devs(PCIDevice *dev, DriveInfo **hd);
+
+#define TYPE_SYSBUS_AHCI "sysbus-ahci"
+OBJECT_DECLARE_SIMPLE_TYPE(SysbusAHCIState, SYSBUS_AHCI)
+
+struct SysbusAHCIState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    AHCIState ahci;
+    uint32_t num_ports;
+};
+
+#define TYPE_ALLWINNER_AHCI "allwinner-ahci"
+OBJECT_DECLARE_SIMPLE_TYPE(AllwinnerAHCIState, ALLWINNER_AHCI)
+
+#define ALLWINNER_AHCI_MMIO_OFF  0x80
+#define ALLWINNER_AHCI_MMIO_SIZE 0x80
+
+struct AllwinnerAHCIState {
+    /*< private >*/
+    SysbusAHCIState parent_obj;
+    /*< public >*/
+
+    MemoryRegion mmio;
+    uint32_t regs[ALLWINNER_AHCI_MMIO_SIZE/4];
+};
+
+#endif /* HW_IDE_AHCI_H */
diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h
new file mode 100644 (file)
index 0000000..3bdcc75
--- /dev/null
@@ -0,0 +1,659 @@
+#ifndef HW_IDE_INTERNAL_H
+#define HW_IDE_INTERNAL_H
+
+/*
+ * QEMU IDE Emulation -- internal header file
+ * only files in hw/ide/ are supposed to include this file.
+ * non-internal declarations are in hw/ide.h
+ */
+
+#include "hw/ide.h"
+#include "sysemu/dma.h"
+#include "hw/block/block.h"
+#include "exec/ioport.h"
+
+/* debug IDE devices */
+#define USE_DMA_CDROM
+#include "qom/object.h"
+
+typedef struct IDEDevice IDEDevice;
+typedef struct IDEState IDEState;
+typedef struct IDEDMA IDEDMA;
+typedef struct IDEDMAOps IDEDMAOps;
+
+#define TYPE_IDE_BUS "IDE"
+OBJECT_DECLARE_SIMPLE_TYPE(IDEBus, IDE_BUS)
+
+#define MAX_IDE_DEVS 2
+
+/* Device/Head ("select") Register */
+#define ATA_DEV_SELECT          0x10
+/* ATA1,3: Defined as '1'.
+ * ATA2:   Reserved.
+ * ATA3-7: obsolete. */
+#define ATA_DEV_ALWAYS_ON       0xA0
+#define ATA_DEV_LBA             0x40
+#define ATA_DEV_LBA_MSB         0x0F  /* LBA 24:27 */
+#define ATA_DEV_HS              0x0F  /* HS 3:0 */
+
+
+/* Bits of HD_STATUS */
+#define ERR_STAT                0x01
+#define INDEX_STAT              0x02
+#define ECC_STAT                0x04    /* Corrected error */
+#define DRQ_STAT                0x08
+#define SEEK_STAT               0x10
+#define SRV_STAT                0x10
+#define WRERR_STAT              0x20
+#define READY_STAT              0x40
+#define BUSY_STAT               0x80
+
+/* Bits for HD_ERROR */
+#define MARK_ERR                0x01    /* Bad address mark */
+#define TRK0_ERR                0x02    /* couldn't find track 0 */
+#define ABRT_ERR                0x04    /* Command aborted */
+#define MCR_ERR                 0x08    /* media change request */
+#define ID_ERR                  0x10    /* ID field not found */
+#define MC_ERR                  0x20    /* media changed */
+#define ECC_ERR                 0x40    /* Uncorrectable ECC error */
+#define BBD_ERR                 0x80    /* pre-EIDE meaning:  block marked bad */
+#define ICRC_ERR                0x80    /* new meaning:  CRC error during transfer */
+
+/* Bits of HD_NSECTOR */
+#define CD                      0x01
+#define IO                      0x02
+#define REL                     0x04
+#define TAG_MASK                0xf8
+
+/* Bits of Device Control register */
+#define IDE_CTRL_HOB            0x80
+#define IDE_CTRL_RESET          0x04
+#define IDE_CTRL_DISABLE_IRQ    0x02
+
+/* ACS-2 T13/2015-D Table B.2 Command codes */
+#define WIN_NOP                         0x00
+/* reserved                             0x01..0x02 */
+#define CFA_REQ_EXT_ERROR_CODE          0x03 /* CFA Request Extended Error Code */
+/* reserved                             0x04..0x05 */
+#define WIN_DSM                         0x06
+/* reserved                             0x07 */
+#define WIN_DEVICE_RESET                0x08
+/* reserved                             0x09..0x0a */
+/* REQUEST SENSE DATA EXT               0x0B */
+/* reserved                             0x0C..0x0F */
+#define WIN_RECAL                       0x10 /* obsolete since ATA4 */
+/* obsolete since ATA3, retired in ATA4 0x11..0x1F */
+#define WIN_READ                        0x20 /* 28-Bit */
+#define WIN_READ_ONCE                   0x21 /* 28-Bit w/o retries, obsolete since ATA5 */
+/* obsolete since ATA4                  0x22..0x23 */
+#define WIN_READ_EXT                    0x24 /* 48-Bit */
+#define WIN_READDMA_EXT                 0x25 /* 48-Bit */
+#define WIN_READDMA_QUEUED_EXT          0x26 /* 48-Bit, obsolete since ACS2 */
+#define WIN_READ_NATIVE_MAX_EXT         0x27 /* 48-Bit */
+/* reserved                             0x28 */
+#define WIN_MULTREAD_EXT                0x29 /* 48-Bit */
+/* READ STREAM DMA EXT                  0x2A */
+/* READ STREAM EXT                      0x2B */
+/* reserved                             0x2C..0x2E */
+/* READ LOG EXT                         0x2F */
+#define WIN_WRITE                       0x30 /* 28-Bit */
+#define WIN_WRITE_ONCE                  0x31 /* 28-Bit w/o retries, obsolete since ATA5 */
+/* obsolete since ATA4                  0x32..0x33 */
+#define WIN_WRITE_EXT                   0x34 /* 48-Bit */
+#define WIN_WRITEDMA_EXT                0x35 /* 48-Bit */
+#define WIN_WRITEDMA_QUEUED_EXT         0x36 /* 48-Bit */
+#define WIN_SET_MAX_EXT                 0x37 /* 48-Bit, obsolete since ACS2 */
+#define WIN_SET_MAX_EXT                 0x37 /* 48-Bit */
+#define CFA_WRITE_SECT_WO_ERASE         0x38 /* CFA Write Sectors without erase */
+#define WIN_MULTWRITE_EXT               0x39 /* 48-Bit */
+/* WRITE STREAM DMA EXT                 0x3A */
+/* WRITE STREAM EXT                     0x3B */
+#define WIN_WRITE_VERIFY                0x3C /* 28-Bit, obsolete since ATA4 */
+/* WRITE DMA FUA EXT                    0x3D */
+/* obsolete since ACS2                  0x3E */
+/* WRITE LOG EXT                        0x3F */
+#define WIN_VERIFY                      0x40 /* 28-Bit - Read Verify Sectors */
+#define WIN_VERIFY_ONCE                 0x41 /* 28-Bit - w/o retries, obsolete since ATA5 */
+#define WIN_VERIFY_EXT                  0x42 /* 48-Bit */
+/* reserved                             0x43..0x44 */
+/* WRITE UNCORRECTABLE EXT              0x45 */
+/* reserved                             0x46 */
+/* READ LOG DMA EXT                     0x47 */
+/* reserved                             0x48..0x4F */
+/* obsolete since ATA4                  0x50 */
+/* CONFIGURE STREAM                     0x51 */
+/* reserved                             0x52..0x56 */
+/* WRITE LOG DMA EXT                    0x57 */
+/* reserved                             0x58..0x5A */
+/* TRUSTED NON DATA                     0x5B */
+/* TRUSTED RECEIVE                      0x5C */
+/* TRUSTED RECEIVE DMA                  0x5D */
+/* TRUSTED SEND                         0x5E */
+/* TRUSTED SEND DMA                     0x5F */
+/* READ FPDMA QUEUED                    0x60 */
+/* WRITE FPDMA QUEUED                   0x61 */
+/* reserved                             0x62->0x6F */
+#define WIN_SEEK                        0x70 /* obsolete since ATA7 */
+/* reserved                             0x71-0x7F */
+/* vendor specific                      0x80-0x86 */
+#define CFA_TRANSLATE_SECTOR            0x87 /* CFA Translate Sector */
+/* vendor specific                      0x88-0x8F */
+#define WIN_DIAGNOSE                    0x90
+#define WIN_SPECIFY                     0x91 /* set drive geometry translation, obsolete since ATA6 */
+#define WIN_DOWNLOAD_MICROCODE          0x92
+/* DOWNLOAD MICROCODE DMA               0x93 */
+#define WIN_STANDBYNOW2                 0x94 /* retired in ATA4 */
+#define WIN_IDLEIMMEDIATE2              0x95 /* force drive to become "ready", retired in ATA4 */
+#define WIN_STANDBY2                    0x96 /* retired in ATA4 */
+#define WIN_SETIDLE2                    0x97 /* retired in ATA4 */
+#define WIN_CHECKPOWERMODE2             0x98 /* retired in ATA4 */
+#define WIN_SLEEPNOW2                   0x99 /* retired in ATA4 */
+/* vendor specific                      0x9A */
+/* reserved                             0x9B..0x9F */
+#define WIN_PACKETCMD                   0xA0 /* Send a packet command. */
+#define WIN_PIDENTIFY                   0xA1 /* identify ATAPI device */
+#define WIN_QUEUED_SERVICE              0xA2 /* obsolete since ACS2 */
+/* reserved                             0xA3..0xAF */
+#define WIN_SMART                       0xB0 /* self-monitoring and reporting */
+/* Device Configuration Overlay         0xB1 */
+/* reserved                             0xB2..0xB3 */
+/* Sanitize Device                      0xB4 */
+/* reserved                             0xB5 */
+/* NV Cache                             0xB6 */
+/* reserved for CFA                     0xB7..0xBB */
+#define CFA_ACCESS_METADATA_STORAGE     0xB8
+/* reserved                             0xBC..0xBF */
+#define CFA_ERASE_SECTORS               0xC0 /* microdrives implement as NOP */
+/* vendor specific                      0xC1..0xC3 */
+#define WIN_MULTREAD                    0xC4 /* read sectors using multiple mode*/
+#define WIN_MULTWRITE                   0xC5 /* write sectors using multiple mode */
+#define WIN_SETMULT                     0xC6 /* enable/disable multiple mode */
+#define WIN_READDMA_QUEUED              0xC7 /* read sectors using Queued DMA transfers, obsolete since ACS2 */
+#define WIN_READDMA                     0xC8 /* read sectors using DMA transfers */
+#define WIN_READDMA_ONCE                0xC9 /* 28-Bit - w/o retries, obsolete since ATA5 */
+#define WIN_WRITEDMA                    0xCA /* write sectors using DMA transfers */
+#define WIN_WRITEDMA_ONCE               0xCB /* 28-Bit - w/o retries, obsolete since ATA5 */
+#define WIN_WRITEDMA_QUEUED             0xCC /* write sectors using Queued DMA transfers, obsolete since ACS2 */
+#define CFA_WRITE_MULTI_WO_ERASE        0xCD /* CFA Write multiple without erase */
+/* WRITE MULTIPLE FUA EXT               0xCE */
+/* reserved                             0xCF..0xDO */
+/* CHECK MEDIA CARD TYPE                0xD1 */
+/* reserved for media card pass through 0xD2..0xD4 */
+/* reserved                             0xD5..0xD9 */
+#define WIN_GETMEDIASTATUS              0xDA /* obsolete since ATA8 */
+/* obsolete since ATA3, retired in ATA4 0xDB..0xDD */
+#define WIN_DOORLOCK                    0xDE /* lock door on removable drives, obsolete since ATA8 */
+#define WIN_DOORUNLOCK                  0xDF /* unlock door on removable drives, obsolete since ATA8 */
+#define WIN_STANDBYNOW1                 0xE0
+#define WIN_IDLEIMMEDIATE               0xE1 /* force drive to become "ready" */
+#define WIN_STANDBY                     0xE2 /* Set device in Standby Mode */
+#define WIN_SETIDLE1                    0xE3
+#define WIN_READ_BUFFER                 0xE4 /* force read only 1 sector */
+#define WIN_CHECKPOWERMODE1             0xE5
+#define WIN_SLEEPNOW1                   0xE6
+#define WIN_FLUSH_CACHE                 0xE7
+#define WIN_WRITE_BUFFER                0xE8 /* force write only 1 sector */
+/* READ BUFFER DMA                      0xE9 */
+#define WIN_FLUSH_CACHE_EXT             0xEA /* 48-Bit */
+/* WRITE BUFFER DMA                     0xEB */
+#define WIN_IDENTIFY                    0xEC /* ask drive to identify itself */
+#define WIN_MEDIAEJECT                  0xED /* obsolete since ATA8 */
+/* obsolete since ATA4                  0xEE */
+#define WIN_SETFEATURES                 0xEF /* set special drive features */
+#define IBM_SENSE_CONDITION             0xF0 /* measure disk temperature, vendor specific */
+#define WIN_SECURITY_SET_PASS           0xF1
+#define WIN_SECURITY_UNLOCK             0xF2
+#define WIN_SECURITY_ERASE_PREPARE      0xF3
+#define WIN_SECURITY_ERASE_UNIT         0xF4
+#define WIN_SECURITY_FREEZE_LOCK        0xF5
+#define CFA_WEAR_LEVEL                  0xF5 /* microdrives implement as NOP; not specified in T13! */
+#define WIN_SECURITY_DISABLE            0xF6
+/* vendor specific                      0xF7 */
+#define WIN_READ_NATIVE_MAX             0xF8 /* return the native maximum address */
+#define WIN_SET_MAX                     0xF9
+/* vendor specific                      0xFA..0xFF */
+
+/* set to 1 set disable mult support */
+#define MAX_MULT_SECTORS 16
+
+#define IDE_DMA_BUF_SECTORS 256
+
+/* feature values for Data Set Management */
+#define DSM_TRIM                        0x01
+
+#if (IDE_DMA_BUF_SECTORS < MAX_MULT_SECTORS)
+#error "IDE_DMA_BUF_SECTORS must be bigger or equal to MAX_MULT_SECTORS"
+#endif
+
+/* ATAPI defines */
+
+#define ATAPI_PACKET_SIZE 12
+
+/* The generic packet command opcodes for CD/DVD Logical Units,
+ * From Table 57 of the SFF8090 Ver. 3 (Mt. Fuji) draft standard. */
+#define GPCMD_BLANK                         0xa1
+#define GPCMD_CLOSE_TRACK                   0x5b
+#define GPCMD_FLUSH_CACHE                   0x35
+#define GPCMD_FORMAT_UNIT                   0x04
+#define GPCMD_GET_CONFIGURATION             0x46
+#define GPCMD_GET_EVENT_STATUS_NOTIFICATION 0x4a
+#define GPCMD_GET_PERFORMANCE               0xac
+#define GPCMD_INQUIRY                       0x12
+#define GPCMD_LOAD_UNLOAD                   0xa6
+#define GPCMD_MECHANISM_STATUS              0xbd
+#define GPCMD_MODE_SELECT_10                0x55
+#define GPCMD_MODE_SENSE_10                 0x5a
+#define GPCMD_PAUSE_RESUME                  0x4b
+#define GPCMD_PLAY_AUDIO_10                 0x45
+#define GPCMD_PLAY_AUDIO_MSF                0x47
+#define GPCMD_PLAY_AUDIO_TI                 0x48
+#define GPCMD_PLAY_CD                       0xbc
+#define GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL  0x1e
+#define GPCMD_READ_10                       0x28
+#define GPCMD_READ_12                       0xa8
+#define GPCMD_READ_CDVD_CAPACITY            0x25
+#define GPCMD_READ_CD                       0xbe
+#define GPCMD_READ_CD_MSF                   0xb9
+#define GPCMD_READ_DISC_INFO                0x51
+#define GPCMD_READ_DVD_STRUCTURE            0xad
+#define GPCMD_READ_FORMAT_CAPACITIES        0x23
+#define GPCMD_READ_HEADER                   0x44
+#define GPCMD_READ_TRACK_RZONE_INFO         0x52
+#define GPCMD_READ_SUBCHANNEL               0x42
+#define GPCMD_READ_TOC_PMA_ATIP             0x43
+#define GPCMD_REPAIR_RZONE_TRACK            0x58
+#define GPCMD_REPORT_KEY                    0xa4
+#define GPCMD_REQUEST_SENSE                 0x03
+#define GPCMD_RESERVE_RZONE_TRACK           0x53
+#define GPCMD_SCAN                          0xba
+#define GPCMD_SEEK                          0x2b
+#define GPCMD_SEND_DVD_STRUCTURE            0xad
+#define GPCMD_SEND_EVENT                    0xa2
+#define GPCMD_SEND_KEY                      0xa3
+#define GPCMD_SEND_OPC                      0x54
+#define GPCMD_SET_READ_AHEAD                0xa7
+#define GPCMD_SET_STREAMING                 0xb6
+#define GPCMD_START_STOP_UNIT               0x1b
+#define GPCMD_STOP_PLAY_SCAN                0x4e
+#define GPCMD_TEST_UNIT_READY               0x00
+#define GPCMD_VERIFY_10                     0x2f
+#define GPCMD_WRITE_10                      0x2a
+#define GPCMD_WRITE_AND_VERIFY_10           0x2e
+/* This is listed as optional in ATAPI 2.6, but is (curiously)
+ * missing from Mt. Fuji, Table 57.  It _is_ mentioned in Mt. Fuji
+ * Table 377 as an MMC command for SCSi devices though...  Most ATAPI
+ * drives support it. */
+#define GPCMD_SET_SPEED                     0xbb
+/* This seems to be a SCSI specific CD-ROM opcode
+ * to play data at track/index */
+#define GPCMD_PLAYAUDIO_TI                  0x48
+/*
+ * From MS Media Status Notification Support Specification. For
+ * older drives only.
+ */
+#define GPCMD_GET_MEDIA_STATUS              0xda
+#define GPCMD_MODE_SENSE_6                  0x1a
+
+#define ATAPI_INT_REASON_CD             0x01 /* 0 = data transfer */
+#define ATAPI_INT_REASON_IO             0x02 /* 1 = transfer to the host */
+#define ATAPI_INT_REASON_REL            0x04
+#define ATAPI_INT_REASON_TAG            0xf8
+
+/* same constants as bochs */
+#define ASC_NO_SEEK_COMPLETE                 0x02
+#define ASC_ILLEGAL_OPCODE                   0x20
+#define ASC_LOGICAL_BLOCK_OOR                0x21
+#define ASC_INV_FIELD_IN_CMD_PACKET          0x24
+#define ASC_MEDIUM_MAY_HAVE_CHANGED          0x28
+#define ASC_INCOMPATIBLE_FORMAT              0x30
+#define ASC_MEDIUM_NOT_PRESENT               0x3a
+#define ASC_SAVING_PARAMETERS_NOT_SUPPORTED  0x39
+#define ASC_DATA_PHASE_ERROR                 0x4b
+#define ASC_MEDIA_REMOVAL_PREVENTED          0x53
+
+#define CFA_NO_ERROR            0x00
+#define CFA_MISC_ERROR          0x09
+#define CFA_INVALID_COMMAND     0x20
+#define CFA_INVALID_ADDRESS     0x21
+#define CFA_ADDRESS_OVERFLOW    0x2f
+
+#define SMART_READ_DATA       0xd0
+#define SMART_READ_THRESH     0xd1
+#define SMART_ATTR_AUTOSAVE   0xd2
+#define SMART_SAVE_ATTR       0xd3
+#define SMART_EXECUTE_OFFLINE 0xd4
+#define SMART_READ_LOG        0xd5
+#define SMART_WRITE_LOG       0xd6
+#define SMART_ENABLE          0xd8
+#define SMART_DISABLE         0xd9
+#define SMART_STATUS          0xda
+
+typedef enum { IDE_HD, IDE_CD, IDE_CFATA } IDEDriveKind;
+
+typedef void EndTransferFunc(IDEState *);
+
+typedef void DMAStartFunc(const IDEDMA *, IDEState *, BlockCompletionFunc *);
+typedef void DMAVoidFunc(const IDEDMA *);
+typedef int DMAIntFunc(const IDEDMA *, bool);
+typedef int32_t DMAInt32Func(const IDEDMA *, int32_t len);
+typedef void DMAu32Func(const IDEDMA *, uint32_t);
+typedef void DMAStopFunc(const IDEDMA *, bool);
+
+struct unreported_events {
+    bool eject_request;
+    bool new_media;
+};
+
+enum ide_dma_cmd {
+    IDE_DMA_READ = 0,
+    IDE_DMA_WRITE,
+    IDE_DMA_TRIM,
+    IDE_DMA_ATAPI,
+    IDE_DMA__COUNT
+};
+
+extern const char *IDE_DMA_CMD_lookup[IDE_DMA__COUNT];
+
+extern const MemoryRegionPortio ide_portio_list[];
+extern const MemoryRegionPortio ide_portio2_list[];
+
+#define ide_cmd_is_read(s) \
+        ((s)->dma_cmd == IDE_DMA_READ)
+
+typedef struct IDEBufferedRequest {
+    QLIST_ENTRY(IDEBufferedRequest) list;
+    QEMUIOVector qiov;
+    QEMUIOVector *original_qiov;
+    BlockCompletionFunc *original_cb;
+    void *original_opaque;
+    bool orphaned;
+} IDEBufferedRequest;
+
+/* NOTE: IDEState represents in fact one drive */
+struct IDEState {
+    IDEBus *bus;
+    uint8_t unit;
+    /* ide config */
+    IDEDriveKind drive_kind;
+    int drive_heads, drive_sectors;
+    int cylinders, heads, sectors, chs_trans;
+    int64_t nb_sectors;
+    int mult_sectors;
+    int identify_set;
+    uint8_t identify_data[512];
+    int drive_serial;
+    char drive_serial_str[21];
+    char drive_model_str[41];
+    uint64_t wwn;
+    /* ide regs */
+    uint8_t feature;
+    uint8_t error;
+    uint32_t nsector;
+    uint8_t sector;
+    uint8_t lcyl;
+    uint8_t hcyl;
+    /* other part of tf for lba48 support */
+    uint8_t hob_feature;
+    uint8_t hob_nsector;
+    uint8_t hob_sector;
+    uint8_t hob_lcyl;
+    uint8_t hob_hcyl;
+
+    uint8_t select;
+    uint8_t status;
+
+    bool io8;
+    bool reset_reverts;
+
+    /* set for lba48 access */
+    uint8_t lba48;
+    BlockBackend *blk;
+    char version[9];
+    /* ATAPI specific */
+    struct unreported_events events;
+    uint8_t sense_key;
+    uint8_t asc;
+    bool tray_open;
+    bool tray_locked;
+    uint8_t cdrom_changed;
+    int packet_transfer_size;
+    int elementary_transfer_size;
+    int32_t io_buffer_index;
+    int lba;
+    int cd_sector_size;
+    int atapi_dma; /* true if dma is requested for the packet cmd */
+    BlockAcctCookie acct;
+    BlockAIOCB *pio_aiocb;
+    QEMUIOVector qiov;
+    QLIST_HEAD(, IDEBufferedRequest) buffered_requests;
+    /* ATA DMA state */
+    uint64_t io_buffer_offset;
+    int32_t io_buffer_size;
+    QEMUSGList sg;
+    /* PIO transfer handling */
+    int req_nb_sectors; /* number of sectors per interrupt */
+    EndTransferFunc *end_transfer_func;
+    uint8_t *data_ptr;
+    uint8_t *data_end;
+    uint8_t *io_buffer;
+    /* PIO save/restore */
+    int32_t io_buffer_total_len;
+    int32_t cur_io_buffer_offset;
+    int32_t cur_io_buffer_len;
+    uint8_t end_transfer_fn_idx;
+    QEMUTimer *sector_write_timer; /* only used for win2k install hack */
+    uint32_t irq_count; /* counts IRQs when using win2k install hack */
+    /* CF-ATA extended error */
+    uint8_t ext_error;
+    /* CF-ATA metadata storage */
+    uint32_t mdata_size;
+    uint8_t *mdata_storage;
+    int media_changed;
+    enum ide_dma_cmd dma_cmd;
+    /* SMART */
+    uint8_t smart_enabled;
+    uint8_t smart_autosave;
+    int smart_errors;
+    uint8_t smart_selftest_count;
+    uint8_t *smart_selftest_data;
+    /* AHCI */
+    int ncq_queues;
+};
+
+struct IDEDMAOps {
+    DMAStartFunc *start_dma;
+    DMAVoidFunc *pio_transfer;
+    DMAInt32Func *prepare_buf;
+    DMAu32Func *commit_buf;
+    DMAIntFunc *rw_buf;
+    DMAVoidFunc *restart;
+    DMAVoidFunc *restart_dma;
+    DMAStopFunc *set_inactive;
+    DMAVoidFunc *cmd_done;
+    DMAVoidFunc *reset;
+};
+
+struct IDEDMA {
+    const struct IDEDMAOps *ops;
+    QEMUIOVector qiov;
+    BlockAIOCB *aiocb;
+};
+
+struct IDEBus {
+    BusState qbus;
+    IDEDevice *master;
+    IDEDevice *slave;
+    IDEState ifs[2];
+    QEMUBH *bh;
+
+    int bus_id;
+    int max_units;
+    IDEDMA *dma;
+    uint8_t unit;
+    uint8_t cmd;
+    qemu_irq irq; /* bus output */
+
+    int error_status;
+    uint8_t retry_unit;
+    int64_t retry_sector_num;
+    uint32_t retry_nsector;
+    PortioList portio_list;
+    PortioList portio2_list;
+    VMChangeStateEntry *vmstate;
+};
+
+#define TYPE_IDE_DEVICE "ide-device"
+OBJECT_DECLARE_TYPE(IDEDevice, IDEDeviceClass, IDE_DEVICE)
+
+struct IDEDeviceClass {
+    DeviceClass parent_class;
+    void (*realize)(IDEDevice *dev, Error **errp);
+};
+
+struct IDEDevice {
+    DeviceState qdev;
+    uint32_t unit;
+    BlockConf conf;
+    int chs_trans;
+    char *version;
+    char *serial;
+    char *model;
+    uint64_t wwn;
+    /*
+     * 0x0000        - rotation rate not reported
+     * 0x0001        - non-rotating medium (SSD)
+     * 0x0002-0x0400 - reserved
+     * 0x0401-0xffe  - rotations per minute
+     * 0xffff        - reserved
+     */
+    uint16_t rotation_rate;
+};
+
+/* These are used for the error_status field of IDEBus */
+#define IDE_RETRY_MASK 0xf8
+#define IDE_RETRY_DMA  0x08
+#define IDE_RETRY_PIO  0x10
+#define IDE_RETRY_ATAPI 0x20 /* reused IDE_RETRY_READ bit */
+#define IDE_RETRY_READ  0x20
+#define IDE_RETRY_FLUSH 0x40
+#define IDE_RETRY_TRIM 0x80
+#define IDE_RETRY_HBA  0x100
+
+#define IS_IDE_RETRY_DMA(_status) \
+    ((_status) & IDE_RETRY_DMA)
+
+#define IS_IDE_RETRY_PIO(_status) \
+    ((_status) & IDE_RETRY_PIO)
+
+/*
+ * The method of the IDE_RETRY_ATAPI determination is to use a previously
+ * impossible bit combination as a new status value.
+ */
+#define IS_IDE_RETRY_ATAPI(_status)   \
+    (((_status) & IDE_RETRY_MASK) == IDE_RETRY_ATAPI)
+
+static inline uint8_t ide_dma_cmd_to_retry(uint8_t dma_cmd)
+{
+    switch (dma_cmd) {
+    case IDE_DMA_READ:
+        return IDE_RETRY_DMA | IDE_RETRY_READ;
+    case IDE_DMA_WRITE:
+        return IDE_RETRY_DMA;
+    case IDE_DMA_TRIM:
+        return IDE_RETRY_DMA | IDE_RETRY_TRIM;
+    case IDE_DMA_ATAPI:
+        return IDE_RETRY_ATAPI;
+    default:
+        break;
+    }
+    return 0;
+}
+
+static inline IDEState *ide_bus_active_if(IDEBus *bus)
+{
+    return bus->ifs + bus->unit;
+}
+
+/* hw/ide/core.c */
+extern const VMStateDescription vmstate_ide_bus;
+
+#define VMSTATE_IDE_BUS(_field, _state)                          \
+    VMSTATE_STRUCT(_field, _state, 1, vmstate_ide_bus, IDEBus)
+
+#define VMSTATE_IDE_BUS_ARRAY(_field, _state, _num)              \
+    VMSTATE_STRUCT_ARRAY(_field, _state, _num, 1, vmstate_ide_bus, IDEBus)
+
+extern const VMStateDescription vmstate_ide_drive;
+
+#define VMSTATE_IDE_DRIVES(_field, _state) \
+    VMSTATE_STRUCT_ARRAY(_field, _state, 2, 3, vmstate_ide_drive, IDEState)
+
+#define VMSTATE_IDE_DRIVE(_field, _state) \
+    VMSTATE_STRUCT(_field, _state, 1, vmstate_ide_drive, IDEState)
+
+void ide_bus_reset(IDEBus *bus);
+int64_t ide_get_sector(IDEState *s);
+void ide_set_sector(IDEState *s, int64_t sector_num);
+
+void ide_start_dma(IDEState *s, BlockCompletionFunc *cb);
+void dma_buf_commit(IDEState *s, uint32_t tx_bytes);
+void ide_dma_error(IDEState *s);
+void ide_abort_command(IDEState *s);
+
+void ide_atapi_cmd_ok(IDEState *s);
+void ide_atapi_cmd_error(IDEState *s, int sense_key, int asc);
+void ide_atapi_dma_restart(IDEState *s);
+void ide_atapi_io_error(IDEState *s, int ret);
+
+void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val);
+uint32_t ide_ioport_read(void *opaque, uint32_t addr1);
+uint32_t ide_status_read(void *opaque, uint32_t addr);
+void ide_ctrl_write(void *opaque, uint32_t addr, uint32_t val);
+void ide_data_writew(void *opaque, uint32_t addr, uint32_t val);
+uint32_t ide_data_readw(void *opaque, uint32_t addr);
+void ide_data_writel(void *opaque, uint32_t addr, uint32_t val);
+uint32_t ide_data_readl(void *opaque, uint32_t addr);
+
+int ide_init_drive(IDEState *s, BlockBackend *blk, IDEDriveKind kind,
+                   const char *version, const char *serial, const char *model,
+                   uint64_t wwn,
+                   uint32_t cylinders, uint32_t heads, uint32_t secs,
+                   int chs_trans, Error **errp);
+void ide_exit(IDEState *s);
+void ide_bus_init_output_irq(IDEBus *bus, qemu_irq irq_out);
+int ide_init_ioport(IDEBus *bus, ISADevice *isa, int iobase, int iobase2);
+void ide_bus_set_irq(IDEBus *bus);
+void ide_bus_register_restart_cb(IDEBus *bus);
+
+void ide_bus_exec_cmd(IDEBus *bus, uint32_t val);
+
+void ide_transfer_start(IDEState *s, uint8_t *buf, int size,
+                        EndTransferFunc *end_transfer_func);
+bool ide_transfer_start_norecurse(IDEState *s, uint8_t *buf, int size,
+                                  EndTransferFunc *end_transfer_func);
+void ide_transfer_stop(IDEState *s);
+void ide_set_inactive(IDEState *s, bool more);
+BlockAIOCB *ide_issue_trim(
+        int64_t offset, QEMUIOVector *qiov,
+        BlockCompletionFunc *cb, void *cb_opaque, void *opaque);
+BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num,
+                               QEMUIOVector *iov, int nb_sectors,
+                               BlockCompletionFunc *cb, void *opaque);
+void ide_cancel_dma_sync(IDEState *s);
+
+/* hw/ide/atapi.c */
+void ide_atapi_cmd(IDEState *s);
+void ide_atapi_cmd_reply_end(IDEState *s);
+
+/* hw/ide/qdev.c */
+void ide_bus_init(IDEBus *idebus, size_t idebus_size, DeviceState *dev,
+                  int bus_id, int max_units);
+IDEDevice *ide_bus_create_drive(IDEBus *bus, int unit, DriveInfo *drive);
+
+int ide_get_geometry(BusState *bus, int unit,
+                     int16_t *cyls, int8_t *heads, int8_t *secs);
+int ide_get_bios_chs_trans(BusState *bus, int unit);
+
+int ide_handle_rw_error(IDEState *s, int error, int op);
+
+#endif /* HW_IDE_INTERNAL_H */
diff --git a/include/hw/ide/isa.h b/include/hw/ide/isa.h
new file mode 100644 (file)
index 0000000..1cd0ff1
--- /dev/null
@@ -0,0 +1,20 @@
+/*
+ * QEMU IDE Emulation: ISA Bus support.
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2006 Openedhand Ltd.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef HW_IDE_ISA_H
+#define HW_IDE_ISA_H
+
+#include "qom/object.h"
+
+#define TYPE_ISA_IDE "isa-ide"
+OBJECT_DECLARE_SIMPLE_TYPE(ISAIDEState, ISA_IDE)
+
+ISADevice *isa_ide_init(ISABus *bus, int iobase, int iobase2, int irqnum,
+                        DriveInfo *hd0, DriveInfo *hd1);
+
+#endif
diff --git a/include/hw/ide/mmio.h b/include/hw/ide/mmio.h
new file mode 100644 (file)
index 0000000..d726a49
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * QEMU IDE Emulation: mmio support (for embedded).
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2006 Openedhand Ltd.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef HW_IDE_MMIO_H
+#define HW_IDE_MMIO_H
+
+#include "qom/object.h"
+
+/*
+ * QEMU interface:
+ *  + sysbus IRQ 0: asserted by the IDE channel
+ *  + sysbus MMIO region 0: data registers
+ *  + sysbus MMIO region 1: status & control registers
+ */
+#define TYPE_MMIO_IDE "mmio-ide"
+OBJECT_DECLARE_SIMPLE_TYPE(MMIOIDEState, MMIO_IDE)
+
+void mmio_ide_init_drives(DeviceState *dev, DriveInfo *hd0, DriveInfo *hd1);
+
+#endif
diff --git a/include/hw/ide/pci.h b/include/hw/ide/pci.h
new file mode 100644 (file)
index 0000000..a814a0a
--- /dev/null
@@ -0,0 +1,69 @@
+#ifndef HW_IDE_PCI_H
+#define HW_IDE_PCI_H
+
+#include "hw/ide/internal.h"
+#include "hw/pci/pci_device.h"
+#include "qom/object.h"
+
+#define BM_STATUS_DMAING 0x01
+#define BM_STATUS_ERROR  0x02
+#define BM_STATUS_INT    0x04
+
+#define BM_CMD_START     0x01
+#define BM_CMD_READ      0x08
+
+typedef struct BMDMAState {
+    IDEDMA dma;
+    uint8_t cmd;
+    uint8_t status;
+    uint32_t addr;
+
+    IDEBus *bus;
+    /* current transfer state */
+    uint32_t cur_addr;
+    uint32_t cur_prd_last;
+    uint32_t cur_prd_addr;
+    uint32_t cur_prd_len;
+    BlockCompletionFunc *dma_cb;
+    MemoryRegion addr_ioport;
+    MemoryRegion extra_io;
+    qemu_irq irq;
+
+    /* Bit 0-2 and 7:   BM status register
+     * Bit 3-6:         bus->error_status */
+    uint8_t migration_compat_status;
+    uint8_t migration_retry_unit;
+    int64_t migration_retry_sector_num;
+    uint32_t migration_retry_nsector;
+
+    struct PCIIDEState *pci_dev;
+} BMDMAState;
+
+#define TYPE_PCI_IDE "pci-ide"
+OBJECT_DECLARE_SIMPLE_TYPE(PCIIDEState, PCI_IDE)
+
+struct PCIIDEState {
+    /*< private >*/
+    PCIDevice parent_obj;
+    /*< public >*/
+
+    IDEBus bus[2];
+    BMDMAState bmdma[2];
+    qemu_irq isa_irq[2];
+    uint32_t secondary; /* used only for cmd646 */
+    MemoryRegion bmdma_bar;
+    MemoryRegion cmd_bar[2];
+    MemoryRegion data_bar[2];
+};
+
+void bmdma_init(IDEBus *bus, BMDMAState *bm, PCIIDEState *d);
+void bmdma_cmd_writeb(BMDMAState *bm, uint32_t val);
+void bmdma_status_writeb(BMDMAState *bm, uint32_t val);
+extern MemoryRegionOps bmdma_addr_ioport_ops;
+void pci_ide_create_devs(PCIDevice *dev);
+void pci_ide_update_mode(PCIIDEState *s);
+
+extern const VMStateDescription vmstate_ide_pci;
+extern const MemoryRegionOps pci_ide_cmd_le_ops;
+extern const MemoryRegionOps pci_ide_data_le_ops;
+#endif
diff --git a/include/hw/ide/piix.h b/include/hw/ide/piix.h
new file mode 100644 (file)
index 0000000..ef3ef3d
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef HW_IDE_PIIX_H
+#define HW_IDE_PIIX_H
+
+#define TYPE_PIIX3_IDE "piix3-ide"
+#define TYPE_PIIX4_IDE "piix4-ide"
+
+#endif /* HW_IDE_PIIX_H */
diff --git a/include/hw/input/adb-keys.h b/include/hw/input/adb-keys.h
new file mode 100644 (file)
index 0000000..525fba8
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2016 John Arbuckle
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ *  adb-keys.h
+ *
+ *  Provides an enum of all the Macintosh keycodes.
+ *  Additional information: http://www.archive.org/stream/apple-guide-macintosh-family-hardware/Apple_Guide_to_the_Macintosh_Family_Hardware_2e#page/n345/mode/2up
+ *                          page 308
+ */
+
+#ifndef ADB_KEYS_H
+#define ADB_KEYS_H
+
+enum {
+    ADB_KEY_A = 0x00,
+    ADB_KEY_B = 0x0b,
+    ADB_KEY_C = 0x08,
+    ADB_KEY_D = 0x02,
+    ADB_KEY_E = 0x0e,
+    ADB_KEY_F = 0x03,
+    ADB_KEY_G = 0x05,
+    ADB_KEY_H = 0x04,
+    ADB_KEY_I = 0x22,
+    ADB_KEY_J = 0x26,
+    ADB_KEY_K = 0x28,
+    ADB_KEY_L = 0x25,
+    ADB_KEY_M = 0x2e,
+    ADB_KEY_N = 0x2d,
+    ADB_KEY_O = 0x1f,
+    ADB_KEY_P = 0x23,
+    ADB_KEY_Q = 0x0c,
+    ADB_KEY_R = 0x0f,
+    ADB_KEY_S = 0x01,
+    ADB_KEY_T = 0x11,
+    ADB_KEY_U = 0x20,
+    ADB_KEY_V = 0x09,
+    ADB_KEY_W = 0x0d,
+    ADB_KEY_X = 0x07,
+    ADB_KEY_Y = 0x10,
+    ADB_KEY_Z = 0x06,
+
+    ADB_KEY_0 = 0x1d,
+    ADB_KEY_1 = 0x12,
+    ADB_KEY_2 = 0x13,
+    ADB_KEY_3 = 0x14,
+    ADB_KEY_4 = 0x15,
+    ADB_KEY_5 = 0x17,
+    ADB_KEY_6 = 0x16,
+    ADB_KEY_7 = 0x1a,
+    ADB_KEY_8 = 0x1c,
+    ADB_KEY_9 = 0x19,
+
+    ADB_KEY_GRAVE_ACCENT = 0x32,
+    ADB_KEY_MINUS = 0x1b,
+    ADB_KEY_EQUAL = 0x18,
+    ADB_KEY_DELETE = 0x33,
+    ADB_KEY_CAPS_LOCK = 0x39,
+    ADB_KEY_TAB = 0x30,
+    ADB_KEY_RETURN = 0x24,
+    ADB_KEY_LEFT_BRACKET = 0x21,
+    ADB_KEY_RIGHT_BRACKET = 0x1e,
+    ADB_KEY_BACKSLASH = 0x2a,
+    ADB_KEY_SEMICOLON = 0x29,
+    ADB_KEY_APOSTROPHE = 0x27,
+    ADB_KEY_COMMA = 0x2b,
+    ADB_KEY_PERIOD = 0x2f,
+    ADB_KEY_FORWARD_SLASH = 0x2c,
+    ADB_KEY_LEFT_SHIFT = 0x38,
+    ADB_KEY_RIGHT_SHIFT = 0x7b,
+    ADB_KEY_SPACEBAR = 0x31,
+    ADB_KEY_LEFT_CONTROL = 0x36,
+    ADB_KEY_RIGHT_CONTROL = 0x7d,
+    ADB_KEY_LEFT_OPTION = 0x3a,
+    ADB_KEY_RIGHT_OPTION = 0x7c,
+    ADB_KEY_COMMAND = 0x37,
+
+    ADB_KEY_KP_0 = 0x52,
+    ADB_KEY_KP_1 = 0x53,
+    ADB_KEY_KP_2 = 0x54,
+    ADB_KEY_KP_3 = 0x55,
+    ADB_KEY_KP_4 = 0x56,
+    ADB_KEY_KP_5 = 0x57,
+    ADB_KEY_KP_6 = 0x58,
+    ADB_KEY_KP_7 = 0x59,
+    ADB_KEY_KP_8 = 0x5b,
+    ADB_KEY_KP_9 = 0x5c,
+    ADB_KEY_KP_PERIOD = 0x41,
+    ADB_KEY_KP_ENTER = 0x4c,
+    ADB_KEY_KP_PLUS = 0x45,
+    ADB_KEY_KP_SUBTRACT = 0x4e,
+    ADB_KEY_KP_MULTIPLY = 0x43,
+    ADB_KEY_KP_DIVIDE = 0x4b,
+    ADB_KEY_KP_EQUAL = 0x51,
+    ADB_KEY_KP_CLEAR = 0x47,
+
+    ADB_KEY_UP = 0x3e,
+    ADB_KEY_DOWN = 0x3d,
+    ADB_KEY_LEFT = 0x3b,
+    ADB_KEY_RIGHT = 0x3c,
+
+    ADB_KEY_HELP = 0x72,
+    ADB_KEY_HOME = 0x73,
+    ADB_KEY_PAGE_UP = 0x74,
+    ADB_KEY_PAGE_DOWN = 0x79,
+    ADB_KEY_END = 0x77,
+    ADB_KEY_FORWARD_DELETE = 0x75,
+
+    ADB_KEY_ESC = 0x35,
+    ADB_KEY_F1 = 0x7a,
+    ADB_KEY_F2 = 0x78,
+    ADB_KEY_F3 = 0x63,
+    ADB_KEY_F4 = 0x76,
+    ADB_KEY_F5 = 0x60,
+    ADB_KEY_F6 = 0x61,
+    ADB_KEY_F7 = 0x62,
+    ADB_KEY_F8 = 0x64,
+    ADB_KEY_F9 = 0x65,
+    ADB_KEY_F10 = 0x6d,
+    ADB_KEY_F11 = 0x67,
+    ADB_KEY_F12 = 0x6f,
+    ADB_KEY_F13 = 0x69,
+    ADB_KEY_F14 = 0x6b,
+    ADB_KEY_F15 = 0x71,
+
+    ADB_KEY_VOLUME_UP = 0x48,
+    ADB_KEY_VOLUME_DOWN = 0x49,
+    ADB_KEY_VOLUME_MUTE = 0x4a,
+    ADB_KEY_POWER = 0x7f7f
+};
+
+/* Could not find the value for this key. */
+/* #define ADB_KEY_EJECT */
+
+#endif /* ADB_KEYS_H */
diff --git a/include/hw/input/adb.h b/include/hw/input/adb.h
new file mode 100644 (file)
index 0000000..20fced1
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * QEMU ADB emulation shared definitions and prototypes
+ *
+ * Copyright (c) 2004-2007 Fabrice Bellard
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef ADB_H
+#define ADB_H
+
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+#define MAX_ADB_DEVICES 16
+
+#define ADB_MAX_OUT_LEN 16
+
+typedef struct ADBDevice ADBDevice;
+
+/* buf = NULL means polling */
+typedef int ADBDeviceRequest(ADBDevice *d, uint8_t *buf_out,
+                              const uint8_t *buf, int len);
+
+typedef bool ADBDeviceHasData(ADBDevice *d);
+
+#define TYPE_ADB_DEVICE "adb-device"
+OBJECT_DECLARE_TYPE(ADBDevice, ADBDeviceClass, ADB_DEVICE)
+
+struct ADBDevice {
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
+
+    int devaddr;
+    int handler;
+};
+
+
+struct ADBDeviceClass {
+    /*< private >*/
+    DeviceClass parent_class;
+    /*< public >*/
+
+    ADBDeviceRequest *devreq;
+    ADBDeviceHasData *devhasdata;
+};
+
+#define TYPE_ADB_BUS "apple-desktop-bus"
+OBJECT_DECLARE_SIMPLE_TYPE(ADBBusState, ADB_BUS)
+
+#define ADB_STATUS_BUSTIMEOUT  0x1
+#define ADB_STATUS_POLLREPLY   0x2
+
+struct ADBBusState {
+    /*< private >*/
+    BusState parent_obj;
+    /*< public >*/
+
+    ADBDevice *devices[MAX_ADB_DEVICES];
+    uint16_t pending;
+    int nb_devices;
+    int poll_index;
+    uint8_t status;
+
+    QEMUTimer *autopoll_timer;
+    bool autopoll_enabled;
+    bool autopoll_blocked;
+    uint8_t autopoll_rate_ms;
+    uint16_t autopoll_mask;
+    void (*autopoll_cb)(void *opaque);
+    void *autopoll_cb_opaque;
+};
+
+int adb_request(ADBBusState *s, uint8_t *buf_out,
+                const uint8_t *buf, int len);
+int adb_poll(ADBBusState *s, uint8_t *buf_out, uint16_t poll_mask);
+
+void adb_autopoll_block(ADBBusState *s);
+void adb_autopoll_unblock(ADBBusState *s);
+
+void adb_set_autopoll_enabled(ADBBusState *s, bool enabled);
+void adb_set_autopoll_rate_ms(ADBBusState *s, int rate_ms);
+void adb_set_autopoll_mask(ADBBusState *s, uint16_t mask);
+void adb_register_autopoll_callback(ADBBusState *s, void (*cb)(void *opaque),
+                                    void *opaque);
+
+#define TYPE_ADB_KEYBOARD "adb-keyboard"
+#define TYPE_ADB_MOUSE "adb-mouse"
+
+#endif /* ADB_H */
diff --git a/include/hw/input/hid.h b/include/hw/input/hid.h
new file mode 100644 (file)
index 0000000..6a9d7bf
--- /dev/null
@@ -0,0 +1,83 @@
+#ifndef QEMU_HID_H
+#define QEMU_HID_H
+
+#include "ui/input.h"
+
+#define HID_MOUSE     1
+#define HID_TABLET    2
+#define HID_KEYBOARD  3
+
+typedef struct HIDPointerEvent {
+    int32_t xdx, ydy; /* relative iff it's a mouse, otherwise absolute */
+    int32_t dz, buttons_state;
+} HIDPointerEvent;
+
+#define QUEUE_LENGTH    16 /* should be enough for a triple-click */
+#define QUEUE_MASK      (QUEUE_LENGTH-1u)
+#define QUEUE_INCR(v)   ((v)++, (v) &= QUEUE_MASK)
+
+typedef struct HIDState HIDState;
+typedef void (*HIDEventFunc)(HIDState *s);
+
+typedef struct HIDMouseState {
+    HIDPointerEvent queue[QUEUE_LENGTH];
+    int mouse_grabbed;
+} HIDMouseState;
+
+typedef struct HIDKeyboardState {
+    uint32_t keycodes[QUEUE_LENGTH];
+    uint16_t modifiers;
+    uint8_t leds;
+    uint8_t key[16];
+    int32_t keys;
+} HIDKeyboardState;
+
+struct HIDState {
+    union {
+        HIDMouseState ptr;
+        HIDKeyboardState kbd;
+    };
+    uint32_t head; /* index into circular queue */
+    uint32_t n;
+    int kind;
+    int32_t protocol;
+    uint8_t idle;
+    bool idle_pending;
+    QEMUTimer *idle_timer;
+    HIDEventFunc event;
+    QemuInputHandlerState *s;
+};
+
+void hid_init(HIDState *hs, int kind, HIDEventFunc event);
+void hid_reset(HIDState *hs);
+void hid_free(HIDState *hs);
+
+bool hid_has_events(HIDState *hs);
+void hid_set_next_idle(HIDState *hs);
+void hid_pointer_activate(HIDState *hs);
+int hid_pointer_poll(HIDState *hs, uint8_t *buf, int len);
+int hid_keyboard_poll(HIDState *hs, uint8_t *buf, int len);
+int hid_keyboard_write(HIDState *hs, uint8_t *buf, int len);
+
+extern const VMStateDescription vmstate_hid_keyboard_device;
+
+#define VMSTATE_HID_KEYBOARD_DEVICE(_field, _state) {                \
+    .name       = (stringify(_field)),                               \
+    .size       = sizeof(HIDState),                                  \
+    .vmsd       = &vmstate_hid_keyboard_device,                      \
+    .flags      = VMS_STRUCT,                                        \
+    .offset     = vmstate_offset_value(_state, _field, HIDState),    \
+}
+
+extern const VMStateDescription vmstate_hid_ptr_device;
+
+#define VMSTATE_HID_POINTER_DEVICE(_field, _state) {                 \
+    .name       = (stringify(_field)),                               \
+    .size       = sizeof(HIDState),                                  \
+    .vmsd       = &vmstate_hid_ptr_device,                           \
+    .flags      = VMS_STRUCT,                                        \
+    .offset     = vmstate_offset_value(_state, _field, HIDState),    \
+}
+
+
+#endif /* QEMU_HID_H */
diff --git a/include/hw/input/i8042.h b/include/hw/input/i8042.h
new file mode 100644 (file)
index 0000000..9fb3f8d
--- /dev/null
@@ -0,0 +1,109 @@
+/*
+ * QEMU PS/2 Controller
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef HW_INPUT_I8042_H
+#define HW_INPUT_I8042_H
+
+#include "hw/isa/isa.h"
+#include "hw/sysbus.h"
+#include "hw/input/ps2.h"
+#include "qom/object.h"
+
+#define I8042_KBD_IRQ      0
+#define I8042_MOUSE_IRQ    1
+
+typedef struct KBDState {
+    uint8_t write_cmd; /* if non zero, write data to port 60 is expected */
+    uint8_t status;
+    uint8_t mode;
+    uint8_t outport;
+    uint32_t migration_flags;
+    uint32_t obsrc;
+    bool outport_present;
+    bool extended_state;
+    bool extended_state_loaded;
+    /* Bitmask of devices with data available.  */
+    uint8_t pending;
+    uint8_t obdata;
+    uint8_t cbdata;
+    uint8_t pending_tmp;
+    PS2KbdState ps2kbd;
+    PS2MouseState ps2mouse;
+    QEMUTimer *throttle_timer;
+
+    qemu_irq irqs[2];
+    qemu_irq a20_out;
+    hwaddr mask;
+} KBDState;
+
+/*
+ * QEMU interface:
+ * + Named GPIO input "ps2-kbd-input-irq": set to 1 if the downstream PS2
+ *   keyboard device has asserted its irq
+ * + Named GPIO input "ps2-mouse-input-irq": set to 1 if the downstream PS2
+ *   mouse device has asserted its irq
+ * + Named GPIO output "a20": A20 line for x86 PCs
+ * + Unnamed GPIO output 0-1: i8042 output irqs for keyboard (0) or mouse (1)
+ */
+
+#define TYPE_I8042 "i8042"
+OBJECT_DECLARE_SIMPLE_TYPE(ISAKBDState, I8042)
+
+struct ISAKBDState {
+    ISADevice parent_obj;
+
+    KBDState kbd;
+    bool kbd_throttle;
+    MemoryRegion io[2];
+    uint8_t kbd_irq;
+    uint8_t mouse_irq;
+};
+
+/*
+ * QEMU interface:
+ * + sysbus MMIO region 0: MemoryRegion defining the command/status/data
+ *   registers (access determined by mask property and access type)
+ * + Named GPIO input "ps2-kbd-input-irq": set to 1 if the downstream PS2
+ *   keyboard device has asserted its irq
+ * + Named GPIO input "ps2-mouse-input-irq": set to 1 if the downstream PS2
+ *   mouse device has asserted its irq
+ * + Unnamed GPIO output 0-1: i8042 output irqs for keyboard (0) or mouse (1)
+ */
+
+#define TYPE_I8042_MMIO "i8042-mmio"
+OBJECT_DECLARE_SIMPLE_TYPE(MMIOKBDState, I8042_MMIO)
+
+struct MMIOKBDState {
+    SysBusDevice parent_obj;
+
+    KBDState kbd;
+    uint32_t size;
+    MemoryRegion region;
+};
+
+#define I8042_A20_LINE "a20"
+
+
+void i8042_isa_mouse_fake_event(ISAKBDState *isa);
+void i8042_setup_a20_line(ISADevice *dev, qemu_irq a20_out);
+
+static inline bool i8042_present(void)
+{
+    bool amb = false;
+    return object_resolve_path_type("", TYPE_I8042, &amb) || amb;
+}
+
+/*
+ * ACPI v2, Table 5-10 - Fixed ACPI Description Table Boot Architecture
+ * Flags, bit offset 1 - 8042.
+ */
+static inline uint16_t iapc_boot_arch_8042(void)
+{
+    return i8042_present() ? 0x1 << 1 : 0x0 ;
+}
+
+#endif /* HW_INPUT_I8042_H */
diff --git a/include/hw/input/lasips2.h b/include/hw/input/lasips2.h
new file mode 100644 (file)
index 0000000..01911c5
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * QEMU LASI PS/2 emulation
+ *
+ * Copyright (c) 2019 Sven Schnelle
+ *
+ */
+
+/*
+ * QEMU interface:
+ * + sysbus MMIO region 0: MemoryRegion defining the LASI PS2 keyboard
+ *   registers
+ * + sysbus MMIO region 1: MemoryRegion defining the LASI PS2 mouse
+ *   registers
+ * + sysbus IRQ 0: LASI PS2 output irq
+ * + Named GPIO input "lasips2-port-input-irq[0..1]": set to 1 if the downstream
+ *   LASIPS2Port has asserted its irq
+ */
+
+#ifndef HW_INPUT_LASIPS2_H
+#define HW_INPUT_LASIPS2_H
+
+#include "exec/hwaddr.h"
+#include "hw/sysbus.h"
+#include "hw/input/ps2.h"
+
+#define TYPE_LASIPS2_PORT "lasips2-port"
+OBJECT_DECLARE_TYPE(LASIPS2Port, LASIPS2PortDeviceClass, LASIPS2_PORT)
+
+struct LASIPS2PortDeviceClass {
+    DeviceClass parent;
+
+    DeviceRealize parent_realize;
+};
+
+typedef struct LASIPS2State LASIPS2State;
+
+struct LASIPS2Port {
+    DeviceState parent_obj;
+
+    LASIPS2State *lasips2;
+    MemoryRegion reg;
+    PS2State *ps2dev;
+    uint8_t id;
+    uint8_t control;
+    uint8_t buf;
+    bool loopback_rbne;
+    qemu_irq irq;
+};
+
+#define TYPE_LASIPS2_KBD_PORT "lasips2-kbd-port"
+OBJECT_DECLARE_SIMPLE_TYPE(LASIPS2KbdPort, LASIPS2_KBD_PORT)
+
+struct LASIPS2KbdPort {
+    LASIPS2Port parent_obj;
+
+    PS2KbdState kbd;
+};
+
+#define TYPE_LASIPS2_MOUSE_PORT "lasips2-mouse-port"
+OBJECT_DECLARE_SIMPLE_TYPE(LASIPS2MousePort, LASIPS2_MOUSE_PORT)
+
+struct LASIPS2MousePort {
+    LASIPS2Port parent_obj;
+
+    PS2MouseState mouse;
+};
+
+struct LASIPS2State {
+    SysBusDevice parent_obj;
+
+    LASIPS2KbdPort kbd_port;
+    LASIPS2MousePort mouse_port;
+    uint8_t int_status;
+    qemu_irq irq;
+};
+
+#define TYPE_LASIPS2 "lasips2"
+OBJECT_DECLARE_SIMPLE_TYPE(LASIPS2State, LASIPS2)
+
+#endif /* HW_INPUT_LASIPS2_H */
diff --git a/include/hw/input/lm832x.h b/include/hw/input/lm832x.h
new file mode 100644 (file)
index 0000000..e0e5d5e
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * National Semiconductor LM8322/8323 GPIO keyboard & PWM chips.
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ * Written by Andrzej Zaborowski <andrew@openedhand.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_INPUT_LM832X_H
+#define HW_INPUT_LM832X_H
+
+#define TYPE_LM8323 "lm8323"
+
+void lm832x_key_event(DeviceState *dev, int key, int state);
+
+#endif
diff --git a/include/hw/input/pl050.h b/include/hw/input/pl050.h
new file mode 100644 (file)
index 0000000..4cb8985
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Arm PrimeCell PL050 Keyboard / Mouse Interface
+ *
+ * Copyright (c) 2006-2007 CodeSourcery.
+ * Written by Paul Brook
+ *
+ * This code is licensed under the GPL.
+ */
+
+#ifndef HW_PL050_H
+#define HW_PL050_H
+
+#include "hw/sysbus.h"
+#include "migration/vmstate.h"
+#include "hw/input/ps2.h"
+#include "hw/irq.h"
+
+struct PL050DeviceClass {
+    SysBusDeviceClass parent_class;
+
+    DeviceRealize parent_realize;
+};
+
+#define TYPE_PL050 "pl050"
+OBJECT_DECLARE_TYPE(PL050State, PL050DeviceClass, PL050)
+
+struct PL050State {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    PS2State *ps2dev;
+    uint32_t cr;
+    uint32_t clk;
+    uint32_t last;
+    int pending;
+    qemu_irq irq;
+    bool is_mouse;
+};
+
+#define TYPE_PL050_KBD_DEVICE "pl050_keyboard"
+OBJECT_DECLARE_SIMPLE_TYPE(PL050KbdState, PL050_KBD_DEVICE)
+
+struct PL050KbdState {
+    PL050State parent_obj;
+
+    PS2KbdState kbd;
+};
+
+#define TYPE_PL050_MOUSE_DEVICE "pl050_mouse"
+OBJECT_DECLARE_SIMPLE_TYPE(PL050MouseState, PL050_MOUSE_DEVICE)
+
+struct PL050MouseState {
+    PL050State parent_obj;
+
+    PS2MouseState mouse;
+};
+
+#endif
diff --git a/include/hw/input/ps2.h b/include/hw/input/ps2.h
new file mode 100644 (file)
index 0000000..cd61a63
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * QEMU PS/2 keyboard/mouse emulation
+ *
+ * Copyright (C) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_PS2_H
+#define HW_PS2_H
+
+#include "hw/sysbus.h"
+
+#define PS2_MOUSE_BUTTON_LEFT   0x01
+#define PS2_MOUSE_BUTTON_RIGHT  0x02
+#define PS2_MOUSE_BUTTON_MIDDLE 0x04
+#define PS2_MOUSE_BUTTON_SIDE   0x08
+#define PS2_MOUSE_BUTTON_EXTRA  0x10
+
+struct PS2DeviceClass {
+    SysBusDeviceClass parent_class;
+
+    ResettablePhases parent_phases;
+};
+
+/*
+ * PS/2 buffer size. Keep 256 bytes for compatibility with
+ * older QEMU versions.
+ */
+#define PS2_BUFFER_SIZE     256
+
+typedef struct {
+    uint8_t data[PS2_BUFFER_SIZE];
+    int rptr, wptr, cwptr, count;
+} PS2Queue;
+
+/* Output IRQ */
+#define PS2_DEVICE_IRQ      0
+
+struct PS2State {
+    SysBusDevice parent_obj;
+
+    PS2Queue queue;
+    int32_t write_cmd;
+    qemu_irq irq;
+};
+
+#define TYPE_PS2_DEVICE "ps2-device"
+OBJECT_DECLARE_TYPE(PS2State, PS2DeviceClass, PS2_DEVICE)
+
+struct PS2KbdState {
+    PS2State parent_obj;
+
+    int scan_enabled;
+    int translate;
+    int scancode_set; /* 1=XT, 2=AT, 3=PS/2 */
+    int ledstate;
+    bool need_high_bit;
+    unsigned int modifiers; /* bitmask of MOD_* constants above */
+};
+
+#define TYPE_PS2_KBD_DEVICE "ps2-kbd"
+OBJECT_DECLARE_SIMPLE_TYPE(PS2KbdState, PS2_KBD_DEVICE)
+
+struct PS2MouseState {
+    PS2State parent_obj;
+
+    uint8_t mouse_status;
+    uint8_t mouse_resolution;
+    uint8_t mouse_sample_rate;
+    uint8_t mouse_wrap;
+    uint8_t mouse_type; /* 0 = PS2, 3 = IMPS/2, 4 = IMEX */
+    uint8_t mouse_detect_state;
+    int mouse_dx; /* current values, needed for 'poll' mode */
+    int mouse_dy;
+    int mouse_dz;
+    int mouse_dw;
+    uint8_t mouse_buttons;
+};
+
+#define TYPE_PS2_MOUSE_DEVICE "ps2-mouse"
+OBJECT_DECLARE_SIMPLE_TYPE(PS2MouseState, PS2_MOUSE_DEVICE)
+
+/* ps2.c */
+void ps2_write_mouse(PS2MouseState *s, int val);
+void ps2_write_keyboard(PS2KbdState *s, int val);
+uint32_t ps2_read_data(PS2State *s);
+void ps2_queue_noirq(PS2State *s, int b);
+void ps2_queue(PS2State *s, int b);
+void ps2_queue_2(PS2State *s, int b1, int b2);
+void ps2_queue_3(PS2State *s, int b1, int b2, int b3);
+void ps2_queue_4(PS2State *s, int b1, int b2, int b3, int b4);
+void ps2_keyboard_set_translation(PS2KbdState *s, int mode);
+void ps2_mouse_fake_event(PS2MouseState *s);
+int ps2_queue_empty(PS2State *s);
+
+#endif /* HW_PS2_H */
diff --git a/include/hw/input/stellaris_gamepad.h b/include/hw/input/stellaris_gamepad.h
new file mode 100644 (file)
index 0000000..51085e1
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Gamepad style buttons connected to IRQ/GPIO lines
+ *
+ * Copyright (c) 2007 CodeSourcery.
+ * Written by Paul Brook
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_INPUT_STELLARIS_GAMEPAD_H
+#define HW_INPUT_STELLARIS_GAMEPAD_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+/*
+ * QEMU interface:
+ *  + QOM array property "keycodes": uint32_t QEMU keycodes to handle
+ *    (these are QCodes, ie the Q_KEY_* values)
+ *  + unnamed GPIO outputs: one per keycode, in the same order as the
+ *    "keycodes" array property entries; asserted when key is down
+ */
+
+#define TYPE_STELLARIS_GAMEPAD "stellaris-gamepad"
+OBJECT_DECLARE_SIMPLE_TYPE(StellarisGamepad, STELLARIS_GAMEPAD)
+
+struct StellarisGamepad {
+    SysBusDevice parent_obj;
+
+    uint32_t num_buttons;
+    qemu_irq *irqs;
+    uint32_t *keycodes;
+    uint8_t *pressed;
+};
+
+#endif
diff --git a/include/hw/input/tsc2xxx.h b/include/hw/input/tsc2xxx.h
new file mode 100644 (file)
index 0000000..00eca17
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * TI touchscreen controller
+ *
+ * Copyright (c) 2006 Andrzej Zaborowski
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_INPUT_TSC2XXX_H
+#define HW_INPUT_TSC2XXX_H
+
+typedef struct MouseTransformInfo {
+    /* Touchscreen resolution */
+    int x;
+    int y;
+    /* Calibration values as used/generated by tslib */
+    int a[7];
+} MouseTransformInfo;
+
+typedef struct uWireSlave {
+    uint16_t (*receive)(void *opaque);
+    void (*send)(void *opaque, uint16_t data);
+    void *opaque;
+} uWireSlave;
+
+/* tsc210x.c */
+uWireSlave *tsc2102_init(qemu_irq pint);
+uWireSlave *tsc2301_init(qemu_irq penirq, qemu_irq kbirq, qemu_irq dav);
+I2SCodec *tsc210x_codec(uWireSlave *chip);
+uint32_t tsc210x_txrx(void *opaque, uint32_t value, int len);
+void tsc210x_set_transform(uWireSlave *chip, const MouseTransformInfo *info);
+void tsc210x_key_event(uWireSlave *chip, int key, int down);
+
+/* tsc2005.c */
+void *tsc2005_init(qemu_irq pintdav);
+uint32_t tsc2005_txrx(void *opaque, uint32_t value, int len);
+void tsc2005_set_transform(void *opaque, const MouseTransformInfo *info);
+
+#endif
diff --git a/include/hw/intc/allwinner-a10-pic.h b/include/hw/intc/allwinner-a10-pic.h
new file mode 100644 (file)
index 0000000..b8364d3
--- /dev/null
@@ -0,0 +1,43 @@
+#ifndef ALLWINNER_A10_PIC_H
+#define ALLWINNER_A10_PIC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_AW_A10_PIC  "allwinner-a10-pic"
+OBJECT_DECLARE_SIMPLE_TYPE(AwA10PICState, AW_A10_PIC)
+
+#define AW_A10_PIC_VECTOR       0
+#define AW_A10_PIC_BASE_ADDR    4
+#define AW_A10_PIC_PROTECT      8
+#define AW_A10_PIC_NMI          0xc
+#define AW_A10_PIC_IRQ_PENDING  0x10
+#define AW_A10_PIC_FIQ_PENDING  0x20
+#define AW_A10_PIC_SELECT       0x30
+#define AW_A10_PIC_ENABLE       0x40
+#define AW_A10_PIC_MASK         0x50
+
+#define AW_A10_PIC_INT_NR       95
+#define AW_A10_PIC_REG_NUM      DIV_ROUND_UP(AW_A10_PIC_INT_NR, 32)
+
+struct AwA10PICState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+    MemoryRegion iomem;
+    qemu_irq parent_fiq;
+    qemu_irq parent_irq;
+
+    uint32_t vector;
+    uint32_t base_addr;
+    uint32_t protect;
+    uint32_t nmi;
+    uint32_t irq_pending[AW_A10_PIC_REG_NUM];
+    uint32_t fiq_pending[AW_A10_PIC_REG_NUM];
+    uint32_t select[AW_A10_PIC_REG_NUM];
+    uint32_t enable[AW_A10_PIC_REG_NUM];
+    uint32_t mask[AW_A10_PIC_REG_NUM];
+    /*priority setting here*/
+};
+
+#endif
diff --git a/include/hw/intc/arm_gic.h b/include/hw/intc/arm_gic.h
new file mode 100644 (file)
index 0000000..48f6a51
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * ARM GIC support
+ *
+ * Copyright (c) 2012 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * QEMU interface:
+ *  + QOM property "num-cpu": number of CPUs to support
+ *  + QOM property "num-irq": number of IRQs (including both SPIs and PPIs)
+ *  + QOM property "revision": GIC version (1 or 2), or 0 for the 11MPCore GIC
+ *  + QOM property "has-security-extensions": set true if the GIC should
+ *    implement the security extensions
+ *  + QOM property "has-virtualization-extensions": set true if the GIC should
+ *    implement the virtualization extensions
+ *  + unnamed GPIO inputs: (where P is number of SPIs, i.e. num-irq - 32)
+ *    [0..P-1]  SPIs
+ *    [P..P+31] PPIs for CPU 0
+ *    [P+32..P+63] PPIs for CPU 1
+ *    ...
+ *  + sysbus IRQs: (in order; number will vary depending on number of cores)
+ *    - IRQ for CPU 0
+ *    - IRQ for CPU 1
+ *      ...
+ *    - FIQ for CPU 0
+ *    - FIQ for CPU 1
+ *      ...
+ *    - VIRQ for CPU 0 (exists even if virt extensions not present)
+ *    - VIRQ for CPU 1 (exists even if virt extensions not present)
+ *      ...
+ *    - VFIQ for CPU 0 (exists even if virt extensions not present)
+ *    - VFIQ for CPU 1 (exists even if virt extensions not present)
+ *      ...
+ *    - maintenance IRQ for CPU i/f 0 (only if virt extensions present)
+ *    - maintenance IRQ for CPU i/f 1 (only if virt extensions present)
+ *  + sysbus MMIO regions: (in order; numbers will vary depending on
+ *    whether virtualization extensions are present and on number of cores)
+ *    - distributor registers (GICD*)
+ *    - CPU interface for the accessing core (GICC*)
+ *    - virtual interface control registers (GICH*) (only if virt extns present)
+ *    - virtual CPU interface for the accessing core (GICV*) (only if virt)
+ *    - CPU 0 CPU interface registers
+ *    - CPU 1 CPU interface registers
+ *      ...
+ *    - CPU 0 virtual interface control registers (only if virt extns present)
+ *    - CPU 1 virtual interface control registers (only if virt extns present)
+ *      ...
+ */
+
+#ifndef HW_ARM_GIC_H
+#define HW_ARM_GIC_H
+
+#include "arm_gic_common.h"
+#include "qom/object.h"
+
+/* Number of SGI target-list bits */
+#define GIC_TARGETLIST_BITS 8
+#define GIC_MAX_PRIORITY_BITS 8
+#define GIC_MIN_PRIORITY_BITS 4
+
+#define TYPE_ARM_GIC "arm_gic"
+typedef struct ARMGICClass ARMGICClass;
+/* This is reusing the GICState typedef from TYPE_ARM_GIC_COMMON */
+DECLARE_OBJ_CHECKERS(GICState, ARMGICClass,
+                     ARM_GIC, TYPE_ARM_GIC)
+
+struct ARMGICClass {
+    /*< private >*/
+    ARMGICCommonClass parent_class;
+    /*< public >*/
+
+    DeviceRealize parent_realize;
+};
+
+const char *gic_class_name(void);
+
+#endif
diff --git a/include/hw/intc/arm_gic_common.h b/include/hw/intc/arm_gic_common.h
new file mode 100644 (file)
index 0000000..7080375
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * ARM GIC support
+ *
+ * Copyright (c) 2012 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_ARM_GIC_COMMON_H
+#define HW_ARM_GIC_COMMON_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+/* Maximum number of possible interrupts, determined by the GIC architecture */
+#define GIC_MAXIRQ 1020
+/* First 32 are private to each CPU (SGIs and PPIs). */
+#define GIC_INTERNAL 32
+#define GIC_NR_SGIS 16
+/* Maximum number of possible CPU interfaces, determined by GIC architecture */
+#define GIC_NCPU 8
+/* Maximum number of possible CPU interfaces with their respective vCPU */
+#define GIC_NCPU_VCPU (GIC_NCPU * 2)
+
+#define MAX_NR_GROUP_PRIO 128
+#define GIC_NR_APRS (MAX_NR_GROUP_PRIO / 32)
+
+#define GIC_MIN_BPR 0
+#define GIC_MIN_ABPR (GIC_MIN_BPR + 1)
+
+/* Architectural maximum number of list registers in the virtual interface */
+#define GIC_MAX_LR 64
+
+/* Only 32 priority levels and 32 preemption levels in the vCPU interfaces */
+#define GIC_VIRT_MAX_GROUP_PRIO_BITS 5
+#define GIC_VIRT_MAX_NR_GROUP_PRIO (1 << GIC_VIRT_MAX_GROUP_PRIO_BITS)
+#define GIC_VIRT_NR_APRS (GIC_VIRT_MAX_NR_GROUP_PRIO / 32)
+
+#define GIC_VIRT_MIN_BPR 2
+#define GIC_VIRT_MIN_ABPR (GIC_VIRT_MIN_BPR + 1)
+
+typedef struct gic_irq_state {
+    /* The enable bits are only banked for per-cpu interrupts.  */
+    uint8_t enabled;
+    uint8_t pending;
+    uint8_t active;
+    uint8_t level;
+    bool model; /* 0 = N:N, 1 = 1:N */
+    bool edge_trigger; /* true: edge-triggered, false: level-triggered  */
+    uint8_t group;
+} gic_irq_state;
+
+struct GICState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    qemu_irq parent_irq[GIC_NCPU];
+    qemu_irq parent_fiq[GIC_NCPU];
+    qemu_irq parent_virq[GIC_NCPU];
+    qemu_irq parent_vfiq[GIC_NCPU];
+    qemu_irq maintenance_irq[GIC_NCPU];
+
+    /* GICD_CTLR; for a GIC with the security extensions the NS banked version
+     * of this register is just an alias of bit 1 of the S banked version.
+     */
+    uint32_t ctlr;
+    /* GICC_CTLR; again, the NS banked version is just aliases of bits of
+     * the S banked register, so our state only needs to store the S version.
+     */
+    uint32_t cpu_ctlr[GIC_NCPU_VCPU];
+
+    gic_irq_state irq_state[GIC_MAXIRQ];
+    uint8_t irq_target[GIC_MAXIRQ];
+    uint8_t priority1[GIC_INTERNAL][GIC_NCPU];
+    uint8_t priority2[GIC_MAXIRQ - GIC_INTERNAL];
+    /* For each SGI on the target CPU, we store 8 bits
+     * indicating which source CPUs have made this SGI
+     * pending on the target CPU. These correspond to
+     * the bytes in the GIC_SPENDSGIR* registers as
+     * read by the target CPU.
+     */
+    uint8_t sgi_pending[GIC_NR_SGIS][GIC_NCPU];
+
+    uint16_t priority_mask[GIC_NCPU_VCPU];
+    uint16_t running_priority[GIC_NCPU_VCPU];
+    uint16_t current_pending[GIC_NCPU_VCPU];
+    uint32_t n_prio_bits;
+
+    /* If we present the GICv2 without security extensions to a guest,
+     * the guest can configure the GICC_CTLR to configure group 1 binary point
+     * in the abpr.
+     * For a GIC with Security Extensions we use use bpr for the
+     * secure copy and abpr as storage for the non-secure copy of the register.
+     */
+    uint8_t  bpr[GIC_NCPU_VCPU];
+    uint8_t  abpr[GIC_NCPU_VCPU];
+
+    /* The APR is implementation defined, so we choose a layout identical to
+     * the KVM ABI layout for QEMU's implementation of the gic:
+     * If an interrupt for preemption level X is active, then
+     *   APRn[X mod 32] == 0b1,  where n = X / 32
+     * otherwise the bit is clear.
+     */
+    uint32_t apr[GIC_NR_APRS][GIC_NCPU];
+    uint32_t nsapr[GIC_NR_APRS][GIC_NCPU];
+
+    /* Virtual interface control registers */
+    uint32_t h_hcr[GIC_NCPU];
+    uint32_t h_misr[GIC_NCPU];
+    uint32_t h_lr[GIC_MAX_LR][GIC_NCPU];
+    uint32_t h_apr[GIC_NCPU];
+
+    /* Number of LRs implemented in this GIC instance */
+    uint32_t num_lrs;
+
+    uint32_t num_cpu;
+
+    MemoryRegion iomem; /* Distributor */
+    /* This is just so we can have an opaque pointer which identifies
+     * both this GIC and which CPU interface we should be accessing.
+     */
+    struct GICState *backref[GIC_NCPU];
+    MemoryRegion cpuiomem[GIC_NCPU + 1]; /* CPU interfaces */
+    MemoryRegion vifaceiomem[GIC_NCPU + 1]; /* Virtual interfaces */
+    MemoryRegion vcpuiomem; /* vCPU interface */
+
+    uint32_t num_irq;
+    uint32_t revision;
+    bool security_extn;
+    bool virt_extn;
+    bool irq_reset_nonsecure; /* configure IRQs as group 1 (NS) on reset? */
+    int dev_fd; /* kvm device fd if backed by kvm vgic support */
+    Error *migration_blocker;
+};
+typedef struct GICState GICState;
+
+#define TYPE_ARM_GIC_COMMON "arm_gic_common"
+typedef struct ARMGICCommonClass ARMGICCommonClass;
+DECLARE_OBJ_CHECKERS(GICState, ARMGICCommonClass,
+                     ARM_GIC_COMMON, TYPE_ARM_GIC_COMMON)
+
+struct ARMGICCommonClass {
+    /*< private >*/
+    SysBusDeviceClass parent_class;
+    /*< public >*/
+
+    void (*pre_save)(GICState *s);
+    void (*post_load)(GICState *s);
+};
+
+void gic_init_irqs_and_mmio(GICState *s, qemu_irq_handler handler,
+                            const MemoryRegionOps *ops,
+                            const MemoryRegionOps *virt_ops);
+
+#endif
diff --git a/include/hw/intc/arm_gicv3.h b/include/hw/intc/arm_gicv3.h
new file mode 100644 (file)
index 0000000..a81a6ae
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * ARM Generic Interrupt Controller v3
+ *
+ * Copyright (c) 2015 Huawei.
+ * Copyright (c) 2016 Linaro Limited
+ * Written by Shlomo Pongratz, Peter Maydell
+ *
+ * This code is licensed under the GPL, version 2 or (at your option)
+ * any later version.
+ */
+
+#ifndef HW_ARM_GICV3_H
+#define HW_ARM_GICV3_H
+
+#include "arm_gicv3_common.h"
+#include "qom/object.h"
+
+#define TYPE_ARM_GICV3 "arm-gicv3"
+typedef struct ARMGICv3Class ARMGICv3Class;
+/* This is reusing the GICState typedef from TYPE_ARM_GICV3_COMMON */
+DECLARE_OBJ_CHECKERS(GICv3State, ARMGICv3Class,
+                     ARM_GICV3, TYPE_ARM_GICV3)
+
+struct ARMGICv3Class {
+    /*< private >*/
+    ARMGICv3CommonClass parent_class;
+    /*< public >*/
+
+    DeviceRealize parent_realize;
+};
+
+#endif
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
new file mode 100644 (file)
index 0000000..4e2fb51
--- /dev/null
@@ -0,0 +1,342 @@
+/*
+ * ARM GIC support
+ *
+ * Copyright (c) 2012 Linaro Limited
+ * Copyright (c) 2015 Huawei.
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * Written by Peter Maydell
+ * Reworked for GICv3 by Shlomo Pongratz and Pavel Fedin
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_ARM_GICV3_COMMON_H
+#define HW_ARM_GICV3_COMMON_H
+
+#include "hw/sysbus.h"
+#include "hw/intc/arm_gic_common.h"
+#include "qom/object.h"
+
+/*
+ * Maximum number of possible interrupts, determined by the GIC architecture.
+ * Note that this does not include LPIs. When implemented, these should be
+ * dealt with separately.
+ */
+#define GICV3_MAXIRQ 1020
+#define GICV3_MAXSPI (GICV3_MAXIRQ - GIC_INTERNAL)
+
+#define GICV3_LPI_INTID_START 8192
+
+/*
+ * The redistributor in GICv3 has two 64KB frames per CPU; in
+ * GICv4 it has four 64KB frames per CPU.
+ */
+#define GICV3_REDIST_SIZE 0x20000
+#define GICV4_REDIST_SIZE 0x40000
+
+/* Number of SGI target-list bits */
+#define GICV3_TARGETLIST_BITS 16
+
+/* Maximum number of list registers (architectural limit) */
+#define GICV3_LR_MAX 16
+
+/* For some distributor fields we want to model the array of 32-bit
+ * register values which hold various bitmaps corresponding to enabled,
+ * pending, etc bits. These macros and functions facilitate that; the
+ * APIs are generally modelled on the generic bitmap.h functions
+ * (which are unsuitable here because they use 'unsigned long' as the
+ * underlying storage type, which is very awkward when you need to
+ * access the data as 32-bit values.)
+ * Each bitmap contains a bit for each interrupt. Although there is
+ * space for the PPIs and SGIs, those bits (the first 32) are never
+ * used as that state lives in the redistributor. The unused bits are
+ * provided purely so that interrupt X's state is always in bit X; this
+ * avoids bugs where we forget to subtract GIC_INTERNAL from an
+ * interrupt number.
+ */
+#define GICV3_BMP_SIZE DIV_ROUND_UP(GICV3_MAXIRQ, 32)
+
+#define GIC_DECLARE_BITMAP(name) \
+    uint32_t name[GICV3_BMP_SIZE]
+
+#define GIC_BIT_MASK(nr) (1U << ((nr) % 32))
+#define GIC_BIT_WORD(nr) ((nr) / 32)
+
+static inline void gic_bmp_set_bit(int nr, uint32_t *addr)
+{
+    uint32_t mask = GIC_BIT_MASK(nr);
+    uint32_t *p = addr + GIC_BIT_WORD(nr);
+
+    *p |= mask;
+}
+
+static inline void gic_bmp_clear_bit(int nr, uint32_t *addr)
+{
+    uint32_t mask = GIC_BIT_MASK(nr);
+    uint32_t *p = addr + GIC_BIT_WORD(nr);
+
+    *p &= ~mask;
+}
+
+static inline int gic_bmp_test_bit(int nr, const uint32_t *addr)
+{
+    return 1U & (addr[GIC_BIT_WORD(nr)] >> (nr & 31));
+}
+
+static inline void gic_bmp_replace_bit(int nr, uint32_t *addr, int val)
+{
+    uint32_t mask = GIC_BIT_MASK(nr);
+    uint32_t *p = addr + GIC_BIT_WORD(nr);
+
+    *p &= ~mask;
+    *p |= (val & 1U) << (nr % 32);
+}
+
+/* Return a pointer to the 32-bit word containing the specified bit. */
+static inline uint32_t *gic_bmp_ptr32(uint32_t *addr, int nr)
+{
+    return addr + GIC_BIT_WORD(nr);
+}
+
+typedef struct GICv3State GICv3State;
+typedef struct GICv3CPUState GICv3CPUState;
+
+/* Some CPU interface registers come in three flavours:
+ * Group0, Group1 (Secure) and Group1 (NonSecure)
+ * (where the latter two are exposed as a single banked system register).
+ * In the state struct they are implemented as a 3-element array which
+ * can be indexed into by the GICV3_G0, GICV3_G1 and GICV3_G1NS constants.
+ * If the CPU doesn't support EL3 then the G1 element is unused.
+ *
+ * These constants are also used to communicate the group to use for
+ * an interrupt or SGI when it is passed between the cpu interface and
+ * the redistributor or distributor. For those purposes the receiving end
+ * must be prepared to cope with a Group 1 Secure interrupt even if it does
+ * not have security support enabled, because security can be disabled
+ * independently in the CPU and in the GIC. In that case the receiver should
+ * treat an incoming Group 1 Secure interrupt as if it were Group 0.
+ * (This architectural requirement is why the _G1 element is the unused one
+ * in a no-EL3 CPU:  we would otherwise have to translate back and forth
+ * between (G0, G1NS) from the distributor and (G0, G1) in the CPU i/f.)
+ */
+#define GICV3_G0 0
+#define GICV3_G1 1
+#define GICV3_G1NS 2
+
+/* ICC_CTLR_EL1, GICD_STATUSR and GICR_STATUSR are banked but not
+ * group-related, so those indices are just 0 for S and 1 for NS.
+ * (If the CPU or the GIC, respectively, don't support the Security
+ * extensions then the S element is unused.)
+ */
+#define GICV3_S 0
+#define GICV3_NS 1
+
+typedef struct {
+    int irq;
+    uint8_t prio;
+    int grp;
+} PendingIrq;
+
+struct GICv3CPUState {
+    GICv3State *gic;
+    CPUState *cpu;
+    qemu_irq parent_irq;
+    qemu_irq parent_fiq;
+    qemu_irq parent_virq;
+    qemu_irq parent_vfiq;
+
+    /* Redistributor */
+    uint32_t level;                  /* Current IRQ level */
+    /* RD_base page registers */
+    uint32_t gicr_ctlr;
+    uint64_t gicr_typer;
+    uint32_t gicr_statusr[2];
+    uint32_t gicr_waker;
+    uint64_t gicr_propbaser;
+    uint64_t gicr_pendbaser;
+    /* SGI_base page registers */
+    uint32_t gicr_igroupr0;
+    uint32_t gicr_ienabler0;
+    uint32_t gicr_ipendr0;
+    uint32_t gicr_iactiver0;
+    uint32_t edge_trigger; /* ICFGR0 and ICFGR1 even bits */
+    uint32_t gicr_igrpmodr0;
+    uint32_t gicr_nsacr;
+    uint8_t gicr_ipriorityr[GIC_INTERNAL];
+    /* VLPI_base page registers */
+    uint64_t gicr_vpropbaser;
+    uint64_t gicr_vpendbaser;
+
+    /* CPU interface */
+    uint64_t icc_sre_el1;
+    uint64_t icc_ctlr_el1[2];
+    uint64_t icc_pmr_el1;
+    uint64_t icc_bpr[3];
+    uint64_t icc_apr[3][4];
+    uint64_t icc_igrpen[3];
+    uint64_t icc_ctlr_el3;
+
+    /* Virtualization control interface */
+    uint64_t ich_apr[3][4]; /* ich_apr[GICV3_G1][x] never used */
+    uint64_t ich_hcr_el2;
+    uint64_t ich_lr_el2[GICV3_LR_MAX];
+    uint64_t ich_vmcr_el2;
+
+    /* Properties of the CPU interface. These are initialized from
+     * the settings in the CPU proper.
+     * If the number of implemented list registers is 0 then the
+     * virtualization support is not implemented.
+     */
+    int num_list_regs;
+    int vpribits; /* number of virtual priority bits */
+    int vprebits; /* number of virtual preemption bits */
+    int pribits; /* number of physical priority bits */
+    int prebits; /* number of physical preemption bits */
+
+    /* Current highest priority pending interrupt for this CPU.
+     * This is cached information that can be recalculated from the
+     * real state above; it doesn't need to be migrated.
+     */
+    PendingIrq hppi;
+
+    /*
+     * Cached information recalculated from LPI tables
+     * in guest memory
+     */
+    PendingIrq hpplpi;
+
+    /* Cached information recalculated from vLPI tables in guest memory */
+    PendingIrq hppvlpi;
+
+    /* This is temporary working state, to avoid a malloc in gicv3_update() */
+    bool seenbetter;
+};
+
+/*
+ * The redistributor pages might be split into more than one region
+ * on some machine types if there are many CPUs.
+ */
+typedef struct GICv3RedistRegion {
+    GICv3State *gic;
+    MemoryRegion iomem;
+    uint32_t cpuidx; /* index of first CPU this region covers */
+} GICv3RedistRegion;
+
+struct GICv3State {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion iomem_dist; /* Distributor */
+    GICv3RedistRegion *redist_regions; /* Redistributor Regions */
+    uint32_t *redist_region_count; /* redistributor count within each region */
+    uint32_t nb_redist_regions; /* number of redist regions */
+
+    uint32_t num_cpu;
+    uint32_t num_irq;
+    uint32_t revision;
+    bool lpi_enable;
+    bool security_extn;
+    bool force_8bit_prio;
+    bool irq_reset_nonsecure;
+    bool gicd_no_migration_shift_bug;
+
+    int dev_fd; /* kvm device fd if backed by kvm vgic support */
+    Error *migration_blocker;
+
+    MemoryRegion *dma;
+    AddressSpace dma_as;
+
+    /* Distributor */
+
+    /* for a GIC with the security extensions the NS banked version of this
+     * register is just an alias of bit 1 of the S banked version.
+     */
+    uint32_t gicd_ctlr;
+    uint32_t gicd_statusr[2];
+    GIC_DECLARE_BITMAP(group);        /* GICD_IGROUPR */
+    GIC_DECLARE_BITMAP(grpmod);       /* GICD_IGRPMODR */
+    GIC_DECLARE_BITMAP(enabled);      /* GICD_ISENABLER */
+    GIC_DECLARE_BITMAP(pending);      /* GICD_ISPENDR */
+    GIC_DECLARE_BITMAP(active);       /* GICD_ISACTIVER */
+    GIC_DECLARE_BITMAP(level);        /* Current level */
+    GIC_DECLARE_BITMAP(edge_trigger); /* GICD_ICFGR even bits */
+    uint8_t gicd_ipriority[GICV3_MAXIRQ];
+    uint64_t gicd_irouter[GICV3_MAXIRQ];
+    /* Cached information: pointer to the cpu i/f for the CPUs specified
+     * in the IROUTER registers
+     */
+    GICv3CPUState *gicd_irouter_target[GICV3_MAXIRQ];
+    uint32_t gicd_nsacr[DIV_ROUND_UP(GICV3_MAXIRQ, 16)];
+
+    GICv3CPUState *cpu;
+    /* List of all ITSes connected to this GIC */
+    GPtrArray *itslist;
+};
+
+#define GICV3_BITMAP_ACCESSORS(BMP)                                     \
+    static inline void gicv3_gicd_##BMP##_set(GICv3State *s, int irq)   \
+    {                                                                   \
+        gic_bmp_set_bit(irq, s->BMP);                                   \
+    }                                                                   \
+    static inline int gicv3_gicd_##BMP##_test(GICv3State *s, int irq)   \
+    {                                                                   \
+        return gic_bmp_test_bit(irq, s->BMP);                           \
+    }                                                                   \
+    static inline void gicv3_gicd_##BMP##_clear(GICv3State *s, int irq) \
+    {                                                                   \
+        gic_bmp_clear_bit(irq, s->BMP);                                 \
+    }                                                                   \
+    static inline void gicv3_gicd_##BMP##_replace(GICv3State *s,        \
+                                                  int irq, int value)   \
+    {                                                                   \
+        gic_bmp_replace_bit(irq, s->BMP, value);                        \
+    }
+
+GICV3_BITMAP_ACCESSORS(group)
+GICV3_BITMAP_ACCESSORS(grpmod)
+GICV3_BITMAP_ACCESSORS(enabled)
+GICV3_BITMAP_ACCESSORS(pending)
+GICV3_BITMAP_ACCESSORS(active)
+GICV3_BITMAP_ACCESSORS(level)
+GICV3_BITMAP_ACCESSORS(edge_trigger)
+
+#define TYPE_ARM_GICV3_COMMON "arm-gicv3-common"
+typedef struct ARMGICv3CommonClass ARMGICv3CommonClass;
+DECLARE_OBJ_CHECKERS(GICv3State, ARMGICv3CommonClass,
+                     ARM_GICV3_COMMON, TYPE_ARM_GICV3_COMMON)
+
+struct ARMGICv3CommonClass {
+    /*< private >*/
+    SysBusDeviceClass parent_class;
+    /*< public >*/
+
+    void (*pre_save)(GICv3State *s);
+    void (*post_load)(GICv3State *s);
+};
+
+void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler,
+                              const MemoryRegionOps *ops);
+
+/**
+ * gicv3_class_name
+ *
+ * Return name of GICv3 class to use depending on whether KVM acceleration is
+ * in use. May throw an error if the chosen implementation is not available.
+ *
+ * Returns: class name to use
+ */
+const char *gicv3_class_name(void);
+
+#endif
diff --git a/include/hw/intc/arm_gicv3_its_common.h b/include/hw/intc/arm_gicv3_its_common.h
new file mode 100644 (file)
index 0000000..7dc712b
--- /dev/null
@@ -0,0 +1,135 @@
+/*
+ * ITS support for ARM GICv3
+ *
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * Written by Pavel Fedin
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_ARM_GICV3_ITS_COMMON_H
+#define QEMU_ARM_GICV3_ITS_COMMON_H
+
+#include "hw/sysbus.h"
+#include "hw/intc/arm_gicv3_common.h"
+#include "qom/object.h"
+
+#define TYPE_ARM_GICV3_ITS "arm-gicv3-its"
+
+#define ITS_CONTROL_SIZE 0x10000
+#define ITS_TRANS_SIZE   0x10000
+#define ITS_SIZE         (ITS_CONTROL_SIZE + ITS_TRANS_SIZE)
+
+#define GITS_CTLR        0x0
+#define GITS_IIDR        0x4
+#define GITS_TYPER       0x8
+#define GITS_CBASER      0x80
+#define GITS_CWRITER     0x88
+#define GITS_CREADR      0x90
+#define GITS_BASER       0x100
+
+#define GITS_TRANSLATER  0x0040
+
+typedef struct {
+    bool indirect;
+    uint16_t entry_sz;
+    uint32_t page_sz;
+    uint32_t num_entries;
+    uint64_t base_addr;
+} TableDesc;
+
+typedef struct {
+    uint32_t num_entries;
+    uint64_t base_addr;
+} CmdQDesc;
+
+struct GICv3ITSState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem_main;
+    MemoryRegion iomem_its_cntrl;
+    MemoryRegion iomem_its_translation;
+
+    GICv3State *gicv3;
+
+    int dev_fd; /* kvm device fd if backed by kvm vgic support */
+    uint64_t gits_translater_gpa;
+    bool translater_gpa_known;
+
+    /* Registers */
+    uint32_t ctlr;
+    uint32_t iidr;
+    uint64_t typer;
+    uint64_t cbaser;
+    uint64_t cwriter;
+    uint64_t creadr;
+    uint64_t baser[8];
+
+    TableDesc  dt;
+    TableDesc  ct;
+    TableDesc  vpet;
+    CmdQDesc   cq;
+
+    Error *migration_blocker;
+};
+
+typedef struct GICv3ITSState GICv3ITSState;
+
+void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops,
+                   const MemoryRegionOps *tops);
+
+/*
+ * The ITS should call this when it is realized to add itself
+ * to its GIC's list of connected ITSes.
+ */
+static inline void gicv3_add_its(GICv3State *s, DeviceState *its)
+{
+    g_ptr_array_add(s->itslist, its);
+}
+
+/*
+ * The ITS can use this for operations that must be performed on
+ * every ITS connected to the same GIC that it is
+ */
+static inline void gicv3_foreach_its(GICv3State *s, GFunc func, void *opaque)
+{
+    g_ptr_array_foreach(s->itslist, func, opaque);
+}
+
+#define TYPE_ARM_GICV3_ITS_COMMON "arm-gicv3-its-common"
+typedef struct GICv3ITSCommonClass GICv3ITSCommonClass;
+DECLARE_OBJ_CHECKERS(GICv3ITSState, GICv3ITSCommonClass,
+                     ARM_GICV3_ITS_COMMON, TYPE_ARM_GICV3_ITS_COMMON)
+
+struct GICv3ITSCommonClass {
+    /*< private >*/
+    SysBusDeviceClass parent_class;
+    /*< public >*/
+
+    int (*send_msi)(GICv3ITSState *s, uint32_t data, uint16_t devid);
+    void (*pre_save)(GICv3ITSState *s);
+    void (*post_load)(GICv3ITSState *s);
+};
+
+/**
+ * its_class_name:
+ *
+ * Return the ITS class name to use depending on whether KVM acceleration
+ * and KVM CAP_SIGNAL_MSI are supported
+ *
+ * Returns: class name to use or NULL
+ */
+const char *its_class_name(void);
+
+#endif
diff --git a/include/hw/intc/armv7m_nvic.h b/include/hw/intc/armv7m_nvic.h
new file mode 100644 (file)
index 0000000..6b4ae56
--- /dev/null
@@ -0,0 +1,209 @@
+/*
+ * ARMv7M NVIC object
+ *
+ * Copyright (c) 2017 Linaro Ltd
+ * Written by Peter Maydell <peter.maydell@linaro.org>
+ *
+ * This code is licensed under the GPL version 2 or later.
+ */
+
+#ifndef HW_ARM_ARMV7M_NVIC_H
+#define HW_ARM_ARMV7M_NVIC_H
+
+#include "target/arm/cpu.h"
+#include "hw/sysbus.h"
+#include "hw/timer/armv7m_systick.h"
+#include "qom/object.h"
+
+#define TYPE_NVIC "armv7m_nvic"
+OBJECT_DECLARE_SIMPLE_TYPE(NVICState, NVIC)
+
+/* Highest permitted number of exceptions (architectural limit) */
+#define NVIC_MAX_VECTORS 512
+/* Number of internal exceptions */
+#define NVIC_INTERNAL_VECTORS 16
+
+typedef struct VecInfo {
+    /* Exception priorities can range from -3 to 255; only the unmodifiable
+     * priority values for RESET, NMI and HardFault can be negative.
+     */
+    int16_t prio;
+    uint8_t enabled;
+    uint8_t pending;
+    uint8_t active;
+    uint8_t level; /* exceptions <=15 never set level */
+} VecInfo;
+
+struct NVICState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    ARMCPU *cpu;
+
+    VecInfo vectors[NVIC_MAX_VECTORS];
+    /* If the v8M security extension is implemented, some of the internal
+     * exceptions are banked between security states (ie there exists both
+     * a Secure and a NonSecure version of the exception and its state):
+     *  HardFault, MemManage, UsageFault, SVCall, PendSV, SysTick (R_PJHV)
+     * The rest (including all the external exceptions) are not banked, though
+     * they may be configurable to target either Secure or NonSecure state.
+     * We store the secure exception state in sec_vectors[] for the banked
+     * exceptions, and otherwise use only vectors[] (including for exceptions
+     * like SecureFault that unconditionally target Secure state).
+     * Entries in sec_vectors[] for non-banked exception numbers are unused.
+     */
+    VecInfo sec_vectors[NVIC_INTERNAL_VECTORS];
+    /* The PRIGROUP field in AIRCR is banked */
+    uint32_t prigroup[M_REG_NUM_BANKS];
+    uint8_t num_prio_bits;
+
+    /* v8M NVIC_ITNS state (stored as a bool per bit) */
+    bool itns[NVIC_MAX_VECTORS];
+
+    /* The following fields are all cached state that can be recalculated
+     * from the vectors[] and sec_vectors[] arrays and the prigroup field:
+     *  - vectpending
+     *  - vectpending_is_secure
+     *  - exception_prio
+     *  - vectpending_prio
+     */
+    unsigned int vectpending; /* highest prio pending enabled exception */
+    /* true if vectpending is a banked secure exception, ie it is in
+     * sec_vectors[] rather than vectors[]
+     */
+    bool vectpending_is_s_banked;
+    int exception_prio; /* group prio of the highest prio active exception */
+    int vectpending_prio; /* group prio of the exception in vectpending */
+
+    MemoryRegion sysregmem;
+
+    uint32_t num_irq;
+    qemu_irq excpout;
+    qemu_irq sysresetreq;
+};
+
+/* Interface between CPU and Interrupt controller.  */
+/**
+ * armv7m_nvic_set_pending: mark the specified exception as pending
+ * @s: the NVIC
+ * @irq: the exception number to mark pending
+ * @secure: false for non-banked exceptions or for the nonsecure
+ * version of a banked exception, true for the secure version of a banked
+ * exception.
+ *
+ * Marks the specified exception as pending. Note that we will assert()
+ * if @secure is true and @irq does not specify one of the fixed set
+ * of architecturally banked exceptions.
+ */
+void armv7m_nvic_set_pending(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_set_pending_derived: mark this derived exception as pending
+ * @s: the NVIC
+ * @irq: the exception number to mark pending
+ * @secure: false for non-banked exceptions or for the nonsecure
+ * version of a banked exception, true for the secure version of a banked
+ * exception.
+ *
+ * Similar to armv7m_nvic_set_pending(), but specifically for derived
+ * exceptions (exceptions generated in the course of trying to take
+ * a different exception).
+ */
+void armv7m_nvic_set_pending_derived(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_set_pending_lazyfp: mark this lazy FP exception as pending
+ * @s: the NVIC
+ * @irq: the exception number to mark pending
+ * @secure: false for non-banked exceptions or for the nonsecure
+ * version of a banked exception, true for the secure version of a banked
+ * exception.
+ *
+ * Similar to armv7m_nvic_set_pending(), but specifically for exceptions
+ * generated in the course of lazy stacking of FP registers.
+ */
+void armv7m_nvic_set_pending_lazyfp(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_get_pending_irq_info: return highest priority pending
+ *    exception, and whether it targets Secure state
+ * @s: the NVIC
+ * @pirq: set to pending exception number
+ * @ptargets_secure: set to whether pending exception targets Secure
+ *
+ * This function writes the number of the highest priority pending
+ * exception (the one which would be made active by
+ * armv7m_nvic_acknowledge_irq()) to @pirq, and sets @ptargets_secure
+ * to true if the current highest priority pending exception should
+ * be taken to Secure state, false for NS.
+ */
+void armv7m_nvic_get_pending_irq_info(NVICState *s, int *pirq,
+                                      bool *ptargets_secure);
+/**
+ * armv7m_nvic_acknowledge_irq: make highest priority pending exception active
+ * @s: the NVIC
+ *
+ * Move the current highest priority pending exception from the pending
+ * state to the active state, and update v7m.exception to indicate that
+ * it is the exception currently being handled.
+ */
+void armv7m_nvic_acknowledge_irq(NVICState *s);
+/**
+ * armv7m_nvic_complete_irq: complete specified interrupt or exception
+ * @s: the NVIC
+ * @irq: the exception number to complete
+ * @secure: true if this exception was secure
+ *
+ * Returns: -1 if the irq was not active
+ *           1 if completing this irq brought us back to base (no active irqs)
+ *           0 if there is still an irq active after this one was completed
+ * (Ignoring -1, this is the same as the RETTOBASE value before completion.)
+ */
+int armv7m_nvic_complete_irq(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_get_ready_status(void *opaque, int irq, bool secure)
+ * @s: the NVIC
+ * @irq: the exception number to mark pending
+ * @secure: false for non-banked exceptions or for the nonsecure
+ * version of a banked exception, true for the secure version of a banked
+ * exception.
+ *
+ * Return whether an exception is "ready", i.e. whether the exception is
+ * enabled and is configured at a priority which would allow it to
+ * interrupt the current execution priority. This controls whether the
+ * RDY bit for it in the FPCCR is set.
+ */
+bool armv7m_nvic_get_ready_status(NVICState *s, int irq, bool secure);
+/**
+ * armv7m_nvic_raw_execution_priority: return the raw execution priority
+ * @s: the NVIC
+ *
+ * Returns: the raw execution priority as defined by the v8M architecture.
+ * This is the execution priority minus the effects of AIRCR.PRIS,
+ * and minus any PRIMASK/FAULTMASK/BASEPRI priority boosting.
+ * (v8M ARM ARM I_PKLD.)
+ */
+int armv7m_nvic_raw_execution_priority(NVICState *s);
+/**
+ * armv7m_nvic_neg_prio_requested: return true if the requested execution
+ * priority is negative for the specified security state.
+ * @s: the NVIC
+ * @secure: the security state to test
+ * This corresponds to the pseudocode IsReqExecPriNeg().
+ */
+#ifndef CONFIG_USER_ONLY
+bool armv7m_nvic_neg_prio_requested(NVICState *s, bool secure);
+#else
+static inline bool armv7m_nvic_neg_prio_requested(NVICState *s, bool secure)
+{
+    return false;
+}
+#endif
+#ifndef CONFIG_USER_ONLY
+bool armv7m_nvic_can_take_pending_exception(NVICState *s);
+#else
+static inline bool armv7m_nvic_can_take_pending_exception(NVICState *s)
+{
+    return true;
+}
+#endif
+
+#endif
diff --git a/include/hw/intc/aspeed_vic.h b/include/hw/intc/aspeed_vic.h
new file mode 100644 (file)
index 0000000..68d6ab9
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * ASPEED Interrupt Controller (New)
+ *
+ * Andrew Jeffery <andrew@aj.id.au>
+ *
+ * Copyright 2016 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Need to add SVIC and CVIC support
+ */
+#ifndef ASPEED_VIC_H
+#define ASPEED_VIC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_ASPEED_VIC "aspeed.vic"
+OBJECT_DECLARE_SIMPLE_TYPE(AspeedVICState, ASPEED_VIC)
+
+#define ASPEED_VIC_NR_IRQS 51
+
+struct AspeedVICState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    qemu_irq irq;
+    qemu_irq fiq;
+
+    uint64_t level;
+    uint64_t raw;
+    uint64_t select;
+    uint64_t enable;
+    uint64_t trigger;
+
+    /* 0=edge, 1=level */
+    uint64_t sense;
+
+    /* 0=single-edge, 1=dual-edge */
+    uint64_t dual_edge;
+
+    /* 0=low-sensitive/falling-edge, 1=high-sensitive/rising-edge */
+    uint64_t event;
+};
+
+#endif /* ASPEED_VIC_H */
diff --git a/include/hw/intc/bcm2835_ic.h b/include/hw/intc/bcm2835_ic.h
new file mode 100644 (file)
index 0000000..588eb76
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Raspberry Pi emulation (c) 2012 Gregory Estrade
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_IC_H
+#define BCM2835_IC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_BCM2835_IC "bcm2835-ic"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835ICState, BCM2835_IC)
+
+#define BCM2835_IC_GPU_IRQ "gpu-irq"
+#define BCM2835_IC_ARM_IRQ "arm-irq"
+
+struct BCM2835ICState {
+    /*< private >*/
+    SysBusDevice busdev;
+    /*< public >*/
+
+    MemoryRegion iomem;
+    qemu_irq irq;
+    qemu_irq fiq;
+
+    /* 64 GPU IRQs + 8 ARM IRQs = 72 total (GPU first) */
+    uint64_t gpu_irq_level, gpu_irq_enable;
+    uint8_t arm_irq_level, arm_irq_enable;
+    bool fiq_enable;
+    uint8_t fiq_select;
+};
+
+#endif
diff --git a/include/hw/intc/bcm2836_control.h b/include/hw/intc/bcm2836_control.h
new file mode 100644 (file)
index 0000000..a410c81
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Raspberry Pi emulation (c) 2012 Gregory Estrade
+ * Upstreaming code cleanup [including bcm2835_*] (c) 2013 Jan Petrous
+ *
+ * Rasperry Pi 2 emulation and refactoring Copyright (c) 2015, Microsoft
+ * Written by Andrew Baumann
+ *
+ * ARM Local Timer IRQ Copyright (c) 2019. Zoltán Baldaszti
+ * Added basic IRQ_TIMER interrupt support
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2836_CONTROL_H
+#define BCM2836_CONTROL_H
+
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+#include "qom/object.h"
+
+/* 4 mailboxes per core, for 16 total */
+#define BCM2836_NCORES 4
+#define BCM2836_MBPERCORE 4
+
+#define TYPE_BCM2836_CONTROL "bcm2836-control"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2836ControlState, BCM2836_CONTROL)
+
+struct BCM2836ControlState {
+    /*< private >*/
+    SysBusDevice busdev;
+    /*< public >*/
+    MemoryRegion iomem;
+
+    /* mailbox state */
+    uint32_t mailboxes[BCM2836_NCORES * BCM2836_MBPERCORE];
+
+    /* interrupt routing/control registers */
+    uint8_t route_gpu_irq, route_gpu_fiq;
+    uint32_t timercontrol[BCM2836_NCORES];
+    uint32_t mailboxcontrol[BCM2836_NCORES];
+
+    /* interrupt status regs (derived from input pins; not visible to user) */
+    bool gpu_irq, gpu_fiq;
+    uint8_t timerirqs[BCM2836_NCORES];
+
+    /* local timer */
+    QEMUTimer timer;
+    uint32_t local_timer_control;
+    uint8_t route_localtimer;
+
+    /* interrupt source registers, post-routing (also input-derived; visible) */
+    uint32_t irqsrc[BCM2836_NCORES];
+    uint32_t fiqsrc[BCM2836_NCORES];
+
+    /* outputs to CPU cores */
+    qemu_irq irq[BCM2836_NCORES];
+    qemu_irq fiq[BCM2836_NCORES];
+};
+
+#endif
diff --git a/include/hw/intc/exynos4210_combiner.h b/include/hw/intc/exynos4210_combiner.h
new file mode 100644 (file)
index 0000000..bd207a7
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Samsung exynos4210 Interrupt Combiner
+ *
+ * Copyright (c) 2000 - 2011 Samsung Electronics Co., Ltd.
+ * All rights reserved.
+ *
+ * Evgeny Voevodin <e.voevodin@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_INTC_EXYNOS4210_COMBINER_H
+#define HW_INTC_EXYNOS4210_COMBINER_H
+
+#include "hw/sysbus.h"
+
+/*
+ * State for each output signal of internal combiner
+ */
+typedef struct CombinerGroupState {
+    uint8_t src_mask;            /* 1 - source enabled, 0 - disabled */
+    uint8_t src_pending;        /* Pending source interrupts before masking */
+} CombinerGroupState;
+
+#define TYPE_EXYNOS4210_COMBINER "exynos4210.combiner"
+OBJECT_DECLARE_SIMPLE_TYPE(Exynos4210CombinerState, EXYNOS4210_COMBINER)
+
+/* Number of groups and total number of interrupts for the internal combiner */
+#define IIC_NGRP 64
+#define IIC_NIRQ (IIC_NGRP * 8)
+#define IIC_REGSET_SIZE 0x41
+
+struct Exynos4210CombinerState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+
+    struct CombinerGroupState group[IIC_NGRP];
+    uint32_t reg_set[IIC_REGSET_SIZE];
+    uint32_t icipsr[2];
+    uint32_t external;          /* 1 means that this combiner is external */
+
+    qemu_irq output_irq[IIC_NGRP];
+};
+
+#endif
diff --git a/include/hw/intc/exynos4210_gic.h b/include/hw/intc/exynos4210_gic.h
new file mode 100644 (file)
index 0000000..f64c406
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Samsung exynos4210 GIC implementation. Based on hw/arm_gic.c
+ *
+ * Copyright (c) 2000 - 2011 Samsung Electronics Co., Ltd.
+ * All rights reserved.
+ *
+ * Evgeny Voevodin <e.voevodin@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef HW_INTC_EXYNOS4210_GIC_H
+#define HW_INTC_EXYNOS4210_GIC_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_EXYNOS4210_GIC "exynos4210.gic"
+OBJECT_DECLARE_SIMPLE_TYPE(Exynos4210GicState, EXYNOS4210_GIC)
+
+#define EXYNOS4210_GIC_NCPUS 2
+
+struct Exynos4210GicState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion cpu_container;
+    MemoryRegion dist_container;
+    MemoryRegion cpu_alias[EXYNOS4210_GIC_NCPUS];
+    MemoryRegion dist_alias[EXYNOS4210_GIC_NCPUS];
+    uint32_t num_cpu;
+    DeviceState *gic;
+};
+
+#endif
diff --git a/include/hw/intc/goldfish_pic.h b/include/hw/intc/goldfish_pic.h
new file mode 100644 (file)
index 0000000..3e79580
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Goldfish PIC
+ *
+ * (c) 2020 Laurent Vivier <laurent@vivier.eu>
+ *
+ */
+
+#ifndef HW_INTC_GOLDFISH_PIC_H
+#define HW_INTC_GOLDFISH_PIC_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_GOLDFISH_PIC "goldfish_pic"
+OBJECT_DECLARE_SIMPLE_TYPE(GoldfishPICState, GOLDFISH_PIC)
+
+#define GOLDFISH_PIC_IRQ_NB 32
+
+struct GoldfishPICState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    qemu_irq irq;
+
+    uint32_t pending;
+    uint32_t enabled;
+
+    /* statistics */
+    uint64_t stats_irq_count[32];
+    /* for tracing */
+    uint8_t idx;
+};
+
+#endif
diff --git a/include/hw/intc/heathrow_pic.h b/include/hw/intc/heathrow_pic.h
new file mode 100644 (file)
index 0000000..c0a7f6f
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Heathrow PIC support (OldWorld PowerMac)
+ *
+ * Copyright (c) 2005-2007 Fabrice Bellard
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_INTC_HEATHROW_PIC_H
+#define HW_INTC_HEATHROW_PIC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_HEATHROW "heathrow"
+OBJECT_DECLARE_SIMPLE_TYPE(HeathrowState, HEATHROW)
+
+typedef struct HeathrowPICState {
+    uint32_t events;
+    uint32_t mask;
+    uint32_t levels;
+    uint32_t level_triggered;
+} HeathrowPICState;
+
+struct HeathrowState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion mem;
+    HeathrowPICState pics[2];
+    qemu_irq irqs[1];
+};
+
+#define HEATHROW_NUM_IRQS 64
+
+#endif /* HW_INTC_HEATHROW_PIC_H */
diff --git a/include/hw/intc/i8259.h b/include/hw/intc/i8259.h
new file mode 100644 (file)
index 0000000..c412575
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef HW_I8259_H
+#define HW_I8259_H
+
+/* i8259.c */
+
+extern PICCommonState *isa_pic;
+
+/*
+ * i8259_init()
+ *
+ * Create a i8259 device on an ISA @bus,
+ * connect its output to @parent_irq_in,
+ * return an (allocated) array of 16 input IRQs.
+ */
+qemu_irq *i8259_init(ISABus *bus, qemu_irq parent_irq_in);
+qemu_irq *kvm_i8259_init(ISABus *bus);
+int pic_get_output(PICCommonState *s);
+int pic_read_irq(PICCommonState *s);
+
+#endif
diff --git a/include/hw/intc/imx_avic.h b/include/hw/intc/imx_avic.h
new file mode 100644 (file)
index 0000000..75fbd1a
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * i.MX31 Vectored Interrupt Controller
+ *
+ * Note this is NOT the PL192 provided by ARM, but
+ * a custom implementation by Freescale.
+ *
+ * Copyright (c) 2008 OKL
+ * Copyright (c) 2011 NICTA Pty Ltd
+ * Originally written by Hans Jiang
+ * Updated by Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ *
+ * TODO: implement vectors.
+ */
+#ifndef IMX_AVIC_H
+#define IMX_AVIC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_IMX_AVIC "imx.avic"
+OBJECT_DECLARE_SIMPLE_TYPE(IMXAVICState, IMX_AVIC)
+
+#define IMX_AVIC_NUM_IRQS 64
+
+/* Interrupt Control Bits */
+#define ABFLAG (1<<25)
+#define ABFEN  (1<<24)
+#define NIDIS  (1<<22) /* Normal Interrupt disable */
+#define FIDIS  (1<<21) /* Fast interrupt disable */
+#define NIAD   (1<<20) /* Normal Interrupt Arbiter Rise ARM level */
+#define FIAD   (1<<19) /* Fast Interrupt Arbiter Rise ARM level */
+#define NM     (1<<18) /* Normal interrupt mode */
+
+#define PRIO_PER_WORD (sizeof(uint32_t) * 8 / 4)
+#define PRIO_WORDS (IMX_AVIC_NUM_IRQS/PRIO_PER_WORD)
+
+struct IMXAVICState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    uint64_t pending;
+    uint64_t enabled;
+    uint64_t is_fiq;
+    uint32_t intcntl;
+    uint32_t intmask;
+    qemu_irq irq;
+    qemu_irq fiq;
+    uint32_t prio[PRIO_WORDS]; /* Priorities are 4-bits each */
+};
+
+#endif /* IMX_AVIC_H */
diff --git a/include/hw/intc/imx_gpcv2.h b/include/hw/intc/imx_gpcv2.h
new file mode 100644 (file)
index 0000000..7bdee7e
--- /dev/null
@@ -0,0 +1,23 @@
+#ifndef IMX_GPCV2_H
+#define IMX_GPCV2_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+enum IMXGPCv2Registers {
+    GPC_NUM        = 0xE00 / sizeof(uint32_t),
+};
+
+struct IMXGPCv2State {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    uint32_t     regs[GPC_NUM];
+};
+
+#define TYPE_IMX_GPCV2 "imx-gpcv2"
+OBJECT_DECLARE_SIMPLE_TYPE(IMXGPCv2State, IMX_GPCV2)
+
+#endif /* IMX_GPCV2_H */
diff --git a/include/hw/intc/intc.h b/include/hw/intc/intc.h
new file mode 100644 (file)
index 0000000..7018f60
--- /dev/null
@@ -0,0 +1,28 @@
+#ifndef INTC_H
+#define INTC_H
+
+#include "qom/object.h"
+
+#define TYPE_INTERRUPT_STATS_PROVIDER "intctrl"
+
+typedef struct InterruptStatsProviderClass InterruptStatsProviderClass;
+DECLARE_CLASS_CHECKERS(InterruptStatsProviderClass, INTERRUPT_STATS_PROVIDER,
+                       TYPE_INTERRUPT_STATS_PROVIDER)
+#define INTERRUPT_STATS_PROVIDER(obj) \
+    INTERFACE_CHECK(InterruptStatsProvider, (obj), \
+                    TYPE_INTERRUPT_STATS_PROVIDER)
+
+typedef struct InterruptStatsProvider InterruptStatsProvider;
+
+struct InterruptStatsProviderClass {
+    InterfaceClass parent;
+
+    /* The returned pointer and statistics must remain valid until
+     * the BQL is next dropped.
+     */
+    bool (*get_statistics)(InterruptStatsProvider *obj, uint64_t **irq_counts,
+                           unsigned int *nb_irqs);
+    void (*print_info)(InterruptStatsProvider *obj, Monitor *mon);
+};
+
+#endif
diff --git a/include/hw/intc/ioapic.h b/include/hw/intc/ioapic.h
new file mode 100644 (file)
index 0000000..aa122e2
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ *  ioapic.c IOAPIC emulation logic
+ *
+ *  Copyright (c) 2011 Jan Kiszka, Siemens AG
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_INTC_IOAPIC_H
+#define HW_INTC_IOAPIC_H
+
+#define IOAPIC_NUM_PINS 24
+#define IO_APIC_DEFAULT_ADDRESS 0xfec00000
+#define IO_APIC_SECONDARY_ADDRESS (IO_APIC_DEFAULT_ADDRESS + 0x10000)
+#define IO_APIC_SECONDARY_IRQBASE 24 /* primary 0 -> 23, secondary 24 -> 47 */
+
+#define TYPE_KVM_IOAPIC "kvm-ioapic"
+#define TYPE_IOAPIC "ioapic"
+
+void ioapic_eoi_broadcast(int vector);
+
+#endif /* HW_INTC_IOAPIC_H */
diff --git a/include/hw/intc/kvm_irqcount.h b/include/hw/intc/kvm_irqcount.h
new file mode 100644 (file)
index 0000000..0ed5999
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#ifndef KVM_IRQCOUNT_H
+#define KVM_IRQCOUNT_H
+
+void kvm_report_irq_delivered(int delivered);
+void kvm_reset_irq_delivered(void);
+int kvm_get_irq_delivered(void);
+
+#endif
diff --git a/include/hw/intc/loongarch_extioi.h b/include/hw/intc/loongarch_extioi.h
new file mode 100644 (file)
index 0000000..fbdef9a
--- /dev/null
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch 3A5000 ext interrupt controller definitions
+ *
+ * Copyright (C) 2021 Loongson Technology Corporation Limited
+ */
+
+#include "hw/sysbus.h"
+#include "hw/loongarch/virt.h"
+
+#ifndef LOONGARCH_EXTIOI_H
+#define LOONGARCH_EXTIOI_H
+
+#define LS3A_INTC_IP               8
+#define EXTIOI_IRQS                (256)
+#define EXTIOI_IRQS_BITMAP_SIZE    (256 / 8)
+/* irq from EXTIOI is routed to no more than 4 cpus */
+#define EXTIOI_CPUS                (4)
+/* map to ipnum per 32 irqs */
+#define EXTIOI_IRQS_IPMAP_SIZE     (256 / 32)
+#define EXTIOI_IRQS_COREMAP_SIZE   256
+#define EXTIOI_IRQS_NODETYPE_COUNT  16
+#define EXTIOI_IRQS_GROUP_COUNT    8
+
+#define APIC_OFFSET                  0x400
+#define APIC_BASE                    (0x1000ULL + APIC_OFFSET)
+
+#define EXTIOI_NODETYPE_START        (0x4a0 - APIC_OFFSET)
+#define EXTIOI_NODETYPE_END          (0x4c0 - APIC_OFFSET)
+#define EXTIOI_IPMAP_START           (0x4c0 - APIC_OFFSET)
+#define EXTIOI_IPMAP_END             (0x4c8 - APIC_OFFSET)
+#define EXTIOI_ENABLE_START          (0x600 - APIC_OFFSET)
+#define EXTIOI_ENABLE_END            (0x620 - APIC_OFFSET)
+#define EXTIOI_BOUNCE_START          (0x680 - APIC_OFFSET)
+#define EXTIOI_BOUNCE_END            (0x6a0 - APIC_OFFSET)
+#define EXTIOI_ISR_START             (0x700 - APIC_OFFSET)
+#define EXTIOI_ISR_END               (0x720 - APIC_OFFSET)
+#define EXTIOI_COREISR_START         (0x800 - APIC_OFFSET)
+#define EXTIOI_COREISR_END           (0xB20 - APIC_OFFSET)
+#define EXTIOI_COREMAP_START         (0xC00 - APIC_OFFSET)
+#define EXTIOI_COREMAP_END           (0xD00 - APIC_OFFSET)
+
+#define TYPE_LOONGARCH_EXTIOI "loongarch.extioi"
+OBJECT_DECLARE_SIMPLE_TYPE(LoongArchExtIOI, LOONGARCH_EXTIOI)
+struct LoongArchExtIOI {
+    SysBusDevice parent_obj;
+    /* hardware state */
+    uint32_t nodetype[EXTIOI_IRQS_NODETYPE_COUNT / 2];
+    uint32_t bounce[EXTIOI_IRQS_GROUP_COUNT];
+    uint32_t isr[EXTIOI_IRQS / 32];
+    uint32_t coreisr[EXTIOI_CPUS][EXTIOI_IRQS_GROUP_COUNT];
+    uint32_t enable[EXTIOI_IRQS / 32];
+    uint32_t ipmap[EXTIOI_IRQS_IPMAP_SIZE / 4];
+    uint32_t coremap[EXTIOI_IRQS / 4];
+    uint32_t sw_pending[EXTIOI_IRQS / 32];
+    DECLARE_BITMAP(sw_isr[EXTIOI_CPUS][LS3A_INTC_IP], EXTIOI_IRQS);
+    uint8_t  sw_ipmap[EXTIOI_IRQS_IPMAP_SIZE];
+    uint8_t  sw_coremap[EXTIOI_IRQS];
+    qemu_irq parent_irq[EXTIOI_CPUS][LS3A_INTC_IP];
+    qemu_irq irq[EXTIOI_IRQS];
+    MemoryRegion extioi_iocsr_mem[EXTIOI_CPUS];
+    MemoryRegion extioi_system_mem;
+};
+#endif /* LOONGARCH_EXTIOI_H */
diff --git a/include/hw/intc/loongarch_ipi.h b/include/hw/intc/loongarch_ipi.h
new file mode 100644 (file)
index 0000000..6c61947
--- /dev/null
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch ipi interrupt header files
+ *
+ * Copyright (C) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef HW_LOONGARCH_IPI_H
+#define HW_LOONGARCH_IPI_H
+
+#include "hw/sysbus.h"
+
+/* Mainy used by iocsr read and write */
+#define SMP_IPI_MAILBOX      0x1000ULL
+#define CORE_STATUS_OFF       0x0
+#define CORE_EN_OFF           0x4
+#define CORE_SET_OFF          0x8
+#define CORE_CLEAR_OFF        0xc
+#define CORE_BUF_20           0x20
+#define CORE_BUF_28           0x28
+#define CORE_BUF_30           0x30
+#define CORE_BUF_38           0x38
+#define IOCSR_IPI_SEND        0x40
+#define IOCSR_MAIL_SEND       0x48
+#define IOCSR_ANY_SEND        0x158
+
+#define MAIL_SEND_ADDR        (SMP_IPI_MAILBOX + IOCSR_MAIL_SEND)
+#define MAIL_SEND_OFFSET      0
+#define ANY_SEND_OFFSET       (IOCSR_ANY_SEND - IOCSR_MAIL_SEND)
+
+#define IPI_MBX_NUM           4
+
+#define TYPE_LOONGARCH_IPI "loongarch_ipi"
+OBJECT_DECLARE_SIMPLE_TYPE(LoongArchIPI, LOONGARCH_IPI)
+
+typedef struct IPICore {
+    uint32_t status;
+    uint32_t en;
+    uint32_t set;
+    uint32_t clear;
+    /* 64bit buf divide into 2 32bit buf */
+    uint32_t buf[IPI_MBX_NUM * 2];
+    qemu_irq irq;
+} IPICore;
+
+struct LoongArchIPI {
+    SysBusDevice parent_obj;
+    MemoryRegion ipi_iocsr_mem;
+    MemoryRegion ipi64_iocsr_mem;
+    IPICore ipi_core;
+};
+
+#endif
diff --git a/include/hw/intc/loongarch_pch_msi.h b/include/hw/intc/loongarch_pch_msi.h
new file mode 100644 (file)
index 0000000..b8586fb
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch 7A1000 I/O interrupt controller definitions
+ *
+ * Copyright (C) 2021 Loongson Technology Corporation Limited
+ */
+
+#include "hw/sysbus.h"
+
+#define TYPE_LOONGARCH_PCH_MSI "loongarch_pch_msi"
+OBJECT_DECLARE_SIMPLE_TYPE(LoongArchPCHMSI, LOONGARCH_PCH_MSI)
+
+/* MSI irq start from 32 to 255 */
+#define PCH_MSI_IRQ_START   32
+#define PCH_MSI_IRQ_END     255
+#define PCH_MSI_IRQ_NUM     224
+
+struct LoongArchPCHMSI {
+    SysBusDevice parent_obj;
+    qemu_irq *pch_msi_irq;
+    MemoryRegion msi_mmio;
+    /* irq base passed to upper extioi intc */
+    unsigned int irq_base;
+    unsigned int irq_num;
+};
diff --git a/include/hw/intc/loongarch_pch_pic.h b/include/hw/intc/loongarch_pch_pic.h
new file mode 100644 (file)
index 0000000..d5437e8
--- /dev/null
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch 7A1000 I/O interrupt controller definitions
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#include "hw/sysbus.h"
+
+#define TYPE_LOONGARCH_PCH_PIC "loongarch_pch_pic"
+#define PCH_PIC_NAME(name) TYPE_LOONGARCH_PCH_PIC#name
+OBJECT_DECLARE_SIMPLE_TYPE(LoongArchPCHPIC, LOONGARCH_PCH_PIC)
+
+#define PCH_PIC_INT_ID_VAL              0x7000000UL
+#define PCH_PIC_INT_ID_VER              0x1UL
+
+#define PCH_PIC_INT_ID_LO               0x00
+#define PCH_PIC_INT_ID_HI               0x04
+#define PCH_PIC_INT_MASK_LO             0x20
+#define PCH_PIC_INT_MASK_HI             0x24
+#define PCH_PIC_HTMSI_EN_LO             0x40
+#define PCH_PIC_HTMSI_EN_HI             0x44
+#define PCH_PIC_INT_EDGE_LO             0x60
+#define PCH_PIC_INT_EDGE_HI             0x64
+#define PCH_PIC_INT_CLEAR_LO            0x80
+#define PCH_PIC_INT_CLEAR_HI            0x84
+#define PCH_PIC_AUTO_CTRL0_LO           0xc0
+#define PCH_PIC_AUTO_CTRL0_HI           0xc4
+#define PCH_PIC_AUTO_CTRL1_LO           0xe0
+#define PCH_PIC_AUTO_CTRL1_HI           0xe4
+#define PCH_PIC_ROUTE_ENTRY_OFFSET      0x100
+#define PCH_PIC_ROUTE_ENTRY_END         0x13f
+#define PCH_PIC_HTMSI_VEC_OFFSET        0x200
+#define PCH_PIC_HTMSI_VEC_END           0x23f
+#define PCH_PIC_INT_STATUS_LO           0x3a0
+#define PCH_PIC_INT_STATUS_HI           0x3a4
+#define PCH_PIC_INT_POL_LO              0x3e0
+#define PCH_PIC_INT_POL_HI              0x3e4
+
+#define STATUS_LO_START                 0
+#define STATUS_HI_START                 0x4
+#define POL_LO_START                    0x40
+#define POL_HI_START                    0x44
+struct LoongArchPCHPIC {
+    SysBusDevice parent_obj;
+    qemu_irq parent_irq[64];
+    uint64_t int_mask; /*0x020 interrupt mask register*/
+    uint64_t htmsi_en; /*0x040 1=msi*/
+    uint64_t intedge; /*0x060 edge=1 level  =0*/
+    uint64_t intclr; /*0x080 for clean edge int,set 1 clean,set 0 is noused*/
+    uint64_t auto_crtl0; /*0x0c0*/
+    uint64_t auto_crtl1; /*0x0e0*/
+    uint64_t last_intirr;    /* edge detection */
+    uint64_t intirr; /* 0x380 interrupt request register */
+    uint64_t intisr; /* 0x3a0 interrupt service register */
+    /*
+     * 0x3e0 interrupt level polarity selection
+     * register 0 for high level trigger
+     */
+    uint64_t int_polarity;
+
+    uint8_t route_entry[64]; /*0x100 - 0x138*/
+    uint8_t htmsi_vector[64]; /*0x200 - 0x238*/
+
+    MemoryRegion iomem32_low;
+    MemoryRegion iomem32_high;
+    MemoryRegion iomem8;
+    unsigned int irq_num;
+};
diff --git a/include/hw/intc/loongson_liointc.h b/include/hw/intc/loongson_liointc.h
new file mode 100644 (file)
index 0000000..848e65e
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2020 Huacai Chen <chenhc@lemote.com>
+ * Copyright (c) 2020 Jiaxun Yang <jiaxun.yang@flygoat.com>
+ *
+ */
+
+#ifndef LOONGSON_LIOINTC_H
+#define LOONGSON_LIOINTC_H
+
+#include "qemu/units.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_LOONGSON_LIOINTC "loongson.liointc"
+DECLARE_INSTANCE_CHECKER(struct loongson_liointc, LOONGSON_LIOINTC,
+                         TYPE_LOONGSON_LIOINTC)
+
+#endif /* LOONGSON_LIOINTC_H */
diff --git a/include/hw/intc/m68k_irqc.h b/include/hw/intc/m68k_irqc.h
new file mode 100644 (file)
index 0000000..693e33b
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * QEMU Motorola 680x0 IRQ Controller
+ *
+ * (c) 2020 Laurent Vivier <laurent@vivier.eu>
+ *
+ */
+
+#ifndef M68K_IRQC_H
+#define M68K_IRQC_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_M68K_IRQC "m68k-irq-controller"
+#define M68K_IRQC(obj) OBJECT_CHECK(M68KIRQCState, (obj), \
+                                    TYPE_M68K_IRQC)
+
+#define M68K_IRQC_AUTOVECTOR_BASE 25
+
+enum {
+    M68K_IRQC_LEVEL_1 = 0,
+    M68K_IRQC_LEVEL_2,
+    M68K_IRQC_LEVEL_3,
+    M68K_IRQC_LEVEL_4,
+    M68K_IRQC_LEVEL_5,
+    M68K_IRQC_LEVEL_6,
+    M68K_IRQC_LEVEL_7,
+};
+#define M68K_IRQC_LEVEL_NUM (M68K_IRQC_LEVEL_7 - M68K_IRQC_LEVEL_1 + 1)
+
+typedef struct M68KIRQCState {
+    SysBusDevice parent_obj;
+
+    uint8_t ipr;
+    ArchCPU *cpu;
+
+    /* statistics */
+    uint64_t stats_irq_count[M68K_IRQC_LEVEL_NUM];
+} M68KIRQCState;
+
+#endif
diff --git a/include/hw/intc/mips_gic.h b/include/hw/intc/mips_gic.h
new file mode 100644 (file)
index 0000000..5e4c71e
--- /dev/null
@@ -0,0 +1,218 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000, 07 MIPS Technologies, Inc.
+ * Copyright (C) 2016 Imagination Technologies
+ *
+ */
+
+#ifndef MIPS_GIC_H
+#define MIPS_GIC_H
+
+#include "qemu/units.h"
+#include "hw/timer/mips_gictimer.h"
+#include "hw/sysbus.h"
+#include "cpu.h"
+#include "qom/object.h"
+/*
+ * GIC Specific definitions
+ */
+
+/* The MIPS default location */
+#define GIC_BASE_ADDR           0x1bdc0000ULL
+#define GIC_ADDRSPACE_SZ        (128 * KiB)
+
+/* Constants */
+#define GIC_POL_POS     1
+#define GIC_POL_NEG     0
+#define GIC_TRIG_EDGE   1
+#define GIC_TRIG_LEVEL  0
+
+#define MSK(n)              ((1ULL << (n)) - 1)
+
+/* GIC Address Space */
+#define SHARED_SECTION_OFS          0x0000
+#define SHARED_SECTION_SIZE         0x8000
+#define VP_LOCAL_SECTION_OFS        0x8000
+#define VP_LOCAL_SECTION_SIZE       0x4000
+#define VP_OTHER_SECTION_OFS        0xc000
+#define VP_OTHER_SECTION_SIZE       0x4000
+#define USM_VISIBLE_SECTION_OFS     0x10000
+#define USM_VISIBLE_SECTION_SIZE    0x10000
+
+/* Register Map for Shared Section */
+
+#define GIC_SH_CONFIG_OFS           0x0000
+
+/* Shared Global Counter */
+#define GIC_SH_COUNTERLO_OFS        0x0010
+#define GIC_SH_COUNTERHI_OFS        0x0014
+#define GIC_SH_REVISIONID_OFS       0x0020
+
+/* Set/Clear corresponding bit in Edge Detect Register */
+#define GIC_SH_WEDGE_OFS            0x0280
+
+/* Reset Mask - Disables Interrupt */
+#define GIC_SH_RMASK_OFS            0x0300
+#define GIC_SH_RMASK_LAST_OFS       0x031c
+
+/* Set Mask (WO) - Enables Interrupt */
+#define GIC_SH_SMASK_OFS            0x0380
+#define GIC_SH_SMASK_LAST_OFS       0x039c
+
+/* Global Interrupt Mask Register (RO) - Bit Set == Interrupt enabled */
+#define GIC_SH_MASK_OFS             0x0400
+#define GIC_SH_MASK_LAST_OFS        0x041c
+
+/* Pending Global Interrupts (RO) */
+#define GIC_SH_PEND_OFS             0x0480
+#define GIC_SH_PEND_LAST_OFS        0x049c
+
+#define GIC_SH_MAP0_PIN_OFS         0x0500
+#define GIC_SH_MAP255_PIN_OFS       0x08fc
+
+#define GIC_SH_MAP0_VP_OFS          0x2000
+#define GIC_SH_MAP255_VP_LAST_OFS   0x3fe4
+
+/* Register Map for Local Section */
+#define GIC_VP_CTL_OFS              0x0000
+#define GIC_VP_PEND_OFS             0x0004
+#define GIC_VP_MASK_OFS             0x0008
+#define GIC_VP_RMASK_OFS            0x000c
+#define GIC_VP_SMASK_OFS            0x0010
+#define GIC_VP_WD_MAP_OFS           0x0040
+#define GIC_VP_COMPARE_MAP_OFS      0x0044
+#define GIC_VP_TIMER_MAP_OFS        0x0048
+#define GIC_VP_FDC_MAP_OFS          0x004c
+#define GIC_VP_PERFCTR_MAP_OFS      0x0050
+#define GIC_VP_SWINT0_MAP_OFS       0x0054
+#define GIC_VP_SWINT1_MAP_OFS       0x0058
+#define GIC_VP_OTHER_ADDR_OFS       0x0080
+#define GIC_VP_IDENT_OFS            0x0088
+#define GIC_VP_WD_CONFIG0_OFS       0x0090
+#define GIC_VP_WD_COUNT0_OFS        0x0094
+#define GIC_VP_WD_INITIAL0_OFS      0x0098
+#define GIC_VP_COMPARE_LO_OFS       0x00a0
+#define GIC_VP_COMPARE_HI_OFS       0x00a4
+#define GIC_VL_BRK_GROUP            0x3080
+
+/* User-Mode Visible Section Register */
+/* Read-only alias for GIC Shared CounterLo */
+#define GIC_USER_MODE_COUNTERLO     0x0000
+/* Read-only alias for GIC Shared CounterHi */
+#define GIC_USER_MODE_COUNTERHI     0x0004
+
+/* Masks */
+#define GIC_SH_CONFIG_COUNTSTOP_SHF     28
+#define GIC_SH_CONFIG_COUNTSTOP_MSK     (MSK(1) << GIC_SH_CONFIG_COUNTSTOP_SHF)
+#define GIC_SH_CONFIG_COUNTBITS_SHF     24
+#define GIC_SH_CONFIG_COUNTBITS_MSK     (MSK(4) << GIC_SH_CONFIG_COUNTBITS_SHF)
+#define GIC_SH_CONFIG_NUMINTRS_SHF      16
+#define GIC_SH_CONFIG_NUMINTRS_MSK      (MSK(8) << GIC_SH_CONFIG_NUMINTRS_SHF)
+#define GIC_SH_CONFIG_PVPS_SHF          0
+#define GIC_SH_CONFIG_PVPS_MSK          (MSK(8) << GIC_SH_CONFIG_NUMVPS_SHF)
+
+#define GIC_SH_WEDGE_RW_SHF             31
+#define GIC_SH_WEDGE_RW_MSK             (MSK(1) << GIC_SH_WEDGE_RW_SHF)
+
+#define GIC_MAP_TO_PIN_SHF              31
+#define GIC_MAP_TO_PIN_MSK              (MSK(1) << GIC_MAP_TO_PIN_SHF)
+#define GIC_MAP_TO_NMI_SHF              30
+#define GIC_MAP_TO_NMI_MSK              (MSK(1) << GIC_MAP_TO_NMI_SHF)
+#define GIC_MAP_TO_YQ_SHF               29
+#define GIC_MAP_TO_YQ_MSK               (MSK(1) << GIC_MAP_TO_YQ_SHF)
+#define GIC_MAP_SHF                     0
+#define GIC_MAP_MSK                     (MSK(6) << GIC_MAP_SHF)
+#define GIC_MAP_TO_PIN_REG_MSK          \
+    (GIC_MAP_TO_PIN_MSK | GIC_MAP_TO_NMI_MSK | GIC_MAP_TO_YQ_MSK | GIC_MAP_MSK)
+
+/* GIC_VP_CTL Masks */
+#define GIC_VP_CTL_FDC_RTBL_SHF         4
+#define GIC_VP_CTL_FDC_RTBL_MSK         (MSK(1) << GIC_VP_CTL_FDC_RTBL_SHF)
+#define GIC_VP_CTL_SWINT_RTBL_SHF       3
+#define GIC_VP_CTL_SWINT_RTBL_MSK       (MSK(1) << GIC_VP_CTL_SWINT_RTBL_SHF)
+#define GIC_VP_CTL_PERFCNT_RTBL_SHF     2
+#define GIC_VP_CTL_PERFCNT_RTBL_MSK     (MSK(1) << GIC_VP_CTL_PERFCNT_RTBL_SHF)
+#define GIC_VP_CTL_TIMER_RTBL_SHF       1
+#define GIC_VP_CTL_TIMER_RTBL_MSK       (MSK(1) << GIC_VP_CTL_TIMER_RTBL_SHF)
+#define GIC_VP_CTL_EIC_MODE_SHF         0
+#define GIC_VP_CTL_EIC_MODE_MSK         (MSK(1) << GIC_VP_CTL_EIC_MODE_SHF)
+
+/* GIC_VP_MASK Masks */
+#define GIC_VP_MASK_FDC_SHF         6
+#define GIC_VP_MASK_FDC_MSK         (MSK(1) << GIC_VP_MASK_FDC_SHF)
+#define GIC_VP_MASK_SWINT1_SHF      5
+#define GIC_VP_MASK_SWINT1_MSK      (MSK(1) << GIC_VP_MASK_SWINT1_SHF)
+#define GIC_VP_MASK_SWINT0_SHF      4
+#define GIC_VP_MASK_SWINT0_MSK      (MSK(1) << GIC_VP_MASK_SWINT0_SHF)
+#define GIC_VP_MASK_PERFCNT_SHF     3
+#define GIC_VP_MASK_PERFCNT_MSK     (MSK(1) << GIC_VP_MASK_PERFCNT_SHF)
+#define GIC_VP_MASK_TIMER_SHF       2
+#define GIC_VP_MASK_TIMER_MSK       (MSK(1) << GIC_VP_MASK_TIMER_SHF)
+#define GIC_VP_MASK_CMP_SHF         1
+#define GIC_VP_MASK_CMP_MSK         (MSK(1) << GIC_VP_MASK_CMP_SHF)
+#define GIC_VP_MASK_WD_SHF          0
+#define GIC_VP_MASK_WD_MSK          (MSK(1) << GIC_VP_MASK_WD_SHF)
+#define GIC_VP_SET_RESET_MSK        (MSK(7) << GIC_VP_MASK_WD_SHF)
+
+#define GIC_CPU_INT_MAX             5 /* Core Interrupt 7 */
+#define GIC_CPU_PIN_OFFSET          2
+
+/* Local GIC interrupts. */
+#define GIC_NUM_LOCAL_INTRS     7
+#define GIC_LOCAL_INT_FDC       6 /* CPU fast debug channel */
+#define GIC_LOCAL_INT_SWINT1    5 /* CPU software interrupt 1 */
+#define GIC_LOCAL_INT_SWINT0    4 /* CPU software interrupt 0 */
+#define GIC_LOCAL_INT_PERFCTR   3 /* CPU performance counter */
+#define GIC_LOCAL_INT_TIMER     2 /* CPU timer interrupt */
+#define GIC_LOCAL_INT_COMPARE   1 /* GIC count and compare timer */
+#define GIC_LOCAL_INT_WD        0 /* GIC watchdog */
+
+#define TYPE_MIPS_GIC "mips-gic"
+OBJECT_DECLARE_SIMPLE_TYPE(MIPSGICState, MIPS_GIC)
+
+/* Support up to 32 VPs and 256 IRQs */
+#define GIC_MAX_VPS             32
+#define GIC_MAX_INTRS           256
+
+typedef struct MIPSGICIRQState MIPSGICIRQState;
+typedef struct MIPSGICVPState MIPSGICVPState;
+
+struct MIPSGICIRQState {
+    uint8_t enabled;
+    uint8_t pending;
+    uint32_t map_pin;
+    int32_t map_vp;
+    qemu_irq irq;
+};
+
+struct MIPSGICVPState {
+    uint32_t ctl;
+    uint32_t pend;
+    uint32_t mask;
+    uint32_t compare_map;
+    uint32_t other_addr;
+    CPUMIPSState *env;
+};
+
+struct MIPSGICState {
+    SysBusDevice parent_obj;
+    MemoryRegion mr;
+
+    /* Shared Section Registers */
+    uint32_t sh_config;
+    MIPSGICIRQState *irq_state;
+
+    /* VP Local/Other Section Registers */
+    MIPSGICVPState *vps;
+
+    /* GIC VP Timer */
+    MIPSGICTimerState *gic_timer;
+
+    uint32_t num_vps;
+    uint32_t num_irq;
+};
+
+#endif /* MIPS_GIC_H */
diff --git a/include/hw/intc/nios2_vic.h b/include/hw/intc/nios2_vic.h
new file mode 100644 (file)
index 0000000..5c975a2
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Vectored Interrupt Controller for nios2 processor
+ *
+ * Copyright (c) 2022 Neuroblade
+ *
+ * Interface:
+ * QOM property "cpu": link to the Nios2 CPU (must be set)
+ * Unnamed GPIO inputs 0..NIOS2_VIC_MAX_IRQ-1: input IRQ lines
+ * IRQ should be connected to nios2 IRQ0.
+ *
+ * Reference: "Embedded Peripherals IP User Guide
+ *             for Intel® Quartus® Prime Design Suite: 21.4"
+ * Chapter 38 "Vectored Interrupt Controller Core"
+ * See: https://www.intel.com/content/www/us/en/docs/programmable/683130/21-4/vectored-interrupt-controller-core.html
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_INTC_NIOS2_VIC_H
+#define HW_INTC_NIOS2_VIC_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_NIOS2_VIC "nios2-vic"
+OBJECT_DECLARE_SIMPLE_TYPE(Nios2VIC, NIOS2_VIC)
+
+#define NIOS2_VIC_MAX_IRQ 32
+
+struct Nios2VIC {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    qemu_irq output_int;
+
+    /* properties */
+    CPUState *cpu;
+    MemoryRegion csr;
+
+    uint32_t int_config[NIOS2_VIC_MAX_IRQ];
+    uint32_t vic_config;
+    uint32_t int_raw_status;
+    uint32_t int_enable;
+    uint32_t sw_int;
+    uint32_t vic_status;
+    uint32_t vec_tbl_base;
+    uint32_t vec_tbl_addr;
+};
+
+#endif /* HW_INTC_NIOS2_VIC_H */
diff --git a/include/hw/intc/ppc-uic.h b/include/hw/intc/ppc-uic.h
new file mode 100644 (file)
index 0000000..4d82e9a
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * "Universal" Interrupt Controller for PowerPPC 4xx embedded processors
+ *
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_INTC_PPC_UIC_H
+#define HW_INTC_PPC_UIC_H
+
+#include "hw/ppc/ppc4xx.h"
+
+#define TYPE_PPC_UIC "ppc-uic"
+OBJECT_DECLARE_SIMPLE_TYPE(PPCUIC, PPC_UIC)
+
+/*
+ * QEMU interface:
+ * QOM property "cpu": link to the PPC CPU
+ *    (no default, must be set)
+ * QOM property "dcr-base": base of the bank of DCR registers for the UIC
+ *    (default 0x30)
+ * QOM property "use-vectors": true if the UIC has vector registers
+ *    (default true)
+ * unnamed GPIO inputs 0..UIC_MAX_IRQ: input IRQ lines
+ * sysbus IRQs:
+ *  0 (PPCUIC_OUTPUT_INT): output INT line to the CPU
+ *  1 (PPCUIC_OUTPUT_CINT): output CINT line to the CPU
+ */
+
+#define UIC_MAX_IRQ 32
+
+/* Symbolic constants for the sysbus IRQ outputs */
+enum {
+    PPCUIC_OUTPUT_INT = 0,
+    PPCUIC_OUTPUT_CINT = 1,
+    PPCUIC_OUTPUT_NB,
+};
+
+struct PPCUIC {
+    /*< private >*/
+    Ppc4xxDcrDeviceState parent_obj;
+
+    /*< public >*/
+    qemu_irq output_int;
+    qemu_irq output_cint;
+
+    /* properties */
+    uint32_t dcr_base;
+    bool use_vectors;
+
+    uint32_t level;  /* Remembers the state of level-triggered interrupts. */
+    uint32_t uicsr;  /* Status register */
+    uint32_t uicer;  /* Enable register */
+    uint32_t uiccr;  /* Critical register */
+    uint32_t uicpr;  /* Polarity register */
+    uint32_t uictr;  /* Triggering register */
+    uint32_t uicvcr; /* Vector configuration register */
+    uint32_t uicvr;
+};
+
+#endif
diff --git a/include/hw/intc/realview_gic.h b/include/hw/intc/realview_gic.h
new file mode 100644 (file)
index 0000000..f37339d
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * ARM RealView Emulation Baseboard Interrupt Controller
+ *
+ * Copyright (c) 2006-2007 CodeSourcery.
+ * Written by Paul Brook
+ *
+ * This code is licensed under the GPL.
+ */
+
+#ifndef HW_INTC_REALVIEW_GIC_H
+#define HW_INTC_REALVIEW_GIC_H
+
+#include "hw/sysbus.h"
+#include "hw/intc/arm_gic.h"
+#include "qom/object.h"
+
+#define TYPE_REALVIEW_GIC "realview_gic"
+OBJECT_DECLARE_SIMPLE_TYPE(RealViewGICState, REALVIEW_GIC)
+
+struct RealViewGICState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion container;
+
+    GICState gic;
+};
+
+#endif
diff --git a/include/hw/intc/riscv_aclint.h b/include/hw/intc/riscv_aclint.h
new file mode 100644 (file)
index 0000000..693415e
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * RISC-V ACLINT (Advanced Core Local Interruptor) interface
+ *
+ * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
+ * Copyright (c) 2017 SiFive, Inc.
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_ACLINT_H
+#define HW_RISCV_ACLINT_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_RISCV_ACLINT_MTIMER "riscv.aclint.mtimer"
+
+#define RISCV_ACLINT_MTIMER(obj) \
+    OBJECT_CHECK(RISCVAclintMTimerState, (obj), TYPE_RISCV_ACLINT_MTIMER)
+
+typedef struct RISCVAclintMTimerState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    uint64_t time_delta;
+    uint64_t *timecmp;
+    QEMUTimer **timers;
+
+    /*< public >*/
+    MemoryRegion mmio;
+    uint32_t hartid_base;
+    uint32_t num_harts;
+    uint32_t timecmp_base;
+    uint32_t time_base;
+    uint32_t aperture_size;
+    uint32_t timebase_freq;
+    qemu_irq *timer_irqs;
+} RISCVAclintMTimerState;
+
+DeviceState *riscv_aclint_mtimer_create(hwaddr addr, hwaddr size,
+    uint32_t hartid_base, uint32_t num_harts,
+    uint32_t timecmp_base, uint32_t time_base, uint32_t timebase_freq,
+    bool provide_rdtime);
+
+#define TYPE_RISCV_ACLINT_SWI "riscv.aclint.swi"
+
+#define RISCV_ACLINT_SWI(obj) \
+    OBJECT_CHECK(RISCVAclintSwiState, (obj), TYPE_RISCV_ACLINT_SWI)
+
+typedef struct RISCVAclintSwiState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion mmio;
+    uint32_t hartid_base;
+    uint32_t num_harts;
+    uint32_t sswi;
+    qemu_irq *soft_irqs;
+} RISCVAclintSwiState;
+
+DeviceState *riscv_aclint_swi_create(hwaddr addr, uint32_t hartid_base,
+    uint32_t num_harts, bool sswi);
+
+enum {
+    RISCV_ACLINT_DEFAULT_MTIMECMP      = 0x0,
+    RISCV_ACLINT_DEFAULT_MTIME         = 0x7ff8,
+    RISCV_ACLINT_DEFAULT_MTIMER_SIZE   = 0x8000,
+    RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ = 10000000,
+    RISCV_ACLINT_MAX_HARTS             = 4095,
+    RISCV_ACLINT_SWI_SIZE              = 0x4000
+};
+
+#endif
diff --git a/include/hw/intc/riscv_aplic.h b/include/hw/intc/riscv_aplic.h
new file mode 100644 (file)
index 0000000..de8532f
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * RISC-V APLIC (Advanced Platform Level Interrupt Controller) interface
+ *
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_APLIC_H
+#define HW_RISCV_APLIC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_RISCV_APLIC "riscv.aplic"
+
+typedef struct RISCVAPLICState RISCVAPLICState;
+DECLARE_INSTANCE_CHECKER(RISCVAPLICState, RISCV_APLIC, TYPE_RISCV_APLIC)
+
+#define APLIC_MIN_SIZE            0x4000
+#define APLIC_SIZE_ALIGN(__x)     (((__x) + (APLIC_MIN_SIZE - 1)) & \
+                                   ~(APLIC_MIN_SIZE - 1))
+#define APLIC_SIZE(__num_harts)   (APLIC_MIN_SIZE + \
+                                   APLIC_SIZE_ALIGN(32 * (__num_harts)))
+
+struct RISCVAPLICState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    qemu_irq *external_irqs;
+
+    /*< public >*/
+    MemoryRegion mmio;
+    uint32_t bitfield_words;
+    uint32_t domaincfg;
+    uint32_t mmsicfgaddr;
+    uint32_t mmsicfgaddrH;
+    uint32_t smsicfgaddr;
+    uint32_t smsicfgaddrH;
+    uint32_t genmsi;
+    uint32_t *sourcecfg;
+    uint32_t *state;
+    uint32_t *target;
+    uint32_t *idelivery;
+    uint32_t *iforce;
+    uint32_t *ithreshold;
+
+    /* topology */
+#define QEMU_APLIC_MAX_CHILDREN        16
+    struct RISCVAPLICState *parent;
+    struct RISCVAPLICState *children[QEMU_APLIC_MAX_CHILDREN];
+    uint16_t num_children;
+
+    /* config */
+    uint32_t aperture_size;
+    uint32_t hartid_base;
+    uint32_t num_harts;
+    uint32_t iprio_mask;
+    uint32_t num_irqs;
+    bool msimode;
+    bool mmode;
+};
+
+void riscv_aplic_add_child(DeviceState *parent, DeviceState *child);
+
+DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size,
+    uint32_t hartid_base, uint32_t num_harts, uint32_t num_sources,
+    uint32_t iprio_bits, bool msimode, bool mmode, DeviceState *parent);
+
+#endif
diff --git a/include/hw/intc/riscv_imsic.h b/include/hw/intc/riscv_imsic.h
new file mode 100644 (file)
index 0000000..58c2aaa
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * RISC-V IMSIC (Incoming Message Signal Interrupt Controller) interface
+ *
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_IMSIC_H
+#define HW_RISCV_IMSIC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_RISCV_IMSIC "riscv.imsic"
+
+typedef struct RISCVIMSICState RISCVIMSICState;
+DECLARE_INSTANCE_CHECKER(RISCVIMSICState, RISCV_IMSIC, TYPE_RISCV_IMSIC)
+
+#define IMSIC_MMIO_PAGE_SHIFT          12
+#define IMSIC_MMIO_PAGE_SZ             (1UL << IMSIC_MMIO_PAGE_SHIFT)
+#define IMSIC_MMIO_SIZE(__num_pages)   ((__num_pages) * IMSIC_MMIO_PAGE_SZ)
+
+#define IMSIC_MMIO_HART_GUEST_MAX_BTIS 6
+#define IMSIC_MMIO_GROUP_MIN_SHIFT     24
+
+#define IMSIC_HART_NUM_GUESTS(__guest_bits)           \
+    (1U << (__guest_bits))
+#define IMSIC_HART_SIZE(__guest_bits)                 \
+    (IMSIC_HART_NUM_GUESTS(__guest_bits) * IMSIC_MMIO_PAGE_SZ)
+#define IMSIC_GROUP_NUM_HARTS(__hart_bits)            \
+    (1U << (__hart_bits))
+#define IMSIC_GROUP_SIZE(__hart_bits, __guest_bits)   \
+    (IMSIC_GROUP_NUM_HARTS(__hart_bits) * IMSIC_HART_SIZE(__guest_bits))
+
+struct RISCVIMSICState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    qemu_irq *external_irqs;
+
+    /*< public >*/
+    MemoryRegion mmio;
+    uint32_t num_eistate;
+    uint32_t *eidelivery;
+    uint32_t *eithreshold;
+    uint32_t *eistate;
+
+    /* config */
+    bool mmode;
+    uint32_t hartid;
+    uint32_t num_pages;
+    uint32_t num_irqs;
+};
+
+DeviceState *riscv_imsic_create(hwaddr addr, uint32_t hartid, bool mmode,
+                                uint32_t num_pages, uint32_t num_ids);
+
+#endif
diff --git a/include/hw/intc/rx_icu.h b/include/hw/intc/rx_icu.h
new file mode 100644 (file)
index 0000000..b23504f
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * RX Interrupt Control Unit
+ *
+ * Copyright (c) 2019 Yoshinori Sato
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_INTC_RX_ICU_H
+#define HW_INTC_RX_ICU_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+enum TRG_MODE {
+    TRG_LEVEL = 0,
+    TRG_NEDGE = 1,      /* Falling */
+    TRG_PEDGE = 2,      /* Raising */
+    TRG_BEDGE = 3,      /* Both */
+};
+
+struct IRQSource {
+    enum TRG_MODE sense;
+    int level;
+};
+
+enum {
+    /* Software interrupt request */
+    SWI = 27,
+    NR_IRQS = 256
+};
+
+struct RXICUState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion memory;
+    struct IRQSource src[NR_IRQS];
+    uint32_t nr_irqs;
+    uint8_t *map;
+    uint32_t nr_sense;
+    uint8_t *init_sense;
+
+    uint8_t ir[NR_IRQS];
+    uint8_t dtcer[NR_IRQS];
+    uint8_t ier[NR_IRQS / 8];
+    uint8_t ipr[142];
+    uint8_t dmasr[4];
+    uint16_t fir;
+    uint8_t nmisr;
+    uint8_t nmier;
+    uint8_t nmiclr;
+    uint8_t nmicr;
+    int16_t req_irq;
+    qemu_irq _irq;
+    qemu_irq _fir;
+    qemu_irq _swi;
+};
+
+#define TYPE_RX_ICU "rx-icu"
+OBJECT_DECLARE_SIMPLE_TYPE(RXICUState, RX_ICU)
+
+#endif /* HW_INTC_RX_ICU_H */
diff --git a/include/hw/intc/sifive_plic.h b/include/hw/intc/sifive_plic.h
new file mode 100644 (file)
index 0000000..d3f45ec
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * SiFive PLIC (Platform Level Interrupt Controller) interface
+ *
+ * Copyright (c) 2017 SiFive, Inc.
+ *
+ * This provides a RISC-V PLIC device
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SIFIVE_PLIC_H
+#define HW_SIFIVE_PLIC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_SIFIVE_PLIC "riscv.sifive.plic"
+
+typedef struct SiFivePLICState SiFivePLICState;
+DECLARE_INSTANCE_CHECKER(SiFivePLICState, SIFIVE_PLIC,
+                         TYPE_SIFIVE_PLIC)
+
+typedef enum PLICMode {
+    PLICMode_U,
+    PLICMode_S,
+    PLICMode_M
+} PLICMode;
+
+typedef struct PLICAddr {
+    uint32_t addrid;
+    uint32_t hartid;
+    PLICMode mode;
+} PLICAddr;
+
+struct SiFivePLICState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion mmio;
+    uint32_t num_addrs;
+    uint32_t num_harts;
+    uint32_t bitfield_words;
+    uint32_t num_enables;
+    PLICAddr *addr_config;
+    uint32_t *source_priority;
+    uint32_t *target_priority;
+    uint32_t *pending;
+    uint32_t *claimed;
+    uint32_t *enable;
+
+    /* config */
+    char *hart_config;
+    uint32_t hartid_base;
+    uint32_t num_sources;
+    uint32_t num_priorities;
+    uint32_t priority_base;
+    uint32_t pending_base;
+    uint32_t enable_base;
+    uint32_t enable_stride;
+    uint32_t context_base;
+    uint32_t context_stride;
+    uint32_t aperture_size;
+
+    qemu_irq *m_external_irqs;
+    qemu_irq *s_external_irqs;
+};
+
+DeviceState *sifive_plic_create(hwaddr addr, char *hart_config,
+    uint32_t num_harts,
+    uint32_t hartid_base, uint32_t num_sources,
+    uint32_t num_priorities, uint32_t priority_base,
+    uint32_t pending_base, uint32_t enable_base,
+    uint32_t enable_stride, uint32_t context_base,
+    uint32_t context_stride, uint32_t aperture_size);
+
+#endif
diff --git a/include/hw/intc/xlnx-pmu-iomod-intc.h b/include/hw/intc/xlnx-pmu-iomod-intc.h
new file mode 100644 (file)
index 0000000..ccc8bd2
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * QEMU model of Xilinx I/O Module Interrupt Controller
+ *
+ * Copyright (c) 2014 Xilinx Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_INTC_XLNX_PMU_IOMOD_INTC_H
+#define HW_INTC_XLNX_PMU_IOMOD_INTC_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "qom/object.h"
+
+#define TYPE_XLNX_PMU_IO_INTC "xlnx.pmu_io_intc"
+
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxPMUIOIntc, XLNX_PMU_IO_INTC)
+
+/* This is R_PIT3_CONTROL + 1 */
+#define XLNXPMUIOINTC_R_MAX (0x78 + 1)
+
+struct XlnxPMUIOIntc {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+
+    qemu_irq parent_irq;
+
+    struct {
+        uint32_t intr_size;
+        uint32_t level_edge;
+        uint32_t positive;
+    } cfg;
+
+    uint32_t irq_raw;
+
+    uint32_t regs[XLNXPMUIOINTC_R_MAX];
+    RegisterInfo regs_info[XLNXPMUIOINTC_R_MAX];
+};
+
+#endif /* HW_INTC_XLNX_PMU_IOMOD_INTC_H */
diff --git a/include/hw/intc/xlnx-zynqmp-ipi.h b/include/hw/intc/xlnx-zynqmp-ipi.h
new file mode 100644 (file)
index 0000000..33eff1d
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * QEMU model of the IPI Inter Processor Interrupt block
+ *
+ * Copyright (c) 2014 Xilinx Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XLNX_ZYNQMP_IPI_H
+#define XLNX_ZYNQMP_IPI_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "qom/object.h"
+
+#define TYPE_XLNX_ZYNQMP_IPI "xlnx.zynqmp_ipi"
+
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPIPI, XLNX_ZYNQMP_IPI)
+
+/* This is R_IPI_IDR + 1 */
+#define R_XLNX_ZYNQMP_IPI_MAX ((0x1c / 4) + 1)
+
+#define NUM_IPIS 11
+
+struct XlnxZynqMPIPI {
+    /* Private */
+    SysBusDevice parent_obj;
+
+    /* Public */
+    MemoryRegion iomem;
+    qemu_irq irq;
+
+    qemu_irq irq_trig_out[NUM_IPIS];
+    qemu_irq irq_obs_out[NUM_IPIS];
+
+    uint32_t regs[R_XLNX_ZYNQMP_IPI_MAX];
+    RegisterInfo regs_info[R_XLNX_ZYNQMP_IPI_MAX];
+};
+
+#endif /* XLNX_ZYNQMP_IPI_H */
diff --git a/include/hw/ipack/ipack.h b/include/hw/ipack/ipack.h
new file mode 100644 (file)
index 0000000..cbcdda5
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * QEMU IndustryPack emulation
+ *
+ * Copyright (C) 2012 Igalia, S.L.
+ * Author: Alberto Garcia <berto@igalia.com>
+ *
+ * This code is licensed under the GNU GPL v2 or (at your option) any
+ * later version.
+ */
+
+#ifndef QEMU_IPACK_H
+#define QEMU_IPACK_H
+
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+
+#define TYPE_IPACK_BUS "IndustryPack"
+OBJECT_DECLARE_SIMPLE_TYPE(IPackBus, IPACK_BUS)
+
+struct IPackBus {
+    /*< private >*/
+    BusState parent_obj;
+
+    /* All fields are private */
+    uint8_t n_slots;
+    uint8_t free_slot;
+    qemu_irq_handler set_irq;
+};
+
+
+#define TYPE_IPACK_DEVICE "ipack-device"
+OBJECT_DECLARE_TYPE(IPackDevice, IPackDeviceClass,
+                    IPACK_DEVICE)
+
+struct IPackDeviceClass {
+    /*< private >*/
+    DeviceClass parent_class;
+    /*< public >*/
+
+    DeviceRealize realize;
+    DeviceUnrealize unrealize;
+
+    uint16_t (*io_read)(IPackDevice *dev, uint8_t addr);
+    void (*io_write)(IPackDevice *dev, uint8_t addr, uint16_t val);
+
+    uint16_t (*id_read)(IPackDevice *dev, uint8_t addr);
+    void (*id_write)(IPackDevice *dev, uint8_t addr, uint16_t val);
+
+    uint16_t (*int_read)(IPackDevice *dev, uint8_t addr);
+    void (*int_write)(IPackDevice *dev, uint8_t addr, uint16_t val);
+
+    uint16_t (*mem_read16)(IPackDevice *dev, uint32_t addr);
+    void (*mem_write16)(IPackDevice *dev, uint32_t addr, uint16_t val);
+
+    uint8_t (*mem_read8)(IPackDevice *dev, uint32_t addr);
+    void (*mem_write8)(IPackDevice *dev, uint32_t addr, uint8_t val);
+};
+
+struct IPackDevice {
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
+
+    int32_t slot;
+    /* IRQ objects for the IndustryPack INT0# and INT1# */
+    qemu_irq *irq;
+};
+
+extern const VMStateDescription vmstate_ipack_device;
+
+#define VMSTATE_IPACK_DEVICE(_field, _state)                            \
+    VMSTATE_STRUCT(_field, _state, 1, vmstate_ipack_device, IPackDevice)
+
+IPackDevice *ipack_device_find(IPackBus *bus, int32_t slot);
+void ipack_bus_init(IPackBus *bus, size_t bus_size,
+                    DeviceState *parent,
+                    uint8_t n_slots,
+                    qemu_irq_handler handler);
+
+#endif
diff --git a/include/hw/ipmi/ipmi.h b/include/hw/ipmi/ipmi.h
new file mode 100644 (file)
index 0000000..77a7213
--- /dev/null
@@ -0,0 +1,305 @@
+/*
+ * IPMI base class
+ *
+ * Copyright (c) 2015 Corey Minyard, MontaVista Software, LLC
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_IPMI_H
+#define HW_IPMI_H
+
+#include "exec/memory.h"
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+#define MAX_IPMI_MSG_SIZE 300
+
+enum ipmi_op {
+    IPMI_RESET_CHASSIS,
+    IPMI_POWEROFF_CHASSIS,
+    IPMI_POWERON_CHASSIS,
+    IPMI_POWERCYCLE_CHASSIS,
+    IPMI_PULSE_DIAG_IRQ,
+    IPMI_SHUTDOWN_VIA_ACPI_OVERTEMP,
+    IPMI_SEND_NMI
+};
+
+#define IPMI_CC_INVALID_CMD                              0xc1
+#define IPMI_CC_COMMAND_INVALID_FOR_LUN                  0xc2
+#define IPMI_CC_TIMEOUT                                  0xc3
+#define IPMI_CC_OUT_OF_SPACE                             0xc4
+#define IPMI_CC_INVALID_RESERVATION                      0xc5
+#define IPMI_CC_REQUEST_DATA_TRUNCATED                   0xc6
+#define IPMI_CC_REQUEST_DATA_LENGTH_INVALID              0xc7
+#define IPMI_CC_PARM_OUT_OF_RANGE                        0xc9
+#define IPMI_CC_CANNOT_RETURN_REQ_NUM_BYTES              0xca
+#define IPMI_CC_REQ_ENTRY_NOT_PRESENT                    0xcb
+#define IPMI_CC_INVALID_DATA_FIELD                       0xcc
+#define IPMI_CC_BMC_INIT_IN_PROGRESS                     0xd2
+#define IPMI_CC_COMMAND_NOT_SUPPORTED                    0xd5
+#define IPMI_CC_UNSPECIFIED                              0xff
+
+#define IPMI_NETFN_APP                0x06
+#define IPMI_NETFN_OEM                0x3a
+
+#define IPMI_DEBUG 1
+
+/* Specified in the SMBIOS spec. */
+#define IPMI_SMBIOS_KCS         0x01
+#define IPMI_SMBIOS_SMIC        0x02
+#define IPMI_SMBIOS_BT          0x03
+#define IPMI_SMBIOS_SSIF        0x04
+
+/*
+ * Used for transferring information to interfaces that add
+ * entries to firmware tables.
+ */
+typedef struct IPMIFwInfo {
+    const char *interface_name;
+    int interface_type;
+    uint8_t ipmi_spec_major_revision;
+    uint8_t ipmi_spec_minor_revision;
+    uint8_t i2c_slave_address;
+    uint32_t uuid;
+
+    uint64_t base_address;
+    uint64_t register_length;
+    uint8_t register_spacing;
+    enum {
+        IPMI_MEMSPACE_IO,
+        IPMI_MEMSPACE_MEM32,
+        IPMI_MEMSPACE_MEM64,
+        IPMI_MEMSPACE_SMBUS
+    } memspace;
+
+    int interrupt_number;
+    enum {
+        IPMI_LEVEL_IRQ,
+        IPMI_EDGE_IRQ
+    } irq_type;
+} IPMIFwInfo;
+
+/*
+ * Called by each instantiated IPMI interface device to get it's uuid.
+ */
+uint32_t ipmi_next_uuid(void);
+
+/* IPMI Interface types (KCS, SMIC, BT) are prefixed with this */
+#define TYPE_IPMI_INTERFACE_PREFIX "ipmi-interface-"
+
+/*
+ * An IPMI Interface, the interface for talking between the target
+ * and the BMC.
+ */
+#define TYPE_IPMI_INTERFACE "ipmi-interface"
+#define IPMI_INTERFACE(obj) \
+     INTERFACE_CHECK(IPMIInterface, (obj), TYPE_IPMI_INTERFACE)
+typedef struct IPMIInterfaceClass IPMIInterfaceClass;
+DECLARE_CLASS_CHECKERS(IPMIInterfaceClass, IPMI_INTERFACE,
+                       TYPE_IPMI_INTERFACE)
+
+typedef struct IPMIInterface IPMIInterface;
+
+struct IPMIInterfaceClass {
+    InterfaceClass parent;
+
+    /*
+     * min_size is the requested I/O size and must be a power of 2.
+     * This is so PCI (or other busses) can request a bigger range.
+     * Use 0 for the default.
+     */
+    void (*init)(struct IPMIInterface *s, unsigned int min_size, Error **errp);
+
+    /*
+     * Perform various operations on the hardware.  If checkonly is
+     * true, it will return if the operation can be performed, but it
+     * will not do the operation.
+     */
+    int (*do_hw_op)(struct IPMIInterface *s, enum ipmi_op op, int checkonly);
+
+    /*
+     * Enable/disable irqs on the interface when the BMC requests this.
+     */
+    void (*set_irq_enable)(struct IPMIInterface *s, int val);
+
+    /*
+     * Handle an event that occurred on the interface, generally the.
+     * target writing to a register.
+     */
+    void (*handle_if_event)(struct IPMIInterface *s);
+
+    /*
+     * The interfaces use this to perform certain ops
+     */
+    void (*set_atn)(struct IPMIInterface *s, int val, int irq);
+
+    /*
+     * Got an IPMI warm/cold reset.
+     */
+    void (*reset)(struct IPMIInterface *s, bool is_cold);
+
+    /*
+     * Handle a response from the bmc.
+     */
+    void (*handle_rsp)(struct IPMIInterface *s, uint8_t msg_id,
+                       unsigned char *rsp, unsigned int rsp_len);
+
+    /*
+     * Set by the owner to hold the backend data for the interface.
+     */
+    void *(*get_backend_data)(struct IPMIInterface *s);
+
+    /*
+     * Return the firmware info for a device.
+     */
+    void (*get_fwinfo)(struct IPMIInterface *s, IPMIFwInfo *info);
+};
+
+/*
+ * Define a BMC simulator (or perhaps a connection to a real BMC)
+ */
+#define TYPE_IPMI_BMC "ipmi-bmc"
+OBJECT_DECLARE_TYPE(IPMIBmc, IPMIBmcClass,
+                    IPMI_BMC)
+
+struct IPMIBmc {
+    DeviceState parent;
+
+    uint8_t slave_addr;
+
+    IPMIInterface *intf;
+};
+
+struct IPMIBmcClass {
+    DeviceClass parent;
+
+    /* Called when the system resets to report to the bmc. */
+    void (*handle_reset)(struct IPMIBmc *s);
+
+    /*
+     * Handle a command to the bmc.
+     */
+    void (*handle_command)(struct IPMIBmc *s,
+                           uint8_t *cmd, unsigned int cmd_len,
+                           unsigned int max_cmd_len,
+                           uint8_t msg_id);
+};
+
+/*
+ * Add a link property to obj that points to a BMC.
+ */
+void ipmi_bmc_find_and_link(Object *obj, Object **bmc);
+
+#ifdef IPMI_DEBUG
+#define ipmi_debug(fs, ...) \
+    fprintf(stderr, "IPMI (%s): " fs, __func__, ##__VA_ARGS__)
+#else
+#define ipmi_debug(fs, ...)
+#endif
+
+struct ipmi_sdr_header {
+    uint8_t  rec_id[2];
+    uint8_t  sdr_version;               /* 0x51 */
+    uint8_t  rec_type;
+    uint8_t  rec_length;
+};
+#define IPMI_SDR_HEADER_SIZE     sizeof(struct ipmi_sdr_header)
+
+#define ipmi_sdr_recid(sdr) ((sdr)->rec_id[0] | ((sdr)->rec_id[1] << 8))
+#define ipmi_sdr_length(sdr) ((sdr)->rec_length + IPMI_SDR_HEADER_SIZE)
+
+/*
+ * 43.2 SDR Type 02h. Compact Sensor Record
+ */
+#define IPMI_SDR_COMPACT_TYPE    2
+
+struct ipmi_sdr_compact {
+    struct ipmi_sdr_header header;
+
+    uint8_t  sensor_owner_id;
+    uint8_t  sensor_owner_lun;
+    uint8_t  sensor_owner_number;       /* byte 8 */
+    uint8_t  entity_id;
+    uint8_t  entity_instance;
+    uint8_t  sensor_init;
+    uint8_t  sensor_caps;
+    uint8_t  sensor_type;
+    uint8_t  reading_type;
+    uint8_t  assert_mask[2];            /* byte 16 */
+    uint8_t  deassert_mask[2];
+    uint8_t  discrete_mask[2];
+    uint8_t  sensor_unit1;
+    uint8_t  sensor_unit2;
+    uint8_t  sensor_unit3;
+    uint8_t  sensor_direction[2];       /* byte 24 */
+    uint8_t  positive_threshold;
+    uint8_t  negative_threshold;
+    uint8_t  reserved[3];
+    uint8_t  oem;
+    uint8_t  id_str_len;                /* byte 32 */
+    uint8_t  id_string[16];
+};
+
+typedef uint8_t ipmi_sdr_compact_buffer[sizeof(struct ipmi_sdr_compact)];
+
+int ipmi_bmc_sdr_find(IPMIBmc *b, uint16_t recid,
+                      const struct ipmi_sdr_compact **sdr, uint16_t *nextrec);
+void ipmi_bmc_gen_event(IPMIBmc *b, uint8_t *evt, bool log);
+
+#define TYPE_IPMI_BMC_SIMULATOR "ipmi-bmc-sim"
+OBJECT_DECLARE_SIMPLE_TYPE(IPMIBmcSim, IPMI_BMC_SIMULATOR)
+
+
+typedef struct RspBuffer {
+    uint8_t buffer[MAX_IPMI_MSG_SIZE];
+    unsigned int len;
+} RspBuffer;
+
+static inline void rsp_buffer_set_error(RspBuffer *rsp, uint8_t byte)
+{
+    rsp->buffer[2] = byte;
+}
+
+/* Add a byte to the response. */
+static inline void rsp_buffer_push(RspBuffer *rsp, uint8_t byte)
+{
+    if (rsp->len >= sizeof(rsp->buffer)) {
+        rsp_buffer_set_error(rsp, IPMI_CC_REQUEST_DATA_TRUNCATED);
+        return;
+    }
+    rsp->buffer[rsp->len++] = byte;
+}
+
+typedef struct IPMICmdHandler {
+    void (*cmd_handler)(IPMIBmcSim *s,
+                        uint8_t *cmd, unsigned int cmd_len,
+                        RspBuffer *rsp);
+    unsigned int cmd_len_min;
+} IPMICmdHandler;
+
+typedef struct IPMINetfn {
+    unsigned int cmd_nums;
+    const IPMICmdHandler *cmd_handlers;
+} IPMINetfn;
+
+int ipmi_sim_register_netfn(IPMIBmcSim *s, unsigned int netfn,
+                            const IPMINetfn *netfnd);
+
+#endif
diff --git a/include/hw/ipmi/ipmi_bt.h b/include/hw/ipmi/ipmi_bt.h
new file mode 100644 (file)
index 0000000..8a4316e
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * QEMU IPMI BT emulation
+ *
+ * Copyright (c) 2015 Corey Minyard, MontaVista Software, LLC
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_IPMI_BT_H
+#define HW_IPMI_BT_H
+
+#include "hw/ipmi/ipmi.h"
+
+typedef struct IPMIBT {
+    IPMIBmc *bmc;
+
+    bool do_wake;
+
+    bool obf_irq_set;
+    bool atn_irq_set;
+    bool irqs_enabled;
+
+    uint8_t outmsg[MAX_IPMI_MSG_SIZE];
+    uint32_t outpos;
+    uint32_t outlen;
+
+    uint8_t inmsg[MAX_IPMI_MSG_SIZE];
+    uint32_t inlen;
+
+    uint8_t control_reg;
+    uint8_t mask_reg;
+
+    /*
+     * This is a response number that we send with the command to make
+     * sure that the response matches the command.
+     */
+    uint8_t waiting_rsp;
+    uint8_t waiting_seq;
+
+    uint32_t io_base;
+    unsigned long io_length;
+    MemoryRegion io;
+    unsigned long size_mask;
+
+    void (*raise_irq)(struct IPMIBT *ib);
+    void (*lower_irq)(struct IPMIBT *ib);
+    void *opaque;
+
+    bool use_irq;
+} IPMIBT;
+
+void ipmi_bt_get_fwinfo(IPMIBT *ik, IPMIFwInfo *info);
+void ipmi_bt_class_init(IPMIInterfaceClass *iic);
+extern const VMStateDescription vmstate_IPMIBT;
+int ipmi_bt_vmstate_post_load(void *opaque, int version);
+
+#endif /* HW_IPMI_BT_H */
diff --git a/include/hw/ipmi/ipmi_kcs.h b/include/hw/ipmi/ipmi_kcs.h
new file mode 100644 (file)
index 0000000..6e6ef4c
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * QEMU IPMI KCS emulation
+ *
+ * Copyright (c) 2015,2017 Corey Minyard, MontaVista Software, LLC
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_IPMI_KCS_H
+#define HW_IPMI_KCS_H
+
+#include "hw/ipmi/ipmi.h"
+
+typedef struct IPMIKCS {
+    IPMIBmc *bmc;
+
+    bool do_wake;
+
+    bool obf_irq_set;
+    bool atn_irq_set;
+    bool irqs_enabled;
+
+    uint8_t outmsg[MAX_IPMI_MSG_SIZE];
+    uint32_t outpos;
+    uint32_t outlen;
+
+    uint8_t inmsg[MAX_IPMI_MSG_SIZE];
+    uint32_t inlen;
+    bool write_end;
+
+    uint8_t status_reg;
+    uint8_t data_out_reg;
+
+    int16_t data_in_reg; /* -1 means not written */
+    int16_t cmd_reg;
+
+    /*
+     * This is a response number that we send with the command to make
+     * sure that the response matches the command.
+     */
+    uint8_t waiting_rsp;
+
+    uint32_t io_base;
+    unsigned long io_length;
+    MemoryRegion io;
+    unsigned long size_mask;
+
+    void (*raise_irq)(struct IPMIKCS *ik);
+    void (*lower_irq)(struct IPMIKCS *ik);
+    void *opaque;
+
+    bool use_irq;
+} IPMIKCS;
+
+void ipmi_kcs_get_fwinfo(IPMIKCS *ik, IPMIFwInfo *info);
+void ipmi_kcs_class_init(IPMIInterfaceClass *iic);
+extern const VMStateDescription vmstate_IPMIKCS;
+int ipmi_kcs_vmstate_post_load(void *opaque, int version);
+
+#endif /* HW_IPMI_KCS_H */
index dc7abf199e360e387dccfba520a3002f5f3ab7b7..645b73d251237b140a847e66a81fb31e41556171 100644 (file)
@@ -46,11 +46,6 @@ void qemu_free_irq(qemu_irq irq);
 /* Returns a new IRQ with opposite polarity.  */
 qemu_irq qemu_irq_invert(qemu_irq irq);
 
-/* Returns a new IRQ which feeds into both the passed IRQs.
- * It's probably better to use the TYPE_SPLIT_IRQ device instead.
- */
-qemu_irq qemu_irq_split(qemu_irq irq1, qemu_irq irq2);
-
 /* For internal use in qtest.  Similar to qemu_irq_split, but operating
    on an existing vector of qemu_irq.  */
 void qemu_irq_intercept_in(qemu_irq *gpio_in, qemu_irq_handler handler, int n);
index a6ae8a583feb65b982e7a8b7221eadeceb17db03..f9dcc4163ef3699c1fb127b19f9fc69bba6ea1f5 100644 (file)
@@ -35,7 +35,7 @@
 OBJECT_DECLARE_TYPE(PICCommonState, PICCommonClass, PIC_COMMON)
 
 struct PICCommonClass {
-    ISADeviceClass parent_class;
+    DeviceClass parent_class;
 
     void (*pre_save)(PICCommonState *s);
     void (*post_load)(PICCommonState *s);
@@ -61,6 +61,7 @@ struct PICCommonState {
     uint8_t single_mode; /* true if slave pic is not initialized */
     uint8_t elcr; /* PIIX edge/trigger selection*/
     uint8_t elcr_mask;
+    uint8_t ltim; /* Edge/Level Bank Select (pre-PIIX, chip-wide) */
     qemu_irq int_out[1];
     uint32_t master; /* reflects /SP input pin */
     uint32_t iobase;
@@ -72,8 +73,5 @@ struct PICCommonState {
 void pic_reset_common(PICCommonState *s);
 ISADevice *i8259_init_chip(const char *name, ISABus *bus, bool master);
 void pic_stat_update_irq(int irq, int level);
-bool pic_get_statistics(InterruptStatsProvider *obj,
-                        uint64_t **irq_counts, unsigned int *nb_irqs);
-void pic_print_info(InterruptStatsProvider *obj, Monitor *mon);
 
 #endif /* QEMU_I8259_INTERNAL_H */
index ddaae89a8537d3f57997ec37a8b7638f286bf0b6..40d6224a4e62bd465731688ba8de185f749b6191 100644 (file)
 #define ISA_NUM_IRQS 16
 
 #define TYPE_ISA_DEVICE "isa-device"
-OBJECT_DECLARE_TYPE(ISADevice, ISADeviceClass, ISA_DEVICE)
+OBJECT_DECLARE_SIMPLE_TYPE(ISADevice, ISA_DEVICE)
 
 #define TYPE_ISA_BUS "ISA"
 OBJECT_DECLARE_SIMPLE_TYPE(ISABus, ISA_BUS)
 
-#define TYPE_APPLE_SMC "isa-applesmc"
-#define APPLESMC_MAX_DATA_LENGTH       32
-#define APPLESMC_PROP_IO_BASE "iobase"
-
-static inline uint16_t applesmc_port(void)
-{
-    Object *obj = object_resolve_path_type("", TYPE_APPLE_SMC, NULL);
-
-    if (obj) {
-        return object_property_get_uint(obj, APPLESMC_PROP_IO_BASE, NULL);
-    }
-    return 0;
-}
-
 #define TYPE_ISADMA "isa-dma"
 
 typedef struct IsaDmaClass IsaDmaClass;
@@ -62,11 +48,6 @@ struct IsaDmaClass {
                              void *opaque);
 };
 
-struct ISADeviceClass {
-    DeviceClass parent_class;
-    void (*build_aml)(ISADevice *dev, Aml *scope);
-};
-
 struct ISABus {
     /*< private >*/
     BusState parent_obj;
@@ -74,7 +55,7 @@ struct ISABus {
 
     MemoryRegion *address_space;
     MemoryRegion *address_space_io;
-    qemu_irq *irqs;
+    qemu_irq *irqs_in;
     IsaDma *dma[2];
 };
 
@@ -83,28 +64,34 @@ struct ISADevice {
     DeviceState parent_obj;
     /*< public >*/
 
-    int8_t isairq[2];      /* -1 = unassigned */
-    int nirqs;
     int ioport_id;
 };
 
 ISABus *isa_bus_new(DeviceState *dev, MemoryRegion *address_space,
                     MemoryRegion *address_space_io, Error **errp);
-void isa_bus_irqs(ISABus *bus, qemu_irq *irqs);
-qemu_irq isa_get_irq(ISADevice *dev, unsigned isairq);
-void isa_init_irq(ISADevice *dev, qemu_irq *p, unsigned isairq);
-void isa_connect_gpio_out(ISADevice *isadev, int gpioirq, unsigned isairq);
+void isa_bus_register_input_irqs(ISABus *bus, qemu_irq *irqs_in);
 void isa_bus_dma(ISABus *bus, IsaDma *dma8, IsaDma *dma16);
-IsaDma *isa_get_dma(ISABus *bus, int nchan);
-MemoryRegion *isa_address_space(ISADevice *dev);
-MemoryRegion *isa_address_space_io(ISADevice *dev);
+IsaDma *isa_bus_get_dma(ISABus *bus, int nchan);
+/**
+ * isa_bus_get_irq: Return input IRQ on ISA bus.
+ * @bus: the #ISABus to plug ISA devices on.
+ * @irqnum: the ISA IRQ number.
+ *
+ * Return IRQ @irqnum from the PIC associated on ISA @bus.
+ */
+qemu_irq isa_bus_get_irq(ISABus *bus, unsigned irqnum);
 ISADevice *isa_new(const char *name);
 ISADevice *isa_try_new(const char *name);
 bool isa_realize_and_unref(ISADevice *dev, ISABus *bus, Error **errp);
 ISADevice *isa_create_simple(ISABus *bus, const char *name);
 
 ISADevice *isa_vga_init(ISABus *bus);
-void isa_build_aml(ISABus *bus, Aml *scope);
+
+qemu_irq isa_get_irq(ISADevice *dev, unsigned isairq);
+void isa_connect_gpio_out(ISADevice *isadev, int gpioirq, unsigned isairq);
+MemoryRegion *isa_address_space(ISADevice *dev);
+MemoryRegion *isa_address_space_io(ISADevice *dev);
+ISABus *isa_bus_from_device(ISADevice *dev);
 
 /**
  * isa_register_ioport: Install an I/O port region on the ISA bus.
@@ -132,18 +119,14 @@ void isa_register_ioport(ISADevice *dev, MemoryRegion *io, uint16_t start);
  * @portio: the ports, sorted by offset.
  * @opaque: passed into the portio callbacks.
  * @name: passed into memory_region_init_io.
+ *
+ * Returns: 0 on success, negative error code otherwise (e.g. if the
+ *          ISA bus is not available)
  */
-void isa_register_portio_list(ISADevice *dev,
-                              PortioList *piolist,
-                              uint16_t start,
-                              const MemoryRegionPortio *portio,
-                              void *opaque, const char *name);
-
-static inline ISABus *isa_bus_from_device(ISADevice *d)
-{
-    return ISA_BUS(qdev_get_parent_bus(DEVICE(d)));
-}
-
-#define TYPE_PIIX4_PCI_DEVICE "piix4-isa"
+int isa_register_portio_list(ISADevice *dev,
+                             PortioList *piolist,
+                             uint16_t start,
+                             const MemoryRegionPortio *portio,
+                             void *opaque, const char *name);
 
 #endif
index b9f5c19155f25c8be4bc14cc33c445024f7a9fe4..0dc45104d472dd69d7612512f141a26cef5cf0a9 100644 (file)
@@ -44,7 +44,7 @@ typedef struct ISASuperIOFuncs {
 
 struct ISASuperIOClass {
     /*< private >*/
-    ISADeviceClass parent_class;
+    DeviceClass parent_class;
     /*< public >*/
     DeviceRealize parent_realize;
 
index f23f45dfb1d0d4df5d43b00a04fd2bd6718d62bb..da1722daf2b5a33f418515b03ce1e5b51dc13ea3 100644 (file)
@@ -1,14 +1,39 @@
 #ifndef HW_VT82C686_H
 #define HW_VT82C686_H
 
+#include "hw/pci/pci_device.h"
+#include "audio/audio.h"
 
-#define TYPE_VT82C686B_SUPERIO "vt82c686b-superio"
+#define TYPE_VT82C686B_ISA "vt82c686b-isa"
+#define TYPE_VT82C686B_USB_UHCI "vt82c686b-usb-uhci"
+#define TYPE_VT8231_ISA "vt8231-isa"
+#define TYPE_VIA_AC97 "via-ac97"
+#define TYPE_VIA_IDE "via-ide"
+#define TYPE_VIA_MC97 "via-mc97"
 
-/* vt82c686.c */
-ISABus *vt82c686b_isa_init(PCIBus * bus, int devfn);
-void vt82c686b_ac97_init(PCIBus *bus, int devfn);
-void vt82c686b_mc97_init(PCIBus *bus, int devfn);
-I2CBus *vt82c686b_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
-                          qemu_irq sci_irq);
+typedef struct {
+    uint8_t stat;
+    uint8_t type;
+    uint32_t base;
+    uint32_t curr;
+    uint32_t addr;
+    uint32_t clen;
+} ViaAC97SGDChannel;
+
+OBJECT_DECLARE_SIMPLE_TYPE(ViaAC97State, VIA_AC97);
+
+struct ViaAC97State {
+    PCIDevice dev;
+    QEMUSoundCard card;
+    MemoryRegion sgd;
+    MemoryRegion fm;
+    MemoryRegion midi;
+    SWVoiceOut *vo;
+    ViaAC97SGDChannel aur;
+    uint16_t codec_regs[128];
+    uint32_t ac97_cmd;
+};
+
+void via_isa_set_irq(PCIDevice *d, int n, int level);
 
 #endif
index a9eeea39521d2d5b5e9b73e0fcbd4d4ce9292046..8685e27334f9c046f651a702bd27a92760c4d211 100644 (file)
@@ -40,8 +40,8 @@ ssize_t load_image_size(const char *filename, void *addr, size_t size);
  *
  * Returns the size of the loaded image on success, -1 otherwise.
  */
-int load_image_targphys_as(const char *filename,
-                           hwaddr addr, uint64_t max_sz, AddressSpace *as);
+ssize_t load_image_targphys_as(const char *filename,
+                               hwaddr addr, uint64_t max_sz, AddressSpace *as);
 
 /**load_targphys_hex_as:
  * @filename: Path to the .hex file
@@ -53,14 +53,15 @@ int load_image_targphys_as(const char *filename,
  *
  * Returns the size of the loaded .hex file on success, -1 otherwise.
  */
-int load_targphys_hex_as(const char *filename, hwaddr *entry, AddressSpace *as);
+ssize_t load_targphys_hex_as(const char *filename, hwaddr *entry,
+                             AddressSpace *as);
 
 /** load_image_targphys:
  * Same as load_image_targphys_as(), but doesn't allow the caller to specify
  * an AddressSpace.
  */
-int load_image_targphys(const char *filename, hwaddr,
-                        uint64_t max_sz);
+ssize_t load_image_targphys(const char *filename, hwaddr,
+                            uint64_t max_sz);
 
 /**
  * load_image_mr: load an image into a memory region
@@ -73,7 +74,7 @@ int load_image_targphys(const char *filename, hwaddr,
  * If the file is larger than the memory region's size the call will fail.
  * Returns -1 on failure, or the size of the file.
  */
-int load_image_mr(const char *filename, MemoryRegion *mr);
+ssize_t load_image_mr(const char *filename, MemoryRegion *mr);
 
 /* This is the limit on the maximum uncompressed image size that
  * load_image_gzipped_buffer() and load_image_gzipped() will read. It prevents
@@ -81,16 +82,35 @@ int load_image_mr(const char *filename, MemoryRegion *mr);
  */
 #define LOAD_IMAGE_MAX_GUNZIP_BYTES (256 << 20)
 
-int load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
-                              uint8_t **buffer);
-int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz);
+ssize_t load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
+                                  uint8_t **buffer);
+ssize_t load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz);
+
+/**
+ * unpack_efi_zboot_image:
+ * @buffer: pointer to a variable holding the address of a buffer containing the
+ *          image
+ * @size: pointer to a variable holding the size of the buffer
+ *
+ * Check whether the buffer contains a EFI zboot image, and if it does, extract
+ * the compressed payload and decompress it into a new buffer. If successful,
+ * the old buffer is freed, and the *buffer and size variables pointed to by the
+ * function arguments are updated to refer to the newly populated buffer.
+ *
+ * Returns 0 if the image could not be identified as a EFI zboot image.
+ * Returns -1 if the buffer contents were identified as a EFI zboot image, but
+ * unpacking failed for any reason.
+ * Returns the size of the decompressed payload if decompression was performed
+ * successfully.
+ */
+ssize_t unpack_efi_zboot_image(uint8_t **buffer, int *size);
 
 #define ELF_LOAD_FAILED       -1
 #define ELF_LOAD_NOT_ELF      -2
 #define ELF_LOAD_WRONG_ARCH   -3
 #define ELF_LOAD_WRONG_ENDIAN -4
 #define ELF_LOAD_TOO_BIG      -5
-const char *load_elf_strerror(int error);
+const char *load_elf_strerror(ssize_t error);
 
 /** load_elf_ram_sym:
  * @filename: Path of ELF file
@@ -128,48 +148,48 @@ const char *load_elf_strerror(int error);
 typedef void (*symbol_fn_t)(const char *st_name, int st_info,
                             uint64_t st_value, uint64_t st_size);
 
-int load_elf_ram_sym(const char *filename,
-                     uint64_t (*elf_note_fn)(void *, void *, bool),
-                     uint64_t (*translate_fn)(void *, uint64_t),
-                     void *translate_opaque, uint64_t *pentry,
-                     uint64_t *lowaddr, uint64_t *highaddr, uint32_t *pflags,
-                     int big_endian, int elf_machine,
-                     int clear_lsb, int data_swab,
-                     AddressSpace *as, bool load_rom, symbol_fn_t sym_cb);
+ssize_t load_elf_ram_sym(const char *filename,
+                         uint64_t (*elf_note_fn)(void *, void *, bool),
+                         uint64_t (*translate_fn)(void *, uint64_t),
+                         void *translate_opaque, uint64_t *pentry,
+                         uint64_t *lowaddr, uint64_t *highaddr,
+                         uint32_t *pflags, int big_endian, int elf_machine,
+                         int clear_lsb, int data_swab,
+                         AddressSpace *as, bool load_rom, symbol_fn_t sym_cb);
 
 /** load_elf_ram:
  * Same as load_elf_ram_sym(), but doesn't allow the caller to specify a
  * symbol callback function
  */
-int load_elf_ram(const char *filename,
-                 uint64_t (*elf_note_fn)(void *, void *, bool),
-                 uint64_t (*translate_fn)(void *, uint64_t),
-                 void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
-                 uint64_t *highaddr, uint32_t *pflags, int big_endian,
-                 int elf_machine, int clear_lsb, int data_swab,
-                 AddressSpace *as, bool load_rom);
+ssize_t load_elf_ram(const char *filename,
+                     uint64_t (*elf_note_fn)(void *, void *, bool),
+                     uint64_t (*translate_fn)(void *, uint64_t),
+                     void *translate_opaque, uint64_t *pentry,
+                     uint64_t *lowaddr, uint64_t *highaddr, uint32_t *pflags,
+                     int big_endian, int elf_machine, int clear_lsb,
+                     int data_swab, AddressSpace *as, bool load_rom);
 
 /** load_elf_as:
  * Same as load_elf_ram(), but always loads the elf as ROM
  */
-int load_elf_as(const char *filename,
-                uint64_t (*elf_note_fn)(void *, void *, bool),
-                uint64_t (*translate_fn)(void *, uint64_t),
-                void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
-                uint64_t *highaddr, uint32_t *pflags, int big_endian,
-                int elf_machine, int clear_lsb, int data_swab,
-                AddressSpace *as);
+ssize_t load_elf_as(const char *filename,
+                    uint64_t (*elf_note_fn)(void *, void *, bool),
+                    uint64_t (*translate_fn)(void *, uint64_t),
+                    void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
+                    uint64_t *highaddr, uint32_t *pflags, int big_endian,
+                    int elf_machine, int clear_lsb, int data_swab,
+                    AddressSpace *as);
 
 /** load_elf:
  * Same as load_elf_as(), but doesn't allow the caller to specify an
  * AddressSpace.
  */
-int load_elf(const char *filename,
-             uint64_t (*elf_note_fn)(void *, void *, bool),
-             uint64_t (*translate_fn)(void *, uint64_t),
-             void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
-             uint64_t *highaddr, uint32_t *pflags, int big_endian,
-             int elf_machine, int clear_lsb, int data_swab);
+ssize_t load_elf(const char *filename,
+                 uint64_t (*elf_note_fn)(void *, void *, bool),
+                 uint64_t (*translate_fn)(void *, uint64_t),
+                 void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
+                 uint64_t *highaddr, uint32_t *pflags, int big_endian,
+                 int elf_machine, int clear_lsb, int data_swab);
 
 /** load_elf_hdr:
  * @filename: Path of ELF file
@@ -183,8 +203,8 @@ int load_elf(const char *filename,
  */
 void load_elf_hdr(const char *filename, void *hdr, bool *is64, Error **errp);
 
-int load_aout(const char *filename, hwaddr addr, int max_sz,
-              int bswap_needed, hwaddr target_page_size);
+ssize_t load_aout(const char *filename, hwaddr addr, int max_sz,
+                  int bswap_needed, hwaddr target_page_size);
 
 #define LOAD_UIMAGE_LOADADDR_INVALID (-1)
 
@@ -205,19 +225,19 @@ int load_aout(const char *filename, hwaddr addr, int max_sz,
  *
  * Returns the size of the loaded image on success, -1 otherwise.
  */
-int load_uimage_as(const char *filename, hwaddr *ep,
-                   hwaddr *loadaddr, int *is_linux,
-                   uint64_t (*translate_fn)(void *, uint64_t),
-                   void *translate_opaque, AddressSpace *as);
+ssize_t load_uimage_as(const char *filename, hwaddr *ep,
+                       hwaddr *loadaddr, int *is_linux,
+                       uint64_t (*translate_fn)(void *, uint64_t),
+                       void *translate_opaque, AddressSpace *as);
 
 /** load_uimage:
  * Same as load_uimage_as(), but doesn't allow the caller to specify an
  * AddressSpace.
  */
-int load_uimage(const char *filename, hwaddr *ep,
-                hwaddr *loadaddr, int *is_linux,
-                uint64_t (*translate_fn)(void *, uint64_t),
-                void *translate_opaque);
+ssize_t load_uimage(const char *filename, hwaddr *ep,
+                    hwaddr *loadaddr, int *is_linux,
+                    uint64_t (*translate_fn)(void *, uint64_t),
+                    void *translate_opaque);
 
 /**
  * load_ramdisk_as:
@@ -232,15 +252,15 @@ int load_uimage(const char *filename, hwaddr *ep,
  *
  * Returns the size of the loaded image on success, -1 otherwise.
  */
-int load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz,
-                    AddressSpace *as);
+ssize_t load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz,
+                        AddressSpace *as);
 
 /**
  * load_ramdisk:
  * Same as load_ramdisk_as(), but doesn't allow the caller to specify
  * an AddressSpace.
  */
-int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz);
+ssize_t load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz);
 
 ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen);
 
@@ -250,12 +270,9 @@ void pstrcpy_targphys(const char *name,
                       hwaddr dest, int buf_size,
                       const char *source);
 
-extern bool option_rom_has_mr;
-extern bool rom_file_has_mr;
-
-int rom_add_file(const char *file, const char *fw_dir,
-                 hwaddr addr, int32_t bootindex,
-                 bool option_rom, MemoryRegion *mr, AddressSpace *as);
+ssize_t rom_add_file(const char *file, const char *fw_dir,
+                     hwaddr addr, int32_t bootindex,
+                     bool has_option_rom, MemoryRegion *mr, AddressSpace *as);
 MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len,
                            size_t max_len, hwaddr addr,
                            const char *fw_file_name,
@@ -290,6 +307,37 @@ void rom_transaction_end(bool commit);
 
 int rom_copy(uint8_t *dest, hwaddr addr, size_t size);
 void *rom_ptr(hwaddr addr, size_t size);
+/**
+ * rom_ptr_for_as: Return a pointer to ROM blob data for the address
+ * @as: AddressSpace to look for the ROM blob in
+ * @addr: Address within @as
+ * @size: size of data required in bytes
+ *
+ * Returns: pointer into the data which backs the matching ROM blob,
+ * or NULL if no blob covers the address range.
+ *
+ * This function looks for a ROM blob which covers the specified range
+ * of bytes of length @size starting at @addr within the address space
+ * @as. This is useful for code which runs as part of board
+ * initialization or CPU reset which wants to read data that is part
+ * of a user-supplied guest image or other guest memory contents, but
+ * which runs before the ROM loader's reset function has copied the
+ * blobs into guest memory.
+ *
+ * rom_ptr_for_as() will look not just for blobs loaded directly to
+ * the specified address, but also for blobs which were loaded to an
+ * alias of the region at a different location in the AddressSpace.
+ * In other words, if a machine model has RAM at address 0x0000_0000
+ * which is aliased to also appear at 0x1000_0000, rom_ptr_for_as()
+ * will return the correct data whether the guest image was linked and
+ * loaded at 0x0000_0000 or 0x1000_0000.  Contrast rom_ptr(), which
+ * will only return data if the image load address is an exact match
+ * with the queried address.
+ *
+ * New code should prefer to use rom_ptr_for_as() instead of
+ * rom_ptr().
+ */
+void *rom_ptr_for_as(AddressSpace *as, hwaddr addr, size_t size);
 void hmp_info_roms(Monitor *mon, const QDict *qdict);
 
 #define rom_add_file_fixed(_f, _a, _i)          \
@@ -305,17 +353,25 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict);
 #define rom_add_blob_fixed_as(_f, _b, _l, _a, _as)      \
     rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, _as, true)
 
-#define PC_ROM_MIN_VGA     0xc0000
-#define PC_ROM_MIN_OPTION  0xc8000
-#define PC_ROM_MAX         0xe0000
-#define PC_ROM_ALIGN       0x800
-#define PC_ROM_SIZE        (PC_ROM_MAX - PC_ROM_MIN_VGA)
-
-int rom_add_vga(const char *file);
-int rom_add_option(const char *file, int32_t bootindex);
+ssize_t rom_add_vga(const char *file);
+ssize_t rom_add_option(const char *file, int32_t bootindex);
 
 /* This is the usual maximum in uboot, so if a uImage overflows this, it would
  * overflow on real hardware too. */
 #define UBOOT_MAX_GUNZIP_BYTES (64 << 20)
 
+typedef struct RomGap {
+    hwaddr base;
+    size_t size;
+} RomGap;
+
+/**
+ * rom_find_largest_gap_between: return largest gap between ROMs in given range
+ *
+ * Given a range of addresses, this function finds the largest
+ * contiguous subrange which has no ROMs loaded to it. That is,
+ * it finds the biggest gap which is free for use for other things.
+ */
+RomGap rom_find_largest_gap_between(hwaddr base, size_t size);
+
 #endif
diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h
new file mode 100644 (file)
index 0000000..674f465
--- /dev/null
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Definitions for loongarch board emulation.
+ *
+ * Copyright (C) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef HW_LOONGARCH_H
+#define HW_LOONGARCH_H
+
+#include "target/loongarch/cpu.h"
+#include "hw/boards.h"
+#include "qemu/queue.h"
+#include "hw/intc/loongarch_ipi.h"
+#include "hw/block/flash.h"
+
+#define LOONGARCH_MAX_CPUS      256
+
+#define VIRT_FWCFG_BASE         0x1e020000UL
+#define VIRT_BIOS_BASE          0x1c000000UL
+#define VIRT_BIOS_SIZE          (4 * MiB)
+#define VIRT_FLASH_SECTOR_SIZE  (128 * KiB)
+#define VIRT_FLASH_BASE         0x1d000000UL
+#define VIRT_FLASH_SIZE         (16 * MiB)
+
+#define VIRT_LOWMEM_BASE        0
+#define VIRT_LOWMEM_SIZE        0x10000000
+#define VIRT_HIGHMEM_BASE       0x90000000
+#define VIRT_GED_EVT_ADDR       0x100e0000
+#define VIRT_GED_MEM_ADDR       (VIRT_GED_EVT_ADDR + ACPI_GED_EVT_SEL_LEN)
+#define VIRT_GED_REG_ADDR       (VIRT_GED_MEM_ADDR + MEMORY_HOTPLUG_IO_LEN)
+
+struct LoongArchMachineState {
+    /*< private >*/
+    MachineState parent_obj;
+
+    MemoryRegion lowmem;
+    MemoryRegion highmem;
+    MemoryRegion bios;
+    bool         bios_loaded;
+    /* State for other subsystems/APIs: */
+    FWCfgState  *fw_cfg;
+    Notifier     machine_done;
+    Notifier     powerdown_notifier;
+    OnOffAuto    acpi;
+    char         *oem_id;
+    char         *oem_table_id;
+    DeviceState  *acpi_ged;
+    int          fdt_size;
+    DeviceState *platform_bus_dev;
+    PCIBus       *pci_bus;
+    PFlashCFI01  *flash;
+};
+
+#define TYPE_LOONGARCH_MACHINE  MACHINE_TYPE_NAME("virt")
+OBJECT_DECLARE_SIMPLE_TYPE(LoongArchMachineState, LOONGARCH_MACHINE)
+bool loongarch_is_acpi_enabled(LoongArchMachineState *lams);
+void loongarch_acpi_setup(LoongArchMachineState *lams);
+#endif
diff --git a/include/hw/m68k/mcf.h b/include/hw/m68k/mcf.h
new file mode 100644 (file)
index 0000000..5d9f876
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef HW_MCF_H
+#define HW_MCF_H
+/* Motorola ColdFire device prototypes.  */
+
+#include "exec/hwaddr.h"
+#include "target/m68k/cpu-qom.h"
+
+/* mcf_uart.c */
+uint64_t mcf_uart_read(void *opaque, hwaddr addr,
+                       unsigned size);
+void mcf_uart_write(void *opaque, hwaddr addr,
+                    uint64_t val, unsigned size);
+DeviceState *mcf_uart_create(qemu_irq irq, Chardev *chr);
+DeviceState *mcf_uart_create_mmap(hwaddr base, qemu_irq irq, Chardev *chr);
+
+/* mcf_intc.c */
+qemu_irq *mcf_intc_init(struct MemoryRegion *sysmem,
+                        hwaddr base,
+                        M68kCPU *cpu);
+
+/* mcf5206.c */
+#define TYPE_MCF5206_MBAR "mcf5206-mbar"
+
+#endif
diff --git a/include/hw/m68k/mcf_fec.h b/include/hw/m68k/mcf_fec.h
new file mode 100644 (file)
index 0000000..80d4f65
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * Definitions for the ColdFire Fast Ethernet Controller emulation.
+ *
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef HW_M68K_MCF_FEC_H
+#define HW_M68K_MCF_FEC_H
+#include "qom/object.h"
+
+#define TYPE_MCF_FEC_NET "mcf-fec"
+OBJECT_DECLARE_SIMPLE_TYPE(mcf_fec_state, MCF_FEC_NET)
+
+#define FEC_NUM_IRQ 13
+
+#endif
diff --git a/include/hw/m68k/next-cube.h b/include/hw/m68k/next-cube.h
new file mode 100644 (file)
index 0000000..4357728
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * NeXT Cube
+ *
+ * Copyright (c) 2011 Bryce Lanham
+ *
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ */
+
+#ifndef NEXT_CUBE_H
+#define NEXT_CUBE_H
+
+#define TYPE_NEXTFB "next-fb"
+
+#define TYPE_NEXTKBD "next-kbd"
+
+enum next_dma_chan {
+    NEXTDMA_FD,
+    NEXTDMA_ENRX,
+    NEXTDMA_ENTX,
+    NEXTDMA_SCSI,
+    NEXTDMA_SCC,
+    NEXTDMA_SND
+};
+
+#define DMA_ENABLE      0x01000000
+#define DMA_SUPDATE     0x02000000
+#define DMA_COMPLETE    0x08000000
+
+#define DMA_M2DEV       0x0
+#define DMA_SETENABLE   0x00010000
+#define DMA_SETSUPDATE  0x00020000
+#define DMA_DEV2M       0x00040000
+#define DMA_CLRCOMPLETE 0x00080000
+#define DMA_RESET       0x00100000
+
+enum next_irqs {
+    NEXT_FD_I,
+    NEXT_KBD_I,
+    NEXT_PWR_I,
+    NEXT_ENRX_I,
+    NEXT_ENTX_I,
+    NEXT_SCSI_I,
+    NEXT_CLK_I,
+    NEXT_SCC_I,
+    NEXT_ENTX_DMA_I,
+    NEXT_ENRX_DMA_I,
+    NEXT_SCSI_DMA_I,
+    NEXT_SCC_DMA_I,
+    NEXT_SND_I,
+    NEXT_NUM_IRQS
+};
+
+#endif /* NEXT_CUBE_H */
diff --git a/include/hw/m68k/q800-glue.h b/include/hw/m68k/q800-glue.h
new file mode 100644 (file)
index 0000000..ceb916d
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * QEMU q800 logic GLUE (General Logic Unit)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_Q800_GLUE_H
+#define HW_Q800_GLUE_H
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+
+#define TYPE_GLUE "q800-glue"
+OBJECT_DECLARE_SIMPLE_TYPE(GLUEState, GLUE)
+
+struct GLUEState {
+    SysBusDevice parent_obj;
+
+    M68kCPU *cpu;
+    uint8_t ipr;
+    uint8_t auxmode;
+    qemu_irq irqs[2];
+    QEMUTimer *nmi_release;
+};
+
+#define GLUE_IRQ_IN_VIA1       0
+#define GLUE_IRQ_IN_VIA2       1
+#define GLUE_IRQ_IN_SONIC      2
+#define GLUE_IRQ_IN_ESCC       3
+#define GLUE_IRQ_IN_NMI        4
+#define GLUE_IRQ_IN_ASC        5
+
+#define GLUE_IRQ_NUBUS_9       0
+#define GLUE_IRQ_ASC           1
+
+#endif
diff --git a/include/hw/m68k/q800.h b/include/hw/m68k/q800.h
new file mode 100644 (file)
index 0000000..a9661f6
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * QEMU Motorla 680x0 Macintosh hardware System Emulator
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_Q800_H
+#define HW_Q800_H
+
+#include "hw/boards.h"
+#include "qom/object.h"
+#include "target/m68k/cpu-qom.h"
+#include "exec/memory.h"
+#include "hw/m68k/q800-glue.h"
+#include "hw/misc/mac_via.h"
+#include "hw/net/dp8393x.h"
+#include "hw/char/escc.h"
+#include "hw/or-irq.h"
+#include "hw/scsi/esp.h"
+#include "hw/block/swim.h"
+#include "hw/nubus/mac-nubus-bridge.h"
+#include "hw/display/macfb.h"
+#include "hw/misc/djmemc.h"
+#include "hw/misc/iosb.h"
+#include "hw/audio/asc.h"
+
+/*
+ * The main Q800 machine
+ */
+
+struct Q800MachineState {
+    MachineState parent_obj;
+
+    bool easc;
+    M68kCPU cpu;
+    MemoryRegion rom;
+    MemoryRegion rom_alias;
+    GLUEState glue;
+    MOS6522Q800VIA1State via1;
+    MOS6522Q800VIA2State via2;
+    dp8393xState dp8393x;
+    ESCCState escc;
+    OrIRQState escc_orgate;
+    SysBusESPState esp;
+    Swim swim;
+    MacNubusBridge mac_nubus_bridge;
+    MacfbNubusState macfb;
+    DJMEMCState djmemc;
+    IOSBState iosb;
+    ASCState asc;
+    MemoryRegion ramio;
+    MemoryRegion macio;
+    MemoryRegion macio_alias;
+    MemoryRegion machine_id;
+    MemoryRegion escc_alias;
+};
+
+#define TYPE_Q800_MACHINE MACHINE_TYPE_NAME("q800")
+OBJECT_DECLARE_SIMPLE_TYPE(Q800MachineState, Q800_MACHINE)
+
+#endif
index 48d2611fc5eff8911040e0e4fa19dbd533e97616..a1d62cc551ab592941033ecb1c221d332ccb410e 100644 (file)
@@ -14,6 +14,7 @@
 #define MEMORY_DEVICE_H
 
 #include "hw/qdev-core.h"
+#include "qemu/typedefs.h"
 #include "qapi/qapi-types-machine.h"
 #include "qom/object.h"
 
@@ -37,10 +38,25 @@ typedef struct MemoryDeviceState MemoryDeviceState;
  * address in guest physical memory can either be specified explicitly
  * or get assigned automatically.
  *
+ * Some memory device might not own a memory region in certain device
+ * configurations. Such devices can logically get (un)plugged, however,
+ * empty memory devices are mostly ignored by the memory device code.
+ *
  * Conceptually, memory devices only span one memory region. If multiple
  * successive memory regions are used, a covering memory region has to
  * be provided. Scattered memory regions are not supported for single
  * devices.
+ *
+ * The device memory region returned via @get_memory_region may either be a
+ * single RAM memory region or a memory region container with subregions
+ * that are RAM memory regions or aliases to RAM memory regions. Other
+ * memory regions or subregions are not supported.
+ *
+ * If the device memory region returned via @get_memory_region is a
+ * memory region container, it's supported to dynamically (un)map subregions
+ * as long as the number of memslots returned by @get_memslots() won't
+ * be exceeded and as long as all memory regions are of the same kind (e.g.,
+ * all RAM or all ROM).
  */
 struct MemoryDeviceClass {
     /* private */
@@ -79,7 +95,8 @@ struct MemoryDeviceClass {
     uint64_t (*get_plugged_size)(const MemoryDeviceState *md, Error **errp);
 
     /*
-     * Return the memory region of the memory device.
+     * Return the memory region of the memory device. If the device is
+     * completely empty, returns NULL without an error.
      *
      * Called when (un)plugging the memory device, to (un)map the
      * memory region in guest physical memory, but also to detect the
@@ -88,6 +105,28 @@ struct MemoryDeviceClass {
      */
     MemoryRegion *(*get_memory_region)(MemoryDeviceState *md, Error **errp);
 
+    /*
+     * Optional: Instruct the memory device to decide how many memory slots
+     * it requires, not exceeding the given limit.
+     *
+     * Called exactly once when pre-plugging the memory device, before
+     * querying the number of memslots using @get_memslots the first time.
+     */
+    void (*decide_memslots)(MemoryDeviceState *md, unsigned int limit);
+
+    /*
+     * Optional for memory devices that require only a single memslot,
+     * required for all other memory devices: Return the number of memslots
+     * (distinct RAM memory regions in the device memory region) that are
+     * required by the device.
+     *
+     * If this function is not implemented, the assumption is "1".
+     *
+     * Called when (un)plugging the memory device, to check if the requirements
+     * can be satisfied, and to do proper accounting.
+     */
+    unsigned int (*get_memslots)(MemoryDeviceState *md);
+
     /*
      * Optional: Return the desired minimum alignment of the device in guest
      * physical address space. The final alignment is computed based on this
@@ -105,8 +144,31 @@ struct MemoryDeviceClass {
                              MemoryDeviceInfo *info);
 };
 
+/*
+ * Traditionally, KVM/vhost in many setups supported 509 memslots, whereby
+ * 253 memslots were "reserved" for boot memory and other devices (such
+ * as PCI BARs, which can get mapped dynamically) and 256 memslots were
+ * dedicated for DIMMs. These magic numbers worked reliably in the past.
+ *
+ * Further, using many memslots can negatively affect performance, so setting
+ * the soft-limit of memslots used by memory devices to the traditional
+ * DIMM limit of 256 sounds reasonable.
+ *
+ * If we have less than 509 memslots, we will instruct memory devices that
+ * support automatically deciding how many memslots to use to only use a single
+ * one.
+ *
+ * Hotplugging vhost devices with at least 509 memslots is not expected to
+ * cause problems, not even when devices automatically decided how many memslots
+ * to use.
+ */
+#define MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT 256
+#define MEMORY_DEVICES_SAFE_MAX_MEMSLOTS 509
+
 MemoryDeviceInfoList *qmp_memory_device_list(void);
 uint64_t get_plugged_memory_size(void);
+unsigned int memory_devices_get_reserved_memslots(void);
+bool memory_devices_memslot_auto_decision_active(void);
 void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
                             const uint64_t *legacy_align, Error **errp);
 void memory_device_plug(MemoryDeviceState *md, MachineState *ms);
index c699842dd05f920baa777fbf1db47bf7e6c131b2..d3b763453a5aa309a1010681f5a0477cc2ce0556 100644 (file)
 #include "hw/acpi/aml-build.h"
 #include "qom/object.h"
 
-#define NVDIMM_DEBUG 0
-#define nvdimm_debug(fmt, ...)                                \
-    do {                                                      \
-        if (NVDIMM_DEBUG) {                                   \
-            fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__);  \
-        }                                                     \
-    } while (0)
-
 /*
  * The minimum label data size is required by NVDIMM Namespace
  * specification, see the chapter 2 Namespaces:
@@ -85,6 +77,12 @@ struct NVDIMMDevice {
      */
     bool unarmed;
 
+    /*
+     * Whether our DIMM is backed by ROM, and even label data cannot be
+     * written. If set, implies that "unarmed" is also set.
+     */
+    bool readonly;
+
     /*
      * The PPC64 - spapr requires each nvdimm device have a uuid.
      */
@@ -103,6 +101,8 @@ struct NVDIMMClass {
     /* write @size bytes from @buf to NVDIMM label data at @offset. */
     void (*write_label_data)(NVDIMMDevice *nvdimm, const void *buf,
                              uint64_t size, uint64_t offset);
+    void (*realize)(NVDIMMDevice *nvdimm, Error **errp);
+    void (*unrealize)(NVDIMMDevice *nvdimm);
 };
 
 #define NVDIMM_DSM_MEM_FILE     "etc/acpi/nvdimm-mem"
@@ -154,7 +154,8 @@ void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io,
 void nvdimm_build_srat(GArray *table_data);
 void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data,
                        BIOSLinker *linker, NVDIMMState *state,
-                       uint32_t ram_slots);
+                       uint32_t ram_slots, const char *oem_id,
+                       const char *oem_table_id);
 void nvdimm_plug(NVDIMMState *state);
 void nvdimm_acpi_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev);
 #endif
index 3d3db82641f8332108d2c6d282ae246400ab7c66..322bebe555b5f346b0ae7755e0396dbf706dc2f5 100644 (file)
@@ -56,9 +56,6 @@ struct PCDIMMDevice {
  * PCDIMMDeviceClass:
  * @realize: called after common dimm is realized so that the dimm based
  * devices get the chance to do specified operations.
- * @get_vmstate_memory_region: returns #MemoryRegion which indicates the
- * memory of @dimm should be kept during live migration. Will not fail
- * after the device was realized.
  */
 struct PCDIMMDeviceClass {
     /* private */
@@ -66,8 +63,7 @@ struct PCDIMMDeviceClass {
 
     /* public */
     void (*realize)(PCDIMMDevice *dimm, Error **errp);
-    MemoryRegion *(*get_vmstate_memory_region)(PCDIMMDevice *dimm,
-                                               Error **errp);
+    void (*unrealize)(PCDIMMDevice *dimm);
 };
 
 void pc_dimm_pre_plug(PCDIMMDevice *dimm, MachineState *machine,
diff --git a/include/hw/mem/sparse-mem.h b/include/hw/mem/sparse-mem.h
new file mode 100644 (file)
index 0000000..f9863b1
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * A sparse memory device. Useful for fuzzing
+ *
+ * Copyright Red Hat Inc., 2021
+ *
+ * Authors:
+ *  Alexander Bulekov   <alxndr@bu.edu>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef SPARSE_MEM_H
+#define SPARSE_MEM_H
+#define TYPE_SPARSE_MEM "sparse-mem"
+
+MemoryRegion *sparse_mem_init(uint64_t addr, uint64_t length);
+
+#endif
diff --git a/include/hw/mips/bios.h b/include/hw/mips/bios.h
new file mode 100644 (file)
index 0000000..44acb68
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef HW_MIPS_BIOS_H
+#define HW_MIPS_BIOS_H
+
+#include "qemu/units.h"
+#include "cpu.h"
+
+#define BIOS_SIZE (4 * MiB)
+#if TARGET_BIG_ENDIAN
+#define BIOS_FILENAME "mips_bios.bin"
+#else
+#define BIOS_FILENAME "mipsel_bios.bin"
+#endif
+
+#endif
diff --git a/include/hw/mips/bootloader.h b/include/hw/mips/bootloader.h
new file mode 100644 (file)
index 0000000..c32f6c2
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Utility for QEMU MIPS to generate it's simple bootloader
+ *
+ * Copyright (C) 2020 Jiaxun Yang <jiaxun.yang@flygoat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_MIPS_BOOTLOADER_H
+#define HW_MIPS_BOOTLOADER_H
+
+#include "exec/cpu-defs.h"
+
+void bl_gen_jump_to(void **ptr, target_ulong jump_addr);
+void bl_gen_jump_kernel(void **ptr,
+                        bool set_sp, target_ulong sp,
+                        bool set_a0, target_ulong a0,
+                        bool set_a1, target_ulong a1,
+                        bool set_a2, target_ulong a2,
+                        bool set_a3, target_ulong a3,
+                        target_ulong kernel_addr);
+void bl_gen_write_ulong(void **ptr, target_ulong addr, target_ulong val);
+void bl_gen_write_u32(void **ptr, target_ulong addr, uint32_t val);
+void bl_gen_write_u64(void **ptr, target_ulong addr, uint64_t val);
+
+#endif
diff --git a/include/hw/mips/cps.h b/include/hw/mips/cps.h
new file mode 100644 (file)
index 0000000..04d6362
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Coherent Processing System emulation.
+ *
+ * Copyright (c) 2016 Imagination Technologies
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MIPS_CPS_H
+#define MIPS_CPS_H
+
+#include "hw/sysbus.h"
+#include "hw/clock.h"
+#include "hw/misc/mips_cmgcr.h"
+#include "hw/intc/mips_gic.h"
+#include "hw/misc/mips_cpc.h"
+#include "hw/misc/mips_itu.h"
+#include "target/mips/cpu.h"
+#include "qom/object.h"
+
+#define TYPE_MIPS_CPS "mips-cps"
+OBJECT_DECLARE_SIMPLE_TYPE(MIPSCPSState, MIPS_CPS)
+
+struct MIPSCPSState {
+    SysBusDevice parent_obj;
+
+    uint32_t num_vp;
+    uint32_t num_irq;
+    char *cpu_type;
+
+    MemoryRegion container;
+    MIPSGCRState gcr;
+    MIPSGICState gic;
+    MIPSCPCState cpc;
+    MIPSITUState itu;
+    Clock *clock;
+};
+
+qemu_irq get_cps_irq(MIPSCPSState *cps, int pin_number);
+
+#endif
diff --git a/include/hw/mips/mips.h b/include/hw/mips/mips.h
new file mode 100644 (file)
index 0000000..101799f
--- /dev/null
@@ -0,0 +1,22 @@
+#ifndef HW_MIPS_H
+#define HW_MIPS_H
+/* Definitions for mips board emulation.  */
+
+#include "qemu/units.h"
+
+/* Kernels can be configured with 64KB pages */
+#define INITRD_PAGE_SIZE (64 * KiB)
+
+#include "exec/memory.h"
+
+/* bonito.c */
+PCIBus *bonito_init(qemu_irq *pic);
+
+/* rc4030.c */
+typedef struct rc4030DMAState *rc4030_dma;
+void rc4030_dma_read(void *dma, uint8_t *buf, int len);
+void rc4030_dma_write(void *dma, uint8_t *buf, int len);
+
+DeviceState *rc4030_init(rc4030_dma **dmas, IOMMUMemoryRegion **dma_mr);
+
+#endif
diff --git a/include/hw/misc/a9scu.h b/include/hw/misc/a9scu.h
new file mode 100644 (file)
index 0000000..c3759fb
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Cortex-A9MPCore Snoop Control Unit (SCU) emulation.
+ *
+ * Copyright (c) 2009 CodeSourcery.
+ * Copyright (c) 2011 Linaro Limited.
+ * Written by Paul Brook, Peter Maydell.
+ *
+ * This code is licensed under the GPL.
+ */
+#ifndef HW_MISC_A9SCU_H
+#define HW_MISC_A9SCU_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+/* A9MP private memory region.  */
+
+struct A9SCUState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion iomem;
+    uint32_t control;
+    uint32_t status;
+    uint32_t num_cpu;
+};
+
+#define TYPE_A9_SCU "a9-scu"
+OBJECT_DECLARE_SIMPLE_TYPE(A9SCUState, A9_SCU)
+
+#endif
diff --git a/include/hw/misc/allwinner-a10-ccm.h b/include/hw/misc/allwinner-a10-ccm.h
new file mode 100644 (file)
index 0000000..7f22532
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Allwinner A10 Clock Control Module emulation
+ *
+ * Copyright (C) 2022 Strahinja Jankovic <strahinja.p.jankovic@gmail.com>
+ *
+ *  This file is derived from Allwinner H3 CCU,
+ *  by Niek Linnenbank.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_MISC_ALLWINNER_A10_CCM_H
+#define HW_MISC_ALLWINNER_A10_CCM_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+
+/**
+ * @name Constants
+ * @{
+ */
+
+/** Size of register I/O address space used by CCM device */
+#define AW_A10_CCM_IOSIZE        (0x400)
+
+/** Total number of known registers */
+#define AW_A10_CCM_REGS_NUM      (AW_A10_CCM_IOSIZE / sizeof(uint32_t))
+
+/** @} */
+
+/**
+ * @name Object model
+ * @{
+ */
+
+#define TYPE_AW_A10_CCM    "allwinner-a10-ccm"
+OBJECT_DECLARE_SIMPLE_TYPE(AwA10ClockCtlState, AW_A10_CCM)
+
+/** @} */
+
+/**
+ * Allwinner A10 CCM object instance state.
+ */
+struct AwA10ClockCtlState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    /** Maps I/O registers in physical memory */
+    MemoryRegion iomem;
+
+    /** Array of hardware registers */
+    uint32_t regs[AW_A10_CCM_REGS_NUM];
+};
+
+#endif /* HW_MISC_ALLWINNER_H3_CCU_H */
diff --git a/include/hw/misc/allwinner-a10-dramc.h b/include/hw/misc/allwinner-a10-dramc.h
new file mode 100644 (file)
index 0000000..b61fbec
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Allwinner A10 DRAM Controller emulation
+ *
+ * Copyright (C) 2022 Strahinja Jankovic <strahinja.p.jankovic@gmail.com>
+ *
+ *  This file is derived from Allwinner H3 DRAMC,
+ *  by Niek Linnenbank.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_MISC_ALLWINNER_A10_DRAMC_H
+#define HW_MISC_ALLWINNER_A10_DRAMC_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+#include "hw/register.h"
+
+/**
+ * @name Constants
+ * @{
+ */
+
+/** Size of register I/O address space used by DRAMC device */
+#define AW_A10_DRAMC_IOSIZE        (0x1000)
+
+/** Total number of known registers */
+#define AW_A10_DRAMC_REGS_NUM      (AW_A10_DRAMC_IOSIZE / sizeof(uint32_t))
+
+/** @} */
+
+/**
+ * @name Object model
+ * @{
+ */
+
+#define TYPE_AW_A10_DRAMC    "allwinner-a10-dramc"
+OBJECT_DECLARE_SIMPLE_TYPE(AwA10DramControllerState, AW_A10_DRAMC)
+
+/** @} */
+
+/**
+ * Allwinner A10 DRAMC object instance state.
+ */
+struct AwA10DramControllerState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    /** Maps I/O registers in physical memory */
+    MemoryRegion iomem;
+
+    /** Array of hardware registers */
+    uint32_t regs[AW_A10_DRAMC_REGS_NUM];
+};
+
+#endif /* HW_MISC_ALLWINNER_A10_DRAMC_H */
diff --git a/include/hw/misc/allwinner-cpucfg.h b/include/hw/misc/allwinner-cpucfg.h
new file mode 100644 (file)
index 0000000..a717b47
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Allwinner CPU Configuration Module emulation
+ *
+ * Copyright (C) 2019 Niek Linnenbank <nieklinnenbank@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_MISC_ALLWINNER_CPUCFG_H
+#define HW_MISC_ALLWINNER_CPUCFG_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+
+/**
+ * Object model
+ * @{
+ */
+
+#define TYPE_AW_CPUCFG   "allwinner-cpucfg"
+OBJECT_DECLARE_SIMPLE_TYPE(AwCpuCfgState, AW_CPUCFG)
+
+/** @} */
+
+/**
+ * Allwinner CPU Configuration Module instance state
+ */
+struct AwCpuCfgState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion iomem;
+    uint32_t gen_ctrl;
+    uint32_t super_standby;
+    uint32_t entry_addr;
+
+};
+
+#endif /* HW_MISC_ALLWINNER_CPUCFG_H */
diff --git a/include/hw/misc/allwinner-h3-ccu.h b/include/hw/misc/allwinner-h3-ccu.h
new file mode 100644 (file)
index 0000000..a04875b
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * Allwinner H3 Clock Control Unit emulation
+ *
+ * Copyright (C) 2019 Niek Linnenbank <nieklinnenbank@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_MISC_ALLWINNER_H3_CCU_H
+#define HW_MISC_ALLWINNER_H3_CCU_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+
+/**
+ * @name Constants
+ * @{
+ */
+
+/** Size of register I/O address space used by CCU device */
+#define AW_H3_CCU_IOSIZE        (0x400)
+
+/** Total number of known registers */
+#define AW_H3_CCU_REGS_NUM      (AW_H3_CCU_IOSIZE / sizeof(uint32_t))
+
+/** @} */
+
+/**
+ * @name Object model
+ * @{
+ */
+
+#define TYPE_AW_H3_CCU    "allwinner-h3-ccu"
+OBJECT_DECLARE_SIMPLE_TYPE(AwH3ClockCtlState, AW_H3_CCU)
+
+/** @} */
+
+/**
+ * Allwinner H3 CCU object instance state.
+ */
+struct AwH3ClockCtlState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    /** Maps I/O registers in physical memory */
+    MemoryRegion iomem;
+
+    /** Array of hardware registers */
+    uint32_t regs[AW_H3_CCU_REGS_NUM];
+
+};
+
+#endif /* HW_MISC_ALLWINNER_H3_CCU_H */
diff --git a/include/hw/misc/allwinner-h3-dramc.h b/include/hw/misc/allwinner-h3-dramc.h
new file mode 100644 (file)
index 0000000..0b6c877
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Allwinner H3 SDRAM Controller emulation
+ *
+ * Copyright (C) 2019 Niek Linnenbank <nieklinnenbank@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_MISC_ALLWINNER_H3_DRAMC_H
+#define HW_MISC_ALLWINNER_H3_DRAMC_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+#include "exec/hwaddr.h"
+
+/**
+ * Constants
+ * @{
+ */
+
+/** Highest register address used by DRAMCOM module */
+#define AW_H3_DRAMCOM_REGS_MAXADDR  (0x804)
+
+/** Total number of known DRAMCOM registers */
+#define AW_H3_DRAMCOM_REGS_NUM      (AW_H3_DRAMCOM_REGS_MAXADDR / \
+                                     sizeof(uint32_t))
+
+/** Highest register address used by DRAMCTL module */
+#define AW_H3_DRAMCTL_REGS_MAXADDR  (0x88c)
+
+/** Total number of known DRAMCTL registers */
+#define AW_H3_DRAMCTL_REGS_NUM      (AW_H3_DRAMCTL_REGS_MAXADDR / \
+                                     sizeof(uint32_t))
+
+/** Highest register address used by DRAMPHY module */
+#define AW_H3_DRAMPHY_REGS_MAXADDR  (0x4)
+
+/** Total number of known DRAMPHY registers */
+#define AW_H3_DRAMPHY_REGS_NUM      (AW_H3_DRAMPHY_REGS_MAXADDR / \
+                                     sizeof(uint32_t))
+
+/** @} */
+
+/**
+ * Object model
+ * @{
+ */
+
+#define TYPE_AW_H3_DRAMC "allwinner-h3-dramc"
+OBJECT_DECLARE_SIMPLE_TYPE(AwH3DramCtlState, AW_H3_DRAMC)
+
+/** @} */
+
+/**
+ * Allwinner H3 SDRAM Controller object instance state.
+ */
+struct AwH3DramCtlState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    /** Physical base address for start of RAM */
+    hwaddr ram_addr;
+
+    /** Total RAM size in megabytes */
+    uint32_t ram_size;
+
+    /**
+     * @name Memory Regions
+     * @{
+     */
+
+    MemoryRegion row_mirror;       /**< Simulates rows for RAM size detection */
+    MemoryRegion row_mirror_alias; /**< Alias of the row which is mirrored */
+    MemoryRegion dramcom_iomem;    /**< DRAMCOM module I/O registers */
+    MemoryRegion dramctl_iomem;    /**< DRAMCTL module I/O registers */
+    MemoryRegion dramphy_iomem;    /**< DRAMPHY module I/O registers */
+
+    /** @} */
+
+    /**
+     * @name Hardware Registers
+     * @{
+     */
+
+    uint32_t dramcom[AW_H3_DRAMCOM_REGS_NUM]; /**< Array of DRAMCOM registers */
+    uint32_t dramctl[AW_H3_DRAMCTL_REGS_NUM]; /**< Array of DRAMCTL registers */
+    uint32_t dramphy[AW_H3_DRAMPHY_REGS_NUM] ;/**< Array of DRAMPHY registers */
+
+    /** @} */
+
+};
+
+#endif /* HW_MISC_ALLWINNER_H3_DRAMC_H */
diff --git a/include/hw/misc/allwinner-h3-sysctrl.h b/include/hw/misc/allwinner-h3-sysctrl.h
new file mode 100644 (file)
index 0000000..ec1c220
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Allwinner H3 System Control emulation
+ *
+ * Copyright (C) 2019 Niek Linnenbank <nieklinnenbank@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_MISC_ALLWINNER_H3_SYSCTRL_H
+#define HW_MISC_ALLWINNER_H3_SYSCTRL_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+
+/**
+ * @name Constants
+ * @{
+ */
+
+/** Highest register address used by System Control device */
+#define AW_H3_SYSCTRL_REGS_MAXADDR   (0x30)
+
+/** Total number of known registers */
+#define AW_H3_SYSCTRL_REGS_NUM       ((AW_H3_SYSCTRL_REGS_MAXADDR / \
+                                      sizeof(uint32_t)) + 1)
+
+/** @} */
+
+/**
+ * @name Object model
+ * @{
+ */
+
+#define TYPE_AW_H3_SYSCTRL    "allwinner-h3-sysctrl"
+OBJECT_DECLARE_SIMPLE_TYPE(AwH3SysCtrlState, AW_H3_SYSCTRL)
+
+/** @} */
+
+/**
+ * Allwinner H3 System Control object instance state
+ */
+struct AwH3SysCtrlState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    /** Maps I/O registers in physical memory */
+    MemoryRegion iomem;
+
+    /** Array of hardware registers */
+    uint32_t regs[AW_H3_SYSCTRL_REGS_NUM];
+
+};
+
+#endif /* HW_MISC_ALLWINNER_H3_SYSCTRL_H */
diff --git a/include/hw/misc/allwinner-r40-ccu.h b/include/hw/misc/allwinner-r40-ccu.h
new file mode 100644 (file)
index 0000000..ceb74ef
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * Allwinner R40 Clock Control Unit emulation
+ *
+ * Copyright (C) 2023 qianfan Zhao <qianfanguijin@163.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_MISC_ALLWINNER_R40_CCU_H
+#define HW_MISC_ALLWINNER_R40_CCU_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+
+/**
+ * @name Constants
+ * @{
+ */
+
+/** Size of register I/O address space used by CCU device */
+#define AW_R40_CCU_IOSIZE        (0x400)
+
+/** Total number of known registers */
+#define AW_R40_CCU_REGS_NUM      (AW_R40_CCU_IOSIZE / sizeof(uint32_t))
+
+/** @} */
+
+/**
+ * @name Object model
+ * @{
+ */
+
+#define TYPE_AW_R40_CCU    "allwinner-r40-ccu"
+OBJECT_DECLARE_SIMPLE_TYPE(AwR40ClockCtlState, AW_R40_CCU)
+
+/** @} */
+
+/**
+ * Allwinner R40 CCU object instance state.
+ */
+struct AwR40ClockCtlState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    /** Maps I/O registers in physical memory */
+    MemoryRegion iomem;
+
+    /** Array of hardware registers */
+    uint32_t regs[AW_R40_CCU_REGS_NUM];
+
+};
+
+#endif /* HW_MISC_ALLWINNER_R40_CCU_H */
diff --git a/include/hw/misc/allwinner-r40-dramc.h b/include/hw/misc/allwinner-r40-dramc.h
new file mode 100644 (file)
index 0000000..6a1a3a7
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Allwinner R40 SDRAM Controller emulation
+ *
+ * Copyright (C) 2023 qianfan Zhao <qianfanguijin@163.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_MISC_ALLWINNER_R40_DRAMC_H
+#define HW_MISC_ALLWINNER_R40_DRAMC_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+#include "exec/hwaddr.h"
+
+/**
+ * Constants
+ * @{
+ */
+
+/** Highest register address used by DRAMCOM module */
+#define AW_R40_DRAMCOM_REGS_MAXADDR  (0x804)
+
+/** Total number of known DRAMCOM registers */
+#define AW_R40_DRAMCOM_REGS_NUM      (AW_R40_DRAMCOM_REGS_MAXADDR / \
+                                     sizeof(uint32_t))
+
+/** Highest register address used by DRAMCTL module */
+#define AW_R40_DRAMCTL_REGS_MAXADDR  (0x88c)
+
+/** Total number of known DRAMCTL registers */
+#define AW_R40_DRAMCTL_REGS_NUM      (AW_R40_DRAMCTL_REGS_MAXADDR / \
+                                     sizeof(uint32_t))
+
+/** Highest register address used by DRAMPHY module */
+#define AW_R40_DRAMPHY_REGS_MAXADDR  (0x4)
+
+/** Total number of known DRAMPHY registers */
+#define AW_R40_DRAMPHY_REGS_NUM      (AW_R40_DRAMPHY_REGS_MAXADDR / \
+                                     sizeof(uint32_t))
+
+/** @} */
+
+/**
+ * Object model
+ * @{
+ */
+
+#define TYPE_AW_R40_DRAMC "allwinner-r40-dramc"
+OBJECT_DECLARE_SIMPLE_TYPE(AwR40DramCtlState, AW_R40_DRAMC)
+
+/** @} */
+
+/**
+ * Allwinner R40 SDRAM Controller object instance state.
+ */
+struct AwR40DramCtlState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    /** Physical base address for start of RAM */
+    hwaddr ram_addr;
+
+    /** Total RAM size in megabytes */
+    uint32_t ram_size;
+
+    uint8_t set_row_bits;
+    uint8_t set_bank_bits;
+    uint8_t set_col_bits;
+
+    /**
+     * @name Memory Regions
+     * @{
+     */
+    MemoryRegion dramcom_iomem;    /**< DRAMCOM module I/O registers */
+    MemoryRegion dramctl_iomem;    /**< DRAMCTL module I/O registers */
+    MemoryRegion dramphy_iomem;    /**< DRAMPHY module I/O registers */
+    MemoryRegion dram_high;        /**< The high 1G dram for dualrank detect */
+    MemoryRegion detect_cells;     /**< DRAM memory cells for auto detect */
+
+    /** @} */
+
+    /**
+     * @name Hardware Registers
+     * @{
+     */
+
+    uint32_t dramcom[AW_R40_DRAMCOM_REGS_NUM]; /**< DRAMCOM registers */
+    uint32_t dramctl[AW_R40_DRAMCTL_REGS_NUM]; /**< DRAMCTL registers */
+    uint32_t dramphy[AW_R40_DRAMPHY_REGS_NUM] ;/**< DRAMPHY registers */
+
+    /** @} */
+
+};
+
+#endif /* HW_MISC_ALLWINNER_R40_DRAMC_H */
diff --git a/include/hw/misc/allwinner-sid.h b/include/hw/misc/allwinner-sid.h
new file mode 100644 (file)
index 0000000..3bfa887
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Allwinner Security ID emulation
+ *
+ * Copyright (C) 2019 Niek Linnenbank <nieklinnenbank@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_MISC_ALLWINNER_SID_H
+#define HW_MISC_ALLWINNER_SID_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+#include "qemu/uuid.h"
+
+/**
+ * Object model
+ * @{
+ */
+
+#define TYPE_AW_SID    "allwinner-sid"
+OBJECT_DECLARE_SIMPLE_TYPE(AwSidState, AW_SID)
+
+/** @} */
+
+/**
+ * Allwinner Security ID object instance state
+ */
+struct AwSidState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    /** Maps I/O registers in physical memory */
+    MemoryRegion iomem;
+
+    /** Control register defines how and what to read */
+    uint32_t control;
+
+    /** RdKey register contains the data retrieved by the device */
+    uint32_t rdkey;
+
+    /** Stores the emulated device identifier */
+    QemuUUID identifier;
+
+};
+
+#endif /* HW_MISC_ALLWINNER_SID_H */
diff --git a/include/hw/misc/allwinner-sramc.h b/include/hw/misc/allwinner-sramc.h
new file mode 100644 (file)
index 0000000..66b01b8
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ * Allwinner SRAM controller emulation
+ *
+ * Copyright (C) 2023 qianfan Zhao <qianfanguijin@163.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_MISC_ALLWINNER_SRAMC_H
+#define HW_MISC_ALLWINNER_SRAMC_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+#include "qemu/uuid.h"
+
+/**
+ * Object model
+ * @{
+ */
+#define TYPE_AW_SRAMC               "allwinner-sramc"
+#define TYPE_AW_SRAMC_SUN8I_R40     TYPE_AW_SRAMC "-sun8i-r40"
+OBJECT_DECLARE_TYPE(AwSRAMCState, AwSRAMCClass, AW_SRAMC)
+
+/** @} */
+
+/**
+ * Allwinner SRAMC object instance state
+ */
+struct AwSRAMCState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    /** Maps I/O registers in physical memory */
+    MemoryRegion iomem;
+
+    /* registers */
+    uint32_t sram_ctl1;
+    uint32_t sram_ver;
+    uint32_t sram_soft_entry_reg0;
+};
+
+/**
+ * Allwinner SRAM Controller class-level struct.
+ *
+ * This struct is filled by each sunxi device specific code
+ * such that the generic code can use this struct to support
+ * all devices.
+ */
+struct AwSRAMCClass {
+    /*< private >*/
+    SysBusDeviceClass parent_class;
+    /*< public >*/
+
+    uint32_t sram_version_code;
+};
+
+#endif /* HW_MISC_ALLWINNER_SRAMC_H */
diff --git a/include/hw/misc/arm11scu.h b/include/hw/misc/arm11scu.h
new file mode 100644 (file)
index 0000000..e5c0282
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * ARM11MPCore Snoop Control Unit (SCU) emulation
+ *
+ * Copyright (c) 2006-2007 CodeSourcery.
+ * Copyright (c) 2013 SUSE LINUX Products GmbH
+ * Written by Paul Brook and Andreas Färber
+ *
+ * This code is licensed under the GPL.
+ */
+
+#ifndef HW_MISC_ARM11SCU_H
+#define HW_MISC_ARM11SCU_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_ARM11_SCU "arm11-scu"
+OBJECT_DECLARE_SIMPLE_TYPE(ARM11SCUState, ARM11_SCU)
+
+struct ARM11SCUState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    uint32_t control;
+    uint32_t num_cpu;
+    MemoryRegion iomem;
+};
+
+#endif
diff --git a/include/hw/misc/arm_integrator_debug.h b/include/hw/misc/arm_integrator_debug.h
new file mode 100644 (file)
index 0000000..798b082
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * ARM Integrator Board Debug, switch and LED section
+ *
+ * Browse the data sheet:
+ *
+ *  https://developer.arm.com/documentation/dui0159/b/peripherals-and-interfaces/debug-leds-and-dip-switch-interface
+ *
+ * Copyright (c) 2013 Alex Bennée <alex@bennee.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef ARM_INTEGRATOR_DEBUG_H
+#define ARM_INTEGRATOR_DEBUG_H
+
+#define TYPE_INTEGRATOR_DEBUG "integrator_debug"
+
+#endif
diff --git a/include/hw/misc/armsse-cpu-pwrctrl.h b/include/hw/misc/armsse-cpu-pwrctrl.h
new file mode 100644 (file)
index 0000000..51d45ed
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * ARM SSE CPU PWRCTRL register block
+ *
+ * Copyright (c) 2021 Linaro Limited
+ * Written by Peter Maydell
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+/*
+ * This is a model of the "CPU<N>_PWRCTRL block" which is part of the
+ * Arm Corstone SSE-300 Example Subsystem and documented in
+ * https://developer.arm.com/documentation/101773/0000
+ *
+ * QEMU interface:
+ *  + sysbus MMIO region 0: the register bank
+ */
+
+#ifndef HW_MISC_ARMSSE_CPU_PWRCTRL_H
+#define HW_MISC_ARMSSE_CPU_PWRCTRL_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_ARMSSE_CPU_PWRCTRL "armsse-cpu-pwrctrl"
+OBJECT_DECLARE_SIMPLE_TYPE(ARMSSECPUPwrCtrl, ARMSSE_CPU_PWRCTRL)
+
+struct ARMSSECPUPwrCtrl {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+
+    uint32_t cpupwrcfg;
+};
+
+#endif
diff --git a/include/hw/misc/armsse-cpuid.h b/include/hw/misc/armsse-cpuid.h
new file mode 100644 (file)
index 0000000..9c09263
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * ARM SSE-200 CPU_IDENTITY register block
+ *
+ * Copyright (c) 2019 Linaro Limited
+ * Written by Peter Maydell
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+/*
+ * This is a model of the "CPU_IDENTITY" register block which is part of the
+ * Arm SSE-200 and documented in
+ * https://developer.arm.com/documentation/101104/latest/
+ *
+ * QEMU interface:
+ *  + QOM property "CPUID": the value to use for the CPUID register
+ *  + sysbus MMIO region 0: the system information register bank
+ */
+
+#ifndef HW_MISC_ARMSSE_CPUID_H
+#define HW_MISC_ARMSSE_CPUID_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_ARMSSE_CPUID "armsse-cpuid"
+OBJECT_DECLARE_SIMPLE_TYPE(ARMSSECPUID, ARMSSE_CPUID)
+
+struct ARMSSECPUID {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+
+    /* Properties */
+    uint32_t cpuid;
+};
+
+#endif
diff --git a/include/hw/misc/armsse-mhu.h b/include/hw/misc/armsse-mhu.h
new file mode 100644 (file)
index 0000000..41925de
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * ARM SSE-200 Message Handling Unit (MHU)
+ *
+ * Copyright (c) 2019 Linaro Limited
+ * Written by Peter Maydell
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+/*
+ * This is a model of the Message Handling Unit (MHU) which is part of the
+ * Arm SSE-200 and documented in
+ * https://developer.arm.com/documentation/101104/latest/
+ *
+ * QEMU interface:
+ *  + sysbus MMIO region 0: the system information register bank
+ *  + sysbus IRQ 0: interrupt for CPU 0
+ *  + sysbus IRQ 1: interrupt for CPU 1
+ */
+
+#ifndef HW_MISC_ARMSSE_MHU_H
+#define HW_MISC_ARMSSE_MHU_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_ARMSSE_MHU "armsse-mhu"
+OBJECT_DECLARE_SIMPLE_TYPE(ARMSSEMHU, ARMSSE_MHU)
+
+struct ARMSSEMHU {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    qemu_irq cpu0irq;
+    qemu_irq cpu1irq;
+
+    uint32_t cpu0intr;
+    uint32_t cpu1intr;
+};
+
+#endif
diff --git a/include/hw/misc/armv7m_ras.h b/include/hw/misc/armv7m_ras.h
new file mode 100644 (file)
index 0000000..ba6dacc
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Arm M-profile RAS (Reliability, Availability and Serviceability) block
+ *
+ * Copyright (c) 2021 Linaro Limited
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+/*
+ * This is a model of the RAS register block of an M-profile CPU
+ * (the registers starting at 0xE0005000 with ERRFRn).
+ *
+ * QEMU interface:
+ *  + sysbus MMIO region 0: the register bank
+ *
+ * The QEMU implementation currently provides "minimal RAS" only.
+ */
+
+#ifndef HW_MISC_ARMV7M_RAS_H
+#define HW_MISC_ARMV7M_RAS_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_ARMV7M_RAS "armv7m-ras"
+OBJECT_DECLARE_SIMPLE_TYPE(ARMv7MRAS, ARMV7M_RAS)
+
+struct ARMv7MRAS {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+};
+
+#endif
diff --git a/include/hw/misc/aspeed_hace.h b/include/hw/misc/aspeed_hace.h
new file mode 100644 (file)
index 0000000..ecb1b67
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * ASPEED Hash and Crypto Engine
+ *
+ * Copyright (C) 2021 IBM Corp.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef ASPEED_HACE_H
+#define ASPEED_HACE_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_ASPEED_HACE "aspeed.hace"
+#define TYPE_ASPEED_AST2400_HACE TYPE_ASPEED_HACE "-ast2400"
+#define TYPE_ASPEED_AST2500_HACE TYPE_ASPEED_HACE "-ast2500"
+#define TYPE_ASPEED_AST2600_HACE TYPE_ASPEED_HACE "-ast2600"
+#define TYPE_ASPEED_AST1030_HACE TYPE_ASPEED_HACE "-ast1030"
+
+OBJECT_DECLARE_TYPE(AspeedHACEState, AspeedHACEClass, ASPEED_HACE)
+
+#define ASPEED_HACE_NR_REGS (0x64 >> 2)
+#define ASPEED_HACE_MAX_SG  256 /* max number of entries */
+
+struct AspeedHACEState {
+    SysBusDevice parent;
+
+    MemoryRegion iomem;
+    qemu_irq irq;
+
+    struct iovec iov_cache[ASPEED_HACE_MAX_SG];
+    uint32_t regs[ASPEED_HACE_NR_REGS];
+    uint32_t total_req_len;
+    uint32_t iov_count;
+
+    MemoryRegion *dram_mr;
+    AddressSpace dram_as;
+};
+
+
+struct AspeedHACEClass {
+    SysBusDeviceClass parent_class;
+
+    uint32_t src_mask;
+    uint32_t dest_mask;
+    uint32_t key_mask;
+    uint32_t hash_mask;
+};
+
+#endif /* ASPEED_HACE_H */
diff --git a/include/hw/misc/aspeed_i3c.h b/include/hw/misc/aspeed_i3c.h
new file mode 100644 (file)
index 0000000..39679df
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * ASPEED I3C Controller
+ *
+ * Copyright (C) 2021 ASPEED Technology Inc.
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef ASPEED_I3C_H
+#define ASPEED_I3C_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_ASPEED_I3C "aspeed.i3c"
+#define TYPE_ASPEED_I3C_DEVICE "aspeed.i3c.device"
+OBJECT_DECLARE_TYPE(AspeedI3CState, AspeedI3CClass, ASPEED_I3C)
+
+#define ASPEED_I3C_NR_REGS (0x70 >> 2)
+#define ASPEED_I3C_DEVICE_NR_REGS (0x300 >> 2)
+#define ASPEED_I3C_NR_DEVICES 6
+
+OBJECT_DECLARE_SIMPLE_TYPE(AspeedI3CDevice, ASPEED_I3C_DEVICE)
+typedef struct AspeedI3CDevice {
+    /* <private> */
+    SysBusDevice parent;
+
+    /* <public> */
+    MemoryRegion mr;
+    qemu_irq irq;
+
+    uint8_t id;
+    uint32_t regs[ASPEED_I3C_DEVICE_NR_REGS];
+} AspeedI3CDevice;
+
+typedef struct AspeedI3CState {
+    /* <private> */
+    SysBusDevice parent;
+
+    /* <public> */
+    MemoryRegion iomem;
+    MemoryRegion iomem_container;
+    qemu_irq irq;
+
+    uint32_t regs[ASPEED_I3C_NR_REGS];
+    AspeedI3CDevice devices[ASPEED_I3C_NR_DEVICES];
+} AspeedI3CState;
+#endif /* ASPEED_I3C_H */
diff --git a/include/hw/misc/aspeed_lpc.h b/include/hw/misc/aspeed_lpc.h
new file mode 100644 (file)
index 0000000..fa39895
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ *  ASPEED LPC Controller
+ *
+ *  Copyright (C) 2017-2018 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef ASPEED_LPC_H
+#define ASPEED_LPC_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_ASPEED_LPC "aspeed.lpc"
+#define ASPEED_LPC(obj) OBJECT_CHECK(AspeedLPCState, (obj), TYPE_ASPEED_LPC)
+
+#define ASPEED_LPC_NR_REGS      (0x260 >> 2)
+
+enum aspeed_lpc_subdevice {
+    aspeed_lpc_kcs_1 = 0,
+    aspeed_lpc_kcs_2,
+    aspeed_lpc_kcs_3,
+    aspeed_lpc_kcs_4,
+    aspeed_lpc_ibt,
+};
+
+#define ASPEED_LPC_NR_SUBDEVS   5
+
+typedef struct AspeedLPCState {
+    /* <private> */
+    SysBusDevice parent;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    qemu_irq irq;
+
+    qemu_irq subdevice_irqs[ASPEED_LPC_NR_SUBDEVS];
+    uint32_t subdevice_irqs_pending;
+
+    uint32_t regs[ASPEED_LPC_NR_REGS];
+    uint32_t hicr7;
+} AspeedLPCState;
+
+#endif /* ASPEED_LPC_H */
diff --git a/include/hw/misc/aspeed_peci.h b/include/hw/misc/aspeed_peci.h
new file mode 100644 (file)
index 0000000..8382707
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Aspeed PECI Controller
+ *
+ * Copyright (c) Meta Platforms, Inc. and affiliates. (http://www.meta.com)
+ *
+ * This code is licensed under the GPL version 2 or later. See the COPYING
+ * file in the top-level directory.
+ */
+
+#ifndef ASPEED_PECI_H
+#define ASPEED_PECI_H
+
+#include "hw/sysbus.h"
+
+#define ASPEED_PECI_NR_REGS ((0xFC + 4) >> 2)
+#define TYPE_ASPEED_PECI "aspeed.peci"
+OBJECT_DECLARE_SIMPLE_TYPE(AspeedPECIState, ASPEED_PECI);
+
+struct AspeedPECIState {
+    /* <private> */
+    SysBusDevice parent;
+
+    MemoryRegion mmio;
+    qemu_irq irq;
+
+    uint32_t regs[ASPEED_PECI_NR_REGS];
+};
+
+#endif
diff --git a/include/hw/misc/aspeed_sbc.h b/include/hw/misc/aspeed_sbc.h
new file mode 100644 (file)
index 0000000..405e678
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * ASPEED Secure Boot Controller
+ *
+ * Copyright (C) 2021-2022 IBM Corp.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef ASPEED_SBC_H
+#define ASPEED_SBC_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_ASPEED_SBC "aspeed.sbc"
+#define TYPE_ASPEED_AST2600_SBC TYPE_ASPEED_SBC "-ast2600"
+OBJECT_DECLARE_TYPE(AspeedSBCState, AspeedSBCClass, ASPEED_SBC)
+
+#define ASPEED_SBC_NR_REGS (0x93c >> 2)
+
+#define QSR_AES                     BIT(27)
+#define QSR_RSA1024                 (0x0 << 12)
+#define QSR_RSA2048                 (0x1 << 12)
+#define QSR_RSA3072                 (0x2 << 12)
+#define QSR_RSA4096                 (0x3 << 12)
+#define QSR_SHA224                  (0x0 << 10)
+#define QSR_SHA256                  (0x1 << 10)
+#define QSR_SHA384                  (0x2 << 10)
+#define QSR_SHA512                  (0x3 << 10)
+
+struct AspeedSBCState {
+    SysBusDevice parent;
+
+    bool emmc_abr;
+    uint32_t signing_settings;
+
+    MemoryRegion iomem;
+
+    uint32_t regs[ASPEED_SBC_NR_REGS];
+};
+
+struct AspeedSBCClass {
+    SysBusDeviceClass parent_class;
+};
+
+#endif /* ASPEED_SBC_H */
diff --git a/include/hw/misc/aspeed_scu.h b/include/hw/misc/aspeed_scu.h
new file mode 100644 (file)
index 0000000..7cb6018
--- /dev/null
@@ -0,0 +1,364 @@
+/*
+ * ASPEED System Control Unit
+ *
+ * Andrew Jeffery <andrew@aj.id.au>
+ *
+ * Copyright 2016 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+#ifndef ASPEED_SCU_H
+#define ASPEED_SCU_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_ASPEED_SCU "aspeed.scu"
+OBJECT_DECLARE_TYPE(AspeedSCUState, AspeedSCUClass, ASPEED_SCU)
+#define TYPE_ASPEED_2400_SCU TYPE_ASPEED_SCU "-ast2400"
+#define TYPE_ASPEED_2500_SCU TYPE_ASPEED_SCU "-ast2500"
+#define TYPE_ASPEED_2600_SCU TYPE_ASPEED_SCU "-ast2600"
+#define TYPE_ASPEED_1030_SCU TYPE_ASPEED_SCU "-ast1030"
+
+#define ASPEED_SCU_NR_REGS (0x1A8 >> 2)
+#define ASPEED_AST2600_SCU_NR_REGS (0xE20 >> 2)
+
+struct AspeedSCUState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+
+    uint32_t regs[ASPEED_AST2600_SCU_NR_REGS];
+    uint32_t silicon_rev;
+    uint32_t hw_strap1;
+    uint32_t hw_strap2;
+    uint32_t hw_prot_key;
+};
+
+#define AST2400_A0_SILICON_REV   0x02000303U
+#define AST2400_A1_SILICON_REV   0x02010303U
+#define AST2500_A0_SILICON_REV   0x04000303U
+#define AST2500_A1_SILICON_REV   0x04010303U
+#define AST2600_A0_SILICON_REV   0x05000303U
+#define AST2600_A1_SILICON_REV   0x05010303U
+#define AST2600_A2_SILICON_REV   0x05020303U
+#define AST2600_A3_SILICON_REV   0x05030303U
+#define AST1030_A0_SILICON_REV   0x80000000U
+#define AST1030_A1_SILICON_REV   0x80010000U
+
+#define ASPEED_IS_AST2500(si_rev)     ((((si_rev) >> 24) & 0xff) == 0x04)
+
+bool is_supported_silicon_rev(uint32_t silicon_rev);
+
+
+struct AspeedSCUClass {
+    SysBusDeviceClass parent_class;
+
+    const uint32_t *resets;
+    uint32_t (*calc_hpll)(AspeedSCUState *s, uint32_t hpll_reg);
+    uint32_t (*get_apb)(AspeedSCUState *s);
+    uint32_t apb_divider;
+    uint32_t nr_regs;
+    bool clkin_25Mhz;
+    const MemoryRegionOps *ops;
+};
+
+#define ASPEED_SCU_PROT_KEY      0x1688A8A8
+
+uint32_t aspeed_scu_get_apb_freq(AspeedSCUState *s);
+
+/*
+ * Extracted from Aspeed SDK v00.03.21. Fixes and extra definitions
+ * were added.
+ *
+ * Original header file :
+ *    arch/arm/mach-aspeed/include/mach/regs-scu.h
+ *
+ *    Copyright (C) 2012-2020  ASPEED Technology Inc.
+ *
+ *    This program is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License version 2 as
+ *    published by the Free Software Foundation.
+ *
+ *      History      :
+ *       1. 2012/12/29 Ryan Chen Create
+ */
+
+/* SCU08   Clock Selection Register
+ *
+ *  31     Enable Video Engine clock dynamic slow down
+ *  30:28  Video Engine clock slow down setting
+ *  27     2D Engine GCLK clock source selection
+ *  26     2D Engine GCLK clock throttling enable
+ *  25:23  APB PCLK divider selection
+ *  22:20  LPC Host LHCLK divider selection
+ *  19     LPC Host LHCLK clock generation/output enable control
+ *  18:16  MAC AHB bus clock divider selection
+ *  15     SD/SDIO clock running enable
+ *  14:12  SD/SDIO divider selection
+ *  11     Reserved
+ *  10:8   Video port output clock delay control bit
+ *  7      ARM CPU/AHB clock slow down enable
+ *  6:4    ARM CPU/AHB clock slow down setting
+ *  3:2    ECLK clock source selection
+ *  1      CPU/AHB clock slow down idle timer
+ *  0      CPU/AHB clock dynamic slow down enable (defined in bit[6:4])
+ */
+#define SCU_CLK_GET_PCLK_DIV(x)                    (((x) >> 23) & 0x7)
+
+/* SCU24   H-PLL Parameter Register (for Aspeed AST2400 SOC)
+ *
+ *  18     H-PLL parameter selection
+ *           0: Select H-PLL by strapping resistors
+ *           1: Select H-PLL by the programmed registers (SCU24[17:0])
+ *  17     Enable H-PLL bypass mode
+ *  16     Turn off H-PLL
+ *  10:5   H-PLL Numerator
+ *  4      H-PLL Output Divider
+ *  3:0    H-PLL Denumerator
+ *
+ *  (Output frequency) = 24MHz * (2-OD) * [(Numerator+2) / (Denumerator+1)]
+ */
+
+#define SCU_AST2400_H_PLL_PROGRAMMED               (0x1 << 18)
+#define SCU_AST2400_H_PLL_BYPASS_EN                (0x1 << 17)
+#define SCU_AST2400_H_PLL_OFF                      (0x1 << 16)
+
+/* SCU24   H-PLL Parameter Register (for Aspeed AST2500 SOC)
+ *
+ *  21     Enable H-PLL reset
+ *  20     Enable H-PLL bypass mode
+ *  19     Turn off H-PLL
+ *  18:13  H-PLL Post Divider
+ *  12:5   H-PLL Numerator (M)
+ *  4:0    H-PLL Denumerator (N)
+ *
+ *  (Output frequency) = CLKIN(24MHz) * [(M+1) / (N+1)] / (P+1)
+ *
+ * The default frequency is 792Mhz when CLKIN = 24MHz
+ */
+
+#define SCU_H_PLL_BYPASS_EN                        (0x1 << 20)
+#define SCU_H_PLL_OFF                              (0x1 << 19)
+
+/* SCU70  Hardware Strapping Register definition (for Aspeed AST2400 SOC)
+ *
+ * 31:29  Software defined strapping registers
+ * 28:27  DRAM size setting (for VGA driver use)
+ * 26:24  DRAM configuration setting
+ * 23     Enable 25 MHz reference clock input
+ * 22     Enable GPIOE pass-through mode
+ * 21     Enable GPIOD pass-through mode
+ * 20     Disable LPC to decode SuperIO 0x2E/0x4E address
+ * 19     Disable ACPI function
+ * 23,18  Clock source selection
+ * 17     Enable BMC 2nd boot watchdog timer
+ * 16     SuperIO configuration address selection
+ * 15     VGA Class Code selection
+ * 14     Enable LPC dedicated reset pin function
+ * 13:12  SPI mode selection
+ * 11:10  CPU/AHB clock frequency ratio selection
+ * 9:8    H-PLL default clock frequency selection
+ * 7      Define MAC#2 interface
+ * 6      Define MAC#1 interface
+ * 5      Enable VGA BIOS ROM
+ * 4      Boot flash memory extended option
+ * 3:2    VGA memory size selection
+ * 1:0    BMC CPU boot code selection
+ */
+#define SCU_AST2400_HW_STRAP_SW_DEFINE(x)          ((x) << 29)
+#define SCU_AST2400_HW_STRAP_SW_DEFINE_MASK        (0x7 << 29)
+
+#define SCU_AST2400_HW_STRAP_DRAM_SIZE(x)          ((x) << 27)
+#define SCU_AST2400_HW_STRAP_DRAM_SIZE_MASK        (0x3 << 27)
+#define     DRAM_SIZE_64MB                             0
+#define     DRAM_SIZE_128MB                            1
+#define     DRAM_SIZE_256MB                            2
+#define     DRAM_SIZE_512MB                            3
+
+#define SCU_AST2400_HW_STRAP_DRAM_CONFIG(x)        ((x) << 24)
+#define SCU_AST2400_HW_STRAP_DRAM_CONFIG_MASK      (0x7 << 24)
+
+#define SCU_HW_STRAP_GPIOE_PT_EN                   (0x1 << 22)
+#define SCU_HW_STRAP_GPIOD_PT_EN                   (0x1 << 21)
+#define SCU_HW_STRAP_LPC_DEC_SUPER_IO              (0x1 << 20)
+#define SCU_AST2400_HW_STRAP_ACPI_DIS              (0x1 << 19)
+
+/* bit 23, 18 [1,0] */
+#define SCU_AST2400_HW_STRAP_SET_CLK_SOURCE(x)     (((((x) & 0x3) >> 1) << 23) \
+                                                    | (((x) & 0x1) << 18))
+#define SCU_AST2400_HW_STRAP_GET_CLK_SOURCE(x)     (((((x) >> 23) & 0x1) << 1) \
+                                                    | (((x) >> 18) & 0x1))
+#define SCU_AST2400_HW_STRAP_CLK_SOURCE_MASK       ((0x1 << 23) | (0x1 << 18))
+#define SCU_HW_STRAP_CLK_25M_IN                    (0x1 << 23)
+#define     AST2400_CLK_24M_IN                         0
+#define     AST2400_CLK_48M_IN                         1
+#define     AST2400_CLK_25M_IN_24M_USB_CKI             2
+#define     AST2400_CLK_25M_IN_48M_USB_CKI             3
+
+#define SCU_HW_STRAP_CLK_48M_IN                    (0x1 << 18)
+#define SCU_HW_STRAP_2ND_BOOT_WDT                  (0x1 << 17)
+#define SCU_HW_STRAP_SUPER_IO_CONFIG               (0x1 << 16)
+#define SCU_HW_STRAP_VGA_CLASS_CODE                (0x1 << 15)
+#define SCU_HW_STRAP_LPC_RESET_PIN                 (0x1 << 14)
+
+#define SCU_HW_STRAP_SPI_MODE(x)                   ((x) << 12)
+#define SCU_HW_STRAP_SPI_MODE_MASK                 (0x3 << 12)
+#define     SCU_HW_STRAP_SPI_DIS                       0
+#define     SCU_HW_STRAP_SPI_MASTER                    1
+#define     SCU_HW_STRAP_SPI_M_S_EN                    2
+#define     SCU_HW_STRAP_SPI_PASS_THROUGH              3
+
+#define SCU_AST2400_HW_STRAP_SET_CPU_AHB_RATIO(x)  ((x) << 10)
+#define SCU_AST2400_HW_STRAP_GET_CPU_AHB_RATIO(x)  (((x) >> 10) & 3)
+#define SCU_AST2400_HW_STRAP_CPU_AHB_RATIO_MASK    (0x3 << 10)
+#define     AST2400_CPU_AHB_RATIO_1_1                  0
+#define     AST2400_CPU_AHB_RATIO_2_1                  1
+#define     AST2400_CPU_AHB_RATIO_4_1                  2
+#define     AST2400_CPU_AHB_RATIO_3_1                  3
+
+#define SCU_AST2400_HW_STRAP_GET_H_PLL_CLK(x)      (((x) >> 8) & 0x3)
+#define SCU_AST2400_HW_STRAP_H_PLL_CLK_MASK        (0x3 << 8)
+#define     AST2400_CPU_384MHZ                         0
+#define     AST2400_CPU_360MHZ                         1
+#define     AST2400_CPU_336MHZ                         2
+#define     AST2400_CPU_408MHZ                         3
+
+#define SCU_HW_STRAP_MAC1_RGMII                    (0x1 << 7)
+#define SCU_HW_STRAP_MAC0_RGMII                    (0x1 << 6)
+#define SCU_HW_STRAP_VGA_BIOS_ROM                  (0x1 << 5)
+#define SCU_HW_STRAP_SPI_WIDTH                     (0x1 << 4)
+
+#define SCU_HW_STRAP_VGA_SIZE_GET(x)               (((x) >> 2) & 0x3)
+#define SCU_HW_STRAP_VGA_MASK                      (0x3 << 2)
+#define SCU_HW_STRAP_VGA_SIZE_SET(x)               ((x) << 2)
+#define     VGA_8M_DRAM                                0
+#define     VGA_16M_DRAM                               1
+#define     VGA_32M_DRAM                               2
+#define     VGA_64M_DRAM                               3
+
+#define SCU_AST2400_HW_STRAP_BOOT_MODE(x)          (x)
+#define     AST2400_NOR_BOOT                           0
+#define     AST2400_NAND_BOOT                          1
+#define     AST2400_SPI_BOOT                           2
+#define     AST2400_DIS_BOOT                           3
+
+/*
+ * SCU70  Hardware strapping register definition (for Aspeed AST2500
+ *        SoC and higher)
+ *
+ * 31     Enable SPI Flash Strap Auto Fetch Mode
+ * 30     Enable GPIO Strap Mode
+ * 29     Select UART Debug Port
+ * 28     Reserved (1)
+ * 27     Enable fast reset mode for ARM ICE debugger
+ * 26     Enable eSPI flash mode
+ * 25     Enable eSPI mode
+ * 24     Select DDR4 SDRAM
+ * 23     Select 25 MHz reference clock input mode
+ * 22     Enable GPIOE pass-through mode
+ * 21     Enable GPIOD pass-through mode
+ * 20     Disable LPC to decode SuperIO 0x2E/0x4E address
+ * 19     Enable ACPI function
+ * 18     Select USBCKI input frequency
+ * 17     Enable BMC 2nd boot watchdog timer
+ * 16     SuperIO configuration address selection
+ * 15     VGA Class Code selection
+ * 14     Select dedicated LPC reset input
+ * 13:12  SPI mode selection
+ * 11:9   AXI/AHB clock frequency ratio selection
+ * 8      Reserved (0)
+ * 7      Define MAC#2 interface
+ * 6      Define MAC#1 interface
+ * 5      Enable dedicated VGA BIOS ROM
+ * 4      Reserved (0)
+ * 3:2    VGA memory size selection
+ * 1      Reserved (1)
+ * 0      Disable CPU boot
+ */
+#define SCU_AST2500_HW_STRAP_SPI_AUTOFETCH_ENABLE  (0x1 << 31)
+#define SCU_AST2500_HW_STRAP_GPIO_STRAP_ENABLE     (0x1 << 30)
+#define SCU_AST2500_HW_STRAP_UART_DEBUG            (0x1 << 29)
+#define     UART_DEBUG_UART1                           0
+#define     UART_DEBUG_UART5                           1
+#define SCU_AST2500_HW_STRAP_RESERVED28            (0x1 << 28)
+
+#define SCU_AST2500_HW_STRAP_FAST_RESET_DBG        (0x1 << 27)
+#define SCU_AST2500_HW_STRAP_ESPI_FLASH_ENABLE     (0x1 << 26)
+#define SCU_AST2500_HW_STRAP_ESPI_ENABLE           (0x1 << 25)
+#define SCU_AST2500_HW_STRAP_DDR4_ENABLE           (0x1 << 24)
+#define SCU_AST2500_HW_STRAP_25HZ_CLOCK_MODE       (0x1 << 23)
+
+#define SCU_AST2500_HW_STRAP_ACPI_ENABLE           (0x1 << 19)
+#define SCU_AST2500_HW_STRAP_USBCKI_FREQ           (0x1 << 18)
+#define     USBCKI_FREQ_24MHZ                          0
+#define     USBCKI_FREQ_28MHZ                          1
+
+#define SCU_AST2500_HW_STRAP_SET_AXI_AHB_RATIO(x)  ((x) << 9)
+#define SCU_AST2500_HW_STRAP_GET_AXI_AHB_RATIO(x)  (((x) >> 9) & 7)
+#define SCU_AST2500_HW_STRAP_CPU_AXI_RATIO_MASK    (0x7 << 9)
+#define     AXI_AHB_RATIO_UNDEFINED                    0
+#define     AXI_AHB_RATIO_2_1                          1
+#define     AXI_AHB_RATIO_3_1                          2
+#define     AXI_AHB_RATIO_4_1                          3
+#define     AXI_AHB_RATIO_5_1                          4
+#define     AXI_AHB_RATIO_6_1                          5
+#define     AXI_AHB_RATIO_7_1                          6
+#define     AXI_AHB_RATIO_8_1                          7
+
+#define SCU_AST2500_HW_STRAP_RESERVED1             (0x1 << 1)
+#define SCU_AST2500_HW_STRAP_DIS_BOOT              (0x1 << 0)
+
+#define AST2500_HW_STRAP1_DEFAULTS (                                    \
+        SCU_AST2500_HW_STRAP_RESERVED28 |                               \
+        SCU_HW_STRAP_2ND_BOOT_WDT |                                     \
+        SCU_HW_STRAP_VGA_CLASS_CODE |                                   \
+        SCU_HW_STRAP_LPC_RESET_PIN |                                    \
+        SCU_AST2500_HW_STRAP_SET_AXI_AHB_RATIO(AXI_AHB_RATIO_2_1) |     \
+        SCU_HW_STRAP_VGA_SIZE_SET(VGA_16M_DRAM) |                       \
+        SCU_AST2500_HW_STRAP_RESERVED1)
+
+/*
+ * SCU200   H-PLL Parameter Register (for Aspeed AST2600 SOC)
+ *
+ *  28:26  H-PLL Parameters
+ *  25     Enable H-PLL reset
+ *  24     Enable H-PLL bypass mode
+ *  23     Turn off H-PLL
+ *  22:19  H-PLL Post Divider (P)
+ *  18:13  H-PLL Numerator (M)
+ *  12:0   H-PLL Denumerator (N)
+ *
+ *  (Output frequency) = CLKIN(25MHz) * [(M+1) / (N+1)] / (P+1)
+ *
+ * The default frequency is 1200Mhz when CLKIN = 25MHz
+ */
+#define SCU_AST2600_H_PLL_BYPASS_EN                        (0x1 << 24)
+#define SCU_AST2600_H_PLL_OFF                              (0x1 << 23)
+
+/*
+ * SCU310   Clock Selection Register Set 4 (for Aspeed AST1030 SOC)
+ *
+ *  31     I3C Clock Source selection
+ *  30:28  I3C clock divider selection
+ *  26:24  MAC AHB clock divider selection
+ *  22:20  RGMII 125MHz clock divider ration
+ *  19:16  RGMII 50MHz clock divider ration
+ *  15     LHCLK clock generation/output enable control
+ *  14:12  LHCLK divider selection
+ *  11:8   APB Bus PCLK divider selection
+ *  7      Select PECI clock source
+ *  6      Select UART debug port clock source
+ *  5      Select UART6 clock source
+ *  4      Select UART5 clock source
+ *  3      Select UART4 clock source
+ *  2      Select UART3 clock source
+ *  1      Select UART2 clock source
+ *  0      Select UART1 clock source
+ */
+#define SCU_AST1030_CLK_GET_PCLK_DIV(x)                    (((x) >> 8) & 0xf)
+
+#endif /* ASPEED_SCU_H */
diff --git a/include/hw/misc/aspeed_sdmc.h b/include/hw/misc/aspeed_sdmc.h
new file mode 100644 (file)
index 0000000..ec2d59a
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * ASPEED SDRAM Memory Controller
+ *
+ * Copyright (C) 2016 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#ifndef ASPEED_SDMC_H
+#define ASPEED_SDMC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_ASPEED_SDMC "aspeed.sdmc"
+OBJECT_DECLARE_TYPE(AspeedSDMCState, AspeedSDMCClass, ASPEED_SDMC)
+#define TYPE_ASPEED_2400_SDMC TYPE_ASPEED_SDMC "-ast2400"
+#define TYPE_ASPEED_2500_SDMC TYPE_ASPEED_SDMC "-ast2500"
+#define TYPE_ASPEED_2600_SDMC TYPE_ASPEED_SDMC "-ast2600"
+
+/*
+ * SDMC has 174 documented registers. In addition the u-boot device tree
+ * describes the following regions:
+ *  - PHY status regs at offset 0x400, length 0x200
+ *  - PHY setting regs at offset 0x100, length 0x300
+ *
+ * There are two sets of MRS (Mode Registers) configuration in ast2600 memory
+ * system: one is in the SDRAM MC (memory controller) which is used in run
+ * time, and the other is in the DDR-PHY IP which is used during DDR-PHY
+ * training.
+ */
+#define ASPEED_SDMC_NR_REGS (0x500 >> 2)
+
+struct AspeedSDMCState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+
+    uint32_t regs[ASPEED_SDMC_NR_REGS];
+    uint64_t ram_size;
+    uint64_t max_ram_size;
+};
+
+
+struct AspeedSDMCClass {
+    SysBusDeviceClass parent_class;
+
+    uint64_t max_ram_size;
+    const uint64_t *valid_ram_sizes;
+    uint32_t (*compute_conf)(AspeedSDMCState *s, uint32_t data);
+    void (*write)(AspeedSDMCState *s, uint32_t reg, uint32_t data);
+};
+
+#endif /* ASPEED_SDMC_H */
diff --git a/include/hw/misc/aspeed_xdma.h b/include/hw/misc/aspeed_xdma.h
new file mode 100644 (file)
index 0000000..b1478fd
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * ASPEED XDMA Controller
+ * Eddie James <eajames@linux.ibm.com>
+ *
+ * Copyright (C) 2019 IBM Corp.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef ASPEED_XDMA_H
+#define ASPEED_XDMA_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_ASPEED_XDMA "aspeed.xdma"
+#define TYPE_ASPEED_2400_XDMA TYPE_ASPEED_XDMA "-ast2400"
+#define TYPE_ASPEED_2500_XDMA TYPE_ASPEED_XDMA "-ast2500"
+#define TYPE_ASPEED_2600_XDMA TYPE_ASPEED_XDMA "-ast2600"
+OBJECT_DECLARE_TYPE(AspeedXDMAState, AspeedXDMAClass, ASPEED_XDMA)
+
+#define ASPEED_XDMA_NUM_REGS (ASPEED_XDMA_REG_SIZE / sizeof(uint32_t))
+#define ASPEED_XDMA_REG_SIZE 0x7C
+
+struct AspeedXDMAState {
+    SysBusDevice parent;
+
+    MemoryRegion iomem;
+    qemu_irq irq;
+
+    char bmc_cmdq_readp_set;
+    uint32_t regs[ASPEED_XDMA_NUM_REGS];
+};
+
+struct AspeedXDMAClass {
+    SysBusDeviceClass parent_class;
+
+    uint8_t cmdq_endp;
+    uint8_t cmdq_wrp;
+    uint8_t cmdq_rdp;
+    uint8_t intr_ctrl;
+    uint32_t intr_ctrl_mask;
+    uint8_t intr_status;
+    uint32_t intr_complete;
+};
+
+#endif /* ASPEED_XDMA_H */
diff --git a/include/hw/misc/auxbus.h b/include/hw/misc/auxbus.h
new file mode 100644 (file)
index 0000000..03cacde
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * auxbus.h
+ *
+ *  Copyright (C)2014 : GreenSocs Ltd
+ *      http://www.greensocs.com/ , email: info@greensocs.com
+ *
+ *  Developed by :
+ *  Frederic Konrad   <fred.konrad@greensocs.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option)any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef HW_MISC_AUXBUS_H
+#define HW_MISC_AUXBUS_H
+
+#include "exec/memory.h"
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+typedef struct AUXSlave AUXSlave;
+typedef enum AUXCommand AUXCommand;
+typedef enum AUXReply AUXReply;
+
+#define TYPE_AUXTOI2C "aux-to-i2c-bridge"
+OBJECT_DECLARE_SIMPLE_TYPE(AUXTOI2CState, AUXTOI2C)
+
+enum AUXCommand {
+    WRITE_I2C = 0,
+    READ_I2C = 1,
+    WRITE_I2C_STATUS = 2,
+    WRITE_I2C_MOT = 4,
+    READ_I2C_MOT = 5,
+    WRITE_AUX = 8,
+    READ_AUX = 9
+};
+
+enum AUXReply {
+    AUX_I2C_ACK = 0,
+    AUX_NACK = 1,
+    AUX_DEFER = 2,
+    AUX_I2C_NACK = 4,
+    AUX_I2C_DEFER = 8
+};
+
+#define TYPE_AUX_BUS "aux-bus"
+OBJECT_DECLARE_SIMPLE_TYPE(AUXBus, AUX_BUS)
+
+struct AUXBus {
+    /* < private > */
+    BusState qbus;
+
+    /* < public > */
+    AUXSlave *current_dev;
+    AUXSlave *dev;
+    uint32_t last_i2c_address;
+    AUXCommand last_transaction;
+
+    AUXTOI2CState *bridge;
+
+    MemoryRegion *aux_io;
+    AddressSpace aux_addr_space;
+};
+
+#define TYPE_AUX_SLAVE "aux-slave"
+OBJECT_DECLARE_SIMPLE_TYPE(AUXSlave, AUX_SLAVE)
+
+struct AUXSlave {
+    /* < private > */
+    DeviceState parent_obj;
+
+    /* < public > */
+    MemoryRegion *mmio;
+};
+
+/**
+ * aux_bus_init: Initialize an AUX bus.
+ *
+ * Returns the new AUX bus created.
+ *
+ * @parent The device where this bus is located.
+ * @name The name of the bus.
+ */
+AUXBus *aux_bus_init(DeviceState *parent, const char *name);
+
+/**
+ * aux_bus_realize: Realize an AUX bus.
+ *
+ * @bus: The AUX bus.
+ */
+void aux_bus_realize(AUXBus *bus);
+
+/*
+ * aux_request: Make a request on the bus.
+ *
+ * Returns the reply of the request.
+ *
+ * @bus The bus where the request happen.
+ * @cmd The command requested.
+ * @address The 20bits address of the slave.
+ * @len The length of the read or write.
+ * @data The data array which will be filled or read during transfer.
+ */
+AUXReply aux_request(AUXBus *bus, AUXCommand cmd, uint32_t address,
+                              uint8_t len, uint8_t *data);
+
+/*
+ * aux_get_i2c_bus: Get the i2c bus for I2C over AUX command.
+ *
+ * Returns the i2c bus associated to this AUX bus.
+ *
+ * @bus The AUX bus.
+ */
+I2CBus *aux_get_i2c_bus(AUXBus *bus);
+
+/*
+ * aux_init_mmio: Init an mmio for an AUX slave.
+ *
+ * @aux_slave The AUX slave.
+ * @mmio The mmio to be registered.
+ */
+void aux_init_mmio(AUXSlave *aux_slave, MemoryRegion *mmio);
+
+/* aux_map_slave: Map the mmio for an AUX slave on the bus.
+ *
+ * @dev The AUX slave.
+ * @addr The address for the slave's mmio.
+ */
+void aux_map_slave(AUXSlave *dev, hwaddr addr);
+
+#endif /* HW_MISC_AUXBUS_H */
diff --git a/include/hw/misc/avr_power.h b/include/hw/misc/avr_power.h
new file mode 100644 (file)
index 0000000..388e421
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * AVR Power Reduction Management
+ *
+ * Copyright (c) 2019-2020 Michael Rolnik
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_MISC_AVR_POWER_H
+#define HW_MISC_AVR_POWER_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+
+#define TYPE_AVR_MASK "avr-power"
+OBJECT_DECLARE_SIMPLE_TYPE(AVRMaskState, AVR_MASK)
+
+struct AVRMaskState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion iomem;
+
+    uint8_t val;
+    qemu_irq irq[8];
+};
+
+#endif /* HW_MISC_AVR_POWER_H */
diff --git a/include/hw/misc/bcm2835_cprman.h b/include/hw/misc/bcm2835_cprman.h
new file mode 100644 (file)
index 0000000..0d38036
--- /dev/null
@@ -0,0 +1,210 @@
+/*
+ * BCM2835 CPRMAN clock manager
+ *
+ * Copyright (c) 2020 Luc Michel <luc@lmichel.fr>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_MISC_BCM2835_CPRMAN_H
+#define HW_MISC_BCM2835_CPRMAN_H
+
+#include "hw/sysbus.h"
+#include "hw/qdev-clock.h"
+
+#define TYPE_BCM2835_CPRMAN "bcm2835-cprman"
+
+typedef struct BCM2835CprmanState BCM2835CprmanState;
+
+DECLARE_INSTANCE_CHECKER(BCM2835CprmanState, CPRMAN,
+                         TYPE_BCM2835_CPRMAN)
+
+#define CPRMAN_NUM_REGS (0x2000 / sizeof(uint32_t))
+
+typedef enum CprmanPll {
+    CPRMAN_PLLA = 0,
+    CPRMAN_PLLC,
+    CPRMAN_PLLD,
+    CPRMAN_PLLH,
+    CPRMAN_PLLB,
+
+    CPRMAN_NUM_PLL
+} CprmanPll;
+
+typedef enum CprmanPllChannel {
+    CPRMAN_PLLA_CHANNEL_DSI0 = 0,
+    CPRMAN_PLLA_CHANNEL_CORE,
+    CPRMAN_PLLA_CHANNEL_PER,
+    CPRMAN_PLLA_CHANNEL_CCP2,
+
+    CPRMAN_PLLC_CHANNEL_CORE2,
+    CPRMAN_PLLC_CHANNEL_CORE1,
+    CPRMAN_PLLC_CHANNEL_PER,
+    CPRMAN_PLLC_CHANNEL_CORE0,
+
+    CPRMAN_PLLD_CHANNEL_DSI0,
+    CPRMAN_PLLD_CHANNEL_CORE,
+    CPRMAN_PLLD_CHANNEL_PER,
+    CPRMAN_PLLD_CHANNEL_DSI1,
+
+    CPRMAN_PLLH_CHANNEL_AUX,
+    CPRMAN_PLLH_CHANNEL_RCAL,
+    CPRMAN_PLLH_CHANNEL_PIX,
+
+    CPRMAN_PLLB_CHANNEL_ARM,
+
+    CPRMAN_NUM_PLL_CHANNEL,
+
+    /* Special values used when connecting clock sources to clocks */
+    CPRMAN_CLOCK_SRC_NORMAL = -1,
+    CPRMAN_CLOCK_SRC_FORCE_GROUND = -2,
+    CPRMAN_CLOCK_SRC_DSI0HSCK = -3,
+} CprmanPllChannel;
+
+typedef enum CprmanClockMux {
+    CPRMAN_CLOCK_GNRIC,
+    CPRMAN_CLOCK_VPU,
+    CPRMAN_CLOCK_SYS,
+    CPRMAN_CLOCK_PERIA,
+    CPRMAN_CLOCK_PERII,
+    CPRMAN_CLOCK_H264,
+    CPRMAN_CLOCK_ISP,
+    CPRMAN_CLOCK_V3D,
+    CPRMAN_CLOCK_CAM0,
+    CPRMAN_CLOCK_CAM1,
+    CPRMAN_CLOCK_CCP2,
+    CPRMAN_CLOCK_DSI0E,
+    CPRMAN_CLOCK_DSI0P,
+    CPRMAN_CLOCK_DPI,
+    CPRMAN_CLOCK_GP0,
+    CPRMAN_CLOCK_GP1,
+    CPRMAN_CLOCK_GP2,
+    CPRMAN_CLOCK_HSM,
+    CPRMAN_CLOCK_OTP,
+    CPRMAN_CLOCK_PCM,
+    CPRMAN_CLOCK_PWM,
+    CPRMAN_CLOCK_SLIM,
+    CPRMAN_CLOCK_SMI,
+    CPRMAN_CLOCK_TEC,
+    CPRMAN_CLOCK_TD0,
+    CPRMAN_CLOCK_TD1,
+    CPRMAN_CLOCK_TSENS,
+    CPRMAN_CLOCK_TIMER,
+    CPRMAN_CLOCK_UART,
+    CPRMAN_CLOCK_VEC,
+    CPRMAN_CLOCK_PULSE,
+    CPRMAN_CLOCK_SDC,
+    CPRMAN_CLOCK_ARM,
+    CPRMAN_CLOCK_AVEO,
+    CPRMAN_CLOCK_EMMC,
+    CPRMAN_CLOCK_EMMC2,
+
+    CPRMAN_NUM_CLOCK_MUX
+} CprmanClockMux;
+
+typedef enum CprmanClockMuxSource {
+    CPRMAN_CLOCK_SRC_GND = 0,
+    CPRMAN_CLOCK_SRC_XOSC,
+    CPRMAN_CLOCK_SRC_TD0,
+    CPRMAN_CLOCK_SRC_TD1,
+    CPRMAN_CLOCK_SRC_PLLA,
+    CPRMAN_CLOCK_SRC_PLLC,
+    CPRMAN_CLOCK_SRC_PLLD,
+    CPRMAN_CLOCK_SRC_PLLH,
+    CPRMAN_CLOCK_SRC_PLLC_CORE1,
+    CPRMAN_CLOCK_SRC_PLLC_CORE2,
+
+    CPRMAN_NUM_CLOCK_MUX_SRC
+} CprmanClockMuxSource;
+
+typedef struct CprmanPllState {
+    /*< private >*/
+    DeviceState parent_obj;
+
+    /*< public >*/
+    CprmanPll id;
+
+    uint32_t *reg_cm;
+    uint32_t *reg_a2w_ctrl;
+    uint32_t *reg_a2w_ana; /* ANA[0] .. ANA[3] */
+    uint32_t prediv_mask; /* prediv bit in ana[1] */
+    uint32_t *reg_a2w_frac;
+
+    Clock *xosc_in;
+    Clock *out;
+} CprmanPllState;
+
+typedef struct CprmanPllChannelState {
+    /*< private >*/
+    DeviceState parent_obj;
+
+    /*< public >*/
+    CprmanPllChannel id;
+    CprmanPll parent;
+
+    uint32_t *reg_cm;
+    uint32_t hold_mask;
+    uint32_t load_mask;
+    uint32_t *reg_a2w_ctrl;
+    int fixed_divider;
+
+    Clock *pll_in;
+    Clock *out;
+} CprmanPllChannelState;
+
+typedef struct CprmanClockMuxState {
+    /*< private >*/
+    DeviceState parent_obj;
+
+    /*< public >*/
+    CprmanClockMux id;
+
+    uint32_t *reg_ctl;
+    uint32_t *reg_div;
+    int int_bits;
+    int frac_bits;
+
+    Clock *srcs[CPRMAN_NUM_CLOCK_MUX_SRC];
+    Clock *out;
+
+    /*
+     * Used by clock srcs update callback to retrieve both the clock and the
+     * source number.
+     */
+    struct CprmanClockMuxState *backref[CPRMAN_NUM_CLOCK_MUX_SRC];
+} CprmanClockMuxState;
+
+typedef struct CprmanDsi0HsckMuxState {
+    /*< private >*/
+    DeviceState parent_obj;
+
+    /*< public >*/
+    CprmanClockMux id;
+
+    uint32_t *reg_cm;
+
+    Clock *plla_in;
+    Clock *plld_in;
+    Clock *out;
+} CprmanDsi0HsckMuxState;
+
+struct BCM2835CprmanState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+
+    CprmanPllState plls[CPRMAN_NUM_PLL];
+    CprmanPllChannelState channels[CPRMAN_NUM_PLL_CHANNEL];
+    CprmanClockMuxState clock_muxes[CPRMAN_NUM_CLOCK_MUX];
+    CprmanDsi0HsckMuxState dsi0hsck_mux;
+
+    uint32_t regs[CPRMAN_NUM_REGS];
+    uint32_t xosc_freq;
+
+    Clock *xosc;
+    Clock *gnd;
+};
+
+#endif
diff --git a/include/hw/misc/bcm2835_cprman_internals.h b/include/hw/misc/bcm2835_cprman_internals.h
new file mode 100644 (file)
index 0000000..7617aff
--- /dev/null
@@ -0,0 +1,1019 @@
+/*
+ * BCM2835 CPRMAN clock manager
+ *
+ * Copyright (c) 2020 Luc Michel <luc@lmichel.fr>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_MISC_BCM2835_CPRMAN_INTERNALS_H
+#define HW_MISC_BCM2835_CPRMAN_INTERNALS_H
+
+#include "hw/registerfields.h"
+#include "hw/misc/bcm2835_cprman.h"
+
+#define TYPE_CPRMAN_PLL "bcm2835-cprman-pll"
+#define TYPE_CPRMAN_PLL_CHANNEL "bcm2835-cprman-pll-channel"
+#define TYPE_CPRMAN_CLOCK_MUX "bcm2835-cprman-clock-mux"
+#define TYPE_CPRMAN_DSI0HSCK_MUX "bcm2835-cprman-dsi0hsck-mux"
+
+DECLARE_INSTANCE_CHECKER(CprmanPllState, CPRMAN_PLL,
+                         TYPE_CPRMAN_PLL)
+DECLARE_INSTANCE_CHECKER(CprmanPllChannelState, CPRMAN_PLL_CHANNEL,
+                         TYPE_CPRMAN_PLL_CHANNEL)
+DECLARE_INSTANCE_CHECKER(CprmanClockMuxState, CPRMAN_CLOCK_MUX,
+                         TYPE_CPRMAN_CLOCK_MUX)
+DECLARE_INSTANCE_CHECKER(CprmanDsi0HsckMuxState, CPRMAN_DSI0HSCK_MUX,
+                         TYPE_CPRMAN_DSI0HSCK_MUX)
+
+/* Register map */
+
+/* PLLs */
+REG32(CM_PLLA, 0x104)
+    FIELD(CM_PLLA, LOADDSI0, 0, 1)
+    FIELD(CM_PLLA, HOLDDSI0, 1, 1)
+    FIELD(CM_PLLA, LOADCCP2, 2, 1)
+    FIELD(CM_PLLA, HOLDCCP2, 3, 1)
+    FIELD(CM_PLLA, LOADCORE, 4, 1)
+    FIELD(CM_PLLA, HOLDCORE, 5, 1)
+    FIELD(CM_PLLA, LOADPER, 6, 1)
+    FIELD(CM_PLLA, HOLDPER, 7, 1)
+    FIELD(CM_PLLx, ANARST, 8, 1)
+REG32(CM_PLLC, 0x108)
+    FIELD(CM_PLLC, LOADCORE0, 0, 1)
+    FIELD(CM_PLLC, HOLDCORE0, 1, 1)
+    FIELD(CM_PLLC, LOADCORE1, 2, 1)
+    FIELD(CM_PLLC, HOLDCORE1, 3, 1)
+    FIELD(CM_PLLC, LOADCORE2, 4, 1)
+    FIELD(CM_PLLC, HOLDCORE2, 5, 1)
+    FIELD(CM_PLLC, LOADPER, 6, 1)
+    FIELD(CM_PLLC, HOLDPER, 7, 1)
+REG32(CM_PLLD, 0x10c)
+    FIELD(CM_PLLD, LOADDSI0, 0, 1)
+    FIELD(CM_PLLD, HOLDDSI0, 1, 1)
+    FIELD(CM_PLLD, LOADDSI1, 2, 1)
+    FIELD(CM_PLLD, HOLDDSI1, 3, 1)
+    FIELD(CM_PLLD, LOADCORE, 4, 1)
+    FIELD(CM_PLLD, HOLDCORE, 5, 1)
+    FIELD(CM_PLLD, LOADPER, 6, 1)
+    FIELD(CM_PLLD, HOLDPER, 7, 1)
+REG32(CM_PLLH, 0x110)
+    FIELD(CM_PLLH, LOADPIX, 0, 1)
+    FIELD(CM_PLLH, LOADAUX, 1, 1)
+    FIELD(CM_PLLH, LOADRCAL, 2, 1)
+REG32(CM_PLLB, 0x170)
+    FIELD(CM_PLLB, LOADARM, 0, 1)
+    FIELD(CM_PLLB, HOLDARM, 1, 1)
+
+REG32(A2W_PLLA_CTRL, 0x1100)
+    FIELD(A2W_PLLx_CTRL, NDIV, 0, 10)
+    FIELD(A2W_PLLx_CTRL, PDIV, 12, 3)
+    FIELD(A2W_PLLx_CTRL, PWRDN, 16, 1)
+    FIELD(A2W_PLLx_CTRL, PRST_DISABLE, 17, 1)
+REG32(A2W_PLLC_CTRL, 0x1120)
+REG32(A2W_PLLD_CTRL, 0x1140)
+REG32(A2W_PLLH_CTRL, 0x1160)
+REG32(A2W_PLLB_CTRL, 0x11e0)
+
+REG32(A2W_PLLA_ANA0, 0x1010)
+REG32(A2W_PLLA_ANA1, 0x1014)
+    FIELD(A2W_PLLx_ANA1, FB_PREDIV, 14, 1)
+REG32(A2W_PLLA_ANA2, 0x1018)
+REG32(A2W_PLLA_ANA3, 0x101c)
+
+REG32(A2W_PLLC_ANA0, 0x1030)
+REG32(A2W_PLLC_ANA1, 0x1034)
+REG32(A2W_PLLC_ANA2, 0x1038)
+REG32(A2W_PLLC_ANA3, 0x103c)
+
+REG32(A2W_PLLD_ANA0, 0x1050)
+REG32(A2W_PLLD_ANA1, 0x1054)
+REG32(A2W_PLLD_ANA2, 0x1058)
+REG32(A2W_PLLD_ANA3, 0x105c)
+
+REG32(A2W_PLLH_ANA0, 0x1070)
+REG32(A2W_PLLH_ANA1, 0x1074)
+    FIELD(A2W_PLLH_ANA1, FB_PREDIV, 11, 1)
+REG32(A2W_PLLH_ANA2, 0x1078)
+REG32(A2W_PLLH_ANA3, 0x107c)
+
+REG32(A2W_PLLB_ANA0, 0x10f0)
+REG32(A2W_PLLB_ANA1, 0x10f4)
+REG32(A2W_PLLB_ANA2, 0x10f8)
+REG32(A2W_PLLB_ANA3, 0x10fc)
+
+REG32(A2W_PLLA_FRAC, 0x1200)
+    FIELD(A2W_PLLx_FRAC, FRAC, 0, 20)
+REG32(A2W_PLLC_FRAC, 0x1220)
+REG32(A2W_PLLD_FRAC, 0x1240)
+REG32(A2W_PLLH_FRAC, 0x1260)
+REG32(A2W_PLLB_FRAC, 0x12e0)
+
+/* PLL channels */
+REG32(A2W_PLLA_DSI0, 0x1300)
+    FIELD(A2W_PLLx_CHANNELy, DIV, 0, 8)
+    FIELD(A2W_PLLx_CHANNELy, DISABLE, 8, 1)
+REG32(A2W_PLLA_CORE, 0x1400)
+REG32(A2W_PLLA_PER, 0x1500)
+REG32(A2W_PLLA_CCP2, 0x1600)
+
+REG32(A2W_PLLC_CORE2, 0x1320)
+REG32(A2W_PLLC_CORE1, 0x1420)
+REG32(A2W_PLLC_PER, 0x1520)
+REG32(A2W_PLLC_CORE0, 0x1620)
+
+REG32(A2W_PLLD_DSI0, 0x1340)
+REG32(A2W_PLLD_CORE, 0x1440)
+REG32(A2W_PLLD_PER, 0x1540)
+REG32(A2W_PLLD_DSI1, 0x1640)
+
+REG32(A2W_PLLH_AUX, 0x1360)
+REG32(A2W_PLLH_RCAL, 0x1460)
+REG32(A2W_PLLH_PIX, 0x1560)
+REG32(A2W_PLLH_STS, 0x1660)
+
+REG32(A2W_PLLB_ARM, 0x13e0)
+
+/* Clock muxes */
+REG32(CM_GNRICCTL, 0x000)
+    FIELD(CM_CLOCKx_CTL, SRC, 0, 4)
+    FIELD(CM_CLOCKx_CTL, ENABLE, 4, 1)
+    FIELD(CM_CLOCKx_CTL, KILL, 5, 1)
+    FIELD(CM_CLOCKx_CTL, GATE, 6, 1)
+    FIELD(CM_CLOCKx_CTL, BUSY, 7, 1)
+    FIELD(CM_CLOCKx_CTL, BUSYD, 8, 1)
+    FIELD(CM_CLOCKx_CTL, MASH, 9, 2)
+    FIELD(CM_CLOCKx_CTL, FLIP, 11, 1)
+REG32(CM_GNRICDIV, 0x004)
+    FIELD(CM_CLOCKx_DIV, FRAC, 0, 12)
+REG32(CM_VPUCTL, 0x008)
+REG32(CM_VPUDIV, 0x00c)
+REG32(CM_SYSCTL, 0x010)
+REG32(CM_SYSDIV, 0x014)
+REG32(CM_PERIACTL, 0x018)
+REG32(CM_PERIADIV, 0x01c)
+REG32(CM_PERIICTL, 0x020)
+REG32(CM_PERIIDIV, 0x024)
+REG32(CM_H264CTL, 0x028)
+REG32(CM_H264DIV, 0x02c)
+REG32(CM_ISPCTL, 0x030)
+REG32(CM_ISPDIV, 0x034)
+REG32(CM_V3DCTL, 0x038)
+REG32(CM_V3DDIV, 0x03c)
+REG32(CM_CAM0CTL, 0x040)
+REG32(CM_CAM0DIV, 0x044)
+REG32(CM_CAM1CTL, 0x048)
+REG32(CM_CAM1DIV, 0x04c)
+REG32(CM_CCP2CTL, 0x050)
+REG32(CM_CCP2DIV, 0x054)
+REG32(CM_DSI0ECTL, 0x058)
+REG32(CM_DSI0EDIV, 0x05c)
+REG32(CM_DSI0PCTL, 0x060)
+REG32(CM_DSI0PDIV, 0x064)
+REG32(CM_DPICTL, 0x068)
+REG32(CM_DPIDIV, 0x06c)
+REG32(CM_GP0CTL, 0x070)
+REG32(CM_GP0DIV, 0x074)
+REG32(CM_GP1CTL, 0x078)
+REG32(CM_GP1DIV, 0x07c)
+REG32(CM_GP2CTL, 0x080)
+REG32(CM_GP2DIV, 0x084)
+REG32(CM_HSMCTL, 0x088)
+REG32(CM_HSMDIV, 0x08c)
+REG32(CM_OTPCTL, 0x090)
+REG32(CM_OTPDIV, 0x094)
+REG32(CM_PCMCTL, 0x098)
+REG32(CM_PCMDIV, 0x09c)
+REG32(CM_PWMCTL, 0x0a0)
+REG32(CM_PWMDIV, 0x0a4)
+REG32(CM_SLIMCTL, 0x0a8)
+REG32(CM_SLIMDIV, 0x0ac)
+REG32(CM_SMICTL, 0x0b0)
+REG32(CM_SMIDIV, 0x0b4)
+REG32(CM_TCNTCTL, 0x0c0)
+REG32(CM_TCNTCNT, 0x0c4)
+REG32(CM_TECCTL, 0x0c8)
+REG32(CM_TECDIV, 0x0cc)
+REG32(CM_TD0CTL, 0x0d0)
+REG32(CM_TD0DIV, 0x0d4)
+REG32(CM_TD1CTL, 0x0d8)
+REG32(CM_TD1DIV, 0x0dc)
+REG32(CM_TSENSCTL, 0x0e0)
+REG32(CM_TSENSDIV, 0x0e4)
+REG32(CM_TIMERCTL, 0x0e8)
+REG32(CM_TIMERDIV, 0x0ec)
+REG32(CM_UARTCTL, 0x0f0)
+REG32(CM_UARTDIV, 0x0f4)
+REG32(CM_VECCTL, 0x0f8)
+REG32(CM_VECDIV, 0x0fc)
+REG32(CM_PULSECTL, 0x190)
+REG32(CM_PULSEDIV, 0x194)
+REG32(CM_SDCCTL, 0x1a8)
+REG32(CM_SDCDIV, 0x1ac)
+REG32(CM_ARMCTL, 0x1b0)
+REG32(CM_AVEOCTL, 0x1b8)
+REG32(CM_AVEODIV, 0x1bc)
+REG32(CM_EMMCCTL, 0x1c0)
+REG32(CM_EMMCDIV, 0x1c4)
+REG32(CM_EMMC2CTL, 0x1d0)
+REG32(CM_EMMC2DIV, 0x1d4)
+
+/* misc registers */
+REG32(CM_LOCK, 0x114)
+    FIELD(CM_LOCK, FLOCKH, 12, 1)
+    FIELD(CM_LOCK, FLOCKD, 11, 1)
+    FIELD(CM_LOCK, FLOCKC, 10, 1)
+    FIELD(CM_LOCK, FLOCKB, 9, 1)
+    FIELD(CM_LOCK, FLOCKA, 8, 1)
+
+REG32(CM_DSI0HSCK, 0x120)
+    FIELD(CM_DSI0HSCK, SELPLLD, 0, 1)
+
+/*
+ * This field is common to all registers. Each register write value must match
+ * the CPRMAN_PASSWORD magic value in its 8 MSB.
+ */
+FIELD(CPRMAN, PASSWORD, 24, 8)
+#define CPRMAN_PASSWORD 0x5a
+
+/* PLL init info */
+typedef struct PLLInitInfo {
+    const char *name;
+    size_t cm_offset;
+    size_t a2w_ctrl_offset;
+    size_t a2w_ana_offset;
+    uint32_t prediv_mask; /* Prediv bit in ana[1] */
+    size_t a2w_frac_offset;
+} PLLInitInfo;
+
+#define FILL_PLL_INIT_INFO(pll_)                \
+    .cm_offset = R_CM_ ## pll_,                 \
+    .a2w_ctrl_offset = R_A2W_ ## pll_ ## _CTRL, \
+    .a2w_ana_offset = R_A2W_ ## pll_ ## _ANA0,  \
+    .a2w_frac_offset = R_A2W_ ## pll_ ## _FRAC
+
+static const PLLInitInfo PLL_INIT_INFO[] = {
+    [CPRMAN_PLLA] = {
+        .name = "plla",
+        .prediv_mask = R_A2W_PLLx_ANA1_FB_PREDIV_MASK,
+        FILL_PLL_INIT_INFO(PLLA),
+    },
+    [CPRMAN_PLLC] = {
+        .name = "pllc",
+        .prediv_mask = R_A2W_PLLx_ANA1_FB_PREDIV_MASK,
+        FILL_PLL_INIT_INFO(PLLC),
+    },
+    [CPRMAN_PLLD] = {
+        .name = "plld",
+        .prediv_mask = R_A2W_PLLx_ANA1_FB_PREDIV_MASK,
+        FILL_PLL_INIT_INFO(PLLD),
+    },
+    [CPRMAN_PLLH] = {
+        .name = "pllh",
+        .prediv_mask = R_A2W_PLLH_ANA1_FB_PREDIV_MASK,
+        FILL_PLL_INIT_INFO(PLLH),
+    },
+    [CPRMAN_PLLB] = {
+        .name = "pllb",
+        .prediv_mask = R_A2W_PLLx_ANA1_FB_PREDIV_MASK,
+        FILL_PLL_INIT_INFO(PLLB),
+    },
+};
+
+#undef FILL_PLL_CHANNEL_INIT_INFO
+
+static inline void set_pll_init_info(BCM2835CprmanState *s,
+                                     CprmanPllState *pll,
+                                     CprmanPll id)
+{
+    pll->id = id;
+    pll->reg_cm = &s->regs[PLL_INIT_INFO[id].cm_offset];
+    pll->reg_a2w_ctrl = &s->regs[PLL_INIT_INFO[id].a2w_ctrl_offset];
+    pll->reg_a2w_ana = &s->regs[PLL_INIT_INFO[id].a2w_ana_offset];
+    pll->prediv_mask = PLL_INIT_INFO[id].prediv_mask;
+    pll->reg_a2w_frac = &s->regs[PLL_INIT_INFO[id].a2w_frac_offset];
+}
+
+
+/* PLL channel init info */
+typedef struct PLLChannelInitInfo {
+    const char *name;
+    CprmanPll parent;
+    size_t cm_offset;
+    uint32_t cm_hold_mask;
+    uint32_t cm_load_mask;
+    size_t a2w_ctrl_offset;
+    unsigned int fixed_divider;
+} PLLChannelInitInfo;
+
+#define FILL_PLL_CHANNEL_INIT_INFO_common(pll_, channel_)            \
+    .parent = CPRMAN_ ## pll_,                                       \
+    .cm_offset = R_CM_ ## pll_,                                      \
+    .cm_load_mask = R_CM_ ## pll_ ## _ ## LOAD ## channel_ ## _MASK, \
+    .a2w_ctrl_offset = R_A2W_ ## pll_ ## _ ## channel_
+
+#define FILL_PLL_CHANNEL_INIT_INFO(pll_, channel_)                   \
+    FILL_PLL_CHANNEL_INIT_INFO_common(pll_, channel_),               \
+    .cm_hold_mask = R_CM_ ## pll_ ## _ ## HOLD ## channel_ ## _MASK, \
+    .fixed_divider = 1
+
+#define FILL_PLL_CHANNEL_INIT_INFO_nohold(pll_, channel_) \
+    FILL_PLL_CHANNEL_INIT_INFO_common(pll_, channel_),    \
+    .cm_hold_mask = 0
+
+static PLLChannelInitInfo PLL_CHANNEL_INIT_INFO[] = {
+    [CPRMAN_PLLA_CHANNEL_DSI0] = {
+        .name = "plla-dsi0",
+        FILL_PLL_CHANNEL_INIT_INFO(PLLA, DSI0),
+    },
+    [CPRMAN_PLLA_CHANNEL_CORE] = {
+        .name = "plla-core",
+        FILL_PLL_CHANNEL_INIT_INFO(PLLA, CORE),
+    },
+    [CPRMAN_PLLA_CHANNEL_PER] = {
+        .name = "plla-per",
+        FILL_PLL_CHANNEL_INIT_INFO(PLLA, PER),
+    },
+    [CPRMAN_PLLA_CHANNEL_CCP2] = {
+        .name = "plla-ccp2",
+        FILL_PLL_CHANNEL_INIT_INFO(PLLA, CCP2),
+    },
+
+    [CPRMAN_PLLC_CHANNEL_CORE2] = {
+        .name = "pllc-core2",
+        FILL_PLL_CHANNEL_INIT_INFO(PLLC, CORE2),
+    },
+    [CPRMAN_PLLC_CHANNEL_CORE1] = {
+        .name = "pllc-core1",
+        FILL_PLL_CHANNEL_INIT_INFO(PLLC, CORE1),
+    },
+    [CPRMAN_PLLC_CHANNEL_PER] = {
+        .name = "pllc-per",
+        FILL_PLL_CHANNEL_INIT_INFO(PLLC, PER),
+    },
+    [CPRMAN_PLLC_CHANNEL_CORE0] = {
+        .name = "pllc-core0",
+        FILL_PLL_CHANNEL_INIT_INFO(PLLC, CORE0),
+    },
+
+    [CPRMAN_PLLD_CHANNEL_DSI0] = {
+        .name = "plld-dsi0",
+        FILL_PLL_CHANNEL_INIT_INFO(PLLD, DSI0),
+    },
+    [CPRMAN_PLLD_CHANNEL_CORE] = {
+        .name = "plld-core",
+        FILL_PLL_CHANNEL_INIT_INFO(PLLD, CORE),
+    },
+    [CPRMAN_PLLD_CHANNEL_PER] = {
+        .name = "plld-per",
+        FILL_PLL_CHANNEL_INIT_INFO(PLLD, PER),
+    },
+    [CPRMAN_PLLD_CHANNEL_DSI1] = {
+        .name = "plld-dsi1",
+        FILL_PLL_CHANNEL_INIT_INFO(PLLD, DSI1),
+    },
+
+    [CPRMAN_PLLH_CHANNEL_AUX] = {
+        .name = "pllh-aux",
+        .fixed_divider = 1,
+        FILL_PLL_CHANNEL_INIT_INFO_nohold(PLLH, AUX),
+    },
+    [CPRMAN_PLLH_CHANNEL_RCAL] = {
+        .name = "pllh-rcal",
+        .fixed_divider = 10,
+        FILL_PLL_CHANNEL_INIT_INFO_nohold(PLLH, RCAL),
+    },
+    [CPRMAN_PLLH_CHANNEL_PIX] = {
+        .name = "pllh-pix",
+        .fixed_divider = 10,
+        FILL_PLL_CHANNEL_INIT_INFO_nohold(PLLH, PIX),
+    },
+
+    [CPRMAN_PLLB_CHANNEL_ARM] = {
+        .name = "pllb-arm",
+        FILL_PLL_CHANNEL_INIT_INFO(PLLB, ARM),
+    },
+};
+
+#undef FILL_PLL_CHANNEL_INIT_INFO_nohold
+#undef FILL_PLL_CHANNEL_INIT_INFO
+#undef FILL_PLL_CHANNEL_INIT_INFO_common
+
+static inline void set_pll_channel_init_info(BCM2835CprmanState *s,
+                                             CprmanPllChannelState *channel,
+                                             CprmanPllChannel id)
+{
+    channel->id = id;
+    channel->parent = PLL_CHANNEL_INIT_INFO[id].parent;
+    channel->reg_cm = &s->regs[PLL_CHANNEL_INIT_INFO[id].cm_offset];
+    channel->hold_mask = PLL_CHANNEL_INIT_INFO[id].cm_hold_mask;
+    channel->load_mask = PLL_CHANNEL_INIT_INFO[id].cm_load_mask;
+    channel->reg_a2w_ctrl = &s->regs[PLL_CHANNEL_INIT_INFO[id].a2w_ctrl_offset];
+    channel->fixed_divider = PLL_CHANNEL_INIT_INFO[id].fixed_divider;
+}
+
+/* Clock mux init info */
+typedef struct ClockMuxInitInfo {
+    const char *name;
+    size_t cm_offset; /* cm_offset[0]->CM_CTL, cm_offset[1]->CM_DIV */
+    int int_bits;
+    int frac_bits;
+
+    CprmanPllChannel src_mapping[CPRMAN_NUM_CLOCK_MUX_SRC];
+} ClockMuxInitInfo;
+
+/*
+ * Each clock mux can have up to 10 sources. Sources 0 to 3 are always the
+ * same (ground, xosc, td0, td1). Sources 4 to 9 are mux specific, and are not
+ * always populated. The following macros catch all those cases.
+ */
+
+/* Unknown mapping. Connect everything to ground */
+#define SRC_MAPPING_INFO_unknown                          \
+    .src_mapping = {                                      \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, /* gnd */          \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, /* xosc */         \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, /* test debug 0 */ \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, /* test debug 1 */ \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, /* pll a */        \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, /* pll c */        \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, /* pll d */        \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, /* pll h */        \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, /* pll c, core1 */ \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, /* pll c, core2 */ \
+    }
+
+/* Only the oscillator and the two test debug clocks */
+#define SRC_MAPPING_INFO_xosc          \
+    .src_mapping = {                   \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+    }
+
+/* All the PLL "core" channels */
+#define SRC_MAPPING_INFO_core      \
+    .src_mapping = {               \
+        CPRMAN_CLOCK_SRC_NORMAL,   \
+        CPRMAN_CLOCK_SRC_NORMAL,   \
+        CPRMAN_CLOCK_SRC_NORMAL,   \
+        CPRMAN_CLOCK_SRC_NORMAL,   \
+        CPRMAN_PLLA_CHANNEL_CORE,  \
+        CPRMAN_PLLC_CHANNEL_CORE0, \
+        CPRMAN_PLLD_CHANNEL_CORE,  \
+        CPRMAN_PLLH_CHANNEL_AUX,   \
+        CPRMAN_PLLC_CHANNEL_CORE1, \
+        CPRMAN_PLLC_CHANNEL_CORE2, \
+    }
+
+/* All the PLL "per" channels */
+#define SRC_MAPPING_INFO_periph        \
+    .src_mapping = {                   \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_PLLA_CHANNEL_PER,       \
+        CPRMAN_PLLC_CHANNEL_PER,       \
+        CPRMAN_PLLD_CHANNEL_PER,       \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+    }
+
+/*
+ * The DSI0 channels. This one got an intermediate mux between the PLL channels
+ * and the clock input.
+ */
+#define SRC_MAPPING_INFO_dsi0          \
+    .src_mapping = {                   \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_CLOCK_SRC_DSI0HSCK,     \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+    }
+
+/* The DSI1 channel */
+#define SRC_MAPPING_INFO_dsi1          \
+    .src_mapping = {                   \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_CLOCK_SRC_NORMAL,       \
+        CPRMAN_PLLD_CHANNEL_DSI1,      \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+        CPRMAN_CLOCK_SRC_FORCE_GROUND, \
+    }
+
+#define FILL_CLOCK_MUX_SRC_MAPPING_INIT_INFO(kind_) \
+    SRC_MAPPING_INFO_ ## kind_
+
+#define FILL_CLOCK_MUX_INIT_INFO(clock_, kind_) \
+    .cm_offset = R_CM_ ## clock_ ## CTL,        \
+    FILL_CLOCK_MUX_SRC_MAPPING_INIT_INFO(kind_)
+
+static ClockMuxInitInfo CLOCK_MUX_INIT_INFO[] = {
+    [CPRMAN_CLOCK_GNRIC] = {
+        .name = "gnric",
+        FILL_CLOCK_MUX_INIT_INFO(GNRIC, unknown),
+    },
+    [CPRMAN_CLOCK_VPU] = {
+        .name = "vpu",
+        .int_bits = 12,
+        .frac_bits = 8,
+        FILL_CLOCK_MUX_INIT_INFO(VPU, core),
+    },
+    [CPRMAN_CLOCK_SYS] = {
+        .name = "sys",
+        FILL_CLOCK_MUX_INIT_INFO(SYS, unknown),
+    },
+    [CPRMAN_CLOCK_PERIA] = {
+        .name = "peria",
+        FILL_CLOCK_MUX_INIT_INFO(PERIA, unknown),
+    },
+    [CPRMAN_CLOCK_PERII] = {
+        .name = "perii",
+        FILL_CLOCK_MUX_INIT_INFO(PERII, unknown),
+    },
+    [CPRMAN_CLOCK_H264] = {
+        .name = "h264",
+        .int_bits = 4,
+        .frac_bits = 8,
+        FILL_CLOCK_MUX_INIT_INFO(H264, core),
+    },
+    [CPRMAN_CLOCK_ISP] = {
+        .name = "isp",
+        .int_bits = 4,
+        .frac_bits = 8,
+        FILL_CLOCK_MUX_INIT_INFO(ISP, core),
+    },
+    [CPRMAN_CLOCK_V3D] = {
+        .name = "v3d",
+        FILL_CLOCK_MUX_INIT_INFO(V3D, core),
+    },
+    [CPRMAN_CLOCK_CAM0] = {
+        .name = "cam0",
+        .int_bits = 4,
+        .frac_bits = 8,
+        FILL_CLOCK_MUX_INIT_INFO(CAM0, periph),
+    },
+    [CPRMAN_CLOCK_CAM1] = {
+        .name = "cam1",
+        .int_bits = 4,
+        .frac_bits = 8,
+        FILL_CLOCK_MUX_INIT_INFO(CAM1, periph),
+    },
+    [CPRMAN_CLOCK_CCP2] = {
+        .name = "ccp2",
+        FILL_CLOCK_MUX_INIT_INFO(CCP2, unknown),
+    },
+    [CPRMAN_CLOCK_DSI0E] = {
+        .name = "dsi0e",
+        .int_bits = 4,
+        .frac_bits = 8,
+        FILL_CLOCK_MUX_INIT_INFO(DSI0E, dsi0),
+    },
+    [CPRMAN_CLOCK_DSI0P] = {
+        .name = "dsi0p",
+        .int_bits = 0,
+        .frac_bits = 0,
+        FILL_CLOCK_MUX_INIT_INFO(DSI0P, dsi0),
+    },
+    [CPRMAN_CLOCK_DPI] = {
+        .name = "dpi",
+        .int_bits = 4,
+        .frac_bits = 8,
+        FILL_CLOCK_MUX_INIT_INFO(DPI, periph),
+    },
+    [CPRMAN_CLOCK_GP0] = {
+        .name = "gp0",
+        .int_bits = 12,
+        .frac_bits = 12,
+        FILL_CLOCK_MUX_INIT_INFO(GP0, periph),
+    },
+    [CPRMAN_CLOCK_GP1] = {
+        .name = "gp1",
+        .int_bits = 12,
+        .frac_bits = 12,
+        FILL_CLOCK_MUX_INIT_INFO(GP1, periph),
+    },
+    [CPRMAN_CLOCK_GP2] = {
+        .name = "gp2",
+        .int_bits = 12,
+        .frac_bits = 12,
+        FILL_CLOCK_MUX_INIT_INFO(GP2, periph),
+    },
+    [CPRMAN_CLOCK_HSM] = {
+        .name = "hsm",
+        .int_bits = 4,
+        .frac_bits = 8,
+        FILL_CLOCK_MUX_INIT_INFO(HSM, periph),
+    },
+    [CPRMAN_CLOCK_OTP] = {
+        .name = "otp",
+        .int_bits = 4,
+        .frac_bits = 0,
+        FILL_CLOCK_MUX_INIT_INFO(OTP, xosc),
+    },
+    [CPRMAN_CLOCK_PCM] = {
+        .name = "pcm",
+        .int_bits = 12,
+        .frac_bits = 12,
+        FILL_CLOCK_MUX_INIT_INFO(PCM, periph),
+    },
+    [CPRMAN_CLOCK_PWM] = {
+        .name = "pwm",
+        .int_bits = 12,
+        .frac_bits = 12,
+        FILL_CLOCK_MUX_INIT_INFO(PWM, periph),
+    },
+    [CPRMAN_CLOCK_SLIM] = {
+        .name = "slim",
+        .int_bits = 12,
+        .frac_bits = 12,
+        FILL_CLOCK_MUX_INIT_INFO(SLIM, periph),
+    },
+    [CPRMAN_CLOCK_SMI] = {
+        .name = "smi",
+        .int_bits = 4,
+        .frac_bits = 8,
+        FILL_CLOCK_MUX_INIT_INFO(SMI, periph),
+    },
+    [CPRMAN_CLOCK_TEC] = {
+        .name = "tec",
+        .int_bits = 6,
+        .frac_bits = 0,
+        FILL_CLOCK_MUX_INIT_INFO(TEC, xosc),
+    },
+    [CPRMAN_CLOCK_TD0] = {
+        .name = "td0",
+        FILL_CLOCK_MUX_INIT_INFO(TD0, unknown),
+    },
+    [CPRMAN_CLOCK_TD1] = {
+        .name = "td1",
+        FILL_CLOCK_MUX_INIT_INFO(TD1, unknown),
+    },
+    [CPRMAN_CLOCK_TSENS] = {
+        .name = "tsens",
+        .int_bits = 5,
+        .frac_bits = 0,
+        FILL_CLOCK_MUX_INIT_INFO(TSENS, xosc),
+    },
+    [CPRMAN_CLOCK_TIMER] = {
+        .name = "timer",
+        .int_bits = 6,
+        .frac_bits = 12,
+        FILL_CLOCK_MUX_INIT_INFO(TIMER, xosc),
+    },
+    [CPRMAN_CLOCK_UART] = {
+        .name = "uart",
+        .int_bits = 10,
+        .frac_bits = 12,
+        FILL_CLOCK_MUX_INIT_INFO(UART, periph),
+    },
+    [CPRMAN_CLOCK_VEC] = {
+        .name = "vec",
+        .int_bits = 4,
+        .frac_bits = 0,
+        FILL_CLOCK_MUX_INIT_INFO(VEC, periph),
+    },
+    [CPRMAN_CLOCK_PULSE] = {
+        .name = "pulse",
+        FILL_CLOCK_MUX_INIT_INFO(PULSE, xosc),
+    },
+    [CPRMAN_CLOCK_SDC] = {
+        .name = "sdram",
+        .int_bits = 6,
+        .frac_bits = 0,
+        FILL_CLOCK_MUX_INIT_INFO(SDC, core),
+    },
+    [CPRMAN_CLOCK_ARM] = {
+        .name = "arm",
+        FILL_CLOCK_MUX_INIT_INFO(ARM, unknown),
+    },
+    [CPRMAN_CLOCK_AVEO] = {
+        .name = "aveo",
+        .int_bits = 4,
+        .frac_bits = 0,
+        FILL_CLOCK_MUX_INIT_INFO(AVEO, periph),
+    },
+    [CPRMAN_CLOCK_EMMC] = {
+        .name = "emmc",
+        .int_bits = 4,
+        .frac_bits = 8,
+        FILL_CLOCK_MUX_INIT_INFO(EMMC, periph),
+    },
+    [CPRMAN_CLOCK_EMMC2] = {
+        .name = "emmc2",
+        .int_bits = 4,
+        .frac_bits = 8,
+        FILL_CLOCK_MUX_INIT_INFO(EMMC2, unknown),
+    },
+};
+
+#undef FILL_CLOCK_MUX_INIT_INFO
+#undef FILL_CLOCK_MUX_SRC_MAPPING_INIT_INFO
+#undef SRC_MAPPING_INFO_dsi1
+#undef SRC_MAPPING_INFO_dsi0
+#undef SRC_MAPPING_INFO_periph
+#undef SRC_MAPPING_INFO_core
+#undef SRC_MAPPING_INFO_xosc
+#undef SRC_MAPPING_INFO_unknown
+
+static inline void set_clock_mux_init_info(BCM2835CprmanState *s,
+                                           CprmanClockMuxState *mux,
+                                           CprmanClockMux id)
+{
+    mux->id = id;
+    mux->reg_ctl = &s->regs[CLOCK_MUX_INIT_INFO[id].cm_offset];
+    mux->reg_div = &s->regs[CLOCK_MUX_INIT_INFO[id].cm_offset + 1];
+    mux->int_bits = CLOCK_MUX_INIT_INFO[id].int_bits;
+    mux->frac_bits = CLOCK_MUX_INIT_INFO[id].frac_bits;
+}
+
+
+/*
+ * Object reset info
+ * Those values have been dumped from a Raspberry Pi 3 Model B v1.2 using the
+ * clk debugfs interface in Linux.
+ */
+typedef struct PLLResetInfo {
+    uint32_t cm;
+    uint32_t a2w_ctrl;
+    uint32_t a2w_ana[4];
+    uint32_t a2w_frac;
+} PLLResetInfo;
+
+static const PLLResetInfo PLL_RESET_INFO[] = {
+    [CPRMAN_PLLA] = {
+        .cm = 0x0000008a,
+        .a2w_ctrl = 0x0002103a,
+        .a2w_frac = 0x00098000,
+        .a2w_ana = { 0x00000000, 0x00144000, 0x00000000, 0x00000100 }
+    },
+
+    [CPRMAN_PLLC] = {
+        .cm = 0x00000228,
+        .a2w_ctrl = 0x0002103e,
+        .a2w_frac = 0x00080000,
+        .a2w_ana = { 0x00000000, 0x00144000, 0x00000000, 0x00000100 }
+    },
+
+    [CPRMAN_PLLD] = {
+        .cm = 0x0000020a,
+        .a2w_ctrl = 0x00021034,
+        .a2w_frac = 0x00015556,
+        .a2w_ana = { 0x00000000, 0x00144000, 0x00000000, 0x00000100 }
+    },
+
+    [CPRMAN_PLLH] = {
+        .cm = 0x00000000,
+        .a2w_ctrl = 0x0002102d,
+        .a2w_frac = 0x00000000,
+        .a2w_ana = { 0x00900000, 0x0000000c, 0x00000000, 0x00000000 }
+    },
+
+    [CPRMAN_PLLB] = {
+        /* unknown */
+        .cm = 0x00000000,
+        .a2w_ctrl = 0x00000000,
+        .a2w_frac = 0x00000000,
+        .a2w_ana = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }
+    }
+};
+
+typedef struct PLLChannelResetInfo {
+    /*
+     * Even though a PLL channel has a CM register, it shares it with its
+     * parent PLL. The parent already takes care of the reset value.
+     */
+    uint32_t a2w_ctrl;
+} PLLChannelResetInfo;
+
+static const PLLChannelResetInfo PLL_CHANNEL_RESET_INFO[] = {
+    [CPRMAN_PLLA_CHANNEL_DSI0] = { .a2w_ctrl = 0x00000100 },
+    [CPRMAN_PLLA_CHANNEL_CORE] = { .a2w_ctrl = 0x00000003 },
+    [CPRMAN_PLLA_CHANNEL_PER] = { .a2w_ctrl = 0x00000000 }, /* unknown */
+    [CPRMAN_PLLA_CHANNEL_CCP2] = { .a2w_ctrl = 0x00000100 },
+
+    [CPRMAN_PLLC_CHANNEL_CORE2] = { .a2w_ctrl = 0x00000100 },
+    [CPRMAN_PLLC_CHANNEL_CORE1] = { .a2w_ctrl = 0x00000100 },
+    [CPRMAN_PLLC_CHANNEL_PER] = { .a2w_ctrl = 0x00000002 },
+    [CPRMAN_PLLC_CHANNEL_CORE0] = { .a2w_ctrl = 0x00000002 },
+
+    [CPRMAN_PLLD_CHANNEL_DSI0] = { .a2w_ctrl = 0x00000100 },
+    [CPRMAN_PLLD_CHANNEL_CORE] = { .a2w_ctrl = 0x00000004 },
+    [CPRMAN_PLLD_CHANNEL_PER] = { .a2w_ctrl = 0x00000004 },
+    [CPRMAN_PLLD_CHANNEL_DSI1] = { .a2w_ctrl = 0x00000100 },
+
+    [CPRMAN_PLLH_CHANNEL_AUX] = { .a2w_ctrl = 0x00000004 },
+    [CPRMAN_PLLH_CHANNEL_RCAL] = { .a2w_ctrl = 0x00000000 },
+    [CPRMAN_PLLH_CHANNEL_PIX] = { .a2w_ctrl = 0x00000000 },
+
+    [CPRMAN_PLLB_CHANNEL_ARM] = { .a2w_ctrl = 0x00000000 }, /* unknown */
+};
+
+typedef struct ClockMuxResetInfo {
+    uint32_t cm_ctl;
+    uint32_t cm_div;
+} ClockMuxResetInfo;
+
+static const ClockMuxResetInfo CLOCK_MUX_RESET_INFO[] = {
+    [CPRMAN_CLOCK_GNRIC] = {
+        .cm_ctl = 0, /* unknown */
+        .cm_div = 0
+    },
+
+    [CPRMAN_CLOCK_VPU] = {
+        .cm_ctl = 0x00000245,
+        .cm_div = 0x00003000,
+    },
+
+    [CPRMAN_CLOCK_SYS] = {
+        .cm_ctl = 0, /* unknown */
+        .cm_div = 0
+    },
+
+    [CPRMAN_CLOCK_PERIA] = {
+        .cm_ctl = 0, /* unknown */
+        .cm_div = 0
+    },
+
+    [CPRMAN_CLOCK_PERII] = {
+        .cm_ctl = 0, /* unknown */
+        .cm_div = 0
+    },
+
+    [CPRMAN_CLOCK_H264] = {
+        .cm_ctl = 0x00000244,
+        .cm_div = 0x00003000,
+    },
+
+    [CPRMAN_CLOCK_ISP] = {
+        .cm_ctl = 0x00000244,
+        .cm_div = 0x00003000,
+    },
+
+    [CPRMAN_CLOCK_V3D] = {
+        .cm_ctl = 0, /* unknown */
+        .cm_div = 0
+    },
+
+    [CPRMAN_CLOCK_CAM0] = {
+        .cm_ctl = 0x00000000,
+        .cm_div = 0x00000000,
+    },
+
+    [CPRMAN_CLOCK_CAM1] = {
+        .cm_ctl = 0x00000000,
+        .cm_div = 0x00000000,
+    },
+
+    [CPRMAN_CLOCK_CCP2] = {
+        .cm_ctl = 0, /* unknown */
+        .cm_div = 0
+    },
+
+    [CPRMAN_CLOCK_DSI0E] = {
+        .cm_ctl = 0x00000000,
+        .cm_div = 0x00000000,
+    },
+
+    [CPRMAN_CLOCK_DSI0P] = {
+        .cm_ctl = 0x00000000,
+        .cm_div = 0x00000000,
+    },
+
+    [CPRMAN_CLOCK_DPI] = {
+        .cm_ctl = 0x00000000,
+        .cm_div = 0x00000000,
+    },
+
+    [CPRMAN_CLOCK_GP0] = {
+        .cm_ctl = 0x00000200,
+        .cm_div = 0x00000000,
+    },
+
+    [CPRMAN_CLOCK_GP1] = {
+        .cm_ctl = 0x00000096,
+        .cm_div = 0x00014000,
+    },
+
+    [CPRMAN_CLOCK_GP2] = {
+        .cm_ctl = 0x00000291,
+        .cm_div = 0x00249f00,
+    },
+
+    [CPRMAN_CLOCK_HSM] = {
+        .cm_ctl = 0x00000000,
+        .cm_div = 0x00000000,
+    },
+
+    [CPRMAN_CLOCK_OTP] = {
+        .cm_ctl = 0x00000091,
+        .cm_div = 0x00004000,
+    },
+
+    [CPRMAN_CLOCK_PCM] = {
+        .cm_ctl = 0x00000200,
+        .cm_div = 0x00000000,
+    },
+
+    [CPRMAN_CLOCK_PWM] = {
+        .cm_ctl = 0x00000200,
+        .cm_div = 0x00000000,
+    },
+
+    [CPRMAN_CLOCK_SLIM] = {
+        .cm_ctl = 0x00000200,
+        .cm_div = 0x00000000,
+    },
+
+    [CPRMAN_CLOCK_SMI] = {
+        .cm_ctl = 0x00000000,
+        .cm_div = 0x00000000,
+    },
+
+    [CPRMAN_CLOCK_TEC] = {
+        .cm_ctl = 0x00000000,
+        .cm_div = 0x00000000,
+    },
+
+    [CPRMAN_CLOCK_TD0] = {
+        .cm_ctl = 0, /* unknown */
+        .cm_div = 0
+    },
+
+    [CPRMAN_CLOCK_TD1] = {
+        .cm_ctl = 0, /* unknown */
+        .cm_div = 0
+    },
+
+    [CPRMAN_CLOCK_TSENS] = {
+        .cm_ctl = 0x00000091,
+        .cm_div = 0x0000a000,
+    },
+
+    [CPRMAN_CLOCK_TIMER] = {
+        .cm_ctl = 0x00000291,
+        .cm_div = 0x00013333,
+    },
+
+    [CPRMAN_CLOCK_UART] = {
+        .cm_ctl = 0x00000296,
+        .cm_div = 0x0000a6ab,
+    },
+
+    [CPRMAN_CLOCK_VEC] = {
+        .cm_ctl = 0x00000097,
+        .cm_div = 0x00002000,
+    },
+
+    [CPRMAN_CLOCK_PULSE] = {
+        .cm_ctl = 0, /* unknown */
+        .cm_div = 0
+    },
+
+    [CPRMAN_CLOCK_SDC] = {
+        .cm_ctl = 0x00004006,
+        .cm_div = 0x00003000,
+    },
+
+    [CPRMAN_CLOCK_ARM] = {
+        .cm_ctl = 0, /* unknown */
+        .cm_div = 0
+    },
+
+    [CPRMAN_CLOCK_AVEO] = {
+        .cm_ctl = 0x00000000,
+        .cm_div = 0x00000000,
+    },
+
+    [CPRMAN_CLOCK_EMMC] = {
+        .cm_ctl = 0x00000295,
+        .cm_div = 0x00006000,
+    },
+
+    [CPRMAN_CLOCK_EMMC2] = {
+        .cm_ctl = 0, /* unknown */
+        .cm_div = 0
+    },
+};
+
+#endif
diff --git a/include/hw/misc/bcm2835_mbox.h b/include/hw/misc/bcm2835_mbox.h
new file mode 100644 (file)
index 0000000..ade27af
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Raspberry Pi emulation (c) 2012 Gregory Estrade
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_MBOX_H
+#define BCM2835_MBOX_H
+
+#include "bcm2835_mbox_defs.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_BCM2835_MBOX "bcm2835-mbox"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835MboxState, BCM2835_MBOX)
+
+typedef struct {
+    uint32_t reg[MBOX_SIZE];
+    uint32_t count;
+    uint32_t status;
+    uint32_t config;
+} BCM2835Mbox;
+
+struct BCM2835MboxState {
+    /*< private >*/
+    SysBusDevice busdev;
+    /*< public >*/
+    MemoryRegion *mbox_mr;
+    AddressSpace mbox_as;
+    MemoryRegion iomem;
+    qemu_irq arm_irq;
+
+    bool mbox_irq_disabled;
+    bool available[MBOX_CHAN_COUNT];
+    BCM2835Mbox mbox[2];
+};
+
+#endif
diff --git a/include/hw/misc/bcm2835_mbox_defs.h b/include/hw/misc/bcm2835_mbox_defs.h
new file mode 100644 (file)
index 0000000..9670bf3
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Raspberry Pi emulation (c) 2012 Gregory Estrade
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_MBOX_DEFS_H
+#define BCM2835_MBOX_DEFS_H
+
+/* Constants shared with the ARM identifying separate mailbox channels */
+#define MBOX_CHAN_POWER    0 /* for use by the power management interface */
+#define MBOX_CHAN_FB       1 /* for use by the frame buffer */
+#define MBOX_CHAN_VCHIQ    3 /* for use by the VCHIQ interface */
+#define MBOX_CHAN_PROPERTY 8 /* for use by the property channel */
+#define MBOX_CHAN_COUNT    9
+
+#define MBOX_SIZE          32
+#define MBOX_INVALID_DATA  0x0f
+
+/* Layout of the private address space used for communication between
+ * the mbox device emulation, and child devices: each channel occupies
+ * 16 bytes of address space, but only two registers are presently defined.
+ */
+#define MBOX_AS_CHAN_SHIFT 4
+#define MBOX_AS_DATA       0 /* request / response data (RW at offset 0) */
+#define MBOX_AS_PENDING    4 /* pending response status (RO at offset 4) */
+
+#endif /* BCM2835_MBOX_DEFS_H */
diff --git a/include/hw/misc/bcm2835_mphi.h b/include/hw/misc/bcm2835_mphi.h
new file mode 100644 (file)
index 0000000..751363f
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * BCM2835 SOC MPHI state definitions
+ *
+ * Copyright (c) 2020 Paul Zimmerman <pauldzim@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef HW_MISC_BCM2835_MPHI_H
+#define HW_MISC_BCM2835_MPHI_H
+
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define MPHI_MMIO_SIZE      0x1000
+
+typedef struct BCM2835MphiState BCM2835MphiState;
+
+struct BCM2835MphiState {
+    SysBusDevice parent_obj;
+    qemu_irq irq;
+    MemoryRegion iomem;
+
+    uint32_t outdda;
+    uint32_t outddb;
+    uint32_t ctrl;
+    uint32_t intstat;
+    uint32_t swirq;
+};
+
+#define TYPE_BCM2835_MPHI   "bcm2835-mphi"
+
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835MphiState, BCM2835_MPHI)
+
+#endif
diff --git a/include/hw/misc/bcm2835_powermgt.h b/include/hw/misc/bcm2835_powermgt.h
new file mode 100644 (file)
index 0000000..303b9a6
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * BCM2835 Power Management emulation
+ *
+ * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com>
+ * Copyright (C) 2021 Nolan Leake <nolan@sigbus.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_POWERMGT_H
+#define BCM2835_POWERMGT_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_BCM2835_POWERMGT "bcm2835-powermgt"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835PowerMgtState, BCM2835_POWERMGT)
+
+struct BCM2835PowerMgtState {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+
+    uint32_t rstc;
+    uint32_t rsts;
+    uint32_t wdog;
+};
+
+#endif
diff --git a/include/hw/misc/bcm2835_property.h b/include/hw/misc/bcm2835_property.h
new file mode 100644 (file)
index 0000000..ba88966
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Raspberry Pi emulation (c) 2012 Gregory Estrade
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_PROPERTY_H
+#define BCM2835_PROPERTY_H
+
+#include "hw/sysbus.h"
+#include "net/net.h"
+#include "hw/display/bcm2835_fb.h"
+#include "qom/object.h"
+
+#define TYPE_BCM2835_PROPERTY "bcm2835-property"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835PropertyState, BCM2835_PROPERTY)
+
+struct BCM2835PropertyState {
+    /*< private >*/
+    SysBusDevice busdev;
+    /*< public >*/
+
+    MemoryRegion *dma_mr;
+    AddressSpace dma_as;
+    MemoryRegion iomem;
+    qemu_irq mbox_irq;
+    BCM2835FBState *fbdev;
+
+    MACAddr macaddr;
+    uint32_t board_rev;
+    uint32_t addr;
+    char *command_line;
+    bool pending;
+};
+
+#endif
diff --git a/include/hw/misc/bcm2835_rng.h b/include/hw/misc/bcm2835_rng.h
new file mode 100644 (file)
index 0000000..7c1fb3e
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * BCM2835 Random Number Generator emulation
+ *
+ * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_RNG_H
+#define BCM2835_RNG_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_BCM2835_RNG "bcm2835-rng"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835RngState, BCM2835_RNG)
+
+struct BCM2835RngState {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+
+    uint32_t rng_ctrl;
+    uint32_t rng_status;
+};
+
+#endif
diff --git a/include/hw/misc/bcm2835_thermal.h b/include/hw/misc/bcm2835_thermal.h
new file mode 100644 (file)
index 0000000..f90f9e4
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * BCM2835 dummy thermal sensor
+ *
+ * Copyright (C) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_MISC_BCM2835_THERMAL_H
+#define HW_MISC_BCM2835_THERMAL_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_BCM2835_THERMAL "bcm2835-thermal"
+
+OBJECT_DECLARE_SIMPLE_TYPE(Bcm2835ThermalState, BCM2835_THERMAL)
+
+struct Bcm2835ThermalState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+    MemoryRegion iomem;
+    uint32_t ctl;
+};
+
+#endif
diff --git a/include/hw/misc/cbus.h b/include/hw/misc/cbus.h
new file mode 100644 (file)
index 0000000..5334984
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * CBUS three-pin bus and the Retu / Betty / Tahvo / Vilma / Avilma /
+ * Hinku / Vinku / Ahne / Pihi chips used in various Nokia platforms.
+ * Based on reverse-engineering of a linux driver.
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ * Written by Andrzej Zaborowski
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_MISC_CBUS_H
+#define HW_MISC_CBUS_H
+
+
+typedef struct {
+    qemu_irq clk;
+    qemu_irq dat;
+    qemu_irq sel;
+} CBus;
+
+CBus *cbus_init(qemu_irq dat_out);
+void cbus_attach(CBus *bus, void *slave_opaque);
+
+void *retu_init(qemu_irq irq, int vilma);
+void *tahvo_init(qemu_irq irq, int betty);
+
+void retu_key_event(void *retu, int state);
+
+#endif
diff --git a/include/hw/misc/djmemc.h b/include/hw/misc/djmemc.h
new file mode 100644 (file)
index 0000000..82d4e4a
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * djMEMC, macintosh memory and interrupt controller
+ * (Quadra 610/650/800 & Centris 610/650)
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_MISC_DJMEMC_H
+#define HW_MISC_DJMEMC_H
+
+#include "hw/sysbus.h"
+
+#define DJMEMC_SIZE        0x2000
+#define DJMEMC_NUM_REGS    (0x38 / sizeof(uint32_t))
+
+#define DJMEMC_MAXBANKS    10
+
+struct DJMEMCState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion mem_regs;
+
+    /* Memory controller */
+    uint32_t regs[DJMEMC_NUM_REGS];
+};
+
+#define TYPE_DJMEMC "djMEMC"
+OBJECT_DECLARE_SIMPLE_TYPE(DJMEMCState, DJMEMC);
+
+#endif
diff --git a/include/hw/misc/empty_slot.h b/include/hw/misc/empty_slot.h
new file mode 100644 (file)
index 0000000..dec56e5
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * QEMU Empty Slot
+ *
+ * The empty_slot device emulates known to a bus but not connected devices.
+ *
+ * Copyright (c) 2010 Artyom Tarasenko
+ *
+ * This code is licensed under the GNU GPL v2 or (at your option) any later
+ * version.
+ */
+
+#ifndef HW_EMPTY_SLOT_H
+#define HW_EMPTY_SLOT_H
+
+#include "exec/hwaddr.h"
+
+void empty_slot_init(const char *name, hwaddr addr, uint64_t slot_size);
+
+#endif
diff --git a/include/hw/misc/grlib_ahb_apb_pnp.h b/include/hw/misc/grlib_ahb_apb_pnp.h
new file mode 100644 (file)
index 0000000..bab0b5f
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * GRLIB AHB APB PNP
+ *
+ *  Copyright (C) 2019 AdaCore
+ *
+ *  Developed by :
+ *  Frederic Konrad   <frederic.konrad@adacore.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef GRLIB_AHB_APB_PNP_H
+#define GRLIB_AHB_APB_PNP_H
+#include "qom/object.h"
+
+#define TYPE_GRLIB_AHB_PNP "grlib-ahbpnp"
+OBJECT_DECLARE_SIMPLE_TYPE(AHBPnp, GRLIB_AHB_PNP)
+
+#define TYPE_GRLIB_APB_PNP "grlib-apbpnp"
+OBJECT_DECLARE_SIMPLE_TYPE(APBPnp, GRLIB_APB_PNP)
+
+void grlib_ahb_pnp_add_entry(AHBPnp *dev, uint32_t address, uint32_t mask,
+                             uint8_t vendor, uint16_t device, int slave,
+                             int type);
+void grlib_apb_pnp_add_entry(APBPnp *dev, uint32_t address, uint32_t mask,
+                             uint8_t vendor, uint16_t device, uint8_t version,
+                             uint8_t irq, int type);
+
+/* VENDORS */
+#define GRLIB_VENDOR_GAISLER (0x01)
+/* DEVICES */
+#define GRLIB_LEON3_DEV      (0x03)
+#define GRLIB_APBMST_DEV     (0x06)
+#define GRLIB_APBUART_DEV    (0x0C)
+#define GRLIB_IRQMP_DEV      (0x0D)
+#define GRLIB_GPTIMER_DEV    (0x11)
+/* TYPE */
+#define GRLIB_CPU_AREA       (0x00)
+#define GRLIB_APBIO_AREA     (0x01)
+#define GRLIB_AHBMEM_AREA    (0x02)
+
+#define GRLIB_AHB_MASTER     (0x00)
+#define GRLIB_AHB_SLAVE      (0x01)
+
+#endif /* GRLIB_AHB_APB_PNP_H */
diff --git a/include/hw/misc/imx25_ccm.h b/include/hw/misc/imx25_ccm.h
new file mode 100644 (file)
index 0000000..c3b8901
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * IMX25 Clock Control Module
+ *
+ * Copyright (C) 2012 NICTA
+ * Updated by Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX25_CCM_H
+#define IMX25_CCM_H
+
+#include "hw/misc/imx_ccm.h"
+#include "qom/object.h"
+
+#define IMX25_CCM_MPCTL_REG  0
+#define IMX25_CCM_UPCTL_REG  1
+#define IMX25_CCM_CCTL_REG   2
+#define IMX25_CCM_CGCR0_REG  3
+#define IMX25_CCM_CGCR1_REG  4
+#define IMX25_CCM_CGCR2_REG  5
+#define IMX25_CCM_PCDR0_REG  6
+#define IMX25_CCM_PCDR1_REG  7
+#define IMX25_CCM_PCDR2_REG  8
+#define IMX25_CCM_PCDR3_REG  9
+#define IMX25_CCM_RCSR_REG   10
+#define IMX25_CCM_CRDR_REG   11
+#define IMX25_CCM_DCVR0_REG  12
+#define IMX25_CCM_DCVR1_REG  13
+#define IMX25_CCM_DCVR2_REG  14
+#define IMX25_CCM_DCVR3_REG  15
+#define IMX25_CCM_LTR0_REG   16
+#define IMX25_CCM_LTR1_REG   17
+#define IMX25_CCM_LTR2_REG   18
+#define IMX25_CCM_LTR3_REG   19
+#define IMX25_CCM_LTBR0_REG  20
+#define IMX25_CCM_LTBR1_REG  21
+#define IMX25_CCM_PMCR0_REG  22
+#define IMX25_CCM_PMCR1_REG  23
+#define IMX25_CCM_PMCR2_REG  24
+#define IMX25_CCM_MCR_REG    25
+#define IMX25_CCM_LPIMR0_REG 26
+#define IMX25_CCM_LPIMR1_REG 27
+#define IMX25_CCM_MAX_REG    28
+
+/* CCTL */
+#define CCTL_ARM_CLK_DIV_SHIFT (30)
+#define CCTL_ARM_CLK_DIV_MASK  (0x3)
+#define CCTL_AHB_CLK_DIV_SHIFT (28)
+#define CCTL_AHB_CLK_DIV_MASK  (0x3)
+#define CCTL_MPLL_BYPASS_SHIFT (22)
+#define CCTL_MPLL_BYPASS_MASK  (0x1)
+#define CCTL_USB_DIV_SHIFT     (16)
+#define CCTL_USB_DIV_MASK      (0x3F)
+#define CCTL_ARM_SRC_SHIFT     (13)
+#define CCTL_ARM_SRC_MASK      (0x1)
+#define CCTL_UPLL_DIS_SHIFT    (23)
+#define CCTL_UPLL_DIS_MASK     (0x1)
+
+#define EXTRACT(value, name) (((value) >> CCTL_##name##_SHIFT) \
+                              & CCTL_##name##_MASK)
+#define INSERT(value, name) (((value) & CCTL_##name##_MASK) << \
+                             CCTL_##name##_SHIFT)
+
+#define TYPE_IMX25_CCM "imx25.ccm"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX25CCMState, IMX25_CCM)
+
+struct IMX25CCMState {
+    /* <private> */
+    IMXCCMState parent_obj;
+
+    /* <public> */
+    MemoryRegion iomem;
+
+    uint32_t reg[IMX25_CCM_MAX_REG];
+
+};
+
+#endif /* IMX25_CCM_H */
diff --git a/include/hw/misc/imx31_ccm.h b/include/hw/misc/imx31_ccm.h
new file mode 100644 (file)
index 0000000..18e08ee
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * IMX31 Clock Control Module
+ *
+ * Copyright (C) 2012 NICTA
+ * Updated by Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX31_CCM_H
+#define IMX31_CCM_H
+
+#include "hw/misc/imx_ccm.h"
+#include "qom/object.h"
+
+#define IMX31_CCM_CCMR_REG  0
+#define IMX31_CCM_PDR0_REG  1
+#define IMX31_CCM_PDR1_REG  2
+#define IMX31_CCM_RCSR_REG  3
+#define IMX31_CCM_MPCTL_REG 4
+#define IMX31_CCM_UPCTL_REG 5
+#define IMX31_CCM_SPCTL_REG 6
+#define IMX31_CCM_COSR_REG  7
+#define IMX31_CCM_CGR0_REG  8
+#define IMX31_CCM_CGR1_REG  9
+#define IMX31_CCM_CGR2_REG  10
+#define IMX31_CCM_WIMR_REG  11
+#define IMX31_CCM_LDC_REG   12
+#define IMX31_CCM_DCVR0_REG 13
+#define IMX31_CCM_DCVR1_REG 14
+#define IMX31_CCM_DCVR2_REG 15
+#define IMX31_CCM_DCVR3_REG 16
+#define IMX31_CCM_LTR0_REG  17
+#define IMX31_CCM_LTR1_REG  18
+#define IMX31_CCM_LTR2_REG  19
+#define IMX31_CCM_LTR3_REG  20
+#define IMX31_CCM_LTBR0_REG 21
+#define IMX31_CCM_LTBR1_REG 22
+#define IMX31_CCM_PMCR0_REG 23
+#define IMX31_CCM_PMCR1_REG 24
+#define IMX31_CCM_PDR2_REG  25
+#define IMX31_CCM_MAX_REG   26
+
+/* CCMR */
+#define CCMR_FPME    (1<<0)
+#define CCMR_MPE     (1<<3)
+#define CCMR_MDS     (1<<7)
+#define CCMR_FPMF    (1<<26)
+#define CCMR_PRCS    (3<<1)
+
+#define PMCR0_DFSUP1 (1<<31)
+
+/* PDR0 */
+#define PDR0_MCU_PODF_SHIFT (0)
+#define PDR0_MCU_PODF_MASK (0x7)
+#define PDR0_MAX_PODF_SHIFT (3)
+#define PDR0_MAX_PODF_MASK (0x7)
+#define PDR0_IPG_PODF_SHIFT (6)
+#define PDR0_IPG_PODF_MASK (0x3)
+#define PDR0_NFC_PODF_SHIFT (8)
+#define PDR0_NFC_PODF_MASK (0x7)
+#define PDR0_HSP_PODF_SHIFT (11)
+#define PDR0_HSP_PODF_MASK (0x7)
+#define PDR0_PER_PODF_SHIFT (16)
+#define PDR0_PER_PODF_MASK (0x1f)
+#define PDR0_CSI_PODF_SHIFT (23)
+#define PDR0_CSI_PODF_MASK (0x1ff)
+
+#define EXTRACT(value, name) (((value) >> PDR0_##name##_PODF_SHIFT) \
+                              & PDR0_##name##_PODF_MASK)
+#define INSERT(value, name) (((value) & PDR0_##name##_PODF_MASK) << \
+                             PDR0_##name##_PODF_SHIFT)
+
+#define TYPE_IMX31_CCM "imx31.ccm"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX31CCMState, IMX31_CCM)
+
+struct IMX31CCMState {
+    /* <private> */
+    IMXCCMState parent_obj;
+
+    /* <public> */
+    MemoryRegion iomem;
+
+    uint32_t reg[IMX31_CCM_MAX_REG];
+
+};
+
+#endif /* IMX31_CCM_H */
diff --git a/include/hw/misc/imx6_ccm.h b/include/hw/misc/imx6_ccm.h
new file mode 100644 (file)
index 0000000..ccf46d7
--- /dev/null
@@ -0,0 +1,198 @@
+/*
+ * IMX6 Clock Control Module
+ *
+ * Copyright (C) 2012 NICTA
+ * Updated by Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX6_CCM_H
+#define IMX6_CCM_H
+
+#include "hw/misc/imx_ccm.h"
+#include "qemu/bitops.h"
+#include "qom/object.h"
+
+#define CCM_CCR 0
+#define CCM_CCDR 1
+#define CCM_CSR 2
+#define CCM_CCSR 3
+#define CCM_CACRR 4
+#define CCM_CBCDR 5
+#define CCM_CBCMR 6
+#define CCM_CSCMR1 7
+#define CCM_CSCMR2 8
+#define CCM_CSCDR1 9
+#define CCM_CS1CDR 10
+#define CCM_CS2CDR 11
+#define CCM_CDCDR 12
+#define CCM_CHSCCDR 13
+#define CCM_CSCDR2 14
+#define CCM_CSCDR3 15
+#define CCM_CDHIPR 18
+#define CCM_CTOR 20
+#define CCM_CLPCR 21
+#define CCM_CISR 22
+#define CCM_CIMR 23
+#define CCM_CCOSR 24
+#define CCM_CGPR 25
+#define CCM_CCGR0 26
+#define CCM_CCGR1 27
+#define CCM_CCGR2 28
+#define CCM_CCGR3 29
+#define CCM_CCGR4 30
+#define CCM_CCGR5 31
+#define CCM_CCGR6 32
+#define CCM_CMEOR 34
+#define CCM_MAX 35
+
+#define CCM_ANALOG_PLL_ARM 0
+#define CCM_ANALOG_PLL_ARM_SET 1
+#define CCM_ANALOG_PLL_ARM_CLR 2
+#define CCM_ANALOG_PLL_ARM_TOG 3
+#define CCM_ANALOG_PLL_USB1 4
+#define CCM_ANALOG_PLL_USB1_SET 5
+#define CCM_ANALOG_PLL_USB1_CLR 6
+#define CCM_ANALOG_PLL_USB1_TOG 7
+#define CCM_ANALOG_PLL_USB2 8
+#define CCM_ANALOG_PLL_USB2_SET 9
+#define CCM_ANALOG_PLL_USB2_CLR 10
+#define CCM_ANALOG_PLL_USB2_TOG 11
+#define CCM_ANALOG_PLL_SYS 12
+#define CCM_ANALOG_PLL_SYS_SET 13
+#define CCM_ANALOG_PLL_SYS_CLR 14
+#define CCM_ANALOG_PLL_SYS_TOG 15
+#define CCM_ANALOG_PLL_SYS_SS 16
+#define CCM_ANALOG_PLL_SYS_NUM 20
+#define CCM_ANALOG_PLL_SYS_DENOM 24
+#define CCM_ANALOG_PLL_AUDIO 28
+#define CCM_ANALOG_PLL_AUDIO_SET 29
+#define CCM_ANALOG_PLL_AUDIO_CLR 30
+#define CCM_ANALOG_PLL_AUDIO_TOG 31
+#define CCM_ANALOG_PLL_AUDIO_NUM 32
+#define CCM_ANALOG_PLL_AUDIO_DENOM 36
+#define CCM_ANALOG_PLL_VIDEO 40
+#define CCM_ANALOG_PLL_VIDEO_SET 41
+#define CCM_ANALOG_PLL_VIDEO_CLR 42
+#define CCM_ANALOG_PLL_VIDEO_TOG 44
+#define CCM_ANALOG_PLL_VIDEO_NUM 46
+#define CCM_ANALOG_PLL_VIDEO_DENOM 48
+#define CCM_ANALOG_PLL_MLB 52
+#define CCM_ANALOG_PLL_MLB_SET 53
+#define CCM_ANALOG_PLL_MLB_CLR 54
+#define CCM_ANALOG_PLL_MLB_TOG 55
+#define CCM_ANALOG_PLL_ENET 56
+#define CCM_ANALOG_PLL_ENET_SET 57
+#define CCM_ANALOG_PLL_ENET_CLR 58
+#define CCM_ANALOG_PLL_ENET_TOG 59
+#define CCM_ANALOG_PFD_480 60
+#define CCM_ANALOG_PFD_480_SET 61
+#define CCM_ANALOG_PFD_480_CLR 62
+#define CCM_ANALOG_PFD_480_TOG 63
+#define CCM_ANALOG_PFD_528 64
+#define CCM_ANALOG_PFD_528_SET 65
+#define CCM_ANALOG_PFD_528_CLR 66
+#define CCM_ANALOG_PFD_528_TOG 67
+
+/* PMU registers */
+#define PMU_REG_1P1 68
+#define PMU_REG_3P0 72
+#define PMU_REG_2P5 76
+#define PMU_REG_CORE 80
+
+#define CCM_ANALOG_MISC0 84
+#define PMU_MISC0 84
+#define CCM_ANALOG_MISC0_SET 85
+#define CCM_ANALOG_MISC0_CLR 86
+#define CCM_ANALOG_MISC0_TOG 87
+
+#define PMU_MISC1 88
+#define PMU_MISC1_SET 89
+#define PMU_MISC1_CLR 90
+#define PMU_MISC1_TOG 91
+
+#define CCM_ANALOG_MISC2 92
+#define PMU_MISC2 92
+#define CCM_ANALOG_MISC2_SET 93
+#define CCM_ANALOG_MISC2_CLR 94
+#define CCM_ANALOG_MISC2_TOG 95
+
+#define USB_ANALOG_USB1_VBUS_DETECT 104
+#define USB_ANALOG_USB1_VBUS_DETECT_SET 105
+#define USB_ANALOG_USB1_VBUS_DETECT_CLR 106
+#define USB_ANALOG_USB1_VBUS_DETECT_TOG 107
+#define USB_ANALOG_USB1_CHRG_DETECT 108
+#define USB_ANALOG_USB1_CHRG_DETECT_SET 109
+#define USB_ANALOG_USB1_CHRG_DETECT_CLR 110
+#define USB_ANALOG_USB1_CHRG_DETECT_TOG 111
+#define USB_ANALOG_USB1_VBUS_DETECT_STAT 112
+#define USB_ANALOG_USB1_CHRG_DETECT_STAT 116
+#define USB_ANALOG_USB1_MISC 124
+#define USB_ANALOG_USB1_MISC_SET 125
+#define USB_ANALOG_USB1_MISC_CLR 126
+#define USB_ANALOG_USB1_MISC_TOG 127
+#define USB_ANALOG_USB2_VBUS_DETECT 128
+#define USB_ANALOG_USB2_VBUS_DETECT_SET 129
+#define USB_ANALOG_USB2_VBUS_DETECT_CLR 130
+#define USB_ANALOG_USB2_VBUS_DETECT_TOG 131
+#define USB_ANALOG_USB2_CHRG_DETECT 132
+#define USB_ANALOG_USB2_CHRG_DETECT_SET 133
+#define USB_ANALOG_USB2_CHRG_DETECT_CLR 134
+#define USB_ANALOG_USB2_CHRG_DETECT_TOG 135
+#define USB_ANALOG_USB2_VBUS_DETECT_STAT 136
+#define USB_ANALOG_USB2_CHRG_DETECT_STAT 140
+#define USB_ANALOG_USB2_MISC 148
+#define USB_ANALOG_USB2_MISC_SET 149
+#define USB_ANALOG_USB2_MISC_CLR 150
+#define USB_ANALOG_USB2_MISC_TOG 151
+#define USB_ANALOG_DIGPROG 152
+#define CCM_ANALOG_MAX 153
+
+/* CCM_CBCMR */
+#define PRE_PERIPH_CLK_SEL_SHIFT  (18)
+#define PRE_PERIPH_CLK_SEL_LENGTH (2)
+
+/* CCM_CBCDR */
+#define AHB_PODF_SHIFT           (10)
+#define AHB_PODF_LENGTH          (3)
+#define IPG_PODF_SHIFT           (8)
+#define IPG_PODF_LENGTH          (2)
+
+/* CCM_CSCMR1 */
+#define PERCLK_PODF_SHIFT        (0)
+#define PERCLK_PODF_LENGTH       (6)
+
+/* CCM_ANALOG_PFD_528 */
+#define PFD0_FRAC_SHIFT          (0)
+#define PFD0_FRAC_LENGTH         (6)
+#define PFD2_FRAC_SHIFT          (16)
+#define PFD2_FRAC_LENGTH         (6)
+
+/* CCM_ANALOG_PLL_SYS */
+#define DIV_SELECT_SHIFT         (0)
+#define DIV_SELECT_LENGTH        (1)
+
+#define CCM_ANALOG_PLL_LOCK      (1 << 31);
+
+#define EXTRACT(value, name) extract32(value, name##_SHIFT, name##_LENGTH)
+
+#define TYPE_IMX6_CCM "imx6.ccm"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX6CCMState, IMX6_CCM)
+
+struct IMX6CCMState {
+    /* <private> */
+    IMXCCMState parent_obj;
+
+    /* <public> */
+    MemoryRegion container;
+    MemoryRegion ioccm;
+    MemoryRegion ioanalog;
+
+    uint32_t ccm[CCM_MAX];
+    uint32_t analog[CCM_ANALOG_MAX];
+
+};
+
+#endif /* IMX6_CCM_H */
diff --git a/include/hw/misc/imx6_src.h b/include/hw/misc/imx6_src.h
new file mode 100644 (file)
index 0000000..f380da3
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * IMX6 System Reset Controller
+ *
+ * Copyright (C) 2012 NICTA
+ * Updated by Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX6_SRC_H
+#define IMX6_SRC_H
+
+#include "hw/sysbus.h"
+#include "qemu/bitops.h"
+#include "qom/object.h"
+
+#define SRC_SCR 0
+#define SRC_SBMR1 1
+#define SRC_SRSR 2
+#define SRC_SISR 5
+#define SRC_SIMR 6
+#define SRC_SBMR2 7
+#define SRC_GPR1 8
+#define SRC_GPR2 9
+#define SRC_GPR3 10
+#define SRC_GPR4 11
+#define SRC_GPR5 12
+#define SRC_GPR6 13
+#define SRC_GPR7 14
+#define SRC_GPR8 15
+#define SRC_GPR9 16
+#define SRC_GPR10 17
+#define SRC_MAX 18
+
+/* SRC_SCR */
+#define CORE3_ENABLE_SHIFT     24
+#define CORE3_ENABLE_LENGTH    1
+#define CORE2_ENABLE_SHIFT     23
+#define CORE2_ENABLE_LENGTH    1
+#define CORE1_ENABLE_SHIFT     22
+#define CORE1_ENABLE_LENGTH    1
+#define CORE3_RST_SHIFT        16
+#define CORE3_RST_LENGTH       1
+#define CORE2_RST_SHIFT        15
+#define CORE2_RST_LENGTH       1
+#define CORE1_RST_SHIFT        14
+#define CORE1_RST_LENGTH       1
+#define CORE0_RST_SHIFT        13
+#define CORE0_RST_LENGTH       1
+#define SW_IPU1_RST_SHIFT      3
+#define SW_IPU1_RST_LENGTH     1
+#define SW_IPU2_RST_SHIFT      12
+#define SW_IPU2_RST_LENGTH     1
+#define WARM_RST_ENABLE_SHIFT  0
+#define WARM_RST_ENABLE_LENGTH 1
+
+#define EXTRACT(value, name) extract32(value, name##_SHIFT, name##_LENGTH)
+
+#define TYPE_IMX6_SRC "imx6.src"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX6SRCState, IMX6_SRC)
+
+struct IMX6SRCState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion iomem;
+
+    uint32_t regs[SRC_MAX];
+
+};
+
+#endif /* IMX6_SRC_H */
diff --git a/include/hw/misc/imx6ul_ccm.h b/include/hw/misc/imx6ul_ccm.h
new file mode 100644 (file)
index 0000000..edb5f78
--- /dev/null
@@ -0,0 +1,227 @@
+/*
+ * IMX6UL Clock Control Module
+ *
+ * Copyright (C) 2018 by Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX6UL_CCM_H
+#define IMX6UL_CCM_H
+
+#include "hw/misc/imx_ccm.h"
+#include "qemu/bitops.h"
+#include "qom/object.h"
+
+#define CCM_CCR 0
+#define CCM_CCDR 1
+#define CCM_CSR 2
+#define CCM_CCSR 3
+#define CCM_CACRR 4
+#define CCM_CBCDR 5
+#define CCM_CBCMR 6
+#define CCM_CSCMR1 7
+#define CCM_CSCMR2 8
+#define CCM_CSCDR1 9
+#define CCM_CS1CDR 10
+#define CCM_CS2CDR 11
+#define CCM_CDCDR 12
+#define CCM_CHSCCDR 13
+#define CCM_CSCDR2 14
+#define CCM_CSCDR3 15
+#define CCM_CDHIPR 18
+#define CCM_CTOR 20
+#define CCM_CLPCR 21
+#define CCM_CISR 22
+#define CCM_CIMR 23
+#define CCM_CCOSR 24
+#define CCM_CGPR 25
+#define CCM_CCGR0 26
+#define CCM_CCGR1 27
+#define CCM_CCGR2 28
+#define CCM_CCGR3 29
+#define CCM_CCGR4 30
+#define CCM_CCGR5 31
+#define CCM_CCGR6 32
+#define CCM_CMEOR 34
+#define CCM_MAX 35
+
+#define CCM_ANALOG_PLL_ARM 0
+#define CCM_ANALOG_PLL_ARM_SET 1
+#define CCM_ANALOG_PLL_ARM_CLR 2
+#define CCM_ANALOG_PLL_ARM_TOG 3
+#define CCM_ANALOG_PLL_USB1 4
+#define CCM_ANALOG_PLL_USB1_SET 5
+#define CCM_ANALOG_PLL_USB1_CLR 6
+#define CCM_ANALOG_PLL_USB1_TOG 7
+#define CCM_ANALOG_PLL_USB2 8
+#define CCM_ANALOG_PLL_USB2_SET 9
+#define CCM_ANALOG_PLL_USB2_CLR 10
+#define CCM_ANALOG_PLL_USB2_TOG 11
+#define CCM_ANALOG_PLL_SYS 12
+#define CCM_ANALOG_PLL_SYS_SET 13
+#define CCM_ANALOG_PLL_SYS_CLR 14
+#define CCM_ANALOG_PLL_SYS_TOG 15
+#define CCM_ANALOG_PLL_SYS_SS 16
+#define CCM_ANALOG_PLL_SYS_NUM 20
+#define CCM_ANALOG_PLL_SYS_DENOM 24
+#define CCM_ANALOG_PLL_AUDIO 28
+#define CCM_ANALOG_PLL_AUDIO_SET 29
+#define CCM_ANALOG_PLL_AUDIO_CLR 30
+#define CCM_ANALOG_PLL_AUDIO_TOG 31
+#define CCM_ANALOG_PLL_AUDIO_NUM 32
+#define CCM_ANALOG_PLL_AUDIO_DENOM 36
+#define CCM_ANALOG_PLL_VIDEO 40
+#define CCM_ANALOG_PLL_VIDEO_SET 41
+#define CCM_ANALOG_PLL_VIDEO_CLR 42
+#define CCM_ANALOG_PLL_VIDEO_TOG 44
+#define CCM_ANALOG_PLL_VIDEO_NUM 46
+#define CCM_ANALOG_PLL_VIDEO_DENOM 48
+#define CCM_ANALOG_PLL_ENET 56
+#define CCM_ANALOG_PLL_ENET_SET 57
+#define CCM_ANALOG_PLL_ENET_CLR 58
+#define CCM_ANALOG_PLL_ENET_TOG 59
+#define CCM_ANALOG_PFD_480 60
+#define CCM_ANALOG_PFD_480_SET 61
+#define CCM_ANALOG_PFD_480_CLR 62
+#define CCM_ANALOG_PFD_480_TOG 63
+#define CCM_ANALOG_PFD_528 64
+#define CCM_ANALOG_PFD_528_SET 65
+#define CCM_ANALOG_PFD_528_CLR 66
+#define CCM_ANALOG_PFD_528_TOG 67
+
+/* PMU registers */
+#define PMU_REG_1P1 68
+#define PMU_REG_3P0 72
+#define PMU_REG_2P5 76
+#define PMU_REG_CORE 80
+
+#define CCM_ANALOG_MISC0 84
+#define PMU_MISC0 CCM_ANALOG_MISC0
+#define CCM_ANALOG_MISC0_SET 85
+#define PMU_MISC0_SET CCM_ANALOG_MISC0_SET
+#define CCM_ANALOG_MISC0_CLR 86
+#define PMU_MISC0_CLR CCM_ANALOG_MISC0_CLR
+#define CCM_ANALOG_MISC0_TOG 87
+#define PMU_MISC0_TOG CCM_ANALOG_MISC0_TOG
+
+#define CCM_ANALOG_MISC1 88
+#define PMU_MISC1 CCM_ANALOG_MISC1
+#define CCM_ANALOG_MISC1_SET 89
+#define PMU_MISC1_SET CCM_ANALOG_MISC1_SET
+#define CCM_ANALOG_MISC1_CLR 90
+#define PMU_MISC1_CLR CCM_ANALOG_MISC1_CLR
+#define CCM_ANALOG_MISC1_TOG 91
+#define PMU_MISC1_TOG CCM_ANALOG_MISC1_TOG
+
+#define CCM_ANALOG_MISC2 92
+#define PMU_MISC2 CCM_ANALOG_MISC2
+#define CCM_ANALOG_MISC2_SET 93
+#define PMU_MISC2_SET CCM_ANALOG_MISC2_SET
+#define CCM_ANALOG_MISC2_CLR 94
+#define PMU_MISC2_CLR CCM_ANALOG_MISC2_CLR
+#define CCM_ANALOG_MISC2_TOG 95
+#define PMU_MISC2_TOG CCM_ANALOG_MISC2_TOG
+
+#define TEMPMON_TEMPSENSE0 96
+#define TEMPMON_TEMPSENSE0_SET 97
+#define TEMPMON_TEMPSENSE0_CLR 98
+#define TEMPMON_TEMPSENSE0_TOG 99
+#define TEMPMON_TEMPSENSE1 100
+#define TEMPMON_TEMPSENSE1_SET 101
+#define TEMPMON_TEMPSENSE1_CLR 102
+#define TEMPMON_TEMPSENSE1_TOG 103
+#define TEMPMON_TEMPSENSE2 164
+#define TEMPMON_TEMPSENSE2_SET 165
+#define TEMPMON_TEMPSENSE2_CLR 166
+#define TEMPMON_TEMPSENSE2_TOG 167
+
+#define PMU_LOWPWR_CTRL 155
+#define PMU_LOWPWR_CTRL_SET 156
+#define PMU_LOWPWR_CTRL_CLR 157
+#define PMU_LOWPWR_CTRL_TOG 158
+
+#define USB_ANALOG_USB1_VBUS_DETECT 104
+#define USB_ANALOG_USB1_VBUS_DETECT_SET 105
+#define USB_ANALOG_USB1_VBUS_DETECT_CLR 106
+#define USB_ANALOG_USB1_VBUS_DETECT_TOG 107
+#define USB_ANALOG_USB1_CHRG_DETECT 108
+#define USB_ANALOG_USB1_CHRG_DETECT_SET 109
+#define USB_ANALOG_USB1_CHRG_DETECT_CLR 110
+#define USB_ANALOG_USB1_CHRG_DETECT_TOG 111
+#define USB_ANALOG_USB1_VBUS_DETECT_STAT 112
+#define USB_ANALOG_USB1_CHRG_DETECT_STAT 116
+#define USB_ANALOG_USB1_MISC 124
+#define USB_ANALOG_USB1_MISC_SET 125
+#define USB_ANALOG_USB1_MISC_CLR 126
+#define USB_ANALOG_USB1_MISC_TOG 127
+#define USB_ANALOG_USB2_VBUS_DETECT 128
+#define USB_ANALOG_USB2_VBUS_DETECT_SET 129
+#define USB_ANALOG_USB2_VBUS_DETECT_CLR 130
+#define USB_ANALOG_USB2_VBUS_DETECT_TOG 131
+#define USB_ANALOG_USB2_CHRG_DETECT 132
+#define USB_ANALOG_USB2_CHRG_DETECT_SET 133
+#define USB_ANALOG_USB2_CHRG_DETECT_CLR 134
+#define USB_ANALOG_USB2_CHRG_DETECT_TOG 135
+#define USB_ANALOG_USB2_VBUS_DETECT_STAT 136
+#define USB_ANALOG_USB2_CHRG_DETECT_STAT 140
+#define USB_ANALOG_USB2_MISC 148
+#define USB_ANALOG_USB2_MISC_SET 149
+#define USB_ANALOG_USB2_MISC_CLR 150
+#define USB_ANALOG_USB2_MISC_TOG 151
+#define USB_ANALOG_DIGPROG 152
+#define CCM_ANALOG_MAX 4096
+
+/* CCM_CBCMR */
+#define R_CBCMR_PRE_PERIPH_CLK_SEL_SHIFT  (18)
+#define R_CBCMR_PRE_PERIPH_CLK_SEL_LENGTH (2)
+#define R_CBCMR_PERIPH_CLK2_SEL_SHIFT    (12)
+#define R_CBCMR_PERIPH_CLK2_SEL_LENGTH   (2)
+
+/* CCM_CBCDR */
+#define R_CBCDR_AHB_PODF_SHIFT           (10)
+#define R_CBCDR_AHB_PODF_LENGTH          (3)
+#define R_CBCDR_IPG_PODF_SHIFT           (8)
+#define R_CBCDR_IPG_PODF_LENGTH          (2)
+#define R_CBCDR_PERIPH_CLK_SEL_SHIFT     (25)
+#define R_CBCDR_PERIPH_CLK_SEL_LENGTH    (1)
+#define R_CBCDR_PERIPH_CLK2_PODF_SHIFT   (27)
+#define R_CBCDR_PERIPH_CLK2_PODF_LENGTH  (3)
+
+/* CCM_CSCMR1 */
+#define R_CSCMR1_PERCLK_PODF_SHIFT        (0)
+#define R_CSCMR1_PERCLK_PODF_LENGTH       (6)
+#define R_CSCMR1_PERCLK_CLK_SEL_SHIFT     (6)
+#define R_CSCMR1_PERCLK_CLK_SEL_LENGTH    (1)
+
+/* CCM_ANALOG_PFD_528 */
+#define R_ANALOG_PFD_528_PFD0_FRAC_SHIFT          (0)
+#define R_ANALOG_PFD_528_PFD0_FRAC_LENGTH         (6)
+#define R_ANALOG_PFD_528_PFD2_FRAC_SHIFT          (16)
+#define R_ANALOG_PFD_528_PFD2_FRAC_LENGTH         (6)
+
+/* CCM_ANALOG_PLL_SYS */
+#define R_ANALOG_PLL_SYS_DIV_SELECT_SHIFT         (0)
+#define R_ANALOG_PLL_SYS_DIV_SELECT_LENGTH        (1)
+
+#define CCM_ANALOG_PLL_LOCK      (1 << 31);
+
+#define TYPE_IMX6UL_CCM "imx6ul.ccm"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX6ULCCMState, IMX6UL_CCM)
+
+struct IMX6ULCCMState {
+    /* <private> */
+    IMXCCMState parent_obj;
+
+    /* <public> */
+    MemoryRegion container;
+    MemoryRegion ioccm;
+    MemoryRegion ioanalog;
+
+    uint32_t ccm[CCM_MAX];
+    uint32_t analog[CCM_ANALOG_MAX];
+
+};
+
+#endif /* IMX6UL_CCM_H */
diff --git a/include/hw/misc/imx7_ccm.h b/include/hw/misc/imx7_ccm.h
new file mode 100644 (file)
index 0000000..dcaebfb
--- /dev/null
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2017, Impinj, Inc.
+ *
+ * i.MX7 CCM, PMU and ANALOG IP blocks emulation code
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX7_CCM_H
+#define IMX7_CCM_H
+
+#include "hw/misc/imx_ccm.h"
+#include "qemu/bitops.h"
+#include "qom/object.h"
+
+enum IMX7AnalogRegisters {
+    ANALOG_PLL_ARM,
+    ANALOG_PLL_ARM_SET,
+    ANALOG_PLL_ARM_CLR,
+    ANALOG_PLL_ARM_TOG,
+    ANALOG_PLL_DDR,
+    ANALOG_PLL_DDR_SET,
+    ANALOG_PLL_DDR_CLR,
+    ANALOG_PLL_DDR_TOG,
+    ANALOG_PLL_DDR_SS,
+    ANALOG_PLL_DDR_SS_SET,
+    ANALOG_PLL_DDR_SS_CLR,
+    ANALOG_PLL_DDR_SS_TOG,
+    ANALOG_PLL_DDR_NUM,
+    ANALOG_PLL_DDR_NUM_SET,
+    ANALOG_PLL_DDR_NUM_CLR,
+    ANALOG_PLL_DDR_NUM_TOG,
+    ANALOG_PLL_DDR_DENOM,
+    ANALOG_PLL_DDR_DENOM_SET,
+    ANALOG_PLL_DDR_DENOM_CLR,
+    ANALOG_PLL_DDR_DENOM_TOG,
+    ANALOG_PLL_480,
+    ANALOG_PLL_480_SET,
+    ANALOG_PLL_480_CLR,
+    ANALOG_PLL_480_TOG,
+    ANALOG_PLL_480A,
+    ANALOG_PLL_480A_SET,
+    ANALOG_PLL_480A_CLR,
+    ANALOG_PLL_480A_TOG,
+    ANALOG_PLL_480B,
+    ANALOG_PLL_480B_SET,
+    ANALOG_PLL_480B_CLR,
+    ANALOG_PLL_480B_TOG,
+    ANALOG_PLL_ENET,
+    ANALOG_PLL_ENET_SET,
+    ANALOG_PLL_ENET_CLR,
+    ANALOG_PLL_ENET_TOG,
+    ANALOG_PLL_AUDIO,
+    ANALOG_PLL_AUDIO_SET,
+    ANALOG_PLL_AUDIO_CLR,
+    ANALOG_PLL_AUDIO_TOG,
+    ANALOG_PLL_AUDIO_SS,
+    ANALOG_PLL_AUDIO_SS_SET,
+    ANALOG_PLL_AUDIO_SS_CLR,
+    ANALOG_PLL_AUDIO_SS_TOG,
+    ANALOG_PLL_AUDIO_NUM,
+    ANALOG_PLL_AUDIO_NUM_SET,
+    ANALOG_PLL_AUDIO_NUM_CLR,
+    ANALOG_PLL_AUDIO_NUM_TOG,
+    ANALOG_PLL_AUDIO_DENOM,
+    ANALOG_PLL_AUDIO_DENOM_SET,
+    ANALOG_PLL_AUDIO_DENOM_CLR,
+    ANALOG_PLL_AUDIO_DENOM_TOG,
+    ANALOG_PLL_VIDEO,
+    ANALOG_PLL_VIDEO_SET,
+    ANALOG_PLL_VIDEO_CLR,
+    ANALOG_PLL_VIDEO_TOG,
+    ANALOG_PLL_VIDEO_SS,
+    ANALOG_PLL_VIDEO_SS_SET,
+    ANALOG_PLL_VIDEO_SS_CLR,
+    ANALOG_PLL_VIDEO_SS_TOG,
+    ANALOG_PLL_VIDEO_NUM,
+    ANALOG_PLL_VIDEO_NUM_SET,
+    ANALOG_PLL_VIDEO_NUM_CLR,
+    ANALOG_PLL_VIDEO_NUM_TOG,
+    ANALOG_PLL_VIDEO_DENOM,
+    ANALOG_PLL_VIDEO_DENOM_SET,
+    ANALOG_PLL_VIDEO_DENOM_CLR,
+    ANALOG_PLL_VIDEO_DENOM_TOG,
+    ANALOG_PLL_MISC0,
+    ANALOG_PLL_MISC0_SET,
+    ANALOG_PLL_MISC0_CLR,
+    ANALOG_PLL_MISC0_TOG,
+
+    ANALOG_DIGPROG = 0x800 / sizeof(uint32_t),
+    ANALOG_MAX,
+
+    ANALOG_PLL_LOCK = BIT(31)
+};
+
+enum IMX7CCMRegisters {
+    CCM_MAX = 0xBE00 / sizeof(uint32_t) + 1,
+};
+
+enum IMX7PMURegisters {
+    PMU_MAX = 0x140 / sizeof(uint32_t),
+};
+
+#define TYPE_IMX7_CCM "imx7.ccm"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX7CCMState, IMX7_CCM)
+
+struct IMX7CCMState {
+    /* <private> */
+    IMXCCMState parent_obj;
+
+    /* <public> */
+    MemoryRegion iomem;
+
+    uint32_t ccm[CCM_MAX];
+};
+
+
+#define TYPE_IMX7_ANALOG "imx7.analog"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX7AnalogState, IMX7_ANALOG)
+
+struct IMX7AnalogState {
+    /* <private> */
+    IMXCCMState parent_obj;
+
+    /* <public> */
+    struct {
+        MemoryRegion container;
+        MemoryRegion analog;
+        MemoryRegion digprog;
+        MemoryRegion pmu;
+    } mmio;
+
+    uint32_t analog[ANALOG_MAX];
+    uint32_t pmu[PMU_MAX];
+};
+
+#endif /* IMX7_CCM_H */
diff --git a/include/hw/misc/imx7_gpr.h b/include/hw/misc/imx7_gpr.h
new file mode 100644 (file)
index 0000000..df364bd
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2017, Impinj, Inc.
+ *
+ * i.MX7 GPR IP block emulation code
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX7_GPR_H
+#define IMX7_GPR_H
+
+#include "qemu/bitops.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_IMX7_GPR "imx7.gpr"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX7GPRState, IMX7_GPR)
+
+struct IMX7GPRState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    MemoryRegion mmio;
+};
+
+#endif /* IMX7_GPR_H */
diff --git a/include/hw/misc/imx7_snvs.h b/include/hw/misc/imx7_snvs.h
new file mode 100644 (file)
index 0000000..14a1d6f
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017, Impinj, Inc.
+ *
+ * i.MX7 SNVS block emulation code
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX7_SNVS_H
+#define IMX7_SNVS_H
+
+#include "qemu/bitops.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+
+enum IMX7SNVSRegisters {
+    SNVS_LPCR = 0x38,
+    SNVS_LPCR_TOP   = BIT(6),
+    SNVS_LPCR_DP_EN = BIT(5)
+};
+
+#define TYPE_IMX7_SNVS "imx7.snvs"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX7SNVSState, IMX7_SNVS)
+
+struct IMX7SNVSState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    MemoryRegion mmio;
+};
+
+#endif /* IMX7_SNVS_H */
diff --git a/include/hw/misc/imx7_src.h b/include/hw/misc/imx7_src.h
new file mode 100644 (file)
index 0000000..b4b97dc
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * IMX7 System Reset Controller
+ *
+ * Copyright (C) 2023 Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX7_SRC_H
+#define IMX7_SRC_H
+
+#include "hw/sysbus.h"
+#include "qemu/bitops.h"
+#include "qom/object.h"
+
+#define SRC_SCR 0
+#define SRC_A7RCR0 1
+#define SRC_A7RCR1 2
+#define SRC_M4RCR 3
+#define SRC_ERCR 5
+#define SRC_HSICPHY_RCR 7
+#define SRC_USBOPHY1_RCR 8
+#define SRC_USBOPHY2_RCR 9
+#define SRC_MPIPHY_RCR 10
+#define SRC_PCIEPHY_RCR 11
+#define SRC_SBMR1 22
+#define SRC_SRSR 23
+#define SRC_SISR 26
+#define SRC_SIMR 27
+#define SRC_SBMR2 28
+#define SRC_GPR1 29
+#define SRC_GPR2 30
+#define SRC_GPR3 31
+#define SRC_GPR4 32
+#define SRC_GPR5 33
+#define SRC_GPR6 34
+#define SRC_GPR7 35
+#define SRC_GPR8 36
+#define SRC_GPR9 37
+#define SRC_GPR10 38
+#define SRC_MAX 39
+
+/* SRC_A7SCR1 */
+#define R_CORE1_ENABLE_SHIFT     1
+#define R_CORE1_ENABLE_LENGTH    1
+/* SRC_A7SCR0 */
+#define R_CORE1_RST_SHIFT        5
+#define R_CORE1_RST_LENGTH       1
+#define R_CORE0_RST_SHIFT        4
+#define R_CORE0_RST_LENGTH       1
+
+#define TYPE_IMX7_SRC "imx7.src"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX7SRCState, IMX7_SRC)
+
+struct IMX7SRCState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion iomem;
+
+    uint32_t regs[SRC_MAX];
+};
+
+#endif /* IMX7_SRC_H */
diff --git a/include/hw/misc/imx_ccm.h b/include/hw/misc/imx_ccm.h
new file mode 100644 (file)
index 0000000..7e5678e
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * IMX Clock Control Module base class
+ *
+ * Copyright (C) 2012 NICTA
+ * Updated by Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX_CCM_H
+#define IMX_CCM_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define CKIL_FREQ 32768 /* nominal 32khz clock */
+
+/* PLL control registers */
+#define PD(v) (((v) >> 26) & 0xf)
+#define MFD(v) (((v) >> 16) & 0x3ff)
+#define MFI(v) (((v) >> 10) & 0xf);
+#define MFN(v) ((v) & 0x3ff)
+
+#define PLL_PD(x)               (((x) & 0xf) << 26)
+#define PLL_MFD(x)              (((x) & 0x3ff) << 16)
+#define PLL_MFI(x)              (((x) & 0xf) << 10)
+#define PLL_MFN(x)              (((x) & 0x3ff) << 0)
+
+#define TYPE_IMX_CCM "imx.ccm"
+OBJECT_DECLARE_TYPE(IMXCCMState, IMXCCMClass, IMX_CCM)
+
+struct IMXCCMState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+
+};
+
+typedef enum  {
+    CLK_NONE,
+    CLK_IPG,
+    CLK_IPG_HIGH,
+    CLK_32k,
+    CLK_EXT,
+    CLK_HIGH_DIV,
+    CLK_HIGH,
+} IMXClk;
+
+struct IMXCCMClass {
+    /* <private> */
+    SysBusDeviceClass parent_class;
+
+    /* <public> */
+    uint32_t (*get_clock_frequency)(IMXCCMState *s, IMXClk clk);
+};
+
+uint32_t imx_ccm_calc_pll(uint32_t pllreg, uint32_t base_freq);
+
+uint32_t imx_ccm_get_clock_frequency(IMXCCMState *s, IMXClk clock);
+
+#endif /* IMX_CCM_H */
diff --git a/include/hw/misc/imx_rngc.h b/include/hw/misc/imx_rngc.h
new file mode 100644 (file)
index 0000000..34ad699
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Freescale i.MX RNGC emulation
+ *
+ * Copyright (C) 2020 Martin Kaiser <martin@kaiser.cx>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX_RNGC_H
+#define IMX_RNGC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_IMX_RNGC "imx.rngc"
+OBJECT_DECLARE_SIMPLE_TYPE(IMXRNGCState, IMX_RNGC)
+
+struct IMXRNGCState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion  iomem;
+
+    uint8_t op_self_test;
+    uint8_t op_seed;
+    uint8_t mask;
+    bool    auto_seed;
+
+    QEMUBH *self_test_bh;
+    QEMUBH *seed_bh;
+    qemu_irq irq;
+};
+
+#endif /* IMX_RNGC_H */
diff --git a/include/hw/misc/iosb.h b/include/hw/misc/iosb.h
new file mode 100644 (file)
index 0000000..377f8ca
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * QEMU IOSB emulation
+ *
+ * Copyright (c) 2019 Laurent Vivier
+ * Copyright (c) 2022 Mark Cave-Ayland
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_MEM_IOSB_H
+#define HW_MEM_IOSB_H
+
+#define IOSB_REGS 7
+
+struct IOSBState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion mem_regs;
+    uint32_t regs[IOSB_REGS];
+};
+
+#define TYPE_IOSB "IOSB"
+OBJECT_DECLARE_SIMPLE_TYPE(IOSBState, IOSB);
+
+#endif
diff --git a/include/hw/misc/iotkit-secctl.h b/include/hw/misc/iotkit-secctl.h
new file mode 100644 (file)
index 0000000..79a3628
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * ARM IoT Kit security controller
+ *
+ * Copyright (c) 2018 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+/* This is a model of the security controller which is part of the
+ * Arm IoT Kit and documented in
+ * https://developer.arm.com/documentation/ecm0601256/latest
+ *
+ * QEMU interface:
+ *  + sysbus MMIO region 0 is the "secure privilege control block" registers
+ *  + sysbus MMIO region 1 is the "non-secure privilege control block" registers
+ *  + named GPIO output "sec_resp_cfg" indicating whether blocked accesses
+ *    should RAZ/WI or bus error
+ *  + named GPIO output "nsc_cfg" whose value tracks the NSCCFG register value
+ *  + named GPIO output "msc_irq" for the combined IRQ line from the MSCs
+ * Controlling the 2 APB PPCs in the IoTKit:
+ *  + named GPIO outputs apb_ppc0_nonsec[0..2] and apb_ppc1_nonsec
+ *  + named GPIO outputs apb_ppc0_ap[0..2] and apb_ppc1_ap
+ *  + named GPIO outputs apb_ppc{0,1}_irq_enable
+ *  + named GPIO outputs apb_ppc{0,1}_irq_clear
+ *  + named GPIO inputs apb_ppc{0,1}_irq_status
+ * Controlling each of the 4 expansion APB PPCs which a system using the IoTKit
+ * might provide:
+ *  + named GPIO outputs apb_ppcexp{0,1,2,3}_nonsec[0..15]
+ *  + named GPIO outputs apb_ppcexp{0,1,2,3}_ap[0..15]
+ *  + named GPIO outputs apb_ppcexp{0,1,2,3}_irq_enable
+ *  + named GPIO outputs apb_ppcexp{0,1,2,3}_irq_clear
+ *  + named GPIO inputs apb_ppcexp{0,1,2,3}_irq_status
+ * Controlling each of the 4 expansion AHB PPCs which a system using the IoTKit
+ * might provide:
+ *  + named GPIO outputs ahb_ppcexp{0,1,2,3}_nonsec[0..15]
+ *  + named GPIO outputs ahb_ppcexp{0,1,2,3}_ap[0..15]
+ *  + named GPIO outputs ahb_ppcexp{0,1,2,3}_irq_enable
+ *  + named GPIO outputs ahb_ppcexp{0,1,2,3}_irq_clear
+ *  + named GPIO inputs ahb_ppcexp{0,1,2,3}_irq_status
+ * Controlling the (up to) 4 MPCs in the IoTKit/SSE:
+ *  + named GPIO inputs mpc_status[0..3]
+ * Controlling each of the 16 expansion MPCs which a system using the IoTKit
+ * might provide:
+ *  + named GPIO inputs mpcexp_status[0..15]
+ * Controlling each of the 16 expansion MSCs which a system using the IoTKit
+ * might provide:
+ *  + named GPIO inputs mscexp_status[0..15]
+ *  + named GPIO outputs mscexp_clear[0..15]
+ *  + named GPIO outputs mscexp_ns[0..15]
+ */
+
+#ifndef IOTKIT_SECCTL_H
+#define IOTKIT_SECCTL_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_IOTKIT_SECCTL "iotkit-secctl"
+OBJECT_DECLARE_SIMPLE_TYPE(IoTKitSecCtl, IOTKIT_SECCTL)
+
+#define IOTS_APB_PPC0_NUM_PORTS 3
+#define IOTS_APB_PPC1_NUM_PORTS 1
+#define IOTS_PPC_NUM_PORTS 16
+#define IOTS_NUM_APB_PPC 2
+#define IOTS_NUM_APB_EXP_PPC 4
+#define IOTS_NUM_AHB_EXP_PPC 4
+#define IOTS_NUM_EXP_MPC 16
+#define IOTS_NUM_MPC 4
+#define IOTS_NUM_EXP_MSC 16
+
+
+/* State and IRQ lines relating to a PPC. For the
+ * PPCs in the IoTKit not all the IRQ lines are used.
+ */
+typedef struct IoTKitSecCtlPPC {
+    qemu_irq nonsec[IOTS_PPC_NUM_PORTS];
+    qemu_irq ap[IOTS_PPC_NUM_PORTS];
+    qemu_irq irq_enable;
+    qemu_irq irq_clear;
+
+    uint32_t ns;
+    uint32_t sp;
+    uint32_t nsp;
+
+    /* Number of ports actually present */
+    int numports;
+    /* Offset of this PPC's interrupt bits in SECPPCINTSTAT */
+    int irq_bit_offset;
+    IoTKitSecCtl *parent;
+} IoTKitSecCtlPPC;
+
+struct IoTKitSecCtl {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    qemu_irq sec_resp_cfg;
+    qemu_irq nsc_cfg_irq;
+
+    MemoryRegion s_regs;
+    MemoryRegion ns_regs;
+
+    uint32_t secppcintstat;
+    uint32_t secppcinten;
+    uint32_t secrespcfg;
+    uint32_t nsccfg;
+    uint32_t brginten;
+    uint32_t mpcintstatus;
+
+    uint32_t secmscintstat;
+    uint32_t secmscinten;
+    uint32_t nsmscexp;
+    qemu_irq mscexp_clear[IOTS_NUM_EXP_MSC];
+    qemu_irq mscexp_ns[IOTS_NUM_EXP_MSC];
+    qemu_irq msc_irq;
+
+    IoTKitSecCtlPPC apb[IOTS_NUM_APB_PPC];
+    IoTKitSecCtlPPC apbexp[IOTS_NUM_APB_EXP_PPC];
+    IoTKitSecCtlPPC ahbexp[IOTS_NUM_APB_EXP_PPC];
+
+    uint32_t sse_version;
+};
+
+#endif
diff --git a/include/hw/misc/iotkit-sysctl.h b/include/hw/misc/iotkit-sysctl.h
new file mode 100644 (file)
index 0000000..481e27f
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * ARM IoTKit system control element
+ *
+ * Copyright (c) 2018 Linaro Limited
+ * Written by Peter Maydell
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+/*
+ * This is a model of the "system control element" which is part of the
+ * Arm IoTKit and documented in
+ * https://developer.arm.com/documentation/ecm0601256/latest
+ * Specifically, it implements the "system information block" and
+ * "system control register" blocks.
+ *
+ * QEMU interface:
+ *  + QOM property "sse-version": indicates which SSE version this is part of
+ *    (used to identify whether to provide SSE-200-only registers, etc)
+ *  + sysbus MMIO region 0: the system information register bank
+ *  + sysbus MMIO region 1: the system control register bank
+ */
+
+#ifndef HW_MISC_IOTKIT_SYSCTL_H
+#define HW_MISC_IOTKIT_SYSCTL_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_IOTKIT_SYSCTL "iotkit-sysctl"
+OBJECT_DECLARE_SIMPLE_TYPE(IoTKitSysCtl, IOTKIT_SYSCTL)
+
+struct IoTKitSysCtl {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+
+    uint32_t secure_debug;
+    uint32_t reset_syndrome;
+    uint32_t reset_mask;
+    uint32_t gretreg;
+    uint32_t initsvtor0;
+    uint32_t cpuwait;
+    uint32_t wicctrl;
+    uint32_t scsecctrl;
+    uint32_t fclk_div;
+    uint32_t sysclk_div;
+    uint32_t clock_force;
+    uint32_t initsvtor1;
+    uint32_t nmi_enable;
+    uint32_t ewctrl;
+    uint32_t pwrctrl;
+    uint32_t pdcm_pd_sys_sense;
+    uint32_t pdcm_pd_sram0_sense;
+    uint32_t pdcm_pd_sram1_sense;
+    uint32_t pdcm_pd_sram2_sense;
+    uint32_t pdcm_pd_sram3_sense;
+    uint32_t pdcm_pd_cpu0_sense;
+    uint32_t pdcm_pd_vmr0_sense;
+    uint32_t pdcm_pd_vmr1_sense;
+
+    /* Properties */
+    uint32_t sse_version;
+    uint32_t cpuwait_rst;
+    uint32_t initsvtor0_rst;
+    uint32_t initsvtor1_rst;
+};
+
+#endif
diff --git a/include/hw/misc/iotkit-sysinfo.h b/include/hw/misc/iotkit-sysinfo.h
new file mode 100644 (file)
index 0000000..91c23f9
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * ARM IoTKit system information block
+ *
+ * Copyright (c) 2018 Linaro Limited
+ * Written by Peter Maydell
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+/*
+ * This is a model of the "system information block" which is part of the
+ * Arm IoTKit and documented in
+ * https://developer.arm.com/documentation/ecm0601256/latest
+ * QEMU interface:
+ *  + QOM property "SYS_VERSION": value to use for SYS_VERSION register
+ *  + QOM property "SYS_CONFIG": value to use for SYS_CONFIG register
+ *  + sysbus MMIO region 0: the system information register bank
+ */
+
+#ifndef HW_MISC_IOTKIT_SYSINFO_H
+#define HW_MISC_IOTKIT_SYSINFO_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_IOTKIT_SYSINFO "iotkit-sysinfo"
+OBJECT_DECLARE_SIMPLE_TYPE(IoTKitSysInfo, IOTKIT_SYSINFO)
+
+struct IoTKitSysInfo {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+
+    /* Properties */
+    uint32_t sys_version;
+    uint32_t sys_config;
+    uint32_t sse_version;
+    uint32_t iidr;
+};
+
+#endif
diff --git a/include/hw/misc/ivshmem.h b/include/hw/misc/ivshmem.h
new file mode 100644 (file)
index 0000000..433ef53
--- /dev/null
@@ -0,0 +1,25 @@
+
+/*
+ * Inter-VM Shared Memory PCI device.
+ *
+ * Author:
+ *      Cam Macdonell <cam@cs.ualberta.ca>
+ *
+ * Based On: cirrus_vga.c
+ *          Copyright (c) 2004 Fabrice Bellard
+ *          Copyright (c) 2004 Makoto Suzuki (suzu)
+ *
+ *      and rtl8139.c
+ *          Copyright (c) 2006 Igor Kovalenko
+ *
+ * This code is licensed under the GNU GPL v2.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#ifndef IVSHMEM_H
+#define IVSHMEM_H
+
+#define IVSHMEM_PROTOCOL_VERSION 0
+
+#endif /* IVSHMEM_H */
diff --git a/include/hw/misc/lasi.h b/include/hw/misc/lasi.h
new file mode 100644 (file)
index 0000000..0a8c735
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * HP-PARISC Lasi chipset emulation.
+ *
+ * (C) 2019 by Helge Deller <deller@gmx.de>
+ *
+ * This work is licensed under the GNU GPL license version 2 or later.
+ *
+ * Documentation available at:
+ * https://parisc.wiki.kernel.org/images-parisc/7/79/Lasi_ers.pdf
+ */
+
+#ifndef LASI_H
+#define LASI_H
+
+#include "exec/address-spaces.h"
+#include "hw/pci/pci_host.h"
+#include "hw/boards.h"
+
+#define TYPE_LASI_CHIP "lasi-chip"
+OBJECT_DECLARE_SIMPLE_TYPE(LasiState, LASI_CHIP)
+
+#define LASI_IRR        0x00    /* RO */
+#define LASI_IMR        0x04
+#define LASI_IPR        0x08
+#define LASI_ICR        0x0c
+#define LASI_IAR        0x10
+
+#define LASI_LPT        0x02000
+#define LASI_UART       0x05000
+#define LASI_LAN        0x07000
+#define LASI_RTC        0x09000
+
+#define LASI_PCR        0x0C000 /* LASI Power Control register */
+#define LASI_ERRLOG     0x0C004 /* LASI Error Logging register */
+#define LASI_VER        0x0C008 /* LASI Version Control register */
+#define LASI_IORESET    0x0C00C /* LASI I/O Reset register */
+#define LASI_AMR        0x0C010 /* LASI Arbitration Mask register */
+#define LASI_IO_CONF    0x7FFFE /* LASI primary configuration register */
+#define LASI_IO_CONF2   0x7FFFF /* LASI secondary configuration register */
+
+#define LASI_BIT(x)     (1ul << (x))
+#define LASI_IRQ_BITS   (LASI_BIT(5) | LASI_BIT(7) | LASI_BIT(8) | LASI_BIT(9) \
+            | LASI_BIT(13) | LASI_BIT(14) | LASI_BIT(16) | LASI_BIT(17) \
+            | LASI_BIT(18) | LASI_BIT(19) | LASI_BIT(20) | LASI_BIT(21) \
+            | LASI_BIT(26))
+
+#define ICR_BUS_ERROR_BIT  LASI_BIT(8)  /* bit 8 in ICR */
+#define ICR_TOC_BIT        LASI_BIT(1)  /* bit 1 in ICR */
+
+#define LASI_IRQS           27
+
+#define LASI_IRQ_HPA        14
+#define LASI_IRQ_UART_HPA   5
+#define LASI_IRQ_LPT_HPA    7
+#define LASI_IRQ_LAN_HPA    8
+#define LASI_IRQ_SCSI_HPA   9
+#define LASI_IRQ_AUDIO_HPA  13
+#define LASI_IRQ_PS2KBD_HPA 26
+#define LASI_IRQ_PS2MOU_HPA 26
+
+struct LasiState {
+    PCIHostState parent_obj;
+
+    uint32_t irr;
+    uint32_t imr;
+    uint32_t ipr;
+    uint32_t icr;
+    uint32_t iar;
+
+    uint32_t errlog;
+    uint32_t amr;
+    uint32_t rtc_ref;
+
+    MemoryRegion this_mem;
+};
+
+#endif
diff --git a/include/hw/misc/led.h b/include/hw/misc/led.h
new file mode 100644 (file)
index 0000000..29c0879
--- /dev/null
@@ -0,0 +1,98 @@
+/*
+ * QEMU single LED device
+ *
+ * Copyright (C) 2020 Philippe Mathieu-Daudé <f4bug@amsat.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef HW_MISC_LED_H
+#define HW_MISC_LED_H
+
+#include "qom/object.h"
+#include "hw/qdev-core.h"
+
+#define TYPE_LED "led"
+
+/**
+ * LEDColor: Color of a LED
+ *
+ * This set is restricted to physically available LED colors.
+ *
+ * LED colors from 'Table 1. Product performance of LUXEON Rebel Color
+ * Line' of the 'DS68 LUXEON Rebel Color Line' datasheet available at:
+ * https://www.lumileds.com/products/color-leds/luxeon-rebel-color/
+ */
+typedef enum {          /* Coarse wavelength range */
+    LED_COLOR_VIOLET,   /* 425 nm */
+    LED_COLOR_BLUE,     /* 475 nm */
+    LED_COLOR_CYAN,     /* 500 nm */
+    LED_COLOR_GREEN,    /* 535 nm */
+    LED_COLOR_YELLOW,   /* 567 nm */
+    LED_COLOR_AMBER,    /* 590 nm */
+    LED_COLOR_ORANGE,   /* 615 nm */
+    LED_COLOR_RED,      /* 630 nm */
+} LEDColor;
+
+struct LEDState {
+    /* Private */
+    DeviceState parent_obj;
+    /* Public */
+
+    uint8_t intensity_percent;
+    qemu_irq irq;
+
+    /* Properties */
+    char *description;
+    char *color;
+    /*
+     * Determines whether a GPIO is using a positive (active-high)
+     * logic (when used with GPIO, the intensity at reset is related
+     * to the GPIO polarity).
+     */
+    bool gpio_active_high;
+};
+typedef struct LEDState LEDState;
+DECLARE_INSTANCE_CHECKER(LEDState, LED, TYPE_LED)
+
+/**
+ * led_set_intensity: Set the intensity of a LED device
+ * @s: the LED object
+ * @intensity_percent: intensity as percentage in range 0 to 100.
+ */
+void led_set_intensity(LEDState *s, unsigned intensity_percent);
+
+/**
+ * led_get_intensity:
+ * @s: the LED object
+ *
+ * Returns: The LED intensity as percentage in range 0 to 100.
+ */
+unsigned led_get_intensity(LEDState *s);
+
+/**
+ * led_set_state: Set the state of a LED device
+ * @s: the LED object
+ * @is_emitting: boolean indicating whether the LED is emitting
+ *
+ * This utility is meant for LED connected to GPIO.
+ */
+void led_set_state(LEDState *s, bool is_emitting);
+
+/**
+ * led_create_simple: Create and realize a LED device
+ * @parentobj: the parent object
+ * @gpio_polarity: GPIO polarity
+ * @color: color of the LED
+ * @description: description of the LED (optional)
+ *
+ * Create the device state structure, initialize it, and
+ * drop the reference to it (the device is realized).
+ *
+ * Returns: The newly allocated and instantiated LED object.
+ */
+LEDState *led_create_simple(Object *parentobj,
+                            GpioPolarity gpio_polarity,
+                            LEDColor color,
+                            const char *description);
+
+#endif /* HW_MISC_LED_H */
diff --git a/include/hw/misc/mac_via.h b/include/hw/misc/mac_via.h
new file mode 100644 (file)
index 0000000..63cdcf7
--- /dev/null
@@ -0,0 +1,115 @@
+/*
+ *
+ * Copyright (c) 2011-2018 Laurent Vivier
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_MISC_MAC_VIA_H
+#define HW_MISC_MAC_VIA_H
+
+#include "exec/memory.h"
+#include "hw/sysbus.h"
+#include "hw/misc/mos6522.h"
+#include "hw/input/adb.h"
+#include "qom/object.h"
+
+
+#define VIA_SIZE   0x2000
+
+/* VIA 1 */
+#define VIA1_IRQ_ONE_SECOND_BIT CA2_INT_BIT
+#define VIA1_IRQ_60HZ_BIT       CA1_INT_BIT
+#define VIA1_IRQ_ADB_READY_BIT  SR_INT_BIT
+#define VIA1_IRQ_ADB_DATA_BIT   CB2_INT_BIT
+#define VIA1_IRQ_ADB_CLOCK_BIT  CB1_INT_BIT
+
+#define VIA1_IRQ_ONE_SECOND     BIT(VIA1_IRQ_ONE_SECOND_BIT)
+#define VIA1_IRQ_60HZ           BIT(VIA1_IRQ_60HZ_BIT)
+#define VIA1_IRQ_ADB_READY      BIT(VIA1_IRQ_ADB_READY_BIT)
+#define VIA1_IRQ_ADB_DATA       BIT(VIA1_IRQ_ADB_DATA_BIT)
+#define VIA1_IRQ_ADB_CLOCK      BIT(VIA1_IRQ_ADB_CLOCK_BIT)
+
+
+#define TYPE_MOS6522_Q800_VIA1 "mos6522-q800-via1"
+OBJECT_DECLARE_SIMPLE_TYPE(MOS6522Q800VIA1State, MOS6522_Q800_VIA1)
+
+struct MOS6522Q800VIA1State {
+    /*< private >*/
+    MOS6522State parent_obj;
+
+    MemoryRegion via_mem;
+
+    qemu_irq auxmode_irq;
+    uint8_t last_b;
+
+    /* RTC */
+    uint8_t PRAM[256];
+    BlockBackend *blk;
+    VMChangeStateEntry *vmstate;
+
+    uint32_t tick_offset;
+
+    uint8_t data_out;
+    int data_out_cnt;
+    uint8_t data_in;
+    uint8_t data_in_cnt;
+    uint8_t cmd;
+    int wprotect;
+    int alt;
+
+    /* ADB */
+    ADBBusState adb_bus;
+    qemu_irq adb_data_ready;
+    int adb_data_in_size;
+    int adb_data_in_index;
+    int adb_data_out_index;
+    uint8_t adb_data_in[128];
+    uint8_t adb_data_out[16];
+    uint8_t adb_autopoll_cmd;
+
+    /* external timers */
+    QEMUTimer *one_second_timer;
+    int64_t next_second;
+    QEMUTimer *sixty_hz_timer;
+    int64_t next_sixty_hz;
+
+    /* SETUPTIMEK hack */
+    int timer_hack_state;
+};
+
+
+/* VIA 2 */
+#define VIA2_IRQ_SCSI_DATA_BIT  CA2_INT_BIT
+#define VIA2_IRQ_NUBUS_BIT      CA1_INT_BIT
+#define VIA2_IRQ_SCSI_BIT       CB2_INT_BIT
+#define VIA2_IRQ_ASC_BIT        CB1_INT_BIT
+
+#define VIA2_IRQ_SCSI_DATA      BIT(VIA2_IRQ_SCSI_DATA_BIT)
+#define VIA2_IRQ_NUBUS          BIT(VIA2_IRQ_NUBUS_BIT)
+#define VIA2_IRQ_UNUSED         BIT(VIA2_IRQ_SCSI_BIT)
+#define VIA2_IRQ_SCSI           BIT(VIA2_IRQ_UNUSED_BIT)
+#define VIA2_IRQ_ASC            BIT(VIA2_IRQ_ASC_BIT)
+
+#define VIA2_NUBUS_IRQ_NB       7
+
+#define VIA2_NUBUS_IRQ_9        0
+#define VIA2_NUBUS_IRQ_A        1
+#define VIA2_NUBUS_IRQ_B        2
+#define VIA2_NUBUS_IRQ_C        3
+#define VIA2_NUBUS_IRQ_D        4
+#define VIA2_NUBUS_IRQ_E        5
+#define VIA2_NUBUS_IRQ_INTVIDEO 6
+
+#define TYPE_MOS6522_Q800_VIA2 "mos6522-q800-via2"
+OBJECT_DECLARE_SIMPLE_TYPE(MOS6522Q800VIA2State, MOS6522_Q800_VIA2)
+
+struct MOS6522Q800VIA2State {
+    /*< private >*/
+    MOS6522State parent_obj;
+
+    MemoryRegion via_mem;
+};
+
+#endif
diff --git a/include/hw/misc/macio/cuda.h b/include/hw/misc/macio/cuda.h
new file mode 100644 (file)
index 0000000..8a6678c
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * QEMU PowerMac CUDA device support
+ *
+ * Copyright (c) 2004-2007 Fabrice Bellard
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef CUDA_H
+#define CUDA_H
+
+#include "hw/input/adb.h"
+#include "hw/misc/mos6522.h"
+#include "qom/object.h"
+
+/* CUDA commands (2nd byte) */
+#define CUDA_WARM_START                0x0
+#define CUDA_AUTOPOLL                  0x1
+#define CUDA_GET_6805_ADDR             0x2
+#define CUDA_GET_TIME                  0x3
+#define CUDA_GET_PRAM                  0x7
+#define CUDA_SET_6805_ADDR             0x8
+#define CUDA_SET_TIME                  0x9
+#define CUDA_POWERDOWN                 0xa
+#define CUDA_POWERUP_TIME              0xb
+#define CUDA_SET_PRAM                  0xc
+#define CUDA_MS_RESET                  0xd
+#define CUDA_SEND_DFAC                 0xe
+#define CUDA_BATTERY_SWAP_SENSE        0x10
+#define CUDA_RESET_SYSTEM              0x11
+#define CUDA_SET_IPL                   0x12
+#define CUDA_FILE_SERVER_FLAG          0x13
+#define CUDA_SET_AUTO_RATE             0x14
+#define CUDA_GET_AUTO_RATE             0x16
+#define CUDA_SET_DEVICE_LIST           0x19
+#define CUDA_GET_DEVICE_LIST           0x1a
+#define CUDA_SET_ONE_SECOND_MODE       0x1b
+#define CUDA_SET_POWER_MESSAGES        0x21
+#define CUDA_GET_SET_IIC               0x22
+#define CUDA_WAKEUP                    0x23
+#define CUDA_TIMER_TICKLE              0x24
+#define CUDA_COMBINED_FORMAT_IIC       0x25
+
+
+/* MOS6522 CUDA */
+struct MOS6522CUDAState {
+    /*< private >*/
+    MOS6522State parent_obj;
+};
+
+#define TYPE_MOS6522_CUDA "mos6522-cuda"
+OBJECT_DECLARE_SIMPLE_TYPE(MOS6522CUDAState, MOS6522_CUDA)
+
+/* Cuda */
+#define TYPE_CUDA "cuda"
+OBJECT_DECLARE_SIMPLE_TYPE(CUDAState, CUDA)
+
+struct CUDAState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+    MemoryRegion mem;
+
+    ADBBusState adb_bus;
+    MOS6522CUDAState mos6522_cuda;
+
+    uint32_t tick_offset;
+    uint64_t tb_frequency;
+
+    uint8_t last_b;
+    uint8_t last_acr;
+
+    /* MacOS 9 is racy and requires a delay upon setting the SR_INT bit */
+    uint64_t sr_delay_ns;
+    QEMUTimer *sr_delay_timer;
+
+    int data_in_size;
+    int data_in_index;
+    int data_out_index;
+
+    qemu_irq irq;
+    uint8_t data_in[128];
+    uint8_t data_out[16];
+};
+
+#endif /* CUDA_H */
diff --git a/include/hw/misc/macio/gpio.h b/include/hw/misc/macio/gpio.h
new file mode 100644 (file)
index 0000000..7d2aa88
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * PowerMac NewWorld MacIO GPIO emulation
+ *
+ * Copyright (c) 2016 Benjamin Herrenschmidt
+ * Copyright (c) 2018 Mark Cave-Ayland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef MACIO_GPIO_H
+#define MACIO_GPIO_H
+
+#include "hw/ppc/openpic.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_MACIO_GPIO "macio-gpio"
+OBJECT_DECLARE_SIMPLE_TYPE(MacIOGPIOState, MACIO_GPIO)
+
+struct MacIOGPIOState {
+    /*< private >*/
+    SysBusDevice parent;
+    /*< public >*/
+
+    MemoryRegion gpiomem;
+    qemu_irq gpio_extirqs[10];
+    uint8_t gpio_levels[8];
+    uint8_t gpio_regs[36]; /* XXX Check count */
+};
+
+void macio_set_gpio(MacIOGPIOState *s, uint32_t gpio, bool state);
+
+#endif
diff --git a/include/hw/misc/macio/macio.h b/include/hw/misc/macio/macio.h
new file mode 100644 (file)
index 0000000..86df2c2
--- /dev/null
@@ -0,0 +1,144 @@
+/*
+ * PowerMac MacIO device emulation
+ *
+ * Copyright (c) 2005-2007 Fabrice Bellard
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef MACIO_H
+#define MACIO_H
+
+#include "hw/char/escc.h"
+#include "hw/pci/pci_device.h"
+#include "hw/ide/internal.h"
+#include "hw/intc/heathrow_pic.h"
+#include "hw/misc/macio/cuda.h"
+#include "hw/misc/macio/gpio.h"
+#include "hw/misc/macio/pmu.h"
+#include "hw/nvram/mac_nvram.h"
+#include "hw/ppc/mac_dbdma.h"
+#include "hw/ppc/openpic.h"
+#include "qom/object.h"
+
+/* Old World IRQs */
+#define OLDWORLD_CUDA_IRQ      0x12
+#define OLDWORLD_ESCCB_IRQ     0x10
+#define OLDWORLD_ESCCA_IRQ     0xf
+#define OLDWORLD_IDE0_IRQ      0xd
+#define OLDWORLD_IDE0_DMA_IRQ  0x2
+#define OLDWORLD_IDE1_IRQ      0xe
+#define OLDWORLD_IDE1_DMA_IRQ  0x3
+
+/* New World IRQs */
+#define NEWWORLD_CUDA_IRQ      0x19
+#define NEWWORLD_PMU_IRQ       0x19
+#define NEWWORLD_ESCCB_IRQ     0x24
+#define NEWWORLD_ESCCA_IRQ     0x25
+#define NEWWORLD_IDE0_IRQ      0xd
+#define NEWWORLD_IDE0_DMA_IRQ  0x2
+#define NEWWORLD_IDE1_IRQ      0xe
+#define NEWWORLD_IDE1_DMA_IRQ  0x3
+#define NEWWORLD_EXTING_GPIO1  0x2f
+#define NEWWORLD_EXTING_GPIO9  0x37
+
+/* MacIO virtual bus */
+#define TYPE_MACIO_BUS "macio-bus"
+OBJECT_DECLARE_SIMPLE_TYPE(MacIOBusState, MACIO_BUS)
+
+struct MacIOBusState {
+    /*< private >*/
+    BusState parent_obj;
+};
+
+/* MacIO IDE */
+#define TYPE_MACIO_IDE "macio-ide"
+OBJECT_DECLARE_SIMPLE_TYPE(MACIOIDEState, MACIO_IDE)
+
+struct MACIOIDEState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+    uint32_t addr;
+    uint32_t channel;
+    qemu_irq real_ide_irq;
+    qemu_irq real_dma_irq;
+    qemu_irq ide_irq;
+    qemu_irq dma_irq;
+
+    MemoryRegion mem;
+    IDEBus bus;
+    IDEDMA dma;
+    void *dbdma;
+    bool dma_active;
+    uint32_t timing_reg;
+    uint32_t irq_reg;
+};
+
+void macio_ide_init_drives(MACIOIDEState *ide, DriveInfo **hd_table);
+void macio_ide_register_dma(MACIOIDEState *ide);
+
+#define TYPE_MACIO "macio"
+OBJECT_DECLARE_SIMPLE_TYPE(MacIOState, MACIO)
+
+struct MacIOState {
+    /*< private >*/
+    PCIDevice parent;
+    /*< public >*/
+
+    MacIOBusState macio_bus;
+    MemoryRegion bar;
+    CUDAState cuda;
+    PMUState pmu;
+    DBDMAState dbdma;
+    ESCCState escc;
+    uint64_t frequency;
+};
+
+#define TYPE_OLDWORLD_MACIO "macio-oldworld"
+OBJECT_DECLARE_SIMPLE_TYPE(OldWorldMacIOState, OLDWORLD_MACIO)
+
+struct OldWorldMacIOState {
+    /*< private >*/
+    MacIOState parent_obj;
+    /*< public >*/
+
+    HeathrowState pic;
+
+    MacIONVRAMState nvram;
+    MACIOIDEState ide[2];
+};
+
+#define TYPE_NEWWORLD_MACIO "macio-newworld"
+OBJECT_DECLARE_SIMPLE_TYPE(NewWorldMacIOState, NEWWORLD_MACIO)
+
+struct NewWorldMacIOState {
+    /*< private >*/
+    MacIOState parent_obj;
+    /*< public >*/
+
+    bool has_pmu;
+    bool has_adb;
+    OpenPICState pic;
+    MACIOIDEState ide[2];
+    MacIOGPIOState gpio;
+};
+
+#endif /* MACIO_H */
diff --git a/include/hw/misc/macio/pmu.h b/include/hw/misc/macio/pmu.h
new file mode 100644 (file)
index 0000000..ceb1208
--- /dev/null
@@ -0,0 +1,235 @@
+/*
+ * Definitions for talking to the PMU.  The PMU is a microcontroller
+ * which controls battery charging and system power on PowerBook 3400
+ * and 2400 models as well as the RTC and various other things.
+ *
+ * Copyright (C) 1998 Paul Mackerras.
+ * Copyright (C) 2016 Ben Herrenschmidt
+ */
+
+#ifndef PMU_H
+#define PMU_H
+
+#include "hw/input/adb.h"
+#include "hw/misc/mos6522.h"
+#include "hw/misc/macio/gpio.h"
+#include "qom/object.h"
+
+/*
+ * PMU commands
+ */
+
+#define PMU_POWER_CTRL0            0x10  /* control power of some devices */
+#define PMU_POWER_CTRL             0x11  /* control power of some devices */
+#define PMU_ADB_CMD                0x20  /* send ADB packet */
+#define PMU_ADB_POLL_OFF           0x21  /* disable ADB auto-poll */
+#define PMU_WRITE_NVRAM            0x33  /* write non-volatile RAM */
+#define PMU_READ_NVRAM             0x3b  /* read non-volatile RAM */
+#define PMU_SET_RTC                0x30  /* set real-time clock */
+#define PMU_READ_RTC               0x38  /* read real-time clock */
+#define PMU_SET_VOLBUTTON          0x40  /* set volume up/down position */
+#define PMU_BACKLIGHT_BRIGHT       0x41  /* set backlight brightness */
+#define PMU_GET_VOLBUTTON          0x48  /* get volume up/down position */
+#define PMU_PCEJECT                0x4c  /* eject PC-card from slot */
+#define PMU_BATTERY_STATE          0x6b  /* report battery state etc. */
+#define PMU_SMART_BATTERY_STATE    0x6f  /* report battery state (new way) */
+#define PMU_SET_INTR_MASK          0x70  /* set PMU interrupt mask */
+#define PMU_INT_ACK                0x78  /* read interrupt bits */
+#define PMU_SHUTDOWN               0x7e  /* turn power off */
+#define PMU_CPU_SPEED              0x7d  /* control CPU speed on some models */
+#define PMU_SLEEP                  0x7f  /* put CPU to sleep */
+#define PMU_POWER_EVENTS           0x8f  /* Send power-event commands to PMU */
+#define PMU_I2C_CMD                0x9a  /* I2C operations */
+#define PMU_RESET                  0xd0  /* reset CPU */
+#define PMU_GET_BRIGHTBUTTON       0xd9  /* report brightness up/down pos */
+#define PMU_GET_COVER              0xdc  /* report cover open/closed */
+#define PMU_SYSTEM_READY           0xdf  /* tell PMU we are awake */
+#define PMU_DOWNLOAD_STATUS        0xe2  /* Called by MacOS during boot... */
+#define PMU_READ_PMU_RAM           0xe8  /* read the PMU RAM... ??? */
+#define PMU_GET_VERSION            0xea  /* read the PMU version */
+
+/* Bits to use with the PMU_POWER_CTRL0 command */
+#define PMU_POW0_ON            0x80    /* OR this to power ON the device */
+#define PMU_POW0_OFF           0x00    /* leave bit 7 to 0 to power it OFF */
+#define PMU_POW0_HARD_DRIVE    0x04    /* Hard drive power
+                                        * (on wallstreet/lombard ?) */
+
+/* Bits to use with the PMU_POWER_CTRL command */
+#define PMU_POW_ON             0x80    /* OR this to power ON the device */
+#define PMU_POW_OFF            0x00    /* leave bit 7 to 0 to power it OFF */
+#define PMU_POW_BACKLIGHT      0x01    /* backlight power */
+#define PMU_POW_CHARGER        0x02    /* battery charger power */
+#define PMU_POW_IRLED          0x04    /* IR led power (on wallstreet) */
+#define PMU_POW_MEDIABAY       0x08    /* media bay power
+                                        * (wallstreet/lombard ?) */
+
+/* Bits in PMU interrupt and interrupt mask bytes */
+#define PMU_INT_PCEJECT        0x04    /* PC-card eject buttons */
+#define PMU_INT_SNDBRT         0x08    /* sound/brightness up/down buttons */
+#define PMU_INT_ADB            0x10    /* ADB autopoll or reply data */
+#define PMU_INT_BATTERY        0x20    /* Battery state change */
+#define PMU_INT_ENVIRONMENT    0x40    /* Environment interrupts */
+#define PMU_INT_TICK           0x80    /* 1-second tick interrupt */
+
+/* Other bits in PMU interrupt valid when PMU_INT_ADB is set */
+#define PMU_INT_ADB_AUTO           0x04    /* ADB autopoll, when PMU_INT_ADB */
+#define PMU_INT_WAITING_CHARGER    0x01    /* ??? */
+#define PMU_INT_AUTO_SRQ_POLL      0x02    /* ??? */
+
+/* Bits in the environment message (either obtained via PMU_GET_COVER,
+ * or via PMU_INT_ENVIRONMENT on core99 */
+#define PMU_ENV_LID_CLOSED     0x01    /* The lid is closed */
+
+/* I2C related definitions */
+#define PMU_I2C_MODE_SIMPLE    0
+#define PMU_I2C_MODE_STDSUB    1
+#define PMU_I2C_MODE_COMBINED  2
+
+#define PMU_I2C_BUS_STATUS     0
+#define PMU_I2C_BUS_SYSCLK     1
+#define PMU_I2C_BUS_POWER      2
+
+#define PMU_I2C_STATUS_OK          0
+#define PMU_I2C_STATUS_DATAREAD    1
+#define PMU_I2C_STATUS_BUSY        0xfe
+
+/* Kind of PMU (model) */
+enum {
+    PMU_UNKNOWN,
+    PMU_OHARE_BASED,        /* 2400, 3400, 3500 (old G3 powerbook) */
+    PMU_HEATHROW_BASED,     /* PowerBook G3 series */
+    PMU_PADDINGTON_BASED,   /* 1999 PowerBook G3 */
+    PMU_KEYLARGO_BASED,     /* Core99 motherboard (PMU99) */
+    PMU_68K_V1,             /* 68K PMU, version 1 */
+    PMU_68K_V2,             /* 68K PMU, version 2 */
+};
+
+/* PMU PMU_POWER_EVENTS commands */
+enum {
+    PMU_PWR_GET_POWERUP_EVENTS = 0x00,
+    PMU_PWR_SET_POWERUP_EVENTS = 0x01,
+    PMU_PWR_CLR_POWERUP_EVENTS = 0x02,
+    PMU_PWR_GET_WAKEUP_EVENTS = 0x03,
+    PMU_PWR_SET_WAKEUP_EVENTS = 0x04,
+    PMU_PWR_CLR_WAKEUP_EVENTS = 0x05,
+};
+
+/* Power events wakeup bits */
+enum {
+    PMU_PWR_WAKEUP_KEY = 0x01,           /* Wake on key press */
+    PMU_PWR_WAKEUP_AC_INSERT = 0x02,     /* Wake on AC adapter plug */
+    PMU_PWR_WAKEUP_AC_CHANGE = 0x04,
+    PMU_PWR_WAKEUP_LID_OPEN = 0x08,
+    PMU_PWR_WAKEUP_RING = 0x10,
+};
+
+/*
+ * This table indicates for each PMU opcode:
+ * - the number of data bytes to be sent with the command, or -1
+ *   if a length byte should be sent,
+ * - the number of response bytes which the PMU will return, or
+ *   -1 if it will send a length byte.
+ */
+
+static const int8_t pmu_data_len[256][2] = {
+/*  0        1        2        3        4        5        6        7  */
+    {-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},
+    {-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},
+    { 1,  0},{ 1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},
+    { 0,  1},{ 0,  1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{ 0,  0},
+    {-1,  0},{ 0,  0},{ 2,  0},{ 1,  0},{ 1,  0},{-1,  0},{-1,  0},{-1,  0},
+    { 0, -1},{ 0, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{ 0, -1},
+    { 4,  0},{20,  0},{-1,  0},{ 3,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},
+    { 0,  4},{ 0, 20},{ 2, -1},{ 2,  1},{ 3, -1},{-1, -1},{-1, -1},{ 4,  0},
+    { 1,  0},{ 1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},
+    { 0,  1},{ 0,  1},{-1, -1},{ 1,  0},{ 1,  0},{-1, -1},{-1, -1},{-1, -1},
+    { 1,  0},{ 0,  0},{ 2,  0},{ 2,  0},{-1,  0},{ 1,  0},{ 3,  0},{ 1,  0},
+    { 0,  1},{ 1,  0},{ 0,  2},{ 0,  2},{ 0, -1},{-1, -1},{-1, -1},{-1, -1},
+    { 2,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},
+    { 0,  3},{ 0,  3},{ 0,  2},{ 0,  8},{ 0, -1},{ 0, -1},{-1, -1},{-1, -1},
+    { 1,  0},{ 1,  0},{ 1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},
+    { 0, -1},{ 0, -1},{-1, -1},{-1, -1},{-1, -1},{ 5,  1},{ 4,  1},{ 4,  1},
+    { 4,  0},{-1,  0},{ 0,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},
+    { 0,  5},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},
+    { 1,  0},{ 2,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},
+    { 0,  1},{ 0,  1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},
+    { 2,  0},{ 2,  0},{ 2,  0},{ 4,  0},{-1,  0},{ 0,  0},{-1,  0},{-1,  0},
+    { 1,  1},{ 1,  0},{ 3,  0},{ 2,  0},{-1, -1},{-1, -1},{-1, -1},{-1, -1},
+    {-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},
+    {-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},
+    {-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},
+    {-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},
+    { 0,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},
+    { 1,  1},{ 1,  1},{-1, -1},{-1, -1},{ 0,  1},{ 0, -1},{-1, -1},{-1, -1},
+    {-1,  0},{ 4,  0},{ 0,  1},{-1,  0},{-1,  0},{ 4,  0},{-1,  0},{-1,  0},
+    { 3, -1},{-1, -1},{ 0,  1},{-1, -1},{ 0, -1},{-1, -1},{-1, -1},{ 0,  0},
+    {-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},{-1,  0},
+    {-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},{-1, -1},
+};
+
+/* Command protocol state machine */
+typedef enum {
+    pmu_state_idle, /* Waiting for command */
+    pmu_state_cmd,  /* Receiving command */
+    pmu_state_rsp,  /* Responding to command */
+} PMUCmdState;
+
+/* MOS6522 PMU */
+struct MOS6522PMUState {
+    /*< private >*/
+    MOS6522State parent_obj;
+};
+
+#define TYPE_MOS6522_PMU "mos6522-pmu"
+OBJECT_DECLARE_SIMPLE_TYPE(MOS6522PMUState, MOS6522_PMU)
+/**
+ * PMUState:
+ * @last_b: last value of B register
+ */
+
+struct PMUState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion mem;
+    uint64_t frequency;
+
+    /* PMU state */
+    MOS6522PMUState mos6522_pmu;
+
+    /* PMU low level protocol state */
+    PMUCmdState cmd_state;
+    uint8_t last_b;
+    uint8_t cmd;
+    uint32_t cmdlen;
+    uint32_t rsplen;
+    uint8_t cmd_buf_pos;
+    uint8_t cmd_buf[128];
+    uint8_t cmd_rsp_pos;
+    uint8_t cmd_rsp_sz;
+    uint8_t cmd_rsp[128];
+
+    /* PMU events/interrupts */
+    uint8_t intbits;
+    uint8_t intmask;
+
+    /* ADB */
+    bool has_adb;
+    ADBBusState adb_bus;
+    uint8_t adb_reply_size;
+    uint8_t adb_reply[ADB_MAX_OUT_LEN];
+
+    /* RTC */
+    uint32_t tick_offset;
+    QEMUTimer *one_sec_timer;
+    int64_t one_sec_target;
+
+    /* GPIO */
+    MacIOGPIOState *gpio;
+};
+
+#define TYPE_VIA_PMU "via-pmu"
+OBJECT_DECLARE_SIMPLE_TYPE(PMUState, VIA_PMU)
+
+#endif /* PMU_H */
diff --git a/include/hw/misc/mchp_pfsoc_dmc.h b/include/hw/misc/mchp_pfsoc_dmc.h
new file mode 100644 (file)
index 0000000..3bc1581
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Microchip PolarFire SoC DDR Memory Controller module emulation
+ *
+ * Copyright (c) 2020 Wind River Systems, Inc.
+ *
+ * Author:
+ *   Bin Meng <bin.meng@windriver.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MCHP_PFSOC_DMC_H
+#define MCHP_PFSOC_DMC_H
+
+#include "hw/sysbus.h"
+
+/* DDR SGMII PHY module */
+
+#define MCHP_PFSOC_DDR_SGMII_PHY_REG_SIZE   0x1000
+
+typedef struct MchpPfSoCDdrSgmiiPhyState {
+    SysBusDevice parent;
+    MemoryRegion sgmii_phy;
+} MchpPfSoCDdrSgmiiPhyState;
+
+#define TYPE_MCHP_PFSOC_DDR_SGMII_PHY "mchp.pfsoc.ddr_sgmii_phy"
+
+#define MCHP_PFSOC_DDR_SGMII_PHY(obj) \
+    OBJECT_CHECK(MchpPfSoCDdrSgmiiPhyState, (obj), \
+                 TYPE_MCHP_PFSOC_DDR_SGMII_PHY)
+
+/* DDR CFG module */
+
+#define MCHP_PFSOC_DDR_CFG_REG_SIZE         0x40000
+
+typedef struct MchpPfSoCDdrCfgState {
+    SysBusDevice parent;
+    MemoryRegion cfg;
+} MchpPfSoCDdrCfgState;
+
+#define TYPE_MCHP_PFSOC_DDR_CFG "mchp.pfsoc.ddr_cfg"
+
+#define MCHP_PFSOC_DDR_CFG(obj) \
+    OBJECT_CHECK(MchpPfSoCDdrCfgState, (obj), \
+                 TYPE_MCHP_PFSOC_DDR_CFG)
+
+#endif /* MCHP_PFSOC_DMC_H */
diff --git a/include/hw/misc/mchp_pfsoc_ioscb.h b/include/hw/misc/mchp_pfsoc_ioscb.h
new file mode 100644 (file)
index 0000000..3fd3e74
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Microchip PolarFire SoC IOSCB module emulation
+ *
+ * Copyright (c) 2020 Wind River Systems, Inc.
+ *
+ * Author:
+ *   Bin Meng <bin.meng@windriver.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MCHP_PFSOC_IOSCB_H
+#define MCHP_PFSOC_IOSCB_H
+
+#include "hw/sysbus.h"
+
+typedef struct MchpPfSoCIoscbState {
+    SysBusDevice parent;
+    MemoryRegion container;
+    MemoryRegion lane01;
+    MemoryRegion lane23;
+    MemoryRegion ctrl;
+    MemoryRegion qspixip;
+    MemoryRegion mailbox;
+    MemoryRegion cfg;
+    MemoryRegion ccc;
+    MemoryRegion pll_mss;
+    MemoryRegion cfm_mss;
+    MemoryRegion pll_ddr;
+    MemoryRegion bc_ddr;
+    MemoryRegion io_calib_ddr;
+    MemoryRegion pll_sgmii;
+    MemoryRegion dll_sgmii;
+    MemoryRegion cfm_sgmii;
+    MemoryRegion bc_sgmii;
+    MemoryRegion io_calib_sgmii;
+    qemu_irq irq;
+} MchpPfSoCIoscbState;
+
+#define TYPE_MCHP_PFSOC_IOSCB "mchp.pfsoc.ioscb"
+
+#define MCHP_PFSOC_IOSCB(obj) \
+    OBJECT_CHECK(MchpPfSoCIoscbState, (obj), TYPE_MCHP_PFSOC_IOSCB)
+
+#endif /* MCHP_PFSOC_IOSCB_H */
diff --git a/include/hw/misc/mchp_pfsoc_sysreg.h b/include/hw/misc/mchp_pfsoc_sysreg.h
new file mode 100644 (file)
index 0000000..c2232bd
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Microchip PolarFire SoC SYSREG module emulation
+ *
+ * Copyright (c) 2020 Wind River Systems, Inc.
+ *
+ * Author:
+ *   Bin Meng <bin.meng@windriver.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MCHP_PFSOC_SYSREG_H
+#define MCHP_PFSOC_SYSREG_H
+
+#include "hw/sysbus.h"
+
+#define MCHP_PFSOC_SYSREG_REG_SIZE  0x2000
+
+typedef struct MchpPfSoCSysregState {
+    SysBusDevice parent;
+    MemoryRegion sysreg;
+    qemu_irq irq;
+} MchpPfSoCSysregState;
+
+#define TYPE_MCHP_PFSOC_SYSREG "mchp.pfsoc.sysreg"
+
+#define MCHP_PFSOC_SYSREG(obj) \
+    OBJECT_CHECK(MchpPfSoCSysregState, (obj), \
+                 TYPE_MCHP_PFSOC_SYSREG)
+
+#endif /* MCHP_PFSOC_SYSREG_H */
diff --git a/include/hw/misc/mips_cmgcr.h b/include/hw/misc/mips_cmgcr.h
new file mode 100644 (file)
index 0000000..db4bf5f
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2015 Imagination Technologies
+ *
+ */
+
+#ifndef MIPS_CMGCR_H
+#define MIPS_CMGCR_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_MIPS_GCR "mips-gcr"
+OBJECT_DECLARE_SIMPLE_TYPE(MIPSGCRState, MIPS_GCR)
+
+#define GCR_BASE_ADDR           0x1fbf8000ULL
+#define GCR_ADDRSPACE_SZ        0x8000
+
+/* Offsets to register blocks */
+#define MIPS_GCB_OFS        0x0000 /* Global Control Block */
+#define MIPS_CLCB_OFS       0x2000 /* Core Local Control Block */
+#define MIPS_COCB_OFS       0x4000 /* Core Other Control Block */
+#define MIPS_GDB_OFS        0x6000 /* Global Debug Block */
+
+/* Global Control Block Register Map */
+#define GCR_CONFIG_OFS      0x0000
+#define GCR_BASE_OFS        0x0008
+#define GCR_REV_OFS         0x0030
+#define GCR_GIC_BASE_OFS    0x0080
+#define GCR_CPC_BASE_OFS    0x0088
+#define GCR_GIC_STATUS_OFS  0x00D0
+#define GCR_CPC_STATUS_OFS  0x00F0
+#define GCR_L2_CONFIG_OFS   0x0130
+
+/* Core Local and Core Other Block Register Map */
+#define GCR_CL_CONFIG_OFS   0x0010
+#define GCR_CL_OTHER_OFS    0x0018
+#define GCR_CL_RESETBASE_OFS 0x0020
+
+/* GCR_L2_CONFIG register fields */
+#define GCR_L2_CONFIG_BYPASS_SHF    20
+#define GCR_L2_CONFIG_BYPASS_MSK    ((0x1ULL) << GCR_L2_CONFIG_BYPASS_SHF)
+
+/* GCR_BASE register fields */
+#define GCR_BASE_GCRBASE_MSK     0xffffffff8000ULL
+
+/* GCR_GIC_BASE register fields */
+#define GCR_GIC_BASE_GICEN_MSK   1
+#define GCR_GIC_BASE_GICBASE_MSK 0xFFFFFFFE0000ULL
+#define GCR_GIC_BASE_MSK (GCR_GIC_BASE_GICEN_MSK | GCR_GIC_BASE_GICBASE_MSK)
+
+/* GCR_CPC_BASE register fields */
+#define GCR_CPC_BASE_CPCEN_MSK   1
+#define GCR_CPC_BASE_CPCBASE_MSK 0xFFFFFFFF8000ULL
+#define GCR_CPC_BASE_MSK (GCR_CPC_BASE_CPCEN_MSK | GCR_CPC_BASE_CPCBASE_MSK)
+
+/* GCR_CL_OTHER_OFS register fields */
+#define GCR_CL_OTHER_VPOTHER_MSK 0x7
+#define GCR_CL_OTHER_MSK GCR_CL_OTHER_VPOTHER_MSK
+
+/* GCR_CL_RESETBASE_OFS register fields */
+#define GCR_CL_RESET_BASE_RESETBASE_MSK 0xFFFFF000U
+#define GCR_CL_RESET_BASE_MSK GCR_CL_RESET_BASE_RESETBASE_MSK
+
+typedef struct MIPSGCRVPState MIPSGCRVPState;
+struct MIPSGCRVPState {
+    uint32_t other;
+    uint64_t reset_base;
+};
+
+struct MIPSGCRState {
+    SysBusDevice parent_obj;
+
+    int32_t gcr_rev;
+    uint32_t num_vps;
+    hwaddr gcr_base;
+    MemoryRegion iomem;
+    MemoryRegion *cpc_mr;
+    MemoryRegion *gic_mr;
+
+    uint64_t cpc_base;
+    uint64_t gic_base;
+
+    /* VP Local/Other Registers */
+    MIPSGCRVPState *vps;
+};
+
+#endif /* MIPS_CMGCR_H */
diff --git a/include/hw/misc/mips_cpc.h b/include/hw/misc/mips_cpc.h
new file mode 100644 (file)
index 0000000..fcafbd5
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Cluster Power Controller emulation
+ *
+ * Copyright (c) 2016 Imagination Technologies
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MIPS_CPC_H
+#define MIPS_CPC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define CPC_ADDRSPACE_SZ    0x6000
+
+/* CPC blocks offsets relative to base address */
+#define CPC_CL_BASE_OFS     0x2000
+#define CPC_CO_BASE_OFS     0x4000
+
+/* CPC register offsets relative to block offsets */
+#define CPC_VP_STOP_OFS     0x20
+#define CPC_VP_RUN_OFS      0x28
+#define CPC_VP_RUNNING_OFS  0x30
+
+#define TYPE_MIPS_CPC "mips-cpc"
+OBJECT_DECLARE_SIMPLE_TYPE(MIPSCPCState, MIPS_CPC)
+
+struct MIPSCPCState {
+    SysBusDevice parent_obj;
+
+    uint32_t num_vp;
+    uint64_t vp_start_running; /* VPs running from restart */
+
+    MemoryRegion mr;
+    uint64_t vp_running; /* Indicates which VPs are in the run state */
+};
+
+#endif /* MIPS_CPC_H */
diff --git a/include/hw/misc/mips_itu.h b/include/hw/misc/mips_itu.h
new file mode 100644 (file)
index 0000000..5caed6c
--- /dev/null
@@ -0,0 +1,84 @@
+/*
+ * Inter-Thread Communication Unit emulation.
+ *
+ * Copyright (c) 2016 Imagination Technologies
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MIPS_ITU_H
+#define MIPS_ITU_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_MIPS_ITU "mips-itu"
+OBJECT_DECLARE_SIMPLE_TYPE(MIPSITUState, MIPS_ITU)
+
+#define ITC_CELL_DEPTH_SHIFT 2
+#define ITC_CELL_DEPTH (1u << ITC_CELL_DEPTH_SHIFT)
+
+typedef struct ITCStorageCell {
+    struct {
+        uint8_t FIFODepth; /* Log2 of the cell depth */
+        uint8_t FIFOPtr; /* Number of elements in a FIFO cell */
+        uint8_t FIFO; /* 1 - FIFO cell, 0 - Semaphore cell */
+        uint8_t T; /* Trap Bit */
+        uint8_t F; /* Full Bit */
+        uint8_t E; /* Empty Bit */
+    } tag;
+
+    /* Index of the oldest element in the queue */
+    uint8_t fifo_out;
+
+    /* Circular buffer for FIFO. Semaphore cells use index 0 only */
+    uint64_t data[ITC_CELL_DEPTH];
+
+    /* Bitmap tracking blocked threads on the cell.
+       TODO: support >64 threads ? */
+    uint64_t blocked_threads;
+} ITCStorageCell;
+
+#define ITC_ADDRESSMAP_NUM 2
+
+struct MIPSITUState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    uint32_t num_fifo;
+    uint32_t num_semaphores;
+
+    /* ITC Storage */
+    ITCStorageCell *cell;
+    MemoryRegion storage_io;
+
+    /* ITC Configuration Tags */
+    uint64_t ITCAddressMap[ITC_ADDRESSMAP_NUM];
+    MemoryRegion tag_io;
+
+    /* ITU Control Register */
+    uint64_t icr0;
+
+    /* SAAR */
+    uint64_t *saar;
+    ArchCPU *cpu0;
+};
+
+/* Get ITC Configuration Tag memory region. */
+MemoryRegion *mips_itu_get_tag_region(MIPSITUState *itu);
+
+void itc_reconfigure(struct MIPSITUState *tag);
+
+#endif /* MIPS_ITU_H */
diff --git a/include/hw/misc/mos6522.h b/include/hw/misc/mos6522.h
new file mode 100644 (file)
index 0000000..fba4566
--- /dev/null
@@ -0,0 +1,177 @@
+/*
+ * QEMU MOS6522 VIA emulation
+ *
+ * Copyright (c) 2004-2007 Fabrice Bellard
+ * Copyright (c) 2007 Jocelyn Mayer
+ * Copyright (c) 2018 Mark Cave-Ayland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef MOS6522_H
+#define MOS6522_H
+
+#include "exec/hwaddr.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define MOS6522_NUM_REGS 16
+
+/* Bits in ACR */
+#define SR_CTRL            0x1c    /* Shift register control bits */
+#define SR_EXT             0x0c    /* Shift on external clock */
+#define SR_OUT             0x10    /* Shift out if 1 */
+
+/* Bits in IFR and IER */
+#define IER_SET            0x80    /* set bits in IER */
+#define IER_CLR            0       /* clear bits in IER */
+
+#define CA2_INT_BIT        0
+#define CA1_INT_BIT        1
+#define SR_INT_BIT         2       /* Shift register full/empty */
+#define CB2_INT_BIT        3
+#define CB1_INT_BIT        4
+#define T2_INT_BIT         5       /* Timer 2 interrupt */
+#define T1_INT_BIT         6       /* Timer 1 interrupt */
+
+#define CA2_INT            BIT(CA2_INT_BIT)
+#define CA1_INT            BIT(CA1_INT_BIT)
+#define SR_INT             BIT(SR_INT_BIT)
+#define CB2_INT            BIT(CB2_INT_BIT)
+#define CB1_INT            BIT(CB1_INT_BIT)
+#define T2_INT             BIT(T2_INT_BIT)
+#define T1_INT             BIT(T1_INT_BIT)
+
+#define VIA_NUM_INTS       5
+
+/* Bits in ACR */
+#define T1MODE             0xc0    /* Timer 1 mode */
+#define T1MODE_CONT        0x40    /*  continuous interrupts */
+
+/* Bits in PCR */
+#define CB2_CTRL_MASK      0xe0
+#define CB2_CTRL_SHIFT     5
+#define CB1_CTRL_MASK      0x10
+#define CB1_CTRL_SHIFT     4
+#define CA2_CTRL_MASK      0x0e
+#define CA2_CTRL_SHIFT     1
+#define CA1_CTRL_MASK      0x1
+#define CA1_CTRL_SHIFT     0
+
+#define C2_POS             0x2
+#define C2_IND             0x1
+
+#define C1_POS             0x1
+
+/* VIA registers */
+#define VIA_REG_B       0x00
+#define VIA_REG_A       0x01
+#define VIA_REG_DIRB    0x02
+#define VIA_REG_DIRA    0x03
+#define VIA_REG_T1CL    0x04
+#define VIA_REG_T1CH    0x05
+#define VIA_REG_T1LL    0x06
+#define VIA_REG_T1LH    0x07
+#define VIA_REG_T2CL    0x08
+#define VIA_REG_T2CH    0x09
+#define VIA_REG_SR      0x0a
+#define VIA_REG_ACR     0x0b
+#define VIA_REG_PCR     0x0c
+#define VIA_REG_IFR     0x0d
+#define VIA_REG_IER     0x0e
+#define VIA_REG_ANH     0x0f
+
+/**
+ * MOS6522Timer:
+ * @counter_value: counter value at load time
+ */
+typedef struct MOS6522Timer {
+    int index;
+    uint16_t latch;
+    uint16_t counter_value;
+    int64_t load_time;
+    int64_t next_irq_time;
+    uint64_t frequency;
+    QEMUTimer *timer;
+} MOS6522Timer;
+
+/**
+ * MOS6522State:
+ * @b: B-side data
+ * @a: A-side data
+ * @dirb: B-side direction (1=output)
+ * @dira: A-side direction (1=output)
+ * @sr: Shift register
+ * @acr: Auxiliary control register
+ * @pcr: Peripheral control register
+ * @ifr: Interrupt flag register
+ * @ier: Interrupt enable register
+ * @anh: A-side data, no handshake
+ * @last_b: last value of B register
+ * @last_acr: last value of ACR register
+ */
+struct MOS6522State {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion mem;
+    /* VIA registers */
+    uint8_t b;
+    uint8_t a;
+    uint8_t dirb;
+    uint8_t dira;
+    uint8_t sr;
+    uint8_t acr;
+    uint8_t pcr;
+    uint8_t ifr;
+    uint8_t ier;
+
+    MOS6522Timer timers[2];
+    uint64_t frequency;
+
+    qemu_irq irq;
+    uint8_t last_irq_levels;
+};
+
+#define TYPE_MOS6522 "mos6522"
+OBJECT_DECLARE_TYPE(MOS6522State, MOS6522DeviceClass, MOS6522)
+
+struct MOS6522DeviceClass {
+    DeviceClass parent_class;
+
+    ResettablePhases parent_phases;
+    void (*portB_write)(MOS6522State *dev);
+    void (*portA_write)(MOS6522State *dev);
+    /* These are used to influence the CUDA MacOS timebase calibration */
+    uint64_t (*get_timer1_counter_value)(MOS6522State *dev, MOS6522Timer *ti);
+    uint64_t (*get_timer2_counter_value)(MOS6522State *dev, MOS6522Timer *ti);
+    uint64_t (*get_timer1_load_time)(MOS6522State *dev, MOS6522Timer *ti);
+    uint64_t (*get_timer2_load_time)(MOS6522State *dev, MOS6522Timer *ti);
+};
+
+
+extern const VMStateDescription vmstate_mos6522;
+
+uint64_t mos6522_read(void *opaque, hwaddr addr, unsigned size);
+void mos6522_write(void *opaque, hwaddr addr, uint64_t val, unsigned size);
+
+void hmp_info_via(Monitor *mon, const QDict *qdict);
+
+#endif /* MOS6522_H */
diff --git a/include/hw/misc/mps2-fpgaio.h b/include/hw/misc/mps2-fpgaio.h
new file mode 100644 (file)
index 0000000..7b8bd60
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * ARM MPS2 FPGAIO emulation
+ *
+ * Copyright (c) 2018 Linaro Limited
+ * Written by Peter Maydell
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+/* This is a model of the FPGAIO register block in the AN505
+ * FPGA image for the MPS2 dev board; it is documented in the
+ * application note:
+ * https://developer.arm.com/documentation/dai0505/latest/
+ *
+ * QEMU interface:
+ *  + sysbus MMIO region 0: the register bank
+ */
+
+#ifndef MPS2_FPGAIO_H
+#define MPS2_FPGAIO_H
+
+#include "hw/sysbus.h"
+#include "hw/misc/led.h"
+#include "qom/object.h"
+
+#define TYPE_MPS2_FPGAIO "mps2-fpgaio"
+OBJECT_DECLARE_SIMPLE_TYPE(MPS2FPGAIO, MPS2_FPGAIO)
+
+#define MPS2FPGAIO_MAX_LEDS 32
+
+struct MPS2FPGAIO {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    LEDState *led[MPS2FPGAIO_MAX_LEDS];
+    uint32_t num_leds;
+    bool has_switches;
+    bool has_dbgctrl;
+
+    uint32_t led0;
+    uint32_t prescale;
+    uint32_t misc;
+    uint32_t dbgctrl;
+
+    /* QEMU_CLOCK_VIRTUAL time at which counter and pscntr were last synced */
+    int64_t pscntr_sync_ticks;
+    /* Values of COUNTER and PSCNTR at time pscntr_sync_ticks */
+    uint32_t counter;
+    uint32_t pscntr;
+
+    uint32_t prescale_clk;
+
+    /* These hold the CLOCK_VIRTUAL ns tick when the CLK1HZ/CLK100HZ was zero */
+    int64_t clk1hz_tick_offset;
+    int64_t clk100hz_tick_offset;
+};
+
+#endif
diff --git a/include/hw/misc/mps2-scc.h b/include/hw/misc/mps2-scc.h
new file mode 100644 (file)
index 0000000..3b2d13a
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ * ARM MPS2 SCC emulation
+ *
+ * Copyright (c) 2017 Linaro Limited
+ * Written by Peter Maydell
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+/*
+ * This is a model of the Serial Communication Controller (SCC)
+ * block found in most MPS FPGA images.
+ *
+ * QEMU interface:
+ *  + sysbus MMIO region 0: the register bank
+ *  + QOM property "scc-cfg4": value of the read-only CFG4 register
+ *  + QOM property "scc-aid": value of the read-only SCC_AID register
+ *  + QOM property "scc-id": value of the read-only SCC_ID register
+ *  + QOM property "scc-cfg0": reset value of the CFG0 register
+ *  + QOM property array "oscclk": reset values of the OSCCLK registers
+ *    (which are accessed via the SYS_CFG channel provided by this device)
+ *  + named GPIO output "remap": this tracks the value of CFG0 register
+ *    bit 0. Boards where this bit controls memory remapping should
+ *    connect this GPIO line to a function performing that mapping.
+ *    Boards where bit 0 has no special function should leave the GPIO
+ *    output disconnected.
+ */
+#ifndef MPS2_SCC_H
+#define MPS2_SCC_H
+
+#include "hw/sysbus.h"
+#include "hw/misc/led.h"
+#include "qom/object.h"
+
+#define TYPE_MPS2_SCC "mps2-scc"
+OBJECT_DECLARE_SIMPLE_TYPE(MPS2SCC, MPS2_SCC)
+
+struct MPS2SCC {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    LEDState *led[8];
+
+    uint32_t cfg0;
+    uint32_t cfg1;
+    uint32_t cfg2;
+    uint32_t cfg4;
+    uint32_t cfg5;
+    uint32_t cfg6;
+    uint32_t cfgdata_rtn;
+    uint32_t cfgdata_out;
+    uint32_t cfgctrl;
+    uint32_t cfgstat;
+    uint32_t dll;
+    uint32_t aid;
+    uint32_t id;
+    uint32_t num_oscclk;
+    uint32_t *oscclk;
+    uint32_t *oscclk_reset;
+    uint32_t cfg0_reset;
+
+    qemu_irq remap;
+};
+
+#endif
diff --git a/include/hw/misc/msf2-sysreg.h b/include/hw/misc/msf2-sysreg.h
new file mode 100644 (file)
index 0000000..fc1890e
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Microsemi SmartFusion2 SYSREG
+ *
+ * Copyright (c) 2017 Subbaraya Sundeep <sundeep.lkml@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_MSF2_SYSREG_H
+#define HW_MSF2_SYSREG_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+enum {
+    ESRAM_CR        = 0x00 / 4,
+    ESRAM_MAX_LAT,
+    DDR_CR,
+    ENVM_CR,
+    ENVM_REMAP_BASE_CR,
+    ENVM_REMAP_FAB_CR,
+    CC_CR,
+    CC_REGION_CR,
+    CC_LOCK_BASE_ADDR_CR,
+    CC_FLUSH_INDX_CR,
+    DDRB_BUF_TIMER_CR,
+    DDRB_NB_ADDR_CR,
+    DDRB_NB_SIZE_CR,
+    DDRB_CR,
+
+    SOFT_RESET_CR  = 0x48 / 4,
+    M3_CR,
+
+    GPIO_SYSRESET_SEL_CR = 0x58 / 4,
+
+    MDDR_CR = 0x60 / 4,
+
+    MSSDDR_PLL_STATUS_LOW_CR = 0x90 / 4,
+    MSSDDR_PLL_STATUS_HIGH_CR,
+    MSSDDR_FACC1_CR,
+    MSSDDR_FACC2_CR,
+
+    MSSDDR_PLL_STATUS = 0x150 / 4,
+};
+
+#define MSF2_SYSREG_MMIO_SIZE     0x300
+
+#define TYPE_MSF2_SYSREG          "msf2-sysreg"
+OBJECT_DECLARE_SIMPLE_TYPE(MSF2SysregState, MSF2_SYSREG)
+
+struct MSF2SysregState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+
+    uint8_t apb0div;
+    uint8_t apb1div;
+
+    uint32_t regs[MSF2_SYSREG_MMIO_SIZE / 4];
+};
+
+#endif /* HW_MSF2_SYSREG_H */
diff --git a/include/hw/misc/npcm7xx_clk.h b/include/hw/misc/npcm7xx_clk.h
new file mode 100644 (file)
index 0000000..5ed4a46
--- /dev/null
@@ -0,0 +1,180 @@
+/*
+ * Nuvoton NPCM7xx Clock Control Registers.
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM7XX_CLK_H
+#define NPCM7XX_CLK_H
+
+#include "exec/memory.h"
+#include "hw/clock.h"
+#include "hw/sysbus.h"
+
+/*
+ * Number of registers in our device state structure. Don't change this without
+ * incrementing the version_id in the vmstate.
+ */
+#define NPCM7XX_CLK_NR_REGS             (0x70 / sizeof(uint32_t))
+
+#define NPCM7XX_WATCHDOG_RESET_GPIO_IN "npcm7xx-clk-watchdog-reset-gpio-in"
+
+/* Maximum amount of clock inputs in a SEL module. */
+#define NPCM7XX_CLK_SEL_MAX_INPUT 5
+
+/* PLLs in CLK module. */
+typedef enum NPCM7xxClockPLL {
+    NPCM7XX_CLOCK_PLL0,
+    NPCM7XX_CLOCK_PLL1,
+    NPCM7XX_CLOCK_PLL2,
+    NPCM7XX_CLOCK_PLLG,
+    NPCM7XX_CLOCK_NR_PLLS,
+} NPCM7xxClockPLL;
+
+/* SEL/MUX in CLK module. */
+typedef enum NPCM7xxClockSEL {
+    NPCM7XX_CLOCK_PIXCKSEL,
+    NPCM7XX_CLOCK_MCCKSEL,
+    NPCM7XX_CLOCK_CPUCKSEL,
+    NPCM7XX_CLOCK_CLKOUTSEL,
+    NPCM7XX_CLOCK_UARTCKSEL,
+    NPCM7XX_CLOCK_TIMCKSEL,
+    NPCM7XX_CLOCK_SDCKSEL,
+    NPCM7XX_CLOCK_GFXMSEL,
+    NPCM7XX_CLOCK_SUCKSEL,
+    NPCM7XX_CLOCK_NR_SELS,
+} NPCM7xxClockSEL;
+
+/* Dividers in CLK module. */
+typedef enum NPCM7xxClockDivider {
+    NPCM7XX_CLOCK_PLL1D2, /* PLL1/2 */
+    NPCM7XX_CLOCK_PLL2D2, /* PLL2/2 */
+    NPCM7XX_CLOCK_MC_DIVIDER,
+    NPCM7XX_CLOCK_AXI_DIVIDER,
+    NPCM7XX_CLOCK_AHB_DIVIDER,
+    NPCM7XX_CLOCK_AHB3_DIVIDER,
+    NPCM7XX_CLOCK_SPI0_DIVIDER,
+    NPCM7XX_CLOCK_SPIX_DIVIDER,
+    NPCM7XX_CLOCK_APB1_DIVIDER,
+    NPCM7XX_CLOCK_APB2_DIVIDER,
+    NPCM7XX_CLOCK_APB3_DIVIDER,
+    NPCM7XX_CLOCK_APB4_DIVIDER,
+    NPCM7XX_CLOCK_APB5_DIVIDER,
+    NPCM7XX_CLOCK_CLKOUT_DIVIDER,
+    NPCM7XX_CLOCK_UART_DIVIDER,
+    NPCM7XX_CLOCK_TIMER_DIVIDER,
+    NPCM7XX_CLOCK_ADC_DIVIDER,
+    NPCM7XX_CLOCK_MMC_DIVIDER,
+    NPCM7XX_CLOCK_SDHC_DIVIDER,
+    NPCM7XX_CLOCK_GFXM_DIVIDER, /* divide by 3 */
+    NPCM7XX_CLOCK_UTMI_DIVIDER,
+    NPCM7XX_CLOCK_NR_DIVIDERS,
+} NPCM7xxClockConverter;
+
+typedef struct NPCM7xxCLKState NPCM7xxCLKState;
+
+/**
+ * struct NPCM7xxClockPLLState - A PLL module in CLK module.
+ * @name: The name of the module.
+ * @clk: The CLK module that owns this module.
+ * @clock_in: The input clock of this module.
+ * @clock_out: The output clock of this module.
+ * @reg: The control registers for this PLL module.
+ */
+typedef struct NPCM7xxClockPLLState {
+    DeviceState parent;
+
+    const char *name;
+    NPCM7xxCLKState *clk;
+    Clock *clock_in;
+    Clock *clock_out;
+
+    int reg;
+} NPCM7xxClockPLLState;
+
+/**
+ * struct NPCM7xxClockSELState - A SEL module in CLK module.
+ * @name: The name of the module.
+ * @clk: The CLK module that owns this module.
+ * @input_size: The size of inputs of this module.
+ * @clock_in: The input clocks of this module.
+ * @clock_out: The output clocks of this module.
+ * @offset: The offset of this module in the control register.
+ * @len: The length of this module in the control register.
+ */
+typedef struct NPCM7xxClockSELState {
+    DeviceState parent;
+
+    const char *name;
+    NPCM7xxCLKState *clk;
+    uint8_t input_size;
+    Clock *clock_in[NPCM7XX_CLK_SEL_MAX_INPUT];
+    Clock *clock_out;
+
+    int offset;
+    int len;
+} NPCM7xxClockSELState;
+
+/**
+ * struct NPCM7xxClockDividerState - A Divider module in CLK module.
+ * @name: The name of the module.
+ * @clk: The CLK module that owns this module.
+ * @clock_in: The input clock of this module.
+ * @clock_out: The output clock of this module.
+ * @divide: The function the divider uses to divide the input.
+ * @reg: The index of the control register that contains the divisor.
+ * @offset: The offset of the divisor in the control register.
+ * @len: The length of the divisor in the control register.
+ * @divisor: The divisor for a constant divisor
+ */
+typedef struct NPCM7xxClockDividerState {
+    DeviceState parent;
+
+    const char *name;
+    NPCM7xxCLKState *clk;
+    Clock *clock_in;
+    Clock *clock_out;
+
+    uint32_t (*divide)(struct NPCM7xxClockDividerState *s);
+    union {
+        struct {
+            int reg;
+            int offset;
+            int len;
+        };
+        int divisor;
+    };
+} NPCM7xxClockDividerState;
+
+struct NPCM7xxCLKState {
+    SysBusDevice parent;
+
+    MemoryRegion iomem;
+
+    /* Clock converters */
+    NPCM7xxClockPLLState plls[NPCM7XX_CLOCK_NR_PLLS];
+    NPCM7xxClockSELState sels[NPCM7XX_CLOCK_NR_SELS];
+    NPCM7xxClockDividerState dividers[NPCM7XX_CLOCK_NR_DIVIDERS];
+
+    uint32_t regs[NPCM7XX_CLK_NR_REGS];
+
+    /* Time reference for SECCNT and CNTR25M, initialized by power on reset */
+    int64_t ref_ns;
+
+    /* The incoming reference clock. */
+    Clock *clkref;
+};
+
+#define TYPE_NPCM7XX_CLK "npcm7xx-clk"
+OBJECT_DECLARE_SIMPLE_TYPE(NPCM7xxCLKState, NPCM7XX_CLK)
+
+#endif /* NPCM7XX_CLK_H */
diff --git a/include/hw/misc/npcm7xx_gcr.h b/include/hw/misc/npcm7xx_gcr.h
new file mode 100644 (file)
index 0000000..c0bbdda
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Nuvoton NPCM7xx System Global Control Registers.
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM7XX_GCR_H
+#define NPCM7XX_GCR_H
+
+#include "exec/memory.h"
+#include "hw/sysbus.h"
+
+/*
+ * NPCM7XX PWRON STRAP bit fields
+ * 12: SPI0 powered by VSBV3 at 1.8V
+ * 11: System flash attached to BMC
+ * 10: BSP alternative pins.
+ * 9:8: Flash UART command route enabled.
+ * 7: Security enabled.
+ * 6: HI-Z state control.
+ * 5: ECC disabled.
+ * 4: Reserved
+ * 3: JTAG2 enabled.
+ * 2:0: CPU and DRAM clock frequency.
+ */
+#define NPCM7XX_PWRON_STRAP_SPI0F18                 BIT(12)
+#define NPCM7XX_PWRON_STRAP_SFAB                    BIT(11)
+#define NPCM7XX_PWRON_STRAP_BSPA                    BIT(10)
+#define NPCM7XX_PWRON_STRAP_FUP(x)                  ((x) << 8)
+#define     FUP_NORM_UART2      3
+#define     FUP_PROG_UART3      2
+#define     FUP_PROG_UART2      1
+#define     FUP_NORM_UART3      0
+#define NPCM7XX_PWRON_STRAP_SECEN                   BIT(7)
+#define NPCM7XX_PWRON_STRAP_HIZ                     BIT(6)
+#define NPCM7XX_PWRON_STRAP_ECC                     BIT(5)
+#define NPCM7XX_PWRON_STRAP_RESERVE1                BIT(4)
+#define NPCM7XX_PWRON_STRAP_J2EN                    BIT(3)
+#define NPCM7XX_PWRON_STRAP_CKFRQ(x)                (x)
+#define     CKFRQ_SKIPINIT      0x000
+#define     CKFRQ_DEFAULT       0x111
+
+/*
+ * Number of registers in our device state structure. Don't change this without
+ * incrementing the version_id in the vmstate.
+ */
+#define NPCM7XX_GCR_NR_REGS (0x148 / sizeof(uint32_t))
+
+struct NPCM7xxGCRState {
+    SysBusDevice parent;
+
+    MemoryRegion iomem;
+
+    uint32_t regs[NPCM7XX_GCR_NR_REGS];
+
+    uint32_t reset_pwron;
+    uint32_t reset_mdlr;
+    uint32_t reset_intcr3;
+};
+
+#define TYPE_NPCM7XX_GCR "npcm7xx-gcr"
+OBJECT_DECLARE_SIMPLE_TYPE(NPCM7xxGCRState, NPCM7XX_GCR)
+
+#endif /* NPCM7XX_GCR_H */
diff --git a/include/hw/misc/npcm7xx_mft.h b/include/hw/misc/npcm7xx_mft.h
new file mode 100644 (file)
index 0000000..d638438
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ * Nuvoton NPCM7xx MFT Module
+ *
+ * Copyright 2021 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM7XX_MFT_H
+#define NPCM7XX_MFT_H
+
+#include "exec/memory.h"
+#include "hw/clock.h"
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+/* Max Fan input number. */
+#define NPCM7XX_MFT_MAX_FAN_INPUT 19
+
+/*
+ * Number of registers in one MFT module. Don't change this without increasing
+ * the version_id in vmstate.
+ */
+#define NPCM7XX_MFT_NR_REGS (0x20 / sizeof(uint16_t))
+
+/*
+ * The MFT can take up to 4 inputs: A0, B0, A1, B1. It can measure one A and one
+ * B simultaneously. NPCM7XX_MFT_INASEL and NPCM7XX_MFT_INBSEL are used to
+ * select which A or B input are used.
+ */
+#define NPCM7XX_MFT_FANIN_COUNT 4
+
+/**
+ * struct NPCM7xxMFTState - Multi Functional Tachometer device state.
+ * @parent: System bus device.
+ * @iomem: Memory region through which registers are accessed.
+ * @clock_in: The input clock for MFT from CLK module.
+ * @clock_{1,2}: The counter clocks for NPCM7XX_MFT_CNT{1,2}
+ * @irq: The IRQ for this MFT state.
+ * @regs: The MMIO registers.
+ * @max_rpm: The maximum rpm for fans. Order: A0, B0, A1, B1.
+ * @duty: The duty cycles for fans, relative to NPCM7XX_PWM_MAX_DUTY.
+ */
+struct NPCM7xxMFTState {
+    SysBusDevice parent;
+
+    MemoryRegion iomem;
+
+    Clock       *clock_in;
+    Clock       *clock_1, *clock_2;
+    qemu_irq    irq;
+    uint16_t    regs[NPCM7XX_MFT_NR_REGS];
+
+    uint32_t    max_rpm[NPCM7XX_MFT_FANIN_COUNT];
+    uint32_t    duty[NPCM7XX_MFT_FANIN_COUNT];
+};
+
+#define TYPE_NPCM7XX_MFT "npcm7xx-mft"
+OBJECT_DECLARE_SIMPLE_TYPE(NPCM7xxMFTState, NPCM7XX_MFT)
+
+#endif /* NPCM7XX_MFT_H */
diff --git a/include/hw/misc/npcm7xx_pwm.h b/include/hw/misc/npcm7xx_pwm.h
new file mode 100644 (file)
index 0000000..bf95344
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Nuvoton NPCM7xx PWM Module
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM7XX_PWM_H
+#define NPCM7XX_PWM_H
+
+#include "hw/clock.h"
+#include "hw/sysbus.h"
+#include "hw/irq.h"
+
+/* Each PWM module holds 4 PWM channels. */
+#define NPCM7XX_PWM_PER_MODULE 4
+
+/*
+ * Number of registers in one pwm module. Don't change this without increasing
+ * the version_id in vmstate.
+ */
+#define NPCM7XX_PWM_NR_REGS (0x54 / sizeof(uint32_t))
+
+/*
+ * The maximum duty values. Each duty unit represents 1/NPCM7XX_PWM_MAX_DUTY
+ * cycles. For example, if NPCM7XX_PWM_MAX_DUTY=1,000,000 and a PWM has a duty
+ * value of 100,000 the duty cycle for that PWM is 10%.
+ */
+#define NPCM7XX_PWM_MAX_DUTY 1000000
+
+typedef struct NPCM7xxPWMState NPCM7xxPWMState;
+
+/**
+ * struct NPCM7xxPWM - The state of a single PWM channel.
+ * @module: The PWM module that contains this channel.
+ * @irq: GIC interrupt line to fire on expiration if enabled.
+ * @running: Whether this PWM channel is generating output.
+ * @inverted: Whether this PWM channel is inverted.
+ * @index: The index of this PWM channel.
+ * @cnr: The counter register.
+ * @cmr: The comparator register.
+ * @pdr: The data register.
+ * @pwdr: The watchdog register.
+ * @freq: The frequency of this PWM channel.
+ * @duty: The duty cycle of this PWM channel. One unit represents
+ *   1/NPCM7XX_MAX_DUTY cycles.
+ */
+typedef struct NPCM7xxPWM {
+    NPCM7xxPWMState         *module;
+
+    qemu_irq                irq;
+
+    bool                    running;
+    bool                    inverted;
+
+    uint8_t                 index;
+    uint32_t                cnr;
+    uint32_t                cmr;
+    uint32_t                pdr;
+    uint32_t                pwdr;
+
+    uint32_t                freq;
+    uint32_t                duty;
+} NPCM7xxPWM;
+
+/**
+ * struct NPCM7xxPWMState - Pulse Width Modulation device state.
+ * @parent: System bus device.
+ * @iomem: Memory region through which registers are accessed.
+ * @clock: The PWM clock.
+ * @pwm: The PWM channels owned by this module.
+ * @duty_gpio_out: The duty cycle of each PWM channels as a output GPIO.
+ * @ppr: The prescaler register.
+ * @csr: The clock selector register.
+ * @pcr: The control register.
+ * @pier: The interrupt enable register.
+ * @piir: The interrupt indication register.
+ */
+struct NPCM7xxPWMState {
+    SysBusDevice parent;
+
+    MemoryRegion iomem;
+
+    Clock       *clock;
+    NPCM7xxPWM  pwm[NPCM7XX_PWM_PER_MODULE];
+    qemu_irq    duty_gpio_out[NPCM7XX_PWM_PER_MODULE];
+
+    uint32_t    ppr;
+    uint32_t    csr;
+    uint32_t    pcr;
+    uint32_t    pier;
+    uint32_t    piir;
+};
+
+#define TYPE_NPCM7XX_PWM "npcm7xx-pwm"
+OBJECT_DECLARE_SIMPLE_TYPE(NPCM7xxPWMState, NPCM7XX_PWM)
+
+#endif /* NPCM7XX_PWM_H */
diff --git a/include/hw/misc/npcm7xx_rng.h b/include/hw/misc/npcm7xx_rng.h
new file mode 100644 (file)
index 0000000..650375d
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Nuvoton NPCM7xx Random Number Generator.
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM7XX_RNG_H
+#define NPCM7XX_RNG_H
+
+#include "hw/sysbus.h"
+
+struct NPCM7xxRNGState {
+    SysBusDevice parent;
+
+    MemoryRegion iomem;
+
+    uint8_t rngcs;
+    uint8_t rngd;
+    uint8_t rngmode;
+};
+
+#define TYPE_NPCM7XX_RNG "npcm7xx-rng"
+OBJECT_DECLARE_SIMPLE_TYPE(NPCM7xxRNGState, NPCM7XX_RNG)
+
+#endif /* NPCM7XX_RNG_H */
diff --git a/include/hw/misc/nrf51_rng.h b/include/hw/misc/nrf51_rng.h
new file mode 100644 (file)
index 0000000..9aff9a7
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * nRF51 Random Number Generator
+ *
+ * QEMU interface:
+ * + Property "period_unfiltered_us": Time between two biased values in
+ *   microseconds.
+ * + Property "period_filtered_us": Time between two unbiased values in
+ *   microseconds.
+ * + sysbus MMIO regions 0: Memory Region with tasks, events and registers
+ *   to be mapped to the peripherals instance address by the SOC.
+ * + Named GPIO output "irq": Interrupt line of the peripheral. Must be
+ *   connected to the associated peripheral interrupt line of the NVIC.
+ * + Named GPIO output "eep_valrdy": Event set when new random value is ready
+ *   to be read.
+ * + Named GPIO input "tep_start": Task that triggers start of continuous
+ *   generation of random values.
+ * + Named GPIO input "tep_stop": Task that ends continuous generation of
+ *   random values.
+ *
+ * Accuracy of the peripheral model:
+ * + Stochastic properties of different configurations of the random source
+ *   are not modeled.
+ * + Generation of unfiltered and filtered random values take at least the
+ *   average generation time stated in the production specification;
+ *   non-deterministic generation times are not modeled.
+ *
+ * Copyright 2018 Steffen Görtz <contrib@steffen-goertz.de>
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef NRF51_RNG_H
+#define NRF51_RNG_H
+
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+#include "qom/object.h"
+#define TYPE_NRF51_RNG "nrf51_soc.rng"
+OBJECT_DECLARE_SIMPLE_TYPE(NRF51RNGState, NRF51_RNG)
+
+#define NRF51_RNG_SIZE         0x1000
+
+#define NRF51_RNG_TASK_START   0x000
+#define NRF51_RNG_TASK_STOP    0x004
+#define NRF51_RNG_EVENT_VALRDY 0x100
+#define NRF51_RNG_REG_SHORTS   0x200
+#define NRF51_RNG_REG_SHORTS_VALRDY_STOP 0
+#define NRF51_RNG_REG_INTEN    0x300
+#define NRF51_RNG_REG_INTEN_VALRDY 0
+#define NRF51_RNG_REG_INTENSET 0x304
+#define NRF51_RNG_REG_INTENCLR 0x308
+#define NRF51_RNG_REG_CONFIG   0x504
+#define NRF51_RNG_REG_CONFIG_DECEN 0
+#define NRF51_RNG_REG_VALUE    0x508
+
+struct NRF51RNGState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion mmio;
+    qemu_irq irq;
+
+    /* Event End Points */
+    qemu_irq eep_valrdy;
+
+    QEMUTimer timer;
+
+    /* Time between generation of successive unfiltered values in us */
+    uint16_t period_unfiltered_us;
+    /* Time between generation of successive filtered values in us */
+    uint16_t period_filtered_us;
+
+    uint8_t value;
+
+    uint32_t active;
+    uint32_t event_valrdy;
+    uint32_t shortcut_stop_on_valrdy;
+    uint32_t interrupt_enabled;
+    uint32_t filter_enabled;
+
+};
+
+
+#endif /* NRF51_RNG_H */
diff --git a/include/hw/misc/pca9552.h b/include/hw/misc/pca9552.h
new file mode 100644 (file)
index 0000000..b6f4e26
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * PCA9552 I2C LED blinker
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+#ifndef PCA9552_H
+#define PCA9552_H
+
+#include "hw/i2c/i2c.h"
+#include "qom/object.h"
+
+#define TYPE_PCA9552 "pca9552"
+#define TYPE_PCA955X "pca955x"
+typedef struct PCA955xState PCA955xState;
+DECLARE_INSTANCE_CHECKER(PCA955xState, PCA955X,
+                         TYPE_PCA955X)
+
+#define PCA955X_NR_REGS 10
+#define PCA955X_PIN_COUNT_MAX 16
+
+struct PCA955xState {
+    /*< private >*/
+    I2CSlave i2c;
+    /*< public >*/
+
+    uint8_t len;
+    uint8_t pointer;
+
+    uint8_t regs[PCA955X_NR_REGS];
+    qemu_irq gpio[PCA955X_PIN_COUNT_MAX];
+    char *description; /* For debugging purpose only */
+};
+
+#endif
diff --git a/include/hw/misc/pca9552_regs.h b/include/hw/misc/pca9552_regs.h
new file mode 100644 (file)
index 0000000..d8051cf
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * PCA9552 I2C LED blinker registers
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+#ifndef PCA9552_REGS_H
+#define PCA9552_REGS_H
+
+/*
+ * Bits [0:3] are used to address a specific register.
+ */
+#define PCA9552_INPUT0   0 /* read only input register 0 */
+#define PCA9552_INPUT1   1 /* read only input register 1  */
+#define PCA9552_PSC0     2 /* read/write frequency prescaler 0 */
+#define PCA9552_PWM0     3 /* read/write PWM register 0 */
+#define PCA9552_PSC1     4 /* read/write frequency prescaler 1 */
+#define PCA9552_PWM1     5 /* read/write PWM register 1 */
+#define PCA9552_LS0      6 /* read/write LED0 to LED3 selector */
+#define PCA9552_LS1      7 /* read/write LED4 to LED7 selector */
+#define PCA9552_LS2      8 /* read/write LED8 to LED11 selector */
+#define PCA9552_LS3      9 /* read/write LED12 to LED15 selector */
+
+/*
+ * Bit [4] is used to activate the Auto-Increment option of the
+ * register address
+ */
+#define PCA9552_AUTOINC  (1 << 4)
+
+#endif
diff --git a/include/hw/misc/pvpanic.h b/include/hw/misc/pvpanic.h
new file mode 100644 (file)
index 0000000..fab9416
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * QEMU simulated pvpanic device.
+ *
+ * Copyright Fujitsu, Corp. 2013
+ *
+ * Authors:
+ *     Wen Congyang <wency@cn.fujitsu.com>
+ *     Hu Tao <hutao@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_MISC_PVPANIC_H
+#define HW_MISC_PVPANIC_H
+
+#include "exec/memory.h"
+#include "qom/object.h"
+
+#define TYPE_PVPANIC_ISA_DEVICE "pvpanic"
+#define TYPE_PVPANIC_PCI_DEVICE "pvpanic-pci"
+
+#define PVPANIC_IOPORT_PROP "ioport"
+
+/*
+ * PVPanicState for any device type
+ */
+typedef struct PVPanicState PVPanicState;
+struct PVPanicState {
+    MemoryRegion mr;
+    uint8_t events;
+};
+
+void pvpanic_setup_io(PVPanicState *s, DeviceState *dev, unsigned size);
+
+#endif
diff --git a/include/hw/misc/sifive_e_aon.h b/include/hw/misc/sifive_e_aon.h
new file mode 100644 (file)
index 0000000..2ae1c41
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * SiFive HiFive1 AON (Always On Domain) interface.
+ *
+ * Copyright (c) 2022 SiFive, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SIFIVE_AON_H
+#define HW_SIFIVE_AON_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_SIFIVE_E_AON "riscv.sifive.e.aon"
+OBJECT_DECLARE_SIMPLE_TYPE(SiFiveEAONState, SIFIVE_E_AON)
+
+#define SIFIVE_E_AON_WDOGKEY (0x51F15E)
+#define SIFIVE_E_AON_WDOGFEED (0xD09F00D)
+#define SIFIVE_E_LFCLK_DEFAULT_FREQ (32768)
+
+enum {
+    SIFIVE_E_AON_WDT    = 0x0,
+    SIFIVE_E_AON_RTC    = 0x40,
+    SIFIVE_E_AON_LFROSC = 0x70,
+    SIFIVE_E_AON_BACKUP = 0x80,
+    SIFIVE_E_AON_PMU    = 0x100,
+    SIFIVE_E_AON_MAX    = 0x150
+};
+
+struct SiFiveEAONState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion mmio;
+
+    /*< watchdog timer >*/
+    QEMUTimer *wdog_timer;
+    qemu_irq wdog_irq;
+    uint64_t wdog_restart_time;
+    uint64_t wdogclk_freq;
+
+    uint32_t wdogcfg;
+    uint16_t wdogcmp0;
+    uint32_t wdogcount;
+    uint8_t wdogunlock;
+};
+
+#endif
diff --git a/include/hw/misc/sifive_e_prci.h b/include/hw/misc/sifive_e_prci.h
new file mode 100644 (file)
index 0000000..6aa949e
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * QEMU SiFive E PRCI (Power, Reset, Clock, Interrupt) interface
+ *
+ * Copyright (c) 2017 SiFive, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SIFIVE_E_PRCI_H
+#define HW_SIFIVE_E_PRCI_H
+
+#include "hw/sysbus.h"
+
+enum {
+    SIFIVE_E_PRCI_HFROSCCFG = 0x0,
+    SIFIVE_E_PRCI_HFXOSCCFG = 0x4,
+    SIFIVE_E_PRCI_PLLCFG    = 0x8,
+    SIFIVE_E_PRCI_PLLOUTDIV = 0xC
+};
+
+enum {
+    SIFIVE_E_PRCI_HFROSCCFG_RDY = (1 << 31),
+    SIFIVE_E_PRCI_HFROSCCFG_EN  = (1 << 30)
+};
+
+enum {
+    SIFIVE_E_PRCI_HFXOSCCFG_RDY = (1 << 31),
+    SIFIVE_E_PRCI_HFXOSCCFG_EN  = (1 << 30)
+};
+
+enum {
+    SIFIVE_E_PRCI_PLLCFG_PLLSEL = (1 << 16),
+    SIFIVE_E_PRCI_PLLCFG_REFSEL = (1 << 17),
+    SIFIVE_E_PRCI_PLLCFG_BYPASS = (1 << 18),
+    SIFIVE_E_PRCI_PLLCFG_LOCK   = (1 << 31)
+};
+
+enum {
+    SIFIVE_E_PRCI_PLLOUTDIV_DIV1 = (1 << 8)
+};
+
+#define SIFIVE_E_PRCI_REG_SIZE  0x1000
+
+#define TYPE_SIFIVE_E_PRCI      "riscv.sifive.e.prci"
+
+typedef struct SiFiveEPRCIState SiFiveEPRCIState;
+DECLARE_INSTANCE_CHECKER(SiFiveEPRCIState, SIFIVE_E_PRCI,
+                         TYPE_SIFIVE_E_PRCI)
+
+struct SiFiveEPRCIState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion mmio;
+    uint32_t hfrosccfg;
+    uint32_t hfxosccfg;
+    uint32_t pllcfg;
+    uint32_t plloutdiv;
+};
+
+DeviceState *sifive_e_prci_create(hwaddr addr);
+
+#endif
diff --git a/include/hw/misc/sifive_test.h b/include/hw/misc/sifive_test.h
new file mode 100644 (file)
index 0000000..88a38d0
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * QEMU Test Finisher interface
+ *
+ * Copyright (c) 2018 SiFive, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SIFIVE_TEST_H
+#define HW_SIFIVE_TEST_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_SIFIVE_TEST "riscv.sifive.test"
+
+typedef struct SiFiveTestState SiFiveTestState;
+DECLARE_INSTANCE_CHECKER(SiFiveTestState, SIFIVE_TEST,
+                         TYPE_SIFIVE_TEST)
+
+struct SiFiveTestState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion mmio;
+};
+
+enum {
+    FINISHER_FAIL = 0x3333,
+    FINISHER_PASS = 0x5555,
+    FINISHER_RESET = 0x7777
+};
+
+DeviceState *sifive_test_create(hwaddr addr);
+
+#endif
diff --git a/include/hw/misc/sifive_u_otp.h b/include/hw/misc/sifive_u_otp.h
new file mode 100644 (file)
index 0000000..170d214
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * QEMU SiFive U OTP (One-Time Programmable) Memory interface
+ *
+ * Copyright (c) 2019 Bin Meng <bmeng.cn@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SIFIVE_U_OTP_H
+#define HW_SIFIVE_U_OTP_H
+
+#include "hw/sysbus.h"
+
+#define SIFIVE_U_OTP_PA         0x00
+#define SIFIVE_U_OTP_PAIO       0x04
+#define SIFIVE_U_OTP_PAS        0x08
+#define SIFIVE_U_OTP_PCE        0x0C
+#define SIFIVE_U_OTP_PCLK       0x10
+#define SIFIVE_U_OTP_PDIN       0x14
+#define SIFIVE_U_OTP_PDOUT      0x18
+#define SIFIVE_U_OTP_PDSTB      0x1C
+#define SIFIVE_U_OTP_PPROG      0x20
+#define SIFIVE_U_OTP_PTC        0x24
+#define SIFIVE_U_OTP_PTM        0x28
+#define SIFIVE_U_OTP_PTM_REP    0x2C
+#define SIFIVE_U_OTP_PTR        0x30
+#define SIFIVE_U_OTP_PTRIM      0x34
+#define SIFIVE_U_OTP_PWE        0x38
+
+#define SIFIVE_U_OTP_PWE_EN     (1 << 0)
+
+#define SIFIVE_U_OTP_PCE_EN     (1 << 0)
+
+#define SIFIVE_U_OTP_PDSTB_EN   (1 << 0)
+
+#define SIFIVE_U_OTP_PTRIM_EN   (1 << 0)
+
+#define SIFIVE_U_OTP_PA_MASK        0xfff
+#define SIFIVE_U_OTP_NUM_FUSES      0x1000
+#define SIFIVE_U_OTP_FUSE_WORD      4
+#define SIFIVE_U_OTP_SERIAL_ADDR    0xfc
+
+#define SIFIVE_U_OTP_REG_SIZE       0x1000
+
+#define TYPE_SIFIVE_U_OTP           "riscv.sifive.u.otp"
+
+typedef struct SiFiveUOTPState SiFiveUOTPState;
+DECLARE_INSTANCE_CHECKER(SiFiveUOTPState, SIFIVE_U_OTP,
+                         TYPE_SIFIVE_U_OTP)
+
+struct SiFiveUOTPState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion mmio;
+    uint32_t pa;
+    uint32_t paio;
+    uint32_t pas;
+    uint32_t pce;
+    uint32_t pclk;
+    uint32_t pdin;
+    uint32_t pdstb;
+    uint32_t pprog;
+    uint32_t ptc;
+    uint32_t ptm;
+    uint32_t ptm_rep;
+    uint32_t ptr;
+    uint32_t ptrim;
+    uint32_t pwe;
+    uint32_t fuse[SIFIVE_U_OTP_NUM_FUSES];
+    uint32_t fuse_wo[SIFIVE_U_OTP_NUM_FUSES];
+    /* config */
+    uint32_t serial;
+    BlockBackend *blk;
+};
+
+#endif /* HW_SIFIVE_U_OTP_H */
diff --git a/include/hw/misc/sifive_u_prci.h b/include/hw/misc/sifive_u_prci.h
new file mode 100644 (file)
index 0000000..4d2491a
--- /dev/null
@@ -0,0 +1,94 @@
+/*
+ * QEMU SiFive U PRCI (Power, Reset, Clock, Interrupt) interface
+ *
+ * Copyright (c) 2019 Bin Meng <bmeng.cn@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SIFIVE_U_PRCI_H
+#define HW_SIFIVE_U_PRCI_H
+
+#include "hw/sysbus.h"
+
+#define SIFIVE_U_PRCI_HFXOSCCFG     0x00
+#define SIFIVE_U_PRCI_COREPLLCFG0   0x04
+#define SIFIVE_U_PRCI_DDRPLLCFG0    0x0C
+#define SIFIVE_U_PRCI_DDRPLLCFG1    0x10
+#define SIFIVE_U_PRCI_GEMGXLPLLCFG0 0x1C
+#define SIFIVE_U_PRCI_GEMGXLPLLCFG1 0x20
+#define SIFIVE_U_PRCI_CORECLKSEL    0x24
+#define SIFIVE_U_PRCI_DEVICESRESET  0x28
+#define SIFIVE_U_PRCI_CLKMUXSTATUS  0x2C
+
+/*
+ * Current FU540-C000 manual says ready bit is at bit 29, but
+ * freedom-u540-c000-bootloader codes (ux00prci.h) says it is at bit 31.
+ * We have to trust the actual code that works.
+ *
+ * see https://github.com/sifive/freedom-u540-c000-bootloader
+ */
+
+#define SIFIVE_U_PRCI_HFXOSCCFG_EN  (1 << 30)
+#define SIFIVE_U_PRCI_HFXOSCCFG_RDY (1 << 31)
+
+/* xxxPLLCFG0 register bits */
+#define SIFIVE_U_PRCI_PLLCFG0_DIVR  (1 << 0)
+#define SIFIVE_U_PRCI_PLLCFG0_DIVF  (31 << 6)
+#define SIFIVE_U_PRCI_PLLCFG0_DIVQ  (3 << 15)
+#define SIFIVE_U_PRCI_PLLCFG0_FSE   (1 << 25)
+#define SIFIVE_U_PRCI_PLLCFG0_LOCK  (1 << 31)
+
+/* xxxPLLCFG1 register bits */
+#define SIFIVE_U_PRCI_PLLCFG1_CKE   (1 << 24)
+
+/* coreclksel register bits */
+#define SIFIVE_U_PRCI_CORECLKSEL_HFCLK  (1 << 0)
+
+
+#define SIFIVE_U_PRCI_REG_SIZE  0x1000
+
+#define TYPE_SIFIVE_U_PRCI      "riscv.sifive.u.prci"
+
+typedef struct SiFiveUPRCIState SiFiveUPRCIState;
+DECLARE_INSTANCE_CHECKER(SiFiveUPRCIState, SIFIVE_U_PRCI,
+                         TYPE_SIFIVE_U_PRCI)
+
+struct SiFiveUPRCIState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion mmio;
+    uint32_t hfxosccfg;
+    uint32_t corepllcfg0;
+    uint32_t ddrpllcfg0;
+    uint32_t ddrpllcfg1;
+    uint32_t gemgxlpllcfg0;
+    uint32_t gemgxlpllcfg1;
+    uint32_t coreclksel;
+    uint32_t devicesreset;
+    uint32_t clkmuxstatus;
+};
+
+/*
+ * Clock indexes for use by Device Tree data and the PRCI driver.
+ *
+ * These values are from sifive-fu540-prci.h in the Linux kernel.
+ */
+#define PRCI_CLK_COREPLL        0
+#define PRCI_CLK_DDRPLL         1
+#define PRCI_CLK_GEMGXLPLL      2
+#define PRCI_CLK_TLCLK          3
+
+#endif /* HW_SIFIVE_U_PRCI_H */
diff --git a/include/hw/misc/stm32f2xx_syscfg.h b/include/hw/misc/stm32f2xx_syscfg.h
new file mode 100644 (file)
index 0000000..8595a3b
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * STM32F2XX SYSCFG
+ *
+ * Copyright (c) 2014 Alistair Francis <alistair@alistair23.me>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_STM32F2XX_SYSCFG_H
+#define HW_STM32F2XX_SYSCFG_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define SYSCFG_MEMRMP  0x00
+#define SYSCFG_PMC     0x04
+#define SYSCFG_EXTICR1 0x08
+#define SYSCFG_EXTICR2 0x0C
+#define SYSCFG_EXTICR3 0x10
+#define SYSCFG_EXTICR4 0x14
+#define SYSCFG_CMPCR   0x20
+
+#define TYPE_STM32F2XX_SYSCFG "stm32f2xx-syscfg"
+OBJECT_DECLARE_SIMPLE_TYPE(STM32F2XXSyscfgState, STM32F2XX_SYSCFG)
+
+struct STM32F2XXSyscfgState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion mmio;
+
+    uint32_t syscfg_memrmp;
+    uint32_t syscfg_pmc;
+    uint32_t syscfg_exticr1;
+    uint32_t syscfg_exticr2;
+    uint32_t syscfg_exticr3;
+    uint32_t syscfg_exticr4;
+    uint32_t syscfg_cmpcr;
+};
+
+#endif /* HW_STM32F2XX_SYSCFG_H */
diff --git a/include/hw/misc/stm32f4xx_exti.h b/include/hw/misc/stm32f4xx_exti.h
new file mode 100644 (file)
index 0000000..fc11c59
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * STM32F4XX EXTI
+ *
+ * Copyright (c) 2014 Alistair Francis <alistair@alistair23.me>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_STM32F4XX_EXTI_H
+#define HW_STM32F4XX_EXTI_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define EXTI_IMR   0x00
+#define EXTI_EMR   0x04
+#define EXTI_RTSR  0x08
+#define EXTI_FTSR  0x0C
+#define EXTI_SWIER 0x10
+#define EXTI_PR    0x14
+
+#define TYPE_STM32F4XX_EXTI "stm32f4xx-exti"
+OBJECT_DECLARE_SIMPLE_TYPE(STM32F4xxExtiState, STM32F4XX_EXTI)
+
+#define NUM_GPIO_EVENT_IN_LINES 16
+#define NUM_INTERRUPT_OUT_LINES 16
+
+struct STM32F4xxExtiState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion mmio;
+
+    uint32_t exti_imr;
+    uint32_t exti_emr;
+    uint32_t exti_rtsr;
+    uint32_t exti_ftsr;
+    uint32_t exti_swier;
+    uint32_t exti_pr;
+
+    qemu_irq irq[NUM_INTERRUPT_OUT_LINES];
+};
+
+#endif
diff --git a/include/hw/misc/stm32f4xx_syscfg.h b/include/hw/misc/stm32f4xx_syscfg.h
new file mode 100644 (file)
index 0000000..9fce67f
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * STM32F4xx SYSCFG
+ *
+ * Copyright (c) 2014 Alistair Francis <alistair@alistair23.me>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_STM32F4XX_SYSCFG_H
+#define HW_STM32F4XX_SYSCFG_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define SYSCFG_MEMRMP  0x00
+#define SYSCFG_PMC     0x04
+#define SYSCFG_EXTICR1 0x08
+#define SYSCFG_EXTICR2 0x0C
+#define SYSCFG_EXTICR3 0x10
+#define SYSCFG_EXTICR4 0x14
+#define SYSCFG_CMPCR   0x20
+
+#define TYPE_STM32F4XX_SYSCFG "stm32f4xx-syscfg"
+OBJECT_DECLARE_SIMPLE_TYPE(STM32F4xxSyscfgState, STM32F4XX_SYSCFG)
+
+#define SYSCFG_NUM_EXTICR 4
+
+struct STM32F4xxSyscfgState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion mmio;
+
+    uint32_t syscfg_memrmp;
+    uint32_t syscfg_pmc;
+    uint32_t syscfg_exticr[SYSCFG_NUM_EXTICR];
+    uint32_t syscfg_cmpcr;
+
+    qemu_irq irq;
+    qemu_irq gpio_out[16];
+};
+
+#endif
diff --git a/include/hw/misc/tz-mpc.h b/include/hw/misc/tz-mpc.h
new file mode 100644 (file)
index 0000000..74d5d82
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * ARM AHB5 TrustZone Memory Protection Controller emulation
+ *
+ * Copyright (c) 2018 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+/* This is a model of the TrustZone memory protection controller (MPC).
+ * It is documented in the ARM CoreLink SIE-200 System IP for Embedded TRM
+ * (DDI 0571G):
+ * https://developer.arm.com/products/architecture/m-profile/docs/ddi0571/g
+ *
+ * The MPC sits in front of memory and allows secure software to
+ * configure it to either pass through or reject transactions.
+ * Rejected transactions may be configured to either be aborted, or to
+ * behave as RAZ/WI. An interrupt can be signalled for a rejected transaction.
+ *
+ * The MPC has a register interface which the guest uses to configure it.
+ *
+ * QEMU interface:
+ * + sysbus MMIO region 0: MemoryRegion for the MPC's config registers
+ * + sysbus MMIO region 1: MemoryRegion for the upstream end of the MPC
+ * + Property "downstream": MemoryRegion defining the downstream memory
+ * + Named GPIO output "irq": set for a transaction-failed interrupt
+ */
+
+#ifndef TZ_MPC_H
+#define TZ_MPC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_TZ_MPC "tz-mpc"
+OBJECT_DECLARE_SIMPLE_TYPE(TZMPC, TZ_MPC)
+
+#define TZ_NUM_PORTS 16
+
+#define TYPE_TZ_MPC_IOMMU_MEMORY_REGION "tz-mpc-iommu-memory-region"
+
+
+struct TZMPC {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+
+    /* State */
+    uint32_t ctrl;
+    uint32_t blk_idx;
+    uint32_t int_stat;
+    uint32_t int_en;
+    uint32_t int_info1;
+    uint32_t int_info2;
+
+    uint32_t *blk_lut;
+
+    qemu_irq irq;
+
+    /* Properties */
+    MemoryRegion *downstream;
+
+    hwaddr blocksize;
+    uint32_t blk_max;
+
+    /* MemoryRegions exposed to user */
+    MemoryRegion regmr;
+    IOMMUMemoryRegion upstream;
+
+    /* MemoryRegion used internally */
+    MemoryRegion blocked_io;
+
+    AddressSpace downstream_as;
+    AddressSpace blocked_io_as;
+};
+
+#endif
diff --git a/include/hw/misc/tz-msc.h b/include/hw/misc/tz-msc.h
new file mode 100644 (file)
index 0000000..77cc7f2
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * ARM TrustZone master security controller emulation
+ *
+ * Copyright (c) 2018 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+/*
+ * This is a model of the TrustZone master security controller (MSC).
+ * It is documented in the ARM CoreLink SIE-200 System IP for Embedded TRM
+ * (DDI 0571G):
+ * https://developer.arm.com/products/architecture/m-profile/docs/ddi0571/g
+ *
+ * The MSC sits in front of a device which can be a bus master (such as
+ * a DMA controller) and allows secure software to configure it to either
+ * pass through or reject transactions made by that bus master.
+ * Rejected transactions may be configured to either be aborted, or to
+ * behave as RAZ/WI. An interrupt can be signalled for a rejected transaction.
+ *
+ * The MSC has no register interface -- it is configured purely by a
+ * collection of input signals from other hardware in the system. Typically
+ * they are either hardwired or exposed in an ad-hoc register interface by
+ * the SoC that uses the MSC.
+ *
+ * We don't currently implement the irq_enable GPIO input, because on
+ * the MPS2 FPGA images it is always tied high, which is awkward to
+ * implement in QEMU.
+ *
+ * QEMU interface:
+ * + Named GPIO input "cfg_nonsec": set to 1 if the bus master should be
+ *   treated as nonsecure, or 0 for secure
+ * + Named GPIO input "cfg_sec_resp": set to 1 if a rejected transaction should
+ *   result in a transaction error, or 0 for the transaction to RAZ/WI
+ * + Named GPIO input "irq_clear": set to 1 to clear a pending interrupt
+ * + Named GPIO output "irq": set for a transaction-failed interrupt
+ * + Property "downstream": MemoryRegion defining where bus master transactions
+ *   are made if they are not blocked
+ * + Property "idau": an object implementing IDAUInterface, which defines which
+ *   addresses should be treated as secure and which as non-secure.
+ *   This need not be the same IDAU as the one used by the CPU.
+ * + sysbus MMIO region 0: MemoryRegion defining the upstream end of the MSC;
+ *   this should be passed to the bus master device as the region it should
+ *   make memory transactions to
+ */
+
+#ifndef TZ_MSC_H
+#define TZ_MSC_H
+
+#include "hw/sysbus.h"
+#include "target/arm/idau.h"
+#include "qom/object.h"
+
+#define TYPE_TZ_MSC "tz-msc"
+OBJECT_DECLARE_SIMPLE_TYPE(TZMSC, TZ_MSC)
+
+struct TZMSC {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+
+    /* State: these just track the values of our input signals */
+    bool cfg_nonsec;
+    bool cfg_sec_resp;
+    bool irq_clear;
+    /* State: are we asserting irq ? */
+    bool irq_status;
+
+    qemu_irq irq;
+    MemoryRegion *downstream;
+    AddressSpace downstream_as;
+    MemoryRegion upstream;
+    IDAUInterface *idau;
+};
+
+#endif
diff --git a/include/hw/misc/tz-ppc.h b/include/hw/misc/tz-ppc.h
new file mode 100644 (file)
index 0000000..021d671
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ * ARM TrustZone peripheral protection controller emulation
+ *
+ * Copyright (c) 2018 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+/* This is a model of the TrustZone peripheral protection controller (PPC).
+ * It is documented in the ARM CoreLink SIE-200 System IP for Embedded TRM
+ * (DDI 0571G):
+ * https://developer.arm.com/products/architecture/m-profile/docs/ddi0571/g
+ *
+ * The PPC sits in front of peripherals and allows secure software to
+ * configure it to either pass through or reject transactions.
+ * Rejected transactions may be configured to either be aborted, or to
+ * behave as RAZ/WI. An interrupt can be signalled for a rejected transaction.
+ *
+ * The PPC has no register interface -- it is configured purely by a
+ * collection of input signals from other hardware in the system. Typically
+ * they are either hardwired or exposed in an ad-hoc register interface by
+ * the SoC that uses the PPC.
+ *
+ * This QEMU model can be used to model either the AHB5 or APB4 TZ PPC,
+ * since the only difference between them is that the AHB version has a
+ * "default" port which has no security checks applied. In QEMU the default
+ * port can be emulated simply by wiring its downstream devices directly
+ * into the parent address space, since the PPC does not need to intercept
+ * transactions there.
+ *
+ * In the hardware, selection of which downstream port to use is done by
+ * the user's decode logic asserting one of the hsel[] signals. In QEMU,
+ * we provide 16 MMIO regions, one per port, and the user maps these into
+ * the desired addresses to implement the address decode.
+ *
+ * QEMU interface:
+ * + sysbus MMIO regions 0..15: MemoryRegions defining the upstream end
+ *   of each of the 16 ports of the PPC. When a port is unused (i.e. no
+ *   downstream MemoryRegion is connected to it) at the end of the 0..15
+ *   range then no sysbus MMIO region is created for its upstream. When an
+ *   unused port lies in the middle of the range with other used ports at
+ *   higher port numbers, a dummy MMIO region is created to ensure that
+ *   port N's upstream is always sysbus MMIO region N. Dummy regions should
+ *   not be mapped, and will assert if any access is made to them.
+ * + Property "port[0..15]": MemoryRegion defining the downstream device(s)
+ *   for each of the 16 ports of the PPC
+ * + Named GPIO inputs "cfg_nonsec[0..15]": set to 1 if the port should be
+ *   accessible to NonSecure transactions
+ * + Named GPIO inputs "cfg_ap[0..15]": set to 1 if the port should be
+ *   accessible to non-privileged transactions
+ * + Named GPIO input "cfg_sec_resp": set to 1 if a rejected transaction should
+ *   result in a transaction error, or 0 for the transaction to RAZ/WI
+ * + Named GPIO input "irq_enable": set to 1 to enable interrupts
+ * + Named GPIO input "irq_clear": set to 1 to clear a pending interrupt
+ * + Named GPIO output "irq": set for a transaction-failed interrupt
+ * + Property "NONSEC_MASK": if a bit is set in this mask then accesses to
+ *   the associated port do not have the TZ security check performed. (This
+ *   corresponds to the hardware allowing this to be set as a Verilog
+ *   parameter.)
+ */
+
+#ifndef TZ_PPC_H
+#define TZ_PPC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_TZ_PPC "tz-ppc"
+OBJECT_DECLARE_SIMPLE_TYPE(TZPPC, TZ_PPC)
+
+#define TZ_NUM_PORTS 16
+
+
+typedef struct TZPPCPort {
+    TZPPC *ppc;
+    MemoryRegion upstream;
+    AddressSpace downstream_as;
+    MemoryRegion *downstream;
+} TZPPCPort;
+
+struct TZPPC {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+
+    /* State: these just track the values of our input signals */
+    bool cfg_nonsec[TZ_NUM_PORTS];
+    bool cfg_ap[TZ_NUM_PORTS];
+    bool cfg_sec_resp;
+    bool irq_enable;
+    bool irq_clear;
+    /* State: are we asserting irq ? */
+    bool irq_status;
+
+    qemu_irq irq;
+
+    /* Properties */
+    uint32_t nonsec_mask;
+
+    TZPPCPort port[TZ_NUM_PORTS];
+};
+
+#endif
diff --git a/include/hw/misc/unimp.h b/include/hw/misc/unimp.h
new file mode 100644 (file)
index 0000000..518d627
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * "Unimplemented" device
+ *
+ * Copyright Linaro Limited, 2017
+ * Written by Peter Maydell
+ */
+
+#ifndef HW_MISC_UNIMP_H
+#define HW_MISC_UNIMP_H
+
+#include "hw/qdev-properties.h"
+#include "hw/sysbus.h"
+#include "qapi/error.h"
+#include "qom/object.h"
+
+#define TYPE_UNIMPLEMENTED_DEVICE "unimplemented-device"
+
+OBJECT_DECLARE_SIMPLE_TYPE(UnimplementedDeviceState, UNIMPLEMENTED_DEVICE)
+
+struct UnimplementedDeviceState {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+    unsigned offset_fmt_width;
+    char *name;
+    uint64_t size;
+};
+
+/**
+ * create_unimplemented_device: create and map a dummy device
+ * @name: name of the device for debug logging
+ * @base: base address of the device's MMIO region
+ * @size: size of the device's MMIO region
+ *
+ * This utility function creates and maps an instance of unimplemented-device,
+ * which is a dummy device which simply logs all guest accesses to
+ * it via the qemu_log LOG_UNIMP debug log.
+ * The device is mapped at priority -1000, which means that you can
+ * use it to cover a large region and then map other devices on top of it
+ * if necessary.
+ */
+static inline void create_unimplemented_device(const char *name,
+                                               hwaddr base,
+                                               hwaddr size)
+{
+    DeviceState *dev = qdev_new(TYPE_UNIMPLEMENTED_DEVICE);
+
+    qdev_prop_set_string(dev, "name", name);
+    qdev_prop_set_uint64(dev, "size", size);
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+
+    sysbus_mmio_map_overlap(SYS_BUS_DEVICE(dev), 0, base, -1000);
+}
+
+#endif
diff --git a/include/hw/misc/virt_ctrl.h b/include/hw/misc/virt_ctrl.h
new file mode 100644 (file)
index 0000000..81346cf
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Virt system Controller
+ */
+
+#ifndef VIRT_CTRL_H
+#define VIRT_CTRL_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_VIRT_CTRL "virt-ctrl"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtCtrlState, VIRT_CTRL)
+
+struct VirtCtrlState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    qemu_irq irq;
+
+    uint32_t irq_enabled;
+};
+
+#endif
diff --git a/include/hw/misc/vmcoreinfo.h b/include/hw/misc/vmcoreinfo.h
new file mode 100644 (file)
index 0000000..0b7b55d
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Virtual Machine coreinfo device
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Authors: Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#ifndef VMCOREINFO_H
+#define VMCOREINFO_H
+
+#include "hw/qdev-core.h"
+#include "standard-headers/linux/qemu_fw_cfg.h"
+#include "qom/object.h"
+
+#define VMCOREINFO_DEVICE "vmcoreinfo"
+typedef struct VMCoreInfoState VMCoreInfoState;
+DECLARE_INSTANCE_CHECKER(VMCoreInfoState, VMCOREINFO,
+                         VMCOREINFO_DEVICE)
+
+typedef struct fw_cfg_vmcoreinfo FWCfgVMCoreInfo;
+
+struct VMCoreInfoState {
+    DeviceState parent_obj;
+
+    bool has_vmcoreinfo;
+    FWCfgVMCoreInfo vmcoreinfo;
+};
+
+/* returns NULL unless there is exactly one device */
+static inline VMCoreInfoState *vmcoreinfo_find(void)
+{
+    Object *o = object_resolve_path_type("", VMCOREINFO_DEVICE, NULL);
+
+    return o ? VMCOREINFO(o) : NULL;
+}
+
+#endif
diff --git a/include/hw/misc/xlnx-cfi-if.h b/include/hw/misc/xlnx-cfi-if.h
new file mode 100644 (file)
index 0000000..f9bd122
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Xilinx CFI interface
+ *
+ * Copyright (C) 2023, Advanced Micro Devices, Inc.
+ *
+ * Written by Francisco Iglesias <francisco.iglesias@amd.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef XLNX_CFI_IF_H
+#define XLNX_CFI_IF_H 1
+
+#include "qemu/help-texts.h"
+#include "hw/hw.h"
+#include "qom/object.h"
+
+#define TYPE_XLNX_CFI_IF "xlnx-cfi-if"
+typedef struct XlnxCfiIfClass XlnxCfiIfClass;
+DECLARE_CLASS_CHECKERS(XlnxCfiIfClass, XLNX_CFI_IF, TYPE_XLNX_CFI_IF)
+
+#define XLNX_CFI_IF(obj) \
+     INTERFACE_CHECK(XlnxCfiIf, (obj), TYPE_XLNX_CFI_IF)
+
+typedef enum {
+    PACKET_TYPE_CFU = 0x52,
+    PACKET_TYPE_CFRAME = 0xA1,
+} xlnx_cfi_packet_type;
+
+typedef enum {
+    CFRAME_FAR = 1,
+    CFRAME_SFR = 2,
+    CFRAME_FDRI = 4,
+    CFRAME_CMD = 6,
+} xlnx_cfi_reg_addr;
+
+typedef struct XlnxCfiPacket {
+    uint8_t reg_addr;
+    uint32_t data[4];
+} XlnxCfiPacket;
+
+typedef struct XlnxCfiIf {
+    Object Parent;
+} XlnxCfiIf;
+
+typedef struct XlnxCfiIfClass {
+    InterfaceClass parent;
+
+    void (*cfi_transfer_packet)(XlnxCfiIf *cfi_if, XlnxCfiPacket *pkt);
+} XlnxCfiIfClass;
+
+/**
+ * Transfer a XlnxCfiPacket.
+ *
+ * @cfi_if: the object implementing this interface
+ * @XlnxCfiPacket: a pointer to the XlnxCfiPacket to transfer
+ */
+void xlnx_cfi_transfer_packet(XlnxCfiIf *cfi_if, XlnxCfiPacket *pkt);
+
+#endif /* XLNX_CFI_IF_H */
diff --git a/include/hw/misc/xlnx-versal-cframe-reg.h b/include/hw/misc/xlnx-versal-cframe-reg.h
new file mode 100644 (file)
index 0000000..0091505
--- /dev/null
@@ -0,0 +1,303 @@
+/*
+ * QEMU model of the Configuration Frame Control module
+ *
+ * Copyright (C) 2023, Advanced Micro Devices, Inc.
+ *
+ * Written by Francisco Iglesias <francisco.iglesias@amd.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * References:
+ * [1] Versal ACAP Technical Reference Manual,
+ *     https://www.xilinx.com/support/documentation/architecture-manuals/am011-versal-acap-trm.pdf
+ *
+ * [2] Versal ACAP Register Reference,
+ *     https://docs.xilinx.com/r/en-US/am012-versal-register-reference/CFRAME_REG-Module
+ */
+#ifndef HW_MISC_XLNX_VERSAL_CFRAME_REG_H
+#define HW_MISC_XLNX_VERSAL_CFRAME_REG_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "hw/misc/xlnx-cfi-if.h"
+#include "hw/misc/xlnx-versal-cfu.h"
+#include "qemu/fifo32.h"
+
+#define TYPE_XLNX_VERSAL_CFRAME_REG "xlnx,cframe-reg"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFrameReg, XLNX_VERSAL_CFRAME_REG)
+
+#define TYPE_XLNX_VERSAL_CFRAME_BCAST_REG "xlnx.cframe-bcast-reg"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFrameBcastReg,
+                           XLNX_VERSAL_CFRAME_BCAST_REG)
+
+/*
+ * The registers in this module are 128 bits wide but it is ok to write
+ * and read them through 4 sequential 32 bit accesses (address[3:2] = 0,
+ * 1, 2, 3).
+ */
+REG32(CRC0, 0x0)
+    FIELD(CRC, CRC, 0, 32)
+REG32(CRC1, 0x4)
+REG32(CRC2, 0x8)
+REG32(CRC3, 0xc)
+REG32(FAR0, 0x10)
+    FIELD(FAR0, SEGMENT, 23, 2)
+    FIELD(FAR0, BLOCKTYPE, 20, 3)
+    FIELD(FAR0, FRAME_ADDR, 0, 20)
+REG32(FAR1, 0x14)
+REG32(FAR2, 0x18)
+REG32(FAR3, 0x1c)
+REG32(FAR_SFR0, 0x20)
+    FIELD(FAR_SFR0, BLOCKTYPE, 20, 3)
+    FIELD(FAR_SFR0, FRAME_ADDR, 0, 20)
+REG32(FAR_SFR1, 0x24)
+REG32(FAR_SFR2, 0x28)
+REG32(FAR_SFR3, 0x2c)
+REG32(FDRI0, 0x40)
+REG32(FDRI1, 0x44)
+REG32(FDRI2, 0x48)
+REG32(FDRI3, 0x4c)
+REG32(FRCNT0, 0x50)
+    FIELD(FRCNT0, FRCNT, 0, 32)
+REG32(FRCNT1, 0x54)
+REG32(FRCNT2, 0x58)
+REG32(FRCNT3, 0x5c)
+REG32(CMD0, 0x60)
+    FIELD(CMD0, CMD, 0, 5)
+REG32(CMD1, 0x64)
+REG32(CMD2, 0x68)
+REG32(CMD3, 0x6c)
+REG32(CR_MASK0, 0x70)
+REG32(CR_MASK1, 0x74)
+REG32(CR_MASK2, 0x78)
+REG32(CR_MASK3, 0x7c)
+REG32(CTL0, 0x80)
+    FIELD(CTL, PER_FRAME_CRC, 0, 1)
+REG32(CTL1, 0x84)
+REG32(CTL2, 0x88)
+REG32(CTL3, 0x8c)
+REG32(CFRM_ISR0, 0x150)
+    FIELD(CFRM_ISR0, READ_BROADCAST_ERROR, 21, 1)
+    FIELD(CFRM_ISR0, CMD_MISSING_ERROR, 20, 1)
+    FIELD(CFRM_ISR0, RW_ROWOFF_ERROR, 19, 1)
+    FIELD(CFRM_ISR0, READ_REG_ADDR_ERROR, 18, 1)
+    FIELD(CFRM_ISR0, READ_BLK_TYPE_ERROR, 17, 1)
+    FIELD(CFRM_ISR0, READ_FRAME_ADDR_ERROR, 16, 1)
+    FIELD(CFRM_ISR0, WRITE_REG_ADDR_ERROR, 15, 1)
+    FIELD(CFRM_ISR0, WRITE_BLK_TYPE_ERROR, 13, 1)
+    FIELD(CFRM_ISR0, WRITE_FRAME_ADDR_ERROR, 12, 1)
+    FIELD(CFRM_ISR0, MFW_OVERRUN_ERROR, 11, 1)
+    FIELD(CFRM_ISR0, FAR_FIFO_UNDERFLOW, 10, 1)
+    FIELD(CFRM_ISR0, FAR_FIFO_OVERFLOW, 9, 1)
+    FIELD(CFRM_ISR0, PER_FRAME_SEQ_ERROR, 8, 1)
+    FIELD(CFRM_ISR0, CRC_ERROR, 7, 1)
+    FIELD(CFRM_ISR0, WRITE_OVERRUN_ERROR, 6, 1)
+    FIELD(CFRM_ISR0, READ_OVERRUN_ERROR, 5, 1)
+    FIELD(CFRM_ISR0, CMD_INTERRUPT_ERROR, 4, 1)
+    FIELD(CFRM_ISR0, WRITE_INTERRUPT_ERROR, 3, 1)
+    FIELD(CFRM_ISR0, READ_INTERRUPT_ERROR, 2, 1)
+    FIELD(CFRM_ISR0, SEU_CRC_ERROR, 1, 1)
+    FIELD(CFRM_ISR0, SEU_ECC_ERROR, 0, 1)
+REG32(CFRM_ISR1, 0x154)
+REG32(CFRM_ISR2, 0x158)
+REG32(CFRM_ISR3, 0x15c)
+REG32(CFRM_IMR0, 0x160)
+    FIELD(CFRM_IMR0, READ_BROADCAST_ERROR, 21, 1)
+    FIELD(CFRM_IMR0, CMD_MISSING_ERROR, 20, 1)
+    FIELD(CFRM_IMR0, RW_ROWOFF_ERROR, 19, 1)
+    FIELD(CFRM_IMR0, READ_REG_ADDR_ERROR, 18, 1)
+    FIELD(CFRM_IMR0, READ_BLK_TYPE_ERROR, 17, 1)
+    FIELD(CFRM_IMR0, READ_FRAME_ADDR_ERROR, 16, 1)
+    FIELD(CFRM_IMR0, WRITE_REG_ADDR_ERROR, 15, 1)
+    FIELD(CFRM_IMR0, WRITE_BLK_TYPE_ERROR, 13, 1)
+    FIELD(CFRM_IMR0, WRITE_FRAME_ADDR_ERROR, 12, 1)
+    FIELD(CFRM_IMR0, MFW_OVERRUN_ERROR, 11, 1)
+    FIELD(CFRM_IMR0, FAR_FIFO_UNDERFLOW, 10, 1)
+    FIELD(CFRM_IMR0, FAR_FIFO_OVERFLOW, 9, 1)
+    FIELD(CFRM_IMR0, PER_FRAME_SEQ_ERROR, 8, 1)
+    FIELD(CFRM_IMR0, CRC_ERROR, 7, 1)
+    FIELD(CFRM_IMR0, WRITE_OVERRUN_ERROR, 6, 1)
+    FIELD(CFRM_IMR0, READ_OVERRUN_ERROR, 5, 1)
+    FIELD(CFRM_IMR0, CMD_INTERRUPT_ERROR, 4, 1)
+    FIELD(CFRM_IMR0, WRITE_INTERRUPT_ERROR, 3, 1)
+    FIELD(CFRM_IMR0, READ_INTERRUPT_ERROR, 2, 1)
+    FIELD(CFRM_IMR0, SEU_CRC_ERROR, 1, 1)
+    FIELD(CFRM_IMR0, SEU_ECC_ERROR, 0, 1)
+REG32(CFRM_IMR1, 0x164)
+REG32(CFRM_IMR2, 0x168)
+REG32(CFRM_IMR3, 0x16c)
+REG32(CFRM_IER0, 0x170)
+    FIELD(CFRM_IER0, READ_BROADCAST_ERROR, 21, 1)
+    FIELD(CFRM_IER0, CMD_MISSING_ERROR, 20, 1)
+    FIELD(CFRM_IER0, RW_ROWOFF_ERROR, 19, 1)
+    FIELD(CFRM_IER0, READ_REG_ADDR_ERROR, 18, 1)
+    FIELD(CFRM_IER0, READ_BLK_TYPE_ERROR, 17, 1)
+    FIELD(CFRM_IER0, READ_FRAME_ADDR_ERROR, 16, 1)
+    FIELD(CFRM_IER0, WRITE_REG_ADDR_ERROR, 15, 1)
+    FIELD(CFRM_IER0, WRITE_BLK_TYPE_ERROR, 13, 1)
+    FIELD(CFRM_IER0, WRITE_FRAME_ADDR_ERROR, 12, 1)
+    FIELD(CFRM_IER0, MFW_OVERRUN_ERROR, 11, 1)
+    FIELD(CFRM_IER0, FAR_FIFO_UNDERFLOW, 10, 1)
+    FIELD(CFRM_IER0, FAR_FIFO_OVERFLOW, 9, 1)
+    FIELD(CFRM_IER0, PER_FRAME_SEQ_ERROR, 8, 1)
+    FIELD(CFRM_IER0, CRC_ERROR, 7, 1)
+    FIELD(CFRM_IER0, WRITE_OVERRUN_ERROR, 6, 1)
+    FIELD(CFRM_IER0, READ_OVERRUN_ERROR, 5, 1)
+    FIELD(CFRM_IER0, CMD_INTERRUPT_ERROR, 4, 1)
+    FIELD(CFRM_IER0, WRITE_INTERRUPT_ERROR, 3, 1)
+    FIELD(CFRM_IER0, READ_INTERRUPT_ERROR, 2, 1)
+    FIELD(CFRM_IER0, SEU_CRC_ERROR, 1, 1)
+    FIELD(CFRM_IER0, SEU_ECC_ERROR, 0, 1)
+REG32(CFRM_IER1, 0x174)
+REG32(CFRM_IER2, 0x178)
+REG32(CFRM_IER3, 0x17c)
+REG32(CFRM_IDR0, 0x180)
+    FIELD(CFRM_IDR0, READ_BROADCAST_ERROR, 21, 1)
+    FIELD(CFRM_IDR0, CMD_MISSING_ERROR, 20, 1)
+    FIELD(CFRM_IDR0, RW_ROWOFF_ERROR, 19, 1)
+    FIELD(CFRM_IDR0, READ_REG_ADDR_ERROR, 18, 1)
+    FIELD(CFRM_IDR0, READ_BLK_TYPE_ERROR, 17, 1)
+    FIELD(CFRM_IDR0, READ_FRAME_ADDR_ERROR, 16, 1)
+    FIELD(CFRM_IDR0, WRITE_REG_ADDR_ERROR, 15, 1)
+    FIELD(CFRM_IDR0, WRITE_BLK_TYPE_ERROR, 13, 1)
+    FIELD(CFRM_IDR0, WRITE_FRAME_ADDR_ERROR, 12, 1)
+    FIELD(CFRM_IDR0, MFW_OVERRUN_ERROR, 11, 1)
+    FIELD(CFRM_IDR0, FAR_FIFO_UNDERFLOW, 10, 1)
+    FIELD(CFRM_IDR0, FAR_FIFO_OVERFLOW, 9, 1)
+    FIELD(CFRM_IDR0, PER_FRAME_SEQ_ERROR, 8, 1)
+    FIELD(CFRM_IDR0, CRC_ERROR, 7, 1)
+    FIELD(CFRM_IDR0, WRITE_OVERRUN_ERROR, 6, 1)
+    FIELD(CFRM_IDR0, READ_OVERRUN_ERROR, 5, 1)
+    FIELD(CFRM_IDR0, CMD_INTERRUPT_ERROR, 4, 1)
+    FIELD(CFRM_IDR0, WRITE_INTERRUPT_ERROR, 3, 1)
+    FIELD(CFRM_IDR0, READ_INTERRUPT_ERROR, 2, 1)
+    FIELD(CFRM_IDR0, SEU_CRC_ERROR, 1, 1)
+    FIELD(CFRM_IDR0, SEU_ECC_ERROR, 0, 1)
+REG32(CFRM_IDR1, 0x184)
+REG32(CFRM_IDR2, 0x188)
+REG32(CFRM_IDR3, 0x18c)
+REG32(CFRM_ITR0, 0x190)
+    FIELD(CFRM_ITR0, READ_BROADCAST_ERROR, 21, 1)
+    FIELD(CFRM_ITR0, CMD_MISSING_ERROR, 20, 1)
+    FIELD(CFRM_ITR0, RW_ROWOFF_ERROR, 19, 1)
+    FIELD(CFRM_ITR0, READ_REG_ADDR_ERROR, 18, 1)
+    FIELD(CFRM_ITR0, READ_BLK_TYPE_ERROR, 17, 1)
+    FIELD(CFRM_ITR0, READ_FRAME_ADDR_ERROR, 16, 1)
+    FIELD(CFRM_ITR0, WRITE_REG_ADDR_ERROR, 15, 1)
+    FIELD(CFRM_ITR0, WRITE_BLK_TYPE_ERROR, 13, 1)
+    FIELD(CFRM_ITR0, WRITE_FRAME_ADDR_ERROR, 12, 1)
+    FIELD(CFRM_ITR0, MFW_OVERRUN_ERROR, 11, 1)
+    FIELD(CFRM_ITR0, FAR_FIFO_UNDERFLOW, 10, 1)
+    FIELD(CFRM_ITR0, FAR_FIFO_OVERFLOW, 9, 1)
+    FIELD(CFRM_ITR0, PER_FRAME_SEQ_ERROR, 8, 1)
+    FIELD(CFRM_ITR0, CRC_ERROR, 7, 1)
+    FIELD(CFRM_ITR0, WRITE_OVERRUN_ERROR, 6, 1)
+    FIELD(CFRM_ITR0, READ_OVERRUN_ERROR, 5, 1)
+    FIELD(CFRM_ITR0, CMD_INTERRUPT_ERROR, 4, 1)
+    FIELD(CFRM_ITR0, WRITE_INTERRUPT_ERROR, 3, 1)
+    FIELD(CFRM_ITR0, READ_INTERRUPT_ERROR, 2, 1)
+    FIELD(CFRM_ITR0, SEU_CRC_ERROR, 1, 1)
+    FIELD(CFRM_ITR0, SEU_ECC_ERROR, 0, 1)
+REG32(CFRM_ITR1, 0x194)
+REG32(CFRM_ITR2, 0x198)
+REG32(CFRM_ITR3, 0x19c)
+REG32(SEU_SYNDRM00, 0x1a0)
+REG32(SEU_SYNDRM01, 0x1a4)
+REG32(SEU_SYNDRM02, 0x1a8)
+REG32(SEU_SYNDRM03, 0x1ac)
+REG32(SEU_SYNDRM10, 0x1b0)
+REG32(SEU_SYNDRM11, 0x1b4)
+REG32(SEU_SYNDRM12, 0x1b8)
+REG32(SEU_SYNDRM13, 0x1bc)
+REG32(SEU_SYNDRM20, 0x1c0)
+REG32(SEU_SYNDRM21, 0x1c4)
+REG32(SEU_SYNDRM22, 0x1c8)
+REG32(SEU_SYNDRM23, 0x1cc)
+REG32(SEU_SYNDRM30, 0x1d0)
+REG32(SEU_SYNDRM31, 0x1d4)
+REG32(SEU_SYNDRM32, 0x1d8)
+REG32(SEU_SYNDRM33, 0x1dc)
+REG32(SEU_VIRTUAL_SYNDRM0, 0x1e0)
+REG32(SEU_VIRTUAL_SYNDRM1, 0x1e4)
+REG32(SEU_VIRTUAL_SYNDRM2, 0x1e8)
+REG32(SEU_VIRTUAL_SYNDRM3, 0x1ec)
+REG32(SEU_CRC0, 0x1f0)
+REG32(SEU_CRC1, 0x1f4)
+REG32(SEU_CRC2, 0x1f8)
+REG32(SEU_CRC3, 0x1fc)
+REG32(CFRAME_FAR_BOT0, 0x200)
+REG32(CFRAME_FAR_BOT1, 0x204)
+REG32(CFRAME_FAR_BOT2, 0x208)
+REG32(CFRAME_FAR_BOT3, 0x20c)
+REG32(CFRAME_FAR_TOP0, 0x210)
+REG32(CFRAME_FAR_TOP1, 0x214)
+REG32(CFRAME_FAR_TOP2, 0x218)
+REG32(CFRAME_FAR_TOP3, 0x21c)
+REG32(LAST_FRAME_BOT0, 0x220)
+    FIELD(LAST_FRAME_BOT0, BLOCKTYPE1_LAST_FRAME_LSB, 20, 12)
+    FIELD(LAST_FRAME_BOT0, BLOCKTYPE0_LAST_FRAME, 0, 20)
+REG32(LAST_FRAME_BOT1, 0x224)
+    FIELD(LAST_FRAME_BOT1, BLOCKTYPE3_LAST_FRAME_LSB, 28, 4)
+    FIELD(LAST_FRAME_BOT1, BLOCKTYPE2_LAST_FRAME, 8, 20)
+    FIELD(LAST_FRAME_BOT1, BLOCKTYPE1_LAST_FRAME_MSB, 0, 8)
+REG32(LAST_FRAME_BOT2, 0x228)
+    FIELD(LAST_FRAME_BOT2, BLOCKTYPE3_LAST_FRAME_MSB, 0, 16)
+REG32(LAST_FRAME_BOT3, 0x22c)
+REG32(LAST_FRAME_TOP0, 0x230)
+    FIELD(LAST_FRAME_TOP0, BLOCKTYPE5_LAST_FRAME_LSB, 20, 12)
+    FIELD(LAST_FRAME_TOP0, BLOCKTYPE4_LAST_FRAME, 0, 20)
+REG32(LAST_FRAME_TOP1, 0x234)
+    FIELD(LAST_FRAME_TOP1, BLOCKTYPE6_LAST_FRAME, 8, 20)
+    FIELD(LAST_FRAME_TOP1, BLOCKTYPE5_LAST_FRAME_MSB, 0, 8)
+REG32(LAST_FRAME_TOP2, 0x238)
+REG32(LAST_FRAME_TOP3, 0x23c)
+
+#define CFRAME_REG_R_MAX (R_LAST_FRAME_TOP3 + 1)
+
+#define FRAME_NUM_QWORDS 25
+#define FRAME_NUM_WORDS (FRAME_NUM_QWORDS * 4) /* 25 * 128 bits */
+
+typedef struct XlnxCFrame {
+    uint32_t data[FRAME_NUM_WORDS];
+} XlnxCFrame;
+
+struct XlnxVersalCFrameReg {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+    MemoryRegion iomem_fdri;
+    qemu_irq irq_cfrm_imr;
+
+    /* 128-bit wfifo.  */
+    uint32_t wfifo[WFIFO_SZ];
+
+    uint32_t regs[CFRAME_REG_R_MAX];
+    RegisterInfo regs_info[CFRAME_REG_R_MAX];
+
+    bool rowon;
+    bool wcfg;
+    bool rcfg;
+
+    GTree *cframes;
+    Fifo32 new_f_data;
+
+    struct {
+        XlnxCfiIf *cfu_fdro;
+        uint32_t blktype_num_frames[7];
+    } cfg;
+    bool row_configured;
+};
+
+struct XlnxVersalCFrameBcastReg {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem_reg;
+    MemoryRegion iomem_fdri;
+
+    /* 128-bit wfifo. */
+    uint32_t wfifo[WFIFO_SZ];
+
+    struct {
+        XlnxCfiIf *cframe[15];
+    } cfg;
+};
+
+#endif
diff --git a/include/hw/misc/xlnx-versal-cfu.h b/include/hw/misc/xlnx-versal-cfu.h
new file mode 100644 (file)
index 0000000..be62bab
--- /dev/null
@@ -0,0 +1,258 @@
+/*
+ * QEMU model of the CFU Configuration Unit.
+ *
+ * Copyright (C) 2023, Advanced Micro Devices, Inc.
+ *
+ * Written by Francisco Iglesias <francisco.iglesias@amd.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * References:
+ * [1] Versal ACAP Technical Reference Manual,
+ *     https://www.xilinx.com/support/documentation/architecture-manuals/am011-versal-acap-trm.pdf
+ *
+ * [2] Versal ACAP Register Reference,
+ *     https://docs.xilinx.com/r/en-US/am012-versal-register-reference/CFU_CSR-Module
+ */
+#ifndef HW_MISC_XLNX_VERSAL_CFU_APB_H
+#define HW_MISC_XLNX_VERSAL_CFU_APB_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "hw/misc/xlnx-cfi-if.h"
+#include "qemu/fifo32.h"
+
+#define TYPE_XLNX_VERSAL_CFU_APB "xlnx,versal-cfu-apb"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFUAPB, XLNX_VERSAL_CFU_APB)
+
+#define TYPE_XLNX_VERSAL_CFU_FDRO "xlnx,versal-cfu-fdro"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFUFDRO, XLNX_VERSAL_CFU_FDRO)
+
+#define TYPE_XLNX_VERSAL_CFU_SFR "xlnx,versal-cfu-sfr"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFUSFR, XLNX_VERSAL_CFU_SFR)
+
+REG32(CFU_ISR, 0x0)
+    FIELD(CFU_ISR, USR_GTS_EVENT, 9, 1)
+    FIELD(CFU_ISR, USR_GSR_EVENT, 8, 1)
+    FIELD(CFU_ISR, SLVERR, 7, 1)
+    FIELD(CFU_ISR, DECOMP_ERROR, 6, 1)
+    FIELD(CFU_ISR, BAD_CFI_PACKET, 5, 1)
+    FIELD(CFU_ISR, AXI_ALIGN_ERROR, 4, 1)
+    FIELD(CFU_ISR, CFI_ROW_ERROR, 3, 1)
+    FIELD(CFU_ISR, CRC32_ERROR, 2, 1)
+    FIELD(CFU_ISR, CRC8_ERROR, 1, 1)
+    FIELD(CFU_ISR, SEU_ENDOFCALIB, 0, 1)
+REG32(CFU_IMR, 0x4)
+    FIELD(CFU_IMR, USR_GTS_EVENT, 9, 1)
+    FIELD(CFU_IMR, USR_GSR_EVENT, 8, 1)
+    FIELD(CFU_IMR, SLVERR, 7, 1)
+    FIELD(CFU_IMR, DECOMP_ERROR, 6, 1)
+    FIELD(CFU_IMR, BAD_CFI_PACKET, 5, 1)
+    FIELD(CFU_IMR, AXI_ALIGN_ERROR, 4, 1)
+    FIELD(CFU_IMR, CFI_ROW_ERROR, 3, 1)
+    FIELD(CFU_IMR, CRC32_ERROR, 2, 1)
+    FIELD(CFU_IMR, CRC8_ERROR, 1, 1)
+    FIELD(CFU_IMR, SEU_ENDOFCALIB, 0, 1)
+REG32(CFU_IER, 0x8)
+    FIELD(CFU_IER, USR_GTS_EVENT, 9, 1)
+    FIELD(CFU_IER, USR_GSR_EVENT, 8, 1)
+    FIELD(CFU_IER, SLVERR, 7, 1)
+    FIELD(CFU_IER, DECOMP_ERROR, 6, 1)
+    FIELD(CFU_IER, BAD_CFI_PACKET, 5, 1)
+    FIELD(CFU_IER, AXI_ALIGN_ERROR, 4, 1)
+    FIELD(CFU_IER, CFI_ROW_ERROR, 3, 1)
+    FIELD(CFU_IER, CRC32_ERROR, 2, 1)
+    FIELD(CFU_IER, CRC8_ERROR, 1, 1)
+    FIELD(CFU_IER, SEU_ENDOFCALIB, 0, 1)
+REG32(CFU_IDR, 0xc)
+    FIELD(CFU_IDR, USR_GTS_EVENT, 9, 1)
+    FIELD(CFU_IDR, USR_GSR_EVENT, 8, 1)
+    FIELD(CFU_IDR, SLVERR, 7, 1)
+    FIELD(CFU_IDR, DECOMP_ERROR, 6, 1)
+    FIELD(CFU_IDR, BAD_CFI_PACKET, 5, 1)
+    FIELD(CFU_IDR, AXI_ALIGN_ERROR, 4, 1)
+    FIELD(CFU_IDR, CFI_ROW_ERROR, 3, 1)
+    FIELD(CFU_IDR, CRC32_ERROR, 2, 1)
+    FIELD(CFU_IDR, CRC8_ERROR, 1, 1)
+    FIELD(CFU_IDR, SEU_ENDOFCALIB, 0, 1)
+REG32(CFU_ITR, 0x10)
+    FIELD(CFU_ITR, USR_GTS_EVENT, 9, 1)
+    FIELD(CFU_ITR, USR_GSR_EVENT, 8, 1)
+    FIELD(CFU_ITR, SLVERR, 7, 1)
+    FIELD(CFU_ITR, DECOMP_ERROR, 6, 1)
+    FIELD(CFU_ITR, BAD_CFI_PACKET, 5, 1)
+    FIELD(CFU_ITR, AXI_ALIGN_ERROR, 4, 1)
+    FIELD(CFU_ITR, CFI_ROW_ERROR, 3, 1)
+    FIELD(CFU_ITR, CRC32_ERROR, 2, 1)
+    FIELD(CFU_ITR, CRC8_ERROR, 1, 1)
+    FIELD(CFU_ITR, SEU_ENDOFCALIB, 0, 1)
+REG32(CFU_PROTECT, 0x14)
+    FIELD(CFU_PROTECT, ACTIVE, 0, 1)
+REG32(CFU_FGCR, 0x18)
+    FIELD(CFU_FGCR, GCLK_CAL, 14, 1)
+    FIELD(CFU_FGCR, SC_HBC_TRIGGER, 13, 1)
+    FIELD(CFU_FGCR, GLOW, 12, 1)
+    FIELD(CFU_FGCR, GPWRDWN, 11, 1)
+    FIELD(CFU_FGCR, GCAP, 10, 1)
+    FIELD(CFU_FGCR, GSCWE, 9, 1)
+    FIELD(CFU_FGCR, GHIGH_B, 8, 1)
+    FIELD(CFU_FGCR, GMC_B, 7, 1)
+    FIELD(CFU_FGCR, GWE, 6, 1)
+    FIELD(CFU_FGCR, GRESTORE, 5, 1)
+    FIELD(CFU_FGCR, GTS_CFG_B, 4, 1)
+    FIELD(CFU_FGCR, GLUTMASK, 3, 1)
+    FIELD(CFU_FGCR, EN_GLOBS_B, 2, 1)
+    FIELD(CFU_FGCR, EOS, 1, 1)
+    FIELD(CFU_FGCR, INIT_COMPLETE, 0, 1)
+REG32(CFU_CTL, 0x1c)
+    FIELD(CFU_CTL, GSR_GSC, 15, 1)
+    FIELD(CFU_CTL, SLVERR_EN, 14, 1)
+    FIELD(CFU_CTL, CRC32_RESET, 13, 1)
+    FIELD(CFU_CTL, AXI_ERROR_EN, 12, 1)
+    FIELD(CFU_CTL, FLUSH_AXI, 11, 1)
+    FIELD(CFU_CTL, SSI_PER_SLR_PR, 10, 1)
+    FIELD(CFU_CTL, GCAP_CLK_EN, 9, 1)
+    FIELD(CFU_CTL, STATUS_SYNC_DISABLE, 8, 1)
+    FIELD(CFU_CTL, IGNORE_CFI_ERROR, 7, 1)
+    FIELD(CFU_CTL, CFRAME_DISABLE, 6, 1)
+    FIELD(CFU_CTL, QWORD_CNT_RESET, 5, 1)
+    FIELD(CFU_CTL, CRC8_DISABLE, 4, 1)
+    FIELD(CFU_CTL, CRC32_CHECK, 3, 1)
+    FIELD(CFU_CTL, DECOMPRESS, 2, 1)
+    FIELD(CFU_CTL, SEU_GO, 1, 1)
+    FIELD(CFU_CTL, CFI_LOCAL_RESET, 0, 1)
+REG32(CFU_CRAM_RW, 0x20)
+    FIELD(CFU_CRAM_RW, RFIFO_AFULL_DEPTH, 18, 9)
+    FIELD(CFU_CRAM_RW, RD_WAVE_CNT_LEFT, 12, 6)
+    FIELD(CFU_CRAM_RW, RD_WAVE_CNT, 6, 6)
+    FIELD(CFU_CRAM_RW, WR_WAVE_CNT, 0, 6)
+REG32(CFU_MASK, 0x28)
+REG32(CFU_CRC_EXPECT, 0x2c)
+REG32(CFU_CFRAME_LEFT_T0, 0x60)
+    FIELD(CFU_CFRAME_LEFT_T0, NUM, 0, 20)
+REG32(CFU_CFRAME_LEFT_T1, 0x64)
+    FIELD(CFU_CFRAME_LEFT_T1, NUM, 0, 20)
+REG32(CFU_CFRAME_LEFT_T2, 0x68)
+    FIELD(CFU_CFRAME_LEFT_T2, NUM, 0, 20)
+REG32(CFU_ROW_RANGE, 0x6c)
+    FIELD(CFU_ROW_RANGE, HALF_FSR, 5, 1)
+    FIELD(CFU_ROW_RANGE, NUM, 0, 5)
+REG32(CFU_STATUS, 0x100)
+    FIELD(CFU_STATUS, SEU_WRITE_ERROR, 30, 1)
+    FIELD(CFU_STATUS, FRCNT_ERROR, 29, 1)
+    FIELD(CFU_STATUS, RSVD_ERROR, 28, 1)
+    FIELD(CFU_STATUS, FDRO_ERROR, 27, 1)
+    FIELD(CFU_STATUS, FDRI_ERROR, 26, 1)
+    FIELD(CFU_STATUS, FDRI_READ_ERROR, 25, 1)
+    FIELD(CFU_STATUS, READ_FDRI_ERROR, 24, 1)
+    FIELD(CFU_STATUS, READ_SFR_ERROR, 23, 1)
+    FIELD(CFU_STATUS, READ_STREAM_ERROR, 22, 1)
+    FIELD(CFU_STATUS, UNKNOWN_STREAM_PKT, 21, 1)
+    FIELD(CFU_STATUS, USR_GTS, 20, 1)
+    FIELD(CFU_STATUS, USR_GSR, 19, 1)
+    FIELD(CFU_STATUS, AXI_BAD_WSTRB, 18, 1)
+    FIELD(CFU_STATUS, AXI_BAD_AR_SIZE, 17, 1)
+    FIELD(CFU_STATUS, AXI_BAD_AW_SIZE, 16, 1)
+    FIELD(CFU_STATUS, AXI_BAD_ARADDR, 15, 1)
+    FIELD(CFU_STATUS, AXI_BAD_AWADDR, 14, 1)
+    FIELD(CFU_STATUS, SCAN_CLEAR_PASS, 13, 1)
+    FIELD(CFU_STATUS, HC_SEC_ERROR, 12, 1)
+    FIELD(CFU_STATUS, GHIGH_B_ISHIGH, 11, 1)
+    FIELD(CFU_STATUS, GHIGH_B_ISLOW, 10, 1)
+    FIELD(CFU_STATUS, GMC_B_ISHIGH, 9, 1)
+    FIELD(CFU_STATUS, GMC_B_ISLOW, 8, 1)
+    FIELD(CFU_STATUS, GPWRDWN_B_ISHIGH, 7, 1)
+    FIELD(CFU_STATUS, CFI_SEU_CRC_ERROR, 6, 1)
+    FIELD(CFU_STATUS, CFI_SEU_ECC_ERROR, 5, 1)
+    FIELD(CFU_STATUS, CFI_SEU_HEARTBEAT, 4, 1)
+    FIELD(CFU_STATUS, SCAN_CLEAR_DONE, 3, 1)
+    FIELD(CFU_STATUS, HC_COMPLETE, 2, 1)
+    FIELD(CFU_STATUS, CFI_CFRAME_BUSY, 1, 1)
+    FIELD(CFU_STATUS, CFU_STREAM_BUSY, 0, 1)
+REG32(CFU_INTERNAL_STATUS, 0x104)
+    FIELD(CFU_INTERNAL_STATUS, SSI_EOS, 22, 1)
+    FIELD(CFU_INTERNAL_STATUS, SSI_GWE, 21, 1)
+    FIELD(CFU_INTERNAL_STATUS, RFIFO_EMPTY, 20, 1)
+    FIELD(CFU_INTERNAL_STATUS, RFIFO_FULL, 19, 1)
+    FIELD(CFU_INTERNAL_STATUS, SEL_SFR, 18, 1)
+    FIELD(CFU_INTERNAL_STATUS, STREAM_CFRAME, 17, 1)
+    FIELD(CFU_INTERNAL_STATUS, FDRI_PHASE, 16, 1)
+    FIELD(CFU_INTERNAL_STATUS, CFI_PIPE_EN, 15, 1)
+    FIELD(CFU_INTERNAL_STATUS, AWFIFO_DCNT, 10, 5)
+    FIELD(CFU_INTERNAL_STATUS, WFIFO_DCNT, 5, 5)
+    FIELD(CFU_INTERNAL_STATUS, REPAIR_BUSY, 4, 1)
+    FIELD(CFU_INTERNAL_STATUS, TRIMU_BUSY, 3, 1)
+    FIELD(CFU_INTERNAL_STATUS, TRIMB_BUSY, 2, 1)
+    FIELD(CFU_INTERNAL_STATUS, HCLEANR_BUSY, 1, 1)
+    FIELD(CFU_INTERNAL_STATUS, HCLEAN_BUSY, 0, 1)
+REG32(CFU_QWORD_CNT, 0x108)
+REG32(CFU_CRC_LIVE, 0x10c)
+REG32(CFU_PENDING_READ_CNT, 0x110)
+    FIELD(CFU_PENDING_READ_CNT, NUM, 0, 25)
+REG32(CFU_FDRI_CNT, 0x114)
+REG32(CFU_ECO1, 0x118)
+REG32(CFU_ECO2, 0x11c)
+
+#define R_MAX (R_CFU_ECO2 + 1)
+
+#define NUM_STREAM 2
+#define WFIFO_SZ 4
+
+struct XlnxVersalCFUAPB {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+    MemoryRegion iomem_stream[NUM_STREAM];
+    qemu_irq irq_cfu_imr;
+
+    /* 128-bit wfifo.  */
+    uint32_t wfifo[WFIFO_SZ];
+
+    uint32_t regs[R_MAX];
+    RegisterInfo regs_info[R_MAX];
+
+    uint8_t fdri_row_addr;
+
+    struct {
+        XlnxCfiIf *cframe[15];
+    } cfg;
+};
+
+
+struct XlnxVersalCFUFDRO {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem_fdro;
+
+    Fifo32 fdro_data;
+};
+
+struct XlnxVersalCFUSFR {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem_sfr;
+
+    /* 128-bit wfifo. */
+    uint32_t wfifo[WFIFO_SZ];
+
+    struct {
+        XlnxVersalCFUAPB *cfu;
+    } cfg;
+};
+
+/**
+ * This is a helper function for updating a CFI data write fifo, an array of 4
+ * uint32_t and 128 bits of data that are allowed to be written through 4
+ * sequential 32 bit accesses. After the last index has been written into the
+ * write fifo (wfifo), the data is copied to and returned in a secondary fifo
+ * provided to the function (wfifo_ret), and the write fifo is cleared
+ * (zeroized).
+ *
+ * @addr: the address used when calculating the wfifo array index to update
+ * @value: the value to write into the wfifo array
+ * @wfifo: the wfifo to update
+ * @wfifo_out: will return the wfifo data when all 128 bits have been written
+ *
+ * @return: true if all 128 bits have been updated.
+ */
+bool update_wfifo(hwaddr addr, uint64_t value,
+                  uint32_t *wfifo, uint32_t *wfifo_ret);
+
+#endif
diff --git a/include/hw/misc/xlnx-versal-crl.h b/include/hw/misc/xlnx-versal-crl.h
new file mode 100644 (file)
index 0000000..2857f41
--- /dev/null
@@ -0,0 +1,235 @@
+/*
+ * QEMU model of the Clock-Reset-LPD (CRL).
+ *
+ * Copyright (c) 2022 Xilinx Inc.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Written by Edgar E. Iglesias <edgar.iglesias@xilinx.com>
+ */
+#ifndef HW_MISC_XLNX_VERSAL_CRL_H
+#define HW_MISC_XLNX_VERSAL_CRL_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "target/arm/cpu.h"
+
+#define TYPE_XLNX_VERSAL_CRL "xlnx,versal-crl"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCRL, XLNX_VERSAL_CRL)
+
+REG32(ERR_CTRL, 0x0)
+    FIELD(ERR_CTRL, SLVERR_ENABLE, 0, 1)
+REG32(IR_STATUS, 0x4)
+    FIELD(IR_STATUS, ADDR_DECODE_ERR, 0, 1)
+REG32(IR_MASK, 0x8)
+    FIELD(IR_MASK, ADDR_DECODE_ERR, 0, 1)
+REG32(IR_ENABLE, 0xc)
+    FIELD(IR_ENABLE, ADDR_DECODE_ERR, 0, 1)
+REG32(IR_DISABLE, 0x10)
+    FIELD(IR_DISABLE, ADDR_DECODE_ERR, 0, 1)
+REG32(WPROT, 0x1c)
+    FIELD(WPROT, ACTIVE, 0, 1)
+REG32(PLL_CLK_OTHER_DMN, 0x20)
+    FIELD(PLL_CLK_OTHER_DMN, APLL_BYPASS, 0, 1)
+REG32(RPLL_CTRL, 0x40)
+    FIELD(RPLL_CTRL, POST_SRC, 24, 3)
+    FIELD(RPLL_CTRL, PRE_SRC, 20, 3)
+    FIELD(RPLL_CTRL, CLKOUTDIV, 16, 2)
+    FIELD(RPLL_CTRL, FBDIV, 8, 8)
+    FIELD(RPLL_CTRL, BYPASS, 3, 1)
+    FIELD(RPLL_CTRL, RESET, 0, 1)
+REG32(RPLL_CFG, 0x44)
+    FIELD(RPLL_CFG, LOCK_DLY, 25, 7)
+    FIELD(RPLL_CFG, LOCK_CNT, 13, 10)
+    FIELD(RPLL_CFG, LFHF, 10, 2)
+    FIELD(RPLL_CFG, CP, 5, 4)
+    FIELD(RPLL_CFG, RES, 0, 4)
+REG32(RPLL_FRAC_CFG, 0x48)
+    FIELD(RPLL_FRAC_CFG, ENABLED, 31, 1)
+    FIELD(RPLL_FRAC_CFG, SEED, 22, 3)
+    FIELD(RPLL_FRAC_CFG, ALGRTHM, 19, 1)
+    FIELD(RPLL_FRAC_CFG, ORDER, 18, 1)
+    FIELD(RPLL_FRAC_CFG, DATA, 0, 16)
+REG32(PLL_STATUS, 0x50)
+    FIELD(PLL_STATUS, RPLL_STABLE, 2, 1)
+    FIELD(PLL_STATUS, RPLL_LOCK, 0, 1)
+REG32(RPLL_TO_XPD_CTRL, 0x100)
+    FIELD(RPLL_TO_XPD_CTRL, CLKACT, 25, 1)
+    FIELD(RPLL_TO_XPD_CTRL, DIVISOR0, 8, 10)
+REG32(LPD_TOP_SWITCH_CTRL, 0x104)
+    FIELD(LPD_TOP_SWITCH_CTRL, CLKACT_ADMA, 26, 1)
+    FIELD(LPD_TOP_SWITCH_CTRL, CLKACT, 25, 1)
+    FIELD(LPD_TOP_SWITCH_CTRL, DIVISOR0, 8, 10)
+    FIELD(LPD_TOP_SWITCH_CTRL, SRCSEL, 0, 3)
+REG32(LPD_LSBUS_CTRL, 0x108)
+    FIELD(LPD_LSBUS_CTRL, CLKACT, 25, 1)
+    FIELD(LPD_LSBUS_CTRL, DIVISOR0, 8, 10)
+    FIELD(LPD_LSBUS_CTRL, SRCSEL, 0, 3)
+REG32(CPU_R5_CTRL, 0x10c)
+    FIELD(CPU_R5_CTRL, CLKACT_OCM2, 28, 1)
+    FIELD(CPU_R5_CTRL, CLKACT_OCM, 27, 1)
+    FIELD(CPU_R5_CTRL, CLKACT_CORE, 26, 1)
+    FIELD(CPU_R5_CTRL, CLKACT, 25, 1)
+    FIELD(CPU_R5_CTRL, DIVISOR0, 8, 10)
+    FIELD(CPU_R5_CTRL, SRCSEL, 0, 3)
+REG32(IOU_SWITCH_CTRL, 0x114)
+    FIELD(IOU_SWITCH_CTRL, CLKACT, 25, 1)
+    FIELD(IOU_SWITCH_CTRL, DIVISOR0, 8, 10)
+    FIELD(IOU_SWITCH_CTRL, SRCSEL, 0, 3)
+REG32(GEM0_REF_CTRL, 0x118)
+    FIELD(GEM0_REF_CTRL, CLKACT_RX, 27, 1)
+    FIELD(GEM0_REF_CTRL, CLKACT_TX, 26, 1)
+    FIELD(GEM0_REF_CTRL, CLKACT, 25, 1)
+    FIELD(GEM0_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(GEM0_REF_CTRL, SRCSEL, 0, 3)
+REG32(GEM1_REF_CTRL, 0x11c)
+    FIELD(GEM1_REF_CTRL, CLKACT_RX, 27, 1)
+    FIELD(GEM1_REF_CTRL, CLKACT_TX, 26, 1)
+    FIELD(GEM1_REF_CTRL, CLKACT, 25, 1)
+    FIELD(GEM1_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(GEM1_REF_CTRL, SRCSEL, 0, 3)
+REG32(GEM_TSU_REF_CTRL, 0x120)
+    FIELD(GEM_TSU_REF_CTRL, CLKACT, 25, 1)
+    FIELD(GEM_TSU_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(GEM_TSU_REF_CTRL, SRCSEL, 0, 3)
+REG32(USB0_BUS_REF_CTRL, 0x124)
+    FIELD(USB0_BUS_REF_CTRL, CLKACT, 25, 1)
+    FIELD(USB0_BUS_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(USB0_BUS_REF_CTRL, SRCSEL, 0, 3)
+REG32(UART0_REF_CTRL, 0x128)
+    FIELD(UART0_REF_CTRL, CLKACT, 25, 1)
+    FIELD(UART0_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(UART0_REF_CTRL, SRCSEL, 0, 3)
+REG32(UART1_REF_CTRL, 0x12c)
+    FIELD(UART1_REF_CTRL, CLKACT, 25, 1)
+    FIELD(UART1_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(UART1_REF_CTRL, SRCSEL, 0, 3)
+REG32(SPI0_REF_CTRL, 0x130)
+    FIELD(SPI0_REF_CTRL, CLKACT, 25, 1)
+    FIELD(SPI0_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(SPI0_REF_CTRL, SRCSEL, 0, 3)
+REG32(SPI1_REF_CTRL, 0x134)
+    FIELD(SPI1_REF_CTRL, CLKACT, 25, 1)
+    FIELD(SPI1_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(SPI1_REF_CTRL, SRCSEL, 0, 3)
+REG32(CAN0_REF_CTRL, 0x138)
+    FIELD(CAN0_REF_CTRL, CLKACT, 25, 1)
+    FIELD(CAN0_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(CAN0_REF_CTRL, SRCSEL, 0, 3)
+REG32(CAN1_REF_CTRL, 0x13c)
+    FIELD(CAN1_REF_CTRL, CLKACT, 25, 1)
+    FIELD(CAN1_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(CAN1_REF_CTRL, SRCSEL, 0, 3)
+REG32(I2C0_REF_CTRL, 0x140)
+    FIELD(I2C0_REF_CTRL, CLKACT, 25, 1)
+    FIELD(I2C0_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(I2C0_REF_CTRL, SRCSEL, 0, 3)
+REG32(I2C1_REF_CTRL, 0x144)
+    FIELD(I2C1_REF_CTRL, CLKACT, 25, 1)
+    FIELD(I2C1_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(I2C1_REF_CTRL, SRCSEL, 0, 3)
+REG32(DBG_LPD_CTRL, 0x148)
+    FIELD(DBG_LPD_CTRL, CLKACT, 25, 1)
+    FIELD(DBG_LPD_CTRL, DIVISOR0, 8, 10)
+    FIELD(DBG_LPD_CTRL, SRCSEL, 0, 3)
+REG32(TIMESTAMP_REF_CTRL, 0x14c)
+    FIELD(TIMESTAMP_REF_CTRL, CLKACT, 25, 1)
+    FIELD(TIMESTAMP_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(TIMESTAMP_REF_CTRL, SRCSEL, 0, 3)
+REG32(CRL_SAFETY_CHK, 0x150)
+REG32(PSM_REF_CTRL, 0x154)
+    FIELD(PSM_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(PSM_REF_CTRL, SRCSEL, 0, 3)
+REG32(DBG_TSTMP_CTRL, 0x158)
+    FIELD(DBG_TSTMP_CTRL, CLKACT, 25, 1)
+    FIELD(DBG_TSTMP_CTRL, DIVISOR0, 8, 10)
+    FIELD(DBG_TSTMP_CTRL, SRCSEL, 0, 3)
+REG32(CPM_TOPSW_REF_CTRL, 0x15c)
+    FIELD(CPM_TOPSW_REF_CTRL, CLKACT, 25, 1)
+    FIELD(CPM_TOPSW_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(CPM_TOPSW_REF_CTRL, SRCSEL, 0, 3)
+REG32(USB3_DUAL_REF_CTRL, 0x160)
+    FIELD(USB3_DUAL_REF_CTRL, CLKACT, 25, 1)
+    FIELD(USB3_DUAL_REF_CTRL, DIVISOR0, 8, 10)
+    FIELD(USB3_DUAL_REF_CTRL, SRCSEL, 0, 3)
+REG32(RST_CPU_R5, 0x300)
+    FIELD(RST_CPU_R5, RESET_PGE, 4, 1)
+    FIELD(RST_CPU_R5, RESET_AMBA, 2, 1)
+    FIELD(RST_CPU_R5, RESET_CPU1, 1, 1)
+    FIELD(RST_CPU_R5, RESET_CPU0, 0, 1)
+REG32(RST_ADMA, 0x304)
+    FIELD(RST_ADMA, RESET, 0, 1)
+REG32(RST_GEM0, 0x308)
+    FIELD(RST_GEM0, RESET, 0, 1)
+REG32(RST_GEM1, 0x30c)
+    FIELD(RST_GEM1, RESET, 0, 1)
+REG32(RST_SPARE, 0x310)
+    FIELD(RST_SPARE, RESET, 0, 1)
+REG32(RST_USB0, 0x314)
+    FIELD(RST_USB0, RESET, 0, 1)
+REG32(RST_UART0, 0x318)
+    FIELD(RST_UART0, RESET, 0, 1)
+REG32(RST_UART1, 0x31c)
+    FIELD(RST_UART1, RESET, 0, 1)
+REG32(RST_SPI0, 0x320)
+    FIELD(RST_SPI0, RESET, 0, 1)
+REG32(RST_SPI1, 0x324)
+    FIELD(RST_SPI1, RESET, 0, 1)
+REG32(RST_CAN0, 0x328)
+    FIELD(RST_CAN0, RESET, 0, 1)
+REG32(RST_CAN1, 0x32c)
+    FIELD(RST_CAN1, RESET, 0, 1)
+REG32(RST_I2C0, 0x330)
+    FIELD(RST_I2C0, RESET, 0, 1)
+REG32(RST_I2C1, 0x334)
+    FIELD(RST_I2C1, RESET, 0, 1)
+REG32(RST_DBG_LPD, 0x338)
+    FIELD(RST_DBG_LPD, RPU_DBG1_RESET, 5, 1)
+    FIELD(RST_DBG_LPD, RPU_DBG0_RESET, 4, 1)
+    FIELD(RST_DBG_LPD, RESET_HSDP, 1, 1)
+    FIELD(RST_DBG_LPD, RESET, 0, 1)
+REG32(RST_GPIO, 0x33c)
+    FIELD(RST_GPIO, RESET, 0, 1)
+REG32(RST_TTC, 0x344)
+    FIELD(RST_TTC, TTC3_RESET, 3, 1)
+    FIELD(RST_TTC, TTC2_RESET, 2, 1)
+    FIELD(RST_TTC, TTC1_RESET, 1, 1)
+    FIELD(RST_TTC, TTC0_RESET, 0, 1)
+REG32(RST_TIMESTAMP, 0x348)
+    FIELD(RST_TIMESTAMP, RESET, 0, 1)
+REG32(RST_SWDT, 0x34c)
+    FIELD(RST_SWDT, RESET, 0, 1)
+REG32(RST_OCM, 0x350)
+    FIELD(RST_OCM, RESET, 0, 1)
+REG32(RST_IPI, 0x354)
+    FIELD(RST_IPI, RESET, 0, 1)
+REG32(RST_SYSMON, 0x358)
+    FIELD(RST_SYSMON, SEQ_RST, 1, 1)
+    FIELD(RST_SYSMON, CFG_RST, 0, 1)
+REG32(RST_FPD, 0x360)
+    FIELD(RST_FPD, SRST, 1, 1)
+    FIELD(RST_FPD, POR, 0, 1)
+REG32(PSM_RST_MODE, 0x370)
+    FIELD(PSM_RST_MODE, WAKEUP, 2, 1)
+    FIELD(PSM_RST_MODE, RST_MODE, 0, 2)
+
+#define CRL_R_MAX (R_PSM_RST_MODE + 1)
+
+#define RPU_MAX_CPU 2
+
+struct XlnxVersalCRL {
+    SysBusDevice parent_obj;
+    qemu_irq irq;
+
+    struct {
+        ARMCPU *cpu_r5[RPU_MAX_CPU];
+        DeviceState *adma[8];
+        DeviceState *uart[2];
+        DeviceState *gem[2];
+        DeviceState *usb;
+    } cfg;
+
+    RegisterInfoArray *reg_array;
+    uint32_t regs[CRL_R_MAX];
+    RegisterInfo regs_info[CRL_R_MAX];
+};
+#endif
diff --git a/include/hw/misc/xlnx-versal-pmc-iou-slcr.h b/include/hw/misc/xlnx-versal-pmc-iou-slcr.h
new file mode 100644 (file)
index 0000000..0c4a4fd
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Header file for the Xilinx Versal's PMC IOU SLCR
+ *
+ * Copyright (C) 2021 Xilinx Inc
+ * Written by Edgar E. Iglesias <edgar.iglesias@xilinx.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * This is a model of Xilinx Versal's PMC I/O Peripheral Control and Status
+ * module documented in Versal's Technical Reference manual [1] and the Versal
+ * ACAP Register reference [2].
+ *
+ * References:
+ *
+ * [1] Versal ACAP Technical Reference Manual,
+ *     https://www.xilinx.com/support/documentation/architecture-manuals/am011-versal-acap-trm.pdf
+ *
+ * [2] Versal ACAP Register Reference,
+ *     https://docs.xilinx.com/r/en-US/am012-versal-register-reference/PMC_IOP_SLCR-Module
+ *
+ * QEMU interface:
+ * + sysbus MMIO region 0: MemoryRegion for the device's registers
+ * + sysbus IRQ 0: PMC (AXI and APB) parity error interrupt detected by the PMC
+ *   I/O peripherals.
+ * + sysbus IRQ 1: Device interrupt.
+ * + Named GPIO output "sd-emmc-sel[0]": Enables 0: SD mode or 1: eMMC mode on
+ *   SD/eMMC controller 0.
+ * + Named GPIO output "sd-emmc-sel[1]": Enables 0: SD mode or 1: eMMC mode on
+ *   SD/eMMC controller 1.
+ * + Named GPIO output "qspi-ospi-mux-sel": Selects 0: QSPI linear region or 1:
+ *   OSPI linear region.
+ * + Named GPIO output "ospi-mux-sel": Selects 0: OSPI Indirect access mode or
+ *   1: OSPI direct access mode.
+ */
+
+#ifndef XLNX_VERSAL_PMC_IOU_SLCR_H
+#define XLNX_VERSAL_PMC_IOU_SLCR_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+
+#define TYPE_XILINX_VERSAL_PMC_IOU_SLCR "xlnx.versal-pmc-iou-slcr"
+
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalPmcIouSlcr, XILINX_VERSAL_PMC_IOU_SLCR)
+
+#define XILINX_VERSAL_PMC_IOU_SLCR_R_MAX (0x828 / 4 + 1)
+
+struct XlnxVersalPmcIouSlcr {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+    qemu_irq irq_parity_imr;
+    qemu_irq irq_imr;
+    qemu_irq sd_emmc_sel[2];
+    qemu_irq qspi_ospi_mux_sel;
+    qemu_irq ospi_mux_sel;
+
+    uint32_t regs[XILINX_VERSAL_PMC_IOU_SLCR_R_MAX];
+    RegisterInfo regs_info[XILINX_VERSAL_PMC_IOU_SLCR_R_MAX];
+};
+
+#endif /* XLNX_VERSAL_PMC_IOU_SLCR_H */
diff --git a/include/hw/misc/xlnx-versal-trng.h b/include/hw/misc/xlnx-versal-trng.h
new file mode 100644 (file)
index 0000000..0bcef8a
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Non-crypto strength model of the True Random Number Generator
+ * in the AMD/Xilinx Versal device family.
+ *
+ * Copyright (c) 2017-2020 Xilinx Inc.
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef XLNX_VERSAL_TRNG_H
+#define XLNX_VERSAL_TRNG_H
+
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "hw/register.h"
+
+#define TYPE_XLNX_VERSAL_TRNG "xlnx.versal-trng"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalTRng, XLNX_VERSAL_TRNG);
+
+#define RMAX_XLNX_VERSAL_TRNG ((0xf0 / 4) + 1)
+
+typedef struct XlnxVersalTRng {
+    SysBusDevice parent_obj;
+    qemu_irq irq;
+    GRand *prng;
+
+    uint32_t hw_version;
+    uint32_t forced_faults;
+
+    uint32_t rand_count;
+    uint64_t rand_reseed;
+
+    uint64_t forced_prng_seed;
+    uint64_t forced_prng_count;
+    uint64_t tst_seed[2];
+
+    uint32_t regs[RMAX_XLNX_VERSAL_TRNG];
+    RegisterInfo regs_info[RMAX_XLNX_VERSAL_TRNG];
+} XlnxVersalTRng;
+
+#undef RMAX_XLNX_VERSAL_TRNG
+#endif
diff --git a/include/hw/misc/xlnx-versal-xramc.h b/include/hw/misc/xlnx-versal-xramc.h
new file mode 100644 (file)
index 0000000..d3d1862
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * QEMU model of the Xilinx XRAM Controller.
+ *
+ * Copyright (c) 2021 Xilinx Inc.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Written by Edgar E. Iglesias <edgar.iglesias@xilinx.com>
+ */
+
+#ifndef XLNX_VERSAL_XRAMC_H
+#define XLNX_VERSAL_XRAMC_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+
+#define TYPE_XLNX_XRAM_CTRL "xlnx.versal-xramc"
+
+#define XLNX_XRAM_CTRL(obj) \
+     OBJECT_CHECK(XlnxXramCtrl, (obj), TYPE_XLNX_XRAM_CTRL)
+
+REG32(XRAM_ERR_CTRL, 0x0)
+    FIELD(XRAM_ERR_CTRL, UE_RES, 3, 1)
+    FIELD(XRAM_ERR_CTRL, PWR_ERR_RES, 2, 1)
+    FIELD(XRAM_ERR_CTRL, PZ_ERR_RES, 1, 1)
+    FIELD(XRAM_ERR_CTRL, APB_ERR_RES, 0, 1)
+REG32(XRAM_ISR, 0x4)
+    FIELD(XRAM_ISR, INV_APB, 0, 1)
+REG32(XRAM_IMR, 0x8)
+    FIELD(XRAM_IMR, INV_APB, 0, 1)
+REG32(XRAM_IEN, 0xc)
+    FIELD(XRAM_IEN, INV_APB, 0, 1)
+REG32(XRAM_IDS, 0x10)
+    FIELD(XRAM_IDS, INV_APB, 0, 1)
+REG32(XRAM_ECC_CNTL, 0x14)
+    FIELD(XRAM_ECC_CNTL, FI_MODE, 2, 1)
+    FIELD(XRAM_ECC_CNTL, DET_ONLY, 1, 1)
+    FIELD(XRAM_ECC_CNTL, ECC_ON_OFF, 0, 1)
+REG32(XRAM_CLR_EXE, 0x18)
+    FIELD(XRAM_CLR_EXE, MON_7, 7, 1)
+    FIELD(XRAM_CLR_EXE, MON_6, 6, 1)
+    FIELD(XRAM_CLR_EXE, MON_5, 5, 1)
+    FIELD(XRAM_CLR_EXE, MON_4, 4, 1)
+    FIELD(XRAM_CLR_EXE, MON_3, 3, 1)
+    FIELD(XRAM_CLR_EXE, MON_2, 2, 1)
+    FIELD(XRAM_CLR_EXE, MON_1, 1, 1)
+    FIELD(XRAM_CLR_EXE, MON_0, 0, 1)
+REG32(XRAM_CE_FFA, 0x1c)
+    FIELD(XRAM_CE_FFA, ADDR, 0, 20)
+REG32(XRAM_CE_FFD0, 0x20)
+REG32(XRAM_CE_FFD1, 0x24)
+REG32(XRAM_CE_FFD2, 0x28)
+REG32(XRAM_CE_FFD3, 0x2c)
+REG32(XRAM_CE_FFE, 0x30)
+    FIELD(XRAM_CE_FFE, SYNDROME, 0, 16)
+REG32(XRAM_UE_FFA, 0x34)
+    FIELD(XRAM_UE_FFA, ADDR, 0, 20)
+REG32(XRAM_UE_FFD0, 0x38)
+REG32(XRAM_UE_FFD1, 0x3c)
+REG32(XRAM_UE_FFD2, 0x40)
+REG32(XRAM_UE_FFD3, 0x44)
+REG32(XRAM_UE_FFE, 0x48)
+    FIELD(XRAM_UE_FFE, SYNDROME, 0, 16)
+REG32(XRAM_FI_D0, 0x4c)
+REG32(XRAM_FI_D1, 0x50)
+REG32(XRAM_FI_D2, 0x54)
+REG32(XRAM_FI_D3, 0x58)
+REG32(XRAM_FI_SY, 0x5c)
+    FIELD(XRAM_FI_SY, DATA, 0, 16)
+REG32(XRAM_RMW_UE_FFA, 0x70)
+    FIELD(XRAM_RMW_UE_FFA, ADDR, 0, 20)
+REG32(XRAM_FI_CNTR, 0x74)
+    FIELD(XRAM_FI_CNTR, COUNT, 0, 24)
+REG32(XRAM_IMP, 0x80)
+    FIELD(XRAM_IMP, SIZE, 0, 4)
+REG32(XRAM_PRDY_DBG, 0x84)
+    FIELD(XRAM_PRDY_DBG, ISLAND3, 12, 4)
+    FIELD(XRAM_PRDY_DBG, ISLAND2, 8, 4)
+    FIELD(XRAM_PRDY_DBG, ISLAND1, 4, 4)
+    FIELD(XRAM_PRDY_DBG, ISLAND0, 0, 4)
+REG32(XRAM_SAFETY_CHK, 0xff8)
+
+#define XRAM_CTRL_R_MAX (R_XRAM_SAFETY_CHK + 1)
+
+typedef struct XlnxXramCtrl {
+    SysBusDevice parent_obj;
+    MemoryRegion ram;
+    qemu_irq irq;
+
+    struct {
+        uint64_t size;
+        unsigned int encoded_size;
+    } cfg;
+
+    RegisterInfoArray *reg_array;
+    uint32_t regs[XRAM_CTRL_R_MAX];
+    RegisterInfo regs_info[XRAM_CTRL_R_MAX];
+} XlnxXramCtrl;
+#endif
diff --git a/include/hw/misc/xlnx-zynqmp-apu-ctrl.h b/include/hw/misc/xlnx-zynqmp-apu-ctrl.h
new file mode 100644 (file)
index 0000000..c3bf3c1
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * QEMU model of ZynqMP APU Control.
+ *
+ * Copyright (c) 2013-2022 Xilinx Inc
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Written by Peter Crosthwaite <peter.crosthwaite@xilinx.com> and
+ * Edgar E. Iglesias <edgar.iglesias@xilinx.com>
+ *
+ */
+#ifndef HW_MISC_XLNX_ZYNQMP_APU_CTRL_H
+#define HW_MISC_XLNX_ZYNQMP_APU_CTRL_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "target/arm/cpu-qom.h"
+
+#define TYPE_XLNX_ZYNQMP_APU_CTRL "xlnx.apu-ctrl"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPAPUCtrl, XLNX_ZYNQMP_APU_CTRL)
+
+REG32(APU_ERR_CTRL, 0x0)
+    FIELD(APU_ERR_CTRL, PSLVERR, 0, 1)
+REG32(ISR, 0x10)
+    FIELD(ISR, INV_APB, 0, 1)
+REG32(IMR, 0x14)
+    FIELD(IMR, INV_APB, 0, 1)
+REG32(IEN, 0x18)
+    FIELD(IEN, INV_APB, 0, 1)
+REG32(IDS, 0x1c)
+    FIELD(IDS, INV_APB, 0, 1)
+REG32(CONFIG_0, 0x20)
+    FIELD(CONFIG_0, CFGTE, 24, 4)
+    FIELD(CONFIG_0, CFGEND, 16, 4)
+    FIELD(CONFIG_0, VINITHI, 8, 4)
+    FIELD(CONFIG_0, AA64NAA32, 0, 4)
+REG32(CONFIG_1, 0x24)
+    FIELD(CONFIG_1, L2RSTDISABLE, 29, 1)
+    FIELD(CONFIG_1, L1RSTDISABLE, 28, 1)
+    FIELD(CONFIG_1, CP15DISABLE, 0, 4)
+REG32(RVBARADDR0L, 0x40)
+    FIELD(RVBARADDR0L, ADDR, 2, 30)
+REG32(RVBARADDR0H, 0x44)
+    FIELD(RVBARADDR0H, ADDR, 0, 8)
+REG32(RVBARADDR1L, 0x48)
+    FIELD(RVBARADDR1L, ADDR, 2, 30)
+REG32(RVBARADDR1H, 0x4c)
+    FIELD(RVBARADDR1H, ADDR, 0, 8)
+REG32(RVBARADDR2L, 0x50)
+    FIELD(RVBARADDR2L, ADDR, 2, 30)
+REG32(RVBARADDR2H, 0x54)
+    FIELD(RVBARADDR2H, ADDR, 0, 8)
+REG32(RVBARADDR3L, 0x58)
+    FIELD(RVBARADDR3L, ADDR, 2, 30)
+REG32(RVBARADDR3H, 0x5c)
+    FIELD(RVBARADDR3H, ADDR, 0, 8)
+REG32(ACE_CTRL, 0x60)
+    FIELD(ACE_CTRL, AWQOS, 16, 4)
+    FIELD(ACE_CTRL, ARQOS, 0, 4)
+REG32(SNOOP_CTRL, 0x80)
+    FIELD(SNOOP_CTRL, ACE_INACT, 4, 1)
+    FIELD(SNOOP_CTRL, ACP_INACT, 0, 1)
+REG32(PWRCTL, 0x90)
+    FIELD(PWRCTL, CLREXMONREQ, 17, 1)
+    FIELD(PWRCTL, L2FLUSHREQ, 16, 1)
+    FIELD(PWRCTL, CPUPWRDWNREQ, 0, 4)
+REG32(PWRSTAT, 0x94)
+    FIELD(PWRSTAT, CLREXMONACK, 17, 1)
+    FIELD(PWRSTAT, L2FLUSHDONE, 16, 1)
+    FIELD(PWRSTAT, DBGNOPWRDWN, 0, 4)
+
+#define APU_R_MAX ((R_PWRSTAT) + 1)
+
+#define APU_MAX_CPU    4
+
+struct XlnxZynqMPAPUCtrl {
+    SysBusDevice busdev;
+
+    ARMCPU *cpus[APU_MAX_CPU];
+    /* WFIs towards PMU. */
+    qemu_irq wfi_out[4];
+    /* CPU Power status towards INTC Redirect. */
+    qemu_irq cpu_power_status[4];
+    qemu_irq irq_imr;
+
+    uint8_t cpu_pwrdwn_req;
+    uint8_t cpu_in_wfi;
+
+    RegisterInfoArray *reg_array;
+    uint32_t regs[APU_R_MAX];
+    RegisterInfo regs_info[APU_R_MAX];
+};
+
+#endif
diff --git a/include/hw/misc/xlnx-zynqmp-crf.h b/include/hw/misc/xlnx-zynqmp-crf.h
new file mode 100644 (file)
index 0000000..02ef0bd
--- /dev/null
@@ -0,0 +1,211 @@
+/*
+ * QEMU model of the CRF - Clock Reset FPD.
+ *
+ * Copyright (c) 2022 Xilinx Inc.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Written by Edgar E. Iglesias <edgar.iglesias@xilinx.com>
+ */
+#ifndef HW_MISC_XLNX_ZYNQMP_CRF_H
+#define HW_MISC_XLNX_ZYNQMP_CRF_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+
+#define TYPE_XLNX_ZYNQMP_CRF "xlnx.zynqmp_crf"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPCRF, XLNX_ZYNQMP_CRF)
+
+REG32(ERR_CTRL, 0x0)
+    FIELD(ERR_CTRL, SLVERR_ENABLE, 0, 1)
+REG32(IR_STATUS, 0x4)
+    FIELD(IR_STATUS, ADDR_DECODE_ERR, 0, 1)
+REG32(IR_MASK, 0x8)
+    FIELD(IR_MASK, ADDR_DECODE_ERR, 0, 1)
+REG32(IR_ENABLE, 0xc)
+    FIELD(IR_ENABLE, ADDR_DECODE_ERR, 0, 1)
+REG32(IR_DISABLE, 0x10)
+    FIELD(IR_DISABLE, ADDR_DECODE_ERR, 0, 1)
+REG32(CRF_WPROT, 0x1c)
+    FIELD(CRF_WPROT, ACTIVE, 0, 1)
+REG32(APLL_CTRL, 0x20)
+    FIELD(APLL_CTRL, POST_SRC, 24, 3)
+    FIELD(APLL_CTRL, PRE_SRC, 20, 3)
+    FIELD(APLL_CTRL, CLKOUTDIV, 17, 1)
+    FIELD(APLL_CTRL, DIV2, 16, 1)
+    FIELD(APLL_CTRL, FBDIV, 8, 7)
+    FIELD(APLL_CTRL, BYPASS, 3, 1)
+    FIELD(APLL_CTRL, RESET, 0, 1)
+REG32(APLL_CFG, 0x24)
+    FIELD(APLL_CFG, LOCK_DLY, 25, 7)
+    FIELD(APLL_CFG, LOCK_CNT, 13, 10)
+    FIELD(APLL_CFG, LFHF, 10, 2)
+    FIELD(APLL_CFG, CP, 5, 4)
+    FIELD(APLL_CFG, RES, 0, 4)
+REG32(APLL_FRAC_CFG, 0x28)
+    FIELD(APLL_FRAC_CFG, ENABLED, 31, 1)
+    FIELD(APLL_FRAC_CFG, SEED, 22, 3)
+    FIELD(APLL_FRAC_CFG, ALGRTHM, 19, 1)
+    FIELD(APLL_FRAC_CFG, ORDER, 18, 1)
+    FIELD(APLL_FRAC_CFG, DATA, 0, 16)
+REG32(DPLL_CTRL, 0x2c)
+    FIELD(DPLL_CTRL, POST_SRC, 24, 3)
+    FIELD(DPLL_CTRL, PRE_SRC, 20, 3)
+    FIELD(DPLL_CTRL, CLKOUTDIV, 17, 1)
+    FIELD(DPLL_CTRL, DIV2, 16, 1)
+    FIELD(DPLL_CTRL, FBDIV, 8, 7)
+    FIELD(DPLL_CTRL, BYPASS, 3, 1)
+    FIELD(DPLL_CTRL, RESET, 0, 1)
+REG32(DPLL_CFG, 0x30)
+    FIELD(DPLL_CFG, LOCK_DLY, 25, 7)
+    FIELD(DPLL_CFG, LOCK_CNT, 13, 10)
+    FIELD(DPLL_CFG, LFHF, 10, 2)
+    FIELD(DPLL_CFG, CP, 5, 4)
+    FIELD(DPLL_CFG, RES, 0, 4)
+REG32(DPLL_FRAC_CFG, 0x34)
+    FIELD(DPLL_FRAC_CFG, ENABLED, 31, 1)
+    FIELD(DPLL_FRAC_CFG, SEED, 22, 3)
+    FIELD(DPLL_FRAC_CFG, ALGRTHM, 19, 1)
+    FIELD(DPLL_FRAC_CFG, ORDER, 18, 1)
+    FIELD(DPLL_FRAC_CFG, DATA, 0, 16)
+REG32(VPLL_CTRL, 0x38)
+    FIELD(VPLL_CTRL, POST_SRC, 24, 3)
+    FIELD(VPLL_CTRL, PRE_SRC, 20, 3)
+    FIELD(VPLL_CTRL, CLKOUTDIV, 17, 1)
+    FIELD(VPLL_CTRL, DIV2, 16, 1)
+    FIELD(VPLL_CTRL, FBDIV, 8, 7)
+    FIELD(VPLL_CTRL, BYPASS, 3, 1)
+    FIELD(VPLL_CTRL, RESET, 0, 1)
+REG32(VPLL_CFG, 0x3c)
+    FIELD(VPLL_CFG, LOCK_DLY, 25, 7)
+    FIELD(VPLL_CFG, LOCK_CNT, 13, 10)
+    FIELD(VPLL_CFG, LFHF, 10, 2)
+    FIELD(VPLL_CFG, CP, 5, 4)
+    FIELD(VPLL_CFG, RES, 0, 4)
+REG32(VPLL_FRAC_CFG, 0x40)
+    FIELD(VPLL_FRAC_CFG, ENABLED, 31, 1)
+    FIELD(VPLL_FRAC_CFG, SEED, 22, 3)
+    FIELD(VPLL_FRAC_CFG, ALGRTHM, 19, 1)
+    FIELD(VPLL_FRAC_CFG, ORDER, 18, 1)
+    FIELD(VPLL_FRAC_CFG, DATA, 0, 16)
+REG32(PLL_STATUS, 0x44)
+    FIELD(PLL_STATUS, VPLL_STABLE, 5, 1)
+    FIELD(PLL_STATUS, DPLL_STABLE, 4, 1)
+    FIELD(PLL_STATUS, APLL_STABLE, 3, 1)
+    FIELD(PLL_STATUS, VPLL_LOCK, 2, 1)
+    FIELD(PLL_STATUS, DPLL_LOCK, 1, 1)
+    FIELD(PLL_STATUS, APLL_LOCK, 0, 1)
+REG32(APLL_TO_LPD_CTRL, 0x48)
+    FIELD(APLL_TO_LPD_CTRL, DIVISOR0, 8, 6)
+REG32(DPLL_TO_LPD_CTRL, 0x4c)
+    FIELD(DPLL_TO_LPD_CTRL, DIVISOR0, 8, 6)
+REG32(VPLL_TO_LPD_CTRL, 0x50)
+    FIELD(VPLL_TO_LPD_CTRL, DIVISOR0, 8, 6)
+REG32(ACPU_CTRL, 0x60)
+    FIELD(ACPU_CTRL, CLKACT_HALF, 25, 1)
+    FIELD(ACPU_CTRL, CLKACT_FULL, 24, 1)
+    FIELD(ACPU_CTRL, DIVISOR0, 8, 6)
+    FIELD(ACPU_CTRL, SRCSEL, 0, 3)
+REG32(DBG_TRACE_CTRL, 0x64)
+    FIELD(DBG_TRACE_CTRL, CLKACT, 24, 1)
+    FIELD(DBG_TRACE_CTRL, DIVISOR0, 8, 6)
+    FIELD(DBG_TRACE_CTRL, SRCSEL, 0, 3)
+REG32(DBG_FPD_CTRL, 0x68)
+    FIELD(DBG_FPD_CTRL, CLKACT, 24, 1)
+    FIELD(DBG_FPD_CTRL, DIVISOR0, 8, 6)
+    FIELD(DBG_FPD_CTRL, SRCSEL, 0, 3)
+REG32(DP_VIDEO_REF_CTRL, 0x70)
+    FIELD(DP_VIDEO_REF_CTRL, CLKACT, 24, 1)
+    FIELD(DP_VIDEO_REF_CTRL, DIVISOR1, 16, 6)
+    FIELD(DP_VIDEO_REF_CTRL, DIVISOR0, 8, 6)
+    FIELD(DP_VIDEO_REF_CTRL, SRCSEL, 0, 3)
+REG32(DP_AUDIO_REF_CTRL, 0x74)
+    FIELD(DP_AUDIO_REF_CTRL, CLKACT, 24, 1)
+    FIELD(DP_AUDIO_REF_CTRL, DIVISOR1, 16, 6)
+    FIELD(DP_AUDIO_REF_CTRL, DIVISOR0, 8, 6)
+    FIELD(DP_AUDIO_REF_CTRL, SRCSEL, 0, 3)
+REG32(DP_STC_REF_CTRL, 0x7c)
+    FIELD(DP_STC_REF_CTRL, CLKACT, 24, 1)
+    FIELD(DP_STC_REF_CTRL, DIVISOR1, 16, 6)
+    FIELD(DP_STC_REF_CTRL, DIVISOR0, 8, 6)
+    FIELD(DP_STC_REF_CTRL, SRCSEL, 0, 3)
+REG32(DDR_CTRL, 0x80)
+    FIELD(DDR_CTRL, CLKACT, 24, 1)
+    FIELD(DDR_CTRL, DIVISOR0, 8, 6)
+    FIELD(DDR_CTRL, SRCSEL, 0, 3)
+REG32(GPU_REF_CTRL, 0x84)
+    FIELD(GPU_REF_CTRL, PP1_CLKACT, 26, 1)
+    FIELD(GPU_REF_CTRL, PP0_CLKACT, 25, 1)
+    FIELD(GPU_REF_CTRL, CLKACT, 24, 1)
+    FIELD(GPU_REF_CTRL, DIVISOR0, 8, 6)
+    FIELD(GPU_REF_CTRL, SRCSEL, 0, 3)
+REG32(SATA_REF_CTRL, 0xa0)
+    FIELD(SATA_REF_CTRL, CLKACT, 24, 1)
+    FIELD(SATA_REF_CTRL, DIVISOR0, 8, 6)
+    FIELD(SATA_REF_CTRL, SRCSEL, 0, 3)
+REG32(PCIE_REF_CTRL, 0xb4)
+    FIELD(PCIE_REF_CTRL, CLKACT, 24, 1)
+    FIELD(PCIE_REF_CTRL, DIVISOR0, 8, 6)
+    FIELD(PCIE_REF_CTRL, SRCSEL, 0, 3)
+REG32(GDMA_REF_CTRL, 0xb8)
+    FIELD(GDMA_REF_CTRL, CLKACT, 24, 1)
+    FIELD(GDMA_REF_CTRL, DIVISOR0, 8, 6)
+    FIELD(GDMA_REF_CTRL, SRCSEL, 0, 3)
+REG32(DPDMA_REF_CTRL, 0xbc)
+    FIELD(DPDMA_REF_CTRL, CLKACT, 24, 1)
+    FIELD(DPDMA_REF_CTRL, DIVISOR0, 8, 6)
+    FIELD(DPDMA_REF_CTRL, SRCSEL, 0, 3)
+REG32(TOPSW_MAIN_CTRL, 0xc0)
+    FIELD(TOPSW_MAIN_CTRL, CLKACT, 24, 1)
+    FIELD(TOPSW_MAIN_CTRL, DIVISOR0, 8, 6)
+    FIELD(TOPSW_MAIN_CTRL, SRCSEL, 0, 3)
+REG32(TOPSW_LSBUS_CTRL, 0xc4)
+    FIELD(TOPSW_LSBUS_CTRL, CLKACT, 24, 1)
+    FIELD(TOPSW_LSBUS_CTRL, DIVISOR0, 8, 6)
+    FIELD(TOPSW_LSBUS_CTRL, SRCSEL, 0, 3)
+REG32(DBG_TSTMP_CTRL, 0xf8)
+    FIELD(DBG_TSTMP_CTRL, DIVISOR0, 8, 6)
+    FIELD(DBG_TSTMP_CTRL, SRCSEL, 0, 3)
+REG32(RST_FPD_TOP, 0x100)
+    FIELD(RST_FPD_TOP, PCIE_CFG_RESET, 19, 1)
+    FIELD(RST_FPD_TOP, PCIE_BRIDGE_RESET, 18, 1)
+    FIELD(RST_FPD_TOP, PCIE_CTRL_RESET, 17, 1)
+    FIELD(RST_FPD_TOP, DP_RESET, 16, 1)
+    FIELD(RST_FPD_TOP, SWDT_RESET, 15, 1)
+    FIELD(RST_FPD_TOP, AFI_FM5_RESET, 12, 1)
+    FIELD(RST_FPD_TOP, AFI_FM4_RESET, 11, 1)
+    FIELD(RST_FPD_TOP, AFI_FM3_RESET, 10, 1)
+    FIELD(RST_FPD_TOP, AFI_FM2_RESET, 9, 1)
+    FIELD(RST_FPD_TOP, AFI_FM1_RESET, 8, 1)
+    FIELD(RST_FPD_TOP, AFI_FM0_RESET, 7, 1)
+    FIELD(RST_FPD_TOP, GDMA_RESET, 6, 1)
+    FIELD(RST_FPD_TOP, GPU_PP1_RESET, 5, 1)
+    FIELD(RST_FPD_TOP, GPU_PP0_RESET, 4, 1)
+    FIELD(RST_FPD_TOP, GPU_RESET, 3, 1)
+    FIELD(RST_FPD_TOP, GT_RESET, 2, 1)
+    FIELD(RST_FPD_TOP, SATA_RESET, 1, 1)
+REG32(RST_FPD_APU, 0x104)
+    FIELD(RST_FPD_APU, ACPU3_PWRON_RESET, 13, 1)
+    FIELD(RST_FPD_APU, ACPU2_PWRON_RESET, 12, 1)
+    FIELD(RST_FPD_APU, ACPU1_PWRON_RESET, 11, 1)
+    FIELD(RST_FPD_APU, ACPU0_PWRON_RESET, 10, 1)
+    FIELD(RST_FPD_APU, APU_L2_RESET, 8, 1)
+    FIELD(RST_FPD_APU, ACPU3_RESET, 3, 1)
+    FIELD(RST_FPD_APU, ACPU2_RESET, 2, 1)
+    FIELD(RST_FPD_APU, ACPU1_RESET, 1, 1)
+    FIELD(RST_FPD_APU, ACPU0_RESET, 0, 1)
+REG32(RST_DDR_SS, 0x108)
+    FIELD(RST_DDR_SS, DDR_RESET, 3, 1)
+    FIELD(RST_DDR_SS, APM_RESET, 2, 1)
+
+#define CRF_R_MAX (R_RST_DDR_SS + 1)
+
+struct XlnxZynqMPCRF {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+    qemu_irq irq_ir;
+
+    RegisterInfoArray *reg_array;
+    uint32_t regs[CRF_R_MAX];
+    RegisterInfo regs_info[CRF_R_MAX];
+};
+
+#endif
diff --git a/include/hw/net/allwinner-sun8i-emac.h b/include/hw/net/allwinner-sun8i-emac.h
new file mode 100644 (file)
index 0000000..185895f
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * Allwinner Sun8i Ethernet MAC emulation
+ *
+ * Copyright (C) 2019 Niek Linnenbank <nieklinnenbank@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_NET_ALLWINNER_SUN8I_EMAC_H
+#define HW_NET_ALLWINNER_SUN8I_EMAC_H
+
+#include "qom/object.h"
+#include "net/net.h"
+#include "hw/sysbus.h"
+
+/**
+ * Object model
+ * @{
+ */
+
+#define TYPE_AW_SUN8I_EMAC "allwinner-sun8i-emac"
+OBJECT_DECLARE_SIMPLE_TYPE(AwSun8iEmacState, AW_SUN8I_EMAC)
+
+/** @} */
+
+/**
+ * Allwinner Sun8i EMAC object instance state
+ */
+struct AwSun8iEmacState {
+    /*< private >*/
+    SysBusDevice  parent_obj;
+    /*< public >*/
+
+    /** Maps I/O registers in physical memory */
+    MemoryRegion iomem;
+
+    /** Interrupt output signal to notify CPU */
+    qemu_irq     irq;
+
+    /** Memory region where DMA transfers are done */
+    MemoryRegion *dma_mr;
+
+    /** Address space used internally for DMA transfers */
+    AddressSpace dma_as;
+
+    /** Generic Network Interface Controller (NIC) for networking API */
+    NICState     *nic;
+
+    /** Generic Network Interface Controller (NIC) configuration */
+    NICConf      conf;
+
+    /**
+     * @name Media Independent Interface (MII)
+     * @{
+     */
+
+    uint8_t      mii_phy_addr;  /**< PHY address */
+    uint32_t     mii_cr;        /**< Control */
+    uint32_t     mii_st;        /**< Status */
+    uint32_t     mii_adv;       /**< Advertised Abilities */
+
+    /** @} */
+
+    /**
+     * @name Hardware Registers
+     * @{
+     */
+
+    uint32_t     basic_ctl0;    /**< Basic Control 0 */
+    uint32_t     basic_ctl1;    /**< Basic Control 1 */
+    uint32_t     int_en;        /**< Interrupt Enable */
+    uint32_t     int_sta;       /**< Interrupt Status */
+    uint32_t     frm_flt;       /**< Receive Frame Filter */
+
+    uint32_t     rx_ctl0;       /**< Receive Control 0 */
+    uint32_t     rx_ctl1;       /**< Receive Control 1 */
+    uint32_t     rx_desc_head;  /**< Receive Descriptor List Address */
+    uint32_t     rx_desc_curr;  /**< Current Receive Descriptor Address */
+
+    uint32_t     tx_ctl0;       /**< Transmit Control 0 */
+    uint32_t     tx_ctl1;       /**< Transmit Control 1 */
+    uint32_t     tx_desc_head;  /**< Transmit Descriptor List Address */
+    uint32_t     tx_desc_curr;  /**< Current Transmit Descriptor Address */
+    uint32_t     tx_flowctl;    /**< Transmit Flow Control */
+
+    uint32_t     mii_cmd;       /**< Management Interface Command */
+    uint32_t     mii_data;      /**< Management Interface Data */
+
+    /** @} */
+
+};
+
+#endif /* HW_NET_ALLWINNER_SUN8I_EMAC_H */
diff --git a/include/hw/net/allwinner_emac.h b/include/hw/net/allwinner_emac.h
new file mode 100644 (file)
index 0000000..534e748
--- /dev/null
@@ -0,0 +1,177 @@
+/*
+ * Emulation of Allwinner EMAC Fast Ethernet controller and
+ * Realtek RTL8201CP PHY
+ *
+ * Copyright (C) 2014 Beniamino Galvani <b.galvani@gmail.com>
+ *
+ * Allwinner EMAC register definitions from Linux kernel are:
+ *   Copyright 2012 Stefan Roese <sr@denx.de>
+ *   Copyright 2013 Maxime Ripard <maxime.ripard@free-electrons.com>
+ *   Copyright 1997 Sten Wang
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef ALLWINNER_EMAC_H
+#define ALLWINNER_EMAC_H
+
+#include "qemu/units.h"
+#include "net/net.h"
+#include "qemu/fifo8.h"
+#include "hw/net/mii.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_AW_EMAC "allwinner-emac"
+OBJECT_DECLARE_SIMPLE_TYPE(AwEmacState, AW_EMAC)
+
+/*
+ * Allwinner EMAC register list
+ */
+#define EMAC_CTL_REG            0x00
+
+#define EMAC_TX_MODE_REG        0x04
+#define EMAC_TX_FLOW_REG        0x08
+#define EMAC_TX_CTL0_REG        0x0C
+#define EMAC_TX_CTL1_REG        0x10
+#define EMAC_TX_INS_REG         0x14
+#define EMAC_TX_PL0_REG         0x18
+#define EMAC_TX_PL1_REG         0x1C
+#define EMAC_TX_STA_REG         0x20
+#define EMAC_TX_IO_DATA_REG     0x24
+#define EMAC_TX_IO_DATA1_REG    0x28
+#define EMAC_TX_TSVL0_REG       0x2C
+#define EMAC_TX_TSVH0_REG       0x30
+#define EMAC_TX_TSVL1_REG       0x34
+#define EMAC_TX_TSVH1_REG       0x38
+
+#define EMAC_RX_CTL_REG         0x3C
+#define EMAC_RX_HASH0_REG       0x40
+#define EMAC_RX_HASH1_REG       0x44
+#define EMAC_RX_STA_REG         0x48
+#define EMAC_RX_IO_DATA_REG     0x4C
+#define EMAC_RX_FBC_REG         0x50
+
+#define EMAC_INT_CTL_REG        0x54
+#define EMAC_INT_STA_REG        0x58
+
+#define EMAC_MAC_CTL0_REG       0x5C
+#define EMAC_MAC_CTL1_REG       0x60
+#define EMAC_MAC_IPGT_REG       0x64
+#define EMAC_MAC_IPGR_REG       0x68
+#define EMAC_MAC_CLRT_REG       0x6C
+#define EMAC_MAC_MAXF_REG       0x70
+#define EMAC_MAC_SUPP_REG       0x74
+#define EMAC_MAC_TEST_REG       0x78
+#define EMAC_MAC_MCFG_REG       0x7C
+#define EMAC_MAC_MCMD_REG       0x80
+#define EMAC_MAC_MADR_REG       0x84
+#define EMAC_MAC_MWTD_REG       0x88
+#define EMAC_MAC_MRDD_REG       0x8C
+#define EMAC_MAC_MIND_REG       0x90
+#define EMAC_MAC_SSRR_REG       0x94
+#define EMAC_MAC_A0_REG         0x98
+#define EMAC_MAC_A1_REG         0x9C
+#define EMAC_MAC_A2_REG         0xA0
+
+#define EMAC_SAFX_L_REG0        0xA4
+#define EMAC_SAFX_H_REG0        0xA8
+#define EMAC_SAFX_L_REG1        0xAC
+#define EMAC_SAFX_H_REG1        0xB0
+#define EMAC_SAFX_L_REG2        0xB4
+#define EMAC_SAFX_H_REG2        0xB8
+#define EMAC_SAFX_L_REG3        0xBC
+#define EMAC_SAFX_H_REG3        0xC0
+
+/* CTL register fields */
+#define EMAC_CTL_RESET                  (1 << 0)
+#define EMAC_CTL_TX_EN                  (1 << 1)
+#define EMAC_CTL_RX_EN                  (1 << 2)
+
+/* TX MODE register fields */
+#define EMAC_TX_MODE_ABORTED_FRAME_EN   (1 << 0)
+#define EMAC_TX_MODE_DMA_EN             (1 << 1)
+
+/* RX CTL register fields */
+#define EMAC_RX_CTL_AUTO_DRQ_EN         (1 << 1)
+#define EMAC_RX_CTL_DMA_EN              (1 << 2)
+#define EMAC_RX_CTL_PASS_ALL_EN         (1 << 4)
+#define EMAC_RX_CTL_PASS_CTL_EN         (1 << 5)
+#define EMAC_RX_CTL_PASS_CRC_ERR_EN     (1 << 6)
+#define EMAC_RX_CTL_PASS_LEN_ERR_EN     (1 << 7)
+#define EMAC_RX_CTL_PASS_LEN_OOR_EN     (1 << 8)
+#define EMAC_RX_CTL_ACCEPT_UNICAST_EN   (1 << 16)
+#define EMAC_RX_CTL_DA_FILTER_EN        (1 << 17)
+#define EMAC_RX_CTL_ACCEPT_MULTICAST_EN (1 << 20)
+#define EMAC_RX_CTL_HASH_FILTER_EN      (1 << 21)
+#define EMAC_RX_CTL_ACCEPT_BROADCAST_EN (1 << 22)
+#define EMAC_RX_CTL_SA_FILTER_EN        (1 << 24)
+#define EMAC_RX_CTL_SA_FILTER_INVERT_EN (1 << 25)
+
+/* RX IO DATA register fields */
+#define EMAC_RX_HEADER(len, status)     (((len) & 0xffff) | ((status) << 16))
+#define EMAC_RX_IO_DATA_STATUS_CRC_ERR  (1 << 4)
+#define EMAC_RX_IO_DATA_STATUS_LEN_ERR  (3 << 5)
+#define EMAC_RX_IO_DATA_STATUS_OK       (1 << 7)
+#define EMAC_UNDOCUMENTED_MAGIC         0x0143414d  /* header for RX frames */
+
+/* INT CTL and INT STA registers fields */
+#define EMAC_INT_TX_CHAN(x) (1 << (x))
+#define EMAC_INT_RX         (1 << 8)
+
+/* Due to lack of specifications, size of fifos is chosen arbitrarily */
+#define TX_FIFO_SIZE        (4 * KiB)
+#define RX_FIFO_SIZE        (32 * KiB)
+
+#define NUM_TX_FIFOS        2
+#define RX_HDR_SIZE         8
+#define CRC_SIZE            4
+
+#define PHY_REG_SHIFT       0
+#define PHY_ADDR_SHIFT      8
+
+typedef struct RTL8201CPState {
+    uint16_t bmcr;
+    uint16_t bmsr;
+    uint16_t anar;
+    uint16_t anlpar;
+} RTL8201CPState;
+
+struct AwEmacState {
+    /*< private >*/
+    SysBusDevice  parent_obj;
+    /*< public >*/
+
+    MemoryRegion   iomem;
+    qemu_irq       irq;
+    NICState       *nic;
+    NICConf        conf;
+    RTL8201CPState mii;
+    uint8_t        phy_addr;
+
+    uint32_t       ctl;
+    uint32_t       tx_mode;
+    uint32_t       rx_ctl;
+    uint32_t       int_ctl;
+    uint32_t       int_sta;
+    uint32_t       phy_target;
+
+    Fifo8          rx_fifo;
+    uint32_t       rx_num_packets;
+    uint32_t       rx_packet_size;
+    uint32_t       rx_packet_pos;
+
+    Fifo8          tx_fifo[NUM_TX_FIFOS];
+    uint32_t       tx_length[NUM_TX_FIFOS];
+    uint32_t       tx_channel;
+};
+
+#endif
diff --git a/include/hw/net/cadence_gem.h b/include/hw/net/cadence_gem.h
new file mode 100644 (file)
index 0000000..91ebb5c
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * QEMU Cadence GEM emulation
+ *
+ * Copyright (c) 2011 Xilinx, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef CADENCE_GEM_H
+#define CADENCE_GEM_H
+#include "qom/object.h"
+
+#define TYPE_CADENCE_GEM "cadence_gem"
+OBJECT_DECLARE_SIMPLE_TYPE(CadenceGEMState, CADENCE_GEM)
+
+#include "net/net.h"
+#include "hw/sysbus.h"
+
+#define CADENCE_GEM_MAXREG        (0x00000800 / 4) /* Last valid GEM address */
+
+/* Max number of words in a DMA descriptor.  */
+#define DESC_MAX_NUM_WORDS              6
+
+#define MAX_PRIORITY_QUEUES             8
+#define MAX_TYPE1_SCREENERS             16
+#define MAX_TYPE2_SCREENERS             16
+
+#define MAX_JUMBO_FRAME_SIZE_MASK 0x3FFF
+#define MAX_FRAME_SIZE MAX_JUMBO_FRAME_SIZE_MASK
+
+struct CadenceGEMState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    MemoryRegion *dma_mr;
+    AddressSpace dma_as;
+    NICState *nic;
+    NICConf conf;
+    qemu_irq irq[MAX_PRIORITY_QUEUES];
+
+    /* Static properties */
+    uint8_t num_priority_queues;
+    uint8_t num_type1_screeners;
+    uint8_t num_type2_screeners;
+    uint32_t revision;
+    uint16_t jumbo_max_len;
+
+    /* GEM registers backing store */
+    uint32_t regs[CADENCE_GEM_MAXREG];
+    /* Mask of register bits which are write only */
+    uint32_t regs_wo[CADENCE_GEM_MAXREG];
+    /* Mask of register bits which are read only */
+    uint32_t regs_ro[CADENCE_GEM_MAXREG];
+    /* Mask of register bits which are clear on read */
+    uint32_t regs_rtc[CADENCE_GEM_MAXREG];
+    /* Mask of register bits which are write 1 to clear */
+    uint32_t regs_w1c[CADENCE_GEM_MAXREG];
+
+    /* PHY address */
+    uint8_t phy_addr;
+    /* PHY registers backing store */
+    uint16_t phy_regs[32];
+
+    uint8_t phy_loop; /* Are we in phy loopback? */
+
+    /* The current DMA descriptor pointers */
+    uint32_t rx_desc_addr[MAX_PRIORITY_QUEUES];
+    uint32_t tx_desc_addr[MAX_PRIORITY_QUEUES];
+
+    uint8_t can_rx_state; /* Debug only */
+
+    uint8_t tx_packet[MAX_FRAME_SIZE];
+    uint8_t rx_packet[MAX_FRAME_SIZE];
+    uint32_t rx_desc[MAX_PRIORITY_QUEUES][DESC_MAX_NUM_WORDS];
+
+    bool sar_active[4];
+};
+
+#endif
diff --git a/include/hw/net/dp8393x.h b/include/hw/net/dp8393x.h
new file mode 100644 (file)
index 0000000..4a3f747
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * QEMU NS SONIC DP8393x netcard
+ *
+ * Copyright (c) 2008-2009 Herve Poussineau
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_NET_DP8393X_H
+#define HW_NET_DP8393X_H
+
+#include "hw/sysbus.h"
+#include "net/net.h"
+#include "exec/memory.h"
+
+#define SONIC_REG_COUNT  0x40
+
+#define TYPE_DP8393X "dp8393x"
+OBJECT_DECLARE_SIMPLE_TYPE(dp8393xState, DP8393X)
+
+struct dp8393xState {
+    SysBusDevice parent_obj;
+
+    /* Hardware */
+    uint8_t it_shift;
+    bool big_endian;
+    bool last_rba_is_full;
+    qemu_irq irq;
+    int irq_level;
+    QEMUTimer *watchdog;
+    int64_t wt_last_update;
+    NICConf conf;
+    NICState *nic;
+    MemoryRegion mmio;
+
+    /* Registers */
+    uint16_t cam[16][3];
+    uint16_t regs[SONIC_REG_COUNT];
+
+    /* Temporaries */
+    uint8_t tx_buffer[0x10000];
+    int loopback_packet;
+
+    /* Memory access */
+    MemoryRegion *dma_mr;
+    AddressSpace as;
+};
+
+#endif
diff --git a/include/hw/net/ftgmac100.h b/include/hw/net/ftgmac100.h
new file mode 100644 (file)
index 0000000..765d153
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * Faraday FTGMAC100 Gigabit Ethernet
+ *
+ * Copyright (C) 2016-2017, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef FTGMAC100_H
+#define FTGMAC100_H
+#include "qom/object.h"
+
+#define TYPE_FTGMAC100 "ftgmac100"
+OBJECT_DECLARE_SIMPLE_TYPE(FTGMAC100State, FTGMAC100)
+
+#include "hw/sysbus.h"
+#include "net/net.h"
+
+/*
+ * Max frame size for the receiving buffer
+ */
+#define FTGMAC100_MAX_FRAME_SIZE    9220
+
+struct FTGMAC100State {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    NICState *nic;
+    NICConf conf;
+    qemu_irq irq;
+    MemoryRegion iomem;
+
+    uint8_t frame[FTGMAC100_MAX_FRAME_SIZE];
+
+    uint32_t irq_state;
+    uint32_t isr;
+    uint32_t ier;
+    uint32_t rx_enabled;
+    uint32_t rx_ring;
+    uint32_t rx_descriptor;
+    uint32_t tx_ring;
+    uint32_t tx_descriptor;
+    uint32_t math[2];
+    uint32_t rbsr;
+    uint32_t itc;
+    uint32_t aptcr;
+    uint32_t dblac;
+    uint32_t revr;
+    uint32_t fear1;
+    uint32_t tpafcr;
+    uint32_t maccr;
+    uint32_t phycr;
+    uint32_t phydata;
+    uint32_t fcr;
+
+
+    uint32_t phy_status;
+    uint32_t phy_control;
+    uint32_t phy_advertise;
+    uint32_t phy_int;
+    uint32_t phy_int_mask;
+
+    bool aspeed;
+    uint32_t txdes0_edotr;
+    uint32_t rxdes0_edorr;
+};
+
+#define TYPE_ASPEED_MII "aspeed-mmi"
+OBJECT_DECLARE_SIMPLE_TYPE(AspeedMiiState, ASPEED_MII)
+
+/*
+ * AST2600 MII controller
+ */
+struct AspeedMiiState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    FTGMAC100State *nic;
+
+    MemoryRegion iomem;
+    uint32_t phycr;
+    uint32_t phydata;
+};
+
+#endif
diff --git a/include/hw/net/imx_fec.h b/include/hw/net/imx_fec.h
new file mode 100644 (file)
index 0000000..2d13290
--- /dev/null
@@ -0,0 +1,282 @@
+/*
+ * i.MX FEC/ENET Ethernet Controller emulation.
+ *
+ * Copyright (c) 2013 Jean-Christophe Dubois. <jcd@tribudubois.net>
+ *
+ * Based on Coldfire Fast Ethernet Controller emulation.
+ *
+ * Copyright (c) 2007 CodeSourcery.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef IMX_FEC_H
+#define IMX_FEC_H
+#include "qom/object.h"
+
+#define TYPE_IMX_FEC "imx.fec"
+OBJECT_DECLARE_SIMPLE_TYPE(IMXFECState, IMX_FEC)
+
+#define TYPE_IMX_ENET "imx.enet"
+
+#include "hw/sysbus.h"
+#include "net/net.h"
+
+#define ENET_EIR               1
+#define ENET_EIMR              2
+#define ENET_RDAR              4
+#define ENET_TDAR              5
+#define ENET_ECR               9
+#define ENET_MMFR              16
+#define ENET_MSCR              17
+#define ENET_MIBC              25
+#define ENET_RCR               33
+#define ENET_TCR               49
+#define ENET_PALR              57
+#define ENET_PAUR              58
+#define ENET_OPD               59
+#define ENET_IAUR              70
+#define ENET_IALR              71
+#define ENET_GAUR              72
+#define ENET_GALR              73
+#define ENET_TFWR              81
+#define ENET_FRBR              83
+#define ENET_FRSR              84
+#define ENET_TDSR1             89
+#define ENET_TDSR2             92
+#define ENET_RDSR              96
+#define ENET_TDSR              97
+#define ENET_MRBR              98
+#define ENET_RSFL              100
+#define ENET_RSEM              101
+#define ENET_RAEM              102
+#define ENET_RAFL              103
+#define ENET_TSEM              104
+#define ENET_TAEM              105
+#define ENET_TAFL              106
+#define ENET_TIPG              107
+#define ENET_FTRL              108
+#define ENET_TACC              112
+#define ENET_RACC              113
+#define ENET_TDAR1             121
+#define ENET_TDAR2             123
+#define ENET_MIIGSK_CFGR       192
+#define ENET_MIIGSK_ENR        194
+#define ENET_ATCR              256
+#define ENET_ATVR              257
+#define ENET_ATOFF             258
+#define ENET_ATPER             259
+#define ENET_ATCOR             260
+#define ENET_ATINC             261
+#define ENET_ATSTMP            262
+#define ENET_TGSR              385
+#define ENET_TCSR0             386
+#define ENET_TCCR0             387
+#define ENET_TCSR1             388
+#define ENET_TCCR1             389
+#define ENET_TCSR2             390
+#define ENET_TCCR2             391
+#define ENET_TCSR3             392
+#define ENET_TCCR3             393
+#define ENET_MAX               400
+
+
+/* EIR and EIMR */
+#define ENET_INT_HB            (1 << 31)
+#define ENET_INT_BABR          (1 << 30)
+#define ENET_INT_BABT          (1 << 29)
+#define ENET_INT_GRA           (1 << 28)
+#define ENET_INT_TXF           (1 << 27)
+#define ENET_INT_TXB           (1 << 26)
+#define ENET_INT_RXF           (1 << 25)
+#define ENET_INT_RXB           (1 << 24)
+#define ENET_INT_MII           (1 << 23)
+#define ENET_INT_EBERR         (1 << 22)
+#define ENET_INT_LC            (1 << 21)
+#define ENET_INT_RL            (1 << 20)
+#define ENET_INT_UN            (1 << 19)
+#define ENET_INT_PLR           (1 << 18)
+#define ENET_INT_WAKEUP        (1 << 17)
+#define ENET_INT_TS_AVAIL      (1 << 16)
+#define ENET_INT_TS_TIMER      (1 << 15)
+#define ENET_INT_TXF2          (1 <<  7)
+#define ENET_INT_TXB2          (1 <<  6)
+#define ENET_INT_TXF1          (1 <<  3)
+#define ENET_INT_TXB1          (1 <<  2)
+
+#define ENET_INT_MAC           (ENET_INT_HB | ENET_INT_BABR | ENET_INT_BABT | \
+                                ENET_INT_GRA | ENET_INT_TXF | ENET_INT_TXB | \
+                                ENET_INT_RXF | ENET_INT_RXB | ENET_INT_MII | \
+                                ENET_INT_EBERR | ENET_INT_LC | ENET_INT_RL | \
+                                ENET_INT_UN | ENET_INT_PLR | ENET_INT_WAKEUP | \
+                                ENET_INT_TS_AVAIL | ENET_INT_TXF1 | \
+                                ENET_INT_TXB1 | ENET_INT_TXF2 | ENET_INT_TXB2)
+
+/* RDAR */
+#define ENET_RDAR_RDAR         (1 << 24)
+
+/* TDAR */
+#define ENET_TDAR_TDAR         (1 << 24)
+
+/* ECR */
+#define ENET_ECR_RESET         (1 << 0)
+#define ENET_ECR_ETHEREN       (1 << 1)
+#define ENET_ECR_MAGICEN       (1 << 2)
+#define ENET_ECR_SLEEP         (1 << 3)
+#define ENET_ECR_EN1588        (1 << 4)
+#define ENET_ECR_SPEED         (1 << 5)
+#define ENET_ECR_DBGEN         (1 << 6)
+#define ENET_ECR_STOPEN        (1 << 7)
+#define ENET_ECR_DSBWP         (1 << 8)
+
+/* MIBC */
+#define ENET_MIBC_MIB_DIS      (1 << 31)
+#define ENET_MIBC_MIB_IDLE     (1 << 30)
+#define ENET_MIBC_MIB_CLEAR    (1 << 29)
+
+/* RCR */
+#define ENET_RCR_LOOP          (1 << 0)
+#define ENET_RCR_DRT           (1 << 1)
+#define ENET_RCR_MII_MODE      (1 << 2)
+#define ENET_RCR_PROM          (1 << 3)
+#define ENET_RCR_BC_REJ        (1 << 4)
+#define ENET_RCR_FCE           (1 << 5)
+#define ENET_RCR_RGMII_EN      (1 << 6)
+#define ENET_RCR_RMII_MODE     (1 << 8)
+#define ENET_RCR_RMII_10T      (1 << 9)
+#define ENET_RCR_PADEN         (1 << 12)
+#define ENET_RCR_PAUFWD        (1 << 13)
+#define ENET_RCR_CRCFWD        (1 << 14)
+#define ENET_RCR_CFEN          (1 << 15)
+#define ENET_RCR_MAX_FL_SHIFT  (16)
+#define ENET_RCR_MAX_FL_LENGTH (14)
+#define ENET_RCR_NLC           (1 << 30)
+#define ENET_RCR_GRS           (1 << 31)
+
+#define ENET_MAX_FRAME_SIZE    (1 << ENET_RCR_MAX_FL_LENGTH)
+
+/* TCR */
+#define ENET_TCR_GTS           (1 << 0)
+#define ENET_TCR_FDEN          (1 << 2)
+#define ENET_TCR_TFC_PAUSE     (1 << 3)
+#define ENET_TCR_RFC_PAUSE     (1 << 4)
+#define ENET_TCR_ADDSEL_SHIFT  (5)
+#define ENET_TCR_ADDSEL_LENGTH (3)
+#define ENET_TCR_CRCFWD        (1 << 9)
+
+/* RDSR */
+#define ENET_TWFR_TFWR_SHIFT   (0)
+#define ENET_TWFR_TFWR_LENGTH  (6)
+#define ENET_TWFR_STRFWD       (1 << 8)
+
+#define ENET_RACC_SHIFT16      BIT(7)
+
+/* Buffer Descriptor.  */
+typedef struct {
+    uint16_t length;
+    uint16_t flags;
+    uint32_t data;
+} IMXFECBufDesc;
+
+#define ENET_BD_R              (1 << 15)
+#define ENET_BD_E              (1 << 15)
+#define ENET_BD_O1             (1 << 14)
+#define ENET_BD_W              (1 << 13)
+#define ENET_BD_O2             (1 << 12)
+#define ENET_BD_L              (1 << 11)
+#define ENET_BD_TC             (1 << 10)
+#define ENET_BD_ABC            (1 << 9)
+#define ENET_BD_M              (1 << 8)
+#define ENET_BD_BC             (1 << 7)
+#define ENET_BD_MC             (1 << 6)
+#define ENET_BD_LG             (1 << 5)
+#define ENET_BD_NO             (1 << 4)
+#define ENET_BD_CR             (1 << 2)
+#define ENET_BD_OV             (1 << 1)
+#define ENET_BD_TR             (1 << 0)
+
+typedef struct {
+    uint16_t length;
+    uint16_t flags;
+    uint32_t data;
+    uint16_t status;
+    uint16_t option;
+    uint16_t checksum;
+    uint16_t head_proto;
+    uint32_t last_buffer;
+    uint32_t timestamp;
+    uint32_t reserved[2];
+} IMXENETBufDesc;
+
+#define ENET_BD_ME             (1 << 15)
+#define ENET_BD_TX_INT         (1 << 14)
+#define ENET_BD_TS             (1 << 13)
+#define ENET_BD_PINS           (1 << 12)
+#define ENET_BD_IINS           (1 << 11)
+#define ENET_BD_PE             (1 << 10)
+#define ENET_BD_CE             (1 << 9)
+#define ENET_BD_UC             (1 << 8)
+#define ENET_BD_RX_INT         (1 << 7)
+
+#define ENET_BD_TXE            (1 << 15)
+#define ENET_BD_UE             (1 << 13)
+#define ENET_BD_EE             (1 << 12)
+#define ENET_BD_FE             (1 << 11)
+#define ENET_BD_LCE            (1 << 10)
+#define ENET_BD_OE             (1 << 9)
+#define ENET_BD_TSE            (1 << 8)
+#define ENET_BD_ICE            (1 << 5)
+#define ENET_BD_PCR            (1 << 4)
+#define ENET_BD_VLAN           (1 << 2)
+#define ENET_BD_IPV6           (1 << 1)
+#define ENET_BD_FRAG           (1 << 0)
+
+#define ENET_BD_BDU            (1 << 31)
+
+#define ENET_TX_RING_NUM       3
+
+#define FSL_IMX25_FEC_SIZE      0x4000
+
+struct IMXFECState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    NICState *nic;
+    NICConf conf;
+    qemu_irq irq[2];
+    MemoryRegion iomem;
+
+    uint32_t regs[ENET_MAX];
+    uint32_t rx_descriptor;
+
+    uint32_t tx_descriptor[ENET_TX_RING_NUM];
+    uint32_t tx_ring_num;
+
+    uint32_t phy_status;
+    uint32_t phy_control;
+    uint32_t phy_advertise;
+    uint32_t phy_int;
+    uint32_t phy_int_mask;
+    uint32_t phy_num;
+    bool phy_connected;
+    struct IMXFECState *phy_consumer;
+
+    bool is_fec;
+
+    /* Buffer used to assemble a Tx frame */
+    uint8_t frame[ENET_MAX_FRAME_SIZE];
+};
+
+#endif
diff --git a/include/hw/net/lan9118.h b/include/hw/net/lan9118.h
new file mode 100644 (file)
index 0000000..3d0c67f
--- /dev/null
@@ -0,0 +1,20 @@
+/*
+ * SMSC LAN9118 Ethernet interface emulation
+ *
+ * Copyright (c) 2009 CodeSourcery, LLC.
+ * Written by Paul Brook
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_NET_LAN9118_H
+#define HW_NET_LAN9118_H
+
+#include "net/net.h"
+
+#define TYPE_LAN9118 "lan9118"
+
+void lan9118_init(NICInfo *, uint32_t, qemu_irq);
+
+#endif
diff --git a/include/hw/net/lance.h b/include/hw/net/lance.h
new file mode 100644 (file)
index 0000000..f645d6a
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * QEMU Lance (Am7990) device emulation
+ *
+ * Copyright (c) 2004 Antony T Curtis
+ * Copyright (c) 2017 Mark Cave-Ayland
+ *
+ * This represents the Sparc32 lance (Am7990) ethernet device which is an
+ * earlier register-compatible member of the AMD PC-Net II (Am79C970A) family.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef LANCE_H
+#define LANCE_H
+
+#include "net/net.h"
+#include "hw/net/pcnet.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_LANCE "lance"
+typedef struct SysBusPCNetState SysBusPCNetState;
+DECLARE_INSTANCE_CHECKER(SysBusPCNetState, SYSBUS_PCNET,
+                         TYPE_LANCE)
+
+struct SysBusPCNetState {
+    SysBusDevice parent_obj;
+
+    PCNetState state;
+};
+
+#endif
diff --git a/include/hw/net/lasi_82596.h b/include/hw/net/lasi_82596.h
new file mode 100644 (file)
index 0000000..3ef2f47
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * QEMU LASI i82596 device emulation
+ *
+ * Copyright (c) 201 Helge Deller <deller@gmx.de>
+ *
+ */
+
+#ifndef LASI_82596_H
+#define LASI_82596_H
+
+#include "net/net.h"
+#include "hw/net/i82596.h"
+#include "hw/sysbus.h"
+
+#define TYPE_LASI_82596 "lasi_82596"
+typedef struct SysBusI82596State SysBusI82596State;
+DECLARE_INSTANCE_CHECKER(SysBusI82596State, SYSBUS_I82596,
+                         TYPE_LASI_82596)
+
+struct SysBusI82596State {
+    SysBusDevice parent_obj;
+
+    I82596State state;
+    uint16_t last_val;
+    int val_index:1;
+};
+
+SysBusI82596State *lasi_82596_init(MemoryRegion *addr_space,
+                                    hwaddr hpa, qemu_irq irq);
+
+#endif
diff --git a/include/hw/net/mii.h b/include/hw/net/mii.h
new file mode 100644 (file)
index 0000000..f7fedda
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * Common network MII address and register definitions.
+ *
+ * Copyright (C) 2014 Beniamino Galvani <b.galvani@gmail.com>
+ *
+ * Allwinner EMAC register definitions from Linux kernel are:
+ *   Copyright 2012 Stefan Roese <sr@denx.de>
+ *   Copyright 2013 Maxime Ripard <maxime.ripard@free-electrons.com>
+ *   Copyright 1997 Sten Wang
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef MII_H
+#define MII_H
+
+/* PHY registers */
+#define MII_BMCR            0  /* Basic mode control register */
+#define MII_BMSR            1  /* Basic mode status register */
+#define MII_PHYID1          2  /* ID register 1 */
+#define MII_PHYID2          3  /* ID register 2 */
+#define MII_ANAR            4  /* Autonegotiation advertisement */
+#define MII_ANLPAR          5  /* Autonegotiation lnk partner abilities */
+#define MII_ANER            6  /* Autonegotiation expansion */
+#define MII_ANNP            7  /* Autonegotiation next page */
+#define MII_ANLPRNP         8  /* Autonegotiation link partner rx next page */
+#define MII_CTRL1000        9  /* 1000BASE-T control */
+#define MII_STAT1000        10 /* 1000BASE-T status */
+#define MII_MDDACR          13 /* MMD access control */
+#define MII_MDDAADR         14 /* MMD access address data */
+#define MII_EXTSTAT         15 /* Extended Status */
+#define MII_NSR             16
+#define MII_LBREMR          17
+#define MII_REC             18
+#define MII_SNRDR           19
+#define MII_TEST            25
+
+/* PHY registers fields */
+#define MII_BMCR_RESET      (1 << 15)
+#define MII_BMCR_LOOPBACK   (1 << 14)
+#define MII_BMCR_SPEED100   (1 << 13)  /* LSB of Speed (100) */
+#define MII_BMCR_SPEED      MII_BMCR_SPEED100
+#define MII_BMCR_AUTOEN     (1 << 12) /* Autonegotiation enable */
+#define MII_BMCR_PDOWN      (1 << 11) /* Enable low power state */
+#define MII_BMCR_ISOLATE    (1 << 10) /* Isolate data paths from MII */
+#define MII_BMCR_ANRESTART  (1 << 9)  /* Auto negotiation restart */
+#define MII_BMCR_FD         (1 << 8)  /* Set duplex mode */
+#define MII_BMCR_CTST       (1 << 7)  /* Collision test */
+#define MII_BMCR_SPEED1000  (1 << 6)  /* MSB of Speed (1000) */
+
+#define MII_BMSR_100T4      (1 << 15) /* Can do 100mbps T4 */
+#define MII_BMSR_100TX_FD   (1 << 14) /* Can do 100mbps, full-duplex */
+#define MII_BMSR_100TX_HD   (1 << 13) /* Can do 100mbps, half-duplex */
+#define MII_BMSR_10T_FD     (1 << 12) /* Can do 10mbps, full-duplex */
+#define MII_BMSR_10T_HD     (1 << 11) /* Can do 10mbps, half-duplex */
+#define MII_BMSR_100T2_FD   (1 << 10) /* Can do 100mbps T2, full-duplex */
+#define MII_BMSR_100T2_HD   (1 << 9)  /* Can do 100mbps T2, half-duplex */
+#define MII_BMSR_EXTSTAT    (1 << 8)  /* Extended status in register 15 */
+#define MII_BMSR_MFPS       (1 << 6)  /* MII Frame Preamble Suppression */
+#define MII_BMSR_AN_COMP    (1 << 5)  /* Auto-negotiation complete */
+#define MII_BMSR_RFAULT     (1 << 4)  /* Remote fault */
+#define MII_BMSR_AUTONEG    (1 << 3)  /* Able to do auto-negotiation */
+#define MII_BMSR_LINK_ST    (1 << 2)  /* Link status */
+#define MII_BMSR_JABBER     (1 << 1)  /* Jabber detected */
+#define MII_BMSR_EXTCAP     (1 << 0)  /* Ext-reg capability */
+
+#define MII_ANAR_PAUSE_ASYM (1 << 11) /* Try for asymmetric pause */
+#define MII_ANAR_PAUSE      (1 << 10) /* Try for pause */
+#define MII_ANAR_TXFD       (1 << 8)
+#define MII_ANAR_TX         (1 << 7)
+#define MII_ANAR_10FD       (1 << 6)
+#define MII_ANAR_10         (1 << 5)
+#define MII_ANAR_CSMACD     (1 << 0)
+
+#define MII_ANLPAR_ACK      (1 << 14)
+#define MII_ANLPAR_PAUSEASY (1 << 11) /* can pause asymmetrically */
+#define MII_ANLPAR_PAUSE    (1 << 10) /* can pause */
+#define MII_ANLPAR_T4       (1 << 9)
+#define MII_ANLPAR_TXFD     (1 << 8)
+#define MII_ANLPAR_TX       (1 << 7)
+#define MII_ANLPAR_10FD     (1 << 6)
+#define MII_ANLPAR_10       (1 << 5)
+#define MII_ANLPAR_CSMACD   (1 << 0)
+
+#define MII_ANER_NP         (1 << 2)  /* Next Page Able */
+#define MII_ANER_NWAY       (1 << 0)  /* Can do N-way auto-nego */
+
+#define MII_ANNP_MP         (1 << 13) /* Message Page */
+
+#define MII_CTRL1000_MASTER (1 << 11) /* MASTER-SLAVE Manual Configuration Value */
+#define MII_CTRL1000_PORT   (1 << 10) /* T2_Repeater/DTE bit */
+#define MII_CTRL1000_FULL   (1 << 9)  /* 1000BASE-T full duplex */
+#define MII_CTRL1000_HALF   (1 << 8)  /* 1000BASE-T half duplex */
+
+#define MII_STAT1000_LOK    (1 << 13) /* Local Receiver Status */
+#define MII_STAT1000_ROK    (1 << 12) /* Remote Receiver Status */
+#define MII_STAT1000_FULL   (1 << 11) /* 1000BASE-T full duplex */
+#define MII_STAT1000_HALF   (1 << 10) /* 1000BASE-T half duplex */
+
+#define MII_EXTSTAT_1000T_FD (1 << 13) /* 1000BASE-T Full Duplex */
+#define MII_EXTSTAT_1000T_HD (1 << 12) /* 1000BASE-T Half Duplex */
+
+/* List of vendor identifiers */
+/* RealTek 8201 */
+#define RTL8201CP_PHYID1    0x0000
+#define RTL8201CP_PHYID2    0x8201
+
+/* RealTek 8211E */
+#define RTL8211E_PHYID1     0x001c
+#define RTL8211E_PHYID2     0xc915
+
+/* National Semiconductor DP83840 */
+#define DP83840_PHYID1      0x2000
+#define DP83840_PHYID2      0x5c01
+
+/* National Semiconductor DP83848 */
+#define DP83848_PHYID1      0x2000
+#define DP83848_PHYID2      0x5c90
+
+#endif /* MII_H */
diff --git a/include/hw/net/msf2-emac.h b/include/hw/net/msf2-emac.h
new file mode 100644 (file)
index 0000000..846ba6e
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * QEMU model of the Smartfusion2 Ethernet MAC.
+ *
+ * Copyright (c) 2020 Subbaraya Sundeep <sundeep.lkml@gmail.com>.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "hw/sysbus.h"
+#include "exec/memory.h"
+#include "net/net.h"
+#include "net/eth.h"
+#include "qom/object.h"
+
+#define TYPE_MSS_EMAC "msf2-emac"
+OBJECT_DECLARE_SIMPLE_TYPE(MSF2EmacState, MSS_EMAC)
+
+#define R_MAX         (0x1a0 / 4)
+#define PHY_MAX_REGS  32
+
+struct MSF2EmacState {
+    SysBusDevice parent;
+
+    MemoryRegion mmio;
+    MemoryRegion *dma_mr;
+    AddressSpace dma_as;
+
+    qemu_irq irq;
+    NICState *nic;
+    NICConf conf;
+
+    uint8_t mac_addr[ETH_ALEN];
+    uint32_t rx_desc;
+    uint16_t phy_regs[PHY_MAX_REGS];
+
+    uint32_t regs[R_MAX];
+};
diff --git a/include/hw/net/mv88w8618_eth.h b/include/hw/net/mv88w8618_eth.h
new file mode 100644 (file)
index 0000000..4107494
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Marvell MV88W8618 / Freecom MusicPal emulation.
+ *
+ * Copyright (c) 2008-2021 QEMU contributors
+ */
+
+#ifndef HW_NET_MV88W8618_ETH_H
+#define HW_NET_MV88W8618_ETH_H
+
+#define TYPE_MV88W8618_ETH "mv88w8618_eth"
+
+#endif
diff --git a/include/hw/net/ne2000-isa.h b/include/hw/net/ne2000-isa.h
new file mode 100644 (file)
index 0000000..af59ee0
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * QEMU NE2000 emulation -- isa bus windup
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_NET_NE2000_ISA_H
+#define HW_NET_NE2000_ISA_H
+
+#include "hw/isa/isa.h"
+#include "hw/qdev-properties.h"
+#include "net/net.h"
+#include "qapi/error.h"
+
+#define TYPE_ISA_NE2000 "ne2k_isa"
+
+static inline ISADevice *isa_ne2000_init(ISABus *bus, int base, int irq,
+                                         NICInfo *nd)
+{
+    ISADevice *d;
+
+    qemu_check_nic_model(nd, "ne2k_isa");
+
+    d = isa_try_new(TYPE_ISA_NE2000);
+    if (d) {
+        DeviceState *dev = DEVICE(d);
+
+        qdev_prop_set_uint32(dev, "iobase", base);
+        qdev_prop_set_uint32(dev, "irq",    irq);
+        qdev_set_nic_properties(dev, nd);
+        isa_realize_and_unref(d, bus, &error_fatal);
+    }
+    return d;
+}
+
+#endif
diff --git a/include/hw/net/npcm7xx_emc.h b/include/hw/net/npcm7xx_emc.h
new file mode 100644 (file)
index 0000000..b789007
--- /dev/null
@@ -0,0 +1,283 @@
+/*
+ * Nuvoton NPCM7xx EMC Module
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef NPCM7XX_EMC_H
+#define NPCM7XX_EMC_H
+
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "net/net.h"
+
+/* 32-bit register indices. */
+enum NPCM7xxPWMRegister {
+    /* Control registers. */
+    REG_CAMCMR,
+    REG_CAMEN,
+
+    /* There are 16 CAMn[ML] registers. */
+    REG_CAMM_BASE,
+    REG_CAML_BASE,
+    REG_CAMML_LAST = 0x21,
+
+    REG_TXDLSA = 0x22,
+    REG_RXDLSA,
+    REG_MCMDR,
+    REG_MIID,
+    REG_MIIDA,
+    REG_FFTCR,
+    REG_TSDR,
+    REG_RSDR,
+    REG_DMARFC,
+    REG_MIEN,
+
+    /* Status registers. */
+    REG_MISTA,
+    REG_MGSTA,
+    REG_MPCNT,
+    REG_MRPC,
+    REG_MRPCC,
+    REG_MREPC,
+    REG_DMARFS,
+    REG_CTXDSA,
+    REG_CTXBSA,
+    REG_CRXDSA,
+    REG_CRXBSA,
+
+    NPCM7XX_NUM_EMC_REGS,
+};
+
+/* REG_CAMCMR fields */
+/* Enable CAM Compare */
+#define REG_CAMCMR_ECMP (1 << 4)
+/* Complement CAM Compare */
+#define REG_CAMCMR_CCAM (1 << 3)
+/* Accept Broadcast Packet */
+#define REG_CAMCMR_ABP (1 << 2)
+/* Accept Multicast Packet */
+#define REG_CAMCMR_AMP (1 << 1)
+/* Accept Unicast Packet */
+#define REG_CAMCMR_AUP (1 << 0)
+
+/* REG_MCMDR fields */
+/* Software Reset */
+#define REG_MCMDR_SWR (1 << 24)
+/* Internal Loopback Select */
+#define REG_MCMDR_LBK (1 << 21)
+/* Operation Mode Select */
+#define REG_MCMDR_OPMOD (1 << 20)
+/* Enable MDC Clock Generation */
+#define REG_MCMDR_ENMDC (1 << 19)
+/* Full-Duplex Mode Select */
+#define REG_MCMDR_FDUP (1 << 18)
+/* Enable SQE Checking */
+#define REG_MCMDR_ENSEQ (1 << 17)
+/* Send PAUSE Frame */
+#define REG_MCMDR_SDPZ (1 << 16)
+/* No Defer */
+#define REG_MCMDR_NDEF (1 << 9)
+/* Frame Transmission On */
+#define REG_MCMDR_TXON (1 << 8)
+/* Strip CRC Checksum */
+#define REG_MCMDR_SPCRC (1 << 5)
+/* Accept CRC Error Packet */
+#define REG_MCMDR_AEP (1 << 4)
+/* Accept Control Packet */
+#define REG_MCMDR_ACP (1 << 3)
+/* Accept Runt Packet */
+#define REG_MCMDR_ARP (1 << 2)
+/* Accept Long Packet */
+#define REG_MCMDR_ALP (1 << 1)
+/* Frame Reception On */
+#define REG_MCMDR_RXON (1 << 0)
+
+/* REG_MIEN fields */
+/* Enable Transmit Descriptor Unavailable Interrupt */
+#define REG_MIEN_ENTDU (1 << 23)
+/* Enable Transmit Completion Interrupt */
+#define REG_MIEN_ENTXCP (1 << 18)
+/* Enable Transmit Interrupt */
+#define REG_MIEN_ENTXINTR (1 << 16)
+/* Enable Receive Descriptor Unavailable Interrupt */
+#define REG_MIEN_ENRDU (1 << 10)
+/* Enable Receive Good Interrupt */
+#define REG_MIEN_ENRXGD (1 << 4)
+/* Enable Receive Interrupt */
+#define REG_MIEN_ENRXINTR (1 << 0)
+
+/* REG_MISTA fields */
+/* TODO: Add error fields and support simulated errors? */
+/* Transmit Bus Error Interrupt */
+#define REG_MISTA_TXBERR (1 << 24)
+/* Transmit Descriptor Unavailable Interrupt */
+#define REG_MISTA_TDU (1 << 23)
+/* Transmit Completion Interrupt */
+#define REG_MISTA_TXCP (1 << 18)
+/* Transmit Interrupt */
+#define REG_MISTA_TXINTR (1 << 16)
+/* Receive Bus Error Interrupt */
+#define REG_MISTA_RXBERR (1 << 11)
+/* Receive Descriptor Unavailable Interrupt */
+#define REG_MISTA_RDU (1 << 10)
+/* DMA Early Notification Interrupt */
+#define REG_MISTA_DENI (1 << 9)
+/* Maximum Frame Length Interrupt */
+#define REG_MISTA_DFOI (1 << 8)
+/* Receive Good Interrupt */
+#define REG_MISTA_RXGD (1 << 4)
+/* Packet Too Long Interrupt */
+#define REG_MISTA_PTLE (1 << 3)
+/* Receive Interrupt */
+#define REG_MISTA_RXINTR (1 << 0)
+
+/* REG_MGSTA fields */
+/* Transmission Halted */
+#define REG_MGSTA_TXHA (1 << 11)
+/* Receive Halted */
+#define REG_MGSTA_RXHA (1 << 11)
+
+/* REG_DMARFC fields */
+/* Maximum Receive Frame Length */
+#define REG_DMARFC_RXMS(word) extract32((word), 0, 16)
+
+/* REG MIIDA fields */
+/* Busy Bit */
+#define REG_MIIDA_BUSY (1 << 17)
+
+/* Transmit and receive descriptors */
+typedef struct NPCM7xxEMCTxDesc NPCM7xxEMCTxDesc;
+typedef struct NPCM7xxEMCRxDesc NPCM7xxEMCRxDesc;
+
+struct NPCM7xxEMCTxDesc {
+    uint32_t flags;
+    uint32_t txbsa;
+    uint32_t status_and_length;
+    uint32_t ntxdsa;
+};
+
+struct NPCM7xxEMCRxDesc {
+    uint32_t status_and_length;
+    uint32_t rxbsa;
+    uint32_t reserved;
+    uint32_t nrxdsa;
+};
+
+/* NPCM7xxEMCTxDesc.flags values */
+/* Owner: 0 = cpu, 1 = emc */
+#define TX_DESC_FLAG_OWNER_MASK (1 << 31)
+/* Transmit interrupt enable */
+#define TX_DESC_FLAG_INTEN (1 << 2)
+/* CRC append */
+#define TX_DESC_FLAG_CRCAPP (1 << 1)
+/* Padding enable */
+#define TX_DESC_FLAG_PADEN (1 << 0)
+
+/* NPCM7xxEMCTxDesc.status_and_length values */
+/* Collision count */
+#define TX_DESC_STATUS_CCNT_SHIFT 28
+#define TX_DESC_STATUS_CCNT_BITSIZE 4
+/* SQE error */
+#define TX_DESC_STATUS_SQE (1 << 26)
+/* Transmission paused */
+#define TX_DESC_STATUS_PAU (1 << 25)
+/* P transmission halted */
+#define TX_DESC_STATUS_TXHA (1 << 24)
+/* Late collision */
+#define TX_DESC_STATUS_LC (1 << 23)
+/* Transmission abort */
+#define TX_DESC_STATUS_TXABT (1 << 22)
+/* No carrier sense */
+#define TX_DESC_STATUS_NCS (1 << 21)
+/* Defer exceed */
+#define TX_DESC_STATUS_EXDEF (1 << 20)
+/* Transmission complete */
+#define TX_DESC_STATUS_TXCP (1 << 19)
+/* Transmission deferred */
+#define TX_DESC_STATUS_DEF (1 << 17)
+/* Transmit interrupt */
+#define TX_DESC_STATUS_TXINTR (1 << 16)
+
+#define TX_DESC_PKT_LEN(word) extract32((word), 0, 16)
+
+/* Transmit buffer start address */
+#define TX_DESC_TXBSA(word) ((uint32_t) (word) & ~3u)
+
+/* Next transmit descriptor start address */
+#define TX_DESC_NTXDSA(word) ((uint32_t) (word) & ~3u)
+
+/* NPCM7xxEMCRxDesc.status_and_length values */
+/* Owner: 0b00 = cpu, 0b01 = undefined, 0b10 = emc, 0b11 = undefined */
+#define RX_DESC_STATUS_OWNER_SHIFT 30
+#define RX_DESC_STATUS_OWNER_BITSIZE 2
+#define RX_DESC_STATUS_OWNER_MASK (3 << RX_DESC_STATUS_OWNER_SHIFT)
+/* Runt packet */
+#define RX_DESC_STATUS_RP (1 << 22)
+/* Alignment error */
+#define RX_DESC_STATUS_ALIE (1 << 21)
+/* Frame reception complete */
+#define RX_DESC_STATUS_RXGD (1 << 20)
+/* Packet too long */
+#define RX_DESC_STATUS_PTLE (1 << 19)
+/* CRC error */
+#define RX_DESC_STATUS_CRCE (1 << 17)
+/* Receive interrupt */
+#define RX_DESC_STATUS_RXINTR (1 << 16)
+
+#define RX_DESC_PKT_LEN(word) extract32((word), 0, 16)
+
+/* Receive buffer start address */
+#define RX_DESC_RXBSA(word) ((uint32_t) (word) & ~3u)
+
+/* Next receive descriptor start address */
+#define RX_DESC_NRXDSA(word) ((uint32_t) (word) & ~3u)
+
+/* Minimum packet length, when TX_DESC_FLAG_PADEN is set. */
+#define MIN_PACKET_LENGTH 64
+
+struct NPCM7xxEMCState {
+    /*< private >*/
+    SysBusDevice parent;
+    /*< public >*/
+
+    MemoryRegion iomem;
+
+    qemu_irq tx_irq;
+    qemu_irq rx_irq;
+
+    NICState *nic;
+    NICConf conf;
+
+    /* 0 or 1, for log messages */
+    uint8_t emc_num;
+
+    uint32_t regs[NPCM7XX_NUM_EMC_REGS];
+
+    /*
+     * tx is active. Set to true by TSDR and then switches off when out of
+     * descriptors. If the TXON bit in REG_MCMDR is off then this is off.
+     */
+    bool tx_active;
+
+    /*
+     * rx is active. Set to true by RSDR and then switches off when out of
+     * descriptors. If the RXON bit in REG_MCMDR is off then this is off.
+     */
+    bool rx_active;
+};
+
+#define TYPE_NPCM7XX_EMC "npcm7xx-emc"
+OBJECT_DECLARE_SIMPLE_TYPE(NPCM7xxEMCState, NPCM7XX_EMC)
+
+#endif /* NPCM7XX_EMC_H */
diff --git a/include/hw/net/smc91c111.h b/include/hw/net/smc91c111.h
new file mode 100644 (file)
index 0000000..df5b11d
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * SMSC 91C111 Ethernet interface emulation
+ *
+ * Copyright (c) 2005 CodeSourcery, LLC.
+ * Written by Paul Brook
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_NET_SMC91C111_H
+#define HW_NET_SMC91C111_H
+
+#include "net/net.h"
+
+void smc91c111_init(NICInfo *, uint32_t, qemu_irq);
+
+#endif
diff --git a/include/hw/net/xlnx-versal-canfd.h b/include/hw/net/xlnx-versal-canfd.h
new file mode 100644 (file)
index 0000000..ad3104d
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * QEMU model of the Xilinx Versal CANFD Controller.
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Written-by: Vikram Garhwal<vikram.garhwal@amd.com>
+ * Based on QEMU CANFD Device emulation implemented by Jin Yang, Deniz Eren and
+ * Pavel Pisa.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_CANFD_XILINX_H
+#define HW_CANFD_XILINX_H
+
+#include "hw/register.h"
+#include "hw/ptimer.h"
+#include "net/can_emu.h"
+#include "hw/qdev-clock.h"
+
+#define TYPE_XILINX_CANFD "xlnx.versal-canfd"
+
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCANFDState, XILINX_CANFD)
+
+#define NUM_REGS_PER_MSG_SPACE 18 /* 1 ID + 1 DLC + 16 Data(DW0 - DW15) regs. */
+#define MAX_NUM_RX             64
+#define OFFSET_RX1_DW15        (0x4144 / 4)
+#define CANFD_TIMER_MAX        0xFFFFUL
+#define CANFD_DEFAULT_CLOCK    (25 * 1000 * 1000)
+
+#define XLNX_VERSAL_CANFD_R_MAX (OFFSET_RX1_DW15 + \
+                    ((MAX_NUM_RX - 1) * NUM_REGS_PER_MSG_SPACE) + 1)
+
+typedef struct XlnxVersalCANFDState {
+    SysBusDevice            parent_obj;
+    MemoryRegion            iomem;
+
+    qemu_irq                irq_canfd_int;
+    qemu_irq                irq_addr_err;
+
+    RegisterInfo            reg_info[XLNX_VERSAL_CANFD_R_MAX];
+    RegisterAccessInfo      *tx_regs;
+    RegisterAccessInfo      *rx0_regs;
+    RegisterAccessInfo      *rx1_regs;
+    RegisterAccessInfo      *af_regs;
+    RegisterAccessInfo      *txe_regs;
+    RegisterAccessInfo      *rx_mailbox_regs;
+    RegisterAccessInfo      *af_mask_regs_mailbox;
+
+    uint32_t                regs[XLNX_VERSAL_CANFD_R_MAX];
+
+    ptimer_state            *canfd_timer;
+
+    CanBusClientState       bus_client;
+    CanBusState             *canfdbus;
+
+    struct {
+        uint8_t             rx0_fifo;
+        uint8_t             rx1_fifo;
+        uint8_t             tx_fifo;
+        bool                enable_rx_fifo1;
+        uint32_t            ext_clk_freq;
+   } cfg;
+
+} XlnxVersalCANFDState;
+
+typedef struct tx_ready_reg_info {
+    uint32_t can_id;
+    uint32_t reg_num;
+} tx_ready_reg_info;
+
+#endif
diff --git a/include/hw/net/xlnx-zynqmp-can.h b/include/hw/net/xlnx-zynqmp-can.h
new file mode 100644 (file)
index 0000000..fd2aa77
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * QEMU model of the Xilinx ZynqMP CAN controller.
+ *
+ * Copyright (c) 2020 Xilinx Inc.
+ *
+ * Written-by: Vikram Garhwal<fnu.vikram@xilinx.com>
+ *
+ * Based on QEMU CAN Device emulation implemented by Jin Yang, Deniz Eren and
+ * Pavel Pisa.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XLNX_ZYNQMP_CAN_H
+#define XLNX_ZYNQMP_CAN_H
+
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "net/can_emu.h"
+#include "net/can_host.h"
+#include "qemu/fifo32.h"
+#include "hw/ptimer.h"
+#include "hw/qdev-clock.h"
+
+#define TYPE_XLNX_ZYNQMP_CAN "xlnx.zynqmp-can"
+
+#define XLNX_ZYNQMP_CAN(obj) \
+     OBJECT_CHECK(XlnxZynqMPCANState, (obj), TYPE_XLNX_ZYNQMP_CAN)
+
+#define MAX_CAN_CTRLS      2
+#define XLNX_ZYNQMP_CAN_R_MAX     (0x84 / 4)
+#define MAILBOX_CAPACITY   64
+#define CAN_TIMER_MAX  0XFFFFUL
+#define CAN_DEFAULT_CLOCK (24 * 1000 * 1000)
+
+/* Each CAN_FRAME will have 4 * 32bit size. */
+#define CAN_FRAME_SIZE     4
+#define RXFIFO_SIZE        (MAILBOX_CAPACITY * CAN_FRAME_SIZE)
+
+typedef struct XlnxZynqMPCANState {
+    SysBusDevice        parent_obj;
+    MemoryRegion        iomem;
+
+    qemu_irq            irq;
+
+    CanBusClientState   bus_client;
+    CanBusState         *canbus;
+
+    struct {
+        uint32_t        ext_clk_freq;
+    } cfg;
+
+    RegisterInfo        reg_info[XLNX_ZYNQMP_CAN_R_MAX];
+    uint32_t            regs[XLNX_ZYNQMP_CAN_R_MAX];
+
+    Fifo32              rx_fifo;
+    Fifo32              tx_fifo;
+    Fifo32              txhpb_fifo;
+
+    ptimer_state        *can_timer;
+} XlnxZynqMPCANState;
+
+#endif
diff --git a/include/hw/nubus/mac-nubus-bridge.h b/include/hw/nubus/mac-nubus-bridge.h
new file mode 100644 (file)
index 0000000..be4dd83
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2013-2018 Laurent Vivier <laurent@vivier.eu>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_NUBUS_MAC_NUBUS_BRIDGE_H
+#define HW_NUBUS_MAC_NUBUS_BRIDGE_H
+
+#include "hw/nubus/nubus.h"
+#include "qom/object.h"
+
+#define MAC_NUBUS_FIRST_SLOT 0x9
+#define MAC_NUBUS_LAST_SLOT  0xe
+#define MAC_NUBUS_SLOT_NB    (MAC_NUBUS_LAST_SLOT - MAC_NUBUS_FIRST_SLOT + 1)
+
+#define TYPE_MAC_NUBUS_BRIDGE "mac-nubus-bridge"
+OBJECT_DECLARE_SIMPLE_TYPE(MacNubusBridge, MAC_NUBUS_BRIDGE)
+
+struct MacNubusBridge {
+    NubusBridge parent_obj;
+
+    MemoryRegion super_slot_alias;
+    MemoryRegion slot_alias;
+};
+
+#endif
diff --git a/include/hw/nubus/nubus.h b/include/hw/nubus/nubus.h
new file mode 100644 (file)
index 0000000..b3b4d2e
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2013-2018 Laurent Vivier <laurent@vivier.eu>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_NUBUS_NUBUS_H
+#define HW_NUBUS_NUBUS_H
+
+#include "hw/qdev-properties.h"
+#include "hw/sysbus.h"
+#include "exec/address-spaces.h"
+#include "qom/object.h"
+#include "qemu/units.h"
+
+#define NUBUS_SUPER_SLOT_SIZE 0x10000000U
+#define NUBUS_SUPER_SLOT_NB   0xe
+
+#define NUBUS_SLOT_BASE       (NUBUS_SUPER_SLOT_SIZE * \
+                               (NUBUS_SUPER_SLOT_NB + 1))
+
+#define NUBUS_SLOT_SIZE       0x01000000
+#define NUBUS_FIRST_SLOT      0x0
+#define NUBUS_LAST_SLOT       0xf
+#define NUBUS_SLOT_NB         (NUBUS_LAST_SLOT - NUBUS_FIRST_SLOT + 1)
+
+#define NUBUS_IRQS            16
+
+#define TYPE_NUBUS_DEVICE "nubus-device"
+OBJECT_DECLARE_SIMPLE_TYPE(NubusDevice, NUBUS_DEVICE)
+
+#define TYPE_NUBUS_BUS "nubus-bus"
+OBJECT_DECLARE_SIMPLE_TYPE(NubusBus, NUBUS_BUS)
+
+#define TYPE_NUBUS_BRIDGE "nubus-bridge"
+OBJECT_DECLARE_SIMPLE_TYPE(NubusBridge, NUBUS_BRIDGE);
+
+struct NubusBus {
+    BusState qbus;
+
+    AddressSpace nubus_as;
+    MemoryRegion nubus_mr;
+
+    MemoryRegion super_slot_io;
+    MemoryRegion slot_io;
+
+    uint16_t slot_available_mask;
+
+    qemu_irq irqs[NUBUS_IRQS];
+};
+
+#define NUBUS_DECL_ROM_MAX_SIZE    (128 * KiB)
+
+struct NubusDevice {
+    DeviceState qdev;
+
+    int32_t slot;
+    MemoryRegion super_slot_mem;
+    MemoryRegion slot_mem;
+
+    char *romfile;
+    MemoryRegion decl_rom;
+};
+
+void nubus_set_irq(NubusDevice *nd, int level);
+
+struct NubusBridge {
+    SysBusDevice parent_obj;
+
+    NubusBus bus;
+};
+
+#endif
diff --git a/include/hw/nvram/eeprom_at24c.h b/include/hw/nvram/eeprom_at24c.h
new file mode 100644 (file)
index 0000000..acb9857
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-only
+ */
+
+#ifndef EEPROM_AT24C_H
+#define EEPROM_AT24C_H
+
+#include "hw/i2c/i2c.h"
+
+/*
+ * Create and realize an AT24C EEPROM device on the heap.
+ * @bus: I2C bus to put it on
+ * @address: I2C address of the EEPROM slave when put on a bus
+ * @rom_size: size of the EEPROM
+ *
+ * Create the device state structure, initialize it, put it on the specified
+ * @bus, and drop the reference to it (the device is realized).
+ */
+I2CSlave *at24c_eeprom_init(I2CBus *bus, uint8_t address, uint32_t rom_size);
+
+
+/*
+ * Create and realize an AT24C EEPROM device on the heap with initial data.
+ * @bus: I2C bus to put it on
+ * @address: I2C address of the EEPROM slave when put on a bus
+ * @rom_size: size of the EEPROM
+ * @init_rom: Array of bytes to initialize EEPROM memory with
+ * @init_rom_size: Size of @init_rom, must be less than or equal to @rom_size
+ *
+ * Create the device state structure, initialize it, put it on the specified
+ * @bus, and drop the reference to it (the device is realized). Copies the data
+ * from @init_rom to the beginning of the EEPROM memory buffer.
+ */
+I2CSlave *at24c_eeprom_init_rom(I2CBus *bus, uint8_t address, uint32_t rom_size,
+                                const uint8_t *init_rom, uint32_t init_rom_size);
+
+#endif
index 8a9f5738bfa7cd4fc80e52393f2a10616b0e771f..c1f81a5f13a9f070f4a12e71c3e0d66b9c114f30 100644 (file)
@@ -308,6 +308,15 @@ void *fw_cfg_modify_file(FWCfgState *s, const char *filename, void *data,
 bool fw_cfg_add_from_generator(FWCfgState *s, const char *filename,
                                const char *gen_id, Error **errp);
 
+/**
+ * fw_cfg_add_extra_pci_roots:
+ * @bus: main pci root bus to be scanned from
+ * @s: fw_cfg device being modified
+ *
+ * Add a new fw_cfg item...
+ */
+void fw_cfg_add_extra_pci_roots(PCIBus *bus, FWCfgState *s);
+
 FWCfgState *fw_cfg_init_io_dma(uint32_t iobase, uint32_t dma_iobase,
                                 AddressSpace *dma_as);
 FWCfgState *fw_cfg_init_io(uint32_t iobase);
@@ -333,4 +342,25 @@ bool fw_cfg_dma_enabled(void *opaque);
  */
 const char *fw_cfg_arch_key_name(uint16_t key);
 
+/**
+ * load_image_to_fw_cfg() - Load an image file into an fw_cfg entry identified
+ *                          by key.
+ * @fw_cfg:         The firmware config instance to store the data in.
+ * @size_key:       The firmware config key to store the size of the loaded
+ *                  data under, with fw_cfg_add_i32().
+ * @data_key:       The firmware config key to store the loaded data under,
+ *                  with fw_cfg_add_bytes().
+ * @image_name:     The name of the image file to load. If it is NULL, the
+ *                  function returns without doing anything.
+ * @try_decompress: Whether the image should be decompressed (gunzipped) before
+ *                  adding it to fw_cfg. If decompression fails, the image is
+ *                  loaded as-is.
+ *
+ * In case of failure, the function prints an error message to stderr and the
+ * process exits with status 1.
+ */
+void load_image_to_fw_cfg(FWCfgState *fw_cfg, uint16_t size_key,
+                          uint16_t data_key, const char *image_name,
+                          bool try_decompress);
+
 #endif
diff --git a/include/hw/nvram/mac_nvram.h b/include/hw/nvram/mac_nvram.h
new file mode 100644 (file)
index 0000000..0c4dfae
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * PowerMac NVRAM emulation
+ *
+ * Copyright (c) 2004-2007 Fabrice Bellard
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef MAC_NVRAM_H
+#define MAC_NVRAM_H
+
+#include "exec/memory.h"
+#include "hw/sysbus.h"
+
+#define MACIO_NVRAM_SIZE 0x2000
+
+#define TYPE_MACIO_NVRAM "macio-nvram"
+OBJECT_DECLARE_SIMPLE_TYPE(MacIONVRAMState, MACIO_NVRAM)
+
+struct MacIONVRAMState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    uint32_t size;
+    uint32_t it_shift;
+
+    MemoryRegion mem;
+    uint8_t *data;
+    BlockBackend *blk;
+};
+
+void pmac_format_nvram_partition(MacIONVRAMState *nvr, int len);
+
+#endif /* MAC_NVRAM_H */
index 156bbd151ab7558c0a347ae50978a7bddcef62d5..ea4b5d0731039b53abc93a8ce3086053f3d2feba 100644 (file)
@@ -73,7 +73,7 @@ typedef struct NPCM7xxOTPClass NPCM7xxOTPClass;
  * Each nibble of data is encoded into a byte, so the number of bytes written
  * to the array will be @len * 2.
  */
-extern void npcm7xx_otp_array_write(NPCM7xxOTPState *s, const void *data,
-                                    unsigned int offset, unsigned int len);
+void npcm7xx_otp_array_write(NPCM7xxOTPState *s, const void *data,
+                             unsigned int offset, unsigned int len);
 
 #endif /* NPCM7XX_OTP_H */
diff --git a/include/hw/nvram/xlnx-bbram.h b/include/hw/nvram/xlnx-bbram.h
new file mode 100644 (file)
index 0000000..6fc13f8
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * QEMU model of the Xilinx BBRAM Battery Backed RAM
+ *
+ * Copyright (c) 2015-2021 Xilinx Inc.
+ *
+ * Written by Edgar E. Iglesias <edgari@xilinx.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef XLNX_BBRAM_H
+#define XLNX_BBRAM_H
+
+#include "sysemu/block-backend.h"
+#include "hw/qdev-core.h"
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "hw/register.h"
+
+#define RMAX_XLNX_BBRAM ((0x4c / 4) + 1)
+
+#define TYPE_XLNX_BBRAM "xlnx.bbram-ctrl"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxBBRam, XLNX_BBRAM);
+
+struct XlnxBBRam {
+    SysBusDevice parent_obj;
+    qemu_irq irq_bbram;
+
+    BlockBackend *blk;
+
+    uint32_t crc_zpads;
+    bool bbram8_wo;
+    bool blk_ro;
+
+    uint32_t regs[RMAX_XLNX_BBRAM];
+    RegisterInfo regs_info[RMAX_XLNX_BBRAM];
+};
+
+#endif
diff --git a/include/hw/nvram/xlnx-efuse.h b/include/hw/nvram/xlnx-efuse.h
new file mode 100644 (file)
index 0000000..58414e4
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+ * QEMU model of the Xilinx eFuse core
+ *
+ * Copyright (c) 2015 Xilinx Inc.
+ *
+ * Written by Edgar E. Iglesias <edgari@xilinx.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XLNX_EFUSE_H
+#define XLNX_EFUSE_H
+
+#include "sysemu/block-backend.h"
+#include "hw/qdev-core.h"
+
+#define TYPE_XLNX_EFUSE "xlnx,efuse"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxEFuse, XLNX_EFUSE);
+
+struct XlnxEFuse {
+    DeviceState parent_obj;
+    BlockBackend *blk;
+    bool blk_ro;
+    uint32_t *fuse32;
+
+    DeviceState *dev;
+
+    bool init_tbits;
+
+    uint8_t efuse_nr;
+    uint32_t efuse_size;
+
+    uint32_t *ro_bits;
+    uint32_t ro_bits_cnt;
+};
+
+/**
+ * xlnx_efuse_calc_crc:
+ * @data: an array of 32-bit words for which the CRC should be computed
+ * @u32_cnt: the array size in number of 32-bit words
+ * @zpads: the number of 32-bit zeros prepended to @data before computation
+ *
+ * This function is used to compute the CRC for an array of 32-bit words,
+ * using a Xilinx-specific data padding.
+ *
+ * Returns: the computed 32-bit CRC
+ */
+uint32_t xlnx_efuse_calc_crc(const uint32_t *data, unsigned u32_cnt,
+                             unsigned zpads);
+
+/**
+ * xlnx_efuse_get_bit:
+ * @s: the efuse object
+ * @bit: the efuse bit-address to read the data
+ *
+ * Returns: the bit, 0 or 1, at @bit of object @s
+ */
+bool xlnx_efuse_get_bit(XlnxEFuse *s, unsigned int bit);
+
+/**
+ * xlnx_efuse_set_bit:
+ * @s: the efuse object
+ * @bit: the efuse bit-address to be written a value of 1
+ *
+ * Returns: true on success, false on failure
+ */
+bool xlnx_efuse_set_bit(XlnxEFuse *s, unsigned int bit);
+
+/**
+ * xlnx_efuse_k256_check:
+ * @s: the efuse object
+ * @crc: the 32-bit CRC to be compared with
+ * @start: the efuse bit-address (which must be multiple of 32) of the
+ *         start of a 256-bit array
+ *
+ * This function computes the CRC of a 256-bit array starting at @start
+ * then compares to the given @crc
+ *
+ * Returns: true of @crc == computed, false otherwise
+ */
+bool xlnx_efuse_k256_check(XlnxEFuse *s, uint32_t crc, unsigned start);
+
+/**
+ * xlnx_efuse_tbits_check:
+ * @s: the efuse object
+ *
+ * This function inspects a number of efuse bits at specific addresses
+ * to see if they match a validation pattern. Each pattern is a group
+ * of 4 bits, and there are 3 groups.
+ *
+ * Returns: a 3-bit mask, where a bit of '1' means the corresponding
+ * group has a valid pattern.
+ */
+uint32_t xlnx_efuse_tbits_check(XlnxEFuse *s);
+
+/**
+ * xlnx_efuse_get_row:
+ * @s: the efuse object
+ * @bit: the efuse bit address for which a 32-bit value is read
+ *
+ * Returns: the entire 32 bits of the efuse, starting at a bit
+ * address that is multiple of 32 and contains the bit at @bit
+ */
+static inline uint32_t xlnx_efuse_get_row(XlnxEFuse *s, unsigned int bit)
+{
+    if (!(s->fuse32)) {
+        return 0;
+    } else {
+        unsigned int row_idx = bit / 32;
+
+        assert(row_idx < (s->efuse_size * s->efuse_nr / 32));
+        return s->fuse32[row_idx];
+    }
+}
+
+#endif
diff --git a/include/hw/nvram/xlnx-versal-efuse.h b/include/hw/nvram/xlnx-versal-efuse.h
new file mode 100644 (file)
index 0000000..a873dc5
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Xilinx Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef XLNX_VERSAL_EFUSE_H
+#define XLNX_VERSAL_EFUSE_H
+
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "hw/nvram/xlnx-efuse.h"
+
+#define XLNX_VERSAL_EFUSE_CTRL_R_MAX ((0x100 / 4) + 1)
+
+#define TYPE_XLNX_VERSAL_EFUSE_CTRL  "xlnx,versal-efuse"
+#define TYPE_XLNX_VERSAL_EFUSE_CACHE "xlnx,pmc-efuse-cache"
+
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalEFuseCtrl, XLNX_VERSAL_EFUSE_CTRL);
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalEFuseCache, XLNX_VERSAL_EFUSE_CACHE);
+
+struct XlnxVersalEFuseCtrl {
+    SysBusDevice parent_obj;
+    qemu_irq irq_efuse_imr;
+
+    XlnxEFuse *efuse;
+
+    void *extra_pg0_lock_spec;      /* Opaque property */
+    uint32_t extra_pg0_lock_n16;
+
+    uint32_t regs[XLNX_VERSAL_EFUSE_CTRL_R_MAX];
+    RegisterInfo regs_info[XLNX_VERSAL_EFUSE_CTRL_R_MAX];
+};
+
+struct XlnxVersalEFuseCache {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+
+    XlnxEFuse *efuse;
+};
+
+/**
+ * xlnx_versal_efuse_read_row:
+ * @s: the efuse object
+ * @bit: the bit-address within the 32-bit row to be read
+ * @denied: if non-NULL, to receive true if the row is write-only
+ *
+ * Returns: the 32-bit word containing address @bit; 0 if @denies is true
+ */
+uint32_t xlnx_versal_efuse_read_row(XlnxEFuse *s, uint32_t bit, bool *denied);
+
+#endif
diff --git a/include/hw/nvram/xlnx-zynqmp-efuse.h b/include/hw/nvram/xlnx-zynqmp-efuse.h
new file mode 100644 (file)
index 0000000..6b051ec
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Xilinx Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef XLNX_ZYNQMP_EFUSE_H
+#define XLNX_ZYNQMP_EFUSE_H
+
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "hw/nvram/xlnx-efuse.h"
+
+#define XLNX_ZYNQMP_EFUSE_R_MAX ((0x10fc / 4) + 1)
+
+#define TYPE_XLNX_ZYNQMP_EFUSE "xlnx,zynqmp-efuse"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPEFuse, XLNX_ZYNQMP_EFUSE);
+
+struct XlnxZynqMPEFuse {
+    SysBusDevice parent_obj;
+    qemu_irq irq;
+
+    XlnxEFuse *efuse;
+    uint32_t regs[XLNX_ZYNQMP_EFUSE_R_MAX];
+    RegisterInfo regs_info[XLNX_ZYNQMP_EFUSE_R_MAX];
+};
+
+#endif
diff --git a/include/hw/openrisc/boot.h b/include/hw/openrisc/boot.h
new file mode 100644 (file)
index 0000000..25a313d
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * QEMU OpenRISC boot helpers.
+ *
+ * Copyright (c) 2022 Stafford Horne <shorne@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef OPENRISC_BOOT_H
+#define OPENRISC_BOOT_H
+
+#include "exec/cpu-defs.h"
+
+hwaddr openrisc_load_kernel(ram_addr_t ram_size,
+                            const char *kernel_filename,
+                            uint32_t *bootstrap_pc);
+
+hwaddr openrisc_load_initrd(void *fdt, const char *filename,
+                            hwaddr load_start, uint64_t mem_size);
+
+uint32_t openrisc_load_fdt(void *fdt, hwaddr load_start,
+                           uint64_t mem_size);
+
+#endif /* OPENRISC_BOOT_H */
index f2f0a27381065df9ce69bf3d785a7dace608eeac..c0a42f3711252909416e5805dbc1f3bb0f1a19c5 100644 (file)
  */
 #define MAX_OR_LINES      48
 
-typedef struct OrIRQState qemu_or_irq;
-
-DECLARE_INSTANCE_CHECKER(qemu_or_irq, OR_IRQ,
-                         TYPE_OR_IRQ)
+OBJECT_DECLARE_SIMPLE_TYPE(OrIRQState, OR_IRQ)
 
 struct OrIRQState {
     DeviceState parent_obj;
diff --git a/include/hw/pci-bridge/cxl_upstream_port.h b/include/hw/pci-bridge/cxl_upstream_port.h
new file mode 100644 (file)
index 0000000..1263513
--- /dev/null
@@ -0,0 +1,19 @@
+
+#ifndef CXL_USP_H
+#define CXL_USP_H
+#include "hw/pci/pcie.h"
+#include "hw/pci/pcie_port.h"
+#include "hw/cxl/cxl.h"
+
+typedef struct CXLUpstreamPort {
+    /*< private >*/
+    PCIEPort parent_obj;
+
+    /*< public >*/
+    CXLComponentState cxl_cstate;
+    CXLCCI swcci;
+    DOECap doe_cdat;
+    uint64_t sn;
+} CXLUpstreamPort;
+
+#endif /* CXL_SUP_H */
diff --git a/include/hw/pci-bridge/pci_expander_bridge.h b/include/hw/pci-bridge/pci_expander_bridge.h
new file mode 100644 (file)
index 0000000..0b3856d
--- /dev/null
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef PCI_EXPANDER_BRIDGE_H
+#define PCI_EXPANDER_BRIDGE_H
+
+#include "hw/cxl/cxl.h"
+
+void pxb_cxl_hook_up_registers(CXLState *state, PCIBus *bus, Error **errp);
+
+#endif /* PCI_EXPANDER_BRIDGE_H */
diff --git a/include/hw/pci-bridge/simba.h b/include/hw/pci-bridge/simba.h
new file mode 100644 (file)
index 0000000..979cb17
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * QEMU Simba PCI bridge
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ * Copyright (c) 2012,2013 Artyom Tarasenko
+ * Copyright (c) 2017 Mark Cave-Ayland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_PCI_BRIDGE_SIMBA_H
+#define HW_PCI_BRIDGE_SIMBA_H
+
+#include "hw/pci/pci_bridge.h"
+#include "qom/object.h"
+
+
+struct SimbaPCIBridge {
+    /*< private >*/
+    PCIBridge parent_obj;
+};
+
+#define TYPE_SIMBA_PCI_BRIDGE "pbm-bridge"
+OBJECT_DECLARE_SIMPLE_TYPE(SimbaPCIBridge, SIMBA_PCI_BRIDGE)
+
+#endif
diff --git a/include/hw/pci-bridge/xio3130_downstream.h b/include/hw/pci-bridge/xio3130_downstream.h
new file mode 100644 (file)
index 0000000..1d10139
--- /dev/null
@@ -0,0 +1,15 @@
+/*
+ * TI X3130 pci express downstream port switch
+ *
+ * Copyright (C) 2022 Igor Mammedov <imammedo@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_PCI_BRIDGE_XIO3130_DOWNSTREAM_H
+#define HW_PCI_BRIDGE_XIO3130_DOWNSTREAM_H
+
+#define TYPE_XIO3130_DOWNSTREAM "xio3130-downstream"
+
+#endif
+
diff --git a/include/hw/pci-host/articia.h b/include/hw/pci-host/articia.h
new file mode 100644 (file)
index 0000000..529c240
--- /dev/null
@@ -0,0 +1,17 @@
+/*
+ * Mai Logic Articia S emulation
+ *
+ * Copyright (c) 2023 BALATON Zoltan
+ *
+ * This work is licensed under the GNU GPL license version 2 or later.
+ *
+ */
+
+#ifndef ARTICIA_H
+#define ARTICIA_H
+
+#define TYPE_ARTICIA "articia"
+#define TYPE_ARTICIA_PCI_HOST "articia-pci-host"
+#define TYPE_ARTICIA_PCI_BRIDGE "articia-pci-bridge"
+
+#endif
diff --git a/include/hw/pci-host/astro.h b/include/hw/pci-host/astro.h
new file mode 100644 (file)
index 0000000..f63fd22
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * HP-PARISC Astro Bus connector with Elroy PCI host bridges
+ */
+
+#ifndef ASTRO_H
+#define ASTRO_H
+
+#include "hw/pci/pci_host.h"
+
+#define ASTRO_HPA               0xfed00000
+
+#define ROPES_PER_IOC           8       /* per Ike half or Pluto/Astro */
+
+#define TYPE_ASTRO_CHIP "astro-chip"
+OBJECT_DECLARE_SIMPLE_TYPE(AstroState, ASTRO_CHIP)
+
+#define TYPE_ELROY_PCI_HOST_BRIDGE "elroy-pcihost"
+OBJECT_DECLARE_SIMPLE_TYPE(ElroyState, ELROY_PCI_HOST_BRIDGE)
+
+#define ELROY_NUM               4 /* # of Elroys */
+#define ELROY_IRQS              8 /* IOSAPIC IRQs */
+
+/* ASTRO Memory and I/O regions */
+#define LMMIO_DIST_BASE_ADDR      0xf4000000ULL
+#define LMMIO_DIST_BASE_SIZE       0x4000000ULL
+
+#define IOS_DIST_BASE_ADDR      0xfffee00000ULL
+#define IOS_DIST_BASE_SIZE           0x10000ULL
+
+struct AstroState;
+
+struct ElroyState {
+    PCIHostState parent_obj;
+
+    /* parent Astro device */
+    struct AstroState *astro;
+
+    /* HPA of this Elroy */
+    hwaddr hpa;
+
+    /* PCI bus number (Elroy number) */
+    unsigned int pci_bus_num;
+
+    uint64_t config_address;
+    uint64_t config_reg_elroy;
+
+    uint64_t status_control;
+    uint64_t arb_mask;
+    uint64_t mmio_base[(0x0250 - 0x200) / 8];
+    uint64_t error_config;
+
+    uint32_t iosapic_reg_select;
+    uint64_t iosapic_reg[0x20];
+
+    uint32_t ilr;
+
+    MemoryRegion this_mem;
+
+    MemoryRegion pci_mmio;
+    MemoryRegion pci_mmio_alias;
+    MemoryRegion pci_hole;
+    MemoryRegion pci_io;
+};
+
+struct AstroState {
+    PCIHostState parent_obj;
+
+    uint64_t ioc_ctrl;
+    uint64_t ioc_status_ctrl;
+    uint64_t ioc_ranges[(0x03d8 - 0x300) / 8];
+    uint64_t ioc_rope_config;
+    uint64_t ioc_status_control;
+    uint64_t ioc_flush_control;
+    uint64_t ioc_rope_control[8];
+    uint64_t tlb_ibase;
+    uint64_t tlb_imask;
+    uint64_t tlb_pcom;
+    uint64_t tlb_tcnfg;
+    uint64_t tlb_pdir_base;
+
+    struct ElroyState *elroy[ELROY_NUM];
+
+    MemoryRegion this_mem;
+
+    MemoryRegion pci_mmio;
+    MemoryRegion pci_io;
+
+    IOMMUMemoryRegion iommu;
+    AddressSpace iommu_as;
+};
+
+#endif
diff --git a/include/hw/pci-host/bonito.h b/include/hw/pci-host/bonito.h
new file mode 100644 (file)
index 0000000..b8ecf78
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * QEMU Bonito64 north bridge support
+ *
+ * Copyright (c) 2008 yajin (yajin@vm-kernel.org)
+ * Copyright (c) 2010 Huacai Chen (zltjiangshi@gmail.com)
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_PCI_HOST_BONITO_H
+#define HW_PCI_HOST_BONITO_H
+
+#include "qom/object.h"
+
+#define TYPE_BONITO_PCI_HOST_BRIDGE "Bonito-pcihost"
+OBJECT_DECLARE_SIMPLE_TYPE(BonitoState, BONITO_PCI_HOST_BRIDGE)
+
+#endif
index 6d9b51ae67cd9bf52c2338842955eaf2f349dedf..908f3d946bebad1091404bd847ff037566f20aa6 100644 (file)
@@ -22,9 +22,6 @@
 #define DESIGNWARE_H
 
 #include "hw/sysbus.h"
-#include "hw/pci/pci.h"
-#include "hw/pci/pci_bus.h"
-#include "hw/pci/pcie_host.h"
 #include "hw/pci/pci_bridge.h"
 #include "qom/object.h"
 
diff --git a/include/hw/pci-host/dino.h b/include/hw/pci-host/dino.h
new file mode 100644 (file)
index 0000000..fd7975c
--- /dev/null
@@ -0,0 +1,146 @@
+/*
+ * HP-PARISC Dino PCI chipset emulation, as in B160L and similar machines
+ *
+ * (C) 2017-2019 by Helge Deller <deller@gmx.de>
+ *
+ * This work is licensed under the GNU GPL license version 2 or later.
+ *
+ * Documentation available at:
+ * https://parisc.wiki.kernel.org/images-parisc/9/91/Dino_ers.pdf
+ * https://parisc.wiki.kernel.org/images-parisc/7/70/Dino_3_1_Errata.pdf
+ */
+
+#ifndef DINO_H
+#define DINO_H
+
+#include "hw/pci/pci_host.h"
+
+#define TYPE_DINO_PCI_HOST_BRIDGE "dino-pcihost"
+OBJECT_DECLARE_SIMPLE_TYPE(DinoState, DINO_PCI_HOST_BRIDGE)
+
+#define DINO_IAR0               0x004
+#define DINO_IODC               0x008
+#define DINO_IRR0               0x00C  /* RO */
+#define DINO_IAR1               0x010
+#define DINO_IRR1               0x014  /* RO */
+#define DINO_IMR                0x018
+#define DINO_IPR                0x01C
+#define DINO_TOC_ADDR           0x020
+#define DINO_ICR                0x024
+#define DINO_ILR                0x028  /* RO */
+#define DINO_IO_COMMAND         0x030  /* WO */
+#define DINO_IO_STATUS          0x034  /* RO */
+#define DINO_IO_CONTROL         0x038
+#define DINO_IO_GSC_ERR_RESP    0x040  /* RO */
+#define DINO_IO_ERR_INFO        0x044  /* RO */
+#define DINO_IO_PCI_ERR_RESP    0x048  /* RO */
+#define DINO_IO_FBB_EN          0x05c
+#define DINO_IO_ADDR_EN         0x060
+#define DINO_PCI_CONFIG_ADDR    0x064
+#define DINO_PCI_CONFIG_DATA    0x068
+#define DINO_PCI_IO_DATA        0x06c
+#define DINO_PCI_MEM_DATA       0x070  /* Dino 3.x only */
+#define DINO_GSC2X_CONFIG       0x7b4  /* RO */
+#define DINO_GMASK              0x800
+#define DINO_PAMR               0x804
+#define DINO_PAPR               0x808
+#define DINO_DAMODE             0x80c
+#define DINO_PCICMD             0x810
+#define DINO_PCISTS             0x814  /* R/WC */
+#define DINO_MLTIM              0x81c
+#define DINO_BRDG_FEAT          0x820
+#define DINO_PCIROR             0x824
+#define DINO_PCIWOR             0x828
+#define DINO_TLTIM              0x830
+
+#define DINO_IRQS         11      /* bits 0-10 are architected */
+#define DINO_IRR_MASK     0x5ff   /* only 10 bits are implemented */
+#define DINO_LOCAL_IRQS   (DINO_IRQS + 1)
+#define DINO_MASK_IRQ(x)  (1 << (x))
+
+#define DINO_IRQ_PCIINTA   0
+#define DINO_IRQ_PCIINTB   1
+#define DINO_IRQ_PCIINTC   2
+#define DINO_IRQ_PCIINTD   3
+#define DINO_IRQ_PCIINTE   4
+#define DINO_IRQ_PCIINTF   5
+#define DINO_IRQ_GSCEXTINT 6
+#define DINO_IRQ_BUSERRINT 7
+#define DINO_IRQ_PS2INT    8
+#define DINO_IRQ_UNUSED    9
+#define DINO_IRQ_RS232INT  10
+
+#define PCIINTA   0x001
+#define PCIINTB   0x002
+#define PCIINTC   0x004
+#define PCIINTD   0x008
+#define PCIINTE   0x010
+#define PCIINTF   0x020
+#define GSCEXTINT 0x040
+/* #define xxx       0x080 - bit 7 is "default" */
+/* #define xxx    0x100 - bit 8 not used */
+/* #define xxx    0x200 - bit 9 not used */
+#define RS232INT  0x400
+
+#define DINO_MEM_CHUNK_SIZE (8 * MiB)
+
+#define DINO800_REGS (1 + (DINO_TLTIM - DINO_GMASK) / 4)
+static const uint32_t reg800_keep_bits[DINO800_REGS] = {
+    MAKE_64BIT_MASK(0, 1),  /* GMASK */
+    MAKE_64BIT_MASK(0, 7),  /* PAMR */
+    MAKE_64BIT_MASK(0, 7),  /* PAPR */
+    MAKE_64BIT_MASK(0, 8),  /* DAMODE */
+    MAKE_64BIT_MASK(0, 7),  /* PCICMD */
+    MAKE_64BIT_MASK(0, 9),  /* PCISTS */
+    MAKE_64BIT_MASK(0, 32), /* Undefined */
+    MAKE_64BIT_MASK(0, 8),  /* MLTIM */
+    MAKE_64BIT_MASK(0, 30), /* BRDG_FEAT */
+    MAKE_64BIT_MASK(0, 24), /* PCIROR */
+    MAKE_64BIT_MASK(0, 22), /* PCIWOR */
+    MAKE_64BIT_MASK(0, 32), /* Undocumented */
+    MAKE_64BIT_MASK(0, 9),  /* TLTIM */
+};
+
+/* offsets to DINO HPA: */
+#define DINO_PCI_ADDR           0x064
+#define DINO_CONFIG_DATA        0x068
+#define DINO_IO_DATA            0x06c
+
+struct DinoState {
+    PCIHostState parent_obj;
+
+    /*
+     * PCI_CONFIG_ADDR is parent_obj.config_reg, via pci_host_conf_be_ops,
+     * so that we can map PCI_CONFIG_DATA to pci_host_data_be_ops.
+     */
+    uint32_t config_reg_dino; /* keep original copy, including 2 lowest bits */
+
+    uint32_t iar0;
+    uint32_t iar1;
+    uint32_t imr;
+    uint32_t ipr;
+    uint32_t icr;
+    uint32_t ilr;
+    uint32_t io_fbb_en;
+    uint32_t io_addr_en;
+    uint32_t io_control;
+    uint32_t toc_addr;
+
+    uint32_t reg800[DINO800_REGS];
+
+    MemoryRegion this_mem;
+    MemoryRegion pci_mem;
+    MemoryRegion pci_mem_alias[32];
+
+    MemoryRegion *memory_as;
+
+    AddressSpace bm_as;
+    MemoryRegion bm;
+    MemoryRegion bm_ram_alias;
+    MemoryRegion bm_pci_alias;
+    MemoryRegion bm_cpu_alias;
+
+    qemu_irq irqs[DINO_IRQS];
+};
+
+#endif
index d52ea80d4e04b2c60050c62055c246441a5ab1c3..b0240bd7681fb36db4e6b64ed056855e28c363de 100644 (file)
@@ -22,7 +22,7 @@
 
 #include "exec/hwaddr.h"
 #include "hw/sysbus.h"
-#include "hw/pci/pci.h"
+#include "hw/pci/pci_device.h"
 #include "hw/pci/pcie_host.h"
 #include "qom/object.h"
 
@@ -49,8 +49,12 @@ struct GPEXHost {
 
     MemoryRegion io_ioport;
     MemoryRegion io_mmio;
+    MemoryRegion io_ioport_window;
+    MemoryRegion io_mmio_window;
     qemu_irq irq[GPEX_NUM_IRQS];
     int irq_num[GPEX_NUM_IRQS];
+
+    bool allow_unmapped_accesses;
 };
 
 struct GPEXConfig {
@@ -59,6 +63,7 @@ struct GPEXConfig {
     MemMapEntry mmio64;
     MemMapEntry pio;
     int         irq;
+    PCIBus      *bus;
 };
 
 int gpex_set_irq_num(GPEXHost *s, int index, int gsi);
diff --git a/include/hw/pci-host/grackle.h b/include/hw/pci-host/grackle.h
new file mode 100644 (file)
index 0000000..7ad3a77
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * QEMU Grackle PCI host (heathrow OldWorld PowerMac)
+ *
+ * Copyright (c) 2006-2007 Fabrice Bellard
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef GRACKLE_H
+#define GRACKLE_H
+
+#include "hw/pci/pci_host.h"
+
+#define TYPE_GRACKLE_PCI_HOST_BRIDGE "grackle-pcihost"
+OBJECT_DECLARE_SIMPLE_TYPE(GrackleState, GRACKLE_PCI_HOST_BRIDGE)
+
+struct GrackleState {
+    PCIHostState parent_obj;
+
+    uint32_t ofw_addr;
+    qemu_irq irqs[4];
+    MemoryRegion pci_mmio;
+    MemoryRegion pci_hole;
+    MemoryRegion pci_io;
+};
+
+#endif
index 6c16eaf876dd29c49b6c2a98aa155a12656611af..c988f708901ec9c66ae951ccd24509b85ba991bf 100644 (file)
 #ifndef HW_PCI_I440FX_H
 #define HW_PCI_I440FX_H
 
-#include "hw/hw.h"
-#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_device.h"
 #include "hw/pci-host/pam.h"
 #include "qom/object.h"
 
+#define I440FX_HOST_PROP_PCI_TYPE "pci-type"
+
 #define TYPE_I440FX_PCI_HOST_BRIDGE "i440FX-pcihost"
 #define TYPE_I440FX_PCI_DEVICE "i440FX"
 
@@ -26,26 +27,11 @@ struct PCII440FXState {
     PCIDevice parent_obj;
     /*< public >*/
 
-    MemoryRegion *system_memory;
-    MemoryRegion *pci_address_space;
-    MemoryRegion *ram_memory;
-    PAMMemoryRegion pam_regions[13];
+    PAMMemoryRegion pam_regions[PAM_REGIONS_COUNT];
     MemoryRegion smram_region;
     MemoryRegion smram, low_smram;
 };
 
 #define TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE "igd-passthrough-i440FX"
 
-PCIBus *i440fx_init(const char *host_type, const char *pci_type,
-                    PCII440FXState **pi440fx_state,
-                    MemoryRegion *address_space_mem,
-                    MemoryRegion *address_space_io,
-                    ram_addr_t ram_size,
-                    ram_addr_t below_4g_mem_size,
-                    ram_addr_t above_4g_mem_size,
-                    MemoryRegion *pci_memory,
-                    MemoryRegion *ram_memory);
-
-PCIBus *find_i440fx(void);
-
 #endif
diff --git a/include/hw/pci-host/ls7a.h b/include/hw/pci-host/ls7a.h
new file mode 100644 (file)
index 0000000..e753449
--- /dev/null
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QEMU LoongArch CPU
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef HW_LS7A_H
+#define HW_LS7A_H
+
+#include "hw/pci-host/pam.h"
+#include "qemu/units.h"
+#include "qemu/range.h"
+#include "qom/object.h"
+
+#define VIRT_PCI_MEM_BASE        0x40000000UL
+#define VIRT_PCI_MEM_SIZE        0x40000000UL
+#define VIRT_PCI_IO_OFFSET       0x4000
+#define VIRT_PCI_CFG_BASE        0x20000000
+#define VIRT_PCI_CFG_SIZE        0x08000000
+#define VIRT_PCI_IO_BASE         0x18004000UL
+#define VIRT_PCI_IO_SIZE         0xC000
+
+#define VIRT_PCH_REG_BASE        0x10000000UL
+#define VIRT_IOAPIC_REG_BASE     (VIRT_PCH_REG_BASE)
+#define VIRT_PCH_MSI_ADDR_LOW    0x2FF00000UL
+
+/*
+ * GSI_BASE is hard-coded with 64 in linux kernel, else kernel fails to boot
+ * 0  - 15  GSI for ISA devices even if there is no ISA devices
+ * 16 - 63  GSI for CPU devices such as timers/perf monitor etc
+ * 64 -     GSI for external devices
+ */
+#define VIRT_PCH_PIC_IRQ_NUM     32
+#define VIRT_GSI_BASE            64
+#define VIRT_DEVICE_IRQS         16
+#define VIRT_UART_IRQ            (VIRT_GSI_BASE + 2)
+#define VIRT_UART_BASE           0x1fe001e0
+#define VIRT_UART_SIZE           0X100
+#define VIRT_RTC_IRQ             (VIRT_GSI_BASE + 3)
+#define VIRT_MISC_REG_BASE       (VIRT_PCH_REG_BASE + 0x00080000)
+#define VIRT_RTC_REG_BASE        (VIRT_MISC_REG_BASE + 0x00050100)
+#define VIRT_RTC_LEN             0x100
+#define VIRT_SCI_IRQ             (VIRT_GSI_BASE + 4)
+
+#define VIRT_PLATFORM_BUS_BASEADDRESS   0x16000000
+#define VIRT_PLATFORM_BUS_SIZE          0x2000000
+#define VIRT_PLATFORM_BUS_NUM_IRQS      2
+#define VIRT_PLATFORM_BUS_IRQ           (VIRT_GSI_BASE + 5)
+#endif
diff --git a/include/hw/pci-host/mv64361.h b/include/hw/pci-host/mv64361.h
new file mode 100644 (file)
index 0000000..9cdb35c
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef MV64361_H
+#define MV64361_H
+
+#define TYPE_MV64361 "mv64361"
+
+PCIBus *mv64361_get_pci_bus(DeviceState *dev, int n);
+
+#endif
index fec5cd35d60f58e92847bd6aec98aeaeb26fe3c2..005916f826be30cb72f76d5a2e4fea6ca6a4014e 100644 (file)
 #define SMRAM_C_BASE_SEG_MASK  ((uint8_t)0x7)
 #define SMRAM_C_BASE_SEG       ((uint8_t)0x2)  /* hardwired to b010 */
 
+#define PAM_REGIONS_COUNT       13
+
 typedef struct PAMMemoryRegion {
     MemoryRegion alias[4];  /* index = PAM value */
     unsigned current;
 } PAMMemoryRegion;
 
-void init_pam(DeviceState *dev, MemoryRegion *ram, MemoryRegion *system,
-              MemoryRegion *pci, PAMMemoryRegion *mem, uint32_t start, uint32_t size);
+void init_pam(PAMMemoryRegion *mem, Object *owner, MemoryRegion *ram,
+              MemoryRegion *system, MemoryRegion *pci,
+              uint32_t start, uint32_t size);
 void pam_update(PAMMemoryRegion *mem, int idx, uint8_t val);
 
 #endif /* QEMU_PAM_H */
index e2a2e3624532d5db52ea61a1261a5710cc19626f..d62b3091ac81df7a717c2e48d3c1f7e1f6fc6fa2 100644 (file)
 #ifndef PCI_HOST_PNV_PHB3_H
 #define PCI_HOST_PNV_PHB3_H
 
-#include "hw/pci/pcie_host.h"
-#include "hw/pci/pcie_port.h"
 #include "hw/ppc/xics.h"
 #include "qom/object.h"
+#include "hw/pci-host/pnv_phb.h"
 
 typedef struct PnvPHB3 PnvPHB3;
 
@@ -102,15 +101,16 @@ struct PnvPBCQState {
 };
 
 /*
- * PHB3 PCIe Root port
+ * PHB3 PCIe Root Bus
  */
-#define TYPE_PNV_PHB3_ROOT_BUS "pnv-phb3-root-bus"
+#define TYPE_PNV_PHB3_ROOT_BUS "pnv-phb3-root"
+struct PnvPHB3RootBus {
+    PCIBus parent;
 
-#define TYPE_PNV_PHB3_ROOT_PORT "pnv-phb3-root-port"
-
-typedef struct PnvPHB3RootPort {
-    PCIESlot parent_obj;
-} PnvPHB3RootPort;
+    uint32_t chip_id;
+    uint32_t phb_id;
+};
+OBJECT_DECLARE_SIMPLE_TYPE(PnvPHB3RootBus, PNV_PHB3_ROOT_BUS)
 
 /*
  * PHB3 PCIe Host Bridge for PowerNV machines (POWER8)
@@ -126,7 +126,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(PnvPHB3, PNV_PHB3)
 #define PCI_MMIO_TOTAL_SIZE   (0x1ull << 60)
 
 struct PnvPHB3 {
-    PCIExpressHost parent_obj;
+    DeviceState parent;
+
+    PnvPHB *phb_base;
 
     uint32_t chip_id;
     uint32_t phb_id;
@@ -154,14 +156,15 @@ struct PnvPHB3 {
 
     PnvPBCQState pbcq;
 
-    PnvPHB3RootPort root;
-
     QLIST_HEAD(, PnvPhb3DMASpace) dma_spaces;
+
+    PnvChip *chip;
 };
 
 uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, unsigned size);
 void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned size);
 void pnv_phb3_update_regions(PnvPHB3 *phb);
 void pnv_phb3_remap_irqs(PnvPHB3 *phb);
+void pnv_phb3_bus_init(DeviceState *dev, PnvPHB3 *phb);
 
 #endif /* PCI_HOST_PNV_PHB3_H */
index a174ef1f7045d4d042129e2b32cb5acdbf5b67ee..38f8ce9d7406cfa0b992528ffec439c1596fc550 100644 (file)
 
 #include "qemu/host-utils.h"
 
-/*
- * QEMU version of the GETFIELD/SETFIELD macros
- *
- * These are common with the PnvXive model.
- */
-static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
-{
-    return (word & mask) >> ctz64(mask);
-}
-
-static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
-                                uint64_t value)
-{
-    return (word & ~mask) | ((value << ctz64(mask)) & mask);
-}
-
 /*
  * PBCQ XSCOM registers
  */
index 27556ae5342574859929f288c164736676c7c51d..3212e68160ed14b2f225c6fe81db8344b666c124 100644 (file)
 #ifndef PCI_HOST_PNV_PHB4_H
 #define PCI_HOST_PNV_PHB4_H
 
-#include "hw/pci/pcie_host.h"
-#include "hw/pci/pcie_port.h"
+#include "hw/pci-host/pnv_phb.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/ppc/pnv.h"
 #include "hw/ppc/xive.h"
 #include "qom/object.h"
 
 typedef struct PnvPhb4PecStack PnvPhb4PecStack;
 typedef struct PnvPHB4 PnvPHB4;
-typedef struct PnvChip PnvChip;
 
 /*
  * We have one such address space wrapper per possible device under
@@ -44,14 +44,16 @@ typedef struct PnvPhb4DMASpace {
 } PnvPhb4DMASpace;
 
 /*
- * PHB4 PCIe Root port
+ * PHB4 PCIe Root Bus
  */
-#define TYPE_PNV_PHB4_ROOT_BUS "pnv-phb4-root-bus"
-#define TYPE_PNV_PHB4_ROOT_PORT "pnv-phb4-root-port"
+#define TYPE_PNV_PHB4_ROOT_BUS "pnv-phb4-root"
+struct PnvPHB4RootBus {
+    PCIBus parent;
 
-typedef struct PnvPHB4RootPort {
-    PCIESlot parent_obj;
-} PnvPHB4RootPort;
+    uint32_t chip_id;
+    uint32_t phb_id;
+};
+OBJECT_DECLARE_SIMPLE_TYPE(PnvPHB4RootBus, PNV_PHB4_ROOT_BUS)
 
 /*
  * PHB4 PCIe Host Bridge for PowerNV machines (POWER9)
@@ -76,15 +78,15 @@ OBJECT_DECLARE_SIMPLE_TYPE(PnvPHB4, PNV_PHB4)
 #define PCI_MMIO_TOTAL_SIZE        (0x1ull << 60)
 
 struct PnvPHB4 {
-    PCIExpressHost parent_obj;
+    DeviceState parent;
 
-    PnvPHB4RootPort root;
+    PnvPHB *phb_base;
 
     uint32_t chip_id;
     uint32_t phb_id;
 
-    uint64_t version;
-    uint16_t device_id;
+    /* The owner PEC */
+    PnvPhb4PecState *pec;
 
     char bus_path[8];
 
@@ -109,6 +111,29 @@ struct PnvPHB4 {
     MemoryRegion pci_mmio;
     MemoryRegion pci_io;
 
+    /* PCI registers (excluding pass-through) */
+#define PHB4_PEC_PCI_STK_REGS_COUNT  0xf
+    uint64_t pci_regs[PHB4_PEC_PCI_STK_REGS_COUNT];
+    MemoryRegion pci_regs_mr;
+
+    /* Nest registers */
+#define PHB4_PEC_NEST_STK_REGS_COUNT  0x18
+    uint64_t nest_regs[PHB4_PEC_NEST_STK_REGS_COUNT];
+    MemoryRegion nest_regs_mr;
+
+    /* PHB pass-through XSCOM */
+    MemoryRegion phb_regs_mr;
+
+    /* Memory windows from PowerBus to PHB */
+    MemoryRegion phbbar;
+    MemoryRegion intbar;
+    MemoryRegion mmbar0;
+    MemoryRegion mmbar1;
+    uint64_t mmio0_base;
+    uint64_t mmio0_size;
+    uint64_t mmio1_base;
+    uint64_t mmio1_size;
+
     /* On-chip IODA tables */
     uint64_t ioda_LIST[PNV_PHB4_MAX_LSIs];
     uint64_t ioda_MIST[PNV_PHB4_MAX_MIST];
@@ -127,13 +152,13 @@ struct PnvPHB4 {
     XiveSource xsrc;
     qemu_irq *qirqs;
 
-    PnvPhb4PecStack *stack;
-
     QLIST_HEAD(, PnvPhb4DMASpace) dma_spaces;
 };
 
 void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon);
-void pnv_phb4_update_regions(PnvPhb4PecStack *stack);
+int pnv_phb4_pec_get_phb_id(PnvPhb4PecState *pec, int stack_index);
+PnvPhb4PecState *pnv_pec_add_phb(PnvChip *chip, PnvPHB *phb, Error **errp);
+void pnv_phb4_bus_init(DeviceState *dev, PnvPHB4 *phb);
 extern const MemoryRegionOps pnv_phb4_xscom_ops;
 
 /*
@@ -142,46 +167,6 @@ extern const MemoryRegionOps pnv_phb4_xscom_ops;
 #define TYPE_PNV_PHB4_PEC "pnv-phb4-pec"
 OBJECT_DECLARE_TYPE(PnvPhb4PecState, PnvPhb4PecClass, PNV_PHB4_PEC)
 
-#define TYPE_PNV_PHB4_PEC_STACK "pnv-phb4-pec-stack"
-OBJECT_DECLARE_SIMPLE_TYPE(PnvPhb4PecStack, PNV_PHB4_PEC_STACK)
-
-/* Per-stack data */
-struct PnvPhb4PecStack {
-    DeviceState parent;
-
-    /* My own stack number */
-    uint32_t stack_no;
-
-    /* Nest registers */
-#define PHB4_PEC_NEST_STK_REGS_COUNT  0x17
-    uint64_t nest_regs[PHB4_PEC_NEST_STK_REGS_COUNT];
-    MemoryRegion nest_regs_mr;
-
-    /* PCI registers (excluding pass-through) */
-#define PHB4_PEC_PCI_STK_REGS_COUNT  0xf
-    uint64_t pci_regs[PHB4_PEC_PCI_STK_REGS_COUNT];
-    MemoryRegion pci_regs_mr;
-
-    /* PHB pass-through XSCOM */
-    MemoryRegion phb_regs_mr;
-
-    /* Memory windows from PowerBus to PHB */
-    MemoryRegion mmbar0;
-    MemoryRegion mmbar1;
-    MemoryRegion phbbar;
-    MemoryRegion intbar;
-    uint64_t mmio0_base;
-    uint64_t mmio0_size;
-    uint64_t mmio1_base;
-    uint64_t mmio1_size;
-
-    /* The owner PEC */
-    PnvPhb4PecState *pec;
-
-    /* The actual PHB */
-    PnvPHB4 phb;
-};
-
 struct PnvPhb4PecState {
     DeviceState parent;
 
@@ -189,22 +174,22 @@ struct PnvPhb4PecState {
     uint32_t index;
     uint32_t chip_id;
 
-    MemoryRegion *system_memory;
-
     /* Nest registers, excuding per-stack */
 #define PHB4_PEC_NEST_REGS_COUNT    0xf
     uint64_t nest_regs[PHB4_PEC_NEST_REGS_COUNT];
     MemoryRegion nest_regs_mr;
 
     /* PCI registers, excluding per-stack */
-#define PHB4_PEC_PCI_REGS_COUNT     0x2
+#define PHB4_PEC_PCI_REGS_COUNT     0x3
     uint64_t pci_regs[PHB4_PEC_PCI_REGS_COUNT];
     MemoryRegion pci_regs_mr;
 
-    /* Stacks */
-    #define PHB4_PEC_MAX_STACKS     3
-    uint32_t num_stacks;
-    PnvPhb4PecStack stacks[PHB4_PEC_MAX_STACKS];
+    /* PHBs */
+    uint32_t num_phbs;
+#define MAX_PHBS_PER_PEC        3
+    PnvPHB *phbs[MAX_PHBS_PER_PEC];
+
+    PnvChip *chip;
 };
 
 
@@ -219,6 +204,23 @@ struct PnvPhb4PecClass {
     int compat_size;
     const char *stk_compat;
     int stk_compat_size;
+    uint64_t version;
+    const char *phb_type;
+    const uint32_t *num_phbs;
 };
 
+/*
+ * POWER10 definitions
+ */
+
+#define TYPE_PNV_PHB5 "pnv-phb5"
+#define PNV_PHB5(obj) \
+    OBJECT_CHECK(PnvPhb4, (obj), TYPE_PNV_PHB5)
+
+#define PNV_PHB5_VERSION           0x000000a500000002ull
+
+#define TYPE_PNV_PHB5_PEC "pnv-phb5-pec"
+#define PNV_PHB5_PEC(obj) \
+    OBJECT_CHECK(PnvPhb4PecState, (obj), TYPE_PNV_PHB5_PEC)
+
 #endif /* PCI_HOST_PNV_PHB4_H */
index 55df2c3e5ece7bec80468e860ceb059b45822098..bea96f4d91aae54ac7779e925a84e6ad6ca170e3 100644 (file)
 #define   PEC_NEST_STK_BAR_EN_PHB               PPC_BIT(2)
 #define   PEC_NEST_STK_BAR_EN_INT               PPC_BIT(3)
 #define PEC_NEST_STK_DATA_FRZ_TYPE      0x15
-#define PEC_NEST_STK_PBCQ_TUN_BAR       0x16
+#define PEC_NEST_STK_PBCQ_SPARSE_PAGE   0x16 /* P10 */
+#define PEC_NEST_STK_PBCQ_CACHE_INJ     0x17 /* P10 */
 
 /* XSCOM PCI global registers */
 #define PEC_PCI_PBAIB_HW_CONFIG         0x00
+#define PEC_PCI_PBAIB_HW_OVR            0x01
 #define PEC_PCI_PBAIB_READ_STK_OVR      0x02
 
 /* XSCOM PCI per-stack registers */
 #define   PHB_PAPR_ERR_INJ_MASK_MMIO            PPC_BITMASK(16, 63)
 #define PHB_ETU_ERR_SUMMARY             0x2c8
 #define PHB_INT_NOTIFY_ADDR             0x300
+#define   PHB_INT_NOTIFY_ADDR_64K       PPC_BIT(1)   /* P10 */
 #define PHB_INT_NOTIFY_INDEX            0x308
 
 /* Fundamental register set B */
 #define PHB_VERSION                     0x800
 #define PHB_CTRLR                       0x810
+#define   PHB_CTRLR_IRQ_PQ_DISABLE      PPC_BIT(9)   /* P10 */
+#define   PHB_CTRLR_IRQ_ABT_MODE        PPC_BIT(10)  /* P10 */
 #define   PHB_CTRLR_IRQ_PGSZ_64K        PPC_BIT(11)
 #define   PHB_CTRLR_IRQ_STORE_EOI       PPC_BIT(12)
 #define   PHB_CTRLR_MMIO_RD_STRICT      PPC_BIT(13)
index bbb958176565d5898aeaa11f4e2fd84da3aa56ca..bafcbe675214b330985c1d604cd983268b581e66 100644 (file)
@@ -22,7 +22,7 @@
 #ifndef HW_Q35_H
 #define HW_Q35_H
 
-#include "hw/pci/pci.h"
+#include "hw/pci/pci_device.h"
 #include "hw/pci/pcie_host.h"
 #include "hw/pci-host/pam.h"
 #include "qemu/units.h"
@@ -44,7 +44,7 @@ struct MCHPCIState {
     MemoryRegion *pci_address_space;
     MemoryRegion *system_memory;
     MemoryRegion *address_space_io;
-    PAMMemoryRegion pam_regions[13];
+    PAMMemoryRegion pam_regions[PAM_REGIONS_COUNT];
     MemoryRegion smram_region, open_high_smram;
     MemoryRegion smram, low_smram, high_smram;
     MemoryRegion tseg_blackhole, tseg_window;
@@ -54,7 +54,6 @@ struct MCHPCIState {
     uint64_t below_4g_mem_size;
     uint64_t above_4g_mem_size;
     uint64_t pci_hole64_size;
-    uint32_t short_root_bus;
     uint16_t ext_tseg_mbytes;
 };
 
@@ -74,11 +73,6 @@ struct Q35PCIHost {
  * gmch part
  */
 
-#define MCH_HOST_PROP_RAM_MEM "ram-mem"
-#define MCH_HOST_PROP_PCI_MEM "pci-mem"
-#define MCH_HOST_PROP_SYSTEM_MEM "system-mem"
-#define MCH_HOST_PROP_IO_MEM "io-mem"
-
 /* PCI configuration */
 #define MCH_HOST_BRIDGE                        "MCH"
 
diff --git a/include/hw/pci-host/remote.h b/include/hw/pci-host/remote.h
new file mode 100644 (file)
index 0000000..690a01f
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * PCI Host for remote device
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef PCI_HOST_REMOTE_H
+#define PCI_HOST_REMOTE_H
+
+#include "exec/memory.h"
+#include "hw/pci/pcie_host.h"
+
+#define TYPE_REMOTE_PCIHOST "remote-pcihost"
+OBJECT_DECLARE_SIMPLE_TYPE(RemotePCIHost, REMOTE_PCIHOST)
+
+struct RemotePCIHost {
+    /*< private >*/
+    PCIExpressHost parent_obj;
+    /*< public >*/
+
+    MemoryRegion *mr_pci_mem;
+    MemoryRegion *mr_sys_io;
+    MemoryRegion *mr_sys_mem;
+};
+
+#endif
index 01190241bbdefc310ce2ea3a65171fd4f18e0d4d..d12de84ea23a4427849ab7cf8b5727ddbc8cc8dc 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef HW_PCI_HOST_SABRE_H
 #define HW_PCI_HOST_SABRE_H
 
-#include "hw/pci/pci.h"
+#include "hw/pci/pci_device.h"
 #include "hw/pci/pci_host.h"
 #include "hw/sparc/sun4u_iommu.h"
 #include "qom/object.h"
index 4f58f0223b56c8f4aac9bdccf80dfb303a7c4a7f..3778aac27b603dcc7d4e61cf4a89730d8f830da6 100644 (file)
@@ -47,8 +47,6 @@ typedef struct SpaprPciLsi {
     uint32_t irq;
 } SpaprPciLsi;
 
-typedef struct SpaprPhbPciNvGpuConfig SpaprPhbPciNvGpuConfig;
-
 struct SpaprPhbState {
     PCIHostState parent_obj;
 
@@ -90,9 +88,6 @@ struct SpaprPhbState {
     uint32_t mig_liobn;
     hwaddr mig_mem_win_addr, mig_mem_win_size;
     hwaddr mig_io_win_addr, mig_io_win_size;
-    hwaddr nv2_gpa_win_addr;
-    hwaddr nv2_atsd_win_addr;
-    SpaprPhbPciNvGpuConfig *nvgpus;
     bool pre_5_1_assoc;
 };
 
@@ -112,22 +107,6 @@ struct SpaprPhbState {
 
 #define SPAPR_PCI_MSI_WINDOW         0x40000000000ULL
 
-#define SPAPR_PCI_NV2RAM64_WIN_BASE  SPAPR_PCI_LIMIT
-#define SPAPR_PCI_NV2RAM64_WIN_SIZE  (2 * TiB) /* For up to 6 GPUs 256GB each */
-
-/* Max number of these GPUsper a physical box */
-#define NVGPU_MAX_NUM                6
-/* Max number of NVLinks per GPU in any physical box */
-#define NVGPU_MAX_LINKS              3
-
-/*
- * GPU RAM starts at 64TiB so huge DMA window to cover it all ends at 128TiB
- * which is enough. We do not need DMA for ATSD so we put them at 128TiB.
- */
-#define SPAPR_PCI_NV2ATSD_WIN_BASE   (128 * TiB)
-#define SPAPR_PCI_NV2ATSD_WIN_SIZE   (NVGPU_MAX_NUM * NVGPU_MAX_LINKS * \
-                                      64 * KiB)
-
 int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb,
                  uint32_t intc_phandle, void *fdt, int *node_offset);
 
@@ -151,13 +130,6 @@ int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state);
 int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option);
 int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb);
 void spapr_phb_vfio_reset(DeviceState *qdev);
-void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp);
-void spapr_phb_nvgpu_free(SpaprPhbState *sphb);
-void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt, int bus_off,
-                                 Error **errp);
-void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt);
-void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
-                                        SpaprPhbState *sphb);
 #else
 static inline bool spapr_phb_eeh_available(SpaprPhbState *sphb)
 {
@@ -184,25 +156,6 @@ static inline int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
 static inline void spapr_phb_vfio_reset(DeviceState *qdev)
 {
 }
-static inline void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp)
-{
-}
-static inline void spapr_phb_nvgpu_free(SpaprPhbState *sphb)
-{
-}
-static inline void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt,
-                                               int bus_off, Error **errp)
-{
-}
-static inline void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb,
-                                                   void *fdt)
-{
-}
-static inline void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt,
-                                                      int offset,
-                                                      SpaprPhbState *sphb)
-{
-}
 #endif
 
 void spapr_phb_dma_reset(SpaprPhbState *sphb);
@@ -212,4 +165,6 @@ static inline unsigned spapr_phb_windows_supported(SpaprPhbState *sphb)
     return sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
 }
 
+char *spapr_pci_fw_dev_name(PCIDevice *dev);
+
 #endif /* PCI_HOST_SPAPR_H */
index 89be88d87fdd5328527c5e1583dec59baa892d86..e1b3c1c280466fbd72933dfda374c39b58d9d01d 100644 (file)
@@ -21,7 +21,6 @@
 #define HW_XILINX_PCIE_H
 
 #include "hw/sysbus.h"
-#include "hw/pci/pci.h"
 #include "hw/pci/pci_bridge.h"
 #include "hw/pci/pcie_host.h"
 #include "qom/object.h"
index 408768848643cc9ca29cfc6d9137a222ad25ecfc..abcfd13925219e96d0c9aa3530271f56e06ede5c 100644 (file)
@@ -21,7 +21,7 @@
 #ifndef QEMU_MSI_H
 #define QEMU_MSI_H
 
-#include "hw/pci/pci.h"
+#include "hw/pci/pci_device.h"
 
 struct MSIMessage {
     uint64_t address;
@@ -33,6 +33,7 @@ extern bool msi_nonbroken;
 void msi_set_message(PCIDevice *dev, MSIMessage msg);
 MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector);
 bool msi_enabled(const PCIDevice *dev);
+void msi_set_enabled(PCIDevice *dev);
 int msi_init(struct PCIDevice *dev, uint8_t offset,
              unsigned int nr_vectors, bool msi64bit,
              bool msi_per_vector_mask, Error **errp);
@@ -43,6 +44,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector);
 void msi_send_message(PCIDevice *dev, MSIMessage msg);
 void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len);
 unsigned int msi_nr_vectors_allocated(const PCIDevice *dev);
+void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp);
 
 static inline bool msi_present(const PCIDevice *dev)
 {
index 4c4a60c7399a0411c3c8c948f3be38e6af1d3913..0e6f257e4511e9e48a31bd71e0fa7d74a245c59f 100644 (file)
@@ -33,9 +33,10 @@ bool msix_is_masked(PCIDevice *dev, unsigned vector);
 void msix_set_pending(PCIDevice *dev, unsigned vector);
 void msix_clr_pending(PCIDevice *dev, int vector);
 
-int msix_vector_use(PCIDevice *dev, unsigned vector);
+void msix_vector_use(PCIDevice *dev, unsigned vector);
 void msix_vector_unuse(PCIDevice *dev, unsigned vector);
 void msix_unuse_all_vectors(PCIDevice *dev);
+void msix_set_mask(PCIDevice *dev, int vector, bool mask);
 
 void msix_notify(PCIDevice *dev, unsigned vector);
 
index 72ce649eee3604c003dc6dab2d2e728aac93e7f7..fa6313aabc43b81594ee95d1d17bd9653aeb7a69 100644 (file)
@@ -7,9 +7,6 @@
 /* PCI includes legacy ISA access.  */
 #include "hw/isa/isa.h"
 
-#include "hw/pci/pcie.h"
-#include "qom/object.h"
-
 extern bool pci_available;
 
 /* PCI bus */
@@ -19,6 +16,7 @@ extern bool pci_available;
 #define PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)
 #define PCI_FUNC(devfn)         ((devfn) & 0x07)
 #define PCI_BUILD_BDF(bus, devfn)     ((bus << 8) | (devfn))
+#define PCI_BDF_TO_DEVFN(x)     ((x) & 0xff)
 #define PCI_BUS_MAX             256
 #define PCI_DEVFN_MAX           256
 #define PCI_SLOT_MAX            32
@@ -78,6 +76,7 @@ extern bool pci_available;
 #define PCI_SUBVENDOR_ID_REDHAT_QUMRANET 0x1af4
 #define PCI_SUBDEVICE_ID_QEMU            0x1100
 
+/* legacy virtio-pci devices */
 #define PCI_DEVICE_ID_VIRTIO_NET         0x1000
 #define PCI_DEVICE_ID_VIRTIO_BLOCK       0x1001
 #define PCI_DEVICE_ID_VIRTIO_BALLOON     0x1002
@@ -86,9 +85,15 @@ extern bool pci_available;
 #define PCI_DEVICE_ID_VIRTIO_RNG         0x1005
 #define PCI_DEVICE_ID_VIRTIO_9P          0x1009
 #define PCI_DEVICE_ID_VIRTIO_VSOCK       0x1012
-#define PCI_DEVICE_ID_VIRTIO_PMEM        0x1013
-#define PCI_DEVICE_ID_VIRTIO_IOMMU       0x1014
-#define PCI_DEVICE_ID_VIRTIO_MEM         0x1015
+
+/*
+ * modern virtio-pci devices get their id assigned automatically,
+ * there is no need to add #defines here.  It gets calculated as
+ *
+ * PCI_DEVICE_ID = PCI_DEVICE_ID_VIRTIO_10_BASE +
+ *                 virtio_bus_get_vdev_id(bus)
+ */
+#define PCI_DEVICE_ID_VIRTIO_10_BASE     0x1040
 
 #define PCI_VENDOR_ID_REDHAT             0x1b36
 #define PCI_DEVICE_ID_REDHAT_BRIDGE      0x0001
@@ -107,6 +112,9 @@ extern bool pci_available;
 #define PCI_DEVICE_ID_REDHAT_PCIE_BRIDGE 0x000e
 #define PCI_DEVICE_ID_REDHAT_MDPY        0x000f
 #define PCI_DEVICE_ID_REDHAT_NVME        0x0010
+#define PCI_DEVICE_ID_REDHAT_PVPANIC     0x0011
+#define PCI_DEVICE_ID_REDHAT_ACPI_ERST   0x0012
+#define PCI_DEVICE_ID_REDHAT_UFS         0x0013
 #define PCI_DEVICE_ID_REDHAT_QXL         0x0100
 
 #define FMT_PCIBUS                      PRIx64
@@ -128,6 +136,10 @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
                                 pcibus_t addr, pcibus_t size, int type);
 typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
 
+typedef void MSITriggerFunc(PCIDevice *dev, MSIMessage msg);
+typedef MSIMessage MSIPrepareMessageFunc(PCIDevice *dev, unsigned vector);
+typedef MSIMessage MSIxPrepareMessageFunc(PCIDevice *dev, unsigned vector);
+
 typedef struct PCIIORegion {
     pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
 #define PCI_BAR_UNMAPPED (~(pcibus_t)0)
@@ -194,19 +206,14 @@ enum {
     QEMU_PCIE_LNKSTA_DLLLA = (1 << QEMU_PCIE_LNKSTA_DLLLA_BITNR),
 #define QEMU_PCIE_EXTCAP_INIT_BITNR 9
     QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR),
+#define QEMU_PCIE_CXL_BITNR 10
+    QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
+#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
+    QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
+#define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
+    QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
 };
 
-#define TYPE_PCI_DEVICE "pci-device"
-typedef struct PCIDeviceClass PCIDeviceClass;
-DECLARE_OBJ_CHECKERS(PCIDevice, PCIDeviceClass,
-                     PCI_DEVICE, TYPE_PCI_DEVICE)
-
-/* Implemented by devices that can be plugged on PCI Express buses */
-#define INTERFACE_PCIE_DEVICE "pci-express-device"
-
-/* Implemented by devices that can be plugged on Conventional PCI buses */
-#define INTERFACE_CONVENTIONAL_PCI_DEVICE "conventional-pci-device"
-
 typedef struct PCIINTxRoute {
     enum {
         PCI_INTX_ENABLED,
@@ -216,32 +223,6 @@ typedef struct PCIINTxRoute {
     int irq;
 } PCIINTxRoute;
 
-struct PCIDeviceClass {
-    DeviceClass parent_class;
-
-    void (*realize)(PCIDevice *dev, Error **errp);
-    PCIUnregisterFunc *exit;
-    PCIConfigReadFunc *config_read;
-    PCIConfigWriteFunc *config_write;
-
-    uint16_t vendor_id;
-    uint16_t device_id;
-    uint8_t revision;
-    uint16_t class_id;
-    uint16_t subsystem_vendor_id;       /* only for header type = 0 */
-    uint16_t subsystem_id;              /* only for header type = 0 */
-
-    /*
-     * pci-to-pci bridge or normal device.
-     * This doesn't mean pci host switch.
-     * When card bus bridge is supported, this would be enhanced.
-     */
-    bool is_bridge;
-
-    /* rom bar */
-    const char *romfile;
-};
-
 typedef void (*PCIINTxRoutingNotifier)(PCIDevice *dev);
 typedef int (*MSIVectorUseNotifier)(PCIDevice *dev, unsigned int vector,
                                       MSIMessage msg);
@@ -250,115 +231,6 @@ typedef void (*MSIVectorPollNotifier)(PCIDevice *dev,
                                       unsigned int vector_start,
                                       unsigned int vector_end);
 
-enum PCIReqIDType {
-    PCI_REQ_ID_INVALID = 0,
-    PCI_REQ_ID_BDF,
-    PCI_REQ_ID_SECONDARY_BUS,
-    PCI_REQ_ID_MAX,
-};
-typedef enum PCIReqIDType PCIReqIDType;
-
-struct PCIReqIDCache {
-    PCIDevice *dev;
-    PCIReqIDType type;
-};
-typedef struct PCIReqIDCache PCIReqIDCache;
-
-struct PCIDevice {
-    DeviceState qdev;
-    bool partially_hotplugged;
-
-    /* PCI config space */
-    uint8_t *config;
-
-    /* Used to enable config checks on load. Note that writable bits are
-     * never checked even if set in cmask. */
-    uint8_t *cmask;
-
-    /* Used to implement R/W bytes */
-    uint8_t *wmask;
-
-    /* Used to implement RW1C(Write 1 to Clear) bytes */
-    uint8_t *w1cmask;
-
-    /* Used to allocate config space for capabilities. */
-    uint8_t *used;
-
-    /* the following fields are read only */
-    int32_t devfn;
-    /* Cached device to fetch requester ID from, to avoid the PCI
-     * tree walking every time we invoke PCI request (e.g.,
-     * MSI). For conventional PCI root complex, this field is
-     * meaningless. */
-    PCIReqIDCache requester_id_cache;
-    char name[64];
-    PCIIORegion io_regions[PCI_NUM_REGIONS];
-    AddressSpace bus_master_as;
-    MemoryRegion bus_master_container_region;
-    MemoryRegion bus_master_enable_region;
-
-    /* do not access the following fields */
-    PCIConfigReadFunc *config_read;
-    PCIConfigWriteFunc *config_write;
-
-    /* Legacy PCI VGA regions */
-    MemoryRegion *vga_regions[QEMU_PCI_VGA_NUM_REGIONS];
-    bool has_vga;
-
-    /* Current IRQ levels.  Used internally by the generic PCI code.  */
-    uint8_t irq_state;
-
-    /* Capability bits */
-    uint32_t cap_present;
-
-    /* Offset of MSI-X capability in config space */
-    uint8_t msix_cap;
-
-    /* MSI-X entries */
-    int msix_entries_nr;
-
-    /* Space to store MSIX table & pending bit array */
-    uint8_t *msix_table;
-    uint8_t *msix_pba;
-    /* MemoryRegion container for msix exclusive BAR setup */
-    MemoryRegion msix_exclusive_bar;
-    /* Memory Regions for MSIX table and pending bit entries. */
-    MemoryRegion msix_table_mmio;
-    MemoryRegion msix_pba_mmio;
-    /* Reference-count for entries actually in use by driver. */
-    unsigned *msix_entry_used;
-    /* MSIX function mask set or MSIX disabled */
-    bool msix_function_masked;
-    /* Version id needed for VMState */
-    int32_t version_id;
-
-    /* Offset of MSI capability in config space */
-    uint8_t msi_cap;
-
-    /* PCI Express */
-    PCIExpressDevice exp;
-
-    /* SHPC */
-    SHPCDevice *shpc;
-
-    /* Location of option rom */
-    char *romfile;
-    bool has_rom;
-    MemoryRegion rom;
-    uint32_t rom_bar;
-
-    /* INTx routing notifier */
-    PCIINTxRoutingNotifier intx_routing_notifier;
-
-    /* MSI-X notifiers */
-    MSIVectorUseNotifier msix_vector_use_notifier;
-    MSIVectorReleaseNotifier msix_vector_release_notifier;
-    MSIVectorPollNotifier msix_vector_poll_notifier;
-
-    /* ID of standby device in net_failover pair */
-    char *failover_pair_id;
-};
-
 void pci_register_bar(PCIDevice *pci_dev, int region_num,
                       uint8_t attr, MemoryRegion *memory);
 void pci_register_vga(PCIDevice *pci_dev, MemoryRegion *mem,
@@ -397,23 +269,30 @@ typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin);
 #define TYPE_PCI_BUS "PCI"
 OBJECT_DECLARE_TYPE(PCIBus, PCIBusClass, PCI_BUS)
 #define TYPE_PCIE_BUS "PCIE"
+#define TYPE_CXL_BUS "CXL"
+
+typedef void (*pci_bus_dev_fn)(PCIBus *b, PCIDevice *d, void *opaque);
+typedef void (*pci_bus_fn)(PCIBus *b, void *opaque);
+typedef void *(*pci_bus_ret_fn)(PCIBus *b, void *opaque);
 
-bool pci_bus_is_express(PCIBus *bus);
+bool pci_bus_is_express(const PCIBus *bus);
 
-void pci_root_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent,
-                              const char *name,
-                              MemoryRegion *address_space_mem,
-                              MemoryRegion *address_space_io,
-                              uint8_t devfn_min, const char *typename);
+void pci_root_bus_init(PCIBus *bus, size_t bus_size, DeviceState *parent,
+                       const char *name,
+                       MemoryRegion *mem, MemoryRegion *io,
+                       uint8_t devfn_min, const char *typename);
 PCIBus *pci_root_bus_new(DeviceState *parent, const char *name,
-                         MemoryRegion *address_space_mem,
-                         MemoryRegion *address_space_io,
+                         MemoryRegion *mem, MemoryRegion *io,
                          uint8_t devfn_min, const char *typename);
 void pci_root_bus_cleanup(PCIBus *bus);
-void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
+void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq,
                   void *irq_opaque, int nirq);
+void pci_bus_map_irqs(PCIBus *bus, pci_map_irq_fn map_irq);
 void pci_bus_irqs_cleanup(PCIBus *bus);
 int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
+uint32_t pci_bus_get_slot_reserved_mask(PCIBus *bus);
+void pci_bus_set_slot_reserved_mask(PCIBus *bus, uint32_t mask);
+void pci_bus_clear_slot_reserved_mask(PCIBus *bus, uint32_t mask);
 /* 0 <= pin <= 3 0 = INTA, 1 = INTB, 2 = INTC, 3 = INTD */
 static inline int pci_swizzle(int slot, int pin)
 {
@@ -423,8 +302,7 @@ int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin);
 PCIBus *pci_register_root_bus(DeviceState *parent, const char *name,
                               pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
                               void *irq_opaque,
-                              MemoryRegion *address_space_mem,
-                              MemoryRegion *address_space_io,
+                              MemoryRegion *mem, MemoryRegion *io,
                               uint8_t devfn_min, int nirq,
                               const char *typename);
 void pci_unregister_root_bus(PCIBus *bus);
@@ -447,6 +325,7 @@ static inline PCIBus *pci_get_bus(const PCIDevice *dev)
     return PCI_BUS(qdev_get_parent_bus(DEVICE(dev)));
 }
 int pci_bus_num(PCIBus *s);
+void pci_bus_range(PCIBus *bus, int *min_bus, int *max_bus);
 static inline int pci_dev_bus_num(const PCIDevice *dev)
 {
     return pci_bus_num(pci_get_bus(dev));
@@ -454,39 +333,75 @@ static inline int pci_dev_bus_num(const PCIDevice *dev)
 
 int pci_bus_numa_node(PCIBus *bus);
 void pci_for_each_device(PCIBus *bus, int bus_num,
-                         void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque),
+                         pci_bus_dev_fn fn,
                          void *opaque);
 void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
-                                 void (*fn)(PCIBus *bus, PCIDevice *d,
-                                            void *opaque),
+                                 pci_bus_dev_fn fn,
                                  void *opaque);
-void pci_for_each_bus_depth_first(PCIBus *bus,
-                                  void *(*begin)(PCIBus *bus, void *parent_state),
-                                  void (*end)(PCIBus *bus, void *state),
-                                  void *parent_state);
+void pci_for_each_device_under_bus(PCIBus *bus,
+                                   pci_bus_dev_fn fn, void *opaque);
+void pci_for_each_device_under_bus_reverse(PCIBus *bus,
+                                           pci_bus_dev_fn fn,
+                                           void *opaque);
+void pci_for_each_bus_depth_first(PCIBus *bus, pci_bus_ret_fn begin,
+                                  pci_bus_fn end, void *parent_state);
 PCIDevice *pci_get_function_0(PCIDevice *pci_dev);
 
 /* Use this wrapper when specific scan order is not required. */
 static inline
-void pci_for_each_bus(PCIBus *bus,
-                      void (*fn)(PCIBus *bus, void *opaque),
-                      void *opaque)
+void pci_for_each_bus(PCIBus *bus, pci_bus_fn fn, void *opaque)
 {
     pci_for_each_bus_depth_first(bus, NULL, fn, opaque);
 }
 
 PCIBus *pci_device_root_bus(const PCIDevice *d);
 const char *pci_root_bus_path(PCIDevice *dev);
+bool pci_bus_bypass_iommu(PCIBus *bus);
 PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn);
 int pci_qdev_find_device(const char *id, PCIDevice **pdev);
 void pci_bus_get_w64_range(PCIBus *bus, Range *range);
 
 void pci_device_deassert_intx(PCIDevice *dev);
 
-typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
+
+/**
+ * struct PCIIOMMUOps: callbacks structure for specific IOMMU handlers
+ * of a PCIBus
+ *
+ * Allows to modify the behavior of some IOMMU operations of the PCI
+ * framework for a set of devices on a PCI bus.
+ */
+typedef struct PCIIOMMUOps {
+    /**
+     * @get_address_space: get the address space for a set of devices
+     * on a PCI bus.
+     *
+     * Mandatory callback which returns a pointer to an #AddressSpace
+     *
+     * @bus: the #PCIBus being accessed.
+     *
+     * @opaque: the data passed to pci_setup_iommu().
+     *
+     * @devfn: device and function number
+     */
+   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+} PCIIOMMUOps;
 
 AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
-void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
+
+/**
+ * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
+ *
+ * Let PCI host bridges define specific operations.
+ *
+ * @bus: the #PCIBus being updated.
+ * @ops: the #PCIIOMMUOps
+ * @opaque: passed to callbacks of the @ops structure.
+ */
+void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque);
+
+pcibus_t pci_bar_address(PCIDevice *d,
+                         int reg, uint8_t type, pcibus_t size);
 
 static inline void
 pci_set_byte(uint8_t *config, uint8_t val)
@@ -654,69 +569,51 @@ static inline void
 pci_set_byte_by_mask(uint8_t *config, uint8_t mask, uint8_t reg)
 {
     uint8_t val = pci_get_byte(config);
-    uint8_t rval = reg << ctz32(mask);
-    pci_set_byte(config, (~mask & val) | (mask & rval));
-}
+    uint8_t rval;
 
-static inline uint8_t
-pci_get_byte_by_mask(uint8_t *config, uint8_t mask)
-{
-    uint8_t val = pci_get_byte(config);
-    return (val & mask) >> ctz32(mask);
+    assert(mask);
+    rval = reg << ctz32(mask);
+    pci_set_byte(config, (~mask & val) | (mask & rval));
 }
 
 static inline void
 pci_set_word_by_mask(uint8_t *config, uint16_t mask, uint16_t reg)
 {
     uint16_t val = pci_get_word(config);
-    uint16_t rval = reg << ctz32(mask);
-    pci_set_word(config, (~mask & val) | (mask & rval));
-}
+    uint16_t rval;
 
-static inline uint16_t
-pci_get_word_by_mask(uint8_t *config, uint16_t mask)
-{
-    uint16_t val = pci_get_word(config);
-    return (val & mask) >> ctz32(mask);
+    assert(mask);
+    rval = reg << ctz32(mask);
+    pci_set_word(config, (~mask & val) | (mask & rval));
 }
 
 static inline void
 pci_set_long_by_mask(uint8_t *config, uint32_t mask, uint32_t reg)
 {
     uint32_t val = pci_get_long(config);
-    uint32_t rval = reg << ctz32(mask);
-    pci_set_long(config, (~mask & val) | (mask & rval));
-}
+    uint32_t rval;
 
-static inline uint32_t
-pci_get_long_by_mask(uint8_t *config, uint32_t mask)
-{
-    uint32_t val = pci_get_long(config);
-    return (val & mask) >> ctz32(mask);
+    assert(mask);
+    rval = reg << ctz32(mask);
+    pci_set_long(config, (~mask & val) | (mask & rval));
 }
 
 static inline void
 pci_set_quad_by_mask(uint8_t *config, uint64_t mask, uint64_t reg)
 {
     uint64_t val = pci_get_quad(config);
-    uint64_t rval = reg << ctz32(mask);
-    pci_set_quad(config, (~mask & val) | (mask & rval));
-}
+    uint64_t rval;
 
-static inline uint64_t
-pci_get_quad_by_mask(uint8_t *config, uint64_t mask)
-{
-    uint64_t val = pci_get_quad(config);
-    return (val & mask) >> ctz32(mask);
+    assert(mask);
+    rval = reg << ctz32(mask);
+    pci_set_quad(config, (~mask & val) | (mask & rval));
 }
 
-PCIDevice *pci_new_multifunction(int devfn, bool multifunction,
-                                    const char *name);
+PCIDevice *pci_new_multifunction(int devfn, const char *name);
 PCIDevice *pci_new(int devfn, const char *name);
 bool pci_realize_and_unref(PCIDevice *dev, PCIBus *bus, Error **errp);
 
 PCIDevice *pci_create_simple_multifunction(PCIBus *bus, int devfn,
-                                           bool multifunction,
                                            const char *name);
 PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name);
 
@@ -745,121 +642,7 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev)
     pci_irq_deassert(pci_dev);
 }
 
-static inline int pci_is_express(const PCIDevice *d)
-{
-    return d->cap_present & QEMU_PCI_CAP_EXPRESS;
-}
-
-static inline int pci_is_express_downstream_port(const PCIDevice *d)
-{
-    uint8_t type;
-
-    if (!pci_is_express(d) || !d->exp.exp_cap) {
-        return 0;
-    }
-
-    type = pcie_cap_get_type(d);
-
-    return type == PCI_EXP_TYPE_DOWNSTREAM || type == PCI_EXP_TYPE_ROOT_PORT;
-}
-
-static inline uint32_t pci_config_size(const PCIDevice *d)
-{
-    return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
-}
-
-static inline uint16_t pci_get_bdf(PCIDevice *dev)
-{
-    return PCI_BUILD_BDF(pci_bus_num(pci_get_bus(dev)), dev->devfn);
-}
-
-uint16_t pci_requester_id(PCIDevice *dev);
-
-/* DMA access functions */
-static inline AddressSpace *pci_get_address_space(PCIDevice *dev)
-{
-    return &dev->bus_master_as;
-}
-
-static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
-                             void *buf, dma_addr_t len, DMADirection dir)
-{
-    return dma_memory_rw(pci_get_address_space(dev), addr, buf, len, dir);
-}
-
-static inline int pci_dma_read(PCIDevice *dev, dma_addr_t addr,
-                               void *buf, dma_addr_t len)
-{
-    return pci_dma_rw(dev, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
-}
-
-static inline int pci_dma_write(PCIDevice *dev, dma_addr_t addr,
-                                const void *buf, dma_addr_t len)
-{
-    return pci_dma_rw(dev, addr, (void *) buf, len, DMA_DIRECTION_FROM_DEVICE);
-}
-
-#define PCI_DMA_DEFINE_LDST(_l, _s, _bits)                              \
-    static inline uint##_bits##_t ld##_l##_pci_dma(PCIDevice *dev,      \
-                                                   dma_addr_t addr)     \
-    {                                                                   \
-        return ld##_l##_dma(pci_get_address_space(dev), addr);          \
-    }                                                                   \
-    static inline void st##_s##_pci_dma(PCIDevice *dev,                 \
-                                        dma_addr_t addr, uint##_bits##_t val) \
-    {                                                                   \
-        st##_s##_dma(pci_get_address_space(dev), addr, val);            \
-    }
-
-PCI_DMA_DEFINE_LDST(ub, b, 8);
-PCI_DMA_DEFINE_LDST(uw_le, w_le, 16)
-PCI_DMA_DEFINE_LDST(l_le, l_le, 32);
-PCI_DMA_DEFINE_LDST(q_le, q_le, 64);
-PCI_DMA_DEFINE_LDST(uw_be, w_be, 16)
-PCI_DMA_DEFINE_LDST(l_be, l_be, 32);
-PCI_DMA_DEFINE_LDST(q_be, q_be, 64);
-
-#undef PCI_DMA_DEFINE_LDST
-
-static inline void *pci_dma_map(PCIDevice *dev, dma_addr_t addr,
-                                dma_addr_t *plen, DMADirection dir)
-{
-    void *buf;
-
-    buf = dma_memory_map(pci_get_address_space(dev), addr, plen, dir);
-    return buf;
-}
-
-static inline void pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len,
-                                 DMADirection dir, dma_addr_t access_len)
-{
-    dma_memory_unmap(pci_get_address_space(dev), buffer, len, dir, access_len);
-}
-
-static inline void pci_dma_sglist_init(QEMUSGList *qsg, PCIDevice *dev,
-                                       int alloc_hint)
-{
-    qemu_sglist_init(qsg, DEVICE(dev), alloc_hint, pci_get_address_space(dev));
-}
-
-extern const VMStateDescription vmstate_pci_device;
-
-#define VMSTATE_PCI_DEVICE(_field, _state) {                         \
-    .name       = (stringify(_field)),                               \
-    .size       = sizeof(PCIDevice),                                 \
-    .vmsd       = &vmstate_pci_device,                               \
-    .flags      = VMS_STRUCT,                                        \
-    .offset     = vmstate_offset_value(_state, _field, PCIDevice),   \
-}
-
-#define VMSTATE_PCI_DEVICE_POINTER(_field, _state) {                 \
-    .name       = (stringify(_field)),                               \
-    .size       = sizeof(PCIDevice),                                 \
-    .vmsd       = &vmstate_pci_device,                               \
-    .flags      = VMS_STRUCT|VMS_POINTER,                            \
-    .offset     = vmstate_offset_pointer(_state, _field, PCIDevice), \
-}
-
 MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
+void pci_set_power(PCIDevice *pci_dev, bool state);
 
 #endif
index a94d350034bfad910adfaa157ca2f2dc5067fdd6..5cd452115adab531bac4b8509ac5b734283117e5 100644 (file)
@@ -26,8 +26,9 @@
 #ifndef QEMU_PCI_BRIDGE_H
 #define QEMU_PCI_BRIDGE_H
 
-#include "hw/pci/pci.h"
+#include "hw/pci/pci_device.h"
 #include "hw/pci/pci_bus.h"
+#include "hw/cxl/cxl.h"
 #include "qom/object.h"
 
 typedef struct PCIBridgeWindows PCIBridgeWindows;
@@ -52,6 +53,7 @@ struct PCIBridgeWindows {
 
 #define TYPE_PCI_BRIDGE "base-pci-bridge"
 OBJECT_DECLARE_SIMPLE_TYPE(PCIBridge, PCI_BRIDGE)
+#define IS_PCI_BRIDGE(dev) object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)
 
 struct PCIBridge {
     /*< private >*/
@@ -71,15 +73,49 @@ struct PCIBridge {
     MemoryRegion address_space_mem;
     MemoryRegion address_space_io;
 
-    PCIBridgeWindows *windows;
+    PCIBridgeWindows windows;
 
     pci_map_irq_fn map_irq;
     const char *bus_name;
+
+    /* SLT is RO for PCIE to PCIE bridges, but old QEMU versions had it RW */
+    bool pcie_writeable_slt_bug;
 };
 
 #define PCI_BRIDGE_DEV_PROP_CHASSIS_NR "chassis_nr"
 #define PCI_BRIDGE_DEV_PROP_MSI        "msi"
 #define PCI_BRIDGE_DEV_PROP_SHPC       "shpc"
+typedef struct CXLHost CXLHost;
+
+typedef struct PXBDev {
+    /*< private >*/
+    PCIDevice parent_obj;
+    /*< public >*/
+
+    uint8_t bus_nr;
+    uint16_t numa_node;
+    bool bypass_iommu;
+} PXBDev;
+
+typedef struct PXBPCIEDev {
+    /*< private >*/
+    PXBDev parent_obj;
+} PXBPCIEDev;
+
+#define TYPE_PXB_DEV "pxb"
+OBJECT_DECLARE_SIMPLE_TYPE(PXBDev, PXB_DEV)
+
+typedef struct PXBCXLDev {
+    /*< private >*/
+    PXBPCIEDev parent_obj;
+    /*< public >*/
+
+    bool hdm_for_passthrough;
+    CXLHost *cxl_host_bridge; /* Pointer to a CXLHost */
+} PXBCXLDev;
+
+#define TYPE_PXB_CXL_DEV "pxb-cxl"
+OBJECT_DECLARE_SIMPLE_TYPE(PXBCXLDev, PXB_CXL_DEV)
 
 int pci_bridge_ssvid_init(PCIDevice *dev, uint8_t offset,
                           uint16_t svid, uint16_t ssid,
@@ -116,11 +152,11 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name,
                         pci_map_irq_fn map_irq);
 
 /* TODO: add this define to pci_regs.h in linux and then in qemu. */
-#define  PCI_BRIDGE_CTL_VGA_16BIT      0x10    /* VGA 16-bit decode */
-#define  PCI_BRIDGE_CTL_DISCARD                0x100   /* Primary discard timer */
-#define  PCI_BRIDGE_CTL_SEC_DISCARD    0x200   /* Secondary discard timer */
-#define  PCI_BRIDGE_CTL_DISCARD_STATUS 0x400   /* Discard timer status */
-#define  PCI_BRIDGE_CTL_DISCARD_SERR   0x800   /* Discard timer SERR# enable */
+#define  PCI_BRIDGE_CTL_VGA_16BIT       0x10    /* VGA 16-bit decode */
+#define  PCI_BRIDGE_CTL_DISCARD         0x100   /* Primary discard timer */
+#define  PCI_BRIDGE_CTL_SEC_DISCARD     0x200   /* Secondary discard timer */
+#define  PCI_BRIDGE_CTL_DISCARD_STATUS  0x400   /* Discard timer status */
+#define  PCI_BRIDGE_CTL_DISCARD_SERR    0x800   /* Discard timer SERR# enable */
 
 typedef struct PCIBridgeQemuCap {
     uint8_t id;     /* Standard PCI capability header field */
@@ -138,6 +174,7 @@ typedef struct PCIBridgeQemuCap {
     uint64_t mem_pref_64; /* Prefetchable memory to reserve (64-bit MMIO) */
 } PCIBridgeQemuCap;
 
+#define REDHAT_PCI_CAP_TYPE_OFFSET      3
 #define REDHAT_PCI_CAP_RESOURCE_RESERVE 1
 
 /*
@@ -152,6 +189,13 @@ typedef struct PCIResReserve {
     uint64_t mem_pref_64;
 } PCIResReserve;
 
+#define REDHAT_PCI_CAP_RES_RESERVE_BUS_RES     4
+#define REDHAT_PCI_CAP_RES_RESERVE_IO          8
+#define REDHAT_PCI_CAP_RES_RESERVE_MEM         16
+#define REDHAT_PCI_CAP_RES_RESERVE_PREF_MEM_32 20
+#define REDHAT_PCI_CAP_RES_RESERVE_PREF_MEM_64 24
+#define REDHAT_PCI_CAP_RES_RESERVE_CAP_SIZE    32
+
 int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset,
                                PCIResReserve res_reserve, Error **errp);
 
index 347440d42ca19b3bffbc62126e60bc60c986c222..226131254621f257eefb45356489c125381e6a87 100644 (file)
@@ -24,12 +24,16 @@ enum PCIBusFlags {
     PCI_BUS_IS_ROOT                                         = 0x0001,
     /* PCIe extended configuration space is accessible on this bus */
     PCI_BUS_EXTENDED_CONFIG_SPACE                           = 0x0002,
+    /* This is a CXL Type BUS */
+    PCI_BUS_CXL                                             = 0x0004,
 };
 
+#define PCI_NO_PASID UINT32_MAX
+
 struct PCIBus {
     BusState qbus;
     enum PCIBusFlags flags;
-    PCIIOMMUFunc iommu_fn;
+    const PCIIOMMUOps *iommu_ops;
     void *iommu_opaque;
     uint8_t devfn_min;
     uint32_t slot_reserved_mask;
@@ -53,6 +57,11 @@ struct PCIBus {
     Notifier machine_done;
 };
 
+static inline bool pci_bus_is_cxl(PCIBus *bus)
+{
+    return !!(bus->flags & PCI_BUS_CXL);
+}
+
 static inline bool pci_bus_is_root(PCIBus *bus)
 {
     return !!(bus->flags & PCI_BUS_IS_ROOT);
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
new file mode 100644 (file)
index 0000000..d3dd0f6
--- /dev/null
@@ -0,0 +1,350 @@
+#ifndef QEMU_PCI_DEVICE_H
+#define QEMU_PCI_DEVICE_H
+
+#include "hw/pci/pci.h"
+#include "hw/pci/pcie.h"
+
+#define TYPE_PCI_DEVICE "pci-device"
+typedef struct PCIDeviceClass PCIDeviceClass;
+DECLARE_OBJ_CHECKERS(PCIDevice, PCIDeviceClass,
+                     PCI_DEVICE, TYPE_PCI_DEVICE)
+
+/*
+ * Implemented by devices that can be plugged on CXL buses. In the spec, this is
+ * actually a "CXL Component, but we name it device to match the PCI naming.
+ */
+#define INTERFACE_CXL_DEVICE "cxl-device"
+
+/* Implemented by devices that can be plugged on PCI Express buses */
+#define INTERFACE_PCIE_DEVICE "pci-express-device"
+
+/* Implemented by devices that can be plugged on Conventional PCI buses */
+#define INTERFACE_CONVENTIONAL_PCI_DEVICE "conventional-pci-device"
+
+struct PCIDeviceClass {
+    DeviceClass parent_class;
+
+    void (*realize)(PCIDevice *dev, Error **errp);
+    PCIUnregisterFunc *exit;
+    PCIConfigReadFunc *config_read;
+    PCIConfigWriteFunc *config_write;
+
+    uint16_t vendor_id;
+    uint16_t device_id;
+    uint8_t revision;
+    uint16_t class_id;
+    uint16_t subsystem_vendor_id;       /* only for header type = 0 */
+    uint16_t subsystem_id;              /* only for header type = 0 */
+
+    const char *romfile;                /* rom bar */
+};
+
+enum PCIReqIDType {
+    PCI_REQ_ID_INVALID = 0,
+    PCI_REQ_ID_BDF,
+    PCI_REQ_ID_SECONDARY_BUS,
+    PCI_REQ_ID_MAX,
+};
+typedef enum PCIReqIDType PCIReqIDType;
+
+struct PCIReqIDCache {
+    PCIDevice *dev;
+    PCIReqIDType type;
+};
+typedef struct PCIReqIDCache PCIReqIDCache;
+
+struct PCIDevice {
+    DeviceState qdev;
+    bool partially_hotplugged;
+    bool has_power;
+
+    /* PCI config space */
+    uint8_t *config;
+
+    /*
+     * Used to enable config checks on load. Note that writable bits are
+     * never checked even if set in cmask.
+     */
+    uint8_t *cmask;
+
+    /* Used to implement R/W bytes */
+    uint8_t *wmask;
+
+    /* Used to implement RW1C(Write 1 to Clear) bytes */
+    uint8_t *w1cmask;
+
+    /* Used to allocate config space for capabilities. */
+    uint8_t *used;
+
+    /* the following fields are read only */
+    int32_t devfn;
+    /*
+     * Cached device to fetch requester ID from, to avoid the PCI tree
+     * walking every time we invoke PCI request (e.g., MSI). For
+     * conventional PCI root complex, this field is meaningless.
+     */
+    PCIReqIDCache requester_id_cache;
+    char name[64];
+    PCIIORegion io_regions[PCI_NUM_REGIONS];
+    AddressSpace bus_master_as;
+    MemoryRegion bus_master_container_region;
+    MemoryRegion bus_master_enable_region;
+
+    /* do not access the following fields */
+    PCIConfigReadFunc *config_read;
+    PCIConfigWriteFunc *config_write;
+
+    /* Legacy PCI VGA regions */
+    MemoryRegion *vga_regions[QEMU_PCI_VGA_NUM_REGIONS];
+    bool has_vga;
+
+    /* Current IRQ levels.  Used internally by the generic PCI code.  */
+    uint8_t irq_state;
+
+    /* Capability bits */
+    uint32_t cap_present;
+
+    /* Offset of MSI-X capability in config space */
+    uint8_t msix_cap;
+
+    /* MSI-X entries */
+    int msix_entries_nr;
+
+    /* Space to store MSIX table & pending bit array */
+    uint8_t *msix_table;
+    uint8_t *msix_pba;
+
+    /* May be used by INTx or MSI during interrupt notification */
+    void *irq_opaque;
+
+    MSITriggerFunc *msi_trigger;
+    MSIPrepareMessageFunc *msi_prepare_message;
+    MSIxPrepareMessageFunc *msix_prepare_message;
+
+    /* MemoryRegion container for msix exclusive BAR setup */
+    MemoryRegion msix_exclusive_bar;
+    /* Memory Regions for MSIX table and pending bit entries. */
+    MemoryRegion msix_table_mmio;
+    MemoryRegion msix_pba_mmio;
+    /* Reference-count for entries actually in use by driver. */
+    unsigned *msix_entry_used;
+    /* MSIX function mask set or MSIX disabled */
+    bool msix_function_masked;
+    /* Version id needed for VMState */
+    int32_t version_id;
+
+    /* Offset of MSI capability in config space */
+    uint8_t msi_cap;
+
+    /* PCI Express */
+    PCIExpressDevice exp;
+
+    /* SHPC */
+    SHPCDevice *shpc;
+
+    /* Location of option rom */
+    char *romfile;
+    uint32_t romsize;
+    bool has_rom;
+    MemoryRegion rom;
+    uint32_t rom_bar;
+
+    /* INTx routing notifier */
+    PCIINTxRoutingNotifier intx_routing_notifier;
+
+    /* MSI-X notifiers */
+    MSIVectorUseNotifier msix_vector_use_notifier;
+    MSIVectorReleaseNotifier msix_vector_release_notifier;
+    MSIVectorPollNotifier msix_vector_poll_notifier;
+
+    /* ID of standby device in net_failover pair */
+    char *failover_pair_id;
+    uint32_t acpi_index;
+};
+
+static inline int pci_intx(PCIDevice *pci_dev)
+{
+    return pci_get_byte(pci_dev->config + PCI_INTERRUPT_PIN) - 1;
+}
+
+static inline int pci_is_cxl(const PCIDevice *d)
+{
+    return d->cap_present & QEMU_PCIE_CAP_CXL;
+}
+
+static inline int pci_is_express(const PCIDevice *d)
+{
+    return d->cap_present & QEMU_PCI_CAP_EXPRESS;
+}
+
+static inline int pci_is_express_downstream_port(const PCIDevice *d)
+{
+    uint8_t type;
+
+    if (!pci_is_express(d) || !d->exp.exp_cap) {
+        return 0;
+    }
+
+    type = pcie_cap_get_type(d);
+
+    return type == PCI_EXP_TYPE_DOWNSTREAM || type == PCI_EXP_TYPE_ROOT_PORT;
+}
+
+static inline int pci_is_vf(const PCIDevice *d)
+{
+    return d->exp.sriov_vf.pf != NULL;
+}
+
+static inline uint32_t pci_config_size(const PCIDevice *d)
+{
+    return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
+}
+
+static inline uint16_t pci_get_bdf(PCIDevice *dev)
+{
+    return PCI_BUILD_BDF(pci_bus_num(pci_get_bus(dev)), dev->devfn);
+}
+
+uint16_t pci_requester_id(PCIDevice *dev);
+
+/* DMA access functions */
+static inline AddressSpace *pci_get_address_space(PCIDevice *dev)
+{
+    return &dev->bus_master_as;
+}
+
+/**
+ * pci_dma_rw: Read from or write to an address space from PCI device.
+ *
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).
+ *
+ * @dev: #PCIDevice doing the memory access
+ * @addr: address within the #PCIDevice address space
+ * @buf: buffer with the data transferred
+ * @len: the number of bytes to read or write
+ * @dir: indicates the transfer direction
+ */
+static inline MemTxResult pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
+                                     void *buf, dma_addr_t len,
+                                     DMADirection dir, MemTxAttrs attrs)
+{
+    return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
+                         dir, attrs);
+}
+
+/**
+ * pci_dma_read: Read from an address space from PCI device.
+ *
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).  Called within RCU critical section.
+ *
+ * @dev: #PCIDevice doing the memory access
+ * @addr: address within the #PCIDevice address space
+ * @buf: buffer with the data transferred
+ * @len: length of the data transferred
+ */
+static inline MemTxResult pci_dma_read(PCIDevice *dev, dma_addr_t addr,
+                                       void *buf, dma_addr_t len)
+{
+    return pci_dma_rw(dev, addr, buf, len,
+                      DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED);
+}
+
+/**
+ * pci_dma_write: Write to address space from PCI device.
+ *
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).
+ *
+ * @dev: #PCIDevice doing the memory access
+ * @addr: address within the #PCIDevice address space
+ * @buf: buffer with the data transferred
+ * @len: the number of bytes to write
+ */
+static inline MemTxResult pci_dma_write(PCIDevice *dev, dma_addr_t addr,
+                                        const void *buf, dma_addr_t len)
+{
+    return pci_dma_rw(dev, addr, (void *) buf, len,
+                      DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED);
+}
+
+#define PCI_DMA_DEFINE_LDST(_l, _s, _bits) \
+    static inline MemTxResult ld##_l##_pci_dma(PCIDevice *dev, \
+                                               dma_addr_t addr, \
+                                               uint##_bits##_t *val, \
+                                               MemTxAttrs attrs) \
+    { \
+        return ld##_l##_dma(pci_get_address_space(dev), addr, val, attrs); \
+    } \
+    static inline MemTxResult st##_s##_pci_dma(PCIDevice *dev, \
+                                               dma_addr_t addr, \
+                                               uint##_bits##_t val, \
+                                               MemTxAttrs attrs) \
+    { \
+        return st##_s##_dma(pci_get_address_space(dev), addr, val, attrs); \
+    }
+
+PCI_DMA_DEFINE_LDST(ub, b, 8);
+PCI_DMA_DEFINE_LDST(uw_le, w_le, 16)
+PCI_DMA_DEFINE_LDST(l_le, l_le, 32);
+PCI_DMA_DEFINE_LDST(q_le, q_le, 64);
+PCI_DMA_DEFINE_LDST(uw_be, w_be, 16)
+PCI_DMA_DEFINE_LDST(l_be, l_be, 32);
+PCI_DMA_DEFINE_LDST(q_be, q_be, 64);
+
+#undef PCI_DMA_DEFINE_LDST
+
+/**
+ * pci_dma_map: Map device PCI address space range into host virtual address
+ * @dev: #PCIDevice to be accessed
+ * @addr: address within that device's address space
+ * @plen: pointer to length of buffer; updated on return to indicate
+ *        if only a subset of the requested range has been mapped
+ * @dir: indicates the transfer direction
+ *
+ * Return: A host pointer, or %NULL if the resources needed to
+ *         perform the mapping are exhausted (in that case *@plen
+ *         is set to zero).
+ */
+static inline void *pci_dma_map(PCIDevice *dev, dma_addr_t addr,
+                                dma_addr_t *plen, DMADirection dir)
+{
+    return dma_memory_map(pci_get_address_space(dev), addr, plen, dir,
+                          MEMTXATTRS_UNSPECIFIED);
+}
+
+static inline void pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len,
+                                 DMADirection dir, dma_addr_t access_len)
+{
+    dma_memory_unmap(pci_get_address_space(dev), buffer, len, dir, access_len);
+}
+
+static inline void pci_dma_sglist_init(QEMUSGList *qsg, PCIDevice *dev,
+                                       int alloc_hint)
+{
+    qemu_sglist_init(qsg, DEVICE(dev), alloc_hint, pci_get_address_space(dev));
+}
+
+extern const VMStateDescription vmstate_pci_device;
+
+#define VMSTATE_PCI_DEVICE(_field, _state) {                         \
+    .name       = (stringify(_field)),                               \
+    .size       = sizeof(PCIDevice),                                 \
+    .vmsd       = &vmstate_pci_device,                               \
+    .flags      = VMS_STRUCT,                                        \
+    .offset     = vmstate_offset_value(_state, _field, PCIDevice),   \
+}
+
+#define VMSTATE_PCI_DEVICE_POINTER(_field, _state) {                 \
+    .name       = (stringify(_field)),                               \
+    .size       = sizeof(PCIDevice),                                 \
+    .vmsd       = &vmstate_pci_device,                               \
+    .flags      = VMS_STRUCT | VMS_POINTER,                          \
+    .offset     = vmstate_offset_pointer(_state, _field, PCIDevice), \
+}
+
+#endif
index 52e038c0196fd6dcc5f5dc115a268e9796ebcdcf..e52d8ec2cdc63d0f3c1b53e23455e78b5019e907 100644 (file)
@@ -31,6 +31,8 @@
 #include "hw/sysbus.h"
 #include "qom/object.h"
 
+#define PCI_HOST_BYPASS_IOMMU "bypass-iommu"
+
 #define TYPE_PCI_HOST_BRIDGE "pci-host-bridge"
 OBJECT_DECLARE_TYPE(PCIHostState, PCIHostBridgeClass, PCI_HOST_BRIDGE)
 
@@ -43,6 +45,7 @@ struct PCIHostState {
     uint32_t config_reg;
     bool mig_enabled;
     PCIBus *bus;
+    bool bypass_iommu;
 
     QLIST_ENTRY(PCIHostState) next;
 };
index 11f8ab71498cd08294c2ad57d7d0fe68f5214468..f1a53fea8d629168401c3e49f9f80f23a06a1111 100644 (file)
@@ -26,6 +26,7 @@
 #define PCI_CLASS_STORAGE_SATA           0x0106
 #define PCI_CLASS_STORAGE_SAS            0x0107
 #define PCI_CLASS_STORAGE_EXPRESS        0x0108
+#define PCI_CLASS_STORAGE_UFS            0x0109
 #define PCI_CLASS_STORAGE_OTHER          0x0180
 
 #define PCI_BASE_CLASS_NETWORK           0x02
@@ -53,6 +54,7 @@
 #define PCI_BASE_CLASS_MEMORY            0x05
 #define PCI_CLASS_MEMORY_RAM             0x0500
 #define PCI_CLASS_MEMORY_FLASH           0x0501
+#define PCI_CLASS_MEMORY_CXL             0x0502
 #define PCI_CLASS_MEMORY_OTHER           0x0580
 
 #define PCI_BASE_CLASS_BRIDGE            0x06
 
 /* Vendors and devices.  Sort key: vendor first, device next. */
 
+/* Ref: PCIe r6.0 Table 6-32 */
+#define PCI_VENDOR_ID_PCI_SIG            0x0001
+
 #define PCI_VENDOR_ID_LSI_LOGIC          0x1000
 #define PCI_DEVICE_ID_LSI_53C810         0x0001
 #define PCI_DEVICE_ID_LSI_53C895A        0x0012
 
 #define PCI_VENDOR_ID_DEC                0x1011
 #define PCI_DEVICE_ID_DEC_21143          0x0019
-#define PCI_DEVICE_ID_DEC_21154          0x0026
 
 #define PCI_VENDOR_ID_CIRRUS             0x1013
 
 #define PCI_DEVICE_ID_AMD_LANCE          0x2000
 #define PCI_DEVICE_ID_AMD_SCSI           0x2020
 
+#define PCI_VENDOR_ID_HP                 0x103c
+
 #define PCI_VENDOR_ID_TI                 0x104c
 
 #define PCI_VENDOR_ID_MOTOROLA           0x1057
 #define PCI_DEVICE_ID_SUN_SIMBA          0x5000
 #define PCI_DEVICE_ID_SUN_SABRE          0xa000
 
+#define PCI_VENDOR_ID_ORACLE             0x108e
+#define PCI_DEVICE_ID_REMOTE_IOHUB       0xb000
+
 #define PCI_VENDOR_ID_CMD                0x1095
 #define PCI_DEVICE_ID_CMD_646            0x0646
 
 #define PCI_VENDOR_ID_XILINX             0x10ee
 
 #define PCI_VENDOR_ID_VIA                0x1106
-#define PCI_DEVICE_ID_VIA_ISA_BRIDGE     0x0686
+#define PCI_DEVICE_ID_VIA_82C686B_ISA    0x0686
 #define PCI_DEVICE_ID_VIA_IDE            0x0571
 #define PCI_DEVICE_ID_VIA_UHCI           0x3038
-#define PCI_DEVICE_ID_VIA_ACPI           0x3057
+#define PCI_DEVICE_ID_VIA_82C686B_PM     0x3057
 #define PCI_DEVICE_ID_VIA_AC97           0x3058
 #define PCI_DEVICE_ID_VIA_MC97           0x3068
+#define PCI_DEVICE_ID_VIA_8231_ISA       0x8231
+#define PCI_DEVICE_ID_VIA_8231_PM        0x8235
 
 #define PCI_VENDOR_ID_MARVELL            0x11ab
+#define PCI_DEVICE_ID_MARVELL_MV6436X    0x6460
 
 #define PCI_VENDOR_ID_SILICON_MOTION     0x126f
 #define PCI_DEVICE_ID_SM501              0x0501
 #define PCI_VENDOR_ID_FREESCALE          0x1957
 #define PCI_DEVICE_ID_MPC8533E           0x0030
 
+#define PCI_VENDOR_ID_BAIDU              0x1d22
+#define PCI_DEVICE_ID_KUNLUN_VF          0x3685
+
 #define PCI_VENDOR_ID_INTEL              0x8086
 #define PCI_DEVICE_ID_INTEL_82378        0x0484
 #define PCI_DEVICE_ID_INTEL_82441        0x1237
 #define PCI_DEVICE_ID_INTEL_82801BA_11   0x244e
 #define PCI_DEVICE_ID_INTEL_82801D       0x24CD
 #define PCI_DEVICE_ID_INTEL_ESB_9        0x25ab
+#define PCI_DEVICE_ID_INTEL_NVME         0x5845
 #define PCI_DEVICE_ID_INTEL_82371SB_0    0x7000
 #define PCI_DEVICE_ID_INTEL_82371SB_1    0x7010
 #define PCI_DEVICE_ID_INTEL_82371SB_2    0x7020
index 77ba64b9314d50de04acc3d0786ee0110a9b9f41..a590140962291037b9d2c54e63b8125dcb499b1d 100644 (file)
@@ -4,5 +4,6 @@
 #include "standard-headers/linux/pci_regs.h"
 
 #define  PCI_PM_CAP_VER_1_1     0x0002  /* PCI PM spec ver. 1.1 */
+#define  PCI_PM_CAP_VER_1_2     0x0003  /* PCI PM spec ver. 1.2 */
 
 #endif
index 6063bee0ec632c563f236f520aef8e52dc87b182..11f5a91bbb7ec318912e2440daec4cf7d3e5d669 100644 (file)
 #include "hw/pci/pci_regs.h"
 #include "hw/pci/pcie_regs.h"
 #include "hw/pci/pcie_aer.h"
+#include "hw/pci/pcie_sriov.h"
 #include "hw/hotplug.h"
 
-typedef enum {
-    /* for attention and power indicator */
-    PCI_EXP_HP_IND_RESERVED     = PCI_EXP_SLTCTL_IND_RESERVED,
-    PCI_EXP_HP_IND_ON           = PCI_EXP_SLTCTL_IND_ON,
-    PCI_EXP_HP_IND_BLINK        = PCI_EXP_SLTCTL_IND_BLINK,
-    PCI_EXP_HP_IND_OFF          = PCI_EXP_SLTCTL_IND_OFF,
-} PCIExpressIndicator;
-
 typedef enum {
     /* these bits must match the bits in Slot Control/Status registers.
      * PCI_EXP_HP_EV_xxx = PCI_EXP_SLTCTL_xxxE = PCI_EXP_SLTSTA_xxx
@@ -81,6 +74,11 @@ struct PCIExpressDevice {
 
     /* ACS */
     uint16_t acs_cap;
+
+    /* SR/IOV */
+    uint16_t sriov_cap;
+    PCIESriovPF sriov_pf;
+    PCIESriovVF sriov_vf;
 };
 
 #define COMPAT_PROP_PCP "power_controller_present"
@@ -95,6 +93,7 @@ void pcie_cap_exit(PCIDevice *dev);
 int pcie_endpoint_cap_v1_init(PCIDevice *dev, uint8_t offset);
 void pcie_cap_v1_exit(PCIDevice *dev);
 uint8_t pcie_cap_get_type(const PCIDevice *dev);
+uint8_t pcie_cap_get_version(const PCIDevice *dev);
 void pcie_cap_flags_set_vector(PCIDevice *dev, uint8_t vector);
 uint8_t pcie_cap_flags_get_vector(PCIDevice *dev);
 
@@ -112,6 +111,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
                                 uint32_t addr, uint32_t val, int len);
 int pcie_cap_slot_post_load(void *opaque, int version_id);
 void pcie_cap_slot_push_attention_button(PCIDevice *dev);
+void pcie_cap_slot_enable_power(PCIDevice *dev);
 
 void pcie_cap_root_init(PCIDevice *dev);
 void pcie_cap_root_reset(PCIDevice *dev);
@@ -135,7 +135,7 @@ void pcie_sync_bridge_lnk(PCIDevice *dev);
 void pcie_acs_init(PCIDevice *dev, uint16_t offset);
 void pcie_acs_reset(PCIDevice *dev);
 
-void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn);
+void pcie_ari_init(PCIDevice *dev, uint16_t offset);
 void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num);
 void pcie_ats_init(PCIDevice *dev, uint16_t offset, bool aligned);
 
index 65e71d98febd57b9f3d47db201743036ee0188d8..4a9f0ea69dc753cc6846554526b2cecc54f02973 100644 (file)
@@ -40,7 +40,7 @@ struct PCIEAERLog {
      * The specified value will be clipped down to PCIE_AER_LOG_MAX_LIMIT
      * to avoid unreasonable memory usage.
      * I bet that 128 log size would be big enough, otherwise too many errors
-     * for system to function normaly. But could consecutive errors occur?
+     * for system to function normally. But could consecutive errors occur?
      */
 #define PCIE_AER_LOG_MAX_DEFAULT        8
 #define PCIE_AER_LOG_MAX_LIMIT          128
@@ -100,4 +100,5 @@ void pcie_aer_root_write_config(PCIDevice *dev,
                                 uint32_t addr, uint32_t val, int len,
                                 uint32_t root_cmd_prev);
 
+int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
 #endif /* QEMU_PCIE_AER_H */
diff --git a/include/hw/pci/pcie_doe.h b/include/hw/pci/pcie_doe.h
new file mode 100644 (file)
index 0000000..87dc17d
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * PCIe Data Object Exchange
+ *
+ * Copyright (C) 2021 Avery Design Systems, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef PCIE_DOE_H
+#define PCIE_DOE_H
+
+#include "qemu/range.h"
+#include "hw/register.h"
+
+/*
+ * Reference:
+ * PCIe r6.0 - 7.9.24 Data Object Exchange Extended Capability
+ */
+/* Capabilities Register - r6.0 7.9.24.2 */
+#define PCI_EXP_DOE_CAP             0x04
+REG32(PCI_DOE_CAP_REG, 0)
+    FIELD(PCI_DOE_CAP_REG, INTR_SUPP, 0, 1)
+    FIELD(PCI_DOE_CAP_REG, DOE_INTR_MSG_NUM, 1, 11)
+
+/* Control Register - r6.0 7.9.24.3 */
+#define PCI_EXP_DOE_CTRL            0x08
+REG32(PCI_DOE_CAP_CONTROL, 0)
+    FIELD(PCI_DOE_CAP_CONTROL, DOE_ABORT, 0, 1)
+    FIELD(PCI_DOE_CAP_CONTROL, DOE_INTR_EN, 1, 1)
+    FIELD(PCI_DOE_CAP_CONTROL, DOE_GO, 31, 1)
+
+/* Status Register - r6.0 7.9.24.4 */
+#define PCI_EXP_DOE_STATUS          0x0c
+REG32(PCI_DOE_CAP_STATUS, 0)
+    FIELD(PCI_DOE_CAP_STATUS, DOE_BUSY, 0, 1)
+    FIELD(PCI_DOE_CAP_STATUS, DOE_INTR_STATUS, 1, 1)
+    FIELD(PCI_DOE_CAP_STATUS, DOE_ERROR, 2, 1)
+    FIELD(PCI_DOE_CAP_STATUS, DATA_OBJ_RDY, 31, 1)
+
+/* Write Data Mailbox Register - r6.0 7.9.24.5 */
+#define PCI_EXP_DOE_WR_DATA_MBOX    0x10
+
+/* Read Data Mailbox Register - 7.9.xx.6 */
+#define PCI_EXP_DOE_RD_DATA_MBOX    0x14
+
+/* PCI-SIG defined Data Object Types - r6.0 Table 6-32 */
+#define PCI_SIG_DOE_DISCOVERY       0x00
+
+#define PCI_DOE_DW_SIZE_MAX         (1 << 18)
+#define PCI_DOE_PROTOCOL_NUM_MAX    256
+
+#define DATA_OBJ_BUILD_HEADER1(v, p)    (((p) << 16) | (v))
+#define DATA_OBJ_LEN_MASK(len)          ((len) & (PCI_DOE_DW_SIZE_MAX - 1))
+
+typedef struct DOEHeader DOEHeader;
+typedef struct DOEProtocol DOEProtocol;
+typedef struct DOECap DOECap;
+
+struct DOEHeader {
+    uint16_t vendor_id;
+    uint8_t data_obj_type;
+    uint8_t reserved;
+    uint32_t length;
+} QEMU_PACKED;
+
+/* Protocol infos and rsp function callback */
+struct DOEProtocol {
+    uint16_t vendor_id;
+    uint8_t data_obj_type;
+    bool (*handle_request)(DOECap *);
+};
+
+struct DOECap {
+    /* Owner */
+    PCIDevice *pdev;
+
+    uint16_t offset;
+
+    struct {
+        bool intr;
+        uint16_t vec;
+    } cap;
+
+    struct {
+        bool abort;
+        bool intr;
+        bool go;
+    } ctrl;
+
+    struct {
+        bool busy;
+        bool intr;
+        bool error;
+        bool ready;
+    } status;
+
+    uint32_t *write_mbox;
+    uint32_t *read_mbox;
+
+    /* Mailbox position indicator */
+    uint32_t read_mbox_idx;
+    uint32_t read_mbox_len;
+    uint32_t write_mbox_len;
+
+    /* Protocols and its callback response */
+    DOEProtocol *protocols;
+    uint16_t protocol_num;
+};
+
+void pcie_doe_init(PCIDevice *pdev, DOECap *doe_cap, uint16_t offset,
+                   DOEProtocol *protocols, bool intr, uint16_t vec);
+void pcie_doe_fini(DOECap *doe_cap);
+bool pcie_doe_read_config(DOECap *doe_cap, uint32_t addr, int size,
+                          uint32_t *buf);
+void pcie_doe_write_config(DOECap *doe_cap, uint32_t addr,
+                           uint32_t val, int size);
+uint32_t pcie_doe_build_protocol(DOEProtocol *p);
+void *pcie_doe_get_write_mbox_ptr(DOECap *doe_cap);
+void pcie_doe_set_rsp(DOECap *doe_cap, void *rsp);
+uint32_t pcie_doe_get_obj_len(void *obj);
+#endif /* PCIE_DOE_H */
index 076457b270e2eba6d2f70cfe844da1b1398f6475..82d92177da9e89fbc3652623205c2bba641c21ef 100644 (file)
@@ -60,15 +60,15 @@ void pcie_host_mmcfg_update(PCIExpressHost *e,
 /*
  * PCI express ECAM (Enhanced Configuration Address Mapping) format.
  * AKA mmcfg address
- * bit 20 - 28: bus number
+ * bit 20 - 27: bus number
  * bit 15 - 19: device number
  * bit 12 - 14: function number
  * bit  0 - 11: offset in configuration space of a given device
  */
-#define PCIE_MMCFG_SIZE_MAX             (1ULL << 29)
+#define PCIE_MMCFG_SIZE_MAX             (1ULL << 28)
 #define PCIE_MMCFG_SIZE_MIN             (1ULL << 20)
 #define PCIE_MMCFG_BUS_BIT              20
-#define PCIE_MMCFG_BUS_MASK             0x1ff
+#define PCIE_MMCFG_BUS_MASK             0xff
 #define PCIE_MMCFG_DEVFN_BIT            12
 #define PCIE_MMCFG_DEVFN_MASK           0xff
 #define PCIE_MMCFG_CONFOFFSET_MASK      0xfff
index bea8ecad0fdb044111094fb269b983c46b547622..90e6cf45b873721bf07a61af7b55af8ecf1f0387 100644 (file)
@@ -23,6 +23,7 @@
 
 #include "hw/pci/pci_bridge.h"
 #include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_device.h"
 #include "qom/object.h"
 
 #define TYPE_PCIE_PORT "pcie-port"
@@ -39,6 +40,10 @@ struct PCIEPort {
 
 void pcie_port_init_reg(PCIDevice *d);
 
+PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn);
+PCIDevice *pcie_find_port_first(PCIBus *bus);
+int pcie_count_ds_ports(PCIBus *bus);
+
 #define TYPE_PCIE_SLOT "pcie-slot"
 OBJECT_DECLARE_SIMPLE_TYPE(PCIESlot, PCIE_SLOT)
 
@@ -57,8 +62,12 @@ struct PCIESlot {
     /* Disable ACS (really for a pcie_root_port) */
     bool        disable_acs;
 
-    /* Indicates whether hot-plug is enabled on the slot */
+    /* Indicates whether any type of hot-plug is allowed on the slot */
     bool        hotplug;
+
+    /* broken ACPI hotplug compat knob to preserve 6.1 ABI intact */
+    bool        hide_native_hotplug_cap;
+
     QLIST_ENTRY(PCIESlot) next;
 };
 
@@ -75,7 +84,7 @@ DECLARE_CLASS_CHECKERS(PCIERootPortClass, PCIE_ROOT_PORT,
 struct PCIERootPortClass {
     PCIDeviceClass parent_class;
     DeviceRealize parent_realize;
-    DeviceReset parent_reset;
+    ResettablePhases parent_phases;
 
     uint8_t (*aer_vector)(const PCIDevice *dev);
     int (*interrupts_init)(PCIDevice *dev, Error **errp);
index 1db86b0ec4d36335eb936804624fc44d0919040e..4972106c42948a47b4b5e51788bbe444e6c26ea8 100644 (file)
@@ -66,20 +66,6 @@ typedef enum PCIExpLinkWidth {
 
 #define PCI_EXP_SLTCAP_PSN_SHIFT        ctz32(PCI_EXP_SLTCAP_PSN)
 
-#define PCI_EXP_SLTCTL_IND_RESERVED     0x0
-#define PCI_EXP_SLTCTL_IND_ON           0x1
-#define PCI_EXP_SLTCTL_IND_BLINK        0x2
-#define PCI_EXP_SLTCTL_IND_OFF          0x3
-#define PCI_EXP_SLTCTL_AIC_SHIFT        ctz32(PCI_EXP_SLTCTL_AIC)
-#define PCI_EXP_SLTCTL_AIC_OFF                          \
-    (PCI_EXP_SLTCTL_IND_OFF << PCI_EXP_SLTCTL_AIC_SHIFT)
-
-#define PCI_EXP_SLTCTL_PIC_SHIFT        ctz32(PCI_EXP_SLTCTL_PIC)
-#define PCI_EXP_SLTCTL_PIC_OFF                          \
-    (PCI_EXP_SLTCTL_IND_OFF << PCI_EXP_SLTCTL_PIC_SHIFT)
-#define PCI_EXP_SLTCTL_PIC_ON                          \
-    (PCI_EXP_SLTCTL_IND_ON << PCI_EXP_SLTCTL_PIC_SHIFT)
-
 #define PCI_EXP_SLTCTL_SUPPORTED        \
             (PCI_EXP_SLTCTL_ABPE |      \
              PCI_EXP_SLTCTL_PDCE |      \
@@ -155,6 +141,9 @@ typedef enum PCIExpLinkWidth {
                                          PCI_ERR_UNC_ATOP_EBLOCKED |    \
                                          PCI_ERR_UNC_TLP_PRF_BLOCKED)
 
+#define PCI_ERR_UNC_MASK_DEFAULT        (PCI_ERR_UNC_INTN | \
+                                         PCI_ERR_UNC_TLP_PRF_BLOCKED)
+
 #define PCI_ERR_UNC_SEVERITY_DEFAULT    (PCI_ERR_UNC_DLP |              \
                                          PCI_ERR_UNC_SDN |              \
                                          PCI_ERR_UNC_FCP |              \
@@ -179,4 +168,8 @@ typedef enum PCIExpLinkWidth {
 #define PCI_ACS_VER                     0x1
 #define PCI_ACS_SIZEOF                  8
 
+/* DOE Capability Register Fields */
+#define PCI_DOE_VER                     0x1
+#define PCI_DOE_SIZEOF                  24
+
 #endif /* QEMU_PCIE_REGS_H */
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
new file mode 100644 (file)
index 0000000..095fb0c
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * pcie_sriov.h:
+ *
+ * Implementation of SR/IOV emulation support.
+ *
+ * Copyright (c) 2015 Knut Omang <knut.omang@oracle.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_PCIE_SRIOV_H
+#define QEMU_PCIE_SRIOV_H
+
+#include "hw/pci/pci.h"
+
+struct PCIESriovPF {
+    uint16_t num_vfs;   /* Number of virtual functions created */
+    uint8_t vf_bar_type[PCI_NUM_REGIONS];   /* Store type for each VF bar */
+    const char *vfname; /* Reference to the device type used for the VFs */
+    PCIDevice **vf;     /* Pointer to an array of num_vfs VF devices */
+};
+
+struct PCIESriovVF {
+    PCIDevice *pf;      /* Pointer back to owner physical function */
+    uint16_t vf_number; /* Logical VF number of this function */
+};
+
+void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+                        const char *vfname, uint16_t vf_dev_id,
+                        uint16_t init_vfs, uint16_t total_vfs,
+                        uint16_t vf_offset, uint16_t vf_stride);
+void pcie_sriov_pf_exit(PCIDevice *dev);
+
+/* Set up a VF bar in the SR/IOV bar area */
+void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
+                               uint8_t type, dma_addr_t size);
+
+/* Instantiate a bar for a VF */
+void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
+                                MemoryRegion *memory);
+
+/*
+ * Default (minimal) page size support values
+ * as required by the SR/IOV standard:
+ * 0x553 << 12 = 0x553000 = 4K + 8K + 64K + 256K + 1M + 4M
+ */
+#define SRIOV_SUP_PGSIZE_MINREQ 0x553
+
+/*
+ * Optionally add supported page sizes to the mask of supported page sizes
+ * Page size values are interpreted as opt_sup_pgsize << 12.
+ */
+void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize);
+
+/* SR/IOV capability config write handler */
+void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
+                             uint32_t val, int len);
+
+/* Reset SR/IOV VF Enable bit to unregister all VFs */
+void pcie_sriov_pf_disable_vfs(PCIDevice *dev);
+
+/* Get logical VF number of a VF - only valid for VFs */
+uint16_t pcie_sriov_vf_number(PCIDevice *dev);
+
+/*
+ * Get the physical function that owns this VF.
+ * Returns NULL if dev is not a virtual function
+ */
+PCIDevice *pcie_sriov_get_pf(PCIDevice *dev);
+
+/*
+ * Get the n-th VF of this physical function - only valid for PF.
+ * Returns NULL if index is invalid
+ */
+PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n);
+
+/* Returns the current number of virtual functions. */
+uint16_t pcie_sriov_num_vfs(PCIDevice *dev);
+
+#endif /* QEMU_PCIE_SRIOV_H */
index d5683b73990857759d9d697be5a27f2c368b390c..89c7a3b7fa9fcd88326f5e2dd500bd910ee7ede4 100644 (file)
@@ -3,7 +3,7 @@
 
 #include "exec/memory.h"
 #include "hw/hotplug.h"
-#include "hw/pci/pci.h"
+#include "hw/pci/pci_device.h"
 #include "migration/vmstate.h"
 
 struct SHPCDevice {
index e3ba44e0bf69f9a1a8124f7c26df7c50fba957e8..ab268027511b3f4ec78fee71eb930820e6719cb6 100644 (file)
@@ -43,22 +43,22 @@ struct PCMCIACardClass {
     void (*io_write)(PCMCIACardState *card, uint32_t address, uint16_t value);
 };
 
-#define CISTPL_DEVICE          0x01    /* 5V Device Information Tuple */
-#define CISTPL_NO_LINK         0x14    /* No Link Tuple */
-#define CISTPL_VERS_1          0x15    /* Level 1 Version Tuple */
-#define CISTPL_JEDEC_C         0x18    /* JEDEC ID Tuple */
-#define CISTPL_JEDEC_A         0x19    /* JEDEC ID Tuple */
-#define CISTPL_CONFIG          0x1a    /* Configuration Tuple */
-#define CISTPL_CFTABLE_ENTRY   0x1b    /* 16-bit PCCard Configuration */
-#define CISTPL_DEVICE_OC       0x1c    /* Additional Device Information */
-#define CISTPL_DEVICE_OA       0x1d    /* Additional Device Information */
-#define CISTPL_DEVICE_GEO      0x1e    /* Additional Device Information */
-#define CISTPL_DEVICE_GEO_A    0x1f    /* Additional Device Information */
-#define CISTPL_MANFID          0x20    /* Manufacture ID Tuple */
-#define CISTPL_FUNCID          0x21    /* Function ID Tuple */
-#define CISTPL_FUNCE           0x22    /* Function Extension Tuple */
-#define CISTPL_END             0xff    /* Tuple End */
-#define CISTPL_ENDMARK         0xff
+#define CISTPL_DEVICE         0x01  /* 5V Device Information Tuple */
+#define CISTPL_NO_LINK        0x14  /* No Link Tuple */
+#define CISTPL_VERS_1         0x15  /* Level 1 Version Tuple */
+#define CISTPL_JEDEC_C        0x18  /* JEDEC ID Tuple */
+#define CISTPL_JEDEC_A        0x19  /* JEDEC ID Tuple */
+#define CISTPL_CONFIG         0x1a  /* Configuration Tuple */
+#define CISTPL_CFTABLE_ENTRY  0x1b  /* 16-bit PCCard Configuration */
+#define CISTPL_DEVICE_OC      0x1c  /* Additional Device Information */
+#define CISTPL_DEVICE_OA      0x1d  /* Additional Device Information */
+#define CISTPL_DEVICE_GEO     0x1e  /* Additional Device Information */
+#define CISTPL_DEVICE_GEO_A   0x1f  /* Additional Device Information */
+#define CISTPL_MANFID         0x20  /* Manufacture ID Tuple */
+#define CISTPL_FUNCID         0x21  /* Function ID Tuple */
+#define CISTPL_FUNCE          0x22  /* Function Extension Tuple */
+#define CISTPL_END            0xff  /* Tuple End */
+#define CISTPL_ENDMARK        0xff
 
 /* dscm1xxxx.c */
 PCMCIACardState *dscm1xxxx_init(DriveInfo *bdrv);
diff --git a/include/hw/ppc/fdt.h b/include/hw/ppc/fdt.h
new file mode 100644 (file)
index 0000000..b56ac2a
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * QEMU PowerPC helper routines for the device tree.
+ *
+ * Copyright (C) 2016 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_FDT_H
+#define PPC_FDT_H
+
+#include "qemu/error-report.h"
+#include "target/ppc/cpu-qom.h"
+
+#define _FDT(exp)                                                  \
+    do {                                                           \
+        int _ret = (exp);                                          \
+        if (_ret < 0) {                                            \
+            error_report("error creating device tree: %s: %s",     \
+                    #exp, fdt_strerror(_ret));                     \
+            exit(1);                                               \
+        }                                                          \
+    } while (0)
+
+size_t ppc_create_page_sizes_prop(PowerPCCPU *cpu, uint32_t *prop,
+                                  size_t maxsize);
+
+#endif /* PPC_FDT_H */
diff --git a/include/hw/ppc/mac_dbdma.h b/include/hw/ppc/mac_dbdma.h
new file mode 100644 (file)
index 0000000..4a3f644
--- /dev/null
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2009 Laurent Vivier
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_MAC_DBDMA_H
+#define HW_MAC_DBDMA_H
+
+#include "exec/memory.h"
+#include "qemu/iov.h"
+#include "sysemu/dma.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+typedef struct DBDMA_io DBDMA_io;
+
+typedef void (*DBDMA_flush)(DBDMA_io *io);
+typedef void (*DBDMA_rw)(DBDMA_io *io);
+typedef void (*DBDMA_end)(DBDMA_io *io);
+struct DBDMA_io {
+    void *opaque;
+    void *channel;
+    hwaddr addr;
+    int len;
+    int is_last;
+    int is_dma_out;
+    DBDMA_end dma_end;
+    /* DMA is in progress, don't start another one */
+    bool processing;
+    /* DMA request */
+    void *dma_mem;
+    dma_addr_t dma_len;
+    DMADirection dir;
+};
+
+/*
+ * DBDMA control/status registers.  All little-endian.
+ */
+
+#define DBDMA_CONTROL         0x00
+#define DBDMA_STATUS          0x01
+#define DBDMA_CMDPTR_HI       0x02
+#define DBDMA_CMDPTR_LO       0x03
+#define DBDMA_INTR_SEL        0x04
+#define DBDMA_BRANCH_SEL      0x05
+#define DBDMA_WAIT_SEL        0x06
+#define DBDMA_XFER_MODE       0x07
+#define DBDMA_DATA2PTR_HI     0x08
+#define DBDMA_DATA2PTR_LO     0x09
+#define DBDMA_RES1            0x0A
+#define DBDMA_ADDRESS_HI      0x0B
+#define DBDMA_BRANCH_ADDR_HI  0x0C
+#define DBDMA_RES2            0x0D
+#define DBDMA_RES3            0x0E
+#define DBDMA_RES4            0x0F
+
+#define DBDMA_REGS            16
+#define DBDMA_SIZE            (DBDMA_REGS * sizeof(uint32_t))
+
+#define DBDMA_CHANNEL_SHIFT   7
+#define DBDMA_CHANNEL_SIZE    (1 << DBDMA_CHANNEL_SHIFT)
+
+#define DBDMA_CHANNELS        (0x1000 >> DBDMA_CHANNEL_SHIFT)
+
+/* Bits in control and status registers */
+
+#define RUN        0x8000
+#define PAUSE      0x4000
+#define FLUSH      0x2000
+#define WAKE       0x1000
+#define DEAD       0x0800
+#define ACTIVE     0x0400
+#define BT         0x0100
+#define DEVSTAT    0x00ff
+
+/*
+ * DBDMA command structure.  These fields are all little-endian!
+ */
+
+typedef struct dbdma_cmd {
+    uint16_t req_count;          /* requested byte transfer count */
+    uint16_t command;            /* command word (has bit-fields) */
+    uint32_t phy_addr;           /* physical data address */
+    uint32_t cmd_dep;            /* command-dependent field */
+    uint16_t res_count;          /* residual count after completion */
+    uint16_t xfer_status;        /* transfer status */
+} dbdma_cmd;
+
+/* DBDMA command values in command field */
+
+#define COMMAND_MASK    0xf000
+#define OUTPUT_MORE     0x0000        /* transfer memory data to stream */
+#define OUTPUT_LAST     0x1000        /* ditto followed by end marker */
+#define INPUT_MORE      0x2000        /* transfer stream data to memory */
+#define INPUT_LAST      0x3000        /* ditto, expect end marker */
+#define STORE_WORD      0x4000        /* write word (4 bytes) to device reg */
+#define LOAD_WORD       0x5000        /* read word (4 bytes) from device reg */
+#define DBDMA_NOP       0x6000        /* do nothing */
+#define DBDMA_STOP      0x7000        /* suspend processing */
+
+/* Key values in command field */
+
+#define KEY_MASK        0x0700
+#define KEY_STREAM0     0x0000        /* usual data stream */
+#define KEY_STREAM1     0x0100        /* control/status stream */
+#define KEY_STREAM2     0x0200        /* device-dependent stream */
+#define KEY_STREAM3     0x0300        /* device-dependent stream */
+#define KEY_STREAM4     0x0400        /* reserved */
+#define KEY_REGS        0x0500        /* device register space */
+#define KEY_SYSTEM      0x0600        /* system memory-mapped space */
+#define KEY_DEVICE      0x0700        /* device memory-mapped space */
+
+/* Interrupt control values in command field */
+
+#define INTR_MASK       0x0030
+#define INTR_NEVER      0x0000        /* don't interrupt */
+#define INTR_IFSET      0x0010        /* intr if condition bit is 1 */
+#define INTR_IFCLR      0x0020        /* intr if condition bit is 0 */
+#define INTR_ALWAYS     0x0030        /* always interrupt */
+
+/* Branch control values in command field */
+
+#define BR_MASK         0x000c
+#define BR_NEVER        0x0000        /* don't branch */
+#define BR_IFSET        0x0004        /* branch if condition bit is 1 */
+#define BR_IFCLR        0x0008        /* branch if condition bit is 0 */
+#define BR_ALWAYS       0x000c        /* always branch */
+
+/* Wait control values in command field */
+
+#define WAIT_MASK       0x0003
+#define WAIT_NEVER      0x0000        /* don't wait */
+#define WAIT_IFSET      0x0001        /* wait if condition bit is 1 */
+#define WAIT_IFCLR      0x0002        /* wait if condition bit is 0 */
+#define WAIT_ALWAYS     0x0003        /* always wait */
+
+typedef struct DBDMA_channel {
+    int channel;
+    uint32_t regs[DBDMA_REGS];
+    qemu_irq irq;
+    DBDMA_io io;
+    DBDMA_rw rw;
+    DBDMA_flush flush;
+    dbdma_cmd current;
+} DBDMA_channel;
+
+struct DBDMAState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion mem;
+    DBDMA_channel channels[DBDMA_CHANNELS];
+    QEMUBH *bh;
+};
+typedef struct DBDMAState DBDMAState;
+
+/* Externally callable functions */
+
+void DBDMA_register_channel(void *dbdma, int nchan, qemu_irq irq,
+                            DBDMA_rw rw, DBDMA_flush flush,
+                            void *opaque);
+void DBDMA_kick(DBDMAState *dbdma);
+
+#define TYPE_MAC_DBDMA "mac-dbdma"
+OBJECT_DECLARE_SIMPLE_TYPE(DBDMAState, MAC_DBDMA)
+
+#endif
diff --git a/include/hw/ppc/openpic.h b/include/hw/ppc/openpic.h
new file mode 100644 (file)
index 0000000..9c6af8e
--- /dev/null
@@ -0,0 +1,176 @@
+#ifndef OPENPIC_H
+#define OPENPIC_H
+
+#include "hw/sysbus.h"
+#include "hw/core/cpu.h"
+#include "qom/object.h"
+
+#define MAX_CPU     32
+#define MAX_MSI     8
+#define VID         0x03 /* MPIC version ID */
+
+/* OpenPIC have 5 outputs per CPU connected and one IRQ out single output */
+enum {
+    OPENPIC_OUTPUT_INT = 0, /* IRQ                       */
+    OPENPIC_OUTPUT_CINT,    /* critical IRQ              */
+    OPENPIC_OUTPUT_MCK,     /* Machine check event       */
+    OPENPIC_OUTPUT_DEBUG,   /* Unconditional debug event */
+    OPENPIC_OUTPUT_RESET,   /* Core reset event          */
+    OPENPIC_OUTPUT_NB,
+};
+
+typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines;
+
+#define OPENPIC_MODEL_FSL_MPIC_20 1
+#define OPENPIC_MODEL_FSL_MPIC_42 2
+#define OPENPIC_MODEL_KEYLARGO    3
+
+#define OPENPIC_MAX_SRC     256
+#define OPENPIC_MAX_TMR     4
+#define OPENPIC_MAX_IPI     4
+#define OPENPIC_MAX_IRQ     (OPENPIC_MAX_SRC + OPENPIC_MAX_IPI + \
+                             OPENPIC_MAX_TMR)
+
+/* KeyLargo */
+#define KEYLARGO_MAX_CPU  4
+#define KEYLARGO_MAX_EXT  64
+#define KEYLARGO_MAX_IPI  4
+#define KEYLARGO_MAX_IRQ  (64 + KEYLARGO_MAX_IPI)
+#define KEYLARGO_MAX_TMR  0
+#define KEYLARGO_IPI_IRQ  (KEYLARGO_MAX_EXT) /* First IPI IRQ */
+/* Timers don't exist but this makes the code happy... */
+#define KEYLARGO_TMR_IRQ  (KEYLARGO_IPI_IRQ + KEYLARGO_MAX_IPI)
+
+typedef struct FslMpicInfo {
+    int max_ext;
+} FslMpicInfo;
+
+typedef enum IRQType {
+    IRQ_TYPE_NORMAL = 0,
+    IRQ_TYPE_FSLINT,        /* FSL internal interrupt -- level only */
+    IRQ_TYPE_FSLSPECIAL,    /* FSL timer/IPI interrupt, edge, no polarity */
+} IRQType;
+
+/*
+ * Round up to the nearest 64 IRQs so that the queue length
+ * won't change when moving between 32 and 64 bit hosts.
+ */
+#define IRQQUEUE_SIZE_BITS ROUND_UP(OPENPIC_MAX_IRQ, 64)
+
+typedef struct IRQQueue {
+    unsigned long *queue;
+    int32_t queue_size; /* Only used for VMSTATE_BITMAP */
+    int next;
+    int priority;
+} IRQQueue;
+
+typedef struct IRQSource {
+    uint32_t ivpr;  /* IRQ vector/priority register */
+    uint32_t idr;   /* IRQ destination register */
+    uint32_t destmask; /* bitmap of CPU destinations */
+    int last_cpu;
+    int output;     /* IRQ level, e.g. OPENPIC_OUTPUT_INT */
+    int pending;    /* TRUE if IRQ is pending */
+    IRQType type;
+    bool level:1;   /* level-triggered */
+    bool nomask:1;  /* critical interrupts ignore mask on some FSL MPICs */
+} IRQSource;
+
+#define IVPR_MASK_SHIFT       31
+#define IVPR_MASK_MASK        (1U << IVPR_MASK_SHIFT)
+#define IVPR_ACTIVITY_SHIFT   30
+#define IVPR_ACTIVITY_MASK    (1U << IVPR_ACTIVITY_SHIFT)
+#define IVPR_MODE_SHIFT       29
+#define IVPR_MODE_MASK        (1U << IVPR_MODE_SHIFT)
+#define IVPR_POLARITY_SHIFT   23
+#define IVPR_POLARITY_MASK    (1U << IVPR_POLARITY_SHIFT)
+#define IVPR_SENSE_SHIFT      22
+#define IVPR_SENSE_MASK       (1U << IVPR_SENSE_SHIFT)
+
+#define IVPR_PRIORITY_MASK     (0xFU << 16)
+#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16))
+#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask)
+
+/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */
+#define IDR_EP      0x80000000  /* external pin */
+#define IDR_CI      0x40000000  /* critical interrupt */
+
+typedef struct OpenPICTimer {
+    uint32_t tccr;  /* Global timer current count register */
+    uint32_t tbcr;  /* Global timer base count register */
+    int                   n_IRQ;
+    bool                  qemu_timer_active; /* Is the qemu_timer is running? */
+    struct QEMUTimer     *qemu_timer;
+    struct OpenPICState  *opp;          /* Device timer is part of. */
+    /*
+     * The QEMU_CLOCK_VIRTUAL time (in ns) corresponding to the last
+     * current_count written or read, only defined if qemu_timer_active.
+     */
+    uint64_t              origin_time;
+} OpenPICTimer;
+
+typedef struct OpenPICMSI {
+    uint32_t msir;   /* Shared Message Signaled Interrupt Register */
+} OpenPICMSI;
+
+typedef struct IRQDest {
+    int32_t ctpr; /* CPU current task priority */
+    IRQQueue raised;
+    IRQQueue servicing;
+    qemu_irq *irqs;
+
+    /* Count of IRQ sources asserting on non-INT outputs */
+    uint32_t outputs_active[OPENPIC_OUTPUT_NB];
+} IRQDest;
+
+#define TYPE_OPENPIC "openpic"
+OBJECT_DECLARE_SIMPLE_TYPE(OpenPICState, OPENPIC)
+
+struct OpenPICState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion mem;
+
+    /* Behavior control */
+    FslMpicInfo *fsl;
+    uint32_t model;
+    uint32_t flags;
+    uint32_t nb_irqs;
+    uint32_t vid;
+    uint32_t vir; /* Vendor identification register */
+    uint32_t vector_mask;
+    uint32_t tfrr_reset;
+    uint32_t ivpr_reset;
+    uint32_t idr_reset;
+    uint32_t brr1;
+    uint32_t mpic_mode_mask;
+
+    /* Sub-regions */
+    MemoryRegion sub_io_mem[6];
+
+    /* Global registers */
+    uint32_t frr; /* Feature reporting register */
+    uint32_t gcr; /* Global configuration register  */
+    uint32_t pir; /* Processor initialization register */
+    uint32_t spve; /* Spurious vector register */
+    uint32_t tfrr; /* Timer frequency reporting register */
+    /* Source registers */
+    IRQSource src[OPENPIC_MAX_IRQ];
+    /* Local registers per output pin */
+    IRQDest dst[MAX_CPU];
+    uint32_t nb_cpus;
+    /* Timer registers */
+    OpenPICTimer timers[OPENPIC_MAX_TMR];
+    uint32_t max_tmr;
+
+    /* Shared MSI registers */
+    OpenPICMSI msi[MAX_MSI];
+    uint32_t max_irq;
+    uint32_t irq_ipi0;
+    uint32_t irq_tim0;
+    uint32_t irq_msi;
+};
+
+#endif /* OPENPIC_H */
diff --git a/include/hw/ppc/openpic_kvm.h b/include/hw/ppc/openpic_kvm.h
new file mode 100644 (file)
index 0000000..9ef4215
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef OPENPIC_KVM_H
+#define OPENPIC_KVM_H
+
+#define TYPE_KVM_OPENPIC "kvm-openpic"
+int kvm_openpic_connect_vcpu(DeviceState *d, CPUState *cs);
+
+#endif /* OPENPIC_KVM_H */
diff --git a/include/hw/ppc/pef.h b/include/hw/ppc/pef.h
new file mode 100644 (file)
index 0000000..707dbe5
--- /dev/null
@@ -0,0 +1,17 @@
+/*
+ * PEF (Protected Execution Facility) for POWER support
+ *
+ * Copyright Red Hat.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_PPC_PEF_H
+#define HW_PPC_PEF_H
+
+int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
+int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp);
+
+#endif /* HW_PPC_PEF_H */
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
new file mode 100644 (file)
index 0000000..7e5fef7
--- /dev/null
@@ -0,0 +1,246 @@
+/*
+ * QEMU PowerPC PowerNV various definitions
+ *
+ * Copyright (c) 2014-2016 BenH, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef PPC_PNV_H
+#define PPC_PNV_H
+
+#include "cpu.h"
+#include "hw/boards.h"
+#include "hw/sysbus.h"
+#include "hw/ipmi/ipmi.h"
+#include "hw/ppc/pnv_pnor.h"
+
+#define TYPE_PNV_CHIP "pnv-chip"
+
+typedef struct PnvChip PnvChip;
+typedef struct Pnv8Chip Pnv8Chip;
+typedef struct Pnv9Chip Pnv9Chip;
+typedef struct Pnv10Chip Pnv10Chip;
+
+#define PNV_CHIP_TYPE_SUFFIX "-" TYPE_PNV_CHIP
+#define PNV_CHIP_TYPE_NAME(cpu_model) cpu_model PNV_CHIP_TYPE_SUFFIX
+
+#define TYPE_PNV_CHIP_POWER8E PNV_CHIP_TYPE_NAME("power8e_v2.1")
+DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER8E,
+                         TYPE_PNV_CHIP_POWER8E)
+
+#define TYPE_PNV_CHIP_POWER8 PNV_CHIP_TYPE_NAME("power8_v2.0")
+DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER8,
+                         TYPE_PNV_CHIP_POWER8)
+
+#define TYPE_PNV_CHIP_POWER8NVL PNV_CHIP_TYPE_NAME("power8nvl_v1.0")
+DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER8NVL,
+                         TYPE_PNV_CHIP_POWER8NVL)
+
+#define TYPE_PNV_CHIP_POWER9 PNV_CHIP_TYPE_NAME("power9_v2.2")
+DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER9,
+                         TYPE_PNV_CHIP_POWER9)
+
+#define TYPE_PNV_CHIP_POWER10 PNV_CHIP_TYPE_NAME("power10_v2.0")
+DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER10,
+                         TYPE_PNV_CHIP_POWER10)
+
+PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t pir);
+
+typedef struct PnvPHB PnvPHB;
+
+#define TYPE_PNV_MACHINE       MACHINE_TYPE_NAME("powernv")
+typedef struct PnvMachineClass PnvMachineClass;
+typedef struct PnvMachineState PnvMachineState;
+DECLARE_OBJ_CHECKERS(PnvMachineState, PnvMachineClass,
+                     PNV_MACHINE, TYPE_PNV_MACHINE)
+
+
+struct PnvMachineClass {
+    /*< private >*/
+    MachineClass parent_class;
+
+    /*< public >*/
+    const char *compat;
+    int compat_size;
+
+    void (*dt_power_mgt)(PnvMachineState *pnv, void *fdt);
+};
+
+struct PnvMachineState {
+    /*< private >*/
+    MachineState parent_obj;
+
+    uint32_t     initrd_base;
+    long         initrd_size;
+
+    uint32_t     num_chips;
+    PnvChip      **chips;
+
+    ISABus       *isa_bus;
+    uint32_t     cpld_irqstate;
+
+    IPMIBmc      *bmc;
+    Notifier     powerdown_notifier;
+
+    PnvPnor      *pnor;
+
+    hwaddr       fw_load_addr;
+};
+
+PnvChip *pnv_get_chip(PnvMachineState *pnv, uint32_t chip_id);
+PnvChip *pnv_chip_add_phb(PnvChip *chip, PnvPHB *phb);
+
+#define PNV_FDT_ADDR          0x01000000
+#define PNV_TIMEBASE_FREQ     512000000ULL
+
+/*
+ * BMC helpers
+ */
+void pnv_dt_bmc_sensors(IPMIBmc *bmc, void *fdt);
+void pnv_bmc_powerdown(IPMIBmc *bmc);
+IPMIBmc *pnv_bmc_create(PnvPnor *pnor);
+IPMIBmc *pnv_bmc_find(Error **errp);
+void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor);
+
+/*
+ * POWER8 MMIO base addresses
+ */
+#define PNV_XSCOM_SIZE        0x800000000ull
+#define PNV_XSCOM_BASE(chip)                                            \
+    (0x0003fc0000000000ull + ((uint64_t)(chip)->chip_id) * PNV_XSCOM_SIZE)
+
+#define PNV_OCC_COMMON_AREA_SIZE    0x0000000000800000ull
+#define PNV_OCC_COMMON_AREA_BASE    0x7fff800000ull
+#define PNV_OCC_SENSOR_BASE(chip)   (PNV_OCC_COMMON_AREA_BASE + \
+    PNV_OCC_SENSOR_DATA_BLOCK_BASE((chip)->chip_id))
+
+#define PNV_HOMER_SIZE              0x0000000000400000ull
+#define PNV_HOMER_BASE(chip)                                            \
+    (0x7ffd800000ull + ((uint64_t)(chip)->chip_id) * PNV_HOMER_SIZE)
+
+
+/*
+ * XSCOM 0x20109CA defines the ICP BAR:
+ *
+ * 0:29   : bits 14 to 43 of address to define 1 MB region.
+ * 30     : 1 to enable ICP to receive loads/stores against its BAR region
+ * 31:63  : Constant 0
+ *
+ * Usually defined as :
+ *
+ *      0xffffe00200000000 -> 0x0003ffff80000000
+ *      0xffffe00600000000 -> 0x0003ffff80100000
+ *      0xffffe02200000000 -> 0x0003ffff80800000
+ *      0xffffe02600000000 -> 0x0003ffff80900000
+ */
+#define PNV_ICP_SIZE         0x0000000000100000ull
+#define PNV_ICP_BASE(chip)                                              \
+    (0x0003ffff80000000ull + (uint64_t) (chip)->chip_id * PNV_ICP_SIZE)
+
+
+#define PNV_PSIHB_SIZE       0x0000000000100000ull
+#define PNV_PSIHB_BASE(chip) \
+    (0x0003fffe80000000ull + (uint64_t)(chip)->chip_id * PNV_PSIHB_SIZE)
+
+#define PNV_PSIHB_FSP_SIZE   0x0000000100000000ull
+#define PNV_PSIHB_FSP_BASE(chip) \
+    (0x0003ffe000000000ull + (uint64_t)(chip)->chip_id * \
+     PNV_PSIHB_FSP_SIZE)
+
+/*
+ * POWER9 MMIO base addresses
+ */
+#define PNV9_CHIP_BASE(chip, base)   \
+    ((base) + ((uint64_t) (chip)->chip_id << 42))
+
+#define PNV9_XIVE_VC_SIZE            0x0000008000000000ull
+#define PNV9_XIVE_VC_BASE(chip)      PNV9_CHIP_BASE(chip, 0x0006010000000000ull)
+
+#define PNV9_XIVE_PC_SIZE            0x0000001000000000ull
+#define PNV9_XIVE_PC_BASE(chip)      PNV9_CHIP_BASE(chip, 0x0006018000000000ull)
+
+#define PNV9_LPCM_SIZE               0x0000000100000000ull
+#define PNV9_LPCM_BASE(chip)         PNV9_CHIP_BASE(chip, 0x0006030000000000ull)
+
+#define PNV9_PSIHB_SIZE              0x0000000000100000ull
+#define PNV9_PSIHB_BASE(chip)        PNV9_CHIP_BASE(chip, 0x0006030203000000ull)
+
+#define PNV9_XIVE_IC_SIZE            0x0000000000080000ull
+#define PNV9_XIVE_IC_BASE(chip)      PNV9_CHIP_BASE(chip, 0x0006030203100000ull)
+
+#define PNV9_XIVE_TM_SIZE            0x0000000000040000ull
+#define PNV9_XIVE_TM_BASE(chip)      PNV9_CHIP_BASE(chip, 0x0006030203180000ull)
+
+#define PNV9_PSIHB_ESB_SIZE          0x0000000000010000ull
+#define PNV9_PSIHB_ESB_BASE(chip)    PNV9_CHIP_BASE(chip, 0x00060302031c0000ull)
+
+#define PNV9_XSCOM_SIZE              0x0000000400000000ull
+#define PNV9_XSCOM_BASE(chip)        PNV9_CHIP_BASE(chip, 0x00603fc00000000ull)
+
+#define PNV9_OCC_COMMON_AREA_SIZE    0x0000000000800000ull
+#define PNV9_OCC_COMMON_AREA_BASE    0x203fff800000ull
+#define PNV9_OCC_SENSOR_BASE(chip)   (PNV9_OCC_COMMON_AREA_BASE +       \
+    PNV_OCC_SENSOR_DATA_BLOCK_BASE((chip)->chip_id))
+
+#define PNV9_HOMER_SIZE              0x0000000000400000ull
+#define PNV9_HOMER_BASE(chip)                                           \
+    (0x203ffd800000ull + ((uint64_t)(chip)->chip_id) * PNV9_HOMER_SIZE)
+
+/*
+ * POWER10 MMIO base addresses - 16TB stride per chip
+ */
+#define PNV10_CHIP_BASE(chip, base)   \
+    ((base) + ((uint64_t) (chip)->chip_id << 44))
+
+#define PNV10_XSCOM_SIZE             0x0000000400000000ull
+#define PNV10_XSCOM_BASE(chip)       PNV10_CHIP_BASE(chip, 0x00603fc00000000ull)
+
+#define PNV10_LPCM_SIZE             0x0000000100000000ull
+#define PNV10_LPCM_BASE(chip)       PNV10_CHIP_BASE(chip, 0x0006030000000000ull)
+
+#define PNV10_XIVE2_IC_SIZE         0x0000000002000000ull
+#define PNV10_XIVE2_IC_BASE(chip)   PNV10_CHIP_BASE(chip, 0x0006030200000000ull)
+
+#define PNV10_PSIHB_ESB_SIZE        0x0000000000100000ull
+#define PNV10_PSIHB_ESB_BASE(chip)  PNV10_CHIP_BASE(chip, 0x0006030202000000ull)
+
+#define PNV10_PSIHB_SIZE            0x0000000000100000ull
+#define PNV10_PSIHB_BASE(chip)      PNV10_CHIP_BASE(chip, 0x0006030203000000ull)
+
+#define PNV10_XIVE2_TM_SIZE         0x0000000000040000ull
+#define PNV10_XIVE2_TM_BASE(chip)   PNV10_CHIP_BASE(chip, 0x0006030203180000ull)
+
+#define PNV10_XIVE2_NVC_SIZE        0x0000000008000000ull
+#define PNV10_XIVE2_NVC_BASE(chip)  PNV10_CHIP_BASE(chip, 0x0006030208000000ull)
+
+#define PNV10_XIVE2_NVPG_SIZE       0x0000010000000000ull
+#define PNV10_XIVE2_NVPG_BASE(chip) PNV10_CHIP_BASE(chip, 0x0006040000000000ull)
+
+#define PNV10_XIVE2_ESB_SIZE        0x0000010000000000ull
+#define PNV10_XIVE2_ESB_BASE(chip)  PNV10_CHIP_BASE(chip, 0x0006050000000000ull)
+
+#define PNV10_XIVE2_END_SIZE        0x0000020000000000ull
+#define PNV10_XIVE2_END_BASE(chip)  PNV10_CHIP_BASE(chip, 0x0006060000000000ull)
+
+#define PNV10_OCC_COMMON_AREA_SIZE  0x0000000000800000ull
+#define PNV10_OCC_COMMON_AREA_BASE  0x300fff800000ull
+#define PNV10_OCC_SENSOR_BASE(chip) (PNV10_OCC_COMMON_AREA_BASE +       \
+    PNV_OCC_SENSOR_DATA_BLOCK_BASE((chip)->chip_id))
+
+#define PNV10_HOMER_SIZE              0x0000000000400000ull
+#define PNV10_HOMER_BASE(chip)                                           \
+    (0x300ffd800000ll + ((uint64_t)(chip)->chip_id) * PNV10_HOMER_SIZE)
+
+#endif /* PPC_PNV_H */
diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h
new file mode 100644 (file)
index 0000000..0ab5c42
--- /dev/null
@@ -0,0 +1,157 @@
+#ifndef PPC_PNV_CHIP_H
+#define PPC_PNV_CHIP_H
+
+#include "hw/pci-host/pnv_phb4.h"
+#include "hw/ppc/pnv_core.h"
+#include "hw/ppc/pnv_homer.h"
+#include "hw/ppc/pnv_lpc.h"
+#include "hw/ppc/pnv_occ.h"
+#include "hw/ppc/pnv_psi.h"
+#include "hw/ppc/pnv_sbe.h"
+#include "hw/ppc/pnv_xive.h"
+#include "hw/ppc/pnv_i2c.h"
+#include "hw/sysbus.h"
+
+OBJECT_DECLARE_TYPE(PnvChip, PnvChipClass,
+                    PNV_CHIP)
+
+struct PnvChip {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    uint32_t     chip_id;
+    uint64_t     ram_start;
+    uint64_t     ram_size;
+
+    uint32_t     nr_cores;
+    uint32_t     nr_threads;
+    uint64_t     cores_mask;
+    PnvCore      **cores;
+
+    uint32_t     num_pecs;
+
+    MemoryRegion xscom_mmio;
+    MemoryRegion xscom;
+    AddressSpace xscom_as;
+
+    MemoryRegion *fw_mr;
+    gchar        *dt_isa_nodename;
+};
+
+#define TYPE_PNV8_CHIP "pnv8-chip"
+DECLARE_INSTANCE_CHECKER(Pnv8Chip, PNV8_CHIP,
+                         TYPE_PNV8_CHIP)
+
+struct Pnv8Chip {
+    /*< private >*/
+    PnvChip      parent_obj;
+
+    /*< public >*/
+    MemoryRegion icp_mmio;
+
+    PnvLpcController lpc;
+    Pnv8Psi      psi;
+    PnvOCC       occ;
+    PnvHomer     homer;
+
+#define PNV8_CHIP_PHB3_MAX 4
+    /*
+     * The array is used to allow quick access to the phbs by
+     * pnv_ics_get_child() and pnv_ics_resend_child().
+     */
+    PnvPHB       *phbs[PNV8_CHIP_PHB3_MAX];
+    uint32_t     num_phbs;
+
+    XICSFabric    *xics;
+};
+
+#define TYPE_PNV9_CHIP "pnv9-chip"
+DECLARE_INSTANCE_CHECKER(Pnv9Chip, PNV9_CHIP,
+                         TYPE_PNV9_CHIP)
+
+struct Pnv9Chip {
+    /*< private >*/
+    PnvChip      parent_obj;
+
+    /*< public >*/
+    PnvXive      xive;
+    Pnv9Psi      psi;
+    PnvLpcController lpc;
+    PnvOCC       occ;
+    PnvSBE       sbe;
+    PnvHomer     homer;
+
+    uint32_t     nr_quads;
+    PnvQuad      *quads;
+
+#define PNV9_CHIP_MAX_PEC 3
+    PnvPhb4PecState pecs[PNV9_CHIP_MAX_PEC];
+
+#define PNV9_CHIP_MAX_I2C 4
+    PnvI2C      i2c[PNV9_CHIP_MAX_I2C];
+};
+
+/*
+ * A SMT8 fused core is a pair of SMT4 cores.
+ */
+#define PNV9_PIR2FUSEDCORE(pir) (((pir) >> 3) & 0xf)
+#define PNV9_PIR2CHIP(pir)      (((pir) >> 8) & 0x7f)
+
+#define TYPE_PNV10_CHIP "pnv10-chip"
+DECLARE_INSTANCE_CHECKER(Pnv10Chip, PNV10_CHIP,
+                         TYPE_PNV10_CHIP)
+
+struct Pnv10Chip {
+    /*< private >*/
+    PnvChip      parent_obj;
+
+    /*< public >*/
+    PnvXive2     xive;
+    Pnv9Psi      psi;
+    PnvLpcController lpc;
+    PnvOCC       occ;
+    PnvSBE       sbe;
+    PnvHomer     homer;
+
+    uint32_t     nr_quads;
+    PnvQuad      *quads;
+
+#define PNV10_CHIP_MAX_PEC 2
+    PnvPhb4PecState pecs[PNV10_CHIP_MAX_PEC];
+
+#define PNV10_CHIP_MAX_I2C 4
+    PnvI2C       i2c[PNV10_CHIP_MAX_I2C];
+};
+
+#define PNV10_PIR2FUSEDCORE(pir) (((pir) >> 3) & 0xf)
+#define PNV10_PIR2CHIP(pir)      (((pir) >> 8) & 0x7f)
+
+struct PnvChipClass {
+    /*< private >*/
+    SysBusDeviceClass parent_class;
+
+    /*< public >*/
+    uint64_t     chip_cfam_id;
+    uint64_t     cores_mask;
+    uint32_t     num_pecs;
+    uint32_t     num_phbs;
+
+    uint32_t     i2c_num_engines;
+    const int    *i2c_ports_per_engine;
+
+    DeviceRealize parent_realize;
+
+    uint32_t (*core_pir)(PnvChip *chip, uint32_t core_id);
+    void (*intc_create)(PnvChip *chip, PowerPCCPU *cpu, Error **errp);
+    void (*intc_reset)(PnvChip *chip, PowerPCCPU *cpu);
+    void (*intc_destroy)(PnvChip *chip, PowerPCCPU *cpu);
+    void (*intc_print_info)(PnvChip *chip, PowerPCCPU *cpu, Monitor *mon);
+    ISABus *(*isa_create)(PnvChip *chip, Error **errp);
+    void (*dt_populate)(PnvChip *chip, void *fdt);
+    void (*pic_print_info)(PnvChip *chip, Monitor *mon);
+    uint64_t (*xscom_core_base)(PnvChip *chip, uint32_t core_id);
+    uint32_t (*xscom_pcba)(PnvChip *chip, uint64_t addr);
+};
+
+#endif
diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
new file mode 100644 (file)
index 0000000..4db2122
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * QEMU PowerPC PowerNV CPU Core model
+ *
+ * Copyright (c) 2016, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef PPC_PNV_CORE_H
+#define PPC_PNV_CORE_H
+
+#include "hw/cpu/core.h"
+#include "target/ppc/cpu.h"
+#include "hw/ppc/pnv.h"
+#include "qom/object.h"
+
+#define TYPE_PNV_CORE "powernv-cpu-core"
+OBJECT_DECLARE_TYPE(PnvCore, PnvCoreClass,
+                    PNV_CORE)
+
+struct PnvCore {
+    /*< private >*/
+    CPUCore parent_obj;
+
+    /*< public >*/
+    PowerPCCPU **threads;
+    uint32_t pir;
+    uint64_t hrmor;
+    PnvChip *chip;
+
+    MemoryRegion xscom_regs;
+};
+
+struct PnvCoreClass {
+    DeviceClass parent_class;
+
+    const MemoryRegionOps *xscom_ops;
+    uint64_t xscom_size;
+};
+
+#define PNV_CORE_TYPE_SUFFIX "-" TYPE_PNV_CORE
+#define PNV_CORE_TYPE_NAME(cpu_model) cpu_model PNV_CORE_TYPE_SUFFIX
+
+typedef struct PnvCPUState {
+    Object *intc;
+} PnvCPUState;
+
+static inline PnvCPUState *pnv_cpu_state(PowerPCCPU *cpu)
+{
+    return (PnvCPUState *)cpu->machine_data;
+}
+
+struct PnvQuadClass {
+    DeviceClass parent_class;
+
+    const MemoryRegionOps *xscom_ops;
+    uint64_t xscom_size;
+
+    const MemoryRegionOps *xscom_qme_ops;
+    uint64_t xscom_qme_size;
+};
+
+#define TYPE_PNV_QUAD "powernv-cpu-quad"
+
+#define PNV_QUAD_TYPE_SUFFIX "-" TYPE_PNV_QUAD
+#define PNV_QUAD_TYPE_NAME(cpu_model) cpu_model PNV_QUAD_TYPE_SUFFIX
+
+OBJECT_DECLARE_TYPE(PnvQuad, PnvQuadClass, PNV_QUAD)
+
+struct PnvQuad {
+    DeviceState parent_obj;
+
+    uint32_t quad_id;
+    MemoryRegion xscom_regs;
+    MemoryRegion xscom_qme_regs;
+};
+#endif /* PPC_PNV_CORE_H */
diff --git a/include/hw/ppc/pnv_homer.h b/include/hw/ppc/pnv_homer.h
new file mode 100644 (file)
index 0000000..b1c5d49
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * QEMU PowerPC PowerNV Emulation of a few HOMER related registers
+ *
+ * Copyright (c) 2019, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef PPC_PNV_HOMER_H
+#define PPC_PNV_HOMER_H
+
+#include "hw/ppc/pnv.h"
+#include "qom/object.h"
+
+#define TYPE_PNV_HOMER "pnv-homer"
+OBJECT_DECLARE_TYPE(PnvHomer, PnvHomerClass,
+                    PNV_HOMER)
+#define TYPE_PNV8_HOMER TYPE_PNV_HOMER "-POWER8"
+DECLARE_INSTANCE_CHECKER(PnvHomer, PNV8_HOMER,
+                         TYPE_PNV8_HOMER)
+#define TYPE_PNV9_HOMER TYPE_PNV_HOMER "-POWER9"
+DECLARE_INSTANCE_CHECKER(PnvHomer, PNV9_HOMER,
+                         TYPE_PNV9_HOMER)
+#define TYPE_PNV10_HOMER TYPE_PNV_HOMER "-POWER10"
+DECLARE_INSTANCE_CHECKER(PnvHomer, PNV10_HOMER,
+                         TYPE_PNV10_HOMER)
+
+struct PnvHomer {
+    DeviceState parent;
+
+    PnvChip *chip;
+    MemoryRegion pba_regs;
+    MemoryRegion regs;
+};
+
+
+struct PnvHomerClass {
+    DeviceClass parent_class;
+
+    int pba_size;
+    const MemoryRegionOps *pba_ops;
+    int homer_size;
+    const MemoryRegionOps *homer_ops;
+
+    hwaddr core_max_base;
+};
+
+#endif /* PPC_PNV_HOMER_H */
diff --git a/include/hw/ppc/pnv_i2c.h b/include/hw/ppc/pnv_i2c.h
new file mode 100644 (file)
index 0000000..1a37730
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * QEMU PowerPC PowerNV Processor I2C model
+ *
+ * Copyright (c) 2019-2023, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef PPC_PNV_I2C_H
+#define PPC_PNV_I2C_H
+
+#include "hw/ppc/pnv.h"
+#include "hw/i2c/i2c.h"
+#include "qemu/fifo8.h"
+
+#define TYPE_PNV_I2C "pnv-i2c"
+#define PNV_I2C(obj) OBJECT_CHECK(PnvI2C, (obj), TYPE_PNV_I2C)
+
+#define PNV_I2C_REGS 0x20
+
+typedef struct PnvI2C {
+    DeviceState parent;
+
+    struct PnvChip *chip;
+
+    qemu_irq psi_irq;
+
+    uint64_t regs[PNV_I2C_REGS];
+    uint32_t engine;
+    uint32_t num_busses;
+    I2CBus **busses;
+
+    MemoryRegion xscom_regs;
+
+    Fifo8 fifo;
+} PnvI2C;
+
+#endif /* PPC_PNV_I2C_H */
diff --git a/include/hw/ppc/pnv_lpc.h b/include/hw/ppc/pnv_lpc.h
new file mode 100644 (file)
index 0000000..5d22c45
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * QEMU PowerPC PowerNV LPC controller
+ *
+ * Copyright (c) 2016-2022, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef PPC_PNV_LPC_H
+#define PPC_PNV_LPC_H
+
+#include "exec/memory.h"
+#include "hw/ppc/pnv.h"
+#include "hw/qdev-core.h"
+
+#define TYPE_PNV_LPC "pnv-lpc"
+typedef struct PnvLpcClass PnvLpcClass;
+typedef struct PnvLpcController PnvLpcController;
+DECLARE_OBJ_CHECKERS(PnvLpcController, PnvLpcClass,
+                     PNV_LPC, TYPE_PNV_LPC)
+#define TYPE_PNV8_LPC TYPE_PNV_LPC "-POWER8"
+DECLARE_INSTANCE_CHECKER(PnvLpcController, PNV8_LPC,
+                         TYPE_PNV8_LPC)
+
+#define TYPE_PNV9_LPC TYPE_PNV_LPC "-POWER9"
+DECLARE_INSTANCE_CHECKER(PnvLpcController, PNV9_LPC,
+                         TYPE_PNV9_LPC)
+
+#define TYPE_PNV10_LPC TYPE_PNV_LPC "-POWER10"
+DECLARE_INSTANCE_CHECKER(PnvLpcController, PNV10_LPC,
+                         TYPE_PNV10_LPC)
+
+struct PnvLpcController {
+    DeviceState parent;
+
+    uint64_t eccb_stat_reg;
+    uint32_t eccb_data_reg;
+
+    /* OPB bus */
+    MemoryRegion opb_mr;
+    AddressSpace opb_as;
+
+    /* ISA IO and Memory space */
+    MemoryRegion isa_io;
+    MemoryRegion isa_mem;
+    MemoryRegion isa_fw;
+
+    /* Windows from OPB to ISA (aliases) */
+    MemoryRegion opb_isa_io;
+    MemoryRegion opb_isa_mem;
+    MemoryRegion opb_isa_fw;
+
+    /* Registers */
+    MemoryRegion lpc_hc_regs;
+    MemoryRegion opb_master_regs;
+
+    /* OPB Master LS registers */
+    uint32_t opb_irq_route0;
+    uint32_t opb_irq_route1;
+    uint32_t opb_irq_stat;
+    uint32_t opb_irq_mask;
+    uint32_t opb_irq_pol;
+    uint32_t opb_irq_input;
+
+    /* LPC HC registers */
+    uint32_t lpc_hc_fw_seg_idsel;
+    uint32_t lpc_hc_fw_rd_acc_size;
+    uint32_t lpc_hc_irqser_ctrl;
+    uint32_t lpc_hc_irqmask;
+    uint32_t lpc_hc_irqstat;
+    uint32_t lpc_hc_error_addr;
+
+    /* XSCOM registers */
+    MemoryRegion xscom_regs;
+
+    /* PSI to generate interrupts */
+    qemu_irq psi_irq;
+};
+
+struct PnvLpcClass {
+    DeviceClass parent_class;
+
+    DeviceRealize parent_realize;
+};
+
+ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp);
+int pnv_dt_lpc(PnvChip *chip, void *fdt, int root_offset,
+               uint64_t lpcm_addr, uint64_t lpcm_size);
+
+#endif /* PPC_PNV_LPC_H */
diff --git a/include/hw/ppc/pnv_occ.h b/include/hw/ppc/pnv_occ.h
new file mode 100644 (file)
index 0000000..df32124
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * QEMU PowerPC PowerNV Emulation of a few OCC related registers
+ *
+ * Copyright (c) 2015-2022, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef PPC_PNV_OCC_H
+#define PPC_PNV_OCC_H
+
+#include "exec/memory.h"
+#include "hw/qdev-core.h"
+
+#define TYPE_PNV_OCC "pnv-occ"
+OBJECT_DECLARE_TYPE(PnvOCC, PnvOCCClass,
+                    PNV_OCC)
+#define TYPE_PNV8_OCC TYPE_PNV_OCC "-POWER8"
+DECLARE_INSTANCE_CHECKER(PnvOCC, PNV8_OCC,
+                         TYPE_PNV8_OCC)
+#define TYPE_PNV9_OCC TYPE_PNV_OCC "-POWER9"
+DECLARE_INSTANCE_CHECKER(PnvOCC, PNV9_OCC,
+                         TYPE_PNV9_OCC)
+#define TYPE_PNV10_OCC TYPE_PNV_OCC "-POWER10"
+DECLARE_INSTANCE_CHECKER(PnvOCC, PNV10_OCC, TYPE_PNV10_OCC)
+
+#define PNV_OCC_SENSOR_DATA_BLOCK_OFFSET 0x00580000
+#define PNV_OCC_SENSOR_DATA_BLOCK_SIZE   0x00025800
+
+struct PnvOCC {
+    DeviceState xd;
+
+    /* OCC Misc interrupt */
+    uint64_t occmisc;
+
+    qemu_irq psi_irq;
+
+    MemoryRegion xscom_regs;
+    MemoryRegion sram_regs;
+};
+
+struct PnvOCCClass {
+    DeviceClass parent_class;
+
+    int xscom_size;
+    const MemoryRegionOps *xscom_ops;
+};
+
+#define PNV_OCC_SENSOR_DATA_BLOCK_BASE(i)                               \
+    (PNV_OCC_SENSOR_DATA_BLOCK_OFFSET + (i) * PNV_OCC_SENSOR_DATA_BLOCK_SIZE)
+
+#endif /* PPC_PNV_OCC_H */
diff --git a/include/hw/ppc/pnv_pnor.h b/include/hw/ppc/pnv_pnor.h
new file mode 100644 (file)
index 0000000..2e37ac8
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * QEMU PowerNV PNOR simple model
+ *
+ * Copyright (c) 2019, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_PNV_PNOR_H
+#define PPC_PNV_PNOR_H
+
+#include "hw/sysbus.h"
+
+/*
+ * PNOR offset on the LPC FW address space
+ */
+#define PNOR_SPI_OFFSET         0x0c000000UL
+
+#define TYPE_PNV_PNOR  "pnv-pnor"
+OBJECT_DECLARE_SIMPLE_TYPE(PnvPnor, PNV_PNOR)
+
+struct PnvPnor {
+    SysBusDevice   parent_obj;
+
+    BlockBackend   *blk;
+
+    uint8_t        *storage;
+    int64_t        size;
+    MemoryRegion   mmio;
+};
+
+#endif /* PPC_PNV_PNOR_H */
diff --git a/include/hw/ppc/pnv_psi.h b/include/hw/ppc/pnv_psi.h
new file mode 100644 (file)
index 0000000..2a6f715
--- /dev/null
@@ -0,0 +1,115 @@
+/*
+ * QEMU PowerPC PowerNV Processor Service Interface (PSI) model
+ *
+ * Copyright (c) 2015-2022, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef PPC_PNV_PSI_H
+#define PPC_PNV_PSI_H
+
+#include "hw/sysbus.h"
+#include "hw/ppc/xics.h"
+#include "hw/ppc/xive.h"
+#include "hw/qdev-core.h"
+
+#define TYPE_PNV_PSI "pnv-psi"
+OBJECT_DECLARE_TYPE(PnvPsi, PnvPsiClass,
+                    PNV_PSI)
+
+#define PSIHB_XSCOM_MAX         0x20
+
+struct PnvPsi {
+    DeviceState parent;
+
+    MemoryRegion regs_mr;
+    uint64_t bar;
+
+    /* FSP region not supported */
+    /* MemoryRegion fsp_mr; */
+    uint64_t fsp_bar;
+
+    /* Interrupt generation */
+    qemu_irq *qirqs;
+
+    /* Registers */
+    uint64_t regs[PSIHB_XSCOM_MAX];
+
+    MemoryRegion xscom_regs;
+};
+
+#define TYPE_PNV8_PSI TYPE_PNV_PSI "-POWER8"
+OBJECT_DECLARE_SIMPLE_TYPE(Pnv8Psi, PNV8_PSI)
+
+struct Pnv8Psi {
+    PnvPsi   parent;
+
+    ICSState ics;
+};
+
+#define TYPE_PNV9_PSI TYPE_PNV_PSI "-POWER9"
+OBJECT_DECLARE_SIMPLE_TYPE(Pnv9Psi, PNV9_PSI)
+
+struct Pnv9Psi {
+    PnvPsi   parent;
+
+    XiveSource source;
+};
+
+#define TYPE_PNV10_PSI TYPE_PNV_PSI "-POWER10"
+
+
+struct PnvPsiClass {
+    SysBusDeviceClass parent_class;
+
+    uint32_t xscom_pcba;
+    uint32_t xscom_size;
+    uint64_t bar_mask;
+    const char *compat;
+    int compat_size;
+};
+
+/* The PSI and FSP interrupts are muxed on the same IRQ number */
+typedef enum PnvPsiIrq {
+    PSIHB_IRQ_FSP, /* internal use only */
+    PSIHB_IRQ_OCC,
+    PSIHB_IRQ_FSI,
+    PSIHB_IRQ_LPC_I2C,
+    PSIHB_IRQ_LOCAL_ERR,
+    PSIHB_IRQ_EXTERNAL,
+} PnvPsiIrq;
+
+#define PSI_NUM_INTERRUPTS 6
+
+/* P9 PSI Interrupts */
+#define PSIHB9_IRQ_PSI          0
+#define PSIHB9_IRQ_OCC          1
+#define PSIHB9_IRQ_FSI          2
+#define PSIHB9_IRQ_LPCHC        3
+#define PSIHB9_IRQ_LOCAL_ERR    4
+#define PSIHB9_IRQ_GLOBAL_ERR   5
+#define PSIHB9_IRQ_TPM          6
+#define PSIHB9_IRQ_LPC_SIRQ0    7
+#define PSIHB9_IRQ_LPC_SIRQ1    8
+#define PSIHB9_IRQ_LPC_SIRQ2    9
+#define PSIHB9_IRQ_LPC_SIRQ3    10
+#define PSIHB9_IRQ_SBE_I2C      11
+#define PSIHB9_IRQ_DIO          12
+#define PSIHB9_IRQ_PSU          13
+#define PSIHB9_NUM_IRQS         14
+
+void pnv_psi_pic_print_info(Pnv9Psi *psi, Monitor *mon);
+
+#endif /* PPC_PNV_PSI_H */
diff --git a/include/hw/ppc/pnv_sbe.h b/include/hw/ppc/pnv_sbe.h
new file mode 100644 (file)
index 0000000..b6b378a
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * QEMU PowerPC PowerNV Emulation of some SBE behaviour
+ *
+ * Copyright (c) 2022, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef PPC_PNV_SBE_H
+#define PPC_PNV_SBE_H
+
+#include "exec/memory.h"
+#include "hw/qdev-core.h"
+
+#define TYPE_PNV_SBE "pnv-sbe"
+OBJECT_DECLARE_TYPE(PnvSBE, PnvSBEClass, PNV_SBE)
+#define TYPE_PNV9_SBE TYPE_PNV_SBE "-POWER9"
+DECLARE_INSTANCE_CHECKER(PnvSBE, PNV9_SBE, TYPE_PNV9_SBE)
+#define TYPE_PNV10_SBE TYPE_PNV_SBE "-POWER10"
+DECLARE_INSTANCE_CHECKER(PnvSBE, PNV10_SBE, TYPE_PNV10_SBE)
+
+struct PnvSBE {
+    DeviceState xd;
+
+    uint64_t mbox[8];
+    uint64_t sbe_doorbell;
+    uint64_t host_doorbell;
+
+    qemu_irq psi_irq;
+    QEMUTimer *timer;
+
+    MemoryRegion xscom_mbox_regs;
+    MemoryRegion xscom_ctrl_regs;
+};
+
+struct PnvSBEClass {
+    DeviceClass parent_class;
+
+    int xscom_ctrl_size;
+    int xscom_mbox_size;
+    const MemoryRegionOps *xscom_ctrl_ops;
+    const MemoryRegionOps *xscom_mbox_ops;
+};
+
+#endif /* PPC_PNV_SBE_H */
diff --git a/include/hw/ppc/pnv_xive.h b/include/hw/ppc/pnv_xive.h
new file mode 100644 (file)
index 0000000..9c48430
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * QEMU PowerPC XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2019, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_PNV_XIVE_H
+#define PPC_PNV_XIVE_H
+
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/xive.h"
+#include "qom/object.h"
+#include "hw/ppc/xive2.h"
+
+#define TYPE_PNV_XIVE "pnv-xive"
+OBJECT_DECLARE_TYPE(PnvXive, PnvXiveClass,
+                    PNV_XIVE)
+
+#define XIVE_BLOCK_MAX      16
+
+#define XIVE_TABLE_BLK_MAX  16  /* Block Scope Table (0-15) */
+#define XIVE_TABLE_MIG_MAX  16  /* Migration Register Table (1-15) */
+#define XIVE_TABLE_VDT_MAX  16  /* VDT Domain Table (0-15) */
+#define XIVE_TABLE_EDT_MAX  64  /* EDT Domain Table (0-63) */
+
+struct PnvXive {
+    XiveRouter    parent_obj;
+
+    /* Owning chip */
+    PnvChip *chip;
+
+    /* XSCOM addresses giving access to the controller registers */
+    MemoryRegion  xscom_regs;
+
+    /* Main MMIO regions that can be configured by FW */
+    MemoryRegion  ic_mmio;
+    MemoryRegion    ic_reg_mmio;
+    MemoryRegion    ic_notify_mmio;
+    MemoryRegion    ic_lsi_mmio;
+    MemoryRegion    tm_indirect_mmio;
+    MemoryRegion  vc_mmio;
+    MemoryRegion  pc_mmio;
+    MemoryRegion  tm_mmio;
+
+    /*
+     * IPI and END address spaces modeling the EDT segmentation in the
+     * VC region
+     */
+    AddressSpace  ipi_as;
+    MemoryRegion  ipi_mmio;
+    MemoryRegion    ipi_edt_mmio;
+
+    AddressSpace  end_as;
+    MemoryRegion  end_mmio;
+    MemoryRegion    end_edt_mmio;
+
+    /* Shortcut values for the Main MMIO regions */
+    hwaddr        ic_base;
+    uint32_t      ic_shift;
+    hwaddr        vc_base;
+    uint32_t      vc_shift;
+    hwaddr        pc_base;
+    uint32_t      pc_shift;
+    hwaddr        tm_base;
+    uint32_t      tm_shift;
+
+    /* Our XIVE source objects for IPIs and ENDs */
+    XiveSource    ipi_source;
+    XiveENDSource end_source;
+
+    /* Interrupt controller registers */
+    uint64_t      regs[0x300];
+
+    /*
+     * Virtual Structure Descriptor tables : EAT, SBE, ENDT, NVTT, IRQ
+     * These are in a SRAM protected by ECC.
+     */
+    uint64_t      vsds[5][XIVE_BLOCK_MAX];
+
+    /* Translation tables */
+    uint64_t      blk[XIVE_TABLE_BLK_MAX];
+    uint64_t      mig[XIVE_TABLE_MIG_MAX];
+    uint64_t      vdt[XIVE_TABLE_VDT_MAX];
+    uint64_t      edt[XIVE_TABLE_EDT_MAX];
+};
+
+struct PnvXiveClass {
+    XiveRouterClass parent_class;
+
+    DeviceRealize parent_realize;
+};
+
+void pnv_xive_pic_print_info(PnvXive *xive, Monitor *mon);
+
+/*
+ * XIVE2 interrupt controller (POWER10)
+ */
+#define TYPE_PNV_XIVE2 "pnv-xive2"
+OBJECT_DECLARE_TYPE(PnvXive2, PnvXive2Class, PNV_XIVE2);
+
+typedef struct PnvXive2 {
+    Xive2Router   parent_obj;
+
+    /* Owning chip */
+    PnvChip *chip;
+
+    /* XSCOM addresses giving access to the controller registers */
+    MemoryRegion  xscom_regs;
+
+    MemoryRegion  ic_mmio;
+    MemoryRegion  ic_mmios[8];
+    MemoryRegion  esb_mmio;
+    MemoryRegion  end_mmio;
+    MemoryRegion  nvc_mmio;
+    MemoryRegion  nvpg_mmio;
+    MemoryRegion  tm_mmio;
+
+    /* Shortcut values for the Main MMIO regions */
+    hwaddr        ic_base;
+    uint32_t      ic_shift;
+    hwaddr        esb_base;
+    uint32_t      esb_shift;
+    hwaddr        end_base;
+    uint32_t      end_shift;
+    hwaddr        nvc_base;
+    uint32_t      nvc_shift;
+    hwaddr        nvpg_base;
+    uint32_t      nvpg_shift;
+    hwaddr        tm_base;
+    uint32_t      tm_shift;
+
+    /* Interrupt controller registers */
+    uint64_t      cq_regs[0x40];
+    uint64_t      vc_regs[0x100];
+    uint64_t      pc_regs[0x100];
+    uint64_t      tctxt_regs[0x30];
+
+    /* To change default behavior */
+    uint64_t      capabilities;
+    uint64_t      config;
+
+    /* Our XIVE source objects for IPIs and ENDs */
+    XiveSource    ipi_source;
+    Xive2EndSource end_source;
+
+    /*
+     * Virtual Structure Descriptor tables
+     * These are in a SRAM protected by ECC.
+     */
+    uint64_t      vsds[9][XIVE_BLOCK_MAX];
+
+    /* Translation tables */
+    uint64_t      tables[8][XIVE_BLOCK_MAX];
+
+} PnvXive2;
+
+typedef struct PnvXive2Class {
+    Xive2RouterClass parent_class;
+
+    DeviceRealize parent_realize;
+} PnvXive2Class;
+
+void pnv_xive2_pic_print_info(PnvXive2 *xive, Monitor *mon);
+
+#endif /* PPC_PNV_XIVE_H */
diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h
new file mode 100644 (file)
index 0000000..f5becba
--- /dev/null
@@ -0,0 +1,193 @@
+/*
+ * QEMU PowerPC PowerNV XSCOM bus definitions
+ *
+ * Copyright (c) 2016, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef PPC_PNV_XSCOM_H
+#define PPC_PNV_XSCOM_H
+
+#include "exec/memory.h"
+#include "hw/ppc/pnv.h"
+
+typedef struct PnvXScomInterface PnvXScomInterface;
+
+#define TYPE_PNV_XSCOM_INTERFACE "pnv-xscom-interface"
+#define PNV_XSCOM_INTERFACE(obj) \
+    INTERFACE_CHECK(PnvXScomInterface, (obj), TYPE_PNV_XSCOM_INTERFACE)
+typedef struct PnvXScomInterfaceClass PnvXScomInterfaceClass;
+DECLARE_CLASS_CHECKERS(PnvXScomInterfaceClass, PNV_XSCOM_INTERFACE,
+                       TYPE_PNV_XSCOM_INTERFACE)
+
+struct PnvXScomInterfaceClass {
+    InterfaceClass parent;
+    int (*dt_xscom)(PnvXScomInterface *dev, void *fdt, int offset);
+};
+
+/*
+ * Layout of the XSCOM PCB addresses of EX core 1 (POWER 8)
+ *
+ *   GPIO        0x1100xxxx
+ *   SCOM        0x1101xxxx
+ *   OHA         0x1102xxxx
+ *   CLOCK CTL   0x1103xxxx
+ *   FIR         0x1104xxxx
+ *   THERM       0x1105xxxx
+ *   <reserved>  0x1106xxxx
+ *               ..
+ *               0x110Exxxx
+ *   PCB SLAVE   0x110Fxxxx
+ */
+
+#define PNV_XSCOM_EX_CORE_BASE    0x10000000ull
+
+#define PNV_XSCOM_EX_BASE(core) \
+    (PNV_XSCOM_EX_CORE_BASE | ((uint64_t)(core) << 24))
+#define PNV_XSCOM_EX_SIZE         0x100000
+
+#define PNV_XSCOM_LPC_BASE        0xb0020
+#define PNV_XSCOM_LPC_SIZE        0x4
+
+#define PNV_XSCOM_PSIHB_BASE      0x2010900
+#define PNV_XSCOM_PSIHB_SIZE      0x20
+
+#define PNV_XSCOM_OCC_BASE        0x0066000
+#define PNV_XSCOM_OCC_SIZE        0x6000
+
+#define PNV_XSCOM_PBA_BASE        0x2013f00
+#define PNV_XSCOM_PBA_SIZE        0x40
+
+#define PNV_XSCOM_PBCQ_NEST_BASE  0x2012000
+#define PNV_XSCOM_PBCQ_NEST_SIZE  0x46
+
+#define PNV_XSCOM_PBCQ_PCI_BASE   0x9012000
+#define PNV_XSCOM_PBCQ_PCI_SIZE   0x15
+
+#define PNV_XSCOM_PBCQ_SPCI_BASE  0x9013c00
+#define PNV_XSCOM_PBCQ_SPCI_SIZE  0x5
+
+/*
+ * Layout of the XSCOM PCB addresses (POWER 9)
+ */
+#define PNV9_XSCOM_EC_BASE(core) \
+    ((uint64_t)(((core) & 0x1F) + 0x20) << 24)
+#define PNV9_XSCOM_EC_SIZE        0x100000
+
+#define PNV9_XSCOM_EQ_BASE(core) \
+    ((uint64_t)(((core) & 0x1C) + 0x40) << 22)
+#define PNV9_XSCOM_EQ_SIZE        0x100000
+
+#define PNV9_XSCOM_I2CM_BASE      0xa0000
+#define PNV9_XSCOM_I2CM_SIZE      0x1000
+
+#define PNV9_XSCOM_OCC_BASE       PNV_XSCOM_OCC_BASE
+#define PNV9_XSCOM_OCC_SIZE       0x8000
+
+#define PNV9_XSCOM_SBE_CTRL_BASE  0x00050008
+#define PNV9_XSCOM_SBE_CTRL_SIZE  0x1
+
+#define PNV9_XSCOM_SBE_MBOX_BASE  0x000D0050
+#define PNV9_XSCOM_SBE_MBOX_SIZE  0x16
+
+#define PNV9_XSCOM_PBA_BASE       0x5012b00
+#define PNV9_XSCOM_PBA_SIZE       0x40
+
+#define PNV9_XSCOM_PSIHB_BASE     0x5012900
+#define PNV9_XSCOM_PSIHB_SIZE     0x100
+
+#define PNV9_XSCOM_XIVE_BASE      0x5013000
+#define PNV9_XSCOM_XIVE_SIZE      0x300
+
+#define PNV9_XSCOM_PEC_NEST_BASE  0x4010c00
+#define PNV9_XSCOM_PEC_NEST_SIZE  0x100
+
+#define PNV9_XSCOM_PEC_PCI_BASE   0xd010800
+#define PNV9_XSCOM_PEC_PCI_SIZE   0x200
+
+/* XSCOM PCI "pass-through" window to PHB SCOM */
+#define PNV9_XSCOM_PEC_PCI_STK0   0x100
+#define PNV9_XSCOM_PEC_PCI_STK1   0x140
+#define PNV9_XSCOM_PEC_PCI_STK2   0x180
+
+/*
+ * Layout of the XSCOM PCB addresses (POWER 10)
+ */
+#define PNV10_XSCOM_EQ_CHIPLET(core)  (0x20 + ((core) >> 2))
+#define PNV10_XSCOM_EQ(chiplet)       ((chiplet) << 24)
+#define PNV10_XSCOM_EC(proc)                    \
+    ((0x2 << 16) | ((1 << (3 - (proc))) << 12))
+
+#define PNV10_XSCOM_QME(chiplet) \
+        (PNV10_XSCOM_EQ(chiplet) | (0xE << 16))
+
+/*
+ * Make the region larger by 0x1000 (instead of starting at an offset) so the
+ * modelled addresses start from 0
+ */
+#define PNV10_XSCOM_QME_BASE(core)     \
+    ((uint64_t) PNV10_XSCOM_QME(PNV10_XSCOM_EQ_CHIPLET(core)))
+#define PNV10_XSCOM_QME_SIZE        (0x8000 + 0x1000)
+
+#define PNV10_XSCOM_EQ_BASE(core)     \
+    ((uint64_t) PNV10_XSCOM_EQ(PNV10_XSCOM_EQ_CHIPLET(core)))
+#define PNV10_XSCOM_EQ_SIZE        0x20000
+
+#define PNV10_XSCOM_EC_BASE(core) \
+    ((uint64_t) PNV10_XSCOM_EQ_BASE(core) | PNV10_XSCOM_EC(core & 0x3))
+#define PNV10_XSCOM_EC_SIZE        0x1000
+
+#define PNV10_XSCOM_PSIHB_BASE     0x3011D00
+#define PNV10_XSCOM_PSIHB_SIZE     0x100
+
+#define PNV10_XSCOM_I2CM_BASE      PNV9_XSCOM_I2CM_BASE
+#define PNV10_XSCOM_I2CM_SIZE      PNV9_XSCOM_I2CM_SIZE
+
+#define PNV10_XSCOM_OCC_BASE       PNV9_XSCOM_OCC_BASE
+#define PNV10_XSCOM_OCC_SIZE       PNV9_XSCOM_OCC_SIZE
+
+#define PNV10_XSCOM_SBE_CTRL_BASE  PNV9_XSCOM_SBE_CTRL_BASE
+#define PNV10_XSCOM_SBE_CTRL_SIZE  PNV9_XSCOM_SBE_CTRL_SIZE
+
+#define PNV10_XSCOM_SBE_MBOX_BASE  PNV9_XSCOM_SBE_MBOX_BASE
+#define PNV10_XSCOM_SBE_MBOX_SIZE  PNV9_XSCOM_SBE_MBOX_SIZE
+
+#define PNV10_XSCOM_PBA_BASE       0x01010CDA
+#define PNV10_XSCOM_PBA_SIZE       0x40
+
+#define PNV10_XSCOM_XIVE2_BASE     0x2010800
+#define PNV10_XSCOM_XIVE2_SIZE     0x400
+
+#define PNV10_XSCOM_PEC_NEST_BASE  0x3011800 /* index goes downwards ... */
+#define PNV10_XSCOM_PEC_NEST_SIZE  0x100
+
+#define PNV10_XSCOM_PEC_PCI_BASE   0x8010800 /* index goes upwards ... */
+#define PNV10_XSCOM_PEC_PCI_SIZE   0x200
+
+void pnv_xscom_init(PnvChip *chip, uint64_t size, hwaddr addr);
+int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset,
+                 uint64_t xscom_base, uint64_t xscom_size,
+                 const char *compat, int compat_size);
+
+void pnv_xscom_add_subregion(PnvChip *chip, hwaddr offset,
+                             MemoryRegion *mr);
+void pnv_xscom_region_init(MemoryRegion *mr,
+                           Object *owner,
+                           const MemoryRegionOps *ops,
+                           void *opaque,
+                           const char *name,
+                           uint64_t size);
+
+#endif /* PPC_PNV_XSCOM_H */
diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h
new file mode 100644 (file)
index 0000000..d5d119e
--- /dev/null
@@ -0,0 +1,121 @@
+#ifndef HW_PPC_H
+#define HW_PPC_H
+
+#include "target/ppc/cpu.h"
+
+void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level);
+PowerPCCPU *ppc_get_vcpu_by_pir(int pir);
+int ppc_cpu_pir(PowerPCCPU *cpu);
+int ppc_cpu_tir(PowerPCCPU *cpu);
+
+/* PowerPC hardware exceptions management helpers */
+typedef void (*clk_setup_cb)(void *opaque, uint32_t freq);
+typedef struct clk_setup_t clk_setup_t;
+struct clk_setup_t {
+    clk_setup_cb cb;
+    void *opaque;
+};
+static inline void clk_setup (clk_setup_t *clk, uint32_t freq)
+{
+    if (clk->cb != NULL)
+        (*clk->cb)(clk->opaque, freq);
+}
+
+struct ppc_tb_t {
+    /* Time base management */
+    int64_t  tb_offset;    /* Compensation                    */
+    int64_t  atb_offset;   /* Compensation                    */
+    int64_t  vtb_offset;
+    uint32_t tb_freq;      /* TB frequency                    */
+    /* Decrementer management */
+    uint64_t decr_next;    /* Tick for next decr interrupt    */
+    uint32_t decr_freq;    /* decrementer frequency           */
+    QEMUTimer *decr_timer;
+    /* Hypervisor decrementer management */
+    uint64_t hdecr_next;    /* Tick for next hdecr interrupt  */
+    QEMUTimer *hdecr_timer;
+    int64_t purr_offset;
+    void *opaque;
+    uint32_t flags;
+};
+
+/* PPC Timers flags */
+#define PPC_TIMER_BOOKE              (1 << 0) /* Enable Booke support */
+#define PPC_TIMER_E500               (1 << 1) /* Enable e500 support */
+#define PPC_DECR_UNDERFLOW_TRIGGERED (1 << 2) /* Decr interrupt triggered when
+                                               * the most significant bit
+                                               * changes from 0 to 1.
+                                               */
+#define PPC_DECR_ZERO_TRIGGERED      (1 << 3) /* Decr interrupt triggered when
+                                               * the decrementer reaches zero.
+                                               */
+#define PPC_DECR_UNDERFLOW_LEVEL     (1 << 4) /* Decr interrupt active when
+                                               * the most significant bit is 1.
+                                               */
+
+uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset);
+void cpu_ppc_tb_init(CPUPPCState *env, uint32_t freq);
+void cpu_ppc_tb_reset(CPUPPCState *env);
+void cpu_ppc_tb_free(CPUPPCState *env);
+void cpu_ppc_hdecr_init(CPUPPCState *env);
+void cpu_ppc_hdecr_exit(CPUPPCState *env);
+
+/* Embedded PowerPC DCR management */
+typedef uint32_t (*dcr_read_cb)(void *opaque, int dcrn);
+typedef void (*dcr_write_cb)(void *opaque, int dcrn, uint32_t val);
+int ppc_dcr_init (CPUPPCState *env, int (*dcr_read_error)(int dcrn),
+                  int (*dcr_write_error)(int dcrn));
+int ppc_dcr_register (CPUPPCState *env, int dcrn, void *opaque,
+                      dcr_read_cb drc_read, dcr_write_cb dcr_write);
+clk_setup_cb ppc_40x_timers_init (CPUPPCState *env, uint32_t freq,
+                                  unsigned int decr_excp);
+
+/* Embedded PowerPC reset */
+void ppc40x_core_reset(PowerPCCPU *cpu);
+void ppc40x_chip_reset(PowerPCCPU *cpu);
+void ppc40x_system_reset(PowerPCCPU *cpu);
+
+#if defined(CONFIG_USER_ONLY)
+static inline void ppc40x_irq_init(PowerPCCPU *cpu) {}
+static inline void ppc6xx_irq_init(PowerPCCPU *cpu) {}
+static inline void ppc970_irq_init(PowerPCCPU *cpu) {}
+static inline void ppcPOWER7_irq_init(PowerPCCPU *cpu) {}
+static inline void ppcPOWER9_irq_init(PowerPCCPU *cpu) {}
+static inline void ppce500_irq_init(PowerPCCPU *cpu) {}
+static inline void ppc_irq_reset(PowerPCCPU *cpu) {}
+#else
+void ppc40x_irq_init(PowerPCCPU *cpu);
+void ppce500_irq_init(PowerPCCPU *cpu);
+void ppc6xx_irq_init(PowerPCCPU *cpu);
+void ppc970_irq_init(PowerPCCPU *cpu);
+void ppcPOWER7_irq_init(PowerPCCPU *cpu);
+void ppcPOWER9_irq_init(PowerPCCPU *cpu);
+void ppc_irq_reset(PowerPCCPU *cpu);
+#endif
+
+/* PPC machines for OpenBIOS */
+enum {
+    ARCH_PREP = 0,
+    ARCH_MAC99,
+    ARCH_HEATHROW,
+    ARCH_MAC99_U3,
+};
+
+#define FW_CFG_PPC_WIDTH        (FW_CFG_ARCH_LOCAL + 0x00)
+#define FW_CFG_PPC_HEIGHT       (FW_CFG_ARCH_LOCAL + 0x01)
+#define FW_CFG_PPC_DEPTH        (FW_CFG_ARCH_LOCAL + 0x02)
+#define FW_CFG_PPC_TBFREQ       (FW_CFG_ARCH_LOCAL + 0x03)
+#define FW_CFG_PPC_CLOCKFREQ    (FW_CFG_ARCH_LOCAL + 0x04)
+#define FW_CFG_PPC_IS_KVM       (FW_CFG_ARCH_LOCAL + 0x05)
+#define FW_CFG_PPC_KVM_HC       (FW_CFG_ARCH_LOCAL + 0x06)
+#define FW_CFG_PPC_KVM_PID      (FW_CFG_ARCH_LOCAL + 0x07)
+#define FW_CFG_PPC_NVRAM_ADDR   (FW_CFG_ARCH_LOCAL + 0x08)
+#define FW_CFG_PPC_BUSFREQ      (FW_CFG_ARCH_LOCAL + 0x09)
+#define FW_CFG_PPC_NVRAM_FLAT   (FW_CFG_ARCH_LOCAL + 0x0a)
+#define FW_CFG_PPC_VIACONFIG    (FW_CFG_ARCH_LOCAL + 0x0b)
+
+#define PPC_SERIAL_MM_BAUDBASE 399193
+
+/* ppc_booke.c */
+void ppc_booke_timers_init(PowerPCCPU *cpu, uint32_t freq, uint32_t flags);
+#endif
diff --git a/include/hw/ppc/ppc4xx.h b/include/hw/ppc/ppc4xx.h
new file mode 100644 (file)
index 0000000..ea77402
--- /dev/null
@@ -0,0 +1,158 @@
+/*
+ * QEMU PowerPC 4xx emulation shared definitions
+ *
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef PPC4XX_H
+#define PPC4XX_H
+
+#include "hw/ppc/ppc.h"
+#include "exec/memory.h"
+#include "hw/sysbus.h"
+
+#define TYPE_PPC4xx_HOST_BRIDGE "ppc4xx-host-bridge"
+#define TYPE_PPC4xx_PCI_HOST "ppc4xx-pci-host"
+#define TYPE_PPC440_PCIX_HOST "ppc440-pcix-host"
+#define TYPE_PPC460EX_PCIE_HOST "ppc460ex-pcie-host"
+
+/*
+ * Generic DCR device
+ */
+#define TYPE_PPC4xx_DCR_DEVICE "ppc4xx-dcr-device"
+OBJECT_DECLARE_SIMPLE_TYPE(Ppc4xxDcrDeviceState, PPC4xx_DCR_DEVICE);
+struct Ppc4xxDcrDeviceState {
+    SysBusDevice parent_obj;
+
+    PowerPCCPU *cpu;
+};
+
+void ppc4xx_dcr_register(Ppc4xxDcrDeviceState *dev, int dcrn, void *opaque,
+                         dcr_read_cb dcr_read, dcr_write_cb dcr_write);
+bool ppc4xx_dcr_realize(Ppc4xxDcrDeviceState *dev, PowerPCCPU *cpu,
+                        Error **errp);
+
+/* Memory Access Layer (MAL) */
+#define TYPE_PPC4xx_MAL "ppc4xx-mal"
+OBJECT_DECLARE_SIMPLE_TYPE(Ppc4xxMalState, PPC4xx_MAL);
+struct Ppc4xxMalState {
+    Ppc4xxDcrDeviceState parent_obj;
+
+    qemu_irq irqs[4];
+    uint32_t cfg;
+    uint32_t esr;
+    uint32_t ier;
+    uint32_t txcasr;
+    uint32_t txcarr;
+    uint32_t txeobisr;
+    uint32_t txdeir;
+    uint32_t rxcasr;
+    uint32_t rxcarr;
+    uint32_t rxeobisr;
+    uint32_t rxdeir;
+    uint32_t *txctpr;
+    uint32_t *rxctpr;
+    uint32_t *rcbs;
+    uint8_t  txcnum;
+    uint8_t  rxcnum;
+};
+
+/* Peripheral local bus arbitrer */
+#define TYPE_PPC4xx_PLB "ppc4xx-plb"
+OBJECT_DECLARE_SIMPLE_TYPE(Ppc4xxPlbState, PPC4xx_PLB);
+struct Ppc4xxPlbState {
+    Ppc4xxDcrDeviceState parent_obj;
+
+    uint32_t acr;
+    uint32_t bear;
+    uint32_t besr;
+};
+
+/* Peripheral controller */
+#define TYPE_PPC4xx_EBC "ppc4xx-ebc"
+OBJECT_DECLARE_SIMPLE_TYPE(Ppc4xxEbcState, PPC4xx_EBC);
+struct Ppc4xxEbcState {
+    Ppc4xxDcrDeviceState parent_obj;
+
+    uint32_t addr;
+    uint32_t bcr[8];
+    uint32_t bap[8];
+    uint32_t bear;
+    uint32_t besr0;
+    uint32_t besr1;
+    uint32_t cfg;
+};
+
+/* SDRAM DDR controller */
+typedef struct {
+    MemoryRegion ram;
+    MemoryRegion container; /* used for clipping */
+    hwaddr base;
+    hwaddr size;
+    uint32_t bcr;
+} Ppc4xxSdramBank;
+
+#define SDR0_DDR0_DDRM_ENCODE(n)  ((((unsigned long)(n)) & 0x03) << 29)
+#define SDR0_DDR0_DDRM_DDR1       0x20000000
+#define SDR0_DDR0_DDRM_DDR2       0x40000000
+
+#define TYPE_PPC4xx_SDRAM_DDR "ppc4xx-sdram-ddr"
+OBJECT_DECLARE_SIMPLE_TYPE(Ppc4xxSdramDdrState, PPC4xx_SDRAM_DDR);
+struct Ppc4xxSdramDdrState {
+    Ppc4xxDcrDeviceState parent_obj;
+
+    MemoryRegion *dram_mr;
+    uint32_t nbanks; /* Banks to use from 4, e.g. when board has less slots */
+    Ppc4xxSdramBank bank[4];
+    qemu_irq irq;
+
+    uint32_t addr;
+    uint32_t besr0;
+    uint32_t besr1;
+    uint32_t bear;
+    uint32_t cfg;
+    uint32_t status;
+    uint32_t rtr;
+    uint32_t pmit;
+    uint32_t tr;
+    uint32_t ecccfg;
+    uint32_t eccesr;
+};
+
+void ppc4xx_sdram_ddr_enable(Ppc4xxSdramDdrState *s);
+
+/* SDRAM DDR2 controller */
+#define TYPE_PPC4xx_SDRAM_DDR2 "ppc4xx-sdram-ddr2"
+OBJECT_DECLARE_SIMPLE_TYPE(Ppc4xxSdramDdr2State, PPC4xx_SDRAM_DDR2);
+struct Ppc4xxSdramDdr2State {
+    Ppc4xxDcrDeviceState parent_obj;
+
+    MemoryRegion *dram_mr;
+    uint32_t nbanks; /* Banks to use from 4, e.g. when board has less slots */
+    Ppc4xxSdramBank bank[4];
+
+    uint32_t addr;
+    uint32_t mcopt2;
+};
+
+void ppc4xx_sdram_ddr2_enable(Ppc4xxSdramDdr2State *s);
+
+#endif /* PPC4XX_H */
diff --git a/include/hw/ppc/ppc_e500.h b/include/hw/ppc/ppc_e500.h
new file mode 100644 (file)
index 0000000..b66c0e3
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef HW_PPC_E500_H
+#define HW_PPC_E500_H
+
+void ppce500_set_mpic_proxy(bool enabled);
+
+#endif
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
new file mode 100644 (file)
index 0000000..e91791a
--- /dev/null
@@ -0,0 +1,1029 @@
+#ifndef HW_SPAPR_H
+#define HW_SPAPR_H
+
+#include "qemu/units.h"
+#include "sysemu/dma.h"
+#include "hw/boards.h"
+#include "hw/ppc/spapr_drc.h"
+#include "hw/mem/pc-dimm.h"
+#include "hw/ppc/spapr_ovec.h"
+#include "hw/ppc/spapr_irq.h"
+#include "qom/object.h"
+#include "hw/ppc/spapr_xive.h"  /* For SpaprXive */
+#include "hw/ppc/xics.h"        /* For ICSState */
+#include "hw/ppc/spapr_tpm_proxy.h"
+
+struct SpaprVioBus;
+struct SpaprPhbState;
+struct SpaprNvram;
+
+typedef struct SpaprEventLogEntry SpaprEventLogEntry;
+typedef struct SpaprEventSource SpaprEventSource;
+typedef struct SpaprPendingHpt SpaprPendingHpt;
+
+typedef struct Vof Vof;
+
+#define HPTE64_V_HPTE_DIRTY     0x0000000000000040ULL
+#define SPAPR_ENTRY_POINT       0x100
+
+#define SPAPR_TIMEBASE_FREQ     512000000ULL
+
+#define TYPE_SPAPR_RTC "spapr-rtc"
+
+OBJECT_DECLARE_SIMPLE_TYPE(SpaprRtcState, SPAPR_RTC)
+
+struct SpaprRtcState {
+    /*< private >*/
+    DeviceState parent_obj;
+    int64_t ns_offset;
+};
+
+typedef struct SpaprDimmState SpaprDimmState;
+
+#define TYPE_SPAPR_MACHINE      "spapr-machine"
+OBJECT_DECLARE_TYPE(SpaprMachineState, SpaprMachineClass, SPAPR_MACHINE)
+
+typedef enum {
+    SPAPR_RESIZE_HPT_DEFAULT = 0,
+    SPAPR_RESIZE_HPT_DISABLED,
+    SPAPR_RESIZE_HPT_ENABLED,
+    SPAPR_RESIZE_HPT_REQUIRED,
+} SpaprResizeHpt;
+
+/**
+ * Capabilities
+ */
+
+/* Hardware Transactional Memory */
+#define SPAPR_CAP_HTM                   0x00
+/* Vector Scalar Extensions */
+#define SPAPR_CAP_VSX                   0x01
+/* Decimal Floating Point */
+#define SPAPR_CAP_DFP                   0x02
+/* Cache Flush on Privilege Change */
+#define SPAPR_CAP_CFPC                  0x03
+/* Speculation Barrier Bounds Checking */
+#define SPAPR_CAP_SBBC                  0x04
+/* Indirect Branch Serialisation */
+#define SPAPR_CAP_IBS                   0x05
+/* HPT Maximum Page Size (encoded as a shift) */
+#define SPAPR_CAP_HPT_MAXPAGESIZE       0x06
+/* Nested KVM-HV */
+#define SPAPR_CAP_NESTED_KVM_HV         0x07
+/* Large Decrementer */
+#define SPAPR_CAP_LARGE_DECREMENTER     0x08
+/* Count Cache Flush Assist HW Instruction */
+#define SPAPR_CAP_CCF_ASSIST            0x09
+/* Implements PAPR FWNMI option */
+#define SPAPR_CAP_FWNMI                 0x0A
+/* Support H_RPT_INVALIDATE */
+#define SPAPR_CAP_RPT_INVALIDATE        0x0B
+/* Support for AIL modes */
+#define SPAPR_CAP_AIL_MODE_3            0x0C
+/* Num Caps */
+#define SPAPR_CAP_NUM                   (SPAPR_CAP_AIL_MODE_3 + 1)
+
+/*
+ * Capability Values
+ */
+/* Bool Caps */
+#define SPAPR_CAP_OFF                   0x00
+#define SPAPR_CAP_ON                    0x01
+
+/* Custom Caps */
+
+/* Generic */
+#define SPAPR_CAP_BROKEN                0x00
+#define SPAPR_CAP_WORKAROUND            0x01
+#define SPAPR_CAP_FIXED                 0x02
+/* SPAPR_CAP_IBS (cap-ibs) */
+#define SPAPR_CAP_FIXED_IBS             0x02
+#define SPAPR_CAP_FIXED_CCD             0x03
+#define SPAPR_CAP_FIXED_NA              0x10 /* Lets leave a bit of a gap... */
+
+#define FDT_MAX_SIZE                    0x200000
+
+/* Max number of NUMA nodes */
+#define NUMA_NODES_MAX_NUM         (MAX_NODES)
+
+/*
+ * NUMA FORM1 macros. FORM1_DIST_REF_POINTS was taken from
+ * MAX_DISTANCE_REF_POINTS in arch/powerpc/mm/numa.h from Linux
+ * kernel source. It represents the amount of associativity domains
+ * for non-CPU resources.
+ *
+ * FORM1_NUMA_ASSOC_SIZE is the base array size of an ibm,associativity
+ * array for any non-CPU resource.
+ */
+#define FORM1_DIST_REF_POINTS            4
+#define FORM1_NUMA_ASSOC_SIZE            (FORM1_DIST_REF_POINTS + 1)
+
+/*
+ * FORM2 NUMA affinity has a single associativity domain, giving
+ * us a assoc size of 2.
+ */
+#define FORM2_DIST_REF_POINTS            1
+#define FORM2_NUMA_ASSOC_SIZE            (FORM2_DIST_REF_POINTS + 1)
+
+typedef struct SpaprCapabilities SpaprCapabilities;
+struct SpaprCapabilities {
+    uint8_t caps[SPAPR_CAP_NUM];
+};
+
+/**
+ * SpaprMachineClass:
+ */
+struct SpaprMachineClass {
+    /*< private >*/
+    MachineClass parent_class;
+
+    /*< public >*/
+    bool dr_lmb_enabled;       /* enable dynamic-reconfig/hotplug of LMBs */
+    bool dr_phb_enabled;       /* enable dynamic-reconfig/hotplug of PHBs */
+    bool update_dt_enabled;    /* enable KVMPPC_H_UPDATE_DT */
+    bool use_ohci_by_default;  /* use USB-OHCI instead of XHCI */
+    bool pre_2_10_has_unused_icps;
+    bool legacy_irq_allocation;
+    uint32_t nr_xirqs;
+    bool broken_host_serial_model; /* present real host info to the guest */
+    bool pre_4_1_migration; /* don't migrate hpt-max-page-size */
+    bool linux_pci_probe;
+    bool smp_threads_vsmt; /* set VSMT to smp_threads by default */
+    hwaddr rma_limit;          /* clamp the RMA to this size */
+    bool pre_5_1_assoc_refpoints;
+    bool pre_5_2_numa_associativity;
+    bool pre_6_2_numa_affinity;
+
+    bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
+                          uint64_t *buid, hwaddr *pio,
+                          hwaddr *mmio32, hwaddr *mmio64,
+                          unsigned n_dma, uint32_t *liobns, Error **errp);
+    SpaprResizeHpt resize_hpt_default;
+    SpaprCapabilities default_caps;
+    SpaprIrq *irq;
+};
+
+#define WDT_MAX_WATCHDOGS       4      /* Maximum number of watchdog devices */
+
+#define TYPE_SPAPR_WDT "spapr-wdt"
+OBJECT_DECLARE_SIMPLE_TYPE(SpaprWatchdog, SPAPR_WDT)
+
+typedef struct SpaprWatchdog {
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
+
+    QEMUTimer timer;
+    uint8_t action;         /* One of PSERIES_WDTF_ACTION_xxx */
+    uint8_t leave_others;   /* leaveOtherWatchdogsRunningOnTimeout */
+} SpaprWatchdog;
+
+/**
+ * SpaprMachineState:
+ */
+struct SpaprMachineState {
+    /*< private >*/
+    MachineState parent_obj;
+
+    struct SpaprVioBus *vio_bus;
+    QLIST_HEAD(, SpaprPhbState) phbs;
+    struct SpaprNvram *nvram;
+    SpaprRtcState rtc;
+
+    SpaprResizeHpt resize_hpt;
+    void *htab;
+    uint32_t htab_shift;
+    uint64_t patb_entry; /* Process tbl registered in H_REGISTER_PROC_TBL */
+    SpaprPendingHpt *pending_hpt; /* in-progress resize */
+
+    hwaddr rma_size;
+    uint32_t fdt_size;
+    uint32_t fdt_initial_size;
+    void *fdt_blob;
+    uint8_t fdt_rng_seed[32];
+    long kernel_size;
+    bool kernel_le;
+    uint64_t kernel_addr;
+    uint32_t initrd_base;
+    long initrd_size;
+    Vof *vof;
+    uint64_t rtc_offset; /* Now used only during incoming migration */
+    struct PPCTimebase tb;
+    bool want_stdout_path;
+    uint32_t vsmt;       /* Virtual SMT mode (KVM's "core stride") */
+
+    /* Nested HV support (TCG only) */
+    uint64_t nested_ptcr;
+
+    Notifier epow_notifier;
+    QTAILQ_HEAD(, SpaprEventLogEntry) pending_events;
+    bool use_hotplug_event_source;
+    SpaprEventSource *event_sources;
+
+    /* ibm,client-architecture-support option negotiation */
+    bool cas_pre_isa3_guest;
+    SpaprOptionVector *ov5;         /* QEMU-supported option vectors */
+    SpaprOptionVector *ov5_cas;     /* negotiated (via CAS) option vectors */
+    uint32_t max_compat_pvr;
+
+    /* Migration state */
+    int htab_save_index;
+    bool htab_first_pass;
+    int htab_fd;
+
+    /* Pending DIMM unplug cache. It is populated when a LMB
+     * unplug starts. It can be regenerated if a migration
+     * occurs during the unplug process. */
+    QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
+
+    /* State related to FWNMI option */
+
+    /* System Reset and Machine Check Notification Routine addresses
+     * registered by "ibm,nmi-register" RTAS call.
+     */
+    target_ulong fwnmi_system_reset_addr;
+    target_ulong fwnmi_machine_check_addr;
+
+    /* Machine Check FWNMI synchronization, fwnmi_machine_check_interlock is
+     * set to -1 if a FWNMI machine check is not in progress, else is set to
+     * the CPU that was delivered the machine check, and is set back to -1
+     * when that CPU makes an "ibm,nmi-interlock" RTAS call. The cond is used
+     * to synchronize other CPUs.
+     */
+    int fwnmi_machine_check_interlock;
+    QemuCond fwnmi_machine_check_interlock_cond;
+
+    /* Set by -boot */
+    char *boot_device;
+
+    /*< public >*/
+    char *kvm_type;
+    char *host_model;
+    char *host_serial;
+
+    int32_t irq_map_nr;
+    unsigned long *irq_map;
+    SpaprIrq *irq;
+    qemu_irq *qirqs;
+    SpaprInterruptController *active_intc;
+    ICSState *ics;
+    SpaprXive *xive;
+
+    bool cmd_line_caps[SPAPR_CAP_NUM];
+    SpaprCapabilities def, eff, mig;
+
+    SpaprTpmProxy *tpm_proxy;
+
+    uint32_t FORM1_assoc_array[NUMA_NODES_MAX_NUM][FORM1_NUMA_ASSOC_SIZE];
+    uint32_t FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE];
+
+    Error *fwnmi_migration_blocker;
+
+    SpaprWatchdog wds[WDT_MAX_WATCHDOGS];
+};
+
+#define H_SUCCESS         0
+#define H_BUSY            1        /* Hardware busy -- retry later */
+#define H_CLOSED          2        /* Resource closed */
+#define H_NOT_AVAILABLE   3
+#define H_CONSTRAINED     4        /* Resource request constrained to max allowed */
+#define H_PARTIAL         5
+#define H_IN_PROGRESS     14       /* Kind of like busy */
+#define H_PAGE_REGISTERED 15
+#define H_PARTIAL_STORE   16
+#define H_PENDING         17       /* returned from H_POLL_PENDING */
+#define H_CONTINUE        18       /* Returned from H_Join on success */
+#define H_LONG_BUSY_START_RANGE         9900  /* Start of long busy range */
+#define H_LONG_BUSY_ORDER_1_MSEC        9900  /* Long busy, hint that 1msec \
+                                                 is a good time to retry */
+#define H_LONG_BUSY_ORDER_10_MSEC       9901  /* Long busy, hint that 10msec \
+                                                 is a good time to retry */
+#define H_LONG_BUSY_ORDER_100_MSEC      9902  /* Long busy, hint that 100msec \
+                                                 is a good time to retry */
+#define H_LONG_BUSY_ORDER_1_SEC         9903  /* Long busy, hint that 1sec \
+                                                 is a good time to retry */
+#define H_LONG_BUSY_ORDER_10_SEC        9904  /* Long busy, hint that 10sec \
+                                                 is a good time to retry */
+#define H_LONG_BUSY_ORDER_100_SEC       9905  /* Long busy, hint that 100sec \
+                                                 is a good time to retry */
+#define H_LONG_BUSY_END_RANGE           9905  /* End of long busy range */
+#define H_HARDWARE        -1       /* Hardware error */
+#define H_FUNCTION        -2       /* Function not supported */
+#define H_PRIVILEGE       -3       /* Caller not privileged */
+#define H_PARAMETER       -4       /* Parameter invalid, out-of-range or conflicting */
+#define H_BAD_MODE        -5       /* Illegal msr value */
+#define H_PTEG_FULL       -6       /* PTEG is full */
+#define H_NOT_FOUND       -7       /* PTE was not found" */
+#define H_RESERVED_DABR   -8       /* DABR address is reserved by the hypervisor on this processor" */
+#define H_NO_MEM          -9
+#define H_AUTHORITY       -10
+#define H_PERMISSION      -11
+#define H_DROPPED         -12
+#define H_SOURCE_PARM     -13
+#define H_DEST_PARM       -14
+#define H_REMOTE_PARM     -15
+#define H_RESOURCE        -16
+#define H_ADAPTER_PARM    -17
+#define H_RH_PARM         -18
+#define H_RCQ_PARM        -19
+#define H_SCQ_PARM        -20
+#define H_EQ_PARM         -21
+#define H_RT_PARM         -22
+#define H_ST_PARM         -23
+#define H_SIGT_PARM       -24
+#define H_TOKEN_PARM      -25
+#define H_MLENGTH_PARM    -27
+#define H_MEM_PARM        -28
+#define H_MEM_ACCESS_PARM -29
+#define H_ATTR_PARM       -30
+#define H_PORT_PARM       -31
+#define H_MCG_PARM        -32
+#define H_VL_PARM         -33
+#define H_TSIZE_PARM      -34
+#define H_TRACE_PARM      -35
+
+#define H_MASK_PARM       -37
+#define H_MCG_FULL        -38
+#define H_ALIAS_EXIST     -39
+#define H_P_COUNTER       -40
+#define H_TABLE_FULL      -41
+#define H_ALT_TABLE       -42
+#define H_MR_CONDITION    -43
+#define H_NOT_ENOUGH_RESOURCES -44
+#define H_R_STATE         -45
+#define H_RESCINDEND      -46
+#define H_P2              -55
+#define H_P3              -56
+#define H_P4              -57
+#define H_P5              -58
+#define H_P6              -59
+#define H_P7              -60
+#define H_P8              -61
+#define H_P9              -62
+#define H_NOOP            -63
+#define H_UNSUPPORTED     -67
+#define H_OVERLAP         -68
+#define H_UNSUPPORTED_FLAG -256
+#define H_MULTI_THREADS_ACTIVE -9005
+
+
+/* Long Busy is a condition that can be returned by the firmware
+ * when a call cannot be completed now, but the identical call
+ * should be retried later.  This prevents calls blocking in the
+ * firmware for long periods of time.  Annoyingly the firmware can return
+ * a range of return codes, hinting at how long we should wait before
+ * retrying.  If you don't care for the hint, the macro below is a good
+ * way to check for the long_busy return codes
+ */
+#define H_IS_LONG_BUSY(x)  ((x >= H_LONG_BUSY_START_RANGE) \
+                            && (x <= H_LONG_BUSY_END_RANGE))
+
+/* Flags */
+#define H_LARGE_PAGE      (1ULL<<(63-16))
+#define H_EXACT           (1ULL<<(63-24))       /* Use exact PTE or return H_PTEG_FULL */
+#define H_R_XLATE         (1ULL<<(63-25))       /* include a valid logical page num in the pte if the valid bit is set */
+#define H_READ_4          (1ULL<<(63-26))       /* Return 4 PTEs */
+#define H_PAGE_STATE_CHANGE (1ULL<<(63-28))
+#define H_PAGE_UNUSED     ((1ULL<<(63-29)) | (1ULL<<(63-30)))
+#define H_PAGE_SET_UNUSED (H_PAGE_STATE_CHANGE | H_PAGE_UNUSED)
+#define H_PAGE_SET_LOANED (H_PAGE_SET_UNUSED | (1ULL<<(63-31)))
+#define H_PAGE_SET_ACTIVE H_PAGE_STATE_CHANGE
+#define H_AVPN            (1ULL<<(63-32))       /* An avpn is provided as a sanity test */
+#define H_ANDCOND         (1ULL<<(63-33))
+#define H_ICACHE_INVALIDATE (1ULL<<(63-40))     /* icbi, etc.  (ignored for IO pages) */
+#define H_ICACHE_SYNCHRONIZE (1ULL<<(63-41))    /* dcbst, icbi, etc (ignored for IO pages */
+#define H_ZERO_PAGE       (1ULL<<(63-48))       /* zero the page before mapping (ignored for IO pages) */
+#define H_COPY_PAGE       (1ULL<<(63-49))
+#define H_N               (1ULL<<(63-61))
+#define H_PP1             (1ULL<<(63-62))
+#define H_PP2             (1ULL<<(63-63))
+
+/* Values for 2nd argument to H_SET_MODE */
+#define H_SET_MODE_RESOURCE_SET_CIABR           1
+#define H_SET_MODE_RESOURCE_SET_DAWR0           2
+#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE     3
+#define H_SET_MODE_RESOURCE_LE                  4
+
+/* Flags for H_SET_MODE_RESOURCE_LE */
+#define H_SET_MODE_ENDIAN_BIG    0
+#define H_SET_MODE_ENDIAN_LITTLE 1
+
+/* VASI States */
+#define H_VASI_INVALID    0
+#define H_VASI_ENABLED    1
+#define H_VASI_ABORTED    2
+#define H_VASI_SUSPENDING 3
+#define H_VASI_SUSPENDED  4
+#define H_VASI_RESUMED    5
+#define H_VASI_COMPLETED  6
+
+/* DABRX flags */
+#define H_DABRX_HYPERVISOR (1ULL<<(63-61))
+#define H_DABRX_KERNEL     (1ULL<<(63-62))
+#define H_DABRX_USER       (1ULL<<(63-63))
+
+/* Values for KVM_PPC_GET_CPU_CHAR & H_GET_CPU_CHARACTERISTICS */
+#define H_CPU_CHAR_SPEC_BAR_ORI31               PPC_BIT(0)
+#define H_CPU_CHAR_BCCTRL_SERIALISED            PPC_BIT(1)
+#define H_CPU_CHAR_L1D_FLUSH_ORI30              PPC_BIT(2)
+#define H_CPU_CHAR_L1D_FLUSH_TRIG2              PPC_BIT(3)
+#define H_CPU_CHAR_L1D_THREAD_PRIV              PPC_BIT(4)
+#define H_CPU_CHAR_HON_BRANCH_HINTS             PPC_BIT(5)
+#define H_CPU_CHAR_THR_RECONF_TRIG              PPC_BIT(6)
+#define H_CPU_CHAR_CACHE_COUNT_DIS              PPC_BIT(7)
+#define H_CPU_CHAR_BCCTR_FLUSH_ASSIST           PPC_BIT(9)
+
+#define H_CPU_BEHAV_FAVOUR_SECURITY             PPC_BIT(0)
+#define H_CPU_BEHAV_L1D_FLUSH_PR                PPC_BIT(1)
+#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR           PPC_BIT(2)
+#define H_CPU_BEHAV_FLUSH_COUNT_CACHE           PPC_BIT(5)
+#define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY          PPC_BIT(7)
+#define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS        PPC_BIT(8)
+
+/* Each control block has to be on a 4K boundary */
+#define H_CB_ALIGNMENT     4096
+
+/* pSeries hypervisor opcodes */
+#define H_REMOVE                0x04
+#define H_ENTER                 0x08
+#define H_READ                  0x0c
+#define H_CLEAR_MOD             0x10
+#define H_CLEAR_REF             0x14
+#define H_PROTECT               0x18
+#define H_GET_TCE               0x1c
+#define H_PUT_TCE               0x20
+#define H_SET_SPRG0             0x24
+#define H_SET_DABR              0x28
+#define H_PAGE_INIT             0x2c
+#define H_SET_ASR               0x30
+#define H_ASR_ON                0x34
+#define H_ASR_OFF               0x38
+#define H_LOGICAL_CI_LOAD       0x3c
+#define H_LOGICAL_CI_STORE      0x40
+#define H_LOGICAL_CACHE_LOAD    0x44
+#define H_LOGICAL_CACHE_STORE   0x48
+#define H_LOGICAL_ICBI          0x4c
+#define H_LOGICAL_DCBF          0x50
+#define H_GET_TERM_CHAR         0x54
+#define H_PUT_TERM_CHAR         0x58
+#define H_REAL_TO_LOGICAL       0x5c
+#define H_HYPERVISOR_DATA       0x60
+#define H_EOI                   0x64
+#define H_CPPR                  0x68
+#define H_IPI                   0x6c
+#define H_IPOLL                 0x70
+#define H_XIRR                  0x74
+#define H_PERFMON               0x7c
+#define H_MIGRATE_DMA           0x78
+#define H_REGISTER_VPA          0xDC
+#define H_CEDE                  0xE0
+#define H_CONFER                0xE4
+#define H_PROD                  0xE8
+#define H_GET_PPP               0xEC
+#define H_SET_PPP               0xF0
+#define H_PURR                  0xF4
+#define H_PIC                   0xF8
+#define H_REG_CRQ               0xFC
+#define H_FREE_CRQ              0x100
+#define H_VIO_SIGNAL            0x104
+#define H_SEND_CRQ              0x108
+#define H_COPY_RDMA             0x110
+#define H_REGISTER_LOGICAL_LAN  0x114
+#define H_FREE_LOGICAL_LAN      0x118
+#define H_ADD_LOGICAL_LAN_BUFFER 0x11C
+#define H_SEND_LOGICAL_LAN      0x120
+#define H_BULK_REMOVE           0x124
+#define H_MULTICAST_CTRL        0x130
+#define H_SET_XDABR             0x134
+#define H_STUFF_TCE             0x138
+#define H_PUT_TCE_INDIRECT      0x13C
+#define H_CHANGE_LOGICAL_LAN_MAC 0x14C
+#define H_VTERM_PARTNER_INFO    0x150
+#define H_REGISTER_VTERM        0x154
+#define H_FREE_VTERM            0x158
+#define H_RESET_EVENTS          0x15C
+#define H_ALLOC_RESOURCE        0x160
+#define H_FREE_RESOURCE         0x164
+#define H_MODIFY_QP             0x168
+#define H_QUERY_QP              0x16C
+#define H_REREGISTER_PMR        0x170
+#define H_REGISTER_SMR          0x174
+#define H_QUERY_MR              0x178
+#define H_QUERY_MW              0x17C
+#define H_QUERY_HCA             0x180
+#define H_QUERY_PORT            0x184
+#define H_MODIFY_PORT           0x188
+#define H_DEFINE_AQP1           0x18C
+#define H_GET_TRACE_BUFFER      0x190
+#define H_DEFINE_AQP0           0x194
+#define H_RESIZE_MR             0x198
+#define H_ATTACH_MCQP           0x19C
+#define H_DETACH_MCQP           0x1A0
+#define H_CREATE_RPT            0x1A4
+#define H_REMOVE_RPT            0x1A8
+#define H_REGISTER_RPAGES       0x1AC
+#define H_DISABLE_AND_GETC      0x1B0
+#define H_ERROR_DATA            0x1B4
+#define H_GET_HCA_INFO          0x1B8
+#define H_GET_PERF_COUNT        0x1BC
+#define H_MANAGE_TRACE          0x1C0
+#define H_GET_CPU_CHARACTERISTICS 0x1C8
+#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
+#define H_QUERY_INT_STATE       0x1E4
+#define H_POLL_PENDING          0x1D8
+#define H_ILLAN_ATTRIBUTES      0x244
+#define H_MODIFY_HEA_QP         0x250
+#define H_QUERY_HEA_QP          0x254
+#define H_QUERY_HEA             0x258
+#define H_QUERY_HEA_PORT        0x25C
+#define H_MODIFY_HEA_PORT       0x260
+#define H_REG_BCMC              0x264
+#define H_DEREG_BCMC            0x268
+#define H_REGISTER_HEA_RPAGES   0x26C
+#define H_DISABLE_AND_GET_HEA   0x270
+#define H_GET_HEA_INFO          0x274
+#define H_ALLOC_HEA_RESOURCE    0x278
+#define H_ADD_CONN              0x284
+#define H_DEL_CONN              0x288
+#define H_JOIN                  0x298
+#define H_VASI_STATE            0x2A4
+#define H_ENABLE_CRQ            0x2B0
+#define H_GET_EM_PARMS          0x2B8
+#define H_SET_MPP               0x2D0
+#define H_GET_MPP               0x2D4
+#define H_HOME_NODE_ASSOCIATIVITY 0x2EC
+#define H_XIRR_X                0x2FC
+#define H_RANDOM                0x300
+#define H_SET_MODE              0x31C
+#define H_RESIZE_HPT_PREPARE    0x36C
+#define H_RESIZE_HPT_COMMIT     0x370
+#define H_CLEAN_SLB             0x374
+#define H_INVALIDATE_PID        0x378
+#define H_REGISTER_PROC_TBL     0x37C
+#define H_SIGNAL_SYS_RESET      0x380
+
+#define H_INT_GET_SOURCE_INFO   0x3A8
+#define H_INT_SET_SOURCE_CONFIG 0x3AC
+#define H_INT_GET_SOURCE_CONFIG 0x3B0
+#define H_INT_GET_QUEUE_INFO    0x3B4
+#define H_INT_SET_QUEUE_CONFIG  0x3B8
+#define H_INT_GET_QUEUE_CONFIG  0x3BC
+#define H_INT_SET_OS_REPORTING_LINE 0x3C0
+#define H_INT_GET_OS_REPORTING_LINE 0x3C4
+#define H_INT_ESB               0x3C8
+#define H_INT_SYNC              0x3CC
+#define H_INT_RESET             0x3D0
+#define H_SCM_READ_METADATA     0x3E4
+#define H_SCM_WRITE_METADATA    0x3E8
+#define H_SCM_BIND_MEM          0x3EC
+#define H_SCM_UNBIND_MEM        0x3F0
+#define H_SCM_UNBIND_ALL        0x3FC
+#define H_SCM_HEALTH            0x400
+#define H_RPT_INVALIDATE        0x448
+#define H_SCM_FLUSH             0x44C
+#define H_WATCHDOG              0x45C
+
+#define MAX_HCALL_OPCODE        H_WATCHDOG
+
+/* The hcalls above are standardized in PAPR and implemented by pHyp
+ * as well.
+ *
+ * We also need some hcalls which are specific to qemu / KVM-on-POWER.
+ * We put those into the 0xf000-0xfffc range which is reserved by PAPR
+ * for "platform-specific" hcalls.
+ */
+#define KVMPPC_HCALL_BASE       0xf000
+#define KVMPPC_H_RTAS           (KVMPPC_HCALL_BASE + 0x0)
+#define KVMPPC_H_LOGICAL_MEMOP  (KVMPPC_HCALL_BASE + 0x1)
+/* Client Architecture support */
+#define KVMPPC_H_CAS            (KVMPPC_HCALL_BASE + 0x2)
+#define KVMPPC_H_UPDATE_DT      (KVMPPC_HCALL_BASE + 0x3)
+/* 0x4 was used for KVMPPC_H_UPDATE_PHANDLE in SLOF */
+#define KVMPPC_H_VOF_CLIENT     (KVMPPC_HCALL_BASE + 0x5)
+
+/* Platform-specific hcalls used for nested HV KVM */
+#define KVMPPC_H_SET_PARTITION_TABLE   (KVMPPC_HCALL_BASE + 0x800)
+#define KVMPPC_H_ENTER_NESTED          (KVMPPC_HCALL_BASE + 0x804)
+#define KVMPPC_H_TLB_INVALIDATE        (KVMPPC_HCALL_BASE + 0x808)
+#define KVMPPC_H_COPY_TOFROM_GUEST     (KVMPPC_HCALL_BASE + 0x80C)
+
+#define KVMPPC_HCALL_MAX        KVMPPC_H_COPY_TOFROM_GUEST
+
+/*
+ * The hcall range 0xEF00 to 0xEF80 is reserved for use in facilitating
+ * Secure VM mode via an Ultravisor / Protected Execution Facility
+ */
+#define SVM_HCALL_BASE              0xEF00
+#define SVM_H_TPM_COMM              0xEF10
+#define SVM_HCALL_MAX               SVM_H_TPM_COMM
+
+typedef struct SpaprDeviceTreeUpdateHeader {
+    uint32_t version_id;
+} SpaprDeviceTreeUpdateHeader;
+
+#define hcall_dprintf(fmt, ...) \
+    do { \
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: " fmt, __func__, ## __VA_ARGS__); \
+    } while (0)
+
+typedef target_ulong (*spapr_hcall_fn)(PowerPCCPU *cpu, SpaprMachineState *sm,
+                                       target_ulong opcode,
+                                       target_ulong *args);
+
+void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn);
+target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
+                             target_ulong *args);
+
+target_ulong softmmu_resize_hpt_prepare(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                         target_ulong shift);
+target_ulong softmmu_resize_hpt_commit(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                        target_ulong flags, target_ulong shift);
+bool is_ram_address(SpaprMachineState *spapr, hwaddr addr);
+void push_sregs_to_kvm_pr(SpaprMachineState *spapr);
+
+/* Virtual Processor Area structure constants */
+#define VPA_MIN_SIZE           640
+#define VPA_SIZE_OFFSET        0x4
+#define VPA_SHARED_PROC_OFFSET 0x9
+#define VPA_SHARED_PROC_VAL    0x2
+#define VPA_DISPATCH_COUNTER   0x100
+
+/* ibm,set-eeh-option */
+#define RTAS_EEH_DISABLE                 0
+#define RTAS_EEH_ENABLE                  1
+#define RTAS_EEH_THAW_IO                 2
+#define RTAS_EEH_THAW_DMA                3
+
+/* ibm,get-config-addr-info2 */
+#define RTAS_GET_PE_ADDR                 0
+#define RTAS_GET_PE_MODE                 1
+#define RTAS_PE_MODE_NONE                0
+#define RTAS_PE_MODE_NOT_SHARED          1
+#define RTAS_PE_MODE_SHARED              2
+
+/* ibm,read-slot-reset-state2 */
+#define RTAS_EEH_PE_STATE_NORMAL         0
+#define RTAS_EEH_PE_STATE_RESET          1
+#define RTAS_EEH_PE_STATE_STOPPED_IO_DMA 2
+#define RTAS_EEH_PE_STATE_STOPPED_DMA    4
+#define RTAS_EEH_PE_STATE_UNAVAIL        5
+#define RTAS_EEH_NOT_SUPPORT             0
+#define RTAS_EEH_SUPPORT                 1
+#define RTAS_EEH_PE_UNAVAIL_INFO         1000
+#define RTAS_EEH_PE_RECOVER_INFO         0
+
+/* ibm,set-slot-reset */
+#define RTAS_SLOT_RESET_DEACTIVATE       0
+#define RTAS_SLOT_RESET_HOT              1
+#define RTAS_SLOT_RESET_FUNDAMENTAL      3
+
+/* ibm,slot-error-detail */
+#define RTAS_SLOT_TEMP_ERR_LOG           1
+#define RTAS_SLOT_PERM_ERR_LOG           2
+
+/* RTAS return codes */
+#define RTAS_OUT_SUCCESS                        0
+#define RTAS_OUT_NO_ERRORS_FOUND                1
+#define RTAS_OUT_HW_ERROR                       -1
+#define RTAS_OUT_BUSY                           -2
+#define RTAS_OUT_PARAM_ERROR                    -3
+#define RTAS_OUT_NOT_SUPPORTED                  -3
+#define RTAS_OUT_NO_SUCH_INDICATOR              -3
+#define RTAS_OUT_NOT_AUTHORIZED                 -9002
+#define RTAS_OUT_SYSPARM_PARAM_ERROR            -9999
+
+/* DDW pagesize mask values from ibm,query-pe-dma-window */
+#define RTAS_DDW_PGSIZE_4K       0x01
+#define RTAS_DDW_PGSIZE_64K      0x02
+#define RTAS_DDW_PGSIZE_16M      0x04
+#define RTAS_DDW_PGSIZE_32M      0x08
+#define RTAS_DDW_PGSIZE_64M      0x10
+#define RTAS_DDW_PGSIZE_128M     0x20
+#define RTAS_DDW_PGSIZE_256M     0x40
+#define RTAS_DDW_PGSIZE_16G      0x80
+#define RTAS_DDW_PGSIZE_2M       0x100
+
+/* RTAS tokens */
+#define RTAS_TOKEN_BASE      0x2000
+
+#define RTAS_DISPLAY_CHARACTER                  (RTAS_TOKEN_BASE + 0x00)
+#define RTAS_GET_TIME_OF_DAY                    (RTAS_TOKEN_BASE + 0x01)
+#define RTAS_SET_TIME_OF_DAY                    (RTAS_TOKEN_BASE + 0x02)
+#define RTAS_POWER_OFF                          (RTAS_TOKEN_BASE + 0x03)
+#define RTAS_SYSTEM_REBOOT                      (RTAS_TOKEN_BASE + 0x04)
+#define RTAS_QUERY_CPU_STOPPED_STATE            (RTAS_TOKEN_BASE + 0x05)
+#define RTAS_START_CPU                          (RTAS_TOKEN_BASE + 0x06)
+#define RTAS_STOP_SELF                          (RTAS_TOKEN_BASE + 0x07)
+#define RTAS_IBM_GET_SYSTEM_PARAMETER           (RTAS_TOKEN_BASE + 0x08)
+#define RTAS_IBM_SET_SYSTEM_PARAMETER           (RTAS_TOKEN_BASE + 0x09)
+#define RTAS_IBM_SET_XIVE                       (RTAS_TOKEN_BASE + 0x0A)
+#define RTAS_IBM_GET_XIVE                       (RTAS_TOKEN_BASE + 0x0B)
+#define RTAS_IBM_INT_OFF                        (RTAS_TOKEN_BASE + 0x0C)
+#define RTAS_IBM_INT_ON                         (RTAS_TOKEN_BASE + 0x0D)
+#define RTAS_CHECK_EXCEPTION                    (RTAS_TOKEN_BASE + 0x0E)
+#define RTAS_EVENT_SCAN                         (RTAS_TOKEN_BASE + 0x0F)
+#define RTAS_IBM_SET_TCE_BYPASS                 (RTAS_TOKEN_BASE + 0x10)
+#define RTAS_QUIESCE                            (RTAS_TOKEN_BASE + 0x11)
+#define RTAS_NVRAM_FETCH                        (RTAS_TOKEN_BASE + 0x12)
+#define RTAS_NVRAM_STORE                        (RTAS_TOKEN_BASE + 0x13)
+#define RTAS_READ_PCI_CONFIG                    (RTAS_TOKEN_BASE + 0x14)
+#define RTAS_WRITE_PCI_CONFIG                   (RTAS_TOKEN_BASE + 0x15)
+#define RTAS_IBM_READ_PCI_CONFIG                (RTAS_TOKEN_BASE + 0x16)
+#define RTAS_IBM_WRITE_PCI_CONFIG               (RTAS_TOKEN_BASE + 0x17)
+#define RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER  (RTAS_TOKEN_BASE + 0x18)
+#define RTAS_IBM_CHANGE_MSI                     (RTAS_TOKEN_BASE + 0x19)
+#define RTAS_SET_INDICATOR                      (RTAS_TOKEN_BASE + 0x1A)
+#define RTAS_SET_POWER_LEVEL                    (RTAS_TOKEN_BASE + 0x1B)
+#define RTAS_GET_POWER_LEVEL                    (RTAS_TOKEN_BASE + 0x1C)
+#define RTAS_GET_SENSOR_STATE                   (RTAS_TOKEN_BASE + 0x1D)
+#define RTAS_IBM_CONFIGURE_CONNECTOR            (RTAS_TOKEN_BASE + 0x1E)
+#define RTAS_IBM_OS_TERM                        (RTAS_TOKEN_BASE + 0x1F)
+#define RTAS_IBM_SET_EEH_OPTION                 (RTAS_TOKEN_BASE + 0x20)
+#define RTAS_IBM_GET_CONFIG_ADDR_INFO2          (RTAS_TOKEN_BASE + 0x21)
+#define RTAS_IBM_READ_SLOT_RESET_STATE2         (RTAS_TOKEN_BASE + 0x22)
+#define RTAS_IBM_SET_SLOT_RESET                 (RTAS_TOKEN_BASE + 0x23)
+#define RTAS_IBM_CONFIGURE_PE                   (RTAS_TOKEN_BASE + 0x24)
+#define RTAS_IBM_SLOT_ERROR_DETAIL              (RTAS_TOKEN_BASE + 0x25)
+#define RTAS_IBM_QUERY_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x26)
+#define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
+#define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
+#define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
+#define RTAS_IBM_SUSPEND_ME                     (RTAS_TOKEN_BASE + 0x2A)
+#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2B)
+#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2C)
+
+#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2D)
+
+/* RTAS ibm,get-system-parameter token values */
+#define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
+#define RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE        42
+#define RTAS_SYSPARM_UUID                        48
+
+/* RTAS indicator/sensor types
+ *
+ * as defined by PAPR+ 2.7 7.3.5.4, Table 41
+ *
+ * NOTE: currently only DR-related sensors are implemented here
+ */
+#define RTAS_SENSOR_TYPE_ISOLATION_STATE        9001
+#define RTAS_SENSOR_TYPE_DR                     9002
+#define RTAS_SENSOR_TYPE_ALLOCATION_STATE       9003
+#define RTAS_SENSOR_TYPE_ENTITY_SENSE RTAS_SENSOR_TYPE_ALLOCATION_STATE
+
+/* Possible values for the platform-processor-diagnostics-run-mode parameter
+ * of the RTAS ibm,get-system-parameter call.
+ */
+#define DIAGNOSTICS_RUN_MODE_DISABLED  0
+#define DIAGNOSTICS_RUN_MODE_STAGGERED 1
+#define DIAGNOSTICS_RUN_MODE_IMMEDIATE 2
+#define DIAGNOSTICS_RUN_MODE_PERIODIC  3
+
+static inline uint64_t ppc64_phys_to_real(uint64_t addr)
+{
+    return addr & ~0xF000000000000000ULL;
+}
+
+static inline uint32_t rtas_ld(target_ulong phys, int n)
+{
+    return ldl_be_phys(&address_space_memory,
+                       ppc64_phys_to_real(phys + 4 * n));
+}
+
+static inline uint64_t rtas_ldq(target_ulong phys, int n)
+{
+    return (uint64_t)rtas_ld(phys, n) << 32 | rtas_ld(phys, n + 1);
+}
+
+static inline void rtas_st(target_ulong phys, int n, uint32_t val)
+{
+    stl_be_phys(&address_space_memory, ppc64_phys_to_real(phys + 4 * n), val);
+}
+
+typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, SpaprMachineState *sm,
+                              uint32_t token,
+                              uint32_t nargs, target_ulong args,
+                              uint32_t nret, target_ulong rets);
+void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn);
+target_ulong spapr_rtas_call(PowerPCCPU *cpu, SpaprMachineState *sm,
+                             uint32_t token, uint32_t nargs, target_ulong args,
+                             uint32_t nret, target_ulong rets);
+void spapr_dt_rtas_tokens(void *fdt, int rtas);
+void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr);
+
+#define SPAPR_TCE_PAGE_SHIFT   12
+#define SPAPR_TCE_PAGE_SIZE    (1ULL << SPAPR_TCE_PAGE_SHIFT)
+#define SPAPR_TCE_PAGE_MASK    (SPAPR_TCE_PAGE_SIZE - 1)
+
+#define SPAPR_VIO_BASE_LIOBN    0x00000000
+#define SPAPR_VIO_LIOBN(reg)    (0x00000000 | (reg))
+#define SPAPR_PCI_LIOBN(phb_index, window_num) \
+    (0x80000000 | ((phb_index) << 8) | (window_num))
+#define SPAPR_IS_PCI_LIOBN(liobn)   (!!((liobn) & 0x80000000))
+#define SPAPR_PCI_DMA_WINDOW_NUM(liobn) ((liobn) & 0xff)
+
+#define RTAS_MIN_SIZE           20 /* hv_rtas_size in SLOF */
+#define RTAS_ERROR_LOG_MAX      2048
+
+/* Offset from rtas-base where error log is placed */
+#define RTAS_ERROR_LOG_OFFSET       0x30
+
+#define RTAS_EVENT_SCAN_RATE    1
+
+/* This helper should be used to encode interrupt specifiers when the related
+ * "interrupt-controller" node has its "#interrupt-cells" property set to 2 (ie,
+ * VIO devices, RTAS event sources and PHBs).
+ */
+static inline void spapr_dt_irq(uint32_t *intspec, int irq, bool is_lsi)
+{
+    intspec[0] = cpu_to_be32(irq);
+    intspec[1] = is_lsi ? cpu_to_be32(1) : 0;
+}
+
+
+#define TYPE_SPAPR_TCE_TABLE "spapr-tce-table"
+OBJECT_DECLARE_SIMPLE_TYPE(SpaprTceTable, SPAPR_TCE_TABLE)
+
+#define TYPE_SPAPR_IOMMU_MEMORY_REGION "spapr-iommu-memory-region"
+DECLARE_INSTANCE_CHECKER(IOMMUMemoryRegion, SPAPR_IOMMU_MEMORY_REGION,
+                         TYPE_SPAPR_IOMMU_MEMORY_REGION)
+
+struct SpaprTceTable {
+    DeviceState parent;
+    uint32_t liobn;
+    uint32_t nb_table;
+    uint64_t bus_offset;
+    uint32_t page_shift;
+    uint64_t *table;
+    uint32_t mig_nb_table;
+    uint64_t *mig_table;
+    bool bypass;
+    bool need_vfio;
+    bool skipping_replay;
+    bool def_win;
+    int fd;
+    MemoryRegion root;
+    IOMMUMemoryRegion iommu;
+    struct SpaprVioDevice *vdev; /* for @bypass migration compatibility only */
+    QLIST_ENTRY(SpaprTceTable) list;
+};
+
+SpaprTceTable *spapr_tce_find_by_liobn(target_ulong liobn);
+
+struct SpaprEventLogEntry {
+    uint32_t summary;
+    uint32_t extended_length;
+    void *extended_log;
+    QTAILQ_ENTRY(SpaprEventLogEntry) next;
+};
+
+void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space);
+void spapr_events_init(SpaprMachineState *sm);
+void spapr_dt_events(SpaprMachineState *sm, void *fdt);
+void close_htab_fd(SpaprMachineState *spapr);
+void spapr_setup_hpt(SpaprMachineState *spapr);
+void spapr_free_hpt(SpaprMachineState *spapr);
+void spapr_check_mmu_mode(bool guest_radix);
+SpaprTceTable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn);
+void spapr_tce_table_enable(SpaprTceTable *tcet,
+                            uint32_t page_shift, uint64_t bus_offset,
+                            uint32_t nb_table);
+void spapr_tce_table_disable(SpaprTceTable *tcet);
+void spapr_tce_set_need_vfio(SpaprTceTable *tcet, bool need_vfio);
+
+MemoryRegion *spapr_tce_get_iommu(SpaprTceTable *tcet);
+int spapr_dma_dt(void *fdt, int node_off, const char *propname,
+                 uint32_t liobn, uint64_t window, uint32_t size);
+int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
+                      SpaprTceTable *tcet);
+void spapr_pci_switch_vga(SpaprMachineState *spapr, bool big_endian);
+void spapr_hotplug_req_add_by_index(SpaprDrc *drc);
+void spapr_hotplug_req_remove_by_index(SpaprDrc *drc);
+void spapr_hotplug_req_add_by_count(SpaprDrcType drc_type,
+                                       uint32_t count);
+void spapr_hotplug_req_remove_by_count(SpaprDrcType drc_type,
+                                          uint32_t count);
+void spapr_hotplug_req_add_by_count_indexed(SpaprDrcType drc_type,
+                                            uint32_t count, uint32_t index);
+void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
+                                               uint32_t count, uint32_t index);
+int spapr_hpt_shift_for_ramsize(uint64_t ramsize);
+int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp);
+void spapr_clear_pending_events(SpaprMachineState *spapr);
+void spapr_clear_pending_hotplug_events(SpaprMachineState *spapr);
+void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev);
+int spapr_max_server_number(SpaprMachineState *spapr);
+void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
+                      uint64_t pte0, uint64_t pte1);
+void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
+
+/* DRC callbacks. */
+void spapr_core_release(DeviceState *dev);
+int spapr_core_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
+                           void *fdt, int *fdt_start_offset, Error **errp);
+void spapr_lmb_release(DeviceState *dev);
+int spapr_lmb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
+                          void *fdt, int *fdt_start_offset, Error **errp);
+void spapr_phb_release(DeviceState *dev);
+int spapr_phb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
+                          void *fdt, int *fdt_start_offset, Error **errp);
+
+void spapr_rtc_read(SpaprRtcState *rtc, struct tm *tm, uint32_t *ns);
+int spapr_rtc_import_offset(SpaprRtcState *rtc, int64_t legacy_offset);
+
+#define TYPE_SPAPR_RNG "spapr-rng"
+
+#define SPAPR_MEMORY_BLOCK_SIZE ((hwaddr)1 << 28) /* 256MB */
+
+/*
+ * This defines the maximum number of DIMM slots we can have for sPAPR
+ * guest. This is not defined by sPAPR but we are defining it to 32 slots
+ * based on default number of slots provided by PowerPC kernel.
+ */
+#define SPAPR_MAX_RAM_SLOTS     32
+
+/* 1GB alignment for hotplug memory region */
+#define SPAPR_DEVICE_MEM_ALIGN (1 * GiB)
+
+/*
+ * Number of 32 bit words in each LMB list entry in ibm,dynamic-memory
+ * property under ibm,dynamic-reconfiguration-memory node.
+ */
+#define SPAPR_DR_LMB_LIST_ENTRY_SIZE 6
+
+/*
+ * Defines for flag value in ibm,dynamic-memory property under
+ * ibm,dynamic-reconfiguration-memory node.
+ */
+#define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008
+#define SPAPR_LMB_FLAGS_DRC_INVALID 0x00000020
+#define SPAPR_LMB_FLAGS_RESERVED 0x00000080
+#define SPAPR_LMB_FLAGS_HOTREMOVABLE 0x00000100
+
+void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg);
+
+#define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
+
+int spapr_get_vcpu_id(PowerPCCPU *cpu);
+bool spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp);
+PowerPCCPU *spapr_find_cpu(int vcpu_id);
+
+int spapr_caps_pre_load(void *opaque);
+int spapr_caps_pre_save(void *opaque);
+
+/*
+ * Handling of optional capabilities
+ */
+extern const VMStateDescription vmstate_spapr_cap_htm;
+extern const VMStateDescription vmstate_spapr_cap_vsx;
+extern const VMStateDescription vmstate_spapr_cap_dfp;
+extern const VMStateDescription vmstate_spapr_cap_cfpc;
+extern const VMStateDescription vmstate_spapr_cap_sbbc;
+extern const VMStateDescription vmstate_spapr_cap_ibs;
+extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
+extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
+extern const VMStateDescription vmstate_spapr_cap_large_decr;
+extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
+extern const VMStateDescription vmstate_spapr_cap_fwnmi;
+extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate;
+extern const VMStateDescription vmstate_spapr_wdt;
+
+static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
+{
+    return spapr->eff.caps[cap];
+}
+
+void spapr_caps_init(SpaprMachineState *spapr);
+void spapr_caps_apply(SpaprMachineState *spapr);
+void spapr_caps_cpu_apply(SpaprMachineState *spapr, PowerPCCPU *cpu);
+void spapr_caps_add_properties(SpaprMachineClass *smc);
+int spapr_caps_post_migration(SpaprMachineState *spapr);
+
+bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
+                          Error **errp);
+/*
+ * XIVE definitions
+ */
+#define SPAPR_OV5_XIVE_LEGACY   0x0
+#define SPAPR_OV5_XIVE_EXPLOIT  0x40
+#define SPAPR_OV5_XIVE_BOTH     0x80 /* Only to advertise on the platform */
+
+void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
+void spapr_init_all_lpcrs(target_ulong value, target_ulong mask);
+hwaddr spapr_get_rtas_addr(void);
+bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr);
+
+void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, Error **errp);
+void spapr_vof_quiesce(MachineState *ms);
+bool spapr_vof_setprop(MachineState *ms, const char *path, const char *propname,
+                       void *val, int vallen);
+target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                target_ulong opcode, target_ulong *args);
+target_ulong spapr_vof_client_architecture_support(MachineState *ms,
+                                                   CPUState *cs,
+                                                   target_ulong ovec_addr);
+void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt);
+
+/* H_WATCHDOG */
+void spapr_watchdog_init(SpaprMachineState *spapr);
+
+#endif /* HW_SPAPR_H */
diff --git a/include/hw/ppc/spapr_cpu_core.h b/include/hw/ppc/spapr_cpu_core.h
new file mode 100644 (file)
index 0000000..69a52e3
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * sPAPR CPU core device.
+ *
+ * Copyright (C) 2016 Bharata B Rao <bharata@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef HW_SPAPR_CPU_CORE_H
+#define HW_SPAPR_CPU_CORE_H
+
+#include "hw/cpu/core.h"
+#include "hw/qdev-core.h"
+#include "target/ppc/cpu-qom.h"
+#include "target/ppc/cpu.h"
+#include "qom/object.h"
+
+#define TYPE_SPAPR_CPU_CORE "spapr-cpu-core"
+OBJECT_DECLARE_TYPE(SpaprCpuCore, SpaprCpuCoreClass,
+                    SPAPR_CPU_CORE)
+
+#define SPAPR_CPU_CORE_TYPE_NAME(model) model "-" TYPE_SPAPR_CPU_CORE
+
+struct SpaprCpuCore {
+    /*< private >*/
+    CPUCore parent_obj;
+
+    /*< public >*/
+    PowerPCCPU **threads;
+    int node_id;
+    bool pre_3_0_migration; /* older machine don't know about SpaprCpuState */
+};
+
+struct SpaprCpuCoreClass {
+    DeviceClass parent_class;
+    const char *cpu_type;
+};
+
+const char *spapr_get_cpu_core_type(const char *cpu_type);
+void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip,
+                               target_ulong r1, target_ulong r3,
+                               target_ulong r4);
+
+struct nested_ppc_state;
+
+typedef struct SpaprCpuState {
+    uint64_t vpa_addr;
+    uint64_t slb_shadow_addr, slb_shadow_size;
+    uint64_t dtl_addr, dtl_size;
+    bool prod; /* not migrated, only used to improve dispatch latencies */
+    struct ICPState *icp;
+    struct XiveTCTX *tctx;
+
+    /* Fields for nested-HV support */
+    bool in_nested; /* true while the L2 is executing */
+    struct nested_ppc_state *nested_host_state; /* holds the L1 state while L2 executes */
+} SpaprCpuState;
+
+static inline SpaprCpuState *spapr_cpu_state(PowerPCCPU *cpu)
+{
+    return (SpaprCpuState *)cpu->machine_data;
+}
+
+#endif
diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
new file mode 100644 (file)
index 0000000..02a63b3
--- /dev/null
@@ -0,0 +1,259 @@
+/*
+ * QEMU SPAPR Dynamic Reconfiguration Connector Implementation
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * Authors:
+ *  Michael Roth      <mdroth@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_SPAPR_DRC_H
+#define HW_SPAPR_DRC_H
+
+#include <libfdt.h>
+#include "qom/object.h"
+#include "sysemu/runstate.h"
+#include "hw/qdev-core.h"
+#include "qapi/error.h"
+
+#define TYPE_SPAPR_DR_CONNECTOR "spapr-dr-connector"
+#define SPAPR_DR_CONNECTOR_GET_CLASS(obj) \
+        OBJECT_GET_CLASS(SpaprDrcClass, obj, TYPE_SPAPR_DR_CONNECTOR)
+#define SPAPR_DR_CONNECTOR_CLASS(klass) \
+        OBJECT_CLASS_CHECK(SpaprDrcClass, klass, \
+                           TYPE_SPAPR_DR_CONNECTOR)
+#define SPAPR_DR_CONNECTOR(obj) OBJECT_CHECK(SpaprDrc, (obj), \
+                                             TYPE_SPAPR_DR_CONNECTOR)
+
+#define TYPE_SPAPR_DRC_PHYSICAL "spapr-drc-physical"
+#define SPAPR_DRC_PHYSICAL(obj) OBJECT_CHECK(SpaprDrcPhysical, (obj), \
+                                             TYPE_SPAPR_DRC_PHYSICAL)
+
+#define TYPE_SPAPR_DRC_LOGICAL "spapr-drc-logical"
+
+#define TYPE_SPAPR_DRC_CPU "spapr-drc-cpu"
+
+#define TYPE_SPAPR_DRC_PCI "spapr-drc-pci"
+
+#define TYPE_SPAPR_DRC_LMB "spapr-drc-lmb"
+
+#define TYPE_SPAPR_DRC_PHB "spapr-drc-phb"
+
+#define TYPE_SPAPR_DRC_PMEM "spapr-drc-pmem"
+
+/*
+ * Various hotplug types managed by SpaprDrc
+ *
+ * these are somewhat arbitrary, but to make things easier
+ * when generating DRC indexes later we've aligned the bit
+ * positions with the values used to assign DRC indexes on
+ * pSeries. we use those values as bit shifts to allow for
+ * the OR'ing of these values in various QEMU routines, but
+ * for values exposed to the guest (via DRC indexes for
+ * instance) we will use the shift amounts.
+ */
+typedef enum {
+    SPAPR_DR_CONNECTOR_TYPE_SHIFT_CPU = 1,
+    SPAPR_DR_CONNECTOR_TYPE_SHIFT_PHB = 2,
+    SPAPR_DR_CONNECTOR_TYPE_SHIFT_VIO = 3,
+    SPAPR_DR_CONNECTOR_TYPE_SHIFT_PCI = 4,
+    SPAPR_DR_CONNECTOR_TYPE_SHIFT_LMB = 8,
+    SPAPR_DR_CONNECTOR_TYPE_SHIFT_PMEM = 9,
+} SpaprDrcTypeShift;
+
+typedef enum {
+    SPAPR_DR_CONNECTOR_TYPE_ANY = ~0,
+    SPAPR_DR_CONNECTOR_TYPE_CPU = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_CPU,
+    SPAPR_DR_CONNECTOR_TYPE_PHB = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_PHB,
+    SPAPR_DR_CONNECTOR_TYPE_VIO = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_VIO,
+    SPAPR_DR_CONNECTOR_TYPE_PCI = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_PCI,
+    SPAPR_DR_CONNECTOR_TYPE_LMB = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_LMB,
+    SPAPR_DR_CONNECTOR_TYPE_PMEM = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_PMEM,
+} SpaprDrcType;
+
+/*
+ * set via set-indicator RTAS calls
+ * as documented by PAPR+ 2.7 13.5.3.4, Table 177
+ *
+ * isolated: put device under firmware control
+ * unisolated: claim OS control of device (may or may not be in use)
+ */
+typedef enum {
+    SPAPR_DR_ISOLATION_STATE_ISOLATED   = 0,
+    SPAPR_DR_ISOLATION_STATE_UNISOLATED = 1
+} SpaprDRIsolationState;
+
+/*
+ * set via set-indicator RTAS calls
+ * as documented by PAPR+ 2.7 13.5.3.4, Table 177
+ *
+ * unusable: mark device as unavailable to OS
+ * usable: mark device as available to OS
+ * exchange: (currently unused)
+ * recover: (currently unused)
+ */
+typedef enum {
+    SPAPR_DR_ALLOCATION_STATE_UNUSABLE  = 0,
+    SPAPR_DR_ALLOCATION_STATE_USABLE    = 1,
+    SPAPR_DR_ALLOCATION_STATE_EXCHANGE  = 2,
+    SPAPR_DR_ALLOCATION_STATE_RECOVER   = 3
+} SpaprDRAllocationState;
+
+/*
+ * DR-indicator (LED/visual indicator)
+ *
+ * set via set-indicator RTAS calls
+ * as documented by PAPR+ 2.7 13.5.3.4, Table 177,
+ * and PAPR+ 2.7 13.5.4.1, Table 180
+ *
+ * inactive: hotpluggable entity inactive and safely removable
+ * active: hotpluggable entity in use and not safely removable
+ * identify: (currently unused)
+ * action: (currently unused)
+ */
+typedef enum {
+    SPAPR_DR_INDICATOR_INACTIVE   = 0,
+    SPAPR_DR_INDICATOR_ACTIVE     = 1,
+    SPAPR_DR_INDICATOR_IDENTIFY   = 2,
+    SPAPR_DR_INDICATOR_ACTION     = 3,
+} SpaprDRIndicatorState;
+
+/*
+ * returned via get-sensor-state RTAS calls
+ * as documented by PAPR+ 2.7 13.5.3.3, Table 175:
+ *
+ * empty: connector slot empty (e.g. empty hotpluggable PCI slot)
+ * present: connector slot populated and device available to OS
+ * unusable: device not currently available to OS
+ * exchange: (currently unused)
+ * recover: (currently unused)
+ */
+typedef enum {
+    SPAPR_DR_ENTITY_SENSE_EMPTY     = 0,
+    SPAPR_DR_ENTITY_SENSE_PRESENT   = 1,
+    SPAPR_DR_ENTITY_SENSE_UNUSABLE  = 2,
+    SPAPR_DR_ENTITY_SENSE_EXCHANGE  = 3,
+    SPAPR_DR_ENTITY_SENSE_RECOVER   = 4,
+} SpaprDREntitySense;
+
+typedef enum {
+    SPAPR_DR_CC_RESPONSE_NEXT_SIB         = 1, /* currently unused */
+    SPAPR_DR_CC_RESPONSE_NEXT_CHILD       = 2,
+    SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY    = 3,
+    SPAPR_DR_CC_RESPONSE_PREV_PARENT      = 4,
+    SPAPR_DR_CC_RESPONSE_SUCCESS          = 0,
+    SPAPR_DR_CC_RESPONSE_ERROR            = -1,
+    SPAPR_DR_CC_RESPONSE_CONTINUE         = -2,
+    SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE = -9003,
+} SpaprDRCCResponse;
+
+typedef enum {
+    /*
+     * Values come from Fig. 12 in LoPAPR section 13.4
+     *
+     * These are exposed in the migration stream, so don't change
+     * them.
+     */
+    SPAPR_DRC_STATE_INVALID             = 0,
+    SPAPR_DRC_STATE_LOGICAL_UNUSABLE    = 1,
+    SPAPR_DRC_STATE_LOGICAL_AVAILABLE   = 2,
+    SPAPR_DRC_STATE_LOGICAL_UNISOLATE   = 3,
+    SPAPR_DRC_STATE_LOGICAL_CONFIGURED  = 4,
+    SPAPR_DRC_STATE_PHYSICAL_AVAILABLE  = 5,
+    SPAPR_DRC_STATE_PHYSICAL_POWERON    = 6,
+    SPAPR_DRC_STATE_PHYSICAL_UNISOLATE  = 7,
+    SPAPR_DRC_STATE_PHYSICAL_CONFIGURED = 8,
+} SpaprDrcState;
+
+typedef struct SpaprDrc {
+    /*< private >*/
+    DeviceState parent;
+
+    uint32_t id;
+    Object *owner;
+
+    uint32_t state;
+
+    /* RTAS ibm,configure-connector state */
+    /* (only valid in UNISOLATE state) */
+    int ccs_offset;
+    int ccs_depth;
+
+    /* device pointer, via link property */
+    DeviceState *dev;
+    bool unplug_requested;
+    void *fdt;
+    int fdt_start_offset;
+} SpaprDrc;
+
+struct SpaprMachineState;
+
+typedef struct SpaprDrcClass {
+    /*< private >*/
+    DeviceClass parent;
+    SpaprDrcState empty_state;
+    SpaprDrcState ready_state;
+
+    /*< public >*/
+    SpaprDrcTypeShift typeshift;
+    const char *typename; /* used in device tree, PAPR 13.5.2.6 & C.6.1 */
+    const char *drc_name_prefix; /* used other places in device tree */
+
+    SpaprDREntitySense (*dr_entity_sense)(SpaprDrc *drc);
+    uint32_t (*isolate)(SpaprDrc *drc);
+    uint32_t (*unisolate)(SpaprDrc *drc);
+    void (*release)(DeviceState *dev);
+
+    int (*dt_populate)(SpaprDrc *drc, struct SpaprMachineState *spapr,
+                       void *fdt, int *fdt_start_offset, Error **errp);
+} SpaprDrcClass;
+
+typedef struct SpaprDrcPhysical {
+    /*< private >*/
+    SpaprDrc parent;
+
+    /* DR-indicator */
+    uint32_t dr_indicator;
+} SpaprDrcPhysical;
+
+static inline bool spapr_drc_hotplugged(DeviceState *dev)
+{
+    return dev->hotplugged && !runstate_check(RUN_STATE_INMIGRATE);
+}
+
+/* Returns true if an unplug request completed */
+bool spapr_drc_reset(SpaprDrc *drc);
+
+uint32_t spapr_drc_index(SpaprDrc *drc);
+SpaprDrcType spapr_drc_type(SpaprDrc *drc);
+
+SpaprDrc *spapr_dr_connector_new(Object *owner, const char *type,
+                                         uint32_t id);
+SpaprDrc *spapr_drc_by_index(uint32_t index);
+SpaprDrc *spapr_drc_by_id(const char *type, uint32_t id);
+int spapr_dt_drc(void *fdt, int offset, Object *owner, uint32_t drc_type_mask);
+
+/*
+ * These functions respectively abort if called with a device already
+ * attached or no device attached. In the case of spapr_drc_attach(),
+ * this means that the attachability of the DRC *must* be checked
+ * beforehand (eg. check drc->dev at pre-plug).
+ */
+void spapr_drc_attach(SpaprDrc *drc, DeviceState *d);
+void spapr_drc_unplug_request(SpaprDrc *drc);
+
+/*
+ * Reset all DRCs, causing pending hot-plug/unplug requests to complete.
+ * Safely handles potential DRC removal (eg. PHBs or PCI bridges).
+ */
+void spapr_drc_reset_all(struct SpaprMachineState *spapr);
+
+static inline bool spapr_drc_unplug_requested(SpaprDrc *drc)
+{
+    return drc->unplug_requested;
+}
+
+#endif /* HW_SPAPR_DRC_H */
diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
new file mode 100644 (file)
index 0000000..4fd2d58
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ * QEMU PowerPC sPAPR IRQ backend definitions
+ *
+ * Copyright (c) 2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef HW_SPAPR_IRQ_H
+#define HW_SPAPR_IRQ_H
+
+#include "target/ppc/cpu-qom.h"
+#include "qom/object.h"
+
+/*
+ * The XIVE IRQ backend uses the same layout as the XICS backend but
+ * covers the full range of the IRQ number space. The IRQ numbers for
+ * the CPU IPIs are allocated at the bottom of this space, below 4K,
+ * to preserve compatibility with XICS which does not use that range.
+ */
+
+/*
+ * CPU IPI range (XIVE only)
+ */
+#define SPAPR_IRQ_IPI        0x0
+#define SPAPR_IRQ_NR_IPIS    0x1000
+
+/*
+ * IRQ range offsets per device type
+ */
+
+#define SPAPR_XIRQ_BASE      XICS_IRQ_BASE /* 0x1000 */
+#define SPAPR_IRQ_EPOW       (SPAPR_XIRQ_BASE + 0x0000)
+#define SPAPR_IRQ_HOTPLUG    (SPAPR_XIRQ_BASE + 0x0001)
+#define SPAPR_IRQ_VIO        (SPAPR_XIRQ_BASE + 0x0100)  /* 256 VIO devices */
+#define SPAPR_IRQ_PCI_LSI    (SPAPR_XIRQ_BASE + 0x0200)  /* 32+ PHBs devices */
+
+/* Offset of the dynamic range covered by the bitmap allocator */
+#define SPAPR_IRQ_MSI        (SPAPR_XIRQ_BASE + 0x0300)
+
+#define SPAPR_NR_XIRQS       0x1000
+
+struct SpaprMachineState;
+
+typedef struct SpaprInterruptController SpaprInterruptController;
+
+#define TYPE_SPAPR_INTC "spapr-interrupt-controller"
+#define SPAPR_INTC(obj)                                     \
+    INTERFACE_CHECK(SpaprInterruptController, (obj), TYPE_SPAPR_INTC)
+typedef struct SpaprInterruptControllerClass SpaprInterruptControllerClass;
+DECLARE_CLASS_CHECKERS(SpaprInterruptControllerClass, SPAPR_INTC,
+                       TYPE_SPAPR_INTC)
+
+struct SpaprInterruptControllerClass {
+    InterfaceClass parent;
+
+    int (*activate)(SpaprInterruptController *intc, uint32_t nr_servers,
+                    Error **errp);
+    void (*deactivate)(SpaprInterruptController *intc);
+
+    /*
+     * These methods will typically be called on all intcs, active and
+     * inactive
+     */
+    int (*cpu_intc_create)(SpaprInterruptController *intc,
+                            PowerPCCPU *cpu, Error **errp);
+    void (*cpu_intc_reset)(SpaprInterruptController *intc, PowerPCCPU *cpu);
+    void (*cpu_intc_destroy)(SpaprInterruptController *intc, PowerPCCPU *cpu);
+    int (*claim_irq)(SpaprInterruptController *intc, int irq, bool lsi,
+                     Error **errp);
+    void (*free_irq)(SpaprInterruptController *intc, int irq);
+
+    /* These methods should only be called on the active intc */
+    void (*set_irq)(SpaprInterruptController *intc, int irq, int val);
+    void (*print_info)(SpaprInterruptController *intc, Monitor *mon);
+    void (*dt)(SpaprInterruptController *intc, uint32_t nr_servers,
+               void *fdt, uint32_t phandle);
+    int (*post_load)(SpaprInterruptController *intc, int version_id);
+};
+
+void spapr_irq_update_active_intc(struct SpaprMachineState *spapr);
+
+int spapr_irq_cpu_intc_create(struct SpaprMachineState *spapr,
+                              PowerPCCPU *cpu, Error **errp);
+void spapr_irq_cpu_intc_reset(struct SpaprMachineState *spapr, PowerPCCPU *cpu);
+void spapr_irq_cpu_intc_destroy(struct SpaprMachineState *spapr, PowerPCCPU *cpu);
+void spapr_irq_print_info(struct SpaprMachineState *spapr, Monitor *mon);
+void spapr_irq_dt(struct SpaprMachineState *spapr, uint32_t nr_servers,
+                  void *fdt, uint32_t phandle);
+
+uint32_t spapr_irq_nr_msis(struct SpaprMachineState *spapr);
+int spapr_irq_msi_alloc(struct SpaprMachineState *spapr, uint32_t num, bool align,
+                        Error **errp);
+void spapr_irq_msi_free(struct SpaprMachineState *spapr, int irq, uint32_t num);
+
+typedef struct SpaprIrq {
+    bool        xics;
+    bool        xive;
+} SpaprIrq;
+
+extern SpaprIrq spapr_irq_xics;
+extern SpaprIrq spapr_irq_xics_legacy;
+extern SpaprIrq spapr_irq_xive;
+extern SpaprIrq spapr_irq_dual;
+
+void spapr_irq_init(struct SpaprMachineState *spapr, Error **errp);
+int spapr_irq_claim(struct SpaprMachineState *spapr, int irq, bool lsi, Error **errp);
+void spapr_irq_free(struct SpaprMachineState *spapr, int irq, int num);
+qemu_irq spapr_qirq(struct SpaprMachineState *spapr, int irq);
+int spapr_irq_post_load(struct SpaprMachineState *spapr, int version_id);
+void spapr_irq_reset(struct SpaprMachineState *spapr, Error **errp);
+int spapr_irq_get_phandle(struct SpaprMachineState *spapr, void *fdt, Error **errp);
+
+typedef int (*SpaprInterruptControllerInitKvm)(SpaprInterruptController *,
+                                               uint32_t, Error **);
+
+int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn,
+                       SpaprInterruptController *intc,
+                       uint32_t nr_servers,
+                       Error **errp);
+
+/*
+ * XICS legacy routines
+ */
+int spapr_irq_find(struct SpaprMachineState *spapr, int num, bool align, Error **errp);
+#define spapr_irq_findone(spapr, errp) spapr_irq_find(spapr, 1, false, errp)
+
+#endif
diff --git a/include/hw/ppc/spapr_nested.h b/include/hw/ppc/spapr_nested.h
new file mode 100644 (file)
index 0000000..d383486
--- /dev/null
@@ -0,0 +1,102 @@
+#ifndef HW_SPAPR_NESTED_H
+#define HW_SPAPR_NESTED_H
+
+#include "qemu/osdep.h"
+#include "target/ppc/cpu.h"
+
+/*
+ * Register state for entering a nested guest with H_ENTER_NESTED.
+ * New member must be added at the end.
+ */
+struct kvmppc_hv_guest_state {
+    uint64_t version;      /* version of this structure layout, must be first */
+    uint32_t lpid;
+    uint32_t vcpu_token;
+    /* These registers are hypervisor privileged (at least for writing) */
+    uint64_t lpcr;
+    uint64_t pcr;
+    uint64_t amor;
+    uint64_t dpdes;
+    uint64_t hfscr;
+    int64_t tb_offset;
+    uint64_t dawr0;
+    uint64_t dawrx0;
+    uint64_t ciabr;
+    uint64_t hdec_expiry;
+    uint64_t purr;
+    uint64_t spurr;
+    uint64_t ic;
+    uint64_t vtb;
+    uint64_t hdar;
+    uint64_t hdsisr;
+    uint64_t heir;
+    uint64_t asdr;
+    /* These are OS privileged but need to be set late in guest entry */
+    uint64_t srr0;
+    uint64_t srr1;
+    uint64_t sprg[4];
+    uint64_t pidr;
+    uint64_t cfar;
+    uint64_t ppr;
+    /* Version 1 ends here */
+    uint64_t dawr1;
+    uint64_t dawrx1;
+    /* Version 2 ends here */
+};
+
+/* Latest version of hv_guest_state structure */
+#define HV_GUEST_STATE_VERSION  2
+
+/* Linux 64-bit powerpc pt_regs struct, used by nested HV */
+struct kvmppc_pt_regs {
+    uint64_t gpr[32];
+    uint64_t nip;
+    uint64_t msr;
+    uint64_t orig_gpr3;    /* Used for restarting system calls */
+    uint64_t ctr;
+    uint64_t link;
+    uint64_t xer;
+    uint64_t ccr;
+    uint64_t softe;        /* Soft enabled/disabled */
+    uint64_t trap;         /* Reason for being here */
+    uint64_t dar;          /* Fault registers */
+    uint64_t dsisr;        /* on 4xx/Book-E used for ESR */
+    uint64_t result;       /* Result of a system call */
+};
+
+/*
+ * nested_ppc_state is used to save the host CPU state before switching it to
+ * the guest CPU state, to be restored on H_ENTER_NESTED exit.
+ */
+struct nested_ppc_state {
+    uint64_t gpr[32];
+    uint64_t lr;
+    uint64_t ctr;
+    uint64_t cfar;
+    uint64_t msr;
+    uint64_t nip;
+    uint32_t cr;
+
+    uint64_t xer;
+
+    uint64_t lpcr;
+    uint64_t lpidr;
+    uint64_t pidr;
+    uint64_t pcr;
+    uint64_t dpdes;
+    uint64_t hfscr;
+    uint64_t srr0;
+    uint64_t srr1;
+    uint64_t sprg0;
+    uint64_t sprg1;
+    uint64_t sprg2;
+    uint64_t sprg3;
+    uint64_t ppr;
+
+    int64_t tb_offset;
+};
+
+void spapr_register_nested(void);
+void spapr_exit_nested(PowerPCCPU *cpu, int excp);
+
+#endif /* HW_SPAPR_NESTED_H */
diff --git a/include/hw/ppc/spapr_numa.h b/include/hw/ppc/spapr_numa.h
new file mode 100644 (file)
index 0000000..7cb3367
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition NUMA associativity handling
+ *
+ * Copyright IBM Corp. 2020
+ *
+ * Authors:
+ *  Daniel Henrique Barboza      <danielhb413@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_SPAPR_NUMA_H
+#define HW_SPAPR_NUMA_H
+
+#include "hw/boards.h"
+#include "hw/ppc/spapr.h"
+
+/*
+ * Having both SpaprMachineState and MachineState as arguments
+ * feels odd, but it will spare a MACHINE() call inside the
+ * function. spapr_machine_init() is the only caller for it, and
+ * it has both pointers resolved already.
+ */
+void spapr_numa_associativity_init(SpaprMachineState *spapr,
+                                   MachineState *machine);
+void spapr_numa_associativity_check(SpaprMachineState *spapr);
+void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas);
+void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
+                                       int offset, int nodeid);
+int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt,
+                            int offset, PowerPCCPU *cpu);
+int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt,
+                                         int offset);
+unsigned int spapr_numa_initial_nvgpu_numa_id(MachineState *machine);
+
+#endif /* HW_SPAPR_NUMA_H */
diff --git a/include/hw/ppc/spapr_nvdimm.h b/include/hw/ppc/spapr_nvdimm.h
new file mode 100644 (file)
index 0000000..e9436cb
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * QEMU PowerPC PAPR SCM backend definitions
+ *
+ * Copyright (c) 2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef HW_SPAPR_NVDIMM_H
+#define HW_SPAPR_NVDIMM_H
+
+#include "hw/mem/nvdimm.h"
+
+typedef struct SpaprDrc SpaprDrc;
+typedef struct SpaprMachineState SpaprMachineState;
+
+int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
+                           void *fdt, int *fdt_start_offset, Error **errp);
+void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt);
+bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
+                           uint64_t size, Error **errp);
+void spapr_add_nvdimm(DeviceState *dev, uint64_t slot);
+void spapr_nvdimm_finish_flushes(void);
+
+#endif
diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h
new file mode 100644 (file)
index 0000000..c3e8b98
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * QEMU SPAPR Option/Architecture Vector Definitions
+ *
+ * Each architecture option is organized/documented by the following
+ * in LoPAPR 1.1, Table 244:
+ *
+ *   <vector number>: the bit-vector in which the option is located
+ *   <vector byte>: the byte offset of the vector entry
+ *   <vector bit>: the bit offset within the vector entry
+ *
+ * where each vector entry can be one or more bytes.
+ *
+ * Firmware expects a somewhat literal encoding of this bit-vector
+ * structure, where each entry is stored in little-endian so that the
+ * byte ordering reflects that of the documentation, but where each bit
+ * offset is from "left-to-right" in the traditional representation of
+ * a byte value where the MSB is the left-most bit. Thus, each
+ * individual byte encodes the option bits in reverse order of the
+ * documented bit.
+ *
+ * These definitions/helpers attempt to abstract away this internal
+ * representation so that we can define/set/test for individual option
+ * bits using only the documented values. This is done mainly by relying
+ * on a bitmap to approximate the documented "bit-vector" structure and
+ * handling conversations to-from the internal representation under the
+ * covers.
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Authors:
+ *  Michael Roth      <mdroth@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef SPAPR_OVEC_H
+#define SPAPR_OVEC_H
+
+#include "cpu.h"
+
+typedef struct SpaprOptionVector SpaprOptionVector;
+
+#define OV_BIT(byte, bit) ((byte - 1) * BITS_PER_BYTE + bit)
+
+/* option vector 1 */
+#define OV1_PPC_3_00            OV_BIT(3, 0) /* guest supports PowerPC 3.00? */
+
+/* option vector 5 */
+#define OV5_DRCONF_MEMORY       OV_BIT(2, 2)
+#define OV5_FORM1_AFFINITY      OV_BIT(5, 0)
+#define OV5_FORM2_AFFINITY      OV_BIT(5, 2)
+#define OV5_HP_EVT              OV_BIT(6, 5)
+#define OV5_HPT_RESIZE          OV_BIT(6, 7)
+#define OV5_DRMEM_V2            OV_BIT(22, 0)
+#define OV5_XIVE_BOTH           OV_BIT(23, 0)
+#define OV5_XIVE_EXPLOIT        OV_BIT(23, 1) /* 1=exploitation 0=legacy */
+
+/* ISA 3.00 MMU features: */
+#define OV5_MMU_BOTH            OV_BIT(24, 0) /* Radix and hash */
+#define OV5_MMU_RADIX_300       OV_BIT(24, 1) /* 1=Radix only, 0=Hash only */
+#define OV5_MMU_RADIX_GTSE      OV_BIT(26, 1) /* Radix GTSE */
+
+/* interfaces */
+SpaprOptionVector *spapr_ovec_new(void);
+SpaprOptionVector *spapr_ovec_clone(SpaprOptionVector *ov_orig);
+void spapr_ovec_intersect(SpaprOptionVector *ov,
+                          SpaprOptionVector *ov1,
+                          SpaprOptionVector *ov2);
+bool spapr_ovec_subset(SpaprOptionVector *ov1, SpaprOptionVector *ov2);
+void spapr_ovec_cleanup(SpaprOptionVector *ov);
+void spapr_ovec_set(SpaprOptionVector *ov, long bitnr);
+void spapr_ovec_clear(SpaprOptionVector *ov, long bitnr);
+bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr);
+bool spapr_ovec_empty(SpaprOptionVector *ov);
+SpaprOptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector);
+int spapr_dt_ovec(void *fdt, int fdt_offset,
+                  SpaprOptionVector *ov, const char *name);
+
+/* migration */
+extern const VMStateDescription vmstate_spapr_ovec;
+
+#endif /* SPAPR_OVEC_H */
diff --git a/include/hw/ppc/spapr_tpm_proxy.h b/include/hw/ppc/spapr_tpm_proxy.h
new file mode 100644 (file)
index 0000000..96d2a96
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * SPAPR TPM Proxy/Hypercall
+ *
+ * Copyright IBM Corp. 2019
+ *
+ * Authors:
+ *  Michael Roth      <mdroth@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_SPAPR_TPM_PROXY_H
+#define HW_SPAPR_TPM_PROXY_H
+
+#include "qom/object.h"
+#include "hw/qdev-core.h"
+
+#define TYPE_SPAPR_TPM_PROXY "spapr-tpm-proxy"
+OBJECT_DECLARE_SIMPLE_TYPE(SpaprTpmProxy, SPAPR_TPM_PROXY)
+
+struct SpaprTpmProxy {
+    /*< private >*/
+    DeviceState parent;
+
+    char *host_path;
+    int host_fd;
+};
+
+#endif /* HW_SPAPR_TPM_PROXY_H */
diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
new file mode 100644 (file)
index 0000000..7eae1a4
--- /dev/null
@@ -0,0 +1,153 @@
+#ifndef HW_SPAPR_VIO_H
+#define HW_SPAPR_VIO_H
+
+/*
+ * QEMU sPAPR VIO bus definitions
+ *
+ * Copyright (c) 2010 David Gibson, IBM Corporation <david@gibson.dropbear.id.au>
+ * Based on the s390 virtio bus definitions:
+ * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/ppc/spapr.h"
+#include "sysemu/dma.h"
+#include "hw/irq.h"
+#include "qom/object.h"
+
+#define TYPE_VIO_SPAPR_DEVICE "vio-spapr-device"
+OBJECT_DECLARE_TYPE(SpaprVioDevice, SpaprVioDeviceClass,
+                    VIO_SPAPR_DEVICE)
+
+#define TYPE_SPAPR_VIO_BUS "spapr-vio-bus"
+OBJECT_DECLARE_SIMPLE_TYPE(SpaprVioBus, SPAPR_VIO_BUS)
+
+#define TYPE_SPAPR_VIO_BRIDGE "spapr-vio-bridge"
+
+typedef struct SpaprVioCrq {
+    uint64_t qladdr;
+    uint32_t qsize;
+    uint32_t qnext;
+    int(*SendFunc)(struct SpaprVioDevice *vdev, uint8_t *crq);
+} SpaprVioCrq;
+
+
+struct SpaprVioDeviceClass {
+    DeviceClass parent_class;
+
+    const char *dt_name, *dt_type, *dt_compatible;
+    target_ulong signal_mask;
+    uint32_t rtce_window_size;
+    void (*realize)(SpaprVioDevice *dev, Error **errp);
+    void (*reset)(SpaprVioDevice *dev);
+    int (*devnode)(SpaprVioDevice *dev, void *fdt, int node_off);
+    const char *(*get_dt_compatible)(SpaprVioDevice *dev);
+};
+
+struct SpaprVioDevice {
+    DeviceState qdev;
+    uint32_t reg;
+    uint32_t irq;
+    uint64_t signal_state;
+    SpaprVioCrq crq;
+    AddressSpace as;
+    MemoryRegion mrroot;
+    MemoryRegion mrbypass;
+    SpaprTceTable *tcet;
+};
+
+#define DEFINE_SPAPR_PROPERTIES(type, field)           \
+        DEFINE_PROP_UINT32("reg", type, field.reg, -1)
+
+struct SpaprVioBus {
+    BusState bus;
+    uint32_t next_reg;
+};
+
+SpaprVioBus *spapr_vio_bus_init(void);
+SpaprVioDevice *spapr_vio_find_by_reg(SpaprVioBus *bus, uint32_t reg);
+void spapr_dt_vdevice(SpaprVioBus *bus, void *fdt);
+gchar *spapr_vio_stdout_path(SpaprVioBus *bus);
+
+static inline void spapr_vio_irq_pulse(SpaprVioDevice *dev)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+
+    qemu_irq_pulse(spapr_qirq(spapr, dev->irq));
+}
+
+static inline bool spapr_vio_dma_valid(SpaprVioDevice *dev, uint64_t taddr,
+                                       uint32_t size, DMADirection dir)
+{
+    return dma_memory_valid(&dev->as, taddr, size, dir, MEMTXATTRS_UNSPECIFIED);
+}
+
+static inline int spapr_vio_dma_read(SpaprVioDevice *dev, uint64_t taddr,
+                                     void *buf, uint32_t size)
+{
+    return (dma_memory_read(&dev->as, taddr,
+                            buf, size, MEMTXATTRS_UNSPECIFIED) != 0) ?
+        H_DEST_PARM : H_SUCCESS;
+}
+
+static inline int spapr_vio_dma_write(SpaprVioDevice *dev, uint64_t taddr,
+                                      const void *buf, uint32_t size)
+{
+    return (dma_memory_write(&dev->as, taddr,
+                             buf, size, MEMTXATTRS_UNSPECIFIED) != 0) ?
+        H_DEST_PARM : H_SUCCESS;
+}
+
+static inline int spapr_vio_dma_set(SpaprVioDevice *dev, uint64_t taddr,
+                                    uint8_t c, uint32_t size)
+{
+    return (dma_memory_set(&dev->as, taddr,
+                           c, size, MEMTXATTRS_UNSPECIFIED) != 0) ?
+        H_DEST_PARM : H_SUCCESS;
+}
+
+#define vio_stb(_dev, _addr, _val) \
+        (stb_dma(&(_dev)->as, (_addr), (_val), MEMTXATTRS_UNSPECIFIED))
+#define vio_sth(_dev, _addr, _val) \
+        (stw_be_dma(&(_dev)->as, (_addr), (_val), MEMTXATTRS_UNSPECIFIED))
+#define vio_stl(_dev, _addr, _val) \
+        (stl_be_dma(&(_dev)->as, (_addr), (_val), MEMTXATTRS_UNSPECIFIED))
+#define vio_stq(_dev, _addr, _val) \
+        (stq_be_dma(&(_dev)->as, (_addr), (_val), MEMTXATTRS_UNSPECIFIED))
+#define vio_ldq(_dev, _addr) \
+        ({ \
+            uint64_t _val; \
+            ldq_be_dma(&(_dev)->as, (_addr), &_val, MEMTXATTRS_UNSPECIFIED); \
+            _val; \
+        })
+
+int spapr_vio_send_crq(SpaprVioDevice *dev, uint8_t *crq);
+
+SpaprVioDevice *vty_lookup(SpaprMachineState *spapr, target_ulong reg);
+void vty_putchars(SpaprVioDevice *sdev, uint8_t *buf, int len);
+void spapr_vty_create(SpaprVioBus *bus, Chardev *chardev);
+void spapr_vlan_create(SpaprVioBus *bus, NICInfo *nd);
+void spapr_vscsi_create(SpaprVioBus *bus);
+
+SpaprVioDevice *spapr_vty_get_default(SpaprVioBus *bus);
+
+extern const VMStateDescription vmstate_spapr_vio;
+
+#define VMSTATE_SPAPR_VIO(_f, _s) \
+    VMSTATE_STRUCT(_f, _s, 0, vmstate_spapr_vio, SpaprVioDevice)
+
+void spapr_vio_set_bypass(SpaprVioDevice *dev, bool bypass);
+
+#endif /* HW_SPAPR_VIO_H */
diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
new file mode 100644 (file)
index 0000000..b282960
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * QEMU PowerPC sPAPR XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_SPAPR_XIVE_H
+#define PPC_SPAPR_XIVE_H
+
+#include "hw/ppc/spapr_irq.h"
+#include "hw/ppc/xive.h"
+
+#define TYPE_SPAPR_XIVE "spapr-xive"
+#define SPAPR_XIVE(obj) OBJECT_CHECK(SpaprXive, (obj), TYPE_SPAPR_XIVE)
+#define SPAPR_XIVE_CLASS(klass)                                         \
+    OBJECT_CLASS_CHECK(SpaprXiveClass, (klass), TYPE_SPAPR_XIVE)
+#define SPAPR_XIVE_GET_CLASS(obj)                               \
+    OBJECT_GET_CLASS(SpaprXiveClass, (obj), TYPE_SPAPR_XIVE)
+
+typedef struct SpaprXive {
+    XiveRouter    parent;
+
+    /* Internal interrupt source for IPIs and virtual devices */
+    XiveSource    source;
+    hwaddr        vc_base;
+
+    /* END ESB MMIOs */
+    XiveENDSource end_source;
+    hwaddr        end_base;
+
+    /* DT */
+    gchar *nodename;
+
+    /* Routing table */
+    XiveEAS       *eat;
+    uint32_t      nr_irqs;
+    XiveEND       *endt;
+    uint32_t      nr_ends;
+
+    /* TIMA mapping address */
+    hwaddr        tm_base;
+    MemoryRegion  tm_mmio;
+
+    /* KVM support */
+    int           fd;
+    void          *tm_mmap;
+    MemoryRegion  tm_mmio_kvm;
+    VMChangeStateEntry *change;
+
+    uint8_t       hv_prio;
+} SpaprXive;
+
+typedef struct SpaprXiveClass {
+    XiveRouterClass parent;
+
+    DeviceRealize parent_realize;
+} SpaprXiveClass;
+
+/*
+ * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
+ * to the controller block id value. It can nevertheless be changed
+ * for testing purpose.
+ */
+#define SPAPR_XIVE_BLOCK_ID 0x0
+
+struct SpaprMachineState;
+void spapr_xive_hcall_init(struct SpaprMachineState *spapr);
+void spapr_xive_mmio_set_enabled(SpaprXive *xive, bool enable);
+void spapr_xive_map_mmio(SpaprXive *xive);
+
+int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx,
+                             uint32_t *out_server, uint8_t *out_prio);
+
+/*
+ * KVM XIVE device helpers
+ */
+int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
+                        Error **errp);
+void kvmppc_xive_disconnect(SpaprInterruptController *intc);
+void kvmppc_xive_reset(SpaprXive *xive, Error **errp);
+int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
+                                  Error **errp);
+void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp);
+uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
+                            uint64_t data, bool write);
+int kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
+                                 uint32_t end_idx, XiveEND *end,
+                                 Error **errp);
+int kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
+                                 uint32_t end_idx, XiveEND *end,
+                                 Error **errp);
+void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp);
+int kvmppc_xive_pre_save(SpaprXive *xive);
+int kvmppc_xive_post_load(SpaprXive *xive, int version_id);
+
+#endif /* PPC_SPAPR_XIVE_H */
diff --git a/include/hw/ppc/vof.h b/include/hw/ppc/vof.h
new file mode 100644 (file)
index 0000000..d3f293d
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * Virtual Open Firmware
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef HW_VOF_H
+#define HW_VOF_H
+
+#include "qom/object.h"
+#include "exec/address-spaces.h"
+#include "exec/memory.h"
+#include "exec/cpu-defs.h"
+
+typedef struct Vof {
+    uint64_t top_addr; /* copied from rma_size */
+    GArray *claimed; /* array of SpaprOfClaimed */
+    uint64_t claimed_base;
+    GHashTable *of_instances; /* ihandle -> SpaprOfInstance */
+    uint32_t of_instance_last;
+    char *bootargs;
+    long fw_size;
+} Vof;
+
+int vof_client_call(MachineState *ms, Vof *vof, void *fdt,
+                    target_ulong args_real);
+uint64_t vof_claim(Vof *vof, uint64_t virt, uint64_t size, uint64_t align);
+void vof_init(Vof *vof, uint64_t top_addr, Error **errp);
+void vof_cleanup(Vof *vof);
+void vof_build_dt(void *fdt, Vof *vof);
+uint32_t vof_client_open_store(void *fdt, Vof *vof, const char *nodename,
+                               const char *prop, const char *path);
+
+#define TYPE_VOF_MACHINE_IF "vof-machine-if"
+
+typedef struct VofMachineIfClass VofMachineIfClass;
+DECLARE_CLASS_CHECKERS(VofMachineIfClass, VOF_MACHINE, TYPE_VOF_MACHINE_IF)
+
+struct VofMachineIfClass {
+    InterfaceClass parent;
+    target_ulong (*client_architecture_support)(MachineState *ms, CPUState *cs,
+                                                target_ulong vec);
+    void (*quiesce)(MachineState *ms);
+    bool (*setprop)(MachineState *ms, const char *path, const char *propname,
+                    void *val, int vallen);
+};
+
+/*
+ * Initial stack size is from
+ * https://www.devicetree.org/open-firmware/bindings/ppc/release/ppc-2_1.html#REF27292
+ *
+ * "Client programs shall be invoked with a valid stack pointer (r1) with
+ * at least 32K bytes of memory available for stack growth".
+ */
+#define VOF_STACK_SIZE       0x8000
+
+#define VOF_MEM_READ(pa, buf, size) \
+    address_space_read(&address_space_memory, \
+    (pa), MEMTXATTRS_UNSPECIFIED, (buf), (size))
+#define VOF_MEM_WRITE(pa, buf, size) \
+    address_space_write(&address_space_memory, \
+    (pa), MEMTXATTRS_UNSPECIFIED, (buf), (size))
+
+#define PROM_ERROR          (~0U)
+
+#endif /* HW_VOF_H */
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
new file mode 100644 (file)
index 0000000..95ead0d
--- /dev/null
@@ -0,0 +1,196 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics
+ *
+ * Copyright (c) 2010,2011 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ */
+
+#ifndef XICS_H
+#define XICS_H
+
+#include "exec/memory.h"
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+#define XICS_IPI        0x2
+#define XICS_BUID       0x1
+#define XICS_IRQ_BASE   (XICS_BUID << 12)
+
+/*
+ * We currently only support one BUID which is our interrupt base
+ * (the kernel implementation supports more but we don't exploit
+ *  that yet)
+ */
+typedef struct PnvICPState PnvICPState;
+typedef struct ICSStateClass ICSStateClass;
+typedef struct ICSState ICSState;
+typedef struct ICSIRQState ICSIRQState;
+typedef struct XICSFabric XICSFabric;
+
+#define TYPE_ICP "icp"
+OBJECT_DECLARE_TYPE(ICPState, ICPStateClass,
+                    ICP)
+
+#define TYPE_PNV_ICP "pnv-icp"
+DECLARE_INSTANCE_CHECKER(PnvICPState, PNV_ICP,
+                         TYPE_PNV_ICP)
+
+
+struct ICPStateClass {
+    DeviceClass parent_class;
+
+    DeviceRealize parent_realize;
+};
+
+struct ICPState {
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
+    CPUState *cs;
+    ICSState *xirr_owner;
+    uint32_t xirr;
+    uint8_t pending_priority;
+    uint8_t mfrr;
+    qemu_irq output;
+
+    XICSFabric *xics;
+};
+
+#define ICP_PROP_XICS "xics"
+#define ICP_PROP_CPU "cpu"
+
+struct PnvICPState {
+    ICPState parent_obj;
+
+    MemoryRegion mmio;
+    uint32_t links[3];
+};
+
+#define TYPE_ICS "ics"
+DECLARE_OBJ_CHECKERS(ICSState, ICSStateClass,
+                     ICS, TYPE_ICS)
+
+
+struct ICSStateClass {
+    DeviceClass parent_class;
+
+    DeviceRealize parent_realize;
+    ResettablePhases parent_phases;
+
+    void (*reject)(ICSState *s, uint32_t irq);
+    void (*resend)(ICSState *s);
+};
+
+struct ICSState {
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
+    uint32_t nr_irqs;
+    uint32_t offset;
+    ICSIRQState *irqs;
+    XICSFabric *xics;
+};
+
+#define ICS_PROP_XICS "xics"
+
+static inline bool ics_valid_irq(ICSState *ics, uint32_t nr)
+{
+    return (nr >= ics->offset) && (nr < (ics->offset + ics->nr_irqs));
+}
+
+struct ICSIRQState {
+    uint32_t server;
+    uint8_t priority;
+    uint8_t saved_priority;
+#define XICS_STATUS_ASSERTED           0x1
+#define XICS_STATUS_SENT               0x2
+#define XICS_STATUS_REJECTED           0x4
+#define XICS_STATUS_MASKED_PENDING     0x8
+#define XICS_STATUS_PRESENTED          0x10
+#define XICS_STATUS_QUEUED             0x20
+    uint8_t status;
+/* (flags & XICS_FLAGS_IRQ_MASK) == 0 means the interrupt is not allocated */
+#define XICS_FLAGS_IRQ_LSI             0x1
+#define XICS_FLAGS_IRQ_MSI             0x2
+#define XICS_FLAGS_IRQ_MASK            0x3
+    uint8_t flags;
+};
+
+#define TYPE_XICS_FABRIC "xics-fabric"
+#define XICS_FABRIC(obj)                                     \
+    INTERFACE_CHECK(XICSFabric, (obj), TYPE_XICS_FABRIC)
+typedef struct XICSFabricClass XICSFabricClass;
+DECLARE_CLASS_CHECKERS(XICSFabricClass, XICS_FABRIC,
+                       TYPE_XICS_FABRIC)
+
+struct XICSFabricClass {
+    InterfaceClass parent;
+    ICSState *(*ics_get)(XICSFabric *xi, int irq);
+    void (*ics_resend)(XICSFabric *xi);
+    ICPState *(*icp_get)(XICSFabric *xi, int server);
+};
+
+ICPState *xics_icp_get(XICSFabric *xi, int server);
+
+/* Internal XICS interfaces */
+void icp_set_cppr(ICPState *icp, uint8_t cppr);
+void icp_set_mfrr(ICPState *icp, uint8_t mfrr);
+uint32_t icp_accept(ICPState *ss);
+uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr);
+void icp_eoi(ICPState *icp, uint32_t xirr);
+void icp_irq(ICSState *ics, int server, int nr, uint8_t priority);
+void icp_reset(ICPState *icp);
+
+void ics_write_xive(ICSState *ics, int nr, int server,
+                    uint8_t priority, uint8_t saved_priority);
+void ics_set_irq(void *opaque, int srcno, int val);
+
+static inline bool ics_irq_free(ICSState *ics, uint32_t srcno)
+{
+    return !(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK);
+}
+
+void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
+void icp_pic_print_info(ICPState *icp, Monitor *mon);
+void ics_pic_print_info(ICSState *ics, Monitor *mon);
+
+void ics_resend(ICSState *ics);
+void icp_resend(ICPState *ss);
+
+Object *icp_create(Object *cpu, const char *type, XICSFabric *xi,
+                   Error **errp);
+void icp_destroy(ICPState *icp);
+
+/* KVM */
+void icp_get_kvm_state(ICPState *icp);
+int icp_set_kvm_state(ICPState *icp, Error **errp);
+void icp_synchronize_state(ICPState *icp);
+void icp_kvm_realize(DeviceState *dev, Error **errp);
+
+void ics_get_kvm_state(ICSState *ics);
+int ics_set_kvm_state_one(ICSState *ics, int srcno, Error **errp);
+int ics_set_kvm_state(ICSState *ics, Error **errp);
+void ics_synchronize_state(ICSState *ics);
+void ics_kvm_set_irq(ICSState *ics, int srcno, int val);
+
+#endif /* XICS_H */
diff --git a/include/hw/ppc/xics_spapr.h b/include/hw/ppc/xics_spapr.h
new file mode 100644 (file)
index 0000000..de752c0
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics
+ *
+ * Copyright (c) 2010, 2011 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XICS_SPAPR_H
+#define XICS_SPAPR_H
+
+#include "hw/ppc/spapr.h"
+#include "qom/object.h"
+
+#define TYPE_ICS_SPAPR "ics-spapr"
+/* This is reusing the ICSState typedef from TYPE_ICS */
+DECLARE_INSTANCE_CHECKER(ICSState, ICS_SPAPR,
+                         TYPE_ICS_SPAPR)
+
+int xics_kvm_connect(SpaprInterruptController *intc, uint32_t nr_servers,
+                     Error **errp);
+void xics_kvm_disconnect(SpaprInterruptController *intc);
+bool xics_kvm_has_broken_disconnect(void);
+
+#endif /* XICS_SPAPR_H */
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
new file mode 100644 (file)
index 0000000..f120874
--- /dev/null
@@ -0,0 +1,549 @@
+/*
+ * QEMU PowerPC XIVE interrupt controller model
+ *
+ *
+ * The POWER9 processor comes with a new interrupt controller, called
+ * XIVE as "eXternal Interrupt Virtualization Engine".
+ *
+ * = Overall architecture
+ *
+ *
+ *              XIVE Interrupt Controller
+ *              +------------------------------------+      IPIs
+ *              | +---------+ +---------+ +--------+ |    +-------+
+ *              | |VC       | |CQ       | |PC      |----> | CORES |
+ *              | |     esb | |         | |        |----> |       |
+ *              | |     eas | |  Bridge | |   tctx |----> |       |
+ *              | |SC   end | |         | |    nvt | |    |       |
+ *  +------+    | +---------+ +----+----+ +--------+ |    +-+-+-+-+
+ *  | RAM  |    +------------------|-----------------+      | | |
+ *  |      |                       |                        | | |
+ *  |      |                       |                        | | |
+ *  |      |  +--------------------v------------------------v-v-v--+    other
+ *  |      <--+                     Power Bus                      +--> chips
+ *  |  esb |  +---------+-----------------------+------------------+
+ *  |  eas |            |                       |
+ *  |  end |         +--|------+                |
+ *  |  nvt |       +----+----+ |           +----+----+
+ *  +------+       |SC       | |           |SC       |
+ *                 |         | |           |         |
+ *                 | PQ-bits | |           | PQ-bits |
+ *                 | local   |-+           |  in VC  |
+ *                 +---------+             +---------+
+ *                    PCIe                 NX,NPU,CAPI
+ *
+ *                   SC: Source Controller (aka. IVSE)
+ *                   VC: Virtualization Controller (aka. IVRE)
+ *                   PC: Presentation Controller (aka. IVPE)
+ *                   CQ: Common Queue (Bridge)
+ *
+ *              PQ-bits: 2 bits source state machine (P:pending Q:queued)
+ *                  esb: Event State Buffer (Array of PQ bits in an IVSE)
+ *                  eas: Event Assignment Structure
+ *                  end: Event Notification Descriptor
+ *                  nvt: Notification Virtual Target
+ *                 tctx: Thread interrupt Context
+ *
+ *
+ * The XIVE IC is composed of three sub-engines :
+ *
+ * - Interrupt Virtualization Source Engine (IVSE), or Source
+ *   Controller (SC). These are found in PCI PHBs, in the PSI host
+ *   bridge controller, but also inside the main controller for the
+ *   core IPIs and other sub-chips (NX, CAP, NPU) of the
+ *   chip/processor. They are configured to feed the IVRE with events.
+ *
+ * - Interrupt Virtualization Routing Engine (IVRE) or Virtualization
+ *   Controller (VC). Its job is to match an event source with an
+ *   Event Notification Descriptor (END).
+ *
+ * - Interrupt Virtualization Presentation Engine (IVPE) or
+ *   Presentation Controller (PC). It maintains the interrupt context
+ *   state of each thread and handles the delivery of the external
+ *   exception to the thread.
+ *
+ * In XIVE 1.0, the sub-engines used to be referred as:
+ *
+ *   SC     Source Controller
+ *   VC     Virtualization Controller
+ *   PC     Presentation Controller
+ *   CQ     Common Queue (PowerBUS Bridge)
+ *
+ *
+ * = XIVE internal tables
+ *
+ * Each of the sub-engines uses a set of tables to redirect exceptions
+ * from event sources to CPU threads.
+ *
+ *                                           +-------+
+ *   User or OS                              |  EQ   |
+ *       or                          +------>|entries|
+ *   Hypervisor                      |       |  ..   |
+ *     Memory                        |       +-------+
+ *                                   |           ^
+ *                                   |           |
+ *              +-------------------------------------------------+
+ *                                   |           |
+ *   Hypervisor      +------+    +---+--+    +---+--+   +------+
+ *     Memory        | ESB  |    | EAT  |    | ENDT |   | NVTT |
+ *    (skiboot)      +----+-+    +----+-+    +----+-+   +------+
+ *                     ^  |        ^  |        ^  |       ^
+ *                     |  |        |  |        |  |       |
+ *              +-------------------------------------------------+
+ *                     |  |        |  |        |  |       |
+ *                     |  |        |  |        |  |       |
+ *                +----|--|--------|--|--------|--|-+   +-|-----+    +------+
+ *                |    |  |        |  |        |  | |   | | tctx|    |Thread|
+ *   IPI or   --> |    +  v        +  v        +  v |---| +  .. |----->     |
+ *  HW events --> |                                 |   |       |    |      |
+ *    IVSE        |             IVRE                |   | IVPE  |    +------+
+ *                +---------------------------------+   +-------+
+ *
+ *
+ *
+ * The IVSE have a 2-bits state machine, P for pending and Q for queued,
+ * for each source that allows events to be triggered. They are stored in
+ * an Event State Buffer (ESB) array and can be controlled by MMIOs.
+ *
+ * If the event is let through, the IVRE looks up in the Event Assignment
+ * Structure (EAS) table for an Event Notification Descriptor (END)
+ * configured for the source. Each Event Notification Descriptor defines
+ * a notification path to a CPU and an in-memory Event Queue, in which
+ * will be enqueued an EQ data for the OS to pull.
+ *
+ * The IVPE determines if a Notification Virtual Target (NVT) can
+ * handle the event by scanning the thread contexts of the VCPUs
+ * dispatched on the processor HW threads. It maintains the state of
+ * the thread interrupt context (TCTX) of each thread in a NVT table.
+ *
+ * = Acronyms
+ *
+ *          Description                     In XIVE 1.0, used to be referred as
+ *
+ *   EAS    Event Assignment Structure      IVE   Interrupt Virt. Entry
+ *   EAT    Event Assignment Table          IVT   Interrupt Virt. Table
+ *   ENDT   Event Notif. Descriptor Table   EQDT  Event Queue Desc. Table
+ *   EQ     Event Queue                     same
+ *   ESB    Event State Buffer              SBE   State Bit Entry
+ *   NVT    Notif. Virtual Target           VPD   Virtual Processor Desc.
+ *   NVTT   Notif. Virtual Target Table     VPDT  Virtual Processor Desc. Table
+ *   TCTX   Thread interrupt Context
+ *
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef PPC_XIVE_H
+#define PPC_XIVE_H
+
+#include "sysemu/kvm.h"
+#include "hw/sysbus.h"
+#include "hw/ppc/xive_regs.h"
+#include "qom/object.h"
+
+/*
+ * XIVE Notifier (Interface between Source and Router)
+ */
+
+typedef struct XiveNotifier XiveNotifier;
+
+#define TYPE_XIVE_NOTIFIER "xive-notifier"
+#define XIVE_NOTIFIER(obj)                                     \
+    INTERFACE_CHECK(XiveNotifier, (obj), TYPE_XIVE_NOTIFIER)
+typedef struct XiveNotifierClass XiveNotifierClass;
+DECLARE_CLASS_CHECKERS(XiveNotifierClass, XIVE_NOTIFIER,
+                       TYPE_XIVE_NOTIFIER)
+
+struct XiveNotifierClass {
+    InterfaceClass parent;
+    void (*notify)(XiveNotifier *xn, uint32_t lisn, bool pq_checked);
+};
+
+/*
+ * XIVE Interrupt Source
+ */
+
+#define TYPE_XIVE_SOURCE "xive-source"
+OBJECT_DECLARE_SIMPLE_TYPE(XiveSource, XIVE_SOURCE)
+
+/*
+ * XIVE Interrupt Source characteristics, which define how the ESB are
+ * controlled.
+ */
+#define XIVE_SRC_H_INT_ESB     0x1 /* ESB managed with hcall H_INT_ESB */
+#define XIVE_SRC_STORE_EOI     0x2 /* Store EOI supported */
+#define XIVE_SRC_PQ_DISABLE    0x4 /* Disable check on the PQ state bits */
+
+struct XiveSource {
+    DeviceState parent;
+
+    /* IRQs */
+    uint32_t        nr_irqs;
+    unsigned long   *lsi_map;
+
+    /* PQ bits and LSI assertion bit */
+    uint8_t         *status;
+    uint8_t         reset_pq; /* PQ state on reset */
+
+    /* ESB memory region */
+    uint64_t        esb_flags;
+    uint32_t        esb_shift;
+    MemoryRegion    esb_mmio;
+    MemoryRegion    esb_mmio_emulated;
+
+    /* KVM support */
+    void            *esb_mmap;
+    MemoryRegion    esb_mmio_kvm;
+
+    XiveNotifier    *xive;
+};
+
+/*
+ * ESB MMIO setting. Can be one page, for both source triggering and
+ * source management, or two different pages. See below for magic
+ * values.
+ */
+#define XIVE_ESB_4K          12 /* PSI HB only */
+#define XIVE_ESB_4K_2PAGE    13
+#define XIVE_ESB_64K         16
+#define XIVE_ESB_64K_2PAGE   17
+
+static inline bool xive_source_esb_has_2page(XiveSource *xsrc)
+{
+    return xsrc->esb_shift == XIVE_ESB_64K_2PAGE ||
+        xsrc->esb_shift == XIVE_ESB_4K_2PAGE;
+}
+
+static inline size_t xive_source_esb_len(XiveSource *xsrc)
+{
+    return (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+}
+
+/* The trigger page is always the first/even page */
+static inline hwaddr xive_source_esb_page(XiveSource *xsrc, uint32_t srcno)
+{
+    assert(srcno < xsrc->nr_irqs);
+    return (1ull << xsrc->esb_shift) * srcno;
+}
+
+/* In a two pages ESB MMIO setting, the odd page is for management */
+static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno)
+{
+    hwaddr addr = xive_source_esb_page(xsrc, srcno);
+
+    if (xive_source_esb_has_2page(xsrc)) {
+        addr += (1 << (xsrc->esb_shift - 1));
+    }
+
+    return addr;
+}
+
+/*
+ * Each interrupt source has a 2-bit state machine which can be
+ * controlled by MMIO. P indicates that an interrupt is pending (has
+ * been sent to a queue and is waiting for an EOI). Q indicates that
+ * the interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee that a
+ * given interrupt only occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ */
+#define XIVE_STATUS_ASSERTED  0x4  /* Extra bit for LSI */
+#define XIVE_ESB_VAL_P        0x2
+#define XIVE_ESB_VAL_Q        0x1
+
+#define XIVE_ESB_RESET        0x0
+#define XIVE_ESB_PENDING      XIVE_ESB_VAL_P
+#define XIVE_ESB_QUEUED       (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q)
+#define XIVE_ESB_OFF          XIVE_ESB_VAL_Q
+
+bool xive_esb_trigger(uint8_t *pq);
+bool xive_esb_eoi(uint8_t *pq);
+uint8_t xive_esb_set(uint8_t *pq, uint8_t value);
+
+/*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * The following offsets into the ESB MMIO allow to read or manipulate
+ * the PQ bits. They must be used with an 8-byte load instruction.
+ * They all return the previous state of the interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a store and
+ * some ESBs support doing a trigger via a separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI      0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI       0x000 /* Load */
+#define XIVE_ESB_GET            0x800 /* Load */
+#define XIVE_ESB_INJECT         0x800 /* Store */
+#define XIVE_ESB_SET_PQ_00      0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01      0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10      0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11      0xf00 /* Load */
+
+uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno);
+uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq);
+
+/*
+ * Source status helpers
+ */
+static inline void xive_source_set_status(XiveSource *xsrc, uint32_t srcno,
+                                          uint8_t status, bool enable)
+{
+    if (enable) {
+        xsrc->status[srcno] |= status;
+    } else {
+        xsrc->status[srcno] &= ~status;
+    }
+}
+
+static inline void xive_source_set_asserted(XiveSource *xsrc, uint32_t srcno,
+                                            bool enable)
+{
+    xive_source_set_status(xsrc, srcno, XIVE_STATUS_ASSERTED, enable);
+}
+
+static inline bool xive_source_is_asserted(XiveSource *xsrc, uint32_t srcno)
+{
+    return xsrc->status[srcno] & XIVE_STATUS_ASSERTED;
+}
+
+void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset,
+                                Monitor *mon);
+
+static inline bool xive_source_irq_is_lsi(XiveSource *xsrc, uint32_t srcno)
+{
+    assert(srcno < xsrc->nr_irqs);
+    return test_bit(srcno, xsrc->lsi_map);
+}
+
+static inline void xive_source_irq_set_lsi(XiveSource *xsrc, uint32_t srcno)
+{
+    assert(srcno < xsrc->nr_irqs);
+    bitmap_set(xsrc->lsi_map, srcno, 1);
+}
+
+void xive_source_set_irq(void *opaque, int srcno, int val);
+
+/*
+ * XIVE Thread interrupt Management (TM) context
+ */
+
+#define TYPE_XIVE_TCTX "xive-tctx"
+OBJECT_DECLARE_SIMPLE_TYPE(XiveTCTX, XIVE_TCTX)
+
+/*
+ * XIVE Thread interrupt Management register rings :
+ *
+ *   QW-0  User       event-based exception state
+ *   QW-1  O/S        OS context for priority management, interrupt acks
+ *   QW-2  Pool       hypervisor pool context for virtual processors dispatched
+ *   QW-3  Physical   physical thread context and security context
+ */
+#define XIVE_TM_RING_COUNT      4
+#define XIVE_TM_RING_SIZE       0x10
+
+typedef struct XivePresenter XivePresenter;
+
+struct XiveTCTX {
+    DeviceState parent_obj;
+
+    CPUState    *cs;
+    qemu_irq    hv_output;
+    qemu_irq    os_output;
+
+    uint8_t     regs[XIVE_TM_RING_COUNT * XIVE_TM_RING_SIZE];
+
+    XivePresenter *xptr;
+};
+
+static inline uint32_t xive_tctx_word2(uint8_t *ring)
+{
+    return *((uint32_t *) &ring[TM_WORD2]);
+}
+
+/*
+ * XIVE Router
+ */
+typedef struct XiveFabric XiveFabric;
+
+struct XiveRouter {
+    SysBusDevice    parent;
+
+    XiveFabric *xfb;
+};
+
+#define TYPE_XIVE_ROUTER "xive-router"
+OBJECT_DECLARE_TYPE(XiveRouter, XiveRouterClass,
+                    XIVE_ROUTER)
+
+struct XiveRouterClass {
+    SysBusDeviceClass parent;
+
+    /* XIVE table accessors */
+    int (*get_eas)(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
+                   XiveEAS *eas);
+    int (*get_pq)(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
+                  uint8_t *pq);
+    int (*set_pq)(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
+                  uint8_t *pq);
+    int (*get_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                   XiveEND *end);
+    int (*write_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                     XiveEND *end, uint8_t word_number);
+    int (*get_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
+                   XiveNVT *nvt);
+    int (*write_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
+                     XiveNVT *nvt, uint8_t word_number);
+    uint8_t (*get_block_id)(XiveRouter *xrtr);
+    void (*end_notify)(XiveRouter *xrtr, XiveEAS *eas);
+};
+
+int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
+                        XiveEAS *eas);
+int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                        XiveEND *end);
+int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                          XiveEND *end, uint8_t word_number);
+int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
+                        XiveNVT *nvt);
+int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
+                          XiveNVT *nvt, uint8_t word_number);
+void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked);
+void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas);
+
+/*
+ * XIVE Presenter
+ */
+
+typedef struct XiveTCTXMatch {
+    XiveTCTX *tctx;
+    uint8_t ring;
+} XiveTCTXMatch;
+
+#define TYPE_XIVE_PRESENTER "xive-presenter"
+#define XIVE_PRESENTER(obj)                                     \
+    INTERFACE_CHECK(XivePresenter, (obj), TYPE_XIVE_PRESENTER)
+typedef struct XivePresenterClass XivePresenterClass;
+DECLARE_CLASS_CHECKERS(XivePresenterClass, XIVE_PRESENTER,
+                       TYPE_XIVE_PRESENTER)
+
+#define XIVE_PRESENTER_GEN1_TIMA_OS     0x1
+
+struct XivePresenterClass {
+    InterfaceClass parent;
+    int (*match_nvt)(XivePresenter *xptr, uint8_t format,
+                     uint8_t nvt_blk, uint32_t nvt_idx,
+                     bool cam_ignore, uint8_t priority,
+                     uint32_t logic_serv, XiveTCTXMatch *match);
+    bool (*in_kernel)(const XivePresenter *xptr);
+    uint32_t (*get_config)(XivePresenter *xptr);
+};
+
+int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
+                              uint8_t format,
+                              uint8_t nvt_blk, uint32_t nvt_idx,
+                              bool cam_ignore, uint32_t logic_serv);
+bool xive_presenter_notify(XiveFabric *xfb, uint8_t format,
+                           uint8_t nvt_blk, uint32_t nvt_idx,
+                           bool cam_ignore, uint8_t priority,
+                           uint32_t logic_serv);
+
+/*
+ * XIVE Fabric (Interface between Interrupt Controller and Machine)
+ */
+
+#define TYPE_XIVE_FABRIC "xive-fabric"
+#define XIVE_FABRIC(obj)                                     \
+    INTERFACE_CHECK(XiveFabric, (obj), TYPE_XIVE_FABRIC)
+typedef struct XiveFabricClass XiveFabricClass;
+DECLARE_CLASS_CHECKERS(XiveFabricClass, XIVE_FABRIC,
+                       TYPE_XIVE_FABRIC)
+
+struct XiveFabricClass {
+    InterfaceClass parent;
+    int (*match_nvt)(XiveFabric *xfb, uint8_t format,
+                     uint8_t nvt_blk, uint32_t nvt_idx,
+                     bool cam_ignore, uint8_t priority,
+                     uint32_t logic_serv, XiveTCTXMatch *match);
+};
+
+/*
+ * XIVE END ESBs
+ */
+
+#define TYPE_XIVE_END_SOURCE "xive-end-source"
+OBJECT_DECLARE_SIMPLE_TYPE(XiveENDSource, XIVE_END_SOURCE)
+
+struct XiveENDSource {
+    DeviceState parent;
+
+    uint32_t        nr_ends;
+
+    /* ESB memory region */
+    uint32_t        esb_shift;
+    MemoryRegion    esb_mmio;
+
+    XiveRouter      *xrtr;
+};
+
+/*
+ * For legacy compatibility, the exceptions define up to 256 different
+ * priorities. P9 implements only 9 levels : 8 active levels [0 - 7]
+ * and the least favored level 0xFF.
+ */
+#define XIVE_PRIORITY_MAX  7
+
+/*
+ * Convert a priority number to an Interrupt Pending Buffer (IPB)
+ * register, which indicates a pending interrupt at the priority
+ * corresponding to the bit number
+ */
+static inline uint8_t xive_priority_to_ipb(uint8_t priority)
+{
+    return priority > XIVE_PRIORITY_MAX ?
+        0 : 1 << (XIVE_PRIORITY_MAX - priority);
+}
+
+/*
+ * XIVE Thread Interrupt Management Aera (TIMA)
+ *
+ * This region gives access to the registers of the thread interrupt
+ * management context. It is four page wide, each page providing a
+ * different view of the registers. The page with the lower offset is
+ * the most privileged and gives access to the entire context.
+ */
+#define XIVE_TM_HW_PAGE         0x0
+#define XIVE_TM_HV_PAGE         0x1
+#define XIVE_TM_OS_PAGE         0x2
+#define XIVE_TM_USER_PAGE       0x3
+
+void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
+                        uint64_t value, unsigned size);
+uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
+                           unsigned size);
+
+void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon);
+Object *xive_tctx_create(Object *cpu, XivePresenter *xptr, Error **errp);
+void xive_tctx_reset(XiveTCTX *tctx);
+void xive_tctx_destroy(XiveTCTX *tctx);
+void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb);
+void xive_tctx_reset_os_signal(XiveTCTX *tctx);
+
+/*
+ * KVM XIVE device helpers
+ */
+
+int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
+void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
+int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
+int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
+int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp);
+int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp);
+
+#endif /* PPC_XIVE_H */
diff --git a/include/hw/ppc/xive2.h b/include/hw/ppc/xive2.h
new file mode 100644 (file)
index 0000000..ab68f8d
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * QEMU PowerPC XIVE2 interrupt controller model  (POWER10)
+ *
+ * Copyright (c) 2019-2022, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef PPC_XIVE2_H
+#define PPC_XIVE2_H
+
+#include "hw/ppc/xive.h"
+#include "hw/ppc/xive2_regs.h"
+#include "hw/sysbus.h"
+
+/*
+ * XIVE2 Router (POWER10)
+ */
+typedef struct Xive2Router {
+    SysBusDevice    parent;
+
+    XiveFabric *xfb;
+} Xive2Router;
+
+#define TYPE_XIVE2_ROUTER "xive2-router"
+OBJECT_DECLARE_TYPE(Xive2Router, Xive2RouterClass, XIVE2_ROUTER);
+
+/*
+ * Configuration flags
+ */
+
+#define XIVE2_GEN1_TIMA_OS      0x00000001
+#define XIVE2_VP_SAVE_RESTORE   0x00000002
+#define XIVE2_THREADID_8BITS    0x00000004
+
+typedef struct Xive2RouterClass {
+    SysBusDeviceClass parent;
+
+    /* XIVE table accessors */
+    int (*get_eas)(Xive2Router *xrtr, uint8_t eas_blk, uint32_t eas_idx,
+                   Xive2Eas *eas);
+    int (*get_pq)(Xive2Router *xrtr, uint8_t eas_blk, uint32_t eas_idx,
+                  uint8_t *pq);
+    int (*set_pq)(Xive2Router *xrtr, uint8_t eas_blk, uint32_t eas_idx,
+                  uint8_t *pq);
+    int (*get_end)(Xive2Router *xrtr, uint8_t end_blk, uint32_t end_idx,
+                   Xive2End *end);
+    int (*write_end)(Xive2Router *xrtr, uint8_t end_blk, uint32_t end_idx,
+                     Xive2End *end, uint8_t word_number);
+    int (*get_nvp)(Xive2Router *xrtr, uint8_t nvp_blk, uint32_t nvp_idx,
+                   Xive2Nvp *nvp);
+    int (*write_nvp)(Xive2Router *xrtr, uint8_t nvp_blk, uint32_t nvp_idx,
+                     Xive2Nvp *nvp, uint8_t word_number);
+    uint8_t (*get_block_id)(Xive2Router *xrtr);
+    uint32_t (*get_config)(Xive2Router *xrtr);
+} Xive2RouterClass;
+
+int xive2_router_get_eas(Xive2Router *xrtr, uint8_t eas_blk, uint32_t eas_idx,
+                        Xive2Eas *eas);
+int xive2_router_get_end(Xive2Router *xrtr, uint8_t end_blk, uint32_t end_idx,
+                        Xive2End *end);
+int xive2_router_write_end(Xive2Router *xrtr, uint8_t end_blk, uint32_t end_idx,
+                          Xive2End *end, uint8_t word_number);
+int xive2_router_get_nvp(Xive2Router *xrtr, uint8_t nvp_blk, uint32_t nvp_idx,
+                        Xive2Nvp *nvp);
+int xive2_router_write_nvp(Xive2Router *xrtr, uint8_t nvp_blk, uint32_t nvp_idx,
+                          Xive2Nvp *nvp, uint8_t word_number);
+uint32_t xive2_router_get_config(Xive2Router *xrtr);
+
+void xive2_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked);
+
+/*
+ * XIVE2 Presenter (POWER10)
+ */
+
+int xive2_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
+                               uint8_t format,
+                               uint8_t nvt_blk, uint32_t nvt_idx,
+                               bool cam_ignore, uint32_t logic_serv);
+
+/*
+ * XIVE2 END ESBs  (POWER10)
+ */
+
+#define TYPE_XIVE2_END_SOURCE "xive2-end-source"
+OBJECT_DECLARE_SIMPLE_TYPE(Xive2EndSource, XIVE2_END_SOURCE)
+
+typedef struct Xive2EndSource {
+    DeviceState parent;
+
+    uint32_t        nr_ends;
+
+    /* ESB memory region */
+    uint32_t        esb_shift;
+    MemoryRegion    esb_mmio;
+
+    Xive2Router     *xrtr;
+} Xive2EndSource;
+
+/*
+ * XIVE2 Thread Interrupt Management Area (POWER10)
+ */
+
+void xive2_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
+                           uint64_t value, unsigned size);
+uint64_t xive2_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
+                               hwaddr offset, unsigned size);
+
+#endif /* PPC_XIVE2_H */
diff --git a/include/hw/ppc/xive2_regs.h b/include/hw/ppc/xive2_regs.h
new file mode 100644 (file)
index 0000000..b7adbdb
--- /dev/null
@@ -0,0 +1,212 @@
+/*
+ * QEMU PowerPC XIVE2 internal structure definitions (POWER10)
+ *
+ * Copyright (c) 2019-2022, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_XIVE2_REGS_H
+#define PPC_XIVE2_REGS_H
+
+#include "cpu.h"
+
+/*
+ * Thread Interrupt Management Area (TIMA)
+ *
+ * In Gen1 mode (P9 compat mode) word 2 is the same. However in Gen2
+ * mode (P10), the CAM line is slightly different as the VP space was
+ * increased.
+ */
+#define   TM2_QW0W2_VU           PPC_BIT32(0)
+#define   TM2_QW0W2_LOGIC_SERV   PPC_BITMASK32(4, 31)
+#define   TM2_QW1W2_VO           PPC_BIT32(0)
+#define   TM2_QW1W2_HO           PPC_BIT32(1)
+#define   TM2_QW1W2_OS_CAM       PPC_BITMASK32(4, 31)
+#define   TM2_QW2W2_VP           PPC_BIT32(0)
+#define   TM2_QW2W2_HP           PPC_BIT32(1)
+#define   TM2_QW2W2_POOL_CAM     PPC_BITMASK32(4, 31)
+#define   TM2_QW3W2_VT           PPC_BIT32(0)
+#define   TM2_QW3W2_HT           PPC_BIT32(1)
+#define   TM2_QW3W2_LP           PPC_BIT32(6)
+#define   TM2_QW3W2_LE           PPC_BIT32(7)
+
+/*
+ * Event Assignment Structure (EAS)
+ */
+
+typedef struct Xive2Eas {
+        uint64_t       w;
+#define EAS2_VALID                 PPC_BIT(0)
+#define EAS2_END_BLOCK             PPC_BITMASK(4, 7) /* Destination EQ block# */
+#define EAS2_END_INDEX             PPC_BITMASK(8, 31) /* Destination EQ index */
+#define EAS2_MASKED                PPC_BIT(32) /* Masked                 */
+#define EAS2_END_DATA              PPC_BITMASK(33, 63) /* written to the EQ */
+} Xive2Eas;
+
+#define xive2_eas_is_valid(eas)   (be64_to_cpu((eas)->w) & EAS2_VALID)
+#define xive2_eas_is_masked(eas)  (be64_to_cpu((eas)->w) & EAS2_MASKED)
+
+void xive2_eas_pic_print_info(Xive2Eas *eas, uint32_t lisn, Monitor *mon);
+
+/*
+ * Event Notifification Descriptor (END)
+ */
+
+typedef struct Xive2End {
+        uint32_t       w0;
+#define END2_W0_VALID              PPC_BIT32(0) /* "v" bit */
+#define END2_W0_ENQUEUE            PPC_BIT32(5) /* "q" bit */
+#define END2_W0_UCOND_NOTIFY       PPC_BIT32(6) /* "n" bit */
+#define END2_W0_SILENT_ESCALATE    PPC_BIT32(7) /* "s" bit */
+#define END2_W0_BACKLOG            PPC_BIT32(8) /* "b" bit */
+#define END2_W0_PRECL_ESC_CTL      PPC_BIT32(9) /* "p" bit */
+#define END2_W0_UNCOND_ESCALATE    PPC_BIT32(10) /* "u" bit */
+#define END2_W0_ESCALATE_CTL       PPC_BIT32(11) /* "e" bit */
+#define END2_W0_ADAPTIVE_ESC       PPC_BIT32(12) /* "a" bit */
+#define END2_W0_ESCALATE_END       PPC_BIT32(13) /* "N" bit */
+#define END2_W0_FIRMWARE1          PPC_BIT32(16) /* Owned by FW */
+#define END2_W0_FIRMWARE2          PPC_BIT32(17) /* Owned by FW */
+#define END2_W0_AEC_SIZE           PPC_BITMASK32(18, 19)
+#define END2_W0_AEG_SIZE           PPC_BITMASK32(20, 23)
+#define END2_W0_EQ_VG_PREDICT      PPC_BITMASK32(24, 31) /* Owned by HW */
+        uint32_t       w1;
+#define END2_W1_ESn                PPC_BITMASK32(0, 1)
+#define END2_W1_ESn_P              PPC_BIT32(0)
+#define END2_W1_ESn_Q              PPC_BIT32(1)
+#define END2_W1_ESe                PPC_BITMASK32(2, 3)
+#define END2_W1_ESe_P              PPC_BIT32(2)
+#define END2_W1_ESe_Q              PPC_BIT32(3)
+#define END2_W1_GEN_FLIPPED        PPC_BIT32(8)
+#define END2_W1_GENERATION         PPC_BIT32(9)
+#define END2_W1_PAGE_OFF           PPC_BITMASK32(10, 31)
+        uint32_t       w2;
+#define END2_W2_RESERVED           PPC_BITMASK32(4, 7)
+#define END2_W2_EQ_ADDR_HI         PPC_BITMASK32(8, 31)
+        uint32_t       w3;
+#define END2_W3_EQ_ADDR_LO         PPC_BITMASK32(0, 24)
+#define END2_W3_QSIZE              PPC_BITMASK32(28, 31)
+        uint32_t       w4;
+#define END2_W4_END_BLOCK          PPC_BITMASK32(4, 7)
+#define END2_W4_ESC_END_INDEX      PPC_BITMASK32(8, 31)
+#define END2_W4_ESB_BLOCK          PPC_BITMASK32(0, 3)
+#define END2_W4_ESC_ESB_INDEX      PPC_BITMASK32(4, 31)
+        uint32_t       w5;
+#define END2_W5_ESC_END_DATA       PPC_BITMASK32(1, 31)
+        uint32_t       w6;
+#define END2_W6_FORMAT_BIT         PPC_BIT32(0)
+#define END2_W6_IGNORE             PPC_BIT32(1)
+#define END2_W6_VP_BLOCK           PPC_BITMASK32(4, 7)
+#define END2_W6_VP_OFFSET          PPC_BITMASK32(8, 31)
+#define END2_W6_VP_OFFSET_GEN1     PPC_BITMASK32(13, 31)
+        uint32_t       w7;
+#define END2_W7_TOPO               PPC_BITMASK32(0, 3) /* Owned by HW */
+#define END2_W7_F0_PRIORITY        PPC_BITMASK32(8, 15)
+#define END2_W7_F1_LOG_SERVER_ID   PPC_BITMASK32(4, 31)
+} Xive2End;
+
+#define xive2_end_is_valid(end)    (be32_to_cpu((end)->w0) & END2_W0_VALID)
+#define xive2_end_is_enqueue(end)  (be32_to_cpu((end)->w0) & END2_W0_ENQUEUE)
+#define xive2_end_is_notify(end)                \
+    (be32_to_cpu((end)->w0) & END2_W0_UCOND_NOTIFY)
+#define xive2_end_is_backlog(end)  (be32_to_cpu((end)->w0) & END2_W0_BACKLOG)
+#define xive2_end_is_escalate(end)                      \
+    (be32_to_cpu((end)->w0) & END2_W0_ESCALATE_CTL)
+#define xive2_end_is_uncond_escalation(end)              \
+    (be32_to_cpu((end)->w0) & END2_W0_UNCOND_ESCALATE)
+#define xive2_end_is_silent_escalation(end)              \
+    (be32_to_cpu((end)->w0) & END2_W0_SILENT_ESCALATE)
+#define xive2_end_is_escalate_end(end)              \
+    (be32_to_cpu((end)->w0) & END2_W0_ESCALATE_END)
+#define xive2_end_is_firmware1(end)              \
+    (be32_to_cpu((end)->w0) & END2_W0_FIRMWARE1)
+#define xive2_end_is_firmware2(end)              \
+    (be32_to_cpu((end)->w0) & END2_W0_FIRMWARE2)
+
+static inline uint64_t xive2_end_qaddr(Xive2End *end)
+{
+    return ((uint64_t) be32_to_cpu(end->w2) & END2_W2_EQ_ADDR_HI) << 32 |
+        (be32_to_cpu(end->w3) & END2_W3_EQ_ADDR_LO);
+}
+
+void xive2_end_pic_print_info(Xive2End *end, uint32_t end_idx, Monitor *mon);
+void xive2_end_queue_pic_print_info(Xive2End *end, uint32_t width,
+                                    Monitor *mon);
+void xive2_end_eas_pic_print_info(Xive2End *end, uint32_t end_idx,
+                                   Monitor *mon);
+
+/*
+ * Notification Virtual Processor (NVP)
+ */
+typedef struct Xive2Nvp {
+        uint32_t       w0;
+#define NVP2_W0_VALID              PPC_BIT32(0)
+#define NVP2_W0_HW                 PPC_BIT32(7)
+#define NVP2_W0_ESC_END            PPC_BIT32(25) /* 'N' bit 0:ESB  1:END */
+        uint32_t       w1;
+#define NVP2_W1_CO                 PPC_BIT32(13)
+#define NVP2_W1_CO_PRIV            PPC_BITMASK32(14, 15)
+#define NVP2_W1_CO_THRID_VALID     PPC_BIT32(16)
+#define NVP2_W1_CO_THRID           PPC_BITMASK32(17, 31)
+        uint32_t       w2;
+#define NVP2_W2_CPPR               PPC_BITMASK32(0, 7)
+#define NVP2_W2_IPB                PPC_BITMASK32(8, 15)
+#define NVP2_W2_LSMFB              PPC_BITMASK32(16, 23)
+        uint32_t       w3;
+        uint32_t       w4;
+#define NVP2_W4_ESC_ESB_BLOCK      PPC_BITMASK32(0, 3)  /* N:0 */
+#define NVP2_W4_ESC_ESB_INDEX      PPC_BITMASK32(4, 31) /* N:0 */
+#define NVP2_W4_ESC_END_BLOCK      PPC_BITMASK32(4, 7)  /* N:1 */
+#define NVP2_W4_ESC_END_INDEX      PPC_BITMASK32(8, 31) /* N:1 */
+        uint32_t       w5;
+#define NVP2_W5_PSIZE              PPC_BITMASK32(0, 1)
+#define NVP2_W5_VP_END_BLOCK       PPC_BITMASK32(4, 7)
+#define NVP2_W5_VP_END_INDEX       PPC_BITMASK32(8, 31)
+        uint32_t       w6;
+        uint32_t       w7;
+} Xive2Nvp;
+
+#define xive2_nvp_is_valid(nvp)    (be32_to_cpu((nvp)->w0) & NVP2_W0_VALID)
+#define xive2_nvp_is_hw(nvp)       (be32_to_cpu((nvp)->w0) & NVP2_W0_HW)
+#define xive2_nvp_is_co(nvp)       (be32_to_cpu((nvp)->w1) & NVP2_W1_CO)
+
+/*
+ * The VP number space in a block is defined by the END2_W6_VP_OFFSET
+ * field of the XIVE END. When running in Gen1 mode (P9 compat mode),
+ * the VP space is reduced to (1 << 19) VPs per block
+ */
+#define XIVE2_NVP_SHIFT              24
+#define XIVE2_NVP_COUNT              (1 << XIVE2_NVP_SHIFT)
+
+static inline uint32_t xive2_nvp_cam_line(uint8_t nvp_blk, uint32_t nvp_idx)
+{
+    return (nvp_blk << XIVE2_NVP_SHIFT) | nvp_idx;
+}
+
+static inline uint32_t xive2_nvp_idx(uint32_t cam_line)
+{
+    return cam_line & ((1 << XIVE2_NVP_SHIFT) - 1);
+}
+
+static inline uint32_t xive2_nvp_blk(uint32_t cam_line)
+{
+    return (cam_line >> XIVE2_NVP_SHIFT) & 0xf;
+}
+
+/*
+ * Notification Virtual Group or Crowd (NVG/NVC)
+ */
+typedef struct Xive2Nvgc {
+        uint32_t        w0;
+#define NVGC2_W0_VALID             PPC_BIT32(0)
+        uint32_t        w1;
+        uint32_t        w2;
+        uint32_t        w3;
+        uint32_t        w4;
+        uint32_t        w5;
+        uint32_t        w6;
+        uint32_t        w7;
+} Xive2Nvgc;
+
+#endif /* PPC_XIVE2_REGS_H */
diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
new file mode 100644 (file)
index 0000000..4a3c9ba
--- /dev/null
@@ -0,0 +1,317 @@
+/*
+ * QEMU PowerPC XIVE internal structure definitions
+ *
+ *
+ * The XIVE structures are accessed by the HW and their format is
+ * architected to be big-endian. Some macros are provided to ease
+ * access to the different fields.
+ *
+ *
+ * Copyright (c) 2016-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_XIVE_REGS_H
+#define PPC_XIVE_REGS_H
+
+#include "qemu/bswap.h"
+#include "qemu/host-utils.h"
+
+/*
+ * Interrupt source number encoding on PowerBUS
+ */
+/*
+ * Trigger data definition
+ *
+ * The trigger definition is used for triggers both for HW source
+ * interrupts (PHB, PSI), as well as for rerouting interrupts between
+ * Interrupt Controller.
+ *
+ * HW source controllers set bit0 of word0 to ‘0’ as they provide EAS
+ * information (EAS block + EAS index) in the 8 byte data and not END
+ * information, which is use for rerouting interrupts.
+ *
+ * bit1 of word0 to ‘1’ signals that the state bit check has been
+ * performed.
+ */
+#define XIVE_TRIGGER_END        PPC_BIT(0)
+#define XIVE_TRIGGER_PQ         PPC_BIT(1)
+
+/*
+ * QEMU macros to manipulate the trigger payload in native endian
+ */
+#define XIVE_EAS_BLOCK(n)       (((n) >> 28) & 0xf)
+#define XIVE_EAS_INDEX(n)       ((n) & 0x0fffffff)
+#define XIVE_EAS(blk, idx)      ((uint32_t)(blk) << 28 | (idx))
+
+#define TM_SHIFT                16
+
+/*
+ * TIMA addresses are 12-bits (4k page).
+ * The MSB indicates a special op with side effect, which can be
+ * refined with bit 10 (see below).
+ * The registers, logically grouped in 4 rings (a quad-word each), are
+ * defined on the 6 LSBs (offset below 0x40)
+ * In between, we can add a cache line index from 0...3 (ie, 0, 0x80,
+ * 0x100, 0x180) to select a specific snooper. Those 'snoop port
+ * address' bits should be dropped when processing the operations as
+ * they are all equivalent.
+ */
+#define TM_ADDRESS_MASK         0xC3F
+#define TM_SPECIAL_OP           0x800
+#define TM_RING_OFFSET          0x30
+#define TM_REG_OFFSET           0x3F
+
+/* TM register offsets */
+#define TM_QW0_USER             0x000 /* All rings */
+#define TM_QW1_OS               0x010 /* Ring 0..2 */
+#define TM_QW2_HV_POOL          0x020 /* Ring 0..1 */
+#define TM_QW3_HV_PHYS          0x030 /* Ring 0..1 */
+
+/* Byte offsets inside a QW             QW0 QW1 QW2 QW3 */
+#define TM_NSR                  0x0  /*  +   +   -   +  */
+#define TM_CPPR                 0x1  /*  -   +   -   +  */
+#define TM_IPB                  0x2  /*  -   +   +   +  */
+#define TM_LSMFB                0x3  /*  -   +   +   +  */
+#define TM_ACK_CNT              0x4  /*  -   +   -   -  */
+#define TM_INC                  0x5  /*  -   +   -   +  */
+#define TM_AGE                  0x6  /*  -   +   -   +  */
+#define TM_PIPR                 0x7  /*  -   +   -   +  */
+
+#define TM_WORD0                0x0
+#define TM_WORD1                0x4
+
+/*
+ * QW word 2 contains the valid bit at the top and other fields
+ * depending on the QW.
+ */
+#define   TM_WORD2              0x8
+#define   TM_QW0W2_VU           PPC_BIT32(0)
+#define   TM_QW0W2_LOGIC_SERV   PPC_BITMASK32(1, 31) /* XX 2,31 ? */
+#define   TM_QW1W2_VO           PPC_BIT32(0)
+#define   TM_QW1W2_OS_CAM       PPC_BITMASK32(8, 31)
+#define   TM_QW2W2_VP           PPC_BIT32(0)
+#define   TM_QW2W2_POOL_CAM     PPC_BITMASK32(8, 31)
+#define   TM_QW3W2_VT           PPC_BIT32(0)
+#define   TM_QW3W2_LP           PPC_BIT32(6)
+#define   TM_QW3W2_LE           PPC_BIT32(7)
+#define   TM_QW3W2_T            PPC_BIT32(31)
+
+/*
+ * In addition to normal loads to "peek" and writes (only when invalid)
+ * using 4 and 8 bytes accesses, the above registers support these
+ * "special" byte operations:
+ *
+ *   - Byte load from QW0[NSR] - User level NSR (EBB)
+ *   - Byte store to QW0[NSR] - User level NSR (EBB)
+ *   - Byte load/store to QW1[CPPR] and QW3[CPPR] - CPPR access
+ *   - Byte load from QW3[TM_WORD2] - Read VT||00000||LP||LE on thrd 0
+ *                                    otherwise VT||0000000
+ *   - Byte store to QW3[TM_WORD2] - Set VT bit (and LP/LE if present)
+ *
+ * Then we have all these "special" CI ops at these offset that trigger
+ * all sorts of side effects:
+ */
+#define TM_SPC_ACK_EBB          0x800   /* Load8 ack EBB to reg*/
+#define TM_SPC_ACK_OS_REG       0x810   /* Load16 ack OS irq to reg */
+#define TM_SPC_PUSH_USR_CTX     0x808   /* Store32 Push/Validate user context */
+#define TM_SPC_PULL_USR_CTX     0x808   /* Load32 Pull/Invalidate user
+                                         * context */
+#define TM_SPC_SET_OS_PENDING   0x812   /* Store8 Set OS irq pending bit */
+#define TM_SPC_PULL_OS_CTX      0x818   /* Load32/Load64 Pull/Invalidate OS
+                                         * context to reg */
+#define TM_SPC_PULL_POOL_CTX    0x828   /* Load32/Load64 Pull/Invalidate Pool
+                                         * context to reg*/
+#define TM_SPC_ACK_HV_REG       0x830   /* Load16 ack HV irq to reg */
+#define TM_SPC_PULL_USR_CTX_OL  0xc08   /* Store8 Pull/Inval usr ctx to odd
+                                         * line */
+#define TM_SPC_ACK_OS_EL        0xc10   /* Store8 ack OS irq to even line */
+#define TM_SPC_ACK_HV_POOL_EL   0xc20   /* Store8 ack HV evt pool to even
+                                         * line */
+#define TM_SPC_ACK_HV_EL        0xc30   /* Store8 ack HV irq to even line */
+/* XXX more... */
+
+/* NSR fields for the various QW ack types */
+#define TM_QW0_NSR_EB           PPC_BIT8(0)
+#define TM_QW1_NSR_EO           PPC_BIT8(0)
+#define TM_QW3_NSR_HE           PPC_BITMASK8(0, 1)
+#define  TM_QW3_NSR_HE_NONE     0
+#define  TM_QW3_NSR_HE_POOL     1
+#define  TM_QW3_NSR_HE_PHYS     2
+#define  TM_QW3_NSR_HE_LSI      3
+#define TM_QW3_NSR_I            PPC_BIT8(2)
+#define TM_QW3_NSR_GRP_LVL      PPC_BIT8(3, 7)
+
+/*
+ * EAS (Event Assignment Structure)
+ *
+ * One per interrupt source. Targets an interrupt to a given Event
+ * Notification Descriptor (END) and provides the corresponding
+ * logical interrupt number (END data)
+ */
+typedef struct XiveEAS {
+        /*
+         * Use a single 64-bit definition to make it easier to perform
+         * atomic updates
+         */
+        uint64_t        w;
+#define EAS_VALID       PPC_BIT(0)
+#define EAS_END_BLOCK   PPC_BITMASK(4, 7)        /* Destination END block# */
+#define EAS_END_INDEX   PPC_BITMASK(8, 31)       /* Destination END index */
+#define EAS_MASKED      PPC_BIT(32)              /* Masked */
+#define EAS_END_DATA    PPC_BITMASK(33, 63)      /* Data written to the END */
+} XiveEAS;
+
+#define xive_eas_is_valid(eas)   (be64_to_cpu((eas)->w) & EAS_VALID)
+#define xive_eas_is_masked(eas)  (be64_to_cpu((eas)->w) & EAS_MASKED)
+
+void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
+
+static inline uint64_t xive_get_field64(uint64_t mask, uint64_t word)
+{
+    return (be64_to_cpu(word) & mask) >> ctz64(mask);
+}
+
+static inline uint64_t xive_set_field64(uint64_t mask, uint64_t word,
+                                        uint64_t value)
+{
+    uint64_t tmp =
+        (be64_to_cpu(word) & ~mask) | ((value << ctz64(mask)) & mask);
+    return cpu_to_be64(tmp);
+}
+
+static inline uint32_t xive_get_field32(uint32_t mask, uint32_t word)
+{
+    return (be32_to_cpu(word) & mask) >> ctz32(mask);
+}
+
+static inline uint32_t xive_set_field32(uint32_t mask, uint32_t word,
+                                        uint32_t value)
+{
+    uint32_t tmp =
+        (be32_to_cpu(word) & ~mask) | ((value << ctz32(mask)) & mask);
+    return cpu_to_be32(tmp);
+}
+
+/* Event Notification Descriptor (END) */
+typedef struct XiveEND {
+        uint32_t        w0;
+#define END_W0_VALID             PPC_BIT32(0) /* "v" bit */
+#define END_W0_ENQUEUE           PPC_BIT32(1) /* "q" bit */
+#define END_W0_UCOND_NOTIFY      PPC_BIT32(2) /* "n" bit */
+#define END_W0_BACKLOG           PPC_BIT32(3) /* "b" bit */
+#define END_W0_PRECL_ESC_CTL     PPC_BIT32(4) /* "p" bit */
+#define END_W0_ESCALATE_CTL      PPC_BIT32(5) /* "e" bit */
+#define END_W0_UNCOND_ESCALATE   PPC_BIT32(6) /* "u" bit - DD2.0 */
+#define END_W0_SILENT_ESCALATE   PPC_BIT32(7) /* "s" bit - DD2.0 */
+#define END_W0_QSIZE             PPC_BITMASK32(12, 15)
+#define END_W0_SW0               PPC_BIT32(16)
+#define END_W0_FIRMWARE          END_W0_SW0 /* Owned by FW */
+#define END_QSIZE_4K             0
+#define END_QSIZE_64K            4
+#define END_W0_HWDEP             PPC_BITMASK32(24, 31)
+        uint32_t        w1;
+#define END_W1_ESn               PPC_BITMASK32(0, 1)
+#define END_W1_ESn_P             PPC_BIT32(0)
+#define END_W1_ESn_Q             PPC_BIT32(1)
+#define END_W1_ESe               PPC_BITMASK32(2, 3)
+#define END_W1_ESe_P             PPC_BIT32(2)
+#define END_W1_ESe_Q             PPC_BIT32(3)
+#define END_W1_GENERATION        PPC_BIT32(9)
+#define END_W1_PAGE_OFF          PPC_BITMASK32(10, 31)
+        uint32_t        w2;
+#define END_W2_MIGRATION_REG     PPC_BITMASK32(0, 3)
+#define END_W2_OP_DESC_HI        PPC_BITMASK32(4, 31)
+        uint32_t        w3;
+#define END_W3_OP_DESC_LO        PPC_BITMASK32(0, 31)
+        uint32_t        w4;
+#define END_W4_ESC_END_BLOCK     PPC_BITMASK32(4, 7)
+#define END_W4_ESC_END_INDEX     PPC_BITMASK32(8, 31)
+        uint32_t        w5;
+#define END_W5_ESC_END_DATA      PPC_BITMASK32(1, 31)
+        uint32_t        w6;
+#define END_W6_FORMAT_BIT        PPC_BIT32(8)
+#define END_W6_NVT_BLOCK         PPC_BITMASK32(9, 12)
+#define END_W6_NVT_INDEX         PPC_BITMASK32(13, 31)
+        uint32_t        w7;
+#define END_W7_F0_IGNORE         PPC_BIT32(0)
+#define END_W7_F0_BLK_GROUPING   PPC_BIT32(1)
+#define END_W7_F0_PRIORITY       PPC_BITMASK32(8, 15)
+#define END_W7_F1_WAKEZ          PPC_BIT32(0)
+#define END_W7_F1_LOG_SERVER_ID  PPC_BITMASK32(1, 31)
+} XiveEND;
+
+#define xive_end_is_valid(end)    (be32_to_cpu((end)->w0) & END_W0_VALID)
+#define xive_end_is_enqueue(end)  (be32_to_cpu((end)->w0) & END_W0_ENQUEUE)
+#define xive_end_is_notify(end)   (be32_to_cpu((end)->w0) & END_W0_UCOND_NOTIFY)
+#define xive_end_is_backlog(end)  (be32_to_cpu((end)->w0) & END_W0_BACKLOG)
+#define xive_end_is_escalate(end) (be32_to_cpu((end)->w0) & END_W0_ESCALATE_CTL)
+#define xive_end_is_uncond_escalation(end)              \
+    (be32_to_cpu((end)->w0) & END_W0_UNCOND_ESCALATE)
+#define xive_end_is_silent_escalation(end)              \
+    (be32_to_cpu((end)->w0) & END_W0_SILENT_ESCALATE)
+#define xive_end_is_firmware(end)              \
+    (be32_to_cpu((end)->w0) & END_W0_FIRMWARE)
+
+static inline uint64_t xive_end_qaddr(XiveEND *end)
+{
+    return ((uint64_t) be32_to_cpu(end->w2) & 0x0fffffff) << 32 |
+        be32_to_cpu(end->w3);
+}
+
+void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon);
+void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon);
+void xive_end_eas_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon);
+
+/* Notification Virtual Target (NVT) */
+typedef struct XiveNVT {
+        uint32_t        w0;
+#define NVT_W0_VALID             PPC_BIT32(0)
+        uint32_t        w1;
+#define NVT_W1_EQ_BLOCK          PPC_BITMASK32(0, 3)
+#define NVT_W1_EQ_INDEX          PPC_BITMASK32(4, 31)
+        uint32_t        w2;
+        uint32_t        w3;
+        uint32_t        w4;
+#define NVT_W4_IPB               PPC_BITMASK32(16, 23)
+        uint32_t        w5;
+        uint32_t        w6;
+        uint32_t        w7;
+        uint32_t        w8;
+#define NVT_W8_GRP_VALID         PPC_BIT32(0)
+        uint32_t        w9;
+        uint32_t        wa;
+        uint32_t        wb;
+        uint32_t        wc;
+        uint32_t        wd;
+        uint32_t        we;
+        uint32_t        wf;
+} XiveNVT;
+
+#define xive_nvt_is_valid(nvt)    (be32_to_cpu((nvt)->w0) & NVT_W0_VALID)
+
+/*
+ * The VP number space in a block is defined by the END_W6_NVT_INDEX
+ * field of the XIVE END
+ */
+#define XIVE_NVT_SHIFT                19
+#define XIVE_NVT_COUNT                (1 << XIVE_NVT_SHIFT)
+
+static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx)
+{
+    return (nvt_blk << XIVE_NVT_SHIFT) | nvt_idx;
+}
+
+static inline uint32_t xive_nvt_idx(uint32_t cam_line)
+{
+    return cam_line & ((1 << XIVE_NVT_SHIFT) - 1);
+}
+
+static inline uint32_t xive_nvt_blk(uint32_t cam_line)
+{
+    return (cam_line >> XIVE_NVT_SHIFT) & 0xf;
+}
+
+#endif /* PPC_XIVE_REGS_H */
index 412763fffb2ecf04df412002e56b811931ccf3bb..4dc02b0de472ed2a14a8e8fa03cb0ec69f1cf16d 100644 (file)
  * to stderr when the guest attempts to enable the timer.
  */
 
-/* The default ptimer policy retains backward compatibility with the legacy
- * timers. Custom policies are adjusting the default one. Consider providing
- * a correct policy for your timer.
+/*
+ * The 'legacy' ptimer policy retains backward compatibility with the
+ * traditional ptimer behaviour from before policy flags were introduced.
+ * It has several weird behaviours which don't match typical hardware
+ * timer behaviour. For a new device using ptimers, you should not
+ * use PTIMER_POLICY_LEGACY, but instead check the actual behaviour
+ * that you need and specify the right set of policy flags to get that.
+ *
+ * If you are overhauling an existing device that uses PTIMER_POLICY_LEGACY
+ * and are in a position to check or test the real hardware behaviour,
+ * consider updating it to specify the right policy flags.
  *
  * The rough edges of the default policy:
  *  - Starting to run with a period = 0 emits error message and stops the
@@ -54,7 +62,7 @@
  *    since the last period, effectively restarting the timer with a
  *    counter = counter value at the moment of change (.i.e. one less).
  */
-#define PTIMER_POLICY_DEFAULT               0
+#define PTIMER_POLICY_LEGACY                0
 
 /* Periodic timer counter stays with "0" for a one period before wrapping
  * around.  */
@@ -165,6 +173,28 @@ void ptimer_transaction_commit(ptimer_state *s);
  */
 void ptimer_set_period(ptimer_state *s, int64_t period);
 
+/**
+ * ptimer_set_period_from_clock - Set counter increment from a Clock
+ * @s: ptimer to configure
+ * @clk: pointer to Clock object to take period from
+ * @divisor: value to scale the clock frequency down by
+ *
+ * If the ptimer is being driven from a Clock, this is the preferred
+ * way to tell the ptimer about the period, because it avoids any
+ * possible rounding errors that might happen if the internal
+ * representation of the Clock period was converted to either a period
+ * in ns or a frequency in Hz.
+ *
+ * If the ptimer should run at the same frequency as the clock,
+ * pass 1 as the @divisor; if the ptimer should run at half the
+ * frequency, pass 2, and so on.
+ *
+ * This function will assert if it is called outside a
+ * ptimer_transaction_begin/commit block.
+ */
+void ptimer_set_period_from_clock(ptimer_state *s, const Clock *clock,
+                                  unsigned int divisor);
+
 /**
  * ptimer_set_freq - Set counter frequency in Hz
  * @s: ptimer to configure
index 64ca4d266f2205b1ff4091b1b5bceabb7595f381..ffa0f7ba09e08c22858736763986365f5ff34702 100644 (file)
@@ -22,6 +22,8 @@
  * @name: the name of the clock (can't be NULL).
  * @callback: optional callback to be called on update or NULL.
  * @opaque: argument for the callback
+ * @events: the events the callback should be called for
+ *          (logical OR of ClockEvent enum values)
  * @returns: a pointer to the newly added clock
  *
  * Add an input clock to device @dev as a clock named @name.
@@ -29,7 +31,8 @@
  * The callback will be called with @opaque as opaque parameter.
  */
 Clock *qdev_init_clock_in(DeviceState *dev, const char *name,
-                          ClockCallback *callback, void *opaque);
+                          ClockCallback *callback, void *opaque,
+                          unsigned int events);
 
 /**
  * qdev_init_clock_out:
@@ -105,6 +108,7 @@ void qdev_finalize_clocklist(DeviceState *dev);
  * @output: indicates whether the clock is input or output
  * @callback: for inputs, optional callback to be called on clock's update
  * with device as opaque
+ * @callback_events: mask of ClockEvent values for when callback is called
  * @offset: optional offset to store the ClockIn or ClockOut pointer in device
  * state structure (0 means unused)
  */
@@ -112,6 +116,7 @@ struct ClockPortInitElem {
     const char *name;
     bool is_output;
     ClockCallback *callback;
+    unsigned int callback_events;
     size_t offset;
 };
 
@@ -119,10 +124,11 @@ struct ClockPortInitElem {
     (offsetof(devstate, field) + \
      type_check(Clock *, typeof_field(devstate, field)))
 
-#define QDEV_CLOCK(out_not_in, devstate, field, cb) { \
+#define QDEV_CLOCK(out_not_in, devstate, field, cb, cbevents) {  \
     .name = (stringify(field)), \
     .is_output = out_not_in, \
     .callback = cb, \
+    .callback_events = cbevents, \
     .offset = clock_offset_value(devstate, field), \
 }
 
@@ -133,14 +139,15 @@ struct ClockPortInitElem {
  * @field: a field in @_devstate (must be Clock*)
  * @callback: (for input only) callback (or NULL) to be called with the device
  * state as argument
+ * @cbevents: (for input only) ClockEvent mask for when callback is called
  *
  * The name of the clock will be derived from @field
  */
-#define QDEV_CLOCK_IN(devstate, field, callback) \
-    QDEV_CLOCK(false, devstate, field, callback)
+#define QDEV_CLOCK_IN(devstate, field, callback, cbevents)       \
+    QDEV_CLOCK(false, devstate, field, callback, cbevents)
 
 #define QDEV_CLOCK_OUT(devstate, field) \
-    QDEV_CLOCK(true, devstate, field, NULL)
+    QDEV_CLOCK(true, devstate, field, NULL, 0)
 
 #define QDEV_CLOCK_END { .name = NULL }
 
index 5e737195b59f2dc8beeaa95b5a5623b5574f72a8..151d9682380d292357a5b28c77824e15050fcb55 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef QDEV_CORE_H
 #define QDEV_CORE_H
 
+#include "qemu/atomic.h"
 #include "qemu/queue.h"
 #include "qemu/bitmap.h"
 #include "qemu/rcu.h"
@@ -9,6 +10,65 @@
 #include "hw/hotplug.h"
 #include "hw/resettable.h"
 
+/**
+ * DOC: The QEMU Device API
+ *
+ * All modern devices should represented as a derived QOM class of
+ * TYPE_DEVICE. The device API introduces the additional methods of
+ * @realize and @unrealize to represent additional stages in a device
+ * objects life cycle.
+ *
+ * Realization
+ * -----------
+ *
+ * Devices are constructed in two stages:
+ *
+ * 1) object instantiation via object_initialize() and
+ * 2) device realization via the #DeviceState.realized property
+ *
+ * The former may not fail (and must not abort or exit, since it is called
+ * during device introspection already), and the latter may return error
+ * information to the caller and must be re-entrant.
+ * Trivial field initializations should go into #TypeInfo.instance_init.
+ * Operations depending on @props static properties should go into @realize.
+ * After successful realization, setting static properties will fail.
+ *
+ * As an interim step, the #DeviceState.realized property can also be
+ * set with qdev_realize(). In the future, devices will propagate this
+ * state change to their children and along busses they expose. The
+ * point in time will be deferred to machine creation, so that values
+ * set in @realize will not be introspectable beforehand. Therefore
+ * devices must not create children during @realize; they should
+ * initialize them via object_initialize() in their own
+ * #TypeInfo.instance_init and forward the realization events
+ * appropriately.
+ *
+ * Any type may override the @realize and/or @unrealize callbacks but needs
+ * to call the parent type's implementation if keeping their functionality
+ * is desired. Refer to QOM documentation for further discussion and examples.
+ *
+ * .. note::
+ *   Since TYPE_DEVICE doesn't implement @realize and @unrealize, types
+ *   derived directly from it need not call their parent's @realize and
+ *   @unrealize. For other types consult the documentation and
+ *   implementation of the respective parent types.
+ *
+ * Hiding a device
+ * ---------------
+ *
+ * To hide a device, a DeviceListener function hide_device() needs to
+ * be registered. It can be used to defer adding a device and
+ * therefore hide it from the guest. The handler registering to this
+ * DeviceListener can save the QOpts passed to it for re-using it
+ * later. It must return if it wants the device to be hidden or
+ * visible. When the handler function decides the device shall be
+ * visible it will be added with qdev_device_add() and realized as any
+ * other device. Otherwise qdev_device_add() will return early without
+ * adding the device. The guest will not see a "hidden" device until
+ * it was marked visible and qdev_device_add called again.
+ *
+ */
+
 enum {
     DEV_NVECTORS_UNSPECIFIED = -1,
 };
@@ -26,6 +86,7 @@ typedef enum DeviceCategory {
     DEVICE_CATEGORY_SOUND,
     DEVICE_CATEGORY_MISC,
     DEVICE_CATEGORY_CPU,
+    DEVICE_CATEGORY_WATCHDOG,
     DEVICE_CATEGORY_MAX
 } DeviceCategory;
 
@@ -36,7 +97,7 @@ typedef void (*BusRealize)(BusState *bus, Error **errp);
 typedef void (*BusUnrealize)(BusState *bus);
 
 /**
- * DeviceClass:
+ * struct DeviceClass - The base class for all devices.
  * @props: Properties accessing state fields.
  * @realize: Callback function invoked when the #DeviceState:realized
  * property is changed to %true.
@@ -45,71 +106,37 @@ typedef void (*BusUnrealize)(BusState *bus);
  * @hotpluggable: indicates if #DeviceClass is hotpluggable, available
  * as readonly "hotpluggable" property of #DeviceState instance
  *
- * # Realization #
- * Devices are constructed in two stages,
- * 1) object instantiation via object_initialize() and
- * 2) device realization via #DeviceState:realized property.
- * The former may not fail (and must not abort or exit, since it is called
- * during device introspection already), and the latter may return error
- * information to the caller and must be re-entrant.
- * Trivial field initializations should go into #TypeInfo.instance_init.
- * Operations depending on @props static properties should go into @realize.
- * After successful realization, setting static properties will fail.
- *
- * As an interim step, the #DeviceState:realized property can also be
- * set with qdev_realize().
- * In the future, devices will propagate this state change to their children
- * and along busses they expose.
- * The point in time will be deferred to machine creation, so that values
- * set in @realize will not be introspectable beforehand. Therefore devices
- * must not create children during @realize; they should initialize them via
- * object_initialize() in their own #TypeInfo.instance_init and forward the
- * realization events appropriately.
- *
- * Any type may override the @realize and/or @unrealize callbacks but needs
- * to call the parent type's implementation if keeping their functionality
- * is desired. Refer to QOM documentation for further discussion and examples.
- *
- * <note>
- *   <para>
- * Since TYPE_DEVICE doesn't implement @realize and @unrealize, types
- * derived directly from it need not call their parent's @realize and
- * @unrealize.
- * For other types consult the documentation and implementation of the
- * respective parent types.
- *   </para>
- * </note>
- *
- * # Hiding a device #
- * To hide a device, a DeviceListener function should_be_hidden() needs to
- * be registered.
- * It can be used to defer adding a device and therefore hide it from the
- * guest. The handler registering to this DeviceListener can save the QOpts
- * passed to it for re-using it later and must return that it wants the device
- * to be/remain hidden or not. When the handler function decides the device
- * shall not be hidden it will be added in qdev_device_add() and
- * realized as any other device. Otherwise qdev_device_add() will return early
- * without adding the device. The guest will not see a "hidden" device
- * until it was marked don't hide and qdev_device_add called again.
- *
  */
 struct DeviceClass {
-    /*< private >*/
+    /* private: */
     ObjectClass parent_class;
-    /*< public >*/
 
+    /* public: */
+
+    /**
+     * @categories: device categories device belongs to
+     */
     DECLARE_BITMAP(categories, DEVICE_CATEGORY_MAX);
+    /**
+     * @fw_name: name used to identify device to firmware interfaces
+     */
     const char *fw_name;
+    /**
+     * @desc: human readable description of device
+     */
     const char *desc;
 
-    /*
-     * The underscore at the end ensures a compile-time error if someone
-     * assigns to dc->props instead of using device_class_set_props.
+    /**
+     * @props_: properties associated with device, should only be
+     * assigned by using device_class_set_props(). The underscore
+     * ensures a compile-time error if someone attempts to assign
+     * dc->props directly.
      */
     Property *props_;
 
-    /*
-     * Can this device be instantiated with -device / device_add?
+    /**
+     * @user_creatable: Can user instantiate with -device / device_add?
+     *
      * All devices should support instantiation with device_add, and
      * this flag should not exist.  But we're not there, yet.  Some
      * devices fail to instantiate with cryptic error messages.
@@ -117,25 +144,35 @@ struct DeviceClass {
      * behavior would be cruel; clearing this flag will protect them.
      * It should never be cleared without a comment explaining why it
      * is cleared.
+     *
      * TODO remove once we're there
      */
     bool user_creatable;
     bool hotpluggable;
 
     /* callbacks */
-    /*
-     * Reset method here is deprecated and replaced by methods in the
-     * resettable class interface to implement a multi-phase reset.
+    /**
+     * @reset: deprecated device reset method pointer
+     *
+     * Modern code should use the ResettableClass interface to
+     * implement a multi-phase reset.
+     *
      * TODO: remove once every reset callback is unused
      */
     DeviceReset reset;
     DeviceRealize realize;
     DeviceUnrealize unrealize;
 
-    /* device state */
+    /**
+     * @vmsd: device state serialisation description for
+     * migration/save/restore
+     */
     const VMStateDescription *vmsd;
 
-    /* Private to qdev / bus.  */
+    /**
+     * @bus_type: bus type
+     * private: to qdev / bus.
+     */
     const char *bus_type;
 };
 
@@ -160,47 +197,117 @@ struct NamedClockList {
     QLIST_ENTRY(NamedClockList) node;
 };
 
+typedef struct {
+    bool engaged_in_io;
+} MemReentrancyGuard;
+
+
+typedef QLIST_HEAD(, NamedGPIOList) NamedGPIOListHead;
+typedef QLIST_HEAD(, NamedClockList) NamedClockListHead;
+typedef QLIST_HEAD(, BusState) BusStateHead;
+
 /**
- * DeviceState:
- * @realized: Indicates whether the device has been fully constructed.
- *            When accessed outside big qemu lock, must be accessed with
- *            qatomic_load_acquire()
- * @reset: ResettableState for the device; handled by Resettable interface.
+ * struct DeviceState - common device state, accessed with qdev helpers
  *
  * This structure should not be accessed directly.  We declare it here
  * so that it can be embedded in individual device state structures.
  */
 struct DeviceState {
-    /*< private >*/
+    /* private: */
     Object parent_obj;
-    /*< public >*/
+    /* public: */
 
-    const char *id;
+    /**
+     * @id: global device id
+     */
+    char *id;
+    /**
+     * @canonical_path: canonical path of realized device in the QOM tree
+     */
     char *canonical_path;
+    /**
+     * @realized: has device been realized?
+     */
     bool realized;
+    /**
+     * @pending_deleted_event: track pending deletion events during unplug
+     */
     bool pending_deleted_event;
-    QemuOpts *opts;
+    /**
+     * @pending_deleted_expires_ms: optional timeout for deletion events
+     */
+    int64_t pending_deleted_expires_ms;
+    /**
+     * @opts: QDict of options for the device
+     */
+    QDict *opts;
+    /**
+     * @hotplugged: was device added after PHASE_MACHINE_READY?
+     */
     int hotplugged;
+    /**
+     * @allow_unplug_during_migration: can device be unplugged during migration
+     */
     bool allow_unplug_during_migration;
+    /**
+     * @parent_bus: bus this device belongs to
+     */
     BusState *parent_bus;
-    QLIST_HEAD(, NamedGPIOList) gpios;
-    QLIST_HEAD(, NamedClockList) clocks;
-    QLIST_HEAD(, BusState) child_bus;
+    /**
+     * @gpios: QLIST of named GPIOs the device provides.
+     */
+    NamedGPIOListHead gpios;
+    /**
+     * @clocks: QLIST of named clocks the device provides.
+     */
+    NamedClockListHead clocks;
+    /**
+     * @child_bus: QLIST of child buses
+     */
+    BusStateHead child_bus;
+    /**
+     * @num_child_bus: number of @child_bus entries
+     */
     int num_child_bus;
+    /**
+     * @instance_id_alias: device alias for handling legacy migration setups
+     */
     int instance_id_alias;
+    /**
+     * @alias_required_for_version: indicates @instance_id_alias is
+     * needed for migration
+     */
     int alias_required_for_version;
+    /**
+     * @reset: ResettableState for the device; handled by Resettable interface.
+     */
     ResettableState reset;
+    /**
+     * @unplug_blockers: list of reasons to block unplugging of device
+     */
+    GSList *unplug_blockers;
+    /**
+     * @mem_reentrancy_guard: Is the device currently in mmio/pio/dma?
+     *
+     * Used to prevent re-entrancy confusing things.
+     */
+    MemReentrancyGuard mem_reentrancy_guard;
 };
 
 struct DeviceListener {
     void (*realize)(DeviceListener *listener, DeviceState *dev);
     void (*unrealize)(DeviceListener *listener, DeviceState *dev);
     /*
-     * This callback is called upon init of the DeviceState and allows to
-     * inform qdev that a device should be hidden, depending on the device
-     * opts, for example, to hide a standby device.
+     * This callback is called upon init of the DeviceState and
+     * informs qdev if a device should be visible or hidden.  We can
+     * hide a failover device depending for example on the device
+     * opts.
+     *
+     * On errors, it returns false and errp is set. Device creation
+     * should fail in this case.
      */
-    int (*should_be_hidden)(DeviceListener *listener, QemuOpts *device_opts);
+    bool (*hide_device)(DeviceListener *listener, const QDict *device_opts,
+                        bool from_json, Error **errp);
     QTAILQ_ENTRY(DeviceListener) link;
 };
 
@@ -250,69 +357,50 @@ typedef struct BusChild {
 
 #define QDEV_HOTPLUG_HANDLER_PROPERTY "hotplug-handler"
 
+typedef QTAILQ_HEAD(, BusChild) BusChildHead;
+typedef QLIST_ENTRY(BusState) BusStateEntry;
+
 /**
- * BusState:
+ * struct BusState:
+ * @obj: parent object
+ * @parent: parent Device
+ * @name: name of bus
  * @hotplug_handler: link to a hotplug handler associated with bus.
- * @reset: ResettableState for the bus; handled by Resettable interface.
+ * @max_index: max number of child buses
+ * @realized: is the bus itself realized?
+ * @full: is the bus full?
+ * @num_children: current number of child buses
  */
 struct BusState {
+    /* private: */
     Object obj;
+    /* public: */
     DeviceState *parent;
     char *name;
     HotplugHandler *hotplug_handler;
     int max_index;
     bool realized;
+    bool full;
     int num_children;
 
-    /*
-     * children is a RCU QTAILQ, thus readers must use RCU to access it,
-     * and writers must hold the big qemu lock
+    /**
+     * @children: an RCU protected QTAILQ, thus readers must use RCU
+     * to access it, and writers must hold the big qemu lock
+     */
+    BusChildHead children;
+    /**
+     * @sibling: next bus
+     */
+    BusStateEntry sibling;
+    /**
+     * @reset: ResettableState for the bus; handled by Resettable interface.
      */
-
-    QTAILQ_HEAD(, BusChild) children;
-    QLIST_ENTRY(BusState) sibling;
     ResettableState reset;
 };
 
 /**
- * Property:
- * @set_default: true if the default value should be set from @defval,
- *    in which case @info->set_default_value must not be NULL
- *    (if false then no default value is set by the property system
- *     and the field retains whatever value it was given by instance_init).
- * @defval: default value for the property. This is used only if @set_default
- *     is true.
- */
-struct Property {
-    const char   *name;
-    const PropertyInfo *info;
-    ptrdiff_t    offset;
-    uint8_t      bitnr;
-    bool         set_default;
-    union {
-        int64_t i;
-        uint64_t u;
-    } defval;
-    int          arrayoffset;
-    const PropertyInfo *arrayinfo;
-    int          arrayfieldsize;
-    const char   *link_type;
-};
-
-struct PropertyInfo {
-    const char *name;
-    const char *description;
-    const QEnumLookup *enum_table;
-    int (*print)(DeviceState *dev, Property *prop, char *dest, size_t len);
-    void (*set_default_value)(ObjectProperty *op, const Property *prop);
-    void (*create)(ObjectClass *oc, Property *prop);
-    ObjectPropertyAccessor *get;
-    ObjectPropertyAccessor *set;
-    ObjectPropertyRelease *release;
-};
-
-/**
- * GlobalProperty:
+ * typedef GlobalProperty - a global property type
+ *
  * @used: Set to true if property was used when initializing a device.
  * @optional: If set to true, GlobalProperty will be skipped without errors
  *            if the property doesn't exist.
@@ -346,17 +434,35 @@ compat_props_add(GPtrArray *arr,
  * This only allocates the memory and initializes the device state
  * structure, ready for the caller to set properties if they wish.
  * The device still needs to be realized.
- * The returned object has a reference count of 1.
+ *
+ * Return: a derived DeviceState object with a reference count of 1.
  */
 DeviceState *qdev_new(const char *name);
+
 /**
  * qdev_try_new: Try to create a device on the heap
  * @name: device type to create
  *
  * This is like qdev_new(), except it returns %NULL when type @name
  * does not exist, rather than asserting.
+ *
+ * Return: a derived DeviceState object with a reference count of 1 or
+ * NULL if type @name does not exist.
  */
 DeviceState *qdev_try_new(const char *name);
+
+/**
+ * qdev_is_realized() - check if device is realized
+ * @dev: The device to check.
+ *
+ * Context: May be called outside big qemu lock.
+ * Return: true if the device has been fully constructed, false otherwise.
+ */
+static inline bool qdev_is_realized(DeviceState *dev)
+{
+    return qatomic_load_acquire(&dev->realized);
+}
+
 /**
  * qdev_realize: Realize @dev.
  * @dev: device to realize
@@ -368,13 +474,14 @@ DeviceState *qdev_try_new(const char *name);
  * @dev must not be plugged into a bus already.
  * If @bus, plug @dev into @bus.  This takes a reference to @dev.
  * If @dev has no QOM parent, make one up, taking another reference.
- * On success, return true.
- * On failure, store an error through @errp and return false.
  *
  * If you created @dev using qdev_new(), you probably want to use
  * qdev_realize_and_unref() instead.
+ *
+ * Return: true on success, else false setting @errp with error
  */
 bool qdev_realize(DeviceState *dev, BusState *bus, Error **errp);
+
 /**
  * qdev_realize_and_unref: Realize @dev and drop a reference
  * @dev: device to realize
@@ -398,8 +505,11 @@ bool qdev_realize(DeviceState *dev, BusState *bus, Error **errp);
  * for the only reference to the child device to be held by the parent
  * via the child<> property, and so the reference-count-drop done here
  * would be incorrect. For that use case you want qdev_realize().
+ *
+ * Return: true on success, else false setting @errp with error
  */
 bool qdev_realize_and_unref(DeviceState *dev, BusState *bus, Error **errp);
+
 /**
  * qdev_unrealize: Unrealize a device
  * @dev: device to unrealize
@@ -409,7 +519,7 @@ bool qdev_realize_and_unref(DeviceState *dev, BusState *bus, Error **errp);
  *
  *  - unrealize any child buses by calling qbus_unrealize()
  *    (this will recursively unrealize any devices on those buses)
- *  - call the the unrealize method of @dev
+ *  - call the unrealize method of @dev
  *
  * The device can then be freed by causing its reference count to go
  * to zero.
@@ -425,16 +535,16 @@ void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id,
 HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev);
 HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev);
 bool qdev_hotplug_allowed(DeviceState *dev, Error **errp);
+
 /**
- * qdev_get_hotplug_handler: Get handler responsible for device wiring
- *
- * Find HOTPLUG_HANDLER for @dev that provides [pre|un]plug callbacks for it.
+ * qdev_get_hotplug_handler() - Get handler responsible for device wiring
+ * @dev: the device we want the HOTPLUG_HANDLER for.
  *
  * Note: in case @dev has a parent bus, it will be returned as handler unless
  * machine handler overrides it.
  *
- * Returns: pointer to object that implements TYPE_HOTPLUG_HANDLER interface
- *          or NULL if there aren't any.
+ * Return: pointer to object that implements TYPE_HOTPLUG_HANDLER interface
+ * or NULL if there aren't any.
  */
 HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev);
 void qdev_unplug(DeviceState *dev, Error **errp);
@@ -444,7 +554,35 @@ void qdev_machine_creation_done(void);
 bool qdev_machine_modified(void);
 
 /**
- * GpioPolarity: Polarity of a GPIO line
+ * qdev_add_unplug_blocker: Add an unplug blocker to a device
+ *
+ * @dev: Device to be blocked from unplug
+ * @reason: Reason for blocking
+ */
+void qdev_add_unplug_blocker(DeviceState *dev, Error *reason);
+
+/**
+ * qdev_del_unplug_blocker: Remove an unplug blocker from a device
+ *
+ * @dev: Device to be unblocked
+ * @reason: Pointer to the Error used with qdev_add_unplug_blocker.
+ *          Used as a handle to lookup the blocker for deletion.
+ */
+void qdev_del_unplug_blocker(DeviceState *dev, Error *reason);
+
+/**
+ * qdev_unplug_blocked: Confirm if a device is blocked from unplug
+ *
+ * @dev: Device to be tested
+ * @errp: The reasons why the device is blocked, if any
+ *
+ * Returns: true (also setting @errp) if device is blocked from unplug,
+ * false otherwise
+ */
+bool qdev_unplug_blocked(DeviceState *dev, Error **errp);
+
+/**
+ * typedef GpioPolarity - Polarity of a GPIO line
  *
  * GPIO lines use either positive (active-high) logic,
  * or negative (active-low) logic.
@@ -476,8 +614,11 @@ typedef enum {
  * connect another device's output GPIO line to this input.
  *
  * For named input GPIO lines, use qdev_get_gpio_in_named().
+ *
+ * Return: qemu_irq corresponding to anonymous input GPIO line
  */
 qemu_irq qdev_get_gpio_in(DeviceState *dev, int n);
+
 /**
  * qdev_get_gpio_in_named: Get one of a device's named input GPIO lines
  * @dev: Device whose GPIO we want
@@ -492,6 +633,8 @@ qemu_irq qdev_get_gpio_in(DeviceState *dev, int n);
  * array); this function will assert() if passed an invalid name or index.
  *
  * For anonymous input GPIO lines, use qdev_get_gpio_in().
+ *
+ * Return: qemu_irq corresponding to named input GPIO line
  */
 qemu_irq qdev_get_gpio_in_named(DeviceState *dev, const char *name, int n);
 
@@ -516,7 +659,7 @@ qemu_irq qdev_get_gpio_in_named(DeviceState *dev, const char *name, int n);
  * qemu_irqs at once, or to connect multiple outbound GPIOs to the
  * same qemu_irq. (Warning: there is no assertion or other guard to
  * catch this error: the model will just not do the right thing.)
- * Instead, for fan-out you can use the TYPE_IRQ_SPLIT device: connect
+ * Instead, for fan-out you can use the TYPE_SPLIT_IRQ device: connect
  * a device's outbound GPIO to the splitter's input, and connect each
  * of the splitter's outputs to a different device.  For fan-in you
  * can use the TYPE_OR_IRQ device, which is a model of a logical OR
@@ -525,12 +668,14 @@ qemu_irq qdev_get_gpio_in_named(DeviceState *dev, const char *name, int n);
  * For named output GPIO lines, use qdev_connect_gpio_out_named().
  */
 void qdev_connect_gpio_out(DeviceState *dev, int n, qemu_irq pin);
+
 /**
- * qdev_connect_gpio_out: Connect one of a device's anonymous output GPIO lines
+ * qdev_connect_gpio_out_named: Connect one of a device's named output
+ *                              GPIO lines
  * @dev: Device whose GPIO to connect
  * @name: Name of the output GPIO array
  * @n: Number of the anonymous output GPIO line (which must be in range)
- * @pin: qemu_irq to connect the output line to
+ * @input_pin: qemu_irq to connect the output line to
  *
  * This function connects an anonymous output GPIO line on a device
  * up to an arbitrary qemu_irq, so that when the device asserts that
@@ -548,10 +693,11 @@ void qdev_connect_gpio_out(DeviceState *dev, int n, qemu_irq pin);
  * qemu_irqs at once, or to connect multiple outbound GPIOs to the
  * same qemu_irq; see qdev_connect_gpio_out() for details.
  *
- * For named output GPIO lines, use qdev_connect_gpio_out_named().
+ * For anonymous output GPIO lines, use qdev_connect_gpio_out().
  */
 void qdev_connect_gpio_out_named(DeviceState *dev, const char *name, int n,
-                                 qemu_irq pin);
+                                 qemu_irq input_pin);
+
 /**
  * qdev_get_gpio_out_connector: Get the qemu_irq connected to an output GPIO
  * @dev: Device whose output GPIO we are interested in
@@ -567,8 +713,11 @@ void qdev_connect_gpio_out_named(DeviceState *dev, const char *name, int n,
  *
  * You probably don't need to use this function -- it is used only
  * by the platform-bus subsystem.
+ *
+ * Return: qemu_irq associated with GPIO or NULL if un-wired.
  */
 qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n);
+
 /**
  * qdev_intercept_gpio_out: Intercept an existing GPIO connection
  * @dev: Device to intercept the outbound GPIO line from
@@ -576,14 +725,17 @@ qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n);
  * @name: Name of the output GPIO array
  * @n: Number of the GPIO line in the array
  *
- * This function is provided only for use by the qtest testing framework
- * and is not suitable for use in non-testing parts of QEMU.
+ * .. note::
+ *   This function is provided only for use by the qtest testing framework
+ *   and is not suitable for use in non-testing parts of QEMU.
  *
  * This function breaks an existing connection of an outbound GPIO
  * line from @dev, and replaces it with the new qemu_irq @icpt, as if
  * ``qdev_connect_gpio_out_named(dev, icpt, name, n)`` had been called.
  * The previously connected qemu_irq is returned, so it can be restored
  * by a second call to qdev_intercept_gpio_out() if desired.
+ *
+ * Return: old disconnected qemu_irq if one existed
  */
 qemu_irq qdev_intercept_gpio_out(DeviceState *dev, qemu_irq icpt,
                                  const char *name, int n);
@@ -610,6 +762,7 @@ BusState *qdev_get_child_bus(DeviceState *dev, const char *name);
  * hold of an input GPIO line to manipulate it.
  */
 void qdev_init_gpio_in(DeviceState *dev, qemu_irq_handler handler, int n);
+
 /**
  * qdev_init_gpio_out: create an array of anonymous output GPIO lines
  * @dev: Device to create output GPIOs for
@@ -632,10 +785,15 @@ void qdev_init_gpio_in(DeviceState *dev, qemu_irq_handler handler, int n);
  *
  * See qdev_connect_gpio_out() for how code that uses such a device
  * can connect to one of its output GPIO lines.
+ *
+ * There is no need to release the @pins allocated array because it
+ * will be automatically released when @dev calls its instance_finalize()
+ * handler.
  */
 void qdev_init_gpio_out(DeviceState *dev, qemu_irq *pins, int n);
+
 /**
- * qdev_init_gpio_out: create an array of named output GPIO lines
+ * qdev_init_gpio_out_named: create an array of named output GPIO lines
  * @dev: Device to create output GPIOs for
  * @pins: Pointer to qemu_irq or qemu_irq array for the GPIO lines
  * @name: Name to give this array of GPIO lines
@@ -647,10 +805,9 @@ void qdev_init_gpio_out(DeviceState *dev, qemu_irq *pins, int n);
  */
 void qdev_init_gpio_out_named(DeviceState *dev, qemu_irq *pins,
                               const char *name, int n);
+
 /**
- * qdev_init_gpio_in_named_with_opaque: create an array of input GPIO lines
- *   for the specified device
- *
+ * qdev_init_gpio_in_named_with_opaque() - create an array of input GPIO lines
  * @dev: Device to create input GPIOs for
  * @handler: Function to call when GPIO line value is set
  * @opaque: Opaque data pointer to pass to @handler
@@ -663,8 +820,11 @@ void qdev_init_gpio_in_named_with_opaque(DeviceState *dev,
                                          const char *name, int n);
 
 /**
- * qdev_init_gpio_in_named: create an array of input GPIO lines
- *   for the specified device
+ * qdev_init_gpio_in_named() - create an array of input GPIO lines
+ * @dev: device to add array to
+ * @handler: a &typedef qemu_irq_handler function to call when GPIO is set
+ * @name: Name of the GPIO input (must be unique for this device)
+ * @n: Number of GPIO lines in this input set
  *
  * Like qdev_init_gpio_in_named_with_opaque(), but the opaque pointer
  * passed to the handler is @dev (which is the most commonly desired behaviour).
@@ -698,7 +858,7 @@ static inline void qdev_init_gpio_in_named(DeviceState *dev,
 void qdev_pass_gpios(DeviceState *dev, DeviceState *container,
                      const char *name);
 
-BusState *qdev_get_parent_bus(DeviceState *dev);
+BusState *qdev_get_parent_bus(const DeviceState *dev);
 
 /*** BUS API. ***/
 
@@ -708,9 +868,9 @@ DeviceState *qdev_find_recursive(BusState *bus, const char *id);
 typedef int (qbus_walkerfn)(BusState *bus, void *opaque);
 typedef int (qdev_walkerfn)(DeviceState *dev, void *opaque);
 
-void qbus_create_inplace(void *bus, size_t size, const char *typename,
-                         DeviceState *parent, const char *name);
-BusState *qbus_create(const char *typename, DeviceState *parent, const char *name);
+void qbus_init(void *bus, size_t size, const char *typename,
+               DeviceState *parent, const char *name);
+BusState *qbus_new(const char *typename, DeviceState *parent, const char *name);
 bool qbus_realize(BusState *bus, Error **errp);
 void qbus_unrealize(BusState *bus);
 
@@ -727,40 +887,17 @@ int qdev_walk_children(DeviceState *dev,
                        void *opaque);
 
 /**
- * @qdev_reset_all:
- * Reset @dev. See @qbus_reset_all() for more details.
+ * device_cold_reset() - perform a recursive cold reset on a device
+ * @dev: device to reset.
  *
- * Note: This function is deprecated and will be removed when it becomes unused.
- * Please use device_cold_reset() now.
- */
-void qdev_reset_all(DeviceState *dev);
-void qdev_reset_all_fn(void *opaque);
-
-/**
- * @qbus_reset_all:
- * @bus: Bus to be reset.
- *
- * Reset @bus and perform a bus-level ("hard") reset of all devices connected
- * to it, including recursive processing of all buses below @bus itself.  A
- * hard reset means that qbus_reset_all will reset all state of the device.
- * For PCI devices, for example, this will include the base address registers
- * or configuration space.
- *
- * Note: This function is deprecated and will be removed when it becomes unused.
- * Please use bus_cold_reset() now.
- */
-void qbus_reset_all(BusState *bus);
-void qbus_reset_all_fn(void *opaque);
-
-/**
- * device_cold_reset:
  * Reset device @dev and perform a recursive processing using the resettable
  * interface. It triggers a RESET_TYPE_COLD.
  */
 void device_cold_reset(DeviceState *dev);
 
 /**
- * bus_cold_reset:
+ * bus_cold_reset() - perform a recursive cold reset on a bus
+ * @bus: bus to reset
  *
  * Reset bus @bus and perform a recursive processing using the resettable
  * interface. It triggers a RESET_TYPE_COLD.
@@ -768,14 +905,18 @@ void device_cold_reset(DeviceState *dev);
 void bus_cold_reset(BusState *bus);
 
 /**
- * device_is_in_reset:
- * Return true if the device @dev is currently being reset.
+ * device_is_in_reset() - check device reset state
+ * @dev: device to check
+ *
+ * Return: true if the device @dev is currently being reset.
  */
 bool device_is_in_reset(DeviceState *dev);
 
 /**
- * bus_is_in_reset:
- * Return true if the bus @bus is currently being reset.
+ * bus_is_in_reset() - check bus reset state
+ * @bus: bus to check
+ *
+ * Return: true if the bus @bus is currently being reset.
  */
 bool bus_is_in_reset(BusState *bus);
 
@@ -786,35 +927,61 @@ char *qdev_get_fw_dev_path(DeviceState *dev);
 char *qdev_get_own_fw_dev_path_from_handler(BusState *bus, DeviceState *dev);
 
 /**
- * @qdev_machine_init
+ * device_class_set_props(): add a set of properties to an device
+ * @dc: the parent DeviceClass all devices inherit
+ * @props: an array of properties, terminate by DEFINE_PROP_END_OF_LIST()
  *
- * Initialize platform devices before machine init.  This is a hack until full
- * support for composition is added.
+ * This will add a set of properties to the object. It will fault if
+ * you attempt to add an existing property defined by a parent class.
+ * To modify an inherited property you need to use????
  */
-void qdev_machine_init(void);
-
-/**
- * device_legacy_reset:
- *
- * Reset a single device (by calling the reset method).
- * Note: This function is deprecated and will be removed when it becomes unused.
- * Please use device_cold_reset() now.
- */
-void device_legacy_reset(DeviceState *dev);
-
 void device_class_set_props(DeviceClass *dc, Property *props);
 
 /**
- * device_class_set_parent_reset:
+ * device_class_set_parent_reset() - legacy set device reset handlers
+ * @dc: device class
+ * @dev_reset: function pointer to reset handler
+ * @parent_reset: function pointer to parents reset handler
+ *
+ * Modern code should use the ResettableClass interface to
+ * implement a multi-phase reset instead.
+ *
  * TODO: remove the function when DeviceClass's reset method
  * is not used anymore.
  */
 void device_class_set_parent_reset(DeviceClass *dc,
                                    DeviceReset dev_reset,
                                    DeviceReset *parent_reset);
+
+/**
+ * device_class_set_parent_realize() - set up for chaining realize fns
+ * @dc: The device class
+ * @dev_realize: the device realize function
+ * @parent_realize: somewhere to save the parents realize function
+ *
+ * This is intended to be used when the new realize function will
+ * eventually call its parent realization function during creation.
+ * This requires storing the function call somewhere (usually in the
+ * instance structure) so you can eventually call
+ * dc->parent_realize(dev, errp)
+ */
 void device_class_set_parent_realize(DeviceClass *dc,
                                      DeviceRealize dev_realize,
                                      DeviceRealize *parent_realize);
+
+
+/**
+ * device_class_set_parent_unrealize() - set up for chaining unrealize fns
+ * @dc: The device class
+ * @dev_unrealize: the device realize function
+ * @parent_unrealize: somewhere to save the parents unrealize function
+ *
+ * This is intended to be used when the new unrealize function will
+ * eventually call its parent unrealization function during the
+ * unrealize phase. This requires storing the function call somewhere
+ * (usually in the instance structure) so you can eventually call
+ * dc->parent_unrealize(dev);
+ */
 void device_class_set_parent_unrealize(DeviceClass *dc,
                                        DeviceUnrealize dev_unrealize,
                                        DeviceUnrealize *parent_unrealize);
@@ -823,12 +990,12 @@ const VMStateDescription *qdev_get_vmsd(DeviceState *dev);
 
 const char *qdev_fw_name(DeviceState *dev);
 
+void qdev_assert_realized_properly(void);
 Object *qdev_get_machine(void);
 
 /* FIXME: make this a link<> */
 bool qdev_set_parent_bus(DeviceState *dev, BusState *bus, Error **errp);
 
-extern bool qdev_hotplug;
 extern bool qdev_hot_removed;
 
 char *qdev_get_dev_path(DeviceState *dev);
@@ -838,20 +1005,88 @@ void qbus_set_bus_hotplug_handler(BusState *bus);
 
 static inline bool qbus_is_hotpluggable(BusState *bus)
 {
-   return bus->hotplug_handler;
+    HotplugHandler *plug_handler = bus->hotplug_handler;
+    bool ret = !!plug_handler;
+
+    if (plug_handler) {
+        HotplugHandlerClass *hdc;
+
+        hdc = HOTPLUG_HANDLER_GET_CLASS(plug_handler);
+        if (hdc->is_hotpluggable_bus) {
+            ret = hdc->is_hotpluggable_bus(plug_handler, bus);
+        }
+    }
+    return ret;
+}
+
+/**
+ * qbus_mark_full: Mark this bus as full, so no more devices can be attached
+ * @bus: Bus to mark as full
+ *
+ * By default, QEMU will allow devices to be plugged into a bus up
+ * to the bus class's device count limit. Calling this function
+ * marks a particular bus as full, so that no more devices can be
+ * plugged into it. In particular this means that the bus will not
+ * be considered as a candidate for plugging in devices created by
+ * the user on the commandline or via the monitor.
+ * If a machine has multiple buses of a given type, such as I2C,
+ * where some of those buses in the real hardware are used only for
+ * internal devices and some are exposed via expansion ports, you
+ * can use this function to mark the internal-only buses as full
+ * after you have created all their internal devices. Then user
+ * created devices will appear on the expansion-port bus where
+ * guest software expects them.
+ */
+static inline void qbus_mark_full(BusState *bus)
+{
+    bus->full = true;
 }
 
 void device_listener_register(DeviceListener *listener);
 void device_listener_unregister(DeviceListener *listener);
 
 /**
- * @qdev_should_hide_device:
- * @opts: QemuOpts as passed on cmdline.
+ * qdev_should_hide_device() - check if device should be hidden
+ *
+ * @opts: options QDict
+ * @from_json: true if @opts entries are typed, false for all strings
+ * @errp: pointer to error object
  *
- * Check if a device should be added.
- * When a device is added via qdev_device_add() this will be called,
- * and return if the device should be added now or not.
+ * When a device is added via qdev_device_add() this will be called.
+ *
+ * Return: if the device should be added now or not.
  */
-bool qdev_should_hide_device(QemuOpts *opts);
+bool qdev_should_hide_device(const QDict *opts, bool from_json, Error **errp);
+
+typedef enum MachineInitPhase {
+    /* current_machine is NULL.  */
+    PHASE_NO_MACHINE,
+
+    /* current_machine is not NULL, but current_machine->accel is NULL.  */
+    PHASE_MACHINE_CREATED,
+
+    /*
+     * current_machine->accel is not NULL, but the machine properties have
+     * not been validated and machine_class->init has not yet been called.
+     */
+    PHASE_ACCEL_CREATED,
+
+    /*
+     * machine_class->init has been called, thus creating any embedded
+     * devices and validating machine properties.  Devices created at
+     * this time are considered to be cold-plugged.
+     */
+    PHASE_MACHINE_INITIALIZED,
+
+    /*
+     * QEMU is ready to start CPUs and devices created at this time
+     * are considered to be hot-plugged.  The monitor is not restricted
+     * to "preconfig" commands.
+     */
+    PHASE_MACHINE_READY,
+} MachineInitPhase;
+
+bool phase_check(MachineInitPhase phase);
+void phase_advance(MachineInitPhase phase);
 
 #endif
diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h
new file mode 100644 (file)
index 0000000..91f7a24
--- /dev/null
@@ -0,0 +1,85 @@
+#ifndef HW_QDEV_PROPERTIES_SYSTEM_H
+#define HW_QDEV_PROPERTIES_SYSTEM_H
+
+#include "hw/qdev-properties.h"
+
+extern const PropertyInfo qdev_prop_chr;
+extern const PropertyInfo qdev_prop_macaddr;
+extern const PropertyInfo qdev_prop_reserved_region;
+extern const PropertyInfo qdev_prop_multifd_compression;
+extern const PropertyInfo qdev_prop_mig_mode;
+extern const PropertyInfo qdev_prop_losttickpolicy;
+extern const PropertyInfo qdev_prop_blockdev_on_error;
+extern const PropertyInfo qdev_prop_bios_chs_trans;
+extern const PropertyInfo qdev_prop_fdc_drive_type;
+extern const PropertyInfo qdev_prop_drive;
+extern const PropertyInfo qdev_prop_drive_iothread;
+extern const PropertyInfo qdev_prop_netdev;
+extern const PropertyInfo qdev_prop_pci_devfn;
+extern const PropertyInfo qdev_prop_blocksize;
+extern const PropertyInfo qdev_prop_pci_host_devaddr;
+extern const PropertyInfo qdev_prop_uuid;
+extern const PropertyInfo qdev_prop_audiodev;
+extern const PropertyInfo qdev_prop_off_auto_pcibar;
+extern const PropertyInfo qdev_prop_pcie_link_speed;
+extern const PropertyInfo qdev_prop_pcie_link_width;
+extern const PropertyInfo qdev_prop_cpus390entitlement;
+
+#define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d)                   \
+    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pci_devfn, int32_t)
+
+#define DEFINE_PROP_CHR(_n, _s, _f)             \
+    DEFINE_PROP(_n, _s, _f, qdev_prop_chr, CharBackend)
+#define DEFINE_PROP_NETDEV(_n, _s, _f)             \
+    DEFINE_PROP(_n, _s, _f, qdev_prop_netdev, NICPeers)
+#define DEFINE_PROP_DRIVE(_n, _s, _f) \
+    DEFINE_PROP(_n, _s, _f, qdev_prop_drive, BlockBackend *)
+#define DEFINE_PROP_DRIVE_IOTHREAD(_n, _s, _f) \
+    DEFINE_PROP(_n, _s, _f, qdev_prop_drive_iothread, BlockBackend *)
+#define DEFINE_PROP_MACADDR(_n, _s, _f)         \
+    DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, MACAddr)
+#define DEFINE_PROP_RESERVED_REGION(_n, _s, _f)         \
+    DEFINE_PROP(_n, _s, _f, qdev_prop_reserved_region, ReservedRegion)
+#define DEFINE_PROP_MULTIFD_COMPRESSION(_n, _s, _f, _d) \
+    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_multifd_compression, \
+                       MultiFDCompression)
+#define DEFINE_PROP_MIG_MODE(_n, _s, _f, _d) \
+    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_mig_mode, \
+                       MigMode)
+#define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
+    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
+                        LostTickPolicy)
+#define DEFINE_PROP_BLOCKDEV_ON_ERROR(_n, _s, _f, _d) \
+    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_blockdev_on_error, \
+                        BlockdevOnError)
+#define DEFINE_PROP_BIOS_CHS_TRANS(_n, _s, _f, _d) \
+    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_bios_chs_trans, int)
+#define DEFINE_PROP_BLOCKSIZE(_n, _s, _f) \
+    DEFINE_PROP_UNSIGNED(_n, _s, _f, 0, qdev_prop_blocksize, uint32_t)
+#define DEFINE_PROP_PCI_HOST_DEVADDR(_n, _s, _f) \
+    DEFINE_PROP(_n, _s, _f, qdev_prop_pci_host_devaddr, PCIHostDeviceAddress)
+#define DEFINE_PROP_OFF_AUTO_PCIBAR(_n, _s, _f, _d) \
+    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_off_auto_pcibar, \
+                        OffAutoPCIBAR)
+#define DEFINE_PROP_PCIE_LINK_SPEED(_n, _s, _f, _d) \
+    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pcie_link_speed, \
+                        PCIExpLinkSpeed)
+#define DEFINE_PROP_PCIE_LINK_WIDTH(_n, _s, _f, _d) \
+    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pcie_link_width, \
+                        PCIExpLinkWidth)
+
+#define DEFINE_PROP_UUID(_name, _state, _field) \
+    DEFINE_PROP(_name, _state, _field, qdev_prop_uuid, QemuUUID, \
+                .set_default = true)
+
+#define DEFINE_PROP_AUDIODEV(_n, _s, _f) \
+    DEFINE_PROP(_n, _s, _f, qdev_prop_audiodev, QEMUSoundCard)
+
+#define DEFINE_PROP_UUID_NODEFAULT(_name, _state, _field) \
+    DEFINE_PROP(_name, _state, _field, qdev_prop_uuid, QemuUUID)
+
+#define DEFINE_PROP_CPUS390ENTITLEMENT(_n, _s, _f, _d) \
+    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_cpus390entitlement, \
+                       CpuS390Entitlement)
+
+#endif
index 44374500659d4964368d7a7be6153ca89cc6be5b..25743a29a000b15d7a15b15e6d83fa3a279d1edd 100644 (file)
@@ -3,6 +3,47 @@
 
 #include "hw/qdev-core.h"
 
+/**
+ * Property:
+ * @set_default: true if the default value should be set from @defval,
+ *    in which case @info->set_default_value must not be NULL
+ *    (if false then no default value is set by the property system
+ *     and the field retains whatever value it was given by instance_init).
+ * @defval: default value for the property. This is used only if @set_default
+ *     is true.
+ */
+struct Property {
+    const char   *name;
+    const PropertyInfo *info;
+    ptrdiff_t    offset;
+    uint8_t      bitnr;
+    uint64_t     bitmask;
+    bool         set_default;
+    union {
+        int64_t i;
+        uint64_t u;
+    } defval;
+    int          arrayoffset;
+    const PropertyInfo *arrayinfo;
+    int          arrayfieldsize;
+    const char   *link_type;
+};
+
+struct PropertyInfo {
+    const char *name;
+    const char *description;
+    const QEnumLookup *enum_table;
+    bool realized_set_allowed; /* allow setting property on realized device */
+    int (*print)(Object *obj, Property *prop, char *dest, size_t len);
+    void (*set_default_value)(ObjectProperty *op, const Property *prop);
+    ObjectProperty *(*create)(ObjectClass *oc, const char *name,
+                              Property *prop);
+    ObjectPropertyAccessor *get;
+    ObjectPropertyAccessor *set;
+    ObjectPropertyRelease *release;
+};
+
+
 /*** qdev-properties.c ***/
 
 extern const PropertyInfo qdev_prop_bit;
@@ -14,103 +55,65 @@ extern const PropertyInfo qdev_prop_uint16;
 extern const PropertyInfo qdev_prop_uint32;
 extern const PropertyInfo qdev_prop_int32;
 extern const PropertyInfo qdev_prop_uint64;
+extern const PropertyInfo qdev_prop_uint64_checkmask;
 extern const PropertyInfo qdev_prop_int64;
 extern const PropertyInfo qdev_prop_size;
 extern const PropertyInfo qdev_prop_string;
-extern const PropertyInfo qdev_prop_chr;
-extern const PropertyInfo qdev_prop_tpm;
-extern const PropertyInfo qdev_prop_macaddr;
-extern const PropertyInfo qdev_prop_reserved_region;
 extern const PropertyInfo qdev_prop_on_off_auto;
-extern const PropertyInfo qdev_prop_multifd_compression;
-extern const PropertyInfo qdev_prop_losttickpolicy;
-extern const PropertyInfo qdev_prop_blockdev_on_error;
-extern const PropertyInfo qdev_prop_bios_chs_trans;
-extern const PropertyInfo qdev_prop_fdc_drive_type;
-extern const PropertyInfo qdev_prop_drive;
-extern const PropertyInfo qdev_prop_drive_iothread;
-extern const PropertyInfo qdev_prop_netdev;
-extern const PropertyInfo qdev_prop_pci_devfn;
 extern const PropertyInfo qdev_prop_size32;
-extern const PropertyInfo qdev_prop_blocksize;
-extern const PropertyInfo qdev_prop_pci_host_devaddr;
-extern const PropertyInfo qdev_prop_uuid;
-extern const PropertyInfo qdev_prop_arraylen;
-extern const PropertyInfo qdev_prop_audiodev;
+extern const PropertyInfo qdev_prop_array;
 extern const PropertyInfo qdev_prop_link;
-extern const PropertyInfo qdev_prop_off_auto_pcibar;
-extern const PropertyInfo qdev_prop_pcie_link_speed;
-extern const PropertyInfo qdev_prop_pcie_link_width;
 
-#define DEFINE_PROP(_name, _state, _field, _prop, _type) { \
+#define DEFINE_PROP(_name, _state, _field, _prop, _type, ...) {  \
         .name      = (_name),                                    \
         .info      = &(_prop),                                   \
         .offset    = offsetof(_state, _field)                    \
             + type_check(_type, typeof_field(_state, _field)),   \
+        __VA_ARGS__                                              \
         }
 
-#define DEFINE_PROP_SIGNED(_name, _state, _field, _defval, _prop, _type) { \
-        .name      = (_name),                                           \
-        .info      = &(_prop),                                          \
-        .offset    = offsetof(_state, _field)                           \
-            + type_check(_type,typeof_field(_state, _field)),           \
-        .set_default = true,                                            \
-        .defval.i  = (_type)_defval,                                    \
-        }
+#define DEFINE_PROP_SIGNED(_name, _state, _field, _defval, _prop, _type) \
+    DEFINE_PROP(_name, _state, _field, _prop, _type,                     \
+                .set_default = true,                                     \
+                .defval.i    = (_type)_defval)
 
-#define DEFINE_PROP_SIGNED_NODEFAULT(_name, _state, _field, _prop, _type) { \
-        .name      = (_name),                                           \
-        .info      = &(_prop),                                          \
-        .offset    = offsetof(_state, _field)                           \
-            + type_check(_type, typeof_field(_state, _field)),          \
-        }
+#define DEFINE_PROP_SIGNED_NODEFAULT(_name, _state, _field, _prop, _type) \
+    DEFINE_PROP(_name, _state, _field, _prop, _type)
 
-#define DEFINE_PROP_BIT(_name, _state, _field, _bit, _defval) {  \
-        .name      = (_name),                                    \
-        .info      = &(qdev_prop_bit),                           \
-        .bitnr    = (_bit),                                      \
-        .offset    = offsetof(_state, _field)                    \
-            + type_check(uint32_t,typeof_field(_state, _field)), \
-        .set_default = true,                                     \
-        .defval.u  = (bool)_defval,                              \
-        }
+#define DEFINE_PROP_BIT(_name, _state, _field, _bit, _defval)   \
+    DEFINE_PROP(_name, _state, _field, qdev_prop_bit, uint32_t, \
+                .bitnr       = (_bit),                          \
+                .set_default = true,                            \
+                .defval.u    = (bool)_defval)
 
-#define DEFINE_PROP_UNSIGNED(_name, _state, _field, _defval, _prop, _type) { \
-        .name      = (_name),                                           \
-        .info      = &(_prop),                                          \
-        .offset    = offsetof(_state, _field)                           \
-            + type_check(_type, typeof_field(_state, _field)),          \
-        .set_default = true,                                            \
-        .defval.u  = (_type)_defval,                                    \
-        }
+#define DEFINE_PROP_UNSIGNED(_name, _state, _field, _defval, _prop, _type) \
+    DEFINE_PROP(_name, _state, _field, _prop, _type,                       \
+                .set_default = true,                                       \
+                .defval.u  = (_type)_defval)
 
-#define DEFINE_PROP_UNSIGNED_NODEFAULT(_name, _state, _field, _prop, _type) { \
-        .name      = (_name),                                           \
-        .info      = &(_prop),                                          \
-        .offset    = offsetof(_state, _field)                           \
-            + type_check(_type, typeof_field(_state, _field)),          \
-        }
+#define DEFINE_PROP_UNSIGNED_NODEFAULT(_name, _state, _field, _prop, _type) \
+    DEFINE_PROP(_name, _state, _field, _prop, _type)
 
-#define DEFINE_PROP_BIT64(_name, _state, _field, _bit, _defval) {       \
-        .name      = (_name),                                           \
-        .info      = &(qdev_prop_bit64),                                \
-        .bitnr    = (_bit),                                             \
-        .offset    = offsetof(_state, _field)                           \
-            + type_check(uint64_t, typeof_field(_state, _field)),       \
-        .set_default = true,                                            \
-        .defval.u  = (bool)_defval,                                     \
-        }
+#define DEFINE_PROP_BIT64(_name, _state, _field, _bit, _defval)   \
+    DEFINE_PROP(_name, _state, _field, qdev_prop_bit64, uint64_t, \
+                .bitnr    = (_bit),                               \
+                .set_default = true,                              \
+                .defval.u  = (bool)_defval)
 
-#define DEFINE_PROP_BOOL(_name, _state, _field, _defval) {       \
-        .name      = (_name),                                    \
-        .info      = &(qdev_prop_bool),                          \
-        .offset    = offsetof(_state, _field)                    \
-            + type_check(bool, typeof_field(_state, _field)),    \
-        .set_default = true,                                     \
-        .defval.u    = (bool)_defval,                            \
-        }
+#define DEFINE_PROP_BOOL(_name, _state, _field, _defval)     \
+    DEFINE_PROP(_name, _state, _field, qdev_prop_bool, bool, \
+                .set_default = true,                         \
+                .defval.u    = (bool)_defval)
 
-#define PROP_ARRAY_LEN_PREFIX "len-"
+/**
+ * The DEFINE_PROP_UINT64_CHECKMASK macro checks a user-supplied value
+ * against corresponding bitmask, rejects the value if it violates.
+ * The default value is set in instance_init().
+ */
+#define DEFINE_PROP_UINT64_CHECKMASK(_name, _state, _field, _bitmask)   \
+    DEFINE_PROP(_name, _state, _field, qdev_prop_uint64_checkmask, uint64_t, \
+                .bitmask    = (_bitmask),                     \
+                .set_default = false)
 
 /**
  * DEFINE_PROP_ARRAY:
@@ -122,40 +125,30 @@ extern const PropertyInfo qdev_prop_pcie_link_width;
  * @_arrayprop: PropertyInfo defining what property the array elements have
  * @_arraytype: C type of the array elements
  *
- * Define device properties for a variable-length array _name.  A
- * static property "len-arrayname" is defined. When the device creator
- * sets this property to the desired length of array, further dynamic
- * properties "arrayname[0]", "arrayname[1]", ...  are defined so the
- * device creator can set the array element values. Setting the
- * "len-arrayname" property more than once is an error.
+ * Define device properties for a variable-length array _name.  The array is
+ * represented as a list in the visitor interface.
  *
- * When the array length is set, the @_field member of the device
+ * @_arraytype is required to be movable with memcpy().
+ *
+ * When the array property is set, the @_field member of the device
  * struct is set to the array length, and @_arrayfield is set to point
- * to (zero-initialised) memory allocated for the array.  For a zero
- * length array, @_field will be set to 0 and @_arrayfield to NULL.
+ * to the memory allocated for the array.
+ *
  * It is the responsibility of the device deinit code to free the
  * @_arrayfield memory.
  */
 #define DEFINE_PROP_ARRAY(_name, _state, _field,                        \
-                          _arrayfield, _arrayprop, _arraytype) {        \
-        .name = (PROP_ARRAY_LEN_PREFIX _name),                          \
-        .info = &(qdev_prop_arraylen),                                  \
-        .set_default = true,                                            \
-        .defval.u = 0,                                                  \
-        .offset = offsetof(_state, _field)                              \
-            + type_check(uint32_t, typeof_field(_state, _field)),       \
-        .arrayinfo = &(_arrayprop),                                     \
-        .arrayfieldsize = sizeof(_arraytype),                           \
-        .arrayoffset = offsetof(_state, _arrayfield),                   \
-        }
+                          _arrayfield, _arrayprop, _arraytype)          \
+    DEFINE_PROP(_name, _state, _field, qdev_prop_array, uint32_t,       \
+                .set_default = true,                                    \
+                .defval.u = 0,                                          \
+                .arrayinfo = &(_arrayprop),                             \
+                .arrayfieldsize = sizeof(_arraytype),                   \
+                .arrayoffset = offsetof(_state, _arrayfield))
 
-#define DEFINE_PROP_LINK(_name, _state, _field, _type, _ptr_type) {     \
-        .name = (_name),                                                \
-        .info = &(qdev_prop_link),                                      \
-        .offset = offsetof(_state, _field)                              \
-            + type_check(_ptr_type, typeof_field(_state, _field)),      \
-        .link_type  = _type,                                            \
-        }
+#define DEFINE_PROP_LINK(_name, _state, _field, _type, _ptr_type)     \
+    DEFINE_PROP(_name, _state, _field, qdev_prop_link, _ptr_type,     \
+                .link_type  = _type)
 
 #define DEFINE_PROP_UINT8(_n, _s, _f, _d)                       \
     DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_uint8, uint8_t)
@@ -171,68 +164,12 @@ extern const PropertyInfo qdev_prop_pcie_link_width;
     DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_int64, int64_t)
 #define DEFINE_PROP_SIZE(_n, _s, _f, _d)                       \
     DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_size, uint64_t)
-#define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d)                   \
-    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pci_devfn, int32_t)
-
-#define DEFINE_PROP_CHR(_n, _s, _f)             \
-    DEFINE_PROP(_n, _s, _f, qdev_prop_chr, CharBackend)
 #define DEFINE_PROP_STRING(_n, _s, _f)             \
     DEFINE_PROP(_n, _s, _f, qdev_prop_string, char*)
-#define DEFINE_PROP_NETDEV(_n, _s, _f)             \
-    DEFINE_PROP(_n, _s, _f, qdev_prop_netdev, NICPeers)
-#define DEFINE_PROP_DRIVE(_n, _s, _f) \
-    DEFINE_PROP(_n, _s, _f, qdev_prop_drive, BlockBackend *)
-#define DEFINE_PROP_DRIVE_IOTHREAD(_n, _s, _f) \
-    DEFINE_PROP(_n, _s, _f, qdev_prop_drive_iothread, BlockBackend *)
-#define DEFINE_PROP_MACADDR(_n, _s, _f)         \
-    DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, MACAddr)
-#define DEFINE_PROP_RESERVED_REGION(_n, _s, _f)         \
-    DEFINE_PROP(_n, _s, _f, qdev_prop_reserved_region, ReservedRegion)
 #define DEFINE_PROP_ON_OFF_AUTO(_n, _s, _f, _d) \
     DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_on_off_auto, OnOffAuto)
-#define DEFINE_PROP_MULTIFD_COMPRESSION(_n, _s, _f, _d) \
-    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_multifd_compression, \
-                       MultiFDCompression)
-#define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \
-    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \
-                        LostTickPolicy)
-#define DEFINE_PROP_BLOCKDEV_ON_ERROR(_n, _s, _f, _d) \
-    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_blockdev_on_error, \
-                        BlockdevOnError)
-#define DEFINE_PROP_BIOS_CHS_TRANS(_n, _s, _f, _d) \
-    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_bios_chs_trans, int)
 #define DEFINE_PROP_SIZE32(_n, _s, _f, _d)                       \
     DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_size32, uint32_t)
-#define DEFINE_PROP_BLOCKSIZE(_n, _s, _f) \
-    DEFINE_PROP_UNSIGNED(_n, _s, _f, 0, qdev_prop_blocksize, uint32_t)
-#define DEFINE_PROP_PCI_HOST_DEVADDR(_n, _s, _f) \
-    DEFINE_PROP(_n, _s, _f, qdev_prop_pci_host_devaddr, PCIHostDeviceAddress)
-#define DEFINE_PROP_OFF_AUTO_PCIBAR(_n, _s, _f, _d) \
-    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_off_auto_pcibar, \
-                        OffAutoPCIBAR)
-#define DEFINE_PROP_PCIE_LINK_SPEED(_n, _s, _f, _d) \
-    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pcie_link_speed, \
-                        PCIExpLinkSpeed)
-#define DEFINE_PROP_PCIE_LINK_WIDTH(_n, _s, _f, _d) \
-    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pcie_link_width, \
-                        PCIExpLinkWidth)
-
-#define DEFINE_PROP_UUID(_name, _state, _field) {                  \
-        .name      = (_name),                                      \
-        .info      = &qdev_prop_uuid,                              \
-        .offset    = offsetof(_state, _field)                      \
-            + type_check(QemuUUID, typeof_field(_state, _field)),  \
-        .set_default = true,                                       \
-        }
-#define DEFINE_PROP_AUDIODEV(_n, _s, _f) \
-    DEFINE_PROP(_n, _s, _f, qdev_prop_audiodev, QEMUSoundCard)
-
-#define DEFINE_PROP_UUID_NODEFAULT(_name, _state, _field) {        \
-        .name      = (_name),                                      \
-        .info      = &qdev_prop_uuid,                              \
-        .offset    = offsetof(_state, _field)                      \
-            + type_check(QemuUUID, typeof_field(_state, _field)),  \
-        }
 
 #define DEFINE_PROP_END_OF_LIST()               \
     {}
@@ -264,15 +201,18 @@ void qdev_prop_set_macaddr(DeviceState *dev, const char *name,
                            const uint8_t *value);
 void qdev_prop_set_enum(DeviceState *dev, const char *name, int value);
 
-void *qdev_get_prop_ptr(DeviceState *dev, Property *prop);
+/* Takes ownership of @values */
+void qdev_prop_set_array(DeviceState *dev, const char *name, QList *values);
+
+void *object_field_prop_ptr(Object *obj, Property *prop);
 
 void qdev_prop_register_global(GlobalProperty *prop);
-const GlobalProperty *qdev_find_global_prop(DeviceState *dev,
+const GlobalProperty *qdev_find_global_prop(Object *obj,
                                             const char *name);
 int qdev_prop_check_globals(void);
 void qdev_prop_set_globals(DeviceState *dev);
-void error_set_from_qdev_prop_error(Error **errp, int ret, DeviceState *dev,
-                                    Property *prop, const char *value);
+void error_set_from_qdev_prop_error(Error **errp, int ret, Object *obj,
+                                    const char *name, const char *value);
 
 /**
  * qdev_property_add_static:
diff --git a/include/hw/rdma/rdma.h b/include/hw/rdma/rdma.h
new file mode 100644 (file)
index 0000000..80b2e53
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * RDMA device interface
+ *
+ * Copyright (C) 2019 Oracle
+ * Copyright (C) 2019 Red Hat Inc
+ *
+ * Authors:
+ *     Yuval Shaia <yuval.shaia@oracle.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef RDMA_H
+#define RDMA_H
+
+#include "qom/object.h"
+
+#define INTERFACE_RDMA_PROVIDER "rdma"
+
+typedef struct RdmaProviderClass RdmaProviderClass;
+DECLARE_CLASS_CHECKERS(RdmaProviderClass, RDMA_PROVIDER,
+                       INTERFACE_RDMA_PROVIDER)
+#define RDMA_PROVIDER(obj) \
+    INTERFACE_CHECK(RdmaProvider, (obj), \
+                    INTERFACE_RDMA_PROVIDER)
+
+typedef struct RdmaProvider RdmaProvider;
+
+struct RdmaProviderClass {
+    InterfaceClass parent;
+
+    void (*format_statistics)(RdmaProvider *obj, GString *buf);
+};
+
+#endif
index 03c8926d275acb95bdc9ad8f5b6869a817939e69..6a076cfcdf0266069ce2d4d7e0238568bef8550d 100644 (file)
@@ -87,7 +87,7 @@ struct RegisterInfo {
     void *opaque;
 };
 
-#define TYPE_REGISTER "qemu,register"
+#define TYPE_REGISTER "qemu-register"
 DECLARE_INSTANCE_CHECKER(RegisterInfo, REGISTER,
                          TYPE_REGISTER)
 
@@ -204,6 +204,14 @@ RegisterInfoArray *register_init_block32(DeviceState *owner,
                                          bool debug_enabled,
                                          uint64_t memory_size);
 
+RegisterInfoArray *register_init_block64(DeviceState *owner,
+                                         const RegisterAccessInfo *rae,
+                                         int num, RegisterInfo *ri,
+                                         uint64_t *data,
+                                         const MemoryRegionOps *ops,
+                                         bool debug_enabled,
+                                         uint64_t memory_size);
+
 /**
  * This function should be called to cleanup the registers that were initialized
  * when calling register_init_block32(). This function should only be called
index 93fa4a84c22b363dc552be0f114c7c3f1e020534..1330ca77de617312f05b733429fed2e200e68cd5 100644 (file)
     enum { A_ ## reg = (addr) };                                          \
     enum { R_ ## reg = (addr) / 2 };
 
+#define REG64(reg, addr)                                                  \
+    enum { A_ ## reg = (addr) };                                          \
+    enum { R_ ## reg = (addr) / 8 };
+
 /* Define SHIFT, LENGTH and MASK constants for a field within a register */
 
 /* This macro will define R_FOO_BAR_MASK, R_FOO_BAR_SHIFT and R_FOO_BAR_LENGTH
     extract64((storage), R_ ## reg ## _ ## field ## _SHIFT,               \
               R_ ## reg ## _ ## field ## _LENGTH)
 
+#define FIELD_SEX8(storage, reg, field)                                   \
+    sextract8((storage), R_ ## reg ## _ ## field ## _SHIFT,               \
+              R_ ## reg ## _ ## field ## _LENGTH)
+#define FIELD_SEX16(storage, reg, field)                                  \
+    sextract16((storage), R_ ## reg ## _ ## field ## _SHIFT,              \
+               R_ ## reg ## _ ## field ## _LENGTH)
+#define FIELD_SEX32(storage, reg, field)                                  \
+    sextract32((storage), R_ ## reg ## _ ## field ## _SHIFT,              \
+               R_ ## reg ## _ ## field ## _LENGTH)
+#define FIELD_SEX64(storage, reg, field)                                  \
+    sextract64((storage), R_ ## reg ## _ ## field ## _SHIFT,              \
+               R_ ## reg ## _ ## field ## _LENGTH)
+
 /* Extract a field from an array of registers */
 #define ARRAY_FIELD_EX32(regs, reg, field)                                \
     FIELD_EX32((regs)[R_ ## reg], reg, field)
+#define ARRAY_FIELD_EX64(regs, reg, field)                                \
+    FIELD_EX64((regs)[R_ ## reg], reg, field)
 
 /* Deposit a register field.
  * Assigning values larger then the target field will result in
     _d; })
 #define FIELD_DP64(storage, reg, field, val) ({                           \
     struct {                                                              \
-        unsigned int v:R_ ## reg ## _ ## field ## _LENGTH;                \
+        uint64_t v:R_ ## reg ## _ ## field ## _LENGTH;                    \
+    } _v = { .v = val };                                                  \
+    uint64_t _d;                                                          \
+    _d = deposit64((storage), R_ ## reg ## _ ## field ## _SHIFT,          \
+                  R_ ## reg ## _ ## field ## _LENGTH, _v.v);              \
+    _d; })
+
+#define FIELD_SDP8(storage, reg, field, val) ({                           \
+    struct {                                                              \
+        signed int v:R_ ## reg ## _ ## field ## _LENGTH;                  \
+    } _v = { .v = val };                                                  \
+    uint8_t _d;                                                           \
+    _d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT,          \
+                  R_ ## reg ## _ ## field ## _LENGTH, _v.v);              \
+    _d; })
+#define FIELD_SDP16(storage, reg, field, val) ({                          \
+    struct {                                                              \
+        signed int v:R_ ## reg ## _ ## field ## _LENGTH;                  \
+    } _v = { .v = val };                                                  \
+    uint16_t _d;                                                          \
+    _d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT,          \
+                  R_ ## reg ## _ ## field ## _LENGTH, _v.v);              \
+    _d; })
+#define FIELD_SDP32(storage, reg, field, val) ({                          \
+    struct {                                                              \
+        signed int v:R_ ## reg ## _ ## field ## _LENGTH;                  \
+    } _v = { .v = val };                                                  \
+    uint32_t _d;                                                          \
+    _d = deposit32((storage), R_ ## reg ## _ ## field ## _SHIFT,          \
+                  R_ ## reg ## _ ## field ## _LENGTH, _v.v);              \
+    _d; })
+#define FIELD_SDP64(storage, reg, field, val) ({                          \
+    struct {                                                              \
+        int64_t v:R_ ## reg ## _ ## field ## _LENGTH;                     \
     } _v = { .v = val };                                                  \
     uint64_t _d;                                                          \
     _d = deposit64((storage), R_ ## reg ## _ ## field ## _SHIFT,          \
 /* Deposit a field to array of registers.  */
 #define ARRAY_FIELD_DP32(regs, reg, field, val)                           \
     (regs)[R_ ## reg] = FIELD_DP32((regs)[R_ ## reg], reg, field, val);
+#define ARRAY_FIELD_DP64(regs, reg, field, val)                           \
+    (regs)[R_ ## reg] = FIELD_DP64((regs)[R_ ## reg], reg, field, val);
+
+
+/*
+ * These macros can be used for defining and extracting fields that have the
+ * same bit position across multiple registers.
+ */
+
+/* Define shared SHIFT, LENGTH, and MASK constants */
+#define SHARED_FIELD(name, shift, length)   \
+    enum { name ## _ ## SHIFT = (shift)};   \
+    enum { name ## _ ## LENGTH = (length)}; \
+    enum { name ## _ ## MASK = MAKE_64BIT_MASK(shift, length)};
+
+/* Extract a shared field */
+#define SHARED_FIELD_EX8(storage, field) \
+    extract8((storage), field ## _SHIFT, field ## _LENGTH)
+
+#define SHARED_FIELD_EX16(storage, field) \
+    extract16((storage), field ## _SHIFT, field ## _LENGTH)
+
+#define SHARED_FIELD_EX32(storage, field) \
+    extract32((storage), field ## _SHIFT, field ## _LENGTH)
+
+#define SHARED_FIELD_EX64(storage, field) \
+    extract64((storage), field ## _SHIFT, field ## _LENGTH)
+
+/* Extract a shared field from a register array */
+#define SHARED_ARRAY_FIELD_EX32(regs, offset, field) \
+    SHARED_FIELD_EX32((regs)[(offset)], field)
+#define SHARED_ARRAY_FIELD_EX64(regs, offset, field) \
+    SHARED_FIELD_EX64((regs)[(offset)], field)
+
+/* Deposit a shared field */
+#define SHARED_FIELD_DP8(storage, field, val) ({                        \
+    struct {                                                            \
+        unsigned int v:field ## _LENGTH;                                \
+    } _v = { .v = val };                                                \
+    uint8_t _d;                                                         \
+    _d = deposit32((storage), field ## _SHIFT, field ## _LENGTH, _v.v); \
+    _d; })
+
+#define SHARED_FIELD_DP16(storage, field, val) ({                       \
+    struct {                                                            \
+        unsigned int v:field ## _LENGTH;                                \
+    } _v = { .v = val };                                                \
+    uint16_t _d;                                                        \
+    _d = deposit32((storage), field ## _SHIFT, field ## _LENGTH, _v.v); \
+    _d; })
+
+#define SHARED_FIELD_DP32(storage, field, val) ({                       \
+    struct {                                                            \
+        unsigned int v:field ## _LENGTH;                                \
+    } _v = { .v = val };                                                \
+    uint32_t _d;                                                        \
+    _d = deposit32((storage), field ## _SHIFT, field ## _LENGTH, _v.v); \
+    _d; })
+
+#define SHARED_FIELD_DP64(storage, field, val) ({                       \
+    struct {                                                            \
+        uint64_t v:field ## _LENGTH;                                    \
+    } _v = { .v = val };                                                \
+    uint64_t _d;                                                        \
+    _d = deposit64((storage), field ## _SHIFT, field ## _LENGTH, _v.v); \
+    _d; })
+
+/* Deposit a shared field to a register array */
+#define SHARED_ARRAY_FIELD_DP32(regs, offset, field, val) \
+    (regs)[(offset)] = SHARED_FIELD_DP32((regs)[(offset)], field, val);
+#define SHARED_ARRAY_FIELD_DP64(regs, offset, field, val) \
+    (regs)[(offset)] = SHARED_FIELD_DP64((regs)[(offset)], field, val);
 
 #endif
diff --git a/include/hw/remote/iohub.h b/include/hw/remote/iohub.h
new file mode 100644 (file)
index 0000000..6a8444f
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * IO Hub for remote device
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef REMOTE_IOHUB_H
+#define REMOTE_IOHUB_H
+
+#include "hw/pci/pci_device.h"
+#include "qemu/event_notifier.h"
+#include "qemu/thread-posix.h"
+#include "hw/remote/mpqemu-link.h"
+
+#define REMOTE_IOHUB_NB_PIRQS    PCI_DEVFN_MAX
+
+typedef struct ResampleToken {
+    void *iohub;
+    int pirq;
+} ResampleToken;
+
+typedef struct RemoteIOHubState {
+    PCIDevice d;
+    EventNotifier irqfds[REMOTE_IOHUB_NB_PIRQS];
+    EventNotifier resamplefds[REMOTE_IOHUB_NB_PIRQS];
+    unsigned int irq_level[REMOTE_IOHUB_NB_PIRQS];
+    ResampleToken token[REMOTE_IOHUB_NB_PIRQS];
+    QemuMutex irq_level_lock[REMOTE_IOHUB_NB_PIRQS];
+} RemoteIOHubState;
+
+int remote_iohub_map_irq(PCIDevice *pci_dev, int intx);
+void remote_iohub_set_irq(void *opaque, int pirq, int level);
+void process_set_irqfd_msg(PCIDevice *pci_dev, MPQemuMsg *msg);
+
+void remote_iohub_init(RemoteIOHubState *iohub);
+void remote_iohub_finalize(RemoteIOHubState *iohub);
+
+#endif
diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h
new file mode 100644 (file)
index 0000000..33b68a8
--- /dev/null
@@ -0,0 +1,40 @@
+/**
+ * Copyright © 2022 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef REMOTE_IOMMU_H
+#define REMOTE_IOMMU_H
+
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci.h"
+
+#ifndef INT2VOIDP
+#define INT2VOIDP(i) (void *)(uintptr_t)(i)
+#endif
+
+typedef struct RemoteIommuElem {
+    MemoryRegion *mr;
+
+    AddressSpace as;
+} RemoteIommuElem;
+
+#define TYPE_REMOTE_IOMMU "x-remote-iommu"
+OBJECT_DECLARE_SIMPLE_TYPE(RemoteIommu, REMOTE_IOMMU)
+
+struct RemoteIommu {
+    Object parent;
+
+    GHashTable *elem_by_devfn;
+
+    QemuMutex lock;
+};
+
+void remote_iommu_setup(PCIBus *pci_bus);
+
+void remote_iommu_unplug_dev(PCIDevice *pci_dev);
+
+#endif
diff --git a/include/hw/remote/machine.h b/include/hw/remote/machine.h
new file mode 100644 (file)
index 0000000..ac32fda
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Remote machine configuration
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef REMOTE_MACHINE_H
+#define REMOTE_MACHINE_H
+
+#include "qom/object.h"
+#include "hw/boards.h"
+#include "hw/pci-host/remote.h"
+#include "io/channel.h"
+#include "hw/remote/iohub.h"
+
+struct RemoteMachineState {
+    MachineState parent_obj;
+
+    RemotePCIHost *host;
+    RemoteIOHubState iohub;
+
+    bool vfio_user;
+
+    bool auto_shutdown;
+};
+
+/* Used to pass to co-routine device and ioc. */
+typedef struct RemoteCommDev {
+    PCIDevice *dev;
+    QIOChannel *ioc;
+} RemoteCommDev;
+
+#define TYPE_REMOTE_MACHINE "x-remote-machine"
+OBJECT_DECLARE_SIMPLE_TYPE(RemoteMachineState, REMOTE_MACHINE)
+
+void coroutine_fn mpqemu_remote_msg_loop_co(void *data);
+
+#endif
diff --git a/include/hw/remote/memory.h b/include/hw/remote/memory.h
new file mode 100644 (file)
index 0000000..bc2e309
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * Memory manager for remote device
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef REMOTE_MEMORY_H
+#define REMOTE_MEMORY_H
+
+#include "exec/hwaddr.h"
+#include "hw/remote/mpqemu-link.h"
+
+void remote_sysmem_reconfig(MPQemuMsg *msg, Error **errp);
+
+#endif
diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
new file mode 100644 (file)
index 0000000..4ec0915
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * Communication channel between QEMU and remote device process
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef MPQEMU_LINK_H
+#define MPQEMU_LINK_H
+
+#include "qom/object.h"
+#include "qemu/thread.h"
+#include "io/channel.h"
+#include "exec/hwaddr.h"
+#include "io/channel-socket.h"
+#include "hw/remote/proxy.h"
+
+#define REMOTE_MAX_FDS 8
+
+#define MPQEMU_MSG_HDR_SIZE offsetof(MPQemuMsg, data.u64)
+
+/**
+ * MPQemuCmd:
+ *
+ * MPQemuCmd enum type to specify the command to be executed on the remote
+ * device.
+ *
+ * This uses a private protocol between QEMU and the remote process. vfio-user
+ * protocol would supersede this in the future.
+ *
+ */
+typedef enum {
+    MPQEMU_CMD_SYNC_SYSMEM,
+    MPQEMU_CMD_RET,
+    MPQEMU_CMD_PCI_CFGWRITE,
+    MPQEMU_CMD_PCI_CFGREAD,
+    MPQEMU_CMD_BAR_WRITE,
+    MPQEMU_CMD_BAR_READ,
+    MPQEMU_CMD_SET_IRQFD,
+    MPQEMU_CMD_DEVICE_RESET,
+    MPQEMU_CMD_MAX,
+} MPQemuCmd;
+
+typedef struct {
+    hwaddr gpas[REMOTE_MAX_FDS];
+    uint64_t sizes[REMOTE_MAX_FDS];
+    off_t offsets[REMOTE_MAX_FDS];
+} SyncSysmemMsg;
+
+typedef struct {
+    uint32_t addr;
+    uint32_t val;
+    int len;
+} PciConfDataMsg;
+
+typedef struct {
+    hwaddr addr;
+    uint64_t val;
+    unsigned size;
+    bool memory;
+} BarAccessMsg;
+
+/**
+ * MPQemuMsg:
+ * @cmd: The remote command
+ * @size: Size of the data to be shared
+ * @data: Structured data
+ * @fds: File descriptors to be shared with remote device
+ *
+ * MPQemuMsg Format of the message sent to the remote device from QEMU.
+ *
+ */
+
+typedef struct {
+    int cmd;
+    size_t size;
+
+    union {
+        uint64_t u64;
+        PciConfDataMsg pci_conf_data;
+        SyncSysmemMsg sync_sysmem;
+        BarAccessMsg bar_access;
+    } data;
+
+    int fds[REMOTE_MAX_FDS];
+    int num_fds;
+} MPQemuMsg;
+
+bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp);
+bool mpqemu_msg_recv(MPQemuMsg *msg, QIOChannel *ioc, Error **errp);
+
+uint64_t mpqemu_msg_send_and_await_reply(MPQemuMsg *msg, PCIProxyDev *pdev,
+                                         Error **errp);
+bool mpqemu_msg_valid(MPQemuMsg *msg);
+
+#endif
diff --git a/include/hw/remote/proxy-memory-listener.h b/include/hw/remote/proxy-memory-listener.h
new file mode 100644 (file)
index 0000000..c4f3efb
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef PROXY_MEMORY_LISTENER_H
+#define PROXY_MEMORY_LISTENER_H
+
+#include "exec/memory.h"
+#include "io/channel.h"
+
+typedef struct ProxyMemoryListener {
+    MemoryListener listener;
+
+    int n_mr_sections;
+    MemoryRegionSection *mr_sections;
+
+    QIOChannel *ioc;
+} ProxyMemoryListener;
+
+void proxy_memory_listener_configure(ProxyMemoryListener *proxy_listener,
+                                     QIOChannel *ioc);
+void proxy_memory_listener_deconfigure(ProxyMemoryListener *proxy_listener);
+
+#endif
diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
new file mode 100644 (file)
index 0000000..0cfd966
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef PROXY_H
+#define PROXY_H
+
+#include "hw/pci/pci_device.h"
+#include "io/channel.h"
+#include "hw/remote/proxy-memory-listener.h"
+#include "qemu/event_notifier.h"
+
+#define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
+OBJECT_DECLARE_SIMPLE_TYPE(PCIProxyDev, PCI_PROXY_DEV)
+
+typedef struct ProxyMemoryRegion {
+    PCIProxyDev *dev;
+    MemoryRegion mr;
+    bool memory;
+    bool present;
+    uint8_t type;
+} ProxyMemoryRegion;
+
+struct PCIProxyDev {
+    PCIDevice parent_dev;
+    char *fd;
+
+    /*
+     * Mutex used to protect the QIOChannel fd from
+     * the concurrent access by the VCPUs since proxy
+     * blocks while awaiting for the replies from the
+     * process remote.
+     */
+    QemuMutex io_mutex;
+    QIOChannel *ioc;
+    Error *migration_blocker;
+    ProxyMemoryListener proxy_listener;
+    int virq;
+    EventNotifier intr;
+    EventNotifier resample;
+    ProxyMemoryRegion region[PCI_NUM_REGIONS];
+};
+
+#endif /* PROXY_H */
diff --git a/include/hw/remote/vfio-user-obj.h b/include/hw/remote/vfio-user-obj.h
new file mode 100644 (file)
index 0000000..87ab78b
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef VFIO_USER_OBJ_H
+#define VFIO_USER_OBJ_H
+
+void vfu_object_set_bus_irq(PCIBus *pci_bus);
+
+#endif
diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h
new file mode 100644 (file)
index 0000000..a2e4ae9
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * QEMU RISC-V Boot Helper
+ *
+ * Copyright (c) 2017 SiFive, Inc.
+ * Copyright (c) 2019 Alistair Francis <alistair.francis@wdc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef RISCV_BOOT_H
+#define RISCV_BOOT_H
+
+#include "exec/cpu-defs.h"
+#include "hw/loader.h"
+#include "hw/riscv/riscv_hart.h"
+
+#define RISCV32_BIOS_BIN    "opensbi-riscv32-generic-fw_dynamic.bin"
+#define RISCV64_BIOS_BIN    "opensbi-riscv64-generic-fw_dynamic.bin"
+
+bool riscv_is_32bit(RISCVHartArrayState *harts);
+
+char *riscv_plic_hart_config_string(int hart_count);
+
+target_ulong riscv_calc_kernel_start_addr(RISCVHartArrayState *harts,
+                                          target_ulong firmware_end_addr);
+target_ulong riscv_find_and_load_firmware(MachineState *machine,
+                                          const char *default_machine_firmware,
+                                          hwaddr firmware_load_addr,
+                                          symbol_fn_t sym_cb);
+const char *riscv_default_firmware_name(RISCVHartArrayState *harts);
+char *riscv_find_firmware(const char *firmware_filename,
+                          const char *default_machine_firmware);
+target_ulong riscv_load_firmware(const char *firmware_filename,
+                                 hwaddr firmware_load_addr,
+                                 symbol_fn_t sym_cb);
+target_ulong riscv_load_kernel(MachineState *machine,
+                               RISCVHartArrayState *harts,
+                               target_ulong firmware_end_addr,
+                               bool load_initrd,
+                               symbol_fn_t sym_cb);
+uint64_t riscv_compute_fdt_addr(hwaddr dram_start, uint64_t dram_size,
+                                MachineState *ms);
+void riscv_load_fdt(hwaddr fdt_addr, void *fdt);
+void riscv_setup_rom_reset_vec(MachineState *machine, RISCVHartArrayState *harts,
+                               hwaddr saddr,
+                               hwaddr rom_base, hwaddr rom_size,
+                               uint64_t kernel_entry,
+                               uint64_t fdt_load_addr);
+void riscv_rom_copy_firmware_info(MachineState *machine, hwaddr rom_base,
+                                  hwaddr rom_size,
+                                  uint32_t reset_vec_size,
+                                  uint64_t kernel_entry);
+void riscv_setup_direct_kernel(hwaddr kernel_addr, hwaddr fdt_addr);
+void riscv_setup_firmware_boot(MachineState *machine);
+
+#endif /* RISCV_BOOT_H */
diff --git a/include/hw/riscv/boot_opensbi.h b/include/hw/riscv/boot_opensbi.h
new file mode 100644 (file)
index 0000000..1b74966
--- /dev/null
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+/*
+ * Copyright (c) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Based on include/sbi/{fw_dynamic.h,sbi_scratch.h} from the OpenSBI project.
+ */
+
+#ifndef RISCV_BOOT_OPENSBI_H
+#define RISCV_BOOT_OPENSBI_H
+
+#include "exec/cpu-defs.h"
+
+/** Expected value of info magic ('OSBI' ascii string in hex) */
+#define FW_DYNAMIC_INFO_MAGIC_VALUE     0x4942534f
+
+/** Maximum supported info version */
+#define FW_DYNAMIC_INFO_VERSION         0x2
+
+/** Possible next mode values */
+#define FW_DYNAMIC_INFO_NEXT_MODE_U     0x0
+#define FW_DYNAMIC_INFO_NEXT_MODE_S     0x1
+#define FW_DYNAMIC_INFO_NEXT_MODE_M     0x3
+
+enum sbi_scratch_options {
+    /** Disable prints during boot */
+    SBI_SCRATCH_NO_BOOT_PRINTS = (1 << 0),
+    /** Enable runtime debug prints */
+    SBI_SCRATCH_DEBUG_PRINTS = (1 << 1),
+};
+
+/** Representation dynamic info passed by previous booting stage */
+struct fw_dynamic_info {
+    /** Info magic */
+    target_long magic;
+    /** Info version */
+    target_long version;
+    /** Next booting stage address */
+    target_long next_addr;
+    /** Next booting stage mode */
+    target_long next_mode;
+    /** Options for OpenSBI library */
+    target_long options;
+    /**
+     * Preferred boot HART id
+     *
+     * It is possible that the previous booting stage uses same link
+     * address as the FW_DYNAMIC firmware. In this case, the relocation
+     * lottery mechanism can potentially overwrite the previous booting
+     * stage while other HARTs are still running in the previous booting
+     * stage leading to boot-time crash. To avoid this boot-time crash,
+     * the previous booting stage can specify last HART that will jump
+     * to the FW_DYNAMIC firmware as the preferred boot HART.
+     *
+     * To avoid specifying a preferred boot HART, the previous booting
+     * stage can set it to -1UL which will force the FW_DYNAMIC firmware
+     * to use the relocation lottery mechanism.
+     */
+    target_long boot_hart;
+};
+
+#endif
diff --git a/include/hw/riscv/microchip_pfsoc.h b/include/hw/riscv/microchip_pfsoc.h
new file mode 100644 (file)
index 0000000..daef086
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * Microchip PolarFire SoC machine interface
+ *
+ * Copyright (c) 2020 Wind River Systems, Inc.
+ *
+ * Author:
+ *   Bin Meng <bin.meng@windriver.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_MICROCHIP_PFSOC_H
+#define HW_MICROCHIP_PFSOC_H
+
+#include "hw/boards.h"
+#include "hw/char/mchp_pfsoc_mmuart.h"
+#include "hw/cpu/cluster.h"
+#include "hw/dma/sifive_pdma.h"
+#include "hw/misc/mchp_pfsoc_dmc.h"
+#include "hw/misc/mchp_pfsoc_ioscb.h"
+#include "hw/misc/mchp_pfsoc_sysreg.h"
+#include "hw/net/cadence_gem.h"
+#include "hw/sd/cadence_sdhci.h"
+#include "hw/riscv/riscv_hart.h"
+
+typedef struct MicrochipPFSoCState {
+    /*< private >*/
+    DeviceState parent_obj;
+
+    /*< public >*/
+    CPUClusterState e_cluster;
+    CPUClusterState u_cluster;
+    RISCVHartArrayState e_cpus;
+    RISCVHartArrayState u_cpus;
+    DeviceState *plic;
+    MchpPfSoCDdrSgmiiPhyState ddr_sgmii_phy;
+    MchpPfSoCDdrCfgState ddr_cfg;
+    MchpPfSoCIoscbState ioscb;
+    MchpPfSoCMMUartState *serial0;
+    MchpPfSoCMMUartState *serial1;
+    MchpPfSoCMMUartState *serial2;
+    MchpPfSoCMMUartState *serial3;
+    MchpPfSoCMMUartState *serial4;
+    MchpPfSoCSysregState sysreg;
+    SiFivePDMAState dma;
+    CadenceGEMState gem0;
+    CadenceGEMState gem1;
+    CadenceSDHCIState sdhci;
+} MicrochipPFSoCState;
+
+#define TYPE_MICROCHIP_PFSOC    "microchip.pfsoc"
+#define MICROCHIP_PFSOC(obj) \
+    OBJECT_CHECK(MicrochipPFSoCState, (obj), TYPE_MICROCHIP_PFSOC)
+
+typedef struct MicrochipIcicleKitState {
+    /*< private >*/
+    MachineState parent_obj;
+
+    /*< public >*/
+    MicrochipPFSoCState soc;
+} MicrochipIcicleKitState;
+
+#define TYPE_MICROCHIP_ICICLE_KIT_MACHINE \
+    MACHINE_TYPE_NAME("microchip-icicle-kit")
+#define MICROCHIP_ICICLE_KIT_MACHINE(obj) \
+    OBJECT_CHECK(MicrochipIcicleKitState, (obj), \
+                 TYPE_MICROCHIP_ICICLE_KIT_MACHINE)
+
+enum {
+    MICROCHIP_PFSOC_RSVD0,
+    MICROCHIP_PFSOC_DEBUG,
+    MICROCHIP_PFSOC_E51_DTIM,
+    MICROCHIP_PFSOC_BUSERR_UNIT0,
+    MICROCHIP_PFSOC_BUSERR_UNIT1,
+    MICROCHIP_PFSOC_BUSERR_UNIT2,
+    MICROCHIP_PFSOC_BUSERR_UNIT3,
+    MICROCHIP_PFSOC_BUSERR_UNIT4,
+    MICROCHIP_PFSOC_CLINT,
+    MICROCHIP_PFSOC_L2CC,
+    MICROCHIP_PFSOC_DMA,
+    MICROCHIP_PFSOC_L2LIM,
+    MICROCHIP_PFSOC_PLIC,
+    MICROCHIP_PFSOC_MMUART0,
+    MICROCHIP_PFSOC_WDOG0,
+    MICROCHIP_PFSOC_SYSREG,
+    MICROCHIP_PFSOC_AXISW,
+    MICROCHIP_PFSOC_MPUCFG,
+    MICROCHIP_PFSOC_FMETER,
+    MICROCHIP_PFSOC_DDR_SGMII_PHY,
+    MICROCHIP_PFSOC_EMMC_SD,
+    MICROCHIP_PFSOC_DDR_CFG,
+    MICROCHIP_PFSOC_MMUART1,
+    MICROCHIP_PFSOC_MMUART2,
+    MICROCHIP_PFSOC_MMUART3,
+    MICROCHIP_PFSOC_MMUART4,
+    MICROCHIP_PFSOC_WDOG1,
+    MICROCHIP_PFSOC_WDOG2,
+    MICROCHIP_PFSOC_WDOG3,
+    MICROCHIP_PFSOC_WDOG4,
+    MICROCHIP_PFSOC_SPI0,
+    MICROCHIP_PFSOC_SPI1,
+    MICROCHIP_PFSOC_I2C0,
+    MICROCHIP_PFSOC_I2C1,
+    MICROCHIP_PFSOC_CAN0,
+    MICROCHIP_PFSOC_CAN1,
+    MICROCHIP_PFSOC_GEM0,
+    MICROCHIP_PFSOC_GEM1,
+    MICROCHIP_PFSOC_GPIO0,
+    MICROCHIP_PFSOC_GPIO1,
+    MICROCHIP_PFSOC_GPIO2,
+    MICROCHIP_PFSOC_RTC,
+    MICROCHIP_PFSOC_ENVM_CFG,
+    MICROCHIP_PFSOC_ENVM_DATA,
+    MICROCHIP_PFSOC_USB,
+    MICROCHIP_PFSOC_QSPI_XIP,
+    MICROCHIP_PFSOC_IOSCB,
+    MICROCHIP_PFSOC_FABRIC_FIC0,
+    MICROCHIP_PFSOC_FABRIC_FIC1,
+    MICROCHIP_PFSOC_FABRIC_FIC3,
+    MICROCHIP_PFSOC_DRAM_LO,
+    MICROCHIP_PFSOC_DRAM_LO_ALIAS,
+    MICROCHIP_PFSOC_DRAM_HI,
+    MICROCHIP_PFSOC_DRAM_HI_ALIAS
+};
+
+enum {
+    MICROCHIP_PFSOC_DMA_IRQ0 = 5,
+    MICROCHIP_PFSOC_DMA_IRQ1 = 6,
+    MICROCHIP_PFSOC_DMA_IRQ2 = 7,
+    MICROCHIP_PFSOC_DMA_IRQ3 = 8,
+    MICROCHIP_PFSOC_DMA_IRQ4 = 9,
+    MICROCHIP_PFSOC_DMA_IRQ5 = 10,
+    MICROCHIP_PFSOC_DMA_IRQ6 = 11,
+    MICROCHIP_PFSOC_DMA_IRQ7 = 12,
+    MICROCHIP_PFSOC_GEM0_IRQ = 64,
+    MICROCHIP_PFSOC_GEM1_IRQ = 70,
+    MICROCHIP_PFSOC_EMMC_SD_IRQ = 88,
+    MICROCHIP_PFSOC_MMUART0_IRQ = 90,
+    MICROCHIP_PFSOC_MMUART1_IRQ = 91,
+    MICROCHIP_PFSOC_MMUART2_IRQ = 92,
+    MICROCHIP_PFSOC_MMUART3_IRQ = 93,
+    MICROCHIP_PFSOC_MMUART4_IRQ = 94,
+    MICROCHIP_PFSOC_MAILBOX_IRQ = 96,
+};
+
+#define MICROCHIP_PFSOC_MANAGEMENT_CPU_COUNT    1
+#define MICROCHIP_PFSOC_COMPUTE_CPU_COUNT       4
+
+#define MICROCHIP_PFSOC_PLIC_NUM_SOURCES        187
+#define MICROCHIP_PFSOC_PLIC_NUM_PRIORITIES     7
+#define MICROCHIP_PFSOC_PLIC_PRIORITY_BASE      0x00
+#define MICROCHIP_PFSOC_PLIC_PENDING_BASE       0x1000
+#define MICROCHIP_PFSOC_PLIC_ENABLE_BASE        0x2000
+#define MICROCHIP_PFSOC_PLIC_ENABLE_STRIDE      0x80
+#define MICROCHIP_PFSOC_PLIC_CONTEXT_BASE       0x200000
+#define MICROCHIP_PFSOC_PLIC_CONTEXT_STRIDE     0x1000
+
+#endif /* HW_MICROCHIP_PFSOC_H */
diff --git a/include/hw/riscv/numa.h b/include/hw/riscv/numa.h
new file mode 100644 (file)
index 0000000..8f52802
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * QEMU RISC-V NUMA Helper
+ *
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef RISCV_NUMA_H
+#define RISCV_NUMA_H
+
+#include "hw/boards.h"
+#include "hw/sysbus.h"
+#include "sysemu/numa.h"
+
+/**
+ * riscv_socket_count:
+ * @ms: pointer to machine state
+ *
+ * Returns: number of sockets for a numa system and 1 for a non-numa system
+ */
+int riscv_socket_count(const MachineState *ms);
+
+/**
+ * riscv_socket_first_hartid:
+ * @ms: pointer to machine state
+ * @socket_id: socket index
+ *
+ * Returns: first hartid for a valid socket and -1 for an invalid socket
+ */
+int riscv_socket_first_hartid(const MachineState *ms, int socket_id);
+
+/**
+ * riscv_socket_last_hartid:
+ * @ms: pointer to machine state
+ * @socket_id: socket index
+ *
+ * Returns: last hartid for a valid socket and -1 for an invalid socket
+ */
+int riscv_socket_last_hartid(const MachineState *ms, int socket_id);
+
+/**
+ * riscv_socket_hart_count:
+ * @ms: pointer to machine state
+ * @socket_id: socket index
+ *
+ * Returns: number of harts for a valid socket and -1 for an invalid socket
+ */
+int riscv_socket_hart_count(const MachineState *ms, int socket_id);
+
+/**
+ * riscv_socket_mem_offset:
+ * @ms: pointer to machine state
+ * @socket_id: socket index
+ *
+ * Returns: offset of ram belonging to given socket
+ */
+uint64_t riscv_socket_mem_offset(const MachineState *ms, int socket_id);
+
+/**
+ * riscv_socket_mem_size:
+ * @ms: pointer to machine state
+ * @socket_id: socket index
+ *
+ * Returns: size of ram belonging to given socket
+ */
+uint64_t riscv_socket_mem_size(const MachineState *ms, int socket_id);
+
+/**
+ * riscv_socket_check_hartids:
+ * @ms: pointer to machine state
+ * @socket_id: socket index
+ *
+ * Returns: true if hardids belonging to given socket are contiguous else false
+ */
+bool riscv_socket_check_hartids(const MachineState *ms, int socket_id);
+
+/**
+ * riscv_socket_fdt_write_id:
+ * @ms: pointer to machine state
+ * @socket_id: socket index
+ *
+ * Write NUMA node-id FDT property in MachineState->fdt
+ */
+void riscv_socket_fdt_write_id(const MachineState *ms, const char *node_name,
+                               int socket_id);
+
+/**
+ * riscv_socket_fdt_write_distance_matrix:
+ * @ms: pointer to machine state
+ * @socket_id: socket index
+ *
+ * Write NUMA distance matrix in MachineState->fdt
+ */
+void riscv_socket_fdt_write_distance_matrix(const MachineState *ms);
+
+CpuInstanceProperties
+riscv_numa_cpu_index_to_props(MachineState *ms, unsigned cpu_index);
+
+int64_t riscv_numa_get_default_cpu_node_id(const MachineState *ms, int idx);
+
+const CPUArchIdList *riscv_numa_possible_cpu_arch_ids(MachineState *ms);
+
+#endif /* RISCV_NUMA_H */
diff --git a/include/hw/riscv/opentitan.h b/include/hw/riscv/opentitan.h
new file mode 100644 (file)
index 0000000..609473d
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * QEMU RISC-V Board Compatible with OpenTitan FPGA platform
+ *
+ * Copyright (c) 2020 Western Digital
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_OPENTITAN_H
+#define HW_OPENTITAN_H
+
+#include "hw/riscv/riscv_hart.h"
+#include "hw/intc/sifive_plic.h"
+#include "hw/char/ibex_uart.h"
+#include "hw/timer/ibex_timer.h"
+#include "hw/ssi/ibex_spi_host.h"
+#include "hw/boards.h"
+#include "qom/object.h"
+
+#define TYPE_RISCV_IBEX_SOC "riscv.lowrisc.ibex.soc"
+OBJECT_DECLARE_SIMPLE_TYPE(LowRISCIbexSoCState, RISCV_IBEX_SOC)
+
+enum {
+    OPENTITAN_SPI_HOST0,
+    OPENTITAN_SPI_HOST1,
+    OPENTITAN_NUM_SPI_HOSTS,
+};
+
+struct LowRISCIbexSoCState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    RISCVHartArrayState cpus;
+    SiFivePLICState plic;
+    IbexUartState uart;
+    IbexTimerState timer;
+    IbexSPIHostState spi_host[OPENTITAN_NUM_SPI_HOSTS];
+
+    uint32_t resetvec;
+
+    MemoryRegion flash_mem;
+    MemoryRegion rom;
+    MemoryRegion flash_alias;
+};
+
+#define TYPE_OPENTITAN_MACHINE MACHINE_TYPE_NAME("opentitan")
+OBJECT_DECLARE_SIMPLE_TYPE(OpenTitanState, OPENTITAN_MACHINE)
+
+typedef struct OpenTitanState {
+    /*< private >*/
+    MachineState parent_obj;
+
+    /*< public >*/
+    LowRISCIbexSoCState soc;
+} OpenTitanState;
+
+enum {
+    IBEX_DEV_ROM,
+    IBEX_DEV_RAM,
+    IBEX_DEV_FLASH,
+    IBEX_DEV_FLASH_VIRTUAL,
+    IBEX_DEV_UART,
+    IBEX_DEV_SPI_DEVICE,
+    IBEX_DEV_SPI_HOST0,
+    IBEX_DEV_SPI_HOST1,
+    IBEX_DEV_GPIO,
+    IBEX_DEV_I2C,
+    IBEX_DEV_PATTGEN,
+    IBEX_DEV_TIMER,
+    IBEX_DEV_SENSOR_CTRL,
+    IBEX_DEV_OTP_CTRL,
+    IBEX_DEV_LC_CTRL,
+    IBEX_DEV_PWRMGR,
+    IBEX_DEV_RSTMGR,
+    IBEX_DEV_CLKMGR,
+    IBEX_DEV_PINMUX,
+    IBEX_DEV_AON_TIMER,
+    IBEX_DEV_USBDEV,
+    IBEX_DEV_FLASH_CTRL,
+    IBEX_DEV_PLIC,
+    IBEX_DEV_AES,
+    IBEX_DEV_HMAC,
+    IBEX_DEV_KMAC,
+    IBEX_DEV_KEYMGR,
+    IBEX_DEV_CSRNG,
+    IBEX_DEV_ENTROPY,
+    IBEX_DEV_EDNO,
+    IBEX_DEV_EDN1,
+    IBEX_DEV_ALERT_HANDLER,
+    IBEX_DEV_SRAM_CTRL,
+    IBEX_DEV_OTBN,
+    IBEX_DEV_IBEX_CFG,
+};
+
+enum {
+    IBEX_UART0_TX_WATERMARK_IRQ   = 1,
+    IBEX_UART0_RX_WATERMARK_IRQ   = 2,
+    IBEX_UART0_TX_EMPTY_IRQ       = 3,
+    IBEX_UART0_RX_OVERFLOW_IRQ    = 4,
+    IBEX_UART0_RX_FRAME_ERR_IRQ   = 5,
+    IBEX_UART0_RX_BREAK_ERR_IRQ   = 6,
+    IBEX_UART0_RX_TIMEOUT_IRQ     = 7,
+    IBEX_UART0_RX_PARITY_ERR_IRQ  = 8,
+    IBEX_TIMER_TIMEREXPIRED0_0    = 124,
+    IBEX_SPI_HOST0_ERR_IRQ        = 131,
+    IBEX_SPI_HOST0_SPI_EVENT_IRQ  = 132,
+    IBEX_SPI_HOST1_ERR_IRQ        = 133,
+    IBEX_SPI_HOST1_SPI_EVENT_IRQ  = 134,
+};
+
+#endif
diff --git a/include/hw/riscv/riscv_hart.h b/include/hw/riscv/riscv_hart.h
new file mode 100644 (file)
index 0000000..912b4a2
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * QEMU RISC-V Hart Array interface
+ *
+ * Copyright (c) 2017 SiFive, Inc.
+ *
+ * Holds the state of a heterogeneous array of RISC-V harts
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_HART_H
+#define HW_RISCV_HART_H
+
+#include "hw/sysbus.h"
+#include "target/riscv/cpu.h"
+#include "qom/object.h"
+
+#define TYPE_RISCV_HART_ARRAY "riscv.hart_array"
+
+OBJECT_DECLARE_SIMPLE_TYPE(RISCVHartArrayState, RISCV_HART_ARRAY)
+
+struct RISCVHartArrayState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    uint32_t num_harts;
+    uint32_t hartid_base;
+    char *cpu_type;
+    uint64_t resetvec;
+    RISCVCPU *harts;
+};
+
+#endif
diff --git a/include/hw/riscv/shakti_c.h b/include/hw/riscv/shakti_c.h
new file mode 100644 (file)
index 0000000..539fe11
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Shakti C-class SoC emulation
+ *
+ * Copyright (c) 2021 Vijai Kumar K <vijai@behindbytes.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SHAKTI_C_H
+#define HW_SHAKTI_C_H
+
+#include "hw/riscv/riscv_hart.h"
+#include "hw/boards.h"
+#include "hw/char/shakti_uart.h"
+
+#define TYPE_RISCV_SHAKTI_SOC "riscv.shakti.cclass.soc"
+#define RISCV_SHAKTI_SOC(obj) \
+    OBJECT_CHECK(ShaktiCSoCState, (obj), TYPE_RISCV_SHAKTI_SOC)
+
+typedef struct ShaktiCSoCState {
+    /*< private >*/
+    DeviceState parent_obj;
+
+    /*< public >*/
+    RISCVHartArrayState cpus;
+    DeviceState *plic;
+    ShaktiUartState uart;
+    MemoryRegion rom;
+
+} ShaktiCSoCState;
+
+#define TYPE_RISCV_SHAKTI_MACHINE MACHINE_TYPE_NAME("shakti_c")
+#define RISCV_SHAKTI_MACHINE(obj) \
+    OBJECT_CHECK(ShaktiCMachineState, (obj), TYPE_RISCV_SHAKTI_MACHINE)
+typedef struct ShaktiCMachineState {
+    /*< private >*/
+    MachineState parent_obj;
+
+    /*< public >*/
+    ShaktiCSoCState soc;
+} ShaktiCMachineState;
+
+enum {
+    SHAKTI_C_ROM,
+    SHAKTI_C_RAM,
+    SHAKTI_C_UART,
+    SHAKTI_C_GPIO,
+    SHAKTI_C_PLIC,
+    SHAKTI_C_CLINT,
+    SHAKTI_C_I2C,
+};
+
+#define SHAKTI_C_PLIC_HART_CONFIG "MS"
+/* Including Interrupt ID 0 (no interrupt)*/
+#define SHAKTI_C_PLIC_NUM_SOURCES 28
+/* Excluding Priority 0 */
+#define SHAKTI_C_PLIC_NUM_PRIORITIES 2
+#define SHAKTI_C_PLIC_PRIORITY_BASE 0x00
+#define SHAKTI_C_PLIC_PENDING_BASE 0x1000
+#define SHAKTI_C_PLIC_ENABLE_BASE 0x2000
+#define SHAKTI_C_PLIC_ENABLE_STRIDE 0x80
+#define SHAKTI_C_PLIC_CONTEXT_BASE 0x200000
+#define SHAKTI_C_PLIC_CONTEXT_STRIDE 0x1000
+
+#endif
diff --git a/include/hw/riscv/sifive_cpu.h b/include/hw/riscv/sifive_cpu.h
new file mode 100644 (file)
index 0000000..1367996
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * SiFive CPU types
+ *
+ * Copyright (c) 2017 SiFive, Inc.
+ * Copyright (c) 2019 Bin Meng <bmeng.cn@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SIFIVE_CPU_H
+#define HW_SIFIVE_CPU_H
+
+#if defined(TARGET_RISCV32)
+#define SIFIVE_E_CPU TYPE_RISCV_CPU_SIFIVE_E31
+#define SIFIVE_U_CPU TYPE_RISCV_CPU_SIFIVE_U34
+#elif defined(TARGET_RISCV64)
+#define SIFIVE_E_CPU TYPE_RISCV_CPU_SIFIVE_E51
+#define SIFIVE_U_CPU TYPE_RISCV_CPU_SIFIVE_U54
+#endif
+
+#endif /* HW_SIFIVE_CPU_H */
diff --git a/include/hw/riscv/sifive_e.h b/include/hw/riscv/sifive_e.h
new file mode 100644 (file)
index 0000000..31180a6
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * SiFive E series machine interface
+ *
+ * Copyright (c) 2017 SiFive, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SIFIVE_E_H
+#define HW_SIFIVE_E_H
+
+#include "hw/riscv/riscv_hart.h"
+#include "hw/riscv/sifive_cpu.h"
+#include "hw/gpio/sifive_gpio.h"
+#include "hw/misc/sifive_e_aon.h"
+#include "hw/boards.h"
+
+#define TYPE_RISCV_E_SOC "riscv.sifive.e.soc"
+#define RISCV_E_SOC(obj) \
+    OBJECT_CHECK(SiFiveESoCState, (obj), TYPE_RISCV_E_SOC)
+
+typedef struct SiFiveESoCState {
+    /*< private >*/
+    DeviceState parent_obj;
+
+    /*< public >*/
+    RISCVHartArrayState cpus;
+    DeviceState *plic;
+    SiFiveEAONState aon;
+    SIFIVEGPIOState gpio;
+    MemoryRegion xip_mem;
+    MemoryRegion mask_rom;
+} SiFiveESoCState;
+
+typedef struct SiFiveEState {
+    /*< private >*/
+    MachineState parent_obj;
+
+    /*< public >*/
+    SiFiveESoCState soc;
+    bool revb;
+} SiFiveEState;
+
+#define TYPE_RISCV_E_MACHINE MACHINE_TYPE_NAME("sifive_e")
+#define RISCV_E_MACHINE(obj) \
+    OBJECT_CHECK(SiFiveEState, (obj), TYPE_RISCV_E_MACHINE)
+
+enum {
+    SIFIVE_E_DEV_DEBUG,
+    SIFIVE_E_DEV_MROM,
+    SIFIVE_E_DEV_OTP,
+    SIFIVE_E_DEV_CLINT,
+    SIFIVE_E_DEV_PLIC,
+    SIFIVE_E_DEV_AON,
+    SIFIVE_E_DEV_PRCI,
+    SIFIVE_E_DEV_OTP_CTRL,
+    SIFIVE_E_DEV_GPIO0,
+    SIFIVE_E_DEV_UART0,
+    SIFIVE_E_DEV_QSPI0,
+    SIFIVE_E_DEV_PWM0,
+    SIFIVE_E_DEV_UART1,
+    SIFIVE_E_DEV_QSPI1,
+    SIFIVE_E_DEV_PWM1,
+    SIFIVE_E_DEV_QSPI2,
+    SIFIVE_E_DEV_PWM2,
+    SIFIVE_E_DEV_XIP,
+    SIFIVE_E_DEV_DTIM
+};
+
+enum {
+    SIFIVE_E_AON_WDT_IRQ  = 1,
+    SIFIVE_E_UART0_IRQ    = 3,
+    SIFIVE_E_UART1_IRQ    = 4,
+    SIFIVE_E_GPIO0_IRQ0   = 8
+};
+
+#define SIFIVE_E_PLIC_HART_CONFIG "M"
+/*
+ * Freedom E310 G002 and G003 supports 52 interrupt sources while
+ * Freedom E310 G000 supports 51 interrupt sources. We use the value
+ * of G002 and G003, so it is 53 (including interrupt source 0).
+ */
+#define SIFIVE_E_PLIC_NUM_SOURCES 53
+#define SIFIVE_E_PLIC_NUM_PRIORITIES 7
+#define SIFIVE_E_PLIC_PRIORITY_BASE 0x00
+#define SIFIVE_E_PLIC_PENDING_BASE 0x1000
+#define SIFIVE_E_PLIC_ENABLE_BASE 0x2000
+#define SIFIVE_E_PLIC_ENABLE_STRIDE 0x80
+#define SIFIVE_E_PLIC_CONTEXT_BASE 0x200000
+#define SIFIVE_E_PLIC_CONTEXT_STRIDE 0x1000
+
+#endif
diff --git a/include/hw/riscv/sifive_u.h b/include/hw/riscv/sifive_u.h
new file mode 100644 (file)
index 0000000..0696f85
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * SiFive U series machine interface
+ *
+ * Copyright (c) 2017 SiFive, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SIFIVE_U_H
+#define HW_SIFIVE_U_H
+
+#include "hw/boards.h"
+#include "hw/cpu/cluster.h"
+#include "hw/dma/sifive_pdma.h"
+#include "hw/net/cadence_gem.h"
+#include "hw/riscv/riscv_hart.h"
+#include "hw/riscv/sifive_cpu.h"
+#include "hw/gpio/sifive_gpio.h"
+#include "hw/misc/sifive_u_otp.h"
+#include "hw/misc/sifive_u_prci.h"
+#include "hw/ssi/sifive_spi.h"
+#include "hw/timer/sifive_pwm.h"
+
+#define TYPE_RISCV_U_SOC "riscv.sifive.u.soc"
+#define RISCV_U_SOC(obj) \
+    OBJECT_CHECK(SiFiveUSoCState, (obj), TYPE_RISCV_U_SOC)
+
+typedef struct SiFiveUSoCState {
+    /*< private >*/
+    DeviceState parent_obj;
+
+    /*< public >*/
+    CPUClusterState e_cluster;
+    CPUClusterState u_cluster;
+    RISCVHartArrayState e_cpus;
+    RISCVHartArrayState u_cpus;
+    DeviceState *plic;
+    SiFiveUPRCIState prci;
+    SIFIVEGPIOState gpio;
+    SiFiveUOTPState otp;
+    SiFivePDMAState dma;
+    SiFiveSPIState spi0;
+    SiFiveSPIState spi2;
+    CadenceGEMState gem;
+    SiFivePwmState pwm[2];
+
+    uint32_t serial;
+    char *cpu_type;
+} SiFiveUSoCState;
+
+#define TYPE_RISCV_U_MACHINE MACHINE_TYPE_NAME("sifive_u")
+#define RISCV_U_MACHINE(obj) \
+    OBJECT_CHECK(SiFiveUState, (obj), TYPE_RISCV_U_MACHINE)
+
+typedef struct SiFiveUState {
+    /*< private >*/
+    MachineState parent_obj;
+
+    /*< public >*/
+    SiFiveUSoCState soc;
+    int fdt_size;
+
+    bool start_in_flash;
+    uint32_t msel;
+    uint32_t serial;
+} SiFiveUState;
+
+enum {
+    SIFIVE_U_DEV_DEBUG,
+    SIFIVE_U_DEV_MROM,
+    SIFIVE_U_DEV_CLINT,
+    SIFIVE_U_DEV_L2CC,
+    SIFIVE_U_DEV_PDMA,
+    SIFIVE_U_DEV_L2LIM,
+    SIFIVE_U_DEV_PLIC,
+    SIFIVE_U_DEV_PRCI,
+    SIFIVE_U_DEV_UART0,
+    SIFIVE_U_DEV_UART1,
+    SIFIVE_U_DEV_GPIO,
+    SIFIVE_U_DEV_QSPI0,
+    SIFIVE_U_DEV_QSPI2,
+    SIFIVE_U_DEV_OTP,
+    SIFIVE_U_DEV_DMC,
+    SIFIVE_U_DEV_FLASH0,
+    SIFIVE_U_DEV_DRAM,
+    SIFIVE_U_DEV_GEM,
+    SIFIVE_U_DEV_GEM_MGMT,
+    SIFIVE_U_DEV_PWM0,
+    SIFIVE_U_DEV_PWM1
+};
+
+enum {
+    SIFIVE_U_L2CC_IRQ0 = 1,
+    SIFIVE_U_L2CC_IRQ1 = 2,
+    SIFIVE_U_L2CC_IRQ2 = 3,
+    SIFIVE_U_UART0_IRQ = 4,
+    SIFIVE_U_UART1_IRQ = 5,
+    SIFIVE_U_QSPI2_IRQ = 6,
+    SIFIVE_U_GPIO_IRQ0 = 7,
+    SIFIVE_U_GPIO_IRQ1 = 8,
+    SIFIVE_U_GPIO_IRQ2 = 9,
+    SIFIVE_U_GPIO_IRQ3 = 10,
+    SIFIVE_U_GPIO_IRQ4 = 11,
+    SIFIVE_U_GPIO_IRQ5 = 12,
+    SIFIVE_U_GPIO_IRQ6 = 13,
+    SIFIVE_U_GPIO_IRQ7 = 14,
+    SIFIVE_U_GPIO_IRQ8 = 15,
+    SIFIVE_U_GPIO_IRQ9 = 16,
+    SIFIVE_U_GPIO_IRQ10 = 17,
+    SIFIVE_U_GPIO_IRQ11 = 18,
+    SIFIVE_U_GPIO_IRQ12 = 19,
+    SIFIVE_U_GPIO_IRQ13 = 20,
+    SIFIVE_U_GPIO_IRQ14 = 21,
+    SIFIVE_U_GPIO_IRQ15 = 22,
+    SIFIVE_U_PDMA_IRQ0 = 23,
+    SIFIVE_U_PDMA_IRQ1 = 24,
+    SIFIVE_U_PDMA_IRQ2 = 25,
+    SIFIVE_U_PDMA_IRQ3 = 26,
+    SIFIVE_U_PDMA_IRQ4 = 27,
+    SIFIVE_U_PDMA_IRQ5 = 28,
+    SIFIVE_U_PDMA_IRQ6 = 29,
+    SIFIVE_U_PDMA_IRQ7 = 30,
+    SIFIVE_U_PWM0_IRQ0 = 42,
+    SIFIVE_U_PWM0_IRQ1 = 43,
+    SIFIVE_U_PWM0_IRQ2 = 44,
+    SIFIVE_U_PWM0_IRQ3 = 45,
+    SIFIVE_U_PWM1_IRQ0 = 46,
+    SIFIVE_U_PWM1_IRQ1 = 47,
+    SIFIVE_U_PWM1_IRQ2 = 48,
+    SIFIVE_U_PWM1_IRQ3 = 49,
+    SIFIVE_U_QSPI0_IRQ = 51,
+    SIFIVE_U_GEM_IRQ = 53
+};
+
+enum {
+    SIFIVE_U_HFCLK_FREQ = 33333333,
+    SIFIVE_U_RTCCLK_FREQ = 1000000
+};
+
+enum {
+    MSEL_MEMMAP_QSPI0_FLASH = 1,
+    MSEL_L2LIM_QSPI0_FLASH = 6,
+    MSEL_L2LIM_QSPI2_SD = 11
+};
+
+#define SIFIVE_U_MANAGEMENT_CPU_COUNT   1
+#define SIFIVE_U_COMPUTE_CPU_COUNT      4
+
+#define SIFIVE_U_PLIC_NUM_SOURCES 54
+#define SIFIVE_U_PLIC_NUM_PRIORITIES 7
+#define SIFIVE_U_PLIC_PRIORITY_BASE 0x00
+#define SIFIVE_U_PLIC_PENDING_BASE 0x1000
+#define SIFIVE_U_PLIC_ENABLE_BASE 0x2000
+#define SIFIVE_U_PLIC_ENABLE_STRIDE 0x80
+#define SIFIVE_U_PLIC_CONTEXT_BASE 0x200000
+#define SIFIVE_U_PLIC_CONTEXT_STRIDE 0x1000
+
+#endif
diff --git a/include/hw/riscv/spike.h b/include/hw/riscv/spike.h
new file mode 100644 (file)
index 0000000..0c2a223
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Spike machine interface
+ *
+ * Copyright (c) 2017 SiFive, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_SPIKE_H
+#define HW_RISCV_SPIKE_H
+
+#include "hw/boards.h"
+#include "hw/riscv/riscv_hart.h"
+#include "hw/sysbus.h"
+
+#define SPIKE_CPUS_MAX 8
+#define SPIKE_SOCKETS_MAX 8
+
+#define TYPE_SPIKE_MACHINE MACHINE_TYPE_NAME("spike")
+typedef struct SpikeState SpikeState;
+DECLARE_INSTANCE_CHECKER(SpikeState, SPIKE_MACHINE,
+                         TYPE_SPIKE_MACHINE)
+
+struct SpikeState {
+    /*< private >*/
+    MachineState parent;
+
+    /*< public >*/
+    RISCVHartArrayState soc[SPIKE_SOCKETS_MAX];
+};
+
+enum {
+    SPIKE_MROM,
+    SPIKE_HTIF,
+    SPIKE_CLINT,
+    SPIKE_DRAM
+};
+
+#endif
diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
new file mode 100644 (file)
index 0000000..e5c474b
--- /dev/null
@@ -0,0 +1,130 @@
+/*
+ * QEMU RISC-V VirtIO machine interface
+ *
+ * Copyright (c) 2017 SiFive, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_VIRT_H
+#define HW_RISCV_VIRT_H
+
+#include "hw/boards.h"
+#include "hw/riscv/riscv_hart.h"
+#include "hw/sysbus.h"
+#include "hw/block/flash.h"
+
+#define VIRT_CPUS_MAX_BITS             9
+#define VIRT_CPUS_MAX                  (1 << VIRT_CPUS_MAX_BITS)
+#define VIRT_SOCKETS_MAX_BITS          2
+#define VIRT_SOCKETS_MAX               (1 << VIRT_SOCKETS_MAX_BITS)
+
+#define TYPE_RISCV_VIRT_MACHINE MACHINE_TYPE_NAME("virt")
+typedef struct RISCVVirtState RISCVVirtState;
+DECLARE_INSTANCE_CHECKER(RISCVVirtState, RISCV_VIRT_MACHINE,
+                         TYPE_RISCV_VIRT_MACHINE)
+
+typedef enum RISCVVirtAIAType {
+    VIRT_AIA_TYPE_NONE = 0,
+    VIRT_AIA_TYPE_APLIC,
+    VIRT_AIA_TYPE_APLIC_IMSIC,
+} RISCVVirtAIAType;
+
+struct RISCVVirtState {
+    /*< private >*/
+    MachineState parent;
+
+    /*< public >*/
+    Notifier machine_done;
+    DeviceState *platform_bus_dev;
+    RISCVHartArrayState soc[VIRT_SOCKETS_MAX];
+    DeviceState *irqchip[VIRT_SOCKETS_MAX];
+    PFlashCFI01 *flash[2];
+    FWCfgState *fw_cfg;
+
+    int fdt_size;
+    bool have_aclint;
+    RISCVVirtAIAType aia_type;
+    int aia_guests;
+    char *oem_id;
+    char *oem_table_id;
+    OnOffAuto acpi;
+    const MemMapEntry *memmap;
+};
+
+enum {
+    VIRT_DEBUG,
+    VIRT_MROM,
+    VIRT_TEST,
+    VIRT_RTC,
+    VIRT_CLINT,
+    VIRT_ACLINT_SSWI,
+    VIRT_PLIC,
+    VIRT_APLIC_M,
+    VIRT_APLIC_S,
+    VIRT_UART0,
+    VIRT_VIRTIO,
+    VIRT_FW_CFG,
+    VIRT_IMSIC_M,
+    VIRT_IMSIC_S,
+    VIRT_FLASH,
+    VIRT_DRAM,
+    VIRT_PCIE_MMIO,
+    VIRT_PCIE_PIO,
+    VIRT_PLATFORM_BUS,
+    VIRT_PCIE_ECAM
+};
+
+enum {
+    UART0_IRQ = 10,
+    RTC_IRQ = 11,
+    VIRTIO_IRQ = 1, /* 1 to 8 */
+    VIRTIO_COUNT = 8,
+    PCIE_IRQ = 0x20, /* 32 to 35 */
+    VIRT_PLATFORM_BUS_IRQ = 64, /* 64 to 95 */
+};
+
+#define VIRT_PLATFORM_BUS_NUM_IRQS 32
+
+#define VIRT_IRQCHIP_NUM_MSIS 255
+#define VIRT_IRQCHIP_NUM_SOURCES 96
+#define VIRT_IRQCHIP_NUM_PRIO_BITS 3
+#define VIRT_IRQCHIP_MAX_GUESTS_BITS 3
+#define VIRT_IRQCHIP_MAX_GUESTS ((1U << VIRT_IRQCHIP_MAX_GUESTS_BITS) - 1U)
+
+#define VIRT_PLIC_PRIORITY_BASE 0x00
+#define VIRT_PLIC_PENDING_BASE 0x1000
+#define VIRT_PLIC_ENABLE_BASE 0x2000
+#define VIRT_PLIC_ENABLE_STRIDE 0x80
+#define VIRT_PLIC_CONTEXT_BASE 0x200000
+#define VIRT_PLIC_CONTEXT_STRIDE 0x1000
+#define VIRT_PLIC_SIZE(__num_context) \
+    (VIRT_PLIC_CONTEXT_BASE + (__num_context) * VIRT_PLIC_CONTEXT_STRIDE)
+
+#define FDT_PCI_ADDR_CELLS    3
+#define FDT_PCI_INT_CELLS     1
+#define FDT_PLIC_ADDR_CELLS   0
+#define FDT_PLIC_INT_CELLS    1
+#define FDT_APLIC_INT_CELLS   2
+#define FDT_IMSIC_INT_CELLS   0
+#define FDT_MAX_INT_CELLS     2
+#define FDT_MAX_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
+                                 1 + FDT_MAX_INT_CELLS)
+#define FDT_PLIC_INT_MAP_WIDTH  (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
+                                 1 + FDT_PLIC_INT_CELLS)
+#define FDT_APLIC_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
+                                 1 + FDT_APLIC_INT_CELLS)
+
+bool virt_is_acpi_enabled(RISCVVirtState *s);
+void virt_acpi_setup(RISCVVirtState *vms);
+#endif
diff --git a/include/hw/rtc/allwinner-rtc.h b/include/hw/rtc/allwinner-rtc.h
new file mode 100644 (file)
index 0000000..bf41543
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ * Allwinner Real Time Clock emulation
+ *
+ * Copyright (C) 2019 Niek Linnenbank <nieklinnenbank@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_MISC_ALLWINNER_RTC_H
+#define HW_MISC_ALLWINNER_RTC_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+
+/**
+ * Constants
+ * @{
+ */
+
+/** Highest register address used by RTC device */
+#define AW_RTC_REGS_MAXADDR     (0x200)
+
+/** Total number of known registers */
+#define AW_RTC_REGS_NUM         (AW_RTC_REGS_MAXADDR / sizeof(uint32_t))
+
+/** @} */
+
+/**
+ * Object model types
+ * @{
+ */
+
+/** Generic Allwinner RTC device (abstract) */
+#define TYPE_AW_RTC          "allwinner-rtc"
+
+/** Allwinner RTC sun4i family (A10, A12) */
+#define TYPE_AW_RTC_SUN4I    TYPE_AW_RTC "-sun4i"
+
+/** Allwinner RTC sun6i family and newer (A31, H2+, H3, etc) */
+#define TYPE_AW_RTC_SUN6I    TYPE_AW_RTC "-sun6i"
+
+/** Allwinner RTC sun7i family (A20) */
+#define TYPE_AW_RTC_SUN7I    TYPE_AW_RTC "-sun7i"
+
+/** @} */
+
+/**
+ * Object model macros
+ * @{
+ */
+
+OBJECT_DECLARE_TYPE(AwRtcState, AwRtcClass, AW_RTC)
+
+/** @} */
+
+/**
+ * Allwinner RTC per-object instance state.
+ */
+struct AwRtcState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    /**
+     * Actual year represented by the device when year counter is zero
+     *
+     * Can be overridden by the user using the corresponding 'base-year'
+     * property. The base year used by the target OS driver can vary, for
+     * example the Linux driver for sun6i uses 1970 while NetBSD uses 2000.
+     */
+    int base_year;
+
+    /** Maps I/O registers in physical memory */
+    MemoryRegion iomem;
+
+    /** Array of hardware registers */
+    uint32_t regs[AW_RTC_REGS_NUM];
+
+};
+
+/**
+ * Allwinner RTC class-level struct.
+ *
+ * This struct is filled by each sunxi device specific code
+ * such that the generic code can use this struct to support
+ * all devices.
+ */
+struct AwRtcClass {
+    /*< private >*/
+    SysBusDeviceClass parent_class;
+    /*< public >*/
+
+    /** Defines device specific register map */
+    const uint8_t *regmap;
+
+    /** Size of the regmap in bytes */
+    size_t regmap_size;
+
+    /**
+     * Read device specific register
+     *
+     * @offset: register offset to read
+     * @return true if register read successful, false otherwise
+     */
+    bool (*read)(AwRtcState *s, uint32_t offset);
+
+    /**
+     * Write device specific register
+     *
+     * @offset: register offset to write
+     * @data: value to set in register
+     * @return true if register write successful, false otherwise
+     */
+    bool (*write)(AwRtcState *s, uint32_t offset, uint32_t data);
+
+};
+
+#endif /* HW_MISC_ALLWINNER_RTC_H */
diff --git a/include/hw/rtc/aspeed_rtc.h b/include/hw/rtc/aspeed_rtc.h
new file mode 100644 (file)
index 0000000..596dfeb
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * ASPEED Real Time Clock
+ * Joel Stanley <joel@jms.id.au>
+ *
+ * Copyright 2019 IBM Corp
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef HW_RTC_ASPEED_RTC_H
+#define HW_RTC_ASPEED_RTC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+struct AspeedRtcState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    qemu_irq irq;
+
+    uint32_t reg[0x18];
+    int64_t offset;
+
+};
+
+#define TYPE_ASPEED_RTC "aspeed.rtc"
+OBJECT_DECLARE_SIMPLE_TYPE(AspeedRtcState, ASPEED_RTC)
+
+#endif /* HW_RTC_ASPEED_RTC_H */
diff --git a/include/hw/rtc/goldfish_rtc.h b/include/hw/rtc/goldfish_rtc.h
new file mode 100644 (file)
index 0000000..162be33
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Goldfish virtual platform RTC
+ *
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * For more details on Google Goldfish virtual platform refer:
+ * https://android.googlesource.com/platform/external/qemu/+/master/docs/GOLDFISH-VIRTUAL-HARDWARE.TXT
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RTC_GOLDFISH_RTC_H
+#define HW_RTC_GOLDFISH_RTC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_GOLDFISH_RTC "goldfish_rtc"
+OBJECT_DECLARE_SIMPLE_TYPE(GoldfishRTCState, GOLDFISH_RTC)
+
+struct GoldfishRTCState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    QEMUTimer *timer;
+    qemu_irq irq;
+
+    uint64_t tick_offset;
+    uint64_t tick_offset_vmstate;
+    uint64_t alarm_next;
+    uint32_t alarm_running;
+    uint32_t irq_pending;
+    uint32_t irq_enabled;
+    uint32_t time_high;
+
+    bool big_endian;
+};
+
+#endif
diff --git a/include/hw/rtc/m48t59.h b/include/hw/rtc/m48t59.h
new file mode 100644 (file)
index 0000000..c149374
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * QEMU M48T59 and M48T08 NVRAM emulation
+ *
+ * Copyright (c) 2003-2005, 2007 Jocelyn Mayer
+ * Copyright (c) 2013 Hervé Poussineau
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_RTC_M48T59_H
+#define HW_RTC_M48T59_H
+
+#include "exec/hwaddr.h"
+#include "qom/object.h"
+
+#define TYPE_NVRAM "nvram"
+
+typedef struct NvramClass NvramClass;
+DECLARE_CLASS_CHECKERS(NvramClass, NVRAM,
+                       TYPE_NVRAM)
+#define NVRAM(obj) \
+    INTERFACE_CHECK(Nvram, (obj), TYPE_NVRAM)
+
+typedef struct Nvram Nvram;
+
+struct NvramClass {
+    InterfaceClass parent;
+
+    uint32_t (*read)(Nvram *obj, uint32_t addr);
+    void (*write)(Nvram *obj, uint32_t addr, uint32_t val);
+    void (*toggle_lock)(Nvram *obj, int lock);
+};
+
+#endif /* HW_RTC_M48T59_H */
diff --git a/include/hw/rtc/mc146818rtc.h b/include/hw/rtc/mc146818rtc.h
new file mode 100644 (file)
index 0000000..97cec0b
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * QEMU MC146818 RTC emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef HW_RTC_MC146818RTC_H
+#define HW_RTC_MC146818RTC_H
+
+#include "qapi/qapi-types-machine.h"
+#include "qemu/queue.h"
+#include "qemu/timer.h"
+#include "hw/isa/isa.h"
+#include "qom/object.h"
+
+#define TYPE_MC146818_RTC "mc146818rtc"
+OBJECT_DECLARE_SIMPLE_TYPE(MC146818RtcState, MC146818_RTC)
+
+struct MC146818RtcState {
+    ISADevice parent_obj;
+
+    MemoryRegion io;
+    MemoryRegion coalesced_io;
+    uint8_t cmos_data[128];
+    uint8_t cmos_index;
+    uint8_t isairq;
+    uint16_t io_base;
+    int32_t base_year;
+    uint64_t base_rtc;
+    uint64_t last_update;
+    int64_t offset;
+    qemu_irq irq;
+    int it_shift;
+    /* periodic timer */
+    QEMUTimer *periodic_timer;
+    int64_t next_periodic_time;
+    /* update-ended timer */
+    QEMUTimer *update_timer;
+    uint64_t next_alarm_time;
+    uint16_t irq_reinject_on_ack_count;
+    uint32_t irq_coalesced;
+    uint32_t period;
+    QEMUTimer *coalesced_timer;
+    Notifier clock_reset_notifier;
+    LostTickPolicy lost_tick_policy;
+    Notifier suspend_notifier;
+    QLIST_ENTRY(MC146818RtcState) link;
+};
+
+#define RTC_ISA_IRQ 8
+
+MC146818RtcState *mc146818_rtc_init(ISABus *bus, int base_year,
+                                    qemu_irq intercept_irq);
+void mc146818rtc_set_cmos_data(MC146818RtcState *s, int addr, int val);
+int mc146818rtc_get_cmos_data(MC146818RtcState *s, int addr);
+void qmp_rtc_reset_reinjection(Error **errp);
+
+#endif /* HW_RTC_MC146818RTC_H */
diff --git a/include/hw/rtc/mc146818rtc_regs.h b/include/hw/rtc/mc146818rtc_regs.h
new file mode 100644 (file)
index 0000000..12197e0
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * QEMU MC146818 RTC emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_RTC_MC146818RTC_REGS_H
+#define HW_RTC_MC146818RTC_REGS_H
+
+#include "qemu/timer.h"
+#include "qemu/host-utils.h"
+
+#define RTC_SECONDS             0
+#define RTC_SECONDS_ALARM       1
+#define RTC_MINUTES             2
+#define RTC_MINUTES_ALARM       3
+#define RTC_HOURS               4
+#define RTC_HOURS_ALARM         5
+#define RTC_ALARM_DONT_CARE    0xC0
+
+#define RTC_DAY_OF_WEEK         6
+#define RTC_DAY_OF_MONTH        7
+#define RTC_MONTH               8
+#define RTC_YEAR                9
+
+#define RTC_REG_A               10
+#define RTC_REG_B               11
+#define RTC_REG_C               12
+#define RTC_REG_D               13
+
+/* PC cmos mappings */
+#define RTC_CENTURY              0x32
+#define RTC_IBM_PS2_CENTURY_BYTE 0x37
+
+#define REG_A_UIP 0x80
+
+#define REG_B_SET  0x80
+#define REG_B_PIE  0x40
+#define REG_B_AIE  0x20
+#define REG_B_UIE  0x10
+#define REG_B_SQWE 0x08
+#define REG_B_DM   0x04
+#define REG_B_24H  0x02
+
+#define REG_C_UF   0x10
+#define REG_C_IRQF 0x80
+#define REG_C_PF   0x40
+#define REG_C_AF   0x20
+#define REG_C_MASK 0x70
+
+static inline uint32_t periodic_period_to_clock(int period_code)
+{
+    if (!period_code) {
+        return 0;
+   }
+
+    if (period_code <= 2) {
+        period_code += 7;
+    }
+    /* period in 32 Khz cycles */
+   return 1 << (period_code - 1);
+}
+
+#define RTC_CLOCK_RATE            32768
+
+static inline int64_t periodic_clock_to_ns(int64_t clocks)
+{
+    return muldiv64(clocks, NANOSECONDS_PER_SECOND, RTC_CLOCK_RATE);
+}
+
+#endif
diff --git a/include/hw/rtc/pl031.h b/include/hw/rtc/pl031.h
new file mode 100644 (file)
index 0000000..9fd4be1
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * ARM AMBA PrimeCell PL031 RTC
+ *
+ * Copyright (c) 2007 CodeSourcery
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#ifndef HW_RTC_PL031_H
+#define HW_RTC_PL031_H
+
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+#include "qom/object.h"
+
+#define TYPE_PL031 "pl031"
+OBJECT_DECLARE_SIMPLE_TYPE(PL031State, PL031)
+
+struct PL031State {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    QEMUTimer *timer;
+    qemu_irq irq;
+
+    /*
+     * Needed to preserve the tick_count across migration, even if the
+     * absolute value of the rtc_clock is different on the source and
+     * destination.
+     */
+    uint32_t tick_offset_vmstate;
+    uint32_t tick_offset;
+    bool tick_offset_migrated;
+    bool migrate_tick_offset;
+
+    uint32_t mr;
+    uint32_t lr;
+    uint32_t cr;
+    uint32_t im;
+    uint32_t is;
+};
+
+#endif
diff --git a/include/hw/rtc/sun4v-rtc.h b/include/hw/rtc/sun4v-rtc.h
new file mode 100644 (file)
index 0000000..fc54dfc
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * QEMU sun4v Real Time Clock device
+ *
+ * The sun4v_rtc device (sun4v tod clock)
+ *
+ * Copyright (c) 2016 Artyom Tarasenko
+ *
+ * This code is licensed under the GNU GPL v3 or (at your option) any later
+ * version.
+ */
+
+#ifndef HW_RTC_SUN4V_RTC_H
+#define HW_RTC_SUN4V_RTC_H
+
+#include "exec/hwaddr.h"
+
+void sun4v_rtc_init(hwaddr addr);
+
+#endif
diff --git a/include/hw/rtc/xlnx-zynqmp-rtc.h b/include/hw/rtc/xlnx-zynqmp-rtc.h
new file mode 100644 (file)
index 0000000..f0c6a2d
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * QEMU model of the Xilinx ZynqMP Real Time Clock (RTC).
+ *
+ * Copyright (c) 2017 Xilinx Inc.
+ *
+ * Written-by: Alistair Francis
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_RTC_XLNX_ZYNQMP_RTC_H
+#define HW_RTC_XLNX_ZYNQMP_RTC_H
+
+#include "hw/register.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_XLNX_ZYNQMP_RTC "xlnx-zynmp.rtc"
+
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPRTC, XLNX_ZYNQMP_RTC)
+
+REG32(SET_TIME_WRITE, 0x0)
+REG32(SET_TIME_READ, 0x4)
+REG32(CALIB_WRITE, 0x8)
+    FIELD(CALIB_WRITE, FRACTION_EN, 20, 1)
+    FIELD(CALIB_WRITE, FRACTION_DATA, 16, 4)
+    FIELD(CALIB_WRITE, MAX_TICK, 0, 16)
+REG32(CALIB_READ, 0xc)
+    FIELD(CALIB_READ, FRACTION_EN, 20, 1)
+    FIELD(CALIB_READ, FRACTION_DATA, 16, 4)
+    FIELD(CALIB_READ, MAX_TICK, 0, 16)
+REG32(CURRENT_TIME, 0x10)
+REG32(CURRENT_TICK, 0x14)
+    FIELD(CURRENT_TICK, VALUE, 0, 16)
+REG32(ALARM, 0x18)
+REG32(RTC_INT_STATUS, 0x20)
+    FIELD(RTC_INT_STATUS, ALARM, 1, 1)
+    FIELD(RTC_INT_STATUS, SECONDS, 0, 1)
+REG32(RTC_INT_MASK, 0x24)
+    FIELD(RTC_INT_MASK, ALARM, 1, 1)
+    FIELD(RTC_INT_MASK, SECONDS, 0, 1)
+REG32(RTC_INT_EN, 0x28)
+    FIELD(RTC_INT_EN, ALARM, 1, 1)
+    FIELD(RTC_INT_EN, SECONDS, 0, 1)
+REG32(RTC_INT_DIS, 0x2c)
+    FIELD(RTC_INT_DIS, ALARM, 1, 1)
+    FIELD(RTC_INT_DIS, SECONDS, 0, 1)
+REG32(ADDR_ERROR, 0x30)
+    FIELD(ADDR_ERROR, STATUS, 0, 1)
+REG32(ADDR_ERROR_INT_MASK, 0x34)
+    FIELD(ADDR_ERROR_INT_MASK, MASK, 0, 1)
+REG32(ADDR_ERROR_INT_EN, 0x38)
+    FIELD(ADDR_ERROR_INT_EN, MASK, 0, 1)
+REG32(ADDR_ERROR_INT_DIS, 0x3c)
+    FIELD(ADDR_ERROR_INT_DIS, MASK, 0, 1)
+REG32(CONTROL, 0x40)
+    FIELD(CONTROL, BATTERY_DISABLE, 31, 1)
+    FIELD(CONTROL, OSC_CNTRL, 24, 4)
+    FIELD(CONTROL, SLVERR_ENABLE, 0, 1)
+REG32(SAFETY_CHK, 0x50)
+
+#define XLNX_ZYNQMP_RTC_R_MAX (R_SAFETY_CHK + 1)
+
+struct XlnxZynqMPRTC {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+    qemu_irq irq_rtc_int;
+    qemu_irq irq_addr_error_int;
+
+    uint32_t tick_offset;
+
+    uint32_t regs[XLNX_ZYNQMP_RTC_R_MAX];
+    RegisterInfo regs_info[XLNX_ZYNQMP_RTC_R_MAX];
+};
+
+#endif
diff --git a/include/hw/rx/rx62n.h b/include/hw/rx/rx62n.h
new file mode 100644 (file)
index 0000000..73ceeb5
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * RX62N MCU Object
+ *
+ * Datasheet: RX62N Group, RX621 Group User's Manual: Hardware
+ *            (Rev.1.40 R01UH0033EJ0140)
+ *
+ * Copyright (c) 2019 Yoshinori Sato
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RX_RX62N_H
+#define HW_RX_RX62N_H
+
+#include "target/rx/cpu.h"
+#include "hw/intc/rx_icu.h"
+#include "hw/timer/renesas_tmr.h"
+#include "hw/timer/renesas_cmt.h"
+#include "hw/char/renesas_sci.h"
+#include "qemu/units.h"
+#include "qom/object.h"
+
+#define TYPE_RX62N_MCU "rx62n-mcu"
+typedef struct RX62NState RX62NState;
+DECLARE_INSTANCE_CHECKER(RX62NState, RX62N_MCU,
+                         TYPE_RX62N_MCU)
+
+#define TYPE_R5F562N7_MCU "r5f562n7-mcu"
+#define TYPE_R5F562N8_MCU "r5f562n8-mcu"
+
+#define EXT_CS_BASE         0x01000000
+#define VECTOR_TABLE_BASE   0xffffff80
+#define RX62N_CFLASH_BASE   0xfff80000
+
+#define RX62N_NR_TMR    2
+#define RX62N_NR_CMT    2
+#define RX62N_NR_SCI    6
+
+struct RX62NState {
+    /*< private >*/
+    DeviceState parent_obj;
+    /*< public >*/
+
+    RXCPU cpu;
+    RXICUState icu;
+    RTMRState tmr[RX62N_NR_TMR];
+    RCMTState cmt[RX62N_NR_CMT];
+    RSCIState sci[RX62N_NR_SCI];
+
+    MemoryRegion *sysmem;
+    bool kernel;
+
+    MemoryRegion iram;
+    MemoryRegion iomem1;
+    MemoryRegion d_flash;
+    MemoryRegion iomem2;
+    MemoryRegion iomem3;
+    MemoryRegion c_flash;
+    qemu_irq irq[NR_IRQS];
+
+    /* Input Clock (XTAL) frequency */
+    uint32_t xtal_freq_hz;
+    /* Peripheral Module Clock frequency */
+    uint32_t pclk_freq_hz;
+};
+
+#endif
diff --git a/include/hw/s390x/3270-ccw.h b/include/hw/s390x/3270-ccw.h
new file mode 100644 (file)
index 0000000..1439882
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Emulated ccw-attached 3270 definitions
+ *
+ * Copyright 2017 IBM Corp.
+ * Author(s): Yang Chen <bjcyang@linux.vnet.ibm.com>
+ *            Jing Liu <liujbjl@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390X_3270_CCW_H
+#define HW_S390X_3270_CCW_H
+
+#include "hw/sysbus.h"
+#include "hw/s390x/css.h"
+#include "hw/s390x/ccw-device.h"
+#include "qom/object.h"
+
+#define EMULATED_CCW_3270_CU_TYPE 0x3270
+#define EMULATED_CCW_3270_CHPID_TYPE 0x1a
+
+#define TYPE_EMULATED_CCW_3270 "emulated-ccw-3270"
+
+/* Local Channel Commands */
+#define TC_WRITE   0x01         /* Write */
+#define TC_RDBUF   0x02         /* Read buffer */
+#define TC_EWRITE  0x05         /* Erase write */
+#define TC_READMOD 0x06         /* Read modified */
+#define TC_EWRITEA 0x0d         /* Erase write alternate */
+#define TC_WRITESF 0x11         /* Write structured field */
+
+OBJECT_DECLARE_TYPE(EmulatedCcw3270Device, EmulatedCcw3270Class, EMULATED_CCW_3270)
+
+struct EmulatedCcw3270Device {
+    CcwDevice parent_obj;
+};
+
+struct EmulatedCcw3270Class {
+    CCWDeviceClass parent_class;
+
+    void (*init)(EmulatedCcw3270Device *, Error **);
+    int (*read_payload_3270)(EmulatedCcw3270Device *);
+    int (*write_payload_3270)(EmulatedCcw3270Device *, uint8_t);
+};
+
+#endif
diff --git a/include/hw/s390x/adapter.h b/include/hw/s390x/adapter.h
new file mode 100644 (file)
index 0000000..7f17035
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * s390 adapter definitions
+ *
+ * Copyright 2013,2014 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef S390X_ADAPTER_H
+#define S390X_ADAPTER_H
+
+struct AdapterInfo {
+    uint64_t ind_addr;
+    uint64_t summary_addr;
+    uint64_t ind_offset;
+    uint32_t summary_offset;
+    uint32_t adapter_id;
+};
+
+#endif
diff --git a/include/hw/s390x/ap-bridge.h b/include/hw/s390x/ap-bridge.h
new file mode 100644 (file)
index 0000000..470e439
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * ap bridge
+ *
+ * Copyright 2018 IBM Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390X_AP_BRIDGE_H
+#define HW_S390X_AP_BRIDGE_H
+
+#define TYPE_AP_BRIDGE "ap-bridge"
+#define TYPE_AP_BUS "ap-bus"
+
+void s390_init_ap(void);
+
+#endif
diff --git a/include/hw/s390x/ap-device.h b/include/hw/s390x/ap-device.h
new file mode 100644 (file)
index 0000000..e502745
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Adjunct Processor (AP) matrix device interfaces
+ *
+ * Copyright 2018 IBM Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390X_AP_DEVICE_H
+#define HW_S390X_AP_DEVICE_H
+
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+#define TYPE_AP_DEVICE       "ap-device"
+
+struct APDevice {
+    DeviceState parent_obj;
+};
+typedef struct APDevice APDevice;
+
+DECLARE_INSTANCE_CHECKER(APDevice, AP_DEVICE,
+                         TYPE_AP_DEVICE)
+
+#endif /* HW_S390X_AP_DEVICE_H */
diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
new file mode 100644 (file)
index 0000000..c064f42
--- /dev/null
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * CPU Topology
+ *
+ * Copyright IBM Corp. 2022, 2023
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ *
+ */
+#ifndef HW_S390X_CPU_TOPOLOGY_H
+#define HW_S390X_CPU_TOPOLOGY_H
+
+#ifndef CONFIG_USER_ONLY
+
+#include "qemu/queue.h"
+#include "hw/boards.h"
+#include "qapi/qapi-types-machine-target.h"
+
+#define S390_TOPOLOGY_CPU_IFL   0x03
+
+typedef struct S390TopologyId {
+    uint8_t sentinel;
+    uint8_t drawer;
+    uint8_t book;
+    uint8_t socket;
+    uint8_t type;
+    uint8_t vertical:1;
+    uint8_t entitlement:2;
+    uint8_t dedicated;
+    uint8_t origin;
+} S390TopologyId;
+
+typedef struct S390TopologyEntry {
+    QTAILQ_ENTRY(S390TopologyEntry) next;
+    S390TopologyId id;
+    uint64_t mask;
+} S390TopologyEntry;
+
+typedef struct S390Topology {
+    uint8_t *cores_per_socket;
+    CpuS390Polarization polarization;
+} S390Topology;
+
+typedef QTAILQ_HEAD(, S390TopologyEntry) S390TopologyList;
+
+#ifdef CONFIG_KVM
+bool s390_has_topology(void);
+void s390_topology_setup_cpu(MachineState *ms, S390CPU *cpu, Error **errp);
+void s390_topology_reset(void);
+#else
+static inline bool s390_has_topology(void)
+{
+    return false;
+}
+static inline void s390_topology_setup_cpu(MachineState *ms,
+                                           S390CPU *cpu,
+                                           Error **errp) {}
+static inline void s390_topology_reset(void)
+{
+    /* Unreachable, CPU topology not implemented for TCG */
+    assert(false);
+}
+#endif
+
+extern S390Topology s390_topology;
+
+static inline int s390_std_socket(int n, CpuTopology *smp)
+{
+    return (n / smp->cores) % smp->sockets;
+}
+
+static inline int s390_std_book(int n, CpuTopology *smp)
+{
+    return (n / (smp->cores * smp->sockets)) % smp->books;
+}
+
+static inline int s390_std_drawer(int n, CpuTopology *smp)
+{
+    return (n / (smp->cores * smp->sockets * smp->books)) % smp->drawers;
+}
+
+#endif /* CONFIG_USER_ONLY */
+
+#endif
diff --git a/include/hw/s390x/css-bridge.h b/include/hw/s390x/css-bridge.h
new file mode 100644 (file)
index 0000000..deb606d
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * virtual css bridge definition
+ *
+ * Copyright 2012,2016 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *            Pierre Morel <pmorel@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390X_CSS_BRIDGE_H
+#define HW_S390X_CSS_BRIDGE_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+
+/* virtual css bridge */
+struct VirtualCssBridge {
+    SysBusDevice sysbus_dev;
+    bool css_dev_path;
+};
+
+#define TYPE_VIRTUAL_CSS_BRIDGE "virtual-css-bridge"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtualCssBridge, VIRTUAL_CSS_BRIDGE)
+
+/* virtual css bus type */
+struct VirtualCssBus {
+    BusState parent_obj;
+};
+
+#define TYPE_VIRTUAL_CSS_BUS "virtual-css-bus"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtualCssBus, VIRTUAL_CSS_BUS)
+VirtualCssBus *virtual_css_bus_init(void);
+
+#endif
diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h
new file mode 100644 (file)
index 0000000..ba72ee3
--- /dev/null
@@ -0,0 +1,336 @@
+/*
+ * Channel subsystem structures and definitions.
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef CSS_H
+#define CSS_H
+
+#include "hw/s390x/adapter.h"
+#include "hw/s390x/s390_flic.h"
+#include "hw/s390x/ioinst.h"
+#include "sysemu/kvm.h"
+#include "target/s390x/cpu-qom.h"
+
+/* Channel subsystem constants. */
+#define MAX_DEVNO 65535
+#define MAX_SCHID 65535
+#define MAX_SSID 3
+#define MAX_CSSID 255
+#define MAX_CHPID 255
+
+#define MAX_ISC 7
+
+#define MAX_CIWS 62
+
+#define VIRTUAL_CSSID 0xfe
+#define VIRTIO_CCW_CHPID 0   /* used by convention */
+
+typedef struct CIW {
+    uint8_t type;
+    uint8_t command;
+    uint16_t count;
+} QEMU_PACKED CIW;
+
+typedef struct SenseId {
+    /* common part */
+    uint8_t reserved;        /* always 0x'FF' */
+    uint16_t cu_type;        /* control unit type */
+    uint8_t cu_model;        /* control unit model */
+    uint16_t dev_type;       /* device type */
+    uint8_t dev_model;       /* device model */
+    uint8_t unused;          /* padding byte */
+    /* extended part */
+    CIW ciw[MAX_CIWS];       /* variable # of CIWs */
+} SenseId;                   /* Note: No QEMU_PACKED due to unaligned members */
+
+/* Channel measurements, from linux/drivers/s390/cio/cmf.c. */
+typedef struct CMB {
+    uint16_t ssch_rsch_count;
+    uint16_t sample_count;
+    uint32_t device_connect_time;
+    uint32_t function_pending_time;
+    uint32_t device_disconnect_time;
+    uint32_t control_unit_queuing_time;
+    uint32_t device_active_only_time;
+    uint32_t reserved[2];
+} QEMU_PACKED CMB;
+
+typedef struct CMBE {
+    uint32_t ssch_rsch_count;
+    uint32_t sample_count;
+    uint32_t device_connect_time;
+    uint32_t function_pending_time;
+    uint32_t device_disconnect_time;
+    uint32_t control_unit_queuing_time;
+    uint32_t device_active_only_time;
+    uint32_t device_busy_time;
+    uint32_t initial_command_response_time;
+    uint32_t reserved[7];
+} QEMU_PACKED CMBE;
+
+typedef enum CcwDataStreamOp {
+    CDS_OP_R = 0, /* read, false when used as is_write */
+    CDS_OP_W = 1, /* write, true when used as is_write */
+    CDS_OP_A = 2  /* advance, should not be used as is_write */
+} CcwDataStreamOp;
+
+/* normal usage is via SuchchDev.cds instead of instantiating */
+typedef struct CcwDataStream {
+#define CDS_F_IDA   0x01
+#define CDS_F_MIDA  0x02
+#define CDS_F_I2K   0x04
+#define CDS_F_C64   0x08
+#define CDS_F_FMT   0x10 /* CCW format-1 */
+#define CDS_F_STREAM_BROKEN  0x80
+    uint8_t flags;
+    uint8_t at_idaw;
+    uint16_t at_byte;
+    uint16_t count;
+    uint32_t cda_orig;
+    int (*op_handler)(struct CcwDataStream *cds, void *buff, int len,
+                      CcwDataStreamOp op);
+    hwaddr cda;
+    bool do_skip;
+} CcwDataStream;
+
+/*
+ * IO instructions conclude according to this. Currently we have only
+ * cc codes. Valid values are 0, 1, 2, 3 and the generic semantic for
+ * IO instructions is described briefly. For more details consult the PoP.
+ */
+typedef enum IOInstEnding {
+    /* produced expected result */
+    IOINST_CC_EXPECTED = 0,
+    /* status conditions were present or produced alternate result */
+    IOINST_CC_STATUS_PRESENT = 1,
+    /* inst. ineffective because busy with previously initiated function */
+    IOINST_CC_BUSY = 2,
+    /* inst. ineffective because not operational */
+    IOINST_CC_NOT_OPERATIONAL = 3
+} IOInstEnding;
+
+typedef struct SubchDev SubchDev;
+struct SubchDev {
+    /* channel-subsystem related things: */
+    SCHIB curr_status;           /* Needs alignment and thus must come first */
+    ORB orb;
+    uint8_t cssid;
+    uint8_t ssid;
+    uint16_t schid;
+    uint16_t devno;
+    uint8_t sense_data[32];
+    hwaddr channel_prog;
+    CCW1 last_cmd;
+    bool last_cmd_valid;
+    bool ccw_fmt_1;
+    bool thinint_active;
+    uint8_t ccw_no_data_cnt;
+    uint16_t migrated_schid; /* used for mismatch detection */
+    CcwDataStream cds;
+    /* transport-provided data: */
+    int (*ccw_cb) (SubchDev *, CCW1);
+    void (*disable_cb)(SubchDev *);
+    IOInstEnding (*do_subchannel_work) (SubchDev *);
+    void (*irb_cb)(SubchDev *, IRB *);
+    SenseId id;
+    void *driver_data;
+    ESW esw;
+};
+
+static inline void sch_gen_unit_exception(SubchDev *sch)
+{
+    sch->curr_status.scsw.ctrl &= ~(SCSW_ACTL_DEVICE_ACTIVE |
+                                    SCSW_ACTL_SUBCH_ACTIVE);
+    sch->curr_status.scsw.ctrl |= SCSW_STCTL_PRIMARY |
+                                  SCSW_STCTL_SECONDARY |
+                                  SCSW_STCTL_ALERT |
+                                  SCSW_STCTL_STATUS_PEND;
+    sch->curr_status.scsw.cpa = sch->channel_prog + 8;
+    sch->curr_status.scsw.dstat =  SCSW_DSTAT_UNIT_EXCEP;
+}
+
+extern const VMStateDescription vmstate_subch_dev;
+
+/*
+ * Identify a device within the channel subsystem.
+ * Note that this can be used to identify either the subchannel or
+ * the attached I/O device, as there's always one I/O device per
+ * subchannel.
+ */
+typedef struct CssDevId {
+    uint8_t cssid;
+    uint8_t ssid;
+    uint16_t devid;
+    bool valid;
+} CssDevId;
+
+extern const PropertyInfo css_devid_propinfo;
+
+#define DEFINE_PROP_CSS_DEV_ID(_n, _s, _f) \
+    DEFINE_PROP(_n, _s, _f, css_devid_propinfo, CssDevId)
+
+typedef struct IndAddr {
+    hwaddr addr;
+    uint64_t map;
+    unsigned long refcnt;
+    int32_t len;
+    QTAILQ_ENTRY(IndAddr) sibling;
+} IndAddr;
+
+extern const VMStateDescription vmstate_ind_addr;
+
+#define VMSTATE_PTR_TO_IND_ADDR(_f, _s)                                   \
+    VMSTATE_STRUCT(_f, _s, 1, vmstate_ind_addr, IndAddr*)
+
+IndAddr *get_indicator(hwaddr ind_addr, int len);
+void release_indicator(AdapterInfo *adapter, IndAddr *indicator);
+int map_indicator(AdapterInfo *adapter, IndAddr *indicator);
+
+typedef SubchDev *(*css_subch_cb_func)(uint8_t m, uint8_t cssid, uint8_t ssid,
+                                       uint16_t schid);
+int css_create_css_image(uint8_t cssid, bool default_image);
+bool css_devno_used(uint8_t cssid, uint8_t ssid, uint16_t devno);
+void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid,
+                      uint16_t devno, SubchDev *sch);
+void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type);
+int css_sch_build_schib(SubchDev *sch, CssDevId *dev_id);
+unsigned int css_find_free_chpid(uint8_t cssid);
+uint16_t css_build_subchannel_id(SubchDev *sch);
+void copy_scsw_to_guest(SCSW *dest, const SCSW *src);
+void copy_esw_to_guest(ESW *dest, const ESW *src);
+void css_inject_io_interrupt(SubchDev *sch);
+void css_reset(void);
+void css_reset_sch(SubchDev *sch);
+void css_crw_add_to_queue(CRW crw);
+void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited,
+                   int chain, uint16_t rsid);
+void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid,
+                           int hotplugged, int add);
+void css_generate_chp_crws(uint8_t cssid, uint8_t chpid);
+void css_generate_css_crws(uint8_t cssid);
+void css_clear_sei_pending(void);
+IOInstEnding s390_ccw_cmd_request(SubchDev *sch);
+IOInstEnding do_subchannel_work_virtual(SubchDev *sub);
+IOInstEnding do_subchannel_work_passthrough(SubchDev *sub);
+void build_irb_passthrough(SubchDev *sch, IRB *irb);
+void build_irb_virtual(SubchDev *sch, IRB *irb);
+
+int s390_ccw_halt(SubchDev *sch);
+int s390_ccw_clear(SubchDev *sch);
+IOInstEnding s390_ccw_store(SubchDev *sch);
+
+typedef enum {
+    CSS_IO_ADAPTER_VIRTIO = 0,
+    CSS_IO_ADAPTER_PCI = 1,
+    CSS_IO_ADAPTER_TYPE_NUMS,
+} CssIoAdapterType;
+
+void css_adapter_interrupt(CssIoAdapterType type, uint8_t isc);
+int css_do_sic(S390CPU *cpu, uint8_t isc, uint16_t mode);
+uint32_t css_get_adapter_id(CssIoAdapterType type, uint8_t isc);
+void css_register_io_adapters(CssIoAdapterType type, bool swap, bool maskable,
+                              uint8_t flags, Error **errp);
+
+#ifndef CONFIG_USER_ONLY
+SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid,
+                         uint16_t schid);
+bool css_subch_visible(SubchDev *sch);
+void css_conditional_io_interrupt(SubchDev *sch);
+IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib);
+bool css_schid_final(int m, uint8_t cssid, uint8_t ssid, uint16_t schid);
+IOInstEnding css_do_msch(SubchDev *sch, const SCHIB *schib);
+IOInstEnding css_do_xsch(SubchDev *sch);
+IOInstEnding css_do_csch(SubchDev *sch);
+IOInstEnding css_do_hsch(SubchDev *sch);
+IOInstEnding css_do_ssch(SubchDev *sch, ORB *orb);
+int css_do_tsch_get_irb(SubchDev *sch, IRB *irb, int *irb_len);
+void css_do_tsch_update_subch(SubchDev *sch);
+int css_do_stcrw(CRW *crw);
+void css_undo_stcrw(CRW *crw);
+int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, uint8_t l_chpid,
+                         int rfmt, void *buf);
+void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo);
+int css_enable_mcsse(void);
+int css_enable_mss(void);
+IOInstEnding css_do_rsch(SubchDev *sch);
+int css_do_rchp(uint8_t cssid, uint8_t chpid);
+bool css_present(uint8_t cssid);
+#endif
+
+extern const PropertyInfo css_devid_ro_propinfo;
+
+#define DEFINE_PROP_CSS_DEV_ID_RO(_n, _s, _f) \
+    DEFINE_PROP(_n, _s, _f, css_devid_ro_propinfo, CssDevId)
+
+/**
+ * Create a subchannel for the given bus id.
+ *
+ * If @p bus_id is valid, verify that it is not already in use, and find a
+ * free devno for it.
+ * If @p bus_id is not valid find a free subchannel id and device number
+ * across all subchannel sets and all css images starting from the default
+ * css image.
+ *
+ * If either of the former actions succeed, allocate a subchannel structure,
+ * initialise it with the bus id, subchannel id and device number, register
+ * it with the CSS and return it. Otherwise return NULL.
+ *
+ * The caller becomes owner of the returned subchannel structure and
+ * is responsible for unregistering and freeing it.
+ */
+SubchDev *css_create_sch(CssDevId bus_id, Error **errp);
+
+/** Turn on css migration */
+void css_register_vmstate(void);
+
+
+void ccw_dstream_init(CcwDataStream *cds, CCW1 const *ccw, ORB const *orb);
+
+static inline void ccw_dstream_rewind(CcwDataStream *cds)
+{
+    cds->at_byte = 0;
+    cds->at_idaw = 0;
+    cds->cda = cds->cda_orig;
+}
+
+static inline bool ccw_dstream_good(CcwDataStream *cds)
+{
+    return !(cds->flags & CDS_F_STREAM_BROKEN);
+}
+
+static inline uint16_t ccw_dstream_residual_count(CcwDataStream *cds)
+{
+    return cds->count - cds->at_byte;
+}
+
+static inline uint16_t ccw_dstream_avail(CcwDataStream *cds)
+{
+    return ccw_dstream_good(cds) ? ccw_dstream_residual_count(cds) : 0;
+}
+
+static inline int ccw_dstream_advance(CcwDataStream *cds, int len)
+{
+    return cds->op_handler(cds, NULL, len, CDS_OP_A);
+}
+
+static inline int ccw_dstream_write_buf(CcwDataStream *cds, void *buff, int len)
+{
+    return cds->op_handler(cds, buff, len, CDS_OP_W);
+}
+
+static inline int ccw_dstream_read_buf(CcwDataStream *cds, void *buff, int len)
+{
+    return cds->op_handler(cds, buff, len, CDS_OP_R);
+}
+
+#define ccw_dstream_read(cds, v) ccw_dstream_read_buf((cds), &(v), sizeof(v))
+#define ccw_dstream_write(cds, v) ccw_dstream_write_buf((cds), &(v), sizeof(v))
+
+#endif
diff --git a/include/hw/s390x/ebcdic.h b/include/hw/s390x/ebcdic.h
new file mode 100644 (file)
index 0000000..69a04ca
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * EBCDIC/ASCII conversion Support
+ *
+ * Copyright (c) 2011 Alexander Graf
+ * Copyright IBM, Corp. 2013
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version.  See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef EBCDIC_H
+#define EBCDIC_H
+
+/* EBCDIC handling */
+static const uint8_t ebcdic2ascii[] = {
+    0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F,
+    0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+    0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07,
+    0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+    0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B,
+    0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07,
+    0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04,
+    0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A,
+    0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86,
+    0x87, 0xA4, 0x5B, 0x2E, 0x3C, 0x28, 0x2B, 0x21,
+    0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07,
+    0x8D, 0xE1, 0x5D, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
+    0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F,
+    0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
+    0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+    0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
+    0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+    0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1,
+    0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
+    0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07,
+    0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+    0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07,
+    0x9B, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC,
+    0xAB, 0x07, 0xAA, 0x7C, 0x07, 0x07, 0x07, 0x07,
+    0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+    0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07,
+    0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
+    0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98,
+    0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+    0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+    0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07,
+};
+
+static const uint8_t ascii2ebcdic[] = {
+    0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F,
+    0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+    0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26,
+    0x18, 0x19, 0x3F, 0x27, 0x22, 0x1D, 0x1E, 0x1F,
+    0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D,
+    0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
+    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+    0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
+    0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+    0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
+    0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6,
+    0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
+    0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+    0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6,
+    0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
+    0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x3F, 0x59, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+    0x90, 0x3F, 0x3F, 0x3F, 0x3F, 0xEA, 0x3F, 0xFF
+};
+
+static inline void ebcdic_put(uint8_t *p, const char *ascii, int len)
+{
+    int i;
+
+    for (i = 0; i < len; i++) {
+        p[i] = ascii2ebcdic[(uint8_t)ascii[i]];
+    }
+}
+
+static inline void ascii_put(uint8_t *p, const char *ebcdic, int len)
+{
+    int i;
+
+    for (i = 0; i < len; i++) {
+        p[i] = ebcdic2ascii[(uint8_t)ebcdic[i]];
+    }
+}
+
+#endif /* EBCDIC_H */
diff --git a/include/hw/s390x/event-facility.h b/include/hw/s390x/event-facility.h
new file mode 100644 (file)
index 0000000..3ffd575
--- /dev/null
@@ -0,0 +1,208 @@
+/*
+ * SCLP
+ *    Event Facility definitions
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Heinz Graalfs <graalfs@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version.  See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_S390_SCLP_EVENT_FACILITY_H
+#define HW_S390_SCLP_EVENT_FACILITY_H
+
+#include "qemu/thread.h"
+#include "hw/qdev-core.h"
+#include "hw/s390x/sclp.h"
+#include "qom/object.h"
+
+/* SCLP event types */
+#define SCLP_EVENT_OPRTNS_COMMAND               0x01
+#define SCLP_EVENT_MESSAGE                      0x02
+#define SCLP_EVENT_CONFIG_MGT_DATA              0x04
+#define SCLP_EVENT_PMSGCMD                      0x09
+#define SCLP_EVENT_ASCII_CONSOLE_DATA           0x1a
+#define SCLP_EVENT_SIGNAL_QUIESCE               0x1d
+
+/* SCLP event masks */
+#define SCLP_EVMASK(T)  (1ULL << (sizeof(sccb_mask_t) * 8 - (T)))
+
+#define SCLP_EVENT_MASK_OP_CMD          SCLP_EVMASK(SCLP_EVENT_OPRTNS_COMMAND)
+#define SCLP_EVENT_MASK_MSG             SCLP_EVMASK(SCLP_EVENT_MESSAGE)
+#define SCLP_EVENT_MASK_CONFIG_MGT_DATA SCLP_EVMASK(SCLP_EVENT_CONFIG_MGT_DATA)
+#define SCLP_EVENT_MASK_PMSGCMD         SCLP_EVMASK(SCLP_EVENT_PMSGCMD)
+#define SCLP_EVENT_MASK_MSG_ASCII       SCLP_EVMASK(SCLP_EVENT_ASCII_CONSOLE_DATA)
+#define SCLP_EVENT_MASK_SIGNAL_QUIESCE  SCLP_EVMASK(SCLP_EVENT_SIGNAL_QUIESCE)
+
+#define SCLP_UNCONDITIONAL_READ                 0x00
+#define SCLP_SELECTIVE_READ                     0x01
+
+#define TYPE_SCLP_EVENT "s390-sclp-event-type"
+OBJECT_DECLARE_TYPE(SCLPEvent, SCLPEventClass,
+                    SCLP_EVENT)
+
+#define TYPE_SCLP_CPU_HOTPLUG "sclp-cpu-hotplug"
+#define TYPE_SCLP_QUIESCE "sclpquiesce"
+
+#define SCLP_EVENT_MASK_LEN_MAX 1021
+
+typedef struct WriteEventMask {
+    SCCBHeader h;
+    uint16_t _reserved;
+    uint16_t mask_length;
+    uint8_t masks[];
+/*
+ * Layout of the masks is
+ *  uint8_t cp_receive_mask[mask_length];
+ *  uint8_t cp_send_mask[mask_length];
+ *  uint8_t receive_mask[mask_length];
+ *  uint8_t send_mask[mask_length];
+ * where 1 <= mask_length <= SCLP_EVENT_MASK_LEN_MAX
+ */
+} QEMU_PACKED WriteEventMask;
+
+#define WEM_CP_RECEIVE_MASK(wem, mask_len) ((wem)->masks)
+#define WEM_CP_SEND_MASK(wem, mask_len) ((wem)->masks + (mask_len))
+#define WEM_RECEIVE_MASK(wem, mask_len) ((wem)->masks + 2 * (mask_len))
+#define WEM_SEND_MASK(wem, mask_len) ((wem)->masks + 3 * (mask_len))
+
+typedef uint64_t sccb_mask_t;
+
+typedef struct EventBufferHeader {
+    uint16_t length;
+    uint8_t  type;
+    uint8_t  flags;
+    uint16_t _reserved;
+} QEMU_PACKED EventBufferHeader;
+
+typedef struct MdbHeader {
+    uint16_t length;
+    uint16_t type;
+    uint32_t tag;
+    uint32_t revision_code;
+} QEMU_PACKED MdbHeader;
+
+typedef struct MTO {
+    uint16_t line_type_flags;
+    uint8_t  alarm_control;
+    uint8_t  _reserved[3];
+    char     message[];
+} QEMU_PACKED MTO;
+
+typedef struct GO {
+    uint32_t domid;
+    uint8_t  hhmmss_time[8];
+    uint8_t  th_time[3];
+    uint8_t  _reserved_0;
+    uint8_t  dddyyyy_date[7];
+    uint8_t  _reserved_1;
+    uint16_t general_msg_flags;
+    uint8_t  _reserved_2[10];
+    uint8_t  originating_system_name[8];
+    uint8_t  job_guest_name[8];
+} QEMU_PACKED GO;
+
+#define MESSAGE_TEXT 0x0004
+
+typedef struct MDBO {
+    uint16_t length;
+    uint16_t type;
+    union {
+        GO go;
+        MTO mto;
+    };
+} QEMU_PACKED MDBO;
+
+typedef struct MDB {
+    MdbHeader header;
+    MDBO mdbo[];
+} QEMU_PACKED MDB;
+
+typedef struct SclpMsg {
+    EventBufferHeader header;
+    MDB mdb;
+} QEMU_PACKED SclpMsg;
+
+#define GDS_ID_MDSMU                            0x1310
+#define GDS_ID_CPMSU                            0x1212
+#define GDS_ID_TEXTCMD                          0x1320
+
+typedef struct GdsVector {
+    uint16_t length;
+    uint16_t gds_id;
+} QEMU_PACKED GdsVector;
+
+#define GDS_KEY_SELFDEFTEXTMSG                  0x31
+#define GDS_KEY_TEXTMSG                         0x30
+
+typedef struct GdsSubvector {
+    uint8_t length;
+    uint8_t key;
+} QEMU_PACKED GdsSubvector;
+
+/* MDS Message Unit */
+typedef struct MDMSU {
+    GdsVector mdmsu;
+    GdsVector cpmsu;
+    GdsVector text_command;
+    GdsSubvector self_def_text_message;
+    GdsSubvector text_message;
+} QEMU_PACKED MDMSU;
+
+typedef struct WriteEventData {
+    SCCBHeader h;
+    EventBufferHeader ebh;
+} QEMU_PACKED WriteEventData;
+
+typedef struct ReadEventData {
+    SCCBHeader h;
+    union {
+        sccb_mask_t mask;
+        EventBufferHeader ebh;
+    };
+} QEMU_PACKED ReadEventData;
+
+struct SCLPEvent {
+    DeviceState qdev;
+    bool event_pending;
+    char *name;
+};
+
+struct SCLPEventClass {
+    DeviceClass parent_class;
+    int (*init)(SCLPEvent *event);
+
+    /* get SCLP's send mask */
+    sccb_mask_t (*get_send_mask)(void);
+
+    /* get SCLP's receive mask */
+    sccb_mask_t (*get_receive_mask)(void);
+
+    int (*read_event_data)(SCLPEvent *event, EventBufferHeader *evt_buf_hdr,
+                           int *slen);
+
+    int (*write_event_data)(SCLPEvent *event, EventBufferHeader *evt_buf_hdr);
+
+    /* can we handle this event type? */
+    bool (*can_handle_event)(uint8_t type);
+};
+
+#define TYPE_SCLP_EVENT_FACILITY "s390-sclp-event-facility"
+typedef struct SCLPEventFacility SCLPEventFacility;
+typedef struct SCLPEventFacilityClass SCLPEventFacilityClass;
+DECLARE_OBJ_CHECKERS(SCLPEventFacility, SCLPEventFacilityClass,
+                     EVENT_FACILITY, TYPE_SCLP_EVENT_FACILITY)
+
+struct SCLPEventFacilityClass {
+    SysBusDeviceClass parent_class;
+    void (*command_handler)(SCLPEventFacility *ef, SCCB *sccb, uint64_t code);
+    bool (*event_pending)(SCLPEventFacility *ef);
+};
+
+BusState *sclp_get_event_facility_bus(void);
+
+#endif
diff --git a/include/hw/s390x/ioinst.h b/include/hw/s390x/ioinst.h
new file mode 100644 (file)
index 0000000..ea8d0f2
--- /dev/null
@@ -0,0 +1,258 @@
+/*
+ * S/390 channel I/O instructions
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+*/
+
+#ifndef S390X_IOINST_H
+#define S390X_IOINST_H
+
+/*
+ * Channel I/O related definitions, as defined in the Principles
+ * Of Operation (and taken from the Linux implementation).
+ */
+
+/* subchannel status word (command mode only) */
+typedef struct SCSW {
+    uint16_t flags;
+    uint16_t ctrl;
+    uint32_t cpa;
+    uint8_t dstat;
+    uint8_t cstat;
+    uint16_t count;
+} SCSW;
+QEMU_BUILD_BUG_MSG(sizeof(SCSW) != 12, "size of SCSW is wrong");
+
+#define SCSW_FLAGS_MASK_KEY 0xf000
+#define SCSW_FLAGS_MASK_SCTL 0x0800
+#define SCSW_FLAGS_MASK_ESWF 0x0400
+#define SCSW_FLAGS_MASK_CC 0x0300
+#define SCSW_FLAGS_MASK_FMT 0x0080
+#define SCSW_FLAGS_MASK_PFCH 0x0040
+#define SCSW_FLAGS_MASK_ISIC 0x0020
+#define SCSW_FLAGS_MASK_ALCC 0x0010
+#define SCSW_FLAGS_MASK_SSI 0x0008
+#define SCSW_FLAGS_MASK_ZCC 0x0004
+#define SCSW_FLAGS_MASK_ECTL 0x0002
+#define SCSW_FLAGS_MASK_PNO 0x0001
+
+#define SCSW_CTRL_MASK_FCTL 0x7000
+#define SCSW_CTRL_MASK_ACTL 0x0fe0
+#define SCSW_CTRL_MASK_STCTL 0x001f
+
+#define SCSW_FCTL_CLEAR_FUNC 0x1000
+#define SCSW_FCTL_HALT_FUNC 0x2000
+#define SCSW_FCTL_START_FUNC 0x4000
+
+#define SCSW_ACTL_SUSP 0x0020
+#define SCSW_ACTL_DEVICE_ACTIVE 0x0040
+#define SCSW_ACTL_SUBCH_ACTIVE 0x0080
+#define SCSW_ACTL_CLEAR_PEND 0x0100
+#define SCSW_ACTL_HALT_PEND  0x0200
+#define SCSW_ACTL_START_PEND 0x0400
+#define SCSW_ACTL_RESUME_PEND 0x0800
+
+#define SCSW_STCTL_STATUS_PEND 0x0001
+#define SCSW_STCTL_SECONDARY 0x0002
+#define SCSW_STCTL_PRIMARY 0x0004
+#define SCSW_STCTL_INTERMEDIATE 0x0008
+#define SCSW_STCTL_ALERT 0x0010
+
+#define SCSW_DSTAT_ATTENTION     0x80
+#define SCSW_DSTAT_STAT_MOD      0x40
+#define SCSW_DSTAT_CU_END        0x20
+#define SCSW_DSTAT_BUSY          0x10
+#define SCSW_DSTAT_CHANNEL_END   0x08
+#define SCSW_DSTAT_DEVICE_END    0x04
+#define SCSW_DSTAT_UNIT_CHECK    0x02
+#define SCSW_DSTAT_UNIT_EXCEP    0x01
+
+#define SCSW_CSTAT_PCI           0x80
+#define SCSW_CSTAT_INCORR_LEN    0x40
+#define SCSW_CSTAT_PROG_CHECK    0x20
+#define SCSW_CSTAT_PROT_CHECK    0x10
+#define SCSW_CSTAT_DATA_CHECK    0x08
+#define SCSW_CSTAT_CHN_CTRL_CHK  0x04
+#define SCSW_CSTAT_INTF_CTRL_CHK 0x02
+#define SCSW_CSTAT_CHAIN_CHECK   0x01
+
+/* path management control word */
+typedef struct PMCW {
+    uint32_t intparm;
+    uint16_t flags;
+    uint16_t devno;
+    uint8_t  lpm;
+    uint8_t  pnom;
+    uint8_t  lpum;
+    uint8_t  pim;
+    uint16_t mbi;
+    uint8_t  pom;
+    uint8_t  pam;
+    uint8_t  chpid[8];
+    uint32_t chars;
+} PMCW;
+QEMU_BUILD_BUG_MSG(sizeof(PMCW) != 28, "size of PMCW is wrong");
+
+#define PMCW_FLAGS_MASK_QF 0x8000
+#define PMCW_FLAGS_MASK_W 0x4000
+#define PMCW_FLAGS_MASK_ISC 0x3800
+#define PMCW_FLAGS_MASK_ENA 0x0080
+#define PMCW_FLAGS_MASK_LM 0x0060
+#define PMCW_FLAGS_MASK_MME 0x0018
+#define PMCW_FLAGS_MASK_MP 0x0004
+#define PMCW_FLAGS_MASK_TF 0x0002
+#define PMCW_FLAGS_MASK_DNV 0x0001
+#define PMCW_FLAGS_MASK_INVALID 0xc300
+
+#define PMCW_CHARS_MASK_ST 0x00e00000
+#define PMCW_CHARS_MASK_MBFC 0x00000004
+#define PMCW_CHARS_MASK_XMWME 0x00000002
+#define PMCW_CHARS_MASK_CSENSE 0x00000001
+#define PMCW_CHARS_MASK_INVALID 0xff1ffff8
+
+/* subchannel information block */
+typedef struct SCHIB {
+    PMCW pmcw;
+    SCSW scsw;
+    uint64_t mba;
+    uint8_t mda[4];
+} QEMU_PACKED SCHIB;
+
+/* format-0 extended-status word */
+typedef struct ESW {
+    uint32_t word0;      /* subchannel logout for format 0 */
+    uint32_t erw;
+    uint64_t word2;     /* failing-storage address for format 0 */
+    uint32_t word4;     /* secondary-CCW address for format 0 */
+} QEMU_PACKED ESW;
+
+#define ESW_ERW_SENSE 0x01000000
+
+/* interruption response block */
+typedef struct IRB {
+    SCSW scsw;
+    ESW esw;
+    uint32_t ecw[8];
+    uint32_t emw[8];
+} IRB;
+QEMU_BUILD_BUG_MSG(sizeof(IRB) != 96, "size of IRB is wrong");
+
+/* operation request block */
+typedef struct ORB {
+    uint32_t intparm;
+    uint16_t ctrl0;
+    uint8_t lpm;
+    uint8_t ctrl1;
+    uint32_t cpa;
+} ORB;
+QEMU_BUILD_BUG_MSG(sizeof(ORB) != 12, "size of ORB is wrong");
+
+#define ORB_CTRL0_MASK_KEY 0xf000
+#define ORB_CTRL0_MASK_SPND 0x0800
+#define ORB_CTRL0_MASK_STR 0x0400
+#define ORB_CTRL0_MASK_MOD 0x0200
+#define ORB_CTRL0_MASK_SYNC 0x0100
+#define ORB_CTRL0_MASK_FMT 0x0080
+#define ORB_CTRL0_MASK_PFCH 0x0040
+#define ORB_CTRL0_MASK_ISIC 0x0020
+#define ORB_CTRL0_MASK_ALCC 0x0010
+#define ORB_CTRL0_MASK_SSIC 0x0008
+#define ORB_CTRL0_MASK_C64 0x0002
+#define ORB_CTRL0_MASK_I2K 0x0001
+#define ORB_CTRL0_MASK_INVALID 0x0004
+
+#define ORB_CTRL1_MASK_ILS 0x80
+#define ORB_CTRL1_MASK_MIDAW 0x40
+#define ORB_CTRL1_MASK_ORBX 0x01
+#define ORB_CTRL1_MASK_INVALID 0x3e
+
+/* channel command word (type 0) */
+typedef struct CCW0 {
+        uint8_t cmd_code;
+        uint8_t cda0;
+        uint16_t cda1;
+        uint8_t flags;
+        uint8_t reserved;
+        uint16_t count;
+} CCW0;
+QEMU_BUILD_BUG_MSG(sizeof(CCW0) != 8, "size of CCW0 is wrong");
+
+/* channel command word (type 1) */
+typedef struct CCW1 {
+    uint8_t cmd_code;
+    uint8_t flags;
+    uint16_t count;
+    uint32_t cda;
+} CCW1;
+QEMU_BUILD_BUG_MSG(sizeof(CCW1) != 8, "size of CCW1 is wrong");
+
+#define CCW_FLAG_DC              0x80
+#define CCW_FLAG_CC              0x40
+#define CCW_FLAG_SLI             0x20
+#define CCW_FLAG_SKIP            0x10
+#define CCW_FLAG_PCI             0x08
+#define CCW_FLAG_IDA             0x04
+#define CCW_FLAG_SUSPEND         0x02
+#define CCW_FLAG_MIDA            0x01
+
+#define CCW_CMD_NOOP             0x03
+#define CCW_CMD_BASIC_SENSE      0x04
+#define CCW_CMD_TIC              0x08
+#define CCW_CMD_SENSE_ID         0xe4
+
+typedef struct CRW {
+    uint16_t flags;
+    uint16_t rsid;
+} CRW;
+QEMU_BUILD_BUG_MSG(sizeof(CRW) != 4, "size of CRW is wrong");
+
+#define CRW_FLAGS_MASK_S 0x4000
+#define CRW_FLAGS_MASK_R 0x2000
+#define CRW_FLAGS_MASK_C 0x1000
+#define CRW_FLAGS_MASK_RSC 0x0f00
+#define CRW_FLAGS_MASK_A 0x0080
+#define CRW_FLAGS_MASK_ERC 0x003f
+
+#define CRW_ERC_EVENT    0x00 /* event information pending */
+#define CRW_ERC_AVAIL    0x01 /* available */
+#define CRW_ERC_INIT     0x02 /* initialized */
+#define CRW_ERC_TERROR   0x03 /* temporary error */
+#define CRW_ERC_IPI      0x04 /* installed parm initialized */
+#define CRW_ERC_TERM     0x05 /* terminal */
+#define CRW_ERC_PERRN    0x06 /* perm. error, facility not init */
+#define CRW_ERC_PERRI    0x07 /* perm. error, facility init */
+#define CRW_ERC_PMOD     0x08 /* installed parameters modified */
+#define CRW_ERC_IPR      0x0A /* installed parameters restored */
+
+#define CRW_RSC_SUBCH 0x3
+#define CRW_RSC_CHP   0x4
+#define CRW_RSC_CSS   0xb
+
+/* I/O interruption code */
+typedef struct IOIntCode {
+    uint32_t subsys_id;
+    uint32_t intparm;
+    uint32_t interrupt_id;
+} QEMU_PACKED IOIntCode;
+
+/* schid disintegration */
+#define IOINST_SCHID_ONE(_schid)   ((_schid & 0x00010000) >> 16)
+#define IOINST_SCHID_M(_schid)     ((_schid & 0x00080000) >> 19)
+#define IOINST_SCHID_CSSID(_schid) ((_schid & 0xff000000) >> 24)
+#define IOINST_SCHID_SSID(_schid)  ((_schid & 0x00060000) >> 17)
+#define IOINST_SCHID_NR(_schid)    (_schid & 0x0000ffff)
+
+#define IO_INT_WORD_ISC(_int_word) ((_int_word & 0x38000000) >> 27)
+#define ISC_TO_ISC_BITS(_isc)      ((0x80 >> _isc) << 24)
+
+#define IO_INT_WORD_AI 0x80000000
+
+int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid,
+                                 int *schid);
+
+#endif
diff --git a/include/hw/s390x/s390-ccw.h b/include/hw/s390x/s390-ccw.h
new file mode 100644 (file)
index 0000000..2c807ee
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * s390 CCW Assignment Support
+ *
+ * Copyright 2017 IBM Corp.
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390_CCW_H
+#define HW_S390_CCW_H
+
+#include "hw/s390x/ccw-device.h"
+#include "qom/object.h"
+
+#define TYPE_S390_CCW "s390-ccw"
+typedef struct S390CCWDevice S390CCWDevice;
+typedef struct S390CCWDeviceClass S390CCWDeviceClass;
+DECLARE_OBJ_CHECKERS(S390CCWDevice, S390CCWDeviceClass,
+                     S390_CCW_DEVICE, TYPE_S390_CCW)
+
+struct S390CCWDevice {
+    CcwDevice parent_obj;
+    CssDevId hostid;
+    char *mdevid;
+    int32_t bootindex;
+};
+
+struct S390CCWDeviceClass {
+    CCWDeviceClass parent_class;
+    void (*realize)(S390CCWDevice *dev, char *sysfsdev, Error **errp);
+    void (*unrealize)(S390CCWDevice *dev);
+    IOInstEnding (*handle_request) (SubchDev *sch);
+    int (*handle_halt) (SubchDev *sch);
+    int (*handle_clear) (SubchDev *sch);
+    IOInstEnding (*handle_store) (SubchDev *sch);
+};
+
+#endif
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
new file mode 100644 (file)
index 0000000..2c43ea1
--- /dev/null
@@ -0,0 +1,406 @@
+/*
+ * s390 PCI BUS definitions
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com>
+ *            Hong Bo Li <lihbbj@cn.ibm.com>
+ *            Yi Min Zhao <zyimin@cn.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390_PCI_BUS_H
+#define HW_S390_PCI_BUS_H
+
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_host.h"
+#include "hw/s390x/sclp.h"
+#include "hw/s390x/s390_flic.h"
+#include "hw/s390x/css.h"
+#include "hw/s390x/s390-pci-clp.h"
+#include "qom/object.h"
+
+#define TYPE_S390_PCI_HOST_BRIDGE "s390-pcihost"
+#define TYPE_S390_PCI_BUS "s390-pcibus"
+#define TYPE_S390_PCI_DEVICE "zpci"
+#define TYPE_S390_PCI_IOMMU "s390-pci-iommu"
+#define TYPE_S390_IOMMU_MEMORY_REGION "s390-iommu-memory-region"
+#define FH_MASK_ENABLE   0x80000000
+#define FH_MASK_INSTANCE 0x7f000000
+#define FH_MASK_SHM      0x00ff0000
+#define FH_MASK_INDEX    0x0000ffff
+#define FH_SHM_VFIO      0x00010000
+#define FH_SHM_EMUL      0x00020000
+#define ZPCI_MAX_FID 0xffffffff
+#define ZPCI_MAX_UID 0xffff
+#define UID_UNDEFINED 0
+#define UID_CHECKING_ENABLED 0x01
+#define ZPCI_DTSM 0x40
+
+/* zPCI Function Types */
+#define ZPCI_PFT_ISM 5
+
+OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE)
+OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS)
+OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBusDevice, S390_PCI_DEVICE)
+OBJECT_DECLARE_SIMPLE_TYPE(S390PCIIOMMU, S390_PCI_IOMMU)
+
+#define HP_EVENT_TO_CONFIGURED        0x0301
+#define HP_EVENT_RESERVED_TO_STANDBY  0x0302
+#define HP_EVENT_DECONFIGURE_REQUEST  0x0303
+#define HP_EVENT_CONFIGURED_TO_STBRES 0x0304
+#define HP_EVENT_STANDBY_TO_RESERVED  0x0308
+
+#define ERR_EVENT_INVALAS 0x1
+#define ERR_EVENT_OORANGE 0x2
+#define ERR_EVENT_INVALTF 0x3
+#define ERR_EVENT_TPROTE  0x4
+#define ERR_EVENT_APROTE  0x5
+#define ERR_EVENT_KEYE    0x6
+#define ERR_EVENT_INVALTE 0x7
+#define ERR_EVENT_INVALTL 0x8
+#define ERR_EVENT_TT      0x9
+#define ERR_EVENT_INVALMS 0xa
+#define ERR_EVENT_SERR    0xb
+#define ERR_EVENT_NOMSI   0x10
+#define ERR_EVENT_INVALBV 0x11
+#define ERR_EVENT_AIBV    0x12
+#define ERR_EVENT_AIRERR  0x13
+#define ERR_EVENT_FMBA    0x2a
+#define ERR_EVENT_FMBUP   0x2b
+#define ERR_EVENT_FMBPRO  0x2c
+#define ERR_EVENT_CCONF   0x30
+#define ERR_EVENT_SERVAC  0x3a
+#define ERR_EVENT_PERMERR 0x3b
+
+#define ERR_EVENT_Q_BIT 0x2
+#define ERR_EVENT_MVN_OFFSET 16
+
+#define ZPCI_MSI_VEC_BITS 11
+#define ZPCI_MSI_VEC_MASK 0x7ff
+
+#define ZPCI_MSI_ADDR  0xfe00000000000000ULL
+#define ZPCI_SDMA_ADDR 0x100000000ULL
+#define ZPCI_EDMA_ADDR 0x1ffffffffffffffULL
+
+#define PAGE_DEFAULT_ACC        0
+#define PAGE_DEFAULT_KEY        (PAGE_DEFAULT_ACC << 4)
+
+/* I/O Translation Anchor (IOTA) */
+enum ZpciIoatDtype {
+    ZPCI_IOTA_STO = 0,
+    ZPCI_IOTA_RTTO = 1,
+    ZPCI_IOTA_RSTO = 2,
+    ZPCI_IOTA_RFTO = 3,
+    ZPCI_IOTA_PFAA = 4,
+    ZPCI_IOTA_IOPFAA = 5,
+    ZPCI_IOTA_IOPTO = 7
+};
+
+#define ZPCI_IOTA_IOT_ENABLED           0x800ULL
+#define ZPCI_IOTA_DT_ST                 (ZPCI_IOTA_STO  << 2)
+#define ZPCI_IOTA_DT_RT                 (ZPCI_IOTA_RTTO << 2)
+#define ZPCI_IOTA_DT_RS                 (ZPCI_IOTA_RSTO << 2)
+#define ZPCI_IOTA_DT_RF                 (ZPCI_IOTA_RFTO << 2)
+#define ZPCI_IOTA_DT_PF                 (ZPCI_IOTA_PFAA << 2)
+#define ZPCI_IOTA_FS_4K                 0
+#define ZPCI_IOTA_FS_1M                 1
+#define ZPCI_IOTA_FS_2G                 2
+#define ZPCI_KEY                        (PAGE_DEFAULT_KEY << 5)
+
+#define ZPCI_IOTA_STO_FLAG  (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
+#define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
+#define ZPCI_IOTA_RSTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS)
+#define ZPCI_IOTA_RFTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RF)
+#define ZPCI_IOTA_RFAA_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY |\
+                             ZPCI_IOTA_DT_PF | ZPCI_IOTA_FS_2G)
+
+/* I/O Region and segment tables */
+#define ZPCI_INDEX_MASK         0x7ffULL
+
+#define ZPCI_TABLE_TYPE_MASK    0xc
+#define ZPCI_TABLE_TYPE_RFX     0xc
+#define ZPCI_TABLE_TYPE_RSX     0x8
+#define ZPCI_TABLE_TYPE_RTX     0x4
+#define ZPCI_TABLE_TYPE_SX      0x0
+
+#define ZPCI_TABLE_LEN_RFX      0x3
+#define ZPCI_TABLE_LEN_RSX      0x3
+#define ZPCI_TABLE_LEN_RTX      0x3
+
+#define ZPCI_TABLE_OFFSET_MASK  0xc0
+#define ZPCI_TABLE_SIZE         0x4000
+#define ZPCI_TABLE_ALIGN        ZPCI_TABLE_SIZE
+#define ZPCI_TABLE_ENTRY_SIZE   (sizeof(unsigned long))
+#define ZPCI_TABLE_ENTRIES      (ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+
+#define ZPCI_TABLE_BITS         11
+#define ZPCI_PT_BITS            8
+#define ZPCI_ST_SHIFT           (ZPCI_PT_BITS + TARGET_PAGE_BITS)
+#define ZPCI_RT_SHIFT           (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS)
+
+#define ZPCI_RTE_FLAG_MASK      0x3fffULL
+#define ZPCI_RTE_ADDR_MASK      (~ZPCI_RTE_FLAG_MASK)
+#define ZPCI_STE_FLAG_MASK      0x7ffULL
+#define ZPCI_STE_ADDR_MASK      (~ZPCI_STE_FLAG_MASK)
+
+#define ZPCI_SFAA_MASK          (~((1ULL << 20) - 1))
+
+/* I/O Page tables */
+#define ZPCI_PTE_VALID_MASK             0x400
+#define ZPCI_PTE_INVALID                0x400
+#define ZPCI_PTE_VALID                  0x000
+#define ZPCI_PT_SIZE                    0x800
+#define ZPCI_PT_ALIGN                   ZPCI_PT_SIZE
+#define ZPCI_PT_ENTRIES                 (ZPCI_PT_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+#define ZPCI_PT_MASK                    (ZPCI_PT_ENTRIES - 1)
+
+#define ZPCI_PTE_FLAG_MASK              0xfffULL
+#define ZPCI_PTE_ADDR_MASK              (~ZPCI_PTE_FLAG_MASK)
+
+/* Shared bits */
+#define ZPCI_TABLE_VALID                0x00
+#define ZPCI_TABLE_INVALID              0x20
+#define ZPCI_TABLE_PROTECTED            0x200
+#define ZPCI_TABLE_UNPROTECTED          0x000
+#define ZPCI_TABLE_FC                   0x400
+
+#define ZPCI_TABLE_VALID_MASK           0x20
+#define ZPCI_TABLE_PROT_MASK            0x200
+
+#define ZPCI_ETT_RT 1
+#define ZPCI_ETT_ST 0
+#define ZPCI_ETT_PT -1
+
+/* PCI Function States
+ *
+ * reserved: default; device has just been plugged or is in progress of being
+ *           unplugged
+ * standby: device is present but not configured; transition from any
+ *          configured state/to this state via sclp configure/deconfigure
+ *
+ * The following states make up the "configured" meta-state:
+ * disabled: device is configured but not enabled; transition between this
+ *           state and enabled via clp enable/disable
+ * enabled: device is ready for use; transition to disabled via clp disable;
+ *          may enter an error state
+ * blocked: ignore all DMA and interrupts; transition back to enabled or from
+ *          error state via mpcifc
+ * error: an error occurred; transition back to enabled via mpcifc
+ * permanent error: an unrecoverable error occurred; transition to standby via
+ *                  sclp deconfigure
+ */
+typedef enum {
+    ZPCI_FS_RESERVED,
+    ZPCI_FS_STANDBY,
+    ZPCI_FS_DISABLED,
+    ZPCI_FS_ENABLED,
+    ZPCI_FS_BLOCKED,
+    ZPCI_FS_ERROR,
+    ZPCI_FS_PERMANENT_ERROR,
+} ZpciState;
+
+typedef struct SeiContainer {
+    QTAILQ_ENTRY(SeiContainer) link;
+    uint32_t fid;
+    uint32_t fh;
+    uint8_t cc;
+    uint16_t pec;
+    uint64_t faddr;
+    uint32_t e;
+} SeiContainer;
+
+typedef struct PciCcdfErr {
+    uint32_t reserved1;
+    uint32_t fh;
+    uint32_t fid;
+    uint32_t e;
+    uint64_t faddr;
+    uint32_t reserved3;
+    uint16_t reserved4;
+    uint16_t pec;
+} QEMU_PACKED PciCcdfErr;
+
+typedef struct PciCcdfAvail {
+    uint32_t reserved1;
+    uint32_t fh;
+    uint32_t fid;
+    uint32_t reserved2;
+    uint32_t reserved3;
+    uint32_t reserved4;
+    uint32_t reserved5;
+    uint16_t reserved6;
+    uint16_t pec;
+} QEMU_PACKED PciCcdfAvail;
+
+typedef struct ChscSeiNt2Res {
+    uint16_t length;
+    uint16_t code;
+    uint16_t reserved1;
+    uint8_t reserved2;
+    uint8_t nt;
+    uint8_t flags;
+    uint8_t reserved3;
+    uint8_t reserved4;
+    uint8_t cc;
+    uint32_t reserved5[13];
+    uint8_t ccdf[4016];
+} QEMU_PACKED ChscSeiNt2Res;
+
+typedef struct S390MsixInfo {
+    uint8_t table_bar;
+    uint8_t pba_bar;
+    uint16_t entries;
+    uint32_t table_offset;
+    uint32_t pba_offset;
+} S390MsixInfo;
+
+typedef struct S390IOTLBEntry {
+    uint64_t iova;
+    uint64_t translated_addr;
+    uint64_t len;
+    uint64_t perm;
+} S390IOTLBEntry;
+
+typedef struct S390PCIDMACount {
+    int id;
+    int users;
+    uint32_t avail;
+    QTAILQ_ENTRY(S390PCIDMACount) link;
+} S390PCIDMACount;
+
+struct S390PCIIOMMU {
+    Object parent_obj;
+    S390PCIBusDevice *pbdev;
+    AddressSpace as;
+    MemoryRegion mr;
+    IOMMUMemoryRegion iommu_mr;
+    bool enabled;
+    uint64_t g_iota;
+    uint64_t pba;
+    uint64_t pal;
+    uint64_t max_dma_limit;
+    GHashTable *iotlb;
+    S390PCIDMACount *dma_limit;
+};
+
+typedef struct S390PCIIOMMUTable {
+    uint64_t key;
+    S390PCIIOMMU *iommu[PCI_SLOT_MAX];
+} S390PCIIOMMUTable;
+
+/* Function Measurement Block */
+#define DEFAULT_MUI 4000
+#define UPDATE_U_BIT 0x1ULL
+#define FMBK_MASK 0xfULL
+
+typedef struct ZpciFmbFmt0 {
+    uint64_t dma_rbytes;
+    uint64_t dma_wbytes;
+} ZpciFmbFmt0;
+
+#define ZPCI_FMB_CNT_LD    0
+#define ZPCI_FMB_CNT_ST    1
+#define ZPCI_FMB_CNT_STB   2
+#define ZPCI_FMB_CNT_RPCIT 3
+#define ZPCI_FMB_CNT_MAX   4
+
+#define ZPCI_FMB_FORMAT    0
+
+typedef struct ZpciFmb {
+    uint32_t format;
+    uint32_t sample;
+    uint64_t last_update;
+    uint64_t counter[ZPCI_FMB_CNT_MAX];
+    ZpciFmbFmt0 fmt0;
+} ZpciFmb;
+QEMU_BUILD_BUG_MSG(offsetof(ZpciFmb, fmt0) != 48, "padding in ZpciFmb");
+
+#define ZPCI_DEFAULT_FN_GRP 0xFF
+#define ZPCI_SIM_GRP_START 0xF0
+typedef struct S390PCIGroup {
+    ClpRspQueryPciGrp zpci_group;
+    int id;
+    int host_id;
+    QTAILQ_ENTRY(S390PCIGroup) link;
+} S390PCIGroup;
+S390PCIGroup *s390_group_create(int id, int host_id);
+S390PCIGroup *s390_group_find(int id);
+S390PCIGroup *s390_group_find_host_sim(int host_id);
+
+struct S390PCIBusDevice {
+    DeviceState qdev;
+    PCIDevice *pdev;
+    ZpciState state;
+    char *target;
+    uint16_t uid;
+    uint32_t idx;
+    uint32_t fh;
+    uint32_t fid;
+    bool fid_defined;
+    uint64_t fmb_addr;
+    ZpciFmb fmb;
+    QEMUTimer *fmb_timer;
+    uint8_t isc;
+    uint16_t noi;
+    uint16_t maxstbl;
+    uint8_t sum;
+    uint8_t pft;
+    S390PCIGroup *pci_group;
+    ClpRspQueryPci zpci_fn;
+    S390MsixInfo msix;
+    AdapterRoutes routes;
+    S390PCIIOMMU *iommu;
+    MemoryRegion msix_notify_mr;
+    IndAddr *summary_ind;
+    IndAddr *indicator;
+    Notifier shutdown_notifier;
+    bool pci_unplug_request_processed;
+    bool unplug_requested;
+    bool interp;
+    bool forwarding_assist;
+    bool aif;
+    QTAILQ_ENTRY(S390PCIBusDevice) link;
+};
+
+struct S390PCIBus {
+    BusState qbus;
+};
+
+struct S390pciState {
+    PCIHostState parent_obj;
+    uint32_t next_idx;
+    int bus_no;
+    S390PCIBus *bus;
+    GHashTable *iommu_table;
+    GHashTable *zpci_table;
+    QTAILQ_HEAD(, SeiContainer) pending_sei;
+    QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs;
+    QTAILQ_HEAD(, S390PCIDMACount) zpci_dma_limit;
+    QTAILQ_HEAD(, S390PCIGroup) zpci_groups;
+    uint8_t next_sim_grp;
+};
+
+S390pciState *s390_get_phb(void);
+int pci_chsc_sei_nt2_get_event(void *res);
+int pci_chsc_sei_nt2_have_event(void);
+void s390_pci_sclp_configure(SCCB *sccb);
+void s390_pci_sclp_deconfigure(SCCB *sccb);
+void s390_pci_iommu_enable(S390PCIIOMMU *iommu);
+void s390_pci_iommu_disable(S390PCIIOMMU *iommu);
+void s390_pci_generate_error_event(uint16_t pec, uint32_t fh, uint32_t fid,
+                                   uint64_t faddr, uint32_t e);
+uint16_t s390_guest_io_table_walk(uint64_t g_iota, hwaddr addr,
+                                  S390IOTLBEntry *entry);
+S390PCIBusDevice *s390_pci_find_dev_by_idx(S390pciState *s, uint32_t idx);
+S390PCIBusDevice *s390_pci_find_dev_by_fh(S390pciState *s, uint32_t fh);
+S390PCIBusDevice *s390_pci_find_dev_by_fid(S390pciState *s, uint32_t fid);
+S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
+                                              const char *target);
+S390PCIBusDevice *s390_pci_find_next_avail_dev(S390pciState *s,
+                                               S390PCIBusDevice *pbdev);
+void s390_pci_ism_reset(void);
+
+#endif
diff --git a/include/hw/s390x/s390-pci-clp.h b/include/hw/s390x/s390-pci-clp.h
new file mode 100644 (file)
index 0000000..03b7f9b
--- /dev/null
@@ -0,0 +1,216 @@
+/*
+ * s390 CLP instruction definitions
+ *
+ * Copyright 2019 IBM Corp.
+ * Author(s): Pierre Morel <pmorel@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390_PCI_CLP_H
+#define HW_S390_PCI_CLP_H
+
+/* CLP common request & response block size */
+#define CLP_BLK_SIZE 4096
+#define PCI_BAR_COUNT 6
+#define PCI_MAX_FUNCTIONS 4096
+
+typedef struct ClpReqHdr {
+    uint16_t len;
+    uint16_t cmd;
+} QEMU_PACKED ClpReqHdr;
+
+typedef struct ClpRspHdr {
+    uint16_t len;
+    uint16_t rsp;
+} QEMU_PACKED ClpRspHdr;
+
+/* CLP Response Codes */
+#define CLP_RC_OK         0x0010  /* Command request successfully */
+#define CLP_RC_CMD        0x0020  /* Command code not recognized */
+#define CLP_RC_PERM       0x0030  /* Command not authorized */
+#define CLP_RC_FMT        0x0040  /* Invalid command request format */
+#define CLP_RC_LEN        0x0050  /* Invalid command request length */
+#define CLP_RC_8K         0x0060  /* Command requires 8K LPCB */
+#define CLP_RC_RESNOT0    0x0070  /* Reserved field not zero */
+#define CLP_RC_NODATA     0x0080  /* No data available */
+#define CLP_RC_FC_UNKNOWN 0x0100  /* Function code not recognized */
+
+/*
+ * Call Logical Processor - Command Codes
+ */
+#define CLP_LIST_PCI            0x0002
+#define CLP_QUERY_PCI_FN        0x0003
+#define CLP_QUERY_PCI_FNGRP     0x0004
+#define CLP_SET_PCI_FN          0x0005
+
+/* PCI function handle list entry */
+typedef struct ClpFhListEntry {
+    uint16_t device_id;
+    uint16_t vendor_id;
+#define CLP_FHLIST_MASK_CONFIG 0x80000000
+    uint32_t config;
+    uint32_t fid;
+    uint32_t fh;
+} QEMU_PACKED ClpFhListEntry;
+
+#define CLP_RC_SETPCIFN_FH      0x0101 /* Invalid PCI fn handle */
+#define CLP_RC_SETPCIFN_FHOP    0x0102 /* Fn handle not valid for op */
+#define CLP_RC_SETPCIFN_DMAAS   0x0103 /* Invalid DMA addr space */
+#define CLP_RC_SETPCIFN_RES     0x0104 /* Insufficient resources */
+#define CLP_RC_SETPCIFN_ALRDY   0x0105 /* Fn already in requested state */
+#define CLP_RC_SETPCIFN_ERR     0x0106 /* Fn in permanent error state */
+#define CLP_RC_SETPCIFN_RECPND  0x0107 /* Error recovery pending */
+#define CLP_RC_SETPCIFN_BUSY    0x0108 /* Fn busy */
+#define CLP_RC_LISTPCI_BADRT    0x010a /* Resume token not recognized */
+#define CLP_RC_QUERYPCIFG_PFGID 0x010b /* Unrecognized PFGID */
+
+/* request or response block header length */
+#define LIST_PCI_HDR_LEN 32
+
+/* Number of function handles fitting in response block */
+#define CLP_FH_LIST_NR_ENTRIES \
+    ((CLP_BLK_SIZE - 2 * LIST_PCI_HDR_LEN) \
+        / sizeof(ClpFhListEntry))
+
+#define CLP_SET_ENABLE_PCI_FN  0 /* Yes, 0 enables it */
+#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */
+
+#define CLP_UTIL_STR_LEN 64
+#define CLP_PFIP_NR_SEGMENTS 4
+
+#define CLP_MASK_FMT 0xf0000000
+
+/* List PCI functions request */
+typedef struct ClpReqListPci {
+    ClpReqHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+    uint64_t resume_token;
+    uint64_t reserved2;
+} QEMU_PACKED ClpReqListPci;
+
+/* List PCI functions response */
+typedef struct ClpRspListPci {
+    ClpRspHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+    uint64_t resume_token;
+    uint32_t mdd;
+    uint16_t max_fn;
+    uint8_t flags;
+    uint8_t entry_size;
+    ClpFhListEntry fh_list[CLP_FH_LIST_NR_ENTRIES];
+} QEMU_PACKED ClpRspListPci;
+
+/* Query PCI function request */
+typedef struct ClpReqQueryPci {
+    ClpReqHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+    uint32_t fh; /* function handle */
+    uint32_t reserved2;
+    uint64_t reserved3;
+} QEMU_PACKED ClpReqQueryPci;
+
+/* Query PCI function response */
+typedef struct ClpRspQueryPci {
+    ClpRspHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+    uint16_t vfn; /* virtual fn number */
+#define CLP_RSP_QPCI_MASK_UTIL  0x01
+    uint8_t flags;
+    uint8_t pfgid;
+    uint32_t fid; /* pci function id */
+    uint8_t bar_size[PCI_BAR_COUNT];
+    uint16_t pchid;
+    uint32_t bar[PCI_BAR_COUNT];
+    uint8_t pfip[CLP_PFIP_NR_SEGMENTS];
+    uint16_t reserved2;
+    uint8_t fmbl;
+    uint8_t pft;
+    uint64_t sdma; /* start dma as */
+    uint64_t edma; /* end dma as */
+    uint32_t reserved3[11];
+    uint32_t uid;
+    uint8_t util_str[CLP_UTIL_STR_LEN]; /* utility string */
+} QEMU_PACKED ClpRspQueryPci;
+
+/* Query PCI function group request */
+typedef struct ClpReqQueryPciGrp {
+    ClpReqHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+    uint8_t reserved2[3];
+    uint8_t g;
+    uint32_t reserved3;
+    uint64_t reserved4;
+} QEMU_PACKED ClpReqQueryPciGrp;
+
+/* Query PCI function group response */
+typedef struct ClpRspQueryPciGrp {
+    ClpRspHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+#define CLP_RSP_QPCIG_MASK_NOI 0xfff
+    uint16_t i;
+    uint8_t version;
+#define CLP_RSP_QPCIG_MASK_FRAME   0x2
+#define CLP_RSP_QPCIG_MASK_REFRESH 0x1
+    uint8_t fr;
+    uint16_t maxstbl;
+    uint16_t mui;
+    uint8_t dtsm;
+    uint8_t reserved3[7];
+    uint64_t dasm; /* dma address space mask */
+    uint64_t msia; /* MSI address */
+    uint64_t reserved4;
+    uint64_t reserved5;
+} QEMU_PACKED ClpRspQueryPciGrp;
+
+/* Set PCI function request */
+typedef struct ClpReqSetPci {
+    ClpReqHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+    uint32_t fh; /* function handle */
+    uint16_t reserved2;
+    uint8_t oc; /* operation controls */
+    uint8_t ndas; /* number of dma spaces */
+    uint64_t reserved3;
+} QEMU_PACKED ClpReqSetPci;
+
+/* Set PCI function response */
+typedef struct ClpRspSetPci {
+    ClpRspHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+    uint32_t fh; /* function handle */
+    uint32_t reserved3;
+    uint64_t reserved4;
+} QEMU_PACKED ClpRspSetPci;
+
+typedef struct ClpReqRspListPci {
+    ClpReqListPci request;
+    ClpRspListPci response;
+} QEMU_PACKED ClpReqRspListPci;
+
+typedef struct ClpReqRspSetPci {
+    ClpReqSetPci request;
+    ClpRspSetPci response;
+} QEMU_PACKED ClpReqRspSetPci;
+
+typedef struct ClpReqRspQueryPci {
+    ClpReqQueryPci request;
+    ClpRspQueryPci response;
+} QEMU_PACKED ClpReqRspQueryPci;
+
+typedef struct ClpReqRspQueryPciGrp {
+    ClpReqQueryPciGrp request;
+    ClpRspQueryPciGrp response;
+} QEMU_PACKED ClpReqRspQueryPciGrp;
+
+#endif
diff --git a/include/hw/s390x/s390-pci-inst.h b/include/hw/s390x/s390-pci-inst.h
new file mode 100644 (file)
index 0000000..a55c448
--- /dev/null
@@ -0,0 +1,119 @@
+/*
+ * s390 PCI instruction definitions
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com>
+ *            Hong Bo Li <lihbbj@cn.ibm.com>
+ *            Yi Min Zhao <zyimin@cn.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390_PCI_INST_H
+#define HW_S390_PCI_INST_H
+
+#include "s390-pci-bus.h"
+#include "sysemu/dma.h"
+
+/* Load/Store status codes */
+#define ZPCI_PCI_ST_FUNC_NOT_ENABLED        4
+#define ZPCI_PCI_ST_FUNC_IN_ERR             8
+#define ZPCI_PCI_ST_BLOCKED                 12
+#define ZPCI_PCI_ST_INSUF_RES               16
+#define ZPCI_PCI_ST_INVAL_AS                20
+#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED    24
+#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED      28
+#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS        36
+#define ZPCI_PCI_ST_FUNC_NOT_AVAIL          40
+#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE     44
+
+/* Load/Store return codes */
+#define ZPCI_PCI_LS_OK              0
+#define ZPCI_PCI_LS_ERR             1
+#define ZPCI_PCI_LS_BUSY            2
+#define ZPCI_PCI_LS_INVAL_HANDLE    3
+
+/* Modify PCI status codes */
+#define ZPCI_MOD_ST_RES_NOT_AVAIL 4
+#define ZPCI_MOD_ST_INSUF_RES     16
+#define ZPCI_MOD_ST_SEQUENCE      24
+#define ZPCI_MOD_ST_DMAAS_INVAL   28
+#define ZPCI_MOD_ST_FRAME_INVAL   32
+#define ZPCI_MOD_ST_ERROR_RECOVER 40
+
+/* Modify PCI Function Controls */
+#define ZPCI_MOD_FC_REG_INT     2
+#define ZPCI_MOD_FC_DEREG_INT   3
+#define ZPCI_MOD_FC_REG_IOAT    4
+#define ZPCI_MOD_FC_DEREG_IOAT  5
+#define ZPCI_MOD_FC_REREG_IOAT  6
+#define ZPCI_MOD_FC_RESET_ERROR 7
+#define ZPCI_MOD_FC_RESET_BLOCK 9
+#define ZPCI_MOD_FC_SET_MEASURE 10
+
+/* Store PCI Function Controls status codes */
+#define ZPCI_STPCIFC_ST_PERM_ERROR    8
+#define ZPCI_STPCIFC_ST_INVAL_DMAAS   28
+#define ZPCI_STPCIFC_ST_ERROR_RECOVER 40
+
+/* Refresh PCI Translations status codes */
+#define ZPCI_RPCIT_ST_INSUFF_RES      16
+
+/* FIB function controls */
+#define ZPCI_FIB_FC_ENABLED     0x80
+#define ZPCI_FIB_FC_ERROR       0x40
+#define ZPCI_FIB_FC_LS_BLOCKED  0x20
+#define ZPCI_FIB_FC_DMAAS_REG   0x10
+
+/* FIB function controls */
+#define ZPCI_FIB_FC_ENABLED     0x80
+#define ZPCI_FIB_FC_ERROR       0x40
+#define ZPCI_FIB_FC_LS_BLOCKED  0x20
+#define ZPCI_FIB_FC_DMAAS_REG   0x10
+
+/* Function Information Block */
+typedef struct ZpciFib {
+    uint8_t fmt;   /* format */
+    uint8_t reserved1[7];
+    uint8_t fc;                  /* function controls */
+    uint8_t reserved2;
+    uint16_t reserved3;
+    uint32_t reserved4;
+    uint64_t pba;                /* PCI base address */
+    uint64_t pal;                /* PCI address limit */
+    uint64_t iota;               /* I/O Translation Anchor */
+#define FIB_DATA_ISC(x)    (((x) >> 28) & 0x7)
+#define FIB_DATA_NOI(x)    (((x) >> 16) & 0xfff)
+#define FIB_DATA_AIBVO(x) (((x) >> 8) & 0x3f)
+#define FIB_DATA_SUM(x)    (((x) >> 7) & 0x1)
+#define FIB_DATA_AISBO(x)  ((x) & 0x3f)
+    uint32_t data;
+    uint32_t reserved5;
+    uint64_t aibv;               /* Adapter int bit vector address */
+    uint64_t aisb;               /* Adapter int summary bit address */
+    uint64_t fmb_addr;           /* Function measurement address and key */
+    uint32_t reserved6;
+    uint32_t gd;
+} QEMU_PACKED ZpciFib;
+
+int pci_dereg_irqs(S390PCIBusDevice *pbdev);
+void pci_dereg_ioat(S390PCIIOMMU *iommu);
+int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra);
+int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra);
+int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra);
+int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra);
+int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr,
+                        uint8_t ar, uintptr_t ra);
+int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar,
+                        uintptr_t ra);
+int stpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar,
+                         uintptr_t ra);
+void fmb_timer_free(S390PCIBusDevice *pbdev);
+
+#define ZPCI_IO_BAR_MIN 0
+#define ZPCI_IO_BAR_MAX 5
+#define ZPCI_CONFIG_BAR 15
+
+#endif
diff --git a/include/hw/s390x/s390-pci-kvm.h b/include/hw/s390x/s390-pci-kvm.h
new file mode 100644 (file)
index 0000000..933814a
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * s390 PCI KVM interfaces
+ *
+ * Copyright 2022 IBM Corp.
+ * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390_PCI_KVM_H
+#define HW_S390_PCI_KVM_H
+
+#include "hw/s390x/s390-pci-bus.h"
+#include "hw/s390x/s390-pci-inst.h"
+
+#ifdef CONFIG_KVM
+bool s390_pci_kvm_interp_allowed(void);
+int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist);
+int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev);
+#else
+static inline bool s390_pci_kvm_interp_allowed(void)
+{
+    return false;
+}
+static inline int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib,
+                                          bool assist)
+{
+    return -EINVAL;
+}
+static inline int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev)
+{
+    return -EINVAL;
+}
+#endif
+
+#endif
diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h
new file mode 100644 (file)
index 0000000..ae1b126
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * s390 vfio-pci interfaces
+ *
+ * Copyright 2020 IBM Corp.
+ * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390_PCI_VFIO_H
+#define HW_S390_PCI_VFIO_H
+
+#include "hw/s390x/s390-pci-bus.h"
+#include CONFIG_DEVICES
+
+#ifdef CONFIG_VFIO
+bool s390_pci_update_dma_avail(int fd, unsigned int *avail);
+S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
+                                          S390PCIBusDevice *pbdev);
+void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt);
+bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh);
+void s390_pci_get_clp_info(S390PCIBusDevice *pbdev);
+#else
+static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
+{
+    return false;
+}
+static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
+                                                        S390PCIBusDevice *pbdev)
+{
+    return NULL;
+}
+static inline void s390_pci_end_dma_count(S390pciState *s,
+                                          S390PCIDMACount *cnt) { }
+static inline bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh)
+{
+    return false;
+}
+static inline void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) { }
+#endif
+
+#endif
diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h
new file mode 100644 (file)
index 0000000..c1d46e7
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * virtio ccw machine definitions
+ *
+ * Copyright 2012, 2016 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#ifndef HW_S390X_S390_VIRTIO_CCW_H
+#define HW_S390X_S390_VIRTIO_CCW_H
+
+#include "hw/boards.h"
+#include "qom/object.h"
+
+#define TYPE_S390_CCW_MACHINE               "s390-ccw-machine"
+
+OBJECT_DECLARE_TYPE(S390CcwMachineState, S390CcwMachineClass, S390_CCW_MACHINE)
+
+
+struct S390CcwMachineState {
+    /*< private >*/
+    MachineState parent_obj;
+
+    /*< public >*/
+    bool aes_key_wrap;
+    bool dea_key_wrap;
+    bool pv;
+    uint8_t loadparm[8];
+};
+
+#define S390_PTF_REASON_NONE (0x00 << 8)
+#define S390_PTF_REASON_DONE (0x01 << 8)
+#define S390_PTF_REASON_BUSY (0x02 << 8)
+#define S390_TOPO_FC_MASK 0xffUL
+void s390_handle_ptf(S390CPU *cpu, uint8_t r1, uintptr_t ra);
+
+struct S390CcwMachineClass {
+    /*< private >*/
+    MachineClass parent_class;
+
+    /*< public >*/
+    bool ri_allowed;
+    bool cpu_model_allowed;
+    bool css_migration_enabled;
+    bool hpage_1m_allowed;
+    int max_threads;
+};
+
+/* runtime-instrumentation allowed by the machine */
+bool ri_allowed(void);
+/* cpu model allowed by the machine */
+bool cpu_model_allowed(void);
+/* 1M huge page mappings allowed by the machine */
+bool hpage_1m_allowed(void);
+
+/**
+ * Returns true if (vmstate based) migration of the channel subsystem
+ * is enabled, false if it is disabled.
+ */
+bool css_migration_enabled(void);
+
+#endif
diff --git a/include/hw/s390x/s390_flic.h b/include/hw/s390x/s390_flic.h
new file mode 100644 (file)
index 0000000..3907a13
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * QEMU S390x floating interrupt controller (flic)
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Jens Freimann <jfrei@linux.vnet.ibm.com>
+ *            Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390_FLIC_H
+#define HW_S390_FLIC_H
+
+#include "hw/sysbus.h"
+#include "hw/s390x/adapter.h"
+#include "hw/virtio/virtio.h"
+#include "qemu/queue.h"
+#include "qom/object.h"
+
+/*
+ * Reserve enough gsis to accommodate all virtio devices.
+ * If any other user of adapter routes needs more of these,
+ * we need to bump the value; but virtio looks like the
+ * maximum right now.
+ */
+#define ADAPTER_ROUTES_MAX_GSI VIRTIO_QUEUE_MAX
+
+typedef struct AdapterRoutes {
+    AdapterInfo adapter;
+    int num_routes;
+    int gsi[ADAPTER_ROUTES_MAX_GSI];
+} AdapterRoutes;
+
+extern const VMStateDescription vmstate_adapter_routes;
+
+#define VMSTATE_ADAPTER_ROUTES(_f, _s) \
+    VMSTATE_STRUCT(_f, _s, 1, vmstate_adapter_routes, AdapterRoutes)
+
+#define TYPE_S390_FLIC_COMMON "s390-flic"
+OBJECT_DECLARE_TYPE(S390FLICState, S390FLICStateClass,
+                    S390_FLIC_COMMON)
+
+struct S390FLICState {
+    SysBusDevice parent_obj;
+    /* to limit AdapterRoutes.num_routes for compat */
+    uint32_t adapter_routes_max_batch;
+    bool ais_supported;
+};
+
+
+struct S390FLICStateClass {
+    DeviceClass parent_class;
+
+    int (*register_io_adapter)(S390FLICState *fs, uint32_t id, uint8_t isc,
+                               bool swap, bool maskable, uint8_t flags);
+    int (*io_adapter_map)(S390FLICState *fs, uint32_t id, uint64_t map_addr,
+                          bool do_map);
+    int (*add_adapter_routes)(S390FLICState *fs, AdapterRoutes *routes);
+    void (*release_adapter_routes)(S390FLICState *fs, AdapterRoutes *routes);
+    int (*clear_io_irq)(S390FLICState *fs, uint16_t subchannel_id,
+                        uint16_t subchannel_nr);
+    int (*modify_ais_mode)(S390FLICState *fs, uint8_t isc, uint16_t mode);
+    int (*inject_airq)(S390FLICState *fs, uint8_t type, uint8_t isc,
+                       uint8_t flags);
+    void (*inject_service)(S390FLICState *fs, uint32_t parm);
+    void (*inject_io)(S390FLICState *fs, uint16_t subchannel_id,
+                      uint16_t subchannel_nr, uint32_t io_int_parm,
+                      uint32_t io_int_word);
+    void (*inject_crw_mchk)(S390FLICState *fs);
+};
+
+#define TYPE_KVM_S390_FLIC "s390-flic-kvm"
+typedef struct KVMS390FLICState KVMS390FLICState;
+DECLARE_INSTANCE_CHECKER(KVMS390FLICState, KVM_S390_FLIC,
+                         TYPE_KVM_S390_FLIC)
+
+#define TYPE_QEMU_S390_FLIC "s390-flic-qemu"
+OBJECT_DECLARE_SIMPLE_TYPE(QEMUS390FLICState, QEMU_S390_FLIC)
+
+#define SIC_IRQ_MODE_ALL 0
+#define SIC_IRQ_MODE_SINGLE 1
+#define AIS_MODE_MASK(isc) (0x80 >> isc)
+
+#define ISC_TO_PENDING_IO(_isc) (0x80 >> (_isc))
+#define CR6_TO_PENDING_IO(_cr6) (((_cr6) >> 24) & 0xff)
+
+/* organize the ISC bits so that the macros above work */
+#define FLIC_PENDING_IO_ISC7            (1 << 0)
+#define FLIC_PENDING_IO_ISC6            (1 << 1)
+#define FLIC_PENDING_IO_ISC5            (1 << 2)
+#define FLIC_PENDING_IO_ISC4            (1 << 3)
+#define FLIC_PENDING_IO_ISC3            (1 << 4)
+#define FLIC_PENDING_IO_ISC2            (1 << 5)
+#define FLIC_PENDING_IO_ISC1            (1 << 6)
+#define FLIC_PENDING_IO_ISC0            (1 << 7)
+#define FLIC_PENDING_SERVICE            (1 << 8)
+#define FLIC_PENDING_MCHK_CR            (1 << 9)
+
+#define FLIC_PENDING_IO (FLIC_PENDING_IO_ISC0 | FLIC_PENDING_IO_ISC1 | \
+                         FLIC_PENDING_IO_ISC2 | FLIC_PENDING_IO_ISC3 | \
+                         FLIC_PENDING_IO_ISC4 | FLIC_PENDING_IO_ISC5 | \
+                         FLIC_PENDING_IO_ISC6 | FLIC_PENDING_IO_ISC7)
+
+typedef struct QEMUS390FlicIO {
+    uint16_t id;
+    uint16_t nr;
+    uint32_t parm;
+    uint32_t word;
+    QLIST_ENTRY(QEMUS390FlicIO) next;
+} QEMUS390FlicIO;
+
+struct QEMUS390FLICState {
+    S390FLICState parent_obj;
+    uint32_t pending;
+    uint32_t service_param;
+    uint8_t simm;
+    uint8_t nimm;
+    QLIST_HEAD(, QEMUS390FlicIO) io[8];
+};
+
+uint32_t qemu_s390_flic_dequeue_service(QEMUS390FLICState *flic);
+QEMUS390FlicIO *qemu_s390_flic_dequeue_io(QEMUS390FLICState *flic,
+                                          uint64_t cr6);
+void qemu_s390_flic_dequeue_crw_mchk(QEMUS390FLICState *flic);
+bool qemu_s390_flic_has_service(QEMUS390FLICState *flic);
+bool qemu_s390_flic_has_io(QEMUS390FLICState *fs, uint64_t cr6);
+bool qemu_s390_flic_has_crw_mchk(QEMUS390FLICState *flic);
+bool qemu_s390_flic_has_any(QEMUS390FLICState *flic);
+
+void s390_flic_init(void);
+
+S390FLICState *s390_get_flic(void);
+QEMUS390FLICState *s390_get_qemu_flic(S390FLICState *fs);
+S390FLICStateClass *s390_get_flic_class(S390FLICState *fs);
+void s390_crw_mchk(void);
+void s390_io_interrupt(uint16_t subchannel_id, uint16_t subchannel_nr,
+                       uint32_t io_int_parm, uint32_t io_int_word);
+bool ais_needed(void *opaque);
+
+#endif /* HW_S390_FLIC_H */
diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
new file mode 100644 (file)
index 0000000..b405a38
--- /dev/null
@@ -0,0 +1,231 @@
+/*
+ * SCLP Support
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version.  See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_S390_SCLP_H
+#define HW_S390_SCLP_H
+
+#include "hw/sysbus.h"
+#include "target/s390x/cpu-qom.h"
+#include "qom/object.h"
+
+#define SCLP_CMD_CODE_MASK                      0xffff00ff
+
+/* SCLP command codes */
+#define SCLP_CMDW_READ_SCP_INFO                 0x00020001
+#define SCLP_CMDW_READ_SCP_INFO_FORCED          0x00120001
+#define SCLP_READ_STORAGE_ELEMENT_INFO          0x00040001
+#define SCLP_ATTACH_STORAGE_ELEMENT             0x00080001
+#define SCLP_ASSIGN_STORAGE                     0x000D0001
+#define SCLP_UNASSIGN_STORAGE                   0x000C0001
+#define SCLP_CMD_READ_EVENT_DATA                0x00770005
+#define SCLP_CMD_WRITE_EVENT_DATA               0x00760005
+#define SCLP_CMD_WRITE_EVENT_MASK               0x00780005
+
+/* SCLP Memory hotplug codes */
+#define SCLP_FC_ASSIGN_ATTACH_READ_STOR         0xE00000000000ULL
+#define SCLP_STARTING_SUBINCREMENT_ID           0x10001
+#define SCLP_INCREMENT_UNIT                     0x10000
+#define MAX_STORAGE_INCREMENTS                  1020
+
+/* CPU hotplug SCLP codes */
+#define SCLP_HAS_CPU_INFO                       0x0800000000000000ULL
+#define SCLP_CMDW_READ_CPU_INFO                 0x00010001
+
+/* SCLP PCI codes */
+#define SCLP_HAS_IOA_RECONFIG                   0x0000000040000000ULL
+#define SCLP_CMDW_CONFIGURE_IOA                 0x001a0001
+#define SCLP_CMDW_DECONFIGURE_IOA               0x001b0001
+#define SCLP_RECONFIG_PCI_ATYPE                 2
+
+/* SCLP response codes */
+#define SCLP_RC_NORMAL_READ_COMPLETION          0x0010
+#define SCLP_RC_NORMAL_COMPLETION               0x0020
+#define SCLP_RC_SCCB_BOUNDARY_VIOLATION         0x0100
+#define SCLP_RC_NO_ACTION_REQUIRED              0x0120
+#define SCLP_RC_INVALID_SCLP_COMMAND            0x01f0
+#define SCLP_RC_CONTAINED_EQUIPMENT_CHECK       0x0340
+#define SCLP_RC_INSUFFICIENT_SCCB_LENGTH        0x0300
+#define SCLP_RC_STANDBY_READ_COMPLETION         0x0410
+#define SCLP_RC_ADAPTER_IN_RESERVED_STATE       0x05f0
+#define SCLP_RC_ADAPTER_TYPE_NOT_RECOGNIZED     0x06f0
+#define SCLP_RC_ADAPTER_ID_NOT_RECOGNIZED       0x09f0
+#define SCLP_RC_INVALID_FUNCTION                0x40f0
+#define SCLP_RC_NO_EVENT_BUFFERS_STORED         0x60f0
+#define SCLP_RC_INVALID_SELECTION_MASK          0x70f0
+#define SCLP_RC_INCONSISTENT_LENGTHS            0x72f0
+#define SCLP_RC_EVENT_BUFFER_SYNTAX_ERROR       0x73f0
+#define SCLP_RC_INVALID_MASK_LENGTH             0x74f0
+
+
+/* Service Call Control Block (SCCB) and its elements */
+
+#define SCCB_SIZE 4096
+
+#define SCLP_VARIABLE_LENGTH_RESPONSE           0x80
+#define SCLP_EVENT_BUFFER_ACCEPTED              0x80
+
+#define SCLP_FC_NORMAL_WRITE                    0
+
+/*
+ * Normally packed structures are not the right thing to do, since all code
+ * must take care of endianness. We cannot use ldl_phys and friends for two
+ * reasons, though:
+ * - some of the embedded structures below the SCCB can appear multiple times
+ *   at different locations, so there is no fixed offset
+ * - we work on a private copy of the SCCB, since there are several length
+ *   fields, that would cause a security nightmare if we allow the guest to
+ *   alter the structure while we parse it. We cannot use ldl_p and friends
+ *   either without doing pointer arithmetic
+ * So we have to double check that all users of sclp data structures use the
+ * right endianness wrappers.
+ */
+typedef struct SCCBHeader {
+    uint16_t length;
+    uint8_t function_code;
+    uint8_t control_mask[3];
+    uint16_t response_code;
+} QEMU_PACKED SCCBHeader;
+
+#define SCCB_DATA_LEN (SCCB_SIZE - sizeof(SCCBHeader))
+#define SCCB_CPU_FEATURE_LEN 6
+
+/* CPU information */
+typedef struct CPUEntry {
+    uint8_t address;
+    uint8_t reserved0;
+    uint8_t features[SCCB_CPU_FEATURE_LEN];
+    uint8_t reserved2[6];
+    uint8_t type;
+    uint8_t reserved1;
+} QEMU_PACKED CPUEntry;
+
+#define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET     128
+#define SCLP_READ_SCP_INFO_MNEST                4
+typedef struct ReadInfo {
+    SCCBHeader h;
+    uint16_t rnmax;
+    uint8_t rnsize;
+    uint8_t  _reserved1[15 - 11];       /* 11-14 */
+    uint8_t stsi_parm;                  /* 15-15 */
+    uint16_t entries_cpu;               /* 16-17 */
+    uint16_t offset_cpu;                /* 18-19 */
+    uint8_t  _reserved2[24 - 20];       /* 20-23 */
+    uint8_t  loadparm[8];               /* 24-31 */
+    uint8_t  _reserved3[48 - 32];       /* 32-47 */
+    uint64_t facilities;                /* 48-55 */
+    uint8_t  _reserved0[76 - 56];       /* 56-75 */
+    uint32_t ibc_val;
+    uint8_t  conf_char[99 - 80];        /* 80-98 */
+    uint8_t mha_pow;
+    uint32_t rnsize2;
+    uint64_t rnmax2;
+    uint8_t  _reserved6[116 - 112];     /* 112-115 */
+    uint8_t  conf_char_ext[120 - 116];   /* 116-119 */
+    uint16_t highest_cpu;
+    uint8_t  _reserved5[124 - 122];     /* 122-123 */
+    uint32_t hmfai;
+    uint8_t  _reserved7[134 - 128];     /* 128-133 */
+    uint8_t  fac134;
+    uint8_t  _reserved8[144 - 135];     /* 135-143 */
+    struct CPUEntry entries[];
+    /*
+     * When the Extended-Length SCCB (ELS) feature is enabled the
+     * start of the entries field begins at an offset denoted by the
+     * offset_cpu field, otherwise it's at an offset of 128.
+     */
+} QEMU_PACKED ReadInfo;
+
+typedef struct ReadCpuInfo {
+    SCCBHeader h;
+    uint16_t nr_configured;         /* 8-9 */
+    uint16_t offset_configured;     /* 10-11 */
+    uint16_t nr_standby;            /* 12-13 */
+    uint16_t offset_standby;        /* 14-15 */
+    uint8_t reserved0[24-16];       /* 16-23 */
+    struct CPUEntry entries[];
+} QEMU_PACKED ReadCpuInfo;
+
+typedef struct ReadStorageElementInfo {
+    SCCBHeader h;
+    uint16_t max_id;
+    uint16_t assigned;
+    uint16_t standby;
+    uint8_t _reserved0[16 - 14]; /* 14-15 */
+    uint32_t entries[];
+} QEMU_PACKED ReadStorageElementInfo;
+
+typedef struct AttachStorageElement {
+    SCCBHeader h;
+    uint8_t _reserved0[10 - 8];  /* 8-9 */
+    uint16_t assigned;
+    uint8_t _reserved1[16 - 12]; /* 12-15 */
+    uint32_t entries[];
+} QEMU_PACKED AttachStorageElement;
+
+typedef struct AssignStorage {
+    SCCBHeader h;
+    uint16_t rn;
+} QEMU_PACKED AssignStorage;
+
+typedef struct IoaCfgSccb {
+    SCCBHeader header;
+    uint8_t atype;
+    uint8_t reserved1;
+    uint16_t reserved2;
+    uint32_t aid;
+} QEMU_PACKED IoaCfgSccb;
+
+typedef struct SCCB {
+    SCCBHeader h;
+    char data[];
+ } QEMU_PACKED SCCB;
+
+#define TYPE_SCLP "sclp"
+OBJECT_DECLARE_TYPE(SCLPDevice, SCLPDeviceClass,
+                    SCLP)
+
+struct SCLPEventFacility;
+
+struct SCLPDevice {
+    /* private */
+    DeviceState parent_obj;
+    struct SCLPEventFacility *event_facility;
+    int increment_size;
+
+    /* public */
+};
+
+struct SCLPDeviceClass {
+    /* private */
+    DeviceClass parent_class;
+    void (*read_SCP_info)(SCLPDevice *sclp, SCCB *sccb);
+    void (*read_cpu_info)(SCLPDevice *sclp, SCCB *sccb);
+
+    /* public */
+    void (*execute)(SCLPDevice *sclp, SCCB *sccb, uint32_t code);
+    void (*service_interrupt)(SCLPDevice *sclp, uint32_t sccb);
+};
+
+static inline int sccb_data_len(SCCB *sccb)
+{
+    return be16_to_cpu(sccb->h.length) - sizeof(sccb->h);
+}
+
+
+void s390_sclp_init(void);
+void sclp_service_interrupt(uint32_t sccb);
+void raise_irq_cpu_hotplug(void);
+int sclp_service_call(S390CPU *cpu, uint64_t sccb, uint32_t code);
+int sclp_service_call_protected(S390CPU *cpu, uint64_t sccb, uint32_t code);
+
+#endif
diff --git a/include/hw/s390x/storage-attributes.h b/include/hw/s390x/storage-attributes.h
new file mode 100644 (file)
index 0000000..5239eb5
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * s390 storage attributes device
+ *
+ * Copyright 2016 IBM Corp.
+ * Author(s): Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef S390_STORAGE_ATTRIBUTES_H
+#define S390_STORAGE_ATTRIBUTES_H
+
+#include "hw/qdev-core.h"
+#include "monitor/monitor.h"
+#include "qom/object.h"
+
+#define TYPE_S390_STATTRIB "s390-storage_attributes"
+#define TYPE_QEMU_S390_STATTRIB "s390-storage_attributes-qemu"
+#define TYPE_KVM_S390_STATTRIB "s390-storage_attributes-kvm"
+
+OBJECT_DECLARE_TYPE(S390StAttribState, S390StAttribClass, S390_STATTRIB)
+
+struct S390StAttribState {
+    DeviceState parent_obj;
+    uint64_t migration_cur_gfn;
+    bool migration_enabled;
+};
+
+
+struct S390StAttribClass {
+    DeviceClass parent_class;
+    /* Return value: < 0 on error, or new count */
+    int (*get_stattr)(S390StAttribState *sa, uint64_t *start_gfn,
+                      uint32_t count, uint8_t *values);
+    int (*peek_stattr)(S390StAttribState *sa, uint64_t start_gfn,
+                       uint32_t count, uint8_t *values);
+    int (*set_stattr)(S390StAttribState *sa, uint64_t start_gfn,
+                      uint32_t count, uint8_t *values);
+    void (*synchronize)(S390StAttribState *sa);
+    int (*set_migrationmode)(S390StAttribState *sa, bool value);
+    int (*get_active)(S390StAttribState *sa);
+    long long (*get_dirtycount)(S390StAttribState *sa);
+};
+
+typedef struct QEMUS390StAttribState QEMUS390StAttribState;
+DECLARE_INSTANCE_CHECKER(QEMUS390StAttribState, QEMU_S390_STATTRIB,
+                         TYPE_QEMU_S390_STATTRIB)
+
+struct QEMUS390StAttribState {
+    S390StAttribState parent_obj;
+};
+
+typedef struct KVMS390StAttribState KVMS390StAttribState;
+DECLARE_INSTANCE_CHECKER(KVMS390StAttribState, KVM_S390_STATTRIB,
+                         TYPE_KVM_S390_STATTRIB)
+
+struct KVMS390StAttribState {
+    S390StAttribState parent_obj;
+    uint64_t still_dirty;
+    uint8_t *incoming_buffer;
+};
+
+void s390_stattrib_init(void);
+
+#ifdef CONFIG_KVM
+Object *kvm_s390_stattrib_create(void);
+#else
+static inline Object *kvm_s390_stattrib_create(void)
+{
+    return NULL;
+}
+#endif
+
+void hmp_info_cmma(Monitor *mon, const QDict *qdict);
+void hmp_migrationmode(Monitor *mon, const QDict *qdict);
+
+#endif /* S390_STORAGE_ATTRIBUTES_H */
diff --git a/include/hw/s390x/storage-keys.h b/include/hw/s390x/storage-keys.h
new file mode 100644 (file)
index 0000000..aa2ec2a
--- /dev/null
@@ -0,0 +1,120 @@
+/*
+ * s390 storage key device
+ *
+ * Copyright 2015 IBM Corp.
+ * Author(s): Jason J. Herne <jjherne@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef S390_STORAGE_KEYS_H
+#define S390_STORAGE_KEYS_H
+
+#include "hw/qdev-core.h"
+#include "monitor/monitor.h"
+#include "qom/object.h"
+
+#define TYPE_S390_SKEYS "s390-skeys"
+OBJECT_DECLARE_TYPE(S390SKeysState, S390SKeysClass, S390_SKEYS)
+
+struct S390SKeysState {
+    DeviceState parent_obj;
+    bool migration_enabled;
+
+};
+
+
+struct S390SKeysClass {
+    DeviceClass parent_class;
+
+    /**
+     * @skeys_are_enabled:
+     *
+     * Check whether storage keys are enabled. If not enabled, they were not
+     * enabled lazily either by the guest via a storage key instruction or
+     * by the host during migration.
+     *
+     * If disabled, everything not explicitly triggered by the guest,
+     * such as outgoing migration or dirty/change tracking, should not touch
+     * storage keys and should not lazily enable it.
+     *
+     * @ks: the #S390SKeysState
+     *
+     * Returns false if not enabled and true if enabled.
+     */
+    bool (*skeys_are_enabled)(S390SKeysState *ks);
+
+    /**
+     * @enable_skeys:
+     *
+     * Lazily enable storage keys. If this function is not implemented,
+     * setting a storage key will lazily enable storage keys implicitly
+     * instead. TCG guests have to make sure to flush the TLB of all CPUs
+     * if storage keys were not enabled before this call.
+     *
+     * @ks: the #S390SKeysState
+     *
+     * Returns false if not enabled before this call, and true if already
+     * enabled.
+     */
+    bool (*enable_skeys)(S390SKeysState *ks);
+
+    /**
+     * @get_skeys:
+     *
+     * Get storage keys for the given PFN range. This call will fail if
+     * storage keys have not been lazily enabled yet.
+     *
+     * Callers have to validate that a GFN is valid before this call.
+     *
+     * @ks: the #S390SKeysState
+     * @start_gfn: the start GFN to get storage keys for
+     * @count: the number of storage keys to get
+     * @keys: the byte array where storage keys will be stored to
+     *
+     * Returns 0 on success, returns an error if getting a storage key failed.
+     */
+    int (*get_skeys)(S390SKeysState *ks, uint64_t start_gfn, uint64_t count,
+                     uint8_t *keys);
+    /**
+     * @set_skeys:
+     *
+     * Set storage keys for the given PFN range. This call will fail if
+     * storage keys have not been lazily enabled yet and implicit
+     * enablement is not supported.
+     *
+     * Callers have to validate that a GFN is valid before this call.
+     *
+     * @ks: the #S390SKeysState
+     * @start_gfn: the start GFN to set storage keys for
+     * @count: the number of storage keys to set
+     * @keys: the byte array where storage keys will be read from
+     *
+     * Returns 0 on success, returns an error if setting a storage key failed.
+     */
+    int (*set_skeys)(S390SKeysState *ks, uint64_t start_gfn, uint64_t count,
+                     uint8_t *keys);
+};
+
+#define TYPE_KVM_S390_SKEYS "s390-skeys-kvm"
+#define TYPE_QEMU_S390_SKEYS "s390-skeys-qemu"
+typedef struct QEMUS390SKeysState QEMUS390SKeysState;
+DECLARE_INSTANCE_CHECKER(QEMUS390SKeysState, QEMU_S390_SKEYS,
+                         TYPE_QEMU_S390_SKEYS)
+
+struct QEMUS390SKeysState {
+    S390SKeysState parent_obj;
+    uint8_t *keydata;
+    uint32_t key_count;
+};
+
+void s390_skeys_init(void);
+
+S390SKeysState *s390_get_skeys_device(void);
+
+void hmp_dump_skeys(Monitor *mon, const QDict *qdict);
+void hmp_info_skeys(Monitor *mon, const QDict *qdict);
+
+#endif /* S390_STORAGE_KEYS_H */
diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h
new file mode 100644 (file)
index 0000000..0935e85
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * TOD (Time Of Day) clock
+ *
+ * Copyright 2018 Red Hat, Inc.
+ * Author(s): David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_S390_TOD_H
+#define HW_S390_TOD_H
+
+#include "hw/qdev-core.h"
+#include "tcg/s390-tod.h"
+#include "qom/object.h"
+
+typedef struct S390TOD {
+    uint8_t high;
+    uint64_t low;
+} S390TOD;
+
+#define TYPE_S390_TOD "s390-tod"
+OBJECT_DECLARE_TYPE(S390TODState, S390TODClass, S390_TOD)
+#define TYPE_KVM_S390_TOD TYPE_S390_TOD "-kvm"
+#define TYPE_QEMU_S390_TOD TYPE_S390_TOD "-qemu"
+
+struct S390TODState {
+    /* private */
+    DeviceState parent_obj;
+
+    /*
+     * Used by TCG to remember the time base. Used by KVM to backup the TOD
+     * while the TOD is stopped.
+     */
+    S390TOD base;
+    /* Used by KVM to remember if the TOD is stopped and base is valid. */
+    bool stopped;
+};
+
+struct S390TODClass {
+    /* private */
+    DeviceClass parent_class;
+    void (*parent_realize)(DeviceState *dev, Error **errp);
+
+    /* public */
+    void (*get)(const S390TODState *td, S390TOD *tod, Error **errp);
+    void (*set)(S390TODState *td, const S390TOD *tod, Error **errp);
+};
+
+void s390_init_tod(void);
+S390TODState *s390_get_todstate(void);
+
+#endif
diff --git a/include/hw/s390x/vfio-ccw.h b/include/hw/s390x/vfio-ccw.h
new file mode 100644 (file)
index 0000000..4209d27
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * vfio based subchannel assignment support
+ *
+ * Copyright 2017, 2019 IBM Corp.
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ *            Pierre Morel <pmorel@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_VFIO_CCW_H
+#define HW_VFIO_CCW_H
+
+#include "hw/vfio/vfio-common.h"
+#include "hw/s390x/s390-ccw.h"
+#include "hw/s390x/ccw-device.h"
+#include "qom/object.h"
+
+#define TYPE_VFIO_CCW "vfio-ccw"
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOCCWDevice, VFIO_CCW)
+
+#endif
diff --git a/include/hw/scsi/emulation.h b/include/hw/scsi/emulation.h
new file mode 100644 (file)
index 0000000..9521704
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef HW_SCSI_EMULATION_H
+#define HW_SCSI_EMULATION_H
+
+typedef struct SCSIBlockLimits {
+    bool wsnz;
+    uint16_t min_io_size;
+    uint32_t max_unmap_descr;
+    uint32_t opt_io_size;
+    uint32_t max_unmap_sectors;
+    uint32_t unmap_sectors;
+    uint32_t max_io_sectors;
+} SCSIBlockLimits;
+
+int scsi_emulate_block_limits(uint8_t *outbuf, const SCSIBlockLimits *bl);
+
+#endif
diff --git a/include/hw/scsi/esp.h b/include/hw/scsi/esp.h
new file mode 100644 (file)
index 0000000..13b1749
--- /dev/null
@@ -0,0 +1,172 @@
+#ifndef QEMU_HW_ESP_H
+#define QEMU_HW_ESP_H
+
+#include "hw/scsi/scsi.h"
+#include "hw/sysbus.h"
+#include "qemu/fifo8.h"
+#include "qom/object.h"
+
+/* esp.c */
+#define ESP_MAX_DEVS 7
+typedef void (*ESPDMAMemoryReadWriteFunc)(void *opaque, uint8_t *buf, int len);
+
+#define ESP_REGS 16
+#define ESP_FIFO_SZ 16
+#define ESP_CMDFIFO_SZ 32
+
+typedef struct ESPState ESPState;
+
+#define TYPE_ESP "esp"
+OBJECT_DECLARE_SIMPLE_TYPE(ESPState, ESP)
+
+struct ESPState {
+    DeviceState parent_obj;
+
+    uint8_t rregs[ESP_REGS];
+    uint8_t wregs[ESP_REGS];
+    qemu_irq irq;
+    qemu_irq irq_data;
+    uint8_t chip_id;
+    bool tchi_written;
+    int32_t ti_size;
+    uint32_t status;
+    uint32_t dma;
+    Fifo8 fifo;
+    SCSIBus bus;
+    SCSIDevice *current_dev;
+    SCSIRequest *current_req;
+    Fifo8 cmdfifo;
+    uint8_t cmdfifo_cdb_offset;
+    uint8_t lun;
+    uint32_t do_cmd;
+
+    bool data_in_ready;
+    uint8_t ti_cmd;
+    int dma_enabled;
+
+    uint32_t async_len;
+    uint8_t *async_buf;
+
+    ESPDMAMemoryReadWriteFunc dma_memory_read;
+    ESPDMAMemoryReadWriteFunc dma_memory_write;
+    void *dma_opaque;
+    void (*dma_cb)(ESPState *s);
+    uint8_t pdma_cb;
+
+    uint8_t mig_version_id;
+
+    /* Legacy fields for vmstate_esp version < 5 */
+    uint32_t mig_dma_left;
+    uint32_t mig_deferred_status;
+    bool mig_deferred_complete;
+    uint32_t mig_ti_rptr, mig_ti_wptr;
+    uint8_t mig_ti_buf[ESP_FIFO_SZ];
+    uint8_t mig_cmdbuf[ESP_CMDFIFO_SZ];
+    uint32_t mig_cmdlen;
+};
+
+#define TYPE_SYSBUS_ESP "sysbus-esp"
+OBJECT_DECLARE_SIMPLE_TYPE(SysBusESPState, SYSBUS_ESP)
+
+struct SysBusESPState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion iomem;
+    MemoryRegion pdma;
+    uint32_t it_shift;
+    ESPState esp;
+};
+
+#define ESP_TCLO   0x0
+#define ESP_TCMID  0x1
+#define ESP_FIFO   0x2
+#define ESP_CMD    0x3
+#define ESP_RSTAT  0x4
+#define ESP_WBUSID 0x4
+#define ESP_RINTR  0x5
+#define ESP_WSEL   0x5
+#define ESP_RSEQ   0x6
+#define ESP_WSYNTP 0x6
+#define ESP_RFLAGS 0x7
+#define ESP_WSYNO  0x7
+#define ESP_CFG1   0x8
+#define ESP_RRES1  0x9
+#define ESP_WCCF   0x9
+#define ESP_RRES2  0xa
+#define ESP_WTEST  0xa
+#define ESP_CFG2   0xb
+#define ESP_CFG3   0xc
+#define ESP_RES3   0xd
+#define ESP_TCHI   0xe
+#define ESP_RES4   0xf
+
+#define CMD_DMA 0x80
+#define CMD_CMD 0x7f
+
+#define CMD_NOP      0x00
+#define CMD_FLUSH    0x01
+#define CMD_RESET    0x02
+#define CMD_BUSRESET 0x03
+#define CMD_TI       0x10
+#define CMD_ICCS     0x11
+#define CMD_MSGACC   0x12
+#define CMD_PAD      0x18
+#define CMD_SATN     0x1a
+#define CMD_RSTATN   0x1b
+#define CMD_SEL      0x41
+#define CMD_SELATN   0x42
+#define CMD_SELATNS  0x43
+#define CMD_ENSEL    0x44
+#define CMD_DISSEL   0x45
+
+#define STAT_DO 0x00
+#define STAT_DI 0x01
+#define STAT_CD 0x02
+#define STAT_ST 0x03
+#define STAT_MO 0x06
+#define STAT_MI 0x07
+#define STAT_PIO_MASK 0x06
+
+#define STAT_TC 0x10
+#define STAT_PE 0x20
+#define STAT_GE 0x40
+#define STAT_INT 0x80
+
+#define BUSID_DID 0x07
+
+#define INTR_FC 0x08
+#define INTR_BS 0x10
+#define INTR_DC 0x20
+#define INTR_RST 0x80
+
+#define SEQ_0 0x0
+#define SEQ_MO 0x1
+#define SEQ_CD 0x4
+
+#define CFG1_RESREPT 0x40
+
+#define TCHI_FAS100A 0x4
+#define TCHI_AM53C974 0x12
+
+/* PDMA callbacks */
+enum pdma_cb {
+    SATN_PDMA_CB = 0,
+    S_WITHOUT_SATN_PDMA_CB = 1,
+    SATN_STOP_PDMA_CB = 2,
+    WRITE_RESPONSE_PDMA_CB = 3,
+    DO_DMA_PDMA_CB = 4
+};
+
+void esp_dma_enable(ESPState *s, int irq, int level);
+void esp_request_cancelled(SCSIRequest *req);
+void esp_command_complete(SCSIRequest *req, size_t resid);
+void esp_transfer_data(SCSIRequest *req, uint32_t len);
+void esp_hard_reset(ESPState *s);
+uint64_t esp_reg_read(ESPState *s, uint32_t saddr);
+void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val);
+extern const VMStateDescription vmstate_esp;
+int esp_pre_save(void *opaque);
+
+#endif
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
new file mode 100644 (file)
index 0000000..3692ca8
--- /dev/null
@@ -0,0 +1,251 @@
+#ifndef QEMU_HW_SCSI_H
+#define QEMU_HW_SCSI_H
+
+#include "block/aio.h"
+#include "hw/block/block.h"
+#include "hw/qdev-core.h"
+#include "scsi/utils.h"
+#include "qemu/notify.h"
+#include "qom/object.h"
+
+#define MAX_SCSI_DEVS 255
+
+typedef struct SCSIBus SCSIBus;
+typedef struct SCSIBusInfo SCSIBusInfo;
+typedef struct SCSIDevice SCSIDevice;
+typedef struct SCSIRequest SCSIRequest;
+typedef struct SCSIReqOps SCSIReqOps;
+
+#define SCSI_SENSE_BUF_SIZE_OLD 96
+#define SCSI_SENSE_BUF_SIZE 252
+#define DEFAULT_IO_TIMEOUT 30
+
+struct SCSIRequest {
+    SCSIBus           *bus;
+    SCSIDevice        *dev;
+    const SCSIReqOps  *ops;
+    uint32_t          refcount;
+    uint32_t          tag;
+    uint32_t          lun;
+    int16_t           status;
+    int16_t           host_status;
+    void              *hba_private;
+    uint64_t          residual;
+    SCSICommand       cmd;
+    NotifierList      cancel_notifiers;
+
+    /* Note:
+     * - fields before sense are initialized by scsi_req_alloc;
+     * - sense[] is uninitialized;
+     * - fields after sense are memset to 0 by scsi_req_alloc.
+     * */
+
+    uint8_t           sense[SCSI_SENSE_BUF_SIZE];
+    uint32_t          sense_len;
+    bool              enqueued;
+    bool              io_canceled;
+    bool              retry;
+    bool              dma_started;
+    BlockAIOCB        *aiocb;
+    QEMUSGList        *sg;
+    QTAILQ_ENTRY(SCSIRequest) next;
+};
+
+#define TYPE_SCSI_DEVICE "scsi-device"
+OBJECT_DECLARE_TYPE(SCSIDevice, SCSIDeviceClass, SCSI_DEVICE)
+
+struct SCSIDeviceClass {
+    DeviceClass parent_class;
+    void (*realize)(SCSIDevice *dev, Error **errp);
+    void (*unrealize)(SCSIDevice *dev);
+    int (*parse_cdb)(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf,
+                     size_t buf_len, void *hba_private);
+    SCSIRequest *(*alloc_req)(SCSIDevice *s, uint32_t tag, uint32_t lun,
+                              uint8_t *buf, void *hba_private);
+    void (*unit_attention_reported)(SCSIDevice *s);
+};
+
+struct SCSIDevice
+{
+    DeviceState qdev;
+    VMChangeStateEntry *vmsentry;
+    QEMUBH *bh;
+    uint32_t id;
+    BlockConf conf;
+    SCSISense unit_attention;
+    bool sense_is_ua;
+    uint8_t sense[SCSI_SENSE_BUF_SIZE];
+    uint32_t sense_len;
+    QTAILQ_HEAD(, SCSIRequest) requests;
+    uint32_t channel;
+    uint32_t lun;
+    int blocksize;
+    int type;
+    uint64_t max_lba;
+    uint64_t wwn;
+    uint64_t port_wwn;
+    int scsi_version;
+    int default_scsi_version;
+    uint32_t io_timeout;
+    bool needs_vpd_bl_emulation;
+    bool hba_supports_iothread;
+};
+
+extern const VMStateDescription vmstate_scsi_device;
+
+#define VMSTATE_SCSI_DEVICE(_field, _state) {                        \
+    .name       = (stringify(_field)),                               \
+    .size       = sizeof(SCSIDevice),                                \
+    .vmsd       = &vmstate_scsi_device,                              \
+    .flags      = VMS_STRUCT,                                        \
+    .offset     = vmstate_offset_value(_state, _field, SCSIDevice),  \
+}
+
+/* cdrom.c */
+int cdrom_read_toc(int nb_sectors, uint8_t *buf, int msf, int start_track);
+int cdrom_read_toc_raw(int nb_sectors, uint8_t *buf, int msf, int session_num);
+
+/* scsi-bus.c */
+struct SCSIReqOps {
+    size_t size;
+    void (*init_req)(SCSIRequest *req);
+    void (*free_req)(SCSIRequest *req);
+    int32_t (*send_command)(SCSIRequest *req, uint8_t *buf);
+    void (*read_data)(SCSIRequest *req);
+    void (*write_data)(SCSIRequest *req);
+    uint8_t *(*get_buf)(SCSIRequest *req);
+
+    void (*save_request)(QEMUFile *f, SCSIRequest *req);
+    void (*load_request)(QEMUFile *f, SCSIRequest *req);
+};
+
+struct SCSIBusInfo {
+    int tcq;
+    int max_channel, max_target, max_lun;
+    int (*parse_cdb)(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf,
+                     size_t buf_len, void *hba_private);
+    void (*transfer_data)(SCSIRequest *req, uint32_t arg);
+    void (*fail)(SCSIRequest *req);
+    void (*complete)(SCSIRequest *req, size_t residual);
+    void (*cancel)(SCSIRequest *req);
+    void (*change)(SCSIBus *bus, SCSIDevice *dev, SCSISense sense);
+    QEMUSGList *(*get_sg_list)(SCSIRequest *req);
+
+    void (*save_request)(QEMUFile *f, SCSIRequest *req);
+    void *(*load_request)(QEMUFile *f, SCSIRequest *req);
+    void (*free_request)(SCSIBus *bus, void *priv);
+
+    /*
+     * Temporarily stop submitting new requests between drained_begin() and
+     * drained_end(). Called from the main loop thread with the BQL held.
+     *
+     * Implement these callbacks if request processing is triggered by a file
+     * descriptor like an EventNotifier. Otherwise set them to NULL.
+     */
+    void (*drained_begin)(SCSIBus *bus);
+    void (*drained_end)(SCSIBus *bus);
+};
+
+#define TYPE_SCSI_BUS "SCSI"
+OBJECT_DECLARE_SIMPLE_TYPE(SCSIBus, SCSI_BUS)
+
+struct SCSIBus {
+    BusState qbus;
+    int busnr;
+
+    SCSISense unit_attention;
+    const SCSIBusInfo *info;
+
+    int drain_count; /* protected by BQL */
+};
+
+/**
+ * scsi_bus_init_named: Initialize a SCSI bus with the specified name
+ * @bus: SCSIBus object to initialize
+ * @bus_size: size of @bus object
+ * @host: Device which owns the bus (generally the SCSI controller)
+ * @info: structure defining callbacks etc for the controller
+ * @bus_name: Name to use for this bus
+ *
+ * This in-place initializes @bus as a new SCSI bus with a name
+ * provided by the caller. It is the caller's responsibility to make
+ * sure that name does not clash with the name of any other bus in the
+ * system. Unless you need the new bus to have a specific name, you
+ * should use scsi_bus_init() instead.
+ */
+void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host,
+                         const SCSIBusInfo *info, const char *bus_name);
+
+/**
+ * scsi_bus_init: Initialize a SCSI bus
+ *
+ * This in-place-initializes @bus as a new SCSI bus and gives it
+ * an automatically generated unique name.
+ */
+static inline void scsi_bus_init(SCSIBus *bus, size_t bus_size,
+                                 DeviceState *host, const SCSIBusInfo *info)
+{
+    scsi_bus_init_named(bus, bus_size, host, info, NULL);
+}
+
+static inline SCSIBus *scsi_bus_from_device(SCSIDevice *d)
+{
+    return DO_UPCAST(SCSIBus, qbus, d->qdev.parent_bus);
+}
+
+SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
+                                      int unit, bool removable, int bootindex,
+                                      bool share_rw,
+                                      BlockdevOnError rerror,
+                                      BlockdevOnError werror,
+                                      const char *serial, Error **errp);
+void scsi_bus_set_ua(SCSIBus *bus, SCSISense sense);
+void scsi_bus_legacy_handle_cmdline(SCSIBus *bus);
+
+SCSIRequest *scsi_req_alloc(const SCSIReqOps *reqops, SCSIDevice *d,
+                            uint32_t tag, uint32_t lun, void *hba_private);
+SCSIRequest *scsi_req_new(SCSIDevice *d, uint32_t tag, uint32_t lun,
+                          uint8_t *buf, size_t buf_len, void *hba_private);
+int32_t scsi_req_enqueue(SCSIRequest *req);
+SCSIRequest *scsi_req_ref(SCSIRequest *req);
+void scsi_req_unref(SCSIRequest *req);
+
+int scsi_bus_parse_cdb(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf,
+                       size_t buf_len, void *hba_private);
+int scsi_req_parse_cdb(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf,
+                       size_t buf_len);
+void scsi_req_build_sense(SCSIRequest *req, SCSISense sense);
+void scsi_req_print(SCSIRequest *req);
+void scsi_req_continue(SCSIRequest *req);
+void scsi_req_data(SCSIRequest *req, int len);
+void scsi_req_complete(SCSIRequest *req, int status);
+void scsi_req_complete_failed(SCSIRequest *req, int host_status);
+uint8_t *scsi_req_get_buf(SCSIRequest *req);
+int scsi_req_get_sense(SCSIRequest *req, uint8_t *buf, int len);
+void scsi_req_cancel_complete(SCSIRequest *req);
+void scsi_req_cancel(SCSIRequest *req);
+void scsi_req_cancel_async(SCSIRequest *req, Notifier *notifier);
+void scsi_req_retry(SCSIRequest *req);
+void scsi_device_drained_begin(SCSIDevice *sdev);
+void scsi_device_drained_end(SCSIDevice *sdev);
+void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense);
+void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense);
+void scsi_device_report_change(SCSIDevice *dev, SCSISense sense);
+void scsi_device_unit_attention_reported(SCSIDevice *dev);
+void scsi_generic_read_device_inquiry(SCSIDevice *dev);
+int scsi_device_get_sense(SCSIDevice *dev, uint8_t *buf, int len, bool fixed);
+int scsi_SG_IO_FROM_DEV(BlockBackend *blk, uint8_t *cmd, uint8_t cmd_size,
+                        uint8_t *buf, uint8_t buf_size, uint32_t timeout);
+SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int target, int lun);
+SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int target, int lun);
+
+/* scsi-generic.c. */
+extern const SCSIReqOps scsi_generic_req_ops;
+
+/* scsi-disk.c */
+#define SCSI_DISK_QUIRK_MODE_PAGE_APPLE_VENDOR             0
+#define SCSI_DISK_QUIRK_MODE_SENSE_ROM_USE_DBD             1
+#define SCSI_DISK_QUIRK_MODE_PAGE_VENDOR_SPECIFIC_APPLE    2
+#define SCSI_DISK_QUIRK_MODE_PAGE_TRUNCATED                3
+
+#endif
diff --git a/include/hw/sd/allwinner-sdhost.h b/include/hw/sd/allwinner-sdhost.h
new file mode 100644 (file)
index 0000000..1b95117
--- /dev/null
@@ -0,0 +1,146 @@
+/*
+ * Allwinner (sun4i and above) SD Host Controller emulation
+ *
+ * Copyright (C) 2019 Niek Linnenbank <nieklinnenbank@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SD_ALLWINNER_SDHOST_H
+#define HW_SD_ALLWINNER_SDHOST_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+#include "hw/sd/sd.h"
+
+/**
+ * Object model types
+ * @{
+ */
+
+/** Generic Allwinner SD Host Controller (abstract) */
+#define TYPE_AW_SDHOST "allwinner-sdhost"
+
+/** Allwinner sun4i family (A10, A12) */
+#define TYPE_AW_SDHOST_SUN4I TYPE_AW_SDHOST "-sun4i"
+
+/** Allwinner sun5i family and newer (A13, H2+, H3, etc) */
+#define TYPE_AW_SDHOST_SUN5I TYPE_AW_SDHOST "-sun5i"
+
+/** Allwinner sun50i-a64 */
+#define TYPE_AW_SDHOST_SUN50I_A64 TYPE_AW_SDHOST "-sun50i-a64"
+
+/** Allwinner sun50i-a64 emmc */
+#define TYPE_AW_SDHOST_SUN50I_A64_EMMC  TYPE_AW_SDHOST "-sun50i-a64-emmc"
+
+/** @} */
+
+/**
+ * Object model macros
+ * @{
+ */
+
+OBJECT_DECLARE_TYPE(AwSdHostState, AwSdHostClass, AW_SDHOST)
+
+/** @} */
+
+/**
+ * Allwinner SD Host Controller object instance state.
+ */
+struct AwSdHostState {
+    /*< private >*/
+    SysBusDevice busdev;
+    /*< public >*/
+
+    /** Secure Digital (SD) bus, which connects to SD card (if present) */
+    SDBus sdbus;
+
+    /** Maps I/O registers in physical memory */
+    MemoryRegion iomem;
+
+    /** Interrupt output signal to notify CPU */
+    qemu_irq irq;
+
+    /** Memory region where DMA transfers are done */
+    MemoryRegion *dma_mr;
+
+    /** Address space used internally for DMA transfers */
+    AddressSpace dma_as;
+
+    /** Number of bytes left in current DMA transfer */
+    uint32_t transfer_cnt;
+
+    /**
+     * @name Hardware Registers
+     * @{
+     */
+
+    uint32_t global_ctl;        /**< Global Control */
+    uint32_t clock_ctl;         /**< Clock Control */
+    uint32_t timeout;           /**< Timeout */
+    uint32_t bus_width;         /**< Bus Width */
+    uint32_t block_size;        /**< Block Size */
+    uint32_t byte_count;        /**< Byte Count */
+
+    uint32_t command;           /**< Command */
+    uint32_t command_arg;       /**< Command Argument */
+    uint32_t response[4];       /**< Command Response */
+
+    uint32_t irq_mask;          /**< Interrupt Mask */
+    uint32_t irq_status;        /**< Raw Interrupt Status */
+    uint32_t status;            /**< Status */
+
+    uint32_t fifo_wlevel;       /**< FIFO Water Level */
+    uint32_t fifo_func_sel;     /**< FIFO Function Select */
+    uint32_t debug_enable;      /**< Debug Enable */
+    uint32_t auto12_arg;        /**< Auto Command 12 Argument */
+    uint32_t newtiming_set;     /**< SD New Timing Set */
+    uint32_t newtiming_debug;   /**< SD New Timing Debug */
+    uint32_t hardware_rst;      /**< Hardware Reset */
+    uint32_t dmac;              /**< Internal DMA Controller Control */
+    uint32_t desc_base;         /**< Descriptor List Base Address */
+    uint32_t dmac_status;       /**< Internal DMA Controller Status */
+    uint32_t dmac_irq;          /**< Internal DMA Controller IRQ Enable */
+    uint32_t card_threshold;    /**< Card Threshold Control */
+    uint32_t startbit_detect;   /**< eMMC DDR Start Bit Detection Control */
+    uint32_t response_crc;      /**< Response CRC */
+    uint32_t data_crc[8];       /**< Data CRC */
+    uint32_t sample_delay;      /**< Sample delay control */
+    uint32_t status_crc;        /**< Status CRC */
+
+    /** @} */
+
+};
+
+/**
+ * Allwinner SD Host Controller class-level struct.
+ *
+ * This struct is filled by each sunxi device specific code
+ * such that the generic code can use this struct to support
+ * all devices.
+ */
+struct AwSdHostClass {
+    /*< private >*/
+    SysBusDeviceClass parent_class;
+    /*< public >*/
+
+    /** Maximum buffer size in bytes per DMA descriptor */
+    size_t max_desc_size;
+    bool   is_sun4i;
+
+    /** does the IP block support autocalibration? */
+    bool can_calibrate;
+};
+
+#endif /* HW_SD_ALLWINNER_SDHOST_H */
diff --git a/include/hw/sd/aspeed_sdhci.h b/include/hw/sd/aspeed_sdhci.h
new file mode 100644 (file)
index 0000000..057bc5f
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Aspeed SD Host Controller
+ * Eddie James <eajames@linux.ibm.com>
+ *
+ * Copyright (C) 2019 IBM Corp
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef ASPEED_SDHCI_H
+#define ASPEED_SDHCI_H
+
+#include "hw/sd/sdhci.h"
+#include "qom/object.h"
+
+#define TYPE_ASPEED_SDHCI "aspeed.sdhci"
+OBJECT_DECLARE_SIMPLE_TYPE(AspeedSDHCIState, ASPEED_SDHCI)
+
+#define ASPEED_SDHCI_CAPABILITIES 0x01E80080
+#define ASPEED_SDHCI_NUM_SLOTS    2
+#define ASPEED_SDHCI_NUM_REGS     (ASPEED_SDHCI_REG_SIZE / sizeof(uint32_t))
+#define ASPEED_SDHCI_REG_SIZE     0x100
+
+struct AspeedSDHCIState {
+    SysBusDevice parent;
+
+    SDHCIState slots[ASPEED_SDHCI_NUM_SLOTS];
+    uint8_t num_slots;
+
+    MemoryRegion iomem;
+    qemu_irq irq;
+
+    uint32_t regs[ASPEED_SDHCI_NUM_REGS];
+};
+
+#endif /* ASPEED_SDHCI_H */
diff --git a/include/hw/sd/bcm2835_sdhost.h b/include/hw/sd/bcm2835_sdhost.h
new file mode 100644 (file)
index 0000000..f6bca5c
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Raspberry Pi (BCM2835) SD Host Controller
+ *
+ * Copyright (c) 2017 Antfield SAS
+ *
+ * Authors:
+ *  Clement Deschamps <clement.deschamps@antfield.fr>
+ *  Luc Michel <luc.michel@antfield.fr>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BCM2835_SDHOST_H
+#define BCM2835_SDHOST_H
+
+#include "hw/sysbus.h"
+#include "hw/sd/sd.h"
+#include "qom/object.h"
+
+#define TYPE_BCM2835_SDHOST "bcm2835-sdhost"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835SDHostState, BCM2835_SDHOST)
+
+#define BCM2835_SDHOST_FIFO_LEN 16
+
+struct BCM2835SDHostState {
+    SysBusDevice busdev;
+    SDBus sdbus;
+    MemoryRegion iomem;
+
+    uint32_t cmd;
+    uint32_t cmdarg;
+    uint32_t status;
+    uint32_t rsp[4];
+    uint32_t config;
+    uint32_t edm;
+    uint32_t vdd;
+    uint32_t hbct;
+    uint32_t hblc;
+    int32_t fifo_pos;
+    int32_t fifo_len;
+    uint32_t fifo[BCM2835_SDHOST_FIFO_LEN];
+    uint32_t datacnt;
+
+    qemu_irq irq;
+};
+
+#endif
diff --git a/include/hw/sd/cadence_sdhci.h b/include/hw/sd/cadence_sdhci.h
new file mode 100644 (file)
index 0000000..cd8288b
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Cadence SDHCI emulation
+ *
+ * Copyright (c) 2020 Wind River Systems, Inc.
+ *
+ * Author:
+ *   Bin Meng <bin.meng@windriver.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef CADENCE_SDHCI_H
+#define CADENCE_SDHCI_H
+
+#include "hw/sd/sdhci.h"
+
+#define CADENCE_SDHCI_REG_SIZE  0x100
+#define CADENCE_SDHCI_NUM_REGS  (CADENCE_SDHCI_REG_SIZE / sizeof(uint32_t))
+
+typedef struct CadenceSDHCIState {
+    SysBusDevice parent;
+
+    MemoryRegion container;
+    MemoryRegion iomem;
+    BusState *bus;
+
+    uint32_t regs[CADENCE_SDHCI_NUM_REGS];
+
+    SDHCIState sdhci;
+} CadenceSDHCIState;
+
+#define TYPE_CADENCE_SDHCI  "cadence.sdhci"
+#define CADENCE_SDHCI(obj)  OBJECT_CHECK(CadenceSDHCIState, (obj), \
+                                         TYPE_CADENCE_SDHCI)
+
+#endif /* CADENCE_SDHCI_H */
diff --git a/include/hw/sd/npcm7xx_sdhci.h b/include/hw/sd/npcm7xx_sdhci.h
new file mode 100644 (file)
index 0000000..ad8002f
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * NPCM7xx SD-3.0 / eMMC-4.51 Host Controller
+ *
+ * Copyright (c) 2021 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef NPCM7XX_SDHCI_H
+#define NPCM7XX_SDHCI_H
+
+#include "hw/sd/sdhci.h"
+#include "qom/object.h"
+
+#define TYPE_NPCM7XX_SDHCI "npcm7xx.sdhci"
+#define NPCM7XX_PRSTVALS_SIZE 6
+#define NPCM7XX_PRSTVALS 0x60
+#define NPCM7XX_PRSTVALS_0 0x0
+#define NPCM7XX_PRSTVALS_1 0x2
+#define NPCM7XX_PRSTVALS_2 0x4
+#define NPCM7XX_PRSTVALS_3 0x6
+#define NPCM7XX_PRSTVALS_4 0x8
+#define NPCM7XX_PRSTVALS_5 0xA
+#define NPCM7XX_BOOTTOCTRL 0x10
+#define NPCM7XX_SDHCI_REGSIZE 0x20
+
+#define NPCM7XX_PRSNTS_RESET 0x04A00000
+#define NPCM7XX_BLKGAP_RESET 0x80
+#define NPCM7XX_CAPAB_RESET 0x0100200161EE0399
+#define NPCM7XX_MAXCURR_RESET 0x0000000000000005
+#define NPCM7XX_HCVER_RESET 0x1002
+
+#define NPCM7XX_PRSTVALS_0_RESET 0x0040
+#define NPCM7XX_PRSTVALS_1_RESET 0x0001
+#define NPCM7XX_PRSTVALS_3_RESET 0x0001
+
+OBJECT_DECLARE_SIMPLE_TYPE(NPCM7xxSDHCIState, NPCM7XX_SDHCI)
+
+typedef struct NPCM7xxRegs {
+    /* Preset Values Register Field, read-only */
+    uint16_t prstvals[NPCM7XX_PRSTVALS_SIZE];
+    /* Boot Timeout Control Register, read-write */
+    uint32_t boottoctrl;
+} NPCM7xxRegisters;
+
+struct NPCM7xxSDHCIState {
+    SysBusDevice parent;
+
+    MemoryRegion container;
+    MemoryRegion iomem;
+    BusState *bus;
+    NPCM7xxRegisters regs;
+
+    SDHCIState sdhci;
+};
+
+#endif /* NPCM7XX_SDHCI_H */
diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h
new file mode 100644 (file)
index 0000000..2c8748f
--- /dev/null
@@ -0,0 +1,216 @@
+/*
+ * SD Memory Card emulation.  Mostly correct for MMC too.
+ *
+ * Copyright (c) 2006 Andrzej Zaborowski  <balrog@zabor.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HW_SD_H
+#define HW_SD_H
+
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+#define OUT_OF_RANGE            (1 << 31)
+#define ADDRESS_ERROR           (1 << 30)
+#define BLOCK_LEN_ERROR         (1 << 29)
+#define ERASE_SEQ_ERROR         (1 << 28)
+#define ERASE_PARAM             (1 << 27)
+#define WP_VIOLATION            (1 << 26)
+#define CARD_IS_LOCKED          (1 << 25)
+#define LOCK_UNLOCK_FAILED      (1 << 24)
+#define COM_CRC_ERROR           (1 << 23)
+#define ILLEGAL_COMMAND         (1 << 22)
+#define CARD_ECC_FAILED         (1 << 21)
+#define CC_ERROR                (1 << 20)
+#define SD_ERROR                (1 << 19)
+#define CID_CSD_OVERWRITE       (1 << 16)
+#define WP_ERASE_SKIP           (1 << 15)
+#define CARD_ECC_DISABLED       (1 << 14)
+#define ERASE_RESET             (1 << 13)
+#define CURRENT_STATE           (7 << 9)
+#define READY_FOR_DATA          (1 << 8)
+#define APP_CMD                 (1 << 5)
+#define AKE_SEQ_ERROR           (1 << 3)
+
+enum SDPhySpecificationVersion {
+    SD_PHY_SPECv1_10_VERS     = 1,
+    SD_PHY_SPECv2_00_VERS     = 2,
+    SD_PHY_SPECv3_01_VERS     = 3,
+};
+
+typedef enum {
+    SD_VOLTAGE_0_4V     = 400,  /* currently not supported */
+    SD_VOLTAGE_1_8V     = 1800,
+    SD_VOLTAGE_3_0V     = 3000,
+    SD_VOLTAGE_3_3V     = 3300,
+} sd_voltage_mv_t;
+
+typedef enum  {
+    UHS_NOT_SUPPORTED   = 0,
+    UHS_I               = 1,
+    UHS_II              = 2,    /* currently not supported */
+    UHS_III             = 3,    /* currently not supported */
+} sd_uhs_mode_t;
+
+typedef enum {
+    sd_none = -1,
+    sd_bc = 0, /* broadcast -- no response */
+    sd_bcr,    /* broadcast with response */
+    sd_ac,     /* addressed -- no data transfer */
+    sd_adtc,   /* addressed with data transfer */
+} sd_cmd_type_t;
+
+typedef struct {
+    uint8_t cmd;
+    uint32_t arg;
+    uint8_t crc;
+} SDRequest;
+
+
+#define TYPE_SD_CARD "sd-card"
+OBJECT_DECLARE_TYPE(SDState, SDCardClass, SD_CARD)
+
+#define TYPE_SD_CARD_SPI "sd-card-spi"
+DECLARE_INSTANCE_CHECKER(SDState, SD_CARD_SPI, TYPE_SD_CARD_SPI)
+
+struct SDCardClass {
+    /*< private >*/
+    DeviceClass parent_class;
+    /*< public >*/
+
+    int (*do_command)(SDState *sd, SDRequest *req, uint8_t *response);
+    /**
+     * Write a byte to a SD card.
+     * @sd: card
+     * @value: byte to write
+     *
+     * Write a byte on the data lines of a SD card.
+     */
+    void (*write_byte)(SDState *sd, uint8_t value);
+    /**
+     * Read a byte from a SD card.
+     * @sd: card
+     *
+     * Read a byte from the data lines of a SD card.
+     *
+     * Return: byte value read
+     */
+    uint8_t (*read_byte)(SDState *sd);
+    bool (*receive_ready)(SDState *sd);
+    bool (*data_ready)(SDState *sd);
+    void (*set_voltage)(SDState *sd, uint16_t millivolts);
+    uint8_t (*get_dat_lines)(SDState *sd);
+    bool (*get_cmd_line)(SDState *sd);
+    void (*enable)(SDState *sd, bool enable);
+    bool (*get_inserted)(SDState *sd);
+    bool (*get_readonly)(SDState *sd);
+
+    const struct SDProto *proto;
+};
+
+#define TYPE_SD_BUS "sd-bus"
+OBJECT_DECLARE_TYPE(SDBus, SDBusClass,
+                    SD_BUS)
+
+struct SDBus {
+    BusState qbus;
+};
+
+struct SDBusClass {
+    /*< private >*/
+    BusClass parent_class;
+    /*< public >*/
+
+    /* These methods are called by the SD device to notify the controller
+     * when the card insertion or readonly status changes
+     */
+    void (*set_inserted)(DeviceState *dev, bool inserted);
+    void (*set_readonly)(DeviceState *dev, bool readonly);
+};
+
+/* Functions to be used by qdevified callers (working via
+ * an SDBus rather than directly with SDState)
+ */
+void sdbus_set_voltage(SDBus *sdbus, uint16_t millivolts);
+uint8_t sdbus_get_dat_lines(SDBus *sdbus);
+bool sdbus_get_cmd_line(SDBus *sdbus);
+int sdbus_do_command(SDBus *sd, SDRequest *req, uint8_t *response);
+/**
+ * Write a byte to a SD bus.
+ * @sd: bus
+ * @value: byte to write
+ *
+ * Write a byte on the data lines of a SD bus.
+ */
+void sdbus_write_byte(SDBus *sd, uint8_t value);
+/**
+ * Read a byte from a SD bus.
+ * @sd: bus
+ *
+ * Read a byte from the data lines of a SD bus.
+ *
+ * Return: byte value read
+ */
+uint8_t sdbus_read_byte(SDBus *sd);
+/**
+ * Write data to a SD bus.
+ * @sdbus: bus
+ * @buf: data to write
+ * @length: number of bytes to write
+ *
+ * Write multiple bytes of data on the data lines of a SD bus.
+ */
+void sdbus_write_data(SDBus *sdbus, const void *buf, size_t length);
+/**
+ * Read data from a SD bus.
+ * @sdbus: bus
+ * @buf: buffer to read data into
+ * @length: number of bytes to read
+ *
+ * Read multiple bytes of data on the data lines of a SD bus.
+ */
+void sdbus_read_data(SDBus *sdbus, void *buf, size_t length);
+bool sdbus_receive_ready(SDBus *sd);
+bool sdbus_data_ready(SDBus *sd);
+bool sdbus_get_inserted(SDBus *sd);
+bool sdbus_get_readonly(SDBus *sd);
+/**
+ * sdbus_reparent_card: Reparent an SD card from one controller to another
+ * @from: controller bus to remove card from
+ * @to: controller bus to move card to
+ *
+ * Reparent an SD card, effectively unplugging it from one controller
+ * and inserting it into another. This is useful for SoCs like the
+ * bcm2835 which have two SD controllers and connect a single SD card
+ * to them, selected by the guest reprogramming GPIO line routing.
+ */
+void sdbus_reparent_card(SDBus *from, SDBus *to);
+
+/* Functions to be used by SD devices to report back to qdevified controllers */
+void sdbus_set_inserted(SDBus *sd, bool inserted);
+void sdbus_set_readonly(SDBus *sd, bool inserted);
+
+#endif /* HW_SD_H */
diff --git a/include/hw/sd/sdcard_legacy.h b/include/hw/sd/sdcard_legacy.h
new file mode 100644 (file)
index 0000000..0dc3889
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * SD Memory Card emulation (deprecated legacy API)
+ *
+ * Copyright (c) 2006 Andrzej Zaborowski  <balrog@zabor.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef HW_SDCARD_LEGACY_H
+#define HW_SDCARD_LEGACY_H
+
+#include "hw/sd/sd.h"
+
+/* Legacy functions to be used only by non-qdevified callers */
+SDState *sd_init(BlockBackend *blk, bool is_spi);
+int sd_do_command(SDState *card, SDRequest *request, uint8_t *response);
+void sd_write_byte(SDState *card, uint8_t value);
+uint8_t sd_read_byte(SDState *card);
+void sd_set_cb(SDState *card, qemu_irq readonly, qemu_irq insert);
+
+/* sd_enable should not be used -- it is only used on the nseries boards,
+ * where it is part of a broken implementation of the MMC card slot switch
+ * (there should be two card slots which are multiplexed to a single MMC
+ * controller, but instead we model it with one card and controller and
+ * disable the card when the second slot is selected, so it looks like the
+ * second slot is always empty).
+ */
+void sd_enable(SDState *card, bool enable);
+
+#endif /* HW_SDCARD_LEGACY_H */
diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h
new file mode 100644 (file)
index 0000000..6cd2822
--- /dev/null
@@ -0,0 +1,130 @@
+/*
+ * SD Association Host Standard Specification v2.0 controller emulation
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Mitsyanko Igor <i.mitsyanko@samsung.com>
+ * Peter A.G. Crosthwaite <peter.crosthwaite@petalogix.com>
+ *
+ * Based on MMC controller for Samsung S5PC1xx-based board emulation
+ * by Alexey Merkulov and Vladimir Monakhov.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU _General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef SDHCI_H
+#define SDHCI_H
+
+#include "hw/pci/pci_device.h"
+#include "hw/sysbus.h"
+#include "hw/sd/sd.h"
+#include "qom/object.h"
+
+/* SD/MMC host controller state */
+struct SDHCIState {
+    /*< private >*/
+    union {
+        PCIDevice pcidev;
+        SysBusDevice busdev;
+    };
+
+    /*< public >*/
+    SDBus sdbus;
+    MemoryRegion iomem;
+    AddressSpace sysbus_dma_as;
+    AddressSpace *dma_as;
+    MemoryRegion *dma_mr;
+    const MemoryRegionOps *io_ops;
+
+    QEMUTimer *insert_timer;       /* timer for 'changing' sd card. */
+    QEMUTimer *transfer_timer;
+    qemu_irq irq;
+
+    /* Registers cleared on reset */
+    uint32_t sdmasysad;    /* SDMA System Address register */
+    uint16_t blksize;      /* Host DMA Buff Boundary and Transfer BlkSize Reg */
+    uint16_t blkcnt;       /* Blocks count for current transfer */
+    uint32_t argument;     /* Command Argument Register */
+    uint16_t trnmod;       /* Transfer Mode Setting Register */
+    uint16_t cmdreg;       /* Command Register */
+    uint32_t rspreg[4];    /* Response Registers 0-3 */
+    uint32_t prnsts;       /* Present State Register */
+    uint8_t  hostctl1;     /* Host Control Register */
+    uint8_t  pwrcon;       /* Power control Register */
+    uint8_t  blkgap;       /* Block Gap Control Register */
+    uint8_t  wakcon;       /* WakeUp Control Register */
+    uint16_t clkcon;       /* Clock control Register */
+    uint8_t  timeoutcon;   /* Timeout Control Register */
+    uint8_t  admaerr;      /* ADMA Error Status Register */
+    uint16_t norintsts;    /* Normal Interrupt Status Register */
+    uint16_t errintsts;    /* Error Interrupt Status Register */
+    uint16_t norintstsen;  /* Normal Interrupt Status Enable Register */
+    uint16_t errintstsen;  /* Error Interrupt Status Enable Register */
+    uint16_t norintsigen;  /* Normal Interrupt Signal Enable Register */
+    uint16_t errintsigen;  /* Error Interrupt Signal Enable Register */
+    uint16_t acmd12errsts; /* Auto CMD12 error status register */
+    uint16_t hostctl2;     /* Host Control 2 */
+    uint64_t admasysaddr;  /* ADMA System Address Register */
+    uint16_t vendor_spec;  /* Vendor specific register */
+
+    /* Read-only registers */
+    uint64_t capareg;      /* Capabilities Register */
+    uint64_t maxcurr;      /* Maximum Current Capabilities Register */
+    uint16_t version;      /* Host Controller Version Register */
+
+    uint8_t  *fifo_buffer; /* SD host i/o FIFO buffer */
+    uint32_t buf_maxsz;
+    uint16_t data_count;   /* current element in FIFO buffer */
+    uint8_t  stopped_state;/* Current SDHC state */
+    bool     pending_insert_state;
+    /* Buffer Data Port Register - virtual access point to R and W buffers */
+    /* Software Reset Register - always reads as 0 */
+    /* Force Event Auto CMD12 Error Interrupt Reg - write only */
+    /* Force Event Error Interrupt Register- write only */
+    /* RO Host Controller Version Register always reads as 0x2401 */
+
+    /* Configurable properties */
+    bool pending_insert_quirk; /* Quirk for Raspberry Pi card insert int */
+    uint32_t quirks;
+    uint8_t endianness;
+    uint8_t sd_spec_version;
+    uint8_t uhs_mode;
+    uint8_t vendor;        /* For vendor specific functionality */
+};
+typedef struct SDHCIState SDHCIState;
+
+#define SDHCI_VENDOR_NONE       0
+#define SDHCI_VENDOR_IMX        1
+
+/*
+ * Controller does not provide transfer-complete interrupt when not
+ * busy.
+ *
+ * NOTE: This definition is taken out of Linux kernel and so the
+ * original bit number is preserved
+ */
+#define SDHCI_QUIRK_NO_BUSY_IRQ    BIT(14)
+
+#define TYPE_PCI_SDHCI "sdhci-pci"
+DECLARE_INSTANCE_CHECKER(SDHCIState, PCI_SDHCI,
+                         TYPE_PCI_SDHCI)
+
+#define TYPE_SYSBUS_SDHCI "generic-sdhci"
+DECLARE_INSTANCE_CHECKER(SDHCIState, SYSBUS_SDHCI,
+                         TYPE_SYSBUS_SDHCI)
+
+#define TYPE_IMX_USDHC "imx-usdhc"
+
+#define TYPE_S3C_SDHCI "s3c-sdhci"
+
+#endif /* SDHCI_H */
diff --git a/include/hw/sensor/emc141x_regs.h b/include/hw/sensor/emc141x_regs.h
new file mode 100644 (file)
index 0000000..e509a43
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * SMSC EMC141X temperature sensor.
+ *
+ * Browse the data sheet:
+ *
+ *    http://ww1.microchip.com/downloads/en/DeviceDoc/20005274A.pdf
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#ifndef EMC141X_REGS_H
+#define EMC141X_REGS_H
+
+#define EMC1413_DEVICE_ID                0x21
+#define EMC1414_DEVICE_ID                0x25
+#define MANUFACTURER_ID                  0x5d
+#define REVISION                         0x04
+
+/* the EMC141X registers */
+#define EMC141X_TEMP_HIGH0               0x00
+#define EMC141X_TEMP_HIGH1               0x01
+#define EMC141X_TEMP_HIGH2               0x23
+#define EMC141X_TEMP_HIGH3               0x2a
+#define EMC141X_TEMP_MAX_HIGH0           0x05
+#define EMC141X_TEMP_MIN_HIGH0           0x06
+#define EMC141X_TEMP_MAX_HIGH1           0x07
+#define EMC141X_TEMP_MIN_HIGH1           0x08
+#define EMC141X_TEMP_MAX_HIGH2           0x15
+#define EMC141X_TEMP_MIN_HIGH2           0x16
+#define EMC141X_TEMP_MAX_HIGH3           0x2c
+#define EMC141X_TEMP_MIN_HIGH3           0x2d
+#define EMC141X_DEVICE_ID                0xfd
+#define EMC141X_MANUFACTURER_ID          0xfe
+#define EMC141X_REVISION                 0xff
+
+#endif
diff --git a/include/hw/sensor/isl_pmbus_vr.h b/include/hw/sensor/isl_pmbus_vr.h
new file mode 100644 (file)
index 0000000..aa2c276
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * PMBus device for Renesas Digital Multiphase Voltage Regulators
+ *
+ * Copyright 2022 Google LLC
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_MISC_ISL_PMBUS_VR_H
+#define HW_MISC_ISL_PMBUS_VR_H
+
+#include "hw/i2c/pmbus_device.h"
+#include "qom/object.h"
+
+#define TYPE_ISL69259   "isl69259"
+#define TYPE_ISL69260   "isl69260"
+#define TYPE_RAA228000  "raa228000"
+#define TYPE_RAA229004  "raa229004"
+#define ISL_MAX_IC_DEVICE_ID_LEN 16
+
+struct ISLState {
+    PMBusDevice parent;
+
+    uint8_t ic_device_id[ISL_MAX_IC_DEVICE_ID_LEN];
+    uint8_t ic_device_id_len;
+};
+
+OBJECT_DECLARE_SIMPLE_TYPE(ISLState, ISL69260)
+
+#define ISL_CAPABILITY_DEFAULT                 0x40
+#define ISL_OPERATION_DEFAULT                  0x80
+#define ISL_ON_OFF_CONFIG_DEFAULT              0x16
+#define ISL_VOUT_MODE_DEFAULT                  0x40
+#define ISL_VOUT_COMMAND_DEFAULT               0x0384
+#define ISL_VOUT_MAX_DEFAULT                   0x08FC
+#define ISL_VOUT_MARGIN_HIGH_DEFAULT           0x0640
+#define ISL_VOUT_MARGIN_LOW_DEFAULT            0xFA
+#define ISL_VOUT_TRANSITION_RATE_DEFAULT       0x64
+#define ISL_VOUT_OV_FAULT_LIMIT_DEFAULT        0x076C
+#define ISL_OT_FAULT_LIMIT_DEFAULT             0x7D
+#define ISL_OT_WARN_LIMIT_DEFAULT              0x07D0
+#define ISL_VIN_OV_WARN_LIMIT_DEFAULT          0x36B0
+#define ISL_VIN_UV_WARN_LIMIT_DEFAULT          0x1F40
+#define ISL_IIN_OC_FAULT_LIMIT_DEFAULT         0x32
+#define ISL_TON_DELAY_DEFAULT                  0x14
+#define ISL_TON_RISE_DEFAULT                   0x01F4
+#define ISL_TOFF_FALL_DEFAULT                  0x01F4
+#define ISL_REVISION_DEFAULT                   0x33
+#define ISL_READ_VOUT_DEFAULT                  1000
+#define ISL_READ_IOUT_DEFAULT                  40
+#define ISL_READ_POUT_DEFAULT                  4
+#define ISL_READ_TEMP_DEFAULT                  25
+#define ISL_READ_VIN_DEFAULT                   1100
+#define ISL_READ_IIN_DEFAULT                   40
+#define ISL_READ_PIN_DEFAULT                   4
+
+#endif
diff --git a/include/hw/sensor/tmp105.h b/include/hw/sensor/tmp105.h
new file mode 100644 (file)
index 0000000..244e298
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Texas Instruments TMP105 Temperature Sensor
+ *
+ * Browse the data sheet:
+ *
+ *    http://www.ti.com/lit/gpn/tmp105
+ *
+ * Copyright (C) 2012 Alex Horn <alex.horn@cs.ox.ac.uk>
+ * Copyright (C) 2008-2012 Andrzej Zaborowski <balrogg@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_TMP105_H
+#define QEMU_TMP105_H
+
+#include "hw/i2c/i2c.h"
+#include "hw/sensor/tmp105_regs.h"
+#include "qom/object.h"
+
+#define TYPE_TMP105 "tmp105"
+OBJECT_DECLARE_SIMPLE_TYPE(TMP105State, TMP105)
+
+/**
+ * TMP105State:
+ * @config: Bits 5 and 6 (value 32 and 64) determine the precision of the
+ * temperature. See Table 8 in the data sheet.
+ *
+ * @see_also: http://www.ti.com/lit/gpn/tmp105
+ */
+struct TMP105State {
+    /*< private >*/
+    I2CSlave i2c;
+    /*< public >*/
+
+    uint8_t len;
+    uint8_t buf[2];
+    qemu_irq pin;
+
+    uint8_t pointer;
+    uint8_t config;
+    int16_t temperature;
+    int16_t limit[2];
+    int faults;
+    uint8_t alarm;
+    /*
+     * The TMP105 initially looks for a temperature rising above T_high;
+     * once this is detected, the condition it looks for next is the
+     * temperature falling below T_low. This flag is false when initially
+     * looking for T_high, true when looking for T_low.
+     */
+    bool detect_falling;
+};
+
+#endif
diff --git a/include/hw/sensor/tmp105_regs.h b/include/hw/sensor/tmp105_regs.h
new file mode 100644 (file)
index 0000000..ef015ee
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Texas Instruments TMP105 Temperature Sensor I2C messages
+ *
+ * Browse the data sheet:
+ *
+ *    http://www.ti.com/lit/gpn/tmp105
+ *
+ * Copyright (C) 2012 Alex Horn <alex.horn@cs.ox.ac.uk>
+ * Copyright (C) 2008-2012 Andrzej Zaborowski <balrogg@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#ifndef TMP105_REGS_H
+#define TMP105_REGS_H
+
+/**
+ * TMP105Reg:
+ * @TMP105_REG_TEMPERATURE: Temperature register
+ * @TMP105_REG_CONFIG: Configuration register
+ * @TMP105_REG_T_LOW: Low temperature register (also known as T_hyst)
+ * @TMP105_REG_T_HIGH: High temperature register (also known as T_OS)
+ *
+ * The following temperature sensors are
+ * compatible with the TMP105 registers:
+ * - adt75
+ * - ds1775
+ * - ds75
+ * - lm75
+ * - lm75a
+ * - max6625
+ * - max6626
+ * - mcp980x
+ * - stds75
+ * - tcn75
+ * - tmp100
+ * - tmp101
+ * - tmp105
+ * - tmp175
+ * - tmp275
+ * - tmp75
+ **/
+typedef enum TMP105Reg {
+    TMP105_REG_TEMPERATURE = 0,
+    TMP105_REG_CONFIG,
+    TMP105_REG_T_LOW,
+    TMP105_REG_T_HIGH,
+} TMP105Reg;
+
+#endif
diff --git a/include/hw/sh4/sh.h b/include/hw/sh4/sh.h
new file mode 100644 (file)
index 0000000..ec716cd
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Definitions for SH board emulation
+ *
+ * Copyright (c) 2005 Samuel Tardieu
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef QEMU_HW_SH_H
+#define QEMU_HW_SH_H
+
+#include "hw/sh4/sh_intc.h"
+#include "target/sh4/cpu-qom.h"
+
+#define A7ADDR(x) ((x) & 0x1fffffff)
+#define P4ADDR(x) ((x) | 0xe0000000)
+
+/* sh7750.c */
+struct SH7750State;
+
+struct SH7750State *sh7750_init(SuperHCPU *cpu, MemoryRegion *sysmem);
+
+typedef struct {
+    /* The callback will be triggered if any of the designated lines change */
+    uint16_t portamask_trigger;
+    uint16_t portbmask_trigger;
+    /* Return 0 if no action was taken */
+    int (*port_change_cb) (uint16_t porta, uint16_t portb,
+                           uint16_t *periph_pdtra,
+                           uint16_t *periph_portdira,
+                           uint16_t *periph_pdtrb,
+                           uint16_t *periph_portdirb);
+} sh7750_io_device;
+
+int sh7750_register_io_device(struct SH7750State *s,
+                              sh7750_io_device *device);
+
+/* sh_serial.c */
+#define TYPE_SH_SERIAL "sh-serial"
+#define SH_SERIAL_FEAT_SCIF (1 << 0)
+
+/* sh7750.c */
+qemu_irq sh7750_irl(struct SH7750State *s);
+
+/* tc58128.c */
+int tc58128_init(struct SH7750State *s, const char *zone1, const char *zone2);
+
+#endif
diff --git a/include/hw/sh4/sh_intc.h b/include/hw/sh4/sh_intc.h
new file mode 100644 (file)
index 0000000..f62d5c5
--- /dev/null
@@ -0,0 +1,81 @@
+#ifndef SH_INTC_H
+#define SH_INTC_H
+
+#include "exec/memory.h"
+
+typedef unsigned char intc_enum;
+
+struct intc_vect {
+    intc_enum enum_id;
+    unsigned short vect;
+};
+
+#define INTC_VECT(enum_id, vect) { enum_id, vect }
+
+struct intc_group {
+    intc_enum enum_id;
+    intc_enum enum_ids[32];
+};
+
+#define INTC_GROUP(enum_id, ...) { enum_id, {  __VA_ARGS__ } }
+
+struct intc_mask_reg {
+    unsigned long set_reg, clr_reg, reg_width;
+    intc_enum enum_ids[32];
+    unsigned long value;
+};
+
+struct intc_prio_reg {
+    unsigned long set_reg, clr_reg, reg_width, field_width;
+    intc_enum enum_ids[16];
+    unsigned long value;
+};
+
+#define _INTC_ARRAY(a) a, ARRAY_SIZE(a)
+
+struct intc_source {
+    unsigned short vect;
+    intc_enum next_enum_id;
+
+    int asserted; /* emulates the interrupt signal line from device to intc */
+    int enable_count;
+    int enable_max;
+    int pending; /* emulates the result of signal and masking */
+    struct intc_desc *parent;
+};
+
+struct intc_desc {
+    MemoryRegion iomem;
+    MemoryRegion *iomem_aliases;
+    qemu_irq *irqs;
+    struct intc_source *sources;
+    int nr_sources;
+    struct intc_mask_reg *mask_regs;
+    int nr_mask_regs;
+    struct intc_prio_reg *prio_regs;
+    int nr_prio_regs;
+    int pending; /* number of interrupt sources that has pending set */
+};
+
+int sh_intc_get_pending_vector(struct intc_desc *desc, int imask);
+
+void sh_intc_toggle_source(struct intc_source *source,
+                           int enable_adj, int assert_adj);
+
+void sh_intc_register_sources(struct intc_desc *desc,
+                              struct intc_vect *vectors,
+                              int nr_vectors,
+                              struct intc_group *groups,
+                              int nr_groups);
+
+int sh_intc_init(MemoryRegion *sysmem,
+                 struct intc_desc *desc,
+                 int nr_sources,
+                 struct intc_mask_reg *mask_regs,
+                 int nr_mask_regs,
+                 struct intc_prio_reg *prio_regs,
+                 int nr_prio_regs);
+
+void sh_intc_set_irl(void *opaque, int n, int level);
+
+#endif /* SH_INTC_H */
diff --git a/include/hw/southbridge/ich9.h b/include/hw/southbridge/ich9.h
new file mode 100644 (file)
index 0000000..fd01649
--- /dev/null
@@ -0,0 +1,247 @@
+#ifndef HW_SOUTHBRIDGE_ICH9_H
+#define HW_SOUTHBRIDGE_ICH9_H
+
+#include "hw/isa/apm.h"
+#include "hw/acpi/ich9.h"
+#include "hw/intc/ioapic.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_device.h"
+#include "hw/rtc/mc146818rtc.h"
+#include "exec/memory.h"
+#include "qemu/notify.h"
+#include "qom/object.h"
+
+void ich9_generate_smi(void);
+
+#define ICH9_CC_SIZE (16 * 1024) /* 16KB. Chipset configuration registers */
+
+#define TYPE_ICH9_LPC_DEVICE "ICH9-LPC"
+OBJECT_DECLARE_SIMPLE_TYPE(ICH9LPCState, ICH9_LPC_DEVICE)
+
+struct ICH9LPCState {
+    /* ICH9 LPC PCI to ISA bridge */
+    PCIDevice d;
+
+    /* (pci device, intx) -> pirq
+     * In real chipset case, the unused slots are never used
+     * as ICH9 supports only D25-D31 irq routing.
+     * On the other hand in qemu case, any slot/function can be populated
+     * via command line option.
+     * So fallback interrupt routing for any devices in any slots is necessary.
+    */
+    uint8_t irr[PCI_SLOT_MAX][PCI_NUM_PINS];
+
+    MC146818RtcState rtc;
+    APMState apm;
+    ICH9LPCPMRegs pm;
+    uint32_t sci_level; /* track sci level */
+    uint8_t sci_gsi;
+
+    /* 2.24 Pin Straps */
+    struct {
+        bool spkr_hi;
+    } pin_strap;
+
+    /* 10.1 Chipset Configuration registers(Memory Space)
+     which is pointed by RCBA */
+    uint8_t chip_config[ICH9_CC_SIZE];
+
+    /*
+     * 13.7.5 RST_CNT---Reset Control Register (LPC I/F---D31:F0)
+     *
+     * register contents and IO memory region
+     */
+    uint8_t rst_cnt;
+    MemoryRegion rst_cnt_mem;
+
+    /* SMI feature negotiation via fw_cfg */
+    uint64_t smi_host_features;       /* guest-invisible, host endian */
+    uint8_t smi_host_features_le[8];  /* guest-visible, read-only, little
+                                       * endian uint64_t */
+    uint8_t smi_guest_features_le[8]; /* guest-visible, read-write, little
+                                       * endian uint64_t */
+    uint8_t smi_features_ok;          /* guest-visible, read-only; selecting it
+                                       * triggers feature lockdown */
+    uint64_t smi_negotiated_features; /* guest-invisible, host endian */
+
+    MemoryRegion rcrb_mem; /* root complex register block */
+    Notifier machine_ready;
+
+    qemu_irq gsi[IOAPIC_NUM_PINS];
+};
+
+#define ICH9_MASK(bit, ms_bit, ls_bit) \
+((uint##bit##_t)(((1ULL << ((ms_bit) + 1)) - 1) & ~((1ULL << ls_bit) - 1)))
+
+/* ICH9: Chipset Configuration Registers */
+#define ICH9_CC_ADDR_MASK                       (ICH9_CC_SIZE - 1)
+
+#define ICH9_CC
+#define ICH9_CC_D28IP                           0x310C
+#define ICH9_CC_D28IP_SHIFT                     4
+#define ICH9_CC_D28IP_MASK                      0xf
+#define ICH9_CC_D28IP_DEFAULT                   0x00214321
+#define ICH9_CC_D31IR                           0x3140
+#define ICH9_CC_D30IR                           0x3142
+#define ICH9_CC_D29IR                           0x3144
+#define ICH9_CC_D28IR                           0x3146
+#define ICH9_CC_D27IR                           0x3148
+#define ICH9_CC_D26IR                           0x314C
+#define ICH9_CC_D25IR                           0x3150
+#define ICH9_CC_DIR_DEFAULT                     0x3210
+#define ICH9_CC_D30IR_DEFAULT                   0x0
+#define ICH9_CC_DIR_SHIFT                       4
+#define ICH9_CC_DIR_MASK                        0x7
+#define ICH9_CC_OIC                             0x31FF
+#define ICH9_CC_OIC_AEN                         0x1
+#define ICH9_CC_GCS                             0x3410
+#define ICH9_CC_GCS_DEFAULT                     0x00000020
+#define ICH9_CC_GCS_NO_REBOOT                   (1 << 5)
+
+/* D28:F[0-5] */
+#define ICH9_PCIE_DEV                           28
+#define ICH9_PCIE_FUNC_MAX                      6
+
+
+/* D29:F0 USB UHCI Controller #1 */
+#define ICH9_USB_UHCI1_DEV                      29
+#define ICH9_USB_UHCI1_FUNC                     0
+
+/* D30:F0 DMI-to-PCI bridge */
+#define ICH9_D2P_BRIDGE                         "ICH9 D2P BRIDGE"
+#define ICH9_D2P_BRIDGE_SAVEVM_VERSION          0
+
+#define ICH9_D2P_BRIDGE_DEV                     30
+#define ICH9_D2P_BRIDGE_FUNC                    0
+
+#define ICH9_D2P_SECONDARY_DEFAULT              (256 - 8)
+
+#define ICH9_D2P_A2_REVISION                    0x92
+
+/* D31:F0 LPC Processor Interface */
+#define ICH9_RST_CNT_IOPORT                     0xCF9
+
+/* D31:F1 LPC controller */
+#define ICH9_A2_LPC                             "ICH9 A2 LPC"
+#define ICH9_A2_LPC_SAVEVM_VERSION              0
+
+#define ICH9_LPC_DEV                            31
+#define ICH9_LPC_FUNC                           0
+
+#define ICH9_A2_LPC_REVISION                    0x2
+#define ICH9_LPC_NB_PIRQS                       8       /* PCI A-H */
+
+#define ICH9_LPC_PMBASE                         0x40
+#define ICH9_LPC_PMBASE_BASE_ADDRESS_MASK       ICH9_MASK(32, 15, 7)
+#define ICH9_LPC_PMBASE_RTE                     0x1
+#define ICH9_LPC_PMBASE_DEFAULT                 0x1
+
+#define ICH9_LPC_ACPI_CTRL                      0x44
+#define ICH9_LPC_ACPI_CTRL_ACPI_EN              0x80
+#define ICH9_LPC_ACPI_CTRL_SCI_IRQ_SEL_MASK     ICH9_MASK(8, 2, 0)
+#define ICH9_LPC_ACPI_CTRL_9                    0x0
+#define ICH9_LPC_ACPI_CTRL_10                   0x1
+#define ICH9_LPC_ACPI_CTRL_11                   0x2
+#define ICH9_LPC_ACPI_CTRL_20                   0x4
+#define ICH9_LPC_ACPI_CTRL_21                   0x5
+#define ICH9_LPC_ACPI_CTRL_DEFAULT              0x0
+
+#define ICH9_LPC_PIRQA_ROUT                     0x60
+#define ICH9_LPC_PIRQB_ROUT                     0x61
+#define ICH9_LPC_PIRQC_ROUT                     0x62
+#define ICH9_LPC_PIRQD_ROUT                     0x63
+
+#define ICH9_LPC_PIRQE_ROUT                     0x68
+#define ICH9_LPC_PIRQF_ROUT                     0x69
+#define ICH9_LPC_PIRQG_ROUT                     0x6a
+#define ICH9_LPC_PIRQH_ROUT                     0x6b
+
+#define ICH9_LPC_PIRQ_ROUT_IRQEN                0x80
+#define ICH9_LPC_PIRQ_ROUT_MASK                 ICH9_MASK(8, 3, 0)
+#define ICH9_LPC_PIRQ_ROUT_DEFAULT              0x80
+
+#define ICH9_LPC_GEN_PMCON_1                    0xa0
+#define ICH9_LPC_GEN_PMCON_1_SMI_LOCK           (1 << 4)
+#define ICH9_LPC_GEN_PMCON_2                    0xa2
+#define ICH9_LPC_GEN_PMCON_3                    0xa4
+#define ICH9_LPC_GEN_PMCON_LOCK                 0xa6
+
+#define ICH9_LPC_RCBA                           0xf0
+#define ICH9_LPC_RCBA_BA_MASK                   ICH9_MASK(32, 31, 14)
+#define ICH9_LPC_RCBA_EN                        0x1
+#define ICH9_LPC_RCBA_DEFAULT                   0x0
+
+#define ICH9_LPC_PIC_NUM_PINS                   16
+#define ICH9_LPC_IOAPIC_NUM_PINS                24
+
+#define ICH9_GPIO_GSI "gsi"
+
+/* D31:F2 SATA Controller #1 */
+#define ICH9_SATA1_DEV                          31
+#define ICH9_SATA1_FUNC                         2
+
+/* D31:F0 power management I/O registers
+   offset from the address ICH9_LPC_PMBASE */
+
+/* ICH9 LPC PM I/O registers are 128 ports and 128-aligned */
+#define ICH9_PMIO_SIZE                          128
+#define ICH9_PMIO_MASK                          (ICH9_PMIO_SIZE - 1)
+
+#define ICH9_PMIO_PM1_STS                       0x00
+#define ICH9_PMIO_PM1_EN                        0x02
+#define ICH9_PMIO_PM1_CNT                       0x04
+#define ICH9_PMIO_PM1_TMR                       0x08
+#define ICH9_PMIO_GPE0_STS                      0x20
+#define ICH9_PMIO_GPE0_EN                       0x28
+#define ICH9_PMIO_GPE0_LEN                      16
+#define ICH9_PMIO_SMI_EN                        0x30
+#define ICH9_PMIO_SMI_EN_APMC_EN                (1 << 5)
+#define ICH9_PMIO_SMI_EN_TCO_EN                 (1 << 13)
+#define ICH9_PMIO_SMI_STS                       0x34
+#define ICH9_PMIO_TCO_RLD                       0x60
+#define ICH9_PMIO_TCO_LEN                       32
+
+/* FADT ACPI_ENABLE/ACPI_DISABLE */
+#define ICH9_APM_ACPI_ENABLE                    0x2
+#define ICH9_APM_ACPI_DISABLE                   0x3
+
+
+/* D31:F3 SMBus controller */
+#define TYPE_ICH9_SMB_DEVICE "ICH9-SMB"
+
+#define ICH9_A2_SMB_REVISION                    0x02
+#define ICH9_SMB_PI                             0x00
+
+#define ICH9_SMB_SMBMBAR0                       0x10
+#define ICH9_SMB_SMBMBAR1                       0x14
+#define ICH9_SMB_SMBM_BAR                       0
+#define ICH9_SMB_SMBM_SIZE                      (1 << 8)
+#define ICH9_SMB_SMB_BASE                       0x20
+#define ICH9_SMB_SMB_BASE_BAR                   4
+#define ICH9_SMB_SMB_BASE_SIZE                  (1 << 5)
+#define ICH9_SMB_HOSTC                          0x40
+#define ICH9_SMB_HOSTC_SSRESET                  ((uint8_t)(1 << 3))
+#define ICH9_SMB_HOSTC_I2C_EN                   ((uint8_t)(1 << 2))
+#define ICH9_SMB_HOSTC_SMB_SMI_EN               ((uint8_t)(1 << 1))
+#define ICH9_SMB_HOSTC_HST_EN                   ((uint8_t)(1 << 0))
+
+/* D31:F3 SMBus I/O and memory mapped I/O registers */
+#define ICH9_SMB_DEV                            31
+#define ICH9_SMB_FUNC                           3
+
+#define ICH9_SMB_HST_STS                        0x00
+#define ICH9_SMB_HST_CNT                        0x02
+#define ICH9_SMB_HST_CMD                        0x03
+#define ICH9_SMB_XMIT_SLVA                      0x04
+#define ICH9_SMB_HST_D0                         0x05
+#define ICH9_SMB_HST_D1                         0x06
+#define ICH9_SMB_HOST_BLOCK_DB                  0x07
+
+#define ICH9_LPC_SMI_NEGOTIATED_FEAT_PROP "x-smi-negotiated-features"
+
+/* bit positions used in fw_cfg SMI feature negotiation */
+#define ICH9_LPC_SMI_F_BROADCAST_BIT            0
+#define ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT          1
+#define ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT       2
+
+#endif /* HW_SOUTHBRIDGE_ICH9_H */
diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h
new file mode 100644 (file)
index 0000000..86709ba
--- /dev/null
@@ -0,0 +1,84 @@
+/*
+ * QEMU PIIX South Bridge Emulation
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ * Copyright (c) 2018 Hervé Poussineau
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_SOUTHBRIDGE_PIIX_H
+#define HW_SOUTHBRIDGE_PIIX_H
+
+#include "hw/pci/pci_device.h"
+#include "hw/acpi/piix4.h"
+#include "hw/ide/pci.h"
+#include "hw/rtc/mc146818rtc.h"
+#include "hw/usb/hcd-uhci.h"
+
+/* PIRQRC[A:D]: PIRQx Route Control Registers */
+#define PIIX_PIRQCA 0x60
+#define PIIX_PIRQCB 0x61
+#define PIIX_PIRQCC 0x62
+#define PIIX_PIRQCD 0x63
+
+/*
+ * Reset Control Register: PCI-accessible ISA-Compatible Register at address
+ * 0xcf9, provided by the PCI/ISA bridge (PIIX3 PCI function 0, 8086:7000).
+ */
+#define PIIX_RCR_IOPORT 0xcf9
+
+#define PIIX_NUM_PIRQS          4ULL    /* PIRQ[A-D] */
+
+struct PIIXState {
+    PCIDevice dev;
+
+    /*
+     * bitmap to track pic levels.
+     * The pic level is the logical OR of all the PCI irqs mapped to it
+     * So one PIC level is tracked by PIIX_NUM_PIRQS bits.
+     *
+     * PIRQ is mapped to PIC pins, we track it by
+     * PIIX_NUM_PIRQS * ISA_NUM_IRQS = 64 bits with
+     * pic_irq * PIIX_NUM_PIRQS + pirq
+     */
+#if ISA_NUM_IRQS * PIIX_NUM_PIRQS > 64
+#error "unable to encode pic state in 64bit in pic_levels."
+#endif
+    uint64_t pic_levels;
+
+    qemu_irq cpu_intr;
+    qemu_irq isa_irqs_in[ISA_NUM_IRQS];
+
+    /* This member isn't used. Just for save/load compatibility */
+    int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS];
+
+    MC146818RtcState rtc;
+    PCIIDEState ide;
+    UHCIState uhci;
+    PIIX4PMState pm;
+
+    uint32_t smb_io_base;
+
+    /* Reset Control Register contents */
+    uint8_t rcr;
+
+    /* IO memory region for Reset Control Register (PIIX_RCR_IOPORT) */
+    MemoryRegion rcr_mem;
+
+    bool has_acpi;
+    bool has_pic;
+    bool has_pit;
+    bool has_usb;
+    bool smm_enabled;
+};
+
+#define TYPE_PIIX_PCI_DEVICE "pci-piix"
+OBJECT_DECLARE_SIMPLE_TYPE(PIIXState, PIIX_PCI_DEVICE)
+
+#define TYPE_PIIX3_DEVICE "PIIX3"
+#define TYPE_PIIX4_PCI_DEVICE "piix4-isa"
+
+#endif
diff --git a/include/hw/sparc/grlib.h b/include/hw/sparc/grlib.h
new file mode 100644 (file)
index 0000000..ef1946c
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * QEMU GRLIB Components
+ *
+ * Copyright (c) 2010-2019 AdaCore
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef GRLIB_H
+#define GRLIB_H
+
+#include "hw/sysbus.h"
+
+/* Emulation of GrLib device is base on the GRLIB IP Core User's Manual:
+ * http://www.gaisler.com/products/grlib/grip.pdf
+ */
+
+/* IRQMP */
+#define TYPE_GRLIB_IRQMP "grlib-irqmp"
+
+void grlib_irqmp_ack(DeviceState *dev, int intno);
+
+/* GPTimer */
+#define TYPE_GRLIB_GPTIMER "grlib-gptimer"
+
+/* APB UART */
+#define TYPE_GRLIB_APB_UART "grlib-apbuart"
+
+#endif /* GRLIB_H */
diff --git a/include/hw/sparc/sparc32_dma.h b/include/hw/sparc/sparc32_dma.h
new file mode 100644 (file)
index 0000000..cde8ec0
--- /dev/null
@@ -0,0 +1,63 @@
+#ifndef SPARC32_DMA_H
+#define SPARC32_DMA_H
+
+#include "hw/sysbus.h"
+#include "hw/scsi/esp.h"
+#include "hw/net/lance.h"
+#include "qom/object.h"
+
+#define DMA_REGS 4
+
+#define TYPE_SPARC32_DMA_DEVICE "sparc32-dma-device"
+OBJECT_DECLARE_SIMPLE_TYPE(DMADeviceState, SPARC32_DMA_DEVICE)
+
+
+struct DMADeviceState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    uint32_t dmaregs[DMA_REGS];
+    qemu_irq irq;
+    void *iommu;
+    qemu_irq gpio[2];
+};
+
+#define TYPE_SPARC32_ESPDMA_DEVICE "sparc32-espdma"
+OBJECT_DECLARE_SIMPLE_TYPE(ESPDMADeviceState, SPARC32_ESPDMA_DEVICE)
+
+struct ESPDMADeviceState {
+    DMADeviceState parent_obj;
+
+    SysBusESPState esp;
+};
+
+#define TYPE_SPARC32_LEDMA_DEVICE "sparc32-ledma"
+OBJECT_DECLARE_SIMPLE_TYPE(LEDMADeviceState, SPARC32_LEDMA_DEVICE)
+
+struct LEDMADeviceState {
+    DMADeviceState parent_obj;
+
+    SysBusPCNetState lance;
+};
+
+#define TYPE_SPARC32_DMA "sparc32-dma"
+OBJECT_DECLARE_SIMPLE_TYPE(SPARC32DMAState, SPARC32_DMA)
+
+struct SPARC32DMAState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion dmamem;
+    MemoryRegion ledma_alias;
+    ESPDMADeviceState espdma;
+    LEDMADeviceState ledma;
+};
+
+/* sparc32_dma.c */
+void ledma_memory_read(void *opaque, hwaddr addr,
+                       uint8_t *buf, int len, int do_bswap);
+void ledma_memory_write(void *opaque, hwaddr addr,
+                        uint8_t *buf, int len, int do_bswap);
+void espdma_memory_read(void *opaque, uint8_t *buf, int len);
+void espdma_memory_write(void *opaque, uint8_t *buf, int len);
+
+#endif
diff --git a/include/hw/sparc/sparc64.h b/include/hw/sparc/sparc64.h
new file mode 100644 (file)
index 0000000..4ced36f
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef HW_SPARC_SPARC64_H
+#define HW_SPARC_SPARC64_H
+
+#include "target/sparc/cpu-qom.h"
+
+#define IVEC_MAX             0x40
+
+SPARCCPU *sparc64_cpu_devinit(const char *cpu_type, uint64_t prom_addr);
+
+void sparc64_cpu_set_ivec_irq(void *opaque, int irq, int level);
+
+#endif
diff --git a/include/hw/sparc/sun4m_iommu.h b/include/hw/sparc/sun4m_iommu.h
new file mode 100644 (file)
index 0000000..4e2ab34
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * QEMU Sun4m iommu emulation
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef SUN4M_IOMMU_H
+#define SUN4M_IOMMU_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define IOMMU_NREGS         (4 * 4096 / 4)
+
+struct IOMMUState {
+    SysBusDevice parent_obj;
+
+    AddressSpace iommu_as;
+    IOMMUMemoryRegion iommu;
+
+    MemoryRegion iomem;
+    uint32_t regs[IOMMU_NREGS];
+    hwaddr iostart;
+    qemu_irq irq;
+    uint32_t version;
+};
+typedef struct IOMMUState IOMMUState;
+
+#define TYPE_SUN4M_IOMMU "sun4m-iommu"
+DECLARE_INSTANCE_CHECKER(IOMMUState, SUN4M_IOMMU,
+                         TYPE_SUN4M_IOMMU)
+
+#define TYPE_SUN4M_IOMMU_MEMORY_REGION "sun4m-iommu-memory-region"
+
+#endif
diff --git a/include/hw/sparc/sun4u_iommu.h b/include/hw/sparc/sun4u_iommu.h
new file mode 100644 (file)
index 0000000..f94566a
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * QEMU sun4u IOMMU emulation
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ * Copyright (c) 2012,2013 Artyom Tarasenko
+ * Copyright (c) 2017 Mark Cave-Ayland
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef SUN4U_IOMMU_H
+#define SUN4U_IOMMU_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define IOMMU_NREGS             3
+
+struct IOMMUState {
+    SysBusDevice parent_obj;
+
+    AddressSpace iommu_as;
+    IOMMUMemoryRegion iommu;
+
+    MemoryRegion iomem;
+    uint64_t regs[IOMMU_NREGS];
+};
+typedef struct IOMMUState IOMMUState;
+
+#define TYPE_SUN4U_IOMMU "sun4u-iommu"
+DECLARE_INSTANCE_CHECKER(IOMMUState, SUN4U_IOMMU,
+                         TYPE_SUN4U_IOMMU)
+
+#define TYPE_SUN4U_IOMMU_MEMORY_REGION "sun4u-iommu-memory-region"
+
+#endif
diff --git a/include/hw/ssi/aspeed_smc.h b/include/hw/ssi/aspeed_smc.h
new file mode 100644 (file)
index 0000000..8e1dda5
--- /dev/null
@@ -0,0 +1,118 @@
+/*
+ * ASPEED AST2400 SMC Controller (SPI Flash Only)
+ *
+ * Copyright (C) 2016 IBM Corp.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef ASPEED_SMC_H
+#define ASPEED_SMC_H
+
+#include "hw/ssi/ssi.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+struct AspeedSMCState;
+struct AspeedSMCClass;
+
+#define TYPE_ASPEED_SMC_FLASH "aspeed.smc.flash"
+OBJECT_DECLARE_SIMPLE_TYPE(AspeedSMCFlash, ASPEED_SMC_FLASH)
+struct AspeedSMCFlash {
+    SysBusDevice parent_obj;
+
+    struct AspeedSMCState *controller;
+    struct AspeedSMCClass *asc;
+    uint8_t cs;
+
+    MemoryRegion mmio;
+};
+
+#define TYPE_ASPEED_SMC "aspeed.smc"
+OBJECT_DECLARE_TYPE(AspeedSMCState, AspeedSMCClass, ASPEED_SMC)
+
+#define ASPEED_SMC_R_MAX        (0x100 / 4)
+#define ASPEED_SMC_CS_MAX       5
+
+struct AspeedSMCState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion mmio;
+    MemoryRegion mmio_flash_container;
+    MemoryRegion mmio_flash;
+
+    qemu_irq irq;
+
+    qemu_irq *cs_lines;
+    bool inject_failure;
+
+    SSIBus *spi;
+
+    uint32_t regs[ASPEED_SMC_R_MAX];
+
+    /* depends on the controller type */
+    uint8_t r_conf;
+    uint8_t r_ce_ctrl;
+    uint8_t r_ctrl0;
+    uint8_t r_timings;
+    uint8_t conf_enable_w0;
+
+    AddressSpace flash_as;
+    MemoryRegion *dram_mr;
+    AddressSpace dram_as;
+
+    AspeedSMCFlash flashes[ASPEED_SMC_CS_MAX];
+
+    uint8_t snoop_index;
+    uint8_t snoop_dummies;
+};
+
+typedef struct AspeedSegments {
+    hwaddr addr;
+    uint32_t size;
+} AspeedSegments;
+
+struct AspeedSMCClass {
+    SysBusDeviceClass parent_obj;
+
+    uint8_t r_conf;
+    uint8_t r_ce_ctrl;
+    uint8_t r_ctrl0;
+    uint8_t r_timings;
+    uint8_t nregs_timings;
+    uint8_t conf_enable_w0;
+    uint8_t cs_num_max;
+    const uint32_t *resets;
+    const AspeedSegments *segments;
+    uint32_t segment_addr_mask;
+    hwaddr flash_window_base;
+    uint32_t flash_window_size;
+    uint32_t features;
+    hwaddr dma_flash_mask;
+    hwaddr dma_dram_mask;
+    uint32_t nregs;
+    uint32_t (*segment_to_reg)(const AspeedSMCState *s,
+                               const AspeedSegments *seg);
+    void (*reg_to_segment)(const AspeedSMCState *s, uint32_t reg,
+                           AspeedSegments *seg);
+    void (*dma_ctrl)(AspeedSMCState *s, uint32_t value);
+    int (*addr_width)(const AspeedSMCState *s);
+};
+
+#endif /* ASPEED_SMC_H */
diff --git a/include/hw/ssi/ibex_spi_host.h b/include/hw/ssi/ibex_spi_host.h
new file mode 100644 (file)
index 0000000..5bd5557
--- /dev/null
@@ -0,0 +1,92 @@
+
+/*
+ * QEMU model of the Ibex SPI Controller
+ * SPEC Reference: https://docs.opentitan.org/hw/ip/spi_host/doc/
+ *
+ * Copyright (C) 2022 Western Digital
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef IBEX_SPI_HOST_H
+#define IBEX_SPI_HOST_H
+
+#include "hw/sysbus.h"
+#include "hw/ssi/ssi.h"
+#include "qemu/fifo8.h"
+#include "qom/object.h"
+#include "qemu/timer.h"
+
+#define TYPE_IBEX_SPI_HOST "ibex-spi"
+#define IBEX_SPI_HOST(obj) \
+    OBJECT_CHECK(IbexSPIHostState, (obj), TYPE_IBEX_SPI_HOST)
+
+/* SPI Registers */
+#define IBEX_SPI_HOST_INTR_STATE         (0x00 / 4)  /* rw1c */
+#define IBEX_SPI_HOST_INTR_ENABLE        (0x04 / 4)  /* rw */
+#define IBEX_SPI_HOST_INTR_TEST          (0x08 / 4)  /* wo */
+#define IBEX_SPI_HOST_ALERT_TEST         (0x0c / 4)  /* wo */
+#define IBEX_SPI_HOST_CONTROL            (0x10 / 4)  /* rw */
+#define IBEX_SPI_HOST_STATUS             (0x14 / 4)  /* ro */
+#define IBEX_SPI_HOST_CONFIGOPTS         (0x18 / 4)  /* rw */
+#define IBEX_SPI_HOST_CSID               (0x1c / 4)  /* rw */
+#define IBEX_SPI_HOST_COMMAND            (0x20 / 4)  /* wo */
+/* RX/TX Modelled by FIFO */
+#define IBEX_SPI_HOST_RXDATA             (0x24 / 4)
+#define IBEX_SPI_HOST_TXDATA             (0x28 / 4)
+
+#define IBEX_SPI_HOST_ERROR_ENABLE       (0x2c / 4)  /* rw */
+#define IBEX_SPI_HOST_ERROR_STATUS       (0x30 / 4)  /* rw1c */
+#define IBEX_SPI_HOST_EVENT_ENABLE       (0x34 / 4)  /* rw */
+
+/* FIFO Len in Bytes */
+#define IBEX_SPI_HOST_TXFIFO_LEN         288
+#define IBEX_SPI_HOST_RXFIFO_LEN         256
+
+/*  Max Register (Based on addr) */
+#define IBEX_SPI_HOST_MAX_REGS           (IBEX_SPI_HOST_EVENT_ENABLE + 1)
+
+/* MISC */
+#define TX_INTERRUPT_TRIGGER_DELAY_NS    100
+#define BIDIRECTIONAL_TRANSFER           3
+
+typedef struct {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion mmio;
+    uint32_t regs[IBEX_SPI_HOST_MAX_REGS];
+    /* Multi-reg that sets config opts per CS */
+    uint32_t *config_opts;
+    Fifo8 rx_fifo;
+    Fifo8 tx_fifo;
+    QEMUTimer *fifo_trigger_handle;
+
+    qemu_irq event;
+    qemu_irq host_err;
+    uint32_t num_cs;
+    qemu_irq *cs_lines;
+    SSIBus *ssi;
+
+    /* Used to track the init status, for replicating TXDATA ghost writes */
+    bool init_status;
+} IbexSPIHostState;
+
+#endif
diff --git a/include/hw/ssi/imx_spi.h b/include/hw/ssi/imx_spi.h
new file mode 100644 (file)
index 0000000..eeaf49b
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ * IMX SPI Controller
+ *
+ * Copyright 2016 Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef IMX_SPI_H
+#define IMX_SPI_H
+
+#include "hw/sysbus.h"
+#include "hw/ssi/ssi.h"
+#include "qemu/bitops.h"
+#include "qemu/fifo32.h"
+#include "qom/object.h"
+
+#define ECSPI_FIFO_SIZE 64
+
+#define ECSPI_RXDATA 0
+#define ECSPI_TXDATA 1
+#define ECSPI_CONREG 2
+#define ECSPI_CONFIGREG 3
+#define ECSPI_INTREG 4
+#define ECSPI_DMAREG 5
+#define ECSPI_STATREG 6
+#define ECSPI_PERIODREG 7
+#define ECSPI_TESTREG 8
+#define ECSPI_MSGDATA 16
+#define ECSPI_MAX 17
+
+/* ECSPI_CONREG */
+#define ECSPI_CONREG_EN (1 << 0)
+#define ECSPI_CONREG_HT (1 << 1)
+#define ECSPI_CONREG_XCH (1 << 2)
+#define ECSPI_CONREG_SMC (1 << 3)
+#define ECSPI_CONREG_CHANNEL_MODE_SHIFT 4
+#define ECSPI_CONREG_CHANNEL_MODE_LENGTH 4
+#define ECSPI_CONREG_DRCTL_SHIFT 16
+#define ECSPI_CONREG_DRCTL_LENGTH 2
+#define ECSPI_CONREG_CHANNEL_SELECT_SHIFT 18
+#define ECSPI_CONREG_CHANNEL_SELECT_LENGTH 2
+#define ECSPI_CONREG_BURST_LENGTH_SHIFT 20
+#define ECSPI_CONREG_BURST_LENGTH_LENGTH 12
+
+/* ECSPI_CONFIGREG */
+#define ECSPI_CONFIGREG_SS_CTL_SHIFT 8
+#define ECSPI_CONFIGREG_SS_CTL_LENGTH 4
+
+/* ECSPI_INTREG */
+#define ECSPI_INTREG_TEEN (1 << 0)
+#define ECSPI_INTREG_TDREN (1 << 1)
+#define ECSPI_INTREG_TFEN (1 << 2)
+#define ECSPI_INTREG_RREN (1 << 3)
+#define ECSPI_INTREG_RDREN (1 << 4)
+#define ECSPI_INTREG_RFEN (1 << 5)
+#define ECSPI_INTREG_ROEN (1 << 6)
+#define ECSPI_INTREG_TCEN (1 << 7)
+
+/* ECSPI_DMAREG */
+#define ECSPI_DMAREG_RXTDEN (1 << 31)
+#define ECSPI_DMAREG_RXDEN (1 << 23)
+#define ECSPI_DMAREG_TEDEN (1 << 7)
+#define ECSPI_DMAREG_RX_THRESHOLD_SHIFT 16
+#define ECSPI_DMAREG_RX_THRESHOLD_LENGTH 6
+
+/* ECSPI_STATREG */
+#define ECSPI_STATREG_TE (1 << 0)
+#define ECSPI_STATREG_TDR (1 << 1)
+#define ECSPI_STATREG_TF (1 << 2)
+#define ECSPI_STATREG_RR (1 << 3)
+#define ECSPI_STATREG_RDR (1 << 4)
+#define ECSPI_STATREG_RF (1 << 5)
+#define ECSPI_STATREG_RO (1 << 6)
+#define ECSPI_STATREG_TC (1 << 7)
+
+#define EXTRACT(value, name) extract32(value, name##_SHIFT, name##_LENGTH)
+
+/* number of chip selects supported */
+#define ECSPI_NUM_CS 4
+
+#define TYPE_IMX_SPI "imx.spi"
+OBJECT_DECLARE_SIMPLE_TYPE(IMXSPIState, IMX_SPI)
+
+struct IMXSPIState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion iomem;
+
+    qemu_irq irq;
+
+    qemu_irq cs_lines[ECSPI_NUM_CS];
+
+    SSIBus *bus;
+
+    uint32_t regs[ECSPI_MAX];
+
+    Fifo32 rx_fifo;
+    Fifo32 tx_fifo;
+
+    int16_t burst_length;
+};
+
+#endif /* IMX_SPI_H */
diff --git a/include/hw/ssi/mss-spi.h b/include/hw/ssi/mss-spi.h
new file mode 100644 (file)
index 0000000..ce6279c
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Microsemi SmartFusion2 SPI
+ *
+ * Copyright (c) 2017 Subbaraya Sundeep <sundeep.lkml@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_MSS_SPI_H
+#define HW_MSS_SPI_H
+
+#include "hw/sysbus.h"
+#include "hw/ssi/ssi.h"
+#include "qemu/fifo32.h"
+#include "qom/object.h"
+
+#define TYPE_MSS_SPI   "mss-spi"
+OBJECT_DECLARE_SIMPLE_TYPE(MSSSpiState, MSS_SPI)
+
+#define R_SPI_MAX             16
+
+struct MSSSpiState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion mmio;
+
+    qemu_irq irq;
+
+    qemu_irq cs_line;
+
+    SSIBus *spi;
+
+    Fifo32 rx_fifo;
+    Fifo32 tx_fifo;
+
+    int fifo_depth;
+    uint32_t frame_count;
+    bool enabled;
+
+    uint32_t regs[R_SPI_MAX];
+};
+
+#endif /* HW_MSS_SPI_H */
diff --git a/include/hw/ssi/npcm7xx_fiu.h b/include/hw/ssi/npcm7xx_fiu.h
new file mode 100644 (file)
index 0000000..a3a1704
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Nuvoton NPCM7xx Flash Interface Unit (FIU)
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM7XX_FIU_H
+#define NPCM7XX_FIU_H
+
+#include "hw/ssi/ssi.h"
+#include "hw/sysbus.h"
+
+/*
+ * Number of registers in our device state structure. Don't change this without
+ * incrementing the version_id in the vmstate.
+ */
+#define NPCM7XX_FIU_NR_REGS (0x7c / sizeof(uint32_t))
+
+typedef struct NPCM7xxFIUState NPCM7xxFIUState;
+
+/**
+ * struct NPCM7xxFIUFlash - Per-chipselect flash controller state.
+ * @direct_access: Memory region for direct flash access.
+ * @fiu: Pointer to flash controller shared state.
+ */
+typedef struct NPCM7xxFIUFlash {
+    MemoryRegion direct_access;
+    NPCM7xxFIUState *fiu;
+} NPCM7xxFIUFlash;
+
+/**
+ * NPCM7xxFIUState - Device state for one Flash Interface Unit.
+ * @parent: System bus device.
+ * @mmio: Memory region for register access.
+ * @cs_count: Number of flash chips that may be connected to this module.
+ * @active_cs: Currently active chip select, or -1 if no chip is selected.
+ * @cs_lines: GPIO lines that may be wired to flash chips.
+ * @flash: Array of @cs_count per-flash-chip state objects.
+ * @spi: The SPI bus mastered by this controller.
+ * @regs: Register contents.
+ *
+ * Each FIU has a shared bank of registers, and controls up to four chip
+ * selects. Each chip select has a dedicated memory region which may be used to
+ * read and write the flash connected to that chip select as if it were memory.
+ */
+struct NPCM7xxFIUState {
+    SysBusDevice parent;
+
+    MemoryRegion mmio;
+
+    int32_t cs_count;
+    int32_t active_cs;
+    qemu_irq *cs_lines;
+    NPCM7xxFIUFlash *flash;
+
+    SSIBus *spi;
+
+    uint32_t regs[NPCM7XX_FIU_NR_REGS];
+};
+
+#define TYPE_NPCM7XX_FIU "npcm7xx-fiu"
+#define NPCM7XX_FIU(obj) OBJECT_CHECK(NPCM7xxFIUState, (obj), TYPE_NPCM7XX_FIU)
+
+#endif /* NPCM7XX_FIU_H */
diff --git a/include/hw/ssi/npcm_pspi.h b/include/hw/ssi/npcm_pspi.h
new file mode 100644 (file)
index 0000000..37cc784
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Nuvoton Peripheral SPI Module
+ *
+ * Copyright 2023 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM_PSPI_H
+#define NPCM_PSPI_H
+
+#include "hw/ssi/ssi.h"
+#include "hw/sysbus.h"
+
+/*
+ * Number of registers in our device state structure. Don't change this without
+ * incrementing the version_id in the vmstate.
+ */
+#define NPCM_PSPI_NR_REGS 3
+
+/**
+ * NPCMPSPIState - Device state for one Flash Interface Unit.
+ * @parent: System bus device.
+ * @mmio: Memory region for register access.
+ * @spi: The SPI bus mastered by this controller.
+ * @regs: Register contents.
+ * @irq: The interrupt request queue for this module.
+ *
+ * Each PSPI has a shared bank of registers, and controls up to four chip
+ * selects. Each chip select has a dedicated memory region which may be used to
+ * read and write the flash connected to that chip select as if it were memory.
+ */
+typedef struct NPCMPSPIState {
+    SysBusDevice parent;
+
+    MemoryRegion mmio;
+
+    SSIBus *spi;
+    uint16_t regs[NPCM_PSPI_NR_REGS];
+    qemu_irq irq;
+} NPCMPSPIState;
+
+#define TYPE_NPCM_PSPI "npcm-pspi"
+OBJECT_DECLARE_SIMPLE_TYPE(NPCMPSPIState, NPCM_PSPI)
+
+#endif /* NPCM_PSPI_H */
diff --git a/include/hw/ssi/pl022.h b/include/hw/ssi/pl022.h
new file mode 100644 (file)
index 0000000..25d58db
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * ARM PrimeCell PL022 Synchronous Serial Port
+ *
+ * Copyright (c) 2007 CodeSourcery.
+ * Written by Paul Brook
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+/*
+ * This is a model of the Arm PrimeCell PL022 synchronous serial port.
+ * The PL022 TRM is:
+ * https://developer.arm.com/documentation/ddi0194/latest
+ *
+ * QEMU interface:
+ * + sysbus IRQ: SSPINTR combined interrupt line
+ * + sysbus MMIO region 0: MemoryRegion for the device's registers
+ */
+
+#ifndef HW_SSI_PL022_H
+#define HW_SSI_PL022_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_PL022 "pl022"
+OBJECT_DECLARE_SIMPLE_TYPE(PL022State, PL022)
+
+struct PL022State {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    uint32_t cr0;
+    uint32_t cr1;
+    uint32_t bitmask;
+    uint32_t sr;
+    uint32_t cpsr;
+    uint32_t is;
+    uint32_t im;
+    /* The FIFO head points to the next empty entry.  */
+    int tx_fifo_head;
+    int rx_fifo_head;
+    int tx_fifo_len;
+    int rx_fifo_len;
+    uint16_t tx_fifo[8];
+    uint16_t rx_fifo[8];
+    qemu_irq irq;
+    SSIBus *ssi;
+};
+
+#endif
diff --git a/include/hw/ssi/sifive_spi.h b/include/hw/ssi/sifive_spi.h
new file mode 100644 (file)
index 0000000..d0c40cd
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * QEMU model of the SiFive SPI Controller
+ *
+ * Copyright (c) 2021 Wind River Systems, Inc.
+ *
+ * Author:
+ *   Bin Meng <bin.meng@windriver.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_SIFIVE_SPI_H
+#define HW_SIFIVE_SPI_H
+
+#include "qemu/fifo8.h"
+#include "hw/sysbus.h"
+
+#define SIFIVE_SPI_REG_NUM  (0x78 / 4)
+
+#define TYPE_SIFIVE_SPI "sifive.spi"
+#define SIFIVE_SPI(obj) OBJECT_CHECK(SiFiveSPIState, (obj), TYPE_SIFIVE_SPI)
+
+typedef struct SiFiveSPIState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion mmio;
+    qemu_irq irq;
+
+    uint32_t num_cs;
+    qemu_irq *cs_lines;
+
+    SSIBus *spi;
+
+    Fifo8 tx_fifo;
+    Fifo8 rx_fifo;
+
+    uint32_t regs[SIFIVE_SPI_REG_NUM];
+} SiFiveSPIState;
+
+#endif /* HW_SIFIVE_SPI_H */
diff --git a/include/hw/ssi/ssi.h b/include/hw/ssi/ssi.h
new file mode 100644 (file)
index 0000000..3cdcbd5
--- /dev/null
@@ -0,0 +1,117 @@
+/* QEMU Synchronous Serial Interface support.  */
+
+/*
+ * In principle SSI is a point-point interface.  As such the qemu
+ * implementation has a single peripheral on a "bus".
+ * However it is fairly common for boards to have multiple peripherals
+ * connected to a single master, and select devices with an external
+ * chip select.  This is implemented in qemu by having an explicit mux device.
+ * It is assumed that master and peripheral are both using the same transfer
+ * width.
+ */
+
+#ifndef QEMU_SSI_H
+#define QEMU_SSI_H
+
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+typedef enum SSICSMode SSICSMode;
+
+#define TYPE_SSI_PERIPHERAL "ssi-peripheral"
+OBJECT_DECLARE_TYPE(SSIPeripheral, SSIPeripheralClass,
+                    SSI_PERIPHERAL)
+
+#define SSI_GPIO_CS "ssi-gpio-cs"
+
+enum SSICSMode {
+    SSI_CS_NONE = 0,
+    SSI_CS_LOW,
+    SSI_CS_HIGH,
+};
+
+/* Peripherals.  */
+struct SSIPeripheralClass {
+    DeviceClass parent_class;
+
+    void (*realize)(SSIPeripheral *dev, Error **errp);
+
+    /* if you have standard or no CS behaviour, just override transfer.
+     * This is called when the device cs is active (true by default).
+     */
+    uint32_t (*transfer)(SSIPeripheral *dev, uint32_t val);
+    /* called when the CS line changes. Optional, devices only need to implement
+     * this if they have side effects associated with the cs line (beyond
+     * tristating the txrx lines).
+     */
+    int (*set_cs)(SSIPeripheral *dev, bool select);
+    /* define whether or not CS exists and is active low/high */
+    SSICSMode cs_polarity;
+
+    /* if you have non-standard CS behaviour override this to take control
+     * of the CS behaviour at the device level. transfer, set_cs, and
+     * cs_polarity are unused if this is overwritten. Transfer_raw will
+     * always be called for the device for every txrx access to the parent bus
+     */
+    uint32_t (*transfer_raw)(SSIPeripheral *dev, uint32_t val);
+};
+
+struct SSIPeripheral {
+    DeviceState parent_obj;
+
+    /* cache the class */
+    SSIPeripheralClass *spc;
+
+    /* Chip select state */
+    bool cs;
+
+    /* Chip select index */
+    uint8_t cs_index;
+};
+
+extern const VMStateDescription vmstate_ssi_peripheral;
+
+#define VMSTATE_SSI_PERIPHERAL(_field, _state) {                     \
+    .name       = (stringify(_field)),                               \
+    .size       = sizeof(SSIPeripheral),                             \
+    .vmsd       = &vmstate_ssi_peripheral,                           \
+    .flags      = VMS_STRUCT,                                        \
+    .offset     = vmstate_offset_value(_state, _field, SSIPeripheral), \
+}
+
+DeviceState *ssi_create_peripheral(SSIBus *bus, const char *name);
+/**
+ * ssi_realize_and_unref: realize and unref an SSI peripheral
+ * @dev: SSI peripheral to realize
+ * @bus: SSI bus to put it on
+ * @errp: error pointer
+ *
+ * Call 'realize' on @dev, put it on the specified @bus, and drop the
+ * reference to it. Errors are reported via @errp and by returning
+ * false.
+ *
+ * This function is useful if you have created @dev via qdev_new()
+ * (which takes a reference to the device it returns to you), so that
+ * you can set properties on it before realizing it. If you don't need
+ * to set properties then ssi_create_peripheral() is probably better (as it
+ * does the create, init and realize in one step).
+ *
+ * If you are embedding the SSI peripheral into another QOM device and
+ * initialized it via some variant on object_initialize_child() then
+ * do not use this function, because that family of functions arrange
+ * for the only reference to the child device to be held by the parent
+ * via the child<> property, and so the reference-count-drop done here
+ * would be incorrect.  (Instead you would want ssi_realize(), which
+ * doesn't currently exist but would be trivial to create if we had
+ * any code that wanted it.)
+ */
+bool ssi_realize_and_unref(DeviceState *dev, SSIBus *bus, Error **errp);
+
+/* Master interface.  */
+SSIBus *ssi_create_bus(DeviceState *parent, const char *name);
+
+uint32_t ssi_transfer(SSIBus *bus, uint32_t val);
+
+DeviceState *ssi_get_cs(SSIBus *bus, uint8_t cs_index);
+
+#endif
diff --git a/include/hw/ssi/stm32f2xx_spi.h b/include/hw/ssi/stm32f2xx_spi.h
new file mode 100644 (file)
index 0000000..3683b4a
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ * STM32F2XX SPI
+ *
+ * Copyright (c) 2014 Alistair Francis <alistair@alistair23.me>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_STM32F2XX_SPI_H
+#define HW_STM32F2XX_SPI_H
+
+#include "hw/sysbus.h"
+#include "hw/ssi/ssi.h"
+#include "qom/object.h"
+
+#define STM_SPI_CR1     0x00
+#define STM_SPI_CR2     0x04
+#define STM_SPI_SR      0x08
+#define STM_SPI_DR      0x0C
+#define STM_SPI_CRCPR   0x10
+#define STM_SPI_RXCRCR  0x14
+#define STM_SPI_TXCRCR  0x18
+#define STM_SPI_I2SCFGR 0x1C
+#define STM_SPI_I2SPR   0x20
+
+#define STM_SPI_CR1_SPE  (1 << 6)
+#define STM_SPI_CR1_MSTR (1 << 2)
+
+#define STM_SPI_SR_RXNE   1
+
+#define TYPE_STM32F2XX_SPI "stm32f2xx-spi"
+OBJECT_DECLARE_SIMPLE_TYPE(STM32F2XXSPIState, STM32F2XX_SPI)
+
+struct STM32F2XXSPIState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion mmio;
+
+    uint32_t spi_cr1;
+    uint32_t spi_cr2;
+    uint32_t spi_sr;
+    uint32_t spi_dr;
+    uint32_t spi_crcpr;
+    uint32_t spi_rxcrcr;
+    uint32_t spi_txcrcr;
+    uint32_t spi_i2scfgr;
+    uint32_t spi_i2spr;
+
+    qemu_irq irq;
+    SSIBus *ssi;
+};
+
+#endif /* HW_STM32F2XX_SPI_H */
diff --git a/include/hw/ssi/xilinx_spips.h b/include/hw/ssi/xilinx_spips.h
new file mode 100644 (file)
index 0000000..7a754bf
--- /dev/null
@@ -0,0 +1,146 @@
+/*
+ * Header file for the Xilinx Zynq SPI controller
+ *
+ * Copyright (C) 2015 Xilinx Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XILINX_SPIPS_H
+#define XILINX_SPIPS_H
+
+#include "hw/ssi/ssi.h"
+#include "qemu/fifo32.h"
+#include "hw/stream.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+typedef struct XilinxSPIPS XilinxSPIPS;
+
+/* For SPIPS, QSPIPS.  */
+#define XLNX_SPIPS_R_MAX        (0x100 / 4)
+/* For ZYNQMP_QSPIPS.  */
+#define XLNX_ZYNQMP_SPIPS_R_MAX (0x200 / 4)
+
+/* Bite off 4k chunks at a time */
+#define LQSPI_CACHE_SIZE 1024
+
+#define QSPI_DMA_MAX_BURST_SIZE 2048
+
+typedef enum {
+    READ = 0x3,         READ_4 = 0x13,
+    FAST_READ = 0xb,    FAST_READ_4 = 0x0c,
+    DOR = 0x3b,         DOR_4 = 0x3c,
+    QOR = 0x6b,         QOR_4 = 0x6c,
+    DIOR = 0xbb,        DIOR_4 = 0xbc,
+    QIOR = 0xeb,        QIOR_4 = 0xec,
+
+    PP = 0x2,           PP_4 = 0x12,
+    DPP = 0xa2,
+    QPP = 0x32,         QPP_4 = 0x34,
+} FlashCMD;
+
+struct XilinxSPIPS {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    MemoryRegion mmlqspi;
+
+    qemu_irq irq;
+    int irqline;
+
+    uint8_t num_cs;
+    uint8_t num_busses;
+
+    uint8_t snoop_state;
+    int cmd_dummies;
+    uint8_t link_state;
+    uint8_t link_state_next;
+    uint8_t link_state_next_when;
+    qemu_irq *cs_lines;
+    bool *cs_lines_state;
+    SSIBus **spi;
+
+    Fifo8 rx_fifo;
+    Fifo8 tx_fifo;
+
+    uint8_t num_txrx_bytes;
+    uint32_t rx_discard;
+
+    uint32_t regs[XLNX_SPIPS_R_MAX];
+
+    bool man_start_com;
+};
+
+struct XilinxQSPIPS {
+    XilinxSPIPS parent_obj;
+
+    uint8_t lqspi_buf[LQSPI_CACHE_SIZE];
+    hwaddr lqspi_cached_addr;
+    Error *migration_blocker;
+    bool mmio_execution_enabled;
+};
+typedef struct XilinxQSPIPS XilinxQSPIPS;
+
+struct XlnxZynqMPQSPIPS {
+    XilinxQSPIPS parent_obj;
+
+    StreamSink *dma;
+    int gqspi_irqline;
+
+    uint32_t regs[XLNX_ZYNQMP_SPIPS_R_MAX];
+
+    /* GQSPI has separate tx/rx fifos */
+    Fifo8 rx_fifo_g;
+    Fifo8 tx_fifo_g;
+    Fifo32 fifo_g;
+    /*
+     * At the end of each generic command, misaligned extra bytes are discard
+     * or padded to tx and rx respectively to round it out (and avoid need for
+     * individual byte access. Since we use byte fifos, keep track of the
+     * alignment WRT to word access.
+     */
+    uint8_t rx_fifo_g_align;
+    uint8_t tx_fifo_g_align;
+    bool man_start_com_g;
+    uint32_t dma_burst_size;
+    uint8_t dma_buf[QSPI_DMA_MAX_BURST_SIZE];
+};
+
+struct XilinxSPIPSClass {
+    SysBusDeviceClass parent_class;
+
+    const MemoryRegionOps *reg_ops;
+    uint64_t reg_size;
+
+    uint32_t rx_fifo_size;
+    uint32_t tx_fifo_size;
+};
+
+#define TYPE_XILINX_SPIPS "xlnx.ps7-spi"
+#define TYPE_XILINX_QSPIPS "xlnx.ps7-qspi"
+#define TYPE_XLNX_ZYNQMP_QSPIPS "xlnx.usmp-gqspi"
+
+OBJECT_DECLARE_TYPE(XilinxSPIPS, XilinxSPIPSClass, XILINX_SPIPS)
+
+OBJECT_DECLARE_SIMPLE_TYPE(XilinxQSPIPS, XILINX_QSPIPS)
+
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPQSPIPS, XLNX_ZYNQMP_QSPIPS)
+
+#endif /* XILINX_SPIPS_H */
diff --git a/include/hw/ssi/xlnx-versal-ospi.h b/include/hw/ssi/xlnx-versal-ospi.h
new file mode 100644 (file)
index 0000000..4ac975a
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * Header file for the Xilinx Versal's OSPI controller
+ *
+ * Copyright (C) 2021 Xilinx Inc
+ * Written by Francisco Iglesias <francisco.iglesias@xilinx.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * This is a model of Xilinx Versal's Octal SPI flash memory controller
+ * documented in Versal's Technical Reference manual [1] and the Versal ACAP
+ * Register reference [2].
+ *
+ * References:
+ *
+ * [1] Versal ACAP Technical Reference Manual,
+ *     https://www.xilinx.com/support/documentation/architecture-manuals/am011-versal-acap-trm.pdf
+ *
+ * [2] Versal ACAP Register Reference,
+ *     https://docs.xilinx.com/r/en-US/am012-versal-register-reference/OSPI-Module
+ *
+ *
+ * QEMU interface:
+ * + sysbus MMIO region 0: MemoryRegion for the device's registers
+ * + sysbus MMIO region 1: MemoryRegion for flash memory linear address space
+ *   (data transfer).
+ * + sysbus IRQ 0: Device interrupt.
+ * + Named GPIO input "ospi-mux-sel": 0: enables indirect access mode
+ *   and 1: enables direct access mode.
+ * + Property "dac-with-indac": Allow both direct accesses and indirect
+ *   accesses simultaneously.
+ * + Property "indac-write-disabled": Disable indirect access writes.
+ */
+
+#ifndef XLNX_VERSAL_OSPI_H
+#define XLNX_VERSAL_OSPI_H
+
+#include "hw/register.h"
+#include "hw/ssi/ssi.h"
+#include "qemu/fifo8.h"
+#include "hw/dma/xlnx_csu_dma.h"
+
+#define TYPE_XILINX_VERSAL_OSPI "xlnx.versal-ospi"
+
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalOspi, XILINX_VERSAL_OSPI)
+
+#define XILINX_VERSAL_OSPI_R_MAX (0xfc / 4 + 1)
+
+/*
+ * Indirect operations
+ */
+typedef struct IndOp {
+    uint32_t flash_addr;
+    uint32_t num_bytes;
+    uint32_t done_bytes;
+    bool completed;
+} IndOp;
+
+struct XlnxVersalOspi {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    MemoryRegion iomem_dac;
+
+    uint8_t num_cs;
+    qemu_irq *cs_lines;
+
+    SSIBus *spi;
+
+    Fifo8 rx_fifo;
+    Fifo8 tx_fifo;
+
+    Fifo8 rx_sram;
+    Fifo8 tx_sram;
+
+    qemu_irq irq;
+
+    XlnxCSUDMA *dma_src;
+    bool ind_write_disabled;
+    bool dac_with_indac;
+    bool dac_enable;
+    bool src_dma_inprog;
+
+    IndOp rd_ind_op[2];
+    IndOp wr_ind_op[2];
+
+    uint32_t regs[XILINX_VERSAL_OSPI_R_MAX];
+    RegisterInfo regs_info[XILINX_VERSAL_OSPI_R_MAX];
+
+    /* Maximum inferred membank size is 512 bytes */
+    uint8_t stig_membank[512];
+};
+
+#endif /* XLNX_VERSAL_OSPI_H */
index e39d5a5b558458a5791e8f7f531fcc255d00ab60..f166facb090a7853992be6bbacba037f24bd7183 100644 (file)
@@ -3,51 +3,50 @@
 
 #include "qom/object.h"
 
-/* stream slave. Used until qdev provides a generic way.  */
-#define TYPE_STREAM_SLAVE "stream-slave"
+#define TYPE_STREAM_SINK "stream-sink"
 
-typedef struct StreamSlaveClass StreamSlaveClass;
-DECLARE_CLASS_CHECKERS(StreamSlaveClass, STREAM_SLAVE,
-                       TYPE_STREAM_SLAVE)
-#define STREAM_SLAVE(obj) \
-     INTERFACE_CHECK(StreamSlave, (obj), TYPE_STREAM_SLAVE)
+typedef struct StreamSinkClass StreamSinkClass;
+DECLARE_CLASS_CHECKERS(StreamSinkClass, STREAM_SINK,
+                       TYPE_STREAM_SINK)
+#define STREAM_SINK(obj) \
+     INTERFACE_CHECK(StreamSink, (obj), TYPE_STREAM_SINK)
 
-typedef struct StreamSlave StreamSlave;
+typedef struct StreamSink StreamSink;
 
 typedef void (*StreamCanPushNotifyFn)(void *opaque);
 
-struct StreamSlaveClass {
+struct StreamSinkClass {
     InterfaceClass parent;
     /**
-     * can push - determine if a stream slave is capable of accepting at least
+     * can push - determine if a stream sink is capable of accepting at least
      * one byte of data. Returns false if cannot accept. If not implemented, the
-     * slave is assumed to always be capable of receiving.
-     * @notify: Optional callback that the slave will call when the slave is
+     * sink is assumed to always be capable of receiving.
+     * @notify: Optional callback that the sink will call when the sink is
      * capable of receiving again. Only called if false is returned.
      * @notify_opaque: opaque data to pass to notify call.
      */
-    bool (*can_push)(StreamSlave *obj, StreamCanPushNotifyFn notify,
+    bool (*can_push)(StreamSink *obj, StreamCanPushNotifyFn notify,
                      void *notify_opaque);
     /**
-     * push - push data to a Stream slave. The number of bytes pushed is
-     * returned. If the slave short returns, the master must wait before trying
-     * again, the slave may continue to just return 0 waiting for the vm time to
+     * push - push data to a Stream sink. The number of bytes pushed is
+     * returned. If the sink short returns, the master must wait before trying
+     * again, the sink may continue to just return 0 waiting for the vm time to
      * advance. The can_push() function can be used to trap the point in time
-     * where the slave is ready to receive again, otherwise polling on a QEMU
+     * where the sink is ready to receive again, otherwise polling on a QEMU
      * timer will work.
-     * @obj: Stream slave to push to
+     * @obj: Stream sink to push to
      * @buf: Data to write
      * @len: Maximum number of bytes to write
      * @eop: End of packet flag
      */
-    size_t (*push)(StreamSlave *obj, unsigned char *buf, size_t len, bool eop);
+    size_t (*push)(StreamSink *obj, unsigned char *buf, size_t len, bool eop);
 };
 
 size_t
-stream_push(StreamSlave *sink, uint8_t *buf, size_t len, bool eop);
+stream_push(StreamSink *sink, uint8_t *buf, size_t len, bool eop);
 
 bool
-stream_can_push(StreamSlave *sink, StreamCanPushNotifyFn notify,
+stream_can_push(StreamSink *sink, StreamCanPushNotifyFn notify,
                 void *notify_opaque);
 
 
diff --git a/include/hw/timer/a9gtimer.h b/include/hw/timer/a9gtimer.h
new file mode 100644 (file)
index 0000000..6ae9122
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Global peripheral timer block for ARM A9MP
+ *
+ * (C) 2013 Xilinx Inc.
+ *
+ * Written by François LEGAL
+ * Written by Peter Crosthwaite <peter.crosthwaite@xilinx.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef A9GTIMER_H
+#define A9GTIMER_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define A9_GTIMER_MAX_CPUS 4
+
+#define TYPE_A9_GTIMER "arm.cortex-a9-global-timer"
+OBJECT_DECLARE_SIMPLE_TYPE(A9GTimerState, A9_GTIMER)
+
+#define R_COUNTER_LO                0x00
+#define R_COUNTER_HI                0x04
+
+#define R_CONTROL                   0x08
+#define R_CONTROL_TIMER_ENABLE      (1 << 0)
+#define R_CONTROL_COMP_ENABLE       (1 << 1)
+#define R_CONTROL_IRQ_ENABLE        (1 << 2)
+#define R_CONTROL_AUTO_INCREMENT    (1 << 3)
+#define R_CONTROL_PRESCALER_SHIFT   8
+#define R_CONTROL_PRESCALER_LEN     8
+#define R_CONTROL_PRESCALER_MASK    (((1 << R_CONTROL_PRESCALER_LEN) - 1) << \
+                                     R_CONTROL_PRESCALER_SHIFT)
+
+#define R_CONTROL_BANKED            (R_CONTROL_COMP_ENABLE | \
+                                     R_CONTROL_IRQ_ENABLE | \
+                                     R_CONTROL_AUTO_INCREMENT)
+#define R_CONTROL_NEEDS_SYNC        (R_CONTROL_TIMER_ENABLE | \
+                                     R_CONTROL_PRESCALER_MASK)
+
+#define R_INTERRUPT_STATUS          0x0C
+#define R_COMPARATOR_LO             0x10
+#define R_COMPARATOR_HI             0x14
+#define R_AUTO_INCREMENT            0x18
+
+typedef struct A9GTimerPerCPU A9GTimerPerCPU;
+
+struct A9GTimerPerCPU {
+    A9GTimerState *parent;
+
+    uint32_t control; /* only per cpu banked bits valid */
+    uint64_t compare;
+    uint32_t status;
+    uint32_t inc;
+
+    MemoryRegion iomem;
+    qemu_irq irq; /* PPI interrupts */
+};
+
+struct A9GTimerState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion iomem;
+    /* static props */
+    uint32_t num_cpu;
+
+    QEMUTimer *timer;
+
+    uint64_t counter; /* current timer value */
+
+    uint64_t ref_counter;
+    uint64_t cpu_ref_time; /* the cpu time as of last update of ref_counter */
+    uint32_t control; /* only non per cpu banked bits valid */
+
+    A9GTimerPerCPU per_cpu[A9_GTIMER_MAX_CPUS];
+};
+
+typedef struct A9GTimerUpdate {
+    uint64_t now;
+    uint64_t new;
+} A9GTimerUpdate;
+
+#endif /* A9GTIMER_H */
diff --git a/include/hw/timer/allwinner-a10-pit.h b/include/hw/timer/allwinner-a10-pit.h
new file mode 100644 (file)
index 0000000..8435758
--- /dev/null
@@ -0,0 +1,68 @@
+#ifndef ALLWINNER_A10_PIT_H
+#define ALLWINNER_A10_PIT_H
+
+#include "hw/ptimer.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_AW_A10_PIT "allwinner-A10-timer"
+OBJECT_DECLARE_SIMPLE_TYPE(AwA10PITState, AW_A10_PIT)
+
+#define AW_A10_PIT_TIMER_NR    6
+#define AW_A10_PIT_TIMER_IRQ   0x1
+#define AW_A10_PIT_WDOG_IRQ    0x100
+
+#define AW_A10_PIT_TIMER_IRQ_EN    0
+#define AW_A10_PIT_TIMER_IRQ_ST    0x4
+
+#define AW_A10_PIT_TIMER_CONTROL   0x0
+#define AW_A10_PIT_TIMER_EN        0x1
+#define AW_A10_PIT_TIMER_RELOAD    0x2
+#define AW_A10_PIT_TIMER_MODE      0x80
+
+#define AW_A10_PIT_TIMER_INTERVAL  0x4
+#define AW_A10_PIT_TIMER_COUNT     0x8
+#define AW_A10_PIT_WDOG_CONTROL    0x90
+#define AW_A10_PIT_WDOG_MODE       0x94
+
+#define AW_A10_PIT_COUNT_CTL       0xa0
+#define AW_A10_PIT_COUNT_RL_EN     0x2
+#define AW_A10_PIT_COUNT_CLR_EN    0x1
+#define AW_A10_PIT_COUNT_LO        0xa4
+#define AW_A10_PIT_COUNT_HI        0xa8
+
+#define AW_A10_PIT_TIMER_BASE      0x10
+#define AW_A10_PIT_TIMER_BASE_END  \
+    (AW_A10_PIT_TIMER_BASE * 6 + AW_A10_PIT_TIMER_COUNT)
+
+#define AW_A10_PIT_DEFAULT_CLOCK   0x4
+
+
+typedef struct AwA10TimerContext {
+    AwA10PITState *container;
+    int index;
+} AwA10TimerContext;
+
+struct AwA10PITState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+    qemu_irq irq[AW_A10_PIT_TIMER_NR];
+    ptimer_state * timer[AW_A10_PIT_TIMER_NR];
+    AwA10TimerContext timer_context[AW_A10_PIT_TIMER_NR];
+    MemoryRegion iomem;
+    uint32_t clk_freq[4];
+
+    uint32_t irq_enable;
+    uint32_t irq_status;
+    uint32_t control[AW_A10_PIT_TIMER_NR];
+    uint32_t interval[AW_A10_PIT_TIMER_NR];
+    uint32_t count[AW_A10_PIT_TIMER_NR];
+    uint32_t watch_dog_mode;
+    uint32_t watch_dog_control;
+    uint32_t count_lo;
+    uint32_t count_hi;
+    uint32_t count_ctl;
+};
+
+#endif
diff --git a/include/hw/timer/arm_mptimer.h b/include/hw/timer/arm_mptimer.h
new file mode 100644 (file)
index 0000000..65a96e2
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Private peripheral timer/watchdog blocks for ARM 11MPCore and A9MP
+ *
+ * Copyright (c) 2006-2007 CodeSourcery.
+ * Copyright (c) 2011 Linaro Limited
+ * Written by Paul Brook, Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef HW_TIMER_ARM_MPTIMER_H
+#define HW_TIMER_ARM_MPTIMER_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define ARM_MPTIMER_MAX_CPUS 4
+
+/* State of a single timer or watchdog block */
+typedef struct {
+    uint32_t control;
+    uint32_t status;
+    struct ptimer_state *timer;
+    qemu_irq irq;
+    MemoryRegion iomem;
+} TimerBlock;
+
+#define TYPE_ARM_MPTIMER "arm_mptimer"
+OBJECT_DECLARE_SIMPLE_TYPE(ARMMPTimerState, ARM_MPTIMER)
+
+struct ARMMPTimerState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    uint32_t num_cpu;
+    TimerBlock timerblock[ARM_MPTIMER_MAX_CPUS];
+    MemoryRegion iomem;
+};
+
+#endif
diff --git a/include/hw/timer/armv7m_systick.h b/include/hw/timer/armv7m_systick.h
new file mode 100644 (file)
index 0000000..ee09b13
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * ARMv7M SysTick timer
+ *
+ * Copyright (c) 2006-2007 CodeSourcery.
+ * Written by Paul Brook
+ * Copyright (c) 2017 Linaro Ltd
+ * Written by Peter Maydell
+ *
+ * This code is licensed under the GPL (version 2 or later).
+ */
+
+#ifndef HW_TIMER_ARMV7M_SYSTICK_H
+#define HW_TIMER_ARMV7M_SYSTICK_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+#include "hw/ptimer.h"
+#include "hw/clock.h"
+
+#define TYPE_SYSTICK "armv7m_systick"
+
+OBJECT_DECLARE_SIMPLE_TYPE(SysTickState, SYSTICK)
+
+/*
+ * QEMU interface:
+ *  + sysbus MMIO region 0 is the register interface (covering
+ *    the registers which are mapped at address 0xE000E010)
+ *  + sysbus IRQ 0 is the interrupt line to the NVIC
+ *  + Clock input "refclk" is the external reference clock
+ *    (used when SYST_CSR.CLKSOURCE == 0)
+ *  + Clock input "cpuclk" is the main CPU clock
+ *    (used when SYST_CSR.CLKSOURCE == 1)
+ */
+
+struct SysTickState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    uint32_t control;
+    uint32_t reload;
+    int64_t tick;
+    ptimer_state *ptimer;
+    MemoryRegion iomem;
+    qemu_irq irq;
+    Clock *refclk;
+    Clock *cpuclk;
+};
+
+#endif
diff --git a/include/hw/timer/aspeed_timer.h b/include/hw/timer/aspeed_timer.h
new file mode 100644 (file)
index 0000000..07dc6b6
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ *  ASPEED AST2400 Timer
+ *
+ *  Andrew Jeffery <andrew@aj.id.au>
+ *
+ *  Copyright (C) 2016 IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef ASPEED_TIMER_H
+#define ASPEED_TIMER_H
+
+#include "qemu/timer.h"
+#include "hw/misc/aspeed_scu.h"
+#include "qom/object.h"
+
+#define TYPE_ASPEED_TIMER "aspeed.timer"
+OBJECT_DECLARE_TYPE(AspeedTimerCtrlState, AspeedTimerClass, ASPEED_TIMER)
+#define TYPE_ASPEED_2400_TIMER TYPE_ASPEED_TIMER "-ast2400"
+#define TYPE_ASPEED_2500_TIMER TYPE_ASPEED_TIMER "-ast2500"
+#define TYPE_ASPEED_2600_TIMER TYPE_ASPEED_TIMER "-ast2600"
+#define TYPE_ASPEED_1030_TIMER TYPE_ASPEED_TIMER "-ast1030"
+
+#define ASPEED_TIMER_NR_TIMERS 8
+
+typedef struct AspeedTimer {
+    qemu_irq irq;
+
+    uint8_t id;
+    QEMUTimer timer;
+
+    /**
+     * Track the line level as the ASPEED timers implement edge triggered
+     * interrupts, signalling with both the rising and falling edge.
+     */
+    int32_t level;
+    uint32_t reload;
+    uint32_t match[2];
+    uint64_t start;
+} AspeedTimer;
+
+struct AspeedTimerCtrlState {
+    /*< private >*/
+    SysBusDevice parent;
+
+    /*< public >*/
+    MemoryRegion iomem;
+
+    uint32_t ctrl;
+    uint32_t ctrl2;
+    uint32_t ctrl3;
+    uint32_t irq_sts;
+    AspeedTimer timers[ASPEED_TIMER_NR_TIMERS];
+
+    AspeedSCUState *scu;
+};
+
+
+struct AspeedTimerClass {
+    SysBusDeviceClass parent_class;
+
+    uint64_t (*read)(AspeedTimerCtrlState *s, hwaddr offset);
+    void (*write)(AspeedTimerCtrlState *s, hwaddr offset, uint64_t value);
+};
+
+#endif /* ASPEED_TIMER_H */
diff --git a/include/hw/timer/avr_timer16.h b/include/hw/timer/avr_timer16.h
new file mode 100644 (file)
index 0000000..a1a032a
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * AVR 16-bit timer
+ *
+ * Copyright (c) 2018 University of Kent
+ * Author: Ed Robbins
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see
+ * <http://www.gnu.org/licenses/lgpl-2.1.html>
+ */
+
+/*
+ * Driver for 16 bit timers on 8 bit AVR devices.
+ * Note:
+ * On ATmega640/V-1280/V-1281/V-2560/V-2561/V timers 1, 3, 4 and 5 are 16 bit
+ */
+
+#ifndef HW_TIMER_AVR_TIMER16_H
+#define HW_TIMER_AVR_TIMER16_H
+
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+#include "qom/object.h"
+
+enum NextInterrupt {
+    OVERFLOW,
+    COMPA,
+    COMPB,
+    COMPC,
+    CAPT
+};
+
+#define TYPE_AVR_TIMER16 "avr-timer16"
+OBJECT_DECLARE_SIMPLE_TYPE(AVRTimer16State, AVR_TIMER16)
+
+struct AVRTimer16State {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion iomem;
+    MemoryRegion imsk_iomem;
+    MemoryRegion ifr_iomem;
+    QEMUTimer *timer;
+    qemu_irq capt_irq;
+    qemu_irq compa_irq;
+    qemu_irq compb_irq;
+    qemu_irq compc_irq;
+    qemu_irq ovf_irq;
+
+    bool enabled;
+
+    /* registers */
+    uint8_t cra;
+    uint8_t crb;
+    uint8_t crc;
+    uint8_t cntl;
+    uint8_t cnth;
+    uint8_t icrl;
+    uint8_t icrh;
+    uint8_t ocral;
+    uint8_t ocrah;
+    uint8_t ocrbl;
+    uint8_t ocrbh;
+    uint8_t ocrcl;
+    uint8_t ocrch;
+    /*
+     * Reads and writes to CNT and ICR utilise a bizarre temporary
+     * register, which we emulate
+     */
+    uint8_t rtmp;
+    uint8_t imsk;
+    uint8_t ifr;
+
+    uint8_t id;
+    uint64_t cpu_freq_hz;
+    uint64_t freq_hz;
+    uint64_t period_ns;
+    uint64_t reset_time_ns;
+    enum NextInterrupt next_interrupt;
+};
+
+#endif /* HW_TIMER_AVR_TIMER16_H */
diff --git a/include/hw/timer/bcm2835_systmr.h b/include/hw/timer/bcm2835_systmr.h
new file mode 100644 (file)
index 0000000..a8f605b
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * BCM2835 SYS timer emulation
+ *
+ * Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef BCM2835_SYSTMR_H
+#define BCM2835_SYSTMR_H
+
+#include "hw/sysbus.h"
+#include "hw/irq.h"
+#include "qemu/timer.h"
+#include "qom/object.h"
+
+#define TYPE_BCM2835_SYSTIMER "bcm2835-sys-timer"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835SystemTimerState, BCM2835_SYSTIMER)
+
+#define BCM2835_SYSTIMER_COUNT 4
+
+typedef struct {
+    unsigned id;
+    QEMUTimer timer;
+    qemu_irq irq;
+    BCM2835SystemTimerState *state;
+} BCM2835SystemTimerCompare;
+
+struct BCM2835SystemTimerState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    struct {
+        uint32_t ctrl_status;
+        uint32_t compare[BCM2835_SYSTIMER_COUNT];
+    } reg;
+    BCM2835SystemTimerCompare tmr[BCM2835_SYSTIMER_COUNT];
+};
+
+#endif
diff --git a/include/hw/timer/cadence_ttc.h b/include/hw/timer/cadence_ttc.h
new file mode 100644 (file)
index 0000000..e125138
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Xilinx Zynq cadence TTC model
+ *
+ * Copyright (c) 2011 Xilinx Inc.
+ * Copyright (c) 2012 Peter A.G. Crosthwaite (peter.crosthwaite@petalogix.com)
+ * Copyright (c) 2012 PetaLogix Pty Ltd.
+ * Written By Haibing Ma
+ *            M. Habib
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef HW_TIMER_CADENCE_TTC_H
+#define HW_TIMER_CADENCE_TTC_H
+
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+
+typedef struct {
+    QEMUTimer *timer;
+    int freq;
+
+    uint32_t reg_clock;
+    uint32_t reg_count;
+    uint32_t reg_value;
+    uint16_t reg_interval;
+    uint16_t reg_match[3];
+    uint32_t reg_intr;
+    uint32_t reg_intr_en;
+    uint32_t reg_event_ctrl;
+    uint32_t reg_event;
+
+    uint64_t cpu_time;
+    unsigned int cpu_time_valid;
+
+    qemu_irq irq;
+} CadenceTimerState;
+
+#define TYPE_CADENCE_TTC "cadence_ttc"
+OBJECT_DECLARE_SIMPLE_TYPE(CadenceTTCState, CADENCE_TTC)
+
+struct CadenceTTCState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    CadenceTimerState timer[3];
+};
+
+#endif
diff --git a/include/hw/timer/cmsdk-apb-dualtimer.h b/include/hw/timer/cmsdk-apb-dualtimer.h
new file mode 100644 (file)
index 0000000..f3ec86c
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * ARM CMSDK APB dual-timer emulation
+ *
+ * Copyright (c) 2018 Linaro Limited
+ * Written by Peter Maydell
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+/*
+ * This is a model of the "APB dual-input timer" which is part of the Cortex-M
+ * System Design Kit (CMSDK) and documented in the Cortex-M System
+ * Design Kit Technical Reference Manual (ARM DDI0479C):
+ * https://developer.arm.com/products/system-design/system-design-kits/cortex-m-system-design-kit
+ *
+ * QEMU interface:
+ *  + Clock input "TIMCLK": clock (for both timers)
+ *  + sysbus MMIO region 0: the register bank
+ *  + sysbus IRQ 0: combined timer interrupt TIMINTC
+ *  + sysbus IRO 1: timer block 1 interrupt TIMINT1
+ *  + sysbus IRQ 2: timer block 2 interrupt TIMINT2
+ */
+
+#ifndef CMSDK_APB_DUALTIMER_H
+#define CMSDK_APB_DUALTIMER_H
+
+#include "hw/sysbus.h"
+#include "hw/ptimer.h"
+#include "hw/clock.h"
+#include "qom/object.h"
+
+#define TYPE_CMSDK_APB_DUALTIMER "cmsdk-apb-dualtimer"
+OBJECT_DECLARE_SIMPLE_TYPE(CMSDKAPBDualTimer, CMSDK_APB_DUALTIMER)
+
+
+/* One of the two identical timer modules in the dual-timer module */
+typedef struct CMSDKAPBDualTimerModule {
+    CMSDKAPBDualTimer *parent;
+    struct ptimer_state *timer;
+    qemu_irq timerint;
+    /*
+     * We must track the guest LOAD and VALUE register state by hand
+     * rather than leaving this state only in the ptimer limit/count,
+     * because if CONTROL.SIZE is 0 then only the low 16 bits of the
+     * counter actually counts, but the high half is still guest
+     * accessible.
+     */
+    uint32_t load;
+    uint32_t value;
+    uint32_t control;
+    uint32_t intstatus;
+} CMSDKAPBDualTimerModule;
+
+#define CMSDK_APB_DUALTIMER_NUM_MODULES 2
+
+struct CMSDKAPBDualTimer {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    qemu_irq timerintc;
+    Clock *timclk;
+
+    CMSDKAPBDualTimerModule timermod[CMSDK_APB_DUALTIMER_NUM_MODULES];
+    uint32_t timeritcr;
+    uint32_t timeritop;
+};
+
+#endif
diff --git a/include/hw/timer/cmsdk-apb-timer.h b/include/hw/timer/cmsdk-apb-timer.h
new file mode 100644 (file)
index 0000000..2dd615d
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * ARM CMSDK APB timer emulation
+ *
+ * Copyright (c) 2017 Linaro Limited
+ * Written by Peter Maydell
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+#ifndef CMSDK_APB_TIMER_H
+#define CMSDK_APB_TIMER_H
+
+#include "hw/sysbus.h"
+#include "hw/ptimer.h"
+#include "hw/clock.h"
+#include "qom/object.h"
+
+#define TYPE_CMSDK_APB_TIMER "cmsdk-apb-timer"
+OBJECT_DECLARE_SIMPLE_TYPE(CMSDKAPBTimer, CMSDK_APB_TIMER)
+
+/*
+ * QEMU interface:
+ *  + Clock input "pclk": clock for the timer
+ *  + sysbus MMIO region 0: the register bank
+ *  + sysbus IRQ 0: timer interrupt TIMERINT
+ */
+struct CMSDKAPBTimer {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    qemu_irq timerint;
+    struct ptimer_state *timer;
+    Clock *pclk;
+
+    uint32_t ctrl;
+    uint32_t value;
+    uint32_t reload;
+    uint32_t intstatus;
+};
+
+#endif
diff --git a/include/hw/timer/digic-timer.h b/include/hw/timer/digic-timer.h
new file mode 100644 (file)
index 0000000..da82fb4
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Canon DIGIC timer block declarations.
+ *
+ * Copyright (C) 2013 Antony Pavlov <antonynpavlov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef HW_TIMER_DIGIC_TIMER_H
+#define HW_TIMER_DIGIC_TIMER_H
+
+#include "hw/sysbus.h"
+#include "hw/ptimer.h"
+#include "qom/object.h"
+
+#define TYPE_DIGIC_TIMER "digic-timer"
+OBJECT_DECLARE_SIMPLE_TYPE(DigicTimerState, DIGIC_TIMER)
+
+#define DIGIC_TIMER_CONTROL 0x00
+#define DIGIC_TIMER_CONTROL_RST 0x80000000
+#define DIGIC_TIMER_CONTROL_EN 0x00000001
+#define DIGIC_TIMER_RELVALUE 0x08
+#define DIGIC_TIMER_VALUE 0x0c
+
+struct DigicTimerState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    MemoryRegion iomem;
+    ptimer_state *ptimer;
+
+    uint32_t control;
+    uint32_t relvalue;
+};
+
+#endif /* HW_TIMER_DIGIC_TIMER_H */
diff --git a/include/hw/timer/hpet.h b/include/hw/timer/hpet.h
new file mode 100644 (file)
index 0000000..f04c4d3
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * QEMU Emulated HPET support
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Beth Kon   <bkon@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_HPET_H
+#define HW_HPET_H
+
+#include "qom/object.h"
+
+#define HPET_BASE               0xfed00000
+#define HPET_LEN                0x400
+#define HPET_CLK_PERIOD         10 /* 10 ns*/
+
+#define FS_PER_NS 1000000       /* 1000000 femtoseconds == 1 ns */
+#define HPET_MIN_TIMERS         3
+#define HPET_MAX_TIMERS         32
+
+#define HPET_NUM_IRQ_ROUTES     32
+
+#define HPET_LEGACY_PIT_INT     0
+#define HPET_LEGACY_RTC_INT     1
+
+#define HPET_CFG_ENABLE 0x001
+#define HPET_CFG_LEGACY 0x002
+
+#define HPET_ID         0x000
+#define HPET_PERIOD     0x004
+#define HPET_CFG        0x010
+#define HPET_STATUS     0x020
+#define HPET_COUNTER    0x0f0
+#define HPET_TN_CFG     0x000
+#define HPET_TN_CMP     0x008
+#define HPET_TN_ROUTE   0x010
+#define HPET_CFG_WRITE_MASK  0x3
+
+#define HPET_ID_NUM_TIM_SHIFT   8
+#define HPET_ID_NUM_TIM_MASK    0x1f00
+
+#define HPET_TN_TYPE_LEVEL       0x002
+#define HPET_TN_ENABLE           0x004
+#define HPET_TN_PERIODIC         0x008
+#define HPET_TN_PERIODIC_CAP     0x010
+#define HPET_TN_SIZE_CAP         0x020
+#define HPET_TN_SETVAL           0x040
+#define HPET_TN_32BIT            0x100
+#define HPET_TN_INT_ROUTE_MASK  0x3e00
+#define HPET_TN_FSB_ENABLE      0x4000
+#define HPET_TN_FSB_CAP         0x8000
+#define HPET_TN_CFG_WRITE_MASK  0x7f4e
+#define HPET_TN_INT_ROUTE_SHIFT      9
+#define HPET_TN_INT_ROUTE_CAP_SHIFT 32
+#define HPET_TN_CFG_BITS_READONLY_OR_RESERVED 0xffff80b1U
+
+struct hpet_fw_entry
+{
+    uint32_t event_timer_block_id;
+    uint64_t address;
+    uint16_t min_tick;
+    uint8_t page_prot;
+} QEMU_PACKED;
+
+struct hpet_fw_config
+{
+    uint8_t count;
+    struct hpet_fw_entry hpet[8];
+} QEMU_PACKED;
+
+extern struct hpet_fw_config hpet_cfg;
+
+#define TYPE_HPET "hpet"
+
+static inline bool hpet_find(void)
+{
+    return object_resolve_path_type("", TYPE_HPET, NULL);
+}
+
+#endif
diff --git a/include/hw/timer/i8254.h b/include/hw/timer/i8254.h
new file mode 100644 (file)
index 0000000..8402caa
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * QEMU 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_I8254_H
+#define HW_I8254_H
+
+#include "hw/qdev-properties.h"
+#include "hw/isa/isa.h"
+#include "qapi/error.h"
+#include "qom/object.h"
+
+#define PIT_FREQ 1193182
+
+typedef struct PITChannelInfo {
+    int gate;
+    int mode;
+    int initial_count;
+    int out;
+} PITChannelInfo;
+
+#define TYPE_PIT_COMMON "pit-common"
+OBJECT_DECLARE_TYPE(PITCommonState, PITCommonClass, PIT_COMMON)
+
+#define TYPE_I8254 "isa-pit"
+#define TYPE_KVM_I8254 "kvm-pit"
+
+static inline ISADevice *i8254_pit_init(ISABus *bus, int base, int isa_irq,
+                                        qemu_irq alt_irq)
+{
+    DeviceState *dev;
+    ISADevice *d;
+
+    d = isa_new(TYPE_I8254);
+    dev = DEVICE(d);
+    qdev_prop_set_uint32(dev, "iobase", base);
+    isa_realize_and_unref(d, bus, &error_fatal);
+    qdev_connect_gpio_out(dev, 0,
+                          isa_irq >= 0 ? isa_bus_get_irq(bus, isa_irq)
+                                       : alt_irq);
+
+    return d;
+}
+
+static inline ISADevice *kvm_pit_init(ISABus *bus, int base)
+{
+    DeviceState *dev;
+    ISADevice *d;
+
+    d = isa_new(TYPE_KVM_I8254);
+    dev = DEVICE(d);
+    qdev_prop_set_uint32(dev, "iobase", base);
+    isa_realize_and_unref(d, bus, &error_fatal);
+
+    return d;
+}
+
+void pit_set_gate(ISADevice *dev, int channel, int val);
+void pit_get_channel_info(ISADevice *dev, int channel, PITChannelInfo *info);
+
+#endif /* HW_I8254_H */
diff --git a/include/hw/timer/i8254_internal.h b/include/hw/timer/i8254_internal.h
new file mode 100644 (file)
index 0000000..1761deb
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * QEMU 8253/8254 - internal interfaces
+ *
+ * Copyright (c) 2011 Jan Kiszka, Siemens AG
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_I8254_INTERNAL_H
+#define QEMU_I8254_INTERNAL_H
+
+#include "hw/isa/isa.h"
+#include "hw/timer/i8254.h"
+#include "qemu/timer.h"
+
+typedef struct PITChannelState {
+    int count; /* can be 65536 */
+    uint16_t latched_count;
+    uint8_t count_latched;
+    uint8_t status_latched;
+    uint8_t status;
+    uint8_t read_state;
+    uint8_t write_state;
+    uint8_t write_latch;
+    uint8_t rw_mode;
+    uint8_t mode;
+    uint8_t bcd; /* not supported */
+    uint8_t gate; /* timer start */
+    int64_t count_load_time;
+    /* irq handling */
+    int64_t next_transition_time;
+    QEMUTimer *irq_timer;
+    qemu_irq irq;
+    uint32_t irq_disabled;
+} PITChannelState;
+
+struct PITCommonState {
+    ISADevice dev;
+    MemoryRegion ioports;
+    uint32_t iobase;
+    PITChannelState channels[3];
+};
+
+struct PITCommonClass {
+    DeviceClass parent_class;
+
+    void (*set_channel_gate)(PITCommonState *s, PITChannelState *sc, int val);
+    void (*get_channel_info)(PITCommonState *s, PITChannelState *sc,
+                             PITChannelInfo *info);
+    void (*pre_save)(PITCommonState *s);
+    void (*post_load)(PITCommonState *s);
+};
+
+int pit_get_out(PITChannelState *s, int64_t current_time);
+int64_t pit_get_next_transition_time(PITChannelState *s, int64_t current_time);
+void pit_get_channel_info_common(PITCommonState *s, PITChannelState *sc,
+                                 PITChannelInfo *info);
+void pit_reset_common(PITCommonState *s);
+
+#endif /* QEMU_I8254_INTERNAL_H */
diff --git a/include/hw/timer/ibex_timer.h b/include/hw/timer/ibex_timer.h
new file mode 100644 (file)
index 0000000..41f5c82
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * QEMU lowRISC Ibex Timer device
+ *
+ * Copyright (c) 2021 Western Digital
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_IBEX_TIMER_H
+#define HW_IBEX_TIMER_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_IBEX_TIMER "ibex-timer"
+OBJECT_DECLARE_SIMPLE_TYPE(IbexTimerState, IBEX_TIMER)
+
+struct IbexTimerState {
+    /* <private> */
+    SysBusDevice parent_obj;
+    uint64_t mtimecmp;
+    QEMUTimer *mtimer; /* Internal timer for M-mode interrupt */
+
+    /* <public> */
+    MemoryRegion mmio;
+
+    uint32_t timer_ctrl;
+    uint32_t timer_cfg0;
+    uint32_t timer_compare_lower0;
+    uint32_t timer_compare_upper0;
+    uint32_t timer_intr_enable;
+    uint32_t timer_intr_state;
+
+    uint32_t timebase_freq;
+
+    qemu_irq irq;
+
+    qemu_irq m_timer_irq;
+};
+#endif /* HW_IBEX_TIMER_H */
diff --git a/include/hw/timer/imx_epit.h b/include/hw/timer/imx_epit.h
new file mode 100644 (file)
index 0000000..79aff0c
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * i.MX EPIT Timer
+ *
+ * Copyright (c) 2008 OK Labs
+ * Copyright (c) 2011 NICTA Pty Ltd
+ * Originally written by Hans Jiang
+ * Updated by Peter Chubb
+ * Updated by Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef IMX_EPIT_H
+#define IMX_EPIT_H
+
+#include "hw/sysbus.h"
+#include "hw/ptimer.h"
+#include "hw/misc/imx_ccm.h"
+#include "qom/object.h"
+
+/*
+ * EPIT: Enhanced periodic interrupt timer
+ */
+
+#define CR_EN       (1 << 0)
+#define CR_ENMOD    (1 << 1)
+#define CR_OCIEN    (1 << 2)
+#define CR_RLD      (1 << 3)
+#define CR_PRESCALE_SHIFT (4)
+#define CR_PRESCALE_BITS  (12)
+#define CR_SWR      (1 << 16)
+#define CR_IOVW     (1 << 17)
+#define CR_DBGEN    (1 << 18)
+#define CR_WAITEN   (1 << 19)
+#define CR_DOZEN    (1 << 20)
+#define CR_STOPEN   (1 << 21)
+#define CR_CLKSRC_SHIFT (24)
+#define CR_CLKSRC_BITS  (2)
+
+#define SR_OCIF     (1 << 0)
+
+#define EPIT_TIMER_MAX  0XFFFFFFFFUL
+
+#define TYPE_IMX_EPIT "imx.epit"
+OBJECT_DECLARE_SIMPLE_TYPE(IMXEPITState, IMX_EPIT)
+
+struct IMXEPITState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    ptimer_state *timer_reload;
+    ptimer_state *timer_cmp;
+    MemoryRegion  iomem;
+    IMXCCMState  *ccm;
+
+    uint32_t cr;
+    uint32_t sr;
+    uint32_t lr;
+    uint32_t cmp;
+
+    qemu_irq irq;
+};
+
+#endif /* IMX_EPIT_H */
diff --git a/include/hw/timer/imx_gpt.h b/include/hw/timer/imx_gpt.h
new file mode 100644 (file)
index 0000000..5a1230d
--- /dev/null
@@ -0,0 +1,120 @@
+/*
+ * i.MX GPT Timer
+ *
+ * Copyright (c) 2008 OK Labs
+ * Copyright (c) 2011 NICTA Pty Ltd
+ * Originally written by Hans Jiang
+ * Updated by Peter Chubb
+ * Updated by Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef IMX_GPT_H
+#define IMX_GPT_H
+
+#include "hw/sysbus.h"
+#include "hw/ptimer.h"
+#include "hw/misc/imx_ccm.h"
+#include "qom/object.h"
+
+/*
+ * GPT : General purpose timer
+ *
+ * This timer counts up continuously while it is enabled, resetting itself
+ * to 0 when it reaches GPT_TIMER_MAX (in freerun mode) or when it
+ * reaches the value of one of the ocrX (in periodic mode).
+ */
+
+#define GPT_TIMER_MAX  0XFFFFFFFFUL
+
+/* Control register.  Not all of these bits have any effect (yet) */
+#define GPT_CR_EN     (1 << 0)  /* GPT Enable */
+#define GPT_CR_ENMOD  (1 << 1)  /* GPT Enable Mode */
+#define GPT_CR_DBGEN  (1 << 2)  /* GPT Debug mode enable */
+#define GPT_CR_WAITEN (1 << 3)  /* GPT Wait Mode Enable  */
+#define GPT_CR_DOZEN  (1 << 4)  /* GPT Doze mode enable */
+#define GPT_CR_STOPEN (1 << 5)  /* GPT Stop Mode Enable */
+#define GPT_CR_CLKSRC_SHIFT (6)
+#define GPT_CR_CLKSRC_MASK  (0x7)
+
+#define GPT_CR_FRR    (1 << 9)  /* Freerun or Restart */
+#define GPT_CR_SWR    (1 << 15) /* Software Reset */
+#define GPT_CR_IM1    (3 << 16) /* Input capture channel 1 mode (2 bits) */
+#define GPT_CR_IM2    (3 << 18) /* Input capture channel 2 mode (2 bits) */
+#define GPT_CR_OM1    (7 << 20) /* Output Compare Channel 1 Mode (3 bits) */
+#define GPT_CR_OM2    (7 << 23) /* Output Compare Channel 2 Mode (3 bits) */
+#define GPT_CR_OM3    (7 << 26) /* Output Compare Channel 3 Mode (3 bits) */
+#define GPT_CR_FO1    (1 << 29) /* Force Output Compare Channel 1 */
+#define GPT_CR_FO2    (1 << 30) /* Force Output Compare Channel 2 */
+#define GPT_CR_FO3    (1 << 31) /* Force Output Compare Channel 3 */
+
+#define GPT_SR_OF1  (1 << 0)
+#define GPT_SR_OF2  (1 << 1)
+#define GPT_SR_OF3  (1 << 2)
+#define GPT_SR_ROV  (1 << 5)
+
+#define GPT_IR_OF1IE  (1 << 0)
+#define GPT_IR_OF2IE  (1 << 1)
+#define GPT_IR_OF3IE  (1 << 2)
+#define GPT_IR_ROVIE  (1 << 5)
+
+#define TYPE_IMX25_GPT "imx25.gpt"
+#define TYPE_IMX31_GPT "imx31.gpt"
+#define TYPE_IMX6_GPT "imx6.gpt"
+#define TYPE_IMX6UL_GPT "imx6ul.gpt"
+#define TYPE_IMX7_GPT "imx7.gpt"
+
+#define TYPE_IMX_GPT TYPE_IMX25_GPT
+
+typedef struct IMXGPTState IMXGPTState;
+DECLARE_INSTANCE_CHECKER(IMXGPTState, IMX_GPT,
+                         TYPE_IMX_GPT)
+
+struct IMXGPTState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    ptimer_state *timer;
+    MemoryRegion  iomem;
+    IMXCCMState  *ccm;
+
+    uint32_t cr;
+    uint32_t pr;
+    uint32_t sr;
+    uint32_t ir;
+    uint32_t ocr1;
+    uint32_t ocr2;
+    uint32_t ocr3;
+    uint32_t icr1;
+    uint32_t icr2;
+    uint32_t cnt;
+
+    uint32_t next_timeout;
+    uint32_t next_int;
+
+    uint32_t freq;
+
+    qemu_irq irq;
+
+    const IMXClk *clocks;
+};
+
+#endif /* IMX_GPT_H */
diff --git a/include/hw/timer/mips_gictimer.h b/include/hw/timer/mips_gictimer.h
new file mode 100644 (file)
index 0000000..c7ca6c8
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2016 Imagination Technologies
+ *
+ */
+
+#ifndef MIPS_GICTIMER_H
+#define MIPS_GICTIMER_H
+
+typedef struct MIPSGICTimerVPState MIPSGICTimerVPState;
+typedef struct MIPSGICTimerState MIPSGICTimerState;
+
+typedef void MIPSGICTimerCB(void *opaque, uint32_t vp_index);
+
+struct MIPSGICTimerVPState {
+    QEMUTimer *qtimer;
+    uint32_t vp_index;
+    uint32_t comparelo;
+    MIPSGICTimerState *gictimer;
+};
+
+struct MIPSGICTimerState {
+    void *opaque;
+    uint8_t countstop;
+    uint32_t sh_counterlo;
+    int32_t num_vps;
+    MIPSGICTimerVPState *vptimers;
+    MIPSGICTimerCB *cb;
+};
+
+uint32_t mips_gictimer_get_freq(MIPSGICTimerState *gic);
+uint32_t mips_gictimer_get_sh_count(MIPSGICTimerState *gic);
+void mips_gictimer_store_sh_count(MIPSGICTimerState *gic, uint64_t count);
+uint32_t mips_gictimer_get_vp_compare(MIPSGICTimerState *gictimer,
+                                      uint32_t vp_index);
+void mips_gictimer_store_vp_compare(MIPSGICTimerState *gic, uint32_t vp_index,
+                                    uint64_t compare);
+uint8_t mips_gictimer_get_countstop(MIPSGICTimerState *gic);
+void mips_gictimer_start_count(MIPSGICTimerState *gic);
+void mips_gictimer_stop_count(MIPSGICTimerState *gic);
+MIPSGICTimerState *mips_gictimer_init(void *opaque, uint32_t nvps,
+                                      MIPSGICTimerCB *cb);
+
+#endif /* MIPS_GICTIMER_H */
diff --git a/include/hw/timer/mss-timer.h b/include/hw/timer/mss-timer.h
new file mode 100644 (file)
index 0000000..da38512
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Microsemi SmartFusion2 Timer.
+ *
+ * Copyright (c) 2017 Subbaraya Sundeep <sundeep.lkml@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_MSS_TIMER_H
+#define HW_MSS_TIMER_H
+
+#include "hw/sysbus.h"
+#include "hw/ptimer.h"
+#include "qom/object.h"
+
+#define TYPE_MSS_TIMER     "mss-timer"
+OBJECT_DECLARE_SIMPLE_TYPE(MSSTimerState, MSS_TIMER)
+
+/*
+ * There are two 32-bit down counting timers.
+ * Timers 1 and 2 can be concatenated into a single 64-bit Timer
+ * that operates either in Periodic mode or in One-shot mode.
+ * Writing 1 to the TIM64_MODE register bit 0 sets the Timers in 64-bit mode.
+ * In 64-bit mode, writing to the 32-bit registers has no effect.
+ * Similarly, in 32-bit mode, writing to the 64-bit mode registers
+ * has no effect. Only two 32-bit timers are supported currently.
+ */
+#define NUM_TIMERS        2
+
+#define R_TIM1_MAX        6
+
+struct Msf2Timer {
+    ptimer_state *ptimer;
+
+    uint32_t regs[R_TIM1_MAX];
+    qemu_irq irq;
+};
+
+struct MSSTimerState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion mmio;
+    uint32_t freq_hz;
+    struct Msf2Timer timers[NUM_TIMERS];
+};
+
+#endif /* HW_MSS_TIMER_H */
diff --git a/include/hw/timer/npcm7xx_timer.h b/include/hw/timer/npcm7xx_timer.h
new file mode 100644 (file)
index 0000000..d45c051
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Nuvoton NPCM7xx Timer Controller
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef NPCM7XX_TIMER_H
+#define NPCM7XX_TIMER_H
+
+#include "exec/memory.h"
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+
+/* Each Timer Module (TIM) instance holds five 25 MHz timers. */
+#define NPCM7XX_TIMERS_PER_CTRL (5)
+
+/*
+ * Number of registers in our device state structure. Don't change this without
+ * incrementing the version_id in the vmstate.
+ */
+#define NPCM7XX_TIMER_NR_REGS (0x54 / sizeof(uint32_t))
+
+/* The basic watchdog timer period is 2^14 clock cycles. */
+#define NPCM7XX_WATCHDOG_BASETIME_SHIFT 14
+
+#define NPCM7XX_WATCHDOG_RESET_GPIO_OUT "npcm7xx-clk-watchdog-reset-gpio-out"
+
+typedef struct NPCM7xxTimerCtrlState NPCM7xxTimerCtrlState;
+
+/**
+ * struct NPCM7xxBaseTimer - Basic functionality that both regular timer and
+ * watchdog timer use.
+ * @qtimer: QEMU timer that notifies us on expiration.
+ * @expires_ns: Absolute virtual expiration time.
+ * @remaining_ns: Remaining time until expiration if timer is paused.
+ */
+typedef struct NPCM7xxBaseTimer {
+    QEMUTimer   qtimer;
+    int64_t     expires_ns;
+    int64_t     remaining_ns;
+} NPCM7xxBaseTimer;
+
+/**
+ * struct NPCM7xxTimer - Individual timer state.
+ * @ctrl: The timer module that owns this timer.
+ * @irq: GIC interrupt line to fire on expiration (if enabled).
+ * @base_timer: The basic timer functionality for this timer.
+ * @tcsr: The Timer Control and Status Register.
+ * @ticr: The Timer Initial Count Register.
+ */
+typedef struct NPCM7xxTimer {
+    NPCM7xxTimerCtrlState *ctrl;
+
+    qemu_irq    irq;
+    NPCM7xxBaseTimer base_timer;
+
+    uint32_t    tcsr;
+    uint32_t    ticr;
+} NPCM7xxTimer;
+
+/**
+ * struct NPCM7xxWatchdogTimer - The watchdog timer state.
+ * @ctrl: The timer module that owns this timer.
+ * @irq: GIC interrupt line to fire on expiration (if enabled).
+ * @reset_signal: The GPIO used to send a reset signal.
+ * @base_timer: The basic timer functionality for this timer.
+ * @wtcr: The Watchdog Timer Control Register.
+ */
+typedef struct NPCM7xxWatchdogTimer {
+    NPCM7xxTimerCtrlState *ctrl;
+
+    qemu_irq            irq;
+    qemu_irq            reset_signal;
+    NPCM7xxBaseTimer base_timer;
+
+    uint32_t            wtcr;
+} NPCM7xxWatchdogTimer;
+
+/**
+ * struct NPCM7xxTimerCtrlState - Timer Module device state.
+ * @parent: System bus device.
+ * @iomem: Memory region through which registers are accessed.
+ * @index: The index of this timer module.
+ * @tisr: The Timer Interrupt Status Register.
+ * @timer: The five individual timers managed by this module.
+ * @watchdog_timer: The watchdog timer managed by this module.
+ */
+struct NPCM7xxTimerCtrlState {
+    SysBusDevice parent;
+
+    MemoryRegion iomem;
+
+    uint32_t    tisr;
+
+    Clock       *clock;
+    NPCM7xxTimer timer[NPCM7XX_TIMERS_PER_CTRL];
+    NPCM7xxWatchdogTimer watchdog_timer;
+};
+
+#define TYPE_NPCM7XX_TIMER "npcm7xx-timer"
+#define NPCM7XX_TIMER(obj)                                              \
+    OBJECT_CHECK(NPCM7xxTimerCtrlState, (obj), TYPE_NPCM7XX_TIMER)
+
+#endif /* NPCM7XX_TIMER_H */
diff --git a/include/hw/timer/nrf51_timer.h b/include/hw/timer/nrf51_timer.h
new file mode 100644 (file)
index 0000000..76827c1
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * nRF51 System-on-Chip Timer peripheral
+ *
+ * QEMU interface:
+ * + sysbus MMIO regions 0: GPIO registers
+ * + sysbus irq
+ *
+ * Copyright 2018 Steffen Görtz <contrib@steffen-goertz.de>
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+#ifndef NRF51_TIMER_H
+#define NRF51_TIMER_H
+
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+#include "qom/object.h"
+#define TYPE_NRF51_TIMER "nrf51_soc.timer"
+OBJECT_DECLARE_SIMPLE_TYPE(NRF51TimerState, NRF51_TIMER)
+
+#define NRF51_TIMER_REG_COUNT 4
+
+#define NRF51_TIMER_TASK_START 0x000
+#define NRF51_TIMER_TASK_STOP 0x004
+#define NRF51_TIMER_TASK_COUNT 0x008
+#define NRF51_TIMER_TASK_CLEAR 0x00C
+#define NRF51_TIMER_TASK_SHUTDOWN 0x010
+#define NRF51_TIMER_TASK_CAPTURE_0 0x040
+#define NRF51_TIMER_TASK_CAPTURE_3 0x04C
+
+#define NRF51_TIMER_EVENT_COMPARE_0 0x140
+#define NRF51_TIMER_EVENT_COMPARE_1 0x144
+#define NRF51_TIMER_EVENT_COMPARE_2 0x148
+#define NRF51_TIMER_EVENT_COMPARE_3 0x14C
+
+#define NRF51_TIMER_REG_SHORTS 0x200
+#define NRF51_TIMER_REG_SHORTS_MASK 0xf0f
+#define NRF51_TIMER_REG_INTENSET 0x304
+#define NRF51_TIMER_REG_INTENCLR 0x308
+#define NRF51_TIMER_REG_INTEN_MASK 0xf0000
+#define NRF51_TIMER_REG_MODE 0x504
+#define NRF51_TIMER_REG_MODE_MASK 0x01
+#define NRF51_TIMER_TIMER 0
+#define NRF51_TIMER_COUNTER 1
+#define NRF51_TIMER_REG_BITMODE 0x508
+#define NRF51_TIMER_REG_BITMODE_MASK 0x03
+#define NRF51_TIMER_WIDTH_16 0
+#define NRF51_TIMER_WIDTH_8 1
+#define NRF51_TIMER_WIDTH_24 2
+#define NRF51_TIMER_WIDTH_32 3
+#define NRF51_TIMER_REG_PRESCALER 0x510
+#define NRF51_TIMER_REG_PRESCALER_MASK 0x0F
+#define NRF51_TIMER_REG_CC0 0x540
+#define NRF51_TIMER_REG_CC3 0x54C
+
+struct NRF51TimerState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    qemu_irq irq;
+
+    uint8_t id;
+    QEMUTimer timer;
+    int64_t timer_start_ns;
+    int64_t update_counter_ns;
+    uint32_t counter;
+
+    bool running;
+
+    uint8_t events_compare[NRF51_TIMER_REG_COUNT];
+    uint32_t cc[NRF51_TIMER_REG_COUNT];
+    uint32_t shorts;
+    uint32_t inten;
+    uint32_t mode;
+    uint32_t bitmode;
+    uint32_t prescaler;
+
+};
+
+
+#endif
diff --git a/include/hw/timer/renesas_cmt.h b/include/hw/timer/renesas_cmt.h
new file mode 100644 (file)
index 0000000..1c0b65c
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Renesas Compare-match timer Object
+ *
+ * Copyright (c) 2019 Yoshinori Sato
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_TIMER_RENESAS_CMT_H
+#define HW_TIMER_RENESAS_CMT_H
+
+#include "qemu/timer.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_RENESAS_CMT "renesas-cmt"
+typedef struct RCMTState RCMTState;
+DECLARE_INSTANCE_CHECKER(RCMTState, RCMT,
+                         TYPE_RENESAS_CMT)
+
+enum {
+    CMT_CH = 2,
+    CMT_NR_IRQ = 1 * CMT_CH
+};
+
+struct RCMTState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    uint64_t input_freq;
+    MemoryRegion memory;
+
+    uint16_t cmstr;
+    uint16_t cmcr[CMT_CH];
+    uint16_t cmcnt[CMT_CH];
+    uint16_t cmcor[CMT_CH];
+    int64_t tick[CMT_CH];
+    qemu_irq cmi[CMT_CH];
+    QEMUTimer timer[CMT_CH];
+};
+
+#endif
diff --git a/include/hw/timer/renesas_tmr.h b/include/hw/timer/renesas_tmr.h
new file mode 100644 (file)
index 0000000..caf7eec
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Renesas 8bit timer Object
+ *
+ * Copyright (c) 2018 Yoshinori Sato
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_TIMER_RENESAS_TMR_H
+#define HW_TIMER_RENESAS_TMR_H
+
+#include "qemu/timer.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_RENESAS_TMR "renesas-tmr"
+typedef struct RTMRState RTMRState;
+DECLARE_INSTANCE_CHECKER(RTMRState, RTMR,
+                         TYPE_RENESAS_TMR)
+
+enum timer_event {
+    cmia = 0,
+    cmib = 1,
+    ovi = 2,
+    none = 3,
+    TMR_NR_EVENTS = 4
+};
+
+enum {
+    TMR_CH = 2,
+    TMR_NR_IRQ = 3 * TMR_CH
+};
+
+struct RTMRState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    uint64_t input_freq;
+    MemoryRegion memory;
+
+    int64_t tick;
+    uint8_t tcnt[TMR_CH];
+    uint8_t tcora[TMR_CH];
+    uint8_t tcorb[TMR_CH];
+    uint8_t tcr[TMR_CH];
+    uint8_t tccr[TMR_CH];
+    uint8_t tcor[TMR_CH];
+    uint8_t tcsr[TMR_CH];
+    int64_t div_round[TMR_CH];
+    uint8_t next[TMR_CH];
+    qemu_irq cmia[TMR_CH];
+    qemu_irq cmib[TMR_CH];
+    qemu_irq ovi[TMR_CH];
+    QEMUTimer timer[TMR_CH];
+};
+
+#endif
diff --git a/include/hw/timer/sifive_pwm.h b/include/hw/timer/sifive_pwm.h
new file mode 100644 (file)
index 0000000..6a8cf7b
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * SiFive PWM
+ *
+ * Copyright (c) 2020 Western Digital
+ *
+ * Author:  Alistair Francis <alistair.francis@wdc.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_SIFIVE_PWM_H
+#define HW_SIFIVE_PWM_H
+
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+#include "qom/object.h"
+
+#define TYPE_SIFIVE_PWM "sifive-pwm"
+
+#define SIFIVE_PWM(obj) \
+    OBJECT_CHECK(SiFivePwmState, (obj), TYPE_SIFIVE_PWM)
+
+#define SIFIVE_PWM_CHANS          4
+#define SIFIVE_PWM_IRQS           SIFIVE_PWM_CHANS
+
+typedef struct SiFivePwmState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion mmio;
+    QEMUTimer timer[SIFIVE_PWM_CHANS];
+    /*
+     * if en bit(s) set, is the number of ticks when pwmcount was 0
+     * if en bit(s) not set, is the number of ticks in pwmcount
+     */
+    uint64_t tick_offset;
+    uint64_t freq_hz;
+
+    uint32_t pwmcfg;
+    uint32_t pwmcmp[SIFIVE_PWM_CHANS];
+
+    qemu_irq irqs[SIFIVE_PWM_IRQS];
+} SiFivePwmState;
+
+#endif /* HW_SIFIVE_PWM_H */
diff --git a/include/hw/timer/sse-counter.h b/include/hw/timer/sse-counter.h
new file mode 100644 (file)
index 0000000..b433e58
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Arm SSE Subsystem System Counter
+ *
+ * Copyright (c) 2020 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+/*
+ * This is a model of the "System counter" which is documented in
+ * the Arm SSE-123 Example Subsystem Technical Reference Manual:
+ * https://developer.arm.com/documentation/101370/latest/
+ *
+ * QEMU interface:
+ *  + Clock input "CLK": clock
+ *  + sysbus MMIO region 0: the control register frame
+ *  + sysbus MMIO region 1: the status register frame
+ *
+ * Consumers of the system counter's timestamp, such as the SSE
+ * System Timer device, can also use the APIs sse_counter_for_timestamp(),
+ * sse_counter_tick_to_time() and sse_counter_register_consumer() to
+ * interact with an instance of the System Counter. Generally the
+ * consumer device should have a QOM link property which the board
+ * code can set to the appropriate instance of the system counter.
+ */
+
+#ifndef SSE_COUNTER_H
+#define SSE_COUNTER_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+#include "qemu/notify.h"
+
+#define TYPE_SSE_COUNTER "sse-counter"
+OBJECT_DECLARE_SIMPLE_TYPE(SSECounter, SSE_COUNTER)
+
+struct SSECounter {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion control_mr;
+    MemoryRegion status_mr;
+    Clock *clk;
+    NotifierList notifier_list;
+
+    uint32_t cntcr;
+    uint32_t cntscr0;
+
+    /*
+     * These are used for handling clock frequency changes: they are a
+     * tuple of (QEMU_CLOCK_VIRTUAL timestamp, CNTCV at that time),
+     * taken when the clock frequency changes. sse_cntcv() needs them
+     * to calculate the current CNTCV.
+     */
+    uint64_t ns_then;
+    uint64_t ticks_then;
+};
+
+/*
+ * These functions are the interface by which a consumer of
+ * the system timestamp (such as the SSE system timer device)
+ * can communicate with the SSECounter.
+ */
+
+/**
+ * sse_counter_for_timestamp:
+ * @counter: SSECounter
+ * @ns: timestamp of QEMU_CLOCK_VIRTUAL in nanoseconds
+ *
+ * Returns the value of the timestamp counter at the specified
+ * point in time (assuming that no changes to scale factor, enable, etc
+ * happen in the meantime).
+ */
+uint64_t sse_counter_for_timestamp(SSECounter *counter, uint64_t ns);
+
+/**
+ * sse_counter_tick_to_time:
+ * @counter: SSECounter
+ * @tick: tick value
+ *
+ * Returns the time (a QEMU_CLOCK_VIRTUAL timestamp in nanoseconds)
+ * when the timestamp counter will reach the specified tick count.
+ * If the counter is not currently running, returns UINT64_MAX.
+ */
+uint64_t sse_counter_tick_to_time(SSECounter *counter, uint64_t tick);
+
+/**
+ * sse_counter_register_consumer:
+ * @counter: SSECounter
+ * @notifier: Notifier which is notified on counter changes
+ *
+ * Registers @notifier with the SSECounter. When the counter's
+ * configuration changes in a way that might invalidate information
+ * previously returned via sse_counter_for_timestamp() or
+ * sse_counter_tick_to_time(), the notifier will be called.
+ * Devices which consume the timestamp counter can use this as
+ * a cue to recalculate timer events.
+ */
+void sse_counter_register_consumer(SSECounter *counter, Notifier *notifier);
+
+#endif
diff --git a/include/hw/timer/sse-timer.h b/include/hw/timer/sse-timer.h
new file mode 100644 (file)
index 0000000..265ad32
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Arm SSE Subsystem System Timer
+ *
+ * Copyright (c) 2020 Linaro Limited
+ * Written by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 or
+ * (at your option) any later version.
+ */
+
+/*
+ * This is a model of the "System timer" which is documented in
+ * the Arm SSE-123 Example Subsystem Technical Reference Manual:
+ * https://developer.arm.com/documentation/101370/latest/
+ *
+ * QEMU interface:
+ *  + QOM property "counter": link property to be set to the
+ *    TYPE_SSE_COUNTER timestamp counter device this timer runs off
+ *  + sysbus MMIO region 0: the register bank
+ *  + sysbus IRQ 0: timer interrupt
+ */
+
+#ifndef SSE_TIMER_H
+#define SSE_TIMER_H
+
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+#include "qom/object.h"
+#include "hw/timer/sse-counter.h"
+
+#define TYPE_SSE_TIMER "sse-timer"
+OBJECT_DECLARE_SIMPLE_TYPE(SSETimer, SSE_TIMER)
+
+struct SSETimer {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    qemu_irq irq;
+    SSECounter *counter;
+    QEMUTimer timer;
+    Notifier counter_notifier;
+
+    uint32_t cntfrq;
+    uint32_t cntp_ctl;
+    uint64_t cntp_cval;
+    uint64_t cntp_aival;
+    uint32_t cntp_aival_ctl;
+    uint32_t cntp_aival_reload;
+};
+
+#endif
diff --git a/include/hw/timer/stellaris-gptm.h b/include/hw/timer/stellaris-gptm.h
new file mode 100644 (file)
index 0000000..fde1fc6
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Luminary Micro Stellaris General Purpose Timer Module
+ *
+ * Copyright (c) 2006 CodeSourcery.
+ * Written by Paul Brook
+ *
+ * This code is licensed under the GPL.
+ */
+
+#ifndef HW_TIMER_STELLARIS_GPTM_H
+#define HW_TIMER_STELLARIS_GPTM_H
+
+#include "qom/object.h"
+#include "hw/sysbus.h"
+#include "hw/irq.h"
+#include "hw/clock.h"
+
+#define TYPE_STELLARIS_GPTM "stellaris-gptm"
+OBJECT_DECLARE_SIMPLE_TYPE(gptm_state, STELLARIS_GPTM)
+
+/*
+ * QEMU interface:
+ *  + sysbus MMIO region 0: register bank
+ *  + sysbus IRQ 0: timer interrupt
+ *  + unnamed GPIO output 0: trigger output for the ADC
+ *  + Clock input "clk": the 32-bit countdown timer runs at this speed
+ */
+struct gptm_state {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    uint32_t config;
+    uint32_t mode[2];
+    uint32_t control;
+    uint32_t state;
+    uint32_t mask;
+    uint32_t load[2];
+    uint32_t match[2];
+    uint32_t prescale[2];
+    uint32_t match_prescale[2];
+    uint32_t rtc;
+    int64_t tick[2];
+    struct gptm_state *opaque[2];
+    QEMUTimer *timer[2];
+    /* The timers have an alternate output used to trigger the ADC.  */
+    qemu_irq trigger;
+    qemu_irq irq;
+    Clock *clk;
+};
+
+#endif
diff --git a/include/hw/timer/stm32f2xx_timer.h b/include/hw/timer/stm32f2xx_timer.h
new file mode 100644 (file)
index 0000000..90f40f1
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * STM32F2XX Timer
+ *
+ * Copyright (c) 2014 Alistair Francis <alistair@alistair23.me>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_STM32F2XX_TIMER_H
+#define HW_STM32F2XX_TIMER_H
+
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+#include "qom/object.h"
+
+#define TIM_CR1      0x00
+#define TIM_CR2      0x04
+#define TIM_SMCR     0x08
+#define TIM_DIER     0x0C
+#define TIM_SR       0x10
+#define TIM_EGR      0x14
+#define TIM_CCMR1    0x18
+#define TIM_CCMR2    0x1C
+#define TIM_CCER     0x20
+#define TIM_CNT      0x24
+#define TIM_PSC      0x28
+#define TIM_ARR      0x2C
+#define TIM_CCR1     0x34
+#define TIM_CCR2     0x38
+#define TIM_CCR3     0x3C
+#define TIM_CCR4     0x40
+#define TIM_DCR      0x48
+#define TIM_DMAR     0x4C
+#define TIM_OR       0x50
+
+#define TIM_CR1_CEN   1
+
+#define TIM_EGR_UG 1
+
+#define TIM_CCER_CC2E   (1 << 4)
+#define TIM_CCMR1_OC2M2 (1 << 14)
+#define TIM_CCMR1_OC2M1 (1 << 13)
+#define TIM_CCMR1_OC2M0 (1 << 12)
+#define TIM_CCMR1_OC2PE (1 << 11)
+
+#define TIM_DIER_UIE  1
+
+#define TYPE_STM32F2XX_TIMER "stm32f2xx-timer"
+typedef struct STM32F2XXTimerState STM32F2XXTimerState;
+DECLARE_INSTANCE_CHECKER(STM32F2XXTimerState, STM32F2XXTIMER,
+                         TYPE_STM32F2XX_TIMER)
+
+struct STM32F2XXTimerState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion iomem;
+    QEMUTimer *timer;
+    qemu_irq irq;
+
+    int64_t tick_offset;
+    uint64_t hit_time;
+    uint64_t freq_hz;
+
+    uint32_t tim_cr1;
+    uint32_t tim_cr2;
+    uint32_t tim_smcr;
+    uint32_t tim_dier;
+    uint32_t tim_sr;
+    uint32_t tim_egr;
+    uint32_t tim_ccmr1;
+    uint32_t tim_ccmr2;
+    uint32_t tim_ccer;
+    uint32_t tim_psc;
+    uint32_t tim_arr;
+    uint32_t tim_ccr1;
+    uint32_t tim_ccr2;
+    uint32_t tim_ccr3;
+    uint32_t tim_ccr4;
+    uint32_t tim_dcr;
+    uint32_t tim_dmar;
+    uint32_t tim_or;
+};
+
+#endif /* HW_STM32F2XX_TIMER_H */
diff --git a/include/hw/timer/tmu012.h b/include/hw/timer/tmu012.h
new file mode 100644 (file)
index 0000000..808ed8d
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * SuperH Timer
+ *
+ * Copyright (c) 2007 Magnus Damm
+ *
+ * This code is licensed under the GPL.
+ */
+
+#ifndef HW_TIMER_TMU012_H
+#define HW_TIMER_TMU012_H
+
+#include "exec/hwaddr.h"
+
+#define TMU012_FEAT_TOCR   (1 << 0)
+#define TMU012_FEAT_3CHAN  (1 << 1)
+#define TMU012_FEAT_EXTCLK (1 << 2)
+
+void tmu012_init(MemoryRegion *sysmem, hwaddr base,
+                 int feat, uint32_t freq,
+                 qemu_irq ch0_irq, qemu_irq ch1_irq,
+                 qemu_irq ch2_irq0, qemu_irq ch2_irq1);
+
+#endif
diff --git a/include/hw/tricore/tc27x_soc.h b/include/hw/tricore/tc27x_soc.h
new file mode 100644 (file)
index 0000000..dd3a748
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ * Infineon tc27x SoC System emulation.
+ *
+ * Copyright (c) 2020 Andreas Konopik <andreas.konopik@efs-auto.de>
+ * Copyright (c) 2020 David Brenken <david.brenken@efs-auto.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TC27X_SOC_H
+#define TC27X_SOC_H
+
+#include "hw/sysbus.h"
+#include "target/tricore/cpu.h"
+#include "qom/object.h"
+
+#define TYPE_TC27X_SOC ("tc27x-soc")
+OBJECT_DECLARE_TYPE(TC27XSoCState, TC27XSoCClass, TC27X_SOC)
+
+typedef struct TC27XSoCCPUMemState {
+
+    MemoryRegion dspr;
+    MemoryRegion pspr;
+
+    MemoryRegion dcache;
+    MemoryRegion dtag;
+    MemoryRegion pcache;
+    MemoryRegion ptag;
+
+} TC27XSoCCPUMemState;
+
+typedef struct TC27XSoCFlashMemState {
+
+    MemoryRegion pflash0_c;
+    MemoryRegion pflash1_c;
+    MemoryRegion pflash0_u;
+    MemoryRegion pflash1_u;
+    MemoryRegion dflash0;
+    MemoryRegion dflash1;
+    MemoryRegion olda_c;
+    MemoryRegion olda_u;
+    MemoryRegion brom_c;
+    MemoryRegion brom_u;
+    MemoryRegion lmuram_c;
+    MemoryRegion lmuram_u;
+    MemoryRegion emem_c;
+    MemoryRegion emem_u;
+
+} TC27XSoCFlashMemState;
+
+typedef struct TC27XSoCState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    TriCoreCPU cpu;
+
+    MemoryRegion dsprX;
+    MemoryRegion psprX;
+
+    TC27XSoCCPUMemState cpu0mem;
+    TC27XSoCCPUMemState cpu1mem;
+    TC27XSoCCPUMemState cpu2mem;
+
+    TC27XSoCFlashMemState flashmem;
+
+} TC27XSoCState;
+
+typedef struct MemmapEntry {
+    hwaddr base;
+    hwaddr size;
+} MemmapEntry;
+
+typedef struct TC27XSoCClass {
+    DeviceClass parent_class;
+
+    const char *name;
+    const char *cpu_type;
+    const MemmapEntry *memmap;
+    uint32_t num_cpus;
+} TC27XSoCClass;
+
+enum {
+    TC27XD_DSPR2,
+    TC27XD_DCACHE2,
+    TC27XD_DTAG2,
+    TC27XD_PSPR2,
+    TC27XD_PCACHE2,
+    TC27XD_PTAG2,
+    TC27XD_DSPR1,
+    TC27XD_DCACHE1,
+    TC27XD_DTAG1,
+    TC27XD_PSPR1,
+    TC27XD_PCACHE1,
+    TC27XD_PTAG1,
+    TC27XD_DSPR0,
+    TC27XD_PSPR0,
+    TC27XD_PCACHE0,
+    TC27XD_PTAG0,
+    TC27XD_PFLASH0_C,
+    TC27XD_PFLASH1_C,
+    TC27XD_OLDA_C,
+    TC27XD_BROM_C,
+    TC27XD_LMURAM_C,
+    TC27XD_EMEM_C,
+    TC27XD_PFLASH0_U,
+    TC27XD_PFLASH1_U,
+    TC27XD_DFLASH0,
+    TC27XD_DFLASH1,
+    TC27XD_OLDA_U,
+    TC27XD_BROM_U,
+    TC27XD_LMURAM_U,
+    TC27XD_EMEM_U,
+    TC27XD_PSPRX,
+    TC27XD_DSPRX,
+};
+
+#endif
diff --git a/include/hw/tricore/triboard.h b/include/hw/tricore/triboard.h
new file mode 100644 (file)
index 0000000..4fdd2d7
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Infineon TriBoard System emulation.
+ *
+ * Copyright (c) 2020 Andreas Konopik <andreas.konopik@efs-auto.de>
+ * Copyright (c) 2020 David Brenken <david.brenken@efs-auto.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qapi/error.h"
+#include "hw/boards.h"
+#include "sysemu/sysemu.h"
+#include "exec/address-spaces.h"
+#include "qom/object.h"
+
+#include "hw/tricore/tc27x_soc.h"
+
+#define TYPE_TRIBOARD_MACHINE MACHINE_TYPE_NAME("triboard")
+typedef struct TriBoardMachineState TriBoardMachineState;
+typedef struct TriBoardMachineClass TriBoardMachineClass;
+DECLARE_OBJ_CHECKERS(TriBoardMachineState, TriBoardMachineClass,
+                     TRIBOARD_MACHINE, TYPE_TRIBOARD_MACHINE)
+
+
+struct TriBoardMachineState {
+    MachineState parent;
+
+    TC27XSoCState tc27x_soc;
+};
+
+struct TriBoardMachineClass {
+    MachineClass parent_obj;
+
+    const char *name;
+    const char *desc;
+    const char *soc_name;
+};
diff --git a/include/hw/tricore/tricore.h b/include/hw/tricore/tricore.h
new file mode 100644 (file)
index 0000000..c19ed3f
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef HW_TRICORE_H
+#define HW_TRICORE_H
+
+#include "exec/memory.h"
+
+struct tricore_boot_info {
+    uint64_t ram_size;
+    const char *kernel_filename;
+};
+#endif
diff --git a/include/hw/tricore/tricore_testdevice.h b/include/hw/tricore/tricore_testdevice.h
new file mode 100644 (file)
index 0000000..8b4fe15
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ *  Copyright (c) 2018-2021  Bastian Koppelmann Paderborn University
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_TRICORE_TESTDEVICE_H
+#define HW_TRICORE_TESTDEVICE_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_TRICORE_TESTDEVICE "tricore_testdevice"
+#define TRICORE_TESTDEVICE(obj) \
+    OBJECT_CHECK(TriCoreTestDeviceState, (obj), TYPE_TRICORE_TESTDEVICE)
+
+typedef struct {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion iomem;
+
+} TriCoreTestDeviceState;
+
+#endif
index a70a72e9177a9ac8a927c64c7e66135b35b5e15b..32c23a5ca2a0559328800bc117137d25b3ed8e8e 100644 (file)
 //#define USB_STATE_POWERED     2
 #define USB_STATE_DEFAULT     3
 //#define USB_STATE_ADDRESS     4
-//#define      USB_STATE_CONFIGURED  5
+//#define       USB_STATE_CONFIGURED  5
 #define USB_STATE_SUSPENDED   6
 
-#define USB_CLASS_AUDIO                        1
-#define USB_CLASS_COMM                 2
-#define USB_CLASS_HID                  3
-#define USB_CLASS_PHYSICAL             5
-#define USB_CLASS_STILL_IMAGE          6
-#define USB_CLASS_PRINTER              7
-#define USB_CLASS_MASS_STORAGE         8
-#define USB_CLASS_HUB                  9
-#define USB_CLASS_CDC_DATA             0x0a
-#define USB_CLASS_CSCID                        0x0b
-#define USB_CLASS_CONTENT_SEC          0x0d
-#define USB_CLASS_APP_SPEC             0xfe
-#define USB_CLASS_VENDOR_SPEC          0xff
+#define USB_CLASS_AUDIO                 1
+#define USB_CLASS_COMM                  2
+#define USB_CLASS_HID                   3
+#define USB_CLASS_PHYSICAL              5
+#define USB_CLASS_STILL_IMAGE           6
+#define USB_CLASS_PRINTER               7
+#define USB_CLASS_MASS_STORAGE          8
+#define USB_CLASS_HUB                   9
+#define USB_CLASS_CDC_DATA              0x0a
+#define USB_CLASS_CSCID                 0x0b
+#define USB_CLASS_CONTENT_SEC           0x0d
+#define USB_CLASS_APP_SPEC              0xfe
+#define USB_CLASS_VENDOR_SPEC           0xff
 
 #define USB_SUBCLASS_UNDEFINED          0
 #define USB_SUBCLASS_AUDIO_CONTROL      1
 #define USB_SUBCLASS_AUDIO_STREAMING    2
 #define USB_SUBCLASS_AUDIO_MIDISTREAMING 3
 
-#define USB_DIR_OUT                    0
-#define USB_DIR_IN                     0x80
+#define USB_DIR_OUT                     0
+#define USB_DIR_IN                      0x80
 
-#define USB_TYPE_MASK                  (0x03 << 5)
-#define USB_TYPE_STANDARD              (0x00 << 5)
-#define USB_TYPE_CLASS                 (0x01 << 5)
-#define USB_TYPE_VENDOR                        (0x02 << 5)
-#define USB_TYPE_RESERVED              (0x03 << 5)
+#define USB_TYPE_MASK                   (0x03 << 5)
+#define USB_TYPE_STANDARD               (0x00 << 5)
+#define USB_TYPE_CLASS                  (0x01 << 5)
+#define USB_TYPE_VENDOR                 (0x02 << 5)
+#define USB_TYPE_RESERVED               (0x03 << 5)
 
-#define USB_RECIP_MASK                 0x1f
-#define USB_RECIP_DEVICE               0x00
-#define USB_RECIP_INTERFACE            0x01
-#define USB_RECIP_ENDPOINT             0x02
-#define USB_RECIP_OTHER                        0x03
+#define USB_RECIP_MASK                  0x1f
+#define USB_RECIP_DEVICE                0x00
+#define USB_RECIP_INTERFACE             0x01
+#define USB_RECIP_ENDPOINT              0x02
+#define USB_RECIP_OTHER                 0x03
 
 #define DeviceRequest ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_DEVICE)<<8)
 #define DeviceOutRequest ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_DEVICE)<<8)
 #define EndpointOutRequest \
         ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8)
 
-#define USB_REQ_GET_STATUS             0x00
-#define USB_REQ_CLEAR_FEATURE          0x01
-#define USB_REQ_SET_FEATURE            0x03
-#define USB_REQ_SET_ADDRESS            0x05
-#define USB_REQ_GET_DESCRIPTOR         0x06
-#define USB_REQ_SET_DESCRIPTOR         0x07
-#define USB_REQ_GET_CONFIGURATION      0x08
-#define USB_REQ_SET_CONFIGURATION      0x09
-#define USB_REQ_GET_INTERFACE          0x0A
-#define USB_REQ_SET_INTERFACE          0x0B
-#define USB_REQ_SYNCH_FRAME            0x0C
+#define USB_REQ_GET_STATUS              0x00
+#define USB_REQ_CLEAR_FEATURE           0x01
+#define USB_REQ_SET_FEATURE             0x03
+#define USB_REQ_SET_ADDRESS             0x05
+#define USB_REQ_GET_DESCRIPTOR          0x06
+#define USB_REQ_SET_DESCRIPTOR          0x07
+#define USB_REQ_GET_CONFIGURATION       0x08
+#define USB_REQ_SET_CONFIGURATION       0x09
+#define USB_REQ_GET_INTERFACE           0x0A
+#define USB_REQ_SET_INTERFACE           0x0B
+#define USB_REQ_SYNCH_FRAME             0x0C
 #define USB_REQ_SET_SEL                 0x30
 #define USB_REQ_SET_ISOCH_DELAY         0x31
 
-#define USB_DEVICE_SELF_POWERED                0
-#define USB_DEVICE_REMOTE_WAKEUP       1
+#define USB_DEVICE_SELF_POWERED         0
+#define USB_DEVICE_REMOTE_WAKEUP        1
 
-#define USB_DT_DEVICE                  0x01
-#define USB_DT_CONFIG                  0x02
-#define USB_DT_STRING                  0x03
-#define USB_DT_INTERFACE               0x04
-#define USB_DT_ENDPOINT                        0x05
+#define USB_DT_DEVICE                   0x01
+#define USB_DT_CONFIG                   0x02
+#define USB_DT_STRING                   0x03
+#define USB_DT_INTERFACE                0x04
+#define USB_DT_ENDPOINT                 0x05
 #define USB_DT_DEVICE_QUALIFIER         0x06
 #define USB_DT_OTHER_SPEED_CONFIG       0x07
 #define USB_DT_DEBUG                    0x0A
 #define USB_CFG_ATT_WAKEUP           (1 << 5)
 #define USB_CFG_ATT_BATTERY          (1 << 4)
 
-#define USB_ENDPOINT_XFER_CONTROL      0
-#define USB_ENDPOINT_XFER_ISOC         1
-#define USB_ENDPOINT_XFER_BULK         2
-#define USB_ENDPOINT_XFER_INT          3
+#define USB_ENDPOINT_XFER_CONTROL       0
+#define USB_ENDPOINT_XFER_ISOC          1
+#define USB_ENDPOINT_XFER_BULK          2
+#define USB_ENDPOINT_XFER_INT           3
 #define USB_ENDPOINT_XFER_INVALID     255
 
 #define USB_INTERFACE_INVALID         255
@@ -216,10 +216,10 @@ struct USBEndpoint {
 };
 
 enum USBDeviceFlags {
-    USB_DEV_FLAG_FULL_PATH,
     USB_DEV_FLAG_IS_HOST,
     USB_DEV_FLAG_MSOS_DESC_ENABLE,
     USB_DEV_FLAG_MSOS_DESC_IN_USE,
+    USB_DEV_FLAG_IS_SCSI_STORAGE,
 };
 
 /* definition of a USB device */
@@ -231,6 +231,9 @@ struct USBDevice {
     void *opaque;
     uint32_t flags;
 
+    char *pcap_filename;
+    FILE *pcap;
+
     /* Actual connected speed */
     int speed;
     /* Supported speeds, not in info because it may be variable (hostdevs) */
@@ -463,7 +466,6 @@ void usb_generic_async_ctrl_complete(USBDevice *s, USBPacket *p);
 
 /* usb-linux.c */
 void hmp_info_usbhost(Monitor *mon, const QDict *qdict);
-bool usb_host_dev_is_scsi_storage(USBDevice *usbdev);
 
 /* usb ports of the VM */
 
@@ -497,7 +499,7 @@ void usb_bus_new(USBBus *bus, size_t bus_size,
 void usb_bus_release(USBBus *bus);
 USBBus *usb_bus_find(int busnr);
 void usb_legacy_register(const char *typename, const char *usbdevice_name,
-                         USBDevice *(*usbdevice_init)(const char *params));
+                         USBDevice *(*usbdevice_init)(void));
 USBDevice *usb_new(const char *name);
 bool usb_realize_and_unref(USBDevice *dev, USBBus *bus, Error **errp);
 USBDevice *usb_create_simple(USBBus *bus, const char *name);
@@ -559,15 +561,25 @@ const char *usb_device_get_product_desc(USBDevice *dev);
 
 const USBDesc *usb_device_get_usb_desc(USBDevice *dev);
 
+static inline bool usb_device_is_scsi_storage(USBDevice *dev)
+{
+    return dev->flags & (1 << USB_DEV_FLAG_IS_SCSI_STORAGE);
+}
+
 /* quirks.c */
 
 /* In bulk endpoints are streaming data sources (iow behave like isoc eps) */
-#define USB_QUIRK_BUFFER_BULK_IN       0x01
+#define USB_QUIRK_BUFFER_BULK_IN        0x01
 /* Bulk pkts in FTDI format, need special handling when combining packets */
-#define USB_QUIRK_IS_FTDI              0x02
+#define USB_QUIRK_IS_FTDI               0x02
 
 int usb_get_quirks(uint16_t vendor_id, uint16_t product_id,
                    uint8_t interface_class, uint8_t interface_subclass,
                    uint8_t interface_protocol);
 
+/* pcap.c */
+void usb_pcap_init(FILE *fp);
+void usb_pcap_ctrl(USBPacket *p, bool setup);
+void usb_pcap_data(USBPacket *p, bool setup);
+
 #endif
diff --git a/include/hw/usb/chipidea.h b/include/hw/usb/chipidea.h
new file mode 100644 (file)
index 0000000..fe4113e
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef CHIPIDEA_H
+#define CHIPIDEA_H
+
+#include "hw/usb/hcd-ehci.h"
+#include "qom/object.h"
+
+struct ChipideaState {
+    /*< private >*/
+    EHCISysBusState parent_obj;
+
+    MemoryRegion iomem[3];
+};
+
+#define TYPE_CHIPIDEA "usb-chipidea"
+OBJECT_DECLARE_SIMPLE_TYPE(ChipideaState, CHIPIDEA)
+
+#endif /* CHIPIDEA_H */
diff --git a/include/hw/usb/dwc2-regs.h b/include/hw/usb/dwc2-regs.h
new file mode 100644 (file)
index 0000000..0bf3f2a
--- /dev/null
@@ -0,0 +1,899 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Imported from the Linux kernel file drivers/usb/dwc2/hw.h, commit
+ * a89bae709b3492b478480a2c9734e7e9393b279c ("usb: dwc2: Move
+ * UTMI_PHY_DATA defines closer")
+ *
+ * hw.h - DesignWare HS OTG Controller hardware definitions
+ *
+ * Copyright 2004-2013 Synopsys, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The names of the above-listed copyright holders may not be used
+ *    to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DWC2_REGS_H
+#define DWC2_REGS_H
+
+#define HSOTG_REG(x)    (x)
+
+#define GOTGCTL                         HSOTG_REG(0x000)
+#define GOTGCTL_CHIRPEN                 BIT(27)
+#define GOTGCTL_MULT_VALID_BC_MASK      (0x1f << 22)
+#define GOTGCTL_MULT_VALID_BC_SHIFT     22
+#define GOTGCTL_OTGVER                  BIT(20)
+#define GOTGCTL_BSESVLD                 BIT(19)
+#define GOTGCTL_ASESVLD                 BIT(18)
+#define GOTGCTL_DBNC_SHORT              BIT(17)
+#define GOTGCTL_CONID_B                 BIT(16)
+#define GOTGCTL_DBNCE_FLTR_BYPASS       BIT(15)
+#define GOTGCTL_DEVHNPEN                BIT(11)
+#define GOTGCTL_HSTSETHNPEN             BIT(10)
+#define GOTGCTL_HNPREQ                  BIT(9)
+#define GOTGCTL_HSTNEGSCS               BIT(8)
+#define GOTGCTL_SESREQ                  BIT(1)
+#define GOTGCTL_SESREQSCS               BIT(0)
+
+#define GOTGINT                         HSOTG_REG(0x004)
+#define GOTGINT_DBNCE_DONE              BIT(19)
+#define GOTGINT_A_DEV_TOUT_CHG          BIT(18)
+#define GOTGINT_HST_NEG_DET             BIT(17)
+#define GOTGINT_HST_NEG_SUC_STS_CHNG    BIT(9)
+#define GOTGINT_SES_REQ_SUC_STS_CHNG    BIT(8)
+#define GOTGINT_SES_END_DET             BIT(2)
+
+#define GAHBCFG                         HSOTG_REG(0x008)
+#define GAHBCFG_AHB_SINGLE              BIT(23)
+#define GAHBCFG_NOTI_ALL_DMA_WRIT       BIT(22)
+#define GAHBCFG_REM_MEM_SUPP            BIT(21)
+#define GAHBCFG_P_TXF_EMP_LVL           BIT(8)
+#define GAHBCFG_NP_TXF_EMP_LVL          BIT(7)
+#define GAHBCFG_DMA_EN                  BIT(5)
+#define GAHBCFG_HBSTLEN_MASK            (0xf << 1)
+#define GAHBCFG_HBSTLEN_SHIFT           1
+#define GAHBCFG_HBSTLEN_SINGLE          0
+#define GAHBCFG_HBSTLEN_INCR            1
+#define GAHBCFG_HBSTLEN_INCR4           3
+#define GAHBCFG_HBSTLEN_INCR8           5
+#define GAHBCFG_HBSTLEN_INCR16          7
+#define GAHBCFG_GLBL_INTR_EN            BIT(0)
+#define GAHBCFG_CTRL_MASK               (GAHBCFG_P_TXF_EMP_LVL | \
+                                         GAHBCFG_NP_TXF_EMP_LVL | \
+                                         GAHBCFG_DMA_EN | \
+                                         GAHBCFG_GLBL_INTR_EN)
+
+#define GUSBCFG                         HSOTG_REG(0x00C)
+#define GUSBCFG_FORCEDEVMODE            BIT(30)
+#define GUSBCFG_FORCEHOSTMODE           BIT(29)
+#define GUSBCFG_TXENDDELAY              BIT(28)
+#define GUSBCFG_ICTRAFFICPULLREMOVE     BIT(27)
+#define GUSBCFG_ICUSBCAP                BIT(26)
+#define GUSBCFG_ULPI_INT_PROT_DIS       BIT(25)
+#define GUSBCFG_INDICATORPASSTHROUGH    BIT(24)
+#define GUSBCFG_INDICATORCOMPLEMENT     BIT(23)
+#define GUSBCFG_TERMSELDLPULSE          BIT(22)
+#define GUSBCFG_ULPI_INT_VBUS_IND       BIT(21)
+#define GUSBCFG_ULPI_EXT_VBUS_DRV       BIT(20)
+#define GUSBCFG_ULPI_CLK_SUSP_M         BIT(19)
+#define GUSBCFG_ULPI_AUTO_RES           BIT(18)
+#define GUSBCFG_ULPI_FS_LS              BIT(17)
+#define GUSBCFG_OTG_UTMI_FS_SEL         BIT(16)
+#define GUSBCFG_PHY_LP_CLK_SEL          BIT(15)
+#define GUSBCFG_USBTRDTIM_MASK          (0xf << 10)
+#define GUSBCFG_USBTRDTIM_SHIFT         10
+#define GUSBCFG_HNPCAP                  BIT(9)
+#define GUSBCFG_SRPCAP                  BIT(8)
+#define GUSBCFG_DDRSEL                  BIT(7)
+#define GUSBCFG_PHYSEL                  BIT(6)
+#define GUSBCFG_FSINTF                  BIT(5)
+#define GUSBCFG_ULPI_UTMI_SEL           BIT(4)
+#define GUSBCFG_PHYIF16                 BIT(3)
+#define GUSBCFG_PHYIF8                  (0 << 3)
+#define GUSBCFG_TOUTCAL_MASK            (0x7 << 0)
+#define GUSBCFG_TOUTCAL_SHIFT           0
+#define GUSBCFG_TOUTCAL_LIMIT           0x7
+#define GUSBCFG_TOUTCAL(_x)             ((_x) << 0)
+
+#define GRSTCTL                         HSOTG_REG(0x010)
+#define GRSTCTL_AHBIDLE                 BIT(31)
+#define GRSTCTL_DMAREQ                  BIT(30)
+#define GRSTCTL_TXFNUM_MASK             (0x1f << 6)
+#define GRSTCTL_TXFNUM_SHIFT            6
+#define GRSTCTL_TXFNUM_LIMIT            0x1f
+#define GRSTCTL_TXFNUM(_x)              ((_x) << 6)
+#define GRSTCTL_TXFFLSH                 BIT(5)
+#define GRSTCTL_RXFFLSH                 BIT(4)
+#define GRSTCTL_IN_TKNQ_FLSH            BIT(3)
+#define GRSTCTL_FRMCNTRRST              BIT(2)
+#define GRSTCTL_HSFTRST                 BIT(1)
+#define GRSTCTL_CSFTRST                 BIT(0)
+
+#define GINTSTS                         HSOTG_REG(0x014)
+#define GINTMSK                         HSOTG_REG(0x018)
+#define GINTSTS_WKUPINT                 BIT(31)
+#define GINTSTS_SESSREQINT              BIT(30)
+#define GINTSTS_DISCONNINT              BIT(29)
+#define GINTSTS_CONIDSTSCHNG            BIT(28)
+#define GINTSTS_LPMTRANRCVD             BIT(27)
+#define GINTSTS_PTXFEMP                 BIT(26)
+#define GINTSTS_HCHINT                  BIT(25)
+#define GINTSTS_PRTINT                  BIT(24)
+#define GINTSTS_RESETDET                BIT(23)
+#define GINTSTS_FET_SUSP                BIT(22)
+#define GINTSTS_INCOMPL_IP              BIT(21)
+#define GINTSTS_INCOMPL_SOOUT           BIT(21)
+#define GINTSTS_INCOMPL_SOIN            BIT(20)
+#define GINTSTS_OEPINT                  BIT(19)
+#define GINTSTS_IEPINT                  BIT(18)
+#define GINTSTS_EPMIS                   BIT(17)
+#define GINTSTS_RESTOREDONE             BIT(16)
+#define GINTSTS_EOPF                    BIT(15)
+#define GINTSTS_ISOUTDROP               BIT(14)
+#define GINTSTS_ENUMDONE                BIT(13)
+#define GINTSTS_USBRST                  BIT(12)
+#define GINTSTS_USBSUSP                 BIT(11)
+#define GINTSTS_ERLYSUSP                BIT(10)
+#define GINTSTS_I2CINT                  BIT(9)
+#define GINTSTS_ULPI_CK_INT             BIT(8)
+#define GINTSTS_GOUTNAKEFF              BIT(7)
+#define GINTSTS_GINNAKEFF               BIT(6)
+#define GINTSTS_NPTXFEMP                BIT(5)
+#define GINTSTS_RXFLVL                  BIT(4)
+#define GINTSTS_SOF                     BIT(3)
+#define GINTSTS_OTGINT                  BIT(2)
+#define GINTSTS_MODEMIS                 BIT(1)
+#define GINTSTS_CURMODE_HOST            BIT(0)
+
+#define GRXSTSR                         HSOTG_REG(0x01C)
+#define GRXSTSP                         HSOTG_REG(0x020)
+#define GRXSTS_FN_MASK                  (0x7f << 25)
+#define GRXSTS_FN_SHIFT                 25
+#define GRXSTS_PKTSTS_MASK              (0xf << 17)
+#define GRXSTS_PKTSTS_SHIFT             17
+#define GRXSTS_PKTSTS_GLOBALOUTNAK      1
+#define GRXSTS_PKTSTS_OUTRX             2
+#define GRXSTS_PKTSTS_HCHIN             2
+#define GRXSTS_PKTSTS_OUTDONE           3
+#define GRXSTS_PKTSTS_HCHIN_XFER_COMP   3
+#define GRXSTS_PKTSTS_SETUPDONE         4
+#define GRXSTS_PKTSTS_DATATOGGLEERR     5
+#define GRXSTS_PKTSTS_SETUPRX           6
+#define GRXSTS_PKTSTS_HCHHALTED         7
+#define GRXSTS_HCHNUM_MASK              (0xf << 0)
+#define GRXSTS_HCHNUM_SHIFT             0
+#define GRXSTS_DPID_MASK                (0x3 << 15)
+#define GRXSTS_DPID_SHIFT               15
+#define GRXSTS_BYTECNT_MASK             (0x7ff << 4)
+#define GRXSTS_BYTECNT_SHIFT            4
+#define GRXSTS_EPNUM_MASK               (0xf << 0)
+#define GRXSTS_EPNUM_SHIFT              0
+
+#define GRXFSIZ                         HSOTG_REG(0x024)
+#define GRXFSIZ_DEPTH_MASK              (0xffff << 0)
+#define GRXFSIZ_DEPTH_SHIFT             0
+
+#define GNPTXFSIZ                       HSOTG_REG(0x028)
+/* Use FIFOSIZE_* constants to access this register */
+
+#define GNPTXSTS                        HSOTG_REG(0x02C)
+#define GNPTXSTS_NP_TXQ_TOP_MASK                (0x7f << 24)
+#define GNPTXSTS_NP_TXQ_TOP_SHIFT               24
+#define GNPTXSTS_NP_TXQ_SPC_AVAIL_MASK          (0xff << 16)
+#define GNPTXSTS_NP_TXQ_SPC_AVAIL_SHIFT         16
+#define GNPTXSTS_NP_TXQ_SPC_AVAIL_GET(_v)       (((_v) >> 16) & 0xff)
+#define GNPTXSTS_NP_TXF_SPC_AVAIL_MASK          (0xffff << 0)
+#define GNPTXSTS_NP_TXF_SPC_AVAIL_SHIFT         0
+#define GNPTXSTS_NP_TXF_SPC_AVAIL_GET(_v)       (((_v) >> 0) & 0xffff)
+
+#define GI2CCTL                         HSOTG_REG(0x0030)
+#define GI2CCTL_BSYDNE                  BIT(31)
+#define GI2CCTL_RW                      BIT(30)
+#define GI2CCTL_I2CDATSE0               BIT(28)
+#define GI2CCTL_I2CDEVADDR_MASK         (0x3 << 26)
+#define GI2CCTL_I2CDEVADDR_SHIFT        26
+#define GI2CCTL_I2CSUSPCTL              BIT(25)
+#define GI2CCTL_ACK                     BIT(24)
+#define GI2CCTL_I2CEN                   BIT(23)
+#define GI2CCTL_ADDR_MASK               (0x7f << 16)
+#define GI2CCTL_ADDR_SHIFT              16
+#define GI2CCTL_REGADDR_MASK            (0xff << 8)
+#define GI2CCTL_REGADDR_SHIFT           8
+#define GI2CCTL_RWDATA_MASK             (0xff << 0)
+#define GI2CCTL_RWDATA_SHIFT            0
+
+#define GPVNDCTL                        HSOTG_REG(0x0034)
+#define GGPIO                           HSOTG_REG(0x0038)
+#define GGPIO_STM32_OTG_GCCFG_PWRDWN    BIT(16)
+
+#define GUID                            HSOTG_REG(0x003c)
+#define GSNPSID                         HSOTG_REG(0x0040)
+#define GHWCFG1                         HSOTG_REG(0x0044)
+#define GSNPSID_ID_MASK                 GENMASK(31, 16)
+
+#define GHWCFG2                         HSOTG_REG(0x0048)
+#define GHWCFG2_OTG_ENABLE_IC_USB               BIT(31)
+#define GHWCFG2_DEV_TOKEN_Q_DEPTH_MASK          (0x1f << 26)
+#define GHWCFG2_DEV_TOKEN_Q_DEPTH_SHIFT         26
+#define GHWCFG2_HOST_PERIO_TX_Q_DEPTH_MASK      (0x3 << 24)
+#define GHWCFG2_HOST_PERIO_TX_Q_DEPTH_SHIFT     24
+#define GHWCFG2_NONPERIO_TX_Q_DEPTH_MASK        (0x3 << 22)
+#define GHWCFG2_NONPERIO_TX_Q_DEPTH_SHIFT       22
+#define GHWCFG2_MULTI_PROC_INT                  BIT(20)
+#define GHWCFG2_DYNAMIC_FIFO                    BIT(19)
+#define GHWCFG2_PERIO_EP_SUPPORTED              BIT(18)
+#define GHWCFG2_NUM_HOST_CHAN_MASK              (0xf << 14)
+#define GHWCFG2_NUM_HOST_CHAN_SHIFT             14
+#define GHWCFG2_NUM_DEV_EP_MASK                 (0xf << 10)
+#define GHWCFG2_NUM_DEV_EP_SHIFT                10
+#define GHWCFG2_FS_PHY_TYPE_MASK                (0x3 << 8)
+#define GHWCFG2_FS_PHY_TYPE_SHIFT               8
+#define GHWCFG2_FS_PHY_TYPE_NOT_SUPPORTED       0
+#define GHWCFG2_FS_PHY_TYPE_DEDICATED           1
+#define GHWCFG2_FS_PHY_TYPE_SHARED_UTMI         2
+#define GHWCFG2_FS_PHY_TYPE_SHARED_ULPI         3
+#define GHWCFG2_HS_PHY_TYPE_MASK                (0x3 << 6)
+#define GHWCFG2_HS_PHY_TYPE_SHIFT               6
+#define GHWCFG2_HS_PHY_TYPE_NOT_SUPPORTED       0
+#define GHWCFG2_HS_PHY_TYPE_UTMI                1
+#define GHWCFG2_HS_PHY_TYPE_ULPI                2
+#define GHWCFG2_HS_PHY_TYPE_UTMI_ULPI           3
+#define GHWCFG2_POINT2POINT                     BIT(5)
+#define GHWCFG2_ARCHITECTURE_MASK               (0x3 << 3)
+#define GHWCFG2_ARCHITECTURE_SHIFT              3
+#define GHWCFG2_SLAVE_ONLY_ARCH                 0
+#define GHWCFG2_EXT_DMA_ARCH                    1
+#define GHWCFG2_INT_DMA_ARCH                    2
+#define GHWCFG2_OP_MODE_MASK                    (0x7 << 0)
+#define GHWCFG2_OP_MODE_SHIFT                   0
+#define GHWCFG2_OP_MODE_HNP_SRP_CAPABLE         0
+#define GHWCFG2_OP_MODE_SRP_ONLY_CAPABLE        1
+#define GHWCFG2_OP_MODE_NO_HNP_SRP_CAPABLE      2
+#define GHWCFG2_OP_MODE_SRP_CAPABLE_DEVICE      3
+#define GHWCFG2_OP_MODE_NO_SRP_CAPABLE_DEVICE   4
+#define GHWCFG2_OP_MODE_SRP_CAPABLE_HOST        5
+#define GHWCFG2_OP_MODE_NO_SRP_CAPABLE_HOST     6
+#define GHWCFG2_OP_MODE_UNDEFINED               7
+
+#define GHWCFG3                         HSOTG_REG(0x004c)
+#define GHWCFG3_DFIFO_DEPTH_MASK                (0xffff << 16)
+#define GHWCFG3_DFIFO_DEPTH_SHIFT               16
+#define GHWCFG3_OTG_LPM_EN                      BIT(15)
+#define GHWCFG3_BC_SUPPORT                      BIT(14)
+#define GHWCFG3_OTG_ENABLE_HSIC                 BIT(13)
+#define GHWCFG3_ADP_SUPP                        BIT(12)
+#define GHWCFG3_SYNCH_RESET_TYPE                BIT(11)
+#define GHWCFG3_OPTIONAL_FEATURES               BIT(10)
+#define GHWCFG3_VENDOR_CTRL_IF                  BIT(9)
+#define GHWCFG3_I2C                             BIT(8)
+#define GHWCFG3_OTG_FUNC                        BIT(7)
+#define GHWCFG3_PACKET_SIZE_CNTR_WIDTH_MASK     (0x7 << 4)
+#define GHWCFG3_PACKET_SIZE_CNTR_WIDTH_SHIFT    4
+#define GHWCFG3_XFER_SIZE_CNTR_WIDTH_MASK       (0xf << 0)
+#define GHWCFG3_XFER_SIZE_CNTR_WIDTH_SHIFT      0
+
+#define GHWCFG4                         HSOTG_REG(0x0050)
+#define GHWCFG4_DESC_DMA_DYN                    BIT(31)
+#define GHWCFG4_DESC_DMA                        BIT(30)
+#define GHWCFG4_NUM_IN_EPS_MASK                 (0xf << 26)
+#define GHWCFG4_NUM_IN_EPS_SHIFT                26
+#define GHWCFG4_DED_FIFO_EN                     BIT(25)
+#define GHWCFG4_DED_FIFO_SHIFT          25
+#define GHWCFG4_SESSION_END_FILT_EN             BIT(24)
+#define GHWCFG4_B_VALID_FILT_EN                 BIT(23)
+#define GHWCFG4_A_VALID_FILT_EN                 BIT(22)
+#define GHWCFG4_VBUS_VALID_FILT_EN              BIT(21)
+#define GHWCFG4_IDDIG_FILT_EN                   BIT(20)
+#define GHWCFG4_NUM_DEV_MODE_CTRL_EP_MASK       (0xf << 16)
+#define GHWCFG4_NUM_DEV_MODE_CTRL_EP_SHIFT      16
+#define GHWCFG4_UTMI_PHY_DATA_WIDTH_MASK        (0x3 << 14)
+#define GHWCFG4_UTMI_PHY_DATA_WIDTH_SHIFT       14
+#define GHWCFG4_UTMI_PHY_DATA_WIDTH_8           0
+#define GHWCFG4_UTMI_PHY_DATA_WIDTH_16          1
+#define GHWCFG4_UTMI_PHY_DATA_WIDTH_8_OR_16     2
+#define GHWCFG4_ACG_SUPPORTED                   BIT(12)
+#define GHWCFG4_IPG_ISOC_SUPPORTED              BIT(11)
+#define GHWCFG4_SERVICE_INTERVAL_SUPPORTED      BIT(10)
+#define GHWCFG4_XHIBER                          BIT(7)
+#define GHWCFG4_HIBER                           BIT(6)
+#define GHWCFG4_MIN_AHB_FREQ                    BIT(5)
+#define GHWCFG4_POWER_OPTIMIZ                   BIT(4)
+#define GHWCFG4_NUM_DEV_PERIO_IN_EP_MASK        (0xf << 0)
+#define GHWCFG4_NUM_DEV_PERIO_IN_EP_SHIFT       0
+
+#define GLPMCFG                         HSOTG_REG(0x0054)
+#define GLPMCFG_INVSELHSIC              BIT(31)
+#define GLPMCFG_HSICCON                 BIT(30)
+#define GLPMCFG_RSTRSLPSTS              BIT(29)
+#define GLPMCFG_ENBESL                  BIT(28)
+#define GLPMCFG_LPM_RETRYCNT_STS_MASK   (0x7 << 25)
+#define GLPMCFG_LPM_RETRYCNT_STS_SHIFT  25
+#define GLPMCFG_SNDLPM                  BIT(24)
+#define GLPMCFG_RETRY_CNT_MASK          (0x7 << 21)
+#define GLPMCFG_RETRY_CNT_SHIFT         21
+#define GLPMCFG_LPM_REJECT_CTRL_CONTROL BIT(21)
+#define GLPMCFG_LPM_ACCEPT_CTRL_ISOC    BIT(22)
+#define GLPMCFG_LPM_CHNL_INDX_MASK      (0xf << 17)
+#define GLPMCFG_LPM_CHNL_INDX_SHIFT     17
+#define GLPMCFG_L1RESUMEOK              BIT(16)
+#define GLPMCFG_SLPSTS                  BIT(15)
+#define GLPMCFG_COREL1RES_MASK          (0x3 << 13)
+#define GLPMCFG_COREL1RES_SHIFT         13
+#define GLPMCFG_HIRD_THRES_MASK         (0x1f << 8)
+#define GLPMCFG_HIRD_THRES_SHIFT        8
+#define GLPMCFG_HIRD_THRES_EN           (0x10 << 8)
+#define GLPMCFG_ENBLSLPM                BIT(7)
+#define GLPMCFG_BREMOTEWAKE             BIT(6)
+#define GLPMCFG_HIRD_MASK               (0xf << 2)
+#define GLPMCFG_HIRD_SHIFT              2
+#define GLPMCFG_APPL1RES                BIT(1)
+#define GLPMCFG_LPMCAP                  BIT(0)
+
+#define GPWRDN                          HSOTG_REG(0x0058)
+#define GPWRDN_MULT_VAL_ID_BC_MASK      (0x1f << 24)
+#define GPWRDN_MULT_VAL_ID_BC_SHIFT     24
+#define GPWRDN_ADP_INT                  BIT(23)
+#define GPWRDN_BSESSVLD                 BIT(22)
+#define GPWRDN_IDSTS                    BIT(21)
+#define GPWRDN_LINESTATE_MASK           (0x3 << 19)
+#define GPWRDN_LINESTATE_SHIFT          19
+#define GPWRDN_STS_CHGINT_MSK           BIT(18)
+#define GPWRDN_STS_CHGINT               BIT(17)
+#define GPWRDN_SRP_DET_MSK              BIT(16)
+#define GPWRDN_SRP_DET                  BIT(15)
+#define GPWRDN_CONNECT_DET_MSK          BIT(14)
+#define GPWRDN_CONNECT_DET              BIT(13)
+#define GPWRDN_DISCONN_DET_MSK          BIT(12)
+#define GPWRDN_DISCONN_DET              BIT(11)
+#define GPWRDN_RST_DET_MSK              BIT(10)
+#define GPWRDN_RST_DET                  BIT(9)
+#define GPWRDN_LNSTSCHG_MSK             BIT(8)
+#define GPWRDN_LNSTSCHG                 BIT(7)
+#define GPWRDN_DIS_VBUS                 BIT(6)
+#define GPWRDN_PWRDNSWTCH               BIT(5)
+#define GPWRDN_PWRDNRSTN                BIT(4)
+#define GPWRDN_PWRDNCLMP                BIT(3)
+#define GPWRDN_RESTORE                  BIT(2)
+#define GPWRDN_PMUACTV                  BIT(1)
+#define GPWRDN_PMUINTSEL                BIT(0)
+
+#define GDFIFOCFG                       HSOTG_REG(0x005c)
+#define GDFIFOCFG_EPINFOBASE_MASK       (0xffff << 16)
+#define GDFIFOCFG_EPINFOBASE_SHIFT      16
+#define GDFIFOCFG_GDFIFOCFG_MASK        (0xffff << 0)
+#define GDFIFOCFG_GDFIFOCFG_SHIFT       0
+
+#define ADPCTL                          HSOTG_REG(0x0060)
+#define ADPCTL_AR_MASK                  (0x3 << 27)
+#define ADPCTL_AR_SHIFT                 27
+#define ADPCTL_ADP_TMOUT_INT_MSK        BIT(26)
+#define ADPCTL_ADP_SNS_INT_MSK          BIT(25)
+#define ADPCTL_ADP_PRB_INT_MSK          BIT(24)
+#define ADPCTL_ADP_TMOUT_INT            BIT(23)
+#define ADPCTL_ADP_SNS_INT              BIT(22)
+#define ADPCTL_ADP_PRB_INT              BIT(21)
+#define ADPCTL_ADPENA                   BIT(20)
+#define ADPCTL_ADPRES                   BIT(19)
+#define ADPCTL_ENASNS                   BIT(18)
+#define ADPCTL_ENAPRB                   BIT(17)
+#define ADPCTL_RTIM_MASK                (0x7ff << 6)
+#define ADPCTL_RTIM_SHIFT               6
+#define ADPCTL_PRB_PER_MASK             (0x3 << 4)
+#define ADPCTL_PRB_PER_SHIFT            4
+#define ADPCTL_PRB_DELTA_MASK           (0x3 << 2)
+#define ADPCTL_PRB_DELTA_SHIFT          2
+#define ADPCTL_PRB_DSCHRG_MASK          (0x3 << 0)
+#define ADPCTL_PRB_DSCHRG_SHIFT         0
+
+#define GREFCLK                             HSOTG_REG(0x0064)
+#define GREFCLK_REFCLKPER_MASK              (0x1ffff << 15)
+#define GREFCLK_REFCLKPER_SHIFT             15
+#define GREFCLK_REF_CLK_MODE                BIT(14)
+#define GREFCLK_SOF_CNT_WKUP_ALERT_MASK     (0x3ff)
+#define GREFCLK_SOF_CNT_WKUP_ALERT_SHIFT    0
+
+#define GINTMSK2                        HSOTG_REG(0x0068)
+#define GINTMSK2_WKUP_ALERT_INT_MSK     BIT(0)
+
+#define GINTSTS2                        HSOTG_REG(0x006c)
+#define GINTSTS2_WKUP_ALERT_INT         BIT(0)
+
+#define HPTXFSIZ                        HSOTG_REG(0x100)
+/* Use FIFOSIZE_* constants to access this register */
+
+#define DPTXFSIZN(_a)                   HSOTG_REG(0x104 + (((_a) - 1) * 4))
+/* Use FIFOSIZE_* constants to access this register */
+
+/* These apply to the GNPTXFSIZ, HPTXFSIZ and DPTXFSIZN registers */
+#define FIFOSIZE_DEPTH_MASK             (0xffff << 16)
+#define FIFOSIZE_DEPTH_SHIFT            16
+#define FIFOSIZE_STARTADDR_MASK         (0xffff << 0)
+#define FIFOSIZE_STARTADDR_SHIFT        0
+#define FIFOSIZE_DEPTH_GET(_x)          (((_x) >> 16) & 0xffff)
+
+/* Device mode registers */
+
+#define DCFG                            HSOTG_REG(0x800)
+#define DCFG_DESCDMA_EN                 BIT(23)
+#define DCFG_EPMISCNT_MASK              (0x1f << 18)
+#define DCFG_EPMISCNT_SHIFT             18
+#define DCFG_EPMISCNT_LIMIT             0x1f
+#define DCFG_EPMISCNT(_x)               ((_x) << 18)
+#define DCFG_IPG_ISOC_SUPPORDED         BIT(17)
+#define DCFG_PERFRINT_MASK              (0x3 << 11)
+#define DCFG_PERFRINT_SHIFT             11
+#define DCFG_PERFRINT_LIMIT             0x3
+#define DCFG_PERFRINT(_x)               ((_x) << 11)
+#define DCFG_DEVADDR_MASK               (0x7f << 4)
+#define DCFG_DEVADDR_SHIFT              4
+#define DCFG_DEVADDR_LIMIT              0x7f
+#define DCFG_DEVADDR(_x)                ((_x) << 4)
+#define DCFG_NZ_STS_OUT_HSHK            BIT(2)
+#define DCFG_DEVSPD_MASK                (0x3 << 0)
+#define DCFG_DEVSPD_SHIFT               0
+#define DCFG_DEVSPD_HS                  0
+#define DCFG_DEVSPD_FS                  1
+#define DCFG_DEVSPD_LS                  2
+#define DCFG_DEVSPD_FS48                3
+
+#define DCTL                            HSOTG_REG(0x804)
+#define DCTL_SERVICE_INTERVAL_SUPPORTED BIT(19)
+#define DCTL_PWRONPRGDONE               BIT(11)
+#define DCTL_CGOUTNAK                   BIT(10)
+#define DCTL_SGOUTNAK                   BIT(9)
+#define DCTL_CGNPINNAK                  BIT(8)
+#define DCTL_SGNPINNAK                  BIT(7)
+#define DCTL_TSTCTL_MASK                (0x7 << 4)
+#define DCTL_TSTCTL_SHIFT               4
+#define DCTL_GOUTNAKSTS                 BIT(3)
+#define DCTL_GNPINNAKSTS                BIT(2)
+#define DCTL_SFTDISCON                  BIT(1)
+#define DCTL_RMTWKUPSIG                 BIT(0)
+
+#define DSTS                            HSOTG_REG(0x808)
+#define DSTS_SOFFN_MASK                 (0x3fff << 8)
+#define DSTS_SOFFN_SHIFT                8
+#define DSTS_SOFFN_LIMIT                0x3fff
+#define DSTS_SOFFN(_x)                  ((_x) << 8)
+#define DSTS_ERRATICERR                 BIT(3)
+#define DSTS_ENUMSPD_MASK               (0x3 << 1)
+#define DSTS_ENUMSPD_SHIFT              1
+#define DSTS_ENUMSPD_HS                 0
+#define DSTS_ENUMSPD_FS                 1
+#define DSTS_ENUMSPD_LS                 2
+#define DSTS_ENUMSPD_FS48               3
+#define DSTS_SUSPSTS                    BIT(0)
+
+#define DIEPMSK                         HSOTG_REG(0x810)
+#define DIEPMSK_NAKMSK                  BIT(13)
+#define DIEPMSK_BNAININTRMSK            BIT(9)
+#define DIEPMSK_TXFIFOUNDRNMSK          BIT(8)
+#define DIEPMSK_TXFIFOEMPTY             BIT(7)
+#define DIEPMSK_INEPNAKEFFMSK           BIT(6)
+#define DIEPMSK_INTKNEPMISMSK           BIT(5)
+#define DIEPMSK_INTKNTXFEMPMSK          BIT(4)
+#define DIEPMSK_TIMEOUTMSK              BIT(3)
+#define DIEPMSK_AHBERRMSK               BIT(2)
+#define DIEPMSK_EPDISBLDMSK             BIT(1)
+#define DIEPMSK_XFERCOMPLMSK            BIT(0)
+
+#define DOEPMSK                         HSOTG_REG(0x814)
+#define DOEPMSK_BNAMSK                  BIT(9)
+#define DOEPMSK_BACK2BACKSETUP          BIT(6)
+#define DOEPMSK_STSPHSERCVDMSK          BIT(5)
+#define DOEPMSK_OUTTKNEPDISMSK          BIT(4)
+#define DOEPMSK_SETUPMSK                BIT(3)
+#define DOEPMSK_AHBERRMSK               BIT(2)
+#define DOEPMSK_EPDISBLDMSK             BIT(1)
+#define DOEPMSK_XFERCOMPLMSK            BIT(0)
+
+#define DAINT                           HSOTG_REG(0x818)
+#define DAINTMSK                        HSOTG_REG(0x81C)
+#define DAINT_OUTEP_SHIFT               16
+#define DAINT_OUTEP(_x)                 (1 << ((_x) + 16))
+#define DAINT_INEP(_x)                  (1 << (_x))
+
+#define DTKNQR1                         HSOTG_REG(0x820)
+#define DTKNQR2                         HSOTG_REG(0x824)
+#define DTKNQR3                         HSOTG_REG(0x830)
+#define DTKNQR4                         HSOTG_REG(0x834)
+#define DIEPEMPMSK                      HSOTG_REG(0x834)
+
+#define DVBUSDIS                        HSOTG_REG(0x828)
+#define DVBUSPULSE                      HSOTG_REG(0x82C)
+
+#define DIEPCTL0                        HSOTG_REG(0x900)
+#define DIEPCTL(_a)                     HSOTG_REG(0x900 + ((_a) * 0x20))
+
+#define DOEPCTL0                        HSOTG_REG(0xB00)
+#define DOEPCTL(_a)                     HSOTG_REG(0xB00 + ((_a) * 0x20))
+
+/* EP0 specialness:
+ * bits[29..28] - reserved (no SetD0PID, SetD1PID)
+ * bits[25..22] - should always be zero, this isn't a periodic endpoint
+ * bits[10..0]  - MPS setting different for EP0
+ */
+#define D0EPCTL_MPS_MASK                (0x3 << 0)
+#define D0EPCTL_MPS_SHIFT               0
+#define D0EPCTL_MPS_64                  0
+#define D0EPCTL_MPS_32                  1
+#define D0EPCTL_MPS_16                  2
+#define D0EPCTL_MPS_8                   3
+
+#define DXEPCTL_EPENA                   BIT(31)
+#define DXEPCTL_EPDIS                   BIT(30)
+#define DXEPCTL_SETD1PID                BIT(29)
+#define DXEPCTL_SETODDFR                BIT(29)
+#define DXEPCTL_SETD0PID                BIT(28)
+#define DXEPCTL_SETEVENFR               BIT(28)
+#define DXEPCTL_SNAK                    BIT(27)
+#define DXEPCTL_CNAK                    BIT(26)
+#define DXEPCTL_TXFNUM_MASK             (0xf << 22)
+#define DXEPCTL_TXFNUM_SHIFT            22
+#define DXEPCTL_TXFNUM_LIMIT            0xf
+#define DXEPCTL_TXFNUM(_x)              ((_x) << 22)
+#define DXEPCTL_STALL                   BIT(21)
+#define DXEPCTL_SNP                     BIT(20)
+#define DXEPCTL_EPTYPE_MASK             (0x3 << 18)
+#define DXEPCTL_EPTYPE_CONTROL          (0x0 << 18)
+#define DXEPCTL_EPTYPE_ISO              (0x1 << 18)
+#define DXEPCTL_EPTYPE_BULK             (0x2 << 18)
+#define DXEPCTL_EPTYPE_INTERRUPT        (0x3 << 18)
+
+#define DXEPCTL_NAKSTS                  BIT(17)
+#define DXEPCTL_DPID                    BIT(16)
+#define DXEPCTL_EOFRNUM                 BIT(16)
+#define DXEPCTL_USBACTEP                BIT(15)
+#define DXEPCTL_NEXTEP_MASK             (0xf << 11)
+#define DXEPCTL_NEXTEP_SHIFT            11
+#define DXEPCTL_NEXTEP_LIMIT            0xf
+#define DXEPCTL_NEXTEP(_x)              ((_x) << 11)
+#define DXEPCTL_MPS_MASK                (0x7ff << 0)
+#define DXEPCTL_MPS_SHIFT               0
+#define DXEPCTL_MPS_LIMIT               0x7ff
+#define DXEPCTL_MPS(_x)                 ((_x) << 0)
+
+#define DIEPINT(_a)                     HSOTG_REG(0x908 + ((_a) * 0x20))
+#define DOEPINT(_a)                     HSOTG_REG(0xB08 + ((_a) * 0x20))
+#define DXEPINT_SETUP_RCVD              BIT(15)
+#define DXEPINT_NYETINTRPT              BIT(14)
+#define DXEPINT_NAKINTRPT               BIT(13)
+#define DXEPINT_BBLEERRINTRPT           BIT(12)
+#define DXEPINT_PKTDRPSTS               BIT(11)
+#define DXEPINT_BNAINTR                 BIT(9)
+#define DXEPINT_TXFIFOUNDRN             BIT(8)
+#define DXEPINT_OUTPKTERR               BIT(8)
+#define DXEPINT_TXFEMP                  BIT(7)
+#define DXEPINT_INEPNAKEFF              BIT(6)
+#define DXEPINT_BACK2BACKSETUP          BIT(6)
+#define DXEPINT_INTKNEPMIS              BIT(5)
+#define DXEPINT_STSPHSERCVD             BIT(5)
+#define DXEPINT_INTKNTXFEMP             BIT(4)
+#define DXEPINT_OUTTKNEPDIS             BIT(4)
+#define DXEPINT_TIMEOUT                 BIT(3)
+#define DXEPINT_SETUP                   BIT(3)
+#define DXEPINT_AHBERR                  BIT(2)
+#define DXEPINT_EPDISBLD                BIT(1)
+#define DXEPINT_XFERCOMPL               BIT(0)
+
+#define DIEPTSIZ0                       HSOTG_REG(0x910)
+#define DIEPTSIZ0_PKTCNT_MASK           (0x3 << 19)
+#define DIEPTSIZ0_PKTCNT_SHIFT          19
+#define DIEPTSIZ0_PKTCNT_LIMIT          0x3
+#define DIEPTSIZ0_PKTCNT(_x)            ((_x) << 19)
+#define DIEPTSIZ0_XFERSIZE_MASK         (0x7f << 0)
+#define DIEPTSIZ0_XFERSIZE_SHIFT        0
+#define DIEPTSIZ0_XFERSIZE_LIMIT        0x7f
+#define DIEPTSIZ0_XFERSIZE(_x)          ((_x) << 0)
+
+#define DOEPTSIZ0                       HSOTG_REG(0xB10)
+#define DOEPTSIZ0_SUPCNT_MASK           (0x3 << 29)
+#define DOEPTSIZ0_SUPCNT_SHIFT          29
+#define DOEPTSIZ0_SUPCNT_LIMIT          0x3
+#define DOEPTSIZ0_SUPCNT(_x)            ((_x) << 29)
+#define DOEPTSIZ0_PKTCNT                BIT(19)
+#define DOEPTSIZ0_XFERSIZE_MASK         (0x7f << 0)
+#define DOEPTSIZ0_XFERSIZE_SHIFT        0
+
+#define DIEPTSIZ(_a)                    HSOTG_REG(0x910 + ((_a) * 0x20))
+#define DOEPTSIZ(_a)                    HSOTG_REG(0xB10 + ((_a) * 0x20))
+#define DXEPTSIZ_MC_MASK                (0x3 << 29)
+#define DXEPTSIZ_MC_SHIFT               29
+#define DXEPTSIZ_MC_LIMIT               0x3
+#define DXEPTSIZ_MC(_x)                 ((_x) << 29)
+#define DXEPTSIZ_PKTCNT_MASK            (0x3ff << 19)
+#define DXEPTSIZ_PKTCNT_SHIFT           19
+#define DXEPTSIZ_PKTCNT_LIMIT           0x3ff
+#define DXEPTSIZ_PKTCNT_GET(_v)         (((_v) >> 19) & 0x3ff)
+#define DXEPTSIZ_PKTCNT(_x)             ((_x) << 19)
+#define DXEPTSIZ_XFERSIZE_MASK          (0x7ffff << 0)
+#define DXEPTSIZ_XFERSIZE_SHIFT         0
+#define DXEPTSIZ_XFERSIZE_LIMIT         0x7ffff
+#define DXEPTSIZ_XFERSIZE_GET(_v)       (((_v) >> 0) & 0x7ffff)
+#define DXEPTSIZ_XFERSIZE(_x)           ((_x) << 0)
+
+#define DIEPDMA(_a)                     HSOTG_REG(0x914 + ((_a) * 0x20))
+#define DOEPDMA(_a)                     HSOTG_REG(0xB14 + ((_a) * 0x20))
+
+#define DTXFSTS(_a)                     HSOTG_REG(0x918 + ((_a) * 0x20))
+
+#define PCGCTL                          HSOTG_REG(0x0e00)
+#define PCGCTL_IF_DEV_MODE              BIT(31)
+#define PCGCTL_P2HD_PRT_SPD_MASK        (0x3 << 29)
+#define PCGCTL_P2HD_PRT_SPD_SHIFT       29
+#define PCGCTL_P2HD_DEV_ENUM_SPD_MASK   (0x3 << 27)
+#define PCGCTL_P2HD_DEV_ENUM_SPD_SHIFT  27
+#define PCGCTL_MAC_DEV_ADDR_MASK        (0x7f << 20)
+#define PCGCTL_MAC_DEV_ADDR_SHIFT       20
+#define PCGCTL_MAX_TERMSEL              BIT(19)
+#define PCGCTL_MAX_XCVRSELECT_MASK      (0x3 << 17)
+#define PCGCTL_MAX_XCVRSELECT_SHIFT     17
+#define PCGCTL_PORT_POWER               BIT(16)
+#define PCGCTL_PRT_CLK_SEL_MASK         (0x3 << 14)
+#define PCGCTL_PRT_CLK_SEL_SHIFT        14
+#define PCGCTL_ESS_REG_RESTORED         BIT(13)
+#define PCGCTL_EXTND_HIBER_SWITCH       BIT(12)
+#define PCGCTL_EXTND_HIBER_PWRCLMP      BIT(11)
+#define PCGCTL_ENBL_EXTND_HIBER         BIT(10)
+#define PCGCTL_RESTOREMODE              BIT(9)
+#define PCGCTL_RESETAFTSUSP             BIT(8)
+#define PCGCTL_DEEP_SLEEP               BIT(7)
+#define PCGCTL_PHY_IN_SLEEP             BIT(6)
+#define PCGCTL_ENBL_SLEEP_GATING        BIT(5)
+#define PCGCTL_RSTPDWNMODULE            BIT(3)
+#define PCGCTL_PWRCLMP                  BIT(2)
+#define PCGCTL_GATEHCLK                 BIT(1)
+#define PCGCTL_STOPPCLK                 BIT(0)
+
+#define PCGCCTL1                        HSOTG_REG(0xe04)
+#define PCGCCTL1_TIMER                  (0x3 << 1)
+#define PCGCCTL1_GATEEN                 BIT(0)
+
+#define EPFIFO(_a)                      HSOTG_REG(0x1000 + ((_a) * 0x1000))
+
+/* Host Mode Registers */
+
+#define HCFG                            HSOTG_REG(0x0400)
+#define HCFG_MODECHTIMEN                BIT(31)
+#define HCFG_PERSCHEDENA                BIT(26)
+#define HCFG_FRLISTEN_MASK              (0x3 << 24)
+#define HCFG_FRLISTEN_SHIFT             24
+#define HCFG_FRLISTEN_8                         (0 << 24)
+#define FRLISTEN_8_SIZE                         8
+#define HCFG_FRLISTEN_16                        BIT(24)
+#define FRLISTEN_16_SIZE                        16
+#define HCFG_FRLISTEN_32                        (2 << 24)
+#define FRLISTEN_32_SIZE                        32
+#define HCFG_FRLISTEN_64                        (3 << 24)
+#define FRLISTEN_64_SIZE                        64
+#define HCFG_DESCDMA                    BIT(23)
+#define HCFG_RESVALID_MASK              (0xff << 8)
+#define HCFG_RESVALID_SHIFT             8
+#define HCFG_ENA32KHZ                   BIT(7)
+#define HCFG_FSLSSUPP                   BIT(2)
+#define HCFG_FSLSPCLKSEL_MASK           (0x3 << 0)
+#define HCFG_FSLSPCLKSEL_SHIFT          0
+#define HCFG_FSLSPCLKSEL_30_60_MHZ      0
+#define HCFG_FSLSPCLKSEL_48_MHZ         1
+#define HCFG_FSLSPCLKSEL_6_MHZ          2
+
+#define HFIR                            HSOTG_REG(0x0404)
+#define HFIR_FRINT_MASK                 (0xffff << 0)
+#define HFIR_FRINT_SHIFT                0
+#define HFIR_RLDCTRL                    BIT(16)
+
+#define HFNUM                           HSOTG_REG(0x0408)
+#define HFNUM_FRREM_MASK                (0xffff << 16)
+#define HFNUM_FRREM_SHIFT               16
+#define HFNUM_FRNUM_MASK                (0xffff << 0)
+#define HFNUM_FRNUM_SHIFT               0
+#define HFNUM_MAX_FRNUM                 0x3fff
+
+#define HPTXSTS                         HSOTG_REG(0x0410)
+#define TXSTS_QTOP_ODD                  BIT(31)
+#define TXSTS_QTOP_CHNEP_MASK           (0xf << 27)
+#define TXSTS_QTOP_CHNEP_SHIFT          27
+#define TXSTS_QTOP_TOKEN_MASK           (0x3 << 25)
+#define TXSTS_QTOP_TOKEN_SHIFT          25
+#define TXSTS_QTOP_TERMINATE            BIT(24)
+#define TXSTS_QSPCAVAIL_MASK            (0xff << 16)
+#define TXSTS_QSPCAVAIL_SHIFT           16
+#define TXSTS_FSPCAVAIL_MASK            (0xffff << 0)
+#define TXSTS_FSPCAVAIL_SHIFT           0
+
+#define HAINT                           HSOTG_REG(0x0414)
+#define HAINTMSK                        HSOTG_REG(0x0418)
+#define HFLBADDR                        HSOTG_REG(0x041c)
+
+#define HPRT0                           HSOTG_REG(0x0440)
+#define HPRT0_SPD_MASK                  (0x3 << 17)
+#define HPRT0_SPD_SHIFT                 17
+#define HPRT0_SPD_HIGH_SPEED            0
+#define HPRT0_SPD_FULL_SPEED            1
+#define HPRT0_SPD_LOW_SPEED             2
+#define HPRT0_TSTCTL_MASK               (0xf << 13)
+#define HPRT0_TSTCTL_SHIFT              13
+#define HPRT0_PWR                       BIT(12)
+#define HPRT0_LNSTS_MASK                (0x3 << 10)
+#define HPRT0_LNSTS_SHIFT               10
+#define HPRT0_RST                       BIT(8)
+#define HPRT0_SUSP                      BIT(7)
+#define HPRT0_RES                       BIT(6)
+#define HPRT0_OVRCURRCHG                BIT(5)
+#define HPRT0_OVRCURRACT                BIT(4)
+#define HPRT0_ENACHG                    BIT(3)
+#define HPRT0_ENA                       BIT(2)
+#define HPRT0_CONNDET                   BIT(1)
+#define HPRT0_CONNSTS                   BIT(0)
+
+#define HCCHAR(_ch)                     HSOTG_REG(0x0500 + 0x20 * (_ch))
+#define HCCHAR_CHENA                    BIT(31)
+#define HCCHAR_CHDIS                    BIT(30)
+#define HCCHAR_ODDFRM                   BIT(29)
+#define HCCHAR_DEVADDR_MASK             (0x7f << 22)
+#define HCCHAR_DEVADDR_SHIFT            22
+#define HCCHAR_MULTICNT_MASK            (0x3 << 20)
+#define HCCHAR_MULTICNT_SHIFT           20
+#define HCCHAR_EPTYPE_MASK              (0x3 << 18)
+#define HCCHAR_EPTYPE_SHIFT             18
+#define HCCHAR_LSPDDEV                  BIT(17)
+#define HCCHAR_EPDIR                    BIT(15)
+#define HCCHAR_EPNUM_MASK               (0xf << 11)
+#define HCCHAR_EPNUM_SHIFT              11
+#define HCCHAR_MPS_MASK                 (0x7ff << 0)
+#define HCCHAR_MPS_SHIFT                0
+
+#define HCSPLT(_ch)                     HSOTG_REG(0x0504 + 0x20 * (_ch))
+#define HCSPLT_SPLTENA                  BIT(31)
+#define HCSPLT_COMPSPLT                 BIT(16)
+#define HCSPLT_XACTPOS_MASK             (0x3 << 14)
+#define HCSPLT_XACTPOS_SHIFT            14
+#define HCSPLT_XACTPOS_MID              0
+#define HCSPLT_XACTPOS_END              1
+#define HCSPLT_XACTPOS_BEGIN            2
+#define HCSPLT_XACTPOS_ALL              3
+#define HCSPLT_HUBADDR_MASK             (0x7f << 7)
+#define HCSPLT_HUBADDR_SHIFT            7
+#define HCSPLT_PRTADDR_MASK             (0x7f << 0)
+#define HCSPLT_PRTADDR_SHIFT            0
+
+#define HCINT(_ch)                      HSOTG_REG(0x0508 + 0x20 * (_ch))
+#define HCINTMSK(_ch)                   HSOTG_REG(0x050c + 0x20 * (_ch))
+#define HCINTMSK_RESERVED14_31          (0x3ffff << 14)
+#define HCINTMSK_FRM_LIST_ROLL          BIT(13)
+#define HCINTMSK_XCS_XACT               BIT(12)
+#define HCINTMSK_BNA                    BIT(11)
+#define HCINTMSK_DATATGLERR             BIT(10)
+#define HCINTMSK_FRMOVRUN               BIT(9)
+#define HCINTMSK_BBLERR                 BIT(8)
+#define HCINTMSK_XACTERR                BIT(7)
+#define HCINTMSK_NYET                   BIT(6)
+#define HCINTMSK_ACK                    BIT(5)
+#define HCINTMSK_NAK                    BIT(4)
+#define HCINTMSK_STALL                  BIT(3)
+#define HCINTMSK_AHBERR                 BIT(2)
+#define HCINTMSK_CHHLTD                 BIT(1)
+#define HCINTMSK_XFERCOMPL              BIT(0)
+
+#define HCTSIZ(_ch)                     HSOTG_REG(0x0510 + 0x20 * (_ch))
+#define TSIZ_DOPNG                      BIT(31)
+#define TSIZ_SC_MC_PID_MASK             (0x3 << 29)
+#define TSIZ_SC_MC_PID_SHIFT            29
+#define TSIZ_SC_MC_PID_DATA0            0
+#define TSIZ_SC_MC_PID_DATA2            1
+#define TSIZ_SC_MC_PID_DATA1            2
+#define TSIZ_SC_MC_PID_MDATA            3
+#define TSIZ_SC_MC_PID_SETUP            3
+#define TSIZ_PKTCNT_MASK                (0x3ff << 19)
+#define TSIZ_PKTCNT_SHIFT               19
+#define TSIZ_NTD_MASK                   (0xff << 8)
+#define TSIZ_NTD_SHIFT                  8
+#define TSIZ_SCHINFO_MASK               (0xff << 0)
+#define TSIZ_SCHINFO_SHIFT              0
+#define TSIZ_XFERSIZE_MASK              (0x7ffff << 0)
+#define TSIZ_XFERSIZE_SHIFT             0
+
+#define HCDMA(_ch)                      HSOTG_REG(0x0514 + 0x20 * (_ch))
+
+#define HCDMAB(_ch)                     HSOTG_REG(0x051c + 0x20 * (_ch))
+
+#define HCFIFO(_ch)                     HSOTG_REG(0x1000 + 0x1000 * (_ch))
+
+/**
+ * struct dwc2_dma_desc - DMA descriptor structure,
+ * used for both host and gadget modes
+ *
+ * @status: DMA descriptor status quadlet
+ * @buf:    DMA descriptor data buffer pointer
+ *
+ * DMA Descriptor structure contains two quadlets:
+ * Status quadlet and Data buffer pointer.
+ */
+struct dwc2_dma_desc {
+        uint32_t status;
+        uint32_t buf;
+} __packed;
+
+/* Host Mode DMA descriptor status quadlet */
+
+#define HOST_DMA_A                      BIT(31)
+#define HOST_DMA_STS_MASK               (0x3 << 28)
+#define HOST_DMA_STS_SHIFT              28
+#define HOST_DMA_STS_PKTERR             BIT(28)
+#define HOST_DMA_EOL                    BIT(26)
+#define HOST_DMA_IOC                    BIT(25)
+#define HOST_DMA_SUP                    BIT(24)
+#define HOST_DMA_ALT_QTD                BIT(23)
+#define HOST_DMA_QTD_OFFSET_MASK        (0x3f << 17)
+#define HOST_DMA_QTD_OFFSET_SHIFT       17
+#define HOST_DMA_ISOC_NBYTES_MASK       (0xfff << 0)
+#define HOST_DMA_ISOC_NBYTES_SHIFT      0
+#define HOST_DMA_NBYTES_MASK            (0x1ffff << 0)
+#define HOST_DMA_NBYTES_SHIFT           0
+#define HOST_DMA_NBYTES_LIMIT           131071
+
+/* Device Mode DMA descriptor status quadlet */
+
+#define DEV_DMA_BUFF_STS_MASK           (0x3 << 30)
+#define DEV_DMA_BUFF_STS_SHIFT          30
+#define DEV_DMA_BUFF_STS_HREADY         0
+#define DEV_DMA_BUFF_STS_DMABUSY        1
+#define DEV_DMA_BUFF_STS_DMADONE        2
+#define DEV_DMA_BUFF_STS_HBUSY          3
+#define DEV_DMA_STS_MASK                (0x3 << 28)
+#define DEV_DMA_STS_SHIFT               28
+#define DEV_DMA_STS_SUCC                0
+#define DEV_DMA_STS_BUFF_FLUSH          1
+#define DEV_DMA_STS_BUFF_ERR            3
+#define DEV_DMA_L                       BIT(27)
+#define DEV_DMA_SHORT                   BIT(26)
+#define DEV_DMA_IOC                     BIT(25)
+#define DEV_DMA_SR                      BIT(24)
+#define DEV_DMA_MTRF                    BIT(23)
+#define DEV_DMA_ISOC_PID_MASK           (0x3 << 23)
+#define DEV_DMA_ISOC_PID_SHIFT          23
+#define DEV_DMA_ISOC_PID_DATA0          0
+#define DEV_DMA_ISOC_PID_DATA2          1
+#define DEV_DMA_ISOC_PID_DATA1          2
+#define DEV_DMA_ISOC_PID_MDATA          3
+#define DEV_DMA_ISOC_FRNUM_MASK         (0x7ff << 12)
+#define DEV_DMA_ISOC_FRNUM_SHIFT        12
+#define DEV_DMA_ISOC_TX_NBYTES_MASK     (0xfff << 0)
+#define DEV_DMA_ISOC_TX_NBYTES_LIMIT    0xfff
+#define DEV_DMA_ISOC_RX_NBYTES_MASK     (0x7ff << 0)
+#define DEV_DMA_ISOC_RX_NBYTES_LIMIT    0x7ff
+#define DEV_DMA_ISOC_NBYTES_SHIFT       0
+#define DEV_DMA_NBYTES_MASK             (0xffff << 0)
+#define DEV_DMA_NBYTES_SHIFT            0
+#define DEV_DMA_NBYTES_LIMIT            0xffff
+
+#define MAX_DMA_DESC_NUM_GENERIC        64
+#define MAX_DMA_DESC_NUM_HS_ISOC        256
+
+#endif /* DWC2_REGS_H */
diff --git a/include/hw/usb/ehci-regs.h b/include/hw/usb/ehci-regs.h
new file mode 100644 (file)
index 0000000..3e91b8e
--- /dev/null
@@ -0,0 +1,82 @@
+#ifndef HW_USB_EHCI_REGS_H
+#define HW_USB_EHCI_REGS_H
+
+/* Capability Registers Base Address - section 2.2 */
+#define CAPLENGTH        0x0000  /* 1-byte, 0x0001 reserved */
+#define HCIVERSION       0x0002  /* 2-bytes, i/f version # */
+#define HCSPARAMS        0x0004  /* 4-bytes, structural params */
+#define HCCPARAMS        0x0008  /* 4-bytes, capability params */
+#define EECP             HCCPARAMS + 1
+#define HCSPPORTROUTE1   0x000c
+#define HCSPPORTROUTE2   0x0010
+
+#define USBCMD           0x0000
+#define USBCMD_RUNSTOP   (1 << 0)      // run / Stop
+#define USBCMD_HCRESET   (1 << 1)      // HC Reset
+#define USBCMD_FLS       (3 << 2)      // Frame List Size
+#define USBCMD_FLS_SH    2             // Frame List Size Shift
+#define USBCMD_PSE       (1 << 4)      // Periodic Schedule Enable
+#define USBCMD_ASE       (1 << 5)      // Asynch Schedule Enable
+#define USBCMD_IAAD      (1 << 6)      // Int Asynch Advance Doorbell
+#define USBCMD_LHCR      (1 << 7)      // Light Host Controller Reset
+#define USBCMD_ASPMC     (3 << 8)      // Async Sched Park Mode Count
+#define USBCMD_ASPME     (1 << 11)     // Async Sched Park Mode Enable
+#define USBCMD_ITC       (0x7f << 16)  // Int Threshold Control
+#define USBCMD_ITC_SH    16            // Int Threshold Control Shift
+
+#define USBSTS           0x0004
+#define USBSTS_RO_MASK   0x0000003f
+#define USBSTS_INT       (1 << 0)      // USB Interrupt
+#define USBSTS_ERRINT    (1 << 1)      // Error Interrupt
+#define USBSTS_PCD       (1 << 2)      // Port Change Detect
+#define USBSTS_FLR       (1 << 3)      // Frame List Rollover
+#define USBSTS_HSE       (1 << 4)      // Host System Error
+#define USBSTS_IAA       (1 << 5)      // Interrupt on Async Advance
+#define USBSTS_HALT      (1 << 12)     // HC Halted
+#define USBSTS_REC       (1 << 13)     // Reclamation
+#define USBSTS_PSS       (1 << 14)     // Periodic Schedule Status
+#define USBSTS_ASS       (1 << 15)     // Asynchronous Schedule Status
+
+/*
+ *  Interrupt enable bits correspond to the interrupt active bits in USBSTS
+ *  so no need to redefine here.
+ */
+#define USBINTR              0x0008
+#define USBINTR_MASK         0x0000003f
+
+#define FRINDEX              0x000c
+#define CTRLDSSEGMENT        0x0010
+#define PERIODICLISTBASE     0x0014
+#define ASYNCLISTADDR        0x0018
+#define ASYNCLISTADDR_MASK   0xffffffe0
+
+#define CONFIGFLAG           0x0040
+
+/*
+ * Bits that are reserved or are read-only are masked out of values
+ * written to us by software
+ */
+#define PORTSC_RO_MASK       0x007001c0
+#define PORTSC_RWC_MASK      0x0000002a
+#define PORTSC_WKOC_E        (1 << 22)    // Wake on Over Current Enable
+#define PORTSC_WKDS_E        (1 << 21)    // Wake on Disconnect Enable
+#define PORTSC_WKCN_E        (1 << 20)    // Wake on Connect Enable
+#define PORTSC_PTC           (15 << 16)   // Port Test Control
+#define PORTSC_PTC_SH        16           // Port Test Control shift
+#define PORTSC_PIC           (3 << 14)    // Port Indicator Control
+#define PORTSC_PIC_SH        14           // Port Indicator Control Shift
+#define PORTSC_POWNER        (1 << 13)    // Port Owner
+#define PORTSC_PPOWER        (1 << 12)    // Port Power
+#define PORTSC_LINESTAT      (3 << 10)    // Port Line Status
+#define PORTSC_LINESTAT_SH   10           // Port Line Status Shift
+#define PORTSC_PRESET        (1 << 8)     // Port Reset
+#define PORTSC_SUSPEND       (1 << 7)     // Port Suspend
+#define PORTSC_FPRES         (1 << 6)     // Force Port Resume
+#define PORTSC_OCC           (1 << 5)     // Over Current Change
+#define PORTSC_OCA           (1 << 4)     // Over Current Active
+#define PORTSC_PEDC          (1 << 3)     // Port Enable/Disable Change
+#define PORTSC_PED           (1 << 2)     // Port Enable/Disable
+#define PORTSC_CSC           (1 << 1)     // Connect Status Change
+#define PORTSC_CONNECT       (1 << 0)     // Current Connect Status
+
+#endif /* HW_USB_EHCI_REGS_H */
diff --git a/include/hw/usb/hcd-dwc3.h b/include/hw/usb/hcd-dwc3.h
new file mode 100644 (file)
index 0000000..f752a27
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * QEMU model of the USB DWC3 host controller emulation.
+ *
+ * Copyright (c) 2020 Xilinx Inc.
+ *
+ * Written by Vikram Garhwal<fnu.vikram@xilinx.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef HCD_DWC3_H
+#define HCD_DWC3_H
+
+#include "hw/register.h"
+#include "hw/usb/hcd-xhci.h"
+#include "hw/usb/hcd-xhci-sysbus.h"
+
+#define TYPE_USB_DWC3 "usb_dwc3"
+
+#define USB_DWC3(obj) \
+     OBJECT_CHECK(USBDWC3, (obj), TYPE_USB_DWC3)
+
+#define USB_DWC3_R_MAX ((0x530 / 4) + 1)
+#define DWC3_SIZE 0x10000
+
+typedef struct USBDWC3 {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+    XHCISysbusState sysbus_xhci;
+
+    uint32_t regs[USB_DWC3_R_MAX];
+    RegisterInfo regs_info[USB_DWC3_R_MAX];
+
+    struct {
+        uint8_t     mode;
+        uint32_t    dwc_usb3_user;
+    } cfg;
+
+} USBDWC3;
+
+#endif
diff --git a/include/hw/usb/hcd-musb.h b/include/hw/usb/hcd-musb.h
new file mode 100644 (file)
index 0000000..4d4b1ec
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * "Inventra" High-speed Dual-Role Controller (MUSB-HDRC), Mentor Graphics,
+ * USB2.0 OTG compliant core used in various chips.
+ *
+ * Only host-mode and non-DMA accesses are currently supported.
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ * Written by Andrzej Zaborowski <balrog@zabor.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_USB_HCD_MUSB_H
+#define HW_USB_HCD_MUSB_H
+
+#include "exec/hwaddr.h"
+
+enum musb_irq_source_e {
+    musb_irq_suspend = 0,
+    musb_irq_resume,
+    musb_irq_rst_babble,
+    musb_irq_sof,
+    musb_irq_connect,
+    musb_irq_disconnect,
+    musb_irq_vbus_request,
+    musb_irq_vbus_error,
+    musb_irq_rx,
+    musb_irq_tx,
+    musb_set_vbus,
+    musb_set_session,
+    /* Add new interrupts here */
+    musb_irq_max /* total number of interrupts defined */
+};
+
+/* TODO convert hcd-musb to QOM/qdev and remove MUSBReadFunc/MUSBWriteFunc */
+typedef void MUSBWriteFunc(void *opaque, hwaddr addr, uint32_t value);
+typedef uint32_t MUSBReadFunc(void *opaque, hwaddr addr);
+extern MUSBReadFunc * const musb_read[];
+extern MUSBWriteFunc * const musb_write[];
+
+typedef struct MUSBState MUSBState;
+
+MUSBState *musb_init(DeviceState *parent_device, int gpio_base);
+void musb_reset(MUSBState *s);
+uint32_t musb_core_intr_get(MUSBState *s);
+void musb_core_intr_clear(MUSBState *s, uint32_t mask);
+void musb_set_size(MUSBState *s, int epnum, int size, int is_tx);
+
+#endif
diff --git a/include/hw/usb/hid.h b/include/hw/usb/hid.h
new file mode 100644 (file)
index 0000000..1c14258
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef HW_USB_HID_H
+#define HW_USB_HID_H
+
+/* HID interface requests */
+#define HID_GET_REPORT   0xa101
+#define HID_GET_IDLE     0xa102
+#define HID_GET_PROTOCOL 0xa103
+#define HID_SET_REPORT   0x2109
+#define HID_SET_IDLE     0x210a
+#define HID_SET_PROTOCOL 0x210b
+
+/* HID descriptor types */
+#define USB_DT_HID    0x21
+#define USB_DT_REPORT 0x22
+#define USB_DT_PHY    0x23
+
+#endif
diff --git a/include/hw/usb/imx-usb-phy.h b/include/hw/usb/imx-usb-phy.h
new file mode 100644 (file)
index 0000000..d1e867b
--- /dev/null
@@ -0,0 +1,54 @@
+#ifndef IMX_USB_PHY_H
+#define IMX_USB_PHY_H
+
+#include "hw/sysbus.h"
+#include "qemu/bitops.h"
+#include "qom/object.h"
+
+enum IMXUsbPhyRegisters {
+    USBPHY_PWD,
+    USBPHY_PWD_SET,
+    USBPHY_PWD_CLR,
+    USBPHY_PWD_TOG,
+    USBPHY_TX,
+    USBPHY_TX_SET,
+    USBPHY_TX_CLR,
+    USBPHY_TX_TOG,
+    USBPHY_RX,
+    USBPHY_RX_SET,
+    USBPHY_RX_CLR,
+    USBPHY_RX_TOG,
+    USBPHY_CTRL,
+    USBPHY_CTRL_SET,
+    USBPHY_CTRL_CLR,
+    USBPHY_CTRL_TOG,
+    USBPHY_STATUS,
+    USBPHY_DEBUG = 0x14,
+    USBPHY_DEBUG_SET,
+    USBPHY_DEBUG_CLR,
+    USBPHY_DEBUG_TOG,
+    USBPHY_DEBUG0_STATUS,
+    USBPHY_DEBUG1 = 0x1c,
+    USBPHY_DEBUG1_SET,
+    USBPHY_DEBUG1_CLR,
+    USBPHY_DEBUG1_TOG,
+    USBPHY_VERSION,
+    USBPHY_MAX
+};
+
+#define USBPHY_CTRL_SFTRST BIT(31)
+
+#define TYPE_IMX_USBPHY "imx.usbphy"
+OBJECT_DECLARE_SIMPLE_TYPE(IMXUSBPHYState, IMX_USBPHY)
+
+struct IMXUSBPHYState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /* <public> */
+    MemoryRegion iomem;
+
+    uint32_t usbphy[USBPHY_MAX];
+};
+
+#endif /* IMX_USB_PHY_H */
diff --git a/include/hw/usb/msd.h b/include/hw/usb/msd.h
new file mode 100644 (file)
index 0000000..f9fd862
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * USB Mass Storage Device emulation
+ *
+ * Copyright (c) 2006 CodeSourcery.
+ * Written by Paul Brook
+ *
+ * This code is licensed under the LGPL.
+ */
+
+#include "hw/usb.h"
+#include "hw/scsi/scsi.h"
+
+enum USBMSDMode {
+    USB_MSDM_CBW, /* Command Block.  */
+    USB_MSDM_DATAOUT, /* Transfer data to device.  */
+    USB_MSDM_DATAIN, /* Transfer data from device.  */
+    USB_MSDM_CSW /* Command Status.  */
+};
+
+struct QEMU_PACKED usb_msd_csw {
+    uint32_t sig;
+    uint32_t tag;
+    uint32_t residue;
+    uint8_t status;
+};
+
+struct MSDState {
+    USBDevice dev;
+    enum USBMSDMode mode;
+    uint32_t scsi_off;
+    uint32_t scsi_len;
+    uint32_t data_len;
+    struct usb_msd_csw csw;
+    SCSIRequest *req;
+    SCSIBus bus;
+    /* For async completion.  */
+    USBPacket *packet;
+    /* usb-storage only */
+    BlockConf conf;
+    bool removable;
+    bool commandlog;
+    SCSIDevice *scsi_dev;
+    bool needs_reset;
+};
+
+typedef struct MSDState MSDState;
+#define TYPE_USB_STORAGE "usb-storage-dev"
+DECLARE_INSTANCE_CHECKER(MSDState, USB_STORAGE_DEV,
+                         TYPE_USB_STORAGE)
+
+void usb_msd_transfer_data(SCSIRequest *req, uint32_t len);
+void usb_msd_command_complete(SCSIRequest *req, size_t resid);
+void usb_msd_request_cancelled(SCSIRequest *req);
+void *usb_msd_load_request(QEMUFile *f, SCSIRequest *req);
+void usb_msd_handle_reset(USBDevice *dev);
diff --git a/include/hw/usb/uhci-regs.h b/include/hw/usb/uhci-regs.h
new file mode 100644 (file)
index 0000000..fd45d29
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef HW_USB_UHCI_REGS_H
+#define HW_USB_UHCI_REGS_H
+
+#define UHCI_CMD_FGR      (1 << 4)
+#define UHCI_CMD_EGSM     (1 << 3)
+#define UHCI_CMD_GRESET   (1 << 2)
+#define UHCI_CMD_HCRESET  (1 << 1)
+#define UHCI_CMD_RS       (1 << 0)
+
+#define UHCI_STS_HCHALTED (1 << 5)
+#define UHCI_STS_HCPERR   (1 << 4)
+#define UHCI_STS_HSERR    (1 << 3)
+#define UHCI_STS_RD       (1 << 2)
+#define UHCI_STS_USBERR   (1 << 1)
+#define UHCI_STS_USBINT   (1 << 0)
+
+#define TD_CTRL_SPD     (1 << 29)
+#define TD_CTRL_ERROR_SHIFT  27
+#define TD_CTRL_IOS     (1 << 25)
+#define TD_CTRL_IOC     (1 << 24)
+#define TD_CTRL_ACTIVE  (1 << 23)
+#define TD_CTRL_STALL   (1 << 22)
+#define TD_CTRL_BABBLE  (1 << 20)
+#define TD_CTRL_NAK     (1 << 19)
+#define TD_CTRL_TIMEOUT (1 << 18)
+
+#define UHCI_PORT_SUSPEND (1 << 12)
+#define UHCI_PORT_RESET (1 << 9)
+#define UHCI_PORT_LSDA  (1 << 8)
+#define UHCI_PORT_RSVD1 (1 << 7)
+#define UHCI_PORT_RD    (1 << 6)
+#define UHCI_PORT_ENC   (1 << 3)
+#define UHCI_PORT_EN    (1 << 2)
+#define UHCI_PORT_CSC   (1 << 1)
+#define UHCI_PORT_CCS   (1 << 0)
+
+#define UHCI_PORT_READ_ONLY    (0x1bb)
+#define UHCI_PORT_WRITE_CLEAR  (UHCI_PORT_CSC | UHCI_PORT_ENC)
+
+#endif /* HW_USB_UHCI_REGS_H */
diff --git a/include/hw/usb/xhci.h b/include/hw/usb/xhci.h
new file mode 100644 (file)
index 0000000..5c90e13
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef HW_USB_XHCI_H
+#define HW_USB_XHCI_H
+
+#define TYPE_XHCI "base-xhci"
+#define TYPE_NEC_XHCI "nec-usb-xhci"
+#define TYPE_QEMU_XHCI "qemu-xhci"
+#define TYPE_XHCI_SYSBUS "sysbus-xhci"
+
+#define XHCI_MAXPORTS_2 15
+#define XHCI_MAXPORTS_3 15
+
+#define XHCI_MAXPORTS (XHCI_MAXPORTS_2 + XHCI_MAXPORTS_3)
+#define XHCI_MAXSLOTS 64
+#define XHCI_MAXINTRS 16
+
+/* must be power of 2 */
+#define XHCI_LEN_REGS 0x4000
+
+void xhci_sysbus_build_aml(Aml *scope, uint32_t mmio, unsigned int irq);
+
+#endif
diff --git a/include/hw/usb/xlnx-usb-subsystem.h b/include/hw/usb/xlnx-usb-subsystem.h
new file mode 100644 (file)
index 0000000..40f9e97
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * QEMU model of the Xilinx usb subsystem
+ *
+ * Copyright (c) 2020 Xilinx Inc. Sai Pavan Boddu <sai.pavan.boddu@xilinx.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XLNX_USB_SUBSYSTEM_H
+#define XLNX_USB_SUBSYSTEM_H
+
+#include "hw/register.h"
+#include "hw/sysbus.h"
+#include "hw/usb/xlnx-versal-usb2-ctrl-regs.h"
+#include "hw/usb/hcd-dwc3.h"
+
+#define TYPE_XILINX_VERSAL_USB2 "xlnx.versal-usb2"
+
+#define VERSAL_USB2(obj) \
+     OBJECT_CHECK(VersalUsb2, (obj), TYPE_XILINX_VERSAL_USB2)
+
+typedef struct VersalUsb2 {
+    SysBusDevice parent_obj;
+    MemoryRegion dwc3_mr;
+    MemoryRegion usb2Ctrl_mr;
+
+    VersalUsb2CtrlRegs usb2Ctrl;
+    USBDWC3 dwc3;
+} VersalUsb2;
+
+#endif
diff --git a/include/hw/usb/xlnx-versal-usb2-ctrl-regs.h b/include/hw/usb/xlnx-versal-usb2-ctrl-regs.h
new file mode 100644 (file)
index 0000000..6a50200
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * QEMU model of the VersalUsb2CtrlRegs Register control/Status block for
+ * USB2.0 controller
+ *
+ * Copyright (c) 2020 Xilinx Inc. Vikram Garhwal <fnu.vikram@xilinx.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XLNX_VERSAL_USB2_CTRL_REGS_H
+#define XLNX_VERSAL_USB2_CTRL_REGS_H
+
+#include "hw/register.h"
+#include "hw/sysbus.h"
+
+#define TYPE_XILINX_VERSAL_USB2_CTRL_REGS "xlnx.versal-usb2-ctrl-regs"
+
+#define XILINX_VERSAL_USB2_CTRL_REGS(obj) \
+     OBJECT_CHECK(VersalUsb2CtrlRegs, (obj), TYPE_XILINX_VERSAL_USB2_CTRL_REGS)
+
+#define USB2_REGS_R_MAX ((0x78 / 4) + 1)
+
+typedef struct VersalUsb2CtrlRegs {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+    qemu_irq irq_ir;
+
+    uint32_t regs[USB2_REGS_R_MAX];
+    RegisterInfo regs_info[USB2_REGS_R_MAX];
+} VersalUsb2CtrlRegs;
+
+#endif
diff --git a/include/hw/vfio/vfio-amd-xgbe.h b/include/hw/vfio/vfio-amd-xgbe.h
new file mode 100644 (file)
index 0000000..a894546
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * VFIO AMD XGBE device
+ *
+ * Copyright Linaro Limited, 2015
+ *
+ * Authors:
+ *  Eric Auger <eric.auger@linaro.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_VFIO_VFIO_AMD_XGBE_H
+#define HW_VFIO_VFIO_AMD_XGBE_H
+
+#include "hw/vfio/vfio-platform.h"
+#include "qom/object.h"
+
+#define TYPE_VFIO_AMD_XGBE "vfio-amd-xgbe"
+
+/**
+ * This device exposes:
+ * - 5 MMIO regions: MAC, PCS, SerDes Rx/Tx regs,
+     SerDes Integration Registers 1/2 & 2/2
+ * - 2 level sensitive IRQs and optional DMA channel IRQs
+ */
+struct VFIOAmdXgbeDevice {
+    VFIOPlatformDevice vdev;
+};
+
+typedef struct VFIOAmdXgbeDevice VFIOAmdXgbeDevice;
+
+struct VFIOAmdXgbeDeviceClass {
+    /*< private >*/
+    VFIOPlatformDeviceClass parent_class;
+    /*< public >*/
+    DeviceRealize parent_realize;
+};
+
+typedef struct VFIOAmdXgbeDeviceClass VFIOAmdXgbeDeviceClass;
+
+DECLARE_OBJ_CHECKERS(VFIOAmdXgbeDevice, VFIOAmdXgbeDeviceClass,
+                     VFIO_AMD_XGBE_DEVICE, TYPE_VFIO_AMD_XGBE)
+
+#endif
diff --git a/include/hw/vfio/vfio-calxeda-xgmac.h b/include/hw/vfio/vfio-calxeda-xgmac.h
new file mode 100644 (file)
index 0000000..8482f15
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * VFIO calxeda xgmac device
+ *
+ * Copyright Linaro Limited, 2014
+ *
+ * Authors:
+ *  Eric Auger <eric.auger@linaro.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_VFIO_VFIO_CALXEDA_XGMAC_H
+#define HW_VFIO_VFIO_CALXEDA_XGMAC_H
+
+#include "hw/vfio/vfio-platform.h"
+#include "qom/object.h"
+
+#define TYPE_VFIO_CALXEDA_XGMAC "vfio-calxeda-xgmac"
+
+/**
+ * This device exposes:
+ * - a single MMIO region corresponding to its register space
+ * - 3 IRQS (main and 2 power related IRQs)
+ */
+struct VFIOCalxedaXgmacDevice {
+    VFIOPlatformDevice vdev;
+};
+typedef struct VFIOCalxedaXgmacDevice VFIOCalxedaXgmacDevice;
+
+struct VFIOCalxedaXgmacDeviceClass {
+    /*< private >*/
+    VFIOPlatformDeviceClass parent_class;
+    /*< public >*/
+    DeviceRealize parent_realize;
+};
+typedef struct VFIOCalxedaXgmacDeviceClass VFIOCalxedaXgmacDeviceClass;
+
+DECLARE_OBJ_CHECKERS(VFIOCalxedaXgmacDevice, VFIOCalxedaXgmacDeviceClass,
+                     VFIO_CALXEDA_XGMAC_DEVICE, TYPE_VFIO_CALXEDA_XGMAC)
+
+#endif
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
new file mode 100644 (file)
index 0000000..a4a22ac
--- /dev/null
@@ -0,0 +1,302 @@
+/*
+ * common header for vfio based device assignment support
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Based on qemu-kvm device-assignment:
+ *  Adapted for KVM by Qumranet.
+ *  Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
+ *  Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
+ *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
+ *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
+ *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
+ */
+
+#ifndef HW_VFIO_VFIO_COMMON_H
+#define HW_VFIO_VFIO_COMMON_H
+
+#include "exec/memory.h"
+#include "qemu/queue.h"
+#include "qemu/notify.h"
+#include "ui/console.h"
+#include "hw/display/ramfb.h"
+#ifdef CONFIG_LINUX
+#include <linux/vfio.h>
+#endif
+#include "sysemu/sysemu.h"
+
+#define VFIO_MSG_PREFIX "vfio %s: "
+
+enum {
+    VFIO_DEVICE_TYPE_PCI = 0,
+    VFIO_DEVICE_TYPE_PLATFORM = 1,
+    VFIO_DEVICE_TYPE_CCW = 2,
+    VFIO_DEVICE_TYPE_AP = 3,
+};
+
+typedef struct VFIOMmap {
+    MemoryRegion mem;
+    void *mmap;
+    off_t offset;
+    size_t size;
+} VFIOMmap;
+
+typedef struct VFIORegion {
+    struct VFIODevice *vbasedev;
+    off_t fd_offset; /* offset of region within device fd */
+    MemoryRegion *mem; /* slow, read/write access */
+    size_t size;
+    uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
+    uint32_t nr_mmaps;
+    VFIOMmap *mmaps;
+    uint8_t nr; /* cache the region number for debug */
+} VFIORegion;
+
+typedef struct VFIOMigration {
+    struct VFIODevice *vbasedev;
+    VMChangeStateEntry *vm_state;
+    Notifier migration_state;
+    uint32_t device_state;
+    int data_fd;
+    void *data_buffer;
+    size_t data_buffer_size;
+    uint64_t mig_flags;
+    uint64_t precopy_init_size;
+    uint64_t precopy_dirty_size;
+    bool initial_data_sent;
+} VFIOMigration;
+
+typedef struct VFIOAddressSpace {
+    AddressSpace *as;
+    QLIST_HEAD(, VFIOContainer) containers;
+    QLIST_ENTRY(VFIOAddressSpace) list;
+} VFIOAddressSpace;
+
+struct VFIOGroup;
+
+typedef struct VFIOContainer {
+    VFIOAddressSpace *space;
+    int fd; /* /dev/vfio/vfio, empowered by the attached groups */
+    MemoryListener listener;
+    MemoryListener prereg_listener;
+    unsigned iommu_type;
+    Error *error;
+    bool initialized;
+    bool dirty_pages_supported;
+    uint64_t dirty_pgsizes;
+    uint64_t max_dirty_bitmap_size;
+    unsigned long pgsizes;
+    unsigned int dma_max_mappings;
+    QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
+    QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
+    QLIST_HEAD(, VFIOGroup) group_list;
+    QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
+    QLIST_ENTRY(VFIOContainer) next;
+    QLIST_HEAD(, VFIODevice) device_list;
+    GList *iova_ranges;
+} VFIOContainer;
+
+typedef struct VFIOGuestIOMMU {
+    VFIOContainer *container;
+    IOMMUMemoryRegion *iommu_mr;
+    hwaddr iommu_offset;
+    IOMMUNotifier n;
+    QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
+} VFIOGuestIOMMU;
+
+typedef struct VFIORamDiscardListener {
+    VFIOContainer *container;
+    MemoryRegion *mr;
+    hwaddr offset_within_address_space;
+    hwaddr size;
+    uint64_t granularity;
+    RamDiscardListener listener;
+    QLIST_ENTRY(VFIORamDiscardListener) next;
+} VFIORamDiscardListener;
+
+typedef struct VFIOHostDMAWindow {
+    hwaddr min_iova;
+    hwaddr max_iova;
+    uint64_t iova_pgsizes;
+    QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
+} VFIOHostDMAWindow;
+
+typedef struct VFIODeviceOps VFIODeviceOps;
+
+typedef struct VFIODevice {
+    QLIST_ENTRY(VFIODevice) next;
+    QLIST_ENTRY(VFIODevice) container_next;
+    QLIST_ENTRY(VFIODevice) global_next;
+    struct VFIOGroup *group;
+    VFIOContainer *container;
+    char *sysfsdev;
+    char *name;
+    DeviceState *dev;
+    int fd;
+    int type;
+    bool reset_works;
+    bool needs_reset;
+    bool no_mmap;
+    bool ram_block_discard_allowed;
+    OnOffAuto enable_migration;
+    VFIODeviceOps *ops;
+    unsigned int num_irqs;
+    unsigned int num_regions;
+    unsigned int flags;
+    VFIOMigration *migration;
+    Error *migration_blocker;
+    OnOffAuto pre_copy_dirty_page_tracking;
+    bool dirty_pages_supported;
+    bool dirty_tracking;
+} VFIODevice;
+
+struct VFIODeviceOps {
+    void (*vfio_compute_needs_reset)(VFIODevice *vdev);
+    int (*vfio_hot_reset_multi)(VFIODevice *vdev);
+    void (*vfio_eoi)(VFIODevice *vdev);
+    Object *(*vfio_get_object)(VFIODevice *vdev);
+    void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f);
+    int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
+};
+
+typedef struct VFIOGroup {
+    int fd;
+    int groupid;
+    VFIOContainer *container;
+    QLIST_HEAD(, VFIODevice) device_list;
+    QLIST_ENTRY(VFIOGroup) next;
+    QLIST_ENTRY(VFIOGroup) container_next;
+    bool ram_block_discard_allowed;
+} VFIOGroup;
+
+typedef struct VFIODMABuf {
+    QemuDmaBuf buf;
+    uint32_t pos_x, pos_y, pos_updates;
+    uint32_t hot_x, hot_y, hot_updates;
+    int dmabuf_id;
+    QTAILQ_ENTRY(VFIODMABuf) next;
+} VFIODMABuf;
+
+typedef struct VFIODisplay {
+    QemuConsole *con;
+    RAMFBState *ramfb;
+    struct vfio_region_info *edid_info;
+    struct vfio_region_gfx_edid *edid_regs;
+    uint8_t *edid_blob;
+    QEMUTimer *edid_link_timer;
+    struct {
+        VFIORegion buffer;
+        DisplaySurface *surface;
+    } region;
+    struct {
+        QTAILQ_HEAD(, VFIODMABuf) bufs;
+        VFIODMABuf *primary;
+        VFIODMABuf *cursor;
+    } dmabuf;
+} VFIODisplay;
+
+typedef struct {
+    unsigned long *bitmap;
+    hwaddr size;
+    hwaddr pages;
+} VFIOBitmap;
+
+VFIOAddressSpace *vfio_get_address_space(AddressSpace *as);
+void vfio_put_address_space(VFIOAddressSpace *space);
+bool vfio_devices_all_running_and_saving(VFIOContainer *container);
+
+/* container->fd */
+int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
+                   ram_addr_t size, IOMMUTLBEntry *iotlb);
+int vfio_dma_map(VFIOContainer *container, hwaddr iova,
+                 ram_addr_t size, void *vaddr, bool readonly);
+int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start);
+int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
+                            hwaddr iova, hwaddr size);
+
+/* SPAPR specific */
+int vfio_container_add_section_window(VFIOContainer *container,
+                                      MemoryRegionSection *section,
+                                      Error **errp);
+void vfio_container_del_section_window(VFIOContainer *container,
+                                       MemoryRegionSection *section);
+int vfio_spapr_container_init(VFIOContainer *container, Error **errp);
+void vfio_spapr_container_deinit(VFIOContainer *container);
+
+void vfio_disable_irqindex(VFIODevice *vbasedev, int index);
+void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index);
+void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index);
+int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex,
+                           int action, int fd, Error **errp);
+void vfio_region_write(void *opaque, hwaddr addr,
+                           uint64_t data, unsigned size);
+uint64_t vfio_region_read(void *opaque,
+                          hwaddr addr, unsigned size);
+int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
+                      int index, const char *name);
+int vfio_region_mmap(VFIORegion *region);
+void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled);
+void vfio_region_unmap(VFIORegion *region);
+void vfio_region_exit(VFIORegion *region);
+void vfio_region_finalize(VFIORegion *region);
+void vfio_reset_handler(void *opaque);
+struct vfio_device_info *vfio_get_device_info(int fd);
+int vfio_attach_device(char *name, VFIODevice *vbasedev,
+                       AddressSpace *as, Error **errp);
+void vfio_detach_device(VFIODevice *vbasedev);
+
+int vfio_kvm_device_add_fd(int fd, Error **errp);
+int vfio_kvm_device_del_fd(int fd, Error **errp);
+
+extern const MemoryRegionOps vfio_region_ops;
+typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
+typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList;
+extern VFIOGroupList vfio_group_list;
+extern VFIODeviceList vfio_device_list;
+
+extern const MemoryListener vfio_memory_listener;
+extern int vfio_kvm_device_fd;
+
+bool vfio_mig_active(void);
+int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
+void vfio_unblock_multiple_devices_migration(void);
+bool vfio_viommu_preset(VFIODevice *vbasedev);
+int64_t vfio_mig_bytes_transferred(void);
+void vfio_reset_bytes_transferred(void);
+bool vfio_device_state_is_running(VFIODevice *vbasedev);
+bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
+
+#ifdef CONFIG_LINUX
+int vfio_get_region_info(VFIODevice *vbasedev, int index,
+                         struct vfio_region_info **info);
+int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
+                             uint32_t subtype, struct vfio_region_info **info);
+bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type);
+struct vfio_info_cap_header *
+vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id);
+bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
+                             unsigned int *avail);
+struct vfio_info_cap_header *
+vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
+struct vfio_info_cap_header *
+vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
+#endif
+
+bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
+void vfio_migration_exit(VFIODevice *vbasedev);
+
+int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size);
+bool vfio_devices_all_running_and_mig_active(VFIOContainer *container);
+bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container);
+int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
+                                    VFIOBitmap *vbmap, hwaddr iova,
+                                    hwaddr size);
+int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
+                                 uint64_t size, ram_addr_t ram_addr);
+#endif /* HW_VFIO_VFIO_COMMON_H */
diff --git a/include/hw/vfio/vfio-platform.h b/include/hw/vfio/vfio-platform.h
new file mode 100644 (file)
index 0000000..c414c3d
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * vfio based device assignment support - platform devices
+ *
+ * Copyright Linaro Limited, 2014
+ *
+ * Authors:
+ *  Kim Phillips <kim.phillips@linaro.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Based on vfio based PCI device assignment support:
+ *  Copyright Red Hat, Inc. 2012
+ */
+
+#ifndef HW_VFIO_VFIO_PLATFORM_H
+#define HW_VFIO_VFIO_PLATFORM_H
+
+#include "hw/sysbus.h"
+#include "hw/vfio/vfio-common.h"
+#include "qemu/event_notifier.h"
+#include "qemu/queue.h"
+#include "qom/object.h"
+
+#define TYPE_VFIO_PLATFORM "vfio-platform"
+
+enum {
+    VFIO_IRQ_INACTIVE = 0,
+    VFIO_IRQ_PENDING = 1,
+    VFIO_IRQ_ACTIVE = 2,
+    /* VFIO_IRQ_ACTIVE_AND_PENDING cannot happen with VFIO */
+};
+
+typedef struct VFIOINTp {
+    QLIST_ENTRY(VFIOINTp) next; /* entry for IRQ list */
+    QSIMPLEQ_ENTRY(VFIOINTp) pqnext; /* entry for pending IRQ queue */
+    EventNotifier *interrupt; /* eventfd triggered on interrupt */
+    EventNotifier *unmask; /* eventfd for unmask on QEMU bypass */
+    qemu_irq qemuirq;
+    struct VFIOPlatformDevice *vdev; /* back pointer to device */
+    int state; /* inactive, pending, active */
+    uint8_t pin; /* index */
+    uint32_t flags; /* IRQ info flags */
+    bool kvm_accel; /* set when QEMU bypass through KVM enabled */
+} VFIOINTp;
+
+/* function type for user side eventfd handler */
+typedef void (*eventfd_user_side_handler_t)(VFIOINTp *intp);
+
+struct VFIOPlatformDevice {
+    SysBusDevice sbdev;
+    VFIODevice vbasedev; /* not a QOM object */
+    VFIORegion **regions;
+    QLIST_HEAD(, VFIOINTp) intp_list; /* list of IRQs */
+    /* queue of pending IRQs */
+    QSIMPLEQ_HEAD(, VFIOINTp) pending_intp_queue;
+    char *compat; /* DT compatible values, separated by NUL */
+    unsigned int num_compat; /* number of compatible values */
+    uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
+    QEMUTimer *mmap_timer; /* allows fast-path resume after IRQ hit */
+    QemuMutex intp_mutex; /* protect the intp_list IRQ state */
+    bool irqfd_allowed; /* debug option to force irqfd on/off */
+};
+typedef struct VFIOPlatformDevice VFIOPlatformDevice;
+
+struct VFIOPlatformDeviceClass {
+    /*< private >*/
+    SysBusDeviceClass parent_class;
+    /*< public >*/
+};
+typedef struct VFIOPlatformDeviceClass VFIOPlatformDeviceClass;
+
+DECLARE_OBJ_CHECKERS(VFIOPlatformDevice, VFIOPlatformDeviceClass,
+                     VFIO_PLATFORM_DEVICE, TYPE_VFIO_PLATFORM)
+
+#endif /* HW_VFIO_VFIO_PLATFORM_H */
diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h
new file mode 100644 (file)
index 0000000..4dbf981
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Vhost Vdpa Device
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved.
+ *
+ * Authors:
+ *   Longpeng <longpeng2@huawei.com>
+ *
+ * Largely based on the "vhost-user-blk.h" implemented by:
+ *   Changpeng Liu <changpeng.liu@intel.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+#ifndef _VHOST_VDPA_DEVICE_H
+#define _VHOST_VDPA_DEVICE_H
+
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-vdpa.h"
+#include "qom/object.h"
+
+
+#define TYPE_VHOST_VDPA_DEVICE "vhost-vdpa-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VhostVdpaDevice, VHOST_VDPA_DEVICE)
+
+struct VhostVdpaDevice {
+    VirtIODevice parent_obj;
+    char *vhostdev;
+    int vhostfd;
+    int32_t bootindex;
+    uint32_t vdev_id;
+    uint32_t num_queues;
+    struct vhost_dev dev;
+    struct vhost_vdpa vdpa;
+    VirtQueue **virtqs;
+    uint8_t *config;
+    int config_size;
+    uint16_t queue_size;
+    bool started;
+    int (*post_init)(VhostVdpaDevice *v, Error **errp);
+};
+
+#endif
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
new file mode 100644 (file)
index 0000000..a86d103
--- /dev/null
@@ -0,0 +1,221 @@
+/*
+ * vhost-backend
+ *
+ * Copyright (c) 2013 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_BACKEND_H
+#define VHOST_BACKEND_H
+
+#include "exec/memory.h"
+
+typedef enum VhostBackendType {
+    VHOST_BACKEND_TYPE_NONE = 0,
+    VHOST_BACKEND_TYPE_KERNEL = 1,
+    VHOST_BACKEND_TYPE_USER = 2,
+    VHOST_BACKEND_TYPE_VDPA = 3,
+    VHOST_BACKEND_TYPE_MAX = 4,
+} VhostBackendType;
+
+typedef enum VhostSetConfigType {
+    VHOST_SET_CONFIG_TYPE_FRONTEND = 0,
+    VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
+} VhostSetConfigType;
+
+typedef enum VhostDeviceStateDirection {
+    /* Transfer state from back-end (device) to front-end */
+    VHOST_TRANSFER_STATE_DIRECTION_SAVE = 0,
+    /* Transfer state from front-end to back-end (device) */
+    VHOST_TRANSFER_STATE_DIRECTION_LOAD = 1,
+} VhostDeviceStateDirection;
+
+typedef enum VhostDeviceStatePhase {
+    /* The device (and all its vrings) is stopped */
+    VHOST_TRANSFER_STATE_PHASE_STOPPED = 0,
+} VhostDeviceStatePhase;
+
+struct vhost_inflight;
+struct vhost_dev;
+struct vhost_log;
+struct vhost_memory;
+struct vhost_vring_file;
+struct vhost_vring_state;
+struct vhost_vring_addr;
+struct vhost_scsi_target;
+struct vhost_iotlb_msg;
+struct vhost_virtqueue;
+
+typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque,
+                                  Error **errp);
+typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev);
+typedef int (*vhost_backend_memslots_limit)(struct vhost_dev *dev);
+
+typedef int (*vhost_net_set_backend_op)(struct vhost_dev *dev,
+                                struct vhost_vring_file *file);
+typedef int (*vhost_net_set_mtu_op)(struct vhost_dev *dev, uint16_t mtu);
+typedef int (*vhost_scsi_set_endpoint_op)(struct vhost_dev *dev,
+                                  struct vhost_scsi_target *target);
+typedef int (*vhost_scsi_clear_endpoint_op)(struct vhost_dev *dev,
+                                    struct vhost_scsi_target *target);
+typedef int (*vhost_scsi_get_abi_version_op)(struct vhost_dev *dev,
+                                             int *version);
+typedef int (*vhost_set_log_base_op)(struct vhost_dev *dev, uint64_t base,
+                                     struct vhost_log *log);
+typedef int (*vhost_set_mem_table_op)(struct vhost_dev *dev,
+                                      struct vhost_memory *mem);
+typedef int (*vhost_set_vring_addr_op)(struct vhost_dev *dev,
+                                       struct vhost_vring_addr *addr);
+typedef int (*vhost_set_vring_endian_op)(struct vhost_dev *dev,
+                                         struct vhost_vring_state *ring);
+typedef int (*vhost_set_vring_num_op)(struct vhost_dev *dev,
+                                      struct vhost_vring_state *ring);
+typedef int (*vhost_set_vring_base_op)(struct vhost_dev *dev,
+                                       struct vhost_vring_state *ring);
+typedef int (*vhost_get_vring_base_op)(struct vhost_dev *dev,
+                                       struct vhost_vring_state *ring);
+typedef int (*vhost_set_vring_kick_op)(struct vhost_dev *dev,
+                                       struct vhost_vring_file *file);
+typedef int (*vhost_set_vring_call_op)(struct vhost_dev *dev,
+                                       struct vhost_vring_file *file);
+typedef int (*vhost_set_vring_err_op)(struct vhost_dev *dev,
+                                      struct vhost_vring_file *file);
+typedef int (*vhost_set_vring_busyloop_timeout_op)(struct vhost_dev *dev,
+                                                   struct vhost_vring_state *r);
+typedef int (*vhost_set_features_op)(struct vhost_dev *dev,
+                                     uint64_t features);
+typedef int (*vhost_get_features_op)(struct vhost_dev *dev,
+                                     uint64_t *features);
+typedef int (*vhost_set_backend_cap_op)(struct vhost_dev *dev);
+typedef int (*vhost_set_owner_op)(struct vhost_dev *dev);
+typedef int (*vhost_reset_device_op)(struct vhost_dev *dev);
+typedef int (*vhost_get_vq_index_op)(struct vhost_dev *dev, int idx);
+typedef int (*vhost_set_vring_enable_op)(struct vhost_dev *dev,
+                                         int enable);
+typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev);
+typedef int (*vhost_migration_done_op)(struct vhost_dev *dev,
+                                       char *mac_addr);
+typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev,
+                                            uint64_t guest_cid);
+typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start);
+typedef void (*vhost_set_iotlb_callback_op)(struct vhost_dev *dev,
+                                           int enabled);
+typedef int (*vhost_send_device_iotlb_msg_op)(struct vhost_dev *dev,
+                                              struct vhost_iotlb_msg *imsg);
+typedef int (*vhost_set_config_op)(struct vhost_dev *dev, const uint8_t *data,
+                                   uint32_t offset, uint32_t size,
+                                   uint32_t flags);
+typedef int (*vhost_get_config_op)(struct vhost_dev *dev, uint8_t *config,
+                                   uint32_t config_len, Error **errp);
+
+typedef int (*vhost_crypto_create_session_op)(struct vhost_dev *dev,
+                                              void *session_info,
+                                              uint64_t *session_id);
+typedef int (*vhost_crypto_close_session_op)(struct vhost_dev *dev,
+                                             uint64_t session_id);
+
+typedef bool (*vhost_backend_no_private_memslots_op)(struct vhost_dev *dev);
+
+typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
+                                        uint16_t queue_size,
+                                        struct vhost_inflight *inflight);
+
+typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
+                                        struct vhost_inflight *inflight);
+
+typedef int (*vhost_dev_start_op)(struct vhost_dev *dev, bool started);
+
+typedef int (*vhost_vq_get_addr_op)(struct vhost_dev *dev,
+                    struct vhost_vring_addr *addr,
+                    struct vhost_virtqueue *vq);
+
+typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id);
+
+typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
+
+typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev,
+                                       int fd);
+
+typedef void (*vhost_reset_status_op)(struct vhost_dev *dev);
+
+typedef bool (*vhost_supports_device_state_op)(struct vhost_dev *dev);
+typedef int (*vhost_set_device_state_fd_op)(struct vhost_dev *dev,
+                                            VhostDeviceStateDirection direction,
+                                            VhostDeviceStatePhase phase,
+                                            int fd,
+                                            int *reply_fd,
+                                            Error **errp);
+typedef int (*vhost_check_device_state_op)(struct vhost_dev *dev, Error **errp);
+
+typedef struct VhostOps {
+    VhostBackendType backend_type;
+    vhost_backend_init vhost_backend_init;
+    vhost_backend_cleanup vhost_backend_cleanup;
+    vhost_backend_memslots_limit vhost_backend_memslots_limit;
+    vhost_backend_no_private_memslots_op vhost_backend_no_private_memslots;
+    vhost_net_set_backend_op vhost_net_set_backend;
+    vhost_net_set_mtu_op vhost_net_set_mtu;
+    vhost_scsi_set_endpoint_op vhost_scsi_set_endpoint;
+    vhost_scsi_clear_endpoint_op vhost_scsi_clear_endpoint;
+    vhost_scsi_get_abi_version_op vhost_scsi_get_abi_version;
+    vhost_set_log_base_op vhost_set_log_base;
+    vhost_set_mem_table_op vhost_set_mem_table;
+    vhost_set_vring_addr_op vhost_set_vring_addr;
+    vhost_set_vring_endian_op vhost_set_vring_endian;
+    vhost_set_vring_num_op vhost_set_vring_num;
+    vhost_set_vring_base_op vhost_set_vring_base;
+    vhost_get_vring_base_op vhost_get_vring_base;
+    vhost_set_vring_kick_op vhost_set_vring_kick;
+    vhost_set_vring_call_op vhost_set_vring_call;
+    vhost_set_vring_err_op vhost_set_vring_err;
+    vhost_set_vring_busyloop_timeout_op vhost_set_vring_busyloop_timeout;
+    vhost_set_features_op vhost_set_features;
+    vhost_get_features_op vhost_get_features;
+    vhost_set_backend_cap_op vhost_set_backend_cap;
+    vhost_set_owner_op vhost_set_owner;
+    vhost_reset_device_op vhost_reset_device;
+    vhost_get_vq_index_op vhost_get_vq_index;
+    vhost_set_vring_enable_op vhost_set_vring_enable;
+    vhost_requires_shm_log_op vhost_requires_shm_log;
+    vhost_migration_done_op vhost_migration_done;
+    vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid;
+    vhost_vsock_set_running_op vhost_vsock_set_running;
+    vhost_set_iotlb_callback_op vhost_set_iotlb_callback;
+    vhost_send_device_iotlb_msg_op vhost_send_device_iotlb_msg;
+    vhost_get_config_op vhost_get_config;
+    vhost_set_config_op vhost_set_config;
+    vhost_crypto_create_session_op vhost_crypto_create_session;
+    vhost_crypto_close_session_op vhost_crypto_close_session;
+    vhost_get_inflight_fd_op vhost_get_inflight_fd;
+    vhost_set_inflight_fd_op vhost_set_inflight_fd;
+    vhost_dev_start_op vhost_dev_start;
+    vhost_vq_get_addr_op  vhost_vq_get_addr;
+    vhost_get_device_id_op vhost_get_device_id;
+    vhost_force_iommu_op vhost_force_iommu;
+    vhost_set_config_call_op vhost_set_config_call;
+    vhost_reset_status_op vhost_reset_status;
+    vhost_supports_device_state_op vhost_supports_device_state;
+    vhost_set_device_state_fd_op vhost_set_device_state_fd;
+    vhost_check_device_state_op vhost_check_device_state;
+} VhostOps;
+
+int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
+                                             uint64_t iova, uint64_t uaddr,
+                                             uint64_t len,
+                                             IOMMUAccessFlags perm);
+
+int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev,
+                                                 uint64_t iova, uint64_t len);
+
+int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev,
+                                          struct vhost_iotlb_msg *imsg);
+
+int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd);
+
+int vhost_user_get_shared_object(struct vhost_dev *dev, unsigned char *uuid,
+                                        int *dmabuf_fd);
+
+#endif /* VHOST_BACKEND_H */
diff --git a/include/hw/virtio/vhost-scsi-common.h b/include/hw/virtio/vhost-scsi-common.h
new file mode 100644 (file)
index 0000000..c5d2c09
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * vhost_scsi host device
+ *
+ * Copyright (c) 2016 Nutanix Inc. All rights reserved.
+ *
+ * Author:
+ *  Felipe Franciosi <felipe@nutanix.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_SCSI_COMMON_H
+#define VHOST_SCSI_COMMON_H
+
+#include "hw/virtio/virtio-scsi.h"
+#include "hw/virtio/vhost.h"
+#include "hw/fw-path-provider.h"
+#include "qom/object.h"
+
+#define TYPE_VHOST_SCSI_COMMON "vhost-scsi-common"
+OBJECT_DECLARE_SIMPLE_TYPE(VHostSCSICommon, VHOST_SCSI_COMMON)
+
+struct VHostSCSICommon {
+    VirtIOSCSICommon parent_obj;
+
+    Error *migration_blocker;
+
+    struct vhost_dev dev;
+    const int *feature_bits;
+    int32_t bootindex;
+    int channel;
+    int target;
+    int lun;
+    uint64_t host_features;
+    bool migratable;
+
+    struct vhost_inflight *inflight;
+};
+
+int vhost_scsi_common_start(VHostSCSICommon *vsc, Error **errp);
+void vhost_scsi_common_stop(VHostSCSICommon *vsc);
+char *vhost_scsi_common_get_fw_dev_path(FWPathProvider *p, BusState *bus,
+                                        DeviceState *dev);
+void vhost_scsi_common_set_config(VirtIODevice *vdev, const uint8_t *config);
+uint64_t vhost_scsi_common_get_features(VirtIODevice *vdev, uint64_t features,
+                                        Error **errp);
+
+#endif /* VHOST_SCSI_COMMON_H */
diff --git a/include/hw/virtio/vhost-scsi.h b/include/hw/virtio/vhost-scsi.h
new file mode 100644 (file)
index 0000000..7dc2bdd
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * vhost_scsi host device
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_SCSI_H
+#define VHOST_SCSI_H
+
+#include "hw/virtio/virtio-scsi.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-scsi-common.h"
+#include "qom/object.h"
+
+enum vhost_scsi_vq_list {
+    VHOST_SCSI_VQ_CONTROL = 0,
+    VHOST_SCSI_VQ_EVENT = 1,
+    VHOST_SCSI_VQ_NUM_FIXED = 2,
+};
+
+#define TYPE_VHOST_SCSI "vhost-scsi"
+OBJECT_DECLARE_SIMPLE_TYPE(VHostSCSI, VHOST_SCSI)
+
+struct VHostSCSI {
+    VHostSCSICommon parent_obj;
+};
+
+#endif
diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h
new file mode 100644 (file)
index 0000000..ea085ee
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * vhost-user-blk host device
+ * Copyright(C) 2017 Intel Corporation.
+ *
+ * Authors:
+ *  Changpeng Liu <changpeng.liu@intel.com>
+ *
+ * Based on vhost-scsi.h, Copyright IBM, Corp. 2011
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_USER_BLK_H
+#define VHOST_USER_BLK_H
+
+#include "standard-headers/linux/virtio_blk.h"
+#include "hw/block/block.h"
+#include "chardev/char-fe.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user.h"
+#include "qom/object.h"
+
+#define TYPE_VHOST_USER_BLK "vhost-user-blk"
+OBJECT_DECLARE_SIMPLE_TYPE(VHostUserBlk, VHOST_USER_BLK)
+
+#define VHOST_USER_BLK_AUTO_NUM_QUEUES UINT16_MAX
+
+struct VHostUserBlk {
+    VirtIODevice parent_obj;
+    CharBackend chardev;
+    int32_t bootindex;
+    struct virtio_blk_config blkcfg;
+    uint16_t num_queues;
+    uint32_t queue_size;
+    struct vhost_dev dev;
+    struct vhost_inflight *inflight;
+    VhostUserState vhost_user;
+    struct vhost_virtqueue *vhost_vqs;
+    VirtQueue **virtqs;
+
+    /*
+     * There are at least two steps of initialization of the
+     * vhost-user device. The first is a "connect" step and
+     * second is a "start" step. Make a separation between
+     * those initialization phases by using two fields.
+     */
+    /* vhost_user_blk_connect/vhost_user_blk_disconnect */
+    bool connected;
+    /* vhost_user_blk_start/vhost_user_blk_stop */
+    bool started_vu;
+};
+
+#endif
diff --git a/include/hw/virtio/vhost-user-device.h b/include/hw/virtio/vhost-user-device.h
new file mode 100644 (file)
index 0000000..3ddf88a
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Vhost-user generic virtio device
+ *
+ * Copyright (c) 2023 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef QEMU_VHOST_USER_DEVICE_H
+#define QEMU_VHOST_USER_DEVICE_H
+
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user.h"
+
+#define TYPE_VHOST_USER_BASE "vhost-user-base"
+
+OBJECT_DECLARE_TYPE(VHostUserBase, VHostUserBaseClass, VHOST_USER_BASE)
+
+struct VHostUserBase {
+    VirtIODevice parent;
+    /* Properties */
+    CharBackend chardev;
+    uint16_t virtio_id;
+    uint32_t num_vqs;
+    uint32_t config_size;
+    /* State tracking */
+    VhostUserState vhost_user;
+    struct vhost_virtqueue *vhost_vq;
+    struct vhost_dev vhost_dev;
+    GPtrArray *vqs;
+    bool connected;
+};
+
+    /* needed so we can use the base realize after specialisation
+       tweaks */
+struct VHostUserBaseClass {
+    /*< private >*/
+    VirtioDeviceClass parent_class;
+    /*< public >*/
+    DeviceRealize parent_realize;
+};
+
+/* shared for the benefit of the derived pci class */
+#define TYPE_VHOST_USER_DEVICE "vhost-user-device"
+
+#endif /* QEMU_VHOST_USER_DEVICE_H */
diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
new file mode 100644 (file)
index 0000000..94c3aaa
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Vhost-user filesystem virtio device
+ *
+ * Copyright 2018-2019 Red Hat, Inc.
+ *
+ * Authors:
+ *  Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef QEMU_VHOST_USER_FS_H
+#define QEMU_VHOST_USER_FS_H
+
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user.h"
+#include "chardev/char-fe.h"
+#include "qom/object.h"
+
+#define TYPE_VHOST_USER_FS "vhost-user-fs-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VHostUserFS, VHOST_USER_FS)
+
+typedef struct {
+    CharBackend chardev;
+    char *tag;
+    uint16_t num_request_queues;
+    uint16_t queue_size;
+} VHostUserFSConf;
+
+struct VHostUserFS {
+    /*< private >*/
+    VirtIODevice parent;
+    VHostUserFSConf conf;
+    struct vhost_virtqueue *vhost_vqs;
+    struct vhost_dev vhost_dev;
+    VhostUserState vhost_user;
+    VirtQueue **req_vqs;
+    VirtQueue *hiprio_vq;
+    int32_t bootindex;
+
+    /*< public >*/
+};
+
+#endif /* QEMU_VHOST_USER_FS_H */
diff --git a/include/hw/virtio/vhost-user-gpio.h b/include/hw/virtio/vhost-user-gpio.h
new file mode 100644 (file)
index 0000000..a9d3f9b
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Vhost-user GPIO virtio device
+ *
+ * Copyright (c) 2021 Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef _QEMU_VHOST_USER_GPIO_H
+#define _QEMU_VHOST_USER_GPIO_H
+
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user.h"
+#include "standard-headers/linux/virtio_gpio.h"
+#include "chardev/char-fe.h"
+
+#define TYPE_VHOST_USER_GPIO "vhost-user-gpio-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VHostUserGPIO, VHOST_USER_GPIO);
+
+struct VHostUserGPIO {
+    /*< private >*/
+    VirtIODevice parent_obj;
+    CharBackend chardev;
+    struct virtio_gpio_config config;
+    struct vhost_virtqueue *vhost_vqs;
+    struct vhost_dev vhost_dev;
+    VhostUserState vhost_user;
+    VirtQueue *command_vq;
+    VirtQueue *interrupt_vq;
+    /**
+     * There are at least two steps of initialization of the
+     * vhost-user device. The first is a "connect" step and
+     * second is a "start" step. Make a separation between
+     * those initialization phases by using two fields.
+     *
+     * @connected: see vu_gpio_connect()/vu_gpio_disconnect()
+     * @started_vu: see vu_gpio_start()/vu_gpio_stop()
+     */
+    bool connected;
+    bool started_vu;
+    /*< public >*/
+};
+
+#endif /* _QEMU_VHOST_USER_GPIO_H */
diff --git a/include/hw/virtio/vhost-user-i2c.h b/include/hw/virtio/vhost-user-i2c.h
new file mode 100644 (file)
index 0000000..0f7acd4
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * Vhost-user i2c virtio device
+ *
+ * Copyright (c) 2021 Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef QEMU_VHOST_USER_I2C_H
+#define QEMU_VHOST_USER_I2C_H
+
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user.h"
+
+#define TYPE_VHOST_USER_I2C "vhost-user-i2c-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VHostUserI2C, VHOST_USER_I2C)
+
+struct VHostUserI2C {
+    VirtIODevice parent;
+    CharBackend chardev;
+    struct vhost_virtqueue *vhost_vq;
+    struct vhost_dev vhost_dev;
+    VhostUserState vhost_user;
+    VirtQueue *vq;
+    bool connected;
+};
+
+/* Virtio Feature bits */
+#define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST               0
+
+#endif /* QEMU_VHOST_USER_I2C_H */
diff --git a/include/hw/virtio/vhost-user-rng.h b/include/hw/virtio/vhost-user-rng.h
new file mode 100644 (file)
index 0000000..ddd9f01
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Vhost-user RNG virtio device
+ *
+ * Copyright (c) 2021 Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef QEMU_VHOST_USER_RNG_H
+#define QEMU_VHOST_USER_RNG_H
+
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user.h"
+#include "chardev/char-fe.h"
+
+#define TYPE_VHOST_USER_RNG "vhost-user-rng"
+OBJECT_DECLARE_SIMPLE_TYPE(VHostUserRNG, VHOST_USER_RNG)
+
+struct VHostUserRNG {
+    /*< private >*/
+    VirtIODevice parent;
+    CharBackend chardev;
+    struct vhost_virtqueue *vhost_vq;
+    struct vhost_dev vhost_dev;
+    VhostUserState vhost_user;
+    VirtQueue *req_vq;
+    bool connected;
+
+    /*< public >*/
+};
+
+#endif /* QEMU_VHOST_USER_RNG_H */
diff --git a/include/hw/virtio/vhost-user-scmi.h b/include/hw/virtio/vhost-user-scmi.h
new file mode 100644 (file)
index 0000000..c90db77
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * Vhost-user SCMI virtio device
+ *
+ * Copyright (c) 2023 Red Hat, Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef _QEMU_VHOST_USER_SCMI_H
+#define _QEMU_VHOST_USER_SCMI_H
+
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user.h"
+
+#define TYPE_VHOST_USER_SCMI "vhost-user-scmi"
+OBJECT_DECLARE_SIMPLE_TYPE(VHostUserSCMI, VHOST_USER_SCMI);
+
+struct VHostUserSCMI {
+    VirtIODevice parent;
+    CharBackend chardev;
+    struct vhost_virtqueue *vhost_vqs;
+    struct vhost_dev vhost_dev;
+    VhostUserState vhost_user;
+    VirtQueue *cmd_vq;
+    VirtQueue *event_vq;
+    bool connected;
+    bool started_vu;
+};
+
+#endif /* _QEMU_VHOST_USER_SCMI_H */
diff --git a/include/hw/virtio/vhost-user-scsi.h b/include/hw/virtio/vhost-user-scsi.h
new file mode 100644 (file)
index 0000000..78fe616
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * vhost-user-scsi host device
+ *
+ * Copyright (c) 2016 Nutanix Inc. All rights reserved.
+ *
+ * Author:
+ *  Felipe Franciosi <felipe@nutanix.com>
+ *
+ * This file is largely based on "vhost-scsi.h" by:
+ *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_USER_SCSI_H
+#define VHOST_USER_SCSI_H
+
+#include "hw/virtio/virtio-scsi.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user.h"
+#include "hw/virtio/vhost-scsi-common.h"
+#include "qom/object.h"
+
+#define TYPE_VHOST_USER_SCSI "vhost-user-scsi"
+OBJECT_DECLARE_SIMPLE_TYPE(VHostUserSCSI, VHOST_USER_SCSI)
+
+struct VHostUserSCSI {
+    VHostSCSICommon parent_obj;
+
+    /* Properties */
+    bool connected;
+    bool started_vu;
+
+    VhostUserState vhost_user;
+    struct vhost_virtqueue *vhost_vqs;
+};
+
+#endif /* VHOST_USER_SCSI_H */
diff --git a/include/hw/virtio/vhost-user-vsock.h b/include/hw/virtio/vhost-user-vsock.h
new file mode 100644 (file)
index 0000000..67aa46c
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Vhost-user vsock virtio device
+ *
+ * Copyright 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef QEMU_VHOST_USER_VSOCK_H
+#define QEMU_VHOST_USER_VSOCK_H
+
+#include "hw/virtio/vhost-vsock-common.h"
+#include "hw/virtio/vhost-user.h"
+#include "standard-headers/linux/virtio_vsock.h"
+#include "qom/object.h"
+
+#define TYPE_VHOST_USER_VSOCK "vhost-user-vsock-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VHostUserVSock, VHOST_USER_VSOCK)
+
+typedef struct {
+    CharBackend chardev;
+} VHostUserVSockConf;
+
+struct VHostUserVSock {
+    /*< private >*/
+    VHostVSockCommon parent;
+    VhostUserState vhost_user;
+    VHostUserVSockConf conf;
+    struct virtio_vsock_config vsockcfg;
+
+    /*< public >*/
+};
+
+#endif /* QEMU_VHOST_USER_VSOCK_H */
diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
new file mode 100644 (file)
index 0000000..d7c09ff
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2017-2018 Intel Corporation
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VIRTIO_VHOST_USER_H
+#define HW_VIRTIO_VHOST_USER_H
+
+#include "chardev/char-fe.h"
+#include "hw/virtio/virtio.h"
+
+enum VhostUserProtocolFeature {
+    VHOST_USER_PROTOCOL_F_MQ = 0,
+    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
+    VHOST_USER_PROTOCOL_F_RARP = 2,
+    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
+    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
+    VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5,
+    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
+    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
+    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
+    VHOST_USER_PROTOCOL_F_CONFIG = 9,
+    VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10,
+    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
+    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
+    VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
+    VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,
+    VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
+    VHOST_USER_PROTOCOL_F_STATUS = 16,
+    /* Feature 17 reserved for VHOST_USER_PROTOCOL_F_XEN_MMAP. */
+    VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 18,
+    VHOST_USER_PROTOCOL_F_DEVICE_STATE = 19,
+    VHOST_USER_PROTOCOL_F_MAX
+};
+
+/**
+ * VhostUserHostNotifier - notifier information for one queue
+ * @rcu: rcu_head for cleanup
+ * @mr: memory region of notifier
+ * @addr: current mapped address
+ * @unmap_addr: address to be un-mapped
+ * @idx: virtioqueue index
+ *
+ * The VhostUserHostNotifier entries are re-used. When an old mapping
+ * is to be released it is moved to @unmap_addr and @addr is replaced.
+ * Once the RCU process has completed the unmap @unmap_addr is
+ * cleared.
+ */
+typedef struct VhostUserHostNotifier {
+    struct rcu_head rcu;
+    MemoryRegion mr;
+    void *addr;
+    void *unmap_addr;
+    int idx;
+} VhostUserHostNotifier;
+
+/**
+ * VhostUserState - shared state for all vhost-user devices
+ * @chr: the character backend for the socket
+ * @notifiers: GPtrArray of @VhostUserHostnotifier
+ * @memory_slots:
+ */
+typedef struct VhostUserState {
+    CharBackend *chr;
+    GPtrArray *notifiers;
+    int memory_slots;
+    bool supports_config;
+} VhostUserState;
+
+/**
+ * vhost_user_init() - initialise shared vhost_user state
+ * @user: allocated area for storing shared state
+ * @chr: the chardev for the vhost socket
+ * @errp: error handle
+ *
+ * User can either directly g_new() space for the state or embed
+ * VhostUserState in their larger device structure and just point to
+ * it.
+ *
+ * Return: true on success, false on error while setting errp.
+ */
+bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp);
+
+/**
+ * vhost_user_cleanup() - cleanup state
+ * @user: ptr to use state
+ *
+ * Cleans up shared state and notifiers, callee is responsible for
+ * freeing the @VhostUserState memory itself.
+ */
+void vhost_user_cleanup(VhostUserState *user);
+
+/**
+ * vhost_user_async_close() - cleanup vhost-user post connection drop
+ * @d: DeviceState for the associated device (passed to callback)
+ * @chardev: the CharBackend associated with the connection
+ * @vhost: the common vhost device
+ * @cb: the user callback function to complete the clean-up
+ *
+ * This function is used to handle the shutdown of a vhost-user
+ * connection to a backend. We handle this centrally to make sure we
+ * do all the steps and handle potential races due to VM shutdowns.
+ * Once the connection is disabled we call a backhalf to ensure
+ */
+typedef void (*vu_async_close_fn)(DeviceState *cb);
+
+void vhost_user_async_close(DeviceState *d,
+                            CharBackend *chardev, struct vhost_dev *vhost,
+                            vu_async_close_fn cb,
+                            IOEventHandler *event_cb);
+
+#endif
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
new file mode 100644 (file)
index 0000000..5407d54
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * vhost-vdpa.h
+ *
+ * Copyright(c) 2017-2018 Intel Corporation.
+ * Copyright(c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_VIRTIO_VHOST_VDPA_H
+#define HW_VIRTIO_VHOST_VDPA_H
+
+#include <gmodule.h>
+
+#include "hw/virtio/vhost-iova-tree.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
+#include "hw/virtio/virtio.h"
+#include "standard-headers/linux/vhost_types.h"
+
+/*
+ * ASID dedicated to map guest's addresses.  If SVQ is disabled it maps GPA to
+ * qemu's IOVA.  If SVQ is enabled it maps also the SVQ vring here
+ */
+#define VHOST_VDPA_GUEST_PA_ASID 0
+
+typedef struct VhostVDPAHostNotifier {
+    MemoryRegion mr;
+    void *addr;
+} VhostVDPAHostNotifier;
+
+typedef struct vhost_vdpa {
+    int device_fd;
+    int index;
+    uint32_t msg_type;
+    bool iotlb_batch_begin_sent;
+    uint32_t address_space_id;
+    MemoryListener listener;
+    struct vhost_vdpa_iova_range iova_range;
+    uint64_t acked_features;
+    bool shadow_vqs_enabled;
+    /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */
+    bool shadow_data;
+    /* Device suspended successfully */
+    bool suspended;
+    /* IOVA mapping used by the Shadow Virtqueue */
+    VhostIOVATree *iova_tree;
+    GPtrArray *shadow_vqs;
+    const VhostShadowVirtqueueOps *shadow_vq_ops;
+    void *shadow_vq_ops_opaque;
+    struct vhost_dev *dev;
+    Error *migration_blocker;
+    VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
+    QLIST_HEAD(, vdpa_iommu) iommu_list;
+    IOMMUNotifier n;
+} VhostVDPA;
+
+int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range);
+int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx);
+
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
+                       hwaddr size, void *vaddr, bool readonly);
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
+                         hwaddr size);
+
+typedef struct vdpa_iommu {
+    struct vhost_vdpa *dev;
+    IOMMUMemoryRegion *iommu_mr;
+    hwaddr iommu_offset;
+    IOMMUNotifier n;
+    QLIST_ENTRY(vdpa_iommu) iommu_next;
+} VDPAIOMMUState;
+
+
+#endif
diff --git a/include/hw/virtio/vhost-vsock-common.h b/include/hw/virtio/vhost-vsock-common.h
new file mode 100644 (file)
index 0000000..93c7821
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Parent class for vhost-vsock devices
+ *
+ * Copyright 2015-2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef QEMU_VHOST_VSOCK_COMMON_H
+#define QEMU_VHOST_VSOCK_COMMON_H
+
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/vhost.h"
+#include "qom/object.h"
+
+#define TYPE_VHOST_VSOCK_COMMON "vhost-vsock-common"
+OBJECT_DECLARE_SIMPLE_TYPE(VHostVSockCommon, VHOST_VSOCK_COMMON)
+
+enum {
+    VHOST_VSOCK_SAVEVM_VERSION = 0,
+
+    VHOST_VSOCK_QUEUE_SIZE = 128,
+};
+
+struct VHostVSockCommon {
+    VirtIODevice parent;
+
+    struct vhost_virtqueue vhost_vqs[2];
+    struct vhost_dev vhost_dev;
+
+    VirtQueue *event_vq;
+    VirtQueue *recv_vq;
+    VirtQueue *trans_vq;
+
+    QEMUTimer *post_load_timer;
+
+    /* features */
+    OnOffAuto seqpacket;
+};
+
+int vhost_vsock_common_start(VirtIODevice *vdev);
+void vhost_vsock_common_stop(VirtIODevice *vdev);
+int vhost_vsock_common_pre_save(void *opaque);
+int vhost_vsock_common_post_load(void *opaque, int version_id);
+void vhost_vsock_common_realize(VirtIODevice *vdev);
+void vhost_vsock_common_unrealize(VirtIODevice *vdev);
+uint64_t vhost_vsock_common_get_features(VirtIODevice *vdev, uint64_t features,
+                                         Error **errp);
+
+#endif /* QEMU_VHOST_VSOCK_COMMON_H */
diff --git a/include/hw/virtio/vhost-vsock.h b/include/hw/virtio/vhost-vsock.h
new file mode 100644 (file)
index 0000000..84f4e72
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Vhost vsock virtio device
+ *
+ * Copyright 2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef QEMU_VHOST_VSOCK_H
+#define QEMU_VHOST_VSOCK_H
+
+#include "hw/virtio/vhost-vsock-common.h"
+#include "qom/object.h"
+
+#define TYPE_VHOST_VSOCK "vhost-vsock-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VHostVSock, VHOST_VSOCK)
+
+typedef struct {
+    uint64_t guest_cid;
+    char *vhostfd;
+} VHostVSockConf;
+
+struct VHostVSock {
+    /*< private >*/
+    VHostVSockCommon parent;
+    VHostVSockConf conf;
+
+    /*< public >*/
+};
+
+#endif /* QEMU_VHOST_VSOCK_H */
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
new file mode 100644 (file)
index 0000000..0247778
--- /dev/null
@@ -0,0 +1,467 @@
+#ifndef VHOST_H
+#define VHOST_H
+
+#include "hw/virtio/vhost-backend.h"
+#include "hw/virtio/virtio.h"
+#include "exec/memory.h"
+
+#define VHOST_F_DEVICE_IOTLB 63
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+
+#define VU_REALIZE_CONN_RETRIES 3
+
+/* Generic structures common for any vhost based device. */
+
+struct vhost_inflight {
+    int fd;
+    void *addr;
+    uint64_t size;
+    uint64_t offset;
+    uint16_t queue_size;
+};
+
+struct vhost_virtqueue {
+    int kick;
+    int call;
+    void *desc;
+    void *avail;
+    void *used;
+    int num;
+    unsigned long long desc_phys;
+    unsigned desc_size;
+    unsigned long long avail_phys;
+    unsigned avail_size;
+    unsigned long long used_phys;
+    unsigned used_size;
+    EventNotifier masked_notifier;
+    EventNotifier error_notifier;
+    EventNotifier masked_config_notifier;
+    struct vhost_dev *dev;
+};
+
+typedef unsigned long vhost_log_chunk_t;
+#define VHOST_LOG_PAGE 0x1000
+#define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t))
+#define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS)
+#define VHOST_INVALID_FEATURE_BIT   (0xff)
+#define VHOST_QUEUE_NUM_CONFIG_INR 0
+
+struct vhost_log {
+    unsigned long long size;
+    int refcnt;
+    int fd;
+    vhost_log_chunk_t *log;
+};
+
+struct vhost_dev;
+struct vhost_iommu {
+    struct vhost_dev *hdev;
+    MemoryRegion *mr;
+    hwaddr iommu_offset;
+    IOMMUNotifier n;
+    QLIST_ENTRY(vhost_iommu) iommu_next;
+};
+
+typedef struct VhostDevConfigOps {
+    /* Vhost device config space changed callback
+     */
+    int (*vhost_dev_config_notifier)(struct vhost_dev *dev);
+} VhostDevConfigOps;
+
+struct vhost_memory;
+
+/**
+ * struct vhost_dev - common vhost_dev structure
+ * @vhost_ops: backend specific ops
+ * @config_ops: ops for config changes (see @vhost_dev_set_config_notifier)
+ */
+struct vhost_dev {
+    VirtIODevice *vdev;
+    MemoryListener memory_listener;
+    MemoryListener iommu_listener;
+    struct vhost_memory *mem;
+    int n_mem_sections;
+    MemoryRegionSection *mem_sections;
+    int n_tmp_sections;
+    MemoryRegionSection *tmp_sections;
+    struct vhost_virtqueue *vqs;
+    unsigned int nvqs;
+    /* the first virtqueue which would be used by this vhost dev */
+    int vq_index;
+    /* one past the last vq index for the virtio device (not vhost) */
+    int vq_index_end;
+    /* if non-zero, minimum required value for max_queues */
+    int num_queues;
+    /**
+     * vhost feature handling requires matching the feature set
+     * offered by a backend which may be a subset of the total
+     * features eventually offered to the guest.
+     *
+     * @features: available features provided by the backend
+     * @acked_features: final negotiated features with front-end driver
+     *
+     * @backend_features: this is used in a couple of places to either
+     * store VHOST_USER_F_PROTOCOL_FEATURES to apply to
+     * VHOST_USER_SET_FEATURES or VHOST_NET_F_VIRTIO_NET_HDR. Its
+     * future use should be discouraged and the variable retired as
+     * its easy to confuse with the VirtIO backend_features.
+     */
+    uint64_t features;
+    uint64_t acked_features;
+    uint64_t backend_features;
+
+    /**
+     * @protocol_features: is the vhost-user only feature set by
+     * VHOST_USER_SET_PROTOCOL_FEATURES. Protocol features are only
+     * negotiated if VHOST_USER_F_PROTOCOL_FEATURES has been offered
+     * by the backend (see @features).
+     */
+    uint64_t protocol_features;
+
+    uint64_t max_queues;
+    uint64_t backend_cap;
+    /* @started: is the vhost device started? */
+    bool started;
+    bool log_enabled;
+    uint64_t log_size;
+    Error *migration_blocker;
+    const VhostOps *vhost_ops;
+    void *opaque;
+    struct vhost_log *log;
+    QLIST_ENTRY(vhost_dev) entry;
+    QLIST_HEAD(, vhost_iommu) iommu_list;
+    IOMMUNotifier n;
+    const VhostDevConfigOps *config_ops;
+};
+
+extern const VhostOps kernel_ops;
+extern const VhostOps user_ops;
+extern const VhostOps vdpa_ops;
+
+struct vhost_net {
+    struct vhost_dev dev;
+    struct vhost_virtqueue vqs[2];
+    int backend;
+    NetClientState *nc;
+};
+
+/**
+ * vhost_dev_init() - initialise the vhost interface
+ * @hdev: the common vhost_dev structure
+ * @opaque: opaque ptr passed to backend (vhost/vhost-user/vdpa)
+ * @backend_type: type of backend
+ * @busyloop_timeout: timeout for polling virtqueue
+ * @errp: error handle
+ *
+ * The initialisation of the vhost device will trigger the
+ * initialisation of the backend and potentially capability
+ * negotiation of backend interface. Configuration of the VirtIO
+ * itself won't happen until the interface is started.
+ *
+ * Return: 0 on success, non-zero on error while setting errp.
+ */
+int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
+                   VhostBackendType backend_type,
+                   uint32_t busyloop_timeout, Error **errp);
+
+/**
+ * vhost_dev_cleanup() - tear down and cleanup vhost interface
+ * @hdev: the common vhost_dev structure
+ */
+void vhost_dev_cleanup(struct vhost_dev *hdev);
+
+/**
+ * vhost_dev_enable_notifiers() - enable event notifiers
+ * @hdev: common vhost_dev structure
+ * @vdev: the VirtIODevice structure
+ *
+ * Enable notifications directly to the vhost device rather than being
+ * triggered by QEMU itself. Notifications should be enabled before
+ * the vhost device is started via @vhost_dev_start.
+ *
+ * Return: 0 on success, < 0 on error.
+ */
+int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
+
+/**
+ * vhost_dev_disable_notifiers - disable event notifications
+ * @hdev: common vhost_dev structure
+ * @vdev: the VirtIODevice structure
+ *
+ * Disable direct notifications to vhost device.
+ */
+void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
+bool vhost_config_pending(struct vhost_dev *hdev);
+void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask);
+
+/**
+ * vhost_dev_is_started() - report status of vhost device
+ * @hdev: common vhost_dev structure
+ *
+ * Return the started status of the vhost device
+ */
+static inline bool vhost_dev_is_started(struct vhost_dev *hdev)
+{
+    return hdev->started;
+}
+
+/**
+ * vhost_dev_start() - start the vhost device
+ * @hdev: common vhost_dev structure
+ * @vdev: the VirtIODevice structure
+ * @vrings: true to have vrings enabled in this call
+ *
+ * Starts the vhost device. From this point VirtIO feature negotiation
+ * can start and the device can start processing VirtIO transactions.
+ *
+ * Return: 0 on success, < 0 on error.
+ */
+int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);
+
+/**
+ * vhost_dev_stop() - stop the vhost device
+ * @hdev: common vhost_dev structure
+ * @vdev: the VirtIODevice structure
+ * @vrings: true to have vrings disabled in this call
+ *
+ * Stop the vhost device. After the device is stopped the notifiers
+ * can be disabled (@vhost_dev_disable_notifiers) and the device can
+ * be torn down (@vhost_dev_cleanup).
+ */
+void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);
+
+/**
+ * DOC: vhost device configuration handling
+ *
+ * The VirtIO device configuration space is used for rarely changing
+ * or initialisation time parameters. The configuration can be updated
+ * by either the guest driver or the device itself. If the device can
+ * change the configuration over time the vhost handler should
+ * register a @VhostDevConfigOps structure with
+ * @vhost_dev_set_config_notifier so the guest can be notified. Some
+ * devices register a handler anyway and will signal an error if an
+ * unexpected config change happens.
+ */
+
+/**
+ * vhost_dev_get_config() - fetch device configuration
+ * @hdev: common vhost_dev_structure
+ * @config: pointer to device appropriate config structure
+ * @config_len: size of device appropriate config structure
+ *
+ * Return: 0 on success, < 0 on error while setting errp
+ */
+int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
+                         uint32_t config_len, Error **errp);
+
+/**
+ * vhost_dev_set_config() - set device configuration
+ * @hdev: common vhost_dev_structure
+ * @data: pointer to data to set
+ * @offset: offset into configuration space
+ * @size: length of set
+ * @flags: @VhostSetConfigType flags
+ *
+ * By use of @offset/@size a subset of the configuration space can be
+ * written to. The @flags are used to indicate if it is a normal
+ * transaction or related to migration.
+ *
+ * Return: 0 on success, non-zero on error
+ */
+int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data,
+                         uint32_t offset, uint32_t size, uint32_t flags);
+
+/**
+ * vhost_dev_set_config_notifier() - register VhostDevConfigOps
+ * @hdev: common vhost_dev_structure
+ * @ops: notifier ops
+ *
+ * If the device is expected to change configuration a notifier can be
+ * setup to handle the case.
+ */
+void vhost_dev_set_config_notifier(struct vhost_dev *dev,
+                                   const VhostDevConfigOps *ops);
+
+
+/* Test and clear masked event pending status.
+ * Should be called after unmask to avoid losing events.
+ */
+bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n);
+
+/* Mask/unmask events from this vq.
+ */
+void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
+                          bool mask);
+
+/**
+ * vhost_get_features() - return a sanitised set of feature bits
+ * @hdev: common vhost_dev structure
+ * @feature_bits: pointer to terminated table of feature bits
+ * @features: original feature set
+ *
+ * This returns a set of features bits that is an intersection of what
+ * is supported by the vhost backend (hdev->features), the supported
+ * feature_bits and the requested feature set.
+ */
+uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
+                            uint64_t features);
+
+/**
+ * vhost_ack_features() - set vhost acked_features
+ * @hdev: common vhost_dev structure
+ * @feature_bits: pointer to terminated table of feature bits
+ * @features: requested feature set
+ *
+ * This sets the internal hdev->acked_features to the intersection of
+ * the backends advertised features and the supported feature_bits.
+ */
+void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
+                        uint64_t features);
+unsigned int vhost_get_max_memslots(void);
+unsigned int vhost_get_free_memslots(void);
+
+int vhost_net_set_backend(struct vhost_dev *hdev,
+                          struct vhost_vring_file *file);
+
+void vhost_toggle_device_iotlb(VirtIODevice *vdev);
+int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write);
+
+int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev,
+                          struct vhost_virtqueue *vq, unsigned idx);
+void vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev,
+                          struct vhost_virtqueue *vq, unsigned idx);
+
+void vhost_dev_reset_inflight(struct vhost_inflight *inflight);
+void vhost_dev_free_inflight(struct vhost_inflight *inflight);
+void vhost_dev_save_inflight(struct vhost_inflight *inflight, QEMUFile *f);
+int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f);
+int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev);
+int vhost_dev_set_inflight(struct vhost_dev *dev,
+                           struct vhost_inflight *inflight);
+int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size,
+                           struct vhost_inflight *inflight);
+bool vhost_dev_has_iommu(struct vhost_dev *dev);
+
+#ifdef CONFIG_VHOST
+int vhost_reset_device(struct vhost_dev *hdev);
+#else
+static inline int vhost_reset_device(struct vhost_dev *hdev)
+{
+    return -ENOSYS;
+}
+#endif /* CONFIG_VHOST */
+
+/**
+ * vhost_supports_device_state(): Checks whether the back-end supports
+ * transferring internal device state for the purpose of migration.
+ * Support for this feature is required for vhost_set_device_state_fd()
+ * and vhost_check_device_state().
+ *
+ * @dev: The vhost device
+ *
+ * Returns true if the device supports these commands, and false if it
+ * does not.
+ */
+bool vhost_supports_device_state(struct vhost_dev *dev);
+
+/**
+ * vhost_set_device_state_fd(): Begin transfer of internal state from/to
+ * the back-end for the purpose of migration.  Data is to be transferred
+ * over a pipe according to @direction and @phase.  The sending end must
+ * only write to the pipe, and the receiving end must only read from it.
+ * Once the sending end is done, it closes its FD.  The receiving end
+ * must take this as the end-of-transfer signal and close its FD, too.
+ *
+ * @fd is the back-end's end of the pipe: The write FD for SAVE, and the
+ * read FD for LOAD.  This function transfers ownership of @fd to the
+ * back-end, i.e. closes it in the front-end.
+ *
+ * The back-end may optionally reply with an FD of its own, if this
+ * improves efficiency on its end.  In this case, the returned FD is
+ * stored in *reply_fd.  The back-end will discard the FD sent to it,
+ * and the front-end must use *reply_fd for transferring state to/from
+ * the back-end.
+ *
+ * @dev: The vhost device
+ * @direction: The direction in which the state is to be transferred.
+ *             For outgoing migrations, this is SAVE, and data is read
+ *             from the back-end and stored by the front-end in the
+ *             migration stream.
+ *             For incoming migrations, this is LOAD, and data is read
+ *             by the front-end from the migration stream and sent to
+ *             the back-end to restore the saved state.
+ * @phase: Which migration phase we are in.  Currently, there is only
+ *         STOPPED (device and all vrings are stopped), in the future,
+ *         more phases such as PRE_COPY or POST_COPY may be added.
+ * @fd: Back-end's end of the pipe through which to transfer state; note
+ *      that ownership is transferred to the back-end, so this function
+ *      closes @fd in the front-end.
+ * @reply_fd: If the back-end wishes to use a different pipe for state
+ *            transfer, this will contain an FD for the front-end to
+ *            use.  Otherwise, -1 is stored here.
+ * @errp: Potential error description
+ *
+ * Returns 0 on success, and -errno on failure.
+ */
+int vhost_set_device_state_fd(struct vhost_dev *dev,
+                              VhostDeviceStateDirection direction,
+                              VhostDeviceStatePhase phase,
+                              int fd,
+                              int *reply_fd,
+                              Error **errp);
+
+/**
+ * vhost_set_device_state_fd(): After transferring state from/to the
+ * back-end via vhost_set_device_state_fd(), i.e. once the sending end
+ * has closed the pipe, inquire the back-end to report any potential
+ * errors that have occurred on its side.  This allows to sense errors
+ * like:
+ * - During outgoing migration, when the source side had already started
+ *   to produce its state, something went wrong and it failed to finish
+ * - During incoming migration, when the received state is somehow
+ *   invalid and cannot be processed by the back-end
+ *
+ * @dev: The vhost device
+ * @errp: Potential error description
+ *
+ * Returns 0 when the back-end reports successful state transfer and
+ * processing, and -errno when an error occurred somewhere.
+ */
+int vhost_check_device_state(struct vhost_dev *dev, Error **errp);
+
+/**
+ * vhost_save_backend_state(): High-level function to receive a vhost
+ * back-end's state, and save it in @f.  Uses
+ * `vhost_set_device_state_fd()` to get the data from the back-end, and
+ * stores it in consecutive chunks that are each prefixed by their
+ * respective length (be32).  The end is marked by a 0-length chunk.
+ *
+ * Must only be called while the device and all its vrings are stopped
+ * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`).
+ *
+ * @dev: The vhost device from which to save the state
+ * @f: Migration stream in which to save the state
+ * @errp: Potential error message
+ *
+ * Returns 0 on success, and -errno otherwise.
+ */
+int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp);
+
+/**
+ * vhost_load_backend_state(): High-level function to load a vhost
+ * back-end's state from @f, and send it over to the back-end.  Reads
+ * the data from @f in the format used by `vhost_save_state()`, and uses
+ * `vhost_set_device_state_fd()` to transfer it to the back-end.
+ *
+ * Must only be called while the device and all its vrings are stopped
+ * (`VHOST_TRANSFER_STATE_PHASE_STOPPED`).
+ *
+ * @dev: The vhost device to which to send the state
+ * @f: Migration stream from which to load the state
+ * @errp: Potential error message
+ *
+ * Returns 0 on success, and -errno otherwise.
+ */
+int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp);
+
+#endif
diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h
new file mode 100644 (file)
index 0000000..07aae69
--- /dev/null
@@ -0,0 +1,243 @@
+/*
+ * Virtio Accessor Support: In case your target can change endian.
+ *
+ * Copyright IBM, Corp. 2013
+ *
+ * Authors:
+ *  Rusty Russell   <rusty@au.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#ifndef QEMU_VIRTIO_ACCESS_H
+#define QEMU_VIRTIO_ACCESS_H
+
+#include "exec/hwaddr.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-bus.h"
+
+#if defined(TARGET_PPC64) || defined(TARGET_ARM)
+#define LEGACY_VIRTIO_IS_BIENDIAN 1
+#endif
+
+static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
+{
+#if defined(LEGACY_VIRTIO_IS_BIENDIAN)
+    return virtio_is_big_endian(vdev);
+#elif TARGET_BIG_ENDIAN
+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+        /* Devices conforming to VIRTIO 1.0 or later are always LE. */
+        return false;
+    }
+    return true;
+#else
+    return false;
+#endif
+}
+
+static inline uint16_t virtio_lduw_phys(VirtIODevice *vdev, hwaddr pa)
+{
+    AddressSpace *dma_as = vdev->dma_as;
+
+    if (virtio_access_is_big_endian(vdev)) {
+        return lduw_be_phys(dma_as, pa);
+    }
+    return lduw_le_phys(dma_as, pa);
+}
+
+static inline uint32_t virtio_ldl_phys(VirtIODevice *vdev, hwaddr pa)
+{
+    AddressSpace *dma_as = vdev->dma_as;
+
+    if (virtio_access_is_big_endian(vdev)) {
+        return ldl_be_phys(dma_as, pa);
+    }
+    return ldl_le_phys(dma_as, pa);
+}
+
+static inline uint64_t virtio_ldq_phys(VirtIODevice *vdev, hwaddr pa)
+{
+    AddressSpace *dma_as = vdev->dma_as;
+
+    if (virtio_access_is_big_endian(vdev)) {
+        return ldq_be_phys(dma_as, pa);
+    }
+    return ldq_le_phys(dma_as, pa);
+}
+
+static inline void virtio_stw_phys(VirtIODevice *vdev, hwaddr pa,
+                                   uint16_t value)
+{
+    AddressSpace *dma_as = vdev->dma_as;
+
+    if (virtio_access_is_big_endian(vdev)) {
+        stw_be_phys(dma_as, pa, value);
+    } else {
+        stw_le_phys(dma_as, pa, value);
+    }
+}
+
+static inline void virtio_stl_phys(VirtIODevice *vdev, hwaddr pa,
+                                   uint32_t value)
+{
+    AddressSpace *dma_as = vdev->dma_as;
+
+    if (virtio_access_is_big_endian(vdev)) {
+        stl_be_phys(dma_as, pa, value);
+    } else {
+        stl_le_phys(dma_as, pa, value);
+    }
+}
+
+static inline void virtio_stw_p(VirtIODevice *vdev, void *ptr, uint16_t v)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        stw_be_p(ptr, v);
+    } else {
+        stw_le_p(ptr, v);
+    }
+}
+
+static inline void virtio_stl_p(VirtIODevice *vdev, void *ptr, uint32_t v)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        stl_be_p(ptr, v);
+    } else {
+        stl_le_p(ptr, v);
+    }
+}
+
+static inline void virtio_stq_p(VirtIODevice *vdev, void *ptr, uint64_t v)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        stq_be_p(ptr, v);
+    } else {
+        stq_le_p(ptr, v);
+    }
+}
+
+static inline int virtio_lduw_p(VirtIODevice *vdev, const void *ptr)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        return lduw_be_p(ptr);
+    } else {
+        return lduw_le_p(ptr);
+    }
+}
+
+static inline int virtio_ldl_p(VirtIODevice *vdev, const void *ptr)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        return ldl_be_p(ptr);
+    } else {
+        return ldl_le_p(ptr);
+    }
+}
+
+static inline uint64_t virtio_ldq_p(VirtIODevice *vdev, const void *ptr)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        return ldq_be_p(ptr);
+    } else {
+        return ldq_le_p(ptr);
+    }
+}
+
+static inline uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s)
+{
+#if HOST_BIG_ENDIAN
+    return virtio_access_is_big_endian(vdev) ? s : bswap16(s);
+#else
+    return virtio_access_is_big_endian(vdev) ? bswap16(s) : s;
+#endif
+}
+
+static inline uint16_t virtio_lduw_phys_cached(VirtIODevice *vdev,
+                                               MemoryRegionCache *cache,
+                                               hwaddr pa)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        return lduw_be_phys_cached(cache, pa);
+    }
+    return lduw_le_phys_cached(cache, pa);
+}
+
+static inline uint32_t virtio_ldl_phys_cached(VirtIODevice *vdev,
+                                              MemoryRegionCache *cache,
+                                              hwaddr pa)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        return ldl_be_phys_cached(cache, pa);
+    }
+    return ldl_le_phys_cached(cache, pa);
+}
+
+static inline uint64_t virtio_ldq_phys_cached(VirtIODevice *vdev,
+                                              MemoryRegionCache *cache,
+                                              hwaddr pa)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        return ldq_be_phys_cached(cache, pa);
+    }
+    return ldq_le_phys_cached(cache, pa);
+}
+
+static inline void virtio_stw_phys_cached(VirtIODevice *vdev,
+                                          MemoryRegionCache *cache,
+                                          hwaddr pa, uint16_t value)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        stw_be_phys_cached(cache, pa, value);
+    } else {
+        stw_le_phys_cached(cache, pa, value);
+    }
+}
+
+static inline void virtio_stl_phys_cached(VirtIODevice *vdev,
+                                          MemoryRegionCache *cache,
+                                          hwaddr pa, uint32_t value)
+{
+    if (virtio_access_is_big_endian(vdev)) {
+        stl_be_phys_cached(cache, pa, value);
+    } else {
+        stl_le_phys_cached(cache, pa, value);
+    }
+}
+
+static inline void virtio_tswap16s(VirtIODevice *vdev, uint16_t *s)
+{
+    *s = virtio_tswap16(vdev, *s);
+}
+
+static inline uint32_t virtio_tswap32(VirtIODevice *vdev, uint32_t s)
+{
+#if HOST_BIG_ENDIAN
+    return virtio_access_is_big_endian(vdev) ? s : bswap32(s);
+#else
+    return virtio_access_is_big_endian(vdev) ? bswap32(s) : s;
+#endif
+}
+
+static inline void virtio_tswap32s(VirtIODevice *vdev, uint32_t *s)
+{
+    *s = virtio_tswap32(vdev, *s);
+}
+
+static inline uint64_t virtio_tswap64(VirtIODevice *vdev, uint64_t s)
+{
+#if HOST_BIG_ENDIAN
+    return virtio_access_is_big_endian(vdev) ? s : bswap64(s);
+#else
+    return virtio_access_is_big_endian(vdev) ? bswap64(s) : s;
+#endif
+}
+
+static inline void virtio_tswap64s(VirtIODevice *vdev, uint64_t *s)
+{
+    *s = virtio_tswap64(vdev, *s);
+}
+#endif /* QEMU_VIRTIO_ACCESS_H */
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
new file mode 100644 (file)
index 0000000..5139cf8
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Virtio Support
+ *
+ * Copyright IBM, Corp. 2007-2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Rusty Russell     <rusty@rustcorp.com.au>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_VIRTIO_BALLOON_H
+#define QEMU_VIRTIO_BALLOON_H
+
+#include "standard-headers/linux/virtio_balloon.h"
+#include "hw/virtio/virtio.h"
+#include "sysemu/iothread.h"
+#include "qom/object.h"
+
+#define TYPE_VIRTIO_BALLOON "virtio-balloon-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOBalloon, VIRTIO_BALLOON)
+
+#define VIRTIO_BALLOON_FREE_PAGE_HINT_CMD_ID_MIN 0x80000000
+
+typedef struct virtio_balloon_stat VirtIOBalloonStat;
+
+typedef struct virtio_balloon_stat_modern {
+       uint16_t tag;
+       uint8_t reserved[6];
+       uint64_t val;
+} VirtIOBalloonStatModern;
+
+enum virtio_balloon_free_page_hint_status {
+    FREE_PAGE_HINT_S_STOP = 0,
+    FREE_PAGE_HINT_S_REQUESTED = 1,
+    FREE_PAGE_HINT_S_START = 2,
+    FREE_PAGE_HINT_S_DONE = 3,
+};
+
+struct VirtIOBalloon {
+    VirtIODevice parent_obj;
+    VirtQueue *ivq, *dvq, *svq, *free_page_vq, *reporting_vq;
+    uint32_t free_page_hint_status;
+    uint32_t num_pages;
+    uint32_t actual;
+    uint32_t free_page_hint_cmd_id;
+    uint64_t stats[VIRTIO_BALLOON_S_NR];
+    VirtQueueElement *stats_vq_elem;
+    size_t stats_vq_offset;
+    QEMUTimer *stats_timer;
+    IOThread *iothread;
+    QEMUBH *free_page_bh;
+    /*
+     * Lock to synchronize threads to access the free page reporting related
+     * fields (e.g. free_page_hint_status).
+     */
+    QemuMutex free_page_lock;
+    QemuCond  free_page_cond;
+    /*
+     * Set to block iothread to continue reading free page hints as the VM is
+     * stopped.
+     */
+    bool block_iothread;
+    NotifierWithReturn free_page_hint_notify;
+    int64_t stats_last_update;
+    int64_t stats_poll_interval;
+    uint32_t host_features;
+
+    bool qemu_4_0_config_size;
+    uint32_t poison_val;
+};
+
+#endif
diff --git a/include/hw/virtio/virtio-blk-common.h b/include/hw/virtio/virtio-blk-common.h
new file mode 100644 (file)
index 0000000..31daada
--- /dev/null
@@ -0,0 +1,20 @@
+/*
+ * Virtio Block Device common helpers
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef VIRTIO_BLK_COMMON_H
+#define VIRTIO_BLK_COMMON_H
+
+#include "hw/virtio/virtio.h"
+
+extern const VirtIOConfigSizeParams virtio_blk_cfg_size_params;
+
+#endif
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
new file mode 100644 (file)
index 0000000..dafec43
--- /dev/null
@@ -0,0 +1,96 @@
+/*
+ * Virtio Block Device
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_VIRTIO_BLK_H
+#define QEMU_VIRTIO_BLK_H
+
+#include "standard-headers/linux/virtio_blk.h"
+#include "hw/virtio/virtio.h"
+#include "hw/block/block.h"
+#include "sysemu/iothread.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/block-ram-registrar.h"
+#include "qom/object.h"
+
+#define TYPE_VIRTIO_BLK "virtio-blk-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOBlock, VIRTIO_BLK)
+
+/* This is the last element of the write scatter-gather list */
+struct virtio_blk_inhdr
+{
+    unsigned char status;
+};
+
+#define VIRTIO_BLK_AUTO_NUM_QUEUES UINT16_MAX
+
+struct VirtIOBlkConf
+{
+    BlockConf conf;
+    IOThread *iothread;
+    char *serial;
+    uint32_t request_merging;
+    uint16_t num_queues;
+    uint16_t queue_size;
+    bool seg_max_adjust;
+    bool report_discard_granularity;
+    uint32_t max_discard_sectors;
+    uint32_t max_write_zeroes_sectors;
+    bool x_enable_wce_if_config_wce;
+};
+
+struct VirtIOBlockDataPlane;
+
+struct VirtIOBlockReq;
+struct VirtIOBlock {
+    VirtIODevice parent_obj;
+    BlockBackend *blk;
+    void *rq;
+    VirtIOBlkConf conf;
+    unsigned short sector_mask;
+    bool original_wce;
+    VMChangeStateEntry *change;
+    bool dataplane_disabled;
+    bool dataplane_started;
+    struct VirtIOBlockDataPlane *dataplane;
+    uint64_t host_features;
+    size_t config_size;
+    BlockRAMRegistrar blk_ram_registrar;
+};
+
+typedef struct VirtIOBlockReq {
+    VirtQueueElement elem;
+    int64_t sector_num;
+    VirtIOBlock *dev;
+    VirtQueue *vq;
+    IOVDiscardUndo inhdr_undo;
+    IOVDiscardUndo outhdr_undo;
+    struct virtio_blk_inhdr *in;
+    struct virtio_blk_outhdr out;
+    QEMUIOVector qiov;
+    size_t in_len;
+    struct VirtIOBlockReq *next;
+    struct VirtIOBlockReq *mr_next;
+    BlockAcctCookie acct;
+} VirtIOBlockReq;
+
+#define VIRTIO_BLK_MAX_MERGE_REQS 32
+
+typedef struct MultiReqBuffer {
+    VirtIOBlockReq *reqs[VIRTIO_BLK_MAX_MERGE_REQS];
+    unsigned int num_reqs;
+    bool is_write;
+} MultiReqBuffer;
+
+void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq);
+
+#endif
diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
new file mode 100644 (file)
index 0000000..7ab8c9d
--- /dev/null
@@ -0,0 +1,160 @@
+/*
+ * VirtioBus
+ *
+ *  Copyright (C) 2012 : GreenSocs Ltd
+ *      http://www.greensocs.com/ , email: info@greensocs.com
+ *
+ *  Developed by :
+ *  Frederic Konrad   <fred.konrad@greensocs.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef VIRTIO_BUS_H
+#define VIRTIO_BUS_H
+
+#include "hw/qdev-core.h"
+#include "hw/virtio/virtio.h"
+#include "qom/object.h"
+
+#define TYPE_VIRTIO_BUS "virtio-bus"
+typedef struct VirtioBusClass VirtioBusClass;
+typedef struct VirtioBusState VirtioBusState;
+DECLARE_OBJ_CHECKERS(VirtioBusState, VirtioBusClass,
+                     VIRTIO_BUS, TYPE_VIRTIO_BUS)
+
+
+struct VirtioBusClass {
+    /* This is what a VirtioBus must implement */
+    BusClass parent;
+    void (*notify)(DeviceState *d, uint16_t vector);
+    void (*save_config)(DeviceState *d, QEMUFile *f);
+    void (*save_queue)(DeviceState *d, int n, QEMUFile *f);
+    void (*save_extra_state)(DeviceState *d, QEMUFile *f);
+    int (*load_config)(DeviceState *d, QEMUFile *f);
+    int (*load_queue)(DeviceState *d, int n, QEMUFile *f);
+    int (*load_done)(DeviceState *d, QEMUFile *f);
+    int (*load_extra_state)(DeviceState *d, QEMUFile *f);
+    bool (*has_extra_state)(DeviceState *d);
+    bool (*query_guest_notifiers)(DeviceState *d);
+    int (*set_guest_notifiers)(DeviceState *d, int nvqs, bool assign);
+    int (*set_host_notifier_mr)(DeviceState *d, int n,
+                                MemoryRegion *mr, bool assign);
+    void (*vmstate_change)(DeviceState *d, bool running);
+    /*
+     * Expose the features the transport layer supports before
+     * the negotiation takes place.
+     */
+    void (*pre_plugged)(DeviceState *d, Error **errp);
+    /*
+     * transport independent init function.
+     * This is called by virtio-bus just after the device is plugged.
+     */
+    void (*device_plugged)(DeviceState *d, Error **errp);
+    /*
+     * transport independent exit function.
+     * This is called by virtio-bus just before the device is unplugged.
+     */
+    void (*device_unplugged)(DeviceState *d);
+    int (*query_nvectors)(DeviceState *d);
+    /*
+     * ioeventfd handling: if the transport implements ioeventfd_assign,
+     * it must implement ioeventfd_enabled as well.
+     */
+    /* Returns true if the ioeventfd is enabled for the device. */
+    bool (*ioeventfd_enabled)(DeviceState *d);
+    /*
+     * Assigns/deassigns the ioeventfd backing for the transport on
+     * the device for queue number n. Returns an error value on
+     * failure.
+     */
+    int (*ioeventfd_assign)(DeviceState *d, EventNotifier *notifier,
+                            int n, bool assign);
+    /*
+     * Whether queue number n is enabled.
+     */
+    bool (*queue_enabled)(DeviceState *d, int n);
+    /*
+     * Does the transport have variable vring alignment?
+     * (ie can it ever call virtio_queue_set_align()?)
+     * Note that changing this will break migration for this transport.
+     */
+    bool has_variable_vring_alignment;
+    AddressSpace *(*get_dma_as)(DeviceState *d);
+    bool (*iommu_enabled)(DeviceState *d);
+};
+
+struct VirtioBusState {
+    BusState parent_obj;
+
+    /*
+     * Set if ioeventfd has been started.
+     */
+    bool ioeventfd_started;
+
+    /*
+     * Set if ioeventfd has been grabbed by vhost.  When ioeventfd
+     * is grabbed by vhost, we track its started/stopped state (which
+     * depends in turn on the virtio status register), but do not
+     * register a handler for the ioeventfd.  When ioeventfd is
+     * released, if ioeventfd_started is true we finally register
+     * the handler so that QEMU's device model can use ioeventfd.
+     */
+    int ioeventfd_grabbed;
+};
+
+void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp);
+void virtio_bus_reset(VirtioBusState *bus);
+void virtio_bus_device_unplugged(VirtIODevice *bus);
+/* Get the device id of the plugged device. */
+uint16_t virtio_bus_get_vdev_id(VirtioBusState *bus);
+/* Get the config_len field of the plugged device. */
+size_t virtio_bus_get_vdev_config_len(VirtioBusState *bus);
+/* Get bad features of the plugged device. */
+uint32_t virtio_bus_get_vdev_bad_features(VirtioBusState *bus);
+/* Get config of the plugged device. */
+void virtio_bus_get_vdev_config(VirtioBusState *bus, uint8_t *config);
+/* Set config of the plugged device. */
+void virtio_bus_set_vdev_config(VirtioBusState *bus, uint8_t *config);
+
+static inline VirtIODevice *virtio_bus_get_device(VirtioBusState *bus)
+{
+    BusState *qbus = &bus->parent_obj;
+    BusChild *kid = QTAILQ_FIRST(&qbus->children);
+    DeviceState *qdev = kid ? kid->child : NULL;
+
+    /* This is used on the data path, the cast is guaranteed
+     * to succeed by the qdev machinery.
+     */
+    return (VirtIODevice *)qdev;
+}
+
+/* Return whether the proxy allows ioeventfd.  */
+bool virtio_bus_ioeventfd_enabled(VirtioBusState *bus);
+/* Start the ioeventfd. */
+int virtio_bus_start_ioeventfd(VirtioBusState *bus);
+/* Stop the ioeventfd. */
+void virtio_bus_stop_ioeventfd(VirtioBusState *bus);
+/* Tell the bus that vhost is grabbing the ioeventfd. */
+int virtio_bus_grab_ioeventfd(VirtioBusState *bus);
+/* bus that vhost is not using the ioeventfd anymore. */
+void virtio_bus_release_ioeventfd(VirtioBusState *bus);
+/* Switch from/to the generic ioeventfd handler */
+int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign);
+/* Tell the bus that the ioeventfd handler is no longer required. */
+void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n);
+/* Whether the IOMMU is enabled for this device */
+bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev);
+#endif /* VIRTIO_BUS_H */
diff --git a/include/hw/virtio/virtio-crypto.h b/include/hw/virtio/virtio-crypto.h
new file mode 100644 (file)
index 0000000..348749f
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Virtio crypto Support
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Gonglei <arei.gonglei@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef QEMU_VIRTIO_CRYPTO_H
+#define QEMU_VIRTIO_CRYPTO_H
+
+#include "standard-headers/linux/virtio_crypto.h"
+#include "hw/virtio/virtio.h"
+#include "sysemu/iothread.h"
+#include "sysemu/cryptodev.h"
+#include "qom/object.h"
+
+
+#define DEBUG_VIRTIO_CRYPTO 0
+
+#define DPRINTF(fmt, ...) \
+do { \
+    if (DEBUG_VIRTIO_CRYPTO) { \
+        fprintf(stderr, "virtio_crypto: " fmt, ##__VA_ARGS__); \
+    } \
+} while (0)
+
+
+#define TYPE_VIRTIO_CRYPTO "virtio-crypto-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOCrypto, VIRTIO_CRYPTO)
+#define VIRTIO_CRYPTO_GET_PARENT_CLASS(obj) \
+        OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_CRYPTO)
+
+
+typedef struct VirtIOCryptoConf {
+    CryptoDevBackend *cryptodev;
+
+    /* Supported service mask */
+    uint32_t crypto_services;
+
+    /* Detailed algorithms mask */
+    uint32_t cipher_algo_l;
+    uint32_t cipher_algo_h;
+    uint32_t hash_algo;
+    uint32_t mac_algo_l;
+    uint32_t mac_algo_h;
+    uint32_t aead_algo;
+    uint32_t akcipher_algo;
+
+    /* Maximum length of cipher key */
+    uint32_t max_cipher_key_len;
+    /* Maximum length of authenticated key */
+    uint32_t max_auth_key_len;
+    /* Maximum size of each crypto request's content */
+    uint64_t max_size;
+} VirtIOCryptoConf;
+
+struct VirtIOCrypto;
+
+typedef struct VirtIOCryptoReq {
+    VirtQueueElement elem;
+    /* flags of operation, such as type of algorithm */
+    uint32_t flags;
+    struct virtio_crypto_inhdr *in;
+    struct iovec *in_iov; /* Head address of dest iovec */
+    unsigned int in_num; /* Number of dest iovec */
+    size_t in_len;
+    VirtQueue *vq;
+    struct VirtIOCrypto *vcrypto;
+    CryptoDevBackendOpInfo op_info;
+} VirtIOCryptoReq;
+
+typedef struct VirtIOCryptoQueue {
+    VirtQueue *dataq;
+    QEMUBH *dataq_bh;
+    struct VirtIOCrypto *vcrypto;
+} VirtIOCryptoQueue;
+
+struct VirtIOCrypto {
+    VirtIODevice parent_obj;
+
+    VirtQueue *ctrl_vq;
+    VirtIOCryptoQueue *vqs;
+    VirtIOCryptoConf conf;
+    CryptoDevBackend *cryptodev;
+
+    uint32_t max_queues;
+    uint32_t status;
+
+    int multiqueue;
+    uint32_t curr_queues;
+    size_t config_size;
+    uint8_t vhost_started;
+};
+
+#endif /* QEMU_VIRTIO_CRYPTO_H */
diff --git a/include/hw/virtio/virtio-dmabuf.h b/include/hw/virtio/virtio-dmabuf.h
new file mode 100644 (file)
index 0000000..627c3b6
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * Virtio Shared dma-buf
+ *
+ * Copyright Red Hat, Inc. 2023
+ *
+ * Authors:
+ *     Albert Esteve <aesteve@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef VIRTIO_DMABUF_H
+#define VIRTIO_DMABUF_H
+
+#include "qemu/uuid.h"
+#include "vhost.h"
+
+typedef enum SharedObjectType {
+    TYPE_INVALID = 0,
+    TYPE_DMABUF,
+    TYPE_VHOST_DEV,
+} SharedObjectType;
+
+typedef struct VirtioSharedObject {
+    SharedObjectType type;
+    gpointer value;
+} VirtioSharedObject;
+
+/**
+ * virtio_add_dmabuf() - Add a new dma-buf resource to the lookup table
+ * @uuid: new resource's UUID
+ * @dmabuf_fd: the dma-buf descriptor that will be stored and shared with
+ *             other virtio devices. The caller retains ownership over the
+ *             descriptor and its lifecycle.
+ *
+ * Note: @dmabuf_fd must be a valid (non-negative) file descriptor.
+ *
+ * Return: true if the UUID did not exist and the resource has been added,
+ * false if another resource with the same UUID already existed.
+ * Note that if it finds a repeated UUID, the resource is not inserted in
+ * the lookup table.
+ */
+bool virtio_add_dmabuf(QemuUUID *uuid, int dmabuf_fd);
+
+/**
+ * virtio_add_vhost_device() - Add a new exporter vhost device that holds the
+ * resource with the associated UUID
+ * @uuid: new resource's UUID
+ * @dev: the pointer to the vhost device that holds the resource. The caller
+ *       retains ownership over the device struct and its lifecycle.
+ *
+ * Return: true if the UUID did not exist and the device has been tracked,
+ * false if another resource with the same UUID already existed.
+ * Note that if it finds a repeated UUID, the resource is not inserted in
+ * the lookup table.
+ */
+bool virtio_add_vhost_device(QemuUUID *uuid, struct vhost_dev *dev);
+
+/**
+ * virtio_remove_resource() - Removes a resource from the lookup table
+ * @uuid: resource's UUID
+ *
+ * Return: true if the UUID has been found and removed from the lookup table.
+ */
+bool virtio_remove_resource(const QemuUUID *uuid);
+
+/**
+ * virtio_lookup_dmabuf() - Looks for a dma-buf resource in the lookup table
+ * @uuid: resource's UUID
+ *
+ * Return: the dma-buf file descriptor integer, or -1 if the key is not found.
+ */
+int virtio_lookup_dmabuf(const QemuUUID *uuid);
+
+/**
+ * virtio_lookup_vhost_device() - Looks for an exporter vhost device in the
+ * lookup table
+ * @uuid: resource's UUID
+ *
+ * Return: pointer to the vhost_dev struct, or NULL if the key is not found.
+ */
+struct vhost_dev *virtio_lookup_vhost_device(const QemuUUID *uuid);
+
+/**
+ * virtio_object_type() - Looks for the type of resource in the lookup table
+ * @uuid: resource's UUID
+ *
+ * Return: the type of resource associated with the UUID, or TYPE_INVALID if
+ * the key is not found.
+ */
+SharedObjectType virtio_object_type(const QemuUUID *uuid);
+
+/**
+ * virtio_free_resources() - Destroys all keys and values of the shared
+ * resources lookup table, and frees them
+ */
+void virtio_free_resources(void);
+
+#endif /* VIRTIO_DMABUF_H */
diff --git a/include/hw/virtio/virtio-gpu-bswap.h b/include/hw/virtio/virtio-gpu-bswap.h
new file mode 100644 (file)
index 0000000..dd1975e
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * Virtio GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2014
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VIRTIO_GPU_BSWAP_H
+#define HW_VIRTIO_GPU_BSWAP_H
+
+#include "qemu/bswap.h"
+#include "standard-headers/linux/virtio_gpu.h"
+
+static inline void
+virtio_gpu_ctrl_hdr_bswap(struct virtio_gpu_ctrl_hdr *hdr)
+{
+    le32_to_cpus(&hdr->type);
+    le32_to_cpus(&hdr->flags);
+    le64_to_cpus(&hdr->fence_id);
+    le32_to_cpus(&hdr->ctx_id);
+}
+
+static inline void
+virtio_gpu_bswap_32(void *ptr, size_t size)
+{
+#if HOST_BIG_ENDIAN
+
+    size_t i;
+    struct virtio_gpu_ctrl_hdr *hdr = (struct virtio_gpu_ctrl_hdr *) ptr;
+
+    virtio_gpu_ctrl_hdr_bswap(hdr);
+
+    i = sizeof(struct virtio_gpu_ctrl_hdr);
+    while (i < size) {
+        le32_to_cpus((uint32_t *)(ptr + i));
+        i = i + sizeof(uint32_t);
+    }
+
+#endif
+}
+
+static inline void
+virtio_gpu_t2d_bswap(struct virtio_gpu_transfer_to_host_2d *t2d)
+{
+    virtio_gpu_ctrl_hdr_bswap(&t2d->hdr);
+    le32_to_cpus(&t2d->r.x);
+    le32_to_cpus(&t2d->r.y);
+    le32_to_cpus(&t2d->r.width);
+    le32_to_cpus(&t2d->r.height);
+    le64_to_cpus(&t2d->offset);
+    le32_to_cpus(&t2d->resource_id);
+    le32_to_cpus(&t2d->padding);
+}
+
+static inline void
+virtio_gpu_create_blob_bswap(struct virtio_gpu_resource_create_blob *cblob)
+{
+    virtio_gpu_ctrl_hdr_bswap(&cblob->hdr);
+    le32_to_cpus(&cblob->resource_id);
+    le32_to_cpus(&cblob->blob_mem);
+    le32_to_cpus(&cblob->blob_flags);
+    le32_to_cpus(&cblob->nr_entries);
+    le64_to_cpus(&cblob->blob_id);
+    le64_to_cpus(&cblob->size);
+}
+
+static inline void
+virtio_gpu_map_blob_bswap(struct virtio_gpu_resource_map_blob *mblob)
+{
+    virtio_gpu_ctrl_hdr_bswap(&mblob->hdr);
+    le32_to_cpus(&mblob->resource_id);
+    le64_to_cpus(&mblob->offset);
+}
+
+static inline void
+virtio_gpu_unmap_blob_bswap(struct virtio_gpu_resource_unmap_blob *ublob)
+{
+    virtio_gpu_ctrl_hdr_bswap(&ublob->hdr);
+    le32_to_cpus(&ublob->resource_id);
+}
+
+static inline void
+virtio_gpu_scanout_blob_bswap(struct virtio_gpu_set_scanout_blob *ssb)
+{
+    virtio_gpu_bswap_32(ssb, sizeof(*ssb) - sizeof(ssb->offsets[3]));
+    le32_to_cpus(&ssb->offsets[3]);
+}
+
+#endif
diff --git a/include/hw/virtio/virtio-gpu-pci.h b/include/hw/virtio/virtio-gpu-pci.h
new file mode 100644 (file)
index 0000000..225cbbc
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Virtio GPU PCI Device
+ *
+ * Copyright Red Hat, Inc. 2013-2014
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VIRTIO_GPU_PCI_H
+#define HW_VIRTIO_GPU_PCI_H
+
+#include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-gpu.h"
+#include "qom/object.h"
+
+
+/*
+ * virtio-gpu-pci-base: This extends VirtioPCIProxy.
+ */
+#define TYPE_VIRTIO_GPU_PCI_BASE "virtio-gpu-pci-base"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPUPCIBase, VIRTIO_GPU_PCI_BASE)
+
+struct VirtIOGPUPCIBase {
+    VirtIOPCIProxy parent_obj;
+    VirtIOGPUBase *vgpu;
+};
+
+/* to share between PCI and VGA */
+#define DEFINE_VIRTIO_GPU_PCI_PROPERTIES(_state)                \
+    DEFINE_PROP_BIT("ioeventfd", _state, flags,                 \
+                    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false),  \
+        DEFINE_PROP_UINT32("vectors", _state, nvectors, 3)
+
+#endif /* HW_VIRTIO_GPU_PCI_H */
diff --git a/include/hw/virtio/virtio-gpu-pixman.h b/include/hw/virtio/virtio-gpu-pixman.h
new file mode 100644 (file)
index 0000000..4dba782
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Virtio GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2014
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VIRTIO_GPU_PIXMAN_H
+#define HW_VIRTIO_GPU_PIXMAN_H
+
+#include "ui/qemu-pixman.h"
+#include "standard-headers/linux/virtio_gpu.h"
+
+static inline pixman_format_code_t
+virtio_gpu_get_pixman_format(uint32_t virtio_gpu_format)
+{
+    switch (virtio_gpu_format) {
+    case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM:
+        return PIXMAN_BE_b8g8r8x8;
+    case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM:
+        return PIXMAN_BE_b8g8r8a8;
+    case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM:
+        return PIXMAN_BE_x8r8g8b8;
+    case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM:
+        return PIXMAN_BE_a8r8g8b8;
+    case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM:
+        return PIXMAN_BE_r8g8b8x8;
+    case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM:
+        return PIXMAN_BE_r8g8b8a8;
+    case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM:
+        return PIXMAN_BE_x8b8g8r8;
+    case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM:
+        return PIXMAN_BE_a8b8g8r8;
+    default:
+        return 0;
+    }
+}
+
+#endif
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
new file mode 100644 (file)
index 0000000..584ba2e
--- /dev/null
@@ -0,0 +1,337 @@
+/*
+ * Virtio GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2014
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VIRTIO_GPU_H
+#define HW_VIRTIO_GPU_H
+
+#include "qemu/queue.h"
+#include "ui/qemu-pixman.h"
+#include "ui/console.h"
+#include "hw/virtio/virtio.h"
+#include "qemu/log.h"
+#include "sysemu/vhost-user-backend.h"
+
+#include "standard-headers/linux/virtio_gpu.h"
+#include "standard-headers/linux/virtio_ids.h"
+#include "qom/object.h"
+
+#define TYPE_VIRTIO_GPU_BASE "virtio-gpu-base"
+OBJECT_DECLARE_TYPE(VirtIOGPUBase, VirtIOGPUBaseClass,
+                    VIRTIO_GPU_BASE)
+
+#define TYPE_VIRTIO_GPU "virtio-gpu-device"
+OBJECT_DECLARE_TYPE(VirtIOGPU, VirtIOGPUClass, VIRTIO_GPU)
+
+#define TYPE_VIRTIO_GPU_GL "virtio-gpu-gl-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPUGL, VIRTIO_GPU_GL)
+
+#define TYPE_VHOST_USER_GPU "vhost-user-gpu"
+OBJECT_DECLARE_SIMPLE_TYPE(VhostUserGPU, VHOST_USER_GPU)
+
+#define TYPE_VIRTIO_GPU_RUTABAGA "virtio-gpu-rutabaga-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPURutabaga, VIRTIO_GPU_RUTABAGA)
+
+struct virtio_gpu_simple_resource {
+    uint32_t resource_id;
+    uint32_t width;
+    uint32_t height;
+    uint32_t format;
+    uint64_t *addrs;
+    struct iovec *iov;
+    unsigned int iov_cnt;
+    uint32_t scanout_bitmask;
+    pixman_image_t *image;
+#ifdef WIN32
+    HANDLE handle;
+#endif
+    uint64_t hostmem;
+
+    uint64_t blob_size;
+    void *blob;
+    int dmabuf_fd;
+    uint8_t *remapped;
+
+    QTAILQ_ENTRY(virtio_gpu_simple_resource) next;
+};
+
+struct virtio_gpu_framebuffer {
+    pixman_format_code_t format;
+    uint32_t bytes_pp;
+    uint32_t width, height;
+    uint32_t stride;
+    uint32_t offset;
+};
+
+struct virtio_gpu_scanout {
+    QemuConsole *con;
+    DisplaySurface *ds;
+    uint32_t width, height;
+    int x, y;
+    int invalidate;
+    uint32_t resource_id;
+    struct virtio_gpu_update_cursor cursor;
+    QEMUCursor *current_cursor;
+};
+
+struct virtio_gpu_requested_state {
+    uint16_t width_mm, height_mm;
+    uint32_t width, height;
+    uint32_t refresh_rate;
+    int x, y;
+};
+
+enum virtio_gpu_base_conf_flags {
+    VIRTIO_GPU_FLAG_VIRGL_ENABLED = 1,
+    VIRTIO_GPU_FLAG_STATS_ENABLED,
+    VIRTIO_GPU_FLAG_EDID_ENABLED,
+    VIRTIO_GPU_FLAG_DMABUF_ENABLED,
+    VIRTIO_GPU_FLAG_BLOB_ENABLED,
+    VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED,
+    VIRTIO_GPU_FLAG_RUTABAGA_ENABLED,
+};
+
+#define virtio_gpu_virgl_enabled(_cfg) \
+    (_cfg.flags & (1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED))
+#define virtio_gpu_stats_enabled(_cfg) \
+    (_cfg.flags & (1 << VIRTIO_GPU_FLAG_STATS_ENABLED))
+#define virtio_gpu_edid_enabled(_cfg) \
+    (_cfg.flags & (1 << VIRTIO_GPU_FLAG_EDID_ENABLED))
+#define virtio_gpu_dmabuf_enabled(_cfg) \
+    (_cfg.flags & (1 << VIRTIO_GPU_FLAG_DMABUF_ENABLED))
+#define virtio_gpu_blob_enabled(_cfg) \
+    (_cfg.flags & (1 << VIRTIO_GPU_FLAG_BLOB_ENABLED))
+#define virtio_gpu_context_init_enabled(_cfg) \
+    (_cfg.flags & (1 << VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED))
+#define virtio_gpu_rutabaga_enabled(_cfg) \
+    (_cfg.flags & (1 << VIRTIO_GPU_FLAG_RUTABAGA_ENABLED))
+#define virtio_gpu_hostmem_enabled(_cfg) \
+    (_cfg.hostmem > 0)
+
+struct virtio_gpu_base_conf {
+    uint32_t max_outputs;
+    uint32_t flags;
+    uint32_t xres;
+    uint32_t yres;
+    uint64_t hostmem;
+};
+
+struct virtio_gpu_ctrl_command {
+    VirtQueueElement elem;
+    VirtQueue *vq;
+    struct virtio_gpu_ctrl_hdr cmd_hdr;
+    uint32_t error;
+    bool finished;
+    QTAILQ_ENTRY(virtio_gpu_ctrl_command) next;
+};
+
+struct VirtIOGPUBase {
+    VirtIODevice parent_obj;
+
+    Error *migration_blocker;
+
+    struct virtio_gpu_base_conf conf;
+    struct virtio_gpu_config virtio_config;
+    const GraphicHwOps *hw_ops;
+
+    int renderer_blocked;
+    int enable;
+
+    MemoryRegion hostmem;
+
+    struct virtio_gpu_scanout scanout[VIRTIO_GPU_MAX_SCANOUTS];
+
+    int enabled_output_bitmask;
+    struct virtio_gpu_requested_state req_state[VIRTIO_GPU_MAX_SCANOUTS];
+};
+
+struct VirtIOGPUBaseClass {
+    VirtioDeviceClass parent;
+
+    void (*gl_flushed)(VirtIOGPUBase *g);
+};
+
+#define VIRTIO_GPU_BASE_PROPERTIES(_state, _conf)                       \
+    DEFINE_PROP_UINT32("max_outputs", _state, _conf.max_outputs, 1),    \
+    DEFINE_PROP_BIT("edid", _state, _conf.flags, \
+                    VIRTIO_GPU_FLAG_EDID_ENABLED, true), \
+    DEFINE_PROP_UINT32("xres", _state, _conf.xres, 1280), \
+    DEFINE_PROP_UINT32("yres", _state, _conf.yres, 800)
+
+typedef struct VGPUDMABuf {
+    QemuDmaBuf buf;
+    uint32_t scanout_id;
+    QTAILQ_ENTRY(VGPUDMABuf) next;
+} VGPUDMABuf;
+
+struct VirtIOGPU {
+    VirtIOGPUBase parent_obj;
+
+    uint64_t conf_max_hostmem;
+
+    VirtQueue *ctrl_vq;
+    VirtQueue *cursor_vq;
+
+    QEMUBH *ctrl_bh;
+    QEMUBH *cursor_bh;
+    QEMUBH *reset_bh;
+    QemuCond reset_cond;
+    bool reset_finished;
+
+    QTAILQ_HEAD(, virtio_gpu_simple_resource) reslist;
+    QTAILQ_HEAD(, virtio_gpu_ctrl_command) cmdq;
+    QTAILQ_HEAD(, virtio_gpu_ctrl_command) fenceq;
+
+    uint64_t hostmem;
+
+    bool processing_cmdq;
+    QEMUTimer *fence_poll;
+    QEMUTimer *print_stats;
+
+    uint32_t inflight;
+    struct {
+        uint32_t max_inflight;
+        uint32_t requests;
+        uint32_t req_3d;
+        uint32_t bytes_3d;
+    } stats;
+
+    struct {
+        QTAILQ_HEAD(, VGPUDMABuf) bufs;
+        VGPUDMABuf *primary[VIRTIO_GPU_MAX_SCANOUTS];
+    } dmabuf;
+};
+
+struct VirtIOGPUClass {
+    VirtIOGPUBaseClass parent;
+
+    void (*handle_ctrl)(VirtIODevice *vdev, VirtQueue *vq);
+    void (*process_cmd)(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd);
+    void (*update_cursor_data)(VirtIOGPU *g,
+                               struct virtio_gpu_scanout *s,
+                               uint32_t resource_id);
+};
+
+struct VirtIOGPUGL {
+    struct VirtIOGPU parent_obj;
+
+    bool renderer_inited;
+    bool renderer_reset;
+};
+
+struct VhostUserGPU {
+    VirtIOGPUBase parent_obj;
+
+    VhostUserBackend *vhost;
+    int vhost_gpu_fd; /* closed by the chardev */
+    CharBackend vhost_chr;
+    QemuDmaBuf dmabuf[VIRTIO_GPU_MAX_SCANOUTS];
+    bool backend_blocked;
+};
+
+#define MAX_SLOTS 4096
+
+struct MemoryRegionInfo {
+    int used;
+    MemoryRegion mr;
+    uint32_t resource_id;
+};
+
+struct rutabaga;
+
+struct VirtIOGPURutabaga {
+    VirtIOGPU parent_obj;
+    struct MemoryRegionInfo memory_regions[MAX_SLOTS];
+    uint64_t capset_mask;
+    char *wayland_socket_path;
+    char *wsi;
+    bool headless;
+    uint32_t num_capsets;
+    struct rutabaga *rutabaga;
+};
+
+#define VIRTIO_GPU_FILL_CMD(out) do {                                   \
+        size_t virtiogpufillcmd_s_ =                                    \
+            iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0,          \
+                       &out, sizeof(out));                              \
+        if (virtiogpufillcmd_s_ != sizeof(out)) {                       \
+            qemu_log_mask(LOG_GUEST_ERROR,                              \
+                          "%s: command size incorrect %zu vs %zu\n",    \
+                          __func__, virtiogpufillcmd_s_, sizeof(out));  \
+            return;                                                     \
+        }                                                               \
+    } while (0)
+
+/* virtio-gpu-base.c */
+bool virtio_gpu_base_device_realize(DeviceState *qdev,
+                                    VirtIOHandleOutput ctrl_cb,
+                                    VirtIOHandleOutput cursor_cb,
+                                    Error **errp);
+void virtio_gpu_base_device_unrealize(DeviceState *qdev);
+void virtio_gpu_base_reset(VirtIOGPUBase *g);
+void virtio_gpu_base_fill_display_info(VirtIOGPUBase *g,
+                        struct virtio_gpu_resp_display_info *dpy_info);
+
+void virtio_gpu_base_generate_edid(VirtIOGPUBase *g, int scanout,
+                                   struct virtio_gpu_resp_edid *edid);
+/* virtio-gpu.c */
+struct virtio_gpu_simple_resource *
+virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id);
+
+void virtio_gpu_ctrl_response(VirtIOGPU *g,
+                              struct virtio_gpu_ctrl_command *cmd,
+                              struct virtio_gpu_ctrl_hdr *resp,
+                              size_t resp_len);
+void virtio_gpu_ctrl_response_nodata(VirtIOGPU *g,
+                                     struct virtio_gpu_ctrl_command *cmd,
+                                     enum virtio_gpu_ctrl_type type);
+void virtio_gpu_get_display_info(VirtIOGPU *g,
+                                 struct virtio_gpu_ctrl_command *cmd);
+void virtio_gpu_get_edid(VirtIOGPU *g,
+                         struct virtio_gpu_ctrl_command *cmd);
+int virtio_gpu_create_mapping_iov(VirtIOGPU *g,
+                                  uint32_t nr_entries, uint32_t offset,
+                                  struct virtio_gpu_ctrl_command *cmd,
+                                  uint64_t **addr, struct iovec **iov,
+                                  uint32_t *niov);
+void virtio_gpu_cleanup_mapping_iov(VirtIOGPU *g,
+                                    struct iovec *iov, uint32_t count);
+void virtio_gpu_cleanup_mapping(VirtIOGPU *g,
+                                struct virtio_gpu_simple_resource *res);
+void virtio_gpu_process_cmdq(VirtIOGPU *g);
+void virtio_gpu_device_realize(DeviceState *qdev, Error **errp);
+void virtio_gpu_reset(VirtIODevice *vdev);
+void virtio_gpu_simple_process_cmd(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd);
+void virtio_gpu_update_cursor_data(VirtIOGPU *g,
+                                   struct virtio_gpu_scanout *s,
+                                   uint32_t resource_id);
+
+/* virtio-gpu-udmabuf.c */
+bool virtio_gpu_have_udmabuf(void);
+void virtio_gpu_init_udmabuf(struct virtio_gpu_simple_resource *res);
+void virtio_gpu_fini_udmabuf(struct virtio_gpu_simple_resource *res);
+int virtio_gpu_update_dmabuf(VirtIOGPU *g,
+                             uint32_t scanout_id,
+                             struct virtio_gpu_simple_resource *res,
+                             struct virtio_gpu_framebuffer *fb,
+                             struct virtio_gpu_rect *r);
+
+/* virtio-gpu-3d.c */
+void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
+                                  struct virtio_gpu_ctrl_command *cmd);
+void virtio_gpu_virgl_fence_poll(VirtIOGPU *g);
+void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g);
+void virtio_gpu_virgl_reset(VirtIOGPU *g);
+int virtio_gpu_virgl_init(VirtIOGPU *g);
+int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g);
+
+#endif
diff --git a/include/hw/virtio/virtio-input.h b/include/hw/virtio/virtio-input.h
new file mode 100644 (file)
index 0000000..a6c9703
--- /dev/null
@@ -0,0 +1,116 @@
+#ifndef QEMU_VIRTIO_INPUT_H
+#define QEMU_VIRTIO_INPUT_H
+
+#include "ui/input.h"
+#include "sysemu/vhost-user-backend.h"
+
+/* ----------------------------------------------------------------- */
+/* virtio input protocol                                             */
+
+#include "standard-headers/linux/virtio_ids.h"
+#include "standard-headers/linux/virtio_input.h"
+#include "qom/object.h"
+
+typedef struct virtio_input_absinfo virtio_input_absinfo;
+typedef struct virtio_input_config virtio_input_config;
+typedef struct virtio_input_event virtio_input_event;
+
+/* ----------------------------------------------------------------- */
+/* qemu internals                                                    */
+
+#define TYPE_VIRTIO_INPUT "virtio-input-device"
+OBJECT_DECLARE_TYPE(VirtIOInput, VirtIOInputClass,
+                    VIRTIO_INPUT)
+#define VIRTIO_INPUT_GET_PARENT_CLASS(obj) \
+        OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_INPUT)
+
+#define TYPE_VIRTIO_INPUT_HID  "virtio-input-hid-device"
+#define TYPE_VIRTIO_KEYBOARD   "virtio-keyboard-device"
+#define TYPE_VIRTIO_MOUSE      "virtio-mouse-device"
+#define TYPE_VIRTIO_TABLET     "virtio-tablet-device"
+#define TYPE_VIRTIO_MULTITOUCH "virtio-multitouch-device"
+
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOInputHID, VIRTIO_INPUT_HID)
+#define VIRTIO_INPUT_HID_GET_PARENT_CLASS(obj) \
+        OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_INPUT_HID)
+
+#define TYPE_VIRTIO_INPUT_HOST   "virtio-input-host-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOInputHost, VIRTIO_INPUT_HOST)
+#define VIRTIO_INPUT_HOST_GET_PARENT_CLASS(obj) \
+        OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_INPUT_HOST)
+
+#define TYPE_VHOST_USER_INPUT   "vhost-user-input"
+OBJECT_DECLARE_SIMPLE_TYPE(VHostUserInput, VHOST_USER_INPUT)
+#define VHOST_USER_INPUT_GET_PARENT_CLASS(obj)             \
+    OBJECT_GET_PARENT_CLASS(obj, TYPE_VHOST_USER_INPUT)
+
+typedef struct VirtIOInputConfig VirtIOInputConfig;
+
+struct VirtIOInputConfig {
+    virtio_input_config               config;
+    QTAILQ_ENTRY(VirtIOInputConfig)   node;
+};
+
+struct VirtIOInput {
+    VirtIODevice                      parent_obj;
+    uint8_t                           cfg_select;
+    uint8_t                           cfg_subsel;
+    uint32_t                          cfg_size;
+    QTAILQ_HEAD(, VirtIOInputConfig)  cfg_list;
+    VirtQueue                         *evt, *sts;
+    char                              *serial;
+
+    struct {
+        virtio_input_event event;
+        VirtQueueElement *elem;
+    }                                 *queue;
+    uint32_t                          qindex, qsize;
+
+    bool                              active;
+};
+
+struct VirtIOInputClass {
+    /*< private >*/
+    VirtioDeviceClass parent;
+    /*< public >*/
+
+    DeviceRealize realize;
+    DeviceUnrealize unrealize;
+    void (*change_active)(VirtIOInput *vinput);
+    void (*handle_status)(VirtIOInput *vinput, virtio_input_event *event);
+};
+
+struct VirtIOInputHID {
+    VirtIOInput                       parent_obj;
+    char                              *display;
+    uint32_t                          head;
+    const QemuInputHandler            *handler;
+    QemuInputHandlerState             *hs;
+    int                               ledstate;
+    bool                              wheel_axis;
+};
+
+struct VirtIOInputHost {
+    VirtIOInput                       parent_obj;
+    char                              *evdev;
+    int                               fd;
+};
+
+struct VHostUserInput {
+    VirtIOInput                       parent_obj;
+
+    VhostUserBackend                  *vhost;
+};
+
+void virtio_input_send(VirtIOInput *vinput, virtio_input_event *event);
+void virtio_input_init_config(VirtIOInput *vinput,
+                              virtio_input_config *config);
+virtio_input_config *virtio_input_find_config(VirtIOInput *vinput,
+                                              uint8_t select,
+                                              uint8_t subsel);
+void virtio_input_add_config(VirtIOInput *vinput,
+                             virtio_input_config *config);
+void virtio_input_idstr_config(VirtIOInput *vinput,
+                               uint8_t select, const char *string);
+
+#endif /* QEMU_VIRTIO_INPUT_H */
diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
new file mode 100644 (file)
index 0000000..781ebae
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ * virtio-iommu device
+ *
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QEMU_VIRTIO_IOMMU_H
+#define QEMU_VIRTIO_IOMMU_H
+
+#include "standard-headers/linux/virtio_iommu.h"
+#include "hw/virtio/virtio.h"
+#include "hw/pci/pci.h"
+#include "qom/object.h"
+
+#define TYPE_VIRTIO_IOMMU "virtio-iommu-device"
+#define TYPE_VIRTIO_IOMMU_PCI "virtio-iommu-pci"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOIOMMU, VIRTIO_IOMMU)
+
+#define TYPE_VIRTIO_IOMMU_MEMORY_REGION "virtio-iommu-memory-region"
+
+typedef struct IOMMUDevice {
+    void         *viommu;
+    PCIBus       *bus;
+    int           devfn;
+    IOMMUMemoryRegion  iommu_mr;
+    AddressSpace  as;
+    MemoryRegion root;          /* The root container of the device */
+    MemoryRegion bypass_mr;     /* The alias of shared memory MR */
+    GList *resv_regions;
+    GList *host_resv_ranges;
+    bool probe_done;
+} IOMMUDevice;
+
+typedef struct IOMMUPciBus {
+    PCIBus       *bus;
+    IOMMUDevice  *pbdev[]; /* Parent array is sparse, so dynamically alloc */
+} IOMMUPciBus;
+
+struct VirtIOIOMMU {
+    VirtIODevice parent_obj;
+    VirtQueue *req_vq;
+    VirtQueue *event_vq;
+    struct virtio_iommu_config config;
+    uint64_t features;
+    GHashTable *as_by_busptr;
+    IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX];
+    PCIBus *primary_bus;
+    ReservedRegion *prop_resv_regions;
+    uint32_t nr_prop_resv_regions;
+    GTree *domains;
+    QemuRecMutex mutex;
+    GTree *endpoints;
+    bool boot_bypass;
+    Notifier machine_done;
+    bool granule_frozen;
+};
+
+#endif
diff --git a/include/hw/virtio/virtio-md-pci.h b/include/hw/virtio/virtio-md-pci.h
new file mode 100644 (file)
index 0000000..5912e16
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Abstract virtio based memory device
+ *
+ * Copyright (C) 2023 Red Hat, Inc.
+ *
+ * Authors:
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VIRTIO_MD_PCI_H
+#define HW_VIRTIO_MD_PCI_H
+
+#include "hw/virtio/virtio-pci.h"
+#include "qom/object.h"
+
+/*
+ * virtio-md-pci: This extends VirtioPCIProxy.
+ */
+#define TYPE_VIRTIO_MD_PCI "virtio-md-pci"
+
+OBJECT_DECLARE_TYPE(VirtIOMDPCI, VirtIOMDPCIClass, VIRTIO_MD_PCI)
+
+struct VirtIOMDPCIClass {
+    /* private */
+    VirtioPCIClass parent;
+
+    /* public */
+    void (*unplug_request_check)(VirtIOMDPCI *vmd, Error **errp);
+};
+
+struct VirtIOMDPCI {
+    VirtIOPCIProxy parent_obj;
+};
+
+void virtio_md_pci_pre_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp);
+void virtio_md_pci_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp);
+void virtio_md_pci_unplug_request(VirtIOMDPCI *vmd, MachineState *ms,
+                                  Error **errp);
+void virtio_md_pci_unplug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp);
+
+#endif
diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h
new file mode 100644 (file)
index 0000000..5f5b02b
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+ * Virtio MEM device
+ *
+ * Copyright (C) 2020 Red Hat, Inc.
+ *
+ * Authors:
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VIRTIO_MEM_H
+#define HW_VIRTIO_MEM_H
+
+#include "standard-headers/linux/virtio_mem.h"
+#include "hw/virtio/virtio.h"
+#include "qapi/qapi-types-misc.h"
+#include "sysemu/hostmem.h"
+#include "qom/object.h"
+
+#define TYPE_VIRTIO_MEM "virtio-mem"
+
+OBJECT_DECLARE_TYPE(VirtIOMEM, VirtIOMEMClass,
+                    VIRTIO_MEM)
+
+#define VIRTIO_MEM_MEMDEV_PROP "memdev"
+#define VIRTIO_MEM_NODE_PROP "node"
+#define VIRTIO_MEM_SIZE_PROP "size"
+#define VIRTIO_MEM_REQUESTED_SIZE_PROP "requested-size"
+#define VIRTIO_MEM_BLOCK_SIZE_PROP "block-size"
+#define VIRTIO_MEM_ADDR_PROP "memaddr"
+#define VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP "unplugged-inaccessible"
+#define VIRTIO_MEM_EARLY_MIGRATION_PROP "x-early-migration"
+#define VIRTIO_MEM_PREALLOC_PROP "prealloc"
+#define VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP "dynamic-memslots"
+
+struct VirtIOMEM {
+    VirtIODevice parent_obj;
+
+    /* guest -> host request queue */
+    VirtQueue *vq;
+
+    /* bitmap used to track unplugged memory */
+    int32_t bitmap_size;
+    unsigned long *bitmap;
+
+    /*
+     * With "dynamic-memslots=on": Device memory region in which we dynamically
+     * map the memslots.
+     */
+    MemoryRegion *mr;
+
+    /*
+     * With "dynamic-memslots=on": The individual memslots (aliases into the
+     * memory backend).
+     */
+    MemoryRegion *memslots;
+
+    /* With "dynamic-memslots=on": The total number of memslots. */
+    uint16_t nb_memslots;
+
+    /*
+     * With "dynamic-memslots=on": Size of one memslot (the size of the
+     * last one can differ).
+     */
+    uint64_t memslot_size;
+
+    /* Assigned memory backend with the RAM memory region. */
+    HostMemoryBackend *memdev;
+
+    /* NUMA node */
+    uint32_t node;
+
+    /* assigned address of the region in guest physical memory */
+    uint64_t addr;
+
+    /* usable region size (<= region_size) */
+    uint64_t usable_region_size;
+
+    /* actual size (how much the guest plugged) */
+    uint64_t size;
+
+    /* requested size */
+    uint64_t requested_size;
+
+    /* block size and alignment */
+    uint64_t block_size;
+
+    /*
+     * Whether we indicate VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE to the guest.
+     * For !x86 targets this will always be "on" and consequently indicate
+     * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE.
+     */
+    OnOffAuto unplugged_inaccessible;
+
+    /* whether to prealloc memory when plugging new blocks */
+    bool prealloc;
+
+    /*
+     * Whether we migrate properties that are immutable while migration is
+     * active early, before state of other devices and especially, before
+     * migrating any RAM content.
+     */
+    bool early_migration;
+
+    /*
+     * Whether we dynamically map (multiple, if possible) memslots instead of
+     * statically mapping the whole RAM memory region.
+     */
+    bool dynamic_memslots;
+
+    /* notifiers to notify when "size" changes */
+    NotifierList size_change_notifiers;
+
+    /* listeners to notify on plug/unplug activity. */
+    QLIST_HEAD(, RamDiscardListener) rdl_list;
+};
+
+struct VirtIOMEMClass {
+    /* private */
+    VirtIODevice parent;
+
+    /* public */
+    void (*fill_device_info)(const VirtIOMEM *vmen, VirtioMEMDeviceInfo *vi);
+    MemoryRegion *(*get_memory_region)(VirtIOMEM *vmem, Error **errp);
+    void (*decide_memslots)(VirtIOMEM *vmem, unsigned int limit);
+    unsigned int (*get_memslots)(VirtIOMEM *vmem);
+    void (*add_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier);
+    void (*remove_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier);
+    void (*unplug_request_check)(VirtIOMEM *vmem, Error **errp);
+};
+
+#endif
diff --git a/include/hw/virtio/virtio-mmio.h b/include/hw/virtio/virtio-mmio.h
new file mode 100644 (file)
index 0000000..aa49262
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Virtio MMIO bindings
+ *
+ * Copyright (c) 2011 Linaro Limited
+ *
+ * Author:
+ *  Peter Maydell <peter.maydell@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_VIRTIO_MMIO_H
+#define HW_VIRTIO_MMIO_H
+
+#include "hw/sysbus.h"
+#include "hw/virtio/virtio-bus.h"
+
+/* QOM macros */
+/* virtio-mmio-bus */
+#define TYPE_VIRTIO_MMIO_BUS "virtio-mmio-bus"
+/* This is reusing the VirtioBusState typedef from TYPE_VIRTIO_BUS */
+DECLARE_OBJ_CHECKERS(VirtioBusState, VirtioBusClass,
+                     VIRTIO_MMIO_BUS, TYPE_VIRTIO_MMIO_BUS)
+
+/* virtio-mmio */
+#define TYPE_VIRTIO_MMIO "virtio-mmio"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOMMIOProxy, VIRTIO_MMIO)
+
+#define VIRT_MAGIC 0x74726976 /* 'virt' */
+#define VIRT_VERSION 2
+#define VIRT_VERSION_LEGACY 1
+#define VIRT_VENDOR 0x554D4551 /* 'QEMU' */
+
+typedef struct VirtIOMMIOQueue {
+    uint16_t num;
+    bool enabled;
+    uint32_t desc[2];
+    uint32_t avail[2];
+    uint32_t used[2];
+} VirtIOMMIOQueue;
+
+#define VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD_BIT 1
+#define VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD \
+        (1 << VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD_BIT)
+
+struct VirtIOMMIOProxy {
+    /* Generic */
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+    qemu_irq irq;
+    bool legacy;
+    uint32_t flags;
+    /* Guest accessible state needing migration and reset */
+    uint32_t host_features_sel;
+    uint32_t guest_features_sel;
+    uint32_t guest_page_shift;
+    /* virtio-bus */
+    VirtioBusState bus;
+    bool format_transport_address;
+    /* Fields only used for non-legacy (v2) devices */
+    uint32_t guest_features[2];
+    VirtIOMMIOQueue vqs[VIRTIO_QUEUE_MAX];
+};
+
+#endif
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
new file mode 100644 (file)
index 0000000..55977f0
--- /dev/null
@@ -0,0 +1,238 @@
+/*
+ * Virtio Network Device
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_VIRTIO_NET_H
+#define QEMU_VIRTIO_NET_H
+
+#include "qemu/units.h"
+#include "standard-headers/linux/virtio_net.h"
+#include "hw/virtio/virtio.h"
+#include "net/announce.h"
+#include "qemu/option_int.h"
+#include "qom/object.h"
+
+#include "ebpf/ebpf_rss.h"
+
+#define TYPE_VIRTIO_NET "virtio-net-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET)
+
+#define TX_TIMER_INTERVAL 150000 /* 150 us */
+
+/* Limit the number of packets that can be sent via a single flush
+ * of the TX queue.  This gives us a guaranteed exit condition and
+ * ensures fairness in the io path.  256 conveniently matches the
+ * length of the TX queue and shows a good balance of performance
+ * and latency. */
+#define TX_BURST 256
+
+/* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. */
+#define MAC_TABLE_ENTRIES    64
+
+/*
+ * The maximum number of VLANs in the VLAN filter table
+ * added by VIRTIO_NET_CTRL_VLAN_ADD
+ */
+#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
+
+typedef struct virtio_net_conf
+{
+    uint32_t txtimer;
+    int32_t txburst;
+    char *tx;
+    uint16_t rx_queue_size;
+    uint16_t tx_queue_size;
+    uint16_t mtu;
+    int32_t speed;
+    char *duplex_str;
+    uint8_t duplex;
+    char *primary_id_str;
+} virtio_net_conf;
+
+/* Coalesced packets type & status */
+typedef enum {
+    RSC_COALESCE,           /* Data been coalesced */
+    RSC_FINAL,              /* Will terminate current connection */
+    RSC_NO_MATCH,           /* No matched in the buffer pool */
+    RSC_BYPASS,             /* Packet to be bypass, not tcp, tcp ctrl, etc */
+    RSC_CANDIDATE                /* Data want to be coalesced */
+} CoalesceStatus;
+
+typedef struct VirtioNetRscStat {
+    uint32_t received;
+    uint32_t coalesced;
+    uint32_t over_size;
+    uint32_t cache;
+    uint32_t empty_cache;
+    uint32_t no_match_cache;
+    uint32_t win_update;
+    uint32_t no_match;
+    uint32_t tcp_syn;
+    uint32_t tcp_ctrl_drain;
+    uint32_t dup_ack;
+    uint32_t dup_ack1;
+    uint32_t dup_ack2;
+    uint32_t pure_ack;
+    uint32_t ack_out_of_win;
+    uint32_t data_out_of_win;
+    uint32_t data_out_of_order;
+    uint32_t data_after_pure_ack;
+    uint32_t bypass_not_tcp;
+    uint32_t tcp_option;
+    uint32_t tcp_all_opt;
+    uint32_t ip_frag;
+    uint32_t ip_ecn;
+    uint32_t ip_hacked;
+    uint32_t ip_option;
+    uint32_t purge_failed;
+    uint32_t drain_failed;
+    uint32_t final_failed;
+    int64_t  timer;
+} VirtioNetRscStat;
+
+/* Rsc unit general info used to checking if can coalescing */
+typedef struct VirtioNetRscUnit {
+    void *ip;   /* ip header */
+    uint16_t *ip_plen;      /* data len pointer in ip header field */
+    struct tcp_header *tcp; /* tcp header */
+    uint16_t tcp_hdrlen;    /* tcp header len */
+    uint16_t payload;       /* pure payload without virtio/eth/ip/tcp */
+} VirtioNetRscUnit;
+
+/* Coalesced segment */
+typedef struct VirtioNetRscSeg {
+    QTAILQ_ENTRY(VirtioNetRscSeg) next;
+    void *buf;
+    size_t size;
+    uint16_t packets;
+    uint16_t dup_ack;
+    bool is_coalesced;      /* need recall ipv4 header checksum, mark here */
+    VirtioNetRscUnit unit;
+    NetClientState *nc;
+} VirtioNetRscSeg;
+
+
+/* Chain is divided by protocol(ipv4/v6) and NetClientInfo */
+typedef struct VirtioNetRscChain {
+    QTAILQ_ENTRY(VirtioNetRscChain) next;
+    VirtIONet *n;                            /* VirtIONet */
+    uint16_t proto;
+    uint8_t  gso_type;
+    uint16_t max_payload;
+    QEMUTimer *drain_timer;
+    QTAILQ_HEAD(, VirtioNetRscSeg) buffers;
+    VirtioNetRscStat stat;
+} VirtioNetRscChain;
+
+/* Maximum packet size we can receive from tap device: header + 64k */
+#define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 * KiB))
+
+#define VIRTIO_NET_RSS_MAX_KEY_SIZE     40
+#define VIRTIO_NET_RSS_MAX_TABLE_LEN    128
+
+typedef struct VirtioNetRssData {
+    bool    enabled;
+    bool    enabled_software_rss;
+    bool    redirect;
+    bool    populate_hash;
+    uint32_t hash_types;
+    uint8_t key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
+    uint16_t indirections_len;
+    uint16_t *indirections_table;
+    uint16_t default_queue;
+} VirtioNetRssData;
+
+typedef struct VirtIONetQueue {
+    VirtQueue *rx_vq;
+    VirtQueue *tx_vq;
+    QEMUTimer *tx_timer;
+    QEMUBH *tx_bh;
+    uint32_t tx_waiting;
+    struct {
+        VirtQueueElement *elem;
+    } async_tx;
+    struct VirtIONet *n;
+} VirtIONetQueue;
+
+struct VirtIONet {
+    VirtIODevice parent_obj;
+    uint8_t mac[ETH_ALEN];
+    uint16_t status;
+    VirtIONetQueue *vqs;
+    VirtQueue *ctrl_vq;
+    NICState *nic;
+    /* RSC Chains - temporary storage of coalesced data,
+       all these data are lost in case of migration */
+    QTAILQ_HEAD(, VirtioNetRscChain) rsc_chains;
+    uint32_t tx_timeout;
+    int32_t tx_burst;
+    uint32_t has_vnet_hdr;
+    size_t host_hdr_len;
+    size_t guest_hdr_len;
+    uint64_t host_features;
+    uint32_t rsc_timeout;
+    uint8_t rsc4_enabled;
+    uint8_t rsc6_enabled;
+    uint8_t has_ufo;
+    uint32_t mergeable_rx_bufs;
+    uint8_t promisc;
+    uint8_t allmulti;
+    uint8_t alluni;
+    uint8_t nomulti;
+    uint8_t nouni;
+    uint8_t nobcast;
+    uint8_t vhost_started;
+    struct {
+        uint32_t in_use;
+        uint32_t first_multi;
+        uint8_t multi_overflow;
+        uint8_t uni_overflow;
+        uint8_t *macs;
+    } mac_table;
+    uint32_t *vlans;
+    virtio_net_conf net_conf;
+    NICConf nic_conf;
+    DeviceState *qdev;
+    int multiqueue;
+    uint16_t max_queue_pairs;
+    uint16_t curr_queue_pairs;
+    uint16_t max_ncs;
+    size_t config_size;
+    char *netclient_name;
+    char *netclient_type;
+    uint64_t curr_guest_offloads;
+    /* used on saved state restore phase to preserve the curr_guest_offloads */
+    uint64_t saved_guest_offloads;
+    AnnounceTimer announce_timer;
+    bool needs_vnet_hdr_swap;
+    bool mtu_bypass_backend;
+    /* primary failover device is hidden*/
+    bool failover_primary_hidden;
+    bool failover;
+    DeviceListener primary_listener;
+    QDict *primary_opts;
+    bool primary_opts_from_json;
+    Notifier migration_state;
+    VirtioNetRssData rss_data;
+    struct NetRxPkt *rx_pkt;
+    struct EBPFRSSContext ebpf_rss;
+};
+
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
+                                  const struct iovec *in_sg, unsigned in_num,
+                                  const struct iovec *out_sg,
+                                  unsigned out_num);
+void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
+                                   const char *type);
+uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n);
+
+#endif
diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h
new file mode 100644 (file)
index 0000000..59d8801
--- /dev/null
@@ -0,0 +1,272 @@
+/*
+ * Virtio PCI Bindings
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2009 CodeSourcery
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Paul Brook        <paul@codesourcery.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_VIRTIO_PCI_H
+#define QEMU_VIRTIO_PCI_H
+
+#include "hw/pci/msi.h"
+#include "hw/virtio/virtio-bus.h"
+#include "qom/object.h"
+
+
+/* virtio-pci-bus */
+
+typedef struct VirtioBusState VirtioPCIBusState;
+typedef struct VirtioBusClass VirtioPCIBusClass;
+
+#define TYPE_VIRTIO_PCI_BUS "virtio-pci-bus"
+DECLARE_OBJ_CHECKERS(VirtioPCIBusState, VirtioPCIBusClass,
+                     VIRTIO_PCI_BUS, TYPE_VIRTIO_PCI_BUS)
+
+enum {
+    VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT,
+    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT,
+    VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT,
+    VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT,
+    VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT,
+    VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT,
+    VIRTIO_PCI_FLAG_ATS_BIT,
+    VIRTIO_PCI_FLAG_INIT_DEVERR_BIT,
+    VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT,
+    VIRTIO_PCI_FLAG_INIT_PM_BIT,
+    VIRTIO_PCI_FLAG_INIT_FLR_BIT,
+    VIRTIO_PCI_FLAG_AER_BIT,
+    VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT,
+};
+
+/* Need to activate work-arounds for buggy guests at vmstate load. */
+#define VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION \
+    (1 << VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT)
+
+/* Performance improves when virtqueue kick processing is decoupled from the
+ * vcpu thread using ioeventfd for some devices. */
+#define VIRTIO_PCI_FLAG_USE_IOEVENTFD   (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
+
+/* virtio version flags */
+#define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT)
+
+/* migrate extra state */
+#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA (1 << VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT)
+
+/* have pio notification for modern device ? */
+#define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \
+    (1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT)
+
+/* page per vq flag to be used by split drivers within guests */
+#define VIRTIO_PCI_FLAG_PAGE_PER_VQ \
+    (1 << VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT)
+
+/* address space translation service */
+#define VIRTIO_PCI_FLAG_ATS (1 << VIRTIO_PCI_FLAG_ATS_BIT)
+
+/* Init error enabling flags */
+#define VIRTIO_PCI_FLAG_INIT_DEVERR (1 << VIRTIO_PCI_FLAG_INIT_DEVERR_BIT)
+
+/* Init Link Control register */
+#define VIRTIO_PCI_FLAG_INIT_LNKCTL (1 << VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT)
+
+/* Init Power Management */
+#define VIRTIO_PCI_FLAG_INIT_PM (1 << VIRTIO_PCI_FLAG_INIT_PM_BIT)
+
+/* Init Function Level Reset capability */
+#define VIRTIO_PCI_FLAG_INIT_FLR (1 << VIRTIO_PCI_FLAG_INIT_FLR_BIT)
+
+/* Advanced Error Reporting capability */
+#define VIRTIO_PCI_FLAG_AER (1 << VIRTIO_PCI_FLAG_AER_BIT)
+
+/* Page Aligned Address space Translation Service */
+#define VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED \
+  (1 << VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT)
+
+typedef struct {
+    MSIMessage msg;
+    int virq;
+    unsigned int users;
+} VirtIOIRQFD;
+
+/*
+ * virtio-pci: This is the PCIDevice which has a virtio-pci-bus.
+ */
+#define TYPE_VIRTIO_PCI "virtio-pci"
+OBJECT_DECLARE_TYPE(VirtIOPCIProxy, VirtioPCIClass, VIRTIO_PCI)
+
+struct VirtioPCIClass {
+    PCIDeviceClass parent_class;
+    DeviceRealize parent_dc_realize;
+    void (*realize)(VirtIOPCIProxy *vpci_dev, Error **errp);
+};
+
+typedef struct VirtIOPCIRegion {
+    MemoryRegion mr;
+    uint32_t offset;
+    uint32_t size;
+    uint32_t type;
+} VirtIOPCIRegion;
+
+typedef struct VirtIOPCIQueue {
+  uint16_t num;
+  bool enabled;
+  /*
+   * No need to migrate the reset status, because it is always 0
+   * when the migration starts.
+   */
+  bool reset;
+  uint32_t desc[2];
+  uint32_t avail[2];
+  uint32_t used[2];
+} VirtIOPCIQueue;
+
+struct VirtIOPCIProxy {
+    PCIDevice pci_dev;
+    MemoryRegion bar;
+    union {
+        struct {
+            VirtIOPCIRegion common;
+            VirtIOPCIRegion isr;
+            VirtIOPCIRegion device;
+            VirtIOPCIRegion notify;
+            VirtIOPCIRegion notify_pio;
+        };
+        VirtIOPCIRegion regs[5];
+    };
+    MemoryRegion modern_bar;
+    MemoryRegion io_bar;
+    uint32_t legacy_io_bar_idx;
+    uint32_t msix_bar_idx;
+    uint32_t modern_io_bar_idx;
+    uint32_t modern_mem_bar_idx;
+    int config_cap;
+    uint32_t flags;
+    bool disable_modern;
+    bool ignore_backend_features;
+    OnOffAuto disable_legacy;
+    /* Transitional device id */
+    uint16_t trans_devid;
+    uint32_t class_code;
+    uint32_t nvectors;
+    uint32_t dfselect;
+    uint32_t gfselect;
+    uint32_t guest_features[2];
+    VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX];
+
+    VirtIOIRQFD *vector_irqfd;
+    int nvqs_with_notifiers;
+    VirtioBusState bus;
+};
+
+static inline bool virtio_pci_modern(VirtIOPCIProxy *proxy)
+{
+    return !proxy->disable_modern;
+}
+
+static inline bool virtio_pci_legacy(VirtIOPCIProxy *proxy)
+{
+    return proxy->disable_legacy == ON_OFF_AUTO_OFF;
+}
+
+static inline void virtio_pci_force_virtio_1(VirtIOPCIProxy *proxy)
+{
+    proxy->disable_modern = false;
+    proxy->disable_legacy = ON_OFF_AUTO_ON;
+}
+
+static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy)
+{
+    proxy->disable_modern = true;
+}
+
+uint16_t virtio_pci_get_trans_devid(uint16_t device_id);
+uint16_t virtio_pci_get_class_id(uint16_t device_id);
+
+/*
+ * virtio-input-pci: This extends VirtioPCIProxy.
+ */
+#define TYPE_VIRTIO_INPUT_PCI "virtio-input-pci"
+
+/* Virtio ABI version, if we increment this, we break the guest driver. */
+#define VIRTIO_PCI_ABI_VERSION          0
+
+/* Input for virtio_pci_types_register() */
+typedef struct VirtioPCIDeviceTypeInfo {
+    /*
+     * Common base class for the subclasses below.
+     *
+     * Required only if transitional_name or non_transitional_name is set.
+     *
+     * We need a separate base type instead of making all types
+     * inherit from generic_name for two reasons:
+     * 1) generic_name implements INTERFACE_PCIE_DEVICE, but
+     *    transitional_name does not.
+     * 2) generic_name has the "disable-legacy" and "disable-modern"
+     *    properties, transitional_name and non_transitional name don't.
+     */
+    const char *base_name;
+    /*
+     * Generic device type.  Optional.
+     *
+     * Supports both transitional and non-transitional modes,
+     * using the disable-legacy and disable-modern properties.
+     * If disable-legacy=auto, (non-)transitional mode is selected
+     * depending on the bus where the device is plugged.
+     *
+     * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE,
+     * but PCI Express is supported only in non-transitional mode.
+     *
+     * The only type implemented by QEMU 3.1 and older.
+     */
+    const char *generic_name;
+    /*
+     * The transitional device type.  Optional.
+     *
+     * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE.
+     */
+    const char *transitional_name;
+    /*
+     * The non-transitional device type.  Optional.
+     *
+     * Implements INTERFACE_CONVENTIONAL_PCI_DEVICE only.
+     */
+    const char *non_transitional_name;
+
+    /* Parent type.  If NULL, TYPE_VIRTIO_PCI is used */
+    const char *parent;
+
+    /* Same as TypeInfo fields: */
+    size_t instance_size;
+    size_t class_size;
+    void (*instance_init)(Object *obj);
+    void (*instance_finalize)(Object *obj);
+    void (*class_init)(ObjectClass *klass, void *data);
+    InterfaceInfo *interfaces;
+} VirtioPCIDeviceTypeInfo;
+
+/* Register virtio-pci type(s).  @t must be static. */
+void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t);
+
+/**
+ * virtio_pci_optimal_num_queues:
+ * @fixed_queues: number of queues that are always present
+ *
+ * Returns: The optimal number of queues for a multi-queue device, excluding
+ * @fixed_queues.
+ */
+unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues);
+void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq,
+                                              int n, bool assign,
+                                              bool with_irqfd);
+
+int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy, uint8_t bar, uint64_t offset,
+                           uint64_t length, uint8_t id);
+
+#endif
diff --git a/include/hw/virtio/virtio-pmem.h b/include/hw/virtio/virtio-pmem.h
new file mode 100644 (file)
index 0000000..fc4fd1f
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Virtio PMEM device
+ *
+ * Copyright (C) 2018-2019 Red Hat, Inc.
+ *
+ * Authors:
+ *  Pankaj Gupta <pagupta@redhat.com>
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VIRTIO_PMEM_H
+#define HW_VIRTIO_PMEM_H
+
+#include "hw/virtio/virtio.h"
+#include "qapi/qapi-types-machine.h"
+#include "qom/object.h"
+
+#define TYPE_VIRTIO_PMEM "virtio-pmem"
+
+OBJECT_DECLARE_TYPE(VirtIOPMEM, VirtIOPMEMClass,
+                    VIRTIO_PMEM)
+
+#define VIRTIO_PMEM_ADDR_PROP "memaddr"
+#define VIRTIO_PMEM_MEMDEV_PROP "memdev"
+
+struct VirtIOPMEM {
+    VirtIODevice parent_obj;
+
+    VirtQueue *rq_vq;
+    uint64_t start;
+    HostMemoryBackend *memdev;
+};
+
+struct VirtIOPMEMClass {
+    /* private */
+    VirtIODevice parent;
+
+    /* public */
+    void (*fill_device_info)(const VirtIOPMEM *pmem, VirtioPMEMDeviceInfo *vi);
+    MemoryRegion *(*get_memory_region)(VirtIOPMEM *pmem, Error **errp);
+};
+
+#endif
diff --git a/include/hw/virtio/virtio-rng.h b/include/hw/virtio/virtio-rng.h
new file mode 100644 (file)
index 0000000..8273425
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Virtio RNG Support
+ *
+ * Copyright Red Hat, Inc. 2012
+ * Copyright Amit Shah <amit.shah@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef QEMU_VIRTIO_RNG_H
+#define QEMU_VIRTIO_RNG_H
+
+#include "hw/virtio/virtio.h"
+#include "sysemu/rng.h"
+#include "standard-headers/linux/virtio_rng.h"
+#include "qom/object.h"
+
+#define TYPE_VIRTIO_RNG "virtio-rng-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIORNG, VIRTIO_RNG)
+#define VIRTIO_RNG_GET_PARENT_CLASS(obj) \
+        OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_RNG)
+
+struct VirtIORNGConf {
+    RngBackend *rng;
+    uint64_t max_bytes;
+    uint32_t period_ms;
+};
+
+struct VirtIORNG {
+    VirtIODevice parent_obj;
+
+    /* Only one vq - guest puts buffer(s) on it when it needs entropy */
+    VirtQueue *vq;
+
+    VirtIORNGConf conf;
+
+    RngBackend *rng;
+
+    /* We purposefully don't migrate this state.  The quota will reset on the
+     * destination as a result.  Rate limiting is host state, not guest state.
+     */
+    QEMUTimer *rate_limit_timer;
+    int64_t quota_remaining;
+    bool activate_timer;
+
+    VMChangeStateEntry *vmstate;
+};
+
+#endif
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
new file mode 100644 (file)
index 0000000..779568a
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ * Virtio SCSI HBA
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_VIRTIO_SCSI_H
+#define QEMU_VIRTIO_SCSI_H
+#include "qom/object.h"
+
+/* Override CDB/sense data size: they are dynamic (guest controlled) in QEMU */
+#define VIRTIO_SCSI_CDB_SIZE 0
+#define VIRTIO_SCSI_SENSE_SIZE 0
+#include "standard-headers/linux/virtio_scsi.h"
+#include "hw/virtio/virtio.h"
+#include "hw/scsi/scsi.h"
+#include "chardev/char-fe.h"
+#include "sysemu/iothread.h"
+
+#define TYPE_VIRTIO_SCSI_COMMON "virtio-scsi-common"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOSCSICommon, VIRTIO_SCSI_COMMON)
+
+#define TYPE_VIRTIO_SCSI "virtio-scsi-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOSCSI, VIRTIO_SCSI)
+
+#define VIRTIO_SCSI_MAX_CHANNEL 0
+#define VIRTIO_SCSI_MAX_TARGET  255
+#define VIRTIO_SCSI_MAX_LUN     16383
+
+/* Number of virtqueues that are always present */
+#define VIRTIO_SCSI_VQ_NUM_FIXED    2
+
+#define VIRTIO_SCSI_AUTO_NUM_QUEUES UINT32_MAX
+
+typedef struct virtio_scsi_cmd_req VirtIOSCSICmdReq;
+typedef struct virtio_scsi_cmd_resp VirtIOSCSICmdResp;
+typedef struct virtio_scsi_ctrl_tmf_req VirtIOSCSICtrlTMFReq;
+typedef struct virtio_scsi_ctrl_tmf_resp VirtIOSCSICtrlTMFResp;
+typedef struct virtio_scsi_ctrl_an_req VirtIOSCSICtrlANReq;
+typedef struct virtio_scsi_ctrl_an_resp VirtIOSCSICtrlANResp;
+typedef struct virtio_scsi_event VirtIOSCSIEvent;
+typedef struct virtio_scsi_config VirtIOSCSIConfig;
+
+struct VirtIOSCSIConf {
+    uint32_t num_queues;
+    uint32_t virtqueue_size;
+    bool seg_max_adjust;
+    uint32_t max_sectors;
+    uint32_t cmd_per_lun;
+    char *vhostfd;
+    char *wwpn;
+    CharBackend chardev;
+    uint32_t boot_tpgt;
+    IOThread *iothread;
+};
+
+struct VirtIOSCSI;
+
+struct VirtIOSCSICommon {
+    VirtIODevice parent_obj;
+    VirtIOSCSIConf conf;
+
+    uint32_t sense_size;
+    uint32_t cdb_size;
+    VirtQueue *ctrl_vq;
+    VirtQueue *event_vq;
+    VirtQueue **cmd_vqs;
+};
+
+struct VirtIOSCSIReq;
+
+struct VirtIOSCSI {
+    VirtIOSCSICommon parent_obj;
+
+    SCSIBus bus;
+    int resetting; /* written from main loop thread, read from any thread */
+    bool events_dropped;
+
+    /*
+     * TMFs deferred to main loop BH. These fields are protected by
+     * virtio_scsi_acquire().
+     */
+    QEMUBH *tmf_bh;
+    QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;
+
+    /* Fields for dataplane below */
+    AioContext *ctx; /* one iothread per virtio-scsi-pci for now */
+
+    bool dataplane_started;
+    bool dataplane_starting;
+    bool dataplane_stopping;
+    bool dataplane_fenced;
+    uint32_t host_features;
+};
+
+static inline void virtio_scsi_acquire(VirtIOSCSI *s)
+{
+    if (s->ctx) {
+        aio_context_acquire(s->ctx);
+    }
+}
+
+static inline void virtio_scsi_release(VirtIOSCSI *s)
+{
+    if (s->ctx) {
+        aio_context_release(s->ctx);
+    }
+}
+
+void virtio_scsi_common_realize(DeviceState *dev,
+                                VirtIOHandleOutput ctrl,
+                                VirtIOHandleOutput evt,
+                                VirtIOHandleOutput cmd,
+                                Error **errp);
+
+void virtio_scsi_common_unrealize(DeviceState *dev);
+
+void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp);
+int virtio_scsi_dataplane_start(VirtIODevice *s);
+void virtio_scsi_dataplane_stop(VirtIODevice *s);
+
+#endif /* QEMU_VIRTIO_SCSI_H */
diff --git a/include/hw/virtio/virtio-serial.h b/include/hw/virtio/virtio-serial.h
new file mode 100644 (file)
index 0000000..d87c62e
--- /dev/null
@@ -0,0 +1,227 @@
+/*
+ * Virtio Serial / Console Support
+ *
+ * Copyright IBM, Corp. 2008
+ * Copyright Red Hat, Inc. 2009, 2010
+ *
+ * Authors:
+ *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ *  Amit Shah <amit.shah@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_VIRTIO_SERIAL_H
+#define QEMU_VIRTIO_SERIAL_H
+
+#include "standard-headers/linux/virtio_console.h"
+#include "hw/virtio/virtio.h"
+#include "qom/object.h"
+
+struct virtio_serial_conf {
+    /* Max. number of ports we can have for a virtio-serial device */
+    uint32_t max_virtserial_ports;
+};
+
+#define TYPE_VIRTIO_SERIAL_PORT "virtio-serial-port"
+OBJECT_DECLARE_TYPE(VirtIOSerialPort, VirtIOSerialPortClass,
+                    VIRTIO_SERIAL_PORT)
+
+typedef struct VirtIOSerial VirtIOSerial;
+
+#define TYPE_VIRTIO_SERIAL_BUS "virtio-serial-bus"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOSerialBus, VIRTIO_SERIAL_BUS)
+
+
+struct VirtIOSerialPortClass {
+    DeviceClass parent_class;
+
+    /* Is this a device that binds with hvc in the guest? */
+    bool is_console;
+
+    /*
+     * The per-port (or per-app) realize function that's called when a
+     * new device is found on the bus.
+     */
+    DeviceRealize realize;
+    /*
+     * Per-port unrealize function that's called when a port gets
+     * hot-unplugged or removed.
+     */
+    DeviceUnrealize unrealize;
+
+    /* Callbacks for guest events */
+        /* Guest opened/closed device. */
+    void (*set_guest_connected)(VirtIOSerialPort *port, int guest_connected);
+
+    /* Enable/disable backend for virtio serial port */
+    void (*enable_backend)(VirtIOSerialPort *port, bool enable);
+
+        /* Guest is now ready to accept data (virtqueues set up). */
+    void (*guest_ready)(VirtIOSerialPort *port);
+
+        /*
+         * Guest has enqueued a buffer for the host to write into.
+         * Called each time a buffer is enqueued by the guest;
+         * irrespective of whether there already were free buffers the
+         * host could have consumed.
+         *
+         * This is dependent on both the guest and host end being
+         * connected.
+         */
+    void (*guest_writable)(VirtIOSerialPort *port);
+
+    /*
+     * Guest wrote some data to the port. This data is handed over to
+     * the app via this callback.  The app can return a size less than
+     * 'len'.  In this case, throttling will be enabled for this port.
+     */
+    ssize_t (*have_data)(VirtIOSerialPort *port, const uint8_t *buf,
+                         ssize_t len);
+};
+
+/*
+ * This is the state that's shared between all the ports.  Some of the
+ * state is configurable via command-line options. Some of it can be
+ * set by individual devices in their initfn routines. Some of the
+ * state is set by the generic qdev device init routine.
+ */
+struct VirtIOSerialPort {
+    DeviceState dev;
+
+    QTAILQ_ENTRY(VirtIOSerialPort) next;
+
+    /*
+     * This field gives us the virtio device as well as the qdev bus
+     * that we are associated with
+     */
+    VirtIOSerial *vser;
+
+    VirtQueue *ivq, *ovq;
+
+    /*
+     * This name is sent to the guest and exported via sysfs.
+     * The guest could create symlinks based on this information.
+     * The name is in the reverse fqdn format, like org.qemu.console.0
+     */
+    char *name;
+
+    /*
+     * This id helps identify ports between the guest and the host.
+     * The guest sends a "header" with this id with each data packet
+     * that it sends and the host can then find out which associated
+     * device to send out this data to
+     */
+    uint32_t id;
+
+    /*
+     * This is the elem that we pop from the virtqueue.  A slow
+     * backend that consumes guest data (e.g. the file backend for
+     * qemu chardevs) can cause the guest to block till all the output
+     * is flushed.  This isn't desired, so we keep a note of the last
+     * element popped and continue consuming it once the backend
+     * becomes writable again.
+     */
+    VirtQueueElement *elem;
+
+    /*
+     * The index and the offset into the iov buffer that was popped in
+     * elem above.
+     */
+    uint32_t iov_idx;
+    uint64_t iov_offset;
+
+    /*
+     * When unthrottling we use a bottom-half to call flush_queued_data.
+     */
+    QEMUBH *bh;
+
+    /* Is the corresponding guest device open? */
+    bool guest_connected;
+    /* Is this device open for IO on the host? */
+    bool host_connected;
+    /* Do apps not want to receive data? */
+    bool throttled;
+};
+
+/* The virtio-serial bus on top of which the ports will ride as devices */
+struct VirtIOSerialBus {
+    BusState qbus;
+
+    /* This is the parent device that provides the bus for ports. */
+    VirtIOSerial *vser;
+
+    /* The maximum number of ports that can ride on top of this bus */
+    uint32_t max_nr_ports;
+};
+
+typedef struct VirtIOSerialPostLoad {
+    QEMUTimer *timer;
+    uint32_t nr_active_ports;
+    struct {
+        VirtIOSerialPort *port;
+        uint8_t host_connected;
+    } *connected;
+} VirtIOSerialPostLoad;
+
+struct VirtIOSerial {
+    VirtIODevice parent_obj;
+
+    VirtQueue *c_ivq, *c_ovq;
+    /* Arrays of ivqs and ovqs: one per port */
+    VirtQueue **ivqs, **ovqs;
+
+    VirtIOSerialBus bus;
+
+    QTAILQ_HEAD(, VirtIOSerialPort) ports;
+
+    QLIST_ENTRY(VirtIOSerial) next;
+
+    /* bitmap for identifying active ports */
+    uint32_t *ports_map;
+
+    struct VirtIOSerialPostLoad *post_load;
+
+    virtio_serial_conf serial;
+
+    uint64_t host_features;
+};
+
+/* Interface to the virtio-serial bus */
+
+/*
+ * Open a connection to the port
+ *   Returns 0 on success (always).
+ */
+int virtio_serial_open(VirtIOSerialPort *port);
+
+/*
+ * Close the connection to the port
+ *   Returns 0 on success (always).
+ */
+int virtio_serial_close(VirtIOSerialPort *port);
+
+/*
+ * Send data to Guest
+ */
+ssize_t virtio_serial_write(VirtIOSerialPort *port, const uint8_t *buf,
+                            size_t size);
+
+/*
+ * Query whether a guest is ready to receive data.
+ */
+size_t virtio_serial_guest_ready(VirtIOSerialPort *port);
+
+/*
+ * Flow control: Ports can signal to the virtio-serial core to stop
+ * sending data or re-start sending data, depending on the 'throttle'
+ * value here.
+ */
+void virtio_serial_throttle_port(VirtIOSerialPort *port, bool throttle);
+
+#define TYPE_VIRTIO_SERIAL "virtio-serial-device"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOSerial, VIRTIO_SERIAL)
+
+#endif
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
new file mode 100644 (file)
index 0000000..7d5ffdc
--- /dev/null
@@ -0,0 +1,518 @@
+/*
+ * Virtio Support
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_VIRTIO_H
+#define QEMU_VIRTIO_H
+
+#include "exec/memory.h"
+#include "hw/qdev-core.h"
+#include "net/net.h"
+#include "migration/vmstate.h"
+#include "qemu/event_notifier.h"
+#include "standard-headers/linux/virtio_config.h"
+#include "standard-headers/linux/virtio_ring.h"
+#include "qom/object.h"
+#include "block/aio.h"
+
+/*
+ * A guest should never accept this. It implies negotiation is broken
+ * between the driver frontend and the device. This bit is re-used for
+ * vhost-user to advertise VHOST_USER_F_PROTOCOL_FEATURES between QEMU
+ * and a vhost-user backend.
+ */
+#define VIRTIO_F_BAD_FEATURE 30
+
+#define VIRTIO_LEGACY_FEATURES ((0x1ULL << VIRTIO_F_BAD_FEATURE) | \
+                                (0x1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+                                (0x1ULL << VIRTIO_F_ANY_LAYOUT))
+
+struct VirtQueue;
+
+static inline hwaddr vring_align(hwaddr addr,
+                                             unsigned long align)
+{
+    return QEMU_ALIGN_UP(addr, align);
+}
+
+typedef struct VirtIOFeature {
+    uint64_t flags;
+    size_t end;
+} VirtIOFeature;
+
+typedef struct VirtIOConfigSizeParams {
+    size_t min_size;
+    size_t max_size;
+    const VirtIOFeature *feature_sizes;
+} VirtIOConfigSizeParams;
+
+size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
+                              uint64_t host_features);
+
+typedef struct VirtQueue VirtQueue;
+
+#define VIRTQUEUE_MAX_SIZE 1024
+
+typedef struct VirtQueueElement
+{
+    unsigned int index;
+    unsigned int len;
+    unsigned int ndescs;
+    unsigned int out_num;
+    unsigned int in_num;
+    hwaddr *in_addr;
+    hwaddr *out_addr;
+    struct iovec *in_sg;
+    struct iovec *out_sg;
+} VirtQueueElement;
+
+#define VIRTIO_QUEUE_MAX 1024
+
+#define VIRTIO_NO_VECTOR 0xffff
+
+/* special index value used internally for config irqs */
+#define VIRTIO_CONFIG_IRQ_IDX -1
+
+#define TYPE_VIRTIO_DEVICE "virtio-device"
+OBJECT_DECLARE_TYPE(VirtIODevice, VirtioDeviceClass, VIRTIO_DEVICE)
+
+typedef struct {
+    int virtio_bit;
+    const char *feature_desc;
+} qmp_virtio_feature_map_t;
+
+enum virtio_device_endian {
+    VIRTIO_DEVICE_ENDIAN_UNKNOWN,
+    VIRTIO_DEVICE_ENDIAN_LITTLE,
+    VIRTIO_DEVICE_ENDIAN_BIG,
+};
+
+/**
+ * struct VirtIODevice - common VirtIO structure
+ * @name: name of the device
+ * @status: VirtIO Device Status field
+ *
+ */
+struct VirtIODevice
+{
+    DeviceState parent_obj;
+    const char *name;
+    uint8_t status;
+    uint8_t isr;
+    uint16_t queue_sel;
+    /**
+     * These fields represent a set of VirtIO features at various
+     * levels of the stack. @host_features indicates the complete
+     * feature set the VirtIO device can offer to the driver.
+     * @guest_features indicates which features the VirtIO driver has
+     * selected by writing to the feature register. Finally
+     * @backend_features represents everything supported by the
+     * backend (e.g. vhost) and could potentially be a subset of the
+     * total feature set offered by QEMU.
+     */
+    uint64_t host_features;
+    uint64_t guest_features;
+    uint64_t backend_features;
+
+    size_t config_len;
+    void *config;
+    uint16_t config_vector;
+    uint32_t generation;
+    int nvectors;
+    VirtQueue *vq;
+    MemoryListener listener;
+    uint16_t device_id;
+    /* @vm_running: current VM running state via virtio_vmstate_change() */
+    bool vm_running;
+    bool broken; /* device in invalid state, needs reset */
+    bool use_disabled_flag; /* allow use of 'disable' flag when needed */
+    bool disabled; /* device in temporarily disabled state */
+    /**
+     * @use_started: true if the @started flag should be used to check the
+     * current state of the VirtIO device. Otherwise status bits
+     * should be checked for a current status of the device.
+     * @use_started is only set via QMP and defaults to true for all
+     * modern machines (since 4.1).
+     */
+    bool use_started;
+    bool started;
+    bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */
+    bool disable_legacy_check;
+    bool vhost_started;
+    VMChangeStateEntry *vmstate;
+    char *bus_name;
+    uint8_t device_endian;
+    /**
+     * @user_guest_notifier_mask: gate usage of ->guest_notifier_mask() callback.
+     * This is used to suppress the masking of guest updates for
+     * vhost-user devices which are asynchronous by design.
+     */
+    bool use_guest_notifier_mask;
+    AddressSpace *dma_as;
+    QLIST_HEAD(, VirtQueue) *vector_queues;
+    QTAILQ_ENTRY(VirtIODevice) next;
+    /**
+     * @config_notifier: the event notifier that handles config events
+     */
+    EventNotifier config_notifier;
+    bool device_iotlb_enabled;
+};
+
+struct VirtioDeviceClass {
+    /*< private >*/
+    DeviceClass parent;
+    /*< public >*/
+
+    /* This is what a VirtioDevice must implement */
+    DeviceRealize realize;
+    DeviceUnrealize unrealize;
+    uint64_t (*get_features)(VirtIODevice *vdev,
+                             uint64_t requested_features,
+                             Error **errp);
+    uint64_t (*bad_features)(VirtIODevice *vdev);
+    void (*set_features)(VirtIODevice *vdev, uint64_t val);
+    int (*validate_features)(VirtIODevice *vdev);
+    void (*get_config)(VirtIODevice *vdev, uint8_t *config);
+    void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
+    void (*reset)(VirtIODevice *vdev);
+    void (*set_status)(VirtIODevice *vdev, uint8_t val);
+    /* Device must validate queue_index.  */
+    void (*queue_reset)(VirtIODevice *vdev, uint32_t queue_index);
+    /* Device must validate queue_index.  */
+    void (*queue_enable)(VirtIODevice *vdev, uint32_t queue_index);
+    /* For transitional devices, this is a bitmap of features
+     * that are only exposed on the legacy interface but not
+     * the modern one.
+     */
+    uint64_t legacy_features;
+    /* Test and clear event pending status.
+     * Should be called after unmask to avoid losing events.
+     * If backend does not support masking,
+     * must check in frontend instead.
+     */
+    bool (*guest_notifier_pending)(VirtIODevice *vdev, int n);
+    /* Mask/unmask events from this vq. Any events reported
+     * while masked will become pending.
+     * If backend does not support masking,
+     * must mask in frontend instead.
+     */
+    void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask);
+    int (*start_ioeventfd)(VirtIODevice *vdev);
+    void (*stop_ioeventfd)(VirtIODevice *vdev);
+    /* Saving and loading of a device; trying to deprecate save/load
+     * use vmsd for new devices.
+     */
+    void (*save)(VirtIODevice *vdev, QEMUFile *f);
+    int (*load)(VirtIODevice *vdev, QEMUFile *f, int version_id);
+    /* Post load hook in vmsd is called early while device is processed, and
+     * when VirtIODevice isn't fully initialized.  Devices should use this instead,
+     * unless they specifically want to verify the migration stream as it's
+     * processed, e.g. for bounds checking.
+     */
+    int (*post_load)(VirtIODevice *vdev);
+    const VMStateDescription *vmsd;
+    bool (*primary_unplug_pending)(void *opaque);
+    struct vhost_dev *(*get_vhost)(VirtIODevice *vdev);
+    void (*toggle_device_iotlb)(VirtIODevice *vdev);
+};
+
+void virtio_instance_init_common(Object *proxy_obj, void *data,
+                                 size_t vdev_size, const char *vdev_name);
+
+/**
+ * virtio_init() - initialise the common VirtIODevice structure
+ * @vdev: pointer to VirtIODevice
+ * @device_id: the VirtIO device ID (see virtio_ids.h)
+ * @config_size: size of the config space
+ */
+void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size);
+
+void virtio_cleanup(VirtIODevice *vdev);
+
+void virtio_error(VirtIODevice *vdev, const char *fmt, ...) G_GNUC_PRINTF(2, 3);
+
+/* Set the child bus name. */
+void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name);
+
+typedef void (*VirtIOHandleOutput)(VirtIODevice *, VirtQueue *);
+
+VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
+                            VirtIOHandleOutput handle_output);
+
+void virtio_del_queue(VirtIODevice *vdev, int n);
+
+void virtio_delete_queue(VirtQueue *vq);
+
+void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
+                    unsigned int len);
+void virtqueue_flush(VirtQueue *vq, unsigned int count);
+void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
+                              unsigned int len);
+void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
+                     unsigned int len);
+bool virtqueue_rewind(VirtQueue *vq, unsigned int num);
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+                    unsigned int len, unsigned int idx);
+
+void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem);
+void *virtqueue_pop(VirtQueue *vq, size_t sz);
+unsigned int virtqueue_drop_all(VirtQueue *vq);
+void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz);
+void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
+                                VirtQueueElement *elem);
+int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
+                          unsigned int out_bytes);
+void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+                               unsigned int *out_bytes,
+                               unsigned max_in_bytes, unsigned max_out_bytes);
+
+void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq);
+void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
+
+int virtio_save(VirtIODevice *vdev, QEMUFile *f);
+
+extern const VMStateInfo virtio_vmstate_info;
+
+#define VMSTATE_VIRTIO_DEVICE \
+    {                                         \
+        .name = "virtio",                     \
+        .info = &virtio_vmstate_info,         \
+        .flags = VMS_SINGLE,                  \
+    }
+
+int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id);
+
+/**
+ * virtio_notify_config() - signal a change to device config
+ * @vdev: the virtio device
+ *
+ * Assuming the virtio device is up (VIRTIO_CONFIG_S_DRIVER_OK) this
+ * will trigger a guest interrupt and update the config version.
+ */
+void virtio_notify_config(VirtIODevice *vdev);
+
+bool virtio_queue_get_notification(VirtQueue *vq);
+void virtio_queue_set_notification(VirtQueue *vq, int enable);
+
+int virtio_queue_ready(VirtQueue *vq);
+
+int virtio_queue_empty(VirtQueue *vq);
+
+/* Host binding interface.  */
+
+uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr);
+uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr);
+uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr);
+void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data);
+void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data);
+void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data);
+uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr);
+uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr);
+uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr);
+void virtio_config_modern_writeb(VirtIODevice *vdev,
+                                 uint32_t addr, uint32_t data);
+void virtio_config_modern_writew(VirtIODevice *vdev,
+                                 uint32_t addr, uint32_t data);
+void virtio_config_modern_writel(VirtIODevice *vdev,
+                                 uint32_t addr, uint32_t data);
+void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr);
+hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n);
+void virtio_queue_set_num(VirtIODevice *vdev, int n, int num);
+int virtio_queue_get_num(VirtIODevice *vdev, int n);
+int virtio_queue_get_max_num(VirtIODevice *vdev, int n);
+int virtio_get_num_queues(VirtIODevice *vdev);
+void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
+                            hwaddr avail, hwaddr used);
+void virtio_queue_update_rings(VirtIODevice *vdev, int n);
+void virtio_init_region_cache(VirtIODevice *vdev, int n);
+void virtio_queue_set_align(VirtIODevice *vdev, int n, int align);
+void virtio_queue_notify(VirtIODevice *vdev, int n);
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
+int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
+                                      MemoryRegion *mr, bool assign);
+int virtio_set_status(VirtIODevice *vdev, uint8_t val);
+void virtio_reset(void *opaque);
+void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index);
+void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index);
+void virtio_update_irq(VirtIODevice *vdev);
+int virtio_set_features(VirtIODevice *vdev, uint64_t val);
+
+/* Base devices.  */
+typedef struct VirtIOBlkConf VirtIOBlkConf;
+struct virtio_net_conf;
+typedef struct virtio_serial_conf virtio_serial_conf;
+typedef struct virtio_input_conf virtio_input_conf;
+typedef struct VirtIOSCSIConf VirtIOSCSIConf;
+typedef struct VirtIORNGConf VirtIORNGConf;
+
+#define DEFINE_VIRTIO_COMMON_FEATURES(_state, _field) \
+    DEFINE_PROP_BIT64("indirect_desc", _state, _field,    \
+                      VIRTIO_RING_F_INDIRECT_DESC, true), \
+    DEFINE_PROP_BIT64("event_idx", _state, _field,        \
+                      VIRTIO_RING_F_EVENT_IDX, true),     \
+    DEFINE_PROP_BIT64("notify_on_empty", _state, _field,  \
+                      VIRTIO_F_NOTIFY_ON_EMPTY, true), \
+    DEFINE_PROP_BIT64("any_layout", _state, _field, \
+                      VIRTIO_F_ANY_LAYOUT, true), \
+    DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
+                      VIRTIO_F_IOMMU_PLATFORM, false), \
+    DEFINE_PROP_BIT64("packed", _state, _field, \
+                      VIRTIO_F_RING_PACKED, false), \
+    DEFINE_PROP_BIT64("queue_reset", _state, _field, \
+                      VIRTIO_F_RING_RESET, true)
+
+hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
+bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n);
+bool virtio_queue_enabled(VirtIODevice *vdev, int n);
+hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n);
+hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n);
+hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n);
+hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n);
+hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n);
+unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n);
+void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
+                                     unsigned int idx);
+void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n);
+void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n);
+void virtio_queue_update_used_idx(VirtIODevice *vdev, int n);
+VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
+uint16_t virtio_get_queue_index(VirtQueue *vq);
+EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
+void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
+                                                bool with_irqfd);
+int virtio_device_start_ioeventfd(VirtIODevice *vdev);
+int virtio_device_grab_ioeventfd(VirtIODevice *vdev);
+void virtio_device_release_ioeventfd(VirtIODevice *vdev);
+bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev);
+EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
+void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled);
+void virtio_queue_host_notifier_read(EventNotifier *n);
+void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx);
+void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx);
+void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx);
+VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector);
+VirtQueue *virtio_vector_next_queue(VirtQueue *vq);
+EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev);
+void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
+                                                 bool assign, bool with_irqfd);
+
+static inline void virtio_add_feature(uint64_t *features, unsigned int fbit)
+{
+    assert(fbit < 64);
+    *features |= (1ULL << fbit);
+}
+
+static inline void virtio_clear_feature(uint64_t *features, unsigned int fbit)
+{
+    assert(fbit < 64);
+    *features &= ~(1ULL << fbit);
+}
+
+static inline bool virtio_has_feature(uint64_t features, unsigned int fbit)
+{
+    assert(fbit < 64);
+    return !!(features & (1ULL << fbit));
+}
+
+static inline bool virtio_vdev_has_feature(const VirtIODevice *vdev,
+                                           unsigned int fbit)
+{
+    return virtio_has_feature(vdev->guest_features, fbit);
+}
+
+static inline bool virtio_host_has_feature(VirtIODevice *vdev,
+                                           unsigned int fbit)
+{
+    return virtio_has_feature(vdev->host_features, fbit);
+}
+
+static inline bool virtio_is_big_endian(VirtIODevice *vdev)
+{
+    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+        assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
+        return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_BIG;
+    }
+    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
+    return false;
+}
+
+/**
+ * virtio_device_started() - check if device started
+ * @vdev - the VirtIO device
+ * @status - the devices status bits
+ *
+ * Check if the device is started. For most modern machines this is
+ * tracked via the @vdev->started field (to support migration),
+ * otherwise we check for the final negotiated status bit that
+ * indicates everything is ready.
+ */
+static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status)
+{
+    if (vdev->use_started) {
+        return vdev->started;
+    }
+
+    return status & VIRTIO_CONFIG_S_DRIVER_OK;
+}
+
+/**
+ * virtio_device_should_start() - check if device startable
+ * @vdev - the VirtIO device
+ * @status - the devices status bits
+ *
+ * This is similar to virtio_device_started() but also encapsulates a
+ * check on the VM status which would prevent a device starting
+ * anyway.
+ */
+static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status)
+{
+    if (!vdev->vm_running) {
+        return false;
+    }
+
+    return virtio_device_started(vdev, status);
+}
+
+static inline void virtio_set_started(VirtIODevice *vdev, bool started)
+{
+    if (started) {
+        vdev->start_on_kick = false;
+    }
+
+    if (vdev->use_started) {
+        vdev->started = started;
+    }
+}
+
+static inline void virtio_set_disabled(VirtIODevice *vdev, bool disable)
+{
+    if (vdev->use_disabled_flag) {
+        vdev->disabled = disable;
+    }
+}
+
+static inline bool virtio_device_disabled(VirtIODevice *vdev)
+{
+    return unlikely(vdev->disabled || vdev->broken);
+}
+
+bool virtio_legacy_allowed(VirtIODevice *vdev);
+bool virtio_legacy_check_disabled(VirtIODevice *vdev);
+
+QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev,
+                                   QEMUBHFunc *cb, void *opaque,
+                                   const char *name);
+#define virtio_bh_new_guarded(dev, cb, opaque) \
+    virtio_bh_new_guarded_full((dev), (cb), (opaque), (stringify(cb)))
+
+#endif
diff --git a/include/hw/watchdog/allwinner-wdt.h b/include/hw/watchdog/allwinner-wdt.h
new file mode 100644 (file)
index 0000000..7fe41e2
--- /dev/null
@@ -0,0 +1,123 @@
+/*
+ * Allwinner Watchdog emulation
+ *
+ * Copyright (C) 2023 Strahinja Jankovic <strahinja.p.jankovic@gmail.com>
+ *
+ *  This file is derived from Allwinner RTC,
+ *  by Niek Linnenbank.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_WATCHDOG_ALLWINNER_WDT_H
+#define HW_WATCHDOG_ALLWINNER_WDT_H
+
+#include "qom/object.h"
+#include "hw/ptimer.h"
+#include "hw/sysbus.h"
+
+/*
+ * This is a model of the Allwinner watchdog.
+ * Since watchdog registers belong to the timer module (and are shared with the
+ * RTC module), the interrupt line from watchdog is not handled right now.
+ * In QEMU, we just wire up the watchdog reset to watchdog_perform_action(),
+ * at least for the moment.
+ */
+
+#define TYPE_AW_WDT    "allwinner-wdt"
+
+/** Allwinner WDT sun4i family (A10, A12), also sun7i (A20) */
+#define TYPE_AW_WDT_SUN4I    TYPE_AW_WDT "-sun4i"
+
+/** Allwinner WDT sun6i family and newer (A31, H2+, H3, etc) */
+#define TYPE_AW_WDT_SUN6I    TYPE_AW_WDT "-sun6i"
+
+/** Number of WDT registers */
+#define AW_WDT_REGS_NUM      (5)
+
+OBJECT_DECLARE_TYPE(AwWdtState, AwWdtClass, AW_WDT)
+
+/**
+ * Allwinner WDT object instance state.
+ */
+struct AwWdtState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    struct ptimer_state *timer;
+
+    uint32_t regs[AW_WDT_REGS_NUM];
+};
+
+/**
+ * Allwinner WDT class-level struct.
+ *
+ * This struct is filled by each sunxi device specific code
+ * such that the generic code can use this struct to support
+ * all devices.
+ */
+struct AwWdtClass {
+    /*< private >*/
+    SysBusDeviceClass parent_class;
+    /*< public >*/
+
+    /** Defines device specific register map */
+    const uint8_t *regmap;
+
+    /** Size of the regmap in bytes */
+    size_t regmap_size;
+
+    /**
+     * Read device specific register
+     *
+     * @offset: register offset to read
+     * @return true if register read successful, false otherwise
+     */
+    bool (*read)(AwWdtState *s, uint32_t offset);
+
+    /**
+     * Write device specific register
+     *
+     * @offset: register offset to write
+     * @data: value to set in register
+     * @return true if register write successful, false otherwise
+     */
+    bool (*write)(AwWdtState *s, uint32_t offset, uint32_t data);
+
+    /**
+     * Check if watchdog can generate system reset
+     *
+     * @return true if watchdog can generate system reset
+     */
+    bool (*can_reset_system)(AwWdtState *s);
+
+    /**
+     * Check if provided key is valid
+     *
+     * @value: value written to register
+     * @return true if key is valid, false otherwise
+     */
+    bool (*is_key_valid)(AwWdtState *s, uint32_t val);
+
+    /**
+     * Get current INTV_VALUE setting
+     *
+     * @return current INTV_VALUE (0-15)
+     */
+    uint8_t (*get_intv_value)(AwWdtState *s);
+};
+
+#endif /* HW_WATCHDOG_ALLWINNER_WDT_H */
diff --git a/include/hw/watchdog/cmsdk-apb-watchdog.h b/include/hw/watchdog/cmsdk-apb-watchdog.h
new file mode 100644 (file)
index 0000000..c6b3e78
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * ARM CMSDK APB watchdog emulation
+ *
+ * Copyright (c) 2018 Linaro Limited
+ * Written by Peter Maydell
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+/*
+ * This is a model of the "APB watchdog" which is part of the Cortex-M
+ * System Design Kit (CMSDK) and documented in the Cortex-M System
+ * Design Kit Technical Reference Manual (ARM DDI0479C):
+ * https://developer.arm.com/products/system-design/system-design-kits/cortex-m-system-design-kit
+ *
+ * QEMU interface:
+ *  + Clock input "WDOGCLK": clock for the watchdog's timer
+ *  + sysbus MMIO region 0: the register bank
+ *  + sysbus IRQ 0: watchdog interrupt
+ *
+ * In real hardware the watchdog's reset output is just a GPIO line
+ * which can then be masked by the board or treated as a simple interrupt.
+ * (For instance the IoTKit does this with the non-secure watchdog, so that
+ * secure code can control whether non-secure code can perform a system
+ * reset via its watchdog.) In QEMU, we just wire up the watchdog reset
+ * to watchdog_perform_action(), at least for the moment.
+ */
+
+#ifndef CMSDK_APB_WATCHDOG_H
+#define CMSDK_APB_WATCHDOG_H
+
+#include "hw/sysbus.h"
+#include "hw/ptimer.h"
+#include "hw/clock.h"
+#include "qom/object.h"
+
+#define TYPE_CMSDK_APB_WATCHDOG "cmsdk-apb-watchdog"
+OBJECT_DECLARE_SIMPLE_TYPE(CMSDKAPBWatchdog, CMSDK_APB_WATCHDOG)
+
+/*
+ * This shares the same struct (and cast macro) as the base
+ * cmsdk-apb-watchdog device.
+ */
+#define TYPE_LUMINARY_WATCHDOG "luminary-watchdog"
+
+struct CMSDKAPBWatchdog {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    qemu_irq wdogint;
+    bool is_luminary;
+    struct ptimer_state *timer;
+    Clock *wdogclk;
+
+    uint32_t control;
+    uint32_t intstatus;
+    uint32_t lock;
+    uint32_t itcr;
+    uint32_t itop;
+    uint32_t resetstatus;
+    const uint32_t *id;
+};
+
+#endif
diff --git a/include/hw/watchdog/sbsa_gwdt.h b/include/hw/watchdog/sbsa_gwdt.h
new file mode 100644 (file)
index 0000000..70b137d
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Linaro Limited
+ *
+ * Authors:
+ *  Shashi Mallela <shashi.mallela@linaro.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version.  See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef WDT_SBSA_GWDT_H
+#define WDT_SBSA_GWDT_H
+
+#include "qemu/bitops.h"
+#include "hw/sysbus.h"
+#include "hw/irq.h"
+
+#define TYPE_WDT_SBSA "sbsa_gwdt"
+#define SBSA_GWDT(obj) \
+    OBJECT_CHECK(SBSA_GWDTState, (obj), TYPE_WDT_SBSA)
+#define SBSA_GWDT_CLASS(klass) \
+    OBJECT_CLASS_CHECK(SBSA_GWDTClass, (klass), TYPE_WDT_SBSA)
+#define SBSA_GWDT_GET_CLASS(obj) \
+    OBJECT_GET_CLASS(SBSA_GWDTClass, (obj), TYPE_WDT_SBSA)
+
+/* SBSA Generic Watchdog register definitions */
+/* refresh frame */
+#define SBSA_GWDT_WRR       0x000
+
+/* control frame */
+#define SBSA_GWDT_WCS       0x000
+#define SBSA_GWDT_WOR       0x008
+#define SBSA_GWDT_WORU      0x00C
+#define SBSA_GWDT_WCV       0x010
+#define SBSA_GWDT_WCVU      0x014
+
+/* Watchdog Interface Identification Register */
+#define SBSA_GWDT_W_IIDR    0xFCC
+
+/* Watchdog Control and Status Register Bits */
+#define SBSA_GWDT_WCS_EN    BIT(0)
+#define SBSA_GWDT_WCS_WS0   BIT(1)
+#define SBSA_GWDT_WCS_WS1   BIT(2)
+
+#define SBSA_GWDT_WOR_MASK  0x0000FFFF
+
+/*
+ * Watchdog Interface Identification Register definition
+ * considering JEP106 code for ARM in Bits [11:0]
+ */
+#define SBSA_GWDT_ID        0x1043B
+
+/* 2 Separate memory regions for each of refresh & control register frames */
+#define SBSA_GWDT_RMMIO_SIZE 0x1000
+#define SBSA_GWDT_CMMIO_SIZE 0x1000
+
+#define SBSA_TIMER_FREQ      62500000 /* Hz */
+
+typedef struct SBSA_GWDTState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion rmmio;
+    MemoryRegion cmmio;
+    qemu_irq irq;
+
+    QEMUTimer *timer;
+
+    uint32_t id;
+    uint32_t wcs;
+    uint32_t worl;
+    uint32_t woru;
+    uint32_t wcvl;
+    uint32_t wcvu;
+} SBSA_GWDTState;
+
+#endif /* WDT_SBSA_GWDT_H */
diff --git a/include/hw/watchdog/wdt_aspeed.h b/include/hw/watchdog/wdt_aspeed.h
new file mode 100644 (file)
index 0000000..e90ef86
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * ASPEED Watchdog Controller
+ *
+ * Copyright (C) 2016-2017 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef WDT_ASPEED_H
+#define WDT_ASPEED_H
+
+#include "hw/misc/aspeed_scu.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_ASPEED_WDT "aspeed.wdt"
+OBJECT_DECLARE_TYPE(AspeedWDTState, AspeedWDTClass, ASPEED_WDT)
+#define TYPE_ASPEED_2400_WDT TYPE_ASPEED_WDT "-ast2400"
+#define TYPE_ASPEED_2500_WDT TYPE_ASPEED_WDT "-ast2500"
+#define TYPE_ASPEED_2600_WDT TYPE_ASPEED_WDT "-ast2600"
+#define TYPE_ASPEED_1030_WDT TYPE_ASPEED_WDT "-ast1030"
+
+#define ASPEED_WDT_REGS_MAX        (0x30 / 4)
+
+struct AspeedWDTState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    QEMUTimer *timer;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    uint32_t regs[ASPEED_WDT_REGS_MAX];
+
+    AspeedSCUState *scu;
+    uint32_t pclk_freq;
+};
+
+
+struct AspeedWDTClass {
+    SysBusDeviceClass parent_class;
+
+    uint32_t iosize;
+    uint32_t ext_pulse_width_mask;
+    uint32_t reset_ctrl_reg;
+    void (*reset_pulse)(AspeedWDTState *s, uint32_t property);
+    void (*wdt_reload)(AspeedWDTState *s);
+    uint64_t (*sanitize_ctrl)(uint64_t data);
+    uint32_t default_status;
+    uint32_t default_reload_value;
+};
+
+#endif /* WDT_ASPEED_H */
diff --git a/include/hw/watchdog/wdt_diag288.h b/include/hw/watchdog/wdt_diag288.h
new file mode 100644 (file)
index 0000000..f72c1d3
--- /dev/null
@@ -0,0 +1,35 @@
+#ifndef WDT_DIAG288_H
+#define WDT_DIAG288_H
+
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+#define TYPE_WDT_DIAG288 "diag288"
+typedef struct DIAG288Class DIAG288Class;
+typedef struct DIAG288State DIAG288State;
+DECLARE_OBJ_CHECKERS(DIAG288State, DIAG288Class,
+                     DIAG288, TYPE_WDT_DIAG288)
+
+#define WDT_DIAG288_INIT      0
+#define WDT_DIAG288_CHANGE    1
+#define WDT_DIAG288_CANCEL    2
+
+struct DIAG288State {
+    /*< private >*/
+    DeviceState parent_obj;
+    QEMUTimer *timer;
+    bool enabled;
+
+    /*< public >*/
+};
+
+struct DIAG288Class {
+    /*< private >*/
+    DeviceClass parent_class;
+
+    /*< public >*/
+    int (*handle_timer)(DIAG288State *dev,
+                        uint64_t func, uint64_t timeout);
+};
+
+#endif /* WDT_DIAG288_H */
diff --git a/include/hw/watchdog/wdt_imx2.h b/include/hw/watchdog/wdt_imx2.h
new file mode 100644 (file)
index 0000000..600a552
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017, Impinj, Inc.
+ *
+ * i.MX2 Watchdog IP block
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef WDT_IMX2_H
+#define WDT_IMX2_H
+
+#include "qemu/bitops.h"
+#include "hw/sysbus.h"
+#include "hw/irq.h"
+#include "hw/ptimer.h"
+#include "qom/object.h"
+
+#define TYPE_IMX2_WDT "imx2.wdt"
+OBJECT_DECLARE_SIMPLE_TYPE(IMX2WdtState, IMX2_WDT)
+
+enum IMX2WdtRegisters {
+    IMX2_WDT_WCR  = 0x0000, /* Control Register */
+    IMX2_WDT_WSR  = 0x0002, /* Service Register */
+    IMX2_WDT_WRSR = 0x0004, /* Reset Status Register */
+    IMX2_WDT_WICR = 0x0006, /* Interrupt Control Register */
+    IMX2_WDT_WMCR = 0x0008, /* Misc Register */
+};
+
+#define IMX2_WDT_MMIO_SIZE 0x000a
+
+/* Control Register definitions */
+#define IMX2_WDT_WCR_WT         (0xFF << 8) /* Watchdog Timeout Field */
+#define IMX2_WDT_WCR_WDW        BIT(7)      /* WDOG Disable for Wait */
+#define IMX2_WDT_WCR_WDA        BIT(5)      /* WDOG Assertion */
+#define IMX2_WDT_WCR_SRS        BIT(4)      /* Software Reset Signal */
+#define IMX2_WDT_WCR_WDT        BIT(3)      /* WDOG Timeout Assertion */
+#define IMX2_WDT_WCR_WDE        BIT(2)      /* Watchdog Enable */
+#define IMX2_WDT_WCR_WDBG       BIT(1)      /* Watchdog Debug Enable */
+#define IMX2_WDT_WCR_WDZST      BIT(0)      /* Watchdog Timer Suspend */
+
+#define IMX2_WDT_WCR_LOCK_MASK  (IMX2_WDT_WCR_WDZST | IMX2_WDT_WCR_WDBG \
+                                 | IMX2_WDT_WCR_WDW)
+
+/* Service Register definitions */
+#define IMX2_WDT_SEQ1           0x5555      /* service sequence 1 */
+#define IMX2_WDT_SEQ2           0xAAAA      /* service sequence 2 */
+
+/* Reset Status Register definitions */
+#define IMX2_WDT_WRSR_TOUT      BIT(1)      /* Reset due to Timeout */
+#define IMX2_WDT_WRSR_SFTW      BIT(0)      /* Reset due to software reset */
+
+/* Interrupt Control Register definitions */
+#define IMX2_WDT_WICR_WIE       BIT(15)     /* Interrupt Enable */
+#define IMX2_WDT_WICR_WTIS      BIT(14)     /* Interrupt Status */
+#define IMX2_WDT_WICR_WICT      0xff        /* Interrupt Timeout */
+#define IMX2_WDT_WICR_WICT_DEF  0x04        /* Default interrupt timeout (2s) */
+
+#define IMX2_WDT_WICR_LOCK_MASK (IMX2_WDT_WICR_WIE | IMX2_WDT_WICR_WICT)
+
+/* Misc Control Register definitions */
+#define IMX2_WDT_WMCR_PDE       BIT(0)      /* Power-Down Enable */
+
+struct IMX2WdtState {
+    /* <private> */
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion mmio;
+    qemu_irq irq;
+
+    struct ptimer_state *timer;
+    struct ptimer_state *itimer;
+
+    bool pretimeout_support;
+    bool wicr_locked;
+
+    uint16_t wcr;
+    uint16_t wsr;
+    uint16_t wrsr;
+    uint16_t wicr;
+    uint16_t wmcr;
+
+    bool wcr_locked;            /* affects WDZST, WDBG, and WDW */
+    bool wcr_wde_locked;        /* affects WDE */
+    bool wcr_wdt_locked;        /* affects WDT (never cleared) */
+};
+
+#endif /* WDT_IMX2_H */
diff --git a/include/hw/xen/arch_hvm.h b/include/hw/xen/arch_hvm.h
new file mode 100644 (file)
index 0000000..c7c5152
--- /dev/null
@@ -0,0 +1,5 @@
+#if defined(TARGET_I386) || defined(TARGET_X86_64)
+#include "hw/i386/xen_arch_hvm.h"
+#elif defined(TARGET_ARM) || defined(TARGET_ARM_64)
+#include "hw/arm/xen_arch_hvm.h"
+#endif
diff --git a/include/hw/xen/interface/arch-arm.h b/include/hw/xen/interface/arch-arm.h
new file mode 100644 (file)
index 0000000..1528ced
--- /dev/null
@@ -0,0 +1,509 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * arch-arm.h
+ *
+ * Guest OS interface to ARM Xen.
+ *
+ * Copyright 2011 (C) Citrix Systems
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_ARM_H__
+#define __XEN_PUBLIC_ARCH_ARM_H__
+
+/*
+ * `incontents 50 arm_abi Hypercall Calling Convention
+ *
+ * A hypercall is issued using the ARM HVC instruction.
+ *
+ * A hypercall can take up to 5 arguments. These are passed in
+ * registers, the first argument in x0/r0 (for arm64/arm32 guests
+ * respectively irrespective of whether the underlying hypervisor is
+ * 32- or 64-bit), the second argument in x1/r1, the third in x2/r2,
+ * the forth in x3/r3 and the fifth in x4/r4.
+ *
+ * The hypercall number is passed in r12 (arm) or x16 (arm64). In both
+ * cases the relevant ARM procedure calling convention specifies this
+ * is an inter-procedure-call scratch register (e.g. for use in linker
+ * stubs). This use does not conflict with use during a hypercall.
+ *
+ * The HVC ISS must contain a Xen specific TAG: XEN_HYPERCALL_TAG.
+ *
+ * The return value is in x0/r0.
+ *
+ * The hypercall will clobber x16/r12 and the argument registers used
+ * by that hypercall (except r0 which is the return value) i.e. in
+ * addition to x16/r12 a 2 argument hypercall will clobber x1/r1 and a
+ * 4 argument hypercall will clobber x1/r1, x2/r2 and x3/r3.
+ *
+ * Parameter structs passed to hypercalls are laid out according to
+ * the Procedure Call Standard for the ARM Architecture (AAPCS, AKA
+ * EABI) and Procedure Call Standard for the ARM 64-bit Architecture
+ * (AAPCS64). Where there is a conflict the 64-bit standard should be
+ * used regardless of guest type. Structures which are passed as
+ * hypercall arguments are always little endian.
+ *
+ * All memory which is shared with other entities in the system
+ * (including the hypervisor and other guests) must reside in memory
+ * which is mapped as Normal Inner Write-Back Outer Write-Back Inner-Shareable.
+ * This applies to:
+ *  - hypercall arguments passed via a pointer to guest memory.
+ *  - memory shared via the grant table mechanism (including PV I/O
+ *    rings etc).
+ *  - memory shared with the hypervisor (struct shared_info, struct
+ *    vcpu_info, the grant table, etc).
+ *
+ * Any cache allocation hints are acceptable.
+ */
+
+/*
+ * `incontents 55 arm_hcall Supported Hypercalls
+ *
+ * Xen on ARM makes extensive use of hardware facilities and therefore
+ * only a subset of the potential hypercalls are required.
+ *
+ * Since ARM uses second stage paging any machine/physical addresses
+ * passed to hypercalls are Guest Physical Addresses (Intermediate
+ * Physical Addresses) unless otherwise noted.
+ *
+ * The following hypercalls (and sub operations) are supported on the
+ * ARM platform. Other hypercalls should be considered
+ * unavailable/unsupported.
+ *
+ *  HYPERVISOR_memory_op
+ *   All generic sub-operations
+ *
+ *  HYPERVISOR_domctl
+ *   All generic sub-operations, with the exception of:
+ *    * XEN_DOMCTL_irq_permission (not yet implemented)
+ *
+ *  HYPERVISOR_sched_op
+ *   All generic sub-operations, with the exception of:
+ *    * SCHEDOP_block -- prefer wfi hardware instruction
+ *
+ *  HYPERVISOR_console_io
+ *   All generic sub-operations
+ *
+ *  HYPERVISOR_xen_version
+ *   All generic sub-operations
+ *
+ *  HYPERVISOR_event_channel_op
+ *   All generic sub-operations
+ *
+ *  HYPERVISOR_physdev_op
+ *   Exactly these sub-operations are supported:
+ *   PHYSDEVOP_pci_device_add
+ *   PHYSDEVOP_pci_device_remove
+ *
+ *  HYPERVISOR_sysctl
+ *   All generic sub-operations, with the exception of:
+ *    * XEN_SYSCTL_page_offline_op
+ *    * XEN_SYSCTL_get_pmstat
+ *    * XEN_SYSCTL_pm_op
+ *
+ *  HYPERVISOR_hvm_op
+ *   Exactly these sub-operations are supported:
+ *    * HVMOP_set_param
+ *    * HVMOP_get_param
+ *
+ *  HYPERVISOR_grant_table_op
+ *   All generic sub-operations
+ *
+ *  HYPERVISOR_vcpu_op
+ *   Exactly these sub-operations are supported:
+ *    * VCPUOP_register_vcpu_info
+ *    * VCPUOP_register_runstate_memory_area
+ *
+ *  HYPERVISOR_argo_op
+ *   All generic sub-operations
+ *
+ * Other notes on the ARM ABI:
+ *
+ * - struct start_info is not exported to ARM guests.
+ *
+ * - struct shared_info is mapped by ARM guests using the
+ *   HYPERVISOR_memory_op sub-op XENMEM_add_to_physmap, passing
+ *   XENMAPSPACE_shared_info as space parameter.
+ *
+ * - All the per-cpu struct vcpu_info are mapped by ARM guests using the
+ *   HYPERVISOR_vcpu_op sub-op VCPUOP_register_vcpu_info, including cpu0
+ *   struct vcpu_info.
+ *
+ * - The grant table is mapped using the HYPERVISOR_memory_op sub-op
+ *   XENMEM_add_to_physmap, passing XENMAPSPACE_grant_table as space
+ *   parameter. The memory range specified under the Xen compatible
+ *   hypervisor node on device tree can be used as target gpfn for the
+ *   mapping.
+ *
+ * - Xenstore is initialized by using the two hvm_params
+ *   HVM_PARAM_STORE_PFN and HVM_PARAM_STORE_EVTCHN. They can be read
+ *   with the HYPERVISOR_hvm_op sub-op HVMOP_get_param.
+ *
+ * - The paravirtualized console is initialized by using the two
+ *   hvm_params HVM_PARAM_CONSOLE_PFN and HVM_PARAM_CONSOLE_EVTCHN. They
+ *   can be read with the HYPERVISOR_hvm_op sub-op HVMOP_get_param.
+ *
+ * - Event channel notifications are delivered using the percpu GIC
+ *   interrupt specified under the Xen compatible hypervisor node on
+ *   device tree.
+ *
+ * - The device tree Xen compatible node is fully described under Linux
+ *   at Documentation/devicetree/bindings/arm/xen.txt.
+ */
+
+#define XEN_HYPERCALL_TAG   0XEA1
+
+#define  int64_aligned_t  int64_t __attribute__((aligned(8)))
+#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
+
+#ifndef __ASSEMBLY__
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type)                  \
+    typedef union { type *p; unsigned long q; }                 \
+        __guest_handle_ ## name;                                \
+    typedef union { type *p; uint64_aligned_t q; }              \
+        __guest_handle_64_ ## name
+
+/*
+ * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
+ * in a struct in memory. On ARM is always 8 bytes sizes and 8 bytes
+ * aligned.
+ * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as an
+ * hypercall argument. It is 4 bytes on aarch32 and 8 bytes on aarch64.
+ */
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
+    ___DEFINE_XEN_GUEST_HANDLE(name, type);   \
+    ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
+#define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
+#define __XEN_GUEST_HANDLE(name)        __guest_handle_64_ ## name
+#define XEN_GUEST_HANDLE(name)          __XEN_GUEST_HANDLE(name)
+#define XEN_GUEST_HANDLE_PARAM(name)    __guest_handle_ ## name
+#define set_xen_guest_handle_raw(hnd, val)                  \
+    do {                                                    \
+        __typeof__(&(hnd)) _sxghr_tmp = &(hnd);             \
+        _sxghr_tmp->q = 0;                                  \
+        _sxghr_tmp->p = val;                                \
+    } while ( 0 )
+#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val)
+
+typedef uint64_t xen_pfn_t;
+#define PRI_xen_pfn PRIx64
+#define PRIu_xen_pfn PRIu64
+
+/*
+ * Maximum number of virtual CPUs in legacy multi-processor guests.
+ * Only one. All other VCPUS must use VCPUOP_register_vcpu_info.
+ */
+#define XEN_LEGACY_MAX_VCPUS 1
+
+typedef uint64_t xen_ulong_t;
+#define PRI_xen_ulong PRIx64
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+/* Anonymous union includes both 32- and 64-bit names (e.g., r0/x0). */
+# define __DECL_REG(n64, n32) union {          \
+        uint64_t n64;                          \
+        uint32_t n32;                          \
+    }
+#else
+/* Non-gcc sources must always use the proper 64-bit name (e.g., x0). */
+#define __DECL_REG(n64, n32) uint64_t n64
+#endif
+
+struct vcpu_guest_core_regs
+{
+    /*         Aarch64       Aarch32 */
+    __DECL_REG(x0,           r0_usr);
+    __DECL_REG(x1,           r1_usr);
+    __DECL_REG(x2,           r2_usr);
+    __DECL_REG(x3,           r3_usr);
+    __DECL_REG(x4,           r4_usr);
+    __DECL_REG(x5,           r5_usr);
+    __DECL_REG(x6,           r6_usr);
+    __DECL_REG(x7,           r7_usr);
+    __DECL_REG(x8,           r8_usr);
+    __DECL_REG(x9,           r9_usr);
+    __DECL_REG(x10,          r10_usr);
+    __DECL_REG(x11,          r11_usr);
+    __DECL_REG(x12,          r12_usr);
+
+    __DECL_REG(x13,          sp_usr);
+    __DECL_REG(x14,          lr_usr);
+
+    __DECL_REG(x15,          __unused_sp_hyp);
+
+    __DECL_REG(x16,          lr_irq);
+    __DECL_REG(x17,          sp_irq);
+
+    __DECL_REG(x18,          lr_svc);
+    __DECL_REG(x19,          sp_svc);
+
+    __DECL_REG(x20,          lr_abt);
+    __DECL_REG(x21,          sp_abt);
+
+    __DECL_REG(x22,          lr_und);
+    __DECL_REG(x23,          sp_und);
+
+    __DECL_REG(x24,          r8_fiq);
+    __DECL_REG(x25,          r9_fiq);
+    __DECL_REG(x26,          r10_fiq);
+    __DECL_REG(x27,          r11_fiq);
+    __DECL_REG(x28,          r12_fiq);
+
+    __DECL_REG(x29,          sp_fiq);
+    __DECL_REG(x30,          lr_fiq);
+
+    /* Return address and mode */
+    __DECL_REG(pc64,         pc32);             /* ELR_EL2 */
+    uint64_t cpsr;                              /* SPSR_EL2 */
+
+    union {
+        uint64_t spsr_el1;       /* AArch64 */
+        uint32_t spsr_svc;       /* AArch32 */
+    };
+
+    /* AArch32 guests only */
+    uint32_t spsr_fiq, spsr_irq, spsr_und, spsr_abt;
+
+    /* AArch64 guests only */
+    uint64_t sp_el0;
+    uint64_t sp_el1, elr_el1;
+};
+typedef struct vcpu_guest_core_regs vcpu_guest_core_regs_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_core_regs_t);
+
+#undef __DECL_REG
+
+struct vcpu_guest_context {
+#define _VGCF_online                   0
+#define VGCF_online                    (1<<_VGCF_online)
+    uint32_t flags;                         /* VGCF_* */
+
+    struct vcpu_guest_core_regs user_regs;  /* Core CPU registers */
+
+    uint64_t sctlr;
+    uint64_t ttbcr, ttbr0, ttbr1;
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
+
+/*
+ * struct xen_arch_domainconfig's ABI is covered by
+ * XEN_DOMCTL_INTERFACE_VERSION.
+ */
+#define XEN_DOMCTL_CONFIG_GIC_NATIVE    0
+#define XEN_DOMCTL_CONFIG_GIC_V2        1
+#define XEN_DOMCTL_CONFIG_GIC_V3        2
+
+#define XEN_DOMCTL_CONFIG_TEE_NONE      0
+#define XEN_DOMCTL_CONFIG_TEE_OPTEE     1
+
+struct xen_arch_domainconfig {
+    /* IN/OUT */
+    uint8_t gic_version;
+    /* IN */
+    uint16_t tee_type;
+    /* IN */
+    uint32_t nr_spis;
+    /*
+     * OUT
+     * Based on the property clock-frequency in the DT timer node.
+     * The property may be present when the bootloader/firmware doesn't
+     * set correctly CNTFRQ which hold the timer frequency.
+     *
+     * As it's not possible to trap this register, we have to replicate
+     * the value in the guest DT.
+     *
+     * = 0 => property not present
+     * > 0 => Value of the property
+     *
+     */
+    uint32_t clock_frequency;
+};
+#endif /* __XEN__ || __XEN_TOOLS__ */
+
+struct arch_vcpu_info {
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+struct arch_shared_info {
+};
+typedef struct arch_shared_info arch_shared_info_t;
+typedef uint64_t xen_callback_t;
+
+#endif
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+/* PSR bits (CPSR, SPSR) */
+
+#define PSR_THUMB       (1<<5)        /* Thumb Mode enable */
+#define PSR_FIQ_MASK    (1<<6)        /* Fast Interrupt mask */
+#define PSR_IRQ_MASK    (1<<7)        /* Interrupt mask */
+#define PSR_ABT_MASK    (1<<8)        /* Asynchronous Abort mask */
+#define PSR_BIG_ENDIAN  (1<<9)        /* arm32: Big Endian Mode */
+#define PSR_DBG_MASK    (1<<9)        /* arm64: Debug Exception mask */
+#define PSR_IT_MASK     (0x0600fc00)  /* Thumb If-Then Mask */
+#define PSR_JAZELLE     (1<<24)       /* Jazelle Mode */
+#define PSR_Z           (1<<30)       /* Zero condition flag */
+
+/* 32 bit modes */
+#define PSR_MODE_USR 0x10
+#define PSR_MODE_FIQ 0x11
+#define PSR_MODE_IRQ 0x12
+#define PSR_MODE_SVC 0x13
+#define PSR_MODE_MON 0x16
+#define PSR_MODE_ABT 0x17
+#define PSR_MODE_HYP 0x1a
+#define PSR_MODE_UND 0x1b
+#define PSR_MODE_SYS 0x1f
+
+/* 64 bit modes */
+#define PSR_MODE_BIT  0x10 /* Set iff AArch32 */
+#define PSR_MODE_EL3h 0x0d
+#define PSR_MODE_EL3t 0x0c
+#define PSR_MODE_EL2h 0x09
+#define PSR_MODE_EL2t 0x08
+#define PSR_MODE_EL1h 0x05
+#define PSR_MODE_EL1t 0x04
+#define PSR_MODE_EL0t 0x00
+
+/*
+ * We set PSR_Z to be able to boot Linux kernel versions with an invalid
+ * encoding of the first 8 NOP instructions. See commit a92882a4d270 in
+ * Linux.
+ *
+ * Note that PSR_Z is also set by U-Boot and QEMU -kernel when loading
+ * zImage kernels on aarch32.
+ */
+#define PSR_GUEST32_INIT (PSR_Z|PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_SVC)
+#define PSR_GUEST64_INIT (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_EL1h)
+
+#define SCTLR_GUEST_INIT    xen_mk_ullong(0x00c50078)
+
+/*
+ * Virtual machine platform (memory layout, interrupts)
+ *
+ * These are defined for consistency between the tools and the
+ * hypervisor. Guests must not rely on these hardcoded values but
+ * should instead use the FDT.
+ */
+
+/* Physical Address Space */
+
+/* Virtio MMIO mappings */
+#define GUEST_VIRTIO_MMIO_BASE   xen_mk_ullong(0x02000000)
+#define GUEST_VIRTIO_MMIO_SIZE   xen_mk_ullong(0x00100000)
+
+/*
+ * vGIC mappings: Only one set of mapping is used by the guest.
+ * Therefore they can overlap.
+ */
+
+/* vGIC v2 mappings */
+#define GUEST_GICD_BASE   xen_mk_ullong(0x03001000)
+#define GUEST_GICD_SIZE   xen_mk_ullong(0x00001000)
+#define GUEST_GICC_BASE   xen_mk_ullong(0x03002000)
+#define GUEST_GICC_SIZE   xen_mk_ullong(0x00002000)
+
+/* vGIC v3 mappings */
+#define GUEST_GICV3_GICD_BASE      xen_mk_ullong(0x03001000)
+#define GUEST_GICV3_GICD_SIZE      xen_mk_ullong(0x00010000)
+
+#define GUEST_GICV3_RDIST_REGIONS  1
+
+#define GUEST_GICV3_GICR0_BASE     xen_mk_ullong(0x03020000) /* vCPU0..127 */
+#define GUEST_GICV3_GICR0_SIZE     xen_mk_ullong(0x01000000)
+
+/*
+ * 256 MB is reserved for VPCI configuration space based on calculation
+ * 256 buses x 32 devices x 8 functions x 4 KB = 256 MB
+ */
+#define GUEST_VPCI_ECAM_BASE    xen_mk_ullong(0x10000000)
+#define GUEST_VPCI_ECAM_SIZE    xen_mk_ullong(0x10000000)
+
+/* ACPI tables physical address */
+#define GUEST_ACPI_BASE xen_mk_ullong(0x20000000)
+#define GUEST_ACPI_SIZE xen_mk_ullong(0x02000000)
+
+/* PL011 mappings */
+#define GUEST_PL011_BASE    xen_mk_ullong(0x22000000)
+#define GUEST_PL011_SIZE    xen_mk_ullong(0x00001000)
+
+/* Guest PCI-PCIe memory space where config space and BAR will be available.*/
+#define GUEST_VPCI_ADDR_TYPE_MEM            xen_mk_ullong(0x02000000)
+#define GUEST_VPCI_MEM_ADDR                 xen_mk_ullong(0x23000000)
+#define GUEST_VPCI_MEM_SIZE                 xen_mk_ullong(0x10000000)
+
+/*
+ * 16MB == 4096 pages reserved for guest to use as a region to map its
+ * grant table in.
+ */
+#define GUEST_GNTTAB_BASE xen_mk_ullong(0x38000000)
+#define GUEST_GNTTAB_SIZE xen_mk_ullong(0x01000000)
+
+#define GUEST_MAGIC_BASE  xen_mk_ullong(0x39000000)
+#define GUEST_MAGIC_SIZE  xen_mk_ullong(0x01000000)
+
+#define GUEST_RAM_BANKS   2
+
+/*
+ * The way to find the extended regions (to be exposed to the guest as unused
+ * address space) relies on the fact that the regions reserved for the RAM
+ * below are big enough to also accommodate such regions.
+ */
+#define GUEST_RAM0_BASE   xen_mk_ullong(0x40000000) /* 3GB of low RAM @ 1GB */
+#define GUEST_RAM0_SIZE   xen_mk_ullong(0xc0000000)
+
+/* 4GB @ 4GB Prefetch Memory for VPCI */
+#define GUEST_VPCI_ADDR_TYPE_PREFETCH_MEM   xen_mk_ullong(0x42000000)
+#define GUEST_VPCI_PREFETCH_MEM_ADDR        xen_mk_ullong(0x100000000)
+#define GUEST_VPCI_PREFETCH_MEM_SIZE        xen_mk_ullong(0x100000000)
+
+#define GUEST_RAM1_BASE   xen_mk_ullong(0x0200000000) /* 1016GB of RAM @ 8GB */
+#define GUEST_RAM1_SIZE   xen_mk_ullong(0xfe00000000)
+
+#define GUEST_RAM_BASE    GUEST_RAM0_BASE /* Lowest RAM address */
+/* Largest amount of actual RAM, not including holes */
+#define GUEST_RAM_MAX     (GUEST_RAM0_SIZE + GUEST_RAM1_SIZE)
+/* Suitable for e.g. const uint64_t ramfoo[] = GUEST_RAM_BANK_FOOS; */
+#define GUEST_RAM_BANK_BASES   { GUEST_RAM0_BASE, GUEST_RAM1_BASE }
+#define GUEST_RAM_BANK_SIZES   { GUEST_RAM0_SIZE, GUEST_RAM1_SIZE }
+
+/* Current supported guest VCPUs */
+#define GUEST_MAX_VCPUS 128
+
+/* Interrupts */
+#define GUEST_TIMER_VIRT_PPI    27
+#define GUEST_TIMER_PHYS_S_PPI  29
+#define GUEST_TIMER_PHYS_NS_PPI 30
+#define GUEST_EVTCHN_PPI        31
+
+#define GUEST_VPL011_SPI        32
+
+#define GUEST_VIRTIO_MMIO_SPI_FIRST   33
+#define GUEST_VIRTIO_MMIO_SPI_LAST    43
+
+/* PSCI functions */
+#define PSCI_cpu_suspend 0
+#define PSCI_cpu_off     1
+#define PSCI_cpu_on      2
+#define PSCI_migrate     3
+
+#endif
+
+#ifndef __ASSEMBLY__
+/* Stub definition of PMU structure */
+typedef struct xen_pmu_arch { uint8_t dummy; } xen_pmu_arch_t;
+#endif
+
+#endif /*  __XEN_PUBLIC_ARCH_ARM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/arch-x86/cpuid.h b/include/hw/xen/interface/arch-x86/cpuid.h
new file mode 100644 (file)
index 0000000..7ecd16a
--- /dev/null
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * arch-x86/cpuid.h
+ *
+ * CPUID interface to Xen.
+ *
+ * Copyright (c) 2007 Citrix Systems, Inc.
+ *
+ * Authors:
+ *    Keir Fraser <keir@xen.org>
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__
+#define __XEN_PUBLIC_ARCH_X86_CPUID_H__
+
+/*
+ * For compatibility with other hypervisor interfaces, the Xen cpuid leaves
+ * can be found at the first otherwise unused 0x100 aligned boundary starting
+ * from 0x40000000.
+ *
+ * e.g If viridian extensions are enabled for an HVM domain, the Xen cpuid
+ * leaves will start at 0x40000100
+ */
+
+#define XEN_CPUID_FIRST_LEAF 0x40000000
+#define XEN_CPUID_LEAF(i)    (XEN_CPUID_FIRST_LEAF + (i))
+
+/*
+ * Leaf 1 (0x40000x00)
+ * EAX: Largest Xen-information leaf. All leaves up to an including @EAX
+ *      are supported by the Xen host.
+ * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification
+ *      of a Xen host.
+ */
+#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */
+#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */
+#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */
+
+/*
+ * Leaf 2 (0x40000x01)
+ * EAX[31:16]: Xen major version.
+ * EAX[15: 0]: Xen minor version.
+ * EBX-EDX: Reserved (currently all zeroes).
+ */
+
+/*
+ * Leaf 3 (0x40000x02)
+ * EAX: Number of hypercall transfer pages. This register is always guaranteed
+ *      to specify one hypercall page.
+ * EBX: Base address of Xen-specific MSRs.
+ * ECX: Features 1. Unused bits are set to zero.
+ * EDX: Features 2. Unused bits are set to zero.
+ */
+
+/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */
+#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
+#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
+
+/*
+ * Leaf 4 (0x40000x03)
+ * Sub-leaf 0: EAX: bit 0: emulated tsc
+ *                  bit 1: host tsc is known to be reliable
+ *                  bit 2: RDTSCP instruction available
+ *             EBX: tsc_mode: 0=default (emulate if necessary), 1=emulate,
+ *                            2=no emulation, 3=no emulation + TSC_AUX support
+ *             ECX: guest tsc frequency in kHz
+ *             EDX: guest tsc incarnation (migration count)
+ * Sub-leaf 1: EAX: tsc offset low part
+ *             EBX: tsc offset high part
+ *             ECX: multiplicator for tsc->ns conversion
+ *             EDX: shift amount for tsc->ns conversion
+ * Sub-leaf 2: EAX: host tsc frequency in kHz
+ */
+
+/*
+ * Leaf 5 (0x40000x04)
+ * HVM-specific features
+ * Sub-leaf 0: EAX: Features
+ * Sub-leaf 0: EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag)
+ * Sub-leaf 0: ECX: domain id (iff EAX has XEN_HVM_CPUID_DOMID_PRESENT flag)
+ */
+#define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */
+#define XEN_HVM_CPUID_X2APIC_VIRT      (1u << 1) /* Virtualized x2APIC accesses */
+/* Memory mapped from other domains has valid IOMMU entries */
+#define XEN_HVM_CPUID_IOMMU_MAPPINGS   (1u << 2)
+#define XEN_HVM_CPUID_VCPU_ID_PRESENT  (1u << 3) /* vcpu id is present in EBX */
+#define XEN_HVM_CPUID_DOMID_PRESENT    (1u << 4) /* domid is present in ECX */
+/*
+ * With interrupt format set to 0 (non-remappable) bits 55:49 from the
+ * IO-APIC RTE and bits 11:5 from the MSI address can be used to store
+ * high bits for the Destination ID. This expands the Destination ID
+ * field from 8 to 15 bits, allowing to target APIC IDs up 32768.
+ */
+#define XEN_HVM_CPUID_EXT_DEST_ID      (1u << 5)
+/*
+ * Per-vCPU event channel upcalls work correctly with physical IRQs
+ * bound to event channels.
+ */
+#define XEN_HVM_CPUID_UPCALL_VECTOR    (1u << 6)
+
+/*
+ * Leaf 6 (0x40000x05)
+ * PV-specific parameters
+ * Sub-leaf 0: EAX: max available sub-leaf
+ * Sub-leaf 0: EBX: bits 0-7: max machine address width
+ */
+
+/* Max. address width in bits taking memory hotplug into account. */
+#define XEN_CPUID_MACHINE_ADDRESS_WIDTH_MASK (0xffu << 0)
+
+#define XEN_CPUID_MAX_NUM_LEAVES 5
+
+#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
diff --git a/include/hw/xen/interface/arch-x86/xen-x86_32.h b/include/hw/xen/interface/arch-x86/xen-x86_32.h
new file mode 100644 (file)
index 0000000..139438e
--- /dev/null
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * xen-x86_32.h
+ *
+ * Guest OS interface to x86 32-bit Xen.
+ *
+ * Copyright (c) 2004-2007, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
+
+/*
+ * Hypercall interface:
+ *  Input:  %ebx, %ecx, %edx, %esi, %edi, %ebp (arguments 1-6)
+ *  Output: %eax
+ * Access is via hypercall page (set up by guest loader or via a Xen MSR):
+ *  call hypercall_page + hypercall-number * 32
+ * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx)
+ */
+
+/*
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+#define FLAT_RING1_CS 0xe019    /* GDT index 259 */
+#define FLAT_RING1_DS 0xe021    /* GDT index 260 */
+#define FLAT_RING1_SS 0xe021    /* GDT index 260 */
+#define FLAT_RING3_CS 0xe02b    /* GDT index 261 */
+#define FLAT_RING3_DS 0xe033    /* GDT index 262 */
+#define FLAT_RING3_SS 0xe033    /* GDT index 262 */
+
+#define FLAT_KERNEL_CS FLAT_RING1_CS
+#define FLAT_KERNEL_DS FLAT_RING1_DS
+#define FLAT_KERNEL_SS FLAT_RING1_SS
+#define FLAT_USER_CS    FLAT_RING3_CS
+#define FLAT_USER_DS    FLAT_RING3_DS
+#define FLAT_USER_SS    FLAT_RING3_SS
+
+#define __HYPERVISOR_VIRT_START_PAE    0xF5800000
+#define __MACH2PHYS_VIRT_START_PAE     0xF5800000
+#define __MACH2PHYS_VIRT_END_PAE       0xF6800000
+#define HYPERVISOR_VIRT_START_PAE      xen_mk_ulong(__HYPERVISOR_VIRT_START_PAE)
+#define MACH2PHYS_VIRT_START_PAE       xen_mk_ulong(__MACH2PHYS_VIRT_START_PAE)
+#define MACH2PHYS_VIRT_END_PAE         xen_mk_ulong(__MACH2PHYS_VIRT_END_PAE)
+
+/* Non-PAE bounds are obsolete. */
+#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000
+#define __MACH2PHYS_VIRT_START_NONPAE  0xFC000000
+#define __MACH2PHYS_VIRT_END_NONPAE    0xFC400000
+#define HYPERVISOR_VIRT_START_NONPAE   \
+    xen_mk_ulong(__HYPERVISOR_VIRT_START_NONPAE)
+#define MACH2PHYS_VIRT_START_NONPAE    \
+    xen_mk_ulong(__MACH2PHYS_VIRT_START_NONPAE)
+#define MACH2PHYS_VIRT_END_NONPAE      \
+    xen_mk_ulong(__MACH2PHYS_VIRT_END_NONPAE)
+
+#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE
+#define __MACH2PHYS_VIRT_START  __MACH2PHYS_VIRT_START_PAE
+#define __MACH2PHYS_VIRT_END    __MACH2PHYS_VIRT_END_PAE
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START xen_mk_ulong(__HYPERVISOR_VIRT_START)
+#endif
+
+#define MACH2PHYS_VIRT_START  xen_mk_ulong(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END    xen_mk_ulong(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START)
+#endif
+
+/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+#undef ___DEFINE_XEN_GUEST_HANDLE
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type)                  \
+    typedef struct { type *p; }                                 \
+        __guest_handle_ ## name;                                \
+    typedef struct { union { type *p; uint64_aligned_t q; }; }  \
+        __guest_handle_64_ ## name
+#undef set_xen_guest_handle_raw
+#define set_xen_guest_handle_raw(hnd, val)                  \
+    do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0;   \
+         (hnd).p = val;                                     \
+    } while ( 0 )
+#define  int64_aligned_t  int64_t __attribute__((aligned(8)))
+#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
+#define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name
+#define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name)
+#endif
+
+#ifndef __ASSEMBLY__
+
+#if defined(XEN_GENERATING_COMPAT_HEADERS)
+/* nothing */
+#elif defined(__XEN__) || defined(__XEN_TOOLS__)
+/* Anonymous unions include all permissible names (e.g., al/ah/ax/eax). */
+#define __DECL_REG_LO8(which) union { \
+    uint32_t e ## which ## x; \
+    uint16_t which ## x; \
+    struct { \
+        uint8_t which ## l; \
+        uint8_t which ## h; \
+    }; \
+}
+#define __DECL_REG_LO16(name) union { \
+    uint32_t e ## name, _e ## name; \
+    uint16_t name; \
+}
+#else
+/* Other sources must always use the proper 32-bit name (e.g., eax). */
+#define __DECL_REG_LO8(which) uint32_t e ## which ## x
+#define __DECL_REG_LO16(name) uint32_t e ## name
+#endif
+
+struct cpu_user_regs {
+    __DECL_REG_LO8(b);
+    __DECL_REG_LO8(c);
+    __DECL_REG_LO8(d);
+    __DECL_REG_LO16(si);
+    __DECL_REG_LO16(di);
+    __DECL_REG_LO16(bp);
+    __DECL_REG_LO8(a);
+    uint16_t error_code;    /* private */
+    uint16_t entry_vector;  /* private */
+    __DECL_REG_LO16(ip);
+    uint16_t cs;
+    uint8_t  saved_upcall_mask;
+    uint8_t  _pad0;
+    __DECL_REG_LO16(flags); /* eflags.IF == !saved_upcall_mask */
+    __DECL_REG_LO16(sp);
+    uint16_t ss, _pad1;
+    uint16_t es, _pad2;
+    uint16_t ds, _pad3;
+    uint16_t fs, _pad4;
+    uint16_t gs, _pad5;
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
+
+#undef __DECL_REG_LO8
+#undef __DECL_REG_LO16
+
+/*
+ * Page-directory addresses above 4GB do not fit into architectural %cr3.
+ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
+ * must use the following accessor macros to pack/unpack valid MFNs.
+ */
+#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
+#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
+
+struct arch_vcpu_info {
+    unsigned long cr2;
+    unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+struct xen_callback {
+    unsigned long cs;
+    unsigned long eip;
+};
+typedef struct xen_callback xen_callback_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/arch-x86/xen-x86_64.h b/include/hw/xen/interface/arch-x86/xen-x86_64.h
new file mode 100644 (file)
index 0000000..5d9035e
--- /dev/null
@@ -0,0 +1,224 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * xen-x86_64.h
+ *
+ * Guest OS interface to x86 64-bit Xen.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__
+
+/*
+ * Hypercall interface:
+ *  Input:  %rdi, %rsi, %rdx, %r10, %r8, %r9 (arguments 1-6)
+ *  Output: %rax
+ * Access is via hypercall page (set up by guest loader or via a Xen MSR):
+ *  call hypercall_page + hypercall-number * 32
+ * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi)
+ */
+
+/*
+ * 64-bit segment selectors
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+
+#define FLAT_RING3_CS32 0xe023  /* GDT index 260 */
+#define FLAT_RING3_CS64 0xe033  /* GDT index 262 */
+#define FLAT_RING3_DS32 0xe02b  /* GDT index 261 */
+#define FLAT_RING3_DS64 0x0000  /* NULL selector */
+#define FLAT_RING3_SS32 0xe02b  /* GDT index 261 */
+#define FLAT_RING3_SS64 0xe02b  /* GDT index 261 */
+
+#define FLAT_KERNEL_DS64 FLAT_RING3_DS64
+#define FLAT_KERNEL_DS32 FLAT_RING3_DS32
+#define FLAT_KERNEL_DS   FLAT_KERNEL_DS64
+#define FLAT_KERNEL_CS64 FLAT_RING3_CS64
+#define FLAT_KERNEL_CS32 FLAT_RING3_CS32
+#define FLAT_KERNEL_CS   FLAT_KERNEL_CS64
+#define FLAT_KERNEL_SS64 FLAT_RING3_SS64
+#define FLAT_KERNEL_SS32 FLAT_RING3_SS32
+#define FLAT_KERNEL_SS   FLAT_KERNEL_SS64
+
+#define FLAT_USER_DS64 FLAT_RING3_DS64
+#define FLAT_USER_DS32 FLAT_RING3_DS32
+#define FLAT_USER_DS   FLAT_USER_DS64
+#define FLAT_USER_CS64 FLAT_RING3_CS64
+#define FLAT_USER_CS32 FLAT_RING3_CS32
+#define FLAT_USER_CS   FLAT_USER_CS64
+#define FLAT_USER_SS64 FLAT_RING3_SS64
+#define FLAT_USER_SS32 FLAT_RING3_SS32
+#define FLAT_USER_SS   FLAT_USER_SS64
+
+#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
+#define __HYPERVISOR_VIRT_END   0xFFFF880000000000
+#define __MACH2PHYS_VIRT_START  0xFFFF800000000000
+#define __MACH2PHYS_VIRT_END    0xFFFF804000000000
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START xen_mk_ulong(__HYPERVISOR_VIRT_START)
+#define HYPERVISOR_VIRT_END   xen_mk_ulong(__HYPERVISOR_VIRT_END)
+#endif
+
+#define MACH2PHYS_VIRT_START  xen_mk_ulong(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END    xen_mk_ulong(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
+/*
+ * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
+ *  @which == SEGBASE_*  ;  @base == 64-bit base address
+ * Returns 0 on success.
+ */
+#define SEGBASE_FS          0
+#define SEGBASE_GS_USER     1
+#define SEGBASE_GS_KERNEL   2
+#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
+
+/*
+ * int HYPERVISOR_iret(void)
+ * All arguments are on the kernel stack, in the following format.
+ * Never returns if successful. Current kernel context is lost.
+ * The saved CS is mapped as follows:
+ *   RING0 -> RING3 kernel mode.
+ *   RING1 -> RING3 kernel mode.
+ *   RING2 -> RING3 kernel mode.
+ *   RING3 -> RING3 user mode.
+ * However RING0 indicates that the guest kernel should return to iteself
+ * directly with
+ *      orb   $3,1*8(%rsp)
+ *      iretq
+ * If flags contains VGCF_in_syscall:
+ *   Restore RAX, RIP, RFLAGS, RSP.
+ *   Discard R11, RCX, CS, SS.
+ * Otherwise:
+ *   Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
+ * All other registers are saved on hypercall entry and restored to user.
+ */
+/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
+#define _VGCF_in_syscall 8
+#define VGCF_in_syscall  (1<<_VGCF_in_syscall)
+#define VGCF_IN_SYSCALL  VGCF_in_syscall
+
+#ifndef __ASSEMBLY__
+
+struct iret_context {
+    /* Top of stack (%rsp at point of hypercall). */
+    uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
+    /* Bottom of iret stack frame. */
+};
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+/* Anonymous unions include all permissible names (e.g., al/ah/ax/eax/rax). */
+#define __DECL_REG_LOHI(which) union { \
+    uint64_t r ## which ## x; \
+    uint32_t e ## which ## x; \
+    uint16_t which ## x; \
+    struct { \
+        uint8_t which ## l; \
+        uint8_t which ## h; \
+    }; \
+}
+#define __DECL_REG_LO8(name) union { \
+    uint64_t r ## name; \
+    uint32_t e ## name; \
+    uint16_t name; \
+    uint8_t name ## l; \
+}
+#define __DECL_REG_LO16(name) union { \
+    uint64_t r ## name; \
+    uint32_t e ## name; \
+    uint16_t name; \
+}
+#define __DECL_REG_HI(num) union { \
+    uint64_t r ## num; \
+    uint32_t r ## num ## d; \
+    uint16_t r ## num ## w; \
+    uint8_t r ## num ## b; \
+}
+#elif defined(__GNUC__) && !defined(__STRICT_ANSI__)
+/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
+#define __DECL_REG(name) union { \
+    uint64_t r ## name, e ## name; \
+    uint32_t _e ## name; \
+}
+#else
+/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */
+#define __DECL_REG(name) uint64_t r ## name
+#endif
+
+#ifndef __DECL_REG_LOHI
+#define __DECL_REG_LOHI(name) __DECL_REG(name ## x)
+#define __DECL_REG_LO8        __DECL_REG
+#define __DECL_REG_LO16       __DECL_REG
+#define __DECL_REG_HI(num)    uint64_t r ## num
+#endif
+
+struct cpu_user_regs {
+    __DECL_REG_HI(15);
+    __DECL_REG_HI(14);
+    __DECL_REG_HI(13);
+    __DECL_REG_HI(12);
+    __DECL_REG_LO8(bp);
+    __DECL_REG_LOHI(b);
+    __DECL_REG_HI(11);
+    __DECL_REG_HI(10);
+    __DECL_REG_HI(9);
+    __DECL_REG_HI(8);
+    __DECL_REG_LOHI(a);
+    __DECL_REG_LOHI(c);
+    __DECL_REG_LOHI(d);
+    __DECL_REG_LO8(si);
+    __DECL_REG_LO8(di);
+    uint32_t error_code;    /* private */
+    uint32_t entry_vector;  /* private */
+    __DECL_REG_LO16(ip);
+    uint16_t cs, _pad0[1];
+    uint8_t  saved_upcall_mask;
+    uint8_t  _pad1[3];
+    __DECL_REG_LO16(flags); /* rflags.IF == !saved_upcall_mask */
+    __DECL_REG_LO8(sp);
+    uint16_t ss, _pad2[3];
+    uint16_t es, _pad3[3];
+    uint16_t ds, _pad4[3];
+    uint16_t fs, _pad5[3];
+    uint16_t gs, _pad6[3];
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
+
+#undef __DECL_REG
+#undef __DECL_REG_LOHI
+#undef __DECL_REG_LO8
+#undef __DECL_REG_LO16
+#undef __DECL_REG_HI
+
+#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
+#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
+
+struct arch_vcpu_info {
+    unsigned long cr2;
+    unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+typedef unsigned long xen_callback_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/arch-x86/xen.h b/include/hw/xen/interface/arch-x86/xen.h
new file mode 100644 (file)
index 0000000..c0f4551
--- /dev/null
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * arch-x86/xen.h
+ *
+ * Guest OS interface to x86 Xen.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#include "../xen.h"
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_H__
+
+/* Structural guest handles introduced in 0x00030201. */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030201
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+    typedef struct { type *p; } __guest_handle_ ## name
+#else
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+    typedef type * __guest_handle_ ## name
+#endif
+
+/*
+ * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
+ * in a struct in memory.
+ * XEN_GUEST_HANDLE_PARAM represent a guest pointer, when passed as an
+ * hypercall argument.
+ * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but
+ * they might not be on other architectures.
+ */
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
+    ___DEFINE_XEN_GUEST_HANDLE(name, type);   \
+    ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
+#define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
+#define __XEN_GUEST_HANDLE(name)        __guest_handle_ ## name
+#define XEN_GUEST_HANDLE(name)          __XEN_GUEST_HANDLE(name)
+#define XEN_GUEST_HANDLE_PARAM(name)    XEN_GUEST_HANDLE(name)
+#define set_xen_guest_handle_raw(hnd, val)  do { (hnd).p = val; } while (0)
+#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val)
+
+#if defined(__i386__)
+# ifdef __XEN__
+__DeFiNe__ __DECL_REG_LO8(which) uint32_t e ## which ## x
+__DeFiNe__ __DECL_REG_LO16(name) union { uint32_t e ## name; }
+# endif
+#include "xen-x86_32.h"
+# ifdef __XEN__
+__UnDeF__ __DECL_REG_LO8
+__UnDeF__ __DECL_REG_LO16
+__DeFiNe__ __DECL_REG_LO8(which) e ## which ## x
+__DeFiNe__ __DECL_REG_LO16(name) e ## name
+# endif
+#elif defined(__x86_64__)
+#include "xen-x86_64.h"
+#endif
+
+#ifndef __ASSEMBLY__
+typedef unsigned long xen_pfn_t;
+#define PRI_xen_pfn "lx"
+#define PRIu_xen_pfn "lu"
+#endif
+
+#define XEN_HAVE_PV_GUEST_ENTRY 1
+
+#define XEN_HAVE_PV_UPCALL_MASK 1
+
+/*
+ * `incontents 200 segdesc Segment Descriptor Tables
+ */
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_gdt(const xen_pfn_t frames[], unsigned int entries);
+ * `
+ */
+/*
+ * A number of GDT entries are reserved by Xen. These are not situated at the
+ * start of the GDT because some stupid OSes export hard-coded selector values
+ * in their ABI. These hard-coded values are always near the start of the GDT,
+ * so Xen places itself out of the way, at the far end of the GDT.
+ *
+ * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op
+ */
+#define FIRST_RESERVED_GDT_PAGE  14
+#define FIRST_RESERVED_GDT_BYTE  (FIRST_RESERVED_GDT_PAGE * 4096)
+#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
+
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_update_descriptor(u64 pa, u64 desc);
+ * `
+ * ` @pa   The machine physical address of the descriptor to
+ * `       update. Must be either a descriptor page or writable.
+ * ` @desc The descriptor value to update, in the same format as a
+ * `       native descriptor table entry.
+ */
+
+/* Maximum number of virtual CPUs in legacy multi-processor guests. */
+#define XEN_LEGACY_MAX_VCPUS 32
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long xen_ulong_t;
+#define PRI_xen_ulong "lx"
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp);
+ * `
+ * Sets the stack segment and pointer for the current vcpu.
+ */
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_trap_table(const struct trap_info traps[]);
+ * `
+ */
+/*
+ * Send an array of these to HYPERVISOR_set_trap_table().
+ * Terminate the array with a sentinel entry, with traps[].address==0.
+ * The privilege level specifies which modes may enter a trap via a software
+ * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
+ * privilege levels as follows:
+ *  Level == 0: Noone may enter
+ *  Level == 1: Kernel may enter
+ *  Level == 2: Kernel may enter
+ *  Level == 3: Everyone may enter
+ *
+ * Note: For compatibility with kernels not setting up exception handlers
+ *       early enough, Xen will avoid trying to inject #GP (and hence crash
+ *       the domain) when an RDMSR would require this, but no handler was
+ *       set yet. The precise conditions are implementation specific, and
+ *       new code may not rely on such behavior anyway.
+ */
+#define TI_GET_DPL(_ti)      ((_ti)->flags & 3)
+#define TI_GET_IF(_ti)       ((_ti)->flags & 4)
+#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
+#define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
+struct trap_info {
+    uint8_t       vector;  /* exception vector                              */
+    uint8_t       flags;   /* 0-3: privilege level; 4: clear event enable?  */
+    uint16_t      cs;      /* code selector                                 */
+    unsigned long address; /* code offset                                   */
+};
+typedef struct trap_info trap_info_t;
+DEFINE_XEN_GUEST_HANDLE(trap_info_t);
+
+typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+/*
+ * The following is all CPU context. Note that the fpu_ctxt block is filled
+ * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
+ *
+ * Also note that when calling DOMCTL_setvcpucontext for HVM guests, not all
+ * information in this structure is updated, the fields read include: fpu_ctxt
+ * (if VGCT_I387_VALID is set), flags, user_regs and debugreg[*].
+ *
+ * Note: VCPUOP_initialise for HVM guests is non-symetric with
+ * DOMCTL_setvcpucontext, and uses struct vcpu_hvm_context from hvm/hvm_vcpu.h
+ */
+struct vcpu_guest_context {
+    /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
+    struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
+#define VGCF_I387_VALID                (1<<0)
+#define VGCF_IN_KERNEL                 (1<<2)
+#define _VGCF_i387_valid               0
+#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
+#define _VGCF_in_kernel                2
+#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
+#define _VGCF_syscall_disables_events  4
+#define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
+#define _VGCF_online                   5
+#define VGCF_online                    (1<<_VGCF_online)
+    unsigned long flags;                    /* VGCF_* flags                 */
+    struct cpu_user_regs user_regs;         /* User-level CPU registers     */
+    struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
+    unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
+    unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
+    unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1)   */
+    /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
+    unsigned long ctrlreg[8];               /* CR0-CR7 (control registers)  */
+    unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
+#ifdef __i386__
+    unsigned long event_callback_cs;        /* CS:EIP of event callback     */
+    unsigned long event_callback_eip;
+    unsigned long failsafe_callback_cs;     /* CS:EIP of failsafe callback  */
+    unsigned long failsafe_callback_eip;
+#else
+    unsigned long event_callback_eip;
+    unsigned long failsafe_callback_eip;
+#ifdef __XEN__
+    union {
+        unsigned long syscall_callback_eip;
+        struct {
+            unsigned int event_callback_cs;    /* compat CS of event cb     */
+            unsigned int failsafe_callback_cs; /* compat CS of failsafe cb  */
+        };
+    };
+#else
+    unsigned long syscall_callback_eip;
+#endif
+#endif
+    unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
+#ifdef __x86_64__
+    /* Segment base addresses. */
+    uint64_t      fs_base;
+    uint64_t      gs_base_kernel;
+    uint64_t      gs_base_user;
+#endif
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
+
+struct arch_shared_info {
+    /*
+     * Number of valid entries in the p2m table(s) anchored at
+     * pfn_to_mfn_frame_list_list and/or p2m_vaddr.
+     */
+    unsigned long max_pfn;
+    /*
+     * Frame containing list of mfns containing list of mfns containing p2m.
+     * A value of 0 indicates it has not yet been set up, ~0 indicates it has
+     * been set to invalid e.g. due to the p2m being too large for the 3-level
+     * p2m tree. In this case the linear mapper p2m list anchored at p2m_vaddr
+     * is to be used.
+     */
+    xen_pfn_t     pfn_to_mfn_frame_list_list;
+    unsigned long nmi_reason;
+    /*
+     * Following three fields are valid if p2m_cr3 contains a value different
+     * from 0.
+     * p2m_cr3 is the root of the address space where p2m_vaddr is valid.
+     * p2m_cr3 is in the same format as a cr3 value in the vcpu register state
+     * and holds the folded machine frame number (via xen_pfn_to_cr3) of a
+     * L3 or L4 page table.
+     * p2m_vaddr holds the virtual address of the linear p2m list. All entries
+     * in the range [0...max_pfn[ are accessible via this pointer.
+     * p2m_generation will be incremented by the guest before and after each
+     * change of the mappings of the p2m list. p2m_generation starts at 0 and
+     * a value with the least significant bit set indicates that a mapping
+     * update is in progress. This allows guest external software (e.g. in Dom0)
+     * to verify that read mappings are consistent and whether they have changed
+     * since the last check.
+     * Modifying a p2m element in the linear p2m list is allowed via an atomic
+     * write only.
+     */
+    unsigned long p2m_cr3;         /* cr3 value of the p2m address space */
+    unsigned long p2m_vaddr;       /* virtual address of the p2m list */
+    unsigned long p2m_generation;  /* generation count of p2m mapping */
+#ifdef __i386__
+    /* There's no room for this field in the generic structure. */
+    uint32_t wc_sec_hi;
+#endif
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+/*
+ * struct xen_arch_domainconfig's ABI is covered by
+ * XEN_DOMCTL_INTERFACE_VERSION.
+ */
+struct xen_arch_domainconfig {
+#define _XEN_X86_EMU_LAPIC          0
+#define XEN_X86_EMU_LAPIC           (1U<<_XEN_X86_EMU_LAPIC)
+#define _XEN_X86_EMU_HPET           1
+#define XEN_X86_EMU_HPET            (1U<<_XEN_X86_EMU_HPET)
+#define _XEN_X86_EMU_PM             2
+#define XEN_X86_EMU_PM              (1U<<_XEN_X86_EMU_PM)
+#define _XEN_X86_EMU_RTC            3
+#define XEN_X86_EMU_RTC             (1U<<_XEN_X86_EMU_RTC)
+#define _XEN_X86_EMU_IOAPIC         4
+#define XEN_X86_EMU_IOAPIC          (1U<<_XEN_X86_EMU_IOAPIC)
+#define _XEN_X86_EMU_PIC            5
+#define XEN_X86_EMU_PIC             (1U<<_XEN_X86_EMU_PIC)
+#define _XEN_X86_EMU_VGA            6
+#define XEN_X86_EMU_VGA             (1U<<_XEN_X86_EMU_VGA)
+#define _XEN_X86_EMU_IOMMU          7
+#define XEN_X86_EMU_IOMMU           (1U<<_XEN_X86_EMU_IOMMU)
+#define _XEN_X86_EMU_PIT            8
+#define XEN_X86_EMU_PIT             (1U<<_XEN_X86_EMU_PIT)
+#define _XEN_X86_EMU_USE_PIRQ       9
+#define XEN_X86_EMU_USE_PIRQ        (1U<<_XEN_X86_EMU_USE_PIRQ)
+#define _XEN_X86_EMU_VPCI           10
+#define XEN_X86_EMU_VPCI            (1U<<_XEN_X86_EMU_VPCI)
+
+#define XEN_X86_EMU_ALL             (XEN_X86_EMU_LAPIC | XEN_X86_EMU_HPET |  \
+                                     XEN_X86_EMU_PM | XEN_X86_EMU_RTC |      \
+                                     XEN_X86_EMU_IOAPIC | XEN_X86_EMU_PIC |  \
+                                     XEN_X86_EMU_VGA | XEN_X86_EMU_IOMMU |   \
+                                     XEN_X86_EMU_PIT | XEN_X86_EMU_USE_PIRQ |\
+                                     XEN_X86_EMU_VPCI)
+    uint32_t emulation_flags;
+
+/*
+ * Select whether to use a relaxed behavior for accesses to MSRs not explicitly
+ * handled by Xen instead of injecting a #GP to the guest. Note this option
+ * doesn't allow the guest to read or write to the underlying MSR.
+ */
+#define XEN_X86_MSR_RELAXED (1u << 0)
+    uint32_t misc_flags;
+};
+
+/* Max  XEN_X86_* constant. Used for ABI checking. */
+#define XEN_X86_MISC_FLAGS_MAX XEN_X86_MSR_RELAXED
+
+#endif
+
+/*
+ * Representations of architectural CPUID and MSR information.  Used as the
+ * serialised version of Xen's internal representation.
+ */
+typedef struct xen_cpuid_leaf {
+#define XEN_CPUID_NO_SUBLEAF 0xffffffffu
+    uint32_t leaf, subleaf;
+    uint32_t a, b, c, d;
+} xen_cpuid_leaf_t;
+DEFINE_XEN_GUEST_HANDLE(xen_cpuid_leaf_t);
+
+typedef struct xen_msr_entry {
+    uint32_t idx;
+    uint32_t flags; /* Reserved MBZ. */
+    uint64_t val;
+} xen_msr_entry_t;
+DEFINE_XEN_GUEST_HANDLE(xen_msr_entry_t);
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_fpu_taskswitch(int set);
+ * `
+ * Sets (if set!=0) or clears (if set==0) CR0.TS.
+ */
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_debugreg(int regno, unsigned long value);
+ *
+ * ` unsigned long
+ * ` HYPERVISOR_get_debugreg(int regno);
+ * For 0<=reg<=7, returns the debug register value.
+ * For other values of reg, returns ((unsigned long)-EINVAL).
+ * (Unfortunately, this interface is defective.)
+ */
+
+/*
+ * Prefix forces emulation of some non-trapping instructions.
+ * Currently only CPUID.
+ */
+#ifdef __ASSEMBLY__
+#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
+#define XEN_CPUID          XEN_EMULATE_PREFIX cpuid
+#else
+#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
+#define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
+#endif
+
+/*
+ * Debug console IO port, also called "port E9 hack". Each character written
+ * to this IO port will be printed on the hypervisor console, subject to log
+ * level restrictions.
+ */
+#define XEN_HVM_DEBUGCONS_IOPORT 0xe9
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/event_channel.h b/include/hw/xen/interface/event_channel.h
new file mode 100644 (file)
index 0000000..0d91a1c
--- /dev/null
@@ -0,0 +1,371 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * event_channel.h
+ *
+ * Event channels between domains.
+ *
+ * Copyright (c) 2003-2004, K A Fraser.
+ */
+
+#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
+#define __XEN_PUBLIC_EVENT_CHANNEL_H__
+
+#include "xen.h"
+
+/*
+ * `incontents 150 evtchn Event Channels
+ *
+ * Event channels are the basic primitive provided by Xen for event
+ * notifications. An event is the Xen equivalent of a hardware
+ * interrupt. They essentially store one bit of information, the event
+ * of interest is signalled by transitioning this bit from 0 to 1.
+ *
+ * Notifications are received by a guest via an upcall from Xen,
+ * indicating when an event arrives (setting the bit). Further
+ * notifications are masked until the bit is cleared again (therefore,
+ * guests must check the value of the bit after re-enabling event
+ * delivery to ensure no missed notifications).
+ *
+ * Event notifications can be masked by setting a flag; this is
+ * equivalent to disabling interrupts and can be used to ensure
+ * atomicity of certain operations in the guest kernel.
+ *
+ * Event channels are represented by the evtchn_* fields in
+ * struct shared_info and struct vcpu_info.
+ */
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_event_channel_op(enum event_channel_op cmd, void *args)
+ * `
+ * @cmd  == EVTCHNOP_* (event-channel operation).
+ * @args == struct evtchn_* Operation-specific extra arguments (NULL if none).
+ */
+
+/* ` enum event_channel_op { // EVTCHNOP_* => struct evtchn_* */
+#define EVTCHNOP_bind_interdomain 0
+#define EVTCHNOP_bind_virq        1
+#define EVTCHNOP_bind_pirq        2
+#define EVTCHNOP_close            3
+#define EVTCHNOP_send             4
+#define EVTCHNOP_status           5
+#define EVTCHNOP_alloc_unbound    6
+#define EVTCHNOP_bind_ipi         7
+#define EVTCHNOP_bind_vcpu        8
+#define EVTCHNOP_unmask           9
+#define EVTCHNOP_reset           10
+#define EVTCHNOP_init_control    11
+#define EVTCHNOP_expand_array    12
+#define EVTCHNOP_set_priority    13
+#ifdef __XEN__
+#define EVTCHNOP_reset_cont      14
+#endif
+/* ` } */
+
+typedef uint32_t evtchn_port_t;
+DEFINE_XEN_GUEST_HANDLE(evtchn_port_t);
+
+/*
+ * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
+ * accepting interdomain bindings from domain <remote_dom>. A fresh port
+ * is allocated in <dom> and returned as <port>.
+ * NOTES:
+ *  1. If the caller is unprivileged then <dom> must be DOMID_SELF.
+ *  2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
+ */
+struct evtchn_alloc_unbound {
+    /* IN parameters */
+    domid_t dom, remote_dom;
+    /* OUT parameters */
+    evtchn_port_t port;
+};
+typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t;
+
+/*
+ * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
+ * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify
+ * a port that is unbound and marked as accepting bindings from the calling
+ * domain. A fresh port is allocated in the calling domain and returned as
+ * <local_port>.
+ *
+ * In case the peer domain has already tried to set our event channel
+ * pending, before it was bound, EVTCHNOP_bind_interdomain always sets
+ * the local event channel pending.
+ *
+ * The usual pattern of use, in the guest's upcall (or subsequent
+ * handler) is as follows: (Re-enable the event channel for subsequent
+ * signalling and then) check for the existence of whatever condition
+ * is being waited for by other means, and take whatever action is
+ * needed (if any).
+ *
+ * NOTES:
+ *  1. <remote_dom> may be DOMID_SELF, allowing loopback connections.
+ */
+struct evtchn_bind_interdomain {
+    /* IN parameters. */
+    domid_t remote_dom;
+    evtchn_port_t remote_port;
+    /* OUT parameters. */
+    evtchn_port_t local_port;
+};
+typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t;
+
+/*
+ * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
+ * vcpu.
+ * NOTES:
+ *  1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list
+ *     in xen.h for the classification of each VIRQ.
+ *  2. Global VIRQs must be allocated on VCPU0 but can subsequently be
+ *     re-bound via EVTCHNOP_bind_vcpu.
+ *  3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu.
+ *     The allocated event channel is bound to the specified vcpu and the
+ *     binding cannot be changed.
+ */
+struct evtchn_bind_virq {
+    /* IN parameters. */
+    uint32_t virq; /* enum virq */
+    uint32_t vcpu;
+    /* OUT parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_bind_virq evtchn_bind_virq_t;
+
+/*
+ * EVTCHNOP_bind_pirq: Bind a local event channel to a real IRQ (PIRQ <irq>).
+ * NOTES:
+ *  1. A physical IRQ may be bound to at most one event channel per domain.
+ *  2. Only a sufficiently-privileged domain may bind to a physical IRQ.
+ */
+struct evtchn_bind_pirq {
+    /* IN parameters. */
+    uint32_t pirq;
+#define BIND_PIRQ__WILL_SHARE 1
+    uint32_t flags; /* BIND_PIRQ__* */
+    /* OUT parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_bind_pirq evtchn_bind_pirq_t;
+
+/*
+ * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
+ * NOTES:
+ *  1. The allocated event channel is bound to the specified vcpu. The binding
+ *     may not be changed.
+ */
+struct evtchn_bind_ipi {
+    uint32_t vcpu;
+    /* OUT parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_bind_ipi evtchn_bind_ipi_t;
+
+/*
+ * EVTCHNOP_close: Close a local event channel <port>. If the channel is
+ * interdomain then the remote end is placed in the unbound state
+ * (EVTCHNSTAT_unbound), awaiting a new connection.
+ */
+struct evtchn_close {
+    /* IN parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_close evtchn_close_t;
+
+/*
+ * EVTCHNOP_send: Send an event to the remote end of the channel whose local
+ * endpoint is <port>.
+ */
+struct evtchn_send {
+    /* IN parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_send evtchn_send_t;
+
+/*
+ * EVTCHNOP_status: Get the current status of the communication channel which
+ * has an endpoint at <dom, port>.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may obtain the status of an event
+ *     channel for which <dom> is not DOMID_SELF.
+ */
+struct evtchn_status {
+    /* IN parameters */
+    domid_t  dom;
+    evtchn_port_t port;
+    /* OUT parameters */
+#define EVTCHNSTAT_closed       0  /* Channel is not in use.                 */
+#define EVTCHNSTAT_unbound      1  /* Channel is waiting interdom connection.*/
+#define EVTCHNSTAT_interdomain  2  /* Channel is connected to remote domain. */
+#define EVTCHNSTAT_pirq         3  /* Channel is bound to a phys IRQ line.   */
+#define EVTCHNSTAT_virq         4  /* Channel is bound to a virtual IRQ line */
+#define EVTCHNSTAT_ipi          5  /* Channel is bound to a virtual IPI line */
+    uint32_t status;
+    uint32_t vcpu;                 /* VCPU to which this channel is bound.   */
+    union {
+        struct {
+            domid_t dom;
+        } unbound;                 /* EVTCHNSTAT_unbound */
+        struct {
+            domid_t dom;
+            evtchn_port_t port;
+        } interdomain;             /* EVTCHNSTAT_interdomain */
+        uint32_t pirq;             /* EVTCHNSTAT_pirq        */
+        uint32_t virq;             /* EVTCHNSTAT_virq        */
+    } u;
+};
+typedef struct evtchn_status evtchn_status_t;
+
+/*
+ * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
+ * event is pending.
+ * NOTES:
+ *  1. IPI-bound channels always notify the vcpu specified at bind time.
+ *     This binding cannot be changed.
+ *  2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time.
+ *     This binding cannot be changed.
+ *  3. All other channels notify vcpu0 by default. This default is set when
+ *     the channel is allocated (a port that is freed and subsequently reused
+ *     has its binding reset to vcpu0).
+ */
+struct evtchn_bind_vcpu {
+    /* IN parameters. */
+    evtchn_port_t port;
+    uint32_t vcpu;
+};
+typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t;
+
+/*
+ * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
+ * a notification to the appropriate VCPU if an event is pending.
+ */
+struct evtchn_unmask {
+    /* IN parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_unmask evtchn_unmask_t;
+
+/*
+ * EVTCHNOP_reset: Close all event channels associated with specified domain.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify other than DOMID_SELF.
+ *  3. Destroys all control blocks and event array, resets event channel
+ *     operations to 2-level ABI if called with <dom> == DOMID_SELF and FIFO
+ *     ABI was used. Guests should not bind events during EVTCHNOP_reset call
+ *     as these events are likely to be lost.
+ */
+struct evtchn_reset {
+    /* IN parameters. */
+    domid_t dom;
+};
+typedef struct evtchn_reset evtchn_reset_t;
+
+/*
+ * EVTCHNOP_init_control: initialize the control block for the FIFO ABI.
+ *
+ * Note: any events that are currently pending will not be resent and
+ * will be lost.  Guests should call this before binding any event to
+ * avoid losing any events.
+ */
+struct evtchn_init_control {
+    /* IN parameters. */
+    uint64_t control_gfn;
+    uint32_t offset;
+    uint32_t vcpu;
+    /* OUT parameters. */
+    uint8_t link_bits;
+    uint8_t _pad[7];
+};
+typedef struct evtchn_init_control evtchn_init_control_t;
+
+/*
+ * EVTCHNOP_expand_array: add an additional page to the event array.
+ */
+struct evtchn_expand_array {
+    /* IN parameters. */
+    uint64_t array_gfn;
+};
+typedef struct evtchn_expand_array evtchn_expand_array_t;
+
+/*
+ * EVTCHNOP_set_priority: set the priority for an event channel.
+ */
+struct evtchn_set_priority {
+    /* IN parameters. */
+    evtchn_port_t port;
+    uint32_t priority;
+};
+typedef struct evtchn_set_priority evtchn_set_priority_t;
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op)
+ * `
+ * Superceded by new event_channel_op() hypercall since 0x00030202.
+ */
+struct evtchn_op {
+    uint32_t cmd; /* enum event_channel_op */
+    union {
+        evtchn_alloc_unbound_t    alloc_unbound;
+        evtchn_bind_interdomain_t bind_interdomain;
+        evtchn_bind_virq_t        bind_virq;
+        evtchn_bind_pirq_t        bind_pirq;
+        evtchn_bind_ipi_t         bind_ipi;
+        evtchn_close_t            close;
+        evtchn_send_t             send;
+        evtchn_status_t           status;
+        evtchn_bind_vcpu_t        bind_vcpu;
+        evtchn_unmask_t           unmask;
+    } u;
+};
+typedef struct evtchn_op evtchn_op_t;
+DEFINE_XEN_GUEST_HANDLE(evtchn_op_t);
+
+/*
+ * 2-level ABI
+ */
+
+#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
+
+/*
+ * FIFO ABI
+ */
+
+/* Events may have priorities from 0 (highest) to 15 (lowest). */
+#define EVTCHN_FIFO_PRIORITY_MAX     0
+#define EVTCHN_FIFO_PRIORITY_DEFAULT 7
+#define EVTCHN_FIFO_PRIORITY_MIN     15
+
+#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1)
+
+typedef uint32_t event_word_t;
+
+#define EVTCHN_FIFO_PENDING 31
+#define EVTCHN_FIFO_MASKED  30
+#define EVTCHN_FIFO_LINKED  29
+#define EVTCHN_FIFO_BUSY    28
+
+#define EVTCHN_FIFO_LINK_BITS 17
+#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1)
+
+#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS)
+
+struct evtchn_fifo_control_block {
+    uint32_t ready;
+    uint32_t _rsvd;
+    uint32_t head[EVTCHN_FIFO_MAX_QUEUES];
+};
+typedef struct evtchn_fifo_control_block evtchn_fifo_control_block_t;
+
+#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/features.h b/include/hw/xen/interface/features.h
new file mode 100644 (file)
index 0000000..d2a9175
--- /dev/null
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * features.h
+ *
+ * Feature flags, reported by XENVER_get_features.
+ *
+ * Copyright (c) 2006, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_FEATURES_H__
+#define __XEN_PUBLIC_FEATURES_H__
+
+/*
+ * `incontents 200 elfnotes_features XEN_ELFNOTE_FEATURES
+ *
+ * The list of all the features the guest supports. They are set by
+ * parsing the XEN_ELFNOTE_FEATURES and XEN_ELFNOTE_SUPPORTED_FEATURES
+ * string. The format is the  feature names (as given here without the
+ * "XENFEAT_" prefix) separated by '|' characters.
+ * If a feature is required for the kernel to function then the feature name
+ * must be preceded by a '!' character.
+ *
+ * Note that if XEN_ELFNOTE_SUPPORTED_FEATURES is used, then in the
+ * XENFEAT_dom0 MUST be set if the guest is to be booted as dom0,
+ */
+
+/*
+ * If set, the guest does not need to write-protect its pagetables, and can
+ * update them via direct writes.
+ */
+#define XENFEAT_writable_page_tables       0
+
+/*
+ * If set, the guest does not need to write-protect its segment descriptor
+ * tables, and can update them via direct writes.
+ */
+#define XENFEAT_writable_descriptor_tables 1
+
+/*
+ * If set, translation between the guest's 'pseudo-physical' address space
+ * and the host's machine address space are handled by the hypervisor. In this
+ * mode the guest does not need to perform phys-to/from-machine translations
+ * when performing page table operations.
+ */
+#define XENFEAT_auto_translated_physmap    2
+
+/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */
+#define XENFEAT_supervisor_mode_kernel     3
+
+/*
+ * If set, the guest does not need to allocate x86 PAE page directories
+ * below 4GB. This flag is usually implied by auto_translated_physmap.
+ */
+#define XENFEAT_pae_pgdir_above_4gb        4
+
+/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
+#define XENFEAT_mmu_pt_update_preserve_ad  5
+
+/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */
+#define XENFEAT_highmem_assist             6
+
+/*
+ * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel
+ * available pte bits.
+ */
+#define XENFEAT_gnttab_map_avail_bits      7
+
+/* x86: Does this Xen host support the HVM callback vector type? */
+#define XENFEAT_hvm_callback_vector        8
+
+/* x86: pvclock algorithm is safe to use on HVM */
+#define XENFEAT_hvm_safe_pvclock           9
+
+/* x86: pirq can be used by HVM guests */
+#define XENFEAT_hvm_pirqs                 10
+
+/* operation as Dom0 is supported */
+#define XENFEAT_dom0                      11
+
+/* Xen also maps grant references at pfn = mfn.
+ * This feature flag is deprecated and should not be used.
+#define XENFEAT_grant_map_identity        12
+ */
+
+/* Guest can use XENMEMF_vnode to specify virtual node for memory op. */
+#define XENFEAT_memory_op_vnode_supported 13
+
+/* arm: Hypervisor supports ARM SMC calling convention. */
+#define XENFEAT_ARM_SMCCC_supported       14
+
+/*
+ * x86/PVH: If set, ACPI RSDP can be placed at any address. Otherwise RSDP
+ * must be located in lower 1MB, as required by ACPI Specification for IA-PC
+ * systems.
+ * This feature flag is only consulted if XEN_ELFNOTE_GUEST_OS contains
+ * the "linux" string.
+ */
+#define XENFEAT_linux_rsdp_unrestricted   15
+
+/*
+ * A direct-mapped (or 1:1 mapped) domain is a domain for which its
+ * local pages have gfn == mfn. If a domain is direct-mapped,
+ * XENFEAT_direct_mapped is set; otherwise XENFEAT_not_direct_mapped
+ * is set.
+ *
+ * If neither flag is set (e.g. older Xen releases) the assumptions are:
+ * - not auto_translated domains (x86 only) are always direct-mapped
+ * - on x86, auto_translated domains are not direct-mapped
+ * - on ARM, Dom0 is direct-mapped, DomUs are not
+ */
+#define XENFEAT_not_direct_mapped         16
+#define XENFEAT_direct_mapped             17
+
+#define XENFEAT_NR_SUBMAPS 1
+
+#endif /* __XEN_PUBLIC_FEATURES_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/grant_table.h b/include/hw/xen/interface/grant_table.h
new file mode 100644 (file)
index 0000000..1dfa17a
--- /dev/null
@@ -0,0 +1,669 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * grant_table.h
+ *
+ * Interface for granting foreign access to page frames, and receiving
+ * page-ownership transfers.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_GRANT_TABLE_H__
+#define __XEN_PUBLIC_GRANT_TABLE_H__
+
+#include "xen.h"
+
+/*
+ * `incontents 150 gnttab Grant Tables
+ *
+ * Xen's grant tables provide a generic mechanism to memory sharing
+ * between domains. This shared memory interface underpins the split
+ * device drivers for block and network IO.
+ *
+ * Each domain has its own grant table. This is a data structure that
+ * is shared with Xen; it allows the domain to tell Xen what kind of
+ * permissions other domains have on its pages. Entries in the grant
+ * table are identified by grant references. A grant reference is an
+ * integer, which indexes into the grant table. It acts as a
+ * capability which the grantee can use to perform operations on the
+ * granter's memory.
+ *
+ * This capability-based system allows shared-memory communications
+ * between unprivileged domains. A grant reference also encapsulates
+ * the details of a shared page, removing the need for a domain to
+ * know the real machine address of a page it is sharing. This makes
+ * it possible to share memory correctly with domains running in
+ * fully virtualised memory.
+ */
+
+/***********************************
+ * GRANT TABLE REPRESENTATION
+ */
+
+/* Some rough guidelines on accessing and updating grant-table entries
+ * in a concurrency-safe manner. For more information, Linux contains a
+ * reference implementation for guest OSes (drivers/xen/grant_table.c, see
+ * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=blob;f=drivers/xen/grant-table.c;hb=HEAD
+ *
+ * NB. WMB is a no-op on current-generation x86 processors. However, a
+ *     compiler barrier will still be required.
+ *
+ * Introducing a valid entry into the grant table:
+ *  1. Write ent->domid.
+ *  2. Write ent->frame:
+ *      GTF_permit_access:   Frame to which access is permitted.
+ *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
+ *                           frame, or zero if none.
+ *  3. Write memory barrier (WMB).
+ *  4. Write ent->flags, inc. valid type.
+ *
+ * Invalidating an unused GTF_permit_access entry:
+ *  1. flags = ent->flags.
+ *  2. Observe that !(flags & (GTF_reading|GTF_writing)).
+ *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ *  NB. No need for WMB as reuse of entry is control-dependent on success of
+ *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ *
+ * Invalidating an in-use GTF_permit_access entry:
+ *  This cannot be done directly. Request assistance from the domain controller
+ *  which can set a timeout on the use of a grant entry and take necessary
+ *  action. (NB. This is not yet implemented!).
+ *
+ * Invalidating an unused GTF_accept_transfer entry:
+ *  1. flags = ent->flags.
+ *  2. Observe that !(flags & GTF_transfer_committed). [*]
+ *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ *  NB. No need for WMB as reuse of entry is control-dependent on success of
+ *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ *  [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
+ *      The guest must /not/ modify the grant entry until the address of the
+ *      transferred frame is written. It is safe for the guest to spin waiting
+ *      for this to occur (detect by observing GTF_transfer_completed in
+ *      ent->flags).
+ *
+ * Invalidating a committed GTF_accept_transfer entry:
+ *  1. Wait for (ent->flags & GTF_transfer_completed).
+ *
+ * Changing a GTF_permit_access from writable to read-only:
+ *  Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
+ *
+ * Changing a GTF_permit_access from read-only to writable:
+ *  Use SMP-safe bit-setting instruction.
+ */
+
+/*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef uint32_t grant_ref_t;
+
+/*
+ * A grant table comprises a packed array of grant entries in one or more
+ * page frames shared between Xen and a guest.
+ * [XEN]: This field is written by Xen and read by the sharing guest.
+ * [GST]: This field is written by the guest and read by Xen.
+ */
+
+/*
+ * Version 1 of the grant table entry structure is maintained largely for
+ * backwards compatibility.  New guests are recommended to support using
+ * version 2 to overcome version 1 limitations, but to default to version 1.
+ */
+#if __XEN_INTERFACE_VERSION__ < 0x0003020a
+#define grant_entry_v1 grant_entry
+#define grant_entry_v1_t grant_entry_t
+#endif
+struct grant_entry_v1 {
+    /* GTF_xxx: various type and flag information.  [XEN,GST] */
+    uint16_t flags;
+    /* The domain being granted foreign privileges. [GST] */
+    domid_t  domid;
+    /*
+     * GTF_permit_access: GFN that @domid is allowed to map and access. [GST]
+     * GTF_accept_transfer: GFN that @domid is allowed to transfer into. [GST]
+     * GTF_transfer_completed: MFN whose ownership transferred by @domid
+     *                         (non-translated guests only). [XEN]
+     */
+    uint32_t frame;
+};
+typedef struct grant_entry_v1 grant_entry_v1_t;
+
+/* The first few grant table entries will be preserved across grant table
+ * version changes and may be pre-populated at domain creation by tools.
+ */
+#define GNTTAB_NR_RESERVED_ENTRIES     8
+#define GNTTAB_RESERVED_CONSOLE        0
+#define GNTTAB_RESERVED_XENSTORE       1
+
+/*
+ * Type of grant entry.
+ *  GTF_invalid: This grant entry grants no privileges.
+ *  GTF_permit_access: Allow @domid to map/access @frame.
+ *  GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
+ *                       to this guest. Xen writes the page number to @frame.
+ *  GTF_transitive: Allow @domid to transitively access a subrange of
+ *                  @trans_grant in @trans_domid.  No mappings are allowed.
+ */
+#define GTF_invalid         (0U<<0)
+#define GTF_permit_access   (1U<<0)
+#define GTF_accept_transfer (2U<<0)
+#define GTF_transitive      (3U<<0)
+#define GTF_type_mask       (3U<<0)
+
+/*
+ * Subflags for GTF_permit_access and GTF_transitive.
+ *  GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
+ *  GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
+ *  GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ * Further subflags for GTF_permit_access only.
+ *  GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags to be used for
+ *                             mappings of the grant [GST]
+ *  GTF_sub_page: Grant access to only a subrange of the page.  @domid
+ *                will only be allowed to copy from the grant, and not
+ *                map it. [GST]
+ */
+#define _GTF_readonly       (2)
+#define GTF_readonly        (1U<<_GTF_readonly)
+#define _GTF_reading        (3)
+#define GTF_reading         (1U<<_GTF_reading)
+#define _GTF_writing        (4)
+#define GTF_writing         (1U<<_GTF_writing)
+#define _GTF_PWT            (5)
+#define GTF_PWT             (1U<<_GTF_PWT)
+#define _GTF_PCD            (6)
+#define GTF_PCD             (1U<<_GTF_PCD)
+#define _GTF_PAT            (7)
+#define GTF_PAT             (1U<<_GTF_PAT)
+#define _GTF_sub_page       (8)
+#define GTF_sub_page        (1U<<_GTF_sub_page)
+
+/*
+ * Subflags for GTF_accept_transfer:
+ *  GTF_transfer_committed: Xen sets this flag to indicate that it is committed
+ *      to transferring ownership of a page frame. When a guest sees this flag
+ *      it must /not/ modify the grant entry until GTF_transfer_completed is
+ *      set by Xen.
+ *  GTF_transfer_completed: It is safe for the guest to spin-wait on this flag
+ *      after reading GTF_transfer_committed. Xen will always write the frame
+ *      address, followed by ORing this flag, in a timely manner.
+ */
+#define _GTF_transfer_committed (2)
+#define GTF_transfer_committed  (1U<<_GTF_transfer_committed)
+#define _GTF_transfer_completed (3)
+#define GTF_transfer_completed  (1U<<_GTF_transfer_completed)
+
+/*
+ * Version 2 grant table entries.  These fulfil the same role as
+ * version 1 entries, but can represent more complicated operations.
+ * Any given domain will have either a version 1 or a version 2 table,
+ * and every entry in the table will be the same version.
+ *
+ * The interface by which domains use grant references does not depend
+ * on the grant table version in use by the other domain.
+ */
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+/*
+ * Version 1 and version 2 grant entries share a common prefix.  The
+ * fields of the prefix are documented as part of struct
+ * grant_entry_v1.
+ */
+struct grant_entry_header {
+    uint16_t flags;
+    domid_t  domid;
+};
+typedef struct grant_entry_header grant_entry_header_t;
+
+/*
+ * Version 2 of the grant entry structure.
+ */
+union grant_entry_v2 {
+    grant_entry_header_t hdr;
+
+    /*
+     * This member is used for V1-style full page grants, where either:
+     *
+     * -- hdr.type is GTF_accept_transfer, or
+     * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
+     *
+     * In that case, the frame field has the same semantics as the
+     * field of the same name in the V1 entry structure.
+     */
+    struct {
+        grant_entry_header_t hdr;
+        uint32_t pad0;
+        uint64_t frame;
+    } full_page;
+
+    /*
+     * If the grant type is GTF_grant_access and GTF_sub_page is set,
+     * @domid is allowed to access bytes [@page_off,@page_off+@length)
+     * in frame @frame.
+     */
+    struct {
+        grant_entry_header_t hdr;
+        uint16_t page_off;
+        uint16_t length;
+        uint64_t frame;
+    } sub_page;
+
+    /*
+     * If the grant is GTF_transitive, @domid is allowed to use the
+     * grant @gref in domain @trans_domid, as if it was the local
+     * domain.  Obviously, the transitive access must be compatible
+     * with the original grant.
+     *
+     * The current version of Xen does not allow transitive grants
+     * to be mapped.
+     */
+    struct {
+        grant_entry_header_t hdr;
+        domid_t trans_domid;
+        uint16_t pad0;
+        grant_ref_t gref;
+    } transitive;
+
+    uint32_t __spacer[4]; /* Pad to a power of two */
+};
+typedef union grant_entry_v2 grant_entry_v2_t;
+
+typedef uint16_t grant_status_t;
+
+#endif /* __XEN_INTERFACE_VERSION__ */
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
+ */
+
+/* ` enum neg_errnoval
+ * ` HYPERVISOR_grant_table_op(enum grant_table_op cmd,
+ * `                           void *args,
+ * `                           unsigned int count)
+ * `
+ *
+ * @args points to an array of a per-command data structure. The array
+ * has @count members
+ */
+
+/* ` enum grant_table_op { // GNTTABOP_* => struct gnttab_* */
+#define GNTTABOP_map_grant_ref        0
+#define GNTTABOP_unmap_grant_ref      1
+#define GNTTABOP_setup_table          2
+#define GNTTABOP_dump_table           3
+#define GNTTABOP_transfer             4
+#define GNTTABOP_copy                 5
+#define GNTTABOP_query_size           6
+#define GNTTABOP_unmap_and_replace    7
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+#define GNTTABOP_set_version          8
+#define GNTTABOP_get_status_frames    9
+#define GNTTABOP_get_version          10
+#define GNTTABOP_swap_grant_ref              11
+#define GNTTABOP_cache_flush         12
+#endif /* __XEN_INTERFACE_VERSION__ */
+/* ` } */
+
+/*
+ * Handle to track a mapping created via a grant reference.
+ */
+typedef uint32_t grant_handle_t;
+
+/*
+ * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
+ * by devices and/or host CPUs. If successful, <handle> is a tracking number
+ * that must be presented later to destroy the mapping(s). On error, <status>
+ * is a negative status code.
+ * NOTES:
+ *  1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
+ *     via which I/O devices may access the granted frame.
+ *  2. If GNTMAP_host_map is specified then a mapping will be added at
+ *     either a host virtual address in the current address space, or at
+ *     a PTE at the specified machine address.  The type of mapping to
+ *     perform is selected through the GNTMAP_contains_pte flag, and the
+ *     address is specified in <host_addr>.
+ *  3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
+ *     host mapping is destroyed by other means then it is *NOT* guaranteed
+ *     to be accounted to the correct grant reference!
+ */
+struct gnttab_map_grant_ref {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint32_t flags;               /* GNTMAP_* */
+    grant_ref_t ref;
+    domid_t  dom;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+    grant_handle_t handle;
+    uint64_t dev_bus_addr;
+};
+typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);
+
+/*
+ * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
+ * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
+ * field is ignored. If non-zero, they must refer to a device/host mapping
+ * that is tracked by <handle>
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  3. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+struct gnttab_unmap_grant_ref {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint64_t dev_bus_addr;
+    grant_handle_t handle;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+};
+typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);
+
+/*
+ * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
+ * <nr_frames> pages. The frame addresses are written to the <frame_list>.
+ * Only <nr_frames> addresses are written, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ *  3. Xen may not support more than a single grant-table page per domain.
+ */
+struct gnttab_setup_table {
+    /* IN parameters. */
+    domid_t  dom;
+    uint32_t nr_frames;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+#if __XEN_INTERFACE_VERSION__ < 0x00040300
+    XEN_GUEST_HANDLE(ulong) frame_list;
+#else
+    XEN_GUEST_HANDLE(xen_pfn_t) frame_list;
+#endif
+};
+typedef struct gnttab_setup_table gnttab_setup_table_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);
+
+/*
+ * GNTTABOP_dump_table: Dump the contents of the grant table to the
+ * xen console. Debugging use only.
+ */
+struct gnttab_dump_table {
+    /* IN parameters. */
+    domid_t dom;
+    /* OUT parameters. */
+    int16_t status;               /* => enum grant_status */
+};
+typedef struct gnttab_dump_table gnttab_dump_table_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
+
+/*
+ * GNTTABOP_transfer: Transfer <frame> to a foreign domain. The foreign domain
+ * has previously registered its interest in the transfer via <domid, ref>.
+ *
+ * Note that, even if the transfer fails, the specified page no longer belongs
+ * to the calling domain *unless* the error is GNTST_bad_page.
+ *
+ * Note further that only PV guests can use this operation.
+ */
+struct gnttab_transfer {
+    /* IN parameters. */
+    xen_pfn_t     mfn;
+    domid_t       domid;
+    grant_ref_t   ref;
+    /* OUT parameters. */
+    int16_t       status;
+};
+typedef struct gnttab_transfer gnttab_transfer_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
+
+
+/*
+ * GNTTABOP_copy: Hypervisor based copy
+ * source and destinations can be eithers MFNs or, for foreign domains,
+ * grant references. the foreign domain has to grant read/write access
+ * in its grant table.
+ *
+ * The flags specify what type source and destinations are (either MFN
+ * or grant reference).
+ *
+ * Note that this can also be used to copy data between two domains
+ * via a third party if the source and destination domains had previously
+ * grant appropriate access to their pages to the third party.
+ *
+ * source_offset specifies an offset in the source frame, dest_offset
+ * the offset in the target frame and  len specifies the number of
+ * bytes to be copied.
+ */
+
+#define _GNTCOPY_source_gref      (0)
+#define GNTCOPY_source_gref       (1<<_GNTCOPY_source_gref)
+#define _GNTCOPY_dest_gref        (1)
+#define GNTCOPY_dest_gref         (1<<_GNTCOPY_dest_gref)
+
+struct gnttab_copy {
+    /* IN parameters. */
+    struct gnttab_copy_ptr {
+        union {
+            grant_ref_t ref;
+            xen_pfn_t   gmfn;
+        } u;
+        domid_t  domid;
+        uint16_t offset;
+    } source, dest;
+    uint16_t      len;
+    uint16_t      flags;          /* GNTCOPY_* */
+    /* OUT parameters. */
+    int16_t       status;
+};
+typedef struct gnttab_copy  gnttab_copy_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t);
+
+/*
+ * GNTTABOP_query_size: Query the current and maximum sizes of the shared
+ * grant table.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+struct gnttab_query_size {
+    /* IN parameters. */
+    domid_t  dom;
+    /* OUT parameters. */
+    uint32_t nr_frames;
+    uint32_t max_nr_frames;
+    int16_t  status;              /* => enum grant_status */
+};
+typedef struct gnttab_query_size gnttab_query_size_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
+
+/*
+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
+ * tracked by <handle> but atomically replace the page table entry with one
+ * pointing to the machine address under <new_addr>.  <new_addr> will be
+ * redirected to the null entry.
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+struct gnttab_unmap_and_replace {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint64_t new_addr;
+    grant_handle_t handle;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+};
+typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
+
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+/*
+ * GNTTABOP_set_version: Request a particular version of the grant
+ * table shared table structure.  This operation may be used to toggle
+ * between different versions, but must be performed while no grants
+ * are active.  The only defined versions are 1 and 2.
+ */
+struct gnttab_set_version {
+    /* IN/OUT parameters */
+    uint32_t version;
+};
+typedef struct gnttab_set_version gnttab_set_version_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t);
+
+
+/*
+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
+ * status for <dom>. In grant format version 2, the status is separated
+ * from the other shared grant fields to allow more efficient synchronization
+ * using barriers instead of atomic cmpexch operations.
+ * <nr_frames> specify the size of vector <frame_list>.
+ * The frame addresses are returned in the <frame_list>.
+ * Only <nr_frames> addresses are returned, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+struct gnttab_get_status_frames {
+    /* IN parameters. */
+    uint32_t nr_frames;
+    domid_t  dom;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+    XEN_GUEST_HANDLE(uint64_t) frame_list;
+};
+typedef struct gnttab_get_status_frames gnttab_get_status_frames_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t);
+
+/*
+ * GNTTABOP_get_version: Get the grant table version which is in
+ * effect for domain <dom>.
+ */
+struct gnttab_get_version {
+    /* IN parameters */
+    domid_t dom;
+    uint16_t pad;
+    /* OUT parameters */
+    uint32_t version;
+};
+typedef struct gnttab_get_version gnttab_get_version_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t);
+
+/*
+ * GNTTABOP_swap_grant_ref: Swap the contents of two grant entries.
+ */
+struct gnttab_swap_grant_ref {
+    /* IN parameters */
+    grant_ref_t ref_a;
+    grant_ref_t ref_b;
+    /* OUT parameters */
+    int16_t status;             /* => enum grant_status */
+};
+typedef struct gnttab_swap_grant_ref gnttab_swap_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t);
+
+/*
+ * Issue one or more cache maintenance operations on a portion of a
+ * page granted to the calling domain by a foreign domain.
+ */
+struct gnttab_cache_flush {
+    union {
+        uint64_t dev_bus_addr;
+        grant_ref_t ref;
+    } a;
+    uint16_t offset; /* offset from start of grant */
+    uint16_t length; /* size within the grant */
+#define GNTTAB_CACHE_CLEAN          (1u<<0)
+#define GNTTAB_CACHE_INVAL          (1u<<1)
+#define GNTTAB_CACHE_SOURCE_GREF    (1u<<31)
+    uint32_t op;
+};
+typedef struct gnttab_cache_flush gnttab_cache_flush_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_cache_flush_t);
+
+#endif /* __XEN_INTERFACE_VERSION__ */
+
+/*
+ * Bitfield values for gnttab_map_grant_ref.flags.
+ */
+ /* Map the grant entry for access by I/O devices. */
+#define _GNTMAP_device_map      (0)
+#define GNTMAP_device_map       (1<<_GNTMAP_device_map)
+ /* Map the grant entry for access by host CPUs. */
+#define _GNTMAP_host_map        (1)
+#define GNTMAP_host_map         (1<<_GNTMAP_host_map)
+ /* Accesses to the granted frame will be restricted to read-only access. */
+#define _GNTMAP_readonly        (2)
+#define GNTMAP_readonly         (1<<_GNTMAP_readonly)
+ /*
+  * GNTMAP_host_map subflag:
+  *  0 => The host mapping is usable only by the guest OS.
+  *  1 => The host mapping is usable by guest OS + current application.
+  */
+#define _GNTMAP_application_map (3)
+#define GNTMAP_application_map  (1<<_GNTMAP_application_map)
+
+ /*
+  * GNTMAP_contains_pte subflag:
+  *  0 => This map request contains a host virtual address.
+  *  1 => This map request contains the machine addess of the PTE to update.
+  */
+#define _GNTMAP_contains_pte    (4)
+#define GNTMAP_contains_pte     (1<<_GNTMAP_contains_pte)
+
+/*
+ * Bits to be placed in guest kernel available PTE bits (architecture
+ * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
+ */
+#define _GNTMAP_guest_avail0    (16)
+#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0)
+
+/*
+ * Values for error status returns. All errors are -ve.
+ */
+/* ` enum grant_status { */
+#define GNTST_okay             (0)  /* Normal return.                        */
+#define GNTST_general_error    (-1) /* General undefined error.              */
+#define GNTST_bad_domain       (-2) /* Unrecognsed domain id.                */
+#define GNTST_bad_gntref       (-3) /* Unrecognised or inappropriate gntref. */
+#define GNTST_bad_handle       (-4) /* Unrecognised or inappropriate handle. */
+#define GNTST_bad_virt_addr    (-5) /* Inappropriate virtual address to map. */
+#define GNTST_bad_dev_addr     (-6) /* Inappropriate device address to unmap.*/
+#define GNTST_no_device_space  (-7) /* Out of space in I/O MMU.              */
+#define GNTST_permission_denied (-8) /* Not enough privilege for operation.  */
+#define GNTST_bad_page         (-9) /* Specified page was invalid for op.    */
+#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary.   */
+#define GNTST_address_too_big (-11) /* transfer page address too large.      */
+#define GNTST_eagain          (-12) /* Operation not done; try again.        */
+#define GNTST_no_space        (-13) /* Out of space (handles etc).           */
+/* ` } */
+
+#define GNTTABOP_error_msgs {                   \
+    "okay",                                     \
+    "undefined error",                          \
+    "unrecognised domain id",                   \
+    "invalid grant reference",                  \
+    "invalid mapping handle",                   \
+    "invalid virtual address",                  \
+    "invalid device address",                   \
+    "no spare translation slot in the I/O MMU", \
+    "permission denied",                        \
+    "bad page",                                 \
+    "copy arguments cross page boundary",       \
+    "page address size too large",              \
+    "operation not done; try again",            \
+    "out of space",                             \
+}
+
+#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/hvm/hvm_op.h b/include/hw/xen/interface/hvm/hvm_op.h
new file mode 100644 (file)
index 0000000..e22adf0
--- /dev/null
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (c) 2007, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
+#define __XEN_PUBLIC_HVM_HVM_OP_H__
+
+#include "../xen.h"
+#include "../trace.h"
+#include "../event_channel.h"
+
+/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */
+#define HVMOP_set_param           0
+#define HVMOP_get_param           1
+struct xen_hvm_param {
+    domid_t  domid;    /* IN */
+    uint16_t pad;
+    uint32_t index;    /* IN */
+    uint64_t value;    /* IN/OUT */
+};
+typedef struct xen_hvm_param xen_hvm_param_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t);
+
+struct xen_hvm_altp2m_suppress_ve {
+    uint16_t view;
+    uint8_t suppress_ve; /* Boolean type. */
+    uint8_t pad1;
+    uint32_t pad2;
+    uint64_t gfn;
+};
+
+struct xen_hvm_altp2m_suppress_ve_multi {
+    uint16_t view;
+    uint8_t suppress_ve; /* Boolean type. */
+    uint8_t pad1;
+    int32_t first_error; /* Should be set to 0. */
+    uint64_t first_gfn; /* Value may be updated. */
+    uint64_t last_gfn;
+    uint64_t first_error_gfn; /* Gfn of the first error. */
+};
+
+#if __XEN_INTERFACE_VERSION__ < 0x00040900
+
+/* Set the logical level of one of a domain's PCI INTx wires. */
+#define HVMOP_set_pci_intx_level  2
+struct xen_hvm_set_pci_intx_level {
+    /* Domain to be updated. */
+    domid_t  domid;
+    /* PCI INTx identification in PCI topology (domain:bus:device:intx). */
+    uint8_t  domain, bus, device, intx;
+    /* Assertion level (0 = unasserted, 1 = asserted). */
+    uint8_t  level;
+};
+typedef struct xen_hvm_set_pci_intx_level xen_hvm_set_pci_intx_level_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t);
+
+/* Set the logical level of one of a domain's ISA IRQ wires. */
+#define HVMOP_set_isa_irq_level   3
+struct xen_hvm_set_isa_irq_level {
+    /* Domain to be updated. */
+    domid_t  domid;
+    /* ISA device identification, by ISA IRQ (0-15). */
+    uint8_t  isa_irq;
+    /* Assertion level (0 = unasserted, 1 = asserted). */
+    uint8_t  level;
+};
+typedef struct xen_hvm_set_isa_irq_level xen_hvm_set_isa_irq_level_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t);
+
+#define HVMOP_set_pci_link_route  4
+struct xen_hvm_set_pci_link_route {
+    /* Domain to be updated. */
+    domid_t  domid;
+    /* PCI link identifier (0-3). */
+    uint8_t  link;
+    /* ISA IRQ (1-15), or 0 (disable link). */
+    uint8_t  isa_irq;
+};
+typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t);
+
+#endif /* __XEN_INTERFACE_VERSION__ < 0x00040900 */
+
+/* Flushes all VCPU TLBs: @arg must be NULL. */
+#define HVMOP_flush_tlbs          5
+
+/*
+ * hvmmem_type_t should not be defined when generating the corresponding
+ * compat header. This will ensure that the improperly named HVMMEM_(*)
+ * values are defined only once.
+ */
+#ifndef XEN_GENERATING_COMPAT_HEADERS
+
+typedef enum {
+    HVMMEM_ram_rw,             /* Normal read/write guest RAM */
+    HVMMEM_ram_ro,             /* Read-only; writes are discarded */
+    HVMMEM_mmio_dm,            /* Reads and write go to the device model */
+#if __XEN_INTERFACE_VERSION__ < 0x00040700
+    HVMMEM_mmio_write_dm,      /* Read-only; writes go to the device model */
+#else
+    HVMMEM_unused,             /* Placeholder; setting memory to this type
+                                  will fail for code after 4.7.0 */
+#endif
+    HVMMEM_ioreq_server        /* Memory type claimed by an ioreq server; type
+                                  changes to this value are only allowed after
+                                  an ioreq server has claimed its ownership.
+                                  Only pages with HVMMEM_ram_rw are allowed to
+                                  change to this type; conversely, pages with
+                                  this type are only allowed to be changed back
+                                  to HVMMEM_ram_rw. */
+} hvmmem_type_t;
+
+#endif /* XEN_GENERATING_COMPAT_HEADERS */
+
+/* Hint from PV drivers for pagetable destruction. */
+#define HVMOP_pagetable_dying        9
+struct xen_hvm_pagetable_dying {
+    /* Domain with a pagetable about to be destroyed. */
+    domid_t  domid;
+    uint16_t pad[3]; /* align next field on 8-byte boundary */
+    /* guest physical address of the toplevel pagetable dying */
+    uint64_t gpa;
+};
+typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t);
+
+/* Get the current Xen time, in nanoseconds since system boot. */
+#define HVMOP_get_time              10
+struct xen_hvm_get_time {
+    uint64_t now;      /* OUT */
+};
+typedef struct xen_hvm_get_time xen_hvm_get_time_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_time_t);
+
+#define HVMOP_xentrace              11
+struct xen_hvm_xentrace {
+    uint16_t event, extra_bytes;
+    uint8_t extra[TRACE_EXTRA_MAX * sizeof(uint32_t)];
+};
+typedef struct xen_hvm_xentrace xen_hvm_xentrace_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_xentrace_t);
+
+/* Following tools-only interfaces may change in future. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+/* Deprecated by XENMEM_access_op_set_access */
+#define HVMOP_set_mem_access        12
+
+/* Deprecated by XENMEM_access_op_get_access */
+#define HVMOP_get_mem_access        13
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+#define HVMOP_get_mem_type    15
+/* Return hvmmem_type_t for the specified pfn. */
+struct xen_hvm_get_mem_type {
+    /* Domain to be queried. */
+    domid_t domid;
+    /* OUT variable. */
+    uint16_t mem_type;
+    uint16_t pad[2]; /* align next field on 8-byte boundary */
+    /* IN variable. */
+    uint64_t pfn;
+};
+typedef struct xen_hvm_get_mem_type xen_hvm_get_mem_type_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_type_t);
+
+/* Following tools-only interfaces may change in future. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+/*
+ * Definitions relating to DMOP_create_ioreq_server. (Defined here for
+ * backwards compatibility).
+ */
+
+#define HVM_IOREQSRV_BUFIOREQ_OFF    0
+#define HVM_IOREQSRV_BUFIOREQ_LEGACY 1
+/*
+ * Use this when read_pointer gets updated atomically and
+ * the pointer pair gets read atomically:
+ */
+#define HVM_IOREQSRV_BUFIOREQ_ATOMIC 2
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/*
+ * HVMOP_set_evtchn_upcall_vector: Set a <vector> that should be used for event
+ *                                 channel upcalls on the specified <vcpu>. If set,
+ *                                 this vector will be used in preference to the
+ *                                 domain global callback via (see
+ *                                 HVM_PARAM_CALLBACK_IRQ).
+ */
+#define HVMOP_set_evtchn_upcall_vector 23
+struct xen_hvm_evtchn_upcall_vector {
+    uint32_t vcpu;
+    uint8_t vector;
+};
+typedef struct xen_hvm_evtchn_upcall_vector xen_hvm_evtchn_upcall_vector_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_evtchn_upcall_vector_t);
+
+#endif /* defined(__i386__) || defined(__x86_64__) */
+
+#define HVMOP_guest_request_vm_event 24
+
+/* HVMOP_altp2m: perform altp2m state operations */
+#define HVMOP_altp2m 25
+
+#define HVMOP_ALTP2M_INTERFACE_VERSION 0x00000001
+
+struct xen_hvm_altp2m_domain_state {
+    /* IN or OUT variable on/off */
+    uint8_t state;
+};
+typedef struct xen_hvm_altp2m_domain_state xen_hvm_altp2m_domain_state_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_domain_state_t);
+
+struct xen_hvm_altp2m_vcpu_enable_notify {
+    uint32_t vcpu_id;
+    uint32_t pad;
+    /* #VE info area gfn */
+    uint64_t gfn;
+};
+typedef struct xen_hvm_altp2m_vcpu_enable_notify xen_hvm_altp2m_vcpu_enable_notify_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_vcpu_enable_notify_t);
+
+struct xen_hvm_altp2m_vcpu_disable_notify {
+    uint32_t vcpu_id;
+};
+typedef struct xen_hvm_altp2m_vcpu_disable_notify xen_hvm_altp2m_vcpu_disable_notify_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_vcpu_disable_notify_t);
+
+struct xen_hvm_altp2m_view {
+    /* IN/OUT variable */
+    uint16_t view;
+    uint16_t hvmmem_default_access; /* xenmem_access_t */
+};
+typedef struct xen_hvm_altp2m_view xen_hvm_altp2m_view_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_view_t);
+
+#if __XEN_INTERFACE_VERSION__ < 0x00040a00
+struct xen_hvm_altp2m_set_mem_access {
+    /* view */
+    uint16_t view;
+    /* Memory type */
+    uint16_t access; /* xenmem_access_t */
+    uint32_t pad;
+    /* gfn */
+    uint64_t gfn;
+};
+typedef struct xen_hvm_altp2m_set_mem_access xen_hvm_altp2m_set_mem_access_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_set_mem_access_t);
+#endif /* __XEN_INTERFACE_VERSION__ < 0x00040a00 */
+
+struct xen_hvm_altp2m_mem_access {
+    /* view */
+    uint16_t view;
+    /* Memory type */
+    uint16_t access; /* xenmem_access_t */
+    uint32_t pad;
+    /* gfn */
+    uint64_t gfn;
+};
+typedef struct xen_hvm_altp2m_mem_access xen_hvm_altp2m_mem_access_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_mem_access_t);
+
+struct xen_hvm_altp2m_set_mem_access_multi {
+    /* view */
+    uint16_t view;
+    uint16_t pad;
+    /* Number of pages */
+    uint32_t nr;
+    /*
+     * Used for continuation purposes.
+     * Must be set to zero upon initial invocation.
+     */
+    uint64_t opaque;
+    /* List of pfns to set access for */
+    XEN_GUEST_HANDLE(const_uint64) pfn_list;
+    /* Corresponding list of access settings for pfn_list */
+    XEN_GUEST_HANDLE(const_uint8) access_list;
+};
+
+struct xen_hvm_altp2m_change_gfn {
+    /* view */
+    uint16_t view;
+    uint16_t pad1;
+    uint32_t pad2;
+    /* old gfn */
+    uint64_t old_gfn;
+    /* new gfn, INVALID_GFN (~0UL) means revert */
+    uint64_t new_gfn;
+};
+typedef struct xen_hvm_altp2m_change_gfn xen_hvm_altp2m_change_gfn_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_change_gfn_t);
+
+struct xen_hvm_altp2m_get_vcpu_p2m_idx {
+    uint32_t vcpu_id;
+    uint16_t altp2m_idx;
+};
+
+struct xen_hvm_altp2m_set_visibility {
+    uint16_t altp2m_idx;
+    uint8_t visible;
+    uint8_t pad;
+};
+
+struct xen_hvm_altp2m_op {
+    uint32_t version;   /* HVMOP_ALTP2M_INTERFACE_VERSION */
+    uint32_t cmd;
+/* Get/set the altp2m state for a domain */
+#define HVMOP_altp2m_get_domain_state     1
+#define HVMOP_altp2m_set_domain_state     2
+/* Set a given VCPU to receive altp2m event notifications */
+#define HVMOP_altp2m_vcpu_enable_notify   3
+/* Create a new view */
+#define HVMOP_altp2m_create_p2m           4
+/* Destroy a view */
+#define HVMOP_altp2m_destroy_p2m          5
+/* Switch view for an entire domain */
+#define HVMOP_altp2m_switch_p2m           6
+/* Notify that a page of memory is to have specific access types */
+#define HVMOP_altp2m_set_mem_access       7
+/* Change a p2m entry to have a different gfn->mfn mapping */
+#define HVMOP_altp2m_change_gfn           8
+/* Set access for an array of pages */
+#define HVMOP_altp2m_set_mem_access_multi 9
+/* Set the "Suppress #VE" bit on a page */
+#define HVMOP_altp2m_set_suppress_ve      10
+/* Get the "Suppress #VE" bit of a page */
+#define HVMOP_altp2m_get_suppress_ve      11
+/* Get the access of a page of memory from a certain view */
+#define HVMOP_altp2m_get_mem_access       12
+/* Disable altp2m event notifications for a given VCPU */
+#define HVMOP_altp2m_vcpu_disable_notify  13
+/* Get the active vcpu p2m index */
+#define HVMOP_altp2m_get_p2m_idx          14
+/* Set the "Supress #VE" bit for a range of pages */
+#define HVMOP_altp2m_set_suppress_ve_multi 15
+/* Set visibility for a given altp2m view */
+#define HVMOP_altp2m_set_visibility       16
+    domid_t domain;
+    uint16_t pad1;
+    uint32_t pad2;
+    union {
+        struct xen_hvm_altp2m_domain_state         domain_state;
+        struct xen_hvm_altp2m_vcpu_enable_notify   enable_notify;
+        struct xen_hvm_altp2m_view                 view;
+#if __XEN_INTERFACE_VERSION__ < 0x00040a00
+        struct xen_hvm_altp2m_set_mem_access       set_mem_access;
+#endif /* __XEN_INTERFACE_VERSION__ < 0x00040a00 */
+        struct xen_hvm_altp2m_mem_access           mem_access;
+        struct xen_hvm_altp2m_change_gfn           change_gfn;
+        struct xen_hvm_altp2m_set_mem_access_multi set_mem_access_multi;
+        struct xen_hvm_altp2m_suppress_ve          suppress_ve;
+        struct xen_hvm_altp2m_suppress_ve_multi    suppress_ve_multi;
+        struct xen_hvm_altp2m_vcpu_disable_notify  disable_notify;
+        struct xen_hvm_altp2m_get_vcpu_p2m_idx     get_vcpu_p2m_idx;
+        struct xen_hvm_altp2m_set_visibility       set_visibility;
+        uint8_t pad[64];
+    } u;
+};
+typedef struct xen_hvm_altp2m_op xen_hvm_altp2m_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_op_t);
+
+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/hvm/params.h b/include/hw/xen/interface/hvm/params.h
new file mode 100644 (file)
index 0000000..a22b4ed
--- /dev/null
@@ -0,0 +1,301 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (c) 2007, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
+#define __XEN_PUBLIC_HVM_PARAMS_H__
+
+#include "hvm_op.h"
+
+/* These parameters are deprecated and their meaning is undefined. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+#define HVM_PARAM_PAE_ENABLED                4
+#define HVM_PARAM_DM_DOMAIN                 13
+#define HVM_PARAM_MEMORY_EVENT_CR0          20
+#define HVM_PARAM_MEMORY_EVENT_CR3          21
+#define HVM_PARAM_MEMORY_EVENT_CR4          22
+#define HVM_PARAM_MEMORY_EVENT_INT3         23
+#define HVM_PARAM_NESTEDHVM                 24
+#define HVM_PARAM_MEMORY_EVENT_SINGLE_STEP  25
+#define HVM_PARAM_BUFIOREQ_EVTCHN           26
+#define HVM_PARAM_MEMORY_EVENT_MSR          30
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+/*
+ * Parameter space for HVMOP_{set,get}_param.
+ */
+
+#define HVM_PARAM_CALLBACK_IRQ 0
+#define HVM_PARAM_CALLBACK_IRQ_TYPE_MASK xen_mk_ullong(0xFF00000000000000)
+/*
+ * How should CPU0 event-channel notifications be delivered?
+ *
+ * If val == 0 then CPU0 event-channel notifications are not delivered.
+ * If val != 0, val[63:56] encodes the type, as follows:
+ */
+
+#define HVM_PARAM_CALLBACK_TYPE_GSI      0
+/*
+ * val[55:0] is a delivery GSI.  GSI 0 cannot be used, as it aliases val == 0,
+ * and disables all notifications.
+ */
+
+#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1
+/*
+ * val[55:0] is a delivery PCI INTx line:
+ * Domain = val[47:32], Bus = val[31:16] DevFn = val[15:8], IntX = val[1:0]
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+#define HVM_PARAM_CALLBACK_TYPE_VECTOR   2
+/*
+ * val[7:0] is a vector number.  Check for XENFEAT_hvm_callback_vector to know
+ * if this delivery method is available.
+ */
+#elif defined(__arm__) || defined(__aarch64__)
+#define HVM_PARAM_CALLBACK_TYPE_PPI      2
+/*
+ * val[55:16] needs to be zero.
+ * val[15:8] is interrupt flag of the PPI used by event-channel:
+ *  bit 8: the PPI is edge(1) or level(0) triggered
+ *  bit 9: the PPI is active low(1) or high(0)
+ * val[7:0] is a PPI number used by event-channel.
+ * This is only used by ARM/ARM64 and masking/eoi the interrupt associated to
+ * the notification is handled by the interrupt controller.
+ */
+#define HVM_PARAM_CALLBACK_TYPE_PPI_FLAG_MASK      0xFF00
+#define HVM_PARAM_CALLBACK_TYPE_PPI_FLAG_LOW_LEVEL 2
+#endif
+
+/*
+ * These are not used by Xen. They are here for convenience of HVM-guest
+ * xenbus implementations.
+ */
+#define HVM_PARAM_STORE_PFN    1
+#define HVM_PARAM_STORE_EVTCHN 2
+
+#define HVM_PARAM_IOREQ_PFN    5
+
+#define HVM_PARAM_BUFIOREQ_PFN 6
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/*
+ * Viridian enlightenments
+ *
+ * (See http://download.microsoft.com/download/A/B/4/AB43A34E-BDD0-4FA6-BDEF-79EEF16E880B/Hypervisor%20Top%20Level%20Functional%20Specification%20v4.0.docx)
+ *
+ * To expose viridian enlightenments to the guest set this parameter
+ * to the desired feature mask. The base feature set must be present
+ * in any valid feature mask.
+ */
+#define HVM_PARAM_VIRIDIAN     9
+
+/* Base+Freq viridian feature sets:
+ *
+ * - Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL)
+ * - APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
+ * - Virtual Processor index MSR (HV_X64_MSR_VP_INDEX)
+ * - Timer frequency MSRs (HV_X64_MSR_TSC_FREQUENCY and
+ *   HV_X64_MSR_APIC_FREQUENCY)
+ */
+#define _HVMPV_base_freq 0
+#define HVMPV_base_freq  (1 << _HVMPV_base_freq)
+
+/* Feature set modifications */
+
+/* Disable timer frequency MSRs (HV_X64_MSR_TSC_FREQUENCY and
+ * HV_X64_MSR_APIC_FREQUENCY).
+ * This modification restores the viridian feature set to the
+ * original 'base' set exposed in releases prior to Xen 4.4.
+ */
+#define _HVMPV_no_freq 1
+#define HVMPV_no_freq  (1 << _HVMPV_no_freq)
+
+/* Enable Partition Time Reference Counter (HV_X64_MSR_TIME_REF_COUNT) */
+#define _HVMPV_time_ref_count 2
+#define HVMPV_time_ref_count  (1 << _HVMPV_time_ref_count)
+
+/* Enable Reference TSC Page (HV_X64_MSR_REFERENCE_TSC) */
+#define _HVMPV_reference_tsc 3
+#define HVMPV_reference_tsc  (1 << _HVMPV_reference_tsc)
+
+/* Use Hypercall for remote TLB flush */
+#define _HVMPV_hcall_remote_tlb_flush 4
+#define HVMPV_hcall_remote_tlb_flush (1 << _HVMPV_hcall_remote_tlb_flush)
+
+/* Use APIC assist */
+#define _HVMPV_apic_assist 5
+#define HVMPV_apic_assist (1 << _HVMPV_apic_assist)
+
+/* Enable crash MSRs */
+#define _HVMPV_crash_ctl 6
+#define HVMPV_crash_ctl (1 << _HVMPV_crash_ctl)
+
+/* Enable SYNIC MSRs */
+#define _HVMPV_synic 7
+#define HVMPV_synic (1 << _HVMPV_synic)
+
+/* Enable STIMER MSRs */
+#define _HVMPV_stimer 8
+#define HVMPV_stimer (1 << _HVMPV_stimer)
+
+/* Use Synthetic Cluster IPI Hypercall */
+#define _HVMPV_hcall_ipi 9
+#define HVMPV_hcall_ipi (1 << _HVMPV_hcall_ipi)
+
+/* Enable ExProcessorMasks */
+#define _HVMPV_ex_processor_masks 10
+#define HVMPV_ex_processor_masks (1 << _HVMPV_ex_processor_masks)
+
+/* Allow more than 64 VPs */
+#define _HVMPV_no_vp_limit 11
+#define HVMPV_no_vp_limit (1 << _HVMPV_no_vp_limit)
+
+/* Enable vCPU hotplug */
+#define _HVMPV_cpu_hotplug 12
+#define HVMPV_cpu_hotplug (1 << _HVMPV_cpu_hotplug)
+
+#define HVMPV_feature_mask \
+        (HVMPV_base_freq | \
+         HVMPV_no_freq | \
+         HVMPV_time_ref_count | \
+         HVMPV_reference_tsc | \
+         HVMPV_hcall_remote_tlb_flush | \
+         HVMPV_apic_assist | \
+         HVMPV_crash_ctl | \
+         HVMPV_synic | \
+         HVMPV_stimer | \
+         HVMPV_hcall_ipi | \
+         HVMPV_ex_processor_masks | \
+         HVMPV_no_vp_limit | \
+         HVMPV_cpu_hotplug)
+
+#endif
+
+/*
+ * Set mode for virtual timers (currently x86 only):
+ *  delay_for_missed_ticks (default):
+ *   Do not advance a vcpu's time beyond the correct delivery time for
+ *   interrupts that have been missed due to preemption. Deliver missed
+ *   interrupts when the vcpu is rescheduled and advance the vcpu's virtual
+ *   time stepwise for each one.
+ *  no_delay_for_missed_ticks:
+ *   As above, missed interrupts are delivered, but guest time always tracks
+ *   wallclock (i.e., real) time while doing so.
+ *  no_missed_ticks_pending:
+ *   No missed interrupts are held pending. Instead, to ensure ticks are
+ *   delivered at some non-zero rate, if we detect missed ticks then the
+ *   internal tick alarm is not disabled if the VCPU is preempted during the
+ *   next tick period.
+ *  one_missed_tick_pending:
+ *   Missed interrupts are collapsed together and delivered as one 'late tick'.
+ *   Guest time always tracks wallclock (i.e., real) time.
+ */
+#define HVM_PARAM_TIMER_MODE   10
+#define HVMPTM_delay_for_missed_ticks    0
+#define HVMPTM_no_delay_for_missed_ticks 1
+#define HVMPTM_no_missed_ticks_pending   2
+#define HVMPTM_one_missed_tick_pending   3
+
+/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
+#define HVM_PARAM_HPET_ENABLED 11
+
+/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
+#define HVM_PARAM_IDENT_PT     12
+
+/* ACPI S state: currently support S0 and S3 on x86. */
+#define HVM_PARAM_ACPI_S_STATE 14
+
+/* TSS used on Intel when CR0.PE=0. */
+#define HVM_PARAM_VM86_TSS     15
+
+/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
+#define HVM_PARAM_VPT_ALIGN    16
+
+/* Console debug shared memory ring and event channel */
+#define HVM_PARAM_CONSOLE_PFN    17
+#define HVM_PARAM_CONSOLE_EVTCHN 18
+
+/*
+ * Select location of ACPI PM1a and TMR control blocks. Currently two locations
+ * are supported, specified by version 0 or 1 in this parameter:
+ *   - 0: default, use the old addresses
+ *        PM1A_EVT == 0x1f40; PM1A_CNT == 0x1f44; PM_TMR == 0x1f48
+ *   - 1: use the new default qemu addresses
+ *        PM1A_EVT == 0xb000; PM1A_CNT == 0xb004; PM_TMR == 0xb008
+ * You can find these address definitions in <hvm/ioreq.h>
+ */
+#define HVM_PARAM_ACPI_IOPORTS_LOCATION 19
+
+/* Params for the mem event rings */
+#define HVM_PARAM_PAGING_RING_PFN   27
+#define HVM_PARAM_MONITOR_RING_PFN  28
+#define HVM_PARAM_SHARING_RING_PFN  29
+
+/* SHUTDOWN_* action in case of a triple fault */
+#define HVM_PARAM_TRIPLE_FAULT_REASON 31
+
+#define HVM_PARAM_IOREQ_SERVER_PFN 32
+#define HVM_PARAM_NR_IOREQ_SERVER_PAGES 33
+
+/* Location of the VM Generation ID in guest physical address space. */
+#define HVM_PARAM_VM_GENERATION_ID_ADDR 34
+
+/*
+ * Set mode for altp2m:
+ *  disabled: don't activate altp2m (default)
+ *  mixed: allow access to all altp2m ops for both in-guest and external tools
+ *  external: allow access to external privileged tools only
+ *  limited: guest only has limited access (ie. control VMFUNC and #VE)
+ *
+ * Note that 'mixed' mode has not been evaluated for safety from a
+ * security perspective.  Before using this mode in a
+ * security-critical environment, each subop should be evaluated for
+ * safety, with unsafe subops blacklisted in XSM.
+ */
+#define HVM_PARAM_ALTP2M       35
+#define XEN_ALTP2M_disabled      0
+#define XEN_ALTP2M_mixed         1
+#define XEN_ALTP2M_external      2
+#define XEN_ALTP2M_limited       3
+
+/*
+ * Size of the x87 FPU FIP/FDP registers that the hypervisor needs to
+ * save/restore.  This is a workaround for a hardware limitation that
+ * does not allow the full FIP/FDP and FCS/FDS to be restored.
+ *
+ * Valid values are:
+ *
+ * 8: save/restore 64-bit FIP/FDP and clear FCS/FDS (default if CPU
+ *    has FPCSDS feature).
+ *
+ * 4: save/restore 32-bit FIP/FDP, FCS/FDS, and clear upper 32-bits of
+ *    FIP/FDP.
+ *
+ * 0: allow hypervisor to choose based on the value of FIP/FDP
+ *    (default if CPU does not have FPCSDS).
+ *
+ * If FPCSDS (bit 13 in CPUID leaf 0x7, subleaf 0x0) is set, the CPU
+ * never saves FCS/FDS and this parameter should be left at the
+ * default of 8.
+ */
+#define HVM_PARAM_X87_FIP_WIDTH 36
+
+/*
+ * TSS (and its size) used on Intel when CR0.PE=0. The address occupies
+ * the low 32 bits, while the size is in the high 32 ones.
+ */
+#define HVM_PARAM_VM86_TSS_SIZED 37
+
+/* Enable MCA capabilities. */
+#define HVM_PARAM_MCA_CAP 38
+#define XEN_HVM_MCA_CAP_LMCE   (xen_mk_ullong(1) << 0)
+#define XEN_HVM_MCA_CAP_MASK   XEN_HVM_MCA_CAP_LMCE
+
+#define HVM_NR_PARAMS 39
+
+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
diff --git a/include/hw/xen/interface/io/blkif.h b/include/hw/xen/interface/io/blkif.h
new file mode 100644 (file)
index 0000000..22f1eef
--- /dev/null
@@ -0,0 +1,713 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * blkif.h
+ *
+ * Unified block-device I/O interface for Xen guest OSes.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ * Copyright (c) 2012, Spectra Logic Corporation
+ */
+
+#ifndef __XEN_PUBLIC_IO_BLKIF_H__
+#define __XEN_PUBLIC_IO_BLKIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Front->back notifications: When enqueuing a new request, sending a
+ * notification can be made conditional on req_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Backends must set
+ * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
+ *
+ * Back->front notifications: When enqueuing a new response, sending a
+ * notification can be made conditional on rsp_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Frontends must set
+ * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
+ */
+
+#ifndef blkif_vdev_t
+#define blkif_vdev_t   uint16_t
+#endif
+#define blkif_sector_t uint64_t
+
+/*
+ * Feature and Parameter Negotiation
+ * =================================
+ * The two halves of a Xen block driver utilize nodes within the XenStore to
+ * communicate capabilities and to negotiate operating parameters.  This
+ * section enumerates these nodes which reside in the respective front and
+ * backend portions of the XenStore, following the XenBus convention.
+ *
+ * All data in the XenStore is stored as strings.  Nodes specifying numeric
+ * values are encoded in decimal.  Integer value ranges listed below are
+ * expressed as fixed sized integer types capable of storing the conversion
+ * of a properly formated node string, without loss of information.
+ *
+ * Any specified default value is in effect if the corresponding XenBus node
+ * is not present in the XenStore.
+ *
+ * XenStore nodes in sections marked "PRIVATE" are solely for use by the
+ * driver side whose XenBus tree contains them.
+ *
+ * XenStore nodes marked "DEPRECATED" in their notes section should only be
+ * used to provide interoperability with legacy implementations.
+ *
+ * See the XenBus state transition diagram below for details on when XenBus
+ * nodes must be published and when they can be queried.
+ *
+ *****************************************************************************
+ *                            Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------ Backend Device Identification (PRIVATE) ------------------
+ *
+ * mode
+ *      Values:         "r" (read only), "w" (writable)
+ *
+ *      The read or write access permissions to the backing store to be
+ *      granted to the frontend.
+ *
+ * params
+ *      Values:         string
+ *
+ *      A free formatted string providing sufficient information for the
+ *      hotplug script to attach the device and provide a suitable
+ *      handler (ie: a block device) for blkback to use.
+ *
+ * physical-device
+ *      Values:         "MAJOR:MINOR"
+ *      Notes: 11
+ *
+ *      MAJOR and MINOR are the major number and minor number of the
+ *      backing device respectively.
+ *
+ * physical-device-path
+ *      Values:         path string
+ *
+ *      A string that contains the absolute path to the disk image. On
+ *      NetBSD and Linux this is always a block device, while on FreeBSD
+ *      it can be either a block device or a regular file.
+ *
+ * type
+ *      Values:         "file", "phy", "tap"
+ *
+ *      The type of the backing device/object.
+ *
+ *
+ * direct-io-safe
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      The underlying storage is not affected by the direct IO memory
+ *      lifetime bug.  See:
+ *        https://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
+ *
+ *      Therefore this option gives the backend permission to use
+ *      O_DIRECT, notwithstanding that bug.
+ *
+ *      That is, if this option is enabled, use of O_DIRECT is safe,
+ *      in circumstances where we would normally have avoided it as a
+ *      workaround for that bug.  This option is not relevant for all
+ *      backends, and even not necessarily supported for those for
+ *      which it is relevant.  A backend which knows that it is not
+ *      affected by the bug can ignore this option.
+ *
+ *      This option doesn't require a backend to use O_DIRECT, so it
+ *      should not be used to try to control the caching behaviour.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-barrier
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_WRITE_BARRIER request opcode.  Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-flush-cache
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_FLUSH_DISKCACHE request opcode.  Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-discard
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_DISCARD request opcode.  Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-persistent
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes: 7
+ *
+ *      A value of "1" indicates that the backend can keep the grants used
+ *      by the frontend driver mapped, so the same set of grants should be
+ *      used in all transactions. The maximum number of grants the backend
+ *      can map persistently depends on the implementation, but ideally it
+ *      should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this
+ *      feature the backend doesn't need to unmap each grant, preventing
+ *      costly TLB flushes. The backend driver should only map grants
+ *      persistently if the frontend supports it. If a backend driver chooses
+ *      to use the persistent protocol when the frontend doesn't support it,
+ *      it will probably hit the maximum number of persistently mapped grants
+ *      (due to the fact that the frontend won't be reusing the same grants),
+ *      and fall back to non-persistent mode. Backend implementations may
+ *      shrink or expand the number of persistently mapped grants without
+ *      notifying the frontend depending on memory constraints (this might
+ *      cause a performance degradation).
+ *
+ *      If a backend driver wants to limit the maximum number of persistently
+ *      mapped grants to a value less than RING_SIZE *
+ *      BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be used to
+ *      discard the grants that are less commonly used. Using a LRU in the
+ *      backend driver paired with a LIFO queue in the frontend will
+ *      allow us to have better performance in this scenario.
+ *
+ *----------------------- Request Transport Parameters ------------------------
+ *
+ * max-ring-page-order
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Notes:          1, 3
+ *
+ *      The maximum supported size of the request ring buffer in units of
+ *      lb(machine pages). (e.g. 0 == 1 page,  1 = 2 pages, 2 == 4 pages,
+ *      etc.).
+ *
+ * max-ring-pages
+ *      Values:         <uint32_t>
+ *      Default Value:  1
+ *      Notes:          DEPRECATED, 2, 3
+ *
+ *      The maximum supported size of the request ring buffer in units of
+ *      machine pages.  The value must be a power of 2.
+ *
+ *------------------------- Backend Device Properties -------------------------
+ *
+ * discard-enable
+ *      Values:         0/1 (boolean)
+ *      Default Value:  1
+ *
+ *      This optional property, set by the toolstack, instructs the backend
+ *      to offer (or not to offer) discard to the frontend. If the property
+ *      is missing the backend should offer discard if the backing storage
+ *      actually supports it.
+ *
+ * discard-alignment
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Notes:          4, 5
+ *
+ *      The offset, in bytes from the beginning of the virtual block device,
+ *      to the first, addressable, discard extent on the underlying device.
+ *
+ * discard-granularity
+ *      Values:         <uint32_t>
+ *      Default Value:  <"sector-size">
+ *      Notes:          4
+ *
+ *      The size, in bytes, of the individually addressable discard extents
+ *      of the underlying device.
+ *
+ * discard-secure
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes:          10
+ *
+ *      A value of "1" indicates that the backend can process BLKIF_OP_DISCARD
+ *      requests with the BLKIF_DISCARD_SECURE flag set.
+ *
+ * info
+ *      Values:         <uint32_t> (bitmap)
+ *
+ *      A collection of bit flags describing attributes of the backing
+ *      device.  The VDISK_* macros define the meaning of each bit
+ *      location.
+ *
+ * sector-size
+ *      Values:         <uint32_t>
+ *
+ *      The logical block size, in bytes, of the underlying storage. This
+ *      must be a power of two with a minimum value of 512.
+ *
+ *      NOTE: Because of implementation bugs in some frontends this must be
+ *            set to 512, unless the frontend advertizes a non-zero value
+ *            in its "feature-large-sector-size" xenbus node. (See below).
+ *
+ * physical-sector-size
+ *      Values:         <uint32_t>
+ *      Default Value:  <"sector-size">
+ *
+ *      The physical block size, in bytes, of the backend storage. This
+ *      must be an integer multiple of "sector-size".
+ *
+ * sectors
+ *      Values:         <uint64_t>
+ *
+ *      The size of the backend device, expressed in units of "sector-size".
+ *      The product of "sector-size" and "sectors" must also be an integer
+ *      multiple of "physical-sector-size", if that node is present.
+ *
+ *****************************************************************************
+ *                            Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ *      Values:         <uint32_t>
+ *
+ *      The identifier of the Xen event channel used to signal activity
+ *      in the ring buffer.
+ *
+ * ring-ref
+ *      Values:         <uint32_t>
+ *      Notes:          6
+ *
+ *      The Xen grant reference granting permission for the backend to map
+ *      the sole page in a single page sized ring buffer.
+ *
+ * ring-ref%u
+ *      Values:         <uint32_t>
+ *      Notes:          6
+ *
+ *      For a frontend providing a multi-page ring, a "number of ring pages"
+ *      sized list of nodes, each containing a Xen grant reference granting
+ *      permission for the backend to map the page of the ring located
+ *      at page index "%u".  Page indexes are zero based.
+ *
+ * protocol
+ *      Values:         string (XEN_IO_PROTO_ABI_*)
+ *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
+ *
+ *      The machine ABI rules governing the format of all ring request and
+ *      response structures.
+ *
+ * ring-page-order
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Maximum Value:  MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
+ *      Notes:          1, 3
+ *
+ *      The size of the frontend allocated request ring buffer in units
+ *      of lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages,
+ *      etc.).
+ *
+ * num-ring-pages
+ *      Values:         <uint32_t>
+ *      Default Value:  1
+ *      Maximum Value:  MAX(max-ring-pages,(0x1 << max-ring-page-order))
+ *      Notes:          DEPRECATED, 2, 3
+ *
+ *      The size of the frontend allocated request ring buffer in units of
+ *      machine pages.  The value must be a power of 2.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-persistent
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes: 7, 8, 9
+ *
+ *      A value of "1" indicates that the frontend will reuse the same grants
+ *      for all transactions, allowing the backend to map them with write
+ *      access (even when it should be read-only). If the frontend hits the
+ *      maximum number of allowed persistently mapped grants, it can fallback
+ *      to non persistent mode. This will cause a performance degradation,
+ *      since the the backend driver will still try to map those grants
+ *      persistently. Since the persistent grants protocol is compatible with
+ *      the previous protocol, a frontend driver can choose to work in
+ *      persistent mode even when the backend doesn't support it.
+ *
+ *      It is recommended that the frontend driver stores the persistently
+ *      mapped grants in a LIFO queue, so a subset of all persistently mapped
+ *      grants gets used commonly. This is done in case the backend driver
+ *      decides to limit the maximum number of persistently mapped grants
+ *      to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *
+ * feature-large-sector-size
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the frontend will correctly supply and
+ *      interpret all sector-based quantities in terms of the "sector-size"
+ *      value supplied in the backend info, whatever that may be set to.
+ *      If this node is not present or its value is "0" then it is assumed
+ *      that the frontend requires that the logical block size is 512 as it
+ *      is hardcoded (which is the case in some frontend implementations).
+ *
+ * trusted
+ *      Values:         0/1 (boolean)
+ *      Default value:  1
+ *
+ *      A value of "0" indicates that the frontend should not trust the
+ *      backend, and should deploy whatever measures available to protect from
+ *      a malicious backend on the other end.
+ *
+ *------------------------- Virtual Device Properties -------------------------
+ *
+ * device-type
+ *      Values:         "disk", "cdrom", "floppy", etc.
+ *
+ * virtual-device
+ *      Values:         <uint32_t>
+ *
+ *      A value indicating the physical device to virtualize within the
+ *      frontend's domain.  (e.g. "The first ATA disk", "The third SCSI
+ *      disk", etc.)
+ *
+ *      See docs/misc/vbd-interface.txt for details on the format of this
+ *      value.
+ *
+ * Notes
+ * -----
+ * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
+ *     PV drivers.
+ * (2) Multi-page ring buffer scheme first used in some RedHat distributions
+ *     including a distribution deployed on certain nodes of the Amazon
+ *     EC2 cluster.
+ * (3) Support for multi-page ring buffers was implemented independently,
+ *     in slightly different forms, by both Citrix and RedHat/Amazon.
+ *     For full interoperability, block front and backends should publish
+ *     identical ring parameters, adjusted for unit differences, to the
+ *     XenStore nodes used in both schemes.
+ * (4) Devices that support discard functionality may internally allocate space
+ *     (discardable extents) in units that are larger than the exported logical
+ *     block size. If the backing device has such discardable extents the
+ *     backend should provide both discard-granularity and discard-alignment.
+ *     Providing just one of the two may be considered an error by the frontend.
+ *     Backends supporting discard should include discard-granularity and
+ *     discard-alignment even if it supports discarding individual sectors.
+ *     Frontends should assume discard-alignment == 0 and discard-granularity
+ *     == sector size if these keys are missing.
+ * (5) The discard-alignment parameter allows a physical device to be
+ *     partitioned into virtual devices that do not necessarily begin or
+ *     end on a discardable extent boundary.
+ * (6) When there is only a single page allocated to the request ring,
+ *     'ring-ref' is used to communicate the grant reference for this
+ *     page to the backend.  When using a multi-page ring, the 'ring-ref'
+ *     node is not created.  Instead 'ring-ref0' - 'ring-refN' are used.
+ * (7) When using persistent grants data has to be copied from/to the page
+ *     where the grant is currently mapped. The overhead of doing this copy
+ *     however doesn't suppress the speed improvement of not having to unmap
+ *     the grants.
+ * (8) The frontend driver has to allow the backend driver to map all grants
+ *     with write access, even when they should be mapped read-only, since
+ *     further requests may reuse these grants and require write permissions.
+ * (9) Linux implementation doesn't have a limit on the maximum number of
+ *     grants that can be persistently mapped in the frontend driver, but
+ *     due to the frontent driver implementation it should never be bigger
+ *     than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *(10) The discard-secure property may be present and will be set to 1 if the
+ *     backing device supports secure discard.
+ *(11) Only used by Linux and NetBSD.
+ */
+
+/*
+ * Multiple hardware queues/rings:
+ * If supported, the backend will write the key "multi-queue-max-queues" to
+ * the directory for that vbd, and set its value to the maximum supported
+ * number of queues.
+ * Frontends that are aware of this feature and wish to use it can write the
+ * key "multi-queue-num-queues" with the number they wish to use, which must be
+ * greater than zero, and no more than the value reported by the backend in
+ * "multi-queue-max-queues".
+ *
+ * For frontends requesting just one queue, the usual event-channel and
+ * ring-ref keys are written as before, simplifying the backend processing
+ * to avoid distinguishing between a frontend that doesn't understand the
+ * multi-queue feature, and one that does, but requested only one queue.
+ *
+ * Frontends requesting two or more queues must not write the toplevel
+ * event-channel and ring-ref keys, instead writing those keys under sub-keys
+ * having the name "queue-N" where N is the integer ID of the queue/ring for
+ * which those keys belong. Queues are indexed from zero.
+ * For example, a frontend with two queues must write the following set of
+ * queue-related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ * It is also possible to use multiple queues/rings together with
+ * feature multi-page ring buffer.
+ * For example, a frontend requests two queues/rings and the size of each ring
+ * buffer is two pages must write the following set of related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/ring-page-order = "1"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ */
+
+/*
+ * STATE DIAGRAMS
+ *
+ *****************************************************************************
+ *                                   Startup                                 *
+ *****************************************************************************
+ *
+ * Tool stack creates front and back nodes with state XenbusStateInitialising.
+ *
+ * Front                                Back
+ * =================================    =====================================
+ * XenbusStateInitialising              XenbusStateInitialising
+ *  o Query virtual device               o Query backend device identification
+ *    properties.                          data.
+ *  o Setup OS device instance.          o Open and validate backend device.
+ *                                       o Publish backend features and
+ *                                         transport parameters.
+ *                                                      |
+ *                                                      |
+ *                                                      V
+ *                                      XenbusStateInitWait
+ *
+ * o Query backend features and
+ *   transport parameters.
+ * o Allocate and initialize the
+ *   request ring.
+ * o Publish transport parameters
+ *   that will be in effect during
+ *   this connection.
+ *              |
+ *              |
+ *              V
+ * XenbusStateInitialised
+ *
+ *                                       o Query frontend transport parameters.
+ *                                       o Connect to the request ring and
+ *                                         event channel.
+ *                                       o Publish backend device properties.
+ *                                                      |
+ *                                                      |
+ *                                                      V
+ *                                      XenbusStateConnected
+ *
+ *  o Query backend device properties.
+ *  o Finalize OS virtual device
+ *    instance.
+ *              |
+ *              |
+ *              V
+ * XenbusStateConnected
+ *
+ * Note: Drivers that do not support any optional features, or the negotiation
+ *       of transport parameters, can skip certain states in the state machine:
+ *
+ *       o A frontend may transition to XenbusStateInitialised without
+ *         waiting for the backend to enter XenbusStateInitWait.  In this
+ *         case, default transport parameters are in effect and any
+ *         transport parameters published by the frontend must contain
+ *         their default values.
+ *
+ *       o A backend may transition to XenbusStateInitialised, bypassing
+ *         XenbusStateInitWait, without waiting for the frontend to first
+ *         enter the XenbusStateInitialised state.  In this case, default
+ *         transport parameters are in effect and any transport parameters
+ *         published by the backend must contain their default values.
+ *
+ *       Drivers that support optional features and/or transport parameter
+ *       negotiation must tolerate these additional state transition paths.
+ *       In general this means performing the work of any skipped state
+ *       transition, if it has not already been performed, in addition to the
+ *       work associated with entry into the current state.
+ */
+
+/*
+ * REQUEST CODES.
+ */
+#define BLKIF_OP_READ              0
+#define BLKIF_OP_WRITE             1
+/*
+ * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
+ * operation code ("barrier request") must be completed prior to the
+ * execution of the barrier request.  All writes issued after the barrier
+ * request must not execute until after the completion of the barrier request.
+ *
+ * Optional.  See "feature-barrier" XenBus node documentation above.
+ */
+#define BLKIF_OP_WRITE_BARRIER     2
+/*
+ * Commit any uncommitted contents of the backing device's volatile cache
+ * to stable storage.
+ *
+ * Optional.  See "feature-flush-cache" XenBus node documentation above.
+ */
+#define BLKIF_OP_FLUSH_DISKCACHE   3
+/*
+ * Used in SLES sources for device specific command packet
+ * contained within the request. Reserved for that purpose.
+ */
+#define BLKIF_OP_RESERVED_1        4
+/*
+ * Indicate to the backend device that a region of storage is no longer in
+ * use, and may be discarded at any time without impact to the client.  If
+ * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
+ * discarded region on the device must be rendered unrecoverable before the
+ * command returns.
+ *
+ * This operation is analogous to performing a trim (ATA) or unamp (SCSI),
+ * command on a native device.
+ *
+ * More information about trim/unmap operations can be found at:
+ * http://t13.org/Documents/UploadedDocuments/docs2008/
+ *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
+ * http://www.seagate.com/staticfiles/support/disc/manuals/
+ *     Interface%20manuals/100293068c.pdf
+ *
+ * Optional.  See "feature-discard", "discard-alignment",
+ * "discard-granularity", and "discard-secure" in the XenBus node
+ * documentation above.
+ */
+#define BLKIF_OP_DISCARD           5
+
+/*
+ * Recognized if "feature-max-indirect-segments" in present in the backend
+ * xenbus info. The "feature-max-indirect-segments" node contains the maximum
+ * number of segments allowed by the backend per request. If the node is
+ * present, the frontend might use blkif_request_indirect structs in order to
+ * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
+ * maximum number of indirect segments is fixed by the backend, but the
+ * frontend can issue requests with any number of indirect segments as long as
+ * it's less than the number provided by the backend. The indirect_grefs field
+ * in blkif_request_indirect should be filled by the frontend with the
+ * grant references of the pages that are holding the indirect segments.
+ * These pages are filled with an array of blkif_request_segment that hold the
+ * information about the segments. The number of indirect pages to use is
+ * determined by the number of segments an indirect request contains. Every
+ * indirect page can contain a maximum of
+ * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
+ * calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
+ *
+ * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
+ * create the "feature-max-indirect-segments" node!
+ */
+#define BLKIF_OP_INDIRECT          6
+
+/*
+ * Maximum scatter/gather segments per request.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
+ * NB. This could be 12 if the ring indexes weren't stored in the same page.
+ */
+#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+
+/*
+ * Maximum number of indirect pages to use per request.
+ */
+#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
+
+/*
+ * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as well as
+ * 'sector_number' in blkif_request, blkif_request_discard and
+ * blkif_request_indirect are sector-based quantities. See the description
+ * of the "feature-large-sector-size" frontend xenbus node above for
+ * more information.
+ */
+struct blkif_request_segment {
+    grant_ref_t gref;        /* reference to I/O buffer frame        */
+    /* @first_sect: first sector in frame to transfer (inclusive).   */
+    /* @last_sect: last sector in frame to transfer (inclusive).     */
+    uint8_t     first_sect, last_sect;
+};
+
+/*
+ * Starting ring element for any I/O request.
+ */
+struct blkif_request {
+    uint8_t        operation;    /* BLKIF_OP_???                         */
+    uint8_t        nr_segments;  /* number of segments                   */
+    blkif_vdev_t   handle;       /* only for read/write requests         */
+    uint64_t       id;           /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+typedef struct blkif_request blkif_request_t;
+
+/*
+ * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD
+ * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request)
+ */
+struct blkif_request_discard {
+    uint8_t        operation;    /* BLKIF_OP_DISCARD                     */
+    uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
+#define BLKIF_DISCARD_SECURE (1<<0)  /* ignored if discard-secure=0      */
+    blkif_vdev_t   handle;       /* same as for read/write requests      */
+    uint64_t       id;           /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;/* start sector idx on disk             */
+    uint64_t       nr_sectors;   /* number of contiguous sectors to discard*/
+};
+typedef struct blkif_request_discard blkif_request_discard_t;
+
+struct blkif_request_indirect {
+    uint8_t        operation;    /* BLKIF_OP_INDIRECT                    */
+    uint8_t        indirect_op;  /* BLKIF_OP_{READ/WRITE}                */
+    uint16_t       nr_segments;  /* number of segments                   */
+    uint64_t       id;           /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+    blkif_vdev_t   handle;       /* same as for read/write requests      */
+    grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+#ifdef __i386__
+    uint64_t       pad;          /* Make it 64 byte aligned on i386      */
+#endif
+};
+typedef struct blkif_request_indirect blkif_request_indirect_t;
+
+struct blkif_response {
+    uint64_t        id;              /* copied from request */
+    uint8_t         operation;       /* copied from request */
+    int16_t         status;          /* BLKIF_RSP_???       */
+};
+typedef struct blkif_response blkif_response_t;
+
+/*
+ * STATUS RETURN CODES.
+ */
+ /* Operation not supported (only happens on barrier writes). */
+#define BLKIF_RSP_EOPNOTSUPP  -2
+ /* Operation failed for some unspecified reason (-EIO). */
+#define BLKIF_RSP_ERROR       -1
+ /* Operation completed successfully. */
+#define BLKIF_RSP_OKAY         0
+
+/*
+ * Generate blkif ring structures and types.
+ */
+DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
+
+#define VDISK_CDROM        0x1
+#define VDISK_REMOVABLE    0x2
+#define VDISK_READONLY     0x4
+
+#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/io/console.h b/include/hw/xen/interface/io/console.h
new file mode 100644 (file)
index 0000000..4509b4b
--- /dev/null
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * console.h
+ *
+ * Console I/O interface for Xen guest OSes.
+ *
+ * Copyright (c) 2005, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
+#define __XEN_PUBLIC_IO_CONSOLE_H__
+
+typedef uint32_t XENCONS_RING_IDX;
+
+#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1))
+
+struct xencons_interface {
+    char in[1024];
+    char out[2048];
+    XENCONS_RING_IDX in_cons, in_prod;
+    XENCONS_RING_IDX out_cons, out_prod;
+};
+
+#ifdef XEN_WANT_FLEX_CONSOLE_RING
+#include "ring.h"
+DEFINE_XEN_FLEX_RING(xencons);
+#endif
+
+#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/io/fbif.h b/include/hw/xen/interface/io/fbif.h
new file mode 100644 (file)
index 0000000..93c7319
--- /dev/null
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * fbif.h -- Xen virtual frame buffer device
+ *
+ * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
+ */
+
+#ifndef __XEN_PUBLIC_IO_FBIF_H__
+#define __XEN_PUBLIC_IO_FBIF_H__
+
+/* Out events (frontend -> backend) */
+
+/*
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ */
+
+/* Event type 1 currently not used */
+/*
+ * Framebuffer update notification event
+ * Capable frontend sets feature-update in xenstore.
+ * Backend requests it by setting request-update in xenstore.
+ */
+#define XENFB_TYPE_UPDATE 2
+
+struct xenfb_update
+{
+    uint8_t type;    /* XENFB_TYPE_UPDATE */
+    int32_t x;      /* source x */
+    int32_t y;      /* source y */
+    int32_t width;  /* rect width */
+    int32_t height; /* rect height */
+};
+
+/*
+ * Framebuffer resize notification event
+ * Capable backend sets feature-resize in xenstore.
+ */
+#define XENFB_TYPE_RESIZE 3
+
+struct xenfb_resize
+{
+    uint8_t type;    /* XENFB_TYPE_RESIZE */
+    int32_t width;   /* width in pixels */
+    int32_t height;  /* height in pixels */
+    int32_t stride;  /* stride in bytes */
+    int32_t depth;   /* depth in bits */
+    int32_t offset;  /* offset of the framebuffer in bytes */
+};
+
+#define XENFB_OUT_EVENT_SIZE 40
+
+union xenfb_out_event
+{
+    uint8_t type;
+    struct xenfb_update update;
+    struct xenfb_resize resize;
+    char pad[XENFB_OUT_EVENT_SIZE];
+};
+
+/* In events (backend -> frontend) */
+
+/*
+ * Frontends should ignore unknown in events.
+ */
+
+/*
+ * Framebuffer refresh period advice
+ * Backend sends it to advise the frontend their preferred period of
+ * refresh.  Frontends that keep the framebuffer constantly up-to-date
+ * just ignore it.  Frontends that use the advice should immediately
+ * refresh the framebuffer (and send an update notification event if
+ * those have been requested), then use the update frequency to guide
+ * their periodical refreshs.
+ */
+#define XENFB_TYPE_REFRESH_PERIOD 1
+#define XENFB_NO_REFRESH 0
+
+struct xenfb_refresh_period
+{
+    uint8_t type;    /* XENFB_TYPE_UPDATE_PERIOD */
+    uint32_t period; /* period of refresh, in ms,
+                      * XENFB_NO_REFRESH if no refresh is needed */
+};
+
+#define XENFB_IN_EVENT_SIZE 40
+
+union xenfb_in_event
+{
+    uint8_t type;
+    struct xenfb_refresh_period refresh_period;
+    char pad[XENFB_IN_EVENT_SIZE];
+};
+
+/* shared page */
+
+#define XENFB_IN_RING_SIZE 1024
+#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE)
+#define XENFB_IN_RING_OFFS 1024
+#define XENFB_IN_RING(page) \
+    ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS))
+#define XENFB_IN_RING_REF(page, idx) \
+    (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN])
+
+#define XENFB_OUT_RING_SIZE 2048
+#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE)
+#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE)
+#define XENFB_OUT_RING(page) \
+    ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS))
+#define XENFB_OUT_RING_REF(page, idx) \
+    (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN])
+
+struct xenfb_page
+{
+    uint32_t in_cons, in_prod;
+    uint32_t out_cons, out_prod;
+
+    int32_t width;          /* the width of the framebuffer (in pixels) */
+    int32_t height;         /* the height of the framebuffer (in pixels) */
+    uint32_t line_length;   /* the length of a row of pixels (in bytes) */
+    uint32_t mem_length;    /* the length of the framebuffer (in bytes) */
+    uint8_t depth;          /* the depth of a pixel (in bits) */
+
+    /*
+     * Framebuffer page directory
+     *
+     * Each directory page holds PAGE_SIZE / sizeof(*pd)
+     * framebuffer pages, and can thus map up to PAGE_SIZE *
+     * PAGE_SIZE / sizeof(*pd) bytes.  With PAGE_SIZE == 4096 and
+     * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs
+     * 64 bit.  256 directories give enough room for a 512 Meg
+     * framebuffer with a max resolution of 12,800x10,240.  Should
+     * be enough for a while with room leftover for expansion.
+     */
+    unsigned long pd[256];
+};
+
+/*
+ * Wart: xenkbd needs to know default resolution.  Put it here until a
+ * better solution is found, but don't leak it to the backend.
+ */
+#ifdef __KERNEL__
+#define XENFB_WIDTH 800
+#define XENFB_HEIGHT 600
+#define XENFB_DEPTH 32
+#endif
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/io/kbdif.h b/include/hw/xen/interface/io/kbdif.h
new file mode 100644 (file)
index 0000000..4bde6b3
--- /dev/null
@@ -0,0 +1,559 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * kbdif.h -- Xen virtual keyboard/mouse
+ *
+ * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
+ */
+
+#ifndef __XEN_PUBLIC_IO_KBDIF_H__
+#define __XEN_PUBLIC_IO_KBDIF_H__
+
+/*
+ *****************************************************************************
+ *                     Feature and Parameter Negotiation
+ *****************************************************************************
+ *
+ * The two halves of a para-virtual driver utilize nodes within
+ * XenStore to communicate capabilities and to negotiate operating parameters.
+ * This section enumerates these nodes which reside in the respective front and
+ * backend portions of XenStore, following XenBus convention.
+ *
+ * All data in XenStore is stored as strings.  Nodes specifying numeric
+ * values are encoded in decimal. Integer value ranges listed below are
+ * expressed as fixed sized integer types capable of storing the conversion
+ * of a properly formated node string, without loss of information.
+ *
+ *****************************************************************************
+ *                            Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *---------------------------- Features supported ----------------------------
+ *
+ * Capable backend advertises supported features by publishing
+ * corresponding entries in XenStore and puts 1 as the value of the entry.
+ * If a feature is not supported then 0 must be set or feature entry omitted.
+ *
+ * feature-disable-keyboard
+ *      Values:         <uint>
+ *
+ *      If there is no need to expose a virtual keyboard device by the
+ *      frontend then this must be set to 1.
+ *
+ * feature-disable-pointer
+ *      Values:         <uint>
+ *
+ *      If there is no need to expose a virtual pointer device by the
+ *      frontend then this must be set to 1.
+ *
+ * feature-abs-pointer
+ *      Values:         <uint>
+ *
+ *      Backends, which support reporting of absolute coordinates for pointer
+ *      device should set this to 1.
+ *
+ * feature-multi-touch
+ *      Values:         <uint>
+ *
+ *      Backends, which support reporting of multi-touch events
+ *      should set this to 1.
+ *
+ * feature-raw-pointer
+ *      Values:        <uint>
+ *
+ *      Backends, which support reporting raw (unscaled) absolute coordinates
+ *      for pointer devices should set this to 1. Raw (unscaled) values have
+ *      a range of [0, 0x7fff].
+ *
+ *-----------------------  Device Instance Parameters ------------------------
+ *
+ * unique-id
+ *      Values:         <string>
+ *
+ *      After device instance initialization it is assigned a unique ID,
+ *      so every instance of the frontend can be identified by the backend
+ *      by this ID. This can be UUID or such.
+ *
+ *------------------------- Pointer Device Parameters ------------------------
+ *
+ * width
+ *      Values:         <uint>
+ *
+ *      Maximum X coordinate (width) to be used by the frontend
+ *      while reporting input events, pixels, [0; UINT32_MAX].
+ *
+ * height
+ *      Values:         <uint>
+ *
+ *      Maximum Y coordinate (height) to be used by the frontend
+ *      while reporting input events, pixels, [0; UINT32_MAX].
+ *
+ *----------------------- Multi-touch Device Parameters ----------------------
+ *
+ * multi-touch-num-contacts
+ *      Values:         <uint>
+ *
+ *      Number of simultaneous touches reported.
+ *
+ * multi-touch-width
+ *      Values:         <uint>
+ *
+ *      Width of the touch area to be used by the frontend
+ *      while reporting input events, pixels, [0; UINT32_MAX].
+ *
+ * multi-touch-height
+ *      Values:         <uint>
+ *
+ *      Height of the touch area to be used by the frontend
+ *      while reporting input events, pixels, [0; UINT32_MAX].
+ *
+ *****************************************************************************
+ *                            Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------------------ Feature request -----------------------------
+ *
+ * Capable frontend requests features from backend via setting corresponding
+ * entries to 1 in XenStore. Requests for features not advertised as supported
+ * by the backend have no effect.
+ *
+ * request-abs-pointer
+ *      Values:         <uint>
+ *
+ *      Request backend to report absolute pointer coordinates
+ *      (XENKBD_TYPE_POS) instead of relative ones (XENKBD_TYPE_MOTION).
+ *
+ * request-multi-touch
+ *      Values:         <uint>
+ *
+ *      Request backend to report multi-touch events.
+ *
+ * request-raw-pointer
+ *      Values:         <uint>
+ *
+ *      Request backend to report raw unscaled absolute pointer coordinates.
+ *      This option is only valid if request-abs-pointer is also set.
+ *      Raw unscaled coordinates have the range [0, 0x7fff]
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ *      Values:         <uint>
+ *
+ *      The identifier of the Xen event channel used to signal activity
+ *      in the ring buffer.
+ *
+ * page-gref
+ *      Values:         <uint>
+ *
+ *      The Xen grant reference granting permission for the backend to map
+ *      a sole page in a single page sized event ring buffer.
+ *
+ * page-ref
+ *      Values:         <uint>
+ *
+ *      OBSOLETE, not recommended for use.
+ *      PFN of the shared page.
+ */
+
+/*
+ * EVENT CODES.
+ */
+
+#define XENKBD_TYPE_MOTION             1
+#define XENKBD_TYPE_RESERVED           2
+#define XENKBD_TYPE_KEY                3
+#define XENKBD_TYPE_POS                4
+#define XENKBD_TYPE_MTOUCH             5
+
+/* Multi-touch event sub-codes */
+
+#define XENKBD_MT_EV_DOWN              0
+#define XENKBD_MT_EV_UP                1
+#define XENKBD_MT_EV_MOTION            2
+#define XENKBD_MT_EV_SYN               3
+#define XENKBD_MT_EV_SHAPE             4
+#define XENKBD_MT_EV_ORIENT            5
+
+/*
+ * CONSTANTS, XENSTORE FIELD AND PATH NAME STRINGS, HELPERS.
+ */
+
+#define XENKBD_DRIVER_NAME             "vkbd"
+
+#define XENKBD_FIELD_FEAT_DSBL_KEYBRD  "feature-disable-keyboard"
+#define XENKBD_FIELD_FEAT_DSBL_POINTER "feature-disable-pointer"
+#define XENKBD_FIELD_FEAT_ABS_POINTER  "feature-abs-pointer"
+#define XENKBD_FIELD_FEAT_RAW_POINTER  "feature-raw-pointer"
+#define XENKBD_FIELD_FEAT_MTOUCH       "feature-multi-touch"
+#define XENKBD_FIELD_REQ_ABS_POINTER   "request-abs-pointer"
+#define XENKBD_FIELD_REQ_RAW_POINTER   "request-raw-pointer"
+#define XENKBD_FIELD_REQ_MTOUCH        "request-multi-touch"
+#define XENKBD_FIELD_RING_GREF         "page-gref"
+#define XENKBD_FIELD_EVT_CHANNEL       "event-channel"
+#define XENKBD_FIELD_WIDTH             "width"
+#define XENKBD_FIELD_HEIGHT            "height"
+#define XENKBD_FIELD_MT_WIDTH          "multi-touch-width"
+#define XENKBD_FIELD_MT_HEIGHT         "multi-touch-height"
+#define XENKBD_FIELD_MT_NUM_CONTACTS   "multi-touch-num-contacts"
+#define XENKBD_FIELD_UNIQUE_ID         "unique-id"
+
+/* OBSOLETE, not recommended for use */
+#define XENKBD_FIELD_RING_REF          "page-ref"
+
+/*
+ *****************************************************************************
+ * Description of the protocol between frontend and backend driver.
+ *****************************************************************************
+ *
+ * The two halves of a Para-virtual driver communicate with
+ * each other using a shared page and an event channel.
+ * Shared page contains a ring with event structures.
+ *
+ * All reserved fields in the structures below must be 0.
+ *
+ *****************************************************************************
+ *                           Backend to frontend events
+ *****************************************************************************
+ *
+ * Frontends should ignore unknown in events.
+ * All event packets have the same length (40 octets)
+ * All event packets have common header:
+ *
+ *          0         octet
+ * +-----------------+
+ * |       type      |
+ * +-----------------+
+ * type - uint8_t, event code, XENKBD_TYPE_???
+ *
+ *
+ * Pointer relative movement event
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MOTION  |                     reserved                     | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                               rel_x                               | 8
+ * +----------------+----------------+----------------+----------------+
+ * |                               rel_y                               | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                               rel_z                               | 16
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * rel_x - int32_t, relative X motion
+ * rel_y - int32_t, relative Y motion
+ * rel_z - int32_t, relative Z motion (wheel)
+ */
+
+struct xenkbd_motion
+{
+    uint8_t type;
+    int32_t rel_x;
+    int32_t rel_y;
+    int32_t rel_z;
+};
+
+/*
+ * Key event (includes pointer buttons)
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_KEY     |     pressed    |            reserved             | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                              keycode                              | 8
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 12
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * pressed - uint8_t, 1 if pressed; 0 otherwise
+ * keycode - uint32_t, KEY_* from linux/input.h
+ */
+
+struct xenkbd_key
+{
+    uint8_t type;
+    uint8_t pressed;
+    uint32_t keycode;
+};
+
+/*
+ * Pointer absolute position event
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_POS     |                     reserved                     | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                               abs_x                               | 8
+ * +----------------+----------------+----------------+----------------+
+ * |                               abs_y                               | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                               rel_z                               | 16
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * abs_x - int32_t, absolute X position (in FB pixels)
+ * abs_y - int32_t, absolute Y position (in FB pixels)
+ * rel_z - int32_t, relative Z motion (wheel)
+ */
+
+struct xenkbd_position
+{
+    uint8_t type;
+    int32_t abs_x;
+    int32_t abs_y;
+    int32_t rel_z;
+};
+
+/*
+ * Multi-touch event and its sub-types
+ *
+ * All multi-touch event packets have common header:
+ *
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MTOUCH  |   event_type   |   contact_id   |    reserved    | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 8
+ * +----------------+----------------+----------------+----------------+
+ *
+ * event_type - unt8_t, multi-touch event sub-type, XENKBD_MT_EV_???
+ * contact_id - unt8_t, ID of the contact
+ *
+ * Touch interactions can consist of one or more contacts.
+ * For each contact, a series of events is generated, starting
+ * with a down event, followed by zero or more motion events,
+ * and ending with an up event. Events relating to the same
+ * contact point can be identified by the ID of the sequence: contact ID.
+ * Contact ID may be reused after XENKBD_MT_EV_UP event and
+ * is in the [0; XENKBD_FIELD_NUM_CONTACTS - 1] range.
+ *
+ * For further information please refer to documentation on Wayland [1],
+ * Linux [2] and Windows [3] multi-touch support.
+ *
+ * [1] https://cgit.freedesktop.org/wayland/wayland/tree/protocol/wayland.xml
+ * [2] https://www.kernel.org/doc/Documentation/input/multi-touch-protocol.txt
+ * [3] https://msdn.microsoft.com/en-us/library/jj151564(v=vs.85).aspx
+ *
+ *
+ * Multi-touch down event - sent when a new touch is made: touch is assigned
+ * a unique contact ID, sent with this and consequent events related
+ * to this touch.
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MTOUCH  |   _MT_EV_DOWN  |   contact_id   |    reserved    | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 8
+ * +----------------+----------------+----------------+----------------+
+ * |                               abs_x                               | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                               abs_y                               | 16
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * abs_x - int32_t, absolute X position, in pixels
+ * abs_y - int32_t, absolute Y position, in pixels
+ *
+ * Multi-touch contact release event
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MTOUCH  |  _MT_EV_UP     |   contact_id   |    reserved    | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 8
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * Multi-touch motion event
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MTOUCH  |  _MT_EV_MOTION |   contact_id   |    reserved    | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 8
+ * +----------------+----------------+----------------+----------------+
+ * |                               abs_x                               | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                               abs_y                               | 16
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * abs_x - int32_t, absolute X position, in pixels,
+ * abs_y - int32_t, absolute Y position, in pixels,
+ *
+ * Multi-touch input synchronization event - shows end of a set of events
+ * which logically belong together.
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MTOUCH  |  _MT_EV_SYN    |   contact_id   |    reserved    | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 8
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * Multi-touch shape event - touch point's shape has changed its shape.
+ * Shape is approximated by an ellipse through the major and minor axis
+ * lengths: major is the longer diameter of the ellipse and minor is the
+ * shorter one. Center of the ellipse is reported via
+ * XENKBD_MT_EV_DOWN/XENKBD_MT_EV_MOTION events.
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MTOUCH  |  _MT_EV_SHAPE  |   contact_id   |    reserved    | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 8
+ * +----------------+----------------+----------------+----------------+
+ * |                               major                               | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                               minor                               | 16
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * major - unt32_t, length of the major axis, pixels
+ * minor - unt32_t, length of the minor axis, pixels
+ *
+ * Multi-touch orientation event - touch point's shape has changed
+ * its orientation: calculated as a clockwise angle between the major axis
+ * of the ellipse and positive Y axis in degrees, [-180; +180].
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MTOUCH  |  _MT_EV_ORIENT |   contact_id   |    reserved    | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 8
+ * +----------------+----------------+----------------+----------------+
+ * |           orientation           |            reserved             | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 16
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * orientation - int16_t, clockwise angle of the major axis
+ */
+
+struct xenkbd_mtouch {
+    uint8_t type;            /* XENKBD_TYPE_MTOUCH */
+    uint8_t event_type;      /* XENKBD_MT_EV_??? */
+    uint8_t contact_id;
+    uint8_t reserved[5];     /* reserved for the future use */
+    union {
+        struct {
+            int32_t abs_x;   /* absolute X position, pixels */
+            int32_t abs_y;   /* absolute Y position, pixels */
+        } pos;
+        struct {
+            uint32_t major;  /* length of the major axis, pixels */
+            uint32_t minor;  /* length of the minor axis, pixels */
+        } shape;
+        int16_t orientation; /* clockwise angle of the major axis */
+    } u;
+};
+
+#define XENKBD_IN_EVENT_SIZE 40
+
+union xenkbd_in_event
+{
+    uint8_t type;
+    struct xenkbd_motion motion;
+    struct xenkbd_key key;
+    struct xenkbd_position pos;
+    struct xenkbd_mtouch mtouch;
+    char pad[XENKBD_IN_EVENT_SIZE];
+};
+
+/*
+ *****************************************************************************
+ *                            Frontend to backend events
+ *****************************************************************************
+ *
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ * No out events currently defined.
+
+ * All event packets have the same length (40 octets)
+ * All event packets have common header:
+ *          0         octet
+ * +-----------------+
+ * |       type      |
+ * +-----------------+
+ * type - uint8_t, event code
+ */
+
+#define XENKBD_OUT_EVENT_SIZE 40
+
+union xenkbd_out_event
+{
+    uint8_t type;
+    char pad[XENKBD_OUT_EVENT_SIZE];
+};
+
+/*
+ *****************************************************************************
+ *                            Shared page
+ *****************************************************************************
+ */
+
+#define XENKBD_IN_RING_SIZE 2048
+#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE)
+#define XENKBD_IN_RING_OFFS 1024
+#define XENKBD_IN_RING(page) \
+    ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS))
+#define XENKBD_IN_RING_REF(page, idx) \
+    (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN])
+
+#define XENKBD_OUT_RING_SIZE 1024
+#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE)
+#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE)
+#define XENKBD_OUT_RING(page) \
+    ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS))
+#define XENKBD_OUT_RING_REF(page, idx) \
+    (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN])
+
+struct xenkbd_page
+{
+    uint32_t in_cons, in_prod;
+    uint32_t out_cons, out_prod;
+};
+
+#endif /* __XEN_PUBLIC_IO_KBDIF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/io/netif.h b/include/hw/xen/interface/io/netif.h
new file mode 100644 (file)
index 0000000..c13b850
--- /dev/null
@@ -0,0 +1,1091 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * netif.h
+ *
+ * Unified network-device I/O interface for Xen guest OSes.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_NETIF_H__
+#define __XEN_PUBLIC_IO_NETIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Older implementation of Xen network frontend / backend has an
+ * implicit dependency on the MAX_SKB_FRAGS as the maximum number of
+ * ring slots a skb can use. Netfront / netback may not work as
+ * expected when frontend and backend have different MAX_SKB_FRAGS.
+ *
+ * A better approach is to add mechanism for netfront / netback to
+ * negotiate this value. However we cannot fix all possible
+ * frontends, so we need to define a value which states the minimum
+ * slots backend must support.
+ *
+ * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS
+ * (18), which is proved to work with most frontends. Any new backend
+ * which doesn't negotiate with frontend should expect frontend to
+ * send a valid packet using slots up to this value.
+ */
+#define XEN_NETIF_NR_SLOTS_MIN 18
+
+/*
+ * Notifications after enqueuing any type of message should be conditional on
+ * the appropriate req_event or rsp_event field in the shared ring.
+ * If the client sends notification for rx requests then it should specify
+ * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume
+ * that it cannot safely queue packets (as it may not be kicked to send them).
+ */
+
+/*
+ * "feature-split-event-channels" is introduced to separate guest TX
+ * and RX notification. Backend either doesn't support this feature or
+ * advertises it via xenstore as 0 (disabled) or 1 (enabled).
+ *
+ * To make use of this feature, frontend should allocate two event
+ * channels for TX and RX, advertise them to backend as
+ * "event-channel-tx" and "event-channel-rx" respectively. If frontend
+ * doesn't want to use this feature, it just writes "event-channel"
+ * node as before.
+ */
+
+/*
+ * Multiple transmit and receive queues:
+ * If supported, the backend will write the key "multi-queue-max-queues" to
+ * the directory for that vif, and set its value to the maximum supported
+ * number of queues.
+ * Frontends that are aware of this feature and wish to use it can write the
+ * key "multi-queue-num-queues", set to the number they wish to use, which
+ * must be greater than zero, and no more than the value reported by the backend
+ * in "multi-queue-max-queues".
+ *
+ * Queues replicate the shared rings and event channels.
+ * "feature-split-event-channels" may optionally be used when using
+ * multiple queues, but is not mandatory.
+ *
+ * Each queue consists of one shared ring pair, i.e. there must be the same
+ * number of tx and rx rings.
+ *
+ * For frontends requesting just one queue, the usual event-channel and
+ * ring-ref keys are written as before, simplifying the backend processing
+ * to avoid distinguishing between a frontend that doesn't understand the
+ * multi-queue feature, and one that does, but requested only one queue.
+ *
+ * Frontends requesting two or more queues must not write the toplevel
+ * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys,
+ * instead writing those keys under sub-keys having the name "queue-N" where
+ * N is the integer ID of the queue for which those keys belong. Queues
+ * are indexed from zero. For example, a frontend with two queues and split
+ * event channels must write the following set of queue-related keys:
+ *
+ * /local/domain/1/device/vif/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vif/0/queue-0 = ""
+ * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>"
+ * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>"
+ * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>"
+ * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>"
+ * /local/domain/1/device/vif/0/queue-1 = ""
+ * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>"
+ * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1"
+ * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>"
+ * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>"
+ *
+ * If there is any inconsistency in the XenStore data, the backend may
+ * choose not to connect any queues, instead treating the request as an
+ * error. This includes scenarios where more (or fewer) queues were
+ * requested than the frontend provided details for.
+ *
+ * Mapping of packets to queues is considered to be a function of the
+ * transmitting system (backend or frontend) and is not negotiated
+ * between the two. Guests are free to transmit packets on any queue
+ * they choose, provided it has been set up correctly. Guests must be
+ * prepared to receive packets on any queue they have requested be set up.
+ */
+
+/*
+ * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum
+ * offload off or on. If it is missing then the feature is assumed to be on.
+ * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum
+ * offload on or off. If it is missing then the feature is assumed to be off.
+ */
+
+/*
+ * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to
+ * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither
+ * frontends nor backends are assumed to be capable unless the flags are
+ * present.
+ */
+
+/*
+ * "feature-multicast-control" and "feature-dynamic-multicast-control"
+ * advertise the capability to filter ethernet multicast packets in the
+ * backend. If the frontend wishes to take advantage of this feature then
+ * it may set "request-multicast-control". If the backend only advertises
+ * "feature-multicast-control" then "request-multicast-control" must be set
+ * before the frontend moves into the connected state. The backend will
+ * sample the value on this state transition and any subsequent change in
+ * value will have no effect. However, if the backend also advertises
+ * "feature-dynamic-multicast-control" then "request-multicast-control"
+ * may be set by the frontend at any time. In this case, the backend will
+ * watch the value and re-sample on watch events.
+ *
+ * If the sampled value of "request-multicast-control" is set then the
+ * backend transmit side should no longer flood multicast packets to the
+ * frontend, it should instead drop any multicast packet that does not
+ * match in a filter list.
+ * The list is amended by the frontend by sending dummy transmit requests
+ * containing XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} extra-info fragments as
+ * specified below.
+ * Note that the filter list may be amended even if the sampled value of
+ * "request-multicast-control" is not set, however the filter should only
+ * be applied if it is set.
+ */
+
+/*
+ * The setting of "trusted" node to "0" in the frontend path signals that the
+ * frontend should not trust the backend, and should deploy whatever measures
+ * available to protect from a malicious backend on the other end.
+ */
+
+/*
+ * Control ring
+ * ============
+ *
+ * Some features, such as hashing (detailed below), require a
+ * significant amount of out-of-band data to be passed from frontend to
+ * backend. Use of xenstore is not suitable for large quantities of data
+ * because of quota limitations and so a dedicated 'control ring' is used.
+ * The ability of the backend to use a control ring is advertised by
+ * setting:
+ *
+ * /local/domain/X/backend/vif/<domid>/<vif>/feature-ctrl-ring = "1"
+ *
+ * The frontend provides a control ring to the backend by setting:
+ *
+ * /local/domain/<domid>/device/vif/<vif>/ctrl-ring-ref = <gref>
+ * /local/domain/<domid>/device/vif/<vif>/event-channel-ctrl = <port>
+ *
+ * where <gref> is the grant reference of the shared page used to
+ * implement the control ring and <port> is an event channel to be used
+ * as a mailbox interrupt. These keys must be set before the frontend
+ * moves into the connected state.
+ *
+ * The control ring uses a fixed request/response message size and is
+ * balanced (i.e. one request to one response), so operationally it is much
+ * the same as a transmit or receive ring.
+ * Note that there is no requirement that responses are issued in the same
+ * order as requests.
+ */
+
+/*
+ * Link state
+ * ==========
+ *
+ * The backend can advertise its current link (carrier) state to the
+ * frontend using the /local/domain/X/backend/vif/<domid>/<vif>/carrier
+ * node. If this node is not present, then the frontend should assume that
+ * the link is up (for compatibility with backends that do not implement
+ * this feature). If this node is present, then a value of "0" should be
+ * interpreted by the frontend as the link being down (no carrier) and a
+ * value of "1" should be interpreted as the link being up (carrier
+ * present).
+ */
+
+/*
+ * MTU
+ * ===
+ *
+ * The toolstack may set a value of MTU for the frontend by setting the
+ * /local/domain/<domid>/device/vif/<vif>/mtu node with the MTU value in
+ * octets. If this node is absent the frontend should assume an MTU value
+ * of 1500 octets. A frontend is also at liberty to ignore this value so
+ * it is only suitable for informing the frontend that a packet payload
+ * >1500 octets is permitted.
+ */
+
+/*
+ * Hash types
+ * ==========
+ *
+ * For the purposes of the definitions below, 'Packet[]' is an array of
+ * octets containing an IP packet without options, 'Array[X..Y]' means a
+ * sub-array of 'Array' containing bytes X thru Y inclusive, and '+' is
+ * used to indicate concatenation of arrays.
+ */
+
+/*
+ * A hash calculated over an IP version 4 header as follows:
+ *
+ * Buffer[0..8] = Packet[12..15] (source address) +
+ *                Packet[16..19] (destination address)
+ *
+ * Result = Hash(Buffer, 8)
+ */
+#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4 0
+#define XEN_NETIF_CTRL_HASH_TYPE_IPV4 \
+    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4)
+
+/*
+ * A hash calculated over an IP version 4 header and TCP header as
+ * follows:
+ *
+ * Buffer[0..12] = Packet[12..15] (source address) +
+ *                 Packet[16..19] (destination address) +
+ *                 Packet[20..21] (source port) +
+ *                 Packet[22..23] (destination port)
+ *
+ * Result = Hash(Buffer, 12)
+ */
+#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP 1
+#define XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP \
+    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP)
+
+/*
+ * A hash calculated over an IP version 6 header as follows:
+ *
+ * Buffer[0..32] = Packet[8..23]  (source address ) +
+ *                 Packet[24..39] (destination address)
+ *
+ * Result = Hash(Buffer, 32)
+ */
+#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6 2
+#define XEN_NETIF_CTRL_HASH_TYPE_IPV6 \
+    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6)
+
+/*
+ * A hash calculated over an IP version 6 header and TCP header as
+ * follows:
+ *
+ * Buffer[0..36] = Packet[8..23]  (source address) +
+ *                 Packet[24..39] (destination address) +
+ *                 Packet[40..41] (source port) +
+ *                 Packet[42..43] (destination port)
+ *
+ * Result = Hash(Buffer, 36)
+ */
+#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP 3
+#define XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP \
+    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP)
+
+/*
+ * Hash algorithms
+ * ===============
+ */
+
+#define XEN_NETIF_CTRL_HASH_ALGORITHM_NONE 0
+
+/*
+ * Toeplitz hash:
+ */
+
+#define XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ 1
+
+/*
+ * This algorithm uses a 'key' as well as the data buffer itself.
+ * (Buffer[] and Key[] are treated as shift-registers where the MSB of
+ * Buffer/Key[0] is considered 'left-most' and the LSB of Buffer/Key[N-1]
+ * is the 'right-most').
+ *
+ * Value = 0
+ * For number of bits in Buffer[]
+ *    If (left-most bit of Buffer[] is 1)
+ *        Value ^= left-most 32 bits of Key[]
+ *    Key[] << 1
+ *    Buffer[] << 1
+ *
+ * The code below is provided for convenience where an operating system
+ * does not already provide an implementation.
+ */
+#ifdef XEN_NETIF_DEFINE_TOEPLITZ
+static uint32_t xen_netif_toeplitz_hash(const uint8_t *key,
+                                        unsigned int keylen,
+                                        const uint8_t *buf,
+                                        unsigned int buflen)
+{
+    unsigned int keyi, bufi;
+    uint64_t prefix = 0;
+    uint64_t hash = 0;
+
+    /* Pre-load prefix with the first 8 bytes of the key */
+    for (keyi = 0; keyi < 8; keyi++) {
+        prefix <<= 8;
+        prefix |= (keyi < keylen) ? key[keyi] : 0;
+    }
+
+    for (bufi = 0; bufi < buflen; bufi++) {
+        uint8_t byte = buf[bufi];
+        unsigned int bit;
+
+        for (bit = 0; bit < 8; bit++) {
+            if (byte & 0x80)
+                hash ^= prefix;
+            prefix <<= 1;
+            byte <<=1;
+        }
+
+        /*
+         * 'prefix' has now been left-shifted by 8, so
+         * OR in the next byte.
+         */
+        prefix |= (keyi < keylen) ? key[keyi] : 0;
+        keyi++;
+    }
+
+    /* The valid part of the hash is in the upper 32 bits. */
+    return hash >> 32;
+}
+#endif /* XEN_NETIF_DEFINE_TOEPLITZ */
+
+/*
+ * Control requests (struct xen_netif_ctrl_request)
+ * ================================================
+ *
+ * All requests have the following format:
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |    id     |   type    |         data[0]       |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |         data[1]       |         data[2]       |
+ * +-----+-----+-----+-----+-----------------------+
+ *
+ * id: the request identifier, echoed in response.
+ * type: the type of request (see below)
+ * data[]: any data associated with the request (determined by type)
+ */
+
+struct xen_netif_ctrl_request {
+    uint16_t id;
+    uint16_t type;
+
+#define XEN_NETIF_CTRL_TYPE_INVALID               0
+#define XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS        1
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS        2
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_KEY          3
+#define XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 4
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 5
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING      6
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM    7
+#define XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE 8
+#define XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING      9
+#define XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING     10
+
+    uint32_t data[3];
+};
+
+/*
+ * Control responses (struct xen_netif_ctrl_response)
+ * ==================================================
+ *
+ * All responses have the following format:
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |    id     |   type    |         status        |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |         data          |
+ * +-----+-----+-----+-----+
+ *
+ * id: the corresponding request identifier
+ * type: the type of the corresponding request
+ * status: the status of request processing
+ * data: any data associated with the response (determined by type and
+ *       status)
+ */
+
+struct xen_netif_ctrl_response {
+    uint16_t id;
+    uint16_t type;
+    uint32_t status;
+
+#define XEN_NETIF_CTRL_STATUS_SUCCESS           0
+#define XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     1
+#define XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER 2
+#define XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   3
+
+    uint32_t data;
+};
+
+/*
+ * Static Grants (struct xen_netif_gref)
+ * =====================================
+ *
+ * A frontend may provide a fixed set of grant references to be mapped on
+ * the backend. The message of type XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
+ * prior its usage in the command ring allows for creation of these mappings.
+ * The backend will maintain a fixed amount of these mappings.
+ *
+ * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE lets a frontend query how many
+ * of these mappings can be kept.
+ *
+ * Each entry in the XEN_NETIF_CTRL_TYPE_{ADD,DEL}_GREF_MAPPING input table has
+ * the following format:
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | grant ref             |  flags    |  status   |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * grant ref: grant reference (IN)
+ * flags: flags describing the control operation (IN)
+ * status: XEN_NETIF_CTRL_STATUS_* (OUT)
+ *
+ * 'status' is an output parameter which does not require to be set to zero
+ * prior to its usage in the corresponding control messages.
+ */
+
+struct xen_netif_gref {
+       grant_ref_t ref;
+       uint16_t flags;
+
+#define _XEN_NETIF_CTRLF_GREF_readonly    0
+#define XEN_NETIF_CTRLF_GREF_readonly    (1U<<_XEN_NETIF_CTRLF_GREF_readonly)
+
+       uint16_t status;
+};
+
+/*
+ * Control messages
+ * ================
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM
+ * --------------------------------------
+ *
+ * This is sent by the frontend to set the desired hash algorithm.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM
+ *  data[0] = a XEN_NETIF_CTRL_HASH_ALGORITHM_* value
+ *  data[1] = 0
+ *  data[2] = 0
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The algorithm is not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *
+ * NOTE: Setting data[0] to XEN_NETIF_CTRL_HASH_ALGORITHM_NONE disables
+ *       hashing and the backend is free to choose how it steers packets
+ *       to queues (which is the default behaviour).
+ *
+ * XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS
+ * ----------------------------------
+ *
+ * This is sent by the frontend to query the types of hash supported by
+ * the backend.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS
+ *  data[0] = 0
+ *  data[1] = 0
+ *  data[2] = 0
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS       - Operation successful
+ *  data   = supported hash types (if operation was successful)
+ *
+ * NOTE: A valid hash algorithm must be selected before this operation can
+ *       succeed.
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS
+ * ----------------------------------
+ *
+ * This is sent by the frontend to set the types of hash that the backend
+ * should calculate. (See above for hash type definitions).
+ * Note that the 'maximal' type of hash should always be chosen. For
+ * example, if the frontend sets both IPV4 and IPV4_TCP hash types then
+ * the latter hash type should be calculated for any TCP packet and the
+ * former only calculated for non-TCP packets.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS
+ *  data[0] = bitwise OR of XEN_NETIF_CTRL_HASH_TYPE_* values
+ *  data[1] = 0
+ *  data[2] = 0
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - One or more flag
+ *                                                     value is invalid or
+ *                                                     unsupported
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *  data   = 0
+ *
+ * NOTE: A valid hash algorithm must be selected before this operation can
+ *       succeed.
+ *       Also, setting data[0] to zero disables hashing and the backend
+ *       is free to choose how it steers packets to queues.
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_KEY
+ * --------------------------------
+ *
+ * This is sent by the frontend to set the key of the hash if the algorithm
+ * requires it. (See hash algorithms above).
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_KEY
+ *  data[0] = grant reference of page containing the key (assumed to
+ *            start at beginning of grant)
+ *  data[1] = size of key in octets
+ *  data[2] = 0
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Key size is invalid
+ *           XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   - Key size is larger
+ *                                                     than the backend
+ *                                                     supports
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *  data   = 0
+ *
+ * NOTE: Any key octets not specified are assumed to be zero (the key
+ *       is assumed to be empty by default) and specifying a new key
+ *       invalidates any previous key, hence specifying a key size of
+ *       zero will clear the key (which ensures that the calculated hash
+ *       will always be zero).
+ *       The maximum size of key is algorithm and backend specific, but
+ *       is also limited by the single grant reference.
+ *       The grant reference may be read-only and must remain valid until
+ *       the response has been processed.
+ *
+ * XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE
+ * -----------------------------------------
+ *
+ * This is sent by the frontend to query the maximum size of mapping
+ * table supported by the backend. The size is specified in terms of
+ * table entries.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE
+ *  data[0] = 0
+ *  data[1] = 0
+ *  data[2] = 0
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS       - Operation successful
+ *  data   = maximum number of entries allowed in the mapping table
+ *           (if operation was successful) or zero if a mapping table is
+ *           not supported (i.e. hash mapping is done only by modular
+ *           arithmetic).
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
+ * -------------------------------------
+ *
+ * This is sent by the frontend to set the actual size of the mapping
+ * table to be used by the backend. The size is specified in terms of
+ * table entries.
+ * Any previous table is invalidated by this message and any new table
+ * is assumed to be zero filled.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
+ *  data[0] = number of entries in mapping table
+ *  data[1] = 0
+ *  data[2] = 0
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size is invalid
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *  data   = 0
+ *
+ * NOTE: Setting data[0] to 0 means that hash mapping should be done
+ *       using modular arithmetic.
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING
+ * ------------------------------------
+ *
+ * This is sent by the frontend to set the content of the table mapping
+ * hash value to queue number. The backend should calculate the hash from
+ * the packet header, use it as an index into the table (modulo the size
+ * of the table) and then steer the packet to the queue number found at
+ * that index.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING
+ *  data[0] = grant reference of page containing the mapping (sub-)table
+ *            (assumed to start at beginning of grant)
+ *  data[1] = size of (sub-)table in entries
+ *  data[2] = offset, in entries, of sub-table within overall table
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size or content
+ *                                                     is invalid
+ *           XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   - Table size is larger
+ *                                                     than the backend
+ *                                                     supports
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *  data   = 0
+ *
+ * NOTE: The overall table has the following format:
+ *
+ *          0     1     2     3     4     5     6     7  octet
+ *       +-----+-----+-----+-----+-----+-----+-----+-----+
+ *       |       mapping[0]      |       mapping[1]      |
+ *       +-----+-----+-----+-----+-----+-----+-----+-----+
+ *       |                       .                       |
+ *       |                       .                       |
+ *       |                       .                       |
+ *       +-----+-----+-----+-----+-----+-----+-----+-----+
+ *       |      mapping[N-2]     |      mapping[N-1]     |
+ *       +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ *       where N is specified by a XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
+ *       message and each  mapping must specifies a queue between 0 and
+ *       "multi-queue-num-queues" (see above).
+ *       The backend may support a mapping table larger than can be
+ *       mapped by a single grant reference. Thus sub-tables within a
+ *       larger table can be individually set by sending multiple messages
+ *       with differing offset values. Specifying a new sub-table does not
+ *       invalidate any table data outside that range.
+ *       The grant reference may be read-only and must remain valid until
+ *       the response has been processed.
+ *
+ * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE
+ * -----------------------------------------
+ *
+ * This is sent by the frontend to fetch the number of grefs that can be kept
+ * mapped in the backend.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE
+ *  data[0] = queue index (assumed 0 for single queue)
+ *  data[1] = 0
+ *  data[2] = 0
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The queue index is
+ *                                                     out of range
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *  data   = maximum number of entries allowed in the gref mapping table
+ *           (if operation was successful) or zero if it is not supported.
+ *
+ * XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
+ * ------------------------------------
+ *
+ * This is sent by the frontend for backend to map a list of grant
+ * references.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
+ *  data[0] = queue index
+ *  data[1] = grant reference of page containing the mapping list
+ *            (r/w and assumed to start at beginning of page)
+ *  data[2] = size of list in entries
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *
+ * NOTE: Each entry in the input table has the format outlined
+ *       in struct xen_netif_gref.
+ *       Contrary to XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING, the struct
+ *       xen_netif_gref 'status' field is not used and therefore the response
+ *       'status' determines the success of this operation. In case of
+ *       failure none of grants mappings get added in the backend.
+ *
+ * XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING
+ * ------------------------------------
+ *
+ * This is sent by the frontend for backend to unmap a list of grant
+ * references.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING
+ *  data[0] = queue index
+ *  data[1] = grant reference of page containing the mapping list
+ *            (r/w and assumed to start at beginning of page)
+ *  data[2] = size of list in entries
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *  data   = number of entries that were unmapped
+ *
+ * NOTE: Each entry in the input table has the format outlined in struct
+ *       xen_netif_gref.
+ *       The struct xen_netif_gref 'status' field determines if the entry
+ *       was successfully removed.
+ *       The entries used are only the ones representing grant references that
+ *       were previously the subject of a XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
+ *       operation. Any other entries will have their status set to
+ *       XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER upon completion.
+ */
+
+DEFINE_RING_TYPES(xen_netif_ctrl,
+                  struct xen_netif_ctrl_request,
+                  struct xen_netif_ctrl_response);
+
+/*
+ * Guest transmit
+ * ==============
+ *
+ * This is the 'wire' format for transmit (frontend -> backend) packets:
+ *
+ *  Fragment 1: netif_tx_request_t  - flags = NETTXF_*
+ *                                    size = total packet size
+ * [Extra 1: netif_extra_info_t]    - (only if fragment 1 flags include
+ *                                     NETTXF_extra_info)
+ *  ...
+ * [Extra N: netif_extra_info_t]    - (only if extra N-1 flags include
+ *                                     XEN_NETIF_EXTRA_MORE)
+ *  ...
+ *  Fragment N: netif_tx_request_t  - (only if fragment N-1 flags include
+ *                                     NETTXF_more_data - flags on preceding
+ *                                     extras are not relevant here)
+ *                                    flags = 0
+ *                                    size = fragment size
+ *
+ * NOTE:
+ *
+ * This format slightly is different from that used for receive
+ * (backend -> frontend) packets. Specifically, in a multi-fragment
+ * packet the actual size of fragment 1 can only be determined by
+ * subtracting the sizes of fragments 2..N from the total packet size.
+ *
+ * Ring slot size is 12 octets, however not all request/response
+ * structs use the full size.
+ *
+ * tx request data (netif_tx_request_t)
+ * ------------------------------------
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | grant ref             | offset    | flags     |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | id        | size      |
+ * +-----+-----+-----+-----+
+ *
+ * grant ref: Reference to buffer page.
+ * offset: Offset within buffer page.
+ * flags: NETTXF_*.
+ * id: request identifier, echoed in response.
+ * size: packet size in bytes.
+ *
+ * tx response (netif_tx_response_t)
+ * ---------------------------------
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | id        | status    | unused                |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | unused                |
+ * +-----+-----+-----+-----+
+ *
+ * id: reflects id in transmit request
+ * status: NETIF_RSP_*
+ *
+ * Guest receive
+ * =============
+ *
+ * This is the 'wire' format for receive (backend -> frontend) packets:
+ *
+ *  Fragment 1: netif_rx_request_t  - flags = NETRXF_*
+ *                                    size = fragment size
+ * [Extra 1: netif_extra_info_t]    - (only if fragment 1 flags include
+ *                                     NETRXF_extra_info)
+ *  ...
+ * [Extra N: netif_extra_info_t]    - (only if extra N-1 flags include
+ *                                     XEN_NETIF_EXTRA_MORE)
+ *  ...
+ *  Fragment N: netif_rx_request_t  - (only if fragment N-1 flags include
+ *                                     NETRXF_more_data - flags on preceding
+ *                                     extras are not relevant here)
+ *                                    flags = 0
+ *                                    size = fragment size
+ *
+ * NOTE:
+ *
+ * This format slightly is different from that used for transmit
+ * (frontend -> backend) packets. Specifically, in a multi-fragment
+ * packet the size of the packet can only be determined by summing the
+ * sizes of fragments 1..N.
+ *
+ * Ring slot size is 8 octets.
+ *
+ * rx request (netif_rx_request_t)
+ * -------------------------------
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | id        | pad       | gref                  |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * id: request identifier, echoed in response.
+ * gref: reference to incoming granted frame.
+ *
+ * rx response (netif_rx_response_t)
+ * ---------------------------------
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | id        | offset    | flags     | status    |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * id: reflects id in receive request
+ * offset: offset in page of start of received packet
+ * flags: NETRXF_*
+ * status: -ve: NETIF_RSP_*; +ve: Rx'ed pkt size.
+ *
+ * NOTE: Historically, to support GSO on the frontend receive side, Linux
+ *       netfront does not make use of the rx response id (because, as
+ *       described below, extra info structures overlay the id field).
+ *       Instead it assumes that responses always appear in the same ring
+ *       slot as their corresponding request. Thus, to maintain
+ *       compatibility, backends must make sure this is the case.
+ *
+ * Extra Info
+ * ==========
+ *
+ * Can be present if initial request or response has NET{T,R}XF_extra_info,
+ * or previous extra request has XEN_NETIF_EXTRA_MORE.
+ *
+ * The struct therefore needs to fit into either a tx or rx slot and
+ * is therefore limited to 8 octets.
+ *
+ * NOTE: Because extra info data overlays the usual request/response
+ *       structures, there is no id information in the opposite direction.
+ *       So, if an extra info overlays an rx response the frontend can
+ *       assume that it is in the same ring slot as the request that was
+ *       consumed to make the slot available, and the backend must ensure
+ *       this assumption is true.
+ *
+ * extra info (netif_extra_info_t)
+ * -------------------------------
+ *
+ * General format:
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |type |flags| type specific data                |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | padding for tx        |
+ * +-----+-----+-----+-----+
+ *
+ * type: XEN_NETIF_EXTRA_TYPE_*
+ * flags: XEN_NETIF_EXTRA_FLAG_*
+ * padding for tx: present only in the tx case due to 8 octet limit
+ *                 from rx case. Not shown in type specific entries
+ *                 below.
+ *
+ * XEN_NETIF_EXTRA_TYPE_GSO:
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |type |flags| size      |type | pad | features  |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * type: Must be XEN_NETIF_EXTRA_TYPE_GSO
+ * flags: XEN_NETIF_EXTRA_FLAG_*
+ * size: Maximum payload size of each segment. For example,
+ *       for TCP this is just the path MSS.
+ * type: XEN_NETIF_GSO_TYPE_*: This determines the protocol of
+ *       the packet and any extra features required to segment the
+ *       packet properly.
+ * features: EN_NETIF_GSO_FEAT_*: This specifies any extra GSO
+ *           features required to process this packet, such as ECN
+ *           support for TCPv4.
+ *
+ * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |type |flags| addr                              |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}
+ * flags: XEN_NETIF_EXTRA_FLAG_*
+ * addr: address to add/remove
+ *
+ * XEN_NETIF_EXTRA_TYPE_HASH:
+ *
+ * A backend that supports teoplitz hashing is assumed to accept
+ * this type of extra info in transmit packets.
+ * A frontend that enables hashing is assumed to accept
+ * this type of extra info in receive packets.
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |type |flags|htype| alg |LSB ---- value ---- MSB|
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * type: Must be XEN_NETIF_EXTRA_TYPE_HASH
+ * flags: XEN_NETIF_EXTRA_FLAG_*
+ * htype: Hash type (one of _XEN_NETIF_CTRL_HASH_TYPE_* - see above)
+ * alg: The algorithm used to calculate the hash (one of
+ *      XEN_NETIF_CTRL_HASH_TYPE_ALGORITHM_* - see above)
+ * value: Hash value
+ */
+
+/* Protocol checksum field is blank in the packet (hardware offload)? */
+#define _NETTXF_csum_blank     (0)
+#define  NETTXF_csum_blank     (1U<<_NETTXF_csum_blank)
+
+/* Packet data has been validated against protocol checksum. */
+#define _NETTXF_data_validated (1)
+#define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
+
+/* Packet continues in the next request descriptor. */
+#define _NETTXF_more_data      (2)
+#define  NETTXF_more_data      (1U<<_NETTXF_more_data)
+
+/* Packet to be followed by extra descriptor(s). */
+#define _NETTXF_extra_info     (3)
+#define  NETTXF_extra_info     (1U<<_NETTXF_extra_info)
+
+#define XEN_NETIF_MAX_TX_SIZE 0xFFFF
+struct netif_tx_request {
+    grant_ref_t gref;
+    uint16_t offset;
+    uint16_t flags;
+    uint16_t id;
+    uint16_t size;
+};
+typedef struct netif_tx_request netif_tx_request_t;
+
+/* Types of netif_extra_info descriptors. */
+#define XEN_NETIF_EXTRA_TYPE_NONE      (0)  /* Never used - invalid */
+#define XEN_NETIF_EXTRA_TYPE_GSO       (1)  /* u.gso */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2)  /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3)  /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_HASH      (4)  /* u.hash */
+#define XEN_NETIF_EXTRA_TYPE_MAX       (5)
+
+/* netif_extra_info_t flags. */
+#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
+#define XEN_NETIF_EXTRA_FLAG_MORE  (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
+
+/* GSO types */
+#define XEN_NETIF_GSO_TYPE_NONE         (0)
+#define XEN_NETIF_GSO_TYPE_TCPV4        (1)
+#define XEN_NETIF_GSO_TYPE_TCPV6        (2)
+
+/*
+ * This structure needs to fit within both netif_tx_request_t and
+ * netif_rx_response_t for compatibility.
+ */
+struct netif_extra_info {
+    uint8_t type;
+    uint8_t flags;
+    union {
+        struct {
+            uint16_t size;
+            uint8_t type;
+            uint8_t pad;
+            uint16_t features;
+        } gso;
+        struct {
+            uint8_t addr[6];
+        } mcast;
+        struct {
+            uint8_t type;
+            uint8_t algorithm;
+            uint8_t value[4];
+        } hash;
+        uint16_t pad[3];
+    } u;
+};
+typedef struct netif_extra_info netif_extra_info_t;
+
+struct netif_tx_response {
+    uint16_t id;
+    int16_t  status;
+};
+typedef struct netif_tx_response netif_tx_response_t;
+
+struct netif_rx_request {
+    uint16_t    id;        /* Echoed in response message.        */
+    uint16_t    pad;
+    grant_ref_t gref;
+};
+typedef struct netif_rx_request netif_rx_request_t;
+
+/* Packet data has been validated against protocol checksum. */
+#define _NETRXF_data_validated (0)
+#define  NETRXF_data_validated (1U<<_NETRXF_data_validated)
+
+/* Protocol checksum field is blank in the packet (hardware offload)? */
+#define _NETRXF_csum_blank     (1)
+#define  NETRXF_csum_blank     (1U<<_NETRXF_csum_blank)
+
+/* Packet continues in the next request descriptor. */
+#define _NETRXF_more_data      (2)
+#define  NETRXF_more_data      (1U<<_NETRXF_more_data)
+
+/* Packet to be followed by extra descriptor(s). */
+#define _NETRXF_extra_info     (3)
+#define  NETRXF_extra_info     (1U<<_NETRXF_extra_info)
+
+/* Packet has GSO prefix. Deprecated but included for compatibility */
+#define _NETRXF_gso_prefix     (4)
+#define  NETRXF_gso_prefix     (1U<<_NETRXF_gso_prefix)
+
+struct netif_rx_response {
+    uint16_t id;
+    uint16_t offset;
+    uint16_t flags;
+    int16_t  status;
+};
+typedef struct netif_rx_response netif_rx_response_t;
+
+/*
+ * Generate netif ring structures and types.
+ */
+
+DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response);
+DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);
+
+#define NETIF_RSP_DROPPED         -2
+#define NETIF_RSP_ERROR           -1
+#define NETIF_RSP_OKAY             0
+/* No response: used for auxiliary requests (e.g., netif_extra_info_t). */
+#define NETIF_RSP_NULL             1
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/io/protocols.h b/include/hw/xen/interface/io/protocols.h
new file mode 100644 (file)
index 0000000..7815e1f
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * protocols.h
+ *
+ * Copyright (c) 2008, Keir Fraser
+ */
+
+#ifndef __XEN_PROTOCOLS_H__
+#define __XEN_PROTOCOLS_H__
+
+#define XEN_IO_PROTO_ABI_X86_32     "x86_32-abi"
+#define XEN_IO_PROTO_ABI_X86_64     "x86_64-abi"
+#define XEN_IO_PROTO_ABI_ARM        "arm-abi"
+
+#if defined(__i386__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
+#elif defined(__x86_64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
+#elif defined(__arm__) || defined(__aarch64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM
+#else
+# error arch fixup needed here
+#endif
+
+#endif
diff --git a/include/hw/xen/interface/io/ring.h b/include/hw/xen/interface/io/ring.h
new file mode 100644 (file)
index 0000000..0259392
--- /dev/null
@@ -0,0 +1,496 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * ring.h
+ *
+ * Shared producer-consumer ring macros.
+ *
+ * Tim Deegan and Andrew Warfield November 2004.
+ */
+
+#ifndef __XEN_PUBLIC_IO_RING_H__
+#define __XEN_PUBLIC_IO_RING_H__
+
+/*
+ * When #include'ing this header, you need to provide the following
+ * declaration upfront:
+ * - standard integers types (uint8_t, uint16_t, etc)
+ * They are provided by stdint.h of the standard headers.
+ *
+ * In addition, if you intend to use the FLEX macros, you also need to
+ * provide the following, before invoking the FLEX macros:
+ * - size_t
+ * - memcpy
+ * - grant_ref_t
+ * These declarations are provided by string.h of the standard headers,
+ * and grant_table.h from the Xen public headers.
+ */
+
+#include "../xen-compat.h"
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030208
+#define xen_mb()  mb()
+#define xen_rmb() rmb()
+#define xen_wmb() wmb()
+#endif
+
+typedef unsigned int RING_IDX;
+
+/* Round a 32-bit unsigned constant down to the nearest power of two. */
+#define __RD2(_x)  (((_x) & 0x00000002) ? 0x2                  : ((_x) & 0x1))
+#define __RD4(_x)  (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2    : __RD2(_x))
+#define __RD8(_x)  (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4    : __RD4(_x))
+#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8    : __RD8(_x))
+#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
+
+/*
+ * Calculate size of a shared ring, given the total available space for the
+ * ring and indexes (_sz), and the name tag of the request/response structure.
+ * A ring contains as many entries as will fit, rounded down to the nearest
+ * power of two (so we can mask with (size-1) to loop around).
+ */
+#define __CONST_RING_SIZE(_s, _sz) \
+    (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \
+           sizeof(((struct _s##_sring *)0)->ring[0])))
+/*
+ * The same for passing in an actual pointer instead of a name tag.
+ */
+#define __RING_SIZE(_s, _sz) \
+    (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
+
+/*
+ * Macros to make the correct C datatypes for a new kind of ring.
+ *
+ * To make a new ring datatype, you need to have two message structures,
+ * let's say request_t, and response_t already defined.
+ *
+ * In a header where you want the ring datatype declared, you then do:
+ *
+ *     DEFINE_RING_TYPES(mytag, request_t, response_t);
+ *
+ * These expand out to give you a set of types, as you can see below.
+ * The most important of these are:
+ *
+ *     mytag_sring_t      - The shared ring.
+ *     mytag_front_ring_t - The 'front' half of the ring.
+ *     mytag_back_ring_t  - The 'back' half of the ring.
+ *
+ * To initialize a ring in your code you need to know the location and size
+ * of the shared memory area (PAGE_SIZE, for instance). To initialise
+ * the front half:
+ *
+ *     mytag_front_ring_t ring;
+ *     XEN_FRONT_RING_INIT(&ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ *
+ * Initializing the back follows similarly (note that only the front
+ * initializes the shared ring):
+ *
+ *     mytag_back_ring_t back_ring;
+ *     BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ */
+
+#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t)                     \
+                                                                        \
+/* Shared ring entry */                                                 \
+union __name##_sring_entry {                                            \
+    __req_t req;                                                        \
+    __rsp_t rsp;                                                        \
+};                                                                      \
+                                                                        \
+/* Shared ring page */                                                  \
+struct __name##_sring {                                                 \
+    RING_IDX req_prod, req_event;                                       \
+    RING_IDX rsp_prod, rsp_event;                                       \
+    union {                                                             \
+        struct {                                                        \
+            uint8_t smartpoll_active;                                   \
+        } netif;                                                        \
+        struct {                                                        \
+            uint8_t msg;                                                \
+        } tapif_user;                                                   \
+        uint8_t pvt_pad[4];                                             \
+    } pvt;                                                              \
+    uint8_t __pad[44];                                                  \
+    union __name##_sring_entry ring[1]; /* variable-length */           \
+};                                                                      \
+                                                                        \
+/* "Front" end's private variables */                                   \
+struct __name##_front_ring {                                            \
+    RING_IDX req_prod_pvt;                                              \
+    RING_IDX rsp_cons;                                                  \
+    unsigned int nr_ents;                                               \
+    struct __name##_sring *sring;                                       \
+};                                                                      \
+                                                                        \
+/* "Back" end's private variables */                                    \
+struct __name##_back_ring {                                             \
+    RING_IDX rsp_prod_pvt;                                              \
+    RING_IDX req_cons;                                                  \
+    unsigned int nr_ents;                                               \
+    struct __name##_sring *sring;                                       \
+};                                                                      \
+                                                                        \
+/* Syntactic sugar */                                                   \
+typedef struct __name##_sring __name##_sring_t;                         \
+typedef struct __name##_front_ring __name##_front_ring_t;               \
+typedef struct __name##_back_ring __name##_back_ring_t
+
+/*
+ * Macros for manipulating rings.
+ *
+ * FRONT_RING_whatever works on the "front end" of a ring: here
+ * requests are pushed on to the ring and responses taken off it.
+ *
+ * BACK_RING_whatever works on the "back end" of a ring: here
+ * requests are taken off the ring and responses put on.
+ *
+ * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.
+ * This is OK in 1-for-1 request-response situations where the
+ * requestor (front end) never has more than RING_SIZE()-1
+ * outstanding requests.
+ */
+
+/* Initialising empty rings */
+#define SHARED_RING_INIT(_s) do {                                       \
+    (_s)->req_prod  = (_s)->rsp_prod  = 0;                              \
+    (_s)->req_event = (_s)->rsp_event = 1;                              \
+    (void)memset((_s)->pvt.pvt_pad, 0, sizeof((_s)->pvt.pvt_pad));      \
+    (void)memset((_s)->__pad, 0, sizeof((_s)->__pad));                  \
+} while(0)
+
+#define FRONT_RING_ATTACH(_r, _s, _i, __size) do {                      \
+    (_r)->req_prod_pvt = (_i);                                          \
+    (_r)->rsp_cons = (_i);                                              \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
+    (_r)->sring = (_s);                                                 \
+} while (0)
+
+#define FRONT_RING_INIT(_r, _s, __size) FRONT_RING_ATTACH(_r, _s, 0, __size)
+
+#define XEN_FRONT_RING_INIT(r, s, size) do {                            \
+    SHARED_RING_INIT(s);                                                \
+    FRONT_RING_INIT(r, s, size);                                        \
+} while (0)
+
+#define BACK_RING_ATTACH(_r, _s, _i, __size) do {                       \
+    (_r)->rsp_prod_pvt = (_i);                                          \
+    (_r)->req_cons = (_i);                                              \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
+    (_r)->sring = (_s);                                                 \
+} while (0)
+
+#define BACK_RING_INIT(_r, _s, __size) BACK_RING_ATTACH(_r, _s, 0, __size)
+
+/* How big is this ring? */
+#define RING_SIZE(_r)                                                   \
+    ((_r)->nr_ents)
+
+/* Number of free requests (for use on front side only). */
+#define RING_FREE_REQUESTS(_r)                                          \
+    (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
+
+/* Test if there is an empty slot available on the front ring.
+ * (This is only meaningful from the front. )
+ */
+#define RING_FULL(_r)                                                   \
+    (RING_FREE_REQUESTS(_r) == 0)
+
+/* Test if there are outstanding messages to be processed on a ring. */
+#define XEN_RING_NR_UNCONSUMED_RESPONSES(_r)                            \
+    ((_r)->sring->rsp_prod - (_r)->rsp_cons)
+
+#ifdef __GNUC__
+#define XEN_RING_NR_UNCONSUMED_REQUESTS(_r) ({                          \
+    unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;          \
+    unsigned int rsp = RING_SIZE(_r) -                                  \
+        ((_r)->req_cons - (_r)->rsp_prod_pvt);                          \
+    req < rsp ? req : rsp;                                              \
+})
+#else
+/* Same as above, but without the nice GCC ({ ... }) syntax. */
+#define XEN_RING_NR_UNCONSUMED_REQUESTS(_r)                             \
+    ((((_r)->sring->req_prod - (_r)->req_cons) <                        \
+      (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ?        \
+     ((_r)->sring->req_prod - (_r)->req_cons) :                         \
+     (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
+#endif
+
+#ifdef XEN_RING_HAS_UNCONSUMED_IS_BOOL
+/*
+ * These variants should only be used in case no caller is abusing them for
+ * obtaining the number of unconsumed responses/requests.
+ */
+#define RING_HAS_UNCONSUMED_RESPONSES(_r) \
+    (!!XEN_RING_NR_UNCONSUMED_RESPONSES(_r))
+#define RING_HAS_UNCONSUMED_REQUESTS(_r)  \
+    (!!XEN_RING_NR_UNCONSUMED_REQUESTS(_r))
+#else
+#define RING_HAS_UNCONSUMED_RESPONSES(_r) XEN_RING_NR_UNCONSUMED_RESPONSES(_r)
+#define RING_HAS_UNCONSUMED_REQUESTS(_r)  XEN_RING_NR_UNCONSUMED_REQUESTS(_r)
+#endif
+
+/* Direct access to individual ring elements, by index. */
+#define RING_GET_REQUEST(_r, _idx)                                      \
+    (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
+
+#define RING_GET_RESPONSE(_r, _idx)                                     \
+    (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
+
+/*
+ * Get a local copy of a request/response.
+ *
+ * Use this in preference to RING_GET_{REQUEST,RESPONSE}() so all processing is
+ * done on a local copy that cannot be modified by the other end.
+ *
+ * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this
+ * to be ineffective where dest is a struct which consists of only bitfields.
+ */
+#define RING_COPY_(type, r, idx, dest) do {                            \
+       /* Use volatile to force the copy into dest. */                 \
+       *(dest) = *(volatile __typeof__(dest))RING_GET_##type(r, idx);  \
+} while (0)
+
+#define RING_COPY_REQUEST(r, idx, req)  RING_COPY_(REQUEST, r, idx, req)
+#define RING_COPY_RESPONSE(r, idx, rsp) RING_COPY_(RESPONSE, r, idx, rsp)
+
+/* Loop termination condition: Would the specified index overflow the ring? */
+#define RING_REQUEST_CONS_OVERFLOW(_r, _cons)                           \
+    (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
+
+/* Ill-behaved frontend determination: Can there be this many requests? */
+#define RING_REQUEST_PROD_OVERFLOW(_r, _prod)                           \
+    (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
+
+/* Ill-behaved backend determination: Can there be this many responses? */
+#define RING_RESPONSE_PROD_OVERFLOW(_r, _prod)                          \
+    (((_prod) - (_r)->rsp_cons) > RING_SIZE(_r))
+
+#define RING_PUSH_REQUESTS(_r) do {                                     \
+    xen_wmb(); /* back sees requests /before/ updated producer index */ \
+    (_r)->sring->req_prod = (_r)->req_prod_pvt;                         \
+} while (0)
+
+#define RING_PUSH_RESPONSES(_r) do {                                    \
+    xen_wmb(); /* front sees resps /before/ updated producer index */   \
+    (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt;                         \
+} while (0)
+
+/*
+ * Notification hold-off (req_event and rsp_event):
+ *
+ * When queueing requests or responses on a shared ring, it may not always be
+ * necessary to notify the remote end. For example, if requests are in flight
+ * in a backend, the front may be able to queue further requests without
+ * notifying the back (if the back checks for new requests when it queues
+ * responses).
+ *
+ * When enqueuing requests or responses:
+ *
+ *  Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
+ *  is a boolean return value. True indicates that the receiver requires an
+ *  asynchronous notification.
+ *
+ * After dequeuing requests or responses (before sleeping the connection):
+ *
+ *  Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
+ *  The second argument is a boolean return value. True indicates that there
+ *  are pending messages on the ring (i.e., the connection should not be put
+ *  to sleep).
+ *
+ *  These macros will set the req_event/rsp_event field to trigger a
+ *  notification on the very next message that is enqueued. If you want to
+ *  create batches of work (i.e., only receive a notification after several
+ *  messages have been enqueued) then you will need to create a customised
+ *  version of the FINAL_CHECK macro in your own code, which sets the event
+ *  field appropriately.
+ */
+
+#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do {           \
+    RING_IDX __old = (_r)->sring->req_prod;                             \
+    RING_IDX __new = (_r)->req_prod_pvt;                                \
+    xen_wmb(); /* back sees requests /before/ updated producer index */ \
+    (_r)->sring->req_prod = __new;                                      \
+    xen_mb(); /* back sees new requests /before/ we check req_event */  \
+    (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) <           \
+                 (RING_IDX)(__new - __old));                            \
+} while (0)
+
+#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do {          \
+    RING_IDX __old = (_r)->sring->rsp_prod;                             \
+    RING_IDX __new = (_r)->rsp_prod_pvt;                                \
+    xen_wmb(); /* front sees resps /before/ updated producer index */   \
+    (_r)->sring->rsp_prod = __new;                                      \
+    xen_mb(); /* front sees new resps /before/ we check rsp_event */    \
+    (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) <           \
+                 (RING_IDX)(__new - __old));                            \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do {             \
+    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
+    if (_work_to_do) break;                                             \
+    (_r)->sring->req_event = (_r)->req_cons + 1;                        \
+    xen_mb();                                                           \
+    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do {            \
+    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
+    if (_work_to_do) break;                                             \
+    (_r)->sring->rsp_event = (_r)->rsp_cons + 1;                        \
+    xen_mb();                                                           \
+    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
+} while (0)
+
+
+/*
+ * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and
+ * functions to check if there is data on the ring, and to read and
+ * write to them.
+ *
+ * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but
+ * does not define the indexes page. As different protocols can have
+ * extensions to the basic format, this macro allow them to define their
+ * own struct.
+ *
+ * XEN_FLEX_RING_SIZE
+ *   Convenience macro to calculate the size of one of the two rings
+ *   from the overall order.
+ *
+ * $NAME_mask
+ *   Function to apply the size mask to an index, to reduce the index
+ *   within the range [0-size].
+ *
+ * $NAME_read_packet
+ *   Function to read data from the ring. The amount of data to read is
+ *   specified by the "size" argument.
+ *
+ * $NAME_write_packet
+ *   Function to write data to the ring. The amount of data to write is
+ *   specified by the "size" argument.
+ *
+ * $NAME_get_ring_ptr
+ *   Convenience function that returns a pointer to read/write to the
+ *   ring at the right location.
+ *
+ * $NAME_data_intf
+ *   Indexes page, shared between frontend and backend. It also
+ *   contains the array of grant refs.
+ *
+ * $NAME_queued
+ *   Function to calculate how many bytes are currently on the ring,
+ *   ready to be read. It can also be used to calculate how much free
+ *   space is currently on the ring (XEN_FLEX_RING_SIZE() -
+ *   $NAME_queued()).
+ */
+
+#ifndef XEN_PAGE_SHIFT
+/* The PAGE_SIZE for ring protocols and hypercall interfaces is always
+ * 4K, regardless of the architecture, and page granularity chosen by
+ * operating systems.
+ */
+#define XEN_PAGE_SHIFT 12
+#endif
+#define XEN_FLEX_RING_SIZE(order)                                             \
+    (1UL << ((order) + XEN_PAGE_SHIFT - 1))
+
+#define DEFINE_XEN_FLEX_RING(name)                                            \
+static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size)          \
+{                                                                             \
+    return idx & (ring_size - 1);                                             \
+}                                                                             \
+                                                                              \
+static inline unsigned char *name##_get_ring_ptr(unsigned char *buf,          \
+                                                 RING_IDX idx,                \
+                                                 RING_IDX ring_size)          \
+{                                                                             \
+    return buf + name##_mask(idx, ring_size);                                 \
+}                                                                             \
+                                                                              \
+static inline void name##_read_packet(void *opaque,                           \
+                                      const unsigned char *buf,               \
+                                      size_t size,                            \
+                                      RING_IDX masked_prod,                   \
+                                      RING_IDX *masked_cons,                  \
+                                      RING_IDX ring_size)                     \
+{                                                                             \
+    if (*masked_cons < masked_prod ||                                         \
+        size <= ring_size - *masked_cons) {                                   \
+        memcpy(opaque, buf + *masked_cons, size);                             \
+    } else {                                                                  \
+        memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons);         \
+        memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf,       \
+               size - (ring_size - *masked_cons));                            \
+    }                                                                         \
+    *masked_cons = name##_mask(*masked_cons + size, ring_size);               \
+}                                                                             \
+                                                                              \
+static inline void name##_write_packet(unsigned char *buf,                    \
+                                       const void *opaque,                    \
+                                       size_t size,                           \
+                                       RING_IDX *masked_prod,                 \
+                                       RING_IDX masked_cons,                  \
+                                       RING_IDX ring_size)                    \
+{                                                                             \
+    if (*masked_prod < masked_cons ||                                         \
+        size <= ring_size - *masked_prod) {                                   \
+        memcpy(buf + *masked_prod, opaque, size);                             \
+    } else {                                                                  \
+        memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod);         \
+        memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod),     \
+               size - (ring_size - *masked_prod));                            \
+    }                                                                         \
+    *masked_prod = name##_mask(*masked_prod + size, ring_size);               \
+}                                                                             \
+                                                                              \
+static inline RING_IDX name##_queued(RING_IDX prod,                           \
+                                     RING_IDX cons,                           \
+                                     RING_IDX ring_size)                      \
+{                                                                             \
+    RING_IDX size;                                                            \
+                                                                              \
+    if (prod == cons)                                                         \
+        return 0;                                                             \
+                                                                              \
+    prod = name##_mask(prod, ring_size);                                      \
+    cons = name##_mask(cons, ring_size);                                      \
+                                                                              \
+    if (prod == cons)                                                         \
+        return ring_size;                                                     \
+                                                                              \
+    if (prod > cons)                                                          \
+        size = prod - cons;                                                   \
+    else                                                                      \
+        size = ring_size - (cons - prod);                                     \
+    return size;                                                              \
+}                                                                             \
+                                                                              \
+struct name##_data {                                                          \
+    unsigned char *in; /* half of the allocation */                           \
+    unsigned char *out; /* half of the allocation */                          \
+}
+
+#define DEFINE_XEN_FLEX_RING_AND_INTF(name)                                   \
+struct name##_data_intf {                                                     \
+    RING_IDX in_cons, in_prod;                                                \
+                                                                              \
+    uint8_t pad1[56];                                                         \
+                                                                              \
+    RING_IDX out_cons, out_prod;                                              \
+                                                                              \
+    uint8_t pad2[56];                                                         \
+                                                                              \
+    RING_IDX ring_order;                                                      \
+    grant_ref_t ref[];                                                        \
+};                                                                            \
+DEFINE_XEN_FLEX_RING(name)
+
+#endif /* __XEN_PUBLIC_IO_RING_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/io/usbif.h b/include/hw/xen/interface/io/usbif.h
new file mode 100644 (file)
index 0000000..875af0d
--- /dev/null
@@ -0,0 +1,408 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * usbif.h
+ *
+ * USB I/O interface for Xen guest OSes.
+ *
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
+ */
+
+#ifndef __XEN_PUBLIC_IO_USBIF_H__
+#define __XEN_PUBLIC_IO_USBIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Detailed Interface Description
+ * ==============================
+ * The pvUSB interface is using a split driver design: a frontend driver in
+ * the guest and a backend driver in a driver domain (normally dom0) having
+ * access to the physical USB device(s) being passed to the guest.
+ *
+ * The frontend and backend drivers use XenStore to initiate the connection
+ * between them, the I/O activity is handled via two shared ring pages and an
+ * event channel. As the interface between frontend and backend is at the USB
+ * host connector level, multiple (up to 31) physical USB devices can be
+ * handled by a single connection.
+ *
+ * The Xen pvUSB device name is "qusb", so the frontend's XenStore entries are
+ * to be found under "device/qusb", while the backend's XenStore entries are
+ * under "backend/<guest-dom-id>/qusb".
+ *
+ * When a new pvUSB connection is established, the frontend needs to setup the
+ * two shared ring pages for communication and the event channel. The ring
+ * pages need to be made available to the backend via the grant table
+ * interface.
+ *
+ * One of the shared ring pages is used by the backend to inform the frontend
+ * about USB device plug events (device to be added or removed). This is the
+ * "conn-ring".
+ *
+ * The other ring page is used for USB I/O communication (requests and
+ * responses). This is the "urb-ring".
+ *
+ * Feature and Parameter Negotiation
+ * =================================
+ * The two halves of a Xen pvUSB driver utilize nodes within the XenStore to
+ * communicate capabilities and to negotiate operating parameters. This
+ * section enumerates these nodes which reside in the respective front and
+ * backend portions of the XenStore, following the XenBus convention.
+ *
+ * Any specified default value is in effect if the corresponding XenBus node
+ * is not present in the XenStore.
+ *
+ * XenStore nodes in sections marked "PRIVATE" are solely for use by the
+ * driver side whose XenBus tree contains them.
+ *
+ *****************************************************************************
+ *                            Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------ Backend Device Identification (PRIVATE) ------------------
+ *
+ * num-ports
+ *      Values:         unsigned [1...31]
+ *
+ *      Number of ports for this (virtual) USB host connector.
+ *
+ * usb-ver
+ *      Values:         unsigned [1...2]
+ *
+ *      USB version of this host connector: 1 = USB 1.1, 2 = USB 2.0.
+ *
+ * port/[1...31]
+ *      Values:         string
+ *
+ *      Physical USB device connected to the given port, e.g. "3-1.5".
+ *
+ *****************************************************************************
+ *                            Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ *      Values:         unsigned
+ *
+ *      The identifier of the Xen event channel used to signal activity
+ *      in the ring buffer.
+ *
+ * urb-ring-ref
+ *      Values:         unsigned
+ *
+ *      The Xen grant reference granting permission for the backend to map
+ *      the sole page in a single page sized ring buffer. This is the ring
+ *      buffer for urb requests.
+ *
+ * conn-ring-ref
+ *      Values:         unsigned
+ *
+ *      The Xen grant reference granting permission for the backend to map
+ *      the sole page in a single page sized ring buffer. This is the ring
+ *      buffer for connection/disconnection requests.
+ *
+ * protocol
+ *      Values:         string (XEN_IO_PROTO_ABI_*)
+ *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
+ *
+ *      The machine ABI rules governing the format of all ring request and
+ *      response structures.
+ *
+ * Protocol Description
+ * ====================
+ *
+ *-------------------------- USB device plug events --------------------------
+ *
+ * USB device plug events are send via the "conn-ring" shared page. As only
+ * events are being sent, the respective requests from the frontend to the
+ * backend are just dummy ones.
+ * The events sent to the frontend have the following layout:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |               id                |    portnum     |     speed      | 4
+ * +----------------+----------------+----------------+----------------+
+ *   id - uint16_t, event id (taken from the actual frontend dummy request)
+ *   portnum - uint8_t, port number (1 ... 31)
+ *   speed - uint8_t, device USBIF_SPEED_*, USBIF_SPEED_NONE == unplug
+ *
+ * The dummy request:
+ *         0                1        octet
+ * +----------------+----------------+
+ * |               id                | 2
+ * +----------------+----------------+
+ *   id - uint16_t, guest supplied value (no need for being unique)
+ *
+ *-------------------------- USB I/O request ---------------------------------
+ *
+ * A single USB I/O request on the "urb-ring" has the following layout:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |               id                |         nr_buffer_segs          | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                               pipe                                | 8
+ * +----------------+----------------+----------------+----------------+
+ * |         transfer_flags          |          buffer_length          | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                       request type specific                       | 16
+ * |                               data                                | 20
+ * +----------------+----------------+----------------+----------------+
+ * |                              seg[0]                               | 24
+ * |                               data                                | 28
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |             seg[USBIF_MAX_SEGMENTS_PER_REQUEST - 1]               | 144
+ * |                               data                                | 148
+ * +----------------+----------------+----------------+----------------+
+ * Bit field bit number 0 is always least significant bit, undefined bits must
+ * be zero.
+ *   id - uint16_t, guest supplied value
+ *   nr_buffer_segs - uint16_t, number of segment entries in seg[] array
+ *   pipe - uint32_t, bit field with multiple information:
+ *     bits 0-4: port request to send to
+ *     bit 5: unlink request with specified id (cancel I/O) if set (see below)
+ *     bit 7: direction (1 = read from device)
+ *     bits 8-14: device number on port
+ *     bits 15-18: endpoint of device
+ *     bits 30-31: request type: 00 = isochronous, 01 = interrupt,
+ *                               10 = control, 11 = bulk
+ *   transfer_flags - uint16_t, bit field with processing flags:
+ *     bit 0: less data than specified allowed
+ *   buffer_length - uint16_t, total length of data
+ *   request type specific data - 8 bytes, see below
+ *   seg[] - array with 8 byte elements, see below
+ *
+ * Request type specific data for isochronous request:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |            interval             |           start_frame           | 4
+ * +----------------+----------------+----------------+----------------+
+ * |       number_of_packets         |       nr_frame_desc_segs        | 8
+ * +----------------+----------------+----------------+----------------+
+ *   interval - uint16_t, time interval in msecs between frames
+ *   start_frame - uint16_t, start frame number
+ *   number_of_packets - uint16_t, number of packets to transfer
+ *   nr_frame_desc_segs - uint16_t number of seg[] frame descriptors elements
+ *
+ * Request type specific data for interrupt request:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |            interval             |                0                | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                                 0                                 | 8
+ * +----------------+----------------+----------------+----------------+
+ *   interval - uint16_t, time in msecs until interruption
+ *
+ * Request type specific data for control request:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |                      data of setup packet                         | 4
+ * |                                                                   | 8
+ * +----------------+----------------+----------------+----------------+
+ *
+ * Request type specific data for bulk request:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |                                 0                                 | 4
+ * |                                 0                                 | 8
+ * +----------------+----------------+----------------+----------------+
+ *
+ * Request type specific data for unlink request:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |           unlink_id             |                0                | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                                 0                                 | 8
+ * +----------------+----------------+----------------+----------------+
+ *   unlink_id - uint16_t, request id of request to terminate
+ *
+ * seg[] array element layout:
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |                               gref                                | 4
+ * +----------------+----------------+----------------+----------------+
+ * |             offset              |             length              | 8
+ * +----------------+----------------+----------------+----------------+
+ *   gref - uint32_t, grant reference of buffer page
+ *   offset - uint16_t, offset of buffer start in page
+ *   length - uint16_t, length of buffer in page
+ *
+ *-------------------------- USB I/O response --------------------------------
+ *
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |               id                |          start_frame            | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                              status                               | 8
+ * +----------------+----------------+----------------+----------------+
+ * |                          actual_length                            | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                           error_count                             | 16
+ * +----------------+----------------+----------------+----------------+
+ *   id - uint16_t, id of the request this response belongs to
+ *   start_frame - uint16_t, start_frame this response (iso requests only)
+ *   status - int32_t, USBIF_STATUS_* (non-iso requests)
+ *   actual_length - uint32_t, actual size of data transferred
+ *   error_count - uint32_t, number of errors (iso requests)
+ */
+
+enum usb_spec_version {
+    USB_VER_UNKNOWN = 0,
+    USB_VER_USB11,
+    USB_VER_USB20,
+    USB_VER_USB30,    /* not supported yet */
+};
+
+/*
+ *  USB pipe in usbif_request
+ *
+ *  - port number:      bits 0-4
+ *                              (USB_MAXCHILDREN is 31)
+ *
+ *  - operation flag:   bit 5
+ *                              (0 = submit urb,
+ *                               1 = unlink urb)
+ *
+ *  - direction:        bit 7
+ *                              (0 = Host-to-Device [Out]
+ *                               1 = Device-to-Host [In])
+ *
+ *  - device address:   bits 8-14
+ *
+ *  - endpoint:         bits 15-18
+ *
+ *  - pipe type:        bits 30-31
+ *                              (00 = isochronous, 01 = interrupt,
+ *                               10 = control, 11 = bulk)
+ */
+
+#define USBIF_PIPE_PORT_MASK    0x0000001f
+#define USBIF_PIPE_UNLINK       0x00000020
+#define USBIF_PIPE_DIR          0x00000080
+#define USBIF_PIPE_DEV_MASK     0x0000007f
+#define USBIF_PIPE_DEV_SHIFT    8
+#define USBIF_PIPE_EP_MASK      0x0000000f
+#define USBIF_PIPE_EP_SHIFT     15
+#define USBIF_PIPE_TYPE_MASK    0x00000003
+#define USBIF_PIPE_TYPE_SHIFT   30
+#define USBIF_PIPE_TYPE_ISOC    0
+#define USBIF_PIPE_TYPE_INT     1
+#define USBIF_PIPE_TYPE_CTRL    2
+#define USBIF_PIPE_TYPE_BULK    3
+
+#define usbif_pipeportnum(pipe)                 ((pipe) & USBIF_PIPE_PORT_MASK)
+#define usbif_setportnum_pipe(pipe, portnum)    ((pipe) | (portnum))
+
+#define usbif_pipeunlink(pipe)                  ((pipe) & USBIF_PIPE_UNLINK)
+#define usbif_pipesubmit(pipe)                  (!usbif_pipeunlink(pipe))
+#define usbif_setunlink_pipe(pipe)              ((pipe) | USBIF_PIPE_UNLINK)
+
+#define usbif_pipein(pipe)                      ((pipe) & USBIF_PIPE_DIR)
+#define usbif_pipeout(pipe)                     (!usbif_pipein(pipe))
+
+#define usbif_pipedevice(pipe)                  \
+        (((pipe) >> USBIF_PIPE_DEV_SHIFT) & USBIF_PIPE_DEV_MASK)
+
+#define usbif_pipeendpoint(pipe)                \
+        (((pipe) >> USBIF_PIPE_EP_SHIFT) & USBIF_PIPE_EP_MASK)
+
+#define usbif_pipetype(pipe)                    \
+        (((pipe) >> USBIF_PIPE_TYPE_SHIFT) & USBIF_PIPE_TYPE_MASK)
+#define usbif_pipeisoc(pipe)    (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_ISOC)
+#define usbif_pipeint(pipe)     (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_INT)
+#define usbif_pipectrl(pipe)    (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_CTRL)
+#define usbif_pipebulk(pipe)    (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_BULK)
+
+#define USBIF_MAX_SEGMENTS_PER_REQUEST (16)
+#define USBIF_MAX_PORTNR        31
+#define USBIF_RING_SIZE         4096
+
+/*
+ * RING for transferring urbs.
+ */
+struct usbif_request_segment {
+    grant_ref_t gref;
+    uint16_t offset;
+    uint16_t length;
+};
+
+struct usbif_urb_request {
+    uint16_t id;                  /* request id */
+    uint16_t nr_buffer_segs;      /* number of urb->transfer_buffer segments */
+
+    /* basic urb parameter */
+    uint32_t pipe;
+    uint16_t transfer_flags;
+#define USBIF_SHORT_NOT_OK      0x0001
+    uint16_t buffer_length;
+    union {
+        uint8_t ctrl[8];                 /* setup_packet (Ctrl) */
+
+        struct {
+            uint16_t interval;           /* maximum (1024*8) in usb core */
+            uint16_t start_frame;        /* start frame */
+            uint16_t number_of_packets;  /* number of ISO packet */
+            uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */
+        } isoc;
+
+        struct {
+            uint16_t interval;           /* maximum (1024*8) in usb core */
+            uint16_t pad[3];
+        } intr;
+
+        struct {
+            uint16_t unlink_id;          /* unlink request id */
+            uint16_t pad[3];
+        } unlink;
+
+    } u;
+
+    /* urb data segments */
+    struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST];
+};
+typedef struct usbif_urb_request usbif_urb_request_t;
+
+struct usbif_urb_response {
+    uint16_t id;           /* request id */
+    uint16_t start_frame;  /* start frame (ISO) */
+    int32_t status;        /* status (non-ISO) */
+#define USBIF_STATUS_OK         0
+#define USBIF_STATUS_NODEV      (-19)
+#define USBIF_STATUS_INVAL      (-22)
+#define USBIF_STATUS_STALL      (-32)
+#define USBIF_STATUS_IOERROR    (-71)
+#define USBIF_STATUS_BABBLE     (-75)
+#define USBIF_STATUS_SHUTDOWN   (-108)
+    int32_t actual_length; /* actual transfer length */
+    int32_t error_count;   /* number of ISO errors */
+};
+typedef struct usbif_urb_response usbif_urb_response_t;
+
+DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response);
+#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, USBIF_RING_SIZE)
+
+/*
+ * RING for notifying connect/disconnect events to frontend
+ */
+struct usbif_conn_request {
+    uint16_t id;
+};
+typedef struct usbif_conn_request usbif_conn_request_t;
+
+struct usbif_conn_response {
+    uint16_t id;           /* request id */
+    uint8_t portnum;       /* port number */
+    uint8_t speed;         /* usb_device_speed */
+#define USBIF_SPEED_NONE        0
+#define USBIF_SPEED_LOW         1
+#define USBIF_SPEED_FULL        2
+#define USBIF_SPEED_HIGH        3
+};
+typedef struct usbif_conn_response usbif_conn_response_t;
+
+DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response);
+#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, USBIF_RING_SIZE)
+
+#endif /* __XEN_PUBLIC_IO_USBIF_H__ */
diff --git a/include/hw/xen/interface/io/xenbus.h b/include/hw/xen/interface/io/xenbus.h
new file mode 100644 (file)
index 0000000..9cd0cd7
--- /dev/null
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: MIT */
+/*****************************************************************************
+ * xenbus.h
+ *
+ * Xenbus protocol details.
+ *
+ * Copyright (C) 2005 XenSource Ltd.
+ */
+
+#ifndef _XEN_PUBLIC_IO_XENBUS_H
+#define _XEN_PUBLIC_IO_XENBUS_H
+
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus.  States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+    XenbusStateUnknown       = 0,
+
+    XenbusStateInitialising  = 1,
+
+    /*
+     * InitWait: Finished early initialisation but waiting for information
+     * from the peer or hotplug scripts.
+     */
+    XenbusStateInitWait      = 2,
+
+    /*
+     * Initialised: Waiting for a connection from the peer.
+     */
+    XenbusStateInitialised   = 3,
+
+    XenbusStateConnected     = 4,
+
+    /*
+     * Closing: The device is being closed due to an error or an unplug event.
+     */
+    XenbusStateClosing       = 5,
+
+    XenbusStateClosed        = 6,
+
+    /*
+     * Reconfiguring: The device is being reconfigured.
+     */
+    XenbusStateReconfiguring = 7,
+
+    XenbusStateReconfigured  = 8
+};
+typedef enum xenbus_state XenbusState;
+
+#endif /* _XEN_PUBLIC_IO_XENBUS_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/io/xs_wire.h b/include/hw/xen/interface/io/xs_wire.h
new file mode 100644 (file)
index 0000000..04e6849
--- /dev/null
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Details of the "wire" protocol between Xen Store Daemon and client
+ * library or guest kernel.
+ *
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ */
+
+#ifndef _XS_WIRE_H
+#define _XS_WIRE_H
+
+enum xsd_sockmsg_type
+{
+    XS_CONTROL,
+#define XS_DEBUG XS_CONTROL
+    XS_DIRECTORY,
+    XS_READ,
+    XS_GET_PERMS,
+    XS_WATCH,
+    XS_UNWATCH,
+    XS_TRANSACTION_START,
+    XS_TRANSACTION_END,
+    XS_INTRODUCE,
+    XS_RELEASE,
+    XS_GET_DOMAIN_PATH,
+    XS_WRITE,
+    XS_MKDIR,
+    XS_RM,
+    XS_SET_PERMS,
+    XS_WATCH_EVENT,
+    XS_ERROR,
+    XS_IS_DOMAIN_INTRODUCED,
+    XS_RESUME,
+    XS_SET_TARGET,
+    /* XS_RESTRICT has been removed */
+    XS_RESET_WATCHES = XS_SET_TARGET + 2,
+    XS_DIRECTORY_PART,
+
+    XS_TYPE_COUNT,      /* Number of valid types. */
+
+    XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */
+};
+
+#define XS_WRITE_NONE "NONE"
+#define XS_WRITE_CREATE "CREATE"
+#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
+
+/* We hand errors as strings, for portability. */
+struct xsd_errors
+{
+    int errnum;
+    const char *errstring;
+};
+#ifdef EINVAL
+#define XSD_ERROR(x) { x, #x }
+/* LINTED: static unused */
+static const struct xsd_errors xsd_errors[]
+#if defined(__GNUC__)
+__attribute__((unused))
+#endif
+    = {
+    /* /!\ New errors should be added at the end of the array. */
+    XSD_ERROR(EINVAL),
+    XSD_ERROR(EACCES),
+    XSD_ERROR(EEXIST),
+    XSD_ERROR(EISDIR),
+    XSD_ERROR(ENOENT),
+    XSD_ERROR(ENOMEM),
+    XSD_ERROR(ENOSPC),
+    XSD_ERROR(EIO),
+    XSD_ERROR(ENOTEMPTY),
+    XSD_ERROR(ENOSYS),
+    XSD_ERROR(EROFS),
+    XSD_ERROR(EBUSY),
+    XSD_ERROR(EAGAIN),
+    XSD_ERROR(EISCONN),
+    XSD_ERROR(E2BIG),
+    XSD_ERROR(EPERM),
+};
+#endif
+
+struct xsd_sockmsg
+{
+    uint32_t type;  /* XS_??? */
+    uint32_t req_id;/* Request identifier, echoed in daemon's response.  */
+    uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */
+    uint32_t len;   /* Length of data following this. */
+
+    /* Generally followed by nul-terminated string(s). */
+};
+
+enum xs_watch_type
+{
+    XS_WATCH_PATH = 0,
+    XS_WATCH_TOKEN
+};
+
+/*
+ * `incontents 150 xenstore_struct XenStore wire protocol.
+ *
+ * Inter-domain shared memory communications. */
+#define XENSTORE_RING_SIZE 1024
+typedef uint32_t XENSTORE_RING_IDX;
+#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1))
+struct xenstore_domain_interface {
+    char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */
+    char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
+    XENSTORE_RING_IDX req_cons, req_prod;
+    XENSTORE_RING_IDX rsp_cons, rsp_prod;
+    uint32_t server_features; /* Bitmap of features supported by the server */
+    uint32_t connection;
+    uint32_t error;
+};
+
+/* Violating this is very bad.  See docs/misc/xenstore.txt. */
+#define XENSTORE_PAYLOAD_MAX 4096
+
+/* Violating these just gets you an error back */
+#define XENSTORE_ABS_PATH_MAX 3072
+#define XENSTORE_REL_PATH_MAX 2048
+
+/* The ability to reconnect a ring */
+#define XENSTORE_SERVER_FEATURE_RECONNECTION 1
+/* The presence of the "error" field in the ring page */
+#define XENSTORE_SERVER_FEATURE_ERROR        2
+
+/* Valid values for the connection field */
+#define XENSTORE_CONNECTED 0 /* the steady-state */
+#define XENSTORE_RECONNECT 1 /* reconnect in progress */
+
+/* Valid values for the error field */
+#define XENSTORE_ERROR_NONE    0 /* No error */
+#define XENSTORE_ERROR_COMM    1 /* Communication problem */
+#define XENSTORE_ERROR_RINGIDX 2 /* Invalid ring index */
+#define XENSTORE_ERROR_PROTO   3 /* Protocol violation (payload too long) */
+
+#endif /* _XS_WIRE_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/memory.h b/include/hw/xen/interface/memory.h
new file mode 100644 (file)
index 0000000..29cf5c8
--- /dev/null
@@ -0,0 +1,746 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * memory.h
+ *
+ * Memory reservation and information.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_MEMORY_H__
+#define __XEN_PUBLIC_MEMORY_H__
+
+#include "xen.h"
+#include "physdev.h"
+
+/*
+ * Increase or decrease the specified domain's memory reservation. Returns the
+ * number of extents successfully allocated or freed.
+ * arg == addr of struct xen_memory_reservation.
+ */
+#define XENMEM_increase_reservation 0
+#define XENMEM_decrease_reservation 1
+#define XENMEM_populate_physmap     6
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
+/*
+ * Maximum # bits addressable by the user of the allocated region (e.g., I/O
+ * devices often have a 32-bit limitation even in 64-bit systems). If zero
+ * then the user has no addressing restriction. This field is not used by
+ * XENMEM_decrease_reservation.
+ */
+#define XENMEMF_address_bits(x)     (x)
+#define XENMEMF_get_address_bits(x) ((x) & 0xffu)
+/* NUMA node to allocate from. */
+#define XENMEMF_node(x)     (((x) + 1) << 8)
+#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
+/* Flag to populate physmap with populate-on-demand entries */
+#define XENMEMF_populate_on_demand (1<<16)
+/* Flag to request allocation only from the node specified */
+#define XENMEMF_exact_node_request  (1<<17)
+#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request)
+/* Flag to indicate the node specified is virtual node */
+#define XENMEMF_vnode  (1<<18)
+#endif
+
+struct xen_memory_reservation {
+
+    /*
+     * XENMEM_increase_reservation:
+     *   OUT: MFN (*not* GMFN) bases of extents that were allocated
+     * XENMEM_decrease_reservation:
+     *   IN:  GMFN bases of extents to free
+     * XENMEM_populate_physmap:
+     *   IN:  GPFN bases of extents to populate with memory
+     *   OUT: GMFN bases of extents that were allocated
+     *   (NB. This command also updates the mach_to_phys translation table)
+     * XENMEM_claim_pages:
+     *   IN: must be zero
+     */
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
+
+    /* Number of extents, and size/alignment of each (2^extent_order pages). */
+    xen_ulong_t    nr_extents;
+    unsigned int   extent_order;
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
+    /* XENMEMF flags. */
+    unsigned int   mem_flags;
+#else
+    unsigned int   address_bits;
+#endif
+
+    /*
+     * Domain whose reservation is being changed.
+     * Unprivileged domains can specify only DOMID_SELF.
+     */
+    domid_t        domid;
+};
+typedef struct xen_memory_reservation xen_memory_reservation_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
+
+/*
+ * An atomic exchange of memory pages. If return code is zero then
+ * @out.extent_list provides GMFNs of the newly-allocated memory.
+ * Returns zero on complete success, otherwise a negative error code.
+ * On complete success then always @nr_exchanged == @in.nr_extents.
+ * On partial success @nr_exchanged indicates how much work was done.
+ *
+ * Note that only PV guests can use this operation.
+ */
+#define XENMEM_exchange             11
+struct xen_memory_exchange {
+    /*
+     * [IN] Details of memory extents to be exchanged (GMFN bases).
+     * Note that @in.address_bits is ignored and unused.
+     */
+    struct xen_memory_reservation in;
+
+    /*
+     * [IN/OUT] Details of new memory extents.
+     * We require that:
+     *  1. @in.domid == @out.domid
+     *  2. @in.nr_extents  << @in.extent_order ==
+     *     @out.nr_extents << @out.extent_order
+     *  3. @in.extent_start and @out.extent_start lists must not overlap
+     *  4. @out.extent_start lists GPFN bases to be populated
+     *  5. @out.extent_start is overwritten with allocated GMFN bases
+     */
+    struct xen_memory_reservation out;
+
+    /*
+     * [OUT] Number of input extents that were successfully exchanged:
+     *  1. The first @nr_exchanged input extents were successfully
+     *     deallocated.
+     *  2. The corresponding first entries in the output extent list correctly
+     *     indicate the GMFNs that were successfully exchanged.
+     *  3. All other input and output extents are untouched.
+     *  4. If not all input exents are exchanged then the return code of this
+     *     command will be non-zero.
+     *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
+     */
+    xen_ulong_t nr_exchanged;
+};
+typedef struct xen_memory_exchange xen_memory_exchange_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t);
+
+/*
+ * Returns the maximum machine frame number of mapped RAM in this system.
+ * This command always succeeds (it never returns an error code).
+ * arg == NULL.
+ */
+#define XENMEM_maximum_ram_page     2
+
+struct xen_memory_domain {
+    /* [IN] Domain information is being queried for. */
+    domid_t domid;
+};
+
+/*
+ * Returns the current or maximum memory reservation, in pages, of the
+ * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
+ * arg == addr of struct xen_memory_domain.
+ */
+#define XENMEM_current_reservation  3
+#define XENMEM_maximum_reservation  4
+
+/*
+ * Returns the maximum GFN in use by the specified domain (may be DOMID_SELF).
+ * Returns -ve errcode on failure.
+ * arg == addr of struct xen_memory_domain.
+ */
+#define XENMEM_maximum_gpfn         14
+
+/*
+ * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table do not implement
+ * this command.
+ * arg == addr of xen_machphys_mfn_list_t.
+ */
+#define XENMEM_machphys_mfn_list    5
+struct xen_machphys_mfn_list {
+    /*
+     * Size of the 'extent_start' array. Fewer entries will be filled if the
+     * machphys table is smaller than max_extents * 2MB.
+     */
+    unsigned int max_extents;
+
+    /*
+     * Pointer to buffer to fill with list of extent starts. If there are
+     * any large discontiguities in the machine address space, 2MB gaps in
+     * the machphys table will be represented by an MFN base of zero.
+     */
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
+
+    /*
+     * Number of extents written to the above array. This will be smaller
+     * than 'max_extents' if the machphys table is smaller than max_e * 2MB.
+     */
+    unsigned int nr_extents;
+};
+typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
+
+/*
+ * For a compat caller, this is identical to XENMEM_machphys_mfn_list.
+ *
+ * For a non compat caller, this functions similarly to
+ * XENMEM_machphys_mfn_list, but returns the mfns making up the compatibility
+ * m2p table.
+ */
+#define XENMEM_machphys_compat_mfn_list     25
+
+/*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping     12
+struct xen_machphys_mapping {
+    xen_ulong_t v_start, v_end; /* Start and end virtual addresses.   */
+    xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. */
+};
+typedef struct xen_machphys_mapping xen_machphys_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
+
+/* Source mapping space. */
+/* ` enum phys_map_space { */
+#define XENMAPSPACE_shared_info  0 /* shared info page */
+#define XENMAPSPACE_grant_table  1 /* grant table page */
+#define XENMAPSPACE_gmfn         2 /* GMFN */
+#define XENMAPSPACE_gmfn_range   3 /* GMFN range, XENMEM_add_to_physmap only. */
+#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom,
+                                    * XENMEM_add_to_physmap_batch only. */
+#define XENMAPSPACE_dev_mmio     5 /* device mmio region
+                                      ARM only; the region is mapped in
+                                      Stage-2 using the Normal Memory
+                                      Inner/Outer Write-Back Cacheable
+                                      memory attribute. */
+/* ` } */
+
+/*
+ * Sets the GPFN at which a particular page appears in the specified guest's
+ * physical address space (translated guests only).
+ * arg == addr of xen_add_to_physmap_t.
+ */
+#define XENMEM_add_to_physmap      7
+struct xen_add_to_physmap {
+    /* Which domain to change the mapping for. */
+    domid_t domid;
+
+    /* Number of pages to go through for gmfn_range */
+    uint16_t    size;
+
+    unsigned int space; /* => enum phys_map_space */
+
+#define XENMAPIDX_grant_table_status 0x80000000
+
+    /* Index into space being mapped. */
+    xen_ulong_t idx;
+
+    /* GPFN in domid where the source mapping page should appear. */
+    xen_pfn_t     gpfn;
+};
+typedef struct xen_add_to_physmap xen_add_to_physmap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
+
+/* A batched version of add_to_physmap. */
+#define XENMEM_add_to_physmap_batch 23
+struct xen_add_to_physmap_batch {
+    /* IN */
+    /* Which domain to change the mapping for. */
+    domid_t domid;
+    uint16_t space; /* => enum phys_map_space */
+
+    /* Number of pages to go through */
+    uint16_t size;
+
+#if __XEN_INTERFACE_VERSION__ < 0x00040700
+    domid_t foreign_domid; /* IFF gmfn_foreign. Should be 0 for other spaces. */
+#else
+    union xen_add_to_physmap_batch_extra {
+        domid_t foreign_domid; /* gmfn_foreign */
+        uint16_t res0;  /* All the other spaces. Should be 0 */
+    } u;
+#endif
+
+    /* Indexes into space being mapped. */
+    XEN_GUEST_HANDLE(xen_ulong_t) idxs;
+
+    /* GPFN in domid where the source mapping page should appear. */
+    XEN_GUEST_HANDLE(xen_pfn_t) gpfns;
+
+    /* OUT */
+
+    /* Per index error code. */
+    XEN_GUEST_HANDLE(int) errs;
+};
+typedef struct xen_add_to_physmap_batch xen_add_to_physmap_batch_t;
+DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_batch_t);
+
+#if __XEN_INTERFACE_VERSION__ < 0x00040400
+#define XENMEM_add_to_physmap_range XENMEM_add_to_physmap_batch
+#define xen_add_to_physmap_range xen_add_to_physmap_batch
+typedef struct xen_add_to_physmap_batch xen_add_to_physmap_range_t;
+DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_range_t);
+#endif
+
+/*
+ * Unmaps the page appearing at a particular GPFN from the specified guest's
+ * physical address space (translated guests only).
+ * arg == addr of xen_remove_from_physmap_t.
+ */
+#define XENMEM_remove_from_physmap      15
+struct xen_remove_from_physmap {
+    /* Which domain to change the mapping for. */
+    domid_t domid;
+
+    /* GPFN of the current mapping of the page. */
+    xen_pfn_t     gpfn;
+};
+typedef struct xen_remove_from_physmap xen_remove_from_physmap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t);
+
+/*** REMOVED ***/
+/*#define XENMEM_translate_gpfn_list  8*/
+
+/*
+ * Returns the pseudo-physical memory map as it was when the domain
+ * was started (specified by XENMEM_set_memory_map).
+ * arg == addr of xen_memory_map_t.
+ */
+#define XENMEM_memory_map           9
+struct xen_memory_map {
+    /*
+     * On call the number of entries which can be stored in buffer. On
+     * return the number of entries which have been stored in
+     * buffer.
+     */
+    unsigned int nr_entries;
+
+    /*
+     * Entries in the buffer are in the same format as returned by the
+     * BIOS INT 0x15 EAX=0xE820 call.
+     */
+    XEN_GUEST_HANDLE(void) buffer;
+};
+typedef struct xen_memory_map xen_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t);
+
+/*
+ * Returns the real physical memory map. Passes the same structure as
+ * XENMEM_memory_map.
+ * Specifying buffer as NULL will return the number of entries required
+ * to store the complete memory map.
+ * arg == addr of xen_memory_map_t.
+ */
+#define XENMEM_machine_memory_map   10
+
+/*
+ * Set the pseudo-physical memory map of a domain, as returned by
+ * XENMEM_memory_map.
+ * arg == addr of xen_foreign_memory_map_t.
+ */
+#define XENMEM_set_memory_map       13
+struct xen_foreign_memory_map {
+    domid_t domid;
+    struct xen_memory_map map;
+};
+typedef struct xen_foreign_memory_map xen_foreign_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t);
+
+#define XENMEM_set_pod_target       16
+#define XENMEM_get_pod_target       17
+struct xen_pod_target {
+    /* IN */
+    uint64_t target_pages;
+    /* OUT */
+    uint64_t tot_pages;
+    uint64_t pod_cache_pages;
+    uint64_t pod_entries;
+    /* IN */
+    domid_t domid;
+};
+typedef struct xen_pod_target xen_pod_target_t;
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+#ifndef uint64_aligned_t
+#define uint64_aligned_t uint64_t
+#endif
+
+/*
+ * Get the number of MFNs saved through memory sharing.
+ * The call never fails.
+ */
+#define XENMEM_get_sharing_freed_pages    18
+#define XENMEM_get_sharing_shared_pages   19
+
+#define XENMEM_paging_op                    20
+#define XENMEM_paging_op_nominate           0
+#define XENMEM_paging_op_evict              1
+#define XENMEM_paging_op_prep               2
+
+struct xen_mem_paging_op {
+    uint8_t     op;         /* XENMEM_paging_op_* */
+    domid_t     domain;
+
+    /* IN: (XENMEM_paging_op_prep) buffer to immediately fill page from */
+    XEN_GUEST_HANDLE_64(const_uint8) buffer;
+    /* IN:  gfn of page being operated on */
+    uint64_aligned_t    gfn;
+};
+typedef struct xen_mem_paging_op xen_mem_paging_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mem_paging_op_t);
+
+#define XENMEM_access_op                    21
+#define XENMEM_access_op_set_access         0
+#define XENMEM_access_op_get_access         1
+/*
+ * XENMEM_access_op_enable_emulate and XENMEM_access_op_disable_emulate are
+ * currently unused, but since they have been in use please do not reuse them.
+ *
+ * #define XENMEM_access_op_enable_emulate     2
+ * #define XENMEM_access_op_disable_emulate    3
+ */
+#define XENMEM_access_op_set_access_multi   4
+
+typedef enum {
+    XENMEM_access_n,
+    XENMEM_access_r,
+    XENMEM_access_w,
+    XENMEM_access_rw,
+    XENMEM_access_x,
+    XENMEM_access_rx,
+    XENMEM_access_wx,
+    XENMEM_access_rwx,
+    /*
+     * Page starts off as r-x, but automatically
+     * change to r-w on a write
+     */
+    XENMEM_access_rx2rw,
+    /*
+     * Log access: starts off as n, automatically
+     * goes to rwx, generating an event without
+     * pausing the vcpu
+     */
+    XENMEM_access_n2rwx,
+    /* Take the domain default */
+    XENMEM_access_default
+} xenmem_access_t;
+
+struct xen_mem_access_op {
+    /* XENMEM_access_op_* */
+    uint8_t op;
+    /* xenmem_access_t */
+    uint8_t access;
+    domid_t domid;
+    /*
+     * Number of pages for set op (or size of pfn_list for
+     * XENMEM_access_op_set_access_multi)
+     * Ignored on setting default access and other ops
+     */
+    uint32_t nr;
+    /*
+     * First pfn for set op
+     * pfn for get op
+     * ~0ull is used to set and get the default access for pages
+     */
+    uint64_aligned_t pfn;
+    /*
+     * List of pfns to set access for
+     * Used only with XENMEM_access_op_set_access_multi
+     */
+    XEN_GUEST_HANDLE(const_uint64) pfn_list;
+    /*
+     * Corresponding list of access settings for pfn_list
+     * Used only with XENMEM_access_op_set_access_multi
+     */
+    XEN_GUEST_HANDLE(const_uint8) access_list;
+};
+typedef struct xen_mem_access_op xen_mem_access_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t);
+
+#define XENMEM_sharing_op                   22
+#define XENMEM_sharing_op_nominate_gfn      0
+#define XENMEM_sharing_op_nominate_gref     1
+#define XENMEM_sharing_op_share             2
+#define XENMEM_sharing_op_debug_gfn         3
+#define XENMEM_sharing_op_debug_mfn         4
+#define XENMEM_sharing_op_debug_gref        5
+#define XENMEM_sharing_op_add_physmap       6
+#define XENMEM_sharing_op_audit             7
+#define XENMEM_sharing_op_range_share       8
+#define XENMEM_sharing_op_fork              9
+#define XENMEM_sharing_op_fork_reset        10
+
+#define XENMEM_SHARING_OP_S_HANDLE_INVALID  (-10)
+#define XENMEM_SHARING_OP_C_HANDLE_INVALID  (-9)
+
+/* The following allows sharing of grant refs. This is useful
+ * for sharing utilities sitting as "filters" in IO backends
+ * (e.g. memshr + blktap(2)). The IO backend is only exposed
+ * to grant references, and this allows sharing of the grefs */
+#define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG   (xen_mk_ullong(1) << 62)
+
+#define XENMEM_SHARING_OP_FIELD_MAKE_GREF(field, val)  \
+    (field) = (XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG | val)
+#define XENMEM_SHARING_OP_FIELD_IS_GREF(field)         \
+    ((field) & XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG)
+#define XENMEM_SHARING_OP_FIELD_GET_GREF(field)        \
+    ((field) & (~XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG))
+
+struct xen_mem_sharing_op {
+    uint8_t     op;     /* XENMEM_sharing_op_* */
+    domid_t     domain;
+
+    union {
+        struct mem_sharing_op_nominate {  /* OP_NOMINATE_xxx           */
+            union {
+                uint64_aligned_t gfn;     /* IN: gfn to nominate       */
+                uint32_t      grant_ref;  /* IN: grant ref to nominate */
+            } u;
+            uint64_aligned_t  handle;     /* OUT: the handle           */
+        } nominate;
+        struct mem_sharing_op_share {     /* OP_SHARE/ADD_PHYSMAP */
+            uint64_aligned_t source_gfn;    /* IN: the gfn of the source page */
+            uint64_aligned_t source_handle; /* IN: handle to the source page */
+            uint64_aligned_t client_gfn;    /* IN: the client gfn */
+            uint64_aligned_t client_handle; /* IN: handle to the client page */
+            domid_t  client_domain; /* IN: the client domain id */
+        } share;
+        struct mem_sharing_op_range {         /* OP_RANGE_SHARE */
+            uint64_aligned_t first_gfn;      /* IN: the first gfn */
+            uint64_aligned_t last_gfn;       /* IN: the last gfn */
+            uint64_aligned_t opaque;         /* Must be set to 0 */
+            domid_t client_domain;           /* IN: the client domain id */
+            uint16_t _pad[3];                /* Must be set to 0 */
+        } range;
+        struct mem_sharing_op_debug {     /* OP_DEBUG_xxx */
+            union {
+                uint64_aligned_t gfn;      /* IN: gfn to debug          */
+                uint64_aligned_t mfn;      /* IN: mfn to debug          */
+                uint32_t gref;     /* IN: gref to debug         */
+            } u;
+        } debug;
+        struct mem_sharing_op_fork {      /* OP_FORK{,_RESET} */
+            domid_t parent_domain;        /* IN: parent's domain id */
+/* Only makes sense for short-lived forks */
+#define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
+/* Only makes sense for short-lived forks */
+#define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
+#define XENMEM_FORK_RESET_STATE        (1u << 2)
+#define XENMEM_FORK_RESET_MEMORY       (1u << 3)
+            uint16_t flags;               /* IN: optional settings */
+            uint32_t pad;                 /* Must be set to 0 */
+        } fork;
+    } u;
+};
+typedef struct xen_mem_sharing_op xen_mem_sharing_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
+
+/*
+ * Attempt to stake a claim for a domain on a quantity of pages
+ * of system RAM, but _not_ assign specific pageframes.  Only
+ * arithmetic is performed so the hypercall is very fast and need
+ * not be preemptible, thus sidestepping time-of-check-time-of-use
+ * races for memory allocation.  Returns 0 if the hypervisor page
+ * allocator has atomically and successfully claimed the requested
+ * number of pages, else non-zero.
+ *
+ * Any domain may have only one active claim.  When sufficient memory
+ * has been allocated to resolve the claim, the claim silently expires.
+ * Claiming zero pages effectively resets any outstanding claim and
+ * is always successful.
+ *
+ * Note that a valid claim may be staked even after memory has been
+ * allocated for a domain.  In this case, the claim is not incremental,
+ * i.e. if the domain's total page count is 3, and a claim is staked
+ * for 10, only 7 additional pages are claimed.
+ *
+ * Caller must be privileged or the hypercall fails.
+ */
+#define XENMEM_claim_pages                  24
+
+/*
+ * XENMEM_claim_pages flags - the are no flags at this time.
+ * The zero value is appropriate.
+ */
+
+/*
+ * With some legacy devices, certain guest-physical addresses cannot safely
+ * be used for other purposes, e.g. to map guest RAM.  This hypercall
+ * enumerates those regions so the toolstack can avoid using them.
+ */
+#define XENMEM_reserved_device_memory_map   27
+struct xen_reserved_device_memory {
+    xen_pfn_t start_pfn;
+    xen_ulong_t nr_pages;
+};
+typedef struct xen_reserved_device_memory xen_reserved_device_memory_t;
+DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_t);
+
+struct xen_reserved_device_memory_map {
+#define XENMEM_RDM_ALL 1 /* Request all regions (ignore dev union). */
+    /* IN */
+    uint32_t flags;
+    /*
+     * IN/OUT
+     *
+     * Gets set to the required number of entries when too low,
+     * signaled by error code -ERANGE.
+     */
+    unsigned int nr_entries;
+    /* OUT */
+    XEN_GUEST_HANDLE(xen_reserved_device_memory_t) buffer;
+    /* IN */
+    union {
+        physdev_pci_device_t pci;
+    } dev;
+};
+typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t);
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+/*
+ * Get the pages for a particular guest resource, so that they can be
+ * mapped directly by a tools domain.
+ */
+#define XENMEM_acquire_resource 28
+struct xen_mem_acquire_resource {
+    /* IN - The domain whose resource is to be mapped */
+    domid_t domid;
+    /* IN - the type of resource */
+    uint16_t type;
+
+#define XENMEM_resource_ioreq_server 0
+#define XENMEM_resource_grant_table 1
+#define XENMEM_resource_vmtrace_buf 2
+
+    /*
+     * IN - a type-specific resource identifier, which must be zero
+     *      unless stated otherwise.
+     *
+     * type == XENMEM_resource_ioreq_server -> id == ioreq server id
+     * type == XENMEM_resource_grant_table -> id defined below
+     */
+    uint32_t id;
+
+#define XENMEM_resource_grant_table_id_shared 0
+#define XENMEM_resource_grant_table_id_status 1
+
+    /*
+     * IN/OUT
+     *
+     * As an IN parameter number of frames of the resource to be mapped.
+     * This value may be updated over the course of the operation.
+     *
+     * When frame_list is NULL and nr_frames is 0, this is interpreted as a
+     * request for the size of the resource, which shall be returned in the
+     * nr_frames field.
+     *
+     * The size of a resource will never be zero, but a nonzero result doesn't
+     * guarantee that a subsequent mapping request will be successful.  There
+     * are further type/id specific constraints which may change between the
+     * two calls.
+     */
+    uint32_t nr_frames;
+    /*
+     * Padding field, must be zero on input.
+     * In a previous version this was an output field with the lowest bit
+     * named XENMEM_rsrc_acq_caller_owned. Future versions of this interface
+     * will not reuse this bit as an output with the field being zero on
+     * input.
+     */
+    uint32_t pad;
+    /*
+     * IN - the index of the initial frame to be mapped. This parameter
+     *      is ignored if nr_frames is 0.  This value may be updated
+     *      over the course of the operation.
+     */
+    uint64_t frame;
+
+#define XENMEM_resource_ioreq_server_frame_bufioreq 0
+#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n))
+
+    /*
+     * IN/OUT - If the tools domain is PV then, upon return, frame_list
+     *          will be populated with the MFNs of the resource.
+     *          If the tools domain is HVM then it is expected that, on
+     *          entry, frame_list will be populated with a list of GFNs
+     *          that will be mapped to the MFNs of the resource.
+     *          If -EIO is returned then the frame_list has only been
+     *          partially mapped and it is up to the caller to unmap all
+     *          the GFNs.
+     *          This parameter may be NULL if nr_frames is 0.  This
+     *          value may be updated over the course of the operation.
+     */
+    XEN_GUEST_HANDLE(xen_pfn_t) frame_list;
+};
+typedef struct xen_mem_acquire_resource xen_mem_acquire_resource_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mem_acquire_resource_t);
+
+/*
+ * XENMEM_get_vnumainfo used by guest to get
+ * vNUMA topology from hypervisor.
+ */
+#define XENMEM_get_vnumainfo                26
+
+/* vNUMA node memory ranges */
+struct xen_vmemrange {
+    uint64_t start, end;
+    unsigned int flags;
+    unsigned int nid;
+};
+typedef struct xen_vmemrange xen_vmemrange_t;
+DEFINE_XEN_GUEST_HANDLE(xen_vmemrange_t);
+
+/*
+ * vNUMA topology specifies vNUMA node number, distance table,
+ * memory ranges and vcpu mapping provided for guests.
+ * XENMEM_get_vnumainfo hypercall expects to see from guest
+ * nr_vnodes, nr_vmemranges and nr_vcpus to indicate available memory.
+ * After filling guests structures, nr_vnodes, nr_vmemranges and nr_vcpus
+ * copied back to guest. Domain returns expected values of nr_vnodes,
+ * nr_vmemranges and nr_vcpus to guest if the values where incorrect.
+ */
+struct xen_vnuma_topology_info {
+    /* IN */
+    domid_t domid;
+    uint16_t pad;
+    /* IN/OUT */
+    unsigned int nr_vnodes;
+    unsigned int nr_vcpus;
+    unsigned int nr_vmemranges;
+    /* OUT */
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t pad;
+    } vdistance;
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t pad;
+    } vcpu_to_vnode;
+    union {
+        XEN_GUEST_HANDLE(xen_vmemrange_t) h;
+        uint64_t pad;
+    } vmemrange;
+};
+typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t;
+DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t);
+
+/* Next available subop number is 29 */
+
+#endif /* __XEN_PUBLIC_MEMORY_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/physdev.h b/include/hw/xen/interface/physdev.h
new file mode 100644 (file)
index 0000000..f0c0d47
--- /dev/null
@@ -0,0 +1,366 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (c) 2006, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_PHYSDEV_H__
+#define __XEN_PUBLIC_PHYSDEV_H__
+
+#include "xen.h"
+
+/*
+ * Prototype for this hypercall is:
+ *  int physdev_op(int cmd, void *args)
+ * @cmd  == PHYSDEVOP_??? (physdev operation).
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Notify end-of-interrupt (EOI) for the specified IRQ.
+ * @arg == pointer to physdev_eoi structure.
+ */
+#define PHYSDEVOP_eoi                   12
+struct physdev_eoi {
+    /* IN */
+    uint32_t irq;
+};
+typedef struct physdev_eoi physdev_eoi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
+
+/*
+ * Register a shared page for the hypervisor to indicate whether the guest
+ * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly
+ * once the guest used this function in that the associated event channel
+ * will automatically get unmasked. The page registered is used as a bit
+ * array indexed by Xen's PIRQ value.
+ */
+#define PHYSDEVOP_pirq_eoi_gmfn_v1       17
+/*
+ * Register a shared page for the hypervisor to indicate whether the
+ * guest must issue PHYSDEVOP_eoi. This hypercall is very similar to
+ * PHYSDEVOP_pirq_eoi_gmfn_v1 but it doesn't change the semantics of
+ * PHYSDEVOP_eoi. The page registered is used as a bit array indexed by
+ * Xen's PIRQ value.
+ */
+#define PHYSDEVOP_pirq_eoi_gmfn_v2       28
+struct physdev_pirq_eoi_gmfn {
+    /* IN */
+    xen_pfn_t gmfn;
+};
+typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t);
+
+/*
+ * Query the status of an IRQ line.
+ * @arg == pointer to physdev_irq_status_query structure.
+ */
+#define PHYSDEVOP_irq_status_query       5
+struct physdev_irq_status_query {
+    /* IN */
+    uint32_t irq;
+    /* OUT */
+    uint32_t flags; /* XENIRQSTAT_* */
+};
+typedef struct physdev_irq_status_query physdev_irq_status_query_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t);
+
+/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */
+#define _XENIRQSTAT_needs_eoi   (0)
+#define  XENIRQSTAT_needs_eoi   (1U<<_XENIRQSTAT_needs_eoi)
+
+/* IRQ shared by multiple guests? */
+#define _XENIRQSTAT_shared      (1)
+#define  XENIRQSTAT_shared      (1U<<_XENIRQSTAT_shared)
+
+/*
+ * Set the current VCPU's I/O privilege level.
+ * @arg == pointer to physdev_set_iopl structure.
+ */
+#define PHYSDEVOP_set_iopl               6
+struct physdev_set_iopl {
+    /* IN */
+    uint32_t iopl;
+};
+typedef struct physdev_set_iopl physdev_set_iopl_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t);
+
+/*
+ * Set the current VCPU's I/O-port permissions bitmap.
+ * @arg == pointer to physdev_set_iobitmap structure.
+ */
+#define PHYSDEVOP_set_iobitmap           7
+struct physdev_set_iobitmap {
+    /* IN */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030205
+    XEN_GUEST_HANDLE(uint8) bitmap;
+#else
+    uint8_t *bitmap;
+#endif
+    uint32_t nr_ports;
+};
+typedef struct physdev_set_iobitmap physdev_set_iobitmap_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t);
+
+/*
+ * Read or write an IO-APIC register.
+ * @arg == pointer to physdev_apic structure.
+ */
+#define PHYSDEVOP_apic_read              8
+#define PHYSDEVOP_apic_write             9
+struct physdev_apic {
+    /* IN */
+    unsigned long apic_physbase;
+    uint32_t reg;
+    /* IN or OUT */
+    uint32_t value;
+};
+typedef struct physdev_apic physdev_apic_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_apic_t);
+
+/*
+ * Allocate or free a physical upcall vector for the specified IRQ line.
+ * @arg == pointer to physdev_irq structure.
+ */
+#define PHYSDEVOP_alloc_irq_vector      10
+#define PHYSDEVOP_free_irq_vector       11
+struct physdev_irq {
+    /* IN */
+    uint32_t irq;
+    /* IN or OUT */
+    uint32_t vector;
+};
+typedef struct physdev_irq physdev_irq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
+
+#define MAP_PIRQ_TYPE_MSI               0x0
+#define MAP_PIRQ_TYPE_GSI               0x1
+#define MAP_PIRQ_TYPE_UNKNOWN           0x2
+#define MAP_PIRQ_TYPE_MSI_SEG           0x3
+#define MAP_PIRQ_TYPE_MULTI_MSI         0x4
+
+#define PHYSDEVOP_map_pirq               13
+struct physdev_map_pirq {
+    domid_t domid;
+    /* IN */
+    int type;
+    /* IN (ignored for ..._MULTI_MSI) */
+    int index;
+    /* IN or OUT */
+    int pirq;
+    /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
+    int bus;
+    /* IN */
+    int devfn;
+    /* IN (also OUT for ..._MULTI_MSI) */
+    int entry_nr;
+    /* IN */
+    uint64_t table_base;
+};
+typedef struct physdev_map_pirq physdev_map_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t);
+
+#define PHYSDEVOP_unmap_pirq             14
+struct physdev_unmap_pirq {
+    domid_t domid;
+    /* IN */
+    int pirq;
+};
+
+typedef struct physdev_unmap_pirq physdev_unmap_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t);
+
+#define PHYSDEVOP_manage_pci_add         15
+#define PHYSDEVOP_manage_pci_remove      16
+struct physdev_manage_pci {
+    /* IN */
+    uint8_t bus;
+    uint8_t devfn;
+};
+
+typedef struct physdev_manage_pci physdev_manage_pci_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t);
+
+#define PHYSDEVOP_restore_msi            19
+struct physdev_restore_msi {
+    /* IN */
+    uint8_t bus;
+    uint8_t devfn;
+};
+typedef struct physdev_restore_msi physdev_restore_msi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t);
+
+#define PHYSDEVOP_manage_pci_add_ext     20
+struct physdev_manage_pci_ext {
+    /* IN */
+    uint8_t bus;
+    uint8_t devfn;
+    uint32_t is_extfn;
+    uint32_t is_virtfn;
+    struct {
+        uint8_t bus;
+        uint8_t devfn;
+    } physfn;
+};
+
+typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t);
+
+/*
+ * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op()
+ * hypercall since 0x00030202.
+ */
+struct physdev_op {
+    uint32_t cmd;
+    union {
+        physdev_irq_status_query_t irq_status_query;
+        physdev_set_iopl_t         set_iopl;
+        physdev_set_iobitmap_t     set_iobitmap;
+        physdev_apic_t             apic_op;
+        physdev_irq_t              irq_op;
+    } u;
+};
+typedef struct physdev_op physdev_op_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_op_t);
+
+#define PHYSDEVOP_setup_gsi    21
+struct physdev_setup_gsi {
+    int gsi;
+    /* IN */
+    uint8_t triggering;
+    /* IN */
+    uint8_t polarity;
+    /* IN */
+};
+
+typedef struct physdev_setup_gsi physdev_setup_gsi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_setup_gsi_t);
+
+/* leave PHYSDEVOP 22 free */
+
+/* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI
+ * the hypercall returns a free pirq */
+#define PHYSDEVOP_get_free_pirq    23
+struct physdev_get_free_pirq {
+    /* IN */
+    int type;
+    /* OUT */
+    uint32_t pirq;
+};
+
+typedef struct physdev_get_free_pirq physdev_get_free_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t);
+
+#define XEN_PCI_MMCFG_RESERVED         0x1
+
+#define PHYSDEVOP_pci_mmcfg_reserved    24
+struct physdev_pci_mmcfg_reserved {
+    uint64_t address;
+    uint16_t segment;
+    uint8_t start_bus;
+    uint8_t end_bus;
+    uint32_t flags;
+};
+typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t);
+
+#define XEN_PCI_DEV_EXTFN              0x1
+#define XEN_PCI_DEV_VIRTFN             0x2
+#define XEN_PCI_DEV_PXM                0x4
+
+#define PHYSDEVOP_pci_device_add        25
+struct physdev_pci_device_add {
+    /* IN */
+    uint16_t seg;
+    uint8_t bus;
+    uint8_t devfn;
+    uint32_t flags;
+    struct {
+        uint8_t bus;
+        uint8_t devfn;
+    } physfn;
+    /*
+     * Optional parameters array.
+     * First element ([0]) is PXM domain associated with the device (if
+     * XEN_PCI_DEV_PXM is set)
+     */
+    uint32_t optarr[XEN_FLEX_ARRAY_DIM];
+};
+typedef struct physdev_pci_device_add physdev_pci_device_add_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t);
+
+#define PHYSDEVOP_pci_device_remove     26
+#define PHYSDEVOP_restore_msi_ext       27
+/*
+ * Dom0 should use these two to announce MMIO resources assigned to
+ * MSI-X capable devices won't (prepare) or may (release) change.
+ */
+#define PHYSDEVOP_prepare_msix          30
+#define PHYSDEVOP_release_msix          31
+struct physdev_pci_device {
+    /* IN */
+    uint16_t seg;
+    uint8_t bus;
+    uint8_t devfn;
+};
+typedef struct physdev_pci_device physdev_pci_device_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t);
+
+#define PHYSDEVOP_DBGP_RESET_PREPARE    1
+#define PHYSDEVOP_DBGP_RESET_DONE       2
+
+#define PHYSDEVOP_DBGP_BUS_UNKNOWN      0
+#define PHYSDEVOP_DBGP_BUS_PCI          1
+
+#define PHYSDEVOP_dbgp_op               29
+struct physdev_dbgp_op {
+    /* IN */
+    uint8_t op;
+    uint8_t bus;
+    union {
+        physdev_pci_device_t pci;
+    } u;
+};
+typedef struct physdev_dbgp_op physdev_dbgp_op_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t);
+
+/*
+ * Notify that some PIRQ-bound event channels have been unmasked.
+ * ** This command is obsolete since interface version 0x00030202 and is **
+ * ** unsupported by newer versions of Xen.                              **
+ */
+#define PHYSDEVOP_IRQ_UNMASK_NOTIFY      4
+
+#if __XEN_INTERFACE_VERSION__ < 0x00040600
+/*
+ * These all-capitals physdev operation names are superceded by the new names
+ * (defined above) since interface version 0x00030202. The guard above was
+ * added post-4.5 only though and hence shouldn't check for 0x00030202.
+ */
+#define PHYSDEVOP_IRQ_STATUS_QUERY       PHYSDEVOP_irq_status_query
+#define PHYSDEVOP_SET_IOPL               PHYSDEVOP_set_iopl
+#define PHYSDEVOP_SET_IOBITMAP           PHYSDEVOP_set_iobitmap
+#define PHYSDEVOP_APIC_READ              PHYSDEVOP_apic_read
+#define PHYSDEVOP_APIC_WRITE             PHYSDEVOP_apic_write
+#define PHYSDEVOP_ASSIGN_VECTOR          PHYSDEVOP_alloc_irq_vector
+#define PHYSDEVOP_FREE_VECTOR            PHYSDEVOP_free_irq_vector
+#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi
+#define PHYSDEVOP_IRQ_SHARED             XENIRQSTAT_shared
+#endif
+
+#if __XEN_INTERFACE_VERSION__ < 0x00040200
+#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v1
+#else
+#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v2
+#endif
+
+#endif /* __XEN_PUBLIC_PHYSDEV_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/sched.h b/include/hw/xen/interface/sched.h
new file mode 100644 (file)
index 0000000..b4362c6
--- /dev/null
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * sched.h
+ *
+ * Scheduler state interactions
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_SCHED_H__
+#define __XEN_PUBLIC_SCHED_H__
+
+#include "event_channel.h"
+
+/*
+ * `incontents 150 sched Guest Scheduler Operations
+ *
+ * The SCHEDOP interface provides mechanisms for a guest to interact
+ * with the scheduler, including yield, blocking and shutting itself
+ * down.
+ */
+
+/*
+ * The prototype for this hypercall is:
+ * ` long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...)
+ *
+ * @cmd == SCHEDOP_??? (scheduler operation).
+ * @arg == Operation-specific extra argument(s), as described below.
+ * ...  == Additional Operation-specific extra arguments, described below.
+ *
+ * Versions of Xen prior to 3.0.2 provided only the following legacy version
+ * of this hypercall, supporting only the commands yield, block and shutdown:
+ *  long sched_op(int cmd, unsigned long arg)
+ * @cmd == SCHEDOP_??? (scheduler operation).
+ * @arg == 0               (SCHEDOP_yield and SCHEDOP_block)
+ *      == SHUTDOWN_* code (SCHEDOP_shutdown)
+ *
+ * This legacy version is available to new guests as:
+ * ` long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg)
+ */
+
+/* ` enum sched_op { // SCHEDOP_* => struct sched_* */
+/*
+ * Voluntarily yield the CPU.
+ * @arg == NULL.
+ */
+#define SCHEDOP_yield       0
+
+/*
+ * Block execution of this VCPU until an event is received for processing.
+ * If called with event upcalls masked, this operation will atomically
+ * reenable event delivery and check for pending events before blocking the
+ * VCPU. This avoids a "wakeup waiting" race.
+ * @arg == NULL.
+ */
+#define SCHEDOP_block       1
+
+/*
+ * Halt execution of this domain (all VCPUs) and notify the system controller.
+ * @arg == pointer to sched_shutdown_t structure.
+ *
+ * If the sched_shutdown_t reason is SHUTDOWN_suspend then
+ * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN
+ * of the guest's start info page.  RDX/EDX is the third hypercall
+ * argument.
+ *
+ * In addition, which reason is SHUTDOWN_suspend this hypercall
+ * returns 1 if suspend was cancelled or the domain was merely
+ * checkpointed, and 0 if it is resuming in a new domain.
+ */
+#define SCHEDOP_shutdown    2
+
+/*
+ * Poll a set of event-channel ports. Return when one or more are pending. An
+ * optional timeout may be specified.
+ * @arg == pointer to sched_poll_t structure.
+ */
+#define SCHEDOP_poll        3
+
+/*
+ * Declare a shutdown for another domain. The main use of this function is
+ * in interpreting shutdown requests and reasons for fully-virtualized
+ * domains.  A para-virtualized domain may use SCHEDOP_shutdown directly.
+ * @arg == pointer to sched_remote_shutdown_t structure.
+ */
+#define SCHEDOP_remote_shutdown        4
+
+/*
+ * Latch a shutdown code, so that when the domain later shuts down it
+ * reports this code to the control tools.
+ * @arg == sched_shutdown_t, as for SCHEDOP_shutdown.
+ */
+#define SCHEDOP_shutdown_code 5
+
+/*
+ * Setup, poke and destroy a domain watchdog timer.
+ * @arg == pointer to sched_watchdog_t structure.
+ * With id == 0, setup a domain watchdog timer to cause domain shutdown
+ *               after timeout, returns watchdog id.
+ * With id != 0 and timeout == 0, destroy domain watchdog timer.
+ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
+ */
+#define SCHEDOP_watchdog    6
+
+/*
+ * Override the current vcpu affinity by pinning it to one physical cpu or
+ * undo this override restoring the previous affinity.
+ * @arg == pointer to sched_pin_override_t structure.
+ *
+ * A negative pcpu value will undo a previous pin override and restore the
+ * previous cpu affinity.
+ * This call is allowed for the hardware domain only and requires the cpu
+ * to be part of the domain's cpupool.
+ */
+#define SCHEDOP_pin_override 7
+/* ` } */
+
+struct sched_shutdown {
+    unsigned int reason; /* SHUTDOWN_* => enum sched_shutdown_reason */
+};
+typedef struct sched_shutdown sched_shutdown_t;
+DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t);
+
+struct sched_poll {
+    XEN_GUEST_HANDLE(evtchn_port_t) ports;
+    unsigned int nr_ports;
+    uint64_t timeout;
+};
+typedef struct sched_poll sched_poll_t;
+DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
+
+struct sched_remote_shutdown {
+    domid_t domain_id;         /* Remote domain ID */
+    unsigned int reason;       /* SHUTDOWN_* => enum sched_shutdown_reason */
+};
+typedef struct sched_remote_shutdown sched_remote_shutdown_t;
+DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t);
+
+struct sched_watchdog {
+    uint32_t id;                /* watchdog ID */
+    uint32_t timeout;           /* timeout */
+};
+typedef struct sched_watchdog sched_watchdog_t;
+DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t);
+
+struct sched_pin_override {
+    int32_t pcpu;
+};
+typedef struct sched_pin_override sched_pin_override_t;
+DEFINE_XEN_GUEST_HANDLE(sched_pin_override_t);
+
+/*
+ * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
+ * software to determine the appropriate action. For the most part, Xen does
+ * not care about the shutdown code.
+ */
+/* ` enum sched_shutdown_reason { */
+#define SHUTDOWN_poweroff   0  /* Domain exited normally. Clean up and kill. */
+#define SHUTDOWN_reboot     1  /* Clean up, kill, and then restart.          */
+#define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
+#define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */
+#define SHUTDOWN_watchdog   4  /* Restart because watchdog time expired.     */
+
+/*
+ * Domain asked to perform 'soft reset' for it. The expected behavior is to
+ * reset internal Xen state for the domain returning it to the point where it
+ * was created but leaving the domain's memory contents and vCPU contexts
+ * intact. This will allow the domain to start over and set up all Xen specific
+ * interfaces again.
+ */
+#define SHUTDOWN_soft_reset 5
+#define SHUTDOWN_MAX        5  /* Maximum valid shutdown reason.             */
+/* ` } */
+
+#endif /* __XEN_PUBLIC_SCHED_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/trace.h b/include/hw/xen/interface/trace.h
new file mode 100644 (file)
index 0000000..62a1799
--- /dev/null
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * include/public/trace.h
+ *
+ * Mark Williamson, (C) 2004 Intel Research Cambridge
+ * Copyright (C) 2005 Bin Ren
+ */
+
+#ifndef __XEN_PUBLIC_TRACE_H__
+#define __XEN_PUBLIC_TRACE_H__
+
+#define TRACE_EXTRA_MAX    7
+#define TRACE_EXTRA_SHIFT 28
+
+/* Trace classes */
+#define TRC_CLS_SHIFT 16
+#define TRC_GEN      0x0001f000    /* General trace            */
+#define TRC_SCHED    0x0002f000    /* Xen Scheduler trace      */
+#define TRC_DOM0OP   0x0004f000    /* Xen DOM0 operation trace */
+#define TRC_HVM      0x0008f000    /* Xen HVM trace            */
+#define TRC_MEM      0x0010f000    /* Xen memory trace         */
+#define TRC_PV       0x0020f000    /* Xen PV traces            */
+#define TRC_SHADOW   0x0040f000    /* Xen shadow tracing       */
+#define TRC_HW       0x0080f000    /* Xen hardware-related traces */
+#define TRC_GUEST    0x0800f000    /* Guest-generated traces   */
+#define TRC_ALL      0x0ffff000
+#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
+#define TRC_HD_CYCLE_FLAG (1UL<<31)
+#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) )
+#define TRC_HD_EXTRA(x)    (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX)
+
+/* Trace subclasses */
+#define TRC_SUBCLS_SHIFT 12
+
+/* trace subclasses for SVM */
+#define TRC_HVM_ENTRYEXIT   0x00081000   /* VMENTRY and #VMEXIT       */
+#define TRC_HVM_HANDLER     0x00082000   /* various HVM handlers      */
+#define TRC_HVM_EMUL        0x00084000   /* emulated devices */
+
+#define TRC_SCHED_MIN       0x00021000   /* Just runstate changes */
+#define TRC_SCHED_CLASS     0x00022000   /* Scheduler-specific    */
+#define TRC_SCHED_VERBOSE   0x00028000   /* More inclusive scheduling */
+
+/*
+ * The highest 3 bits of the last 12 bits of TRC_SCHED_CLASS above are
+ * reserved for encoding what scheduler produced the information. The
+ * actual event is encoded in the last 9 bits.
+ *
+ * This means we have 8 scheduling IDs available (which means at most 8
+ * schedulers generating events) and, in each scheduler, up to 512
+ * different events.
+ */
+#define TRC_SCHED_ID_BITS 3
+#define TRC_SCHED_ID_SHIFT (TRC_SUBCLS_SHIFT - TRC_SCHED_ID_BITS)
+#define TRC_SCHED_ID_MASK (((1UL<<TRC_SCHED_ID_BITS) - 1) << TRC_SCHED_ID_SHIFT)
+#define TRC_SCHED_EVT_MASK (~(TRC_SCHED_ID_MASK))
+
+/* Per-scheduler IDs, to identify scheduler specific events */
+#define TRC_SCHED_CSCHED   0
+#define TRC_SCHED_CSCHED2  1
+/* #define XEN_SCHEDULER_SEDF 2 (Removed) */
+#define TRC_SCHED_ARINC653 3
+#define TRC_SCHED_RTDS     4
+#define TRC_SCHED_SNULL    5
+
+/* Per-scheduler tracing */
+#define TRC_SCHED_CLASS_EVT(_c, _e) \
+  ( ( TRC_SCHED_CLASS | \
+      ((TRC_SCHED_##_c << TRC_SCHED_ID_SHIFT) & TRC_SCHED_ID_MASK) ) + \
+    (_e & TRC_SCHED_EVT_MASK) )
+
+/* Trace classes for DOM0 operations */
+#define TRC_DOM0_DOMOPS     0x00041000   /* Domains manipulations */
+
+/* Trace classes for Hardware */
+#define TRC_HW_PM           0x00801000   /* Power management traces */
+#define TRC_HW_IRQ          0x00802000   /* Traces relating to the handling of IRQs */
+
+/* Trace events per class */
+#define TRC_LOST_RECORDS        (TRC_GEN + 1)
+#define TRC_TRACE_WRAP_BUFFER  (TRC_GEN + 2)
+#define TRC_TRACE_CPU_CHANGE    (TRC_GEN + 3)
+
+#define TRC_SCHED_RUNSTATE_CHANGE   (TRC_SCHED_MIN + 1)
+#define TRC_SCHED_CONTINUE_RUNNING  (TRC_SCHED_MIN + 2)
+#define TRC_SCHED_DOM_ADD        (TRC_SCHED_VERBOSE +  1)
+#define TRC_SCHED_DOM_REM        (TRC_SCHED_VERBOSE +  2)
+#define TRC_SCHED_SLEEP          (TRC_SCHED_VERBOSE +  3)
+#define TRC_SCHED_WAKE           (TRC_SCHED_VERBOSE +  4)
+#define TRC_SCHED_YIELD          (TRC_SCHED_VERBOSE +  5)
+#define TRC_SCHED_BLOCK          (TRC_SCHED_VERBOSE +  6)
+#define TRC_SCHED_SHUTDOWN       (TRC_SCHED_VERBOSE +  7)
+#define TRC_SCHED_CTL            (TRC_SCHED_VERBOSE +  8)
+#define TRC_SCHED_ADJDOM         (TRC_SCHED_VERBOSE +  9)
+#define TRC_SCHED_SWITCH         (TRC_SCHED_VERBOSE + 10)
+#define TRC_SCHED_S_TIMER_FN     (TRC_SCHED_VERBOSE + 11)
+#define TRC_SCHED_T_TIMER_FN     (TRC_SCHED_VERBOSE + 12)
+#define TRC_SCHED_DOM_TIMER_FN   (TRC_SCHED_VERBOSE + 13)
+#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14)
+#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15)
+#define TRC_SCHED_SHUTDOWN_CODE  (TRC_SCHED_VERBOSE + 16)
+#define TRC_SCHED_SWITCH_INFCONT (TRC_SCHED_VERBOSE + 17)
+
+#define TRC_DOM0_DOM_ADD         (TRC_DOM0_DOMOPS + 1)
+#define TRC_DOM0_DOM_REM         (TRC_DOM0_DOMOPS + 2)
+
+#define TRC_MEM_PAGE_GRANT_MAP      (TRC_MEM + 1)
+#define TRC_MEM_PAGE_GRANT_UNMAP    (TRC_MEM + 2)
+#define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3)
+#define TRC_MEM_SET_P2M_ENTRY       (TRC_MEM + 4)
+#define TRC_MEM_DECREASE_RESERVATION (TRC_MEM + 5)
+#define TRC_MEM_POD_POPULATE        (TRC_MEM + 16)
+#define TRC_MEM_POD_ZERO_RECLAIM    (TRC_MEM + 17)
+#define TRC_MEM_POD_SUPERPAGE_SPLINTER (TRC_MEM + 18)
+
+#define TRC_PV_ENTRY   0x00201000 /* Hypervisor entry points for PV guests. */
+#define TRC_PV_SUBCALL 0x00202000 /* Sub-call in a multicall hypercall */
+
+#define TRC_PV_HYPERCALL             (TRC_PV_ENTRY +  1)
+#define TRC_PV_TRAP                  (TRC_PV_ENTRY +  3)
+#define TRC_PV_PAGE_FAULT            (TRC_PV_ENTRY +  4)
+#define TRC_PV_FORCED_INVALID_OP     (TRC_PV_ENTRY +  5)
+#define TRC_PV_EMULATE_PRIVOP        (TRC_PV_ENTRY +  6)
+#define TRC_PV_EMULATE_4GB           (TRC_PV_ENTRY +  7)
+#define TRC_PV_MATH_STATE_RESTORE    (TRC_PV_ENTRY +  8)
+#define TRC_PV_PAGING_FIXUP          (TRC_PV_ENTRY +  9)
+#define TRC_PV_GDT_LDT_MAPPING_FAULT (TRC_PV_ENTRY + 10)
+#define TRC_PV_PTWR_EMULATION        (TRC_PV_ENTRY + 11)
+#define TRC_PV_PTWR_EMULATION_PAE    (TRC_PV_ENTRY + 12)
+#define TRC_PV_HYPERCALL_V2          (TRC_PV_ENTRY + 13)
+#define TRC_PV_HYPERCALL_SUBCALL     (TRC_PV_SUBCALL + 14)
+
+/*
+ * TRC_PV_HYPERCALL_V2 format
+ *
+ * Only some of the hypercall argument are recorded. Bit fields A0 to
+ * A5 in the first extra word are set if the argument is present and
+ * the arguments themselves are packed sequentially in the following
+ * words.
+ *
+ * The TRC_64_FLAG bit is not set for these events (even if there are
+ * 64-bit arguments in the record).
+ *
+ * Word
+ * 0    bit 31 30|29 28|27 26|25 24|23 22|21 20|19 ... 0
+ *          A5   |A4   |A3   |A2   |A1   |A0   |Hypercall op
+ * 1    First 32 bit (or low word of first 64 bit) arg in record
+ * 2    Second 32 bit (or high word of first 64 bit) arg in record
+ * ...
+ *
+ * A0-A5 bitfield values:
+ *
+ *   00b  Argument not present
+ *   01b  32-bit argument present
+ *   10b  64-bit argument present
+ *   11b  Reserved
+ */
+#define TRC_PV_HYPERCALL_V2_ARG_32(i) (0x1 << (20 + 2*(i)))
+#define TRC_PV_HYPERCALL_V2_ARG_64(i) (0x2 << (20 + 2*(i)))
+#define TRC_PV_HYPERCALL_V2_ARG_MASK  (0xfff00000)
+
+#define TRC_SHADOW_NOT_SHADOW                 (TRC_SHADOW +  1)
+#define TRC_SHADOW_FAST_PROPAGATE             (TRC_SHADOW +  2)
+#define TRC_SHADOW_FAST_MMIO                  (TRC_SHADOW +  3)
+#define TRC_SHADOW_FALSE_FAST_PATH            (TRC_SHADOW +  4)
+#define TRC_SHADOW_MMIO                       (TRC_SHADOW +  5)
+#define TRC_SHADOW_FIXUP                      (TRC_SHADOW +  6)
+#define TRC_SHADOW_DOMF_DYING                 (TRC_SHADOW +  7)
+#define TRC_SHADOW_EMULATE                    (TRC_SHADOW +  8)
+#define TRC_SHADOW_EMULATE_UNSHADOW_USER      (TRC_SHADOW +  9)
+#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ    (TRC_SHADOW + 10)
+#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11)
+#define TRC_SHADOW_WRMAP_BF                   (TRC_SHADOW + 12)
+#define TRC_SHADOW_PREALLOC_UNPIN             (TRC_SHADOW + 13)
+#define TRC_SHADOW_RESYNC_FULL                (TRC_SHADOW + 14)
+#define TRC_SHADOW_RESYNC_ONLY                (TRC_SHADOW + 15)
+
+/* trace events per subclass */
+#define TRC_HVM_NESTEDFLAG      (0x400)
+#define TRC_HVM_VMENTRY         (TRC_HVM_ENTRYEXIT + 0x01)
+#define TRC_HVM_VMEXIT          (TRC_HVM_ENTRYEXIT + 0x02)
+#define TRC_HVM_VMEXIT64        (TRC_HVM_ENTRYEXIT + TRC_64_FLAG + 0x02)
+#define TRC_HVM_PF_XEN          (TRC_HVM_HANDLER + 0x01)
+#define TRC_HVM_PF_XEN64        (TRC_HVM_HANDLER + TRC_64_FLAG + 0x01)
+#define TRC_HVM_PF_INJECT       (TRC_HVM_HANDLER + 0x02)
+#define TRC_HVM_PF_INJECT64     (TRC_HVM_HANDLER + TRC_64_FLAG + 0x02)
+#define TRC_HVM_INJ_EXC         (TRC_HVM_HANDLER + 0x03)
+#define TRC_HVM_INJ_VIRQ        (TRC_HVM_HANDLER + 0x04)
+#define TRC_HVM_REINJ_VIRQ      (TRC_HVM_HANDLER + 0x05)
+#define TRC_HVM_IO_READ         (TRC_HVM_HANDLER + 0x06)
+#define TRC_HVM_IO_WRITE        (TRC_HVM_HANDLER + 0x07)
+#define TRC_HVM_CR_READ         (TRC_HVM_HANDLER + 0x08)
+#define TRC_HVM_CR_READ64       (TRC_HVM_HANDLER + TRC_64_FLAG + 0x08)
+#define TRC_HVM_CR_WRITE        (TRC_HVM_HANDLER + 0x09)
+#define TRC_HVM_CR_WRITE64      (TRC_HVM_HANDLER + TRC_64_FLAG + 0x09)
+#define TRC_HVM_DR_READ         (TRC_HVM_HANDLER + 0x0A)
+#define TRC_HVM_DR_WRITE        (TRC_HVM_HANDLER + 0x0B)
+#define TRC_HVM_MSR_READ        (TRC_HVM_HANDLER + 0x0C)
+#define TRC_HVM_MSR_WRITE       (TRC_HVM_HANDLER + 0x0D)
+#define TRC_HVM_CPUID           (TRC_HVM_HANDLER + 0x0E)
+#define TRC_HVM_INTR            (TRC_HVM_HANDLER + 0x0F)
+#define TRC_HVM_NMI             (TRC_HVM_HANDLER + 0x10)
+#define TRC_HVM_SMI             (TRC_HVM_HANDLER + 0x11)
+#define TRC_HVM_VMMCALL         (TRC_HVM_HANDLER + 0x12)
+#define TRC_HVM_HLT             (TRC_HVM_HANDLER + 0x13)
+#define TRC_HVM_INVLPG          (TRC_HVM_HANDLER + 0x14)
+#define TRC_HVM_INVLPG64        (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14)
+#define TRC_HVM_MCE             (TRC_HVM_HANDLER + 0x15)
+#define TRC_HVM_IOPORT_READ     (TRC_HVM_HANDLER + 0x16)
+#define TRC_HVM_IOMEM_READ      (TRC_HVM_HANDLER + 0x17)
+#define TRC_HVM_CLTS            (TRC_HVM_HANDLER + 0x18)
+#define TRC_HVM_LMSW            (TRC_HVM_HANDLER + 0x19)
+#define TRC_HVM_LMSW64          (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19)
+#define TRC_HVM_RDTSC           (TRC_HVM_HANDLER + 0x1a)
+#define TRC_HVM_INTR_WINDOW     (TRC_HVM_HANDLER + 0x20)
+#define TRC_HVM_NPF             (TRC_HVM_HANDLER + 0x21)
+#define TRC_HVM_REALMODE_EMULATE (TRC_HVM_HANDLER + 0x22)
+#define TRC_HVM_TRAP             (TRC_HVM_HANDLER + 0x23)
+#define TRC_HVM_TRAP_DEBUG       (TRC_HVM_HANDLER + 0x24)
+#define TRC_HVM_VLAPIC           (TRC_HVM_HANDLER + 0x25)
+#define TRC_HVM_XCR_READ64      (TRC_HVM_HANDLER + TRC_64_FLAG + 0x26)
+#define TRC_HVM_XCR_WRITE64     (TRC_HVM_HANDLER + TRC_64_FLAG + 0x27)
+
+#define TRC_HVM_IOPORT_WRITE    (TRC_HVM_HANDLER + 0x216)
+#define TRC_HVM_IOMEM_WRITE     (TRC_HVM_HANDLER + 0x217)
+
+/* Trace events for emulated devices */
+#define TRC_HVM_EMUL_HPET_START_TIMER  (TRC_HVM_EMUL + 0x1)
+#define TRC_HVM_EMUL_PIT_START_TIMER   (TRC_HVM_EMUL + 0x2)
+#define TRC_HVM_EMUL_RTC_START_TIMER   (TRC_HVM_EMUL + 0x3)
+#define TRC_HVM_EMUL_LAPIC_START_TIMER (TRC_HVM_EMUL + 0x4)
+#define TRC_HVM_EMUL_HPET_STOP_TIMER   (TRC_HVM_EMUL + 0x5)
+#define TRC_HVM_EMUL_PIT_STOP_TIMER    (TRC_HVM_EMUL + 0x6)
+#define TRC_HVM_EMUL_RTC_STOP_TIMER    (TRC_HVM_EMUL + 0x7)
+#define TRC_HVM_EMUL_LAPIC_STOP_TIMER  (TRC_HVM_EMUL + 0x8)
+#define TRC_HVM_EMUL_PIT_TIMER_CB      (TRC_HVM_EMUL + 0x9)
+#define TRC_HVM_EMUL_LAPIC_TIMER_CB    (TRC_HVM_EMUL + 0xA)
+#define TRC_HVM_EMUL_PIC_INT_OUTPUT    (TRC_HVM_EMUL + 0xB)
+#define TRC_HVM_EMUL_PIC_KICK          (TRC_HVM_EMUL + 0xC)
+#define TRC_HVM_EMUL_PIC_INTACK        (TRC_HVM_EMUL + 0xD)
+#define TRC_HVM_EMUL_PIC_POSEDGE       (TRC_HVM_EMUL + 0xE)
+#define TRC_HVM_EMUL_PIC_NEGEDGE       (TRC_HVM_EMUL + 0xF)
+#define TRC_HVM_EMUL_PIC_PEND_IRQ_CALL (TRC_HVM_EMUL + 0x10)
+#define TRC_HVM_EMUL_LAPIC_PIC_INTR    (TRC_HVM_EMUL + 0x11)
+
+/* trace events for per class */
+#define TRC_PM_FREQ_CHANGE      (TRC_HW_PM + 0x01)
+#define TRC_PM_IDLE_ENTRY       (TRC_HW_PM + 0x02)
+#define TRC_PM_IDLE_EXIT        (TRC_HW_PM + 0x03)
+
+/* Trace events for IRQs */
+#define TRC_HW_IRQ_MOVE_CLEANUP_DELAY (TRC_HW_IRQ + 0x1)
+#define TRC_HW_IRQ_MOVE_CLEANUP       (TRC_HW_IRQ + 0x2)
+#define TRC_HW_IRQ_BIND_VECTOR        (TRC_HW_IRQ + 0x3)
+#define TRC_HW_IRQ_CLEAR_VECTOR       (TRC_HW_IRQ + 0x4)
+#define TRC_HW_IRQ_MOVE_FINISH        (TRC_HW_IRQ + 0x5)
+#define TRC_HW_IRQ_ASSIGN_VECTOR      (TRC_HW_IRQ + 0x6)
+#define TRC_HW_IRQ_UNMAPPED_VECTOR    (TRC_HW_IRQ + 0x7)
+#define TRC_HW_IRQ_HANDLED            (TRC_HW_IRQ + 0x8)
+
+/*
+ * Event Flags
+ *
+ * Some events (e.g, TRC_PV_TRAP and TRC_HVM_IOMEM_READ) have multiple
+ * record formats.  These event flags distinguish between the
+ * different formats.
+ */
+#define TRC_64_FLAG 0x100 /* Addresses are 64 bits (instead of 32 bits) */
+
+/* This structure represents a single trace buffer record. */
+struct t_rec {
+    uint32_t event:28;
+    uint32_t extra_u32:3;         /* # entries in trailing extra_u32[] array */
+    uint32_t cycles_included:1;   /* u.cycles or u.no_cycles? */
+    union {
+        struct {
+            uint32_t cycles_lo, cycles_hi; /* cycle counter timestamp */
+            uint32_t extra_u32[7];         /* event data items */
+        } cycles;
+        struct {
+            uint32_t extra_u32[7];         /* event data items */
+        } nocycles;
+    } u;
+};
+
+/*
+ * This structure contains the metadata for a single trace buffer.  The head
+ * field, indexes into an array of struct t_rec's.
+ */
+struct t_buf {
+    /* Assume the data buffer size is X.  X is generally not a power of 2.
+     * CONS and PROD are incremented modulo (2*X):
+     *     0 <= cons < 2*X
+     *     0 <= prod < 2*X
+     * This is done because addition modulo X breaks at 2^32 when X is not a
+     * power of 2:
+     *     (((2^32 - 1) % X) + 1) % X != (2^32) % X
+     */
+    uint32_t cons;   /* Offset of next item to be consumed by control tools. */
+    uint32_t prod;   /* Offset of next item to be produced by Xen.           */
+    /*  Records follow immediately after the meta-data header.    */
+};
+
+/* Structure used to pass MFNs to the trace buffers back to trace consumers.
+ * Offset is an offset into the mapped structure where the mfn list will be held.
+ * MFNs will be at ((unsigned long *)(t_info))+(t_info->cpu_offset[cpu]).
+ */
+struct t_info {
+    uint16_t tbuf_size; /* Size in pages of each trace buffer */
+    uint16_t mfn_offset[];  /* Offset within t_info structure of the page list per cpu */
+    /* MFN lists immediately after the header */
+};
+
+#endif /* __XEN_PUBLIC_TRACE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/vcpu.h b/include/hw/xen/interface/vcpu.h
new file mode 100644 (file)
index 0000000..81a3b3a
--- /dev/null
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * vcpu.h
+ *
+ * VCPU initialisation, query, and hotplug.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_VCPU_H__
+#define __XEN_PUBLIC_VCPU_H__
+
+#include "xen.h"
+
+/*
+ * Prototype for this hypercall is:
+ *  long vcpu_op(int cmd, unsigned int vcpuid, void *extra_args)
+ * @cmd        == VCPUOP_??? (VCPU operation).
+ * @vcpuid     == VCPU to operate on.
+ * @extra_args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Initialise a VCPU. Each VCPU can be initialised only once. A
+ * newly-initialised VCPU will not run until it is brought up by VCPUOP_up.
+ *
+ * @extra_arg == For PV or ARM guests this is a pointer to a vcpu_guest_context
+ *               structure containing the initial state for the VCPU. For x86
+ *               HVM based guests this is a pointer to a vcpu_hvm_context
+ *               structure.
+ */
+#define VCPUOP_initialise            0
+
+/*
+ * Bring up a VCPU. This makes the VCPU runnable. This operation will fail
+ * if the VCPU has not been initialised (VCPUOP_initialise).
+ */
+#define VCPUOP_up                    1
+
+/*
+ * Bring down a VCPU (i.e., make it non-runnable).
+ * There are a few caveats that callers should observe:
+ *  1. This operation may return, and VCPU_is_up may return false, before the
+ *     VCPU stops running (i.e., the command is asynchronous). It is a good
+ *     idea to ensure that the VCPU has entered a non-critical loop before
+ *     bringing it down. Alternatively, this operation is guaranteed
+ *     synchronous if invoked by the VCPU itself.
+ *  2. After a VCPU is initialised, there is currently no way to drop all its
+ *     references to domain memory. Even a VCPU that is down still holds
+ *     memory references via its pagetable base pointer and GDT. It is good
+ *     practise to move a VCPU onto an 'idle' or default page table, LDT and
+ *     GDT before bringing it down.
+ */
+#define VCPUOP_down                  2
+
+/* Returns 1 if the given VCPU is up. */
+#define VCPUOP_is_up                 3
+
+/*
+ * Return information about the state and running time of a VCPU.
+ * @extra_arg == pointer to vcpu_runstate_info structure.
+ */
+#define VCPUOP_get_runstate_info     4
+struct vcpu_runstate_info {
+    /* VCPU's current state (RUNSTATE_*). */
+    int      state;
+    /* When was current state entered (system time, ns)? */
+    uint64_t state_entry_time;
+    /*
+     * Update indicator set in state_entry_time:
+     * When activated via VMASST_TYPE_runstate_update_flag, set during
+     * updates in guest memory mapped copy of vcpu_runstate_info.
+     */
+#define XEN_RUNSTATE_UPDATE          (xen_mk_ullong(1) << 63)
+    /*
+     * Time spent in each RUNSTATE_* (ns). The sum of these times is
+     * guaranteed not to drift from system time.
+     */
+    uint64_t time[4];
+};
+typedef struct vcpu_runstate_info vcpu_runstate_info_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t);
+
+/* VCPU is currently running on a physical CPU. */
+#define RUNSTATE_running  0
+
+/* VCPU is runnable, but not currently scheduled on any physical CPU. */
+#define RUNSTATE_runnable 1
+
+/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */
+#define RUNSTATE_blocked  2
+
+/*
+ * VCPU is not runnable, but it is not blocked.
+ * This is a 'catch all' state for things like hotplug and pauses by the
+ * system administrator (or for critical sections in the hypervisor).
+ * RUNSTATE_blocked dominates this state (it is the preferred state).
+ */
+#define RUNSTATE_offline  3
+
+/*
+ * Register a shared memory area from which the guest may obtain its own
+ * runstate information without needing to execute a hypercall.
+ * Notes:
+ *  1. The registered address may be virtual or physical or guest handle,
+ *     depending on the platform. Virtual address or guest handle should be
+ *     registered on x86 systems.
+ *  2. Only one shared area may be registered per VCPU. The shared area is
+ *     updated by the hypervisor each time the VCPU is scheduled. Thus
+ *     runstate.state will always be RUNSTATE_running and
+ *     runstate.state_entry_time will indicate the system time at which the
+ *     VCPU was last scheduled to run.
+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
+ */
+#define VCPUOP_register_runstate_memory_area 5
+struct vcpu_register_runstate_memory_area {
+    union {
+        XEN_GUEST_HANDLE(vcpu_runstate_info_t) h;
+        struct vcpu_runstate_info *v;
+        uint64_t p;
+    } addr;
+};
+typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t);
+
+/*
+ * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer
+ * which can be set via these commands. Periods smaller than one millisecond
+ * may not be supported.
+ */
+#define VCPUOP_set_periodic_timer    6 /* arg == vcpu_set_periodic_timer_t */
+#define VCPUOP_stop_periodic_timer   7 /* arg == NULL */
+struct vcpu_set_periodic_timer {
+    uint64_t period_ns;
+};
+typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t);
+
+/*
+ * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot
+ * timer which can be set via these commands.
+ */
+#define VCPUOP_set_singleshot_timer  8 /* arg == vcpu_set_singleshot_timer_t */
+#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */
+struct vcpu_set_singleshot_timer {
+    uint64_t timeout_abs_ns;   /* Absolute system time value in nanoseconds. */
+    uint32_t flags;            /* VCPU_SSHOTTMR_??? */
+};
+typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t);
+
+/* Flags to VCPUOP_set_singleshot_timer. */
+ /* Require the timeout to be in the future (return -ETIME if it's passed). */
+#define _VCPU_SSHOTTMR_future (0)
+#define VCPU_SSHOTTMR_future  (1U << _VCPU_SSHOTTMR_future)
+
+/*
+ * Register a memory location in the guest address space for the
+ * vcpu_info structure.  This allows the guest to place the vcpu_info
+ * structure in a convenient place, such as in a per-cpu data area.
+ * The pointer need not be page aligned, but the structure must not
+ * cross a page boundary.
+ *
+ * This may be called only once per vcpu.
+ */
+#define VCPUOP_register_vcpu_info   10  /* arg == vcpu_register_vcpu_info_t */
+struct vcpu_register_vcpu_info {
+    uint64_t mfn;    /* mfn of page to place vcpu_info */
+    uint32_t offset; /* offset within page */
+    uint32_t rsvd;   /* unused */
+};
+typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t);
+
+/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
+#define VCPUOP_send_nmi             11
+
+/*
+ * Get the physical ID information for a pinned vcpu's underlying physical
+ * processor.  The physical ID informmation is architecture-specific.
+ * On x86: id[31:0]=apic_id, id[63:32]=acpi_id.
+ * This command returns -EINVAL if it is not a valid operation for this VCPU.
+ */
+#define VCPUOP_get_physid           12 /* arg == vcpu_get_physid_t */
+struct vcpu_get_physid {
+    uint64_t phys_id;
+};
+typedef struct vcpu_get_physid vcpu_get_physid_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t);
+#define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid))
+#define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32))
+
+/*
+ * Register a memory location to get a secondary copy of the vcpu time
+ * parameters.  The master copy still exists as part of the vcpu shared
+ * memory area, and this secondary copy is updated whenever the master copy
+ * is updated (and using the same versioning scheme for synchronisation).
+ *
+ * The intent is that this copy may be mapped (RO) into userspace so
+ * that usermode can compute system time using the time info and the
+ * tsc.  Usermode will see an array of vcpu_time_info structures, one
+ * for each vcpu, and choose the right one by an existing mechanism
+ * which allows it to get the current vcpu number (such as via a
+ * segment limit).  It can then apply the normal algorithm to compute
+ * system time from the tsc.
+ *
+ * @extra_arg == pointer to vcpu_register_time_info_memory_area structure.
+ */
+#define VCPUOP_register_vcpu_time_memory_area   13
+DEFINE_XEN_GUEST_HANDLE(vcpu_time_info_t);
+struct vcpu_register_time_memory_area {
+    union {
+        XEN_GUEST_HANDLE(vcpu_time_info_t) h;
+        struct vcpu_time_info *v;
+        uint64_t p;
+    } addr;
+};
+typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
+
+#endif /* __XEN_PUBLIC_VCPU_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/version.h b/include/hw/xen/interface/version.h
new file mode 100644 (file)
index 0000000..9c78b4f
--- /dev/null
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * version.h
+ *
+ * Xen version, type, and compile information.
+ *
+ * Copyright (c) 2005, Nguyen Anh Quynh <aquynh@gmail.com>
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_VERSION_H__
+#define __XEN_PUBLIC_VERSION_H__
+
+#include "xen.h"
+
+/* NB. All ops return zero on success, except XENVER_{version,pagesize}
+ * XENVER_{version,pagesize,build_id} */
+
+/* arg == NULL; returns major:minor (16:16). */
+#define XENVER_version      0
+
+/* arg == xen_extraversion_t. */
+#define XENVER_extraversion 1
+typedef char xen_extraversion_t[16];
+#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t))
+
+/* arg == xen_compile_info_t. */
+#define XENVER_compile_info 2
+struct xen_compile_info {
+    char compiler[64];
+    char compile_by[16];
+    char compile_domain[32];
+    char compile_date[32];
+};
+typedef struct xen_compile_info xen_compile_info_t;
+
+#define XENVER_capabilities 3
+typedef char xen_capabilities_info_t[1024];
+#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t))
+
+#define XENVER_changeset 4
+typedef char xen_changeset_info_t[64];
+#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t))
+
+#define XENVER_platform_parameters 5
+struct xen_platform_parameters {
+    xen_ulong_t virt_start;
+};
+typedef struct xen_platform_parameters xen_platform_parameters_t;
+
+#define XENVER_get_features 6
+struct xen_feature_info {
+    unsigned int submap_idx;    /* IN: which 32-bit submap to return */
+    uint32_t     submap;        /* OUT: 32-bit submap */
+};
+typedef struct xen_feature_info xen_feature_info_t;
+
+/* Declares the features reported by XENVER_get_features. */
+#include "features.h"
+
+/* arg == NULL; returns host memory page size. */
+#define XENVER_pagesize 7
+
+/* arg == xen_domain_handle_t.
+ *
+ * The toolstack fills it out for guest consumption. It is intended to hold
+ * the UUID of the guest.
+ */
+#define XENVER_guest_handle 8
+
+#define XENVER_commandline 9
+typedef char xen_commandline_t[1024];
+
+/*
+ * Return value is the number of bytes written, or XEN_Exx on error.
+ * Calling with empty parameter returns the size of build_id.
+ */
+#define XENVER_build_id 10
+struct xen_build_id {
+        uint32_t        len; /* IN: size of buf[]. */
+        unsigned char   buf[XEN_FLEX_ARRAY_DIM];
+                             /* OUT: Variable length buffer with build_id. */
+};
+typedef struct xen_build_id xen_build_id_t;
+
+#endif /* __XEN_PUBLIC_VERSION_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/interface/xen-compat.h b/include/hw/xen/interface/xen-compat.h
new file mode 100644 (file)
index 0000000..97fe698
--- /dev/null
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * xen-compat.h
+ *
+ * Guest OS interface to Xen.  Compatibility layer.
+ *
+ * Copyright (c) 2006, Christian Limpach
+ */
+
+#ifndef __XEN_PUBLIC_XEN_COMPAT_H__
+#define __XEN_PUBLIC_XEN_COMPAT_H__
+
+#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040e00
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+/* Xen is built with matching headers and implements the latest interface. */
+#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__
+#elif !defined(__XEN_INTERFACE_VERSION__)
+/* Guests which do not specify a version get the legacy interface. */
+#define __XEN_INTERFACE_VERSION__ 0x00000000
+#endif
+
+#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__
+#error "These header files do not support the requested interface version."
+#endif
+
+#define COMPAT_FLEX_ARRAY_DIM XEN_FLEX_ARRAY_DIM
+
+#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */
diff --git a/include/hw/xen/interface/xen.h b/include/hw/xen/interface/xen.h
new file mode 100644 (file)
index 0000000..920567e
--- /dev/null
@@ -0,0 +1,1032 @@
+/* SPDX-License-Identifier: MIT */
+/******************************************************************************
+ * xen.h
+ *
+ * Guest OS interface to Xen.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_XEN_H__
+#define __XEN_PUBLIC_XEN_H__
+
+#include "xen-compat.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "arch-x86/xen.h"
+#elif defined(__arm__) || defined (__aarch64__)
+#include "arch-arm.h"
+#else
+#error "Unsupported architecture"
+#endif
+
+#ifndef __ASSEMBLY__
+/* Guest handles for primitive C types. */
+DEFINE_XEN_GUEST_HANDLE(char);
+__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
+DEFINE_XEN_GUEST_HANDLE(int);
+__DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
+#if __XEN_INTERFACE_VERSION__ < 0x00040300
+DEFINE_XEN_GUEST_HANDLE(long);
+__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+#endif
+DEFINE_XEN_GUEST_HANDLE(void);
+
+DEFINE_XEN_GUEST_HANDLE(uint64_t);
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
+DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
+
+/* Define a variable length array (depends on compiler). */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define XEN_FLEX_ARRAY_DIM
+#elif defined(__GNUC__)
+#define XEN_FLEX_ARRAY_DIM  0
+#else
+#define XEN_FLEX_ARRAY_DIM  1 /* variable size */
+#endif
+
+/* Turn a plain number into a C unsigned (long (long)) constant. */
+#define __xen_mk_uint(x)  x ## U
+#define __xen_mk_ulong(x) x ## UL
+#ifndef __xen_mk_ullong
+# define __xen_mk_ullong(x) x ## ULL
+#endif
+#define xen_mk_uint(x)    __xen_mk_uint(x)
+#define xen_mk_ulong(x)   __xen_mk_ulong(x)
+#define xen_mk_ullong(x)  __xen_mk_ullong(x)
+
+#else
+
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define xen_mk_uint(x)   x
+#define xen_mk_ulong(x)  x
+#define xen_mk_ullong(x) x
+
+#endif
+
+/*
+ * HYPERCALLS
+ */
+
+/* `incontents 100 hcalls List of hypercalls
+ * ` enum hypercall_num { // __HYPERVISOR_* => HYPERVISOR_*()
+ */
+
+#define __HYPERVISOR_set_trap_table        0
+#define __HYPERVISOR_mmu_update            1
+#define __HYPERVISOR_set_gdt               2
+#define __HYPERVISOR_stack_switch          3
+#define __HYPERVISOR_set_callbacks         4
+#define __HYPERVISOR_fpu_taskswitch        5
+#define __HYPERVISOR_sched_op_compat       6 /* compat since 0x00030101 */
+#define __HYPERVISOR_platform_op           7
+#define __HYPERVISOR_set_debugreg          8
+#define __HYPERVISOR_get_debugreg          9
+#define __HYPERVISOR_update_descriptor    10
+#define __HYPERVISOR_memory_op            12
+#define __HYPERVISOR_multicall            13
+#define __HYPERVISOR_update_va_mapping    14
+#define __HYPERVISOR_set_timer_op         15
+#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */
+#define __HYPERVISOR_xen_version          17
+#define __HYPERVISOR_console_io           18
+#define __HYPERVISOR_physdev_op_compat    19 /* compat since 0x00030202 */
+#define __HYPERVISOR_grant_table_op       20
+#define __HYPERVISOR_vm_assist            21
+#define __HYPERVISOR_update_va_mapping_otherdomain 22
+#define __HYPERVISOR_iret                 23 /* x86 only */
+#define __HYPERVISOR_vcpu_op              24
+#define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
+#define __HYPERVISOR_mmuext_op            26
+#define __HYPERVISOR_xsm_op               27
+#define __HYPERVISOR_nmi_op               28
+#define __HYPERVISOR_sched_op             29
+#define __HYPERVISOR_callback_op          30
+#define __HYPERVISOR_xenoprof_op          31
+#define __HYPERVISOR_event_channel_op     32
+#define __HYPERVISOR_physdev_op           33
+#define __HYPERVISOR_hvm_op               34
+#define __HYPERVISOR_sysctl               35
+#define __HYPERVISOR_domctl               36
+#define __HYPERVISOR_kexec_op             37
+#define __HYPERVISOR_tmem_op              38
+#define __HYPERVISOR_argo_op              39
+#define __HYPERVISOR_xenpmu_op            40
+#define __HYPERVISOR_dm_op                41
+#define __HYPERVISOR_hypfs_op             42
+
+/* Architecture-specific hypercall definitions. */
+#define __HYPERVISOR_arch_0               48
+#define __HYPERVISOR_arch_1               49
+#define __HYPERVISOR_arch_2               50
+#define __HYPERVISOR_arch_3               51
+#define __HYPERVISOR_arch_4               52
+#define __HYPERVISOR_arch_5               53
+#define __HYPERVISOR_arch_6               54
+#define __HYPERVISOR_arch_7               55
+
+/* ` } */
+
+/*
+ * HYPERCALL COMPATIBILITY.
+ */
+
+/* New sched_op hypercall introduced in 0x00030101. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030101
+#undef __HYPERVISOR_sched_op
+#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat
+#endif
+
+/* New event-channel and physdev hypercalls introduced in 0x00030202. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030202
+#undef __HYPERVISOR_event_channel_op
+#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat
+#undef __HYPERVISOR_physdev_op
+#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat
+#endif
+
+/* New platform_op hypercall introduced in 0x00030204. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030204
+#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op
+#endif
+
+/*
+ * VIRTUAL INTERRUPTS
+ *
+ * Virtual interrupts that a guest OS may receive from Xen.
+ *
+ * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a
+ * global VIRQ. The former can be bound once per VCPU and cannot be re-bound.
+ * The latter can be allocated only once per guest: they must initially be
+ * allocated to VCPU0 but can subsequently be re-bound.
+ */
+/* ` enum virq { */
+#define VIRQ_TIMER      0  /* V. Timebase update, and/or requested timeout.  */
+#define VIRQ_DEBUG      1  /* V. Request guest to dump debug info.           */
+#define VIRQ_CONSOLE    2  /* G. (DOM0) Bytes received on emergency console. */
+#define VIRQ_DOM_EXC    3  /* G. (DOM0) Exceptional event for some domain.   */
+#define VIRQ_TBUF       4  /* G. (DOM0) Trace buffer has records available.  */
+#define VIRQ_DEBUGGER   6  /* G. (DOM0) A domain has paused for debugging.   */
+#define VIRQ_XENOPROF   7  /* V. XenOprofile interrupt: new sample available */
+#define VIRQ_CON_RING   8  /* G. (DOM0) Bytes received on console            */
+#define VIRQ_PCPU_STATE 9  /* G. (DOM0) PCPU state changed                   */
+#define VIRQ_MEM_EVENT  10 /* G. (DOM0) A memory event has occurred          */
+#define VIRQ_ARGO       11 /* G. Argo interdomain message notification       */
+#define VIRQ_ENOMEM     12 /* G. (DOM0) Low on heap memory       */
+#define VIRQ_XENPMU     13 /* V.  PMC interrupt                              */
+
+/* Architecture-specific VIRQ definitions. */
+#define VIRQ_ARCH_0    16
+#define VIRQ_ARCH_1    17
+#define VIRQ_ARCH_2    18
+#define VIRQ_ARCH_3    19
+#define VIRQ_ARCH_4    20
+#define VIRQ_ARCH_5    21
+#define VIRQ_ARCH_6    22
+#define VIRQ_ARCH_7    23
+/* ` } */
+
+#define NR_VIRQS       24
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_mmu_update(const struct mmu_update reqs[],
+ * `                       unsigned count, unsigned *done_out,
+ * `                       unsigned foreigndom)
+ * `
+ * @reqs is an array of mmu_update_t structures ((ptr, val) pairs).
+ * @count is the length of the above array.
+ * @pdone is an output parameter indicating number of completed operations
+ * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this
+ *                    hypercall invocation. Can be DOMID_SELF.
+ * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced
+ *                     in this hypercall invocation. The value of this field
+ *                     (x) encodes the PFD as follows:
+ *                     x == 0 => PFD == DOMID_SELF
+ *                     x != 0 => PFD == x - 1
+ *
+ * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command.
+ * -------------
+ * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
+ * Updates an entry in a page table belonging to PFD. If updating an L1 table,
+ * and the new table entry is valid/present, the mapped frame must belong to
+ * FD. If attempting to map an I/O page then the caller assumes the privilege
+ * of the FD.
+ * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
+ * FD == DOMID_XEN: Map restricted areas of Xen's heap space.
+ * ptr[:2]  -- Machine address of the page-table entry to modify.
+ * val      -- Value to write.
+ *
+ * There also certain implicit requirements when using this hypercall. The
+ * pages that make up a pagetable must be mapped read-only in the guest.
+ * This prevents uncontrolled guest updates to the pagetable. Xen strictly
+ * enforces this, and will disallow any pagetable update which will end up
+ * mapping pagetable page RW, and will disallow using any writable page as a
+ * pagetable. In practice it means that when constructing a page table for a
+ * process, thread, etc, we MUST be very dilligient in following these rules:
+ *  1). Start with top-level page (PGD or in Xen language: L4). Fill out
+ *      the entries.
+ *  2). Keep on going, filling out the upper (PUD or L3), and middle (PMD
+ *      or L2).
+ *  3). Start filling out the PTE table (L1) with the PTE entries. Once
+ *     done, make sure to set each of those entries to RO (so writeable bit
+ *     is unset). Once that has been completed, set the PMD (L2) for this
+ *     PTE table as RO.
+ *  4). When completed with all of the PMD (L2) entries, and all of them have
+ *     been set to RO, make sure to set RO the PUD (L3). Do the same
+ *     operation on PGD (L4) pagetable entries that have a PUD (L3) entry.
+ *  5). Now before you can use those pages (so setting the cr3), you MUST also
+ *      pin them so that the hypervisor can verify the entries. This is done
+ *      via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame
+ *      number of the PGD (L4)). And this point the HYPERVISOR_mmuext_op(
+ *      MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be
+ *      issued.
+ * For 32-bit guests, the L4 is not used (as there is less pagetables), so
+ * instead use L3.
+ * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE
+ * hypercall. Also if so desired the OS can also try to write to the PTE
+ * and be trapped by the hypervisor (as the PTE entry is RO).
+ *
+ * To deallocate the pages, the operations are the reverse of the steps
+ * mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the
+ * pagetable MUST not be in use (meaning that the cr3 is not set to it).
+ *
+ * ptr[1:0] == MMU_MACHPHYS_UPDATE:
+ * Updates an entry in the machine->pseudo-physical mapping table.
+ * ptr[:2]  -- Machine address within the frame whose mapping to modify.
+ *             The frame must belong to the FD, if one is specified.
+ * val      -- Value to write into the mapping entry.
+ *
+ * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD:
+ * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed
+ * with those in @val.
+ *
+ * ptr[1:0] == MMU_PT_UPDATE_NO_TRANSLATE:
+ * As MMU_NORMAL_PT_UPDATE above, but @val is not translated though FD
+ * page tables.
+ *
+ * @val is usually the machine frame number along with some attributes.
+ * The attributes by default follow the architecture defined bits. Meaning that
+ * if this is a X86_64 machine and four page table layout is used, the layout
+ * of val is:
+ *  - 63 if set means No execute (NX)
+ *  - 46-13 the machine frame number
+ *  - 12 available for guest
+ *  - 11 available for guest
+ *  - 10 available for guest
+ *  - 9 available for guest
+ *  - 8 global
+ *  - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages)
+ *  - 6 dirty
+ *  - 5 accessed
+ *  - 4 page cached disabled
+ *  - 3 page write through
+ *  - 2 userspace accessible
+ *  - 1 writeable
+ *  - 0 present
+ *
+ *  The one bits that does not fit with the default layout is the PAGE_PSE
+ *  also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the
+ *  HYPERVISOR_mmuext_op serve as mechanism to set a pagetable to be 4MB
+ *  (or 2MB) instead of using the PAGE_PSE bit.
+ *
+ *  The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen
+ *  using it as the Page Attribute Table (PAT) bit - for details on it please
+ *  refer to Intel SDM 10.12. The PAT allows to set the caching attributes of
+ *  pages instead of using MTRRs.
+ *
+ *  The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits):
+ *                    PAT4                 PAT0
+ *  +-----+-----+----+----+----+-----+----+----+
+ *  | UC  | UC- | WC | WB | UC | UC- | WC | WB |  <= Linux
+ *  +-----+-----+----+----+----+-----+----+----+
+ *  | UC  | UC- | WT | WB | UC | UC- | WT | WB |  <= BIOS (default when machine boots)
+ *  +-----+-----+----+----+----+-----+----+----+
+ *  | rsv | rsv | WP | WC | UC | UC- | WT | WB |  <= Xen
+ *  +-----+-----+----+----+----+-----+----+----+
+ *
+ *  The lookup of this index table translates to looking up
+ *  Bit 7, Bit 4, and Bit 3 of val entry:
+ *
+ *  PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3).
+ *
+ *  If all bits are off, then we are using PAT0. If bit 3 turned on,
+ *  then we are using PAT1, if bit 3 and bit 4, then PAT2..
+ *
+ *  As you can see, the Linux PAT1 translates to PAT4 under Xen. Which means
+ *  that if a guest that follows Linux's PAT setup and would like to set Write
+ *  Combined on pages it MUST use PAT4 entry. Meaning that Bit 7 (PAGE_PAT) is
+ *  set. For example, under Linux it only uses PAT0, PAT1, and PAT2 for the
+ *  caching as:
+ *
+ *   WB = none (so PAT0)
+ *   WC = PWT (bit 3 on)
+ *   UC = PWT | PCD (bit 3 and 4 are on).
+ *
+ * To make it work with Xen, it needs to translate the WC bit as so:
+ *
+ *  PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3
+ *
+ * And to translate back it would:
+ *
+ * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7.
+ */
+#define MMU_NORMAL_PT_UPDATE       0 /* checked '*ptr = val'. ptr is MA.      */
+#define MMU_MACHPHYS_UPDATE        1 /* ptr = MA of frame to modify entry for */
+#define MMU_PT_UPDATE_PRESERVE_AD  2 /* atomically: *ptr = val | (*ptr&(A|D)) */
+#define MMU_PT_UPDATE_NO_TRANSLATE 3 /* checked '*ptr = val'. ptr is MA.      */
+                                     /* val never translated.                 */
+
+/*
+ * MMU EXTENDED OPERATIONS
+ *
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_mmuext_op(mmuext_op_t uops[],
+ * `                      unsigned int count,
+ * `                      unsigned int *pdone,
+ * `                      unsigned int foreigndom)
+ */
+/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
+ * A foreigndom (FD) can be specified (or DOMID_SELF for none).
+ * Where the FD has some effect, it is described below.
+ *
+ * cmd: MMUEXT_(UN)PIN_*_TABLE
+ * mfn: Machine frame number to be (un)pinned as a p.t. page.
+ *      The frame must belong to the FD, if one is specified.
+ *
+ * cmd: MMUEXT_NEW_BASEPTR
+ * mfn: Machine frame number of new page-table base to install in MMU.
+ *
+ * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only]
+ * mfn: Machine frame number of new page-table base to install in MMU
+ *      when in user space.
+ *
+ * cmd: MMUEXT_TLB_FLUSH_LOCAL
+ * No additional arguments. Flushes local TLB.
+ *
+ * cmd: MMUEXT_INVLPG_LOCAL
+ * linear_addr: Linear address to be flushed from the local TLB.
+ *
+ * cmd: MMUEXT_TLB_FLUSH_MULTI
+ * vcpumask: Pointer to bitmap of VCPUs to be flushed.
+ *
+ * cmd: MMUEXT_INVLPG_MULTI
+ * linear_addr: Linear address to be flushed.
+ * vcpumask: Pointer to bitmap of VCPUs to be flushed.
+ *
+ * cmd: MMUEXT_TLB_FLUSH_ALL
+ * No additional arguments. Flushes all VCPUs' TLBs.
+ *
+ * cmd: MMUEXT_INVLPG_ALL
+ * linear_addr: Linear address to be flushed from all VCPUs' TLBs.
+ *
+ * cmd: MMUEXT_FLUSH_CACHE
+ * No additional arguments. Writes back and flushes cache contents.
+ *
+ * cmd: MMUEXT_FLUSH_CACHE_GLOBAL
+ * No additional arguments. Writes back and flushes cache contents
+ * on all CPUs in the system.
+ *
+ * cmd: MMUEXT_SET_LDT
+ * linear_addr: Linear address of LDT base (NB. must be page-aligned).
+ * nr_ents: Number of entries in LDT.
+ *
+ * cmd: MMUEXT_CLEAR_PAGE
+ * mfn: Machine frame number to be cleared.
+ *
+ * cmd: MMUEXT_COPY_PAGE
+ * mfn: Machine frame number of the destination page.
+ * src_mfn: Machine frame number of the source page.
+ *
+ * cmd: MMUEXT_[UN]MARK_SUPER
+ * mfn: Machine frame number of head of superpage to be [un]marked.
+ */
+/* ` enum mmuext_cmd { */
+#define MMUEXT_PIN_L1_TABLE      0
+#define MMUEXT_PIN_L2_TABLE      1
+#define MMUEXT_PIN_L3_TABLE      2
+#define MMUEXT_PIN_L4_TABLE      3
+#define MMUEXT_UNPIN_TABLE       4
+#define MMUEXT_NEW_BASEPTR       5
+#define MMUEXT_TLB_FLUSH_LOCAL   6
+#define MMUEXT_INVLPG_LOCAL      7
+#define MMUEXT_TLB_FLUSH_MULTI   8
+#define MMUEXT_INVLPG_MULTI      9
+#define MMUEXT_TLB_FLUSH_ALL    10
+#define MMUEXT_INVLPG_ALL       11
+#define MMUEXT_FLUSH_CACHE      12
+#define MMUEXT_SET_LDT          13
+#define MMUEXT_NEW_USER_BASEPTR 15
+#define MMUEXT_CLEAR_PAGE       16
+#define MMUEXT_COPY_PAGE        17
+#define MMUEXT_FLUSH_CACHE_GLOBAL 18
+#define MMUEXT_MARK_SUPER       19
+#define MMUEXT_UNMARK_SUPER     20
+/* ` } */
+
+#ifndef __ASSEMBLY__
+struct mmuext_op {
+    unsigned int cmd; /* => enum mmuext_cmd */
+    union {
+        /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
+         * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */
+        xen_pfn_t     mfn;
+        /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
+        unsigned long linear_addr;
+    } arg1;
+    union {
+        /* SET_LDT */
+        unsigned int nr_ents;
+        /* TLB_FLUSH_MULTI, INVLPG_MULTI */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030205
+        XEN_GUEST_HANDLE(const_void) vcpumask;
+#else
+        const void *vcpumask;
+#endif
+        /* COPY_PAGE */
+        xen_pfn_t src_mfn;
+    } arg2;
+};
+typedef struct mmuext_op mmuext_op_t;
+DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
+#endif
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_update_va_mapping(unsigned long va, u64 val,
+ * `                              enum uvm_flags flags)
+ * `
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, u64 val,
+ * `                                          enum uvm_flags flags,
+ * `                                          domid_t domid)
+ * `
+ * ` @va: The virtual address whose mapping we want to change
+ * ` @val: The new page table entry, must contain a machine address
+ * ` @flags: Control TLB flushes
+ */
+/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
+/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap.   */
+/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer.         */
+/* ` enum uvm_flags { */
+#define UVMF_NONE           (xen_mk_ulong(0)<<0) /* No flushing at all.   */
+#define UVMF_TLB_FLUSH      (xen_mk_ulong(1)<<0) /* Flush entire TLB(s).  */
+#define UVMF_INVLPG         (xen_mk_ulong(2)<<0) /* Flush only one entry. */
+#define UVMF_FLUSHTYPE_MASK (xen_mk_ulong(3)<<0)
+#define UVMF_MULTI          (xen_mk_ulong(0)<<2) /* Flush subset of TLBs. */
+#define UVMF_LOCAL          (xen_mk_ulong(0)<<2) /* Flush local TLB.      */
+#define UVMF_ALL            (xen_mk_ulong(1)<<2) /* Flush all TLBs.       */
+/* ` } */
+
+/*
+ * ` int
+ * ` HYPERVISOR_console_io(unsigned int cmd,
+ * `                       unsigned int count,
+ * `                       char buffer[]);
+ *
+ * @cmd: Command (see below)
+ * @count: Size of the buffer to read/write
+ * @buffer: Pointer in the guest memory
+ *
+ * List of commands:
+ *
+ *  * CONSOLEIO_write: Write the buffer to Xen console.
+ *      For the hardware domain, all the characters in the buffer will
+ *      be written. Characters will be printed directly to the console.
+ *      For all the other domains, only the printable characters will be
+ *      written. Characters may be buffered until a newline (i.e '\n') is
+ *      found.
+ *      @return 0 on success, otherwise return an error code.
+ *  * CONSOLEIO_read: Attempts to read up to @count characters from Xen
+ *      console. The maximum buffer size (i.e. @count) supported is 2GB.
+ *      @return the number of characters read on success, otherwise return
+ *      an error code.
+ */
+#define CONSOLEIO_write         0
+#define CONSOLEIO_read          1
+
+/*
+ * Commands to HYPERVISOR_vm_assist().
+ */
+#define VMASST_CMD_enable                0
+#define VMASST_CMD_disable               1
+
+/* x86/32 guests: simulate full 4GB segment limits. */
+#define VMASST_TYPE_4gb_segments         0
+
+/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */
+#define VMASST_TYPE_4gb_segments_notify  1
+
+/*
+ * x86 guests: support writes to bottom-level PTEs.
+ * NB1. Page-directory entries cannot be written.
+ * NB2. Guest must continue to remove all writable mappings of PTEs.
+ */
+#define VMASST_TYPE_writable_pagetables  2
+
+/* x86/PAE guests: support PDPTs above 4GB. */
+#define VMASST_TYPE_pae_extended_cr3     3
+
+/*
+ * x86 guests: Sane behaviour for virtual iopl
+ *  - virtual iopl updated from do_iret() hypercalls.
+ *  - virtual iopl reported in bounce frames.
+ *  - guest kernels assumed to be level 0 for the purpose of iopl checks.
+ */
+#define VMASST_TYPE_architectural_iopl   4
+
+/*
+ * All guests: activate update indicator in vcpu_runstate_info
+ * Enable setting the XEN_RUNSTATE_UPDATE flag in guest memory mapped
+ * vcpu_runstate_info during updates of the runstate information.
+ */
+#define VMASST_TYPE_runstate_update_flag 5
+
+/*
+ * x86/64 guests: strictly hide M2P from user mode.
+ * This allows the guest to control respective hypervisor behavior:
+ * - when not set, L4 tables get created with the respective slot blank,
+ *   and whenever the L4 table gets used as a kernel one the missing
+ *   mapping gets inserted,
+ * - when set, L4 tables get created with the respective slot initialized
+ *   as before, and whenever the L4 table gets used as a user one the
+ *   mapping gets zapped.
+ */
+#define VMASST_TYPE_m2p_strict           32
+
+#if __XEN_INTERFACE_VERSION__ < 0x00040600
+#define MAX_VMASST_TYPE                  3
+#endif
+
+/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */
+#define DOMID_FIRST_RESERVED xen_mk_uint(0x7FF0)
+
+/* DOMID_SELF is used in certain contexts to refer to oneself. */
+#define DOMID_SELF           xen_mk_uint(0x7FF0)
+
+/*
+ * DOMID_IO is used to restrict page-table updates to mapping I/O memory.
+ * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO
+ * is useful to ensure that no mappings to the OS's own heap are accidentally
+ * installed. (e.g., in Linux this could cause havoc as reference counts
+ * aren't adjusted on the I/O-mapping code path).
+ * This only makes sense as HYPERVISOR_mmu_update()'s and
+ * HYPERVISOR_update_va_mapping_otherdomain()'s "foreigndom" argument. For
+ * HYPERVISOR_mmu_update() context it can be specified by any calling domain,
+ * otherwise it's only permitted if the caller is privileged.
+ */
+#define DOMID_IO             xen_mk_uint(0x7FF1)
+
+/*
+ * DOMID_XEN is used to allow privileged domains to map restricted parts of
+ * Xen's heap space (e.g., the machine_to_phys table).
+ * This only makes sense as
+ * - HYPERVISOR_mmu_update()'s, HYPERVISOR_mmuext_op()'s, or
+ *   HYPERVISOR_update_va_mapping_otherdomain()'s "foreigndom" argument,
+ * - with XENMAPSPACE_gmfn_foreign,
+ * and is only permitted if the caller is privileged.
+ */
+#define DOMID_XEN            xen_mk_uint(0x7FF2)
+
+/*
+ * DOMID_COW is used as the owner of sharable pages */
+#define DOMID_COW            xen_mk_uint(0x7FF3)
+
+/* DOMID_INVALID is used to identify pages with unknown owner. */
+#define DOMID_INVALID        xen_mk_uint(0x7FF4)
+
+/* Idle domain. */
+#define DOMID_IDLE           xen_mk_uint(0x7FFF)
+
+/* Mask for valid domain id values */
+#define DOMID_MASK           xen_mk_uint(0x7FFF)
+
+#ifndef __ASSEMBLY__
+
+typedef uint16_t domid_t;
+
+/*
+ * Send an array of these to HYPERVISOR_mmu_update().
+ * NB. The fields are natural pointer/address size for this architecture.
+ */
+struct mmu_update {
+    uint64_t ptr;       /* Machine address of PTE. */
+    uint64_t val;       /* New contents of PTE.    */
+};
+typedef struct mmu_update mmu_update_t;
+DEFINE_XEN_GUEST_HANDLE(mmu_update_t);
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_multicall(multicall_entry_t call_list[],
+ * `                      uint32_t nr_calls);
+ *
+ * NB. The fields are logically the natural register size for this
+ * architecture. In cases where xen_ulong_t is larger than this then
+ * any unused bits in the upper portion must be zero.
+ */
+struct multicall_entry {
+    xen_ulong_t op, result;
+    xen_ulong_t args[6];
+};
+typedef struct multicall_entry multicall_entry_t;
+DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
+
+#if __XEN_INTERFACE_VERSION__ < 0x00040400
+/*
+ * Event channel endpoints per domain (when using the 2-level ABI):
+ *  1024 if a long is 32 bits; 4096 if a long is 64 bits.
+ */
+#define NR_EVENT_CHANNELS EVTCHN_2L_NR_CHANNELS
+#endif
+
+struct vcpu_time_info {
+    /*
+     * Updates to the following values are preceded and followed by an
+     * increment of 'version'. The guest can therefore detect updates by
+     * looking for changes to 'version'. If the least-significant bit of
+     * the version number is set then an update is in progress and the guest
+     * must wait to read a consistent set of values.
+     * The correct way to interact with the version number is similar to
+     * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry.
+     */
+    uint32_t version;
+    uint32_t pad0;
+    uint64_t tsc_timestamp;   /* TSC at last update of time vals.  */
+    uint64_t system_time;     /* Time, in nanosecs, since boot.    */
+    /*
+     * Current system time:
+     *   system_time +
+     *   ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32)
+     * CPU frequency (Hz):
+     *   ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
+     */
+    uint32_t tsc_to_system_mul;
+    int8_t   tsc_shift;
+#if __XEN_INTERFACE_VERSION__ > 0x040600
+    uint8_t  flags;
+    uint8_t  pad1[2];
+#else
+    int8_t   pad1[3];
+#endif
+}; /* 32 bytes */
+typedef struct vcpu_time_info vcpu_time_info_t;
+
+#define XEN_PVCLOCK_TSC_STABLE_BIT     (1 << 0)
+#define XEN_PVCLOCK_GUEST_STOPPED      (1 << 1)
+
+struct vcpu_info {
+    /*
+     * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
+     * a pending notification for a particular VCPU. It is then cleared
+     * by the guest OS /before/ checking for pending work, thus avoiding
+     * a set-and-check race. Note that the mask is only accessed by Xen
+     * on the CPU that is currently hosting the VCPU. This means that the
+     * pending and mask flags can be updated by the guest without special
+     * synchronisation (i.e., no need for the x86 LOCK prefix).
+     * This may seem suboptimal because if the pending flag is set by
+     * a different CPU then an IPI may be scheduled even when the mask
+     * is set. However, note:
+     *  1. The task of 'interrupt holdoff' is covered by the per-event-
+     *     channel mask bits. A 'noisy' event that is continually being
+     *     triggered can be masked at source at this very precise
+     *     granularity.
+     *  2. The main purpose of the per-VCPU mask is therefore to restrict
+     *     reentrant execution: whether for concurrency control, or to
+     *     prevent unbounded stack usage. Whatever the purpose, we expect
+     *     that the mask will be asserted only for short periods at a time,
+     *     and so the likelihood of a 'spurious' IPI is suitably small.
+     * The mask is read before making an event upcall to the guest: a
+     * non-zero mask therefore guarantees that the VCPU will not receive
+     * an upcall activation. The mask is cleared when the VCPU requests
+     * to block: this avoids wakeup-waiting races.
+     */
+    uint8_t evtchn_upcall_pending;
+#ifdef XEN_HAVE_PV_UPCALL_MASK
+    uint8_t evtchn_upcall_mask;
+#else /* XEN_HAVE_PV_UPCALL_MASK */
+    uint8_t pad0;
+#endif /* XEN_HAVE_PV_UPCALL_MASK */
+    xen_ulong_t evtchn_pending_sel;
+    struct arch_vcpu_info arch;
+    vcpu_time_info_t time;
+}; /* 64 bytes (x86) */
+#ifndef __XEN__
+typedef struct vcpu_info vcpu_info_t;
+#endif
+
+/*
+ * `incontents 200 startofday_shared Start-of-day shared data structure
+ * Xen/kernel shared data -- pointer provided in start_info.
+ *
+ * This structure is defined to be both smaller than a page, and the
+ * only data on the shared page, but may vary in actual size even within
+ * compatible Xen versions; guests should not rely on the size
+ * of this structure remaining constant.
+ */
+struct shared_info {
+    struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
+
+    /*
+     * A domain can create "event channels" on which it can send and receive
+     * asynchronous event notifications. There are three classes of event that
+     * are delivered by this mechanism:
+     *  1. Bi-directional inter- and intra-domain connections. Domains must
+     *     arrange out-of-band to set up a connection (usually by allocating
+     *     an unbound 'listener' port and avertising that via a storage service
+     *     such as xenstore).
+     *  2. Physical interrupts. A domain with suitable hardware-access
+     *     privileges can bind an event-channel port to a physical interrupt
+     *     source.
+     *  3. Virtual interrupts ('events'). A domain can bind an event-channel
+     *     port to a virtual interrupt source, such as the virtual-timer
+     *     device or the emergency console.
+     *
+     * Event channels are addressed by a "port index". Each channel is
+     * associated with two bits of information:
+     *  1. PENDING -- notifies the domain that there is a pending notification
+     *     to be processed. This bit is cleared by the guest.
+     *  2. MASK -- if this bit is clear then a 0->1 transition of PENDING
+     *     will cause an asynchronous upcall to be scheduled. This bit is only
+     *     updated by the guest. It is read-only within Xen. If a channel
+     *     becomes pending while the channel is masked then the 'edge' is lost
+     *     (i.e., when the channel is unmasked, the guest must manually handle
+     *     pending notifications as no upcall will be scheduled by Xen).
+     *
+     * To expedite scanning of pending notifications, any 0->1 pending
+     * transition on an unmasked channel causes a corresponding bit in a
+     * per-vcpu selector word to be set. Each bit in the selector covers a
+     * 'C long' in the PENDING bitfield array.
+     */
+    xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8];
+    xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8];
+
+    /*
+     * Wallclock time: updated by control software or RTC emulation.
+     * Guests should base their gettimeofday() syscall on this
+     * wallclock-base value.
+     * The values of wc_sec and wc_nsec are offsets from the Unix epoch
+     * adjusted by the domain's 'time offset' (in seconds) as set either
+     * by XEN_DOMCTL_settimeoffset, or adjusted via a guest write to the
+     * emulated RTC.
+     */
+    uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
+    uint32_t wc_sec;
+    uint32_t wc_nsec;
+#if !defined(__i386__)
+    uint32_t wc_sec_hi;
+# define xen_wc_sec_hi wc_sec_hi
+#elif !defined(__XEN__) && !defined(__XEN_TOOLS__)
+# define xen_wc_sec_hi arch.wc_sec_hi
+#endif
+
+    struct arch_shared_info arch;
+
+};
+#ifndef __XEN__
+typedef struct shared_info shared_info_t;
+#endif
+
+/*
+ * `incontents 200 startofday Start-of-day memory layout
+ *
+ *  1. The domain is started within contiguous virtual-memory region.
+ *  2. The contiguous region ends on an aligned 4MB boundary.
+ *  3. This the order of bootstrap elements in the initial virtual region:
+ *      a. relocated kernel image
+ *      b. initial ram disk              [mod_start, mod_len]
+ *         (may be omitted)
+ *      c. list of allocated page frames [mfn_list, nr_pages]
+ *         (unless relocated due to XEN_ELFNOTE_INIT_P2M)
+ *      d. start_info_t structure        [register rSI (x86)]
+ *         in case of dom0 this page contains the console info, too
+ *      e. unless dom0: xenstore ring page
+ *      f. unless dom0: console ring page
+ *      g. bootstrap page tables         [pt_base and CR3 (x86)]
+ *      h. bootstrap stack               [register ESP (x86)]
+ *  4. Bootstrap elements are packed together, but each is 4kB-aligned.
+ *  5. The list of page frames forms a contiguous 'pseudo-physical' memory
+ *     layout for the domain. In particular, the bootstrap virtual-memory
+ *     region is a 1:1 mapping to the first section of the pseudo-physical map.
+ *  6. All bootstrap elements are mapped read-writable for the guest OS. The
+ *     only exception is the bootstrap page table, which is mapped read-only.
+ *  7. There is guaranteed to be at least 512kB padding after the final
+ *     bootstrap element. If necessary, the bootstrap virtual region is
+ *     extended by an extra 4MB to ensure this.
+ *
+ * Note: Prior to 25833:bb85bbccb1c9. ("x86/32-on-64 adjust Dom0 initial page
+ * table layout") a bug caused the pt_base (3.g above) and cr3 to not point
+ * to the start of the guest page tables (it was offset by two pages).
+ * This only manifested itself on 32-on-64 dom0 kernels and not 32-on-64 domU
+ * or 64-bit kernels of any colour. The page tables for a 32-on-64 dom0 got
+ * allocated in the order: 'first L1','first L2', 'first L3', so the offset
+ * to the page table base is by two pages back. The initial domain if it is
+ * 32-bit and runs under a 64-bit hypervisor should _NOT_ use two of the
+ * pages preceding pt_base and mark them as reserved/unused.
+ */
+#ifdef XEN_HAVE_PV_GUEST_ENTRY
+struct start_info {
+    /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME.    */
+    char magic[32];             /* "xen-<version>-<platform>".            */
+    unsigned long nr_pages;     /* Total pages allocated to this domain.  */
+    unsigned long shared_info;  /* MACHINE address of shared info struct. */
+    uint32_t flags;             /* SIF_xxx flags.                         */
+    xen_pfn_t store_mfn;        /* MACHINE page number of shared page.    */
+    uint32_t store_evtchn;      /* Event channel for store communication. */
+    union {
+        struct {
+            xen_pfn_t mfn;      /* MACHINE page number of console page.   */
+            uint32_t  evtchn;   /* Event channel for console page.        */
+        } domU;
+        struct {
+            uint32_t info_off;  /* Offset of console_info struct.         */
+            uint32_t info_size; /* Size of console_info struct from start.*/
+        } dom0;
+    } console;
+    /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME).     */
+    unsigned long pt_base;      /* VIRTUAL address of page directory.     */
+    unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames.       */
+    unsigned long mfn_list;     /* VIRTUAL address of page-frame list.    */
+    unsigned long mod_start;    /* VIRTUAL address of pre-loaded module   */
+                                /* (PFN of pre-loaded module if           */
+                                /*  SIF_MOD_START_PFN set in flags).      */
+    unsigned long mod_len;      /* Size (bytes) of pre-loaded module.     */
+#define MAX_GUEST_CMDLINE 1024
+    int8_t cmd_line[MAX_GUEST_CMDLINE];
+    /* The pfn range here covers both page table and p->m table frames.   */
+    unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table.    */
+    unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table.  */
+};
+typedef struct start_info start_info_t;
+
+/* New console union for dom0 introduced in 0x00030203. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030203
+#define console_mfn    console.domU.mfn
+#define console_evtchn console.domU.evtchn
+#endif
+#endif /* XEN_HAVE_PV_GUEST_ENTRY */
+
+/* These flags are passed in the 'flags' field of start_info_t. */
+#define SIF_PRIVILEGED    (1<<0)  /* Is the domain privileged? */
+#define SIF_INITDOMAIN    (1<<1)  /* Is this the initial control domain? */
+#define SIF_MULTIBOOT_MOD (1<<2)  /* Is mod_start a multiboot module? */
+#define SIF_MOD_START_PFN (1<<3)  /* Is mod_start a PFN? */
+#define SIF_VIRT_P2M_4TOOLS (1<<4) /* Do Xen tools understand a virt. mapped */
+                                   /* P->M making the 3 level tree obsolete? */
+#define SIF_PM_MASK       (0xFF<<8) /* reserve 1 byte for xen-pm options */
+
+/*
+ * A multiboot module is a package containing modules very similar to a
+ * multiboot module array. The only differences are:
+ * - the array of module descriptors is by convention simply at the beginning
+ *   of the multiboot module,
+ * - addresses in the module descriptors are based on the beginning of the
+ *   multiboot module,
+ * - the number of modules is determined by a termination descriptor that has
+ *   mod_start == 0.
+ *
+ * This permits to both build it statically and reference it in a configuration
+ * file, and let the PV guest easily rebase the addresses to virtual addresses
+ * and at the same time count the number of modules.
+ */
+struct xen_multiboot_mod_list
+{
+    /* Address of first byte of the module */
+    uint32_t mod_start;
+    /* Address of last byte of the module (inclusive) */
+    uint32_t mod_end;
+    /* Address of zero-terminated command line */
+    uint32_t cmdline;
+    /* Unused, must be zero */
+    uint32_t pad;
+};
+/*
+ * `incontents 200 startofday_dom0_console Dom0_console
+ *
+ * The console structure in start_info.console.dom0
+ *
+ * This structure includes a variety of information required to
+ * have a working VGA/VESA console.
+ */
+typedef struct dom0_vga_console_info {
+    uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */
+#define XEN_VGATYPE_TEXT_MODE_3 0x03
+#define XEN_VGATYPE_VESA_LFB    0x23
+#define XEN_VGATYPE_EFI_LFB     0x70
+
+    union {
+        struct {
+            /* Font height, in pixels. */
+            uint16_t font_height;
+            /* Cursor location (column, row). */
+            uint16_t cursor_x, cursor_y;
+            /* Number of rows and columns (dimensions in characters). */
+            uint16_t rows, columns;
+        } text_mode_3;
+
+        struct {
+            /* Width and height, in pixels. */
+            uint16_t width, height;
+            /* Bytes per scan line. */
+            uint16_t bytes_per_line;
+            /* Bits per pixel. */
+            uint16_t bits_per_pixel;
+            /* LFB physical address, and size (in units of 64kB). */
+            uint32_t lfb_base;
+            uint32_t lfb_size;
+            /* RGB mask offsets and sizes, as defined by VBE 1.2+ */
+            uint8_t  red_pos, red_size;
+            uint8_t  green_pos, green_size;
+            uint8_t  blue_pos, blue_size;
+            uint8_t  rsvd_pos, rsvd_size;
+#if __XEN_INTERFACE_VERSION__ >= 0x00030206
+            /* VESA capabilities (offset 0xa, VESA command 0x4f00). */
+            uint32_t gbl_caps;
+            /* Mode attributes (offset 0x0, VESA command 0x4f01). */
+            uint16_t mode_attrs;
+            uint16_t pad;
+#endif
+#if __XEN_INTERFACE_VERSION__ >= 0x00040d00
+            /* high 32 bits of lfb_base */
+            uint32_t ext_lfb_base;
+#endif
+        } vesa_lfb;
+    } u;
+} dom0_vga_console_info_t;
+#define xen_vga_console_info dom0_vga_console_info
+#define xen_vga_console_info_t dom0_vga_console_info_t
+
+typedef uint8_t xen_domain_handle_t[16];
+
+__DEFINE_XEN_GUEST_HANDLE(uint8,  uint8_t);
+__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t);
+__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t);
+__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t);
+
+typedef struct {
+    uint8_t a[16];
+} xen_uuid_t;
+
+/*
+ * XEN_DEFINE_UUID(0x00112233, 0x4455, 0x6677, 0x8899,
+ *                 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff)
+ * will construct UUID 00112233-4455-6677-8899-aabbccddeeff presented as
+ * {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
+ * 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff};
+ *
+ * NB: This is compatible with Linux kernel and with libuuid, but it is not
+ * compatible with Microsoft, as they use mixed-endian encoding (some
+ * components are little-endian, some are big-endian).
+ */
+#define XEN_DEFINE_UUID_(a, b, c, d, e1, e2, e3, e4, e5, e6)            \
+    {{((a) >> 24) & 0xFF, ((a) >> 16) & 0xFF,                           \
+      ((a) >>  8) & 0xFF, ((a) >>  0) & 0xFF,                           \
+      ((b) >>  8) & 0xFF, ((b) >>  0) & 0xFF,                           \
+      ((c) >>  8) & 0xFF, ((c) >>  0) & 0xFF,                           \
+      ((d) >>  8) & 0xFF, ((d) >>  0) & 0xFF,                           \
+                e1, e2, e3, e4, e5, e6}}
+
+#if defined(__STDC_VERSION__) ? __STDC_VERSION__ >= 199901L : defined(__GNUC__)
+#define XEN_DEFINE_UUID(a, b, c, d, e1, e2, e3, e4, e5, e6)             \
+    ((xen_uuid_t)XEN_DEFINE_UUID_(a, b, c, d, e1, e2, e3, e4, e5, e6))
+#else
+#define XEN_DEFINE_UUID(a, b, c, d, e1, e2, e3, e4, e5, e6)             \
+    XEN_DEFINE_UUID_(a, b, c, d, e1, e2, e3, e4, e5, e6)
+#endif /* __STDC_VERSION__ / __GNUC__ */
+
+#endif /* !__ASSEMBLY__ */
+
+/* Default definitions for macros used by domctl/sysctl. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+#ifndef int64_aligned_t
+#define int64_aligned_t int64_t
+#endif
+#ifndef uint64_aligned_t
+#define uint64_aligned_t uint64_t
+#endif
+#ifndef XEN_GUEST_HANDLE_64
+#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name)
+#endif
+
+#ifndef __ASSEMBLY__
+struct xenctl_bitmap {
+    XEN_GUEST_HANDLE_64(uint8) bitmap;
+    uint32_t nr_bits;
+};
+typedef struct xenctl_bitmap xenctl_bitmap_t;
+#endif
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+#endif /* __XEN_PUBLIC_XEN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/hw/xen/start_info.h b/include/hw/xen/start_info.h
new file mode 100644 (file)
index 0000000..6ed4877
--- /dev/null
@@ -0,0 +1,146 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2016, Citrix Systems, Inc.
+ */
+
+#ifndef XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H
+#define XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H
+
+/*
+ * Start of day structure passed to PVH guests and to HVM guests in %ebx.
+ *
+ * NOTE: nothing will be loaded at physical address 0, so a 0 value in any
+ * of the address fields should be treated as not present.
+ *
+ *  0 +----------------+
+ *    | magic          | Contains the magic value XEN_HVM_START_MAGIC_VALUE
+ *    |                | ("xEn3" with the 0x80 bit of the "E" set).
+ *  4 +----------------+
+ *    | version        | Version of this structure. Current version is 1. New
+ *    |                | versions are guaranteed to be backwards-compatible.
+ *  8 +----------------+
+ *    | flags          | SIF_xxx flags.
+ * 12 +----------------+
+ *    | nr_modules     | Number of modules passed to the kernel.
+ * 16 +----------------+
+ *    | modlist_paddr  | Physical address of an array of modules
+ *    |                | (layout of the structure below).
+ * 24 +----------------+
+ *    | cmdline_paddr  | Physical address of the command line,
+ *    |                | a zero-terminated ASCII string.
+ * 32 +----------------+
+ *    | rsdp_paddr     | Physical address of the RSDP ACPI data structure.
+ * 40 +----------------+
+ *    | memmap_paddr   | Physical address of the (optional) memory map. Only
+ *    |                | present in version 1 and newer of the structure.
+ * 48 +----------------+
+ *    | memmap_entries | Number of entries in the memory map table. Only
+ *    |                | present in version 1 and newer of the structure.
+ *    |                | Zero if there is no memory map being provided.
+ * 52 +----------------+
+ *    | reserved       | Version 1 and newer only.
+ * 56 +----------------+
+ *
+ * The layout of each entry in the module structure is the following:
+ *
+ *  0 +----------------+
+ *    | paddr          | Physical address of the module.
+ *  8 +----------------+
+ *    | size           | Size of the module in bytes.
+ * 16 +----------------+
+ *    | cmdline_paddr  | Physical address of the command line,
+ *    |                | a zero-terminated ASCII string.
+ * 24 +----------------+
+ *    | reserved       |
+ * 32 +----------------+
+ *
+ * The layout of each entry in the memory map table is as follows:
+ *
+ *  0 +----------------+
+ *    | addr           | Base address
+ *  8 +----------------+
+ *    | size           | Size of mapping in bytes
+ * 16 +----------------+
+ *    | type           | Type of mapping as defined between the hypervisor
+ *    |                | and guest it's starting. E820_TYPE_xxx, for example.
+ * 20 +----------------|
+ *    | reserved       |
+ * 24 +----------------+
+ *
+ * The address and sizes are always a 64bit little endian unsigned integer.
+ *
+ * NB: Xen on x86 will always try to place all the data below the 4GiB
+ * boundary.
+ *
+ * Version numbers of the hvm_start_info structure have evolved like this:
+ *
+ * Version 0:
+ *
+ * Version 1:   Added the memmap_paddr/memmap_entries fields (plus 4 bytes of
+ *              padding) to the end of the hvm_start_info struct. These new
+ *              fields can be used to pass a memory map to the guest. The
+ *              memory map is optional and so guests that understand version 1
+ *              of the structure must check that memmap_entries is non-zero
+ *              before trying to read the memory map.
+ */
+#define XEN_HVM_START_MAGIC_VALUE 0x336ec578
+
+/*
+ * C representation of the x86/HVM start info layout.
+ *
+ * The canonical definition of this layout is above, this is just a way to
+ * represent the layout described there using C types.
+ */
+struct hvm_start_info {
+    uint32_t magic;             /* Contains the magic value 0x336ec578       */
+                                /* ("xEn3" with the 0x80 bit of the "E" set).*/
+    uint32_t version;           /* Version of this structure.                */
+    uint32_t flags;             /* SIF_xxx flags.                            */
+    uint32_t nr_modules;        /* Number of modules passed to the kernel.   */
+    uint64_t modlist_paddr;     /* Physical address of an array of           */
+                                /* hvm_modlist_entry.                        */
+    uint64_t cmdline_paddr;     /* Physical address of the command line.     */
+    uint64_t rsdp_paddr;        /* Physical address of the RSDP ACPI data    */
+                                /* structure.                                */
+    uint64_t memmap_paddr;      /* Physical address of an array of           */
+                                /* hvm_memmap_table_entry. Only present in   */
+                                /* version 1 and newer of the structure      */
+    uint32_t memmap_entries;    /* Number of entries in the memmap table.    */
+                                /* Only present in version 1 and newer of    */
+                                /* the structure. Value will be zero if      */
+                                /* there is no memory map being provided.    */
+    uint32_t reserved;
+};
+
+struct hvm_modlist_entry {
+    uint64_t paddr;             /* Physical address of the module.           */
+    uint64_t size;              /* Size of the module in bytes.              */
+    uint64_t cmdline_paddr;     /* Physical address of the command line.     */
+    uint64_t reserved;
+};
+
+struct hvm_memmap_table_entry {
+    uint64_t addr;              /* Base address of the memory region         */
+    uint64_t size;              /* Size of the memory region in bytes        */
+    uint32_t type;              /* Mapping type                              */
+    uint32_t reserved;
+};
+
+#endif /* XEN_PUBLIC_ARCH_X86_HVM_START_INFO_H */
diff --git a/include/hw/xen/xen-backend.h b/include/hw/xen/xen-backend.h
new file mode 100644 (file)
index 0000000..0f01631
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018  Citrix Systems Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_XEN_BACKEND_H
+#define HW_XEN_BACKEND_H
+
+#include "hw/xen/xen-bus.h"
+
+typedef struct XenBackendInstance XenBackendInstance;
+
+typedef void (*XenBackendDeviceCreate)(XenBackendInstance *backend,
+                                       QDict *opts, Error **errp);
+typedef void (*XenBackendDeviceDestroy)(XenBackendInstance *backend,
+                                        Error **errp);
+
+typedef struct XenBackendInfo {
+    const char *type;
+    XenBackendDeviceCreate create;
+    XenBackendDeviceDestroy destroy;
+} XenBackendInfo;
+
+XenBus *xen_backend_get_bus(XenBackendInstance *backend);
+const char *xen_backend_get_name(XenBackendInstance *backend);
+
+void xen_backend_set_device(XenBackendInstance *backend,
+                            XenDevice *xendevice);
+XenDevice *xen_backend_get_device(XenBackendInstance *backend);
+
+void xen_backend_register(const XenBackendInfo *info);
+const char **xen_backend_get_types(unsigned int *nr);
+
+bool xen_backend_exists(const char *type, const char *name);
+void xen_backend_device_create(XenBus *xenbus, const char *type,
+                               const char *name, QDict *opts, Error **errp);
+bool xen_backend_try_device_destroy(XenDevice *xendev, Error **errp);
+
+#endif /* HW_XEN_BACKEND_H */
diff --git a/include/hw/xen/xen-block.h b/include/hw/xen/xen-block.h
new file mode 100644 (file)
index 0000000..d692ea7
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018  Citrix Systems Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_XEN_BLOCK_H
+#define HW_XEN_BLOCK_H
+
+#include "hw/xen/xen-bus.h"
+#include "hw/block/block.h"
+#include "hw/block/dataplane/xen-block.h"
+#include "sysemu/iothread.h"
+#include "qom/object.h"
+
+typedef enum XenBlockVdevType {
+    XEN_BLOCK_VDEV_TYPE_INVALID,
+    XEN_BLOCK_VDEV_TYPE_DP,
+    XEN_BLOCK_VDEV_TYPE_XVD,
+    XEN_BLOCK_VDEV_TYPE_HD,
+    XEN_BLOCK_VDEV_TYPE_SD,
+    XEN_BLOCK_VDEV_TYPE__MAX
+} XenBlockVdevType;
+
+typedef struct XenBlockVdev {
+    XenBlockVdevType type;
+    unsigned long disk;
+    unsigned long partition;
+    unsigned long number;
+} XenBlockVdev;
+
+
+typedef struct XenBlockProperties {
+    XenBlockVdev vdev;
+    BlockConf conf;
+    unsigned int max_ring_page_order;
+    IOThread *iothread;
+} XenBlockProperties;
+
+typedef struct XenBlockDrive {
+    char *id;
+    char *node_name;
+} XenBlockDrive;
+
+typedef struct XenBlockIOThread {
+    char *id;
+} XenBlockIOThread;
+
+struct XenBlockDevice {
+    XenDevice xendev;
+    XenBlockProperties props;
+    const char *device_type;
+    unsigned int info;
+    XenBlockDataPlane *dataplane;
+    XenBlockDrive *drive;
+    XenBlockIOThread *iothread;
+};
+typedef struct XenBlockDevice XenBlockDevice;
+
+typedef void (*XenBlockDeviceRealize)(XenBlockDevice *blockdev, Error **errp);
+typedef void (*XenBlockDeviceUnrealize)(XenBlockDevice *blockdev);
+
+struct XenBlockDeviceClass {
+    /*< private >*/
+    XenDeviceClass parent_class;
+    /*< public >*/
+    XenBlockDeviceRealize realize;
+    XenBlockDeviceUnrealize unrealize;
+};
+
+#define TYPE_XEN_BLOCK_DEVICE  "xen-block"
+OBJECT_DECLARE_TYPE(XenBlockDevice, XenBlockDeviceClass, XEN_BLOCK_DEVICE)
+
+struct XenDiskDevice {
+    XenBlockDevice blockdev;
+};
+
+#define TYPE_XEN_DISK_DEVICE  "xen-disk"
+OBJECT_DECLARE_SIMPLE_TYPE(XenDiskDevice, XEN_DISK_DEVICE)
+
+struct XenCDRomDevice {
+    XenBlockDevice blockdev;
+};
+
+#define TYPE_XEN_CDROM_DEVICE  "xen-cdrom"
+OBJECT_DECLARE_SIMPLE_TYPE(XenCDRomDevice, XEN_CDROM_DEVICE)
+
+#endif /* HW_XEN_BLOCK_H */
diff --git a/include/hw/xen/xen-bus-helper.h b/include/hw/xen/xen-bus-helper.h
new file mode 100644 (file)
index 0000000..d8dcc2f
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018  Citrix Systems Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_XEN_BUS_HELPER_H
+#define HW_XEN_BUS_HELPER_H
+
+#include "hw/xen/xen_backend_ops.h"
+
+const char *xs_strstate(enum xenbus_state state);
+
+void xs_node_create(struct qemu_xs_handle *h,  xs_transaction_t tid,
+                    const char *node, unsigned int owner, unsigned int domid,
+                    unsigned int perms, Error **errp);
+void xs_node_destroy(struct qemu_xs_handle *h,  xs_transaction_t tid,
+                     const char *node, Error **errp);
+
+/* Write to node/key unless node is empty, in which case write to key */
+void xs_node_vprintf(struct qemu_xs_handle *h,  xs_transaction_t tid,
+                     const char *node, const char *key, Error **errp,
+                     const char *fmt, va_list ap)
+    G_GNUC_PRINTF(6, 0);
+void xs_node_printf(struct qemu_xs_handle *h,  xs_transaction_t tid,
+                    const char *node, const char *key, Error **errp,
+                    const char *fmt, ...)
+    G_GNUC_PRINTF(6, 7);
+
+/* Read from node/key unless node is empty, in which case read from key */
+int xs_node_vscanf(struct qemu_xs_handle *h,  xs_transaction_t tid,
+                   const char *node, const char *key, Error **errp,
+                   const char *fmt, va_list ap)
+    G_GNUC_SCANF(6, 0);
+int xs_node_scanf(struct qemu_xs_handle *h,  xs_transaction_t tid,
+                  const char *node, const char *key, Error **errp,
+                  const char *fmt, ...)
+    G_GNUC_SCANF(6, 7);
+
+/* Watch node/key unless node is empty, in which case watch key */
+struct qemu_xs_watch *xs_node_watch(struct qemu_xs_handle *h, const char *node,
+                                    const char *key, xs_watch_fn fn,
+                                    void *opaque, Error **errp);
+void xs_node_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w);
+
+#endif /* HW_XEN_BUS_HELPER_H */
diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h
new file mode 100644 (file)
index 0000000..334ddd1
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2018  Citrix Systems Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_XEN_BUS_H
+#define HW_XEN_BUS_H
+
+#include "hw/xen/xen_backend_ops.h"
+#include "hw/sysbus.h"
+#include "qemu/notify.h"
+#include "qom/object.h"
+
+typedef struct XenEventChannel XenEventChannel;
+
+struct XenDevice {
+    DeviceState qdev;
+    domid_t frontend_id;
+    char *name;
+    struct qemu_xs_handle *xsh;
+    char *backend_path, *frontend_path;
+    enum xenbus_state backend_state, frontend_state;
+    Notifier exit;
+    struct qemu_xs_watch *backend_state_watch, *frontend_state_watch;
+    bool backend_online;
+    struct qemu_xs_watch *backend_online_watch;
+    xengnttab_handle *xgth;
+    bool inactive;
+    QLIST_HEAD(, XenEventChannel) event_channels;
+    QLIST_ENTRY(XenDevice) list;
+};
+typedef struct XenDevice XenDevice;
+
+typedef char *(*XenDeviceGetFrontendPath)(XenDevice *xendev, Error **errp);
+typedef char *(*XenDeviceGetName)(XenDevice *xendev, Error **errp);
+typedef void (*XenDeviceRealize)(XenDevice *xendev, Error **errp);
+typedef void (*XenDeviceFrontendChanged)(XenDevice *xendev,
+                                         enum xenbus_state frontend_state,
+                                         Error **errp);
+typedef void (*XenDeviceUnrealize)(XenDevice *xendev);
+
+struct XenDeviceClass {
+    /*< private >*/
+    DeviceClass parent_class;
+    /*< public >*/
+    const char *backend;
+    const char *device;
+    XenDeviceGetFrontendPath get_frontend_path;
+    XenDeviceGetName get_name;
+    XenDeviceRealize realize;
+    XenDeviceFrontendChanged frontend_changed;
+    XenDeviceUnrealize unrealize;
+};
+
+#define TYPE_XEN_DEVICE "xen-device"
+OBJECT_DECLARE_TYPE(XenDevice, XenDeviceClass, XEN_DEVICE)
+
+struct XenBus {
+    BusState qbus;
+    domid_t backend_id;
+    struct qemu_xs_handle *xsh;
+    unsigned int backend_types;
+    struct qemu_xs_watch **backend_watch;
+    QLIST_HEAD(, XenDevice) inactive_devices;
+};
+
+struct XenBusClass {
+    /*< private >*/
+    BusClass parent_class;
+};
+
+#define TYPE_XEN_BUS "xen-bus"
+OBJECT_DECLARE_TYPE(XenBus, XenBusClass,
+                    XEN_BUS)
+
+BusState *xen_bus_init(void);
+
+void xen_device_backend_set_state(XenDevice *xendev,
+                                  enum xenbus_state state);
+enum xenbus_state xen_device_backend_get_state(XenDevice *xendev);
+
+void xen_device_backend_printf(XenDevice *xendev, const char *key,
+                               const char *fmt, ...)
+    G_GNUC_PRINTF(3, 4);
+void xen_device_frontend_printf(XenDevice *xendev, const char *key,
+                                const char *fmt, ...)
+    G_GNUC_PRINTF(3, 4);
+
+int xen_device_frontend_scanf(XenDevice *xendev, const char *key,
+                              const char *fmt, ...)
+    G_GNUC_SCANF(3, 4);
+
+void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs,
+                                   Error **errp);
+void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs,
+                                unsigned int nr_refs, int prot,
+                                Error **errp);
+void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, uint32_t *refs,
+                                 unsigned int nr_refs, Error **errp);
+
+typedef struct XenDeviceGrantCopySegment {
+    union {
+        void *virt;
+        struct {
+            uint32_t ref;
+            off_t offset;
+        } foreign;
+    } source, dest;
+    size_t len;
+} XenDeviceGrantCopySegment;
+
+void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain,
+                                XenDeviceGrantCopySegment segs[],
+                                unsigned int nr_segs, Error **errp);
+
+typedef bool (*XenEventHandler)(void *opaque);
+
+XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
+                                               unsigned int port,
+                                               XenEventHandler handler,
+                                               void *opaque, Error **errp);
+void xen_device_set_event_channel_context(XenDevice *xendev,
+                                          XenEventChannel *channel,
+                                          AioContext *ctx,
+                                          Error **errp);
+void xen_device_notify_event_channel(XenDevice *xendev,
+                                     XenEventChannel *channel,
+                                     Error **errp);
+void xen_device_unbind_event_channel(XenDevice *xendev,
+                                     XenEventChannel *channel,
+                                     Error **errp);
+unsigned int xen_event_channel_get_local_port(XenEventChannel *channel);
+
+#endif /* HW_XEN_BUS_H */
diff --git a/include/hw/xen/xen-hvm-common.h b/include/hw/xen/xen-hvm-common.h
new file mode 100644 (file)
index 0000000..4e9904f
--- /dev/null
@@ -0,0 +1,99 @@
+#ifndef HW_XEN_HVM_COMMON_H
+#define HW_XEN_HVM_COMMON_H
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+
+#include "cpu.h"
+#include "hw/pci/pci.h"
+#include "hw/hw.h"
+#include "hw/xen/xen_native.h"
+#include "hw/xen/xen-legacy-backend.h"
+#include "sysemu/runstate.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/xen.h"
+#include "sysemu/xen-mapcache.h"
+#include "qemu/error-report.h"
+#include <xen/hvm/ioreq.h>
+
+extern MemoryRegion ram_memory;
+extern MemoryListener xen_io_listener;
+extern DeviceListener xen_device_listener;
+
+//#define DEBUG_XEN_HVM
+
+#ifdef DEBUG_XEN_HVM
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i)
+{
+    return shared_page->vcpu_ioreq[i].vp_eport;
+}
+static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu)
+{
+    return &shared_page->vcpu_ioreq[vcpu];
+}
+
+#define BUFFER_IO_MAX_DELAY  100
+
+typedef struct XenPhysmap {
+    hwaddr start_addr;
+    ram_addr_t size;
+    const char *name;
+    hwaddr phys_offset;
+
+    QLIST_ENTRY(XenPhysmap) list;
+} XenPhysmap;
+
+typedef struct XenPciDevice {
+    PCIDevice *pci_dev;
+    uint32_t sbdf;
+    QLIST_ENTRY(XenPciDevice) entry;
+} XenPciDevice;
+
+typedef struct XenIOState {
+    ioservid_t ioservid;
+    shared_iopage_t *shared_page;
+    buffered_iopage_t *buffered_io_page;
+    xenforeignmemory_resource_handle *fres;
+    QEMUTimer *buffered_io_timer;
+    CPUState **cpu_by_vcpu_id;
+    /* the evtchn port for polling the notification, */
+    evtchn_port_t *ioreq_local_port;
+    /* evtchn remote and local ports for buffered io */
+    evtchn_port_t bufioreq_remote_port;
+    evtchn_port_t bufioreq_local_port;
+    /* the evtchn fd for polling */
+    xenevtchn_handle *xce_handle;
+    /* which vcpu we are serving */
+    int send_vcpu;
+
+    struct xs_handle *xenstore;
+    MemoryListener memory_listener;
+    MemoryListener io_listener;
+    QLIST_HEAD(, XenPciDevice) dev_list;
+    DeviceListener device_listener;
+
+    Notifier exit;
+} XenIOState;
+
+void xen_exit_notifier(Notifier *n, void *data);
+
+void xen_region_add(MemoryListener *listener, MemoryRegionSection *section);
+void xen_region_del(MemoryListener *listener, MemoryRegionSection *section);
+void xen_io_add(MemoryListener *listener, MemoryRegionSection *section);
+void xen_io_del(MemoryListener *listener, MemoryRegionSection *section);
+void xen_device_realize(DeviceListener *listener, DeviceState *dev);
+void xen_device_unrealize(DeviceListener *listener, DeviceState *dev);
+
+void xen_hvm_change_state_handler(void *opaque, bool running, RunState rstate);
+void xen_register_ioreq(XenIOState *state, unsigned int max_cpus,
+                        const MemoryListener *xen_memory_listener);
+
+void cpu_ioreq_pio(ioreq_t *req);
+#endif /* HW_XEN_HVM_COMMON_H */
diff --git a/include/hw/xen/xen-legacy-backend.h b/include/hw/xen/xen-legacy-backend.h
new file mode 100644 (file)
index 0000000..fc42146
--- /dev/null
@@ -0,0 +1,89 @@
+#ifndef HW_XEN_LEGACY_BACKEND_H
+#define HW_XEN_LEGACY_BACKEND_H
+
+#include "hw/xen/xen_backend_ops.h"
+#include "hw/xen/xen_pvdev.h"
+#include "net/net.h"
+#include "qom/object.h"
+
+#define TYPE_XENSYSDEV "xen-sysdev"
+#define TYPE_XENSYSBUS "xen-sysbus"
+#define TYPE_XENBACKEND "xen-backend"
+
+typedef struct XenLegacyDevice XenLegacyDevice;
+DECLARE_INSTANCE_CHECKER(XenLegacyDevice, XENBACKEND,
+                         TYPE_XENBACKEND)
+
+/* variables */
+extern struct qemu_xs_handle *xenstore;
+extern const char *xen_protocol;
+extern DeviceState *xen_sysdev;
+extern BusState *xen_sysbus;
+
+int xenstore_mkdir(char *path, int p);
+int xenstore_write_be_str(struct XenLegacyDevice *xendev, const char *node,
+                          const char *val);
+int xenstore_write_be_int(struct XenLegacyDevice *xendev, const char *node,
+                          int ival);
+int xenstore_write_be_int64(struct XenLegacyDevice *xendev, const char *node,
+                            int64_t ival);
+char *xenstore_read_be_str(struct XenLegacyDevice *xendev, const char *node);
+int xenstore_read_be_int(struct XenLegacyDevice *xendev, const char *node,
+                         int *ival);
+char *xenstore_read_fe_str(struct XenLegacyDevice *xendev, const char *node);
+int xenstore_read_fe_int(struct XenLegacyDevice *xendev, const char *node,
+                         int *ival);
+int xenstore_read_fe_uint64(struct XenLegacyDevice *xendev, const char *node,
+                            uint64_t *uval);
+
+void xen_be_check_state(struct XenLegacyDevice *xendev);
+
+/* xen backend driver bits */
+void xen_be_init(void);
+int xen_be_register(const char *type, struct XenDevOps *ops);
+int xen_be_set_state(struct XenLegacyDevice *xendev, enum xenbus_state state);
+int xen_be_bind_evtchn(struct XenLegacyDevice *xendev);
+void xen_be_set_max_grant_refs(struct XenLegacyDevice *xendev,
+                               unsigned int nr_refs);
+void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs,
+                            unsigned int nr_refs, int prot);
+void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr,
+                             uint32_t *refs, unsigned int nr_refs);
+
+int xen_be_copy_grant_refs(struct XenLegacyDevice *xendev,
+                           bool to_domain, XenGrantCopySegment segs[],
+                           unsigned int nr_segs);
+
+static inline void *xen_be_map_grant_ref(struct XenLegacyDevice *xendev,
+                                         uint32_t ref, int prot)
+{
+    return xen_be_map_grant_refs(xendev, &ref, 1, prot);
+}
+
+static inline void xen_be_unmap_grant_ref(struct XenLegacyDevice *xendev,
+                                          void *ptr, uint32_t ref)
+{
+    return xen_be_unmap_grant_refs(xendev, ptr, &ref, 1);
+}
+
+/* actual backend drivers */
+extern struct XenDevOps xen_console_ops;      /* xen_console.c     */
+extern struct XenDevOps xen_kbdmouse_ops;     /* xen_framebuffer.c */
+extern struct XenDevOps xen_framebuffer_ops;  /* xen_framebuffer.c */
+extern struct XenDevOps xen_blkdev_ops;       /* xen_disk.c        */
+#ifdef CONFIG_VIRTFS
+extern struct XenDevOps xen_9pfs_ops;       /* xen-9p-backend.c        */
+#endif
+extern struct XenDevOps xen_netdev_ops;       /* xen_nic.c         */
+#ifdef CONFIG_USB_LIBUSB
+extern struct XenDevOps xen_usb_ops;          /* xen-usb.c         */
+#endif
+
+/* configuration (aka xenbus setup) */
+void xen_config_cleanup(void);
+int xen_config_dev_nic(NICInfo *nic);
+int xen_config_dev_vfb(int vdev, const char *type);
+int xen_config_dev_vkbd(int vdev);
+int xen_config_dev_console(int vdev);
+
+#endif /* HW_XEN_LEGACY_BACKEND_H */
diff --git a/include/hw/xen/xen-x86.h b/include/hw/xen/xen-x86.h
new file mode 100644 (file)
index 0000000..85e3db1
--- /dev/null
@@ -0,0 +1,15 @@
+/*
+ * Xen X86-specific
+ *
+ * Copyright 2020 Red Hat, Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef QEMU_HW_XEN_X86_H
+#define QEMU_HW_XEN_X86_H
+
+#include "hw/i386/pc.h"
+
+void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory);
+
+#endif /* QEMU_HW_XEN_X86_H */
diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
new file mode 100644 (file)
index 0000000..37ecc91
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * public xen header
+ *   stuff needed outside xen-*.c, i.e. interfaces to qemu.
+ *   must not depend on any xen headers being present in
+ *   /usr/include/xen, so it can be included unconditionally.
+ */
+#ifndef QEMU_HW_XEN_H
+#define QEMU_HW_XEN_H
+
+/*
+ * C files using Xen toolstack libraries will have included those headers
+ * already via xen_native.h, and having __XEM_TOOLS__ defined will have
+ * automatically set __XEN_INTERFACE_VERSION__ to the latest supported
+ * by the *system* Xen headers which were transitively included.
+ *
+ * C files which are part of the internal emulation, and which did not
+ * include xen_native.h, may need this defined so that the Xen headers
+ * imported to include/hw/xen/interface/ will expose the appropriate API
+ * version.
+ *
+ * This is why there's a rule that xen_native.h must be included first.
+ */
+#ifndef __XEN_INTERFACE_VERSION__
+#define __XEN_INTERFACE_VERSION__ 0x00040e00
+#endif
+
+#include "exec/cpu-common.h"
+
+/* xen-machine.c */
+enum xen_mode {
+    XEN_DISABLED = 0, /* xen support disabled (default) */
+    XEN_ATTACH,       /* attach to xen domain created by libxl */
+    XEN_EMULATE,      /* emulate Xen within QEMU */
+};
+
+extern uint32_t xen_domid;
+extern enum xen_mode xen_mode;
+extern bool xen_domid_restrict;
+
+int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num);
+int xen_set_pci_link_route(uint8_t link, uint8_t irq);
+void xen_intx_set_irq(void *opaque, int irq_num, int level);
+void xen_hvm_inject_msi(uint64_t addr, uint32_t data);
+int xen_is_pirq_msi(uint32_t msi_data);
+
+qemu_irq *xen_interrupt_controller_init(void);
+
+void xen_register_framebuffer(struct MemoryRegion *mr);
+
+#endif /* QEMU_HW_XEN_H */
diff --git a/include/hw/xen/xen_backend_ops.h b/include/hw/xen/xen_backend_ops.h
new file mode 100644 (file)
index 0000000..90cca85
--- /dev/null
@@ -0,0 +1,408 @@
+/*
+ * QEMU Xen backend support
+ *
+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_XEN_BACKEND_OPS_H
+#define QEMU_XEN_BACKEND_OPS_H
+
+#include "hw/xen/xen.h"
+#include "hw/xen/interface/xen.h"
+#include "hw/xen/interface/io/xenbus.h"
+
+/*
+ * For the time being, these operations map fairly closely to the API of
+ * the actual Xen libraries, e.g. libxenevtchn. As we complete the migration
+ * from XenLegacyDevice back ends to the new XenDevice model, they may
+ * evolve to slightly higher-level APIs.
+ *
+ * The internal emulations do not emulate the Xen APIs entirely faithfully;
+ * only enough to be used by the Xen backend devices. For example, only one
+ * event channel can be bound to each handle, since that's sufficient for
+ * the device support (only the true Xen HVM backend uses more). And the
+ * behaviour of unmask() and pending() is different too because the device
+ * backends don't care.
+ */
+
+typedef struct xenevtchn_handle xenevtchn_handle;
+typedef int xenevtchn_port_or_error_t;
+typedef uint32_t evtchn_port_t;
+typedef uint16_t domid_t;
+typedef uint32_t grant_ref_t;
+
+#define XEN_PAGE_SHIFT       12
+#define XEN_PAGE_SIZE        (1UL << XEN_PAGE_SHIFT)
+#define XEN_PAGE_MASK        (~(XEN_PAGE_SIZE - 1))
+
+#ifndef xen_rmb
+#define xen_rmb() smp_rmb()
+#endif
+#ifndef xen_wmb
+#define xen_wmb() smp_wmb()
+#endif
+#ifndef xen_mb
+#define xen_mb() smp_mb()
+#endif
+
+struct evtchn_backend_ops {
+    xenevtchn_handle *(*open)(void);
+    int (*bind_interdomain)(xenevtchn_handle *xc, uint32_t domid,
+                            evtchn_port_t guest_port);
+    int (*unbind)(xenevtchn_handle *xc, evtchn_port_t port);
+    int (*close)(struct xenevtchn_handle *xc);
+    int (*get_fd)(struct xenevtchn_handle *xc);
+    int (*notify)(struct xenevtchn_handle *xc, evtchn_port_t port);
+    int (*unmask)(struct xenevtchn_handle *xc, evtchn_port_t port);
+    int (*pending)(struct xenevtchn_handle *xc);
+};
+
+extern struct evtchn_backend_ops *xen_evtchn_ops;
+
+static inline xenevtchn_handle *qemu_xen_evtchn_open(void)
+{
+    if (!xen_evtchn_ops) {
+        return NULL;
+    }
+    return xen_evtchn_ops->open();
+}
+
+static inline int qemu_xen_evtchn_bind_interdomain(xenevtchn_handle *xc,
+                                                   uint32_t domid,
+                                                   evtchn_port_t guest_port)
+{
+    if (!xen_evtchn_ops) {
+        return -ENOSYS;
+    }
+    return xen_evtchn_ops->bind_interdomain(xc, domid, guest_port);
+}
+
+static inline int qemu_xen_evtchn_unbind(xenevtchn_handle *xc,
+                                         evtchn_port_t port)
+{
+    if (!xen_evtchn_ops) {
+        return -ENOSYS;
+    }
+    return xen_evtchn_ops->unbind(xc, port);
+}
+
+static inline int qemu_xen_evtchn_close(xenevtchn_handle *xc)
+{
+    if (!xen_evtchn_ops) {
+        return -ENOSYS;
+    }
+    return xen_evtchn_ops->close(xc);
+}
+
+static inline int qemu_xen_evtchn_fd(xenevtchn_handle *xc)
+{
+    if (!xen_evtchn_ops) {
+        return -ENOSYS;
+    }
+    return xen_evtchn_ops->get_fd(xc);
+}
+
+static inline int qemu_xen_evtchn_notify(xenevtchn_handle *xc,
+                                         evtchn_port_t port)
+{
+    if (!xen_evtchn_ops) {
+        return -ENOSYS;
+    }
+    return xen_evtchn_ops->notify(xc, port);
+}
+
+static inline int qemu_xen_evtchn_unmask(xenevtchn_handle *xc,
+                                         evtchn_port_t port)
+{
+    if (!xen_evtchn_ops) {
+        return -ENOSYS;
+    }
+    return xen_evtchn_ops->unmask(xc, port);
+}
+
+static inline int qemu_xen_evtchn_pending(xenevtchn_handle *xc)
+{
+    if (!xen_evtchn_ops) {
+        return -ENOSYS;
+    }
+    return xen_evtchn_ops->pending(xc);
+}
+
+typedef struct xengntdev_handle xengnttab_handle;
+
+typedef struct XenGrantCopySegment {
+    union {
+        void *virt;
+        struct {
+            uint32_t ref;
+            off_t offset;
+        } foreign;
+    } source, dest;
+    size_t len;
+} XenGrantCopySegment;
+
+#define XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE  (1U << 0)
+
+struct gnttab_backend_ops {
+    uint32_t features;
+    xengnttab_handle *(*open)(void);
+    int (*close)(xengnttab_handle *xgt);
+    int (*grant_copy)(xengnttab_handle *xgt, bool to_domain, uint32_t domid,
+                      XenGrantCopySegment *segs, uint32_t nr_segs,
+                      Error **errp);
+    int (*set_max_grants)(xengnttab_handle *xgt, uint32_t nr_grants);
+    void *(*map_refs)(xengnttab_handle *xgt, uint32_t count, uint32_t domid,
+                      uint32_t *refs, int prot);
+    int (*unmap)(xengnttab_handle *xgt, void *start_address, uint32_t *refs,
+                 uint32_t count);
+};
+
+extern struct gnttab_backend_ops *xen_gnttab_ops;
+
+static inline bool qemu_xen_gnttab_can_map_multi(void)
+{
+    return xen_gnttab_ops &&
+        !!(xen_gnttab_ops->features & XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE);
+}
+
+static inline xengnttab_handle *qemu_xen_gnttab_open(void)
+{
+    if (!xen_gnttab_ops) {
+        return NULL;
+    }
+    return xen_gnttab_ops->open();
+}
+
+static inline int qemu_xen_gnttab_close(xengnttab_handle *xgt)
+{
+    if (!xen_gnttab_ops) {
+        return -ENOSYS;
+    }
+    return xen_gnttab_ops->close(xgt);
+}
+
+static inline int qemu_xen_gnttab_grant_copy(xengnttab_handle *xgt,
+                                             bool to_domain, uint32_t domid,
+                                             XenGrantCopySegment *segs,
+                                             uint32_t nr_segs, Error **errp)
+{
+    if (!xen_gnttab_ops) {
+        return -ENOSYS;
+    }
+
+    return xen_gnttab_ops->grant_copy(xgt, to_domain, domid, segs, nr_segs,
+                                      errp);
+}
+
+static inline int qemu_xen_gnttab_set_max_grants(xengnttab_handle *xgt,
+                                                 uint32_t nr_grants)
+{
+    if (!xen_gnttab_ops) {
+        return -ENOSYS;
+    }
+    return xen_gnttab_ops->set_max_grants(xgt, nr_grants);
+}
+
+static inline void *qemu_xen_gnttab_map_refs(xengnttab_handle *xgt,
+                                             uint32_t count, uint32_t domid,
+                                             uint32_t *refs, int prot)
+{
+    if (!xen_gnttab_ops) {
+        return NULL;
+    }
+    return xen_gnttab_ops->map_refs(xgt, count, domid, refs, prot);
+}
+
+static inline int qemu_xen_gnttab_unmap(xengnttab_handle *xgt,
+                                        void *start_address, uint32_t *refs,
+                                        uint32_t count)
+{
+    if (!xen_gnttab_ops) {
+        return -ENOSYS;
+    }
+    return xen_gnttab_ops->unmap(xgt, start_address, refs, count);
+}
+
+struct foreignmem_backend_ops {
+    void *(*map)(uint32_t dom, void *addr, int prot, size_t pages,
+                 xen_pfn_t *pfns, int *errs);
+    int (*unmap)(void *addr, size_t pages);
+};
+
+extern struct foreignmem_backend_ops *xen_foreignmem_ops;
+
+static inline void *qemu_xen_foreignmem_map(uint32_t dom, void *addr, int prot,
+                                            size_t pages, xen_pfn_t *pfns,
+                                            int *errs)
+{
+    if (!xen_foreignmem_ops) {
+        return NULL;
+    }
+    return xen_foreignmem_ops->map(dom, addr, prot, pages, pfns, errs);
+}
+
+static inline int qemu_xen_foreignmem_unmap(void *addr, size_t pages)
+{
+    if (!xen_foreignmem_ops) {
+        return -ENOSYS;
+    }
+    return xen_foreignmem_ops->unmap(addr, pages);
+}
+
+typedef void (*xs_watch_fn)(void *opaque, const char *path);
+
+struct qemu_xs_handle;
+struct qemu_xs_watch;
+typedef uint32_t xs_transaction_t;
+
+#define XBT_NULL 0
+
+#define XS_PERM_NONE  0x00
+#define XS_PERM_READ  0x01
+#define XS_PERM_WRITE 0x02
+
+struct xenstore_backend_ops {
+    struct qemu_xs_handle *(*open)(void);
+    void (*close)(struct qemu_xs_handle *h);
+    char *(*get_domain_path)(struct qemu_xs_handle *h, unsigned int domid);
+    char **(*directory)(struct qemu_xs_handle *h, xs_transaction_t t,
+                        const char *path, unsigned int *num);
+    void *(*read)(struct qemu_xs_handle *h, xs_transaction_t t,
+                  const char *path, unsigned int *len);
+    bool (*write)(struct qemu_xs_handle *h, xs_transaction_t t,
+                  const char *path, const void *data, unsigned int len);
+    bool (*create)(struct qemu_xs_handle *h, xs_transaction_t t,
+                   unsigned int owner, unsigned int domid,
+                   unsigned int perms, const char *path);
+    bool (*destroy)(struct qemu_xs_handle *h, xs_transaction_t t,
+               const char *path);
+    struct qemu_xs_watch *(*watch)(struct qemu_xs_handle *h, const char *path,
+                                   xs_watch_fn fn, void *opaque);
+    void (*unwatch)(struct qemu_xs_handle *h, struct qemu_xs_watch *w);
+    xs_transaction_t (*transaction_start)(struct qemu_xs_handle *h);
+    bool (*transaction_end)(struct qemu_xs_handle *h, xs_transaction_t t,
+                            bool abort);
+};
+
+extern struct xenstore_backend_ops *xen_xenstore_ops;
+
+static inline struct qemu_xs_handle *qemu_xen_xs_open(void)
+{
+    if (!xen_xenstore_ops) {
+        return NULL;
+    }
+    return xen_xenstore_ops->open();
+}
+
+static inline void qemu_xen_xs_close(struct qemu_xs_handle *h)
+{
+    if (!xen_xenstore_ops) {
+        return;
+    }
+    xen_xenstore_ops->close(h);
+}
+
+static inline char *qemu_xen_xs_get_domain_path(struct qemu_xs_handle *h,
+                                                unsigned int domid)
+{
+    if (!xen_xenstore_ops) {
+        return NULL;
+    }
+    return xen_xenstore_ops->get_domain_path(h, domid);
+}
+
+static inline char **qemu_xen_xs_directory(struct qemu_xs_handle *h,
+                                           xs_transaction_t t, const char *path,
+                                           unsigned int *num)
+{
+    if (!xen_xenstore_ops) {
+        return NULL;
+    }
+    return xen_xenstore_ops->directory(h, t, path, num);
+}
+
+static inline void *qemu_xen_xs_read(struct qemu_xs_handle *h,
+                                     xs_transaction_t t, const char *path,
+                                     unsigned int *len)
+{
+    if (!xen_xenstore_ops) {
+        return NULL;
+    }
+    return xen_xenstore_ops->read(h, t, path, len);
+}
+
+static inline bool qemu_xen_xs_write(struct qemu_xs_handle *h,
+                                     xs_transaction_t t, const char *path,
+                                     const void *data, unsigned int len)
+{
+    if (!xen_xenstore_ops) {
+        return false;
+    }
+    return xen_xenstore_ops->write(h, t, path, data, len);
+}
+
+static inline bool qemu_xen_xs_create(struct qemu_xs_handle *h,
+                                      xs_transaction_t t, unsigned int owner,
+                                      unsigned int domid, unsigned int perms,
+                                      const char *path)
+{
+    if (!xen_xenstore_ops) {
+        return false;
+    }
+    return xen_xenstore_ops->create(h, t, owner, domid, perms, path);
+}
+
+static inline bool qemu_xen_xs_destroy(struct qemu_xs_handle *h,
+                                       xs_transaction_t t, const char *path)
+{
+    if (!xen_xenstore_ops) {
+        return false;
+    }
+    return xen_xenstore_ops->destroy(h, t, path);
+}
+
+static inline struct qemu_xs_watch *qemu_xen_xs_watch(struct qemu_xs_handle *h,
+                                                      const char *path,
+                                                      xs_watch_fn fn,
+                                                      void *opaque)
+{
+    if (!xen_xenstore_ops) {
+        return NULL;
+    }
+    return xen_xenstore_ops->watch(h, path, fn, opaque);
+}
+
+static inline void qemu_xen_xs_unwatch(struct qemu_xs_handle *h,
+                                       struct qemu_xs_watch *w)
+{
+    if (!xen_xenstore_ops) {
+        return;
+    }
+    xen_xenstore_ops->unwatch(h, w);
+}
+
+static inline xs_transaction_t qemu_xen_xs_transaction_start(struct qemu_xs_handle *h)
+{
+    if (!xen_xenstore_ops) {
+        return XBT_NULL;
+    }
+    return xen_xenstore_ops->transaction_start(h);
+}
+
+static inline bool qemu_xen_xs_transaction_end(struct qemu_xs_handle *h,
+                                               xs_transaction_t t, bool abort)
+{
+    if (!xen_xenstore_ops) {
+        return false;
+    }
+    return xen_xenstore_ops->transaction_end(h, t, abort);
+}
+
+void setup_xen_backend_ops(void);
+
+#endif /* QEMU_XEN_BACKEND_OPS_H */
diff --git a/include/hw/xen/xen_native.h b/include/hw/xen/xen_native.h
new file mode 100644 (file)
index 0000000..1a5ad69
--- /dev/null
@@ -0,0 +1,550 @@
+#ifndef QEMU_HW_XEN_NATIVE_H
+#define QEMU_HW_XEN_NATIVE_H
+
+#ifdef __XEN_INTERFACE_VERSION__
+#error In Xen native files, include xen_native.h before other Xen headers
+#endif
+
+/*
+ * If we have new enough libxenctrl then we do not want/need these compat
+ * interfaces, despite what the user supplied cflags might say. They
+ * must be undefined before including xenctrl.h
+ */
+#undef XC_WANT_COMPAT_EVTCHN_API
+#undef XC_WANT_COMPAT_GNTTAB_API
+#undef XC_WANT_COMPAT_MAP_FOREIGN_API
+
+#include <xenctrl.h>
+#include <xenstore.h>
+
+#include "hw/xen/xen.h"
+#include "hw/pci/pci_device.h"
+#include "hw/xen/trace.h"
+
+extern xc_interface *xen_xc;
+
+/*
+ * We don't support Xen prior to 4.7.1.
+ */
+
+#include <xenforeignmemory.h>
+
+extern xenforeignmemory_handle *xen_fmem;
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40900
+
+typedef xc_interface xendevicemodel_handle;
+
+#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40900 */
+
+#undef XC_WANT_COMPAT_DEVICEMODEL_API
+#include <xendevicemodel.h>
+
+#endif
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 41100
+
+static inline int xendevicemodel_relocate_memory(
+    xendevicemodel_handle *dmod, domid_t domid, uint32_t size, uint64_t src_gfn,
+    uint64_t dst_gfn)
+{
+    uint32_t i;
+    int rc;
+
+    for (i = 0; i < size; i++) {
+        unsigned long idx = src_gfn + i;
+        xen_pfn_t gpfn = dst_gfn + i;
+
+        rc = xc_domain_add_to_physmap(xen_xc, domid, XENMAPSPACE_gmfn, idx,
+                                      gpfn);
+        if (rc) {
+            return rc;
+        }
+    }
+
+    return 0;
+}
+
+static inline int xendevicemodel_pin_memory_cacheattr(
+    xendevicemodel_handle *dmod, domid_t domid, uint64_t start, uint64_t end,
+    uint32_t type)
+{
+    return xc_domain_pin_memory_cacheattr(xen_xc, domid, start, end, type);
+}
+
+typedef void xenforeignmemory_resource_handle;
+
+#define XENMEM_resource_ioreq_server 0
+
+#define XENMEM_resource_ioreq_server_frame_bufioreq 0
+#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n))
+
+static inline xenforeignmemory_resource_handle *xenforeignmemory_map_resource(
+    xenforeignmemory_handle *fmem, domid_t domid, unsigned int type,
+    unsigned int id, unsigned long frame, unsigned long nr_frames,
+    void **paddr, int prot, int flags)
+{
+    errno = EOPNOTSUPP;
+    return NULL;
+}
+
+static inline int xenforeignmemory_unmap_resource(
+    xenforeignmemory_handle *fmem, xenforeignmemory_resource_handle *fres)
+{
+    return 0;
+}
+
+#endif /* CONFIG_XEN_CTRL_INTERFACE_VERSION < 41100 */
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 41000
+
+#define XEN_COMPAT_PHYSMAP
+static inline void *xenforeignmemory_map2(xenforeignmemory_handle *h,
+                                          uint32_t dom, void *addr,
+                                          int prot, int flags, size_t pages,
+                                          const xen_pfn_t arr[/*pages*/],
+                                          int err[/*pages*/])
+{
+    assert(addr == NULL && flags == 0);
+    return xenforeignmemory_map(h, dom, prot, pages, arr, err);
+}
+
+static inline int xentoolcore_restrict_all(domid_t domid)
+{
+    errno = ENOTTY;
+    return -1;
+}
+
+static inline int xendevicemodel_shutdown(xendevicemodel_handle *dmod,
+                                          domid_t domid, unsigned int reason)
+{
+    errno = ENOTTY;
+    return -1;
+}
+
+#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 41000 */
+
+#include <xentoolcore.h>
+
+#endif
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40900
+
+static inline xendevicemodel_handle *xendevicemodel_open(
+    struct xentoollog_logger *logger, unsigned int open_flags)
+{
+    return xen_xc;
+}
+
+static inline int xendevicemodel_create_ioreq_server(
+    xendevicemodel_handle *dmod, domid_t domid, int handle_bufioreq,
+    ioservid_t *id)
+{
+    return xc_hvm_create_ioreq_server(dmod, domid, handle_bufioreq,
+                                      id);
+}
+
+static inline int xendevicemodel_get_ioreq_server_info(
+    xendevicemodel_handle *dmod, domid_t domid, ioservid_t id,
+    xen_pfn_t *ioreq_pfn, xen_pfn_t *bufioreq_pfn,
+    evtchn_port_t *bufioreq_port)
+{
+    return xc_hvm_get_ioreq_server_info(dmod, domid, id, ioreq_pfn,
+                                        bufioreq_pfn, bufioreq_port);
+}
+
+static inline int xendevicemodel_map_io_range_to_ioreq_server(
+    xendevicemodel_handle *dmod, domid_t domid, ioservid_t id, int is_mmio,
+    uint64_t start, uint64_t end)
+{
+    return xc_hvm_map_io_range_to_ioreq_server(dmod, domid, id, is_mmio,
+                                               start, end);
+}
+
+static inline int xendevicemodel_unmap_io_range_from_ioreq_server(
+    xendevicemodel_handle *dmod, domid_t domid, ioservid_t id, int is_mmio,
+    uint64_t start, uint64_t end)
+{
+    return xc_hvm_unmap_io_range_from_ioreq_server(dmod, domid, id, is_mmio,
+                                                   start, end);
+}
+
+static inline int xendevicemodel_map_pcidev_to_ioreq_server(
+    xendevicemodel_handle *dmod, domid_t domid, ioservid_t id,
+    uint16_t segment, uint8_t bus, uint8_t device, uint8_t function)
+{
+    return xc_hvm_map_pcidev_to_ioreq_server(dmod, domid, id, segment,
+                                             bus, device, function);
+}
+
+static inline int xendevicemodel_unmap_pcidev_from_ioreq_server(
+    xendevicemodel_handle *dmod, domid_t domid, ioservid_t id,
+    uint16_t segment, uint8_t bus, uint8_t device, uint8_t function)
+{
+    return xc_hvm_unmap_pcidev_from_ioreq_server(dmod, domid, id, segment,
+                                                 bus, device, function);
+}
+
+static inline int xendevicemodel_destroy_ioreq_server(
+    xendevicemodel_handle *dmod, domid_t domid, ioservid_t id)
+{
+    return xc_hvm_destroy_ioreq_server(dmod, domid, id);
+}
+
+static inline int xendevicemodel_set_ioreq_server_state(
+    xendevicemodel_handle *dmod, domid_t domid, ioservid_t id, int enabled)
+{
+    return xc_hvm_set_ioreq_server_state(dmod, domid, id, enabled);
+}
+
+static inline int xendevicemodel_set_pci_intx_level(
+    xendevicemodel_handle *dmod, domid_t domid, uint16_t segment,
+    uint8_t bus, uint8_t device, uint8_t intx, unsigned int level)
+{
+    return xc_hvm_set_pci_intx_level(dmod, domid, segment, bus, device,
+                                     intx, level);
+}
+
+static inline int xendevicemodel_set_isa_irq_level(
+    xendevicemodel_handle *dmod, domid_t domid, uint8_t irq,
+    unsigned int level)
+{
+    return xc_hvm_set_isa_irq_level(dmod, domid, irq, level);
+}
+
+static inline int xendevicemodel_set_pci_link_route(
+    xendevicemodel_handle *dmod, domid_t domid, uint8_t link, uint8_t irq)
+{
+    return xc_hvm_set_pci_link_route(dmod, domid, link, irq);
+}
+
+static inline int xendevicemodel_inject_msi(
+    xendevicemodel_handle *dmod, domid_t domid, uint64_t msi_addr,
+    uint32_t msi_data)
+{
+    return xc_hvm_inject_msi(dmod, domid, msi_addr, msi_data);
+}
+
+static inline int xendevicemodel_track_dirty_vram(
+    xendevicemodel_handle *dmod, domid_t domid, uint64_t first_pfn,
+    uint32_t nr, unsigned long *dirty_bitmap)
+{
+    return xc_hvm_track_dirty_vram(dmod, domid, first_pfn, nr,
+                                   dirty_bitmap);
+}
+
+static inline int xendevicemodel_modified_memory(
+    xendevicemodel_handle *dmod, domid_t domid, uint64_t first_pfn,
+    uint32_t nr)
+{
+    return xc_hvm_modified_memory(dmod, domid, first_pfn, nr);
+}
+
+static inline int xendevicemodel_set_mem_type(
+    xendevicemodel_handle *dmod, domid_t domid, hvmmem_type_t mem_type,
+    uint64_t first_pfn, uint32_t nr)
+{
+    return xc_hvm_set_mem_type(dmod, domid, mem_type, first_pfn, nr);
+}
+
+#endif
+
+extern xendevicemodel_handle *xen_dmod;
+
+static inline int xen_set_mem_type(domid_t domid, hvmmem_type_t type,
+                                   uint64_t first_pfn, uint32_t nr)
+{
+    return xendevicemodel_set_mem_type(xen_dmod, domid, type, first_pfn,
+                                       nr);
+}
+
+static inline int xen_set_pci_intx_level(domid_t domid, uint16_t segment,
+                                         uint8_t bus, uint8_t device,
+                                         uint8_t intx, unsigned int level)
+{
+    return xendevicemodel_set_pci_intx_level(xen_dmod, domid, segment, bus,
+                                             device, intx, level);
+}
+
+static inline int xen_inject_msi(domid_t domid, uint64_t msi_addr,
+                                 uint32_t msi_data)
+{
+    return xendevicemodel_inject_msi(xen_dmod, domid, msi_addr, msi_data);
+}
+
+static inline int xen_set_isa_irq_level(domid_t domid, uint8_t irq,
+                                        unsigned int level)
+{
+    return xendevicemodel_set_isa_irq_level(xen_dmod, domid, irq, level);
+}
+
+static inline int xen_track_dirty_vram(domid_t domid, uint64_t first_pfn,
+                                       uint32_t nr, unsigned long *bitmap)
+{
+    return xendevicemodel_track_dirty_vram(xen_dmod, domid, first_pfn, nr,
+                                           bitmap);
+}
+
+static inline int xen_modified_memory(domid_t domid, uint64_t first_pfn,
+                                      uint32_t nr)
+{
+    return xendevicemodel_modified_memory(xen_dmod, domid, first_pfn, nr);
+}
+
+static inline int xen_restrict(domid_t domid)
+{
+    int rc;
+    rc = xentoolcore_restrict_all(domid);
+    trace_xen_domid_restrict(rc ? errno : 0);
+    return rc;
+}
+
+void destroy_hvm_domain(bool reboot);
+
+/* shutdown/destroy current domain because of an error */
+void xen_shutdown_fatal_error(const char *fmt, ...) G_GNUC_PRINTF(1, 2);
+
+#ifdef HVM_PARAM_VMPORT_REGS_PFN
+static inline int xen_get_vmport_regs_pfn(xc_interface *xc, domid_t dom,
+                                          xen_pfn_t *vmport_regs_pfn)
+{
+    int rc;
+    uint64_t value;
+    rc = xc_hvm_param_get(xc, dom, HVM_PARAM_VMPORT_REGS_PFN, &value);
+    if (rc >= 0) {
+        *vmport_regs_pfn = (xen_pfn_t) value;
+    }
+    return rc;
+}
+#else
+static inline int xen_get_vmport_regs_pfn(xc_interface *xc, domid_t dom,
+                                          xen_pfn_t *vmport_regs_pfn)
+{
+    return -ENOSYS;
+}
+#endif
+
+static inline int xen_get_default_ioreq_server_info(domid_t dom,
+                                                    xen_pfn_t *ioreq_pfn,
+                                                    xen_pfn_t *bufioreq_pfn,
+                                                    evtchn_port_t
+                                                        *bufioreq_evtchn)
+{
+    unsigned long param;
+    int rc;
+
+    rc = xc_get_hvm_param(xen_xc, dom, HVM_PARAM_IOREQ_PFN, &param);
+    if (rc < 0) {
+        fprintf(stderr, "failed to get HVM_PARAM_IOREQ_PFN\n");
+        return -1;
+    }
+
+    *ioreq_pfn = param;
+
+    rc = xc_get_hvm_param(xen_xc, dom, HVM_PARAM_BUFIOREQ_PFN, &param);
+    if (rc < 0) {
+        fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_PFN\n");
+        return -1;
+    }
+
+    *bufioreq_pfn = param;
+
+    rc = xc_get_hvm_param(xen_xc, dom, HVM_PARAM_BUFIOREQ_EVTCHN,
+                          &param);
+    if (rc < 0) {
+        fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_EVTCHN\n");
+        return -1;
+    }
+
+    *bufioreq_evtchn = param;
+
+    return 0;
+}
+
+static bool use_default_ioreq_server;
+
+static inline void xen_map_memory_section(domid_t dom,
+                                          ioservid_t ioservid,
+                                          MemoryRegionSection *section)
+{
+    hwaddr start_addr = section->offset_within_address_space;
+    ram_addr_t size = int128_get64(section->size);
+    hwaddr end_addr = start_addr + size - 1;
+
+    if (use_default_ioreq_server) {
+        return;
+    }
+
+    trace_xen_map_mmio_range(ioservid, start_addr, end_addr);
+    xendevicemodel_map_io_range_to_ioreq_server(xen_dmod, dom, ioservid, 1,
+                                                start_addr, end_addr);
+}
+
+static inline void xen_unmap_memory_section(domid_t dom,
+                                            ioservid_t ioservid,
+                                            MemoryRegionSection *section)
+{
+    hwaddr start_addr = section->offset_within_address_space;
+    ram_addr_t size = int128_get64(section->size);
+    hwaddr end_addr = start_addr + size - 1;
+
+    if (use_default_ioreq_server) {
+        return;
+    }
+
+    trace_xen_unmap_mmio_range(ioservid, start_addr, end_addr);
+    xendevicemodel_unmap_io_range_from_ioreq_server(xen_dmod, dom, ioservid,
+                                                    1, start_addr, end_addr);
+}
+
+static inline void xen_map_io_section(domid_t dom,
+                                      ioservid_t ioservid,
+                                      MemoryRegionSection *section)
+{
+    hwaddr start_addr = section->offset_within_address_space;
+    ram_addr_t size = int128_get64(section->size);
+    hwaddr end_addr = start_addr + size - 1;
+
+    if (use_default_ioreq_server) {
+        return;
+    }
+
+    trace_xen_map_portio_range(ioservid, start_addr, end_addr);
+    xendevicemodel_map_io_range_to_ioreq_server(xen_dmod, dom, ioservid, 0,
+                                                start_addr, end_addr);
+}
+
+static inline void xen_unmap_io_section(domid_t dom,
+                                        ioservid_t ioservid,
+                                        MemoryRegionSection *section)
+{
+    hwaddr start_addr = section->offset_within_address_space;
+    ram_addr_t size = int128_get64(section->size);
+    hwaddr end_addr = start_addr + size - 1;
+
+    if (use_default_ioreq_server) {
+        return;
+    }
+
+    trace_xen_unmap_portio_range(ioservid, start_addr, end_addr);
+    xendevicemodel_unmap_io_range_from_ioreq_server(xen_dmod, dom, ioservid,
+                                                    0, start_addr, end_addr);
+}
+
+static inline void xen_map_pcidev(domid_t dom,
+                                  ioservid_t ioservid,
+                                  PCIDevice *pci_dev)
+{
+    if (use_default_ioreq_server) {
+        return;
+    }
+
+    trace_xen_map_pcidev(ioservid, pci_dev_bus_num(pci_dev),
+                         PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn));
+    xendevicemodel_map_pcidev_to_ioreq_server(xen_dmod, dom, ioservid, 0,
+                                              pci_dev_bus_num(pci_dev),
+                                              PCI_SLOT(pci_dev->devfn),
+                                              PCI_FUNC(pci_dev->devfn));
+}
+
+static inline void xen_unmap_pcidev(domid_t dom,
+                                    ioservid_t ioservid,
+                                    PCIDevice *pci_dev)
+{
+    if (use_default_ioreq_server) {
+        return;
+    }
+
+    trace_xen_unmap_pcidev(ioservid, pci_dev_bus_num(pci_dev),
+                           PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn));
+    xendevicemodel_unmap_pcidev_from_ioreq_server(xen_dmod, dom, ioservid, 0,
+                                                  pci_dev_bus_num(pci_dev),
+                                                  PCI_SLOT(pci_dev->devfn),
+                                                  PCI_FUNC(pci_dev->devfn));
+}
+
+static inline int xen_create_ioreq_server(domid_t dom,
+                                          ioservid_t *ioservid)
+{
+    int rc = xendevicemodel_create_ioreq_server(xen_dmod, dom,
+                                                HVM_IOREQSRV_BUFIOREQ_ATOMIC,
+                                                ioservid);
+
+    if (rc == 0) {
+        trace_xen_ioreq_server_create(*ioservid);
+        return rc;
+    }
+
+    *ioservid = 0;
+    use_default_ioreq_server = true;
+    trace_xen_default_ioreq_server();
+
+    return rc;
+}
+
+static inline void xen_destroy_ioreq_server(domid_t dom,
+                                            ioservid_t ioservid)
+{
+    if (use_default_ioreq_server) {
+        return;
+    }
+
+    trace_xen_ioreq_server_destroy(ioservid);
+    xendevicemodel_destroy_ioreq_server(xen_dmod, dom, ioservid);
+}
+
+static inline int xen_get_ioreq_server_info(domid_t dom,
+                                            ioservid_t ioservid,
+                                            xen_pfn_t *ioreq_pfn,
+                                            xen_pfn_t *bufioreq_pfn,
+                                            evtchn_port_t *bufioreq_evtchn)
+{
+    if (use_default_ioreq_server) {
+        return xen_get_default_ioreq_server_info(dom, ioreq_pfn,
+                                                 bufioreq_pfn,
+                                                 bufioreq_evtchn);
+    }
+
+    return xendevicemodel_get_ioreq_server_info(xen_dmod, dom, ioservid,
+                                                ioreq_pfn, bufioreq_pfn,
+                                                bufioreq_evtchn);
+}
+
+static inline int xen_set_ioreq_server_state(domid_t dom,
+                                             ioservid_t ioservid,
+                                             bool enable)
+{
+    if (use_default_ioreq_server) {
+        return 0;
+    }
+
+    trace_xen_ioreq_server_state(ioservid, enable);
+    return xendevicemodel_set_ioreq_server_state(xen_dmod, dom, ioservid,
+                                                 enable);
+}
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 41500
+static inline int xendevicemodel_set_irq_level(xendevicemodel_handle *dmod,
+                                               domid_t domid, uint32_t irq,
+                                               unsigned int level)
+{
+    return -1;
+}
+#endif
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 41700
+#define GUEST_VIRTIO_MMIO_BASE   xen_mk_ullong(0x02000000)
+#define GUEST_VIRTIO_MMIO_SIZE   xen_mk_ullong(0x00100000)
+#define GUEST_VIRTIO_MMIO_SPI_FIRST   33
+#define GUEST_VIRTIO_MMIO_SPI_LAST    43
+#endif
+
+#if defined(__i386__) || defined(__x86_64__)
+#define GUEST_RAM_BANKS   2
+#define GUEST_RAM0_BASE   0x40000000ULL /* 3GB of low RAM @ 1GB */
+#define GUEST_RAM0_SIZE   0xc0000000ULL
+#define GUEST_RAM1_BASE   0x0200000000ULL /* 1016GB of RAM @ 8GB */
+#define GUEST_RAM1_SIZE   0xfe00000000ULL
+#endif
+
+#endif /* QEMU_HW_XEN_NATIVE_H */
diff --git a/include/hw/xen/xen_pvdev.h b/include/hw/xen/xen_pvdev.h
new file mode 100644 (file)
index 0000000..ddad4b9
--- /dev/null
@@ -0,0 +1,83 @@
+#ifndef QEMU_HW_XEN_PVDEV_H
+#define QEMU_HW_XEN_PVDEV_H
+
+#include "hw/qdev-core.h"
+#include "hw/xen/xen_backend_ops.h"
+
+/* ------------------------------------------------------------- */
+
+#define XEN_BUFSIZE 1024
+
+struct XenLegacyDevice;
+
+/* driver uses grant tables  ->  open gntdev device (xendev->gnttabdev) */
+#define DEVOPS_FLAG_NEED_GNTDEV   1
+/* don't expect frontend doing correct state transitions (aka console quirk) */
+#define DEVOPS_FLAG_IGNORE_STATE  2
+
+struct XenDevOps {
+    size_t    size;
+    uint32_t  flags;
+    void      (*alloc)(struct XenLegacyDevice *xendev);
+    int       (*init)(struct XenLegacyDevice *xendev);
+    int       (*initialise)(struct XenLegacyDevice *xendev);
+    void      (*connected)(struct XenLegacyDevice *xendev);
+    void      (*event)(struct XenLegacyDevice *xendev);
+    void      (*disconnect)(struct XenLegacyDevice *xendev);
+    int       (*free)(struct XenLegacyDevice *xendev);
+    void      (*backend_changed)(struct XenLegacyDevice *xendev,
+                                 const char *node);
+    void      (*frontend_changed)(struct XenLegacyDevice *xendev,
+                                  const char *node);
+    int       (*backend_register)(void);
+};
+
+struct XenLegacyDevice {
+    DeviceState        qdev;
+    const char         *type;
+    int                dom;
+    int                dev;
+    char               name[64];
+    int                debug;
+
+    struct qemu_xs_watch *watch;
+    enum xenbus_state  be_state;
+    enum xenbus_state  fe_state;
+    int                online;
+    char               be[XEN_BUFSIZE];
+    char               *fe;
+    char               *protocol;
+    int                remote_port;
+    int                local_port;
+
+    xenevtchn_handle   *evtchndev;
+    xengnttab_handle   *gnttabdev;
+
+    struct XenDevOps   *ops;
+    QTAILQ_ENTRY(XenLegacyDevice) next;
+};
+
+/* ------------------------------------------------------------- */
+
+/* xenstore helper functions */
+int xenstore_write_str(const char *base, const char *node, const char *val);
+int xenstore_write_int(const char *base, const char *node, int ival);
+int xenstore_write_int64(const char *base, const char *node, int64_t ival);
+char *xenstore_read_str(const char *base, const char *node);
+int xenstore_read_int(const char *base, const char *node, int *ival);
+int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval);
+
+const char *xenbus_strstate(enum xenbus_state state);
+
+void xen_pv_evtchn_event(void *opaque);
+void xen_pv_insert_xendev(struct XenLegacyDevice *xendev);
+void xen_pv_del_xendev(struct XenLegacyDevice *xendev);
+struct XenLegacyDevice *xen_pv_find_xendev(const char *type, int dom, int dev);
+
+void xen_pv_unbind_evtchn(struct XenLegacyDevice *xendev);
+int xen_pv_send_notify(struct XenLegacyDevice *xendev);
+
+void xen_pv_printf(struct XenLegacyDevice *xendev, int msg_level,
+                   const char *fmt, ...)  G_GNUC_PRINTF(3, 4);
+
+#endif /* QEMU_HW_XEN_PVDEV_H */
diff --git a/include/hw/xtensa/mx_pic.h b/include/hw/xtensa/mx_pic.h
new file mode 100644 (file)
index 0000000..500424c
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2013 - 2019, Max Filippov, Open Source and Linux Lab.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Open Source and Linux Lab nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef XTENSA_MX_PIC_H
+#define XTENSA_MX_PIC_H
+
+#include "exec/memory.h"
+
+struct XtensaMxPic;
+typedef struct XtensaMxPic XtensaMxPic;
+
+XtensaMxPic *xtensa_mx_pic_init(unsigned n_extint);
+void xtensa_mx_pic_reset(void *opaque);
+MemoryRegion *xtensa_mx_pic_register_cpu(XtensaMxPic *mx,
+                                         qemu_irq *irq,
+                                         qemu_irq runstall);
+qemu_irq *xtensa_mx_pic_get_extints(XtensaMxPic *mx);
+
+#endif
diff --git a/include/hw/xtensa/xtensa-isa.h b/include/hw/xtensa/xtensa-isa.h
new file mode 100644 (file)
index 0000000..a289531
--- /dev/null
@@ -0,0 +1,836 @@
+/* Interface definition for configurable Xtensa ISA support.
+ *
+ * Copyright (c) 2001-2013 Tensilica Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef HW_XTENSA_XTENSA_ISA_H
+#define HW_XTENSA_XTENSA_ISA_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Version number: This is intended to help support code that works with
+ * versions of this library from multiple Xtensa releases.
+ */
+
+#define XTENSA_ISA_VERSION 7000
+
+/*
+ * This file defines the interface to the Xtensa ISA library. This
+ * library contains most of the ISA-specific information for a
+ * particular Xtensa processor. For example, the set of valid
+ * instructions, their opcode encodings and operand fields are all
+ * included here.
+ *
+ * This interface basically defines a number of abstract data types.
+ *
+ * . an instruction buffer - for holding the raw instruction bits
+ * . ISA info - information about the ISA as a whole
+ * . instruction formats - instruction size and slot structure
+ * . opcodes - information about individual instructions
+ * . operands - information about register and immediate instruction operands
+ * . stateOperands - information about processor state instruction operands
+ * . interfaceOperands - information about interface instruction operands
+ * . register files - register file information
+ * . processor states - internal processor state information
+ * . system registers - "special registers" and "user registers"
+ * . interfaces - TIE interfaces that are external to the processor
+ * . functional units - TIE shared functions
+ *
+ * The interface defines a set of functions to access each data type.
+ * With the exception of the instruction buffer, the internal
+ * representations of the data structures are hidden. All accesses must
+ * be made through the functions defined here.
+ */
+
+typedef struct xtensa_isa_opaque { int unused; } *xtensa_isa;
+
+
+/*
+ * Most of the Xtensa ISA entities (e.g., opcodes, regfiles, etc.) are
+ * represented here using sequential integers beginning with 0. The
+ * specific values are only fixed for a particular instantiation of an
+ * xtensa_isa structure, so these values should only be used
+ * internally.
+ */
+
+typedef int xtensa_opcode;
+typedef int xtensa_format;
+typedef int xtensa_regfile;
+typedef int xtensa_state;
+typedef int xtensa_sysreg;
+typedef int xtensa_interface;
+typedef int xtensa_funcUnit;
+
+
+/* Define a unique value for undefined items. */
+
+#define XTENSA_UNDEFINED -1
+
+
+/*
+ * Overview of using this interface to decode/encode instructions:
+ *
+ * Each Xtensa instruction is associated with a particular instruction
+ * format, where the format defines a fixed number of slots for
+ * operations. The formats for the core Xtensa ISA have only one slot,
+ * but FLIX instructions may have multiple slots. Within each slot,
+ * there is a single opcode and some number of associated operands.
+ *
+ * The encoding and decoding functions operate on instruction buffers,
+ * not on the raw bytes of the instructions. The same instruction
+ * buffer data structure is used for both entire instructions and
+ * individual slots in those instructions -- the contents of a slot need
+ * to be extracted from or inserted into the buffer for the instruction
+ * as a whole.
+ *
+ * Decoding an instruction involves first finding the format, which
+ * identifies the number of slots, and then decoding each slot
+ * separately. A slot is decoded by finding the opcode and then using
+ * the opcode to determine how many operands there are. For example:
+ *
+ * xtensa_insnbuf_from_chars
+ * xtensa_format_decode
+ * for each slot {
+ *   xtensa_format_get_slot
+ *   xtensa_opcode_decode
+ *   for each operand {
+ *     xtensa_operand_get_field
+ *     xtensa_operand_decode
+ *   }
+ * }
+ *
+ * Encoding an instruction is roughly the same procedure in reverse:
+ *
+ * xtensa_format_encode
+ * for each slot {
+ *   xtensa_opcode_encode
+ *   for each operand {
+ *     xtensa_operand_encode
+ *     xtensa_operand_set_field
+ *   }
+ *   xtensa_format_set_slot
+ * }
+ * xtensa_insnbuf_to_chars
+ */
+
+
+/* Error handling. */
+
+/*
+ * Error codes. The code for the most recent error condition can be
+ * retrieved with the "errno" function. For any result other than
+ * xtensa_isa_ok, an error message containing additional information
+ * about the problem can be retrieved using the "error_msg" function.
+ * The error messages are stored in an internal buffer, which should
+ * not be freed and may be overwritten by subsequent operations.
+ */
+
+typedef enum xtensa_isa_status_enum {
+    xtensa_isa_ok = 0,
+    xtensa_isa_bad_format,
+    xtensa_isa_bad_slot,
+    xtensa_isa_bad_opcode,
+    xtensa_isa_bad_operand,
+    xtensa_isa_bad_field,
+    xtensa_isa_bad_iclass,
+    xtensa_isa_bad_regfile,
+    xtensa_isa_bad_sysreg,
+    xtensa_isa_bad_state,
+    xtensa_isa_bad_interface,
+    xtensa_isa_bad_funcUnit,
+    xtensa_isa_wrong_slot,
+    xtensa_isa_no_field,
+    xtensa_isa_out_of_memory,
+    xtensa_isa_buffer_overflow,
+    xtensa_isa_internal_error,
+    xtensa_isa_bad_value
+} xtensa_isa_status;
+
+xtensa_isa_status xtensa_isa_errno(xtensa_isa isa);
+
+char *xtensa_isa_error_msg(xtensa_isa isa);
+
+
+
+/* Instruction buffers. */
+
+typedef uint32_t xtensa_insnbuf_word;
+typedef xtensa_insnbuf_word *xtensa_insnbuf;
+
+
+/* Get the size in "insnbuf_words" of the xtensa_insnbuf array. */
+
+int xtensa_insnbuf_size(xtensa_isa isa);
+
+
+/* Allocate an xtensa_insnbuf of the right size. */
+
+xtensa_insnbuf xtensa_insnbuf_alloc(xtensa_isa isa);
+
+
+/* Release an xtensa_insnbuf. */
+
+void xtensa_insnbuf_free(xtensa_isa isa, xtensa_insnbuf buf);
+
+
+/*
+ * Conversion between raw memory (char arrays) and our internal
+ * instruction representation. This is complicated by the Xtensa ISA's
+ * variable instruction lengths. When converting to chars, the buffer
+ * must contain a valid instruction so we know how many bytes to copy;
+ * thus, the "to_chars" function returns the number of bytes copied or
+ * XTENSA_UNDEFINED on error. The "from_chars" function first reads the
+ * minimal number of bytes required to decode the instruction length and
+ * then proceeds to copy the entire instruction into the buffer; if the
+ * memory does not contain a valid instruction, it copies the maximum
+ * number of bytes required for the longest Xtensa instruction. The
+ * "num_chars" argument may be used to limit the number of bytes that
+ * can be read or written. Otherwise, if "num_chars" is zero, the
+ * functions may read or write past the end of the code.
+ */
+
+int xtensa_insnbuf_to_chars(xtensa_isa isa, const xtensa_insnbuf insn,
+                            unsigned char *cp, int num_chars);
+
+void xtensa_insnbuf_from_chars(xtensa_isa isa, xtensa_insnbuf insn,
+                               const unsigned char *cp, int num_chars);
+
+
+
+/* ISA information. */
+
+/* Initialize the ISA information. */
+
+xtensa_isa xtensa_isa_init(void *xtensa_modules, xtensa_isa_status *errno_p,
+                           char **error_msg_p);
+
+
+/* Deallocate an xtensa_isa structure. */
+
+void xtensa_isa_free(xtensa_isa isa);
+
+
+/* Get the maximum instruction size in bytes. */
+
+int xtensa_isa_maxlength(xtensa_isa isa);
+
+
+/*
+ * Decode the length in bytes of an instruction in raw memory (not an
+ * insnbuf). This function reads only the minimal number of bytes
+ * required to decode the instruction length. Returns
+ * XTENSA_UNDEFINED on error.
+ */
+
+int xtensa_isa_length_from_chars(xtensa_isa isa, const unsigned char *cp);
+
+
+/*
+ * Get the number of stages in the processor's pipeline. The pipeline
+ * stage values returned by other functions in this library will range
+ * from 0 to N-1, where N is the value returned by this function.
+ * Note that the stage numbers used here may not correspond to the
+ * actual processor hardware, e.g., the hardware may have additional
+ * stages before stage 0. Returns XTENSA_UNDEFINED on error.
+ */
+
+int xtensa_isa_num_pipe_stages(xtensa_isa isa);
+
+
+/* Get the number of various entities that are defined for this processor. */
+
+int xtensa_isa_num_formats(xtensa_isa isa);
+
+int xtensa_isa_num_opcodes(xtensa_isa isa);
+
+int xtensa_isa_num_regfiles(xtensa_isa isa);
+
+int xtensa_isa_num_states(xtensa_isa isa);
+
+int xtensa_isa_num_sysregs(xtensa_isa isa);
+
+int xtensa_isa_num_interfaces(xtensa_isa isa);
+
+int xtensa_isa_num_funcUnits(xtensa_isa isa);
+
+
+
+/* Instruction formats. */
+
+/* Get the name of a format. Returns null on error. */
+
+const char *xtensa_format_name(xtensa_isa isa, xtensa_format fmt);
+
+
+/*
+ * Given a format name, return the format number. Returns
+ * XTENSA_UNDEFINED if the name is not a valid format.
+ */
+
+xtensa_format xtensa_format_lookup(xtensa_isa isa, const char *fmtname);
+
+
+/*
+ * Decode the instruction format from a binary instruction buffer.
+ * Returns XTENSA_UNDEFINED if the format is not recognized.
+ */
+
+xtensa_format xtensa_format_decode(xtensa_isa isa, const xtensa_insnbuf insn);
+
+
+/*
+ * Set the instruction format field(s) in a binary instruction buffer.
+ * All the other fields are set to zero. Returns non-zero on error.
+ */
+
+int xtensa_format_encode(xtensa_isa isa, xtensa_format fmt,
+                         xtensa_insnbuf insn);
+
+
+/*
+ * Find the length (in bytes) of an instruction. Returns
+ * XTENSA_UNDEFINED on error.
+ */
+
+int xtensa_format_length(xtensa_isa isa, xtensa_format fmt);
+
+
+/*
+ * Get the number of slots in an instruction. Returns XTENSA_UNDEFINED
+ * on error.
+ */
+
+int xtensa_format_num_slots(xtensa_isa isa, xtensa_format fmt);
+
+
+/*
+ * Get the opcode for a no-op in a particular slot.
+ * Returns XTENSA_UNDEFINED on error.
+ */
+
+xtensa_opcode xtensa_format_slot_nop_opcode(xtensa_isa isa, xtensa_format fmt,
+                                            int slot);
+
+
+/*
+ * Get the bits for a specified slot out of an insnbuf for the
+ * instruction as a whole and put them into an insnbuf for that one
+ * slot, and do the opposite to set a slot. Return non-zero on error.
+ */
+
+int xtensa_format_get_slot(xtensa_isa isa, xtensa_format fmt, int slot,
+                           const xtensa_insnbuf insn, xtensa_insnbuf slotbuf);
+
+int xtensa_format_set_slot(xtensa_isa isa, xtensa_format fmt, int slot,
+                           xtensa_insnbuf insn, const xtensa_insnbuf slotbuf);
+
+
+
+/* Opcode information. */
+
+/*
+ * Translate a mnemonic name to an opcode. Returns XTENSA_UNDEFINED if
+ * the name is not a valid opcode mnemonic.
+ */
+
+xtensa_opcode xtensa_opcode_lookup(xtensa_isa isa, const char *opname);
+
+
+/*
+ * Decode the opcode for one instruction slot from a binary instruction
+ * buffer. Returns the opcode or XTENSA_UNDEFINED if the opcode is
+ * illegal.
+ */
+
+xtensa_opcode xtensa_opcode_decode(xtensa_isa isa, xtensa_format fmt, int slot,
+                                   const xtensa_insnbuf slotbuf);
+
+
+/*
+ * Set the opcode field(s) for an instruction slot. All other fields
+ * in the slot are set to zero. Returns non-zero if the opcode cannot
+ * be encoded.
+ */
+
+int xtensa_opcode_encode(xtensa_isa isa, xtensa_format fmt, int slot,
+                         xtensa_insnbuf slotbuf, xtensa_opcode opc);
+
+
+/* Get the mnemonic name for an opcode. Returns null on error. */
+
+const char *xtensa_opcode_name(xtensa_isa isa, xtensa_opcode opc);
+
+
+/* Check various properties of opcodes. These functions return 0 if
+ * the condition is false, 1 if the condition is true, and
+ * XTENSA_UNDEFINED on error. The instructions are classified as
+ * follows:
+ *
+ * branch: conditional branch; may fall through to next instruction (B*)
+ * jump: unconditional branch (J, JX, RET*, RF*)
+ * loop: zero-overhead loop (LOOP*)
+ * call: unconditional call; control returns to next instruction (CALL*)
+ *
+ * For the opcodes that affect control flow in some way, the branch
+ * target may be specified by an immediate operand or it may be an
+ * address stored in a register. You can distinguish these by
+ * checking if the instruction has a PC-relative immediate
+ * operand.
+ */
+
+int xtensa_opcode_is_branch(xtensa_isa isa, xtensa_opcode opc);
+
+int xtensa_opcode_is_jump(xtensa_isa isa, xtensa_opcode opc);
+
+int xtensa_opcode_is_loop(xtensa_isa isa, xtensa_opcode opc);
+
+int xtensa_opcode_is_call(xtensa_isa isa, xtensa_opcode opc);
+
+
+/*
+ * Find the number of ordinary operands, state operands, and interface
+ * operands for an instruction. These return XTENSA_UNDEFINED on
+ * error.
+ */
+
+int xtensa_opcode_num_operands(xtensa_isa isa, xtensa_opcode opc);
+
+int xtensa_opcode_num_stateOperands(xtensa_isa isa, xtensa_opcode opc);
+
+int xtensa_opcode_num_interfaceOperands(xtensa_isa isa, xtensa_opcode opc);
+
+
+/*
+ * Get functional unit usage requirements for an opcode. Each "use"
+ * is identified by a <functional unit, pipeline stage> pair. The
+ * "num_funcUnit_uses" function returns the number of these "uses" or
+ * XTENSA_UNDEFINED on error. The "funcUnit_use" function returns
+ * a pointer to a "use" pair or null on error.
+ */
+
+typedef struct xtensa_funcUnit_use_struct {
+    xtensa_funcUnit unit;
+    int stage;
+} xtensa_funcUnit_use;
+
+int xtensa_opcode_num_funcUnit_uses(xtensa_isa isa, xtensa_opcode opc);
+
+xtensa_funcUnit_use *xtensa_opcode_funcUnit_use(xtensa_isa isa,
+                                                xtensa_opcode opc, int u);
+
+
+
+/* Operand information. */
+
+/* Get the name of an operand. Returns null on error. */
+
+const char *xtensa_operand_name(xtensa_isa isa, xtensa_opcode opc, int opnd);
+
+
+/*
+ * Some operands are "invisible", i.e., not explicitly specified in
+ * assembly language. When assembling an instruction, you need not set
+ * the values of invisible operands, since they are either hardwired or
+ * derived from other field values. The values of invisible operands
+ * can be examined in the same way as other operands, but remember that
+ * an invisible operand may get its value from another visible one, so
+ * the entire instruction must be available before examining the
+ * invisible operand values. This function returns 1 if an operand is
+ * visible, 0 if it is invisible, or XTENSA_UNDEFINED on error. Note
+ * that whether an operand is visible is orthogonal to whether it is
+ * "implicit", i.e., whether it is encoded in a field in the
+ * instruction.
+ */
+
+int xtensa_operand_is_visible(xtensa_isa isa, xtensa_opcode opc, int opnd);
+
+
+/*
+ * Check if an operand is an input ('i'), output ('o'), or inout ('m')
+ * operand. Note: The output operand of a conditional assignment
+ * (e.g., movnez) appears here as an inout ('m') even if it is declared
+ * in the TIE code as an output ('o'); this allows the compiler to
+ * properly handle register allocation for conditional assignments.
+ * Returns 0 on error.
+ */
+
+char xtensa_operand_inout(xtensa_isa isa, xtensa_opcode opc, int opnd);
+
+
+/*
+ * Get and set the raw (encoded) value of the field for the specified
+ * operand. The "set" function does not check if the value fits in the
+ * field; that is done by the "encode" function below. Both of these
+ * functions return non-zero on error, e.g., if the field is not defined
+ * for the specified slot.
+ */
+
+int xtensa_operand_get_field(xtensa_isa isa, xtensa_opcode opc, int opnd,
+                             xtensa_format fmt, int slot,
+                             const xtensa_insnbuf slotbuf, uint32_t *valp);
+
+int xtensa_operand_set_field(xtensa_isa isa, xtensa_opcode opc, int opnd,
+                             xtensa_format fmt, int slot,
+                             xtensa_insnbuf slotbuf, uint32_t val);
+
+
+/*
+ * Encode and decode operands. The raw bits in the operand field may
+ * be encoded in a variety of different ways. These functions hide
+ * the details of that encoding. The result values are returned through
+ * the argument pointer. The return value is non-zero on error.
+ */
+
+int xtensa_operand_encode(xtensa_isa isa, xtensa_opcode opc, int opnd,
+                          uint32_t *valp);
+
+int xtensa_operand_decode(xtensa_isa isa, xtensa_opcode opc, int opnd,
+                          uint32_t *valp);
+
+
+/*
+ * An operand may be either a register operand or an immediate of some
+ * sort (e.g., PC-relative or not). The "is_register" function returns
+ * 0 if the operand is an immediate, 1 if it is a register, and
+ * XTENSA_UNDEFINED on error. The "regfile" function returns the
+ * regfile for a register operand, or XTENSA_UNDEFINED on error.
+ */
+
+int xtensa_operand_is_register(xtensa_isa isa, xtensa_opcode opc, int opnd);
+
+xtensa_regfile xtensa_operand_regfile(xtensa_isa isa, xtensa_opcode opc,
+                                      int opnd);
+
+
+/*
+ * Register operands may span multiple consecutive registers, e.g., a
+ * 64-bit data type may occupy two 32-bit registers. Only the first
+ * register is encoded in the operand field. This function specifies
+ * the number of consecutive registers occupied by this operand. For
+ * non-register operands, the return value is undefined. Returns
+ * XTENSA_UNDEFINED on error.
+ */
+
+int xtensa_operand_num_regs(xtensa_isa isa, xtensa_opcode opc, int opnd);
+
+
+/*
+ * Some register operands do not completely identify the register being
+ * accessed. For example, the operand value may be added to an internal
+ * state value. By definition, this implies that the corresponding
+ * regfile is not allocatable. Unknown registers should generally be
+ * treated with worst-case assumptions. The function returns 0 if the
+ * register value is unknown, 1 if known, and XTENSA_UNDEFINED on
+ * error.
+ */
+
+int xtensa_operand_is_known_reg(xtensa_isa isa, xtensa_opcode opc, int opnd);
+
+
+/*
+ * Check if an immediate operand is PC-relative. Returns 0 for register
+ * operands and non-PC-relative immediates, 1 for PC-relative
+ * immediates, and XTENSA_UNDEFINED on error.
+ */
+
+int xtensa_operand_is_PCrelative(xtensa_isa isa, xtensa_opcode opc, int opnd);
+
+
+/*
+ * For PC-relative offset operands, the interpretation of the offset may
+ * vary between opcodes, e.g., is it relative to the current PC or that
+ * of the next instruction?  The following functions are defined to
+ * perform PC-relative relocations and to undo them (as in the
+ * disassembler). The "do_reloc" function takes the desired address
+ * value and the PC of the current instruction and sets the value to the
+ * corresponding PC-relative offset (which can then be encoded and
+ * stored into the operand field). The "undo_reloc" function takes the
+ * unencoded offset value and the current PC and sets the value to the
+ * appropriate address. The return values are non-zero on error. Note
+ * that these functions do not replace the encode/decode functions; the
+ * operands must be encoded/decoded separately and the encode functions
+ * are responsible for detecting invalid operand values.
+ */
+
+int xtensa_operand_do_reloc(xtensa_isa isa, xtensa_opcode opc, int opnd,
+                            uint32_t *valp, uint32_t pc);
+
+int xtensa_operand_undo_reloc(xtensa_isa isa, xtensa_opcode opc, int opnd,
+                              uint32_t *valp, uint32_t pc);
+
+
+
+/* State Operands. */
+
+/*
+ * Get the state accessed by a state operand. Returns XTENSA_UNDEFINED
+ * on error.
+ */
+
+xtensa_state xtensa_stateOperand_state(xtensa_isa isa, xtensa_opcode opc,
+                                       int stOp);
+
+
+/*
+ * Check if a state operand is an input ('i'), output ('o'), or inout
+ * ('m') operand. Returns 0 on error.
+ */
+
+char xtensa_stateOperand_inout(xtensa_isa isa, xtensa_opcode opc, int stOp);
+
+
+
+/* Interface Operands. */
+
+/*
+ * Get the external interface accessed by an interface operand.
+ * Returns XTENSA_UNDEFINED on error.
+ */
+
+xtensa_interface xtensa_interfaceOperand_interface(xtensa_isa isa,
+                                                   xtensa_opcode opc,
+                                                   int ifOp);
+
+
+
+/* Register Files. */
+
+/*
+ * Regfiles include both "real" regfiles and "views", where a view
+ * allows a group of adjacent registers in a real "parent" regfile to be
+ * viewed as a single register. A regfile view has all the same
+ * properties as its parent except for its (long) name, bit width, number
+ * of entries, and default ctype. You can use the parent function to
+ * distinguish these two classes.
+ */
+
+/*
+ * Look up a regfile by either its name or its abbreviated "short name".
+ * Returns XTENSA_UNDEFINED on error. The "lookup_shortname" function
+ * ignores "view" regfiles since they always have the same shortname as
+ * their parents.
+ */
+
+xtensa_regfile xtensa_regfile_lookup(xtensa_isa isa, const char *name);
+
+xtensa_regfile xtensa_regfile_lookup_shortname(xtensa_isa isa,
+                                               const char *shortname);
+
+
+/*
+ * Get the name or abbreviated "short name" of a regfile.
+ * Returns null on error.
+ */
+
+const char *xtensa_regfile_name(xtensa_isa isa, xtensa_regfile rf);
+
+const char *xtensa_regfile_shortname(xtensa_isa isa, xtensa_regfile rf);
+
+
+/*
+ * Get the parent regfile of a "view" regfile. If the regfile is not a
+ * view, the result is the same as the input parameter. Returns
+ * XTENSA_UNDEFINED on error.
+ */
+
+xtensa_regfile xtensa_regfile_view_parent(xtensa_isa isa, xtensa_regfile rf);
+
+
+/*
+ * Get the bit width of a regfile or regfile view.
+ * Returns XTENSA_UNDEFINED on error.
+ */
+
+int xtensa_regfile_num_bits(xtensa_isa isa, xtensa_regfile rf);
+
+
+/*
+ * Get the number of regfile entries. Returns XTENSA_UNDEFINED on
+ * error.
+ */
+
+int xtensa_regfile_num_entries(xtensa_isa isa, xtensa_regfile rf);
+
+
+
+/* Processor States. */
+
+/* Look up a state by name. Returns XTENSA_UNDEFINED on error. */
+
+xtensa_state xtensa_state_lookup(xtensa_isa isa, const char *name);
+
+
+/* Get the name for a processor state. Returns null on error. */
+
+const char *xtensa_state_name(xtensa_isa isa, xtensa_state st);
+
+
+/*
+ * Get the bit width for a processor state.
+ * Returns XTENSA_UNDEFINED on error.
+ */
+
+int xtensa_state_num_bits(xtensa_isa isa, xtensa_state st);
+
+
+/*
+ * Check if a state is exported from the processor core. Returns 0 if
+ * the condition is false, 1 if the condition is true, and
+ * XTENSA_UNDEFINED on error.
+ */
+
+int xtensa_state_is_exported(xtensa_isa isa, xtensa_state st);
+
+
+/*
+ * Check for a "shared_or" state. Returns 0 if the condition is false,
+ * 1 if the condition is true, and XTENSA_UNDEFINED on error.
+ */
+
+int xtensa_state_is_shared_or(xtensa_isa isa, xtensa_state st);
+
+
+
+/* Sysregs ("special registers" and "user registers"). */
+
+/*
+ * Look up a register by its number and whether it is a "user register"
+ * or a "special register". Returns XTENSA_UNDEFINED if the sysreg does
+ * not exist.
+ */
+
+xtensa_sysreg xtensa_sysreg_lookup(xtensa_isa isa, int num, int is_user);
+
+
+/*
+ * Check if there exists a sysreg with a given name.
+ * If not, this function returns XTENSA_UNDEFINED.
+ */
+
+xtensa_sysreg xtensa_sysreg_lookup_name(xtensa_isa isa, const char *name);
+
+
+/* Get the name of a sysreg. Returns null on error. */
+
+const char *xtensa_sysreg_name(xtensa_isa isa, xtensa_sysreg sysreg);
+
+
+/* Get the register number. Returns XTENSA_UNDEFINED on error. */
+
+int xtensa_sysreg_number(xtensa_isa isa, xtensa_sysreg sysreg);
+
+
+/*
+ * Check if a sysreg is a "special register" or a "user register".
+ * Returns 0 for special registers, 1 for user registers and
+ * XTENSA_UNDEFINED on error.
+ */
+
+int xtensa_sysreg_is_user(xtensa_isa isa, xtensa_sysreg sysreg);
+
+
+
+/* Interfaces. */
+
+/*
+ * Find an interface by name. The return value is XTENSA_UNDEFINED if
+ * the specified interface is not found.
+ */
+
+xtensa_interface xtensa_interface_lookup(xtensa_isa isa, const char *ifname);
+
+
+/* Get the name of an interface. Returns null on error. */
+
+const char *xtensa_interface_name(xtensa_isa isa, xtensa_interface intf);
+
+
+/*
+ * Get the bit width for an interface.
+ * Returns XTENSA_UNDEFINED on error.
+ */
+
+int xtensa_interface_num_bits(xtensa_isa isa, xtensa_interface intf);
+
+
+/*
+ * Check if an interface is an input ('i') or output ('o') with respect
+ * to the Xtensa processor core. Returns 0 on error.
+ */
+
+char xtensa_interface_inout(xtensa_isa isa, xtensa_interface intf);
+
+
+/*
+ * Check if accessing an interface has potential side effects.
+ * Currently "data" interfaces have side effects and "control"
+ * interfaces do not. Returns 1 if there are side effects, 0 if not,
+ * and XTENSA_UNDEFINED on error.
+ */
+
+int xtensa_interface_has_side_effect(xtensa_isa isa, xtensa_interface intf);
+
+
+/*
+ * Some interfaces may be related such that accessing one interface
+ * has side effects on a set of related interfaces. The interfaces
+ * are partitioned into equivalence classes of related interfaces, and
+ * each class is assigned a unique identifier number. This function
+ * returns the class identifier for an interface, or XTENSA_UNDEFINED
+ * on error. These identifiers can be compared to determine if two
+ * interfaces are related; the specific values of the identifiers have
+ * no particular meaning otherwise.
+ */
+
+int xtensa_interface_class_id(xtensa_isa isa, xtensa_interface intf);
+
+
+/* Functional Units. */
+
+/*
+ * Find a functional unit by name. The return value is XTENSA_UNDEFINED if
+ * the specified unit is not found.
+ */
+
+xtensa_funcUnit xtensa_funcUnit_lookup(xtensa_isa isa, const char *fname);
+
+
+/* Get the name of a functional unit. Returns null on error. */
+
+const char *xtensa_funcUnit_name(xtensa_isa isa, xtensa_funcUnit fun);
+
+
+/*
+ * Functional units may be replicated. See how many instances of a
+ * particular function unit exist. Returns XTENSA_UNDEFINED on error.
+ */
+
+int xtensa_funcUnit_num_copies(xtensa_isa isa, xtensa_funcUnit fun);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* HW_XTENSA_XTENSA_ISA_H */
index 27e42bdadc719d990310e4b1045f9034c3666e8c..98934e6d9eec20d8c2fbeea1c659c75156bed9a3 100644 (file)
@@ -41,35 +41,13 @@ struct QIOChannelCommand {
     QIOChannel parent;
     int writefd;
     int readfd;
-    pid_t pid;
+    GPid pid;
+#ifdef WIN32
+    bool blocking;
+#endif
 };
 
 
-/**
- * qio_channel_command_new_pid:
- * @writefd: the FD connected to the command's stdin
- * @readfd: the FD connected to the command's stdout
- * @pid: the PID of the running child command
- * @errp: pointer to a NULL-initialized error object
- *
- * Create a channel for performing I/O with the
- * previously spawned command identified by @pid.
- * The two file descriptors provide the connection
- * to command's stdio streams, either one or which
- * may be -1 to indicate that stream is not open.
- *
- * The channel will take ownership of the process
- * @pid and will kill it when closing the channel.
- * Similarly it will take responsibility for
- * closing the file descriptors @writefd and @readfd.
- *
- * Returns: the command channel object, or NULL on error
- */
-QIOChannelCommand *
-qio_channel_command_new_pid(int writefd,
-                            int readfd,
-                            pid_t pid);
-
 /**
  * qio_channel_command_new_spawn:
  * @argv: the NULL terminated list of command arguments
diff --git a/include/io/channel-null.h b/include/io/channel-null.h
new file mode 100644 (file)
index 0000000..f6d54e6
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * QEMU I/O channels null driver
+ *
+ * Copyright (c) 2022 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QIO_CHANNEL_FILE_H
+#define QIO_CHANNEL_FILE_H
+
+#include "io/channel.h"
+#include "qom/object.h"
+
+#define TYPE_QIO_CHANNEL_NULL "qio-channel-null"
+OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelNull, QIO_CHANNEL_NULL)
+
+
+/**
+ * QIOChannelNull:
+ *
+ * The QIOChannelNull object provides a channel implementation
+ * that discards all writes and returns EOF for all reads.
+ */
+
+struct QIOChannelNull {
+    QIOChannel parent;
+    bool closed;
+};
+
+
+/**
+ * qio_channel_null_new:
+ *
+ * Create a new IO channel object that discards all writes
+ * and returns EOF for all reads.
+ *
+ * Returns: the new channel object
+ */
+QIOChannelNull *
+qio_channel_null_new(void);
+
+#endif /* QIO_CHANNEL_NULL_H */
index e747e635147ed62015a9a2b5337c5a3a21c41880..ab15577d38487aa6657a286d75637e311419ecef 100644 (file)
@@ -47,6 +47,8 @@ struct QIOChannelSocket {
     socklen_t localAddrLen;
     struct sockaddr_storage remoteAddr;
     socklen_t remoteAddrLen;
+    ssize_t zero_copy_queued;
+    ssize_t zero_copy_sent;
 };
 
 
@@ -122,7 +124,7 @@ void qio_channel_socket_connect_async(QIOChannelSocket *ioc,
  * qio_channel_socket_listen_sync:
  * @ioc: the socket channel object
  * @addr: the address to listen to
- * @num: the expected ammount of connections
+ * @num: the expected amount of connections
  * @errp: pointer to a NULL-initialized error object
  *
  * Attempt to listen to the address @addr. This method
@@ -139,7 +141,7 @@ int qio_channel_socket_listen_sync(QIOChannelSocket *ioc,
  * qio_channel_socket_listen_async:
  * @ioc: the socket channel object
  * @addr: the address to listen to
- * @num: the expected ammount of connections
+ * @num: the expected amount of connections
  * @callback: the function to invoke on completion
  * @opaque: user data to pass to @callback
  * @destroy: the function to free @opaque
index 5672479e9eb60d6117592cc4a66e8329dbdf9e1c..26c67f17e2d364eeed9ce0a156b41aa7c210d1fc 100644 (file)
@@ -48,6 +48,7 @@ struct QIOChannelTLS {
     QIOChannel *master;
     QCryptoTLSSession *session;
     QIOChannelShutdown shutdown;
+    guint hs_ioc_tag;
 };
 
 /**
index a5d720d9a04bf48d13c6e7983bc45f23fe3b8f7b..fa18a3756d8b7678d3661cac6c5838d8ca9e53ac 100644 (file)
 QIOChannel *qio_channel_new_fd(int fd,
                                Error **errp);
 
+/**
+ * qio_channel_util_set_aio_fd_handler:
+ * @read_fd: the file descriptor for the read handler
+ * @read_ctx: the AioContext for the read handler
+ * @io_read: the read handler
+ * @write_fd: the file descriptor for the write handler
+ * @write_ctx: the AioContext for the write handler
+ * @io_write: the write handler
+ * @opaque: the opaque argument to the read and write handler
+ *
+ * Set the read and write handlers when @read_ctx and @write_ctx are non-NULL,
+ * respectively. To leave a handler in its current state, pass a NULL
+ * AioContext. To clear a handler, pass a non-NULL AioContext and a NULL
+ * handler.
+ */
+void qio_channel_util_set_aio_fd_handler(int read_fd,
+                                         AioContext *read_ctx,
+                                         IOHandler *io_read,
+                                         int write_fd,
+                                         AioContext *write_ctx,
+                                         IOHandler *io_write,
+                                         void *opaque);
+
 #endif /* QIO_CHANNEL_UTIL_H */
index 4d6fe45f63d8f3d040ff4b845d2fa8f7919ee934..5f9dbaab65b09b6dd36c9a298905cc16f9b2674d 100644 (file)
@@ -22,7 +22,7 @@
 #define QIO_CHANNEL_H
 
 #include "qom/object.h"
-#include "qemu/coroutine.h"
+#include "qemu/coroutine-core.h"
 #include "block/aio.h"
 
 #define TYPE_QIO_CHANNEL "qio-channel"
@@ -32,12 +32,18 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
 
 #define QIO_CHANNEL_ERR_BLOCK -2
 
+#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
+
+#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1
+
 typedef enum QIOChannelFeature QIOChannelFeature;
 
 enum QIOChannelFeature {
     QIO_CHANNEL_FEATURE_FD_PASS,
     QIO_CHANNEL_FEATURE_SHUTDOWN,
     QIO_CHANNEL_FEATURE_LISTEN,
+    QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
+    QIO_CHANNEL_FEATURE_READ_MSG_PEEK,
 };
 
 
@@ -75,9 +81,11 @@ struct QIOChannel {
     Object parent;
     unsigned int features; /* bitmask of QIOChannelFeatures */
     char *name;
-    AioContext *ctx;
+    AioContext *read_ctx;
     Coroutine *read_coroutine;
+    AioContext *write_ctx;
     Coroutine *write_coroutine;
+    bool follow_coroutine_ctx;
 #ifdef _WIN32
     HANDLE event; /* For use with GSource on Win32 */
 #endif
@@ -92,7 +100,8 @@ struct QIOChannel {
  * provide additional optional features.
  *
  * Consult the corresponding public API docs for a description
- * of the semantics of each callback
+ * of the semantics of each callback. io_shutdown in particular
+ * must be thread-safe, terminate quickly and must not block.
  */
 struct QIOChannelClass {
     ObjectClass parent;
@@ -103,12 +112,14 @@ struct QIOChannelClass {
                          size_t niov,
                          int *fds,
                          size_t nfds,
+                         int flags,
                          Error **errp);
     ssize_t (*io_readv)(QIOChannel *ioc,
                         const struct iovec *iov,
                         size_t niov,
                         int **fds,
                         size_t *nfds,
+                        int flags,
                         Error **errp);
     int (*io_close)(QIOChannel *ioc,
                     Error **errp);
@@ -131,10 +142,13 @@ struct QIOChannelClass {
                      int whence,
                      Error **errp);
     void (*io_set_aio_fd_handler)(QIOChannel *ioc,
-                                  AioContext *ctx,
+                                  AioContext *read_ctx,
                                   IOHandler *io_read,
+                                  AioContext *write_ctx,
                                   IOHandler *io_write,
                                   void *opaque);
+    int (*io_flush)(QIOChannel *ioc,
+                    Error **errp);
 };
 
 /* General I/O handling functions */
@@ -181,6 +195,7 @@ void qio_channel_set_name(QIOChannel *ioc,
  * @niov: the length of the @iov array
  * @fds: pointer to an array that will received file handles
  * @nfds: pointer filled with number of elements in @fds on return
+ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*)
  * @errp: pointer to a NULL-initialized error object
  *
  * Read data from the IO channel, storing it in the
@@ -217,6 +232,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
                                size_t niov,
                                int **fds,
                                size_t *nfds,
+                               int flags,
                                Error **errp);
 
 
@@ -227,6 +243,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
  * @niov: the length of the @iov array
  * @fds: an array of file handles to send
  * @nfds: number of file handles in @fds
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
  * @errp: pointer to a NULL-initialized error object
  *
  * Write data to the IO channel, reading it from the
@@ -259,6 +276,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc,
                                 size_t niov,
                                 int *fds,
                                 size_t nfds,
+                                int flags,
                                 Error **errp);
 
 /**
@@ -286,10 +304,10 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc,
  * Returns: 1 if all bytes were read, 0 if end-of-file
  *          occurs without data, or -1 on error
  */
-int qio_channel_readv_all_eof(QIOChannel *ioc,
-                              const struct iovec *iov,
-                              size_t niov,
-                              Error **errp);
+int coroutine_mixed_fn qio_channel_readv_all_eof(QIOChannel *ioc,
+                                                 const struct iovec *iov,
+                                                 size_t niov,
+                                                 Error **errp);
 
 /**
  * qio_channel_readv_all:
@@ -313,10 +331,10 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
  *
  * Returns: 0 if all bytes were read, or -1 on error
  */
-int qio_channel_readv_all(QIOChannel *ioc,
-                          const struct iovec *iov,
-                          size_t niov,
-                          Error **errp);
+int coroutine_mixed_fn qio_channel_readv_all(QIOChannel *ioc,
+                                             const struct iovec *iov,
+                                             size_t niov,
+                                             Error **errp);
 
 
 /**
@@ -338,10 +356,10 @@ int qio_channel_readv_all(QIOChannel *ioc,
  *
  * Returns: 0 if all bytes were written, or -1 on error
  */
-int qio_channel_writev_all(QIOChannel *ioc,
-                           const struct iovec *iov,
-                           size_t niov,
-                           Error **erp);
+int coroutine_mixed_fn qio_channel_writev_all(QIOChannel *ioc,
+                                              const struct iovec *iov,
+                                              size_t niov,
+                                              Error **errp);
 
 /**
  * qio_channel_readv:
@@ -422,10 +440,10 @@ ssize_t qio_channel_write(QIOChannel *ioc,
  * Returns: 1 if all bytes were read, 0 if end-of-file occurs
  *          without data, or -1 on error
  */
-int qio_channel_read_all_eof(QIOChannel *ioc,
-                             char *buf,
-                             size_t buflen,
-                             Error **errp);
+int coroutine_mixed_fn qio_channel_read_all_eof(QIOChannel *ioc,
+                                                char *buf,
+                                                size_t buflen,
+                                                Error **errp);
 
 /**
  * qio_channel_read_all:
@@ -442,10 +460,10 @@ int qio_channel_read_all_eof(QIOChannel *ioc,
  *
  * Returns: 0 if all bytes were read, or -1 on error
  */
-int qio_channel_read_all(QIOChannel *ioc,
-                         char *buf,
-                         size_t buflen,
-                         Error **errp);
+int coroutine_mixed_fn qio_channel_read_all(QIOChannel *ioc,
+                                            char *buf,
+                                            size_t buflen,
+                                            Error **errp);
 
 /**
  * qio_channel_write_all:
@@ -461,10 +479,10 @@ int qio_channel_read_all(QIOChannel *ioc,
  *
  * Returns: 0 if all bytes were written, or -1 on error
  */
-int qio_channel_write_all(QIOChannel *ioc,
-                          const char *buf,
-                          size_t buflen,
-                          Error **errp);
+int coroutine_mixed_fn qio_channel_write_all(QIOChannel *ioc,
+                                             const char *buf,
+                                             size_t buflen,
+                                             Error **errp);
 
 /**
  * qio_channel_set_blocking:
@@ -483,6 +501,21 @@ int qio_channel_set_blocking(QIOChannel *ioc,
                              bool enabled,
                              Error **errp);
 
+/**
+ * qio_channel_set_follow_coroutine_ctx:
+ * @ioc: the channel object
+ * @enabled: whether or not to follow the coroutine's AioContext
+ *
+ * If @enabled is true, calls to qio_channel_yield() use the current
+ * coroutine's AioContext. Usually this is desirable.
+ *
+ * If @enabled is false, calls to qio_channel_yield() use the global iohandler
+ * AioContext. This is may be used by coroutines that run in the main loop and
+ * do not wish to respond to I/O during nested event loops. This is the
+ * default for compatibility with code that is not aware of AioContexts.
+ */
+void qio_channel_set_follow_coroutine_ctx(QIOChannel *ioc, bool enabled);
+
 /**
  * qio_channel_close:
  * @ioc: the channel object
@@ -510,6 +543,8 @@ int qio_channel_close(QIOChannel *ioc,
  * QIO_CHANNEL_FEATURE_SHUTDOWN prior to calling
  * this method.
  *
+ * This function is thread-safe, terminates quickly and does not block.
+ *
  * Returns: 0 on success, -1 on error
  */
 int qio_channel_shutdown(QIOChannel *ioc,
@@ -686,41 +721,6 @@ GSource *qio_channel_add_watch_source(QIOChannel *ioc,
                                       GDestroyNotify notify,
                                       GMainContext *context);
 
-/**
- * qio_channel_attach_aio_context:
- * @ioc: the channel object
- * @ctx: the #AioContext to set the handlers on
- *
- * Request that qio_channel_yield() sets I/O handlers on
- * the given #AioContext.  If @ctx is %NULL, qio_channel_yield()
- * uses QEMU's main thread event loop.
- *
- * You can move a #QIOChannel from one #AioContext to another even if
- * I/O handlers are set for a coroutine.  However, #QIOChannel provides
- * no synchronization between the calls to qio_channel_yield() and
- * qio_channel_attach_aio_context().
- *
- * Therefore you should first call qio_channel_detach_aio_context()
- * to ensure that the coroutine is not entered concurrently.  Then,
- * while the coroutine has yielded, call qio_channel_attach_aio_context(),
- * and then aio_co_schedule() to place the coroutine on the new
- * #AioContext.  The calls to qio_channel_detach_aio_context()
- * and qio_channel_attach_aio_context() should be protected with
- * aio_context_acquire() and aio_context_release().
- */
-void qio_channel_attach_aio_context(QIOChannel *ioc,
-                                    AioContext *ctx);
-
-/**
- * qio_channel_detach_aio_context:
- * @ioc: the channel object
- *
- * Disable any I/O handlers set by qio_channel_yield().  With the
- * help of aio_co_schedule(), this allows moving a coroutine that was
- * paused by qio_channel_yield() to another context.
- */
-void qio_channel_detach_aio_context(QIOChannel *ioc);
-
 /**
  * qio_channel_yield:
  * @ioc: the channel object
@@ -740,6 +740,16 @@ void qio_channel_detach_aio_context(QIOChannel *ioc);
 void coroutine_fn qio_channel_yield(QIOChannel *ioc,
                                     GIOCondition condition);
 
+/**
+ * qio_channel_wake_read:
+ * @ioc: the channel object
+ *
+ * If qio_channel_yield() is currently waiting for the channel to become
+ * readable, interrupt it and reenter immediately. This function is safe to call
+ * from any thread.
+ */
+void qio_channel_wake_read(QIOChannel *ioc);
+
 /**
  * qio_channel_wait:
  * @ioc: the channel object
@@ -758,8 +768,9 @@ void qio_channel_wait(QIOChannel *ioc,
 /**
  * qio_channel_set_aio_fd_handler:
  * @ioc: the channel object
- * @ctx: the AioContext to set the handlers on
+ * @read_ctx: the AioContext to set the read handler on or NULL
  * @io_read: the read handler
+ * @write_ctx: the AioContext to set the write handler on or NULL
  * @io_write: the write handler
  * @opaque: the opaque value passed to the handler
  *
@@ -767,11 +778,124 @@ void qio_channel_wait(QIOChannel *ioc,
  * be used by channel implementations to forward the handlers
  * to another channel (e.g. from #QIOChannelTLS to the
  * underlying socket).
+ *
+ * When @read_ctx is NULL, don't touch the read handler. When @write_ctx is
+ * NULL, don't touch the write handler. Note that setting the read handler
+ * clears the write handler, and vice versa, if they share the same AioContext.
+ * Therefore the caller must pass both handlers together when sharing the same
+ * AioContext.
  */
 void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
-                                    AioContext *ctx,
+                                    AioContext *read_ctx,
                                     IOHandler *io_read,
+                                    AioContext *write_ctx,
                                     IOHandler *io_write,
                                     void *opaque);
 
+/**
+ * qio_channel_readv_full_all_eof:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to read data to
+ * @niov: the length of the @iov array
+ * @fds: an array of file handles to read
+ * @nfds: number of file handles in @fds
+ * @errp: pointer to a NULL-initialized error object
+ *
+ *
+ * Performs same function as qio_channel_readv_all_eof.
+ * Additionally, attempts to read file descriptors shared
+ * over the channel. The function will wait for all
+ * requested data to be read, yielding from the current
+ * coroutine if required. data refers to both file
+ * descriptors and the iovs.
+ *
+ * Returns: 1 if all bytes were read, 0 if end-of-file
+ *          occurs without data, or -1 on error
+ */
+
+int coroutine_mixed_fn qio_channel_readv_full_all_eof(QIOChannel *ioc,
+                                                      const struct iovec *iov,
+                                                      size_t niov,
+                                                      int **fds, size_t *nfds,
+                                                      Error **errp);
+
+/**
+ * qio_channel_readv_full_all:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to read data to
+ * @niov: the length of the @iov array
+ * @fds: an array of file handles to read
+ * @nfds: number of file handles in @fds
+ * @errp: pointer to a NULL-initialized error object
+ *
+ *
+ * Performs same function as qio_channel_readv_all_eof.
+ * Additionally, attempts to read file descriptors shared
+ * over the channel. The function will wait for all
+ * requested data to be read, yielding from the current
+ * coroutine if required. data refers to both file
+ * descriptors and the iovs.
+ *
+ * Returns: 0 if all bytes were read, or -1 on error
+ */
+
+int coroutine_mixed_fn qio_channel_readv_full_all(QIOChannel *ioc,
+                                                  const struct iovec *iov,
+                                                  size_t niov,
+                                                  int **fds, size_t *nfds,
+                                                  Error **errp);
+
+/**
+ * qio_channel_writev_full_all:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to write data from
+ * @niov: the length of the @iov array
+ * @fds: an array of file handles to send
+ * @nfds: number of file handles in @fds
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
+ * @errp: pointer to a NULL-initialized error object
+ *
+ *
+ * Behaves like qio_channel_writev_full but will attempt
+ * to send all data passed (file handles and memory regions).
+ * The function will wait for all requested data
+ * to be written, yielding from the current coroutine
+ * if required.
+ *
+ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags,
+ * instead of waiting for all requested data to be written,
+ * this function will wait until it's all queued for writing.
+ * In this case, if the buffer gets changed between queueing and
+ * sending, the updated buffer will be sent. If this is not a
+ * desired behavior, it's suggested to call qio_channel_flush()
+ * before reusing the buffer.
+ *
+ * Returns: 0 if all bytes were written, or -1 on error
+ */
+
+int coroutine_mixed_fn qio_channel_writev_full_all(QIOChannel *ioc,
+                                                   const struct iovec *iov,
+                                                   size_t niov,
+                                                   int *fds, size_t nfds,
+                                                   int flags, Error **errp);
+
+/**
+ * qio_channel_flush:
+ * @ioc: the channel object
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Will block until every packet queued with
+ * qio_channel_writev_full() + QIO_CHANNEL_WRITE_FLAG_ZERO_COPY
+ * is sent, or return in case of any error.
+ *
+ * If not implemented, acts as a no-op, and returns 0.
+ *
+ * Returns -1 if any error is found,
+ *          1 if every send failed to use zero copy.
+ *          0 otherwise.
+ */
+
+int qio_channel_flush(QIOChannel *ioc,
+                      Error **errp);
+
 #endif /* QIO_CHANNEL_H */
index beec4f5cfd1dc036e9c96fbd50e25cb948c138b5..dc7d32ebd07ecfd22c3c9d11c8618a2673952dcf 100644 (file)
@@ -145,7 +145,7 @@ typedef void (*QIOTaskWorker)(QIOTask *task,
  * The QIOTask module can also be used to perform operations
  * in a background thread context, while still reporting the
  * results in the main event thread. This allows code which
- * cannot easily be rewritten to be asychronous (such as DNS
+ * cannot easily be rewritten to be asynchronous (such as DNS
  * lookups) to be easily run non-blocking. Reporting the
  * results in the main thread context means that the caller
  * typically does not need to be concerned about thread
diff --git a/include/libdecnumber/dconfig.h b/include/libdecnumber/dconfig.h
new file mode 100644 (file)
index 0000000..2bc0ba7
--- /dev/null
@@ -0,0 +1,39 @@
+/* Configure decNumber for either host or target.
+   Copyright (C) 2008 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2, or (at your option) any later
+   version.
+
+   In addition to the permissions in the GNU General Public License,
+   the Free Software Foundation gives you unlimited permission to link
+   the compiled version of this file into combinations with other
+   programs, and to distribute those combinations without any
+   restriction coming from the use of this file.  (The General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into a combine executable.)
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+
+#if HOST_BIG_ENDIAN
+#define WORDS_BIGENDIAN 1
+#else
+#define WORDS_BIGENDIAN 0
+#endif
+
+#ifndef DECDPUN
+#define DECDPUN 3
+#endif
diff --git a/include/libdecnumber/decContext.h b/include/libdecnumber/decContext.h
new file mode 100644 (file)
index 0000000..cea6e42
--- /dev/null
@@ -0,0 +1,255 @@
+/* Decimal context header module for the decNumber C Library.
+   Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+   Contributed by IBM Corporation.  Author Mike Cowlishaw.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2, or (at your option) any later
+   version.
+
+   In addition to the permissions in the GNU General Public License,
+   the Free Software Foundation gives you unlimited permission to link
+   the compiled version of this file into combinations with other
+   programs, and to distribute those combinations without any
+   restriction coming from the use of this file.  (The General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into a combine executable.)
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+/* ------------------------------------------------------------------ */
+/* Decimal Context module header                                     */
+/* ------------------------------------------------------------------ */
+/*                                                                   */
+/* Context variables must always have valid values:                  */
+/*                                                                   */
+/*  status   -- [any bits may be cleared, but not set, by user]              */
+/*  round    -- must be one of the enumerated rounding modes         */
+/*                                                                   */
+/* The following variables are implied for fixed size formats (i.e.,  */
+/* they are ignored) but should still be set correctly in case used   */
+/* with decNumber functions:                                         */
+/*                                                                   */
+/*  clamp    -- must be either 0 or 1                                */
+/*  digits   -- must be in the range 1 through 999999999             */
+/*  emax     -- must be in the range 0 through 999999999             */
+/*  emin     -- must be in the range 0 through -999999999            */
+/*  extended -- must be either 0 or 1 [present only if DECSUBSET]     */
+/*  traps    -- only defined bits may be set                         */
+/*                                                                   */
+/* ------------------------------------------------------------------ */
+
+#ifndef DECCONTEXT_H
+#define DECCONTEXT_H
+
+  #define DECCNAME     "decContext"                    /* Short name */
+  #define DECCFULLNAME "Decimal Context Descriptor"   /* Verbose name */
+  #define DECCAUTHOR   "Mike Cowlishaw"                      /* Who to blame */
+
+
+  /* Extended flags setting -- set this to 0 to use only IEEE flags   */
+  #define DECEXTFLAG 1            /* 1=enable extended flags         */
+
+  /* Conditional code flag -- set this to 0 for best performance      */
+  #define DECSUBSET  0            /* 1=enable subset arithmetic      */
+
+  /* Context for operations, with associated constants               */
+  enum rounding {
+    DEC_ROUND_CEILING,            /* round towards +infinity         */
+    DEC_ROUND_UP,                 /* round away from 0               */
+    DEC_ROUND_HALF_UP,            /* 0.5 rounds up                   */
+    DEC_ROUND_HALF_EVEN,          /* 0.5 rounds to nearest even      */
+    DEC_ROUND_HALF_DOWN,          /* 0.5 rounds down                 */
+    DEC_ROUND_DOWN,               /* round towards 0 (truncate)      */
+    DEC_ROUND_FLOOR,              /* round towards -infinity         */
+    DEC_ROUND_05UP,               /* round for reround               */
+    DEC_ROUND_MAX                 /* enum must be less than this     */
+    };
+  #define DEC_ROUND_DEFAULT DEC_ROUND_HALF_EVEN;
+
+  typedef struct {
+    int32_t  digits;              /* working precision               */
+    int32_t  emax;                /* maximum positive exponent       */
+    int32_t  emin;                /* minimum negative exponent       */
+    enum     rounding round;      /* rounding mode                   */
+    uint32_t traps;               /* trap-enabler flags              */
+    uint32_t status;              /* status flags                    */
+    uint8_t  clamp;               /* flag: apply IEEE exponent clamp */
+    #if DECSUBSET
+    uint8_t  extended;            /* flag: special-values allowed    */
+    #endif
+    } decContext;
+
+  /* Maxima and Minima for context settings                          */
+  #define DEC_MAX_DIGITS 999999999
+  #define DEC_MIN_DIGITS        1
+  #define DEC_MAX_EMAX  999999999
+  #define DEC_MIN_EMAX          0
+  #define DEC_MAX_EMIN          0
+  #define DEC_MIN_EMIN -999999999
+  #define DEC_MAX_MATH     999999 /* max emax, etc., for math funcs. */
+
+  /* Classifications for decimal numbers, aligned with 754r (note     */
+  /* that 'normal' and 'subnormal' are meaningful only with a        */
+  /* decContext or a fixed size format).                             */
+  enum decClass {
+    DEC_CLASS_SNAN,
+    DEC_CLASS_QNAN,
+    DEC_CLASS_NEG_INF,
+    DEC_CLASS_NEG_NORMAL,
+    DEC_CLASS_NEG_SUBNORMAL,
+    DEC_CLASS_NEG_ZERO,
+    DEC_CLASS_POS_ZERO,
+    DEC_CLASS_POS_SUBNORMAL,
+    DEC_CLASS_POS_NORMAL,
+    DEC_CLASS_POS_INF
+    };
+  /* Strings for the decClasses */
+  #define DEC_ClassString_SN  "sNaN"
+  #define DEC_ClassString_QN  "NaN"
+  #define DEC_ClassString_NI  "-Infinity"
+  #define DEC_ClassString_NN  "-Normal"
+  #define DEC_ClassString_NS  "-Subnormal"
+  #define DEC_ClassString_NZ  "-Zero"
+  #define DEC_ClassString_PZ  "+Zero"
+  #define DEC_ClassString_PS  "+Subnormal"
+  #define DEC_ClassString_PN  "+Normal"
+  #define DEC_ClassString_PI  "+Infinity"
+  #define DEC_ClassString_UN  "Invalid"
+
+  /* Trap-enabler and Status flags (exceptional conditions), and      */
+  /* their names.  The top byte is reserved for internal use         */
+  #if DECEXTFLAG
+    /* Extended flags */
+    #define DEC_Conversion_syntax    0x00000001
+    #define DEC_Division_by_zero     0x00000002
+    #define DEC_Division_impossible  0x00000004
+    #define DEC_Division_undefined   0x00000008
+    #define DEC_Insufficient_storage 0x00000010 /* [when malloc fails] */
+    #define DEC_Inexact                     0x00000020
+    #define DEC_Invalid_context             0x00000040
+    #define DEC_Invalid_operation    0x00000080
+    #if DECSUBSET
+    #define DEC_Lost_digits         0x00000100
+    #endif
+    #define DEC_Overflow            0x00000200
+    #define DEC_Clamped                     0x00000400
+    #define DEC_Rounded                     0x00000800
+    #define DEC_Subnormal           0x00001000
+    #define DEC_Underflow           0x00002000
+  #else
+    /* IEEE flags only */
+    #define DEC_Conversion_syntax    0x00000010
+    #define DEC_Division_by_zero     0x00000002
+    #define DEC_Division_impossible  0x00000010
+    #define DEC_Division_undefined   0x00000010
+    #define DEC_Insufficient_storage 0x00000010 /* [when malloc fails] */
+    #define DEC_Inexact                     0x00000001
+    #define DEC_Invalid_context             0x00000010
+    #define DEC_Invalid_operation    0x00000010
+    #if DECSUBSET
+    #define DEC_Lost_digits         0x00000000
+    #endif
+    #define DEC_Overflow            0x00000008
+    #define DEC_Clamped                     0x00000000
+    #define DEC_Rounded                     0x00000000
+    #define DEC_Subnormal           0x00000000
+    #define DEC_Underflow           0x00000004
+  #endif
+
+  /* IEEE 854 groupings for the flags                                */
+  /* [DEC_Clamped, DEC_Lost_digits, DEC_Rounded, and DEC_Subnormal    */
+  /* are not in IEEE 854]                                            */
+  #define DEC_IEEE_854_Division_by_zero         (DEC_Division_by_zero)
+  #if DECSUBSET
+  #define DEC_IEEE_854_Inexact          (DEC_Inexact | DEC_Lost_digits)
+  #else
+  #define DEC_IEEE_854_Inexact          (DEC_Inexact)
+  #endif
+  #define DEC_IEEE_854_Invalid_operation (DEC_Conversion_syntax |     \
+                                         DEC_Division_impossible |   \
+                                         DEC_Division_undefined |    \
+                                         DEC_Insufficient_storage |  \
+                                         DEC_Invalid_context |       \
+                                         DEC_Invalid_operation)
+  #define DEC_IEEE_854_Overflow                 (DEC_Overflow)
+  #define DEC_IEEE_854_Underflow        (DEC_Underflow)
+
+  /* flags which are normally errors (result is qNaN, infinite, or 0) */
+  #define DEC_Errors (DEC_IEEE_854_Division_by_zero |                \
+                     DEC_IEEE_854_Invalid_operation |                \
+                     DEC_IEEE_854_Overflow | DEC_IEEE_854_Underflow)
+  /* flags which cause a result to become qNaN                       */
+  #define DEC_NaNs    DEC_IEEE_854_Invalid_operation
+
+  /* flags which are normally for information only (finite results)   */
+  #if DECSUBSET
+  #define DEC_Information (DEC_Clamped | DEC_Rounded | DEC_Inexact    \
+                         | DEC_Lost_digits)
+  #else
+  #define DEC_Information (DEC_Clamped | DEC_Rounded | DEC_Inexact)
+  #endif
+
+  /* Name strings for the exceptional conditions                     */
+  #define DEC_Condition_CS "Conversion syntax"
+  #define DEC_Condition_DZ "Division by zero"
+  #define DEC_Condition_DI "Division impossible"
+  #define DEC_Condition_DU "Division undefined"
+  #define DEC_Condition_IE "Inexact"
+  #define DEC_Condition_IS "Insufficient storage"
+  #define DEC_Condition_IC "Invalid context"
+  #define DEC_Condition_IO "Invalid operation"
+  #if DECSUBSET
+  #define DEC_Condition_LD "Lost digits"
+  #endif
+  #define DEC_Condition_OV "Overflow"
+  #define DEC_Condition_PA "Clamped"
+  #define DEC_Condition_RO "Rounded"
+  #define DEC_Condition_SU "Subnormal"
+  #define DEC_Condition_UN "Underflow"
+  #define DEC_Condition_ZE "No status"
+  #define DEC_Condition_MU "Multiple status"
+  #define DEC_Condition_Length 21  /* length of the longest string,   */
+                                  /* including terminator            */
+
+  /* Initialization descriptors, used by decContextDefault           */
+  #define DEC_INIT_BASE                0
+  #define DEC_INIT_DECIMAL32   32
+  #define DEC_INIT_DECIMAL64   64
+  #define DEC_INIT_DECIMAL128 128
+  /* Synonyms */
+  #define DEC_INIT_DECSINGLE  DEC_INIT_DECIMAL32
+  #define DEC_INIT_DECDOUBLE  DEC_INIT_DECIMAL64
+  #define DEC_INIT_DECQUAD    DEC_INIT_DECIMAL128
+
+  /* decContext routines                                             */
+
+
+  extern decContext  * decContextClearStatus(decContext *, uint32_t);
+  extern decContext  * decContextDefault(decContext *, int32_t);
+  extern enum rounding decContextGetRounding(decContext *);
+  extern uint32_t      decContextGetStatus(decContext *);
+  extern decContext  * decContextRestoreStatus(decContext *, uint32_t, uint32_t);
+  extern uint32_t      decContextSaveStatus(decContext *, uint32_t);
+  extern decContext  * decContextSetRounding(decContext *, enum rounding);
+  extern decContext  * decContextSetStatus(decContext *, uint32_t);
+  extern decContext  * decContextSetStatusFromString(decContext *, const char *);
+  extern decContext  * decContextSetStatusFromStringQuiet(decContext *, const char *);
+  extern decContext  * decContextSetStatusQuiet(decContext *, uint32_t);
+  extern const char  * decContextStatusToString(const decContext *);
+  extern uint32_t      decContextTestSavedStatus(uint32_t, uint32_t);
+  extern uint32_t      decContextTestStatus(decContext *, uint32_t);
+  extern decContext  * decContextZeroStatus(decContext *);
+
+#endif
diff --git a/include/libdecnumber/decDPD.h b/include/libdecnumber/decDPD.h
new file mode 100644 (file)
index 0000000..26a21ec
--- /dev/null
@@ -0,0 +1,1214 @@
+/* Conversion lookup tables for the decNumber C Library.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+   Contributed by IBM Corporation.  Author Mike Cowlishaw.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2, or (at your option) any later
+   version.
+
+   In addition to the permissions in the GNU General Public License,
+   the Free Software Foundation gives you unlimited permission to link
+   the compiled version of this file into combinations with other
+   programs, and to distribute those combinations without any
+   restriction coming from the use of this file.  (The General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into a combine executable.)
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+/* ------------------------------------------------------------------------ */
+/* Binary Coded Decimal and Densely Packed Decimal conversion lookup tables */
+/* [Automatically generated -- do not edit.  2007.05.05]                   */
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+/* For details, see: http://www2.hursley.ibm.com/decimal/DPDecimal.html            */
+
+
+/* This include file defines several DPD and BCD conversion tables:        */
+/*                                                                         */
+/*   uint16_t BCD2DPD[2458];    -- BCD -> DPD (0x999 => 2457)              */
+/*   uint16_t BIN2DPD[1000];    -- Bin -> DPD (999 => 2457)                */
+/*   uint8_t  BIN2CHAR[4001];   -- Bin -> CHAR (999 => '\3' '9' '9' '9')   */
+/*   uint8_t  BIN2BCD8[4000];   -- Bin -> bytes (999 => 9 9 9 3)           */
+/*   uint16_t DPD2BCD[1024];    -- DPD -> BCD (0x3FF => 0x999)             */
+/*   uint16_t DPD2BIN[1024];    -- DPD -> BIN (0x3FF => 999)               */
+/*   uint32_t DPD2BINK[1024];   -- DPD -> BIN * 1000 (0x3FF => 999000)     */
+/*   uint32_t DPD2BINM[1024];   -- DPD -> BIN * 1E+6 (0x3FF => 999000000)  */
+/*   uint8_t  DPD2BCD8[4096];   -- DPD -> bytes (x3FF => 9 9 9 3)          */
+/*                                                                         */
+/* In all cases the result (10 bits or 12 bits, or binary) is right-aligned */
+/* in the table entry. BIN2CHAR entries are a single byte length (0 for    */
+/* value 0) followed by three digit characters; a trailing terminator is    */
+/* included to allow 4-char moves always.  BIN2BCD8 and DPD2BCD8 entries    */
+/* are similar with the three BCD8 digits followed by a one-byte length            */
+/* (again, length=0 for value 0).                                          */
+/*                                                                         */
+/* To use a table, its name, prefixed with DEC_, must be defined with a            */
+/* value of 1 before this header file is included.  For example:           */
+/*    #define DEC_BCD2DPD 1                                                */
+/* This mechanism allows software to only include tables that are needed.   */
+/* ------------------------------------------------------------------------ */
+
+#if defined(DEC_BCD2DPD) && DEC_BCD2DPD==1 && !defined(DECBCD2DPD)
+#define DECBCD2DPD
+
+const uint16_t BCD2DPD[2458]={   0,    1,    2,    3,    4,    5,    6,    7,
+    8,   9,    0,    0,    0,    0,    0,    0,   16,   17,   18,   19,   20,
+   21,  22,   23,   24,   25,    0,    0,    0,    0,    0,    0,   32,   33,
+   34,  35,   36,   37,   38,   39,   40,   41,    0,    0,    0,    0,    0,
+    0,  48,   49,   50,   51,   52,   53,   54,   55,   56,   57,    0,    0,
+    0,   0,    0,    0,   64,   65,   66,   67,   68,   69,   70,   71,   72,
+   73,   0,    0,    0,    0,    0,    0,   80,   81,   82,   83,   84,   85,
+   86,  87,   88,   89,    0,    0,    0,    0,    0,    0,   96,   97,   98,
+   99, 100,  101,  102,  103,  104,  105,    0,    0,    0,    0,    0,    0,
+  112, 113,  114,  115,  116,  117,  118,  119,  120,  121,    0,    0,    0,
+    0,   0,    0,   10,   11,   42,   43,   74,   75,  106,  107,   78,   79,
+    0,   0,    0,    0,    0,    0,   26,   27,   58,   59,   90,   91,  122,
+  123,  94,   95,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0, 128,  129,  130,  131,  132,  133,  134,  135,  136,  137,    0,    0,
+    0,   0,    0,    0,  144,  145,  146,  147,  148,  149,  150,  151,  152,
+  153,   0,    0,    0,    0,    0,    0,  160,  161,  162,  163,  164,  165,
+  166, 167,  168,  169,    0,    0,    0,    0,    0,    0,  176,  177,  178,
+  179, 180,  181,  182,  183,  184,  185,    0,    0,    0,    0,    0,    0,
+  192, 193,  194,  195,  196,  197,  198,  199,  200,  201,    0,    0,    0,
+    0,   0,    0,  208,  209,  210,  211,  212,  213,  214,  215,  216,  217,
+    0,   0,    0,    0,    0,    0,  224,  225,  226,  227,  228,  229,  230,
+  231, 232,  233,    0,    0,    0,    0,    0,    0,  240,  241,  242,  243,
+  244, 245,  246,  247,  248,  249,    0,    0,    0,    0,    0,    0,  138,
+  139, 170,  171,  202,  203,  234,  235,  206,  207,    0,    0,    0,    0,
+    0,   0,  154,  155,  186,  187,  218,  219,  250,  251,  222,  223,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,  256,  257,  258,
+  259, 260,  261,  262,  263,  264,  265,    0,    0,    0,    0,    0,    0,
+  272, 273,  274,  275,  276,  277,  278,  279,  280,  281,    0,    0,    0,
+    0,   0,    0,  288,  289,  290,  291,  292,  293,  294,  295,  296,  297,
+    0,   0,    0,    0,    0,    0,  304,  305,  306,  307,  308,  309,  310,
+  311, 312,  313,    0,    0,    0,    0,    0,    0,  320,  321,  322,  323,
+  324, 325,  326,  327,  328,  329,    0,    0,    0,    0,    0,    0,  336,
+  337, 338,  339,  340,  341,  342,  343,  344,  345,    0,    0,    0,    0,
+    0,   0,  352,  353,  354,  355,  356,  357,  358,  359,  360,  361,    0,
+    0,   0,    0,    0,    0,  368,  369,  370,  371,  372,  373,  374,  375,
+  376, 377,    0,    0,    0,    0,    0,    0,  266,  267,  298,  299,  330,
+  331, 362,  363,  334,  335,    0,    0,    0,    0,    0,    0,  282,  283,
+  314, 315,  346,  347,  378,  379,  350,  351,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,  384,  385,  386,  387,  388,  389,  390,
+  391, 392,  393,    0,    0,    0,    0,    0,    0,  400,  401,  402,  403,
+  404, 405,  406,  407,  408,  409,    0,    0,    0,    0,    0,    0,  416,
+  417, 418,  419,  420,  421,  422,  423,  424,  425,    0,    0,    0,    0,
+    0,   0,  432,  433,  434,  435,  436,  437,  438,  439,  440,  441,    0,
+    0,   0,    0,    0,    0,  448,  449,  450,  451,  452,  453,  454,  455,
+  456, 457,    0,    0,    0,    0,    0,    0,  464,  465,  466,  467,  468,
+  469, 470,  471,  472,  473,    0,    0,    0,    0,    0,    0,  480,  481,
+  482, 483,  484,  485,  486,  487,  488,  489,    0,    0,    0,    0,    0,
+    0, 496,  497,  498,  499,  500,  501,  502,  503,  504,  505,    0,    0,
+    0,   0,    0,    0,  394,  395,  426,  427,  458,  459,  490,  491,  462,
+  463,   0,    0,    0,    0,    0,    0,  410,  411,  442,  443,  474,  475,
+  506, 507,  478,  479,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,  512,  513,  514,  515,  516,  517,  518,  519,  520,  521,    0,
+    0,   0,    0,    0,    0,  528,  529,  530,  531,  532,  533,  534,  535,
+  536, 537,    0,    0,    0,    0,    0,    0,  544,  545,  546,  547,  548,
+  549, 550,  551,  552,  553,    0,    0,    0,    0,    0,    0,  560,  561,
+  562, 563,  564,  565,  566,  567,  568,  569,    0,    0,    0,    0,    0,
+    0, 576,  577,  578,  579,  580,  581,  582,  583,  584,  585,    0,    0,
+    0,   0,    0,    0,  592,  593,  594,  595,  596,  597,  598,  599,  600,
+  601,   0,    0,    0,    0,    0,    0,  608,  609,  610,  611,  612,  613,
+  614, 615,  616,  617,    0,    0,    0,    0,    0,    0,  624,  625,  626,
+  627, 628,  629,  630,  631,  632,  633,    0,    0,    0,    0,    0,    0,
+  522, 523,  554,  555,  586,  587,  618,  619,  590,  591,    0,    0,    0,
+    0,   0,    0,  538,  539,  570,  571,  602,  603,  634,  635,  606,  607,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,  640,  641,
+  642, 643,  644,  645,  646,  647,  648,  649,    0,    0,    0,    0,    0,
+    0, 656,  657,  658,  659,  660,  661,  662,  663,  664,  665,    0,    0,
+    0,   0,    0,    0,  672,  673,  674,  675,  676,  677,  678,  679,  680,
+  681,   0,    0,    0,    0,    0,    0,  688,  689,  690,  691,  692,  693,
+  694, 695,  696,  697,    0,    0,    0,    0,    0,    0,  704,  705,  706,
+  707, 708,  709,  710,  711,  712,  713,    0,    0,    0,    0,    0,    0,
+  720, 721,  722,  723,  724,  725,  726,  727,  728,  729,    0,    0,    0,
+    0,   0,    0,  736,  737,  738,  739,  740,  741,  742,  743,  744,  745,
+    0,   0,    0,    0,    0,    0,  752,  753,  754,  755,  756,  757,  758,
+  759, 760,  761,    0,    0,    0,    0,    0,    0,  650,  651,  682,  683,
+  714, 715,  746,  747,  718,  719,    0,    0,    0,    0,    0,    0,  666,
+  667, 698,  699,  730,  731,  762,  763,  734,  735,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,  768,  769,  770,  771,  772,  773,
+  774, 775,  776,  777,    0,    0,    0,    0,    0,    0,  784,  785,  786,
+  787, 788,  789,  790,  791,  792,  793,    0,    0,    0,    0,    0,    0,
+  800, 801,  802,  803,  804,  805,  806,  807,  808,  809,    0,    0,    0,
+    0,   0,    0,  816,  817,  818,  819,  820,  821,  822,  823,  824,  825,
+    0,   0,    0,    0,    0,    0,  832,  833,  834,  835,  836,  837,  838,
+  839, 840,  841,    0,    0,    0,    0,    0,    0,  848,  849,  850,  851,
+  852, 853,  854,  855,  856,  857,    0,    0,    0,    0,    0,    0,  864,
+  865, 866,  867,  868,  869,  870,  871,  872,  873,    0,    0,    0,    0,
+    0,   0,  880,  881,  882,  883,  884,  885,  886,  887,  888,  889,    0,
+    0,   0,    0,    0,    0,  778,  779,  810,  811,  842,  843,  874,  875,
+  846, 847,    0,    0,    0,    0,    0,    0,  794,  795,  826,  827,  858,
+  859, 890,  891,  862,  863,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,  896,  897,  898,  899,  900,  901,  902,  903,  904,  905,
+    0,   0,    0,    0,    0,    0,  912,  913,  914,  915,  916,  917,  918,
+  919, 920,  921,    0,    0,    0,    0,    0,    0,  928,  929,  930,  931,
+  932, 933,  934,  935,  936,  937,    0,    0,    0,    0,    0,    0,  944,
+  945, 946,  947,  948,  949,  950,  951,  952,  953,    0,    0,    0,    0,
+    0,   0,  960,  961,  962,  963,  964,  965,  966,  967,  968,  969,    0,
+    0,   0,    0,    0,    0,  976,  977,  978,  979,  980,  981,  982,  983,
+  984, 985,    0,    0,    0,    0,    0,    0,  992,  993,  994,  995,  996,
+  997, 998,  999, 1000, 1001,    0,    0,    0,    0,    0,    0, 1008, 1009,
+ 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017,    0,   0,    0,    0,    0,
+    0, 906,  907,  938,  939,  970,  971, 1002, 1003,  974,  975,    0,    0,
+    0,   0,    0,    0,  922,  923,  954,  955,  986,  987, 1018, 1019,  990,
+  991,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   12,
+   13, 268,  269,  524,  525,  780,  781,   46,   47,    0,    0,    0,    0,
+    0,   0,   28,   29,  284,  285,  540,  541,  796,  797,   62,   63,    0,
+    0,   0,    0,    0,    0,   44,   45,  300,  301,  556,  557,  812,  813,
+  302, 303,    0,    0,    0,    0,    0,    0,   60,   61,  316,  317,  572,
+  573, 828,  829,  318,  319,    0,    0,    0,    0,    0,    0,   76,   77,
+  332, 333,  588,  589,  844,  845,  558,  559,    0,    0,    0,    0,    0,
+    0,  92,   93,  348,  349,  604,  605,  860,  861,  574,  575,    0,    0,
+    0,   0,    0,    0,  108,  109,  364,  365,  620,  621,  876,  877,  814,
+  815,   0,    0,    0,    0,    0,    0,  124,  125,  380,  381,  636,  637,
+  892, 893,  830,  831,    0,    0,    0,    0,    0,    0,   14,   15,  270,
+  271, 526,  527,  782,  783,  110,  111,    0,    0,    0,    0,    0,    0,
+   30,  31,  286,  287,  542,  543,  798,  799,  126,  127,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+    0,   0,    0,    0,    0,    0,    0,    0,  140,  141,  396,  397,  652,
+  653, 908,  909,  174,  175,    0,    0,    0,    0,    0,    0,  156,  157,
+  412, 413,  668,  669,  924,  925,  190,  191,    0,    0,    0,    0,    0,
+    0, 172,  173,  428,  429,  684,  685,  940,  941,  430,  431,    0,    0,
+    0,   0,    0,    0,  188,  189,  444,  445,  700,  701,  956,  957,  446,
+  447,   0,    0,    0,    0,    0,    0,  204,  205,  460,  461,  716,  717,
+  972, 973,  686,  687,    0,    0,    0,    0,    0,    0,  220,  221,  476,
+  477, 732,  733,  988,  989,  702,  703,    0,    0,    0,    0,    0,    0,
+  236, 237,  492,  493,  748,  749, 1004, 1005,  942,  943,    0,    0,    0,
+    0,   0,    0,  252,  253,  508,  509,  764,  765, 1020, 1021,  958,  959,
+    0,   0,    0,    0,    0,    0,  142,  143,  398,  399,  654,  655,  910,
+  911, 238,  239,    0,    0,    0,    0,    0,    0,  158,  159,  414,  415,
+  670, 671,  926,  927,  254,  255};
+#endif
+
+#if defined(DEC_DPD2BCD) && DEC_DPD2BCD==1 && !defined(DECDPD2BCD)
+#define DECDPD2BCD
+
+const uint16_t DPD2BCD[1024]={   0,    1,    2,    3,    4,    5,    6,    7,
+    8,   9,  128,  129, 2048, 2049, 2176, 2177,   16,   17,   18,   19,   20,
+   21,  22,   23,   24,   25,  144,  145, 2064, 2065, 2192, 2193,   32,   33,
+   34,  35,   36,   37,   38,   39,   40,   41,  130,  131, 2080, 2081, 2056,
+ 2057,  48,   49,   50,   51,   52,   53,   54,   55,   56,   57,  146,  147,
+ 2096, 2097, 2072, 2073,   64,  65,   66,   67,   68,   69,   70,   71,   72,
+   73, 132,  133, 2112, 2113,  136,  137,   80,   81,   82,   83,   84,   85,
+   86,  87,   88,   89,  148,  149, 2128, 2129,  152,  153,   96,   97,   98,
+   99, 100,  101,  102,  103,  104,  105,  134,  135, 2144, 2145, 2184, 2185,
+  112, 113,  114,  115,  116,  117,  118,  119,  120,  121,  150,  151, 2160,
+ 2161, 2200, 2201,  256,  257, 258,  259,  260,  261,  262,  263,  264,  265,
+  384, 385, 2304, 2305, 2432, 2433,  272,  273,  274,  275,  276,  277,  278,
+  279, 280,  281,  400,  401, 2320, 2321, 2448, 2449,  288,  289,  290,  291,
+  292, 293,  294,  295,  296,  297,  386,  387, 2336, 2337, 2312, 2313,  304,
+  305, 306,  307,  308,  309,  310,  311,  312,  313,  402,  403, 2352, 2353,
+ 2328, 2329,  320,  321,  322, 323,  324,  325,  326,  327,  328,  329,  388,
+  389, 2368, 2369,  392,  393, 336,  337,  338,  339,  340,  341,  342,  343,
+  344, 345,  404,  405, 2384, 2385,  408,  409,  352,  353,  354,  355,  356,
+  357, 358,  359,  360,  361,  390,  391, 2400, 2401, 2440, 2441,  368,  369,
+  370, 371,  372,  373,  374,  375,  376,  377,  406,  407, 2416, 2417, 2456,
+ 2457, 512,  513,  514,  515,  516,  517,  518,  519,  520,  521,  640,  641,
+ 2050, 2051, 2178, 2179,  528, 529,  530,  531,  532,  533,  534,  535,  536,
+  537, 656,  657, 2066, 2067, 2194, 2195,  544,  545,  546,  547,  548,  549,
+  550, 551,  552,  553,  642,  643, 2082, 2083, 2088, 2089,  560,  561,  562,
+  563, 564,  565,  566,  567,  568,  569,  658,  659, 2098, 2099, 2104, 2105,
+  576, 577,  578,  579,  580,  581,  582,  583,  584,  585,  644,  645, 2114,
+ 2115, 648,  649,  592,  593,  594,  595,  596,  597,  598,  599,  600,  601,
+  660, 661, 2130, 2131,  664,  665,  608,  609,  610,  611,  612,  613,  614,
+  615, 616,  617,  646,  647, 2146, 2147, 2184, 2185,  624,  625,  626,  627,
+  628, 629,  630,  631,  632,  633,  662,  663, 2162, 2163, 2200, 2201,  768,
+  769, 770,  771,  772,  773,  774,  775,  776,  777,  896,  897, 2306, 2307,
+ 2434, 2435,  784,  785,  786, 787,  788,  789,  790,  791,  792,  793,  912,
+  913, 2322, 2323, 2450, 2451, 800,  801,  802,  803,  804,  805,  806,  807,
+  808, 809,  898,  899, 2338, 2339, 2344, 2345,  816,  817,  818,  819,  820,
+  821, 822,  823,  824,  825,  914,  915, 2354, 2355, 2360, 2361,  832,  833,
+  834, 835,  836,  837,  838,  839,  840,  841,  900,  901, 2370, 2371,  904,
+  905, 848,  849,  850,  851,  852,  853,  854,  855,  856,  857,  916,  917,
+ 2386, 2387,  920,  921,  864, 865,  866,  867,  868,  869,  870,  871,  872,
+  873, 902,  903, 2402, 2403, 2440, 2441,  880,  881,  882,  883,  884,  885,
+  886, 887,  888,  889,  918,  919, 2418, 2419, 2456, 2457, 1024, 1025, 1026,
+ 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1152, 1153, 2052, 2053, 2180, 2181,
+ 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1168, 1169, 2068,
+ 2069, 2196, 2197, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065,
+ 1154, 1155, 2084, 2085, 2120, 2121, 1072, 1073, 1074, 1075, 1076, 1077, 1078,
+ 1079, 1080, 1081, 1170, 1171, 2100, 2101, 2136, 2137, 1088, 1089, 1090, 1091,
+ 1092, 1093, 1094, 1095, 1096, 1097, 1156, 1157, 2116, 2117, 1160, 1161, 1104,
+ 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1172, 1173, 2132, 2133,
+ 1176, 1177, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1158,
+ 1159, 2148, 2149, 2184, 2185, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143,
+ 1144, 1145, 1174, 1175, 2164, 2165, 2200, 2201, 1280, 1281, 1282, 1283, 1284,
+ 1285, 1286, 1287, 1288, 1289, 1408, 1409, 2308, 2309, 2436, 2437, 1296, 1297,
+ 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1424, 1425, 2324, 2325, 2452,
+ 2453, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1410, 1411,
+ 2340, 2341, 2376, 2377, 1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336,
+ 1337, 1426, 1427, 2356, 2357, 2392, 2393, 1344, 1345, 1346, 1347, 1348, 1349,
+ 1350, 1351, 1352, 1353, 1412, 1413, 2372, 2373, 1416, 1417, 1360, 1361, 1362,
+ 1363, 1364, 1365, 1366, 1367, 1368, 1369, 1428, 1429, 2388, 2389, 1432, 1433,
+ 1376, 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1414, 1415, 2404,
+ 2405, 2440, 2441, 1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401,
+ 1430, 1431, 2420, 2421, 2456, 2457, 1536, 1537, 1538, 1539, 1540, 1541, 1542,
+ 1543, 1544, 1545, 1664, 1665, 2054, 2055, 2182, 2183, 1552, 1553, 1554, 1555,
+ 1556, 1557, 1558, 1559, 1560, 1561, 1680, 1681, 2070, 2071, 2198, 2199, 1568,
+ 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1666, 1667, 2086, 2087,
+ 2152, 2153, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1682,
+ 1683, 2102, 2103, 2168, 2169, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607,
+ 1608, 1609, 1668, 1669, 2118, 2119, 1672, 1673, 1616, 1617, 1618, 1619, 1620,
+ 1621, 1622, 1623, 1624, 1625, 1684, 1685, 2134, 2135, 1688, 1689, 1632, 1633,
+ 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1670, 1671, 2150, 2151, 2184,
+ 2185, 1648, 1649, 1650, 1651, 1652, 1653, 1654, 1655, 1656, 1657, 1686, 1687,
+ 2166, 2167, 2200, 2201, 1792, 1793, 1794, 1795, 1796, 1797, 1798, 1799, 1800,
+ 1801, 1920, 1921, 2310, 2311, 2438, 2439, 1808, 1809, 1810, 1811, 1812, 1813,
+ 1814, 1815, 1816, 1817, 1936, 1937, 2326, 2327, 2454, 2455, 1824, 1825, 1826,
+ 1827, 1828, 1829, 1830, 1831, 1832, 1833, 1922, 1923, 2342, 2343, 2408, 2409,
+ 1840, 1841, 1842, 1843, 1844, 1845, 1846, 1847, 1848, 1849, 1938, 1939, 2358,
+ 2359, 2424, 2425, 1856, 1857, 1858, 1859, 1860, 1861, 1862, 1863, 1864, 1865,
+ 1924, 1925, 2374, 2375, 1928, 1929, 1872, 1873, 1874, 1875, 1876, 1877, 1878,
+ 1879, 1880, 1881, 1940, 1941, 2390, 2391, 1944, 1945, 1888, 1889, 1890, 1891,
+ 1892, 1893, 1894, 1895, 1896, 1897, 1926, 1927, 2406, 2407, 2440, 2441, 1904,
+ 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1942, 1943, 2422, 2423,
+ 2456, 2457};
+#endif
+
+#if defined(DEC_BIN2DPD) && DEC_BIN2DPD==1 && !defined(DECBIN2DPD)
+#define DECBIN2DPD
+
+const uint16_t BIN2DPD[1000]={   0,    1,    2,    3,    4,    5,    6,    7,
+    8,   9,   16,   17,   18,   19,   20,   21,   22,   23,   24,   25,   32,
+   33,  34,   35,   36,   37,   38,   39,   40,   41,   48,   49,   50,   51,
+   52,  53,   54,   55,   56,   57,   64,   65,   66,   67,   68,   69,   70,
+   71,  72,   73,   80,   81,   82,   83,   84,   85,   86,   87,   88,   89,
+   96,  97,   98,   99,  100,  101,  102,  103,  104,  105,  112,  113,  114,
+  115, 116,  117,  118,  119,  120,  121,   10,   11,   42,   43,   74,   75,
+  106, 107,   78,   79,   26,   27,   58,   59,   90,   91,  122,  123,   94,
+   95, 128,  129,  130,  131,  132,  133,  134,  135,  136,  137,  144,  145,
+  146, 147,  148,  149,  150,  151,  152,  153,  160,  161,  162,  163,  164,
+  165, 166,  167,  168,  169,  176,  177,  178,  179,  180,  181,  182,  183,
+  184, 185,  192,  193,  194,  195,  196,  197,  198,  199,  200,  201,  208,
+  209, 210,  211,  212,  213,  214,  215,  216,  217,  224,  225,  226,  227,
+  228, 229,  230,  231,  232,  233,  240,  241,  242,  243,  244,  245,  246,
+  247, 248,  249,  138,  139,  170,  171,  202,  203,  234,  235,  206,  207,
+  154, 155,  186,  187,  218,  219,  250,  251,  222,  223,  256,  257,  258,
+  259, 260,  261,  262,  263,  264,  265,  272,  273,  274,  275,  276,  277,
+  278, 279,  280,  281,  288,  289,  290,  291,  292,  293,  294,  295,  296,
+  297, 304,  305,  306,  307,  308,  309,  310,  311,  312,  313,  320,  321,
+  322, 323,  324,  325,  326,  327,  328,  329,  336,  337,  338,  339,  340,
+  341, 342,  343,  344,  345,  352,  353,  354,  355,  356,  357,  358,  359,
+  360, 361,  368,  369,  370,  371,  372,  373,  374,  375,  376,  377,  266,
+  267, 298,  299,  330,  331,  362,  363,  334,  335,  282,  283,  314,  315,
+  346, 347,  378,  379,  350,  351,  384,  385,  386,  387,  388,  389,  390,
+  391, 392,  393,  400,  401,  402,  403,  404,  405,  406,  407,  408,  409,
+  416, 417,  418,  419,  420,  421,  422,  423,  424,  425,  432,  433,  434,
+  435, 436,  437,  438,  439,  440,  441,  448,  449,  450,  451,  452,  453,
+  454, 455,  456,  457,  464,  465,  466,  467,  468,  469,  470,  471,  472,
+  473, 480,  481,  482,  483,  484,  485,  486,  487,  488,  489,  496,  497,
+  498, 499,  500,  501,  502,  503,  504,  505,  394,  395,  426,  427,  458,
+  459, 490,  491,  462,  463,  410,  411,  442,  443,  474,  475,  506,  507,
+  478, 479,  512,  513,  514,  515,  516,  517,  518,  519,  520,  521,  528,
+  529, 530,  531,  532,  533,  534,  535,  536,  537,  544,  545,  546,  547,
+  548, 549,  550,  551,  552,  553,  560,  561,  562,  563,  564,  565,  566,
+  567, 568,  569,  576,  577,  578,  579,  580,  581,  582,  583,  584,  585,
+  592, 593,  594,  595,  596,  597,  598,  599,  600,  601,  608,  609,  610,
+  611, 612,  613,  614,  615,  616,  617,  624,  625,  626,  627,  628,  629,
+  630, 631,  632,  633,  522,  523,  554,  555,  586,  587,  618,  619,  590,
+  591, 538,  539,  570,  571,  602,  603,  634,  635,  606,  607,  640,  641,
+  642, 643,  644,  645,  646,  647,  648,  649,  656,  657,  658,  659,  660,
+  661, 662,  663,  664,  665,  672,  673,  674,  675,  676,  677,  678,  679,
+  680, 681,  688,  689,  690,  691,  692,  693,  694,  695,  696,  697,  704,
+  705, 706,  707,  708,  709,  710,  711,  712,  713,  720,  721,  722,  723,
+  724, 725,  726,  727,  728,  729,  736,  737,  738,  739,  740,  741,  742,
+  743, 744,  745,  752,  753,  754,  755,  756,  757,  758,  759,  760,  761,
+  650, 651,  682,  683,  714,  715,  746,  747,  718,  719,  666,  667,  698,
+  699, 730,  731,  762,  763,  734,  735,  768,  769,  770,  771,  772,  773,
+  774, 775,  776,  777,  784,  785,  786,  787,  788,  789,  790,  791,  792,
+  793, 800,  801,  802,  803,  804,  805,  806,  807,  808,  809,  816,  817,
+  818, 819,  820,  821,  822,  823,  824,  825,  832,  833,  834,  835,  836,
+  837, 838,  839,  840,  841,  848,  849,  850,  851,  852,  853,  854,  855,
+  856, 857,  864,  865,  866,  867,  868,  869,  870,  871,  872,  873,  880,
+  881, 882,  883,  884,  885,  886,  887,  888,  889,  778,  779,  810,  811,
+  842, 843,  874,  875,  846,  847,  794,  795,  826,  827,  858,  859,  890,
+  891, 862,  863,  896,  897,  898,  899,  900,  901,  902,  903,  904,  905,
+  912, 913,  914,  915,  916,  917,  918,  919,  920,  921,  928,  929,  930,
+  931, 932,  933,  934,  935,  936,  937,  944,  945,  946,  947,  948,  949,
+  950, 951,  952,  953,  960,  961,  962,  963,  964,  965,  966,  967,  968,
+  969, 976,  977,  978,  979,  980,  981,  982,  983,  984,  985,  992,  993,
+  994, 995,  996,  997,  998,  999, 1000, 1001, 1008, 1009, 1010, 1011, 1012,
+ 1013, 1014, 1015, 1016, 1017, 906,  907,  938,  939,  970,  971, 1002, 1003,
+  974, 975,  922,  923,  954,  955,  986,  987, 1018, 1019,  990,  991,   12,
+   13, 268,  269,  524,  525,  780,  781,   46,   47,   28,   29,  284,  285,
+  540, 541,  796,  797,   62,   63,   44,   45,  300,  301,  556,  557,  812,
+  813, 302,  303,   60,   61,  316,  317,  572,  573,  828,  829,  318,  319,
+   76,  77,  332,  333,  588,  589,  844,  845,  558,  559,   92,   93,  348,
+  349, 604,  605,  860,  861,  574,  575,  108,  109,  364,  365,  620,  621,
+  876, 877,  814,  815,  124,  125,  380,  381,  636,  637,  892,  893,  830,
+  831,  14,   15,  270,  271,  526,  527,  782,  783,  110,  111,   30,   31,
+  286, 287,  542,  543,  798,  799,  126,  127,  140,  141,  396,  397,  652,
+  653, 908,  909,  174,  175,  156,  157,  412,  413,  668,  669,  924,  925,
+  190, 191,  172,  173,  428,  429,  684,  685,  940,  941,  430,  431,  188,
+  189, 444,  445,  700,  701,  956,  957,  446,  447,  204,  205,  460,  461,
+  716, 717,  972,  973,  686,  687,  220,  221,  476,  477,  732,  733,  988,
+  989, 702,  703,  236,  237,  492,  493,  748,  749, 1004, 1005,  942,  943,
+  252, 253,  508,  509,  764,  765, 1020, 1021,  958,  959,  142,  143,  398,
+  399, 654,  655,  910,  911,  238,  239,  158,  159,  414,  415,  670,  671,
+  926, 927,  254,  255};
+#endif
+
+#if defined(DEC_DPD2BIN) && DEC_DPD2BIN==1 && !defined(DECDPD2BIN)
+#define DECDPD2BIN
+
+const uint16_t DPD2BIN[1024]={   0,    1,    2,    3,    4,    5,    6,    7,
+    8,   9,   80,   81,  800,  801,  880,  881,   10,   11,   12,   13,   14,
+   15,  16,   17,   18,   19,   90,   91,  810,  811,  890,  891,   20,   21,
+   22,  23,   24,   25,   26,   27,   28,   29,   82,   83,  820,  821,  808,
+  809,  30,   31,   32,   33,   34,   35,   36,   37,   38,   39,   92,   93,
+  830, 831,  818,  819,   40,   41,   42,   43,   44,   45,   46,   47,   48,
+   49,  84,   85,  840,  841,   88,   89,   50,   51,   52,   53,   54,   55,
+   56,  57,   58,   59,   94,   95,  850,  851,   98,   99,   60,   61,   62,
+   63,  64,   65,   66,   67,   68,   69,   86,   87,  860,  861,  888,  889,
+   70,  71,   72,   73,   74,   75,   76,   77,   78,   79,   96,   97,  870,
+  871, 898,  899,  100,  101,  102,  103,  104,  105,  106,  107,  108,  109,
+  180, 181,  900,  901,  980,  981,  110,  111,  112,  113,  114,  115,  116,
+  117, 118,  119,  190,  191,  910,  911,  990,  991,  120,  121,  122,  123,
+  124, 125,  126,  127,  128,  129,  182,  183,  920,  921,  908,  909,  130,
+  131, 132,  133,  134,  135,  136,  137,  138,  139,  192,  193,  930,  931,
+  918, 919,  140,  141,  142,  143,  144,  145,  146,  147,  148,  149,  184,
+  185, 940,  941,  188,  189,  150,  151,  152,  153,  154,  155,  156,  157,
+  158, 159,  194,  195,  950,  951,  198,  199,  160,  161,  162,  163,  164,
+  165, 166,  167,  168,  169,  186,  187,  960,  961,  988,  989,  170,  171,
+  172, 173,  174,  175,  176,  177,  178,  179,  196,  197,  970,  971,  998,
+  999, 200,  201,  202,  203,  204,  205,  206,  207,  208,  209,  280,  281,
+  802, 803,  882,  883,  210,  211,  212,  213,  214,  215,  216,  217,  218,
+  219, 290,  291,  812,  813,  892,  893,  220,  221,  222,  223,  224,  225,
+  226, 227,  228,  229,  282,  283,  822,  823,  828,  829,  230,  231,  232,
+  233, 234,  235,  236,  237,  238,  239,  292,  293,  832,  833,  838,  839,
+  240, 241,  242,  243,  244,  245,  246,  247,  248,  249,  284,  285,  842,
+  843, 288,  289,  250,  251,  252,  253,  254,  255,  256,  257,  258,  259,
+  294, 295,  852,  853,  298,  299,  260,  261,  262,  263,  264,  265,  266,
+  267, 268,  269,  286,  287,  862,  863,  888,  889,  270,  271,  272,  273,
+  274, 275,  276,  277,  278,  279,  296,  297,  872,  873,  898,  899,  300,
+  301, 302,  303,  304,  305,  306,  307,  308,  309,  380,  381,  902,  903,
+  982, 983,  310,  311,  312,  313,  314,  315,  316,  317,  318,  319,  390,
+  391, 912,  913,  992,  993,  320,  321,  322,  323,  324,  325,  326,  327,
+  328, 329,  382,  383,  922,  923,  928,  929,  330,  331,  332,  333,  334,
+  335, 336,  337,  338,  339,  392,  393,  932,  933,  938,  939,  340,  341,
+  342, 343,  344,  345,  346,  347,  348,  349,  384,  385,  942,  943,  388,
+  389, 350,  351,  352,  353,  354,  355,  356,  357,  358,  359,  394,  395,
+  952, 953,  398,  399,  360,  361,  362,  363,  364,  365,  366,  367,  368,
+  369, 386,  387,  962,  963,  988,  989,  370,  371,  372,  373,  374,  375,
+  376, 377,  378,  379,  396,  397,  972,  973,  998,  999,  400,  401,  402,
+  403, 404,  405,  406,  407,  408,  409,  480,  481,  804,  805,  884,  885,
+  410, 411,  412,  413,  414,  415,  416,  417,  418,  419,  490,  491,  814,
+  815, 894,  895,  420,  421,  422,  423,  424,  425,  426,  427,  428,  429,
+  482, 483,  824,  825,  848,  849,  430,  431,  432,  433,  434,  435,  436,
+  437, 438,  439,  492,  493,  834,  835,  858,  859,  440,  441,  442,  443,
+  444, 445,  446,  447,  448,  449,  484,  485,  844,  845,  488,  489,  450,
+  451, 452,  453,  454,  455,  456,  457,  458,  459,  494,  495,  854,  855,
+  498, 499,  460,  461,  462,  463,  464,  465,  466,  467,  468,  469,  486,
+  487, 864,  865,  888,  889,  470,  471,  472,  473,  474,  475,  476,  477,
+  478, 479,  496,  497,  874,  875,  898,  899,  500,  501,  502,  503,  504,
+  505, 506,  507,  508,  509,  580,  581,  904,  905,  984,  985,  510,  511,
+  512, 513,  514,  515,  516,  517,  518,  519,  590,  591,  914,  915,  994,
+  995, 520,  521,  522,  523,  524,  525,  526,  527,  528,  529,  582,  583,
+  924, 925,  948,  949,  530,  531,  532,  533,  534,  535,  536,  537,  538,
+  539, 592,  593,  934,  935,  958,  959,  540,  541,  542,  543,  544,  545,
+  546, 547,  548,  549,  584,  585,  944,  945,  588,  589,  550,  551,  552,
+  553, 554,  555,  556,  557,  558,  559,  594,  595,  954,  955,  598,  599,
+  560, 561,  562,  563,  564,  565,  566,  567,  568,  569,  586,  587,  964,
+  965, 988,  989,  570,  571,  572,  573,  574,  575,  576,  577,  578,  579,
+  596, 597,  974,  975,  998,  999,  600,  601,  602,  603,  604,  605,  606,
+  607, 608,  609,  680,  681,  806,  807,  886,  887,  610,  611,  612,  613,
+  614, 615,  616,  617,  618,  619,  690,  691,  816,  817,  896,  897,  620,
+  621, 622,  623,  624,  625,  626,  627,  628,  629,  682,  683,  826,  827,
+  868, 869,  630,  631,  632,  633,  634,  635,  636,  637,  638,  639,  692,
+  693, 836,  837,  878,  879,  640,  641,  642,  643,  644,  645,  646,  647,
+  648, 649,  684,  685,  846,  847,  688,  689,  650,  651,  652,  653,  654,
+  655, 656,  657,  658,  659,  694,  695,  856,  857,  698,  699,  660,  661,
+  662, 663,  664,  665,  666,  667,  668,  669,  686,  687,  866,  867,  888,
+  889, 670,  671,  672,  673,  674,  675,  676,  677,  678,  679,  696,  697,
+  876, 877,  898,  899,  700,  701,  702,  703,  704,  705,  706,  707,  708,
+  709, 780,  781,  906,  907,  986,  987,  710,  711,  712,  713,  714,  715,
+  716, 717,  718,  719,  790,  791,  916,  917,  996,  997,  720,  721,  722,
+  723, 724,  725,  726,  727,  728,  729,  782,  783,  926,  927,  968,  969,
+  730, 731,  732,  733,  734,  735,  736,  737,  738,  739,  792,  793,  936,
+  937, 978,  979,  740,  741,  742,  743,  744,  745,  746,  747,  748,  749,
+  784, 785,  946,  947,  788,  789,  750,  751,  752,  753,  754,  755,  756,
+  757, 758,  759,  794,  795,  956,  957,  798,  799,  760,  761,  762,  763,
+  764, 765,  766,  767,  768,  769,  786,  787,  966,  967,  988,  989,  770,
+  771, 772,  773,  774,  775,  776,  777,  778,  779,  796,  797,  976,  977,
+  998, 999};
+#endif
+
+#if defined(DEC_DPD2BINK) && DEC_DPD2BINK==1 && !defined(DECDPD2BINK)
+#define DECDPD2BINK
+
+const uint32_t DPD2BINK[1024]={              0,   1000,   2000,   3000,   4000,   5000,
+   6000,   7000,   8000,   9000,  80000,  81000, 800000, 801000, 880000, 881000,
+  10000,  11000,  12000,  13000,  14000,  15000,  16000,  17000,  18000,  19000,
+  90000,  91000, 810000, 811000, 890000, 891000,  20000,  21000,  22000,  23000,
+  24000,  25000,  26000,  27000,  28000,  29000,  82000,  83000, 820000, 821000,
+ 808000, 809000,  30000,  31000,  32000,  33000,  34000,  35000,  36000,  37000,
+  38000,  39000,  92000,  93000, 830000, 831000, 818000, 819000,  40000,  41000,
+  42000,  43000,  44000,  45000,  46000,  47000,  48000,  49000,  84000,  85000,
+ 840000, 841000,  88000,  89000,  50000,  51000,  52000,  53000,  54000,  55000,
+  56000,  57000,  58000,  59000,  94000,  95000, 850000, 851000,  98000,  99000,
+  60000,  61000,  62000,  63000,  64000,  65000,  66000,  67000,  68000,  69000,
+  86000,  87000, 860000, 861000, 888000, 889000,  70000,  71000,  72000,  73000,
+  74000,  75000,  76000,  77000,  78000,  79000,  96000,  97000, 870000, 871000,
+ 898000, 899000, 100000, 101000, 102000, 103000, 104000, 105000, 106000, 107000,
+ 108000, 109000, 180000, 181000, 900000, 901000, 980000, 981000, 110000, 111000,
+ 112000, 113000, 114000, 115000, 116000, 117000, 118000, 119000, 190000, 191000,
+ 910000, 911000, 990000, 991000, 120000, 121000, 122000, 123000, 124000, 125000,
+ 126000, 127000, 128000, 129000, 182000, 183000, 920000, 921000, 908000, 909000,
+ 130000, 131000, 132000, 133000, 134000, 135000, 136000, 137000, 138000, 139000,
+ 192000, 193000, 930000, 931000, 918000, 919000, 140000, 141000, 142000, 143000,
+ 144000, 145000, 146000, 147000, 148000, 149000, 184000, 185000, 940000, 941000,
+ 188000, 189000, 150000, 151000, 152000, 153000, 154000, 155000, 156000, 157000,
+ 158000, 159000, 194000, 195000, 950000, 951000, 198000, 199000, 160000, 161000,
+ 162000, 163000, 164000, 165000, 166000, 167000, 168000, 169000, 186000, 187000,
+ 960000, 961000, 988000, 989000, 170000, 171000, 172000, 173000, 174000, 175000,
+ 176000, 177000, 178000, 179000, 196000, 197000, 970000, 971000, 998000, 999000,
+ 200000, 201000, 202000, 203000, 204000, 205000, 206000, 207000, 208000, 209000,
+ 280000, 281000, 802000, 803000, 882000, 883000, 210000, 211000, 212000, 213000,
+ 214000, 215000, 216000, 217000, 218000, 219000, 290000, 291000, 812000, 813000,
+ 892000, 893000, 220000, 221000, 222000, 223000, 224000, 225000, 226000, 227000,
+ 228000, 229000, 282000, 283000, 822000, 823000, 828000, 829000, 230000, 231000,
+ 232000, 233000, 234000, 235000, 236000, 237000, 238000, 239000, 292000, 293000,
+ 832000, 833000, 838000, 839000, 240000, 241000, 242000, 243000, 244000, 245000,
+ 246000, 247000, 248000, 249000, 284000, 285000, 842000, 843000, 288000, 289000,
+ 250000, 251000, 252000, 253000, 254000, 255000, 256000, 257000, 258000, 259000,
+ 294000, 295000, 852000, 853000, 298000, 299000, 260000, 261000, 262000, 263000,
+ 264000, 265000, 266000, 267000, 268000, 269000, 286000, 287000, 862000, 863000,
+ 888000, 889000, 270000, 271000, 272000, 273000, 274000, 275000, 276000, 277000,
+ 278000, 279000, 296000, 297000, 872000, 873000, 898000, 899000, 300000, 301000,
+ 302000, 303000, 304000, 305000, 306000, 307000, 308000, 309000, 380000, 381000,
+ 902000, 903000, 982000, 983000, 310000, 311000, 312000, 313000, 314000, 315000,
+ 316000, 317000, 318000, 319000, 390000, 391000, 912000, 913000, 992000, 993000,
+ 320000, 321000, 322000, 323000, 324000, 325000, 326000, 327000, 328000, 329000,
+ 382000, 383000, 922000, 923000, 928000, 929000, 330000, 331000, 332000, 333000,
+ 334000, 335000, 336000, 337000, 338000, 339000, 392000, 393000, 932000, 933000,
+ 938000, 939000, 340000, 341000, 342000, 343000, 344000, 345000, 346000, 347000,
+ 348000, 349000, 384000, 385000, 942000, 943000, 388000, 389000, 350000, 351000,
+ 352000, 353000, 354000, 355000, 356000, 357000, 358000, 359000, 394000, 395000,
+ 952000, 953000, 398000, 399000, 360000, 361000, 362000, 363000, 364000, 365000,
+ 366000, 367000, 368000, 369000, 386000, 387000, 962000, 963000, 988000, 989000,
+ 370000, 371000, 372000, 373000, 374000, 375000, 376000, 377000, 378000, 379000,
+ 396000, 397000, 972000, 973000, 998000, 999000, 400000, 401000, 402000, 403000,
+ 404000, 405000, 406000, 407000, 408000, 409000, 480000, 481000, 804000, 805000,
+ 884000, 885000, 410000, 411000, 412000, 413000, 414000, 415000, 416000, 417000,
+ 418000, 419000, 490000, 491000, 814000, 815000, 894000, 895000, 420000, 421000,
+ 422000, 423000, 424000, 425000, 426000, 427000, 428000, 429000, 482000, 483000,
+ 824000, 825000, 848000, 849000, 430000, 431000, 432000, 433000, 434000, 435000,
+ 436000, 437000, 438000, 439000, 492000, 493000, 834000, 835000, 858000, 859000,
+ 440000, 441000, 442000, 443000, 444000, 445000, 446000, 447000, 448000, 449000,
+ 484000, 485000, 844000, 845000, 488000, 489000, 450000, 451000, 452000, 453000,
+ 454000, 455000, 456000, 457000, 458000, 459000, 494000, 495000, 854000, 855000,
+ 498000, 499000, 460000, 461000, 462000, 463000, 464000, 465000, 466000, 467000,
+ 468000, 469000, 486000, 487000, 864000, 865000, 888000, 889000, 470000, 471000,
+ 472000, 473000, 474000, 475000, 476000, 477000, 478000, 479000, 496000, 497000,
+ 874000, 875000, 898000, 899000, 500000, 501000, 502000, 503000, 504000, 505000,
+ 506000, 507000, 508000, 509000, 580000, 581000, 904000, 905000, 984000, 985000,
+ 510000, 511000, 512000, 513000, 514000, 515000, 516000, 517000, 518000, 519000,
+ 590000, 591000, 914000, 915000, 994000, 995000, 520000, 521000, 522000, 523000,
+ 524000, 525000, 526000, 527000, 528000, 529000, 582000, 583000, 924000, 925000,
+ 948000, 949000, 530000, 531000, 532000, 533000, 534000, 535000, 536000, 537000,
+ 538000, 539000, 592000, 593000, 934000, 935000, 958000, 959000, 540000, 541000,
+ 542000, 543000, 544000, 545000, 546000, 547000, 548000, 549000, 584000, 585000,
+ 944000, 945000, 588000, 589000, 550000, 551000, 552000, 553000, 554000, 555000,
+ 556000, 557000, 558000, 559000, 594000, 595000, 954000, 955000, 598000, 599000,
+ 560000, 561000, 562000, 563000, 564000, 565000, 566000, 567000, 568000, 569000,
+ 586000, 587000, 964000, 965000, 988000, 989000, 570000, 571000, 572000, 573000,
+ 574000, 575000, 576000, 577000, 578000, 579000, 596000, 597000, 974000, 975000,
+ 998000, 999000, 600000, 601000, 602000, 603000, 604000, 605000, 606000, 607000,
+ 608000, 609000, 680000, 681000, 806000, 807000, 886000, 887000, 610000, 611000,
+ 612000, 613000, 614000, 615000, 616000, 617000, 618000, 619000, 690000, 691000,
+ 816000, 817000, 896000, 897000, 620000, 621000, 622000, 623000, 624000, 625000,
+ 626000, 627000, 628000, 629000, 682000, 683000, 826000, 827000, 868000, 869000,
+ 630000, 631000, 632000, 633000, 634000, 635000, 636000, 637000, 638000, 639000,
+ 692000, 693000, 836000, 837000, 878000, 879000, 640000, 641000, 642000, 643000,
+ 644000, 645000, 646000, 647000, 648000, 649000, 684000, 685000, 846000, 847000,
+ 688000, 689000, 650000, 651000, 652000, 653000, 654000, 655000, 656000, 657000,
+ 658000, 659000, 694000, 695000, 856000, 857000, 698000, 699000, 660000, 661000,
+ 662000, 663000, 664000, 665000, 666000, 667000, 668000, 669000, 686000, 687000,
+ 866000, 867000, 888000, 889000, 670000, 671000, 672000, 673000, 674000, 675000,
+ 676000, 677000, 678000, 679000, 696000, 697000, 876000, 877000, 898000, 899000,
+ 700000, 701000, 702000, 703000, 704000, 705000, 706000, 707000, 708000, 709000,
+ 780000, 781000, 906000, 907000, 986000, 987000, 710000, 711000, 712000, 713000,
+ 714000, 715000, 716000, 717000, 718000, 719000, 790000, 791000, 916000, 917000,
+ 996000, 997000, 720000, 721000, 722000, 723000, 724000, 725000, 726000, 727000,
+ 728000, 729000, 782000, 783000, 926000, 927000, 968000, 969000, 730000, 731000,
+ 732000, 733000, 734000, 735000, 736000, 737000, 738000, 739000, 792000, 793000,
+ 936000, 937000, 978000, 979000, 740000, 741000, 742000, 743000, 744000, 745000,
+ 746000, 747000, 748000, 749000, 784000, 785000, 946000, 947000, 788000, 789000,
+ 750000, 751000, 752000, 753000, 754000, 755000, 756000, 757000, 758000, 759000,
+ 794000, 795000, 956000, 957000, 798000, 799000, 760000, 761000, 762000, 763000,
+ 764000, 765000, 766000, 767000, 768000, 769000, 786000, 787000, 966000, 967000,
+ 988000, 989000, 770000, 771000, 772000, 773000, 774000, 775000, 776000, 777000,
+ 778000, 779000, 796000, 797000, 976000, 977000, 998000, 999000};
+#endif
+
+#if defined(DEC_DPD2BINM) && DEC_DPD2BINM==1 && !defined(DECDPD2BINM)
+#define DECDPD2BINM
+
+const uint32_t DPD2BINM[1024]={0,   1000000,   2000000,          3000000,   4000000,
+   5000000,   6000000,  7000000,   8000000,   9000000,  80000000,  81000000,
+ 800000000, 801000000, 880000000, 881000000,  10000000,         11000000,  12000000,
+  13000000,  14000000, 15000000,  16000000,  17000000,  18000000,  19000000,
+  90000000,  91000000, 810000000, 811000000, 890000000, 891000000,  20000000,
+  21000000,  22000000, 23000000,  24000000,  25000000,  26000000,  27000000,
+  28000000,  29000000, 82000000,  83000000, 820000000, 821000000, 808000000,
+ 809000000,  30000000, 31000000,  32000000,  33000000,  34000000,  35000000,
+  36000000,  37000000, 38000000,  39000000,  92000000,  93000000, 830000000,
+ 831000000, 818000000, 819000000,  40000000,  41000000,         42000000,  43000000,
+  44000000,  45000000, 46000000,  47000000,  48000000,  49000000,  84000000,
+  85000000, 840000000, 841000000,  88000000,  89000000,         50000000,  51000000,
+  52000000,  53000000, 54000000,  55000000,  56000000,  57000000,  58000000,
+  59000000,  94000000, 95000000, 850000000, 851000000,  98000000,  99000000,
+  60000000,  61000000, 62000000,  63000000,  64000000,  65000000,  66000000,
+  67000000,  68000000, 69000000,  86000000,  87000000, 860000000, 861000000,
+ 888000000, 889000000, 70000000,  71000000,  72000000,  73000000,  74000000,
+  75000000,  76000000, 77000000,  78000000,  79000000,  96000000,  97000000,
+ 870000000, 871000000, 898000000, 899000000, 100000000, 101000000, 102000000,
+ 103000000, 104000000, 105000000, 106000000, 107000000, 108000000, 109000000,
+ 180000000, 181000000, 900000000, 901000000, 980000000, 981000000, 110000000,
+ 111000000, 112000000, 113000000, 114000000, 115000000, 116000000, 117000000,
+ 118000000, 119000000, 190000000, 191000000, 910000000, 911000000, 990000000,
+ 991000000, 120000000, 121000000, 122000000, 123000000, 124000000, 125000000,
+ 126000000, 127000000, 128000000, 129000000, 182000000, 183000000, 920000000,
+ 921000000, 908000000, 909000000, 130000000, 131000000, 132000000, 133000000,
+ 134000000, 135000000, 136000000, 137000000, 138000000, 139000000, 192000000,
+ 193000000, 930000000, 931000000, 918000000, 919000000, 140000000, 141000000,
+ 142000000, 143000000, 144000000, 145000000, 146000000, 147000000, 148000000,
+ 149000000, 184000000, 185000000, 940000000, 941000000, 188000000, 189000000,
+ 150000000, 151000000, 152000000, 153000000, 154000000, 155000000, 156000000,
+ 157000000, 158000000, 159000000, 194000000, 195000000, 950000000, 951000000,
+ 198000000, 199000000, 160000000, 161000000, 162000000, 163000000, 164000000,
+ 165000000, 166000000, 167000000, 168000000, 169000000, 186000000, 187000000,
+ 960000000, 961000000, 988000000, 989000000, 170000000, 171000000, 172000000,
+ 173000000, 174000000, 175000000, 176000000, 177000000, 178000000, 179000000,
+ 196000000, 197000000, 970000000, 971000000, 998000000, 999000000, 200000000,
+ 201000000, 202000000, 203000000, 204000000, 205000000, 206000000, 207000000,
+ 208000000, 209000000, 280000000, 281000000, 802000000, 803000000, 882000000,
+ 883000000, 210000000, 211000000, 212000000, 213000000, 214000000, 215000000,
+ 216000000, 217000000, 218000000, 219000000, 290000000, 291000000, 812000000,
+ 813000000, 892000000, 893000000, 220000000, 221000000, 222000000, 223000000,
+ 224000000, 225000000, 226000000, 227000000, 228000000, 229000000, 282000000,
+ 283000000, 822000000, 823000000, 828000000, 829000000, 230000000, 231000000,
+ 232000000, 233000000, 234000000, 235000000, 236000000, 237000000, 238000000,
+ 239000000, 292000000, 293000000, 832000000, 833000000, 838000000, 839000000,
+ 240000000, 241000000, 242000000, 243000000, 244000000, 245000000, 246000000,
+ 247000000, 248000000, 249000000, 284000000, 285000000, 842000000, 843000000,
+ 288000000, 289000000, 250000000, 251000000, 252000000, 253000000, 254000000,
+ 255000000, 256000000, 257000000, 258000000, 259000000, 294000000, 295000000,
+ 852000000, 853000000, 298000000, 299000000, 260000000, 261000000, 262000000,
+ 263000000, 264000000, 265000000, 266000000, 267000000, 268000000, 269000000,
+ 286000000, 287000000, 862000000, 863000000, 888000000, 889000000, 270000000,
+ 271000000, 272000000, 273000000, 274000000, 275000000, 276000000, 277000000,
+ 278000000, 279000000, 296000000, 297000000, 872000000, 873000000, 898000000,
+ 899000000, 300000000, 301000000, 302000000, 303000000, 304000000, 305000000,
+ 306000000, 307000000, 308000000, 309000000, 380000000, 381000000, 902000000,
+ 903000000, 982000000, 983000000, 310000000, 311000000, 312000000, 313000000,
+ 314000000, 315000000, 316000000, 317000000, 318000000, 319000000, 390000000,
+ 391000000, 912000000, 913000000, 992000000, 993000000, 320000000, 321000000,
+ 322000000, 323000000, 324000000, 325000000, 326000000, 327000000, 328000000,
+ 329000000, 382000000, 383000000, 922000000, 923000000, 928000000, 929000000,
+ 330000000, 331000000, 332000000, 333000000, 334000000, 335000000, 336000000,
+ 337000000, 338000000, 339000000, 392000000, 393000000, 932000000, 933000000,
+ 938000000, 939000000, 340000000, 341000000, 342000000, 343000000, 344000000,
+ 345000000, 346000000, 347000000, 348000000, 349000000, 384000000, 385000000,
+ 942000000, 943000000, 388000000, 389000000, 350000000, 351000000, 352000000,
+ 353000000, 354000000, 355000000, 356000000, 357000000, 358000000, 359000000,
+ 394000000, 395000000, 952000000, 953000000, 398000000, 399000000, 360000000,
+ 361000000, 362000000, 363000000, 364000000, 365000000, 366000000, 367000000,
+ 368000000, 369000000, 386000000, 387000000, 962000000, 963000000, 988000000,
+ 989000000, 370000000, 371000000, 372000000, 373000000, 374000000, 375000000,
+ 376000000, 377000000, 378000000, 379000000, 396000000, 397000000, 972000000,
+ 973000000, 998000000, 999000000, 400000000, 401000000, 402000000, 403000000,
+ 404000000, 405000000, 406000000, 407000000, 408000000, 409000000, 480000000,
+ 481000000, 804000000, 805000000, 884000000, 885000000, 410000000, 411000000,
+ 412000000, 413000000, 414000000, 415000000, 416000000, 417000000, 418000000,
+ 419000000, 490000000, 491000000, 814000000, 815000000, 894000000, 895000000,
+ 420000000, 421000000, 422000000, 423000000, 424000000, 425000000, 426000000,
+ 427000000, 428000000, 429000000, 482000000, 483000000, 824000000, 825000000,
+ 848000000, 849000000, 430000000, 431000000, 432000000, 433000000, 434000000,
+ 435000000, 436000000, 437000000, 438000000, 439000000, 492000000, 493000000,
+ 834000000, 835000000, 858000000, 859000000, 440000000, 441000000, 442000000,
+ 443000000, 444000000, 445000000, 446000000, 447000000, 448000000, 449000000,
+ 484000000, 485000000, 844000000, 845000000, 488000000, 489000000, 450000000,
+ 451000000, 452000000, 453000000, 454000000, 455000000, 456000000, 457000000,
+ 458000000, 459000000, 494000000, 495000000, 854000000, 855000000, 498000000,
+ 499000000, 460000000, 461000000, 462000000, 463000000, 464000000, 465000000,
+ 466000000, 467000000, 468000000, 469000000, 486000000, 487000000, 864000000,
+ 865000000, 888000000, 889000000, 470000000, 471000000, 472000000, 473000000,
+ 474000000, 475000000, 476000000, 477000000, 478000000, 479000000, 496000000,
+ 497000000, 874000000, 875000000, 898000000, 899000000, 500000000, 501000000,
+ 502000000, 503000000, 504000000, 505000000, 506000000, 507000000, 508000000,
+ 509000000, 580000000, 581000000, 904000000, 905000000, 984000000, 985000000,
+ 510000000, 511000000, 512000000, 513000000, 514000000, 515000000, 516000000,
+ 517000000, 518000000, 519000000, 590000000, 591000000, 914000000, 915000000,
+ 994000000, 995000000, 520000000, 521000000, 522000000, 523000000, 524000000,
+ 525000000, 526000000, 527000000, 528000000, 529000000, 582000000, 583000000,
+ 924000000, 925000000, 948000000, 949000000, 530000000, 531000000, 532000000,
+ 533000000, 534000000, 535000000, 536000000, 537000000, 538000000, 539000000,
+ 592000000, 593000000, 934000000, 935000000, 958000000, 959000000, 540000000,
+ 541000000, 542000000, 543000000, 544000000, 545000000, 546000000, 547000000,
+ 548000000, 549000000, 584000000, 585000000, 944000000, 945000000, 588000000,
+ 589000000, 550000000, 551000000, 552000000, 553000000, 554000000, 555000000,
+ 556000000, 557000000, 558000000, 559000000, 594000000, 595000000, 954000000,
+ 955000000, 598000000, 599000000, 560000000, 561000000, 562000000, 563000000,
+ 564000000, 565000000, 566000000, 567000000, 568000000, 569000000, 586000000,
+ 587000000, 964000000, 965000000, 988000000, 989000000, 570000000, 571000000,
+ 572000000, 573000000, 574000000, 575000000, 576000000, 577000000, 578000000,
+ 579000000, 596000000, 597000000, 974000000, 975000000, 998000000, 999000000,
+ 600000000, 601000000, 602000000, 603000000, 604000000, 605000000, 606000000,
+ 607000000, 608000000, 609000000, 680000000, 681000000, 806000000, 807000000,
+ 886000000, 887000000, 610000000, 611000000, 612000000, 613000000, 614000000,
+ 615000000, 616000000, 617000000, 618000000, 619000000, 690000000, 691000000,
+ 816000000, 817000000, 896000000, 897000000, 620000000, 621000000, 622000000,
+ 623000000, 624000000, 625000000, 626000000, 627000000, 628000000, 629000000,
+ 682000000, 683000000, 826000000, 827000000, 868000000, 869000000, 630000000,
+ 631000000, 632000000, 633000000, 634000000, 635000000, 636000000, 637000000,
+ 638000000, 639000000, 692000000, 693000000, 836000000, 837000000, 878000000,
+ 879000000, 640000000, 641000000, 642000000, 643000000, 644000000, 645000000,
+ 646000000, 647000000, 648000000, 649000000, 684000000, 685000000, 846000000,
+ 847000000, 688000000, 689000000, 650000000, 651000000, 652000000, 653000000,
+ 654000000, 655000000, 656000000, 657000000, 658000000, 659000000, 694000000,
+ 695000000, 856000000, 857000000, 698000000, 699000000, 660000000, 661000000,
+ 662000000, 663000000, 664000000, 665000000, 666000000, 667000000, 668000000,
+ 669000000, 686000000, 687000000, 866000000, 867000000, 888000000, 889000000,
+ 670000000, 671000000, 672000000, 673000000, 674000000, 675000000, 676000000,
+ 677000000, 678000000, 679000000, 696000000, 697000000, 876000000, 877000000,
+ 898000000, 899000000, 700000000, 701000000, 702000000, 703000000, 704000000,
+ 705000000, 706000000, 707000000, 708000000, 709000000, 780000000, 781000000,
+ 906000000, 907000000, 986000000, 987000000, 710000000, 711000000, 712000000,
+ 713000000, 714000000, 715000000, 716000000, 717000000, 718000000, 719000000,
+ 790000000, 791000000, 916000000, 917000000, 996000000, 997000000, 720000000,
+ 721000000, 722000000, 723000000, 724000000, 725000000, 726000000, 727000000,
+ 728000000, 729000000, 782000000, 783000000, 926000000, 927000000, 968000000,
+ 969000000, 730000000, 731000000, 732000000, 733000000, 734000000, 735000000,
+ 736000000, 737000000, 738000000, 739000000, 792000000, 793000000, 936000000,
+ 937000000, 978000000, 979000000, 740000000, 741000000, 742000000, 743000000,
+ 744000000, 745000000, 746000000, 747000000, 748000000, 749000000, 784000000,
+ 785000000, 946000000, 947000000, 788000000, 789000000, 750000000, 751000000,
+ 752000000, 753000000, 754000000, 755000000, 756000000, 757000000, 758000000,
+ 759000000, 794000000, 795000000, 956000000, 957000000, 798000000, 799000000,
+ 760000000, 761000000, 762000000, 763000000, 764000000, 765000000, 766000000,
+ 767000000, 768000000, 769000000, 786000000, 787000000, 966000000, 967000000,
+ 988000000, 989000000, 770000000, 771000000, 772000000, 773000000, 774000000,
+ 775000000, 776000000, 777000000, 778000000, 779000000, 796000000, 797000000,
+ 976000000, 977000000, 998000000, 999000000};
+#endif
+
+#if defined(DEC_BIN2CHAR) && DEC_BIN2CHAR==1 && !defined(DECBIN2CHAR)
+#define DECBIN2CHAR
+
+const uint8_t BIN2CHAR[4001]={
+ '\0','0','0','0', '\1','0','0','1', '\1','0','0','2', '\1','0','0','3', '\1','0','0','4',
+ '\1','0','0','5', '\1','0','0','6', '\1','0','0','7', '\1','0','0','8', '\1','0','0','9',
+ '\2','0','1','0', '\2','0','1','1', '\2','0','1','2', '\2','0','1','3', '\2','0','1','4',
+ '\2','0','1','5', '\2','0','1','6', '\2','0','1','7', '\2','0','1','8', '\2','0','1','9',
+ '\2','0','2','0', '\2','0','2','1', '\2','0','2','2', '\2','0','2','3', '\2','0','2','4',
+ '\2','0','2','5', '\2','0','2','6', '\2','0','2','7', '\2','0','2','8', '\2','0','2','9',
+ '\2','0','3','0', '\2','0','3','1', '\2','0','3','2', '\2','0','3','3', '\2','0','3','4',
+ '\2','0','3','5', '\2','0','3','6', '\2','0','3','7', '\2','0','3','8', '\2','0','3','9',
+ '\2','0','4','0', '\2','0','4','1', '\2','0','4','2', '\2','0','4','3', '\2','0','4','4',
+ '\2','0','4','5', '\2','0','4','6', '\2','0','4','7', '\2','0','4','8', '\2','0','4','9',
+ '\2','0','5','0', '\2','0','5','1', '\2','0','5','2', '\2','0','5','3', '\2','0','5','4',
+ '\2','0','5','5', '\2','0','5','6', '\2','0','5','7', '\2','0','5','8', '\2','0','5','9',
+ '\2','0','6','0', '\2','0','6','1', '\2','0','6','2', '\2','0','6','3', '\2','0','6','4',
+ '\2','0','6','5', '\2','0','6','6', '\2','0','6','7', '\2','0','6','8', '\2','0','6','9',
+ '\2','0','7','0', '\2','0','7','1', '\2','0','7','2', '\2','0','7','3', '\2','0','7','4',
+ '\2','0','7','5', '\2','0','7','6', '\2','0','7','7', '\2','0','7','8', '\2','0','7','9',
+ '\2','0','8','0', '\2','0','8','1', '\2','0','8','2', '\2','0','8','3', '\2','0','8','4',
+ '\2','0','8','5', '\2','0','8','6', '\2','0','8','7', '\2','0','8','8', '\2','0','8','9',
+ '\2','0','9','0', '\2','0','9','1', '\2','0','9','2', '\2','0','9','3', '\2','0','9','4',
+ '\2','0','9','5', '\2','0','9','6', '\2','0','9','7', '\2','0','9','8', '\2','0','9','9',
+ '\3','1','0','0', '\3','1','0','1', '\3','1','0','2', '\3','1','0','3', '\3','1','0','4',
+ '\3','1','0','5', '\3','1','0','6', '\3','1','0','7', '\3','1','0','8', '\3','1','0','9',
+ '\3','1','1','0', '\3','1','1','1', '\3','1','1','2', '\3','1','1','3', '\3','1','1','4',
+ '\3','1','1','5', '\3','1','1','6', '\3','1','1','7', '\3','1','1','8', '\3','1','1','9',
+ '\3','1','2','0', '\3','1','2','1', '\3','1','2','2', '\3','1','2','3', '\3','1','2','4',
+ '\3','1','2','5', '\3','1','2','6', '\3','1','2','7', '\3','1','2','8', '\3','1','2','9',
+ '\3','1','3','0', '\3','1','3','1', '\3','1','3','2', '\3','1','3','3', '\3','1','3','4',
+ '\3','1','3','5', '\3','1','3','6', '\3','1','3','7', '\3','1','3','8', '\3','1','3','9',
+ '\3','1','4','0', '\3','1','4','1', '\3','1','4','2', '\3','1','4','3', '\3','1','4','4',
+ '\3','1','4','5', '\3','1','4','6', '\3','1','4','7', '\3','1','4','8', '\3','1','4','9',
+ '\3','1','5','0', '\3','1','5','1', '\3','1','5','2', '\3','1','5','3', '\3','1','5','4',
+ '\3','1','5','5', '\3','1','5','6', '\3','1','5','7', '\3','1','5','8', '\3','1','5','9',
+ '\3','1','6','0', '\3','1','6','1', '\3','1','6','2', '\3','1','6','3', '\3','1','6','4',
+ '\3','1','6','5', '\3','1','6','6', '\3','1','6','7', '\3','1','6','8', '\3','1','6','9',
+ '\3','1','7','0', '\3','1','7','1', '\3','1','7','2', '\3','1','7','3', '\3','1','7','4',
+ '\3','1','7','5', '\3','1','7','6', '\3','1','7','7', '\3','1','7','8', '\3','1','7','9',
+ '\3','1','8','0', '\3','1','8','1', '\3','1','8','2', '\3','1','8','3', '\3','1','8','4',
+ '\3','1','8','5', '\3','1','8','6', '\3','1','8','7', '\3','1','8','8', '\3','1','8','9',
+ '\3','1','9','0', '\3','1','9','1', '\3','1','9','2', '\3','1','9','3', '\3','1','9','4',
+ '\3','1','9','5', '\3','1','9','6', '\3','1','9','7', '\3','1','9','8', '\3','1','9','9',
+ '\3','2','0','0', '\3','2','0','1', '\3','2','0','2', '\3','2','0','3', '\3','2','0','4',
+ '\3','2','0','5', '\3','2','0','6', '\3','2','0','7', '\3','2','0','8', '\3','2','0','9',
+ '\3','2','1','0', '\3','2','1','1', '\3','2','1','2', '\3','2','1','3', '\3','2','1','4',
+ '\3','2','1','5', '\3','2','1','6', '\3','2','1','7', '\3','2','1','8', '\3','2','1','9',
+ '\3','2','2','0', '\3','2','2','1', '\3','2','2','2', '\3','2','2','3', '\3','2','2','4',
+ '\3','2','2','5', '\3','2','2','6', '\3','2','2','7', '\3','2','2','8', '\3','2','2','9',
+ '\3','2','3','0', '\3','2','3','1', '\3','2','3','2', '\3','2','3','3', '\3','2','3','4',
+ '\3','2','3','5', '\3','2','3','6', '\3','2','3','7', '\3','2','3','8', '\3','2','3','9',
+ '\3','2','4','0', '\3','2','4','1', '\3','2','4','2', '\3','2','4','3', '\3','2','4','4',
+ '\3','2','4','5', '\3','2','4','6', '\3','2','4','7', '\3','2','4','8', '\3','2','4','9',
+ '\3','2','5','0', '\3','2','5','1', '\3','2','5','2', '\3','2','5','3', '\3','2','5','4',
+ '\3','2','5','5', '\3','2','5','6', '\3','2','5','7', '\3','2','5','8', '\3','2','5','9',
+ '\3','2','6','0', '\3','2','6','1', '\3','2','6','2', '\3','2','6','3', '\3','2','6','4',
+ '\3','2','6','5', '\3','2','6','6', '\3','2','6','7', '\3','2','6','8', '\3','2','6','9',
+ '\3','2','7','0', '\3','2','7','1', '\3','2','7','2', '\3','2','7','3', '\3','2','7','4',
+ '\3','2','7','5', '\3','2','7','6', '\3','2','7','7', '\3','2','7','8', '\3','2','7','9',
+ '\3','2','8','0', '\3','2','8','1', '\3','2','8','2', '\3','2','8','3', '\3','2','8','4',
+ '\3','2','8','5', '\3','2','8','6', '\3','2','8','7', '\3','2','8','8', '\3','2','8','9',
+ '\3','2','9','0', '\3','2','9','1', '\3','2','9','2', '\3','2','9','3', '\3','2','9','4',
+ '\3','2','9','5', '\3','2','9','6', '\3','2','9','7', '\3','2','9','8', '\3','2','9','9',
+ '\3','3','0','0', '\3','3','0','1', '\3','3','0','2', '\3','3','0','3', '\3','3','0','4',
+ '\3','3','0','5', '\3','3','0','6', '\3','3','0','7', '\3','3','0','8', '\3','3','0','9',
+ '\3','3','1','0', '\3','3','1','1', '\3','3','1','2', '\3','3','1','3', '\3','3','1','4',
+ '\3','3','1','5', '\3','3','1','6', '\3','3','1','7', '\3','3','1','8', '\3','3','1','9',
+ '\3','3','2','0', '\3','3','2','1', '\3','3','2','2', '\3','3','2','3', '\3','3','2','4',
+ '\3','3','2','5', '\3','3','2','6', '\3','3','2','7', '\3','3','2','8', '\3','3','2','9',
+ '\3','3','3','0', '\3','3','3','1', '\3','3','3','2', '\3','3','3','3', '\3','3','3','4',
+ '\3','3','3','5', '\3','3','3','6', '\3','3','3','7', '\3','3','3','8', '\3','3','3','9',
+ '\3','3','4','0', '\3','3','4','1', '\3','3','4','2', '\3','3','4','3', '\3','3','4','4',
+ '\3','3','4','5', '\3','3','4','6', '\3','3','4','7', '\3','3','4','8', '\3','3','4','9',
+ '\3','3','5','0', '\3','3','5','1', '\3','3','5','2', '\3','3','5','3', '\3','3','5','4',
+ '\3','3','5','5', '\3','3','5','6', '\3','3','5','7', '\3','3','5','8', '\3','3','5','9',
+ '\3','3','6','0', '\3','3','6','1', '\3','3','6','2', '\3','3','6','3', '\3','3','6','4',
+ '\3','3','6','5', '\3','3','6','6', '\3','3','6','7', '\3','3','6','8', '\3','3','6','9',
+ '\3','3','7','0', '\3','3','7','1', '\3','3','7','2', '\3','3','7','3', '\3','3','7','4',
+ '\3','3','7','5', '\3','3','7','6', '\3','3','7','7', '\3','3','7','8', '\3','3','7','9',
+ '\3','3','8','0', '\3','3','8','1', '\3','3','8','2', '\3','3','8','3', '\3','3','8','4',
+ '\3','3','8','5', '\3','3','8','6', '\3','3','8','7', '\3','3','8','8', '\3','3','8','9',
+ '\3','3','9','0', '\3','3','9','1', '\3','3','9','2', '\3','3','9','3', '\3','3','9','4',
+ '\3','3','9','5', '\3','3','9','6', '\3','3','9','7', '\3','3','9','8', '\3','3','9','9',
+ '\3','4','0','0', '\3','4','0','1', '\3','4','0','2', '\3','4','0','3', '\3','4','0','4',
+ '\3','4','0','5', '\3','4','0','6', '\3','4','0','7', '\3','4','0','8', '\3','4','0','9',
+ '\3','4','1','0', '\3','4','1','1', '\3','4','1','2', '\3','4','1','3', '\3','4','1','4',
+ '\3','4','1','5', '\3','4','1','6', '\3','4','1','7', '\3','4','1','8', '\3','4','1','9',
+ '\3','4','2','0', '\3','4','2','1', '\3','4','2','2', '\3','4','2','3', '\3','4','2','4',
+ '\3','4','2','5', '\3','4','2','6', '\3','4','2','7', '\3','4','2','8', '\3','4','2','9',
+ '\3','4','3','0', '\3','4','3','1', '\3','4','3','2', '\3','4','3','3', '\3','4','3','4',
+ '\3','4','3','5', '\3','4','3','6', '\3','4','3','7', '\3','4','3','8', '\3','4','3','9',
+ '\3','4','4','0', '\3','4','4','1', '\3','4','4','2', '\3','4','4','3', '\3','4','4','4',
+ '\3','4','4','5', '\3','4','4','6', '\3','4','4','7', '\3','4','4','8', '\3','4','4','9',
+ '\3','4','5','0', '\3','4','5','1', '\3','4','5','2', '\3','4','5','3', '\3','4','5','4',
+ '\3','4','5','5', '\3','4','5','6', '\3','4','5','7', '\3','4','5','8', '\3','4','5','9',
+ '\3','4','6','0', '\3','4','6','1', '\3','4','6','2', '\3','4','6','3', '\3','4','6','4',
+ '\3','4','6','5', '\3','4','6','6', '\3','4','6','7', '\3','4','6','8', '\3','4','6','9',
+ '\3','4','7','0', '\3','4','7','1', '\3','4','7','2', '\3','4','7','3', '\3','4','7','4',
+ '\3','4','7','5', '\3','4','7','6', '\3','4','7','7', '\3','4','7','8', '\3','4','7','9',
+ '\3','4','8','0', '\3','4','8','1', '\3','4','8','2', '\3','4','8','3', '\3','4','8','4',
+ '\3','4','8','5', '\3','4','8','6', '\3','4','8','7', '\3','4','8','8', '\3','4','8','9',
+ '\3','4','9','0', '\3','4','9','1', '\3','4','9','2', '\3','4','9','3', '\3','4','9','4',
+ '\3','4','9','5', '\3','4','9','6', '\3','4','9','7', '\3','4','9','8', '\3','4','9','9',
+ '\3','5','0','0', '\3','5','0','1', '\3','5','0','2', '\3','5','0','3', '\3','5','0','4',
+ '\3','5','0','5', '\3','5','0','6', '\3','5','0','7', '\3','5','0','8', '\3','5','0','9',
+ '\3','5','1','0', '\3','5','1','1', '\3','5','1','2', '\3','5','1','3', '\3','5','1','4',
+ '\3','5','1','5', '\3','5','1','6', '\3','5','1','7', '\3','5','1','8', '\3','5','1','9',
+ '\3','5','2','0', '\3','5','2','1', '\3','5','2','2', '\3','5','2','3', '\3','5','2','4',
+ '\3','5','2','5', '\3','5','2','6', '\3','5','2','7', '\3','5','2','8', '\3','5','2','9',
+ '\3','5','3','0', '\3','5','3','1', '\3','5','3','2', '\3','5','3','3', '\3','5','3','4',
+ '\3','5','3','5', '\3','5','3','6', '\3','5','3','7', '\3','5','3','8', '\3','5','3','9',
+ '\3','5','4','0', '\3','5','4','1', '\3','5','4','2', '\3','5','4','3', '\3','5','4','4',
+ '\3','5','4','5', '\3','5','4','6', '\3','5','4','7', '\3','5','4','8', '\3','5','4','9',
+ '\3','5','5','0', '\3','5','5','1', '\3','5','5','2', '\3','5','5','3', '\3','5','5','4',
+ '\3','5','5','5', '\3','5','5','6', '\3','5','5','7', '\3','5','5','8', '\3','5','5','9',
+ '\3','5','6','0', '\3','5','6','1', '\3','5','6','2', '\3','5','6','3', '\3','5','6','4',
+ '\3','5','6','5', '\3','5','6','6', '\3','5','6','7', '\3','5','6','8', '\3','5','6','9',
+ '\3','5','7','0', '\3','5','7','1', '\3','5','7','2', '\3','5','7','3', '\3','5','7','4',
+ '\3','5','7','5', '\3','5','7','6', '\3','5','7','7', '\3','5','7','8', '\3','5','7','9',
+ '\3','5','8','0', '\3','5','8','1', '\3','5','8','2', '\3','5','8','3', '\3','5','8','4',
+ '\3','5','8','5', '\3','5','8','6', '\3','5','8','7', '\3','5','8','8', '\3','5','8','9',
+ '\3','5','9','0', '\3','5','9','1', '\3','5','9','2', '\3','5','9','3', '\3','5','9','4',
+ '\3','5','9','5', '\3','5','9','6', '\3','5','9','7', '\3','5','9','8', '\3','5','9','9',
+ '\3','6','0','0', '\3','6','0','1', '\3','6','0','2', '\3','6','0','3', '\3','6','0','4',
+ '\3','6','0','5', '\3','6','0','6', '\3','6','0','7', '\3','6','0','8', '\3','6','0','9',
+ '\3','6','1','0', '\3','6','1','1', '\3','6','1','2', '\3','6','1','3', '\3','6','1','4',
+ '\3','6','1','5', '\3','6','1','6', '\3','6','1','7', '\3','6','1','8', '\3','6','1','9',
+ '\3','6','2','0', '\3','6','2','1', '\3','6','2','2', '\3','6','2','3', '\3','6','2','4',
+ '\3','6','2','5', '\3','6','2','6', '\3','6','2','7', '\3','6','2','8', '\3','6','2','9',
+ '\3','6','3','0', '\3','6','3','1', '\3','6','3','2', '\3','6','3','3', '\3','6','3','4',
+ '\3','6','3','5', '\3','6','3','6', '\3','6','3','7', '\3','6','3','8', '\3','6','3','9',
+ '\3','6','4','0', '\3','6','4','1', '\3','6','4','2', '\3','6','4','3', '\3','6','4','4',
+ '\3','6','4','5', '\3','6','4','6', '\3','6','4','7', '\3','6','4','8', '\3','6','4','9',
+ '\3','6','5','0', '\3','6','5','1', '\3','6','5','2', '\3','6','5','3', '\3','6','5','4',
+ '\3','6','5','5', '\3','6','5','6', '\3','6','5','7', '\3','6','5','8', '\3','6','5','9',
+ '\3','6','6','0', '\3','6','6','1', '\3','6','6','2', '\3','6','6','3', '\3','6','6','4',
+ '\3','6','6','5', '\3','6','6','6', '\3','6','6','7', '\3','6','6','8', '\3','6','6','9',
+ '\3','6','7','0', '\3','6','7','1', '\3','6','7','2', '\3','6','7','3', '\3','6','7','4',
+ '\3','6','7','5', '\3','6','7','6', '\3','6','7','7', '\3','6','7','8', '\3','6','7','9',
+ '\3','6','8','0', '\3','6','8','1', '\3','6','8','2', '\3','6','8','3', '\3','6','8','4',
+ '\3','6','8','5', '\3','6','8','6', '\3','6','8','7', '\3','6','8','8', '\3','6','8','9',
+ '\3','6','9','0', '\3','6','9','1', '\3','6','9','2', '\3','6','9','3', '\3','6','9','4',
+ '\3','6','9','5', '\3','6','9','6', '\3','6','9','7', '\3','6','9','8', '\3','6','9','9',
+ '\3','7','0','0', '\3','7','0','1', '\3','7','0','2', '\3','7','0','3', '\3','7','0','4',
+ '\3','7','0','5', '\3','7','0','6', '\3','7','0','7', '\3','7','0','8', '\3','7','0','9',
+ '\3','7','1','0', '\3','7','1','1', '\3','7','1','2', '\3','7','1','3', '\3','7','1','4',
+ '\3','7','1','5', '\3','7','1','6', '\3','7','1','7', '\3','7','1','8', '\3','7','1','9',
+ '\3','7','2','0', '\3','7','2','1', '\3','7','2','2', '\3','7','2','3', '\3','7','2','4',
+ '\3','7','2','5', '\3','7','2','6', '\3','7','2','7', '\3','7','2','8', '\3','7','2','9',
+ '\3','7','3','0', '\3','7','3','1', '\3','7','3','2', '\3','7','3','3', '\3','7','3','4',
+ '\3','7','3','5', '\3','7','3','6', '\3','7','3','7', '\3','7','3','8', '\3','7','3','9',
+ '\3','7','4','0', '\3','7','4','1', '\3','7','4','2', '\3','7','4','3', '\3','7','4','4',
+ '\3','7','4','5', '\3','7','4','6', '\3','7','4','7', '\3','7','4','8', '\3','7','4','9',
+ '\3','7','5','0', '\3','7','5','1', '\3','7','5','2', '\3','7','5','3', '\3','7','5','4',
+ '\3','7','5','5', '\3','7','5','6', '\3','7','5','7', '\3','7','5','8', '\3','7','5','9',
+ '\3','7','6','0', '\3','7','6','1', '\3','7','6','2', '\3','7','6','3', '\3','7','6','4',
+ '\3','7','6','5', '\3','7','6','6', '\3','7','6','7', '\3','7','6','8', '\3','7','6','9',
+ '\3','7','7','0', '\3','7','7','1', '\3','7','7','2', '\3','7','7','3', '\3','7','7','4',
+ '\3','7','7','5', '\3','7','7','6', '\3','7','7','7', '\3','7','7','8', '\3','7','7','9',
+ '\3','7','8','0', '\3','7','8','1', '\3','7','8','2', '\3','7','8','3', '\3','7','8','4',
+ '\3','7','8','5', '\3','7','8','6', '\3','7','8','7', '\3','7','8','8', '\3','7','8','9',
+ '\3','7','9','0', '\3','7','9','1', '\3','7','9','2', '\3','7','9','3', '\3','7','9','4',
+ '\3','7','9','5', '\3','7','9','6', '\3','7','9','7', '\3','7','9','8', '\3','7','9','9',
+ '\3','8','0','0', '\3','8','0','1', '\3','8','0','2', '\3','8','0','3', '\3','8','0','4',
+ '\3','8','0','5', '\3','8','0','6', '\3','8','0','7', '\3','8','0','8', '\3','8','0','9',
+ '\3','8','1','0', '\3','8','1','1', '\3','8','1','2', '\3','8','1','3', '\3','8','1','4',
+ '\3','8','1','5', '\3','8','1','6', '\3','8','1','7', '\3','8','1','8', '\3','8','1','9',
+ '\3','8','2','0', '\3','8','2','1', '\3','8','2','2', '\3','8','2','3', '\3','8','2','4',
+ '\3','8','2','5', '\3','8','2','6', '\3','8','2','7', '\3','8','2','8', '\3','8','2','9',
+ '\3','8','3','0', '\3','8','3','1', '\3','8','3','2', '\3','8','3','3', '\3','8','3','4',
+ '\3','8','3','5', '\3','8','3','6', '\3','8','3','7', '\3','8','3','8', '\3','8','3','9',
+ '\3','8','4','0', '\3','8','4','1', '\3','8','4','2', '\3','8','4','3', '\3','8','4','4',
+ '\3','8','4','5', '\3','8','4','6', '\3','8','4','7', '\3','8','4','8', '\3','8','4','9',
+ '\3','8','5','0', '\3','8','5','1', '\3','8','5','2', '\3','8','5','3', '\3','8','5','4',
+ '\3','8','5','5', '\3','8','5','6', '\3','8','5','7', '\3','8','5','8', '\3','8','5','9',
+ '\3','8','6','0', '\3','8','6','1', '\3','8','6','2', '\3','8','6','3', '\3','8','6','4',
+ '\3','8','6','5', '\3','8','6','6', '\3','8','6','7', '\3','8','6','8', '\3','8','6','9',
+ '\3','8','7','0', '\3','8','7','1', '\3','8','7','2', '\3','8','7','3', '\3','8','7','4',
+ '\3','8','7','5', '\3','8','7','6', '\3','8','7','7', '\3','8','7','8', '\3','8','7','9',
+ '\3','8','8','0', '\3','8','8','1', '\3','8','8','2', '\3','8','8','3', '\3','8','8','4',
+ '\3','8','8','5', '\3','8','8','6', '\3','8','8','7', '\3','8','8','8', '\3','8','8','9',
+ '\3','8','9','0', '\3','8','9','1', '\3','8','9','2', '\3','8','9','3', '\3','8','9','4',
+ '\3','8','9','5', '\3','8','9','6', '\3','8','9','7', '\3','8','9','8', '\3','8','9','9',
+ '\3','9','0','0', '\3','9','0','1', '\3','9','0','2', '\3','9','0','3', '\3','9','0','4',
+ '\3','9','0','5', '\3','9','0','6', '\3','9','0','7', '\3','9','0','8', '\3','9','0','9',
+ '\3','9','1','0', '\3','9','1','1', '\3','9','1','2', '\3','9','1','3', '\3','9','1','4',
+ '\3','9','1','5', '\3','9','1','6', '\3','9','1','7', '\3','9','1','8', '\3','9','1','9',
+ '\3','9','2','0', '\3','9','2','1', '\3','9','2','2', '\3','9','2','3', '\3','9','2','4',
+ '\3','9','2','5', '\3','9','2','6', '\3','9','2','7', '\3','9','2','8', '\3','9','2','9',
+ '\3','9','3','0', '\3','9','3','1', '\3','9','3','2', '\3','9','3','3', '\3','9','3','4',
+ '\3','9','3','5', '\3','9','3','6', '\3','9','3','7', '\3','9','3','8', '\3','9','3','9',
+ '\3','9','4','0', '\3','9','4','1', '\3','9','4','2', '\3','9','4','3', '\3','9','4','4',
+ '\3','9','4','5', '\3','9','4','6', '\3','9','4','7', '\3','9','4','8', '\3','9','4','9',
+ '\3','9','5','0', '\3','9','5','1', '\3','9','5','2', '\3','9','5','3', '\3','9','5','4',
+ '\3','9','5','5', '\3','9','5','6', '\3','9','5','7', '\3','9','5','8', '\3','9','5','9',
+ '\3','9','6','0', '\3','9','6','1', '\3','9','6','2', '\3','9','6','3', '\3','9','6','4',
+ '\3','9','6','5', '\3','9','6','6', '\3','9','6','7', '\3','9','6','8', '\3','9','6','9',
+ '\3','9','7','0', '\3','9','7','1', '\3','9','7','2', '\3','9','7','3', '\3','9','7','4',
+ '\3','9','7','5', '\3','9','7','6', '\3','9','7','7', '\3','9','7','8', '\3','9','7','9',
+ '\3','9','8','0', '\3','9','8','1', '\3','9','8','2', '\3','9','8','3', '\3','9','8','4',
+ '\3','9','8','5', '\3','9','8','6', '\3','9','8','7', '\3','9','8','8', '\3','9','8','9',
+ '\3','9','9','0', '\3','9','9','1', '\3','9','9','2', '\3','9','9','3', '\3','9','9','4',
+ '\3','9','9','5', '\3','9','9','6', '\3','9','9','7', '\3','9','9','8', '\3','9','9','9', '\0'};
+#endif
+
+#if defined(DEC_DPD2BCD8) && DEC_DPD2BCD8==1 && !defined(DECDPD2BCD8)
+#define DECDPD2BCD8
+
+const uint8_t DPD2BCD8[4096]={
+ 0,0,0,0, 0,0,1,1, 0,0,2,1, 0,0,3,1, 0,0,4,1, 0,0,5,1, 0,0,6,1, 0,0,7,1, 0,0,8,1,
+ 0,0,9,1, 0,8,0,2, 0,8,1,2, 8,0,0,3, 8,0,1,3, 8,8,0,3, 8,8,1,3, 0,1,0,2, 0,1,1,2,
+ 0,1,2,2, 0,1,3,2, 0,1,4,2, 0,1,5,2, 0,1,6,2, 0,1,7,2, 0,1,8,2, 0,1,9,2, 0,9,0,2,
+ 0,9,1,2, 8,1,0,3, 8,1,1,3, 8,9,0,3, 8,9,1,3, 0,2,0,2, 0,2,1,2, 0,2,2,2, 0,2,3,2,
+ 0,2,4,2, 0,2,5,2, 0,2,6,2, 0,2,7,2, 0,2,8,2, 0,2,9,2, 0,8,2,2, 0,8,3,2, 8,2,0,3,
+ 8,2,1,3, 8,0,8,3, 8,0,9,3, 0,3,0,2, 0,3,1,2, 0,3,2,2, 0,3,3,2, 0,3,4,2, 0,3,5,2,
+ 0,3,6,2, 0,3,7,2, 0,3,8,2, 0,3,9,2, 0,9,2,2, 0,9,3,2, 8,3,0,3, 8,3,1,3, 8,1,8,3,
+ 8,1,9,3, 0,4,0,2, 0,4,1,2, 0,4,2,2, 0,4,3,2, 0,4,4,2, 0,4,5,2, 0,4,6,2, 0,4,7,2,
+ 0,4,8,2, 0,4,9,2, 0,8,4,2, 0,8,5,2, 8,4,0,3, 8,4,1,3, 0,8,8,2, 0,8,9,2, 0,5,0,2,
+ 0,5,1,2, 0,5,2,2, 0,5,3,2, 0,5,4,2, 0,5,5,2, 0,5,6,2, 0,5,7,2, 0,5,8,2, 0,5,9,2,
+ 0,9,4,2, 0,9,5,2, 8,5,0,3, 8,5,1,3, 0,9,8,2, 0,9,9,2, 0,6,0,2, 0,6,1,2, 0,6,2,2,
+ 0,6,3,2, 0,6,4,2, 0,6,5,2, 0,6,6,2, 0,6,7,2, 0,6,8,2, 0,6,9,2, 0,8,6,2, 0,8,7,2,
+ 8,6,0,3, 8,6,1,3, 8,8,8,3, 8,8,9,3, 0,7,0,2, 0,7,1,2, 0,7,2,2, 0,7,3,2, 0,7,4,2,
+ 0,7,5,2, 0,7,6,2, 0,7,7,2, 0,7,8,2, 0,7,9,2, 0,9,6,2, 0,9,7,2, 8,7,0,3, 8,7,1,3,
+ 8,9,8,3, 8,9,9,3, 1,0,0,3, 1,0,1,3, 1,0,2,3, 1,0,3,3, 1,0,4,3, 1,0,5,3, 1,0,6,3,
+ 1,0,7,3, 1,0,8,3, 1,0,9,3, 1,8,0,3, 1,8,1,3, 9,0,0,3, 9,0,1,3, 9,8,0,3, 9,8,1,3,
+ 1,1,0,3, 1,1,1,3, 1,1,2,3, 1,1,3,3, 1,1,4,3, 1,1,5,3, 1,1,6,3, 1,1,7,3, 1,1,8,3,
+ 1,1,9,3, 1,9,0,3, 1,9,1,3, 9,1,0,3, 9,1,1,3, 9,9,0,3, 9,9,1,3, 1,2,0,3, 1,2,1,3,
+ 1,2,2,3, 1,2,3,3, 1,2,4,3, 1,2,5,3, 1,2,6,3, 1,2,7,3, 1,2,8,3, 1,2,9,3, 1,8,2,3,
+ 1,8,3,3, 9,2,0,3, 9,2,1,3, 9,0,8,3, 9,0,9,3, 1,3,0,3, 1,3,1,3, 1,3,2,3, 1,3,3,3,
+ 1,3,4,3, 1,3,5,3, 1,3,6,3, 1,3,7,3, 1,3,8,3, 1,3,9,3, 1,9,2,3, 1,9,3,3, 9,3,0,3,
+ 9,3,1,3, 9,1,8,3, 9,1,9,3, 1,4,0,3, 1,4,1,3, 1,4,2,3, 1,4,3,3, 1,4,4,3, 1,4,5,3,
+ 1,4,6,3, 1,4,7,3, 1,4,8,3, 1,4,9,3, 1,8,4,3, 1,8,5,3, 9,4,0,3, 9,4,1,3, 1,8,8,3,
+ 1,8,9,3, 1,5,0,3, 1,5,1,3, 1,5,2,3, 1,5,3,3, 1,5,4,3, 1,5,5,3, 1,5,6,3, 1,5,7,3,
+ 1,5,8,3, 1,5,9,3, 1,9,4,3, 1,9,5,3, 9,5,0,3, 9,5,1,3, 1,9,8,3, 1,9,9,3, 1,6,0,3,
+ 1,6,1,3, 1,6,2,3, 1,6,3,3, 1,6,4,3, 1,6,5,3, 1,6,6,3, 1,6,7,3, 1,6,8,3, 1,6,9,3,
+ 1,8,6,3, 1,8,7,3, 9,6,0,3, 9,6,1,3, 9,8,8,3, 9,8,9,3, 1,7,0,3, 1,7,1,3, 1,7,2,3,
+ 1,7,3,3, 1,7,4,3, 1,7,5,3, 1,7,6,3, 1,7,7,3, 1,7,8,3, 1,7,9,3, 1,9,6,3, 1,9,7,3,
+ 9,7,0,3, 9,7,1,3, 9,9,8,3, 9,9,9,3, 2,0,0,3, 2,0,1,3, 2,0,2,3, 2,0,3,3, 2,0,4,3,
+ 2,0,5,3, 2,0,6,3, 2,0,7,3, 2,0,8,3, 2,0,9,3, 2,8,0,3, 2,8,1,3, 8,0,2,3, 8,0,3,3,
+ 8,8,2,3, 8,8,3,3, 2,1,0,3, 2,1,1,3, 2,1,2,3, 2,1,3,3, 2,1,4,3, 2,1,5,3, 2,1,6,3,
+ 2,1,7,3, 2,1,8,3, 2,1,9,3, 2,9,0,3, 2,9,1,3, 8,1,2,3, 8,1,3,3, 8,9,2,3, 8,9,3,3,
+ 2,2,0,3, 2,2,1,3, 2,2,2,3, 2,2,3,3, 2,2,4,3, 2,2,5,3, 2,2,6,3, 2,2,7,3, 2,2,8,3,
+ 2,2,9,3, 2,8,2,3, 2,8,3,3, 8,2,2,3, 8,2,3,3, 8,2,8,3, 8,2,9,3, 2,3,0,3, 2,3,1,3,
+ 2,3,2,3, 2,3,3,3, 2,3,4,3, 2,3,5,3, 2,3,6,3, 2,3,7,3, 2,3,8,3, 2,3,9,3, 2,9,2,3,
+ 2,9,3,3, 8,3,2,3, 8,3,3,3, 8,3,8,3, 8,3,9,3, 2,4,0,3, 2,4,1,3, 2,4,2,3, 2,4,3,3,
+ 2,4,4,3, 2,4,5,3, 2,4,6,3, 2,4,7,3, 2,4,8,3, 2,4,9,3, 2,8,4,3, 2,8,5,3, 8,4,2,3,
+ 8,4,3,3, 2,8,8,3, 2,8,9,3, 2,5,0,3, 2,5,1,3, 2,5,2,3, 2,5,3,3, 2,5,4,3, 2,5,5,3,
+ 2,5,6,3, 2,5,7,3, 2,5,8,3, 2,5,9,3, 2,9,4,3, 2,9,5,3, 8,5,2,3, 8,5,3,3, 2,9,8,3,
+ 2,9,9,3, 2,6,0,3, 2,6,1,3, 2,6,2,3, 2,6,3,3, 2,6,4,3, 2,6,5,3, 2,6,6,3, 2,6,7,3,
+ 2,6,8,3, 2,6,9,3, 2,8,6,3, 2,8,7,3, 8,6,2,3, 8,6,3,3, 8,8,8,3, 8,8,9,3, 2,7,0,3,
+ 2,7,1,3, 2,7,2,3, 2,7,3,3, 2,7,4,3, 2,7,5,3, 2,7,6,3, 2,7,7,3, 2,7,8,3, 2,7,9,3,
+ 2,9,6,3, 2,9,7,3, 8,7,2,3, 8,7,3,3, 8,9,8,3, 8,9,9,3, 3,0,0,3, 3,0,1,3, 3,0,2,3,
+ 3,0,3,3, 3,0,4,3, 3,0,5,3, 3,0,6,3, 3,0,7,3, 3,0,8,3, 3,0,9,3, 3,8,0,3, 3,8,1,3,
+ 9,0,2,3, 9,0,3,3, 9,8,2,3, 9,8,3,3, 3,1,0,3, 3,1,1,3, 3,1,2,3, 3,1,3,3, 3,1,4,3,
+ 3,1,5,3, 3,1,6,3, 3,1,7,3, 3,1,8,3, 3,1,9,3, 3,9,0,3, 3,9,1,3, 9,1,2,3, 9,1,3,3,
+ 9,9,2,3, 9,9,3,3, 3,2,0,3, 3,2,1,3, 3,2,2,3, 3,2,3,3, 3,2,4,3, 3,2,5,3, 3,2,6,3,
+ 3,2,7,3, 3,2,8,3, 3,2,9,3, 3,8,2,3, 3,8,3,3, 9,2,2,3, 9,2,3,3, 9,2,8,3, 9,2,9,3,
+ 3,3,0,3, 3,3,1,3, 3,3,2,3, 3,3,3,3, 3,3,4,3, 3,3,5,3, 3,3,6,3, 3,3,7,3, 3,3,8,3,
+ 3,3,9,3, 3,9,2,3, 3,9,3,3, 9,3,2,3, 9,3,3,3, 9,3,8,3, 9,3,9,3, 3,4,0,3, 3,4,1,3,
+ 3,4,2,3, 3,4,3,3, 3,4,4,3, 3,4,5,3, 3,4,6,3, 3,4,7,3, 3,4,8,3, 3,4,9,3, 3,8,4,3,
+ 3,8,5,3, 9,4,2,3, 9,4,3,3, 3,8,8,3, 3,8,9,3, 3,5,0,3, 3,5,1,3, 3,5,2,3, 3,5,3,3,
+ 3,5,4,3, 3,5,5,3, 3,5,6,3, 3,5,7,3, 3,5,8,3, 3,5,9,3, 3,9,4,3, 3,9,5,3, 9,5,2,3,
+ 9,5,3,3, 3,9,8,3, 3,9,9,3, 3,6,0,3, 3,6,1,3, 3,6,2,3, 3,6,3,3, 3,6,4,3, 3,6,5,3,
+ 3,6,6,3, 3,6,7,3, 3,6,8,3, 3,6,9,3, 3,8,6,3, 3,8,7,3, 9,6,2,3, 9,6,3,3, 9,8,8,3,
+ 9,8,9,3, 3,7,0,3, 3,7,1,3, 3,7,2,3, 3,7,3,3, 3,7,4,3, 3,7,5,3, 3,7,6,3, 3,7,7,3,
+ 3,7,8,3, 3,7,9,3, 3,9,6,3, 3,9,7,3, 9,7,2,3, 9,7,3,3, 9,9,8,3, 9,9,9,3, 4,0,0,3,
+ 4,0,1,3, 4,0,2,3, 4,0,3,3, 4,0,4,3, 4,0,5,3, 4,0,6,3, 4,0,7,3, 4,0,8,3, 4,0,9,3,
+ 4,8,0,3, 4,8,1,3, 8,0,4,3, 8,0,5,3, 8,8,4,3, 8,8,5,3, 4,1,0,3, 4,1,1,3, 4,1,2,3,
+ 4,1,3,3, 4,1,4,3, 4,1,5,3, 4,1,6,3, 4,1,7,3, 4,1,8,3, 4,1,9,3, 4,9,0,3, 4,9,1,3,
+ 8,1,4,3, 8,1,5,3, 8,9,4,3, 8,9,5,3, 4,2,0,3, 4,2,1,3, 4,2,2,3, 4,2,3,3, 4,2,4,3,
+ 4,2,5,3, 4,2,6,3, 4,2,7,3, 4,2,8,3, 4,2,9,3, 4,8,2,3, 4,8,3,3, 8,2,4,3, 8,2,5,3,
+ 8,4,8,3, 8,4,9,3, 4,3,0,3, 4,3,1,3, 4,3,2,3, 4,3,3,3, 4,3,4,3, 4,3,5,3, 4,3,6,3,
+ 4,3,7,3, 4,3,8,3, 4,3,9,3, 4,9,2,3, 4,9,3,3, 8,3,4,3, 8,3,5,3, 8,5,8,3, 8,5,9,3,
+ 4,4,0,3, 4,4,1,3, 4,4,2,3, 4,4,3,3, 4,4,4,3, 4,4,5,3, 4,4,6,3, 4,4,7,3, 4,4,8,3,
+ 4,4,9,3, 4,8,4,3, 4,8,5,3, 8,4,4,3, 8,4,5,3, 4,8,8,3, 4,8,9,3, 4,5,0,3, 4,5,1,3,
+ 4,5,2,3, 4,5,3,3, 4,5,4,3, 4,5,5,3, 4,5,6,3, 4,5,7,3, 4,5,8,3, 4,5,9,3, 4,9,4,3,
+ 4,9,5,3, 8,5,4,3, 8,5,5,3, 4,9,8,3, 4,9,9,3, 4,6,0,3, 4,6,1,3, 4,6,2,3, 4,6,3,3,
+ 4,6,4,3, 4,6,5,3, 4,6,6,3, 4,6,7,3, 4,6,8,3, 4,6,9,3, 4,8,6,3, 4,8,7,3, 8,6,4,3,
+ 8,6,5,3, 8,8,8,3, 8,8,9,3, 4,7,0,3, 4,7,1,3, 4,7,2,3, 4,7,3,3, 4,7,4,3, 4,7,5,3,
+ 4,7,6,3, 4,7,7,3, 4,7,8,3, 4,7,9,3, 4,9,6,3, 4,9,7,3, 8,7,4,3, 8,7,5,3, 8,9,8,3,
+ 8,9,9,3, 5,0,0,3, 5,0,1,3, 5,0,2,3, 5,0,3,3, 5,0,4,3, 5,0,5,3, 5,0,6,3, 5,0,7,3,
+ 5,0,8,3, 5,0,9,3, 5,8,0,3, 5,8,1,3, 9,0,4,3, 9,0,5,3, 9,8,4,3, 9,8,5,3, 5,1,0,3,
+ 5,1,1,3, 5,1,2,3, 5,1,3,3, 5,1,4,3, 5,1,5,3, 5,1,6,3, 5,1,7,3, 5,1,8,3, 5,1,9,3,
+ 5,9,0,3, 5,9,1,3, 9,1,4,3, 9,1,5,3, 9,9,4,3, 9,9,5,3, 5,2,0,3, 5,2,1,3, 5,2,2,3,
+ 5,2,3,3, 5,2,4,3, 5,2,5,3, 5,2,6,3, 5,2,7,3, 5,2,8,3, 5,2,9,3, 5,8,2,3, 5,8,3,3,
+ 9,2,4,3, 9,2,5,3, 9,4,8,3, 9,4,9,3, 5,3,0,3, 5,3,1,3, 5,3,2,3, 5,3,3,3, 5,3,4,3,
+ 5,3,5,3, 5,3,6,3, 5,3,7,3, 5,3,8,3, 5,3,9,3, 5,9,2,3, 5,9,3,3, 9,3,4,3, 9,3,5,3,
+ 9,5,8,3, 9,5,9,3, 5,4,0,3, 5,4,1,3, 5,4,2,3, 5,4,3,3, 5,4,4,3, 5,4,5,3, 5,4,6,3,
+ 5,4,7,3, 5,4,8,3, 5,4,9,3, 5,8,4,3, 5,8,5,3, 9,4,4,3, 9,4,5,3, 5,8,8,3, 5,8,9,3,
+ 5,5,0,3, 5,5,1,3, 5,5,2,3, 5,5,3,3, 5,5,4,3, 5,5,5,3, 5,5,6,3, 5,5,7,3, 5,5,8,3,
+ 5,5,9,3, 5,9,4,3, 5,9,5,3, 9,5,4,3, 9,5,5,3, 5,9,8,3, 5,9,9,3, 5,6,0,3, 5,6,1,3,
+ 5,6,2,3, 5,6,3,3, 5,6,4,3, 5,6,5,3, 5,6,6,3, 5,6,7,3, 5,6,8,3, 5,6,9,3, 5,8,6,3,
+ 5,8,7,3, 9,6,4,3, 9,6,5,3, 9,8,8,3, 9,8,9,3, 5,7,0,3, 5,7,1,3, 5,7,2,3, 5,7,3,3,
+ 5,7,4,3, 5,7,5,3, 5,7,6,3, 5,7,7,3, 5,7,8,3, 5,7,9,3, 5,9,6,3, 5,9,7,3, 9,7,4,3,
+ 9,7,5,3, 9,9,8,3, 9,9,9,3, 6,0,0,3, 6,0,1,3, 6,0,2,3, 6,0,3,3, 6,0,4,3, 6,0,5,3,
+ 6,0,6,3, 6,0,7,3, 6,0,8,3, 6,0,9,3, 6,8,0,3, 6,8,1,3, 8,0,6,3, 8,0,7,3, 8,8,6,3,
+ 8,8,7,3, 6,1,0,3, 6,1,1,3, 6,1,2,3, 6,1,3,3, 6,1,4,3, 6,1,5,3, 6,1,6,3, 6,1,7,3,
+ 6,1,8,3, 6,1,9,3, 6,9,0,3, 6,9,1,3, 8,1,6,3, 8,1,7,3, 8,9,6,3, 8,9,7,3, 6,2,0,3,
+ 6,2,1,3, 6,2,2,3, 6,2,3,3, 6,2,4,3, 6,2,5,3, 6,2,6,3, 6,2,7,3, 6,2,8,3, 6,2,9,3,
+ 6,8,2,3, 6,8,3,3, 8,2,6,3, 8,2,7,3, 8,6,8,3, 8,6,9,3, 6,3,0,3, 6,3,1,3, 6,3,2,3,
+ 6,3,3,3, 6,3,4,3, 6,3,5,3, 6,3,6,3, 6,3,7,3, 6,3,8,3, 6,3,9,3, 6,9,2,3, 6,9,3,3,
+ 8,3,6,3, 8,3,7,3, 8,7,8,3, 8,7,9,3, 6,4,0,3, 6,4,1,3, 6,4,2,3, 6,4,3,3, 6,4,4,3,
+ 6,4,5,3, 6,4,6,3, 6,4,7,3, 6,4,8,3, 6,4,9,3, 6,8,4,3, 6,8,5,3, 8,4,6,3, 8,4,7,3,
+ 6,8,8,3, 6,8,9,3, 6,5,0,3, 6,5,1,3, 6,5,2,3, 6,5,3,3, 6,5,4,3, 6,5,5,3, 6,5,6,3,
+ 6,5,7,3, 6,5,8,3, 6,5,9,3, 6,9,4,3, 6,9,5,3, 8,5,6,3, 8,5,7,3, 6,9,8,3, 6,9,9,3,
+ 6,6,0,3, 6,6,1,3, 6,6,2,3, 6,6,3,3, 6,6,4,3, 6,6,5,3, 6,6,6,3, 6,6,7,3, 6,6,8,3,
+ 6,6,9,3, 6,8,6,3, 6,8,7,3, 8,6,6,3, 8,6,7,3, 8,8,8,3, 8,8,9,3, 6,7,0,3, 6,7,1,3,
+ 6,7,2,3, 6,7,3,3, 6,7,4,3, 6,7,5,3, 6,7,6,3, 6,7,7,3, 6,7,8,3, 6,7,9,3, 6,9,6,3,
+ 6,9,7,3, 8,7,6,3, 8,7,7,3, 8,9,8,3, 8,9,9,3, 7,0,0,3, 7,0,1,3, 7,0,2,3, 7,0,3,3,
+ 7,0,4,3, 7,0,5,3, 7,0,6,3, 7,0,7,3, 7,0,8,3, 7,0,9,3, 7,8,0,3, 7,8,1,3, 9,0,6,3,
+ 9,0,7,3, 9,8,6,3, 9,8,7,3, 7,1,0,3, 7,1,1,3, 7,1,2,3, 7,1,3,3, 7,1,4,3, 7,1,5,3,
+ 7,1,6,3, 7,1,7,3, 7,1,8,3, 7,1,9,3, 7,9,0,3, 7,9,1,3, 9,1,6,3, 9,1,7,3, 9,9,6,3,
+ 9,9,7,3, 7,2,0,3, 7,2,1,3, 7,2,2,3, 7,2,3,3, 7,2,4,3, 7,2,5,3, 7,2,6,3, 7,2,7,3,
+ 7,2,8,3, 7,2,9,3, 7,8,2,3, 7,8,3,3, 9,2,6,3, 9,2,7,3, 9,6,8,3, 9,6,9,3, 7,3,0,3,
+ 7,3,1,3, 7,3,2,3, 7,3,3,3, 7,3,4,3, 7,3,5,3, 7,3,6,3, 7,3,7,3, 7,3,8,3, 7,3,9,3,
+ 7,9,2,3, 7,9,3,3, 9,3,6,3, 9,3,7,3, 9,7,8,3, 9,7,9,3, 7,4,0,3, 7,4,1,3, 7,4,2,3,
+ 7,4,3,3, 7,4,4,3, 7,4,5,3, 7,4,6,3, 7,4,7,3, 7,4,8,3, 7,4,9,3, 7,8,4,3, 7,8,5,3,
+ 9,4,6,3, 9,4,7,3, 7,8,8,3, 7,8,9,3, 7,5,0,3, 7,5,1,3, 7,5,2,3, 7,5,3,3, 7,5,4,3,
+ 7,5,5,3, 7,5,6,3, 7,5,7,3, 7,5,8,3, 7,5,9,3, 7,9,4,3, 7,9,5,3, 9,5,6,3, 9,5,7,3,
+ 7,9,8,3, 7,9,9,3, 7,6,0,3, 7,6,1,3, 7,6,2,3, 7,6,3,3, 7,6,4,3, 7,6,5,3, 7,6,6,3,
+ 7,6,7,3, 7,6,8,3, 7,6,9,3, 7,8,6,3, 7,8,7,3, 9,6,6,3, 9,6,7,3, 9,8,8,3, 9,8,9,3,
+ 7,7,0,3, 7,7,1,3, 7,7,2,3, 7,7,3,3, 7,7,4,3, 7,7,5,3, 7,7,6,3, 7,7,7,3, 7,7,8,3,
+ 7,7,9,3, 7,9,6,3, 7,9,7,3, 9,7,6,3, 9,7,7,3, 9,9,8,3, 9,9,9,3};
+#endif
+
+#if defined(DEC_BIN2BCD8) && DEC_BIN2BCD8==1 && !defined(DECBIN2BCD8)
+#define DECBIN2BCD8
+
+const uint8_t BIN2BCD8[4000]={
+ 0,0,0,0, 0,0,1,1, 0,0,2,1, 0,0,3,1, 0,0,4,1, 0,0,5,1, 0,0,6,1, 0,0,7,1, 0,0,8,1,
+ 0,0,9,1, 0,1,0,2, 0,1,1,2, 0,1,2,2, 0,1,3,2, 0,1,4,2, 0,1,5,2, 0,1,6,2, 0,1,7,2,
+ 0,1,8,2, 0,1,9,2, 0,2,0,2, 0,2,1,2, 0,2,2,2, 0,2,3,2, 0,2,4,2, 0,2,5,2, 0,2,6,2,
+ 0,2,7,2, 0,2,8,2, 0,2,9,2, 0,3,0,2, 0,3,1,2, 0,3,2,2, 0,3,3,2, 0,3,4,2, 0,3,5,2,
+ 0,3,6,2, 0,3,7,2, 0,3,8,2, 0,3,9,2, 0,4,0,2, 0,4,1,2, 0,4,2,2, 0,4,3,2, 0,4,4,2,
+ 0,4,5,2, 0,4,6,2, 0,4,7,2, 0,4,8,2, 0,4,9,2, 0,5,0,2, 0,5,1,2, 0,5,2,2, 0,5,3,2,
+ 0,5,4,2, 0,5,5,2, 0,5,6,2, 0,5,7,2, 0,5,8,2, 0,5,9,2, 0,6,0,2, 0,6,1,2, 0,6,2,2,
+ 0,6,3,2, 0,6,4,2, 0,6,5,2, 0,6,6,2, 0,6,7,2, 0,6,8,2, 0,6,9,2, 0,7,0,2, 0,7,1,2,
+ 0,7,2,2, 0,7,3,2, 0,7,4,2, 0,7,5,2, 0,7,6,2, 0,7,7,2, 0,7,8,2, 0,7,9,2, 0,8,0,2,
+ 0,8,1,2, 0,8,2,2, 0,8,3,2, 0,8,4,2, 0,8,5,2, 0,8,6,2, 0,8,7,2, 0,8,8,2, 0,8,9,2,
+ 0,9,0,2, 0,9,1,2, 0,9,2,2, 0,9,3,2, 0,9,4,2, 0,9,5,2, 0,9,6,2, 0,9,7,2, 0,9,8,2,
+ 0,9,9,2, 1,0,0,3, 1,0,1,3, 1,0,2,3, 1,0,3,3, 1,0,4,3, 1,0,5,3, 1,0,6,3, 1,0,7,3,
+ 1,0,8,3, 1,0,9,3, 1,1,0,3, 1,1,1,3, 1,1,2,3, 1,1,3,3, 1,1,4,3, 1,1,5,3, 1,1,6,3,
+ 1,1,7,3, 1,1,8,3, 1,1,9,3, 1,2,0,3, 1,2,1,3, 1,2,2,3, 1,2,3,3, 1,2,4,3, 1,2,5,3,
+ 1,2,6,3, 1,2,7,3, 1,2,8,3, 1,2,9,3, 1,3,0,3, 1,3,1,3, 1,3,2,3, 1,3,3,3, 1,3,4,3,
+ 1,3,5,3, 1,3,6,3, 1,3,7,3, 1,3,8,3, 1,3,9,3, 1,4,0,3, 1,4,1,3, 1,4,2,3, 1,4,3,3,
+ 1,4,4,3, 1,4,5,3, 1,4,6,3, 1,4,7,3, 1,4,8,3, 1,4,9,3, 1,5,0,3, 1,5,1,3, 1,5,2,3,
+ 1,5,3,3, 1,5,4,3, 1,5,5,3, 1,5,6,3, 1,5,7,3, 1,5,8,3, 1,5,9,3, 1,6,0,3, 1,6,1,3,
+ 1,6,2,3, 1,6,3,3, 1,6,4,3, 1,6,5,3, 1,6,6,3, 1,6,7,3, 1,6,8,3, 1,6,9,3, 1,7,0,3,
+ 1,7,1,3, 1,7,2,3, 1,7,3,3, 1,7,4,3, 1,7,5,3, 1,7,6,3, 1,7,7,3, 1,7,8,3, 1,7,9,3,
+ 1,8,0,3, 1,8,1,3, 1,8,2,3, 1,8,3,3, 1,8,4,3, 1,8,5,3, 1,8,6,3, 1,8,7,3, 1,8,8,3,
+ 1,8,9,3, 1,9,0,3, 1,9,1,3, 1,9,2,3, 1,9,3,3, 1,9,4,3, 1,9,5,3, 1,9,6,3, 1,9,7,3,
+ 1,9,8,3, 1,9,9,3, 2,0,0,3, 2,0,1,3, 2,0,2,3, 2,0,3,3, 2,0,4,3, 2,0,5,3, 2,0,6,3,
+ 2,0,7,3, 2,0,8,3, 2,0,9,3, 2,1,0,3, 2,1,1,3, 2,1,2,3, 2,1,3,3, 2,1,4,3, 2,1,5,3,
+ 2,1,6,3, 2,1,7,3, 2,1,8,3, 2,1,9,3, 2,2,0,3, 2,2,1,3, 2,2,2,3, 2,2,3,3, 2,2,4,3,
+ 2,2,5,3, 2,2,6,3, 2,2,7,3, 2,2,8,3, 2,2,9,3, 2,3,0,3, 2,3,1,3, 2,3,2,3, 2,3,3,3,
+ 2,3,4,3, 2,3,5,3, 2,3,6,3, 2,3,7,3, 2,3,8,3, 2,3,9,3, 2,4,0,3, 2,4,1,3, 2,4,2,3,
+ 2,4,3,3, 2,4,4,3, 2,4,5,3, 2,4,6,3, 2,4,7,3, 2,4,8,3, 2,4,9,3, 2,5,0,3, 2,5,1,3,
+ 2,5,2,3, 2,5,3,3, 2,5,4,3, 2,5,5,3, 2,5,6,3, 2,5,7,3, 2,5,8,3, 2,5,9,3, 2,6,0,3,
+ 2,6,1,3, 2,6,2,3, 2,6,3,3, 2,6,4,3, 2,6,5,3, 2,6,6,3, 2,6,7,3, 2,6,8,3, 2,6,9,3,
+ 2,7,0,3, 2,7,1,3, 2,7,2,3, 2,7,3,3, 2,7,4,3, 2,7,5,3, 2,7,6,3, 2,7,7,3, 2,7,8,3,
+ 2,7,9,3, 2,8,0,3, 2,8,1,3, 2,8,2,3, 2,8,3,3, 2,8,4,3, 2,8,5,3, 2,8,6,3, 2,8,7,3,
+ 2,8,8,3, 2,8,9,3, 2,9,0,3, 2,9,1,3, 2,9,2,3, 2,9,3,3, 2,9,4,3, 2,9,5,3, 2,9,6,3,
+ 2,9,7,3, 2,9,8,3, 2,9,9,3, 3,0,0,3, 3,0,1,3, 3,0,2,3, 3,0,3,3, 3,0,4,3, 3,0,5,3,
+ 3,0,6,3, 3,0,7,3, 3,0,8,3, 3,0,9,3, 3,1,0,3, 3,1,1,3, 3,1,2,3, 3,1,3,3, 3,1,4,3,
+ 3,1,5,3, 3,1,6,3, 3,1,7,3, 3,1,8,3, 3,1,9,3, 3,2,0,3, 3,2,1,3, 3,2,2,3, 3,2,3,3,
+ 3,2,4,3, 3,2,5,3, 3,2,6,3, 3,2,7,3, 3,2,8,3, 3,2,9,3, 3,3,0,3, 3,3,1,3, 3,3,2,3,
+ 3,3,3,3, 3,3,4,3, 3,3,5,3, 3,3,6,3, 3,3,7,3, 3,3,8,3, 3,3,9,3, 3,4,0,3, 3,4,1,3,
+ 3,4,2,3, 3,4,3,3, 3,4,4,3, 3,4,5,3, 3,4,6,3, 3,4,7,3, 3,4,8,3, 3,4,9,3, 3,5,0,3,
+ 3,5,1,3, 3,5,2,3, 3,5,3,3, 3,5,4,3, 3,5,5,3, 3,5,6,3, 3,5,7,3, 3,5,8,3, 3,5,9,3,
+ 3,6,0,3, 3,6,1,3, 3,6,2,3, 3,6,3,3, 3,6,4,3, 3,6,5,3, 3,6,6,3, 3,6,7,3, 3,6,8,3,
+ 3,6,9,3, 3,7,0,3, 3,7,1,3, 3,7,2,3, 3,7,3,3, 3,7,4,3, 3,7,5,3, 3,7,6,3, 3,7,7,3,
+ 3,7,8,3, 3,7,9,3, 3,8,0,3, 3,8,1,3, 3,8,2,3, 3,8,3,3, 3,8,4,3, 3,8,5,3, 3,8,6,3,
+ 3,8,7,3, 3,8,8,3, 3,8,9,3, 3,9,0,3, 3,9,1,3, 3,9,2,3, 3,9,3,3, 3,9,4,3, 3,9,5,3,
+ 3,9,6,3, 3,9,7,3, 3,9,8,3, 3,9,9,3, 4,0,0,3, 4,0,1,3, 4,0,2,3, 4,0,3,3, 4,0,4,3,
+ 4,0,5,3, 4,0,6,3, 4,0,7,3, 4,0,8,3, 4,0,9,3, 4,1,0,3, 4,1,1,3, 4,1,2,3, 4,1,3,3,
+ 4,1,4,3, 4,1,5,3, 4,1,6,3, 4,1,7,3, 4,1,8,3, 4,1,9,3, 4,2,0,3, 4,2,1,3, 4,2,2,3,
+ 4,2,3,3, 4,2,4,3, 4,2,5,3, 4,2,6,3, 4,2,7,3, 4,2,8,3, 4,2,9,3, 4,3,0,3, 4,3,1,3,
+ 4,3,2,3, 4,3,3,3, 4,3,4,3, 4,3,5,3, 4,3,6,3, 4,3,7,3, 4,3,8,3, 4,3,9,3, 4,4,0,3,
+ 4,4,1,3, 4,4,2,3, 4,4,3,3, 4,4,4,3, 4,4,5,3, 4,4,6,3, 4,4,7,3, 4,4,8,3, 4,4,9,3,
+ 4,5,0,3, 4,5,1,3, 4,5,2,3, 4,5,3,3, 4,5,4,3, 4,5,5,3, 4,5,6,3, 4,5,7,3, 4,5,8,3,
+ 4,5,9,3, 4,6,0,3, 4,6,1,3, 4,6,2,3, 4,6,3,3, 4,6,4,3, 4,6,5,3, 4,6,6,3, 4,6,7,3,
+ 4,6,8,3, 4,6,9,3, 4,7,0,3, 4,7,1,3, 4,7,2,3, 4,7,3,3, 4,7,4,3, 4,7,5,3, 4,7,6,3,
+ 4,7,7,3, 4,7,8,3, 4,7,9,3, 4,8,0,3, 4,8,1,3, 4,8,2,3, 4,8,3,3, 4,8,4,3, 4,8,5,3,
+ 4,8,6,3, 4,8,7,3, 4,8,8,3, 4,8,9,3, 4,9,0,3, 4,9,1,3, 4,9,2,3, 4,9,3,3, 4,9,4,3,
+ 4,9,5,3, 4,9,6,3, 4,9,7,3, 4,9,8,3, 4,9,9,3, 5,0,0,3, 5,0,1,3, 5,0,2,3, 5,0,3,3,
+ 5,0,4,3, 5,0,5,3, 5,0,6,3, 5,0,7,3, 5,0,8,3, 5,0,9,3, 5,1,0,3, 5,1,1,3, 5,1,2,3,
+ 5,1,3,3, 5,1,4,3, 5,1,5,3, 5,1,6,3, 5,1,7,3, 5,1,8,3, 5,1,9,3, 5,2,0,3, 5,2,1,3,
+ 5,2,2,3, 5,2,3,3, 5,2,4,3, 5,2,5,3, 5,2,6,3, 5,2,7,3, 5,2,8,3, 5,2,9,3, 5,3,0,3,
+ 5,3,1,3, 5,3,2,3, 5,3,3,3, 5,3,4,3, 5,3,5,3, 5,3,6,3, 5,3,7,3, 5,3,8,3, 5,3,9,3,
+ 5,4,0,3, 5,4,1,3, 5,4,2,3, 5,4,3,3, 5,4,4,3, 5,4,5,3, 5,4,6,3, 5,4,7,3, 5,4,8,3,
+ 5,4,9,3, 5,5,0,3, 5,5,1,3, 5,5,2,3, 5,5,3,3, 5,5,4,3, 5,5,5,3, 5,5,6,3, 5,5,7,3,
+ 5,5,8,3, 5,5,9,3, 5,6,0,3, 5,6,1,3, 5,6,2,3, 5,6,3,3, 5,6,4,3, 5,6,5,3, 5,6,6,3,
+ 5,6,7,3, 5,6,8,3, 5,6,9,3, 5,7,0,3, 5,7,1,3, 5,7,2,3, 5,7,3,3, 5,7,4,3, 5,7,5,3,
+ 5,7,6,3, 5,7,7,3, 5,7,8,3, 5,7,9,3, 5,8,0,3, 5,8,1,3, 5,8,2,3, 5,8,3,3, 5,8,4,3,
+ 5,8,5,3, 5,8,6,3, 5,8,7,3, 5,8,8,3, 5,8,9,3, 5,9,0,3, 5,9,1,3, 5,9,2,3, 5,9,3,3,
+ 5,9,4,3, 5,9,5,3, 5,9,6,3, 5,9,7,3, 5,9,8,3, 5,9,9,3, 6,0,0,3, 6,0,1,3, 6,0,2,3,
+ 6,0,3,3, 6,0,4,3, 6,0,5,3, 6,0,6,3, 6,0,7,3, 6,0,8,3, 6,0,9,3, 6,1,0,3, 6,1,1,3,
+ 6,1,2,3, 6,1,3,3, 6,1,4,3, 6,1,5,3, 6,1,6,3, 6,1,7,3, 6,1,8,3, 6,1,9,3, 6,2,0,3,
+ 6,2,1,3, 6,2,2,3, 6,2,3,3, 6,2,4,3, 6,2,5,3, 6,2,6,3, 6,2,7,3, 6,2,8,3, 6,2,9,3,
+ 6,3,0,3, 6,3,1,3, 6,3,2,3, 6,3,3,3, 6,3,4,3, 6,3,5,3, 6,3,6,3, 6,3,7,3, 6,3,8,3,
+ 6,3,9,3, 6,4,0,3, 6,4,1,3, 6,4,2,3, 6,4,3,3, 6,4,4,3, 6,4,5,3, 6,4,6,3, 6,4,7,3,
+ 6,4,8,3, 6,4,9,3, 6,5,0,3, 6,5,1,3, 6,5,2,3, 6,5,3,3, 6,5,4,3, 6,5,5,3, 6,5,6,3,
+ 6,5,7,3, 6,5,8,3, 6,5,9,3, 6,6,0,3, 6,6,1,3, 6,6,2,3, 6,6,3,3, 6,6,4,3, 6,6,5,3,
+ 6,6,6,3, 6,6,7,3, 6,6,8,3, 6,6,9,3, 6,7,0,3, 6,7,1,3, 6,7,2,3, 6,7,3,3, 6,7,4,3,
+ 6,7,5,3, 6,7,6,3, 6,7,7,3, 6,7,8,3, 6,7,9,3, 6,8,0,3, 6,8,1,3, 6,8,2,3, 6,8,3,3,
+ 6,8,4,3, 6,8,5,3, 6,8,6,3, 6,8,7,3, 6,8,8,3, 6,8,9,3, 6,9,0,3, 6,9,1,3, 6,9,2,3,
+ 6,9,3,3, 6,9,4,3, 6,9,5,3, 6,9,6,3, 6,9,7,3, 6,9,8,3, 6,9,9,3, 7,0,0,3, 7,0,1,3,
+ 7,0,2,3, 7,0,3,3, 7,0,4,3, 7,0,5,3, 7,0,6,3, 7,0,7,3, 7,0,8,3, 7,0,9,3, 7,1,0,3,
+ 7,1,1,3, 7,1,2,3, 7,1,3,3, 7,1,4,3, 7,1,5,3, 7,1,6,3, 7,1,7,3, 7,1,8,3, 7,1,9,3,
+ 7,2,0,3, 7,2,1,3, 7,2,2,3, 7,2,3,3, 7,2,4,3, 7,2,5,3, 7,2,6,3, 7,2,7,3, 7,2,8,3,
+ 7,2,9,3, 7,3,0,3, 7,3,1,3, 7,3,2,3, 7,3,3,3, 7,3,4,3, 7,3,5,3, 7,3,6,3, 7,3,7,3,
+ 7,3,8,3, 7,3,9,3, 7,4,0,3, 7,4,1,3, 7,4,2,3, 7,4,3,3, 7,4,4,3, 7,4,5,3, 7,4,6,3,
+ 7,4,7,3, 7,4,8,3, 7,4,9,3, 7,5,0,3, 7,5,1,3, 7,5,2,3, 7,5,3,3, 7,5,4,3, 7,5,5,3,
+ 7,5,6,3, 7,5,7,3, 7,5,8,3, 7,5,9,3, 7,6,0,3, 7,6,1,3, 7,6,2,3, 7,6,3,3, 7,6,4,3,
+ 7,6,5,3, 7,6,6,3, 7,6,7,3, 7,6,8,3, 7,6,9,3, 7,7,0,3, 7,7,1,3, 7,7,2,3, 7,7,3,3,
+ 7,7,4,3, 7,7,5,3, 7,7,6,3, 7,7,7,3, 7,7,8,3, 7,7,9,3, 7,8,0,3, 7,8,1,3, 7,8,2,3,
+ 7,8,3,3, 7,8,4,3, 7,8,5,3, 7,8,6,3, 7,8,7,3, 7,8,8,3, 7,8,9,3, 7,9,0,3, 7,9,1,3,
+ 7,9,2,3, 7,9,3,3, 7,9,4,3, 7,9,5,3, 7,9,6,3, 7,9,7,3, 7,9,8,3, 7,9,9,3, 8,0,0,3,
+ 8,0,1,3, 8,0,2,3, 8,0,3,3, 8,0,4,3, 8,0,5,3, 8,0,6,3, 8,0,7,3, 8,0,8,3, 8,0,9,3,
+ 8,1,0,3, 8,1,1,3, 8,1,2,3, 8,1,3,3, 8,1,4,3, 8,1,5,3, 8,1,6,3, 8,1,7,3, 8,1,8,3,
+ 8,1,9,3, 8,2,0,3, 8,2,1,3, 8,2,2,3, 8,2,3,3, 8,2,4,3, 8,2,5,3, 8,2,6,3, 8,2,7,3,
+ 8,2,8,3, 8,2,9,3, 8,3,0,3, 8,3,1,3, 8,3,2,3, 8,3,3,3, 8,3,4,3, 8,3,5,3, 8,3,6,3,
+ 8,3,7,3, 8,3,8,3, 8,3,9,3, 8,4,0,3, 8,4,1,3, 8,4,2,3, 8,4,3,3, 8,4,4,3, 8,4,5,3,
+ 8,4,6,3, 8,4,7,3, 8,4,8,3, 8,4,9,3, 8,5,0,3, 8,5,1,3, 8,5,2,3, 8,5,3,3, 8,5,4,3,
+ 8,5,5,3, 8,5,6,3, 8,5,7,3, 8,5,8,3, 8,5,9,3, 8,6,0,3, 8,6,1,3, 8,6,2,3, 8,6,3,3,
+ 8,6,4,3, 8,6,5,3, 8,6,6,3, 8,6,7,3, 8,6,8,3, 8,6,9,3, 8,7,0,3, 8,7,1,3, 8,7,2,3,
+ 8,7,3,3, 8,7,4,3, 8,7,5,3, 8,7,6,3, 8,7,7,3, 8,7,8,3, 8,7,9,3, 8,8,0,3, 8,8,1,3,
+ 8,8,2,3, 8,8,3,3, 8,8,4,3, 8,8,5,3, 8,8,6,3, 8,8,7,3, 8,8,8,3, 8,8,9,3, 8,9,0,3,
+ 8,9,1,3, 8,9,2,3, 8,9,3,3, 8,9,4,3, 8,9,5,3, 8,9,6,3, 8,9,7,3, 8,9,8,3, 8,9,9,3,
+ 9,0,0,3, 9,0,1,3, 9,0,2,3, 9,0,3,3, 9,0,4,3, 9,0,5,3, 9,0,6,3, 9,0,7,3, 9,0,8,3,
+ 9,0,9,3, 9,1,0,3, 9,1,1,3, 9,1,2,3, 9,1,3,3, 9,1,4,3, 9,1,5,3, 9,1,6,3, 9,1,7,3,
+ 9,1,8,3, 9,1,9,3, 9,2,0,3, 9,2,1,3, 9,2,2,3, 9,2,3,3, 9,2,4,3, 9,2,5,3, 9,2,6,3,
+ 9,2,7,3, 9,2,8,3, 9,2,9,3, 9,3,0,3, 9,3,1,3, 9,3,2,3, 9,3,3,3, 9,3,4,3, 9,3,5,3,
+ 9,3,6,3, 9,3,7,3, 9,3,8,3, 9,3,9,3, 9,4,0,3, 9,4,1,3, 9,4,2,3, 9,4,3,3, 9,4,4,3,
+ 9,4,5,3, 9,4,6,3, 9,4,7,3, 9,4,8,3, 9,4,9,3, 9,5,0,3, 9,5,1,3, 9,5,2,3, 9,5,3,3,
+ 9,5,4,3, 9,5,5,3, 9,5,6,3, 9,5,7,3, 9,5,8,3, 9,5,9,3, 9,6,0,3, 9,6,1,3, 9,6,2,3,
+ 9,6,3,3, 9,6,4,3, 9,6,5,3, 9,6,6,3, 9,6,7,3, 9,6,8,3, 9,6,9,3, 9,7,0,3, 9,7,1,3,
+ 9,7,2,3, 9,7,3,3, 9,7,4,3, 9,7,5,3, 9,7,6,3, 9,7,7,3, 9,7,8,3, 9,7,9,3, 9,8,0,3,
+ 9,8,1,3, 9,8,2,3, 9,8,3,3, 9,8,4,3, 9,8,5,3, 9,8,6,3, 9,8,7,3, 9,8,8,3, 9,8,9,3,
+ 9,9,0,3, 9,9,1,3, 9,9,2,3, 9,9,3,3, 9,9,4,3, 9,9,5,3, 9,9,6,3, 9,9,7,3, 9,9,8,3,
+ 9,9,9,3};
+#endif
diff --git a/include/libdecnumber/decNumber.h b/include/libdecnumber/decNumber.h
new file mode 100644 (file)
index 0000000..41bc2a0
--- /dev/null
@@ -0,0 +1,205 @@
+/* Decimal number arithmetic module header for the decNumber C Library.
+   Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+   Contributed by IBM Corporation.  Author Mike Cowlishaw.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2, or (at your option) any later
+   version.
+
+   In addition to the permissions in the GNU General Public License,
+   the Free Software Foundation gives you unlimited permission to link
+   the compiled version of this file into combinations with other
+   programs, and to distribute those combinations without any
+   restriction coming from the use of this file.  (The General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into a combine executable.)
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+/* ------------------------------------------------------------------ */
+/* Decimal Number arithmetic module header                           */
+/* ------------------------------------------------------------------ */
+
+#ifndef DECNUMBER_H
+#define DECNUMBER_H
+
+  #define DECNAME     "decNumber"                      /* Short name */
+  #define DECFULLNAME "Decimal Number Module"        /* Verbose name */
+  #define DECAUTHOR   "Mike Cowlishaw"               /* Who to blame */
+
+  #include "libdecnumber/decContext.h"
+
+  /* Bit settings for decNumber.bits                                 */
+  #define DECNEG    0x80      /* Sign; 1=negative, 0=positive or zero */
+  #define DECINF    0x40      /* 1=Infinity                          */
+  #define DECNAN    0x20      /* 1=NaN                               */
+  #define DECSNAN   0x10      /* 1=sNaN                                      */
+  /* The remaining bits are reserved; they must be 0                 */
+  #define DECSPECIAL (DECINF|DECNAN|DECSNAN) /* any special value     */
+
+  /* Define the decNumber data structure.  The size and shape of the  */
+  /* units array in the structure is determined by the following      */
+  /* constant. This must not be changed without recompiling the      */
+  /* decNumber library modules. */
+
+  #define DECDPUN 3          /* DECimal Digits Per UNit [must be >0  */
+                             /* and <10; 3 or powers of 2 are best]. */
+
+  /* DECNUMDIGITS is the default number of digits that can be held in */
+  /* the structure.  If undefined, 1 is assumed and it is assumed     */
+  /* that the structure will be immediately followed by extra space,  */
+  /* as required.  DECNUMDIGITS is always >0.                        */
+  #if !defined(DECNUMDIGITS)
+    #define DECNUMDIGITS 1
+  #endif
+
+  /* The size (integer data type) of each unit is determined by the   */
+  /* number of digits it will hold.                                  */
+  #if  DECDPUN<=2
+    #define decNumberUnit uint8_t
+  #elif DECDPUN<=4
+    #define decNumberUnit uint16_t
+  #else
+    #define decNumberUnit uint32_t
+  #endif
+  /* The number of units needed is ceil(DECNUMDIGITS/DECDPUN)        */
+  #define DECNUMUNITS ((DECNUMDIGITS+DECDPUN-1)/DECDPUN)
+
+  /* The data structure... */
+  typedef struct {
+    int32_t digits;     /* Count of digits in the coefficient; >0    */
+    int32_t exponent;   /* Unadjusted exponent, unbiased, in         */
+                        /* range: -1999999997 through 999999999      */
+    uint8_t bits;       /* Indicator bits (see above)                */
+                        /* Coefficient, from least significant unit  */
+    decNumberUnit lsu[DECNUMUNITS];
+    } decNumber;
+
+  /* Notes:                                                          */
+  /* 1. If digits is > DECDPUN then there will one or more           */
+  /*   decNumberUnits immediately following the first element of lsu.*/
+  /*   These contain the remaining (more significant) digits of the  */
+  /*   number, and may be in the lsu array, or may be guaranteed by  */
+  /*   some other mechanism (such as being contained in another      */
+  /*   structure, or being overlaid on dynamically allocated         */
+  /*   storage).                                                     */
+  /*                                                                 */
+  /*   Each integer of the coefficient (except potentially the last) */
+  /*   contains DECDPUN digits (e.g., a value in the range 0 through */
+  /*   99999999 if DECDPUN is 8, or 0 through 999 if DECDPUN is 3).  */
+  /*                                                                 */
+  /* 2. A decNumber converted to a string may need up to digits+14    */
+  /*   characters.  The worst cases (non-exponential and exponential */
+  /*   formats) are -0.00000{9...}# and -9.{9...}E+999999999#        */
+  /*   (where # is '\0')                                             */
+
+
+  /* ---------------------------------------------------------------- */
+  /* decNumber public functions and macros                           */
+  /* ---------------------------------------------------------------- */
+
+
+  /* Conversions                                                     */
+  decNumber * decNumberFromInt32(decNumber *, int32_t);
+  decNumber * decNumberFromUInt32(decNumber *, uint32_t);
+  decNumber *decNumberFromInt64(decNumber *, int64_t);
+  decNumber *decNumberFromUInt64(decNumber *, uint64_t);
+  decNumber *decNumberFromInt128(decNumber *, uint64_t, int64_t);
+  decNumber *decNumberFromUInt128(decNumber *, uint64_t, uint64_t);
+  decNumber * decNumberFromString(decNumber *, const char *, decContext *);
+  char     * decNumberToString(const decNumber *, char *);
+  char     * decNumberToEngString(const decNumber *, char *);
+  uint32_t    decNumberToUInt32(const decNumber *, decContext *);
+  int32_t     decNumberToInt32(const decNumber *, decContext *);
+  int64_t     decNumberIntegralToInt64(const decNumber *dn, decContext *set);
+  void        decNumberIntegralToInt128(const decNumber *dn, decContext *set,
+        uint64_t *plow, uint64_t *phigh);
+  uint8_t   * decNumberGetBCD(const decNumber *, uint8_t *);
+  decNumber * decNumberSetBCD(decNumber *, const uint8_t *, uint32_t);
+
+  /* Operators and elementary functions                                      */
+  decNumber * decNumberAbs(decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberAdd(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberAnd(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberCompare(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberCompareSignal(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberCompareTotal(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberCompareTotalMag(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberDivide(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberDivideInteger(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberExp(decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberFMA(decNumber *, const decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberInvert(decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberLn(decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberLogB(decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberLog10(decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberMax(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberMaxMag(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberMin(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberMinMag(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberMinus(decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberMultiply(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberNormalize(decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberOr(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberPlus(decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberPower(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberQuantize(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberReduce(decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberRemainder(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberRemainderNear(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberRescale(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberRotate(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberSameQuantum(decNumber *, const decNumber *, const decNumber *);
+  decNumber * decNumberScaleB(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberShift(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberSquareRoot(decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberSubtract(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberToIntegralExact(decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberToIntegralValue(decNumber *, const decNumber *, decContext *);
+  decNumber * decNumberXor(decNumber *, const decNumber *, const decNumber *, decContext *);
+
+  /* Utilities                                                       */
+  enum decClass decNumberClass(const decNumber *, decContext *);
+  const char * decNumberClassToString(enum decClass);
+  decNumber  * decNumberCopy(decNumber *, const decNumber *);
+  decNumber  * decNumberCopyAbs(decNumber *, const decNumber *);
+  decNumber  * decNumberCopyNegate(decNumber *, const decNumber *);
+  decNumber  * decNumberCopySign(decNumber *, const decNumber *, const decNumber *);
+  decNumber  * decNumberNextMinus(decNumber *, const decNumber *, decContext *);
+  decNumber  * decNumberNextPlus(decNumber *, const decNumber *, decContext *);
+  decNumber  * decNumberNextToward(decNumber *, const decNumber *, const decNumber *, decContext *);
+  decNumber  * decNumberTrim(decNumber *);
+  const char * decNumberVersion(void);
+  decNumber  * decNumberZero(decNumber *);
+
+  /* Functions for testing decNumbers (normality depends on context)  */
+  int32_t decNumberIsNormal(const decNumber *, decContext *);
+  int32_t decNumberIsSubnormal(const decNumber *, decContext *);
+
+  /* Macros for testing decNumber *dn                                */
+  #define decNumberIsCanonical(dn) (1) /* All decNumbers are saintly */
+  #define decNumberIsFinite(dn)           (((dn)->bits&DECSPECIAL)==0)
+  #define decNumberIsInfinite(dn)  (((dn)->bits&DECINF)!=0)
+  #define decNumberIsNaN(dn)      (((dn)->bits&(DECNAN|DECSNAN))!=0)
+  #define decNumberIsNegative(dn)  (((dn)->bits&DECNEG)!=0)
+  #define decNumberIsQNaN(dn)     (((dn)->bits&(DECNAN))!=0)
+  #define decNumberIsSNaN(dn)     (((dn)->bits&(DECSNAN))!=0)
+  #define decNumberIsSpecial(dn)   (((dn)->bits&DECSPECIAL)!=0)
+  #define decNumberIsZero(dn)     (*(dn)->lsu==0 \
+                                   && (dn)->digits==1 \
+                                   && (((dn)->bits&DECSPECIAL)==0))
+  #define decNumberRadix(dn)      (10)
+
+#endif
diff --git a/include/libdecnumber/decNumberLocal.h b/include/libdecnumber/decNumberLocal.h
new file mode 100644 (file)
index 0000000..6198ca8
--- /dev/null
@@ -0,0 +1,663 @@
+/* Local definitions for the decNumber C Library.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+   Contributed by IBM Corporation.  Author Mike Cowlishaw.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2, or (at your option) any later
+   version.
+
+   In addition to the permissions in the GNU General Public License,
+   the Free Software Foundation gives you unlimited permission to link
+   the compiled version of this file into combinations with other
+   programs, and to distribute those combinations without any
+   restriction coming from the use of this file.  (The General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into a combine executable.)
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+/* ------------------------------------------------------------------ */
+/* decNumber package local type, tuning, and macro definitions       */
+/* ------------------------------------------------------------------ */
+/* This header file is included by all modules in the decNumber              */
+/* library, and contains local type definitions, tuning parameters,   */
+/* etc.         It should not need to be used by application programs.       */
+/* decNumber.h or one of decDouble (etc.) must be included first.     */
+/* ------------------------------------------------------------------ */
+
+#ifndef DECNUMBERLOCAL_H
+#define DECNUMBERLOCAL_H
+
+  #define DECVERSION   "decNumber 3.53" /* Package Version [16 max.] */
+  #define DECNLAUTHOR  "Mike Cowlishaw"              /* Who to blame */
+
+  #include "libdecnumber/dconfig.h"
+  #include "libdecnumber/decContext.h"
+
+  /* Conditional code flag -- set this to match hardware platform     */
+  /* 1=little-endian, 0=big-endian                                   */
+  #if WORDS_BIGENDIAN
+  #define DECLITEND 0
+  #else
+  #define DECLITEND 1
+  #endif
+
+  /* Conditional code flag -- set this to 1 for best performance      */
+  #define DECUSE64  1        /* 1=use int64s, 0=int32 & smaller only */
+
+  /* Conditional check flags -- set these to 0 for best performance   */
+  #define DECCHECK  0        /* 1 to enable robust checking          */
+  #define DECALLOC  0        /* 1 to enable memory accounting        */
+  #define DECTRACE  0        /* 1 to trace certain internals, etc.   */
+
+  /* Tuning parameter for decNumber (arbitrary precision) module      */
+  #define DECBUFFER 36       /* Size basis for local buffers.  This  */
+                             /* should be a common maximum precision */
+                             /* rounded up to a multiple of 4; must  */
+                             /* be zero or positive.                 */
+
+  /* ---------------------------------------------------------------- */
+  /* Definitions for all modules (general-purpose)                   */
+  /* ---------------------------------------------------------------- */
+
+  /* Local names for common types -- for safety, decNumber modules do */
+  /* not use int or long directly.                                   */
+  #define Flag  uint8_t
+  #define Byte  int8_t
+  #define uByte         uint8_t
+  #define Short         int16_t
+  #define uShort uint16_t
+  #define Int   int32_t
+  #define uInt  uint32_t
+  #define Unit  decNumberUnit
+  #if DECUSE64
+  #define Long  int64_t
+  #define uLong         uint64_t
+  #endif
+
+  /* Development-use definitions                                     */
+  typedef long int LI;       /* for printf arguments only            */
+  #define DECNOINT  0        /* 1 to check no internal use of 'int'  */
+  #if DECNOINT
+    /* if these interfere with your C includes, do not set DECNOINT   */
+    #define  int ?           /* enable to ensure that plain C 'int'  */
+    #define  long ??         /* .. or 'long' types are not used      */
+  #endif
+
+  /* Shared lookup tables                                            */
+  extern const uByte  DECSTICKYTAB[10]; /* re-round digits if sticky  */
+  extern const uLong  DECPOWERS[20];    /* powers of ten table        */
+  /* The following are included from decDPD.h                        */
+  extern const uShort DPD2BIN[1024];   /* DPD -> 0-999               */
+  extern const uShort BIN2DPD[1000];   /* 0-999 -> DPD               */
+  extern const uInt   DPD2BINK[1024];  /* DPD -> 0-999000            */
+  extern const uInt   DPD2BINM[1024];  /* DPD -> 0-999000000         */
+  extern const uByte  DPD2BCD8[4096];  /* DPD -> ddd + len           */
+  extern const uByte  BIN2BCD8[4000];  /* 0-999 -> ddd + len         */
+  extern const uShort BCD2DPD[2458];   /* 0-0x999 -> DPD (0x999=2457)*/
+
+  /* LONGMUL32HI -- set w=(u*v)>>32, where w, u, and v are uInts      */
+  /* (that is, sets w to be the high-order word of the 64-bit result; */
+  /* the low-order word is simply u*v.)                                      */
+  /* This version is derived from Knuth via Hacker's Delight;        */
+  /* it seems to optimize better than some others tried                      */
+  #define LONGMUL32HI(w, u, v) {            \
+    uInt u0, u1, v0, v1, w0, w1, w2, t;             \
+    u0=u & 0xffff; u1=u>>16;                \
+    v0=v & 0xffff; v1=v>>16;                \
+    w0=u0*v0;                               \
+    t=u1*v0 + (w0>>16);                             \
+    w1=t & 0xffff; w2=t>>16;                \
+    w1=u0*v1 + w1;                          \
+    (w)=u1*v1 + w2 + (w1>>16);}
+
+  /* ROUNDUP -- round an integer up to a multiple of n               */
+  #define ROUNDUP(i, n) ((((i)+(n)-1)/n)*n)
+
+  /* ROUNDDOWN -- round an integer down to a multiple of n           */
+  #define ROUNDDOWN(i, n) (((i)/n)*n)
+  #define ROUNDDOWN4(i)          ((i)&~3)      /* special for n=4            */
+
+  /* References to multi-byte sequences under different sizes        */
+  /* Refer to a uInt from four bytes starting at a char* or uByte*,   */
+  /* etc.                                                            */
+  #define UINTAT(b)   (*((uInt  *)(b)))
+  #define USHORTAT(b) (*((uShort *)(b)))
+  #define UBYTEAT(b)  (*((uByte         *)(b)))
+
+  /* X10 and X100 -- multiply integer i by 10 or 100                 */
+  /* [shifts are usually faster than multiply; could be conditional]  */
+  #define X10(i)  (((i)<<1)+((i)<<3))
+  #define X100(i) (((i)<<2)+((i)<<5)+((i)<<6))
+
+  /* MAXI and MINI -- general max & min (not in ANSI) for integers    */
+  #define MAXI(x,y) ((x)<(y)?(y):(x))
+  #define MINI(x,y) ((x)>(y)?(y):(x))
+
+  /* Useful constants                                                */
+  #define BILLION      1000000000           /* 10**9                 */
+  /* CHARMASK: 0x30303030 for ASCII/UTF8; 0xF0F0F0F0 for EBCDIC              */
+  #define CHARMASK ((((((((uInt)'0')<<8)+'0')<<8)+'0')<<8)+'0')
+
+
+  /* ---------------------------------------------------------------- */
+  /* Definitions for arbitrary-precision modules (only valid after    */
+  /* decNumber.h has been included)                                  */
+  /* ---------------------------------------------------------------- */
+
+  /* Limits and constants                                            */
+  #define DECNUMMAXP 999999999 /* maximum precision code can handle  */
+  #define DECNUMMAXE 999999999 /* maximum adjusted exponent ditto    */
+  #define DECNUMMINE -999999999 /* minimum adjusted exponent ditto    */
+  #if (DECNUMMAXP != DEC_MAX_DIGITS)
+    #error Maximum digits mismatch
+  #endif
+  #if (DECNUMMAXE != DEC_MAX_EMAX)
+    #error Maximum exponent mismatch
+  #endif
+  #if (DECNUMMINE != DEC_MIN_EMIN)
+    #error Minimum exponent mismatch
+  #endif
+
+  /* Set DECDPUNMAX -- the maximum integer that fits in DECDPUN              */
+  /* digits, and D2UTABLE -- the initializer for the D2U table       */
+  #if  DECDPUN==1
+    #define DECDPUNMAX 9
+    #define D2UTABLE {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,  \
+                     18,19,20,21,22,23,24,25,26,27,28,29,30,31,32, \
+                     33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, \
+                     48,49}
+  #elif DECDPUN==2
+    #define DECDPUNMAX 99
+    #define D2UTABLE {0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,  \
+                     11,11,12,12,13,13,14,14,15,15,16,16,17,17,18, \
+                     18,19,19,20,20,21,21,22,22,23,23,24,24,25}
+  #elif DECDPUN==3
+    #define DECDPUNMAX 999
+    #define D2UTABLE {0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7,  \
+                     8,8,8,9,9,9,10,10,10,11,11,11,12,12,12,13,13, \
+                     13,14,14,14,15,15,15,16,16,16,17}
+  #elif DECDPUN==4
+    #define DECDPUNMAX 9999
+    #define D2UTABLE {0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,  \
+                     6,6,6,7,7,7,7,8,8,8,8,9,9,9,9,10,10,10,10,11, \
+                     11,11,11,12,12,12,12,13}
+  #elif DECDPUN==5
+    #define DECDPUNMAX 99999
+    #define D2UTABLE {0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,  \
+                     5,5,5,5,6,6,6,6,6,7,7,7,7,7,8,8,8,8,8,9,9,9,  \
+                     9,9,10,10,10,10}
+  #elif DECDPUN==6
+    #define DECDPUNMAX 999999
+    #define D2UTABLE {0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,  \
+                     4,4,4,5,5,5,5,5,5,6,6,6,6,6,6,7,7,7,7,7,7,8,  \
+                     8,8,8,8,8,9}
+  #elif DECDPUN==7
+    #define DECDPUNMAX 9999999
+    #define D2UTABLE {0,1,1,1,1,1,1,1,2,2,2,2,2,2,2,3,3,3,3,3,3,3,  \
+                     4,4,4,4,4,4,4,5,5,5,5,5,5,5,6,6,6,6,6,6,6,7,  \
+                     7,7,7,7,7,7}
+  #elif DECDPUN==8
+    #define DECDPUNMAX 99999999
+    #define D2UTABLE {0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,  \
+                     3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,  \
+                     6,6,6,6,6,7}
+  #elif DECDPUN==9
+    #define DECDPUNMAX 999999999
+    #define D2UTABLE {0,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,  \
+                     3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,  \
+                     5,5,6,6,6,6}
+  #elif defined(DECDPUN)
+    #error DECDPUN must be in the range 1-9
+  #endif
+
+  /* ----- Shared data (in decNumber.c) ----- */
+  /* Public lookup table used by the D2U macro (see below)           */
+  #define DECMAXD2U 49
+  extern const uByte d2utable[DECMAXD2U+1];
+
+  /* ----- Macros ----- */
+  /* ISZERO -- return true if decNumber dn is a zero                 */
+  /* [performance-critical in some situations]                       */
+  #define ISZERO(dn) decNumberIsZero(dn)     /* now just a local name */
+
+  /* D2U -- return the number of Units needed to hold d digits       */
+  /* (runtime version, with table lookaside for small d)             */
+  #if DECDPUN==8
+    #define D2U(d) ((unsigned)((d)<=DECMAXD2U?d2utable[d]:((d)+7)>>3))
+  #elif DECDPUN==4
+    #define D2U(d) ((unsigned)((d)<=DECMAXD2U?d2utable[d]:((d)+3)>>2))
+  #else
+    #define D2U(d) ((d)<=DECMAXD2U?d2utable[d]:((d)+DECDPUN-1)/DECDPUN)
+  #endif
+  /* SD2U -- static D2U macro (for compile-time calculation)         */
+  #define SD2U(d) (((d)+DECDPUN-1)/DECDPUN)
+
+  /* MSUDIGITS -- returns digits in msu, from digits, calculated      */
+  /* using D2U                                                       */
+  #define MSUDIGITS(d) ((d)-(D2U(d)-1)*DECDPUN)
+
+  /* D2N -- return the number of decNumber structs that would be      */
+  /* needed to contain that number of digits (and the initial        */
+  /* decNumber struct) safely. Note that one Unit is included in the */
+  /* initial structure.         Used for allocating space that is aligned on */
+  /* a decNumber struct boundary. */
+  #define D2N(d) \
+    ((((SD2U(d)-1)*sizeof(Unit))+sizeof(decNumber)*2-1)/sizeof(decNumber))
+
+  /* TODIGIT -- macro to remove the leading digit from the unsigned   */
+  /* integer u at column cut (counting from the right, LSD=0) and     */
+  /* place it as an ASCII character into the character pointed to by  */
+  /* c.         Note that cut must be <= 9, and the maximum value for u is   */
+  /* 2,000,000,000 (as is needed for negative exponents of           */
+  /* subnormals).  The unsigned integer pow is used as a temporary    */
+  /* variable. */
+  #define TODIGIT(u, cut, c, pow) {      \
+    *(c)='0';                            \
+    pow=DECPOWERS[cut]*2;                \
+    if ((u)>pow) {                       \
+      pow*=4;                            \
+      if ((u)>=pow) {(u)-=pow; *(c)+=8;}  \
+      pow/=2;                            \
+      if ((u)>=pow) {(u)-=pow; *(c)+=4;}  \
+      pow/=2;                            \
+      }                                          \
+    if ((u)>=pow) {(u)-=pow; *(c)+=2;}   \
+    pow/=2;                              \
+    if ((u)>=pow) {(u)-=pow; *(c)+=1;}   \
+    }
+
+  /* ---------------------------------------------------------------- */
+  /* Definitions for fixed-precision modules (only valid after       */
+  /* decSingle.h, decDouble.h, or decQuad.h has been included)       */
+  /* ---------------------------------------------------------------- */
+
+  /* bcdnum -- a structure describing a format-independent finite     */
+  /* number, whose coefficient is a string of bcd8 uBytes            */
+  typedef struct {
+    uByte   *msd;            /* -> most significant digit            */
+    uByte   *lsd;            /* -> least ditto                       */
+    uInt     sign;           /* 0=positive, DECFLOAT_Sign=negative   */
+    Int             exponent;        /* Unadjusted signed exponent (q), or   */
+                             /* DECFLOAT_NaN etc. for a special      */
+    } bcdnum;
+
+  /* Test if exponent or bcdnum exponent must be a special, etc.      */
+  #define EXPISSPECIAL(exp) ((exp)>=DECFLOAT_MinSp)
+  #define EXPISINF(exp) (exp==DECFLOAT_Inf)
+  #define EXPISNAN(exp) (exp==DECFLOAT_qNaN || exp==DECFLOAT_sNaN)
+  #define NUMISSPECIAL(num) (EXPISSPECIAL((num)->exponent))
+
+  /* Refer to a 32-bit word or byte in a decFloat (df) by big-endian  */
+  /* (array) notation (the 0 word or byte contains the sign bit),     */
+  /* automatically adjusting for endianness; similarly address a word */
+  /* in the next-wider format (decFloatWider, or dfw)                */
+  #define DECWORDS  (DECBYTES/4)
+  #define DECWWORDS (DECWBYTES/4)
+  #if DECLITEND
+    #define DFWORD(df, off) ((df)->words[DECWORDS-1-(off)])
+    #define DFBYTE(df, off) ((df)->bytes[DECBYTES-1-(off)])
+    #define DFWWORD(dfw, off) ((dfw)->words[DECWWORDS-1-(off)])
+  #else
+    #define DFWORD(df, off) ((df)->words[off])
+    #define DFBYTE(df, off) ((df)->bytes[off])
+    #define DFWWORD(dfw, off) ((dfw)->words[off])
+  #endif
+
+  /* Tests for sign or specials, directly on DECFLOATs               */
+  #define DFISSIGNED(df)   (DFWORD(df, 0)&0x80000000)
+  #define DFISSPECIAL(df) ((DFWORD(df, 0)&0x78000000)==0x78000000)
+  #define DFISINF(df)    ((DFWORD(df, 0)&0x7c000000)==0x78000000)
+  #define DFISNAN(df)    ((DFWORD(df, 0)&0x7c000000)==0x7c000000)
+  #define DFISQNAN(df)   ((DFWORD(df, 0)&0x7e000000)==0x7c000000)
+  #define DFISSNAN(df)   ((DFWORD(df, 0)&0x7e000000)==0x7e000000)
+
+  /* Shared lookup tables                                            */
+  extern const uInt   DECCOMBMSD[64];  /* Combination field -> MSD   */
+  extern const uInt   DECCOMBFROM[48]; /* exp+msd -> Combination     */
+
+  /* Private generic (utility) routine                               */
+  #if DECCHECK || DECTRACE
+    extern void decShowNum(const bcdnum *, const char *);
+  #endif
+
+  /* Format-dependent macros and constants                           */
+  #if defined(DECPMAX)
+
+    /* Useful constants                                                      */
+    #define DECPMAX9  (ROUNDUP(DECPMAX, 9)/9)  /* 'Pmax' in 10**9s    */
+    /* Top words for a zero                                          */
+    #define SINGLEZERO  0x22500000
+    #define DOUBLEZERO  0x22380000
+    #define QUADZERO    0x22080000
+    /* [ZEROWORD is defined to be one of these in the DFISZERO macro] */
+
+    /* Format-dependent common tests:                                */
+    /*  DFISZERO   -- test for (any) zero                            */
+    /*  DFISCCZERO -- test for coefficient continuation being zero   */
+    /*  DFISCC01   -- test for coefficient contains only 0s and 1s   */
+    /*  DFISINT    -- test for finite and exponent q=0               */
+    /*  DFISUINT01 -- test for sign=0, finite, exponent q=0, and     */
+    /*                MSD=0 or 1                                     */
+    /*  ZEROWORD is also defined here.                               */
+    /* In DFISZERO the first test checks the least-significant word   */
+    /* (most likely to be non-zero); the penultimate tests MSD and    */
+    /* DPDs in the signword, and the final test excludes specials and */
+    /* MSD>7.  DFISINT similarly has to allow for the two forms of    */
+    /* MSD codes.  DFISUINT01 only has to allow for one form of MSD   */
+    /* code.                                                         */
+    #if DECPMAX==7
+      #define ZEROWORD SINGLEZERO
+      /* [test macros not needed except for Zero]                    */
+      #define DFISZERO(df)  ((DFWORD(df, 0)&0x1c0fffff)==0        \
+                         && (DFWORD(df, 0)&0x60000000)!=0x60000000)
+    #elif DECPMAX==16
+      #define ZEROWORD DOUBLEZERO
+      #define DFISZERO(df)  ((DFWORD(df, 1)==0                    \
+                         && (DFWORD(df, 0)&0x1c03ffff)==0         \
+                         && (DFWORD(df, 0)&0x60000000)!=0x60000000))
+      #define DFISINT(df) ((DFWORD(df, 0)&0x63fc0000)==0x22380000  \
+                        ||(DFWORD(df, 0)&0x7bfc0000)==0x6a380000)
+      #define DFISUINT01(df) ((DFWORD(df, 0)&0xfbfc0000)==0x22380000)
+      #define DFISCCZERO(df) (DFWORD(df, 1)==0                    \
+                         && (DFWORD(df, 0)&0x0003ffff)==0)
+      #define DFISCC01(df)  ((DFWORD(df, 0)&~0xfffc9124)==0       \
+                         && (DFWORD(df, 1)&~0x49124491)==0)
+    #elif DECPMAX==34
+      #define ZEROWORD QUADZERO
+      #define DFISZERO(df)  ((DFWORD(df, 3)==0                    \
+                         &&  DFWORD(df, 2)==0                     \
+                         &&  DFWORD(df, 1)==0                     \
+                         && (DFWORD(df, 0)&0x1c003fff)==0         \
+                         && (DFWORD(df, 0)&0x60000000)!=0x60000000))
+      #define DFISINT(df) ((DFWORD(df, 0)&0x63ffc000)==0x22080000  \
+                        ||(DFWORD(df, 0)&0x7bffc000)==0x6a080000)
+      #define DFISUINT01(df) ((DFWORD(df, 0)&0xfbffc000)==0x22080000)
+      #define DFISCCZERO(df) (DFWORD(df, 3)==0                    \
+                         &&  DFWORD(df, 2)==0                     \
+                         &&  DFWORD(df, 1)==0                     \
+                         && (DFWORD(df, 0)&0x00003fff)==0)
+
+      #define DFISCC01(df)   ((DFWORD(df, 0)&~0xffffc912)==0      \
+                         &&  (DFWORD(df, 1)&~0x44912449)==0       \
+                         &&  (DFWORD(df, 2)&~0x12449124)==0       \
+                         &&  (DFWORD(df, 3)&~0x49124491)==0)
+    #endif
+
+    /* Macros to test if a certain 10 bits of a uInt or pair of uInts */
+    /* are a canonical declet [higher or lower bits are ignored].     */
+    /* declet is at offset 0 (from the right) in a uInt:             */
+    #define CANONDPD(dpd) (((dpd)&0x300)==0 || ((dpd)&0x6e)!=0x6e)
+    /* declet is at offset k (a multiple of 2) in a uInt:            */
+    #define CANONDPDOFF(dpd, k) (((dpd)&(0x300<<(k)))==0           \
+      || ((dpd)&(((uInt)0x6e)<<(k)))!=(((uInt)0x6e)<<(k)))
+    /* declet is at offset k (a multiple of 2) in a pair of uInts:    */
+    /* [the top 2 bits will always be in the more-significant uInt]   */
+    #define CANONDPDTWO(hi, lo, k) (((hi)&(0x300>>(32-(k))))==0            \
+      || ((hi)&(0x6e>>(32-(k))))!=(0x6e>>(32-(k)))                 \
+      || ((lo)&(((uInt)0x6e)<<(k)))!=(((uInt)0x6e)<<(k)))
+
+    /* Macro to test whether a full-length (length DECPMAX) BCD8      */
+    /* coefficient is zero                                           */
+    /* test just the LSWord first, then the remainder                */
+    #if DECPMAX==7
+      #define ISCOEFFZERO(u) (UINTAT((u)+DECPMAX-4)==0             \
+       && UINTAT((u)+DECPMAX-7)==0)
+    #elif DECPMAX==16
+      #define ISCOEFFZERO(u) (UINTAT((u)+DECPMAX-4)==0             \
+       && (UINTAT((u)+DECPMAX-8)+UINTAT((u)+DECPMAX-12)            \
+          +UINTAT((u)+DECPMAX-16))==0)
+    #elif DECPMAX==34
+      #define ISCOEFFZERO(u) (UINTAT((u)+DECPMAX-4)==0             \
+       && (UINTAT((u)+DECPMAX-8) +UINTAT((u)+DECPMAX-12)           \
+          +UINTAT((u)+DECPMAX-16)+UINTAT((u)+DECPMAX-20)           \
+          +UINTAT((u)+DECPMAX-24)+UINTAT((u)+DECPMAX-28)           \
+          +UINTAT((u)+DECPMAX-32)+USHORTAT((u)+DECPMAX-34))==0)
+    #endif
+
+    /* Macros and masks for the exponent continuation field and MSD   */
+    /* Get the exponent continuation from a decFloat *df as an Int    */
+    #define GETECON(df) ((Int)((DFWORD((df), 0)&0x03ffffff)>>(32-6-DECECONL)))
+    /* Ditto, from the next-wider format                             */
+    #define GETWECON(df) ((Int)((DFWWORD((df), 0)&0x03ffffff)>>(32-6-DECWECONL)))
+    /* Get the biased exponent similarly                             */
+    #define GETEXP(df) ((Int)(DECCOMBEXP[DFWORD((df), 0)>>26]+GETECON(df)))
+    /* Get the unbiased exponent similarly                           */
+    #define GETEXPUN(df) ((Int)GETEXP(df)-DECBIAS)
+    /* Get the MSD similarly (as uInt)                               */
+    #define GETMSD(df)  (DECCOMBMSD[DFWORD((df), 0)>>26])
+
+    /* Compile-time computes of the exponent continuation field masks */
+    /* full exponent continuation field:                             */
+    #define ECONMASK ((0x03ffffff>>(32-6-DECECONL))<<(32-6-DECECONL))
+    /* same, not including its first digit (the qNaN/sNaN selector):  */
+    #define ECONNANMASK ((0x01ffffff>>(32-6-DECECONL))<<(32-6-DECECONL))
+
+    /* Macros to decode the coefficient in a finite decFloat *df into */
+    /* a BCD string (uByte *bcdin) of length DECPMAX uBytes          */
+
+    /* In-line sequence to convert 10 bits at right end of uInt dpd   */
+    /* to three BCD8 digits starting at uByte u.  Note that an extra  */
+    /* byte is written to the right of the three digits because this  */
+    /* moves four at a time for speed; the alternative macro moves    */
+    /* exactly three bytes                                           */
+    #define dpd2bcd8(u, dpd) {                          \
+      UINTAT(u)=UINTAT(&DPD2BCD8[((dpd)&0x3ff)*4]);}
+
+    #define dpd2bcd83(u, dpd) {                                 \
+      *(u)=DPD2BCD8[((dpd)&0x3ff)*4];                   \
+      *(u+1)=DPD2BCD8[((dpd)&0x3ff)*4+1];               \
+      *(u+2)=DPD2BCD8[((dpd)&0x3ff)*4+2];}
+
+    /* Decode the declets.  After extracting each one, it is decoded  */
+    /* to BCD8 using a table lookup (also used for variable-length    */
+    /* decode).         Each DPD decode is 3 bytes BCD8 plus a one-byte      */
+    /* length which is not used, here).         Fixed-length 4-byte moves    */
+    /* are fast, however, almost everywhere, and so are used except   */
+    /* for the final three bytes (to avoid overrun).  The code below  */
+    /* is 36 instructions for Doubles and about 70 for Quads, even    */
+    /* on IA32.                                                              */
+
+    /* Two macros are defined for each format:                       */
+    /*  GETCOEFF extracts the coefficient of the current format      */
+    /*  GETWCOEFF extracts the coefficient of the next-wider format. */
+    /* The latter is a copy of the next-wider GETCOEFF using DFWWORD. */
+
+    #if DECPMAX==7
+    #define GETCOEFF(df, bcd) {                                 \
+      uInt sourhi=DFWORD(df, 0);                        \
+      *(bcd)=(uByte)DECCOMBMSD[sourhi>>26];             \
+      dpd2bcd8(bcd+1, sourhi>>10);                      \
+      dpd2bcd83(bcd+4, sourhi);}
+    #define GETWCOEFF(df, bcd) {                        \
+      uInt sourhi=DFWWORD(df, 0);                       \
+      uInt sourlo=DFWWORD(df, 1);                       \
+      *(bcd)=(uByte)DECCOMBMSD[sourhi>>26];             \
+      dpd2bcd8(bcd+1, sourhi>>8);                       \
+      dpd2bcd8(bcd+4, (sourhi<<2) | (sourlo>>30));      \
+      dpd2bcd8(bcd+7, sourlo>>20);                      \
+      dpd2bcd8(bcd+10, sourlo>>10);                     \
+      dpd2bcd83(bcd+13, sourlo);}
+
+    #elif DECPMAX==16
+    #define GETCOEFF(df, bcd) {                                 \
+      uInt sourhi=DFWORD(df, 0);                        \
+      uInt sourlo=DFWORD(df, 1);                        \
+      *(bcd)=(uByte)DECCOMBMSD[sourhi>>26];             \
+      dpd2bcd8(bcd+1, sourhi>>8);                       \
+      dpd2bcd8(bcd+4, (sourhi<<2) | (sourlo>>30));      \
+      dpd2bcd8(bcd+7, sourlo>>20);                      \
+      dpd2bcd8(bcd+10, sourlo>>10);                     \
+      dpd2bcd83(bcd+13, sourlo);}
+    #define GETWCOEFF(df, bcd) {                        \
+      uInt sourhi=DFWWORD(df, 0);                       \
+      uInt sourmh=DFWWORD(df, 1);                       \
+      uInt sourml=DFWWORD(df, 2);                       \
+      uInt sourlo=DFWWORD(df, 3);                       \
+      *(bcd)=(uByte)DECCOMBMSD[sourhi>>26];             \
+      dpd2bcd8(bcd+1, sourhi>>4);                       \
+      dpd2bcd8(bcd+4, ((sourhi)<<6) | (sourmh>>26));    \
+      dpd2bcd8(bcd+7, sourmh>>16);                      \
+      dpd2bcd8(bcd+10, sourmh>>6);                      \
+      dpd2bcd8(bcd+13, ((sourmh)<<4) | (sourml>>28));   \
+      dpd2bcd8(bcd+16, sourml>>18);                     \
+      dpd2bcd8(bcd+19, sourml>>8);                      \
+      dpd2bcd8(bcd+22, ((sourml)<<2) | (sourlo>>30));   \
+      dpd2bcd8(bcd+25, sourlo>>20);                     \
+      dpd2bcd8(bcd+28, sourlo>>10);                     \
+      dpd2bcd83(bcd+31, sourlo);}
+
+    #elif DECPMAX==34
+    #define GETCOEFF(df, bcd) {                                 \
+      uInt sourhi=DFWORD(df, 0);                        \
+      uInt sourmh=DFWORD(df, 1);                        \
+      uInt sourml=DFWORD(df, 2);                        \
+      uInt sourlo=DFWORD(df, 3);                        \
+      *(bcd)=(uByte)DECCOMBMSD[sourhi>>26];             \
+      dpd2bcd8(bcd+1, sourhi>>4);                       \
+      dpd2bcd8(bcd+4, ((sourhi)<<6) | (sourmh>>26));    \
+      dpd2bcd8(bcd+7, sourmh>>16);                      \
+      dpd2bcd8(bcd+10, sourmh>>6);                      \
+      dpd2bcd8(bcd+13, ((sourmh)<<4) | (sourml>>28));   \
+      dpd2bcd8(bcd+16, sourml>>18);                     \
+      dpd2bcd8(bcd+19, sourml>>8);                      \
+      dpd2bcd8(bcd+22, ((sourml)<<2) | (sourlo>>30));   \
+      dpd2bcd8(bcd+25, sourlo>>20);                     \
+      dpd2bcd8(bcd+28, sourlo>>10);                     \
+      dpd2bcd83(bcd+31, sourlo);}
+
+      #define GETWCOEFF(df, bcd) {??} /* [should never be used]              */
+    #endif
+
+    /* Macros to decode the coefficient in a finite decFloat *df into */
+    /* a base-billion uInt array, with the least-significant         */
+    /* 0-999999999 'digit' at offset 0.                                      */
+
+    /* Decode the declets.  After extracting each one, it is decoded  */
+    /* to binary using a table lookup. Three tables are used; one    */
+    /* the usual DPD to binary, the other two pre-multiplied by 1000  */
+    /* and 1000000 to avoid multiplication during decode.  These      */
+    /* tables can also be used for multiplying up the MSD as the DPD  */
+    /* code for 0 through 9 is the identity.                         */
+    #define DPD2BIN0 DPD2BIN        /* for prettier code             */
+
+    #if DECPMAX==7
+    #define GETCOEFFBILL(df, buf) {                          \
+      uInt sourhi=DFWORD(df, 0);                             \
+      (buf)[0]=DPD2BIN0[sourhi&0x3ff]                        \
+             +DPD2BINK[(sourhi>>10)&0x3ff]                   \
+             +DPD2BINM[DECCOMBMSD[sourhi>>26]];}
+
+    #elif DECPMAX==16
+    #define GETCOEFFBILL(df, buf) {                          \
+      uInt sourhi, sourlo;                                   \
+      sourlo=DFWORD(df, 1);                                  \
+      (buf)[0]=DPD2BIN0[sourlo&0x3ff]                        \
+             +DPD2BINK[(sourlo>>10)&0x3ff]                   \
+             +DPD2BINM[(sourlo>>20)&0x3ff];                  \
+      sourhi=DFWORD(df, 0);                                  \
+      (buf)[1]=DPD2BIN0[((sourhi<<2) | (sourlo>>30))&0x3ff]   \
+             +DPD2BINK[(sourhi>>8)&0x3ff]                    \
+             +DPD2BINM[DECCOMBMSD[sourhi>>26]];}
+
+    #elif DECPMAX==34
+    #define GETCOEFFBILL(df, buf) {                          \
+      uInt sourhi, sourmh, sourml, sourlo;                   \
+      sourlo=DFWORD(df, 3);                                  \
+      (buf)[0]=DPD2BIN0[sourlo&0x3ff]                        \
+             +DPD2BINK[(sourlo>>10)&0x3ff]                   \
+             +DPD2BINM[(sourlo>>20)&0x3ff];                  \
+      sourml=DFWORD(df, 2);                                  \
+      (buf)[1]=DPD2BIN0[((sourml<<2) | (sourlo>>30))&0x3ff]   \
+             +DPD2BINK[(sourml>>8)&0x3ff]                    \
+             +DPD2BINM[(sourml>>18)&0x3ff];                  \
+      sourmh=DFWORD(df, 1);                                  \
+      (buf)[2]=DPD2BIN0[((sourmh<<4) | (sourml>>28))&0x3ff]   \
+             +DPD2BINK[(sourmh>>6)&0x3ff]                    \
+             +DPD2BINM[(sourmh>>16)&0x3ff];                  \
+      sourhi=DFWORD(df, 0);                                  \
+      (buf)[3]=DPD2BIN0[((sourhi<<6) | (sourmh>>26))&0x3ff]   \
+             +DPD2BINK[(sourhi>>4)&0x3ff]                    \
+             +DPD2BINM[DECCOMBMSD[sourhi>>26]];}
+
+    #endif
+
+    /* Macros to decode the coefficient in a finite decFloat *df into */
+    /* a base-thousand uInt array, with the least-significant 0-999   */
+    /* 'digit' at offset 0.                                          */
+
+    /* Decode the declets.  After extracting each one, it is decoded  */
+    /* to binary using a table lookup.                               */
+    #if DECPMAX==7
+    #define GETCOEFFTHOU(df, buf) {                          \
+      uInt sourhi=DFWORD(df, 0);                             \
+      (buf)[0]=DPD2BIN[sourhi&0x3ff];                        \
+      (buf)[1]=DPD2BIN[(sourhi>>10)&0x3ff];                  \
+      (buf)[2]=DECCOMBMSD[sourhi>>26];}
+
+    #elif DECPMAX==16
+    #define GETCOEFFTHOU(df, buf) {                          \
+      uInt sourhi, sourlo;                                   \
+      sourlo=DFWORD(df, 1);                                  \
+      (buf)[0]=DPD2BIN[sourlo&0x3ff];                        \
+      (buf)[1]=DPD2BIN[(sourlo>>10)&0x3ff];                  \
+      (buf)[2]=DPD2BIN[(sourlo>>20)&0x3ff];                  \
+      sourhi=DFWORD(df, 0);                                  \
+      (buf)[3]=DPD2BIN[((sourhi<<2) | (sourlo>>30))&0x3ff];   \
+      (buf)[4]=DPD2BIN[(sourhi>>8)&0x3ff];                   \
+      (buf)[5]=DECCOMBMSD[sourhi>>26];}
+
+    #elif DECPMAX==34
+    #define GETCOEFFTHOU(df, buf) {                          \
+      uInt sourhi, sourmh, sourml, sourlo;                   \
+      sourlo=DFWORD(df, 3);                                  \
+      (buf)[0]=DPD2BIN[sourlo&0x3ff];                        \
+      (buf)[1]=DPD2BIN[(sourlo>>10)&0x3ff];                  \
+      (buf)[2]=DPD2BIN[(sourlo>>20)&0x3ff];                  \
+      sourml=DFWORD(df, 2);                                  \
+      (buf)[3]=DPD2BIN[((sourml<<2) | (sourlo>>30))&0x3ff];   \
+      (buf)[4]=DPD2BIN[(sourml>>8)&0x3ff];                   \
+      (buf)[5]=DPD2BIN[(sourml>>18)&0x3ff];                  \
+      sourmh=DFWORD(df, 1);                                  \
+      (buf)[6]=DPD2BIN[((sourmh<<4) | (sourml>>28))&0x3ff];   \
+      (buf)[7]=DPD2BIN[(sourmh>>6)&0x3ff];                   \
+      (buf)[8]=DPD2BIN[(sourmh>>16)&0x3ff];                  \
+      sourhi=DFWORD(df, 0);                                  \
+      (buf)[9]=DPD2BIN[((sourhi<<6) | (sourmh>>26))&0x3ff];   \
+      (buf)[10]=DPD2BIN[(sourhi>>4)&0x3ff];                  \
+      (buf)[11]=DECCOMBMSD[sourhi>>26];}
+
+    #endif
+
+    /* Set a decFloat to the maximum positive finite number (Nmax)    */
+    #if DECPMAX==7
+    #define DFSETNMAX(df)           \
+      {DFWORD(df, 0)=0x77f3fcff;}
+    #elif DECPMAX==16
+    #define DFSETNMAX(df)           \
+      {DFWORD(df, 0)=0x77fcff3f;     \
+       DFWORD(df, 1)=0xcff3fcff;}
+    #elif DECPMAX==34
+    #define DFSETNMAX(df)           \
+      {DFWORD(df, 0)=0x77ffcff3;     \
+       DFWORD(df, 1)=0xfcff3fcf;     \
+       DFWORD(df, 2)=0xf3fcff3f;     \
+       DFWORD(df, 3)=0xcff3fcff;}
+    #endif
+
+  /* [end of format-dependent macros and constants]                  */
+  #endif
+
+#endif
diff --git a/include/libdecnumber/dpd/decimal128.h b/include/libdecnumber/dpd/decimal128.h
new file mode 100644 (file)
index 0000000..aff261e
--- /dev/null
@@ -0,0 +1,99 @@
+/* Decimal 128-bit format module header for the decNumber C Library.
+   Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+   Contributed by IBM Corporation.  Author Mike Cowlishaw.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2, or (at your option) any later
+   version.
+
+   In addition to the permissions in the GNU General Public License,
+   the Free Software Foundation gives you unlimited permission to link
+   the compiled version of this file into combinations with other
+   programs, and to distribute those combinations without any
+   restriction coming from the use of this file.  (The General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into a combine executable.)
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+/* ------------------------------------------------------------------ */
+/* Decimal 128-bit format module header                                      */
+/* ------------------------------------------------------------------ */
+
+#ifndef DECIMAL128_H
+#define DECIMAL128_H
+
+  #define DEC128NAME    "decimal128"                 /* Short name   */
+  #define DEC128FULLNAME "Decimal 128-bit Number"     /* Verbose name */
+  #define DEC128AUTHOR  "Mike Cowlishaw"             /* Who to blame */
+
+  /* parameters for decimal128s */
+  #define DECIMAL128_Bytes  16         /* length                     */
+  #define DECIMAL128_Pmax   34         /* maximum precision (digits) */
+  #define DECIMAL128_Emax   6144       /* maximum adjusted exponent  */
+  #define DECIMAL128_Emin  -6143       /* minimum adjusted exponent  */
+  #define DECIMAL128_Bias   6176       /* bias for the exponent      */
+  #define DECIMAL128_String 43         /* maximum string length, +1  */
+  #define DECIMAL128_EconL  12         /* exp. continuation length   */
+  /* highest biased exponent (Elimit-1)                                      */
+  #define DECIMAL128_Ehigh  (DECIMAL128_Emax+DECIMAL128_Bias-DECIMAL128_Pmax+1)
+
+  /* check enough digits, if pre-defined                             */
+  #if defined(DECNUMDIGITS)
+    #if (DECNUMDIGITS<DECIMAL128_Pmax)
+      #error decimal128.h needs pre-defined DECNUMDIGITS>=34 for safe use
+    #endif
+  #endif
+
+  #ifndef DECNUMDIGITS
+    #define DECNUMDIGITS DECIMAL128_Pmax /* size if not already defined*/
+  #endif
+  #include "libdecnumber/decNumber.h"
+
+  /* Decimal 128-bit type, accessible by bytes                       */
+  typedef struct {
+    uint8_t bytes[DECIMAL128_Bytes]; /* decimal128: 1, 5, 12, 110 bits*/
+    } decimal128;
+
+  /* special values [top byte excluding sign bit; last two bits are   */
+  /* don't-care for Infinity on input, last bit don't-care for NaN]   */
+  #if !defined(DECIMAL_NaN)
+    #define DECIMAL_NaN            0x7c        /* 0 11111 00 NaN             */
+    #define DECIMAL_sNaN    0x7e       /* 0 11111 10 sNaN            */
+    #define DECIMAL_Inf            0x78        /* 0 11110 00 Infinity        */
+  #endif
+
+  #include "decimal128Local.h"
+
+  /* ---------------------------------------------------------------- */
+  /* Routines                                                        */
+  /* ---------------------------------------------------------------- */
+
+
+  /* String conversions                                                      */
+  decimal128 * decimal128FromString(decimal128 *, const char *, decContext *);
+  char * decimal128ToString(const decimal128 *, char *);
+  char * decimal128ToEngString(const decimal128 *, char *);
+
+  /* decNumber conversions                                           */
+  decimal128 * decimal128FromNumber(decimal128 *, const decNumber *,
+                                   decContext *);
+  decNumber * decimal128ToNumber(const decimal128 *, decNumber *);
+
+  /* Format-dependent utilities                                              */
+  uint32_t    decimal128IsCanonical(const decimal128 *);
+  decimal128 * decimal128Canonical(decimal128 *, const decimal128 *);
+
+#endif
diff --git a/include/libdecnumber/dpd/decimal128Local.h b/include/libdecnumber/dpd/decimal128Local.h
new file mode 100644 (file)
index 0000000..9765427
--- /dev/null
@@ -0,0 +1,47 @@
+/* Local definitions for use with the decNumber C Library.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2, or (at your option) any later
+   version.
+
+   In addition to the permissions in the GNU General Public License,
+   the Free Software Foundation gives you unlimited permission to link
+   the compiled version of this file into combinations with other
+   programs, and to distribute those combinations without any
+   restriction coming from the use of this file.  (The General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into a combine executable.)
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+#if !defined(DECIMAL128LOCAL)
+
+/* The compiler needs sign manipulation functions for decimal128 which
+   are not part of the decNumber package.  */
+
+/* Set sign; this assumes the sign was previously zero.  */
+#define decimal128SetSign(d,b) \
+  { (d)->bytes[WORDS_BIGENDIAN ? 0 : 15] |= ((unsigned) (b) << 7); }
+
+/* Clear sign.  */
+#define decimal128ClearSign(d) \
+  { (d)->bytes[WORDS_BIGENDIAN ? 0 : 15] &= ~0x80; }
+
+/* Flip sign.  */
+#define decimal128FlipSign(d) \
+  { (d)->bytes[WORDS_BIGENDIAN ? 0 : 15] ^= 0x80; }
+
+#endif
diff --git a/include/libdecnumber/dpd/decimal32.h b/include/libdecnumber/dpd/decimal32.h
new file mode 100644 (file)
index 0000000..6cb9e43
--- /dev/null
@@ -0,0 +1,97 @@
+/* Decimal 32-bit format module header for the decNumber C Library.
+   Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+   Contributed by IBM Corporation.  Author Mike Cowlishaw.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2, or (at your option) any later
+   version.
+
+   In addition to the permissions in the GNU General Public License,
+   the Free Software Foundation gives you unlimited permission to link
+   the compiled version of this file into combinations with other
+   programs, and to distribute those combinations without any
+   restriction coming from the use of this file.  (The General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into a combine executable.)
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+/* ------------------------------------------------------------------ */
+/* Decimal 32-bit format module header                               */
+/* ------------------------------------------------------------------ */
+
+#ifndef DECIMAL32_H
+#define DECIMAL32_H
+
+  #define DEC32NAME    "decimal32"                   /* Short name   */
+  #define DEC32FULLNAME "Decimal 32-bit Number"              /* Verbose name */
+  #define DEC32AUTHOR  "Mike Cowlishaw"              /* Who to blame */
+
+  /* parameters for decimal32s */
+  #define DECIMAL32_Bytes  4           /* length                     */
+  #define DECIMAL32_Pmax   7           /* maximum precision (digits) */
+  #define DECIMAL32_Emax   96          /* maximum adjusted exponent  */
+  #define DECIMAL32_Emin  -95          /* minimum adjusted exponent  */
+  #define DECIMAL32_Bias   101         /* bias for the exponent      */
+  #define DECIMAL32_String 15          /* maximum string length, +1  */
+  #define DECIMAL32_EconL  6           /* exp. continuation length   */
+  /* highest biased exponent (Elimit-1)                                      */
+  #define DECIMAL32_Ehigh  (DECIMAL32_Emax+DECIMAL32_Bias-DECIMAL32_Pmax+1)
+
+  /* check enough digits, if pre-defined                             */
+  #if defined(DECNUMDIGITS)
+    #if (DECNUMDIGITS<DECIMAL32_Pmax)
+      #error decimal32.h needs pre-defined DECNUMDIGITS>=7 for safe use
+    #endif
+  #endif
+
+  #ifndef DECNUMDIGITS
+    #define DECNUMDIGITS DECIMAL32_Pmax /* size if not already defined*/
+  #endif
+  #include "libdecnumber/decNumber.h"
+
+  /* Decimal 32-bit type, accessible by bytes */
+  typedef struct {
+    uint8_t bytes[DECIMAL32_Bytes];    /* decimal32: 1, 5, 6, 20 bits*/
+    } decimal32;
+
+  /* special values [top byte excluding sign bit; last two bits are   */
+  /* don't-care for Infinity on input, last bit don't-care for NaN]   */
+  #if !defined(DECIMAL_NaN)
+    #define DECIMAL_NaN            0x7c        /* 0 11111 00 NaN             */
+    #define DECIMAL_sNaN    0x7e       /* 0 11111 10 sNaN            */
+    #define DECIMAL_Inf            0x78        /* 0 11110 00 Infinity        */
+  #endif
+
+  /* ---------------------------------------------------------------- */
+  /* Routines                                                        */
+  /* ---------------------------------------------------------------- */
+
+
+  /* String conversions                                                      */
+  decimal32 * decimal32FromString(decimal32 *, const char *, decContext *);
+  char * decimal32ToString(const decimal32 *, char *);
+  char * decimal32ToEngString(const decimal32 *, char *);
+
+  /* decNumber conversions                                           */
+  decimal32 * decimal32FromNumber(decimal32 *, const decNumber *,
+                                 decContext *);
+  decNumber * decimal32ToNumber(const decimal32 *, decNumber *);
+
+  /* Format-dependent utilities                                              */
+  uint32_t    decimal32IsCanonical(const decimal32 *);
+  decimal32 * decimal32Canonical(decimal32 *, const decimal32 *);
+
+#endif
diff --git a/include/libdecnumber/dpd/decimal64.h b/include/libdecnumber/dpd/decimal64.h
new file mode 100644 (file)
index 0000000..f29e570
--- /dev/null
@@ -0,0 +1,99 @@
+/* Decimal 64-bit format module header for the decNumber C Library.
+   Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+   Contributed by IBM Corporation.  Author Mike Cowlishaw.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2, or (at your option) any later
+   version.
+
+   In addition to the permissions in the GNU General Public License,
+   the Free Software Foundation gives you unlimited permission to link
+   the compiled version of this file into combinations with other
+   programs, and to distribute those combinations without any
+   restriction coming from the use of this file.  (The General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into a combine executable.)
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+/* ------------------------------------------------------------------ */
+/* Decimal 64-bit format module header                               */
+/* ------------------------------------------------------------------ */
+
+#ifndef DECIMAL64_H
+#define DECIMAL64_H
+
+  #define DEC64NAME    "decimal64"                   /* Short name   */
+  #define DEC64FULLNAME "Decimal 64-bit Number"              /* Verbose name */
+  #define DEC64AUTHOR  "Mike Cowlishaw"              /* Who to blame */
+
+
+  /* parameters for decimal64s                                       */
+  #define DECIMAL64_Bytes  8           /* length                     */
+  #define DECIMAL64_Pmax   16          /* maximum precision (digits) */
+  #define DECIMAL64_Emax   384         /* maximum adjusted exponent  */
+  #define DECIMAL64_Emin  -383         /* minimum adjusted exponent  */
+  #define DECIMAL64_Bias   398         /* bias for the exponent      */
+  #define DECIMAL64_String 24          /* maximum string length, +1  */
+  #define DECIMAL64_EconL  8           /* exp. continuation length   */
+  /* highest biased exponent (Elimit-1)                                      */
+  #define DECIMAL64_Ehigh  (DECIMAL64_Emax+DECIMAL64_Bias-DECIMAL64_Pmax+1)
+
+  /* check enough digits, if pre-defined                             */
+  #if defined(DECNUMDIGITS)
+    #if (DECNUMDIGITS<DECIMAL64_Pmax)
+      #error decimal64.h needs pre-defined DECNUMDIGITS>=16 for safe use
+    #endif
+  #endif
+
+
+  #ifndef DECNUMDIGITS
+    #define DECNUMDIGITS DECIMAL64_Pmax /* size if not already defined*/
+  #endif
+  #include "libdecnumber/decNumber.h"
+
+  /* Decimal 64-bit type, accessible by bytes                        */
+  typedef struct {
+    uint8_t bytes[DECIMAL64_Bytes];    /* decimal64: 1, 5, 8, 50 bits*/
+    } decimal64;
+
+  /* special values [top byte excluding sign bit; last two bits are   */
+  /* don't-care for Infinity on input, last bit don't-care for NaN]   */
+  #if !defined(DECIMAL_NaN)
+    #define DECIMAL_NaN            0x7c        /* 0 11111 00 NaN             */
+    #define DECIMAL_sNaN    0x7e       /* 0 11111 10 sNaN            */
+    #define DECIMAL_Inf            0x78        /* 0 11110 00 Infinity        */
+  #endif
+
+  /* ---------------------------------------------------------------- */
+  /* Routines                                                        */
+  /* ---------------------------------------------------------------- */
+
+
+  /* String conversions                                                      */
+  decimal64 * decimal64FromString(decimal64 *, const char *, decContext *);
+  char * decimal64ToString(const decimal64 *, char *);
+  char * decimal64ToEngString(const decimal64 *, char *);
+
+  /* decNumber conversions                                           */
+  decimal64 * decimal64FromNumber(decimal64 *, const decNumber *,
+                                 decContext *);
+  decNumber * decimal64ToNumber(const decimal64 *, decNumber *);
+
+  /* Format-dependent utilities                                              */
+  uint32_t    decimal64IsCanonical(const decimal64 *);
+  decimal64 * decimal64Canonical(decimal64 *, const decimal64 *);
+
+#endif
index acd27018e940eb97867bf84222ea37f1da4166f4..a687ac0efea8b1096f4e45bda580dea59e660bc2 100644 (file)
 #ifndef MIGRATION_BLOCKER_H
 #define MIGRATION_BLOCKER_H
 
+#include "qapi/qapi-types-migration.h"
+
+#define MIG_MODE_ALL MIG_MODE__MAX
+
+/**
+ * @migrate_add_blocker - prevent all modes of migration from proceeding
+ *
+ * @reasonp - address of an error to be returned whenever migration is attempted
+ *
+ * @errp - [out] The reason (if any) we cannot block migration right now.
+ *
+ * @returns - 0 on success, -EBUSY/-EACCES on failure, with errp set.
+ *
+ * *@reasonp is freed and set to NULL if failure is returned.
+ * On success, the caller must not free @reasonp, except by
+ *   calling migrate_del_blocker.
+ */
+int migrate_add_blocker(Error **reasonp, Error **errp);
+
+/**
+ * @migrate_add_blocker_internal - prevent all modes of migration from
+ *                                 proceeding, but ignore -only-migratable
+ *
+ * @reasonp - address of an error to be returned whenever migration is attempted
+ *
+ * @errp - [out] The reason (if any) we cannot block migration right now.
+ *
+ * @returns - 0 on success, -EBUSY on failure, with errp set.
+ *
+ * Some of the migration blockers can be temporary (e.g., for a few seconds),
+ * so it shouldn't need to conflict with "-only-migratable".  For those cases,
+ * we can call this function rather than @migrate_add_blocker().
+ *
+ * *@reasonp is freed and set to NULL if failure is returned.
+ * On success, the caller must not free @reasonp, except by
+ *   calling migrate_del_blocker.
+ */
+int migrate_add_blocker_internal(Error **reasonp, Error **errp);
+
+/**
+ * @migrate_del_blocker - remove a migration blocker from all modes and free it.
+ *
+ * @reasonp - address of the error blocking migration
+ *
+ * This function frees *@reasonp and sets it to NULL.
+ */
+void migrate_del_blocker(Error **reasonp);
+
 /**
- * @migrate_add_blocker - prevent migration from proceeding
+ * @migrate_add_blocker_normal - prevent normal migration mode from proceeding
  *
- * @reason - an error to be returned whenever migration is attempted
+ * @reasonp - address of an error to be returned whenever migration is attempted
  *
  * @errp - [out] The reason (if any) we cannot block migration right now.
  *
  * @returns - 0 on success, -EBUSY/-EACCES on failure, with errp set.
+ *
+ * *@reasonp is freed and set to NULL if failure is returned.
+ * On success, the caller must not free @reasonp, except by
+ *   calling migrate_del_blocker.
  */
-int migrate_add_blocker(Error *reason, Error **errp);
+int migrate_add_blocker_normal(Error **reasonp, Error **errp);
 
 /**
- * @migrate_del_blocker - remove a blocking error from migration
+ * @migrate_add_blocker_modes - prevent some modes of migration from proceeding
+ *
+ * @reasonp - address of an error to be returned whenever migration is attempted
+ *
+ * @errp - [out] The reason (if any) we cannot block migration right now.
+ *
+ * @mode - one or more migration modes to be blocked.  The list is terminated
+ *         by -1 or MIG_MODE_ALL.  For the latter, all modes are blocked.
+ *
+ * @returns - 0 on success, -EBUSY/-EACCES on failure, with errp set.
  *
- * @reason - the error blocking migration
+ * *@reasonp is freed and set to NULL if failure is returned.
+ * On success, the caller must not free *@reasonp before the blocker is removed.
  */
-void migrate_del_blocker(Error *reason);
+int migrate_add_blocker_modes(Error **reasonp, Error **errp, MigMode mode, ...);
 
 #endif
index 768e1f04c3a60fc9ee98928333be46b6a10f5baa..eaac07f26d013134238877e80d72c2c52d10c720 100644 (file)
@@ -28,7 +28,6 @@ bool migration_in_colo_state(void);
 int migration_incoming_enable_colo(void);
 void migration_incoming_disable_colo(void);
 bool migration_incoming_colo_enabled(void);
-void *colo_process_incoming_thread(void *opaque);
 bool migration_incoming_in_colo_state(void);
 
 COLOMode get_colo_mode(void);
@@ -36,5 +35,21 @@ COLOMode get_colo_mode(void);
 /* failover */
 void colo_do_failover(void);
 
-void colo_checkpoint_notify(void *opaque);
+/*
+ * colo_checkpoint_delay_set
+ *
+ * Handles change of x-checkpoint-delay migration parameter, called from
+ * migrate_params_apply() to notify COLO module about the change.
+ */
+void colo_checkpoint_delay_set(void);
+
+/*
+ * Starts COLO incoming process. Called from process_incoming_migration_co()
+ * after loading the state.
+ *
+ * Called with BQL locked, may temporary release BQL.
+ */
+int coroutine_fn colo_incoming_co(void);
+
+void colo_shutdown(void);
 #endif
index 945eb35d5b616e30317099cdb22360814fb2ce32..d7c2cd3216e85128d0d6ae15c359fb3d485bd28f 100644 (file)
@@ -16,7 +16,7 @@
 #include "qapi/qapi-types-run-state.h"
 
 void register_global_state(void);
-int global_state_store(void);
+void global_state_store(void);
 void global_state_store_running(void);
 bool global_state_received(void);
 RunState global_state_get_runstate(void);
index 34e7d7571382d7b4cc043863a0e44d8fcc867ded..1bc8902e6d2203f5199136e3073c60504cd43dad 100644 (file)
@@ -15,6 +15,7 @@
 #define MIGRATION_MISC_H
 
 #include "qemu/notify.h"
+#include "qapi/qapi-types-migration.h"
 #include "qapi/qapi-types-net.h"
 
 /* migration/ram.c */
@@ -37,10 +38,10 @@ void precopy_infrastructure_init(void);
 void precopy_add_notifier(NotifierWithReturn *n);
 void precopy_remove_notifier(NotifierWithReturn *n);
 int precopy_notify(PrecopyNotifyReason reason, Error **errp);
-void precopy_enable_free_page_optimization(void);
 
 void ram_mig_init(void);
 void qemu_guest_free_page_hint(void *addr, size_t len);
+bool migrate_ram_is_ignored(RAMBlock *block);
 
 /* migration/block.c */
 
@@ -58,19 +59,23 @@ void dump_vmstate_json_to_file(FILE *out_fp);
 /* migration/migration.c */
 void migration_object_init(void);
 void migration_shutdown(void);
-void qemu_start_incoming_migration(const char *uri, Error **errp);
 bool migration_is_idle(void);
 bool migration_is_active(MigrationState *);
-void add_migration_state_change_notifier(Notifier *notify);
-void remove_migration_state_change_notifier(Notifier *notify);
+void migration_add_notifier(Notifier *notify,
+                            void (*func)(Notifier *notifier, void *data));
+void migration_remove_notifier(Notifier *notify);
+void migration_call_notifiers(MigrationState *s);
 bool migration_in_setup(MigrationState *);
 bool migration_has_finished(MigrationState *);
 bool migration_has_failed(MigrationState *);
 /* ...and after the device transmission */
 bool migration_in_postcopy_after_devices(MigrationState *);
-void migration_global_dump(Monitor *mon);
-/* True if incomming migration entered POSTCOPY_INCOMING_DISCARD */
+/* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */
 bool migration_in_incoming_postcopy(void);
+/* True if incoming migration entered POSTCOPY_INCOMING_ADVISE */
+bool migration_incoming_postcopy_advised(void);
+/* True if background snapshot is active */
+bool migration_in_bg_snapshot(void);
 
 /* migration/block-dirty-bitmap.c */
 void dirty_bitmap_mig_init(void);
index 2867e3da84ab94eda680b9f0033a7f370ec75a3c..9ba163f333ef779c703e5eeabe56e76c6306ff4b 100644 (file)
@@ -35,7 +35,7 @@ void qemu_put_byte(QEMUFile *f, int v);
 void qemu_put_be16(QEMUFile *f, unsigned int v);
 void qemu_put_be32(QEMUFile *f, unsigned int v);
 void qemu_put_be64(QEMUFile *f, uint64_t v);
-size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size);
+size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size);
 
 int qemu_get_byte(QEMUFile *f);
 
@@ -161,10 +161,20 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
     qemu_get_be64s(f, (uint64_t *)pv);
 }
 
-size_t qemu_get_counted_string(QEMUFile *f, char buf[256]);
+size_t coroutine_mixed_fn qemu_get_counted_string(QEMUFile *f, char buf[256]);
 
 void qemu_put_counted_string(QEMUFile *f, const char *name);
 
-int qemu_file_rate_limit(QEMUFile *f);
+/**
+ * migration_rate_exceeded: Check if we have exceeded rate for this interval
+ *
+ * Checks if we have already transferred more data that we are allowed
+ * in the current interval.
+ *
+ * @f: QEMUFile used for main migration channel
+ *
+ * Returns if we should stop sending data for this interval.
+ */
+bool migration_rate_exceeded(QEMUFile *f);
 
 #endif
index c1dcff0f9073311215da85da078eb7a1bdbb2ba4..fed1d04a3c3bca8dbcd991c6a037ad8f60f4c8cc 100644 (file)
@@ -20,6 +20,12 @@ typedef struct SaveVMHandlers {
     /* This runs inside the iothread lock.  */
     SaveStateHandler *save_state;
 
+    /*
+     * save_prepare is called early, even before migration starts, and can be
+     * used to perform early checks.
+     */
+    int (*save_prepare)(void *opaque, Error **errp);
+    int (*save_setup)(QEMUFile *f, void *opaque);
     void (*save_cleanup)(void *opaque);
     int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque);
     int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);
@@ -45,29 +51,33 @@ typedef struct SaveVMHandlers {
     int (*save_live_iterate)(QEMUFile *f, void *opaque);
 
     /* This runs outside the iothread lock!  */
-    int (*save_setup)(QEMUFile *f, void *opaque);
-    void (*save_live_pending)(QEMUFile *f, void *opaque,
-                              uint64_t threshold_size,
-                              uint64_t *res_precopy_only,
-                              uint64_t *res_compatible,
-                              uint64_t *res_postcopy_only);
     /* Note for save_live_pending:
-     * - res_precopy_only is for data which must be migrated in precopy phase
-     *     or in stopped state, in other words - before target vm start
-     * - res_compatible is for data which may be migrated in any phase
-     * - res_postcopy_only is for data which must be migrated in postcopy phase
-     *     or in stopped state, in other words - after source vm stop
+     * must_precopy:
+     * - must be migrated in precopy or in stopped state
+     * - i.e. must be migrated before target start
      *
-     * Sum of res_postcopy_only, res_compatible and res_postcopy_only is the
-     * whole amount of pending data.
+     * can_postcopy:
+     * - can migrate in postcopy or in stopped state
+     * - i.e. can migrate after target start
+     * - some can also be migrated during precopy (RAM)
+     * - some must be migrated after source stops (block-dirty-bitmap)
+     *
+     * Sum of can_postcopy and must_postcopy is the whole amount of
+     * pending data.
      */
-
-
+    /* This estimates the remaining data to transfer */
+    void (*state_pending_estimate)(void *opaque, uint64_t *must_precopy,
+                                   uint64_t *can_postcopy);
+    /* This calculate the exact remaining data to transfer */
+    void (*state_pending_exact)(void *opaque, uint64_t *must_precopy,
+                                uint64_t *can_postcopy);
     LoadStateHandler *load_state;
     int (*load_setup)(QEMUFile *f, void *opaque);
     int (*load_cleanup)(void *opaque);
     /* Called when postcopy migration wants to resume from failure */
     int (*resume_prepare)(MigrationState *s, void *opaque);
+    /* Checks if switchover ack should be used. Called only in dest */
+    bool (*switchover_ack_needed)(void *opaque);
 } SaveVMHandlers;
 
 int register_savevm_live(const char *idstr,
index c85b6ec75b710931bf07ced23c9ccfb8bed3ac79..e72083b117a65addeb0092a9e3085e23db344993 100644 (file)
 #ifndef QEMU_MIGRATION_SNAPSHOT_H
 #define QEMU_MIGRATION_SNAPSHOT_H
 
-int save_snapshot(const char *name, Error **errp);
-int load_snapshot(const char *name, Error **errp);
+#include "qapi/qapi-builtin-types.h"
+
+/**
+ * save_snapshot: Save an internal snapshot.
+ * @name: name of internal snapshot
+ * @overwrite: replace existing snapshot with @name
+ * @vmstate: blockdev node name to store VM state in
+ * @has_devices: whether to use explicit device list
+ * @devices: explicit device list to snapshot
+ * @errp: pointer to error object
+ * On success, return %true.
+ * On failure, store an error through @errp and return %false.
+ */
+bool save_snapshot(const char *name, bool overwrite,
+                   const char *vmstate,
+                   bool has_devices, strList *devices,
+                   Error **errp);
+
+/**
+ * load_snapshot: Load an internal snapshot.
+ * @name: name of internal snapshot
+ * @vmstate: blockdev node name to load VM state from
+ * @has_devices: whether to use explicit device list
+ * @devices: explicit device list to snapshot
+ * @errp: pointer to error object
+ * On success, return %true.
+ * On failure, store an error through @errp and return %false.
+ */
+bool load_snapshot(const char *name,
+                   const char *vmstate,
+                   bool has_devices, strList *devices,
+                   Error **errp);
+
+/**
+ * delete_snapshot: Delete a snapshot.
+ * @name: path to snapshot
+ * @has_devices: whether to use explicit device list
+ * @devices: explicit device list to snapshot
+ * @errp: pointer to error object
+ * On success, return %true.
+ * On failure, store an error through @errp and return %false.
+ */
+bool delete_snapshot(const char *name,
+                    bool has_devices, strList *devices,
+                    Error **errp);
 
 #endif
index 4d71dc8fbaa99c34979eaed8af335337639083f5..9821918631655a27af63c8222f294048ea61e169 100644 (file)
@@ -41,9 +41,11 @@ typedef struct VMStateField VMStateField;
  */
 struct VMStateInfo {
     const char *name;
-    int (*get)(QEMUFile *f, void *pv, size_t size, const VMStateField *field);
-    int (*put)(QEMUFile *f, void *pv, size_t size, const VMStateField *field,
-               QJSON *vmdesc);
+    int coroutine_mixed_fn (*get)(QEMUFile *f, void *pv, size_t size,
+                                  const VMStateField *field);
+    int coroutine_mixed_fn (*put)(QEMUFile *f, void *pv, size_t size,
+                                  const VMStateField *field,
+                                  JSONWriter *vmdesc);
 };
 
 enum VMStateFlags {
@@ -147,12 +149,16 @@ enum VMStateFlags {
      * VMStateField.struct_version_id to tell which version of the
      * structure we are referencing to use. */
     VMS_VSTRUCT           = 0x8000,
+
+    /* Marker for end of list */
+    VMS_END = 0x10000
 };
 
 typedef enum {
     MIG_PRI_DEFAULT = 0,
     MIG_PRI_IOMMU,              /* Must happen before PCI devices */
     MIG_PRI_PCI_BUS,            /* Must happen before IOMMU */
+    MIG_PRI_VIRTIO_MEM,         /* Must happen before IOMMU */
     MIG_PRI_GICV3_ITS,          /* Must happen before PCI devices */
     MIG_PRI_GICV3,              /* Must happen before the ITS */
     MIG_PRI_MAX,
@@ -177,12 +183,24 @@ struct VMStateField {
 
 struct VMStateDescription {
     const char *name;
-    int unmigratable;
+    bool unmigratable;
+    /*
+     * This VMSD describes something that should be sent during setup phase
+     * of migration. It plays similar role as save_setup() for explicitly
+     * registered vmstate entries, so it can be seen as a way to describe
+     * save_setup() in VMSD structures.
+     *
+     * Note that for now, a SaveStateEntry cannot have a VMSD and
+     * operations (e.g., save_setup()) set at the same time. Consequently,
+     * save_setup() and a VMSD with early_setup set to true are mutually
+     * exclusive. For this reason, also early_setup VMSDs are migrated in a
+     * QEMU_VM_SECTION_FULL section, while save_setup() data is migrated in
+     * a QEMU_VM_SECTION_START section.
+     */
+    bool early_setup;
     int version_id;
     int minimum_version_id;
-    int minimum_version_id_old;
     MigrationPriority priority;
-    LoadStateHandler *load_state_old;
     int (*pre_load)(void *opaque);
     int (*post_load)(void *opaque, int version_id);
     int (*pre_save)(void *opaque);
@@ -194,8 +212,6 @@ struct VMStateDescription {
     const VMStateDescription **subsections;
 };
 
-extern const VMStateDescription vmstate_dummy;
-
 extern const VMStateInfo vmstate_info_bool;
 
 extern const VMStateInfo vmstate_info_int8;
@@ -708,8 +724,9 @@ extern const VMStateInfo vmstate_info_qlist;
  *        '_state' type
  *    That the pointer is right at the start of _tmp_type.
  */
-#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) {                 \
+#define VMSTATE_WITH_TMP_TEST(_state, _test, _tmp_type, _vmsd) {     \
     .name         = "tmp",                                           \
+    .field_exists = (_test),                                         \
     .size         = sizeof(_tmp_type) +                              \
                     QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \
                     type_check_pointer(_state,                       \
@@ -718,6 +735,9 @@ extern const VMStateInfo vmstate_info_qlist;
     .info         = &vmstate_info_tmp,                               \
 }
 
+#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) \
+    VMSTATE_WITH_TMP_TEST(_state, NULL, _tmp_type, _vmsd)
+
 #define VMSTATE_UNUSED_BUFFER(_test, _version, _size) {              \
     .name         = "unused",                                        \
     .field_exists = (_test),                                         \
@@ -741,8 +761,9 @@ extern const VMStateInfo vmstate_info_qlist;
 /* _field_size should be a int32_t field in the _state struct giving the
  * size of the bitmap _field in bits.
  */
-#define VMSTATE_BITMAP(_field, _state, _version, _field_size) {      \
+#define VMSTATE_BITMAP_TEST(_field, _state, _test, _version, _field_size) { \
     .name         = (stringify(_field)),                             \
+    .field_exists = (_test),                                         \
     .version_id   = (_version),                                      \
     .size_offset  = vmstate_offset_value(_state, _field_size, int32_t),\
     .info         = &vmstate_info_bitmap,                            \
@@ -750,6 +771,9 @@ extern const VMStateInfo vmstate_info_qlist;
     .offset       = offsetof(_state, _field),                        \
 }
 
+#define VMSTATE_BITMAP(_field, _state, _version, _field_size) \
+    VMSTATE_BITMAP_TEST(_field, _state, NULL, _version, _field_size)
+
 /* For migrating a QTAILQ.
  * Target QTAILQ needs be properly initialized.
  * _type: type of QTAILQ element
@@ -1164,16 +1188,21 @@ extern const VMStateInfo vmstate_info_qlist;
     VMSTATE_UNUSED_BUFFER(_test, 0, _size)
 
 #define VMSTATE_END_OF_LIST()                                         \
-    {}
+    {                     \
+        .flags = VMS_END, \
+    }
 
 int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
                        void *opaque, int version_id);
 int vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
-                       void *opaque, QJSON *vmdesc);
+                       void *opaque, JSONWriter *vmdesc);
+int vmstate_save_state_with_err(QEMUFile *f, const VMStateDescription *vmsd,
+                       void *opaque, JSONWriter *vmdesc, Error **errp);
 int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd,
-                         void *opaque, QJSON *vmdesc, int version_id);
+                         void *opaque, JSONWriter *vmdesc,
+                         int version_id, Error **errp);
 
-bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque);
+bool vmstate_section_needed(const VMStateDescription *vmsd, void *opaque);
 
 #define  VMSTATE_INSTANCE_ID_ANY  -1
 
@@ -1184,7 +1213,15 @@ int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
                                    int required_for_version,
                                    Error **errp);
 
-/* Returns: 0 on success, -1 on failure */
+/**
+ * vmstate_register() - legacy function to register state
+ * serialisation description
+ *
+ * New code shouldn't be using this function as QOM-ified devices have
+ * dc->vmsd to store the serialisation description.
+ *
+ * Returns: 0 on success, -1 on failure
+ */
 static inline int vmstate_register(VMStateIf *obj, int instance_id,
                                    const VMStateDescription *vmsd,
                                    void *opaque)
@@ -1193,6 +1230,34 @@ static inline int vmstate_register(VMStateIf *obj, int instance_id,
                                           opaque, -1, 0, NULL);
 }
 
+/**
+ * vmstate_replace_hack_for_ppc() - ppc used to abuse vmstate_register
+ *
+ * Don't even think about using this function in new code.
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+int vmstate_replace_hack_for_ppc(VMStateIf *obj, int instance_id,
+                                 const VMStateDescription *vmsd,
+                                 void *opaque);
+
+/**
+ * vmstate_register_any() - legacy function to register state
+ * serialisation description and let the function choose the id
+ *
+ * New code shouldn't be using this function as QOM-ified devices have
+ * dc->vmsd to store the serialisation description.
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+static inline int vmstate_register_any(VMStateIf *obj,
+                                       const VMStateDescription *vmsd,
+                                       void *opaque)
+{
+    return vmstate_register_with_alias_id(obj, VMSTATE_INSTANCE_ID_ANY, vmsd,
+                                          opaque, -1, 0, NULL);
+}
+
 void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd,
                         void *opaque);
 
index 60fc92722aed6defa0a6608e6b75ed9b0c4760d0..d78e979f0532354623528d55eb2d6e46006fb917 100644 (file)
@@ -48,6 +48,14 @@ void hmp_info_mem(Monitor *mon, const QDict *qdict);
 void hmp_info_tlb(Monitor *mon, const QDict *qdict);
 void hmp_mce(Monitor *mon, const QDict *qdict);
 void hmp_info_local_apic(Monitor *mon, const QDict *qdict);
-void hmp_info_io_apic(Monitor *mon, const QDict *qdict);
+void hmp_info_sev(Monitor *mon, const QDict *qdict);
+void hmp_info_sgx(Monitor *mon, const QDict *qdict);
+void hmp_info_via(Monitor *mon, const QDict *qdict);
+void hmp_memory_dump(Monitor *mon, const QDict *qdict);
+void hmp_physical_memory_dump(Monitor *mon, const QDict *qdict);
+void hmp_info_registers(Monitor *mon, const QDict *qdict);
+void hmp_gva2gpa(Monitor *mon, const QDict *qdict);
+void hmp_gpa2hva(Monitor *mon, const QDict *qdict);
+void hmp_gpa2hpa(Monitor *mon, const QDict *qdict);
 
 #endif /* MONITOR_HMP_TARGET_H */
index ed2913fd18e801164c691e308aae6f27c03521da..13f9a2dedb89e6b853d5856f0efd9d14c4bb1fba 100644 (file)
 #define HMP_H
 
 #include "qemu/readline.h"
+#include "qapi/qapi-types-common.h"
 
-void hmp_handle_error(Monitor *mon, Error *err);
+bool hmp_handle_error(Monitor *mon, Error *err);
+void hmp_help_cmd(Monitor *mon, const char *name);
+strList *hmp_split_at_comma(const char *str);
 
 void hmp_info_name(Monitor *mon, const QDict *qdict);
 void hmp_info_version(Monitor *mon, const QDict *qdict);
@@ -28,7 +31,6 @@ void hmp_info_mice(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
-void hmp_info_migrate_cache_size(Monitor *mon, const QDict *qdict);
 void hmp_info_cpus(Monitor *mon, const QDict *qdict);
 void hmp_info_vnc(Monitor *mon, const QDict *qdict);
 void hmp_info_spice(Monitor *mon, const QDict *qdict);
@@ -54,6 +56,7 @@ void hmp_ringbuf_read(Monitor *mon, const QDict *qdict);
 void hmp_cont(Monitor *mon, const QDict *qdict);
 void hmp_system_wakeup(Monitor *mon, const QDict *qdict);
 void hmp_nmi(Monitor *mon, const QDict *qdict);
+void hmp_info_network(Monitor *mon, const QDict *qdict);
 void hmp_set_link(Monitor *mon, const QDict *qdict);
 void hmp_balloon(Monitor *mon, const QDict *qdict);
 void hmp_loadvm(Monitor *mon, const QDict *qdict);
@@ -64,17 +67,22 @@ void hmp_migrate_continue(Monitor *mon, const QDict *qdict);
 void hmp_migrate_incoming(Monitor *mon, const QDict *qdict);
 void hmp_migrate_recover(Monitor *mon, const QDict *qdict);
 void hmp_migrate_pause(Monitor *mon, const QDict *qdict);
-void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict);
-void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict);
-void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict);
 void hmp_client_migrate_info(Monitor *mon, const QDict *qdict);
 void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict);
 void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict);
 void hmp_set_password(Monitor *mon, const QDict *qdict);
 void hmp_expire_password(Monitor *mon, const QDict *qdict);
 void hmp_change(Monitor *mon, const QDict *qdict);
+#ifdef CONFIG_VNC
+void hmp_change_vnc(Monitor *mon, const char *device, const char *target,
+                    const char *arg, const char *read_only, bool force,
+                    Error **errp);
+#endif
+void hmp_change_medium(Monitor *mon, const char *device, const char *target,
+                       const char *arg, const char *read_only, bool force,
+                       Error **errp);
 void hmp_migrate(Monitor *mon, const QDict *qdict);
 void hmp_device_add(Monitor *mon, const QDict *qdict);
 void hmp_device_del(Monitor *mon, const QDict *qdict);
@@ -83,8 +91,11 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
 void hmp_netdev_del(Monitor *mon, const QDict *qdict);
 void hmp_getfd(Monitor *mon, const QDict *qdict);
 void hmp_closefd(Monitor *mon, const QDict *qdict);
+void hmp_mouse_move(Monitor *mon, const QDict *qdict);
+void hmp_mouse_button(Monitor *mon, const QDict *qdict);
+void hmp_mouse_set(Monitor *mon, const QDict *qdict);
 void hmp_sendkey(Monitor *mon, const QDict *qdict);
-void hmp_screendump(Monitor *mon, const QDict *qdict);
+void coroutine_fn hmp_screendump(Monitor *mon, const QDict *qdict);
 void hmp_chardev_add(Monitor *mon, const QDict *qdict);
 void hmp_chardev_change(Monitor *mon, const QDict *qdict);
 void hmp_chardev_remove(Monitor *mon, const QDict *qdict);
@@ -98,6 +109,13 @@ void hmp_qom_list(Monitor *mon, const QDict *qdict);
 void hmp_qom_get(Monitor *mon, const QDict *qdict);
 void hmp_qom_set(Monitor *mon, const QDict *qdict);
 void hmp_info_qom_tree(Monitor *mon, const QDict *dict);
+void hmp_virtio_query(Monitor *mon, const QDict *qdict);
+void hmp_virtio_status(Monitor *mon, const QDict *qdict);
+void hmp_virtio_queue_status(Monitor *mon, const QDict *qdict);
+void hmp_vhost_queue_status(Monitor *mon, const QDict *qdict);
+void hmp_virtio_queue_element(Monitor *mon, const QDict *qdict);
+void hmp_xen_event_inject(Monitor *mon, const QDict *qdict);
+void hmp_xen_event_list(Monitor *mon, const QDict *qdict);
 void object_add_completion(ReadLineState *rs, int nb_args, const char *str);
 void object_del_completion(ReadLineState *rs, int nb_args, const char *str);
 void device_add_completion(ReadLineState *rs, int nb_args, const char *str);
@@ -128,10 +146,40 @@ void hmp_info_ramblock(Monitor *mon, const QDict *qdict);
 void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict);
 void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict);
 void hmp_info_memory_size_summary(Monitor *mon, const QDict *qdict);
-void hmp_info_sev(Monitor *mon, const QDict *qdict);
 void hmp_info_replay(Monitor *mon, const QDict *qdict);
 void hmp_replay_break(Monitor *mon, const QDict *qdict);
 void hmp_replay_delete_break(Monitor *mon, const QDict *qdict);
 void hmp_replay_seek(Monitor *mon, const QDict *qdict);
+void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict);
+void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict);
+void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict);
+void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict);
+void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict);
+void hmp_human_readable_text_helper(Monitor *mon,
+                                    HumanReadableText *(*qmp_handler)(Error **));
+void hmp_info_stats(Monitor *mon, const QDict *qdict);
+void hmp_one_insn_per_tb(Monitor *mon, const QDict *qdict);
+void hmp_watchdog_action(Monitor *mon, const QDict *qdict);
+void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict);
+void hmp_info_capture(Monitor *mon, const QDict *qdict);
+void hmp_stopcapture(Monitor *mon, const QDict *qdict);
+void hmp_wavcapture(Monitor *mon, const QDict *qdict);
+void hmp_trace_event(Monitor *mon, const QDict *qdict);
+void hmp_trace_file(Monitor *mon, const QDict *qdict);
+void hmp_info_trace_events(Monitor *mon, const QDict *qdict);
+void hmp_help(Monitor *mon, const QDict *qdict);
+void hmp_info_help(Monitor *mon, const QDict *qdict);
+void hmp_info_sync_profile(Monitor *mon, const QDict *qdict);
+void hmp_info_history(Monitor *mon, const QDict *qdict);
+void hmp_logfile(Monitor *mon, const QDict *qdict);
+void hmp_log(Monitor *mon, const QDict *qdict);
+void hmp_gdbserver(Monitor *mon, const QDict *qdict);
+void hmp_print(Monitor *mon, const QDict *qdict);
+void hmp_sum(Monitor *mon, const QDict *qdict);
+void hmp_ioport_read(Monitor *mon, const QDict *qdict);
+void hmp_ioport_write(Monitor *mon, const QDict *qdict);
+void hmp_boot_set(Monitor *mon, const QDict *qdict);
+void hmp_info_mtree(Monitor *mon, const QDict *qdict);
+void hmp_info_cryptodev(Monitor *mon, const QDict *qdict);
 
 #endif
index 348bfad3d58e0ddd700bf41be61ff8f993b56453..965f5d5450034541ca548b3f11e1246a3695e0aa 100644 (file)
@@ -4,6 +4,7 @@
 #include "block/block.h"
 #include "qapi/qapi-types-misc.h"
 #include "qemu/readline.h"
+#include "exec/hwaddr.h"
 
 typedef struct MonitorHMP MonitorHMP;
 typedef struct MonitorOptions MonitorOptions;
@@ -30,22 +31,36 @@ void monitor_resume(Monitor *mon);
 int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp);
 int monitor_fd_param(Monitor *mon, const char *fdname, Error **errp);
 
+int monitor_puts(Monitor *mon, const char *str);
 int monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
-    GCC_FMT_ATTR(2, 0);
-int monitor_printf(Monitor *mon, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
+    G_GNUC_PRINTF(2, 0);
+int monitor_printf(Monitor *mon, const char *fmt, ...) G_GNUC_PRINTF(2, 3);
+void monitor_printc(Monitor *mon, int ch);
 void monitor_flush(Monitor *mon);
 int monitor_set_cpu(Monitor *mon, int cpu_index);
 int monitor_get_cpu_index(Monitor *mon);
 
+int monitor_puts_locked(Monitor *mon, const char *str);
+void monitor_flush_locked(Monitor *mon);
+
+void *gpa2hva(MemoryRegion **p_mr, hwaddr addr, uint64_t size, Error **errp);
+
 void monitor_read_command(MonitorHMP *mon, int show_prompt);
 int monitor_read_password(MonitorHMP *mon, ReadLineFunc *readline_func,
                           void *opaque);
 
 AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id,
-                                bool has_opaque, const char *opaque,
-                                Error **errp);
+                                const char *opaque, Error **errp);
 int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags);
 void monitor_fdset_dup_fd_remove(int dup_fd);
 int64_t monitor_fdset_dup_fd_find(int dup_fd);
 
+void monitor_register_hmp(const char *name, bool info,
+                          void (*cmd)(Monitor *mon, const QDict *qdict));
+void monitor_register_hmp_info_hrt(const char *name,
+                                   HumanReadableText *(*handler)(Error **errp));
+
+int error_vprintf_unless_qmp(const char *fmt, va_list ap) G_GNUC_PRINTF(1, 0);
+int error_printf_unless_qmp(const char *fmt, ...) G_GNUC_PRINTF(1, 2);
+
 #endif /* MONITOR_H */
index eaa947d73a3ca9f301d124ae7585a81d2e2659d4..1d57bf657794c9e7a0bf10a76e13e0a9bf78a774 100644 (file)
@@ -9,6 +9,31 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp);
 
 int qdev_device_help(QemuOpts *opts);
 DeviceState *qdev_device_add(QemuOpts *opts, Error **errp);
-void qdev_set_id(DeviceState *dev, const char *id);
+DeviceState *qdev_device_add_from_qdict(const QDict *opts,
+                                        bool from_json, Error **errp);
+
+/**
+ * qdev_set_id: parent the device and set its id if provided.
+ * @dev: device to handle
+ * @id: id to be given to the device, or NULL.
+ *
+ * Returns: the id of the device in case of success; otherwise NULL.
+ *
+ * @dev must be unrealized, unparented and must not have an id.
+ *
+ * If @id is non-NULL, this function tries to setup @dev qom path as
+ * "/peripheral/id". If @id is already taken, it fails. If it succeeds,
+ * the id field of @dev is set to @id (@dev now owns the given @id
+ * parameter).
+ *
+ * If @id is NULL, this function generates a unique name and setups @dev
+ * qom path as "/peripheral-anon/name". This name is not set as the id
+ * of @dev.
+ *
+ * Upon success, it returns the id/name (generated or provided). The
+ * returned string is owned by the corresponding child property and must
+ * not be freed by the caller.
+ */
+const char *qdev_set_id(DeviceState *dev, char *id, Error **errp);
 
 #endif
diff --git a/include/monitor/qmp-helpers.h b/include/monitor/qmp-helpers.h
new file mode 100644 (file)
index 0000000..318c335
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * QMP command helpers
+ *
+ * Copyright (c) 2022 Red Hat Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef MONITOR_QMP_HELPERS_H
+
+bool qmp_add_client_spice(int fd, bool has_skipauth, bool skipauth,
+                        bool has_tls, bool tls, Error **errp);
+#ifdef CONFIG_VNC
+bool qmp_add_client_vnc(int fd, bool has_skipauth, bool skipauth,
+                        bool has_tls, bool tls, Error **errp);
+#endif
+#ifdef CONFIG_DBUS_DISPLAY
+bool qmp_add_client_dbus_display(int fd, bool has_skipauth, bool skipauth,
+                        bool has_tls, bool tls, Error **errp);
+#endif
+bool qmp_add_client_char(int fd, bool has_skipauth, bool skipauth,
+                         bool has_tls, bool tls, const char *protocol,
+                         Error **errp);
+
+#endif
index 05a0d273fed657c70f4276067d3a692f5a7160bb..7dec37e56c78cb8f4129b69ce17d8e6b08bcd1a2 100644 (file)
 #include "qemu/bswap.h"
 struct iovec;
 
+#define CSUM_IP     0x01
+#define CSUM_TCP    0x02
+#define CSUM_UDP    0x04
+#define CSUM_ALL    (CSUM_IP | CSUM_TCP | CSUM_UDP)
+
 uint32_t net_checksum_add_cont(int len, uint8_t *buf, int seq);
 uint16_t net_checksum_finish(uint32_t sum);
 uint16_t net_checksum_tcpudp(uint16_t length, uint16_t proto,
                              uint8_t *addrs, uint8_t *buf);
-void net_checksum_calculate(uint8_t *data, int length);
+void net_checksum_calculate(uint8_t *data, int length, int csum_flag);
 
 static inline uint32_t
 net_checksum_add(int len, uint8_t *buf)
index 0671be69165c89827b8dac79817e3f131cc19ff1..3b80b6e07f34ab543da32be52ddc018aef647963 100644 (file)
@@ -31,6 +31,9 @@
 
 #define ETH_ALEN 6
 #define ETH_HLEN 14
+#define ETH_ZLEN 60     /* Min. octets in frame without FCS */
+#define ETH_FCS_LEN 4
+#define ETH_MTU 1500
 
 struct eth_header {
     uint8_t  h_dest[ETH_ALEN];   /* destination eth addr */
@@ -158,7 +161,7 @@ struct tcp_hdr {
     u_short     th_dport;   /* destination port */
     uint32_t    th_seq;     /* sequence number */
     uint32_t    th_ack;     /* acknowledgment number */
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     u_char  th_off : 4,     /* data offset */
         th_x2:4;            /* (unused) */
 #else
@@ -221,6 +224,7 @@ struct tcp_hdr {
 #define IP_HEADER_VERSION_6       (6)
 #define IP_PROTO_TCP              (6)
 #define IP_PROTO_UDP              (17)
+#define IP_PROTO_SCTP             (132)
 #define IPTOS_ECN_MASK            0x03
 #define IPTOS_ECN(x)              ((x) & IPTOS_ECN_MASK)
 #define IPTOS_ECN_CE              0x03
@@ -311,10 +315,10 @@ eth_get_l2_hdr_length(const void *p)
 }
 
 static inline uint32_t
-eth_get_l2_hdr_length_iov(const struct iovec *iov, int iovcnt)
+eth_get_l2_hdr_length_iov(const struct iovec *iov, size_t iovcnt, size_t iovoff)
 {
     uint8_t p[sizeof(struct eth_header) + sizeof(struct vlan_header)];
-    size_t copied = iov_to_buf(iov, iovcnt, 0, p, ARRAY_SIZE(p));
+    size_t copied = iov_to_buf(iov, iovcnt, iovoff, p, ARRAY_SIZE(p));
 
     if (copied < ARRAY_SIZE(p)) {
         return copied;
@@ -339,26 +343,19 @@ eth_get_pkt_tci(const void *p)
 
 size_t
 eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
-               uint8_t *new_ehdr_buf,
+               void *new_ehdr_buf,
                uint16_t *payload_offset, uint16_t *tci);
 
 size_t
-eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
-                  uint16_t vet, uint8_t *new_ehdr_buf,
+eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, int index,
+                  uint16_t vet, uint16_t vet_ext, void *new_ehdr_buf,
                   uint16_t *payload_offset, uint16_t *tci);
 
 uint16_t
 eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len);
 
-void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
-    uint16_t vlan_ethtype, bool *is_new);
-
-static inline void
-eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
-    bool *is_new)
-{
-    eth_setup_vlan_headers_ex(ehdr, vlan_tag, ETH_P_VLAN, is_new);
-}
+void eth_setup_vlan_headers(struct eth_header *ehdr, size_t *ehdr_size,
+                            uint16_t vlan_tag, uint16_t vlan_ethtype);
 
 
 uint8_t eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto);
@@ -380,18 +377,25 @@ typedef struct eth_ip4_hdr_info_st {
     bool   fragment;
 } eth_ip4_hdr_info;
 
+typedef enum EthL4HdrProto {
+    ETH_L4_HDR_PROTO_INVALID,
+    ETH_L4_HDR_PROTO_TCP,
+    ETH_L4_HDR_PROTO_UDP,
+    ETH_L4_HDR_PROTO_SCTP
+} EthL4HdrProto;
+
 typedef struct eth_l4_hdr_info_st {
     union {
         struct tcp_header tcp;
         struct udp_header udp;
     } hdr;
 
+    EthL4HdrProto proto;
     bool has_tcp_data;
 } eth_l4_hdr_info;
 
-void eth_get_protocols(const struct iovec *iov, int iovcnt,
-                       bool *isip4, bool *isip6,
-                       bool *isudp, bool *istcp,
+void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff,
+                       bool *hasip4, bool *hasip6,
                        size_t *l3hdr_off,
                        size_t *l4hdr_off,
                        size_t *l5hdr_off,
@@ -399,11 +403,6 @@ void eth_get_protocols(const struct iovec *iov, int iovcnt,
                        eth_ip4_hdr_info *ip4hdr_info,
                        eth_l4_hdr_info  *l4hdr_info);
 
-void eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
-                                 void *l3hdr, size_t l3hdr_len,
-                                 size_t l3payload_len,
-                                 size_t frag_offset, bool more_frags);
-
 void
 eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len);
 
@@ -422,4 +421,20 @@ bool
 eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
                    size_t ip6hdr_off, eth_ip6_hdr_info *info);
 
+/**
+ * eth_pad_short_frame - pad a short frame to the minimum Ethernet frame length
+ *
+ * If the Ethernet frame size is shorter than 60 bytes, it will be padded to
+ * 60 bytes at the address @padded_pkt.
+ *
+ * @padded_pkt: buffer address to hold the padded frame
+ * @padded_buflen: pointer holding length of @padded_pkt. If the frame is
+ *                 padded, the length will be updated to the padded one.
+ * @pkt: address to hold the original Ethernet frame
+ * @pkt_size: size of the original Ethernet frame
+ * @return true if the frame is padded, otherwise false
+ */
+bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
+                         const void *pkt, size_t pkt_size);
+
 #endif
index 03f058ecb0c1e8d4f4d3a8cb6c58554b5527a47a..ffbd2c8d565e6832ac6b6ee96ca6e95667e4e581 100644 (file)
@@ -4,6 +4,7 @@
 #include "qemu/queue.h"
 #include "qapi/qapi-types-net.h"
 #include "net/queue.h"
+#include "hw/qdev-properties-system.h"
 
 #define MAC_FMT "%02X:%02X:%02X:%02X:%02X:%02X"
 #define MAC_ARG(x) ((uint8_t *)(x))[0], ((uint8_t *)(x))[1], \
@@ -43,6 +44,9 @@ typedef struct NICConf {
 
 typedef void (NetPoll)(NetClientState *, bool enable);
 typedef bool (NetCanReceive)(NetClientState *);
+typedef int (NetStart)(NetClientState *);
+typedef int (NetLoad)(NetClientState *);
+typedef void (NetStop)(NetClientState *);
 typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t);
 typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int);
 typedef void (NetCleanup) (NetClientState *);
@@ -50,16 +54,21 @@ typedef void (LinkStatusChanged)(NetClientState *);
 typedef void (NetClientDestructor)(NetClientState *);
 typedef RxFilterInfo *(QueryRxFilter)(NetClientState *);
 typedef bool (HasUfo)(NetClientState *);
+typedef bool (HasUso)(NetClientState *);
 typedef bool (HasVnetHdr)(NetClientState *);
 typedef bool (HasVnetHdrLen)(NetClientState *, int);
+typedef bool (GetUsingVnetHdr)(NetClientState *);
 typedef void (UsingVnetHdr)(NetClientState *, bool);
-typedef void (SetOffload)(NetClientState *, int, int, int, int, int);
+typedef void (SetOffload)(NetClientState *, int, int, int, int, int, int, int);
+typedef int (GetVnetHdrLen)(NetClientState *);
 typedef void (SetVnetHdrLen)(NetClientState *, int);
 typedef int (SetVnetLE)(NetClientState *, bool);
 typedef int (SetVnetBE)(NetClientState *, bool);
 typedef struct SocketReadState SocketReadState;
 typedef void (SocketReadStateFinalize)(SocketReadState *rs);
 typedef void (NetAnnounce)(NetClientState *);
+typedef bool (SetSteeringEBPF)(NetClientState *, int);
+typedef bool (NetCheckPeerType)(NetClientState *, ObjectClass *, Error **);
 
 typedef struct NetClientInfo {
     NetClientDriver type;
@@ -68,19 +77,27 @@ typedef struct NetClientInfo {
     NetReceive *receive_raw;
     NetReceiveIOV *receive_iov;
     NetCanReceive *can_receive;
+    NetStart *start;
+    NetLoad *load;
+    NetStop *stop;
     NetCleanup *cleanup;
     LinkStatusChanged *link_status_changed;
     QueryRxFilter *query_rx_filter;
     NetPoll *poll;
     HasUfo *has_ufo;
+    HasUso *has_uso;
     HasVnetHdr *has_vnet_hdr;
     HasVnetHdrLen *has_vnet_hdr_len;
+    GetUsingVnetHdr *get_using_vnet_hdr;
     UsingVnetHdr *using_vnet_hdr;
     SetOffload *set_offload;
+    GetVnetHdrLen *get_vnet_hdr_len;
     SetVnetHdrLen *set_vnet_hdr_len;
     SetVnetLE *set_vnet_le;
     SetVnetBE *set_vnet_be;
     NetAnnounce *announce;
+    SetSteeringEBPF *set_steering_ebpf;
+    NetCheckPeerType *check_peer_type;
 } NetClientInfo;
 
 struct NetClientState {
@@ -99,12 +116,17 @@ struct NetClientState {
     int vring_enable;
     int vnet_hdr_len;
     bool is_netdev;
+    bool do_not_pad; /* do not pad to the minimum ethernet frame length */
+    bool is_datapath;
     QTAILQ_HEAD(, NetFilterState) filters;
 };
 
+typedef QTAILQ_HEAD(NetClientStateList, NetClientState) NetClientStateList;
+
 typedef struct NICState {
     NetClientState *ncs;
     NICConf *conf;
+    MemReentrancyGuard *reentrancy_guard;
     void *opaque;
     bool peer_deleted;
 } NICState;
@@ -130,10 +152,15 @@ NetClientState *qemu_new_net_client(NetClientInfo *info,
                                     NetClientState *peer,
                                     const char *model,
                                     const char *name);
+NetClientState *qemu_new_net_control_client(NetClientInfo *info,
+                                        NetClientState *peer,
+                                        const char *model,
+                                        const char *name);
 NICState *qemu_new_nic(NetClientInfo *info,
                        NICConf *conf,
                        const char *model,
                        const char *name,
+                       MemReentrancyGuard *reentrancy_guard,
                        void *opaque);
 void qemu_del_nic(NICState *nic);
 NetClientState *qemu_get_subqueue(NICState *nic, int queue_index);
@@ -160,13 +187,18 @@ ssize_t qemu_send_packet_async(NetClientState *nc, const uint8_t *buf,
 void qemu_purge_queued_packets(NetClientState *nc);
 void qemu_flush_queued_packets(NetClientState *nc);
 void qemu_flush_or_purge_queued_packets(NetClientState *nc, bool purge);
+void qemu_set_info_str(NetClientState *nc,
+                       const char *fmt, ...) G_GNUC_PRINTF(2, 3);
 void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]);
 bool qemu_has_ufo(NetClientState *nc);
+bool qemu_has_uso(NetClientState *nc);
 bool qemu_has_vnet_hdr(NetClientState *nc);
 bool qemu_has_vnet_hdr_len(NetClientState *nc, int len);
+bool qemu_get_using_vnet_hdr(NetClientState *nc);
 void qemu_using_vnet_hdr(NetClientState *nc, bool enable);
 void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
-                      int ecn, int ufo);
+                      int ecn, int ufo, int uso4, int uso6);
+int qemu_get_vnet_hdr_len(NetClientState *nc);
 void qemu_set_vnet_hdr_len(NetClientState *nc, int len);
 int qemu_set_vnet_le(NetClientState *nc, bool is_le);
 int qemu_set_vnet_be(NetClientState *nc, bool is_be);
@@ -177,12 +209,25 @@ int qemu_find_nic_model(NICInfo *nd, const char * const *models,
                         const char *default_model);
 
 void print_net_client(Monitor *mon, NetClientState *nc);
-void hmp_info_network(Monitor *mon, const QDict *qdict);
 void net_socket_rs_init(SocketReadState *rs,
                         SocketReadStateFinalize *finalize,
                         bool vnet_hdr);
 NetClientState *qemu_get_peer(NetClientState *nc, int queue_index);
 
+/**
+ * qemu_get_nic_models:
+ * @device_type: Defines which devices should be taken into consideration
+ *               (e.g. TYPE_DEVICE for all devices, or TYPE_PCI_DEVICE for PCI)
+ *
+ * Get an array of pointers to names of NIC devices that are available in
+ * the QEMU binary. The array is terminated with a NULL pointer entry.
+ * The caller is responsible for freeing the memory when it is not required
+ * anymore, e.g. with g_ptr_array_free(..., true).
+ *
+ * Returns: Pointer to the array that contains the pointers to the names.
+ */
+GPtrArray *qemu_get_nic_models(const char *device_type);
+
 /* NIC info */
 
 #define MAX_NICS 8
@@ -203,9 +248,12 @@ extern NICInfo nd_table[MAX_NICS];
 extern const char *host_net_devices[];
 
 /* from net.c */
-int net_client_parse(QemuOptsList *opts_list, const char *str);
+extern NetClientStateList net_clients;
+bool netdev_is_modern(const char *optstr);
+void netdev_parse_modern(const char *optstr);
+void net_client_parse(QemuOptsList *opts_list, const char *optstr);
 void show_netdevs(void);
-int net_init_clients(Error **errp);
+void net_init_clients(void);
 void net_check_clients(void);
 void net_cleanup(void);
 void hmp_host_net_add(Monitor *mon, const QDict *qdict);
@@ -239,4 +287,9 @@ uint32_t net_crc32_le(const uint8_t *p, int len);
     .offset     = vmstate_offset_macaddr(_state, _field),            \
 }
 
+static inline bool net_peer_needs_padding(NetClientState *nc)
+{
+  return nc->peer && !nc->peer->do_not_pad;
+}
+
 #endif
index 5bcd8a6285986f8922b55173308f37eaa120b014..35bf61970985c38458205ea03142b12e8133391e 100644 (file)
@@ -14,5 +14,6 @@
 struct vhost_net;
 struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
 uint64_t vhost_user_get_acked_features(NetClientState *nc);
+void vhost_user_save_acked_features(NetClientState *nc);
 
 #endif /* VHOST_USER_H */
index 45e34b7cfcf60974ba8d41bbf806da8e29a9a190..b81f9a6f2a0e27241771d6d060c22a4c89cbf701 100644 (file)
@@ -15,7 +15,6 @@
 #define TYPE_VHOST_VDPA "vhost-vdpa"
 
 struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
-uint64_t vhost_vdpa_get_acked_features(NetClientState *nc);
 
 extern const int vdpa_feature_bits[];
 
index 172b0051d8120cee3738e770e03cc330653ca0ad..c37aba35e65e3c65b3a7bd483ba286e36d7a0f58 100644 (file)
@@ -14,14 +14,17 @@ typedef struct VhostNetOptions {
     VhostBackendType backend_type;
     NetClientState *net_backend;
     uint32_t busyloop_timeout;
+    unsigned int nvqs;
     void *opaque;
 } VhostNetOptions;
 
 uint64_t vhost_net_get_max_queues(VHostNetState *net);
 struct vhost_net *vhost_net_init(VhostNetOptions *options);
 
-int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, int total_queues);
-void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, int total_queues);
+int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
+                    int data_queue_pairs, int cvq);
+void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
+                    int data_queue_pairs, int cvq);
 
 void vhost_net_cleanup(VHostNetState *net);
 
@@ -36,6 +39,8 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
 bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
                               int idx, bool mask);
+bool vhost_net_config_pending(VHostNetState *net);
+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask);
 int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr);
 VHostNetState *get_vhost_net(NetClientState *nc);
 
@@ -45,4 +50,10 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net);
 
 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu);
 
+void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
+                               int vq_index);
+int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
+                                int vq_index);
+
+void vhost_net_save_acked_features(NetClientState *nc);
 #endif
diff --git a/include/qapi/compat-policy.h b/include/qapi/compat-policy.h
new file mode 100644 (file)
index 0000000..8b7b25c
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Policy for handling "funny" management interfaces
+ *
+ * Copyright (C) 2020 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef QAPI_COMPAT_POLICY_H
+#define QAPI_COMPAT_POLICY_H
+
+#include "qapi/error.h"
+#include "qapi/qapi-types-compat.h"
+
+extern CompatPolicy compat_policy;
+
+bool compat_policy_input_ok(unsigned special_features,
+                            const CompatPolicy *policy,
+                            ErrorClass error_class,
+                            const char *kind, const char *name,
+                            Error **errp);
+
+/*
+ * Create a QObject input visitor for @obj for use with QMP
+ *
+ * This is like qobject_input_visitor_new(), except it obeys the
+ * policy for handling deprecated management interfaces set with
+ * -compat.
+ */
+Visitor *qobject_input_visitor_new_qmp(QObject *obj);
+
+/*
+ * Create a QObject output visitor for @obj for use with QMP
+ *
+ * This is like qobject_output_visitor_new(), except it obeys the
+ * policy for handling deprecated management interfaces set with
+ * -compat.
+ */
+Visitor *qobject_output_visitor_new_qmp(QObject **result);
+
+#endif
index eaa05c48374cb9aaa689f6984e56e8a1eb190da5..f21a231bb1a6fb39300585d02e071a1fc79edd25 100644 (file)
  *    error_propagate_prepend(errp, *errp, ...) by error_prepend(errp, ...)
  *
  * 4. Ensure @errp is valid at return: when you destroy *errp, set
- *    errp = NULL.
+ *    *errp = NULL.
  *
  * Example:
  *
@@ -320,7 +320,7 @@ ErrorClass error_get_class(const Error *err);
 void error_setg_internal(Error **errp,
                          const char *src, int line, const char *func,
                          const char *fmt, ...)
-    GCC_FMT_ATTR(5, 6);
+    G_GNUC_PRINTF(5, 6);
 
 /*
  * Just like error_setg(), with @os_error info added to the message.
@@ -336,7 +336,7 @@ void error_setg_internal(Error **errp,
 void error_setg_errno_internal(Error **errp,
                                const char *fname, int line, const char *func,
                                int os_error, const char *fmt, ...)
-    GCC_FMT_ATTR(6, 7);
+    G_GNUC_PRINTF(6, 7);
 
 #ifdef _WIN32
 /*
@@ -350,7 +350,7 @@ void error_setg_errno_internal(Error **errp,
 void error_setg_win32_internal(Error **errp,
                                const char *src, int line, const char *func,
                                int win32_err, const char *fmt, ...)
-    GCC_FMT_ATTR(6, 7);
+    G_GNUC_PRINTF(6, 7);
 #endif
 
 /*
@@ -383,21 +383,21 @@ void error_propagate(Error **dst_errp, Error *local_err);
  */
 void error_propagate_prepend(Error **dst_errp, Error *local_err,
                              const char *fmt, ...)
-    GCC_FMT_ATTR(3, 4);
+    G_GNUC_PRINTF(3, 4);
 
 /*
  * Prepend some text to @errp's human-readable error message.
  * The text is made by formatting @fmt, @ap like vprintf().
  */
 void error_vprepend(Error *const *errp, const char *fmt, va_list ap)
-    GCC_FMT_ATTR(2, 0);
+    G_GNUC_PRINTF(2, 0);
 
 /*
  * Prepend some text to @errp's human-readable error message.
  * The text is made by formatting @fmt, ... like printf().
  */
 void error_prepend(Error *const *errp, const char *fmt, ...)
-    GCC_FMT_ATTR(2, 3);
+    G_GNUC_PRINTF(2, 3);
 
 /*
  * Append a printf-style human-readable explanation to an existing error.
@@ -414,7 +414,7 @@ void error_prepend(Error *const *errp, const char *fmt, ...)
  * newline.
  */
 void error_append_hint(Error *const *errp, const char *fmt, ...)
-    GCC_FMT_ATTR(2, 3);
+    G_GNUC_PRINTF(2, 3);
 
 /*
  * Convenience function to report open() failure.
@@ -458,13 +458,13 @@ void error_report_err(Error *err);
  * Convenience function to error_prepend(), warn_report() and free @err.
  */
 void warn_reportf_err(Error *err, const char *fmt, ...)
-    GCC_FMT_ATTR(2, 3);
+    G_GNUC_PRINTF(2, 3);
 
 /*
  * Convenience function to error_prepend(), error_report() and free @err.
  */
 void error_reportf_err(Error *err, const char *fmt, ...)
-    GCC_FMT_ATTR(2, 3);
+    G_GNUC_PRINTF(2, 3);
 
 /*
  * Just like error_setg(), except you get to specify the error class.
@@ -477,7 +477,7 @@ void error_reportf_err(Error *err, const char *fmt, ...)
 void error_set_internal(Error **errp,
                         const char *src, int line, const char *func,
                         ErrorClass err_class, const char *fmt, ...)
-    GCC_FMT_ATTR(6, 7);
+    G_GNUC_PRINTF(6, 7);
 
 /*
  * Make @errp parameter easier to use regardless of argument value
@@ -519,6 +519,12 @@ static inline void error_propagator_cleanup(ErrorPropagator *prop)
 
 G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(ErrorPropagator, error_propagator_cleanup);
 
+/*
+ * Special error destination to warn on error.
+ * See error_setg() and error_propagate() for details.
+ */
+extern Error *error_warn;
+
 /*
  * Special error destination to abort on error.
  * See error_setg() and error_propagate() for details.
diff --git a/include/qapi/forward-visitor.h b/include/qapi/forward-visitor.h
new file mode 100644 (file)
index 0000000..50fb3e9
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Forwarding visitor
+ *
+ * Copyright Red Hat, Inc. 2021
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef FORWARD_VISITOR_H
+#define FORWARD_VISITOR_H
+
+#include "qapi/visitor.h"
+
+typedef struct ForwardFieldVisitor ForwardFieldVisitor;
+
+/*
+ * The forwarding visitor only expects a single name, @from, to be passed for
+ * toplevel fields.  It is converted to @to and forwarded to the @target visitor.
+ * Calls within a struct are forwarded without changing the name.
+ */
+Visitor *visitor_forward_field(Visitor *target, const char *from, const char *to);
+
+#endif
index af8d96c570ef5bc6997349f76529281be86e479c..f2e956813a146ddfd7b8a84d23df78dbb3bf6739 100644 (file)
@@ -21,7 +21,6 @@ typedef void (QmpCommandFunc)(QDict *, QObject **, Error **);
 
 typedef enum QmpCommandOptions
 {
-    QCO_NO_OPTIONS            =  0x0,
     QCO_NO_SUCCESS_RESP       =  (1U << 0),
     QCO_ALLOW_OOB             =  (1U << 1),
     QCO_ALLOW_PRECONFIG       =  (1U << 2),
@@ -34,25 +33,30 @@ typedef struct QmpCommand
     /* Runs in coroutine context if QCO_COROUTINE is set */
     QmpCommandFunc *fn;
     QmpCommandOptions options;
+    unsigned special_features;
     QTAILQ_ENTRY(QmpCommand) node;
     bool enabled;
+    const char *disable_reason;
 } QmpCommand;
 
 typedef QTAILQ_HEAD(QmpCommandList, QmpCommand) QmpCommandList;
 
 void qmp_register_command(QmpCommandList *cmds, const char *name,
-                          QmpCommandFunc *fn, QmpCommandOptions options);
+                          QmpCommandFunc *fn, QmpCommandOptions options,
+                          unsigned special_features);
 const QmpCommand *qmp_find_command(const QmpCommandList *cmds,
                                    const char *name);
-void qmp_disable_command(QmpCommandList *cmds, const char *name);
+void qmp_disable_command(QmpCommandList *cmds, const char *name,
+                         const char *err_msg);
 void qmp_enable_command(QmpCommandList *cmds, const char *name);
 
 bool qmp_command_is_enabled(const QmpCommand *cmd);
+bool qmp_command_available(const QmpCommand *cmd, Error **errp);
 const char *qmp_command_name(const QmpCommand *cmd);
 bool qmp_has_success_response(const QmpCommand *cmd);
 QDict *qmp_error_response(Error *err);
-QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
-                    bool allow_oob, Monitor *cur_mon);
+QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *request,
+                                       bool allow_oob, Monitor *cur_mon);
 bool qmp_is_oob(const QDict *dict);
 
 typedef void (*qmp_cmd_callback_fn)(const QmpCommand *cmd, void *opaque);
diff --git a/include/qapi/qmp/json-writer.h b/include/qapi/qmp/json-writer.h
new file mode 100644 (file)
index 0000000..b70ba64
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * JSON Writer
+ *
+ * Copyright (c) 2020 Red Hat Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef JSON_WRITER_H
+#define JSON_WRITER_H
+
+JSONWriter *json_writer_new(bool pretty);
+const char *json_writer_get(JSONWriter *);
+GString *json_writer_get_and_free(JSONWriter *);
+void json_writer_free(JSONWriter *);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(JSONWriter, json_writer_free)
+
+void json_writer_start_object(JSONWriter *, const char *name);
+void json_writer_end_object(JSONWriter *);
+void json_writer_start_array(JSONWriter *, const char *name);
+void json_writer_end_array(JSONWriter *);
+void json_writer_bool(JSONWriter *, const char *name, bool val);
+void json_writer_null(JSONWriter *, const char *name);
+void json_writer_int64(JSONWriter *, const char *name, int64_t val);
+void json_writer_uint64(JSONWriter *, const char *name, uint64_t val);
+void json_writer_double(JSONWriter *, const char *name, double val);
+void json_writer_str(JSONWriter *, const char *name, const char *str);
+
+#endif
index 5f61e38e6402dc9256b54cb0bf69149d1e14a33d..0d09726939b9904ea5f4fe06f196d09f2c1b132a 100644 (file)
@@ -21,9 +21,11 @@ struct QBool {
     bool value;
 };
 
+void qbool_unref(QBool *q);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QBool, qbool_unref)
+
 QBool *qbool_from_bool(bool value);
 bool qbool_get_bool(const QBool *qb);
-bool qbool_is_equal(const QObject *x, const QObject *y);
-void qbool_destroy_obj(QObject *obj);
 
 #endif /* QBOOL_H */
index da942347a7a4bed78e35768d33f761c982e61245..82e90fc07229baf447cbd88fc45075a847c7f443 100644 (file)
@@ -30,6 +30,10 @@ struct QDict {
     QLIST_HEAD(,QDictEntry) table[QDICT_BUCKET_MAX];
 };
 
+void qdict_unref(QDict *q);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QDict, qdict_unref)
+
 /* Object API */
 QDict *qdict_new(void);
 const char *qdict_entry_key(const QDictEntry *entry);
@@ -39,10 +43,8 @@ void qdict_put_obj(QDict *qdict, const char *key, QObject *value);
 void qdict_del(QDict *qdict, const char *key);
 int qdict_haskey(const QDict *qdict, const char *key);
 QObject *qdict_get(const QDict *qdict, const char *key);
-bool qdict_is_equal(const QObject *x, const QObject *y);
 const QDictEntry *qdict_first(const QDict *qdict);
 const QDictEntry *qdict_next(const QDict *qdict, const QDictEntry *entry);
-void qdict_destroy_obj(QObject *obj);
 
 /* Helper to qdict_put_obj(), accepts any object */
 #define qdict_put(qdict, key, obj) \
index 7c76e24aa7a75189d98e3dfb236e966129b4bd56..8dd9fcb071a41835fd6fe783c0b2c0fe734b76ec 100644 (file)
@@ -16,8 +16,6 @@
  * These macros will go away, please don't use in new code, and do not
  * add new ones!
  */
-#define QERR_BASE_NOT_FOUND \
-    "Base '%s' not found"
 
 #define QERR_BUS_NO_HOTPLUG \
     "Bus '%s' does not support hotplugging"
 #define QERR_DEVICE_HAS_NO_MEDIUM \
     "Device '%s' has no medium"
 
-#define QERR_DEVICE_INIT_FAILED \
-    "Device '%s' could not be initialized"
-
 #define QERR_DEVICE_IN_USE \
     "Device '%s' is in use"
 
 #define QERR_DEVICE_NO_HOTPLUG \
     "Device '%s' does not support hotplugging"
 
-#define QERR_FD_NOT_FOUND \
-    "File descriptor named '%s' not found"
-
-#define QERR_FD_NOT_SUPPLIED \
-    "No file descriptor supplied via SCM_RIGHTS"
-
-#define QERR_FEATURE_DISABLED \
-    "The feature '%s' is not enabled"
-
-#define QERR_INVALID_BLOCK_FORMAT \
-    "Invalid block format '%s'"
-
 #define QERR_INVALID_PARAMETER \
     "Invalid parameter '%s'"
 
@@ -55,9 +38,6 @@
 #define QERR_INVALID_PARAMETER_VALUE \
     "Parameter '%s' expects %s"
 
-#define QERR_INVALID_PASSWORD \
-    "Password incorrect"
-
 #define QERR_IO_ERROR \
     "An IO error has occurred"
 
@@ -67,9 +47,6 @@
 #define QERR_MISSING_PARAMETER \
     "Parameter '%s' is missing"
 
-#define QERR_PERMISSION_DENIED \
-    "Insufficient permission to perform this operation"
-
 #define QERR_PROPERTY_VALUE_BAD \
     "Property '%s.%s' doesn't take value '%s'"
 
 #define QERR_QGA_COMMAND_FAILED \
     "Guest agent command failed, error was '%s'"
 
-#define QERR_REPLAY_NOT_SUPPORTED \
-    "Record/replay feature is not supported for '%s'"
-
-#define QERR_SET_PASSWD_FAILED \
-    "Could not set password"
-
-#define QERR_UNDEFINED_ERROR \
-    "An undefined error has occurred"
-
 #define QERR_UNSUPPORTED \
     "this feature or command is not currently supported"
 
index 5ebbe5a1181f694026ae3eecdc9689033f576cd7..7bd8d2de1b336c4a8ebf4e5804edc3a5d45d089d 100644 (file)
 QObject *qobject_from_json(const char *string, Error **errp);
 
 QObject *qobject_from_vjsonf_nofail(const char *string, va_list ap)
-    GCC_FMT_ATTR(1, 0);
+    G_GNUC_PRINTF(1, 0);
 QObject *qobject_from_jsonf_nofail(const char *string, ...)
-    GCC_FMT_ATTR(1, 2);
+    G_GNUC_PRINTF(1, 2);
 QDict *qdict_from_vjsonf_nofail(const char *string, va_list ap)
-    GCC_FMT_ATTR(1, 0);
+    G_GNUC_PRINTF(1, 0);
 QDict *qdict_from_jsonf_nofail(const char *string, ...)
-    GCC_FMT_ATTR(1, 2);
+    G_GNUC_PRINTF(1, 2);
 
-QString *qobject_to_json(const QObject *obj);
-QString *qobject_to_json_pretty(const QObject *obj);
+GString *qobject_to_json(const QObject *obj);
+GString *qobject_to_json_pretty(const QObject *obj, bool pretty);
 
 #endif /* QJSON_H */
index 595b7943e1479e90ba3dce32e5d715af638cc04e..e4e985d4356d28e19edd43d11ad455edf53fd05c 100644 (file)
@@ -26,6 +26,10 @@ struct QList {
     QTAILQ_HEAD(,QListEntry) head;
 };
 
+void qlist_unref(QList *q);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QList, qlist_unref)
+
 #define qlist_append(qlist, obj) \
         qlist_append_obj(qlist, QOBJECT(obj))
 
@@ -51,8 +55,6 @@ QObject *qlist_pop(QList *qlist);
 QObject *qlist_peek(QList *qlist);
 int qlist_empty(const QList *qlist);
 size_t qlist_size(const QList *qlist);
-bool qlist_is_equal(const QObject *x, const QObject *y);
-void qlist_destroy_obj(QObject *obj);
 
 static inline const QListEntry *qlist_first(const QList *qlist)
 {
index c1426882c57333bd80664c60dfd09978debe286e..7feb7c7d830d9e39022530f78775a5df2b27a8ff 100644 (file)
@@ -26,6 +26,8 @@ static inline QNull *qnull(void)
     return qobject_ref(&qnull_);
 }
 
-bool qnull_is_equal(const QObject *x, const QObject *y);
+void qnull_unref(QNull *q);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QNull, qnull_unref)
 
 #endif /* QNULL_H */
index bbae0a5ec8a28095bec5a0fe1ce928267bb9b087..e86788dd2e3aeb0ae5a1d5625b0c4d78b391ff9a 100644 (file)
@@ -54,6 +54,10 @@ struct QNum {
     } u;
 };
 
+void qnum_unref(QNum *q);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QNum, qnum_unref)
+
 QNum *qnum_from_int(int64_t value);
 QNum *qnum_from_uint(uint64_t value);
 QNum *qnum_from_double(double value);
@@ -68,7 +72,4 @@ double qnum_get_double(QNum *qn);
 
 char *qnum_to_string(QNum *qn);
 
-bool qnum_is_equal(const QObject *x, const QObject *y);
-void qnum_destroy_obj(QObject *obj);
-
 #endif /* QNUM_H */
index fcfd54922006870c42a593a2c715939b3b850d4d..89b97d88bcab08003dd253bea2342aae7ea6e914 100644 (file)
@@ -45,10 +45,16 @@ struct QObject {
     struct QObjectBase_ base;
 };
 
-#define QOBJECT(obj) ({                                         \
+/*
+ * Preprocessor sorcery ahead: use a different identifier for the
+ * local variable in each expansion, so we can nest macro calls
+ * without shadowing variables.
+ */
+#define QOBJECT_INTERNAL(obj, _obj) ({                          \
     typeof(obj) _obj = (obj);                                   \
-    _obj ? container_of(&(_obj)->base, QObject, base) : NULL;   \
+    _obj ? container_of(&_obj->base, QObject, base) : NULL;     \
 })
+#define QOBJECT(obj) QOBJECT_INTERNAL((obj), MAKE_IDENTFIER(_obj))
 
 /* Required for qobject_to() */
 #define QTYPE_CAST_TO_QNull     QTYPE_QNULL
@@ -64,14 +70,6 @@ QEMU_BUILD_BUG_MSG(QTYPE__MAX != 7,
 #define qobject_to(type, obj)                                       \
     ((type *)qobject_check_type(obj, glue(QTYPE_CAST_TO_, type)))
 
-/* Initialize an object to default values */
-static inline void qobject_init(QObject *obj, QType type)
-{
-    assert(QTYPE_NONE < type && type < QTYPE__MAX);
-    obj->base.refcnt = 1;
-    obj->base.type = type;
-}
-
 static inline void qobject_ref_impl(QObject *obj)
 {
     if (obj) {
@@ -90,6 +88,7 @@ bool qobject_is_equal(const QObject *x, const QObject *y);
 
 /**
  * qobject_destroy(): Free resources used by the object
+ * For use via qobject_unref() only!
  */
 void qobject_destroy(QObject *obj);
 
index e2e356e5e775f2078635c8c07ef885138c3c380e..318d815d6a437e805adab78ab133f14183b75990 100644 (file)
 
 struct QString {
     struct QObjectBase_ base;
-    char *string;
-    size_t length;
-    size_t capacity;
+    const char *string;
 };
 
+void qstring_unref(QString *q);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QString, qstring_unref)
+
 QString *qstring_new(void);
 QString *qstring_from_str(const char *str);
 QString *qstring_from_substr(const char *str, size_t start, size_t end);
-size_t qstring_get_length(const QString *qstring);
+QString *qstring_from_gstring(GString *gstr);
 const char *qstring_get_str(const QString *qstring);
-const char *qstring_get_try_str(const QString *qstring);
-const char *qobject_get_try_str(const QObject *qstring);
-void qstring_append_int(QString *qstring, int64_t value);
-void qstring_append(QString *qstring, const char *str);
-void qstring_append_chr(QString *qstring, int c);
-bool qstring_is_equal(const QObject *x, const QObject *y);
-char *qstring_free(QString *qstring, bool return_str);
-void qstring_destroy_obj(QObject *obj);
 
 #endif /* QSTRING_H */
diff --git a/include/qapi/type-helpers.h b/include/qapi/type-helpers.h
new file mode 100644 (file)
index 0000000..be1f181
--- /dev/null
@@ -0,0 +1,14 @@
+/*
+ * QAPI common helper functions
+ *
+ * This file provides helper functions related to types defined
+ * in the QAPI schema.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qapi/qapi-types-common.h"
+
+HumanReadableText *human_readable_text_from_str(GString *str);
index 6178e98e97a5a31c3551e0bce8a9c72d149d8d88..81a2b13a3339c7f6341cc43f8679542949684cfe 100644 (file)
 #ifndef QAPI_UTIL_H
 #define QAPI_UTIL_H
 
+typedef enum {
+    QAPI_DEPRECATED,
+    QAPI_UNSTABLE,
+} QapiSpecialFeature;
+
 typedef struct QEnumLookup {
     const char *const *array;
-    int size;
+    const unsigned char *const special_features;
+    const int size;
 } QEnumLookup;
 
 const char *qapi_enum_lookup(const QEnumLookup *lookup, int val);
@@ -37,4 +43,17 @@ int parse_qapi_name(const char *name, bool complete);
     (list) = _tmp; \
 } while (0)
 
+/*
+ * For any pointer to a GenericList @tail (usually the 'next' member of a
+ * list element), insert @element at the back and update the tail.
+ *
+ * Note that this macro evaluates @element exactly once, so it is safe
+ * to have side-effects with that argument.
+ */
+#define QAPI_LIST_APPEND(tail, element) do { \
+    *(tail) = g_malloc0(sizeof(**(tail))); \
+    (*(tail))->value = (element); \
+    (tail) = &(*(tail))->next; \
+} while (0)
+
 #endif
index 7362c043be8e4da1dc16696fb15870d5ec7428c4..2badec5ba4604b297e354a442dfc4f23ccdfcb53 100644 (file)
@@ -113,9 +113,20 @@ struct Visitor
        The core takes care of the return type in the public interface. */
     void (*optional)(Visitor *v, const char *name, bool *present);
 
+    /* Optional */
+    bool (*policy_reject)(Visitor *v, const char *name,
+                          unsigned special_features, Error **errp);
+
+    /* Optional */
+    bool (*policy_skip)(Visitor *v, const char *name,
+                        unsigned special_features);
+
     /* Must be set */
     VisitorType type;
 
+    /* Optional */
+    struct CompatPolicy compat_policy;
+
     /* Must be set for output visitors, optional otherwise. */
     void (*complete)(Visitor *v, void *opaque);
 
index ebc19ede7fff4cbae0ab7d968bec8ed6774f2896..d53a84c9ba41d29733b2b73ce7787161b10ff1b0 100644 (file)
@@ -16,6 +16,7 @@
 #define QAPI_VISITOR_H
 
 #include "qapi/qapi-builtin-types.h"
+#include "qapi/qapi-types-compat.h"
 
 /*
  * The QAPI schema defines both a set of C data types, and a QMP wire
@@ -459,6 +460,41 @@ void visit_end_alternate(Visitor *v, void **obj);
  */
 bool visit_optional(Visitor *v, const char *name, bool *present);
 
+/*
+ * Should we reject member @name due to policy?
+ *
+ * @special_features is the member's special features encoded as a
+ * bitset of QapiSpecialFeature.
+ *
+ * @name must not be NULL.  This function is only useful between
+ * visit_start_struct() and visit_end_struct(), since only objects
+ * have deprecated members.
+ */
+bool visit_policy_reject(Visitor *v, const char *name,
+                         unsigned special_features, Error **errp);
+
+/*
+ *
+ * Should we skip member @name due to policy?
+ *
+ * @special_features is the member's special features encoded as a
+ * bitset of QapiSpecialFeature.
+ *
+ * @name must not be NULL.  This function is only useful between
+ * visit_start_struct() and visit_end_struct(), since only objects
+ * have deprecated members.
+ */
+bool visit_policy_skip(Visitor *v, const char *name,
+                       unsigned special_features);
+
+/*
+ * Set policy for handling deprecated management interfaces.
+ *
+ * Intended use: call visit_set_policy(v, &compat_policy) when
+ * visiting management interface input or output.
+ */
+void visit_set_policy(Visitor *v, CompatPolicy *policy);
+
 /*
  * Visit an enum value.
  *
diff --git a/include/qemu-common.h b/include/qemu-common.h
deleted file mode 100644 (file)
index fda7dc6..0000000
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * This file is supposed to be included only by .c files. No header file should
- * depend on qemu-common.h, as this would easily lead to circular header
- * dependencies.
- *
- * If a header file uses a definition from qemu-common.h, that definition
- * must be moved to a separate header file, and the header that uses it
- * must include that header.
- */
-#ifndef QEMU_COMMON_H
-#define QEMU_COMMON_H
-
-#define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)
-
-/* Copyright string for -version arguments, About dialogs, etc */
-#define QEMU_COPYRIGHT "Copyright (c) 2003-2020 " \
-    "Fabrice Bellard and the QEMU Project developers"
-
-/* Bug reporting information for --help arguments, About dialogs, etc */
-#define QEMU_HELP_BOTTOM \
-    "See <https://qemu.org/contribute/report-a-bug> for how to report bugs.\n" \
-    "More information on the QEMU project at <https://qemu.org>."
-
-/* main function, renamed */
-#if defined(CONFIG_COCOA)
-int qemu_main(int argc, char **argv, char **envp);
-#endif
-
-void qemu_get_timedate(struct tm *tm, int offset);
-int qemu_timedate_diff(struct tm *tm);
-
-void *qemu_oom_check(void *ptr);
-
-ssize_t qemu_write_full(int fd, const void *buf, size_t count)
-    QEMU_WARN_UNUSED_RESULT;
-
-#ifndef _WIN32
-int qemu_pipe(int pipefd[2]);
-/* like openpty() but also makes it raw; return master fd */
-int qemu_openpty_raw(int *aslave, char *pty_name);
-#endif
-
-#ifdef _WIN32
-/* MinGW needs type casts for the 'buf' and 'optval' arguments. */
-#define qemu_getsockopt(sockfd, level, optname, optval, optlen) \
-    getsockopt(sockfd, level, optname, (void *)optval, optlen)
-#define qemu_setsockopt(sockfd, level, optname, optval, optlen) \
-    setsockopt(sockfd, level, optname, (const void *)optval, optlen)
-#define qemu_recv(sockfd, buf, len, flags) recv(sockfd, (void *)buf, len, flags)
-#define qemu_sendto(sockfd, buf, len, flags, destaddr, addrlen) \
-    sendto(sockfd, (const void *)buf, len, flags, destaddr, addrlen)
-#else
-#define qemu_getsockopt(sockfd, level, optname, optval, optlen) \
-    getsockopt(sockfd, level, optname, optval, optlen)
-#define qemu_setsockopt(sockfd, level, optname, optval, optlen) \
-    setsockopt(sockfd, level, optname, optval, optlen)
-#define qemu_recv(sockfd, buf, len, flags) recv(sockfd, buf, len, flags)
-#define qemu_sendto(sockfd, buf, len, flags, destaddr, addrlen) \
-    sendto(sockfd, buf, len, flags, destaddr, addrlen)
-#endif
-
-void cpu_exec_init_all(void);
-void cpu_exec_step_atomic(CPUState *cpu);
-
-/**
- * set_preferred_target_page_bits:
- * @bits: number of bits needed to represent an address within the page
- *
- * Set the preferred target page size (the actual target page
- * size may be smaller than any given CPU's preference).
- * Returns true on success, false on failure (which can only happen
- * if this is called after the system has already finalized its
- * choice of page size and the requested page size is smaller than that).
- */
-bool set_preferred_target_page_bits(int bits);
-
-/**
- * finalize_target_page_bits:
- * Commit the final value set by set_preferred_target_page_bits.
- */
-void finalize_target_page_bits(void);
-
-/**
- * Sends a (part of) iovec down a socket, yielding when the socket is full, or
- * Receives data into a (part of) iovec from a socket,
- * yielding when there is no data in the socket.
- * The same interface as qemu_sendv_recvv(), with added yielding.
- * XXX should mark these as coroutine_fn
- */
-ssize_t qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
-                            size_t offset, size_t bytes, bool do_send);
-#define qemu_co_recvv(sockfd, iov, iov_cnt, offset, bytes) \
-  qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, false)
-#define qemu_co_sendv(sockfd, iov, iov_cnt, offset, bytes) \
-  qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, true)
-
-/**
- * The same as above, but with just a single buffer
- */
-ssize_t qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send);
-#define qemu_co_recv(sockfd, buf, bytes) \
-  qemu_co_send_recv(sockfd, buf, bytes, false)
-#define qemu_co_send(sockfd, buf, bytes) \
-  qemu_co_send_recv(sockfd, buf, bytes, true)
-
-void qemu_progress_init(int enabled, float min_skip);
-void qemu_progress_end(void);
-void qemu_progress_print(float delta, int max);
-const char *qemu_get_vm_name(void);
-
-#define QEMU_FILE_TYPE_BIOS   0
-#define QEMU_FILE_TYPE_KEYMAP 1
-/**
- * qemu_find_file:
- * @type: QEMU_FILE_TYPE_BIOS (for BIOS, VGA BIOS)
- *        or QEMU_FILE_TYPE_KEYMAP (for keymaps).
- * @name: Relative or absolute file name
- *
- * If @name exists on disk as an absolute path, or a path relative
- * to the current directory, then returns @name unchanged.
- * Otherwise searches for @name file in the data directories, either
- * configured at build time (DATADIR) or registered with the -L command
- * line option.
- *
- * The caller must use g_free() to free the returned data when it is
- * no longer required.
- *
- * Returns: a path that can access @name, or NULL if no matching file exists.
- */
-char *qemu_find_file(int type, const char *name);
-
-/* OS specific functions */
-void os_setup_early_signal_handling(void);
-int os_parse_cmd_args(int index, const char *optarg);
-
-/*
- * Hexdump a line of a byte buffer into a hexadecimal/ASCII buffer
- */
-#define QEMU_HEXDUMP_LINE_BYTES 16 /* Number of bytes to dump */
-#define QEMU_HEXDUMP_LINE_LEN 75   /* Number of characters in line */
-void qemu_hexdump_line(char *line, unsigned int b, const void *bufptr,
-                       unsigned int len, bool ascii);
-
-/*
- * Hexdump a buffer to a file. An optional string prefix is added to every line
- */
-
-void qemu_hexdump(FILE *fp, const char *prefix,
-                  const void *bufptr, size_t size);
-
-/*
- * helper to parse debug environment variables
- */
-int parse_debug_env(const char *name, int max, int initial);
-
-const char *qemu_ether_ntoa(const MACAddr *mac);
-void page_size_init(void);
-
-/* returns non-zero if dump is in progress, otherwise zero is
- * returned. */
-bool dump_in_progress(void);
-
-#endif
diff --git a/include/qemu-main.h b/include/qemu-main.h
new file mode 100644 (file)
index 0000000..940960a
--- /dev/null
@@ -0,0 +1,11 @@
+/*
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_MAIN_H
+#define QEMU_MAIN_H
+
+int qemu_default_main(void);
+extern int (*qemu_main)(void);
+
+#endif /* QEMU_MAIN_H */
diff --git a/include/qemu/accel.h b/include/qemu/accel.h
new file mode 100644 (file)
index 0000000..972a849
--- /dev/null
@@ -0,0 +1,115 @@
+/* QEMU accelerator interfaces
+ *
+ * Copyright (c) 2014 Red Hat Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef QEMU_ACCEL_H
+#define QEMU_ACCEL_H
+
+#include "qom/object.h"
+#include "exec/hwaddr.h"
+
+struct AccelState {
+    /*< private >*/
+    Object parent_obj;
+};
+
+typedef struct AccelClass {
+    /*< private >*/
+    ObjectClass parent_class;
+    /*< public >*/
+
+    const char *name;
+    int (*init_machine)(MachineState *ms);
+#ifndef CONFIG_USER_ONLY
+    void (*setup_post)(MachineState *ms, AccelState *accel);
+    bool (*has_memory)(MachineState *ms, AddressSpace *as,
+                       hwaddr start_addr, hwaddr size);
+#endif
+    bool (*cpu_common_realize)(CPUState *cpu, Error **errp);
+    void (*cpu_common_unrealize)(CPUState *cpu);
+
+    /* gdbstub related hooks */
+    int (*gdbstub_supported_sstep_flags)(void);
+
+    bool *allowed;
+    /*
+     * Array of global properties that would be applied when specific
+     * accelerator is chosen. It works like MachineClass.compat_props
+     * but it's for accelerators not machines. Accelerator-provided
+     * global properties may be overridden by machine-type
+     * compat_props or user-provided global properties.
+     */
+    GPtrArray *compat_props;
+} AccelClass;
+
+#define TYPE_ACCEL "accel"
+
+#define ACCEL_CLASS_SUFFIX  "-" TYPE_ACCEL
+#define ACCEL_CLASS_NAME(a) (a ACCEL_CLASS_SUFFIX)
+
+#define ACCEL_CLASS(klass) \
+    OBJECT_CLASS_CHECK(AccelClass, (klass), TYPE_ACCEL)
+#define ACCEL(obj) \
+    OBJECT_CHECK(AccelState, (obj), TYPE_ACCEL)
+#define ACCEL_GET_CLASS(obj) \
+    OBJECT_GET_CLASS(AccelClass, (obj), TYPE_ACCEL)
+
+AccelClass *accel_find(const char *opt_name);
+AccelState *current_accel(void);
+const char *current_accel_name(void);
+
+void accel_init_interfaces(AccelClass *ac);
+
+#ifndef CONFIG_USER_ONLY
+int accel_init_machine(AccelState *accel, MachineState *ms);
+
+/* Called just before os_setup_post (ie just before drop OS privs) */
+void accel_setup_post(MachineState *ms);
+#endif /* !CONFIG_USER_ONLY */
+
+/**
+ * accel_cpu_instance_init:
+ * @cpu: The CPU that needs to do accel-specific object initializations.
+ */
+void accel_cpu_instance_init(CPUState *cpu);
+
+/**
+ * accel_cpu_common_realize:
+ * @cpu: The CPU that needs to call accel-specific cpu realization.
+ * @errp: currently unused.
+ */
+bool accel_cpu_common_realize(CPUState *cpu, Error **errp);
+
+/**
+ * accel_cpu_common_unrealize:
+ * @cpu: The CPU that needs to call accel-specific cpu unrealization.
+ */
+void accel_cpu_common_unrealize(CPUState *cpu);
+
+/**
+ * accel_supported_gdbstub_sstep_flags:
+ *
+ * Returns the supported single step modes for the configured
+ * accelerator.
+ */
+int accel_supported_gdbstub_sstep_flags(void);
+
+#endif /* QEMU_ACCEL_H */
diff --git a/include/qemu/async-teardown.h b/include/qemu/async-teardown.h
new file mode 100644 (file)
index 0000000..b281da0
--- /dev/null
@@ -0,0 +1,20 @@
+/*
+ * Asynchronous teardown
+ *
+ * Copyright IBM, Corp. 2022
+ *
+ * Authors:
+ *  Claudio Imbrenda <imbrenda@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version.  See the COPYING file in the top-level directory.
+ *
+ */
+#ifndef QEMU_ASYNC_TEARDOWN_H
+#define QEMU_ASYNC_TEARDOWN_H
+
+#ifdef CONFIG_LINUX
+void init_async_teardown(void);
+#endif
+
+#endif
index c1d211a351998e67d00202b6c4bcf0f04d68c94a..f1d3d1702a93c20b83eac503fa74a77b3895cbef 100644 (file)
@@ -8,13 +8,15 @@
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
  *
- * See docs/devel/atomics.txt for discussion about the guarantees each
+ * See docs/devel/atomics.rst for discussion about the guarantees each
  * atomic primitive is meant to provide.
  */
 
 #ifndef QEMU_ATOMIC_H
 #define QEMU_ATOMIC_H
 
+#include "compiler.h"
+
 /* Compiler barrier */
 #define barrier()   ({ asm volatile("" ::: "memory"); (void)0; })
 
@@ -60,8 +62,9 @@
         (unsigned short)1,                                                         \
       (expr)+0))))))
 
-#ifdef __ATOMIC_RELAXED
-/* For C11 atomic ops */
+#ifndef __ATOMIC_RELAXED
+#error "Expecting C11 atomic ops"
+#endif
 
 /* Manual memory barriers
  *
@@ -80,7 +83,7 @@
  * no processors except Alpha need a barrier here.  Leave it in if
  * using Thread Sanitizer to avoid warnings, otherwise optimize it away.
  */
-#if defined(__SANITIZE_THREAD__)
+#ifdef QEMU_SANITIZE_THREAD
 #define smp_read_barrier_depends()   ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); })
 #elif defined(__alpha__)
 #define smp_read_barrier_depends()   asm volatile("mb":::"memory")
 
 #define qatomic_read(ptr)                              \
     ({                                                 \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
+    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \
     qatomic_read__nocheck(ptr);                        \
     })
 
     __atomic_store_n(ptr, i, __ATOMIC_RELAXED)
 
 #define qatomic_set(ptr, i)  do {                      \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
+    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \
     qatomic_set__nocheck(ptr, i);                      \
 } while(0)
 
 /* See above: most compilers currently treat consume and acquire the
  * same, but this slows down qatomic_rcu_read unnecessarily.
  */
-#ifdef __SANITIZE_THREAD__
+#ifdef QEMU_SANITIZE_THREAD
 #define qatomic_rcu_read__nocheck(ptr, valptr)           \
     __atomic_load(ptr, valptr, __ATOMIC_CONSUME);
 #else
     smp_read_barrier_depends();
 #endif
 
-#define qatomic_rcu_read(ptr)                          \
-    ({                                                 \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
-    typeof_strip_qual(*ptr) _val;                      \
-    qatomic_rcu_read__nocheck(ptr, &_val);             \
-    _val;                                              \
+/*
+ * Preprocessor sorcery ahead: use a different identifier for the
+ * local variable in each expansion, so we can nest macro calls
+ * without shadowing variables.
+ */
+#define qatomic_rcu_read_internal(ptr, _val)            \
+    ({                                                  \
+    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \
+    typeof_strip_qual(*ptr) _val;                       \
+    qatomic_rcu_read__nocheck(ptr, &_val);              \
+    _val;                                               \
     })
+#define qatomic_rcu_read(ptr) \
+    qatomic_rcu_read_internal((ptr), MAKE_IDENTFIER(_val))
 
 #define qatomic_rcu_set(ptr, i) do {                   \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE); \
+    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \
     __atomic_store_n(ptr, i, __ATOMIC_RELEASE);        \
 } while(0)
 
 #define qatomic_load_acquire(ptr)                       \
     ({                                                  \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE);  \
+    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \
     typeof_strip_qual(*ptr) _val;                       \
     __atomic_load(ptr, &_val, __ATOMIC_ACQUIRE);        \
     _val;                                               \
     })
 
 #define qatomic_store_release(ptr, i)  do {             \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE);  \
+    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE); \
     __atomic_store_n(ptr, i, __ATOMIC_RELEASE);         \
 } while(0)
 
 })
 
 #define qatomic_xchg(ptr, i)    ({                          \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE);      \
+    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE);     \
     qatomic_xchg__nocheck(ptr, i);                          \
 })
 
 })
 
 #define qatomic_cmpxchg(ptr, old, new)    ({                            \
-    QEMU_BUILD_BUG_ON(sizeof(*ptr) > ATOMIC_REG_SIZE);                  \
+    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE);                 \
     qatomic_cmpxchg__nocheck(ptr, old, new);                            \
 })
 
 #define qatomic_xor(ptr, n) \
     ((void) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST))
 
-#else /* __ATOMIC_RELAXED */
+#define smp_wmb()   smp_mb_release()
+#define smp_rmb()   smp_mb_acquire()
 
 /*
- * We use GCC builtin if it's available, as that can use mfence on
- * 32-bit as well, e.g. if built with -march=pentium-m. However, on
- * i386 the spec is buggy, and the implementation followed it until
- * 4.3 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36793).
+ * SEQ_CST is weaker than the older __sync_* builtins and Linux
+ * kernel read-modify-write atomics.  Provide a macro to obtain
+ * the same semantics.
  */
-#if defined(__i386__) || defined(__x86_64__)
-#if !QEMU_GNUC_PREREQ(4, 4)
-#if defined __x86_64__
-#define smp_mb()    ({ asm volatile("mfence" ::: "memory"); (void)0; })
+#if !defined(QEMU_SANITIZE_THREAD) && \
+    (defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
+# define smp_mb__before_rmw() signal_barrier()
+# define smp_mb__after_rmw() signal_barrier()
 #else
-#define smp_mb()    ({ asm volatile("lock; addl $0,0(%%esp) " ::: "memory"); (void)0; })
-#endif
-#endif
+# define smp_mb__before_rmw() smp_mb()
+# define smp_mb__after_rmw() smp_mb()
 #endif
 
-
-#ifdef __alpha__
-#define smp_read_barrier_depends()   asm volatile("mb":::"memory")
-#endif
-
-#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
-
-/*
- * Because of the strongly ordered storage model, wmb() and rmb() are nops
- * here (a compiler barrier only).  QEMU doesn't do accesses to write-combining
- * qemu memory or non-temporal load/stores from C code.
- */
-#define smp_mb_release()   barrier()
-#define smp_mb_acquire()   barrier()
-
 /*
- * __sync_lock_test_and_set() is documented to be an acquire barrier only,
- * but it is a full barrier at the hardware level.  Add a compiler barrier
- * to make it a full barrier also at the compiler level.
+ * On some architectures, qatomic_set_mb is more efficient than a store
+ * plus a fence.
  */
-#define qatomic_xchg(ptr, i)    (barrier(), __sync_lock_test_and_set(ptr, i))
-
-#elif defined(_ARCH_PPC)
 
-/*
- * We use an eieio() for wmb() on powerpc.  This assumes we don't
- * need to order cacheable and non-cacheable stores with respect to
- * each other.
- *
- * smp_mb has the same problem as on x86 for not-very-new GCC
- * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011).
- */
-#define smp_wmb()          ({ asm volatile("eieio" ::: "memory"); (void)0; })
-#if defined(__powerpc64__)
-#define smp_mb_release()   ({ asm volatile("lwsync" ::: "memory"); (void)0; })
-#define smp_mb_acquire()   ({ asm volatile("lwsync" ::: "memory"); (void)0; })
+#if !defined(QEMU_SANITIZE_THREAD) && \
+    (defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
+# define qatomic_set_mb(ptr, i) \
+    ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); })
 #else
-#define smp_mb_release()   ({ asm volatile("sync" ::: "memory"); (void)0; })
-#define smp_mb_acquire()   ({ asm volatile("sync" ::: "memory"); (void)0; })
-#endif
-#define smp_mb()           ({ asm volatile("sync" ::: "memory"); (void)0; })
-
-#endif /* _ARCH_PPC */
-
-/*
- * For (host) platforms we don't have explicit barrier definitions
- * for, we use the gcc __sync_synchronize() primitive to generate a
- * full barrier.  This should be safe on all platforms, though it may
- * be overkill for smp_mb_acquire() and smp_mb_release().
- */
-#ifndef smp_mb
-#define smp_mb()           __sync_synchronize()
-#endif
-
-#ifndef smp_mb_acquire
-#define smp_mb_acquire()   __sync_synchronize()
-#endif
-
-#ifndef smp_mb_release
-#define smp_mb_release()   __sync_synchronize()
-#endif
-
-#ifndef smp_read_barrier_depends
-#define smp_read_barrier_depends()   barrier()
-#endif
-
-#ifndef signal_barrier
-#define signal_barrier()    barrier()
-#endif
-
-/* These will only be atomic if the processor does the fetch or store
- * in a single issue memory operation
- */
-#define qatomic_read__nocheck(p)   (*(__typeof__(*(p)) volatile*) (p))
-#define qatomic_set__nocheck(p, i) ((*(__typeof__(*(p)) volatile*) (p)) = (i))
-
-#define qatomic_read(ptr)       qatomic_read__nocheck(ptr)
-#define qatomic_set(ptr, i)     qatomic_set__nocheck(ptr,i)
-
-/**
- * qatomic_rcu_read - reads a RCU-protected pointer to a local variable
- * into a RCU read-side critical section. The pointer can later be safely
- * dereferenced within the critical section.
- *
- * This ensures that the pointer copy is invariant thorough the whole critical
- * section.
- *
- * Inserts memory barriers on architectures that require them (currently only
- * Alpha) and documents which pointers are protected by RCU.
- *
- * qatomic_rcu_read also includes a compiler barrier to ensure that
- * value-speculative optimizations (e.g. VSS: Value Speculation
- * Scheduling) does not perform the data read before the pointer read
- * by speculating the value of the pointer.
- *
- * Should match qatomic_rcu_set(), qatomic_xchg(), qatomic_cmpxchg().
- */
-#define qatomic_rcu_read(ptr)    ({               \
-    typeof(*ptr) _val = qatomic_read(ptr);        \
-    smp_read_barrier_depends();                   \
-    _val;                                         \
-})
-
-/**
- * qatomic_rcu_set - assigns (publicizes) a pointer to a new data structure
- * meant to be read by RCU read-side critical sections.
- *
- * Documents which pointers will be dereferenced by RCU read-side critical
- * sections and adds the required memory barriers on architectures requiring
- * them. It also makes sure the compiler does not reorder code initializing the
- * data structure before its publication.
- *
- * Should match qatomic_rcu_read().
- */
-#define qatomic_rcu_set(ptr, i)  do {             \
-    smp_wmb();                                    \
-    qatomic_set(ptr, i);                          \
-} while (0)
-
-#define qatomic_load_acquire(ptr)    ({     \
-    typeof(*ptr) _val = qatomic_read(ptr);  \
-    smp_mb_acquire();                       \
-    _val;                                   \
-})
-
-#define qatomic_store_release(ptr, i)  do { \
-    smp_mb_release();                       \
-    qatomic_set(ptr, i);                    \
-} while (0)
-
-#ifndef qatomic_xchg
-#if defined(__clang__)
-#define qatomic_xchg(ptr, i)    __sync_swap(ptr, i)
-#else
-/* __sync_lock_test_and_set() is documented to be an acquire barrier only.  */
-#define qatomic_xchg(ptr, i)    (smp_mb(), __sync_lock_test_and_set(ptr, i))
-#endif
-#endif
-#define qatomic_xchg__nocheck  qatomic_xchg
-
-/* Provide shorter names for GCC atomic builtins.  */
-#define qatomic_fetch_inc(ptr)  __sync_fetch_and_add(ptr, 1)
-#define qatomic_fetch_dec(ptr)  __sync_fetch_and_add(ptr, -1)
-
-#define qatomic_fetch_add(ptr, n) __sync_fetch_and_add(ptr, n)
-#define qatomic_fetch_sub(ptr, n) __sync_fetch_and_sub(ptr, n)
-#define qatomic_fetch_and(ptr, n) __sync_fetch_and_and(ptr, n)
-#define qatomic_fetch_or(ptr, n) __sync_fetch_and_or(ptr, n)
-#define qatomic_fetch_xor(ptr, n) __sync_fetch_and_xor(ptr, n)
-
-#define qatomic_inc_fetch(ptr)  __sync_add_and_fetch(ptr, 1)
-#define qatomic_dec_fetch(ptr)  __sync_add_and_fetch(ptr, -1)
-#define qatomic_add_fetch(ptr, n) __sync_add_and_fetch(ptr, n)
-#define qatomic_sub_fetch(ptr, n) __sync_sub_and_fetch(ptr, n)
-#define qatomic_and_fetch(ptr, n) __sync_and_and_fetch(ptr, n)
-#define qatomic_or_fetch(ptr, n) __sync_or_and_fetch(ptr, n)
-#define qatomic_xor_fetch(ptr, n) __sync_xor_and_fetch(ptr, n)
-
-#define qatomic_cmpxchg(ptr, old, new) \
-    __sync_val_compare_and_swap(ptr, old, new)
-#define qatomic_cmpxchg__nocheck(ptr, old, new)  qatomic_cmpxchg(ptr, old, new)
-
-/* And even shorter names that return void.  */
-#define qatomic_inc(ptr)        ((void) __sync_fetch_and_add(ptr, 1))
-#define qatomic_dec(ptr)        ((void) __sync_fetch_and_add(ptr, -1))
-#define qatomic_add(ptr, n)     ((void) __sync_fetch_and_add(ptr, n))
-#define qatomic_sub(ptr, n)     ((void) __sync_fetch_and_sub(ptr, n))
-#define qatomic_and(ptr, n)     ((void) __sync_fetch_and_and(ptr, n))
-#define qatomic_or(ptr, n)      ((void) __sync_fetch_and_or(ptr, n))
-#define qatomic_xor(ptr, n)     ((void) __sync_fetch_and_xor(ptr, n))
-
-#endif /* __ATOMIC_RELAXED */
-
-#ifndef smp_wmb
-#define smp_wmb()   smp_mb_release()
-#endif
-#ifndef smp_rmb
-#define smp_rmb()   smp_mb_acquire()
-#endif
-
-/* This is more efficient than a store plus a fence.  */
-#if !defined(__SANITIZE_THREAD__)
-#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
-#define qatomic_mb_set(ptr, i)  ((void)qatomic_xchg(ptr, i))
-#endif
-#endif
-
-/* qatomic_mb_read/set semantics map Java volatile variables. They are
- * less expensive on some platforms (notably POWER) than fully
- * sequentially consistent operations.
- *
- * As long as they are used as paired operations they are safe to
- * use. See docs/devel/atomics.txt for more discussion.
- */
-
-#ifndef qatomic_mb_read
-#define qatomic_mb_read(ptr)                             \
-    qatomic_load_acquire(ptr)
-#endif
-
-#ifndef qatomic_mb_set
-#define qatomic_mb_set(ptr, i)  do {                    \
-    qatomic_store_release(ptr, i);                      \
-    smp_mb();                                           \
-} while(0)
+# define qatomic_set_mb(ptr, i) \
+   ({ qatomic_store_release(ptr, i); smp_mb(); })
 #endif
 
 #define qatomic_fetch_inc_nonzero(ptr) ({                               \
     _oldn;                                                              \
 })
 
-/* Abstractions to access atomically (i.e. "once") i64/u64 variables */
-#ifdef CONFIG_ATOMIC64
-static inline int64_t qatomic_read_i64(const int64_t *ptr)
-{
-    /* use __nocheck because sizeof(void *) might be < sizeof(u64) */
-    return qatomic_read__nocheck(ptr);
-}
-
-static inline uint64_t qatomic_read_u64(const uint64_t *ptr)
-{
-    return qatomic_read__nocheck(ptr);
-}
-
-static inline void qatomic_set_i64(int64_t *ptr, int64_t val)
-{
-    qatomic_set__nocheck(ptr, val);
-}
+/*
+ * Abstractions to access atomically (i.e. "once") i64/u64 variables.
+ *
+ * The i386 abi is odd in that by default members are only aligned to
+ * 4 bytes, which means that 8-byte types can wind up mis-aligned.
+ * Clang will then warn about this, and emit a call into libatomic.
+ *
+ * Use of these types in structures when they will be used with atomic
+ * operations can avoid this.
+ */
+typedef int64_t aligned_int64_t __attribute__((aligned(8)));
+typedef uint64_t aligned_uint64_t __attribute__((aligned(8)));
 
-static inline void qatomic_set_u64(uint64_t *ptr, uint64_t val)
-{
-    qatomic_set__nocheck(ptr, val);
-}
+#ifdef CONFIG_ATOMIC64
+/* Use __nocheck because sizeof(void *) might be < sizeof(u64) */
+#define qatomic_read_i64(P) \
+    _Generic(*(P), int64_t: qatomic_read__nocheck(P))
+#define qatomic_read_u64(P) \
+    _Generic(*(P), uint64_t: qatomic_read__nocheck(P))
+#define qatomic_set_i64(P, V) \
+    _Generic(*(P), int64_t: qatomic_set__nocheck(P, V))
+#define qatomic_set_u64(P, V) \
+    _Generic(*(P), uint64_t: qatomic_set__nocheck(P, V))
 
 static inline void qatomic64_init(void)
 {
index ad2bcf45b4f8c7c1ed18ae9b5f858c6861dc6454..88af6d4ea3faa15c1184c3a293af54093df76b9a 100644 (file)
@@ -6,7 +6,7 @@
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
  *
- * See docs/devel/atomics.txt for discussion about the guarantees each
+ * See docs/devel/atomics.rst for discussion about the guarantees each
  * atomic primitive is meant to provide.
  */
 
 
 #include "qemu/int128.h"
 
+/*
+ * If __alignof(unsigned __int128) < 16, GCC may refuse to inline atomics
+ * that are supported by the host, e.g. s390x.  We can force the pointer to
+ * have our known alignment with __builtin_assume_aligned, however prior to
+ * GCC 13 that was only reliable with optimization enabled.  See
+ *   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
+ */
+#if defined(CONFIG_ATOMIC128_OPT)
+# if !defined(__OPTIMIZE__)
+#  define ATTRIBUTE_ATOMIC128_OPT  __attribute__((optimize("O1")))
+# endif
+# define CONFIG_ATOMIC128
+#endif
+#ifndef ATTRIBUTE_ATOMIC128_OPT
+# define ATTRIBUTE_ATOMIC128_OPT
+#endif
+
 /*
  * GCC is a house divided about supporting large atomic operations.
  *
@@ -26,8 +43,8 @@
  * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878
  *
  * This interpretation is not especially helpful for QEMU.
- * For softmmu, all RAM is always read/write from the hypervisor.
- * For user-only, if the guest doesn't implement such an __atomic_read
+ * For system-mode, all RAM is always read/write from the hypervisor.
+ * For user-mode, if the guest doesn't implement such an __atomic_read
  * then the host need not worry about it either.
  *
  * Moreover, using libatomic is not an option, because its interface is
  * Therefore, special case each platform.
  */
 
-#if defined(CONFIG_ATOMIC128)
-static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
-{
-    return qatomic_cmpxchg__nocheck(ptr, cmp, new);
-}
-# define HAVE_CMPXCHG128 1
-#elif defined(CONFIG_CMPXCHG128)
-static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
-{
-    return __sync_val_compare_and_swap_16(ptr, cmp, new);
-}
-# define HAVE_CMPXCHG128 1
-#elif defined(__aarch64__)
-/* Through gcc 8, aarch64 has no support for 128-bit at all.  */
-static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
-{
-    uint64_t cmpl = int128_getlo(cmp), cmph = int128_gethi(cmp);
-    uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
-    uint64_t oldl, oldh;
-    uint32_t tmp;
-
-    asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
-        "cmp %[oldl], %[cmpl]\n\t"
-        "ccmp %[oldh], %[cmph], #0, eq\n\t"
-        "b.ne 1f\n\t"
-        "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
-        "cbnz %w[tmp], 0b\n"
-        "1:"
-        : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
-          [oldl] "=&r"(oldl), [oldh] "=&r"(oldh)
-        : [cmpl] "r"(cmpl), [cmph] "r"(cmph),
-          [newl] "r"(newl), [newh] "r"(newh)
-        : "memory", "cc");
-
-    return int128_make128(oldl, oldh);
-}
-# define HAVE_CMPXCHG128 1
-#else
-/* Fallback definition that must be optimized away, or error.  */
-Int128 QEMU_ERROR("unsupported atomic")
-    atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new);
-# define HAVE_CMPXCHG128 0
-#endif /* Some definition for HAVE_CMPXCHG128 */
-
-
-#if defined(CONFIG_ATOMIC128)
-static inline Int128 atomic16_read(Int128 *ptr)
-{
-    return qatomic_read__nocheck(ptr);
-}
-
-static inline void atomic16_set(Int128 *ptr, Int128 val)
-{
-    qatomic_set__nocheck(ptr, val);
-}
-
-# define HAVE_ATOMIC128 1
-#elif !defined(CONFIG_USER_ONLY) && defined(__aarch64__)
-/* We can do better than cmpxchg for AArch64.  */
-static inline Int128 atomic16_read(Int128 *ptr)
-{
-    uint64_t l, h;
-    uint32_t tmp;
-
-    /* The load must be paired with the store to guarantee not tearing.  */
-    asm("0: ldxp %[l], %[h], %[mem]\n\t"
-        "stxp %w[tmp], %[l], %[h], %[mem]\n\t"
-        "cbnz %w[tmp], 0b"
-        : [mem] "+m"(*ptr), [tmp] "=r"(tmp), [l] "=r"(l), [h] "=r"(h));
-
-    return int128_make128(l, h);
-}
-
-static inline void atomic16_set(Int128 *ptr, Int128 val)
-{
-    uint64_t l = int128_getlo(val), h = int128_gethi(val);
-    uint64_t t1, t2;
-
-    /* Load into temporaries to acquire the exclusive access lock.  */
-    asm("0: ldxp %[t1], %[t2], %[mem]\n\t"
-        "stxp %w[t1], %[l], %[h], %[mem]\n\t"
-        "cbnz %w[t1], 0b"
-        : [mem] "+m"(*ptr), [t1] "=&r"(t1), [t2] "=&r"(t2)
-        : [l] "r"(l), [h] "r"(h));
-}
-
-# define HAVE_ATOMIC128 1
-#elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
-static inline Int128 atomic16_read(Int128 *ptr)
-{
-    /* Maybe replace 0 with 0, returning the old value.  */
-    return atomic16_cmpxchg(ptr, 0, 0);
-}
-
-static inline void atomic16_set(Int128 *ptr, Int128 val)
-{
-    Int128 old = *ptr, cmp;
-    do {
-        cmp = old;
-        old = atomic16_cmpxchg(ptr, cmp, val);
-    } while (old != cmp);
-}
-
-# define HAVE_ATOMIC128 1
-#else
-/* Fallback definitions that must be optimized away, or error.  */
-Int128 QEMU_ERROR("unsupported atomic") atomic16_read(Int128 *ptr);
-void QEMU_ERROR("unsupported atomic") atomic16_set(Int128 *ptr, Int128 val);
-# define HAVE_ATOMIC128 0
-#endif /* Some definition for HAVE_ATOMIC128 */
+#include "host/atomic128-cas.h"
+#include "host/atomic128-ldst.h"
 
 #endif /* QEMU_ATOMIC128_H */
index 82a1d2f41f66b3f63d17effec7ce36264cac2cdb..97806811eeb4f4d61495adaf0f5c58e431eb95e2 100644 (file)
  * Note that nbits should be always a compile time evaluable constant.
  * Otherwise many inlines will generate horrible code.
  *
- * bitmap_zero(dst, nbits)                     *dst = 0UL
- * bitmap_fill(dst, nbits)                     *dst = ~0UL
- * bitmap_copy(dst, src, nbits)                        *dst = *src
- * bitmap_and(dst, src1, src2, nbits)          *dst = *src1 & *src2
- * bitmap_or(dst, src1, src2, nbits)           *dst = *src1 | *src2
- * bitmap_xor(dst, src1, src2, nbits)          *dst = *src1 ^ *src2
- * bitmap_andnot(dst, src1, src2, nbits)       *dst = *src1 & ~(*src2)
- * bitmap_complement(dst, src, nbits)          *dst = ~(*src)
- * bitmap_equal(src1, src2, nbits)             Are *src1 and *src2 equal?
+ * bitmap_zero(dst, nbits)                      *dst = 0UL
+ * bitmap_fill(dst, nbits)                      *dst = ~0UL
+ * bitmap_copy(dst, src, nbits)                 *dst = *src
+ * bitmap_and(dst, src1, src2, nbits)           *dst = *src1 & *src2
+ * bitmap_or(dst, src1, src2, nbits)            *dst = *src1 | *src2
+ * bitmap_xor(dst, src1, src2, nbits)           *dst = *src1 ^ *src2
+ * bitmap_andnot(dst, src1, src2, nbits)        *dst = *src1 & ~(*src2)
+ * bitmap_complement(dst, src, nbits)           *dst = ~(*src)
+ * bitmap_equal(src1, src2, nbits)              Are *src1 and *src2 equal?
  * bitmap_intersects(src1, src2, nbits)         Do *src1 and *src2 overlap?
- * bitmap_empty(src, nbits)                    Are all bits zero in *src?
- * bitmap_full(src, nbits)                     Are all bits set in *src?
- * bitmap_set(dst, pos, nbits)                 Set specified bit area
- * bitmap_set_atomic(dst, pos, nbits)   Set specified bit area with atomic ops
- * bitmap_clear(dst, pos, nbits)               Clear specified bit area
+ * bitmap_empty(src, nbits)                     Are all bits zero in *src?
+ * bitmap_full(src, nbits)                      Are all bits set in *src?
+ * bitmap_set(dst, pos, nbits)                  Set specified bit area
+ * bitmap_set_atomic(dst, pos, nbits)           Set specified bit area with atomic ops
+ * bitmap_clear(dst, pos, nbits)                Clear specified bit area
  * bitmap_test_and_clear_atomic(dst, pos, nbits)    Test and clear area
- * bitmap_find_next_zero_area(buf, len, pos, n, mask)  Find bit free area
+ * bitmap_find_next_zero_area(buf, len, pos, n, mask)  Find bit free area
  * bitmap_to_le(dst, src, nbits)      Convert bitmap to little endian
  * bitmap_from_le(dst, src, nbits)    Convert bitmap from little endian
  * bitmap_copy_with_src_offset(dst, src, offset, nbits)
 /*
  * Also the following operations apply to bitmaps.
  *
- * set_bit(bit, addr)                  *addr |= bit
- * clear_bit(bit, addr)                        *addr &= ~bit
- * change_bit(bit, addr)               *addr ^= bit
- * test_bit(bit, addr)                 Is bit set in *addr?
- * test_and_set_bit(bit, addr)         Set bit and return old value
- * test_and_clear_bit(bit, addr)       Clear bit and return old value
- * test_and_change_bit(bit, addr)      Change bit and return old value
- * find_first_zero_bit(addr, nbits)    Position first zero bit in *addr
- * find_first_bit(addr, nbits)         Position first set bit in *addr
- * find_next_zero_bit(addr, nbits, bit)        Position next zero bit in *addr >= bit
- * find_next_bit(addr, nbits, bit)     Position next set bit in *addr >= bit
+ * set_bit(bit, addr)               *addr |= bit
+ * clear_bit(bit, addr)             *addr &= ~bit
+ * change_bit(bit, addr)            *addr ^= bit
+ * test_bit(bit, addr)              Is bit set in *addr?
+ * test_and_set_bit(bit, addr)      Set bit and return old value
+ * test_and_clear_bit(bit, addr)    Clear bit and return old value
+ * test_and_change_bit(bit, addr)   Change bit and return old value
+ * find_first_zero_bit(addr, nbits) Position first zero bit in *addr
+ * find_first_bit(addr, nbits)      Position first set bit in *addr
+ * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit
+ * find_next_bit(addr, nbits, bit)  Position next set bit in *addr >= bit
  */
 
 #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
@@ -253,6 +253,7 @@ void bitmap_set(unsigned long *map, long i, long len);
 void bitmap_set_atomic(unsigned long *map, long i, long len);
 void bitmap_clear(unsigned long *map, long start, long nr);
 bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr);
+bool bitmap_test_and_clear(unsigned long *map, long start, long nr);
 void bitmap_copy_and_clear_atomic(unsigned long *dst, unsigned long *src,
                                   long nr);
 unsigned long bitmap_find_next_zero_area(unsigned long *map,
index 3acbf3384c63ce4f7d2a98db962ae999de3b2b63..cb3526d1f44dae3a041abc73019ea172faeeadc0 100644 (file)
@@ -140,7 +140,8 @@ static inline int test_bit(long nr, const unsigned long *addr)
  * @addr: The address to start the search at
  * @size: The maximum size to search
  *
- * Returns the bit number of the first set bit, or size.
+ * Returns the bit number of the last set bit,
+ * or @size if there is no set bit in the bitmap.
  */
 unsigned long find_last_bit(const unsigned long *addr,
                             unsigned long size);
@@ -150,6 +151,9 @@ unsigned long find_last_bit(const unsigned long *addr,
  * @addr: The address to base the search on
  * @offset: The bitnumber to start searching at
  * @size: The bitmap size in bits
+ *
+ * Returns the bit number of the next set bit,
+ * or @size if there are no further set bits in the bitmap.
  */
 unsigned long find_next_bit(const unsigned long *addr,
                             unsigned long size,
@@ -160,6 +164,9 @@ unsigned long find_next_bit(const unsigned long *addr,
  * @addr: The address to base the search on
  * @offset: The bitnumber to start searching at
  * @size: The bitmap size in bits
+ *
+ * Returns the bit number of the next cleared bit,
+ * or @size if there are no further clear bits in the bitmap.
  */
 
 unsigned long find_next_zero_bit(const unsigned long *addr,
@@ -171,7 +178,8 @@ unsigned long find_next_zero_bit(const unsigned long *addr,
  * @addr: The address to start the search at
  * @size: The maximum size to search
  *
- * Returns the bit number of the first set bit.
+ * Returns the bit number of the first set bit,
+ * or @size if there is no set bit in the bitmap.
  */
 static inline unsigned long find_first_bit(const unsigned long *addr,
                                            unsigned long size)
@@ -194,7 +202,8 @@ static inline unsigned long find_first_bit(const unsigned long *addr,
  * @addr: The address to start the search at
  * @size: The maximum size to search
  *
- * Returns the bit number of the first cleared bit.
+ * Returns the bit number of the first cleared bit,
+ * or @size if there is no clear bit in the bitmap.
  */
 static inline unsigned long find_first_zero_bit(const unsigned long *addr,
                                                 unsigned long size)
@@ -209,7 +218,7 @@ static inline unsigned long find_first_zero_bit(const unsigned long *addr,
  */
 static inline uint8_t rol8(uint8_t word, unsigned int shift)
 {
-    return (word << shift) | (word >> ((8 - shift) & 7));
+    return (word << (shift & 7)) | (word >> (-shift & 7));
 }
 
 /**
@@ -219,7 +228,7 @@ static inline uint8_t rol8(uint8_t word, unsigned int shift)
  */
 static inline uint8_t ror8(uint8_t word, unsigned int shift)
 {
-    return (word >> shift) | (word << ((8 - shift) & 7));
+    return (word >> (shift & 7)) | (word << (-shift & 7));
 }
 
 /**
@@ -229,7 +238,7 @@ static inline uint8_t ror8(uint8_t word, unsigned int shift)
  */
 static inline uint16_t rol16(uint16_t word, unsigned int shift)
 {
-    return (word << shift) | (word >> ((16 - shift) & 15));
+    return (word << (shift & 15)) | (word >> (-shift & 15));
 }
 
 /**
@@ -239,7 +248,7 @@ static inline uint16_t rol16(uint16_t word, unsigned int shift)
  */
 static inline uint16_t ror16(uint16_t word, unsigned int shift)
 {
-    return (word >> shift) | (word << ((16 - shift) & 15));
+    return (word >> (shift & 15)) | (word << (-shift & 15));
 }
 
 /**
@@ -249,7 +258,7 @@ static inline uint16_t ror16(uint16_t word, unsigned int shift)
  */
 static inline uint32_t rol32(uint32_t word, unsigned int shift)
 {
-    return (word << shift) | (word >> ((32 - shift) & 31));
+    return (word << (shift & 31)) | (word >> (-shift & 31));
 }
 
 /**
@@ -259,7 +268,7 @@ static inline uint32_t rol32(uint32_t word, unsigned int shift)
  */
 static inline uint32_t ror32(uint32_t word, unsigned int shift)
 {
-    return (word >> shift) | (word << ((32 - shift) & 31));
+    return (word >> (shift & 31)) | (word << (-shift & 31));
 }
 
 /**
@@ -269,7 +278,7 @@ static inline uint32_t ror32(uint32_t word, unsigned int shift)
  */
 static inline uint64_t rol64(uint64_t word, unsigned int shift)
 {
-    return (word << shift) | (word >> ((64 - shift) & 63));
+    return (word << (shift & 63)) | (word >> (-shift & 63));
 }
 
 /**
@@ -279,7 +288,36 @@ static inline uint64_t rol64(uint64_t word, unsigned int shift)
  */
 static inline uint64_t ror64(uint64_t word, unsigned int shift)
 {
-    return (word >> shift) | (word << ((64 - shift) & 63));
+    return (word >> (shift & 63)) | (word << (-shift & 63));
+}
+
+/**
+ * hswap32 - swap 16-bit halfwords within a 32-bit value
+ * @h: value to swap
+ */
+static inline uint32_t hswap32(uint32_t h)
+{
+    return rol32(h, 16);
+}
+
+/**
+ * hswap64 - swap 16-bit halfwords within a 64-bit value
+ * @h: value to swap
+ */
+static inline uint64_t hswap64(uint64_t h)
+{
+    uint64_t m = 0x0000ffff0000ffffull;
+    h = rol64(h, 32);
+    return ((h & m) << 16) | ((h >> 16) & m);
+}
+
+/**
+ * wswap64 - swap 32-bit words within a 64-bit value
+ * @h: value to swap
+ */
+static inline uint64_t wswap64(uint64_t h)
+{
+    return rol64(h, 32);
 }
 
 /**
index 8b01c38040c6ce0d3b121ef73696bceb6bde6a23..933a66ee87e258db1245bf9d0b56b74f47664fe0 100644 (file)
@@ -1,85 +1,54 @@
 #ifndef BSWAP_H
 #define BSWAP_H
 
-#include "fpu/softfloat-types.h"
+#undef  bswap16
+#define bswap16(_x) __builtin_bswap16(_x)
+#undef  bswap32
+#define bswap32(_x) __builtin_bswap32(_x)
+#undef  bswap64
+#define bswap64(_x) __builtin_bswap64(_x)
 
-#ifdef CONFIG_MACHINE_BSWAP_H
-# include <sys/endian.h>
-# include <machine/bswap.h>
-#elif defined(__FreeBSD__)
-# include <sys/endian.h>
-#elif defined(__HAIKU__)
-# include <endian.h>
-#elif defined(CONFIG_BYTESWAP_H)
-# include <byteswap.h>
-
-static inline uint16_t bswap16(uint16_t x)
-{
-    return bswap_16(x);
-}
-
-static inline uint32_t bswap32(uint32_t x)
-{
-    return bswap_32(x);
-}
-
-static inline uint64_t bswap64(uint64_t x)
+static inline uint32_t bswap24(uint32_t x)
 {
-    return bswap_64(x);
-}
-# else
-static inline uint16_t bswap16(uint16_t x)
-{
-    return (((x & 0x00ff) << 8) |
-            ((x & 0xff00) >> 8));
+    return (((x & 0x000000ffU) << 16) |
+            ((x & 0x0000ff00U) <<  0) |
+            ((x & 0x00ff0000U) >> 16));
 }
 
-static inline uint32_t bswap32(uint32_t x)
-{
-    return (((x & 0x000000ffU) << 24) |
-            ((x & 0x0000ff00U) <<  8) |
-            ((x & 0x00ff0000U) >>  8) |
-            ((x & 0xff000000U) >> 24));
-}
-
-static inline uint64_t bswap64(uint64_t x)
+static inline void bswap16s(uint16_t *s)
 {
-    return (((x & 0x00000000000000ffULL) << 56) |
-            ((x & 0x000000000000ff00ULL) << 40) |
-            ((x & 0x0000000000ff0000ULL) << 24) |
-            ((x & 0x00000000ff000000ULL) <<  8) |
-            ((x & 0x000000ff00000000ULL) >>  8) |
-            ((x & 0x0000ff0000000000ULL) >> 24) |
-            ((x & 0x00ff000000000000ULL) >> 40) |
-            ((x & 0xff00000000000000ULL) >> 56));
+    *s = __builtin_bswap16(*s);
 }
-#endif /* ! CONFIG_MACHINE_BSWAP_H */
 
-static inline void bswap16s(uint16_t *s)
+static inline void bswap24s(uint32_t *s)
 {
-    *s = bswap16(*s);
+    *s = bswap24(*s & 0x00ffffffU);
 }
 
 static inline void bswap32s(uint32_t *s)
 {
-    *s = bswap32(*s);
+    *s = __builtin_bswap32(*s);
 }
 
 static inline void bswap64s(uint64_t *s)
 {
-    *s = bswap64(*s);
+    *s = __builtin_bswap64(*s);
 }
 
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
 #define be_bswap(v, size) (v)
-#define le_bswap(v, size) glue(bswap, size)(v)
+#define le_bswap(v, size) glue(__builtin_bswap, size)(v)
+#define le_bswap24(v) bswap24(v)
 #define be_bswaps(v, size)
-#define le_bswaps(p, size) do { *p = glue(bswap, size)(*p); } while(0)
+#define le_bswaps(p, size) \
+            do { *p = glue(__builtin_bswap, size)(*p); } while (0)
 #else
 #define le_bswap(v, size) (v)
-#define be_bswap(v, size) glue(bswap, size)(v)
+#define le_bswap24(v) (v)
+#define be_bswap(v, size) glue(__builtin_bswap, size)(v)
 #define le_bswaps(v, size)
-#define be_bswaps(p, size) do { *p = glue(bswap, size)(*p); } while(0)
+#define be_bswaps(p, size) \
+            do { *p = glue(__builtin_bswap, size)(*p); } while (0)
 #endif
 
 /**
@@ -170,11 +139,20 @@ CPU_CONVERT(le, 32, uint32_t)
 CPU_CONVERT(le, 64, uint64_t)
 
 /*
- * Same as cpu_to_le{16,32}, except that gcc will figure the result is
+ * Same as cpu_to_le{16,32,64}, except that gcc will figure the result is
  * a compile-time constant if you pass in a constant.  So this can be
  * used to initialize static variables.
  */
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
+# define const_le64(_x)                          \
+    ((((_x) & 0x00000000000000ffU) << 56) |      \
+     (((_x) & 0x000000000000ff00U) << 40) |      \
+     (((_x) & 0x0000000000ff0000U) << 24) |      \
+     (((_x) & 0x00000000ff000000U) <<  8) |      \
+     (((_x) & 0x000000ff00000000U) >>  8) |      \
+     (((_x) & 0x0000ff0000000000U) >> 24) |      \
+     (((_x) & 0x00ff000000000000U) >> 40) |      \
+     (((_x) & 0xff00000000000000U) >> 56))
 # define const_le32(_x)                          \
     ((((_x) & 0x000000ffU) << 24) |              \
      (((_x) & 0x0000ff00U) <<  8) |              \
@@ -184,68 +162,11 @@ CPU_CONVERT(le, 64, uint64_t)
     ((((_x) & 0x00ff) << 8) |                    \
      (((_x) & 0xff00) >> 8))
 #else
+# define const_le64(_x) (_x)
 # define const_le32(_x) (_x)
 # define const_le16(_x) (_x)
 #endif
 
-/* Unions for reinterpreting between floats and integers.  */
-
-typedef union {
-    float32 f;
-    uint32_t l;
-} CPU_FloatU;
-
-typedef union {
-    float64 d;
-#if defined(HOST_WORDS_BIGENDIAN)
-    struct {
-        uint32_t upper;
-        uint32_t lower;
-    } l;
-#else
-    struct {
-        uint32_t lower;
-        uint32_t upper;
-    } l;
-#endif
-    uint64_t ll;
-} CPU_DoubleU;
-
-typedef union {
-     floatx80 d;
-     struct {
-         uint64_t lower;
-         uint16_t upper;
-     } l;
-} CPU_LDoubleU;
-
-typedef union {
-    float128 q;
-#if defined(HOST_WORDS_BIGENDIAN)
-    struct {
-        uint32_t upmost;
-        uint32_t upper;
-        uint32_t lower;
-        uint32_t lowest;
-    } l;
-    struct {
-        uint64_t upper;
-        uint64_t lower;
-    } ll;
-#else
-    struct {
-        uint32_t lowest;
-        uint32_t lower;
-        uint32_t upper;
-        uint32_t upmost;
-    } l;
-    struct {
-        uint64_t lower;
-        uint64_t upper;
-    } ll;
-#endif
-} CPU_QuadU;
-
 /* unaligned/endian-independent pointer access */
 
 /*
@@ -269,6 +190,7 @@ typedef union {
  * size is:
  *   b: 8 bits
  *   w: 16 bits
+ *   24: 24 bits
  *   l: 32 bits
  *   q: 64 bits
  *
@@ -341,6 +263,11 @@ static inline void stw_he_p(void *ptr, uint16_t v)
     __builtin_memcpy(ptr, &v, sizeof(v));
 }
 
+static inline void st24_he_p(void *ptr, uint32_t v)
+{
+    __builtin_memcpy(ptr, &v, 3);
+}
+
 static inline int ldl_he_p(const void *ptr)
 {
     int32_t r;
@@ -390,6 +317,11 @@ static inline void stw_le_p(void *ptr, uint16_t v)
     stw_he_p(ptr, le_bswap(v, 16));
 }
 
+static inline void st24_le_p(void *ptr, uint32_t v)
+{
+    st24_he_p(ptr, le_bswap24(v));
+}
+
 static inline void stl_le_p(void *ptr, uint32_t v)
 {
     stl_he_p(ptr, le_bswap(v, 32));
@@ -400,36 +332,6 @@ static inline void stq_le_p(void *ptr, uint64_t v)
     stq_he_p(ptr, le_bswap(v, 64));
 }
 
-/* float access */
-
-static inline float32 ldfl_le_p(const void *ptr)
-{
-    CPU_FloatU u;
-    u.l = ldl_le_p(ptr);
-    return u.f;
-}
-
-static inline void stfl_le_p(void *ptr, float32 v)
-{
-    CPU_FloatU u;
-    u.f = v;
-    stl_le_p(ptr, u.l);
-}
-
-static inline float64 ldfq_le_p(const void *ptr)
-{
-    CPU_DoubleU u;
-    u.ll = ldq_le_p(ptr);
-    return u.d;
-}
-
-static inline void stfq_le_p(void *ptr, float64 v)
-{
-    CPU_DoubleU u;
-    u.d = v;
-    stq_le_p(ptr, u.ll);
-}
-
 static inline int lduw_be_p(const void *ptr)
 {
     return (uint16_t)be_bswap(lduw_he_p(ptr), 16);
@@ -465,36 +367,6 @@ static inline void stq_be_p(void *ptr, uint64_t v)
     stq_he_p(ptr, be_bswap(v, 64));
 }
 
-/* float access */
-
-static inline float32 ldfl_be_p(const void *ptr)
-{
-    CPU_FloatU u;
-    u.l = ldl_be_p(ptr);
-    return u.f;
-}
-
-static inline void stfl_be_p(void *ptr, float32 v)
-{
-    CPU_FloatU u;
-    u.f = v;
-    stl_be_p(ptr, u.l);
-}
-
-static inline float64 ldfq_be_p(const void *ptr)
-{
-    CPU_DoubleU u;
-    u.ll = ldq_be_p(ptr);
-    return u.d;
-}
-
-static inline void stfq_be_p(void *ptr, float64 v)
-{
-    CPU_DoubleU u;
-    u.d = v;
-    stq_be_p(ptr, u.ll);
-}
-
 static inline unsigned long leul_to_cpu(unsigned long v)
 {
 #if HOST_LONG_BITS == 32
index d34d2c857ccc64cbc0780fa69e2858e234c1d7c2..e95dfd696ca8393822c0ee349f8ede3ca0e6ea91 100644 (file)
@@ -49,7 +49,7 @@ struct Buffer {
  * to identify in debug traces.
  */
 void buffer_init(Buffer *buffer, const char *name, ...)
-        GCC_FMT_ATTR(2, 3);
+        G_GNUC_PRINTF(2, 3);
 
 /**
  * buffer_shrink:
diff --git a/include/qemu/cacheflush.h b/include/qemu/cacheflush.h
new file mode 100644 (file)
index 0000000..ae20bcd
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Flush the host cpu caches.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_CACHEFLUSH_H
+#define QEMU_CACHEFLUSH_H
+
+/**
+ * flush_idcache_range:
+ * @rx: instruction address
+ * @rw: data address
+ * @len: length to flush
+ *
+ * Flush @len bytes of the data cache at @rw and the icache at @rx
+ * to bring them in sync.  The two addresses may be different virtual
+ * mappings of the same physical page(s).
+ */
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__s390__)
+
+static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
+{
+    /* icache is coherent and does not require flushing. */
+}
+
+#else
+
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len);
+
+#endif
+
+#endif /* QEMU_CACHEFLUSH_H */
diff --git a/include/qemu/cacheinfo.h b/include/qemu/cacheinfo.h
new file mode 100644 (file)
index 0000000..019a157
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * QEMU host cacheinfo information
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_CACHEINFO_H
+#define QEMU_CACHEINFO_H
+
+/*
+ * These variables represent our best guess at the host icache and
+ * dcache sizes, expressed both as the size in bytes and as the
+ * base-2 log of the size in bytes. They are initialized at startup
+ * (via an attribute 'constructor' function).
+ */
+extern int qemu_icache_linesize;
+extern int qemu_icache_linesize_log;
+extern int qemu_dcache_linesize;
+extern int qemu_dcache_linesize_log;
+
+#endif
diff --git a/include/qemu/clang-tsa.h b/include/qemu/clang-tsa.h
new file mode 100644 (file)
index 0000000..ba06fb8
--- /dev/null
@@ -0,0 +1,114 @@
+#ifndef CLANG_TSA_H
+#define CLANG_TSA_H
+
+/*
+ * Copyright 2018 Jarkko Hietaniemi <jhi@iki.fi>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without
+ * limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* http://clang.llvm.org/docs/ThreadSafetyAnalysis.html
+ *
+ * TSA is available since clang 3.6-ish.
+ */
+#ifdef __clang__
+#  define TSA(x)   __attribute__((x))
+#else
+#  define TSA(x)   /* No TSA, make TSA attributes no-ops. */
+#endif
+
+/* TSA_CAPABILITY() is used to annotate typedefs:
+ *
+ * typedef pthread_mutex_t TSA_CAPABILITY("mutex") tsa_mutex;
+ */
+#define TSA_CAPABILITY(x) TSA(capability(x))
+
+/* TSA_GUARDED_BY() is used to annotate global variables,
+ * the data is guarded:
+ *
+ * Foo foo TSA_GUARDED_BY(mutex);
+ */
+#define TSA_GUARDED_BY(x) TSA(guarded_by(x))
+
+/* TSA_PT_GUARDED_BY() is used to annotate global pointers, the data
+ * behind the pointer is guarded.
+ *
+ * Foo* ptr TSA_PT_GUARDED_BY(mutex);
+ */
+#define TSA_PT_GUARDED_BY(x) TSA(pt_guarded_by(x))
+
+/* The TSA_REQUIRES() is used to annotate functions: the caller of the
+ * function MUST hold the resource, the function will NOT release it.
+ *
+ * More than one mutex may be specified, comma-separated.
+ *
+ * void Foo(void) TSA_REQUIRES(mutex);
+ */
+#define TSA_REQUIRES(...) TSA(requires_capability(__VA_ARGS__))
+#define TSA_REQUIRES_SHARED(...) TSA(requires_shared_capability(__VA_ARGS__))
+
+/* TSA_EXCLUDES() is used to annotate functions: the caller of the
+ * function MUST NOT hold resource, the function first acquires the
+ * resource, and then releases it.
+ *
+ * More than one mutex may be specified, comma-separated.
+ *
+ * void Foo(void) TSA_EXCLUDES(mutex);
+ */
+#define TSA_EXCLUDES(...) TSA(locks_excluded(__VA_ARGS__))
+
+/* TSA_ACQUIRE() is used to annotate functions: the caller of the
+ * function MUST NOT hold the resource, the function will acquire the
+ * resource, but NOT release it.
+ *
+ * More than one mutex may be specified, comma-separated.
+ *
+ * void Foo(void) TSA_ACQUIRE(mutex);
+ */
+#define TSA_ACQUIRE(...) TSA(acquire_capability(__VA_ARGS__))
+#define TSA_ACQUIRE_SHARED(...) TSA(acquire_shared_capability(__VA_ARGS__))
+
+/* TSA_RELEASE() is used to annotate functions: the caller of the
+ * function MUST hold the resource, but the function will then release it.
+ *
+ * More than one mutex may be specified, comma-separated.
+ *
+ * void Foo(void) TSA_RELEASE(mutex);
+ */
+#define TSA_RELEASE(...) TSA(release_capability(__VA_ARGS__))
+#define TSA_RELEASE_SHARED(...) TSA(release_shared_capability(__VA_ARGS__))
+
+/* TSA_NO_TSA is used to annotate functions.  Use only when you need to.
+ *
+ * void Foo(void) TSA_NO_TSA;
+ */
+#define TSA_NO_TSA TSA(no_thread_safety_analysis)
+
+/*
+ * TSA_ASSERT() is used to annotate functions: This function will assert that
+ * the lock is held. When it returns, the caller of the function is assumed to
+ * already hold the resource.
+ *
+ * More than one mutex may be specified, comma-separated.
+ */
+#define TSA_ASSERT(...) TSA(assert_capability(__VA_ARGS__))
+#define TSA_ASSERT_SHARED(...) TSA(assert_shared_capability(__VA_ARGS__))
+
+#endif /* #ifndef CLANG_TSA_H */
index 4e4503004ca350ff9a9ba53df2918daf67991512..78ca5850f8f5cf3fdc1a901938514c99f83a3ecd 100644 (file)
 #ifndef QEMU_CO_SHARED_RESOURCE_H
 #define QEMU_CO_SHARED_RESOURCE_H
 
-
+/* Accesses to co-shared-resource API are thread-safe */
 typedef struct SharedResource SharedResource;
 
 /*
  * Create SharedResource structure
  *
  * @total: total amount of some resource to be shared between clients
- *
- * Note: this API is not thread-safe.
  */
 SharedResource *shres_create(uint64_t total);
 
index c76281f3540bf07a13c5608c68256e72c2e54aec..c797f0d4572f6a3474892cdc6a66379dce491ae6 100644 (file)
@@ -7,27 +7,21 @@
 #ifndef COMPILER_H
 #define COMPILER_H
 
+#define HOST_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+
+/* HOST_LONG_BITS is the size of a native pointer in bits. */
+#define HOST_LONG_BITS (__SIZEOF_POINTER__ * 8)
+
 #if defined __clang_analyzer__ || defined __COVERITY__
 #define QEMU_STATIC_ANALYSIS 1
 #endif
 
-/*----------------------------------------------------------------------------
-| The macro QEMU_GNUC_PREREQ tests for minimum version of the GNU C compiler.
-| The code is a copy of SOFTFLOAT_GNUC_PREREQ, see softfloat-macros.h.
-*----------------------------------------------------------------------------*/
-#if defined(__GNUC__) && defined(__GNUC_MINOR__)
-# define QEMU_GNUC_PREREQ(maj, min) \
-         ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#ifdef __cplusplus
+#define QEMU_EXTERN_C extern "C"
 #else
-# define QEMU_GNUC_PREREQ(maj, min) 0
+#define QEMU_EXTERN_C extern
 #endif
 
-#define QEMU_NORETURN __attribute__ ((__noreturn__))
-
-#define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
-
-#define QEMU_SENTINEL __attribute__((sentinel))
-
 #if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__))
 # define QEMU_PACKED __attribute__((gcc_struct, packed))
 #else
 #ifndef glue
 #define xglue(x, y) x ## y
 #define glue(x, y) xglue(x, y)
-#define stringify(s)   tostring(s)
-#define tostring(s)    #s
+#define stringify(s) tostring(s)
+#define tostring(s) #s
 #endif
 
-#ifndef likely
-#if __GNUC__ < 3
-#define __builtin_expect(x, n) (x)
-#endif
+/* Expands into an identifier stemN, where N is another number each time */
+#define MAKE_IDENTFIER(stem) glue(stem, __COUNTER__)
 
+#ifndef likely
 #define likely(x)   __builtin_expect(!!(x), 1)
 #define unlikely(x)   __builtin_expect(!!(x), 0)
 #endif
     (offsetof(container, field) + sizeof_field(container, field))
 
 /* Convert from a base type to a parent type, with compile time checking.  */
-#ifdef __GNUC__
 #define DO_UPCAST(type, field, dev) ( __extension__ ( { \
     char __attribute__((unused)) offset_must_be_zero[ \
         -offsetof(type, field)]; \
     container_of(dev, type, field);}))
-#else
-#define DO_UPCAST(type, field, dev) container_of(dev, type, field)
-#endif
 
 #define typeof_field(type, field) typeof(((type *)0)->field)
 #define type_check(t1,t2) ((t1*)0 - (t2*)0)
         int:(x) ? -1 : 1; \
     }
 
-/* QEMU_BUILD_BUG_MSG() emits the message given if _Static_assert is
- * supported; otherwise, it will be omitted from the compiler error
- * message (but as it remains present in the source code, it can still
- * be useful when debugging). */
-#if defined(CONFIG_STATIC_ASSERT)
 #define QEMU_BUILD_BUG_MSG(x, msg) _Static_assert(!(x), msg)
-#elif defined(__COUNTER__)
-#define QEMU_BUILD_BUG_MSG(x, msg) typedef QEMU_BUILD_BUG_ON_STRUCT(x) \
-    glue(qemu_build_bug_on__, __COUNTER__) __attribute__((unused))
-#else
-#define QEMU_BUILD_BUG_MSG(x, msg)
-#endif
 
 #define QEMU_BUILD_BUG_ON(x) QEMU_BUILD_BUG_MSG(x, "not expecting: " #x)
 
 #define QEMU_BUILD_BUG_ON_ZERO(x) (sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)) - \
                                    sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)))
 
-#if defined __GNUC__
-# if !QEMU_GNUC_PREREQ(4, 4)
-   /* gcc versions before 4.4.x don't support gnu_printf, so use printf. */
-#  define GCC_FMT_ATTR(n, m) __attribute__((format(printf, n, m)))
-# else
-   /* Use gnu_printf when supported (qemu uses standard format strings). */
-#  define GCC_FMT_ATTR(n, m) __attribute__((format(gnu_printf, n, m)))
-#  if defined(_WIN32)
-    /* Map __printf__ to __gnu_printf__ because we want standard format strings
-     * even when MinGW or GLib include files use __printf__. */
-#   define __printf__ __gnu_printf__
-#  endif
-# endif
-#else
-#define GCC_FMT_ATTR(n, m)
+#if !defined(__clang__) && defined(_WIN32)
+/*
+ * Map __printf__ to __gnu_printf__ because we want standard format strings even
+ * when MinGW or GLib include files use __printf__.
+ */
+# define __printf__ __gnu_printf__
 #endif
 
 #ifndef __has_warning
 #define __has_attribute(x) 0 /* compatibility with older GCC */
 #endif
 
+#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)
+# define QEMU_SANITIZE_ADDRESS 1
+#endif
+
+#if defined(__SANITIZE_THREAD__) || __has_feature(thread_sanitizer)
+# define QEMU_SANITIZE_THREAD 1
+#endif
+
 /*
  * GCC doesn't provide __has_attribute() until GCC 5, but we know all the GCC
  * versions we support have the "flatten" attribute. Clang may not have the
 #define QEMU_ALWAYS_INLINE
 #endif
 
-/* Implement C11 _Generic via GCC builtins.  Example:
- *
- *    QEMU_GENERIC(x, (float, sinf), (long double, sinl), sin) (x)
- *
- * The first argument is the discriminator.  The last is the default value.
- * The middle ones are tuples in "(type, expansion)" format.
+/**
+ * In most cases, normal "fallthrough" comments are good enough for
+ * switch-case statements, but sometimes the compiler has problems
+ * with those. In that case you can use QEMU_FALLTHROUGH instead.
  */
+#if __has_attribute(fallthrough)
+# define QEMU_FALLTHROUGH __attribute__((fallthrough))
+#else
+# define QEMU_FALLTHROUGH do {} while (0) /* fallthrough */
+#endif
 
-/* First, find out the number of generic cases.  */
-#define QEMU_GENERIC(x, ...) \
-    QEMU_GENERIC_(typeof(x), __VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-
-/* There will be extra arguments, but they are not used.  */
-#define QEMU_GENERIC_(x, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, count, ...) \
-    QEMU_GENERIC##count(x, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9)
-
-/* Two more helper macros, this time to extract items from a parenthesized
- * list.
+#ifdef CONFIG_CFI
+/*
+ * If CFI is enabled, use an attribute to disable cfi-icall on the following
+ * function
  */
-#define QEMU_FIRST_(a, b) a
-#define QEMU_SECOND_(a, b) b
-
-/* ... and a final one for the common part of the "recursion".  */
-#define QEMU_GENERIC_IF(x, type_then, else_)                                   \
-    __builtin_choose_expr(__builtin_types_compatible_p(x,                      \
-                                                       QEMU_FIRST_ type_then), \
-                          QEMU_SECOND_ type_then, else_)
-
-/* CPP poor man's "recursion".  */
-#define QEMU_GENERIC1(x, a0, ...) (a0)
-#define QEMU_GENERIC2(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC1(x, __VA_ARGS__))
-#define QEMU_GENERIC3(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC2(x, __VA_ARGS__))
-#define QEMU_GENERIC4(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC3(x, __VA_ARGS__))
-#define QEMU_GENERIC5(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC4(x, __VA_ARGS__))
-#define QEMU_GENERIC6(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC5(x, __VA_ARGS__))
-#define QEMU_GENERIC7(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC6(x, __VA_ARGS__))
-#define QEMU_GENERIC8(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC7(x, __VA_ARGS__))
-#define QEMU_GENERIC9(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC8(x, __VA_ARGS__))
-#define QEMU_GENERIC10(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC9(x, __VA_ARGS__))
+#define QEMU_DISABLE_CFI __attribute__((no_sanitize("cfi-icall")))
+#else
+/* If CFI is not enabled, use an empty define to not change the behavior */
+#define QEMU_DISABLE_CFI
+#endif
 
-/**
- * qemu_build_not_reached()
- *
- * The compiler, during optimization, is expected to prove that a call
- * to this function cannot be reached and remove it.  If the compiler
- * supports QEMU_ERROR, this will be reported at compile time; otherwise
- * this will be reported at link time due to the missing symbol.
+/*
+ * Apple clang version 14 has a bug in its __builtin_subcll(); define
+ * BUILTIN_SUBCLL_BROKEN for the offending versions so we can avoid it.
+ * When a version of Apple clang which has this bug fixed is released
+ * we can add an upper bound to this check.
+ * See https://gitlab.com/qemu-project/qemu/-/issues/1631
+ * and https://gitlab.com/qemu-project/qemu/-/issues/1659 for details.
+ * The bug never made it into any upstream LLVM releases, only Apple ones.
  */
-#if defined(__OPTIMIZE__) && !defined(__NO_INLINE__)
-extern void QEMU_NORETURN QEMU_ERROR("code path is reachable")
-    qemu_build_not_reached(void);
+#if defined(__apple_build_version__) && __clang_major__ >= 14
+#define BUILTIN_SUBCLL_BROKEN
+#endif
+
+#if __has_attribute(annotate)
+#define QEMU_ANNOTATE(x) __attribute__((annotate(x)))
+#else
+#define QEMU_ANNOTATE(x)
+#endif
+
+#if __has_attribute(used)
+# define QEMU_USED __attribute__((used))
 #else
-#define qemu_build_not_reached()  g_assert_not_reached()
+# define QEMU_USED
 #endif
 
+/*
+ * Ugly CPP trick that is like "defined FOO", but also works in C
+ * code.  Useful to replace #ifdef with "if" statements; assumes
+ * the symbol was defined with Meson's "config.set()", so it is empty
+ * if defined.
+ */
+#define IS_ENABLED(x)                  IS_EMPTY(x)
+
+#define IS_EMPTY_JUNK_                 junk,
+#define IS_EMPTY(value)                IS_EMPTY_(IS_EMPTY_JUNK_##value)
+
+/* Expands to either SECOND_ARG(junk, 1, 0) or SECOND_ARG(IS_EMPTY_JUNK_CONFIG_FOO 1, 0)  */
+#define SECOND_ARG(first, second, ...) second
+#define IS_EMPTY_(junk_maybecomma)     SECOND_ARG(junk_maybecomma 1, 0)
+
 #endif /* COMPILER_H */
index d74f92015297dfe21f9a1f0f3be07e4e152cbea5..b82a778123f8de288d7d04b5d4242d78468b3d07 100644 (file)
@@ -1,24 +1,28 @@
 #ifndef QEMU_CONFIG_FILE_H
 #define QEMU_CONFIG_FILE_H
 
+typedef void QEMUConfigCB(const char *group, QDict *qdict, void *opaque, Error **errp);
 
+void qemu_load_module_for_opts(const char *group);
 QemuOptsList *qemu_find_opts(const char *group);
 QemuOptsList *qemu_find_opts_err(const char *group, Error **errp);
 QemuOpts *qemu_find_opts_singleton(const char *group);
 
 void qemu_add_opts(QemuOptsList *list);
 void qemu_add_drive_opts(QemuOptsList *list);
-int qemu_set_option(const char *str);
 int qemu_global_option(const char *str);
 
-void qemu_config_write(FILE *fp);
-int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname);
+int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname,
+                      Error **errp);
 
-int qemu_read_config_file(const char *filename);
+/* A default callback for qemu_read_config_file().  */
+void qemu_config_do_parse(const char *group, QDict *qdict, void *opaque, Error **errp);
+
+int qemu_read_config_file(const char *filename, QEMUConfigCB *f, Error **errp);
 
 /* Parse QDict options as a replacement for a config file (allowing multiple
    enumerated (0..(n-1)) configuration "sections") */
-void qemu_config_parse_qdict(QDict *options, QemuOptsList **lists,
+bool qemu_config_parse_qdict(QDict *options, QemuOptsList **lists,
                              Error **errp);
 
 #endif /* QEMU_CONFIG_FILE_H */
diff --git a/include/qemu/coroutine-core.h b/include/qemu/coroutine-core.h
new file mode 100644 (file)
index 0000000..230bb56
--- /dev/null
@@ -0,0 +1,154 @@
+/*
+ * QEMU coroutine implementation
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
+ *  Kevin Wolf         <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_COROUTINE_CORE_H
+#define QEMU_COROUTINE_CORE_H
+
+/**
+ * Coroutines are a mechanism for stack switching and can be used for
+ * cooperative userspace threading.  These functions provide a simple but
+ * useful flavor of coroutines that is suitable for writing sequential code,
+ * rather than callbacks, for operations that need to give up control while
+ * waiting for events to complete.
+ *
+ * These functions are re-entrant and may be used outside the global mutex.
+ *
+ * Functions that execute in coroutine context cannot be called
+ * directly from normal functions.  Use @coroutine_fn to mark such
+ * functions.  For example:
+ *
+ *   static void coroutine_fn foo(void) {
+ *       ....
+ *   }
+ *
+ * In the future it would be nice to have the compiler or a static
+ * checker catch misuse of such functions.  This annotation might make
+ * it possible and in the meantime it serves as documentation.
+ */
+
+/**
+ * Mark a function that executes in coroutine context
+ *
+ *
+ * Functions that execute in coroutine context cannot be called
+ * directly from normal functions.  Use @coroutine_fn to mark such
+ * functions.  For example:
+ *
+ *   static void coroutine_fn foo(void) {
+ *       ....
+ *   }
+ *
+ * In the future it would be nice to have the compiler or a static
+ * checker catch misuse of such functions.  This annotation might make
+ * it possible and in the meantime it serves as documentation.
+ */
+
+typedef struct Coroutine Coroutine;
+typedef struct CoMutex CoMutex;
+
+/**
+ * Coroutine entry point
+ *
+ * When the coroutine is entered for the first time, opaque is passed in as an
+ * argument.
+ *
+ * When this function returns, the coroutine is destroyed automatically and
+ * execution continues in the caller who last entered the coroutine.
+ */
+typedef void coroutine_fn CoroutineEntry(void *opaque);
+
+/**
+ * Create a new coroutine
+ *
+ * Use qemu_coroutine_enter() to actually transfer control to the coroutine.
+ * The opaque argument is passed as the argument to the entry point.
+ */
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque);
+
+/**
+ * Transfer control to a coroutine
+ */
+void qemu_coroutine_enter(Coroutine *coroutine);
+
+/**
+ * Transfer control to a coroutine if it's not active (i.e. part of the call
+ * stack of the running coroutine). Otherwise, do nothing.
+ */
+void qemu_coroutine_enter_if_inactive(Coroutine *co);
+
+/**
+ * Transfer control to a coroutine and associate it with ctx
+ */
+void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co);
+
+/**
+ * Transfer control back to a coroutine's caller
+ *
+ * This function does not return until the coroutine is re-entered using
+ * qemu_coroutine_enter().
+ */
+void coroutine_fn qemu_coroutine_yield(void);
+
+/**
+ * Get the AioContext of the given coroutine
+ */
+AioContext *qemu_coroutine_get_aio_context(Coroutine *co);
+
+/**
+ * Get the currently executing coroutine
+ */
+Coroutine *qemu_coroutine_self(void);
+
+/**
+ * Return whether or not currently inside a coroutine
+ *
+ * This can be used to write functions that work both when in coroutine context
+ * and when not in coroutine context.  Note that such functions cannot use the
+ * coroutine_fn annotation since they work outside coroutine context.
+ */
+bool qemu_in_coroutine(void);
+
+/**
+ * Return true if the coroutine is currently entered
+ *
+ * A coroutine is "entered" if it has not yielded from the current
+ * qemu_coroutine_enter() call used to run it.  This does not mean that the
+ * coroutine is currently executing code since it may have transferred control
+ * to another coroutine using qemu_coroutine_enter().
+ *
+ * When several coroutines enter each other there may be no way to know which
+ * ones have already been entered.  In such situations this function can be
+ * used to avoid recursively entering coroutines.
+ */
+bool qemu_coroutine_entered(Coroutine *co);
+
+/**
+ * Initialises a CoMutex. This must be called before any other operation is used
+ * on the CoMutex.
+ */
+void qemu_co_mutex_init(CoMutex *mutex);
+
+/**
+ * Locks the mutex. If the lock cannot be taken immediately, control is
+ * transferred to the caller of the current coroutine.
+ */
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
+
+/**
+ * Unlocks the mutex and schedules the next coroutine that was waiting for this
+ * lock to be run.
+ */
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
+
+#endif
diff --git a/include/qemu/coroutine-tls.h b/include/qemu/coroutine-tls.h
new file mode 100644 (file)
index 0000000..1558a82
--- /dev/null
@@ -0,0 +1,165 @@
+/*
+ * QEMU Thread Local Storage for coroutines
+ *
+ * Copyright Red Hat
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ * It is forbidden to access Thread Local Storage in coroutines because
+ * compiler optimizations may cause values to be cached across coroutine
+ * re-entry. Coroutines can run in more than one thread through the course of
+ * their life, leading bugs when stale TLS values from the wrong thread are
+ * used as a result of compiler optimization.
+ *
+ * An example is:
+ *
+ * ..code-block:: c
+ *   :caption: A coroutine that may see the wrong TLS value
+ *
+ *   static __thread AioContext *current_aio_context;
+ *   ...
+ *   static void coroutine_fn foo(void)
+ *   {
+ *       aio_notify(current_aio_context);
+ *       qemu_coroutine_yield();
+ *       aio_notify(current_aio_context); // <-- may be stale after yielding!
+ *   }
+ *
+ * This header provides macros for safely defining variables in Thread Local
+ * Storage:
+ *
+ * ..code-block:: c
+ *   :caption: A coroutine that safely uses TLS
+ *
+ *   QEMU_DEFINE_STATIC_CO_TLS(AioContext *, current_aio_context)
+ *   ...
+ *   static void coroutine_fn foo(void)
+ *   {
+ *       aio_notify(get_current_aio_context());
+ *       qemu_coroutine_yield();
+ *       aio_notify(get_current_aio_context()); // <-- safe
+ *   }
+ */
+
+#ifndef QEMU_COROUTINE_TLS_H
+#define QEMU_COROUTINE_TLS_H
+
+/*
+ * To stop the compiler from caching TLS values we define accessor functions
+ * with __attribute__((noinline)) plus asm volatile("") to prevent
+ * optimizations that override noinline.
+ *
+ * The compiler can still analyze noinline code and make optimizations based on
+ * that knowledge, so an inline asm output operand is used to prevent
+ * optimizations that make assumptions about the address of the TLS variable.
+ *
+ * This is fragile and ultimately needs to be solved by a mechanism that is
+ * guaranteed to work by the compiler (e.g. stackless coroutines), but for now
+ * we use this approach to prevent issues.
+ */
+
+/**
+ * QEMU_DECLARE_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Declare an extern variable in Thread Local Storage from a header file:
+ *
+ * .. code-block:: c
+ *   :caption: Declaring an extern variable in Thread Local Storage
+ *
+ *   QEMU_DECLARE_CO_TLS(int, my_count)
+ *   ...
+ *   int c = get_my_count();
+ *   set_my_count(c + 1);
+ *   *get_ptr_my_count() = 0;
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ *   :caption: Declaring a TLS variable using __thread
+ *
+ *   extern __thread int my_count;
+ *   ...
+ *   int c = my_count;
+ *   my_count = c + 1;
+ *   *(&my_count) = 0;
+ */
+#define QEMU_DECLARE_CO_TLS(type, var)                                       \
+    __attribute__((noinline)) type get_##var(void);                          \
+    __attribute__((noinline)) void set_##var(type v);                        \
+    __attribute__((noinline)) type *get_ptr_##var(void);
+
+/**
+ * QEMU_DEFINE_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Define a variable in Thread Local Storage that was previously declared from
+ * a header file with QEMU_DECLARE_CO_TLS():
+ *
+ * .. code-block:: c
+ *   :caption: Defining a variable in Thread Local Storage
+ *
+ *   QEMU_DEFINE_CO_TLS(int, my_count)
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ *   :caption: Defining a TLS variable using __thread
+ *
+ *   __thread int my_count;
+ */
+#define QEMU_DEFINE_CO_TLS(type, var)                                        \
+    static __thread type co_tls_##var;                                       \
+    type get_##var(void) { asm volatile(""); return co_tls_##var; }          \
+    void set_##var(type v) { asm volatile(""); co_tls_##var = v; }           \
+    type *get_ptr_##var(void)                                                \
+    { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; }
+
+/**
+ * QEMU_DEFINE_STATIC_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Define a static variable in Thread Local Storage:
+ *
+ * .. code-block:: c
+ *   :caption: Defining a static variable in Thread Local Storage
+ *
+ *   QEMU_DEFINE_STATIC_CO_TLS(int, my_count)
+ *   ...
+ *   int c = get_my_count();
+ *   set_my_count(c + 1);
+ *   *get_ptr_my_count() = 0;
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ *   :caption: Defining a static TLS variable using __thread
+ *
+ *   static __thread int my_count;
+ *   ...
+ *   int c = my_count;
+ *   my_count = c + 1;
+ *   *(&my_count) = 0;
+ */
+#define QEMU_DEFINE_STATIC_CO_TLS(type, var)                                 \
+    static __thread type co_tls_##var;                                       \
+    static __attribute__((noinline, unused))                                 \
+    type get_##var(void)                                                     \
+    { asm volatile(""); return co_tls_##var; }                               \
+    static __attribute__((noinline, unused))                                 \
+    void set_##var(type v)                                                   \
+    { asm volatile(""); co_tls_##var = v; }                                  \
+    static __attribute__((noinline, unused))                                 \
+    type *get_ptr_##var(void)                                                \
+    { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; }
+
+#endif /* QEMU_COROUTINE_TLS_H */
index 84eab6e3bf7581d28083fce205c49462819bf43a..a65be6697f51f20cf2c91c1632ab66e48afbbb69 100644 (file)
@@ -15,6 +15,7 @@
 #ifndef QEMU_COROUTINE_H
 #define QEMU_COROUTINE_H
 
+#include "qemu/coroutine-core.h"
 #include "qemu/queue.h"
 #include "qemu/timer.h"
 
  * waiting for events to complete.
  *
  * These functions are re-entrant and may be used outside the global mutex.
- */
-
-/**
- * Mark a function that executes in coroutine context
  *
- * Functions that execute in coroutine context cannot be called directly from
- * normal functions.  In the future it would be nice to enable compiler or
- * static checker support for catching such errors.  This annotation might make
- * it possible and in the meantime it serves as documentation.
- *
- * For example:
+ * Functions that execute in coroutine context cannot be called
+ * directly from normal functions.  Use @coroutine_fn to mark such
+ * functions.  For example:
  *
  *   static void coroutine_fn foo(void) {
  *       ....
  *   }
- */
-#define coroutine_fn
-
-typedef struct Coroutine Coroutine;
-
-/**
- * Coroutine entry point
- *
- * When the coroutine is entered for the first time, opaque is passed in as an
- * argument.
- *
- * When this function returns, the coroutine is destroyed automatically and
- * execution continues in the caller who last entered the coroutine.
- */
-typedef void coroutine_fn CoroutineEntry(void *opaque);
-
-/**
- * Create a new coroutine
- *
- * Use qemu_coroutine_enter() to actually transfer control to the coroutine.
- * The opaque argument is passed as the argument to the entry point.
- */
-Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque);
-
-/**
- * Transfer control to a coroutine
- */
-void qemu_coroutine_enter(Coroutine *coroutine);
-
-/**
- * Transfer control to a coroutine if it's not active (i.e. part of the call
- * stack of the running coroutine). Otherwise, do nothing.
- */
-void qemu_coroutine_enter_if_inactive(Coroutine *co);
-
-/**
- * Transfer control to a coroutine and associate it with ctx
- */
-void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co);
-
-/**
- * Transfer control back to a coroutine's caller
- *
- * This function does not return until the coroutine is re-entered using
- * qemu_coroutine_enter().
- */
-void coroutine_fn qemu_coroutine_yield(void);
-
-/**
- * Get the AioContext of the given coroutine
- */
-AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co);
-
-/**
- * Get the currently executing coroutine
- */
-Coroutine *coroutine_fn qemu_coroutine_self(void);
-
-/**
- * Return whether or not currently inside a coroutine
- *
- * This can be used to write functions that work both when in coroutine context
- * and when not in coroutine context.  Note that such functions cannot use the
- * coroutine_fn annotation since they work outside coroutine context.
- */
-bool qemu_in_coroutine(void);
-
-/**
- * Return true if the coroutine is currently entered
  *
- * A coroutine is "entered" if it has not yielded from the current
- * qemu_coroutine_enter() call used to run it.  This does not mean that the
- * coroutine is currently executing code since it may have transferred control
- * to another coroutine using qemu_coroutine_enter().
- *
- * When several coroutines enter each other there may be no way to know which
- * ones have already been entered.  In such situations this function can be
- * used to avoid recursively entering coroutines.
+ * In the future it would be nice to have the compiler or a static
+ * checker catch misuse of such functions.  This annotation might make
+ * it possible and in the meantime it serves as documentation.
  */
-bool qemu_coroutine_entered(Coroutine *co);
 
 /**
  * Provides a mutex that can be used to synchronise coroutines
@@ -149,24 +68,6 @@ struct CoMutex {
     Coroutine *holder;
 };
 
-/**
- * Initialises a CoMutex. This must be called before any other operation is used
- * on the CoMutex.
- */
-void qemu_co_mutex_init(CoMutex *mutex);
-
-/**
- * Locks the mutex. If the lock cannot be taken immediately, control is
- * transferred to the caller of the current coroutine.
- */
-void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
-
-/**
- * Unlocks the mutex and schedules the next coroutine that was waiting for this
- * lock to be run.
- */
-void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
-
 /**
  * Assert that the current coroutine holds @mutex.
  */
@@ -198,23 +99,38 @@ typedef struct CoQueue {
  */
 void qemu_co_queue_init(CoQueue *queue);
 
+typedef enum {
+    /*
+     * Enqueue at front instead of back. Use this to re-queue a request when
+     * its wait condition is not satisfied after being woken up.
+     */
+    CO_QUEUE_WAIT_FRONT = 0x1,
+} CoQueueWaitFlags;
+
 /**
  * Adds the current coroutine to the CoQueue and transfers control to the
  * caller of the coroutine.  The mutex is unlocked during the wait and
  * locked again afterwards.
  */
 #define qemu_co_queue_wait(queue, lock) \
-    qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock))
-void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock);
+    qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock), 0)
+#define qemu_co_queue_wait_flags(queue, lock, flags) \
+    qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock), (flags))
+void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock,
+                                          CoQueueWaitFlags flags);
 
 /**
- * Removes the next coroutine from the CoQueue, and wake it up.
+ * Removes the next coroutine from the CoQueue, and queue it to run after
+ * the currently-running coroutine yields.
  * Returns true if a coroutine was removed, false if the queue is empty.
+ * Used from coroutine context, use qemu_co_enter_next outside.
  */
 bool coroutine_fn qemu_co_queue_next(CoQueue *queue);
 
 /**
- * Empties the CoQueue; all coroutines are woken up.
+ * Empties the CoQueue and queues the coroutine to run after
+ * the currently-running coroutine yields.
+ * Used from coroutine context, use qemu_co_enter_all outside.
  */
 void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue);
 
@@ -231,17 +147,34 @@ void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue);
     qemu_co_enter_next_impl(queue, QEMU_MAKE_LOCKABLE(lock))
 bool qemu_co_enter_next_impl(CoQueue *queue, QemuLockable *lock);
 
+/**
+ * Empties the CoQueue, waking the waiting coroutine one at a time.  Unlike
+ * qemu_co_queue_all, this function releases the lock during aio_co_wake
+ * because it is meant to be used outside coroutine context; in that case, the
+ * coroutine is entered immediately, before qemu_co_enter_all returns.
+ *
+ * If used in coroutine context, qemu_co_enter_all is equivalent to
+ * qemu_co_queue_all.
+ */
+#define qemu_co_enter_all(queue, lock) \
+    qemu_co_enter_all_impl(queue, QEMU_MAKE_LOCKABLE(lock))
+void qemu_co_enter_all_impl(CoQueue *queue, QemuLockable *lock);
+
 /**
  * Checks if the CoQueue is empty.
  */
 bool qemu_co_queue_empty(CoQueue *queue);
 
 
+typedef struct CoRwTicket CoRwTicket;
 typedef struct CoRwlock {
-    int pending_writer;
-    int reader;
     CoMutex mutex;
-    CoQueue queue;
+
+    /* Number of readers, or -1 if owned for writing.  */
+    int owners;
+
+    /* Waiting coroutines.  */
+    QSIMPLEQ_HEAD(, CoRwTicket) tickets;
 } CoRwlock;
 
 /**
@@ -255,17 +188,16 @@ void qemu_co_rwlock_init(CoRwlock *lock);
  * of a parallel writer, control is transferred to the caller of the current
  * coroutine.
  */
-void qemu_co_rwlock_rdlock(CoRwlock *lock);
+void coroutine_fn qemu_co_rwlock_rdlock(CoRwlock *lock);
 
 /**
  * Write Locks the CoRwlock from a reader.  This is a bit more efficient than
  * @qemu_co_rwlock_unlock followed by a separate @qemu_co_rwlock_wrlock.
- * However, if the lock cannot be upgraded immediately, control is transferred
- * to the caller of the current coroutine.  Also, @qemu_co_rwlock_upgrade
- * only overrides CoRwlock fairness if there are no concurrent readers, so
- * another writer might run while @qemu_co_rwlock_upgrade blocks.
+ * Note that if the lock cannot be upgraded immediately, control is transferred
+ * to the caller of the current coroutine; another writer might run while
+ * @qemu_co_rwlock_upgrade blocks.
  */
-void qemu_co_rwlock_upgrade(CoRwlock *lock);
+void coroutine_fn qemu_co_rwlock_upgrade(CoRwlock *lock);
 
 /**
  * Downgrades a write-side critical section to a reader.  Downgrading with
@@ -273,44 +205,64 @@ void qemu_co_rwlock_upgrade(CoRwlock *lock);
  * followed by @qemu_co_rwlock_rdlock.  This makes it more efficient, but
  * may also sometimes be necessary for correctness.
  */
-void qemu_co_rwlock_downgrade(CoRwlock *lock);
+void coroutine_fn qemu_co_rwlock_downgrade(CoRwlock *lock);
 
 /**
  * Write Locks the mutex. If the lock cannot be taken immediately because
  * of a parallel reader, control is transferred to the caller of the current
  * coroutine.
  */
-void qemu_co_rwlock_wrlock(CoRwlock *lock);
+void coroutine_fn qemu_co_rwlock_wrlock(CoRwlock *lock);
 
 /**
  * Unlocks the read/write lock and schedules the next coroutine that was
  * waiting for this lock to be run.
  */
-void qemu_co_rwlock_unlock(CoRwlock *lock);
+void coroutine_fn qemu_co_rwlock_unlock(CoRwlock *lock);
 
-typedef struct QemuCoSleepState QemuCoSleepState;
+typedef struct QemuCoSleep {
+    Coroutine *to_wake;
+} QemuCoSleep;
 
 /**
- * Yield the coroutine for a given duration. During this yield, @sleep_state
- * (if not NULL) is set to an opaque pointer, which may be used for
- * qemu_co_sleep_wake(). Be careful, the pointer is set back to zero when the
- * timer fires. Don't save the obtained value to other variables and don't call
- * qemu_co_sleep_wake from another aio context.
+ * Yield the coroutine for a given duration. Initializes @w so that,
+ * during this yield, it can be passed to qemu_co_sleep_wake() to
+ * terminate the sleep.
  */
-void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
-                                            QemuCoSleepState **sleep_state);
+void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w,
+                                            QEMUClockType type, int64_t ns);
+
+/**
+ * Yield the coroutine until the next call to qemu_co_sleep_wake.
+ */
+void coroutine_fn qemu_co_sleep(QemuCoSleep *w);
+
 static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns)
 {
-    qemu_co_sleep_ns_wakeable(type, ns, NULL);
+    QemuCoSleep w = { 0 };
+    qemu_co_sleep_ns_wakeable(&w, type, ns);
 }
 
+typedef void CleanupFunc(void *opaque);
+/**
+ * Run entry in a coroutine and start timer. Wait for entry to finish or for
+ * timer to elapse, what happen first. If entry finished, return 0, if timer
+ * elapsed earlier, return -ETIMEDOUT.
+ *
+ * Be careful, entry execution is not canceled, user should handle it somehow.
+ * If @clean is provided, it's called after coroutine finish if timeout
+ * happened.
+ */
+int coroutine_fn qemu_co_timeout(CoroutineEntry *entry, void *opaque,
+                                 uint64_t timeout_ns, CleanupFunc clean);
+
 /**
  * Wake a coroutine if it is sleeping in qemu_co_sleep_ns. The timer will be
  * deleted. @sleep_state must be the variable whose address was given to
  * qemu_co_sleep_ns() and should be checked to be non-NULL before calling
  * qemu_co_sleep_wake().
  */
-void qemu_co_sleep_wake(QemuCoSleepState *sleep_state);
+void qemu_co_sleep_wake(QemuCoSleep *w);
 
 /**
  * Yield until a file descriptor becomes readable
@@ -319,6 +271,41 @@ void qemu_co_sleep_wake(QemuCoSleepState *sleep_state);
  */
 void coroutine_fn yield_until_fd_readable(int fd);
 
+/**
+ * Increase coroutine pool size
+ */
+void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size);
+
+/**
+ * Decrease coroutine pool size
+ */
+void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size);
+
 #include "qemu/lockable.h"
 
+/**
+ * Sends a (part of) iovec down a socket, yielding when the socket is full, or
+ * Receives data into a (part of) iovec from a socket,
+ * yielding when there is no data in the socket.
+ * The same interface as qemu_sendv_recvv(), with added yielding.
+ * XXX should mark these as coroutine_fn
+ */
+ssize_t coroutine_fn qemu_co_sendv_recvv(int sockfd, struct iovec *iov,
+                                         unsigned iov_cnt, size_t offset,
+                                         size_t bytes, bool do_send);
+#define qemu_co_recvv(sockfd, iov, iov_cnt, offset, bytes) \
+  qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, false)
+#define qemu_co_sendv(sockfd, iov, iov_cnt, offset, bytes) \
+  qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, true)
+
+/**
+ * The same as above, but with just a single buffer
+ */
+ssize_t coroutine_fn qemu_co_send_recv(int sockfd, void *buf, size_t bytes,
+                                       bool do_send);
+#define qemu_co_recv(sockfd, buf, bytes) \
+  qemu_co_send_recv(sockfd, buf, bytes, false)
+#define qemu_co_send(sockfd, buf, bytes) \
+  qemu_co_send_recv(sockfd, buf, bytes, true)
+
 #endif /* QEMU_COROUTINE_H */
diff --git a/include/qemu/cpu-float.h b/include/qemu/cpu-float.h
new file mode 100644 (file)
index 0000000..a26b9fa
--- /dev/null
@@ -0,0 +1,64 @@
+#ifndef QEMU_CPU_FLOAT_H
+#define QEMU_CPU_FLOAT_H
+
+#include "fpu/softfloat-types.h"
+
+/* Unions for reinterpreting between floats and integers.  */
+
+typedef union {
+    float32 f;
+    uint32_t l;
+} CPU_FloatU;
+
+typedef union {
+    float64 d;
+#if HOST_BIG_ENDIAN
+    struct {
+        uint32_t upper;
+        uint32_t lower;
+    } l;
+#else
+    struct {
+        uint32_t lower;
+        uint32_t upper;
+    } l;
+#endif
+    uint64_t ll;
+} CPU_DoubleU;
+
+typedef union {
+     floatx80 d;
+     struct {
+         uint64_t lower;
+         uint16_t upper;
+     } l;
+} CPU_LDoubleU;
+
+typedef union {
+    float128 q;
+#if HOST_BIG_ENDIAN
+    struct {
+        uint32_t upmost;
+        uint32_t upper;
+        uint32_t lower;
+        uint32_t lowest;
+    } l;
+    struct {
+        uint64_t upper;
+        uint64_t lower;
+    } ll;
+#else
+    struct {
+        uint32_t lowest;
+        uint32_t lower;
+        uint32_t upper;
+        uint32_t upmost;
+    } l;
+    struct {
+        uint64_t lower;
+        uint64_t upper;
+    } ll;
+#endif
+} CPU_QuadU;
+
+#endif /* QEMU_CPU_FLOAT_H */
index 09fc245b918e386ac0745bf22333f33bfc44a938..b11161555b691f3a34d8b4c544e20f5d17a3a7fa 100644 (file)
@@ -25,6 +25,9 @@
 #endif
 
 /* Leaf 1, %ecx */
+#ifndef bit_PCLMUL
+#define bit_PCLMUL      (1 << 1)
+#endif
 #ifndef bit_SSE4_1
 #define bit_SSE4_1      (1 << 19)
 #endif
 #ifndef bit_AVX2
 #define bit_AVX2        (1 << 5)
 #endif
-#ifndef bit_AVX512F
-#define bit_AVX512F        (1 << 16)
-#endif
 #ifndef bit_BMI2
 #define bit_BMI2        (1 << 8)
 #endif
+#ifndef bit_AVX512F
+#define bit_AVX512F     (1 << 16)
+#endif
+#ifndef bit_AVX512DQ
+#define bit_AVX512DQ    (1 << 17)
+#endif
+#ifndef bit_AVX512BW
+#define bit_AVX512BW    (1 << 30)
+#endif
+#ifndef bit_AVX512VL
+#define bit_AVX512VL    (1u << 31)
+#endif
+
+/* Leaf 7, %ecx */
+#ifndef bit_AVX512VBMI2
+#define bit_AVX512VBMI2 (1 << 6)
+#endif
 
 /* Leaf 0x80000001, %ecx */
 #ifndef bit_LZCNT
 #define bit_LZCNT       (1 << 5)
 #endif
 
+/*
+ * Signatures for different CPU implementations as returned from Leaf 0.
+ */
+
+#ifndef signature_INTEL_ecx
+/* "Genu" "ineI" "ntel" */
+#define signature_INTEL_ebx     0x756e6547
+#define signature_INTEL_edx     0x49656e69
+#define signature_INTEL_ecx     0x6c65746e
+#endif
+
+#ifndef signature_AMD_ecx
+/* "Auth" "enti" "cAMD" */
+#define signature_AMD_ebx       0x68747541
+#define signature_AMD_edx       0x69746e65
+#define signature_AMD_ecx       0x444d4163
+#endif
+
+static inline unsigned xgetbv_low(unsigned c)
+{
+    unsigned a, d;
+    asm("xgetbv" : "=a"(a), "=d"(d) : "c"(c));
+    return a;
+}
+
 #endif /* QEMU_CPUID_H */
diff --git a/include/qemu/crc-ccitt.h b/include/qemu/crc-ccitt.h
new file mode 100644 (file)
index 0000000..8918daf
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * CRC16 (CCITT) Checksum Algorithm
+ *
+ * Copyright (c) 2021 Wind River Systems, Inc.
+ *
+ * Author:
+ *   Bin Meng <bin.meng@windriver.com>
+ *
+ * From Linux kernel v5.10 include/linux/crc-ccitt.h
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#ifndef CRC_CCITT_H
+#define CRC_CCITT_H
+
+extern uint16_t const crc_ccitt_table[256];
+extern uint16_t const crc_ccitt_false_table[256];
+
+uint16_t crc_ccitt(uint16_t crc, const uint8_t *buffer, size_t len);
+uint16_t crc_ccitt_false(uint16_t crc, const uint8_t *buffer, size_t len);
+
+static inline uint16_t crc_ccitt_byte(uint16_t crc, const uint8_t c)
+{
+    return (crc >> 8) ^ crc_ccitt_table[(crc ^ c) & 0xff];
+}
+
+static inline uint16_t crc_ccitt_false_byte(uint16_t crc, const uint8_t c)
+{
+    return (crc << 8) ^ crc_ccitt_false_table[(crc >> 8) ^ c];
+}
+
+#endif /* CRC_CCITT_H */
index 5b78884c38fcbc384e098e5a62cc7f670c905fd2..88b4d2b3b3304803456f5b23ef42dbf530381ec5 100644 (file)
@@ -30,5 +30,6 @@
 
 
 uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length);
+uint32_t iov_crc32c(uint32_t crc, const struct iovec *iov, size_t iov_cnt);
 
 #endif
index 986ed8e15f41248e3802b53cab08c8a939b957d6..92c927a6a3525278bad40c06ac4c0bd4a0918467 100644 (file)
@@ -1,6 +1,24 @@
 #ifndef QEMU_CUTILS_H
 #define QEMU_CUTILS_H
 
+/*
+ * si_prefix:
+ * @exp10: exponent of 10, a multiple of 3 between -18 and 18 inclusive.
+ *
+ * Return a SI prefix (n, u, m, K, M, etc.) corresponding
+ * to the given exponent of 10.
+ */
+const char *si_prefix(unsigned int exp10);
+
+/*
+ * iec_binary_prefix:
+ * @exp2: exponent of 2, a multiple of 10 between 0 and 60 inclusive.
+ *
+ * Return an IEC binary prefix (Ki, Mi, etc.) corresponding
+ * to the given exponent of 2.
+ */
+const char *iec_binary_prefix(unsigned int exp2);
+
 /**
  * pstrcpy:
  * @buf: buffer to copy string into
@@ -129,9 +147,6 @@ static inline const char *qemu_strchrnul(const char *s, int c)
 const char *qemu_strchrnul(const char *s, int c);
 #endif
 time_t mktimegm(struct tm *tm);
-int qemu_fdatasync(int fd);
-int qemu_msync(void *addr, size_t length, int fd);
-int fcntl_setfl(int fd, int flag);
 int qemu_parse_fd(const char *param);
 int qemu_strtoi(const char *nptr, const char **endptr, int base,
                 int *result);
@@ -148,9 +163,8 @@ int qemu_strtou64(const char *nptr, const char **endptr, int base,
 int qemu_strtod(const char *nptr, const char **endptr, double *result);
 int qemu_strtod_finite(const char *nptr, const char **endptr, double *result);
 
-int parse_uint(const char *s, unsigned long long *value, char **endptr,
-               int base);
-int parse_uint_full(const char *s, unsigned long long *value, int base);
+int parse_uint(const char *s, const char **endptr, int base, uint64_t *value);
+int parse_uint_full(const char *s, int base, uint64_t *value);
 
 int qemu_strtosz(const char *nptr, const char **end, uint64_t *result);
 int qemu_strtosz_MiB(const char *nptr, const char **end, uint64_t *result);
@@ -196,17 +210,61 @@ int uleb128_decode_small(const uint8_t *in, uint32_t *n);
  */
 int qemu_pstrcmp0(const char **str1, const char **str2);
 
+/* Find program directory, and save it for later usage with
+ * qemu_get_exec_dir().
+ * Try OS specific API first, if not working, parse from argv0. */
+void qemu_init_exec_dir(const char *argv0);
+
+/* Get the saved exec dir.  */
+const char *qemu_get_exec_dir(void);
 
 /**
  * get_relocated_path:
  * @dir: the directory (typically a `CONFIG_*DIR` variable) to be relocated.
  *
  * Returns a path for @dir that uses the directory of the running executable
- * as the prefix.  For example, if `bindir` is `/usr/bin` and @dir is
- * `/usr/share/qemu`, the function will append `../share/qemu` to the
- * directory that contains the running executable and return the result.
+ * as the prefix.
+ *
+ * When a directory named `qemu-bundle` exists in the directory of the running
+ * executable, the path to the directory will be prepended to @dir. For
+ * example, if the directory of the running executable is `/qemu/build` @dir
+ * is `/usr/share/qemu`, the result will be
+ * `/qemu/build/qemu-bundle/usr/share/qemu`. The directory is expected to exist
+ * in the build tree.
+ *
+ * Otherwise, the directory of the running executable will be used as the
+ * prefix and it appends the relative path from `bindir` to @dir. For example,
+ * if the directory of the running executable is `/opt/qemu/bin`, `bindir` is
+ * `/usr/bin` and @dir is `/usr/share/qemu`, the result will be
+ * `/opt/qemu/bin/../share/qemu`.
+ *
  * The returned string should be freed by the caller.
  */
 char *get_relocated_path(const char *dir);
 
+static inline const char *yes_no(bool b)
+{
+     return b ? "yes" : "no";
+}
+
+/*
+ * helper to parse debug environment variables
+ */
+int parse_debug_env(const char *name, int max, int initial);
+
+/*
+ * Hexdump a line of a byte buffer into a hexadecimal/ASCII buffer
+ */
+#define QEMU_HEXDUMP_LINE_BYTES 16 /* Number of bytes to dump */
+#define QEMU_HEXDUMP_LINE_LEN 75   /* Number of characters in line */
+void qemu_hexdump_line(char *line, unsigned int b, const void *bufptr,
+                       unsigned int len, bool ascii);
+
+/*
+ * Hexdump a buffer to a file. An optional string prefix is added to every line
+ */
+
+void qemu_hexdump(FILE *fp, const char *prefix,
+                  const void *bufptr, size_t size);
+
 #endif
diff --git a/include/qemu/datadir.h b/include/qemu/datadir.h
new file mode 100644 (file)
index 0000000..21f9097
--- /dev/null
@@ -0,0 +1,28 @@
+#ifndef QEMU_DATADIR_H
+#define QEMU_DATADIR_H
+
+#define QEMU_FILE_TYPE_BIOS   0
+#define QEMU_FILE_TYPE_KEYMAP 1
+/**
+ * qemu_find_file:
+ * @type: QEMU_FILE_TYPE_BIOS (for BIOS, VGA BIOS)
+ *        or QEMU_FILE_TYPE_KEYMAP (for keymaps).
+ * @name: Relative or absolute file name
+ *
+ * If @name exists on disk as an absolute path, or a path relative
+ * to the current directory, then returns @name unchanged.
+ * Otherwise searches for @name file in the data directories, either
+ * configured at build time (DATADIR) or registered with the -L command
+ * line option.
+ *
+ * The caller must use g_free() to free the returned data when it is
+ * no longer required.
+ *
+ * Returns: a path that can access @name, or NULL if no matching file exists.
+ */
+char *qemu_find_file(int type, const char *name);
+void qemu_add_default_firmwarepath(void);
+void qemu_add_data_dir(char *path);
+void qemu_list_data_dirs(void);
+
+#endif
index 9d591f9ee4ad596fa4a8a6cda56cc42024d3026a..81d3de8a5a1e8dafaab6af6f9c334e6a8476129f 100644 (file)
 
 #include <gio/gio.h>
 
+#include "qom/object.h"
+#include "chardev/char.h"
+#include "qemu/notify.h"
+
+/* glib/gio 2.68 */
+#define DBUS_METHOD_INVOCATION_HANDLED TRUE
+#define DBUS_METHOD_INVOCATION_UNHANDLED FALSE
+
+/* in msec */
+#define DBUS_DEFAULT_TIMEOUT 1000
+
+#define DBUS_DISPLAY1_ROOT "/org/qemu/Display1"
+
+#define DBUS_DISPLAY_ERROR (dbus_display_error_quark())
+GQuark dbus_display_error_quark(void);
+
+typedef enum {
+    DBUS_DISPLAY_ERROR_FAILED,
+    DBUS_DISPLAY_ERROR_INVALID,
+    DBUS_DISPLAY_ERROR_UNSUPPORTED,
+    DBUS_DISPLAY_N_ERRORS,
+} DBusDisplayError;
+
 GStrv qemu_dbus_get_queued_owners(GDBusConnection *connection,
                                   const char *name,
                                   Error **errp);
diff --git a/include/qemu/defer-call.h b/include/qemu/defer-call.h
new file mode 100644 (file)
index 0000000..e2c1d24
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Deferred calls
+ *
+ * Copyright Red Hat.
+ */
+
+#ifndef QEMU_DEFER_CALL_H
+#define QEMU_DEFER_CALL_H
+
+/* See documentation in util/defer-call.c */
+void defer_call_begin(void);
+void defer_call_end(void);
+void defer_call(void (*fn)(void *), void *opaque);
+
+#endif /* QEMU_DEFER_CALL_H */
index b9addcc11f7d39c2f18b01ff709e517608eea3c9..6006dfae44c37bab7cce4437206c1e6d8c754b7b 100644 (file)
@@ -1,10 +1,6 @@
 #ifndef ENVLIST_H
 #define ENVLIST_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 typedef struct envlist envlist_t;
 
 envlist_t *envlist_create(void);
@@ -15,8 +11,4 @@ int envlist_parse_set(envlist_t *, const char *);
 int envlist_parse_unset(envlist_t *, const char *);
 char **envlist_to_environ(const envlist_t *, size_t *);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif /* ENVLIST_H */
index a5ad95ff1bb2cfbc3f0c0a0a5b9ec0a58105e136..3ae2357fda5404a552ce5fe2889a66b67c4cdfe8 100644 (file)
@@ -30,23 +30,21 @@ void loc_set_none(void);
 void loc_set_cmdline(char **argv, int idx, int cnt);
 void loc_set_file(const char *fname, int lno);
 
-int error_vprintf(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);
-int error_printf(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
-int error_vprintf_unless_qmp(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);
-int error_printf_unless_qmp(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+int error_vprintf(const char *fmt, va_list ap) G_GNUC_PRINTF(1, 0);
+int error_printf(const char *fmt, ...) G_GNUC_PRINTF(1, 2);
 
-void error_vreport(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);
-void warn_vreport(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);
-void info_vreport(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);
+void error_vreport(const char *fmt, va_list ap) G_GNUC_PRINTF(1, 0);
+void warn_vreport(const char *fmt, va_list ap) G_GNUC_PRINTF(1, 0);
+void info_vreport(const char *fmt, va_list ap) G_GNUC_PRINTF(1, 0);
 
-void error_report(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
-void warn_report(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
-void info_report(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+void error_report(const char *fmt, ...) G_GNUC_PRINTF(1, 2);
+void warn_report(const char *fmt, ...) G_GNUC_PRINTF(1, 2);
+void info_report(const char *fmt, ...) G_GNUC_PRINTF(1, 2);
 
 bool error_report_once_cond(bool *printed, const char *fmt, ...)
-    GCC_FMT_ATTR(2, 3);
+    G_GNUC_PRINTF(2, 3);
 bool warn_report_once_cond(bool *printed, const char *fmt, ...)
-    GCC_FMT_ATTR(2, 3);
+    G_GNUC_PRINTF(2, 3);
 
 void error_init(const char *argv0);
 
@@ -72,9 +70,7 @@ void error_init(const char *argv0);
                               fmt, ##__VA_ARGS__);      \
     })
 
-const char *error_get_progname(void);
-
-extern bool error_with_timestamp;
+extern bool message_with_timestamp;
 extern bool error_with_guestname;
 extern const char *error_guest_name;
 
index 3380b662f38270271294382157a814a5b279aced..8a4ff308e191b21307eff0d6f46cc37c5faef7f2 100644 (file)
@@ -24,6 +24,7 @@ struct EventNotifier {
 #else
     int rfd;
     int wfd;
+    bool initialized;
 #endif
 };
 
@@ -37,6 +38,7 @@ int event_notifier_test_and_clear(EventNotifier *);
 #ifdef CONFIG_POSIX
 void event_notifier_init_fd(EventNotifier *, int fd);
 int event_notifier_get_fd(const EventNotifier *);
+int event_notifier_get_wfd(const EventNotifier *);
 #else
 HANDLE event_notifier_get_handle(EventNotifier *);
 #endif
index 489c35429111b726e9fc3f6b899c7c90502edd7f..16be02f361fc37f2483030ba034e1fd381441ddd 100644 (file)
@@ -46,7 +46,7 @@ void fifo8_push(Fifo8 *fifo, uint8_t data);
  * fifo8_push_all:
  * @fifo: FIFO to push to
  * @data: data to push
- * @size: number of bytes to push
+ * @num: number of bytes to push
  *
  * Push a byte array to the FIFO. Behaviour is undefined if the FIFO is full.
  * Clients are responsible for checking the space left in the FIFO using
@@ -148,12 +148,16 @@ uint32_t fifo8_num_used(Fifo8 *fifo);
 
 extern const VMStateDescription vmstate_fifo8;
 
-#define VMSTATE_FIFO8(_field, _state) {                              \
-    .name       = (stringify(_field)),                               \
-    .size       = sizeof(Fifo8),                                     \
-    .vmsd       = &vmstate_fifo8,                                    \
-    .flags      = VMS_STRUCT,                                        \
-    .offset     = vmstate_offset_value(_state, _field, Fifo8),       \
+#define VMSTATE_FIFO8_TEST(_field, _state, _test) {                  \
+    .name         = (stringify(_field)),                             \
+    .field_exists = (_test),                                         \
+    .size         = sizeof(Fifo8),                                   \
+    .vmsd         = &vmstate_fifo8,                                  \
+    .flags        = VMS_STRUCT,                                      \
+    .offset       = vmstate_offset_value(_state, _field, Fifo8),     \
 }
 
+#define VMSTATE_FIFO8(_field, _state)                                \
+    VMSTATE_FIFO8_TEST(_field, _state, NULL)
+
 #endif /* QEMU_FIFO8_H */
index 09ff9c22369fb8f2477db9e43a03f105523bfdbd..5060d49d6092e77344ec7e59bd6a232aa9aad0bb 100644 (file)
 #define QEMU_GUEST_RANDOM_H
 
 /**
- * qemu_guest_random_seed_main(const char *optarg, Error **errp)
- * @optarg: a non-NULL pointer to a C string
+ * qemu_guest_random_seed_main(const char *seedstr, Error **errp)
+ * @seedstr: a non-NULL pointer to a C string
  * @errp: an error indicator
  *
- * The @optarg value is that which accompanies the -seed argument.
+ * The @seedstr value is that which accompanies the -seed argument.
  * This forces qemu_guest_getrandom into deterministic mode.
  *
  * Returns 0 on success, < 0 on failure while setting *errp.
  */
-int qemu_guest_random_seed_main(const char *optarg, Error **errp);
+int qemu_guest_random_seed_main(const char *seedstr, Error **errp);
 
 /**
  * qemu_guest_random_seed_thread_part1(void)
index 5e71b6d6f7404d1881e6cecb264261e0143f5087..8136e33674cd05f2a439bb6fd6d1e9a14c4b496e 100644 (file)
@@ -76,20 +76,9 @@ void hbitmap_truncate(HBitmap *hb, uint64_t size);
  *
  * Store result of merging @a and @b into @result.
  * @result is allowed to be equal to @a or @b.
- *
- * Return true if the merge was successful,
- *        false if it was not attempted.
- */
-bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result);
-
-/**
- * hbitmap_can_merge:
- *
- * hbitmap_can_merge(a, b) && hbitmap_can_merge(a, result) is sufficient and
- * necessary for hbitmap_merge will not fail.
- *
+ * All bitmaps must have same size.
  */
-bool hbitmap_can_merge(const HBitmap *a, const HBitmap *b);
+void hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result);
 
 /**
  * hbitmap_empty:
@@ -340,6 +329,18 @@ bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t start, int64_t end,
                              int64_t max_dirty_count,
                              int64_t *dirty_start, int64_t *dirty_count);
 
+/*
+ * hbitmap_status:
+ * @hb: The HBitmap to operate on
+ * @start: The bit to start from
+ * @count: Number of bits to proceed
+ * @pnum: Out-parameter. How many bits has same value starting from @start
+ *
+ * Returns true if bitmap is dirty at @start, false otherwise.
+ */
+bool hbitmap_status(const HBitmap *hb, int64_t start, int64_t count,
+                    int64_t *pnum);
+
 /**
  * hbitmap_iter_next:
  * @hbi: HBitmapIter to operate on.
diff --git a/include/qemu/help-texts.h b/include/qemu/help-texts.h
new file mode 100644 (file)
index 0000000..d0359f8
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef QEMU_HELP_TEXTS_H
+#define QEMU_HELP_TEXTS_H
+
+/* Copyright string for -version arguments, About dialogs, etc */
+#define QEMU_COPYRIGHT "Copyright (c) 2003-2023 " \
+    "Fabrice Bellard and the QEMU Project developers"
+
+/* Bug reporting information for --help arguments, About dialogs, etc */
+#define QEMU_HELP_BOTTOM \
+    "See <https://qemu.org/contribute/report-a-bug> for how to report bugs.\n" \
+    "More information on the QEMU project at <https://qemu.org>."
+
+#endif
index cdca2991d8a89774f3ef5f5c80e9c356519a6e8b..ead97d354d6a2b45690b39560b1561853d226218 100644 (file)
  * THE SOFTWARE.
  */
 
+/* Portions of this work are licensed under the terms of the GNU GPL,
+ * version 2 or later. See the COPYING file in the top-level directory.
+ */
+
 #ifndef HOST_UTILS_H
 #define HOST_UTILS_H
 
 #include "qemu/bswap.h"
+#include "qemu/int128.h"
 
 #ifdef CONFIG_INT128
 static inline void mulu64(uint64_t *plow, uint64_t *phigh,
@@ -51,43 +56,45 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
     return (__int128_t)a * b / c;
 }
 
-static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+static inline uint64_t muldiv64_round_up(uint64_t a, uint32_t b, uint32_t c)
 {
-    if (divisor == 0) {
-        return 1;
-    } else {
-        __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
-        __uint128_t result = dividend / divisor;
-        *plow = result;
-        *phigh = dividend % divisor;
-        return result > UINT64_MAX;
-    }
+    return ((__int128_t)a * b + c - 1) / c;
 }
 
-static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
+static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
+                               uint64_t divisor)
 {
-    if (divisor == 0) {
-        return 1;
-    } else {
-        __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
-        __int128_t result = dividend / divisor;
-        *plow = result;
-        *phigh = dividend % divisor;
-        return result != *plow;
-    }
+    __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
+    __uint128_t result = dividend / divisor;
+
+    *plow = result;
+    *phigh = result >> 64;
+    return dividend % divisor;
+}
+
+static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
+                              int64_t divisor)
+{
+    __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
+    __int128_t result = dividend / divisor;
+
+    *plow = result;
+    *phigh = result >> 64;
+    return dividend % divisor;
 }
 #else
 void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
 void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
 
-static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+static inline uint64_t muldiv64_rounding(uint64_t a, uint32_t b, uint32_t c,
+                                  bool round_up)
 {
     union {
         uint64_t ll;
         struct {
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
             uint32_t high, low;
 #else
             uint32_t low, high;
@@ -98,14 +105,57 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
 
     u.ll = a;
     rl = (uint64_t)u.l.low * (uint64_t)b;
+    if (round_up) {
+        rl += c - 1;
+    }
     rh = (uint64_t)u.l.high * (uint64_t)b;
     rh += (rl >> 32);
     res.l.high = rh / c;
     res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
     return res.ll;
 }
+
+static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+{
+    return muldiv64_rounding(a, b, c, false);
+}
+
+static inline uint64_t muldiv64_round_up(uint64_t a, uint32_t b, uint32_t c)
+{
+    return muldiv64_rounding(a, b, c, true);
+}
 #endif
 
+/**
+ * clz8 - count leading zeros in a 8-bit value.
+ * @val: The value to search
+ *
+ * Returns 8 if the value is zero.  Note that the GCC builtin is
+ * undefined if the value is zero.
+ *
+ * Note that the GCC builtin will upcast its argument to an `unsigned int`
+ * so this function subtracts off the number of prepended zeroes.
+ */
+static inline int clz8(uint8_t val)
+{
+    return val ? __builtin_clz(val) - 24 : 8;
+}
+
+/**
+ * clz16 - count leading zeros in a 16-bit value.
+ * @val: The value to search
+ *
+ * Returns 16 if the value is zero.  Note that the GCC builtin is
+ * undefined if the value is zero.
+ *
+ * Note that the GCC builtin will upcast its argument to an `unsigned int`
+ * so this function subtracts off the number of prepended zeroes.
+ */
+static inline int clz16(uint16_t val)
+{
+    return val ? __builtin_clz(val) - 16 : 16;
+}
+
 /**
  * clz32 - count leading zeros in a 32-bit value.
  * @val: The value to search
@@ -152,6 +202,30 @@ static inline int clo64(uint64_t val)
     return clz64(~val);
 }
 
+/**
+ * ctz8 - count trailing zeros in a 8-bit value.
+ * @val: The value to search
+ *
+ * Returns 8 if the value is zero.  Note that the GCC builtin is
+ * undefined if the value is zero.
+ */
+static inline int ctz8(uint8_t val)
+{
+    return val ? __builtin_ctz(val) : 8;
+}
+
+/**
+ * ctz16 - count trailing zeros in a 16-bit value.
+ * @val: The value to search
+ *
+ * Returns 16 if the value is zero.  Note that the GCC builtin is
+ * undefined if the value is zero.
+ */
+static inline int ctz16(uint16_t val)
+{
+    return val ? __builtin_ctz(val) : 16;
+}
+
 /**
  * ctz32 - count trailing zeros in a 32-bit value.
  * @val: The value to search
@@ -272,6 +346,9 @@ static inline int ctpop64(uint64_t val)
  */
 static inline uint8_t revbit8(uint8_t x)
 {
+#if __has_builtin(__builtin_bitreverse8)
+    return __builtin_bitreverse8(x);
+#else
     /* Assign the correct nibble position.  */
     x = ((x & 0xf0) >> 4)
       | ((x & 0x0f) << 4);
@@ -281,6 +358,7 @@ static inline uint8_t revbit8(uint8_t x)
       | ((x & 0x22) << 1)
       | ((x & 0x11) << 3);
     return x;
+#endif
 }
 
 /**
@@ -289,6 +367,9 @@ static inline uint8_t revbit8(uint8_t x)
  */
 static inline uint16_t revbit16(uint16_t x)
 {
+#if __has_builtin(__builtin_bitreverse16)
+    return __builtin_bitreverse16(x);
+#else
     /* Assign the correct byte position.  */
     x = bswap16(x);
     /* Assign the correct nibble position.  */
@@ -300,6 +381,7 @@ static inline uint16_t revbit16(uint16_t x)
       | ((x & 0x2222) << 1)
       | ((x & 0x1111) << 3);
     return x;
+#endif
 }
 
 /**
@@ -308,6 +390,9 @@ static inline uint16_t revbit16(uint16_t x)
  */
 static inline uint32_t revbit32(uint32_t x)
 {
+#if __has_builtin(__builtin_bitreverse32)
+    return __builtin_bitreverse32(x);
+#else
     /* Assign the correct byte position.  */
     x = bswap32(x);
     /* Assign the correct nibble position.  */
@@ -319,6 +404,7 @@ static inline uint32_t revbit32(uint32_t x)
       | ((x & 0x22222222u) << 1)
       | ((x & 0x11111111u) << 3);
     return x;
+#endif
 }
 
 /**
@@ -327,6 +413,9 @@ static inline uint32_t revbit32(uint32_t x)
  */
 static inline uint64_t revbit64(uint64_t x)
 {
+#if __has_builtin(__builtin_bitreverse64)
+    return __builtin_bitreverse64(x);
+#else
     /* Assign the correct byte position.  */
     x = bswap64(x);
     /* Assign the correct nibble position.  */
@@ -338,6 +427,259 @@ static inline uint64_t revbit64(uint64_t x)
       | ((x & 0x2222222222222222ull) << 1)
       | ((x & 0x1111111111111111ull) << 3);
     return x;
+#endif
+}
+
+/**
+ * Return the absolute value of a 64-bit integer as an unsigned 64-bit value
+ */
+static inline uint64_t uabs64(int64_t v)
+{
+    return v < 0 ? -v : v;
+}
+
+/**
+ * sadd32_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
+{
+    return __builtin_add_overflow(x, y, ret);
+}
+
+/**
+ * sadd64_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
+{
+    return __builtin_add_overflow(x, y, ret);
+}
+
+/**
+ * uadd32_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
+{
+    return __builtin_add_overflow(x, y, ret);
+}
+
+/**
+ * uadd64_overflow - addition with overflow indication
+ * @x, @y: addends
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x + @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
+{
+    return __builtin_add_overflow(x, y, ret);
+}
+
+/**
+ * ssub32_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for difference
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
+{
+    return __builtin_sub_overflow(x, y, ret);
+}
+
+/**
+ * ssub64_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
+{
+    return __builtin_sub_overflow(x, y, ret);
+}
+
+/**
+ * usub32_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
+{
+    return __builtin_sub_overflow(x, y, ret);
+}
+
+/**
+ * usub64_overflow - subtraction with overflow indication
+ * @x: Minuend
+ * @y: Subtrahend
+ * @ret: Output for sum
+ *
+ * Computes *@ret = @x - @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
+{
+    return __builtin_sub_overflow(x, y, ret);
+}
+
+/**
+ * smul32_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
+{
+    return __builtin_mul_overflow(x, y, ret);
+}
+
+/**
+ * smul64_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
+{
+    return __builtin_mul_overflow(x, y, ret);
+}
+
+/**
+ * umul32_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
+{
+    return __builtin_mul_overflow(x, y, ret);
+}
+
+/**
+ * umul64_overflow - multiplication with overflow indication
+ * @x, @y: Input multipliers
+ * @ret: Output for product
+ *
+ * Computes *@ret = @x * @y, and returns true if and only if that
+ * value has been truncated.
+ */
+static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
+{
+    return __builtin_mul_overflow(x, y, ret);
+}
+
+/*
+ * Unsigned 128x64 multiplication.
+ * Returns true if the result got truncated to 128 bits.
+ * Otherwise, returns false and the multiplication result via plow and phigh.
+ */
+static inline bool mulu128(uint64_t *plow, uint64_t *phigh, uint64_t factor)
+{
+#if defined(CONFIG_INT128)
+    bool res;
+    __uint128_t r;
+    __uint128_t f = ((__uint128_t)*phigh << 64) | *plow;
+    res = __builtin_mul_overflow(f, factor, &r);
+
+    *plow = r;
+    *phigh = r >> 64;
+
+    return res;
+#else
+    uint64_t dhi = *phigh;
+    uint64_t dlo = *plow;
+    uint64_t ahi;
+    uint64_t blo, bhi;
+
+    if (dhi == 0) {
+        mulu64(plow, phigh, dlo, factor);
+        return false;
+    }
+
+    mulu64(plow, &ahi, dlo, factor);
+    mulu64(&blo, &bhi, dhi, factor);
+
+    return uadd64_overflow(ahi, blo, phigh) || bhi != 0;
+#endif
+}
+
+/**
+ * uadd64_carry - addition with carry-in and carry-out
+ * @x, @y: addends
+ * @pcarry: in-out carry value
+ *
+ * Computes @x + @y + *@pcarry, placing the carry-out back
+ * into *@pcarry and returning the 64-bit sum.
+ */
+static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
+{
+#if __has_builtin(__builtin_addcll)
+    unsigned long long c = *pcarry;
+    x = __builtin_addcll(x, y, c, &c);
+    *pcarry = c & 1;
+    return x;
+#else
+    bool c = *pcarry;
+    /* This is clang's internal expansion of __builtin_addc. */
+    c = uadd64_overflow(x, c, &x);
+    c |= uadd64_overflow(x, y, &x);
+    *pcarry = c;
+    return x;
+#endif
+}
+
+/**
+ * usub64_borrow - subtraction with borrow-in and borrow-out
+ * @x, @y: addends
+ * @pborrow: in-out borrow value
+ *
+ * Computes @x - @y - *@pborrow, placing the borrow-out back
+ * into *@pborrow and returning the 64-bit sum.
+ */
+static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
+{
+#if __has_builtin(__builtin_subcll) && !defined(BUILTIN_SUBCLL_BROKEN)
+    unsigned long long b = *pborrow;
+    x = __builtin_subcll(x, y, b, &b);
+    *pborrow = b & 1;
+    return x;
+#else
+    bool b = *pborrow;
+    b = usub64_overflow(x, b, &x);
+    b |= usub64_overflow(x, y, &x);
+    *pborrow = b;
+    return x;
+#endif
 }
 
 /* Host type specific sizes of these routines.  */
@@ -437,4 +779,83 @@ void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);
  */
 void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);
 
+/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
+ * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
+ *
+ * Licensed under the GPLv2/LGPLv3
+ */
+static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
+                                  uint64_t n0, uint64_t d)
+{
+#if defined(__x86_64__)
+    uint64_t q;
+    asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
+    return q;
+#elif defined(__s390x__) && !defined(__clang__)
+    /* Need to use a TImode type to get an even register pair for DLGR.  */
+    unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
+    asm("dlgr %0, %1" : "+r"(n) : "r"(d));
+    *r = n >> 64;
+    return n;
+#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
+    /* From Power ISA 2.06, programming note for divdeu.  */
+    uint64_t q1, q2, Q, r1, r2, R;
+    asm("divdeu %0,%2,%4; divdu %1,%3,%4"
+        : "=&r"(q1), "=r"(q2)
+        : "r"(n1), "r"(n0), "r"(d));
+    r1 = -(q1 * d);         /* low part of (n1<<64) - (q1 * d) */
+    r2 = n0 - (q2 * d);
+    Q = q1 + q2;
+    R = r1 + r2;
+    if (R >= d || R < r2) { /* overflow implies R > d */
+        Q += 1;
+        R -= d;
+    }
+    *r = R;
+    return Q;
+#else
+    uint64_t d0, d1, q0, q1, r1, r0, m;
+
+    d0 = (uint32_t)d;
+    d1 = d >> 32;
+
+    r1 = n1 % d1;
+    q1 = n1 / d1;
+    m = q1 * d0;
+    r1 = (r1 << 32) | (n0 >> 32);
+    if (r1 < m) {
+        q1 -= 1;
+        r1 += d;
+        if (r1 >= d) {
+            if (r1 < m) {
+                q1 -= 1;
+                r1 += d;
+            }
+        }
+    }
+    r1 -= m;
+
+    r0 = r1 % d1;
+    q0 = r1 / d1;
+    m = q0 * d0;
+    r0 = (r0 << 32) | (uint32_t)n0;
+    if (r0 < m) {
+        q0 -= 1;
+        r0 += d;
+        if (r0 >= d) {
+            if (r0 < m) {
+                q0 -= 1;
+                r0 += d;
+            }
+        }
+    }
+    r0 -= m;
+
+    *r = r0;
+    return (q1 << 32) | q0;
+#endif
+}
+
+Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor);
+Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor);
 #endif
diff --git a/include/qemu/hw-version.h b/include/qemu/hw-version.h
new file mode 100644 (file)
index 0000000..730a8c9
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * QEMU "hardware version" machinery
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_HW_VERSION_H
+#define QEMU_HW_VERSION_H
+
+/*
+ * Starting on QEMU 2.5, qemu_hw_version() returns "2.5+" by default
+ * instead of QEMU_VERSION, so setting hw_version on MachineClass
+ * is no longer mandatory.
+ *
+ * Do NOT change this string, or it will break compatibility on all
+ * machine classes that don't set hw_version.
+ */
+#define QEMU_HW_VERSION "2.5+"
+
+/* QEMU "hardware version" setting. Used to replace code that exposed
+ * QEMU_VERSION to guests in the past and need to keep compatibility.
+ * Do not use qemu_hw_version() in new code.
+ */
+void qemu_set_hw_version(const char *);
+const char *qemu_hw_version(void);
+
+#endif
index b55c406e69dbc2a0b1029f4cea41e4b1f2ecd49d..46b759b284f31fe40a5258f232f9770760ff2865 100644 (file)
@@ -5,6 +5,7 @@ typedef enum IdSubSystems {
     ID_QDEV,
     ID_BLOCK,
     ID_CHR,
+    ID_NET,
     ID_MAX      /* last element, used as array size */
 } IdSubSystems;
 
index 76ea405922dc2347e02db8878e81924a1fc31375..174bd7dafb8dd0082027b7de5a15a0013d527981 100644 (file)
@@ -1,16 +1,27 @@
 #ifndef INT128_H
 #define INT128_H
 
-#ifdef CONFIG_INT128
 #include "qemu/bswap.h"
 
+/*
+ * With TCI, we need to use libffi for interfacing with TCG helpers.
+ * But libffi does not support __int128_t, and therefore cannot pass
+ * or return values of this type, force use of the Int128 struct.
+ */
+#if defined(CONFIG_INT128) && !defined(CONFIG_TCG_INTERPRETER)
 typedef __int128_t Int128;
+typedef __int128_t __attribute__((aligned(16))) Int128Aligned;
 
 static inline Int128 int128_make64(uint64_t a)
 {
     return a;
 }
 
+static inline Int128 int128_makes64(int64_t a)
+{
+    return a;
+}
+
 static inline Int128 int128_make128(uint64_t lo, uint64_t hi)
 {
     return (__uint128_t)hi << 64 | lo;
@@ -53,16 +64,36 @@ static inline Int128 int128_exts64(int64_t a)
     return a;
 }
 
+static inline Int128 int128_not(Int128 a)
+{
+    return ~a;
+}
+
 static inline Int128 int128_and(Int128 a, Int128 b)
 {
     return a & b;
 }
 
+static inline Int128 int128_or(Int128 a, Int128 b)
+{
+    return a | b;
+}
+
+static inline Int128 int128_xor(Int128 a, Int128 b)
+{
+    return a ^ b;
+}
+
 static inline Int128 int128_rshift(Int128 a, int n)
 {
     return a >> n;
 }
 
+static inline Int128 int128_urshift(Int128 a, int n)
+{
+    return (__uint128_t)a >> n;
+}
+
 static inline Int128 int128_lshift(Int128 a, int n)
 {
     return a << n;
@@ -103,11 +134,21 @@ static inline bool int128_ge(Int128 a, Int128 b)
     return a >= b;
 }
 
+static inline bool int128_uge(Int128 a, Int128 b)
+{
+    return ((__uint128_t)a) >= ((__uint128_t)b);
+}
+
 static inline bool int128_lt(Int128 a, Int128 b)
 {
     return a < b;
 }
 
+static inline bool int128_ult(Int128 a, Int128 b)
+{
+    return (__uint128_t)a < (__uint128_t)b;
+}
+
 static inline bool int128_le(Int128 a, Int128 b)
 {
     return a <= b;
@@ -145,26 +186,78 @@ static inline void int128_subfrom(Int128 *a, Int128 b)
 
 static inline Int128 bswap128(Int128 a)
 {
+#if __has_builtin(__builtin_bswap128)
+    return __builtin_bswap128(a);
+#else
     return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a)));
+#endif
+}
+
+static inline int clz128(Int128 a)
+{
+    if (a >> 64) {
+        return __builtin_clzll(a >> 64);
+    } else {
+        return (a) ? __builtin_clzll((uint64_t)a) + 64 : 128;
+    }
+}
+
+static inline Int128 int128_divu(Int128 a, Int128 b)
+{
+    return (__uint128_t)a / (__uint128_t)b;
+}
+
+static inline Int128 int128_remu(Int128 a, Int128 b)
+{
+    return (__uint128_t)a % (__uint128_t)b;
+}
+
+static inline Int128 int128_divs(Int128 a, Int128 b)
+{
+    return a / b;
+}
+
+static inline Int128 int128_rems(Int128 a, Int128 b)
+{
+    return a % b;
 }
 
 #else /* !CONFIG_INT128 */
 
 typedef struct Int128 Int128;
-
+typedef struct Int128 __attribute__((aligned(16))) Int128Aligned;
+
+/*
+ * We guarantee that the in-memory byte representation of an
+ * Int128 is that of a host-endian-order 128-bit integer
+ * (whether using this struct or the __int128_t version of the type).
+ * Some code using this type relies on this (eg when copying it into
+ * guest memory or a gdb protocol buffer, or by using Int128 in
+ * a union with other integer types).
+ */
 struct Int128 {
+#if HOST_BIG_ENDIAN
+    int64_t hi;
+    uint64_t lo;
+#else
     uint64_t lo;
     int64_t hi;
+#endif
 };
 
 static inline Int128 int128_make64(uint64_t a)
 {
-    return (Int128) { a, 0 };
+    return (Int128) { .lo = a, .hi = 0 };
+}
+
+static inline Int128 int128_makes64(int64_t a)
+{
+    return (Int128) { .lo = a, .hi = a >> 63 };
 }
 
 static inline Int128 int128_make128(uint64_t lo, uint64_t hi)
 {
-    return (Int128) { lo, hi };
+    return (Int128) { .lo = lo, .hi = hi };
 }
 
 static inline uint64_t int128_get64(Int128 a)
@@ -195,17 +288,32 @@ static inline Int128 int128_one(void)
 
 static inline Int128 int128_2_64(void)
 {
-    return (Int128) { 0, 1 };
+    return int128_make128(0, 1);
 }
 
 static inline Int128 int128_exts64(int64_t a)
 {
-    return (Int128) { .lo = a, .hi = (a < 0) ? -1 : 0 };
+    return int128_make128(a, (a < 0) ? -1 : 0);
+}
+
+static inline Int128 int128_not(Int128 a)
+{
+    return int128_make128(~a.lo, ~a.hi);
 }
 
 static inline Int128 int128_and(Int128 a, Int128 b)
 {
-    return (Int128) { a.lo & b.lo, a.hi & b.hi };
+    return int128_make128(a.lo & b.lo, a.hi & b.hi);
+}
+
+static inline Int128 int128_or(Int128 a, Int128 b)
+{
+    return int128_make128(a.lo | b.lo, a.hi | b.hi);
+}
+
+static inline Int128 int128_xor(Int128 a, Int128 b)
+{
+    return int128_make128(a.lo ^ b.lo, a.hi ^ b.hi);
 }
 
 static inline Int128 int128_rshift(Int128 a, int n)
@@ -222,6 +330,20 @@ static inline Int128 int128_rshift(Int128 a, int n)
     }
 }
 
+static inline Int128 int128_urshift(Int128 a, int n)
+{
+    uint64_t h = a.hi;
+    if (!n) {
+        return a;
+    }
+    h = h >> (n & 63);
+    if (n >= 64) {
+        return int128_make64(h);
+    } else {
+        return int128_make128((a.lo >> n) | ((uint64_t)a.hi << (64 - n)), h);
+    }
+}
+
 static inline Int128 int128_lshift(Int128 a, int n)
 {
     uint64_t l = a.lo << (n & 63);
@@ -277,11 +399,21 @@ static inline bool int128_ge(Int128 a, Int128 b)
     return a.hi > b.hi || (a.hi == b.hi && a.lo >= b.lo);
 }
 
+static inline bool int128_uge(Int128 a, Int128 b)
+{
+    return (uint64_t)a.hi > (uint64_t)b.hi || (a.hi == b.hi && a.lo >= b.lo);
+}
+
 static inline bool int128_lt(Int128 a, Int128 b)
 {
     return !int128_ge(a, b);
 }
 
+static inline bool int128_ult(Int128 a, Int128 b)
+{
+    return !int128_uge(a, b);
+}
+
 static inline bool int128_le(Int128 a, Int128 b)
 {
     return int128_ge(b, a);
@@ -317,5 +449,48 @@ static inline void int128_subfrom(Int128 *a, Int128 b)
     *a = int128_sub(*a, b);
 }
 
-#endif /* CONFIG_INT128 */
+static inline Int128 bswap128(Int128 a)
+{
+    return int128_make128(bswap64(a.hi), bswap64(a.lo));
+}
+
+static inline int clz128(Int128 a)
+{
+    if (a.hi) {
+        return __builtin_clzll(a.hi);
+    } else {
+        return (a.lo) ? __builtin_clzll(a.lo) + 64 : 128;
+    }
+}
+
+Int128 int128_divu(Int128, Int128);
+Int128 int128_remu(Int128, Int128);
+Int128 int128_divs(Int128, Int128);
+Int128 int128_rems(Int128, Int128);
+#endif /* CONFIG_INT128 && !CONFIG_TCG_INTERPRETER */
+
+static inline void bswap128s(Int128 *s)
+{
+    *s = bswap128(*s);
+}
+
+#define UINT128_MAX int128_make128(~0LL, ~0LL)
+#define INT128_MAX int128_make128(UINT64_MAX, INT64_MAX)
+#define INT128_MIN int128_make128(0, INT64_MIN)
+
+/*
+ * When compiler supports a 128-bit type, define a combination of
+ * a possible structure and the native types.  Ease parameter passing
+ * via use of the transparent union extension.
+ */
+#ifdef CONFIG_INT128_TYPE
+typedef union {
+    __uint128_t u;
+    __int128_t i;
+    Int128 s;
+} Int128Alias __attribute__((transparent_union));
+#else
+typedef Int128 Int128Alias;
+#endif /* CONFIG_INT128_TYPE */
+
 #endif /* INT128_H */
diff --git a/include/qemu/interval-tree.h b/include/qemu/interval-tree.h
new file mode 100644 (file)
index 0000000..25006de
--- /dev/null
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Interval trees.
+ *
+ * Derived from include/linux/interval_tree.h and its dependencies.
+ */
+
+#ifndef QEMU_INTERVAL_TREE_H
+#define QEMU_INTERVAL_TREE_H
+
+/*
+ * For now, don't expose Linux Red-Black Trees separately, but retain the
+ * separate type definitions to keep the implementation sane, and allow
+ * the possibility of disentangling them later.
+ */
+typedef struct RBNode
+{
+    /* Encodes parent with color in the lsb. */
+    uintptr_t rb_parent_color;
+    struct RBNode *rb_right;
+    struct RBNode *rb_left;
+} RBNode;
+
+typedef struct RBRoot
+{
+    RBNode *rb_node;
+} RBRoot;
+
+typedef struct RBRootLeftCached {
+    RBRoot rb_root;
+    RBNode *rb_leftmost;
+} RBRootLeftCached;
+
+typedef struct IntervalTreeNode
+{
+    RBNode rb;
+
+    uint64_t start;    /* Start of interval */
+    uint64_t last;     /* Last location _in_ interval */
+    uint64_t subtree_last;
+} IntervalTreeNode;
+
+typedef RBRootLeftCached IntervalTreeRoot;
+
+/**
+ * interval_tree_is_empty
+ * @root: root of the tree.
+ *
+ * Returns true if the tree contains no nodes.
+ */
+static inline bool interval_tree_is_empty(const IntervalTreeRoot *root)
+{
+    return root->rb_root.rb_node == NULL;
+}
+
+/**
+ * interval_tree_insert
+ * @node: node to insert,
+ * @root: root of the tree.
+ *
+ * Insert @node into @root, and rebalance.
+ */
+void interval_tree_insert(IntervalTreeNode *node, IntervalTreeRoot *root);
+
+/**
+ * interval_tree_remove
+ * @node: node to remove,
+ * @root: root of the tree.
+ *
+ * Remove @node from @root, and rebalance.
+ */
+void interval_tree_remove(IntervalTreeNode *node, IntervalTreeRoot *root);
+
+/**
+ * interval_tree_iter_first:
+ * @root: root of the tree,
+ * @start, @last: the inclusive interval [start, last].
+ *
+ * Locate the "first" of a set of nodes within the tree at @root
+ * that overlap the interval, where "first" is sorted by start.
+ * Returns NULL if no overlap found.
+ */
+IntervalTreeNode *interval_tree_iter_first(IntervalTreeRoot *root,
+                                           uint64_t start, uint64_t last);
+
+/**
+ * interval_tree_iter_next:
+ * @node: previous search result
+ * @start, @last: the inclusive interval [start, last].
+ *
+ * Locate the "next" of a set of nodes within the tree that overlap the
+ * interval; @next is the result of a previous call to
+ * interval_tree_iter_{first,next}.  Returns NULL if @next was the last
+ * node in the set.
+ */
+IntervalTreeNode *interval_tree_iter_next(IntervalTreeNode *node,
+                                          uint64_t start, uint64_t last);
+
+#endif /* QEMU_INTERVAL_TREE_H */
index b6b283a5e5c1f5b69baa2b958339fa08f5e8e734..63a1c01965d19217ed9d9ee57eb12a9c90132773 100644 (file)
@@ -222,13 +222,11 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov)
 
 void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
 void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
-void qemu_iovec_init_extended(
-        QEMUIOVector *qiov,
-        void *head_buf, size_t head_len,
-        QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
-        void *tail_buf, size_t tail_len);
 void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
                            size_t offset, size_t len);
+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov,
+                               size_t offset, size_t len,
+                               size_t *head, size_t *tail, int *niov);
 int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len);
 void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
 void qemu_iovec_concat(QEMUIOVector *dst,
index b66cf93c4bc6deaad4527a89bace3b9f3afa295e..2a10a7052e420392460c8dbf82e31db241b8895b 100644 (file)
@@ -15,7 +15,7 @@
  * Currently the iova tree will only allow to keep ranges
  * information, and no extra user data is allowed for each element.  A
  * benefit is that we can merge adjacent ranges internally within the
- * tree.  It can save a lot of memory when the ranges are splitted but
+ * tree.  It can save a lot of memory when the ranges are split but
  * mostly continuous.
  *
  * Note that current implementation does not provide any thread
@@ -29,6 +29,7 @@
 #define  IOVA_OK           (0)
 #define  IOVA_ERR_INVALID  (-1) /* Invalid parameters */
 #define  IOVA_ERR_OVERLAP  (-2) /* IOVA range overlapped */
+#define  IOVA_ERR_NOMEM    (-3) /* Cannot allocate */
 
 typedef struct IOVATree IOVATree;
 typedef struct DMAMap {
@@ -59,7 +60,7 @@ IOVATree *iova_tree_new(void);
  *
  * Return: 0 if succeeded, or <0 if error.
  */
-int iova_tree_insert(IOVATree *tree, DMAMap *map);
+int iova_tree_insert(IOVATree *tree, const DMAMap *map);
 
 /**
  * iova_tree_remove:
@@ -71,10 +72,8 @@ int iova_tree_insert(IOVATree *tree, DMAMap *map);
  * provided.  The range does not need to be exactly what has inserted,
  * all the mappings that are included in the provided range will be
  * removed from the tree.  Here map->translated_addr is meaningless.
- *
- * Return: 0 if succeeded, or <0 if error.
  */
-int iova_tree_remove(IOVATree *tree, DMAMap *map);
+void iova_tree_remove(IOVATree *tree, DMAMap map);
 
 /**
  * iova_tree_find:
@@ -82,7 +81,7 @@ int iova_tree_remove(IOVATree *tree, DMAMap *map);
  * @tree: the iova tree to search from
  * @map: the mapping to search
  *
- * Search for a mapping in the iova tree that overlaps with the
+ * Search for a mapping in the iova tree that iova overlaps with the
  * mapping range specified.  Only the first found mapping will be
  * returned.
  *
@@ -92,7 +91,25 @@ int iova_tree_remove(IOVATree *tree, DMAMap *map);
  * user is responsible to make sure the pointer is valid (say, no
  * concurrent deletion in progress).
  */
-DMAMap *iova_tree_find(IOVATree *tree, DMAMap *map);
+const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map);
+
+/**
+ * iova_tree_find_iova:
+ *
+ * @tree: the iova tree to search from
+ * @map: the mapping to search
+ *
+ * Search for a mapping in the iova tree that translated_addr overlaps with the
+ * mapping range specified.  Only the first found mapping will be
+ * returned.
+ *
+ * Return: DMAMap pointer if found, or NULL if not found.  Note that
+ * the returned DMAMap pointer is maintained internally.  User should
+ * only read the content but never modify or free the content.  Also,
+ * user is responsible to make sure the pointer is valid (say, no
+ * concurrent deletion in progress).
+ */
+const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map);
 
 /**
  * iova_tree_find_address:
@@ -105,13 +122,13 @@ DMAMap *iova_tree_find(IOVATree *tree, DMAMap *map);
  *
  * Return: same as iova_tree_find().
  */
-DMAMap *iova_tree_find_address(IOVATree *tree, hwaddr iova);
+const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova);
 
 /**
  * iova_tree_foreach:
  *
  * @tree: the iova tree to iterate on
- * @iterator: the interator for the mappings, return true to stop
+ * @iterator: the iterator for the mappings, return true to stop
  *
  * Iterate over the iova tree.
  *
@@ -119,6 +136,23 @@ DMAMap *iova_tree_find_address(IOVATree *tree, hwaddr iova);
  */
 void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator);
 
+/**
+ * iova_tree_alloc_map:
+ *
+ * @tree: the iova tree to allocate from
+ * @map: the new map (as translated addr & size) to allocate in the iova region
+ * @iova_begin: the minimum address of the allocation
+ * @iova_end: the maximum addressable direction of the allocation
+ *
+ * Allocates a new region of a given size, between iova_min and iova_max.
+ *
+ * Return: Same as iova_tree_insert, but cannot overlap and can return error if
+ * iova tree is out of free contiguous range. The caller gets the assigned iova
+ * in map->iova.
+ */
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
+                        hwaddr iova_end);
+
 /**
  * iova_tree_destroy:
  *
index 32aabb1c60009825ca4a921b78643fcdc7468ed9..e502787dd87dc431c1d8fa8af92fc64808a28492 100644 (file)
@@ -40,27 +40,62 @@ typedef struct JobTxn JobTxn;
  * Long-running operation.
  */
 typedef struct Job {
+
+    /* Fields set at initialization (job_create), and never modified */
+
     /** The ID of the job. May be NULL for internal jobs. */
     char *id;
 
-    /** The type of this job. */
+    /**
+     * The type of this job.
+     * All callbacks are called with job_mutex *not* held.
+     */
     const JobDriver *driver;
 
-    /** Reference count of the block job */
-    int refcnt;
-
-    /** Current state; See @JobStatus for details. */
-    JobStatus status;
-
-    /** AioContext to run the job coroutine in */
-    AioContext *aio_context;
-
     /**
      * The coroutine that executes the job.  If not NULL, it is reentered when
      * busy is false and the job is cancelled.
+     * Initialized in job_start()
      */
     Coroutine *co;
 
+    /** True if this job should automatically finalize itself */
+    bool auto_finalize;
+
+    /** True if this job should automatically dismiss itself */
+    bool auto_dismiss;
+
+    /**
+     * The completion function that will be called when the job completes.
+     * Called with AioContext lock held, since many callback implementations
+     * use bdrv_* functions that require to hold the lock.
+     */
+    BlockCompletionFunc *cb;
+
+    /** The opaque value that is passed to the completion function.  */
+    void *opaque;
+
+    /* ProgressMeter API is thread-safe */
+    ProgressMeter progress;
+
+    /**
+     * AioContext to run the job coroutine in.
+     * The job Aiocontext can be read when holding *either*
+     * the BQL (so we are in the main loop) or the job_mutex.
+     * It can only be written when we hold *both* BQL
+     * and the job_mutex.
+     */
+    AioContext *aio_context;
+
+
+    /** Protected by job_mutex */
+
+    /** Reference count of the block job */
+    int refcnt;
+
+    /** Current state; See @JobStatus for details. */
+    JobStatus status;
+
     /**
      * Timer that is used by @job_sleep_ns. Accessed under job_mutex (in
      * job.c).
@@ -76,7 +111,7 @@ typedef struct Job {
     /**
      * Set to false by the job while the coroutine has yielded and may be
      * re-entered by job_enter(). There may still be I/O or event loop activity
-     * pending. Accessed under block_job_mutex (in blockjob.c).
+     * pending. Accessed under job_mutex.
      *
      * When the job is deferred to the main loop, busy is true as long as the
      * bottom half is still pending.
@@ -112,14 +147,6 @@ typedef struct Job {
     /** Set to true when the job has deferred work to the main loop. */
     bool deferred_to_main_loop;
 
-    /** True if this job should automatically finalize itself */
-    bool auto_finalize;
-
-    /** True if this job should automatically dismiss itself */
-    bool auto_dismiss;
-
-    ProgressMeter progress;
-
     /**
      * Return code from @run and/or @prepare callback(s).
      * Not final until the job has reached the CONCLUDED status.
@@ -134,12 +161,6 @@ typedef struct Job {
      */
     Error *err;
 
-    /** The completion function that will be called when the job completes.  */
-    BlockCompletionFunc *cb;
-
-    /** The opaque value that is passed to the completion function.  */
-    void *opaque;
-
     /** Notifiers called when a cancelled job is finalised */
     NotifierList on_finalize_cancelled;
 
@@ -167,8 +188,15 @@ typedef struct Job {
 
 /**
  * Callbacks and other information about a Job driver.
+ * All callbacks are invoked with job_mutex *not* held.
  */
 struct JobDriver {
+
+    /*
+     * These fields are initialized when this object is created,
+     * and are never changed afterwards
+     */
+
     /** Derived Job struct size */
     size_t instance_size;
 
@@ -184,9 +212,18 @@ struct JobDriver {
      * aborted. If it returns zero, the job moves into the WAITING state. If it
      * is the last job to complete in its transaction, all jobs in the
      * transaction move from WAITING to PENDING.
+     *
+     * This callback must be run in the job's context.
      */
     int coroutine_fn (*run)(Job *job, Error **errp);
 
+    /*
+     * Functions run without regard to the BQL that may run in any
+     * arbitrary thread. These functions do not need to be thread-safe
+     * because the caller ensures that they are invoked from one
+     * thread at time.
+     */
+
     /**
      * If the callback is not NULL, it will be invoked when the job transitions
      * into the paused state.  Paused jobs must not perform any asynchronous
@@ -201,6 +238,13 @@ struct JobDriver {
      */
     void coroutine_fn (*resume)(Job *job);
 
+    /*
+     * Global state (GS) API. These functions run under the BQL.
+     *
+     * See include/block/block-global-state.h for more information about
+     * the GS API.
+     */
+
     /**
      * Called when the job is resumed by the user (i.e. user_paused becomes
      * false). .user_resume is called before .resume.
@@ -220,6 +264,9 @@ struct JobDriver {
      *
      * This callback will not be invoked if the job has already failed.
      * If it fails, abort and then clean will be called.
+     *
+     * Called with AioContext lock held, since many callbacs implementations
+     * use bdrv_* functions that require to hold the lock.
      */
     int (*prepare)(Job *job);
 
@@ -230,6 +277,9 @@ struct JobDriver {
      *
      * All jobs will complete with a call to either .commit() or .abort() but
      * never both.
+     *
+     * Called with AioContext lock held, since many callback implementations
+     * use bdrv_* functions that require to hold the lock.
      */
     void (*commit)(Job *job);
 
@@ -240,6 +290,9 @@ struct JobDriver {
      *
      * All jobs will complete with a call to either .commit() or .abort() but
      * never both.
+     *
+     * Called with AioContext lock held, since many callback implementations
+     * use bdrv_* functions that require to hold the lock.
      */
     void (*abort)(Job *job);
 
@@ -248,11 +301,35 @@ struct JobDriver {
      * .commit() or .abort(). Regardless of which callback is invoked after
      * completion, .clean() will always be called, even if the job does not
      * belong to a transaction group.
+     *
+     * Called with AioContext lock held, since many callbacs implementations
+     * use bdrv_* functions that require to hold the lock.
      */
     void (*clean)(Job *job);
 
+    /**
+     * If the callback is not NULL, it will be invoked in job_cancel_async
+     *
+     * This function must return true if the job will be cancelled
+     * immediately without any further I/O (mandatory if @force is
+     * true), and false otherwise.  This lets the generic job layer
+     * know whether a job has been truly (force-)cancelled, or whether
+     * it is just in a special completion mode (like mirror after
+     * READY).
+     * (If the callback is NULL, the job is assumed to terminate
+     * without I/O.)
+     *
+     * Called with AioContext lock held, since many callback implementations
+     * use bdrv_* functions that require to hold the lock.
+     */
+    bool (*cancel)(Job *job, bool force);
 
-    /** Called when the job is freed */
+
+    /**
+     * Called when the job is freed.
+     * Called with AioContext lock held, since many callback implementations
+     * use bdrv_* functions that require to hold the lock.
+     */
     void (*free)(Job *job);
 };
 
@@ -267,6 +344,30 @@ typedef enum JobCreateFlags {
     JOB_MANUAL_DISMISS = 0x04,
 } JobCreateFlags;
 
+extern QemuMutex job_mutex;
+
+#define JOB_LOCK_GUARD() QEMU_LOCK_GUARD(&job_mutex)
+
+#define WITH_JOB_LOCK_GUARD() WITH_QEMU_LOCK_GUARD(&job_mutex)
+
+/**
+ * job_lock:
+ *
+ * Take the mutex protecting the list of jobs and their status.
+ * Most functions called by the monitor need to call job_lock
+ * and job_unlock manually.  On the other hand, function called
+ * by the block jobs themselves and by the block layer will take the
+ * lock for you.
+ */
+void job_lock(void);
+
+/**
+ * job_unlock:
+ *
+ * Release the mutex protecting the list of jobs and their status.
+ */
+void job_unlock(void);
+
 /**
  * Allocate and return a new job transaction. Jobs can be added to the
  * transaction using job_txn_add_job().
@@ -283,23 +384,20 @@ JobTxn *job_txn_new(void);
 /**
  * Release a reference that was previously acquired with job_txn_add_job or
  * job_txn_new. If it's the last reference to the object, it will be freed.
+ *
+ * Called with job lock *not* held.
  */
 void job_txn_unref(JobTxn *txn);
 
-/**
- * @txn: The transaction (may be NULL)
- * @job: Job to add to the transaction
- *
- * Add @job to the transaction.  The @job must not already be in a transaction.
- * The caller must call either job_txn_unref() or job_completed() to release
- * the reference that is automatically grabbed here.
- *
- * If @txn is NULL, the function does nothing.
+/*
+ * Same as job_txn_unref(), but called with job lock held.
+ * Might release the lock temporarily.
  */
-void job_txn_add_job(JobTxn *txn, Job *job);
+void job_txn_unref_locked(JobTxn *txn);
 
 /**
  * Create a new long-running job and return it.
+ * Called with job_mutex *not* held.
  *
  * @job_id: The id of the newly-created job, or %NULL for internal jobs
  * @driver: The class object for the newly-created job.
@@ -317,20 +415,27 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn,
 /**
  * Add a reference to Job refcnt, it will be decreased with job_unref, and then
  * be freed if it comes to be the last reference.
+ *
+ * Called with job lock held.
  */
-void job_ref(Job *job);
+void job_ref_locked(Job *job);
 
 /**
- * Release a reference that was previously acquired with job_ref() or
+ * Release a reference that was previously acquired with job_ref_locked() or
  * job_create(). If it's the last reference to the object, it will be freed.
+ *
+ * Takes AioContext lock internally to invoke a job->driver callback.
+ * Called with job lock held.
  */
-void job_unref(Job *job);
+void job_unref_locked(Job *job);
 
 /**
  * @job: The job that has made progress
  * @done: How much progress the job made since the last call
  *
  * Updates the progress counter of the job.
+ *
+ * May be called with mutex held or not held.
  */
 void job_progress_update(Job *job, uint64_t done);
 
@@ -341,6 +446,8 @@ void job_progress_update(Job *job, uint64_t done);
  *
  * Sets the expected end value of the progress counter of a job so that a
  * completion percentage can be calculated when the progress is updated.
+ *
+ * May be called with mutex held or not held.
  */
 void job_progress_set_remaining(Job *job, uint64_t remaining);
 
@@ -356,27 +463,27 @@ void job_progress_set_remaining(Job *job, uint64_t remaining);
  * length before, and job_progress_update() afterwards.
  * (So the operation acts as a parenthesis in regards to the main job
  * operation running in background.)
+ *
+ * May be called with mutex held or not held.
  */
 void job_progress_increase_remaining(Job *job, uint64_t delta);
 
-/** To be called when a cancelled job is finalised. */
-void job_event_cancelled(Job *job);
-
-/** To be called when a successfully completed job is finalised. */
-void job_event_completed(Job *job);
-
 /**
  * Conditionally enter the job coroutine if the job is ready to run, not
  * already busy and fn() returns true. fn() is called while under the job_lock
  * critical section.
+ *
+ * Called with job lock held, but might release it temporarily.
  */
-void job_enter_cond(Job *job, bool(*fn)(Job *job));
+void job_enter_cond_locked(Job *job, bool(*fn)(Job *job));
 
 /**
  * @job: A job that has not yet been started.
  *
  * Begins execution of a job.
  * Takes ownership of one reference to the job object.
+ *
+ * Called with job_mutex *not* held.
  */
 void job_start(Job *job);
 
@@ -384,6 +491,7 @@ void job_start(Job *job);
  * @job: The job to enter.
  *
  * Continue the specified job by entering the coroutine.
+ * Called with job_mutex *not* held.
  */
 void job_enter(Job *job);
 
@@ -392,6 +500,8 @@ void job_enter(Job *job);
  *
  * Pause now if job_pause() has been called. Jobs that perform lots of I/O
  * must call this between requests so that the job can be paused.
+ *
+ * Called with job_mutex *not* held.
  */
 void coroutine_fn job_pause_point(Job *job);
 
@@ -399,8 +509,9 @@ void coroutine_fn job_pause_point(Job *job);
  * @job: The job that calls the function.
  *
  * Yield the job coroutine.
+ * Called with job_mutex *not* held.
  */
-void job_yield(Job *job);
+void coroutine_fn job_yield(Job *job);
 
 /**
  * @job: The job that calls the function.
@@ -409,10 +520,11 @@ void job_yield(Job *job);
  * Put the job to sleep (assuming that it wasn't canceled) for @ns
  * %QEMU_CLOCK_REALTIME nanoseconds.  Canceling the job will immediately
  * interrupt the wait.
+ *
+ * Called with job_mutex *not* held.
  */
 void coroutine_fn job_sleep_ns(Job *job, int64_t ns);
 
-
 /** Returns the JobType of a given Job. */
 JobType job_type(const Job *job);
 
@@ -422,102 +534,165 @@ const char *job_type_str(const Job *job);
 /** Returns true if the job should not be visible to the management layer. */
 bool job_is_internal(Job *job);
 
-/** Returns whether the job is scheduled for cancellation. */
+/**
+ * Returns whether the job is being cancelled.
+ * Called with job_mutex *not* held.
+ */
 bool job_is_cancelled(Job *job);
 
-/** Returns whether the job is in a completed state. */
-bool job_is_completed(Job *job);
+/* Same as job_is_cancelled(), but called with job lock held. */
+bool job_is_cancelled_locked(Job *job);
+
+/**
+ * Returns whether the job is scheduled for cancellation (at an
+ * indefinite point).
+ * Called with job_mutex *not* held.
+ */
+bool job_cancel_requested(Job *job);
+
+/**
+ * Returns whether the job is in a completed state.
+ * Called with job lock held.
+ */
+bool job_is_completed_locked(Job *job);
 
-/** Returns whether the job is ready to be completed. */
+/**
+ * Returns whether the job is ready to be completed.
+ * Called with job_mutex *not* held.
+ */
 bool job_is_ready(Job *job);
 
+/* Same as job_is_ready(), but called with job lock held. */
+bool job_is_ready_locked(Job *job);
+
 /**
  * Request @job to pause at the next pause point. Must be paired with
  * job_resume(). If the job is supposed to be resumed by user action, call
- * job_user_pause() instead.
+ * job_user_pause_locked() instead.
+ *
+ * Called with job lock *not* held.
  */
 void job_pause(Job *job);
 
-/** Resumes a @job paused with job_pause. */
+/* Same as job_pause(), but called with job lock held. */
+void job_pause_locked(Job *job);
+
+/** Resumes a @job paused with job_pause. Called with job lock *not* held. */
 void job_resume(Job *job);
 
+/*
+ * Same as job_resume(), but called with job lock held.
+ * Might release the lock temporarily.
+ */
+void job_resume_locked(Job *job);
+
 /**
  * Asynchronously pause the specified @job.
  * Do not allow a resume until a matching call to job_user_resume.
+ * Called with job lock held.
  */
-void job_user_pause(Job *job, Error **errp);
+void job_user_pause_locked(Job *job, Error **errp);
 
-/** Returns true if the job is user-paused. */
-bool job_user_paused(Job *job);
+/**
+ * Returns true if the job is user-paused.
+ * Called with job lock held.
+ */
+bool job_user_paused_locked(Job *job);
 
 /**
  * Resume the specified @job.
- * Must be paired with a preceding job_user_pause.
+ * Must be paired with a preceding job_user_pause_locked.
+ * Called with job lock held, but might release it temporarily.
  */
-void job_user_resume(Job *job, Error **errp);
+void job_user_resume_locked(Job *job, Error **errp);
 
 /**
  * Get the next element from the list of block jobs after @job, or the
  * first one if @job is %NULL.
  *
  * Returns the requested job, or %NULL if there are no more jobs left.
+ * Called with job lock *not* held.
  */
 Job *job_next(Job *job);
 
+/* Same as job_next(), but called with job lock held. */
+Job *job_next_locked(Job *job);
+
 /**
  * Get the job identified by @id (which must not be %NULL).
  *
  * Returns the requested job, or %NULL if it doesn't exist.
+ * Called with job lock held.
  */
-Job *job_get(const char *id);
+Job *job_get_locked(const char *id);
 
 /**
  * Check whether the verb @verb can be applied to @job in its current state.
  * Returns 0 if the verb can be applied; otherwise errp is set and -EPERM
  * returned.
+ *
+ * Called with job lock held.
  */
-int job_apply_verb(Job *job, JobVerb verb, Error **errp);
+int job_apply_verb_locked(Job *job, JobVerb verb, Error **errp);
 
-/** The @job could not be started, free it. */
+/**
+ * The @job could not be started, free it.
+ * Called with job_mutex *not* held.
+ */
 void job_early_fail(Job *job);
 
-/** Moves the @job from RUNNING to READY */
+/**
+ * Moves the @job from RUNNING to READY.
+ * Called with job_mutex *not* held.
+ */
 void job_transition_to_ready(Job *job);
 
-/** Asynchronously complete the specified @job. */
-void job_complete(Job *job, Error **errp);
+/**
+ * Asynchronously complete the specified @job.
+ * Called with job lock held, but might release it temporarily.
+ */
+void job_complete_locked(Job *job, Error **errp);
 
 /**
  * Asynchronously cancel the specified @job. If @force is true, the job should
  * be cancelled immediately without waiting for a consistent state.
+ * Called with job lock held.
  */
-void job_cancel(Job *job, bool force);
+void job_cancel_locked(Job *job, bool force);
 
 /**
- * Cancels the specified job like job_cancel(), but may refuse to do so if the
- * operation isn't meaningful in the current state of the job.
+ * Cancels the specified job like job_cancel_locked(), but may refuse
+ * to do so if the operation isn't meaningful in the current state of the job.
+ * Called with job lock held.
  */
-void job_user_cancel(Job *job, bool force, Error **errp);
+void job_user_cancel_locked(Job *job, bool force, Error **errp);
 
 /**
  * Synchronously cancel the @job.  The completion callback is called
- * before the function returns.  The job may actually complete
- * instead of canceling itself; the circumstances under which this
- * happens depend on the kind of job that is active.
+ * before the function returns.  If @force is false, the job may
+ * actually complete instead of canceling itself; the circumstances
+ * under which this happens depend on the kind of job that is active.
  *
  * Returns the return value from the job if the job actually completed
  * during the call, or -ECANCELED if it was canceled.
  *
- * Callers must hold the AioContext lock of job->aio_context.
+ * Called with job_lock *not* held.
  */
-int job_cancel_sync(Job *job);
+int job_cancel_sync(Job *job, bool force);
+
+/* Same as job_cancel_sync, but called with job lock held. */
+int job_cancel_sync_locked(Job *job, bool force);
 
-/** Synchronously cancels all jobs using job_cancel_sync(). */
+/**
+ * Synchronously force-cancels all jobs using job_cancel_sync_locked().
+ *
+ * Called with job_lock *not* held.
+ */
 void job_cancel_sync_all(void);
 
 /**
  * @job: The job to be completed.
- * @errp: Error object which may be set by job_complete(); this is not
+ * @errp: Error object which may be set by job_complete_locked(); this is not
  *        necessarily set on every error, the job return value has to be
  *        checked as well.
  *
@@ -526,10 +701,9 @@ void job_cancel_sync_all(void);
  * function).
  *
  * Returns the return value from the job.
- *
- * Callers must hold the AioContext lock of job->aio_context.
+ * Called with job_lock held.
  */
-int job_complete_sync(Job *job, Error **errp);
+int job_complete_sync_locked(Job *job, Error **errp);
 
 /**
  * For a @job that has finished its work and is pending awaiting explicit
@@ -538,14 +712,18 @@ int job_complete_sync(Job *job, Error **errp);
  * FIXME: Make the below statement universally true:
  * For jobs that support the manual workflow mode, all graph changes that occur
  * as a result will occur after this command and before a successful reply.
+ *
+ * Called with job lock held.
  */
-void job_finalize(Job *job, Error **errp);
+void job_finalize_locked(Job *job, Error **errp);
 
 /**
  * Remove the concluded @job from the query list and resets the passed pointer
  * to %NULL. Returns an error if the job is not actually concluded.
+ *
+ * Called with job lock held.
  */
-void job_dismiss(Job **job, Error **errp);
+void job_dismiss_locked(Job **job, Error **errp);
 
 /**
  * Synchronously finishes the given @job. If @finish is given, it is called to
@@ -554,8 +732,20 @@ void job_dismiss(Job **job, Error **errp);
  * Returns 0 if the job is successfully completed, -ECANCELED if the job was
  * cancelled before completing, and -errno in other error cases.
  *
- * Callers must hold the AioContext lock of job->aio_context.
+ * Called with job_lock held, but might release it temporarily.
+ */
+int job_finish_sync_locked(Job *job, void (*finish)(Job *, Error **errp),
+                           Error **errp);
+
+/**
+ * Sets the @job->aio_context.
+ * Called with job_mutex *not* held.
+ *
+ * This function must run in the main thread to protect against
+ * concurrent read in job_finish_sync_locked(), takes the job_mutex
+ * lock to protect against the read in job_do_yield_locked(), and must
+ * be called when the job is quiescent.
  */
-int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp);
+void job_set_aio_context(Job *job, AioContext *ctx);
 
 #endif
diff --git a/include/qemu/keyval.h b/include/qemu/keyval.h
new file mode 100644 (file)
index 0000000..1187b68
--- /dev/null
@@ -0,0 +1,15 @@
+/*
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef KEYVAL_H
+#define KEYVAL_H
+
+QDict *keyval_parse_into(QDict *qdict, const char *params, const char *implied_key,
+                         bool *p_help, Error **errp);
+QDict *keyval_parse(const char *params, const char *implied_key,
+                    bool *help, Error **errp);
+void keyval_merge(QDict *old, const QDict *new, Error **errp);
+
+#endif /* KEYVAL_H */
index b62002314180ccb0bc73fbfec5ddb6fada3cbcc9..9823220446d9bea927d23f6a91524fcf5c06000b 100644 (file)
@@ -13,7 +13,7 @@
 #ifndef QEMU_LOCKABLE_H
 #define QEMU_LOCKABLE_H
 
-#include "qemu/coroutine.h"
+#include "qemu/coroutine-core.h"
 #include "qemu/thread.h"
 
 typedef void QemuLockUnlockFunc(void *);
@@ -24,79 +24,71 @@ struct QemuLockable {
     QemuLockUnlockFunc *unlock;
 };
 
-/* This function gives an error if an invalid, non-NULL pointer type is passed
- * to QEMU_MAKE_LOCKABLE.  For optimized builds, we can rely on dead-code elimination
- * from the compiler, and give the errors already at link time.
- */
-#if defined(__OPTIMIZE__) && !defined(__SANITIZE_ADDRESS__)
-void unknown_lock_type(void *);
-#else
-static inline void unknown_lock_type(void *unused)
-{
-    abort();
-}
-#endif
-
 static inline __attribute__((__always_inline__)) QemuLockable *
 qemu_make_lockable(void *x, QemuLockable *lockable)
 {
-    /* We cannot test this in a macro, otherwise we get compiler
+    /*
+     * We cannot test this in a macro, otherwise we get compiler
      * warnings like "the address of 'm' will always evaluate as 'true'".
      */
     return x ? lockable : NULL;
 }
 
-/* Auxiliary macros to simplify QEMU_MAKE_LOCABLE.  */
-#define QEMU_LOCK_FUNC(x) ((QemuLockUnlockFunc *)    \
-    QEMU_GENERIC(x,                                  \
-                 (QemuMutex *, qemu_mutex_lock),     \
-                 (QemuRecMutex *, qemu_rec_mutex_lock), \
-                 (CoMutex *, qemu_co_mutex_lock),    \
-                 (QemuSpin *, qemu_spin_lock),       \
-                 unknown_lock_type))
-
-#define QEMU_UNLOCK_FUNC(x) ((QemuLockUnlockFunc *)  \
-    QEMU_GENERIC(x,                                  \
-                 (QemuMutex *, qemu_mutex_unlock),   \
-                 (QemuRecMutex *, qemu_rec_mutex_unlock), \
-                 (CoMutex *, qemu_co_mutex_unlock),  \
-                 (QemuSpin *, qemu_spin_unlock),     \
-                 unknown_lock_type))
-
-/* In C, compound literals have the lifetime of an automatic variable.
+static inline __attribute__((__always_inline__)) QemuLockable *
+qemu_null_lockable(void *x)
+{
+    if (x != NULL) {
+        qemu_build_not_reached();
+    }
+    return NULL;
+}
+
+/*
+ * In C, compound literals have the lifetime of an automatic variable.
  * In C++ it would be different, but then C++ wouldn't need QemuLockable
  * either...
  */
-#define QEMU_MAKE_LOCKABLE_(x) (&(QemuLockable) {     \
-        .object = (x),                               \
-        .lock = QEMU_LOCK_FUNC(x),                   \
-        .unlock = QEMU_UNLOCK_FUNC(x),               \
+#define QML_OBJ_(x, name) (&(QemuLockable) {                            \
+        .object = (x),                                                  \
+        .lock = (QemuLockUnlockFunc *) qemu_ ## name ## _lock,          \
+        .unlock = (QemuLockUnlockFunc *) qemu_ ## name ## _unlock       \
     })
 
-/* QEMU_MAKE_LOCKABLE - Make a polymorphic QemuLockable
+/**
+ * QEMU_MAKE_LOCKABLE - Make a polymorphic QemuLockable
  *
- * @x: a lock object (currently one of QemuMutex, QemuRecMutex, CoMutex, QemuSpin).
+ * @x: a lock object (currently one of QemuMutex, QemuRecMutex,
+ *     CoMutex, QemuSpin).
  *
  * Returns a QemuLockable object that can be passed around
  * to a function that can operate with locks of any kind, or
  * NULL if @x is %NULL.
+ *
+ * Note the special case for void *, so that we may pass "NULL".
  */
-#define QEMU_MAKE_LOCKABLE(x)                        \
-    QEMU_GENERIC(x,                                  \
-                 (QemuLockable *, (x)),              \
-                 qemu_make_lockable((x), QEMU_MAKE_LOCKABLE_(x)))
+#define QEMU_MAKE_LOCKABLE(x)                                           \
+    _Generic((x), QemuLockable *: (x),                                  \
+             void *: qemu_null_lockable(x),                             \
+             QemuMutex *: qemu_make_lockable(x, QML_OBJ_(x, mutex)),    \
+             QemuRecMutex *: qemu_make_lockable(x, QML_OBJ_(x, rec_mutex)), \
+             CoMutex *: qemu_make_lockable(x, QML_OBJ_(x, co_mutex)),   \
+             QemuSpin *: qemu_make_lockable(x, QML_OBJ_(x, spin)))
 
-/* QEMU_MAKE_LOCKABLE_NONNULL - Make a polymorphic QemuLockable
+/**
+ * QEMU_MAKE_LOCKABLE_NONNULL - Make a polymorphic QemuLockable
  *
- * @x: a lock object (currently one of QemuMutex, QemuRecMutex, CoMutex, QemuSpin).
+ * @x: a lock object (currently one of QemuMutex, QemuRecMutex,
+ *     CoMutex, QemuSpin).
  *
  * Returns a QemuLockable object that can be passed around
  * to a function that can operate with locks of any kind.
  */
-#define QEMU_MAKE_LOCKABLE_NONNULL(x)                \
-    QEMU_GENERIC(x,                                  \
-                 (QemuLockable *, (x)),              \
-                 QEMU_MAKE_LOCKABLE_(x))
+#define QEMU_MAKE_LOCKABLE_NONNULL(x)                           \
+    _Generic((x), QemuLockable *: (x),                          \
+                  QemuMutex *: QML_OBJ_(x, mutex),              \
+                  QemuRecMutex *: QML_OBJ_(x, rec_mutex),       \
+                  CoMutex *: QML_OBJ_(x, co_mutex),             \
+                  QemuSpin *: QML_OBJ_(x, spin))
 
 static inline void qemu_lockable_lock(QemuLockable *x)
 {
index 2f0a5b080eabd6f26b390e9ee6f4c2fba8ff5935..d47c9cd4462b7e9ec9bf4e5bebe4a91e30107624 100644 (file)
@@ -30,6 +30,6 @@ static inline bool qemu_loglevel_mask(int mask)
 }
 
 /* main logging function */
-int GCC_FMT_ATTR(1, 2) qemu_log(const char *fmt, ...);
+void G_GNUC_PRINTF(1, 2) qemu_log(const char *fmt, ...);
 
 #endif
index 9b8066020729a7d027fd4eab9ab39de79ce7b5cc..df59bfabcd5f1fce7279a0b307c6ef1b221f3b35 100644 (file)
@@ -3,46 +3,16 @@
 
 /* A small part of this API is split into its own header */
 #include "qemu/log-for-trace.h"
-#include "qemu/rcu.h"
-
-typedef struct QemuLogFile {
-    struct rcu_head rcu;
-    FILE *fd;
-} QemuLogFile;
-
-/* Private global variable, don't use */
-extern QemuLogFile *qemu_logfile;
-
 
 /* 
  * The new API:
- *
  */
 
-/* Log settings checking macros: */
-
-/* Returns true if qemu_log() will really write somewhere
- */
-static inline bool qemu_log_enabled(void)
-{
-    return qemu_logfile != NULL;
-}
+/* Returns true if qemu_log() will really write somewhere. */
+bool qemu_log_enabled(void);
 
-/* Returns true if qemu_log() will write somewhere else than stderr
- */
-static inline bool qemu_log_separate(void)
-{
-    QemuLogFile *logfile;
-    bool res = false;
-
-    rcu_read_lock();
-    logfile = qatomic_rcu_read(&qemu_logfile);
-    if (logfile && logfile->fd != stderr) {
-        res = true;
-    }
-    rcu_read_unlock();
-    return res;
-}
+/* Returns true if qemu_log() will write somewhere other than stderr. */
+bool qemu_log_separate(void);
 
 #define CPU_LOG_TB_OUT_ASM (1 << 0)
 #define CPU_LOG_TB_IN_ASM  (1 << 1)
@@ -64,51 +34,16 @@ static inline bool qemu_log_separate(void)
 #define CPU_LOG_PLUGIN     (1 << 18)
 /* LOG_STRACE is used for user-mode strace logging. */
 #define LOG_STRACE         (1 << 19)
+#define LOG_PER_THREAD     (1 << 20)
+#define CPU_LOG_TB_VPU     (1 << 21)
 
-/* Lock output for a series of related logs.  Since this is not needed
- * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we
- * assume that qemu_loglevel_mask has already been tested, and that
- * qemu_loglevel is never set when qemu_logfile is unset.
- */
+/* Lock/unlock output. */
 
-static inline FILE *qemu_log_lock(void)
-{
-    QemuLogFile *logfile;
-    rcu_read_lock();
-    logfile = qatomic_rcu_read(&qemu_logfile);
-    if (logfile) {
-        qemu_flockfile(logfile->fd);
-        return logfile->fd;
-    } else {
-        return NULL;
-    }
-}
-
-static inline void qemu_log_unlock(FILE *fd)
-{
-    if (fd) {
-        qemu_funlockfile(fd);
-    }
-    rcu_read_unlock();
-}
+FILE *qemu_log_trylock(void) G_GNUC_WARN_UNUSED_RESULT;
+void qemu_log_unlock(FILE *fd);
 
 /* Logging functions: */
 
-/* vfprintf-like logging function
- */
-static inline void GCC_FMT_ATTR(1, 0)
-qemu_log_vprintf(const char *fmt, va_list va)
-{
-    QemuLogFile *logfile;
-
-    rcu_read_lock();
-    logfile = qatomic_rcu_read(&qemu_logfile);
-    if (logfile) {
-        vfprintf(logfile->fd, fmt, va);
-    }
-    rcu_read_unlock();
-}
-
 /* log only if a bit is set on the current loglevel mask:
  * @mask: bit to check in the mask
  * @fmt: printf-style format string
@@ -147,9 +82,9 @@ typedef struct QEMULogItem {
 
 extern const QEMULogItem qemu_log_items[];
 
-void qemu_set_log(int log_flags);
-void qemu_log_needs_buffers(void);
-void qemu_set_log_filename(const char *filename, Error **errp);
+bool qemu_set_log(int log_flags, Error **errp);
+bool qemu_set_log_filename(const char *filename, Error **errp);
+bool qemu_set_log_filename_flags(const char *name, int flags, Error **errp);
 void qemu_set_dfilter_ranges(const char *ranges, Error **errp);
 bool qemu_log_in_addr_range(uint64_t addr);
 int qemu_str_to_log_mask(const char *str);
@@ -159,9 +94,4 @@ int qemu_str_to_log_mask(const char *str);
  */
 void qemu_print_log_usage(FILE *f);
 
-/* fflush() the log file */
-void qemu_log_flush(void);
-/* Close the log file */
-void qemu_log_close(void);
-
 #endif
diff --git a/include/qemu/madvise.h b/include/qemu/madvise.h
new file mode 100644 (file)
index 0000000..e155f59
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * QEMU madvise wrapper functions
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_MADVISE_H
+#define QEMU_MADVISE_H
+
+#define QEMU_MADV_INVALID -1
+
+#if defined(CONFIG_MADVISE)
+
+#define QEMU_MADV_WILLNEED  MADV_WILLNEED
+#define QEMU_MADV_DONTNEED  MADV_DONTNEED
+#ifdef MADV_DONTFORK
+#define QEMU_MADV_DONTFORK  MADV_DONTFORK
+#else
+#define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
+#endif
+#ifdef MADV_MERGEABLE
+#define QEMU_MADV_MERGEABLE MADV_MERGEABLE
+#else
+#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+#endif
+#ifdef MADV_UNMERGEABLE
+#define QEMU_MADV_UNMERGEABLE MADV_UNMERGEABLE
+#else
+#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID
+#endif
+#ifdef MADV_DODUMP
+#define QEMU_MADV_DODUMP MADV_DODUMP
+#else
+#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
+#endif
+#ifdef MADV_DONTDUMP
+#define QEMU_MADV_DONTDUMP MADV_DONTDUMP
+#else
+#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
+#endif
+#ifdef MADV_HUGEPAGE
+#define QEMU_MADV_HUGEPAGE MADV_HUGEPAGE
+#else
+#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
+#endif
+#ifdef MADV_NOHUGEPAGE
+#define QEMU_MADV_NOHUGEPAGE MADV_NOHUGEPAGE
+#else
+#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
+#endif
+#ifdef MADV_REMOVE
+#define QEMU_MADV_REMOVE MADV_REMOVE
+#else
+#define QEMU_MADV_REMOVE QEMU_MADV_DONTNEED
+#endif
+#ifdef MADV_POPULATE_WRITE
+#define QEMU_MADV_POPULATE_WRITE MADV_POPULATE_WRITE
+#else
+#define QEMU_MADV_POPULATE_WRITE QEMU_MADV_INVALID
+#endif
+
+#elif defined(CONFIG_POSIX_MADVISE)
+
+#define QEMU_MADV_WILLNEED  POSIX_MADV_WILLNEED
+#define QEMU_MADV_DONTNEED  POSIX_MADV_DONTNEED
+#define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
+#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID
+#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
+#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
+#define QEMU_MADV_HUGEPAGE  QEMU_MADV_INVALID
+#define QEMU_MADV_NOHUGEPAGE  QEMU_MADV_INVALID
+#define QEMU_MADV_REMOVE QEMU_MADV_DONTNEED
+#define QEMU_MADV_POPULATE_WRITE QEMU_MADV_INVALID
+
+#else /* no-op */
+
+#define QEMU_MADV_WILLNEED  QEMU_MADV_INVALID
+#define QEMU_MADV_DONTNEED  QEMU_MADV_INVALID
+#define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
+#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID
+#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
+#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
+#define QEMU_MADV_HUGEPAGE  QEMU_MADV_INVALID
+#define QEMU_MADV_NOHUGEPAGE  QEMU_MADV_INVALID
+#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
+#define QEMU_MADV_POPULATE_WRITE QEMU_MADV_INVALID
+
+#endif
+
+int qemu_madvise(void *addr, size_t len, int advice);
+
+#endif
index d6892fd2081a4774677fdb141e42aa1fef1827bc..68e70e61aa59c81256a46b7fae793a45ab99e8a2 100644 (file)
 #define QEMU_MAIN_LOOP_H
 
 #include "block/aio.h"
+#include "qom/object.h"
+#include "sysemu/event-loop-base.h"
 
 #define SIG_IPI SIGUSR1
 
+#define TYPE_MAIN_LOOP  "main-loop"
+OBJECT_DECLARE_TYPE(MainLoop, MainLoopClass, MAIN_LOOP)
+
+struct MainLoop {
+    EventLoopBase parent_obj;
+};
+typedef struct MainLoop MainLoop;
+
 /**
  * qemu_init_main_loop: Set up the process so that it can run the main loop.
  *
@@ -147,6 +157,8 @@ typedef void WaitObjectFunc(void *opaque);
  * in the main loop's calls to WaitForMultipleObjects.  When the handle
  * is in a signaled state, QEMU will call @func.
  *
+ * If the same HANDLE is added twice, this function returns -1.
+ *
  * @handle: The Windows handle to be observed.
  * @func: A function to be called when @handle is in a signaled state.
  * @opaque: A pointer-size value that is passed to @func.
@@ -234,24 +246,6 @@ void event_notifier_set_handler(EventNotifier *e,
 
 GSource *iohandler_get_g_source(void);
 AioContext *iohandler_get_aio_context(void);
-#ifdef CONFIG_POSIX
-/**
- * qemu_add_child_watch: Register a child process for reaping.
- *
- * Under POSIX systems, a parent process must read the exit status of
- * its child processes using waitpid, or the operating system will not
- * free some of the resources attached to that process.
- *
- * This function directs the QEMU main loop to observe a child process
- * and call waitpid as soon as it exits; the watch is then removed
- * automatically.  It is useful whenever QEMU forks a child process
- * but will find out about its termination by other means such as a
- * "broken pipe".
- *
- * @pid: The pid that QEMU should observe.
- */
-int qemu_add_child_watch(pid_t pid);
-#endif
 
 /**
  * qemu_mutex_iothread_locked: Return lock status of the main loop mutex.
@@ -260,9 +254,63 @@ int qemu_add_child_watch(pid_t pid);
  * must always be taken outside other locks.  This function helps
  * functions take different paths depending on whether the current
  * thread is running within the main loop mutex.
+ *
+ * This function should never be used in the block layer, because
+ * unit tests, block layer tools and qemu-storage-daemon do not
+ * have a BQL.
+ * Please instead refer to qemu_in_main_thread().
  */
 bool qemu_mutex_iothread_locked(void);
 
+/**
+ * qemu_in_main_thread: return whether it's possible to safely access
+ * the global state of the block layer.
+ *
+ * Global state of the block layer is not accessible from I/O threads
+ * or worker threads; only from threads that "own" the default
+ * AioContext that qemu_get_aio_context() returns.  For tests, block
+ * layer tools and qemu-storage-daemon there is a designated thread that
+ * runs the event loop for qemu_get_aio_context(), and that is the
+ * main thread.
+ *
+ * For emulators, however, any thread that holds the BQL can act
+ * as the block layer main thread; this will be any of the actual
+ * main thread, the vCPU threads or the RCU thread.
+ *
+ * For clarity, do not use this function outside the block layer.
+ */
+bool qemu_in_main_thread(void);
+
+/*
+ * Mark and check that the function is part of the Global State API.
+ * Please refer to include/block/block-global-state.h for more
+ * information about GS API.
+ */
+#define GLOBAL_STATE_CODE()                                         \
+    do {                                                            \
+        assert(qemu_in_main_thread());                              \
+    } while (0)
+
+/*
+ * Mark and check that the function is part of the I/O API.
+ * Please refer to include/block/block-io.h for more
+ * information about IO API.
+ */
+#define IO_CODE()                                                   \
+    do {                                                            \
+        /* nop */                                                   \
+    } while (0)
+
+/*
+ * Mark and check that the function is part of the "I/O OR GS" API.
+ * Please refer to include/block/block-io.h for more
+ * information about "IO or GS" API.
+ */
+#define IO_OR_GS_CODE()                                             \
+    do {                                                            \
+        /* nop */                                                   \
+    } while (0)
+
 /**
  * qemu_mutex_lock_iothread: Lock the main loop mutex.
  *
@@ -295,6 +343,35 @@ void qemu_mutex_lock_iothread_impl(const char *file, int line);
  */
 void qemu_mutex_unlock_iothread(void);
 
+/**
+ * QEMU_IOTHREAD_LOCK_GUARD
+ *
+ * Wrap a block of code in a conditional qemu_mutex_{lock,unlock}_iothread.
+ */
+typedef struct IOThreadLockAuto IOThreadLockAuto;
+
+static inline IOThreadLockAuto *qemu_iothread_auto_lock(const char *file,
+                                                        int line)
+{
+    if (qemu_mutex_iothread_locked()) {
+        return NULL;
+    }
+    qemu_mutex_lock_iothread_impl(file, line);
+    /* Anything non-NULL causes the cleanup function to be called */
+    return (IOThreadLockAuto *)(uintptr_t)1;
+}
+
+static inline void qemu_iothread_auto_unlock(IOThreadLockAuto *l)
+{
+    qemu_mutex_unlock_iothread();
+}
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(IOThreadLockAuto, qemu_iothread_auto_unlock)
+
+#define QEMU_IOTHREAD_LOCK_GUARD() \
+    g_autoptr(IOThreadLockAuto) _iothread_lock_auto __attribute__((unused)) \
+        = qemu_iothread_auto_lock(__FILE__, __LINE__)
+
 /*
  * qemu_cond_wait_iothread: Wait on condition for the main loop mutex
  *
@@ -310,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms);
 
 /* internal interfaces */
 
-void qemu_fd_register(int fd);
-
-QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
+#define qemu_bh_new_guarded(cb, opaque, guard) \
+    qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard)
+#define qemu_bh_new(cb, opaque) \
+    qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL)
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
+                         MemReentrancyGuard *reentrancy_guard);
 void qemu_bh_schedule_idle(QEMUBH *bh);
 
 enum {
diff --git a/include/qemu/memalign.h b/include/qemu/memalign.h
new file mode 100644 (file)
index 0000000..fa299f3
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Allocation and free functions for aligned memory
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_MEMALIGN_H
+#define QEMU_MEMALIGN_H
+
+/**
+ * qemu_try_memalign: Allocate aligned memory
+ * @alignment: required alignment, in bytes
+ * @size: size of allocation, in bytes
+ *
+ * Allocate memory on an aligned boundary (i.e. the returned
+ * address will be an exact multiple of @alignment).
+ * @alignment must be a power of 2, or the function will assert().
+ * On success, returns allocated memory; on failure, returns NULL.
+ *
+ * The memory allocated through this function must be freed via
+ * qemu_vfree() (and not via free()).
+ */
+void *qemu_try_memalign(size_t alignment, size_t size);
+/**
+ * qemu_memalign: Allocate aligned memory, without failing
+ * @alignment: required alignment, in bytes
+ * @size: size of allocation, in bytes
+ *
+ * Allocate memory in the same way as qemu_try_memalign(), but
+ * abort() with an error message if the memory allocation fails.
+ *
+ * The memory allocated through this function must be freed via
+ * qemu_vfree() (and not via free()).
+ */
+void *qemu_memalign(size_t alignment, size_t size);
+/**
+ * qemu_vfree: Free memory allocated through qemu_memalign
+ * @ptr: memory to free
+ *
+ * This function must be used to free memory allocated via qemu_memalign()
+ * or qemu_try_memalign(). (Using the wrong free function will cause
+ * subtle bugs on Windows hosts.)
+ */
+void qemu_vfree(void *ptr);
+/*
+ * It's an analog of GLIB's g_autoptr_cleanup_generic_gfree(), used to define
+ * g_autofree macro.
+ */
+static inline void qemu_cleanup_generic_vfree(void *p)
+{
+  void **pp = (void **)p;
+  qemu_vfree(*pp);
+}
+
+/*
+ * Analog of g_autofree, but qemu_vfree is called on cleanup instead of g_free.
+ */
+#define QEMU_AUTO_VFREE __attribute__((cleanup(qemu_cleanup_generic_vfree)))
+
+#endif
index e786266b9237a98f5bdef2ed70c283c1e34e5897..8344daaa03f2d48819fdd3d6f1fef862b8eacd3a 100644 (file)
@@ -1,21 +1,32 @@
 #ifndef QEMU_MMAP_ALLOC_H
 #define QEMU_MMAP_ALLOC_H
 
+typedef enum {
+    QEMU_FS_TYPE_UNKNOWN = 0,
+    QEMU_FS_TYPE_TMPFS,
+    QEMU_FS_TYPE_HUGETLBFS,
+    QEMU_FS_TYPE_NUM,
+} QemuFsType;
 
 size_t qemu_fd_getpagesize(int fd);
-
-size_t qemu_mempath_getpagesize(const char *mem_path);
+QemuFsType qemu_fd_getfs(int fd);
 
 /**
- * qemu_ram_mmap: mmap the specified file or device.
+ * qemu_ram_mmap: mmap anonymous memory, the specified file or device.
+ *
+ * mmap() abstraction to map guest RAM, simplifying flag handling, taking
+ * care of alignment requirements and installing guard pages.
  *
  * Parameters:
  *  @fd: the file or the device to mmap
  *  @size: the number of bytes to be mmaped
  *  @align: if not zero, specify the alignment of the starting mapping address;
  *          otherwise, the alignment in use will be determined by QEMU.
- *  @shared: map has RAM_SHARED flag.
- *  @is_pmem: map has RAM_PMEM flag.
+ *  @qemu_map_flags: QEMU_MAP_* flags
+ *  @map_offset: map starts at offset of map_offset from the start of fd
+ *
+ * Internally, MAP_PRIVATE, MAP_ANONYMOUS and MAP_SHARED_VALIDATE are set
+ * implicitly based on other parameters.
  *
  * Return:
  *  On success, return a pointer to the mapped area.
@@ -24,9 +35,32 @@ size_t qemu_mempath_getpagesize(const char *mem_path);
 void *qemu_ram_mmap(int fd,
                     size_t size,
                     size_t align,
-                    bool shared,
-                    bool is_pmem);
+                    uint32_t qemu_map_flags,
+                    off_t map_offset);
 
 void qemu_ram_munmap(int fd, void *ptr, size_t size);
 
+/*
+ * Abstraction of PROT_ and MAP_ flags as passed to mmap(), for example,
+ * consumed by qemu_ram_mmap().
+ */
+
+/* Map PROT_READ instead of PROT_READ | PROT_WRITE. */
+#define QEMU_MAP_READONLY   (1 << 0)
+
+/* Use MAP_SHARED instead of MAP_PRIVATE. */
+#define QEMU_MAP_SHARED     (1 << 1)
+
+/*
+ * Use MAP_SYNC | MAP_SHARED_VALIDATE if supported. Ignored without
+ * QEMU_MAP_SHARED. If mapping fails, warn and fallback to !QEMU_MAP_SYNC.
+ */
+#define QEMU_MAP_SYNC       (1 << 2)
+
+/*
+ * Use MAP_NORESERVE to skip reservation of swap space (or huge pages if
+ * applicable). Bail out if not supported/effective.
+ */
+#define QEMU_MAP_NORESERVE  (1 << 3)
+
 #endif
index 4b42dd285eeac1ba12e5c9e18ac08b3eddc253e5..c37ce74b16ffe57550fab68b0f2ddfa1b70fd6e9 100644 (file)
@@ -61,17 +61,132 @@ typedef enum {
 #define fuzz_target_init(function) module_init(function, \
                                                MODULE_INIT_FUZZ_TARGET)
 #define migration_init(function) module_init(function, MODULE_INIT_MIGRATION)
-#define block_module_load_one(lib) module_load_one("block-", lib, false)
-#define ui_module_load_one(lib) module_load_one("ui-", lib, false)
-#define audio_module_load_one(lib) module_load_one("audio-", lib, false)
+#define block_module_load(lib, errp) module_load("block-", lib, errp)
+#define ui_module_load(lib, errp) module_load("ui-", lib, errp)
+#define audio_module_load(lib, errp) module_load("audio-", lib, errp)
 
 void register_module_init(void (*fn)(void), module_init_type type);
 void register_dso_module_init(void (*fn)(void), module_init_type type);
 
 void module_call_init(module_init_type type);
-bool module_load_one(const char *prefix, const char *lib_name, bool mayfail);
-void module_load_qom_one(const char *type);
+
+/*
+ * module_load: attempt to load a module from a set of directories
+ *
+ * directories searched are:
+ * - getenv("QEMU_MODULE_DIR")
+ * - get_relocated_path(CONFIG_QEMU_MODDIR);
+ * - /var/run/qemu/${version_dir}
+ *
+ * prefix:         a subsystem prefix, or the empty string ("audio-", ..., "")
+ * name:           name of the module
+ * errp:           error to set in case the module is found, but load failed.
+ *
+ * Return value:   -1 on error (errp set if not NULL).
+ *                 0 if module or one of its dependencies are not installed,
+ *                 1 if the module is found and loaded,
+ *                 2 if the module is already loaded, or module is built-in.
+ */
+int module_load(const char *prefix, const char *name, Error **errp);
+
+/*
+ * module_load_qom: attempt to load a module to provide a QOM type
+ *
+ * type:           the type to be provided
+ * errp:           error to set.
+ *
+ * Return value:   as per module_load.
+ */
+int module_load_qom(const char *type, Error **errp);
 void module_load_qom_all(void);
-int module_load_check(const char *name);
+void module_allow_arch(const char *arch);
+
+/**
+ * DOC: module info annotation macros
+ *
+ * ``scripts/modinfo-collect.py`` will collect module info,
+ * using the preprocessor and -DQEMU_MODINFO.
+ *
+ * ``scripts/modinfo-generate.py`` will create a module meta-data database
+ * from the collected information so qemu knows about module
+ * dependencies and QOM objects implemented by modules.
+ *
+ * See ``*.modinfo`` and ``modinfo.c`` in the build directory to check the
+ * script results.
+ */
+#ifdef QEMU_MODINFO
+# define modinfo(kind, value) \
+    MODINFO_START kind value MODINFO_END
+#else
+# define modinfo(kind, value)
+#endif
+
+/**
+ * module_obj
+ *
+ * @name: QOM type.
+ *
+ * This module implements QOM type @name.
+ */
+#define module_obj(name) modinfo(obj, name)
+
+/**
+ * module_dep
+ *
+ * @name: module name
+ *
+ * This module depends on module @name.
+ */
+#define module_dep(name) modinfo(dep, name)
+
+/**
+ * module_arch
+ *
+ * @name: target architecture
+ *
+ * This module is for target architecture @arch.
+ *
+ * Note that target-dependent modules are tagged automatically, so
+ * this is only needed in case target-independent modules should be
+ * restricted.  Use case example: the ccw bus is implemented by s390x
+ * only.
+ */
+#define module_arch(name) modinfo(arch, name)
+
+/**
+ * module_opts
+ *
+ * @name: QemuOpts name
+ *
+ * This module registers QemuOpts @name.
+ */
+#define module_opts(name) modinfo(opts, name)
+
+/**
+ * module_kconfig
+ *
+ * @name: Kconfig requirement necessary to load the module
+ *
+ * This module requires a core module that should be implemented and
+ * enabled in Kconfig.
+ */
+#define module_kconfig(name) modinfo(kconfig, name)
+
+/*
+ * module info database
+ *
+ * scripts/modinfo-generate.c will build this using the data collected
+ * by scripts/modinfo-collect.py
+ */
+typedef struct QemuModinfo QemuModinfo;
+struct QemuModinfo {
+    const char *name;
+    const char *arch;
+    const char **objs;
+    const char **deps;
+    const char **opts;
+};
+extern const QemuModinfo qemu_modinfo[];
+void module_init_info(const QemuModinfo *info);
 
 #endif
diff --git a/include/qemu/mprotect.h b/include/qemu/mprotect.h
new file mode 100644 (file)
index 0000000..1e83d14
--- /dev/null
@@ -0,0 +1,14 @@
+/*
+ * QEMU mprotect functions
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_MPROTECT_H
+#define QEMU_MPROTECT_H
+
+int qemu_mprotect_rw(void *addr, size_t size);
+int qemu_mprotect_rwx(void *addr, size_t size);
+int qemu_mprotect_none(void *addr, size_t size);
+
+#endif
index 4b8b198ba761b64cc370722cb55149dc54859273..5f45774c2ca535cb085ca2fe31ec82646565c8fd 100644 (file)
@@ -1,7 +1,6 @@
 #ifndef NVDIMM_UTILS_H
 #define NVDIMM_UTILS_H
 
-#include "qemu/osdep.h"
 
 GSList *nvdimm_get_device_list(void);
 #endif
index ac69352e0eb080d06ff4633a5f9b4c0187621c21..b3498287823831530f2abdb64e96fbe430371c5e 100644 (file)
@@ -119,8 +119,6 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
                            int fail_if_exists, Error **errp);
 void qemu_opts_reset(QemuOptsList *list);
 void qemu_opts_loc_restore(QemuOpts *opts);
-bool qemu_opts_set(QemuOptsList *list, const char *id,
-                   const char *name, const char *value, Error **errp);
 const char *qemu_opts_id(QemuOpts *opts);
 void qemu_opts_set_id(QemuOpts *opts, char *id);
 void qemu_opts_del(QemuOpts *opts);
@@ -131,8 +129,6 @@ QemuOpts *qemu_opts_parse_noisily(QemuOptsList *list, const char *params,
                                   bool permit_abbrev);
 QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params,
                           bool permit_abbrev, Error **errp);
-void qemu_opts_set_defaults(QemuOptsList *list, const char *params,
-                            int permit_abbrev);
 QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict,
                                Error **errp);
 QDict *qemu_opts_to_qdict_filtered(QemuOpts *opts, QDict *qdict,
@@ -148,7 +144,6 @@ void qemu_opts_print_help(QemuOptsList *list, bool print_caption);
 void qemu_opts_free(QemuOptsList *list);
 QemuOptsList *qemu_opts_append(QemuOptsList *dst, QemuOptsList *list);
 
-QDict *keyval_parse(const char *params, const char *implied_key,
-                    bool *help, Error **errp);
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QemuOpts, qemu_opts_del)
 
 #endif
index f9ec8c84e903e4da4bc10844c87c364a0ba8843e..d30ba73eda2c7900b4ba66d576a58753b3bde8dd 100644 (file)
 #ifndef QEMU_OSDEP_H
 #define QEMU_OSDEP_H
 
+#if !defined _FORTIFY_SOURCE && defined __OPTIMIZE__ && __OPTIMIZE__ && defined __linux__
+# define _FORTIFY_SOURCE 2
+#endif
+
 #include "config-host.h"
 #ifdef NEED_CPU_H
 #include CONFIG_TARGET
 #include "exec/poison.h"
 #endif
 
+/*
+ * HOST_WORDS_BIGENDIAN was replaced with HOST_BIG_ENDIAN. Prevent it from
+ * creeping back in.
+ */
+#pragma GCC poison HOST_WORDS_BIGENDIAN
+
+/*
+ * TARGET_WORDS_BIGENDIAN was replaced with TARGET_BIG_ENDIAN. Prevent it from
+ * creeping back in.
+ */
+#pragma GCC poison TARGET_WORDS_BIGENDIAN
+
 #include "qemu/compiler.h"
 
 /* Older versions of C++ don't get definitions of various macros from
 #define daemon qemu_fake_daemon_function
 #include <stdlib.h>
 #undef daemon
-extern int daemon(int, int);
+QEMU_EXTERN_C int daemon(int, int);
 #endif
 
 #ifdef _WIN32
 /* as defined in sdkddkver.h */
 #ifndef _WIN32_WINNT
-#define _WIN32_WINNT 0x0600 /* Vista */
+#define _WIN32_WINNT 0x0602 /* Windows 8 API (should be >= the one from glib) */
 #endif
 /* reduces the number of implicitly included headers */
 #ifndef WIN32_LEAN_AND_MEAN
@@ -76,6 +92,19 @@ extern int daemon(int, int);
 #define __USE_MINGW_ANSI_STDIO 1
 #endif
 
+/*
+ * We need the FreeBSD "legacy" definitions. Rust needs the FreeBSD 11 system
+ * calls since it doesn't use libc at all, so we have to emulate that despite
+ * FreeBSD 11 being EOL'd.
+ */
+#ifdef __FreeBSD__
+#define _WANT_FREEBSD11_STAT
+#define _WANT_FREEBSD11_STATFS
+#define _WANT_FREEBSD11_DIRENT
+#define _WANT_KERNEL_ERRNO
+#define _WANT_SEMUN
+#endif
+
 #include <stdarg.h>
 #include <stddef.h>
 #include <stdbool.h>
@@ -104,8 +133,13 @@ extern int daemon(int, int);
 #include <setjmp.h>
 #include <signal.h>
 
-#ifdef HAVE_SYS_SIGNAL_H
-#include <sys/signal.h>
+#ifdef CONFIG_IOVEC
+#include <sys/uio.h>
+#endif
+
+#if defined(__linux__) && defined(__sparc__)
+/* The SPARC definition of QEMU_VMALLOC_ALIGN needs SHMLBA */
+#include <sys/shm.h>
 #endif
 
 #ifndef _WIN32
@@ -115,6 +149,17 @@ extern int daemon(int, int);
 #define WEXITSTATUS(x) (x)
 #endif
 
+#ifdef __APPLE__
+#include <AvailabilityMacros.h>
+#endif
+
+/*
+ * This is somewhat like a system header; it must be outside any extern "C"
+ * block because it includes system headers itself, including glib.h,
+ * which will not compile if inside an extern "C" block.
+ */
+#include "glib-compat.h"
+
 #ifdef _WIN32
 #include "sysemu/os-win32.h"
 #endif
@@ -123,9 +168,72 @@ extern int daemon(int, int);
 #include "sysemu/os-posix.h"
 #endif
 
-#include "glib-compat.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #include "qemu/typedefs.h"
 
+/**
+ * Mark a function that executes in coroutine context
+ *
+ * Functions that execute in coroutine context cannot be called directly from
+ * normal functions.  In the future it would be nice to enable compiler or
+ * static checker support for catching such errors.  This annotation might make
+ * it possible and in the meantime it serves as documentation.
+ *
+ * For example:
+ *
+ *   static void coroutine_fn foo(void) {
+ *       ....
+ *   }
+ */
+#ifdef __clang__
+#define coroutine_fn QEMU_ANNOTATE("coroutine_fn")
+#else
+#define coroutine_fn
+#endif
+
+/**
+ * Mark a function that can suspend when executed in coroutine context,
+ * but can handle running in non-coroutine context too.
+ */
+#ifdef __clang__
+#define coroutine_mixed_fn QEMU_ANNOTATE("coroutine_mixed_fn")
+#else
+#define coroutine_mixed_fn
+#endif
+
+/**
+ * Mark a function that should not be called from a coroutine context.
+ * Usually there will be an analogous, coroutine_fn function that should
+ * be used instead.
+ *
+ * When the function is also marked as coroutine_mixed_fn, the function should
+ * only be called if the caller does not know whether it is in coroutine
+ * context.
+ *
+ * Functions that are only no_coroutine_fn, on the other hand, should not
+ * be called from within coroutines at all.  This for example includes
+ * functions that block.
+ *
+ * In the future it would be nice to enable compiler or static checker
+ * support for catching such errors.  This annotation is the first step
+ * towards this, and in the meantime it serves as documentation.
+ *
+ * For example:
+ *
+ *   static void no_coroutine_fn foo(void) {
+ *       ....
+ *   }
+ */
+#ifdef __clang__
+#define no_coroutine_fn QEMU_ANNOTATE("no_coroutine_fn")
+#else
+#define no_coroutine_fn
+#endif
+
+
 /*
  * For mingw, as of v6.0.0, the function implementing the assert macro is
  * not marked as noreturn, so the compiler cannot delete code following an
@@ -138,6 +246,31 @@ extern int daemon(int, int);
 #define assert(x)  g_assert(x)
 #endif
 
+/**
+ * qemu_build_not_reached()
+ *
+ * The compiler, during optimization, is expected to prove that a call
+ * to this function cannot be reached and remove it.  If the compiler
+ * supports QEMU_ERROR, this will be reported at compile time; otherwise
+ * this will be reported at link time due to the missing symbol.
+ */
+G_NORETURN
+void QEMU_ERROR("code path is reachable")
+    qemu_build_not_reached_always(void);
+#if defined(__OPTIMIZE__) && !defined(__NO_INLINE__)
+#define qemu_build_not_reached()  qemu_build_not_reached_always()
+#else
+#define qemu_build_not_reached()  g_assert_not_reached()
+#endif
+
+/**
+ * qemu_build_assert()
+ *
+ * The compiler, during optimization, is expected to prove that the
+ * assertion is true.
+ */
+#define qemu_build_assert(test)  while (!(test)) qemu_build_not_reached()
+
 /*
  * According to waitpid man page:
  * WCOREDUMP
@@ -173,8 +306,8 @@ extern int daemon(int, int);
 #ifndef MAP_ANONYMOUS
 #define MAP_ANONYMOUS MAP_ANON
 #endif
-#ifndef MAP_FIXED_NOREPLACE
-#define MAP_FIXED_NOREPLACE 0
+#ifndef MAP_NORESERVE
+#define MAP_NORESERVE 0
 #endif
 #ifndef ENOMEDIUM
 #define ENOMEDIUM ENODEV
@@ -192,6 +325,14 @@ extern int daemon(int, int);
 #define ESHUTDOWN 4099
 #endif
 
+#define RETRY_ON_EINTR(expr) \
+    (__extension__                                          \
+        ({ typeof(expr) __result;                               \
+           do {                                             \
+                __result = (expr);         \
+           } while (__result == -1 && errno == EINTR);     \
+           __result; }))
+
 /* time_t may be either 32 or 64 bits depending on the host OS, and
  * can be either signed or unsigned, so we can't just hardcode a
  * specific maximum value. This is not a C preprocessor constant,
@@ -222,19 +363,10 @@ extern int daemon(int, int);
 #define TIME_MAX TYPE_MAXIMUM(time_t)
 #endif
 
-/* HOST_LONG_BITS is the size of a native pointer in bits. */
-#if UINTPTR_MAX == UINT32_MAX
-# define HOST_LONG_BITS 32
-#elif UINTPTR_MAX == UINT64_MAX
-# define HOST_LONG_BITS 64
-#else
-# error Unknown pointer size
-#endif
-
 /* Mac OSX has a <stdint.h> bug that incorrectly defines SIZE_MAX with
  * the wrong type. Our replacement isn't usable in preprocessor
  * expressions, but it is sufficient for our needs. */
-#if defined(HAVE_BROKEN_SIZE_MAX) && HAVE_BROKEN_SIZE_MAX
+#ifdef HAVE_BROKEN_SIZE_MAX
 #undef SIZE_MAX
 #define SIZE_MAX ((size_t)-1)
 #endif
@@ -255,19 +387,28 @@ extern int daemon(int, int);
  * determined by the pre-processor instead of the compiler, you'll
  * have to open-code it.  Sadly, Coverity is severely confused by the
  * constant variants, so we have to dumb things down there.
+ *
+ * Preprocessor sorcery ahead: use different identifiers for the local
+ * variables in each expansion, so we can nest macro calls without
+ * shadowing variables.
  */
-#undef MIN
-#define MIN(a, b)                                       \
+#define MIN_INTERNAL(a, b, _a, _b)                      \
     ({                                                  \
         typeof(1 ? (a) : (b)) _a = (a), _b = (b);       \
         _a < _b ? _a : _b;                              \
     })
-#undef MAX
-#define MAX(a, b)                                       \
+#undef MIN
+#define MIN(a, b) \
+    MIN_INTERNAL((a), (b), MAKE_IDENTFIER(_a), MAKE_IDENTFIER(_b))
+
+#define MAX_INTERNAL(a, b, _a, _b)                      \
     ({                                                  \
         typeof(1 ? (a) : (b)) _a = (a), _b = (b);       \
         _a > _b ? _a : _b;                              \
     })
+#undef MAX
+#define MAX(a, b) \
+    MAX_INTERNAL((a), (b), MAKE_IDENTFIER(_a), MAKE_IDENTFIER(_b))
 
 #ifdef __COVERITY__
 # define MIN_CONST(a, b) ((a) < (b) ? (a) : (b))
@@ -288,20 +429,29 @@ extern int daemon(int, int);
 /*
  * Minimum function that returns zero only if both values are zero.
  * Intended for use with unsigned values only.
+ *
+ * Preprocessor sorcery ahead: use different identifiers for the local
+ * variables in each expansion, so we can nest macro calls without
+ * shadowing variables.
  */
-#ifndef MIN_NON_ZERO
-#define MIN_NON_ZERO(a, b)                              \
+#define MIN_NON_ZERO_INTERNAL(a, b, _a, _b)             \
     ({                                                  \
         typeof(1 ? (a) : (b)) _a = (a), _b = (b);       \
         _a == 0 ? _b : (_b == 0 || _b > _a) ? _a : _b;  \
     })
-#endif
+#define MIN_NON_ZERO(a, b) \
+    MIN_NON_ZERO_INTERNAL((a), (b), MAKE_IDENTFIER(_a), MAKE_IDENTFIER(_b))
 
-/* Round number down to multiple */
+/*
+ * Round number down to multiple. Safe when m is not a power of 2 (see
+ * ROUND_DOWN for a faster version when a power of 2 is guaranteed).
+ */
 #define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m))
 
-/* Round number up to multiple. Safe when m is not a power of 2 (see
- * ROUND_UP for a faster version when a power of 2 is guaranteed) */
+/*
+ * Round number up to multiple. Safe when m is not a power of 2 (see
+ * ROUND_UP for a faster version when a power of 2 is guaranteed).
+ */
 #define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m))
 
 /* Check if n is a multiple of m */
@@ -318,11 +468,22 @@ extern int daemon(int, int);
 /* Check if pointer p is n-bytes aligned */
 #define QEMU_PTR_IS_ALIGNED(p, n) QEMU_IS_ALIGNED((uintptr_t)(p), (n))
 
-/* Round number up to multiple. Requires that d be a power of 2 (see
+/*
+ * Round number down to multiple. Requires that d be a power of 2 (see
+ * QEMU_ALIGN_UP for a safer but slower version on arbitrary
+ * numbers); works even if d is a smaller type than n.
+ */
+#ifndef ROUND_DOWN
+#define ROUND_DOWN(n, d) ((n) & -(0 ? (n) : (d)))
+#endif
+
+/*
+ * Round number up to multiple. Requires that d be a power of 2 (see
  * QEMU_ALIGN_UP for a safer but slower version on arbitrary
- * numbers); works even if d is a smaller type than n.  */
+ * numbers); works even if d is a smaller type than n.
+ */
 #ifndef ROUND_UP
-#define ROUND_UP(n, d) (((n) + (d) - 1) & -(0 ? (n) : (d)))
+#define ROUND_UP(n, d) ROUND_DOWN((n) + (d) - 1, (d))
 #endif
 
 #ifndef DIV_ROUND_UP
@@ -341,87 +502,10 @@ extern int daemon(int, int);
 #endif
 
 int qemu_daemon(int nochdir, int noclose);
-void *qemu_try_memalign(size_t alignment, size_t size);
-void *qemu_memalign(size_t alignment, size_t size);
-void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared);
-void qemu_vfree(void *ptr);
+void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared,
+                          bool noreserve);
 void qemu_anon_ram_free(void *ptr, size_t size);
 
-#define QEMU_MADV_INVALID -1
-
-#if defined(CONFIG_MADVISE)
-
-#define QEMU_MADV_WILLNEED  MADV_WILLNEED
-#define QEMU_MADV_DONTNEED  MADV_DONTNEED
-#ifdef MADV_DONTFORK
-#define QEMU_MADV_DONTFORK  MADV_DONTFORK
-#else
-#define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
-#endif
-#ifdef MADV_MERGEABLE
-#define QEMU_MADV_MERGEABLE MADV_MERGEABLE
-#else
-#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
-#endif
-#ifdef MADV_UNMERGEABLE
-#define QEMU_MADV_UNMERGEABLE MADV_UNMERGEABLE
-#else
-#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID
-#endif
-#ifdef MADV_DODUMP
-#define QEMU_MADV_DODUMP MADV_DODUMP
-#else
-#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
-#endif
-#ifdef MADV_DONTDUMP
-#define QEMU_MADV_DONTDUMP MADV_DONTDUMP
-#else
-#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
-#endif
-#ifdef MADV_HUGEPAGE
-#define QEMU_MADV_HUGEPAGE MADV_HUGEPAGE
-#else
-#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
-#endif
-#ifdef MADV_NOHUGEPAGE
-#define QEMU_MADV_NOHUGEPAGE MADV_NOHUGEPAGE
-#else
-#define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID
-#endif
-#ifdef MADV_REMOVE
-#define QEMU_MADV_REMOVE MADV_REMOVE
-#else
-#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
-#endif
-
-#elif defined(CONFIG_POSIX_MADVISE)
-
-#define QEMU_MADV_WILLNEED  POSIX_MADV_WILLNEED
-#define QEMU_MADV_DONTNEED  POSIX_MADV_DONTNEED
-#define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
-#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
-#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID
-#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
-#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
-#define QEMU_MADV_HUGEPAGE  QEMU_MADV_INVALID
-#define QEMU_MADV_NOHUGEPAGE  QEMU_MADV_INVALID
-#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
-
-#else /* no-op */
-
-#define QEMU_MADV_WILLNEED  QEMU_MADV_INVALID
-#define QEMU_MADV_DONTNEED  QEMU_MADV_INVALID
-#define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
-#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
-#define QEMU_MADV_UNMERGEABLE QEMU_MADV_INVALID
-#define QEMU_MADV_DODUMP QEMU_MADV_INVALID
-#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
-#define QEMU_MADV_HUGEPAGE  QEMU_MADV_INVALID
-#define QEMU_MADV_NOHUGEPAGE  QEMU_MADV_INVALID
-#define QEMU_MADV_REMOVE QEMU_MADV_INVALID
-
-#endif
-
 #ifdef _WIN32
 #define HAVE_CHARDEV_SERIAL 1
 #elif defined(__linux__) || defined(__sun__) || defined(__FreeBSD__)    \
@@ -430,15 +514,18 @@ void qemu_anon_ram_free(void *ptr, size_t size);
 #define HAVE_CHARDEV_SERIAL 1
 #endif
 
-#if defined(__linux__) || defined(__FreeBSD__) ||               \
-    defined(__FreeBSD_kernel__) || defined(__DragonFly__)
-#define HAVE_CHARDEV_PARPORT 1
-#endif
-
 #if defined(__HAIKU__)
 #define SIGIO SIGPOLL
 #endif
 
+#ifdef HAVE_MADVISE_WITHOUT_PROTOTYPE
+/*
+ * See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for discussion
+ * about Solaris missing the madvise() prototype.
+ */
+int madvise(char *, size_t, int);
+#endif
+
 #if defined(CONFIG_LINUX)
 #ifndef BUS_MCEERR_AR
 #define BUS_MCEERR_AR 4
@@ -459,10 +546,9 @@ void qemu_anon_ram_free(void *ptr, size_t size);
    /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
 #  define QEMU_VMALLOC_ALIGN (256 * 4096)
 #elif defined(__linux__) && defined(__sparc__)
-#include <sys/shm.h>
-#  define QEMU_VMALLOC_ALIGN MAX(qemu_real_host_page_size, SHMLBA)
+#  define QEMU_VMALLOC_ALIGN MAX(qemu_real_host_page_size(), SHMLBA)
 #else
-#  define QEMU_VMALLOC_ALIGN qemu_real_host_page_size
+#  define QEMU_VMALLOC_ALIGN qemu_real_host_page_size()
 #endif
 
 #ifdef CONFIG_POSIX
@@ -493,10 +579,6 @@ void sigaction_invoke(struct sigaction *action,
                       struct qemu_signalfd_siginfo *info);
 #endif
 
-int qemu_madvise(void *addr, size_t len, int advice);
-int qemu_mprotect_rwx(void *addr, size_t size);
-int qemu_mprotect_none(void *addr, size_t size);
-
 /*
  * Don't introduce new usage of this function, prefer the following
  * qemu_open/qemu_create that take an "Error **errp"
@@ -539,8 +621,6 @@ struct iovec {
 
 ssize_t readv(int fd, const struct iovec *iov, int iov_cnt);
 ssize_t writev(int fd, const struct iovec *iov, int iov_cnt);
-#else
-#include <sys/uio.h>
 #endif
 
 #ifdef _WIN32
@@ -560,45 +640,18 @@ static inline void qemu_timersub(const struct timeval *val1,
 #define qemu_timersub timersub
 #endif
 
-void qemu_set_cloexec(int fd);
-
-/* Starting on QEMU 2.5, qemu_hw_version() returns "2.5+" by default
- * instead of QEMU_VERSION, so setting hw_version on MachineClass
- * is no longer mandatory.
- *
- * Do NOT change this string, or it will break compatibility on all
- * machine classes that don't set hw_version.
- */
-#define QEMU_HW_VERSION "2.5+"
+ssize_t qemu_write_full(int fd, const void *buf, size_t count)
+    G_GNUC_WARN_UNUSED_RESULT;
 
-/* QEMU "hardware version" setting. Used to replace code that exposed
- * QEMU_VERSION to guests in the past and need to keep compatibility.
- * Do not use qemu_hw_version() in new code.
- */
-void qemu_set_hw_version(const char *);
-const char *qemu_hw_version(void);
-
-void fips_set_state(bool requested);
-bool fips_get_state(void);
+void qemu_set_cloexec(int fd);
 
-/* Return a dynamically allocated pathname denoting a file or directory that is
- * appropriate for storing local state.
- *
- * @relative_pathname need not start with a directory separator; one will be
- * added automatically.
+/* Return a dynamically allocated directory path that is appropriate for storing
+ * local state.
  *
  * The caller is responsible for releasing the value returned with g_free()
  * after use.
  */
-char *qemu_get_local_state_pathname(const char *relative_pathname);
-
-/* Find program directory, and save it for later usage with
- * qemu_get_exec_dir().
- * Try OS specific API first, if not working, parse from argv0. */
-void qemu_init_exec_dir(const char *argv0);
-
-/* Get the saved exec dir.  */
-const char *qemu_get_exec_dir(void);
+char *qemu_get_local_state_dir(void);
 
 /**
  * qemu_getauxval:
@@ -611,8 +664,23 @@ unsigned long qemu_getauxval(unsigned long type);
 
 void qemu_set_tty_echo(int fd, bool echo);
 
-void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus,
-                     Error **errp);
+typedef struct ThreadContext ThreadContext;
+
+/**
+ * qemu_prealloc_mem:
+ * @fd: the fd mapped into the area, -1 for anonymous memory
+ * @area: start address of the are to preallocate
+ * @sz: the size of the area to preallocate
+ * @max_threads: maximum number of threads to use
+ * @errp: returns an error if this function fails
+ *
+ * Preallocate memory (populate/prefault page tables writable) for the virtual
+ * memory area starting at @area with the size of @sz. After a successful call,
+ * each page in the area was faulted in writable at least once, for example,
+ * after allocating file blocks for mapped files.
+ */
+void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
+                       ThreadContext *tc, Error **errp);
 
 /**
  * qemu_get_pid_name:
@@ -624,30 +692,18 @@ void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus,
  */
 char *qemu_get_pid_name(pid_t pid);
 
-/**
- * qemu_fork:
- *
- * A version of fork that avoids signal handler race
- * conditions that can lead to child process getting
- * signals that are otherwise only expected by the
- * parent. It also resets all signal handlers to the
- * default settings.
- *
- * Returns 0 to child process, pid number to parent
- * or -1 on failure.
- */
-pid_t qemu_fork(Error **errp);
-
 /* Using intptr_t ensures that qemu_*_page_mask is sign-extended even
  * when intptr_t is 32-bit and we are aligning a long long.
  */
-extern uintptr_t qemu_real_host_page_size;
-extern intptr_t qemu_real_host_page_mask;
+static inline uintptr_t qemu_real_host_page_size(void)
+{
+    return getpagesize();
+}
 
-extern int qemu_icache_linesize;
-extern int qemu_icache_linesize_log;
-extern int qemu_dcache_linesize;
-extern int qemu_dcache_linesize_log;
+static inline intptr_t qemu_real_host_page_mask(void)
+{
+    return -(intptr_t)qemu_real_host_page_size();
+}
 
 /*
  * After using getopt or getopt_long, if you need to parse another set
@@ -664,15 +720,20 @@ static inline void qemu_reset_optind(void)
 #endif
 }
 
+int qemu_fdatasync(int fd);
+
 /**
- * qemu_get_host_name:
- * @errp: Error object
- *
- * Operating system agnostic way of querying host name.
+ * Sync changes made to the memory mapped file back to the backing
+ * storage. For POSIX compliant systems this will fallback
+ * to regular msync call. Otherwise it will trigger whole file sync
+ * (including the metadata case there is no support to skip that otherwise)
  *
- * Returns allocated hostname (caller should free), NULL on failure.
+ * @addr   - start of the memory area to be synced
+ * @length - length of the are to be synced
+ * @fd     - file descriptor for the file to be synced
+ *           (mandatory only for POSIX non-compliant systems)
  */
-char *qemu_get_host_name(Error **errp);
+int qemu_msync(void *addr, size_t length, int fd);
 
 /**
  * qemu_get_host_physmem:
@@ -686,4 +747,50 @@ char *qemu_get_host_name(Error **errp);
  */
 size_t qemu_get_host_physmem(void);
 
+/*
+ * Toggle write/execute on the pages marked MAP_JIT
+ * for the current thread.
+ */
+#if defined(MAC_OS_VERSION_11_0) && \
+    MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
+static inline void qemu_thread_jit_execute(void)
+{
+    pthread_jit_write_protect_np(true);
+}
+
+static inline void qemu_thread_jit_write(void)
+{
+    pthread_jit_write_protect_np(false);
+}
+#else
+static inline void qemu_thread_jit_write(void) {}
+static inline void qemu_thread_jit_execute(void) {}
+#endif
+
+/**
+ * Platforms which do not support system() return ENOSYS
+ */
+#ifndef HAVE_SYSTEM_FUNCTION
+#define system platform_does_not_support_system
+static inline int platform_does_not_support_system(const char *command)
+{
+    errno = ENOSYS;
+    return -1;
+}
+#endif /* !HAVE_SYSTEM_FUNCTION */
+
+/**
+ * If the load average was unobtainable, -1 is returned
+ */
+#ifndef HAVE_GETLOADAVG_FUNCTION
+static inline int getloadavg(double loadavg[], int nelem)
+{
+    return -1;
+}
+#endif /* !HAVE_GETLOADAVG_FUNCTION */
+
+#ifdef __cplusplus
+}
+#endif
+
 #endif
diff --git a/include/qemu/plugin-event.h b/include/qemu/plugin-event.h
new file mode 100644 (file)
index 0000000..7056d84
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_PLUGIN_EVENT_H
+#define QEMU_PLUGIN_EVENT_H
+
+/*
+ * Events that plugins can subscribe to.
+ */
+enum qemu_plugin_event {
+    QEMU_PLUGIN_EV_VCPU_INIT,
+    QEMU_PLUGIN_EV_VCPU_EXIT,
+    QEMU_PLUGIN_EV_VCPU_TB_TRANS,
+    QEMU_PLUGIN_EV_VCPU_IDLE,
+    QEMU_PLUGIN_EV_VCPU_RESUME,
+    QEMU_PLUGIN_EV_VCPU_SYSCALL,
+    QEMU_PLUGIN_EV_VCPU_SYSCALL_RET,
+    QEMU_PLUGIN_EV_FLUSH,
+    QEMU_PLUGIN_EV_ATEXIT,
+    QEMU_PLUGIN_EV_MAX, /* total number of plugin events we support */
+};
+
+#endif /* QEMU_PLUGIN_EVENT_H */
index fbbe99474bd2b3717d04a56918390df1223ae9dc..71c1123308e0f3b35062262369f1911c9dd078ba 100644 (file)
@@ -6,21 +6,17 @@
  * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
-#ifndef _PLUGIN_MEMORY_H_
-#define _PLUGIN_MEMORY_H_
+#ifndef PLUGIN_MEMORY_H
+#define PLUGIN_MEMORY_H
+
+#include "exec/cpu-defs.h"
+#include "exec/hwaddr.h"
 
 struct qemu_plugin_hwaddr {
     bool is_io;
     bool is_store;
-    union {
-        struct {
-            MemoryRegionSection *section;
-            hwaddr    offset;
-        } io;
-        struct {
-            uint64_t hostaddr;
-        } ram;
-    } v;
+    hwaddr phys_addr;
+    MemoryRegion *mr;
 };
 
 /**
@@ -34,7 +30,7 @@ struct qemu_plugin_hwaddr {
  * It would only fail if not called from an instrumented memory access
  * which would be an abuse of the API.
  */
-bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
+bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
                        bool is_store, struct qemu_plugin_hwaddr *data);
 
-#endif /* _PLUGIN_MEMORY_H_ */
+#endif /* PLUGIN_MEMORY_H */
index ab790ad105c6688780cfb46dcd110053f15da03a..7fdc3a4849ffb3ae5322b32fbcf05d91438cba44 100644 (file)
 #include "qemu/error-report.h"
 #include "qemu/queue.h"
 #include "qemu/option.h"
-
-/*
- * Events that plugins can subscribe to.
- */
-enum qemu_plugin_event {
-    QEMU_PLUGIN_EV_VCPU_INIT,
-    QEMU_PLUGIN_EV_VCPU_EXIT,
-    QEMU_PLUGIN_EV_VCPU_TB_TRANS,
-    QEMU_PLUGIN_EV_VCPU_IDLE,
-    QEMU_PLUGIN_EV_VCPU_RESUME,
-    QEMU_PLUGIN_EV_VCPU_SYSCALL,
-    QEMU_PLUGIN_EV_VCPU_SYSCALL_RET,
-    QEMU_PLUGIN_EV_FLUSH,
-    QEMU_PLUGIN_EV_ATEXIT,
-    QEMU_PLUGIN_EV_MAX, /* total number of plugin events we support */
-};
+#include "qemu/plugin-event.h"
+#include "exec/memopidx.h"
+#include "hw/core/cpu.h"
 
 /*
  * Option parsing/processing.
@@ -36,6 +23,25 @@ enum qemu_plugin_event {
 struct qemu_plugin_desc;
 typedef QTAILQ_HEAD(, qemu_plugin_desc) QemuPluginList;
 
+/*
+ * Construct a qemu_plugin_meminfo_t.
+ */
+static inline qemu_plugin_meminfo_t
+make_plugin_meminfo(MemOpIdx oi, enum qemu_plugin_mem_rw rw)
+{
+    return oi | (rw << 16);
+}
+
+/*
+ * Extract the memory operation direction from a qemu_plugin_meminfo_t.
+ * Other portions may be extracted via get_memop and get_mmuidx.
+ */
+static inline enum qemu_plugin_mem_rw
+get_plugin_meminfo_rw(qemu_plugin_meminfo_t i)
+{
+    return i >> 16;
+}
+
 #ifdef CONFIG_PLUGIN
 extern QemuOptsList qemu_plugin_opts;
 
@@ -44,8 +50,8 @@ static inline void qemu_plugin_add_opts(void)
     qemu_add_opts(&qemu_plugin_opts);
 }
 
-void qemu_plugin_opt_parse(const char *optarg, QemuPluginList *head);
-int qemu_plugin_load_list(QemuPluginList *head);
+void qemu_plugin_opt_parse(const char *optstr, QemuPluginList *head);
+int qemu_plugin_load_list(QemuPluginList *head, Error **errp);
 
 union qemu_plugin_cb_sig {
     qemu_plugin_simple_cb_t          simple;
@@ -79,7 +85,6 @@ enum plugin_dyn_cb_subtype {
 struct qemu_plugin_dyn_cb {
     union qemu_plugin_cb_sig f;
     void *userp;
-    unsigned tcg_flags;
     enum plugin_dyn_cb_subtype type;
     /* @rw applies to mem callbacks only (both regular and inline) */
     enum qemu_plugin_mem_rw rw;
@@ -92,13 +97,18 @@ struct qemu_plugin_dyn_cb {
     };
 };
 
+/* Internal context for instrumenting an instruction */
 struct qemu_plugin_insn {
     GByteArray *data;
     uint64_t vaddr;
     void *haddr;
     GArray *cbs[PLUGIN_N_CB_TYPES][PLUGIN_N_CB_SUBTYPES];
     bool calls_helpers;
+
+    /* if set, the instruction calls helpers that might access guest memory */
     bool mem_helper;
+
+    bool mem_only;
 };
 
 /*
@@ -128,6 +138,7 @@ static inline struct qemu_plugin_insn *qemu_plugin_insn_alloc(void)
     return insn;
 }
 
+/* Internal context for this TranslationBlock */
 struct qemu_plugin_tb {
     GPtrArray *insns;
     size_t n;
@@ -135,15 +146,22 @@ struct qemu_plugin_tb {
     uint64_t vaddr2;
     void *haddr1;
     void *haddr2;
+    bool mem_only;
+
+    /* if set, the TB calls helpers that might access guest memory */
+    bool mem_helper;
+
     GArray *cbs[PLUGIN_N_CB_SUBTYPES];
 };
 
 /**
  * qemu_plugin_tb_insn_get(): get next plugin record for translation.
- *
+ * @tb: the internal tb context
+ * @pc: address of instruction
  */
 static inline
-struct qemu_plugin_insn *qemu_plugin_tb_insn_get(struct qemu_plugin_tb *tb)
+struct qemu_plugin_insn *qemu_plugin_tb_insn_get(struct qemu_plugin_tb *tb,
+                                                 uint64_t pc)
 {
     struct qemu_plugin_insn *insn;
     int i, j;
@@ -156,6 +174,7 @@ struct qemu_plugin_insn *qemu_plugin_tb_insn_get(struct qemu_plugin_tb *tb)
     g_byte_array_set_size(insn->data, 0);
     insn->calls_helpers = false;
     insn->mem_helper = false;
+    insn->vaddr = pc;
 
     for (i = 0; i < PLUGIN_N_CB_TYPES; i++) {
         for (j = 0; j < PLUGIN_N_CB_SUBTYPES; j++) {
@@ -177,7 +196,8 @@ qemu_plugin_vcpu_syscall(CPUState *cpu, int64_t num, uint64_t a1,
                          uint64_t a6, uint64_t a7, uint64_t a8);
 void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret);
 
-void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, uint32_t meminfo);
+void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
+                             MemOpIdx oi, enum qemu_plugin_mem_rw rw);
 
 void qemu_plugin_flush_cb(void);
 
@@ -185,21 +205,51 @@ void qemu_plugin_atexit_cb(void);
 
 void qemu_plugin_add_dyn_cb_arr(GArray *arr);
 
-void qemu_plugin_disable_mem_helpers(CPUState *cpu);
+static inline void qemu_plugin_disable_mem_helpers(CPUState *cpu)
+{
+    cpu->plugin_mem_cbs = NULL;
+}
+
+/**
+ * qemu_plugin_user_exit(): clean-up callbacks before calling exit callbacks
+ *
+ * This is a user-mode only helper that ensure we have fully cleared
+ * callbacks from all threads before calling the exit callbacks. This
+ * is so the plugins themselves don't have to jump through hoops to
+ * guard against race conditions.
+ */
+void qemu_plugin_user_exit(void);
+
+/**
+ * qemu_plugin_user_prefork_lock(): take plugin lock before forking
+ *
+ * This is a user-mode only helper to take the internal plugin lock
+ * before a fork event. This is ensure a consistent lock state
+ */
+void qemu_plugin_user_prefork_lock(void);
+
+/**
+ * qemu_plugin_user_postfork(): reset the plugin lock
+ * @is_child: is this thread the child
+ *
+ * This user-mode only helper resets the lock state after a fork so we
+ * can continue using the plugin interface.
+ */
+void qemu_plugin_user_postfork(bool is_child);
 
 #else /* !CONFIG_PLUGIN */
 
 static inline void qemu_plugin_add_opts(void)
 { }
 
-static inline void qemu_plugin_opt_parse(const char *optarg,
+static inline void qemu_plugin_opt_parse(const char *optstr,
                                          QemuPluginList *head)
 {
     error_report("plugin interface not enabled in this build");
     exit(1);
 }
 
-static inline int qemu_plugin_load_list(QemuPluginList *head)
+static inline int qemu_plugin_load_list(QemuPluginList *head, Error **errp)
 {
     return 0;
 }
@@ -231,7 +281,8 @@ void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret)
 { }
 
 static inline void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
-                                           uint32_t meminfo)
+                                           MemOpIdx oi,
+                                           enum qemu_plugin_mem_rw rw)
 { }
 
 static inline void qemu_plugin_flush_cb(void)
@@ -247,6 +298,15 @@ void qemu_plugin_add_dyn_cb_arr(GArray *arr)
 static inline void qemu_plugin_disable_mem_helpers(CPUState *cpu)
 { }
 
+static inline void qemu_plugin_user_exit(void)
+{ }
+
+static inline void qemu_plugin_user_prefork_lock(void)
+{ }
+
+static inline void qemu_plugin_user_postfork(bool is_child)
+{ }
+
 #endif /* !CONFIG_PLUGIN */
 
 #endif /* QEMU_PLUGIN_H */
index 8e16c9277dbc7f52391bd69de6551ddb4bff3ed5..9f0dcdf28f465cb120c9aff027883d4673e5044b 100644 (file)
@@ -7,8 +7,6 @@
 #ifndef QEMU_PROCESSOR_H
 #define QEMU_PROCESSOR_H
 
-#include "qemu/atomic.h"
-
 #if defined(__i386__) || defined(__x86_64__)
 # define cpu_relax() asm volatile("rep; nop" ::: "memory")
 
index 9a23ff071c4c327c099eb5b4ec3e9d6505b298ef..0f2c0a32d20af6f6e925a65a0c87440a7fc1760a 100644 (file)
@@ -27,6 +27,8 @@
 #ifndef QEMU_PROGRESS_METER_H
 #define QEMU_PROGRESS_METER_H
 
+#include "qemu/thread.h"
+
 typedef struct ProgressMeter {
     /**
      * Current progress. The unit is arbitrary as long as the ratio between
@@ -37,22 +39,24 @@ typedef struct ProgressMeter {
 
     /** Estimated current value at the completion of the process */
     uint64_t total;
+
+    QemuMutex lock; /* protects concurrent access to above fields */
 } ProgressMeter;
 
-static inline void progress_work_done(ProgressMeter *pm, uint64_t done)
-{
-    pm->current += done;
-}
-
-static inline void progress_set_remaining(ProgressMeter *pm, uint64_t remaining)
-{
-    pm->total = pm->current + remaining;
-}
-
-static inline void progress_increase_remaining(ProgressMeter *pm,
-                                               uint64_t delta)
-{
-    pm->total += delta;
-}
+void progress_init(ProgressMeter *pm);
+void progress_destroy(ProgressMeter *pm);
+
+/* Get a snapshot of internal current and total values  */
+void progress_get_snapshot(ProgressMeter *pm, uint64_t *current,
+                           uint64_t *total);
+
+/* Increases the amount of work done so far by @done */
+void progress_work_done(ProgressMeter *pm, uint64_t done);
+
+/* Sets how much work has to be done to complete to @remaining */
+void progress_set_remaining(ProgressMeter *pm, uint64_t remaining);
+
+/* Increases the total work to do by @delta */
+void progress_increase_remaining(ProgressMeter *pm, uint64_t delta);
 
 #endif /* QEMU_PROGRESS_METER_H */
index bab8b0d4b3af9572b90c45300a916ba16993304a..4daab6efd291214fab935ee3d44fad95f28152ee 100644 (file)
@@ -7,8 +7,9 @@
  *
  * SPDX-License-Identifier: GPL-2.0-or-later
  */
-#ifndef QEMU_PLUGIN_API_H
-#define QEMU_PLUGIN_API_H
+
+#ifndef QEMU_QEMU_PLUGIN_H
+#define QEMU_QEMU_PLUGIN_H
 
 #include <inttypes.h>
 #include <stdbool.h>
  *   https://gcc.gnu.org/wiki/Visibility
  */
 #if defined _WIN32 || defined __CYGWIN__
-  #ifdef BUILDING_DLL
-    #define QEMU_PLUGIN_EXPORT __declspec(dllexport)
-  #else
+  #ifdef CONFIG_PLUGIN
     #define QEMU_PLUGIN_EXPORT __declspec(dllimport)
+    #define QEMU_PLUGIN_API __declspec(dllexport)
+  #else
+    #define QEMU_PLUGIN_EXPORT __declspec(dllexport)
+    #define QEMU_PLUGIN_API __declspec(dllimport)
   #endif
   #define QEMU_PLUGIN_LOCAL
 #else
-  #if __GNUC__ >= 4
-    #define QEMU_PLUGIN_EXPORT __attribute__((visibility("default")))
-    #define QEMU_PLUGIN_LOCAL  __attribute__((visibility("hidden")))
-  #else
-    #define QEMU_PLUGIN_EXPORT
-    #define QEMU_PLUGIN_LOCAL
-  #endif
+  #define QEMU_PLUGIN_EXPORT __attribute__((visibility("default")))
+  #define QEMU_PLUGIN_LOCAL  __attribute__((visibility("hidden")))
+  #define QEMU_PLUGIN_API
 #endif
 
+/**
+ * typedef qemu_plugin_id_t - Unique plugin ID
+ */
 typedef uint64_t qemu_plugin_id_t;
 
 /*
@@ -52,24 +54,32 @@ typedef uint64_t qemu_plugin_id_t;
 
 extern QEMU_PLUGIN_EXPORT int qemu_plugin_version;
 
-#define QEMU_PLUGIN_VERSION 0
+#define QEMU_PLUGIN_VERSION 1
 
-typedef struct {
-    /* string describing architecture */
+/**
+ * struct qemu_info_t - system information for plugins
+ *
+ * This structure provides for some limited information about the
+ * system to allow the plugin to make decisions on how to proceed. For
+ * example it might only be suitable for running on some guest
+ * architectures or when under full system emulation.
+ */
+typedef struct qemu_info_t {
+    /** @target_name: string describing architecture */
     const char *target_name;
+    /** @version: minimum and current plugin API level */
     struct {
         int min;
         int cur;
     } version;
-    /* is this a full system emulation? */
+    /** @system_emulation: is this a full system emulation? */
     bool system_emulation;
     union {
-        /*
-         * smp_vcpus may change if vCPUs can be hot-plugged, max_vcpus
-         * is the system-wide limit.
-         */
+        /** @system: information relevant to system emulation */
         struct {
+            /** @system.smp_vcpus: initial number of vCPUs */
             int smp_vcpus;
+            /** @system.max_vcpus: maximum possible number of vCPUs */
             int max_vcpus;
         } system;
     };
@@ -82,31 +92,50 @@ typedef struct {
  * @argc: number of arguments
  * @argv: array of arguments (@argc elements)
  *
- * All plugins must export this symbol.
- *
- * Note: Calling qemu_plugin_uninstall() from this function is a bug. To raise
- * an error during install, return !0.
+ * All plugins must export this symbol which is called when the plugin
+ * is first loaded. Calling qemu_plugin_uninstall() from this function
+ * is a bug.
  *
  * Note: @info is only live during the call. Copy any information we
- * want to keep.
+ * want to keep. @argv remains valid throughout the lifetime of the
+ * loaded plugin.
  *
- * Note: @argv remains valid throughout the lifetime of the loaded plugin.
+ * Return: 0 on successful loading, !0 for an error.
  */
 QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                                            const qemu_info_t *info,
                                            int argc, char **argv);
 
-/*
- * Prototypes for the various callback styles we will be registering
- * in the following functions.
+/**
+ * typedef qemu_plugin_simple_cb_t - simple callback
+ * @id: the unique qemu_plugin_id_t
+ *
+ * This callback passes no information aside from the unique @id.
  */
 typedef void (*qemu_plugin_simple_cb_t)(qemu_plugin_id_t id);
 
+/**
+ * typedef qemu_plugin_udata_cb_t - callback with user data
+ * @id: the unique qemu_plugin_id_t
+ * @userdata: a pointer to some user data supplied when the callback
+ * was registered.
+ */
 typedef void (*qemu_plugin_udata_cb_t)(qemu_plugin_id_t id, void *userdata);
 
+/**
+ * typedef qemu_plugin_vcpu_simple_cb_t - vcpu callback
+ * @id: the unique qemu_plugin_id_t
+ * @vcpu_index: the current vcpu context
+ */
 typedef void (*qemu_plugin_vcpu_simple_cb_t)(qemu_plugin_id_t id,
                                              unsigned int vcpu_index);
 
+/**
+ * typedef qemu_plugin_vcpu_udata_cb_t - vcpu callback
+ * @vcpu_index: the current vcpu context
+ * @userdata: a pointer to some user data supplied when the callback
+ * was registered.
+ */
 typedef void (*qemu_plugin_vcpu_udata_cb_t)(unsigned int vcpu_index,
                                             void *userdata);
 
@@ -121,6 +150,7 @@ typedef void (*qemu_plugin_vcpu_udata_cb_t)(unsigned int vcpu_index,
  *
  * Note: Calling this function from qemu_plugin_install() is a bug.
  */
+QEMU_PLUGIN_API
 void qemu_plugin_uninstall(qemu_plugin_id_t id, qemu_plugin_simple_cb_t cb);
 
 /**
@@ -134,6 +164,7 @@ void qemu_plugin_uninstall(qemu_plugin_id_t id, qemu_plugin_simple_cb_t cb);
  * Plugins are reset asynchronously, and therefore the given plugin receives
  * callbacks until @cb is called.
  */
+QEMU_PLUGIN_API
 void qemu_plugin_reset(qemu_plugin_id_t id, qemu_plugin_simple_cb_t cb);
 
 /**
@@ -145,6 +176,7 @@ void qemu_plugin_reset(qemu_plugin_id_t id, qemu_plugin_simple_cb_t cb);
  *
  * See also: qemu_plugin_register_vcpu_exit_cb()
  */
+QEMU_PLUGIN_API
 void qemu_plugin_register_vcpu_init_cb(qemu_plugin_id_t id,
                                        qemu_plugin_vcpu_simple_cb_t cb);
 
@@ -157,6 +189,7 @@ void qemu_plugin_register_vcpu_init_cb(qemu_plugin_id_t id,
  *
  * See also: qemu_plugin_register_vcpu_init_cb()
  */
+QEMU_PLUGIN_API
 void qemu_plugin_register_vcpu_exit_cb(qemu_plugin_id_t id,
                                        qemu_plugin_vcpu_simple_cb_t cb);
 
@@ -167,6 +200,7 @@ void qemu_plugin_register_vcpu_exit_cb(qemu_plugin_id_t id,
  *
  * The @cb function is called every time a vCPU idles.
  */
+QEMU_PLUGIN_API
 void qemu_plugin_register_vcpu_idle_cb(qemu_plugin_id_t id,
                                        qemu_plugin_vcpu_simple_cb_t cb);
 
@@ -177,20 +211,29 @@ void qemu_plugin_register_vcpu_idle_cb(qemu_plugin_id_t id,
  *
  * The @cb function is called every time a vCPU resumes execution.
  */
+QEMU_PLUGIN_API
 void qemu_plugin_register_vcpu_resume_cb(qemu_plugin_id_t id,
                                          qemu_plugin_vcpu_simple_cb_t cb);
 
-/*
- * Opaque types that the plugin is given during the translation and
- * instrumentation phase.
- */
+/** struct qemu_plugin_tb - Opaque handle for a translation block */
 struct qemu_plugin_tb;
+/** struct qemu_plugin_insn - Opaque handle for a translated instruction */
 struct qemu_plugin_insn;
 
+/**
+ * enum qemu_plugin_cb_flags - type of callback
+ *
+ * @QEMU_PLUGIN_CB_NO_REGS: callback does not access the CPU's regs
+ * @QEMU_PLUGIN_CB_R_REGS: callback reads the CPU's regs
+ * @QEMU_PLUGIN_CB_RW_REGS: callback reads and writes the CPU's regs
+ *
+ * Note: currently unused, plugins cannot read or change system
+ * register state.
+ */
 enum qemu_plugin_cb_flags {
-    QEMU_PLUGIN_CB_NO_REGS, /* callback does not access the CPU's regs */
-    QEMU_PLUGIN_CB_R_REGS,  /* callback reads the CPU's regs */
-    QEMU_PLUGIN_CB_RW_REGS, /* callback reads and writes the CPU's regs */
+    QEMU_PLUGIN_CB_NO_REGS,
+    QEMU_PLUGIN_CB_R_REGS,
+    QEMU_PLUGIN_CB_RW_REGS,
 };
 
 enum qemu_plugin_mem_rw {
@@ -199,6 +242,14 @@ enum qemu_plugin_mem_rw {
     QEMU_PLUGIN_MEM_RW,
 };
 
+/**
+ * typedef qemu_plugin_vcpu_tb_trans_cb_t - translation callback
+ * @id: unique plugin id
+ * @tb: opaque handle used for querying and instrumenting a block.
+ */
+typedef void (*qemu_plugin_vcpu_tb_trans_cb_t)(qemu_plugin_id_t id,
+                                               struct qemu_plugin_tb *tb);
+
 /**
  * qemu_plugin_register_vcpu_tb_trans_cb() - register a translate cb
  * @id: plugin ID
@@ -211,14 +262,12 @@ enum qemu_plugin_mem_rw {
  * callbacks to be triggered when the block or individual instruction
  * executes.
  */
-typedef void (*qemu_plugin_vcpu_tb_trans_cb_t)(qemu_plugin_id_t id,
-                                               struct qemu_plugin_tb *tb);
-
+QEMU_PLUGIN_API
 void qemu_plugin_register_vcpu_tb_trans_cb(qemu_plugin_id_t id,
                                            qemu_plugin_vcpu_tb_trans_cb_t cb);
 
 /**
- * qemu_plugin_register_vcpu_tb_trans_exec_cb() - register execution callback
+ * qemu_plugin_register_vcpu_tb_exec_cb() - register execution callback
  * @tb: the opaque qemu_plugin_tb handle for the translation
  * @cb: callback function
  * @flags: does the plugin read or write the CPU's registers?
@@ -226,17 +275,26 @@ void qemu_plugin_register_vcpu_tb_trans_cb(qemu_plugin_id_t id,
  *
  * The @cb function is called every time a translated unit executes.
  */
+QEMU_PLUGIN_API
 void qemu_plugin_register_vcpu_tb_exec_cb(struct qemu_plugin_tb *tb,
                                           qemu_plugin_vcpu_udata_cb_t cb,
                                           enum qemu_plugin_cb_flags flags,
                                           void *userdata);
 
+/**
+ * enum qemu_plugin_op - describes an inline op
+ *
+ * @QEMU_PLUGIN_INLINE_ADD_U64: add an immediate value uint64_t
+ *
+ * Note: currently only a single inline op is supported.
+ */
+
 enum qemu_plugin_op {
     QEMU_PLUGIN_INLINE_ADD_U64,
 };
 
 /**
- * qemu_plugin_register_vcpu_tb_trans_exec_inline() - execution inline op
+ * qemu_plugin_register_vcpu_tb_exec_inline() - execution inline op
  * @tb: the opaque qemu_plugin_tb handle for the translation
  * @op: the type of qemu_plugin_op (e.g. ADD_U64)
  * @ptr: the target memory location for the op
@@ -245,7 +303,11 @@ enum qemu_plugin_op {
  * Insert an inline op to every time a translated unit executes.
  * Useful if you just want to increment a single counter somewhere in
  * memory.
+ *
+ * Note: ops are not atomic so in multi-threaded/multi-smp situations
+ * you will get inexact results.
  */
+QEMU_PLUGIN_API
 void qemu_plugin_register_vcpu_tb_exec_inline(struct qemu_plugin_tb *tb,
                                               enum qemu_plugin_op op,
                                               void *ptr, uint64_t imm);
@@ -259,6 +321,7 @@ void qemu_plugin_register_vcpu_tb_exec_inline(struct qemu_plugin_tb *tb,
  *
  * The @cb function is called every time an instruction is executed
  */
+QEMU_PLUGIN_API
 void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn,
                                             qemu_plugin_vcpu_udata_cb_t cb,
                                             enum qemu_plugin_cb_flags flags,
@@ -267,7 +330,6 @@ void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn,
 /**
  * qemu_plugin_register_vcpu_insn_exec_inline() - insn execution inline op
  * @insn: the opaque qemu_plugin_insn handle for an instruction
- * @cb: callback function
  * @op: the type of qemu_plugin_op (e.g. ADD_U64)
  * @ptr: the target memory location for the op
  * @imm: the op data (e.g. 1)
@@ -275,45 +337,130 @@ void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn,
  * Insert an inline op to every time an instruction executes. Useful
  * if you just want to increment a single counter somewhere in memory.
  */
+QEMU_PLUGIN_API
 void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn *insn,
                                                 enum qemu_plugin_op op,
                                                 void *ptr, uint64_t imm);
 
-/*
- * Helpers to query information about the instructions in a block
+/**
+ * qemu_plugin_tb_n_insns() - query helper for number of insns in TB
+ * @tb: opaque handle to TB passed to callback
+ *
+ * Returns: number of instructions in this block
  */
+QEMU_PLUGIN_API
 size_t qemu_plugin_tb_n_insns(const struct qemu_plugin_tb *tb);
 
+/**
+ * qemu_plugin_tb_vaddr() - query helper for vaddr of TB start
+ * @tb: opaque handle to TB passed to callback
+ *
+ * Returns: virtual address of block start
+ */
+QEMU_PLUGIN_API
 uint64_t qemu_plugin_tb_vaddr(const struct qemu_plugin_tb *tb);
 
+/**
+ * qemu_plugin_tb_get_insn() - retrieve handle for instruction
+ * @tb: opaque handle to TB passed to callback
+ * @idx: instruction number, 0 indexed
+ *
+ * The returned handle can be used in follow up helper queries as well
+ * as when instrumenting an instruction. It is only valid for the
+ * lifetime of the callback.
+ *
+ * Returns: opaque handle to instruction
+ */
+QEMU_PLUGIN_API
 struct qemu_plugin_insn *
 qemu_plugin_tb_get_insn(const struct qemu_plugin_tb *tb, size_t idx);
 
+/**
+ * qemu_plugin_insn_data() - return ptr to instruction data
+ * @insn: opaque instruction handle from qemu_plugin_tb_get_insn()
+ *
+ * Note: data is only valid for duration of callback. See
+ * qemu_plugin_insn_size() to calculate size of stream.
+ *
+ * Returns: pointer to a stream of bytes containing the value of this
+ * instructions opcode.
+ */
+QEMU_PLUGIN_API
 const void *qemu_plugin_insn_data(const struct qemu_plugin_insn *insn);
 
+/**
+ * qemu_plugin_insn_size() - return size of instruction
+ * @insn: opaque instruction handle from qemu_plugin_tb_get_insn()
+ *
+ * Returns: size of instruction in bytes
+ */
+QEMU_PLUGIN_API
 size_t qemu_plugin_insn_size(const struct qemu_plugin_insn *insn);
 
+/**
+ * qemu_plugin_insn_vaddr() - return vaddr of instruction
+ * @insn: opaque instruction handle from qemu_plugin_tb_get_insn()
+ *
+ * Returns: virtual address of instruction
+ */
+QEMU_PLUGIN_API
 uint64_t qemu_plugin_insn_vaddr(const struct qemu_plugin_insn *insn);
+
+/**
+ * qemu_plugin_insn_haddr() - return hardware addr of instruction
+ * @insn: opaque instruction handle from qemu_plugin_tb_get_insn()
+ *
+ * Returns: hardware (physical) target address of instruction
+ */
+QEMU_PLUGIN_API
 void *qemu_plugin_insn_haddr(const struct qemu_plugin_insn *insn);
 
-/*
- * Memory Instrumentation
+/**
+ * typedef qemu_plugin_meminfo_t - opaque memory transaction handle
  *
- * The anonymous qemu_plugin_meminfo_t and qemu_plugin_hwaddr types
- * can be used in queries to QEMU to get more information about a
- * given memory access.
+ * This can be further queried using the qemu_plugin_mem_* query
+ * functions.
  */
 typedef uint32_t qemu_plugin_meminfo_t;
+/** struct qemu_plugin_hwaddr - opaque hw address handle */
 struct qemu_plugin_hwaddr;
 
-/* meminfo queries */
+/**
+ * qemu_plugin_mem_size_shift() - get size of access
+ * @info: opaque memory transaction handle
+ *
+ * Returns: size of access in ^2 (0=byte, 1=16bit, 2=32bit etc...)
+ */
+QEMU_PLUGIN_API
 unsigned int qemu_plugin_mem_size_shift(qemu_plugin_meminfo_t info);
+/**
+ * qemu_plugin_mem_is_sign_extended() - was the access sign extended
+ * @info: opaque memory transaction handle
+ *
+ * Returns: true if it was, otherwise false
+ */
+QEMU_PLUGIN_API
 bool qemu_plugin_mem_is_sign_extended(qemu_plugin_meminfo_t info);
+/**
+ * qemu_plugin_mem_is_big_endian() - was the access big endian
+ * @info: opaque memory transaction handle
+ *
+ * Returns: true if it was, otherwise false
+ */
+QEMU_PLUGIN_API
 bool qemu_plugin_mem_is_big_endian(qemu_plugin_meminfo_t info);
+/**
+ * qemu_plugin_mem_is_store() - was the access a store
+ * @info: opaque memory transaction handle
+ *
+ * Returns: true if it was, otherwise false
+ */
+QEMU_PLUGIN_API
 bool qemu_plugin_mem_is_store(qemu_plugin_meminfo_t info);
 
-/*
- * qemu_plugin_get_hwaddr():
+/**
+ * qemu_plugin_get_hwaddr() - return handle for memory operation
+ * @info: opaque memory info structure
  * @vaddr: the virtual address of the memory operation
  *
  * For system emulation returns a qemu_plugin_hwaddr handle to query
@@ -324,28 +471,97 @@ bool qemu_plugin_mem_is_store(qemu_plugin_meminfo_t info);
  * information about the handle should be recovered before the
  * callback returns.
  */
+QEMU_PLUGIN_API
 struct qemu_plugin_hwaddr *qemu_plugin_get_hwaddr(qemu_plugin_meminfo_t info,
                                                   uint64_t vaddr);
 
 /*
- * The following additional queries can be run on the hwaddr structure
- * to return information about it. For non-IO accesses the device
- * offset will be into the appropriate block of RAM.
+ * The following additional queries can be run on the hwaddr structure to
+ * return information about it - namely whether it is for an IO access and the
+ * physical address associated with the access.
+ */
+
+/**
+ * qemu_plugin_hwaddr_is_io() - query whether memory operation is IO
+ * @haddr: address handle from qemu_plugin_get_hwaddr()
+ *
+ * Returns true if the handle's memory operation is to memory-mapped IO, or
+ * false if it is to RAM
  */
+QEMU_PLUGIN_API
 bool qemu_plugin_hwaddr_is_io(const struct qemu_plugin_hwaddr *haddr);
-uint64_t qemu_plugin_hwaddr_device_offset(const struct qemu_plugin_hwaddr *haddr);
 
-typedef void
-(*qemu_plugin_vcpu_mem_cb_t)(unsigned int vcpu_index,
-                             qemu_plugin_meminfo_t info, uint64_t vaddr,
-                             void *userdata);
+/**
+ * qemu_plugin_hwaddr_phys_addr() - query physical address for memory operation
+ * @haddr: address handle from qemu_plugin_get_hwaddr()
+ *
+ * Returns the physical address associated with the memory operation
+ *
+ * Note that the returned physical address may not be unique if you are dealing
+ * with multiple address spaces.
+ */
+QEMU_PLUGIN_API
+uint64_t qemu_plugin_hwaddr_phys_addr(const struct qemu_plugin_hwaddr *haddr);
+
+/*
+ * Returns a string representing the device. The string is valid for
+ * the lifetime of the plugin.
+ */
+QEMU_PLUGIN_API
+const char *qemu_plugin_hwaddr_device_name(const struct qemu_plugin_hwaddr *h);
 
+/**
+ * typedef qemu_plugin_vcpu_mem_cb_t - memory callback function type
+ * @vcpu_index: the executing vCPU
+ * @info: an opaque handle for further queries about the memory
+ * @vaddr: the virtual address of the transaction
+ * @userdata: any user data attached to the callback
+ */
+typedef void (*qemu_plugin_vcpu_mem_cb_t) (unsigned int vcpu_index,
+                                           qemu_plugin_meminfo_t info,
+                                           uint64_t vaddr,
+                                           void *userdata);
+
+/**
+ * qemu_plugin_register_vcpu_mem_cb() - register memory access callback
+ * @insn: handle for instruction to instrument
+ * @cb: callback of type qemu_plugin_vcpu_mem_cb_t
+ * @flags: (currently unused) callback flags
+ * @rw: monitor reads, writes or both
+ * @userdata: opaque pointer for userdata
+ *
+ * This registers a full callback for every memory access generated by
+ * an instruction. If the instruction doesn't access memory no
+ * callback will be made.
+ *
+ * The callback reports the vCPU the access took place on, the virtual
+ * address of the access and a handle for further queries. The user
+ * can attach some userdata to the callback for additional purposes.
+ *
+ * Other execution threads will continue to execute during the
+ * callback so the plugin is responsible for ensuring it doesn't get
+ * confused by making appropriate use of locking if required.
+ */
+QEMU_PLUGIN_API
 void qemu_plugin_register_vcpu_mem_cb(struct qemu_plugin_insn *insn,
                                       qemu_plugin_vcpu_mem_cb_t cb,
                                       enum qemu_plugin_cb_flags flags,
                                       enum qemu_plugin_mem_rw rw,
                                       void *userdata);
 
+/**
+ * qemu_plugin_register_vcpu_mem_inline() - register an inline op to any memory access
+ * @insn: handle for instruction to instrument
+ * @rw: apply to reads, writes or both
+ * @op: the op, of type qemu_plugin_op
+ * @ptr: pointer memory for the op
+ * @imm: immediate data for @op
+ *
+ * This registers a inline op every memory access generated by the
+ * instruction. This provides for a lightweight but not thread-safe
+ * way of counting the number of operations done.
+ */
+QEMU_PLUGIN_API
 void qemu_plugin_register_vcpu_mem_inline(struct qemu_plugin_insn *insn,
                                           enum qemu_plugin_mem_rw rw,
                                           enum qemu_plugin_op op, void *ptr,
@@ -359,6 +575,7 @@ typedef void
                                  uint64_t a3, uint64_t a4, uint64_t a5,
                                  uint64_t a6, uint64_t a7, uint64_t a8);
 
+QEMU_PLUGIN_API
 void qemu_plugin_register_vcpu_syscall_cb(qemu_plugin_id_t id,
                                           qemu_plugin_vcpu_syscall_cb_t cb);
 
@@ -366,6 +583,7 @@ typedef void
 (*qemu_plugin_vcpu_syscall_ret_cb_t)(qemu_plugin_id_t id, unsigned int vcpu_idx,
                                      int64_t num, int64_t ret);
 
+QEMU_PLUGIN_API
 void
 qemu_plugin_register_vcpu_syscall_ret_cb(qemu_plugin_id_t id,
                                          qemu_plugin_vcpu_syscall_ret_cb_t cb);
@@ -378,8 +596,19 @@ qemu_plugin_register_vcpu_syscall_ret_cb(qemu_plugin_id_t id,
  * Returns an allocated string containing the disassembly
  */
 
+QEMU_PLUGIN_API
 char *qemu_plugin_insn_disas(const struct qemu_plugin_insn *insn);
 
+/**
+ * qemu_plugin_insn_symbol() - best effort symbol lookup
+ * @insn: instruction reference
+ *
+ * Return a static string referring to the symbol. This is dependent
+ * on the binary QEMU is running having provided a symbol table.
+ */
+QEMU_PLUGIN_API
+const char *qemu_plugin_insn_symbol(const struct qemu_plugin_insn *insn);
+
 /**
  * qemu_plugin_vcpu_for_each() - iterate over the existing vCPU
  * @id: plugin ID
@@ -389,12 +618,28 @@ char *qemu_plugin_insn_disas(const struct qemu_plugin_insn *insn);
  *
  * See also: qemu_plugin_register_vcpu_init_cb()
  */
+QEMU_PLUGIN_API
 void qemu_plugin_vcpu_for_each(qemu_plugin_id_t id,
                                qemu_plugin_vcpu_simple_cb_t cb);
 
+QEMU_PLUGIN_API
 void qemu_plugin_register_flush_cb(qemu_plugin_id_t id,
                                    qemu_plugin_simple_cb_t cb);
 
+/**
+ * qemu_plugin_register_atexit_cb() - register exit callback
+ * @id: plugin ID
+ * @cb: callback
+ * @userdata: user data for callback
+ *
+ * The @cb function is called once execution has finished. Plugins
+ * should be able to free all their resources at this point much like
+ * after a reset/uninstall callback is called.
+ *
+ * In user-mode it is possible a few un-instrumented instructions from
+ * child threads may run before the host kernel reaps the threads.
+ */
+QEMU_PLUGIN_API
 void qemu_plugin_register_atexit_cb(qemu_plugin_id_t id,
                                     qemu_plugin_udata_cb_t cb, void *userdata);
 
@@ -408,6 +653,59 @@ int qemu_plugin_n_max_vcpus(void);
  * qemu_plugin_outs() - output string via QEMU's logging system
  * @string: a string
  */
+QEMU_PLUGIN_API
 void qemu_plugin_outs(const char *string);
 
-#endif /* QEMU_PLUGIN_API_H */
+/**
+ * qemu_plugin_bool_parse() - parses a boolean argument in the form of
+ * "<argname>=[on|yes|true|off|no|false]"
+ *
+ * @name: argument name, the part before the equals sign
+ * @val: argument value, what's after the equals sign
+ * @ret: output return value
+ *
+ * returns true if the combination @name=@val parses correctly to a boolean
+ * argument, and false otherwise
+ */
+QEMU_PLUGIN_API
+bool qemu_plugin_bool_parse(const char *name, const char *val, bool *ret);
+
+/**
+ * qemu_plugin_path_to_binary() - path to binary file being executed
+ *
+ * Return a string representing the path to the binary. For user-mode
+ * this is the main executable. For system emulation we currently
+ * return NULL. The user should g_free() the string once no longer
+ * needed.
+ */
+QEMU_PLUGIN_API
+const char *qemu_plugin_path_to_binary(void);
+
+/**
+ * qemu_plugin_start_code() - returns start of text segment
+ *
+ * Returns the nominal start address of the main text segment in
+ * user-mode. Currently returns 0 for system emulation.
+ */
+QEMU_PLUGIN_API
+uint64_t qemu_plugin_start_code(void);
+
+/**
+ * qemu_plugin_end_code() - returns end of text segment
+ *
+ * Returns the nominal end address of the main text segment in
+ * user-mode. Currently returns 0 for system emulation.
+ */
+QEMU_PLUGIN_API
+uint64_t qemu_plugin_end_code(void);
+
+/**
+ * qemu_plugin_entry_code() - returns start address for module
+ *
+ * Returns the nominal entry address of the main text segment in
+ * user-mode. Currently returns 0 for system emulation.
+ */
+QEMU_PLUGIN_API
+uint64_t qemu_plugin_entry_code(void);
+
+#endif /* QEMU_QEMU_PLUGIN_H */
index 40b596262fae5559a4d6bbd9da6c693b80a0cf6c..1b70920648b83e8beb49f0faff1cf8add3bc4e63 100644 (file)
 #ifndef QEMU_PRINT_H
 #define QEMU_PRINT_H
 
-int qemu_vprintf(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);
-int qemu_printf(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+int qemu_vprintf(const char *fmt, va_list ap) G_GNUC_PRINTF(1, 0);
+int qemu_printf(const char *fmt, ...) G_GNUC_PRINTF(1, 2);
 
 int qemu_vfprintf(FILE *stream, const char *fmt, va_list ap)
-    GCC_FMT_ATTR(2, 0);
-int qemu_fprintf(FILE *stream, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
+    G_GNUC_PRINTF(2, 0);
+int qemu_fprintf(FILE *stream, const char *fmt, ...) G_GNUC_PRINTF(2, 3);
 
 #endif
diff --git a/include/qemu/qemu-progress.h b/include/qemu/qemu-progress.h
new file mode 100644 (file)
index 0000000..137e1c3
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef QEMU_PROGRESS_H
+#define QEMU_PROGRESS_H
+
+void qemu_progress_init(int enabled, float min_skip);
+void qemu_progress_end(void);
+void qemu_progress_print(float delta, int max);
+
+#endif /* QEMU_PROGRESS_H */
index 6484837487b012cbfc63b6c6e39e9c8196db7856..758c7ac6c89978ee762e9d94658471cc22b6aabc 100644 (file)
@@ -104,7 +104,7 @@ bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing);
  * Returns the corresponding pointer when a match is found.
  * Returns NULL otherwise.
  */
-void *qht_lookup_custom(struct qht *ht, const void *userp, uint32_t hash,
+void *qht_lookup_custom(const struct qht *ht, const void *userp, uint32_t hash,
                         qht_lookup_func_t func);
 
 /**
@@ -115,7 +115,7 @@ void *qht_lookup_custom(struct qht *ht, const void *userp, uint32_t hash,
  *
  * Calls qht_lookup_custom() using @ht's default comparison function.
  */
-void *qht_lookup(struct qht *ht, const void *userp, uint32_t hash);
+void *qht_lookup(const struct qht *ht, const void *userp, uint32_t hash);
 
 /**
  * qht_remove - remove a pointer from the hash table
@@ -211,7 +211,7 @@ void qht_iter_remove(struct qht *ht, qht_iter_bool_func_t func, void *userp);
  * When done with @stats, pass the struct to qht_statistics_destroy().
  * Failing to do this will leak memory.
  */
-void qht_statistics_init(struct qht *ht, struct qht_stats *stats);
+void qht_statistics_init(const struct qht *ht, struct qht_stats *stats);
 
 /**
  * qht_statistics_destroy - Destroy a &struct qht_stats
index d65f35350dfda2b27aeb6c08c3cb5d6de7d1b53f..69fe74b50d079c035e7f465646f3ef1dd546d6a9 100644 (file)
@@ -199,4 +199,3 @@ static inline gint q_tree_nnodes(QTree *tree)
 #endif /* HAVE_GLIB_WITH_SLICE_ALLOCATOR */
 
 #endif /* QEMU_QTREE_H */
-
index f62b363e0d1240b476a66383b46caf6497cacaeb..205e1da76dc5b29327f590b8293a826ced63c25d 100644 (file)
@@ -114,8 +114,8 @@ static inline uint64_t range_upb(Range *range)
  * @size may be 0. If the range would overflow, returns -ERANGE, otherwise
  * 0.
  */
-static inline int QEMU_WARN_UNUSED_RESULT range_init(Range *range, uint64_t lob,
-                                                     uint64_t size)
+G_GNUC_WARN_UNUSED_RESULT
+static inline int range_init(Range *range, uint64_t lob, uint64_t size)
 {
     if (lob + size < lob) {
         return -ERANGE;
@@ -217,6 +217,20 @@ static inline int ranges_overlap(uint64_t first1, uint64_t len1,
     return !(last2 < first1 || last1 < first2);
 }
 
+/*
+ * Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap.
+ * Both @a and @b must not be empty.
+ */
+int range_compare(Range *a, Range *b);
+
 GList *range_list_insert(GList *list, Range *data);
 
+/*
+ * Inverse an array of sorted ranges over the [low, high] span, ie.
+ * original ranges becomes holes in the newly allocated inv_ranges
+ */
+void range_inverse_array(GList *in_ranges,
+                         GList **out_ranges,
+                         uint64_t low, uint64_t high);
+
 #endif
index 01da8d63f149264144465e935c672d91c0849e13..48bf59e8572ff90024e23a338657a5787e3d78f6 100644 (file)
 #ifndef QEMU_RATELIMIT_H
 #define QEMU_RATELIMIT_H
 
+#include "qemu/lockable.h"
 #include "qemu/timer.h"
 
 typedef struct {
+    QemuMutex lock;
     int64_t slice_start_time;
     int64_t slice_end_time;
     uint64_t slice_quota;
@@ -40,7 +42,12 @@ static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n)
     int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     double delay_slices;
 
-    assert(limit->slice_quota && limit->slice_ns);
+    QEMU_LOCK_GUARD(&limit->lock);
+    if (!limit->slice_quota) {
+        /* Throttling disabled.  */
+        return 0;
+    }
+    assert(limit->slice_ns);
 
     if (limit->slice_end_time < now) {
         /* Previous, possibly extended, time slice finished; reset the
@@ -65,11 +72,26 @@ static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n)
     return limit->slice_end_time - now;
 }
 
+static inline void ratelimit_init(RateLimit *limit)
+{
+    qemu_mutex_init(&limit->lock);
+}
+
+static inline void ratelimit_destroy(RateLimit *limit)
+{
+    qemu_mutex_destroy(&limit->lock);
+}
+
 static inline void ratelimit_set_speed(RateLimit *limit, uint64_t speed,
                                        uint64_t slice_ns)
 {
+    QEMU_LOCK_GUARD(&limit->lock);
     limit->slice_ns = slice_ns;
-    limit->slice_quota = MAX(((double)speed * slice_ns) / 1000000000ULL, 1);
+    if (speed == 0) {
+        limit->slice_quota = 0;
+    } else {
+        limit->slice_quota = MAX(((double)speed * slice_ns) / 1000000000ULL, 1);
+    }
 }
 
 #endif
index 515d327cf11cfdd69b7bf636e44f0a43bcc762f0..fea058aa9f8a0e1430487d56874907773f76a939 100644 (file)
 #include "qemu/thread.h"
 #include "qemu/queue.h"
 #include "qemu/atomic.h"
+#include "qemu/notify.h"
 #include "qemu/sys_membarrier.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
+#include "qemu/coroutine-tls.h"
 
 /*
  * Important !
@@ -66,13 +64,20 @@ struct rcu_reader_data {
 
     /* Data used for registry, protected by rcu_registry_lock */
     QLIST_ENTRY(rcu_reader_data) node;
+
+    /*
+     * NotifierList used to force an RCU grace period.  Accessed under
+     * rcu_registry_lock.  Note that the notifier is called _outside_
+     * the thread!
+     */
+    NotifierList force_rcu;
 };
 
-extern __thread struct rcu_reader_data rcu_reader;
+QEMU_DECLARE_CO_TLS(struct rcu_reader_data, rcu_reader)
 
 static inline void rcu_read_lock(void)
 {
-    struct rcu_reader_data *p_rcu_reader = &rcu_reader;
+    struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();
     unsigned ctr;
 
     if (p_rcu_reader->depth++ > 0) {
@@ -82,13 +87,16 @@ static inline void rcu_read_lock(void)
     ctr = qatomic_read(&rcu_gp_ctr);
     qatomic_set(&p_rcu_reader->ctr, ctr);
 
-    /* Write p_rcu_reader->ctr before reading RCU-protected pointers.  */
+    /*
+     * Read rcu_gp_ptr and write p_rcu_reader->ctr before reading
+     * RCU-protected pointers.
+     */
     smp_mb_placeholder();
 }
 
 static inline void rcu_read_unlock(void)
 {
-    struct rcu_reader_data *p_rcu_reader = &rcu_reader;
+    struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();
 
     assert(p_rcu_reader->depth != 0);
     if (--p_rcu_reader->depth > 0) {
@@ -110,19 +118,19 @@ static inline void rcu_read_unlock(void)
     }
 }
 
-extern void synchronize_rcu(void);
+void synchronize_rcu(void);
 
 /*
  * Reader thread registration.
  */
-extern void rcu_register_thread(void);
-extern void rcu_unregister_thread(void);
+void rcu_register_thread(void);
+void rcu_unregister_thread(void);
 
 /*
  * Support for fork().  fork() support is enabled at startup.
  */
-extern void rcu_enable_atfork(void);
-extern void rcu_disable_atfork(void);
+void rcu_enable_atfork(void);
+void rcu_disable_atfork(void);
 
 struct rcu_head;
 typedef void RCUCBFunc(struct rcu_head *head);
@@ -132,8 +140,8 @@ struct rcu_head {
     RCUCBFunc *func;
 };
 
-extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
-extern void drain_call_rcu(void);
+void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
+void drain_call_rcu(void);
 
 /* The operands of the minus operator must have the same type,
  * which must be the one that we specify in the cast.
@@ -180,8 +188,11 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, rcu_read_auto_unlock)
 #define RCU_READ_LOCK_GUARD() \
     g_autoptr(RCUReadAuto) _rcu_read_auto __attribute__((unused)) = rcu_read_auto_lock()
 
-#ifdef __cplusplus
-}
-#endif
+/*
+ * Force-RCU notifiers tell readers that they should exit their
+ * read-side critical section.
+ */
+void rcu_add_force_rcu_notifier(Notifier *n);
+void rcu_remove_force_rcu_notifier(Notifier *n);
 
 #endif /* QEMU_RCU_H */
index 0e53ddd5305ec6345eb71e9d7e67f9b6b0b8a24f..4e6298d47307fd8dbcc1d13f3297ea8ae5bcdbcd 100644 (file)
 #include "qemu/queue.h"
 #include "qemu/atomic.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
 /*
  * List access methods.
  */
@@ -311,7 +306,4 @@ extern "C" {
          (var) && ((next) = qatomic_rcu_read(&(var)->field.sle_next), 1); \
          (var) = (next))
 
-#ifdef __cplusplus
-}
-#endif
 #endif /* QEMU_RCU_QUEUE_H */
index e81258322be699fc3e93d7effba8d2c562d1ac2b..b05e4782da964203b49a7caa9ac518dea2fdfbc4 100644 (file)
@@ -5,7 +5,7 @@
 #define READLINE_MAX_CMDS 64
 #define READLINE_MAX_COMPLETIONS 256
 
-typedef void GCC_FMT_ATTR(2, 3) ReadLinePrintfFunc(void *opaque,
+typedef void G_GNUC_PRINTF(2, 3) ReadLinePrintfFunc(void *opaque,
                                                    const char *fmt, ...);
 typedef void ReadLineFlushFunc(void *opaque);
 typedef void ReadLineFunc(void *opaque, const char *str,
@@ -44,6 +44,8 @@ typedef struct ReadLineState {
 } ReadLineState;
 
 void readline_add_completion(ReadLineState *rs, const char *str);
+void readline_add_completion_of(ReadLineState *rs,
+                                const char *pfx, const char *str);
 void readline_set_completion_index(ReadLineState *rs, int completion_index);
 
 const char *readline_get_history(ReadLineState *rs, unsigned int index);
diff --git a/include/qemu/reserved-region.h b/include/qemu/reserved-region.h
new file mode 100644 (file)
index 0000000..8e6f0a9
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * QEMU ReservedRegion helpers
+ *
+ * Copyright (c) 2023 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_RESERVED_REGION_H
+#define QEMU_RESERVED_REGION_H
+
+#include "exec/memory.h"
+
+/*
+ * Insert a new region into a sorted list of reserved regions. In case
+ * there is overlap with existing regions, the new added region has
+ * higher priority and replaces the overlapped segment.
+ */
+GList *resv_region_list_insert(GList *list, ReservedRegion *reg);
+
+#endif
index 8382c4c779dc91851d029b40b4e180cc399969e1..1690a74f4bfeb2acfb1e85752ec8141163c91d6c 100644 (file)
@@ -6,12 +6,13 @@
  * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
-#ifndef _SELFMAP_H_
-#define _SELFMAP_H_
+#ifndef SELFMAP_H
+#define SELFMAP_H
+
+#include "qemu/interval-tree.h"
 
 typedef struct {
-    unsigned long start;
-    unsigned long end;
+    IntervalTreeNode itree;
 
     /* flags */
     bool is_read;
@@ -19,26 +20,25 @@ typedef struct {
     bool is_exec;
     bool is_priv;
 
-    unsigned long offset;
-    gchar *dev;
-    uint64_t inode;
-    gchar *path;
+    dev_t dev;
+    ino_t inode;
+    uint64_t offset;
+    const char *path;
 } MapInfo;
 
-
 /**
  * read_self_maps:
  *
- * Read /proc/self/maps and return a list of MapInfo structures.
+ * Read /proc/self/maps and return a tree of MapInfo structures.
  */
-GSList *read_self_maps(void);
+IntervalTreeRoot *read_self_maps(void);
 
 /**
  * free_self_maps:
- * @info: a GSlist
+ * @info: an interval tree
  *
- * Free a list of MapInfo structures.
+ * Free a tree of MapInfo structures.
  */
-void free_self_maps(GSList *info);
+void free_self_maps(IntervalTreeRoot *root);
 
-#endif /* _SELFMAP_H_ */
+#endif /* SELFMAP_H */
index 7d1f8135767dafda528669d108e3b89ea08a9236..d935fd80da8c31126fe38da85e5725279af96717 100644 (file)
@@ -14,12 +14,41 @@ int inet_aton(const char *cp, struct in_addr *ia);
 /* misc helpers */
 bool fd_is_socket(int fd);
 int qemu_socket(int domain, int type, int protocol);
+
+/**
+ * qemu_socketpair:
+ * @domain: specifies a communication domain, such as PF_UNIX
+ * @type: specifies the socket type.
+ * @protocol: specifies a particular protocol to be used with the  socket
+ * @sv: an array to store the pair of socket created
+ *
+ * Creates an unnamed pair of connected sockets in the specified domain,
+ * of the specified type, and using the optionally specified protocol.
+ * And automatically set the close-on-exec flags on the returned sockets
+ *
+ * Return 0 on success.
+ */
+int qemu_socketpair(int domain, int type, int protocol, int sv[2]);
+
 int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
+/*
+ * A variant of send(2) which handles partial send.
+ *
+ * Return the number of bytes transferred over the socket.
+ * Set errno if fewer than `count' bytes are sent.
+ *
+ * This function don't work with non-blocking socket's.
+ * Any of the possibilities with non-blocking socket's is bad:
+ *   - return a short write (then name is wrong)
+ *   - busy wait adding (errno == EAGAIN) to the loop
+ */
+ssize_t qemu_send_full(int s, const void *buf, size_t count)
+    G_GNUC_WARN_UNUSED_RESULT;
 int socket_set_cork(int fd, int v);
 int socket_set_nodelay(int fd);
-void qemu_set_block(int fd);
-int qemu_try_set_nonblock(int fd);
-void qemu_set_nonblock(int fd);
+void qemu_socket_set_block(int fd);
+int qemu_socket_try_set_nonblock(int fd);
+void qemu_socket_set_nonblock(int fd);
 int socket_set_fast_reuse(int fd);
 
 #ifdef WIN32
@@ -40,6 +69,7 @@ NetworkAddressFamily inet_netfamily(int family);
 int unix_listen(const char *path, Error **errp);
 int unix_connect(const char *path, Error **errp);
 
+char *socket_uri(SocketAddress *addr);
 SocketAddress *socket_parse(const char *str, Error **errp);
 int socket_connect(SocketAddress *addr, Error **errp);
 int socket_listen(SocketAddress *addr, int num, Error **errp);
@@ -47,6 +77,8 @@ void socket_listen_cleanup(int fd, Error **errp);
 int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp);
 
 /* Old, ipv4 only bits.  Don't use for new code. */
+int convert_host_port(struct sockaddr_in *saddr, const char *host,
+                      const char *port, Error **errp);
 int parse_host_port(struct sockaddr_in *saddr, const char *str,
                     Error **errp);
 int socket_init(void);
@@ -111,4 +143,14 @@ SocketAddress *socket_remote_address(int fd, Error **errp);
  */
 SocketAddress *socket_address_flatten(SocketAddressLegacy *addr);
 
+/**
+ * socket_address_parse_named_fd:
+ *
+ * Modify @addr, replacing a named fd by its corresponding number.
+ * Needed for callers that plan to pass @addr to a context where the
+ * current monitor is not available.
+ *
+ * Return 0 on success.
+ */
+int socket_address_parse_named_fd(SocketAddress *addr, Error **errp);
 #endif /* QEMU_SOCKETS_H */
index fdd3d1b8f98fc0cdc1e189bcea7660eec32246f8..99b5cb724a8952931b14bf12ec1b52ee8ba2fa04 100644 (file)
@@ -21,7 +21,7 @@
 
 typedef struct Stat64 {
 #ifdef CONFIG_ATOMIC64
-    uint64_t value;
+    aligned_uint64_t value;
 #else
     uint32_t low, high;
     uint32_t lock;
@@ -40,6 +40,11 @@ static inline uint64_t stat64_get(const Stat64 *s)
     return qatomic_read__nocheck(&s->value);
 }
 
+static inline void stat64_set(Stat64 *s, uint64_t value)
+{
+    qatomic_set__nocheck(&s->value, value);
+}
+
 static inline void stat64_add(Stat64 *s, uint64_t value)
 {
     qatomic_add(&s->value, value);
@@ -62,6 +67,7 @@ static inline void stat64_max(Stat64 *s, uint64_t value)
 }
 #else
 uint64_t stat64_get(const Stat64 *s);
+void stat64_set(Stat64 *s, uint64_t value);
 bool stat64_min_slow(Stat64 *s, uint64_t value);
 bool stat64_max_slow(Stat64 *s, uint64_t value);
 bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high);
index b5bfa21d521cec579eb93c8566be30fbbd18f186..e7774891f8d65416a2f600b7aa4ed96b0a371502 100644 (file)
@@ -14,8 +14,8 @@
  * side.  The slow side forces processor-level ordering on all other cores
  * through a system call.
  */
-extern void smp_mb_global_init(void);
-extern void smp_mb_global(void);
+void smp_mb_global_init(void);
+void smp_mb_global(void);
 #define smp_mb_placeholder()       barrier()
 #else
 /* Keep it simple, execute a real memory barrier on both sides.  */
diff --git a/include/qemu/thread-context.h b/include/qemu/thread-context.h
new file mode 100644 (file)
index 0000000..2ebd6b7
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * QEMU Thread Context
+ *
+ * Copyright Red Hat Inc., 2022
+ *
+ * Authors:
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef SYSEMU_THREAD_CONTEXT_H
+#define SYSEMU_THREAD_CONTEXT_H
+
+#include "qapi/qapi-types-machine.h"
+#include "qemu/thread.h"
+#include "qom/object.h"
+
+#define TYPE_THREAD_CONTEXT "thread-context"
+OBJECT_DECLARE_TYPE(ThreadContext, ThreadContextClass,
+                    THREAD_CONTEXT)
+
+struct ThreadContextClass {
+    ObjectClass parent_class;
+};
+
+struct ThreadContext {
+    /* private */
+    Object parent;
+
+    /* private */
+    unsigned int thread_id;
+    QemuThread thread;
+
+    /* Semaphore to wait for context thread action. */
+    QemuSemaphore sem;
+    /* Semaphore to wait for action in context thread. */
+    QemuSemaphore sem_thread;
+    /* Mutex to synchronize requests. */
+    QemuMutex mutex;
+
+    /* Commands for the thread to execute. */
+    int thread_cmd;
+    void *thread_cmd_data;
+
+    /* CPU affinity bitmap used for initialization. */
+    unsigned long *init_cpu_bitmap;
+    int init_cpu_nbits;
+};
+
+void thread_context_create_thread(ThreadContext *tc, QemuThread *thread,
+                                  const char *name,
+                                  void *(*start_routine)(void *), void *arg,
+                                  int mode);
+
+#endif /* SYSEMU_THREAD_CONTEXT_H */
index c903525062e298a2e3b42d78538a1d52f3f4ba1a..5f2f3d1386bcec6b4cc3bcc799509011155c3538 100644 (file)
@@ -4,12 +4,6 @@
 #include <pthread.h>
 #include <semaphore.h>
 
-typedef QemuMutex QemuRecMutex;
-#define qemu_rec_mutex_destroy qemu_mutex_destroy
-#define qemu_rec_mutex_lock_impl    qemu_mutex_lock_impl
-#define qemu_rec_mutex_trylock_impl qemu_mutex_trylock_impl
-#define qemu_rec_mutex_unlock qemu_mutex_unlock
-
 struct QemuMutex {
     pthread_mutex_t lock;
 #ifdef CONFIG_DEBUG_MUTEX
@@ -19,20 +13,23 @@ struct QemuMutex {
     bool initialized;
 };
 
+/*
+ * QemuRecMutex cannot be a typedef of QemuMutex lest we have two
+ * compatible cases in _Generic.  See qemu/lockable.h.
+ */
+typedef struct QemuRecMutex {
+    QemuMutex m;
+} QemuRecMutex;
+
 struct QemuCond {
     pthread_cond_t cond;
     bool initialized;
 };
 
 struct QemuSemaphore {
-#ifndef CONFIG_SEM_TIMEDWAIT
-    pthread_mutex_t lock;
-    pthread_cond_t cond;
+    QemuMutex mutex;
+    QemuCond cond;
     unsigned int count;
-#else
-    sem_t sem;
-#endif
-    bool initialized;
 };
 
 struct QemuEvent {
index d0a1a9597eb05a66df50de293f00fea7cb2f5173..d95af4498fc9e74bab49b2fab3bbfb12e7b8edfa 100644 (file)
@@ -18,12 +18,6 @@ struct QemuRecMutex {
     bool initialized;
 };
 
-void qemu_rec_mutex_destroy(QemuRecMutex *mutex);
-void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line);
-int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file,
-                                int line);
-void qemu_rec_mutex_unlock(QemuRecMutex *mutex);
-
 struct QemuCond {
     CONDITION_VARIABLE var;
     bool initialized;
index 54357631846fe6fbbe3b231e6e5d5a9c960549ac..dd3822d7cee9010fb4e07158ee86c429c30315f2 100644 (file)
@@ -3,6 +3,7 @@
 
 #include "qemu/processor.h"
 #include "qemu/atomic.h"
+#include "qemu/clang-tsa.h"
 
 typedef struct QemuCond QemuCond;
 typedef struct QemuSemaphore QemuSemaphore;
@@ -24,9 +25,18 @@ typedef struct QemuThread QemuThread;
 
 void qemu_mutex_init(QemuMutex *mutex);
 void qemu_mutex_destroy(QemuMutex *mutex);
-int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line);
-void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line);
-void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line);
+int TSA_NO_TSA qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file,
+                                       const int line);
+void TSA_NO_TSA qemu_mutex_lock_impl(QemuMutex *mutex, const char *file,
+                                     const int line);
+void TSA_NO_TSA qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file,
+                                       const int line);
+
+void qemu_rec_mutex_init(QemuRecMutex *mutex);
+void qemu_rec_mutex_destroy(QemuRecMutex *mutex);
+void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line);
+int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, int line);
+void qemu_rec_mutex_unlock_impl(QemuRecMutex *mutex, const char *file, int line);
 
 typedef void (*QemuMutexLockFunc)(QemuMutex *m, const char *f, int l);
 typedef int (*QemuMutexTrylockFunc)(QemuMutex *m, const char *f, int l);
@@ -104,6 +114,9 @@ extern QemuCondTimedWaitFunc qemu_cond_timedwait_func;
 #define qemu_mutex_unlock(mutex) \
         qemu_mutex_unlock_impl(mutex, __FILE__, __LINE__)
 
+#define qemu_rec_mutex_unlock(mutex) \
+        qemu_rec_mutex_unlock_impl(mutex, __FILE__, __LINE__)
+
 static inline void (qemu_mutex_lock)(QemuMutex *mutex)
 {
     qemu_mutex_lock(mutex);
@@ -129,8 +142,10 @@ static inline int (qemu_rec_mutex_trylock)(QemuRecMutex *mutex)
     return qemu_rec_mutex_trylock(mutex);
 }
 
-/* Prototypes for other functions are in thread-posix.h/thread-win32.h.  */
-void qemu_rec_mutex_init(QemuRecMutex *mutex);
+static inline void (qemu_rec_mutex_unlock)(QemuRecMutex *mutex)
+{
+    qemu_rec_mutex_unlock(mutex);
+}
 
 void qemu_cond_init(QemuCond *cond);
 void qemu_cond_destroy(QemuCond *cond);
@@ -142,8 +157,8 @@ void qemu_cond_destroy(QemuCond *cond);
  */
 void qemu_cond_signal(QemuCond *cond);
 void qemu_cond_broadcast(QemuCond *cond);
-void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex,
-                         const char *file, const int line);
+void TSA_NO_TSA qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex,
+                                    const char *file, const int line);
 bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms,
                               const char *file, const int line);
 
@@ -174,10 +189,14 @@ void qemu_event_destroy(QemuEvent *ev);
 void qemu_thread_create(QemuThread *thread, const char *name,
                         void *(*start_routine)(void *),
                         void *arg, int mode);
+int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus,
+                             unsigned long nbits);
+int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus,
+                             unsigned long *nbits);
 void *qemu_thread_join(QemuThread *thread);
 void qemu_thread_get_self(QemuThread *thread);
 bool qemu_thread_is_self(QemuThread *thread);
-void qemu_thread_exit(void *retval) QEMU_NORETURN;
+G_NORETURN void qemu_thread_exit(void *retval);
 void qemu_thread_naming(bool enable);
 
 struct Notifier;
@@ -216,17 +235,16 @@ struct QemuSpin {
 
 static inline void qemu_spin_init(QemuSpin *spin)
 {
-    __sync_lock_release(&spin->value);
+    qatomic_set(&spin->value, 0);
 #ifdef CONFIG_TSAN
     __tsan_mutex_create(spin, __tsan_mutex_not_static);
 #endif
 }
 
-/* const parameter because the only purpose here is the TSAN annotation */
-static inline void qemu_spin_destroy(const QemuSpin *spin)
+static inline void qemu_spin_destroy(QemuSpin *spin)
 {
 #ifdef CONFIG_TSAN
-    __tsan_mutex_destroy((void *)spin, __tsan_mutex_not_static);
+    __tsan_mutex_destroy(spin, __tsan_mutex_not_static);
 #endif
 }
 
@@ -235,7 +253,7 @@ static inline void qemu_spin_lock(QemuSpin *spin)
 #ifdef CONFIG_TSAN
     __tsan_mutex_pre_lock(spin, 0);
 #endif
-    while (unlikely(__sync_lock_test_and_set(&spin->value, true))) {
+    while (unlikely(qatomic_xchg(&spin->value, 1))) {
         while (qatomic_read(&spin->value)) {
             cpu_relax();
         }
@@ -250,7 +268,7 @@ static inline bool qemu_spin_trylock(QemuSpin *spin)
 #ifdef CONFIG_TSAN
     __tsan_mutex_pre_lock(spin, __tsan_mutex_try_lock);
 #endif
-    bool busy = __sync_lock_test_and_set(&spin->value, true);
+    bool busy = qatomic_xchg(&spin->value, true);
 #ifdef CONFIG_TSAN
     unsigned flags = __tsan_mutex_try_lock;
     flags |= busy ? __tsan_mutex_try_lock_failed : 0;
@@ -269,7 +287,7 @@ static inline void qemu_spin_unlock(QemuSpin *spin)
 #ifdef CONFIG_TSAN
     __tsan_mutex_pre_unlock(spin, 0);
 #endif
-    __sync_lock_release(&spin->value);
+    qatomic_store_release(&spin->value, 0);
 #ifdef CONFIG_TSAN
     __tsan_mutex_post_unlock(spin, 0);
 #endif
index 05f63461373ae947747c8deb940e03338173c5c0..181245d29bc4193d8d87a9429e45d129efd12320 100644 (file)
@@ -99,13 +99,18 @@ typedef struct ThrottleState {
     int64_t previous_leak;    /* timestamp of the last leak done */
 } ThrottleState;
 
+typedef enum {
+    THROTTLE_READ = 0,
+    THROTTLE_WRITE,
+    THROTTLE_MAX
+} ThrottleDirection;
+
 typedef struct ThrottleTimers {
-    QEMUTimer *timers[2];     /* timers used to do the throttling */
+    QEMUTimer *timers[THROTTLE_MAX];    /* timers used to do the throttling */
     QEMUClockType clock_type; /* the clock used */
 
     /* Callbacks */
-    QEMUTimerCB *read_timer_cb;
-    QEMUTimerCB *write_timer_cb;
+    QEMUTimerCB *timer_cb[THROTTLE_MAX];
     void *timer_opaque;
 } ThrottleTimers;
 
@@ -149,9 +154,10 @@ void throttle_config_init(ThrottleConfig *cfg);
 /* usage */
 bool throttle_schedule_timer(ThrottleState *ts,
                              ThrottleTimers *tt,
-                             bool is_write);
+                             ThrottleDirection direction);
 
-void throttle_account(ThrottleState *ts, bool is_write, uint64_t size);
+void throttle_account(ThrottleState *ts, ThrottleDirection direction,
+                      uint64_t size);
 void throttle_limits_to_config(ThrottleLimits *arg, ThrottleConfig *cfg,
                                Error **errp);
 void throttle_config_to_limits(ThrottleConfig *cfg, ThrottleLimits *var);
index bdecc5b41fe516d4931408b12dea3dee97f09f99..9a366e551fb36670112e4c21a62cfe41e9f530da 100644 (file)
@@ -520,7 +520,7 @@ static inline QEMUTimer *timer_new_full(QEMUTimerListGroup *timer_list_group,
                                         int scale, int attributes,
                                         QEMUTimerCB *cb, void *opaque)
 {
-    QEMUTimer *ts = g_malloc0(sizeof(QEMUTimer));
+    QEMUTimer *ts = g_new0(QEMUTimer, 1);
     timer_init_full(ts, timer_list_group, type, scale, attributes, cb, opaque);
     return ts;
 }
@@ -609,17 +609,6 @@ static inline QEMUTimer *timer_new_ms(QEMUClockType type, QEMUTimerCB *cb,
  */
 void timer_deinit(QEMUTimer *ts);
 
-/**
- * timer_free:
- * @ts: the timer
- *
- * Free a timer (it must not be on the active list)
- */
-static inline void timer_free(QEMUTimer *ts)
-{
-    g_free(ts);
-}
-
 /**
  * timer_del:
  * @ts: the timer
@@ -631,6 +620,21 @@ static inline void timer_free(QEMUTimer *ts)
  */
 void timer_del(QEMUTimer *ts);
 
+/**
+ * timer_free:
+ * @ts: the timer
+ *
+ * Free a timer. This will call timer_del() for you to remove
+ * the timer from the active list if it was still active.
+ */
+static inline void timer_free(QEMUTimer *ts)
+{
+    if (ts) {
+        timer_del(ts);
+        g_free(ts);
+    }
+}
+
 /**
  * timer_mod_ns:
  * @ts: the timer
@@ -806,6 +810,8 @@ static inline int64_t get_clock_realtime(void)
     return tv.tv_sec * 1000000000LL + (tv.tv_usec * 1000);
 }
 
+extern int64_t clock_start;
+
 /* Warning: don't insert tracepoints into these functions, they are
    also used by simpletrace backend and tracepoints would cause
    an infinite recursion! */
@@ -973,6 +979,28 @@ static inline int64_t cpu_get_host_ticks(void)
     return cur - ofs;
 }
 
+#elif defined(__riscv) && __riscv_xlen == 32
+static inline int64_t cpu_get_host_ticks(void)
+{
+    uint32_t lo, hi, tmph;
+    do {
+        asm volatile("RDTIMEH %0\n\t"
+                     "RDTIME %1\n\t"
+                     "RDTIMEH %2"
+                     : "=r"(hi), "=r"(lo), "=r"(tmph));
+    } while (unlikely(tmph != hi));
+    return lo | (uint64_t)hi << 32;
+}
+
+#elif defined(__riscv) && __riscv_xlen > 32
+static inline int64_t cpu_get_host_ticks(void)
+{
+    int64_t val;
+
+    asm volatile("RDTIME %0" : "=r"(val));
+    return val;
+}
+
 #else
 /* The host CPU doesn't have an easily accessible cycle counter.
    Just return a monotonically increasing value.  This will be
@@ -983,13 +1011,4 @@ static inline int64_t cpu_get_host_ticks(void)
 }
 #endif
 
-#ifdef CONFIG_PROFILER
-static inline int64_t profile_getclock(void)
-{
-    return get_clock();
-}
-
-extern int64_t dev_time;
-#endif
-
 #endif
diff --git a/include/qemu/transactions.h b/include/qemu/transactions.h
new file mode 100644 (file)
index 0000000..2f2060a
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Simple transactions API
+ *
+ * Copyright (c) 2021 Virtuozzo International GmbH.
+ *
+ * Author:
+ *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * = Generic transaction API =
+ *
+ * The intended usage is the following: you create "prepare" functions, which
+ * represents the actions. They will usually have Transaction* argument, and
+ * call tran_add() to register finalization callbacks. For finalization
+ * callbacks, prepare corresponding TransactionActionDrv structures.
+ *
+ * Then, when you need to make a transaction, create an empty Transaction by
+ * tran_create(), call your "prepare" functions on it, and finally call
+ * tran_abort() or tran_commit() to finalize the transaction by corresponding
+ * finalization actions in reverse order.
+ *
+ * The clean() functions registered by the drivers in a transaction are called
+ * last, after all abort() or commit() functions have been called.
+ */
+
+#ifndef QEMU_TRANSACTIONS_H
+#define QEMU_TRANSACTIONS_H
+
+#include <gmodule.h>
+
+typedef struct TransactionActionDrv {
+    void (*abort)(void *opaque);
+    void (*commit)(void *opaque);
+    void (*clean)(void *opaque);
+} TransactionActionDrv;
+
+typedef struct Transaction Transaction;
+
+Transaction *tran_new(void);
+void tran_add(Transaction *tran, TransactionActionDrv *drv, void *opaque);
+void tran_abort(Transaction *tran);
+void tran_commit(Transaction *tran);
+
+static inline void tran_finalize(Transaction *tran, int ret)
+{
+    if (ret < 0) {
+        tran_abort(tran);
+    } else {
+        tran_commit(tran);
+    }
+}
+
+#endif /* QEMU_TRANSACTIONS_H */
index 6281eae3b55ddcf989c65f6326e09de1931e0c21..5abdbc3874796bad57e0210d2cdd74f8cda8a611 100644 (file)
  * Incomplete struct types
  * Please keep this list in case-insensitive alphabetical order.
  */
+typedef struct AccelCPUState AccelCPUState;
+typedef struct AccelState AccelState;
 typedef struct AdapterInfo AdapterInfo;
 typedef struct AddressSpace AddressSpace;
 typedef struct AioContext AioContext;
 typedef struct Aml Aml;
 typedef struct AnnounceTimer AnnounceTimer;
+typedef struct ArchCPU ArchCPU;
 typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
 typedef struct BdrvDirtyBitmapIter BdrvDirtyBitmapIter;
 typedef struct BlockBackend BlockBackend;
@@ -34,15 +37,21 @@ typedef struct BlockDriverState BlockDriverState;
 typedef struct BusClass BusClass;
 typedef struct BusState BusState;
 typedef struct Chardev Chardev;
+typedef struct Clock Clock;
 typedef struct CompatProperty CompatProperty;
-typedef struct CoMutex CoMutex;
+typedef struct ConfidentialGuestSupport ConfidentialGuestSupport;
 typedef struct CPUAddressSpace CPUAddressSpace;
+typedef struct CPUArchState CPUArchState;
+typedef struct CpuInfoFast CpuInfoFast;
+typedef struct CPUJumpCache CPUJumpCache;
 typedef struct CPUState CPUState;
+typedef struct CPUTLBEntryFull CPUTLBEntryFull;
 typedef struct DeviceListener DeviceListener;
 typedef struct DeviceState DeviceState;
 typedef struct DirtyBitmapSnapshot DirtyBitmapSnapshot;
 typedef struct DisplayChangeListener DisplayChangeListener;
 typedef struct DriveInfo DriveInfo;
+typedef struct DumpState DumpState;
 typedef struct Error Error;
 typedef struct EventNotifier EventNotifier;
 typedef struct FlatView FlatView;
@@ -50,6 +59,7 @@ typedef struct FWCfgEntry FWCfgEntry;
 typedef struct FWCfgIoState FWCfgIoState;
 typedef struct FWCfgMemState FWCfgMemState;
 typedef struct FWCfgState FWCfgState;
+typedef struct GraphicHwOps GraphicHwOps;
 typedef struct HostMemoryBackend HostMemoryBackend;
 typedef struct I2CBus I2CBus;
 typedef struct I2SCodec I2SCodec;
@@ -57,8 +67,8 @@ typedef struct IOMMUMemoryRegion IOMMUMemoryRegion;
 typedef struct ISABus ISABus;
 typedef struct ISADevice ISADevice;
 typedef struct IsaDma IsaDma;
+typedef struct JSONWriter JSONWriter;
 typedef struct MACAddr MACAddr;
-typedef struct ReservedRegion ReservedRegion;
 typedef struct MachineClass MachineClass;
 typedef struct MachineState MachineState;
 typedef struct MemoryListener MemoryListener;
@@ -86,10 +96,13 @@ typedef struct PCIEAERLog PCIEAERLog;
 typedef struct PCIEAERMsg PCIEAERMsg;
 typedef struct PCIEPort PCIEPort;
 typedef struct PCIESlot PCIESlot;
+typedef struct PCIESriovPF PCIESriovPF;
+typedef struct PCIESriovVF PCIESriovVF;
 typedef struct PCIExpressDevice PCIExpressDevice;
 typedef struct PCIExpressHost PCIExpressHost;
 typedef struct PCIHostDeviceAddress PCIHostDeviceAddress;
 typedef struct PCIHostState PCIHostState;
+typedef struct PICCommonState PICCommonState;
 typedef struct PostcopyDiscardState PostcopyDiscardState;
 typedef struct Property Property;
 typedef struct PropertyInfo PropertyInfo;
@@ -97,6 +110,7 @@ typedef struct QBool QBool;
 typedef struct QDict QDict;
 typedef struct QEMUBH QEMUBH;
 typedef struct QemuConsole QemuConsole;
+typedef struct QEMUCursor QEMUCursor;
 typedef struct QEMUFile QEMUFile;
 typedef struct QemuLockable QemuLockable;
 typedef struct QemuMutex QemuMutex;
@@ -107,7 +121,6 @@ typedef struct QEMUSGList QEMUSGList;
 typedef struct QemuSpin QemuSpin;
 typedef struct QEMUTimer QEMUTimer;
 typedef struct QEMUTimerListGroup QEMUTimerListGroup;
-typedef struct QJSON QJSON;
 typedef struct QList QList;
 typedef struct QNull QNull;
 typedef struct QNum QNum;
@@ -115,9 +128,11 @@ typedef struct QObject QObject;
 typedef struct QString QString;
 typedef struct RAMBlock RAMBlock;
 typedef struct Range Range;
-typedef struct SavedIOTLB SavedIOTLB;
+typedef struct ReservedRegion ReservedRegion;
 typedef struct SHPCDevice SHPCDevice;
 typedef struct SSIBus SSIBus;
+typedef struct TCGHelperInfo TCGHelperInfo;
+typedef struct TranslationBlock TranslationBlock;
 typedef struct VirtIODevice VirtIODevice;
 typedef struct Visitor Visitor;
 typedef struct VMChangeStateEntry VMChangeStateEntry;
index d201c61260de35bc7b0dac7780d273f932510649..1855b764f2abd20af7dc86fa2fd09fdcb3023783 100644 (file)
@@ -41,8 +41,7 @@
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ * License along with this library. If not, see <https://www.gnu.org/licenses/>.
  *
  * Authors:
  *    Richard W.M. Jones <rjones@redhat.com>
 #ifndef QEMU_URI_H
 #define QEMU_URI_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /**
  * URI:
  *
@@ -64,16 +59,16 @@ extern "C" {
  * as described in RFC 2396 but separated for further processing.
  */
 typedef struct URI {
-    char *scheme;      /* the URI scheme */
-    char *opaque;      /* opaque part */
-    char *authority;   /* the authority part */
-    char *server;      /* the server part */
-    char *user;                /* the user part */
-    int port;          /* the port number */
-    char *path;                /* the path string */
-    char *fragment;    /* the fragment identifier */
-    int  cleanup;      /* parsing potentially unclean URI */
-    char *query;       /* the query string (as it appears in the URI) */
+    char *scheme;      /* the URI scheme */
+    char *opaque;      /* opaque part */
+    char *authority;   /* the authority part */
+    char *server;      /* the server part */
+    char *user;        /* the user part */
+    int port;          /* the port number */
+    char *path;        /* the path string */
+    char *fragment;    /* the fragment identifier */
+    int cleanup;       /* parsing potentially unclean URI */
+    char *query;       /* the query string (as it appears in the URI) */
 } URI;
 
 URI *uri_new(void);
@@ -89,23 +84,20 @@ void uri_free(URI *uri);
 
 /* Single web service query parameter 'name=value'. */
 typedef struct QueryParam {
-  char *name;                  /* Name (unescaped). */
-  char *value;                 /* Value (unescaped). */
-  int ignore;                  /* Ignore this field in qparam_get_query */
+  char *name;          /* Name (unescaped). */
+  char *value;         /* Value (unescaped). */
+  int ignore;          /* Ignore this field in qparam_get_query */
 } QueryParam;
 
 /* Set of parameters. */
 typedef struct QueryParams {
-  int n;                       /* number of parameters used */
-  int alloc;                   /* allocated space */
-  QueryParam *p;               /* array of parameters */
+  int n;               /* number of parameters used */
+  int alloc;           /* allocated space */
+  QueryParam *p;       /* array of parameters */
 } QueryParams;
 
-struct QueryParams *query_params_new (int init_alloc);
-extern QueryParams *query_params_parse (const char *query);
-extern void query_params_free (QueryParams *ps);
+QueryParams *query_params_new(int init_alloc);
+QueryParams *query_params_parse(const char *query);
+void query_params_free(QueryParams *ps);
 
-#ifdef __cplusplus
-}
-#endif
 #endif /* QEMU_URI_H */
diff --git a/include/qemu/userfaultfd.h b/include/qemu/userfaultfd.h
new file mode 100644 (file)
index 0000000..18a4314
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Linux UFFD-WP support
+ *
+ * Copyright Virtuozzo GmbH, 2020
+ *
+ * Authors:
+ *  Andrey Gruzdev   <andrey.gruzdev@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef USERFAULTFD_H
+#define USERFAULTFD_H
+
+#ifdef CONFIG_LINUX
+
+#include "exec/hwaddr.h"
+#include <linux/userfaultfd.h>
+
+/**
+ * uffd_open(): Open an userfaultfd handle for current context.
+ *
+ * @flags: The flags we want to pass in when creating the handle.
+ *
+ * Returns: the uffd handle if >=0, or <0 if error happens.
+ */
+int uffd_open(int flags);
+int uffd_query_features(uint64_t *features);
+int uffd_create_fd(uint64_t features, bool non_blocking);
+void uffd_close_fd(int uffd_fd);
+int uffd_register_memory(int uffd_fd, void *addr, uint64_t length,
+        uint64_t mode, uint64_t *ioctls);
+int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length);
+int uffd_change_protection(int uffd_fd, void *addr, uint64_t length,
+        bool wp, bool dont_wake);
+int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr,
+        uint64_t length, bool dont_wake);
+int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake);
+int uffd_wakeup(int uffd_fd, void *addr, uint64_t length);
+int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count);
+bool uffd_poll_events(int uffd_fd, int tmo);
+
+#endif /* CONFIG_LINUX */
+
+#endif /* USERFAULTFD_H */
index 9925febfa54d3d5dc8e81b114a37c3dd19a7613d..869f84af09ddc4aa80d53e133c323781ade4d190 100644 (file)
@@ -61,14 +61,27 @@ typedef struct {
     (clock_seq_hi_and_reserved), (clock_seq_low), (node0), (node1), (node2),\
     (node3), (node4), (node5) }
 
+/* Normal (network byte order) UUID */
+#define UUID(time_low, time_mid, time_hi_and_version,                    \
+  clock_seq_hi_and_reserved, clock_seq_low, node0, node1, node2,         \
+  node3, node4, node5)                                                   \
+  { ((time_low) >> 24) & 0xff, ((time_low) >> 16) & 0xff,                \
+    ((time_low) >> 8) & 0xff, (time_low) & 0xff,                         \
+    ((time_mid) >> 8) & 0xff, (time_mid) & 0xff,                         \
+    ((time_hi_and_version) >> 8) & 0xff, (time_hi_and_version) & 0xff,   \
+    (clock_seq_hi_and_reserved), (clock_seq_low),                        \
+    (node0), (node1), (node2), (node3), (node4), (node5)                 \
+  }
+
 #define UUID_FMT "%02hhx%02hhx%02hhx%02hhx-" \
                  "%02hhx%02hhx-%02hhx%02hhx-" \
                  "%02hhx%02hhx-" \
                  "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
 
-#define UUID_FMT_LEN 36
-
 #define UUID_NONE "00000000-0000-0000-0000-000000000000"
+QEMU_BUILD_BUG_ON(sizeof(UUID_NONE) - 1 != 36);
+
+#define UUID_STR_LEN sizeof(UUID_NONE)
 
 void qemu_uuid_generate(QemuUUID *out);
 
@@ -84,4 +97,6 @@ int qemu_uuid_parse(const char *str, QemuUUID *uuid);
 
 QemuUUID qemu_uuid_bswap(QemuUUID uuid);
 
+uint32_t qemu_uuid_hash(const void *uuid);
+
 #endif
index 4491c8e1a6e9b5a0fc5a9d2bb185a9daccd9224b..bde9495b25476bb7d2461aa53870ab5d25d6f00e 100644 (file)
@@ -18,7 +18,7 @@ typedef struct QEMUVFIOState QEMUVFIOState;
 QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp);
 void qemu_vfio_close(QEMUVFIOState *s);
 int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
-                      bool temporary, uint64_t *iova_list);
+                      bool temporary, uint64_t *iova_list, Error **errp);
 int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s);
 void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host);
 void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
index 0da4c2cc4ccc6545e459d4454277dfa963e9308d..0417ec05332efb4c730dda48f80b22e2f7d3829d 100644 (file)
 #ifndef VHOST_USER_SERVER_H
 #define VHOST_USER_SERVER_H
 
-#include "contrib/libvhost-user/libvhost-user.h"
+#include "subprojects/libvhost-user/libvhost-user.h" /* only for the type definitions */
 #include "io/channel-socket.h"
 #include "io/channel-file.h"
 #include "io/net-listener.h"
-#include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "standard-headers/linux/virtio_blk.h"
 
@@ -41,7 +40,12 @@ typedef struct {
     int max_queues;
     const VuDevIface *vu_iface;
 
+    unsigned int in_flight; /* atomic */
+
     /* Protected by ctx lock */
+    bool in_qio_channel_yield;
+    bool wait_idle;
+    bool quiescing;
     VuDev vu_dev;
     QIOChannel *ioc; /* The I/O channel with the client */
     QIOChannelSocket *sioc; /* The underlying data channel with the client */
@@ -59,6 +63,10 @@ bool vhost_user_server_start(VuServer *server,
 
 void vhost_user_server_stop(VuServer *server);
 
+void vhost_user_server_inc_in_flight(VuServer *server);
+void vhost_user_server_dec_in_flight(VuServer *server);
+bool vhost_user_server_has_in_flight(VuServer *server);
+
 void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
 void vhost_user_server_detach_aio_context(VuServer *server);
 
index 145096e8ee79dcf4d89cbdc9a716a95e892c094b..73a44e2408c27147a410b69e4e2bc1a7e676ee43 100644 (file)
 #ifndef QEMU_WIN_DUMP_DEFS_H
 #define QEMU_WIN_DUMP_DEFS_H
 
+typedef struct WinDumpPhyMemRun32 {
+    uint32_t BasePage;
+    uint32_t PageCount;
+} QEMU_PACKED WinDumpPhyMemRun32;
+
 typedef struct WinDumpPhyMemRun64 {
     uint64_t BasePage;
     uint64_t PageCount;
 } QEMU_PACKED WinDumpPhyMemRun64;
 
+typedef struct WinDumpPhyMemDesc32 {
+    uint32_t NumberOfRuns;
+    uint32_t NumberOfPages;
+    WinDumpPhyMemRun32 Run[86];
+} QEMU_PACKED WinDumpPhyMemDesc32;
+
 typedef struct WinDumpPhyMemDesc64 {
     uint32_t NumberOfRuns;
     uint32_t unused;
@@ -33,6 +44,39 @@ typedef struct WinDumpExceptionRecord {
     uint64_t ExceptionInformation[15];
 } QEMU_PACKED WinDumpExceptionRecord;
 
+typedef struct WinDumpHeader32 {
+    char Signature[4];
+    char ValidDump[4];
+    uint32_t MajorVersion;
+    uint32_t MinorVersion;
+    uint32_t DirectoryTableBase;
+    uint32_t PfnDatabase;
+    uint32_t PsLoadedModuleList;
+    uint32_t PsActiveProcessHead;
+    uint32_t MachineImageType;
+    uint32_t NumberProcessors;
+    union {
+        struct {
+            uint32_t BugcheckCode;
+            uint32_t BugcheckParameter1;
+            uint32_t BugcheckParameter2;
+            uint32_t BugcheckParameter3;
+            uint32_t BugcheckParameter4;
+        };
+        uint8_t BugcheckData[20];
+    };
+    uint8_t VersionUser[32];
+    uint32_t reserved0;
+    uint32_t KdDebuggerDataBlock;
+    union {
+        WinDumpPhyMemDesc32 PhysicalMemoryBlock;
+        uint8_t PhysicalMemoryBlockBuffer[700];
+    };
+    uint8_t reserved1[3200];
+    uint32_t RequiredDumpSpace;
+    uint8_t reserved2[92];
+} QEMU_PACKED WinDumpHeader32;
+
 typedef struct WinDumpHeader64 {
     char Signature[4];
     char ValidDump[4];
@@ -81,24 +125,48 @@ typedef struct WinDumpHeader64 {
     uint8_t reserved[4018];
 } QEMU_PACKED WinDumpHeader64;
 
+typedef union WinDumpHeader {
+    struct {
+        char Signature[4];
+        char ValidDump[4];
+    };
+    WinDumpHeader32 x32;
+    WinDumpHeader64 x64;
+} WinDumpHeader;
+
 #define KDBG_OWNER_TAG_OFFSET64             0x10
 #define KDBG_MM_PFN_DATABASE_OFFSET64       0xC0
 #define KDBG_KI_BUGCHECK_DATA_OFFSET64      0x88
 #define KDBG_KI_PROCESSOR_BLOCK_OFFSET64    0x218
 #define KDBG_OFFSET_PRCB_CONTEXT_OFFSET64   0x338
 
+#define KDBG_OWNER_TAG_OFFSET           KDBG_OWNER_TAG_OFFSET64
+#define KDBG_MM_PFN_DATABASE_OFFSET     KDBG_MM_PFN_DATABASE_OFFSET64
+#define KDBG_KI_BUGCHECK_DATA_OFFSET    KDBG_KI_BUGCHECK_DATA_OFFSET64
+#define KDBG_KI_PROCESSOR_BLOCK_OFFSET  KDBG_KI_PROCESSOR_BLOCK_OFFSET64
+#define KDBG_OFFSET_PRCB_CONTEXT_OFFSET KDBG_OFFSET_PRCB_CONTEXT_OFFSET64
+
 #define VMCOREINFO_ELF_NOTE_HDR_SIZE    24
+#define VMCOREINFO_WIN_DUMP_NOTE_SIZE64 (sizeof(WinDumpHeader64) + \
+                                         VMCOREINFO_ELF_NOTE_HDR_SIZE)
+#define VMCOREINFO_WIN_DUMP_NOTE_SIZE32 (sizeof(WinDumpHeader32) + \
+                                         VMCOREINFO_ELF_NOTE_HDR_SIZE)
 
 #define WIN_CTX_X64 0x00100000L
+#define WIN_CTX_X86 0x00010000L
 
 #define WIN_CTX_CTL 0x00000001L
 #define WIN_CTX_INT 0x00000002L
 #define WIN_CTX_SEG 0x00000004L
 #define WIN_CTX_FP  0x00000008L
 #define WIN_CTX_DBG 0x00000010L
+#define WIN_CTX_EXT 0x00000020L
+
+#define WIN_CTX64_FULL  (WIN_CTX_X64 | WIN_CTX_CTL | WIN_CTX_INT | WIN_CTX_FP)
+#define WIN_CTX64_ALL   (WIN_CTX64_FULL | WIN_CTX_SEG | WIN_CTX_DBG)
 
-#define WIN_CTX_FULL    (WIN_CTX_X64 | WIN_CTX_CTL | WIN_CTX_INT | WIN_CTX_FP)
-#define WIN_CTX_ALL     (WIN_CTX_FULL | WIN_CTX_SEG | WIN_CTX_DBG)
+#define WIN_CTX32_FULL (WIN_CTX_X86 | WIN_CTX_CTL | WIN_CTX_INT | WIN_CTX_SEG)
+#define WIN_CTX32_ALL (WIN_CTX32_FULL | WIN_CTX_FP | WIN_CTX_DBG | WIN_CTX_EXT)
 
 #define LIVE_SYSTEM_DUMP    0x00000161
 
@@ -107,7 +175,41 @@ typedef struct WinM128A {
     int64_t high;
 } QEMU_ALIGNED(16) WinM128A;
 
-typedef struct WinContext {
+typedef struct WinContext32 {
+    uint32_t ContextFlags;
+
+    uint32_t Dr0;
+    uint32_t Dr1;
+    uint32_t Dr2;
+    uint32_t Dr3;
+    uint32_t Dr6;
+    uint32_t Dr7;
+
+    uint8_t  FloatSave[112];
+
+    uint32_t SegGs;
+    uint32_t SegFs;
+    uint32_t SegEs;
+    uint32_t SegDs;
+
+    uint32_t Edi;
+    uint32_t Esi;
+    uint32_t Ebx;
+    uint32_t Edx;
+    uint32_t Ecx;
+    uint32_t Eax;
+
+    uint32_t Ebp;
+    uint32_t Eip;
+    uint32_t SegCs;
+    uint32_t EFlags;
+    uint32_t Esp;
+    uint32_t SegSs;
+
+    uint8_t ExtendedRegisters[512];
+} QEMU_ALIGNED(16) WinContext32;
+
+typedef struct WinContext64 {
     uint64_t PHome[6];
 
     uint32_t ContextFlags;
@@ -174,6 +276,11 @@ typedef struct WinContext {
     uint64_t LastBranchFromRip;
     uint64_t LastExceptionToRip;
     uint64_t LastExceptionFromRip;
-} QEMU_ALIGNED(16) WinContext;
+} QEMU_ALIGNED(16) WinContext64;
+
+typedef union WinContext {
+    WinContext32 x32;
+    WinContext64 x64;
+} WinContext;
 
 #endif /* QEMU_WIN_DUMP_DEFS_H */
index a83fe8e749bd72df7e7258d40cd58a05319ab608..b08a934acc2db68d5f90c72e71148c898ef39431 100644 (file)
 #ifdef CONFIG_LIBATTR
 #  include <attr/xattr.h>
 #else
-#  define ENOATTR ENODATA
-#  include <sys/xattr.h>
+#  if !defined(ENOATTR)
+#    define ENOATTR ENODATA
+#  endif
+#  ifndef CONFIG_WIN32
+#    include <sys/xattr.h>
+#  endif
 #endif
 
 #endif
index 076f1f605447930f466f794ba61b17b514136ee5..0259bbef187ab579e4b78e5e0165d7b5f505af2c 100644 (file)
@@ -48,8 +48,8 @@
  * xxhash32, customized for input variables that are not guaranteed to be
  * contiguous in memory.
  */
-static inline uint32_t
-qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
+static inline uint32_t qemu_xxhash8(uint64_t ab, uint64_t cd, uint64_t ef,
+                                    uint32_t g, uint32_t h)
 {
     uint32_t v1 = QEMU_XXHASH_SEED + PRIME32_1 + PRIME32_2;
     uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2;
@@ -59,6 +59,8 @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
     uint32_t b = ab >> 32;
     uint32_t c = cd;
     uint32_t d = cd >> 32;
+    uint32_t e = ef;
+    uint32_t f = ef >> 32;
     uint32_t h32;
 
     v1 += a * PRIME32_2;
@@ -89,6 +91,9 @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
     h32 += g * PRIME32_3;
     h32  = rol32(h32, 17) * PRIME32_4;
 
+    h32 += h * PRIME32_3;
+    h32  = rol32(h32, 17) * PRIME32_4;
+
     h32 ^= h32 >> 15;
     h32 *= PRIME32_2;
     h32 ^= h32 >> 13;
@@ -100,23 +105,127 @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
 
 static inline uint32_t qemu_xxhash2(uint64_t ab)
 {
-    return qemu_xxhash7(ab, 0, 0, 0, 0);
+    return qemu_xxhash8(ab, 0, 0, 0, 0);
 }
 
 static inline uint32_t qemu_xxhash4(uint64_t ab, uint64_t cd)
 {
-    return qemu_xxhash7(ab, cd, 0, 0, 0);
+    return qemu_xxhash8(ab, cd, 0, 0, 0);
 }
 
 static inline uint32_t qemu_xxhash5(uint64_t ab, uint64_t cd, uint32_t e)
 {
-    return qemu_xxhash7(ab, cd, e, 0, 0);
+    return qemu_xxhash8(ab, cd, 0, e, 0);
 }
 
 static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e,
                                     uint32_t f)
 {
-    return qemu_xxhash7(ab, cd, e, f, 0);
+    return qemu_xxhash8(ab, cd, 0, e, f);
+}
+
+static inline uint32_t qemu_xxhash7(uint64_t ab, uint64_t cd, uint64_t ef,
+                                    uint32_t g)
+{
+    return qemu_xxhash8(ab, cd, ef, g, 0);
+}
+
+/*
+ * Component parts of the XXH64 algorithm from
+ * https://github.com/Cyan4973/xxHash/blob/v0.8.0/xxhash.h
+ *
+ * The complete algorithm looks like
+ *
+ *  i = 0;
+ *  if (len >= 32) {
+ *      v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+ *      v2 = seed + XXH_PRIME64_2;
+ *      v3 = seed + 0;
+ *      v4 = seed - XXH_PRIME64_1;
+ *      do {
+ *          v1 = XXH64_round(v1, get64bits(input + i));
+ *          v2 = XXH64_round(v2, get64bits(input + i + 8));
+ *          v3 = XXH64_round(v3, get64bits(input + i + 16));
+ *          v4 = XXH64_round(v4, get64bits(input + i + 24));
+ *      } while ((i += 32) <= len);
+ *      h64 = XXH64_mergerounds(v1, v2, v3, v4);
+ *  } else {
+ *      h64 = seed + XXH_PRIME64_5;
+ *  }
+ *  h64 += len;
+ *
+ *  for (; i + 8 <= len; i += 8) {
+ *      h64 ^= XXH64_round(0, get64bits(input + i));
+ *      h64 = rol64(h64, 27) * XXH_PRIME64_1 + XXH_PRIME64_4;
+ *  }
+ *  for (; i + 4 <= len; i += 4) {
+ *      h64 ^= get32bits(input + i) * PRIME64_1;
+ *      h64 = rol64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
+ *  }
+ *  for (; i < len; i += 1) {
+ *      h64 ^= get8bits(input + i) * XXH_PRIME64_5;
+ *      h64 = rol64(h64, 11) * XXH_PRIME64_1;
+ *  }
+ *
+ *  return XXH64_avalanche(h64)
+ *
+ * Exposing the pieces instead allows for simplified usage when
+ * the length is a known constant and the inputs are in registers.
+ */
+#define XXH_PRIME64_1   0x9E3779B185EBCA87ULL
+#define XXH_PRIME64_2   0xC2B2AE3D27D4EB4FULL
+#define XXH_PRIME64_3   0x165667B19E3779F9ULL
+#define XXH_PRIME64_4   0x85EBCA77C2B2AE63ULL
+#define XXH_PRIME64_5   0x27D4EB2F165667C5ULL
+
+static inline uint64_t XXH64_round(uint64_t acc, uint64_t input)
+{
+    return rol64(acc + input * XXH_PRIME64_2, 31) * XXH_PRIME64_1;
+}
+
+static inline uint64_t XXH64_mergeround(uint64_t acc, uint64_t val)
+{
+    return (acc ^ XXH64_round(0, val)) * XXH_PRIME64_1 + XXH_PRIME64_4;
+}
+
+static inline uint64_t XXH64_mergerounds(uint64_t v1, uint64_t v2,
+                                         uint64_t v3, uint64_t v4)
+{
+    uint64_t h64;
+
+    h64 = rol64(v1, 1) + rol64(v2, 7) + rol64(v3, 12) + rol64(v4, 18);
+    h64 = XXH64_mergeround(h64, v1);
+    h64 = XXH64_mergeround(h64, v2);
+    h64 = XXH64_mergeround(h64, v3);
+    h64 = XXH64_mergeround(h64, v4);
+
+    return h64;
+}
+
+static inline uint64_t XXH64_avalanche(uint64_t h64)
+{
+    h64 ^= h64 >> 33;
+    h64 *= XXH_PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= XXH_PRIME64_3;
+    h64 ^= h64 >> 32;
+    return h64;
+}
+
+static inline uint64_t qemu_xxhash64_4(uint64_t a, uint64_t b,
+                                       uint64_t c, uint64_t d)
+{
+    uint64_t v1 = QEMU_XXHASH_SEED + XXH_PRIME64_1 + XXH_PRIME64_2;
+    uint64_t v2 = QEMU_XXHASH_SEED + XXH_PRIME64_2;
+    uint64_t v3 = QEMU_XXHASH_SEED + 0;
+    uint64_t v4 = QEMU_XXHASH_SEED - XXH_PRIME64_1;
+
+    v1 = XXH64_round(v1, a);
+    v2 = XXH64_round(v2, b);
+    v3 = XXH64_round(v3, c);
+    v4 = XXH64_round(v4, d);
+
+    return XXH64_avalanche(XXH64_mergerounds(v1, v2, v3, v4));
 }
 
 #endif /* QEMU_XXHASH_H */
diff --git a/include/qemu/yank.h b/include/qemu/yank.h
new file mode 100644 (file)
index 0000000..1907150
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * QEMU yank feature
+ *
+ * Copyright (c) Lukas Straub <lukasstraub2@web.de>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef YANK_H
+#define YANK_H
+
+#include "qapi/qapi-types-yank.h"
+
+typedef void (YankFn)(void *opaque);
+
+/**
+ * yank_register_instance: Register a new instance.
+ *
+ * This registers a new instance for yanking. Must be called before any yank
+ * function is registered for this instance.
+ *
+ * This function is thread-safe.
+ *
+ * @instance: The instance.
+ * @errp: Error object.
+ *
+ * Returns true on success or false if an error occurred.
+ */
+bool yank_register_instance(const YankInstance *instance, Error **errp);
+
+/**
+ * yank_unregister_instance: Unregister a instance.
+ *
+ * This unregisters a instance. Must be called only after every yank function
+ * of the instance has been unregistered.
+ *
+ * This function is thread-safe.
+ *
+ * @instance: The instance.
+ */
+void yank_unregister_instance(const YankInstance *instance);
+
+/**
+ * yank_register_function: Register a yank function
+ *
+ * This registers a yank function. All limitations of qmp oob commands apply
+ * to the yank function as well. See docs/devel/qapi-code-gen.txt under
+ * "An OOB-capable command handler must satisfy the following conditions".
+ *
+ * This function is thread-safe.
+ *
+ * @instance: The instance.
+ * @func: The yank function.
+ * @opaque: Will be passed to the yank function.
+ */
+void yank_register_function(const YankInstance *instance,
+                            YankFn *func,
+                            void *opaque);
+
+/**
+ * yank_unregister_function: Unregister a yank function
+ *
+ * This unregisters a yank function.
+ *
+ * This function is thread-safe.
+ *
+ * @instance: The instance.
+ * @func: func that was passed to yank_register_function.
+ * @opaque: opaque that was passed to yank_register_function.
+ */
+void yank_unregister_function(const YankInstance *instance,
+                              YankFn *func,
+                              void *opaque);
+
+#define BLOCKDEV_YANK_INSTANCE(the_node_name) (&(YankInstance) { \
+        .type = YANK_INSTANCE_TYPE_BLOCK_NODE, \
+        .u.block_node.node_name = (the_node_name) })
+
+#define CHARDEV_YANK_INSTANCE(the_id) (&(YankInstance) { \
+        .type = YANK_INSTANCE_TYPE_CHARDEV, \
+        .u.chardev.id = (the_id) })
+
+#define MIGRATION_YANK_INSTANCE (&(YankInstance) { \
+        .type = YANK_INSTANCE_TYPE_MIGRATION })
+
+#endif
index d378f13a116a6845966489e7436ee25329492743..afccd24ca7ab7197dc7169ba4394d3957657d1fa 100644 (file)
@@ -16,7 +16,6 @@
 
 #include "qapi/qapi-builtin-types.h"
 #include "qemu/module.h"
-#include "qom/object.h"
 
 struct TypeImpl;
 typedef struct TypeImpl *Type;
@@ -616,7 +615,7 @@ Object *object_new_with_props(const char *typename,
                               Object *parent,
                               const char *id,
                               Error **errp,
-                              ...) QEMU_SENTINEL;
+                              ...) G_GNUC_NULL_TERMINATED;
 
 /**
  * object_new_with_propv:
@@ -638,7 +637,8 @@ bool object_apply_global_props(Object *obj, const GPtrArray *props,
                                Error **errp);
 void object_set_machine_compat_props(GPtrArray *compat_props);
 void object_set_accelerator_compat_props(GPtrArray *compat_props);
-void object_register_sugar_prop(const char *driver, const char *prop, const char *value);
+void object_register_sugar_prop(const char *driver, const char *prop,
+                                const char *value, bool optional);
 void object_apply_compat_props(Object *obj);
 
 /**
@@ -675,7 +675,7 @@ void object_apply_compat_props(Object *obj);
  *
  * Returns: %true on success, %false on error.
  */
-bool object_set_props(Object *obj, Error **errp, ...) QEMU_SENTINEL;
+bool object_set_props(Object *obj, Error **errp, ...) G_GNUC_NULL_TERMINATED;
 
 /**
  * object_set_propv:
@@ -727,7 +727,7 @@ void object_initialize(void *obj, size_t size, const char *typename);
 bool object_initialize_child_with_props(Object *parentobj,
                              const char *propname,
                              void *childobj, size_t size, const char *type,
-                             Error **errp, ...) QEMU_SENTINEL;
+                             Error **errp, ...) G_GNUC_NULL_TERMINATED;
 
 /**
  * object_initialize_child_with_propsv:
@@ -860,6 +860,29 @@ static void do_qemu_init_ ## type_array(void)                               \
 }                                                                           \
 type_init(do_qemu_init_ ## type_array)
 
+/**
+ * type_print_class_properties:
+ * @type: a QOM class name
+ *
+ * Print the object's class properties to stdout or the monitor.
+ * Return whether an object was found.
+ */
+bool type_print_class_properties(const char *type);
+
+/**
+ * object_set_properties_from_keyval:
+ * @obj: a QOM object
+ * @qdict: a dictionary with the properties to be set
+ * @from_json: true if leaf values of @qdict are typed, false if they
+ * are strings
+ * @errp: pointer to error object
+ *
+ * For each key in the dictionary, parse the value string if needed,
+ * then set the corresponding property in @obj.
+ */
+void object_set_properties_from_keyval(Object *obj, const QDict *qdict,
+                                       bool from_json, Error **errp);
+
 /**
  * object_class_dynamic_cast_assert:
  * @klass: The #ObjectClass to attempt to cast.
@@ -1070,6 +1093,14 @@ void object_property_set_default_bool(ObjectProperty *prop, bool value);
  */
 void object_property_set_default_str(ObjectProperty *prop, const char *value);
 
+/**
+ * object_property_set_default_list:
+ * @prop: the property to set
+ *
+ * Set the property default value to be an empty list.
+ */
+void object_property_set_default_list(ObjectProperty *prop);
+
 /**
  * object_property_set_default_int:
  * @prop: the property to set
@@ -1519,6 +1550,18 @@ Object *object_resolve_path(const char *path, bool *ambiguous);
 Object *object_resolve_path_type(const char *path, const char *typename,
                                  bool *ambiguous);
 
+/**
+ * object_resolve_path_at:
+ * @parent: the object in which to resolve the path
+ * @path: the path to resolve
+ *
+ * This is like object_resolve_path(), except paths not starting with
+ * a slash are relative to @parent.
+ *
+ * Returns: The resolved object or NULL on path lookup failure.
+ */
+Object *object_resolve_path_at(Object *parent, const char *path);
+
 /**
  * object_resolve_path_component:
  * @parent: the object in which to resolve the path
index 07d5cc883299e1afa4058949958868af5c12f366..02b11a7ef071d8e0f4d7bf22f215ec51b70f7765 100644 (file)
@@ -2,6 +2,7 @@
 #define OBJECT_INTERFACES_H
 
 #include "qom/object.h"
+#include "qapi/qapi-types-qom.h"
 #include "qapi/visitor.h"
 
 #define TYPE_USER_CREATABLE "user-creatable"
@@ -87,67 +88,60 @@ Object *user_creatable_add_type(const char *type, const char *id,
                                 Visitor *v, Error **errp);
 
 /**
- * user_creatable_add_dict:
- * @qdict: the object definition
- * @keyval: if true, use a keyval visitor for processing @qdict (i.e.
- *          assume that all @qdict values are strings); otherwise, use
- *          the normal QObject visitor (i.e. assume all @qdict values
- *          have the QType expected by the QOM object type)
+ * user_creatable_add_qapi:
+ * @options: the object definition
  * @errp: if an error occurs, a pointer to an area to store the error
  *
- * Create an instance of the user creatable object that is defined by
- * @qdict.  The object type is taken from the QDict key 'qom-type', its
- * ID from the key 'id'. The remaining entries in @qdict are used to
- * initialize the object properties.
- *
- * Returns: %true on success, %false on failure.
+ * Create an instance of the user creatable object according to the
+ * options passed in @opts as described in the QAPI schema documentation.
  */
-bool user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp);
+void user_creatable_add_qapi(ObjectOptions *options, Error **errp);
 
 /**
- * user_creatable_add_opts:
- * @opts: the object definition
+ * user_creatable_parse_str:
+ * @str: the object definition string as passed on the command line
  * @errp: if an error occurs, a pointer to an area to store the error
  *
- * Create an instance of the user creatable object whose type
- * is defined in @opts by the 'qom-type' option, placing it
- * in the object composition tree with name provided by the
- * 'id' field. The remaining options in @opts are used to
- * initialize the object properties.
+ * Parses the option for the user creatable object with a keyval parser and
+ * implicit key 'qom-type', converting the result to ObjectOptions.
  *
- * Returns: the newly created object or NULL on error
+ * If a help option is given, print help instead.
+ *
+ * Returns: ObjectOptions on success, NULL when an error occurred (*errp is set
+ * then) or help was printed (*errp is not set).
  */
-Object *user_creatable_add_opts(QemuOpts *opts, Error **errp);
-
+ObjectOptions *user_creatable_parse_str(const char *str, Error **errp);
 
 /**
- * user_creatable_add_opts_predicate:
- * @type: the QOM type to be added
+ * user_creatable_add_from_str:
+ * @str: the object definition string as passed on the command line
+ * @errp: if an error occurs, a pointer to an area to store the error
+ *
+ * Create an instance of the user creatable object by parsing @str
+ * with a keyval parser and implicit key 'qom-type', converting the
+ * result to ObjectOptions and calling into qmp_object_add().
  *
- * A callback function to determine whether an object
- * of type @type should be created. Instances of this
- * callback should be passed to user_creatable_add_opts_foreach
+ * If a help option is given, print help instead.
+ *
+ * Returns: true when an object was successfully created, false when an error
+ * occurred (*errp is set then) or help was printed (*errp is not set).
  */
-typedef bool (*user_creatable_add_opts_predicate)(const char *type);
+bool user_creatable_add_from_str(const char *str, Error **errp);
 
 /**
- * user_creatable_add_opts_foreach:
- * @opaque: a user_creatable_add_opts_predicate callback or NULL
- * @opts: options to create
- * @errp: unused
+ * user_creatable_process_cmdline:
+ * @cmdline: the object definition string as passed on the command line
  *
- * An iterator callback to be used in conjunction with
- * the qemu_opts_foreach() method for creating a list of
- * objects from a set of QemuOpts
+ * Create an instance of the user creatable object by parsing @cmdline
+ * with a keyval parser and implicit key 'qom-type', converting the
+ * result to ObjectOptions and calling into qmp_object_add().
  *
- * The @opaque parameter can be passed a user_creatable_add_opts_predicate
- * callback to filter which types of object are created during iteration.
- * When it fails, report the error.
+ * If a help option is given, print help instead and exit.
  *
- * Returns: 0 on success, -1 when an error was reported.
+ * This function is only meant to be called during command line parsing.
+ * It exits the process on failure or after printing help.
  */
-int user_creatable_add_opts_foreach(void *opaque,
-                                    QemuOpts *opts, Error **errp);
+void user_creatable_process_cmdline(const char *cmdline);
 
 /**
  * user_creatable_print_help:
@@ -163,19 +157,6 @@ int user_creatable_add_opts_foreach(void *opaque,
  */
 bool user_creatable_print_help(const char *type, QemuOpts *opts);
 
-/**
- * user_creatable_print_help_from_qdict:
- * @args: options to create
- *
- * Prints help considering the other options given in @args (if "qom-type" is
- * given and valid, print properties for the type, otherwise print valid types)
- *
- * In contrast to user_creatable_print_help(), this function can't return that
- * no help was requested. It should only be called if we know that help is
- * requested and it will always print some help.
- */
-void user_creatable_print_help_from_qdict(QDict *args);
-
 /**
  * user_creatable_del:
  * @id: the unique ID for the object
@@ -196,11 +177,4 @@ bool user_creatable_del(const char *id, Error **errp);
  */
 void user_creatable_cleanup(void);
 
-/**
- * qmp_object_add:
- *
- * QMP command handler for object-add. See the QAPI schema for documentation.
- */
-void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp);
-
 #endif
index 2a32c08b5e9c9f1006f525336d468523a2e2949e..9b984519121f7a4a3ed19f1962d3ba2d2c58161e 100644 (file)
 #define TYPE_NO_LUN         0x7f
 
 /* Mode page codes for mode sense/set */
+#define MODE_PAGE_VENDOR_SPECIFIC             0x00
 #define MODE_PAGE_R_W_ERROR                   0x01
 #define MODE_PAGE_HD_GEOMETRY                 0x04
 #define MODE_PAGE_FLEXIBLE_DISK_GEOMETRY      0x05
 #define MODE_PAGE_CACHING                     0x08
 #define MODE_PAGE_AUDIO_CTL                   0x0e
+#define MODE_PAGE_CONTROL                     0x0a
 #define MODE_PAGE_POWER                       0x1a
 #define MODE_PAGE_FAULT_FAIL                  0x1c
 #define MODE_PAGE_TO_PROTECT                  0x1d
 #define MODE_PAGE_CAPABILITIES                0x2a
+#define MODE_PAGE_APPLE_VENDOR                0x30
 #define MODE_PAGE_ALLS                        0x3f
 /* Not in Mt. Fuji, but in ATAPI 2.6 -- deprecated now in favor
  * of MODE_PAGE_SENSE_POWER */
index e4ecbe00f6f6276a8602cd32849e91dc94755533..45de28d3542c713e23fa34bb627c87e2e54d3195 100644 (file)
@@ -5,7 +5,6 @@
 #include "qapi/visitor.h"
 #include "qom/object_interfaces.h"
 #include "block/aio.h"
-#include "qemu/coroutine.h"
 
 #define TYPE_PR_MANAGER "pr-manager"
 
index fbc5588279939d70a5e31627bd2a942798a13b39..d5c8efa16ef2fb2409f7ec0140eca2362a2f33d2 100644 (file)
@@ -16,6 +16,22 @@ enum SCSIXferMode {
     SCSI_XFER_TO_DEV,    /*  WRITE, MODE_SELECT, ...         */
 };
 
+enum SCSIHostStatus {
+    SCSI_HOST_OK,
+    SCSI_HOST_NO_LUN,
+    SCSI_HOST_BUSY,
+    SCSI_HOST_TIME_OUT,
+    SCSI_HOST_BAD_RESPONSE,
+    SCSI_HOST_ABORTED,
+    SCSI_HOST_ERROR = 0x07,
+    SCSI_HOST_RESET = 0x08,
+    SCSI_HOST_TRANSPORT_DISRUPTED = 0xe,
+    SCSI_HOST_TARGET_FAILURE = 0x10,
+    SCSI_HOST_RESERVATION_ERROR = 0x11,
+    SCSI_HOST_ALLOCATION_FAILURE = 0x12,
+    SCSI_HOST_MEDIUM_ERROR = 0x13,
+};
+
 typedef struct SCSICommand {
     uint8_t buf[SCSI_CMD_BUF_SIZE];
     int len;
@@ -57,6 +73,8 @@ extern const struct SCSISense sense_code_LBA_OUT_OF_RANGE;
 extern const struct SCSISense sense_code_INVALID_FIELD;
 /* Illegal request, Invalid field in parameter list */
 extern const struct SCSISense sense_code_INVALID_PARAM;
+/* Illegal request, Invalid value in parameter list */
+extern const struct SCSISense sense_code_INVALID_PARAM_VALUE;
 /* Illegal request, Parameter list length error */
 extern const struct SCSISense sense_code_INVALID_PARAM_LEN;
 /* Illegal request, LUN not supported */
@@ -121,16 +139,9 @@ int scsi_cdb_length(uint8_t *buf);
 #ifdef CONFIG_LINUX
 #define SG_ERR_DRIVER_TIMEOUT  0x06
 #define SG_ERR_DRIVER_SENSE    0x08
-
-#define SG_ERR_DID_OK          0x00
-#define SG_ERR_DID_NO_CONNECT  0x01
-#define SG_ERR_DID_BUS_BUSY    0x02
-#define SG_ERR_DID_TIME_OUT    0x03
-
-#define SG_ERR_DRIVER_SENSE    0x08
-
-int sg_io_sense_from_errno(int errno_value, struct sg_io_hdr *io_hdr,
-                           SCSISense *sense);
 #endif
 
+int scsi_sense_from_errno(int errno_value, SCSISense *sense);
+int scsi_sense_from_host_status(uint8_t host_status, SCSISense *sense);
+
 #endif
diff --git a/include/semihosting/common-semi.h b/include/semihosting/common-semi.h
new file mode 100644 (file)
index 0000000..0a91db7
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ *  Semihosting support for systems modeled on the Arm "Angel"
+ *  semihosting syscalls design. This includes Arm and RISC-V processors
+ *
+ *  Copyright (c) 2005, 2007 CodeSourcery.
+ *  Copyright (c) 2019 Linaro
+ *  Written by Paul Brook.
+ *
+ *  Copyright © 2020 by Keith Packard <keithp@keithp.com>
+ *  Adapted for systems other than ARM, including RISC-V, by Keith Packard
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *  ARM Semihosting is documented in:
+ *     Semihosting for AArch32 and AArch64 Release 2.0
+ *     https://static.docs.arm.com/100863/0200/semihosting.pdf
+ *
+ *  RISC-V Semihosting is documented in:
+ *     RISC-V Semihosting
+ *     https://github.com/riscv/riscv-semihosting-spec/blob/main/riscv-semihosting-spec.adoc
+ */
+
+#ifndef COMMON_SEMI_H
+#define COMMON_SEMI_H
+
+void do_common_semihosting(CPUState *cs);
+
+#endif /* COMMON_SEMI_H */
diff --git a/include/semihosting/console.h b/include/semihosting/console.h
new file mode 100644 (file)
index 0000000..bd78e5f
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Semihosting Console
+ *
+ * Copyright (c) 2019 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef SEMIHOST_CONSOLE_H
+#define SEMIHOST_CONSOLE_H
+
+#include "cpu.h"
+
+/**
+ * qemu_semihosting_console_read:
+ * @cs: CPUState
+ * @buf: host buffer
+ * @len: buffer size
+ *
+ * Receive at least one character from debug console.  As this call may
+ * block if no data is available we suspend the CPU and will re-execute the
+ * instruction when data is there. Therefore two conditions must be met:
+ *
+ *   - CPUState is synchronized before calling this function
+ *   - pc is only updated once the character is successfully returned
+ *
+ * Returns: number of characters read, OR cpu_loop_exit!
+ */
+int qemu_semihosting_console_read(CPUState *cs, void *buf, int len);
+
+/**
+ * qemu_semihosting_console_write:
+ * @buf: host buffer
+ * @len: buffer size
+ *
+ * Write len bytes from buf to the debug console.
+ *
+ * Returns: number of bytes written -- this should only ever be short
+ * on some sort of i/o error.
+ */
+int qemu_semihosting_console_write(void *buf, int len);
+
+/*
+ * qemu_semihosting_console_block_until_ready:
+ * @cs: CPUState
+ *
+ * If no data is available we suspend the CPU and will re-execute the
+ * instruction when data is available.
+ */
+void qemu_semihosting_console_block_until_ready(CPUState *cs);
+
+/**
+ * qemu_semihosting_console_ready:
+ *
+ * Return true if characters are available for read; does not block.
+ */
+bool qemu_semihosting_console_ready(void);
+
+#endif /* SEMIHOST_CONSOLE_H */
diff --git a/include/semihosting/guestfd.h b/include/semihosting/guestfd.h
new file mode 100644 (file)
index 0000000..3d426fe
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Hosted file support for semihosting syscalls.
+ *
+ * Copyright (c) 2005, 2007 CodeSourcery.
+ * Copyright (c) 2019 Linaro
+ * Copyright © 2020 by Keith Packard <keithp@keithp.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef SEMIHOSTING_GUESTFD_H
+#define SEMIHOSTING_GUESTFD_H
+
+typedef enum GuestFDType {
+    GuestFDUnused = 0,
+    GuestFDHost,
+    GuestFDGDB,
+    GuestFDStatic,
+    GuestFDConsole,
+} GuestFDType;
+
+/*
+ * Guest file descriptors are integer indexes into an array of
+ * these structures (we will dynamically resize as necessary).
+ */
+typedef struct GuestFD {
+    GuestFDType type;
+    union {
+        int hostfd;
+        struct {
+            const uint8_t *data;
+            size_t len;
+            size_t off;
+        } staticfile;
+    };
+} GuestFD;
+
+/*
+ * For ARM semihosting, we have a separate structure for routing
+ * data for the console which is outside the guest fd address space.
+ */
+extern GuestFD console_in_gf;
+extern GuestFD console_out_gf;
+
+/**
+ * alloc_guestfd:
+ *
+ * Allocate an unused GuestFD index.  The associated guestfd index
+ * will still be GuestFDUnused until it is initialized.
+ */
+int alloc_guestfd(void);
+
+/**
+ * dealloc_guestfd:
+ * @guestfd: GuestFD index
+ *
+ * Deallocate a GuestFD index.  The associated GuestFD structure
+ * will be recycled for a subsequent allocation.
+ */
+void dealloc_guestfd(int guestfd);
+
+/**
+ * get_guestfd:
+ * @guestfd: GuestFD index
+ *
+ * Return the GuestFD structure associated with an initialized @guestfd,
+ * or NULL if it has not been allocated, or hasn't been initialized.
+ */
+GuestFD *get_guestfd(int guestfd);
+
+/**
+ * associate_guestfd:
+ * @guestfd: GuestFD index
+ * @hostfd: host file descriptor
+ *
+ * Initialize the GuestFD for @guestfd to GuestFDHost using @hostfd.
+ */
+void associate_guestfd(int guestfd, int hostfd);
+
+/**
+ * staticfile_guestfd:
+ * @guestfd: GuestFD index
+ * @data: data to be read
+ * @len: length of @data
+ *
+ * Initialize the GuestFD for @guestfd to GuestFDStatic.
+ * The @len bytes at @data will be returned to the guest on reads.
+ */
+void staticfile_guestfd(int guestfd, const uint8_t *data, size_t len);
+
+#endif /* SEMIHOSTING_GUESTFD_H */
diff --git a/include/semihosting/semihost.h b/include/semihosting/semihost.h
new file mode 100644 (file)
index 0000000..97d2a2b
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Semihosting support
+ *
+ * Copyright (c) 2015 Imagination Technologies
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef SEMIHOST_H
+#define SEMIHOST_H
+
+typedef enum SemihostingTarget {
+    SEMIHOSTING_TARGET_AUTO = 0,
+    SEMIHOSTING_TARGET_NATIVE,
+    SEMIHOSTING_TARGET_GDB
+} SemihostingTarget;
+
+#ifdef CONFIG_USER_ONLY
+static inline bool semihosting_enabled(bool is_user)
+{
+    return true;
+}
+
+static inline SemihostingTarget semihosting_get_target(void)
+{
+    return SEMIHOSTING_TARGET_AUTO;
+}
+
+static inline const char *semihosting_get_arg(int i)
+{
+    return NULL;
+}
+
+static inline int semihosting_get_argc(void)
+{
+    return 0;
+}
+
+static inline const char *semihosting_get_cmdline(void)
+{
+    return NULL;
+}
+#else /* !CONFIG_USER_ONLY */
+/**
+ * semihosting_enabled:
+ * @is_user: true if guest code is in usermode (i.e. not privileged)
+ *
+ * Return true if guest code is allowed to make semihosting calls.
+ */
+bool semihosting_enabled(bool is_user);
+SemihostingTarget semihosting_get_target(void);
+const char *semihosting_get_arg(int i);
+int semihosting_get_argc(void);
+const char *semihosting_get_cmdline(void);
+void semihosting_arg_fallback(const char *file, const char *cmd);
+/* for vl.c hooks */
+void qemu_semihosting_enable(void);
+int qemu_semihosting_config_options(const char *optstr);
+void qemu_semihosting_chardev_init(void);
+void qemu_semihosting_console_init(Chardev *);
+#endif /* CONFIG_USER_ONLY */
+void qemu_semihosting_guestfd_init(void);
+
+#endif /* SEMIHOST_H */
diff --git a/include/semihosting/syscalls.h b/include/semihosting/syscalls.h
new file mode 100644 (file)
index 0000000..3a5ec22
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Syscall implementations for semihosting.
+ *
+ * Copyright (c) 2022 Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef SEMIHOSTING_SYSCALLS_H
+#define SEMIHOSTING_SYSCALLS_H
+
+/*
+ * Argument loading from the guest is performed by the caller;
+ * results are returned via the 'complete' callback.
+ *
+ * String operands are in address/len pairs.  The len argument may be 0
+ * (when the semihosting abi does not already provide the length),
+ * or non-zero (where it should include the terminating zero).
+ */
+
+typedef struct GuestFD GuestFD;
+
+void semihost_sys_open(CPUState *cs, gdb_syscall_complete_cb complete,
+                       target_ulong fname, target_ulong fname_len,
+                       int gdb_flags, int mode);
+
+void semihost_sys_close(CPUState *cs, gdb_syscall_complete_cb complete,
+                        int fd);
+
+void semihost_sys_read(CPUState *cs, gdb_syscall_complete_cb complete,
+                       int fd, target_ulong buf, target_ulong len);
+
+void semihost_sys_read_gf(CPUState *cs, gdb_syscall_complete_cb complete,
+                          GuestFD *gf, target_ulong buf, target_ulong len);
+
+void semihost_sys_write(CPUState *cs, gdb_syscall_complete_cb complete,
+                        int fd, target_ulong buf, target_ulong len);
+
+void semihost_sys_write_gf(CPUState *cs, gdb_syscall_complete_cb complete,
+                           GuestFD *gf, target_ulong buf, target_ulong len);
+
+void semihost_sys_lseek(CPUState *cs, gdb_syscall_complete_cb complete,
+                        int fd, int64_t off, int gdb_whence);
+
+void semihost_sys_isatty(CPUState *cs, gdb_syscall_complete_cb complete,
+                         int fd);
+
+void semihost_sys_flen(CPUState *cs, gdb_syscall_complete_cb fstat_cb,
+                       gdb_syscall_complete_cb flen_cb,
+                       int fd, target_ulong fstat_addr);
+
+void semihost_sys_fstat(CPUState *cs, gdb_syscall_complete_cb complete,
+                        int fd, target_ulong addr);
+
+void semihost_sys_stat(CPUState *cs, gdb_syscall_complete_cb complete,
+                       target_ulong fname, target_ulong fname_len,
+                       target_ulong addr);
+
+void semihost_sys_remove(CPUState *cs, gdb_syscall_complete_cb complete,
+                         target_ulong fname, target_ulong fname_len);
+
+void semihost_sys_rename(CPUState *cs, gdb_syscall_complete_cb complete,
+                         target_ulong oname, target_ulong oname_len,
+                         target_ulong nname, target_ulong nname_len);
+
+void semihost_sys_system(CPUState *cs, gdb_syscall_complete_cb complete,
+                         target_ulong cmd, target_ulong cmd_len);
+
+void semihost_sys_gettimeofday(CPUState *cs, gdb_syscall_complete_cb complete,
+                               target_ulong tv_addr, target_ulong tz_addr);
+
+void semihost_sys_poll_one(CPUState *cs, gdb_syscall_complete_cb complete,
+                           int fd, GIOCondition cond, int timeout);
+
+#endif /* SEMIHOSTING_SYSCALLS_H */
diff --git a/include/semihosting/uaccess.h b/include/semihosting/uaccess.h
new file mode 100644 (file)
index 0000000..3963eaf
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Helper routines to provide target memory access for semihosting
+ * syscalls in system emulation mode.
+ *
+ * Copyright (c) 2007 CodeSourcery.
+ *
+ * This code is licensed under the GPL
+ */
+
+#ifndef SEMIHOSTING_UACCESS_H
+#define SEMIHOSTING_UACCESS_H
+
+#ifdef CONFIG_USER_ONLY
+#error Cannot include semihosting/uaccess.h from user emulation
+#endif
+
+#include "cpu.h"
+
+#define get_user_u64(val, addr)                                         \
+    ({ uint64_t val_ = 0;                                               \
+       int ret_ = cpu_memory_rw_debug(env_cpu(env), (addr),             \
+                                      &val_, sizeof(val_), 0);          \
+       (val) = tswap64(val_); ret_; })
+
+#define get_user_u32(val, addr)                                         \
+    ({ uint32_t val_ = 0;                                               \
+       int ret_ = cpu_memory_rw_debug(env_cpu(env), (addr),             \
+                                      &val_, sizeof(val_), 0);          \
+       (val) = tswap32(val_); ret_; })
+
+#define get_user_u8(val, addr)                                          \
+    ({ uint8_t val_ = 0;                                                \
+       int ret_ = cpu_memory_rw_debug(env_cpu(env), (addr),             \
+                                      &val_, sizeof(val_), 0);          \
+       (val) = val_; ret_; })
+
+#define get_user_ual(arg, p) get_user_u32(arg, p)
+
+#define put_user_u64(val, addr)                                         \
+    ({ uint64_t val_ = tswap64(val);                                    \
+       cpu_memory_rw_debug(env_cpu(env), (addr), &val_, sizeof(val_), 1); })
+
+#define put_user_u32(val, addr)                                         \
+    ({ uint32_t val_ = tswap32(val);                                    \
+       cpu_memory_rw_debug(env_cpu(env), (addr), &val_, sizeof(val_), 1); })
+
+#define put_user_ual(arg, p) put_user_u32(arg, p)
+
+void *uaccess_lock_user(CPUArchState *env, target_ulong addr,
+                        target_ulong len, bool copy);
+#define lock_user(type, p, len, copy) uaccess_lock_user(env, p, len, copy)
+
+char *uaccess_lock_user_string(CPUArchState *env, target_ulong addr);
+#define lock_user_string(p) uaccess_lock_user_string(env, p)
+
+void uaccess_unlock_user(CPUArchState *env, void *p,
+                         target_ulong addr, target_ulong len);
+#define unlock_user(s, args, len) uaccess_unlock_user(env, s, args, len)
+
+ssize_t uaccess_strlen_user(CPUArchState *env, target_ulong addr);
+#define target_strlen(p) uaccess_strlen_user(env, p)
+
+#endif /* SEMIHOSTING_SOFTMMU_UACCESS_H */
diff --git a/include/standard-headers/asm-m68k/bootinfo-mac.h b/include/standard-headers/asm-m68k/bootinfo-mac.h
new file mode 100644 (file)
index 0000000..449928c
--- /dev/null
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+** asm/bootinfo-mac.h -- Macintosh-specific boot information definitions
+*/
+
+#ifndef _UAPI_ASM_M68K_BOOTINFO_MAC_H
+#define _UAPI_ASM_M68K_BOOTINFO_MAC_H
+
+
+    /*
+     *  Macintosh-specific tags (all __be32)
+     */
+
+#define BI_MAC_MODEL           0x8000  /* Mac Gestalt ID (model type) */
+#define BI_MAC_VADDR           0x8001  /* Mac video base address */
+#define BI_MAC_VDEPTH          0x8002  /* Mac video depth */
+#define BI_MAC_VROW            0x8003  /* Mac video rowbytes */
+#define BI_MAC_VDIM            0x8004  /* Mac video dimensions */
+#define BI_MAC_VLOGICAL                0x8005  /* Mac video logical base */
+#define BI_MAC_SCCBASE         0x8006  /* Mac SCC base address */
+#define BI_MAC_BTIME           0x8007  /* Mac boot time */
+#define BI_MAC_GMTBIAS         0x8008  /* Mac GMT timezone offset */
+#define BI_MAC_MEMSIZE         0x8009  /* Mac RAM size (sanity check) */
+#define BI_MAC_CPUID           0x800a  /* Mac CPU type (sanity check) */
+#define BI_MAC_ROMBASE         0x800b  /* Mac system ROM base address */
+
+
+    /*
+     *  Macintosh hardware profile data - unused, see macintosh.h for
+     *  reasonable type values
+     */
+
+#define BI_MAC_VIA1BASE                0x8010  /* Mac VIA1 base address (always present) */
+#define BI_MAC_VIA2BASE                0x8011  /* Mac VIA2 base address (type varies) */
+#define BI_MAC_VIA2TYPE                0x8012  /* Mac VIA2 type (VIA, RBV, OSS) */
+#define BI_MAC_ADBTYPE         0x8013  /* Mac ADB interface type */
+#define BI_MAC_ASCBASE         0x8014  /* Mac Apple Sound Chip base address */
+#define BI_MAC_SCSI5380                0x8015  /* Mac NCR 5380 SCSI (base address, multi) */
+#define BI_MAC_SCSIDMA         0x8016  /* Mac SCSI DMA (base address) */
+#define BI_MAC_SCSI5396                0x8017  /* Mac NCR 53C96 SCSI (base address, multi) */
+#define BI_MAC_IDETYPE         0x8018  /* Mac IDE interface type */
+#define BI_MAC_IDEBASE         0x8019  /* Mac IDE interface base address */
+#define BI_MAC_NUBUS           0x801a  /* Mac Nubus type (none, regular, pseudo) */
+#define BI_MAC_SLOTMASK                0x801b  /* Mac Nubus slots present */
+#define BI_MAC_SCCTYPE         0x801c  /* Mac SCC serial type (normal, IOP) */
+#define BI_MAC_ETHTYPE         0x801d  /* Mac builtin ethernet type (Sonic, MACE */
+#define BI_MAC_ETHBASE         0x801e  /* Mac builtin ethernet base address */
+#define BI_MAC_PMU             0x801f  /* Mac power management / poweroff hardware */
+#define BI_MAC_IOP_SWIM                0x8020  /* Mac SWIM floppy IOP */
+#define BI_MAC_IOP_ADB         0x8021  /* Mac ADB IOP */
+
+
+    /*
+     * Macintosh Gestalt numbers (BI_MAC_MODEL)
+     */
+
+#define MAC_MODEL_II           6
+#define MAC_MODEL_IIX          7
+#define MAC_MODEL_IICX         8
+#define MAC_MODEL_SE30         9
+#define MAC_MODEL_IICI         11
+#define MAC_MODEL_IIFX         13      /* And well numbered it is too */
+#define MAC_MODEL_IISI         18
+#define MAC_MODEL_LC           19
+#define MAC_MODEL_Q900         20
+#define MAC_MODEL_PB170                21
+#define MAC_MODEL_Q700         22
+#define MAC_MODEL_CLII         23      /* aka: P200 */
+#define MAC_MODEL_PB140                25
+#define MAC_MODEL_Q950         26      /* aka: WGS95 */
+#define MAC_MODEL_LCIII                27      /* aka: P450 */
+#define MAC_MODEL_PB210                29
+#define MAC_MODEL_C650         30
+#define MAC_MODEL_PB230                32
+#define MAC_MODEL_PB180                33
+#define MAC_MODEL_PB160                34
+#define MAC_MODEL_Q800         35      /* aka: WGS80 */
+#define MAC_MODEL_Q650         36
+#define MAC_MODEL_LCII         37      /* aka: P400/405/410/430 */
+#define MAC_MODEL_PB250                38
+#define MAC_MODEL_IIVI         44
+#define MAC_MODEL_P600         45      /* aka: P600CD */
+#define MAC_MODEL_IIVX         48
+#define MAC_MODEL_CCL          49      /* aka: P250 */
+#define MAC_MODEL_PB165C       50
+#define MAC_MODEL_C610         52      /* aka: WGS60 */
+#define MAC_MODEL_Q610         53
+#define MAC_MODEL_PB145                54      /* aka: PB145B */
+#define MAC_MODEL_P520         56      /* aka: LC520 */
+#define MAC_MODEL_C660         60
+#define MAC_MODEL_P460         62      /* aka: LCIII+, P466/P467 */
+#define MAC_MODEL_PB180C       71
+#define MAC_MODEL_PB520                72      /* aka: PB520C, PB540, PB540C, PB550C */
+#define MAC_MODEL_PB270C       77
+#define MAC_MODEL_Q840         78
+#define MAC_MODEL_P550         80      /* aka: LC550, P560 */
+#define MAC_MODEL_CCLII                83      /* aka: P275 */
+#define MAC_MODEL_PB165                84
+#define MAC_MODEL_PB190                85      /* aka: PB190CS */
+#define MAC_MODEL_TV           88
+#define MAC_MODEL_P475         89      /* aka: LC475, P476 */
+#define MAC_MODEL_P475F                90      /* aka: P475 w/ FPU (no LC040) */
+#define MAC_MODEL_P575         92      /* aka: LC575, P577/P578 */
+#define MAC_MODEL_Q605         94
+#define MAC_MODEL_Q605_ACC     95      /* Q605 accelerated to 33 MHz */
+#define MAC_MODEL_Q630         98      /* aka: LC630, P630/631/635/636/637/638/640 */
+#define MAC_MODEL_P588         99      /* aka: LC580, P580 */
+#define MAC_MODEL_PB280                102
+#define MAC_MODEL_PB280C       103
+#define MAC_MODEL_PB150                115
+
+
+    /*
+     *  Latest Macintosh bootinfo version
+     */
+
+#define MAC_BOOTI_VERSION      MK_BI_VERSION(2, 0)
+
+
+#endif /* _UAPI_ASM_M68K_BOOTINFO_MAC_H */
diff --git a/include/standard-headers/asm-m68k/bootinfo-virt.h b/include/standard-headers/asm-m68k/bootinfo-virt.h
new file mode 100644 (file)
index 0000000..75ac6bb
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+** asm/bootinfo-virt.h -- Virtual-m68k-specific boot information definitions
+*/
+
+#ifndef _UAPI_ASM_M68K_BOOTINFO_VIRT_H
+#define _UAPI_ASM_M68K_BOOTINFO_VIRT_H
+
+#define BI_VIRT_QEMU_VERSION   0x8000
+#define BI_VIRT_GF_PIC_BASE    0x8001
+#define BI_VIRT_GF_RTC_BASE    0x8002
+#define BI_VIRT_GF_TTY_BASE    0x8003
+#define BI_VIRT_VIRTIO_BASE    0x8004
+#define BI_VIRT_CTRL_BASE      0x8005
+
+/* No longer used -- replaced with BI_RNG_SEED -- but don't reuse this index:
+ * #define BI_VIRT_RNG_SEED    0x8006 */
+
+#define VIRT_BOOTI_VERSION     MK_BI_VERSION(2, 0)
+
+#endif /* _UAPI_ASM_M68K_BOOTINFO_MAC_H */
diff --git a/include/standard-headers/asm-m68k/bootinfo.h b/include/standard-headers/asm-m68k/bootinfo.h
new file mode 100644 (file)
index 0000000..b7a8dd2
--- /dev/null
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * asm/bootinfo.h -- Definition of the Linux/m68k boot information structure
+ *
+ * Copyright 1992 by Greg Harp
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef _UAPI_ASM_M68K_BOOTINFO_H
+#define _UAPI_ASM_M68K_BOOTINFO_H
+
+
+    /*
+     *  Bootinfo definitions
+     *
+     *  This is an easily parsable and extendable structure containing all
+     *  information to be passed from the bootstrap to the kernel.
+     *
+     *  This way I hope to keep all future changes back/forewards compatible.
+     *  Thus, keep your fingers crossed...
+     *
+     *  This structure is copied right after the kernel by the bootstrap
+     *  routine.
+     */
+
+struct bi_record {
+       uint16_t tag;                   /* tag ID */
+       uint16_t size;                  /* size of record (in bytes) */
+       uint32_t data[0];                       /* data */
+};
+
+
+struct mem_info {
+       uint32_t addr;                  /* physical address of memory chunk */
+       uint32_t size;                  /* length of memory chunk (in bytes) */
+};
+
+
+    /*
+     *  Tag Definitions
+     *
+     *  Machine independent tags start counting from 0x0000
+     *  Machine dependent tags start counting from 0x8000
+     */
+
+#define BI_LAST                        0x0000  /* last record (sentinel) */
+#define BI_MACHTYPE            0x0001  /* machine type (uint32_t) */
+#define BI_CPUTYPE             0x0002  /* cpu type (uint32_t) */
+#define BI_FPUTYPE             0x0003  /* fpu type (uint32_t) */
+#define BI_MMUTYPE             0x0004  /* mmu type (uint32_t) */
+#define BI_MEMCHUNK            0x0005  /* memory chunk address and size */
+                                       /* (struct mem_info) */
+#define BI_RAMDISK             0x0006  /* ramdisk address and size */
+                                       /* (struct mem_info) */
+#define BI_COMMAND_LINE                0x0007  /* kernel command line parameters */
+                                       /* (string) */
+/*
+ * A random seed used to initialize the RNG. Record format:
+ *
+ *   - length       [ 2 bytes, 16-bit big endian ]
+ *   - seed data    [ `length` bytes, padded to preserve 4-byte struct alignment ]
+ */
+#define BI_RNG_SEED            0x0008
+
+    /*
+     *  Linux/m68k Architectures (BI_MACHTYPE)
+     */
+
+#define MACH_AMIGA             1
+#define MACH_ATARI             2
+#define MACH_MAC               3
+#define MACH_APOLLO            4
+#define MACH_SUN3              5
+#define MACH_MVME147           6
+#define MACH_MVME16x           7
+#define MACH_BVME6000          8
+#define MACH_HP300             9
+#define MACH_Q40               10
+#define MACH_SUN3X             11
+#define MACH_M54XX             12
+#define MACH_M5441X            13
+#define MACH_VIRT              14
+
+
+    /*
+     *  CPU, FPU and MMU types (BI_CPUTYPE, BI_FPUTYPE, BI_MMUTYPE)
+     *
+     *  Note: we may rely on the following equalities:
+     *
+     *      CPU_68020 == MMU_68851
+     *      CPU_68030 == MMU_68030
+     *      CPU_68040 == FPU_68040 == MMU_68040
+     *      CPU_68060 == FPU_68060 == MMU_68060
+     */
+
+#define CPUB_68020             0
+#define CPUB_68030             1
+#define CPUB_68040             2
+#define CPUB_68060             3
+#define CPUB_COLDFIRE          4
+
+#define CPU_68020              (1 << CPUB_68020)
+#define CPU_68030              (1 << CPUB_68030)
+#define CPU_68040              (1 << CPUB_68040)
+#define CPU_68060              (1 << CPUB_68060)
+#define CPU_COLDFIRE           (1 << CPUB_COLDFIRE)
+
+#define FPUB_68881             0
+#define FPUB_68882             1
+#define FPUB_68040             2       /* Internal FPU */
+#define FPUB_68060             3       /* Internal FPU */
+#define FPUB_SUNFPA            4       /* Sun-3 FPA */
+#define FPUB_COLDFIRE          5       /* ColdFire FPU */
+
+#define FPU_68881              (1 << FPUB_68881)
+#define FPU_68882              (1 << FPUB_68882)
+#define FPU_68040              (1 << FPUB_68040)
+#define FPU_68060              (1 << FPUB_68060)
+#define FPU_SUNFPA             (1 << FPUB_SUNFPA)
+#define FPU_COLDFIRE           (1 << FPUB_COLDFIRE)
+
+#define MMUB_68851             0
+#define MMUB_68030             1       /* Internal MMU */
+#define MMUB_68040             2       /* Internal MMU */
+#define MMUB_68060             3       /* Internal MMU */
+#define MMUB_APOLLO            4       /* Custom Apollo */
+#define MMUB_SUN3              5       /* Custom Sun-3 */
+#define MMUB_COLDFIRE          6       /* Internal MMU */
+
+#define MMU_68851              (1 << MMUB_68851)
+#define MMU_68030              (1 << MMUB_68030)
+#define MMU_68040              (1 << MMUB_68040)
+#define MMU_68060              (1 << MMUB_68060)
+#define MMU_SUN3               (1 << MMUB_SUN3)
+#define MMU_APOLLO             (1 << MMUB_APOLLO)
+#define MMU_COLDFIRE           (1 << MMUB_COLDFIRE)
+
+
+    /*
+     * Stuff for bootinfo interface versioning
+     *
+     * At the start of kernel code, a 'struct bootversion' is located.
+     * bootstrap checks for a matching version of the interface before booting
+     * a kernel, to avoid user confusion if kernel and bootstrap don't work
+     * together :-)
+     *
+     * If incompatible changes are made to the bootinfo interface, the major
+     * number below should be stepped (and the minor reset to 0) for the
+     * appropriate machine. If a change is backward-compatible, the minor
+     * should be stepped. "Backwards-compatible" means that booting will work,
+     * but certain features may not.
+     */
+
+#define BOOTINFOV_MAGIC                        0x4249561A      /* 'BIV^Z' */
+#define MK_BI_VERSION(major, minor)    (((major) << 16) + (minor))
+#define BI_VERSION_MAJOR(v)            (((v) >> 16) & 0xffff)
+#define BI_VERSION_MINOR(v)            ((v) & 0xffff)
+
+struct bootversion {
+       uint16_t branch;
+       uint32_t magic;
+       struct {
+               uint32_t machtype;
+               uint32_t version;
+       } machversions[0];
+} QEMU_PACKED;
+
+
+#endif /* _UAPI_ASM_M68K_BOOTINFO_H */
diff --git a/include/standard-headers/asm-s390/virtio-ccw.h b/include/standard-headers/asm-s390/virtio-ccw.h
new file mode 100644 (file)
index 0000000..2b605f7
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/*
+ * Definitions for virtio-ccw devices.
+ *
+ * Copyright IBM Corp. 2013
+ *
+ *  Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_VIRTIO_CCW_H
+#define __KVM_VIRTIO_CCW_H
+
+/* Alignment of vring buffers. */
+#define KVM_VIRTIO_CCW_RING_ALIGN 4096
+
+/* Subcode for diagnose 500 (virtio hypercall). */
+#define KVM_S390_VIRTIO_CCW_NOTIFY 3
+
+#endif
diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h
new file mode 100644 (file)
index 0000000..0b06d2b
--- /dev/null
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_X86_BOOTPARAM_H
+#define _ASM_X86_BOOTPARAM_H
+
+/* setup_data/setup_indirect types */
+#define SETUP_NONE                     0
+#define SETUP_E820_EXT                 1
+#define SETUP_DTB                      2
+#define SETUP_PCI                      3
+#define SETUP_EFI                      4
+#define SETUP_APPLE_PROPERTIES         5
+#define SETUP_JAILHOUSE                        6
+#define SETUP_CC_BLOB                  7
+#define SETUP_IMA                      8
+#define SETUP_RNG_SEED                 9
+#define SETUP_ENUM_MAX                 SETUP_RNG_SEED
+
+#define SETUP_INDIRECT                 (1<<31)
+#define SETUP_TYPE_MAX                 (SETUP_ENUM_MAX | SETUP_INDIRECT)
+
+/* ram_size flags */
+#define RAMDISK_IMAGE_START_MASK       0x07FF
+#define RAMDISK_PROMPT_FLAG            0x8000
+#define RAMDISK_LOAD_FLAG              0x4000
+
+/* loadflags */
+#define LOADED_HIGH    (1<<0)
+#define KASLR_FLAG     (1<<1)
+#define QUIET_FLAG     (1<<5)
+#define KEEP_SEGMENTS  (1<<6)
+#define CAN_USE_HEAP   (1<<7)
+
+/* xloadflags */
+#define XLF_KERNEL_64                  (1<<0)
+#define XLF_CAN_BE_LOADED_ABOVE_4G     (1<<1)
+#define XLF_EFI_HANDOVER_32            (1<<2)
+#define XLF_EFI_HANDOVER_64            (1<<3)
+#define XLF_EFI_KEXEC                  (1<<4)
+#define XLF_5LEVEL                     (1<<5)
+#define XLF_5LEVEL_ENABLED             (1<<6)
+
+
+#endif /* _ASM_X86_BOOTPARAM_H */
diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h
new file mode 100644 (file)
index 0000000..f0235e5
--- /dev/null
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_X86_KVM_PARA_H
+#define _ASM_X86_KVM_PARA_H
+
+#include "standard-headers/linux/types.h"
+
+/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
+ * should be used to determine that a VM is running under KVM.
+ */
+#define KVM_CPUID_SIGNATURE    0x40000000
+#define KVM_SIGNATURE "KVMKVMKVM\0\0\0"
+
+/* This CPUID returns two feature bitmaps in eax, edx. Before enabling
+ * a particular paravirtualization, the appropriate feature bit should
+ * be checked in eax. The performance hint feature bit should be checked
+ * in edx.
+ */
+#define KVM_CPUID_FEATURES     0x40000001
+#define KVM_FEATURE_CLOCKSOURCE                0
+#define KVM_FEATURE_NOP_IO_DELAY       1
+#define KVM_FEATURE_MMU_OP             2
+/* This indicates that the new set of kvmclock msrs
+ * are available. The use of 0x11 and 0x12 is deprecated
+ */
+#define KVM_FEATURE_CLOCKSOURCE2        3
+#define KVM_FEATURE_ASYNC_PF           4
+#define KVM_FEATURE_STEAL_TIME         5
+#define KVM_FEATURE_PV_EOI             6
+#define KVM_FEATURE_PV_UNHALT          7
+#define KVM_FEATURE_PV_TLB_FLUSH       9
+#define KVM_FEATURE_ASYNC_PF_VMEXIT    10
+#define KVM_FEATURE_PV_SEND_IPI        11
+#define KVM_FEATURE_POLL_CONTROL       12
+#define KVM_FEATURE_PV_SCHED_YIELD     13
+#define KVM_FEATURE_ASYNC_PF_INT       14
+#define KVM_FEATURE_MSI_EXT_DEST_ID    15
+#define KVM_FEATURE_HC_MAP_GPA_RANGE   16
+#define KVM_FEATURE_MIGRATION_CONTROL  17
+
+#define KVM_HINTS_REALTIME      0
+
+/* The last 8 bits are used to indicate how to interpret the flags field
+ * in pvclock structure. If no bits are set, all flags are ignored.
+ */
+#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT     24
+
+#define MSR_KVM_WALL_CLOCK  0x11
+#define MSR_KVM_SYSTEM_TIME 0x12
+
+#define KVM_MSR_ENABLED 1
+/* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */
+#define MSR_KVM_WALL_CLOCK_NEW  0x4b564d00
+#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
+#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
+#define MSR_KVM_STEAL_TIME  0x4b564d03
+#define MSR_KVM_PV_EOI_EN      0x4b564d04
+#define MSR_KVM_POLL_CONTROL   0x4b564d05
+#define MSR_KVM_ASYNC_PF_INT   0x4b564d06
+#define MSR_KVM_ASYNC_PF_ACK   0x4b564d07
+#define MSR_KVM_MIGRATION_CONTROL      0x4b564d08
+
+struct kvm_steal_time {
+       uint64_t steal;
+       uint32_t version;
+       uint32_t flags;
+       uint8_t  preempted;
+       uint8_t  uint8_t_pad[3];
+       uint32_t pad[11];
+};
+
+#define KVM_VCPU_PREEMPTED          (1 << 0)
+#define KVM_VCPU_FLUSH_TLB          (1 << 1)
+
+#define KVM_CLOCK_PAIRING_WALLCLOCK 0
+struct kvm_clock_pairing {
+       int64_t sec;
+       int64_t nsec;
+       uint64_t tsc;
+       uint32_t flags;
+       uint32_t pad[9];
+};
+
+#define KVM_STEAL_ALIGNMENT_BITS 5
+#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
+#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
+
+#define KVM_MAX_MMU_OP_BATCH           32
+
+#define KVM_ASYNC_PF_ENABLED                   (1 << 0)
+#define KVM_ASYNC_PF_SEND_ALWAYS               (1 << 1)
+#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT     (1 << 2)
+#define KVM_ASYNC_PF_DELIVERY_AS_INT           (1 << 3)
+
+/* MSR_KVM_ASYNC_PF_INT */
+#define KVM_ASYNC_PF_VEC_MASK                  GENMASK(7, 0)
+
+/* MSR_KVM_MIGRATION_CONTROL */
+#define KVM_MIGRATION_READY            (1 << 0)
+
+/* KVM_HC_MAP_GPA_RANGE */
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_4K   0
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_2M   (1 << 0)
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_1G   (1 << 1)
+#define KVM_MAP_GPA_RANGE_ENC_STAT(n)  (n << 4)
+#define KVM_MAP_GPA_RANGE_ENCRYPTED    KVM_MAP_GPA_RANGE_ENC_STAT(1)
+#define KVM_MAP_GPA_RANGE_DECRYPTED    KVM_MAP_GPA_RANGE_ENC_STAT(0)
+
+/* Operations for KVM_HC_MMU_OP */
+#define KVM_MMU_OP_WRITE_PTE            1
+#define KVM_MMU_OP_FLUSH_TLB           2
+#define KVM_MMU_OP_RELEASE_PT          3
+
+/* Payload for KVM_HC_MMU_OP */
+struct kvm_mmu_op_header {
+       uint32_t op;
+       uint32_t pad;
+};
+
+struct kvm_mmu_op_write_pte {
+       struct kvm_mmu_op_header header;
+       uint64_t pte_phys;
+       uint64_t pte_val;
+};
+
+struct kvm_mmu_op_flush_tlb {
+       struct kvm_mmu_op_header header;
+};
+
+struct kvm_mmu_op_release_pt {
+       struct kvm_mmu_op_header header;
+       uint64_t pt_phys;
+};
+
+#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
+#define KVM_PV_REASON_PAGE_READY 2
+
+struct kvm_vcpu_pv_apf_data {
+       /* Used for 'page not present' events delivered via #PF */
+       uint32_t flags;
+
+       /* Used for 'page ready' events delivered via interrupt notification */
+       uint32_t token;
+
+       uint8_t pad[56];
+       uint32_t enabled;
+};
+
+#define KVM_PV_EOI_BIT 0
+#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT)
+#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
+#define KVM_PV_EOI_DISABLED 0x0
+
+#endif /* _ASM_X86_KVM_PARA_H */
diff --git a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
new file mode 100644 (file)
index 0000000..a5a1c82
--- /dev/null
@@ -0,0 +1,685 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __PVRDMA_DEV_API_H__
+#define __PVRDMA_DEV_API_H__
+
+#include "standard-headers/linux/types.h"
+
+#include "pvrdma_verbs.h"
+
+/*
+ * PVRDMA version macros. Some new features require updates to PVRDMA_VERSION.
+ * These macros allow us to check for different features if necessary.
+ */
+
+#define PVRDMA_ROCEV1_VERSION          17
+#define PVRDMA_ROCEV2_VERSION          18
+#define PVRDMA_PPN64_VERSION           19
+#define PVRDMA_QPHANDLE_VERSION                20
+#define PVRDMA_VERSION                 PVRDMA_QPHANDLE_VERSION
+
+#define PVRDMA_BOARD_ID                        1
+#define PVRDMA_REV_ID                  1
+
+/*
+ * Masks and accessors for page directory, which is a two-level lookup:
+ * page directory -> page table -> page. Only one directory for now, but we
+ * could expand that easily. 9 bits for tables, 9 bits for pages, gives one
+ * gigabyte for memory regions and so forth.
+ */
+
+#define PVRDMA_PDIR_SHIFT              18
+#define PVRDMA_PTABLE_SHIFT            9
+#define PVRDMA_PAGE_DIR_DIR(x)         (((x) >> PVRDMA_PDIR_SHIFT) & 0x1)
+#define PVRDMA_PAGE_DIR_TABLE(x)       (((x) >> PVRDMA_PTABLE_SHIFT) & 0x1ff)
+#define PVRDMA_PAGE_DIR_PAGE(x)                ((x) & 0x1ff)
+#define PVRDMA_PAGE_DIR_MAX_PAGES      (1 * 512 * 512)
+#define PVRDMA_MAX_FAST_REG_PAGES      128
+
+/*
+ * Max MSI-X vectors.
+ */
+
+#define PVRDMA_MAX_INTERRUPTS  3
+
+/* Register offsets within PCI resource on BAR1. */
+#define PVRDMA_REG_VERSION     0x00    /* R: Version of device. */
+#define PVRDMA_REG_DSRLOW      0x04    /* W: Device shared region low PA. */
+#define PVRDMA_REG_DSRHIGH     0x08    /* W: Device shared region high PA. */
+#define PVRDMA_REG_CTL         0x0c    /* W: PVRDMA_DEVICE_CTL */
+#define PVRDMA_REG_REQUEST     0x10    /* W: Indicate device request. */
+#define PVRDMA_REG_ERR         0x14    /* R: Device error. */
+#define PVRDMA_REG_ICR         0x18    /* R: Interrupt cause. */
+#define PVRDMA_REG_IMR         0x1c    /* R/W: Interrupt mask. */
+#define PVRDMA_REG_MACL                0x20    /* R/W: MAC address low. */
+#define PVRDMA_REG_MACH                0x24    /* R/W: MAC address high. */
+
+/* Object flags. */
+#define PVRDMA_CQ_FLAG_ARMED_SOL       BIT(0)  /* Armed for solicited-only. */
+#define PVRDMA_CQ_FLAG_ARMED           BIT(1)  /* Armed. */
+#define PVRDMA_MR_FLAG_DMA             BIT(0)  /* DMA region. */
+#define PVRDMA_MR_FLAG_FRMR            BIT(1)  /* Fast reg memory region. */
+
+/*
+ * Atomic operation capability (masked versions are extended atomic
+ * operations.
+ */
+
+#define PVRDMA_ATOMIC_OP_COMP_SWAP     BIT(0)  /* Compare and swap. */
+#define PVRDMA_ATOMIC_OP_FETCH_ADD     BIT(1)  /* Fetch and add. */
+#define PVRDMA_ATOMIC_OP_MASK_COMP_SWAP        BIT(2)  /* Masked compare and swap. */
+#define PVRDMA_ATOMIC_OP_MASK_FETCH_ADD        BIT(3)  /* Masked fetch and add. */
+
+/*
+ * Base Memory Management Extension flags to support Fast Reg Memory Regions
+ * and Fast Reg Work Requests. Each flag represents a verb operation and we
+ * must support all of them to qualify for the BMME device cap.
+ */
+
+#define PVRDMA_BMME_FLAG_LOCAL_INV     BIT(0)  /* Local Invalidate. */
+#define PVRDMA_BMME_FLAG_REMOTE_INV    BIT(1)  /* Remote Invalidate. */
+#define PVRDMA_BMME_FLAG_FAST_REG_WR   BIT(2)  /* Fast Reg Work Request. */
+
+/*
+ * GID types. The interpretation of the gid_types bit field in the device
+ * capabilities will depend on the device mode. For now, the device only
+ * supports RoCE as mode, so only the different GID types for RoCE are
+ * defined.
+ */
+
+#define PVRDMA_GID_TYPE_FLAG_ROCE_V1   BIT(0)
+#define PVRDMA_GID_TYPE_FLAG_ROCE_V2   BIT(1)
+
+/*
+ * Version checks. This checks whether each version supports specific
+ * capabilities from the device.
+ */
+
+#define PVRDMA_IS_VERSION17(_dev)                                      \
+       (_dev->dsr_version == PVRDMA_ROCEV1_VERSION &&                  \
+        _dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1)
+
+#define PVRDMA_IS_VERSION18(_dev)                                      \
+       (_dev->dsr_version >= PVRDMA_ROCEV2_VERSION &&                  \
+        (_dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1 ||  \
+         _dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V2))   \
+
+#define PVRDMA_SUPPORTED(_dev)                                         \
+       ((_dev->dsr->caps.mode == PVRDMA_DEVICE_MODE_ROCE) &&           \
+        (PVRDMA_IS_VERSION17(_dev) || PVRDMA_IS_VERSION18(_dev)))
+
+/*
+ * Get capability values based on device version.
+ */
+
+#define PVRDMA_GET_CAP(_dev, _old_val, _val) \
+       ((PVRDMA_IS_VERSION18(_dev)) ? _val : _old_val)
+
+enum pvrdma_pci_resource {
+       PVRDMA_PCI_RESOURCE_MSIX,       /* BAR0: MSI-X, MMIO. */
+       PVRDMA_PCI_RESOURCE_REG,        /* BAR1: Registers, MMIO. */
+       PVRDMA_PCI_RESOURCE_UAR,        /* BAR2: UAR pages, MMIO, 64-bit. */
+       PVRDMA_PCI_RESOURCE_LAST,       /* Last. */
+};
+
+enum pvrdma_device_ctl {
+       PVRDMA_DEVICE_CTL_ACTIVATE,     /* Activate device. */
+       PVRDMA_DEVICE_CTL_UNQUIESCE,    /* Unquiesce device. */
+       PVRDMA_DEVICE_CTL_RESET,        /* Reset device. */
+};
+
+enum pvrdma_intr_vector {
+       PVRDMA_INTR_VECTOR_RESPONSE,    /* Command response. */
+       PVRDMA_INTR_VECTOR_ASYNC,       /* Async events. */
+       PVRDMA_INTR_VECTOR_CQ,          /* CQ notification. */
+       /* Additional CQ notification vectors. */
+};
+
+enum pvrdma_intr_cause {
+       PVRDMA_INTR_CAUSE_RESPONSE      = (1 << PVRDMA_INTR_VECTOR_RESPONSE),
+       PVRDMA_INTR_CAUSE_ASYNC         = (1 << PVRDMA_INTR_VECTOR_ASYNC),
+       PVRDMA_INTR_CAUSE_CQ            = (1 << PVRDMA_INTR_VECTOR_CQ),
+};
+
+enum pvrdma_gos_bits {
+       PVRDMA_GOS_BITS_UNK,            /* Unknown. */
+       PVRDMA_GOS_BITS_32,             /* 32-bit. */
+       PVRDMA_GOS_BITS_64,             /* 64-bit. */
+};
+
+enum pvrdma_gos_type {
+       PVRDMA_GOS_TYPE_UNK,            /* Unknown. */
+       PVRDMA_GOS_TYPE_LINUX,          /* Linux. */
+};
+
+enum pvrdma_device_mode {
+       PVRDMA_DEVICE_MODE_ROCE,        /* RoCE. */
+       PVRDMA_DEVICE_MODE_IWARP,       /* iWarp. */
+       PVRDMA_DEVICE_MODE_IB,          /* InfiniBand. */
+};
+
+struct pvrdma_gos_info {
+       uint32_t gos_bits:2;                    /* W: PVRDMA_GOS_BITS_ */
+       uint32_t gos_type:4;                    /* W: PVRDMA_GOS_TYPE_ */
+       uint32_t gos_ver:16;                    /* W: Guest OS version. */
+       uint32_t gos_misc:10;           /* W: Other. */
+       uint32_t pad;                   /* Pad to 8-byte alignment. */
+};
+
+struct pvrdma_device_caps {
+       uint64_t fw_ver;                                /* R: Query device. */
+       uint64_t node_guid;
+       uint64_t sys_image_guid;
+       uint64_t max_mr_size;
+       uint64_t page_size_cap;
+       uint64_t atomic_arg_sizes;                      /* EX verbs. */
+       uint32_t ex_comp_mask;                  /* EX verbs. */
+       uint32_t device_cap_flags2;                     /* EX verbs. */
+       uint32_t max_fa_bit_boundary;           /* EX verbs. */
+       uint32_t log_max_atomic_inline_arg;             /* EX verbs. */
+       uint32_t vendor_id;
+       uint32_t vendor_part_id;
+       uint32_t hw_ver;
+       uint32_t max_qp;
+       uint32_t max_qp_wr;
+       uint32_t device_cap_flags;
+       uint32_t max_sge;
+       uint32_t max_sge_rd;
+       uint32_t max_cq;
+       uint32_t max_cqe;
+       uint32_t max_mr;
+       uint32_t max_pd;
+       uint32_t max_qp_rd_atom;
+       uint32_t max_ee_rd_atom;
+       uint32_t max_res_rd_atom;
+       uint32_t max_qp_init_rd_atom;
+       uint32_t max_ee_init_rd_atom;
+       uint32_t max_ee;
+       uint32_t max_rdd;
+       uint32_t max_mw;
+       uint32_t max_raw_ipv6_qp;
+       uint32_t max_raw_ethy_qp;
+       uint32_t max_mcast_grp;
+       uint32_t max_mcast_qp_attach;
+       uint32_t max_total_mcast_qp_attach;
+       uint32_t max_ah;
+       uint32_t max_fmr;
+       uint32_t max_map_per_fmr;
+       uint32_t max_srq;
+       uint32_t max_srq_wr;
+       uint32_t max_srq_sge;
+       uint32_t max_uar;
+       uint32_t gid_tbl_len;
+       uint16_t max_pkeys;
+       uint8_t  local_ca_ack_delay;
+       uint8_t  phys_port_cnt;
+       uint8_t  mode;                          /* PVRDMA_DEVICE_MODE_ */
+       uint8_t  atomic_ops;                            /* PVRDMA_ATOMIC_OP_* bits */
+       uint8_t  bmme_flags;                            /* FRWR Mem Mgmt Extensions */
+       uint8_t  gid_types;                             /* PVRDMA_GID_TYPE_FLAG_ */
+       uint32_t max_fast_reg_page_list_len;
+};
+
+struct pvrdma_ring_page_info {
+       uint32_t num_pages;                             /* Num pages incl. header. */
+       uint32_t reserved;                              /* Reserved. */
+       uint64_t pdir_dma;                              /* Page directory PA. */
+};
+
+#pragma pack(push, 1)
+
+struct pvrdma_device_shared_region {
+       uint32_t driver_version;                        /* W: Driver version. */
+       uint32_t pad;                           /* Pad to 8-byte align. */
+       struct pvrdma_gos_info gos_info;        /* W: Guest OS information. */
+       uint64_t cmd_slot_dma;                  /* W: Command slot address. */
+       uint64_t resp_slot_dma;                 /* W: Response slot address. */
+       struct pvrdma_ring_page_info async_ring_pages;
+                                               /* W: Async ring page info. */
+       struct pvrdma_ring_page_info cq_ring_pages;
+                                               /* W: CQ ring page info. */
+       union {
+               uint32_t uar_pfn;                       /* W: UAR pageframe. */
+               uint64_t uar_pfn64;                     /* W: 64-bit UAR page frame. */
+       };
+       struct pvrdma_device_caps caps;         /* R: Device capabilities. */
+};
+
+#pragma pack(pop)
+
+/* Event types. Currently a 1:1 mapping with enum ib_event. */
+enum pvrdma_eqe_type {
+       PVRDMA_EVENT_CQ_ERR,
+       PVRDMA_EVENT_QP_FATAL,
+       PVRDMA_EVENT_QP_REQ_ERR,
+       PVRDMA_EVENT_QP_ACCESS_ERR,
+       PVRDMA_EVENT_COMM_EST,
+       PVRDMA_EVENT_SQ_DRAINED,
+       PVRDMA_EVENT_PATH_MIG,
+       PVRDMA_EVENT_PATH_MIG_ERR,
+       PVRDMA_EVENT_DEVICE_FATAL,
+       PVRDMA_EVENT_PORT_ACTIVE,
+       PVRDMA_EVENT_PORT_ERR,
+       PVRDMA_EVENT_LID_CHANGE,
+       PVRDMA_EVENT_PKEY_CHANGE,
+       PVRDMA_EVENT_SM_CHANGE,
+       PVRDMA_EVENT_SRQ_ERR,
+       PVRDMA_EVENT_SRQ_LIMIT_REACHED,
+       PVRDMA_EVENT_QP_LAST_WQE_REACHED,
+       PVRDMA_EVENT_CLIENT_REREGISTER,
+       PVRDMA_EVENT_GID_CHANGE,
+};
+
+/* Event queue element. */
+struct pvrdma_eqe {
+       uint32_t type;  /* Event type. */
+       uint32_t info;  /* Handle, other. */
+};
+
+/* CQ notification queue element. */
+struct pvrdma_cqne {
+       uint32_t info;  /* Handle */
+};
+
+enum {
+       PVRDMA_CMD_FIRST,
+       PVRDMA_CMD_QUERY_PORT = PVRDMA_CMD_FIRST,
+       PVRDMA_CMD_QUERY_PKEY,
+       PVRDMA_CMD_CREATE_PD,
+       PVRDMA_CMD_DESTROY_PD,
+       PVRDMA_CMD_CREATE_MR,
+       PVRDMA_CMD_DESTROY_MR,
+       PVRDMA_CMD_CREATE_CQ,
+       PVRDMA_CMD_RESIZE_CQ,
+       PVRDMA_CMD_DESTROY_CQ,
+       PVRDMA_CMD_CREATE_QP,
+       PVRDMA_CMD_MODIFY_QP,
+       PVRDMA_CMD_QUERY_QP,
+       PVRDMA_CMD_DESTROY_QP,
+       PVRDMA_CMD_CREATE_UC,
+       PVRDMA_CMD_DESTROY_UC,
+       PVRDMA_CMD_CREATE_BIND,
+       PVRDMA_CMD_DESTROY_BIND,
+       PVRDMA_CMD_CREATE_SRQ,
+       PVRDMA_CMD_MODIFY_SRQ,
+       PVRDMA_CMD_QUERY_SRQ,
+       PVRDMA_CMD_DESTROY_SRQ,
+       PVRDMA_CMD_MAX,
+};
+
+enum {
+       PVRDMA_CMD_FIRST_RESP = (1 << 31),
+       PVRDMA_CMD_QUERY_PORT_RESP = PVRDMA_CMD_FIRST_RESP,
+       PVRDMA_CMD_QUERY_PKEY_RESP,
+       PVRDMA_CMD_CREATE_PD_RESP,
+       PVRDMA_CMD_DESTROY_PD_RESP_NOOP,
+       PVRDMA_CMD_CREATE_MR_RESP,
+       PVRDMA_CMD_DESTROY_MR_RESP_NOOP,
+       PVRDMA_CMD_CREATE_CQ_RESP,
+       PVRDMA_CMD_RESIZE_CQ_RESP,
+       PVRDMA_CMD_DESTROY_CQ_RESP_NOOP,
+       PVRDMA_CMD_CREATE_QP_RESP,
+       PVRDMA_CMD_MODIFY_QP_RESP,
+       PVRDMA_CMD_QUERY_QP_RESP,
+       PVRDMA_CMD_DESTROY_QP_RESP,
+       PVRDMA_CMD_CREATE_UC_RESP,
+       PVRDMA_CMD_DESTROY_UC_RESP_NOOP,
+       PVRDMA_CMD_CREATE_BIND_RESP_NOOP,
+       PVRDMA_CMD_DESTROY_BIND_RESP_NOOP,
+       PVRDMA_CMD_CREATE_SRQ_RESP,
+       PVRDMA_CMD_MODIFY_SRQ_RESP,
+       PVRDMA_CMD_QUERY_SRQ_RESP,
+       PVRDMA_CMD_DESTROY_SRQ_RESP,
+       PVRDMA_CMD_MAX_RESP,
+};
+
+struct pvrdma_cmd_hdr {
+       uint64_t response;              /* Key for response lookup. */
+       uint32_t cmd;           /* PVRDMA_CMD_ */
+       uint32_t reserved;              /* Reserved. */
+};
+
+struct pvrdma_cmd_resp_hdr {
+       uint64_t response;              /* From cmd hdr. */
+       uint32_t ack;           /* PVRDMA_CMD_XXX_RESP */
+       uint8_t err;                    /* Error. */
+       uint8_t reserved[3];            /* Reserved. */
+};
+
+struct pvrdma_cmd_query_port {
+       struct pvrdma_cmd_hdr hdr;
+       uint8_t port_num;
+       uint8_t reserved[7];
+};
+
+struct pvrdma_cmd_query_port_resp {
+       struct pvrdma_cmd_resp_hdr hdr;
+       struct pvrdma_port_attr attrs;
+};
+
+struct pvrdma_cmd_query_pkey {
+       struct pvrdma_cmd_hdr hdr;
+       uint8_t port_num;
+       uint8_t index;
+       uint8_t reserved[6];
+};
+
+struct pvrdma_cmd_query_pkey_resp {
+       struct pvrdma_cmd_resp_hdr hdr;
+       uint16_t pkey;
+       uint8_t reserved[6];
+};
+
+struct pvrdma_cmd_create_uc {
+       struct pvrdma_cmd_hdr hdr;
+       union {
+               uint32_t pfn; /* UAR page frame number */
+               uint64_t pfn64; /* 64-bit UAR page frame number */
+       };
+};
+
+struct pvrdma_cmd_create_uc_resp {
+       struct pvrdma_cmd_resp_hdr hdr;
+       uint32_t ctx_handle;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_destroy_uc {
+       struct pvrdma_cmd_hdr hdr;
+       uint32_t ctx_handle;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_create_pd {
+       struct pvrdma_cmd_hdr hdr;
+       uint32_t ctx_handle;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_create_pd_resp {
+       struct pvrdma_cmd_resp_hdr hdr;
+       uint32_t pd_handle;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_destroy_pd {
+       struct pvrdma_cmd_hdr hdr;
+       uint32_t pd_handle;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_create_mr {
+       struct pvrdma_cmd_hdr hdr;
+       uint64_t start;
+       uint64_t length;
+       uint64_t pdir_dma;
+       uint32_t pd_handle;
+       uint32_t access_flags;
+       uint32_t flags;
+       uint32_t nchunks;
+};
+
+struct pvrdma_cmd_create_mr_resp {
+       struct pvrdma_cmd_resp_hdr hdr;
+       uint32_t mr_handle;
+       uint32_t lkey;
+       uint32_t rkey;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_destroy_mr {
+       struct pvrdma_cmd_hdr hdr;
+       uint32_t mr_handle;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_create_cq {
+       struct pvrdma_cmd_hdr hdr;
+       uint64_t pdir_dma;
+       uint32_t ctx_handle;
+       uint32_t cqe;
+       uint32_t nchunks;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_create_cq_resp {
+       struct pvrdma_cmd_resp_hdr hdr;
+       uint32_t cq_handle;
+       uint32_t cqe;
+};
+
+struct pvrdma_cmd_resize_cq {
+       struct pvrdma_cmd_hdr hdr;
+       uint32_t cq_handle;
+       uint32_t cqe;
+};
+
+struct pvrdma_cmd_resize_cq_resp {
+       struct pvrdma_cmd_resp_hdr hdr;
+       uint32_t cqe;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_destroy_cq {
+       struct pvrdma_cmd_hdr hdr;
+       uint32_t cq_handle;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_create_srq {
+       struct pvrdma_cmd_hdr hdr;
+       uint64_t pdir_dma;
+       uint32_t pd_handle;
+       uint32_t nchunks;
+       struct pvrdma_srq_attr attrs;
+       uint8_t srq_type;
+       uint8_t reserved[7];
+};
+
+struct pvrdma_cmd_create_srq_resp {
+       struct pvrdma_cmd_resp_hdr hdr;
+       uint32_t srqn;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_modify_srq {
+       struct pvrdma_cmd_hdr hdr;
+       uint32_t srq_handle;
+       uint32_t attr_mask;
+       struct pvrdma_srq_attr attrs;
+};
+
+struct pvrdma_cmd_query_srq {
+       struct pvrdma_cmd_hdr hdr;
+       uint32_t srq_handle;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_query_srq_resp {
+       struct pvrdma_cmd_resp_hdr hdr;
+       struct pvrdma_srq_attr attrs;
+};
+
+struct pvrdma_cmd_destroy_srq {
+       struct pvrdma_cmd_hdr hdr;
+       uint32_t srq_handle;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_create_qp {
+       struct pvrdma_cmd_hdr hdr;
+       uint64_t pdir_dma;
+       uint32_t pd_handle;
+       uint32_t send_cq_handle;
+       uint32_t recv_cq_handle;
+       uint32_t srq_handle;
+       uint32_t max_send_wr;
+       uint32_t max_recv_wr;
+       uint32_t max_send_sge;
+       uint32_t max_recv_sge;
+       uint32_t max_inline_data;
+       uint32_t lkey;
+       uint32_t access_flags;
+       uint16_t total_chunks;
+       uint16_t send_chunks;
+       uint16_t max_atomic_arg;
+       uint8_t sq_sig_all;
+       uint8_t qp_type;
+       uint8_t is_srq;
+       uint8_t reserved[3];
+};
+
+struct pvrdma_cmd_create_qp_resp {
+       struct pvrdma_cmd_resp_hdr hdr;
+       uint32_t qpn;
+       uint32_t max_send_wr;
+       uint32_t max_recv_wr;
+       uint32_t max_send_sge;
+       uint32_t max_recv_sge;
+       uint32_t max_inline_data;
+};
+
+struct pvrdma_cmd_create_qp_resp_v2 {
+       struct pvrdma_cmd_resp_hdr hdr;
+       uint32_t qpn;
+       uint32_t qp_handle;
+       uint32_t max_send_wr;
+       uint32_t max_recv_wr;
+       uint32_t max_send_sge;
+       uint32_t max_recv_sge;
+       uint32_t max_inline_data;
+};
+
+struct pvrdma_cmd_modify_qp {
+       struct pvrdma_cmd_hdr hdr;
+       uint32_t qp_handle;
+       uint32_t attr_mask;
+       struct pvrdma_qp_attr attrs;
+};
+
+struct pvrdma_cmd_query_qp {
+       struct pvrdma_cmd_hdr hdr;
+       uint32_t qp_handle;
+       uint32_t attr_mask;
+};
+
+struct pvrdma_cmd_query_qp_resp {
+       struct pvrdma_cmd_resp_hdr hdr;
+       struct pvrdma_qp_attr attrs;
+};
+
+struct pvrdma_cmd_destroy_qp {
+       struct pvrdma_cmd_hdr hdr;
+       uint32_t qp_handle;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_destroy_qp_resp {
+       struct pvrdma_cmd_resp_hdr hdr;
+       uint32_t events_reported;
+       uint8_t reserved[4];
+};
+
+struct pvrdma_cmd_create_bind {
+       struct pvrdma_cmd_hdr hdr;
+       uint32_t mtu;
+       uint32_t vlan;
+       uint32_t index;
+       uint8_t new_gid[16];
+       uint8_t gid_type;
+       uint8_t reserved[3];
+};
+
+struct pvrdma_cmd_destroy_bind {
+       struct pvrdma_cmd_hdr hdr;
+       uint32_t index;
+       uint8_t dest_gid[16];
+       uint8_t reserved[4];
+};
+
+union pvrdma_cmd_req {
+       struct pvrdma_cmd_hdr hdr;
+       struct pvrdma_cmd_query_port query_port;
+       struct pvrdma_cmd_query_pkey query_pkey;
+       struct pvrdma_cmd_create_uc create_uc;
+       struct pvrdma_cmd_destroy_uc destroy_uc;
+       struct pvrdma_cmd_create_pd create_pd;
+       struct pvrdma_cmd_destroy_pd destroy_pd;
+       struct pvrdma_cmd_create_mr create_mr;
+       struct pvrdma_cmd_destroy_mr destroy_mr;
+       struct pvrdma_cmd_create_cq create_cq;
+       struct pvrdma_cmd_resize_cq resize_cq;
+       struct pvrdma_cmd_destroy_cq destroy_cq;
+       struct pvrdma_cmd_create_qp create_qp;
+       struct pvrdma_cmd_modify_qp modify_qp;
+       struct pvrdma_cmd_query_qp query_qp;
+       struct pvrdma_cmd_destroy_qp destroy_qp;
+       struct pvrdma_cmd_create_bind create_bind;
+       struct pvrdma_cmd_destroy_bind destroy_bind;
+       struct pvrdma_cmd_create_srq create_srq;
+       struct pvrdma_cmd_modify_srq modify_srq;
+       struct pvrdma_cmd_query_srq query_srq;
+       struct pvrdma_cmd_destroy_srq destroy_srq;
+};
+
+union pvrdma_cmd_resp {
+       struct pvrdma_cmd_resp_hdr hdr;
+       struct pvrdma_cmd_query_port_resp query_port_resp;
+       struct pvrdma_cmd_query_pkey_resp query_pkey_resp;
+       struct pvrdma_cmd_create_uc_resp create_uc_resp;
+       struct pvrdma_cmd_create_pd_resp create_pd_resp;
+       struct pvrdma_cmd_create_mr_resp create_mr_resp;
+       struct pvrdma_cmd_create_cq_resp create_cq_resp;
+       struct pvrdma_cmd_resize_cq_resp resize_cq_resp;
+       struct pvrdma_cmd_create_qp_resp create_qp_resp;
+       struct pvrdma_cmd_create_qp_resp_v2 create_qp_resp_v2;
+       struct pvrdma_cmd_query_qp_resp query_qp_resp;
+       struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp;
+       struct pvrdma_cmd_create_srq_resp create_srq_resp;
+       struct pvrdma_cmd_query_srq_resp query_srq_resp;
+};
+
+#endif /* __PVRDMA_DEV_API_H__ */
diff --git a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
new file mode 100644 (file)
index 0000000..94d41b2
--- /dev/null
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __PVRDMA_VERBS_H__
+#define __PVRDMA_VERBS_H__
+
+#include "standard-headers/linux/types.h"
+
+union pvrdma_gid {
+       uint8_t raw[16];
+       struct {
+               uint64_t        subnet_prefix;
+               uint64_t        interface_id;
+       } global;
+};
+
+enum pvrdma_link_layer {
+       PVRDMA_LINK_LAYER_UNSPECIFIED,
+       PVRDMA_LINK_LAYER_INFINIBAND,
+       PVRDMA_LINK_LAYER_ETHERNET,
+};
+
+enum pvrdma_mtu {
+       PVRDMA_MTU_256  = 1,
+       PVRDMA_MTU_512  = 2,
+       PVRDMA_MTU_1024 = 3,
+       PVRDMA_MTU_2048 = 4,
+       PVRDMA_MTU_4096 = 5,
+};
+
+enum pvrdma_port_state {
+       PVRDMA_PORT_NOP                 = 0,
+       PVRDMA_PORT_DOWN                = 1,
+       PVRDMA_PORT_INIT                = 2,
+       PVRDMA_PORT_ARMED               = 3,
+       PVRDMA_PORT_ACTIVE              = 4,
+       PVRDMA_PORT_ACTIVE_DEFER        = 5,
+};
+
+enum pvrdma_port_cap_flags {
+       PVRDMA_PORT_SM                          = 1 <<  1,
+       PVRDMA_PORT_NOTICE_SUP                  = 1 <<  2,
+       PVRDMA_PORT_TRAP_SUP                    = 1 <<  3,
+       PVRDMA_PORT_OPT_IPD_SUP                 = 1 <<  4,
+       PVRDMA_PORT_AUTO_MIGR_SUP               = 1 <<  5,
+       PVRDMA_PORT_SL_MAP_SUP                  = 1 <<  6,
+       PVRDMA_PORT_MKEY_NVRAM                  = 1 <<  7,
+       PVRDMA_PORT_PKEY_NVRAM                  = 1 <<  8,
+       PVRDMA_PORT_LED_INFO_SUP                = 1 <<  9,
+       PVRDMA_PORT_SM_DISABLED                 = 1 << 10,
+       PVRDMA_PORT_SYS_IMAGE_GUID_SUP          = 1 << 11,
+       PVRDMA_PORT_PKEY_SW_EXT_PORT_TRAP_SUP   = 1 << 12,
+       PVRDMA_PORT_EXTENDED_SPEEDS_SUP         = 1 << 14,
+       PVRDMA_PORT_CM_SUP                      = 1 << 16,
+       PVRDMA_PORT_SNMP_TUNNEL_SUP             = 1 << 17,
+       PVRDMA_PORT_REINIT_SUP                  = 1 << 18,
+       PVRDMA_PORT_DEVICE_MGMT_SUP             = 1 << 19,
+       PVRDMA_PORT_VENDOR_CLASS_SUP            = 1 << 20,
+       PVRDMA_PORT_DR_NOTICE_SUP               = 1 << 21,
+       PVRDMA_PORT_CAP_MASK_NOTICE_SUP         = 1 << 22,
+       PVRDMA_PORT_BOOT_MGMT_SUP               = 1 << 23,
+       PVRDMA_PORT_LINK_LATENCY_SUP            = 1 << 24,
+       PVRDMA_PORT_CLIENT_REG_SUP              = 1 << 25,
+       PVRDMA_PORT_IP_BASED_GIDS               = 1 << 26,
+       PVRDMA_PORT_CAP_FLAGS_MAX               = PVRDMA_PORT_IP_BASED_GIDS,
+};
+
+enum pvrdma_port_width {
+       PVRDMA_WIDTH_1X         = 1,
+       PVRDMA_WIDTH_4X         = 2,
+       PVRDMA_WIDTH_8X         = 4,
+       PVRDMA_WIDTH_12X        = 8,
+};
+
+enum pvrdma_port_speed {
+       PVRDMA_SPEED_SDR        = 1,
+       PVRDMA_SPEED_DDR        = 2,
+       PVRDMA_SPEED_QDR        = 4,
+       PVRDMA_SPEED_FDR10      = 8,
+       PVRDMA_SPEED_FDR        = 16,
+       PVRDMA_SPEED_EDR        = 32,
+};
+
+struct pvrdma_port_attr {
+       enum pvrdma_port_state  state;
+       enum pvrdma_mtu         max_mtu;
+       enum pvrdma_mtu         active_mtu;
+       uint32_t                        gid_tbl_len;
+       uint32_t                        port_cap_flags;
+       uint32_t                        max_msg_sz;
+       uint32_t                        bad_pkey_cntr;
+       uint32_t                        qkey_viol_cntr;
+       uint16_t                        pkey_tbl_len;
+       uint16_t                        lid;
+       uint16_t                        sm_lid;
+       uint8_t                 lmc;
+       uint8_t                 max_vl_num;
+       uint8_t                 sm_sl;
+       uint8_t                 subnet_timeout;
+       uint8_t                 init_type_reply;
+       uint8_t                 active_width;
+       uint8_t                 active_speed;
+       uint8_t                 phys_state;
+       uint8_t                 reserved[2];
+};
+
+struct pvrdma_global_route {
+       union pvrdma_gid        dgid;
+       uint32_t                        flow_label;
+       uint8_t                 sgid_index;
+       uint8_t                 hop_limit;
+       uint8_t                 traffic_class;
+       uint8_t                 reserved;
+};
+
+struct pvrdma_grh {
+       uint32_t                        version_tclass_flow;
+       uint16_t                        paylen;
+       uint8_t                 next_hdr;
+       uint8_t                 hop_limit;
+       union pvrdma_gid        sgid;
+       union pvrdma_gid        dgid;
+};
+
+enum pvrdma_ah_flags {
+       PVRDMA_AH_GRH = 1,
+};
+
+enum pvrdma_rate {
+       PVRDMA_RATE_PORT_CURRENT        = 0,
+       PVRDMA_RATE_2_5_GBPS            = 2,
+       PVRDMA_RATE_5_GBPS              = 5,
+       PVRDMA_RATE_10_GBPS             = 3,
+       PVRDMA_RATE_20_GBPS             = 6,
+       PVRDMA_RATE_30_GBPS             = 4,
+       PVRDMA_RATE_40_GBPS             = 7,
+       PVRDMA_RATE_60_GBPS             = 8,
+       PVRDMA_RATE_80_GBPS             = 9,
+       PVRDMA_RATE_120_GBPS            = 10,
+       PVRDMA_RATE_14_GBPS             = 11,
+       PVRDMA_RATE_56_GBPS             = 12,
+       PVRDMA_RATE_112_GBPS            = 13,
+       PVRDMA_RATE_168_GBPS            = 14,
+       PVRDMA_RATE_25_GBPS             = 15,
+       PVRDMA_RATE_100_GBPS            = 16,
+       PVRDMA_RATE_200_GBPS            = 17,
+       PVRDMA_RATE_300_GBPS            = 18,
+};
+
+struct pvrdma_ah_attr {
+       struct pvrdma_global_route      grh;
+       uint16_t                                dlid;
+       uint16_t                                vlan_id;
+       uint8_t                         sl;
+       uint8_t                         src_path_bits;
+       uint8_t                         static_rate;
+       uint8_t                         ah_flags;
+       uint8_t                         port_num;
+       uint8_t                         dmac[6];
+       uint8_t                         reserved;
+};
+
+enum pvrdma_cq_notify_flags {
+       PVRDMA_CQ_SOLICITED             = 1 << 0,
+       PVRDMA_CQ_NEXT_COMP             = 1 << 1,
+       PVRDMA_CQ_SOLICITED_MASK        = PVRDMA_CQ_SOLICITED |
+                                         PVRDMA_CQ_NEXT_COMP,
+       PVRDMA_CQ_REPORT_MISSED_EVENTS  = 1 << 2,
+};
+
+struct pvrdma_qp_cap {
+       uint32_t        max_send_wr;
+       uint32_t        max_recv_wr;
+       uint32_t        max_send_sge;
+       uint32_t        max_recv_sge;
+       uint32_t        max_inline_data;
+       uint32_t        reserved;
+};
+
+enum pvrdma_sig_type {
+       PVRDMA_SIGNAL_ALL_WR,
+       PVRDMA_SIGNAL_REQ_WR,
+};
+
+enum pvrdma_qp_type {
+       PVRDMA_QPT_SMI,
+       PVRDMA_QPT_GSI,
+       PVRDMA_QPT_RC,
+       PVRDMA_QPT_UC,
+       PVRDMA_QPT_UD,
+       PVRDMA_QPT_RAW_IPV6,
+       PVRDMA_QPT_RAW_ETHERTYPE,
+       PVRDMA_QPT_RAW_PACKET = 8,
+       PVRDMA_QPT_XRC_INI = 9,
+       PVRDMA_QPT_XRC_TGT,
+       PVRDMA_QPT_MAX,
+};
+
+enum pvrdma_qp_create_flags {
+       PVRDMA_QP_CREATE_IPOPVRDMA_UD_LSO               = 1 << 0,
+       PVRDMA_QP_CREATE_BLOCK_MULTICAST_LOOPBACK       = 1 << 1,
+};
+
+enum pvrdma_qp_attr_mask {
+       PVRDMA_QP_STATE                 = 1 << 0,
+       PVRDMA_QP_CUR_STATE             = 1 << 1,
+       PVRDMA_QP_EN_SQD_ASYNC_NOTIFY   = 1 << 2,
+       PVRDMA_QP_ACCESS_FLAGS          = 1 << 3,
+       PVRDMA_QP_PKEY_INDEX            = 1 << 4,
+       PVRDMA_QP_PORT                  = 1 << 5,
+       PVRDMA_QP_QKEY                  = 1 << 6,
+       PVRDMA_QP_AV                    = 1 << 7,
+       PVRDMA_QP_PATH_MTU              = 1 << 8,
+       PVRDMA_QP_TIMEOUT               = 1 << 9,
+       PVRDMA_QP_RETRY_CNT             = 1 << 10,
+       PVRDMA_QP_RNR_RETRY             = 1 << 11,
+       PVRDMA_QP_RQ_PSN                = 1 << 12,
+       PVRDMA_QP_MAX_QP_RD_ATOMIC      = 1 << 13,
+       PVRDMA_QP_ALT_PATH              = 1 << 14,
+       PVRDMA_QP_MIN_RNR_TIMER         = 1 << 15,
+       PVRDMA_QP_SQ_PSN                = 1 << 16,
+       PVRDMA_QP_MAX_DEST_RD_ATOMIC    = 1 << 17,
+       PVRDMA_QP_PATH_MIG_STATE        = 1 << 18,
+       PVRDMA_QP_CAP                   = 1 << 19,
+       PVRDMA_QP_DEST_QPN              = 1 << 20,
+       PVRDMA_QP_ATTR_MASK_MAX         = PVRDMA_QP_DEST_QPN,
+};
+
+enum pvrdma_qp_state {
+       PVRDMA_QPS_RESET,
+       PVRDMA_QPS_INIT,
+       PVRDMA_QPS_RTR,
+       PVRDMA_QPS_RTS,
+       PVRDMA_QPS_SQD,
+       PVRDMA_QPS_SQE,
+       PVRDMA_QPS_ERR,
+};
+
+enum pvrdma_mig_state {
+       PVRDMA_MIG_MIGRATED,
+       PVRDMA_MIG_REARM,
+       PVRDMA_MIG_ARMED,
+};
+
+enum pvrdma_mw_type {
+       PVRDMA_MW_TYPE_1 = 1,
+       PVRDMA_MW_TYPE_2 = 2,
+};
+
+struct pvrdma_srq_attr {
+       uint32_t                        max_wr;
+       uint32_t                        max_sge;
+       uint32_t                        srq_limit;
+       uint32_t                        reserved;
+};
+
+struct pvrdma_qp_attr {
+       enum pvrdma_qp_state    qp_state;
+       enum pvrdma_qp_state    cur_qp_state;
+       enum pvrdma_mtu         path_mtu;
+       enum pvrdma_mig_state   path_mig_state;
+       uint32_t                        qkey;
+       uint32_t                        rq_psn;
+       uint32_t                        sq_psn;
+       uint32_t                        dest_qp_num;
+       uint32_t                        qp_access_flags;
+       uint16_t                        pkey_index;
+       uint16_t                        alt_pkey_index;
+       uint8_t                 en_sqd_async_notify;
+       uint8_t                 sq_draining;
+       uint8_t                 max_rd_atomic;
+       uint8_t                 max_dest_rd_atomic;
+       uint8_t                 min_rnr_timer;
+       uint8_t                 port_num;
+       uint8_t                 timeout;
+       uint8_t                 retry_cnt;
+       uint8_t                 rnr_retry;
+       uint8_t                 alt_port_num;
+       uint8_t                 alt_timeout;
+       uint8_t                 reserved[5];
+       struct pvrdma_qp_cap    cap;
+       struct pvrdma_ah_attr   ah_attr;
+       struct pvrdma_ah_attr   alt_ah_attr;
+};
+
+enum pvrdma_send_flags {
+       PVRDMA_SEND_FENCE       = 1 << 0,
+       PVRDMA_SEND_SIGNALED    = 1 << 1,
+       PVRDMA_SEND_SOLICITED   = 1 << 2,
+       PVRDMA_SEND_INLINE      = 1 << 3,
+       PVRDMA_SEND_IP_CSUM     = 1 << 4,
+       PVRDMA_SEND_FLAGS_MAX   = PVRDMA_SEND_IP_CSUM,
+};
+
+enum pvrdma_access_flags {
+       PVRDMA_ACCESS_LOCAL_WRITE       = 1 << 0,
+       PVRDMA_ACCESS_REMOTE_WRITE      = 1 << 1,
+       PVRDMA_ACCESS_REMOTE_READ       = 1 << 2,
+       PVRDMA_ACCESS_REMOTE_ATOMIC     = 1 << 3,
+       PVRDMA_ACCESS_MW_BIND           = 1 << 4,
+       PVRDMA_ZERO_BASED               = 1 << 5,
+       PVRDMA_ACCESS_ON_DEMAND         = 1 << 6,
+       PVRDMA_ACCESS_FLAGS_MAX         = PVRDMA_ACCESS_ON_DEMAND,
+};
+
+#endif /* __PVRDMA_VERBS_H__ */
diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h
new file mode 100644 (file)
index 0000000..72279f4
--- /dev/null
@@ -0,0 +1,1568 @@
+/*
+ * Copyright 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef DRM_FOURCC_H
+#define DRM_FOURCC_H
+
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/**
+ * DOC: overview
+ *
+ * In the DRM subsystem, framebuffer pixel formats are described using the
+ * fourcc codes defined in `include/uapi/drm/drm_fourcc.h`. In addition to the
+ * fourcc code, a Format Modifier may optionally be provided, in order to
+ * further describe the buffer's format - for example tiling or compression.
+ *
+ * Format Modifiers
+ * ----------------
+ *
+ * Format modifiers are used in conjunction with a fourcc code, forming a
+ * unique fourcc:modifier pair. This format:modifier pair must fully define the
+ * format and data layout of the buffer, and should be the only way to describe
+ * that particular buffer.
+ *
+ * Having multiple fourcc:modifier pairs which describe the same layout should
+ * be avoided, as such aliases run the risk of different drivers exposing
+ * different names for the same data format, forcing userspace to understand
+ * that they are aliases.
+ *
+ * Format modifiers may change any property of the buffer, including the number
+ * of planes and/or the required allocation size. Format modifiers are
+ * vendor-namespaced, and as such the relationship between a fourcc code and a
+ * modifier is specific to the modifer being used. For example, some modifiers
+ * may preserve meaning - such as number of planes - from the fourcc code,
+ * whereas others may not.
+ *
+ * Modifiers must uniquely encode buffer layout. In other words, a buffer must
+ * match only a single modifier. A modifier must not be a subset of layouts of
+ * another modifier. For instance, it's incorrect to encode pitch alignment in
+ * a modifier: a buffer may match a 64-pixel aligned modifier and a 32-pixel
+ * aligned modifier. That said, modifiers can have implicit minimal
+ * requirements.
+ *
+ * For modifiers where the combination of fourcc code and modifier can alias,
+ * a canonical pair needs to be defined and used by all drivers. Preferred
+ * combinations are also encouraged where all combinations might lead to
+ * confusion and unnecessarily reduced interoperability. An example for the
+ * latter is AFBC, where the ABGR layouts are preferred over ARGB layouts.
+ *
+ * There are two kinds of modifier users:
+ *
+ * - Kernel and user-space drivers: for drivers it's important that modifiers
+ *   don't alias, otherwise two drivers might support the same format but use
+ *   different aliases, preventing them from sharing buffers in an efficient
+ *   format.
+ * - Higher-level programs interfacing with KMS/GBM/EGL/Vulkan/etc: these users
+ *   see modifiers as opaque tokens they can check for equality and intersect.
+ *   These users musn't need to know to reason about the modifier value
+ *   (i.e. they are not expected to extract information out of the modifier).
+ *
+ * Vendors should document their modifier usage in as much detail as
+ * possible, to ensure maximum compatibility across devices, drivers and
+ * applications.
+ *
+ * The authoritative list of format modifier codes is found in
+ * `include/uapi/drm/drm_fourcc.h`
+ *
+ * Open Source User Waiver
+ * -----------------------
+ *
+ * Because this is the authoritative source for pixel formats and modifiers
+ * referenced by GL, Vulkan extensions and other standards and hence used both
+ * by open source and closed source driver stacks, the usual requirement for an
+ * upstream in-kernel or open source userspace user does not apply.
+ *
+ * To ensure, as much as feasible, compatibility across stacks and avoid
+ * confusion with incompatible enumerations stakeholders for all relevant driver
+ * stacks should approve additions.
+ */
+
+#define fourcc_code(a, b, c, d) ((uint32_t)(a) | ((uint32_t)(b) << 8) | \
+                                ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24))
+
+#define DRM_FORMAT_BIG_ENDIAN (1U<<31) /* format is big endian instead of little endian */
+
+/* Reserve 0 for the invalid format specifier */
+#define DRM_FORMAT_INVALID     0
+
+/* color index */
+#define DRM_FORMAT_C1          fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */
+#define DRM_FORMAT_C2          fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four pixels/byte */
+#define DRM_FORMAT_C4          fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */
+#define DRM_FORMAT_C8          fourcc_code('C', '8', ' ', ' ') /* [7:0] C */
+
+/* 1 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D1          fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */
+
+/* 2 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D2          fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four pixels/byte */
+
+/* 4 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D4          fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */
+
+/* 8 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D8          fourcc_code('D', '8', ' ', ' ') /* [7:0] D */
+
+/* 1 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R1          fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */
+
+/* 2 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R2          fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four pixels/byte */
+
+/* 4 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R4          fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 4:4 two pixels/byte */
+
+/* 8 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R8          fourcc_code('R', '8', ' ', ' ') /* [7:0] R */
+
+/* 10 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R10         fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */
+
+/* 12 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R12         fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */
+
+/* 16 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R16         fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */
+
+/* 16 bpp RG */
+#define DRM_FORMAT_RG88                fourcc_code('R', 'G', '8', '8') /* [15:0] R:G 8:8 little endian */
+#define DRM_FORMAT_GR88                fourcc_code('G', 'R', '8', '8') /* [15:0] G:R 8:8 little endian */
+
+/* 32 bpp RG */
+#define DRM_FORMAT_RG1616      fourcc_code('R', 'G', '3', '2') /* [31:0] R:G 16:16 little endian */
+#define DRM_FORMAT_GR1616      fourcc_code('G', 'R', '3', '2') /* [31:0] G:R 16:16 little endian */
+
+/* 8 bpp RGB */
+#define DRM_FORMAT_RGB332      fourcc_code('R', 'G', 'B', '8') /* [7:0] R:G:B 3:3:2 */
+#define DRM_FORMAT_BGR233      fourcc_code('B', 'G', 'R', '8') /* [7:0] B:G:R 2:3:3 */
+
+/* 16 bpp RGB */
+#define DRM_FORMAT_XRGB4444    fourcc_code('X', 'R', '1', '2') /* [15:0] x:R:G:B 4:4:4:4 little endian */
+#define DRM_FORMAT_XBGR4444    fourcc_code('X', 'B', '1', '2') /* [15:0] x:B:G:R 4:4:4:4 little endian */
+#define DRM_FORMAT_RGBX4444    fourcc_code('R', 'X', '1', '2') /* [15:0] R:G:B:x 4:4:4:4 little endian */
+#define DRM_FORMAT_BGRX4444    fourcc_code('B', 'X', '1', '2') /* [15:0] B:G:R:x 4:4:4:4 little endian */
+
+#define DRM_FORMAT_ARGB4444    fourcc_code('A', 'R', '1', '2') /* [15:0] A:R:G:B 4:4:4:4 little endian */
+#define DRM_FORMAT_ABGR4444    fourcc_code('A', 'B', '1', '2') /* [15:0] A:B:G:R 4:4:4:4 little endian */
+#define DRM_FORMAT_RGBA4444    fourcc_code('R', 'A', '1', '2') /* [15:0] R:G:B:A 4:4:4:4 little endian */
+#define DRM_FORMAT_BGRA4444    fourcc_code('B', 'A', '1', '2') /* [15:0] B:G:R:A 4:4:4:4 little endian */
+
+#define DRM_FORMAT_XRGB1555    fourcc_code('X', 'R', '1', '5') /* [15:0] x:R:G:B 1:5:5:5 little endian */
+#define DRM_FORMAT_XBGR1555    fourcc_code('X', 'B', '1', '5') /* [15:0] x:B:G:R 1:5:5:5 little endian */
+#define DRM_FORMAT_RGBX5551    fourcc_code('R', 'X', '1', '5') /* [15:0] R:G:B:x 5:5:5:1 little endian */
+#define DRM_FORMAT_BGRX5551    fourcc_code('B', 'X', '1', '5') /* [15:0] B:G:R:x 5:5:5:1 little endian */
+
+#define DRM_FORMAT_ARGB1555    fourcc_code('A', 'R', '1', '5') /* [15:0] A:R:G:B 1:5:5:5 little endian */
+#define DRM_FORMAT_ABGR1555    fourcc_code('A', 'B', '1', '5') /* [15:0] A:B:G:R 1:5:5:5 little endian */
+#define DRM_FORMAT_RGBA5551    fourcc_code('R', 'A', '1', '5') /* [15:0] R:G:B:A 5:5:5:1 little endian */
+#define DRM_FORMAT_BGRA5551    fourcc_code('B', 'A', '1', '5') /* [15:0] B:G:R:A 5:5:5:1 little endian */
+
+#define DRM_FORMAT_RGB565      fourcc_code('R', 'G', '1', '6') /* [15:0] R:G:B 5:6:5 little endian */
+#define DRM_FORMAT_BGR565      fourcc_code('B', 'G', '1', '6') /* [15:0] B:G:R 5:6:5 little endian */
+
+/* 24 bpp RGB */
+#define DRM_FORMAT_RGB888      fourcc_code('R', 'G', '2', '4') /* [23:0] R:G:B little endian */
+#define DRM_FORMAT_BGR888      fourcc_code('B', 'G', '2', '4') /* [23:0] B:G:R little endian */
+
+/* 32 bpp RGB */
+#define DRM_FORMAT_XRGB8888    fourcc_code('X', 'R', '2', '4') /* [31:0] x:R:G:B 8:8:8:8 little endian */
+#define DRM_FORMAT_XBGR8888    fourcc_code('X', 'B', '2', '4') /* [31:0] x:B:G:R 8:8:8:8 little endian */
+#define DRM_FORMAT_RGBX8888    fourcc_code('R', 'X', '2', '4') /* [31:0] R:G:B:x 8:8:8:8 little endian */
+#define DRM_FORMAT_BGRX8888    fourcc_code('B', 'X', '2', '4') /* [31:0] B:G:R:x 8:8:8:8 little endian */
+
+#define DRM_FORMAT_ARGB8888    fourcc_code('A', 'R', '2', '4') /* [31:0] A:R:G:B 8:8:8:8 little endian */
+#define DRM_FORMAT_ABGR8888    fourcc_code('A', 'B', '2', '4') /* [31:0] A:B:G:R 8:8:8:8 little endian */
+#define DRM_FORMAT_RGBA8888    fourcc_code('R', 'A', '2', '4') /* [31:0] R:G:B:A 8:8:8:8 little endian */
+#define DRM_FORMAT_BGRA8888    fourcc_code('B', 'A', '2', '4') /* [31:0] B:G:R:A 8:8:8:8 little endian */
+
+#define DRM_FORMAT_XRGB2101010 fourcc_code('X', 'R', '3', '0') /* [31:0] x:R:G:B 2:10:10:10 little endian */
+#define DRM_FORMAT_XBGR2101010 fourcc_code('X', 'B', '3', '0') /* [31:0] x:B:G:R 2:10:10:10 little endian */
+#define DRM_FORMAT_RGBX1010102 fourcc_code('R', 'X', '3', '0') /* [31:0] R:G:B:x 10:10:10:2 little endian */
+#define DRM_FORMAT_BGRX1010102 fourcc_code('B', 'X', '3', '0') /* [31:0] B:G:R:x 10:10:10:2 little endian */
+
+#define DRM_FORMAT_ARGB2101010 fourcc_code('A', 'R', '3', '0') /* [31:0] A:R:G:B 2:10:10:10 little endian */
+#define DRM_FORMAT_ABGR2101010 fourcc_code('A', 'B', '3', '0') /* [31:0] A:B:G:R 2:10:10:10 little endian */
+#define DRM_FORMAT_RGBA1010102 fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little endian */
+#define DRM_FORMAT_BGRA1010102 fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little endian */
+
+/* 64 bpp RGB */
+#define DRM_FORMAT_XRGB16161616        fourcc_code('X', 'R', '4', '8') /* [63:0] x:R:G:B 16:16:16:16 little endian */
+#define DRM_FORMAT_XBGR16161616        fourcc_code('X', 'B', '4', '8') /* [63:0] x:B:G:R 16:16:16:16 little endian */
+
+#define DRM_FORMAT_ARGB16161616        fourcc_code('A', 'R', '4', '8') /* [63:0] A:R:G:B 16:16:16:16 little endian */
+#define DRM_FORMAT_ABGR16161616        fourcc_code('A', 'B', '4', '8') /* [63:0] A:B:G:R 16:16:16:16 little endian */
+
+/*
+ * Floating point 64bpp RGB
+ * IEEE 754-2008 binary16 half-precision float
+ * [15:0] sign:exponent:mantissa 1:5:10
+ */
+#define DRM_FORMAT_XRGB16161616F fourcc_code('X', 'R', '4', 'H') /* [63:0] x:R:G:B 16:16:16:16 little endian */
+#define DRM_FORMAT_XBGR16161616F fourcc_code('X', 'B', '4', 'H') /* [63:0] x:B:G:R 16:16:16:16 little endian */
+
+#define DRM_FORMAT_ARGB16161616F fourcc_code('A', 'R', '4', 'H') /* [63:0] A:R:G:B 16:16:16:16 little endian */
+#define DRM_FORMAT_ABGR16161616F fourcc_code('A', 'B', '4', 'H') /* [63:0] A:B:G:R 16:16:16:16 little endian */
+
+/*
+ * RGBA format with 10-bit components packed in 64-bit per pixel, with 6 bits
+ * of unused padding per component:
+ */
+#define DRM_FORMAT_AXBXGXRX106106106106 fourcc_code('A', 'B', '1', '0') /* [63:0] A:x:B:x:G:x:R:x 10:6:10:6:10:6:10:6 little endian */
+
+/* packed YCbCr */
+#define DRM_FORMAT_YUYV                fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */
+#define DRM_FORMAT_YVYU                fourcc_code('Y', 'V', 'Y', 'U') /* [31:0] Cb0:Y1:Cr0:Y0 8:8:8:8 little endian */
+#define DRM_FORMAT_UYVY                fourcc_code('U', 'Y', 'V', 'Y') /* [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */
+#define DRM_FORMAT_VYUY                fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */
+
+#define DRM_FORMAT_AYUV                fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_AVUY8888    fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little endian */
+#define DRM_FORMAT_XYUV8888    fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_XVUY8888    fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little endian */
+#define DRM_FORMAT_VUY888      fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */
+#define DRM_FORMAT_VUY101010   fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */
+
+/*
+ * packed Y2xx indicate for each component, xx valid data occupy msb
+ * 16-xx padding occupy lsb
+ */
+#define DRM_FORMAT_Y210         fourcc_code('Y', '2', '1', '0') /* [63:0] Cr0:0:Y1:0:Cb0:0:Y0:0 10:6:10:6:10:6:10:6 little endian per 2 Y pixels */
+#define DRM_FORMAT_Y212         fourcc_code('Y', '2', '1', '2') /* [63:0] Cr0:0:Y1:0:Cb0:0:Y0:0 12:4:12:4:12:4:12:4 little endian per 2 Y pixels */
+#define DRM_FORMAT_Y216         fourcc_code('Y', '2', '1', '6') /* [63:0] Cr0:Y1:Cb0:Y0 16:16:16:16 little endian per 2 Y pixels */
+
+/*
+ * packed Y4xx indicate for each component, xx valid data occupy msb
+ * 16-xx padding occupy lsb except Y410
+ */
+#define DRM_FORMAT_Y410         fourcc_code('Y', '4', '1', '0') /* [31:0] A:Cr:Y:Cb 2:10:10:10 little endian */
+#define DRM_FORMAT_Y412         fourcc_code('Y', '4', '1', '2') /* [63:0] A:0:Cr:0:Y:0:Cb:0 12:4:12:4:12:4:12:4 little endian */
+#define DRM_FORMAT_Y416         fourcc_code('Y', '4', '1', '6') /* [63:0] A:Cr:Y:Cb 16:16:16:16 little endian */
+
+#define DRM_FORMAT_XVYU2101010 fourcc_code('X', 'V', '3', '0') /* [31:0] X:Cr:Y:Cb 2:10:10:10 little endian */
+#define DRM_FORMAT_XVYU12_16161616     fourcc_code('X', 'V', '3', '6') /* [63:0] X:0:Cr:0:Y:0:Cb:0 12:4:12:4:12:4:12:4 little endian */
+#define DRM_FORMAT_XVYU16161616        fourcc_code('X', 'V', '4', '8') /* [63:0] X:Cr:Y:Cb 16:16:16:16 little endian */
+
+/*
+ * packed YCbCr420 2x2 tiled formats
+ * first 64 bits will contain Y,Cb,Cr components for a 2x2 tile
+ */
+/* [63:0]   A3:A2:Y3:0:Cr0:0:Y2:0:A1:A0:Y1:0:Cb0:0:Y0:0  1:1:8:2:8:2:8:2:1:1:8:2:8:2:8:2 little endian */
+#define DRM_FORMAT_Y0L0                fourcc_code('Y', '0', 'L', '0')
+/* [63:0]   X3:X2:Y3:0:Cr0:0:Y2:0:X1:X0:Y1:0:Cb0:0:Y0:0  1:1:8:2:8:2:8:2:1:1:8:2:8:2:8:2 little endian */
+#define DRM_FORMAT_X0L0                fourcc_code('X', '0', 'L', '0')
+
+/* [63:0]   A3:A2:Y3:Cr0:Y2:A1:A0:Y1:Cb0:Y0  1:1:10:10:10:1:1:10:10:10 little endian */
+#define DRM_FORMAT_Y0L2                fourcc_code('Y', '0', 'L', '2')
+/* [63:0]   X3:X2:Y3:Cr0:Y2:X1:X0:Y1:Cb0:Y0  1:1:10:10:10:1:1:10:10:10 little endian */
+#define DRM_FORMAT_X0L2                fourcc_code('X', '0', 'L', '2')
+
+/*
+ * 1-plane YUV 4:2:0
+ * In these formats, the component ordering is specified (Y, followed by U
+ * then V), but the exact Linear layout is undefined.
+ * These formats can only be used with a non-Linear modifier.
+ */
+#define DRM_FORMAT_YUV420_8BIT fourcc_code('Y', 'U', '0', '8')
+#define DRM_FORMAT_YUV420_10BIT        fourcc_code('Y', 'U', '1', '0')
+
+/*
+ * 2 plane RGB + A
+ * index 0 = RGB plane, same format as the corresponding non _A8 format has
+ * index 1 = A plane, [7:0] A
+ */
+#define DRM_FORMAT_XRGB8888_A8 fourcc_code('X', 'R', 'A', '8')
+#define DRM_FORMAT_XBGR8888_A8 fourcc_code('X', 'B', 'A', '8')
+#define DRM_FORMAT_RGBX8888_A8 fourcc_code('R', 'X', 'A', '8')
+#define DRM_FORMAT_BGRX8888_A8 fourcc_code('B', 'X', 'A', '8')
+#define DRM_FORMAT_RGB888_A8   fourcc_code('R', '8', 'A', '8')
+#define DRM_FORMAT_BGR888_A8   fourcc_code('B', '8', 'A', '8')
+#define DRM_FORMAT_RGB565_A8   fourcc_code('R', '5', 'A', '8')
+#define DRM_FORMAT_BGR565_A8   fourcc_code('B', '5', 'A', '8')
+
+/*
+ * 2 plane YCbCr
+ * index 0 = Y plane, [7:0] Y
+ * index 1 = Cr:Cb plane, [15:0] Cr:Cb little endian
+ * or
+ * index 1 = Cb:Cr plane, [15:0] Cb:Cr little endian
+ */
+#define DRM_FORMAT_NV12                fourcc_code('N', 'V', '1', '2') /* 2x2 subsampled Cr:Cb plane */
+#define DRM_FORMAT_NV21                fourcc_code('N', 'V', '2', '1') /* 2x2 subsampled Cb:Cr plane */
+#define DRM_FORMAT_NV16                fourcc_code('N', 'V', '1', '6') /* 2x1 subsampled Cr:Cb plane */
+#define DRM_FORMAT_NV61                fourcc_code('N', 'V', '6', '1') /* 2x1 subsampled Cb:Cr plane */
+#define DRM_FORMAT_NV24                fourcc_code('N', 'V', '2', '4') /* non-subsampled Cr:Cb plane */
+#define DRM_FORMAT_NV42                fourcc_code('N', 'V', '4', '2') /* non-subsampled Cb:Cr plane */
+/*
+ * 2 plane YCbCr
+ * index 0 = Y plane, [39:0] Y3:Y2:Y1:Y0 little endian
+ * index 1 = Cr:Cb plane, [39:0] Cr1:Cb1:Cr0:Cb0 little endian
+ */
+#define DRM_FORMAT_NV15                fourcc_code('N', 'V', '1', '5') /* 2x2 subsampled Cr:Cb plane */
+
+/*
+ * 2 plane YCbCr MSB aligned
+ * index 0 = Y plane, [15:0] Y:x [10:6] little endian
+ * index 1 = Cr:Cb plane, [31:0] Cr:x:Cb:x [10:6:10:6] little endian
+ */
+#define DRM_FORMAT_P210                fourcc_code('P', '2', '1', '0') /* 2x1 subsampled Cr:Cb plane, 10 bit per channel */
+
+/*
+ * 2 plane YCbCr MSB aligned
+ * index 0 = Y plane, [15:0] Y:x [10:6] little endian
+ * index 1 = Cr:Cb plane, [31:0] Cr:x:Cb:x [10:6:10:6] little endian
+ */
+#define DRM_FORMAT_P010                fourcc_code('P', '0', '1', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel */
+
+/*
+ * 2 plane YCbCr MSB aligned
+ * index 0 = Y plane, [15:0] Y:x [12:4] little endian
+ * index 1 = Cr:Cb plane, [31:0] Cr:x:Cb:x [12:4:12:4] little endian
+ */
+#define DRM_FORMAT_P012                fourcc_code('P', '0', '1', '2') /* 2x2 subsampled Cr:Cb plane 12 bits per channel */
+
+/*
+ * 2 plane YCbCr MSB aligned
+ * index 0 = Y plane, [15:0] Y little endian
+ * index 1 = Cr:Cb plane, [31:0] Cr:Cb [16:16] little endian
+ */
+#define DRM_FORMAT_P016                fourcc_code('P', '0', '1', '6') /* 2x2 subsampled Cr:Cb plane 16 bits per channel */
+
+/* 2 plane YCbCr420.
+ * 3 10 bit components and 2 padding bits packed into 4 bytes.
+ * index 0 = Y plane, [31:0] x:Y2:Y1:Y0 2:10:10:10 little endian
+ * index 1 = Cr:Cb plane, [63:0] x:Cr2:Cb2:Cr1:x:Cb1:Cr0:Cb0 [2:10:10:10:2:10:10:10] little endian
+ */
+#define DRM_FORMAT_P030                fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */
+
+/* 3 plane non-subsampled (444) YCbCr
+ * 16 bits per component, but only 10 bits are used and 6 bits are padded
+ * index 0: Y plane, [15:0] Y:x [10:6] little endian
+ * index 1: Cb plane, [15:0] Cb:x [10:6] little endian
+ * index 2: Cr plane, [15:0] Cr:x [10:6] little endian
+ */
+#define DRM_FORMAT_Q410                fourcc_code('Q', '4', '1', '0')
+
+/* 3 plane non-subsampled (444) YCrCb
+ * 16 bits per component, but only 10 bits are used and 6 bits are padded
+ * index 0: Y plane, [15:0] Y:x [10:6] little endian
+ * index 1: Cr plane, [15:0] Cr:x [10:6] little endian
+ * index 2: Cb plane, [15:0] Cb:x [10:6] little endian
+ */
+#define DRM_FORMAT_Q401                fourcc_code('Q', '4', '0', '1')
+
+/*
+ * 3 plane YCbCr
+ * index 0: Y plane, [7:0] Y
+ * index 1: Cb plane, [7:0] Cb
+ * index 2: Cr plane, [7:0] Cr
+ * or
+ * index 1: Cr plane, [7:0] Cr
+ * index 2: Cb plane, [7:0] Cb
+ */
+#define DRM_FORMAT_YUV410      fourcc_code('Y', 'U', 'V', '9') /* 4x4 subsampled Cb (1) and Cr (2) planes */
+#define DRM_FORMAT_YVU410      fourcc_code('Y', 'V', 'U', '9') /* 4x4 subsampled Cr (1) and Cb (2) planes */
+#define DRM_FORMAT_YUV411      fourcc_code('Y', 'U', '1', '1') /* 4x1 subsampled Cb (1) and Cr (2) planes */
+#define DRM_FORMAT_YVU411      fourcc_code('Y', 'V', '1', '1') /* 4x1 subsampled Cr (1) and Cb (2) planes */
+#define DRM_FORMAT_YUV420      fourcc_code('Y', 'U', '1', '2') /* 2x2 subsampled Cb (1) and Cr (2) planes */
+#define DRM_FORMAT_YVU420      fourcc_code('Y', 'V', '1', '2') /* 2x2 subsampled Cr (1) and Cb (2) planes */
+#define DRM_FORMAT_YUV422      fourcc_code('Y', 'U', '1', '6') /* 2x1 subsampled Cb (1) and Cr (2) planes */
+#define DRM_FORMAT_YVU422      fourcc_code('Y', 'V', '1', '6') /* 2x1 subsampled Cr (1) and Cb (2) planes */
+#define DRM_FORMAT_YUV444      fourcc_code('Y', 'U', '2', '4') /* non-subsampled Cb (1) and Cr (2) planes */
+#define DRM_FORMAT_YVU444      fourcc_code('Y', 'V', '2', '4') /* non-subsampled Cr (1) and Cb (2) planes */
+
+
+/*
+ * Format Modifiers:
+ *
+ * Format modifiers describe, typically, a re-ordering or modification
+ * of the data in a plane of an FB.  This can be used to express tiled/
+ * swizzled formats, or compression, or a combination of the two.
+ *
+ * The upper 8 bits of the format modifier are a vendor-id as assigned
+ * below.  The lower 56 bits are assigned as vendor sees fit.
+ */
+
+/* Vendor Ids: */
+#define DRM_FORMAT_MOD_VENDOR_NONE    0
+#define DRM_FORMAT_MOD_VENDOR_INTEL   0x01
+#define DRM_FORMAT_MOD_VENDOR_AMD     0x02
+#define DRM_FORMAT_MOD_VENDOR_NVIDIA  0x03
+#define DRM_FORMAT_MOD_VENDOR_SAMSUNG 0x04
+#define DRM_FORMAT_MOD_VENDOR_QCOM    0x05
+#define DRM_FORMAT_MOD_VENDOR_VIVANTE 0x06
+#define DRM_FORMAT_MOD_VENDOR_BROADCOM 0x07
+#define DRM_FORMAT_MOD_VENDOR_ARM     0x08
+#define DRM_FORMAT_MOD_VENDOR_ALLWINNER 0x09
+#define DRM_FORMAT_MOD_VENDOR_AMLOGIC 0x0a
+
+/* add more to the end as needed */
+
+#define DRM_FORMAT_RESERVED          ((1ULL << 56) - 1)
+
+#define fourcc_mod_get_vendor(modifier) \
+       (((modifier) >> 56) & 0xff)
+
+#define fourcc_mod_is_vendor(modifier, vendor) \
+       (fourcc_mod_get_vendor(modifier) == DRM_FORMAT_MOD_VENDOR_## vendor)
+
+#define fourcc_mod_code(vendor, val) \
+       ((((uint64_t)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | ((val) & 0x00ffffffffffffffULL))
+
+/*
+ * Format Modifier tokens:
+ *
+ * When adding a new token please document the layout with a code comment,
+ * similar to the fourcc codes above. drm_fourcc.h is considered the
+ * authoritative source for all of these.
+ *
+ * Generic modifier names:
+ *
+ * DRM_FORMAT_MOD_GENERIC_* definitions are used to provide vendor-neutral names
+ * for layouts which are common across multiple vendors. To preserve
+ * compatibility, in cases where a vendor-specific definition already exists and
+ * a generic name for it is desired, the common name is a purely symbolic alias
+ * and must use the same numerical value as the original definition.
+ *
+ * Note that generic names should only be used for modifiers which describe
+ * generic layouts (such as pixel re-ordering), which may have
+ * independently-developed support across multiple vendors.
+ *
+ * In future cases where a generic layout is identified before merging with a
+ * vendor-specific modifier, a new 'GENERIC' vendor or modifier using vendor
+ * 'NONE' could be considered. This should only be for obvious, exceptional
+ * cases to avoid polluting the 'GENERIC' namespace with modifiers which only
+ * apply to a single vendor.
+ *
+ * Generic names should not be used for cases where multiple hardware vendors
+ * have implementations of the same standardised compression scheme (such as
+ * AFBC). In those cases, all implementations should use the same format
+ * modifier(s), reflecting the vendor of the standard.
+ */
+
+#define DRM_FORMAT_MOD_GENERIC_16_16_TILE DRM_FORMAT_MOD_SAMSUNG_16_16_TILE
+
+/*
+ * Invalid Modifier
+ *
+ * This modifier can be used as a sentinel to terminate the format modifiers
+ * list, or to initialize a variable with an invalid modifier. It might also be
+ * used to report an error back to userspace for certain APIs.
+ */
+#define DRM_FORMAT_MOD_INVALID fourcc_mod_code(NONE, DRM_FORMAT_RESERVED)
+
+/*
+ * Linear Layout
+ *
+ * Just plain linear layout. Note that this is different from no specifying any
+ * modifier (e.g. not setting DRM_MODE_FB_MODIFIERS in the DRM_ADDFB2 ioctl),
+ * which tells the driver to also take driver-internal information into account
+ * and so might actually result in a tiled framebuffer.
+ */
+#define DRM_FORMAT_MOD_LINEAR  fourcc_mod_code(NONE, 0)
+
+/*
+ * Deprecated: use DRM_FORMAT_MOD_LINEAR instead
+ *
+ * The "none" format modifier doesn't actually mean that the modifier is
+ * implicit, instead it means that the layout is linear. Whether modifiers are
+ * used is out-of-band information carried in an API-specific way (e.g. in a
+ * flag for drm_mode_fb_cmd2).
+ */
+#define DRM_FORMAT_MOD_NONE    0
+
+/* Intel framebuffer modifiers */
+
+/*
+ * Intel X-tiling layout
+ *
+ * This is a tiled layout using 4Kb tiles (except on gen2 where the tiles 2Kb)
+ * in row-major layout. Within the tile bytes are laid out row-major, with
+ * a platform-dependent stride. On top of that the memory can apply
+ * platform-depending swizzling of some higher address bits into bit6.
+ *
+ * Note that this layout is only accurate on intel gen 8+ or valleyview chipsets.
+ * On earlier platforms the is highly platforms specific and not useful for
+ * cross-driver sharing. It exists since on a given platform it does uniquely
+ * identify the layout in a simple way for i915-specific userspace, which
+ * facilitated conversion of userspace to modifiers. Additionally the exact
+ * format on some really old platforms is not known.
+ */
+#define I915_FORMAT_MOD_X_TILED        fourcc_mod_code(INTEL, 1)
+
+/*
+ * Intel Y-tiling layout
+ *
+ * This is a tiled layout using 4Kb tiles (except on gen2 where the tiles 2Kb)
+ * in row-major layout. Within the tile bytes are laid out in OWORD (16 bytes)
+ * chunks column-major, with a platform-dependent height. On top of that the
+ * memory can apply platform-depending swizzling of some higher address bits
+ * into bit6.
+ *
+ * Note that this layout is only accurate on intel gen 8+ or valleyview chipsets.
+ * On earlier platforms the is highly platforms specific and not useful for
+ * cross-driver sharing. It exists since on a given platform it does uniquely
+ * identify the layout in a simple way for i915-specific userspace, which
+ * facilitated conversion of userspace to modifiers. Additionally the exact
+ * format on some really old platforms is not known.
+ */
+#define I915_FORMAT_MOD_Y_TILED        fourcc_mod_code(INTEL, 2)
+
+/*
+ * Intel Yf-tiling layout
+ *
+ * This is a tiled layout using 4Kb tiles in row-major layout.
+ * Within the tile pixels are laid out in 16 256 byte units / sub-tiles which
+ * are arranged in four groups (two wide, two high) with column-major layout.
+ * Each group therefore consits out of four 256 byte units, which are also laid
+ * out as 2x2 column-major.
+ * 256 byte units are made out of four 64 byte blocks of pixels, producing
+ * either a square block or a 2:1 unit.
+ * 64 byte blocks of pixels contain four pixel rows of 16 bytes, where the width
+ * in pixel depends on the pixel depth.
+ */
+#define I915_FORMAT_MOD_Yf_TILED fourcc_mod_code(INTEL, 3)
+
+/*
+ * Intel color control surface (CCS) for render compression
+ *
+ * The framebuffer format must be one of the 8:8:8:8 RGB formats.
+ * The main surface will be plane index 0 and must be Y/Yf-tiled,
+ * the CCS will be plane index 1.
+ *
+ * Each CCS tile matches a 1024x512 pixel area of the main surface.
+ * To match certain aspects of the 3D hardware the CCS is
+ * considered to be made up of normal 128Bx32 Y tiles, Thus
+ * the CCS pitch must be specified in multiples of 128 bytes.
+ *
+ * In reality the CCS tile appears to be a 64Bx64 Y tile, composed
+ * of QWORD (8 bytes) chunks instead of OWORD (16 bytes) chunks.
+ * But that fact is not relevant unless the memory is accessed
+ * directly.
+ */
+#define I915_FORMAT_MOD_Y_TILED_CCS    fourcc_mod_code(INTEL, 4)
+#define I915_FORMAT_MOD_Yf_TILED_CCS   fourcc_mod_code(INTEL, 5)
+
+/*
+ * Intel color control surfaces (CCS) for Gen-12 render compression.
+ *
+ * The main surface is Y-tiled and at plane index 0, the CCS is linear and
+ * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in
+ * main surface. In other words, 4 bits in CCS map to a main surface cache
+ * line pair. The main surface pitch is required to be a multiple of four
+ * Y-tile widths.
+ */
+#define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS fourcc_mod_code(INTEL, 6)
+
+/*
+ * Intel color control surfaces (CCS) for Gen-12 media compression
+ *
+ * The main surface is Y-tiled and at plane index 0, the CCS is linear and
+ * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in
+ * main surface. In other words, 4 bits in CCS map to a main surface cache
+ * line pair. The main surface pitch is required to be a multiple of four
+ * Y-tile widths. For semi-planar formats like NV12, CCS planes follow the
+ * Y and UV planes i.e., planes 0 and 1 are used for Y and UV surfaces,
+ * planes 2 and 3 for the respective CCS.
+ */
+#define I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS fourcc_mod_code(INTEL, 7)
+
+/*
+ * Intel Color Control Surface with Clear Color (CCS) for Gen-12 render
+ * compression.
+ *
+ * The main surface is Y-tiled and is at plane index 0 whereas CCS is linear
+ * and at index 1. The clear color is stored at index 2, and the pitch should
+ * be 64 bytes aligned. The clear color structure is 256 bits. The first 128 bits
+ * represents Raw Clear Color Red, Green, Blue and Alpha color each represented
+ * by 32 bits. The raw clear color is consumed by the 3d engine and generates
+ * the converted clear color of size 64 bits. The first 32 bits store the Lower
+ * Converted Clear Color value and the next 32 bits store the Higher Converted
+ * Clear Color value when applicable. The Converted Clear Color values are
+ * consumed by the DE. The last 64 bits are used to store Color Discard Enable
+ * and Depth Clear Value Valid which are ignored by the DE. A CCS cache line
+ * corresponds to an area of 4x1 tiles in the main surface. The main surface
+ * pitch is required to be a multiple of 4 tile widths.
+ */
+#define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC fourcc_mod_code(INTEL, 8)
+
+/*
+ * Intel Tile 4 layout
+ *
+ * This is a tiled layout using 4KB tiles in a row-major layout. It has the same
+ * shape as Tile Y at two granularities: 4KB (128B x 32) and 64B (16B x 4). It
+ * only differs from Tile Y at the 256B granularity in between. At this
+ * granularity, Tile Y has a shape of 16B x 32 rows, but this tiling has a shape
+ * of 64B x 8 rows.
+ */
+#define I915_FORMAT_MOD_4_TILED         fourcc_mod_code(INTEL, 9)
+
+/*
+ * Intel color control surfaces (CCS) for DG2 render compression.
+ *
+ * The main surface is Tile 4 and at plane index 0. The CCS data is stored
+ * outside of the GEM object in a reserved memory area dedicated for the
+ * storage of the CCS data for all RC/RC_CC/MC compressible GEM objects. The
+ * main surface pitch is required to be a multiple of four Tile 4 widths.
+ */
+#define I915_FORMAT_MOD_4_TILED_DG2_RC_CCS fourcc_mod_code(INTEL, 10)
+
+/*
+ * Intel color control surfaces (CCS) for DG2 media compression.
+ *
+ * The main surface is Tile 4 and at plane index 0. For semi-planar formats
+ * like NV12, the Y and UV planes are Tile 4 and are located at plane indices
+ * 0 and 1, respectively. The CCS for all planes are stored outside of the
+ * GEM object in a reserved memory area dedicated for the storage of the
+ * CCS data for all RC/RC_CC/MC compressible GEM objects. The main surface
+ * pitch is required to be a multiple of four Tile 4 widths.
+ */
+#define I915_FORMAT_MOD_4_TILED_DG2_MC_CCS fourcc_mod_code(INTEL, 11)
+
+/*
+ * Intel Color Control Surface with Clear Color (CCS) for DG2 render compression.
+ *
+ * The main surface is Tile 4 and at plane index 0. The CCS data is stored
+ * outside of the GEM object in a reserved memory area dedicated for the
+ * storage of the CCS data for all RC/RC_CC/MC compressible GEM objects. The
+ * main surface pitch is required to be a multiple of four Tile 4 widths. The
+ * clear color is stored at plane index 1 and the pitch should be 64 bytes
+ * aligned. The format of the 256 bits of clear color data matches the one used
+ * for the I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC modifier, see its description
+ * for details.
+ */
+#define I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC fourcc_mod_code(INTEL, 12)
+
+/*
+ * Intel Color Control Surfaces (CCS) for display ver. 14 render compression.
+ *
+ * The main surface is tile4 and at plane index 0, the CCS is linear and
+ * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in
+ * main surface. In other words, 4 bits in CCS map to a main surface cache
+ * line pair. The main surface pitch is required to be a multiple of four
+ * tile4 widths.
+ */
+#define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS fourcc_mod_code(INTEL, 13)
+
+/*
+ * Intel Color Control Surfaces (CCS) for display ver. 14 media compression
+ *
+ * The main surface is tile4 and at plane index 0, the CCS is linear and
+ * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in
+ * main surface. In other words, 4 bits in CCS map to a main surface cache
+ * line pair. The main surface pitch is required to be a multiple of four
+ * tile4 widths. For semi-planar formats like NV12, CCS planes follow the
+ * Y and UV planes i.e., planes 0 and 1 are used for Y and UV surfaces,
+ * planes 2 and 3 for the respective CCS.
+ */
+#define I915_FORMAT_MOD_4_TILED_MTL_MC_CCS fourcc_mod_code(INTEL, 14)
+
+/*
+ * Intel Color Control Surface with Clear Color (CCS) for display ver. 14 render
+ * compression.
+ *
+ * The main surface is tile4 and is at plane index 0 whereas CCS is linear
+ * and at index 1. The clear color is stored at index 2, and the pitch should
+ * be ignored. The clear color structure is 256 bits. The first 128 bits
+ * represents Raw Clear Color Red, Green, Blue and Alpha color each represented
+ * by 32 bits. The raw clear color is consumed by the 3d engine and generates
+ * the converted clear color of size 64 bits. The first 32 bits store the Lower
+ * Converted Clear Color value and the next 32 bits store the Higher Converted
+ * Clear Color value when applicable. The Converted Clear Color values are
+ * consumed by the DE. The last 64 bits are used to store Color Discard Enable
+ * and Depth Clear Value Valid which are ignored by the DE. A CCS cache line
+ * corresponds to an area of 4x1 tiles in the main surface. The main surface
+ * pitch is required to be a multiple of 4 tile widths.
+ */
+#define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS_CC fourcc_mod_code(INTEL, 15)
+
+/*
+ * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks
+ *
+ * Macroblocks are laid in a Z-shape, and each pixel data is following the
+ * standard NV12 style.
+ * As for NV12, an image is the result of two frame buffers: one for Y,
+ * one for the interleaved Cb/Cr components (1/2 the height of the Y buffer).
+ * Alignment requirements are (for each buffer):
+ * - multiple of 128 pixels for the width
+ * - multiple of  32 pixels for the height
+ *
+ * For more information: see https://linuxtv.org/downloads/v4l-dvb-apis/re32.html
+ */
+#define DRM_FORMAT_MOD_SAMSUNG_64_32_TILE      fourcc_mod_code(SAMSUNG, 1)
+
+/*
+ * Tiled, 16 (pixels) x 16 (lines) - sized macroblocks
+ *
+ * This is a simple tiled layout using tiles of 16x16 pixels in a row-major
+ * layout. For YCbCr formats Cb/Cr components are taken in such a way that
+ * they correspond to their 16x16 luma block.
+ */
+#define DRM_FORMAT_MOD_SAMSUNG_16_16_TILE      fourcc_mod_code(SAMSUNG, 2)
+
+/*
+ * Qualcomm Compressed Format
+ *
+ * Refers to a compressed variant of the base format that is compressed.
+ * Implementation may be platform and base-format specific.
+ *
+ * Each macrotile consists of m x n (mostly 4 x 4) tiles.
+ * Pixel data pitch/stride is aligned with macrotile width.
+ * Pixel data height is aligned with macrotile height.
+ * Entire pixel data buffer is aligned with 4k(bytes).
+ */
+#define DRM_FORMAT_MOD_QCOM_COMPRESSED fourcc_mod_code(QCOM, 1)
+
+/*
+ * Qualcomm Tiled Format
+ *
+ * Similar to DRM_FORMAT_MOD_QCOM_COMPRESSED but not compressed.
+ * Implementation may be platform and base-format specific.
+ *
+ * Each macrotile consists of m x n (mostly 4 x 4) tiles.
+ * Pixel data pitch/stride is aligned with macrotile width.
+ * Pixel data height is aligned with macrotile height.
+ * Entire pixel data buffer is aligned with 4k(bytes).
+ */
+#define DRM_FORMAT_MOD_QCOM_TILED3     fourcc_mod_code(QCOM, 3)
+
+/*
+ * Qualcomm Alternate Tiled Format
+ *
+ * Alternate tiled format typically only used within GMEM.
+ * Implementation may be platform and base-format specific.
+ */
+#define DRM_FORMAT_MOD_QCOM_TILED2     fourcc_mod_code(QCOM, 2)
+
+
+/* Vivante framebuffer modifiers */
+
+/*
+ * Vivante 4x4 tiling layout
+ *
+ * This is a simple tiled layout using tiles of 4x4 pixels in a row-major
+ * layout.
+ */
+#define DRM_FORMAT_MOD_VIVANTE_TILED           fourcc_mod_code(VIVANTE, 1)
+
+/*
+ * Vivante 64x64 super-tiling layout
+ *
+ * This is a tiled layout using 64x64 pixel super-tiles, where each super-tile
+ * contains 8x4 groups of 2x4 tiles of 4x4 pixels (like above) each, all in row-
+ * major layout.
+ *
+ * For more information: see
+ * https://github.com/etnaviv/etna_viv/blob/master/doc/hardware.md#texture-tiling
+ */
+#define DRM_FORMAT_MOD_VIVANTE_SUPER_TILED     fourcc_mod_code(VIVANTE, 2)
+
+/*
+ * Vivante 4x4 tiling layout for dual-pipe
+ *
+ * Same as the 4x4 tiling layout, except every second 4x4 pixel tile starts at a
+ * different base address. Offsets from the base addresses are therefore halved
+ * compared to the non-split tiled layout.
+ */
+#define DRM_FORMAT_MOD_VIVANTE_SPLIT_TILED     fourcc_mod_code(VIVANTE, 3)
+
+/*
+ * Vivante 64x64 super-tiling layout for dual-pipe
+ *
+ * Same as the 64x64 super-tiling layout, except every second 4x4 pixel tile
+ * starts at a different base address. Offsets from the base addresses are
+ * therefore halved compared to the non-split super-tiled layout.
+ */
+#define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4)
+
+/*
+ * Vivante TS (tile-status) buffer modifiers. They can be combined with all of
+ * the color buffer tiling modifiers defined above. When TS is present it's a
+ * separate buffer containing the clear/compression status of each tile. The
+ * modifiers are defined as VIVANTE_MOD_TS_c_s, where c is the color buffer
+ * tile size in bytes covered by one entry in the status buffer and s is the
+ * number of status bits per entry.
+ * We reserve the top 8 bits of the Vivante modifier space for tile status
+ * clear/compression modifiers, as future cores might add some more TS layout
+ * variations.
+ */
+#define VIVANTE_MOD_TS_64_4               (1ULL << 48)
+#define VIVANTE_MOD_TS_64_2               (2ULL << 48)
+#define VIVANTE_MOD_TS_128_4              (3ULL << 48)
+#define VIVANTE_MOD_TS_256_4              (4ULL << 48)
+#define VIVANTE_MOD_TS_MASK               (0xfULL << 48)
+
+/*
+ * Vivante compression modifiers. Those depend on a TS modifier being present
+ * as the TS bits get reinterpreted as compression tags instead of simple
+ * clear markers when compression is enabled.
+ */
+#define VIVANTE_MOD_COMP_DEC400           (1ULL << 52)
+#define VIVANTE_MOD_COMP_MASK             (0xfULL << 52)
+
+/* Masking out the extension bits will yield the base modifier. */
+#define VIVANTE_MOD_EXT_MASK              (VIVANTE_MOD_TS_MASK | \
+                                           VIVANTE_MOD_COMP_MASK)
+
+/* NVIDIA frame buffer modifiers */
+
+/*
+ * Tegra Tiled Layout, used by Tegra 2, 3 and 4.
+ *
+ * Pixels are arranged in simple tiles of 16 x 16 bytes.
+ */
+#define DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED fourcc_mod_code(NVIDIA, 1)
+
+/*
+ * Generalized Block Linear layout, used by desktop GPUs starting with NV50/G80,
+ * and Tegra GPUs starting with Tegra K1.
+ *
+ * Pixels are arranged in Groups of Bytes (GOBs).  GOB size and layout varies
+ * based on the architecture generation.  GOBs themselves are then arranged in
+ * 3D blocks, with the block dimensions (in terms of GOBs) always being a power
+ * of two, and hence expressible as their log2 equivalent (E.g., "2" represents
+ * a block depth or height of "4").
+ *
+ * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format
+ * in full detail.
+ *
+ *       Macro
+ * Bits  Param Description
+ * ----  ----- -----------------------------------------------------------------
+ *
+ *  3:0  h     log2(height) of each block, in GOBs.  Placed here for
+ *             compatibility with the existing
+ *             DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers.
+ *
+ *  4:4  -     Must be 1, to indicate block-linear layout.  Necessary for
+ *             compatibility with the existing
+ *             DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers.
+ *
+ *  8:5  -     Reserved (To support 3D-surfaces with variable log2(depth) block
+ *             size).  Must be zero.
+ *
+ *             Note there is no log2(width) parameter.  Some portions of the
+ *             hardware support a block width of two gobs, but it is impractical
+ *             to use due to lack of support elsewhere, and has no known
+ *             benefits.
+ *
+ * 11:9  -     Reserved (To support 2D-array textures with variable array stride
+ *             in blocks, specified via log2(tile width in blocks)).  Must be
+ *             zero.
+ *
+ * 19:12 k     Page Kind.  This value directly maps to a field in the page
+ *             tables of all GPUs >= NV50.  It affects the exact layout of bits
+ *             in memory and can be derived from the tuple
+ *
+ *               (format, GPU model, compression type, samples per pixel)
+ *
+ *             Where compression type is defined below.  If GPU model were
+ *             implied by the format modifier, format, or memory buffer, page
+ *             kind would not need to be included in the modifier itself, but
+ *             since the modifier should define the layout of the associated
+ *             memory buffer independent from any device or other context, it
+ *             must be included here.
+ *
+ * 21:20 g     GOB Height and Page Kind Generation.  The height of a GOB changed
+ *             starting with Fermi GPUs.  Additionally, the mapping between page
+ *             kind and bit layout has changed at various points.
+ *
+ *               0 = Gob Height 8, Fermi - Volta, Tegra K1+ Page Kind mapping
+ *               1 = Gob Height 4, G80 - GT2XX Page Kind mapping
+ *               2 = Gob Height 8, Turing+ Page Kind mapping
+ *               3 = Reserved for future use.
+ *
+ * 22:22 s     Sector layout.  On Tegra GPUs prior to Xavier, there is a further
+ *             bit remapping step that occurs at an even lower level than the
+ *             page kind and block linear swizzles.  This causes the layout of
+ *             surfaces mapped in those SOC's GPUs to be incompatible with the
+ *             equivalent mapping on other GPUs in the same system.
+ *
+ *               0 = Tegra K1 - Tegra Parker/TX2 Layout.
+ *               1 = Desktop GPU and Tegra Xavier+ Layout
+ *
+ * 25:23 c     Lossless Framebuffer Compression type.
+ *
+ *               0 = none
+ *               1 = ROP/3D, layout 1, exact compression format implied by Page
+ *                   Kind field
+ *               2 = ROP/3D, layout 2, exact compression format implied by Page
+ *                   Kind field
+ *               3 = CDE horizontal
+ *               4 = CDE vertical
+ *               5 = Reserved for future use
+ *               6 = Reserved for future use
+ *               7 = Reserved for future use
+ *
+ * 55:25 -     Reserved for future use.  Must be zero.
+ */
+#define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \
+       fourcc_mod_code(NVIDIA, (0x10 | \
+                                ((h) & 0xf) | \
+                                (((k) & 0xff) << 12) | \
+                                (((g) & 0x3) << 20) | \
+                                (((s) & 0x1) << 22) | \
+                                (((c) & 0x7) << 23)))
+
+/* To grandfather in prior block linear format modifiers to the above layout,
+ * the page kind "0", which corresponds to "pitch/linear" and hence is unusable
+ * with block-linear layouts, is remapped within drivers to the value 0xfe,
+ * which corresponds to the "generic" kind used for simple single-sample
+ * uncompressed color formats on Fermi - Volta GPUs.
+ */
+static inline uint64_t
+drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier)
+{
+       if (!(modifier & 0x10) || (modifier & (0xff << 12)))
+               return modifier;
+       else
+               return modifier | (0xfe << 12);
+}
+
+/*
+ * 16Bx2 Block Linear layout, used by Tegra K1 and later
+ *
+ * Pixels are arranged in 64x8 Groups Of Bytes (GOBs). GOBs are then stacked
+ * vertically by a power of 2 (1 to 32 GOBs) to form a block.
+ *
+ * Within a GOB, data is ordered as 16B x 2 lines sectors laid in Z-shape.
+ *
+ * Parameter 'v' is the log2 encoding of the number of GOBs stacked vertically.
+ * Valid values are:
+ *
+ * 0 == ONE_GOB
+ * 1 == TWO_GOBS
+ * 2 == FOUR_GOBS
+ * 3 == EIGHT_GOBS
+ * 4 == SIXTEEN_GOBS
+ * 5 == THIRTYTWO_GOBS
+ *
+ * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format
+ * in full detail.
+ */
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(v) \
+       DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 0, 0, 0, (v))
+
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB \
+       DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_TWO_GOB \
+       DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_FOUR_GOB \
+       DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_EIGHT_GOB \
+       DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB \
+       DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4)
+#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB \
+       DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5)
+
+/*
+ * Some Broadcom modifiers take parameters, for example the number of
+ * vertical lines in the image. Reserve the lower 32 bits for modifier
+ * type, and the next 24 bits for parameters. Top 8 bits are the
+ * vendor code.
+ */
+#define __fourcc_mod_broadcom_param_shift 8
+#define __fourcc_mod_broadcom_param_bits 48
+#define fourcc_mod_broadcom_code(val, params) \
+       fourcc_mod_code(BROADCOM, ((((uint64_t)params) << __fourcc_mod_broadcom_param_shift) | val))
+#define fourcc_mod_broadcom_param(m) \
+       ((int)(((m) >> __fourcc_mod_broadcom_param_shift) &     \
+              ((1ULL << __fourcc_mod_broadcom_param_bits) - 1)))
+#define fourcc_mod_broadcom_mod(m) \
+       ((m) & ~(((1ULL << __fourcc_mod_broadcom_param_bits) - 1) <<    \
+                __fourcc_mod_broadcom_param_shift))
+
+/*
+ * Broadcom VC4 "T" format
+ *
+ * This is the primary layout that the V3D GPU can texture from (it
+ * can't do linear).  The T format has:
+ *
+ * - 64b utiles of pixels in a raster-order grid according to cpp.  It's 4x4
+ *   pixels at 32 bit depth.
+ *
+ * - 1k subtiles made of a 4x4 raster-order grid of 64b utiles (so usually
+ *   16x16 pixels).
+ *
+ * - 4k tiles made of a 2x2 grid of 1k subtiles (so usually 32x32 pixels).  On
+ *   even 4k tile rows, they're arranged as (BL, TL, TR, BR), and on odd rows
+ *   they're (TR, BR, BL, TL), where bottom left is start of memory.
+ *
+ * - an image made of 4k tiles in rows either left-to-right (even rows of 4k
+ *   tiles) or right-to-left (odd rows of 4k tiles).
+ */
+#define DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED fourcc_mod_code(BROADCOM, 1)
+
+/*
+ * Broadcom SAND format
+ *
+ * This is the native format that the H.264 codec block uses.  For VC4
+ * HVS, it is only valid for H.264 (NV12/21) and RGBA modes.
+ *
+ * The image can be considered to be split into columns, and the
+ * columns are placed consecutively into memory.  The width of those
+ * columns can be either 32, 64, 128, or 256 pixels, but in practice
+ * only 128 pixel columns are used.
+ *
+ * The pitch between the start of each column is set to optimally
+ * switch between SDRAM banks. This is passed as the number of lines
+ * of column width in the modifier (we can't use the stride value due
+ * to various core checks that look at it , so you should set the
+ * stride to width*cpp).
+ *
+ * Note that the column height for this format modifier is the same
+ * for all of the planes, assuming that each column contains both Y
+ * and UV.  Some SAND-using hardware stores UV in a separate tiled
+ * image from Y to reduce the column height, which is not supported
+ * with these modifiers.
+ *
+ * The DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT modifier is also
+ * supported for DRM_FORMAT_P030 where the columns remain as 128 bytes
+ * wide, but as this is a 10 bpp format that translates to 96 pixels.
+ */
+
+#define DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(v) \
+       fourcc_mod_broadcom_code(2, v)
+#define DRM_FORMAT_MOD_BROADCOM_SAND64_COL_HEIGHT(v) \
+       fourcc_mod_broadcom_code(3, v)
+#define DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(v) \
+       fourcc_mod_broadcom_code(4, v)
+#define DRM_FORMAT_MOD_BROADCOM_SAND256_COL_HEIGHT(v) \
+       fourcc_mod_broadcom_code(5, v)
+
+#define DRM_FORMAT_MOD_BROADCOM_SAND32 \
+       DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(0)
+#define DRM_FORMAT_MOD_BROADCOM_SAND64 \
+       DRM_FORMAT_MOD_BROADCOM_SAND64_COL_HEIGHT(0)
+#define DRM_FORMAT_MOD_BROADCOM_SAND128 \
+       DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(0)
+#define DRM_FORMAT_MOD_BROADCOM_SAND256 \
+       DRM_FORMAT_MOD_BROADCOM_SAND256_COL_HEIGHT(0)
+
+/* Broadcom UIF format
+ *
+ * This is the common format for the current Broadcom multimedia
+ * blocks, including V3D 3.x and newer, newer video codecs, and
+ * displays.
+ *
+ * The image consists of utiles (64b blocks), UIF blocks (2x2 utiles),
+ * and macroblocks (4x4 UIF blocks).  Those 4x4 UIF block groups are
+ * stored in columns, with padding between the columns to ensure that
+ * moving from one column to the next doesn't hit the same SDRAM page
+ * bank.
+ *
+ * To calculate the padding, it is assumed that each hardware block
+ * and the software driving it knows the platform's SDRAM page size,
+ * number of banks, and XOR address, and that it's identical between
+ * all blocks using the format.  This tiling modifier will use XOR as
+ * necessary to reduce the padding.  If a hardware block can't do XOR,
+ * the assumption is that a no-XOR tiling modifier will be created.
+ */
+#define DRM_FORMAT_MOD_BROADCOM_UIF fourcc_mod_code(BROADCOM, 6)
+
+/*
+ * Arm Framebuffer Compression (AFBC) modifiers
+ *
+ * AFBC is a proprietary lossless image compression protocol and format.
+ * It provides fine-grained random access and minimizes the amount of data
+ * transferred between IP blocks.
+ *
+ * AFBC has several features which may be supported and/or used, which are
+ * represented using bits in the modifier. Not all combinations are valid,
+ * and different devices or use-cases may support different combinations.
+ *
+ * Further information on the use of AFBC modifiers can be found in
+ * Documentation/gpu/afbc.rst
+ */
+
+/*
+ * The top 4 bits (out of the 56 bits alloted for specifying vendor specific
+ * modifiers) denote the category for modifiers. Currently we have three
+ * categories of modifiers ie AFBC, MISC and AFRC. We can have a maximum of
+ * sixteen different categories.
+ */
+#define DRM_FORMAT_MOD_ARM_CODE(__type, __val) \
+       fourcc_mod_code(ARM, ((uint64_t)(__type) << 52) | ((__val) & 0x000fffffffffffffULL))
+
+#define DRM_FORMAT_MOD_ARM_TYPE_AFBC 0x00
+#define DRM_FORMAT_MOD_ARM_TYPE_MISC 0x01
+
+#define DRM_FORMAT_MOD_ARM_AFBC(__afbc_mode) \
+       DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_AFBC, __afbc_mode)
+
+/*
+ * AFBC superblock size
+ *
+ * Indicates the superblock size(s) used for the AFBC buffer. The buffer
+ * size (in pixels) must be aligned to a multiple of the superblock size.
+ * Four lowest significant bits(LSBs) are reserved for block size.
+ *
+ * Where one superblock size is specified, it applies to all planes of the
+ * buffer (e.g. 16x16, 32x8). When multiple superblock sizes are specified,
+ * the first applies to the Luma plane and the second applies to the Chroma
+ * plane(s). e.g. (32x8_64x4 means 32x8 Luma, with 64x4 Chroma).
+ * Multiple superblock sizes are only valid for multi-plane YCbCr formats.
+ */
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_MASK      0xf
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_16x16     (1ULL)
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_32x8      (2ULL)
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_64x4      (3ULL)
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4 (4ULL)
+
+/*
+ * AFBC lossless colorspace transform
+ *
+ * Indicates that the buffer makes use of the AFBC lossless colorspace
+ * transform.
+ */
+#define AFBC_FORMAT_MOD_YTR     (1ULL <<  4)
+
+/*
+ * AFBC block-split
+ *
+ * Indicates that the payload of each superblock is split. The second
+ * half of the payload is positioned at a predefined offset from the start
+ * of the superblock payload.
+ */
+#define AFBC_FORMAT_MOD_SPLIT   (1ULL <<  5)
+
+/*
+ * AFBC sparse layout
+ *
+ * This flag indicates that the payload of each superblock must be stored at a
+ * predefined position relative to the other superblocks in the same AFBC
+ * buffer. This order is the same order used by the header buffer. In this mode
+ * each superblock is given the same amount of space as an uncompressed
+ * superblock of the particular format would require, rounding up to the next
+ * multiple of 128 bytes in size.
+ */
+#define AFBC_FORMAT_MOD_SPARSE  (1ULL <<  6)
+
+/*
+ * AFBC copy-block restrict
+ *
+ * Buffers with this flag must obey the copy-block restriction. The restriction
+ * is such that there are no copy-blocks referring across the border of 8x8
+ * blocks. For the subsampled data the 8x8 limitation is also subsampled.
+ */
+#define AFBC_FORMAT_MOD_CBR     (1ULL <<  7)
+
+/*
+ * AFBC tiled layout
+ *
+ * The tiled layout groups superblocks in 8x8 or 4x4 tiles, where all
+ * superblocks inside a tile are stored together in memory. 8x8 tiles are used
+ * for pixel formats up to and including 32 bpp while 4x4 tiles are used for
+ * larger bpp formats. The order between the tiles is scan line.
+ * When the tiled layout is used, the buffer size (in pixels) must be aligned
+ * to the tile size.
+ */
+#define AFBC_FORMAT_MOD_TILED   (1ULL <<  8)
+
+/*
+ * AFBC solid color blocks
+ *
+ * Indicates that the buffer makes use of solid-color blocks, whereby bandwidth
+ * can be reduced if a whole superblock is a single color.
+ */
+#define AFBC_FORMAT_MOD_SC      (1ULL <<  9)
+
+/*
+ * AFBC double-buffer
+ *
+ * Indicates that the buffer is allocated in a layout safe for front-buffer
+ * rendering.
+ */
+#define AFBC_FORMAT_MOD_DB      (1ULL << 10)
+
+/*
+ * AFBC buffer content hints
+ *
+ * Indicates that the buffer includes per-superblock content hints.
+ */
+#define AFBC_FORMAT_MOD_BCH     (1ULL << 11)
+
+/* AFBC uncompressed storage mode
+ *
+ * Indicates that the buffer is using AFBC uncompressed storage mode.
+ * In this mode all superblock payloads in the buffer use the uncompressed
+ * storage mode, which is usually only used for data which cannot be compressed.
+ * The buffer layout is the same as for AFBC buffers without USM set, this only
+ * affects the storage mode of the individual superblocks. Note that even a
+ * buffer without USM set may use uncompressed storage mode for some or all
+ * superblocks, USM just guarantees it for all.
+ */
+#define AFBC_FORMAT_MOD_USM    (1ULL << 12)
+
+/*
+ * Arm Fixed-Rate Compression (AFRC) modifiers
+ *
+ * AFRC is a proprietary fixed rate image compression protocol and format,
+ * designed to provide guaranteed bandwidth and memory footprint
+ * reductions in graphics and media use-cases.
+ *
+ * AFRC buffers consist of one or more planes, with the same components
+ * and meaning as an uncompressed buffer using the same pixel format.
+ *
+ * Within each plane, the pixel/luma/chroma values are grouped into
+ * "coding unit" blocks which are individually compressed to a
+ * fixed size (in bytes). All coding units within a given plane of a buffer
+ * store the same number of values, and have the same compressed size.
+ *
+ * The coding unit size is configurable, allowing different rates of compression.
+ *
+ * The start of each AFRC buffer plane must be aligned to an alignment granule which
+ * depends on the coding unit size.
+ *
+ * Coding Unit Size   Plane Alignment
+ * ----------------   ---------------
+ * 16 bytes           1024 bytes
+ * 24 bytes           512  bytes
+ * 32 bytes           2048 bytes
+ *
+ * Coding units are grouped into paging tiles. AFRC buffer dimensions must be aligned
+ * to a multiple of the paging tile dimensions.
+ * The dimensions of each paging tile depend on whether the buffer is optimised for
+ * scanline (SCAN layout) or rotated (ROT layout) access.
+ *
+ * Layout   Paging Tile Width   Paging Tile Height
+ * ------   -----------------   ------------------
+ * SCAN     16 coding units     4 coding units
+ * ROT      8  coding units     8 coding units
+ *
+ * The dimensions of each coding unit depend on the number of components
+ * in the compressed plane and whether the buffer is optimised for
+ * scanline (SCAN layout) or rotated (ROT layout) access.
+ *
+ * Number of Components in Plane   Layout      Coding Unit Width   Coding Unit Height
+ * -----------------------------   ---------   -----------------   ------------------
+ * 1                               SCAN        16 samples          4 samples
+ * Example: 16x4 luma samples in a 'Y' plane
+ *          16x4 chroma 'V' values, in the 'V' plane of a fully-planar YUV buffer
+ * -----------------------------   ---------   -----------------   ------------------
+ * 1                               ROT         8 samples           8 samples
+ * Example: 8x8 luma samples in a 'Y' plane
+ *          8x8 chroma 'V' values, in the 'V' plane of a fully-planar YUV buffer
+ * -----------------------------   ---------   -----------------   ------------------
+ * 2                               DONT CARE   8 samples           4 samples
+ * Example: 8x4 chroma pairs in the 'UV' plane of a semi-planar YUV buffer
+ * -----------------------------   ---------   -----------------   ------------------
+ * 3                               DONT CARE   4 samples           4 samples
+ * Example: 4x4 pixels in an RGB buffer without alpha
+ * -----------------------------   ---------   -----------------   ------------------
+ * 4                               DONT CARE   4 samples           4 samples
+ * Example: 4x4 pixels in an RGB buffer with alpha
+ */
+
+#define DRM_FORMAT_MOD_ARM_TYPE_AFRC 0x02
+
+#define DRM_FORMAT_MOD_ARM_AFRC(__afrc_mode) \
+       DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_AFRC, __afrc_mode)
+
+/*
+ * AFRC coding unit size modifier.
+ *
+ * Indicates the number of bytes used to store each compressed coding unit for
+ * one or more planes in an AFRC encoded buffer. The coding unit size for chrominance
+ * is the same for both Cb and Cr, which may be stored in separate planes.
+ *
+ * AFRC_FORMAT_MOD_CU_SIZE_P0 indicates the number of bytes used to store
+ * each compressed coding unit in the first plane of the buffer. For RGBA buffers
+ * this is the only plane, while for semi-planar and fully-planar YUV buffers,
+ * this corresponds to the luma plane.
+ *
+ * AFRC_FORMAT_MOD_CU_SIZE_P12 indicates the number of bytes used to store
+ * each compressed coding unit in the second and third planes in the buffer.
+ * For semi-planar and fully-planar YUV buffers, this corresponds to the chroma plane(s).
+ *
+ * For single-plane buffers, AFRC_FORMAT_MOD_CU_SIZE_P0 must be specified
+ * and AFRC_FORMAT_MOD_CU_SIZE_P12 must be zero.
+ * For semi-planar and fully-planar buffers, both AFRC_FORMAT_MOD_CU_SIZE_P0 and
+ * AFRC_FORMAT_MOD_CU_SIZE_P12 must be specified.
+ */
+#define AFRC_FORMAT_MOD_CU_SIZE_MASK 0xf
+#define AFRC_FORMAT_MOD_CU_SIZE_16 (1ULL)
+#define AFRC_FORMAT_MOD_CU_SIZE_24 (2ULL)
+#define AFRC_FORMAT_MOD_CU_SIZE_32 (3ULL)
+
+#define AFRC_FORMAT_MOD_CU_SIZE_P0(__afrc_cu_size) (__afrc_cu_size)
+#define AFRC_FORMAT_MOD_CU_SIZE_P12(__afrc_cu_size) ((__afrc_cu_size) << 4)
+
+/*
+ * AFRC scanline memory layout.
+ *
+ * Indicates if the buffer uses the scanline-optimised layout
+ * for an AFRC encoded buffer, otherwise, it uses the rotation-optimised layout.
+ * The memory layout is the same for all planes.
+ */
+#define AFRC_FORMAT_MOD_LAYOUT_SCAN (1ULL << 8)
+
+/*
+ * Arm 16x16 Block U-Interleaved modifier
+ *
+ * This is used by Arm Mali Utgard and Midgard GPUs. It divides the image
+ * into 16x16 pixel blocks. Blocks are stored linearly in order, but pixels
+ * in the block are reordered.
+ */
+#define DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED \
+       DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_MISC, 1ULL)
+
+/*
+ * Allwinner tiled modifier
+ *
+ * This tiling mode is implemented by the VPU found on all Allwinner platforms,
+ * codenamed sunxi. It is associated with a YUV format that uses either 2 or 3
+ * planes.
+ *
+ * With this tiling, the luminance samples are disposed in tiles representing
+ * 32x32 pixels and the chrominance samples in tiles representing 32x64 pixels.
+ * The pixel order in each tile is linear and the tiles are disposed linearly,
+ * both in row-major order.
+ */
+#define DRM_FORMAT_MOD_ALLWINNER_TILED fourcc_mod_code(ALLWINNER, 1)
+
+/*
+ * Amlogic Video Framebuffer Compression modifiers
+ *
+ * Amlogic uses a proprietary lossless image compression protocol and format
+ * for their hardware video codec accelerators, either video decoders or
+ * video input encoders.
+ *
+ * It considerably reduces memory bandwidth while writing and reading
+ * frames in memory.
+ *
+ * The underlying storage is considered to be 3 components, 8bit or 10-bit
+ * per component YCbCr 420, single plane :
+ * - DRM_FORMAT_YUV420_8BIT
+ * - DRM_FORMAT_YUV420_10BIT
+ *
+ * The first 8 bits of the mode defines the layout, then the following 8 bits
+ * defines the options changing the layout.
+ *
+ * Not all combinations are valid, and different SoCs may support different
+ * combinations of layout and options.
+ */
+#define __fourcc_mod_amlogic_layout_mask 0xff
+#define __fourcc_mod_amlogic_options_shift 8
+#define __fourcc_mod_amlogic_options_mask 0xff
+
+#define DRM_FORMAT_MOD_AMLOGIC_FBC(__layout, __options) \
+       fourcc_mod_code(AMLOGIC, \
+                       ((__layout) & __fourcc_mod_amlogic_layout_mask) | \
+                       (((__options) & __fourcc_mod_amlogic_options_mask) \
+                        << __fourcc_mod_amlogic_options_shift))
+
+/* Amlogic FBC Layouts */
+
+/*
+ * Amlogic FBC Basic Layout
+ *
+ * The basic layout is composed of:
+ * - a body content organized in 64x32 superblocks with 4096 bytes per
+ *   superblock in default mode.
+ * - a 32 bytes per 128x64 header block
+ *
+ * This layout is transferrable between Amlogic SoCs supporting this modifier.
+ */
+#define AMLOGIC_FBC_LAYOUT_BASIC               (1ULL)
+
+/*
+ * Amlogic FBC Scatter Memory layout
+ *
+ * Indicates the header contains IOMMU references to the compressed
+ * frames content to optimize memory access and layout.
+ *
+ * In this mode, only the header memory address is needed, thus the
+ * content memory organization is tied to the current producer
+ * execution and cannot be saved/dumped neither transferrable between
+ * Amlogic SoCs supporting this modifier.
+ *
+ * Due to the nature of the layout, these buffers are not expected to
+ * be accessible by the user-space clients, but only accessible by the
+ * hardware producers and consumers.
+ *
+ * The user-space clients should expect a failure while trying to mmap
+ * the DMA-BUF handle returned by the producer.
+ */
+#define AMLOGIC_FBC_LAYOUT_SCATTER             (2ULL)
+
+/* Amlogic FBC Layout Options Bit Mask */
+
+/*
+ * Amlogic FBC Memory Saving mode
+ *
+ * Indicates the storage is packed when pixel size is multiple of word
+ * boudaries, i.e. 8bit should be stored in this mode to save allocation
+ * memory.
+ *
+ * This mode reduces body layout to 3072 bytes per 64x32 superblock with
+ * the basic layout and 3200 bytes per 64x32 superblock combined with
+ * the scatter layout.
+ */
+#define AMLOGIC_FBC_OPTION_MEM_SAVING          (1ULL << 0)
+
+/*
+ * AMD modifiers
+ *
+ * Memory layout:
+ *
+ * without DCC:
+ *   - main surface
+ *
+ * with DCC & without DCC_RETILE:
+ *   - main surface in plane 0
+ *   - DCC surface in plane 1 (RB-aligned, pipe-aligned if DCC_PIPE_ALIGN is set)
+ *
+ * with DCC & DCC_RETILE:
+ *   - main surface in plane 0
+ *   - displayable DCC surface in plane 1 (not RB-aligned & not pipe-aligned)
+ *   - pipe-aligned DCC surface in plane 2 (RB-aligned & pipe-aligned)
+ *
+ * For multi-plane formats the above surfaces get merged into one plane for
+ * each format plane, based on the required alignment only.
+ *
+ * Bits  Parameter                Notes
+ * ----- ------------------------ ---------------------------------------------
+ *
+ *   7:0 TILE_VERSION             Values are AMD_FMT_MOD_TILE_VER_*
+ *  12:8 TILE                     Values are AMD_FMT_MOD_TILE_<version>_*
+ *    13 DCC
+ *    14 DCC_RETILE
+ *    15 DCC_PIPE_ALIGN
+ *    16 DCC_INDEPENDENT_64B
+ *    17 DCC_INDEPENDENT_128B
+ * 19:18 DCC_MAX_COMPRESSED_BLOCK Values are AMD_FMT_MOD_DCC_BLOCK_*
+ *    20 DCC_CONSTANT_ENCODE
+ * 23:21 PIPE_XOR_BITS            Only for some chips
+ * 26:24 BANK_XOR_BITS            Only for some chips
+ * 29:27 PACKERS                  Only for some chips
+ * 32:30 RB                       Only for some chips
+ * 35:33 PIPE                     Only for some chips
+ * 55:36 -                        Reserved for future use, must be zero
+ */
+#define AMD_FMT_MOD fourcc_mod_code(AMD, 0)
+
+#define IS_AMD_FMT_MOD(val) (((val) >> 56) == DRM_FORMAT_MOD_VENDOR_AMD)
+
+/* Reserve 0 for GFX8 and older */
+#define AMD_FMT_MOD_TILE_VER_GFX9 1
+#define AMD_FMT_MOD_TILE_VER_GFX10 2
+#define AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS 3
+#define AMD_FMT_MOD_TILE_VER_GFX11 4
+
+/*
+ * 64K_S is the same for GFX9/GFX10/GFX10_RBPLUS and hence has GFX9 as canonical
+ * version.
+ */
+#define AMD_FMT_MOD_TILE_GFX9_64K_S 9
+
+/*
+ * 64K_D for non-32 bpp is the same for GFX9/GFX10/GFX10_RBPLUS and hence has
+ * GFX9 as canonical version.
+ */
+#define AMD_FMT_MOD_TILE_GFX9_64K_D 10
+#define AMD_FMT_MOD_TILE_GFX9_64K_S_X 25
+#define AMD_FMT_MOD_TILE_GFX9_64K_D_X 26
+#define AMD_FMT_MOD_TILE_GFX9_64K_R_X 27
+#define AMD_FMT_MOD_TILE_GFX11_256K_R_X 31
+
+#define AMD_FMT_MOD_DCC_BLOCK_64B 0
+#define AMD_FMT_MOD_DCC_BLOCK_128B 1
+#define AMD_FMT_MOD_DCC_BLOCK_256B 2
+
+#define AMD_FMT_MOD_TILE_VERSION_SHIFT 0
+#define AMD_FMT_MOD_TILE_VERSION_MASK 0xFF
+#define AMD_FMT_MOD_TILE_SHIFT 8
+#define AMD_FMT_MOD_TILE_MASK 0x1F
+
+/* Whether DCC compression is enabled. */
+#define AMD_FMT_MOD_DCC_SHIFT 13
+#define AMD_FMT_MOD_DCC_MASK 0x1
+
+/*
+ * Whether to include two DCC surfaces, one which is rb & pipe aligned, and
+ * one which is not-aligned.
+ */
+#define AMD_FMT_MOD_DCC_RETILE_SHIFT 14
+#define AMD_FMT_MOD_DCC_RETILE_MASK 0x1
+
+/* Only set if DCC_RETILE = false */
+#define AMD_FMT_MOD_DCC_PIPE_ALIGN_SHIFT 15
+#define AMD_FMT_MOD_DCC_PIPE_ALIGN_MASK 0x1
+
+#define AMD_FMT_MOD_DCC_INDEPENDENT_64B_SHIFT 16
+#define AMD_FMT_MOD_DCC_INDEPENDENT_64B_MASK 0x1
+#define AMD_FMT_MOD_DCC_INDEPENDENT_128B_SHIFT 17
+#define AMD_FMT_MOD_DCC_INDEPENDENT_128B_MASK 0x1
+#define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_SHIFT 18
+#define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3
+
+/*
+ * DCC supports embedding some clear colors directly in the DCC surface.
+ * However, on older GPUs the rendering HW ignores the embedded clear color
+ * and prefers the driver provided color. This necessitates doing a fastclear
+ * eliminate operation before a process transfers control.
+ *
+ * If this bit is set that means the fastclear eliminate is not needed for these
+ * embeddable colors.
+ */
+#define AMD_FMT_MOD_DCC_CONSTANT_ENCODE_SHIFT 20
+#define AMD_FMT_MOD_DCC_CONSTANT_ENCODE_MASK 0x1
+
+/*
+ * The below fields are for accounting for per GPU differences. These are only
+ * relevant for GFX9 and later and if the tile field is *_X/_T.
+ *
+ * PIPE_XOR_BITS = always needed
+ * BANK_XOR_BITS = only for TILE_VER_GFX9
+ * PACKERS = only for TILE_VER_GFX10_RBPLUS
+ * RB = only for TILE_VER_GFX9 & DCC
+ * PIPE = only for TILE_VER_GFX9 & DCC & (DCC_RETILE | DCC_PIPE_ALIGN)
+ */
+#define AMD_FMT_MOD_PIPE_XOR_BITS_SHIFT 21
+#define AMD_FMT_MOD_PIPE_XOR_BITS_MASK 0x7
+#define AMD_FMT_MOD_BANK_XOR_BITS_SHIFT 24
+#define AMD_FMT_MOD_BANK_XOR_BITS_MASK 0x7
+#define AMD_FMT_MOD_PACKERS_SHIFT 27
+#define AMD_FMT_MOD_PACKERS_MASK 0x7
+#define AMD_FMT_MOD_RB_SHIFT 30
+#define AMD_FMT_MOD_RB_MASK 0x7
+#define AMD_FMT_MOD_PIPE_SHIFT 33
+#define AMD_FMT_MOD_PIPE_MASK 0x7
+
+#define AMD_FMT_MOD_SET(field, value) \
+       ((uint64_t)(value) << AMD_FMT_MOD_##field##_SHIFT)
+#define AMD_FMT_MOD_GET(field, value) \
+       (((value) >> AMD_FMT_MOD_##field##_SHIFT) & AMD_FMT_MOD_##field##_MASK)
+#define AMD_FMT_MOD_CLEAR(field) \
+       (~((uint64_t)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT))
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* DRM_FOURCC_H */
index 5e489872516827f195db45746660b9ed7d1f4870..1eb84b5087f8a1d1c1c8f44b5fcc246e04a5a76e 100644 (file)
@@ -28,7 +28,7 @@
 #define _BITUL(x)      (_UL(1) << (x))
 #define _BITULL(x)     (_ULL(1) << (x))
 
-#define __ALIGN_KERNEL(x, a)           __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
+#define __ALIGN_KERNEL(x, a)           __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1)
 #define __ALIGN_KERNEL_MASK(x, mask)   (((x) + (mask)) & ~(mask))
 
 #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
index 8bfd01d230dac8ce236b84b59790f057619303e3..99fcddf04f88f8b6dbd66a57fa58195e394c28bf 100644 (file)
  * have the same layout for 32-bit and 64-bit userland.
  */
 
+/* Note on reserved space.
+ * Reserved fields must not be accessed directly by user space because
+ * they may be replaced by a different field in the future. They must
+ * be initialized to zero before making the request, e.g. via memset
+ * of the entire structure or implicitly by not being set in a structure
+ * initializer.
+ */
+
 /**
  * struct ethtool_cmd - DEPRECATED, link control and status
  * This structure is DEPRECATED, please use struct ethtool_link_settings.
@@ -67,6 +75,7 @@
  *     and other link features that the link partner advertised
  *     through autonegotiation; 0 if unknown or not applicable.
  *     Read-only.
+ * @reserved: Reserved for future use; see the note on reserved space.
  *
  * The link speed in Mbps is split between @speed and @speed_hi.  Use
  * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to
@@ -150,11 +159,14 @@ static inline uint32_t ethtool_cmd_speed(const struct ethtool_cmd *ep)
  *     in its bus driver structure (e.g. pci_driver::name).  Must
  *     not be an empty string.
  * @version: Driver version string; may be an empty string
- * @fw_version: Firmware version string; may be an empty string
- * @erom_version: Expansion ROM version string; may be an empty string
+ * @fw_version: Firmware version string; driver defined; may be an
+ *     empty string
+ * @erom_version: Expansion ROM version string; driver defined; may be
+ *     an empty string
  * @bus_info: Device bus address.  This should match the dev_name()
  *     string for the underlying bus device, if there is one.  May be
  *     an empty string.
+ * @reserved2: Reserved for future use; see the note on reserved space.
  * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and
  *     %ETHTOOL_SPFLAGS commands; also the number of strings in the
  *     %ETH_SS_PRIV_FLAGS set
@@ -169,10 +181,6 @@ static inline uint32_t ethtool_cmd_speed(const struct ethtool_cmd *ep)
  *
  * Users can use the %ETHTOOL_GSSET_INFO command to get the number of
  * strings in any string set (from Linux 2.6.34).
- *
- * Drivers should set at most @driver, @version, @fw_version and
- * @bus_info in their get_drvinfo() implementation.  The ethtool
- * core fills in the other fields using other driver operations.
  */
 struct ethtool_drvinfo {
        uint32_t        cmd;
@@ -221,9 +229,10 @@ enum tunable_id {
        ETHTOOL_RX_COPYBREAK,
        ETHTOOL_TX_COPYBREAK,
        ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */
+       ETHTOOL_TX_COPYBREAK_BUF_SIZE,
        /*
         * Add your fresh new tunable attribute above and remember to update
-        * tunable_strings[] in net/core/ethtool.c
+        * tunable_strings[] in net/ethtool/common.c
         */
        __ETHTOOL_TUNABLE_COUNT,
 };
@@ -246,7 +255,7 @@ struct ethtool_tunable {
        uint32_t        id;
        uint32_t        type_id;
        uint32_t        len;
-       void    *data[0];
+       void    *data[];
 };
 
 #define DOWNSHIFT_DEV_DEFAULT_COUNT    0xff
@@ -287,7 +296,7 @@ enum phy_tunable_id {
        ETHTOOL_PHY_EDPD,
        /*
         * Add your fresh new phy tunable attribute above and remember to update
-        * phy_tunable_strings[] in net/core/ethtool.c
+        * phy_tunable_strings[] in net/ethtool/common.c
         */
        __ETHTOOL_PHY_TUNABLE_COUNT,
 };
@@ -311,7 +320,7 @@ struct ethtool_regs {
        uint32_t        cmd;
        uint32_t        version;
        uint32_t        len;
-       uint8_t data[0];
+       uint8_t data[];
 };
 
 /**
@@ -337,7 +346,7 @@ struct ethtool_eeprom {
        uint32_t        magic;
        uint32_t        offset;
        uint32_t        len;
-       uint8_t data[0];
+       uint8_t data[];
 };
 
 /**
@@ -356,6 +365,7 @@ struct ethtool_eeprom {
  * @tx_lpi_timer: Time in microseconds the interface delays prior to asserting
  *     its tx lpi (after reaching 'idle' state). Effective only when eee
  *     was negotiated and tx_lpi_enabled was set.
+ * @reserved: Reserved for future use; see the note on reserved space.
  */
 struct ethtool_eee {
        uint32_t        cmd;
@@ -374,6 +384,7 @@ struct ethtool_eee {
  * @cmd: %ETHTOOL_GMODULEINFO
  * @type: Standard the module information conforms to %ETH_MODULE_SFF_xxxx
  * @eeprom_len: Length of the eeprom
+ * @reserved: Reserved for future use; see the note on reserved space.
  *
  * This structure is used to return the information to
  * properly size memory for a subsequent call to %ETHTOOL_GMODULEEEPROM.
@@ -579,9 +590,7 @@ struct ethtool_pauseparam {
        uint32_t        tx_pause;
 };
 
-/**
- * enum ethtool_link_ext_state - link extended state
- */
+/* Link extended state */
 enum ethtool_link_ext_state {
        ETHTOOL_LINK_EXT_STATE_AUTONEG,
        ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
@@ -593,12 +602,10 @@ enum ethtool_link_ext_state {
        ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE,
        ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED,
        ETHTOOL_LINK_EXT_STATE_OVERHEAT,
+       ETHTOOL_LINK_EXT_STATE_MODULE,
 };
 
-/**
- * enum ethtool_link_ext_substate_autoneg - more information in addition to
- * ETHTOOL_LINK_EXT_STATE_AUTONEG.
- */
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. */
 enum ethtool_link_ext_substate_autoneg {
        ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1,
        ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED,
@@ -608,9 +615,7 @@ enum ethtool_link_ext_substate_autoneg {
        ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD,
 };
 
-/**
- * enum ethtool_link_ext_substate_link_training - more information in addition to
- * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE.
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE.
  */
 enum ethtool_link_ext_substate_link_training {
        ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1,
@@ -619,9 +624,7 @@ enum ethtool_link_ext_substate_link_training {
        ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT,
 };
 
-/**
- * enum ethtool_link_ext_substate_logical_mismatch - more information in addition
- * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH.
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH.
  */
 enum ethtool_link_ext_substate_link_logical_mismatch {
        ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1,
@@ -631,24 +634,26 @@ enum ethtool_link_ext_substate_link_logical_mismatch {
        ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED,
 };
 
-/**
- * enum ethtool_link_ext_substate_bad_signal_integrity - more information in
- * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY.
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY.
  */
 enum ethtool_link_ext_substate_bad_signal_integrity {
        ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1,
        ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE,
+       ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST,
+       ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS,
 };
 
-/**
- * enum ethtool_link_ext_substate_cable_issue - more information in
- * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE.
- */
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. */
 enum ethtool_link_ext_substate_cable_issue {
        ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1,
        ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE,
 };
 
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_MODULE. */
+enum ethtool_link_ext_substate_module {
+       ETHTOOL_LINK_EXT_SUBSTATE_MODULE_CMIS_NOT_READY = 1,
+};
+
 #define ETH_GSTRING_LEN                32
 
 /**
@@ -661,6 +666,7 @@ enum ethtool_link_ext_substate_cable_issue {
  *     now deprecated
  * @ETH_SS_FEATURES: Device feature names
  * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names
+ * @ETH_SS_TUNABLES: tunable names
  * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS
  * @ETH_SS_PHY_TUNABLES: PHY tunable names
  * @ETH_SS_LINK_MODES: link mode names
@@ -670,6 +676,13 @@ enum ethtool_link_ext_substate_cable_issue {
  * @ETH_SS_TS_TX_TYPES: timestamping Tx types
  * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters
  * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types
+ * @ETH_SS_STATS_STD: standardized stats
+ * @ETH_SS_STATS_ETH_PHY: names of IEEE 802.3 PHY statistics
+ * @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics
+ * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics
+ * @ETH_SS_STATS_RMON: names of RMON statistics
+ *
+ * @ETH_SS_COUNT: number of defined string sets
  */
 enum ethtool_stringset {
        ETH_SS_TEST             = 0,
@@ -688,11 +701,127 @@ enum ethtool_stringset {
        ETH_SS_TS_TX_TYPES,
        ETH_SS_TS_RX_FILTERS,
        ETH_SS_UDP_TUNNEL_TYPES,
+       ETH_SS_STATS_STD,
+       ETH_SS_STATS_ETH_PHY,
+       ETH_SS_STATS_ETH_MAC,
+       ETH_SS_STATS_ETH_CTRL,
+       ETH_SS_STATS_RMON,
 
        /* add new constants above here */
        ETH_SS_COUNT
 };
 
+/**
+ * enum ethtool_mac_stats_src - source of ethtool MAC statistics
+ * @ETHTOOL_MAC_STATS_SRC_AGGREGATE:
+ *     if device supports a MAC merge layer, this retrieves the aggregate
+ *     statistics of the eMAC and pMAC. Otherwise, it retrieves just the
+ *     statistics of the single (express) MAC.
+ * @ETHTOOL_MAC_STATS_SRC_EMAC:
+ *     if device supports a MM layer, this retrieves the eMAC statistics.
+ *     Otherwise, it retrieves the statistics of the single (express) MAC.
+ * @ETHTOOL_MAC_STATS_SRC_PMAC:
+ *     if device supports a MM layer, this retrieves the pMAC statistics.
+ */
+enum ethtool_mac_stats_src {
+       ETHTOOL_MAC_STATS_SRC_AGGREGATE,
+       ETHTOOL_MAC_STATS_SRC_EMAC,
+       ETHTOOL_MAC_STATS_SRC_PMAC,
+};
+
+/**
+ * enum ethtool_module_power_mode_policy - plug-in module power mode policy
+ * @ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH: Module is always in high power mode.
+ * @ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO: Module is transitioned by the host
+ *     to high power mode when the first port using it is put administratively
+ *     up and to low power mode when the last port using it is put
+ *     administratively down.
+ */
+enum ethtool_module_power_mode_policy {
+       ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH = 1,
+       ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO,
+};
+
+/**
+ * enum ethtool_module_power_mode - plug-in module power mode
+ * @ETHTOOL_MODULE_POWER_MODE_LOW: Module is in low power mode.
+ * @ETHTOOL_MODULE_POWER_MODE_HIGH: Module is in high power mode.
+ */
+enum ethtool_module_power_mode {
+       ETHTOOL_MODULE_POWER_MODE_LOW = 1,
+       ETHTOOL_MODULE_POWER_MODE_HIGH,
+};
+
+/**
+ * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE
+ *     functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are
+ *     unknown
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled
+ */
+enum ethtool_podl_pse_admin_state {
+       ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1,
+       ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
+       ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED,
+};
+
+/**
+ * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE.
+ *     IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus:
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is
+ *     asserted true when the PoDL PSE state diagram variable mr_pse_enable is
+ *     false"
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is
+ *     asserted true when either of the PSE state diagram variables
+ *     pi_detecting or pi_classifying is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower”
+ *     is asserted true when the PoDL PSE state diagram variable pi_powered is
+ *     true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted
+ *     true when the PoDL PSE state diagram variable pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true
+ *     when the logical combination of the PoDL PSE state diagram variables
+ *     pi_prebiased*!pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted
+ *     true when the PoDL PSE state diagram variable overload_held is true."
+ */
+enum ethtool_podl_pse_pw_d_status {
+       ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1,
+       ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED,
+       ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING,
+       ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING,
+       ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP,
+       ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE,
+       ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR,
+};
+
+/**
+ * enum ethtool_mm_verify_status - status of MAC Merge Verify function
+ * @ETHTOOL_MM_VERIFY_STATUS_UNKNOWN:
+ *     verification status is unknown
+ * @ETHTOOL_MM_VERIFY_STATUS_INITIAL:
+ *     the 802.3 Verify State diagram is in the state INIT_VERIFICATION
+ * @ETHTOOL_MM_VERIFY_STATUS_VERIFYING:
+ *     the Verify State diagram is in the state VERIFICATION_IDLE,
+ *     SEND_VERIFY or WAIT_FOR_RESPONSE
+ * @ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED:
+ *     indicates that the Verify State diagram is in the state VERIFIED
+ * @ETHTOOL_MM_VERIFY_STATUS_FAILED:
+ *     the Verify State diagram is in the state VERIFY_FAIL
+ * @ETHTOOL_MM_VERIFY_STATUS_DISABLED:
+ *     verification of preemption operation is disabled
+ */
+enum ethtool_mm_verify_status {
+       ETHTOOL_MM_VERIFY_STATUS_UNKNOWN,
+       ETHTOOL_MM_VERIFY_STATUS_INITIAL,
+       ETHTOOL_MM_VERIFY_STATUS_VERIFYING,
+       ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED,
+       ETHTOOL_MM_VERIFY_STATUS_FAILED,
+       ETHTOOL_MM_VERIFY_STATUS_DISABLED,
+};
+
 /**
  * struct ethtool_gstrings - string set for data tagging
  * @cmd: Command number = %ETHTOOL_GSTRINGS
@@ -709,12 +838,13 @@ struct ethtool_gstrings {
        uint32_t        cmd;
        uint32_t        string_set;
        uint32_t        len;
-       uint8_t data[0];
+       uint8_t data[];
 };
 
 /**
  * struct ethtool_sset_info - string set information
  * @cmd: Command number = %ETHTOOL_GSSET_INFO
+ * @reserved: Reserved for future use; see the note on reserved space.
  * @sset_mask: On entry, a bitmask of string sets to query, with bits
  *     numbered according to &enum ethtool_stringset.  On return, a
  *     bitmask of those string sets queried that are supported.
@@ -733,7 +863,7 @@ struct ethtool_sset_info {
        uint32_t        cmd;
        uint32_t        reserved;
        uint64_t        sset_mask;
-       uint32_t        data[0];
+       uint32_t        data[];
 };
 
 /**
@@ -759,6 +889,7 @@ enum ethtool_test_flags {
  * @flags: A bitmask of flags from &enum ethtool_test_flags.  Some
  *     flags may be set by the user on entry; others may be set by
  *     the driver on return.
+ * @reserved: Reserved for future use; see the note on reserved space.
  * @len: On return, the number of test results
  * @data: Array of test results
  *
@@ -772,7 +903,7 @@ struct ethtool_test {
        uint32_t        flags;
        uint32_t        reserved;
        uint32_t        len;
-       uint64_t        data[0];
+       uint64_t        data[];
 };
 
 /**
@@ -789,7 +920,7 @@ struct ethtool_test {
 struct ethtool_stats {
        uint32_t        cmd;
        uint32_t        n_stats;
-       uint64_t        data[0];
+       uint64_t        data[];
 };
 
 /**
@@ -806,7 +937,7 @@ struct ethtool_stats {
 struct ethtool_perm_addr {
        uint32_t        cmd;
        uint32_t        size;
-       uint8_t data[0];
+       uint8_t data[];
 };
 
 /* boolean flags controlling per-interface behavior characteristics.
@@ -959,6 +1090,7 @@ union ethtool_flow_union {
  * @vlan_etype: VLAN EtherType
  * @vlan_tci: VLAN tag control information
  * @data: user defined data
+ * @padding: Reserved for future use; see the note on reserved space.
  *
  * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT
  * is set in &struct ethtool_rx_flow_spec @flow_type.
@@ -1094,7 +1226,7 @@ struct ethtool_rxnfc {
                uint32_t                        rule_cnt;
                uint32_t                        rss_context;
        };
-       uint32_t                                rule_locs[0];
+       uint32_t                                rule_locs[];
 };
 
 
@@ -1114,7 +1246,7 @@ struct ethtool_rxnfc {
 struct ethtool_rxfh_indir {
        uint32_t        cmd;
        uint32_t        size;
-       uint32_t        ring_index[0];
+       uint32_t        ring_index[];
 };
 
 /**
@@ -1134,7 +1266,8 @@ struct ethtool_rxfh_indir {
  *     hardware hash key.
  * @hfunc: Defines the current RSS hash function used by HW (or to be set to).
  *     Valid values are one of the %ETH_RSS_HASH_*.
- * @rsvd:      Reserved for future extensions.
+ * @rsvd8: Reserved for future use; see the note on reserved space.
+ * @rsvd32: Reserved for future use; see the note on reserved space.
  * @rss_config: RX ring/queue index for each hash value i.e., indirection table
  *     of @indir_size uint32_t elements, followed by hash key of @key_size
  *     bytes.
@@ -1154,7 +1287,7 @@ struct ethtool_rxfh {
        uint8_t hfunc;
        uint8_t rsvd8[3];
        uint32_t        rsvd32;
-       uint32_t   rss_config[0];
+       uint32_t   rss_config[];
 };
 #define ETH_RXFH_CONTEXT_ALLOC         0xffffffff
 #define ETH_RXFH_INDIR_NO_CHANGE       0xffffffff
@@ -1239,7 +1372,7 @@ struct ethtool_dump {
        uint32_t        version;
        uint32_t        flag;
        uint32_t        len;
-       uint8_t data[0];
+       uint8_t data[];
 };
 
 #define ETH_FW_DUMP_DISABLE 0
@@ -1271,7 +1404,7 @@ struct ethtool_get_features_block {
 struct ethtool_gfeatures {
        uint32_t        cmd;
        uint32_t        size;
-       struct ethtool_get_features_block features[0];
+       struct ethtool_get_features_block features[];
 };
 
 /**
@@ -1293,7 +1426,7 @@ struct ethtool_set_features_block {
 struct ethtool_sfeatures {
        uint32_t        cmd;
        uint32_t        size;
-       struct ethtool_set_features_block features[0];
+       struct ethtool_set_features_block features[];
 };
 
 /**
@@ -1302,7 +1435,9 @@ struct ethtool_sfeatures {
  * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags
  * @phc_index: device index of the associated PHC, or -1 if there is none
  * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values
+ * @tx_reserved: Reserved for future use; see the note on reserved space.
  * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values
+ * @rx_reserved: Reserved for future use; see the note on reserved space.
  *
  * The bits in the 'tx_types' and 'rx_filters' fields correspond to
  * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values,
@@ -1376,15 +1511,33 @@ struct ethtool_per_queue_op {
 };
 
 /**
- * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters
+ * struct ethtool_fecparam - Ethernet Forward Error Correction parameters
  * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM
- * @active_fec: FEC mode which is active on porte
- * @fec: Bitmask of supported/configured FEC modes
- * @rsvd: Reserved for future extensions. i.e FEC bypass feature.
+ * @active_fec: FEC mode which is active on the port, single bit set, GET only.
+ * @fec: Bitmask of configured FEC modes.
+ * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET.
  *
- * Drivers should reject a non-zero setting of @autoneg when
- * autoneogotiation is disabled (or not supported) for the link.
+ * Note that @reserved was never validated on input and ethtool user space
+ * left it uninitialized when calling SET. Hence going forward it can only be
+ * used to return a value to userspace with GET.
+ *
+ * FEC modes supported by the device can be read via %ETHTOOL_GLINKSETTINGS.
+ * FEC settings are configured by link autonegotiation whenever it's enabled.
+ * With autoneg on %ETHTOOL_GFECPARAM can be used to read the current mode.
  *
+ * When autoneg is disabled %ETHTOOL_SFECPARAM controls the FEC settings.
+ * It is recommended that drivers only accept a single bit set in @fec.
+ * When multiple bits are set in @fec drivers may pick mode in an implementation
+ * dependent way. Drivers should reject mixing %ETHTOOL_FEC_AUTO_BIT with other
+ * FEC modes, because it's unclear whether in this case other modes constrain
+ * AUTO or are independent choices.
+ * Drivers must reject SET requests if they support none of the requested modes.
+ *
+ * If device does not support FEC drivers may use %ETHTOOL_FEC_NONE instead
+ * of returning %EOPNOTSUPP from %ETHTOOL_GFECPARAM.
+ *
+ * See enum ethtool_fec_config_bits for definition of valid bits for both
+ * @fec and @active_fec.
  */
 struct ethtool_fecparam {
        uint32_t   cmd;
@@ -1396,11 +1549,16 @@ struct ethtool_fecparam {
 
 /**
  * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration
- * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported
- * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver
- * @ETHTOOL_FEC_OFF: No FEC Mode
- * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode
- * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode
+ * @ETHTOOL_FEC_NONE_BIT: FEC mode configuration is not supported. Should not
+ *                     be used together with other bits. GET only.
+ * @ETHTOOL_FEC_AUTO_BIT: Select default/best FEC mode automatically, usually
+ *                     based link mode and SFP parameters read from module's
+ *                     EEPROM. This bit does _not_ mean autonegotiation.
+ * @ETHTOOL_FEC_OFF_BIT: No FEC Mode
+ * @ETHTOOL_FEC_RS_BIT: Reed-Solomon FEC Mode
+ * @ETHTOOL_FEC_BASER_BIT: Base-R/Reed-Solomon FEC Mode
+ * @ETHTOOL_FEC_LLRS_BIT: Low Latency Reed Solomon FEC Mode (25G/50G Ethernet
+ *                     Consortium)
  */
 enum ethtool_fec_config_bits {
        ETHTOOL_FEC_NONE_BIT,
@@ -1619,6 +1777,17 @@ enum ethtool_link_mode_bit_indices {
        ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT         = 89,
        ETHTOOL_LINK_MODE_100baseFX_Half_BIT             = 90,
        ETHTOOL_LINK_MODE_100baseFX_Full_BIT             = 91,
+       ETHTOOL_LINK_MODE_10baseT1L_Full_BIT             = 92,
+       ETHTOOL_LINK_MODE_800000baseCR8_Full_BIT         = 93,
+       ETHTOOL_LINK_MODE_800000baseKR8_Full_BIT         = 94,
+       ETHTOOL_LINK_MODE_800000baseDR8_Full_BIT         = 95,
+       ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT       = 96,
+       ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT         = 97,
+       ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT         = 98,
+       ETHTOOL_LINK_MODE_10baseT1S_Full_BIT             = 99,
+       ETHTOOL_LINK_MODE_10baseT1S_Half_BIT             = 100,
+       ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT        = 101,
+
        /* must be last entry */
        __ETHTOOL_LINK_MODE_MASK_NBITS
 };
@@ -1730,6 +1899,7 @@ enum ethtool_link_mode_bit_indices {
 #define SPEED_100000           100000
 #define SPEED_200000           200000
 #define SPEED_400000           400000
+#define SPEED_800000           800000
 
 #define SPEED_UNKNOWN          -1
 
@@ -1767,6 +1937,20 @@ static inline int ethtool_validate_duplex(uint8_t duplex)
 #define MASTER_SLAVE_STATE_SLAVE               3
 #define MASTER_SLAVE_STATE_ERR                 4
 
+/* These are used to throttle the rate of data on the phy interface when the
+ * native speed of the interface is higher than the link speed. These should
+ * not be used for phy interfaces which natively support multiple speeds (e.g.
+ * MII or SGMII).
+ */
+/* No rate matching performed. */
+#define RATE_MATCH_NONE                0
+/* The phy sends pause frames to throttle the MAC. */
+#define RATE_MATCH_PAUSE       1
+/* The phy asserts CRS to prevent the MAC from transmitting. */
+#define RATE_MATCH_CRS         2
+/* The MAC is programmed with a sufficiently-large IPG. */
+#define RATE_MATCH_OPEN_LOOP   3
+
 /* Which connector port. */
 #define PORT_TP                        0x00
 #define PORT_AUI               0x01
@@ -1958,6 +2142,11 @@ enum ethtool_reset_flags {
  *     autonegotiation; 0 if unknown or not applicable.  Read-only.
  * @transceiver: Used to distinguish different possible PHY types,
  *     reported consistently by PHYLIB.  Read-only.
+ * @master_slave_cfg: Master/slave port mode.
+ * @master_slave_state: Master/slave port state.
+ * @rate_matching: Rate adaptation performed by the PHY
+ * @reserved: Reserved for future use; see the note on reserved space.
+ * @link_mode_masks: Variable length bitmaps.
  *
  * If autonegotiation is disabled, the speed and @duplex represent the
  * fixed link mode and are writable if the driver supports multiple
@@ -2007,9 +2196,9 @@ struct ethtool_link_settings {
        uint8_t transceiver;
        uint8_t master_slave_cfg;
        uint8_t master_slave_state;
-       uint8_t reserved1[1];
+       uint8_t rate_matching;
        uint32_t        reserved[7];
-       uint32_t        link_mode_masks[0];
+       uint32_t        link_mode_masks[];
        /* layout of link_mode_masks fields:
         * uint32_t map_supported[link_mode_masks_nwords];
         * uint32_t map_advertising[link_mode_masks_nwords];
index 950d7edb7ef683071481d0a69d8b4539cc359f3c..6b9793842c98496feff47cba2f0a49eecdfac7d6 100644 (file)
  *  7.33
  *  - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID, FATTR_KILL_SUIDGID
  *  - add FUSE_OPEN_KILL_SUIDGID
+ *  - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT
+ *  - add FUSE_SETXATTR_ACL_KILL_SGID
+ *
+ *  7.34
+ *  - add FUSE_SYNCFS
+ *
+ *  7.35
+ *  - add FOPEN_NOFLUSH
+ *
+ *  7.36
+ *  - extend fuse_init_in with reserved fields, add FUSE_INIT_EXT init flag
+ *  - add flags2 to fuse_init_in and fuse_init_out
+ *  - add FUSE_SECURITY_CTX init flag
+ *  - add security context to create, mkdir, symlink, and mknod requests
+ *  - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX
+ *
+ *  7.37
+ *  - add FUSE_TMPFILE
+ *
+ *  7.38
+ *  - add FUSE_EXPIRE_ONLY flag to fuse_notify_inval_entry
+ *  - add FOPEN_PARALLEL_DIRECT_WRITES
+ *  - add total_extlen to fuse_in_header
+ *  - add FUSE_MAX_NR_SECCTX
+ *  - add extension header
+ *  - add FUSE_EXT_GROUPS
+ *  - add FUSE_CREATE_SUPP_GROUP
+ *  - add FUSE_HAS_EXPIRE_ONLY
+ *
+ *  7.39
+ *  - add FUSE_DIRECT_IO_RELAX
+ *  - add FUSE_STATX and related structures
  */
 
 #ifndef _LINUX_FUSE_H
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 33
+#define FUSE_KERNEL_MINOR_VERSION 39
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -237,6 +269,40 @@ struct fuse_attr {
        uint32_t        flags;
 };
 
+/*
+ * The following structures are bit-for-bit compatible with the statx(2) ABI in
+ * Linux.
+ */
+struct fuse_sx_time {
+       int64_t         tv_sec;
+       uint32_t        tv_nsec;
+       int32_t         __reserved;
+};
+
+struct fuse_statx {
+       uint32_t        mask;
+       uint32_t        blksize;
+       uint64_t        attributes;
+       uint32_t        nlink;
+       uint32_t        uid;
+       uint32_t        gid;
+       uint16_t        mode;
+       uint16_t        __spare0[1];
+       uint64_t        ino;
+       uint64_t        size;
+       uint64_t        blocks;
+       uint64_t        attributes_mask;
+       struct fuse_sx_time     atime;
+       struct fuse_sx_time     btime;
+       struct fuse_sx_time     ctime;
+       struct fuse_sx_time     mtime;
+       uint32_t        rdev_major;
+       uint32_t        rdev_minor;
+       uint32_t        dev_major;
+       uint32_t        dev_minor;
+       uint64_t        __spare2[14];
+};
+
 struct fuse_kstatfs {
        uint64_t        blocks;
        uint64_t        bfree;
@@ -281,12 +347,16 @@ struct fuse_file_lock {
  * FOPEN_NONSEEKABLE: the file is not seekable
  * FOPEN_CACHE_DIR: allow caching this directory
  * FOPEN_STREAM: the file is stream-like (no file position at all)
+ * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE)
+ * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode
  */
 #define FOPEN_DIRECT_IO                (1 << 0)
 #define FOPEN_KEEP_CACHE       (1 << 1)
 #define FOPEN_NONSEEKABLE      (1 << 2)
 #define FOPEN_CACHE_DIR                (1 << 3)
 #define FOPEN_STREAM           (1 << 4)
+#define FOPEN_NOFLUSH          (1 << 5)
+#define FOPEN_PARALLEL_DIRECT_WRITES   (1 << 6)
 
 /**
  * INIT request/reply flags
@@ -326,6 +396,17 @@ struct fuse_file_lock {
  *                     does not have CAP_FSETID. Additionally upon
  *                     write/truncate sgid is killed only if file has group
  *                     execute permission. (Same as Linux VFS behavior).
+ * FUSE_SETXATTR_EXT:  Server supports extended struct fuse_setxattr_in
+ * FUSE_INIT_EXT: extended fuse_init_in request
+ * FUSE_INIT_RESERVED: reserved, do not use
+ * FUSE_SECURITY_CTX:  add security context to create, mkdir, symlink, and
+ *                     mknod
+ * FUSE_HAS_INODE_DAX:  use per inode DAX
+ * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir,
+ *                     symlink and mknod (single group that matches parent)
+ * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation
+ * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now
+ *                       allow shared mmap
  */
 #define FUSE_ASYNC_READ                (1 << 0)
 #define FUSE_POSIX_LOCKS       (1 << 1)
@@ -356,6 +437,15 @@ struct fuse_file_lock {
 #define FUSE_MAP_ALIGNMENT     (1 << 26)
 #define FUSE_SUBMOUNTS         (1 << 27)
 #define FUSE_HANDLE_KILLPRIV_V2        (1 << 28)
+#define FUSE_SETXATTR_EXT      (1 << 29)
+#define FUSE_INIT_EXT          (1 << 30)
+#define FUSE_INIT_RESERVED     (1 << 31)
+/* bits 32..63 get shifted down 32 bits into the flags2 field */
+#define FUSE_SECURITY_CTX      (1ULL << 32)
+#define FUSE_HAS_INODE_DAX     (1ULL << 33)
+#define FUSE_CREATE_SUPP_GROUP (1ULL << 34)
+#define FUSE_HAS_EXPIRE_ONLY   (1ULL << 35)
+#define FUSE_DIRECT_IO_RELAX   (1ULL << 36)
 
 /**
  * CUSE INIT request/reply flags
@@ -438,8 +528,10 @@ struct fuse_file_lock {
  * fuse_attr flags
  *
  * FUSE_ATTR_SUBMOUNT: Object is a submount root
+ * FUSE_ATTR_DAX: Enable DAX for this file in per inode DAX mode
  */
 #define FUSE_ATTR_SUBMOUNT      (1 << 0)
+#define FUSE_ATTR_DAX          (1 << 1)
 
 /**
  * Open flags
@@ -447,6 +539,29 @@ struct fuse_file_lock {
  */
 #define FUSE_OPEN_KILL_SUIDGID (1 << 0)
 
+/**
+ * setxattr flags
+ * FUSE_SETXATTR_ACL_KILL_SGID: Clear SGID when system.posix_acl_access is set
+ */
+#define FUSE_SETXATTR_ACL_KILL_SGID    (1 << 0)
+
+/**
+ * notify_inval_entry flags
+ * FUSE_EXPIRE_ONLY
+ */
+#define FUSE_EXPIRE_ONLY               (1 << 0)
+
+/**
+ * extension type
+ * FUSE_MAX_NR_SECCTX: maximum value of &fuse_secctx_header.nr_secctx
+ * FUSE_EXT_GROUPS: &fuse_supp_groups extension
+ */
+enum fuse_ext_type {
+       /* Types 0..31 are reserved for fuse_secctx_header */
+       FUSE_MAX_NR_SECCTX      = 31,
+       FUSE_EXT_GROUPS         = 32,
+};
+
 enum fuse_opcode {
        FUSE_LOOKUP             = 1,
        FUSE_FORGET             = 2,  /* no reply */
@@ -495,6 +610,9 @@ enum fuse_opcode {
        FUSE_COPY_FILE_RANGE    = 47,
        FUSE_SETUPMAPPING       = 48,
        FUSE_REMOVEMAPPING      = 49,
+       FUSE_SYNCFS             = 50,
+       FUSE_TMPFILE            = 51,
+       FUSE_STATX              = 52,
 
        /* CUSE specific operations */
        CUSE_INIT               = 4096,
@@ -559,6 +677,22 @@ struct fuse_attr_out {
        struct fuse_attr attr;
 };
 
+struct fuse_statx_in {
+       uint32_t        getattr_flags;
+       uint32_t        reserved;
+       uint64_t        fh;
+       uint32_t        sx_flags;
+       uint32_t        sx_mask;
+};
+
+struct fuse_statx_out {
+       uint64_t        attr_valid;     /* Cache timeout for the attributes */
+       uint32_t        attr_valid_nsec;
+       uint32_t        flags;
+       uint64_t        spare[2];
+       struct fuse_statx stat;
+};
+
 #define FUSE_COMPAT_MKNOD_IN_SIZE 8
 
 struct fuse_mknod_in {
@@ -677,9 +811,13 @@ struct fuse_fsync_in {
        uint32_t        padding;
 };
 
+#define FUSE_COMPAT_SETXATTR_IN_SIZE 8
+
 struct fuse_setxattr_in {
        uint32_t        size;
        uint32_t        flags;
+       uint32_t        setxattr_flags;
+       uint32_t        padding;
 };
 
 struct fuse_getxattr_in {
@@ -714,6 +852,8 @@ struct fuse_init_in {
        uint32_t        minor;
        uint32_t        max_readahead;
        uint32_t        flags;
+       uint32_t        flags2;
+       uint32_t        unused[11];
 };
 
 #define FUSE_COMPAT_INIT_OUT_SIZE 8
@@ -730,7 +870,8 @@ struct fuse_init_out {
        uint32_t        time_gran;
        uint16_t        max_pages;
        uint16_t        map_alignment;
-       uint32_t        unused[8];
+       uint32_t        flags2;
+       uint32_t        unused[7];
 };
 
 #define CUSE_INIT_INFO_MAX 4096
@@ -821,7 +962,8 @@ struct fuse_in_header {
        uint32_t        uid;
        uint32_t        gid;
        uint32_t        pid;
-       uint32_t        padding;
+       uint16_t        total_extlen; /* length of extensions in 8byte units */
+       uint16_t        padding;
 };
 
 struct fuse_out_header {
@@ -838,9 +980,12 @@ struct fuse_dirent {
        char name[];
 };
 
-#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
-#define FUSE_DIRENT_ALIGN(x) \
+/* Align variable length records to 64bit boundary */
+#define FUSE_REC_ALIGN(x) \
        (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
+
+#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
+#define FUSE_DIRENT_ALIGN(x) FUSE_REC_ALIGN(x)
 #define FUSE_DIRENT_SIZE(d) \
        FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
 
@@ -863,7 +1008,7 @@ struct fuse_notify_inval_inode_out {
 struct fuse_notify_inval_entry_out {
        uint64_t        parent;
        uint32_t        namelen;
-       uint32_t        padding;
+       uint32_t        flags;
 };
 
 struct fuse_notify_delete_out {
@@ -899,7 +1044,8 @@ struct fuse_notify_retrieve_in {
 };
 
 /* Device ioctls: */
-#define FUSE_DEV_IOC_CLONE     _IOR(229, 0, uint32_t)
+#define FUSE_DEV_IOC_MAGIC             229
+#define FUSE_DEV_IOC_CLONE             _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t)
 
 struct fuse_lseek_in {
        uint64_t        fh;
@@ -952,4 +1098,53 @@ struct fuse_removemapping_one {
 #define FUSE_REMOVEMAPPING_MAX_ENTRY   \
                (PAGE_SIZE / sizeof(struct fuse_removemapping_one))
 
+struct fuse_syncfs_in {
+       uint64_t        padding;
+};
+
+/*
+ * For each security context, send fuse_secctx with size of security context
+ * fuse_secctx will be followed by security context name and this in turn
+ * will be followed by actual context label.
+ * fuse_secctx, name, context
+ */
+struct fuse_secctx {
+       uint32_t        size;
+       uint32_t        padding;
+};
+
+/*
+ * Contains the information about how many fuse_secctx structures are being
+ * sent and what's the total size of all security contexts (including
+ * size of fuse_secctx_header).
+ *
+ */
+struct fuse_secctx_header {
+       uint32_t        size;
+       uint32_t        nr_secctx;
+};
+
+/**
+ * struct fuse_ext_header - extension header
+ * @size: total size of this extension including this header
+ * @type: type of extension
+ *
+ * This is made compatible with fuse_secctx_header by using type values >
+ * FUSE_MAX_NR_SECCTX
+ */
+struct fuse_ext_header {
+       uint32_t        size;
+       uint32_t        type;
+};
+
+/**
+ * struct fuse_supp_groups - Supplementary group extension
+ * @nr_groups: number of supplementary groups
+ * @groups: flexible array of group IDs
+ */
+struct fuse_supp_groups {
+       uint32_t        nr_groups;
+       uint32_t        groups[];
+};
+
 #endif /* _LINUX_FUSE_H */
index c403b9cb0d4ed62fb0aedd2c1e7796ffc8d9d818..f6bab08540d81247024938067e84306dc1c00cde 100644 (file)
 #define KEY_PAUSECD            201
 #define KEY_PROG3              202
 #define KEY_PROG4              203
-#define KEY_DASHBOARD          204     /* AL Dashboard */
+#define KEY_ALL_APPLICATIONS   204     /* AC Desktop Show All Applications */
+#define KEY_DASHBOARD          KEY_ALL_APPLICATIONS
 #define KEY_SUSPEND            205
 #define KEY_CLOSE              206     /* AC Close */
 #define KEY_PLAY               207
 #define KEY_VOICECOMMAND               0x246   /* Listening Voice Command */
 #define KEY_ASSISTANT          0x247   /* AL Context-aware desktop assistant */
 #define KEY_KBD_LAYOUT_NEXT    0x248   /* AC Next Keyboard Layout Select */
+#define KEY_EMOJI_PICKER       0x249   /* Show/hide emoji picker (HUTRR101) */
+#define KEY_DICTATE            0x24a   /* Start or Stop Voice Dictation Session (HUTRR99) */
+#define KEY_CAMERA_ACCESS_ENABLE       0x24b   /* Enables programmatic access to camera devices. (HUTRR72) */
+#define KEY_CAMERA_ACCESS_DISABLE      0x24c   /* Disables programmatic access to camera devices. (HUTRR72) */
+#define KEY_CAMERA_ACCESS_TOGGLE       0x24d   /* Toggles the current state of the camera access control. (HUTRR72) */
 
 #define KEY_BRIGHTNESS_MIN             0x250   /* Set Brightness to Minimum */
 #define KEY_BRIGHTNESS_MAX             0x251   /* Set Brightness to Maximum */
 /* Select an area of screen to be copied */
 #define KEY_SELECTIVE_SCREENSHOT       0x27a
 
+/* Move the focus to the next or previous user controllable element within a UI container */
+#define KEY_NEXT_ELEMENT               0x27b
+#define KEY_PREVIOUS_ELEMENT           0x27c
+
+/* Toggle Autopilot engagement */
+#define KEY_AUTOPILOT_ENGAGE_TOGGLE    0x27d
+
+/* Shortcut Keys */
+#define KEY_MARK_WAYPOINT              0x27e
+#define KEY_SOS                                0x27f
+#define KEY_NAV_CHART                  0x280
+#define KEY_FISHING_CHART              0x281
+#define KEY_SINGLE_RANGE_RADAR         0x282
+#define KEY_DUAL_RANGE_RADAR           0x283
+#define KEY_RADAR_OVERLAY              0x284
+#define KEY_TRADITIONAL_SONAR          0x285
+#define KEY_CLEARVU_SONAR              0x286
+#define KEY_SIDEVU_SONAR               0x287
+#define KEY_NAV_INFO                   0x288
+#define KEY_BRIGHTNESS_MENU            0x289
+
 /*
  * Some keyboards have keys which do not have a defined meaning, these keys
  * are intended to be programmed / bound to macros by the user. For most
 #define ABS_TOOL_WIDTH         0x1c
 
 #define ABS_VOLUME             0x20
+#define ABS_PROFILE            0x21
 
 #define ABS_MISC               0x28
 
index f89c986190de9b8810d73cd6548178fd3d647a82..942ea6aaa977357d2d1588246c3f269f407bcc02 100644 (file)
@@ -75,13 +75,16 @@ struct input_id {
  * Note that input core does not clamp reported values to the
  * [minimum, maximum] limits, such task is left to userspace.
  *
- * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z)
- * is reported in units per millimeter (units/mm), resolution
- * for rotational axes (ABS_RX, ABS_RY, ABS_RZ) is reported
- * in units per radian.
+ * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z,
+ * ABS_MT_POSITION_X, ABS_MT_POSITION_Y) is reported in units
+ * per millimeter (units/mm), resolution for rotational axes
+ * (ABS_RX, ABS_RY, ABS_RZ) is reported in units per radian.
+ * The resolution for the size axes (ABS_MT_TOUCH_MAJOR,
+ * ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MAJOR, ABS_MT_WIDTH_MINOR)
+ * is reported in units per millimeter (units/mm).
  * When INPUT_PROP_ACCELEROMETER is set the resolution changes.
  * The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in
- * in units per g (units/g) and in units per degree per second
+ * units per g (units/g) and in units per degree per second
  * (units/deg/s) for rotational axes (ABS_RX, ABS_RY, ABS_RZ).
  */
 struct input_absinfo {
@@ -268,6 +271,7 @@ struct input_mask {
 #define BUS_RMI                        0x1D
 #define BUS_CEC                        0x1E
 #define BUS_INTEL_ISHTP                0x1F
+#define BUS_AMD_SFH            0x20
 
 /*
  * MT_TOOL types
index e709ae8235e7fd2deeeb63b0ad507a1fd8de12b3..e5f558d9649396faed564c9156ab3d03b9aaa7ab 100644 (file)
 #define  PCI_SID_ESR_FIC       0x20    /* First In Chassis Flag */
 #define PCI_SID_CHASSIS_NR     3       /* Chassis Number */
 
-/* Message Signalled Interrupt registers */
+/* Message Signaled Interrupt registers */
 
-#define PCI_MSI_FLAGS          2       /* Message Control */
+#define PCI_MSI_FLAGS          0x02    /* Message Control */
 #define  PCI_MSI_FLAGS_ENABLE  0x0001  /* MSI feature enabled */
 #define  PCI_MSI_FLAGS_QMASK   0x000e  /* Maximum queue size available */
 #define  PCI_MSI_FLAGS_QSIZE   0x0070  /* Message queue size configured */
 #define  PCI_MSI_FLAGS_64BIT   0x0080  /* 64-bit addresses allowed */
 #define  PCI_MSI_FLAGS_MASKBIT 0x0100  /* Per-vector masking capable */
 #define PCI_MSI_RFU            3       /* Rest of capability flags */
-#define PCI_MSI_ADDRESS_LO     4       /* Lower 32 bits */
-#define PCI_MSI_ADDRESS_HI     8       /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
-#define PCI_MSI_DATA_32                8       /* 16 bits of data for 32-bit devices */
-#define PCI_MSI_MASK_32                12      /* Mask bits register for 32-bit devices */
-#define PCI_MSI_PENDING_32     16      /* Pending intrs for 32-bit devices */
-#define PCI_MSI_DATA_64                12      /* 16 bits of data for 64-bit devices */
-#define PCI_MSI_MASK_64                16      /* Mask bits register for 64-bit devices */
-#define PCI_MSI_PENDING_64     20      /* Pending intrs for 64-bit devices */
+#define PCI_MSI_ADDRESS_LO     0x04    /* Lower 32 bits */
+#define PCI_MSI_ADDRESS_HI     0x08    /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
+#define PCI_MSI_DATA_32                0x08    /* 16 bits of data for 32-bit devices */
+#define PCI_MSI_MASK_32                0x0c    /* Mask bits register for 32-bit devices */
+#define PCI_MSI_PENDING_32     0x10    /* Pending intrs for 32-bit devices */
+#define PCI_MSI_DATA_64                0x0c    /* 16 bits of data for 64-bit devices */
+#define PCI_MSI_MASK_64                0x10    /* Mask bits register for 64-bit devices */
+#define PCI_MSI_PENDING_64     0x14    /* Pending intrs for 64-bit devices */
 
 /* MSI-X registers (in MSI-X capability) */
 #define PCI_MSIX_FLAGS         2       /* Message Control */
 
 /* MSI-X Table entry format (in memory mapped by a BAR) */
 #define PCI_MSIX_ENTRY_SIZE            16
-#define PCI_MSIX_ENTRY_LOWER_ADDR      0  /* Message Address */
-#define PCI_MSIX_ENTRY_UPPER_ADDR      4  /* Message Upper Address */
-#define PCI_MSIX_ENTRY_DATA            8  /* Message Data */
-#define PCI_MSIX_ENTRY_VECTOR_CTRL     12 /* Vector Control */
+#define PCI_MSIX_ENTRY_LOWER_ADDR      0x0  /* Message Address */
+#define PCI_MSIX_ENTRY_UPPER_ADDR      0x4  /* Message Upper Address */
+#define PCI_MSIX_ENTRY_DATA            0x8  /* Message Data */
+#define PCI_MSIX_ENTRY_VECTOR_CTRL     0xc  /* Vector Control */
 #define  PCI_MSIX_ENTRY_CTRL_MASKBIT   0x00000001
 
 /* CompactPCI Hotswap Register */
 
 /* PCI Express capability registers */
 
-#define PCI_EXP_FLAGS          2       /* Capabilities register */
+#define PCI_EXP_FLAGS          0x02    /* Capabilities register */
 #define  PCI_EXP_FLAGS_VERS    0x000f  /* Capability version */
 #define  PCI_EXP_FLAGS_TYPE    0x00f0  /* Device/Port type */
 #define   PCI_EXP_TYPE_ENDPOINT           0x0  /* Express Endpoint */
 #define   PCI_EXP_TYPE_RC_EC      0xa  /* Root Complex Event Collector */
 #define  PCI_EXP_FLAGS_SLOT    0x0100  /* Slot implemented */
 #define  PCI_EXP_FLAGS_IRQ     0x3e00  /* Interrupt message number */
-#define PCI_EXP_DEVCAP         4       /* Device capabilities */
+#define PCI_EXP_DEVCAP         0x04    /* Device capabilities */
 #define  PCI_EXP_DEVCAP_PAYLOAD        0x00000007 /* Max_Payload_Size */
 #define  PCI_EXP_DEVCAP_PHANTOM        0x00000018 /* Phantom functions */
 #define  PCI_EXP_DEVCAP_EXT_TAG        0x00000020 /* Extended tags */
 #define  PCI_EXP_DEVCAP_PWR_VAL        0x03fc0000 /* Slot Power Limit Value */
 #define  PCI_EXP_DEVCAP_PWR_SCL        0x0c000000 /* Slot Power Limit Scale */
 #define  PCI_EXP_DEVCAP_FLR     0x10000000 /* Function Level Reset */
-#define PCI_EXP_DEVCTL         8       /* Device Control */
+#define PCI_EXP_DEVCTL         0x08    /* Device Control */
 #define  PCI_EXP_DEVCTL_CERE   0x0001  /* Correctable Error Reporting En. */
 #define  PCI_EXP_DEVCTL_NFERE  0x0002  /* Non-Fatal Error Reporting Enable */
 #define  PCI_EXP_DEVCTL_FERE   0x0004  /* Fatal Error Reporting Enable */
 #define  PCI_EXP_DEVCTL_URRE   0x0008  /* Unsupported Request Reporting En. */
 #define  PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */
 #define  PCI_EXP_DEVCTL_PAYLOAD        0x00e0  /* Max_Payload_Size */
+#define  PCI_EXP_DEVCTL_PAYLOAD_128B 0x0000 /* 128 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_256B 0x0020 /* 256 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_512B 0x0040 /* 512 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_1024B 0x0060 /* 1024 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_2048B 0x0080 /* 2048 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_4096B 0x00a0 /* 4096 Bytes */
 #define  PCI_EXP_DEVCTL_EXT_TAG        0x0100  /* Extended Tag Field Enable */
 #define  PCI_EXP_DEVCTL_PHANTOM        0x0200  /* Phantom Functions Enable */
 #define  PCI_EXP_DEVCTL_AUX_PME        0x0400  /* Auxiliary Power PM Enable */
 #define  PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */
 #define  PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */
 #define  PCI_EXP_DEVCTL_BCR_FLR 0x8000  /* Bridge Configuration Retry / FLR */
-#define PCI_EXP_DEVSTA         10      /* Device Status */
+#define PCI_EXP_DEVSTA         0x0a    /* Device Status */
 #define  PCI_EXP_DEVSTA_CED    0x0001  /* Correctable Error Detected */
 #define  PCI_EXP_DEVSTA_NFED   0x0002  /* Non-Fatal Error Detected */
 #define  PCI_EXP_DEVSTA_FED    0x0004  /* Fatal Error Detected */
 #define  PCI_EXP_DEVSTA_AUXPD  0x0010  /* AUX Power Detected */
 #define  PCI_EXP_DEVSTA_TRPND  0x0020  /* Transactions Pending */
 #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1      12      /* v1 endpoints without link end here */
-#define PCI_EXP_LNKCAP         12      /* Link Capabilities */
+#define PCI_EXP_LNKCAP         0x0c    /* Link Capabilities */
 #define  PCI_EXP_LNKCAP_SLS    0x0000000f /* Supported Link Speeds */
 #define  PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */
 #define  PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */
 #define  PCI_EXP_LNKCAP_DLLLARC        0x00100000 /* Data Link Layer Link Active Reporting Capable */
 #define  PCI_EXP_LNKCAP_LBNC   0x00200000 /* Link Bandwidth Notification Capability */
 #define  PCI_EXP_LNKCAP_PN     0xff000000 /* Port Number */
-#define PCI_EXP_LNKCTL         16      /* Link Control */
+#define PCI_EXP_LNKCTL         0x10    /* Link Control */
 #define  PCI_EXP_LNKCTL_ASPMC  0x0003  /* ASPM Control */
 #define  PCI_EXP_LNKCTL_ASPM_L0S 0x0001        /* L0s Enable */
 #define  PCI_EXP_LNKCTL_ASPM_L1  0x0002        /* L1 Enable */
 #define  PCI_EXP_LNKCTL_HAWD   0x0200  /* Hardware Autonomous Width Disable */
 #define  PCI_EXP_LNKCTL_LBMIE  0x0400  /* Link Bandwidth Management Interrupt Enable */
 #define  PCI_EXP_LNKCTL_LABIE  0x0800  /* Link Autonomous Bandwidth Interrupt Enable */
-#define PCI_EXP_LNKSTA         18      /* Link Status */
+#define PCI_EXP_LNKSTA         0x12    /* Link Status */
 #define  PCI_EXP_LNKSTA_CLS    0x000f  /* Current Link Speed */
 #define  PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */
 #define  PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */
 #define  PCI_EXP_LNKSTA_LBMS   0x4000  /* Link Bandwidth Management Status */
 #define  PCI_EXP_LNKSTA_LABS   0x8000  /* Link Autonomous Bandwidth Status */
 #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20      /* v1 endpoints with link end here */
-#define PCI_EXP_SLTCAP         20      /* Slot Capabilities */
+#define PCI_EXP_SLTCAP         0x14    /* Slot Capabilities */
 #define  PCI_EXP_SLTCAP_ABP    0x00000001 /* Attention Button Present */
 #define  PCI_EXP_SLTCAP_PCP    0x00000002 /* Power Controller Present */
 #define  PCI_EXP_SLTCAP_MRLSP  0x00000004 /* MRL Sensor Present */
 #define  PCI_EXP_SLTCAP_EIP    0x00020000 /* Electromechanical Interlock Present */
 #define  PCI_EXP_SLTCAP_NCCS   0x00040000 /* No Command Completed Support */
 #define  PCI_EXP_SLTCAP_PSN    0xfff80000 /* Physical Slot Number */
-#define PCI_EXP_SLTCTL         24      /* Slot Control */
+#define PCI_EXP_SLTCTL         0x18    /* Slot Control */
 #define  PCI_EXP_SLTCTL_ABPE   0x0001  /* Attention Button Pressed Enable */
 #define  PCI_EXP_SLTCTL_PFDE   0x0002  /* Power Fault Detected Enable */
 #define  PCI_EXP_SLTCTL_MRLSCE 0x0004  /* MRL Sensor Changed Enable */
 #define  PCI_EXP_SLTCTL_PWR_OFF        0x0400 /* Power Off */
 #define  PCI_EXP_SLTCTL_EIC    0x0800  /* Electromechanical Interlock Control */
 #define  PCI_EXP_SLTCTL_DLLSCE 0x1000  /* Data Link Layer State Changed Enable */
+#define  PCI_EXP_SLTCTL_ASPL_DISABLE   0x2000 /* Auto Slot Power Limit Disable */
 #define  PCI_EXP_SLTCTL_IBPD_DISABLE   0x4000 /* In-band PD disable */
-#define PCI_EXP_SLTSTA         26      /* Slot Status */
+#define PCI_EXP_SLTSTA         0x1a    /* Slot Status */
 #define  PCI_EXP_SLTSTA_ABP    0x0001  /* Attention Button Pressed */
 #define  PCI_EXP_SLTSTA_PFD    0x0002  /* Power Fault Detected */
 #define  PCI_EXP_SLTSTA_MRLSC  0x0004  /* MRL Sensor Changed */
 #define  PCI_EXP_SLTSTA_PDS    0x0040  /* Presence Detect State */
 #define  PCI_EXP_SLTSTA_EIS    0x0080  /* Electromechanical Interlock Status */
 #define  PCI_EXP_SLTSTA_DLLSC  0x0100  /* Data Link Layer State Changed */
-#define PCI_EXP_RTCTL          28      /* Root Control */
+#define PCI_EXP_RTCTL          0x1c    /* Root Control */
 #define  PCI_EXP_RTCTL_SECEE   0x0001  /* System Error on Correctable Error */
 #define  PCI_EXP_RTCTL_SENFEE  0x0002  /* System Error on Non-Fatal Error */
 #define  PCI_EXP_RTCTL_SEFEE   0x0004  /* System Error on Fatal Error */
 #define  PCI_EXP_RTCTL_PMEIE   0x0008  /* PME Interrupt Enable */
 #define  PCI_EXP_RTCTL_CRSSVE  0x0010  /* CRS Software Visibility Enable */
-#define PCI_EXP_RTCAP          30      /* Root Capabilities */
+#define PCI_EXP_RTCAP          0x1e    /* Root Capabilities */
 #define  PCI_EXP_RTCAP_CRSVIS  0x0001  /* CRS Software Visibility capability */
-#define PCI_EXP_RTSTA          32      /* Root Status */
+#define PCI_EXP_RTSTA          0x20    /* Root Status */
 #define  PCI_EXP_RTSTA_PME     0x00010000 /* PME status */
 #define  PCI_EXP_RTSTA_PENDING 0x00020000 /* PME pending */
 /*
  * Use pcie_capability_read_word() and similar interfaces to use them
  * safely.
  */
-#define PCI_EXP_DEVCAP2                36      /* Device Capabilities 2 */
+#define PCI_EXP_DEVCAP2                0x24    /* Device Capabilities 2 */
 #define  PCI_EXP_DEVCAP2_COMP_TMOUT_DIS        0x00000010 /* Completion Timeout Disable supported */
 #define  PCI_EXP_DEVCAP2_ARI           0x00000020 /* Alternative Routing-ID */
 #define  PCI_EXP_DEVCAP2_ATOMIC_ROUTE  0x00000040 /* Atomic Op routing */
 #define  PCI_EXP_DEVCAP2_OBFF_MSG      0x00040000 /* New message signaling */
 #define  PCI_EXP_DEVCAP2_OBFF_WAKE     0x00080000 /* Re-use WAKE# for OBFF */
 #define  PCI_EXP_DEVCAP2_EE_PREFIX     0x00200000 /* End-End TLP Prefix */
-#define PCI_EXP_DEVCTL2                40      /* Device Control 2 */
+#define PCI_EXP_DEVCTL2                0x28    /* Device Control 2 */
 #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT  0x000f  /* Completion Timeout Value */
 #define  PCI_EXP_DEVCTL2_COMP_TMOUT_DIS        0x0010  /* Completion Timeout Disable */
 #define  PCI_EXP_DEVCTL2_ARI           0x0020  /* Alternative Routing-ID */
 #define  PCI_EXP_DEVCTL2_OBFF_MSGA_EN  0x2000  /* Enable OBFF Message type A */
 #define  PCI_EXP_DEVCTL2_OBFF_MSGB_EN  0x4000  /* Enable OBFF Message type B */
 #define  PCI_EXP_DEVCTL2_OBFF_WAKE_EN  0x6000  /* OBFF using WAKE# signaling */
-#define PCI_EXP_DEVSTA2                42      /* Device Status 2 */
-#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2      44      /* v2 endpoints without link end here */
-#define PCI_EXP_LNKCAP2                44      /* Link Capabilities 2 */
+#define PCI_EXP_DEVSTA2                0x2a    /* Device Status 2 */
+#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 0x2c /* end of v2 EPs w/o link */
+#define PCI_EXP_LNKCAP2                0x2c    /* Link Capabilities 2 */
 #define  PCI_EXP_LNKCAP2_SLS_2_5GB     0x00000002 /* Supported Speed 2.5GT/s */
 #define  PCI_EXP_LNKCAP2_SLS_5_0GB     0x00000004 /* Supported Speed 5GT/s */
 #define  PCI_EXP_LNKCAP2_SLS_8_0GB     0x00000008 /* Supported Speed 8GT/s */
 #define  PCI_EXP_LNKCAP2_SLS_32_0GB    0x00000020 /* Supported Speed 32GT/s */
 #define  PCI_EXP_LNKCAP2_SLS_64_0GB    0x00000040 /* Supported Speed 64GT/s */
 #define  PCI_EXP_LNKCAP2_CROSSLINK     0x00000100 /* Crosslink supported */
-#define PCI_EXP_LNKCTL2                48      /* Link Control 2 */
+#define PCI_EXP_LNKCTL2                0x30    /* Link Control 2 */
 #define  PCI_EXP_LNKCTL2_TLS           0x000f
 #define  PCI_EXP_LNKCTL2_TLS_2_5GT     0x0001 /* Supported Speed 2.5GT/s */
 #define  PCI_EXP_LNKCTL2_TLS_5_0GT     0x0002 /* Supported Speed 5GT/s */
 #define  PCI_EXP_LNKCTL2_ENTER_COMP    0x0010 /* Enter Compliance */
 #define  PCI_EXP_LNKCTL2_TX_MARGIN     0x0380 /* Transmit Margin */
 #define  PCI_EXP_LNKCTL2_HASD          0x0020 /* HW Autonomous Speed Disable */
-#define PCI_EXP_LNKSTA2                50      /* Link Status 2 */
-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52      /* v2 endpoints with link end here */
-#define PCI_EXP_SLTCAP2                52      /* Slot Capabilities 2 */
+#define PCI_EXP_LNKSTA2                0x32    /* Link Status 2 */
+#define  PCI_EXP_LNKSTA2_FLIT          0x0400 /* Flit Mode Status */
+#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32    /* end of v2 EPs w/ link */
+#define PCI_EXP_SLTCAP2                0x34    /* Slot Capabilities 2 */
 #define  PCI_EXP_SLTCAP2_IBPD  0x00000001 /* In-band PD Disable Supported */
-#define PCI_EXP_SLTCTL2                56      /* Slot Control 2 */
-#define PCI_EXP_SLTSTA2                58      /* Slot Status 2 */
+#define PCI_EXP_SLTCTL2                0x38    /* Slot Control 2 */
+#define PCI_EXP_SLTSTA2                0x3a    /* Slot Status 2 */
 
 /* Extended Capabilities (PCI-X 2.0 and Express) */
 #define PCI_EXT_CAP_ID(header)         (header & 0x0000ffff)
 #define PCI_EXT_CAP_ID_DVSEC   0x23    /* Designated Vendor-Specific */
 #define PCI_EXT_CAP_ID_DLF     0x25    /* Data Link Feature */
 #define PCI_EXT_CAP_ID_PL_16GT 0x26    /* Physical Layer 16.0 GT/s */
-#define PCI_EXT_CAP_ID_MAX     PCI_EXT_CAP_ID_PL_16GT
+#define PCI_EXT_CAP_ID_PL_32GT  0x2A    /* Physical Layer 32.0 GT/s */
+#define PCI_EXT_CAP_ID_DOE     0x2E    /* Data Object Exchange */
+#define PCI_EXT_CAP_ID_MAX     PCI_EXT_CAP_ID_DOE
 
 #define PCI_EXT_CAP_DSN_SIZEOF 12
 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
 
 /* Advanced Error Reporting */
-#define PCI_ERR_UNCOR_STATUS   4       /* Uncorrectable Error Status */
+#define PCI_ERR_UNCOR_STATUS   0x04    /* Uncorrectable Error Status */
 #define  PCI_ERR_UNC_UND       0x00000001      /* Undefined */
 #define  PCI_ERR_UNC_DLP       0x00000010      /* Data Link Protocol */
 #define  PCI_ERR_UNC_SURPDN    0x00000020      /* Surprise Down */
 #define  PCI_ERR_UNC_MCBTLP    0x00800000      /* MC blocked TLP */
 #define  PCI_ERR_UNC_ATOMEG    0x01000000      /* Atomic egress blocked */
 #define  PCI_ERR_UNC_TLPPRE    0x02000000      /* TLP prefix blocked */
-#define PCI_ERR_UNCOR_MASK     8       /* Uncorrectable Error Mask */
+#define PCI_ERR_UNCOR_MASK     0x08    /* Uncorrectable Error Mask */
        /* Same bits as above */
-#define PCI_ERR_UNCOR_SEVER    12      /* Uncorrectable Error Severity */
+#define PCI_ERR_UNCOR_SEVER    0x0c    /* Uncorrectable Error Severity */
        /* Same bits as above */
-#define PCI_ERR_COR_STATUS     16      /* Correctable Error Status */
+#define PCI_ERR_COR_STATUS     0x10    /* Correctable Error Status */
 #define  PCI_ERR_COR_RCVR      0x00000001      /* Receiver Error Status */
 #define  PCI_ERR_COR_BAD_TLP   0x00000040      /* Bad TLP Status */
 #define  PCI_ERR_COR_BAD_DLLP  0x00000080      /* Bad DLLP Status */
 #define  PCI_ERR_COR_ADV_NFAT  0x00002000      /* Advisory Non-Fatal */
 #define  PCI_ERR_COR_INTERNAL  0x00004000      /* Corrected Internal */
 #define  PCI_ERR_COR_LOG_OVER  0x00008000      /* Header Log Overflow */
-#define PCI_ERR_COR_MASK       20      /* Correctable Error Mask */
+#define PCI_ERR_COR_MASK       0x14    /* Correctable Error Mask */
        /* Same bits as above */
-#define PCI_ERR_CAP            24      /* Advanced Error Capabilities */
-#define  PCI_ERR_CAP_FEP(x)    ((x) & 31)      /* First Error Pointer */
+#define PCI_ERR_CAP            0x18    /* Advanced Error Capabilities & Ctrl*/
+#define  PCI_ERR_CAP_FEP(x)    ((x) & 0x1f)    /* First Error Pointer */
 #define  PCI_ERR_CAP_ECRC_GENC 0x00000020      /* ECRC Generation Capable */
 #define  PCI_ERR_CAP_ECRC_GENE 0x00000040      /* ECRC Generation Enable */
 #define  PCI_ERR_CAP_ECRC_CHKC 0x00000080      /* ECRC Check Capable */
 #define  PCI_ERR_CAP_ECRC_CHKE 0x00000100      /* ECRC Check Enable */
-#define PCI_ERR_HEADER_LOG     28      /* Header Log Register (16 bytes) */
-#define PCI_ERR_ROOT_COMMAND   44      /* Root Error Command */
+#define PCI_ERR_HEADER_LOG     0x1c    /* Header Log Register (16 bytes) */
+#define PCI_ERR_ROOT_COMMAND   0x2c    /* Root Error Command */
 #define  PCI_ERR_ROOT_CMD_COR_EN       0x00000001 /* Correctable Err Reporting Enable */
 #define  PCI_ERR_ROOT_CMD_NONFATAL_EN  0x00000002 /* Non-Fatal Err Reporting Enable */
 #define  PCI_ERR_ROOT_CMD_FATAL_EN     0x00000004 /* Fatal Err Reporting Enable */
-#define PCI_ERR_ROOT_STATUS    48
+#define PCI_ERR_ROOT_STATUS    0x30
 #define  PCI_ERR_ROOT_COR_RCV          0x00000001 /* ERR_COR Received */
 #define  PCI_ERR_ROOT_MULTI_COR_RCV    0x00000002 /* Multiple ERR_COR */
 #define  PCI_ERR_ROOT_UNCOR_RCV                0x00000004 /* ERR_FATAL/NONFATAL */
 #define  PCI_ERR_ROOT_NONFATAL_RCV     0x00000020 /* Non-Fatal Received */
 #define  PCI_ERR_ROOT_FATAL_RCV                0x00000040 /* Fatal Received */
 #define  PCI_ERR_ROOT_AER_IRQ          0xf8000000 /* Advanced Error Interrupt Message Number */
-#define PCI_ERR_ROOT_ERR_SRC   52      /* Error Source Identification */
+#define PCI_ERR_ROOT_ERR_SRC   0x34    /* Error Source Identification */
 
 /* Virtual Channel */
-#define PCI_VC_PORT_CAP1       4
+#define PCI_VC_PORT_CAP1       0x04
 #define  PCI_VC_CAP1_EVCC      0x00000007      /* extended VC count */
 #define  PCI_VC_CAP1_LPEVCC    0x00000070      /* low prio extended VC count */
 #define  PCI_VC_CAP1_ARB_SIZE  0x00000c00
-#define PCI_VC_PORT_CAP2       8
+#define PCI_VC_PORT_CAP2       0x08
 #define  PCI_VC_CAP2_32_PHASE          0x00000002
 #define  PCI_VC_CAP2_64_PHASE          0x00000004
 #define  PCI_VC_CAP2_128_PHASE         0x00000008
 #define  PCI_VC_CAP2_ARB_OFF           0xff000000
-#define PCI_VC_PORT_CTRL       12
+#define PCI_VC_PORT_CTRL       0x0c
 #define  PCI_VC_PORT_CTRL_LOAD_TABLE   0x00000001
-#define PCI_VC_PORT_STATUS     14
+#define PCI_VC_PORT_STATUS     0x0e
 #define  PCI_VC_PORT_STATUS_TABLE      0x00000001
-#define PCI_VC_RES_CAP         16
+#define PCI_VC_RES_CAP         0x10
 #define  PCI_VC_RES_CAP_32_PHASE       0x00000002
 #define  PCI_VC_RES_CAP_64_PHASE       0x00000004
 #define  PCI_VC_RES_CAP_128_PHASE      0x00000008
 #define  PCI_VC_RES_CAP_128_PHASE_TB   0x00000010
 #define  PCI_VC_RES_CAP_256_PHASE      0x00000020
 #define  PCI_VC_RES_CAP_ARB_OFF                0xff000000
-#define PCI_VC_RES_CTRL                20
+#define PCI_VC_RES_CTRL                0x14
 #define  PCI_VC_RES_CTRL_LOAD_TABLE    0x00010000
 #define  PCI_VC_RES_CTRL_ARB_SELECT    0x000e0000
 #define  PCI_VC_RES_CTRL_ID            0x07000000
 #define  PCI_VC_RES_CTRL_ENABLE                0x80000000
-#define PCI_VC_RES_STATUS      26
+#define PCI_VC_RES_STATUS      0x1a
 #define  PCI_VC_RES_STATUS_TABLE       0x00000001
 #define  PCI_VC_RES_STATUS_NEGO                0x00000002
 #define PCI_CAP_VC_BASE_SIZEOF         0x10
-#define PCI_CAP_VC_PER_VC_SIZEOF       0x0C
+#define PCI_CAP_VC_PER_VC_SIZEOF       0x0c
 
 /* Power Budgeting */
-#define PCI_PWR_DSR            4       /* Data Select Register */
-#define PCI_PWR_DATA           8       /* Data Register */
+#define PCI_PWR_DSR            0x04    /* Data Select Register */
+#define PCI_PWR_DATA           0x08    /* Data Register */
 #define  PCI_PWR_DATA_BASE(x)  ((x) & 0xff)        /* Base Power */
 #define  PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3)    /* Data Scale */
 #define  PCI_PWR_DATA_PM_SUB(x)        (((x) >> 10) & 7)   /* PM Sub State */
 #define  PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */
 #define  PCI_PWR_DATA_TYPE(x)  (((x) >> 15) & 7)   /* Type */
 #define  PCI_PWR_DATA_RAIL(x)  (((x) >> 18) & 7)   /* Power Rail */
-#define PCI_PWR_CAP            12      /* Capability */
+#define PCI_PWR_CAP            0x0c    /* Capability */
 #define  PCI_PWR_CAP_BUDGET(x) ((x) & 1)       /* Included in system budget */
-#define PCI_EXT_CAP_PWR_SIZEOF 16
+#define PCI_EXT_CAP_PWR_SIZEOF 0x10
 
 /* Root Complex Event Collector Endpoint Association  */
 #define PCI_RCEC_RCIEP_BITMAP  4       /* Associated Bitmap for RCiEPs */
 #define  PCI_SRIOV_VFM_MI      0x1     /* Dormant.MigrateIn */
 #define  PCI_SRIOV_VFM_MO      0x2     /* Active.MigrateOut */
 #define  PCI_SRIOV_VFM_AV      0x3     /* Active.Available */
-#define PCI_EXT_CAP_SRIOV_SIZEOF 64
+#define PCI_EXT_CAP_SRIOV_SIZEOF 0x40
 
 #define PCI_LTR_MAX_SNOOP_LAT  0x4
 #define PCI_LTR_MAX_NOSNOOP_LAT        0x6
 #define   PCI_TPH_LOC_NONE     0x000   /* no location */
 #define   PCI_TPH_LOC_CAP      0x200   /* in capability */
 #define   PCI_TPH_LOC_MSIX     0x400   /* in MSI-X */
-#define PCI_TPH_CAP_ST_MASK    0x07FF0000      /* st table mask */
-#define PCI_TPH_CAP_ST_SHIFT   16      /* st table shift */
-#define PCI_TPH_BASE_SIZEOF    12      /* size with no st table */
+#define PCI_TPH_CAP_ST_MASK    0x07FF0000      /* ST table mask */
+#define PCI_TPH_CAP_ST_SHIFT   16      /* ST table shift */
+#define PCI_TPH_BASE_SIZEOF    0xc     /* size with no ST table */
 
 /* Downstream Port Containment */
-#define PCI_EXP_DPC_CAP                        4       /* DPC Capability */
+#define PCI_EXP_DPC_CAP                        0x04    /* DPC Capability */
 #define PCI_EXP_DPC_IRQ                        0x001F  /* Interrupt Message Number */
 #define  PCI_EXP_DPC_CAP_RP_EXT                0x0020  /* Root Port Extensions */
 #define  PCI_EXP_DPC_CAP_POISONED_TLP  0x0040  /* Poisoned TLP Egress Blocking Supported */
 #define  PCI_EXP_DPC_RP_PIO_LOG_SIZE   0x0F00  /* RP PIO Log Size */
 #define  PCI_EXP_DPC_CAP_DL_ACTIVE     0x1000  /* ERR_COR signal on DL_Active supported */
 
-#define PCI_EXP_DPC_CTL                        6       /* DPC control */
+#define PCI_EXP_DPC_CTL                        0x06    /* DPC control */
 #define  PCI_EXP_DPC_CTL_EN_FATAL      0x0001  /* Enable trigger on ERR_FATAL message */
 #define  PCI_EXP_DPC_CTL_EN_NONFATAL   0x0002  /* Enable trigger on ERR_NONFATAL message */
 #define  PCI_EXP_DPC_CTL_INT_EN                0x0008  /* DPC Interrupt Enable */
 
-#define PCI_EXP_DPC_STATUS             8       /* DPC Status */
+#define PCI_EXP_DPC_STATUS             0x08    /* DPC Status */
 #define  PCI_EXP_DPC_STATUS_TRIGGER        0x0001 /* Trigger Status */
 #define  PCI_EXP_DPC_STATUS_TRIGGER_RSN            0x0006 /* Trigger Reason */
 #define  PCI_EXP_DPC_STATUS_INTERRUPT      0x0008 /* Interrupt Status */
 #define  PCI_EXP_DPC_RP_BUSY               0x0010 /* Root Port Busy */
 #define  PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT 0x0060 /* Trig Reason Extension */
 
-#define PCI_EXP_DPC_SOURCE_ID          10      /* DPC Source Identifier */
+#define PCI_EXP_DPC_SOURCE_ID           0x0A   /* DPC Source Identifier */
 
 #define PCI_EXP_DPC_RP_PIO_STATUS       0x0C   /* RP PIO Status */
 #define PCI_EXP_DPC_RP_PIO_MASK                 0x10   /* RP PIO Mask */
 /* Precision Time Measurement */
 #define PCI_PTM_CAP                    0x04        /* PTM Capability */
 #define  PCI_PTM_CAP_REQ               0x00000001  /* Requester capable */
+#define  PCI_PTM_CAP_RES               0x00000002  /* Responder capable */
 #define  PCI_PTM_CAP_ROOT              0x00000004  /* Root capable */
 #define  PCI_PTM_GRANULARITY_MASK      0x0000FF00  /* Clock granularity */
 #define PCI_PTM_CTRL                   0x08        /* PTM Control */
 
 /* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */
 #define PCI_DVSEC_HEADER1              0x4 /* Designated Vendor-Specific Header1 */
+#define  PCI_DVSEC_HEADER1_VID(x)      ((x) & 0xffff)
+#define  PCI_DVSEC_HEADER1_REV(x)      (((x) >> 16) & 0xf)
+#define  PCI_DVSEC_HEADER1_LEN(x)      (((x) >> 20) & 0xfff)
 #define PCI_DVSEC_HEADER2              0x8 /* Designated Vendor-Specific Header2 */
+#define  PCI_DVSEC_HEADER2_ID(x)               ((x) & 0xffff)
 
 /* Data Link Feature */
 #define PCI_DLF_CAP            0x04    /* Capabilities Register */
 #define  PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK                0x000000F0
 #define  PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT       4
 
+/* Data Object Exchange */
+#define PCI_DOE_CAP            0x04    /* DOE Capabilities Register */
+#define  PCI_DOE_CAP_INT_SUP                   0x00000001  /* Interrupt Support */
+#define  PCI_DOE_CAP_INT_MSG_NUM               0x00000ffe  /* Interrupt Message Number */
+#define PCI_DOE_CTRL           0x08    /* DOE Control Register */
+#define  PCI_DOE_CTRL_ABORT                    0x00000001  /* DOE Abort */
+#define  PCI_DOE_CTRL_INT_EN                   0x00000002  /* DOE Interrupt Enable */
+#define  PCI_DOE_CTRL_GO                       0x80000000  /* DOE Go */
+#define PCI_DOE_STATUS         0x0c    /* DOE Status Register */
+#define  PCI_DOE_STATUS_BUSY                   0x00000001  /* DOE Busy */
+#define  PCI_DOE_STATUS_INT_STATUS             0x00000002  /* DOE Interrupt Status */
+#define  PCI_DOE_STATUS_ERROR                  0x00000004  /* DOE Error */
+#define  PCI_DOE_STATUS_DATA_OBJECT_READY      0x80000000  /* Data Object Ready */
+#define PCI_DOE_WRITE          0x10    /* DOE Write Data Mailbox Register */
+#define PCI_DOE_READ           0x14    /* DOE Read Data Mailbox Register */
+#define PCI_DOE_CAP_SIZEOF     0x18    /* Size of DOE register block */
+
+/* DOE Data Object - note not actually registers */
+#define PCI_DOE_DATA_OBJECT_HEADER_1_VID               0x0000ffff
+#define PCI_DOE_DATA_OBJECT_HEADER_1_TYPE              0x00ff0000
+#define PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH            0x0003ffff
+
+#define PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX           0x000000ff
+#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID             0x0000ffff
+#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL                0x00ff0000
+#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX      0xff000000
+
 #endif /* LINUX_PCI_REGS_H */
diff --git a/include/standard-headers/linux/pvpanic.h b/include/standard-headers/linux/pvpanic.h
new file mode 100644 (file)
index 0000000..54b7485
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef __PVPANIC_H__
+#define __PVPANIC_H__
+
+#define PVPANIC_PANICKED       (1 << 0)
+#define PVPANIC_CRASH_LOADED   (1 << 1)
+
+#endif /* __PVPANIC_H__ */
diff --git a/include/standard-headers/linux/udmabuf.h b/include/standard-headers/linux/udmabuf.h
new file mode 100644 (file)
index 0000000..e19eb5b
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_UDMABUF_H
+#define _LINUX_UDMABUF_H
+
+#include "standard-headers/linux/types.h"
+
+#define UDMABUF_FLAGS_CLOEXEC  0x01
+
+struct udmabuf_create {
+       uint32_t memfd;
+       uint32_t flags;
+       uint64_t offset;
+       uint64_t size;
+};
+
+struct udmabuf_create_item {
+       uint32_t memfd;
+       uint32_t __pad;
+       uint64_t offset;
+       uint64_t size;
+};
+
+struct udmabuf_create_list {
+       uint32_t flags;
+       uint32_t count;
+       struct udmabuf_create_item list[];
+};
+
+#define UDMABUF_CREATE       _IOW('u', 0x42, struct udmabuf_create)
+#define UDMABUF_CREATE_LIST  _IOW('u', 0x43, struct udmabuf_create_list)
+
+#endif /* _LINUX_UDMABUF_H */
index 0bd2684a2ae47993594e5f8fcf5b56f98aa0db84..5ad07e134aed3525943388a18b5bded129624f58 100644 (file)
@@ -47,6 +47,22 @@ struct vhost_vring_addr {
        uint64_t log_guest_addr;
 };
 
+struct vhost_worker_state {
+       /*
+        * For VHOST_NEW_WORKER the kernel will return the new vhost_worker id.
+        * For VHOST_FREE_WORKER this must be set to the id of the vhost_worker
+        * to free.
+        */
+       unsigned int worker_id;
+};
+
+struct vhost_vring_worker {
+       /* vring index */
+       unsigned int index;
+       /* The id of the vhost_worker returned from VHOST_NEW_WORKER */
+       unsigned int worker_id;
+};
+
 /* no alignment requirement */
 struct vhost_iotlb_msg {
        uint64_t iova;
@@ -87,7 +103,7 @@ struct vhost_msg {
 
 struct vhost_msg_v2 {
        uint32_t type;
-       uint32_t reserved;
+       uint32_t asid;
        union {
                struct vhost_iotlb_msg iotlb;
                uint8_t padding[64];
@@ -107,7 +123,7 @@ struct vhost_memory_region {
 struct vhost_memory {
        uint32_t nregions;
        uint32_t padding;
-       struct vhost_memory_region regions[0];
+       struct vhost_memory_region regions[];
 };
 
 /* VHOST_SCSI specific definitions */
@@ -135,7 +151,7 @@ struct vhost_scsi_target {
 struct vhost_vdpa_config {
        uint32_t off;
        uint32_t len;
-       uint8_t buf[0];
+       uint8_t buf[];
 };
 
 /* vhost vdpa IOVA range
@@ -153,4 +169,21 @@ struct vhost_vdpa_iova_range {
 /* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */
 #define VHOST_NET_F_VIRTIO_NET_HDR 27
 
+/* Use message type V2 */
+#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
+/* IOTLB can accept batching hints */
+#define VHOST_BACKEND_F_IOTLB_BATCH  0x2
+/* IOTLB can accept address space identifier through V2 type of IOTLB
+ * message
+ */
+#define VHOST_BACKEND_F_IOTLB_ASID  0x3
+/* Device can be suspended */
+#define VHOST_BACKEND_F_SUSPEND  0x4
+/* Device can be resumed */
+#define VHOST_BACKEND_F_RESUME  0x5
+/* Device supports the driver enabling virtqueues both before and after
+ * DRIVER_OK
+ */
+#define VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK  0x6
+
 #endif
index f5604fc5fb15acc1eef28cdbf58e6fe6a42fd161..da61dee98c5ab18bf99c2cf66c52a71e3aec448c 100644 (file)
@@ -38,7 +38,7 @@ struct virtio_9p_config {
        /* length of the tag name */
        __virtio16 tag_len;
        /* non-NULL terminated tag name */
-       uint8_t tag[0];
+       uint8_t tag[];
 } QEMU_PACKED;
 
 #endif /* _LINUX_VIRTIO_9P_H */
index 2dcc90826ae7d30ccc7169355b43800c21551214..d7be3cf5e42f321afee34602c42aaf1b74b26a76 100644 (file)
@@ -40,6 +40,8 @@
 #define VIRTIO_BLK_F_MQ                12      /* support more than one vq */
 #define VIRTIO_BLK_F_DISCARD   13      /* DISCARD is supported */
 #define VIRTIO_BLK_F_WRITE_ZEROES      14      /* WRITE ZEROES is supported */
+#define VIRTIO_BLK_F_SECURE_ERASE      16 /* Secure Erase is supported */
+#define VIRTIO_BLK_F_ZONED             17      /* Zoned block device */
 
 /* Legacy feature bits */
 #ifndef VIRTIO_BLK_NO_LEGACY
@@ -119,6 +121,31 @@ struct virtio_blk_config {
        uint8_t write_zeroes_may_unmap;
 
        uint8_t unused1[3];
+
+       /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */
+       /*
+        * The maximum secure erase sectors (in 512-byte sectors) for
+        * one segment.
+        */
+       __virtio32 max_secure_erase_sectors;
+       /*
+        * The maximum number of secure erase segments in a
+        * secure erase command.
+        */
+       __virtio32 max_secure_erase_seg;
+       /* Secure erase commands must be aligned to this number of sectors. */
+       __virtio32 secure_erase_sector_alignment;
+
+       /* Zoned block device characteristics (if VIRTIO_BLK_F_ZONED) */
+       struct virtio_blk_zoned_characteristics {
+               __virtio32 zone_sectors;
+               __virtio32 max_open_zones;
+               __virtio32 max_active_zones;
+               __virtio32 max_append_sectors;
+               __virtio32 write_granularity;
+               uint8_t model;
+               uint8_t unused2[3];
+       } zoned;
 } QEMU_PACKED;
 
 /*
@@ -153,6 +180,30 @@ struct virtio_blk_config {
 /* Write zeroes command */
 #define VIRTIO_BLK_T_WRITE_ZEROES      13
 
+/* Secure erase command */
+#define VIRTIO_BLK_T_SECURE_ERASE      14
+
+/* Zone append command */
+#define VIRTIO_BLK_T_ZONE_APPEND    15
+
+/* Report zones command */
+#define VIRTIO_BLK_T_ZONE_REPORT    16
+
+/* Open zone command */
+#define VIRTIO_BLK_T_ZONE_OPEN      18
+
+/* Close zone command */
+#define VIRTIO_BLK_T_ZONE_CLOSE     20
+
+/* Finish zone command */
+#define VIRTIO_BLK_T_ZONE_FINISH    22
+
+/* Reset zone command */
+#define VIRTIO_BLK_T_ZONE_RESET     24
+
+/* Reset All zones command */
+#define VIRTIO_BLK_T_ZONE_RESET_ALL 26
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER   0x80000000
@@ -172,6 +223,72 @@ struct virtio_blk_outhdr {
        __virtio64 sector;
 };
 
+/*
+ * Supported zoned device models.
+ */
+
+/* Regular block device */
+#define VIRTIO_BLK_Z_NONE      0
+/* Host-managed zoned device */
+#define VIRTIO_BLK_Z_HM        1
+/* Host-aware zoned device */
+#define VIRTIO_BLK_Z_HA        2
+
+/*
+ * Zone descriptor. A part of VIRTIO_BLK_T_ZONE_REPORT command reply.
+ */
+struct virtio_blk_zone_descriptor {
+       /* Zone capacity */
+       __virtio64 z_cap;
+       /* The starting sector of the zone */
+       __virtio64 z_start;
+       /* Zone write pointer position in sectors */
+       __virtio64 z_wp;
+       /* Zone type */
+       uint8_t z_type;
+       /* Zone state */
+       uint8_t z_state;
+       uint8_t reserved[38];
+};
+
+struct virtio_blk_zone_report {
+       __virtio64 nr_zones;
+       uint8_t reserved[56];
+       struct virtio_blk_zone_descriptor zones[];
+};
+
+/*
+ * Supported zone types.
+ */
+
+/* Conventional zone */
+#define VIRTIO_BLK_ZT_CONV         1
+/* Sequential Write Required zone */
+#define VIRTIO_BLK_ZT_SWR          2
+/* Sequential Write Preferred zone */
+#define VIRTIO_BLK_ZT_SWP          3
+
+/*
+ * Zone states that are available for zones of all types.
+ */
+
+/* Not a write pointer (conventional zones only) */
+#define VIRTIO_BLK_ZS_NOT_WP       0
+/* Empty */
+#define VIRTIO_BLK_ZS_EMPTY        1
+/* Implicitly Open */
+#define VIRTIO_BLK_ZS_IOPEN        2
+/* Explicitly Open */
+#define VIRTIO_BLK_ZS_EOPEN        3
+/* Closed */
+#define VIRTIO_BLK_ZS_CLOSED       4
+/* Read-Only */
+#define VIRTIO_BLK_ZS_RDONLY       13
+/* Full */
+#define VIRTIO_BLK_ZS_FULL         14
+/* Offline */
+#define VIRTIO_BLK_ZS_OFFLINE      15
+
 /* Unmap this range (only valid for write zeroes command) */
 #define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP     0x00000001
 
@@ -198,4 +315,11 @@ struct virtio_scsi_inhdr {
 #define VIRTIO_BLK_S_OK                0
 #define VIRTIO_BLK_S_IOERR     1
 #define VIRTIO_BLK_S_UNSUPP    2
+
+/* Error codes that are specific to zoned block devices */
+#define VIRTIO_BLK_S_ZONE_INVALID_CMD     3
+#define VIRTIO_BLK_S_ZONE_UNALIGNED_WP    4
+#define VIRTIO_BLK_S_ZONE_OPEN_RESOURCE   5
+#define VIRTIO_BLK_S_ZONE_ACTIVE_RESOURCE 6
+
 #endif /* _LINUX_VIRTIO_BLK_H */
diff --git a/include/standard-headers/linux/virtio_bt.h b/include/standard-headers/linux/virtio_bt.h
new file mode 100644 (file)
index 0000000..a11ecc3
--- /dev/null
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+#ifndef _LINUX_VIRTIO_BT_H
+#define _LINUX_VIRTIO_BT_H
+
+#include "standard-headers/linux/virtio_types.h"
+
+/* Feature bits */
+#define VIRTIO_BT_F_VND_HCI    0       /* Indicates vendor command support */
+#define VIRTIO_BT_F_MSFT_EXT   1       /* Indicates MSFT vendor support */
+#define VIRTIO_BT_F_AOSP_EXT   2       /* Indicates AOSP vendor support */
+#define VIRTIO_BT_F_CONFIG_V2  3       /* Use second version configuration */
+
+enum virtio_bt_config_type {
+       VIRTIO_BT_CONFIG_TYPE_PRIMARY   = 0,
+       VIRTIO_BT_CONFIG_TYPE_AMP       = 1,
+};
+
+enum virtio_bt_config_vendor {
+       VIRTIO_BT_CONFIG_VENDOR_NONE    = 0,
+       VIRTIO_BT_CONFIG_VENDOR_ZEPHYR  = 1,
+       VIRTIO_BT_CONFIG_VENDOR_INTEL   = 2,
+       VIRTIO_BT_CONFIG_VENDOR_REALTEK = 3,
+};
+
+struct virtio_bt_config {
+       uint8_t  type;
+       uint16_t vendor;
+       uint16_t msft_opcode;
+} QEMU_PACKED;
+
+struct virtio_bt_config_v2 {
+       uint8_t  type;
+       uint8_t  alignment;
+       uint16_t vendor;
+       uint16_t msft_opcode;
+};
+
+#endif /* _LINUX_VIRTIO_BT_H */
index 22e3a85f6760920cb3d3b49d064a0b421b0e76cc..8a7d0dc8b0070343636a72926dbea66ceb01b184 100644 (file)
@@ -52,7 +52,7 @@
  * rest are per-device feature bits.
  */
 #define VIRTIO_TRANSPORT_F_START       28
-#define VIRTIO_TRANSPORT_F_END         38
+#define VIRTIO_TRANSPORT_F_END         41
 
 #ifndef VIRTIO_CONFIG_NO_LEGACY
 /* Do we get callbacks when the ring is completely used, even if we've
 /* This feature indicates support for the packed virtqueue layout. */
 #define VIRTIO_F_RING_PACKED           34
 
+/*
+ * Inorder feature indicates that all buffers are used by the device
+ * in the same order in which they have been made available.
+ */
+#define VIRTIO_F_IN_ORDER              35
+
 /*
  * This feature indicates that memory accesses by the driver and the
  * device are ordered in a way described by the platform.
  * Does the device support Single Root I/O Virtualization?
  */
 #define VIRTIO_F_SR_IOV                        37
+
+/*
+ * This feature indicates that the driver passes extra data (besides
+ * identifying the virtqueue) in its device notifications.
+ */
+#define VIRTIO_F_NOTIFICATION_DATA     38
+
+/*
+ * This feature indicates that the driver can reset a queue individually.
+ */
+#define VIRTIO_F_RING_RESET            40
 #endif /* _LINUX_VIRTIO_CONFIG_H */
index 5ff0b4ee5921f0ca19c895c78b0feb91524293e6..68066dafb6dd73d4220b41b65260176898812d82 100644 (file)
@@ -37,6 +37,7 @@
 #define VIRTIO_CRYPTO_SERVICE_HASH   1
 #define VIRTIO_CRYPTO_SERVICE_MAC    2
 #define VIRTIO_CRYPTO_SERVICE_AEAD   3
+#define VIRTIO_CRYPTO_SERVICE_AKCIPHER 4
 
 #define VIRTIO_CRYPTO_OPCODE(service, op)   (((service) << 8) | (op))
 
@@ -57,6 +58,10 @@ struct virtio_crypto_ctrl_header {
           VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02)
 #define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \
           VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03)
+#define VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION \
+          VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x04)
+#define VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION \
+          VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x05)
        uint32_t opcode;
        uint32_t algo;
        uint32_t flag;
@@ -180,6 +185,58 @@ struct virtio_crypto_aead_create_session_req {
        uint8_t padding[32];
 };
 
+struct virtio_crypto_rsa_session_para {
+#define VIRTIO_CRYPTO_RSA_RAW_PADDING   0
+#define VIRTIO_CRYPTO_RSA_PKCS1_PADDING 1
+       uint32_t padding_algo;
+
+#define VIRTIO_CRYPTO_RSA_NO_HASH   0
+#define VIRTIO_CRYPTO_RSA_MD2       1
+#define VIRTIO_CRYPTO_RSA_MD3       2
+#define VIRTIO_CRYPTO_RSA_MD4       3
+#define VIRTIO_CRYPTO_RSA_MD5       4
+#define VIRTIO_CRYPTO_RSA_SHA1      5
+#define VIRTIO_CRYPTO_RSA_SHA256    6
+#define VIRTIO_CRYPTO_RSA_SHA384    7
+#define VIRTIO_CRYPTO_RSA_SHA512    8
+#define VIRTIO_CRYPTO_RSA_SHA224    9
+       uint32_t hash_algo;
+};
+
+struct virtio_crypto_ecdsa_session_para {
+#define VIRTIO_CRYPTO_CURVE_UNKNOWN   0
+#define VIRTIO_CRYPTO_CURVE_NIST_P192 1
+#define VIRTIO_CRYPTO_CURVE_NIST_P224 2
+#define VIRTIO_CRYPTO_CURVE_NIST_P256 3
+#define VIRTIO_CRYPTO_CURVE_NIST_P384 4
+#define VIRTIO_CRYPTO_CURVE_NIST_P521 5
+       uint32_t curve_id;
+       uint32_t padding;
+};
+
+struct virtio_crypto_akcipher_session_para {
+#define VIRTIO_CRYPTO_NO_AKCIPHER    0
+#define VIRTIO_CRYPTO_AKCIPHER_RSA   1
+#define VIRTIO_CRYPTO_AKCIPHER_DSA   2
+#define VIRTIO_CRYPTO_AKCIPHER_ECDSA 3
+       uint32_t algo;
+
+#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PUBLIC  1
+#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PRIVATE 2
+       uint32_t keytype;
+       uint32_t keylen;
+
+       union {
+               struct virtio_crypto_rsa_session_para rsa;
+               struct virtio_crypto_ecdsa_session_para ecdsa;
+       } u;
+};
+
+struct virtio_crypto_akcipher_create_session_req {
+       struct virtio_crypto_akcipher_session_para para;
+       uint8_t padding[36];
+};
+
 struct virtio_crypto_alg_chain_session_para {
 #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER  1
 #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH  2
@@ -247,6 +304,8 @@ struct virtio_crypto_op_ctrl_req {
                        mac_create_session;
                struct virtio_crypto_aead_create_session_req
                        aead_create_session;
+               struct virtio_crypto_akcipher_create_session_req
+                       akcipher_create_session;
                struct virtio_crypto_destroy_session_req
                        destroy_session;
                uint8_t padding[56];
@@ -266,6 +325,14 @@ struct virtio_crypto_op_header {
        VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00)
 #define VIRTIO_CRYPTO_AEAD_DECRYPT \
        VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01)
+#define VIRTIO_CRYPTO_AKCIPHER_ENCRYPT \
+       VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x00)
+#define VIRTIO_CRYPTO_AKCIPHER_DECRYPT \
+       VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x01)
+#define VIRTIO_CRYPTO_AKCIPHER_SIGN \
+       VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x02)
+#define VIRTIO_CRYPTO_AKCIPHER_VERIFY \
+       VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x03)
        uint32_t opcode;
        /* algo should be service-specific algorithms */
        uint32_t algo;
@@ -390,6 +457,16 @@ struct virtio_crypto_aead_data_req {
        uint8_t padding[32];
 };
 
+struct virtio_crypto_akcipher_para {
+       uint32_t src_data_len;
+       uint32_t dst_data_len;
+};
+
+struct virtio_crypto_akcipher_data_req {
+       struct virtio_crypto_akcipher_para para;
+       uint8_t padding[40];
+};
+
 /* The request of the data virtqueue's packet */
 struct virtio_crypto_op_data_req {
        struct virtio_crypto_op_header header;
@@ -399,6 +476,7 @@ struct virtio_crypto_op_data_req {
                struct virtio_crypto_hash_data_req hash_req;
                struct virtio_crypto_mac_data_req mac_req;
                struct virtio_crypto_aead_data_req aead_req;
+               struct virtio_crypto_akcipher_data_req akcipher_req;
                uint8_t padding[48];
        } u;
 };
@@ -408,6 +486,8 @@ struct virtio_crypto_op_data_req {
 #define VIRTIO_CRYPTO_BADMSG    2
 #define VIRTIO_CRYPTO_NOTSUPP   3
 #define VIRTIO_CRYPTO_INVSESS   4 /* Invalid session id */
+#define VIRTIO_CRYPTO_NOSPC     5 /* no free session ID */
+#define VIRTIO_CRYPTO_KEY_REJECTED 6 /* Signature verification failed */
 
 /* The accelerator hardware is ready */
 #define VIRTIO_CRYPTO_S_HW_READY  (1 << 0)
@@ -438,7 +518,7 @@ struct virtio_crypto_config {
        uint32_t max_cipher_key_len;
        /* Maximum length of authenticated key */
        uint32_t max_auth_key_len;
-       uint32_t reserve;
+       uint32_t akcipher_algo;
        /* Maximum size of each crypto request's content */
        uint64_t max_size;
 };
diff --git a/include/standard-headers/linux/virtio_gpio.h b/include/standard-headers/linux/virtio_gpio.h
new file mode 100644 (file)
index 0000000..2b5cf06
--- /dev/null
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _LINUX_VIRTIO_GPIO_H
+#define _LINUX_VIRTIO_GPIO_H
+
+#include "standard-headers/linux/types.h"
+
+/* Virtio GPIO Feature bits */
+#define VIRTIO_GPIO_F_IRQ                      0
+
+/* Virtio GPIO request types */
+#define VIRTIO_GPIO_MSG_GET_NAMES              0x0001
+#define VIRTIO_GPIO_MSG_GET_DIRECTION          0x0002
+#define VIRTIO_GPIO_MSG_SET_DIRECTION          0x0003
+#define VIRTIO_GPIO_MSG_GET_VALUE              0x0004
+#define VIRTIO_GPIO_MSG_SET_VALUE              0x0005
+#define VIRTIO_GPIO_MSG_IRQ_TYPE               0x0006
+
+/* Possible values of the status field */
+#define VIRTIO_GPIO_STATUS_OK                  0x0
+#define VIRTIO_GPIO_STATUS_ERR                 0x1
+
+/* Direction types */
+#define VIRTIO_GPIO_DIRECTION_NONE             0x00
+#define VIRTIO_GPIO_DIRECTION_OUT              0x01
+#define VIRTIO_GPIO_DIRECTION_IN               0x02
+
+/* Virtio GPIO IRQ types */
+#define VIRTIO_GPIO_IRQ_TYPE_NONE              0x00
+#define VIRTIO_GPIO_IRQ_TYPE_EDGE_RISING       0x01
+#define VIRTIO_GPIO_IRQ_TYPE_EDGE_FALLING      0x02
+#define VIRTIO_GPIO_IRQ_TYPE_EDGE_BOTH         0x03
+#define VIRTIO_GPIO_IRQ_TYPE_LEVEL_HIGH                0x04
+#define VIRTIO_GPIO_IRQ_TYPE_LEVEL_LOW         0x08
+
+struct virtio_gpio_config {
+       uint16_t ngpio;
+       uint8_t padding[2];
+       uint32_t gpio_names_size;
+};
+
+/* Virtio GPIO Request / Response */
+struct virtio_gpio_request {
+       uint16_t type;
+       uint16_t gpio;
+       uint32_t value;
+};
+
+struct virtio_gpio_response {
+       uint8_t status;
+       uint8_t value;
+};
+
+struct virtio_gpio_response_get_names {
+       uint8_t status;
+       uint8_t value[];
+};
+
+/* Virtio GPIO IRQ Request / Response */
+struct virtio_gpio_irq_request {
+       uint16_t gpio;
+};
+
+struct virtio_gpio_irq_response {
+       uint8_t status;
+};
+
+/* Possible values of the interrupt status field */
+#define VIRTIO_GPIO_IRQ_STATUS_INVALID         0x0
+#define VIRTIO_GPIO_IRQ_STATUS_VALID           0x1
+
+#endif /* _LINUX_VIRTIO_GPIO_H */
index 1357e4774ea65573eae228a33b08600ea3436157..2da48d3d4c2cc3d631a913224ca1c447aa64277e 100644 (file)
  * VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB
  */
 #define VIRTIO_GPU_F_RESOURCE_BLOB       3
+/*
+ * VIRTIO_GPU_CMD_CREATE_CONTEXT with
+ * context_init and multiple timelines
+ */
+#define VIRTIO_GPU_F_CONTEXT_INIT        4
 
 enum virtio_gpu_ctrl_type {
        VIRTIO_GPU_UNDEFINED = 0,
@@ -122,14 +127,20 @@ enum virtio_gpu_shm_id {
        VIRTIO_GPU_SHM_ID_HOST_VISIBLE = 1
 };
 
-#define VIRTIO_GPU_FLAG_FENCE (1 << 0)
+#define VIRTIO_GPU_FLAG_FENCE         (1 << 0)
+/*
+ * If the following flag is set, then ring_idx contains the index
+ * of the command ring that needs to used when creating the fence
+ */
+#define VIRTIO_GPU_FLAG_INFO_RING_IDX (1 << 1)
 
 struct virtio_gpu_ctrl_hdr {
        uint32_t type;
        uint32_t flags;
        uint64_t fence_id;
        uint32_t ctx_id;
-       uint32_t padding;
+       uint8_t ring_idx;
+       uint8_t padding[3];
 };
 
 /* data passed in the cursor vq */
@@ -269,10 +280,11 @@ struct virtio_gpu_resource_create_3d {
 };
 
 /* VIRTIO_GPU_CMD_CTX_CREATE */
+#define VIRTIO_GPU_CONTEXT_INIT_CAPSET_ID_MASK 0x000000ff
 struct virtio_gpu_ctx_create {
        struct virtio_gpu_ctrl_hdr hdr;
        uint32_t nlen;
-       uint32_t padding;
+       uint32_t context_init;
        char debug_name[64];
 };
 
diff --git a/include/standard-headers/linux/virtio_i2c.h b/include/standard-headers/linux/virtio_i2c.h
new file mode 100644 (file)
index 0000000..09fa907
--- /dev/null
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */
+/*
+ * Definitions for virtio I2C Adpter
+ *
+ * Copyright (c) 2021 Intel Corporation. All rights reserved.
+ */
+
+#ifndef _LINUX_VIRTIO_I2C_H
+#define _LINUX_VIRTIO_I2C_H
+
+#include "standard-headers/linux/const.h"
+#include "standard-headers/linux/types.h"
+
+/* Virtio I2C Feature bits */
+#define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST       0
+
+/* The bit 0 of the @virtio_i2c_out_hdr.@flags, used to group the requests */
+#define VIRTIO_I2C_FLAGS_FAIL_NEXT     _BITUL(0)
+
+/* The bit 1 of the @virtio_i2c_out_hdr.@flags, used to mark a buffer as read */
+#define VIRTIO_I2C_FLAGS_M_RD          _BITUL(1)
+
+/**
+ * struct virtio_i2c_out_hdr - the virtio I2C message OUT header
+ * @addr: the controlled device address
+ * @padding: used to pad to full dword
+ * @flags: used for feature extensibility
+ */
+struct virtio_i2c_out_hdr {
+       uint16_t addr;
+       uint16_t padding;
+       uint32_t flags;
+};
+
+/**
+ * struct virtio_i2c_in_hdr - the virtio I2C message IN header
+ * @status: the processing result from the backend
+ */
+struct virtio_i2c_in_hdr {
+       uint8_t status;
+};
+
+/* The final status written by the device */
+#define VIRTIO_I2C_MSG_OK      0
+#define VIRTIO_I2C_MSG_ERR     1
+
+#endif /* _LINUX_VIRTIO_I2C_H */
index bc1c0621f5edd18ee6e3c9328f4d8d815d51dc65..7aa2eb76620508fdc915533f74973d76308d3ef5 100644 (file)
 #define VIRTIO_ID_PSTORE               22 /* virtio pstore device */
 #define VIRTIO_ID_IOMMU                        23 /* virtio IOMMU */
 #define VIRTIO_ID_MEM                  24 /* virtio mem */
+#define VIRTIO_ID_SOUND                        25 /* virtio sound */
 #define VIRTIO_ID_FS                   26 /* virtio filesystem */
 #define VIRTIO_ID_PMEM                 27 /* virtio pmem */
+#define VIRTIO_ID_RPMB                 28 /* virtio rpmb */
 #define VIRTIO_ID_MAC80211_HWSIM       29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_VIDEO_ENCODER                30 /* virtio video encoder */
+#define VIRTIO_ID_VIDEO_DECODER                31 /* virtio video decoder */
+#define VIRTIO_ID_SCMI                 32 /* virtio SCMI */
+#define VIRTIO_ID_NITRO_SEC_MOD                33 /* virtio nitro secure module*/
+#define VIRTIO_ID_I2C_ADAPTER          34 /* virtio i2c adapter */
+#define VIRTIO_ID_WATCHDOG             35 /* virtio watchdog */
+#define VIRTIO_ID_CAN                  36 /* virtio can */
+#define VIRTIO_ID_DMABUF               37 /* virtio dmabuf */
+#define VIRTIO_ID_PARAM_SERV           38 /* virtio parameter server */
+#define VIRTIO_ID_AUDIO_POLICY         39 /* virtio audio policy */
+#define VIRTIO_ID_BT                   40 /* virtio bluetooth */
+#define VIRTIO_ID_GPIO                 41 /* virtio gpio */
+
+/*
+ * Virtio Transitional IDs
+ */
+
+#define VIRTIO_TRANS_ID_NET            0x1000 /* transitional virtio net */
+#define VIRTIO_TRANS_ID_BLOCK          0x1001 /* transitional virtio block */
+#define VIRTIO_TRANS_ID_BALLOON                0x1002 /* transitional virtio balloon */
+#define VIRTIO_TRANS_ID_CONSOLE                0x1003 /* transitional virtio console */
+#define VIRTIO_TRANS_ID_SCSI           0x1004 /* transitional virtio SCSI */
+#define VIRTIO_TRANS_ID_RNG            0x1005 /* transitional virtio rng */
+#define VIRTIO_TRANS_ID_9P             0x1009 /* transitional virtio 9p console */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
index b9443b83a13a07a208530cf7b95a5ef3f67c5d84..366379c2f077047b34c11e473d54544466f729ad 100644 (file)
@@ -16,6 +16,7 @@
 #define VIRTIO_IOMMU_F_BYPASS                  3
 #define VIRTIO_IOMMU_F_PROBE                   4
 #define VIRTIO_IOMMU_F_MMIO                    5
+#define VIRTIO_IOMMU_F_BYPASS_CONFIG           6
 
 struct virtio_iommu_range_64 {
        uint64_t                                        start;
@@ -36,6 +37,8 @@ struct virtio_iommu_config {
        struct virtio_iommu_range_32            domain_range;
        /* Probe buffer size */
        uint32_t                                        probe_size;
+       uint8_t                                 bypass;
+       uint8_t                                 reserved[3];
 };
 
 /* Request types */
@@ -66,11 +69,14 @@ struct virtio_iommu_req_tail {
        uint8_t                                 reserved[3];
 };
 
+#define VIRTIO_IOMMU_ATTACH_F_BYPASS           (1 << 0)
+
 struct virtio_iommu_req_attach {
        struct virtio_iommu_req_head            head;
        uint32_t                                        domain;
        uint32_t                                        endpoint;
-       uint8_t                                 reserved[8];
+       uint32_t                                        flags;
+       uint8_t                                 reserved[4];
        struct virtio_iommu_req_tail            tail;
 };
 
index 05e5ade75d3d8d2533c63d4fb4fe1c9026d86751..18c74c527c8ab19ff3d7f1b8601fdc665d4df68d 100644 (file)
  * explicitly triggered (VIRTIO_MEM_REQ_UNPLUG).
  *
  * There are no guarantees what will happen if unplugged memory is
- * read/written. Such memory should, in general, not be touched. E.g.,
- * even writing might succeed, but the values will simply be discarded at
- * random points in time.
+ * read/written. In general, unplugged memory should not be touched, because
+ * the resulting action is undefined. There is one exception: without
+ * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, unplugged memory inside the usable
+ * region can be read, to simplify creation of memory dumps.
  *
  * It can happen that the device cannot process a request, because it is
  * busy. The device driver has to retry later.
@@ -87,6 +88,8 @@
 
 /* node_id is an ACPI PXM and is valid */
 #define VIRTIO_MEM_F_ACPI_PXM          0
+/* unplugged memory must not be accessed */
+#define VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE    1
 
 
 /* --- virtio-mem: guest -> host requests --- */
index e0a070518f39d2b4b227e1a38a285bd0f34b0a2c..0f8841774238f0a503fc671e6dfb5518de3c5505 100644 (file)
 #define VIRTIO_NET_F_MQ        22      /* Device supports Receive Flow
                                         * Steering */
 #define VIRTIO_NET_F_CTRL_MAC_ADDR 23  /* Set MAC address */
-
+#define VIRTIO_NET_F_VQ_NOTF_COAL 52   /* Device supports virtqueue notification coalescing */
+#define VIRTIO_NET_F_NOTF_COAL 53      /* Device supports notifications coalescing */
+#define VIRTIO_NET_F_GUEST_USO4        54      /* Guest can handle USOv4 in. */
+#define VIRTIO_NET_F_GUEST_USO6        55      /* Guest can handle USOv6 in. */
+#define VIRTIO_NET_F_HOST_USO  56      /* Host can handle USO in. */
 #define VIRTIO_NET_F_HASH_REPORT  57   /* Supports hash report */
+#define VIRTIO_NET_F_GUEST_HDRLEN  59  /* Guest provides the exact hdr_len value. */
 #define VIRTIO_NET_F_RSS         60    /* Supports RSS RX steering */
 #define VIRTIO_NET_F_RSC_EXT     61    /* extended coalescing info */
 #define VIRTIO_NET_F_STANDBY     62    /* Act as standby for another device
@@ -130,6 +135,7 @@ struct virtio_net_hdr_v1 {
 #define VIRTIO_NET_HDR_GSO_TCPV4       1       /* GSO frame, IPv4 TCP (TSO) */
 #define VIRTIO_NET_HDR_GSO_UDP         3       /* GSO frame, IPv4 UDP (UFO) */
 #define VIRTIO_NET_HDR_GSO_TCPV6       4       /* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_UDP_L4      5       /* GSO frame, IPv4& IPv6 UDP (USO) */
 #define VIRTIO_NET_HDR_GSO_ECN         0x80    /* TCP has ECN set */
        uint8_t gso_type;
        __virtio16 hdr_len;     /* Ethernet + IP + tcp/udp hdrs */
@@ -355,4 +361,49 @@ struct virtio_net_hash_config {
 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS   5
 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET        0
 
+/*
+ * Control notifications coalescing.
+ *
+ * Request the device to change the notifications coalescing parameters.
+ *
+ * Available with the VIRTIO_NET_F_NOTF_COAL feature bit.
+ */
+#define VIRTIO_NET_CTRL_NOTF_COAL              6
+/*
+ * Set the tx-usecs/tx-max-packets parameters.
+ */
+struct virtio_net_ctrl_coal_tx {
+       /* Maximum number of packets to send before a TX notification */
+       uint32_t tx_max_packets;
+       /* Maximum number of usecs to delay a TX notification */
+       uint32_t tx_usecs;
+};
+
+#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET               0
+
+/*
+ * Set the rx-usecs/rx-max-packets parameters.
+ */
+struct virtio_net_ctrl_coal_rx {
+       /* Maximum number of packets to receive before a RX notification */
+       uint32_t rx_max_packets;
+       /* Maximum number of usecs to delay a RX notification */
+       uint32_t rx_usecs;
+};
+
+#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET               1
+#define VIRTIO_NET_CTRL_NOTF_COAL_VQ_SET               2
+#define VIRTIO_NET_CTRL_NOTF_COAL_VQ_GET               3
+
+struct virtio_net_ctrl_coal {
+       uint32_t max_packets;
+       uint32_t max_usecs;
+};
+
+struct  virtio_net_ctrl_coal_vq {
+       uint16_t vqn;
+       uint16_t reserved;
+       struct virtio_net_ctrl_coal coal;
+};
+
 #endif /* _LINUX_VIRTIO_NET_H */
index db7a8e2fcbf2639bdafa15d78693ec339eb0c430..be912cfc957cd78f0cade4c99ad03c0513e03c4d 100644 (file)
@@ -202,6 +202,8 @@ struct virtio_pci_cfg_cap {
 #define VIRTIO_PCI_COMMON_Q_AVAILHI    44
 #define VIRTIO_PCI_COMMON_Q_USEDLO     48
 #define VIRTIO_PCI_COMMON_Q_USEDHI     52
+#define VIRTIO_PCI_COMMON_Q_NDATA      56
+#define VIRTIO_PCI_COMMON_Q_RESET      58
 
 #endif /* VIRTIO_PCI_NO_MODERN */
 
diff --git a/include/standard-headers/linux/virtio_pcidev.h b/include/standard-headers/linux/virtio_pcidev.h
new file mode 100644 (file)
index 0000000..bdf1d06
--- /dev/null
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/*
+ * Copyright (C) 2021 Intel Corporation
+ * Author: Johannes Berg <johannes@sipsolutions.net>
+ */
+#ifndef _LINUX_VIRTIO_PCIDEV_H
+#define _LINUX_VIRTIO_PCIDEV_H
+#include "standard-headers/linux/types.h"
+
+/**
+ * enum virtio_pcidev_ops - virtual PCI device operations
+ * @VIRTIO_PCIDEV_OP_RESERVED: reserved to catch errors
+ * @VIRTIO_PCIDEV_OP_CFG_READ: read config space, size is 1, 2, 4 or 8;
+ *     the @data field should be filled in by the device (in little endian).
+ * @VIRTIO_PCIDEV_OP_CFG_WRITE: write config space, size is 1, 2, 4 or 8;
+ *     the @data field contains the data to write (in little endian).
+ * @VIRTIO_PCIDEV_OP_MMIO_READ: read BAR mem/pio, size can be variable;
+ *     the @data field should be filled in by the device (in little endian).
+ * @VIRTIO_PCIDEV_OP_MMIO_WRITE: write BAR mem/pio, size can be variable;
+ *     the @data field contains the data to write (in little endian).
+ * @VIRTIO_PCIDEV_OP_MMIO_MEMSET: memset MMIO, size is variable but
+ *     the @data field only has one byte (unlike @VIRTIO_PCIDEV_OP_MMIO_WRITE)
+ * @VIRTIO_PCIDEV_OP_INT: legacy INTx# pin interrupt, the addr field is 1-4 for
+ *     the number
+ * @VIRTIO_PCIDEV_OP_MSI: MSI(-X) interrupt, this message basically transports
+ *     the 16- or 32-bit write that would otherwise be done into memory,
+ *     analogous to the write messages (@VIRTIO_PCIDEV_OP_MMIO_WRITE) above
+ * @VIRTIO_PCIDEV_OP_PME: Dummy message whose content is ignored (and should be
+ *     all zeroes) to signal the PME# pin.
+ */
+enum virtio_pcidev_ops {
+       VIRTIO_PCIDEV_OP_RESERVED = 0,
+       VIRTIO_PCIDEV_OP_CFG_READ,
+       VIRTIO_PCIDEV_OP_CFG_WRITE,
+       VIRTIO_PCIDEV_OP_MMIO_READ,
+       VIRTIO_PCIDEV_OP_MMIO_WRITE,
+       VIRTIO_PCIDEV_OP_MMIO_MEMSET,
+       VIRTIO_PCIDEV_OP_INT,
+       VIRTIO_PCIDEV_OP_MSI,
+       VIRTIO_PCIDEV_OP_PME,
+};
+
+/**
+ * struct virtio_pcidev_msg - virtio PCI device operation
+ * @op: the operation to do
+ * @bar: the bar (only with BAR read/write messages)
+ * @reserved: reserved
+ * @size: the size of the read/write (in bytes)
+ * @addr: the address to read/write
+ * @data: the data, normally @size long, but just one byte for
+ *     %VIRTIO_PCIDEV_OP_MMIO_MEMSET
+ *
+ * Note: the fields are all in native (CPU) endian, however, the
+ * @data values will often be in little endian (see the ops above.)
+ */
+struct virtio_pcidev_msg {
+       uint8_t op;
+       uint8_t bar;
+       uint16_t reserved;
+       uint32_t size;
+       uint64_t addr;
+       uint8_t data[];
+};
+
+#endif /* _LINUX_VIRTIO_PCIDEV_H */
index 0fa0e1067ffe56c40a4034ed036893146008f0f5..22f6eb8ca710d3f35060e26df99dd4beb6fb95f1 100644 (file)
 #define VRING_USED_ALIGN_SIZE 4
 #define VRING_DESC_ALIGN_SIZE 16
 
-/* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
+/**
+ * struct vring_desc - Virtio ring descriptors,
+ * 16 bytes long. These can chain together via @next.
+ *
+ * @addr: buffer address (guest-physical)
+ * @len: buffer length
+ * @flags: descriptor flags
+ * @next: index of the next descriptor in the chain,
+ *        if the VRING_DESC_F_NEXT flag is set. We chain unused
+ *        descriptors via this, too.
+ */
 struct vring_desc {
-       /* Address (guest-physical). */
        __virtio64 addr;
-       /* Length. */
        __virtio32 len;
-       /* The flags as indicated above. */
        __virtio16 flags;
-       /* We chain unused descriptors via this, too */
        __virtio16 next;
 };
 
diff --git a/include/standard-headers/linux/virtio_scmi.h b/include/standard-headers/linux/virtio_scmi.h
new file mode 100644 (file)
index 0000000..8f2c305
--- /dev/null
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/*
+ * Copyright (C) 2020-2021 OpenSynergy GmbH
+ * Copyright (C) 2021 ARM Ltd.
+ */
+
+#ifndef _LINUX_VIRTIO_SCMI_H
+#define _LINUX_VIRTIO_SCMI_H
+
+#include "standard-headers/linux/virtio_types.h"
+
+/* Device implements some SCMI notifications, or delayed responses. */
+#define VIRTIO_SCMI_F_P2A_CHANNELS 0
+
+/* Device implements any SCMI statistics shared memory region */
+#define VIRTIO_SCMI_F_SHARED_MEMORY 1
+
+/* Virtqueues */
+
+#define VIRTIO_SCMI_VQ_TX 0 /* cmdq */
+#define VIRTIO_SCMI_VQ_RX 1 /* eventq */
+#define VIRTIO_SCMI_VQ_MAX_CNT 2
+
+#endif /* _LINUX_VIRTIO_SCMI_H */
diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h
new file mode 100644 (file)
index 0000000..1af96b9
--- /dev/null
@@ -0,0 +1,334 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*
+ * Copyright (C) 2021 OpenSynergy GmbH
+ */
+#ifndef VIRTIO_SND_IF_H
+#define VIRTIO_SND_IF_H
+
+#include "standard-headers/linux/virtio_types.h"
+
+/*******************************************************************************
+ * CONFIGURATION SPACE
+ */
+struct virtio_snd_config {
+       /* # of available physical jacks */
+       uint32_t jacks;
+       /* # of available PCM streams */
+       uint32_t streams;
+       /* # of available channel maps */
+       uint32_t chmaps;
+};
+
+enum {
+       /* device virtqueue indexes */
+       VIRTIO_SND_VQ_CONTROL = 0,
+       VIRTIO_SND_VQ_EVENT,
+       VIRTIO_SND_VQ_TX,
+       VIRTIO_SND_VQ_RX,
+       /* # of device virtqueues */
+       VIRTIO_SND_VQ_MAX
+};
+
+/*******************************************************************************
+ * COMMON DEFINITIONS
+ */
+
+/* supported dataflow directions */
+enum {
+       VIRTIO_SND_D_OUTPUT = 0,
+       VIRTIO_SND_D_INPUT
+};
+
+enum {
+       /* jack control request types */
+       VIRTIO_SND_R_JACK_INFO = 1,
+       VIRTIO_SND_R_JACK_REMAP,
+
+       /* PCM control request types */
+       VIRTIO_SND_R_PCM_INFO = 0x0100,
+       VIRTIO_SND_R_PCM_SET_PARAMS,
+       VIRTIO_SND_R_PCM_PREPARE,
+       VIRTIO_SND_R_PCM_RELEASE,
+       VIRTIO_SND_R_PCM_START,
+       VIRTIO_SND_R_PCM_STOP,
+
+       /* channel map control request types */
+       VIRTIO_SND_R_CHMAP_INFO = 0x0200,
+
+       /* jack event types */
+       VIRTIO_SND_EVT_JACK_CONNECTED = 0x1000,
+       VIRTIO_SND_EVT_JACK_DISCONNECTED,
+
+       /* PCM event types */
+       VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED = 0x1100,
+       VIRTIO_SND_EVT_PCM_XRUN,
+
+       /* common status codes */
+       VIRTIO_SND_S_OK = 0x8000,
+       VIRTIO_SND_S_BAD_MSG,
+       VIRTIO_SND_S_NOT_SUPP,
+       VIRTIO_SND_S_IO_ERR
+};
+
+/* common header */
+struct virtio_snd_hdr {
+       uint32_t code;
+};
+
+/* event notification */
+struct virtio_snd_event {
+       /* VIRTIO_SND_EVT_XXX */
+       struct virtio_snd_hdr hdr;
+       /* optional event data */
+       uint32_t data;
+};
+
+/* common control request to query an item information */
+struct virtio_snd_query_info {
+       /* VIRTIO_SND_R_XXX_INFO */
+       struct virtio_snd_hdr hdr;
+       /* item start identifier */
+       uint32_t start_id;
+       /* item count to query */
+       uint32_t count;
+       /* item information size in bytes */
+       uint32_t size;
+};
+
+/* common item information header */
+struct virtio_snd_info {
+       /* function group node id (High Definition Audio Specification 7.1.2) */
+       uint32_t hda_fn_nid;
+};
+
+/*******************************************************************************
+ * JACK CONTROL MESSAGES
+ */
+struct virtio_snd_jack_hdr {
+       /* VIRTIO_SND_R_JACK_XXX */
+       struct virtio_snd_hdr hdr;
+       /* 0 ... virtio_snd_config::jacks - 1 */
+       uint32_t jack_id;
+};
+
+/* supported jack features */
+enum {
+       VIRTIO_SND_JACK_F_REMAP = 0
+};
+
+struct virtio_snd_jack_info {
+       /* common header */
+       struct virtio_snd_info hdr;
+       /* supported feature bit map (1 << VIRTIO_SND_JACK_F_XXX) */
+       uint32_t features;
+       /* pin configuration (High Definition Audio Specification 7.3.3.31) */
+       uint32_t hda_reg_defconf;
+       /* pin capabilities (High Definition Audio Specification 7.3.4.9) */
+       uint32_t hda_reg_caps;
+       /* current jack connection status (0: disconnected, 1: connected) */
+       uint8_t connected;
+
+       uint8_t padding[7];
+};
+
+/* jack remapping control request */
+struct virtio_snd_jack_remap {
+       /* .code = VIRTIO_SND_R_JACK_REMAP */
+       struct virtio_snd_jack_hdr hdr;
+       /* selected association number */
+       uint32_t association;
+       /* selected sequence number */
+       uint32_t sequence;
+};
+
+/*******************************************************************************
+ * PCM CONTROL MESSAGES
+ */
+struct virtio_snd_pcm_hdr {
+       /* VIRTIO_SND_R_PCM_XXX */
+       struct virtio_snd_hdr hdr;
+       /* 0 ... virtio_snd_config::streams - 1 */
+       uint32_t stream_id;
+};
+
+/* supported PCM stream features */
+enum {
+       VIRTIO_SND_PCM_F_SHMEM_HOST = 0,
+       VIRTIO_SND_PCM_F_SHMEM_GUEST,
+       VIRTIO_SND_PCM_F_MSG_POLLING,
+       VIRTIO_SND_PCM_F_EVT_SHMEM_PERIODS,
+       VIRTIO_SND_PCM_F_EVT_XRUNS
+};
+
+/* supported PCM sample formats */
+enum {
+       /* analog formats (width / physical width) */
+       VIRTIO_SND_PCM_FMT_IMA_ADPCM = 0,       /*  4 /  4 bits */
+       VIRTIO_SND_PCM_FMT_MU_LAW,              /*  8 /  8 bits */
+       VIRTIO_SND_PCM_FMT_A_LAW,               /*  8 /  8 bits */
+       VIRTIO_SND_PCM_FMT_S8,                  /*  8 /  8 bits */
+       VIRTIO_SND_PCM_FMT_U8,                  /*  8 /  8 bits */
+       VIRTIO_SND_PCM_FMT_S16,                 /* 16 / 16 bits */
+       VIRTIO_SND_PCM_FMT_U16,                 /* 16 / 16 bits */
+       VIRTIO_SND_PCM_FMT_S18_3,               /* 18 / 24 bits */
+       VIRTIO_SND_PCM_FMT_U18_3,               /* 18 / 24 bits */
+       VIRTIO_SND_PCM_FMT_S20_3,               /* 20 / 24 bits */
+       VIRTIO_SND_PCM_FMT_U20_3,               /* 20 / 24 bits */
+       VIRTIO_SND_PCM_FMT_S24_3,               /* 24 / 24 bits */
+       VIRTIO_SND_PCM_FMT_U24_3,               /* 24 / 24 bits */
+       VIRTIO_SND_PCM_FMT_S20,                 /* 20 / 32 bits */
+       VIRTIO_SND_PCM_FMT_U20,                 /* 20 / 32 bits */
+       VIRTIO_SND_PCM_FMT_S24,                 /* 24 / 32 bits */
+       VIRTIO_SND_PCM_FMT_U24,                 /* 24 / 32 bits */
+       VIRTIO_SND_PCM_FMT_S32,                 /* 32 / 32 bits */
+       VIRTIO_SND_PCM_FMT_U32,                 /* 32 / 32 bits */
+       VIRTIO_SND_PCM_FMT_FLOAT,               /* 32 / 32 bits */
+       VIRTIO_SND_PCM_FMT_FLOAT64,             /* 64 / 64 bits */
+       /* digital formats (width / physical width) */
+       VIRTIO_SND_PCM_FMT_DSD_U8,              /*  8 /  8 bits */
+       VIRTIO_SND_PCM_FMT_DSD_U16,             /* 16 / 16 bits */
+       VIRTIO_SND_PCM_FMT_DSD_U32,             /* 32 / 32 bits */
+       VIRTIO_SND_PCM_FMT_IEC958_SUBFRAME      /* 32 / 32 bits */
+};
+
+/* supported PCM frame rates */
+enum {
+       VIRTIO_SND_PCM_RATE_5512 = 0,
+       VIRTIO_SND_PCM_RATE_8000,
+       VIRTIO_SND_PCM_RATE_11025,
+       VIRTIO_SND_PCM_RATE_16000,
+       VIRTIO_SND_PCM_RATE_22050,
+       VIRTIO_SND_PCM_RATE_32000,
+       VIRTIO_SND_PCM_RATE_44100,
+       VIRTIO_SND_PCM_RATE_48000,
+       VIRTIO_SND_PCM_RATE_64000,
+       VIRTIO_SND_PCM_RATE_88200,
+       VIRTIO_SND_PCM_RATE_96000,
+       VIRTIO_SND_PCM_RATE_176400,
+       VIRTIO_SND_PCM_RATE_192000,
+       VIRTIO_SND_PCM_RATE_384000
+};
+
+struct virtio_snd_pcm_info {
+       /* common header */
+       struct virtio_snd_info hdr;
+       /* supported feature bit map (1 << VIRTIO_SND_PCM_F_XXX) */
+       uint32_t features;
+       /* supported sample format bit map (1 << VIRTIO_SND_PCM_FMT_XXX) */
+       uint64_t formats;
+       /* supported frame rate bit map (1 << VIRTIO_SND_PCM_RATE_XXX) */
+       uint64_t rates;
+       /* dataflow direction (VIRTIO_SND_D_XXX) */
+       uint8_t direction;
+       /* minimum # of supported channels */
+       uint8_t channels_min;
+       /* maximum # of supported channels */
+       uint8_t channels_max;
+
+       uint8_t padding[5];
+};
+
+/* set PCM stream format */
+struct virtio_snd_pcm_set_params {
+       /* .code = VIRTIO_SND_R_PCM_SET_PARAMS */
+       struct virtio_snd_pcm_hdr hdr;
+       /* size of the hardware buffer */
+       uint32_t buffer_bytes;
+       /* size of the hardware period */
+       uint32_t period_bytes;
+       /* selected feature bit map (1 << VIRTIO_SND_PCM_F_XXX) */
+       uint32_t features;
+       /* selected # of channels */
+       uint8_t channels;
+       /* selected sample format (VIRTIO_SND_PCM_FMT_XXX) */
+       uint8_t format;
+       /* selected frame rate (VIRTIO_SND_PCM_RATE_XXX) */
+       uint8_t rate;
+
+       uint8_t padding;
+};
+
+/*******************************************************************************
+ * PCM I/O MESSAGES
+ */
+
+/* I/O request header */
+struct virtio_snd_pcm_xfer {
+       /* 0 ... virtio_snd_config::streams - 1 */
+       uint32_t stream_id;
+};
+
+/* I/O request status */
+struct virtio_snd_pcm_status {
+       /* VIRTIO_SND_S_XXX */
+       uint32_t status;
+       /* current device latency */
+       uint32_t latency_bytes;
+};
+
+/*******************************************************************************
+ * CHANNEL MAP CONTROL MESSAGES
+ */
+struct virtio_snd_chmap_hdr {
+       /* VIRTIO_SND_R_CHMAP_XXX */
+       struct virtio_snd_hdr hdr;
+       /* 0 ... virtio_snd_config::chmaps - 1 */
+       uint32_t chmap_id;
+};
+
+/* standard channel position definition */
+enum {
+       VIRTIO_SND_CHMAP_NONE = 0,      /* undefined */
+       VIRTIO_SND_CHMAP_NA,            /* silent */
+       VIRTIO_SND_CHMAP_MONO,          /* mono stream */
+       VIRTIO_SND_CHMAP_FL,            /* front left */
+       VIRTIO_SND_CHMAP_FR,            /* front right */
+       VIRTIO_SND_CHMAP_RL,            /* rear left */
+       VIRTIO_SND_CHMAP_RR,            /* rear right */
+       VIRTIO_SND_CHMAP_FC,            /* front center */
+       VIRTIO_SND_CHMAP_LFE,           /* low frequency (LFE) */
+       VIRTIO_SND_CHMAP_SL,            /* side left */
+       VIRTIO_SND_CHMAP_SR,            /* side right */
+       VIRTIO_SND_CHMAP_RC,            /* rear center */
+       VIRTIO_SND_CHMAP_FLC,           /* front left center */
+       VIRTIO_SND_CHMAP_FRC,           /* front right center */
+       VIRTIO_SND_CHMAP_RLC,           /* rear left center */
+       VIRTIO_SND_CHMAP_RRC,           /* rear right center */
+       VIRTIO_SND_CHMAP_FLW,           /* front left wide */
+       VIRTIO_SND_CHMAP_FRW,           /* front right wide */
+       VIRTIO_SND_CHMAP_FLH,           /* front left high */
+       VIRTIO_SND_CHMAP_FCH,           /* front center high */
+       VIRTIO_SND_CHMAP_FRH,           /* front right high */
+       VIRTIO_SND_CHMAP_TC,            /* top center */
+       VIRTIO_SND_CHMAP_TFL,           /* top front left */
+       VIRTIO_SND_CHMAP_TFR,           /* top front right */
+       VIRTIO_SND_CHMAP_TFC,           /* top front center */
+       VIRTIO_SND_CHMAP_TRL,           /* top rear left */
+       VIRTIO_SND_CHMAP_TRR,           /* top rear right */
+       VIRTIO_SND_CHMAP_TRC,           /* top rear center */
+       VIRTIO_SND_CHMAP_TFLC,          /* top front left center */
+       VIRTIO_SND_CHMAP_TFRC,          /* top front right center */
+       VIRTIO_SND_CHMAP_TSL,           /* top side left */
+       VIRTIO_SND_CHMAP_TSR,           /* top side right */
+       VIRTIO_SND_CHMAP_LLFE,          /* left LFE */
+       VIRTIO_SND_CHMAP_RLFE,          /* right LFE */
+       VIRTIO_SND_CHMAP_BC,            /* bottom center */
+       VIRTIO_SND_CHMAP_BLC,           /* bottom left center */
+       VIRTIO_SND_CHMAP_BRC            /* bottom right center */
+};
+
+/* maximum possible number of channels */
+#define VIRTIO_SND_CHMAP_MAX_SIZE      18
+
+struct virtio_snd_chmap_info {
+       /* common header */
+       struct virtio_snd_info hdr;
+       /* dataflow direction (VIRTIO_SND_D_XXX) */
+       uint8_t direction;
+       /* # of valid channel position values */
+       uint8_t channels;
+       /* channel position values (VIRTIO_SND_CHMAP_XXX) */
+       uint8_t positions[VIRTIO_SND_CHMAP_MAX_SIZE];
+};
+
+#endif /* VIRTIO_SND_IF_H */
index be443211ce96c17b4e1ef40335b2d686d5ad4f5e..467e751b17892ec87dff622f2ec3ec2cd6e7dc72 100644 (file)
@@ -38,6 +38,9 @@
 #include "standard-headers/linux/virtio_ids.h"
 #include "standard-headers/linux/virtio_config.h"
 
+/* The feature bitmap for virtio vsock */
+#define VIRTIO_VSOCK_F_SEQPACKET       1       /* SOCK_SEQPACKET supported */
+
 struct virtio_vsock_config {
        uint64_t guest_cid;
 } QEMU_PACKED;
@@ -65,6 +68,7 @@ struct virtio_vsock_hdr {
 
 enum virtio_vsock_type {
        VIRTIO_VSOCK_TYPE_STREAM = 1,
+       VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
 };
 
 enum virtio_vsock_op {
@@ -91,4 +95,10 @@ enum virtio_vsock_shutdown {
        VIRTIO_VSOCK_SHUTDOWN_SEND = 2,
 };
 
+/* VIRTIO_VSOCK_OP_RW flags values */
+enum virtio_vsock_rw {
+       VIRTIO_VSOCK_SEQ_EOM = 1,
+       VIRTIO_VSOCK_SEQ_EOR = 2,
+};
+
 #endif /* _LINUX_VIRTIO_VSOCK_H */
diff --git a/include/standard-headers/rdma/vmw_pvrdma-abi.h b/include/standard-headers/rdma/vmw_pvrdma-abi.h
new file mode 100644 (file)
index 0000000..c30182a
--- /dev/null
@@ -0,0 +1,310 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __VMW_PVRDMA_ABI_H__
+#define __VMW_PVRDMA_ABI_H__
+
+#include "standard-headers/linux/types.h"
+
+#define PVRDMA_UVERBS_ABI_VERSION      3               /* ABI Version. */
+#define PVRDMA_UAR_HANDLE_MASK         0x00FFFFFF      /* Bottom 24 bits. */
+#define PVRDMA_UAR_QP_OFFSET           0               /* QP doorbell. */
+#define PVRDMA_UAR_QP_SEND             (1 << 30)       /* Send bit. */
+#define PVRDMA_UAR_QP_RECV             (1 << 31)       /* Recv bit. */
+#define PVRDMA_UAR_CQ_OFFSET           4               /* CQ doorbell. */
+#define PVRDMA_UAR_CQ_ARM_SOL          (1 << 29)       /* Arm solicited bit. */
+#define PVRDMA_UAR_CQ_ARM              (1 << 30)       /* Arm bit. */
+#define PVRDMA_UAR_CQ_POLL             (1 << 31)       /* Poll bit. */
+#define PVRDMA_UAR_SRQ_OFFSET          8               /* SRQ doorbell. */
+#define PVRDMA_UAR_SRQ_RECV            (1 << 30)       /* Recv bit. */
+
+enum pvrdma_wr_opcode {
+       PVRDMA_WR_RDMA_WRITE,
+       PVRDMA_WR_RDMA_WRITE_WITH_IMM,
+       PVRDMA_WR_SEND,
+       PVRDMA_WR_SEND_WITH_IMM,
+       PVRDMA_WR_RDMA_READ,
+       PVRDMA_WR_ATOMIC_CMP_AND_SWP,
+       PVRDMA_WR_ATOMIC_FETCH_AND_ADD,
+       PVRDMA_WR_LSO,
+       PVRDMA_WR_SEND_WITH_INV,
+       PVRDMA_WR_RDMA_READ_WITH_INV,
+       PVRDMA_WR_LOCAL_INV,
+       PVRDMA_WR_FAST_REG_MR,
+       PVRDMA_WR_MASKED_ATOMIC_CMP_AND_SWP,
+       PVRDMA_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+       PVRDMA_WR_BIND_MW,
+       PVRDMA_WR_REG_SIG_MR,
+       PVRDMA_WR_ERROR,
+};
+
+enum pvrdma_wc_status {
+       PVRDMA_WC_SUCCESS,
+       PVRDMA_WC_LOC_LEN_ERR,
+       PVRDMA_WC_LOC_QP_OP_ERR,
+       PVRDMA_WC_LOC_EEC_OP_ERR,
+       PVRDMA_WC_LOC_PROT_ERR,
+       PVRDMA_WC_WR_FLUSH_ERR,
+       PVRDMA_WC_MW_BIND_ERR,
+       PVRDMA_WC_BAD_RESP_ERR,
+       PVRDMA_WC_LOC_ACCESS_ERR,
+       PVRDMA_WC_REM_INV_REQ_ERR,
+       PVRDMA_WC_REM_ACCESS_ERR,
+       PVRDMA_WC_REM_OP_ERR,
+       PVRDMA_WC_RETRY_EXC_ERR,
+       PVRDMA_WC_RNR_RETRY_EXC_ERR,
+       PVRDMA_WC_LOC_RDD_VIOL_ERR,
+       PVRDMA_WC_REM_INV_RD_REQ_ERR,
+       PVRDMA_WC_REM_ABORT_ERR,
+       PVRDMA_WC_INV_EECN_ERR,
+       PVRDMA_WC_INV_EEC_STATE_ERR,
+       PVRDMA_WC_FATAL_ERR,
+       PVRDMA_WC_RESP_TIMEOUT_ERR,
+       PVRDMA_WC_GENERAL_ERR,
+};
+
+enum pvrdma_wc_opcode {
+       PVRDMA_WC_SEND,
+       PVRDMA_WC_RDMA_WRITE,
+       PVRDMA_WC_RDMA_READ,
+       PVRDMA_WC_COMP_SWAP,
+       PVRDMA_WC_FETCH_ADD,
+       PVRDMA_WC_BIND_MW,
+       PVRDMA_WC_LSO,
+       PVRDMA_WC_LOCAL_INV,
+       PVRDMA_WC_FAST_REG_MR,
+       PVRDMA_WC_MASKED_COMP_SWAP,
+       PVRDMA_WC_MASKED_FETCH_ADD,
+       PVRDMA_WC_RECV = 1 << 7,
+       PVRDMA_WC_RECV_RDMA_WITH_IMM,
+};
+
+enum pvrdma_wc_flags {
+       PVRDMA_WC_GRH                   = 1 << 0,
+       PVRDMA_WC_WITH_IMM              = 1 << 1,
+       PVRDMA_WC_WITH_INVALIDATE       = 1 << 2,
+       PVRDMA_WC_IP_CSUM_OK            = 1 << 3,
+       PVRDMA_WC_WITH_SMAC             = 1 << 4,
+       PVRDMA_WC_WITH_VLAN             = 1 << 5,
+       PVRDMA_WC_WITH_NETWORK_HDR_TYPE = 1 << 6,
+       PVRDMA_WC_FLAGS_MAX             = PVRDMA_WC_WITH_NETWORK_HDR_TYPE,
+};
+
+enum pvrdma_network_type {
+       PVRDMA_NETWORK_IB,
+       PVRDMA_NETWORK_ROCE_V1 = PVRDMA_NETWORK_IB,
+       PVRDMA_NETWORK_IPV4,
+       PVRDMA_NETWORK_IPV6
+};
+
+struct pvrdma_alloc_ucontext_resp {
+       uint32_t qp_tab_size;
+       uint32_t reserved;
+};
+
+struct pvrdma_alloc_pd_resp {
+       uint32_t pdn;
+       uint32_t reserved;
+};
+
+struct pvrdma_create_cq {
+       uint64_t __attribute__((aligned(8))) buf_addr;
+       uint32_t buf_size;
+       uint32_t reserved;
+};
+
+struct pvrdma_create_cq_resp {
+       uint32_t cqn;
+       uint32_t reserved;
+};
+
+struct pvrdma_resize_cq {
+       uint64_t __attribute__((aligned(8))) buf_addr;
+       uint32_t buf_size;
+       uint32_t reserved;
+};
+
+struct pvrdma_create_srq {
+       uint64_t __attribute__((aligned(8))) buf_addr;
+       uint32_t buf_size;
+       uint32_t reserved;
+};
+
+struct pvrdma_create_srq_resp {
+       uint32_t srqn;
+       uint32_t reserved;
+};
+
+struct pvrdma_create_qp {
+       uint64_t __attribute__((aligned(8))) rbuf_addr;
+       uint64_t __attribute__((aligned(8))) sbuf_addr;
+       uint32_t rbuf_size;
+       uint32_t sbuf_size;
+       uint64_t __attribute__((aligned(8))) qp_addr;
+};
+
+struct pvrdma_create_qp_resp {
+       uint32_t qpn;
+       uint32_t qp_handle;
+};
+
+/* PVRDMA masked atomic compare and swap */
+struct pvrdma_ex_cmp_swap {
+       uint64_t __attribute__((aligned(8))) swap_val;
+       uint64_t __attribute__((aligned(8))) compare_val;
+       uint64_t __attribute__((aligned(8))) swap_mask;
+       uint64_t __attribute__((aligned(8))) compare_mask;
+};
+
+/* PVRDMA masked atomic fetch and add */
+struct pvrdma_ex_fetch_add {
+       uint64_t __attribute__((aligned(8))) add_val;
+       uint64_t __attribute__((aligned(8))) field_boundary;
+};
+
+/* PVRDMA address vector. */
+struct pvrdma_av {
+       uint32_t port_pd;
+       uint32_t sl_tclass_flowlabel;
+       uint8_t dgid[16];
+       uint8_t src_path_bits;
+       uint8_t gid_index;
+       uint8_t stat_rate;
+       uint8_t hop_limit;
+       uint8_t dmac[6];
+       uint8_t reserved[6];
+};
+
+/* PVRDMA scatter/gather entry */
+struct pvrdma_sge {
+       uint64_t __attribute__((aligned(8))) addr;
+       uint32_t   length;
+       uint32_t   lkey;
+};
+
+/* PVRDMA receive queue work request */
+struct pvrdma_rq_wqe_hdr {
+       uint64_t __attribute__((aligned(8))) wr_id;             /* wr id */
+       uint32_t num_sge;               /* size of s/g array */
+       uint32_t total_len;     /* reserved */
+};
+/* Use pvrdma_sge (ib_sge) for receive queue s/g array elements. */
+
+/* PVRDMA send queue work request */
+struct pvrdma_sq_wqe_hdr {
+       uint64_t __attribute__((aligned(8))) wr_id;             /* wr id */
+       uint32_t num_sge;               /* size of s/g array */
+       uint32_t total_len;     /* reserved */
+       uint32_t opcode;                /* operation type */
+       uint32_t send_flags;    /* wr flags */
+       union {
+               uint32_t imm_data;
+               uint32_t invalidate_rkey;
+       } ex;
+       uint32_t reserved;
+       union {
+               struct {
+                       uint64_t __attribute__((aligned(8))) remote_addr;
+                       uint32_t rkey;
+                       uint8_t reserved[4];
+               } rdma;
+               struct {
+                       uint64_t __attribute__((aligned(8))) remote_addr;
+                       uint64_t __attribute__((aligned(8))) compare_add;
+                       uint64_t __attribute__((aligned(8))) swap;
+                       uint32_t rkey;
+                       uint32_t reserved;
+               } atomic;
+               struct {
+                       uint64_t __attribute__((aligned(8))) remote_addr;
+                       uint32_t log_arg_sz;
+                       uint32_t rkey;
+                       union {
+                               struct pvrdma_ex_cmp_swap  cmp_swap;
+                               struct pvrdma_ex_fetch_add fetch_add;
+                       } wr_data;
+               } masked_atomics;
+               struct {
+                       uint64_t __attribute__((aligned(8))) iova_start;
+                       uint64_t __attribute__((aligned(8))) pl_pdir_dma;
+                       uint32_t page_shift;
+                       uint32_t page_list_len;
+                       uint32_t length;
+                       uint32_t access_flags;
+                       uint32_t rkey;
+                       uint32_t reserved;
+               } fast_reg;
+               struct {
+                       uint32_t remote_qpn;
+                       uint32_t remote_qkey;
+                       struct pvrdma_av av;
+               } ud;
+       } wr;
+};
+/* Use pvrdma_sge (ib_sge) for send queue s/g array elements. */
+
+/* Completion queue element. */
+struct pvrdma_cqe {
+       uint64_t __attribute__((aligned(8))) wr_id;
+       uint64_t __attribute__((aligned(8))) qp;
+       uint32_t opcode;
+       uint32_t status;
+       uint32_t byte_len;
+       uint32_t imm_data;
+       uint32_t src_qp;
+       uint32_t wc_flags;
+       uint32_t vendor_err;
+       uint16_t pkey_index;
+       uint16_t slid;
+       uint8_t sl;
+       uint8_t dlid_path_bits;
+       uint8_t port_num;
+       uint8_t smac[6];
+       uint8_t network_hdr_type;
+       uint8_t reserved2[6]; /* Pad to next power of 2 (64). */
+};
+
+#endif /* __VMW_PVRDMA_ABI_H__ */
diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h
new file mode 100644 (file)
index 0000000..f07f368
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Accelerator blocking API, to prevent new ioctls from starting and wait the
+ * running ones finish.
+ * This mechanism differs from pause/resume_all_vcpus() in that it does not
+ * release the BQL.
+ *
+ *  Copyright (c) 2022 Red Hat Inc.
+ *
+ * Author: Emanuele Giuseppe Esposito       <eesposit@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef ACCEL_BLOCKER_H
+#define ACCEL_BLOCKER_H
+
+#include "sysemu/cpus.h"
+
+void accel_blocker_init(void);
+
+/*
+ * accel_{cpu_}ioctl_begin/end:
+ * Mark when ioctl is about to run or just finished.
+ *
+ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is
+ * called, preventing new ioctls to run. They will continue only after
+ * accel_ioctl_inibith_end().
+ */
+void accel_ioctl_begin(void);
+void accel_ioctl_end(void);
+void accel_cpu_ioctl_begin(CPUState *cpu);
+void accel_cpu_ioctl_end(CPUState *cpu);
+
+/*
+ * accel_ioctl_inhibit_begin: start critical section
+ *
+ * This function makes sure that:
+ * 1) incoming accel_{cpu_}ioctl_begin() calls block
+ * 2) wait that all ioctls that were already running reach
+ *    accel_{cpu_}ioctl_end(), kicking vcpus if necessary.
+ *
+ * This allows the caller to access shared data or perform operations without
+ * worrying of concurrent vcpus accesses.
+ */
+void accel_ioctl_inhibit_begin(void);
+
+/*
+ * accel_ioctl_inhibit_end: end critical section started by
+ * accel_ioctl_inhibit_begin()
+ *
+ * This function allows blocked accel_{cpu_}ioctl_begin() to continue.
+ */
+void accel_ioctl_inhibit_end(void);
+
+#endif /* ACCEL_BLOCKER_H */
diff --git a/include/sysemu/accel-ops.h b/include/sysemu/accel-ops.h
new file mode 100644 (file)
index 0000000..ef91fc2
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Accelerator OPS, used for cpus.c module
+ *
+ * Copyright 2021 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef ACCEL_OPS_H
+#define ACCEL_OPS_H
+
+#include "exec/cpu-common.h"
+#include "qom/object.h"
+
+#define ACCEL_OPS_SUFFIX "-ops"
+#define TYPE_ACCEL_OPS "accel" ACCEL_OPS_SUFFIX
+#define ACCEL_OPS_NAME(name) (name "-" TYPE_ACCEL_OPS)
+
+typedef struct AccelOpsClass AccelOpsClass;
+DECLARE_CLASS_CHECKERS(AccelOpsClass, ACCEL_OPS, TYPE_ACCEL_OPS)
+
+/* cpus.c operations interface */
+struct AccelOpsClass {
+    /*< private >*/
+    ObjectClass parent_class;
+    /*< public >*/
+
+    /* initialization function called when accel is chosen */
+    void (*ops_init)(AccelOpsClass *ops);
+
+    bool (*cpus_are_resettable)(void);
+    void (*cpu_reset_hold)(CPUState *cpu);
+
+    void (*create_vcpu_thread)(CPUState *cpu); /* MANDATORY NON-NULL */
+    void (*kick_vcpu_thread)(CPUState *cpu);
+    bool (*cpu_thread_is_idle)(CPUState *cpu);
+
+    void (*synchronize_post_reset)(CPUState *cpu);
+    void (*synchronize_post_init)(CPUState *cpu);
+    void (*synchronize_state)(CPUState *cpu);
+    void (*synchronize_pre_loadvm)(CPUState *cpu);
+    void (*synchronize_pre_resume)(bool step_pending);
+
+    void (*handle_interrupt)(CPUState *cpu, int mask);
+
+    int64_t (*get_virtual_clock)(void);
+    int64_t (*get_elapsed_ticks)(void);
+
+    /* gdbstub hooks */
+    bool (*supports_guest_debug)(void);
+    int (*update_guest_debug)(CPUState *cpu);
+    int (*insert_breakpoint)(CPUState *cpu, int type, vaddr addr, vaddr len);
+    int (*remove_breakpoint)(CPUState *cpu, int type, vaddr addr, vaddr len);
+    void (*remove_all_breakpoints)(CPUState *cpu);
+};
+
+#endif /* ACCEL_OPS_H */
diff --git a/include/sysemu/accel.h b/include/sysemu/accel.h
deleted file mode 100644 (file)
index e08b8ab..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/* QEMU accelerator interfaces
- *
- * Copyright (c) 2014 Red Hat Inc
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef HW_ACCEL_H
-#define HW_ACCEL_H
-
-#include "qom/object.h"
-#include "exec/hwaddr.h"
-
-typedef struct AccelState {
-    /*< private >*/
-    Object parent_obj;
-} AccelState;
-
-typedef struct AccelClass {
-    /*< private >*/
-    ObjectClass parent_class;
-    /*< public >*/
-
-    const char *name;
-#ifndef CONFIG_USER_ONLY
-    int (*init_machine)(MachineState *ms);
-    void (*setup_post)(MachineState *ms, AccelState *accel);
-    bool (*has_memory)(MachineState *ms, AddressSpace *as,
-                       hwaddr start_addr, hwaddr size);
-#endif
-    bool *allowed;
-    /*
-     * Array of global properties that would be applied when specific
-     * accelerator is chosen. It works like MachineClass.compat_props
-     * but it's for accelerators not machines. Accelerator-provided
-     * global properties may be overridden by machine-type
-     * compat_props or user-provided global properties.
-     */
-    GPtrArray *compat_props;
-} AccelClass;
-
-#define TYPE_ACCEL "accel"
-
-#define ACCEL_CLASS_SUFFIX  "-" TYPE_ACCEL
-#define ACCEL_CLASS_NAME(a) (a ACCEL_CLASS_SUFFIX)
-
-#define ACCEL_CLASS(klass) \
-    OBJECT_CLASS_CHECK(AccelClass, (klass), TYPE_ACCEL)
-#define ACCEL(obj) \
-    OBJECT_CHECK(AccelState, (obj), TYPE_ACCEL)
-#define ACCEL_GET_CLASS(obj) \
-    OBJECT_GET_CLASS(AccelClass, (obj), TYPE_ACCEL)
-
-AccelClass *accel_find(const char *opt_name);
-int accel_init_machine(AccelState *accel, MachineState *ms);
-
-/* Called just before os_setup_post (ie just before drop OS privs) */
-void accel_setup_post(MachineState *ms);
-
-AccelState *current_accel(void);
-
-#endif
index 54f069d49126c601117ffd2e1b9490d6632389ba..8850cb1a14b9b25f9b072cb14a916793addbc870 100644 (file)
@@ -9,7 +9,6 @@ enum {
     QEMU_ARCH_CRIS = (1 << 2),
     QEMU_ARCH_I386 = (1 << 3),
     QEMU_ARCH_M68K = (1 << 4),
-    QEMU_ARCH_LM32 = (1 << 5),
     QEMU_ARCH_MICROBLAZE = (1 << 6),
     QEMU_ARCH_MIPS = (1 << 7),
     QEMU_ARCH_PPC = (1 << 8),
@@ -18,21 +17,18 @@ enum {
     QEMU_ARCH_SPARC = (1 << 11),
     QEMU_ARCH_XTENSA = (1 << 12),
     QEMU_ARCH_OPENRISC = (1 << 13),
-    QEMU_ARCH_UNICORE32 = (1 << 14),
-    QEMU_ARCH_MOXIE = (1 << 15),
     QEMU_ARCH_TRICORE = (1 << 16),
     QEMU_ARCH_NIOS2 = (1 << 17),
     QEMU_ARCH_HPPA = (1 << 18),
     QEMU_ARCH_RISCV = (1 << 19),
     QEMU_ARCH_RX = (1 << 20),
     QEMU_ARCH_AVR = (1 << 21),
-
-    QEMU_ARCH_NONE = (1 << 31),
+    QEMU_ARCH_HEXAGON = (1 << 22),
+    QEMU_ARCH_LOONGARCH = (1 << 23),
 };
 
 extern const uint32_t arch_type;
 
-int kvm_available(void);
-int xen_available(void);
+void qemu_init_arch_modules(void);
 
 #endif
diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h
new file mode 100644 (file)
index 0000000..780cea7
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014-2016 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_COMMON_H
+#define BLOCK_BACKEND_COMMON_H
+
+#include "qemu/iov.h"
+#include "block/throttle-groups.h"
+
+/*
+ * TODO Have to include block/block.h for a bunch of block layer
+ * types.  Unfortunately, this pulls in the whole BlockDriverState
+ * API, which we don't want used by many BlockBackend users.  Some of
+ * the types belong here, and the rest should be split into a common
+ * header and one for the BlockDriverState API.
+ */
+#include "block/block.h"
+
+/* Callbacks for block device models */
+typedef struct BlockDevOps {
+
+    /*
+     * Global state (GS) API. These functions run under the BQL.
+     *
+     * See include/block/block-global-state.h for more information about
+     * the GS API.
+     */
+
+    /*
+     * Runs when virtual media changed (monitor commands eject, change)
+     * Argument load is true on load and false on eject.
+     * Beware: doesn't run when a host device's physical media
+     * changes.  Sure would be useful if it did.
+     * Device models with removable media must implement this callback.
+     */
+    void (*change_media_cb)(void *opaque, bool load, Error **errp);
+    /*
+     * Runs when an eject request is issued from the monitor, the tray
+     * is closed, and the medium is locked.
+     * Device models that do not implement is_medium_locked will not need
+     * this callback.  Device models that can lock the medium or tray might
+     * want to implement the callback and unlock the tray when "force" is
+     * true, even if they do not support eject requests.
+     */
+    void (*eject_request_cb)(void *opaque, bool force);
+
+    /*
+     * Is the virtual medium locked into the device?
+     * Device models implement this only when device has such a lock.
+     */
+    bool (*is_medium_locked)(void *opaque);
+
+    /*
+     * Runs when the backend receives a drain request.
+     */
+    void (*drained_begin)(void *opaque);
+    /*
+     * Runs when the backend's last drain request ends.
+     */
+    void (*drained_end)(void *opaque);
+    /*
+     * Is the device still busy?
+     */
+    bool (*drained_poll)(void *opaque);
+
+    /*
+     * I/O API functions. These functions are thread-safe.
+     *
+     * See include/block/block-io.h for more information about
+     * the I/O API.
+     */
+
+    /*
+     * Is the virtual tray open?
+     * Device models implement this only when the device has a tray.
+     */
+    bool (*is_tray_open)(void *opaque);
+
+    /*
+     * Runs when the size changed (e.g. monitor command block_resize)
+     */
+    void (*resize_cb)(void *opaque);
+} BlockDevOps;
+
+/*
+ * This struct is embedded in (the private) BlockBackend struct and contains
+ * fields that must be public. This is in particular for QLIST_ENTRY() and
+ * friends so that BlockBackends can be kept in lists outside block-backend.c
+ */
+typedef struct BlockBackendPublic {
+    ThrottleGroupMember throttle_group_member;
+} BlockBackendPublic;
+
+#endif /* BLOCK_BACKEND_COMMON_H */
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
new file mode 100644 (file)
index 0000000..49c12b0
--- /dev/null
@@ -0,0 +1,133 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014-2016 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_GLOBAL_STATE_H
+#define BLOCK_BACKEND_GLOBAL_STATE_H
+
+#include "block-backend-common.h"
+
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
+
+BlockBackend * no_coroutine_fn
+blk_new_with_bs(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm,
+                Error **errp);
+
+BlockBackend * coroutine_fn no_co_wrapper
+blk_co_new_with_bs(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm,
+                   Error **errp);
+
+BlockBackend * no_coroutine_fn
+blk_new_open(const char *filename, const char *reference, QDict *options,
+             int flags, Error **errp);
+
+BlockBackend * coroutine_fn no_co_wrapper
+blk_co_new_open(const char *filename, const char *reference, QDict *options,
+                int flags, Error **errp);
+
+int blk_get_refcnt(BlockBackend *blk);
+void blk_ref(BlockBackend *blk);
+
+void no_coroutine_fn blk_unref(BlockBackend *blk);
+void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk);
+
+void blk_remove_all_bs(void);
+BlockBackend *blk_by_name(const char *name);
+BlockBackend *blk_next(BlockBackend *blk);
+BlockBackend *blk_all_next(BlockBackend *blk);
+bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp);
+void monitor_remove_blk(BlockBackend *blk);
+
+BlockBackendPublic *blk_get_public(BlockBackend *blk);
+BlockBackend *blk_by_public(BlockBackendPublic *public);
+
+void blk_remove_bs(BlockBackend *blk);
+int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
+int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp);
+bool GRAPH_RDLOCK bdrv_has_blk(BlockDriverState *bs);
+bool GRAPH_RDLOCK bdrv_is_root_node(BlockDriverState *bs);
+int GRAPH_UNLOCKED blk_set_perm(BlockBackend *blk, uint64_t perm,
+                                uint64_t shared_perm, Error **errp);
+void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
+
+void blk_iostatus_enable(BlockBackend *blk);
+BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
+void blk_iostatus_disable(BlockBackend *blk);
+void blk_iostatus_reset(BlockBackend *blk);
+int blk_attach_dev(BlockBackend *blk, DeviceState *dev);
+void blk_detach_dev(BlockBackend *blk, DeviceState *dev);
+DeviceState *blk_get_attached_dev(BlockBackend *blk);
+BlockBackend *blk_by_dev(void *dev);
+BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
+void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
+
+void blk_activate(BlockBackend *blk, Error **errp);
+
+int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags);
+void blk_aio_cancel(BlockAIOCB *acb);
+int blk_commit_all(void);
+bool blk_in_drain(BlockBackend *blk);
+void blk_drain(BlockBackend *blk);
+void blk_drain_all(void);
+void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
+                      BlockdevOnError on_write_error);
+bool blk_supports_write_perm(BlockBackend *blk);
+bool blk_is_sg(BlockBackend *blk);
+void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
+int blk_get_flags(BlockBackend *blk);
+bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
+void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason);
+void blk_op_block_all(BlockBackend *blk, Error *reason);
+void blk_op_unblock_all(BlockBackend *blk, Error *reason);
+int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
+                        Error **errp);
+void blk_add_aio_context_notifier(BlockBackend *blk,
+        void (*attached_aio_context)(AioContext *new_context, void *opaque),
+        void (*detach_aio_context)(void *opaque), void *opaque);
+void blk_remove_aio_context_notifier(BlockBackend *blk,
+                                     void (*attached_aio_context)(AioContext *,
+                                                                  void *),
+                                     void (*detach_aio_context)(void *),
+                                     void *opaque);
+void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify);
+void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify);
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
+void blk_update_root_state(BlockBackend *blk);
+bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk);
+int blk_get_open_flags_from_root_state(BlockBackend *blk);
+
+int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
+                     int64_t pos, int size);
+int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size);
+int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz);
+int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo);
+
+void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg);
+void blk_io_limits_disable(BlockBackend *blk);
+void blk_io_limits_enable(BlockBackend *blk, const char *group);
+void blk_io_limits_update_group(BlockBackend *blk, const char *group);
+void blk_set_force_allow_inactivate(BlockBackend *blk);
+
+bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp);
+void blk_unregister_buf(BlockBackend *blk, void *host, size_t size);
+
+const BdrvChild *blk_root(BlockBackend *blk);
+
+int blk_make_empty(BlockBackend *blk, Error **errp);
+
+#endif /* BLOCK_BACKEND_GLOBAL_STATE_H */
diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
new file mode 100644 (file)
index 0000000..d174275
--- /dev/null
@@ -0,0 +1,230 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014-2016 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_IO_H
+#define BLOCK_BACKEND_IO_H
+
+#include "block-backend-common.h"
+#include "block/accounting.h"
+
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+const char *blk_name(const BlockBackend *blk);
+
+BlockDriverState *blk_bs(BlockBackend *blk);
+
+void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
+void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
+bool blk_iostatus_is_enabled(const BlockBackend *blk);
+
+char *blk_get_attached_dev_id(BlockBackend *blk);
+
+BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                                  int64_t bytes, BdrvRequestFlags flags,
+                                  BlockCompletionFunc *cb, void *opaque);
+
+BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
+                           QEMUIOVector *qiov, BdrvRequestFlags flags,
+                           BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
+                            QEMUIOVector *qiov, BdrvRequestFlags flags,
+                            BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_flush(BlockBackend *blk,
+                          BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
+                                unsigned int *nr_zones,
+                                BlockZoneDescriptor *zones,
+                                BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
+                              int64_t offset, int64_t len,
+                              BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
+                                QEMUIOVector *qiov, BdrvRequestFlags flags,
+                                BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes,
+                             BlockCompletionFunc *cb, void *opaque);
+void blk_aio_cancel_async(BlockAIOCB *acb);
+BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
+                          BlockCompletionFunc *cb, void *opaque);
+
+void blk_inc_in_flight(BlockBackend *blk);
+void blk_dec_in_flight(BlockBackend *blk);
+
+bool coroutine_fn GRAPH_RDLOCK blk_co_is_inserted(BlockBackend *blk);
+bool co_wrapper_mixed_bdrv_rdlock blk_is_inserted(BlockBackend *blk);
+
+bool coroutine_fn GRAPH_RDLOCK blk_co_is_available(BlockBackend *blk);
+bool co_wrapper_mixed_bdrv_rdlock blk_is_available(BlockBackend *blk);
+
+void coroutine_fn blk_co_lock_medium(BlockBackend *blk, bool locked);
+void co_wrapper blk_lock_medium(BlockBackend *blk, bool locked);
+
+void coroutine_fn blk_co_eject(BlockBackend *blk, bool eject_flag);
+void co_wrapper blk_eject(BlockBackend *blk, bool eject_flag);
+
+int64_t coroutine_fn blk_co_getlength(BlockBackend *blk);
+int64_t co_wrapper_mixed blk_getlength(BlockBackend *blk);
+
+void coroutine_fn blk_co_get_geometry(BlockBackend *blk,
+                                      uint64_t *nb_sectors_ptr);
+void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
+
+int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk);
+int64_t blk_nb_sectors(BlockBackend *blk);
+
+void *blk_try_blockalign(BlockBackend *blk, size_t size);
+void *blk_blockalign(BlockBackend *blk, size_t size);
+bool blk_is_writable(BlockBackend *blk);
+bool blk_enable_write_cache(BlockBackend *blk);
+BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
+BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
+                                      int error);
+void blk_error_action(BlockBackend *blk, BlockErrorAction action,
+                      bool is_read, int error);
+void blk_iostatus_set_err(BlockBackend *blk, int error);
+int blk_get_max_iov(BlockBackend *blk);
+int blk_get_max_hw_iov(BlockBackend *blk);
+
+AioContext *blk_get_aio_context(BlockBackend *blk);
+BlockAcctStats *blk_get_stats(BlockBackend *blk);
+void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
+                  BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
+                                  BlockCompletionFunc *cb,
+                                  void *opaque, int ret);
+
+uint32_t blk_get_request_alignment(BlockBackend *blk);
+uint32_t blk_get_max_transfer(BlockBackend *blk);
+uint64_t blk_get_max_hw_transfer(BlockBackend *blk);
+
+int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
+                                   BlockBackend *blk_out, int64_t off_out,
+                                   int64_t bytes, BdrvRequestFlags read_flags,
+                                   BdrvRequestFlags write_flags);
+
+int coroutine_fn blk_co_block_status_above(BlockBackend *blk,
+                                           BlockDriverState *base,
+                                           int64_t offset, int64_t bytes,
+                                           int64_t *pnum, int64_t *map,
+                                           BlockDriverState **file);
+int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk,
+                                           BlockDriverState *base,
+                                           bool include_base, int64_t offset,
+                                           int64_t bytes, int64_t *pnum);
+
+/*
+ * "I/O or GS" API functions. These functions can run without
+ * the BQL, but only in one specific iothread/main loop.
+ *
+ * See include/block/block-io.h for more information about
+ * the "I/O or GS" API.
+ */
+
+int co_wrapper_mixed blk_pread(BlockBackend *blk, int64_t offset,
+                               int64_t bytes, void *buf,
+                               BdrvRequestFlags flags);
+int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, int64_t bytes,
+                              void *buf, BdrvRequestFlags flags);
+
+int co_wrapper_mixed blk_preadv(BlockBackend *blk, int64_t offset,
+                                int64_t bytes, QEMUIOVector *qiov,
+                                BdrvRequestFlags flags);
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+                               int64_t bytes, QEMUIOVector *qiov,
+                               BdrvRequestFlags flags);
+
+int co_wrapper_mixed blk_preadv_part(BlockBackend *blk, int64_t offset,
+                                     int64_t bytes, QEMUIOVector *qiov,
+                                     size_t qiov_offset,
+                                     BdrvRequestFlags flags);
+int coroutine_fn blk_co_preadv_part(BlockBackend *blk, int64_t offset,
+                                    int64_t bytes, QEMUIOVector *qiov,
+                                    size_t qiov_offset, BdrvRequestFlags flags);
+
+int co_wrapper_mixed blk_pwrite(BlockBackend *blk, int64_t offset,
+                                int64_t bytes, const void *buf,
+                                BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, int64_t bytes,
+                               const void *buf, BdrvRequestFlags flags);
+
+int co_wrapper_mixed blk_pwritev(BlockBackend *blk, int64_t offset,
+                                 int64_t bytes, QEMUIOVector *qiov,
+                                 BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+                                int64_t bytes, QEMUIOVector *qiov,
+                                BdrvRequestFlags flags);
+
+int co_wrapper_mixed blk_pwritev_part(BlockBackend *blk, int64_t offset,
+                                      int64_t bytes, QEMUIOVector *qiov,
+                                      size_t qiov_offset,
+                                      BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
+                                     int64_t bytes,
+                                     QEMUIOVector *qiov, size_t qiov_offset,
+                                     BdrvRequestFlags flags);
+
+int co_wrapper_mixed blk_pwrite_compressed(BlockBackend *blk,
+                                           int64_t offset, int64_t bytes,
+                                           const void *buf);
+int coroutine_fn blk_co_pwrite_compressed(BlockBackend *blk, int64_t offset,
+                                          int64_t bytes, const void *buf);
+
+int co_wrapper_mixed blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                                       int64_t bytes,
+                                       BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                                      int64_t bytes, BdrvRequestFlags flags);
+
+int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
+                                    unsigned int *nr_zones,
+                                    BlockZoneDescriptor *zones);
+int co_wrapper_mixed blk_zone_report(BlockBackend *blk, int64_t offset,
+                                         unsigned int *nr_zones,
+                                         BlockZoneDescriptor *zones);
+int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
+                                  int64_t offset, int64_t len);
+int co_wrapper_mixed blk_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
+                                       int64_t offset, int64_t len);
+int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
+                                    QEMUIOVector *qiov,
+                                    BdrvRequestFlags flags);
+int co_wrapper_mixed blk_zone_append(BlockBackend *blk, int64_t *offset,
+                                         QEMUIOVector *qiov,
+                                         BdrvRequestFlags flags);
+
+int co_wrapper_mixed blk_pdiscard(BlockBackend *blk, int64_t offset,
+                                  int64_t bytes);
+int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
+                                 int64_t bytes);
+
+int co_wrapper_mixed blk_flush(BlockBackend *blk);
+int coroutine_fn blk_co_flush(BlockBackend *blk);
+
+int co_wrapper_mixed blk_ioctl(BlockBackend *blk, unsigned long int req,
+                               void *buf);
+int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned long int req,
+                              void *buf);
+
+int co_wrapper_mixed blk_truncate(BlockBackend *blk, int64_t offset,
+                                  bool exact, PreallocMode prealloc,
+                                  BdrvRequestFlags flags, Error **errp);
+int coroutine_fn blk_co_truncate(BlockBackend *blk, int64_t offset, bool exact,
+                                 PreallocMode prealloc, BdrvRequestFlags flags,
+                                 Error **errp);
+
+#endif /* BLOCK_BACKEND_IO_H */
index 880e9032930b0207e2e3e6fe1bd7f390c30b562f..038be9fc408a3a2a4a5930264e26017d1e00fd39 100644 (file)
 #ifndef BLOCK_BACKEND_H
 #define BLOCK_BACKEND_H
 
-#include "qemu/iov.h"
-#include "block/throttle-groups.h"
+#include "block-backend-global-state.h"
+#include "block-backend-io.h"
 
-/*
- * TODO Have to include block/block.h for a bunch of block layer
- * types.  Unfortunately, this pulls in the whole BlockDriverState
- * API, which we don't want used by many BlockBackend users.  Some of
- * the types belong here, and the rest should be split into a common
- * header and one for the BlockDriverState API.
- */
-#include "block/block.h"
-
-/* Callbacks for block device models */
-typedef struct BlockDevOps {
-    /*
-     * Runs when virtual media changed (monitor commands eject, change)
-     * Argument load is true on load and false on eject.
-     * Beware: doesn't run when a host device's physical media
-     * changes.  Sure would be useful if it did.
-     * Device models with removable media must implement this callback.
-     */
-    void (*change_media_cb)(void *opaque, bool load, Error **errp);
-    /*
-     * Runs when an eject request is issued from the monitor, the tray
-     * is closed, and the medium is locked.
-     * Device models that do not implement is_medium_locked will not need
-     * this callback.  Device models that can lock the medium or tray might
-     * want to implement the callback and unlock the tray when "force" is
-     * true, even if they do not support eject requests.
-     */
-    void (*eject_request_cb)(void *opaque, bool force);
-    /*
-     * Is the virtual tray open?
-     * Device models implement this only when the device has a tray.
-     */
-    bool (*is_tray_open)(void *opaque);
-    /*
-     * Is the virtual medium locked into the device?
-     * Device models implement this only when device has such a lock.
-     */
-    bool (*is_medium_locked)(void *opaque);
-    /*
-     * Runs when the size changed (e.g. monitor command block_resize)
-     */
-    void (*resize_cb)(void *opaque);
-    /*
-     * Runs when the backend receives a drain request.
-     */
-    void (*drained_begin)(void *opaque);
-    /*
-     * Runs when the backend's last drain request ends.
-     */
-    void (*drained_end)(void *opaque);
-} BlockDevOps;
-
-/* This struct is embedded in (the private) BlockBackend struct and contains
- * fields that must be public. This is in particular for QLIST_ENTRY() and
- * friends so that BlockBackends can be kept in lists outside block-backend.c
- * */
-typedef struct BlockBackendPublic {
-    ThrottleGroupMember throttle_group_member;
-} BlockBackendPublic;
-
-BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
-BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
-                              uint64_t shared_perm, Error **errp);
-BlockBackend *blk_new_open(const char *filename, const char *reference,
-                           QDict *options, int flags, Error **errp);
-int blk_get_refcnt(BlockBackend *blk);
-void blk_ref(BlockBackend *blk);
-void blk_unref(BlockBackend *blk);
-void blk_remove_all_bs(void);
-const char *blk_name(const BlockBackend *blk);
-BlockBackend *blk_by_name(const char *name);
-BlockBackend *blk_next(BlockBackend *blk);
-BlockBackend *blk_all_next(BlockBackend *blk);
-bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp);
-void monitor_remove_blk(BlockBackend *blk);
-
-BlockBackendPublic *blk_get_public(BlockBackend *blk);
-BlockBackend *blk_by_public(BlockBackendPublic *public);
-
-BlockDriverState *blk_bs(BlockBackend *blk);
-void blk_remove_bs(BlockBackend *blk);
-int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
-bool bdrv_has_blk(BlockDriverState *bs);
-bool bdrv_is_root_node(BlockDriverState *bs);
-int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
-                 Error **errp);
-void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
-
-void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
-void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
-void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
-void blk_iostatus_enable(BlockBackend *blk);
-bool blk_iostatus_is_enabled(const BlockBackend *blk);
-BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
-void blk_iostatus_disable(BlockBackend *blk);
-void blk_iostatus_reset(BlockBackend *blk);
-void blk_iostatus_set_err(BlockBackend *blk, int error);
-int blk_attach_dev(BlockBackend *blk, DeviceState *dev);
-void blk_detach_dev(BlockBackend *blk, DeviceState *dev);
-DeviceState *blk_get_attached_dev(BlockBackend *blk);
-char *blk_get_attached_dev_id(BlockBackend *blk);
-BlockBackend *blk_by_dev(void *dev);
-BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
-void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
-int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
-                               unsigned int bytes, QEMUIOVector *qiov,
-                               BdrvRequestFlags flags);
-int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
-                                     unsigned int bytes,
-                                     QEMUIOVector *qiov, size_t qiov_offset,
-                                     BdrvRequestFlags flags);
-int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
-                               unsigned int bytes, QEMUIOVector *qiov,
-                               BdrvRequestFlags flags);
-
-static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset,
-                                            unsigned int bytes, void *buf,
-                                            BdrvRequestFlags flags)
-{
-    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
-    return blk_co_preadv(blk, offset, bytes, &qiov, flags);
-}
-
-static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset,
-                                             unsigned int bytes, void *buf,
-                                             BdrvRequestFlags flags)
-{
-    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
-    return blk_co_pwritev(blk, offset, bytes, &qiov, flags);
-}
-
-int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                      int bytes, BdrvRequestFlags flags);
-BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                                  int bytes, BdrvRequestFlags flags,
-                                  BlockCompletionFunc *cb, void *opaque);
-int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags);
-int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes);
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
-               BdrvRequestFlags flags);
-int64_t blk_getlength(BlockBackend *blk);
-void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
-int64_t blk_nb_sectors(BlockBackend *blk);
-BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
-                           QEMUIOVector *qiov, BdrvRequestFlags flags,
-                           BlockCompletionFunc *cb, void *opaque);
-BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
-                            QEMUIOVector *qiov, BdrvRequestFlags flags,
-                            BlockCompletionFunc *cb, void *opaque);
-BlockAIOCB *blk_aio_flush(BlockBackend *blk,
-                          BlockCompletionFunc *cb, void *opaque);
-BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes,
-                             BlockCompletionFunc *cb, void *opaque);
-void blk_aio_cancel(BlockAIOCB *acb);
-void blk_aio_cancel_async(BlockAIOCB *acb);
-int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
-BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
-                          BlockCompletionFunc *cb, void *opaque);
-int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes);
-int blk_co_flush(BlockBackend *blk);
-int blk_flush(BlockBackend *blk);
-int blk_commit_all(void);
-void blk_inc_in_flight(BlockBackend *blk);
-void blk_dec_in_flight(BlockBackend *blk);
-void blk_drain(BlockBackend *blk);
-void blk_drain_all(void);
-void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
-                      BlockdevOnError on_write_error);
-BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
-BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
-                                      int error);
-void blk_error_action(BlockBackend *blk, BlockErrorAction action,
-                      bool is_read, int error);
-bool blk_supports_write_perm(BlockBackend *blk);
-bool blk_is_writable(BlockBackend *blk);
-bool blk_is_sg(BlockBackend *blk);
-bool blk_enable_write_cache(BlockBackend *blk);
-void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
-void blk_invalidate_cache(BlockBackend *blk, Error **errp);
-bool blk_is_inserted(BlockBackend *blk);
-bool blk_is_available(BlockBackend *blk);
-void blk_lock_medium(BlockBackend *blk, bool locked);
-void blk_eject(BlockBackend *blk, bool eject_flag);
-int blk_get_flags(BlockBackend *blk);
-uint32_t blk_get_request_alignment(BlockBackend *blk);
-uint32_t blk_get_max_transfer(BlockBackend *blk);
-int blk_get_max_iov(BlockBackend *blk);
-void blk_set_guest_block_size(BlockBackend *blk, int align);
-void *blk_try_blockalign(BlockBackend *blk, size_t size);
-void *blk_blockalign(BlockBackend *blk, size_t size);
-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
-void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason);
-void blk_op_block_all(BlockBackend *blk, Error *reason);
-void blk_op_unblock_all(BlockBackend *blk, Error *reason);
-AioContext *blk_get_aio_context(BlockBackend *blk);
-int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
-                        Error **errp);
-void blk_add_aio_context_notifier(BlockBackend *blk,
-        void (*attached_aio_context)(AioContext *new_context, void *opaque),
-        void (*detach_aio_context)(void *opaque), void *opaque);
-void blk_remove_aio_context_notifier(BlockBackend *blk,
-                                     void (*attached_aio_context)(AioContext *,
-                                                                  void *),
-                                     void (*detach_aio_context)(void *),
-                                     void *opaque);
-void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify);
-void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify);
-void blk_io_plug(BlockBackend *blk);
-void blk_io_unplug(BlockBackend *blk);
-BlockAcctStats *blk_get_stats(BlockBackend *blk);
-BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
-void blk_update_root_state(BlockBackend *blk);
-bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk);
-int blk_get_open_flags_from_root_state(BlockBackend *blk);
-
-void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
-                  BlockCompletionFunc *cb, void *opaque);
-int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                                      int bytes, BdrvRequestFlags flags);
-int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
-                          int bytes);
-int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
-                 PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
-int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes);
-int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
-                     int64_t pos, int size);
-int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size);
-int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz);
-int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo);
-BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
-                                  BlockCompletionFunc *cb,
-                                  void *opaque, int ret);
-
-void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg);
-void blk_io_limits_disable(BlockBackend *blk);
-void blk_io_limits_enable(BlockBackend *blk, const char *group);
-void blk_io_limits_update_group(BlockBackend *blk, const char *group);
-void blk_set_force_allow_inactivate(BlockBackend *blk);
-
-void blk_register_buf(BlockBackend *blk, void *host, size_t size);
-void blk_unregister_buf(BlockBackend *blk, void *host);
-
-int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
-                                   BlockBackend *blk_out, int64_t off_out,
-                                   int bytes, BdrvRequestFlags read_flags,
-                                   BdrvRequestFlags write_flags);
-
-const BdrvChild *blk_root(BlockBackend *blk);
-
-int blk_make_empty(BlockBackend *blk, Error **errp);
+/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
 
 #endif
diff --git a/include/sysemu/block-ram-registrar.h b/include/sysemu/block-ram-registrar.h
new file mode 100644 (file)
index 0000000..d8b2f79
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * BlockBackend RAM Registrar
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef BLOCK_RAM_REGISTRAR_H
+#define BLOCK_RAM_REGISTRAR_H
+
+#include "exec/ramlist.h"
+
+/**
+ * struct BlockRAMRegistrar:
+ *
+ * Keeps RAMBlock memory registered with a BlockBackend using
+ * blk_register_buf() including hotplugged memory.
+ *
+ * Emulated devices or other BlockBackend users initialize a BlockRAMRegistrar
+ * with blk_ram_registrar_init() before submitting I/O requests with the
+ * BDRV_REQ_REGISTERED_BUF flag set.
+ */
+typedef struct {
+    BlockBackend *blk;
+    RAMBlockNotifier notifier;
+    bool ok;
+} BlockRAMRegistrar;
+
+void blk_ram_registrar_init(BlockRAMRegistrar *r, BlockBackend *blk);
+void blk_ram_registrar_destroy(BlockRAMRegistrar *r);
+
+/* Have all RAMBlocks been registered successfully? */
+static inline bool blk_ram_registrar_ok(BlockRAMRegistrar *r)
+{
+    return r->ok;
+}
+
+#endif /* BLOCK_RAM_REGISTRAR_H */
index 3b5fcda08d346a3d402637c20c745cbf3ff68157..3211b16513ae0d6d5e96cab7d3cc917fe439a8d9 100644 (file)
@@ -13,9 +13,6 @@
 #include "block/block.h"
 #include "qemu/queue.h"
 
-void blockdev_mark_auto_del(BlockBackend *blk);
-void blockdev_auto_del(BlockBackend *blk);
-
 typedef enum {
     IF_DEFAULT = -1,            /* for use with drive_add() only */
     /*
@@ -35,10 +32,19 @@ struct DriveInfo {
     bool is_default;            /* Added by default_drive() ?  */
     int media_cd;
     QemuOpts *opts;
-    bool claimed_by_board;
     QTAILQ_ENTRY(DriveInfo) next;
 };
 
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+void blockdev_mark_auto_del(BlockBackend *blk);
+void blockdev_auto_del(BlockBackend *blk);
+
 DriveInfo *blk_legacy_dinfo(BlockBackend *blk);
 DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo);
 BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo);
@@ -46,14 +52,10 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo);
 void override_max_devs(BlockInterfaceType type, int max_devs);
 
 DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit);
-void drive_mark_claimed_by_board(void);
 void drive_check_orphaned(void);
 DriveInfo *drive_get_by_index(BlockInterfaceType type, int index);
 int drive_get_max_bus(BlockInterfaceType type);
-int drive_get_max_devs(BlockInterfaceType type);
-DriveInfo *drive_get_next(BlockInterfaceType type);
 
-QemuOpts *drive_def(const char *optstr);
 QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
                     const char *optstr);
 DriveInfo *drive_new(QemuOpts *arg, BlockInterfaceType block_default_type,
diff --git a/include/sysemu/cpu-timers-internal.h b/include/sysemu/cpu-timers-internal.h
new file mode 100644 (file)
index 0000000..94bb739
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef TIMERS_STATE_H
+#define TIMERS_STATE_H
+
+/* timers state, for sharing between icount and cpu-timers */
+
+typedef struct TimersState {
+    /* Protected by BQL.  */
+    int64_t cpu_ticks_prev;
+    int64_t cpu_ticks_offset;
+
+    /*
+     * Protect fields that can be respectively read outside the
+     * BQL, and written from multiple threads.
+     */
+    QemuSeqLock vm_clock_seqlock;
+    QemuSpin vm_clock_lock;
+
+    int16_t cpu_ticks_enabled;
+
+    /* Conversion factor from emulated instructions to virtual clock ticks.  */
+    int16_t icount_time_shift;
+    /* Icount delta used for shift auto adjust. */
+    int64_t last_delta;
+
+    /* Compensate for varying guest execution speed.  */
+    aligned_int64_t qemu_icount_bias;
+
+    int64_t vm_clock_warp_start;
+    int64_t cpu_clock_offset;
+
+    /* Only written by TCG thread */
+    int64_t qemu_icount;
+
+    /* for adjusting icount */
+    QEMUTimer *icount_rt_timer;
+    QEMUTimer *icount_vm_timer;
+    QEMUTimer *icount_warp_timer;
+} TimersState;
+
+extern TimersState timers_state;
+
+/*
+ * icount needs this internal from cpu-timers when adjusting the icount shift.
+ */
+int64_t cpu_get_clock_locked(void);
+
+#endif /* TIMERS_STATE_H */
index ed6ee5c46ccdc74428aa63b3e196138654dd2ef5..2e786fe7fb1440d3cae1e14fd2cabd300f31126b 100644 (file)
@@ -59,6 +59,7 @@ int64_t icount_round(int64_t count);
 /* if the CPUs are idle, start accounting real time to virtual clock. */
 void icount_start_warp_timer(void);
 void icount_account_warp_timer(void);
+void icount_notify_exit(void);
 
 /*
  * CPU Ticks and Clock
index e8156728c63d2689c96b4fb007a43a4cd2a6cb67..b4a566cfe75274f6d8f4bfe8abb9e409a4c2e440 100644 (file)
@@ -1,31 +1,17 @@
 #ifndef QEMU_CPUS_H
 #define QEMU_CPUS_H
 
-#include "qemu/timer.h"
+#include "sysemu/accel-ops.h"
 
-/* cpus.c */
+/* register accel-specific operations */
+void cpus_register_accel(const AccelOpsClass *i);
 
-/* CPU execution threads */
+/* return registers ops */
+const AccelOpsClass *cpus_get_accel(void);
 
-typedef struct CpusAccel {
-    void (*create_vcpu_thread)(CPUState *cpu); /* MANDATORY */
-    void (*kick_vcpu_thread)(CPUState *cpu);
+/* accel/dummy-cpus.c */
 
-    void (*synchronize_post_reset)(CPUState *cpu);
-    void (*synchronize_post_init)(CPUState *cpu);
-    void (*synchronize_state)(CPUState *cpu);
-    void (*synchronize_pre_loadvm)(CPUState *cpu);
-
-    void (*handle_interrupt)(CPUState *cpu, int mask);
-
-    int64_t (*get_virtual_clock)(void);
-    int64_t (*get_elapsed_ticks)(void);
-} CpusAccel;
-
-/* register accel-specific cpus interface implementation */
-void cpus_register_accel(const CpusAccel *i);
-
-/* Create a dummy vcpu for CpusAccel->create_vcpu_thread */
+/* Create a dummy vcpu for AccelOpsClass->create_vcpu_thread */
 void dummy_start_vcpu_thread(CPUState *);
 
 /* interface available for cpus accelerator threads */
@@ -57,18 +43,11 @@ extern int icount_align_option;
 /* Unblock cpu */
 void qemu_cpu_kick_self(void);
 
+bool cpus_are_resettable(void);
+
 void cpu_synchronize_all_states(void);
 void cpu_synchronize_all_post_reset(void);
 void cpu_synchronize_all_post_init(void);
 void cpu_synchronize_all_pre_loadvm(void);
 
-#ifndef CONFIG_USER_ONLY
-/* vl.c */
-/* *-user doesn't have configurable SMP topology */
-extern int smp_cores;
-extern int smp_threads;
-#endif
-
-void list_cpus(const char *optarg);
-
 #endif
index e8cab1356e490a436e9f7ab729e3df9b9a0c6b7c..4c3c22acae5d0f82e57b93a4bdedd2bd84a46be5 100644 (file)
@@ -79,7 +79,7 @@ cryptodev_vhost_init(
  * cryptodev_vhost_cleanup:
  * @crypto: the cryptodev backend common vhost object
  *
- * Clean the resouce associated with @crypto that realizaed
+ * Clean the resource associated with @crypto that realizaed
  * by cryptodev_vhost_init()
  *
  */
index f4d4057d4da6009493e52cc04e7baf01373b9c8b..96d3998b935d7289eaeacc170ee6014ba2a75ec7 100644 (file)
@@ -24,7 +24,9 @@
 #define CRYPTODEV_H
 
 #include "qemu/queue.h"
+#include "qemu/throttle.h"
 #include "qom/object.h"
+#include "qapi/qapi-types-cryptodev.h"
 
 /**
  * CryptoDevBackend:
@@ -48,15 +50,9 @@ typedef struct CryptoDevBackendPeers CryptoDevBackendPeers;
 typedef struct CryptoDevBackendClient
                      CryptoDevBackendClient;
 
-enum CryptoDevBackendAlgType {
-    CRYPTODEV_BACKEND_ALG_SYM,
-    CRYPTODEV_BACKEND_ALG__MAX,
-};
-
 /**
  * CryptoDevBackendSymSessionInfo:
  *
- * @op_code: operation code (refer to virtio_crypto.h)
  * @cipher_alg: algorithm type of CIPHER
  * @key_len: byte length of cipher key
  * @hash_alg: algorithm type of HASH/MAC
@@ -74,7 +70,6 @@ enum CryptoDevBackendAlgType {
  */
 typedef struct CryptoDevBackendSymSessionInfo {
     /* corresponding with virtio crypto spec */
-    uint32_t op_code;
     uint32_t cipher_alg;
     uint32_t key_len;
     uint32_t hash_alg;
@@ -89,11 +84,37 @@ typedef struct CryptoDevBackendSymSessionInfo {
     uint8_t *auth_key;
 } CryptoDevBackendSymSessionInfo;
 
+/**
+ * CryptoDevBackendAsymSessionInfo:
+ */
+typedef struct CryptoDevBackendRsaPara {
+    uint32_t padding_algo;
+    uint32_t hash_algo;
+} CryptoDevBackendRsaPara;
+
+typedef struct CryptoDevBackendAsymSessionInfo {
+    /* corresponding with virtio crypto spec */
+    uint32_t algo;
+    uint32_t keytype;
+    uint32_t keylen;
+    uint8_t *key;
+    union {
+        CryptoDevBackendRsaPara rsa;
+    } u;
+} CryptoDevBackendAsymSessionInfo;
+
+typedef struct CryptoDevBackendSessionInfo {
+    uint32_t op_code;
+    union {
+        CryptoDevBackendSymSessionInfo sym_sess_info;
+        CryptoDevBackendAsymSessionInfo asym_sess_info;
+    } u;
+    uint64_t session_id;
+} CryptoDevBackendSessionInfo;
+
 /**
  * CryptoDevBackendSymOpInfo:
  *
- * @session_id: session index which was previously
- *              created by cryptodev_backend_sym_create_session()
  * @aad_len: byte length of additional authenticated data
  * @iv_len: byte length of initialization vector or counter
  * @src_len: byte length of source data
@@ -119,7 +140,6 @@ typedef struct CryptoDevBackendSymSessionInfo {
  *
  */
 typedef struct CryptoDevBackendSymOpInfo {
-    uint64_t session_id;
     uint32_t aad_len;
     uint32_t iv_len;
     uint32_t src_len;
@@ -138,34 +158,63 @@ typedef struct CryptoDevBackendSymOpInfo {
     uint8_t data[];
 } CryptoDevBackendSymOpInfo;
 
+
+/**
+ * CryptoDevBackendAsymOpInfo:
+ *
+ * @src_len: byte length of source data
+ * @dst_len: byte length of destination data
+ * @src: point to the source data
+ * @dst: point to the destination data
+ *
+ */
+typedef struct CryptoDevBackendAsymOpInfo {
+    uint32_t src_len;
+    uint32_t dst_len;
+    uint8_t *src;
+    uint8_t *dst;
+} CryptoDevBackendAsymOpInfo;
+
+typedef void (*CryptoDevCompletionFunc) (void *opaque, int ret);
+
+typedef struct CryptoDevBackendOpInfo {
+    QCryptodevBackendAlgType algtype;
+    uint32_t op_code;
+    uint32_t queue_index;
+    CryptoDevCompletionFunc cb;
+    void *opaque; /* argument for cb */
+    uint64_t session_id;
+    union {
+        CryptoDevBackendSymOpInfo *sym_op_info;
+        CryptoDevBackendAsymOpInfo *asym_op_info;
+    } u;
+    QTAILQ_ENTRY(CryptoDevBackendOpInfo) next;
+} CryptoDevBackendOpInfo;
+
 struct CryptoDevBackendClass {
     ObjectClass parent_class;
 
     void (*init)(CryptoDevBackend *backend, Error **errp);
     void (*cleanup)(CryptoDevBackend *backend, Error **errp);
 
-    int64_t (*create_session)(CryptoDevBackend *backend,
-                       CryptoDevBackendSymSessionInfo *sess_info,
-                       uint32_t queue_index, Error **errp);
+    int (*create_session)(CryptoDevBackend *backend,
+                          CryptoDevBackendSessionInfo *sess_info,
+                          uint32_t queue_index,
+                          CryptoDevCompletionFunc cb,
+                          void *opaque);
+
     int (*close_session)(CryptoDevBackend *backend,
-                           uint64_t session_id,
-                           uint32_t queue_index, Error **errp);
-    int (*do_sym_op)(CryptoDevBackend *backend,
-                     CryptoDevBackendSymOpInfo *op_info,
-                     uint32_t queue_index, Error **errp);
-};
+                         uint64_t session_id,
+                         uint32_t queue_index,
+                         CryptoDevCompletionFunc cb,
+                         void *opaque);
 
-typedef enum CryptoDevBackendOptionsType {
-    CRYPTODEV_BACKEND_TYPE_NONE = 0,
-    CRYPTODEV_BACKEND_TYPE_BUILTIN = 1,
-    CRYPTODEV_BACKEND_TYPE_VHOST_USER = 2,
-    CRYPTODEV_BACKEND_TYPE__MAX,
-} CryptoDevBackendOptionsType;
+    int (*do_op)(CryptoDevBackend *backend,
+                 CryptoDevBackendOpInfo *op_info);
+};
 
 struct CryptoDevBackendClient {
-    CryptoDevBackendOptionsType type;
-    char *model;
-    char *name;
+    QCryptodevBackendType type;
     char *info_str;
     unsigned int queue_index;
     int vring_enable;
@@ -190,6 +239,7 @@ struct CryptoDevBackendConf {
     uint32_t mac_algo_l;
     uint32_t mac_algo_h;
     uint32_t aead_algo;
+    uint32_t akcipher_algo;
     /* Maximum length of cipher key */
     uint32_t max_cipher_key_len;
     /* Maximum length of authenticated key */
@@ -198,6 +248,24 @@ struct CryptoDevBackendConf {
     uint64_t max_size;
 };
 
+typedef struct CryptodevBackendSymStat {
+    int64_t encrypt_ops;
+    int64_t decrypt_ops;
+    int64_t encrypt_bytes;
+    int64_t decrypt_bytes;
+} CryptodevBackendSymStat;
+
+typedef struct CryptodevBackendAsymStat {
+    int64_t encrypt_ops;
+    int64_t decrypt_ops;
+    int64_t sign_ops;
+    int64_t verify_ops;
+    int64_t encrypt_bytes;
+    int64_t decrypt_bytes;
+    int64_t sign_bytes;
+    int64_t verify_bytes;
+} CryptodevBackendAsymStat;
+
 struct CryptoDevBackend {
     Object parent_obj;
 
@@ -205,15 +273,48 @@ struct CryptoDevBackend {
     /* Tag the cryptodev backend is used by virtio-crypto or not */
     bool is_used;
     CryptoDevBackendConf conf;
+    CryptodevBackendSymStat *sym_stat;
+    CryptodevBackendAsymStat *asym_stat;
+
+    ThrottleState ts;
+    ThrottleTimers tt;
+    ThrottleConfig tc;
+    QTAILQ_HEAD(, CryptoDevBackendOpInfo) opinfos;
 };
 
+#define CryptodevSymStatInc(be, op, bytes) do { \
+   be->sym_stat->op##_bytes += (bytes); \
+   be->sym_stat->op##_ops += 1; \
+} while (/*CONSTCOND*/0)
+
+#define CryptodevSymStatIncEncrypt(be, bytes) \
+            CryptodevSymStatInc(be, encrypt, bytes)
+
+#define CryptodevSymStatIncDecrypt(be, bytes) \
+            CryptodevSymStatInc(be, decrypt, bytes)
+
+#define CryptodevAsymStatInc(be, op, bytes) do { \
+    be->asym_stat->op##_bytes += (bytes); \
+    be->asym_stat->op##_ops += 1; \
+} while (/*CONSTCOND*/0)
+
+#define CryptodevAsymStatIncEncrypt(be, bytes) \
+            CryptodevAsymStatInc(be, encrypt, bytes)
+
+#define CryptodevAsymStatIncDecrypt(be, bytes) \
+            CryptodevAsymStatInc(be, decrypt, bytes)
+
+#define CryptodevAsymStatIncSign(be, bytes) \
+            CryptodevAsymStatInc(be, sign, bytes)
+
+#define CryptodevAsymStatIncVerify(be, bytes) \
+            CryptodevAsymStatInc(be, verify, bytes)
+
+
 /**
  * cryptodev_backend_new_client:
- * @model: the cryptodev backend model
- * @name: the cryptodev backend name, can be NULL
  *
- * Creates a new cryptodev backend client object
- * with the @name in the model @model.
+ * Creates a new cryptodev backend client object.
  *
  * The returned object must be released with
  * cryptodev_backend_free_client() when no
@@ -221,9 +322,8 @@ struct CryptoDevBackend {
  *
  * Returns: a new cryptodev backend client object
  */
-CryptoDevBackendClient *
-cryptodev_backend_new_client(const char *model,
-                                    const char *name);
+CryptoDevBackendClient *cryptodev_backend_new_client(void);
+
 /**
  * cryptodev_backend_free_client:
  * @cc: the cryptodev backend client object
@@ -239,7 +339,7 @@ void cryptodev_backend_free_client(
  * @backend: the cryptodev backend object
  * @errp: pointer to a NULL-initialized error object
  *
- * Clean the resouce associated with @backend that realizaed
+ * Clean the resource associated with @backend that realizaed
  * by the specific backend's init() callback
  */
 void cryptodev_backend_cleanup(
@@ -247,60 +347,67 @@ void cryptodev_backend_cleanup(
            Error **errp);
 
 /**
- * cryptodev_backend_sym_create_session:
+ * cryptodev_backend_create_session:
  * @backend: the cryptodev backend object
  * @sess_info: parameters needed by session creating
  * @queue_index: queue index of cryptodev backend client
  * @errp: pointer to a NULL-initialized error object
+ * @cb: callback when session create is compeleted
+ * @opaque: parameter passed to callback
  *
- * Create a session for symmetric algorithms
+ * Create a session for symmetric/asymmetric algorithms
  *
- * Returns: session id on success, or -1 on error
+ * Returns: 0 for success and cb will be called when creation is completed,
+ * negative value for error, and cb will not be called.
  */
-int64_t cryptodev_backend_sym_create_session(
+int cryptodev_backend_create_session(
            CryptoDevBackend *backend,
-           CryptoDevBackendSymSessionInfo *sess_info,
-           uint32_t queue_index, Error **errp);
+           CryptoDevBackendSessionInfo *sess_info,
+           uint32_t queue_index,
+           CryptoDevCompletionFunc cb,
+           void *opaque);
 
 /**
- * cryptodev_backend_sym_close_session:
+ * cryptodev_backend_close_session:
  * @backend: the cryptodev backend object
  * @session_id: the session id
  * @queue_index: queue index of cryptodev backend client
  * @errp: pointer to a NULL-initialized error object
+ * @cb: callback when session create is compeleted
+ * @opaque: parameter passed to callback
  *
- * Close a session for symmetric algorithms which was previously
- * created by cryptodev_backend_sym_create_session()
+ * Close a session for which was previously
+ * created by cryptodev_backend_create_session()
  *
- * Returns: 0 on success, or Negative on error
+ * Returns: 0 for success and cb will be called when creation is completed,
+ * negative value for error, and cb will not be called.
  */
-int cryptodev_backend_sym_close_session(
+int cryptodev_backend_close_session(
            CryptoDevBackend *backend,
            uint64_t session_id,
-           uint32_t queue_index, Error **errp);
+           uint32_t queue_index,
+           CryptoDevCompletionFunc cb,
+           void *opaque);
 
 /**
  * cryptodev_backend_crypto_operation:
  * @backend: the cryptodev backend object
- * @opaque: pointer to a VirtIOCryptoReq object
- * @queue_index: queue index of cryptodev backend client
- * @errp: pointer to a NULL-initialized error object
+ * @op_info: pointer to a CryptoDevBackendOpInfo object
  *
- * Do crypto operation, such as encryption and
- * decryption
+ * Do crypto operation, such as encryption, decryption, signature and
+ * verification
  *
- * Returns: VIRTIO_CRYPTO_OK on success,
- *         or -VIRTIO_CRYPTO_* on error
+ * Returns: 0 for success and cb will be called when creation is completed,
+ * negative value for error, and cb will not be called.
  */
 int cryptodev_backend_crypto_operation(
                  CryptoDevBackend *backend,
-                 void *opaque,
-                 uint32_t queue_index, Error **errp);
+                 CryptoDevBackendOpInfo *op_info);
 
 /**
  * cryptodev_backend_set_used:
  * @backend: the cryptodev backend object
- * @used: ture or false
+ * @used: true or false
  *
  * Set the cryptodev backend is used by virtio-crypto or not
  */
@@ -320,7 +427,7 @@ bool cryptodev_backend_is_used(CryptoDevBackend *backend);
 /**
  * cryptodev_backend_set_ready:
  * @backend: the cryptodev backend object
- * @ready: ture or false
+ * @ready: true or false
  *
  * Set the cryptodev backend is ready or not, which is called
  * by the children of the cryptodev banckend interface.
index 982c89345f8c8b6286f8470c0b3bef7c589e06b4..8eab39593418aa5dbb605d439df42664a9c2f440 100644 (file)
@@ -70,6 +70,23 @@ int qemu_fdt_setprop_u64(void *fdt, const char *node_path,
                          const char *property, uint64_t val);
 int qemu_fdt_setprop_string(void *fdt, const char *node_path,
                             const char *property, const char *string);
+
+/**
+ * qemu_fdt_setprop_string_array: set a string array property
+ *
+ * @fdt: pointer to the dt blob
+ * @name: node name
+ * @prop: property array
+ * @array: pointer to an array of string pointers
+ * @len: length of array
+ *
+ * assigns a string array to a property. This function converts and
+ * array of strings to a sequential string with \0 separators before
+ * setting the property.
+ */
+int qemu_fdt_setprop_string_array(void *fdt, const char *node_path,
+                                  const char *prop, char **array, int len);
+
 int qemu_fdt_setprop_phandle(void *fdt, const char *node_path,
                              const char *property,
                              const char *target_node_path);
@@ -104,20 +121,20 @@ uint32_t qemu_fdt_get_phandle(void *fdt, const char *path);
 uint32_t qemu_fdt_alloc_phandle(void *fdt);
 int qemu_fdt_nop_node(void *fdt, const char *node_path);
 int qemu_fdt_add_subnode(void *fdt, const char *name);
+int qemu_fdt_add_path(void *fdt, const char *path);
 
 #define qemu_fdt_setprop_cells(fdt, node_path, property, ...)                 \
     do {                                                                      \
         uint32_t qdt_tmp[] = { __VA_ARGS__ };                                 \
-        int i;                                                                \
-                                                                              \
-        for (i = 0; i < ARRAY_SIZE(qdt_tmp); i++) {                           \
-            qdt_tmp[i] = cpu_to_be32(qdt_tmp[i]);                             \
+        for (unsigned i_ = 0; i_ < ARRAY_SIZE(qdt_tmp); i_++) {               \
+            qdt_tmp[i_] = cpu_to_be32(qdt_tmp[i_]);                           \
         }                                                                     \
         qemu_fdt_setprop(fdt, node_path, property, qdt_tmp,                   \
                          sizeof(qdt_tmp));                                    \
     } while (0)
 
 void qemu_fdt_dumpdtb(void *fdt, int size);
+void hmp_dumpdtb(Monitor *mon, const QDict *qdict);
 
 /**
  * qemu_fdt_setprop_sized_cells_from_array:
@@ -178,6 +195,15 @@ int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
                                                 qdt_tmp);                 \
     })
 
+
+/**
+ * qemu_fdt_randomize_seeds:
+ * @fdt: device tree blob
+ *
+ * Re-randomize all "rng-seed" properties with new seeds.
+ */
+void qemu_fdt_randomize_seeds(void *fdt);
+
 #define FDT_PCI_RANGE_RELOCATABLE          0x80000000
 #define FDT_PCI_RANGE_PREFETCHABLE         0x40000000
 #define FDT_PCI_RANGE_ALIASED              0x20000000
diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h
new file mode 100644 (file)
index 0000000..d11ebbb
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Dirty page rate limit common functions
+ *
+ * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
+ *
+ * Authors:
+ *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_DIRTYRLIMIT_H
+#define QEMU_DIRTYRLIMIT_H
+
+#define DIRTYLIMIT_CALC_TIME_MS         1000    /* 1000ms */
+
+int64_t vcpu_dirty_rate_get(int cpu_index);
+void vcpu_dirty_rate_stat_start(void);
+void vcpu_dirty_rate_stat_stop(void);
+void vcpu_dirty_rate_stat_initialize(void);
+void vcpu_dirty_rate_stat_finalize(void);
+
+void dirtylimit_state_lock(void);
+void dirtylimit_state_unlock(void);
+void dirtylimit_state_initialize(void);
+void dirtylimit_state_finalize(void);
+bool dirtylimit_in_service(void);
+bool dirtylimit_vcpu_index_valid(int cpu_index);
+void dirtylimit_process(void);
+void dirtylimit_change(bool start);
+void dirtylimit_set_vcpu(int cpu_index,
+                         uint64_t quota,
+                         bool enable);
+void dirtylimit_set_all(uint64_t quota,
+                        bool enable);
+void dirtylimit_vcpu_execute(CPUState *cpu);
+uint64_t dirtylimit_throttle_time_per_round(void);
+uint64_t dirtylimit_ring_full_time(void);
+#endif
diff --git a/include/sysemu/dirtyrate.h b/include/sysemu/dirtyrate.h
new file mode 100644 (file)
index 0000000..20813f3
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * dirty page rate helper functions
+ *
+ * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
+ *
+ * Authors:
+ *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_DIRTYRATE_H
+#define QEMU_DIRTYRATE_H
+
+#include "qapi/qapi-types-migration.h"
+
+typedef struct VcpuStat {
+    int nvcpu; /* number of vcpu */
+    DirtyRateVcpu *rates; /* array of dirty rate for each vcpu */
+} VcpuStat;
+
+int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
+                                 VcpuStat *stat,
+                                 unsigned int flag,
+                                 bool one_shot);
+
+void global_dirty_log_change(unsigned int flag,
+                             bool start);
+#endif
index 80c5bc3e02d12880166f08f06968e1e5a07a2b14..a1ac5bc1b5435be8ef97d39acb0bf99cfb94e032 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * DMA helper functions
  *
- * Copyright (c) 2009 Red Hat
+ * Copyright (c) 2009, 2020 Red Hat
  *
  * This work is licensed under the terms of the GNU General Public License
  * (GNU GPL), version 2 or later.
 #include "block/block.h"
 #include "block/accounting.h"
 
-typedef struct ScatterGatherEntry ScatterGatherEntry;
-
 typedef enum {
     DMA_DIRECTION_TO_DEVICE = 0,
     DMA_DIRECTION_FROM_DEVICE = 1,
 } DMADirection;
 
-struct QEMUSGList {
-    ScatterGatherEntry *sg;
-    int nsg;
-    int nalloc;
-    size_t size;
-    DeviceState *dev;
-    AddressSpace *as;
-};
-
-#ifndef CONFIG_USER_ONLY
-
 /*
  * When an IOMMU is present, bus addresses become distinct from
  * CPU/memory physical addresses and may be a different size.  Because
@@ -45,6 +32,17 @@ typedef uint64_t dma_addr_t;
 #define DMA_ADDR_BITS 64
 #define DMA_ADDR_FMT "%" PRIx64
 
+typedef struct ScatterGatherEntry ScatterGatherEntry;
+
+struct QEMUSGList {
+    ScatterGatherEntry *sg;
+    int nsg;
+    int nalloc;
+    dma_addr_t size;
+    DeviceState *dev;
+    AddressSpace *as;
+};
+
 static inline void dma_barrier(AddressSpace *as, DMADirection dir)
 {
     /*
@@ -73,71 +71,164 @@ static inline void dma_barrier(AddressSpace *as, DMADirection dir)
  * dma_memory_{read,write}() and check for errors */
 static inline bool dma_memory_valid(AddressSpace *as,
                                     dma_addr_t addr, dma_addr_t len,
-                                    DMADirection dir)
+                                    DMADirection dir, MemTxAttrs attrs)
 {
     return address_space_access_valid(as, addr, len,
                                       dir == DMA_DIRECTION_FROM_DEVICE,
-                                      MEMTXATTRS_UNSPECIFIED);
+                                      attrs);
 }
 
-static inline int dma_memory_rw_relaxed(AddressSpace *as, dma_addr_t addr,
-                                        void *buf, dma_addr_t len,
-                                        DMADirection dir)
+static inline MemTxResult dma_memory_rw_relaxed(AddressSpace *as,
+                                                dma_addr_t addr,
+                                                void *buf, dma_addr_t len,
+                                                DMADirection dir,
+                                                MemTxAttrs attrs)
 {
-    return (bool)address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED,
-                                  buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
+    return address_space_rw(as, addr, attrs,
+                            buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
 }
 
-static inline int dma_memory_read_relaxed(AddressSpace *as, dma_addr_t addr,
-                                          void *buf, dma_addr_t len)
+static inline MemTxResult dma_memory_read_relaxed(AddressSpace *as,
+                                                  dma_addr_t addr,
+                                                  void *buf, dma_addr_t len)
 {
-    return dma_memory_rw_relaxed(as, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
+    return dma_memory_rw_relaxed(as, addr, buf, len,
+                                 DMA_DIRECTION_TO_DEVICE,
+                                 MEMTXATTRS_UNSPECIFIED);
 }
 
-static inline int dma_memory_write_relaxed(AddressSpace *as, dma_addr_t addr,
-                                           const void *buf, dma_addr_t len)
+static inline MemTxResult dma_memory_write_relaxed(AddressSpace *as,
+                                                   dma_addr_t addr,
+                                                   const void *buf,
+                                                   dma_addr_t len)
 {
     return dma_memory_rw_relaxed(as, addr, (void *)buf, len,
-                                 DMA_DIRECTION_FROM_DEVICE);
+                                 DMA_DIRECTION_FROM_DEVICE,
+                                 MEMTXATTRS_UNSPECIFIED);
 }
 
-static inline int dma_memory_rw(AddressSpace *as, dma_addr_t addr,
-                                void *buf, dma_addr_t len,
-                                DMADirection dir)
+/**
+ * dma_memory_rw: Read from or write to an address space from DMA controller.
+ *
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ * @len: the number of bytes to read or write
+ * @dir: indicates the transfer direction
+ * @attrs: memory transaction attributes
+ */
+static inline MemTxResult dma_memory_rw(AddressSpace *as, dma_addr_t addr,
+                                        void *buf, dma_addr_t len,
+                                        DMADirection dir, MemTxAttrs attrs)
 {
     dma_barrier(as, dir);
 
-    return dma_memory_rw_relaxed(as, addr, buf, len, dir);
+    return dma_memory_rw_relaxed(as, addr, buf, len, dir, attrs);
 }
 
-static inline int dma_memory_read(AddressSpace *as, dma_addr_t addr,
-                                  void *buf, dma_addr_t len)
+/**
+ * dma_memory_read: Read from an address space from DMA controller.
+ *
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).  Called within RCU critical section.
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ * @len: length of the data transferred
+ * @attrs: memory transaction attributes
+ */
+static inline MemTxResult dma_memory_read(AddressSpace *as, dma_addr_t addr,
+                                          void *buf, dma_addr_t len,
+                                          MemTxAttrs attrs)
 {
-    return dma_memory_rw(as, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
+    return dma_memory_rw(as, addr, buf, len,
+                         DMA_DIRECTION_TO_DEVICE, attrs);
 }
 
-static inline int dma_memory_write(AddressSpace *as, dma_addr_t addr,
-                                   const void *buf, dma_addr_t len)
+/**
+ * address_space_write: Write to address space from DMA controller.
+ *
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ * @len: the number of bytes to write
+ * @attrs: memory transaction attributes
+ */
+static inline MemTxResult dma_memory_write(AddressSpace *as, dma_addr_t addr,
+                                           const void *buf, dma_addr_t len,
+                                           MemTxAttrs attrs)
 {
     return dma_memory_rw(as, addr, (void *)buf, len,
-                         DMA_DIRECTION_FROM_DEVICE);
+                         DMA_DIRECTION_FROM_DEVICE, attrs);
 }
 
-int dma_memory_set(AddressSpace *as, dma_addr_t addr, uint8_t c, dma_addr_t len);
+/**
+ * dma_memory_set: Fill memory with a constant byte from DMA controller.
+ *
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @c: constant byte to fill the memory
+ * @len: the number of bytes to fill with the constant byte
+ * @attrs: memory transaction attributes
+ */
+MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
+                           uint8_t c, dma_addr_t len, MemTxAttrs attrs);
 
+/**
+ * address_space_map: Map a physical memory region into a host virtual address.
+ *
+ * May map a subset of the requested range, given by and returned in @plen.
+ * May return %NULL and set *@plen to zero(0), if resources needed to perform
+ * the mapping are exhausted.
+ * Use only for reads OR writes - not for read-modify-write operations.
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @len: pointer to length of buffer; updated on return
+ * @dir: indicates the transfer direction
+ * @attrs: memory attributes
+ */
 static inline void *dma_memory_map(AddressSpace *as,
                                    dma_addr_t addr, dma_addr_t *len,
-                                   DMADirection dir)
+                                   DMADirection dir, MemTxAttrs attrs)
 {
     hwaddr xlen = *len;
     void *p;
 
     p = address_space_map(as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE,
-                          MEMTXATTRS_UNSPECIFIED);
+                          attrs);
     *len = xlen;
     return p;
 }
 
+/**
+ * address_space_unmap: Unmaps a memory region previously mapped
+ *                      by dma_memory_map()
+ *
+ * Will also mark the memory as dirty if @dir == %DMA_DIRECTION_FROM_DEVICE.
+ * @access_len gives the amount of memory that was actually read or written
+ * by the caller.
+ *
+ * @as: #AddressSpace used
+ * @buffer: host pointer as returned by address_space_map()
+ * @len: buffer length as returned by address_space_map()
+ * @dir: indicates the transfer direction
+ * @access_len: amount of data actually transferred
+ */
 static inline void dma_memory_unmap(AddressSpace *as,
                                     void *buffer, dma_addr_t len,
                                     DMADirection dir, dma_addr_t access_len)
@@ -147,32 +238,34 @@ static inline void dma_memory_unmap(AddressSpace *as,
 }
 
 #define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \
-    static inline uint##_bits##_t ld##_lname##_##_end##_dma(AddressSpace *as, \
-                                                            dma_addr_t addr) \
-    {                                                                   \
-        uint##_bits##_t val;                                            \
-        dma_memory_read(as, addr, &val, (_bits) / 8);                   \
-        return _end##_bits##_to_cpu(val);                               \
-    }                                                                   \
-    static inline void st##_sname##_##_end##_dma(AddressSpace *as,      \
-                                                 dma_addr_t addr,       \
-                                                 uint##_bits##_t val)   \
-    {                                                                   \
-        val = cpu_to_##_end##_bits(val);                                \
-        dma_memory_write(as, addr, &val, (_bits) / 8);                  \
+    static inline MemTxResult ld##_lname##_##_end##_dma(AddressSpace *as, \
+                                                        dma_addr_t addr, \
+                                                        uint##_bits##_t *pval, \
+                                                        MemTxAttrs attrs) \
+    { \
+        MemTxResult res = dma_memory_read(as, addr, pval, (_bits) / 8, attrs); \
+        _end##_bits##_to_cpus(pval); \
+        return res; \
+    } \
+    static inline MemTxResult st##_sname##_##_end##_dma(AddressSpace *as, \
+                                                        dma_addr_t addr, \
+                                                        uint##_bits##_t val, \
+                                                        MemTxAttrs attrs) \
+    { \
+        val = cpu_to_##_end##_bits(val); \
+        return dma_memory_write(as, addr, &val, (_bits) / 8, attrs); \
     }
 
-static inline uint8_t ldub_dma(AddressSpace *as, dma_addr_t addr)
+static inline MemTxResult ldub_dma(AddressSpace *as, dma_addr_t addr,
+                                   uint8_t *val, MemTxAttrs attrs)
 {
-    uint8_t val;
-
-    dma_memory_read(as, addr, &val, 1);
-    return val;
+    return dma_memory_read(as, addr, val, 1, attrs);
 }
 
-static inline void stb_dma(AddressSpace *as, dma_addr_t addr, uint8_t val)
+static inline MemTxResult stb_dma(AddressSpace *as, dma_addr_t addr,
+                                  uint8_t val, MemTxAttrs attrs)
 {
-    dma_memory_write(as, addr, &val, 1);
+    return dma_memory_write(as, addr, &val, 1, attrs);
 }
 
 DEFINE_LDST_DMA(uw, w, 16, le);
@@ -193,7 +286,6 @@ void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
                       AddressSpace *as);
 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len);
 void qemu_sglist_destroy(QEMUSGList *qsg);
-#endif
 
 typedef BlockAIOCB *DMAIOFunc(int64_t offset, QEMUIOVector *iov,
                               BlockCompletionFunc *cb, void *cb_opaque,
@@ -209,10 +301,24 @@ BlockAIOCB *dma_blk_read(BlockBackend *blk,
 BlockAIOCB *dma_blk_write(BlockBackend *blk,
                           QEMUSGList *sg, uint64_t offset, uint32_t align,
                           BlockCompletionFunc *cb, void *opaque);
-uint64_t dma_buf_read(uint8_t *ptr, int32_t len, QEMUSGList *sg);
-uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg);
+MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual,
+                         QEMUSGList *sg, MemTxAttrs attrs);
+MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual,
+                          QEMUSGList *sg, MemTxAttrs attrs);
 
 void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
                     QEMUSGList *sg, enum BlockAcctType type);
 
+/**
+ * dma_aligned_pow2_mask: Return the address bit mask of the largest
+ * power of 2 size less or equal than @end - @start + 1, aligned with @start,
+ * and bounded by 1 << @max_addr_bits bits.
+ *
+ * @start: range start address
+ * @end: range end address (greater than @start)
+ * @max_addr_bits: max address bits (<= 64)
+ */
+uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end,
+                               int max_addr_bits);
+
 #endif
index e25b02e99013c75316c8b79da9a39803b816b554..743916e46ca137f301d034c295baed8bc8f20d68 100644 (file)
@@ -21,6 +21,10 @@ typedef struct ArchDumpInfo {
     uint32_t page_size;      /* The target's page size. If it's variable and
                               * unknown, then this should be the maximum. */
     uint64_t phys_base;      /* The target's physmem base. */
+    void (*arch_sections_add_fn)(DumpState *s);
+    uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff);
+    int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff);
+    void (*arch_cleanup_fn)(DumpState *s);
 } ArchDumpInfo;
 
 struct GuestPhysBlockList; /* memory_mapping.h */
index 250143cb5a71a9e6f7f98d8201a848bd3444d4da..d702854853f73f4fbf846ba83265aaa13a57a7f1 100644 (file)
@@ -15,6 +15,7 @@
 #define DUMP_H
 
 #include "qapi/qapi-types-dump.h"
+#include "qemu/thread.h"
 
 #define MAKEDUMPFILE_SIGNATURE      "makedumpfile"
 #define MAX_SIZE_MDF_HEADER         (4096) /* max size of makedumpfile_header */
@@ -136,7 +137,7 @@ typedef struct QEMU_PACKED KdumpSubHeader64 {
 } KdumpSubHeader64;
 
 typedef struct DataCache {
-    int fd;             /* fd of the file where to write the cached data */
+    DumpState *state;   /* dump state related to this data */
     uint8_t *buf;       /* buffer for cached data */
     size_t buf_size;    /* size of the buf */
     size_t data_size;   /* size of cached data in buf */
@@ -154,20 +155,36 @@ typedef struct DumpState {
     GuestPhysBlockList guest_phys_blocks;
     ArchDumpInfo dump_info;
     MemoryMappingList list;
-    uint16_t phdr_num;
-    uint32_t sh_info;
-    bool have_section;
     bool resume;
     bool detached;
-    ssize_t note_size;
+    bool kdump_raw;
     hwaddr memory_offset;
     int fd;
 
-    GuestPhysBlock *next_block;
-    ram_addr_t start;
-    bool has_filter;
-    int64_t begin;
-    int64_t length;
+    /*
+     * Dump filter area variables
+     *
+     * A filtered dump only contains the guest memory designated by
+     * the start address and length variables defined below.
+     *
+     * If length is 0, no filtering is applied.
+     */
+    int64_t filter_area_begin;  /* Start address of partial guest memory area */
+    int64_t filter_area_length; /* Length of partial guest memory area */
+
+    /* Elf dump related data */
+    uint32_t phdr_num;
+    uint32_t shdr_num;
+    ssize_t note_size;
+    hwaddr shdr_offset;
+    hwaddr phdr_offset;
+    hwaddr section_offset;
+    hwaddr note_offset;
+
+    void *elf_section_hdrs;     /* Pointer to section header buffer */
+    void *elf_section_data;     /* Pointer to section data buffer */
+    uint64_t elf_section_data_size; /* Size of section data */
+    GArray *string_table_buf;   /* String table data buffer */
 
     uint8_t *note_buf;          /* buffer for notes */
     size_t note_buf_offset;     /* the writing place in note_buf */
@@ -200,4 +217,9 @@ typedef struct DumpState {
 uint16_t cpu_to_dump16(DumpState *s, uint16_t val);
 uint32_t cpu_to_dump32(DumpState *s, uint32_t val);
 uint64_t cpu_to_dump64(DumpState *s, uint64_t val);
+
+int64_t dump_filtered_memblock_size(GuestPhysBlock *block, int64_t filter_area_start,
+                                    int64_t filter_area_length);
+int64_t dump_filtered_memblock_start(GuestPhysBlock *block, int64_t filter_area_start,
+                                     int64_t filter_area_length);
 #endif
diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h
new file mode 100644 (file)
index 0000000..a6c24f1
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * QEMU event-loop backend
+ *
+ * Copyright (C) 2022 Red Hat Inc
+ *
+ * Authors:
+ *  Nicolas Saenz Julienne <nsaenzju@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_EVENT_LOOP_BASE_H
+#define QEMU_EVENT_LOOP_BASE_H
+
+#include "qom/object.h"
+#include "block/aio.h"
+
+#define TYPE_EVENT_LOOP_BASE         "event-loop-base"
+OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass,
+                    EVENT_LOOP_BASE)
+
+struct EventLoopBaseClass {
+    ObjectClass parent_class;
+
+    void (*init)(EventLoopBase *base, Error **errp);
+    void (*update_params)(EventLoopBase *base, Error **errp);
+    bool (*can_be_deleted)(EventLoopBase *base);
+};
+
+struct EventLoopBase {
+    Object parent;
+
+    /* AioContext AIO engine parameters */
+    int64_t aio_max_batch;
+
+    /* AioContext thread pool parameters */
+    int64_t thread_pool_min;
+    int64_t thread_pool_max;
+};
+#endif
diff --git a/include/sysemu/hax.h b/include/sysemu/hax.h
deleted file mode 100644 (file)
index 12fb54f..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * QEMU HAXM support
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * Copyright (c) 2011 Intel Corporation
- *  Written by:
- *  Jiang Yunhong<yunhong.jiang@intel.com>
- *  Xin Xiaohui<xiaohui.xin@intel.com>
- *  Zhang Xiantao<xiantao.zhang@intel.com>
- *
- * Copyright 2016 Google, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef QEMU_HAX_H
-#define QEMU_HAX_H
-
-int hax_sync_vcpus(void);
-
-#ifdef CONFIG_HAX
-
-int hax_enabled(void);
-
-#else /* CONFIG_HAX */
-
-#define hax_enabled() (0)
-
-#endif /* CONFIG_HAX */
-
-#endif /* QEMU_HAX_H */
index df5644723a39523a1a193a828909cfe148bcc1fc..39326f1d4f9c0713fdcf0613623a941157cbeed8 100644 (file)
@@ -18,6 +18,7 @@
 #include "qom/object.h"
 #include "exec/memory.h"
 #include "qemu/bitmap.h"
+#include "qemu/thread-context.h"
 
 #define TYPE_MEMORY_BACKEND "memory-backend"
 OBJECT_DECLARE_TYPE(HostMemoryBackend, HostMemoryBackendClass,
@@ -64,8 +65,9 @@ struct HostMemoryBackend {
     /* protected */
     uint64_t size;
     bool merge, dump, use_canonical_path;
-    bool prealloc, is_mapped, share;
+    bool prealloc, is_mapped, share, reserve;
     uint32_t prealloc_threads;
+    ThreadContext *prealloc_context;
     DECLARE_BITMAP(host_nodes, MAX_NODES + 1);
     HostMemPolicy policy;
 
index f893768df95397f8b104a1a5a16cf1c3aa84af68..4a7c6af3a5f3b9f7e1548e045946827d7a1c02eb 100644 (file)
 #ifndef HVF_H
 #define HVF_H
 
-#include "sysemu/accel.h"
+#include "qemu/accel.h"
 #include "qom/object.h"
 
+#ifdef NEED_CPU_H
+#include "cpu.h"
+
 #ifdef CONFIG_HVF
-uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
-                                 int reg);
 extern bool hvf_allowed;
 #define hvf_enabled() (hvf_allowed)
 #else /* !CONFIG_HVF */
 #define hvf_enabled() 0
-#define hvf_get_supported_cpuid(func, idx, reg) 0
 #endif /* !CONFIG_HVF */
 
+#endif /* NEED_CPU_H */
+
 #define TYPE_HVF_ACCEL ACCEL_CLASS_NAME("hvf")
 
 typedef struct HVFState HVFState;
 DECLARE_INSTANCE_CHECKER(HVFState, HVF_STATE,
                          TYPE_HVF_ACCEL)
 
+#ifdef NEED_CPU_H
+struct hvf_sw_breakpoint {
+    vaddr pc;
+    vaddr saved_insn;
+    int use_count;
+    QTAILQ_ENTRY(hvf_sw_breakpoint) entry;
+};
+
+struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu,
+                                                 vaddr pc);
+int hvf_sw_breakpoints_active(CPUState *cpu);
+
+int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp);
+int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp);
+int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type);
+int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type);
+void hvf_arch_remove_all_hw_breakpoints(void);
+
+/*
+ * hvf_update_guest_debug:
+ * @cs: CPUState for the CPU to update
+ *
+ * Update guest to enable or disable debugging. Per-arch specifics will be
+ * handled by calling down to hvf_arch_update_guest_debug.
+ */
+int hvf_update_guest_debug(CPUState *cpu);
+void hvf_arch_update_guest_debug(CPUState *cpu);
+
+/*
+ * Return whether the guest supports debugging.
+ */
+bool hvf_arch_supports_guest_debug(void);
+#endif /* NEED_CPU_H */
+
 #endif
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
new file mode 100644 (file)
index 0000000..718bedd
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+ * QEMU Hypervisor.framework (HVF) support
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+/* header to be included in HVF-specific code */
+
+#ifndef HVF_INT_H
+#define HVF_INT_H
+
+#ifdef __aarch64__
+#include <Hypervisor/Hypervisor.h>
+#else
+#include <Hypervisor/hv.h>
+#endif
+
+/* hvf_slot flags */
+#define HVF_SLOT_LOG (1 << 0)
+
+typedef struct hvf_slot {
+    uint64_t start;
+    uint64_t size;
+    uint8_t *mem;
+    int slot_id;
+    uint32_t flags;
+    MemoryRegion *region;
+} hvf_slot;
+
+typedef struct hvf_vcpu_caps {
+    uint64_t vmx_cap_pinbased;
+    uint64_t vmx_cap_procbased;
+    uint64_t vmx_cap_procbased2;
+    uint64_t vmx_cap_entry;
+    uint64_t vmx_cap_exit;
+    uint64_t vmx_cap_preemption_timer;
+} hvf_vcpu_caps;
+
+struct HVFState {
+    AccelState parent;
+    hvf_slot slots[32];
+    int num_slots;
+
+    hvf_vcpu_caps *hvf_caps;
+    uint64_t vtimer_offset;
+    QTAILQ_HEAD(, hvf_sw_breakpoint) hvf_sw_breakpoints;
+};
+extern HVFState *hvf_state;
+
+struct AccelCPUState {
+    uint64_t fd;
+    void *exit;
+    bool vtimer_masked;
+    sigset_t unblock_ipi_mask;
+    bool guest_debug_enabled;
+};
+
+void assert_hvf_ok(hv_return_t ret);
+int hvf_arch_init(void);
+int hvf_arch_init_vcpu(CPUState *cpu);
+void hvf_arch_vcpu_destroy(CPUState *cpu);
+int hvf_vcpu_exec(CPUState *);
+hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);
+int hvf_put_registers(CPUState *);
+int hvf_get_registers(CPUState *);
+void hvf_kick_vcpu_thread(CPUState *cpu);
+
+#endif
index ffed6192a371a29fca9811660eece713698b8dc8..c71b77e71f34cc8089b643bca8c4af5e889040ff 100644 (file)
 #define QEMU_HW_ACCEL_H
 
 #include "hw/core/cpu.h"
-#include "sysemu/hax.h"
 #include "sysemu/kvm.h"
 #include "sysemu/hvf.h"
 #include "sysemu/whpx.h"
+#include "sysemu/nvmm.h"
 
 void cpu_synchronize_state(CPUState *cpu);
 void cpu_synchronize_post_reset(CPUState *cpu);
index 0c5284dbbcf60dbea2035404f6baa2eb9fd127f5..2102a90eca69768431d7233c5b01cff1f6b6f5eb 100644 (file)
 #include "block/aio.h"
 #include "qemu/thread.h"
 #include "qom/object.h"
+#include "sysemu/event-loop-base.h"
 
 #define TYPE_IOTHREAD "iothread"
 
 struct IOThread {
-    Object parent_obj;
+    EventLoopBase parent_obj;
 
     QemuThread thread;
     AioContext *ctx;
@@ -57,4 +58,10 @@ IOThread *iothread_create(const char *id, Error **errp);
 void iothread_stop(IOThread *iothread);
 void iothread_destroy(IOThread *iothread);
 
+/*
+ * Returns true if executing within IOThread context,
+ * false otherwise.
+ */
+bool qemu_in_iothread(void);
+
 #endif /* IOTHREAD_H */
index bb5d5cf49710112779fd575e301ee7a24e1bc924..d6148781642107de0995af400331bbde126791b8 100644 (file)
  *
  */
 
+/* header to be included in non-KVM-specific code */
+
 #ifndef QEMU_KVM_H
 #define QEMU_KVM_H
 
-#include "qemu/queue.h"
-#include "hw/core/cpu.h"
 #include "exec/memattrs.h"
-#include "sysemu/accel.h"
+#include "qemu/accel.h"
 #include "qom/object.h"
 
 #ifdef NEED_CPU_H
@@ -36,15 +36,11 @@ extern bool kvm_kernel_irqchip;
 extern bool kvm_split_irqchip;
 extern bool kvm_async_interrupts_allowed;
 extern bool kvm_halt_in_kernel_allowed;
-extern bool kvm_eventfds_allowed;
-extern bool kvm_irqfds_allowed;
 extern bool kvm_resamplefds_allowed;
 extern bool kvm_msi_via_irqfd_allowed;
 extern bool kvm_gsi_routing_allowed;
 extern bool kvm_gsi_direct_mapping;
 extern bool kvm_readonly_mem_allowed;
-extern bool kvm_direct_msi_allowed;
-extern bool kvm_ioeventfd_any_length_allowed;
 extern bool kvm_msi_use_devid;
 
 #define kvm_enabled()           (kvm_allowed)
@@ -88,23 +84,16 @@ extern bool kvm_msi_use_devid;
  */
 #define kvm_halt_in_kernel() (kvm_halt_in_kernel_allowed)
 
-/**
- * kvm_eventfds_enabled:
- *
- * Returns: true if we can use eventfds to receive notifications
- * from a KVM CPU (ie the kernel supports eventds and we are running
- * with a configuration where it is meaningful to use them).
- */
-#define kvm_eventfds_enabled() (kvm_eventfds_allowed)
-
 /**
  * kvm_irqfds_enabled:
  *
  * Returns: true if we can use irqfds to inject interrupts into
  * a KVM CPU (ie the kernel supports irqfds and we are running
  * with a configuration where it is meaningful to use them).
+ *
+ * Always available if running with in-kernel irqchip.
  */
-#define kvm_irqfds_enabled() (kvm_irqfds_allowed)
+#define kvm_irqfds_enabled() kvm_irqchip_in_kernel()
 
 /**
  * kvm_resamplefds_enabled:
@@ -147,19 +136,6 @@ extern bool kvm_msi_use_devid;
  */
 #define kvm_readonly_mem_enabled() (kvm_readonly_mem_allowed)
 
-/**
- * kvm_direct_msi_enabled:
- *
- * Returns: true if KVM allows direct MSI injection.
- */
-#define kvm_direct_msi_enabled() (kvm_direct_msi_allowed)
-
-/**
- * kvm_ioeventfd_any_length_enabled:
- * Returns: true if KVM allows any length io eventfd.
- */
-#define kvm_ioeventfd_any_length_enabled() (kvm_ioeventfd_any_length_allowed)
-
 /**
  * kvm_msi_devid_required:
  * Returns: true if KVM requires a device id to be provided while
@@ -174,21 +150,17 @@ extern bool kvm_msi_use_devid;
 #define kvm_irqchip_is_split() (false)
 #define kvm_async_interrupts_enabled() (false)
 #define kvm_halt_in_kernel() (false)
-#define kvm_eventfds_enabled() (false)
 #define kvm_irqfds_enabled() (false)
 #define kvm_resamplefds_enabled() (false)
 #define kvm_msi_via_irqfd_enabled() (false)
 #define kvm_gsi_routing_allowed() (false)
 #define kvm_gsi_direct_mapping() (false)
 #define kvm_readonly_mem_enabled() (false)
-#define kvm_direct_msi_enabled() (false)
-#define kvm_ioeventfd_any_length_enabled() (false)
 #define kvm_msi_devid_required() (false)
 
 #endif  /* CONFIG_KVM_IS_POSSIBLE */
 
 struct kvm_run;
-struct kvm_lapic_state;
 struct kvm_irq_routing_entry;
 
 typedef struct KVMCapabilityInfo {
@@ -209,18 +181,19 @@ DECLARE_INSTANCE_CHECKER(KVMState, KVM_STATE,
 extern KVMState *kvm_state;
 typedef struct Notifier Notifier;
 
+typedef struct KVMRouteChange {
+     KVMState *s;
+     int changes;
+} KVMRouteChange;
+
 /* external API */
 
-bool kvm_has_free_slot(MachineState *ms);
+unsigned int kvm_get_max_memslots(void);
+unsigned int kvm_get_free_memslots(void);
 bool kvm_has_sync_mmu(void);
 int kvm_has_vcpu_events(void);
-int kvm_has_robust_singlestep(void);
-int kvm_has_debugregs(void);
 int kvm_max_nested_state_length(void);
-int kvm_has_pit_state2(void);
-int kvm_has_many_ioeventfds(void);
 int kvm_has_gsi_routing(void);
-int kvm_has_intx_set_mask(void);
 
 /**
  * kvm_arm_supports_user_irq
@@ -233,41 +206,32 @@ int kvm_has_intx_set_mask(void);
  */
 bool kvm_arm_supports_user_irq(void);
 
-/**
- * kvm_memcrypt_enabled - return boolean indicating whether memory encryption
- *                        is enabled
- * Returns: 1 memory encryption is enabled
- *          0 memory encryption is disabled
- */
-bool kvm_memcrypt_enabled(void);
-
-/**
- * kvm_memcrypt_encrypt_data: encrypt the memory range
- *
- * Return: 1 failed to encrypt the range
- *         0 succesfully encrypted memory region
- */
-int kvm_memcrypt_encrypt_data(uint8_t *ptr, uint64_t len);
 
+int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
+int kvm_on_sigbus(int code, void *addr);
 
 #ifdef NEED_CPU_H
 #include "cpu.h"
 
 void kvm_flush_coalesced_mmio_buffer(void);
 
-int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr,
-                          target_ulong len, int type);
-int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
-                          target_ulong len, int type);
-void kvm_remove_all_breakpoints(CPUState *cpu);
+/**
+ * kvm_update_guest_debug(): ensure KVM debug structures updated
+ * @cs: the CPUState for this cpu
+ * @reinject_trap: KVM trap injection control
+ *
+ * There are usually per-arch specifics which will be handled by
+ * calling down to kvm_arch_update_guest_debug after the generic
+ * fields have been set.
+ */
+#ifdef KVM_CAP_SET_GUEST_DEBUG
 int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap);
-
-int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
-int kvm_on_sigbus(int code, void *addr);
-
-/* interface with exec.c */
-
-void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align, bool shared));
+#else
+static inline int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)
+{
+    return -EINVAL;
+}
+#endif
 
 /* internal API */
 
@@ -353,6 +317,8 @@ bool kvm_device_supported(int vmfd, uint64_t type);
 
 extern const KVMCapabilityInfo kvm_arch_required_capabilities[];
 
+void kvm_arch_accel_class_init(ObjectClass *oc);
+
 void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run);
 MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run);
 
@@ -371,6 +337,8 @@ int kvm_arch_get_registers(CPUState *cpu);
 
 int kvm_arch_put_registers(CPUState *cpu, int level);
 
+int kvm_arch_get_default_type(MachineState *ms);
+
 int kvm_arch_init(MachineState *ms, KVMState *s);
 
 int kvm_arch_init_vcpu(CPUState *cpu);
@@ -407,20 +375,18 @@ void kvm_irqchip_add_change_notifier(Notifier *n);
 void kvm_irqchip_remove_change_notifier(Notifier *n);
 void kvm_irqchip_change_notify(void);
 
-void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic);
-
 struct kvm_guest_debug;
 struct kvm_debug_exit_arch;
 
 struct kvm_sw_breakpoint {
-    target_ulong pc;
-    target_ulong saved_insn;
+    vaddr pc;
+    vaddr saved_insn;
     int use_count;
     QTAILQ_ENTRY(kvm_sw_breakpoint) entry;
 };
 
 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu,
-                                                 target_ulong pc);
+                                                 vaddr pc);
 
 int kvm_sw_breakpoints_active(CPUState *cpu);
 
@@ -428,10 +394,8 @@ int kvm_arch_insert_sw_breakpoint(CPUState *cpu,
                                   struct kvm_sw_breakpoint *bp);
 int kvm_arch_remove_sw_breakpoint(CPUState *cpu,
                                   struct kvm_sw_breakpoint *bp);
-int kvm_arch_insert_hw_breakpoint(target_ulong addr,
-                                  target_ulong len, int type);
-int kvm_arch_remove_hw_breakpoint(target_ulong addr,
-                                  target_ulong len, int type);
+int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type);
+int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type);
 void kvm_arch_remove_all_hw_breakpoints(void);
 
 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg);
@@ -466,17 +430,10 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension);
         kvm_vcpu_ioctl(cpu, KVM_ENABLE_CAP, &cap);                   \
     })
 
-uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function,
-                                      uint32_t index, int reg);
-uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index);
-
-
 void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
 
-#if !defined(CONFIG_USER_ONLY)
 int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
                                        hwaddr *phys_addr);
-#endif
 
 #endif /* NEED_CPU_H */
 
@@ -486,7 +443,7 @@ void kvm_init_cpu_signals(CPUState *cpu);
 
 /**
  * kvm_irqchip_add_msi_route - Add MSI route for specific vector
- * @s:      KVM state
+ * @c:      KVMRouteChange instance.
  * @vector: which vector to add. This can be either MSI/MSIX
  *          vector. The function will automatically detect whether
  *          MSI/MSIX is enabled, and fetch corresponding MSI
@@ -495,10 +452,24 @@ void kvm_init_cpu_signals(CPUState *cpu);
  *          as @NULL, an empty MSI message will be inited.
  * @return: virq (>=0) when success, errno (<0) when failed.
  */
-int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev);
+int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev);
 int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg,
                                  PCIDevice *dev);
 void kvm_irqchip_commit_routes(KVMState *s);
+
+static inline KVMRouteChange kvm_irqchip_begin_route_changes(KVMState *s)
+{
+    return (KVMRouteChange) { .s = s, .changes = 0 };
+}
+
+static inline void kvm_irqchip_commit_route_changes(KVMRouteChange *c)
+{
+    if (c->changes) {
+        kvm_irqchip_commit_routes(c->s);
+        c->changes = 0;
+    }
+}
+
 void kvm_irqchip_release_virq(KVMState *s, int virq);
 
 int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter);
@@ -513,7 +484,6 @@ int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
 int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n,
                                       qemu_irq irq);
 void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi);
-void kvm_pc_setup_irq_routing(bool pci_enabled);
 void kvm_init_irq_routing(KVMState *s);
 
 bool kvm_kernel_irqchip_allowed(void);
@@ -551,10 +521,21 @@ int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source);
  * Returns: 0 on success, or a negative errno on failure.
  */
 int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target);
-struct ppc_radix_page_info *kvm_get_radix_page_info(void);
-int kvm_get_max_memslots(void);
 
 /* Notify resamplefd for EOI of specific interrupts. */
 void kvm_resample_fd_notify(int gsi);
 
+/**
+ * kvm_cpu_check_are_resettable - return whether CPUs can be reset
+ *
+ * Returns: true: CPUs are resettable
+ *          false: CPUs are not resettable
+ */
+bool kvm_cpu_check_are_resettable(void);
+
+bool kvm_arch_cpu_check_are_resettable(void);
+
+bool kvm_dirty_ring_enabled(void);
+
+uint32_t kvm_dirty_ring_size(void);
 #endif
index 65740806dacaf5b9615f86d0baf4d5da1beb36b0..fd846394be104b9b56b8f4c0b7b56305ac486a9a 100644 (file)
@@ -10,7 +10,9 @@
 #define QEMU_KVM_INT_H
 
 #include "exec/memory.h"
-#include "sysemu/accel.h"
+#include "qapi/qapi-types-common.h"
+#include "qemu/accel.h"
+#include "qemu/queue.h"
 #include "sysemu/kvm.h"
 
 typedef struct KVMSlot
@@ -23,18 +25,105 @@ typedef struct KVMSlot
     int old_flags;
     /* Dirty bitmap cache for the slot */
     unsigned long *dirty_bmap;
+    unsigned long dirty_bmap_size;
+    /* Cache of the address space ID */
+    int as_id;
+    /* Cache of the offset in ram address space */
+    ram_addr_t ram_start_offset;
 } KVMSlot;
 
+typedef struct KVMMemoryUpdate {
+    QSIMPLEQ_ENTRY(KVMMemoryUpdate) next;
+    MemoryRegionSection section;
+} KVMMemoryUpdate;
+
 typedef struct KVMMemoryListener {
     MemoryListener listener;
-    /* Protects the slots and all inside them */
-    QemuMutex slots_lock;
     KVMSlot *slots;
+    unsigned int nr_used_slots;
     int as_id;
+    QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add;
+    QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del;
 } KVMMemoryListener;
 
+#define KVM_MSI_HASHTAB_SIZE    256
+
+enum KVMDirtyRingReaperState {
+    KVM_DIRTY_RING_REAPER_NONE = 0,
+    /* The reaper is sleeping */
+    KVM_DIRTY_RING_REAPER_WAIT,
+    /* The reaper is reaping for dirty pages */
+    KVM_DIRTY_RING_REAPER_REAPING,
+};
+
+/*
+ * KVM reaper instance, responsible for collecting the KVM dirty bits
+ * via the dirty ring.
+ */
+struct KVMDirtyRingReaper {
+    /* The reaper thread */
+    QemuThread reaper_thr;
+    volatile uint64_t reaper_iteration; /* iteration number of reaper thr */
+    volatile enum KVMDirtyRingReaperState reaper_state; /* reap thr state */
+};
+struct KVMState
+{
+    AccelState parent_obj;
+
+    int nr_slots;
+    int fd;
+    int vmfd;
+    int coalesced_mmio;
+    int coalesced_pio;
+    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
+    bool coalesced_flush_in_progress;
+    int vcpu_events;
+#ifdef KVM_CAP_SET_GUEST_DEBUG
+    QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints;
+#endif
+    int max_nested_state_len;
+    int kvm_shadow_mem;
+    bool kernel_irqchip_allowed;
+    bool kernel_irqchip_required;
+    OnOffAuto kernel_irqchip_split;
+    bool sync_mmu;
+    uint64_t manual_dirty_log_protect;
+    /* The man page (and posix) say ioctl numbers are signed int, but
+     * they're not.  Linux, glibc and *BSD all treat ioctl numbers as
+     * unsigned, and treating them as signed here can break things */
+    unsigned irq_set_ioctl;
+    unsigned int sigmask_len;
+    GHashTable *gsimap;
+#ifdef KVM_CAP_IRQ_ROUTING
+    struct kvm_irq_routing *irq_routes;
+    int nr_allocated_irq_routes;
+    unsigned long *used_gsi_bitmap;
+    unsigned int gsi_count;
+#endif
+    KVMMemoryListener memory_listener;
+    QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus;
+
+    /* For "info mtree -f" to tell if an MR is registered in KVM */
+    int nr_as;
+    struct KVMAs {
+        KVMMemoryListener *ml;
+        AddressSpace *as;
+    } *as;
+    uint64_t kvm_dirty_ring_bytes;  /* Size of the per-vcpu dirty ring */
+    uint32_t kvm_dirty_ring_size;   /* Number of dirty GFNs per ring */
+    bool kvm_dirty_ring_with_bitmap;
+    uint64_t kvm_eager_split_size;  /* Eager Page Splitting chunk size */
+    struct KVMDirtyRingReaper reaper;
+    NotifyVmexitOption notify_vmexit;
+    uint32_t notify_window;
+    uint32_t xen_version;
+    uint32_t xen_caps;
+    uint16_t xen_gnttab_max_frames;
+    uint16_t xen_evtchn_max_pirq;
+};
+
 void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
-                                  AddressSpace *as, int as_id);
+                                  AddressSpace *as, int as_id, const char *name);
 
 void kvm_set_max_memslot_size(hwaddr max_slot_size);
 
diff --git a/include/sysemu/kvm_xen.h b/include/sysemu/kvm_xen.h
new file mode 100644 (file)
index 0000000..961c702
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Xen HVM emulation support in KVM
+ *
+ * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_SYSEMU_KVM_XEN_H
+#define QEMU_SYSEMU_KVM_XEN_H
+
+/* The KVM API uses these to indicate "no GPA" or "no GFN" */
+#define INVALID_GPA UINT64_MAX
+#define INVALID_GFN UINT64_MAX
+
+/* QEMU plays the rôle of dom0 for "interdomain" communication. */
+#define DOMID_QEMU  0
+
+int kvm_xen_soft_reset(void);
+uint32_t kvm_xen_get_caps(void);
+void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id);
+bool kvm_xen_has_vcpu_callback_vector(void);
+void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type);
+void kvm_xen_set_callback_asserted(void);
+int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port);
+uint16_t kvm_xen_get_gnttab_max_frames(void);
+uint16_t kvm_xen_get_evtchn_max_pirq(void);
+
+#define kvm_xen_has_cap(cap) (!!(kvm_xen_get_caps() &           \
+                                 KVM_XEN_HVM_CONFIG_ ## cap))
+
+#define XEN_SPECIAL_AREA_ADDR 0xfeff8000UL
+#define XEN_SPECIAL_AREA_SIZE 0x4000UL
+
+#define XEN_SPECIALPAGE_CONSOLE     0
+#define XEN_SPECIALPAGE_XENSTORE    1
+
+#define XEN_SPECIAL_PFN(x) ((XEN_SPECIAL_AREA_ADDR >> TARGET_PAGE_BITS) + \
+                            XEN_SPECIALPAGE_##x)
+
+#endif /* QEMU_SYSEMU_KVM_XEN_H */
index 4b20f1a639e1a9d75b243fdb44bc018cd0bb87d2..021e0a623091b08a42fe5a0941cf2289f0fdad32 100644 (file)
@@ -15,8 +15,7 @@
 #define MEMORY_MAPPING_H
 
 #include "qemu/queue.h"
-#include "exec/cpu-defs.h"
-#include "exec/memory.h"
+#include "exec/cpu-common.h"
 
 typedef struct GuestPhysBlock {
     /* visible to guest, reflects PCI hole, etc */
@@ -43,7 +42,7 @@ typedef struct GuestPhysBlockList {
 /* The physical and virtual address in the memory mapping are contiguous. */
 typedef struct MemoryMapping {
     hwaddr phys_addr;
-    target_ulong virt_addr;
+    vaddr virt_addr;
     ram_addr_t length;
     QTAILQ_ENTRY(MemoryMapping) next;
 } MemoryMapping;
@@ -72,7 +71,7 @@ void guest_phys_blocks_free(GuestPhysBlockList *list);
 void guest_phys_blocks_init(GuestPhysBlockList *list);
 void guest_phys_blocks_append(GuestPhysBlockList *list);
 
-void qemu_get_guest_memory_mapping(MemoryMappingList *list,
+bool qemu_get_guest_memory_mapping(MemoryMappingList *list,
                                    const GuestPhysBlockList *guest_phys_blocks,
                                    Error **errp);
 
diff --git a/include/sysemu/nvmm.h b/include/sysemu/nvmm.h
new file mode 100644 (file)
index 0000000..be7bc9a
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
+ *
+ * NetBSD Virtual Machine Monitor (NVMM) accelerator support.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/* header to be included in non-NVMM-specific code */
+
+#ifndef QEMU_NVMM_H
+#define QEMU_NVMM_H
+
+#ifdef NEED_CPU_H
+
+#ifdef CONFIG_NVMM
+
+int nvmm_enabled(void);
+
+#else /* CONFIG_NVMM */
+
+#define nvmm_enabled() (0)
+
+#endif /* CONFIG_NVMM */
+
+#endif /* NEED_CPU_H */
+
+#endif /* QEMU_NVMM_H */
index 629c8c648b7abb1e7bf6277925b826bcb10ca13b..dff32ae185a613ef54ec794bac656481b027991c 100644 (file)
 #include <sys/sysmacros.h>
 #endif
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void os_set_line_buffering(void);
+void os_setup_early_signal_handling(void);
 void os_set_proc_name(const char *s);
 void os_setup_signal_handling(void);
+int os_set_daemonize(bool d);
+bool is_daemonized(void);
 void os_daemonize(void);
+bool os_set_runas(const char *user_id);
+void os_set_chroot(const char *path);
 void os_setup_post(void);
 int os_mlock(void);
 
-#define closesocket(s) close(s)
-#define ioctlsocket(s, r, v) ioctl(s, r, v)
-
-typedef struct timeval qemu_timeval;
-#define qemu_gettimeofday(tp) gettimeofday(tp, NULL)
-
-bool is_daemonized(void);
-
 /**
  * qemu_alloc_stack:
  * @sz: pointer to a size_t holding the requested usable stack size
@@ -92,4 +93,8 @@ static inline void qemu_funlockfile(FILE *f)
     funlockfile(f);
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif
index 5346d51e890eb1614bb040d725f0b90d7c9d3f66..1047d260cbd4ff618ad407e69567b855048ac8c0 100644 (file)
 #include <winsock2.h>
 #include <windows.h>
 #include <ws2tcpip.h>
+#include "qemu/typedefs.h"
 
-#if defined(_WIN64)
-/* On w64, setjmp is implemented by _setjmp which needs a second parameter.
+#ifdef HAVE_AFUNIX_H
+#include <afunix.h>
+#else
+/*
+ * Fallback definitions of things we need in afunix.h, if not available from
+ * the used Windows SDK or MinGW headers.
+ */
+#define UNIX_PATH_MAX 108
+
+typedef struct sockaddr_un {
+    ADDRESS_FAMILY sun_family;
+    char sun_path[UNIX_PATH_MAX];
+} SOCKADDR_UN, *PSOCKADDR_UN;
+
+#define SIO_AF_UNIX_GETPEERPID _WSAIOR(IOC_VENDOR, 256)
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__aarch64__)
+/*
+ * On windows-arm64, setjmp is available in only one variant, and longjmp always
+ * does stack unwinding. This crash with generated code.
+ * Thus, we use another implementation of setjmp (not windows one), coming from
+ * mingw, which never performs stack unwinding.
+ */
+#undef setjmp
+#undef longjmp
+/*
+ * These functions are not declared in setjmp.h because __aarch64__ defines
+ * setjmp to _setjmpex instead. However, they are still defined in libmingwex.a,
+ * which gets linked automatically.
+ */
+int __mingw_setjmp(jmp_buf);
+void __attribute__((noreturn)) __mingw_longjmp(jmp_buf, int);
+#define setjmp(env) __mingw_setjmp(env)
+#define longjmp(env, val) __mingw_longjmp(env, val)
+#elif defined(_WIN64)
+/*
+ * On windows-x64, setjmp is implemented by _setjmp which needs a second parameter.
  * If this parameter is NULL, longjump does no stack unwinding.
  * That is what we need for QEMU. Passing the value of register rsp (default)
- * lets longjmp try a stack unwinding which will crash with generated code. */
+ * lets longjmp try a stack unwinding which will crash with generated code.
+ */
 # undef setjmp
 # define setjmp(env) _setjmp(env, NULL)
-#endif
+#endif /* __aarch64__ */
 /* QEMU uses sigsetjmp()/siglongjmp() as the portable way to specify
  * "longjmp and don't touch the signal masks". Since we know that the
  * savemask parameter will always be zero we can safely define these
@@ -58,8 +100,9 @@ struct tm *localtime_r(const time_t *timep, struct tm *result);
 static inline void os_setup_signal_handling(void) {}
 static inline void os_daemonize(void) {}
 static inline void os_setup_post(void) {}
-void os_set_line_buffering(void);
 static inline void os_set_proc_name(const char *dummy) {}
+void os_set_line_buffering(void);
+void os_setup_early_signal_handling(void);
 
 int getpagesize(void);
 
@@ -67,11 +110,13 @@ int getpagesize(void);
 # define EPROTONOSUPPORT EINVAL
 #endif
 
-typedef struct {
-    long tv_sec;
-    long tv_usec;
-} qemu_timeval;
-int qemu_gettimeofday(qemu_timeval *tp);
+static inline int os_set_daemonize(bool d)
+{
+    if (d) {
+        return -ENOTSUP;
+    }
+    return 0;
+}
 
 static inline bool is_daemonized(void)
 {
@@ -101,25 +146,48 @@ static inline char *realpath(const char *path, char *resolved_path)
     return resolved_path;
 }
 
-/* ??? Mingw appears to export _lock_file and _unlock_file as the functions
- * with which to lock a stdio handle.  But something is wrong in the markup,
- * either in the header or the library, such that we get undefined references
- * to "_imp___lock_file" etc when linking.  Since we seem to have no other
- * alternative, and the usage within the logging functions isn't critical,
- * ignore FILE locking.
+/*
+ * Older versions of MinGW do not import _lock_file and _unlock_file properly.
+ * This was fixed for v6.0.0 with commit b48e3ac8969d.
  */
-
 static inline void qemu_flockfile(FILE *f)
 {
+#ifdef HAVE__LOCK_FILE
+    _lock_file(f);
+#endif
 }
 
 static inline void qemu_funlockfile(FILE *f)
 {
+#ifdef HAVE__LOCK_FILE
+    _unlock_file(f);
+#endif
 }
 
-/* We wrap all the sockets functions so that we can
- * set errno based on WSAGetLastError()
+/* Helper for WSAEventSelect, to report errors */
+bool qemu_socket_select(int sockfd, WSAEVENT hEventObject,
+                        long lNetworkEvents, Error **errp);
+
+bool qemu_socket_unselect(int sockfd, Error **errp);
+
+/* We wrap all the sockets functions so that we can set errno based on
+ * WSAGetLastError(), and use file-descriptors instead of SOCKET.
+ */
+
+/*
+ * qemu_close_socket_osfhandle:
+ * @fd: a file descriptor associated with a SOCKET
+ *
+ * Close only the C run-time file descriptor, leave the SOCKET opened.
+ *
+ * Returns zero on success. On error, -1 is returned, and errno is set to
+ * indicate the error.
  */
+int qemu_close_socket_osfhandle(int fd);
+
+#undef close
+#define close qemu_close_wrap
+int qemu_close_wrap(int fd);
 
 #undef connect
 #define connect qemu_connect_wrap
@@ -152,10 +220,6 @@ int qemu_shutdown_wrap(int sockfd, int how);
 #define ioctlsocket qemu_ioctlsocket_wrap
 int qemu_ioctlsocket_wrap(int fd, int req, void *val);
 
-#undef closesocket
-#define closesocket qemu_closesocket_wrap
-int qemu_closesocket_wrap(int fd);
-
 #undef getsockopt
 #define getsockopt qemu_getsockopt_wrap
 int qemu_getsockopt_wrap(int sockfd, int level, int optname,
@@ -194,4 +258,15 @@ ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags);
 ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags,
                            struct sockaddr *addr, socklen_t *addrlen);
 
+EXCEPTION_DISPOSITION
+win32_close_exception_handler(struct _EXCEPTION_RECORD*, void*,
+                              struct _CONTEXT*, void*);
+
+void *qemu_win32_map_alloc(size_t size, HANDLE *h, Error **errp);
+void qemu_win32_map_free(void *ptr, HANDLE h, Error **errp);
+
+#ifdef __cplusplus
+}
+#endif
+
 #endif
index 4c53537ef3d2107982fa7be87a5ea2c4eceae0b3..85f05b0e46a82de16e2e15b43c73c1a24cb9f47c 100644 (file)
@@ -14,6 +14,7 @@
 #ifndef QTEST_H
 #define QTEST_H
 
+#include "chardev/char.h"
 
 extern bool qtest_allowed;
 
@@ -22,6 +23,9 @@ static inline bool qtest_enabled(void)
     return qtest_allowed;
 }
 
+void qtest_send_prefix(CharBackend *chr);
+void G_GNUC_PRINTF(2, 3) qtest_sendf(CharBackend *chr, const char *fmt, ...);
+void qtest_set_command_cb(bool (*pc_cb)(CharBackend *chr, gchar **words));
 bool qtest_driver(void);
 
 void qtest_server_init(const char *qtest_chrdev, const char *qtest_log, Error **errp);
index 56c0c17c30b02a1280ea7f3d3b6782657a07439f..08aae5869fcac6fe8a1e15ba0e4c13837d2db140 100644 (file)
@@ -1,8 +1,8 @@
-#ifndef REPLAY_H
-#define REPLAY_H
+#ifndef SYSEMU_REPLAY_H
+#define SYSEMU_REPLAY_H
 
 /*
- * replay.h
+ * QEMU replay (system interface)
  *
  * Copyright (c) 2010-2015 Institute for System Programming
  *                         of the Russian Academy of Sciences.
@@ -12,9 +12,9 @@
  *
  */
 
+#include "exec/replay-core.h"
 #include "qapi/qapi-types-misc.h"
 #include "qapi/qapi-types-run-state.h"
-#include "qapi/qapi-types-replay.h"
 #include "qapi/qapi-types-ui.h"
 #include "block/aio.h"
 
@@ -45,8 +45,6 @@ typedef enum ReplayCheckpoint ReplayCheckpoint;
 
 typedef struct ReplayNetState ReplayNetState;
 
-extern ReplayMode replay_mode;
-
 /* Name of the initial VM snapshot */
 extern char *replay_snapshot;
 
@@ -63,40 +61,6 @@ extern char *replay_snapshot;
 void replay_mutex_lock(void);
 void replay_mutex_unlock(void);
 
-/* Replay process control functions */
-
-/*! Enables recording or saving event log with specified parameters */
-void replay_configure(struct QemuOpts *opts);
-/*! Initializes timers used for snapshotting and enables events recording */
-void replay_start(void);
-/*! Closes replay log file and frees other resources. */
-void replay_finish(void);
-/*! Adds replay blocker with the specified error description */
-void replay_add_blocker(Error *reason);
-/* Returns name of the replay log file */
-const char *replay_get_filename(void);
-/*
- * Start making one step in backward direction.
- * Used by gdbstub for backwards debugging.
- * Returns true on success.
- */
-bool replay_reverse_step(void);
-/*
- * Start searching the last breakpoint/watchpoint.
- * Used by gdbstub for backwards debugging.
- * Returns true if the process successfully started.
- */
-bool replay_reverse_continue(void);
-/*
- * Returns true if replay module is processing
- * reverse_continue or reverse_step request
- */
-bool replay_running_debug(void);
-/* Called in reverse debugging mode to collect breakpoint information */
-void replay_breakpoint(void);
-/* Called when gdb is attached to gdbstub */
-void replay_gdb_attached(void);
-
 /* Processing the instructions */
 
 /*! Returns number of executed instructions. */
@@ -106,47 +70,26 @@ int replay_get_instructions(void);
 /*! Updates instructions counter in replay mode. */
 void replay_account_executed_instructions(void);
 
-/* Interrupts and exceptions */
-
-/*! Called by exception handler to write or read
-    exception processing events. */
-bool replay_exception(void);
-/*! Used to determine that exception is pending.
-    Does not proceed to the next event in the log. */
-bool replay_has_exception(void);
-/*! Called by interrupt handlers to write or read
-    interrupt processing events.
-    \return true if interrupt should be processed */
-bool replay_interrupt(void);
-/*! Tries to read interrupt event from the file.
-    Returns true, when interrupt request is pending */
-bool replay_has_interrupt(void);
-
 /* Processing clocks and other time sources */
 
 /*! Save the specified clock */
 int64_t replay_save_clock(ReplayClockKind kind, int64_t clock,
                           int64_t raw_icount);
 /*! Read the specified clock from the log or return cached data */
-int64_t replay_read_clock(ReplayClockKind kind);
+int64_t replay_read_clock(ReplayClockKind kind, int64_t raw_icount);
 /*! Saves or reads the clock depending on the current replay mode. */
 #define REPLAY_CLOCK(clock, value)                                      \
-    (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock))       \
+    (replay_mode == REPLAY_MODE_PLAY                                    \
+        ? replay_read_clock((clock), icount_get_raw())                  \
         : replay_mode == REPLAY_MODE_RECORD                             \
-            ? replay_save_clock((clock), (value), icount_get_raw()) \
-        : (value))
+            ? replay_save_clock((clock), (value), icount_get_raw())     \
+            : (value))
 #define REPLAY_CLOCK_LOCKED(clock, value)                               \
-    (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock))       \
+    (replay_mode == REPLAY_MODE_PLAY                                    \
+        ? replay_read_clock((clock), icount_get_raw_locked())           \
         : replay_mode == REPLAY_MODE_RECORD                             \
             ? replay_save_clock((clock), (value), icount_get_raw_locked()) \
-        : (value))
-
-/* Processing data from random generators */
-
-/* Saves the values from the random number generator */
-void replay_save_random(int ret, void *buf, size_t len);
-/* Loads the saved values for the random number generator */
-int replay_read_random(void *buf, size_t len);
+            : (value))
 
 /* Events */
 
@@ -158,9 +101,14 @@ void replay_shutdown_request(ShutdownCause cause);
     Returns 0 in PLAY mode if checkpoint was not found.
     Returns 1 in all other cases. */
 bool replay_checkpoint(ReplayCheckpoint checkpoint);
-/*! Used to determine that checkpoint is pending.
+/*! Used to determine that checkpoint or async event is pending.
     Does not proceed to the next event in the log. */
-bool replay_has_checkpoint(void);
+bool replay_has_event(void);
+/*
+ * Processes the async events added to the queue (while recording)
+ * or reads the events from the file (while replaying).
+ */
+void replay_async_events(void);
 
 /* Asynchronous events queue */
 
@@ -191,7 +139,7 @@ uint64_t blkreplay_next_id(void);
 /*! Registers char driver to save it's events */
 void replay_register_char_driver(struct Chardev *chr);
 /*! Saves write to char device event to the log */
-void replay_chr_be_write(struct Chardev *s, uint8_t *buf, int len);
+void replay_chr_be_write(struct Chardev *s, const uint8_t *buf, int len);
 /*! Writes char write return value to the replay log. */
 void replay_char_write_event_save(int res, int offset);
 /*! Reads char write return value from the replay log. */
index 0b0d6d7598c9566c11cab829c6d15fe22b57634a..609e4d50c2690283a123b1c86eb047af26e36310 100644 (file)
@@ -1,10 +1,13 @@
 #ifndef QEMU_SYSEMU_RESET_H
 #define QEMU_SYSEMU_RESET_H
 
+#include "qapi/qapi-events-run-state.h"
+
 typedef void QEMUResetHandler(void *opaque);
 
 void qemu_register_reset(QEMUResetHandler *func, void *opaque);
+void qemu_register_reset_nosnapshotload(QEMUResetHandler *func, void *opaque);
 void qemu_unregister_reset(QEMUResetHandler *func, void *opaque);
-void qemu_devices_reset(void);
+void qemu_devices_reset(ShutdownCause reason);
 
 #endif
diff --git a/include/sysemu/rtc.h b/include/sysemu/rtc.h
new file mode 100644 (file)
index 0000000..0fc8ad6
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * RTC configuration and clock read
+ *
+ * Copyright (c) 2003-2021 QEMU contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef SYSEMU_RTC_H
+#define SYSEMU_RTC_H
+
+/**
+ * qemu_get_timedate: Get the current RTC time
+ * @tm: struct tm to fill in with RTC time
+ * @offset: offset in seconds to adjust the RTC time by before
+ *          converting to struct tm format.
+ *
+ * This function fills in @tm with the current RTC time, as adjusted
+ * by @offset (for example, if @offset is 3600 then the returned time/date
+ * will be one hour further ahead than the current RTC time).
+ *
+ * The usual use is by RTC device models, which should call this function
+ * to find the time/date value that they should return to the guest
+ * when it reads the RTC registers.
+ *
+ * The behaviour of the clock whose value this function returns will
+ * depend on the -rtc command line option passed by the user.
+ */
+void qemu_get_timedate(struct tm *tm, time_t offset);
+
+/**
+ * qemu_timedate_diff: Return difference between a struct tm and the RTC
+ * @tm: struct tm containing the date/time to compare against
+ *
+ * Returns the difference in seconds between the RTC clock time
+ * and the date/time specified in @tm. For example, if @tm specifies
+ * a timestamp one hour further ahead than the current RTC time
+ * then this function will return 3600.
+ */
+time_t qemu_timedate_diff(struct tm *tm);
+
+#endif
diff --git a/include/sysemu/runstate-action.h b/include/sysemu/runstate-action.h
new file mode 100644 (file)
index 0000000..db4e309
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2020 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef RUNSTATE_ACTION_H
+#define RUNSTATE_ACTION_H
+
+#include "qapi/qapi-commands-run-state.h"
+
+/* in system/runstate-action.c */
+extern RebootAction reboot_action;
+extern ShutdownAction shutdown_action;
+extern PanicAction panic_action;
+
+#endif /* RUNSTATE_ACTION_H */
index f76009485829ee23efd1cb456b74e58b30122a9f..c8c2bd8a61b37000c8fdb266fc2191c50d3eb351 100644 (file)
@@ -6,21 +6,34 @@
 
 bool runstate_check(RunState state);
 void runstate_set(RunState new_state);
-int runstate_is_running(void);
+RunState runstate_get(void);
+bool runstate_is_running(void);
 bool runstate_needs_reset(void);
-bool runstate_store(char *str, size_t size);
 
-typedef void VMChangeStateHandler(void *opaque, int running, RunState state);
+typedef void VMChangeStateHandler(void *opaque, bool running, RunState state);
 
 VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
                                                      void *opaque);
 VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
         VMChangeStateHandler *cb, void *opaque, int priority);
+VMChangeStateEntry *
+qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
+                                           VMChangeStateHandler *prepare_cb,
+                                           void *opaque, int priority);
 VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev,
                                                      VMChangeStateHandler *cb,
                                                      void *opaque);
+VMChangeStateEntry *qdev_add_vm_change_state_handler_full(
+    DeviceState *dev, VMChangeStateHandler *cb,
+    VMChangeStateHandler *prepare_cb, void *opaque);
 void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
-void vm_state_notify(int running, RunState state);
+/**
+ * vm_state_notify: Notify the state of the VM
+ *
+ * @running: whether the VM is running or not.
+ * @state: the #RunState of the VM.
+ */
+void vm_state_notify(bool running, RunState state);
 
 static inline bool shutdown_caused_by_guest(ShutdownCause cause)
 {
@@ -28,7 +41,13 @@ static inline bool shutdown_caused_by_guest(ShutdownCause cause)
 }
 
 void vm_start(void);
-int vm_prepare_start(void);
+
+/**
+ * vm_prepare_start: Prepare for starting/resuming the VM
+ *
+ * @step_pending: whether any of the CPUs is about to be single-stepped by gdb
+ */
+int vm_prepare_start(bool step_pending);
 int vm_stop(RunState state);
 int vm_stop_force_state(RunState state);
 int vm_shutdown(void);
@@ -41,7 +60,6 @@ typedef enum WakeupReason {
     QEMU_WAKEUP_REASON_OTHER,
 } WakeupReason;
 
-void qemu_exit_preconfig_request(void);
 void qemu_system_reset_request(ShutdownCause reason);
 void qemu_system_suspend_request(void);
 void qemu_register_suspend_notifier(Notifier *notifier);
@@ -50,6 +68,8 @@ void qemu_system_wakeup_request(WakeupReason reason, Error **errp);
 void qemu_system_wakeup_enable(WakeupReason reason, bool enabled);
 void qemu_register_wakeup_notifier(Notifier *notifier);
 void qemu_register_wakeup_support(void);
+void qemu_system_shutdown_request_with_code(ShutdownCause reason,
+                                            int exit_code);
 void qemu_system_shutdown_request(ShutdownCause reason);
 void qemu_system_powerdown_request(void);
 void qemu_register_powerdown_notifier(Notifier *notifier);
@@ -64,6 +84,7 @@ void qemu_system_killed(int signal, pid_t pid);
 void qemu_system_reset(ShutdownCause reason);
 void qemu_system_guest_panicked(GuestPanicInformation *info);
 void qemu_system_guest_crashloaded(GuestPanicInformation *info);
+bool qemu_system_dump_in_progress(void);
 
 #endif
 
diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h
deleted file mode 100644 (file)
index 98c1ec8..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * QEMU Secure Encrypted Virutualization (SEV) support
- *
- * Copyright: Advanced Micro Devices, 2016-2018
- *
- * Authors:
- *  Brijesh Singh <brijesh.singh@amd.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef QEMU_SEV_H
-#define QEMU_SEV_H
-
-#include "sysemu/kvm.h"
-
-void *sev_guest_init(const char *id);
-int sev_encrypt_data(void *handle, uint8_t *ptr, uint64_t len);
-#endif
diff --git a/include/sysemu/stats.h b/include/sysemu/stats.h
new file mode 100644 (file)
index 0000000..42c236c
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2022 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef STATS_H
+#define STATS_H
+
+#include "qapi/qapi-types-stats.h"
+
+typedef void StatRetrieveFunc(StatsResultList **result, StatsTarget target,
+                              strList *names, strList *targets, Error **errp);
+typedef void SchemaRetrieveFunc(StatsSchemaList **result, Error **errp);
+
+/*
+ * Register callbacks for the QMP query-stats command.
+ *
+ * @provider: stats provider checked against QMP command arguments
+ * @stats_fn: routine to query stats:
+ * @schema_fn: routine to query stat schemas:
+ */
+void add_stats_callbacks(StatsProvider provider,
+                         StatRetrieveFunc *stats_fn,
+                         SchemaRetrieveFunc *schemas_fn);
+
+/*
+ * Helper routines for adding stats entries to the results lists.
+ */
+void add_stats_entry(StatsResultList **, StatsProvider, const char *id,
+                     StatsList *stats_list);
+void add_stats_schema(StatsSchemaList **, StatsProvider, StatsTarget,
+                      StatsSchemaValueList *);
+
+/*
+ * True if a string matches the filter passed to the stats_fn callback,
+ * false otherwise.
+ *
+ * Note that an empty list means no filtering, i.e. all strings will
+ * return true.
+ */
+bool apply_str_list_filter(const char *string, strList *list);
+
+#endif /* STATS_H */
index 817ff4cf753fe95fb363ec0f8ed11f474274aa27..73a37949c24d1b5f511f6d2aaf7a7494f5af5349 100644 (file)
@@ -8,22 +8,23 @@
 
 /* vl.c */
 
-extern const char *bios_name;
 extern int only_migratable;
 extern const char *qemu_name;
 extern QemuUUID qemu_uuid;
 extern bool qemu_uuid_set;
 
-void qemu_add_data_dir(char *path);
+const char *qemu_get_vm_name(void);
 
 void qemu_add_exit_notifier(Notifier *notify);
 void qemu_remove_exit_notifier(Notifier *notify);
 
-extern bool machine_init_done;
-
 void qemu_add_machine_init_done_notifier(Notifier *notify);
 void qemu_remove_machine_init_done_notifier(Notifier *notify);
 
+void configure_rtc(QemuOpts *opts);
+
+void qemu_init_subsystems(void);
+
 extern int autostart;
 
 typedef enum {
@@ -33,6 +34,7 @@ typedef enum {
 } VGAInterfaceType;
 
 extern int vga_interface_type;
+extern bool vga_interface_created;
 
 extern int graphic_width;
 extern int graphic_height;
@@ -40,13 +42,8 @@ extern int graphic_depth;
 extern int display_opengl;
 extern const char *keyboard_layout;
 extern int win2k_install_hack;
-extern int alt_grab;
-extern int ctrl_grab;
 extern int graphic_rotate;
-extern int no_shutdown;
 extern int old_param;
-extern int boot_menu;
-extern bool boot_strict;
 extern uint8_t *boot_splash_filedata;
 extern bool enable_mlock;
 extern bool enable_cpu_pm;
@@ -64,17 +61,10 @@ extern int nb_option_roms;
 extern const char *prom_envs[MAX_PROM_ENVS];
 extern unsigned int nb_prom_envs;
 
-/* pcie aer error injection */
-void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict);
-
 /* serial ports */
 
 /* Return the Chardev for serial port i, or NULL if none */
 Chardev *serial_hd(int i);
-/* return the number of serial ports defined by the user. serial_hd(i)
- * will always return NULL for any i which is greater than or equal to this.
- */
-int serial_max_hds(void);
 
 /* parallel ports */
 
@@ -107,13 +97,11 @@ typedef void QEMUBootSetHandler(void *opaque, const char *boot_order,
 void qemu_register_boot_set(QEMUBootSetHandler *func, void *opaque);
 void qemu_boot_set(const char *boot_order, Error **errp);
 
-QemuOpts *qemu_get_machine_opts(void);
-
 bool defaults_enabled(void);
 
-void qemu_init(int argc, char **argv, char **envp);
-void qemu_main_loop(void);
-void qemu_cleanup(void);
+void qemu_init(int argc, char **argv);
+int qemu_main_loop(void);
+void qemu_cleanup(int);
 
 extern QemuOptsList qemu_legacy_drive_opts;
 extern QemuOptsList qemu_common_drive_opts;
index d9d3ca85596f9aa93ae8470a931b8d0f76aa0a8a..5e2ca9aab3da4fb3bda7c70aa86363f0da723d9f 100644 (file)
@@ -5,10 +5,11 @@
  * See the COPYING file in the top-level directory.
  */
 
+/* header to be included in non-TCG-specific code */
+
 #ifndef SYSEMU_TCG_H
 #define SYSEMU_TCG_H
 
-void tcg_exec_init(unsigned long tb_size);
 #ifdef CONFIG_TCG
 extern bool tcg_allowed;
 #define tcg_enabled() (tcg_allowed)
index 1a85564e479deda8af0996c06a4175f57b2162c3..1ee568b3b62ae2862e5878c0fa4b622d29864a49 100644 (file)
@@ -15,7 +15,9 @@
 #include "qapi/qapi-types-tpm.h"
 #include "qom/object.h"
 
-int tpm_config_parse(QemuOptsList *opts_list, const char *optarg);
+#ifdef CONFIG_TPM
+
+int tpm_config_parse(QemuOptsList *opts_list, const char *optstr);
 int tpm_init(void);
 void tpm_cleanup(void);
 
@@ -46,6 +48,7 @@ struct TPMIfClass {
 #define TYPE_TPM_TIS_SYSBUS         "tpm-tis-device"
 #define TYPE_TPM_CRB                "tpm-crb"
 #define TYPE_TPM_SPAPR              "tpm-spapr"
+#define TYPE_TPM_TIS_I2C            "tpm-tis-i2c"
 
 #define TPM_IS_TIS_ISA(chr)                         \
     object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_ISA)
@@ -55,6 +58,8 @@ struct TPMIfClass {
     object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB)
 #define TPM_IS_SPAPR(chr)                           \
     object_dynamic_cast(OBJECT(chr), TYPE_TPM_SPAPR)
+#define TPM_IS_TIS_I2C(chr)                      \
+    object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_I2C)
 
 /* returns NULL unless there is exactly one TPM device */
 static inline TPMIf *tpm_find(void)
@@ -73,4 +78,17 @@ static inline TPMVersion tpm_get_version(TPMIf *ti)
     return TPM_IF_GET_CLASS(ti)->get_version(ti);
 }
 
+#else /* CONFIG_TPM */
+
+#define tpm_init()  (0)
+#define tpm_cleanup()
+
+/* needed for an alignment check in non-tpm code */
+static inline Object *TPM_IS_CRB(Object *obj)
+{
+     return NULL;
+}
+
+#endif /* CONFIG_TPM */
+
 #endif /* QEMU_TPM_H */
index 6f078f5f4829ff55af09b87aa48ddbe38a1ce2f1..7fabafefee164f2f307a0f879f3510d650ac96e1 100644 (file)
@@ -18,6 +18,8 @@
 #include "sysemu/tpm.h"
 #include "qapi/error.h"
 
+#ifdef CONFIG_TPM
+
 #define TYPE_TPM_BACKEND "tpm-backend"
 OBJECT_DECLARE_TYPE(TPMBackend, TPMBackendClass,
                     TPM_BACKEND)
@@ -113,7 +115,7 @@ int tpm_backend_startup_tpm(TPMBackend *s, size_t buffersize);
 
 /**
  * tpm_backend_had_startup_error:
- * @s: the backend to query for a statup error
+ * @s: the backend to query for a startup error
  *
  * Check whether the backend had an error during startup. Returns
  * false if no error occurred and the backend can be used, true
@@ -209,4 +211,6 @@ TPMInfo *tpm_backend_query_tpm(TPMBackend *s);
 
 TPMBackend *qemu_find_tpm_be(const char *id);
 
-#endif
+#endif /* CONFIG_TPM */
+
+#endif /* TPM_BACKEND_H */
index a08d16380d798f2737118e0b978272310f5d9433..745c89b02bc1e7db6fb7dbf26e29b0a8242f6842 100644 (file)
 #include "qemu/queue.h"
 #include "qapi/qapi-types-run-state.h"
 
-struct WatchdogTimerModel {
-    QLIST_ENTRY(WatchdogTimerModel) entry;
-
-    /* Short name of the device - used to select it on the command line. */
-    const char *wdt_name;
-    /* Longer description (eg. manufacturer and full model number). */
-    const char *wdt_description;
-};
-typedef struct WatchdogTimerModel WatchdogTimerModel;
-
 /* in hw/watchdog.c */
-int select_watchdog(const char *p);
-int select_watchdog_action(const char *action);
 WatchdogAction get_watchdog_action(void);
-void watchdog_add_model(WatchdogTimerModel *model);
 void watchdog_perform_action(void);
 
 #endif /* QEMU_WATCHDOG_H */
index 59edf1374270f42c7c3334d91357d4ab8e6e9d8f..781ca5b2b61437e796f4d99d9f92796e1c0299fc 100644 (file)
  *
  */
 
+/* header to be included in non-WHPX-specific code */
+
 #ifndef QEMU_WHPX_H
 #define QEMU_WHPX_H
 
+#ifdef NEED_CPU_H
+
 #ifdef CONFIG_WHPX
 
 int whpx_enabled(void);
+bool whpx_apic_in_platform(void);
 
 #else /* CONFIG_WHPX */
 
 #define whpx_enabled() (0)
+#define whpx_apic_in_platform() (0)
 
 #endif /* CONFIG_WHPX */
 
+#endif /* NEED_CPU_H */
+
 #endif /* QEMU_WHPX_H */
index 0ca25697e4f4f69c8a768c0343bad9a4800e63b0..bc13ad56924a3e2177256c6644afd7e3265ad528 100644 (file)
@@ -5,6 +5,8 @@
  * See the COPYING file in the top-level directory.
  */
 
+/* header to be included in non-Xen-specific code */
+
 #ifndef SYSEMU_XEN_H
 #define SYSEMU_XEN_H
 
diff --git a/include/tcg/debug-assert.h b/include/tcg/debug-assert.h
new file mode 100644 (file)
index 0000000..596765a
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define tcg_debug_assert
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_DEBUG_ASSERT_H
+#define TCG_DEBUG_ASSERT_H
+
+#if defined CONFIG_DEBUG_TCG || defined QEMU_STATIC_ANALYSIS
+# define tcg_debug_assert(X) do { assert(X); } while (0)
+#else
+# define tcg_debug_assert(X) \
+    do { if (!(X)) { __builtin_unreachable(); } } while (0)
+#endif
+
+#endif
diff --git a/include/tcg/helper-info.h b/include/tcg/helper-info.h
new file mode 100644 (file)
index 0000000..7c27d61
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * TCG Helper Information Structure
+ *
+ * Copyright (c) 2023 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef TCG_HELPER_INFO_H
+#define TCG_HELPER_INFO_H
+
+#ifdef CONFIG_TCG_INTERPRETER
+#include <ffi.h>
+#endif
+
+/*
+ * Describe the calling convention of a given argument type.
+ */
+typedef enum {
+    TCG_CALL_RET_NORMAL,         /* by registers */
+    TCG_CALL_RET_BY_REF,         /* for i128, by reference */
+    TCG_CALL_RET_BY_VEC,         /* for i128, by vector register */
+} TCGCallReturnKind;
+
+typedef enum {
+    TCG_CALL_ARG_NORMAL,         /* by registers (continuing onto stack) */
+    TCG_CALL_ARG_EVEN,           /* like normal, but skipping odd slots */
+    TCG_CALL_ARG_EXTEND,         /* for i32, as a sign/zero-extended i64 */
+    TCG_CALL_ARG_EXTEND_U,       /*      ... as a zero-extended i64 */
+    TCG_CALL_ARG_EXTEND_S,       /*      ... as a sign-extended i64 */
+    TCG_CALL_ARG_BY_REF,         /* for i128, by reference, first */
+    TCG_CALL_ARG_BY_REF_N,       /*       ... by reference, subsequent */
+} TCGCallArgumentKind;
+
+typedef struct TCGCallArgumentLoc {
+    TCGCallArgumentKind kind    : 8;
+    unsigned arg_slot           : 8;
+    unsigned ref_slot           : 8;
+    unsigned arg_idx            : 4;
+    unsigned tmp_subindex       : 2;
+} TCGCallArgumentLoc;
+
+struct TCGHelperInfo {
+    void *func;
+    const char *name;
+
+    /* Used with g_once_init_enter. */
+#ifdef CONFIG_TCG_INTERPRETER
+    ffi_cif *cif;
+#else
+    uintptr_t init;
+#endif
+
+    unsigned typemask           : 32;
+    unsigned flags              : 8;
+    unsigned nr_in              : 8;
+    unsigned nr_out             : 8;
+    TCGCallReturnKind out_kind  : 8;
+
+    /* Maximum physical arguments are constrained by TCG_TYPE_I128. */
+    TCGCallArgumentLoc in[MAX_CALL_IARGS * (128 / TCG_TARGET_REG_BITS)];
+};
+
+#endif /* TCG_HELPER_INFO_H */
diff --git a/include/tcg/insn-start-words.h b/include/tcg/insn-start-words.h
new file mode 100644 (file)
index 0000000..50c18bd
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define TARGET_INSN_START_WORDS
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TARGET_INSN_START_WORDS
+
+#include "cpu.h"
+
+#ifndef TARGET_INSN_START_EXTRA_WORDS
+# define TARGET_INSN_START_WORDS 1
+#else
+# define TARGET_INSN_START_WORDS (1 + TARGET_INSN_START_EXTRA_WORDS)
+#endif
+
+#endif /* TARGET_INSN_START_WORDS */
diff --git a/include/tcg/oversized-guest.h b/include/tcg/oversized-guest.h
new file mode 100644 (file)
index 0000000..641b974
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define TCG_OVERSIZED_GUEST
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef EXEC_TCG_OVERSIZED_GUEST_H
+#define EXEC_TCG_OVERSIZED_GUEST_H
+
+#include "tcg-target-reg-bits.h"
+#include "cpu-param.h"
+
+/*
+ * Oversized TCG guests make things like MTTCG hard
+ * as we can't use atomics for cputlb updates.
+ */
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+#define TCG_OVERSIZED_GUEST 1
+#else
+#define TCG_OVERSIZED_GUEST 0
+#endif
+
+#endif
diff --git a/include/tcg/startup.h b/include/tcg/startup.h
new file mode 100644 (file)
index 0000000..f713057
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Tiny Code Generator for QEMU: definitions used by runtime startup
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef TCG_STARTUP_H
+#define TCG_STARTUP_H
+
+/**
+ * tcg_init: Initialize the TCG runtime
+ * @tb_size: translation buffer size
+ * @splitwx: use separate rw and rx mappings
+ * @max_cpus: number of vcpus in system mode
+ *
+ * Allocate and initialize TCG resources, especially the JIT buffer.
+ * In user-only mode, @max_cpus is unused.
+ */
+void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus);
+
+/**
+ * tcg_register_thread: Register this thread with the TCG runtime
+ *
+ * All TCG threads except the parent (i.e. the one that called the TCG
+ * accelerator's init_machine() method) must register with this
+ * function before initiating translation.
+ */
+void tcg_register_thread(void);
+
+/**
+ * tcg_prologue_init(): Generate the code for the TCG prologue
+ *
+ * In softmmu this is done automatically as part of the TCG
+ * accelerator's init_machine() method, but for user-mode, the
+ * user-mode code must call this function after it has loaded
+ * the guest binary and the value of guest_base is known.
+ */
+void tcg_prologue_init(void);
+
+#endif
diff --git a/include/tcg/tcg-cond.h b/include/tcg/tcg-cond.h
new file mode 100644 (file)
index 0000000..2a38a38
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef TCG_COND_H
+#define TCG_COND_H
+
+/*
+ * Conditions.  Note that these are laid out for easy manipulation by
+ * the functions below:
+ *    bit 0 is used for inverting;
+ *    bit 1 is signed,
+ *    bit 2 is unsigned,
+ *    bit 3 is used with bit 0 for swapping signed/unsigned.
+ */
+typedef enum {
+    /* non-signed */
+    TCG_COND_NEVER  = 0 | 0 | 0 | 0,
+    TCG_COND_ALWAYS = 0 | 0 | 0 | 1,
+    TCG_COND_EQ     = 8 | 0 | 0 | 0,
+    TCG_COND_NE     = 8 | 0 | 0 | 1,
+    /* signed */
+    TCG_COND_LT     = 0 | 0 | 2 | 0,
+    TCG_COND_GE     = 0 | 0 | 2 | 1,
+    TCG_COND_LE     = 8 | 0 | 2 | 0,
+    TCG_COND_GT     = 8 | 0 | 2 | 1,
+    /* unsigned */
+    TCG_COND_LTU    = 0 | 4 | 0 | 0,
+    TCG_COND_GEU    = 0 | 4 | 0 | 1,
+    TCG_COND_LEU    = 8 | 4 | 0 | 0,
+    TCG_COND_GTU    = 8 | 4 | 0 | 1,
+} TCGCond;
+
+/* Invert the sense of the comparison.  */
+static inline TCGCond tcg_invert_cond(TCGCond c)
+{
+    return (TCGCond)(c ^ 1);
+}
+
+/* Swap the operands in a comparison.  */
+static inline TCGCond tcg_swap_cond(TCGCond c)
+{
+    return c & 6 ? (TCGCond)(c ^ 9) : c;
+}
+
+/* Create an "unsigned" version of a "signed" comparison.  */
+static inline TCGCond tcg_unsigned_cond(TCGCond c)
+{
+    return c & 2 ? (TCGCond)(c ^ 6) : c;
+}
+
+/* Create a "signed" version of an "unsigned" comparison.  */
+static inline TCGCond tcg_signed_cond(TCGCond c)
+{
+    return c & 4 ? (TCGCond)(c ^ 6) : c;
+}
+
+/* Must a comparison be considered unsigned?  */
+static inline bool is_unsigned_cond(TCGCond c)
+{
+    return (c & 4) != 0;
+}
+
+/*
+ * Create a "high" version of a double-word comparison.
+ * This removes equality from a LTE or GTE comparison.
+ */
+static inline TCGCond tcg_high_cond(TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_GE:
+    case TCG_COND_LE:
+    case TCG_COND_GEU:
+    case TCG_COND_LEU:
+        return (TCGCond)(c ^ 8);
+    default:
+        return c;
+    }
+}
+
+#endif /* TCG_COND_H */
diff --git a/include/tcg/tcg-gvec-desc.h b/include/tcg/tcg-gvec-desc.h
new file mode 100644 (file)
index 0000000..704bd86
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Generic vector operation descriptor
+ *
+ * Copyright (c) 2018 Linaro
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TCG_TCG_GVEC_DESC_H
+#define TCG_TCG_GVEC_DESC_H
+
+/*
+ * This configuration allows MAXSZ to represent 2048 bytes, and
+ * OPRSZ to match MAXSZ, or represent the smaller values 8, 16, or 32.
+ *
+ * Encode this with:
+ *   0, 1, 3 -> 8, 16, 32
+ *   2       -> maxsz
+ *
+ * This steals the input that would otherwise map to 24 to match maxsz.
+ */
+#define SIMD_MAXSZ_SHIFT   0
+#define SIMD_MAXSZ_BITS    8
+
+#define SIMD_OPRSZ_SHIFT   (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
+#define SIMD_OPRSZ_BITS    2
+
+#define SIMD_DATA_SHIFT    (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
+#define SIMD_DATA_BITS     (32 - SIMD_DATA_SHIFT)
+
+/* Create a descriptor from components.  */
+uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data);
+
+/* Extract the max vector size from a descriptor.  */
+static inline intptr_t simd_maxsz(uint32_t desc)
+{
+    return extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) * 8 + 8;
+}
+
+/* Extract the operation size from a descriptor.  */
+static inline intptr_t simd_oprsz(uint32_t desc)
+{
+    uint32_t f = extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS);
+    intptr_t o = f * 8 + 8;
+    intptr_t m = simd_maxsz(desc);
+    return f == 2 ? m : o;
+}
+
+/* Extract the operation-specific data from a descriptor.  */
+static inline int32_t simd_data(uint32_t desc)
+{
+    return sextract32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS);
+}
+
+#endif
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
new file mode 100644 (file)
index 0000000..6ccfe91
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Memory helpers that will be used by TCG generated code.
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef TCG_LDST_H
+#define TCG_LDST_H
+
+/* Value zero-extended to tcg register size.  */
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
+                                 MemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
+                                 MemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
+                                 MemOpIdx oi, uintptr_t retaddr);
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
+                        MemOpIdx oi, uintptr_t retaddr);
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
+                       MemOpIdx oi, uintptr_t retaddr);
+
+/* Value sign-extended to tcg register size.  */
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
+                                 MemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
+                                 MemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
+                                 MemOpIdx oi, uintptr_t retaddr);
+
+/*
+ * Value extended to at least uint32_t, so that some ABIs do not require
+ * zero-extension from uint8_t or uint16_t.
+ */
+void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
+                    MemOpIdx oi, uintptr_t retaddr);
+void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
+                    MemOpIdx oi, uintptr_t retaddr);
+void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
+                    MemOpIdx oi, uintptr_t retaddr);
+void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
+                    MemOpIdx oi, uintptr_t retaddr);
+void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
+                     MemOpIdx oi, uintptr_t retaddr);
+
+#endif /* TCG_LDST_H */
diff --git a/include/tcg/tcg-mo.h b/include/tcg/tcg-mo.h
new file mode 100644 (file)
index 0000000..c2c5570
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef TCG_MO_H
+#define TCG_MO_H
+
+typedef enum {
+    /* Used to indicate the type of accesses on which ordering
+       is to be ensured.  Modeled after SPARC barriers.
+
+       This is of the form TCG_MO_A_B where A is before B in program order.
+    */
+    TCG_MO_LD_LD  = 0x01,
+    TCG_MO_ST_LD  = 0x02,
+    TCG_MO_LD_ST  = 0x04,
+    TCG_MO_ST_ST  = 0x08,
+    TCG_MO_ALL    = 0x0F,  /* OR of the above */
+
+    /* Used to indicate the kind of ordering which is to be ensured by the
+       instruction.  These types are derived from x86/aarch64 instructions.
+       It should be noted that these are different from C11 semantics.  */
+    TCG_BAR_LDAQ  = 0x10,  /* Following ops will not come forward */
+    TCG_BAR_STRL  = 0x20,  /* Previous ops will not be delayed */
+    TCG_BAR_SC    = 0x30,  /* No ops cross barrier; OR of the above */
+} TCGBar;
+
+#endif /* TCG_MO_H */
diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h
new file mode 100644 (file)
index 0000000..2d932a5
--- /dev/null
@@ -0,0 +1,561 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Target independent opcode generation functions.
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_TCG_OP_COMMON_H
+#define TCG_TCG_OP_COMMON_H
+
+#include "tcg/tcg.h"
+#include "exec/helper-proto-common.h"
+#include "exec/helper-gen-common.h"
+
+TCGv_i32 tcg_constant_i32(int32_t val);
+TCGv_i64 tcg_constant_i64(int64_t val);
+TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val);
+TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val);
+
+TCGv_i32 tcg_temp_new_i32(void);
+TCGv_i64 tcg_temp_new_i64(void);
+TCGv_ptr tcg_temp_new_ptr(void);
+TCGv_i128 tcg_temp_new_i128(void);
+TCGv_vec tcg_temp_new_vec(TCGType type);
+TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match);
+
+TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name);
+TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name);
+TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name);
+
+/* Generic ops.  */
+
+void gen_set_label(TCGLabel *l);
+void tcg_gen_br(TCGLabel *l);
+void tcg_gen_mb(TCGBar);
+
+/**
+ * tcg_gen_exit_tb() - output exit_tb TCG operation
+ * @tb: The TranslationBlock from which we are exiting
+ * @idx: Direct jump slot index, or exit request
+ *
+ * See tcg/README for more info about this TCG operation.
+ * See also tcg.h and the block comment above TB_EXIT_MASK.
+ *
+ * For a normal exit from the TB, back to the main loop, @tb should
+ * be NULL and @idx should be 0.  Otherwise, @tb should be valid and
+ * @idx should be one of the TB_EXIT_ values.
+ */
+void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx);
+
+/**
+ * tcg_gen_goto_tb() - output goto_tb TCG operation
+ * @idx: Direct jump slot index (0 or 1)
+ *
+ * See tcg/README for more info about this TCG operation.
+ *
+ * NOTE: In system emulation, direct jumps with goto_tb are only safe within
+ * the pages this TB resides in because we don't take care of direct jumps when
+ * address mapping changes, e.g. in tlb_flush(). In user mode, there's only a
+ * static address translation, so the destination address is always valid, TBs
+ * are always invalidated properly, and direct jumps are reset when mapping
+ * changes.
+ */
+void tcg_gen_goto_tb(unsigned idx);
+
+/**
+ * tcg_gen_lookup_and_goto_ptr() - look up the current TB, jump to it if valid
+ * @addr: Guest address of the target TB
+ *
+ * If the TB is not valid, jump to the epilogue.
+ *
+ * This operation is optional. If the TCG backend does not implement goto_ptr,
+ * this op is equivalent to calling tcg_gen_exit_tb() with 0 as the argument.
+ */
+void tcg_gen_lookup_and_goto_ptr(void);
+
+void tcg_gen_plugin_cb_start(unsigned from, unsigned type, unsigned wr);
+void tcg_gen_plugin_cb_end(void);
+
+/* 32 bit ops */
+
+void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg);
+void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2);
+void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
+void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
+void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_ctpop_i32(TCGv_i32 a1, TCGv_i32 a2);
+void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
+                         unsigned int ofs, unsigned int len);
+void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
+                           unsigned int ofs, unsigned int len);
+void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
+                         unsigned int ofs, unsigned int len);
+void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
+                          unsigned int ofs, unsigned int len);
+void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah,
+                          unsigned int ofs);
+void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *);
+void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *);
+void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
+                         TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
+                          TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_negsetcond_i32(TCGCond cond, TCGv_i32 ret,
+                            TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_negsetcondi_i32(TCGCond cond, TCGv_i32 ret,
+                             TCGv_i32 arg1, int32_t arg2);
+void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
+                         TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2);
+void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+                      TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh);
+void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+                      TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh);
+void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc);
+void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags);
+void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_hswap_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_umin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_umax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_abs_i32(TCGv_i32, TCGv_i32);
+
+/* Replicate a value of size @vece from @in to all the lanes in @out */
+void tcg_gen_dup_i32(unsigned vece, TCGv_i32 out, TCGv_i32 in);
+
+void tcg_gen_discard_i32(TCGv_i32 arg);
+void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg);
+
+void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset);
+
+void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset);
+
+void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg);
+
+/* 64 bit ops */
+
+void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg);
+void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2);
+void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
+void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
+void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ctpop_i64(TCGv_i64 a1, TCGv_i64 a2);
+void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
+                         unsigned int ofs, unsigned int len);
+void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
+                           unsigned int ofs, unsigned int len);
+void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
+                         unsigned int ofs, unsigned int len);
+void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
+                          unsigned int ofs, unsigned int len);
+void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah,
+                          unsigned int ofs);
+void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *);
+void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *);
+void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
+                         TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
+                          TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_negsetcond_i64(TCGCond cond, TCGv_i64 ret,
+                            TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_negsetcondi_i64(TCGCond cond, TCGv_i64 ret,
+                             TCGv_i64 arg1, int64_t arg2);
+void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
+                         TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2);
+void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+                      TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
+void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+                      TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
+void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc);
+void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags);
+void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags);
+void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_hswap_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_wswap_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_umin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_umax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_abs_i64(TCGv_i64, TCGv_i64);
+
+/* Replicate a value of size @vece from @in to all the lanes in @out */
+void tcg_gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in);
+
+void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset);
+
+void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+
+void tcg_gen_discard_i64(TCGv_i64 arg);
+void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset);
+void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg);
+
+
+/* Size changing operations.  */
+
+void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg);
+void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg);
+void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high);
+void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg);
+void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg);
+void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg);
+void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg);
+void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi);
+
+void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg);
+void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi);
+
+/* 128 bit ops */
+
+void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src);
+void tcg_gen_ld_i128(TCGv_i128 ret, TCGv_ptr base, tcg_target_long offset);
+void tcg_gen_st_i128(TCGv_i128 val, TCGv_ptr base, tcg_target_long offset);
+
+/* Local load/store bit ops */
+
+void tcg_gen_qemu_ld_i32_chk(TCGv_i32, TCGTemp *, TCGArg, MemOp, TCGType);
+void tcg_gen_qemu_st_i32_chk(TCGv_i32, TCGTemp *, TCGArg, MemOp, TCGType);
+void tcg_gen_qemu_ld_i64_chk(TCGv_i64, TCGTemp *, TCGArg, MemOp, TCGType);
+void tcg_gen_qemu_st_i64_chk(TCGv_i64, TCGTemp *, TCGArg, MemOp, TCGType);
+void tcg_gen_qemu_ld_i128_chk(TCGv_i128, TCGTemp *, TCGArg, MemOp, TCGType);
+void tcg_gen_qemu_st_i128_chk(TCGv_i128, TCGTemp *, TCGArg, MemOp, TCGType);
+
+/* Atomic ops */
+
+void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGv_i32,
+                                    TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGv_i64,
+                                    TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128,
+                                     TCGv_i128, TCGArg, MemOp, TCGType);
+
+void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGv_i32,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGv_i64,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128,
+                                        TCGv_i128, TCGArg, MemOp, TCGType);
+
+void tcg_gen_atomic_xchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                 TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_xchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                 TCGArg, MemOp, TCGType);
+
+void tcg_gen_atomic_fetch_add_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                      TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_add_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                      TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_and_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                      TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_and_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                      TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_or_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                     TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_or_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                     TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_xor_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                      TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_xor_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                      TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_smin_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_smin_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_umin_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_umin_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_smax_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_smax_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_umax_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_fetch_umax_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                       TCGArg, MemOp, TCGType);
+
+void tcg_gen_atomic_add_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                      TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_add_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                      TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_and_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                      TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_and_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                      TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_or_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                     TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_or_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                     TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_xor_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                      TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_xor_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                      TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_smin_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_smin_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_umin_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_umin_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_smax_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_smax_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_umax_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
+                                       TCGArg, MemOp, TCGType);
+void tcg_gen_atomic_umax_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
+                                       TCGArg, MemOp, TCGType);
+
+/* Vector ops */
+
+void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
+void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
+void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec, TCGv_i64);
+void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec, TCGv_ptr, tcg_target_long);
+void tcg_gen_dupi_vec(unsigned vece, TCGv_vec, uint64_t);
+void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
+void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
+void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
+void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
+
+void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
+void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
+void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
+void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
+void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
+
+void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
+void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
+void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
+void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
+
+void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
+void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
+void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
+void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
+void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
+
+void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r,
+                     TCGv_vec a, TCGv_vec b);
+
+void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
+                        TCGv_vec b, TCGv_vec c);
+void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
+                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d);
+
+void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset);
+void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset);
+void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
+
+/* Host pointer ops */
+
+#if UINTPTR_MAX == UINT32_MAX
+# define PTR  i32
+# define NAT  TCGv_i32
+#else
+# define PTR  i64
+# define NAT  TCGv_i64
+#endif
+
+TCGv_ptr tcg_constant_ptr_int(intptr_t x);
+#define tcg_constant_ptr(X)  tcg_constant_ptr_int((intptr_t)(X))
+
+static inline void tcg_gen_ld_ptr(TCGv_ptr r, TCGv_ptr a, intptr_t o)
+{
+    glue(tcg_gen_ld_,PTR)((NAT)r, a, o);
+}
+
+static inline void tcg_gen_st_ptr(TCGv_ptr r, TCGv_ptr a, intptr_t o)
+{
+    glue(tcg_gen_st_, PTR)((NAT)r, a, o);
+}
+
+static inline void tcg_gen_discard_ptr(TCGv_ptr a)
+{
+    glue(tcg_gen_discard_,PTR)((NAT)a);
+}
+
+static inline void tcg_gen_add_ptr(TCGv_ptr r, TCGv_ptr a, TCGv_ptr b)
+{
+    glue(tcg_gen_add_,PTR)((NAT)r, (NAT)a, (NAT)b);
+}
+
+static inline void tcg_gen_addi_ptr(TCGv_ptr r, TCGv_ptr a, intptr_t b)
+{
+    glue(tcg_gen_addi_,PTR)((NAT)r, (NAT)a, b);
+}
+
+static inline void tcg_gen_mov_ptr(TCGv_ptr d, TCGv_ptr s)
+{
+    glue(tcg_gen_mov_,PTR)((NAT)d, (NAT)s);
+}
+
+static inline void tcg_gen_movi_ptr(TCGv_ptr d, intptr_t s)
+{
+    glue(tcg_gen_movi_,PTR)((NAT)d, s);
+}
+
+static inline void tcg_gen_brcondi_ptr(TCGCond cond, TCGv_ptr a,
+                                       intptr_t b, TCGLabel *label)
+{
+    glue(tcg_gen_brcondi_,PTR)(cond, (NAT)a, b, label);
+}
+
+static inline void tcg_gen_ext_i32_ptr(TCGv_ptr r, TCGv_i32 a)
+{
+#if UINTPTR_MAX == UINT32_MAX
+    tcg_gen_mov_i32((NAT)r, a);
+#else
+    tcg_gen_ext_i32_i64((NAT)r, a);
+#endif
+}
+
+static inline void tcg_gen_trunc_i64_ptr(TCGv_ptr r, TCGv_i64 a)
+{
+#if UINTPTR_MAX == UINT32_MAX
+    tcg_gen_extrl_i64_i32((NAT)r, a);
+#else
+    tcg_gen_mov_i64((NAT)r, a);
+#endif
+}
+
+static inline void tcg_gen_extu_ptr_i64(TCGv_i64 r, TCGv_ptr a)
+{
+#if UINTPTR_MAX == UINT32_MAX
+    tcg_gen_extu_i32_i64(r, (NAT)a);
+#else
+    tcg_gen_mov_i64(r, (NAT)a);
+#endif
+}
+
+static inline void tcg_gen_trunc_ptr_i32(TCGv_i32 r, TCGv_ptr a)
+{
+#if UINTPTR_MAX == UINT32_MAX
+    tcg_gen_mov_i32(r, (NAT)a);
+#else
+    tcg_gen_extrl_i64_i32(r, (NAT)a);
+#endif
+}
+
+#undef PTR
+#undef NAT
+
+#endif /* TCG_TCG_OP_COMMON_H */
diff --git a/include/tcg/tcg-op-gvec-common.h b/include/tcg/tcg-op-gvec-common.h
new file mode 100644 (file)
index 0000000..4db8a58
--- /dev/null
@@ -0,0 +1,432 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Target independent generic vector operation expansion
+ *
+ * Copyright (c) 2018 Linaro
+ */
+
+#ifndef TCG_TCG_OP_GVEC_COMMON_H
+#define TCG_TCG_OP_GVEC_COMMON_H
+
+/*
+ * "Generic" vectors.  All operands are given as offsets from ENV,
+ * and therefore cannot also be allocated via tcg_global_mem_new_*.
+ * OPRSZ is the byte size of the vector upon which the operation is performed.
+ * MAXSZ is the byte size of the full vector; bytes beyond OPSZ are cleared.
+ *
+ * All sizes must be 8 or any multiple of 16.
+ * When OPRSZ is 8, the alignment may be 8, otherwise must be 16.
+ * Operands may completely, but not partially, overlap.
+ */
+
+/* Expand a call to a gvec-style helper, with pointers to two vector
+   operands, and a descriptor (see tcg-gvec-desc.h).  */
+typedef void gen_helper_gvec_2(TCGv_ptr, TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
+                        uint32_t oprsz, uint32_t maxsz, int32_t data,
+                        gen_helper_gvec_2 *fn);
+
+/* Similarly, passing an extra data value.  */
+typedef void gen_helper_gvec_2i(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
+void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c,
+                         uint32_t oprsz, uint32_t maxsz, int32_t data,
+                         gen_helper_gvec_2i *fn);
+
+/* Similarly, passing an extra pointer (e.g. env or float_status).  */
+typedef void gen_helper_gvec_2_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs,
+                        TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
+                        int32_t data, gen_helper_gvec_2_ptr *fn);
+
+/* Similarly, with three vector operands.  */
+typedef void gen_helper_gvec_3(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                        uint32_t oprsz, uint32_t maxsz, int32_t data,
+                        gen_helper_gvec_3 *fn);
+
+/* Similarly, with four vector operands.  */
+typedef void gen_helper_gvec_4(TCGv_ptr, TCGv_ptr, TCGv_ptr,
+                               TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                        uint32_t cofs, uint32_t oprsz, uint32_t maxsz,
+                        int32_t data, gen_helper_gvec_4 *fn);
+
+/* Similarly, with five vector operands.  */
+typedef void gen_helper_gvec_5(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr,
+                               TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_5_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                        uint32_t cofs, uint32_t xofs, uint32_t oprsz,
+                        uint32_t maxsz, int32_t data, gen_helper_gvec_5 *fn);
+
+typedef void gen_helper_gvec_3_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr,
+                                   TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                        TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
+                        int32_t data, gen_helper_gvec_3_ptr *fn);
+
+typedef void gen_helper_gvec_4_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr,
+                                   TCGv_ptr, TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                        uint32_t cofs, TCGv_ptr ptr, uint32_t oprsz,
+                        uint32_t maxsz, int32_t data,
+                        gen_helper_gvec_4_ptr *fn);
+
+typedef void gen_helper_gvec_5_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr,
+                                   TCGv_ptr, TCGv_ptr, TCGv_i32);
+void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                        uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
+                        uint32_t oprsz, uint32_t maxsz, int32_t data,
+                        gen_helper_gvec_5_ptr *fn);
+
+/* Expand a gvec operation.  Either inline or out-of-line depending on
+   the actual vector size and the operations supported by the host.  */
+typedef struct {
+    /* Expand inline as a 64-bit or 32-bit integer.
+       Only one of these will be non-NULL.  */
+    void (*fni8)(TCGv_i64, TCGv_i64);
+    void (*fni4)(TCGv_i32, TCGv_i32);
+    /* Expand inline with a host vector type.  */
+    void (*fniv)(unsigned, TCGv_vec, TCGv_vec);
+    /* Expand out-of-line helper w/descriptor.  */
+    gen_helper_gvec_2 *fno;
+    /* The optional opcodes, if any, utilized by .fniv.  */
+    const TCGOpcode *opt_opc;
+    /* The data argument to the out-of-line helper.  */
+    int32_t data;
+    /* The vector element size, if applicable.  */
+    uint8_t vece;
+    /* Prefer i64 to v64.  */
+    bool prefer_i64;
+    /* Load dest as a 2nd source operand.  */
+    bool load_dest;
+} GVecGen2;
+
+typedef struct {
+    /* Expand inline as a 64-bit or 32-bit integer.
+       Only one of these will be non-NULL.  */
+    void (*fni8)(TCGv_i64, TCGv_i64, int64_t);
+    void (*fni4)(TCGv_i32, TCGv_i32, int32_t);
+    /* Expand inline with a host vector type.  */
+    void (*fniv)(unsigned, TCGv_vec, TCGv_vec, int64_t);
+    /* Expand out-of-line helper w/descriptor, data in descriptor.  */
+    gen_helper_gvec_2 *fno;
+    /* Expand out-of-line helper w/descriptor, data as argument.  */
+    gen_helper_gvec_2i *fnoi;
+    /* The optional opcodes, if any, utilized by .fniv.  */
+    const TCGOpcode *opt_opc;
+    /* The vector element size, if applicable.  */
+    uint8_t vece;
+    /* Prefer i64 to v64.  */
+    bool prefer_i64;
+    /* Load dest as a 3rd source operand.  */
+    bool load_dest;
+} GVecGen2i;
+
+typedef struct {
+    /* Expand inline as a 64-bit or 32-bit integer.
+       Only one of these will be non-NULL.  */
+    void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64);
+    void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32);
+    /* Expand inline with a host vector type.  */
+    void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec);
+    /* Expand out-of-line helper w/descriptor.  */
+    gen_helper_gvec_2i *fno;
+    /* The optional opcodes, if any, utilized by .fniv.  */
+    const TCGOpcode *opt_opc;
+    /* The data argument to the out-of-line helper.  */
+    uint32_t data;
+    /* The vector element size, if applicable.  */
+    uint8_t vece;
+    /* Prefer i64 to v64.  */
+    bool prefer_i64;
+    /* Load scalar as 1st source operand.  */
+    bool scalar_first;
+} GVecGen2s;
+
+typedef struct {
+    /* Expand inline as a 64-bit or 32-bit integer.
+       Only one of these will be non-NULL.  */
+    void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64);
+    void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32);
+    /* Expand inline with a host vector type.  */
+    void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec);
+    /* Expand out-of-line helper w/descriptor.  */
+    gen_helper_gvec_3 *fno;
+    /* The optional opcodes, if any, utilized by .fniv.  */
+    const TCGOpcode *opt_opc;
+    /* The data argument to the out-of-line helper.  */
+    int32_t data;
+    /* The vector element size, if applicable.  */
+    uint8_t vece;
+    /* Prefer i64 to v64.  */
+    bool prefer_i64;
+    /* Load dest as a 3rd source operand.  */
+    bool load_dest;
+} GVecGen3;
+
+typedef struct {
+    /*
+     * Expand inline as a 64-bit or 32-bit integer. Only one of these will be
+     * non-NULL.
+     */
+    void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, int64_t);
+    void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32, int32_t);
+    /* Expand inline with a host vector type.  */
+    void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, int64_t);
+    /* Expand out-of-line helper w/descriptor, data in descriptor.  */
+    gen_helper_gvec_3 *fno;
+    /* The optional opcodes, if any, utilized by .fniv.  */
+    const TCGOpcode *opt_opc;
+    /* The vector element size, if applicable.  */
+    uint8_t vece;
+    /* Prefer i64 to v64.  */
+    bool prefer_i64;
+    /* Load dest as a 3rd source operand.  */
+    bool load_dest;
+} GVecGen3i;
+
+typedef struct {
+    /* Expand inline as a 64-bit or 32-bit integer.
+       Only one of these will be non-NULL.  */
+    void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64);
+    void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
+    /* Expand inline with a host vector type.  */
+    void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec);
+    /* Expand out-of-line helper w/descriptor.  */
+    gen_helper_gvec_4 *fno;
+    /* The optional opcodes, if any, utilized by .fniv.  */
+    const TCGOpcode *opt_opc;
+    /* The data argument to the out-of-line helper.  */
+    int32_t data;
+    /* The vector element size, if applicable.  */
+    uint8_t vece;
+    /* Prefer i64 to v64.  */
+    bool prefer_i64;
+    /* Write aofs as a 2nd dest operand.  */
+    bool write_aofs;
+} GVecGen4;
+
+typedef struct {
+    /*
+     * Expand inline as a 64-bit or 32-bit integer. Only one of these will be
+     * non-NULL.
+     */
+    void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, int64_t);
+    void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, int32_t);
+    /* Expand inline with a host vector type.  */
+    void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec, int64_t);
+    /* Expand out-of-line helper w/descriptor, data in descriptor.  */
+    gen_helper_gvec_4 *fno;
+    /* The optional opcodes, if any, utilized by .fniv.  */
+    const TCGOpcode *opt_opc;
+    /* The vector element size, if applicable.  */
+    uint8_t vece;
+    /* Prefer i64 to v64.  */
+    bool prefer_i64;
+} GVecGen4i;
+
+void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
+                    uint32_t oprsz, uint32_t maxsz, const GVecGen2 *);
+void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+                     uint32_t maxsz, int64_t c, const GVecGen2i *);
+void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+                     uint32_t maxsz, TCGv_i64 c, const GVecGen2s *);
+void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                    uint32_t oprsz, uint32_t maxsz, const GVecGen3 *);
+void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                     uint32_t oprsz, uint32_t maxsz, int64_t c,
+                     const GVecGen3i *);
+void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
+                    uint32_t oprsz, uint32_t maxsz, const GVecGen4 *);
+void tcg_gen_gvec_4i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
+                     uint32_t oprsz, uint32_t maxsz, int64_t c,
+                     const GVecGen4i *);
+
+/* Expand a specific vector operation.  */
+
+void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_abs(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_mul(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_addi(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       int64_t c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_muli(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       int64_t c, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_adds(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_subs(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_muls(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+
+/* Saturated arithmetic.  */
+void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_sssub(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_ussub(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+
+/* Min/max.  */
+void tcg_gen_gvec_smin(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_umin(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_smax(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_umax(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_and(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_or(unsigned vece, uint32_t dofs, uint32_t aofs,
+                     uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_nand(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_nor(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_eqv(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       int64_t c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_xori(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       int64_t c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      int64_t c, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
+                          uint32_t s, uint32_t m);
+void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t s,
+                          uint32_t m, uint64_t imm);
+void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s,
+                          uint32_t m, TCGv_i32);
+void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s,
+                          uint32_t m, TCGv_i64);
+
+void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       int64_t shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       int64_t shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       int64_t shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        int64_t shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        int64_t shift, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_shls(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_shrs(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
+
+/*
+ * Perform vector shift by vector element, modulo the element size.
+ * E.g.  D[i] = A[i] << (B[i] % (8 << vece)).
+ */
+void tcg_gen_gvec_shlv(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_shrv(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs,
+                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotlv(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+
+void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
+                      uint32_t aofs, uint32_t bofs,
+                      uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
+                       uint32_t aofs, int64_t c,
+                       uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
+                       uint32_t aofs, TCGv_i64 c,
+                       uint32_t oprsz, uint32_t maxsz);
+
+/*
+ * Perform vector bit select: d = (b & a) | (c & ~a).
+ */
+void tcg_gen_gvec_bitsel(unsigned vece, uint32_t dofs, uint32_t aofs,
+                         uint32_t bofs, uint32_t cofs,
+                         uint32_t oprsz, uint32_t maxsz);
+
+/*
+ * 64-bit vector operations.  Use these when the register has been allocated
+ * with tcg_global_mem_new_i64, and so we cannot also address it via pointer.
+ * OPRSZ = MAXSZ = 8.
+ */
+
+void tcg_gen_vec_neg8_i64(TCGv_i64 d, TCGv_i64 a);
+void tcg_gen_vec_neg16_i64(TCGv_i64 d, TCGv_i64 a);
+void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 a);
+
+void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+
+void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+
+void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
+void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
+
+/* 32-bit vector operations. */
+void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+
+void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+
+void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t);
+
+#endif
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
new file mode 100644 (file)
index 0000000..b0a81ad
--- /dev/null
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Target dependent generic vector operation expansion
+ *
+ * Copyright (c) 2018 Linaro
+ */
+
+#ifndef TCG_TCG_OP_GVEC_H
+#define TCG_TCG_OP_GVEC_H
+
+#include "tcg/tcg-op-gvec-common.h"
+
+#ifndef TARGET_LONG_BITS
+#error must include QEMU headers
+#endif
+
+#if TARGET_LONG_BITS == 64
+#define tcg_gen_gvec_dup_tl  tcg_gen_gvec_dup_i64
+#define tcg_gen_vec_add8_tl  tcg_gen_vec_add8_i64
+#define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i64
+#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64
+#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64
+#define tcg_gen_vec_add32_tl tcg_gen_vec_add32_i64
+#define tcg_gen_vec_sub32_tl tcg_gen_vec_sub32_i64
+#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i64
+#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i64
+#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i64
+#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i64
+#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i64
+#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i64
+#elif TARGET_LONG_BITS == 32
+#define tcg_gen_gvec_dup_tl  tcg_gen_gvec_dup_i32
+#define tcg_gen_vec_add8_tl  tcg_gen_vec_add8_i32
+#define tcg_gen_vec_sub8_tl  tcg_gen_vec_sub8_i32
+#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32
+#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32
+#define tcg_gen_vec_add32_tl tcg_gen_add_i32
+#define tcg_gen_vec_sub32_tl tcg_gen_sub_i32
+#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i32
+#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i32
+#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i32
+#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i32
+#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i32
+#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i32
+#else
+# error
+#endif
+
+#endif
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
new file mode 100644 (file)
index 0000000..a028505
--- /dev/null
@@ -0,0 +1,410 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Target dependent opcode generation functions.
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ */
+
+#ifndef TCG_TCG_OP_H
+#define TCG_TCG_OP_H
+
+#include "tcg/tcg-op-common.h"
+
+#ifndef TARGET_LONG_BITS
+#error must include QEMU headers
+#endif
+
+#if TARGET_LONG_BITS == 32
+# define TCG_TYPE_TL  TCG_TYPE_I32
+#elif TARGET_LONG_BITS == 64
+# define TCG_TYPE_TL  TCG_TYPE_I64
+#else
+# error
+#endif
+
+#ifndef TARGET_INSN_START_EXTRA_WORDS
+static inline void tcg_gen_insn_start(target_ulong pc)
+{
+    TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 64 / TCG_TARGET_REG_BITS);
+    tcg_set_insn_start_param(op, 0, pc);
+}
+#elif TARGET_INSN_START_EXTRA_WORDS == 1
+static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
+{
+    TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 2 * 64 / TCG_TARGET_REG_BITS);
+    tcg_set_insn_start_param(op, 0, pc);
+    tcg_set_insn_start_param(op, 1, a1);
+}
+#elif TARGET_INSN_START_EXTRA_WORDS == 2
+static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
+                                      target_ulong a2)
+{
+    TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 3 * 64 / TCG_TARGET_REG_BITS);
+    tcg_set_insn_start_param(op, 0, pc);
+    tcg_set_insn_start_param(op, 1, a1);
+    tcg_set_insn_start_param(op, 2, a2);
+}
+#else
+#error Unhandled TARGET_INSN_START_EXTRA_WORDS value
+#endif
+
+#if TARGET_LONG_BITS == 32
+typedef TCGv_i32 TCGv;
+#define tcg_temp_new() tcg_temp_new_i32()
+#define tcg_global_mem_new tcg_global_mem_new_i32
+#define tcgv_tl_temp tcgv_i32_temp
+#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32
+#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32
+#elif TARGET_LONG_BITS == 64
+typedef TCGv_i64 TCGv;
+#define tcg_temp_new() tcg_temp_new_i64()
+#define tcg_global_mem_new tcg_global_mem_new_i64
+#define tcgv_tl_temp tcgv_i64_temp
+#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64
+#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64
+#else
+#error Unhandled TARGET_LONG_BITS value
+#endif
+
+static inline void
+tcg_gen_qemu_ld_i32(TCGv_i32 v, TCGv a, TCGArg i, MemOp m)
+{
+    tcg_gen_qemu_ld_i32_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
+}
+
+static inline void
+tcg_gen_qemu_st_i32(TCGv_i32 v, TCGv a, TCGArg i, MemOp m)
+{
+    tcg_gen_qemu_st_i32_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
+}
+
+static inline void
+tcg_gen_qemu_ld_i64(TCGv_i64 v, TCGv a, TCGArg i, MemOp m)
+{
+    tcg_gen_qemu_ld_i64_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
+}
+
+static inline void
+tcg_gen_qemu_st_i64(TCGv_i64 v, TCGv a, TCGArg i, MemOp m)
+{
+    tcg_gen_qemu_st_i64_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
+}
+
+static inline void
+tcg_gen_qemu_ld_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
+{
+    tcg_gen_qemu_ld_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
+}
+
+static inline void
+tcg_gen_qemu_st_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
+{
+    tcg_gen_qemu_st_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
+}
+
+#define DEF_ATOMIC2(N, S)                                               \
+    static inline void N##_##S(TCGv_##S r, TCGv a, TCGv_##S v,          \
+                               TCGArg i, MemOp m)                       \
+    { N##_##S##_chk(r, tcgv_tl_temp(a), v, i, m, TCG_TYPE_TL); }
+
+#define DEF_ATOMIC3(N, S)                                               \
+    static inline void N##_##S(TCGv_##S r, TCGv a, TCGv_##S o,          \
+                               TCGv_##S n, TCGArg i, MemOp m)           \
+    { N##_##S##_chk(r, tcgv_tl_temp(a), o, n, i, m, TCG_TYPE_TL); }
+
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i32)
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i64)
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i128)
+
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i32)
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i64)
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i128)
+
+DEF_ATOMIC2(tcg_gen_atomic_xchg, i32)
+DEF_ATOMIC2(tcg_gen_atomic_xchg, i64)
+
+DEF_ATOMIC2(tcg_gen_atomic_fetch_add, i32)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_add, i64)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i32)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i64)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i32)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i64)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_xor, i32)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_xor, i64)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smin, i32)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smin, i64)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umin, i32)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umin, i64)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smax, i32)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smax, i64)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umax, i32)
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umax, i64)
+
+DEF_ATOMIC2(tcg_gen_atomic_add_fetch, i32)
+DEF_ATOMIC2(tcg_gen_atomic_add_fetch, i64)
+DEF_ATOMIC2(tcg_gen_atomic_and_fetch, i32)
+DEF_ATOMIC2(tcg_gen_atomic_and_fetch, i64)
+DEF_ATOMIC2(tcg_gen_atomic_or_fetch, i32)
+DEF_ATOMIC2(tcg_gen_atomic_or_fetch, i64)
+DEF_ATOMIC2(tcg_gen_atomic_xor_fetch, i32)
+DEF_ATOMIC2(tcg_gen_atomic_xor_fetch, i64)
+DEF_ATOMIC2(tcg_gen_atomic_smin_fetch, i32)
+DEF_ATOMIC2(tcg_gen_atomic_smin_fetch, i64)
+DEF_ATOMIC2(tcg_gen_atomic_umin_fetch, i32)
+DEF_ATOMIC2(tcg_gen_atomic_umin_fetch, i64)
+DEF_ATOMIC2(tcg_gen_atomic_smax_fetch, i32)
+DEF_ATOMIC2(tcg_gen_atomic_smax_fetch, i64)
+DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i32)
+DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64)
+
+#undef DEF_ATOMIC2
+#undef DEF_ATOMIC3
+
+#if TARGET_LONG_BITS == 64
+#define tcg_gen_movi_tl tcg_gen_movi_i64
+#define tcg_gen_mov_tl tcg_gen_mov_i64
+#define tcg_gen_ld8u_tl tcg_gen_ld8u_i64
+#define tcg_gen_ld8s_tl tcg_gen_ld8s_i64
+#define tcg_gen_ld16u_tl tcg_gen_ld16u_i64
+#define tcg_gen_ld16s_tl tcg_gen_ld16s_i64
+#define tcg_gen_ld32u_tl tcg_gen_ld32u_i64
+#define tcg_gen_ld32s_tl tcg_gen_ld32s_i64
+#define tcg_gen_ld_tl tcg_gen_ld_i64
+#define tcg_gen_st8_tl tcg_gen_st8_i64
+#define tcg_gen_st16_tl tcg_gen_st16_i64
+#define tcg_gen_st32_tl tcg_gen_st32_i64
+#define tcg_gen_st_tl tcg_gen_st_i64
+#define tcg_gen_add_tl tcg_gen_add_i64
+#define tcg_gen_addi_tl tcg_gen_addi_i64
+#define tcg_gen_sub_tl tcg_gen_sub_i64
+#define tcg_gen_neg_tl tcg_gen_neg_i64
+#define tcg_gen_abs_tl tcg_gen_abs_i64
+#define tcg_gen_subfi_tl tcg_gen_subfi_i64
+#define tcg_gen_subi_tl tcg_gen_subi_i64
+#define tcg_gen_and_tl tcg_gen_and_i64
+#define tcg_gen_andi_tl tcg_gen_andi_i64
+#define tcg_gen_or_tl tcg_gen_or_i64
+#define tcg_gen_ori_tl tcg_gen_ori_i64
+#define tcg_gen_xor_tl tcg_gen_xor_i64
+#define tcg_gen_xori_tl tcg_gen_xori_i64
+#define tcg_gen_not_tl tcg_gen_not_i64
+#define tcg_gen_shl_tl tcg_gen_shl_i64
+#define tcg_gen_shli_tl tcg_gen_shli_i64
+#define tcg_gen_shr_tl tcg_gen_shr_i64
+#define tcg_gen_shri_tl tcg_gen_shri_i64
+#define tcg_gen_sar_tl tcg_gen_sar_i64
+#define tcg_gen_sari_tl tcg_gen_sari_i64
+#define tcg_gen_brcond_tl tcg_gen_brcond_i64
+#define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64
+#define tcg_gen_negsetcond_tl tcg_gen_negsetcond_i64
+#define tcg_gen_negsetcondi_tl tcg_gen_negsetcondi_i64
+#define tcg_gen_mul_tl tcg_gen_mul_i64
+#define tcg_gen_muli_tl tcg_gen_muli_i64
+#define tcg_gen_div_tl tcg_gen_div_i64
+#define tcg_gen_rem_tl tcg_gen_rem_i64
+#define tcg_gen_divu_tl tcg_gen_divu_i64
+#define tcg_gen_remu_tl tcg_gen_remu_i64
+#define tcg_gen_discard_tl tcg_gen_discard_i64
+#define tcg_gen_trunc_tl_i32 tcg_gen_extrl_i64_i32
+#define tcg_gen_trunc_i64_tl tcg_gen_mov_i64
+#define tcg_gen_extu_i32_tl tcg_gen_extu_i32_i64
+#define tcg_gen_ext_i32_tl tcg_gen_ext_i32_i64
+#define tcg_gen_extu_tl_i64 tcg_gen_mov_i64
+#define tcg_gen_ext_tl_i64 tcg_gen_mov_i64
+#define tcg_gen_ext8u_tl tcg_gen_ext8u_i64
+#define tcg_gen_ext8s_tl tcg_gen_ext8s_i64
+#define tcg_gen_ext16u_tl tcg_gen_ext16u_i64
+#define tcg_gen_ext16s_tl tcg_gen_ext16s_i64
+#define tcg_gen_ext32u_tl tcg_gen_ext32u_i64
+#define tcg_gen_ext32s_tl tcg_gen_ext32s_i64
+#define tcg_gen_ext_tl tcg_gen_ext_i64
+#define tcg_gen_bswap16_tl tcg_gen_bswap16_i64
+#define tcg_gen_bswap32_tl tcg_gen_bswap32_i64
+#define tcg_gen_bswap64_tl tcg_gen_bswap64_i64
+#define tcg_gen_bswap_tl tcg_gen_bswap64_i64
+#define tcg_gen_hswap_tl tcg_gen_hswap_i64
+#define tcg_gen_wswap_tl tcg_gen_wswap_i64
+#define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64
+#define tcg_gen_extr_i64_tl tcg_gen_extr32_i64
+#define tcg_gen_andc_tl tcg_gen_andc_i64
+#define tcg_gen_eqv_tl tcg_gen_eqv_i64
+#define tcg_gen_nand_tl tcg_gen_nand_i64
+#define tcg_gen_nor_tl tcg_gen_nor_i64
+#define tcg_gen_orc_tl tcg_gen_orc_i64
+#define tcg_gen_clz_tl tcg_gen_clz_i64
+#define tcg_gen_ctz_tl tcg_gen_ctz_i64
+#define tcg_gen_clzi_tl tcg_gen_clzi_i64
+#define tcg_gen_ctzi_tl tcg_gen_ctzi_i64
+#define tcg_gen_clrsb_tl tcg_gen_clrsb_i64
+#define tcg_gen_ctpop_tl tcg_gen_ctpop_i64
+#define tcg_gen_rotl_tl tcg_gen_rotl_i64
+#define tcg_gen_rotli_tl tcg_gen_rotli_i64
+#define tcg_gen_rotr_tl tcg_gen_rotr_i64
+#define tcg_gen_rotri_tl tcg_gen_rotri_i64
+#define tcg_gen_deposit_tl tcg_gen_deposit_i64
+#define tcg_gen_deposit_z_tl tcg_gen_deposit_z_i64
+#define tcg_gen_extract_tl tcg_gen_extract_i64
+#define tcg_gen_sextract_tl tcg_gen_sextract_i64
+#define tcg_gen_extract2_tl tcg_gen_extract2_i64
+#define tcg_constant_tl tcg_constant_i64
+#define tcg_gen_movcond_tl tcg_gen_movcond_i64
+#define tcg_gen_add2_tl tcg_gen_add2_i64
+#define tcg_gen_sub2_tl tcg_gen_sub2_i64
+#define tcg_gen_mulu2_tl tcg_gen_mulu2_i64
+#define tcg_gen_muls2_tl tcg_gen_muls2_i64
+#define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i64
+#define tcg_gen_smin_tl tcg_gen_smin_i64
+#define tcg_gen_umin_tl tcg_gen_umin_i64
+#define tcg_gen_smax_tl tcg_gen_smax_i64
+#define tcg_gen_umax_tl tcg_gen_umax_i64
+#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i64
+#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i64
+#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i64
+#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i64
+#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i64
+#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i64
+#define tcg_gen_atomic_fetch_smin_tl tcg_gen_atomic_fetch_smin_i64
+#define tcg_gen_atomic_fetch_umin_tl tcg_gen_atomic_fetch_umin_i64
+#define tcg_gen_atomic_fetch_smax_tl tcg_gen_atomic_fetch_smax_i64
+#define tcg_gen_atomic_fetch_umax_tl tcg_gen_atomic_fetch_umax_i64
+#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i64
+#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i64
+#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i64
+#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i64
+#define tcg_gen_atomic_smin_fetch_tl tcg_gen_atomic_smin_fetch_i64
+#define tcg_gen_atomic_umin_fetch_tl tcg_gen_atomic_umin_fetch_i64
+#define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i64
+#define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i64
+#define tcg_gen_dup_tl_vec  tcg_gen_dup_i64_vec
+#define tcg_gen_dup_tl tcg_gen_dup_i64
+#define dup_const_tl dup_const
+#else
+#define tcg_gen_movi_tl tcg_gen_movi_i32
+#define tcg_gen_mov_tl tcg_gen_mov_i32
+#define tcg_gen_ld8u_tl tcg_gen_ld8u_i32
+#define tcg_gen_ld8s_tl tcg_gen_ld8s_i32
+#define tcg_gen_ld16u_tl tcg_gen_ld16u_i32
+#define tcg_gen_ld16s_tl tcg_gen_ld16s_i32
+#define tcg_gen_ld32u_tl tcg_gen_ld_i32
+#define tcg_gen_ld32s_tl tcg_gen_ld_i32
+#define tcg_gen_ld_tl tcg_gen_ld_i32
+#define tcg_gen_st8_tl tcg_gen_st8_i32
+#define tcg_gen_st16_tl tcg_gen_st16_i32
+#define tcg_gen_st32_tl tcg_gen_st_i32
+#define tcg_gen_st_tl tcg_gen_st_i32
+#define tcg_gen_add_tl tcg_gen_add_i32
+#define tcg_gen_addi_tl tcg_gen_addi_i32
+#define tcg_gen_sub_tl tcg_gen_sub_i32
+#define tcg_gen_neg_tl tcg_gen_neg_i32
+#define tcg_gen_abs_tl tcg_gen_abs_i32
+#define tcg_gen_subfi_tl tcg_gen_subfi_i32
+#define tcg_gen_subi_tl tcg_gen_subi_i32
+#define tcg_gen_and_tl tcg_gen_and_i32
+#define tcg_gen_andi_tl tcg_gen_andi_i32
+#define tcg_gen_or_tl tcg_gen_or_i32
+#define tcg_gen_ori_tl tcg_gen_ori_i32
+#define tcg_gen_xor_tl tcg_gen_xor_i32
+#define tcg_gen_xori_tl tcg_gen_xori_i32
+#define tcg_gen_not_tl tcg_gen_not_i32
+#define tcg_gen_shl_tl tcg_gen_shl_i32
+#define tcg_gen_shli_tl tcg_gen_shli_i32
+#define tcg_gen_shr_tl tcg_gen_shr_i32
+#define tcg_gen_shri_tl tcg_gen_shri_i32
+#define tcg_gen_sar_tl tcg_gen_sar_i32
+#define tcg_gen_sari_tl tcg_gen_sari_i32
+#define tcg_gen_brcond_tl tcg_gen_brcond_i32
+#define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32
+#define tcg_gen_negsetcond_tl tcg_gen_negsetcond_i32
+#define tcg_gen_negsetcondi_tl tcg_gen_negsetcondi_i32
+#define tcg_gen_mul_tl tcg_gen_mul_i32
+#define tcg_gen_muli_tl tcg_gen_muli_i32
+#define tcg_gen_div_tl tcg_gen_div_i32
+#define tcg_gen_rem_tl tcg_gen_rem_i32
+#define tcg_gen_divu_tl tcg_gen_divu_i32
+#define tcg_gen_remu_tl tcg_gen_remu_i32
+#define tcg_gen_discard_tl tcg_gen_discard_i32
+#define tcg_gen_trunc_tl_i32 tcg_gen_mov_i32
+#define tcg_gen_trunc_i64_tl tcg_gen_extrl_i64_i32
+#define tcg_gen_extu_i32_tl tcg_gen_mov_i32
+#define tcg_gen_ext_i32_tl tcg_gen_mov_i32
+#define tcg_gen_extu_tl_i64 tcg_gen_extu_i32_i64
+#define tcg_gen_ext_tl_i64 tcg_gen_ext_i32_i64
+#define tcg_gen_ext8u_tl tcg_gen_ext8u_i32
+#define tcg_gen_ext8s_tl tcg_gen_ext8s_i32
+#define tcg_gen_ext16u_tl tcg_gen_ext16u_i32
+#define tcg_gen_ext16s_tl tcg_gen_ext16s_i32
+#define tcg_gen_ext32u_tl tcg_gen_mov_i32
+#define tcg_gen_ext32s_tl tcg_gen_mov_i32
+#define tcg_gen_ext_tl tcg_gen_ext_i32
+#define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
+#define tcg_gen_bswap32_tl(D, S, F) tcg_gen_bswap32_i32(D, S)
+#define tcg_gen_bswap_tl tcg_gen_bswap32_i32
+#define tcg_gen_hswap_tl tcg_gen_hswap_i32
+#define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64
+#define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32
+#define tcg_gen_andc_tl tcg_gen_andc_i32
+#define tcg_gen_eqv_tl tcg_gen_eqv_i32
+#define tcg_gen_nand_tl tcg_gen_nand_i32
+#define tcg_gen_nor_tl tcg_gen_nor_i32
+#define tcg_gen_orc_tl tcg_gen_orc_i32
+#define tcg_gen_clz_tl tcg_gen_clz_i32
+#define tcg_gen_ctz_tl tcg_gen_ctz_i32
+#define tcg_gen_clzi_tl tcg_gen_clzi_i32
+#define tcg_gen_ctzi_tl tcg_gen_ctzi_i32
+#define tcg_gen_clrsb_tl tcg_gen_clrsb_i32
+#define tcg_gen_ctpop_tl tcg_gen_ctpop_i32
+#define tcg_gen_rotl_tl tcg_gen_rotl_i32
+#define tcg_gen_rotli_tl tcg_gen_rotli_i32
+#define tcg_gen_rotr_tl tcg_gen_rotr_i32
+#define tcg_gen_rotri_tl tcg_gen_rotri_i32
+#define tcg_gen_deposit_tl tcg_gen_deposit_i32
+#define tcg_gen_deposit_z_tl tcg_gen_deposit_z_i32
+#define tcg_gen_extract_tl tcg_gen_extract_i32
+#define tcg_gen_sextract_tl tcg_gen_sextract_i32
+#define tcg_gen_extract2_tl tcg_gen_extract2_i32
+#define tcg_constant_tl tcg_constant_i32
+#define tcg_gen_movcond_tl tcg_gen_movcond_i32
+#define tcg_gen_add2_tl tcg_gen_add2_i32
+#define tcg_gen_sub2_tl tcg_gen_sub2_i32
+#define tcg_gen_mulu2_tl tcg_gen_mulu2_i32
+#define tcg_gen_muls2_tl tcg_gen_muls2_i32
+#define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i32
+#define tcg_gen_smin_tl tcg_gen_smin_i32
+#define tcg_gen_umin_tl tcg_gen_umin_i32
+#define tcg_gen_smax_tl tcg_gen_smax_i32
+#define tcg_gen_umax_tl tcg_gen_umax_i32
+#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i32
+#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i32
+#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i32
+#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i32
+#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i32
+#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i32
+#define tcg_gen_atomic_fetch_smin_tl tcg_gen_atomic_fetch_smin_i32
+#define tcg_gen_atomic_fetch_umin_tl tcg_gen_atomic_fetch_umin_i32
+#define tcg_gen_atomic_fetch_smax_tl tcg_gen_atomic_fetch_smax_i32
+#define tcg_gen_atomic_fetch_umax_tl tcg_gen_atomic_fetch_umax_i32
+#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i32
+#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i32
+#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i32
+#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i32
+#define tcg_gen_atomic_smin_fetch_tl tcg_gen_atomic_smin_fetch_i32
+#define tcg_gen_atomic_umin_fetch_tl tcg_gen_atomic_umin_fetch_i32
+#define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i32
+#define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i32
+#define tcg_gen_dup_tl_vec  tcg_gen_dup_i32_vec
+#define tcg_gen_dup_tl tcg_gen_dup_i32
+
+#define dup_const_tl(VECE, C)                                      \
+    (__builtin_constant_p(VECE)                                    \
+     ? (  (VECE) == MO_8  ? 0x01010101ul * (uint8_t)(C)            \
+        : (VECE) == MO_16 ? 0x00010001ul * (uint16_t)(C)           \
+        : (VECE) == MO_32 ? 0x00000001ul * (uint32_t)(C)           \
+        : (qemu_build_not_reached_always(), 0))                    \
+     :  (target_long)dup_const(VECE, C))
+
+#endif /* TARGET_LONG_BITS == 64 */
+#endif /* TCG_TCG_OP_H */
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
new file mode 100644 (file)
index 0000000..b80227f
--- /dev/null
@@ -0,0 +1,318 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * DEF(name, oargs, iargs, cargs, flags)
+ */
+
+/* predefined ops */
+DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT)
+DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
+
+/* variable number of parameters */
+DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT)
+
+DEF(br, 0, 0, 1, TCG_OPF_BB_END)
+
+#define IMPL(X) (__builtin_constant_p(X) && (X) <= 0 ? TCG_OPF_NOT_PRESENT : 0)
+#if TCG_TARGET_REG_BITS == 32
+# define IMPL64  TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT
+#else
+# define IMPL64  TCG_OPF_64BIT
+#endif
+
+DEF(mb, 0, 0, 1, 0)
+
+DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT)
+DEF(setcond_i32, 1, 2, 1, 0)
+DEF(negsetcond_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_negsetcond_i32))
+DEF(movcond_i32, 1, 4, 1, 0)
+/* load/store */
+DEF(ld8u_i32, 1, 1, 1, 0)
+DEF(ld8s_i32, 1, 1, 1, 0)
+DEF(ld16u_i32, 1, 1, 1, 0)
+DEF(ld16s_i32, 1, 1, 1, 0)
+DEF(ld_i32, 1, 1, 1, 0)
+DEF(st8_i32, 0, 2, 1, 0)
+DEF(st16_i32, 0, 2, 1, 0)
+DEF(st_i32, 0, 2, 1, 0)
+/* arith */
+DEF(add_i32, 1, 2, 0, 0)
+DEF(sub_i32, 1, 2, 0, 0)
+DEF(mul_i32, 1, 2, 0, 0)
+DEF(div_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32))
+DEF(divu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_div_i32))
+DEF(rem_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32))
+DEF(remu_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rem_i32))
+DEF(div2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32))
+DEF(divu2_i32, 2, 3, 0, IMPL(TCG_TARGET_HAS_div2_i32))
+DEF(and_i32, 1, 2, 0, 0)
+DEF(or_i32, 1, 2, 0, 0)
+DEF(xor_i32, 1, 2, 0, 0)
+/* shifts/rotates */
+DEF(shl_i32, 1, 2, 0, 0)
+DEF(shr_i32, 1, 2, 0, 0)
+DEF(sar_i32, 1, 2, 0, 0)
+DEF(rotl_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
+DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
+DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32))
+DEF(extract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_extract_i32))
+DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32))
+DEF(extract2_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_extract2_i32))
+
+DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH)
+
+DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
+DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
+DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
+DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
+DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32))
+DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32))
+DEF(brcond2_i32, 0, 4, 2,
+    TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL(TCG_TARGET_REG_BITS == 32))
+DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
+
+DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32))
+DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32))
+DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32))
+DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32))
+DEF(bswap16_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap16_i32))
+DEF(bswap32_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap32_i32))
+DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32))
+DEF(neg_i32, 1, 1, 0, 0)
+DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32))
+DEF(orc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_orc_i32))
+DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32))
+DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32))
+DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32))
+DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32))
+DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32))
+DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32))
+
+DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
+DEF(setcond_i64, 1, 2, 1, IMPL64)
+DEF(negsetcond_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_negsetcond_i64))
+DEF(movcond_i64, 1, 4, 1, IMPL64)
+/* load/store */
+DEF(ld8u_i64, 1, 1, 1, IMPL64)
+DEF(ld8s_i64, 1, 1, 1, IMPL64)
+DEF(ld16u_i64, 1, 1, 1, IMPL64)
+DEF(ld16s_i64, 1, 1, 1, IMPL64)
+DEF(ld32u_i64, 1, 1, 1, IMPL64)
+DEF(ld32s_i64, 1, 1, 1, IMPL64)
+DEF(ld_i64, 1, 1, 1, IMPL64)
+DEF(st8_i64, 0, 2, 1, IMPL64)
+DEF(st16_i64, 0, 2, 1, IMPL64)
+DEF(st32_i64, 0, 2, 1, IMPL64)
+DEF(st_i64, 0, 2, 1, IMPL64)
+/* arith */
+DEF(add_i64, 1, 2, 0, IMPL64)
+DEF(sub_i64, 1, 2, 0, IMPL64)
+DEF(mul_i64, 1, 2, 0, IMPL64)
+DEF(div_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64))
+DEF(divu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div_i64))
+DEF(rem_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64))
+DEF(remu_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rem_i64))
+DEF(div2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64))
+DEF(divu2_i64, 2, 3, 0, IMPL64 | IMPL(TCG_TARGET_HAS_div2_i64))
+DEF(and_i64, 1, 2, 0, IMPL64)
+DEF(or_i64, 1, 2, 0, IMPL64)
+DEF(xor_i64, 1, 2, 0, IMPL64)
+/* shifts/rotates */
+DEF(shl_i64, 1, 2, 0, IMPL64)
+DEF(shr_i64, 1, 2, 0, IMPL64)
+DEF(sar_i64, 1, 2, 0, IMPL64)
+DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
+DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
+DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64))
+DEF(extract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_extract_i64))
+DEF(sextract_i64, 1, 1, 2, IMPL64 | IMPL(TCG_TARGET_HAS_sextract_i64))
+DEF(extract2_i64, 1, 2, 1, IMPL64 | IMPL(TCG_TARGET_HAS_extract2_i64))
+
+/* size changing ops */
+DEF(ext_i32_i64, 1, 1, 0, IMPL64)
+DEF(extu_i32_i64, 1, 1, 0, IMPL64)
+DEF(extrl_i64_i32, 1, 1, 0,
+    IMPL(TCG_TARGET_HAS_extr_i64_i32)
+    | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0))
+DEF(extrh_i64_i32, 1, 1, 0,
+    IMPL(TCG_TARGET_HAS_extr_i64_i32)
+    | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0))
+
+DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL64)
+DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64))
+DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64))
+DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64))
+DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64))
+DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64))
+DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64))
+DEF(bswap16_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64))
+DEF(bswap32_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64))
+DEF(bswap64_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64))
+DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64))
+DEF(neg_i64, 1, 1, 0, IMPL64)
+DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64))
+DEF(orc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_orc_i64))
+DEF(eqv_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_eqv_i64))
+DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64))
+DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64))
+DEF(clz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_clz_i64))
+DEF(ctz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctz_i64))
+DEF(ctpop_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctpop_i64))
+
+DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
+DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
+DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64))
+DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
+DEF(muluh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muluh_i64))
+DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
+
+#define DATA64_ARGS  (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
+
+/* There are tcg_ctx->insn_start_words here, not just one. */
+DEF(insn_start, 0, 0, DATA64_ARGS, TCG_OPF_NOT_PRESENT)
+
+DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
+DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
+DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
+
+DEF(plugin_cb_start, 0, 0, 3, TCG_OPF_NOT_PRESENT)
+DEF(plugin_cb_end, 0, 0, 0, TCG_OPF_NOT_PRESENT)
+
+/* Replicate ld/st ops for 32 and 64-bit guest addresses. */
+DEF(qemu_ld_a32_i32, 1, 1, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st_a32_i32, 0, 1 + 1, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld_a32_i64, DATA64_ARGS, 1, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
+DEF(qemu_st_a32_i64, 0, DATA64_ARGS + 1, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
+
+DEF(qemu_ld_a64_i32, 1, DATA64_ARGS, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st_a64_i32, 0, 1 + DATA64_ARGS, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld_a64_i64, DATA64_ARGS, DATA64_ARGS, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
+DEF(qemu_st_a64_i64, 0, DATA64_ARGS + DATA64_ARGS, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
+
+/* Only used by i386 to cope with stupid register constraints. */
+DEF(qemu_st8_a32_i32, 0, 1 + 1, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
+    IMPL(TCG_TARGET_HAS_qemu_st8_i32))
+DEF(qemu_st8_a64_i32, 0, 1 + DATA64_ARGS, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
+    IMPL(TCG_TARGET_HAS_qemu_st8_i32))
+
+/* Only for 64-bit hosts at the moment. */
+DEF(qemu_ld_a32_i128, 2, 1, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
+    IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
+DEF(qemu_ld_a64_i128, 2, 1, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
+    IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
+DEF(qemu_st_a32_i128, 0, 3, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
+    IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
+DEF(qemu_st_a64_i128, 0, 3, 1,
+    TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
+    IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
+
+/* Host vector support.  */
+
+#define IMPLVEC  TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)
+
+DEF(mov_vec, 1, 1, 0, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
+
+DEF(dup_vec, 1, 1, 0, IMPLVEC)
+DEF(dup2_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_REG_BITS == 32))
+
+DEF(ld_vec, 1, 1, 1, IMPLVEC)
+DEF(st_vec, 0, 2, 1, IMPLVEC)
+DEF(dupm_vec, 1, 1, 1, IMPLVEC)
+
+DEF(add_vec, 1, 2, 0, IMPLVEC)
+DEF(sub_vec, 1, 2, 0, IMPLVEC)
+DEF(mul_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_mul_vec))
+DEF(neg_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
+DEF(abs_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_abs_vec))
+DEF(ssadd_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))
+DEF(usadd_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))
+DEF(sssub_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))
+DEF(ussub_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_sat_vec))
+DEF(smin_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec))
+DEF(umin_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec))
+DEF(smax_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec))
+DEF(umax_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_minmax_vec))
+
+DEF(and_vec, 1, 2, 0, IMPLVEC)
+DEF(or_vec, 1, 2, 0, IMPLVEC)
+DEF(xor_vec, 1, 2, 0, IMPLVEC)
+DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec))
+DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec))
+DEF(nand_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nand_vec))
+DEF(nor_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nor_vec))
+DEF(eqv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_eqv_vec))
+DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
+
+DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
+DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
+DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
+DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec))
+
+DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
+DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
+DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
+DEF(rotls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rots_vec))
+
+DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
+DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
+DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
+DEF(rotlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
+DEF(rotrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
+
+DEF(cmp_vec, 1, 2, 1, IMPLVEC)
+
+DEF(bitsel_vec, 1, 3, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_bitsel_vec))
+DEF(cmpsel_vec, 1, 4, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_cmpsel_vec))
+
+DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
+
+#if TCG_TARGET_MAYBE_vec
+#include "tcg-target.opc.h"
+#endif
+
+#ifdef TCG_TARGET_INTERPRETER
+/* These opcodes are only for use between the tci generator and interpreter. */
+DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT)
+DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT)
+#endif
+
+#undef DATA64_ARGS
+#undef IMPL
+#undef IMPL64
+#undef IMPLVEC
+#undef DEF
diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h
new file mode 100644 (file)
index 0000000..44192c5
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * TCG internals related to TCG temp allocation
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef TCG_TEMP_INTERNAL_H
+#define TCG_TEMP_INTERNAL_H
+
+/*
+ * Allocation and freeing of EBB temps is reserved to TCG internals
+ */
+
+void tcg_temp_free_internal(TCGTemp *);
+
+void tcg_temp_free_i32(TCGv_i32 arg);
+void tcg_temp_free_i64(TCGv_i64 arg);
+void tcg_temp_free_i128(TCGv_i128 arg);
+void tcg_temp_free_ptr(TCGv_ptr arg);
+void tcg_temp_free_vec(TCGv_vec arg);
+
+TCGv_i32 tcg_temp_ebb_new_i32(void);
+TCGv_i64 tcg_temp_ebb_new_i64(void);
+TCGv_ptr tcg_temp_ebb_new_ptr(void);
+TCGv_i128 tcg_temp_ebb_new_i128(void);
+
+#endif /* TCG_TEMP_FREE_H */
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
new file mode 100644 (file)
index 0000000..451f3fe
--- /dev/null
@@ -0,0 +1,1069 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef TCG_H
+#define TCG_H
+
+#include "exec/memop.h"
+#include "exec/memopidx.h"
+#include "qemu/bitops.h"
+#include "qemu/plugin.h"
+#include "qemu/queue.h"
+#include "tcg/tcg-mo.h"
+#include "tcg-target-reg-bits.h"
+#include "tcg-target.h"
+#include "tcg/tcg-cond.h"
+#include "tcg/debug-assert.h"
+
+/* XXX: make safe guess about sizes */
+#define MAX_OP_PER_INSTR 266
+
+#define MAX_CALL_IARGS  7
+
+#define CPU_TEMP_BUF_NLONGS 128
+#define TCG_STATIC_FRAME_SIZE  (CPU_TEMP_BUF_NLONGS * sizeof(long))
+
+#if TCG_TARGET_REG_BITS == 32
+typedef int32_t tcg_target_long;
+typedef uint32_t tcg_target_ulong;
+#define TCG_PRIlx PRIx32
+#define TCG_PRIld PRId32
+#elif TCG_TARGET_REG_BITS == 64
+typedef int64_t tcg_target_long;
+typedef uint64_t tcg_target_ulong;
+#define TCG_PRIlx PRIx64
+#define TCG_PRIld PRId64
+#else
+#error unsupported
+#endif
+
+#if TCG_TARGET_NB_REGS <= 32
+typedef uint32_t TCGRegSet;
+#elif TCG_TARGET_NB_REGS <= 64
+typedef uint64_t TCGRegSet;
+#else
+#error unsupported
+#endif
+
+#if TCG_TARGET_REG_BITS == 32
+/* Turn some undef macros into false macros.  */
+#define TCG_TARGET_HAS_extr_i64_i32     0
+#define TCG_TARGET_HAS_div_i64          0
+#define TCG_TARGET_HAS_rem_i64          0
+#define TCG_TARGET_HAS_div2_i64         0
+#define TCG_TARGET_HAS_rot_i64          0
+#define TCG_TARGET_HAS_ext8s_i64        0
+#define TCG_TARGET_HAS_ext16s_i64       0
+#define TCG_TARGET_HAS_ext32s_i64       0
+#define TCG_TARGET_HAS_ext8u_i64        0
+#define TCG_TARGET_HAS_ext16u_i64       0
+#define TCG_TARGET_HAS_ext32u_i64       0
+#define TCG_TARGET_HAS_bswap16_i64      0
+#define TCG_TARGET_HAS_bswap32_i64      0
+#define TCG_TARGET_HAS_bswap64_i64      0
+#define TCG_TARGET_HAS_not_i64          0
+#define TCG_TARGET_HAS_andc_i64         0
+#define TCG_TARGET_HAS_orc_i64          0
+#define TCG_TARGET_HAS_eqv_i64          0
+#define TCG_TARGET_HAS_nand_i64         0
+#define TCG_TARGET_HAS_nor_i64          0
+#define TCG_TARGET_HAS_clz_i64          0
+#define TCG_TARGET_HAS_ctz_i64          0
+#define TCG_TARGET_HAS_ctpop_i64        0
+#define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_extract_i64      0
+#define TCG_TARGET_HAS_sextract_i64     0
+#define TCG_TARGET_HAS_extract2_i64     0
+#define TCG_TARGET_HAS_negsetcond_i64   0
+#define TCG_TARGET_HAS_add2_i64         0
+#define TCG_TARGET_HAS_sub2_i64         0
+#define TCG_TARGET_HAS_mulu2_i64        0
+#define TCG_TARGET_HAS_muls2_i64        0
+#define TCG_TARGET_HAS_muluh_i64        0
+#define TCG_TARGET_HAS_mulsh_i64        0
+/* Turn some undef macros into true macros.  */
+#define TCG_TARGET_HAS_add2_i32         1
+#define TCG_TARGET_HAS_sub2_i32         1
+#endif
+
+#ifndef TCG_TARGET_deposit_i32_valid
+#define TCG_TARGET_deposit_i32_valid(ofs, len) 1
+#endif
+#ifndef TCG_TARGET_deposit_i64_valid
+#define TCG_TARGET_deposit_i64_valid(ofs, len) 1
+#endif
+#ifndef TCG_TARGET_extract_i32_valid
+#define TCG_TARGET_extract_i32_valid(ofs, len) 1
+#endif
+#ifndef TCG_TARGET_extract_i64_valid
+#define TCG_TARGET_extract_i64_valid(ofs, len) 1
+#endif
+
+/* Only one of DIV or DIV2 should be defined.  */
+#if defined(TCG_TARGET_HAS_div_i32)
+#define TCG_TARGET_HAS_div2_i32         0
+#elif defined(TCG_TARGET_HAS_div2_i32)
+#define TCG_TARGET_HAS_div_i32          0
+#define TCG_TARGET_HAS_rem_i32          0
+#endif
+#if defined(TCG_TARGET_HAS_div_i64)
+#define TCG_TARGET_HAS_div2_i64         0
+#elif defined(TCG_TARGET_HAS_div2_i64)
+#define TCG_TARGET_HAS_div_i64          0
+#define TCG_TARGET_HAS_rem_i64          0
+#endif
+
+#if !defined(TCG_TARGET_HAS_v64) \
+    && !defined(TCG_TARGET_HAS_v128) \
+    && !defined(TCG_TARGET_HAS_v256)
+#define TCG_TARGET_MAYBE_vec            0
+#define TCG_TARGET_HAS_abs_vec          0
+#define TCG_TARGET_HAS_neg_vec          0
+#define TCG_TARGET_HAS_not_vec          0
+#define TCG_TARGET_HAS_andc_vec         0
+#define TCG_TARGET_HAS_orc_vec          0
+#define TCG_TARGET_HAS_nand_vec         0
+#define TCG_TARGET_HAS_nor_vec          0
+#define TCG_TARGET_HAS_eqv_vec          0
+#define TCG_TARGET_HAS_roti_vec         0
+#define TCG_TARGET_HAS_rots_vec         0
+#define TCG_TARGET_HAS_rotv_vec         0
+#define TCG_TARGET_HAS_shi_vec          0
+#define TCG_TARGET_HAS_shs_vec          0
+#define TCG_TARGET_HAS_shv_vec          0
+#define TCG_TARGET_HAS_mul_vec          0
+#define TCG_TARGET_HAS_sat_vec          0
+#define TCG_TARGET_HAS_minmax_vec       0
+#define TCG_TARGET_HAS_bitsel_vec       0
+#define TCG_TARGET_HAS_cmpsel_vec       0
+#else
+#define TCG_TARGET_MAYBE_vec            1
+#endif
+#ifndef TCG_TARGET_HAS_v64
+#define TCG_TARGET_HAS_v64              0
+#endif
+#ifndef TCG_TARGET_HAS_v128
+#define TCG_TARGET_HAS_v128             0
+#endif
+#ifndef TCG_TARGET_HAS_v256
+#define TCG_TARGET_HAS_v256             0
+#endif
+
+typedef enum TCGOpcode {
+#define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name,
+#include "tcg/tcg-opc.h"
+#undef DEF
+    NB_OPS,
+} TCGOpcode;
+
+#define tcg_regset_set_reg(d, r)   ((d) |= (TCGRegSet)1 << (r))
+#define tcg_regset_reset_reg(d, r) ((d) &= ~((TCGRegSet)1 << (r)))
+#define tcg_regset_test_reg(d, r)  (((d) >> (r)) & 1)
+
+#ifndef TCG_TARGET_INSN_UNIT_SIZE
+# error "Missing TCG_TARGET_INSN_UNIT_SIZE"
+#elif TCG_TARGET_INSN_UNIT_SIZE == 1
+typedef uint8_t tcg_insn_unit;
+#elif TCG_TARGET_INSN_UNIT_SIZE == 2
+typedef uint16_t tcg_insn_unit;
+#elif TCG_TARGET_INSN_UNIT_SIZE == 4
+typedef uint32_t tcg_insn_unit;
+#elif TCG_TARGET_INSN_UNIT_SIZE == 8
+typedef uint64_t tcg_insn_unit;
+#else
+/* The port better have done this.  */
+#endif
+
+typedef struct TCGRelocation TCGRelocation;
+struct TCGRelocation {
+    QSIMPLEQ_ENTRY(TCGRelocation) next;
+    tcg_insn_unit *ptr;
+    intptr_t addend;
+    int type;
+};
+
+typedef struct TCGOp TCGOp;
+typedef struct TCGLabelUse TCGLabelUse;
+struct TCGLabelUse {
+    QSIMPLEQ_ENTRY(TCGLabelUse) next;
+    TCGOp *op;
+};
+
+typedef struct TCGLabel TCGLabel;
+struct TCGLabel {
+    bool present;
+    bool has_value;
+    uint16_t id;
+    union {
+        uintptr_t value;
+        const tcg_insn_unit *value_ptr;
+    } u;
+    QSIMPLEQ_HEAD(, TCGLabelUse) branches;
+    QSIMPLEQ_HEAD(, TCGRelocation) relocs;
+    QSIMPLEQ_ENTRY(TCGLabel) next;
+};
+
+typedef struct TCGPool {
+    struct TCGPool *next;
+    int size;
+    uint8_t data[] __attribute__ ((aligned));
+} TCGPool;
+
+#define TCG_POOL_CHUNK_SIZE 32768
+
+#define TCG_MAX_TEMPS 512
+#define TCG_MAX_INSNS 512
+
+/* when the size of the arguments of a called function is smaller than
+   this value, they are statically allocated in the TB stack frame */
+#define TCG_STATIC_CALL_ARGS_SIZE 128
+
+typedef enum TCGType {
+    TCG_TYPE_I32,
+    TCG_TYPE_I64,
+    TCG_TYPE_I128,
+
+    TCG_TYPE_V64,
+    TCG_TYPE_V128,
+    TCG_TYPE_V256,
+
+    /* Number of different types (integer not enum) */
+#define TCG_TYPE_COUNT  (TCG_TYPE_V256 + 1)
+
+    /* An alias for the size of the host register.  */
+#if TCG_TARGET_REG_BITS == 32
+    TCG_TYPE_REG = TCG_TYPE_I32,
+#else
+    TCG_TYPE_REG = TCG_TYPE_I64,
+#endif
+
+    /* An alias for the size of the native pointer.  */
+#if UINTPTR_MAX == UINT32_MAX
+    TCG_TYPE_PTR = TCG_TYPE_I32,
+#else
+    TCG_TYPE_PTR = TCG_TYPE_I64,
+#endif
+} TCGType;
+
+/**
+ * tcg_type_size
+ * @t: type
+ *
+ * Return the size of the type in bytes.
+ */
+static inline int tcg_type_size(TCGType t)
+{
+    unsigned i = t;
+    if (i >= TCG_TYPE_V64) {
+        tcg_debug_assert(i < TCG_TYPE_COUNT);
+        i -= TCG_TYPE_V64 - 1;
+    }
+    return 4 << i;
+}
+
+/**
+ * get_alignment_bits
+ * @memop: MemOp value
+ *
+ * Extract the alignment size from the memop.
+ */
+static inline unsigned get_alignment_bits(MemOp memop)
+{
+    unsigned a = memop & MO_AMASK;
+
+    if (a == MO_UNALN) {
+        /* No alignment required.  */
+        a = 0;
+    } else if (a == MO_ALIGN) {
+        /* A natural alignment requirement.  */
+        a = memop & MO_SIZE;
+    } else {
+        /* A specific alignment requirement.  */
+        a = a >> MO_ASHIFT;
+    }
+    return a;
+}
+
+typedef tcg_target_ulong TCGArg;
+
+/* Define type and accessor macros for TCG variables.
+
+   TCG variables are the inputs and outputs of TCG ops, as described
+   in tcg/README. Target CPU front-end code uses these types to deal
+   with TCG variables as it emits TCG code via the tcg_gen_* functions.
+   They come in several flavours:
+    * TCGv_i32  : 32 bit integer type
+    * TCGv_i64  : 64 bit integer type
+    * TCGv_i128 : 128 bit integer type
+    * TCGv_ptr  : a host pointer type
+    * TCGv_vec  : a host vector type; the exact size is not exposed
+                  to the CPU front-end code.
+    * TCGv      : an integer type the same size as target_ulong
+                  (an alias for either TCGv_i32 or TCGv_i64)
+   The compiler's type checking will complain if you mix them
+   up and pass the wrong sized TCGv to a function.
+
+   Users of tcg_gen_* don't need to know about any of the internal
+   details of these, and should treat them as opaque types.
+   You won't be able to look inside them in a debugger either.
+
+   Internal implementation details follow:
+
+   Note that there is no definition of the structs TCGv_i32_d etc anywhere.
+   This is deliberate, because the values we store in variables of type
+   TCGv_i32 are not really pointers-to-structures. They're just small
+   integers, but keeping them in pointer types like this means that the
+   compiler will complain if you accidentally pass a TCGv_i32 to a
+   function which takes a TCGv_i64, and so on. Only the internals of
+   TCG need to care about the actual contents of the types.  */
+
+typedef struct TCGv_i32_d *TCGv_i32;
+typedef struct TCGv_i64_d *TCGv_i64;
+typedef struct TCGv_i128_d *TCGv_i128;
+typedef struct TCGv_ptr_d *TCGv_ptr;
+typedef struct TCGv_vec_d *TCGv_vec;
+typedef TCGv_ptr TCGv_env;
+
+/* call flags */
+/* Helper does not read globals (either directly or through an exception). It
+   implies TCG_CALL_NO_WRITE_GLOBALS. */
+#define TCG_CALL_NO_READ_GLOBALS    0x0001
+/* Helper does not write globals */
+#define TCG_CALL_NO_WRITE_GLOBALS   0x0002
+/* Helper can be safely suppressed if the return value is not used. */
+#define TCG_CALL_NO_SIDE_EFFECTS    0x0004
+/* Helper is G_NORETURN.  */
+#define TCG_CALL_NO_RETURN          0x0008
+/* Helper is part of Plugins.  */
+#define TCG_CALL_PLUGIN             0x0010
+
+/* convenience version of most used call flags */
+#define TCG_CALL_NO_RWG         TCG_CALL_NO_READ_GLOBALS
+#define TCG_CALL_NO_WG          TCG_CALL_NO_WRITE_GLOBALS
+#define TCG_CALL_NO_SE          TCG_CALL_NO_SIDE_EFFECTS
+#define TCG_CALL_NO_RWG_SE      (TCG_CALL_NO_RWG | TCG_CALL_NO_SE)
+#define TCG_CALL_NO_WG_SE       (TCG_CALL_NO_WG | TCG_CALL_NO_SE)
+
+/*
+ * Flags for the bswap opcodes.
+ * If IZ, the input is zero-extended, otherwise unknown.
+ * If OZ or OS, the output is zero- or sign-extended respectively,
+ * otherwise the high bits are undefined.
+ */
+enum {
+    TCG_BSWAP_IZ = 1,
+    TCG_BSWAP_OZ = 2,
+    TCG_BSWAP_OS = 4,
+};
+
+typedef enum TCGTempVal {
+    TEMP_VAL_DEAD,
+    TEMP_VAL_REG,
+    TEMP_VAL_MEM,
+    TEMP_VAL_CONST,
+} TCGTempVal;
+
+typedef enum TCGTempKind {
+    /*
+     * Temp is dead at the end of the extended basic block (EBB),
+     * the single-entry multiple-exit region that falls through
+     * conditional branches.
+     */
+    TEMP_EBB,
+    /* Temp is live across the entire translation block, but dead at end. */
+    TEMP_TB,
+    /* Temp is live across the entire translation block, and between them. */
+    TEMP_GLOBAL,
+    /* Temp is in a fixed register. */
+    TEMP_FIXED,
+    /* Temp is a fixed constant. */
+    TEMP_CONST,
+} TCGTempKind;
+
+typedef struct TCGTemp {
+    TCGReg reg:8;
+    TCGTempVal val_type:8;
+    TCGType base_type:8;
+    TCGType type:8;
+    TCGTempKind kind:3;
+    unsigned int indirect_reg:1;
+    unsigned int indirect_base:1;
+    unsigned int mem_coherent:1;
+    unsigned int mem_allocated:1;
+    unsigned int temp_allocated:1;
+    unsigned int temp_subindex:2;
+
+    int64_t val;
+    struct TCGTemp *mem_base;
+    intptr_t mem_offset;
+    const char *name;
+
+    /* Pass-specific information that can be stored for a temporary.
+       One word worth of integer data, and one pointer to data
+       allocated separately.  */
+    uintptr_t state;
+    void *state_ptr;
+} TCGTemp;
+
+typedef struct TCGContext TCGContext;
+
+typedef struct TCGTempSet {
+    unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
+} TCGTempSet;
+
+/*
+ * With 1 128-bit output, a 32-bit host requires 4 output parameters,
+ * which leaves a maximum of 28 other slots.  Which is enough for 7
+ * 128-bit operands.
+ */
+#define DEAD_ARG  (1 << 4)
+#define SYNC_ARG  (1 << 0)
+typedef uint32_t TCGLifeData;
+
+struct TCGOp {
+    TCGOpcode opc   : 8;
+    unsigned nargs  : 8;
+
+    /* Parameters for this opcode.  See below.  */
+    unsigned param1 : 8;
+    unsigned param2 : 8;
+
+    /* Lifetime data of the operands.  */
+    TCGLifeData life;
+
+    /* Next and previous opcodes.  */
+    QTAILQ_ENTRY(TCGOp) link;
+
+    /* Register preferences for the output(s).  */
+    TCGRegSet output_pref[2];
+
+    /* Arguments for the opcode.  */
+    TCGArg args[];
+};
+
+#define TCGOP_CALLI(X)    (X)->param1
+#define TCGOP_CALLO(X)    (X)->param2
+
+#define TCGOP_VECL(X)     (X)->param1
+#define TCGOP_VECE(X)     (X)->param2
+
+/* Make sure operands fit in the bitfields above.  */
+QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8));
+
+static inline TCGRegSet output_pref(const TCGOp *op, unsigned i)
+{
+    return i < ARRAY_SIZE(op->output_pref) ? op->output_pref[i] : 0;
+}
+
+struct TCGContext {
+    uint8_t *pool_cur, *pool_end;
+    TCGPool *pool_first, *pool_current, *pool_first_large;
+    int nb_labels;
+    int nb_globals;
+    int nb_temps;
+    int nb_indirects;
+    int nb_ops;
+    TCGType addr_type;            /* TCG_TYPE_I32 or TCG_TYPE_I64 */
+
+    int page_mask;
+    uint8_t page_bits;
+    uint8_t tlb_dyn_max_bits;
+    uint8_t insn_start_words;
+    TCGBar guest_mo;
+
+    TCGRegSet reserved_regs;
+    intptr_t current_frame_offset;
+    intptr_t frame_start;
+    intptr_t frame_end;
+    TCGTemp *frame_temp;
+
+    TranslationBlock *gen_tb;     /* tb for which code is being generated */
+    tcg_insn_unit *code_buf;      /* pointer for start of tb */
+    tcg_insn_unit *code_ptr;      /* pointer for running end of tb */
+
+#ifdef CONFIG_DEBUG_TCG
+    int goto_tb_issue_mask;
+    const TCGOpcode *vecop_list;
+#endif
+
+    /* Code generation.  Note that we specifically do not use tcg_insn_unit
+       here, because there's too much arithmetic throughout that relies
+       on addition and subtraction working on bytes.  Rely on the GCC
+       extension that allows arithmetic on void*.  */
+    void *code_gen_buffer;
+    size_t code_gen_buffer_size;
+    void *code_gen_ptr;
+    void *data_gen_ptr;
+
+    /* Threshold to flush the translated code buffer.  */
+    void *code_gen_highwater;
+
+    /* Track which vCPU triggers events */
+    CPUState *cpu;                      /* *_trans */
+
+    /* These structures are private to tcg-target.c.inc.  */
+#ifdef TCG_TARGET_NEED_LDST_LABELS
+    QSIMPLEQ_HEAD(, TCGLabelQemuLdst) ldst_labels;
+#endif
+#ifdef TCG_TARGET_NEED_POOL_LABELS
+    struct TCGLabelPoolData *pool_labels;
+#endif
+
+    TCGLabel *exitreq_label;
+
+#ifdef CONFIG_PLUGIN
+    /*
+     * We keep one plugin_tb struct per TCGContext. Note that on every TB
+     * translation we clear but do not free its contents; this way we
+     * avoid a lot of malloc/free churn, since after a few TB's it's
+     * unlikely that we'll need to allocate either more instructions or more
+     * space for instructions (for variable-instruction-length ISAs).
+     */
+    struct qemu_plugin_tb *plugin_tb;
+
+    /* descriptor of the instruction being translated */
+    struct qemu_plugin_insn *plugin_insn;
+#endif
+
+    GHashTable *const_table[TCG_TYPE_COUNT];
+    TCGTempSet free_temps[TCG_TYPE_COUNT];
+    TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
+
+    QTAILQ_HEAD(, TCGOp) ops, free_ops;
+    QSIMPLEQ_HEAD(, TCGLabel) labels;
+
+    /* Tells which temporary holds a given register.
+       It does not take into account fixed registers */
+    TCGTemp *reg_to_temp[TCG_TARGET_NB_REGS];
+
+    uint16_t gen_insn_end_off[TCG_MAX_INSNS];
+    uint64_t *gen_insn_data;
+
+    /* Exit to translator on overflow. */
+    sigjmp_buf jmp_trans;
+};
+
+static inline bool temp_readonly(TCGTemp *ts)
+{
+    return ts->kind >= TEMP_FIXED;
+}
+
+#ifdef CONFIG_USER_ONLY
+extern bool tcg_use_softmmu;
+#else
+#define tcg_use_softmmu  true
+#endif
+
+extern __thread TCGContext *tcg_ctx;
+extern const void *tcg_code_gen_epilogue;
+extern uintptr_t tcg_splitwx_diff;
+extern TCGv_env tcg_env;
+
+bool in_code_gen_buffer(const void *p);
+
+#ifdef CONFIG_DEBUG_TCG
+const void *tcg_splitwx_to_rx(void *rw);
+void *tcg_splitwx_to_rw(const void *rx);
+#else
+static inline const void *tcg_splitwx_to_rx(void *rw)
+{
+    return rw ? rw + tcg_splitwx_diff : NULL;
+}
+
+static inline void *tcg_splitwx_to_rw(const void *rx)
+{
+    return rx ? (void *)rx - tcg_splitwx_diff : NULL;
+}
+#endif
+
+static inline TCGArg temp_arg(TCGTemp *ts)
+{
+    return (uintptr_t)ts;
+}
+
+static inline TCGTemp *arg_temp(TCGArg a)
+{
+    return (TCGTemp *)(uintptr_t)a;
+}
+
+#ifdef CONFIG_DEBUG_TCG
+size_t temp_idx(TCGTemp *ts);
+TCGTemp *tcgv_i32_temp(TCGv_i32 v);
+#else
+static inline size_t temp_idx(TCGTemp *ts)
+{
+    return ts - tcg_ctx->temps;
+}
+
+/*
+ * Using the offset of a temporary, relative to TCGContext, rather than
+ * its index means that we don't use 0.  That leaves offset 0 free for
+ * a NULL representation without having to leave index 0 unused.
+ */
+static inline TCGTemp *tcgv_i32_temp(TCGv_i32 v)
+{
+    return (void *)tcg_ctx + (uintptr_t)v;
+}
+#endif
+
+static inline TCGTemp *tcgv_i64_temp(TCGv_i64 v)
+{
+    return tcgv_i32_temp((TCGv_i32)v);
+}
+
+static inline TCGTemp *tcgv_i128_temp(TCGv_i128 v)
+{
+    return tcgv_i32_temp((TCGv_i32)v);
+}
+
+static inline TCGTemp *tcgv_ptr_temp(TCGv_ptr v)
+{
+    return tcgv_i32_temp((TCGv_i32)v);
+}
+
+static inline TCGTemp *tcgv_vec_temp(TCGv_vec v)
+{
+    return tcgv_i32_temp((TCGv_i32)v);
+}
+
+static inline TCGArg tcgv_i32_arg(TCGv_i32 v)
+{
+    return temp_arg(tcgv_i32_temp(v));
+}
+
+static inline TCGArg tcgv_i64_arg(TCGv_i64 v)
+{
+    return temp_arg(tcgv_i64_temp(v));
+}
+
+static inline TCGArg tcgv_i128_arg(TCGv_i128 v)
+{
+    return temp_arg(tcgv_i128_temp(v));
+}
+
+static inline TCGArg tcgv_ptr_arg(TCGv_ptr v)
+{
+    return temp_arg(tcgv_ptr_temp(v));
+}
+
+static inline TCGArg tcgv_vec_arg(TCGv_vec v)
+{
+    return temp_arg(tcgv_vec_temp(v));
+}
+
+static inline TCGv_i32 temp_tcgv_i32(TCGTemp *t)
+{
+    (void)temp_idx(t); /* trigger embedded assert */
+    return (TCGv_i32)((void *)t - (void *)tcg_ctx);
+}
+
+static inline TCGv_i64 temp_tcgv_i64(TCGTemp *t)
+{
+    return (TCGv_i64)temp_tcgv_i32(t);
+}
+
+static inline TCGv_i128 temp_tcgv_i128(TCGTemp *t)
+{
+    return (TCGv_i128)temp_tcgv_i32(t);
+}
+
+static inline TCGv_ptr temp_tcgv_ptr(TCGTemp *t)
+{
+    return (TCGv_ptr)temp_tcgv_i32(t);
+}
+
+static inline TCGv_vec temp_tcgv_vec(TCGTemp *t)
+{
+    return (TCGv_vec)temp_tcgv_i32(t);
+}
+
+static inline TCGArg tcg_get_insn_param(TCGOp *op, int arg)
+{
+    return op->args[arg];
+}
+
+static inline void tcg_set_insn_param(TCGOp *op, int arg, TCGArg v)
+{
+    op->args[arg] = v;
+}
+
+static inline uint64_t tcg_get_insn_start_param(TCGOp *op, int arg)
+{
+    if (TCG_TARGET_REG_BITS == 64) {
+        return tcg_get_insn_param(op, arg);
+    } else {
+        return deposit64(tcg_get_insn_param(op, arg * 2), 32, 32,
+                         tcg_get_insn_param(op, arg * 2 + 1));
+    }
+}
+
+static inline void tcg_set_insn_start_param(TCGOp *op, int arg, uint64_t v)
+{
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_set_insn_param(op, arg, v);
+    } else {
+        tcg_set_insn_param(op, arg * 2, v);
+        tcg_set_insn_param(op, arg * 2 + 1, v >> 32);
+    }
+}
+
+/* The last op that was emitted.  */
+static inline TCGOp *tcg_last_op(void)
+{
+    return QTAILQ_LAST(&tcg_ctx->ops);
+}
+
+/* Test for whether to terminate the TB for using too many opcodes.  */
+static inline bool tcg_op_buf_full(void)
+{
+    /* This is not a hard limit, it merely stops translation when
+     * we have produced "enough" opcodes.  We want to limit TB size
+     * such that a RISC host can reasonably use a 16-bit signed
+     * branch within the TB.  We also need to be mindful of the
+     * 16-bit unsigned offsets, TranslationBlock.jmp_reset_offset[]
+     * and TCGContext.gen_insn_end_off[].
+     */
+    return tcg_ctx->nb_ops >= 4000;
+}
+
+/* pool based memory allocation */
+
+/* user-mode: mmap_lock must be held for tcg_malloc_internal. */
+void *tcg_malloc_internal(TCGContext *s, int size);
+void tcg_pool_reset(TCGContext *s);
+TranslationBlock *tcg_tb_alloc(TCGContext *s);
+
+void tcg_region_reset_all(void);
+
+size_t tcg_code_size(void);
+size_t tcg_code_capacity(void);
+
+void tcg_tb_insert(TranslationBlock *tb);
+void tcg_tb_remove(TranslationBlock *tb);
+TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr);
+void tcg_tb_foreach(GTraverseFunc func, gpointer user_data);
+size_t tcg_nb_tbs(void);
+
+/* user-mode: Called with mmap_lock held.  */
+static inline void *tcg_malloc(int size)
+{
+    TCGContext *s = tcg_ctx;
+    uint8_t *ptr, *ptr_end;
+
+    /* ??? This is a weak placeholder for minimum malloc alignment.  */
+    size = QEMU_ALIGN_UP(size, 8);
+
+    ptr = s->pool_cur;
+    ptr_end = ptr + size;
+    if (unlikely(ptr_end > s->pool_end)) {
+        return tcg_malloc_internal(tcg_ctx, size);
+    } else {
+        s->pool_cur = ptr_end;
+        return ptr;
+    }
+}
+
+void tcg_func_start(TCGContext *s);
+
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start);
+
+void tb_target_set_jmp_target(const TranslationBlock *, int,
+                              uintptr_t, uintptr_t);
+
+void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size);
+
+#define TCG_CT_CONST  1 /* any constant of register size */
+
+typedef struct TCGArgConstraint {
+    unsigned ct : 16;
+    unsigned alias_index : 4;
+    unsigned sort_index : 4;
+    unsigned pair_index : 4;
+    unsigned pair : 2;  /* 0: none, 1: first, 2: second, 3: second alias */
+    bool oalias : 1;
+    bool ialias : 1;
+    bool newreg : 1;
+    TCGRegSet regs;
+} TCGArgConstraint;
+
+#define TCG_MAX_OP_ARGS 16
+
+/* Bits for TCGOpDef->flags, 8 bits available, all used.  */
+enum {
+    /* Instruction exits the translation block.  */
+    TCG_OPF_BB_EXIT      = 0x01,
+    /* Instruction defines the end of a basic block.  */
+    TCG_OPF_BB_END       = 0x02,
+    /* Instruction clobbers call registers and potentially update globals.  */
+    TCG_OPF_CALL_CLOBBER = 0x04,
+    /* Instruction has side effects: it cannot be removed if its outputs
+       are not used, and might trigger exceptions.  */
+    TCG_OPF_SIDE_EFFECTS = 0x08,
+    /* Instruction operands are 64-bits (otherwise 32-bits).  */
+    TCG_OPF_64BIT        = 0x10,
+    /* Instruction is optional and not implemented by the host, or insn
+       is generic and should not be implemented by the host.  */
+    TCG_OPF_NOT_PRESENT  = 0x20,
+    /* Instruction operands are vectors.  */
+    TCG_OPF_VECTOR       = 0x40,
+    /* Instruction is a conditional branch. */
+    TCG_OPF_COND_BRANCH  = 0x80
+};
+
+typedef struct TCGOpDef {
+    const char *name;
+    uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
+    uint8_t flags;
+    TCGArgConstraint *args_ct;
+} TCGOpDef;
+
+extern TCGOpDef tcg_op_defs[];
+extern const size_t tcg_op_defs_max;
+
+typedef struct TCGTargetOpDef {
+    TCGOpcode op;
+    const char *args_ct_str[TCG_MAX_OP_ARGS];
+} TCGTargetOpDef;
+
+bool tcg_op_supported(TCGOpcode op);
+
+void tcg_gen_call0(TCGHelperInfo *, TCGTemp *ret);
+void tcg_gen_call1(TCGHelperInfo *, TCGTemp *ret, TCGTemp *);
+void tcg_gen_call2(TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *);
+void tcg_gen_call3(TCGHelperInfo *, TCGTemp *ret, TCGTemp *,
+                   TCGTemp *, TCGTemp *);
+void tcg_gen_call4(TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *,
+                   TCGTemp *, TCGTemp *);
+void tcg_gen_call5(TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *,
+                   TCGTemp *, TCGTemp *, TCGTemp *);
+void tcg_gen_call6(TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *,
+                   TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *);
+void tcg_gen_call7(TCGHelperInfo *, TCGTemp *ret, TCGTemp *, TCGTemp *,
+                   TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *, TCGTemp *);
+
+TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs);
+void tcg_op_remove(TCGContext *s, TCGOp *op);
+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op,
+                            TCGOpcode opc, unsigned nargs);
+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op,
+                           TCGOpcode opc, unsigned nargs);
+
+/**
+ * tcg_remove_ops_after:
+ * @op: target operation
+ *
+ * Discard any opcodes emitted since @op.  Expected usage is to save
+ * a starting point with tcg_last_op(), speculatively emit opcodes,
+ * then decide whether or not to keep those opcodes after the fact.
+ */
+void tcg_remove_ops_after(TCGOp *op);
+
+void tcg_optimize(TCGContext *s);
+
+TCGLabel *gen_new_label(void);
+
+/**
+ * label_arg
+ * @l: label
+ *
+ * Encode a label for storage in the TCG opcode stream.
+ */
+
+static inline TCGArg label_arg(TCGLabel *l)
+{
+    return (uintptr_t)l;
+}
+
+/**
+ * arg_label
+ * @i: value
+ *
+ * The opposite of label_arg.  Retrieve a label from the
+ * encoding of the TCG opcode stream.
+ */
+
+static inline TCGLabel *arg_label(TCGArg i)
+{
+    return (TCGLabel *)(uintptr_t)i;
+}
+
+/**
+ * tcg_ptr_byte_diff
+ * @a, @b: addresses to be differenced
+ *
+ * There are many places within the TCG backends where we need a byte
+ * difference between two pointers.  While this can be accomplished
+ * with local casting, it's easy to get wrong -- especially if one is
+ * concerned with the signedness of the result.
+ *
+ * This version relies on GCC's void pointer arithmetic to get the
+ * correct result.
+ */
+
+static inline ptrdiff_t tcg_ptr_byte_diff(const void *a, const void *b)
+{
+    return a - b;
+}
+
+/**
+ * tcg_pcrel_diff
+ * @s: the tcg context
+ * @target: address of the target
+ *
+ * Produce a pc-relative difference, from the current code_ptr
+ * to the destination address.
+ */
+
+static inline ptrdiff_t tcg_pcrel_diff(TCGContext *s, const void *target)
+{
+    return tcg_ptr_byte_diff(target, tcg_splitwx_to_rx(s->code_ptr));
+}
+
+/**
+ * tcg_tbrel_diff
+ * @s: the tcg context
+ * @target: address of the target
+ *
+ * Produce a difference, from the beginning of the current TB code
+ * to the destination address.
+ */
+static inline ptrdiff_t tcg_tbrel_diff(TCGContext *s, const void *target)
+{
+    return tcg_ptr_byte_diff(target, tcg_splitwx_to_rx(s->code_buf));
+}
+
+/**
+ * tcg_current_code_size
+ * @s: the tcg context
+ *
+ * Compute the current code size within the translation block.
+ * This is used to fill in qemu's data structures for goto_tb.
+ */
+
+static inline size_t tcg_current_code_size(TCGContext *s)
+{
+    return tcg_ptr_byte_diff(s->code_ptr, s->code_buf);
+}
+
+/**
+ * tcg_qemu_tb_exec:
+ * @env: pointer to CPUArchState for the CPU
+ * @tb_ptr: address of generated code for the TB to execute
+ *
+ * Start executing code from a given translation block.
+ * Where translation blocks have been linked, execution
+ * may proceed from the given TB into successive ones.
+ * Control eventually returns only when some action is needed
+ * from the top-level loop: either control must pass to a TB
+ * which has not yet been directly linked, or an asynchronous
+ * event such as an interrupt needs handling.
+ *
+ * Return: The return value is the value passed to the corresponding
+ * tcg_gen_exit_tb() at translation time of the last TB attempted to execute.
+ * The value is either zero or a 4-byte aligned pointer to that TB combined
+ * with additional information in its two least significant bits. The
+ * additional information is encoded as follows:
+ *  0, 1: the link between this TB and the next is via the specified
+ *        TB index (0 or 1). That is, we left the TB via (the equivalent
+ *        of) "goto_tb <index>". The main loop uses this to determine
+ *        how to link the TB just executed to the next.
+ *  2:    we are using instruction counting code generation, and we
+ *        did not start executing this TB because the instruction counter
+ *        would hit zero midway through it. In this case the pointer
+ *        returned is the TB we were about to execute, and the caller must
+ *        arrange to execute the remaining count of instructions.
+ *  3:    we stopped because the CPU's exit_request flag was set
+ *        (usually meaning that there is an interrupt that needs to be
+ *        handled). The pointer returned is the TB we were about to execute
+ *        when we noticed the pending exit request.
+ *
+ * If the bottom two bits indicate an exit-via-index then the CPU
+ * state is correctly synchronised and ready for execution of the next
+ * TB (and in particular the guest PC is the address to execute next).
+ * Otherwise, we gave up on execution of this TB before it started, and
+ * the caller must fix up the CPU state by calling the CPU's
+ * synchronize_from_tb() method with the TB pointer we return (falling
+ * back to calling the CPU's set_pc method with tb->pb if no
+ * synchronize_from_tb() method exists).
+ *
+ * Note that TCG targets may use a different definition of tcg_qemu_tb_exec
+ * to this default (which just calls the prologue.code emitted by
+ * tcg_target_qemu_prologue()).
+ */
+#define TB_EXIT_MASK      3
+#define TB_EXIT_IDX0      0
+#define TB_EXIT_IDX1      1
+#define TB_EXIT_IDXMAX    1
+#define TB_EXIT_REQUESTED 3
+
+#ifdef CONFIG_TCG_INTERPRETER
+uintptr_t tcg_qemu_tb_exec(CPUArchState *env, const void *tb_ptr);
+#else
+typedef uintptr_t tcg_prologue_fn(CPUArchState *env, const void *tb_ptr);
+extern tcg_prologue_fn *tcg_qemu_tb_exec;
+#endif
+
+void tcg_register_jit(const void *buf, size_t buf_size);
+
+#if TCG_TARGET_MAYBE_vec
+/* Return zero if the tuple (opc, type, vece) is unsupportable;
+   return > 0 if it is directly supportable;
+   return < 0 if we must call tcg_expand_vec_op.  */
+int tcg_can_emit_vec_op(TCGOpcode, TCGType, unsigned);
+#else
+static inline int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
+{
+    return 0;
+}
+#endif
+
+/* Expand the tuple (opc, type, vece) on the given arguments.  */
+void tcg_expand_vec_op(TCGOpcode, TCGType, unsigned, TCGArg, ...);
+
+/* Replicate a constant C according to the log2 of the element size.  */
+uint64_t dup_const(unsigned vece, uint64_t c);
+
+#define dup_const(VECE, C)                                         \
+    (__builtin_constant_p(VECE)                                    \
+     ? (  (VECE) == MO_8  ? 0x0101010101010101ull * (uint8_t)(C)   \
+        : (VECE) == MO_16 ? 0x0001000100010001ull * (uint16_t)(C)  \
+        : (VECE) == MO_32 ? 0x0000000100000001ull * (uint32_t)(C)  \
+        : (VECE) == MO_64 ? (uint64_t)(C)                          \
+        : (qemu_build_not_reached_always(), 0))                    \
+     : dup_const(VECE, C))
+
+static inline const TCGOpcode *tcg_swap_vecop_list(const TCGOpcode *n)
+{
+#ifdef CONFIG_DEBUG_TCG
+    const TCGOpcode *o = tcg_ctx->vecop_list;
+    tcg_ctx->vecop_list = n;
+    return o;
+#else
+    return NULL;
+#endif
+}
+
+bool tcg_can_emit_vecop_list(const TCGOpcode *, TCGType, unsigned);
+
+#endif /* TCG_H */
diff --git a/include/trace-tcg.h b/include/trace-tcg.h
deleted file mode 100644 (file)
index da68608..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef TRACE_TCG_H
-#define TRACE_TCG_H
-
-#include "trace/generated-tcg-tracers.h"
-
-#endif /* TRACE_TCG_H */
diff --git a/include/ui/clipboard.h b/include/ui/clipboard.h
new file mode 100644 (file)
index 0000000..ab6acdb
--- /dev/null
@@ -0,0 +1,277 @@
+#ifndef QEMU_CLIPBOARD_H
+#define QEMU_CLIPBOARD_H
+
+#include "qemu/notify.h"
+
+/**
+ * DOC: Introduction
+ *
+ * The header ``ui/clipboard.h`` declares the qemu clipboard interface.
+ *
+ * All qemu elements which want use the clipboard can register as
+ * clipboard peer.  Subsequently they can set the clipboard content
+ * and get notifications for clipboard updates.
+ *
+ * Typical users are user interfaces (gtk), remote access protocols
+ * (vnc) and devices talking to the guest (vdagent).
+ *
+ * Even though the design allows different data types only plain text
+ * is supported for now.
+ */
+
+typedef enum QemuClipboardType QemuClipboardType;
+typedef enum QemuClipboardNotifyType QemuClipboardNotifyType;
+typedef enum QemuClipboardSelection QemuClipboardSelection;
+typedef struct QemuClipboardPeer QemuClipboardPeer;
+typedef struct QemuClipboardNotify QemuClipboardNotify;
+typedef struct QemuClipboardInfo QemuClipboardInfo;
+
+/**
+ * enum QemuClipboardType
+ *
+ * @QEMU_CLIPBOARD_TYPE_TEXT: text/plain; charset=utf-8
+ * @QEMU_CLIPBOARD_TYPE__COUNT: type count.
+ */
+enum QemuClipboardType {
+    QEMU_CLIPBOARD_TYPE_TEXT,
+    QEMU_CLIPBOARD_TYPE__COUNT,
+};
+
+/* same as VD_AGENT_CLIPBOARD_SELECTION_* */
+/**
+ * enum QemuClipboardSelection
+ *
+ * @QEMU_CLIPBOARD_SELECTION_CLIPBOARD: clipboard (explitcit cut+paste).
+ * @QEMU_CLIPBOARD_SELECTION_PRIMARY: primary selection (select + middle mouse button).
+ * @QEMU_CLIPBOARD_SELECTION_SECONDARY: secondary selection (dunno).
+ * @QEMU_CLIPBOARD_SELECTION__COUNT: selection count.
+ */
+enum QemuClipboardSelection {
+    QEMU_CLIPBOARD_SELECTION_CLIPBOARD,
+    QEMU_CLIPBOARD_SELECTION_PRIMARY,
+    QEMU_CLIPBOARD_SELECTION_SECONDARY,
+    QEMU_CLIPBOARD_SELECTION__COUNT,
+};
+
+/**
+ * struct QemuClipboardPeer
+ *
+ * @name: peer name.
+ * @notifier: notifier for clipboard updates.
+ * @request: callback for clipboard data requests.
+ *
+ * Clipboard peer description.
+ */
+struct QemuClipboardPeer {
+    const char *name;
+    Notifier notifier;
+    void (*request)(QemuClipboardInfo *info,
+                    QemuClipboardType type);
+};
+
+/**
+ * enum QemuClipboardNotifyType
+ *
+ * @QEMU_CLIPBOARD_UPDATE_INFO: clipboard info update
+ * @QEMU_CLIPBOARD_RESET_SERIAL: reset clipboard serial
+ *
+ * Clipboard notify type.
+ */
+enum QemuClipboardNotifyType {
+    QEMU_CLIPBOARD_UPDATE_INFO,
+    QEMU_CLIPBOARD_RESET_SERIAL,
+};
+
+/**
+ * struct QemuClipboardNotify
+ *
+ * @type: the type of event.
+ * @info: a QemuClipboardInfo event.
+ *
+ * Clipboard notify data.
+ */
+struct QemuClipboardNotify {
+    QemuClipboardNotifyType type;
+    union {
+        QemuClipboardInfo *info;
+    };
+};
+
+/**
+ * struct QemuClipboardInfo
+ *
+ * @refcount: reference counter.
+ * @owner: clipboard owner.
+ * @selection: clipboard selection.
+ * @types: clipboard data array (one entry per type).
+ * @has_serial: whether @serial is available.
+ * @serial: the grab serial counter.
+ *
+ * Clipboard content data and metadata.
+ */
+struct QemuClipboardInfo {
+    uint32_t refcount;
+    QemuClipboardPeer *owner;
+    QemuClipboardSelection selection;
+    bool has_serial;
+    uint32_t serial;
+    struct {
+        bool available;
+        bool requested;
+        size_t size;
+        void *data;
+    } types[QEMU_CLIPBOARD_TYPE__COUNT];
+};
+
+/**
+ * qemu_clipboard_peer_register
+ *
+ * @peer: peer information.
+ *
+ * Register clipboard peer.  Registering is needed for both active
+ * (set+grab clipboard) and passive (watch clipboard for updates)
+ * interaction with the qemu clipboard.
+ */
+void qemu_clipboard_peer_register(QemuClipboardPeer *peer);
+
+/**
+ * qemu_clipboard_peer_unregister
+ *
+ * @peer: peer information.
+ *
+ * Unregister clipboard peer.
+ */
+void qemu_clipboard_peer_unregister(QemuClipboardPeer *peer);
+
+/**
+ * qemu_clipboard_peer_owns
+ *
+ * @peer: peer information.
+ * @selection: clipboard selection.
+ *
+ * Return TRUE if the peer owns the clipboard.
+ */
+bool qemu_clipboard_peer_owns(QemuClipboardPeer *peer,
+                              QemuClipboardSelection selection);
+
+/**
+ * qemu_clipboard_peer_release
+ *
+ * @peer: peer information.
+ * @selection: clipboard selection.
+ *
+ * If the peer owns the clipboard, release it.
+ */
+void qemu_clipboard_peer_release(QemuClipboardPeer *peer,
+                                 QemuClipboardSelection selection);
+
+/**
+ * qemu_clipboard_info
+ *
+ * @selection: clipboard selection.
+ *
+ * Return the current clipboard data & owner information.
+ */
+QemuClipboardInfo *qemu_clipboard_info(QemuClipboardSelection selection);
+
+/**
+ * qemu_clipboard_check_serial
+ *
+ * @info: clipboard info.
+ * @client: whether to check from the client context and priority.
+ *
+ * Return TRUE if the @info has a higher serial than the current clipboard.
+ */
+bool qemu_clipboard_check_serial(QemuClipboardInfo *info, bool client);
+
+/**
+ * qemu_clipboard_info_new
+ *
+ * @owner: clipboard owner.
+ * @selection: clipboard selection.
+ *
+ * Allocate a new QemuClipboardInfo and initialize it with the given
+ * @owner and @selection.
+ *
+ * QemuClipboardInfo is a reference-counted struct.  The new struct is
+ * returned with a reference already taken (i.e. reference count is
+ * one).
+ */
+QemuClipboardInfo *qemu_clipboard_info_new(QemuClipboardPeer *owner,
+                                           QemuClipboardSelection selection);
+/**
+ * qemu_clipboard_info_ref
+ *
+ * @info: clipboard info.
+ *
+ * Increase @info reference count.
+ */
+QemuClipboardInfo *qemu_clipboard_info_ref(QemuClipboardInfo *info);
+
+/**
+ * qemu_clipboard_info_unref
+ *
+ * @info: clipboard info.
+ *
+ * Decrease @info reference count.  When the count goes down to zero
+ * free the @info struct itself and all clipboard data.
+ */
+void qemu_clipboard_info_unref(QemuClipboardInfo *info);
+
+/**
+ * qemu_clipboard_update
+ *
+ * @info: clipboard info.
+ *
+ * Update the qemu clipboard.  Notify all registered peers (including
+ * the clipboard owner) that the qemu clipboard has been updated.
+ *
+ * This is used for both new completely clipboard content and for
+ * clipboard data updates in response to qemu_clipboard_request()
+ * calls.
+ */
+void qemu_clipboard_update(QemuClipboardInfo *info);
+
+/**
+ * qemu_clipboard_reset_serial
+ *
+ * Reset the clipboard serial.
+ */
+void qemu_clipboard_reset_serial(void);
+
+/**
+ * qemu_clipboard_request
+ *
+ * @info: clipboard info.
+ * @type: clipboard data type.
+ *
+ * Request clipboard content.  Typically the clipboard owner only
+ * advertises the available data types and provides the actual data
+ * only on request.
+ */
+void qemu_clipboard_request(QemuClipboardInfo *info,
+                            QemuClipboardType type);
+
+/**
+ * qemu_clipboard_set_data
+ *
+ * @peer: clipboard peer.
+ * @info: clipboard info.
+ * @type: clipboard data type.
+ * @size: data size.
+ * @data: data blob.
+ * @update: notify peers about the update.
+ *
+ * Set clipboard content for the given @type.  This function will make
+ * a copy of the content data and store that.
+ */
+void qemu_clipboard_set_data(QemuClipboardPeer *peer,
+                             QemuClipboardInfo *info,
+                             QemuClipboardType type,
+                             uint32_t size,
+                             const void *data,
+                             bool update);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QemuClipboardInfo, qemu_clipboard_info_unref)
+
+#endif /* QEMU_CLIPBOARD_H */
index e7303d8b98a895dcc0e842621d7d4e710f06f086..a4a49ffc640c8f83897cb9f564c8d5c96eb08c69 100644 (file)
@@ -4,13 +4,30 @@
 #include "ui/qemu-pixman.h"
 #include "qom/object.h"
 #include "qemu/notify.h"
-#include "qemu/error-report.h"
 #include "qapi/qapi-types-ui.h"
+#include "ui/input.h"
+#include "ui/surface.h"
 
-#ifdef CONFIG_OPENGL
-# include <epoxy/gl.h>
-# include "ui/shader.h"
-#endif
+#define TYPE_QEMU_CONSOLE "qemu-console"
+OBJECT_DECLARE_TYPE(QemuConsole, QemuConsoleClass, QEMU_CONSOLE)
+
+#define TYPE_QEMU_GRAPHIC_CONSOLE "qemu-graphic-console"
+OBJECT_DECLARE_SIMPLE_TYPE(QemuGraphicConsole, QEMU_GRAPHIC_CONSOLE)
+
+#define TYPE_QEMU_TEXT_CONSOLE "qemu-text-console"
+OBJECT_DECLARE_SIMPLE_TYPE(QemuTextConsole, QEMU_TEXT_CONSOLE)
+
+#define TYPE_QEMU_FIXED_TEXT_CONSOLE "qemu-fixed-text-console"
+OBJECT_DECLARE_SIMPLE_TYPE(QemuFixedTextConsole, QEMU_FIXED_TEXT_CONSOLE)
+
+#define QEMU_IS_GRAPHIC_CONSOLE(c) \
+    object_dynamic_cast(OBJECT(c), TYPE_QEMU_GRAPHIC_CONSOLE)
+
+#define QEMU_IS_TEXT_CONSOLE(c) \
+    object_dynamic_cast(OBJECT(c), TYPE_QEMU_TEXT_CONSOLE)
+
+#define QEMU_IS_FIXED_TEXT_CONSOLE(c) \
+    object_dynamic_cast(OBJECT(c), TYPE_QEMU_FIXED_TEXT_CONSOLE)
 
 /* keyboard/mouse support */
 
@@ -65,11 +82,12 @@ void qemu_remove_led_event_handler(QEMUPutLEDEntry *entry);
 
 void kbd_put_ledstate(int ledstate);
 
-void hmp_mouse_set(Monitor *mon, const QDict *qdict);
+bool qemu_mouse_set(int index, Error **errp);
 
 /* keysym is a unicode code except for special keys (see QEMU_KEY_xxx
    constants) */
 #define QEMU_KEY_ESC1(c) ((c) | 0xe100)
+#define QEMU_KEY_TAB        0x0009
 #define QEMU_KEY_BACKSPACE  0x007f
 #define QEMU_KEY_UP         QEMU_KEY_ESC1('A')
 #define QEMU_KEY_DOWN       QEMU_KEY_ESC1('B')
@@ -90,33 +108,41 @@ void hmp_mouse_set(Monitor *mon, const QDict *qdict);
 #define QEMU_KEY_CTRL_PAGEUP     0xe406
 #define QEMU_KEY_CTRL_PAGEDOWN   0xe407
 
-void kbd_put_keysym_console(QemuConsole *s, int keysym);
-bool kbd_put_qcode_console(QemuConsole *s, int qcode, bool ctrl);
-void kbd_put_string_console(QemuConsole *s, const char *str, int len);
-void kbd_put_keysym(int keysym);
-
+void qemu_text_console_put_keysym(QemuTextConsole *s, int keysym);
+bool qemu_text_console_put_qcode(QemuTextConsole *s, int qcode, bool ctrl);
+void qemu_text_console_put_string(QemuTextConsole *s, const char *str, int len);
+
+/* Touch devices */
+typedef struct touch_slot {
+    int x;
+    int y;
+    int tracking_id;
+} touch_slot;
+
+void console_handle_touch_event(QemuConsole *con,
+                                struct touch_slot touch_slots[INPUT_EVENT_SLOTS_MAX],
+                                uint64_t num_slot,
+                                int width, int height,
+                                double x, double y,
+                                InputMultiTouchType type,
+                                Error **errp);
 /* consoles */
 
-#define TYPE_QEMU_CONSOLE "qemu-console"
-OBJECT_DECLARE_TYPE(QemuConsole, QemuConsoleClass, QEMU_CONSOLE)
-
-
 struct QemuConsoleClass {
     ObjectClass parent_class;
 };
 
-#define QEMU_ALLOCATED_FLAG     0x01
-
-typedef struct DisplaySurface {
-    pixman_format_code_t format;
-    pixman_image_t *image;
-    uint8_t flags;
-#ifdef CONFIG_OPENGL
-    GLenum glformat;
-    GLenum gltype;
-    GLuint texture;
-#endif
-} DisplaySurface;
+typedef struct ScanoutTexture {
+    uint32_t backing_id;
+    bool backing_y_0_top;
+    uint32_t backing_width;
+    uint32_t backing_height;
+    uint32_t x;
+    uint32_t y;
+    uint32_t width;
+    uint32_t height;
+    void *d3d_tex2d;
+} ScanoutTexture;
 
 typedef struct QemuUIInfo {
     /* physical dimension */
@@ -127,19 +153,20 @@ typedef struct QemuUIInfo {
     int       yoff;
     uint32_t  width;
     uint32_t  height;
+    uint32_t  refresh_rate;
 } QemuUIInfo;
 
 /* cursor data format is 32bit RGBA */
 typedef struct QEMUCursor {
-    int                 width, height;
+    uint16_t            width, height;
     int                 hot_x, hot_y;
     int                 refcount;
     uint32_t            data[];
 } QEMUCursor;
 
-QEMUCursor *cursor_alloc(int width, int height);
-void cursor_get(QEMUCursor *c);
-void cursor_put(QEMUCursor *c);
+QEMUCursor *cursor_alloc(uint16_t width, uint16_t height);
+QEMUCursor *cursor_ref(QEMUCursor *c);
+void cursor_unref(QEMUCursor *c);
 QEMUCursor *cursor_builtin_hidden(void);
 QEMUCursor *cursor_builtin_left_ptr(void);
 void cursor_print_ascii_art(QEMUCursor *c, const char *prefix);
@@ -166,60 +193,96 @@ typedef struct QemuDmaBuf {
     uint32_t  fourcc;
     uint64_t  modifier;
     uint32_t  texture;
+    uint32_t  x;
+    uint32_t  y;
+    uint32_t  backing_width;
+    uint32_t  backing_height;
     bool      y0_top;
+    void      *sync;
+    int       fence_fd;
+    bool      allow_fences;
+    bool      draw_submitted;
 } QemuDmaBuf;
 
+enum display_scanout {
+    SCANOUT_NONE,
+    SCANOUT_SURFACE,
+    SCANOUT_TEXTURE,
+    SCANOUT_DMABUF,
+};
+
+typedef struct DisplayScanout {
+    enum display_scanout kind;
+    union {
+        /* DisplaySurface *surface; is kept in QemuConsole */
+        ScanoutTexture texture;
+        QemuDmaBuf *dmabuf;
+    };
+} DisplayScanout;
+
 typedef struct DisplayState DisplayState;
+typedef struct DisplayGLCtx DisplayGLCtx;
 
 typedef struct DisplayChangeListenerOps {
     const char *dpy_name;
 
+    /* optional */
     void (*dpy_refresh)(DisplayChangeListener *dcl);
 
+    /* optional */
     void (*dpy_gfx_update)(DisplayChangeListener *dcl,
                            int x, int y, int w, int h);
+    /* optional */
     void (*dpy_gfx_switch)(DisplayChangeListener *dcl,
                            struct DisplaySurface *new_surface);
+    /* optional */
     bool (*dpy_gfx_check_format)(DisplayChangeListener *dcl,
                                  pixman_format_code_t format);
 
+    /* optional */
     void (*dpy_text_cursor)(DisplayChangeListener *dcl,
                             int x, int y);
+    /* optional */
     void (*dpy_text_resize)(DisplayChangeListener *dcl,
                             int w, int h);
+    /* optional */
     void (*dpy_text_update)(DisplayChangeListener *dcl,
                             int x, int y, int w, int h);
 
+    /* optional */
     void (*dpy_mouse_set)(DisplayChangeListener *dcl,
                           int x, int y, int on);
+    /* optional */
     void (*dpy_cursor_define)(DisplayChangeListener *dcl,
                               QEMUCursor *cursor);
 
-    QEMUGLContext (*dpy_gl_ctx_create)(DisplayChangeListener *dcl,
-                                       QEMUGLParams *params);
-    void (*dpy_gl_ctx_destroy)(DisplayChangeListener *dcl,
-                               QEMUGLContext ctx);
-    int (*dpy_gl_ctx_make_current)(DisplayChangeListener *dcl,
-                                   QEMUGLContext ctx);
-    QEMUGLContext (*dpy_gl_ctx_get_current)(DisplayChangeListener *dcl);
-
+    /* required if GL */
     void (*dpy_gl_scanout_disable)(DisplayChangeListener *dcl);
+    /* required if GL */
     void (*dpy_gl_scanout_texture)(DisplayChangeListener *dcl,
                                    uint32_t backing_id,
                                    bool backing_y_0_top,
                                    uint32_t backing_width,
                                    uint32_t backing_height,
                                    uint32_t x, uint32_t y,
-                                   uint32_t w, uint32_t h);
+                                   uint32_t w, uint32_t h,
+                                   void *d3d_tex2d);
+    /* optional (default to true if has dpy_gl_scanout_dmabuf) */
+    bool (*dpy_has_dmabuf)(DisplayChangeListener *dcl);
+    /* optional */
     void (*dpy_gl_scanout_dmabuf)(DisplayChangeListener *dcl,
                                   QemuDmaBuf *dmabuf);
+    /* optional */
     void (*dpy_gl_cursor_dmabuf)(DisplayChangeListener *dcl,
                                  QemuDmaBuf *dmabuf, bool have_hot,
                                  uint32_t hot_x, uint32_t hot_y);
+    /* optional */
     void (*dpy_gl_cursor_position)(DisplayChangeListener *dcl,
                                    uint32_t pos_x, uint32_t pos_y);
+    /* optional */
     void (*dpy_gl_release_dmabuf)(DisplayChangeListener *dcl,
                                   QemuDmaBuf *dmabuf);
+    /* required if GL */
     void (*dpy_gl_update)(DisplayChangeListener *dcl,
                           uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 
@@ -234,41 +297,41 @@ struct DisplayChangeListener {
     QLIST_ENTRY(DisplayChangeListener) next;
 };
 
-DisplayState *init_displaystate(void);
-DisplaySurface *qemu_create_displaysurface_from(int width, int height,
-                                                pixman_format_code_t format,
-                                                int linesize, uint8_t *data);
-DisplaySurface *qemu_create_displaysurface_pixman(pixman_image_t *image);
-DisplaySurface *qemu_create_message_surface(int w, int h,
-                                            const char *msg);
-PixelFormat qemu_default_pixelformat(int bpp);
-
-DisplaySurface *qemu_create_displaysurface(int width, int height);
-void qemu_free_displaysurface(DisplaySurface *surface);
-
-static inline int is_surface_bgr(DisplaySurface *surface)
-{
-    if (PIXMAN_FORMAT_BPP(surface->format) == 32 &&
-        PIXMAN_FORMAT_TYPE(surface->format) == PIXMAN_TYPE_ABGR) {
-        return 1;
-    } else {
-        return 0;
-    }
-}
+typedef struct DisplayGLCtxOps {
+    bool (*dpy_gl_ctx_is_compatible_dcl)(DisplayGLCtx *dgc,
+                                         DisplayChangeListener *dcl);
+    QEMUGLContext (*dpy_gl_ctx_create)(DisplayGLCtx *dgc,
+                                       QEMUGLParams *params);
+    void (*dpy_gl_ctx_destroy)(DisplayGLCtx *dgc,
+                               QEMUGLContext ctx);
+    int (*dpy_gl_ctx_make_current)(DisplayGLCtx *dgc,
+                                   QEMUGLContext ctx);
+    void (*dpy_gl_ctx_create_texture)(DisplayGLCtx *dgc,
+                                      DisplaySurface *surface);
+    void (*dpy_gl_ctx_destroy_texture)(DisplayGLCtx *dgc,
+                                      DisplaySurface *surface);
+    void (*dpy_gl_ctx_update_texture)(DisplayGLCtx *dgc,
+                                      DisplaySurface *surface,
+                                      int x, int y, int w, int h);
+} DisplayGLCtxOps;
+
+struct DisplayGLCtx {
+    const DisplayGLCtxOps *ops;
+#ifdef CONFIG_OPENGL
+    QemuGLShader *gls; /* optional shared shader */
+#endif
+};
 
-static inline int is_buffer_shared(DisplaySurface *surface)
-{
-    return !(surface->flags & QEMU_ALLOCATED_FLAG);
-}
+DisplayState *init_displaystate(void);
 
 void register_displaychangelistener(DisplayChangeListener *dcl);
 void update_displaychangelistener(DisplayChangeListener *dcl,
                                   uint64_t interval);
 void unregister_displaychangelistener(DisplayChangeListener *dcl);
 
-bool dpy_ui_info_supported(QemuConsole *con);
+bool dpy_ui_info_supported(const QemuConsole *con);
 const QemuUIInfo *dpy_get_ui_info(const QemuConsole *con);
-int dpy_set_ui_info(QemuConsole *con, QemuUIInfo *info);
+int dpy_set_ui_info(QemuConsole *con, QemuUIInfo *info, bool delay);
 
 void dpy_gfx_update(QemuConsole *con, int x, int y, int w, int h);
 void dpy_gfx_update_full(QemuConsole *con);
@@ -287,7 +350,8 @@ void dpy_gl_scanout_disable(QemuConsole *con);
 void dpy_gl_scanout_texture(QemuConsole *con,
                             uint32_t backing_id, bool backing_y_0_top,
                             uint32_t backing_width, uint32_t backing_height,
-                            uint32_t x, uint32_t y, uint32_t w, uint32_t h);
+                            uint32_t x, uint32_t y, uint32_t w, uint32_t h,
+                            void *d3d_tex2d);
 void dpy_gl_scanout_dmabuf(QemuConsole *con,
                            QemuDmaBuf *dmabuf);
 void dpy_gl_cursor_dmabuf(QemuConsole *con, QemuDmaBuf *dmabuf,
@@ -303,47 +367,8 @@ QEMUGLContext dpy_gl_ctx_create(QemuConsole *con,
                                 QEMUGLParams *params);
 void dpy_gl_ctx_destroy(QemuConsole *con, QEMUGLContext ctx);
 int dpy_gl_ctx_make_current(QemuConsole *con, QEMUGLContext ctx);
-QEMUGLContext dpy_gl_ctx_get_current(QemuConsole *con);
 
 bool console_has_gl(QemuConsole *con);
-bool console_has_gl_dmabuf(QemuConsole *con);
-
-static inline int surface_stride(DisplaySurface *s)
-{
-    return pixman_image_get_stride(s->image);
-}
-
-static inline void *surface_data(DisplaySurface *s)
-{
-    return pixman_image_get_data(s->image);
-}
-
-static inline int surface_width(DisplaySurface *s)
-{
-    return pixman_image_get_width(s->image);
-}
-
-static inline int surface_height(DisplaySurface *s)
-{
-    return pixman_image_get_height(s->image);
-}
-
-static inline int surface_bits_per_pixel(DisplaySurface *s)
-{
-    int bits = PIXMAN_FORMAT_BPP(s->format);
-    return bits;
-}
-
-static inline int surface_bytes_per_pixel(DisplaySurface *s)
-{
-    int bits = PIXMAN_FORMAT_BPP(s->format);
-    return DIV_ROUND_UP(bits, 8);
-}
-
-static inline pixman_format_code_t surface_format(DisplaySurface *s)
-{
-    return s->format;
-}
 
 typedef uint32_t console_ch_t;
 
@@ -352,13 +377,21 @@ static inline void console_write_ch(console_ch_t *dest, uint32_t ch)
     *dest = ch;
 }
 
+enum {
+    GRAPHIC_FLAGS_NONE     = 0,
+    /* require a console/display with GL callbacks */
+    GRAPHIC_FLAGS_GL       = 1 << 0,
+    /* require a console/display with DMABUF import */
+    GRAPHIC_FLAGS_DMABUF   = 1 << 1,
+};
+
 typedef struct GraphicHwOps {
+    int (*get_flags)(void *opaque); /* optional, default 0 */
     void (*invalidate)(void *opaque);
     void (*gfx_update)(void *opaque);
     bool gfx_update_async; /* if true, calls graphic_hw_update_done() */
     void (*text_update)(void *opaque, console_ch_t *text);
-    void (*update_interval)(void *opaque, uint64_t interval);
-    int (*ui_info)(void *opaque, uint32_t head, QemuUIInfo *info);
+    void (*ui_info)(void *opaque, uint32_t head, QemuUIInfo *info);
     void (*gl_block)(void *opaque, bool block);
 } GraphicHwOps;
 
@@ -378,11 +411,13 @@ void graphic_hw_gl_block(QemuConsole *con, bool block);
 
 void qemu_console_early_init(void);
 
+void qemu_console_set_display_gl_ctx(QemuConsole *con, DisplayGLCtx *ctx);
+
 QemuConsole *qemu_console_lookup_by_index(unsigned int index);
 QemuConsole *qemu_console_lookup_by_device(DeviceState *dev, uint32_t head);
 QemuConsole *qemu_console_lookup_by_device_name(const char *device_id,
                                                 uint32_t head, Error **errp);
-QemuConsole *qemu_console_lookup_unused(void);
+QEMUCursor *qemu_console_get_cursor(QemuConsole *con);
 bool qemu_console_is_visible(QemuConsole *con);
 bool qemu_console_is_graphic(QemuConsole *con);
 bool qemu_console_is_fixedsize(QemuConsole *con);
@@ -390,7 +425,6 @@ bool qemu_console_is_gl_blocked(QemuConsole *con);
 char *qemu_console_get_label(QemuConsole *con);
 int qemu_console_get_index(QemuConsole *con);
 uint32_t qemu_console_get_head(QemuConsole *con);
-QemuUIInfo *qemu_console_get_ui_info(QemuConsole *con);
 int qemu_console_get_width(QemuConsole *con, int fallback);
 int qemu_console_get_height(QemuConsole *con, int fallback);
 /* Return the low-level window id for the console */
@@ -401,6 +435,8 @@ void qemu_console_set_window_id(QemuConsole *con, int window_id);
 void console_select(unsigned int index);
 void qemu_console_resize(QemuConsole *con, int width, int height);
 DisplaySurface *qemu_console_surface(QemuConsole *con);
+void coroutine_fn qemu_console_co_wait_update(QemuConsole *con);
+int qemu_invalidate_text_consoles(void);
 
 /* console-gl.c */
 #ifdef CONFIG_OPENGL
@@ -426,12 +462,14 @@ struct QemuDisplay {
     DisplayType type;
     void (*early_init)(DisplayOptions *opts);
     void (*init)(DisplayState *ds, DisplayOptions *opts);
+    const char *vc;
 };
 
 void qemu_display_register(QemuDisplay *ui);
 bool qemu_display_find_default(DisplayOptions *opts);
 void qemu_display_early_init(DisplayOptions *opts);
 void qemu_display_init(DisplayState *ds, DisplayOptions *opts);
+const char *qemu_display_get_vc(DisplayOptions *opts);
 void qemu_display_help(void);
 
 /* vnc.c */
@@ -440,10 +478,23 @@ void vnc_display_open(const char *id, Error **errp);
 void vnc_display_add_client(const char *id, int csock, bool skipauth);
 int vnc_display_password(const char *id, const char *password);
 int vnc_display_pw_expire(const char *id, time_t expires);
-QemuOpts *vnc_parse(const char *str, Error **errp);
+void vnc_parse(const char *str);
 int vnc_init_func(void *opaque, QemuOpts *opts, Error **errp);
+bool vnc_display_reload_certs(const char *id,  Error **errp);
+bool vnc_display_update(DisplayUpdateOptionsVNC *arg, Error **errp);
 
 /* input.c */
 int index_from_key(const char *key, size_t key_length);
 
+#ifdef CONFIG_LINUX
+/* udmabuf.c */
+int udmabuf_fd(void);
+#endif
+
+/* util.c */
+bool qemu_console_fill_device_address(QemuConsole *con,
+                                      char *device_address,
+                                      size_t size,
+                                      Error **errp);
+
 #endif
diff --git a/include/ui/dbus-display.h b/include/ui/dbus-display.h
new file mode 100644 (file)
index 0000000..7c9ec1a
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef DBUS_DISPLAY_H
+#define DBUS_DISPLAY_H
+
+#include "qapi/error.h"
+#include "ui/dbus-module.h"
+
+static inline bool qemu_using_dbus_display(Error **errp)
+{
+    if (!using_dbus_display) {
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
+                  "D-Bus display is not in use");
+        return false;
+    }
+    return true;
+}
+
+#endif /* DBUS_DISPLAY_H */
diff --git a/include/ui/dbus-module.h b/include/ui/dbus-module.h
new file mode 100644 (file)
index 0000000..5442ee0
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef DBUS_MODULE_H
+#define DBUS_MODULE_H
+
+struct QemuDBusDisplayOps {
+    bool (*add_client)(int csock, Error **errp);
+};
+
+extern int using_dbus_display;
+extern struct QemuDBusDisplayOps qemu_dbus_display;
+
+#endif /* DBUS_MODULE_H */
index f004ce11a7bc6843e48bcc84f872c18a19cceddf..c2761d747a4e9b68029f9531fdb93b8caecf66ed 100644 (file)
@@ -4,11 +4,10 @@
 #include "ui/console.h"
 #include "ui/egl-helpers.h"
 
-QEMUGLContext qemu_egl_create_context(DisplayChangeListener *dcl,
+QEMUGLContext qemu_egl_create_context(DisplayGLCtx *dgc,
                                       QEMUGLParams *params);
-void qemu_egl_destroy_context(DisplayChangeListener *dcl, QEMUGLContext ctx);
-int qemu_egl_make_context_current(DisplayChangeListener *dcl,
+void qemu_egl_destroy_context(DisplayGLCtx *dgc, QEMUGLContext ctx);
+int qemu_egl_make_context_current(DisplayGLCtx *dgc,
                                   QEMUGLContext ctx);
-QEMUGLContext qemu_egl_get_current_context(DisplayChangeListener *dcl);
 
 #endif /* EGL_CONTEXT_H */
index 94a4b3e6f3bd7cb7239476676f072a72b25150c2..4b8c0d2281ca2b9cfcb937e08962c4f132b5e859 100644 (file)
@@ -3,13 +3,16 @@
 
 #include <epoxy/gl.h>
 #include <epoxy/egl.h>
+#ifdef CONFIG_GBM
 #include <gbm.h>
+#endif
 #include "ui/console.h"
 #include "ui/shader.h"
 
 extern EGLDisplay *qemu_egl_display;
 extern EGLConfig qemu_egl_config;
 extern DisplayGLMode qemu_egl_mode;
+extern bool qemu_egl_angle_d3d;
 
 typedef struct egl_fb {
     int width;
@@ -17,8 +20,11 @@ typedef struct egl_fb {
     GLuint texture;
     GLuint framebuffer;
     bool delete_texture;
+    QemuDmaBuf *dmabuf;
 } egl_fb;
 
+#define EGL_FB_INIT { 0, }
+
 void egl_fb_destroy(egl_fb *fb);
 void egl_fb_setup_default(egl_fb *fb, int width, int height);
 void egl_fb_setup_for_tex(egl_fb *fb, int width, int height,
@@ -26,16 +32,18 @@ void egl_fb_setup_for_tex(egl_fb *fb, int width, int height,
 void egl_fb_setup_new_tex(egl_fb *fb, int width, int height);
 void egl_fb_blit(egl_fb *dst, egl_fb *src, bool flip);
 void egl_fb_read(DisplaySurface *dst, egl_fb *src);
+void egl_fb_read_rect(DisplaySurface *dst, egl_fb *src, int x, int y, int w, int h);
 
 void egl_texture_blit(QemuGLShader *gls, egl_fb *dst, egl_fb *src, bool flip);
 void egl_texture_blend(QemuGLShader *gls, egl_fb *dst, egl_fb *src, bool flip,
                        int x, int y, double scale_x, double scale_y);
 
-#ifdef CONFIG_OPENGL_DMABUF
+extern EGLContext qemu_egl_rn_ctx;
+
+#ifdef CONFIG_GBM
 
 extern int qemu_egl_rn_fd;
 extern struct gbm_device *qemu_egl_rn_gbm_dev;
-extern EGLContext qemu_egl_rn_ctx;
 
 int egl_rendernode_init(const char *rendernode, DisplayGLMode mode);
 int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc,
@@ -43,13 +51,29 @@ int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc,
 
 void egl_dmabuf_import_texture(QemuDmaBuf *dmabuf);
 void egl_dmabuf_release_texture(QemuDmaBuf *dmabuf);
+void egl_dmabuf_create_sync(QemuDmaBuf *dmabuf);
+void egl_dmabuf_create_fence(QemuDmaBuf *dmabuf);
 
 #endif
 
 EGLSurface qemu_egl_init_surface_x11(EGLContext ectx, EGLNativeWindowType win);
 
+#if defined(CONFIG_X11) || defined(CONFIG_GBM)
+
 int qemu_egl_init_dpy_x11(EGLNativeDisplayType dpy, DisplayGLMode mode);
 int qemu_egl_init_dpy_mesa(EGLNativeDisplayType dpy, DisplayGLMode mode);
+
+#endif
+
+#ifdef WIN32
+int qemu_egl_init_dpy_win32(EGLNativeDisplayType dpy, DisplayGLMode mode);
+#endif
+
 EGLContext qemu_egl_init_ctx(void);
+bool qemu_egl_has_dmabuf(void);
+
+bool egl_init(const char *rendernode, DisplayGLMode mode, Error **errp);
+
+const char *qemu_egl_get_error_string(void);
 
 #endif /* EGL_HELPERS_H */
index eaeb450f913e02b50c0a7c1e4e190da66ed1338e..aa3d637029931d1323088f28ba7c2bef9f9cbe3e 100644 (file)
 #include <gdk/gdkwayland.h>
 #endif
 
+#include "ui/clipboard.h"
+#include "ui/console.h"
 #include "ui/kbd-state.h"
 #if defined(CONFIG_OPENGL)
 #include "ui/egl-helpers.h"
 #include "ui/egl-context.h"
 #endif
+#ifdef CONFIG_VTE
+#include "qemu/fifo8.h"
+#endif
 
-#define MILLISEC_PER_SEC 1000000
+#define MAX_VCS 10
 
 typedef struct GtkDisplayState GtkDisplayState;
 
 typedef struct VirtualGfxConsole {
     GtkWidget *drawing_area;
+    DisplayGLCtx dgc;
     DisplayChangeListener dcl;
     QKbdState *kbd;
     DisplaySurface *ds;
@@ -50,6 +56,7 @@ typedef struct VirtualGfxConsole {
     int cursor_y;
     bool y0_top;
     bool scanout_mode;
+    bool has_dmabuf;
 #endif
 } VirtualGfxConsole;
 
@@ -59,6 +66,7 @@ typedef struct VirtualVteConsole {
     GtkWidget *scrollbar;
     GtkWidget *terminal;
     Chardev *chr;
+    Fifo8 out_fifo;
     bool echo;
 } VirtualVteConsole;
 #endif
@@ -84,10 +92,71 @@ typedef struct VirtualConsole {
     };
 } VirtualConsole;
 
+struct GtkDisplayState {
+    GtkWidget *window;
+
+    GtkWidget *menu_bar;
+
+    GtkAccelGroup *accel_group;
+
+    GtkWidget *machine_menu_item;
+    GtkWidget *machine_menu;
+    GtkWidget *pause_item;
+    GtkWidget *reset_item;
+    GtkWidget *powerdown_item;
+    GtkWidget *quit_item;
+
+    GtkWidget *view_menu_item;
+    GtkWidget *view_menu;
+    GtkWidget *full_screen_item;
+    GtkWidget *copy_item;
+    GtkWidget *zoom_in_item;
+    GtkWidget *zoom_out_item;
+    GtkWidget *zoom_fixed_item;
+    GtkWidget *zoom_fit_item;
+    GtkWidget *grab_item;
+    GtkWidget *grab_on_hover_item;
+
+    int nb_vcs;
+    VirtualConsole vc[MAX_VCS];
+
+    GtkWidget *show_tabs_item;
+    GtkWidget *untabify_item;
+    GtkWidget *show_menubar_item;
+
+    GtkWidget *vbox;
+    GtkWidget *notebook;
+    int button_mask;
+    gboolean last_set;
+    int last_x;
+    int last_y;
+    int grab_x_root;
+    int grab_y_root;
+    VirtualConsole *kbd_owner;
+    VirtualConsole *ptr_owner;
+
+    gboolean full_screen;
+
+    GdkCursor *null_cursor;
+    Notifier mouse_mode_notifier;
+    gboolean free_scale;
+
+    bool external_pause_update;
+
+    QemuClipboardPeer cbpeer;
+    uint32_t cbpending[QEMU_CLIPBOARD_SELECTION__COUNT];
+    GtkClipboard *gtkcb[QEMU_CLIPBOARD_SELECTION__COUNT];
+    bool cbowner[QEMU_CLIPBOARD_SELECTION__COUNT];
+
+    DisplayOptions *opts;
+};
+
 extern bool gtk_use_gl_area;
 
 /* ui/gtk.c */
 void gd_update_windowsize(VirtualConsole *vc);
+void gd_update_monitor_refresh_rate(VirtualConsole *vc, GtkWidget *widget);
+void gd_hw_gl_flushed(void *vc);
 
 /* ui/gtk-egl.c */
 void gd_egl_init(VirtualConsole *vc);
@@ -97,7 +166,7 @@ void gd_egl_update(DisplayChangeListener *dcl,
 void gd_egl_refresh(DisplayChangeListener *dcl);
 void gd_egl_switch(DisplayChangeListener *dcl,
                    DisplaySurface *surface);
-QEMUGLContext gd_egl_create_context(DisplayChangeListener *dcl,
+QEMUGLContext gd_egl_create_context(DisplayGLCtx *dgc,
                                     QEMUGLParams *params);
 void gd_egl_scanout_disable(DisplayChangeListener *dcl);
 void gd_egl_scanout_texture(DisplayChangeListener *dcl,
@@ -106,7 +175,8 @@ void gd_egl_scanout_texture(DisplayChangeListener *dcl,
                             uint32_t backing_width,
                             uint32_t backing_height,
                             uint32_t x, uint32_t y,
-                            uint32_t w, uint32_t h);
+                            uint32_t w, uint32_t h,
+                            void *d3d_tex2d);
 void gd_egl_scanout_dmabuf(DisplayChangeListener *dcl,
                            QemuDmaBuf *dmabuf);
 void gd_egl_cursor_dmabuf(DisplayChangeListener *dcl,
@@ -114,12 +184,12 @@ void gd_egl_cursor_dmabuf(DisplayChangeListener *dcl,
                           uint32_t hot_x, uint32_t hot_y);
 void gd_egl_cursor_position(DisplayChangeListener *dcl,
                             uint32_t pos_x, uint32_t pos_y);
-void gd_egl_release_dmabuf(DisplayChangeListener *dcl,
-                           QemuDmaBuf *dmabuf);
+void gd_egl_flush(DisplayChangeListener *dcl,
+                  uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 void gd_egl_scanout_flush(DisplayChangeListener *dcl,
                           uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 void gtk_egl_init(DisplayGLMode mode);
-int gd_egl_make_current(DisplayChangeListener *dcl,
+int gd_egl_make_current(DisplayGLCtx *dgc,
                         QEMUGLContext ctx);
 
 /* ui/gtk-gl-area.c */
@@ -130,22 +200,28 @@ void gd_gl_area_update(DisplayChangeListener *dcl,
 void gd_gl_area_refresh(DisplayChangeListener *dcl);
 void gd_gl_area_switch(DisplayChangeListener *dcl,
                        DisplaySurface *surface);
-QEMUGLContext gd_gl_area_create_context(DisplayChangeListener *dcl,
+QEMUGLContext gd_gl_area_create_context(DisplayGLCtx *dgc,
                                         QEMUGLParams *params);
-void gd_gl_area_destroy_context(DisplayChangeListener *dcl,
+void gd_gl_area_destroy_context(DisplayGLCtx *dgc,
                                 QEMUGLContext ctx);
+void gd_gl_area_scanout_dmabuf(DisplayChangeListener *dcl,
+                               QemuDmaBuf *dmabuf);
 void gd_gl_area_scanout_texture(DisplayChangeListener *dcl,
                                 uint32_t backing_id,
                                 bool backing_y_0_top,
                                 uint32_t backing_width,
                                 uint32_t backing_height,
                                 uint32_t x, uint32_t y,
-                                uint32_t w, uint32_t h);
+                                uint32_t w, uint32_t h,
+                                void *d3d_tex2d);
+void gd_gl_area_scanout_disable(DisplayChangeListener *dcl);
 void gd_gl_area_scanout_flush(DisplayChangeListener *dcl,
                               uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 void gtk_gl_area_init(void);
-QEMUGLContext gd_gl_area_get_current_context(DisplayChangeListener *dcl);
-int gd_gl_area_make_current(DisplayChangeListener *dcl,
+int gd_gl_area_make_current(DisplayGLCtx *dgc,
                             QEMUGLContext ctx);
 
+/* gtk-clipboard.c */
+void gd_clipboard_init(GtkDisplayState *gd);
+
 #endif /* UI_GTK_H */
index c86219a1c11a0015c00659530297caec67475ca6..8f9aac562ed24898af3542560a582786559b4b0a 100644 (file)
@@ -8,9 +8,12 @@
 #define INPUT_EVENT_MASK_BTN   (1<<INPUT_EVENT_KIND_BTN)
 #define INPUT_EVENT_MASK_REL   (1<<INPUT_EVENT_KIND_REL)
 #define INPUT_EVENT_MASK_ABS   (1<<INPUT_EVENT_KIND_ABS)
+#define INPUT_EVENT_MASK_MTT   (1<<INPUT_EVENT_KIND_MTT)
 
 #define INPUT_EVENT_ABS_MIN    0x0000
 #define INPUT_EVENT_ABS_MAX    0x7FFF
+#define INPUT_EVENT_SLOTS_MIN  0x0
+#define INPUT_EVENT_SLOTS_MAX  0xa
 
 typedef struct QemuInputHandler QemuInputHandler;
 typedef struct QemuInputHandlerState QemuInputHandlerState;
@@ -27,7 +30,7 @@ struct QemuInputHandler {
 };
 
 QemuInputHandlerState *qemu_input_handler_register(DeviceState *dev,
-                                                   QemuInputHandler *handler);
+                                            const QemuInputHandler *handler);
 void qemu_input_handler_activate(QemuInputHandlerState *s);
 void qemu_input_handler_deactivate(QemuInputHandlerState *s);
 void qemu_input_handler_unregister(QemuInputHandlerState *s);
@@ -54,13 +57,18 @@ void qemu_input_queue_btn(QemuConsole *src, InputButton btn, bool down);
 void qemu_input_update_buttons(QemuConsole *src, uint32_t *button_map,
                                uint32_t button_old, uint32_t button_new);
 
-bool qemu_input_is_absolute(void);
+bool qemu_input_is_absolute(QemuConsole *con);
 int qemu_input_scale_axis(int value,
                           int min_in, int max_in,
                           int min_out, int max_out);
 void qemu_input_queue_rel(QemuConsole *src, InputAxis axis, int value);
 void qemu_input_queue_abs(QemuConsole *src, InputAxis axis, int value,
                           int min_in, int max_in);
+void qemu_input_queue_mtt(QemuConsole *src, InputMultiTouchType type, int slot,
+                          int tracking_id);
+void qemu_input_queue_mtt_abs(QemuConsole *src, InputAxis axis, int value,
+                              int min_in, int max_in,
+                              int slot, int tracking_id);
 
 void qemu_input_check_mode_change(void);
 void qemu_add_mouse_mode_change_notifier(Notifier *notify);
index eb9067dd532c2e76db336d8542b4d1e8424e8208..fb79776128cf39a4cc3f00b64d3febb2e38db50f 100644 (file)
@@ -65,7 +65,7 @@ void qkbd_state_key_event(QKbdState *kbd, QKeyCode qcode, bool down);
  * using qemu_input_event_send_key_delay().
  *
  * @kbd: state tracker state.
- * @delay_ms: the delay in miliseconds.
+ * @delay_ms: the delay in milliseconds.
  */
 void qkbd_state_set_delay(QKbdState *kbd, int delay_ms);
 
diff --git a/include/ui/pixman-minimal.h b/include/ui/pixman-minimal.h
new file mode 100644 (file)
index 0000000..6dd7de1
--- /dev/null
@@ -0,0 +1,239 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Tiny subset of PIXMAN API commonly used by QEMU.
+ *
+ * Copyright 1987, 1988, 1989, 1998  The Open Group
+ * Copyright 1987, 1988, 1989 Digital Equipment Corporation
+ * Copyright 1999, 2004, 2008 Keith Packard
+ * Copyright 2000 SuSE, Inc.
+ * Copyright 2000 Keith Packard, member of The XFree86 Project, Inc.
+ * Copyright 2004, 2005, 2007, 2008, 2009, 2010 Red Hat, Inc.
+ * Copyright 2004 Nicholas Miell
+ * Copyright 2005 Lars Knoll & Zack Rusin, Trolltech
+ * Copyright 2005 Trolltech AS
+ * Copyright 2007 Luca Barbato
+ * Copyright 2008 Aaron Plattner, NVIDIA Corporation
+ * Copyright 2008 Rodrigo Kumpera
+ * Copyright 2008 André Tupinambá
+ * Copyright 2008 Mozilla Corporation
+ * Copyright 2008 Frederic Plourde
+ * Copyright 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2009, 2010 Nokia Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef PIXMAN_MINIMAL_H
+#define PIXMAN_MINIMAL_H
+
+#define PIXMAN_TYPE_OTHER       0
+#define PIXMAN_TYPE_ARGB        2
+#define PIXMAN_TYPE_ABGR        3
+#define PIXMAN_TYPE_BGRA        8
+#define PIXMAN_TYPE_RGBA        9
+
+#define PIXMAN_FORMAT(bpp, type, a, r, g, b) (((bpp) << 24) |   \
+                                              ((type) << 16) |  \
+                                              ((a) << 12) |     \
+                                              ((r) << 8) |      \
+                                              ((g) << 4) |      \
+                                              ((b)))
+
+#define PIXMAN_FORMAT_RESHIFT(val, ofs, num)                            \
+        (((val >> (ofs)) & ((1 << (num)) - 1)) << ((val >> 22) & 3))
+
+#define PIXMAN_FORMAT_BPP(f)    PIXMAN_FORMAT_RESHIFT(f, 24, 8)
+#define PIXMAN_FORMAT_TYPE(f)   (((f) >> 16) & 0x3f)
+#define PIXMAN_FORMAT_A(f)      PIXMAN_FORMAT_RESHIFT(f, 12, 4)
+#define PIXMAN_FORMAT_R(f)      PIXMAN_FORMAT_RESHIFT(f, 8, 4)
+#define PIXMAN_FORMAT_G(f)      PIXMAN_FORMAT_RESHIFT(f, 4, 4)
+#define PIXMAN_FORMAT_B(f)      PIXMAN_FORMAT_RESHIFT(f, 0, 4)
+#define PIXMAN_FORMAT_DEPTH(f)  (PIXMAN_FORMAT_A(f) +   \
+                                 PIXMAN_FORMAT_R(f) +   \
+                                 PIXMAN_FORMAT_G(f) +   \
+                                 PIXMAN_FORMAT_B(f))
+
+typedef enum {
+    /* 32bpp formats */
+    PIXMAN_a8r8g8b8 =    PIXMAN_FORMAT(32, PIXMAN_TYPE_ARGB, 8, 8, 8, 8),
+    PIXMAN_x8r8g8b8 =    PIXMAN_FORMAT(32, PIXMAN_TYPE_ARGB, 0, 8, 8, 8),
+    PIXMAN_a8b8g8r8 =    PIXMAN_FORMAT(32, PIXMAN_TYPE_ABGR, 8, 8, 8, 8),
+    PIXMAN_x8b8g8r8 =    PIXMAN_FORMAT(32, PIXMAN_TYPE_ABGR, 0, 8, 8, 8),
+    PIXMAN_b8g8r8a8 =    PIXMAN_FORMAT(32, PIXMAN_TYPE_BGRA, 8, 8, 8, 8),
+    PIXMAN_b8g8r8x8 =    PIXMAN_FORMAT(32, PIXMAN_TYPE_BGRA, 0, 8, 8, 8),
+    PIXMAN_r8g8b8a8 =    PIXMAN_FORMAT(32, PIXMAN_TYPE_RGBA, 8, 8, 8, 8),
+    PIXMAN_r8g8b8x8 =    PIXMAN_FORMAT(32, PIXMAN_TYPE_RGBA, 0, 8, 8, 8),
+    /* 24bpp formats */
+    PIXMAN_r8g8b8 =      PIXMAN_FORMAT(24, PIXMAN_TYPE_ARGB, 0, 8, 8, 8),
+    PIXMAN_b8g8r8 =      PIXMAN_FORMAT(24, PIXMAN_TYPE_ABGR, 0, 8, 8, 8),
+    /* 16bpp formats */
+    PIXMAN_r5g6b5 =      PIXMAN_FORMAT(16, PIXMAN_TYPE_ARGB, 0, 5, 6, 5),
+    PIXMAN_a1r5g5b5 =    PIXMAN_FORMAT(16, PIXMAN_TYPE_ARGB, 1, 5, 5, 5),
+    PIXMAN_x1r5g5b5 =    PIXMAN_FORMAT(16, PIXMAN_TYPE_ARGB, 0, 5, 5, 5),
+} pixman_format_code_t;
+
+typedef struct pixman_image pixman_image_t;
+
+typedef void (*pixman_image_destroy_func_t)(pixman_image_t *image, void *data);
+
+struct pixman_image {
+    int ref_count;
+    pixman_format_code_t format;
+    int width;
+    int height;
+    int stride;
+    uint32_t *data;
+    uint32_t *free_me;
+    pixman_image_destroy_func_t destroy_func;
+    void *destroy_data;
+};
+
+typedef struct pixman_color {
+    uint16_t    red;
+    uint16_t    green;
+    uint16_t    blue;
+    uint16_t    alpha;
+} pixman_color_t;
+
+static inline uint32_t *create_bits(pixman_format_code_t format,
+                                    int width,
+                                    int height,
+                                    int *rowstride_bytes)
+{
+    int stride = 0;
+    size_t buf_size = 0;
+    int bpp = PIXMAN_FORMAT_BPP(format);
+
+    /*
+     * Calculate the following while checking for overflow truncation:
+     * stride = ((width * bpp + 0x1f) >> 5) * sizeof(uint32_t);
+     */
+
+    if (unlikely(__builtin_mul_overflow(width, bpp, &stride))) {
+        return NULL;
+    }
+
+    if (unlikely(__builtin_add_overflow(stride, 0x1f, &stride))) {
+        return NULL;
+    }
+
+    stride >>= 5;
+
+    stride *= sizeof(uint32_t);
+
+    if (unlikely(__builtin_mul_overflow((size_t) height,
+                                        (size_t) stride,
+                                        &buf_size))) {
+        return NULL;
+    }
+
+    if (rowstride_bytes) {
+        *rowstride_bytes = stride;
+    }
+
+    return g_malloc0(buf_size);
+}
+
+static inline pixman_image_t *pixman_image_create_bits(pixman_format_code_t format,
+                                                       int width,
+                                                       int height,
+                                                       uint32_t *bits,
+                                                       int rowstride_bytes)
+{
+    pixman_image_t *i = g_new0(pixman_image_t, 1);
+
+    i->width = width;
+    i->height = height;
+    i->format = format;
+    if (bits) {
+        i->data = bits;
+    } else {
+        i->free_me = i->data =
+            create_bits(format, width, height, &rowstride_bytes);
+        if (width && height) {
+            assert(i->data);
+        }
+    }
+    i->stride = rowstride_bytes ? rowstride_bytes :
+                            width * DIV_ROUND_UP(PIXMAN_FORMAT_BPP(format), 8);
+    i->ref_count = 1;
+
+    return i;
+}
+
+static inline pixman_image_t *pixman_image_ref(pixman_image_t *i)
+{
+    i->ref_count++;
+    return i;
+}
+
+static inline bool pixman_image_unref(pixman_image_t *i)
+{
+    i->ref_count--;
+
+    if (i->ref_count == 0) {
+        if (i->destroy_func) {
+            i->destroy_func(i, i->destroy_data);
+        }
+        g_free(i->free_me);
+        g_free(i);
+
+        return true;
+    }
+
+    return false;
+}
+
+static inline void pixman_image_set_destroy_function(pixman_image_t *i,
+                                                     pixman_image_destroy_func_t func,
+                                                     void *data)
+
+{
+    i->destroy_func = func;
+    i->destroy_data = data;
+}
+
+static inline uint32_t *pixman_image_get_data(pixman_image_t *i)
+{
+    return i->data;
+}
+
+static inline int pixman_image_get_height(pixman_image_t *i)
+{
+    return i->height;
+}
+
+static inline int pixman_image_get_width(pixman_image_t *i)
+{
+    return i->width;
+}
+
+static inline int pixman_image_get_stride(pixman_image_t *i)
+{
+    return i->stride;
+}
+
+static inline pixman_format_code_t pixman_image_get_format(pixman_image_t *i)
+{
+    return i->format;
+}
+
+#endif /* PIXMAN_MINIMAL_H */
index 87737a6f1629771077a3479d6d0d8f08f9647cca..ef13a8210cc4a9a2997a9f4a3712db69c235647e 100644 (file)
@@ -6,11 +6,11 @@
 #ifndef QEMU_PIXMAN_H
 #define QEMU_PIXMAN_H
 
-/* pixman-0.16.0 headers have a redundant declaration */
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wredundant-decls"
+#ifdef CONFIG_PIXMAN
 #include <pixman.h>
-#pragma GCC diagnostic pop
+#else
+#include "pixman-minimal.h"
+#endif
 
 /*
  * pixman image formats are defined to be native endian,
@@ -19,7 +19,7 @@
  * feeding libjpeg / libpng and writing screenshots.
  */
 
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
 # define PIXMAN_BE_r8g8b8     PIXMAN_r8g8b8
 # define PIXMAN_BE_x8r8g8b8   PIXMAN_x8r8g8b8
 # define PIXMAN_BE_a8r8g8b8   PIXMAN_a8r8g8b8
@@ -32,6 +32,8 @@
 # define PIXMAN_LE_r8g8b8     PIXMAN_b8g8r8
 # define PIXMAN_LE_a8r8g8b8   PIXMAN_b8g8r8a8
 # define PIXMAN_LE_x8r8g8b8   PIXMAN_b8g8r8x8
+# define PIXMAN_LE_a8b8g8r8   PIXMAN_r8g8b8a8
+# define PIXMAN_LE_x8b8g8r8   PIXMAN_r8g8b8x8
 #else
 # define PIXMAN_BE_r8g8b8     PIXMAN_b8g8r8
 # define PIXMAN_BE_x8r8g8b8   PIXMAN_b8g8r8x8
 # define PIXMAN_LE_r8g8b8     PIXMAN_r8g8b8
 # define PIXMAN_LE_a8r8g8b8   PIXMAN_a8r8g8b8
 # define PIXMAN_LE_x8r8g8b8   PIXMAN_x8r8g8b8
+# define PIXMAN_LE_a8b8g8r8   PIXMAN_a8b8g8r8
+# define PIXMAN_LE_x8b8g8r8   PIXMAN_x8b8g8r8
 #endif
 
+#define QEMU_PIXMAN_COLOR(r, g, b)                                               \
+    { .red = r << 8, .green = g << 8, .blue = b << 8, .alpha = 0xffff }
+
+#define QEMU_PIXMAN_COLOR_BLACK QEMU_PIXMAN_COLOR(0x00, 0x00, 0x00)
+#define QEMU_PIXMAN_COLOR_GRAY QEMU_PIXMAN_COLOR(0xaa, 0xaa, 0xaa)
+
 /* -------------------------------------------------------------------- */
 
 typedef struct PixelFormat {
@@ -62,22 +72,20 @@ typedef struct PixelFormat {
 PixelFormat qemu_pixelformat_from_pixman(pixman_format_code_t format);
 pixman_format_code_t qemu_default_pixman_format(int bpp, bool native_endian);
 pixman_format_code_t qemu_drm_format_to_pixman(uint32_t drm_format);
+uint32_t qemu_pixman_to_drm_format(pixman_format_code_t pixman);
 int qemu_pixman_get_type(int rshift, int gshift, int bshift);
-pixman_format_code_t qemu_pixman_get_format(PixelFormat *pf);
 bool qemu_pixman_check_format(DisplayChangeListener *dcl,
                               pixman_format_code_t format);
 
+#ifdef CONFIG_PIXMAN
+pixman_format_code_t qemu_pixman_get_format(PixelFormat *pf);
 pixman_image_t *qemu_pixman_linebuf_create(pixman_format_code_t format,
                                            int width);
 void qemu_pixman_linebuf_fill(pixman_image_t *linebuf, pixman_image_t *fb,
                               int width, int x, int y);
-void qemu_pixman_linebuf_copy(pixman_image_t *fb, int width, int x, int y,
-                              pixman_image_t *linebuf);
 pixman_image_t *qemu_pixman_mirror_create(pixman_format_code_t format,
                                           pixman_image_t *image);
-void qemu_pixman_image_unref(pixman_image_t *image);
 
-pixman_color_t qemu_pixman_color(PixelFormat *pf, uint32_t color);
 pixman_image_t *qemu_pixman_glyph_from_vgafont(int height, const uint8_t *font,
                                                unsigned int ch);
 void qemu_pixman_glyph_render(pixman_image_t *glyph,
@@ -85,6 +93,9 @@ void qemu_pixman_glyph_render(pixman_image_t *glyph,
                               pixman_color_t *fgcol,
                               pixman_color_t *bgcol,
                               int x, int y, int cw, int ch);
+#endif
+
+void qemu_pixman_image_unref(pixman_image_t *image);
 
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(pixman_image_t, qemu_pixman_image_unref)
 
index 2beb7929728c58eb79e9c8a4b6f7bcfaba74575e..b7d493742c4c38904aa584c6758f586a71d58714 100644 (file)
 
 void qemu_spice_input_init(void);
 void qemu_spice_display_init(void);
+void qemu_spice_display_init_done(void);
 bool qemu_spice_have_display_interface(QemuConsole *con);
 int qemu_spice_add_display_interface(QXLInstance *qxlin, QemuConsole *con);
 int qemu_spice_migrate_info(const char *hostname, int port, int tls_port,
                             const char *subject);
 
-#if !defined(SPICE_SERVER_VERSION) || (SPICE_SERVER_VERSION < 0xc06)
-#define SPICE_NEEDS_SET_MM_TIME 1
+#if SPICE_SERVER_VERSION >= 0x000f00 /* release 0.15.0 */
+#define SPICE_HAS_ATTACHED_WORKER 1
 #else
-#define SPICE_NEEDS_SET_MM_TIME 0
+#define SPICE_HAS_ATTACHED_WORKER 0
 #endif
 
 #else  /* CONFIG_SPICE */
diff --git a/include/ui/rect.h b/include/ui/rect.h
new file mode 100644 (file)
index 0000000..68f05d7
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef QEMU_RECT_H
+#define QEMU_RECT_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+typedef struct QemuRect {
+    int16_t x;
+    int16_t y;
+    uint16_t width;
+    uint16_t height;
+} QemuRect;
+
+static inline void qemu_rect_init(QemuRect *rect,
+                                  int16_t x, int16_t y,
+                                  uint16_t width, uint16_t height)
+{
+    rect->x = x;
+    rect->y = y;
+    rect->width = width;
+    rect->height = height;
+}
+
+static inline void qemu_rect_translate(QemuRect *rect,
+                                       int16_t dx, int16_t dy)
+{
+    rect->x += dx;
+    rect->y += dy;
+}
+
+static inline bool qemu_rect_intersect(const QemuRect *a, const QemuRect *b,
+                                       QemuRect *res)
+{
+    int16_t x1, x2, y1, y2;
+
+    x1 = MAX(a->x, b->x);
+    y1 = MAX(a->y, b->y);
+    x2 = MIN(a->x + a->width, b->x + b->width);
+    y2 = MIN(a->y + a->height, b->y + b->height);
+
+    if (x1 >= x2 || y1 >= y2) {
+        if (res) {
+            qemu_rect_init(res, 0, 0, 0, 0);
+        }
+
+        return false;
+    }
+
+    if (res) {
+        qemu_rect_init(res, x1, y1, x2 - x1, y2 - y1);
+    }
+
+    return true;
+}
+
+#endif
index 0875b8d56b7ebbac43c4cf27de6791da7b43d5c7..e3acc7c82a7052ed93ed5d9f3241552ce68eeaa7 100644 (file)
@@ -5,7 +5,18 @@
 #undef WIN32_LEAN_AND_MEAN
 
 #include <SDL.h>
+
+/* with Alpine / muslc SDL headers pull in directfb headers
+ * which in turn trigger warning about redundant decls for
+ * direct_waitqueue_deinit.
+ */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wredundant-decls"
+
 #include <SDL_syswm.h>
+
+#pragma GCC diagnostic pop
+
 #ifdef CONFIG_SDL_IMAGE
 # include <SDL_image.h>
 #endif
@@ -16,6 +27,7 @@
 #endif
 
 struct sdl2_console {
+    DisplayGLCtx dgc;
     DisplayChangeListener dcl;
     DisplaySurface *surface;
     DisplayOptions *opts;
@@ -65,12 +77,11 @@ void sdl2_gl_switch(DisplayChangeListener *dcl,
 void sdl2_gl_refresh(DisplayChangeListener *dcl);
 void sdl2_gl_redraw(struct sdl2_console *scon);
 
-QEMUGLContext sdl2_gl_create_context(DisplayChangeListener *dcl,
+QEMUGLContext sdl2_gl_create_context(DisplayGLCtx *dgc,
                                      QEMUGLParams *params);
-void sdl2_gl_destroy_context(DisplayChangeListener *dcl, QEMUGLContext ctx);
-int sdl2_gl_make_context_current(DisplayChangeListener *dcl,
+void sdl2_gl_destroy_context(DisplayGLCtx *dgc, QEMUGLContext ctx);
+int sdl2_gl_make_context_current(DisplayGLCtx *dgc,
                                  QEMUGLContext ctx);
-QEMUGLContext sdl2_gl_get_current_context(DisplayChangeListener *dcl);
 
 void sdl2_gl_scanout_disable(DisplayChangeListener *dcl);
 void sdl2_gl_scanout_texture(DisplayChangeListener *dcl,
@@ -79,7 +90,8 @@ void sdl2_gl_scanout_texture(DisplayChangeListener *dcl,
                              uint32_t backing_width,
                              uint32_t backing_height,
                              uint32_t x, uint32_t y,
-                             uint32_t w, uint32_t h);
+                             uint32_t w, uint32_t h,
+                             void *d3d_tex2d);
 void sdl2_gl_scanout_flush(DisplayChangeListener *dcl,
                            uint32_t x, uint32_t y, uint32_t w, uint32_t h);
 
index 4a47ffdd4c82a1be23b48058424806187e56efcd..e1a9b3618527c9b75b045900f0c6d7d1d698e34d 100644 (file)
 #include "ui/qemu-pixman.h"
 #include "ui/console.h"
 
-#if defined(CONFIG_OPENGL_DMABUF)
-# if SPICE_SERVER_VERSION >= 0x000d01 /* release 0.13.1 */
+#if defined(CONFIG_OPENGL) && defined(CONFIG_GBM)
 #  define HAVE_SPICE_GL 1
 #  include "ui/egl-helpers.h"
 #  include "ui/egl-context.h"
-# endif
 #endif
 
 #define NUM_MEMSLOTS 8
@@ -44,7 +42,7 @@
 #define NUM_MEMSLOTS_GROUPS 2
 
 /*
- * Internal enum to differenciate between options for
+ * Internal enum to differentiate between options for
  * io calls that have a sync (old) version and an _async (new)
  * version:
  *  QXL_SYNC: use the old version
@@ -86,6 +84,7 @@ typedef struct SimpleSpiceCursor SimpleSpiceCursor;
 
 struct SimpleSpiceDisplay {
     DisplaySurface *ds;
+    DisplayGLCtx dgc;
     DisplayChangeListener dcl;
     void *buf;
     int bufsize;
@@ -183,8 +182,4 @@ void qemu_spice_display_start(void);
 void qemu_spice_display_stop(void);
 int qemu_spice_display_is_running(SimpleSpiceDisplay *ssd);
 
-bool qemu_spice_fill_device_address(QemuConsole *con,
-                                    char *device_address,
-                                    size_t size);
-
 #endif
diff --git a/include/ui/surface.h b/include/ui/surface.h
new file mode 100644 (file)
index 0000000..4244e0c
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * QEMU UI Console
+ */
+#ifndef SURFACE_H
+#define SURFACE_H
+
+#include "ui/qemu-pixman.h"
+
+#ifdef CONFIG_OPENGL
+# include <epoxy/gl.h>
+# include "ui/shader.h"
+#endif
+
+#define QEMU_ALLOCATED_FLAG     0x01
+#define QEMU_PLACEHOLDER_FLAG   0x02
+
+typedef struct DisplaySurface {
+    pixman_image_t *image;
+    uint8_t flags;
+#ifdef CONFIG_OPENGL
+    GLenum glformat;
+    GLenum gltype;
+    GLuint texture;
+#endif
+#ifdef WIN32
+    HANDLE handle;
+    uint32_t handle_offset;
+#endif
+} DisplaySurface;
+
+PixelFormat qemu_default_pixelformat(int bpp);
+
+DisplaySurface *qemu_create_displaysurface_from(int width, int height,
+                                                pixman_format_code_t format,
+                                                int linesize, uint8_t *data);
+DisplaySurface *qemu_create_displaysurface_pixman(pixman_image_t *image);
+DisplaySurface *qemu_create_placeholder_surface(int w, int h,
+                                                const char *msg);
+#ifdef WIN32
+void qemu_displaysurface_win32_set_handle(DisplaySurface *surface,
+                                          HANDLE h, uint32_t offset);
+#endif
+
+DisplaySurface *qemu_create_displaysurface(int width, int height);
+void qemu_free_displaysurface(DisplaySurface *surface);
+
+static inline int is_buffer_shared(DisplaySurface *surface)
+{
+    return !(surface->flags & QEMU_ALLOCATED_FLAG);
+}
+
+static inline int is_placeholder(DisplaySurface *surface)
+{
+    return surface->flags & QEMU_PLACEHOLDER_FLAG;
+}
+
+static inline int surface_stride(DisplaySurface *s)
+{
+    return pixman_image_get_stride(s->image);
+}
+
+static inline void *surface_data(DisplaySurface *s)
+{
+    return pixman_image_get_data(s->image);
+}
+
+static inline int surface_width(DisplaySurface *s)
+{
+    return pixman_image_get_width(s->image);
+}
+
+static inline int surface_height(DisplaySurface *s)
+{
+    return pixman_image_get_height(s->image);
+}
+
+static inline pixman_format_code_t surface_format(DisplaySurface *s)
+{
+    return pixman_image_get_format(s->image);
+}
+
+static inline int surface_bits_per_pixel(DisplaySurface *s)
+{
+    int bits = PIXMAN_FORMAT_BPP(surface_format(s));
+    return bits;
+}
+
+static inline int surface_bytes_per_pixel(DisplaySurface *s)
+{
+    int bits = PIXMAN_FORMAT_BPP(surface_format(s));
+    return DIV_ROUND_UP(bits, 8);
+}
+
+#endif
diff --git a/include/user/safe-syscall.h b/include/user/safe-syscall.h
new file mode 100644 (file)
index 0000000..27b71cd
--- /dev/null
@@ -0,0 +1,140 @@
+/*
+ * safe-syscall.h: prototypes for linux-user signal-race-safe syscalls
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef LINUX_USER_SAFE_SYSCALL_H
+#define LINUX_USER_SAFE_SYSCALL_H
+
+/**
+ * safe_syscall:
+ * @int number: number of system call to make
+ * ...: arguments to the system call
+ *
+ * Call a system call if guest signal not pending.
+ * This has the same API as the libc syscall() function, except that it
+ * may return -1 with errno == QEMU_ERESTARTSYS if a signal was pending.
+ *
+ * Returns: the system call result, or -1 with an error code in errno
+ * (Errnos are host errnos; we rely on QEMU_ERESTARTSYS not clashing
+ * with any of the host errno values.)
+ */
+
+/*
+ * A guide to using safe_syscall() to handle interactions between guest
+ * syscalls and guest signals:
+ *
+ * Guest syscalls come in two flavours:
+ *
+ * (1) Non-interruptible syscalls
+ *
+ * These are guest syscalls that never get interrupted by signals and
+ * so never return EINTR. They can be implemented straightforwardly in
+ * QEMU: just make sure that if the implementation code has to make any
+ * blocking calls that those calls are retried if they return EINTR.
+ * It's also OK to implement these with safe_syscall, though it will be
+ * a little less efficient if a signal is delivered at the 'wrong' moment.
+ *
+ * Some non-interruptible syscalls need to be handled using block_signals()
+ * to block signals for the duration of the syscall. This mainly applies
+ * to code which needs to modify the data structures used by the
+ * host_signal_handler() function and the functions it calls, including
+ * all syscalls which change the thread's signal mask.
+ *
+ * (2) Interruptible syscalls
+ *
+ * These are guest syscalls that can be interrupted by signals and
+ * for which we need to either return EINTR or arrange for the guest
+ * syscall to be restarted. This category includes both syscalls which
+ * always restart (and in the kernel return -ERESTARTNOINTR), ones
+ * which only restart if there is no handler (kernel returns -ERESTARTNOHAND
+ * or -ERESTART_RESTARTBLOCK), and the most common kind which restart
+ * if the handler was registered with SA_RESTART (kernel returns
+ * -ERESTARTSYS). System calls which are only interruptible in some
+ * situations (like 'open') also need to be handled this way.
+ *
+ * Here it is important that the host syscall is made
+ * via this safe_syscall() function, and *not* via the host libc.
+ * If the host libc is used then the implementation will appear to work
+ * most of the time, but there will be a race condition where a
+ * signal could arrive just before we make the host syscall inside libc,
+ * and then the guest syscall will not correctly be interrupted.
+ * Instead the implementation of the guest syscall can use the safe_syscall
+ * function but otherwise just return the result or errno in the usual
+ * way; the main loop code will take care of restarting the syscall
+ * if appropriate.
+ *
+ * (If the implementation needs to make multiple host syscalls this is
+ * OK; any which might really block must be via safe_syscall(); for those
+ * which are only technically blocking (ie which we know in practice won't
+ * stay in the host kernel indefinitely) it's OK to use libc if necessary.
+ * You must be able to cope with backing out correctly if some safe_syscall
+ * you make in the implementation returns either -QEMU_ERESTARTSYS or
+ * EINTR though.)
+ *
+ * block_signals() cannot be used for interruptible syscalls.
+ *
+ *
+ * How and why the safe_syscall implementation works:
+ *
+ * The basic setup is that we make the host syscall via a known
+ * section of host native assembly. If a signal occurs, our signal
+ * handler checks the interrupted host PC against the address of that
+ * known section. If the PC is before or at the address of the syscall
+ * instruction then we change the PC to point at a "return
+ * -QEMU_ERESTARTSYS" code path instead, and then exit the signal handler
+ * (causing the safe_syscall() call to immediately return that value).
+ * Then in the main.c loop if we see this magic return value we adjust
+ * the guest PC to wind it back to before the system call, and invoke
+ * the guest signal handler as usual.
+ *
+ * This winding-back will happen in two cases:
+ * (1) signal came in just before we took the host syscall (a race);
+ *   in this case we'll take the guest signal and have another go
+ *   at the syscall afterwards, and this is indistinguishable for the
+ *   guest from the timing having been different such that the guest
+ *   signal really did win the race
+ * (2) signal came in while the host syscall was blocking, and the
+ *   host kernel decided the syscall should be restarted;
+ *   in this case we want to restart the guest syscall also, and so
+ *   rewinding is the right thing. (Note that "restart" semantics mean
+ *   "first call the signal handler, then reattempt the syscall".)
+ * The other situation to consider is when a signal came in while the
+ * host syscall was blocking, and the host kernel decided that the syscall
+ * should not be restarted; in this case QEMU's host signal handler will
+ * be invoked with the PC pointing just after the syscall instruction,
+ * with registers indicating an EINTR return; the special code in the
+ * handler will not kick in, and we will return EINTR to the guest as
+ * we should.
+ *
+ * Notice that we can leave the host kernel to make the decision for
+ * us about whether to do a restart of the syscall or not; we do not
+ * need to check SA_RESTART flags in QEMU or distinguish the various
+ * kinds of restartability.
+ */
+
+/* The core part of this function is implemented in assembly */
+long safe_syscall_base(int *pending, long number, ...);
+long safe_syscall_set_errno_tail(int value);
+
+/* These are defined by the safe-syscall.inc.S file */
+extern char safe_syscall_start[];
+extern char safe_syscall_end[];
+
+#define safe_syscall(...)                                                 \
+    safe_syscall_base(&((TaskState *)thread_cpu->opaque)->signal_pending, \
+                      __VA_ARGS__)
+
+#endif
diff --git a/include/user/syscall-trace.h b/include/user/syscall-trace.h
new file mode 100644 (file)
index 0000000..557f881
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Common System Call Tracing Wrappers for *-user
+ *
+ * Copyright (c) 2019 Linaro
+ * Written by Alex Bennée <alex.bennee@linaro.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef SYSCALL_TRACE_H
+#define SYSCALL_TRACE_H
+
+#include "exec/user/abitypes.h"
+#include "qemu/plugin.h"
+#include "trace/trace-root.h"
+
+/*
+ * These helpers just provide a common place for the various
+ * subsystems that want to track syscalls to put their hooks in. We
+ * could potentially unify the -strace code here as well.
+ */
+
+static inline void record_syscall_start(void *cpu, int num,
+                                        abi_long arg1, abi_long arg2,
+                                        abi_long arg3, abi_long arg4,
+                                        abi_long arg5, abi_long arg6,
+                                        abi_long arg7, abi_long arg8)
+{
+    qemu_plugin_vcpu_syscall(cpu, num,
+                             arg1, arg2, arg3, arg4,
+                             arg5, arg6, arg7, arg8);
+}
+
+static inline void record_syscall_return(void *cpu, int num, abi_long ret)
+{
+    qemu_plugin_vcpu_syscall_ret(cpu, num, ret);
+}
+
+
+#endif /* SYSCALL_TRACE_H */
index baa4e2b089f615d5b3b6d27d9d50827249371414..8096180f85516a59600b2a3d224bdb39dfa8bca5 100644 (file)
@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc,
                                         size_t niov,
                                         int **fds,
                                         size_t *nfds,
+                                        int flags,
                                         Error **errp)
 {
     QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
@@ -81,6 +82,7 @@ static ssize_t qio_channel_buffer_writev(QIOChannel *ioc,
                                          size_t niov,
                                          int *fds,
                                          size_t nfds,
+                                         int flags,
                                          Error **errp)
 {
     QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
index b2a9e271382a32a98f5df8acc6b8696c0f81b8fd..6d5f64e146d45c7ed330063711b5295816591a38 100644 (file)
 
 #include "qemu/osdep.h"
 #include "io/channel-command.h"
+#include "io/channel-util.h"
 #include "io/channel-watch.h"
 #include "qapi/error.h"
 #include "qemu/module.h"
 #include "qemu/sockets.h"
 #include "trace.h"
 
-
-QIOChannelCommand *
+/**
+ * qio_channel_command_new_pid:
+ * @writefd: the FD connected to the command's stdin
+ * @readfd: the FD connected to the command's stdout
+ * @pid: the PID/HANDLE of the running child command
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Create a channel for performing I/O with the
+ * previously spawned command identified by @pid.
+ * The two file descriptors provide the connection
+ * to command's stdio streams, either one or which
+ * may be -1 to indicate that stream is not open.
+ *
+ * The channel will take ownership of the process
+ * @pid and will kill it when closing the channel.
+ * Similarly it will take responsibility for
+ * closing the file descriptors @writefd and @readfd.
+ *
+ * Returns: the command channel object, or NULL on error
+ */
+static QIOChannelCommand *
 qio_channel_command_new_pid(int writefd,
                             int readfd,
-                            pid_t pid)
+                            GPid pid)
 {
     QIOChannelCommand *ioc;
 
@@ -40,119 +60,41 @@ qio_channel_command_new_pid(int writefd,
     ioc->writefd = writefd;
     ioc->pid = pid;
 
-    trace_qio_channel_command_new_pid(ioc, writefd, readfd, pid);
+    trace_qio_channel_command_new_pid(ioc, writefd, readfd,
+#ifdef WIN32
+                                      GetProcessId(pid)
+#else
+                                      pid
+#endif
+        );
     return ioc;
 }
 
-
-#ifndef WIN32
 QIOChannelCommand *
 qio_channel_command_new_spawn(const char *const argv[],
                               int flags,
                               Error **errp)
 {
-    pid_t pid = -1;
-    int stdinfd[2] = { -1, -1 };
-    int stdoutfd[2] = { -1, -1 };
-    int devnull = -1;
-    bool stdinnull = false, stdoutnull = false;
-    QIOChannelCommand *ioc;
+    g_autoptr(GError) err = NULL;
+    GPid pid = 0;
+    GSpawnFlags gflags = G_SPAWN_CLOEXEC_PIPES | G_SPAWN_DO_NOT_REAP_CHILD;
+    int stdinfd = -1, stdoutfd = -1;
 
     flags = flags & O_ACCMODE;
-
-    if (flags == O_RDONLY) {
-        stdinnull = true;
-    }
-    if (flags == O_WRONLY) {
-        stdoutnull = true;
-    }
-
-    if (stdinnull || stdoutnull) {
-        devnull = open("/dev/null", O_RDWR);
-        if (devnull < 0) {
-            error_setg_errno(errp, errno,
-                             "Unable to open /dev/null");
-            goto error;
-        }
-    }
-
-    if ((!stdinnull && pipe(stdinfd) < 0) ||
-        (!stdoutnull && pipe(stdoutfd) < 0)) {
-        error_setg_errno(errp, errno,
-                         "Unable to open pipe");
-        goto error;
+    gflags |= flags == O_WRONLY ? G_SPAWN_STDOUT_TO_DEV_NULL : 0;
+
+    if (!g_spawn_async_with_pipes(NULL, (char **)argv, NULL, gflags, NULL, NULL,
+                                  &pid,
+                                  flags == O_RDONLY ? NULL : &stdinfd,
+                                  flags == O_WRONLY ? NULL : &stdoutfd,
+                                  NULL, &err)) {
+        error_setg(errp, "%s", err->message);
+        return NULL;
     }
 
-    pid = qemu_fork(errp);
-    if (pid < 0) {
-        goto error;
-    }
-
-    if (pid == 0) { /* child */
-        dup2(stdinnull ? devnull : stdinfd[0], STDIN_FILENO);
-        dup2(stdoutnull ? devnull : stdoutfd[1], STDOUT_FILENO);
-        /* Leave stderr connected to qemu's stderr */
-
-        if (!stdinnull) {
-            close(stdinfd[0]);
-            close(stdinfd[1]);
-        }
-        if (!stdoutnull) {
-            close(stdoutfd[0]);
-            close(stdoutfd[1]);
-        }
-        if (devnull != -1) {
-            close(devnull);
-        }
-
-        execv(argv[0], (char * const *)argv);
-        _exit(1);
-    }
-
-    if (!stdinnull) {
-        close(stdinfd[0]);
-    }
-    if (!stdoutnull) {
-        close(stdoutfd[1]);
-    }
-
-    ioc = qio_channel_command_new_pid(stdinnull ? devnull : stdinfd[1],
-                                      stdoutnull ? devnull : stdoutfd[0],
-                                      pid);
-    trace_qio_channel_command_new_spawn(ioc, argv[0], flags);
-    return ioc;
-
- error:
-    if (devnull != -1) {
-        close(devnull);
-    }
-    if (stdinfd[0] != -1) {
-        close(stdinfd[0]);
-    }
-    if (stdinfd[1] != -1) {
-        close(stdinfd[1]);
-    }
-    if (stdoutfd[0] != -1) {
-        close(stdoutfd[0]);
-    }
-    if (stdoutfd[1] != -1) {
-        close(stdoutfd[1]);
-    }
-    return NULL;
+    return qio_channel_command_new_pid(stdinfd, stdoutfd, pid);
 }
 
-#else /* WIN32 */
-QIOChannelCommand *
-qio_channel_command_new_spawn(const char *const argv[],
-                              int flags,
-                              Error **errp)
-{
-    error_setg_errno(errp, ENOSYS,
-                     "Command spawn not supported on this platform");
-    return NULL;
-}
-#endif /* WIN32 */
-
 #ifndef WIN32
 static int qio_channel_command_abort(QIOChannelCommand *ioc,
                                      Error **errp)
@@ -195,6 +137,23 @@ static int qio_channel_command_abort(QIOChannelCommand *ioc,
 
     return 0;
 }
+#else
+static int qio_channel_command_abort(QIOChannelCommand *ioc,
+                                     Error **errp)
+{
+    DWORD ret;
+
+    TerminateProcess(ioc->pid, 0);
+    ret = WaitForSingleObject(ioc->pid, 1000);
+    if (ret != WAIT_OBJECT_0) {
+        error_setg(errp,
+                   "Process %llu refused to die",
+                   (unsigned long long)GetProcessId(ioc->pid));
+        return -1;
+    }
+
+    return 0;
+}
 #endif /* ! WIN32 */
 
 
@@ -203,7 +162,7 @@ static void qio_channel_command_init(Object *obj)
     QIOChannelCommand *ioc = QIO_CHANNEL_COMMAND(obj);
     ioc->readfd = -1;
     ioc->writefd = -1;
-    ioc->pid = -1;
+    ioc->pid = 0;
 }
 
 static void qio_channel_command_finalize(Object *obj)
@@ -218,23 +177,45 @@ static void qio_channel_command_finalize(Object *obj)
     }
     ioc->writefd = ioc->readfd = -1;
     if (ioc->pid > 0) {
-#ifndef WIN32
         qio_channel_command_abort(ioc, NULL);
-#endif
+        g_spawn_close_pid(ioc->pid);
     }
 }
 
+#ifdef WIN32
+static bool win32_fd_poll(int fd, gushort events)
+{
+    GPollFD pfd = { .fd = _get_osfhandle(fd), .events = events };
+    int res;
+
+    do {
+        res = g_poll(&pfd, 1, 0);
+    } while (res < 0 && errno == EINTR);
+    if (res == 0) {
+        return false;
+    }
+
+    return true;
+}
+#endif
 
 static ssize_t qio_channel_command_readv(QIOChannel *ioc,
                                          const struct iovec *iov,
                                          size_t niov,
                                          int **fds,
                                          size_t *nfds,
+                                         int flags,
                                          Error **errp)
 {
     QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
     ssize_t ret;
 
+#ifdef WIN32
+    if (!cioc->blocking && !win32_fd_poll(cioc->readfd, G_IO_IN)) {
+        return QIO_CHANNEL_ERR_BLOCK;
+    }
+#endif
+
  retry:
     ret = readv(cioc->readfd, iov, niov);
     if (ret < 0) {
@@ -258,11 +239,18 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc,
                                           size_t niov,
                                           int *fds,
                                           size_t nfds,
+                                          int flags,
                                           Error **errp)
 {
     QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
     ssize_t ret;
 
+#ifdef WIN32
+    if (!cioc->blocking && !win32_fd_poll(cioc->writefd, G_IO_OUT)) {
+        return QIO_CHANNEL_ERR_BLOCK;
+    }
+#endif
+
  retry:
     ret = writev(cioc->writefd, iov, niov);
     if (ret <= 0) {
@@ -285,14 +273,16 @@ static int qio_channel_command_set_blocking(QIOChannel *ioc,
 {
     QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
 
-    if (enabled) {
-        qemu_set_block(cioc->writefd);
-        qemu_set_block(cioc->readfd);
-    } else {
-        qemu_set_nonblock(cioc->writefd);
-        qemu_set_nonblock(cioc->readfd);
-    }
+#ifdef WIN32
+    cioc->blocking = enabled;
+#else
 
+    if ((cioc->writefd >= 0 && !g_unix_set_fd_nonblocking(cioc->writefd, !enabled, NULL)) ||
+        (cioc->readfd >= 0 && !g_unix_set_fd_nonblocking(cioc->readfd, !enabled, NULL))) {
+        error_setg_errno(errp, errno, "Failed to set FD nonblocking");
+        return -1;
+    }
+#endif
     return 0;
 }
 
@@ -329,6 +319,8 @@ static int qio_channel_command_close(QIOChannel *ioc,
                          (unsigned long long)cioc->pid);
         return -1;
     }
+#else
+    WaitForSingleObject(cioc->pid, INFINITE);
 #endif
 
     if (rv < 0) {
@@ -340,14 +332,17 @@ static int qio_channel_command_close(QIOChannel *ioc,
 
 
 static void qio_channel_command_set_aio_fd_handler(QIOChannel *ioc,
-                                                   AioContext *ctx,
+                                                   AioContext *read_ctx,
                                                    IOHandler *io_read,
+                                                   AioContext *write_ctx,
                                                    IOHandler *io_write,
                                                    void *opaque)
 {
     QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
-    aio_set_fd_handler(ctx, cioc->readfd, false, io_read, NULL, NULL, opaque);
-    aio_set_fd_handler(ctx, cioc->writefd, false, NULL, io_write, NULL, opaque);
+
+    qio_channel_util_set_aio_fd_handler(cioc->readfd, read_ctx, io_read,
+                                        cioc->writefd, write_ctx, io_write,
+                                        opaque);
 }
 
 
index c4bf799a80233590bd27b9bd0d43310f1ad5cf12..4a12c618860116cbdf61c8d64db0e469f8f6a75b 100644 (file)
@@ -20,6 +20,7 @@
 
 #include "qemu/osdep.h"
 #include "io/channel-file.h"
+#include "io/channel-util.h"
 #include "io/channel-watch.h"
 #include "qapi/error.h"
 #include "qemu/module.h"
@@ -86,6 +87,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc,
                                       size_t niov,
                                       int **fds,
                                       size_t *nfds,
+                                      int flags,
                                       Error **errp)
 {
     QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
@@ -114,6 +116,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc,
                                        size_t niov,
                                        int *fds,
                                        size_t nfds,
+                                       int flags,
                                        Error **errp)
 {
     QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
@@ -139,14 +142,19 @@ static int qio_channel_file_set_blocking(QIOChannel *ioc,
                                          bool enabled,
                                          Error **errp)
 {
+#ifdef WIN32
+    /* not implemented */
+    error_setg_errno(errp, errno, "Failed to set FD nonblocking");
+    return -1;
+#else
     QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
 
-    if (enabled) {
-        qemu_set_block(fioc->fd);
-    } else {
-        qemu_set_nonblock(fioc->fd);
+    if (!g_unix_set_fd_nonblocking(fioc->fd, !enabled, NULL)) {
+        error_setg_errno(errp, errno, "Failed to set FD nonblocking");
+        return -1;
     }
     return 0;
+#endif
 }
 
 
@@ -185,13 +193,17 @@ static int qio_channel_file_close(QIOChannel *ioc,
 
 
 static void qio_channel_file_set_aio_fd_handler(QIOChannel *ioc,
-                                                AioContext *ctx,
+                                                AioContext *read_ctx,
                                                 IOHandler *io_read,
+                                                AioContext *write_ctx,
                                                 IOHandler *io_write,
                                                 void *opaque)
 {
     QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
-    aio_set_fd_handler(ctx, fioc->fd, false, io_read, io_write, NULL, opaque);
+
+    qio_channel_util_set_aio_fd_handler(fioc->fd, read_ctx, io_read,
+                                        fioc->fd, write_ctx, io_write,
+                                        opaque);
 }
 
 static GSource *qio_channel_file_create_watch(QIOChannel *ioc,
diff --git a/io/channel-null.c b/io/channel-null.c
new file mode 100644 (file)
index 0000000..ef99586
--- /dev/null
@@ -0,0 +1,239 @@
+/*
+ * QEMU I/O channels null driver
+ *
+ * Copyright (c) 2022 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "io/channel-null.h"
+#include "io/channel-watch.h"
+#include "qapi/error.h"
+#include "trace.h"
+#include "qemu/iov.h"
+
+typedef struct QIOChannelNullSource QIOChannelNullSource;
+struct QIOChannelNullSource {
+    GSource parent;
+    QIOChannel *ioc;
+    GIOCondition condition;
+};
+
+
+QIOChannelNull *
+qio_channel_null_new(void)
+{
+    QIOChannelNull *ioc;
+
+    ioc = QIO_CHANNEL_NULL(object_new(TYPE_QIO_CHANNEL_NULL));
+
+    trace_qio_channel_null_new(ioc);
+
+    return ioc;
+}
+
+
+static void
+qio_channel_null_init(Object *obj)
+{
+    QIOChannelNull *ioc = QIO_CHANNEL_NULL(obj);
+    ioc->closed = false;
+}
+
+
+static ssize_t
+qio_channel_null_readv(QIOChannel *ioc,
+                       const struct iovec *iov,
+                       size_t niov,
+                       int **fds G_GNUC_UNUSED,
+                       size_t *nfds G_GNUC_UNUSED,
+                       int flags,
+                       Error **errp)
+{
+    QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc);
+
+    if (nioc->closed) {
+        error_setg_errno(errp, EINVAL,
+                         "Channel is closed");
+        return -1;
+    }
+
+    return 0;
+}
+
+
+static ssize_t
+qio_channel_null_writev(QIOChannel *ioc,
+                        const struct iovec *iov,
+                        size_t niov,
+                        int *fds G_GNUC_UNUSED,
+                        size_t nfds G_GNUC_UNUSED,
+                        int flags G_GNUC_UNUSED,
+                        Error **errp)
+{
+    QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc);
+
+    if (nioc->closed) {
+        error_setg_errno(errp, EINVAL,
+                         "Channel is closed");
+        return -1;
+    }
+
+    return iov_size(iov, niov);
+}
+
+
+static int
+qio_channel_null_set_blocking(QIOChannel *ioc G_GNUC_UNUSED,
+                              bool enabled G_GNUC_UNUSED,
+                              Error **errp G_GNUC_UNUSED)
+{
+    return 0;
+}
+
+
+static off_t
+qio_channel_null_seek(QIOChannel *ioc G_GNUC_UNUSED,
+                      off_t offset G_GNUC_UNUSED,
+                      int whence G_GNUC_UNUSED,
+                      Error **errp G_GNUC_UNUSED)
+{
+    return 0;
+}
+
+
+static int
+qio_channel_null_close(QIOChannel *ioc,
+                       Error **errp G_GNUC_UNUSED)
+{
+    QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc);
+
+    nioc->closed = true;
+    return 0;
+}
+
+
+static void
+qio_channel_null_set_aio_fd_handler(QIOChannel *ioc G_GNUC_UNUSED,
+                                    AioContext *read_ctx G_GNUC_UNUSED,
+                                    IOHandler *io_read G_GNUC_UNUSED,
+                                    AioContext *write_ctx G_GNUC_UNUSED,
+                                    IOHandler *io_write G_GNUC_UNUSED,
+                                    void *opaque G_GNUC_UNUSED)
+{
+}
+
+
+static gboolean
+qio_channel_null_source_prepare(GSource *source G_GNUC_UNUSED,
+                                gint *timeout)
+{
+    *timeout = -1;
+
+    return TRUE;
+}
+
+
+static gboolean
+qio_channel_null_source_check(GSource *source G_GNUC_UNUSED)
+{
+    return TRUE;
+}
+
+
+static gboolean
+qio_channel_null_source_dispatch(GSource *source,
+                                 GSourceFunc callback,
+                                 gpointer user_data)
+{
+    QIOChannelFunc func = (QIOChannelFunc)callback;
+    QIOChannelNullSource *ssource = (QIOChannelNullSource *)source;
+
+    return (*func)(ssource->ioc,
+                   ssource->condition,
+                   user_data);
+}
+
+
+static void
+qio_channel_null_source_finalize(GSource *source)
+{
+    QIOChannelNullSource *ssource = (QIOChannelNullSource *)source;
+
+    object_unref(OBJECT(ssource->ioc));
+}
+
+
+GSourceFuncs qio_channel_null_source_funcs = {
+    qio_channel_null_source_prepare,
+    qio_channel_null_source_check,
+    qio_channel_null_source_dispatch,
+    qio_channel_null_source_finalize
+};
+
+
+static GSource *
+qio_channel_null_create_watch(QIOChannel *ioc,
+                              GIOCondition condition)
+{
+    GSource *source;
+    QIOChannelNullSource *ssource;
+
+    source = g_source_new(&qio_channel_null_source_funcs,
+                          sizeof(QIOChannelNullSource));
+    ssource = (QIOChannelNullSource *)source;
+
+    ssource->ioc = ioc;
+    object_ref(OBJECT(ioc));
+
+    ssource->condition = condition;
+
+    return source;
+}
+
+
+static void
+qio_channel_null_class_init(ObjectClass *klass,
+                            void *class_data G_GNUC_UNUSED)
+{
+    QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);
+
+    ioc_klass->io_writev = qio_channel_null_writev;
+    ioc_klass->io_readv = qio_channel_null_readv;
+    ioc_klass->io_set_blocking = qio_channel_null_set_blocking;
+    ioc_klass->io_seek = qio_channel_null_seek;
+    ioc_klass->io_close = qio_channel_null_close;
+    ioc_klass->io_create_watch = qio_channel_null_create_watch;
+    ioc_klass->io_set_aio_fd_handler = qio_channel_null_set_aio_fd_handler;
+}
+
+
+static const TypeInfo qio_channel_null_info = {
+    .parent = TYPE_QIO_CHANNEL,
+    .name = TYPE_QIO_CHANNEL_NULL,
+    .instance_size = sizeof(QIOChannelNull),
+    .instance_init = qio_channel_null_init,
+    .class_init = qio_channel_null_class_init,
+};
+
+
+static void
+qio_channel_null_register_types(void)
+{
+    type_register_static(&qio_channel_null_info);
+}
+
+type_init(qio_channel_null_register_types);
index de259f7eed2752c92ec0b1b77e90896e8ea314e4..3a899b060858de9a12a6692098b2ff09fe1b3352 100644 (file)
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "qapi/error.h"
 #include "qapi/qapi-visit-sockets.h"
 #include "qemu/module.h"
 #include "io/channel-socket.h"
+#include "io/channel-util.h"
 #include "io/channel-watch.h"
 #include "trace.h"
 #include "qapi/clone-visitor.h"
+#ifdef CONFIG_LINUX
+#include <linux/errqueue.h>
+#include <sys/socket.h>
+
+#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY))
+#define QEMU_MSG_ZEROCOPY
+#endif
+#endif
 
 #define SOCKET_MAX_FDS 16
 
@@ -55,6 +63,8 @@ qio_channel_socket_new(void)
 
     sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
     sioc->fd = -1;
+    sioc->zero_copy_queued = 0;
+    sioc->zero_copy_sent = 0;
 
     ioc = QIO_CHANNEL(sioc);
     qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
@@ -154,6 +164,19 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
         return -1;
     }
 
+#ifdef QEMU_MSG_ZEROCOPY
+    int ret, v = 1;
+    ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v));
+    if (ret == 0) {
+        /* Zero copy available on host */
+        qio_channel_set_feature(QIO_CHANNEL(ioc),
+                                QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY);
+    }
+#endif
+
+    qio_channel_set_feature(QIO_CHANNEL(ioc),
+                            QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
+
     return 0;
 }
 
@@ -387,6 +410,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
     }
 #endif /* WIN32 */
 
+    qio_channel_set_feature(QIO_CHANNEL(cioc),
+                            QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
+
     trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd);
     return cioc;
 
@@ -417,9 +443,9 @@ static void qio_channel_socket_finalize(Object *obj)
             }
         }
 #ifdef WIN32
-        WSAEventSelect(ioc->fd, NULL, 0);
+        qemu_socket_unselect(ioc->fd, NULL);
 #endif
-        closesocket(ioc->fd);
+        close(ioc->fd);
         ioc->fd = -1;
     }
 }
@@ -461,7 +487,7 @@ static void qio_channel_socket_copy_fds(struct msghdr *msg,
             }
 
             /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */
-            qemu_set_block(fd);
+            qemu_socket_set_block(fd);
 
 #ifndef MSG_CMSG_CLOEXEC
             qemu_set_cloexec(fd);
@@ -477,6 +503,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
                                         size_t niov,
                                         int **fds,
                                         size_t *nfds,
+                                        int flags,
                                         Error **errp)
 {
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
@@ -487,15 +514,19 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
 
     memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
 
-#ifdef MSG_CMSG_CLOEXEC
-    sflags |= MSG_CMSG_CLOEXEC;
-#endif
-
     msg.msg_iov = (struct iovec *)iov;
     msg.msg_iovlen = niov;
     if (fds && nfds) {
         msg.msg_control = control;
         msg.msg_controllen = sizeof(control);
+#ifdef MSG_CMSG_CLOEXEC
+        sflags |= MSG_CMSG_CLOEXEC;
+#endif
+
+    }
+
+    if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
+        sflags |= MSG_PEEK;
     }
 
  retry:
@@ -525,6 +556,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
                                          size_t niov,
                                          int *fds,
                                          size_t nfds,
+                                         int flags,
                                          Error **errp)
 {
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
@@ -533,6 +565,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
     char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
     size_t fdsize = sizeof(int) * nfds;
     struct cmsghdr *cmsg;
+    int sflags = 0;
 
     memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
 
@@ -557,19 +590,44 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
         memcpy(CMSG_DATA(cmsg), fds, fdsize);
     }
 
+    if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+#ifdef QEMU_MSG_ZEROCOPY
+        sflags = MSG_ZEROCOPY;
+#else
+        /*
+         * We expect QIOChannel class entry point to have
+         * blocked this code path already
+         */
+        g_assert_not_reached();
+#endif
+    }
+
  retry:
-    ret = sendmsg(sioc->fd, &msg, 0);
+    ret = sendmsg(sioc->fd, &msg, sflags);
     if (ret <= 0) {
-        if (errno == EAGAIN) {
+        switch (errno) {
+        case EAGAIN:
             return QIO_CHANNEL_ERR_BLOCK;
-        }
-        if (errno == EINTR) {
+        case EINTR:
             goto retry;
+        case ENOBUFS:
+            if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+                error_setg_errno(errp, errno,
+                                 "Process can't lock enough memory for using MSG_ZEROCOPY");
+                return -1;
+            }
+            break;
         }
+
         error_setg_errno(errp, errno,
                          "Unable to write to socket");
         return -1;
     }
+
+    if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+        sioc->zero_copy_queued++;
+    }
+
     return ret;
 }
 #else /* WIN32 */
@@ -578,11 +636,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
                                         size_t niov,
                                         int **fds,
                                         size_t *nfds,
+                                        int flags,
                                         Error **errp)
 {
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
     ssize_t done = 0;
     ssize_t i;
+    int sflags = 0;
+
+    if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
+        sflags |= MSG_PEEK;
+    }
 
     for (i = 0; i < niov; i++) {
         ssize_t ret;
@@ -590,7 +654,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
         ret = recv(sioc->fd,
                    iov[i].iov_base,
                    iov[i].iov_len,
-                   0);
+                   sflags);
         if (ret < 0) {
             if (errno == EAGAIN) {
                 if (done) {
@@ -620,6 +684,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
                                          size_t niov,
                                          int *fds,
                                          size_t nfds,
+                                         int flags,
                                          Error **errp)
 {
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
@@ -658,6 +723,85 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 }
 #endif /* WIN32 */
 
+
+#ifdef QEMU_MSG_ZEROCOPY
+static int qio_channel_socket_flush(QIOChannel *ioc,
+                                    Error **errp)
+{
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+    struct msghdr msg = {};
+    struct sock_extended_err *serr;
+    struct cmsghdr *cm;
+    char control[CMSG_SPACE(sizeof(*serr))];
+    int received;
+    int ret;
+
+    if (sioc->zero_copy_queued == sioc->zero_copy_sent) {
+        return 0;
+    }
+
+    msg.msg_control = control;
+    msg.msg_controllen = sizeof(control);
+    memset(control, 0, sizeof(control));
+
+    ret = 1;
+
+    while (sioc->zero_copy_sent < sioc->zero_copy_queued) {
+        received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE);
+        if (received < 0) {
+            switch (errno) {
+            case EAGAIN:
+                /* Nothing on errqueue, wait until something is available */
+                qio_channel_wait(ioc, G_IO_ERR);
+                continue;
+            case EINTR:
+                continue;
+            default:
+                error_setg_errno(errp, errno,
+                                 "Unable to read errqueue");
+                return -1;
+            }
+        }
+
+        cm = CMSG_FIRSTHDR(&msg);
+        if (cm->cmsg_level != SOL_IP   && cm->cmsg_type != IP_RECVERR &&
+            cm->cmsg_level != SOL_IPV6 && cm->cmsg_type != IPV6_RECVERR) {
+            error_setg_errno(errp, EPROTOTYPE,
+                             "Wrong cmsg in errqueue");
+            return -1;
+        }
+
+        serr = (void *) CMSG_DATA(cm);
+        if (serr->ee_errno != SO_EE_ORIGIN_NONE) {
+            error_setg_errno(errp, serr->ee_errno,
+                             "Error on socket");
+            return -1;
+        }
+        if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
+            error_setg_errno(errp, serr->ee_origin,
+                             "Error not from zero copy");
+            return -1;
+        }
+        if (serr->ee_data < serr->ee_info) {
+            error_setg_errno(errp, serr->ee_origin,
+                             "Wrong notification bounds");
+            return -1;
+        }
+
+        /* No errors, count successfully finished sendmsg()*/
+        sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1;
+
+        /* If any sendmsg() succeeded using zero copy, return 0 at the end */
+        if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) {
+            ret = 0;
+        }
+    }
+
+    return ret;
+}
+
+#endif /* QEMU_MSG_ZEROCOPY */
+
 static int
 qio_channel_socket_set_blocking(QIOChannel *ioc,
                                 bool enabled,
@@ -666,9 +810,9 @@ qio_channel_socket_set_blocking(QIOChannel *ioc,
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
 
     if (enabled) {
-        qemu_set_block(sioc->fd);
+        qemu_socket_set_block(sioc->fd);
     } else {
-        qemu_set_nonblock(sioc->fd);
+        qemu_socket_set_nonblock(sioc->fd);
     }
     return 0;
 }
@@ -681,9 +825,9 @@ qio_channel_socket_set_delay(QIOChannel *ioc,
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
     int v = enabled ? 0 : 1;
 
-    qemu_setsockopt(sioc->fd,
-                    IPPROTO_TCP, TCP_NODELAY,
-                    &v, sizeof(v));
+    setsockopt(sioc->fd,
+               IPPROTO_TCP, TCP_NODELAY,
+               &v, sizeof(v));
 }
 
 
@@ -708,13 +852,13 @@ qio_channel_socket_close(QIOChannel *ioc,
 
     if (sioc->fd != -1) {
 #ifdef WIN32
-        WSAEventSelect(sioc->fd, NULL, 0);
+        qemu_socket_unselect(sioc->fd, NULL);
 #endif
         if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_LISTEN)) {
             socket_listen_cleanup(sioc->fd, errp);
         }
 
-        if (closesocket(sioc->fd) < 0) {
+        if (close(sioc->fd) < 0) {
             sioc->fd = -1;
             error_setg_errno(&err, errno, "Unable to close socket");
             error_propagate(errp, err);
@@ -755,13 +899,17 @@ qio_channel_socket_shutdown(QIOChannel *ioc,
 }
 
 static void qio_channel_socket_set_aio_fd_handler(QIOChannel *ioc,
-                                                  AioContext *ctx,
+                                                  AioContext *read_ctx,
                                                   IOHandler *io_read,
+                                                  AioContext *write_ctx,
                                                   IOHandler *io_write,
                                                   void *opaque)
 {
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
-    aio_set_fd_handler(ctx, sioc->fd, false, io_read, io_write, NULL, opaque);
+
+    qio_channel_util_set_aio_fd_handler(sioc->fd, read_ctx, io_read,
+                                        sioc->fd, write_ctx, io_write,
+                                        opaque);
 }
 
 static GSource *qio_channel_socket_create_watch(QIOChannel *ioc,
@@ -787,6 +935,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass,
     ioc_klass->io_set_delay = qio_channel_socket_set_delay;
     ioc_klass->io_create_watch = qio_channel_socket_create_watch;
     ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler;
+#ifdef QEMU_MSG_ZEROCOPY
+    ioc_klass->io_flush = qio_channel_socket_flush;
+#endif
 }
 
 static const TypeInfo qio_channel_socket_info = {
index 388f019977254ebb487fac109595894dd499ddcb..58fe1aceeeafd473dfb3fde267f18eccee6bdb65 100644 (file)
@@ -23,6 +23,7 @@
 #include "qemu/module.h"
 #include "io/channel-tls.h"
 #include "trace.h"
+#include "qemu/atomic.h"
 
 
 static ssize_t qio_channel_tls_write_handler(const char *buf,
@@ -73,6 +74,9 @@ qio_channel_tls_new_server(QIOChannel *master,
     ioc = QIO_CHANNEL_TLS(object_new(TYPE_QIO_CHANNEL_TLS));
 
     ioc->master = master;
+    if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) {
+        qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_SHUTDOWN);
+    }
     object_ref(OBJECT(master));
 
     ioc->session = qcrypto_tls_session_new(
@@ -194,12 +198,13 @@ static void qio_channel_tls_handshake_task(QIOChannelTLS *ioc,
         }
 
         trace_qio_channel_tls_handshake_pending(ioc, status);
-        qio_channel_add_watch_full(ioc->master,
-                                   condition,
-                                   qio_channel_tls_handshake_io,
-                                   data,
-                                   NULL,
-                                   context);
+        ioc->hs_ioc_tag =
+            qio_channel_add_watch_full(ioc->master,
+                                       condition,
+                                       qio_channel_tls_handshake_io,
+                                       data,
+                                       NULL,
+                                       context);
     }
 }
 
@@ -214,6 +219,7 @@ static gboolean qio_channel_tls_handshake_io(QIOChannel *ioc,
     QIOChannelTLS *tioc = QIO_CHANNEL_TLS(
         qio_task_get_source(task));
 
+    tioc->hs_ioc_tag = 0;
     g_free(data);
     qio_channel_tls_handshake_task(tioc, task, context);
 
@@ -259,6 +265,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc,
                                      size_t niov,
                                      int **fds,
                                      size_t *nfds,
+                                     int flags,
                                      Error **errp)
 {
     QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
@@ -277,7 +284,8 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc,
                     return QIO_CHANNEL_ERR_BLOCK;
                 }
             } else if (errno == ECONNABORTED &&
-                       (tioc->shutdown & QIO_CHANNEL_SHUTDOWN_READ)) {
+                       (qatomic_load_acquire(&tioc->shutdown) &
+                        QIO_CHANNEL_SHUTDOWN_READ)) {
                 return 0;
             }
 
@@ -299,6 +307,7 @@ static ssize_t qio_channel_tls_writev(QIOChannel *ioc,
                                       size_t niov,
                                       int *fds,
                                       size_t nfds,
+                                      int flags,
                                       Error **errp)
 {
     QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
@@ -361,7 +370,7 @@ static int qio_channel_tls_shutdown(QIOChannel *ioc,
 {
     QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
 
-    tioc->shutdown |= how;
+    qatomic_or(&tioc->shutdown, how);
 
     return qio_channel_shutdown(tioc->master, how, errp);
 }
@@ -371,26 +380,97 @@ static int qio_channel_tls_close(QIOChannel *ioc,
 {
     QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
 
+    if (tioc->hs_ioc_tag) {
+        g_clear_handle_id(&tioc->hs_ioc_tag, g_source_remove);
+    }
+
     return qio_channel_close(tioc->master, errp);
 }
 
 static void qio_channel_tls_set_aio_fd_handler(QIOChannel *ioc,
-                                               AioContext *ctx,
+                                               AioContext *read_ctx,
                                                IOHandler *io_read,
+                                               AioContext *write_ctx,
                                                IOHandler *io_write,
                                                void *opaque)
 {
     QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
 
-    qio_channel_set_aio_fd_handler(tioc->master, ctx, io_read, io_write, opaque);
+    qio_channel_set_aio_fd_handler(tioc->master, read_ctx, io_read,
+            write_ctx, io_write, opaque);
+}
+
+typedef struct QIOChannelTLSSource QIOChannelTLSSource;
+struct QIOChannelTLSSource {
+    GSource parent;
+    QIOChannelTLS *tioc;
+};
+
+static gboolean
+qio_channel_tls_source_check(GSource *source)
+{
+    QIOChannelTLSSource *tsource = (QIOChannelTLSSource *)source;
+
+    return qcrypto_tls_session_check_pending(tsource->tioc->session) > 0;
+}
+
+static gboolean
+qio_channel_tls_source_prepare(GSource *source, gint *timeout)
+{
+    *timeout = -1;
+    return qio_channel_tls_source_check(source);
+}
+
+static gboolean
+qio_channel_tls_source_dispatch(GSource *source, GSourceFunc callback,
+                                gpointer user_data)
+{
+    return G_SOURCE_CONTINUE;
+}
+
+static void
+qio_channel_tls_source_finalize(GSource *source)
+{
+    QIOChannelTLSSource *tsource = (QIOChannelTLSSource *)source;
+
+    object_unref(OBJECT(tsource->tioc));
+}
+
+static GSourceFuncs qio_channel_tls_source_funcs = {
+    qio_channel_tls_source_prepare,
+    qio_channel_tls_source_check,
+    qio_channel_tls_source_dispatch,
+    qio_channel_tls_source_finalize
+};
+
+static void
+qio_channel_tls_read_watch(QIOChannelTLS *tioc, GSource *source)
+{
+    GSource *child;
+    QIOChannelTLSSource *tlssource;
+
+    child = g_source_new(&qio_channel_tls_source_funcs,
+                          sizeof(QIOChannelTLSSource));
+    tlssource = (QIOChannelTLSSource *)child;
+
+    tlssource->tioc = tioc;
+    object_ref(OBJECT(tioc));
+
+    g_source_add_child_source(source, child);
+    g_source_unref(child);
 }
 
 static GSource *qio_channel_tls_create_watch(QIOChannel *ioc,
                                              GIOCondition condition)
 {
     QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
+    GSource *source = qio_channel_create_watch(tioc->master, condition);
+
+    if (condition & G_IO_IN) {
+        qio_channel_tls_read_watch(tioc, source);
+    }
 
-    return qio_channel_create_watch(tioc->master, condition);
+    return source;
 }
 
 QCryptoTLSSession *
index 848a7a43d671daa6d7257e6fa525ea55a06ebc09..4b340d46d760f245c2622e6c93f95867f449f47a 100644 (file)
@@ -36,3 +36,27 @@ QIOChannel *qio_channel_new_fd(int fd,
     }
     return ioc;
 }
+
+
+void qio_channel_util_set_aio_fd_handler(int read_fd,
+                                         AioContext *read_ctx,
+                                         IOHandler *io_read,
+                                         int write_fd,
+                                         AioContext *write_ctx,
+                                         IOHandler *io_write,
+                                         void *opaque)
+{
+    if (read_fd == write_fd && read_ctx == write_ctx) {
+        aio_set_fd_handler(read_ctx, read_fd, io_read, io_write,
+                NULL, NULL, opaque);
+    } else {
+        if (read_ctx) {
+            aio_set_fd_handler(read_ctx, read_fd, io_read, NULL,
+                    NULL, NULL, opaque);
+        }
+        if (write_ctx) {
+            aio_set_fd_handler(write_ctx, write_fd, NULL, io_write,
+                    NULL, NULL, opaque);
+        }
+    }
+}
index 0289b3647c278077fdfe6e0c99a0d11eeb5ac3ee..64b486e378184b6834f9ec70d050a507757fd50c 100644 (file)
@@ -115,28 +115,24 @@ static gboolean
 qio_channel_socket_source_check(GSource *source)
 {
     static struct timeval tv0;
-
     QIOChannelSocketSource *ssource = (QIOChannelSocketSource *)source;
-    WSANETWORKEVENTS ev;
     fd_set rfds, wfds, xfds;
 
     if (!ssource->condition) {
         return 0;
     }
 
-    WSAEnumNetworkEvents(ssource->socket, ssource->ioc->event, &ev);
-
     FD_ZERO(&rfds);
     FD_ZERO(&wfds);
     FD_ZERO(&xfds);
     if (ssource->condition & G_IO_IN) {
-        FD_SET((SOCKET)ssource->socket, &rfds);
+        FD_SET(ssource->socket, &rfds);
     }
     if (ssource->condition & G_IO_OUT) {
-        FD_SET((SOCKET)ssource->socket, &wfds);
+        FD_SET(ssource->socket, &wfds);
     }
     if (ssource->condition & G_IO_PRI) {
-        FD_SET((SOCKET)ssource->socket, &xfds);
+        FD_SET(ssource->socket, &xfds);
     }
     ssource->revents = 0;
     if (select(0, &rfds, &wfds, &xfds, &tv0) == 0) {
@@ -279,17 +275,15 @@ GSource *qio_channel_create_fd_watch(QIOChannel *ioc,
 
 #ifdef CONFIG_WIN32
 GSource *qio_channel_create_socket_watch(QIOChannel *ioc,
-                                         int socket,
+                                         int sockfd,
                                          GIOCondition condition)
 {
     GSource *source;
     QIOChannelSocketSource *ssource;
 
-#ifdef WIN32
-    WSAEventSelect(socket, ioc->event,
-                   FD_READ | FD_ACCEPT | FD_CLOSE |
-                   FD_CONNECT | FD_WRITE | FD_OOB);
-#endif
+    qemu_socket_select(sockfd, ioc->event,
+                       FD_READ | FD_ACCEPT | FD_CLOSE |
+                       FD_CONNECT | FD_WRITE | FD_OOB, NULL);
 
     source = g_source_new(&qio_channel_socket_source_funcs,
                           sizeof(QIOChannelSocketSource));
@@ -299,7 +293,7 @@ GSource *qio_channel_create_socket_watch(QIOChannel *ioc,
     object_ref(OBJECT(ioc));
 
     ssource->condition = condition;
-    ssource->socket = socket;
+    ssource->socket = _get_osfhandle(sockfd);
     ssource->revents = 0;
 
     ssource->fd.fd = (gintptr)ioc->event;
index 03c1f7cb62f77af2b0bef66207db5441cf9368c3..a12acc27cf25d312564c316469949482cb7020d6 100644 (file)
@@ -32,7 +32,7 @@
 
 #define QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN 24
 #define QIO_CHANNEL_WEBSOCK_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
-#define QIO_CHANNEL_WEBSOCK_GUID_LEN strlen(QIO_CHANNEL_WEBSOCK_GUID)
+#define QIO_CHANNEL_WEBSOCK_GUID_LEN (sizeof(QIO_CHANNEL_WEBSOCK_GUID) - 1)
 
 #define QIO_CHANNEL_WEBSOCK_HEADER_PROTOCOL "sec-websocket-protocol"
 #define QIO_CHANNEL_WEBSOCK_HEADER_VERSION "sec-websocket-version"
@@ -157,7 +157,7 @@ enum {
     QIO_CHANNEL_WEBSOCK_OPCODE_PONG = 0xA
 };
 
-static void GCC_FMT_ATTR(2, 3)
+static void G_GNUC_PRINTF(2, 3)
 qio_channel_websock_handshake_send_res(QIOChannelWebsock *ioc,
                                        const char *resmsg,
                                        ...)
@@ -177,15 +177,9 @@ qio_channel_websock_handshake_send_res(QIOChannelWebsock *ioc,
 
 static gchar *qio_channel_websock_date_str(void)
 {
-    struct tm tm;
-    time_t now = time(NULL);
-    char datebuf[128];
+    g_autoptr(GDateTime) now = g_date_time_new_now_utc();
 
-    gmtime_r(&now, &tm);
-
-    strftime(datebuf, sizeof(datebuf), "%a, %d %b %Y %H:%M:%S GMT", &tm);
-
-    return g_strdup(datebuf);
+    return g_date_time_format(now, "%a, %d %b %Y %H:%M:%S GMT");
 }
 
 static void qio_channel_websock_handshake_send_res_err(QIOChannelWebsock *ioc,
@@ -1087,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc,
                                          size_t niov,
                                          int **fds,
                                          size_t *nfds,
+                                         int flags,
                                          Error **errp)
 {
     QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
@@ -1133,6 +1128,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc,
                                           size_t niov,
                                           int *fds,
                                           size_t nfds,
+                                          int flags,
                                           Error **errp)
 {
     QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
index 93d449dee261b9674493b6350d1583a46d87023a..86c5834510ff1af08c5756257a71df11c04756bd 100644 (file)
@@ -19,6 +19,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "block/aio-wait.h"
 #include "io/channel.h"
 #include "qapi/error.h"
 #include "qemu/main-loop.h"
@@ -52,6 +53,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
                                size_t niov,
                                int **fds,
                                size_t *nfds,
+                               int flags,
                                Error **errp)
 {
     QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
@@ -63,7 +65,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
         return -1;
     }
 
-    return klass->io_readv(ioc, iov, niov, fds, nfds, errp);
+    if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) &&
+        !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+        error_setg_errno(errp, EINVAL,
+                         "Channel does not support peek read");
+        return -1;
+    }
+
+    return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp);
 }
 
 
@@ -72,39 +81,81 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc,
                                 size_t niov,
                                 int *fds,
                                 size_t nfds,
+                                int flags,
                                 Error **errp)
 {
     QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
 
-    if ((fds || nfds) &&
-        !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
+    if (fds || nfds) {
+        if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
+            error_setg_errno(errp, EINVAL,
+                             "Channel does not support file descriptor passing");
+            return -1;
+        }
+        if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+            error_setg_errno(errp, EINVAL,
+                             "Zero Copy does not support file descriptor passing");
+            return -1;
+        }
+    }
+
+    if ((flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) &&
+        !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
         error_setg_errno(errp, EINVAL,
-                         "Channel does not support file descriptor passing");
+                         "Requested Zero Copy feature is not available");
         return -1;
     }
 
-    return klass->io_writev(ioc, iov, niov, fds, nfds, errp);
+    return klass->io_writev(ioc, iov, niov, fds, nfds, flags, errp);
 }
 
 
-int qio_channel_readv_all_eof(QIOChannel *ioc,
-                              const struct iovec *iov,
-                              size_t niov,
-                              Error **errp)
+int coroutine_mixed_fn qio_channel_readv_all_eof(QIOChannel *ioc,
+                                                 const struct iovec *iov,
+                                                 size_t niov,
+                                                 Error **errp)
+{
+    return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
+}
+
+int coroutine_mixed_fn qio_channel_readv_all(QIOChannel *ioc,
+                                             const struct iovec *iov,
+                                             size_t niov,
+                                             Error **errp)
+{
+    return qio_channel_readv_full_all(ioc, iov, niov, NULL, NULL, errp);
+}
+
+int coroutine_mixed_fn qio_channel_readv_full_all_eof(QIOChannel *ioc,
+                                                      const struct iovec *iov,
+                                                      size_t niov,
+                                                      int **fds, size_t *nfds,
+                                                      Error **errp)
 {
     int ret = -1;
     struct iovec *local_iov = g_new(struct iovec, niov);
     struct iovec *local_iov_head = local_iov;
     unsigned int nlocal_iov = niov;
+    int **local_fds = fds;
+    size_t *local_nfds = nfds;
     bool partial = false;
 
+    if (nfds) {
+        *nfds = 0;
+    }
+
+    if (fds) {
+        *fds = NULL;
+    }
+
     nlocal_iov = iov_copy(local_iov, nlocal_iov,
                           iov, niov,
                           0, iov_size(iov, niov));
 
-    while (nlocal_iov > 0) {
+    while ((nlocal_iov > 0) || local_fds) {
         ssize_t len;
-        len = qio_channel_readv(ioc, local_iov, nlocal_iov, errp);
+        len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds,
+                                     local_nfds, 0, errp);
         if (len == QIO_CHANNEL_ERR_BLOCK) {
             if (qemu_in_coroutine()) {
                 qio_channel_yield(ioc, G_IO_IN);
@@ -112,20 +163,50 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
                 qio_channel_wait(ioc, G_IO_IN);
             }
             continue;
-        } else if (len < 0) {
-            goto cleanup;
-        } else if (len == 0) {
-            if (partial) {
-                error_setg(errp,
-                           "Unexpected end-of-file before all bytes were read");
-            } else {
+        }
+
+        if (len == 0) {
+            if (local_nfds && *local_nfds) {
+                /*
+                 * Got some FDs, but no data yet. This isn't an EOF
+                 * scenario (yet), so carry on to try to read data
+                 * on next loop iteration
+                 */
+                goto next_iter;
+            } else if (!partial) {
+                /* No fds and no data - EOF before any data read */
                 ret = 0;
+                goto cleanup;
+            } else {
+                len = -1;
+                error_setg(errp,
+                           "Unexpected end-of-file before all data were read");
+                /* Fallthrough into len < 0 handling */
+            }
+        }
+
+        if (len < 0) {
+            /* Close any FDs we previously received */
+            if (nfds && fds) {
+                size_t i;
+                for (i = 0; i < (*nfds); i++) {
+                    close((*fds)[i]);
+                }
+                g_free(*fds);
+                *fds = NULL;
+                *nfds = 0;
             }
             goto cleanup;
         }
 
+        if (nlocal_iov) {
+            iov_discard_front(&local_iov, &nlocal_iov, len);
+        }
+
+next_iter:
         partial = true;
-        iov_discard_front(&local_iov, &nlocal_iov, len);
+        local_fds = NULL;
+        local_nfds = NULL;
     }
 
     ret = 1;
@@ -135,27 +216,38 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
     return ret;
 }
 
-int qio_channel_readv_all(QIOChannel *ioc,
-                          const struct iovec *iov,
-                          size_t niov,
-                          Error **errp)
+int coroutine_mixed_fn qio_channel_readv_full_all(QIOChannel *ioc,
+                                                  const struct iovec *iov,
+                                                  size_t niov,
+                                                  int **fds, size_t *nfds,
+                                                  Error **errp)
 {
-    int ret = qio_channel_readv_all_eof(ioc, iov, niov, errp);
+    int ret = qio_channel_readv_full_all_eof(ioc, iov, niov, fds, nfds, errp);
 
     if (ret == 0) {
-        ret = -1;
-        error_setg(errp,
-                   "Unexpected end-of-file before all bytes were read");
-    } else if (ret == 1) {
-        ret = 0;
+        error_setg(errp, "Unexpected end-of-file before all data were read");
+        return -1;
+    }
+    if (ret == 1) {
+        return 0;
     }
+
     return ret;
 }
 
-int qio_channel_writev_all(QIOChannel *ioc,
-                           const struct iovec *iov,
-                           size_t niov,
-                           Error **errp)
+int coroutine_mixed_fn qio_channel_writev_all(QIOChannel *ioc,
+                                              const struct iovec *iov,
+                                              size_t niov,
+                                              Error **errp)
+{
+    return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, 0, errp);
+}
+
+int coroutine_mixed_fn qio_channel_writev_full_all(QIOChannel *ioc,
+                                                   const struct iovec *iov,
+                                                   size_t niov,
+                                                   int *fds, size_t nfds,
+                                                   int flags, Error **errp)
 {
     int ret = -1;
     struct iovec *local_iov = g_new(struct iovec, niov);
@@ -168,7 +260,10 @@ int qio_channel_writev_all(QIOChannel *ioc,
 
     while (nlocal_iov > 0) {
         ssize_t len;
-        len = qio_channel_writev(ioc, local_iov, nlocal_iov, errp);
+
+        len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds,
+                                            nfds, flags, errp);
+
         if (len == QIO_CHANNEL_ERR_BLOCK) {
             if (qemu_in_coroutine()) {
                 qio_channel_yield(ioc, G_IO_OUT);
@@ -182,6 +277,9 @@ int qio_channel_writev_all(QIOChannel *ioc,
         }
 
         iov_discard_front(&local_iov, &nlocal_iov, len);
+
+        fds = NULL;
+        nfds = 0;
     }
 
     ret = 0;
@@ -195,7 +293,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc,
                           size_t niov,
                           Error **errp)
 {
-    return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp);
+    return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp);
 }
 
 
@@ -204,7 +302,7 @@ ssize_t qio_channel_writev(QIOChannel *ioc,
                            size_t niov,
                            Error **errp)
 {
-    return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp);
+    return qio_channel_writev_full(ioc, iov, niov, NULL, 0, 0, errp);
 }
 
 
@@ -214,7 +312,7 @@ ssize_t qio_channel_read(QIOChannel *ioc,
                          Error **errp)
 {
     struct iovec iov = { .iov_base = buf, .iov_len = buflen };
-    return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp);
+    return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp);
 }
 
 
@@ -224,34 +322,34 @@ ssize_t qio_channel_write(QIOChannel *ioc,
                           Error **errp)
 {
     struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
-    return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp);
+    return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, 0, errp);
 }
 
 
-int qio_channel_read_all_eof(QIOChannel *ioc,
-                             char *buf,
-                             size_t buflen,
-                             Error **errp)
+int coroutine_mixed_fn qio_channel_read_all_eof(QIOChannel *ioc,
+                                                char *buf,
+                                                size_t buflen,
+                                                Error **errp)
 {
     struct iovec iov = { .iov_base = buf, .iov_len = buflen };
     return qio_channel_readv_all_eof(ioc, &iov, 1, errp);
 }
 
 
-int qio_channel_read_all(QIOChannel *ioc,
-                         char *buf,
-                         size_t buflen,
-                         Error **errp)
+int coroutine_mixed_fn qio_channel_read_all(QIOChannel *ioc,
+                                            char *buf,
+                                            size_t buflen,
+                                            Error **errp)
 {
     struct iovec iov = { .iov_base = buf, .iov_len = buflen };
     return qio_channel_readv_all(ioc, &iov, 1, errp);
 }
 
 
-int qio_channel_write_all(QIOChannel *ioc,
-                          const char *buf,
-                          size_t buflen,
-                          Error **errp)
+int coroutine_mixed_fn qio_channel_write_all(QIOChannel *ioc,
+                                             const char *buf,
+                                             size_t buflen,
+                                             Error **errp)
 {
     struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
     return qio_channel_writev_all(ioc, &iov, 1, errp);
@@ -267,6 +365,12 @@ int qio_channel_set_blocking(QIOChannel *ioc,
 }
 
 
+void qio_channel_set_follow_coroutine_ctx(QIOChannel *ioc, bool enabled)
+{
+    ioc->follow_coroutine_ctx = enabled;
+}
+
+
 int qio_channel_close(QIOChannel *ioc,
                       Error **errp)
 {
@@ -290,14 +394,16 @@ GSource *qio_channel_create_watch(QIOChannel *ioc,
 
 
 void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
-                                    AioContext *ctx,
+                                    AioContext *read_ctx,
                                     IOHandler *io_read,
+                                    AioContext *write_ctx,
                                     IOHandler *io_write,
                                     void *opaque)
 {
     QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
 
-    klass->io_set_aio_fd_handler(ioc, ctx, io_read, io_write, opaque);
+    klass->io_set_aio_fd_handler(ioc, read_ctx, io_read, write_ctx, io_write,
+            opaque);
 }
 
 guint qio_channel_add_watch_full(QIOChannel *ioc,
@@ -400,11 +506,28 @@ off_t qio_channel_io_seek(QIOChannel *ioc,
     return klass->io_seek(ioc, offset, whence, errp);
 }
 
+int qio_channel_flush(QIOChannel *ioc,
+                                Error **errp)
+{
+    QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
+
+    if (!klass->io_flush ||
+        !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
+        return 0;
+    }
+
+    return klass->io_flush(ioc, errp);
+}
+
 
 static void qio_channel_restart_read(void *opaque)
 {
     QIOChannel *ioc = opaque;
-    Coroutine *co = ioc->read_coroutine;
+    Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL);
+
+    if (!co) {
+        return;
+    }
 
     /* Assert that aio_co_wake() reenters the coroutine directly */
     assert(qemu_get_current_aio_context() ==
@@ -415,7 +538,11 @@ static void qio_channel_restart_read(void *opaque)
 static void qio_channel_restart_write(void *opaque)
 {
     QIOChannel *ioc = opaque;
-    Coroutine *co = ioc->write_coroutine;
+    Coroutine *co = qatomic_xchg(&ioc->write_coroutine, NULL);
+
+    if (!co) {
+        return;
+    }
 
     /* Assert that aio_co_wake() reenters the coroutine directly */
     assert(qemu_get_current_aio_context() ==
@@ -423,65 +550,121 @@ static void qio_channel_restart_write(void *opaque)
     aio_co_wake(co);
 }
 
-static void qio_channel_set_aio_fd_handlers(QIOChannel *ioc)
+static void coroutine_fn
+qio_channel_set_fd_handlers(QIOChannel *ioc, GIOCondition condition)
 {
-    IOHandler *rd_handler = NULL, *wr_handler = NULL;
-    AioContext *ctx;
+    AioContext *ctx = ioc->follow_coroutine_ctx ?
+        qemu_coroutine_get_aio_context(qemu_coroutine_self()) :
+        iohandler_get_aio_context();
+    AioContext *read_ctx = NULL;
+    IOHandler *io_read = NULL;
+    AioContext *write_ctx = NULL;
+    IOHandler *io_write = NULL;
 
-    if (ioc->read_coroutine) {
-        rd_handler = qio_channel_restart_read;
-    }
-    if (ioc->write_coroutine) {
-        wr_handler = qio_channel_restart_write;
+    if (condition == G_IO_IN) {
+        ioc->read_coroutine = qemu_coroutine_self();
+        ioc->read_ctx = ctx;
+        read_ctx = ctx;
+        io_read = qio_channel_restart_read;
+
+        /*
+         * Thread safety: if the other coroutine is set and its AioContext
+         * matches ours, then there is mutual exclusion between read and write
+         * because they share a single thread and it's safe to set both read
+         * and write fd handlers here. If the AioContext does not match ours,
+         * then both threads may run in parallel but there is no shared state
+         * to worry about.
+         */
+        if (ioc->write_coroutine && ioc->write_ctx == ctx) {
+            write_ctx = ctx;
+            io_write = qio_channel_restart_write;
+        }
+    } else if (condition == G_IO_OUT) {
+        ioc->write_coroutine = qemu_coroutine_self();
+        ioc->write_ctx = ctx;
+        write_ctx = ctx;
+        io_write = qio_channel_restart_write;
+        if (ioc->read_coroutine && ioc->read_ctx == ctx) {
+            read_ctx = ctx;
+            io_read = qio_channel_restart_read;
+        }
+    } else {
+        abort();
     }
 
-    ctx = ioc->ctx ? ioc->ctx : iohandler_get_aio_context();
-    qio_channel_set_aio_fd_handler(ioc, ctx, rd_handler, wr_handler, ioc);
+    qio_channel_set_aio_fd_handler(ioc, read_ctx, io_read,
+            write_ctx, io_write, ioc);
 }
 
-void qio_channel_attach_aio_context(QIOChannel *ioc,
-                                    AioContext *ctx)
+static void coroutine_fn
+qio_channel_clear_fd_handlers(QIOChannel *ioc, GIOCondition condition)
 {
-    assert(!ioc->read_coroutine);
-    assert(!ioc->write_coroutine);
-    ioc->ctx = ctx;
-}
+    AioContext *read_ctx = NULL;
+    IOHandler *io_read = NULL;
+    AioContext *write_ctx = NULL;
+    IOHandler *io_write = NULL;
+    AioContext *ctx;
 
-void qio_channel_detach_aio_context(QIOChannel *ioc)
-{
-    ioc->read_coroutine = NULL;
-    ioc->write_coroutine = NULL;
-    qio_channel_set_aio_fd_handlers(ioc);
-    ioc->ctx = NULL;
+    if (condition == G_IO_IN) {
+        ctx = ioc->read_ctx;
+        read_ctx = ctx;
+        io_read = NULL;
+        if (ioc->write_coroutine && ioc->write_ctx == ctx) {
+            write_ctx = ctx;
+            io_write = qio_channel_restart_write;
+        }
+    } else if (condition == G_IO_OUT) {
+        ctx = ioc->write_ctx;
+        write_ctx = ctx;
+        io_write = NULL;
+        if (ioc->read_coroutine && ioc->read_ctx == ctx) {
+            read_ctx = ctx;
+            io_read = qio_channel_restart_read;
+        }
+    } else {
+        abort();
+    }
+
+    qio_channel_set_aio_fd_handler(ioc, read_ctx, io_read,
+            write_ctx, io_write, ioc);
 }
 
 void coroutine_fn qio_channel_yield(QIOChannel *ioc,
                                     GIOCondition condition)
 {
+    AioContext *ioc_ctx;
+
     assert(qemu_in_coroutine());
+    ioc_ctx = qemu_coroutine_get_aio_context(qemu_coroutine_self());
+
     if (condition == G_IO_IN) {
         assert(!ioc->read_coroutine);
-        ioc->read_coroutine = qemu_coroutine_self();
     } else if (condition == G_IO_OUT) {
         assert(!ioc->write_coroutine);
-        ioc->write_coroutine = qemu_coroutine_self();
     } else {
         abort();
     }
-    qio_channel_set_aio_fd_handlers(ioc);
+    qio_channel_set_fd_handlers(ioc, condition);
     qemu_coroutine_yield();
+    assert(in_aio_context_home_thread(ioc_ctx));
 
     /* Allow interrupting the operation by reentering the coroutine other than
      * through the aio_fd_handlers. */
-    if (condition == G_IO_IN && ioc->read_coroutine) {
-        ioc->read_coroutine = NULL;
-        qio_channel_set_aio_fd_handlers(ioc);
-    } else if (condition == G_IO_OUT && ioc->write_coroutine) {
-        ioc->write_coroutine = NULL;
-        qio_channel_set_aio_fd_handlers(ioc);
+    if (condition == G_IO_IN) {
+        assert(ioc->read_coroutine == NULL);
+    } else if (condition == G_IO_OUT) {
+        assert(ioc->write_coroutine == NULL);
     }
+    qio_channel_clear_fd_handlers(ioc, condition);
 }
 
+void qio_channel_wake_read(QIOChannel *ioc)
+{
+    Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL);
+    if (co) {
+        aio_co_wake(co);
+    }
+}
 
 static gboolean qio_channel_wait_complete(QIOChannel *ioc,
                                           GIOCondition condition,
@@ -522,6 +705,10 @@ static void qio_channel_finalize(Object *obj)
 {
     QIOChannel *ioc = QIO_CHANNEL(obj);
 
+    /* Must not have coroutines in qio_channel_yield() */
+    assert(!ioc->read_coroutine);
+    assert(!ioc->write_coroutine);
+
     g_free(ioc->name);
 
 #ifdef _WIN32
index 743a0efc87670fa89f9becf7de783edb6ed59037..53b0e8407a97ea8154fed19e9533ead7c3d7d93e 100644 (file)
@@ -122,6 +122,10 @@ static int qio_dns_resolver_lookup_sync_inet(QIODNSResolver *resolver,
             .ipv4 = iaddr->ipv4,
             .has_ipv6 = iaddr->has_ipv6,
             .ipv6 = iaddr->ipv6,
+#ifdef HAVE_IPPROTO_MPTCP
+            .has_mptcp = iaddr->has_mptcp,
+            .mptcp = iaddr->mptcp,
+#endif
         };
 
         (*addrs)[i] = newaddr;
index bcd8b1e7373ae9b4311951ca83e868165c32b6c5..283b9b2bdbdf5c9ccb103e1aa7a5f874b3c6e24d 100644 (file)
@@ -3,6 +3,7 @@ io_ss.add(files(
   'channel-buffer.c',
   'channel-command.c',
   'channel-file.c',
+  'channel-null.c',
   'channel-socket.c',
   'channel-tls.c',
   'channel-util.c',
@@ -12,4 +13,4 @@ io_ss.add(files(
   'dns-resolver.c',
   'net-listener.c',
   'task.c',
-))
+), gnutls)
index 46c2643d005f92d96b206f594cf05d879b5075c8..47405965a66d44e81567e1fefc7b3dabffaa9d18 100644 (file)
@@ -109,9 +109,7 @@ void qio_net_listener_add(QIONetListener *listener,
                           QIOChannelSocket *sioc)
 {
     if (listener->name) {
-        char *name = g_strdup_printf("%s-listen", listener->name);
-        qio_channel_set_name(QIO_CHANNEL(sioc), name);
-        g_free(name);
+        qio_channel_set_name(QIO_CHANNEL(sioc), listener->name);
     }
 
     listener->sioc = g_renew(QIOChannelSocket *, listener->sioc,
@@ -292,6 +290,9 @@ static void qio_net_listener_finalize(Object *obj)
     QIONetListener *listener = QIO_NET_LISTENER(obj);
     size_t i;
 
+    if (listener->io_notify) {
+        listener->io_notify(listener->io_data);
+    }
     qio_net_listener_disconnect(listener);
 
     for (i = 0; i < listener->nsioc; i++) {
index d7bc70b96668ca0153fea89913c68eca61f0e19e..3cc5cf1efdf398b25701f3ea3aba61c1b6c6b435 100644 (file)
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 
 # task.c
 qio_task_new(void *task, void *source, void *func, void *opaque) "Task new task=%p source=%p func=%p opaque=%p"
@@ -10,6 +10,9 @@ qio_task_thread_result(void *task) "Task thread result task=%p"
 qio_task_thread_source_attach(void *task, void *source) "Task thread source attach task=%p source=%p"
 qio_task_thread_source_cancel(void *task, void *source) "Task thread source cancel task=%p source=%p"
 
+# channel-null.c
+qio_channel_null_new(void *ioc) "Null new ioc=%p"
+
 # channel-socket.c
 qio_channel_socket_new(void *ioc) "Socket new ioc=%p"
 qio_channel_socket_new_fd(void *ioc, int fd) "Socket new ioc=%p fd=%d"
index 69eff9efbc70834f62cd1b009962ee005d88199b..b753286414a7ec2f3c4de0e20f1478ac35224091 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Event loop thread
  *
- * Copyright Red Hat Inc., 2013
+ * Copyright Red Hat Inc., 2013, 2020
  *
  * Authors:
  *  Stefan Hajnoczi   <stefanha@redhat.com>
@@ -17,6 +17,7 @@
 #include "qemu/module.h"
 #include "block/aio.h"
 #include "block/block.h"
+#include "sysemu/event-loop-base.h"
 #include "sysemu/iothread.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-misc.h"
 #include "qemu/rcu.h"
 #include "qemu/main-loop.h"
 
-typedef ObjectClass IOThreadClass;
-
-DECLARE_CLASS_CHECKERS(IOThreadClass, IOTHREAD,
-                       TYPE_IOTHREAD)
 
 #ifdef CONFIG_POSIX
 /* Benchmark results from 2016 on NVMe SSD drives show max polling times around
@@ -39,13 +36,6 @@ DECLARE_CLASS_CHECKERS(IOThreadClass, IOTHREAD,
 #define IOTHREAD_POLL_MAX_NS_DEFAULT 0ULL
 #endif
 
-static __thread IOThread *my_iothread;
-
-AioContext *qemu_get_current_aio_context(void)
-{
-    return my_iothread ? my_iothread->ctx : qemu_get_aio_context();
-}
-
 static void *iothread_run(void *opaque)
 {
     IOThread *iothread = opaque;
@@ -56,7 +46,7 @@ static void *iothread_run(void *opaque)
      * in this new thread uses glib.
      */
     g_main_context_push_thread_default(iothread->worker_context);
-    my_iothread = iothread;
+    qemu_set_current_aio_context(iothread->ctx);
     iothread->thread_id = qemu_get_thread_id();
     qemu_sem_post(&iothread->init_done_sem);
 
@@ -148,22 +138,51 @@ static void iothread_instance_finalize(Object *obj)
     qemu_sem_destroy(&iothread->init_done_sem);
 }
 
-static void iothread_init_gcontext(IOThread *iothread)
+static void iothread_init_gcontext(IOThread *iothread, const char *thread_name)
 {
     GSource *source;
+    g_autofree char *name = g_strdup_printf("%s aio-context", thread_name);
 
     iothread->worker_context = g_main_context_new();
     source = aio_get_g_source(iothread_get_aio_context(iothread));
+    g_source_set_name(source, name);
     g_source_attach(source, iothread->worker_context);
     g_source_unref(source);
     iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE);
 }
 
-static void iothread_complete(UserCreatable *obj, Error **errp)
+static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp)
+{
+    ERRP_GUARD();
+    IOThread *iothread = IOTHREAD(base);
+
+    if (!iothread->ctx) {
+        return;
+    }
+
+    aio_context_set_poll_params(iothread->ctx,
+                                iothread->poll_max_ns,
+                                iothread->poll_grow,
+                                iothread->poll_shrink,
+                                errp);
+    if (*errp) {
+        return;
+    }
+
+    aio_context_set_aio_params(iothread->ctx,
+                               iothread->parent_obj.aio_max_batch,
+                               errp);
+
+    aio_context_set_thread_pool_params(iothread->ctx, base->thread_pool_min,
+                                       base->thread_pool_max, errp);
+}
+
+
+static void iothread_init(EventLoopBase *base, Error **errp)
 {
     Error *local_error = NULL;
-    IOThread *iothread = IOTHREAD(obj);
-    char *thread_name;
+    IOThread *iothread = IOTHREAD(base);
+    g_autofree char *thread_name = NULL;
 
     iothread->stopping = false;
     iothread->running = true;
@@ -172,17 +191,16 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
         return;
     }
 
+    thread_name = g_strdup_printf("IO %s",
+                        object_get_canonical_path_component(OBJECT(base)));
+
     /*
      * Init one GMainContext for the iothread unconditionally, even if
      * it's not used
      */
-    iothread_init_gcontext(iothread);
+    iothread_init_gcontext(iothread, thread_name);
 
-    aio_context_set_poll_params(iothread->ctx,
-                                iothread->poll_max_ns,
-                                iothread->poll_grow,
-                                iothread->poll_shrink,
-                                &local_error);
+    iothread_set_aio_context_params(base, &local_error);
     if (local_error) {
         error_propagate(errp, local_error);
         aio_context_unref(iothread->ctx);
@@ -193,11 +211,8 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
     /* This assumes we are called from a thread with useful CPU affinity for us
      * to inherit.
      */
-    thread_name = g_strdup_printf("IO %s",
-                        object_get_canonical_path_component(OBJECT(obj)));
     qemu_thread_create(&iothread->thread, thread_name, iothread_run,
                        iothread, QEMU_THREAD_JOINABLE);
-    g_free(thread_name);
 
     /* Wait for initialization to complete */
     while (iothread->thread_id == -1) {
@@ -208,48 +223,67 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
 typedef struct {
     const char *name;
     ptrdiff_t offset; /* field's byte offset in IOThread struct */
-} PollParamInfo;
+} IOThreadParamInfo;
 
-static PollParamInfo poll_max_ns_info = {
+static IOThreadParamInfo poll_max_ns_info = {
     "poll-max-ns", offsetof(IOThread, poll_max_ns),
 };
-static PollParamInfo poll_grow_info = {
+static IOThreadParamInfo poll_grow_info = {
     "poll-grow", offsetof(IOThread, poll_grow),
 };
-static PollParamInfo poll_shrink_info = {
+static IOThreadParamInfo poll_shrink_info = {
     "poll-shrink", offsetof(IOThread, poll_shrink),
 };
 
-static void iothread_get_poll_param(Object *obj, Visitor *v,
-        const char *name, void *opaque, Error **errp)
+static void iothread_get_param(Object *obj, Visitor *v,
+        const char *name, IOThreadParamInfo *info, Error **errp)
 {
     IOThread *iothread = IOTHREAD(obj);
-    PollParamInfo *info = opaque;
     int64_t *field = (void *)iothread + info->offset;
 
     visit_type_int64(v, name, field, errp);
 }
 
-static void iothread_set_poll_param(Object *obj, Visitor *v,
-        const char *name, void *opaque, Error **errp)
+static bool iothread_set_param(Object *obj, Visitor *v,
+        const char *name, IOThreadParamInfo *info, Error **errp)
 {
     IOThread *iothread = IOTHREAD(obj);
-    PollParamInfo *info = opaque;
     int64_t *field = (void *)iothread + info->offset;
     int64_t value;
 
     if (!visit_type_int64(v, name, &value, errp)) {
-        return;
+        return false;
     }
 
     if (value < 0) {
         error_setg(errp, "%s value must be in range [0, %" PRId64 "]",
                    info->name, INT64_MAX);
-        return;
+        return false;
     }
 
     *field = value;
 
+    return true;
+}
+
+static void iothread_get_poll_param(Object *obj, Visitor *v,
+        const char *name, void *opaque, Error **errp)
+{
+    IOThreadParamInfo *info = opaque;
+
+    iothread_get_param(obj, v, name, info, errp);
+}
+
+static void iothread_set_poll_param(Object *obj, Visitor *v,
+        const char *name, void *opaque, Error **errp)
+{
+    IOThread *iothread = IOTHREAD(obj);
+    IOThreadParamInfo *info = opaque;
+
+    if (!iothread_set_param(obj, v, name, info, errp)) {
+        return;
+    }
+
     if (iothread->ctx) {
         aio_context_set_poll_params(iothread->ctx,
                                     iothread->poll_max_ns,
@@ -261,8 +295,10 @@ static void iothread_set_poll_param(Object *obj, Visitor *v,
 
 static void iothread_class_init(ObjectClass *klass, void *class_data)
 {
-    UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
-    ucc->complete = iothread_complete;
+    EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(klass);
+
+    bc->init = iothread_init;
+    bc->update_params = iothread_set_aio_context_params;
 
     object_class_property_add(klass, "poll-max-ns", "int",
                               iothread_get_poll_param,
@@ -280,15 +316,11 @@ static void iothread_class_init(ObjectClass *klass, void *class_data)
 
 static const TypeInfo iothread_info = {
     .name = TYPE_IOTHREAD,
-    .parent = TYPE_OBJECT,
+    .parent = TYPE_EVENT_LOOP_BASE,
     .class_init = iothread_class_init,
     .instance_size = sizeof(IOThread),
     .instance_init = iothread_instance_init,
     .instance_finalize = iothread_instance_finalize,
-    .interfaces = (InterfaceInfo[]) {
-        {TYPE_USER_CREATABLE},
-        {}
-    },
 };
 
 static void iothread_register_types(void)
@@ -310,8 +342,7 @@ AioContext *iothread_get_aio_context(IOThread *iothread)
 
 static int query_one_iothread(Object *object, void *opaque)
 {
-    IOThreadInfoList ***prev = opaque;
-    IOThreadInfoList *elem;
+    IOThreadInfoList ***tail = opaque;
     IOThreadInfo *info;
     IOThread *iothread;
 
@@ -326,13 +357,9 @@ static int query_one_iothread(Object *object, void *opaque)
     info->poll_max_ns = iothread->poll_max_ns;
     info->poll_grow = iothread->poll_grow;
     info->poll_shrink = iothread->poll_shrink;
+    info->aio_max_batch = iothread->parent_obj.aio_max_batch;
 
-    elem = g_new0(IOThreadInfoList, 1);
-    elem->value = info;
-    elem->next = NULL;
-
-    **prev = elem;
-    *prev = &elem->next;
+    QAPI_LIST_APPEND(*tail, info);
     return 0;
 }
 
@@ -375,3 +402,9 @@ IOThread *iothread_by_id(const char *id)
 {
     return IOTHREAD(object_resolve_path_type(id, TYPE_IOTHREAD, NULL));
 }
+
+bool qemu_in_iothread(void)
+{
+    return qemu_get_current_aio_context() == qemu_get_aio_context() ?
+                    false : true;
+}
index 645601b2ccc1873b5fae2d97dbd8c34a68d42c90..9e26fa899f6b64fd20cd6486571cd0650d243c81 100644 (file)
--- a/job-qmp.c
+++ b/job-qmp.c
 #include "qapi/error.h"
 #include "trace/trace-root.h"
 
-/* Get a job using its ID and acquire its AioContext */
-static Job *find_job(const char *id, AioContext **aio_context, Error **errp)
+/*
+ * Get a job using its ID. Called with job_mutex held.
+ */
+static Job *find_job_locked(const char *id, Error **errp)
 {
     Job *job;
 
-    *aio_context = NULL;
-
-    job = job_get(id);
+    job = job_get_locked(id);
     if (!job) {
         error_setg(errp, "Job not found");
         return NULL;
     }
 
-    *aio_context = job->aio_context;
-    aio_context_acquire(*aio_context);
-
     return job;
 }
 
 void qmp_job_cancel(const char *id, Error **errp)
 {
-    AioContext *aio_context;
-    Job *job = find_job(id, &aio_context, errp);
+    Job *job;
+
+    JOB_LOCK_GUARD();
+    job = find_job_locked(id, errp);
 
     if (!job) {
         return;
     }
 
     trace_qmp_job_cancel(job);
-    job_user_cancel(job, true, errp);
-    aio_context_release(aio_context);
+    job_user_cancel_locked(job, true, errp);
 }
 
 void qmp_job_pause(const char *id, Error **errp)
 {
-    AioContext *aio_context;
-    Job *job = find_job(id, &aio_context, errp);
+    Job *job;
+
+    JOB_LOCK_GUARD();
+    job = find_job_locked(id, errp);
 
     if (!job) {
         return;
     }
 
     trace_qmp_job_pause(job);
-    job_user_pause(job, errp);
-    aio_context_release(aio_context);
+    job_user_pause_locked(job, errp);
 }
 
 void qmp_job_resume(const char *id, Error **errp)
 {
-    AioContext *aio_context;
-    Job *job = find_job(id, &aio_context, errp);
+    Job *job;
+
+    JOB_LOCK_GUARD();
+    job = find_job_locked(id, errp);
 
     if (!job) {
         return;
     }
 
     trace_qmp_job_resume(job);
-    job_user_resume(job, errp);
-    aio_context_release(aio_context);
+    job_user_resume_locked(job, errp);
 }
 
 void qmp_job_complete(const char *id, Error **errp)
 {
-    AioContext *aio_context;
-    Job *job = find_job(id, &aio_context, errp);
+    Job *job;
+
+    JOB_LOCK_GUARD();
+    job = find_job_locked(id, errp);
 
     if (!job) {
         return;
     }
 
     trace_qmp_job_complete(job);
-    job_complete(job, errp);
-    aio_context_release(aio_context);
+    job_complete_locked(job, errp);
 }
 
 void qmp_job_finalize(const char *id, Error **errp)
 {
-    AioContext *aio_context;
-    Job *job = find_job(id, &aio_context, errp);
+    Job *job;
+
+    JOB_LOCK_GUARD();
+    job = find_job_locked(id, errp);
 
     if (!job) {
         return;
     }
 
     trace_qmp_job_finalize(job);
-    job_ref(job);
-    job_finalize(job, errp);
-
-    /*
-     * Job's context might have changed via job_finalize (and job_txn_apply
-     * automatically acquires the new one), so make sure we release the correct
-     * one.
-     */
-    aio_context = job->aio_context;
-    job_unref(job);
-    aio_context_release(aio_context);
+    job_ref_locked(job);
+    job_finalize_locked(job, errp);
+
+    job_unref_locked(job);
 }
 
 void qmp_job_dismiss(const char *id, Error **errp)
 {
-    AioContext *aio_context;
-    Job *job = find_job(id, &aio_context, errp);
+    Job *job;
+
+    JOB_LOCK_GUARD();
+    job = find_job_locked(id, errp);
 
     if (!job) {
         return;
     }
 
     trace_qmp_job_dismiss(job);
-    job_dismiss(&job, errp);
-    aio_context_release(aio_context);
+    job_dismiss_locked(&job, errp);
 }
 
-static JobInfo *job_query_single(Job *job, Error **errp)
+/* Called with job_mutex held. */
+static JobInfo *job_query_single_locked(Job *job, Error **errp)
 {
     JobInfo *info;
+    uint64_t progress_current;
+    uint64_t progress_total;
 
     assert(!job_is_internal(job));
+    progress_get_snapshot(&job->progress, &progress_current,
+                          &progress_total);
 
     info = g_new(JobInfo, 1);
     *info = (JobInfo) {
         .id                 = g_strdup(job->id),
         .type               = job_type(job),
         .status             = job->status,
-        .current_progress   = job->progress.current,
-        .total_progress     = job->progress.total,
-        .has_error          = !!job->err,
-        .error              = job->err ? \
+        .current_progress   = progress_current,
+        .total_progress     = progress_total,
+        .error              = job->err ?
                               g_strdup(error_get_pretty(job->err)) : NULL,
     };
 
@@ -164,28 +165,23 @@ static JobInfo *job_query_single(Job *job, Error **errp)
 
 JobInfoList *qmp_query_jobs(Error **errp)
 {
-    JobInfoList *head = NULL, **p_next = &head;
+    JobInfoList *head = NULL, **tail = &head;
     Job *job;
 
-    for (job = job_next(NULL); job; job = job_next(job)) {
-        JobInfoList *elem;
-        AioContext *aio_context;
+    JOB_LOCK_GUARD();
+
+    for (job = job_next_locked(NULL); job; job = job_next_locked(job)) {
+        JobInfo *value;
 
         if (job_is_internal(job)) {
             continue;
         }
-        elem = g_new0(JobInfoList, 1);
-        aio_context = job->aio_context;
-        aio_context_acquire(aio_context);
-        elem->value = job_query_single(job, errp);
-        aio_context_release(aio_context);
-        if (!elem->value) {
-            g_free(elem);
+        value = job_query_single_locked(job, errp);
+        if (!value) {
             qapi_free_JobInfoList(head);
             return NULL;
         }
-        *p_next = elem;
-        p_next = &elem->next;
+        QAPI_LIST_APPEND(tail, value);
     }
 
     return head;
diff --git a/job.c b/job.c
index 8fecf389609fe09c5f87d001ac7c83af5cde92d9..99a2e54b54a899033a9f20ce6252d5be40c7d60f 100644 (file)
--- a/job.c
+++ b/job.c
 #include "trace/trace-root.h"
 #include "qapi/qapi-events-job.h"
 
+/*
+ * The job API is composed of two categories of functions.
+ *
+ * The first includes functions used by the monitor.  The monitor is
+ * peculiar in that it accesses the job list with job_get, and
+ * therefore needs consistency across job_get and the actual operation
+ * (e.g. job_user_cancel). To achieve this consistency, the caller
+ * calls job_lock/job_unlock itself around the whole operation.
+ *
+ *
+ * The second includes functions used by the job drivers and sometimes
+ * by the core block layer. These delegate the locking to the callee instead.
+ */
+
+/*
+ * job_mutex protects the jobs list, but also makes the
+ * struct job fields thread-safe.
+ */
+QemuMutex job_mutex;
+
+/* Protected by job_mutex */
 static QLIST_HEAD(, Job) jobs = QLIST_HEAD_INITIALIZER(jobs);
 
 /* Job State Transition Table */
@@ -56,9 +77,10 @@ bool JobVerbTable[JOB_VERB__MAX][JOB_STATUS__MAX] = {
     [JOB_VERB_PAUSE]                = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
     [JOB_VERB_RESUME]               = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
     [JOB_VERB_SET_SPEED]            = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
-    [JOB_VERB_COMPLETE]             = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
+    [JOB_VERB_COMPLETE]             = {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0},
     [JOB_VERB_FINALIZE]             = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0},
     [JOB_VERB_DISMISS]              = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0},
+    [JOB_VERB_CHANGE]               = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
 };
 
 /* Transactional group of jobs */
@@ -74,17 +96,12 @@ struct JobTxn {
     int refcnt;
 };
 
-/* Right now, this mutex is only needed to synchronize accesses to job->busy
- * and job->sleep_timer, such as concurrent calls to job_do_yield and
- * job_enter. */
-static QemuMutex job_mutex;
-
-static void job_lock(void)
+void job_lock(void)
 {
     qemu_mutex_lock(&job_mutex);
 }
 
-static void job_unlock(void)
+void job_unlock(void)
 {
     qemu_mutex_unlock(&job_mutex);
 }
@@ -102,19 +119,38 @@ JobTxn *job_txn_new(void)
     return txn;
 }
 
-static void job_txn_ref(JobTxn *txn)
+/* Called with job_mutex held. */
+static void job_txn_ref_locked(JobTxn *txn)
 {
     txn->refcnt++;
 }
 
-void job_txn_unref(JobTxn *txn)
+void job_txn_unref_locked(JobTxn *txn)
 {
     if (txn && --txn->refcnt == 0) {
         g_free(txn);
     }
 }
 
-void job_txn_add_job(JobTxn *txn, Job *job)
+void job_txn_unref(JobTxn *txn)
+{
+    JOB_LOCK_GUARD();
+    job_txn_unref_locked(txn);
+}
+
+/**
+ * @txn: The transaction (may be NULL)
+ * @job: Job to add to the transaction
+ *
+ * Add @job to the transaction.  The @job must not already be in a transaction.
+ * The caller must call either job_txn_unref() or job_completed() to release
+ * the reference that is automatically grabbed here.
+ *
+ * If @txn is NULL, the function does nothing.
+ *
+ * Called with job_mutex held.
+ */
+static void job_txn_add_job_locked(JobTxn *txn, Job *job)
 {
     if (!txn) {
         return;
@@ -124,21 +160,22 @@ void job_txn_add_job(JobTxn *txn, Job *job)
     job->txn = txn;
 
     QLIST_INSERT_HEAD(&txn->jobs, job, txn_list);
-    job_txn_ref(txn);
+    job_txn_ref_locked(txn);
 }
 
-static void job_txn_del_job(Job *job)
+/* Called with job_mutex held. */
+static void job_txn_del_job_locked(Job *job)
 {
     if (job->txn) {
         QLIST_REMOVE(job, txn_list);
-        job_txn_unref(job->txn);
+        job_txn_unref_locked(job->txn);
         job->txn = NULL;
     }
 }
 
-static int job_txn_apply(Job *job, int fn(Job *))
+/* Called with job_mutex held, but releases it temporarily. */
+static int job_txn_apply_locked(Job *job, int fn(Job *))
 {
-    AioContext *inner_ctx;
     Job *other_job, *next;
     JobTxn *txn = job->txn;
     int rc = 0;
@@ -149,25 +186,16 @@ static int job_txn_apply(Job *job, int fn(Job *))
      * we need to release it here to avoid holding the lock twice - which would
      * break AIO_WAIT_WHILE from within fn.
      */
-    job_ref(job);
-    aio_context_release(job->aio_context);
+    job_ref_locked(job);
 
     QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
-        inner_ctx = other_job->aio_context;
-        aio_context_acquire(inner_ctx);
         rc = fn(other_job);
-        aio_context_release(inner_ctx);
         if (rc) {
             break;
         }
     }
 
-    /*
-     * Note that job->aio_context might have been changed by calling fn, so we
-     * can't use a local variable to cache it.
-     */
-    aio_context_acquire(job->aio_context);
-    job_unref(job);
+    job_unref_locked(job);
     return rc;
 }
 
@@ -176,7 +204,8 @@ bool job_is_internal(Job *job)
     return (job->id == NULL);
 }
 
-static void job_state_transition(Job *job, JobStatus s1)
+/* Called with job_mutex held. */
+static void job_state_transition_locked(Job *job, JobStatus s1)
 {
     JobStatus s0 = job->status;
     assert(s1 >= 0 && s1 < JOB_STATUS__MAX);
@@ -191,7 +220,7 @@ static void job_state_transition(Job *job, JobStatus s1)
     }
 }
 
-int job_apply_verb(Job *job, JobVerb verb, Error **errp)
+int job_apply_verb_locked(Job *job, JobVerb verb, Error **errp)
 {
     JobStatus s0 = job->status;
     assert(verb >= 0 && verb < JOB_VERB__MAX);
@@ -215,12 +244,32 @@ const char *job_type_str(const Job *job)
     return JobType_str(job_type(job));
 }
 
+bool job_is_cancelled_locked(Job *job)
+{
+    /* force_cancel may be true only if cancelled is true, too */
+    assert(job->cancelled || !job->force_cancel);
+    return job->force_cancel;
+}
+
 bool job_is_cancelled(Job *job)
+{
+    JOB_LOCK_GUARD();
+    return job_is_cancelled_locked(job);
+}
+
+/* Called with job_mutex held. */
+static bool job_cancel_requested_locked(Job *job)
 {
     return job->cancelled;
 }
 
-bool job_is_ready(Job *job)
+bool job_cancel_requested(Job *job)
+{
+    JOB_LOCK_GUARD();
+    return job_cancel_requested_locked(job);
+}
+
+bool job_is_ready_locked(Job *job)
 {
     switch (job->status) {
     case JOB_STATUS_UNDEFINED:
@@ -242,7 +291,13 @@ bool job_is_ready(Job *job)
     return false;
 }
 
-bool job_is_completed(Job *job)
+bool job_is_ready(Job *job)
+{
+    JOB_LOCK_GUARD();
+    return job_is_ready_locked(job);
+}
+
+bool job_is_completed_locked(Job *job)
 {
     switch (job->status) {
     case JOB_STATUS_UNDEFINED:
@@ -264,17 +319,24 @@ bool job_is_completed(Job *job)
     return false;
 }
 
-static bool job_started(Job *job)
+static bool job_is_completed(Job *job)
+{
+    JOB_LOCK_GUARD();
+    return job_is_completed_locked(job);
+}
+
+static bool job_started_locked(Job *job)
 {
     return job->co;
 }
 
-static bool job_should_pause(Job *job)
+/* Called with job_mutex held. */
+static bool job_should_pause_locked(Job *job)
 {
     return job->pause_count > 0;
 }
 
-Job *job_next(Job *job)
+Job *job_next_locked(Job *job)
 {
     if (!job) {
         return QLIST_FIRST(&jobs);
@@ -282,7 +344,13 @@ Job *job_next(Job *job)
     return QLIST_NEXT(job, job_list);
 }
 
-Job *job_get(const char *id)
+Job *job_next(Job *job)
+{
+    JOB_LOCK_GUARD();
+    return job_next_locked(job);
+}
+
+Job *job_get_locked(const char *id)
 {
     Job *job;
 
@@ -295,6 +363,18 @@ Job *job_get(const char *id)
     return NULL;
 }
 
+void job_set_aio_context(Job *job, AioContext *ctx)
+{
+    /* protect against read in job_finish_sync_locked and job_start */
+    GLOBAL_STATE_CODE();
+    /* protect against read in job_do_yield_locked */
+    JOB_LOCK_GUARD();
+    /* ensure the job is quiescent while the AioContext is changed */
+    assert(job->paused || job_is_completed_locked(job));
+    job->aio_context = ctx;
+}
+
+/* Called with job_mutex *not* held. */
 static void job_sleep_timer_cb(void *opaque)
 {
     Job *job = opaque;
@@ -308,6 +388,8 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn,
 {
     Job *job;
 
+    JOB_LOCK_GUARD();
+
     if (job_id) {
         if (flags & JOB_INTERNAL) {
             error_setg(errp, "Cannot specify job ID for internal job");
@@ -317,7 +399,7 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn,
             error_setg(errp, "Invalid job ID '%s'", job_id);
             return NULL;
         }
-        if (job_get(job_id)) {
+        if (job_get_locked(job_id)) {
             error_setg(errp, "Job ID '%s' already in use", job_id);
             return NULL;
         }
@@ -339,12 +421,15 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn,
     job->cb            = cb;
     job->opaque        = opaque;
 
+    progress_init(&job->progress);
+
     notifier_list_init(&job->on_finalize_cancelled);
     notifier_list_init(&job->on_finalize_completed);
     notifier_list_init(&job->on_pending);
     notifier_list_init(&job->on_ready);
+    notifier_list_init(&job->on_idle);
 
-    job_state_transition(job, JOB_STATUS_CREATED);
+    job_state_transition_locked(job, JOB_STATUS_CREATED);
     aio_timer_init(qemu_get_aio_context(), &job->sleep_timer,
                    QEMU_CLOCK_REALTIME, SCALE_NS,
                    job_sleep_timer_cb, job);
@@ -355,33 +440,42 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn,
      * consolidating the job management logic */
     if (!txn) {
         txn = job_txn_new();
-        job_txn_add_job(txn, job);
-        job_txn_unref(txn);
+        job_txn_add_job_locked(txn, job);
+        job_txn_unref_locked(txn);
     } else {
-        job_txn_add_job(txn, job);
+        job_txn_add_job_locked(txn, job);
     }
 
     return job;
 }
 
-void job_ref(Job *job)
+void job_ref_locked(Job *job)
 {
     ++job->refcnt;
 }
 
-void job_unref(Job *job)
+void job_unref_locked(Job *job)
 {
+    GLOBAL_STATE_CODE();
+
     if (--job->refcnt == 0) {
         assert(job->status == JOB_STATUS_NULL);
         assert(!timer_pending(&job->sleep_timer));
         assert(!job->txn);
 
         if (job->driver->free) {
+            AioContext *aio_context = job->aio_context;
+            job_unlock();
+            /* FIXME: aiocontext lock is required because cb calls blk_unref */
+            aio_context_acquire(aio_context);
             job->driver->free(job);
+            aio_context_release(aio_context);
+            job_lock();
         }
 
         QLIST_REMOVE(job, job_list);
 
+        progress_destroy(&job->progress);
         error_free(job->err);
         g_free(job->id);
         g_free(job);
@@ -403,48 +497,56 @@ void job_progress_increase_remaining(Job *job, uint64_t delta)
     progress_increase_remaining(&job->progress, delta);
 }
 
-void job_event_cancelled(Job *job)
+/**
+ * To be called when a cancelled job is finalised.
+ * Called with job_mutex held.
+ */
+static void job_event_cancelled_locked(Job *job)
 {
     notifier_list_notify(&job->on_finalize_cancelled, job);
 }
 
-void job_event_completed(Job *job)
+/**
+ * To be called when a successfully completed job is finalised.
+ * Called with job_mutex held.
+ */
+static void job_event_completed_locked(Job *job)
 {
     notifier_list_notify(&job->on_finalize_completed, job);
 }
 
-static void job_event_pending(Job *job)
+/* Called with job_mutex held. */
+static void job_event_pending_locked(Job *job)
 {
     notifier_list_notify(&job->on_pending, job);
 }
 
-static void job_event_ready(Job *job)
+/* Called with job_mutex held. */
+static void job_event_ready_locked(Job *job)
 {
     notifier_list_notify(&job->on_ready, job);
 }
 
-static void job_event_idle(Job *job)
+/* Called with job_mutex held. */
+static void job_event_idle_locked(Job *job)
 {
     notifier_list_notify(&job->on_idle, job);
 }
 
-void job_enter_cond(Job *job, bool(*fn)(Job *job))
+void job_enter_cond_locked(Job *job, bool(*fn)(Job *job))
 {
-    if (!job_started(job)) {
+    if (!job_started_locked(job)) {
         return;
     }
     if (job->deferred_to_main_loop) {
         return;
     }
 
-    job_lock();
     if (job->busy) {
-        job_unlock();
         return;
     }
 
     if (fn && !fn(job)) {
-        job_unlock();
         return;
     }
 
@@ -452,12 +554,14 @@ void job_enter_cond(Job *job, bool(*fn)(Job *job))
     timer_del(&job->sleep_timer);
     job->busy = true;
     job_unlock();
-    aio_co_enter(job->aio_context, job->co);
+    aio_co_wake(job->co);
+    job_lock();
 }
 
 void job_enter(Job *job)
 {
-    job_enter_cond(job, NULL);
+    JOB_LOCK_GUARD();
+    job_enter_cond_locked(job, NULL);
 }
 
 /* Yield, and schedule a timer to reenter the coroutine after @ns nanoseconds.
@@ -465,97 +569,137 @@ void job_enter(Job *job)
  * is allowed and cancels the timer.
  *
  * If @ns is (uint64_t) -1, no timer is scheduled and job_enter() must be
- * called explicitly. */
-static void coroutine_fn job_do_yield(Job *job, uint64_t ns)
+ * called explicitly.
+ *
+ * Called with job_mutex held, but releases it temporarily.
+ */
+static void coroutine_fn job_do_yield_locked(Job *job, uint64_t ns)
 {
-    job_lock();
+    AioContext *next_aio_context;
+
     if (ns != -1) {
         timer_mod(&job->sleep_timer, ns);
     }
     job->busy = false;
-    job_event_idle(job);
+    job_event_idle_locked(job);
     job_unlock();
     qemu_coroutine_yield();
+    job_lock();
+
+    next_aio_context = job->aio_context;
+    /*
+     * Coroutine has resumed, but in the meanwhile the job AioContext
+     * might have changed via bdrv_try_change_aio_context(), so we need to move
+     * the coroutine too in the new aiocontext.
+     */
+    while (qemu_get_current_aio_context() != next_aio_context) {
+        job_unlock();
+        aio_co_reschedule_self(next_aio_context);
+        job_lock();
+        next_aio_context = job->aio_context;
+    }
 
-    /* Set by job_enter_cond() before re-entering the coroutine.  */
+    /* Set by job_enter_cond_locked() before re-entering the coroutine.  */
     assert(job->busy);
 }
 
-void coroutine_fn job_pause_point(Job *job)
+/* Called with job_mutex held, but releases it temporarily. */
+static void coroutine_fn job_pause_point_locked(Job *job)
 {
-    assert(job && job_started(job));
+    assert(job && job_started_locked(job));
 
-    if (!job_should_pause(job)) {
+    if (!job_should_pause_locked(job)) {
         return;
     }
-    if (job_is_cancelled(job)) {
+    if (job_is_cancelled_locked(job)) {
         return;
     }
 
     if (job->driver->pause) {
+        job_unlock();
         job->driver->pause(job);
+        job_lock();
     }
 
-    if (job_should_pause(job) && !job_is_cancelled(job)) {
+    if (job_should_pause_locked(job) && !job_is_cancelled_locked(job)) {
         JobStatus status = job->status;
-        job_state_transition(job, status == JOB_STATUS_READY
-                                  ? JOB_STATUS_STANDBY
-                                  : JOB_STATUS_PAUSED);
+        job_state_transition_locked(job, status == JOB_STATUS_READY
+                                    ? JOB_STATUS_STANDBY
+                                    : JOB_STATUS_PAUSED);
         job->paused = true;
-        job_do_yield(job, -1);
+        job_do_yield_locked(job, -1);
         job->paused = false;
-        job_state_transition(job, status);
+        job_state_transition_locked(job, status);
     }
 
     if (job->driver->resume) {
+        job_unlock();
         job->driver->resume(job);
+        job_lock();
     }
 }
 
-void job_yield(Job *job)
+void coroutine_fn job_pause_point(Job *job)
 {
+    JOB_LOCK_GUARD();
+    job_pause_point_locked(job);
+}
+
+void coroutine_fn job_yield(Job *job)
+{
+    JOB_LOCK_GUARD();
     assert(job->busy);
 
     /* Check cancellation *before* setting busy = false, too!  */
-    if (job_is_cancelled(job)) {
+    if (job_is_cancelled_locked(job)) {
         return;
     }
 
-    if (!job_should_pause(job)) {
-        job_do_yield(job, -1);
+    if (!job_should_pause_locked(job)) {
+        job_do_yield_locked(job, -1);
     }
 
-    job_pause_point(job);
+    job_pause_point_locked(job);
 }
 
 void coroutine_fn job_sleep_ns(Job *job, int64_t ns)
 {
+    JOB_LOCK_GUARD();
     assert(job->busy);
 
     /* Check cancellation *before* setting busy = false, too!  */
-    if (job_is_cancelled(job)) {
+    if (job_is_cancelled_locked(job)) {
         return;
     }
 
-    if (!job_should_pause(job)) {
-        job_do_yield(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns);
+    if (!job_should_pause_locked(job)) {
+        job_do_yield_locked(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns);
     }
 
-    job_pause_point(job);
+    job_pause_point_locked(job);
 }
 
-/* Assumes the block_job_mutex is held */
-static bool job_timer_not_pending(Job *job)
+/* Assumes the job_mutex is held */
+static bool job_timer_not_pending_locked(Job *job)
 {
     return !timer_pending(&job->sleep_timer);
 }
 
-void job_pause(Job *job)
+void job_pause_locked(Job *job)
 {
     job->pause_count++;
+    if (!job->paused) {
+        job_enter_cond_locked(job, NULL);
+    }
 }
 
-void job_resume(Job *job)
+void job_pause(Job *job)
+{
+    JOB_LOCK_GUARD();
+    job_pause_locked(job);
+}
+
+void job_resume_locked(Job *job)
 {
     assert(job->pause_count > 0);
     job->pause_count--;
@@ -564,12 +708,18 @@ void job_resume(Job *job)
     }
 
     /* kick only if no timer is pending */
-    job_enter_cond(job, job_timer_not_pending);
+    job_enter_cond_locked(job, job_timer_not_pending_locked);
+}
+
+void job_resume(Job *job)
+{
+    JOB_LOCK_GUARD();
+    job_resume_locked(job);
 }
 
-void job_user_pause(Job *job, Error **errp)
+void job_user_pause_locked(Job *job, Error **errp)
 {
-    if (job_apply_verb(job, JOB_VERB_PAUSE, errp)) {
+    if (job_apply_verb_locked(job, JOB_VERB_PAUSE, errp)) {
         return;
     }
     if (job->user_paused) {
@@ -577,87 +727,95 @@ void job_user_pause(Job *job, Error **errp)
         return;
     }
     job->user_paused = true;
-    job_pause(job);
+    job_pause_locked(job);
 }
 
-bool job_user_paused(Job *job)
+bool job_user_paused_locked(Job *job)
 {
     return job->user_paused;
 }
 
-void job_user_resume(Job *job, Error **errp)
+void job_user_resume_locked(Job *job, Error **errp)
 {
     assert(job);
+    GLOBAL_STATE_CODE();
     if (!job->user_paused || job->pause_count <= 0) {
         error_setg(errp, "Can't resume a job that was not paused");
         return;
     }
-    if (job_apply_verb(job, JOB_VERB_RESUME, errp)) {
+    if (job_apply_verb_locked(job, JOB_VERB_RESUME, errp)) {
         return;
     }
     if (job->driver->user_resume) {
+        job_unlock();
         job->driver->user_resume(job);
+        job_lock();
     }
     job->user_paused = false;
-    job_resume(job);
+    job_resume_locked(job);
 }
 
-static void job_do_dismiss(Job *job)
+/* Called with job_mutex held, but releases it temporarily. */
+static void job_do_dismiss_locked(Job *job)
 {
     assert(job);
     job->busy = false;
     job->paused = false;
     job->deferred_to_main_loop = true;
 
-    job_txn_del_job(job);
+    job_txn_del_job_locked(job);
 
-    job_state_transition(job, JOB_STATUS_NULL);
-    job_unref(job);
+    job_state_transition_locked(job, JOB_STATUS_NULL);
+    job_unref_locked(job);
 }
 
-void job_dismiss(Job **jobptr, Error **errp)
+void job_dismiss_locked(Job **jobptr, Error **errp)
 {
     Job *job = *jobptr;
     /* similarly to _complete, this is QMP-interface only. */
     assert(job->id);
-    if (job_apply_verb(job, JOB_VERB_DISMISS, errp)) {
+    if (job_apply_verb_locked(job, JOB_VERB_DISMISS, errp)) {
         return;
     }
 
-    job_do_dismiss(job);
+    job_do_dismiss_locked(job);
     *jobptr = NULL;
 }
 
 void job_early_fail(Job *job)
 {
+    JOB_LOCK_GUARD();
     assert(job->status == JOB_STATUS_CREATED);
-    job_do_dismiss(job);
+    job_do_dismiss_locked(job);
 }
 
-static void job_conclude(Job *job)
+/* Called with job_mutex held. */
+static void job_conclude_locked(Job *job)
 {
-    job_state_transition(job, JOB_STATUS_CONCLUDED);
-    if (job->auto_dismiss || !job_started(job)) {
-        job_do_dismiss(job);
+    job_state_transition_locked(job, JOB_STATUS_CONCLUDED);
+    if (job->auto_dismiss || !job_started_locked(job)) {
+        job_do_dismiss_locked(job);
     }
 }
 
-static void job_update_rc(Job *job)
+/* Called with job_mutex held. */
+static void job_update_rc_locked(Job *job)
 {
-    if (!job->ret && job_is_cancelled(job)) {
+    if (!job->ret && job_is_cancelled_locked(job)) {
         job->ret = -ECANCELED;
     }
     if (job->ret) {
         if (!job->err) {
             error_setg(&job->err, "%s", strerror(-job->ret));
         }
-        job_state_transition(job, JOB_STATUS_ABORTING);
+        job_state_transition_locked(job, JOB_STATUS_ABORTING);
     }
 }
 
 static void job_commit(Job *job)
 {
     assert(!job->ret);
+    GLOBAL_STATE_CODE();
     if (job->driver->commit) {
         job->driver->commit(job);
     }
@@ -666,6 +824,7 @@ static void job_commit(Job *job)
 static void job_abort(Job *job)
 {
     assert(job->ret);
+    GLOBAL_STATE_CODE();
     if (job->driver->abort) {
         job->driver->abort(job);
     }
@@ -673,19 +832,31 @@ static void job_abort(Job *job)
 
 static void job_clean(Job *job)
 {
+    GLOBAL_STATE_CODE();
     if (job->driver->clean) {
         job->driver->clean(job);
     }
 }
 
-static int job_finalize_single(Job *job)
+/*
+ * Called with job_mutex held, but releases it temporarily.
+ * Takes AioContext lock internally to invoke a job->driver callback.
+ */
+static int job_finalize_single_locked(Job *job)
 {
-    assert(job_is_completed(job));
+    int job_ret;
+    AioContext *ctx = job->aio_context;
+
+    assert(job_is_completed_locked(job));
 
     /* Ensure abort is called for late-transactional failures */
-    job_update_rc(job);
+    job_update_rc_locked(job);
 
-    if (!job->ret) {
+    job_ret = job->ret;
+    job_unlock();
+    aio_context_acquire(ctx);
+
+    if (!job_ret) {
         job_commit(job);
     } else {
         job_abort(job);
@@ -693,43 +864,77 @@ static int job_finalize_single(Job *job)
     job_clean(job);
 
     if (job->cb) {
-        job->cb(job->opaque, job->ret);
+        job->cb(job->opaque, job_ret);
     }
 
+    aio_context_release(ctx);
+    job_lock();
+
     /* Emit events only if we actually started */
-    if (job_started(job)) {
-        if (job_is_cancelled(job)) {
-            job_event_cancelled(job);
+    if (job_started_locked(job)) {
+        if (job_is_cancelled_locked(job)) {
+            job_event_cancelled_locked(job);
         } else {
-            job_event_completed(job);
+            job_event_completed_locked(job);
         }
     }
 
-    job_txn_del_job(job);
-    job_conclude(job);
+    job_txn_del_job_locked(job);
+    job_conclude_locked(job);
     return 0;
 }
 
-static void job_cancel_async(Job *job, bool force)
+/*
+ * Called with job_mutex held, but releases it temporarily.
+ * Takes AioContext lock internally to invoke a job->driver callback.
+ */
+static void job_cancel_async_locked(Job *job, bool force)
 {
+    AioContext *ctx = job->aio_context;
+    GLOBAL_STATE_CODE();
+    if (job->driver->cancel) {
+        job_unlock();
+        aio_context_acquire(ctx);
+        force = job->driver->cancel(job, force);
+        aio_context_release(ctx);
+        job_lock();
+    } else {
+        /* No .cancel() means the job will behave as if force-cancelled */
+        force = true;
+    }
+
     if (job->user_paused) {
         /* Do not call job_enter here, the caller will handle it.  */
         if (job->driver->user_resume) {
+            job_unlock();
             job->driver->user_resume(job);
+            job_lock();
         }
         job->user_paused = false;
         assert(job->pause_count > 0);
         job->pause_count--;
     }
-    job->cancelled = true;
-    /* To prevent 'force == false' overriding a previous 'force == true' */
-    job->force_cancel |= force;
+
+    /*
+     * Ignore soft cancel requests after the job is already done
+     * (We will still invoke job->driver->cancel() above, but if the
+     * job driver supports soft cancelling and the job is done, that
+     * should be a no-op, too.  We still call it so it can override
+     * @force.)
+     */
+    if (force || !job->deferred_to_main_loop) {
+        job->cancelled = true;
+        /* To prevent 'force == false' overriding a previous 'force == true' */
+        job->force_cancel |= force;
+    }
 }
 
-static void job_completed_txn_abort(Job *job)
+/*
+ * Called with job_mutex held, but releases it temporarily.
+ * Takes AioContext lock internally to invoke a job->driver callback.
+ */
+static void job_completed_txn_abort_locked(Job *job)
 {
-    AioContext *outer_ctx = job->aio_context;
-    AioContext *ctx;
     JobTxn *txn = job->txn;
     Job *other_job;
 
@@ -740,159 +945,164 @@ static void job_completed_txn_abort(Job *job)
         return;
     }
     txn->aborting = true;
-    job_txn_ref(txn);
+    job_txn_ref_locked(txn);
 
-    /* We can only hold the single job's AioContext lock while calling
-     * job_finalize_single() because the finalization callbacks can involve
-     * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. */
-    aio_context_release(outer_ctx);
+    job_ref_locked(job);
 
     /* Other jobs are effectively cancelled by us, set the status for
      * them; this job, however, may or may not be cancelled, depending
      * on the caller, so leave it. */
     QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
         if (other_job != job) {
-            ctx = other_job->aio_context;
-            aio_context_acquire(ctx);
-            job_cancel_async(other_job, false);
-            aio_context_release(ctx);
+            /*
+             * This is a transaction: If one job failed, no result will matter.
+             * Therefore, pass force=true to terminate all other jobs as quickly
+             * as possible.
+             */
+            job_cancel_async_locked(other_job, true);
         }
     }
     while (!QLIST_EMPTY(&txn->jobs)) {
         other_job = QLIST_FIRST(&txn->jobs);
-        ctx = other_job->aio_context;
-        aio_context_acquire(ctx);
-        if (!job_is_completed(other_job)) {
-            assert(job_is_cancelled(other_job));
-            job_finish_sync(other_job, NULL, NULL);
+        if (!job_is_completed_locked(other_job)) {
+            assert(job_cancel_requested_locked(other_job));
+            job_finish_sync_locked(other_job, NULL, NULL);
         }
-        job_finalize_single(other_job);
-        aio_context_release(ctx);
+        job_finalize_single_locked(other_job);
     }
 
-    aio_context_acquire(outer_ctx);
-
-    job_txn_unref(txn);
+    job_unref_locked(job);
+    job_txn_unref_locked(txn);
 }
 
-static int job_prepare(Job *job)
+/* Called with job_mutex held, but releases it temporarily */
+static int job_prepare_locked(Job *job)
 {
+    int ret;
+    AioContext *ctx = job->aio_context;
+
+    GLOBAL_STATE_CODE();
+
     if (job->ret == 0 && job->driver->prepare) {
-        job->ret = job->driver->prepare(job);
-        job_update_rc(job);
+        job_unlock();
+        aio_context_acquire(ctx);
+        ret = job->driver->prepare(job);
+        aio_context_release(ctx);
+        job_lock();
+        job->ret = ret;
+        job_update_rc_locked(job);
     }
+
     return job->ret;
 }
 
-static int job_needs_finalize(Job *job)
+/* Called with job_mutex held */
+static int job_needs_finalize_locked(Job *job)
 {
     return !job->auto_finalize;
 }
 
-static void job_do_finalize(Job *job)
+/* Called with job_mutex held */
+static void job_do_finalize_locked(Job *job)
 {
     int rc;
     assert(job && job->txn);
 
     /* prepare the transaction to complete */
-    rc = job_txn_apply(job, job_prepare);
+    rc = job_txn_apply_locked(job, job_prepare_locked);
     if (rc) {
-        job_completed_txn_abort(job);
+        job_completed_txn_abort_locked(job);
     } else {
-        job_txn_apply(job, job_finalize_single);
+        job_txn_apply_locked(job, job_finalize_single_locked);
     }
 }
 
-void job_finalize(Job *job, Error **errp)
+void job_finalize_locked(Job *job, Error **errp)
 {
     assert(job && job->id);
-    if (job_apply_verb(job, JOB_VERB_FINALIZE, errp)) {
+    if (job_apply_verb_locked(job, JOB_VERB_FINALIZE, errp)) {
         return;
     }
-    job_do_finalize(job);
+    job_do_finalize_locked(job);
 }
 
-static int job_transition_to_pending(Job *job)
+/* Called with job_mutex held. */
+static int job_transition_to_pending_locked(Job *job)
 {
-    job_state_transition(job, JOB_STATUS_PENDING);
+    job_state_transition_locked(job, JOB_STATUS_PENDING);
     if (!job->auto_finalize) {
-        job_event_pending(job);
+        job_event_pending_locked(job);
     }
     return 0;
 }
 
 void job_transition_to_ready(Job *job)
 {
-    job_state_transition(job, JOB_STATUS_READY);
-    job_event_ready(job);
+    JOB_LOCK_GUARD();
+    job_state_transition_locked(job, JOB_STATUS_READY);
+    job_event_ready_locked(job);
 }
 
-static void job_completed_txn_success(Job *job)
+/* Called with job_mutex held. */
+static void job_completed_txn_success_locked(Job *job)
 {
     JobTxn *txn = job->txn;
     Job *other_job;
 
-    job_state_transition(job, JOB_STATUS_WAITING);
+    job_state_transition_locked(job, JOB_STATUS_WAITING);
 
     /*
      * Successful completion, see if there are other running jobs in this
      * txn.
      */
     QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
-        if (!job_is_completed(other_job)) {
+        if (!job_is_completed_locked(other_job)) {
             return;
         }
         assert(other_job->ret == 0);
     }
 
-    job_txn_apply(job, job_transition_to_pending);
+    job_txn_apply_locked(job, job_transition_to_pending_locked);
 
     /* If no jobs need manual finalization, automatically do so */
-    if (job_txn_apply(job, job_needs_finalize) == 0) {
-        job_do_finalize(job);
+    if (job_txn_apply_locked(job, job_needs_finalize_locked) == 0) {
+        job_do_finalize_locked(job);
     }
 }
 
-static void job_completed(Job *job)
+/* Called with job_mutex held. */
+static void job_completed_locked(Job *job)
 {
-    assert(job && job->txn && !job_is_completed(job));
+    assert(job && job->txn && !job_is_completed_locked(job));
 
-    job_update_rc(job);
+    job_update_rc_locked(job);
     trace_job_completed(job, job->ret);
     if (job->ret) {
-        job_completed_txn_abort(job);
+        job_completed_txn_abort_locked(job);
     } else {
-        job_completed_txn_success(job);
+        job_completed_txn_success_locked(job);
     }
 }
 
-/** Useful only as a type shim for aio_bh_schedule_oneshot. */
+/**
+ * Useful only as a type shim for aio_bh_schedule_oneshot.
+ * Called with job_mutex *not* held.
+ */
 static void job_exit(void *opaque)
 {
     Job *job = (Job *)opaque;
-    AioContext *ctx;
-
-    job_ref(job);
-    aio_context_acquire(job->aio_context);
+    JOB_LOCK_GUARD();
+    job_ref_locked(job);
 
     /* This is a lie, we're not quiescent, but still doing the completion
      * callbacks. However, completion callbacks tend to involve operations that
      * drain block nodes, and if .drained_poll still returned true, we would
      * deadlock. */
     job->busy = false;
-    job_event_idle(job);
-
-    job_completed(job);
+    job_event_idle_locked(job);
 
-    /*
-     * Note that calling job_completed can move the job to a different
-     * aio_context, so we cannot cache from above. job_txn_apply takes care of
-     * acquiring the new lock, and we ref/unref to avoid job_completed freeing
-     * the job underneath us.
-     */
-    ctx = job->aio_context;
-    job_unref(job);
-    aio_context_release(ctx);
+    job_completed_locked(job);
+    job_unref_locked(job);
 }
 
 /**
@@ -902,118 +1112,169 @@ static void job_exit(void *opaque)
 static void coroutine_fn job_co_entry(void *opaque)
 {
     Job *job = opaque;
+    int ret;
 
     assert(job && job->driver && job->driver->run);
-    job_pause_point(job);
-    job->ret = job->driver->run(job, &job->err);
-    job->deferred_to_main_loop = true;
-    job->busy = true;
+    WITH_JOB_LOCK_GUARD() {
+        assert(job->aio_context == qemu_get_current_aio_context());
+        job_pause_point_locked(job);
+    }
+    ret = job->driver->run(job, &job->err);
+    WITH_JOB_LOCK_GUARD() {
+        job->ret = ret;
+        job->deferred_to_main_loop = true;
+        job->busy = true;
+    }
     aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
 }
 
 void job_start(Job *job)
 {
-    assert(job && !job_started(job) && job->paused &&
-           job->driver && job->driver->run);
-    job->co = qemu_coroutine_create(job_co_entry, job);
-    job->pause_count--;
-    job->busy = true;
-    job->paused = false;
-    job_state_transition(job, JOB_STATUS_RUNNING);
+    assert(qemu_in_main_thread());
+
+    WITH_JOB_LOCK_GUARD() {
+        assert(job && !job_started_locked(job) && job->paused &&
+            job->driver && job->driver->run);
+        job->co = qemu_coroutine_create(job_co_entry, job);
+        job->pause_count--;
+        job->busy = true;
+        job->paused = false;
+        job_state_transition_locked(job, JOB_STATUS_RUNNING);
+    }
     aio_co_enter(job->aio_context, job->co);
 }
 
-void job_cancel(Job *job, bool force)
+void job_cancel_locked(Job *job, bool force)
 {
     if (job->status == JOB_STATUS_CONCLUDED) {
-        job_do_dismiss(job);
+        job_do_dismiss_locked(job);
         return;
     }
-    job_cancel_async(job, force);
-    if (!job_started(job)) {
-        job_completed(job);
+    job_cancel_async_locked(job, force);
+    if (!job_started_locked(job)) {
+        job_completed_locked(job);
     } else if (job->deferred_to_main_loop) {
-        job_completed_txn_abort(job);
+        /*
+         * job_cancel_async() ignores soft-cancel requests for jobs
+         * that are already done (i.e. deferred to the main loop).  We
+         * have to check again whether the job is really cancelled.
+         * (job_cancel_requested() and job_is_cancelled() are equivalent
+         * here, because job_cancel_async() will make soft-cancel
+         * requests no-ops when deferred_to_main_loop is true.  We
+         * choose to call job_is_cancelled() to show that we invoke
+         * job_completed_txn_abort() only for force-cancelled jobs.)
+         */
+        if (job_is_cancelled_locked(job)) {
+            job_completed_txn_abort_locked(job);
+        }
     } else {
-        job_enter(job);
+        job_enter_cond_locked(job, NULL);
     }
 }
 
-void job_user_cancel(Job *job, bool force, Error **errp)
+void job_user_cancel_locked(Job *job, bool force, Error **errp)
 {
-    if (job_apply_verb(job, JOB_VERB_CANCEL, errp)) {
+    if (job_apply_verb_locked(job, JOB_VERB_CANCEL, errp)) {
         return;
     }
-    job_cancel(job, force);
+    job_cancel_locked(job, force);
 }
 
-/* A wrapper around job_cancel() taking an Error ** parameter so it may be
- * used with job_finish_sync() without the need for (rather nasty) function
- * pointer casts there. */
-static void job_cancel_err(Job *job, Error **errp)
+/* A wrapper around job_cancel_locked() taking an Error ** parameter so it may
+ * be used with job_finish_sync_locked() without the need for (rather nasty)
+ * function pointer casts there.
+ *
+ * Called with job_mutex held.
+ */
+static void job_cancel_err_locked(Job *job, Error **errp)
 {
-    job_cancel(job, false);
+    job_cancel_locked(job, false);
 }
 
-int job_cancel_sync(Job *job)
+/**
+ * Same as job_cancel_err(), but force-cancel.
+ * Called with job_mutex held.
+ */
+static void job_force_cancel_err_locked(Job *job, Error **errp)
 {
-    return job_finish_sync(job, &job_cancel_err, NULL);
+    job_cancel_locked(job, true);
+}
+
+int job_cancel_sync_locked(Job *job, bool force)
+{
+    if (force) {
+        return job_finish_sync_locked(job, &job_force_cancel_err_locked, NULL);
+    } else {
+        return job_finish_sync_locked(job, &job_cancel_err_locked, NULL);
+    }
+}
+
+int job_cancel_sync(Job *job, bool force)
+{
+    JOB_LOCK_GUARD();
+    return job_cancel_sync_locked(job, force);
 }
 
 void job_cancel_sync_all(void)
 {
     Job *job;
-    AioContext *aio_context;
+    JOB_LOCK_GUARD();
 
-    while ((job = job_next(NULL))) {
-        aio_context = job->aio_context;
-        aio_context_acquire(aio_context);
-        job_cancel_sync(job);
-        aio_context_release(aio_context);
+    while ((job = job_next_locked(NULL))) {
+        job_cancel_sync_locked(job, true);
     }
 }
 
-int job_complete_sync(Job *job, Error **errp)
+int job_complete_sync_locked(Job *job, Error **errp)
 {
-    return job_finish_sync(job, job_complete, errp);
+    return job_finish_sync_locked(job, job_complete_locked, errp);
 }
 
-void job_complete(Job *job, Error **errp)
+void job_complete_locked(Job *job, Error **errp)
 {
     /* Should not be reachable via external interface for internal jobs */
     assert(job->id);
-    if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) {
+    GLOBAL_STATE_CODE();
+    if (job_apply_verb_locked(job, JOB_VERB_COMPLETE, errp)) {
         return;
     }
-    if (job->pause_count || job_is_cancelled(job) || !job->driver->complete) {
+    if (job_cancel_requested_locked(job) || !job->driver->complete) {
         error_setg(errp, "The active block job '%s' cannot be completed",
                    job->id);
         return;
     }
 
+    job_unlock();
     job->driver->complete(job, errp);
+    job_lock();
 }
 
-int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
+int job_finish_sync_locked(Job *job,
+                           void (*finish)(Job *, Error **errp),
+                           Error **errp)
 {
     Error *local_err = NULL;
     int ret;
+    GLOBAL_STATE_CODE();
 
-    job_ref(job);
+    job_ref_locked(job);
 
     if (finish) {
         finish(job, &local_err);
     }
     if (local_err) {
         error_propagate(errp, local_err);
-        job_unref(job);
+        job_unref_locked(job);
         return -EBUSY;
     }
 
-    AIO_WAIT_WHILE(job->aio_context,
-                   (job_enter(job), !job_is_completed(job)));
+    job_unlock();
+    AIO_WAIT_WHILE_UNLOCKED(job->aio_context,
+                            (job_enter(job), !job_is_completed(job)));
+    job_lock();
 
-    ret = (job_is_cancelled(job) && job->ret == 0) ? -ECANCELED : job->ret;
-    job_unref(job);
+    ret = (job_is_cancelled_locked(job) && job->ret == 0)
+          ? -ECANCELED : job->ret;
+    job_unref_locked(job);
     return ret;
 }
index fa5257c8389ead000d74f198f40cc201f7f74f3e..c12f4b3e402b5ef6f9366e9cdc07fad241420816 100644 (file)
@@ -1,22 +1,34 @@
-project('qemu', ['c'], meson_version: '>=0.55.0',
-        default_options: ['warning_level=1', 'c_std=gnu99', 'cpp_std=gnu++11', 'b_colorout=auto'] +
-                         (meson.version().version_compare('>=0.56.0') ? [ 'b_staticpic=false' ] : []),
-        version: run_command('head', meson.source_root() / 'VERSION').stdout().strip())
+project('qemu', ['c'], meson_version: '>=0.63.0',
+        default_options: ['warning_level=1', 'c_std=gnu11', 'cpp_std=gnu++11', 'b_colorout=auto',
+                          'b_staticpic=false', 'stdsplit=false', 'optimization=2', 'b_pie=true'],
+        version: files('VERSION'))
+
+add_test_setup('quick', exclude_suites: ['slow', 'thorough'], is_default: true)
+add_test_setup('slow', exclude_suites: ['thorough'], env: ['G_TEST_SLOW=1', 'SPEED=slow'])
+add_test_setup('thorough', env: ['G_TEST_SLOW=1', 'SPEED=thorough'])
+
+meson.add_postconf_script(find_program('scripts/symlink-install-tree.py'))
 
 not_found = dependency('', required: false)
-if meson.version().version_compare('>=0.56.0')
-  keyval = import('keyval')
-else
-  keyval = import('unstable-keyval')
-endif
+keyval = import('keyval')
 ss = import('sourceset')
 fs = import('fs')
 
+targetos = host_machine.system()
 sh = find_program('sh')
-cc = meson.get_compiler('c')
 config_host = keyval.load(meson.current_build_dir() / 'config-host.mak')
-enable_modules = 'CONFIG_MODULES' in config_host
-enable_static = 'CONFIG_STATIC' in config_host
+
+cc = meson.get_compiler('c')
+all_languages = ['c']
+if targetos == 'windows' and add_languages('cpp', required: false, native: false)
+  all_languages += ['cpp']
+  cxx = meson.get_compiler('cpp')
+endif
+if targetos == 'darwin' and \
+   add_languages('objc', required: get_option('cocoa'), native: false)
+  all_languages += ['objc']
+  objc = meson.get_compiler('objc')
+endif
 
 # Temporary directory used for files created while
 # configure runs. Since it is in the build directory
@@ -39,43 +51,463 @@ qemu_icondir = get_option('datadir') / 'icons'
 
 config_host_data = configuration_data()
 genh = []
+qapi_trace_events = []
+
+bsd_oses = ['gnu/kfreebsd', 'freebsd', 'netbsd', 'openbsd', 'dragonfly', 'darwin']
+supported_oses = ['windows', 'freebsd', 'netbsd', 'openbsd', 'darwin', 'sunos', 'linux']
+supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv32', 'riscv64', 'x86', 'x86_64',
+  'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc64']
+
+cpu = host_machine.cpu_family()
 
-have_user = false
+target_dirs = config_host['TARGET_DIRS'].split()
+have_linux_user = false
+have_bsd_user = false
 have_system = false
-have_tools = 'CONFIG_TOOLS' in config_host
+foreach target : target_dirs
+  have_linux_user = have_linux_user or target.endswith('linux-user')
+  have_bsd_user = have_bsd_user or target.endswith('bsd-user')
+  have_system = have_system or target.endswith('-softmmu')
+endforeach
+have_user = have_linux_user or have_bsd_user
+have_tools = get_option('tools') \
+  .disable_auto_if(not have_system) \
+  .allowed()
+have_ga = get_option('guest_agent') \
+  .disable_auto_if(not have_system and not have_tools) \
+  .require(targetos in ['sunos', 'linux', 'windows', 'freebsd', 'netbsd', 'openbsd'],
+           error_message: 'unsupported OS for QEMU guest agent') \
+  .allowed()
+enable_modules = get_option('modules') \
+  .require(targetos != 'windows',
+           error_message: 'Modules are not available for Windows') \
+  .require(not get_option('prefer_static'),
+           error_message: 'Modules are incompatible with static linking') \
+  .allowed()
 have_block = have_system or have_tools
 
 python = import('python').find_installation()
 
-supported_oses = ['linux']
-supported_cpus = ['riscv64', 'x86', 'x86_64', 'arm', 'aarch64']
+if cpu not in supported_cpus
+  host_arch = 'unknown'
+elif cpu == 'x86'
+  host_arch = 'i386'
+elif cpu == 'mips64'
+  host_arch = 'mips'
+elif cpu in ['riscv32', 'riscv64']
+  host_arch = 'riscv'
+else
+  host_arch = cpu
+endif
+
+if cpu in ['x86', 'x86_64']
+  kvm_targets = ['i386-softmmu', 'x86_64-softmmu']
+elif cpu == 'aarch64'
+  kvm_targets = ['aarch64-softmmu']
+elif cpu == 's390x'
+  kvm_targets = ['s390x-softmmu']
+elif cpu in ['ppc', 'ppc64']
+  kvm_targets = ['ppc-softmmu', 'ppc64-softmmu']
+elif cpu in ['mips', 'mips64']
+  kvm_targets = ['mips-softmmu', 'mipsel-softmmu', 'mips64-softmmu', 'mips64el-softmmu']
+elif cpu in ['riscv32']
+  kvm_targets = ['riscv32-softmmu']
+elif cpu in ['riscv64']
+  kvm_targets = ['riscv64-softmmu']
+else
+  kvm_targets = []
+endif
 
-cpu = host_machine.cpu_family()
-targetos = host_machine.system()
+kvm_targets_c = '""'
+if get_option('kvm').allowed() and targetos == 'linux'
+  kvm_targets_c = '"' + '" ,"'.join(kvm_targets) + '"'
+endif
+config_host_data.set('CONFIG_KVM_TARGETS', kvm_targets_c)
+accelerator_targets = { 'CONFIG_KVM': kvm_targets }
+
+if cpu in ['x86', 'x86_64']
+  xen_targets = ['i386-softmmu', 'x86_64-softmmu']
+elif cpu in ['arm', 'aarch64']
+  # i386 emulator provides xenpv machine type for multiple architectures
+  xen_targets = ['i386-softmmu', 'x86_64-softmmu', 'aarch64-softmmu']
+else
+  xen_targets = []
+endif
+accelerator_targets += { 'CONFIG_XEN': xen_targets }
+
+if cpu in ['aarch64']
+  accelerator_targets += {
+    'CONFIG_HVF': ['aarch64-softmmu']
+  }
+endif
+
+if cpu in ['x86', 'x86_64']
+  accelerator_targets += {
+    'CONFIG_HVF': ['x86_64-softmmu'],
+    'CONFIG_NVMM': ['i386-softmmu', 'x86_64-softmmu'],
+    'CONFIG_WHPX': ['i386-softmmu', 'x86_64-softmmu'],
+  }
+endif
+
+modular_tcg = []
+# Darwin does not support references to thread-local variables in modules
+if targetos != 'darwin'
+  modular_tcg = ['i386-softmmu', 'x86_64-softmmu']
+endif
+
+edk2_targets = [ 'arm-softmmu', 'aarch64-softmmu', 'i386-softmmu', 'x86_64-softmmu' ]
+unpack_edk2_blobs = false
+foreach target : edk2_targets
+  if target in target_dirs
+    bzip2 = find_program('bzip2', required: get_option('install_blobs'))
+    unpack_edk2_blobs = bzip2.found()
+    break
+  endif
+endforeach
+
+dtrace = not_found
+stap = not_found
+if 'dtrace' in get_option('trace_backends')
+  dtrace = find_program('dtrace', required: true)
+  stap = find_program('stap', required: false)
+  if stap.found()
+    # Workaround to avoid dtrace(1) producing a file with 'hidden' symbol
+    # visibility. Define STAP_SDT_V2 to produce 'default' symbol visibility
+    # instead. QEMU --enable-modules depends on this because the SystemTap
+    # semaphores are linked into the main binary and not the module's shared
+    # object.
+    add_global_arguments('-DSTAP_SDT_V2',
+                         native: false, language: all_languages)
+  endif
+endif
+
+if get_option('iasl') == ''
+  iasl = find_program('iasl', required: false)
+else
+  iasl = find_program(get_option('iasl'), required: true)
+endif
 
 ##################
 # Compiler flags #
 ##################
 
-# Specify linker-script with add_project_link_arguments so that it is not placed
-# within a linker --start-group/--end-group pair
+foreach lang : all_languages
+  compiler = meson.get_compiler(lang)
+  if compiler.get_id() == 'gcc' and compiler.version().version_compare('>=7.4')
+    # ok
+  elif compiler.get_id() == 'clang' and compiler.compiles('''
+      #ifdef __apple_build_version__
+      # if __clang_major__ < 12 || (__clang_major__ == 12 && __clang_minor__ < 0)
+      #  error You need at least XCode Clang v12.0 to compile QEMU
+      # endif
+      #else
+      # if __clang_major__ < 10 || (__clang_major__ == 10 && __clang_minor__ < 0)
+      #  error You need at least Clang v10.0 to compile QEMU
+      # endif
+      #endif''')
+    # ok
+  else
+    error('You either need GCC v7.4 or Clang v10.0 (or XCode Clang v12.0) to compile QEMU')
+  endif
+endforeach
+
+# default flags for all hosts
+# We use -fwrapv to tell the compiler that we require a C dialect where
+# left shift of signed integers is well defined and has the expected
+# 2s-complement style results. (Both clang and gcc agree that it
+# provides these semantics.)
+
+qemu_common_flags = [
+  '-D_GNU_SOURCE', '-D_FILE_OFFSET_BITS=64', '-D_LARGEFILE_SOURCE',
+  '-fno-strict-aliasing', '-fno-common', '-fwrapv' ]
+qemu_cflags = []
+qemu_ldflags = []
+
+if targetos == 'darwin'
+  # Disable attempts to use ObjectiveC features in os/object.h since they
+  # won't work when we're compiling with gcc as a C compiler.
+  if compiler.get_id() == 'gcc'
+    qemu_common_flags += '-DOS_OBJECT_USE_OBJC=0'
+  endif
+elif targetos == 'sunos'
+  # needed for CMSG_ macros in sys/socket.h
+  qemu_common_flags += '-D_XOPEN_SOURCE=600'
+  # needed for TIOCWIN* defines in termios.h
+  qemu_common_flags += '-D__EXTENSIONS__'
+elif targetos == 'haiku'
+  qemu_common_flags += ['-DB_USE_POSITIVE_POSIX_ERRORS', '-D_BSD_SOURCE', '-fPIC']
+endif
+
+# __sync_fetch_and_and requires at least -march=i486. Many toolchains
+# use i686 as default anyway, but for those that don't, an explicit
+# specification is necessary
+if host_arch == 'i386' and not cc.links('''
+  static int sfaa(int *ptr)
+  {
+    return __sync_fetch_and_and(ptr, 0);
+  }
+
+  int main(void)
+  {
+    int val = 42;
+    val = __sync_val_compare_and_swap(&val, 0, 1);
+    sfaa(&val);
+    return val;
+  }''')
+  qemu_common_flags = ['-march=i486'] + qemu_common_flags
+endif
+
+if get_option('prefer_static')
+  qemu_ldflags += get_option('b_pie') ? '-static-pie' : '-static'
+endif
+
+# Meson currently only handles pie as a boolean for now, so if the user
+# has explicitly disabled PIE we need to extend our cflags.
+#
+# -no-pie is supposedly a linker flag that has no effect on the compiler
+# command line, but some distros, that didn't quite know what they were
+# doing, made local changes to gcc's specs file that turned it into
+# a compiler command-line flag.
+#
+# What about linker flags?  For a static build, no PIE is implied by -static
+# which we added above (and if it's not because of the same specs patching,
+# there's nothing we can do: compilation will fail, report a bug to your
+# distro and do not use --disable-pie in the meanwhile).  For dynamic linking,
+# instead, we can't add -no-pie because it overrides -shared: the linker then
+# tries to build an executable instead of a shared library and fails.  So
+# don't add -no-pie anywhere and cross fingers. :(
+if not get_option('b_pie')
+  qemu_common_flags += cc.get_supported_arguments('-fno-pie', '-no-pie')
+endif
+
+if not get_option('stack_protector').disabled()
+  stack_protector_probe = '''
+    int main(int argc, char *argv[])
+    {
+      char arr[64], *p = arr, *c = argv[argc - 1];
+      while (*c) {
+          *p++ = *c++;
+      }
+      return 0;
+    }'''
+  have_stack_protector = false
+  foreach arg : ['-fstack-protector-strong', '-fstack-protector-all']
+    # We need to check both a compile and a link, since some compiler
+    # setups fail only on a .c->.o compile and some only at link time
+    if cc.compiles(stack_protector_probe, args: ['-Werror', arg]) and \
+       cc.links(stack_protector_probe, args: ['-Werror', arg])
+      have_stack_protector = true
+      qemu_cflags += arg
+      qemu_ldflags += arg
+      break
+    endif
+  endforeach
+  get_option('stack_protector') \
+    .require(have_stack_protector, error_message: 'Stack protector not supported')
+endif
+
+coroutine_backend = get_option('coroutine_backend')
+ucontext_probe = '''
+  #include <ucontext.h>
+  #ifdef __stub_makecontext
+  #error Ignoring glibc stub makecontext which will always fail
+  #endif
+  int main(void) { makecontext(0, 0, 0); return 0; }'''
+
+# On Windows the only valid backend is the Windows specific one.
+# For POSIX prefer ucontext, but it's not always possible. The fallback
+# is sigcontext.
+supported_backends = []
+if targetos == 'windows'
+  supported_backends += ['windows']
+else
+  if targetos != 'darwin' and cc.links(ucontext_probe)
+    supported_backends += ['ucontext']
+  endif
+  supported_backends += ['sigaltstack']
+endif
+
+if coroutine_backend == 'auto'
+  coroutine_backend = supported_backends[0]
+elif coroutine_backend not in supported_backends
+  error('"@0@" backend requested but not available.  Available backends: @1@' \
+        .format(coroutine_backend, ', '.join(supported_backends)))
+endif
+
+# Compiles if SafeStack *not* enabled
+safe_stack_probe = '''
+  int main(void)
+  {
+  #if defined(__has_feature)
+  #if __has_feature(safe_stack)
+  #error SafeStack Enabled
+  #endif
+  #endif
+      return 0;
+  }'''
+if get_option('safe_stack') != not cc.compiles(safe_stack_probe)
+  safe_stack_arg = get_option('safe_stack') ? '-fsanitize=safe-stack' : '-fno-sanitize=safe-stack'
+  if get_option('safe_stack') != not cc.compiles(safe_stack_probe, args: safe_stack_arg)
+    error(get_option('safe_stack') \
+          ? 'SafeStack not supported by your compiler' \
+          : 'Cannot disable SafeStack')
+  endif
+  qemu_cflags += safe_stack_arg
+  qemu_ldflags += safe_stack_arg
+endif
+if get_option('safe_stack') and coroutine_backend != 'ucontext'
+  error('SafeStack is only supported with the ucontext coroutine backend')
+endif
+
+if get_option('sanitizers')
+  if cc.has_argument('-fsanitize=address')
+    qemu_cflags = ['-fsanitize=address'] + qemu_cflags
+    qemu_ldflags = ['-fsanitize=address'] + qemu_ldflags
+  endif
+
+  # Detect static linking issue with ubsan - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84285
+  if cc.links('int main(int argc, char **argv) { return argc + 1; }',
+              args: [qemu_ldflags, '-fsanitize=undefined'])
+    qemu_cflags = ['-fsanitize=undefined'] + qemu_cflags
+    qemu_ldflags = ['-fsanitize=undefined'] + qemu_ldflags
+  endif
+endif
+
+# Thread sanitizer is, for now, much noisier than the other sanitizers;
+# keep it separate until that is not the case.
+if get_option('tsan')
+  if get_option('sanitizers')
+    error('TSAN is not supported with other sanitizers')
+  endif
+  if not cc.has_function('__tsan_create_fiber',
+                         args: '-fsanitize=thread',
+                         prefix: '#include <sanitizer/tsan_interface.h>')
+    error('Cannot enable TSAN due to missing fiber annotation interface')
+  endif
+  qemu_cflags = ['-fsanitize=thread'] + qemu_cflags
+  qemu_ldflags = ['-fsanitize=thread'] + qemu_ldflags
+endif
+
+# Detect support for PT_GNU_RELRO + DT_BIND_NOW.
+# The combination is known as "full relro", because .got.plt is read-only too.
+qemu_ldflags += cc.get_supported_link_arguments('-Wl,-z,relro', '-Wl,-z,now')
+
+if targetos == 'windows'
+  qemu_ldflags += cc.get_supported_link_arguments('-Wl,--no-seh', '-Wl,--nxcompat')
+  qemu_ldflags += cc.get_supported_link_arguments('-Wl,--dynamicbase', '-Wl,--high-entropy-va')
+endif
+
+# Exclude --warn-common with TSan to suppress warnings from the TSan libraries.
+if targetos != 'sunos' and not get_option('tsan')
+  qemu_ldflags += cc.get_supported_link_arguments('-Wl,--warn-common')
+endif
+
+if get_option('fuzzing')
+  # Specify a filter to only instrument code that is directly related to
+  # virtual-devices.
+  configure_file(output: 'instrumentation-filter',
+                 input: 'scripts/oss-fuzz/instrumentation-filter-template',
+                 copy: true)
+
+  if cc.compiles('int main () { return 0; }',
+                  name: '-fsanitize-coverage-allowlist=/dev/null',
+                 args: ['-fsanitize-coverage-allowlist=/dev/null',
+                        '-fsanitize-coverage=trace-pc'] )
+    qemu_common_flags += ['-fsanitize-coverage-allowlist=instrumentation-filter']
+  endif
+
+  if get_option('fuzzing_engine') == ''
+    # Add CFLAGS to tell clang to add fuzzer-related instrumentation to all the
+    # compiled code.  To build non-fuzzer binaries with --enable-fuzzing, link
+    # everything with fsanitize=fuzzer-no-link. Otherwise, the linker will be
+    # unable to bind the fuzzer-related callbacks added by instrumentation.
+    qemu_common_flags += ['-fsanitize=fuzzer-no-link']
+    qemu_ldflags += ['-fsanitize=fuzzer-no-link']
+    # For the actual fuzzer binaries, we need to link against the libfuzzer
+    # library. They need to be configurable, to support OSS-Fuzz
+    fuzz_exe_ldflags = ['-fsanitize=fuzzer']
+  else
+    # LIB_FUZZING_ENGINE was set; assume we are running on OSS-Fuzz, and
+    # the needed CFLAGS have already been provided
+    fuzz_exe_ldflags = get_option('fuzzing_engine').split()
+  endif
+endif
+
+add_global_arguments(qemu_common_flags, native: false, language: all_languages)
+add_global_link_arguments(qemu_ldflags, native: false, language: all_languages)
+
+# Collect warnings that we want to enable
+
+warn_flags = [
+  '-Wundef',
+  '-Wwrite-strings',
+  '-Wmissing-prototypes',
+  '-Wstrict-prototypes',
+  '-Wredundant-decls',
+  '-Wold-style-declaration',
+  '-Wold-style-definition',
+  '-Wtype-limits',
+  '-Wformat-security',
+  '-Wformat-y2k',
+  '-Winit-self',
+  '-Wignored-qualifiers',
+  '-Wempty-body',
+  '-Wnested-externs',
+  '-Wendif-labels',
+  '-Wexpansion-to-defined',
+  '-Wimplicit-fallthrough=2',
+  '-Wmissing-format-attribute',
+  '-Wno-initializer-overrides',
+  '-Wno-missing-include-dirs',
+  '-Wno-shift-negative-value',
+  '-Wno-string-plus-int',
+  '-Wno-typedef-redefinition',
+  '-Wno-tautological-type-limit-compare',
+  '-Wno-psabi',
+  '-Wno-gnu-variable-sized-type-not-at-end',
+  '-Wshadow=local',
+]
+
+if targetos != 'darwin'
+  warn_flags += ['-Wthread-safety']
+endif
+
+# Set up C++ compiler flags
+qemu_cxxflags = []
+if 'cpp' in all_languages
+  qemu_cxxflags = ['-D__STDC_LIMIT_MACROS', '-D__STDC_CONSTANT_MACROS', '-D__STDC_FORMAT_MACROS'] + qemu_cflags
+endif
 
-add_project_arguments(config_host['QEMU_CFLAGS'].split(),
-                      native: false, language: ['c', 'objc'])
-add_project_arguments(config_host['QEMU_CXXFLAGS'].split(),
-                      native: false, language: 'cpp')
-add_project_link_arguments(config_host['QEMU_LDFLAGS'].split(),
-                           native: false, language: ['c', 'cpp', 'objc'])
+add_project_arguments(qemu_cflags, native: false, language: 'c')
+add_project_arguments(cc.get_supported_arguments(warn_flags), native: false, language: 'c')
+if 'cpp' in all_languages
+  add_project_arguments(qemu_cxxflags, native: false, language: 'cpp')
+  add_project_arguments(cxx.get_supported_arguments(warn_flags), native: false, language: 'cpp')
+endif
+if 'objc' in all_languages
+  # Note sanitizer flags are not applied to Objective-C sources!
+  add_project_arguments(objc.get_supported_arguments(warn_flags), native: false, language: 'objc')
+endif
+if targetos == 'linux'
+  add_project_arguments('-isystem', meson.current_source_dir() / 'linux-headers',
+                        '-isystem', 'linux-headers',
+                        language: all_languages)
+endif
 
 add_project_arguments('-iquote', '.',
                       '-iquote', meson.current_source_dir(),
                       '-iquote', meson.current_source_dir() / 'include',
-                      language: ['c', 'cpp', 'objc'])
+                      language: all_languages)
 
-link_language = meson.get_external_property('link_language', 'cpp')
-if link_language == 'cpp'
-  add_languages('cpp', required: true, native: false)
+# If a host-specific include directory exists, list that first...
+host_include = meson.current_source_dir() / 'host/include/'
+if fs.is_dir(host_include / host_arch)
+  add_project_arguments('-iquote', host_include / host_arch,
+                        language: all_languages)
 endif
+# ... followed by the generic fallback.
+add_project_arguments('-iquote', host_include / 'generic',
+                      language: all_languages)
 
 sparse = find_program('cgcc', required: get_option('sparse'))
 if sparse.found()
@@ -90,158 +522,2623 @@ endif
 # Target-specific checks and dependencies #
 ###########################################
 
-if targetos != 'linux' and get_option('mpath').enabled()
-  error('Multipath is supported only on Linux')
+# Fuzzing
+if get_option('fuzzing') and get_option('fuzzing_engine') == '' and \
+    not cc.links('''
+          #include <stdint.h>
+          #include <sys/types.h>
+          int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+          int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { return 0; }
+        ''',
+        args: ['-Werror', '-fsanitize=fuzzer'])
+  error('Your compiler does not support -fsanitize=fuzzer')
+endif
+
+# Tracing backends
+if 'ftrace' in get_option('trace_backends') and targetos != 'linux'
+  error('ftrace is supported only on Linux')
+endif
+if 'syslog' in get_option('trace_backends') and not cc.compiles('''
+    #include <syslog.h>
+    int main(void) {
+        openlog("qemu", LOG_PID, LOG_DAEMON);
+        syslog(LOG_INFO, "configure");
+        return 0;
+    }''')
+  error('syslog is not supported on this system')
 endif
 
-m = cc.find_library('m', required: false)
+# Miscellaneous Linux-only features
+get_option('mpath') \
+  .require(targetos == 'linux', error_message: 'Multipath is supported only on Linux')
+
+multiprocess_allowed = get_option('multiprocess') \
+  .require(targetos == 'linux', error_message: 'Multiprocess QEMU is supported only on Linux') \
+  .allowed()
+
+vfio_user_server_allowed = get_option('vfio_user_server') \
+  .require(targetos == 'linux', error_message: 'vfio-user server is supported only on Linux') \
+  .allowed()
+
+have_tpm = get_option('tpm') \
+  .require(targetos != 'windows', error_message: 'TPM emulation only available on POSIX systems') \
+  .allowed()
+
+# vhost
+have_vhost_user = get_option('vhost_user') \
+  .disable_auto_if(targetos != 'linux') \
+  .require(targetos != 'windows',
+           error_message: 'vhost-user is not available on Windows').allowed()
+have_vhost_vdpa = get_option('vhost_vdpa') \
+  .require(targetos == 'linux',
+           error_message: 'vhost-vdpa is only available on Linux').allowed()
+have_vhost_kernel = get_option('vhost_kernel') \
+  .require(targetos == 'linux',
+           error_message: 'vhost-kernel is only available on Linux').allowed()
+have_vhost_user_crypto = get_option('vhost_crypto') \
+  .require(have_vhost_user,
+           error_message: 'vhost-crypto requires vhost-user to be enabled').allowed()
+
+have_vhost = have_vhost_user or have_vhost_vdpa or have_vhost_kernel
+
+have_vhost_net_user = have_vhost_user and get_option('vhost_net').allowed()
+have_vhost_net_vdpa = have_vhost_vdpa and get_option('vhost_net').allowed()
+have_vhost_net_kernel = have_vhost_kernel and get_option('vhost_net').allowed()
+have_vhost_net = have_vhost_net_kernel or have_vhost_net_user or have_vhost_net_vdpa
+
+# Target-specific libraries and flags
+libm = cc.find_library('m', required: false)
+threads = dependency('threads')
 util = cc.find_library('util', required: false)
 winmm = []
+socket = []
+version_res = []
 coref = []
 iokit = []
+emulator_link_args = []
+nvmm =not_found
+hvf = not_found
+midl = not_found
+widl = not_found
+pathcch = not_found
+host_dsosuf = '.so'
+if targetos == 'windows'
+  midl = find_program('midl', required: false)
+  widl = find_program('widl', required: false)
+  pathcch = cc.find_library('pathcch')
+  socket = cc.find_library('ws2_32')
+  winmm = cc.find_library('winmm')
+
+  win = import('windows')
+  version_res = win.compile_resources('version.rc',
+                                      depend_files: files('pc-bios/qemu-nsis.ico'),
+                                      include_directories: include_directories('.'))
+  host_dsosuf = '.dll'
+elif targetos == 'darwin'
+  coref = dependency('appleframeworks', modules: 'CoreFoundation')
+  iokit = dependency('appleframeworks', modules: 'IOKit', required: false)
+  host_dsosuf = '.dylib'
+elif targetos == 'sunos'
+  socket = [cc.find_library('socket'),
+            cc.find_library('nsl'),
+            cc.find_library('resolv')]
+elif targetos == 'haiku'
+  socket = [cc.find_library('posix_error_mapper'),
+            cc.find_library('network'),
+            cc.find_library('bsd')]
+elif targetos == 'openbsd'
+  if get_option('tcg').allowed() and target_dirs.length() > 0
+    # Disable OpenBSD W^X if available
+    emulator_link_args = cc.get_supported_link_arguments('-Wl,-z,wxneeded')
+  endif
+endif
+
+# Target-specific configuration of accelerators
+accelerators = []
+if get_option('kvm').allowed() and targetos == 'linux'
+  accelerators += 'CONFIG_KVM'
+endif
+if get_option('whpx').allowed() and targetos == 'windows'
+  if get_option('whpx').enabled() and host_machine.cpu() != 'x86_64'
+    error('WHPX requires 64-bit host')
+  elif cc.has_header('winhvplatform.h', required: get_option('whpx')) and \
+       cc.has_header('winhvemulation.h', required: get_option('whpx'))
+    accelerators += 'CONFIG_WHPX'
+  endif
+endif
+if get_option('hvf').allowed()
+  hvf = dependency('appleframeworks', modules: 'Hypervisor',
+                   required: get_option('hvf'))
+  if hvf.found()
+    accelerators += 'CONFIG_HVF'
+  endif
+endif
+if targetos == 'netbsd'
+  nvmm = cc.find_library('nvmm', required: get_option('nvmm'))
+  if nvmm.found()
+    accelerators += 'CONFIG_NVMM'
+  endif
+endif
+
+tcg_arch = host_arch
+if get_option('tcg').allowed()
+  if host_arch == 'unknown'
+    if not get_option('tcg_interpreter')
+      error('Unsupported CPU @0@, try --enable-tcg-interpreter'.format(cpu))
+    endif
+  elif get_option('tcg_interpreter')
+    warning('Use of the TCG interpreter is not recommended on this host')
+    warning('architecture. There is a native TCG execution backend available')
+    warning('which provides substantially better performance and reliability.')
+    warning('It is strongly recommended to remove the --enable-tcg-interpreter')
+    warning('configuration option on this architecture to use the native')
+    warning('backend.')
+  endif
+  if get_option('tcg_interpreter')
+    tcg_arch = 'tci'
+  elif host_arch == 'x86_64'
+    tcg_arch = 'i386'
+  elif host_arch == 'ppc64'
+    tcg_arch = 'ppc'
+  endif
+  add_project_arguments('-iquote', meson.current_source_dir() / 'tcg' / tcg_arch,
+                        language: all_languages)
+
+  accelerators += 'CONFIG_TCG'
+endif
+
+if 'CONFIG_KVM' not in accelerators and get_option('kvm').enabled()
+  error('KVM not available on this platform')
+endif
+if 'CONFIG_HVF' not in accelerators and get_option('hvf').enabled()
+  error('HVF not available on this platform')
+endif
+if 'CONFIG_NVMM' not in accelerators and get_option('nvmm').enabled()
+  error('NVMM not available on this platform')
+endif
+if 'CONFIG_WHPX' not in accelerators and get_option('whpx').enabled()
+  error('WHPX not available on this platform')
+endif
 
 ################
 # Dependencies #
 ################
 
-# The path to glib.h is added to all compilation commands.  This was
-# grandfathered in from the QEMU Makefiles.
-add_project_arguments(config_host['GLIB_CFLAGS'].split(),
-                      native: false, language: ['c', 'cpp', 'objc'])
-glib = declare_dependency(link_args: config_host['GLIB_LIBS'].split())
+# When bumping glib minimum version, please check also whether to increase
+# the _WIN32_WINNT setting in osdep.h according to the value from glib
+glib_req_ver = '>=2.56.0'
+glib_pc = dependency('glib-2.0', version: glib_req_ver, required: true,
+                    method: 'pkg-config')
+glib_cflags = []
+if enable_modules
+  gmodule = dependency('gmodule-export-2.0', version: glib_req_ver, required: true,
+                       method: 'pkg-config')
+elif get_option('plugins')
+  gmodule = dependency('gmodule-no-export-2.0', version: glib_req_ver, required: true,
+                       method: 'pkg-config')
+else
+  gmodule = not_found
+endif
 
-# pass down whether Glib has the slice allocator
-if config_host.has_key('HAVE_GLIB_WITH_SLICE_ALLOCATOR')
-  config_host_data.set('HAVE_GLIB_WITH_SLICE_ALLOCATOR', true)
+# This workaround is required due to a bug in pkg-config file for glib as it
+# doesn't define GLIB_STATIC_COMPILATION for pkg-config --static
+if targetos == 'windows' and get_option('prefer_static')
+  glib_cflags += ['-DGLIB_STATIC_COMPILATION']
 endif
 
-pam = not_found
-if 'CONFIG_AUTH_PAM' in config_host
-  pam = cc.find_library('pam')
+# Sanity check that the current size_t matches the
+# size that glib thinks it should be. This catches
+# problems on multi-arch where people try to build
+# 32-bit QEMU while pointing at 64-bit glib headers
+
+if not cc.compiles('''
+  #include <glib.h>
+  #include <unistd.h>
+
+  #define QEMU_BUILD_BUG_ON(x) \
+  typedef char qemu_build_bug_on[(x)?-1:1] __attribute__((unused));
+
+  int main(void) {
+     QEMU_BUILD_BUG_ON(sizeof(size_t) != GLIB_SIZEOF_SIZE_T);
+     return 0;
+  }''', dependencies: glib_pc, args: glib_cflags)
+  error('''sizeof(size_t) doesn't match GLIB_SIZEOF_SIZE_T.
+        You probably need to set PKG_CONFIG_LIBDIR" to point
+        to the right pkg-config files for your build target.''')
 endif
-libaio = cc.find_library('aio', required: false)
-zlib = dependency('zlib', required: true, static: enable_static)
-linux_io_uring = not_found
-if 'CONFIG_LINUX_IO_URING' in config_host
-  linux_io_uring = declare_dependency(compile_args: config_host['LINUX_IO_URING_CFLAGS'].split(),
-                                      link_args: config_host['LINUX_IO_URING_LIBS'].split())
+
+# Silence clang warnings triggered by glib < 2.57.2
+if not cc.compiles('''
+  #include <glib.h>
+  typedef struct Foo {
+    int i;
+  } Foo;
+  static void foo_free(Foo *f)
+  {
+    g_free(f);
+  }
+  G_DEFINE_AUTOPTR_CLEANUP_FUNC(Foo, foo_free)
+  int main(void) { return 0; }''', dependencies: glib_pc, args: ['-Wunused-function', '-Werror'])
+  glib_cflags += cc.get_supported_arguments('-Wno-unused-function')
+endif
+glib = declare_dependency(dependencies: [glib_pc, gmodule],
+                          compile_args: glib_cflags,
+                          version: glib_pc.version())
+
+# Check whether glib has gslice, which we have to avoid for correctness.
+# TODO: remove this check and the corresponding workaround (qtree) when
+# the minimum supported glib is >= 2.75.3
+glib_has_gslice = glib.version().version_compare('<2.75.3')
+
+# override glib dep to include the above refinements
+meson.override_dependency('glib-2.0', glib)
+
+# The path to glib.h is added to all compilation commands.
+add_project_dependencies(glib.partial_dependency(compile_args: true, includes: true),
+                         native: false, language: all_languages)
+
+gio = not_found
+gdbus_codegen = not_found
+gdbus_codegen_error = '@0@ requires gdbus-codegen, please install libgio'
+if not get_option('gio').auto() or have_system
+  gio = dependency('gio-2.0', required: get_option('gio'),
+                   method: 'pkg-config')
+  if gio.found() and not cc.links('''
+    #include <gio/gio.h>
+    int main(void)
+    {
+      g_dbus_proxy_new_sync(0, 0, 0, 0, 0, 0, 0, 0);
+      return 0;
+    }''', dependencies: [glib, gio])
+    if get_option('gio').enabled()
+      error('The installed libgio is broken for static linking')
+    endif
+    gio = not_found
+  endif
+  if gio.found()
+    gdbus_codegen = find_program(gio.get_variable('gdbus_codegen'),
+                                 required: get_option('gio'))
+    gio_unix = dependency('gio-unix-2.0', required: get_option('gio'),
+                          method: 'pkg-config')
+    gio = declare_dependency(dependencies: [gio, gio_unix],
+                             version: gio.version())
+  endif
+endif
+if gdbus_codegen.found() and get_option('cfi')
+  gdbus_codegen = not_found
+  gdbus_codegen_error = '@0@ uses gdbus-codegen, which does not support control flow integrity'
+endif
+
+xml_pp = find_program('scripts/xml-preprocess.py')
+
+lttng = not_found
+if 'ust' in get_option('trace_backends')
+  lttng = dependency('lttng-ust', required: true, version: '>= 2.1',
+                     method: 'pkg-config')
+endif
+pixman = not_found
+if not get_option('pixman').auto() or have_system or have_tools
+  pixman = dependency('pixman-1', required: get_option('pixman'), version:'>=0.21.8',
+                      method: 'pkg-config')
+endif
+
+zlib = dependency('zlib', required: true)
+
+libaio = not_found
+if not get_option('linux_aio').auto() or have_block
+  libaio = cc.find_library('aio', has_headers: ['libaio.h'],
+                           required: get_option('linux_aio'))
 endif
-libxml2 = not_found
-if 'CONFIG_LIBXML2' in config_host
-  libxml2 = declare_dependency(compile_args: config_host['LIBXML2_CFLAGS'].split(),
-                               link_args: config_host['LIBXML2_LIBS'].split())
+
+linux_io_uring_test = '''
+  #include <liburing.h>
+  #include <linux/errqueue.h>
+
+  int main(void) { return 0; }'''
+
+linux_io_uring = not_found
+if not get_option('linux_io_uring').auto() or have_block
+  linux_io_uring = dependency('liburing', version: '>=0.3',
+                              required: get_option('linux_io_uring'),
+                              method: 'pkg-config')
+  if not cc.links(linux_io_uring_test)
+    linux_io_uring = not_found
+  endif
 endif
+
 libnfs = not_found
-if 'CONFIG_LIBNFS' in config_host
-  libnfs = declare_dependency(link_args: config_host['LIBNFS_LIBS'].split())
+if not get_option('libnfs').auto() or have_block
+  libnfs = dependency('libnfs', version: '>=1.9.3',
+                      required: get_option('libnfs'),
+                      method: 'pkg-config')
+endif
+
+libattr_test = '''
+  #include <stddef.h>
+  #include <sys/types.h>
+  #ifdef CONFIG_LIBATTR
+  #include <attr/xattr.h>
+  #else
+  #include <sys/xattr.h>
+  #endif
+  int main(void) { getxattr(NULL, NULL, NULL, 0); setxattr(NULL, NULL, NULL, 0, 0); return 0; }'''
+
+libattr = not_found
+have_old_libattr = false
+if get_option('attr').allowed()
+  if cc.links(libattr_test)
+    libattr = declare_dependency()
+  else
+    libattr = cc.find_library('attr', has_headers: ['attr/xattr.h'],
+                              required: get_option('attr'))
+    if libattr.found() and not \
+      cc.links(libattr_test, dependencies: libattr, args: '-DCONFIG_LIBATTR')
+      libattr = not_found
+      if get_option('attr').enabled()
+        error('could not link libattr')
+      else
+        warning('could not link libattr, disabling')
+      endif
+    else
+      have_old_libattr = libattr.found()
+    endif
+  endif
+endif
+
+cocoa = dependency('appleframeworks', modules: ['Cocoa', 'CoreVideo'],
+                   required: get_option('cocoa'))
+
+vmnet = dependency('appleframeworks', modules: 'vmnet', required: get_option('vmnet'))
+if vmnet.found() and not cc.has_header_symbol('vmnet/vmnet.h',
+                                              'VMNET_BRIDGED_MODE',
+                                              dependencies: vmnet)
+  vmnet = not_found
+  if get_option('vmnet').enabled()
+    error('vmnet.framework API is outdated')
+  else
+    warning('vmnet.framework API is outdated, disabling')
+  endif
+endif
+
+seccomp = not_found
+seccomp_has_sysrawrc = false
+if not get_option('seccomp').auto() or have_system or have_tools
+  seccomp = dependency('libseccomp', version: '>=2.3.0',
+                       required: get_option('seccomp'),
+                       method: 'pkg-config')
+  if seccomp.found()
+    seccomp_has_sysrawrc = cc.has_header_symbol('seccomp.h',
+                                                'SCMP_FLTATR_API_SYSRAWRC',
+                                                dependencies: seccomp)
+  endif
+endif
+
+libcap_ng = not_found
+if not get_option('cap_ng').auto() or have_system or have_tools
+  libcap_ng = cc.find_library('cap-ng', has_headers: ['cap-ng.h'],
+                              required: get_option('cap_ng'))
+endif
+if libcap_ng.found() and not cc.links('''
+   #include <cap-ng.h>
+   int main(void)
+   {
+     capng_capability_to_name(CAPNG_EFFECTIVE);
+     return 0;
+   }''', dependencies: libcap_ng)
+  libcap_ng = not_found
+  if get_option('cap_ng').enabled()
+    error('could not link libcap-ng')
+  else
+    warning('could not link libcap-ng, disabling')
+  endif
+endif
+
+if get_option('xkbcommon').auto() and not have_system and not have_tools
+  xkbcommon = not_found
+else
+  xkbcommon = dependency('xkbcommon', required: get_option('xkbcommon'),
+                         method: 'pkg-config')
+endif
+
+slirp = not_found
+if not get_option('slirp').auto() or have_system
+  slirp = dependency('slirp', required: get_option('slirp'),
+                     method: 'pkg-config')
+  # slirp < 4.7 is incompatible with CFI support in QEMU.  This is because
+  # it passes function pointers within libslirp as callbacks for timers.
+  # When using a system-wide shared libslirp, the type information for the
+  # callback is missing and the timer call produces a false positive with CFI.
+  # Do not use the "version" keyword argument to produce a better error.
+  # with control-flow integrity.
+  if get_option('cfi') and slirp.found() and slirp.version().version_compare('<4.7')
+    if get_option('slirp').enabled()
+      error('Control-Flow Integrity requires libslirp 4.7.')
+    else
+      warning('Cannot use libslirp since Control-Flow Integrity requires libslirp >= 4.7.')
+      slirp = not_found
+    endif
+  endif
+endif
+
+vde = not_found
+if not get_option('vde').auto() or have_system or have_tools
+  vde = cc.find_library('vdeplug', has_headers: ['libvdeplug.h'],
+                           required: get_option('vde'))
+endif
+if vde.found() and not cc.links('''
+   #include <libvdeplug.h>
+   int main(void)
+   {
+     struct vde_open_args a = {0, 0, 0};
+     char s[] = "";
+     vde_open(s, s, &a);
+     return 0;
+   }''', dependencies: vde)
+  vde = not_found
+  if get_option('cap_ng').enabled()
+    error('could not link libvdeplug')
+  else
+    warning('could not link libvdeplug, disabling')
+  endif
+endif
+
+pulse = not_found
+if not get_option('pa').auto() or (targetos == 'linux' and have_system)
+  pulse = dependency('libpulse', required: get_option('pa'),
+                     method: 'pkg-config')
+endif
+alsa = not_found
+if not get_option('alsa').auto() or (targetos == 'linux' and have_system)
+  alsa = dependency('alsa', required: get_option('alsa'),
+                    method: 'pkg-config')
+endif
+jack = not_found
+if not get_option('jack').auto() or have_system
+  jack = dependency('jack', required: get_option('jack'),
+                    method: 'pkg-config')
+endif
+pipewire = not_found
+if not get_option('pipewire').auto() or (targetos == 'linux' and have_system)
+  pipewire = dependency('libpipewire-0.3', version: '>=0.3.60',
+                    required: get_option('pipewire'),
+                    method: 'pkg-config')
+endif
+sndio = not_found
+if not get_option('sndio').auto() or have_system
+  sndio = dependency('sndio', required: get_option('sndio'),
+                    method: 'pkg-config')
+endif
+
+spice_protocol = not_found
+if not get_option('spice_protocol').auto() or have_system
+  spice_protocol = dependency('spice-protocol', version: '>=0.14.0',
+                              required: get_option('spice_protocol'),
+                              method: 'pkg-config')
 endif
 spice = not_found
-spice_headers = not_found
-if 'CONFIG_SPICE' in config_host
-  spice = declare_dependency(compile_args: config_host['SPICE_CFLAGS'].split(),
-                             link_args: config_host['SPICE_LIBS'].split())
-  spice_headers = declare_dependency(compile_args: config_host['SPICE_CFLAGS'].split())
+if get_option('spice') \
+             .disable_auto_if(not have_system) \
+             .require(pixman.found(),
+                      error_message: 'cannot enable SPICE if pixman is not available') \
+             .allowed()
+  spice = dependency('spice-server', version: '>=0.14.0',
+                     required: get_option('spice'),
+                     method: 'pkg-config')
 endif
+spice_headers = spice.partial_dependency(compile_args: true, includes: true)
+
+rt = cc.find_library('rt', required: false)
+
 libiscsi = not_found
-if 'CONFIG_LIBISCSI' in config_host
-  libiscsi = declare_dependency(compile_args: config_host['LIBISCSI_CFLAGS'].split(),
-                                link_args: config_host['LIBISCSI_LIBS'].split())
+if not get_option('libiscsi').auto() or have_block
+  libiscsi = dependency('libiscsi', version: '>=1.9.0',
+                         required: get_option('libiscsi'),
+                         method: 'pkg-config')
 endif
 zstd = not_found
-if 'CONFIG_ZSTD' in config_host
-  zstd = declare_dependency(compile_args: config_host['ZSTD_CFLAGS'].split(),
-                            link_args: config_host['ZSTD_LIBS'].split())
+if not get_option('zstd').auto() or have_block
+  zstd = dependency('libzstd', version: '>=1.4.0',
+                    required: get_option('zstd'),
+                    method: 'pkg-config')
 endif
-curl = not_found
-if 'CONFIG_CURL' in config_host
-  curl = declare_dependency(compile_args: config_host['CURL_CFLAGS'].split(),
-                            link_args: config_host['CURL_LIBS'].split())
+virgl = not_found
+
+have_vhost_user_gpu = have_tools and targetos == 'linux' and pixman.found()
+if not get_option('virglrenderer').auto() or have_system or have_vhost_user_gpu
+  virgl = dependency('virglrenderer',
+                     method: 'pkg-config',
+                     required: get_option('virglrenderer'))
+  if virgl.found()
+    config_host_data.set('HAVE_VIRGL_D3D_INFO_EXT',
+                         cc.has_member('struct virgl_renderer_resource_info_ext', 'd3d_tex2d',
+                                       prefix: '#include <virglrenderer.h>',
+                                       dependencies: virgl))
+  endif
 endif
-rbd = not_found
-if 'CONFIG_RBD' in config_host
-  rbd = declare_dependency(link_args: config_host['RBD_LIBS'].split())
+rutabaga = not_found
+if not get_option('rutabaga_gfx').auto() or have_system or have_vhost_user_gpu
+  rutabaga = dependency('rutabaga_gfx_ffi',
+                         method: 'pkg-config',
+                         required: get_option('rutabaga_gfx'))
 endif
-glusterfs = not_found
-if 'CONFIG_GLUSTERFS' in config_host
-  glusterfs = declare_dependency(compile_args: config_host['GLUSTERFS_CFLAGS'].split(),
-                                 link_args: config_host['GLUSTERFS_LIBS'].split())
+blkio = not_found
+if not get_option('blkio').auto() or have_block
+  blkio = dependency('blkio',
+                     method: 'pkg-config',
+                     required: get_option('blkio'))
 endif
-libssh = not_found
-if 'CONFIG_LIBSSH' in config_host
-  libssh = declare_dependency(compile_args: config_host['LIBSSH_CFLAGS'].split(),
-                              link_args: config_host['LIBSSH_LIBS'].split())
+curl = not_found
+if not get_option('curl').auto() or have_block
+  curl = dependency('libcurl', version: '>=7.29.0',
+                    method: 'pkg-config',
+                    required: get_option('curl'))
+endif
+libudev = not_found
+if targetos == 'linux' and (have_system or have_tools)
+  libudev = dependency('libudev',
+                       method: 'pkg-config',
+                       required: get_option('libudev'))
 endif
 
-have_vhost_user_blk_server = (targetos == 'linux' and
-    'CONFIG_VHOST_USER' in config_host)
-
-if get_option('vhost_user_blk_server').enabled()
-    if targetos != 'linux'
-        error('vhost_user_blk_server requires linux')
-    elif 'CONFIG_VHOST_USER' not in config_host
-        error('vhost_user_blk_server requires vhost-user support')
+mpathlibs = [libudev]
+mpathpersist = not_found
+if targetos == 'linux' and have_tools and get_option('mpath').allowed()
+  mpath_test_source = '''
+    #include <libudev.h>
+    #include <mpath_persist.h>
+    unsigned mpath_mx_alloc_len = 1024;
+    int logsink;
+    static struct config *multipath_conf;
+    extern struct udev *udev;
+    extern struct config *get_multipath_config(void);
+    extern void put_multipath_config(struct config *conf);
+    struct udev *udev;
+    struct config *get_multipath_config(void) { return multipath_conf; }
+    void put_multipath_config(struct config *conf) { }
+    int main(void) {
+        udev = udev_new();
+        multipath_conf = mpath_lib_init();
+        return 0;
+    }'''
+  libmpathpersist = cc.find_library('mpathpersist',
+                                    required: get_option('mpath'))
+  if libmpathpersist.found()
+    mpathlibs += libmpathpersist
+    if get_option('prefer_static')
+      mpathlibs += cc.find_library('devmapper',
+                                     required: get_option('mpath'))
+    endif
+    mpathlibs += cc.find_library('multipath',
+                                 required: get_option('mpath'))
+    foreach lib: mpathlibs
+      if not lib.found()
+        mpathlibs = []
+        break
+      endif
+    endforeach
+    if mpathlibs.length() == 0
+      msg = 'Dependencies missing for libmpathpersist'
+    elif cc.links(mpath_test_source, dependencies: mpathlibs)
+      mpathpersist = declare_dependency(dependencies: mpathlibs)
+    else
+      msg = 'Cannot detect libmpathpersist API'
     endif
-elif get_option('vhost_user_blk_server').disabled() or not have_system
-    have_vhost_user_blk_server = false
+    if not mpathpersist.found()
+      if get_option('mpath').enabled()
+        error(msg)
+      else
+        warning(msg + ', disabling')
+      endif
+    endif
+  endif
 endif
 
-#################
-# config-host.h #
-#################
-
-config_host_data.set_quoted('CONFIG_BINDIR', get_option('prefix') / get_option('bindir'))
-config_host_data.set_quoted('CONFIG_PREFIX', get_option('prefix'))
-config_host_data.set_quoted('CONFIG_QEMU_CONFDIR', get_option('prefix') / qemu_confdir)
-config_host_data.set_quoted('CONFIG_QEMU_DATADIR', get_option('prefix') / qemu_datadir)
-config_host_data.set_quoted('CONFIG_QEMU_DESKTOPDIR', get_option('prefix') / qemu_desktopdir)
-config_host_data.set_quoted('CONFIG_QEMU_FIRMWAREPATH', get_option('qemu_firmwarepath'))
-config_host_data.set_quoted('CONFIG_QEMU_HELPERDIR', get_option('prefix') / get_option('libexecdir'))
-config_host_data.set_quoted('CONFIG_QEMU_ICONDIR', get_option('prefix') / qemu_icondir)
-config_host_data.set_quoted('CONFIG_QEMU_LOCALEDIR', get_option('prefix') / get_option('localedir'))
-config_host_data.set_quoted('CONFIG_QEMU_LOCALSTATEDIR', get_option('prefix') / get_option('localstatedir'))
-config_host_data.set_quoted('CONFIG_QEMU_MODDIR', get_option('prefix') / qemu_moddir)
-config_host_data.set_quoted('CONFIG_SYSCONFDIR', get_option('prefix') / get_option('sysconfdir'))
+iconv = not_found
+curses = not_found
+if have_system and get_option('curses').allowed()
+  curses_test = '''
+    #if defined(__APPLE__) || defined(__OpenBSD__)
+    #define _XOPEN_SOURCE_EXTENDED 1
+    #endif
+    #include <locale.h>
+    #include <curses.h>
+    #include <wchar.h>
+    int main(void) {
+      wchar_t wch = L'w';
+      setlocale(LC_ALL, "");
+      resize_term(0, 0);
+      addwstr(L"wide chars\n");
+      addnwstr(&wch, 1);
+      add_wch(WACS_DEGREE);
+      return 0;
+    }'''
 
-config_host_data.set('QEMU_VERSION', '"@0@"'.format(meson.project_version()))
-config_host_data.set('QEMU_VERSION_MAJOR', meson.project_version().split('.')[0])
-config_host_data.set('QEMU_VERSION_MINOR', meson.project_version().split('.')[1])
-config_host_data.set('QEMU_VERSION_MICRO', meson.project_version().split('.')[2])
+  curses_dep_list = targetos == 'windows' ? ['ncurses', 'ncursesw'] : ['ncursesw']
+  curses = dependency(curses_dep_list,
+                      required: false,
+                      method: 'pkg-config')
+  msg = get_option('curses').enabled() ? 'curses library not found' : ''
+  curses_compile_args = ['-DNCURSES_WIDECHAR=1']
+  if curses.found()
+    if cc.links(curses_test, args: curses_compile_args, dependencies: [curses])
+      curses = declare_dependency(compile_args: curses_compile_args, dependencies: [curses],
+                                  version: curses.version())
+    else
+      msg = 'curses package not usable'
+      curses = not_found
+    endif
+  endif
+  if not curses.found()
+    has_curses_h = cc.has_header('curses.h', args: curses_compile_args)
+    if targetos != 'windows' and not has_curses_h
+      message('Trying with /usr/include/ncursesw')
+      curses_compile_args += ['-I/usr/include/ncursesw']
+      has_curses_h = cc.has_header('curses.h', args: curses_compile_args)
+    endif
+    if has_curses_h
+      curses_libname_list = (targetos == 'windows' ? ['pdcurses'] : ['ncursesw', 'cursesw'])
+      foreach curses_libname : curses_libname_list
+        libcurses = cc.find_library(curses_libname,
+                                    required: false)
+        if libcurses.found()
+          if cc.links(curses_test, args: curses_compile_args, dependencies: libcurses)
+            curses = declare_dependency(compile_args: curses_compile_args,
+                                        dependencies: [libcurses])
+            break
+          else
+            msg = 'curses library not usable'
+          endif
+        endif
+      endforeach
+    endif
+  endif
+  if get_option('iconv').allowed()
+    foreach link_args : [ ['-liconv'], [] ]
+      # Programs will be linked with glib and this will bring in libiconv on FreeBSD.
+      # We need to use libiconv if available because mixing libiconv's headers with
+      # the system libc does not work.
+      # However, without adding glib to the dependencies -L/usr/local/lib will not be
+      # included in the command line and libiconv will not be found.
+      if cc.links('''
+        #include <iconv.h>
+        int main(void) {
+          iconv_t conv = iconv_open("WCHAR_T", "UCS-2");
+          return conv != (iconv_t) -1;
+        }''', args: link_args, dependencies: glib)
+        iconv = declare_dependency(link_args: link_args, dependencies: glib)
+        break
+      endif
+    endforeach
+  endif
+  if curses.found() and not iconv.found()
+    if get_option('iconv').enabled()
+      error('iconv not available')
+    endif
+    msg = 'iconv required for curses UI but not available'
+    curses = not_found
+  endif
+  if not curses.found() and msg != ''
+    if get_option('curses').enabled()
+      error(msg)
+    else
+      warning(msg + ', disabling')
+    endif
+  endif
+endif
 
-ignored = ['CONFIG_QEMU_INTERP_PREFIX'] # actually per-target
-arrays = ['CONFIG_AUDIO_DRIVERS', 'CONFIG_BDRV_RW_WHITELIST', 'CONFIG_BDRV_RO_WHITELIST']
-strings = ['HOST_DSOSUF', 'CONFIG_IASL']
-foreach k, v: config_host
-  if ignored.contains(k)
-    # do nothing
-  elif arrays.contains(k)
-    if v != ''
-      v = '"' + '", "'.join(v.split()) + '", '
-    endif
-    config_host_data.set(k, v)
-  elif k == 'ARCH'
-    config_host_data.set('HOST_' + v.to_upper(), 1)
-  elif strings.contains(k)
-    if not k.startswith('CONFIG_')
-      k = 'CONFIG_' + k.to_upper()
-    endif
-    config_host_data.set_quoted(k, v)
-  elif k.startswith('CONFIG_') or k.startswith('HAVE_') or k.startswith('HOST_')
-    config_host_data.set(k, v == 'y' ? 1 : v)
+brlapi = not_found
+if not get_option('brlapi').auto() or have_system
+  brlapi = cc.find_library('brlapi', has_headers: ['brlapi.h'],
+                         required: get_option('brlapi'))
+  if brlapi.found() and not cc.links('''
+     #include <brlapi.h>
+     #include <stddef.h>
+     int main(void) { return brlapi__openConnection (NULL, NULL, NULL); }''', dependencies: brlapi)
+    brlapi = not_found
+    if get_option('brlapi').enabled()
+      error('could not link brlapi')
+    else
+      warning('could not link brlapi, disabling')
+    endif
   endif
-endforeach
+endif
+
+sdl = not_found
+if not get_option('sdl').auto() or have_system
+  sdl = dependency('sdl2', required: get_option('sdl'))
+  sdl_image = not_found
+endif
+if sdl.found()
+  # Some versions of SDL have problems with -Wundef
+  if not cc.compiles('''
+                     #include <SDL.h>
+                     #include <SDL_syswm.h>
+                     int main(int argc, char *argv[]) { return 0; }
+                     ''', dependencies: sdl, args: '-Werror=undef')
+    sdl = declare_dependency(compile_args: '-Wno-undef',
+                             dependencies: sdl,
+                             version: sdl.version())
+  endif
+  sdl_image = dependency('SDL2_image', required: get_option('sdl_image'),
+                         method: 'pkg-config')
+else
+  if get_option('sdl_image').enabled()
+    error('sdl-image required, but SDL was @0@'.format(
+          get_option('sdl').disabled() ? 'disabled' : 'not found'))
+  endif
+  sdl_image = not_found
+endif
+
+rbd = not_found
+if not get_option('rbd').auto() or have_block
+  librados = cc.find_library('rados', required: get_option('rbd'))
+  librbd = cc.find_library('rbd', has_headers: ['rbd/librbd.h'],
+                           required: get_option('rbd'))
+  if librados.found() and librbd.found()
+    if cc.links('''
+      #include <stdio.h>
+      #include <rbd/librbd.h>
+      int main(void) {
+        rados_t cluster;
+        rados_create(&cluster, NULL);
+        #if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 12, 0)
+        #error
+        #endif
+        return 0;
+      }''', dependencies: [librbd, librados])
+      rbd = declare_dependency(dependencies: [librbd, librados])
+    elif get_option('rbd').enabled()
+      error('librbd >= 1.12.0 required')
+    else
+      warning('librbd >= 1.12.0 not found, disabling')
+    endif
+  endif
+endif
+
+glusterfs = not_found
+glusterfs_ftruncate_has_stat = false
+glusterfs_iocb_has_stat = false
+if not get_option('glusterfs').auto() or have_block
+  glusterfs = dependency('glusterfs-api', version: '>=3',
+                         required: get_option('glusterfs'),
+                         method: 'pkg-config')
+  if glusterfs.found()
+    glusterfs_ftruncate_has_stat = cc.links('''
+      #include <glusterfs/api/glfs.h>
+
+      int
+      main(void)
+      {
+          /* new glfs_ftruncate() passes two additional args */
+          return glfs_ftruncate(NULL, 0, NULL, NULL);
+      }
+    ''', dependencies: glusterfs)
+    glusterfs_iocb_has_stat = cc.links('''
+      #include <glusterfs/api/glfs.h>
+
+      /* new glfs_io_cbk() passes two additional glfs_stat structs */
+      static void
+      glusterfs_iocb(glfs_fd_t *fd, ssize_t ret, struct glfs_stat *prestat, struct glfs_stat *poststat, void *data)
+      {}
+
+      int
+      main(void)
+      {
+          glfs_io_cbk iocb = &glusterfs_iocb;
+          iocb(NULL, 0 , NULL, NULL, NULL);
+          return 0;
+      }
+    ''', dependencies: glusterfs)
+  endif
+endif
+
+hv_balloon = false
+if get_option('hv_balloon').allowed() and have_system
+  if cc.links('''
+    #include <string.h>
+    #include <gmodule.h>
+    int main(void) {
+        GTree *tree;
+
+        tree = g_tree_new((GCompareFunc)strcmp);
+        (void)g_tree_node_first(tree);
+        g_tree_destroy(tree);
+        return 0;
+    }
+  ''', dependencies: glib)
+    hv_balloon = true
+  else
+    if get_option('hv_balloon').enabled()
+      error('could not enable hv-balloon, update your glib')
+    else
+      warning('could not find glib support for hv-balloon, disabling')
+    endif
+  endif
+endif
+
+libssh = not_found
+if not get_option('libssh').auto() or have_block
+  libssh = dependency('libssh', version: '>=0.8.7',
+                    method: 'pkg-config',
+                    required: get_option('libssh'))
+endif
+
+libbzip2 = not_found
+if not get_option('bzip2').auto() or have_block
+  libbzip2 = cc.find_library('bz2', has_headers: ['bzlib.h'],
+                             required: get_option('bzip2'))
+  if libbzip2.found() and not cc.links('''
+     #include <bzlib.h>
+     int main(void) { BZ2_bzlibVersion(); return 0; }''', dependencies: libbzip2)
+    libbzip2 = not_found
+    if get_option('bzip2').enabled()
+      error('could not link libbzip2')
+    else
+      warning('could not link libbzip2, disabling')
+    endif
+  endif
+endif
+
+liblzfse = not_found
+if not get_option('lzfse').auto() or have_block
+  liblzfse = cc.find_library('lzfse', has_headers: ['lzfse.h'],
+                             required: get_option('lzfse'))
+endif
+if liblzfse.found() and not cc.links('''
+   #include <lzfse.h>
+   int main(void) { lzfse_decode_scratch_size(); return 0; }''', dependencies: liblzfse)
+  liblzfse = not_found
+  if get_option('lzfse').enabled()
+    error('could not link liblzfse')
+  else
+    warning('could not link liblzfse, disabling')
+  endif
+endif
+
+oss = not_found
+if get_option('oss').allowed() and have_system
+  if not cc.has_header('sys/soundcard.h')
+    # not found
+  elif targetos == 'netbsd'
+    oss = cc.find_library('ossaudio', required: get_option('oss'))
+  else
+    oss = declare_dependency()
+  endif
+
+  if not oss.found()
+    if get_option('oss').enabled()
+      error('OSS not found')
+    endif
+  endif
+endif
+dsound = not_found
+if not get_option('dsound').auto() or (targetos == 'windows' and have_system)
+  if cc.has_header('dsound.h')
+    dsound = declare_dependency(link_args: ['-lole32', '-ldxguid'])
+  endif
+
+  if not dsound.found()
+    if get_option('dsound').enabled()
+      error('DirectSound not found')
+    endif
+  endif
+endif
+
+coreaudio = not_found
+if not get_option('coreaudio').auto() or (targetos == 'darwin' and have_system)
+  coreaudio = dependency('appleframeworks', modules: 'CoreAudio',
+                         required: get_option('coreaudio'))
+endif
+
+opengl = not_found
+if not get_option('opengl').auto() or have_system or have_vhost_user_gpu
+  epoxy = dependency('epoxy', method: 'pkg-config',
+                      required: get_option('opengl'))
+  if cc.has_header('epoxy/egl.h', dependencies: epoxy)
+    opengl = epoxy
+  elif get_option('opengl').enabled()
+    error('epoxy/egl.h not found')
+  endif
+endif
+gbm = not_found
+if (have_system or have_tools) and (virgl.found() or opengl.found())
+  gbm = dependency('gbm', method: 'pkg-config', required: false)
+endif
+have_vhost_user_gpu = have_vhost_user_gpu and virgl.found() and opengl.found() and gbm.found()
+
+gnutls = not_found
+gnutls_crypto = not_found
+if get_option('gnutls').enabled() or (get_option('gnutls').auto() and have_system)
+  # For general TLS support our min gnutls matches
+  # that implied by our platform support matrix
+  #
+  # For the crypto backends, we look for a newer
+  # gnutls:
+  #
+  #   Version 3.6.8  is needed to get XTS
+  #   Version 3.6.13 is needed to get PBKDF
+  #   Version 3.6.14 is needed to get HW accelerated XTS
+  #
+  # If newer enough gnutls isn't available, we can
+  # still use a different crypto backend to satisfy
+  # the platform support requirements
+  gnutls_crypto = dependency('gnutls', version: '>=3.6.14',
+                             method: 'pkg-config',
+                             required: false)
+  if gnutls_crypto.found()
+    gnutls = gnutls_crypto
+  else
+    # Our min version if all we need is TLS
+    gnutls = dependency('gnutls', version: '>=3.5.18',
+                        method: 'pkg-config',
+                        required: get_option('gnutls'))
+  endif
+endif
+
+# We prefer use of gnutls for crypto, unless the options
+# explicitly asked for nettle or gcrypt.
+#
+# If gnutls isn't available for crypto, then we'll prefer
+# gcrypt over nettle for performance reasons.
+gcrypt = not_found
+nettle = not_found
+hogweed = not_found
+xts = 'none'
+
+if get_option('nettle').enabled() and get_option('gcrypt').enabled()
+  error('Only one of gcrypt & nettle can be enabled')
+endif
+
+# Explicit nettle/gcrypt request, so ignore gnutls for crypto
+if get_option('nettle').enabled() or get_option('gcrypt').enabled()
+  gnutls_crypto = not_found
+endif
+
+if not gnutls_crypto.found()
+  if (not get_option('gcrypt').auto() or have_system) and not get_option('nettle').enabled()
+    gcrypt = dependency('libgcrypt', version: '>=1.8',
+                        method: 'config-tool',
+                        required: get_option('gcrypt'))
+    # Debian has removed -lgpg-error from libgcrypt-config
+    # as it "spreads unnecessary dependencies" which in
+    # turn breaks static builds...
+    if gcrypt.found() and get_option('prefer_static')
+      gcrypt = declare_dependency(dependencies:
+        [gcrypt,
+         cc.find_library('gpg-error', required: true)],
+        version: gcrypt.version())
+    endif
+  endif
+  if (not get_option('nettle').auto() or have_system) and not gcrypt.found()
+    nettle = dependency('nettle', version: '>=3.4',
+                        method: 'pkg-config',
+                        required: get_option('nettle'))
+    if nettle.found() and not cc.has_header('nettle/xts.h', dependencies: nettle)
+      xts = 'private'
+    endif
+  endif
+endif
+
+gmp = dependency('gmp', required: false, method: 'pkg-config')
+if nettle.found() and gmp.found()
+  hogweed = dependency('hogweed', version: '>=3.4',
+                       method: 'pkg-config',
+                       required: get_option('nettle'))
+endif
+
+
+gtk = not_found
+gtkx11 = not_found
+vte = not_found
+have_gtk_clipboard = get_option('gtk_clipboard').enabled()
+
+if get_option('gtk') \
+             .disable_auto_if(not have_system) \
+             .require(pixman.found(),
+                      error_message: 'cannot enable GTK if pixman is not available') \
+             .allowed()
+  gtk = dependency('gtk+-3.0', version: '>=3.22.0',
+                   method: 'pkg-config',
+                   required: get_option('gtk'))
+  if gtk.found()
+    gtkx11 = dependency('gtk+-x11-3.0', version: '>=3.22.0',
+                        method: 'pkg-config',
+                        required: false)
+    gtk = declare_dependency(dependencies: [gtk, gtkx11],
+                             version: gtk.version())
+
+    if not get_option('vte').auto() or have_system
+      vte = dependency('vte-2.91',
+                       method: 'pkg-config',
+                       required: get_option('vte'))
+    endif
+  elif have_gtk_clipboard
+    error('GTK clipboard requested, but GTK not found')
+  endif
+endif
+
+x11 = not_found
+if gtkx11.found()
+  x11 = dependency('x11', method: 'pkg-config', required: gtkx11.found())
+endif
+png = not_found
+if get_option('png').allowed() and have_system
+   png = dependency('libpng', version: '>=1.6.34', required: get_option('png'),
+                    method: 'pkg-config')
+endif
+vnc = not_found
+jpeg = not_found
+sasl = not_found
+if get_option('vnc') \
+             .disable_auto_if(not have_system) \
+             .require(pixman.found(),
+                      error_message: 'cannot enable VNC if pixman is not available') \
+             .allowed()
+  vnc = declare_dependency() # dummy dependency
+  jpeg = dependency('libjpeg', required: get_option('vnc_jpeg'),
+                    method: 'pkg-config')
+  sasl = cc.find_library('sasl2', has_headers: ['sasl/sasl.h'],
+                         required: get_option('vnc_sasl'))
+  if sasl.found()
+    sasl = declare_dependency(dependencies: sasl,
+                              compile_args: '-DSTRUCT_IOVEC_DEFINED')
+  endif
+endif
+
+pam = not_found
+if not get_option('auth_pam').auto() or have_system
+  pam = cc.find_library('pam', has_headers: ['security/pam_appl.h'],
+                        required: get_option('auth_pam'))
+endif
+if pam.found() and not cc.links('''
+   #include <stddef.h>
+   #include <security/pam_appl.h>
+   int main(void) {
+     const char *service_name = "qemu";
+     const char *user = "frank";
+     const struct pam_conv pam_conv = { 0 };
+     pam_handle_t *pamh = NULL;
+     pam_start(service_name, user, &pam_conv, &pamh);
+     return 0;
+   }''', dependencies: pam)
+  pam = not_found
+  if get_option('auth_pam').enabled()
+    error('could not link libpam')
+  else
+    warning('could not link libpam, disabling')
+  endif
+endif
+
+snappy = not_found
+if not get_option('snappy').auto() or have_system
+  snappy = cc.find_library('snappy', has_headers: ['snappy-c.h'],
+                           required: get_option('snappy'))
+endif
+if snappy.found() and not cc.links('''
+   #include <snappy-c.h>
+   int main(void) { snappy_max_compressed_length(4096); return 0; }''', dependencies: snappy)
+  snappy = not_found
+  if get_option('snappy').enabled()
+    error('could not link libsnappy')
+  else
+    warning('could not link libsnappy, disabling')
+  endif
+endif
+
+lzo = not_found
+if not get_option('lzo').auto() or have_system
+  lzo = cc.find_library('lzo2', has_headers: ['lzo/lzo1x.h'],
+                        required: get_option('lzo'))
+endif
+if lzo.found() and not cc.links('''
+   #include <lzo/lzo1x.h>
+   int main(void) { lzo_version(); return 0; }''', dependencies: lzo)
+  lzo = not_found
+  if get_option('lzo').enabled()
+    error('could not link liblzo2')
+  else
+    warning('could not link liblzo2, disabling')
+  endif
+endif
+
+numa = not_found
+if not get_option('numa').auto() or have_system or have_tools
+  numa = cc.find_library('numa', has_headers: ['numa.h'],
+                              required: get_option('numa'))
+endif
+if numa.found() and not cc.links('''
+   #include <numa.h>
+   int main(void) { return numa_available(); }
+   ''', dependencies: numa)
+  numa = not_found
+  if get_option('numa').enabled()
+    error('could not link numa')
+  else
+    warning('could not link numa, disabling')
+  endif
+endif
+
+rdma = not_found
+if not get_option('rdma').auto() or have_system
+  libumad = cc.find_library('ibumad', required: get_option('rdma'))
+  rdma_libs = [cc.find_library('rdmacm', has_headers: ['rdma/rdma_cma.h'],
+                               required: get_option('rdma')),
+               cc.find_library('ibverbs', required: get_option('rdma')),
+               libumad]
+  rdma = declare_dependency(dependencies: rdma_libs)
+  foreach lib: rdma_libs
+    if not lib.found()
+      rdma = not_found
+    endif
+  endforeach
+endif
+
+xen = not_found
+if get_option('xen').enabled() or (get_option('xen').auto() and have_system)
+  xencontrol = dependency('xencontrol', required: false,
+                          method: 'pkg-config')
+  if xencontrol.found()
+    xen_pc = declare_dependency(version: xencontrol.version(),
+      dependencies: [
+        xencontrol,
+        # disabler: true makes xen_pc.found() return false if any is not found
+        dependency('xenstore', required: false,
+                   method: 'pkg-config',
+                   disabler: true),
+        dependency('xenforeignmemory', required: false,
+                   method: 'pkg-config',
+                   disabler: true),
+        dependency('xengnttab', required: false,
+                   method: 'pkg-config',
+                   disabler: true),
+        dependency('xenevtchn', required: false,
+                   method: 'pkg-config',
+                   disabler: true),
+        dependency('xendevicemodel', required: false,
+                   method: 'pkg-config',
+                   disabler: true),
+        # optional, no "disabler: true"
+        dependency('xentoolcore', required: false,
+                   method: 'pkg-config')])
+    if xen_pc.found()
+      xen = xen_pc
+    endif
+  endif
+  if not xen.found()
+    xen_tests = [ '4.11.0', '4.10.0', '4.9.0', '4.8.0', '4.7.1' ]
+    xen_libs = {
+      '4.11.0': [ 'xenstore', 'xenctrl', 'xendevicemodel', 'xenforeignmemory', 'xengnttab', 'xenevtchn', 'xentoolcore' ],
+      '4.10.0': [ 'xenstore', 'xenctrl', 'xendevicemodel', 'xenforeignmemory', 'xengnttab', 'xenevtchn', 'xentoolcore' ],
+      '4.9.0': [ 'xenstore', 'xenctrl', 'xendevicemodel', 'xenforeignmemory', 'xengnttab', 'xenevtchn' ],
+      '4.8.0': [ 'xenstore', 'xenctrl', 'xenforeignmemory', 'xengnttab', 'xenevtchn' ],
+      '4.7.1': [ 'xenstore', 'xenctrl', 'xenforeignmemory', 'xengnttab', 'xenevtchn' ],
+    }
+    xen_deps = {}
+    foreach ver: xen_tests
+      # cache the various library tests to avoid polluting the logs
+      xen_test_deps = []
+      foreach l: xen_libs[ver]
+        if l not in xen_deps
+          xen_deps += { l: cc.find_library(l, required: false) }
+        endif
+        xen_test_deps += xen_deps[l]
+      endforeach
+
+      # Use -D to pick just one of the test programs in scripts/xen-detect.c
+      xen_version = ver.split('.')
+      xen_ctrl_version = xen_version[0] + \
+        ('0' + xen_version[1]).substring(-2) + \
+        ('0' + xen_version[2]).substring(-2)
+      if cc.links(files('scripts/xen-detect.c'),
+                  args: '-DCONFIG_XEN_CTRL_INTERFACE_VERSION=' + xen_ctrl_version,
+                  dependencies: xen_test_deps)
+        xen = declare_dependency(version: ver, dependencies: xen_test_deps)
+        break
+      endif
+    endforeach
+  endif
+  if xen.found()
+    accelerators += 'CONFIG_XEN'
+  elif get_option('xen').enabled()
+    error('could not compile and link Xen test program')
+  endif
+endif
+have_xen_pci_passthrough = get_option('xen_pci_passthrough') \
+  .require(xen.found(),
+           error_message: 'Xen PCI passthrough requested but Xen not enabled') \
+  .require(targetos == 'linux',
+           error_message: 'Xen PCI passthrough not available on this platform') \
+  .require(cpu == 'x86'  or cpu == 'x86_64',
+           error_message: 'Xen PCI passthrough not available on this platform') \
+  .allowed()
+
+
+cacard = not_found
+if not get_option('smartcard').auto() or have_system
+  cacard = dependency('libcacard', required: get_option('smartcard'),
+                      version: '>=2.5.1', method: 'pkg-config')
+endif
+u2f = not_found
+if have_system
+  u2f = dependency('u2f-emu', required: get_option('u2f'),
+                   method: 'pkg-config')
+endif
+canokey = not_found
+if have_system
+  canokey = dependency('canokey-qemu', required: get_option('canokey'),
+                   method: 'pkg-config')
+endif
+usbredir = not_found
+if not get_option('usb_redir').auto() or have_system
+  usbredir = dependency('libusbredirparser-0.5', required: get_option('usb_redir'),
+                        version: '>=0.6', method: 'pkg-config')
+endif
+libusb = not_found
+if not get_option('libusb').auto() or have_system
+  libusb = dependency('libusb-1.0', required: get_option('libusb'),
+                      version: '>=1.0.13', method: 'pkg-config')
+endif
+
+libpmem = not_found
+if not get_option('libpmem').auto() or have_system
+  libpmem = dependency('libpmem', required: get_option('libpmem'),
+                       method: 'pkg-config')
+endif
+libdaxctl = not_found
+if not get_option('libdaxctl').auto() or have_system
+  libdaxctl = dependency('libdaxctl', required: get_option('libdaxctl'),
+                         version: '>=57', method: 'pkg-config')
+endif
+tasn1 = not_found
+if gnutls.found()
+  tasn1 = dependency('libtasn1',
+                     method: 'pkg-config')
+endif
+keyutils = not_found
+if not get_option('libkeyutils').auto() or have_block
+  keyutils = dependency('libkeyutils', required: get_option('libkeyutils'),
+                        method: 'pkg-config')
+endif
+
+has_gettid = cc.has_function('gettid')
+
+# libselinux
+selinux = dependency('libselinux',
+                     required: get_option('selinux'),
+                     method: 'pkg-config')
+
+# Malloc tests
+
+malloc = []
+if get_option('malloc') == 'system'
+  has_malloc_trim = \
+    get_option('malloc_trim').allowed() and \
+    cc.has_function('malloc_trim', prefix: '#include <malloc.h>')
+else
+  has_malloc_trim = false
+  malloc = cc.find_library(get_option('malloc'), required: true)
+endif
+if not has_malloc_trim and get_option('malloc_trim').enabled()
+  if get_option('malloc') == 'system'
+    error('malloc_trim not available on this platform.')
+  else
+    error('malloc_trim not available with non-libc memory allocator')
+  endif
+endif
+
+gnu_source_prefix = '''
+  #ifndef _GNU_SOURCE
+  #define _GNU_SOURCE
+  #endif
+'''
+
+# Check whether the glibc provides STATX_BASIC_STATS
+
+has_statx = cc.has_header_symbol('sys/stat.h', 'STATX_BASIC_STATS', prefix: gnu_source_prefix)
+
+# Check whether statx() provides mount ID information
+
+has_statx_mnt_id = cc.has_header_symbol('sys/stat.h', 'STATX_MNT_ID', prefix: gnu_source_prefix)
+
+have_vhost_user_blk_server = get_option('vhost_user_blk_server') \
+  .require(targetos == 'linux',
+           error_message: 'vhost_user_blk_server requires linux') \
+  .require(have_vhost_user,
+           error_message: 'vhost_user_blk_server requires vhost-user support') \
+  .disable_auto_if(not have_tools and not have_system) \
+  .allowed()
+
+if get_option('fuse').disabled() and get_option('fuse_lseek').enabled()
+  error('Cannot enable fuse-lseek while fuse is disabled')
+endif
+
+fuse = dependency('fuse3', required: get_option('fuse'),
+                  version: '>=3.1', method: 'pkg-config')
+
+fuse_lseek = not_found
+if get_option('fuse_lseek').allowed()
+  if fuse.version().version_compare('>=3.8')
+    # Dummy dependency
+    fuse_lseek = declare_dependency()
+  elif get_option('fuse_lseek').enabled()
+    if fuse.found()
+      error('fuse-lseek requires libfuse >=3.8, found ' + fuse.version())
+    else
+      error('fuse-lseek requires libfuse, which was not found')
+    endif
+  endif
+endif
+
+have_libvduse = (targetos == 'linux')
+if get_option('libvduse').enabled()
+    if targetos != 'linux'
+        error('libvduse requires linux')
+    endif
+elif get_option('libvduse').disabled()
+    have_libvduse = false
+endif
+
+have_vduse_blk_export = (have_libvduse and targetos == 'linux')
+if get_option('vduse_blk_export').enabled()
+    if targetos != 'linux'
+        error('vduse_blk_export requires linux')
+    elif not have_libvduse
+        error('vduse_blk_export requires libvduse support')
+    endif
+elif get_option('vduse_blk_export').disabled()
+    have_vduse_blk_export = false
+endif
+
+# libbpf
+libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
+if libbpf.found() and not cc.links('''
+   #include <bpf/libbpf.h>
+   int main(void)
+   {
+     bpf_object__destroy_skeleton(NULL);
+     return 0;
+   }''', dependencies: libbpf)
+  libbpf = not_found
+  if get_option('bpf').enabled()
+    error('libbpf skeleton test failed')
+  else
+    warning('libbpf skeleton test failed, disabling')
+  endif
+endif
+
+# libxdp
+libxdp = not_found
+if not get_option('af_xdp').auto() or have_system
+    libxdp = dependency('libxdp', required: get_option('af_xdp'),
+                        version: '>=1.4.0', method: 'pkg-config')
+endif
+
+# libdw
+libdw = not_found
+if not get_option('libdw').auto() or \
+        (not get_option('prefer_static') and (have_system or have_user))
+    libdw = dependency('libdw',
+                       method: 'pkg-config',
+                       required: get_option('libdw'))
+endif
+
+#################
+# config-host.h #
+#################
+
+audio_drivers_selected = []
+if have_system
+  audio_drivers_available = {
+    'alsa': alsa.found(),
+    'coreaudio': coreaudio.found(),
+    'dsound': dsound.found(),
+    'jack': jack.found(),
+    'oss': oss.found(),
+    'pa': pulse.found(),
+    'pipewire': pipewire.found(),
+    'sdl': sdl.found(),
+    'sndio': sndio.found(),
+  }
+  foreach k, v: audio_drivers_available
+    config_host_data.set('CONFIG_AUDIO_' + k.to_upper(), v)
+  endforeach
+
+  # Default to native drivers first, OSS second, SDL third
+  audio_drivers_priority = \
+    [ 'pa', 'coreaudio', 'dsound', 'sndio', 'oss' ] + \
+    (targetos == 'linux' ? [] : [ 'sdl' ])
+  audio_drivers_default = []
+  foreach k: audio_drivers_priority
+    if audio_drivers_available[k]
+      audio_drivers_default += k
+    endif
+  endforeach
+
+  foreach k: get_option('audio_drv_list')
+    if k == 'default'
+      audio_drivers_selected += audio_drivers_default
+    elif not audio_drivers_available[k]
+      error('Audio driver "@0@" not available.'.format(k))
+    else
+      audio_drivers_selected += k
+    endif
+  endforeach
+endif
+config_host_data.set('CONFIG_AUDIO_DRIVERS',
+                     '"' + '", "'.join(audio_drivers_selected) + '", ')
+
+if get_option('cfi')
+  cfi_flags=[]
+  # Check for dependency on LTO
+  if not get_option('b_lto')
+    error('Selected Control-Flow Integrity but LTO is disabled')
+  endif
+  if enable_modules
+    error('Selected Control-Flow Integrity is not compatible with modules')
+  endif
+  # Check for cfi flags. CFI requires LTO so we can't use
+  # get_supported_arguments, but need a more complex "compiles" which allows
+  # custom arguments
+  if cc.compiles('int main () { return 0; }', name: '-fsanitize=cfi-icall',
+                 args: ['-flto', '-fsanitize=cfi-icall'] )
+    cfi_flags += '-fsanitize=cfi-icall'
+  else
+    error('-fsanitize=cfi-icall is not supported by the compiler')
+  endif
+  if cc.compiles('int main () { return 0; }',
+                 name: '-fsanitize-cfi-icall-generalize-pointers',
+                 args: ['-flto', '-fsanitize=cfi-icall',
+                        '-fsanitize-cfi-icall-generalize-pointers'] )
+    cfi_flags += '-fsanitize-cfi-icall-generalize-pointers'
+  else
+    error('-fsanitize-cfi-icall-generalize-pointers is not supported by the compiler')
+  endif
+  if get_option('cfi_debug')
+    if cc.compiles('int main () { return 0; }',
+                   name: '-fno-sanitize-trap=cfi-icall',
+                   args: ['-flto', '-fsanitize=cfi-icall',
+                          '-fno-sanitize-trap=cfi-icall'] )
+      cfi_flags += '-fno-sanitize-trap=cfi-icall'
+    else
+      error('-fno-sanitize-trap=cfi-icall is not supported by the compiler')
+    endif
+  endif
+  add_global_arguments(cfi_flags, native: false, language: all_languages)
+  add_global_link_arguments(cfi_flags, native: false, language: all_languages)
+endif
+
+have_host_block_device = (targetos != 'darwin' or
+    cc.has_header('IOKit/storage/IOMedia.h'))
+
+dbus_display = get_option('dbus_display') \
+  .require(gio.version().version_compare('>=2.64'),
+           error_message: '-display dbus requires glib>=2.64') \
+  .require(gdbus_codegen.found(),
+           error_message: gdbus_codegen_error.format('-display dbus')) \
+  .allowed()
+
+have_virtfs = get_option('virtfs') \
+    .require(targetos == 'linux' or targetos == 'darwin',
+             error_message: 'virtio-9p (virtfs) requires Linux or macOS') \
+    .require(targetos == 'linux' or cc.has_function('pthread_fchdir_np'),
+             error_message: 'virtio-9p (virtfs) on macOS requires the presence of pthread_fchdir_np') \
+    .require(targetos == 'darwin' or libattr.found(),
+             error_message: 'virtio-9p (virtfs) on Linux requires libattr-devel') \
+    .disable_auto_if(not have_tools and not have_system) \
+    .allowed()
+
+have_virtfs_proxy_helper = get_option('virtfs_proxy_helper') \
+    .require(targetos != 'darwin', error_message: 'the virtfs proxy helper is incompatible with macOS') \
+    .require(have_virtfs, error_message: 'the virtfs proxy helper requires that virtfs is enabled') \
+    .disable_auto_if(not have_tools) \
+    .require(libcap_ng.found(), error_message: 'the virtfs proxy helper requires libcap-ng') \
+    .allowed()
+
+if get_option('block_drv_ro_whitelist') == ''
+  config_host_data.set('CONFIG_BDRV_RO_WHITELIST', '')
+else
+  config_host_data.set('CONFIG_BDRV_RO_WHITELIST',
+        '"' + get_option('block_drv_ro_whitelist').replace(',', '", "') + '", ')
+endif
+if get_option('block_drv_rw_whitelist') == ''
+  config_host_data.set('CONFIG_BDRV_RW_WHITELIST', '')
+else
+  config_host_data.set('CONFIG_BDRV_RW_WHITELIST',
+        '"' + get_option('block_drv_rw_whitelist').replace(',', '", "') + '", ')
+endif
+
+foreach k : get_option('trace_backends')
+  config_host_data.set('CONFIG_TRACE_' + k.to_upper(), true)
+endforeach
+config_host_data.set_quoted('CONFIG_TRACE_FILE', get_option('trace_file'))
+config_host_data.set_quoted('CONFIG_TLS_PRIORITY', get_option('tls_priority'))
+if iasl.found()
+  config_host_data.set_quoted('CONFIG_IASL', iasl.full_path())
+endif
+config_host_data.set_quoted('CONFIG_BINDIR', get_option('prefix') / get_option('bindir'))
+config_host_data.set_quoted('CONFIG_PREFIX', get_option('prefix'))
+config_host_data.set_quoted('CONFIG_QEMU_CONFDIR', get_option('prefix') / qemu_confdir)
+config_host_data.set_quoted('CONFIG_QEMU_DATADIR', get_option('prefix') / qemu_datadir)
+config_host_data.set_quoted('CONFIG_QEMU_DESKTOPDIR', get_option('prefix') / qemu_desktopdir)
+
+qemu_firmwarepath = ''
+foreach k : get_option('qemu_firmwarepath')
+  qemu_firmwarepath += '"' + get_option('prefix') / k + '", '
+endforeach
+config_host_data.set('CONFIG_QEMU_FIRMWAREPATH', qemu_firmwarepath)
+
+config_host_data.set_quoted('CONFIG_QEMU_HELPERDIR', get_option('prefix') / get_option('libexecdir'))
+config_host_data.set_quoted('CONFIG_QEMU_ICONDIR', get_option('prefix') / qemu_icondir)
+config_host_data.set_quoted('CONFIG_QEMU_LOCALEDIR', get_option('prefix') / get_option('localedir'))
+config_host_data.set_quoted('CONFIG_QEMU_LOCALSTATEDIR', get_option('prefix') / get_option('localstatedir'))
+config_host_data.set_quoted('CONFIG_QEMU_MODDIR', get_option('prefix') / qemu_moddir)
+config_host_data.set_quoted('CONFIG_SYSCONFDIR', get_option('prefix') / get_option('sysconfdir'))
+
+if enable_modules
+  config_host_data.set('CONFIG_STAMP', run_command(
+      meson.current_source_dir() / 'scripts/qemu-stamp.py',
+      meson.project_version(), '--',
+      meson.current_source_dir() / 'configure',
+      capture: true, check: true).stdout().strip())
+endif
+
+have_slirp_smbd = get_option('slirp_smbd') \
+  .require(targetos != 'windows', error_message: 'Host smbd not supported on this platform.') \
+  .allowed()
+if have_slirp_smbd
+  smbd_path = get_option('smbd')
+  if smbd_path == ''
+    smbd_path = (targetos == 'sunos' ? '/usr/sfw/sbin/smbd' : '/usr/sbin/smbd')
+  endif
+  config_host_data.set_quoted('CONFIG_SMBD_COMMAND', smbd_path)
+endif
+
+config_host_data.set('HOST_' + host_arch.to_upper(), 1)
+
+if get_option('module_upgrades') and not enable_modules
+  error('Cannot enable module-upgrades as modules are not enabled')
+endif
+config_host_data.set('CONFIG_MODULE_UPGRADES', get_option('module_upgrades'))
+
+config_host_data.set('CONFIG_ATTR', libattr.found())
+config_host_data.set('CONFIG_BDRV_WHITELIST_TOOLS', get_option('block_drv_whitelist_in_tools'))
+config_host_data.set('CONFIG_BRLAPI', brlapi.found())
+config_host_data.set('CONFIG_BSD', targetos in bsd_oses)
+config_host_data.set('CONFIG_COCOA', cocoa.found())
+config_host_data.set('CONFIG_DARWIN', targetos == 'darwin')
+config_host_data.set('CONFIG_FUZZ', get_option('fuzzing'))
+config_host_data.set('CONFIG_GCOV', get_option('b_coverage'))
+config_host_data.set('CONFIG_LIBUDEV', libudev.found())
+config_host_data.set('CONFIG_LINUX', targetos == 'linux')
+config_host_data.set('CONFIG_POSIX', targetos != 'windows')
+config_host_data.set('CONFIG_WIN32', targetos == 'windows')
+config_host_data.set('CONFIG_LZO', lzo.found())
+config_host_data.set('CONFIG_MPATH', mpathpersist.found())
+config_host_data.set('CONFIG_BLKIO', blkio.found())
+if blkio.found()
+  config_host_data.set('CONFIG_BLKIO_VHOST_VDPA_FD',
+                       blkio.version().version_compare('>=1.3.0'))
+endif
+config_host_data.set('CONFIG_CURL', curl.found())
+config_host_data.set('CONFIG_CURSES', curses.found())
+config_host_data.set('CONFIG_GBM', gbm.found())
+config_host_data.set('CONFIG_GIO', gio.found())
+config_host_data.set('CONFIG_GLUSTERFS', glusterfs.found())
+if glusterfs.found()
+  config_host_data.set('CONFIG_GLUSTERFS_XLATOR_OPT', glusterfs.version().version_compare('>=4'))
+  config_host_data.set('CONFIG_GLUSTERFS_DISCARD', glusterfs.version().version_compare('>=5'))
+  config_host_data.set('CONFIG_GLUSTERFS_FALLOCATE', glusterfs.version().version_compare('>=6'))
+  config_host_data.set('CONFIG_GLUSTERFS_ZEROFILL', glusterfs.version().version_compare('>=6'))
+  config_host_data.set('CONFIG_GLUSTERFS_FTRUNCATE_HAS_STAT', glusterfs_ftruncate_has_stat)
+  config_host_data.set('CONFIG_GLUSTERFS_IOCB_HAS_STAT', glusterfs_iocb_has_stat)
+endif
+config_host_data.set('CONFIG_GTK', gtk.found())
+config_host_data.set('CONFIG_VTE', vte.found())
+config_host_data.set('CONFIG_GTK_CLIPBOARD', have_gtk_clipboard)
+config_host_data.set('CONFIG_HEXAGON_IDEF_PARSER', get_option('hexagon_idef_parser'))
+config_host_data.set('CONFIG_LIBATTR', have_old_libattr)
+config_host_data.set('CONFIG_LIBCAP_NG', libcap_ng.found())
+config_host_data.set('CONFIG_EBPF', libbpf.found())
+config_host_data.set('CONFIG_AF_XDP', libxdp.found())
+config_host_data.set('CONFIG_LIBDAXCTL', libdaxctl.found())
+config_host_data.set('CONFIG_LIBISCSI', libiscsi.found())
+config_host_data.set('CONFIG_LIBNFS', libnfs.found())
+config_host_data.set('CONFIG_LIBSSH', libssh.found())
+config_host_data.set('CONFIG_LINUX_AIO', libaio.found())
+config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
+config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
+config_host_data.set('CONFIG_MODULES', enable_modules)
+config_host_data.set('CONFIG_NUMA', numa.found())
+if numa.found()
+  config_host_data.set('HAVE_NUMA_HAS_PREFERRED_MANY',
+                       cc.has_function('numa_has_preferred_many',
+                                       dependencies: numa))
+endif
+config_host_data.set('CONFIG_OPENGL', opengl.found())
+config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
+config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_RDMA', rdma.found())
+config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
+config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
+config_host_data.set('CONFIG_SDL', sdl.found())
+config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
+config_host_data.set('CONFIG_SECCOMP', seccomp.found())
+if seccomp.found()
+  config_host_data.set('CONFIG_SECCOMP_SYSRAWRC', seccomp_has_sysrawrc)
+endif
+config_host_data.set('CONFIG_PIXMAN', pixman.found())
+config_host_data.set('CONFIG_SNAPPY', snappy.found())
+config_host_data.set('CONFIG_SOLARIS', targetos == 'sunos')
+if get_option('tcg').allowed()
+  config_host_data.set('CONFIG_TCG', 1)
+  config_host_data.set('CONFIG_TCG_INTERPRETER', tcg_arch == 'tci')
+endif
+config_host_data.set('CONFIG_TPM', have_tpm)
+config_host_data.set('CONFIG_TSAN', get_option('tsan'))
+config_host_data.set('CONFIG_USB_LIBUSB', libusb.found())
+config_host_data.set('CONFIG_VDE', vde.found())
+config_host_data.set('CONFIG_VHOST', have_vhost)
+config_host_data.set('CONFIG_VHOST_NET', have_vhost_net)
+config_host_data.set('CONFIG_VHOST_NET_USER', have_vhost_net_user)
+config_host_data.set('CONFIG_VHOST_NET_VDPA', have_vhost_net_vdpa)
+config_host_data.set('CONFIG_VHOST_KERNEL', have_vhost_kernel)
+config_host_data.set('CONFIG_VHOST_USER', have_vhost_user)
+config_host_data.set('CONFIG_VHOST_CRYPTO', have_vhost_user_crypto)
+config_host_data.set('CONFIG_VHOST_VDPA', have_vhost_vdpa)
+config_host_data.set('CONFIG_VMNET', vmnet.found())
+config_host_data.set('CONFIG_VHOST_USER_BLK_SERVER', have_vhost_user_blk_server)
+config_host_data.set('CONFIG_VDUSE_BLK_EXPORT', have_vduse_blk_export)
+config_host_data.set('CONFIG_PNG', png.found())
+config_host_data.set('CONFIG_VNC', vnc.found())
+config_host_data.set('CONFIG_VNC_JPEG', jpeg.found())
+config_host_data.set('CONFIG_VNC_SASL', sasl.found())
+config_host_data.set('CONFIG_VIRTFS', have_virtfs)
+config_host_data.set('CONFIG_VTE', vte.found())
+config_host_data.set('CONFIG_XKBCOMMON', xkbcommon.found())
+config_host_data.set('CONFIG_KEYUTILS', keyutils.found())
+config_host_data.set('CONFIG_GETTID', has_gettid)
+config_host_data.set('CONFIG_GNUTLS', gnutls.found())
+config_host_data.set('CONFIG_GNUTLS_CRYPTO', gnutls_crypto.found())
+config_host_data.set('CONFIG_TASN1', tasn1.found())
+config_host_data.set('CONFIG_GCRYPT', gcrypt.found())
+config_host_data.set('CONFIG_NETTLE', nettle.found())
+config_host_data.set('CONFIG_HOGWEED', hogweed.found())
+config_host_data.set('CONFIG_QEMU_PRIVATE_XTS', xts == 'private')
+config_host_data.set('CONFIG_MALLOC_TRIM', has_malloc_trim)
+config_host_data.set('CONFIG_STATX', has_statx)
+config_host_data.set('CONFIG_STATX_MNT_ID', has_statx_mnt_id)
+config_host_data.set('CONFIG_ZSTD', zstd.found())
+config_host_data.set('CONFIG_FUSE', fuse.found())
+config_host_data.set('CONFIG_FUSE_LSEEK', fuse_lseek.found())
+config_host_data.set('CONFIG_SPICE_PROTOCOL', spice_protocol.found())
+if spice_protocol.found()
+config_host_data.set('CONFIG_SPICE_PROTOCOL_MAJOR', spice_protocol.version().split('.')[0])
+config_host_data.set('CONFIG_SPICE_PROTOCOL_MINOR', spice_protocol.version().split('.')[1])
+config_host_data.set('CONFIG_SPICE_PROTOCOL_MICRO', spice_protocol.version().split('.')[2])
+endif
+config_host_data.set('CONFIG_SPICE', spice.found())
+config_host_data.set('CONFIG_X11', x11.found())
+config_host_data.set('CONFIG_DBUS_DISPLAY', dbus_display)
+config_host_data.set('CONFIG_CFI', get_option('cfi'))
+config_host_data.set('CONFIG_SELINUX', selinux.found())
+config_host_data.set('CONFIG_XEN_BACKEND', xen.found())
+config_host_data.set('CONFIG_LIBDW', libdw.found())
+if xen.found()
+  # protect from xen.version() having less than three components
+  xen_version = xen.version().split('.') + ['0', '0']
+  xen_ctrl_version = xen_version[0] + \
+    ('0' + xen_version[1]).substring(-2) + \
+    ('0' + xen_version[2]).substring(-2)
+  config_host_data.set('CONFIG_XEN_CTRL_INTERFACE_VERSION', xen_ctrl_version)
+endif
+config_host_data.set('QEMU_VERSION', '"@0@"'.format(meson.project_version()))
+config_host_data.set('QEMU_VERSION_MAJOR', meson.project_version().split('.')[0])
+config_host_data.set('QEMU_VERSION_MINOR', meson.project_version().split('.')[1])
+config_host_data.set('QEMU_VERSION_MICRO', meson.project_version().split('.')[2])
+
+config_host_data.set_quoted('CONFIG_HOST_DSOSUF', host_dsosuf)
+config_host_data.set('HAVE_HOST_BLOCK_DEVICE', have_host_block_device)
+
+have_coroutine_pool = get_option('coroutine_pool')
+if get_option('debug_stack_usage') and have_coroutine_pool
+  message('Disabling coroutine pool to measure stack usage')
+  have_coroutine_pool = false
+endif
+config_host_data.set('CONFIG_COROUTINE_POOL', have_coroutine_pool)
+config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock'))
+config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex'))
+config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage'))
+config_host_data.set('CONFIG_DEBUG_TCG', get_option('debug_tcg'))
+config_host_data.set('CONFIG_LIVE_BLOCK_MIGRATION', get_option('live_block_migration').allowed())
+config_host_data.set('CONFIG_QOM_CAST_DEBUG', get_option('qom_cast_debug'))
+config_host_data.set('CONFIG_REPLICATION', get_option('replication').allowed())
+
+# has_header
+config_host_data.set('CONFIG_EPOLL', cc.has_header('sys/epoll.h'))
+config_host_data.set('CONFIG_LINUX_MAGIC_H', cc.has_header('linux/magic.h'))
+config_host_data.set('CONFIG_VALGRIND_H', cc.has_header('valgrind/valgrind.h'))
+config_host_data.set('HAVE_BTRFS_H', cc.has_header('linux/btrfs.h'))
+config_host_data.set('HAVE_DRM_H', cc.has_header('libdrm/drm.h'))
+config_host_data.set('HAVE_PTY_H', cc.has_header('pty.h'))
+config_host_data.set('HAVE_SYS_DISK_H', cc.has_header('sys/disk.h'))
+config_host_data.set('HAVE_SYS_IOCCOM_H', cc.has_header('sys/ioccom.h'))
+config_host_data.set('HAVE_SYS_KCOV_H', cc.has_header('sys/kcov.h'))
+if targetos == 'windows'
+  config_host_data.set('HAVE_AFUNIX_H', cc.has_header('afunix.h'))
+endif
+
+# has_function
+config_host_data.set('CONFIG_CLOSE_RANGE', cc.has_function('close_range'))
+config_host_data.set('CONFIG_ACCEPT4', cc.has_function('accept4'))
+config_host_data.set('CONFIG_CLOCK_ADJTIME', cc.has_function('clock_adjtime'))
+config_host_data.set('CONFIG_DUP3', cc.has_function('dup3'))
+config_host_data.set('CONFIG_FALLOCATE', cc.has_function('fallocate'))
+config_host_data.set('CONFIG_POSIX_FALLOCATE', cc.has_function('posix_fallocate'))
+config_host_data.set('CONFIG_GETCPU', cc.has_function('getcpu', prefix: gnu_source_prefix))
+config_host_data.set('CONFIG_SCHED_GETCPU', cc.has_function('sched_getcpu', prefix: '#include <sched.h>'))
+# Note that we need to specify prefix: here to avoid incorrectly
+# thinking that Windows has posix_memalign()
+config_host_data.set('CONFIG_POSIX_MEMALIGN', cc.has_function('posix_memalign', prefix: '#include <stdlib.h>'))
+config_host_data.set('CONFIG_ALIGNED_MALLOC', cc.has_function('_aligned_malloc'))
+config_host_data.set('CONFIG_VALLOC', cc.has_function('valloc'))
+config_host_data.set('CONFIG_MEMALIGN', cc.has_function('memalign'))
+config_host_data.set('CONFIG_PPOLL', cc.has_function('ppoll'))
+config_host_data.set('CONFIG_PREADV', cc.has_function('preadv', prefix: '#include <sys/uio.h>'))
+config_host_data.set('CONFIG_PTHREAD_FCHDIR_NP', cc.has_function('pthread_fchdir_np'))
+config_host_data.set('CONFIG_SENDFILE', cc.has_function('sendfile'))
+config_host_data.set('CONFIG_SETNS', cc.has_function('setns') and cc.has_function('unshare'))
+config_host_data.set('CONFIG_SYNCFS', cc.has_function('syncfs'))
+config_host_data.set('CONFIG_SYNC_FILE_RANGE', cc.has_function('sync_file_range'))
+config_host_data.set('CONFIG_TIMERFD', cc.has_function('timerfd_create'))
+config_host_data.set('HAVE_COPY_FILE_RANGE', cc.has_function('copy_file_range'))
+config_host_data.set('HAVE_GETIFADDRS', cc.has_function('getifaddrs'))
+config_host_data.set('HAVE_GLIB_WITH_SLICE_ALLOCATOR', glib_has_gslice)
+config_host_data.set('HAVE_OPENPTY', cc.has_function('openpty', dependencies: util))
+config_host_data.set('HAVE_STRCHRNUL', cc.has_function('strchrnul'))
+config_host_data.set('HAVE_SYSTEM_FUNCTION', cc.has_function('system', prefix: '#include <stdlib.h>'))
+config_host_data.set('HAVE_GETLOADAVG_FUNCTION', cc.has_function('getloadavg', prefix: '#include <stdlib.h>'))
+if rbd.found()
+  config_host_data.set('HAVE_RBD_NAMESPACE_EXISTS',
+                       cc.has_function('rbd_namespace_exists',
+                                       dependencies: rbd,
+                                       prefix: '#include <rbd/librbd.h>'))
+endif
+if rdma.found()
+  config_host_data.set('HAVE_IBV_ADVISE_MR',
+                       cc.has_function('ibv_advise_mr',
+                                       dependencies: rdma,
+                                       prefix: '#include <infiniband/verbs.h>'))
+endif
+
+have_asan_fiber = false
+if get_option('sanitizers') and \
+   not cc.has_function('__sanitizer_start_switch_fiber',
+                         args: '-fsanitize=address',
+                         prefix: '#include <sanitizer/asan_interface.h>')
+  warning('Missing ASAN due to missing fiber annotation interface')
+  warning('Without code annotation, the report may be inferior.')
+else
+  have_asan_fiber = true
+endif
+config_host_data.set('CONFIG_ASAN_IFACE_FIBER', have_asan_fiber)
+
+# has_header_symbol
+config_host_data.set('CONFIG_BLKZONED',
+                     cc.has_header_symbol('linux/blkzoned.h', 'BLKOPENZONE'))
+config_host_data.set('CONFIG_EPOLL_CREATE1',
+                     cc.has_header_symbol('sys/epoll.h', 'epoll_create1'))
+config_host_data.set('CONFIG_FALLOCATE_PUNCH_HOLE',
+                     cc.has_header_symbol('linux/falloc.h', 'FALLOC_FL_PUNCH_HOLE') and
+                     cc.has_header_symbol('linux/falloc.h', 'FALLOC_FL_KEEP_SIZE'))
+config_host_data.set('CONFIG_FALLOCATE_ZERO_RANGE',
+                     cc.has_header_symbol('linux/falloc.h', 'FALLOC_FL_ZERO_RANGE'))
+config_host_data.set('CONFIG_FIEMAP',
+                     cc.has_header('linux/fiemap.h') and
+                     cc.has_header_symbol('linux/fs.h', 'FS_IOC_FIEMAP'))
+config_host_data.set('CONFIG_GETRANDOM',
+                     cc.has_function('getrandom') and
+                     cc.has_header_symbol('sys/random.h', 'GRND_NONBLOCK'))
+config_host_data.set('CONFIG_INOTIFY',
+                     cc.has_header_symbol('sys/inotify.h', 'inotify_init'))
+config_host_data.set('CONFIG_INOTIFY1',
+                     cc.has_header_symbol('sys/inotify.h', 'inotify_init1'))
+config_host_data.set('CONFIG_PRCTL_PR_SET_TIMERSLACK',
+                     cc.has_header_symbol('sys/prctl.h', 'PR_SET_TIMERSLACK'))
+config_host_data.set('CONFIG_RTNETLINK',
+                     cc.has_header_symbol('linux/rtnetlink.h', 'IFLA_PROTO_DOWN'))
+config_host_data.set('CONFIG_SYSMACROS',
+                     cc.has_header_symbol('sys/sysmacros.h', 'makedev'))
+config_host_data.set('HAVE_OPTRESET',
+                     cc.has_header_symbol('getopt.h', 'optreset'))
+config_host_data.set('HAVE_IPPROTO_MPTCP',
+                     cc.has_header_symbol('netinet/in.h', 'IPPROTO_MPTCP'))
+
+# has_member
+config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',
+                     cc.has_member('struct sigevent', 'sigev_notify_thread_id',
+                                   prefix: '#include <signal.h>'))
+config_host_data.set('HAVE_STRUCT_STAT_ST_ATIM',
+                     cc.has_member('struct stat', 'st_atim',
+                                   prefix: '#include <sys/stat.h>'))
+config_host_data.set('HAVE_BLK_ZONE_REP_CAPACITY',
+                     cc.has_member('struct blk_zone', 'capacity',
+                                   prefix: '#include <linux/blkzoned.h>'))
+
+# has_type
+config_host_data.set('CONFIG_IOVEC',
+                     cc.has_type('struct iovec',
+                                 prefix: '#include <sys/uio.h>'))
+config_host_data.set('HAVE_UTMPX',
+                     cc.has_type('struct utmpx',
+                                 prefix: '#include <utmpx.h>'))
+
+config_host_data.set('CONFIG_EVENTFD', cc.links('''
+  #include <sys/eventfd.h>
+  int main(void) { return eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); }'''))
+config_host_data.set('CONFIG_FDATASYNC', cc.links(gnu_source_prefix + '''
+  #include <unistd.h>
+  int main(void) {
+  #if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0
+  return fdatasync(0);
+  #else
+  #error Not supported
+  #endif
+  }'''))
+
+has_madvise = cc.links(gnu_source_prefix + '''
+  #include <sys/types.h>
+  #include <sys/mman.h>
+  #include <stddef.h>
+  int main(void) { return madvise(NULL, 0, MADV_DONTNEED); }''')
+missing_madvise_proto = false
+if has_madvise
+  # Some platforms (illumos and Solaris before Solaris 11) provide madvise()
+  # but forget to prototype it. In this case, has_madvise will be true (the
+  # test program links despite a compile warning). To detect the
+  # missing-prototype case, we try again with a definitely-bogus prototype.
+  # This will only compile if the system headers don't provide the prototype;
+  # otherwise the conflicting prototypes will cause a compiler error.
+  missing_madvise_proto = cc.links(gnu_source_prefix + '''
+    #include <sys/types.h>
+    #include <sys/mman.h>
+    #include <stddef.h>
+    extern int madvise(int);
+    int main(void) { return madvise(0); }''')
+endif
+config_host_data.set('CONFIG_MADVISE', has_madvise)
+config_host_data.set('HAVE_MADVISE_WITHOUT_PROTOTYPE', missing_madvise_proto)
+
+config_host_data.set('CONFIG_MEMFD', cc.links(gnu_source_prefix + '''
+  #include <sys/mman.h>
+  int main(void) { return memfd_create("foo", MFD_ALLOW_SEALING); }'''))
+config_host_data.set('CONFIG_OPEN_BY_HANDLE', cc.links(gnu_source_prefix + '''
+  #include <fcntl.h>
+  #if !defined(AT_EMPTY_PATH)
+  # error missing definition
+  #else
+  int main(void) { struct file_handle fh; return open_by_handle_at(0, &fh, 0); }
+  #endif'''))
+config_host_data.set('CONFIG_POSIX_MADVISE', cc.links(gnu_source_prefix + '''
+  #include <sys/mman.h>
+  #include <stddef.h>
+  int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); }'''))
+
+config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_W_TID', cc.links(gnu_source_prefix + '''
+  #include <pthread.h>
+
+  static void *f(void *p) { return NULL; }
+  int main(void)
+  {
+    pthread_t thread;
+    pthread_create(&thread, 0, f, 0);
+    pthread_setname_np(thread, "QEMU");
+    return 0;
+  }''', dependencies: threads))
+config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_WO_TID', cc.links(gnu_source_prefix + '''
+  #include <pthread.h>
+
+  static void *f(void *p) { pthread_setname_np("QEMU"); return NULL; }
+  int main(void)
+  {
+    pthread_t thread;
+    pthread_create(&thread, 0, f, 0);
+    return 0;
+  }''', dependencies: threads))
+config_host_data.set('CONFIG_PTHREAD_SET_NAME_NP', cc.links(gnu_source_prefix + '''
+  #include <pthread.h>
+  #include <pthread_np.h>
+
+  static void *f(void *p) { return NULL; }
+  int main(void)
+  {
+    pthread_t thread;
+    pthread_create(&thread, 0, f, 0);
+    pthread_set_name_np(thread, "QEMU");
+    return 0;
+  }''', dependencies: threads))
+config_host_data.set('CONFIG_PTHREAD_CONDATTR_SETCLOCK', cc.links(gnu_source_prefix + '''
+  #include <pthread.h>
+  #include <time.h>
+
+  int main(void)
+  {
+    pthread_condattr_t attr
+    pthread_condattr_init(&attr);
+    pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
+    return 0;
+  }''', dependencies: threads))
+config_host_data.set('CONFIG_PTHREAD_AFFINITY_NP', cc.links(gnu_source_prefix + '''
+  #include <pthread.h>
+
+  static void *f(void *p) { return NULL; }
+  int main(void)
+  {
+    int setsize = CPU_ALLOC_SIZE(64);
+    pthread_t thread;
+    cpu_set_t *cpuset;
+    pthread_create(&thread, 0, f, 0);
+    cpuset = CPU_ALLOC(64);
+    CPU_ZERO_S(setsize, cpuset);
+    pthread_setaffinity_np(thread, setsize, cpuset);
+    pthread_getaffinity_np(thread, setsize, cpuset);
+    CPU_FREE(cpuset);
+    return 0;
+  }''', dependencies: threads))
+config_host_data.set('CONFIG_SIGNALFD', cc.links(gnu_source_prefix + '''
+  #include <sys/signalfd.h>
+  #include <stddef.h>
+  int main(void) { return signalfd(-1, NULL, SFD_CLOEXEC); }'''))
+config_host_data.set('CONFIG_SPLICE', cc.links(gnu_source_prefix + '''
+  #include <unistd.h>
+  #include <fcntl.h>
+  #include <limits.h>
+
+  int main(void)
+  {
+    int len, fd = 0;
+    len = tee(STDIN_FILENO, STDOUT_FILENO, INT_MAX, SPLICE_F_NONBLOCK);
+    splice(STDIN_FILENO, NULL, fd, NULL, len, SPLICE_F_MOVE);
+    return 0;
+  }'''))
+
+config_host_data.set('HAVE_MLOCKALL', cc.links(gnu_source_prefix + '''
+  #include <sys/mman.h>
+  int main(void) {
+    return mlockall(MCL_FUTURE);
+  }'''))
+
+have_l2tpv3 = false
+if get_option('l2tpv3').allowed() and have_system
+  have_l2tpv3 = cc.has_type('struct mmsghdr',
+    prefix: gnu_source_prefix + '''
+      #include <sys/socket.h>
+      #include <linux/ip.h>''')
+endif
+config_host_data.set('CONFIG_L2TPV3', have_l2tpv3)
+
+have_netmap = false
+if get_option('netmap').allowed() and have_system
+  have_netmap = cc.compiles('''
+    #include <inttypes.h>
+    #include <net/if.h>
+    #include <net/netmap.h>
+    #include <net/netmap_user.h>
+    #if (NETMAP_API < 11) || (NETMAP_API > 15)
+    #error
+    #endif
+    int main(void) { return 0; }''')
+  if not have_netmap and get_option('netmap').enabled()
+    error('Netmap headers not available')
+  endif
+endif
+config_host_data.set('CONFIG_NETMAP', have_netmap)
+
+# Work around a system header bug with some kernel/XFS header
+# versions where they both try to define 'struct fsxattr':
+# xfs headers will not try to redefine structs from linux headers
+# if this macro is set.
+config_host_data.set('HAVE_FSXATTR', cc.links('''
+  #include <linux/fs.h>
+  struct fsxattr foo;
+  int main(void) {
+    return 0;
+  }'''))
+
+# Some versions of Mac OS X incorrectly define SIZE_MAX
+config_host_data.set('HAVE_BROKEN_SIZE_MAX', not cc.compiles('''
+    #include <stdint.h>
+    #include <stdio.h>
+    int main(void) {
+        return printf("%zu", SIZE_MAX);
+    }''', args: ['-Werror']))
+
+# See if 64-bit atomic operations are supported.
+# Note that without __atomic builtins, we can only
+# assume atomic loads/stores max at pointer size.
+config_host_data.set('CONFIG_ATOMIC64', cc.links('''
+  #include <stdint.h>
+  int main(void)
+  {
+    uint64_t x = 0, y = 0;
+    y = __atomic_load_n(&x, __ATOMIC_RELAXED);
+    __atomic_store_n(&x, y, __ATOMIC_RELAXED);
+    __atomic_compare_exchange_n(&x, &y, x, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+    __atomic_exchange_n(&x, y, __ATOMIC_RELAXED);
+    __atomic_fetch_add(&x, y, __ATOMIC_RELAXED);
+    return 0;
+  }'''))
+
+has_int128_type = cc.compiles('''
+  __int128_t a;
+  __uint128_t b;
+  int main(void) { b = a; }''')
+config_host_data.set('CONFIG_INT128_TYPE', has_int128_type)
+
+has_int128 = has_int128_type and cc.links('''
+  __int128_t a;
+  __uint128_t b;
+  int main (void) {
+    a = a + b;
+    b = a * b;
+    a = a * a;
+    return 0;
+  }''')
+config_host_data.set('CONFIG_INT128', has_int128)
+
+if has_int128_type
+  # "do we have 128-bit atomics which are handled inline and specifically not
+  # via libatomic". The reason we can't use libatomic is documented in the
+  # comment starting "GCC is a house divided" in include/qemu/atomic128.h.
+  # We only care about these operations on 16-byte aligned pointers, so
+  # force 16-byte alignment of the pointer, which may be greater than
+  # __alignof(unsigned __int128) for the host.
+  atomic_test_128 = '''
+    int main(int ac, char **av) {
+      __uint128_t *p = __builtin_assume_aligned(av[ac - 1], 16);
+      p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
+      __atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
+      __atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+      return 0;
+    }'''
+  has_atomic128 = cc.links(atomic_test_128)
+
+  config_host_data.set('CONFIG_ATOMIC128', has_atomic128)
+
+  if not has_atomic128
+    # Even with __builtin_assume_aligned, the above test may have failed
+    # without optimization enabled.  Try again with optimizations locally
+    # enabled for the function.  See
+    #   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
+    has_atomic128_opt = cc.links('__attribute__((optimize("O1")))' + atomic_test_128)
+    config_host_data.set('CONFIG_ATOMIC128_OPT', has_atomic128_opt)
+
+    if not has_atomic128_opt
+      config_host_data.set('CONFIG_CMPXCHG128', cc.links('''
+        int main(void)
+        {
+          __uint128_t x = 0, y = 0;
+          __sync_val_compare_and_swap_16(&x, y, x);
+          return 0;
+        }
+      '''))
+    endif
+  endif
+endif
+
+config_host_data.set('CONFIG_GETAUXVAL', cc.links(gnu_source_prefix + '''
+  #include <sys/auxv.h>
+  int main(void) {
+    return getauxval(AT_HWCAP) == 0;
+  }'''))
+
+config_host_data.set('CONFIG_USBFS', have_linux_user and cc.compiles('''
+  #include <linux/usbdevice_fs.h>
+
+  #ifndef USBDEVFS_GET_CAPABILITIES
+  #error "USBDEVFS_GET_CAPABILITIES undefined"
+  #endif
+
+  #ifndef USBDEVFS_DISCONNECT_CLAIM
+  #error "USBDEVFS_DISCONNECT_CLAIM undefined"
+  #endif
+
+  int main(void) { return 0; }'''))
+
+have_keyring = get_option('keyring') \
+  .require(targetos == 'linux', error_message: 'keyring is only available on Linux') \
+  .require(cc.compiles('''
+    #include <errno.h>
+    #include <asm/unistd.h>
+    #include <linux/keyctl.h>
+    #include <sys/syscall.h>
+    #include <unistd.h>
+    int main(void) {
+        return syscall(__NR_keyctl, KEYCTL_READ, 0, NULL, NULL, 0);
+    }'''), error_message: 'keyctl syscall not available on this system').allowed()
+config_host_data.set('CONFIG_SECRET_KEYRING', have_keyring)
+
+have_cpuid_h = cc.links('''
+  #include <cpuid.h>
+  int main(void) {
+    unsigned a, b, c, d;
+    unsigned max = __get_cpuid_max(0, 0);
+
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+    }
+
+    if (max >= 7) {
+        __cpuid_count(7, 0, a, b, c, d);
+    }
+
+    return 0;
+  }''')
+config_host_data.set('CONFIG_CPUID_H', have_cpuid_h)
+
+config_host_data.set('CONFIG_AVX2_OPT', get_option('avx2') \
+  .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX2') \
+  .require(cc.links('''
+    #include <cpuid.h>
+    #include <immintrin.h>
+    static int __attribute__((target("avx2"))) bar(void *a) {
+      __m256i x = *(__m256i *)a;
+      return _mm256_testz_si256(x, x);
+    }
+    int main(int argc, char *argv[]) { return bar(argv[argc - 1]); }
+  '''), error_message: 'AVX2 not available').allowed())
+
+config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
+  .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512F') \
+  .require(cc.links('''
+    #include <cpuid.h>
+    #include <immintrin.h>
+    static int __attribute__((target("avx512f"))) bar(void *a) {
+      __m512i x = *(__m512i *)a;
+      return _mm512_test_epi64_mask(x, x);
+    }
+    int main(int argc, char *argv[]) { return bar(argv[argc - 1]); }
+  '''), error_message: 'AVX512F not available').allowed())
+
+config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
+  .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \
+  .require(cc.links('''
+    #include <cpuid.h>
+    #include <immintrin.h>
+    static int __attribute__((target("avx512bw"))) bar(void *a) {
+      __m512i *x = a;
+      __m512i res= _mm512_abs_epi8(*x);
+      return res[1];
+    }
+    int main(int argc, char *argv[]) { return bar(argv[0]); }
+  '''), error_message: 'AVX512BW not available').allowed())
+
+# For both AArch64 and AArch32, detect if builtins are available.
+config_host_data.set('CONFIG_ARM_AES_BUILTIN', cc.compiles('''
+    #include <arm_neon.h>
+    #ifndef __ARM_FEATURE_AES
+    __attribute__((target("+crypto")))
+    #endif
+    void foo(uint8x16_t *p) { *p = vaesmcq_u8(*p); }
+  '''))
+
+have_pvrdma = get_option('pvrdma') \
+  .require(rdma.found(), error_message: 'PVRDMA requires OpenFabrics libraries') \
+  .require(cc.compiles(gnu_source_prefix + '''
+    #include <sys/mman.h>
+    int main(void)
+    {
+      char buf = 0;
+      void *addr = &buf;
+      addr = mremap(addr, 0, 1, MREMAP_MAYMOVE | MREMAP_FIXED);
+
+      return 0;
+    }'''), error_message: 'PVRDMA requires mremap').allowed()
+
+if have_pvrdma
+  config_host_data.set('LEGACY_RDMA_REG_MR', not cc.links('''
+    #include <infiniband/verbs.h>
+    int main(void)
+    {
+      struct ibv_mr *mr;
+      struct ibv_pd *pd = NULL;
+      size_t length = 10;
+      uint64_t iova = 0;
+      int access = 0;
+      void *addr = NULL;
+
+      mr = ibv_reg_mr_iova(pd, addr, length, iova, access);
+      ibv_dereg_mr(mr);
+      return 0;
+    }'''))
+endif
+
+if get_option('membarrier').disabled()
+  have_membarrier = false
+elif targetos == 'windows'
+  have_membarrier = true
+elif targetos == 'linux'
+  have_membarrier = cc.compiles('''
+    #include <linux/membarrier.h>
+    #include <sys/syscall.h>
+    #include <unistd.h>
+    #include <stdlib.h>
+    int main(void) {
+        syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0);
+        syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0);
+        exit(0);
+    }''')
+endif
+config_host_data.set('CONFIG_MEMBARRIER', get_option('membarrier') \
+  .require(have_membarrier, error_message: 'membarrier system call not available') \
+  .allowed())
+
+have_afalg = get_option('crypto_afalg') \
+  .require(cc.compiles(gnu_source_prefix + '''
+    #include <errno.h>
+    #include <sys/types.h>
+    #include <sys/socket.h>
+    #include <linux/if_alg.h>
+    int main(void) {
+      int sock;
+      sock = socket(AF_ALG, SOCK_SEQPACKET, 0);
+      return sock;
+    }
+  '''), error_message: 'AF_ALG requested but could not be detected').allowed()
+config_host_data.set('CONFIG_AF_ALG', have_afalg)
+
+config_host_data.set('CONFIG_AF_VSOCK', cc.has_header_symbol(
+  'linux/vm_sockets.h', 'AF_VSOCK',
+  prefix: '#include <sys/socket.h>',
+))
+
+have_vss = false
+have_vss_sdk = false # old xp/2003 SDK
+if targetos == 'windows' and 'cpp' in all_languages
+  have_vss = cxx.compiles('''
+    #define __MIDL_user_allocate_free_DEFINED__
+    #include <vss.h>
+    int main(void) { return VSS_CTX_BACKUP; }''')
+  have_vss_sdk = cxx.has_header('vscoordint.h')
+endif
+config_host_data.set('HAVE_VSS_SDK', have_vss_sdk)
+
+# Older versions of MinGW do not import _lock_file and _unlock_file properly.
+# This was fixed for v6.0.0 with commit b48e3ac8969d.
+if targetos == 'windows'
+  config_host_data.set('HAVE__LOCK_FILE', cc.links('''
+    #include <stdio.h>
+    int main(void) {
+      _lock_file(NULL);
+      _unlock_file(NULL);
+      return 0;
+    }''', name: '_lock_file and _unlock_file'))
+endif
+
+if targetos == 'windows'
+  mingw_has_setjmp_longjmp = cc.links('''
+    #include <setjmp.h>
+    int main(void) {
+      /*
+       * These functions are not available in setjmp header, but may be
+       * available at link time, from libmingwex.a.
+       */
+      extern int __mingw_setjmp(jmp_buf);
+      extern void __attribute__((noreturn)) __mingw_longjmp(jmp_buf, int);
+      jmp_buf env;
+      __mingw_setjmp(env);
+      __mingw_longjmp(env, 0);
+    }
+  ''', name: 'mingw setjmp and longjmp')
+
+  if cpu == 'aarch64' and not mingw_has_setjmp_longjmp
+    error('mingw must provide setjmp/longjmp for windows-arm64')
+  endif
+endif
 
 ########################
 # Target configuration #
 ########################
 
-config_all = {}
-config_all += config_host
+minikconf = find_program('scripts/minikconf.py')
+config_targetos = {
+  (targetos == 'windows' ? 'CONFIG_WIN32' : 'CONFIG_POSIX'): 'y'
+}
+if targetos == 'darwin'
+  config_targetos += {'CONFIG_DARWIN': 'y'}
+elif targetos == 'linux'
+  config_targetos += {'CONFIG_LINUX': 'y'}
+endif
+if targetos in bsd_oses
+  config_targetos += {'CONFIG_BSD': 'y'}
+endif
+
+config_all = {}
+config_all_devices = {}
+config_all_disas = {}
+config_devices_mak_list = []
+config_devices_h = {}
+config_target_h = {}
+config_target_mak = {}
+
+disassemblers = {
+  'alpha' : ['CONFIG_ALPHA_DIS'],
+  'avr' : ['CONFIG_AVR_DIS'],
+  'cris' : ['CONFIG_CRIS_DIS'],
+  'hexagon' : ['CONFIG_HEXAGON_DIS'],
+  'hppa' : ['CONFIG_HPPA_DIS'],
+  'i386' : ['CONFIG_I386_DIS'],
+  'x86_64' : ['CONFIG_I386_DIS'],
+  'm68k' : ['CONFIG_M68K_DIS'],
+  'microblaze' : ['CONFIG_MICROBLAZE_DIS'],
+  'mips' : ['CONFIG_MIPS_DIS'],
+  'nios2' : ['CONFIG_NIOS2_DIS'],
+  'or1k' : ['CONFIG_OPENRISC_DIS'],
+  'ppc' : ['CONFIG_PPC_DIS'],
+  'riscv' : ['CONFIG_RISCV_DIS'],
+  'rx' : ['CONFIG_RX_DIS'],
+  's390' : ['CONFIG_S390_DIS'],
+  'sh4' : ['CONFIG_SH4_DIS'],
+  'sparc' : ['CONFIG_SPARC_DIS'],
+  'xtensa' : ['CONFIG_XTENSA_DIS'],
+  'loongarch' : ['CONFIG_LOONGARCH_DIS'],
+}
+
+have_ivshmem = config_host_data.get('CONFIG_EVENTFD')
+host_kconfig = \
+  (get_option('fuzzing') ? ['CONFIG_FUZZ=y'] : []) + \
+  (have_tpm ? ['CONFIG_TPM=y'] : []) + \
+  (pixman.found() ? ['CONFIG_PIXMAN=y'] : []) + \
+  (spice.found() ? ['CONFIG_SPICE=y'] : []) + \
+  (have_ivshmem ? ['CONFIG_IVSHMEM=y'] : []) + \
+  (opengl.found() ? ['CONFIG_OPENGL=y'] : []) + \
+  (x11.found() ? ['CONFIG_X11=y'] : []) + \
+  (have_vhost_user ? ['CONFIG_VHOST_USER=y'] : []) + \
+  (have_vhost_vdpa ? ['CONFIG_VHOST_VDPA=y'] : []) + \
+  (have_vhost_kernel ? ['CONFIG_VHOST_KERNEL=y'] : []) + \
+  (have_virtfs ? ['CONFIG_VIRTFS=y'] : []) + \
+  (targetos == 'linux' ? ['CONFIG_LINUX=y'] : []) + \
+  (have_pvrdma ? ['CONFIG_PVRDMA=y'] : []) + \
+  (multiprocess_allowed ? ['CONFIG_MULTIPROCESS_ALLOWED=y'] : []) + \
+  (vfio_user_server_allowed ? ['CONFIG_VFIO_USER_SERVER_ALLOWED=y'] : []) + \
+  (hv_balloon ? ['CONFIG_HV_BALLOON_POSSIBLE=y'] : [])
+
+ignored = [ 'TARGET_XML_FILES', 'TARGET_ABI_DIR', 'TARGET_ARCH' ]
+
+default_targets = 'CONFIG_DEFAULT_TARGETS' in config_host
+actual_target_dirs = []
+fdt_required = []
+foreach target : target_dirs
+  config_target = { 'TARGET_NAME': target.split('-')[0] }
+  if target.endswith('linux-user')
+    if targetos != 'linux'
+      if default_targets
+        continue
+      endif
+      error('Target @0@ is only available on a Linux host'.format(target))
+    endif
+    config_target += { 'CONFIG_LINUX_USER': 'y' }
+  elif target.endswith('bsd-user')
+    if targetos not in bsd_oses
+      if default_targets
+        continue
+      endif
+      error('Target @0@ is only available on a BSD host'.format(target))
+    endif
+    config_target += { 'CONFIG_BSD_USER': 'y' }
+  elif target.endswith('softmmu')
+    config_target += { 'CONFIG_SYSTEM_ONLY': 'y' }
+    config_target += { 'CONFIG_SOFTMMU': 'y' }
+  endif
+  if target.endswith('-user')
+    config_target += {
+      'CONFIG_USER_ONLY': 'y',
+      'CONFIG_QEMU_INTERP_PREFIX':
+        get_option('interp_prefix').replace('%M', config_target['TARGET_NAME'])
+    }
+  endif
+
+  accel_kconfig = []
+  foreach sym: accelerators
+    if sym == 'CONFIG_TCG' or target in accelerator_targets.get(sym, [])
+      config_target += { sym: 'y' }
+      config_all += { sym: 'y' }
+      if target in modular_tcg
+        config_target += { 'CONFIG_TCG_MODULAR': 'y' }
+      else
+        config_target += { 'CONFIG_TCG_BUILTIN': 'y' }
+      endif
+      accel_kconfig += [ sym + '=y' ]
+    endif
+  endforeach
+  if accel_kconfig.length() == 0
+    if default_targets
+      continue
+    endif
+    error('No accelerator available for target @0@'.format(target))
+  endif
+
+  actual_target_dirs += target
+  config_target += keyval.load('configs/targets' / target + '.mak')
+  config_target += { 'TARGET_' + config_target['TARGET_ARCH'].to_upper(): 'y' }
+
+  if 'TARGET_NEED_FDT' in config_target
+    fdt_required += target
+  endif
+
+  # Add default keys
+  if 'TARGET_BASE_ARCH' not in config_target
+    config_target += {'TARGET_BASE_ARCH': config_target['TARGET_ARCH']}
+  endif
+  if 'TARGET_ABI_DIR' not in config_target
+    config_target += {'TARGET_ABI_DIR': config_target['TARGET_ARCH']}
+  endif
+  if 'TARGET_BIG_ENDIAN' not in config_target
+    config_target += {'TARGET_BIG_ENDIAN': 'n'}
+  endif
+
+  foreach k, v: disassemblers
+    if host_arch.startswith(k) or config_target['TARGET_BASE_ARCH'].startswith(k)
+      foreach sym: v
+        config_target += { sym: 'y' }
+        config_all_disas += { sym: 'y' }
+      endforeach
+    endif
+  endforeach
+
+  config_target_data = configuration_data()
+  foreach k, v: config_target
+    if not k.startswith('TARGET_') and not k.startswith('CONFIG_')
+      # do nothing
+    elif ignored.contains(k)
+      # do nothing
+    elif k == 'TARGET_BASE_ARCH'
+      # Note that TARGET_BASE_ARCH ends up in config-target.h but it is
+      # not used to select files from sourcesets.
+      config_target_data.set('TARGET_' + v.to_upper(), 1)
+    elif k == 'TARGET_NAME' or k == 'CONFIG_QEMU_INTERP_PREFIX'
+      config_target_data.set_quoted(k, v)
+    elif v == 'y'
+      config_target_data.set(k, 1)
+    elif v == 'n'
+      config_target_data.set(k, 0)
+    else
+      config_target_data.set(k, v)
+    endif
+  endforeach
+  config_target_data.set('QEMU_ARCH',
+                         'QEMU_ARCH_' + config_target['TARGET_BASE_ARCH'].to_upper())
+  config_target_h += {target: configure_file(output: target + '-config-target.h',
+                                               configuration: config_target_data)}
+
+  if target.endswith('-softmmu')
+    config_input = meson.get_external_property(target, 'default')
+    config_devices_mak = target + '-config-devices.mak'
+    config_devices_mak = configure_file(
+      input: ['configs/devices' / target / config_input + '.mak', 'Kconfig'],
+      output: config_devices_mak,
+      depfile: config_devices_mak + '.d',
+      capture: true,
+      command: [minikconf,
+                get_option('default_devices') ? '--defconfig' : '--allnoconfig',
+                config_devices_mak, '@DEPFILE@', '@INPUT@',
+                host_kconfig, accel_kconfig,
+                'CONFIG_' + config_target['TARGET_ARCH'].to_upper() + '=y'])
+
+    config_devices_data = configuration_data()
+    config_devices = keyval.load(config_devices_mak)
+    foreach k, v: config_devices
+      config_devices_data.set(k, 1)
+    endforeach
+    config_devices_mak_list += config_devices_mak
+    config_devices_h += {target: configure_file(output: target + '-config-devices.h',
+                                                configuration: config_devices_data)}
+    config_target += config_devices
+    config_all_devices += config_devices
+  endif
+  config_target_mak += {target: config_target}
+endforeach
+target_dirs = actual_target_dirs
+
+# This configuration is used to build files that are shared by
+# multiple binaries, and then extracted out of the "common"
+# static_library target.
+#
+# We do not use all_sources()/all_dependencies(), because it would
+# build literally all source files, including devices only used by
+# targets that are not built for this compilation.  The CONFIG_ALL
+# pseudo symbol replaces it.
+
+config_all += config_all_devices
+config_all += config_targetos
+config_all += config_all_disas
 config_all += {
-  'CONFIG_SOFTMMU': have_system,
+  'CONFIG_XEN': xen.found(),
+  'CONFIG_SYSTEM_ONLY': have_system,
   'CONFIG_USER_ONLY': have_user,
   'CONFIG_ALL': true,
 }
 
+target_configs_h = []
+foreach target: target_dirs
+  target_configs_h += config_target_h[target]
+  target_configs_h += config_devices_h.get(target, [])
+endforeach
+genh += custom_target('config-poison.h',
+                      input: [target_configs_h],
+                      output: 'config-poison.h',
+                      capture: true,
+                      command: [find_program('scripts/make-config-poison.sh'),
+                                target_configs_h])
+
+##############
+# Submodules #
+##############
+
+capstone = not_found
+if not get_option('capstone').auto() or have_system or have_user
+  capstone = dependency('capstone', version: '>=3.0.5',
+                        method: 'pkg-config',
+                        required: get_option('capstone'))
+
+  # Some versions of capstone have broken pkg-config file
+  # that reports a wrong -I path, causing the #include to
+  # fail later. If the system has such a broken version
+  # do not use it.
+  if capstone.found() and not cc.compiles('#include <capstone.h>',
+                                          dependencies: [capstone])
+    capstone = not_found
+    if get_option('capstone').enabled()
+      error('capstone requested, but it does not appear to work')
+    endif
+  endif
+endif
+
+libvfio_user_dep = not_found
+if have_system and vfio_user_server_allowed
+  libvfio_user_proj = subproject('libvfio-user', required: true)
+  libvfio_user_dep = libvfio_user_proj.get_variable('libvfio_user_dep')
+endif
+
+fdt = not_found
+fdt_opt = get_option('fdt')
+if fdt_required.length() > 0 or fdt_opt == 'enabled'
+  if fdt_opt == 'disabled'
+    error('fdt disabled but required by targets ' + ', '.join(fdt_required))
+  endif
+
+  if fdt_opt in ['enabled', 'auto', 'system']
+    if get_option('wrap_mode') == 'nodownload'
+      fdt_opt = 'system'
+    endif
+    fdt = cc.find_library('fdt', required: fdt_opt == 'system')
+    if fdt.found() and cc.links('''
+       #include <libfdt.h>
+       #include <libfdt_env.h>
+       int main(void) { fdt_find_max_phandle(NULL, NULL); return 0; }''',
+         dependencies: fdt)
+      fdt_opt = 'system'
+    elif fdt_opt == 'system'
+       error('system libfdt requested, but it is too old (1.5.1 or newer required)')
+    else
+      fdt_opt = 'internal'
+      fdt = not_found
+    endif
+  endif
+  if not fdt.found()
+    assert(fdt_opt == 'internal')
+    libfdt_proj = subproject('dtc', required: true,
+                             default_options: ['tools=false',  'yaml=disabled',
+                                               'python=disabled', 'default_library=static'])
+    fdt = libfdt_proj.get_variable('libfdt_dep')
+  endif
+else
+  fdt_opt = 'disabled'
+endif
+
+config_host_data.set('CONFIG_CAPSTONE', capstone.found())
+config_host_data.set('CONFIG_FDT', fdt.found())
+config_host_data.set('CONFIG_SLIRP', slirp.found())
+
 #####################
 # Generated sources #
 #####################
@@ -249,33 +3146,53 @@ config_all += {
 genh += configure_file(output: 'config-host.h', configuration: config_host_data)
 
 hxtool = find_program('scripts/hxtool')
-shaderinclude = find_program('scripts/shaderinclude.pl')
+shaderinclude = find_program('scripts/shaderinclude.py')
 qapi_gen = find_program('scripts/qapi-gen.py')
-qapi_gen_depends = [ meson.source_root() / 'scripts/qapi/__init__.py',
-                     meson.source_root() / 'scripts/qapi/commands.py',
-                     meson.source_root() / 'scripts/qapi/common.py',
-                     meson.source_root() / 'scripts/qapi/error.py',
-                     meson.source_root() / 'scripts/qapi/events.py',
-                     meson.source_root() / 'scripts/qapi/expr.py',
-                     meson.source_root() / 'scripts/qapi/gen.py',
-                     meson.source_root() / 'scripts/qapi/introspect.py',
-                     meson.source_root() / 'scripts/qapi/parser.py',
-                     meson.source_root() / 'scripts/qapi/schema.py',
-                     meson.source_root() / 'scripts/qapi/source.py',
-                     meson.source_root() / 'scripts/qapi/types.py',
-                     meson.source_root() / 'scripts/qapi/visit.py',
-                     meson.source_root() / 'scripts/qapi/common.py',
-                     meson.source_root() / 'scripts/qapi-gen.py'
+qapi_gen_depends = [ meson.current_source_dir() / 'scripts/qapi/__init__.py',
+                     meson.current_source_dir() / 'scripts/qapi/commands.py',
+                     meson.current_source_dir() / 'scripts/qapi/common.py',
+                     meson.current_source_dir() / 'scripts/qapi/error.py',
+                     meson.current_source_dir() / 'scripts/qapi/events.py',
+                     meson.current_source_dir() / 'scripts/qapi/expr.py',
+                     meson.current_source_dir() / 'scripts/qapi/gen.py',
+                     meson.current_source_dir() / 'scripts/qapi/introspect.py',
+                     meson.current_source_dir() / 'scripts/qapi/main.py',
+                     meson.current_source_dir() / 'scripts/qapi/parser.py',
+                     meson.current_source_dir() / 'scripts/qapi/schema.py',
+                     meson.current_source_dir() / 'scripts/qapi/source.py',
+                     meson.current_source_dir() / 'scripts/qapi/types.py',
+                     meson.current_source_dir() / 'scripts/qapi/visit.py',
+                     meson.current_source_dir() / 'scripts/qapi-gen.py'
 ]
 
 tracetool = [
   python, files('scripts/tracetool.py'),
-   '--backend=' + config_host['TRACE_BACKENDS']
+   '--backend=' + ','.join(get_option('trace_backends'))
 ]
+tracetool_depends = files(
+  'scripts/tracetool/backend/log.py',
+  'scripts/tracetool/backend/__init__.py',
+  'scripts/tracetool/backend/dtrace.py',
+  'scripts/tracetool/backend/ftrace.py',
+  'scripts/tracetool/backend/simple.py',
+  'scripts/tracetool/backend/syslog.py',
+  'scripts/tracetool/backend/ust.py',
+  'scripts/tracetool/format/ust_events_c.py',
+  'scripts/tracetool/format/ust_events_h.py',
+  'scripts/tracetool/format/__init__.py',
+  'scripts/tracetool/format/d.py',
+  'scripts/tracetool/format/simpletrace_stap.py',
+  'scripts/tracetool/format/c.py',
+  'scripts/tracetool/format/h.py',
+  'scripts/tracetool/format/log_stap.py',
+  'scripts/tracetool/format/stap.py',
+  'scripts/tracetool/__init__.py',
+  'scripts/tracetool/vcpu.py'
+)
 
 qemu_version_cmd = [find_program('scripts/qemu-version.sh'),
                     meson.current_source_dir(),
-                    config_host['PKGVERSION'], meson.project_version()]
+                    get_option('pkgversion'), meson.project_version()]
 qemu_version = custom_target('qemu-version.h',
                              output: 'qemu-version.h',
                              command: qemu_version_cmd,
@@ -300,7 +3217,6 @@ foreach d : hx_headers
                 input: files(d[0]),
                 output: d[1],
                 capture: true,
-                build_by_default: true, # to be removed when added to a target
                 command: [hxtool, '-h', '@INPUT0@'])
 endforeach
 genh += hxdep
@@ -312,17 +3228,31 @@ genh += hxdep
 authz_ss = ss.source_set()
 blockdev_ss = ss.source_set()
 block_ss = ss.source_set()
+chardev_ss = ss.source_set()
+common_ss = ss.source_set()
 crypto_ss = ss.source_set()
+hwcore_ss = ss.source_set()
 io_ss = ss.source_set()
 qmp_ss = ss.source_set()
 qom_ss = ss.source_set()
-softmmu_ss = ss.source_set()
+system_ss = ss.source_set()
+specific_fuzz_ss = ss.source_set()
 specific_ss = ss.source_set()
 stub_ss = ss.source_set()
 trace_ss = ss.source_set()
+user_ss = ss.source_set()
 util_ss = ss.source_set()
 
+# accel modules
+qtest_module_ss = ss.source_set()
+tcg_module_ss = ss.source_set()
+
 modules = {}
+target_modules = {}
+hw_arch = {}
+target_arch = {}
+target_system_arch = {}
+target_user_arch = {}
 
 ###############
 # Trace files #
@@ -332,9 +3262,18 @@ modules = {}
 # we have those
 trace_events_subdirs = [
   'crypto',
+  'qapi',
+  'qom',
   'monitor',
+  'util',
+#  'gdbstub',
 ]
-
+if have_linux_user
+  trace_events_subdirs += [ 'linux-user' ]
+endif
+if have_bsd_user
+  trace_events_subdirs += [ 'bsd-user' ]
+endif
 if have_block
   trace_events_subdirs += [
     'authz',
@@ -344,13 +3283,105 @@ if have_block
     'scsi',
   ]
 endif
+if have_system
+  trace_events_subdirs += [
+    'accel/kvm',
+    'audio',
+    'backends',
+    'backends/tpm',
+    'chardev',
+    'ebpf',
+    'hw/9pfs',
+    'hw/acpi',
+    'hw/adc',
+    'hw/alpha',
+    'hw/arm',
+    'hw/audio',
+    'hw/block',
+    'hw/block/dataplane',
+    'hw/char',
+    'hw/display',
+    'hw/dma',
+    'hw/hyperv',
+    'hw/i2c',
+    'hw/i386',
+    'hw/i386/xen',
+    'hw/i386/kvm',
+    'hw/ide',
+    'hw/input',
+    'hw/intc',
+    'hw/isa',
+    'hw/mem',
+    'hw/mips',
+    'hw/misc',
+    'hw/misc/macio',
+    'hw/net',
+    'hw/net/can',
+    'hw/nubus',
+    'hw/nvme',
+    'hw/nvram',
+    'hw/pci',
+    'hw/pci-host',
+    'hw/ppc',
+    'hw/rdma',
+    'hw/rdma/vmw',
+    'hw/rtc',
+    'hw/s390x',
+    'hw/scsi',
+    'hw/sd',
+    'hw/sh4',
+    'hw/sparc',
+    'hw/sparc64',
+    'hw/ssi',
+    'hw/timer',
+    'hw/tpm',
+    'hw/ufs',
+    'hw/usb',
+    'hw/vfio',
+    'hw/virtio',
+    'hw/watchdog',
+    'hw/xen',
+    'hw/gpio',
+    'migration',
+    'net',
+    'system',
+    'ui',
+    'hw/remote',
+  ]
+endif
+if have_system or have_user
+  trace_events_subdirs += [
+    'accel/tcg',
+    'hw/core',
+    'target/arm',
+    'target/arm/hvf',
+    'target/hppa',
+    'target/i386',
+    'target/i386/kvm',
+    'target/mips/tcg',
+    'target/nios2',
+    'target/ppc',
+    'target/riscv',
+    'target/s390x',
+    'target/s390x/kvm',
+    'target/sparc',
+  ]
+endif
 
-trace_events_subdirs += [
-  'qapi',
-  'qom',
-  'util',
-]
+vhost_user = not_found
+if targetos == 'linux' and have_vhost_user
+  libvhost_user = subproject('libvhost-user')
+  vhost_user = libvhost_user.get_variable('vhost_user_dep')
+endif
+
+libvduse = not_found
+if have_libvduse
+  libvduse_proj = subproject('libvduse')
+  libvduse = libvduse_proj.get_variable('libvduse_dep')
+endif
 
+# NOTE: the trace/ subdirectory needs the qapi_trace_events variable
+# that is filled in by qapi/.
 subdir('qapi')
 subdir('qobject')
 subdir('stubs')
@@ -359,79 +3390,255 @@ subdir('util')
 subdir('qom')
 subdir('authz')
 subdir('crypto')
+#subdir('ui')
+#subdir('hw')
+#subdir('gdbstub')
+
+if enable_modules
+  libmodulecommon = static_library('module-common', files('module-common.c') + genh, pic: true, c_args: '-DBUILD_DSO')
+  modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO')
+endif
+
+qom_ss = qom_ss.apply(config_targetos, strict: false)
+libqom = static_library('qom', qom_ss.sources() + genh,
+                        dependencies: [qom_ss.dependencies()],
+                        name_suffix: 'fa',
+                        build_by_default: false)
+qom = declare_dependency(link_whole: libqom)
+
+event_loop_base = files('event-loop-base.c')
+event_loop_base = static_library('event-loop-base',
+                                 sources: event_loop_base + genh,
+                                 name_suffix: 'fa',
+                                 build_by_default: false)
+event_loop_base = declare_dependency(link_whole: event_loop_base,
+                                     dependencies: [qom])
 
 stub_ss = stub_ss.apply(config_all, strict: false)
 
 util_ss.add_all(trace_ss)
 util_ss = util_ss.apply(config_all, strict: false)
 libqemuutil = static_library('qemuutil',
+                             build_by_default: false,
                              sources: util_ss.sources() + stub_ss.sources() + genh,
-                             dependencies: [util_ss.dependencies(), m, glib])
+                             dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc, pixman])
 qemuutil = declare_dependency(link_with: libqemuutil,
-                              sources: genh)
+                              sources: genh + version_res,
+                              dependencies: [event_loop_base])
+
+if have_system or have_user
+  decodetree = generator(find_program('scripts/decodetree.py'),
+                         output: 'decode-@BASENAME@.c.inc',
+                         arguments: ['@INPUT@', '@EXTRA_ARGS@', '-o', '@OUTPUT@'])
+  subdir('libdecnumber')
+  subdir('target')
+endif
 
+#subdir('audio')
 subdir('io')
+#subdir('chardev')
+#subdir('fsdev')
+#subdir('dump')
 
-block_ss.add(files(
-  'block.c',
-  'blockjob.c',
-  'job.c',
-  'qemu-io-cmds.c',
-))
+if have_block
+  block_ss.add(files(
+    'block.c',
+    'blockjob.c',
+    'job.c',
+    'qemu-io-cmds.c',
+  ))
+  if config_host_data.get('CONFIG_REPLICATION')
+    block_ss.add(files('replication.c'))
+  endif
 
-subdir('nbd')
-subdir('scsi')
-subdir('block')
+  subdir('nbd')
+  subdir('scsi')
+  subdir('block')
 
-blockdev_ss.add(files(
-  'blockdev.c',
-  'blockdev-nbd.c',
-  'iothread.c',
-  'job-qmp.c',
-))
+  blockdev_ss.add(files(
+    'blockdev.c',
+    'blockdev-nbd.c',
+    'iothread.c',
+    'job-qmp.c',
+  ), gnutls)
+
+  # os-posix.c contains POSIX-specific functions used by qemu-storage-daemon,
+  # os-win32.c does not
+  blockdev_ss.add(when: 'CONFIG_POSIX', if_true: files('os-posix.c'))
+#  system_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')])
+endif
+
+#common_ss.add(files('cpu-common.c'))
+#specific_ss.add(files('cpu-target.c'))
 
-# os-posix.c contains POSIX-specific functions used by qemu-storage-daemon,
-blockdev_ss.add(when: 'CONFIG_POSIX', if_true: files('os-posix.c'))
+#subdir('system')
 
+# Work around a gcc bug/misfeature wherein constant propagation looks
+# through an alias:
+#   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99696
+# to guess that a const variable is always zero.  Without lto, this is
+# impossible, as the alias is restricted to page-vary-common.c.  Indeed,
+# without lto, not even the alias is required -- we simply use different
+# declarations in different compilation units.
+#pagevary = files('page-vary-common.c')
+if get_option('b_lto')
+  pagevary_flags = ['-fno-lto']
+  if get_option('cfi')
+    pagevary_flags += '-fno-sanitize=cfi-icall'
+  endif
+  pagevary = static_library('page-vary-common', sources: pagevary + genh,
+                            c_args: pagevary_flags)
+  pagevary = declare_dependency(link_with: pagevary)
+endif
+#common_ss.add(pagevary)
+#specific_ss.add(files('page-vary-target.c'))
+
+#subdir('backends')
+#subdir('disas')
+#subdir('migration')
 subdir('monitor')
+#subdir('net')
+#subdir('replay')
+#subdir('semihosting')
+#subdir('stats')
+#subdir('tcg')
+#subdir('fpu')
+#subdir('accel')
+#subdir('plugins')
+#subdir('ebpf')
+
+common_user_inc = []
+
+#subdir('common-user')
+#subdir('bsd-user')
+#subdir('linux-user')
+
+# needed for fuzzing binaries
+#subdir('tests/qtest/libqos')
+#subdir('tests/qtest/fuzz')
+
+# accel modules
+tcg_real_module_ss = ss.source_set()
+tcg_real_module_ss.add_all(when: 'CONFIG_TCG_MODULAR', if_true: tcg_module_ss)
+specific_ss.add_all(when: 'CONFIG_TCG_BUILTIN', if_true: tcg_module_ss)
+target_modules += { 'accel' : { 'qtest': qtest_module_ss,
+                                'tcg': tcg_real_module_ss }}
 
 ########################
 # Library dependencies #
 ########################
 
+modinfo_collect = find_program('scripts/modinfo-collect.py')
+modinfo_generate = find_program('scripts/modinfo-generate.py')
+modinfo_files = []
+
 block_mods = []
+system_mods = []
 foreach d, list : modules
+  if not (d == 'block' ? have_block : have_system)
+    continue
+  endif
+
   foreach m, module_ss : list
-    if enable_modules and targetos != 'windows'
+    if enable_modules
       module_ss = module_ss.apply(config_all, strict: false)
       sl = static_library(d + '-' + m, [genh, module_ss.sources()],
                           dependencies: [modulecommon, module_ss.dependencies()], pic: true)
       if d == 'block'
         block_mods += sl
+      else
+        system_mods += sl
+      endif
+      if module_ss.sources() != []
+        # FIXME: Should use sl.extract_all_objects(recursive: true) as
+        # input. Sources can be used multiple times but objects are
+        # unique when it comes to lookup in compile_commands.json.
+        # Depnds on a mesion version with
+        # https://github.com/mesonbuild/meson/pull/8900
+        modinfo_files += custom_target(d + '-' + m + '.modinfo',
+                                       output: d + '-' + m + '.modinfo',
+                                       input: module_ss.sources() + genh,
+                                       capture: true,
+                                       command: [modinfo_collect, module_ss.sources()])
       endif
     else
       if d == 'block'
         block_ss.add_all(module_ss)
+      else
+        system_ss.add_all(module_ss)
       endif
     endif
   endforeach
 endforeach
 
+foreach d, list : target_modules
+  foreach m, module_ss : list
+    if enable_modules
+      foreach target : target_dirs
+        if target.endswith('-softmmu')
+          config_target = config_target_mak[target]
+          config_target += config_targetos
+          target_inc = [include_directories('target' / config_target['TARGET_BASE_ARCH'])]
+          c_args = ['-DNEED_CPU_H',
+                    '-DCONFIG_TARGET="@0@-config-target.h"'.format(target),
+                    '-DCONFIG_DEVICES="@0@-config-devices.h"'.format(target)]
+          target_module_ss = module_ss.apply(config_target, strict: false)
+          if target_module_ss.sources() != []
+            module_name = d + '-' + m + '-' + config_target['TARGET_NAME']
+            sl = static_library(module_name,
+                                [genh, target_module_ss.sources()],
+                                dependencies: [modulecommon, target_module_ss.dependencies()],
+                                include_directories: target_inc,
+                                c_args: c_args,
+                                pic: true)
+            system_mods += sl
+            # FIXME: Should use sl.extract_all_objects(recursive: true) too.
+            modinfo_files += custom_target(module_name + '.modinfo',
+                                           output: module_name + '.modinfo',
+                                           input: target_module_ss.sources() + genh,
+                                           capture: true,
+                                           command: [modinfo_collect, '--target', target, target_module_ss.sources()])
+          endif
+        endif
+      endforeach
+    else
+      specific_ss.add_all(module_ss)
+    endif
+  endforeach
+endforeach
+
+if enable_modules
+  foreach target : target_dirs
+    if target.endswith('-softmmu')
+      config_target = config_target_mak[target]
+      config_devices_mak = target + '-config-devices.mak'
+      modinfo_src = custom_target('modinfo-' + target + '.c',
+                                  output: 'modinfo-' + target + '.c',
+                                  input: modinfo_files,
+                                  command: [modinfo_generate, '--devices', config_devices_mak, '@INPUT@'],
+                                  capture: true)
+
+      modinfo_lib = static_library('modinfo-' + target + '.c', modinfo_src)
+      modinfo_dep = declare_dependency(link_with: modinfo_lib)
+
+      arch = config_target['TARGET_NAME'] == 'sparc64' ? 'sparc64' : config_target['TARGET_BASE_ARCH']
+      hw_arch[arch].add(modinfo_dep)
+    endif
+  endforeach
+endif
+
 nm = find_program('nm')
 undefsym = find_program('scripts/undefsym.py')
 block_syms = custom_target('block.syms', output: 'block.syms',
                              input: [libqemuutil, block_mods],
                              capture: true,
                              command: [undefsym, nm, '@INPUT@'])
+qemu_syms = custom_target('qemu.syms', output: 'qemu.syms',
+                             input: [libqemuutil, system_mods],
+                             capture: true,
+                             command: [undefsym, nm, '@INPUT@'])
 
-qom_ss = qom_ss.apply(config_host, strict: false)
-libqom = static_library('qom', qom_ss.sources() + genh,
-                        dependencies: [qom_ss.dependencies()],
-                        name_suffix: 'fa')
-
-qom = declare_dependency(link_whole: libqom)
-
-authz_ss = authz_ss.apply(config_host, strict: false)
+authz_ss = authz_ss.apply(config_targetos, strict: false)
 libauthz = static_library('authz', authz_ss.sources() + genh,
                           dependencies: [authz_ss.dependencies()],
                           name_suffix: 'fa',
@@ -440,7 +3647,7 @@ libauthz = static_library('authz', authz_ss.sources() + genh,
 authz = declare_dependency(link_whole: libauthz,
                            dependencies: qom)
 
-crypto_ss = crypto_ss.apply(config_host, strict: false)
+crypto_ss = crypto_ss.apply(config_targetos, strict: false)
 libcrypto = static_library('crypto', crypto_ss.sources() + genh,
                            dependencies: [crypto_ss.dependencies()],
                            name_suffix: 'fa',
@@ -449,7 +3656,7 @@ libcrypto = static_library('crypto', crypto_ss.sources() + genh,
 crypto = declare_dependency(link_whole: libcrypto,
                             dependencies: [authz, qom])
 
-io_ss = io_ss.apply(config_host, strict: false)
+io_ss = io_ss.apply(config_targetos, strict: false)
 libio = static_library('io', io_ss.sources() + genh,
                        dependencies: [io_ss.dependencies()],
                        link_with: libqemuutil,
@@ -458,7 +3665,14 @@ libio = static_library('io', io_ss.sources() + genh,
 
 io = declare_dependency(link_whole: libio, dependencies: [crypto, qom])
 
-block_ss = block_ss.apply(config_host, strict: false)
+#libmigration = static_library('migration', sources: migration_files + genh,
+#                              name_suffix: 'fa',
+#                              build_by_default: false)
+#migration = declare_dependency(link_with: libmigration,
+#                               dependencies: [zlib, qom, io])
+#system_ss.add(migration)
+
+block_ss = block_ss.apply(config_targetos, strict: false)
 libblock = static_library('block', block_ss.sources() + genh,
                           dependencies: block_ss.dependencies(),
                           link_depends: block_syms,
@@ -469,270 +3683,762 @@ block = declare_dependency(link_whole: [libblock],
                            link_args: '@block.syms',
                            dependencies: [crypto, io])
 
-blockdev_ss = blockdev_ss.apply(config_host, strict: false)
+blockdev_ss = blockdev_ss.apply(config_targetos, strict: false)
 libblockdev = static_library('blockdev', blockdev_ss.sources() + genh,
                              dependencies: blockdev_ss.dependencies(),
                              name_suffix: 'fa',
                              build_by_default: false)
 
 blockdev = declare_dependency(link_whole: [libblockdev],
-                              dependencies: [block])
+                              dependencies: [block, event_loop_base])
+
+qmp_ss = qmp_ss.apply(config_targetos, strict: false)
+libqmp = static_library('qmp', qmp_ss.sources() + genh,
+                        dependencies: qmp_ss.dependencies(),
+                        name_suffix: 'fa',
+                        build_by_default: false)
+
+qmp = declare_dependency(link_whole: [libqmp])
+
+#libchardev = static_library('chardev', chardev_ss.sources() + genh,
+#                            name_suffix: 'fa',
+#                            dependencies: chardev_ss.dependencies(),
+#                            build_by_default: false)
+
+#chardev = declare_dependency(link_whole: libchardev)
+
+#hwcore_ss = hwcore_ss.apply(config_targetos, strict: false)
+#libhwcore = static_library('hwcore', sources: hwcore_ss.sources() + genh,
+#                           name_suffix: 'fa',
+#                           build_by_default: false)
+#hwcore = declare_dependency(link_whole: libhwcore)
+#common_ss.add(hwcore)
 
 ###########
 # Targets #
 ###########
 
-foreach m : block_mods
-  shared_module(m.name(),
+emulator_modules = []
+foreach m : block_mods + system_mods
+  emulator_modules += shared_module(m.name(),
+                build_by_default: true,
                 name_prefix: '',
                 link_whole: m,
                 install: true,
                 install_dir: qemu_moddir)
 endforeach
+if emulator_modules.length() > 0
+  alias_target('modules', emulator_modules)
+endif
+
+#system_ss.add(authz, blockdev, chardev, crypto, io, qmp)
+common_ss.add(qom, qemuutil)
+
+common_ss.add_all(when: 'CONFIG_SYSTEM_ONLY', if_true: [system_ss])
+common_ss.add_all(when: 'CONFIG_USER_ONLY', if_true: user_ss)
+
+common_all = common_ss.apply(config_all, strict: false)
+#common_all = static_library('common',
+#                            build_by_default: false,
+#                            sources: common_all.sources() + genh,
+#                            include_directories: common_user_inc,
+#                            implicit_include_directories: false,
+#                            dependencies: common_all.dependencies(),
+#                            name_suffix: 'fa')
+
+feature_to_c = find_program('scripts/feature_to_c.py')
+
+if targetos == 'darwin'
+  entitlement = find_program('scripts/entitlement.sh')
+endif
+
+emulators = {}
+foreach target : target_dirs
+  config_target = config_target_mak[target]
+  target_name = config_target['TARGET_NAME']
+  target_base_arch = config_target['TARGET_BASE_ARCH']
+  arch_srcs = [config_target_h[target]]
+  arch_deps = []
+  c_args = ['-DNEED_CPU_H',
+            '-DCONFIG_TARGET="@0@-config-target.h"'.format(target),
+            '-DCONFIG_DEVICES="@0@-config-devices.h"'.format(target)]
+  link_args = emulator_link_args
+
+  config_target += config_targetos
+  target_inc = [include_directories('target' / config_target['TARGET_BASE_ARCH'])]
+  if targetos == 'linux'
+    target_inc += include_directories('linux-headers', is_system: true)
+  endif
+  if target.endswith('-softmmu')
+    target_type='system'
+    t = target_system_arch[target_base_arch].apply(config_target, strict: false)
+    arch_srcs += t.sources()
+    arch_deps += t.dependencies()
+
+    hw_dir = target_name == 'sparc64' ? 'sparc64' : target_base_arch
+    hw = hw_arch[hw_dir].apply(config_target, strict: false)
+    arch_srcs += hw.sources()
+    arch_deps += hw.dependencies()
+
+    arch_srcs += config_devices_h[target]
+    link_args += ['@block.syms', '@qemu.syms']
+  else
+    abi = config_target['TARGET_ABI_DIR']
+    target_type='user'
+    target_inc += common_user_inc
+    if target_base_arch in target_user_arch
+      t = target_user_arch[target_base_arch].apply(config_target, strict: false)
+      arch_srcs += t.sources()
+      arch_deps += t.dependencies()
+    endif
+    if 'CONFIG_LINUX_USER' in config_target
+      base_dir = 'linux-user'
+    endif
+    if 'CONFIG_BSD_USER' in config_target
+      base_dir = 'bsd-user'
+      target_inc += include_directories('bsd-user/' / targetos)
+      target_inc += include_directories('bsd-user/host/' / host_arch)
+      dir = base_dir / abi
+      arch_srcs += files(dir / 'signal.c', dir / 'target_arch_cpu.c')
+    endif
+    target_inc += include_directories(
+      base_dir,
+      base_dir / abi,
+    )
+    if 'CONFIG_LINUX_USER' in config_target
+      dir = base_dir / abi
+      arch_srcs += files(dir / 'signal.c', dir / 'cpu_loop.c')
+      if config_target.has_key('TARGET_SYSTBL_ABI')
+        arch_srcs += \
+          syscall_nr_generators[abi].process(base_dir / abi / config_target['TARGET_SYSTBL'],
+                                             extra_args : config_target['TARGET_SYSTBL_ABI'])
+      endif
+    endif
+  endif
+
+  if 'TARGET_XML_FILES' in config_target
+    gdbstub_xml = custom_target(target + '-gdbstub-xml.c',
+                                output: target + '-gdbstub-xml.c',
+                                input: files(config_target['TARGET_XML_FILES'].split()),
+                                command: [feature_to_c, '@INPUT@'],
+                                capture: true)
+    arch_srcs += gdbstub_xml
+  endif
+
+  t = target_arch[target_base_arch].apply(config_target, strict: false)
+  arch_srcs += t.sources()
+  arch_deps += t.dependencies()
+
+  target_common = common_ss.apply(config_target, strict: false)
+  objects = common_all.extract_objects(target_common.sources())
+  deps = target_common.dependencies()
+
+  target_specific = specific_ss.apply(config_target, strict: false)
+  arch_srcs += target_specific.sources()
+  arch_deps += target_specific.dependencies()
+
+  lib = static_library('qemu-' + target,
+                 sources: arch_srcs + genh,
+                 dependencies: arch_deps,
+                 objects: objects,
+                 include_directories: target_inc,
+                 c_args: c_args,
+                 build_by_default: false,
+                 name_suffix: 'fa')
+
+  if target.endswith('-softmmu')
+    execs = [{
+      'name': 'qemu-system-' + target_name,
+      'win_subsystem': 'console',
+      'sources': files('system/main.c'),
+      'dependencies': []
+    }]
+    if targetos == 'windows' and (sdl.found() or gtk.found())
+      execs += [{
+        'name': 'qemu-system-' + target_name + 'w',
+        'win_subsystem': 'windows',
+        'sources': files('system/main.c'),
+        'dependencies': []
+      }]
+    endif
+    if get_option('fuzzing')
+      specific_fuzz = specific_fuzz_ss.apply(config_target, strict: false)
+      execs += [{
+        'name': 'qemu-fuzz-' + target_name,
+        'win_subsystem': 'console',
+        'sources': specific_fuzz.sources(),
+        'dependencies': specific_fuzz.dependencies(),
+      }]
+    endif
+  else
+    execs = [{
+      'name': 'qemu-' + target_name,
+      'win_subsystem': 'console',
+      'sources': [],
+      'dependencies': []
+    }]
+  endif
+  foreach exe: execs
+    exe_name = exe['name']
+    if targetos == 'darwin'
+      exe_name += '-unsigned'
+    endif
+
+    emulator = executable(exe_name, exe['sources'],
+               install: true,
+               c_args: c_args,
+               dependencies: arch_deps + deps + exe['dependencies'],
+               objects: lib.extract_all_objects(recursive: true),
+               link_depends: [block_syms, qemu_syms] + exe.get('link_depends', []),
+               link_args: link_args,
+               win_subsystem: exe['win_subsystem'])
+
+    if targetos == 'darwin'
+      icon = 'pc-bios/qemu.rsrc'
+      build_input = [emulator, files(icon)]
+      install_input = [
+        get_option('bindir') / exe_name,
+        meson.current_source_dir() / icon
+      ]
+      if 'CONFIG_HVF' in config_target
+        entitlements = 'accel/hvf/entitlements.plist'
+        build_input += files(entitlements)
+        install_input += meson.current_source_dir() / entitlements
+      endif
+
+      emulators += {exe['name'] : custom_target(exe['name'],
+                   input: build_input,
+                   output: exe['name'],
+                   command: [entitlement, '@OUTPUT@', '@INPUT@'])
+      }
+
+      meson.add_install_script(entitlement, '--install',
+                               get_option('bindir') / exe['name'],
+                               install_input)
+    else
+      emulators += {exe['name']: emulator}
+    endif
+
+    if stap.found()
+      foreach stp: [
+        {'ext': '.stp-build', 'fmt': 'stap', 'bin': meson.current_build_dir() / exe['name'], 'install': false},
+        {'ext': '.stp', 'fmt': 'stap', 'bin': get_option('prefix') / get_option('bindir') / exe['name'], 'install': true},
+        {'ext': '-simpletrace.stp', 'fmt': 'simpletrace-stap', 'bin': '', 'install': true},
+        {'ext': '-log.stp', 'fmt': 'log-stap', 'bin': '', 'install': true},
+      ]
+        custom_target(exe['name'] + stp['ext'],
+                      input: trace_events_all,
+                      output: exe['name'] + stp['ext'],
+                      install: stp['install'],
+                      install_dir: get_option('datadir') / 'systemtap/tapset',
+                      command: [
+                        tracetool, '--group=all', '--format=' + stp['fmt'],
+                        '--binary=' + stp['bin'],
+                        '--target-name=' + target_name,
+                        '--target-type=' + target_type,
+                        '--probe-prefix=qemu.' + target_type + '.' + target_name,
+                        '@INPUT@', '@OUTPUT@'
+                      ],
+                      depend_files: tracetool_depends)
+      endforeach
+    endif
+  endforeach
+endforeach
 
 # Other build targets
 
+if get_option('plugins')
+  install_headers('include/qemu/qemu-plugin.h')
+  if targetos == 'windows'
+    # On windows, we want to deliver the qemu_plugin_api.lib file in the qemu installer,
+    # so that plugin authors can compile against it.
+    install_data(win32_qemu_plugin_api_lib, install_dir: 'lib')
+  endif
+endif
+
+#subdir('qga')
+
+# Don't build qemu-keymap if xkbcommon is not explicitly enabled
+# when we don't build tools or system
+if xkbcommon.found()
+  # used for the update-keymaps target, so include rules even if !have_tools
+  qemu_keymap = executable('qemu-keymap', files('qemu-keymap.c', 'ui/input-keymap.c') + genh,
+                           dependencies: [qemuutil, xkbcommon], install: have_tools)
+endif
+
 if have_tools
   qemu_img = executable('qemu-img', [files('qemu-img.c'), hxdep],
              dependencies: [authz, block, crypto, io, qom, qemuutil], install: true)
   qemu_io = executable('qemu-io', files('qemu-io.c'),
              dependencies: [block, qemuutil], install: true)
   qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'),
-               dependencies: [blockdev, qemuutil], install: true)
+               dependencies: [blockdev, qemuutil, gnutls, selinux],
+               install: true)
+
+#  subdir('storage-daemon')
+#  subdir('contrib/rdmacm-mux')
+#  subdir('contrib/elf2dmp')
+
+#  executable('qemu-edid', files('qemu-edid.c', 'hw/display/edid-generate.c'),
+#             dependencies: qemuutil,
+#             install: true)
+
+#  if have_vhost_user
+#    subdir('contrib/vhost-user-blk')
+#    subdir('contrib/vhost-user-gpu')
+#    subdir('contrib/vhost-user-input')
+#    subdir('contrib/vhost-user-scsi')
+#  endif
+
+#  if targetos == 'linux'
+#    executable('qemu-bridge-helper', files('qemu-bridge-helper.c'),
+#               dependencies: [qemuutil, libcap_ng],
+#               install: true,
+#               install_dir: get_option('libexecdir'))
+#
+#    executable('qemu-pr-helper', files('scsi/qemu-pr-helper.c', 'scsi/utils.c'),
+#               dependencies: [authz, crypto, io, qom, qemuutil,
+#                              libcap_ng, mpathpersist],
+#               install: true)
+#  endif
+
+#  if have_ivshmem
+#    subdir('contrib/ivshmem-client')
+#    subdir('contrib/ivshmem-server')
+#  endif
 endif
 
-subdir('scripts')
+#subdir('scripts')
+#subdir('tools')
+#subdir('pc-bios')
+#subdir('docs')
+#subdir('tests')
+#if gtk.found()
+#  subdir('po')
+#endif
+
+if host_machine.system() == 'windows'
+  nsis_cmd = [
+    find_program('scripts/nsis.py'),
+    '@OUTPUT@',
+    get_option('prefix'),
+    meson.current_source_dir(),
+    glib_pc.get_variable('bindir'),
+    host_machine.cpu(),
+    '--',
+    '-DDISPLAYVERSION=' + meson.project_version(),
+  ]
+  if build_docs
+    nsis_cmd += '-DCONFIG_DOCUMENTATION=y'
+  endif
+  if gtk.found()
+    nsis_cmd += '-DCONFIG_GTK=y'
+  endif
+
+  nsis = custom_target('nsis',
+                       output: 'qemu-setup-' + meson.project_version() + '.exe',
+                       input: files('qemu.nsi'),
+                       build_always_stale: true,
+                       command: nsis_cmd + ['@INPUT@'])
+  alias_target('installer', nsis)
+endif
 
 #########################
 # Configuration summary #
 #########################
 
+# Build environment
 summary_info = {}
+summary_info += {'Build directory':   meson.current_build_dir()}
+summary_info += {'Source path':       meson.current_source_dir()}
+summary_info += {'Download dependencies': get_option('wrap_mode') != 'nodownload'}
+summary(summary_info, bool_yn: true, section: 'Build environment')
+
+# Directories
 summary_info += {'Install prefix':    get_option('prefix')}
 summary_info += {'BIOS directory':    qemu_datadir}
-summary_info += {'firmware path':     get_option('qemu_firmwarepath')}
-summary_info += {'binary directory':  get_option('bindir')}
-summary_info += {'library directory': get_option('libdir')}
+pathsep = targetos == 'windows' ? ';' : ':'
+summary_info += {'firmware path':     pathsep.join(get_option('qemu_firmwarepath'))}
+summary_info += {'binary directory':  get_option('prefix') / get_option('bindir')}
+summary_info += {'library directory': get_option('prefix') / get_option('libdir')}
 summary_info += {'module directory':  qemu_moddir}
-summary_info += {'libexec directory': get_option('libexecdir')}
-summary_info += {'include directory': get_option('includedir')}
-summary_info += {'config directory':  get_option('sysconfdir')}
-summary_info += {'local state directory': get_option('localstatedir')}
-summary_info += {'Manual directory':  get_option('mandir')}
-summary_info += {'Doc directory':     get_option('docdir')}
-summary_info += {'Build directory':   meson.current_build_dir()}
-summary_info += {'Source path':       meson.current_source_dir()}
-summary_info += {'GIT binary':        config_host['GIT']}
-summary_info += {'GIT submodules':    config_host['GIT_SUBMODULES']}
-summary_info += {'C compiler':        meson.get_compiler('c').cmd_array()[0]}
-summary_info += {'Host C compiler':   meson.get_compiler('c', native: true).cmd_array()[0]}
-if link_language == 'cpp'
-  summary_info += {'C++ compiler':      meson.get_compiler('cpp').cmd_array()[0]}
+summary_info += {'libexec directory': get_option('prefix') / get_option('libexecdir')}
+summary_info += {'include directory': get_option('prefix') / get_option('includedir')}
+summary_info += {'config directory':  get_option('prefix') / get_option('sysconfdir')}
+if targetos != 'windows'
+  summary_info += {'local state directory': get_option('prefix') / get_option('localstatedir')}
+  summary_info += {'Manual directory':      get_option('prefix') / get_option('mandir')}
 else
-  summary_info += {'C++ compiler':      false}
+  summary_info += {'local state directory': 'queried at runtime'}
 endif
+summary_info += {'Doc directory':     get_option('prefix') / get_option('docdir')}
+summary(summary_info, bool_yn: true, section: 'Directories')
 
-summary_info += {'ARFLAGS':           config_host['ARFLAGS']}
-summary_info += {'CFLAGS':            ' '.join(get_option('c_args')
-                                               + ['-O' + get_option('optimization')]
-                                               + (get_option('debug') ? ['-g'] : []))}
-if link_language == 'cpp'
-  summary_info += {'CXXFLAGS':        ' '.join(get_option('cpp_args')
-                                               + ['-O' + get_option('optimization')]
-                                               + (get_option('debug') ? ['-g'] : []))}
-endif
-link_args = get_option(link_language + '_link_args')
-if link_args.length() > 0
-  summary_info += {'LDFLAGS':         ' '.join(link_args)}
-endif
-summary_info += {'QEMU_CFLAGS':       config_host['QEMU_CFLAGS']}
-summary_info += {'QEMU_LDFLAGS':      config_host['QEMU_LDFLAGS']}
-summary_info += {'make':              config_host['MAKE']}
+# Host binaries
+summary_info = {}
 summary_info += {'python':            '@0@ (version: @1@)'.format(python.full_path(), python.language_version())}
 summary_info += {'sphinx-build':      false}
+
+# FIXME: the [binaries] section of machine files, which can be probed
+# with find_program(), would be great for passing gdb and genisoimage
+# paths from configure to Meson.  However, there seems to be no way to
+# hide a program (for example if gdb is too old).
+if config_host.has_key('GDB')
+  summary_info += {'gdb':             config_host['GDB']}
+endif
+summary_info += {'iasl':              iasl}
 summary_info += {'genisoimage':       config_host['GENISOIMAGE']}
-# TODO: add back version
-summary_info += {'slirp support':     false}
-summary_info += {'module support':    config_host.has_key('CONFIG_MODULES')}
-if config_host.has_key('CONFIG_MODULES')
-  summary_info += {'alternative module path': config_host.has_key('CONFIG_MODULE_UPGRADES')}
+if targetos == 'windows' and have_ga
+  summary_info += {'wixl':            wixl}
+endif
+if slirp.found() and have_system
+  summary_info += {'smbd':            have_slirp_smbd ? smbd_path : false}
+endif
+summary(summary_info, bool_yn: true, section: 'Host binaries')
+
+# Configurable features
+summary_info = {}
+summary_info += {'Documentation':     false}
+summary_info += {'system-mode emulation': have_system}
+summary_info += {'user-mode emulation': have_user}
+summary_info += {'block layer':       have_block}
+summary_info += {'Install blobs':     get_option('install_blobs')}
+summary_info += {'module support':    enable_modules}
+if enable_modules
+  summary_info += {'alternative module path': get_option('module_upgrades')}
+endif
+summary_info += {'fuzzing support':   get_option('fuzzing')}
+if have_system
+  summary_info += {'Audio drivers':     ' '.join(audio_drivers_selected)}
+endif
+summary_info += {'Trace backends':    ','.join(get_option('trace_backends'))}
+if 'simple' in get_option('trace_backends')
+  summary_info += {'Trace output file': get_option('trace_file') + '-<pid>'}
 endif
+summary_info += {'D-Bus display':     dbus_display}
+summary_info += {'QOM debugging':     get_option('qom_cast_debug')}
+summary_info += {'Relocatable install': get_option('relocatable')}
+summary_info += {'vhost-kernel support': have_vhost_kernel}
+summary_info += {'vhost-net support': have_vhost_net}
+summary_info += {'vhost-user support': have_vhost_user}
+summary_info += {'vhost-user-crypto support': have_vhost_user_crypto}
+summary_info += {'vhost-user-blk server support': have_vhost_user_blk_server}
+summary_info += {'vhost-vdpa support': have_vhost_vdpa}
+summary_info += {'build guest agent': have_ga}
+summary(summary_info, bool_yn: true, section: 'Configurable features')
+
+# Compilation information
+summary_info = {}
 summary_info += {'host CPU':          cpu}
 summary_info += {'host endianness':   build_machine.endian()}
-summary_info += {'target list':       ' '}
-summary_info += {'gprof enabled':     config_host.has_key('CONFIG_GPROF')}
-summary_info += {'sparse enabled':    sparse.found()}
+summary_info += {'C compiler':        ' '.join(meson.get_compiler('c').cmd_array())}
+summary_info += {'Host C compiler':   ' '.join(meson.get_compiler('c', native: true).cmd_array())}
+if 'cpp' in all_languages
+  summary_info += {'C++ compiler':    ' '.join(meson.get_compiler('cpp').cmd_array())}
+else
+  summary_info += {'C++ compiler':      false}
+endif
+if 'objc' in all_languages
+  summary_info += {'Objective-C compiler': ' '.join(meson.get_compiler('objc').cmd_array())}
+else
+  summary_info += {'Objective-C compiler': false}
+endif
+option_cflags = (get_option('debug') ? ['-g'] : [])
+if get_option('optimization') != 'plain'
+  option_cflags += ['-O' + get_option('optimization')]
+endif
+summary_info += {'CFLAGS':            ' '.join(get_option('c_args') + option_cflags)}
+if 'cpp' in all_languages
+  summary_info += {'CXXFLAGS':        ' '.join(get_option('cpp_args') + option_cflags)}
+endif
+if 'objc' in all_languages
+  summary_info += {'OBJCFLAGS':       ' '.join(get_option('objc_args') + option_cflags)}
+endif
+link_args = get_option('c_link_args')
+if link_args.length() > 0
+  summary_info += {'LDFLAGS':         ' '.join(link_args)}
+endif
+summary_info += {'QEMU_CFLAGS':       ' '.join(qemu_common_flags + qemu_cflags)}
+if 'cpp' in all_languages
+  summary_info += {'QEMU_CXXFLAGS':     ' '.join(qemu_common_flags + qemu_cxxflags)}
+endif
+if 'objc' in all_languages
+  summary_info += {'QEMU_OBJCFLAGS':    ' '.join(qemu_common_flags)}
+endif
+summary_info += {'QEMU_LDFLAGS':      ' '.join(qemu_ldflags)}
+summary_info += {'link-time optimization (LTO)': get_option('b_lto')}
+summary_info += {'PIE':               get_option('b_pie')}
+summary_info += {'static build':      get_option('prefer_static')}
+summary_info += {'malloc trim support': has_malloc_trim}
+summary_info += {'membarrier':        have_membarrier}
+summary_info += {'debug graph lock':  get_option('debug_graph_lock')}
+summary_info += {'debug stack usage': get_option('debug_stack_usage')}
+summary_info += {'mutex debugging':   get_option('debug_mutex')}
+summary_info += {'memory allocator':  get_option('malloc')}
+summary_info += {'avx2 optimization': config_host_data.get('CONFIG_AVX2_OPT')}
+summary_info += {'avx512bw optimization': config_host_data.get('CONFIG_AVX512BW_OPT')}
+summary_info += {'avx512f optimization': config_host_data.get('CONFIG_AVX512F_OPT')}
+summary_info += {'gcov':              get_option('b_coverage')}
+summary_info += {'thread sanitizer':  get_option('tsan')}
+summary_info += {'CFI support':       get_option('cfi')}
+if get_option('cfi')
+  summary_info += {'CFI debug support': get_option('cfi_debug')}
+endif
 summary_info += {'strip binaries':    get_option('strip')}
-summary_info += {'profiler':          config_host.has_key('CONFIG_PROFILER')}
-summary_info += {'static build':      config_host.has_key('CONFIG_STATIC')}
-if targetos == 'darwin'
-  summary_info += {'Cocoa support': config_host.has_key('CONFIG_COCOA')}
-endif
-# TODO: add back version
-summary_info += {'SDL support':       false}
-summary_info += {'SDL image support': false}
-# TODO: add back version
-summary_info += {'GTK support':       config_host.has_key('CONFIG_GTK')}
-summary_info += {'GTK GL support':    config_host.has_key('CONFIG_GTK_GL')}
-summary_info += {'pixman':            false}
-# TODO: add back version
-summary_info += {'VTE support':       config_host.has_key('CONFIG_VTE')}
-summary_info += {'TLS priority':      config_host['CONFIG_TLS_PRIORITY']}
-summary_info += {'GNUTLS support':    config_host.has_key('CONFIG_GNUTLS')}
-# TODO: add back version
-summary_info += {'libgcrypt':         config_host.has_key('CONFIG_GCRYPT')}
-if config_host.has_key('CONFIG_GCRYPT')
-   summary_info += {'  hmac':            config_host.has_key('CONFIG_GCRYPT_HMAC')}
-   summary_info += {'  XTS':             not config_host.has_key('CONFIG_QEMU_PRIVATE_XTS')}
-endif
-# TODO: add back version
-summary_info += {'nettle':            config_host.has_key('CONFIG_NETTLE')}
-if config_host.has_key('CONFIG_NETTLE')
-   summary_info += {'  XTS':             not config_host.has_key('CONFIG_QEMU_PRIVATE_XTS')}
-endif
-summary_info += {'libtasn1':          config_host.has_key('CONFIG_TASN1')}
-summary_info += {'PAM':               config_host.has_key('CONFIG_AUTH_PAM')}
-summary_info += {'iconv support':     false}
-summary_info += {'curses support':    false}
-# TODO: add back version
-summary_info += {'virgl support':     config_host.has_key('CONFIG_VIRGL')}
-summary_info += {'curl support':      config_host.has_key('CONFIG_CURL')}
+summary_info += {'sparse':            sparse}
 summary_info += {'mingw32 support':   targetos == 'windows'}
-summary_info += {'Audio drivers':     config_host['CONFIG_AUDIO_DRIVERS']}
-summary_info += {'Block whitelist (rw)': config_host['CONFIG_BDRV_RW_WHITELIST']}
-summary_info += {'Block whitelist (ro)': config_host['CONFIG_BDRV_RO_WHITELIST']}
-summary_info += {'VirtFS support':    config_host.has_key('CONFIG_VIRTFS')}
-summary_info += {'build virtiofs daemon': false}
-summary_info += {'Multipath support': false}
-summary_info += {'VNC support':       false}
-summary_info += {'xen support':       config_host.has_key('CONFIG_XEN_BACKEND')}
-if config_host.has_key('CONFIG_XEN_BACKEND')
-  summary_info += {'xen ctrl version':  config_host['CONFIG_XEN_CTRL_INTERFACE_VERSION']}
-endif
-summary_info += {'brlapi support':    config_host.has_key('CONFIG_BRLAPI')}
-summary_info += {'Documentation':     false}
-summary_info += {'PIE':               get_option('b_pie')}
-summary_info += {'vde support':       config_host.has_key('CONFIG_VDE')}
-summary_info += {'netmap support':    config_host.has_key('CONFIG_NETMAP')}
-summary_info += {'Linux AIO support': config_host.has_key('CONFIG_LINUX_AIO')}
-summary_info += {'Linux io_uring support': config_host.has_key('CONFIG_LINUX_IO_URING')}
-summary_info += {'ATTR/XATTR support': config_host.has_key('CONFIG_ATTR')}
-summary_info += {'Install blobs':     get_option('install_blobs')}
-summary_info += {'KVM support':       config_all.has_key('CONFIG_KVM')}
-summary_info += {'HAX support':       config_all.has_key('CONFIG_HAX')}
-summary_info += {'HVF support':       config_all.has_key('CONFIG_HVF')}
-summary_info += {'WHPX support':      config_all.has_key('CONFIG_WHPX')}
+summary(summary_info, bool_yn: true, section: 'Compilation')
+
+# snarf the cross-compilation information for tests
+summary_info = {}
+have_cross = false
+foreach target: target_dirs
+  tcg_mak = meson.current_build_dir() / 'tests/tcg' / target / 'config-target.mak'
+  if fs.exists(tcg_mak)
+    config_cross_tcg = keyval.load(tcg_mak)
+    if 'CC' in config_cross_tcg
+      summary_info += {config_cross_tcg['TARGET_NAME']: config_cross_tcg['CC']}
+      have_cross = true
+    endif
+  endif
+endforeach
+if have_cross
+  summary(summary_info, bool_yn: true, section: 'Cross compilers')
+endif
+
+# Targets and accelerators
+summary_info = {}
+if have_system
+  summary_info += {'KVM support':       config_all.has_key('CONFIG_KVM')}
+  summary_info += {'HVF support':       config_all.has_key('CONFIG_HVF')}
+  summary_info += {'WHPX support':      config_all.has_key('CONFIG_WHPX')}
+  summary_info += {'NVMM support':      config_all.has_key('CONFIG_NVMM')}
+  summary_info += {'Xen support':       xen.found()}
+  if xen.found()
+    summary_info += {'xen ctrl version':  xen.version()}
+  endif
+  summary_info += {'Xen emulation':     config_all.has_key('CONFIG_XEN_EMU')}
+endif
 summary_info += {'TCG support':       config_all.has_key('CONFIG_TCG')}
 if config_all.has_key('CONFIG_TCG')
-  summary_info += {'TCG debug enabled': config_host.has_key('CONFIG_DEBUG_TCG')}
-  summary_info += {'TCG interpreter':   config_host.has_key('CONFIG_TCG_INTERPRETER')}
-endif
-summary_info += {'malloc trim support': false}
-summary_info += {'RDMA support':      config_host.has_key('CONFIG_RDMA')}
-summary_info += {'PVRDMA support':    config_host.has_key('CONFIG_PVRDMA')}
-summary_info += {'fdt support':       false}
-summary_info += {'membarrier':        config_host.has_key('CONFIG_MEMBARRIER')}
-summary_info += {'preadv support':    config_host.has_key('CONFIG_PREADV')}
-summary_info += {'fdatasync':         config_host.has_key('CONFIG_FDATASYNC')}
-summary_info += {'madvise':           config_host.has_key('CONFIG_MADVISE')}
-summary_info += {'posix_madvise':     config_host.has_key('CONFIG_POSIX_MADVISE')}
-summary_info += {'posix_memalign':    config_host.has_key('CONFIG_POSIX_MEMALIGN')}
-summary_info += {'libcap-ng support': config_host.has_key('CONFIG_LIBCAP_NG')}
-summary_info += {'vhost-kernel support': config_host.has_key('CONFIG_VHOST_KERNEL')}
-summary_info += {'vhost-net support': config_host.has_key('CONFIG_VHOST_NET')}
-summary_info += {'vhost-crypto support': config_host.has_key('CONFIG_VHOST_CRYPTO')}
-summary_info += {'vhost-scsi support': config_host.has_key('CONFIG_VHOST_SCSI')}
-summary_info += {'vhost-vsock support': config_host.has_key('CONFIG_VHOST_VSOCK')}
-summary_info += {'vhost-user support': config_host.has_key('CONFIG_VHOST_USER')}
-summary_info += {'vhost-user-blk server support': have_vhost_user_blk_server}
-summary_info += {'vhost-user-fs support': config_host.has_key('CONFIG_VHOST_USER_FS')}
-summary_info += {'vhost-vdpa support': config_host.has_key('CONFIG_VHOST_VDPA')}
-summary_info += {'Trace backends':    config_host['TRACE_BACKENDS']}
-if config_host['TRACE_BACKENDS'].split().contains('simple')
-  summary_info += {'Trace output file': config_host['CONFIG_TRACE_FILE'] + '-<pid>'}
-endif
-# TODO: add back protocol and server version
-summary_info += {'spice support':     config_host.has_key('CONFIG_SPICE')}
-summary_info += {'rbd support':       config_host.has_key('CONFIG_RBD')}
-summary_info += {'xfsctl support':    config_host.has_key('CONFIG_XFS')}
-summary_info += {'smartcard support': config_host.has_key('CONFIG_SMARTCARD')}
-summary_info += {'U2F support':       false}
-summary_info += {'libusb':            config_host.has_key('CONFIG_USB_LIBUSB')}
-summary_info += {'usb net redir':     config_host.has_key('CONFIG_USB_REDIR')}
-summary_info += {'OpenGL support':    config_host.has_key('CONFIG_OPENGL')}
-summary_info += {'OpenGL dmabufs':    config_host.has_key('CONFIG_OPENGL_DMABUF')}
-summary_info += {'libiscsi support':  config_host.has_key('CONFIG_LIBISCSI')}
-summary_info += {'libnfs support':    config_host.has_key('CONFIG_LIBNFS')}
-summary_info += {'build guest agent': config_host.has_key('CONFIG_GUEST_AGENT')}
-summary_info += {'seccomp support':   config_host.has_key('CONFIG_SECCOMP')}
-summary_info += {'coroutine backend': config_host['CONFIG_COROUTINE_BACKEND']}
-summary_info += {'coroutine pool':    config_host['CONFIG_COROUTINE_POOL'] == '1'}
-summary_info += {'debug stack usage': config_host.has_key('CONFIG_DEBUG_STACK_USAGE')}
-summary_info += {'mutex debugging':   config_host.has_key('CONFIG_DEBUG_MUTEX')}
-summary_info += {'crypto afalg':      config_host.has_key('CONFIG_AF_ALG')}
-summary_info += {'GlusterFS support': config_host.has_key('CONFIG_GLUSTERFS')}
-summary_info += {'gcov':              get_option('b_coverage')}
-summary_info += {'TPM support':       config_host.has_key('CONFIG_TPM')}
-summary_info += {'libssh support':    config_host.has_key('CONFIG_LIBSSH')}
-summary_info += {'QOM debugging':     config_host.has_key('CONFIG_QOM_CAST_DEBUG')}
-summary_info += {'Live block migration': config_host.has_key('CONFIG_LIVE_BLOCK_MIGRATION')}
-summary_info += {'lzo support':       config_host.has_key('CONFIG_LZO')}
-summary_info += {'snappy support':    config_host.has_key('CONFIG_SNAPPY')}
-summary_info += {'bzip2 support':     config_host.has_key('CONFIG_BZIP2')}
-summary_info += {'lzfse support':     config_host.has_key('CONFIG_LZFSE')}
-summary_info += {'zstd support':      config_host.has_key('CONFIG_ZSTD')}
-summary_info += {'NUMA host support': config_host.has_key('CONFIG_NUMA')}
-summary_info += {'libxml2':           config_host.has_key('CONFIG_LIBXML2')}
-summary_info += {'memory allocator':  get_option('malloc')}
-summary_info += {'avx2 optimization': config_host.has_key('CONFIG_AVX2_OPT')}
-summary_info += {'avx512f optimization': config_host.has_key('CONFIG_AVX512F_OPT')}
-summary_info += {'replication support': config_host.has_key('CONFIG_REPLICATION')}
-summary_info += {'bochs support':     config_host.has_key('CONFIG_BOCHS')}
-summary_info += {'cloop support':     config_host.has_key('CONFIG_CLOOP')}
-summary_info += {'dmg support':       config_host.has_key('CONFIG_DMG')}
-summary_info += {'qcow v1 support':   config_host.has_key('CONFIG_QCOW1')}
-summary_info += {'vdi support':       config_host.has_key('CONFIG_VDI')}
-summary_info += {'vvfat support':     config_host.has_key('CONFIG_VVFAT')}
-summary_info += {'qed support':       config_host.has_key('CONFIG_QED')}
-summary_info += {'parallels support': config_host.has_key('CONFIG_PARALLELS')}
-summary_info += {'sheepdog support':  config_host.has_key('CONFIG_SHEEPDOG')}
-summary_info += {'capstone':          false}
-summary_info += {'libpmem support':   config_host.has_key('CONFIG_LIBPMEM')}
-summary_info += {'libdaxctl support': config_host.has_key('CONFIG_LIBDAXCTL')}
-summary_info += {'libudev':           false}
-summary_info += {'default devices':   config_host['CONFIG_MINIKCONF_MODE'] == '--defconfig'}
-summary_info += {'plugin support':    config_host.has_key('CONFIG_PLUGIN')}
-summary_info += {'fuzzing support':   config_host.has_key('CONFIG_FUZZ')}
-if config_host.has_key('HAVE_GDB_BIN')
-  summary_info += {'gdb':             config_host['HAVE_GDB_BIN']}
-endif
-summary_info += {'thread sanitizer':  config_host.has_key('CONFIG_TSAN')}
-summary_info += {'rng-none':          config_host.has_key('CONFIG_RNG_NONE')}
-summary_info += {'Linux keyring':     config_host.has_key('CONFIG_SECRET_KEYRING')}
-summary(summary_info, bool_yn: true)
-
-if not supported_cpus.contains(cpu)
+  if get_option('tcg_interpreter')
+    summary_info += {'TCG backend':   'TCI (TCG with bytecode interpreter, slow)'}
+  else
+    summary_info += {'TCG backend':   'native (@0@)'.format(cpu)}
+  endif
+  summary_info += {'TCG plugins':       get_option('plugins')}
+  summary_info += {'TCG debug enabled': get_option('debug_tcg')}
+endif
+summary_info += {'target list':       ' '.join(target_dirs)}
+if have_system
+  summary_info += {'default devices':   get_option('default_devices')}
+  summary_info += {'out of process emulation': multiprocess_allowed}
+  summary_info += {'vfio-user server': vfio_user_server_allowed}
+endif
+summary(summary_info, bool_yn: true, section: 'Targets and accelerators')
+
+# Block layer
+summary_info = {}
+summary_info += {'coroutine backend': coroutine_backend}
+summary_info += {'coroutine pool':    have_coroutine_pool}
+if have_block
+  summary_info += {'Block whitelist (rw)': get_option('block_drv_rw_whitelist')}
+  summary_info += {'Block whitelist (ro)': get_option('block_drv_ro_whitelist')}
+  summary_info += {'Use block whitelist in tools': get_option('block_drv_whitelist_in_tools')}
+  summary_info += {'VirtFS (9P) support':    have_virtfs}
+  summary_info += {'VirtFS (9P) Proxy Helper support (deprecated)': have_virtfs_proxy_helper}
+  summary_info += {'Live block migration': config_host_data.get('CONFIG_LIVE_BLOCK_MIGRATION')}
+  summary_info += {'replication support': config_host_data.get('CONFIG_REPLICATION')}
+  summary_info += {'bochs support':     get_option('bochs').allowed()}
+  summary_info += {'cloop support':     get_option('cloop').allowed()}
+  summary_info += {'dmg support':       get_option('dmg').allowed()}
+  summary_info += {'qcow v1 support':   get_option('qcow1').allowed()}
+  summary_info += {'vdi support':       get_option('vdi').allowed()}
+  summary_info += {'vhdx support':      get_option('vhdx').allowed()}
+  summary_info += {'vmdk support':      get_option('vmdk').allowed()}
+  summary_info += {'vpc support':       get_option('vpc').allowed()}
+  summary_info += {'vvfat support':     get_option('vvfat').allowed()}
+  summary_info += {'qed support':       get_option('qed').allowed()}
+  summary_info += {'parallels support': get_option('parallels').allowed()}
+  summary_info += {'FUSE exports':      fuse}
+  summary_info += {'VDUSE block exports': have_vduse_blk_export}
+endif
+summary(summary_info, bool_yn: true, section: 'Block layer support')
+
+# Crypto
+summary_info = {}
+summary_info += {'TLS priority':      get_option('tls_priority')}
+summary_info += {'GNUTLS support':    gnutls}
+if gnutls.found()
+  summary_info += {'  GNUTLS crypto':   gnutls_crypto.found()}
+endif
+summary_info += {'libgcrypt':         gcrypt}
+summary_info += {'nettle':            nettle}
+if nettle.found()
+   summary_info += {'  XTS':             xts != 'private'}
+endif
+summary_info += {'AF_ALG support':    have_afalg}
+summary_info += {'rng-none':          get_option('rng_none')}
+summary_info += {'Linux keyring':     have_keyring}
+summary_info += {'Linux keyutils':    keyutils}
+summary(summary_info, bool_yn: true, section: 'Crypto')
+
+# UI
+summary_info = {}
+if targetos == 'darwin'
+  summary_info += {'Cocoa support':           cocoa}
+endif
+summary_info += {'SDL support':       sdl}
+summary_info += {'SDL image support': sdl_image}
+summary_info += {'GTK support':       gtk}
+summary_info += {'pixman':            pixman}
+summary_info += {'VTE support':       vte}
+summary_info += {'PNG support':       png}
+summary_info += {'VNC support':       vnc}
+if vnc.found()
+  summary_info += {'VNC SASL support':  sasl}
+  summary_info += {'VNC JPEG support':  jpeg}
+endif
+summary_info += {'spice protocol support': spice_protocol}
+if spice_protocol.found()
+  summary_info += {'  spice server support': spice}
+endif
+summary_info += {'curses support':    curses}
+summary_info += {'brlapi support':    brlapi}
+summary(summary_info, bool_yn: true, section: 'User interface')
+
+# Audio backends
+summary_info = {}
+if targetos not in ['darwin', 'haiku', 'windows']
+  summary_info += {'OSS support':     oss}
+  summary_info += {'sndio support':   sndio}
+elif targetos == 'darwin'
+  summary_info += {'CoreAudio support': coreaudio}
+elif targetos == 'windows'
+  summary_info += {'DirectSound support': dsound}
+endif
+if targetos == 'linux'
+  summary_info += {'ALSA support':    alsa}
+  summary_info += {'PulseAudio support': pulse}
+endif
+summary_info += {'PipeWire support':  pipewire}
+summary_info += {'JACK support':      jack}
+summary(summary_info, bool_yn: true, section: 'Audio backends')
+
+# Network backends
+summary_info = {}
+if targetos == 'darwin'
+  summary_info += {'vmnet.framework support': vmnet}
+endif
+summary_info += {'AF_XDP support':    libxdp}
+summary_info += {'slirp support':     slirp}
+summary_info += {'vde support':       vde}
+summary_info += {'netmap support':    have_netmap}
+summary_info += {'l2tpv3 support':    have_l2tpv3}
+summary(summary_info, bool_yn: true, section: 'Network backends')
+
+# Libraries
+summary_info = {}
+summary_info += {'libtasn1':          tasn1}
+summary_info += {'PAM':               pam}
+summary_info += {'iconv support':     iconv}
+summary_info += {'virgl support':     virgl}
+summary_info += {'rutabaga support':  rutabaga}
+summary_info += {'blkio support':     blkio}
+summary_info += {'curl support':      curl}
+summary_info += {'Multipath support': mpathpersist}
+summary_info += {'Linux AIO support': libaio}
+summary_info += {'Linux io_uring support': linux_io_uring}
+summary_info += {'ATTR/XATTR support': libattr}
+summary_info += {'RDMA support':      rdma}
+summary_info += {'PVRDMA support':    have_pvrdma}
+summary_info += {'fdt support':       fdt_opt == 'disabled' ? false : fdt_opt}
+summary_info += {'libcap-ng support': libcap_ng}
+summary_info += {'bpf support':       libbpf}
+summary_info += {'rbd support':       rbd}
+summary_info += {'smartcard support': cacard}
+summary_info += {'U2F support':       u2f}
+summary_info += {'libusb':            libusb}
+summary_info += {'usb net redir':     usbredir}
+summary_info += {'OpenGL support (epoxy)': opengl}
+summary_info += {'GBM':               gbm}
+summary_info += {'libiscsi support':  libiscsi}
+summary_info += {'libnfs support':    libnfs}
+if targetos == 'windows'
+  if have_ga
+    summary_info += {'QGA VSS support':   have_qga_vss}
+  endif
+endif
+summary_info += {'seccomp support':   seccomp}
+summary_info += {'GlusterFS support': glusterfs}
+summary_info += {'hv-balloon support': hv_balloon}
+summary_info += {'TPM support':       have_tpm}
+summary_info += {'libssh support':    libssh}
+summary_info += {'lzo support':       lzo}
+summary_info += {'snappy support':    snappy}
+summary_info += {'bzip2 support':     libbzip2}
+summary_info += {'lzfse support':     liblzfse}
+summary_info += {'zstd support':      zstd}
+summary_info += {'NUMA host support': numa}
+summary_info += {'capstone':          capstone}
+summary_info += {'libpmem support':   libpmem}
+summary_info += {'libdaxctl support': libdaxctl}
+summary_info += {'libudev':           libudev}
+# Dummy dependency, keep .found()
+summary_info += {'FUSE lseek':        fuse_lseek.found()}
+summary_info += {'selinux':           selinux}
+summary_info += {'libdw':             libdw}
+summary(summary_info, bool_yn: true, section: 'Dependencies')
+
+if host_arch == 'unknown'
   message()
-  warning('SUPPORT FOR THIS HOST CPU WILL GO AWAY IN FUTURE RELEASES!')
+  warning('UNSUPPORTED HOST CPU')
   message()
-  message('CPU host architecture ' + cpu + ' support is not currently maintained.')
-  message('The QEMU project intends to remove support for this host CPU in')
-  message('a future release if nobody volunteers to maintain it and to')
-  message('provide a build host for our continuous integration setup.')
-  message('configure has succeeded and you can continue to build, but')
-  message('if you care about QEMU on this platform you should contact')
-  message('us upstream at qemu-devel@nongnu.org.')
+  message('Support for CPU host architecture ' + cpu + ' is not currently')
+  message('maintained. The QEMU project does not guarantee that QEMU will')
+  message('compile or work on this host CPU. You can help by volunteering')
+  message('to maintain it and providing a build host for our continuous')
+  message('integration setup.')
+  if get_option('tcg').allowed() and target_dirs.length() > 0
+    message()
+    message('configure has succeeded and you can continue to build, but')
+    message('QEMU will use a slow interpreter to emulate the target CPU.')
+  endif
 endif
 
 if not supported_oses.contains(targetos)
   message()
-  warning('WARNING: SUPPORT FOR THIS HOST OS WILL GO AWAY IN FUTURE RELEASES!')
+  warning('UNSUPPORTED HOST OS')
   message()
-  message('Host OS ' + targetos + 'support is not currently maintained.')
-  message('The QEMU project intends to remove support for this host OS in')
-  message('a future release if nobody volunteers to maintain it and to')
-  message('provide a build host for our continuous integration setup.')
+  message('Support for host OS ' + targetos + 'is not currently maintained.')
   message('configure has succeeded and you can continue to build, but')
-  message('if you care about QEMU on this platform you should contact')
-  message('us upstream at qemu-devel@nongnu.org.')
+  message('the QEMU project does not guarantee that QEMU will compile or')
+  message('work on this operating system. You can help by volunteering')
+  message('to maintain it and providing a build host for our continuous')
+  message('integration setup. This will ensure that future versions of QEMU')
+  message('will keep working on ' + targetos + '.')
+endif
+
+if host_arch == 'unknown' or not supported_oses.contains(targetos)
+  message()
+  message('If you want to help supporting QEMU on this platform, please')
+  message('contact the developers at qemu-devel@nongnu.org.')
+endif
+
+actually_reloc = get_option('relocatable')
+# check if get_relocated_path() is actually able to relocate paths
+if get_option('relocatable') and \
+  not (get_option('prefix') / get_option('bindir')).startswith(get_option('prefix') / '')
+  message()
+  warning('bindir not included within prefix, the installation will not be relocatable.')
+  actually_reloc = false
+endif
+if not actually_reloc and (targetos == 'windows' or get_option('relocatable'))
+  if targetos == 'windows'
+    message()
+    warning('Windows installs should usually be relocatable.')
+  endif
+  message()
+  message('QEMU will have to be installed under ' + get_option('prefix') + '.')
+  message('Use --disable-relocatable to remove this warning.')
 endif
index f6f64785fe799fc925e824e38b9429c7443291ea..c9baeda6395634c3478a3c2a3a8c8f57fbe0b592 100644 (file)
@@ -1,20 +1,66 @@
+# These options do not correspond to a --enable/--disable-* option
+# on the configure script command line.  If you add more, list them in
+# scripts/meson-buildoptions.py's SKIP_OPTIONS constant too.
+
 option('qemu_suffix', type : 'string', value: 'qemu',
        description: 'Suffix for QEMU data/modules/config directories (can be empty)')
-option('docdir', type : 'string', value : 'doc',
+option('docdir', type : 'string', value : 'share/doc',
        description: 'Base directory for documentation installation (can be empty)')
-option('qemu_firmwarepath', type : 'string', value : '',
+option('qemu_firmwarepath', type : 'array', value : ['share/qemu-firmware'],
        description: 'search PATH for firmware files')
-option('sphinx_build', type : 'string', value : '',
-       description: 'Use specified sphinx-build [$sphinx_build] for building document (default to be empty)')
+option('pkgversion', type : 'string', value : '',
+       description: 'use specified string as sub-version of the package')
+option('smbd', type : 'string', value : '',
+       description: 'Path to smbd for slirp networking')
+option('iasl', type : 'string', value : '',
+       description: 'Path to ACPI disassembler')
+option('tls_priority', type : 'string', value : 'NORMAL',
+       description: 'Default TLS protocol/cipher priority string')
+option('default_devices', type : 'boolean', value : true,
+       description: 'Include a default selection of devices in emulators')
+option('audio_drv_list', type: 'array', value: ['default'],
+       choices: ['alsa', 'coreaudio', 'default', 'dsound', 'jack', 'oss', 'pa', 'pipewire', 'sdl', 'sndio'],
+       description: 'Set audio driver list')
+option('block_drv_rw_whitelist', type : 'string', value : '',
+       description: 'set block driver read-write whitelist (by default affects only QEMU, not tools like qemu-img)')
+option('block_drv_ro_whitelist', type : 'string', value : '',
+       description: 'set block driver read-only whitelist (by default affects only QEMU, not tools like qemu-img)')
+option('interp_prefix', type : 'string', value : '/usr/gnemul/qemu-%M',
+       description: 'where to find shared libraries etc., use %M for cpu name')
+option('fuzzing_engine', type : 'string', value : '',
+       description: 'fuzzing engine library for OSS-Fuzz')
+option('trace_file', type: 'string', value: 'trace',
+       description: 'Trace file prefix for simple backend')
+option('coroutine_backend', type: 'combo',
+       choices: ['ucontext', 'sigaltstack', 'windows', 'auto'],
+       value: 'auto', description: 'coroutine backend to use')
+
+# Everything else can be set via --enable/--disable-* option
+# on the configure script command line.  After adding an option
+# here make sure to run "make update-buildoptions".
 
 option('docs', type : 'feature', value : 'auto',
        description: 'Documentations build support')
-option('gettext', type : 'boolean', value : true,
+option('fuzzing', type : 'boolean', value: false,
+       description: 'build fuzzing targets')
+option('gettext', type : 'feature', value : 'auto',
        description: 'Localization of the GTK+ user interface')
+option('modules', type : 'feature', value : 'disabled',
+       description: 'modules support (non Windows)')
+option('module_upgrades', type : 'boolean', value : false,
+       description: 'try to load modules from alternate paths for upgrades')
 option('install_blobs', type : 'boolean', value : true,
        description: 'install provided firmware blobs')
 option('sparse', type : 'feature', value : 'auto',
        description: 'sparse checker')
+option('guest_agent', type : 'feature', value : 'auto',
+       description: 'Build QEMU Guest Agent')
+option('guest_agent_msi', type : 'feature', value : 'auto',
+       description: 'Build MSI package for the QEMU Guest Agent')
+option('tools', type : 'feature', value : 'auto',
+       description: 'build support utilities that come with QEMU')
+option('qga_vss', type : 'feature', value: 'auto',
+       description: 'build QGA VSS support (broken with MinGW)')
 
 option('malloc_trim', type : 'feature', value : 'auto',
        description: 'enable libc malloc_trim() for memory optimization')
@@ -23,56 +69,302 @@ option('malloc', type : 'combo', choices : ['system', 'tcmalloc', 'jemalloc'],
 
 option('kvm', type: 'feature', value: 'auto',
        description: 'KVM acceleration support')
-option('hax', type: 'feature', value: 'auto',
-       description: 'HAX acceleration support')
 option('whpx', type: 'feature', value: 'auto',
        description: 'WHPX acceleration support')
 option('hvf', type: 'feature', value: 'auto',
        description: 'HVF acceleration support')
+option('nvmm', type: 'feature', value: 'auto',
+       description: 'NVMM acceleration support')
 option('xen', type: 'feature', value: 'auto',
        description: 'Xen backend support')
 option('xen_pci_passthrough', type: 'feature', value: 'auto',
        description: 'Xen PCI passthrough support')
-option('tcg', type: 'feature', value: 'auto',
+option('tcg', type: 'feature', value: 'enabled',
        description: 'TCG support')
+option('plugins', type: 'boolean', value: false,
+       description: 'TCG plugins via shared library loading')
+option('debug_tcg', type: 'boolean', value: false,
+       description: 'TCG debugging')
+option('tcg_interpreter', type: 'boolean', value: false,
+       description: 'TCG with bytecode interpreter (slow)')
+option('safe_stack', type: 'boolean', value: false,
+       description: 'SafeStack Stack Smash Protection (requires clang/llvm and coroutine backend ucontext)')
+option('sanitizers', type: 'boolean', value: false,
+       description: 'enable default sanitizers')
+option('tsan', type: 'boolean', value: false,
+       description: 'enable thread sanitizer')
+option('stack_protector', type: 'feature', value: 'auto',
+       description: 'compiler-provided stack protection')
+option('cfi', type: 'boolean', value: false,
+       description: 'Control-Flow Integrity (CFI)')
+option('cfi_debug', type: 'boolean', value: false,
+       description: 'Verbose errors in case of CFI violation')
+option('multiprocess', type: 'feature', value: 'auto',
+       description: 'Out of process device emulation support')
+option('relocatable', type : 'boolean', value : 'true',
+       description: 'toggle relocatable install')
+option('vfio_user_server', type: 'feature', value: 'disabled',
+       description: 'vfio-user server support')
+option('dbus_display', type: 'feature', value: 'auto',
+       description: '-display dbus support')
+option('tpm', type : 'feature', value : 'auto',
+       description: 'TPM support')
+
+# Do not enable it by default even for Mingw32, because it doesn't
+# work on Wine.
+option('membarrier', type: 'feature', value: 'disabled',
+       description: 'membarrier system call (for Linux 4.14+ or Windows')
+
+option('avx2', type: 'feature', value: 'auto',
+       description: 'AVX2 optimizations')
+option('avx512f', type: 'feature', value: 'disabled',
+       description: 'AVX512F optimizations')
+option('avx512bw', type: 'feature', value: 'auto',
+       description: 'AVX512BW optimizations')
+option('keyring', type: 'feature', value: 'auto',
+       description: 'Linux keyring support')
+option('libkeyutils', type: 'feature', value: 'auto',
+       description: 'Linux keyutils support')
 
+option('af_xdp', type : 'feature', value : 'auto',
+       description: 'AF_XDP network backend support')
+option('attr', type : 'feature', value : 'auto',
+       description: 'attr/xattr support')
+option('auth_pam', type : 'feature', value : 'auto',
+       description: 'PAM access control')
+option('brlapi', type : 'feature', value : 'auto',
+       description: 'brlapi character device driver')
+option('bzip2', type : 'feature', value : 'auto',
+       description: 'bzip2 support for DMG images')
+option('cap_ng', type : 'feature', value : 'auto',
+       description: 'cap_ng support')
+option('blkio', type : 'feature', value : 'auto',
+       description: 'libblkio block device driver')
+option('bpf', type : 'feature', value : 'auto',
+        description: 'eBPF support')
 option('cocoa', type : 'feature', value : 'auto',
        description: 'Cocoa user interface (macOS only)')
+option('curl', type : 'feature', value : 'auto',
+       description: 'CURL block device driver')
+option('gio', type : 'feature', value : 'auto',
+       description: 'use libgio for D-Bus support')
+option('glusterfs', type : 'feature', value : 'auto',
+       description: 'Glusterfs block device driver')
+option('hv_balloon', type : 'feature', value : 'auto',
+       description: 'hv-balloon driver (requires Glib 2.68+ GTree API)')
+option('libdw', type : 'feature', value : 'auto',
+       description: 'debuginfo support')
+option('libiscsi', type : 'feature', value : 'auto',
+       description: 'libiscsi userspace initiator')
+option('libnfs', type : 'feature', value : 'auto',
+       description: 'libnfs block device driver')
 option('mpath', type : 'feature', value : 'auto',
        description: 'Multipath persistent reservation passthrough')
+option('numa', type : 'feature', value : 'auto',
+       description: 'libnuma support')
 option('iconv', type : 'feature', value : 'auto',
        description: 'Font glyph conversion support')
 option('curses', type : 'feature', value : 'auto',
        description: 'curses UI')
+option('gnutls', type : 'feature', value : 'auto',
+       description: 'GNUTLS cryptography support')
+option('nettle', type : 'feature', value : 'auto',
+       description: 'nettle cryptography support')
+option('gcrypt', type : 'feature', value : 'auto',
+       description: 'libgcrypt cryptography support')
+option('crypto_afalg', type : 'feature', value : 'disabled',
+       description: 'Linux AF_ALG crypto backend driver')
+option('libdaxctl', type : 'feature', value : 'auto',
+       description: 'libdaxctl support')
+option('libpmem', type : 'feature', value : 'auto',
+       description: 'libpmem support')
+option('libssh', type : 'feature', value : 'auto',
+       description: 'ssh block device support')
 option('libudev', type : 'feature', value : 'auto',
        description: 'Use libudev to enumerate host devices')
+option('libusb', type : 'feature', value : 'auto',
+       description: 'libusb support for USB passthrough')
+option('linux_aio', type : 'feature', value : 'auto',
+       description: 'Linux AIO support')
+option('linux_io_uring', type : 'feature', value : 'auto',
+       description: 'Linux io_uring support')
+option('lzfse', type : 'feature', value : 'auto',
+       description: 'lzfse support for DMG images')
+option('lzo', type : 'feature', value : 'auto',
+       description: 'lzo compression support')
+option('rbd', type : 'feature', value : 'auto',
+       description: 'Ceph block device driver')
+option('opengl', type : 'feature', value : 'auto',
+       description: 'OpenGL support')
+option('rdma', type : 'feature', value : 'auto',
+       description: 'Enable RDMA-based migration')
+option('pvrdma', type : 'feature', value : 'auto',
+       description: 'Enable PVRDMA support')
+option('gtk', type : 'feature', value : 'auto',
+       description: 'GTK+ user interface')
 option('sdl', type : 'feature', value : 'auto',
        description: 'SDL user interface')
 option('sdl_image', type : 'feature', value : 'auto',
        description: 'SDL Image support for icons')
+option('seccomp', type : 'feature', value : 'auto',
+       description: 'seccomp support')
+option('smartcard', type : 'feature', value : 'auto',
+       description: 'CA smartcard emulation support')
+option('snappy', type : 'feature', value : 'auto',
+       description: 'snappy compression support')
+option('spice', type : 'feature', value : 'auto',
+       description: 'Spice server support')
+option('spice_protocol', type : 'feature', value : 'auto',
+       description: 'Spice protocol support')
 option('u2f', type : 'feature', value : 'auto',
        description: 'U2F emulation support')
-option('vnc', type : 'feature', value : 'enabled',
+option('canokey', type : 'feature', value : 'auto',
+       description: 'CanoKey support')
+option('usb_redir', type : 'feature', value : 'auto',
+       description: 'libusbredir support')
+option('l2tpv3', type : 'feature', value : 'auto',
+       description: 'l2tpv3 network backend support')
+option('netmap', type : 'feature', value : 'auto',
+       description: 'netmap network backend support')
+option('pixman', type : 'feature', value : 'auto',
+       description: 'pixman support')
+option('slirp', type: 'feature', value: 'auto',
+       description: 'libslirp user mode network backend support')
+option('vde', type : 'feature', value : 'auto',
+       description: 'vde network backend support')
+option('vmnet', type : 'feature', value : 'auto',
+       description: 'vmnet.framework network backend support')
+option('virglrenderer', type : 'feature', value : 'auto',
+       description: 'virgl rendering support')
+option('rutabaga_gfx', type : 'feature', value : 'auto',
+       description: 'rutabaga_gfx support')
+option('png', type : 'feature', value : 'auto',
+       description: 'PNG support with libpng')
+option('vnc', type : 'feature', value : 'auto',
        description: 'VNC server')
 option('vnc_jpeg', type : 'feature', value : 'auto',
        description: 'JPEG lossy compression for VNC server')
-option('vnc_png', type : 'feature', value : 'auto',
-       description: 'PNG compression for VNC server')
 option('vnc_sasl', type : 'feature', value : 'auto',
        description: 'SASL authentication for VNC server')
+option('vte', type : 'feature', value : 'auto',
+       description: 'vte support for the gtk UI')
+
+# GTK Clipboard implementation is disabled by default, since it may cause hangs
+# of the guest VCPUs. See gitlab issue 1150:
+# https://gitlab.com/qemu-project/qemu/-/issues/1150
+
+option('gtk_clipboard', type: 'feature', value : 'disabled',
+       description: 'clipboard support for the gtk UI (EXPERIMENTAL, MAY HANG)')
 option('xkbcommon', type : 'feature', value : 'auto',
        description: 'xkbcommon support')
-option('virtiofsd', type: 'feature', value: 'auto',
-       description: 'build virtiofs daemon (virtiofsd)')
+option('zstd', type : 'feature', value : 'auto',
+       description: 'zstd compression support')
+option('fuse', type: 'feature', value: 'auto',
+       description: 'FUSE block device export')
+option('fuse_lseek', type : 'feature', value : 'auto',
+       description: 'SEEK_HOLE/SEEK_DATA support for FUSE exports')
+
+option('trace_backends', type: 'array', value: ['log'],
+       choices: ['dtrace', 'ftrace', 'log', 'nop', 'simple', 'syslog', 'ust'],
+       description: 'Set available tracing backends')
+
+option('alsa', type: 'feature', value: 'auto',
+       description: 'ALSA sound support')
+option('coreaudio', type: 'feature', value: 'auto',
+       description: 'CoreAudio sound support')
+option('dsound', type: 'feature', value: 'auto',
+       description: 'DirectSound sound support')
+option('jack', type: 'feature', value: 'auto',
+       description: 'JACK sound support')
+option('oss', type: 'feature', value: 'auto',
+       description: 'OSS sound support')
+option('pa', type: 'feature', value: 'auto',
+       description: 'PulseAudio sound support')
+option('pipewire', type: 'feature', value: 'auto',
+       description: 'PipeWire sound support')
+option('sndio', type: 'feature', value: 'auto',
+       description: 'sndio sound support')
+
+option('vhost_kernel', type: 'feature', value: 'auto',
+       description: 'vhost kernel backend support')
+option('vhost_net', type: 'feature', value: 'auto',
+       description: 'vhost-net kernel acceleration support')
+option('vhost_user', type: 'feature', value: 'auto',
+       description: 'vhost-user backend support')
+option('vhost_crypto', type: 'feature', value: 'auto',
+       description: 'vhost-user crypto backend support')
+option('vhost_vdpa', type: 'feature', value: 'auto',
+       description: 'vhost-vdpa kernel backend support')
 option('vhost_user_blk_server', type: 'feature', value: 'auto',
        description: 'build vhost-user-blk server')
+option('virtfs', type: 'feature', value: 'auto',
+       description: 'virtio-9p support')
+option('virtfs_proxy_helper', type: 'feature', value: 'auto',
+       description: 'virtio-9p proxy helper support')
+option('libvduse', type: 'feature', value: 'auto',
+       description: 'build VDUSE Library')
+option('vduse_blk_export', type: 'feature', value: 'auto',
+       description: 'VDUSE block export support')
 
-option('capstone', type: 'combo', value: 'auto',
-       choices: ['disabled', 'enabled', 'auto', 'system', 'internal'],
+option('capstone', type: 'feature', value: 'auto',
        description: 'Whether and how to find the capstone library')
-option('slirp', type: 'combo', value: 'auto',
-       choices: ['disabled', 'enabled', 'auto', 'system', 'internal'],
-       description: 'Whether and how to find the slirp library')
 option('fdt', type: 'combo', value: 'auto',
        choices: ['disabled', 'enabled', 'auto', 'system', 'internal'],
        description: 'Whether and how to find the libfdt library')
+
+option('selinux', type: 'feature', value: 'auto',
+       description: 'SELinux support in qemu-nbd')
+option('live_block_migration', type: 'feature', value: 'auto',
+       description: 'block migration in the main migration stream')
+option('replication', type: 'feature', value: 'auto',
+       description: 'replication support')
+option('colo_proxy', type: 'feature', value: 'auto',
+       description: 'colo-proxy support')
+option('bochs', type: 'feature', value: 'auto',
+       description: 'bochs image format support')
+option('cloop', type: 'feature', value: 'auto',
+       description: 'cloop image format support')
+option('dmg', type: 'feature', value: 'auto',
+       description: 'dmg image format support')
+option('qcow1', type: 'feature', value: 'auto',
+       description: 'qcow1 image format support')
+option('vdi', type: 'feature', value: 'auto',
+       description: 'vdi image format support')
+option('vhdx', type: 'feature', value: 'auto',
+       description: 'vhdx image format support')
+option('vmdk', type: 'feature', value: 'auto',
+       description: 'vmdk image format support')
+option('vpc', type: 'feature', value: 'auto',
+       description: 'vpc image format support')
+option('vvfat', type: 'feature', value: 'auto',
+       description: 'vvfat image format support')
+option('qed', type: 'feature', value: 'auto',
+       description: 'qed image format support')
+option('parallels', type: 'feature', value: 'auto',
+       description: 'parallels image format support')
+option('block_drv_whitelist_in_tools', type: 'boolean', value: false,
+       description: 'use block whitelist also in tools instead of only QEMU')
+option('rng_none', type: 'boolean', value: false,
+       description: 'dummy RNG, avoid using /dev/(u)random and getrandom()')
+option('coroutine_pool', type: 'boolean', value: true,
+       description: 'coroutine freelist (better performance)')
+option('debug_graph_lock', type: 'boolean', value: false,
+       description: 'graph lock debugging support')
+option('debug_mutex', type: 'boolean', value: false,
+       description: 'mutex debugging support')
+option('debug_stack_usage', type: 'boolean', value: false,
+       description: 'measure coroutine stack usage')
+option('qom_cast_debug', type: 'boolean', value: true,
+       description: 'cast debugging support')
+option('slirp_smbd', type : 'feature', value : 'auto',
+       description: 'use smbd (at path --smbd=*) in slirp networking')
+
+option('qemu_ga_manufacturer', type: 'string', value: 'QEMU',
+       description: '"manufacturer" name for qemu-ga registry entries')
+option('qemu_ga_distro', type: 'string', value: 'Linux',
+       description: 'second path element in qemu-ga registry entries')
+option('qemu_ga_version', type: 'string', value: '',
+       description: 'version number for qemu-ga installer')
+
+option('hexagon_idef_parser', type : 'boolean', value : true,
+       description: 'use idef-parser to automatically generate TCG code for the Hexagon frontend')
diff --git a/monitor/fds.c b/monitor/fds.c
new file mode 100644 (file)
index 0000000..d86c2c6
--- /dev/null
@@ -0,0 +1,517 @@
+/*
+ * QEMU monitor file descriptor passing
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor-internal.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/ctype.h"
+#include "qemu/cutils.h"
+#include "sysemu/runstate.h"
+
+/* file descriptors passed via SCM_RIGHTS */
+typedef struct mon_fd_t mon_fd_t;
+struct mon_fd_t {
+    char *name;
+    int fd;
+    QLIST_ENTRY(mon_fd_t) next;
+};
+
+/* file descriptor associated with a file descriptor set */
+typedef struct MonFdsetFd MonFdsetFd;
+struct MonFdsetFd {
+    int fd;
+    bool removed;
+    char *opaque;
+    QLIST_ENTRY(MonFdsetFd) next;
+};
+
+/* file descriptor set containing fds passed via SCM_RIGHTS */
+typedef struct MonFdset MonFdset;
+struct MonFdset {
+    int64_t id;
+    QLIST_HEAD(, MonFdsetFd) fds;
+    QLIST_HEAD(, MonFdsetFd) dup_fds;
+    QLIST_ENTRY(MonFdset) next;
+};
+
+/* Protects mon_fdsets */
+static QemuMutex mon_fdsets_lock;
+static QLIST_HEAD(, MonFdset) mon_fdsets;
+
+static bool monitor_add_fd(Monitor *mon, int fd, const char *fdname, Error **errp)
+{
+    mon_fd_t *monfd;
+
+    if (qemu_isdigit(fdname[0])) {
+        close(fd);
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "fdname",
+                   "a name not starting with a digit");
+        return false;
+    }
+
+    /* See close() call below. */
+    qemu_mutex_lock(&mon->mon_lock);
+    QLIST_FOREACH(monfd, &mon->fds, next) {
+        int tmp_fd;
+
+        if (strcmp(monfd->name, fdname) != 0) {
+            continue;
+        }
+
+        tmp_fd = monfd->fd;
+        monfd->fd = fd;
+        qemu_mutex_unlock(&mon->mon_lock);
+        /* Make sure close() is outside critical section */
+        close(tmp_fd);
+        return true;
+    }
+
+    monfd = g_new0(mon_fd_t, 1);
+    monfd->name = g_strdup(fdname);
+    monfd->fd = fd;
+
+    QLIST_INSERT_HEAD(&mon->fds, monfd, next);
+    qemu_mutex_unlock(&mon->mon_lock);
+    return true;
+}
+
+#ifdef CONFIG_POSIX
+void qmp_getfd(const char *fdname, Error **errp)
+{
+    Monitor *cur_mon = monitor_cur();
+    int fd;
+
+    fd = qemu_chr_fe_get_msgfd(&cur_mon->chr);
+    if (fd == -1) {
+        error_setg(errp, "No file descriptor supplied via SCM_RIGHTS");
+        return;
+    }
+
+    monitor_add_fd(cur_mon, fd, fdname, errp);
+}
+#endif
+
+void qmp_closefd(const char *fdname, Error **errp)
+{
+    Monitor *cur_mon = monitor_cur();
+    mon_fd_t *monfd;
+    int tmp_fd;
+
+    qemu_mutex_lock(&cur_mon->mon_lock);
+    QLIST_FOREACH(monfd, &cur_mon->fds, next) {
+        if (strcmp(monfd->name, fdname) != 0) {
+            continue;
+        }
+
+        QLIST_REMOVE(monfd, next);
+        tmp_fd = monfd->fd;
+        g_free(monfd->name);
+        g_free(monfd);
+        qemu_mutex_unlock(&cur_mon->mon_lock);
+        /* Make sure close() is outside critical section */
+        close(tmp_fd);
+        return;
+    }
+
+    qemu_mutex_unlock(&cur_mon->mon_lock);
+    error_setg(errp, "File descriptor named '%s' not found", fdname);
+}
+
+int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp)
+{
+    mon_fd_t *monfd;
+
+    QEMU_LOCK_GUARD(&mon->mon_lock);
+    QLIST_FOREACH(monfd, &mon->fds, next) {
+        int fd;
+
+        if (strcmp(monfd->name, fdname) != 0) {
+            continue;
+        }
+
+        fd = monfd->fd;
+        assert(fd >= 0);
+
+        /* caller takes ownership of fd */
+        QLIST_REMOVE(monfd, next);
+        g_free(monfd->name);
+        g_free(monfd);
+
+        return fd;
+    }
+
+    error_setg(errp, "File descriptor named '%s' has not been found", fdname);
+    return -1;
+}
+
+static void monitor_fdset_cleanup(MonFdset *mon_fdset)
+{
+    MonFdsetFd *mon_fdset_fd;
+    MonFdsetFd *mon_fdset_fd_next;
+
+    QLIST_FOREACH_SAFE(mon_fdset_fd, &mon_fdset->fds, next, mon_fdset_fd_next) {
+        if ((mon_fdset_fd->removed ||
+                (QLIST_EMPTY(&mon_fdset->dup_fds) && mon_refcount == 0)) &&
+                runstate_is_running()) {
+            close(mon_fdset_fd->fd);
+            g_free(mon_fdset_fd->opaque);
+            QLIST_REMOVE(mon_fdset_fd, next);
+            g_free(mon_fdset_fd);
+        }
+    }
+
+    if (QLIST_EMPTY(&mon_fdset->fds) && QLIST_EMPTY(&mon_fdset->dup_fds)) {
+        QLIST_REMOVE(mon_fdset, next);
+        g_free(mon_fdset);
+    }
+}
+
+void monitor_fdsets_cleanup(void)
+{
+    MonFdset *mon_fdset;
+    MonFdset *mon_fdset_next;
+
+    QEMU_LOCK_GUARD(&mon_fdsets_lock);
+    QLIST_FOREACH_SAFE(mon_fdset, &mon_fdsets, next, mon_fdset_next) {
+        monitor_fdset_cleanup(mon_fdset);
+    }
+}
+
+AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id,
+                      const char *opaque, Error **errp)
+{
+    int fd;
+    Monitor *mon = monitor_cur();
+    AddfdInfo *fdinfo;
+
+    fd = qemu_chr_fe_get_msgfd(&mon->chr);
+    if (fd == -1) {
+        error_setg(errp, "No file descriptor supplied via SCM_RIGHTS");
+        goto error;
+    }
+
+    fdinfo = monitor_fdset_add_fd(fd, has_fdset_id, fdset_id, opaque, errp);
+    if (fdinfo) {
+        return fdinfo;
+    }
+
+error:
+    if (fd != -1) {
+        close(fd);
+    }
+    return NULL;
+}
+
+#ifdef WIN32
+void qmp_get_win32_socket(const char *infos, const char *fdname, Error **errp)
+{
+    g_autofree WSAPROTOCOL_INFOW *info = NULL;
+    gsize len;
+    SOCKET sk;
+    int fd;
+
+    info = (void *)g_base64_decode(infos, &len);
+    if (len != sizeof(*info)) {
+        error_setg(errp, "Invalid WSAPROTOCOL_INFOW value");
+        return;
+    }
+
+    sk = WSASocketW(FROM_PROTOCOL_INFO,
+                    FROM_PROTOCOL_INFO,
+                    FROM_PROTOCOL_INFO,
+                    info, 0, 0);
+    if (sk == INVALID_SOCKET) {
+        error_setg_win32(errp, WSAGetLastError(), "Couldn't import socket");
+        return;
+    }
+
+    fd = _open_osfhandle(sk, _O_BINARY);
+    if (fd < 0) {
+        error_setg_errno(errp, errno, "Failed to associate a FD with the SOCKET");
+        closesocket(sk);
+        return;
+    }
+
+    monitor_add_fd(monitor_cur(), fd, fdname, errp);
+}
+#endif
+
+
+void qmp_remove_fd(int64_t fdset_id, bool has_fd, int64_t fd, Error **errp)
+{
+    MonFdset *mon_fdset;
+    MonFdsetFd *mon_fdset_fd;
+    char fd_str[60];
+
+    QEMU_LOCK_GUARD(&mon_fdsets_lock);
+    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+        if (mon_fdset->id != fdset_id) {
+            continue;
+        }
+        QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
+            if (has_fd) {
+                if (mon_fdset_fd->fd != fd) {
+                    continue;
+                }
+                mon_fdset_fd->removed = true;
+                break;
+            } else {
+                mon_fdset_fd->removed = true;
+            }
+        }
+        if (has_fd && !mon_fdset_fd) {
+            goto error;
+        }
+        monitor_fdset_cleanup(mon_fdset);
+        return;
+    }
+
+error:
+    if (has_fd) {
+        snprintf(fd_str, sizeof(fd_str), "fdset-id:%" PRId64 ", fd:%" PRId64,
+                 fdset_id, fd);
+    } else {
+        snprintf(fd_str, sizeof(fd_str), "fdset-id:%" PRId64, fdset_id);
+    }
+    error_setg(errp, "File descriptor named '%s' not found", fd_str);
+}
+
+FdsetInfoList *qmp_query_fdsets(Error **errp)
+{
+    MonFdset *mon_fdset;
+    MonFdsetFd *mon_fdset_fd;
+    FdsetInfoList *fdset_list = NULL;
+
+    QEMU_LOCK_GUARD(&mon_fdsets_lock);
+    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+        FdsetInfo *fdset_info = g_malloc0(sizeof(*fdset_info));
+
+        fdset_info->fdset_id = mon_fdset->id;
+
+        QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
+            FdsetFdInfo *fdsetfd_info;
+
+            fdsetfd_info = g_malloc0(sizeof(*fdsetfd_info));
+            fdsetfd_info->fd = mon_fdset_fd->fd;
+            fdsetfd_info->opaque = g_strdup(mon_fdset_fd->opaque);
+
+            QAPI_LIST_PREPEND(fdset_info->fds, fdsetfd_info);
+        }
+
+        QAPI_LIST_PREPEND(fdset_list, fdset_info);
+    }
+
+    return fdset_list;
+}
+
+AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id,
+                                const char *opaque, Error **errp)
+{
+    MonFdset *mon_fdset = NULL;
+    MonFdsetFd *mon_fdset_fd;
+    AddfdInfo *fdinfo;
+
+    QEMU_LOCK_GUARD(&mon_fdsets_lock);
+    if (has_fdset_id) {
+        QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+            /* Break if match found or match impossible due to ordering by ID */
+            if (fdset_id <= mon_fdset->id) {
+                if (fdset_id < mon_fdset->id) {
+                    mon_fdset = NULL;
+                }
+                break;
+            }
+        }
+    }
+
+    if (mon_fdset == NULL) {
+        int64_t fdset_id_prev = -1;
+        MonFdset *mon_fdset_cur = QLIST_FIRST(&mon_fdsets);
+
+        if (has_fdset_id) {
+            if (fdset_id < 0) {
+                error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id",
+                           "a non-negative value");
+                return NULL;
+            }
+            /* Use specified fdset ID */
+            QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+                mon_fdset_cur = mon_fdset;
+                if (fdset_id < mon_fdset_cur->id) {
+                    break;
+                }
+            }
+        } else {
+            /* Use first available fdset ID */
+            QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+                mon_fdset_cur = mon_fdset;
+                if (fdset_id_prev == mon_fdset_cur->id - 1) {
+                    fdset_id_prev = mon_fdset_cur->id;
+                    continue;
+                }
+                break;
+            }
+        }
+
+        mon_fdset = g_malloc0(sizeof(*mon_fdset));
+        if (has_fdset_id) {
+            mon_fdset->id = fdset_id;
+        } else {
+            mon_fdset->id = fdset_id_prev + 1;
+        }
+
+        /* The fdset list is ordered by fdset ID */
+        if (!mon_fdset_cur) {
+            QLIST_INSERT_HEAD(&mon_fdsets, mon_fdset, next);
+        } else if (mon_fdset->id < mon_fdset_cur->id) {
+            QLIST_INSERT_BEFORE(mon_fdset_cur, mon_fdset, next);
+        } else {
+            QLIST_INSERT_AFTER(mon_fdset_cur, mon_fdset, next);
+        }
+    }
+
+    mon_fdset_fd = g_malloc0(sizeof(*mon_fdset_fd));
+    mon_fdset_fd->fd = fd;
+    mon_fdset_fd->removed = false;
+    mon_fdset_fd->opaque = g_strdup(opaque);
+    QLIST_INSERT_HEAD(&mon_fdset->fds, mon_fdset_fd, next);
+
+    fdinfo = g_malloc0(sizeof(*fdinfo));
+    fdinfo->fdset_id = mon_fdset->id;
+    fdinfo->fd = mon_fdset_fd->fd;
+
+    return fdinfo;
+}
+
+int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags)
+{
+#ifdef _WIN32
+    return -ENOENT;
+#else
+    MonFdset *mon_fdset;
+
+    QEMU_LOCK_GUARD(&mon_fdsets_lock);
+    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+        MonFdsetFd *mon_fdset_fd;
+        MonFdsetFd *mon_fdset_fd_dup;
+        int fd = -1;
+        int dup_fd;
+        int mon_fd_flags;
+
+        if (mon_fdset->id != fdset_id) {
+            continue;
+        }
+
+        QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
+            mon_fd_flags = fcntl(mon_fdset_fd->fd, F_GETFL);
+            if (mon_fd_flags == -1) {
+                return -1;
+            }
+
+            if ((flags & O_ACCMODE) == (mon_fd_flags & O_ACCMODE)) {
+                fd = mon_fdset_fd->fd;
+                break;
+            }
+        }
+
+        if (fd == -1) {
+            errno = EACCES;
+            return -1;
+        }
+
+        dup_fd = qemu_dup_flags(fd, flags);
+        if (dup_fd == -1) {
+            return -1;
+        }
+
+        mon_fdset_fd_dup = g_malloc0(sizeof(*mon_fdset_fd_dup));
+        mon_fdset_fd_dup->fd = dup_fd;
+        QLIST_INSERT_HEAD(&mon_fdset->dup_fds, mon_fdset_fd_dup, next);
+        return dup_fd;
+    }
+
+    errno = ENOENT;
+    return -1;
+#endif
+}
+
+static int64_t monitor_fdset_dup_fd_find_remove(int dup_fd, bool remove)
+{
+    MonFdset *mon_fdset;
+    MonFdsetFd *mon_fdset_fd_dup;
+
+    QEMU_LOCK_GUARD(&mon_fdsets_lock);
+    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+        QLIST_FOREACH(mon_fdset_fd_dup, &mon_fdset->dup_fds, next) {
+            if (mon_fdset_fd_dup->fd == dup_fd) {
+                if (remove) {
+                    QLIST_REMOVE(mon_fdset_fd_dup, next);
+                    g_free(mon_fdset_fd_dup);
+                    if (QLIST_EMPTY(&mon_fdset->dup_fds)) {
+                        monitor_fdset_cleanup(mon_fdset);
+                    }
+                    return -1;
+                } else {
+                    return mon_fdset->id;
+                }
+            }
+        }
+    }
+
+    return -1;
+}
+
+int64_t monitor_fdset_dup_fd_find(int dup_fd)
+{
+    return monitor_fdset_dup_fd_find_remove(dup_fd, false);
+}
+
+void monitor_fdset_dup_fd_remove(int dup_fd)
+{
+    monitor_fdset_dup_fd_find_remove(dup_fd, true);
+}
+
+int monitor_fd_param(Monitor *mon, const char *fdname, Error **errp)
+{
+    int fd;
+
+    if (!qemu_isdigit(fdname[0]) && mon) {
+        fd = monitor_get_fd(mon, fdname, errp);
+    } else {
+        fd = qemu_parse_fd(fdname);
+        if (fd < 0) {
+            error_setg(errp, "Invalid file descriptor number '%s'",
+                       fdname);
+        }
+    }
+
+    return fd;
+}
+
+static void __attribute__((__constructor__)) monitor_fds_init(void)
+{
+    qemu_mutex_init(&mon_fdsets_lock);
+}
diff --git a/monitor/hmp-cmds-target.c b/monitor/hmp-cmds-target.c
new file mode 100644 (file)
index 0000000..d9fbcac
--- /dev/null
@@ -0,0 +1,380 @@
+/*
+ * Miscellaneous target-dependent HMP commands
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "disas/disas.h"
+#include "exec/address-spaces.h"
+#include "monitor/hmp-target.h"
+#include "monitor/monitor-internal.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "sysemu/hw_accel.h"
+
+/* Set the current CPU defined by the user. Callers must hold BQL. */
+int monitor_set_cpu(Monitor *mon, int cpu_index)
+{
+    CPUState *cpu;
+
+    cpu = qemu_get_cpu(cpu_index);
+    if (cpu == NULL) {
+        return -1;
+    }
+    g_free(mon->mon_cpu_path);
+    mon->mon_cpu_path = object_get_canonical_path(OBJECT(cpu));
+    return 0;
+}
+
+/* Callers must hold BQL. */
+static CPUState *mon_get_cpu_sync(Monitor *mon, bool synchronize)
+{
+    CPUState *cpu = NULL;
+
+    if (mon->mon_cpu_path) {
+        cpu = (CPUState *) object_resolve_path_type(mon->mon_cpu_path,
+                                                    TYPE_CPU, NULL);
+        if (!cpu) {
+            g_free(mon->mon_cpu_path);
+            mon->mon_cpu_path = NULL;
+        }
+    }
+    if (!mon->mon_cpu_path) {
+        if (!first_cpu) {
+            return NULL;
+        }
+        monitor_set_cpu(mon, first_cpu->cpu_index);
+        cpu = first_cpu;
+    }
+    assert(cpu != NULL);
+    if (synchronize) {
+        cpu_synchronize_state(cpu);
+    }
+    return cpu;
+}
+
+CPUState *mon_get_cpu(Monitor *mon)
+{
+    return mon_get_cpu_sync(mon, true);
+}
+
+CPUArchState *mon_get_cpu_env(Monitor *mon)
+{
+    CPUState *cs = mon_get_cpu(mon);
+
+    return cs ? cpu_env(cs) : NULL;
+}
+
+int monitor_get_cpu_index(Monitor *mon)
+{
+    CPUState *cs = mon_get_cpu_sync(mon, false);
+
+    return cs ? cs->cpu_index : UNASSIGNED_CPU_INDEX;
+}
+
+void hmp_info_registers(Monitor *mon, const QDict *qdict)
+{
+    bool all_cpus = qdict_get_try_bool(qdict, "cpustate_all", false);
+    int vcpu = qdict_get_try_int(qdict, "vcpu", -1);
+    CPUState *cs;
+
+    if (all_cpus) {
+        CPU_FOREACH(cs) {
+            monitor_printf(mon, "\nCPU#%d\n", cs->cpu_index);
+            cpu_dump_state(cs, NULL, CPU_DUMP_FPU);
+        }
+    } else {
+        cs = vcpu >= 0 ? qemu_get_cpu(vcpu) : mon_get_cpu(mon);
+
+        if (!cs) {
+            if (vcpu >= 0) {
+                monitor_printf(mon, "CPU#%d not available\n", vcpu);
+            } else {
+                monitor_printf(mon, "No CPU available\n");
+            }
+            return;
+        }
+
+        monitor_printf(mon, "\nCPU#%d\n", cs->cpu_index);
+        cpu_dump_state(cs, NULL, CPU_DUMP_FPU);
+    }
+}
+
+static void memory_dump(Monitor *mon, int count, int format, int wsize,
+                        hwaddr addr, int is_physical)
+{
+    int l, line_size, i, max_digits, len;
+    uint8_t buf[16];
+    uint64_t v;
+    CPUState *cs = mon_get_cpu(mon);
+
+    if (!cs && (format == 'i' || !is_physical)) {
+        monitor_printf(mon, "Can not dump without CPU\n");
+        return;
+    }
+
+    if (format == 'i') {
+        monitor_disas(mon, cs, addr, count, is_physical);
+        return;
+    }
+
+    len = wsize * count;
+    if (wsize == 1) {
+        line_size = 8;
+    } else {
+        line_size = 16;
+    }
+    max_digits = 0;
+
+    switch(format) {
+    case 'o':
+        max_digits = DIV_ROUND_UP(wsize * 8, 3);
+        break;
+    default:
+    case 'x':
+        max_digits = (wsize * 8) / 4;
+        break;
+    case 'u':
+    case 'd':
+        max_digits = DIV_ROUND_UP(wsize * 8 * 10, 33);
+        break;
+    case 'c':
+        wsize = 1;
+        break;
+    }
+
+    while (len > 0) {
+        if (is_physical) {
+            monitor_printf(mon, HWADDR_FMT_plx ":", addr);
+        } else {
+            monitor_printf(mon, TARGET_FMT_lx ":", (target_ulong)addr);
+        }
+        l = len;
+        if (l > line_size)
+            l = line_size;
+        if (is_physical) {
+            AddressSpace *as = cs ? cs->as : &address_space_memory;
+            MemTxResult r = address_space_read(as, addr,
+                                               MEMTXATTRS_UNSPECIFIED, buf, l);
+            if (r != MEMTX_OK) {
+                monitor_printf(mon, " Cannot access memory\n");
+                break;
+            }
+        } else {
+            if (cpu_memory_rw_debug(cs, addr, buf, l, 0) < 0) {
+                monitor_printf(mon, " Cannot access memory\n");
+                break;
+            }
+        }
+        i = 0;
+        while (i < l) {
+            switch(wsize) {
+            default:
+            case 1:
+                v = ldub_p(buf + i);
+                break;
+            case 2:
+                v = lduw_p(buf + i);
+                break;
+            case 4:
+                v = (uint32_t)ldl_p(buf + i);
+                break;
+            case 8:
+                v = ldq_p(buf + i);
+                break;
+            }
+            monitor_printf(mon, " ");
+            switch(format) {
+            case 'o':
+                monitor_printf(mon, "%#*" PRIo64, max_digits, v);
+                break;
+            case 'x':
+                monitor_printf(mon, "0x%0*" PRIx64, max_digits, v);
+                break;
+            case 'u':
+                monitor_printf(mon, "%*" PRIu64, max_digits, v);
+                break;
+            case 'd':
+                monitor_printf(mon, "%*" PRId64, max_digits, v);
+                break;
+            case 'c':
+                monitor_printc(mon, v);
+                break;
+            }
+            i += wsize;
+        }
+        monitor_printf(mon, "\n");
+        addr += l;
+        len -= l;
+    }
+}
+
+void hmp_memory_dump(Monitor *mon, const QDict *qdict)
+{
+    int count = qdict_get_int(qdict, "count");
+    int format = qdict_get_int(qdict, "format");
+    int size = qdict_get_int(qdict, "size");
+    target_long addr = qdict_get_int(qdict, "addr");
+
+    memory_dump(mon, count, format, size, addr, 0);
+}
+
+void hmp_physical_memory_dump(Monitor *mon, const QDict *qdict)
+{
+    int count = qdict_get_int(qdict, "count");
+    int format = qdict_get_int(qdict, "format");
+    int size = qdict_get_int(qdict, "size");
+    hwaddr addr = qdict_get_int(qdict, "addr");
+
+    memory_dump(mon, count, format, size, addr, 1);
+}
+
+void *gpa2hva(MemoryRegion **p_mr, hwaddr addr, uint64_t size, Error **errp)
+{
+    Int128 gpa_region_size;
+    MemoryRegionSection mrs = memory_region_find(get_system_memory(),
+                                                 addr, size);
+
+    if (!mrs.mr) {
+        error_setg(errp, "No memory is mapped at address 0x%" HWADDR_PRIx, addr);
+        return NULL;
+    }
+
+    if (!memory_region_is_ram(mrs.mr) && !memory_region_is_romd(mrs.mr)) {
+        error_setg(errp, "Memory at address 0x%" HWADDR_PRIx "is not RAM", addr);
+        memory_region_unref(mrs.mr);
+        return NULL;
+    }
+
+    gpa_region_size = int128_make64(size);
+    if (int128_lt(mrs.size, gpa_region_size)) {
+        error_setg(errp, "Size of memory region at 0x%" HWADDR_PRIx
+                   " exceeded.", addr);
+        memory_region_unref(mrs.mr);
+        return NULL;
+    }
+
+    *p_mr = mrs.mr;
+    return qemu_map_ram_ptr(mrs.mr->ram_block, mrs.offset_within_region);
+}
+
+void hmp_gpa2hva(Monitor *mon, const QDict *qdict)
+{
+    hwaddr addr = qdict_get_int(qdict, "addr");
+    Error *local_err = NULL;
+    MemoryRegion *mr = NULL;
+    void *ptr;
+
+    ptr = gpa2hva(&mr, addr, 1, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return;
+    }
+
+    monitor_printf(mon, "Host virtual address for 0x%" HWADDR_PRIx
+                   " (%s) is %p\n",
+                   addr, mr->name, ptr);
+
+    memory_region_unref(mr);
+}
+
+void hmp_gva2gpa(Monitor *mon, const QDict *qdict)
+{
+    target_ulong addr = qdict_get_int(qdict, "addr");
+    MemTxAttrs attrs;
+    CPUState *cs = mon_get_cpu(mon);
+    hwaddr gpa;
+
+    if (!cs) {
+        monitor_printf(mon, "No cpu\n");
+        return;
+    }
+
+    gpa  = cpu_get_phys_page_attrs_debug(cs, addr & TARGET_PAGE_MASK, &attrs);
+    if (gpa == -1) {
+        monitor_printf(mon, "Unmapped\n");
+    } else {
+        monitor_printf(mon, "gpa: %#" HWADDR_PRIx "\n",
+                       gpa + (addr & ~TARGET_PAGE_MASK));
+    }
+}
+
+#ifdef CONFIG_LINUX
+static uint64_t vtop(void *ptr, Error **errp)
+{
+    uint64_t pinfo;
+    uint64_t ret = -1;
+    uintptr_t addr = (uintptr_t) ptr;
+    uintptr_t pagesize = qemu_real_host_page_size();
+    off_t offset = addr / pagesize * sizeof(pinfo);
+    int fd;
+
+    fd = open("/proc/self/pagemap", O_RDONLY);
+    if (fd == -1) {
+        error_setg_errno(errp, errno, "Cannot open /proc/self/pagemap");
+        return -1;
+    }
+
+    /* Force copy-on-write if necessary.  */
+    qatomic_add((uint8_t *)ptr, 0);
+
+    if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) {
+        error_setg_errno(errp, errno, "Cannot read pagemap");
+        goto out;
+    }
+    if ((pinfo & (1ull << 63)) == 0) {
+        error_setg(errp, "Page not present");
+        goto out;
+    }
+    ret = ((pinfo & 0x007fffffffffffffull) * pagesize) | (addr & (pagesize - 1));
+
+out:
+    close(fd);
+    return ret;
+}
+
+void hmp_gpa2hpa(Monitor *mon, const QDict *qdict)
+{
+    hwaddr addr = qdict_get_int(qdict, "addr");
+    Error *local_err = NULL;
+    MemoryRegion *mr = NULL;
+    void *ptr;
+    uint64_t physaddr;
+
+    ptr = gpa2hva(&mr, addr, 1, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return;
+    }
+
+    physaddr = vtop(ptr, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    } else {
+        monitor_printf(mon, "Host physical address for 0x%" HWADDR_PRIx
+                       " (%s) is 0x%" PRIx64 "\n",
+                       addr, mr->name, (uint64_t) physaddr);
+    }
+
+    memory_region_unref(mr);
+}
+#endif
index 65d8ff48494be8eaa38df989b63dd0df6f4d84e1..871898ac46b77dab291bd59d155703337376b3e6 100644 (file)
  */
 
 #include "qemu/osdep.h"
+#include "exec/address-spaces.h"
+#include "exec/gdbstub.h"
+#include "exec/ioport.h"
 #include "monitor/hmp.h"
-#include "net/net.h"
-#include "net/eth.h"
-#include "chardev/char.h"
-#include "sysemu/block-backend.h"
-#include "sysemu/runstate.h"
-#include "qemu/config-file.h"
-#include "qemu/option.h"
-#include "qemu/timer.h"
-#include "qemu/sockets.h"
 #include "qemu/help_option.h"
 #include "monitor/monitor-internal.h"
 #include "qapi/error.h"
-#include "qapi/clone-visitor.h"
-#include "qapi/opts-visitor.h"
-#include "qapi/qapi-builtin-visit.h"
-#include "qapi/qapi-commands-block.h"
-#include "qapi/qapi-commands-char.h"
 #include "qapi/qapi-commands-control.h"
-#include "qapi/qapi-commands-machine.h"
-#include "qapi/qapi-commands-migration.h"
 #include "qapi/qapi-commands-misc.h"
-#include "qapi/qapi-commands-net.h"
-#include "qapi/qapi-commands-pci.h"
-#include "qapi/qapi-commands-rocker.h"
-#include "qapi/qapi-commands-run-state.h"
-#include "qapi/qapi-commands-tpm.h"
-#include "qapi/qapi-commands-ui.h"
-#include "qapi/qapi-visit-net.h"
-#include "qapi/qapi-visit-migration.h"
 #include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qerror.h"
-#include "qapi/string-input-visitor.h"
-#include "qapi/string-output-visitor.h"
-#include "qom/object_interfaces.h"
-#include "ui/console.h"
 #include "qemu/cutils.h"
-#include "qemu/error-report.h"
-#include "exec/ramlist.h"
 #include "hw/intc/intc.h"
-#include "hw/rdma/rdma.h"
-#include "migration/snapshot.h"
-#include "migration/misc.h"
+#include "qemu/log.h"
+#include "sysemu/sysemu.h"
 
-#ifdef CONFIG_SPICE
-#include <spice/enums.h>
-#endif
-
-void hmp_handle_error(Monitor *mon, Error *err)
+bool hmp_handle_error(Monitor *mon, Error *err)
 {
     if (err) {
         error_reportf_err(err, "Error: ");
+        return true;
     }
+    return false;
 }
 
 /*
- * Produce a strList from a comma separated list.
- * A NULL or empty input string return NULL.
+ * Split @str at comma.
+ * A null @str defaults to "".
  */
-static strList *strList_from_comma_list(const char *in)
+strList *hmp_split_at_comma(const char *str)
 {
+    char **split = g_strsplit(str ?: "", ",", -1);
     strList *res = NULL;
-    strList **hook = &res;
-
-    while (in && in[0]) {
-        char *comma = strchr(in, ',');
-        *hook = g_new0(strList, 1);
+    strList **tail = &res;
+    int i;
 
-        if (comma) {
-            (*hook)->value = g_strndup(in, comma - in);
-            in = comma + 1; /* skip the , */
-        } else {
-            (*hook)->value = g_strdup(in);
-            in = NULL;
-        }
-        hook = &(*hook)->next;
+    for (i = 0; split[i]; i++) {
+        QAPI_LIST_APPEND(tail, split[i]);
     }
 
+    g_free(split);
     return res;
 }
 
@@ -100,7 +62,7 @@ void hmp_info_name(Monitor *mon, const QDict *qdict)
     NameInfo *info;
 
     info = qmp_query_name(NULL);
-    if (info->has_name) {
+    if (info->name) {
         monitor_printf(mon, "%s\n", info->name);
     }
     qapi_free_NameInfo(info);
@@ -119,2155 +81,365 @@ void hmp_info_version(Monitor *mon, const QDict *qdict)
     qapi_free_VersionInfo(info);
 }
 
-void hmp_info_kvm(Monitor *mon, const QDict *qdict)
+static int hmp_info_pic_foreach(Object *obj, void *opaque)
 {
-    KvmInfo *info;
+    InterruptStatsProvider *intc;
+    InterruptStatsProviderClass *k;
+    Monitor *mon = opaque;
 
-    info = qmp_query_kvm(NULL);
-    monitor_printf(mon, "kvm support: ");
-    if (info->present) {
-        monitor_printf(mon, "%s\n", info->enabled ? "enabled" : "disabled");
-    } else {
-        monitor_printf(mon, "not compiled\n");
+    if (object_dynamic_cast(obj, TYPE_INTERRUPT_STATS_PROVIDER)) {
+        intc = INTERRUPT_STATS_PROVIDER(obj);
+        k = INTERRUPT_STATS_PROVIDER_GET_CLASS(obj);
+        if (k->print_info) {
+            k->print_info(intc, mon);
+        } else {
+            monitor_printf(mon, "Interrupt controller information not available for %s.\n",
+                           object_get_typename(obj));
+        }
     }
 
-    qapi_free_KvmInfo(info);
+    return 0;
 }
 
-void hmp_info_status(Monitor *mon, const QDict *qdict)
+void hmp_info_pic(Monitor *mon, const QDict *qdict)
 {
-    StatusInfo *info;
-
-    info = qmp_query_status(NULL);
-
-    monitor_printf(mon, "VM status: %s%s",
-                   info->running ? "running" : "paused",
-                   info->singlestep ? " (single step mode)" : "");
-
-    if (!info->running && info->status != RUN_STATE_PAUSED) {
-        monitor_printf(mon, " (%s)", RunState_str(info->status));
-    }
-
-    monitor_printf(mon, "\n");
-
-    qapi_free_StatusInfo(info);
+    object_child_foreach_recursive(object_get_root(),
+                                   hmp_info_pic_foreach, mon);
 }
 
-void hmp_info_uuid(Monitor *mon, const QDict *qdict)
+void hmp_quit(Monitor *mon, const QDict *qdict)
 {
-    UuidInfo *info;
-
-    info = qmp_query_uuid(NULL);
-    monitor_printf(mon, "%s\n", info->UUID);
-    qapi_free_UuidInfo(info);
+    monitor_suspend(mon);
+    qmp_quit(NULL);
 }
 
-void hmp_info_chardev(Monitor *mon, const QDict *qdict)
+void hmp_stop(Monitor *mon, const QDict *qdict)
 {
-    ChardevInfoList *char_info, *info;
-
-    char_info = qmp_query_chardev(NULL);
-    for (info = char_info; info; info = info->next) {
-        monitor_printf(mon, "%s: filename=%s\n", info->value->label,
-                                                 info->value->filename);
-    }
-
-    qapi_free_ChardevInfoList(char_info);
+    qmp_stop(NULL);
 }
 
-void hmp_info_mice(Monitor *mon, const QDict *qdict)
+void hmp_sync_profile(Monitor *mon, const QDict *qdict)
 {
-    MouseInfoList *mice_list, *mouse;
+    const char *op = qdict_get_try_str(qdict, "op");
+
+    if (op == NULL) {
+        bool on = qsp_is_enabled();
 
-    mice_list = qmp_query_mice(NULL);
-    if (!mice_list) {
-        monitor_printf(mon, "No mouse devices connected\n");
+        monitor_printf(mon, "sync-profile is %s\n", on ? "on" : "off");
         return;
     }
+    if (!strcmp(op, "on")) {
+        qsp_enable();
+    } else if (!strcmp(op, "off")) {
+        qsp_disable();
+    } else if (!strcmp(op, "reset")) {
+        qsp_reset();
+    } else {
+        Error *err = NULL;
 
-    for (mouse = mice_list; mouse; mouse = mouse->next) {
-        monitor_printf(mon, "%c Mouse #%" PRId64 ": %s%s\n",
-                       mouse->value->current ? '*' : ' ',
-                       mouse->value->index, mouse->value->name,
-                       mouse->value->absolute ? " (absolute)" : "");
+        error_setg(&err, "invalid parameter '%s',"
+                   " expecting 'on', 'off', or 'reset'", op);
+        hmp_handle_error(mon, err);
     }
-
-    qapi_free_MouseInfoList(mice_list);
 }
 
-static char *SocketAddress_to_str(SocketAddress *addr)
+void hmp_exit_preconfig(Monitor *mon, const QDict *qdict)
 {
-    switch (addr->type) {
-    case SOCKET_ADDRESS_TYPE_INET:
-        return g_strdup_printf("tcp:%s:%s",
-                               addr->u.inet.host,
-                               addr->u.inet.port);
-    case SOCKET_ADDRESS_TYPE_UNIX:
-        return g_strdup_printf("unix:%s",
-                               addr->u.q_unix.path);
-    case SOCKET_ADDRESS_TYPE_FD:
-        return g_strdup_printf("fd:%s", addr->u.fd.str);
-    case SOCKET_ADDRESS_TYPE_VSOCK:
-        return g_strdup_printf("tcp:%s:%s",
-                               addr->u.vsock.cid,
-                               addr->u.vsock.port);
-    default:
-        return g_strdup("unknown address type");
-    }
+    Error *err = NULL;
+
+    qmp_x_exit_preconfig(&err);
+    hmp_handle_error(mon, err);
 }
 
-void hmp_info_migrate(Monitor *mon, const QDict *qdict)
+void hmp_cpu(Monitor *mon, const QDict *qdict)
 {
-    MigrationInfo *info;
-
-    info = qmp_query_migrate(NULL);
-
-    migration_global_dump(mon);
-
-    if (info->has_status) {
-        monitor_printf(mon, "Migration status: %s",
-                       MigrationStatus_str(info->status));
-        if (info->status == MIGRATION_STATUS_FAILED &&
-            info->has_error_desc) {
-            monitor_printf(mon, " (%s)\n", info->error_desc);
-        } else {
-            monitor_printf(mon, "\n");
-        }
-
-        monitor_printf(mon, "total time: %" PRIu64 " ms\n",
-                       info->total_time);
-        if (info->has_expected_downtime) {
-            monitor_printf(mon, "expected downtime: %" PRIu64 " ms\n",
-                           info->expected_downtime);
-        }
-        if (info->has_downtime) {
-            monitor_printf(mon, "downtime: %" PRIu64 " ms\n",
-                           info->downtime);
-        }
-        if (info->has_setup_time) {
-            monitor_printf(mon, "setup: %" PRIu64 " ms\n",
-                           info->setup_time);
-        }
-    }
-
-    if (info->has_ram) {
-        monitor_printf(mon, "transferred ram: %" PRIu64 " kbytes\n",
-                       info->ram->transferred >> 10);
-        monitor_printf(mon, "throughput: %0.2f mbps\n",
-                       info->ram->mbps);
-        monitor_printf(mon, "remaining ram: %" PRIu64 " kbytes\n",
-                       info->ram->remaining >> 10);
-        monitor_printf(mon, "total ram: %" PRIu64 " kbytes\n",
-                       info->ram->total >> 10);
-        monitor_printf(mon, "duplicate: %" PRIu64 " pages\n",
-                       info->ram->duplicate);
-        monitor_printf(mon, "skipped: %" PRIu64 " pages\n",
-                       info->ram->skipped);
-        monitor_printf(mon, "normal: %" PRIu64 " pages\n",
-                       info->ram->normal);
-        monitor_printf(mon, "normal bytes: %" PRIu64 " kbytes\n",
-                       info->ram->normal_bytes >> 10);
-        monitor_printf(mon, "dirty sync count: %" PRIu64 "\n",
-                       info->ram->dirty_sync_count);
-        monitor_printf(mon, "page size: %" PRIu64 " kbytes\n",
-                       info->ram->page_size >> 10);
-        monitor_printf(mon, "multifd bytes: %" PRIu64 " kbytes\n",
-                       info->ram->multifd_bytes >> 10);
-        monitor_printf(mon, "pages-per-second: %" PRIu64 "\n",
-                       info->ram->pages_per_second);
-
-        if (info->ram->dirty_pages_rate) {
-            monitor_printf(mon, "dirty pages rate: %" PRIu64 " pages\n",
-                           info->ram->dirty_pages_rate);
-        }
-        if (info->ram->postcopy_requests) {
-            monitor_printf(mon, "postcopy request count: %" PRIu64 "\n",
-                           info->ram->postcopy_requests);
-        }
-    }
-
-    if (info->has_disk) {
-        monitor_printf(mon, "transferred disk: %" PRIu64 " kbytes\n",
-                       info->disk->transferred >> 10);
-        monitor_printf(mon, "remaining disk: %" PRIu64 " kbytes\n",
-                       info->disk->remaining >> 10);
-        monitor_printf(mon, "total disk: %" PRIu64 " kbytes\n",
-                       info->disk->total >> 10);
-    }
-
-    if (info->has_xbzrle_cache) {
-        monitor_printf(mon, "cache size: %" PRIu64 " bytes\n",
-                       info->xbzrle_cache->cache_size);
-        monitor_printf(mon, "xbzrle transferred: %" PRIu64 " kbytes\n",
-                       info->xbzrle_cache->bytes >> 10);
-        monitor_printf(mon, "xbzrle pages: %" PRIu64 " pages\n",
-                       info->xbzrle_cache->pages);
-        monitor_printf(mon, "xbzrle cache miss: %" PRIu64 " pages\n",
-                       info->xbzrle_cache->cache_miss);
-        monitor_printf(mon, "xbzrle cache miss rate: %0.2f\n",
-                       info->xbzrle_cache->cache_miss_rate);
-        monitor_printf(mon, "xbzrle encoding rate: %0.2f\n",
-                       info->xbzrle_cache->encoding_rate);
-        monitor_printf(mon, "xbzrle overflow: %" PRIu64 "\n",
-                       info->xbzrle_cache->overflow);
-    }
-
-    if (info->has_compression) {
-        monitor_printf(mon, "compression pages: %" PRIu64 " pages\n",
-                       info->compression->pages);
-        monitor_printf(mon, "compression busy: %" PRIu64 "\n",
-                       info->compression->busy);
-        monitor_printf(mon, "compression busy rate: %0.2f\n",
-                       info->compression->busy_rate);
-        monitor_printf(mon, "compressed size: %" PRIu64 " kbytes\n",
-                       info->compression->compressed_size >> 10);
-        monitor_printf(mon, "compression rate: %0.2f\n",
-                       info->compression->compression_rate);
-    }
-
-    if (info->has_cpu_throttle_percentage) {
-        monitor_printf(mon, "cpu throttle percentage: %" PRIu64 "\n",
-                       info->cpu_throttle_percentage);
-    }
-
-    if (info->has_postcopy_blocktime) {
-        monitor_printf(mon, "postcopy blocktime: %u\n",
-                       info->postcopy_blocktime);
-    }
-
-    if (info->has_postcopy_vcpu_blocktime) {
-        Visitor *v;
-        char *str;
-        v = string_output_visitor_new(false, &str);
-        visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime,
-                              &error_abort);
-        visit_complete(v, &str);
-        monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
-        g_free(str);
-        visit_free(v);
-    }
-    if (info->has_socket_address) {
-        SocketAddressList *addr;
-
-        monitor_printf(mon, "socket address: [\n");
+    int64_t cpu_index;
 
-        for (addr = info->socket_address; addr; addr = addr->next) {
-            char *s = SocketAddress_to_str(addr->value);
-            monitor_printf(mon, "\t%s\n", s);
-            g_free(s);
-        }
-        monitor_printf(mon, "]\n");
+    /* XXX: drop the monitor_set_cpu() usage when all HMP commands that
+            use it are converted to the QAPI */
+    cpu_index = qdict_get_int(qdict, "index");
+    if (monitor_set_cpu(mon, cpu_index) < 0) {
+        monitor_printf(mon, "invalid CPU index\n");
     }
+}
 
-    if (info->has_vfio) {
-        monitor_printf(mon, "vfio device transferred: %" PRIu64 " kbytes\n",
-                       info->vfio->transferred >> 10);
-    }
+void hmp_cont(Monitor *mon, const QDict *qdict)
+{
+    Error *err = NULL;
 
-    qapi_free_MigrationInfo(info);
+    qmp_cont(&err);
+    hmp_handle_error(mon, err);
 }
 
-void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict)
+void hmp_change(Monitor *mon, const QDict *qdict)
 {
-    MigrationCapabilityStatusList *caps, *cap;
-
-    caps = qmp_query_migrate_capabilities(NULL);
+    const char *device = qdict_get_str(qdict, "device");
+    const char *target = qdict_get_str(qdict, "target");
+    const char *arg = qdict_get_try_str(qdict, "arg");
+    const char *read_only = qdict_get_try_str(qdict, "read-only-mode");
+    bool force = qdict_get_try_bool(qdict, "force", false);
+    Error *err = NULL;
 
-    if (caps) {
-        for (cap = caps; cap; cap = cap->next) {
-            monitor_printf(mon, "%s: %s\n",
-                           MigrationCapability_str(cap->value->capability),
-                           cap->value->state ? "on" : "off");
-        }
+#ifdef CONFIG_VNC
+    if (strcmp(device, "vnc") == 0) {
+        hmp_change_vnc(mon, device, target, arg, read_only, force, &err);
+    } else
+#endif
+    {
+        hmp_change_medium(mon, device, target, arg, read_only, force, &err);
     }
 
-    qapi_free_MigrationCapabilityStatusList(caps);
+    hmp_handle_error(mon, err);
 }
 
-void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
+#ifdef CONFIG_POSIX
+void hmp_getfd(Monitor *mon, const QDict *qdict)
 {
-    MigrationParameters *params;
-
-    params = qmp_query_migrate_parameters(NULL);
-
-    if (params) {
-        monitor_printf(mon, "%s: %" PRIu64 " ms\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_ANNOUNCE_INITIAL),
-            params->announce_initial);
-        monitor_printf(mon, "%s: %" PRIu64 " ms\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_ANNOUNCE_MAX),
-            params->announce_max);
-        monitor_printf(mon, "%s: %" PRIu64 "\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_ANNOUNCE_ROUNDS),
-            params->announce_rounds);
-        monitor_printf(mon, "%s: %" PRIu64 " ms\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_ANNOUNCE_STEP),
-            params->announce_step);
-        assert(params->has_compress_level);
-        monitor_printf(mon, "%s: %u\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_LEVEL),
-            params->compress_level);
-        assert(params->has_compress_threads);
-        monitor_printf(mon, "%s: %u\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_THREADS),
-            params->compress_threads);
-        assert(params->has_compress_wait_thread);
-        monitor_printf(mon, "%s: %s\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_WAIT_THREAD),
-            params->compress_wait_thread ? "on" : "off");
-        assert(params->has_decompress_threads);
-        monitor_printf(mon, "%s: %u\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_DECOMPRESS_THREADS),
-            params->decompress_threads);
-        assert(params->has_throttle_trigger_threshold);
-        monitor_printf(mon, "%s: %u\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD),
-            params->throttle_trigger_threshold);
-        assert(params->has_cpu_throttle_initial);
-        monitor_printf(mon, "%s: %u\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL),
-            params->cpu_throttle_initial);
-        assert(params->has_cpu_throttle_increment);
-        monitor_printf(mon, "%s: %u\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT),
-            params->cpu_throttle_increment);
-        assert(params->has_cpu_throttle_tailslow);
-        monitor_printf(mon, "%s: %s\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_TAILSLOW),
-            params->cpu_throttle_tailslow ? "on" : "off");
-        assert(params->has_max_cpu_throttle);
-        monitor_printf(mon, "%s: %u\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_MAX_CPU_THROTTLE),
-            params->max_cpu_throttle);
-        assert(params->has_tls_creds);
-        monitor_printf(mon, "%s: '%s'\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_TLS_CREDS),
-            params->tls_creds);
-        assert(params->has_tls_hostname);
-        monitor_printf(mon, "%s: '%s'\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_TLS_HOSTNAME),
-            params->tls_hostname);
-        assert(params->has_max_bandwidth);
-        monitor_printf(mon, "%s: %" PRIu64 " bytes/second\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_MAX_BANDWIDTH),
-            params->max_bandwidth);
-        assert(params->has_downtime_limit);
-        monitor_printf(mon, "%s: %" PRIu64 " ms\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_DOWNTIME_LIMIT),
-            params->downtime_limit);
-        assert(params->has_x_checkpoint_delay);
-        monitor_printf(mon, "%s: %u ms\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_X_CHECKPOINT_DELAY),
-            params->x_checkpoint_delay);
-        assert(params->has_block_incremental);
-        monitor_printf(mon, "%s: %s\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_BLOCK_INCREMENTAL),
-            params->block_incremental ? "on" : "off");
-        monitor_printf(mon, "%s: %u\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_CHANNELS),
-            params->multifd_channels);
-        monitor_printf(mon, "%s: %s\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION),
-            MultiFDCompression_str(params->multifd_compression));
-        monitor_printf(mon, "%s: %" PRIu64 " bytes\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE),
-            params->xbzrle_cache_size);
-        monitor_printf(mon, "%s: %" PRIu64 "\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_MAX_POSTCOPY_BANDWIDTH),
-            params->max_postcopy_bandwidth);
-        monitor_printf(mon, "%s: '%s'\n",
-            MigrationParameter_str(MIGRATION_PARAMETER_TLS_AUTHZ),
-            params->tls_authz);
-
-        if (params->has_block_bitmap_mapping) {
-            const BitmapMigrationNodeAliasList *bmnal;
-
-            monitor_printf(mon, "%s:\n",
-                           MigrationParameter_str(
-                               MIGRATION_PARAMETER_BLOCK_BITMAP_MAPPING));
-
-            for (bmnal = params->block_bitmap_mapping;
-                 bmnal;
-                 bmnal = bmnal->next)
-            {
-                const BitmapMigrationNodeAlias *bmna = bmnal->value;
-                const BitmapMigrationBitmapAliasList *bmbal;
-
-                monitor_printf(mon, "  '%s' -> '%s'\n",
-                               bmna->node_name, bmna->alias);
-
-                for (bmbal = bmna->bitmaps; bmbal; bmbal = bmbal->next) {
-                    const BitmapMigrationBitmapAlias *bmba = bmbal->value;
-
-                    monitor_printf(mon, "    '%s' -> '%s'\n",
-                                   bmba->name, bmba->alias);
-                }
-            }
-        }
-    }
+    const char *fdname = qdict_get_str(qdict, "fdname");
+    Error *err = NULL;
 
-    qapi_free_MigrationParameters(params);
+    qmp_getfd(fdname, &err);
+    hmp_handle_error(mon, err);
 }
+#endif
 
-void hmp_info_migrate_cache_size(Monitor *mon, const QDict *qdict)
+void hmp_closefd(Monitor *mon, const QDict *qdict)
 {
-    monitor_printf(mon, "xbzrel cache size: %" PRId64 " kbytes\n",
-                   qmp_query_migrate_cache_size(NULL) >> 10);
-}
-
+    const char *fdname = qdict_get_str(qdict, "fdname");
+    Error *err = NULL;
 
-#ifdef CONFIG_VNC
-/* Helper for hmp_info_vnc_clients, _servers */
-static void hmp_info_VncBasicInfo(Monitor *mon, VncBasicInfo *info,
-                                  const char *name)
-{
-    monitor_printf(mon, "  %s: %s:%s (%s%s)\n",
-                   name,
-                   info->host,
-                   info->service,
-                   NetworkAddressFamily_str(info->family),
-                   info->websocket ? " (Websocket)" : "");
+    qmp_closefd(fdname, &err);
+    hmp_handle_error(mon, err);
 }
 
-/* Helper displaying and auth and crypt info */
-static void hmp_info_vnc_authcrypt(Monitor *mon, const char *indent,
-                                   VncPrimaryAuth auth,
-                                   VncVencryptSubAuth *vencrypt)
+void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
 {
-    monitor_printf(mon, "%sAuth: %s (Sub: %s)\n", indent,
-                   VncPrimaryAuth_str(auth),
-                   vencrypt ? VncVencryptSubAuth_str(*vencrypt) : "none");
-}
+    IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
+    IOThreadInfoList *info;
+    IOThreadInfo *value;
 
-static void hmp_info_vnc_clients(Monitor *mon, VncClientInfoList *client)
-{
-    while (client) {
-        VncClientInfo *cinfo = client->value;
-
-        hmp_info_VncBasicInfo(mon, qapi_VncClientInfo_base(cinfo), "Client");
-        monitor_printf(mon, "    x509_dname: %s\n",
-                       cinfo->has_x509_dname ?
-                       cinfo->x509_dname : "none");
-        monitor_printf(mon, "    sasl_username: %s\n",
-                       cinfo->has_sasl_username ?
-                       cinfo->sasl_username : "none");
-
-        client = client->next;
+    for (info = info_list; info; info = info->next) {
+        value = info->value;
+        monitor_printf(mon, "%s:\n", value->id);
+        monitor_printf(mon, "  thread_id=%" PRId64 "\n", value->thread_id);
+        monitor_printf(mon, "  poll-max-ns=%" PRId64 "\n", value->poll_max_ns);
+        monitor_printf(mon, "  poll-grow=%" PRId64 "\n", value->poll_grow);
+        monitor_printf(mon, "  poll-shrink=%" PRId64 "\n", value->poll_shrink);
+        monitor_printf(mon, "  aio-max-batch=%" PRId64 "\n",
+                       value->aio_max_batch);
     }
+
+    qapi_free_IOThreadInfoList(info_list);
 }
 
-static void hmp_info_vnc_servers(Monitor *mon, VncServerInfo2List *server)
+void hmp_help(Monitor *mon, const QDict *qdict)
 {
-    while (server) {
-        VncServerInfo2 *sinfo = server->value;
-        hmp_info_VncBasicInfo(mon, qapi_VncServerInfo2_base(sinfo), "Server");
-        hmp_info_vnc_authcrypt(mon, "    ", sinfo->auth,
-                               sinfo->has_vencrypt ? &sinfo->vencrypt : NULL);
-        server = server->next;
-    }
+    hmp_help_cmd(mon, qdict_get_try_str(qdict, "name"));
 }
 
-void hmp_info_vnc(Monitor *mon, const QDict *qdict)
+void hmp_info_help(Monitor *mon, const QDict *qdict)
 {
-    VncInfo2List *info2l, *info2l_head;
-    Error *err = NULL;
-
-    info2l = qmp_query_vnc_servers(&err);
-    info2l_head = info2l;
-    if (err) {
-        hmp_handle_error(mon, err);
-        return;
-    }
-    if (!info2l) {
-        monitor_printf(mon, "None\n");
-        return;
-    }
-
-    while (info2l) {
-        VncInfo2 *info = info2l->value;
-        monitor_printf(mon, "%s:\n", info->id);
-        hmp_info_vnc_servers(mon, info->server);
-        hmp_info_vnc_clients(mon, info->clients);
-        if (!info->server) {
-            /* The server entry displays its auth, we only
-             * need to display in the case of 'reverse' connections
-             * where there's no server.
-             */
-            hmp_info_vnc_authcrypt(mon, "  ", info->auth,
-                               info->has_vencrypt ? &info->vencrypt : NULL);
-        }
-        if (info->has_display) {
-            monitor_printf(mon, "  Display: %s\n", info->display);
-        }
-        info2l = info2l->next;
-    }
-
-    qapi_free_VncInfo2List(info2l_head);
-
+    hmp_help_cmd(mon, "info");
 }
-#endif
 
-#ifdef CONFIG_SPICE
-void hmp_info_spice(Monitor *mon, const QDict *qdict)
+void hmp_info_sync_profile(Monitor *mon, const QDict *qdict)
 {
-    SpiceChannelList *chan;
-    SpiceInfo *info;
-    const char *channel_name;
-    const char * const channel_names[] = {
-        [SPICE_CHANNEL_MAIN] = "main",
-        [SPICE_CHANNEL_DISPLAY] = "display",
-        [SPICE_CHANNEL_INPUTS] = "inputs",
-        [SPICE_CHANNEL_CURSOR] = "cursor",
-        [SPICE_CHANNEL_PLAYBACK] = "playback",
-        [SPICE_CHANNEL_RECORD] = "record",
-        [SPICE_CHANNEL_TUNNEL] = "tunnel",
-        [SPICE_CHANNEL_SMARTCARD] = "smartcard",
-        [SPICE_CHANNEL_USBREDIR] = "usbredir",
-        [SPICE_CHANNEL_PORT] = "port",
-#if 0
-        /* minimum spice-protocol is 0.12.3, webdav was added in 0.12.7,
-         * no easy way to #ifdef (SPICE_CHANNEL_* is a enum).  Disable
-         * as quick fix for build failures with older versions. */
-        [SPICE_CHANNEL_WEBDAV] = "webdav",
-#endif
-    };
+    int64_t max = qdict_get_try_int(qdict, "max", 10);
+    bool mean = qdict_get_try_bool(qdict, "mean", false);
+    bool coalesce = !qdict_get_try_bool(qdict, "no_coalesce", false);
+    enum QSPSortBy sort_by;
 
-    info = qmp_query_spice(NULL);
+    sort_by = mean ? QSP_SORT_BY_AVG_WAIT_TIME : QSP_SORT_BY_TOTAL_WAIT_TIME;
+    qsp_report(max, sort_by, coalesce);
+}
 
-    if (!info->enabled) {
-        monitor_printf(mon, "Server: disabled\n");
-        goto out;
-    }
+void hmp_info_history(Monitor *mon, const QDict *qdict)
+{
+    MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common);
+    int i;
+    const char *str;
 
-    monitor_printf(mon, "Server:\n");
-    if (info->has_port) {
-        monitor_printf(mon, "     address: %s:%" PRId64 "\n",
-                       info->host, info->port);
-    }
-    if (info->has_tls_port) {
-        monitor_printf(mon, "     address: %s:%" PRId64 " [tls]\n",
-                       info->host, info->tls_port);
+    if (!hmp_mon->rs) {
+        return;
     }
-    monitor_printf(mon, "    migrated: %s\n",
-                   info->migrated ? "true" : "false");
-    monitor_printf(mon, "        auth: %s\n", info->auth);
-    monitor_printf(mon, "    compiled: %s\n", info->compiled_version);
-    monitor_printf(mon, "  mouse-mode: %s\n",
-                   SpiceQueryMouseMode_str(info->mouse_mode));
-
-    if (!info->has_channels || info->channels == NULL) {
-        monitor_printf(mon, "Channels: none\n");
-    } else {
-        for (chan = info->channels; chan; chan = chan->next) {
-            monitor_printf(mon, "Channel:\n");
-            monitor_printf(mon, "     address: %s:%s%s\n",
-                           chan->value->host, chan->value->port,
-                           chan->value->tls ? " [tls]" : "");
-            monitor_printf(mon, "     session: %" PRId64 "\n",
-                           chan->value->connection_id);
-            monitor_printf(mon, "     channel: %" PRId64 ":%" PRId64 "\n",
-                           chan->value->channel_type, chan->value->channel_id);
-
-            channel_name = "unknown";
-            if (chan->value->channel_type > 0 &&
-                chan->value->channel_type < ARRAY_SIZE(channel_names) &&
-                channel_names[chan->value->channel_type]) {
-                channel_name = channel_names[chan->value->channel_type];
-            }
-
-            monitor_printf(mon, "     channel name: %s\n", channel_name);
+    i = 0;
+    for(;;) {
+        str = readline_get_history(hmp_mon->rs, i);
+        if (!str) {
+            break;
         }
+        monitor_printf(mon, "%d: '%s'\n", i, str);
+        i++;
     }
-
-out:
-    qapi_free_SpiceInfo(info);
 }
-#endif
 
-void hmp_info_balloon(Monitor *mon, const QDict *qdict)
+void hmp_logfile(Monitor *mon, const QDict *qdict)
 {
-    BalloonInfo *info;
     Error *err = NULL;
 
-    info = qmp_query_balloon(&err);
-    if (err) {
-        hmp_handle_error(mon, err);
-        return;
+    if (!qemu_set_log_filename(qdict_get_str(qdict, "filename"), &err)) {
+        error_report_err(err);
     }
-
-    monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20);
-
-    qapi_free_BalloonInfo(info);
 }
 
-static void hmp_info_pci_device(Monitor *mon, const PciDeviceInfo *dev)
+void hmp_log(Monitor *mon, const QDict *qdict)
 {
-    PciMemoryRegionList *region;
-
-    monitor_printf(mon, "  Bus %2" PRId64 ", ", dev->bus);
-    monitor_printf(mon, "device %3" PRId64 ", function %" PRId64 ":\n",
-                   dev->slot, dev->function);
-    monitor_printf(mon, "    ");
+    int mask;
+    const char *items = qdict_get_str(qdict, "items");
+    Error *err = NULL;
 
-    if (dev->class_info->has_desc) {
-        monitor_printf(mon, "%s", dev->class_info->desc);
+    if (!strcmp(items, "none")) {
+        mask = 0;
     } else {
-        monitor_printf(mon, "Class %04" PRId64, dev->class_info->q_class);
+        mask = qemu_str_to_log_mask(items);
+        if (!mask) {
+            hmp_help_cmd(mon, "log");
+            return;
+        }
     }
 
-    monitor_printf(mon, ": PCI device %04" PRIx64 ":%04" PRIx64 "\n",
-                   dev->id->vendor, dev->id->device);
-    if (dev->id->has_subsystem_vendor && dev->id->has_subsystem) {
-        monitor_printf(mon, "      PCI subsystem %04" PRIx64 ":%04" PRIx64 "\n",
-                       dev->id->subsystem_vendor, dev->id->subsystem);
+    if (!qemu_set_log(mask, &err)) {
+        error_report_err(err);
     }
+}
 
-    if (dev->has_irq) {
-        monitor_printf(mon, "      IRQ %" PRId64 ", pin %c\n",
-                       dev->irq, (char)('A' + dev->irq_pin - 1));
+void hmp_gdbserver(Monitor *mon, const QDict *qdict)
+{
+    const char *device = qdict_get_try_str(qdict, "device");
+    if (!device) {
+        device = "tcp::" DEFAULT_GDBSTUB_PORT;
     }
 
-    if (dev->has_pci_bridge) {
-        monitor_printf(mon, "      BUS %" PRId64 ".\n",
-                       dev->pci_bridge->bus->number);
-        monitor_printf(mon, "      secondary bus %" PRId64 ".\n",
-                       dev->pci_bridge->bus->secondary);
-        monitor_printf(mon, "      subordinate bus %" PRId64 ".\n",
-                       dev->pci_bridge->bus->subordinate);
-
-        monitor_printf(mon, "      IO range [0x%04"PRIx64", 0x%04"PRIx64"]\n",
-                       dev->pci_bridge->bus->io_range->base,
-                       dev->pci_bridge->bus->io_range->limit);
-
-        monitor_printf(mon,
-                       "      memory range [0x%08"PRIx64", 0x%08"PRIx64"]\n",
-                       dev->pci_bridge->bus->memory_range->base,
-                       dev->pci_bridge->bus->memory_range->limit);
-
-        monitor_printf(mon, "      prefetchable memory range "
-                       "[0x%08"PRIx64", 0x%08"PRIx64"]\n",
-                       dev->pci_bridge->bus->prefetchable_range->base,
-                       dev->pci_bridge->bus->prefetchable_range->limit);
+    if (gdbserver_start(device) < 0) {
+        monitor_printf(mon, "Could not open gdbserver on device '%s'\n",
+                       device);
+    } else if (strcmp(device, "none") == 0) {
+        monitor_printf(mon, "Disabled gdbserver\n");
+    } else {
+        monitor_printf(mon, "Waiting for gdb connection on device '%s'\n",
+                       device);
     }
+}
 
-    for (region = dev->regions; region; region = region->next) {
-        uint64_t addr, size;
-
-        addr = region->value->address;
-        size = region->value->size;
-
-        monitor_printf(mon, "      BAR%" PRId64 ": ", region->value->bar);
+void hmp_print(Monitor *mon, const QDict *qdict)
+{
+    int format = qdict_get_int(qdict, "format");
+    hwaddr val = qdict_get_int(qdict, "val");
 
-        if (!strcmp(region->value->type, "io")) {
-            monitor_printf(mon, "I/O at 0x%04" PRIx64
-                                " [0x%04" PRIx64 "].\n",
-                           addr, addr + size - 1);
-        } else {
-            monitor_printf(mon, "%d bit%s memory at 0x%08" PRIx64
-                               " [0x%08" PRIx64 "].\n",
-                           region->value->mem_type_64 ? 64 : 32,
-                           region->value->prefetch ? " prefetchable" : "",
-                           addr, addr + size - 1);
-        }
+    switch(format) {
+    case 'o':
+        monitor_printf(mon, "%#" HWADDR_PRIo, val);
+        break;
+    case 'x':
+        monitor_printf(mon, "%#" HWADDR_PRIx, val);
+        break;
+    case 'u':
+        monitor_printf(mon, "%" HWADDR_PRIu, val);
+        break;
+    default:
+    case 'd':
+        monitor_printf(mon, "%" HWADDR_PRId, val);
+        break;
+    case 'c':
+        monitor_printc(mon, val);
+        break;
     }
+    monitor_printf(mon, "\n");
+}
 
-    monitor_printf(mon, "      id \"%s\"\n", dev->qdev_id);
+void hmp_sum(Monitor *mon, const QDict *qdict)
+{
+    uint32_t addr;
+    uint16_t sum;
+    uint32_t start = qdict_get_int(qdict, "start");
+    uint32_t size = qdict_get_int(qdict, "size");
 
-    if (dev->has_pci_bridge) {
-        if (dev->pci_bridge->has_devices) {
-            PciDeviceInfoList *cdev;
-            for (cdev = dev->pci_bridge->devices; cdev; cdev = cdev->next) {
-                hmp_info_pci_device(mon, cdev->value);
-            }
-        }
+    sum = 0;
+    for(addr = start; addr < (start + size); addr++) {
+        uint8_t val = address_space_ldub(&address_space_memory, addr,
+                                         MEMTXATTRS_UNSPECIFIED, NULL);
+        /* BSD sum algorithm ('sum' Unix command) */
+        sum = (sum >> 1) | (sum << 15);
+        sum += val;
     }
+    monitor_printf(mon, "%05d\n", sum);
 }
 
-static int hmp_info_irq_foreach(Object *obj, void *opaque)
+void hmp_ioport_read(Monitor *mon, const QDict *qdict)
 {
-    InterruptStatsProvider *intc;
-    InterruptStatsProviderClass *k;
-    Monitor *mon = opaque;
+    int size = qdict_get_int(qdict, "size");
+    int addr = qdict_get_int(qdict, "addr");
+    int has_index = qdict_haskey(qdict, "index");
+    uint32_t val;
+    int suffix;
 
-    if (object_dynamic_cast(obj, TYPE_INTERRUPT_STATS_PROVIDER)) {
-        intc = INTERRUPT_STATS_PROVIDER(obj);
-        k = INTERRUPT_STATS_PROVIDER_GET_CLASS(obj);
-        uint64_t *irq_counts;
-        unsigned int nb_irqs, i;
-        if (k->get_statistics &&
-            k->get_statistics(intc, &irq_counts, &nb_irqs)) {
-            if (nb_irqs > 0) {
-                monitor_printf(mon, "IRQ statistics for %s:\n",
-                               object_get_typename(obj));
-                for (i = 0; i < nb_irqs; i++) {
-                    if (irq_counts[i] > 0) {
-                        monitor_printf(mon, "%2d: %" PRId64 "\n", i,
-                                       irq_counts[i]);
-                    }
-                }
-            }
-        } else {
-            monitor_printf(mon, "IRQ statistics not available for %s.\n",
-                           object_get_typename(obj));
-        }
+    if (has_index) {
+        int index = qdict_get_int(qdict, "index");
+        cpu_outb(addr & IOPORTS_MASK, index & 0xff);
+        addr++;
     }
+    addr &= 0xffff;
 
-    return 0;
+    switch(size) {
+    default:
+    case 1:
+        val = cpu_inb(addr);
+        suffix = 'b';
+        break;
+    case 2:
+        val = cpu_inw(addr);
+        suffix = 'w';
+        break;
+    case 4:
+        val = cpu_inl(addr);
+        suffix = 'l';
+        break;
+    }
+    monitor_printf(mon, "port%c[0x%04x] = 0x%0*x\n",
+                   suffix, addr, size * 2, val);
 }
 
-void hmp_info_irq(Monitor *mon, const QDict *qdict)
+void hmp_ioport_write(Monitor *mon, const QDict *qdict)
 {
-    object_child_foreach_recursive(object_get_root(),
-                                   hmp_info_irq_foreach, mon);
-}
+    int size = qdict_get_int(qdict, "size");
+    int addr = qdict_get_int(qdict, "addr");
+    int val = qdict_get_int(qdict, "val");
 
-static int hmp_info_pic_foreach(Object *obj, void *opaque)
-{
-    InterruptStatsProvider *intc;
-    InterruptStatsProviderClass *k;
-    Monitor *mon = opaque;
+    addr &= IOPORTS_MASK;
 
-    if (object_dynamic_cast(obj, TYPE_INTERRUPT_STATS_PROVIDER)) {
-        intc = INTERRUPT_STATS_PROVIDER(obj);
-        k = INTERRUPT_STATS_PROVIDER_GET_CLASS(obj);
-        if (k->print_info) {
-            k->print_info(intc, mon);
-        } else {
-            monitor_printf(mon, "Interrupt controller information not available for %s.\n",
-                           object_get_typename(obj));
-        }
+    switch (size) {
+    default:
+    case 1:
+        cpu_outb(addr, val);
+        break;
+    case 2:
+        cpu_outw(addr, val);
+        break;
+    case 4:
+        cpu_outl(addr, val);
+        break;
     }
-
-    return 0;
 }
 
-void hmp_info_pic(Monitor *mon, const QDict *qdict)
+void hmp_boot_set(Monitor *mon, const QDict *qdict)
 {
-    object_child_foreach_recursive(object_get_root(),
-                                   hmp_info_pic_foreach, mon);
+    Error *local_err = NULL;
+    const char *bootdevice = qdict_get_str(qdict, "bootdevice");
+
+    qemu_boot_set(bootdevice, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    } else {
+        monitor_printf(mon, "boot device list now set to %s\n", bootdevice);
+    }
 }
 
-static int hmp_info_rdma_foreach(Object *obj, void *opaque)
+void hmp_info_mtree(Monitor *mon, const QDict *qdict)
 {
-    RdmaProvider *rdma;
-    RdmaProviderClass *k;
-    Monitor *mon = opaque;
+    bool flatview = qdict_get_try_bool(qdict, "flatview", false);
+    bool dispatch_tree = qdict_get_try_bool(qdict, "dispatch_tree", false);
+    bool owner = qdict_get_try_bool(qdict, "owner", false);
+    bool disabled = qdict_get_try_bool(qdict, "disabled", false);
 
-    if (object_dynamic_cast(obj, INTERFACE_RDMA_PROVIDER)) {
-        rdma = RDMA_PROVIDER(obj);
-        k = RDMA_PROVIDER_GET_CLASS(obj);
-        if (k->print_statistics) {
-            k->print_statistics(mon, rdma);
-        } else {
-            monitor_printf(mon, "RDMA statistics not available for %s.\n",
-                           object_get_typename(obj));
-        }
-    }
-
-    return 0;
-}
-
-void hmp_info_rdma(Monitor *mon, const QDict *qdict)
-{
-    object_child_foreach_recursive(object_get_root(),
-                                   hmp_info_rdma_foreach, mon);
-}
-
-void hmp_info_pci(Monitor *mon, const QDict *qdict)
-{
-    PciInfoList *info_list, *info;
-    Error *err = NULL;
-
-    info_list = qmp_query_pci(&err);
-    if (err) {
-        monitor_printf(mon, "PCI devices not supported\n");
-        error_free(err);
-        return;
-    }
-
-    for (info = info_list; info; info = info->next) {
-        PciDeviceInfoList *dev;
-
-        for (dev = info->value->devices; dev; dev = dev->next) {
-            hmp_info_pci_device(mon, dev->value);
-        }
-    }
-
-    qapi_free_PciInfoList(info_list);
-}
-
-void hmp_info_tpm(Monitor *mon, const QDict *qdict)
-{
-    TPMInfoList *info_list, *info;
-    Error *err = NULL;
-    unsigned int c = 0;
-    TPMPassthroughOptions *tpo;
-    TPMEmulatorOptions *teo;
-
-    info_list = qmp_query_tpm(&err);
-    if (err) {
-        monitor_printf(mon, "TPM device not supported\n");
-        error_free(err);
-        return;
-    }
-
-    if (info_list) {
-        monitor_printf(mon, "TPM device:\n");
-    }
-
-    for (info = info_list; info; info = info->next) {
-        TPMInfo *ti = info->value;
-        monitor_printf(mon, " tpm%d: model=%s\n",
-                       c, TpmModel_str(ti->model));
-
-        monitor_printf(mon, "  \\ %s: type=%s",
-                       ti->id, TpmTypeOptionsKind_str(ti->options->type));
-
-        switch (ti->options->type) {
-        case TPM_TYPE_OPTIONS_KIND_PASSTHROUGH:
-            tpo = ti->options->u.passthrough.data;
-            monitor_printf(mon, "%s%s%s%s",
-                           tpo->has_path ? ",path=" : "",
-                           tpo->has_path ? tpo->path : "",
-                           tpo->has_cancel_path ? ",cancel-path=" : "",
-                           tpo->has_cancel_path ? tpo->cancel_path : "");
-            break;
-        case TPM_TYPE_OPTIONS_KIND_EMULATOR:
-            teo = ti->options->u.emulator.data;
-            monitor_printf(mon, ",chardev=%s", teo->chardev);
-            break;
-        case TPM_TYPE_OPTIONS_KIND__MAX:
-            break;
-        }
-        monitor_printf(mon, "\n");
-        c++;
-    }
-    qapi_free_TPMInfoList(info_list);
-}
-
-void hmp_quit(Monitor *mon, const QDict *qdict)
-{
-    monitor_suspend(mon);
-    qmp_quit(NULL);
-}
-
-void hmp_stop(Monitor *mon, const QDict *qdict)
-{
-    qmp_stop(NULL);
-}
-
-void hmp_sync_profile(Monitor *mon, const QDict *qdict)
-{
-    const char *op = qdict_get_try_str(qdict, "op");
-
-    if (op == NULL) {
-        bool on = qsp_is_enabled();
-
-        monitor_printf(mon, "sync-profile is %s\n", on ? "on" : "off");
-        return;
-    }
-    if (!strcmp(op, "on")) {
-        qsp_enable();
-    } else if (!strcmp(op, "off")) {
-        qsp_disable();
-    } else if (!strcmp(op, "reset")) {
-        qsp_reset();
-    } else {
-        Error *err = NULL;
-
-        error_setg(&err, QERR_INVALID_PARAMETER, op);
-        hmp_handle_error(mon, err);
-    }
-}
-
-void hmp_system_reset(Monitor *mon, const QDict *qdict)
-{
-    qmp_system_reset(NULL);
-}
-
-void hmp_system_powerdown(Monitor *mon, const QDict *qdict)
-{
-    qmp_system_powerdown(NULL);
-}
-
-void hmp_exit_preconfig(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-
-    qmp_x_exit_preconfig(&err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_cpu(Monitor *mon, const QDict *qdict)
-{
-    int64_t cpu_index;
-
-    /* XXX: drop the monitor_set_cpu() usage when all HMP commands that
-            use it are converted to the QAPI */
-    cpu_index = qdict_get_int(qdict, "index");
-    if (monitor_set_cpu(mon, cpu_index) < 0) {
-        monitor_printf(mon, "invalid CPU index\n");
-    }
-}
-
-void hmp_memsave(Monitor *mon, const QDict *qdict)
-{
-    uint32_t size = qdict_get_int(qdict, "size");
-    const char *filename = qdict_get_str(qdict, "filename");
-    uint64_t addr = qdict_get_int(qdict, "val");
-    Error *err = NULL;
-    int cpu_index = monitor_get_cpu_index(mon);
-
-    if (cpu_index < 0) {
-        monitor_printf(mon, "No CPU available\n");
-        return;
-    }
-
-    qmp_memsave(addr, size, filename, true, cpu_index, &err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_pmemsave(Monitor *mon, const QDict *qdict)
-{
-    uint32_t size = qdict_get_int(qdict, "size");
-    const char *filename = qdict_get_str(qdict, "filename");
-    uint64_t addr = qdict_get_int(qdict, "val");
-    Error *err = NULL;
-
-    qmp_pmemsave(addr, size, filename, &err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_ringbuf_write(Monitor *mon, const QDict *qdict)
-{
-    const char *chardev = qdict_get_str(qdict, "device");
-    const char *data = qdict_get_str(qdict, "data");
-    Error *err = NULL;
-
-    qmp_ringbuf_write(chardev, data, false, 0, &err);
-
-    hmp_handle_error(mon, err);
-}
-
-void hmp_ringbuf_read(Monitor *mon, const QDict *qdict)
-{
-    uint32_t size = qdict_get_int(qdict, "size");
-    const char *chardev = qdict_get_str(qdict, "device");
-    char *data;
-    Error *err = NULL;
-    int i;
-
-    data = qmp_ringbuf_read(chardev, size, false, 0, &err);
-    if (err) {
-        hmp_handle_error(mon, err);
-        return;
-    }
-
-    for (i = 0; data[i]; i++) {
-        unsigned char ch = data[i];
-
-        if (ch == '\\') {
-            monitor_printf(mon, "\\\\");
-        } else if ((ch < 0x20 && ch != '\n' && ch != '\t') || ch == 0x7F) {
-            monitor_printf(mon, "\\u%04X", ch);
-        } else {
-            monitor_printf(mon, "%c", ch);
-        }
-
-    }
-    monitor_printf(mon, "\n");
-    g_free(data);
-}
-
-void hmp_cont(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-
-    qmp_cont(&err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_system_wakeup(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-
-    qmp_system_wakeup(&err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_nmi(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-
-    qmp_inject_nmi(&err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_set_link(Monitor *mon, const QDict *qdict)
-{
-    const char *name = qdict_get_str(qdict, "name");
-    bool up = qdict_get_bool(qdict, "up");
-    Error *err = NULL;
-
-    qmp_set_link(name, up, &err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_balloon(Monitor *mon, const QDict *qdict)
-{
-    int64_t value = qdict_get_int(qdict, "value");
-    Error *err = NULL;
-
-    qmp_balloon(value, &err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_loadvm(Monitor *mon, const QDict *qdict)
-{
-    int saved_vm_running  = runstate_is_running();
-    const char *name = qdict_get_str(qdict, "name");
-    Error *err = NULL;
-
-    vm_stop(RUN_STATE_RESTORE_VM);
-
-    if (load_snapshot(name, &err) == 0 && saved_vm_running) {
-        vm_start();
-    }
-    hmp_handle_error(mon, err);
-}
-
-void hmp_savevm(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-
-    save_snapshot(qdict_get_try_str(qdict, "name"), &err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_delvm(Monitor *mon, const QDict *qdict)
-{
-    BlockDriverState *bs;
-    Error *err = NULL;
-    const char *name = qdict_get_str(qdict, "name");
-
-    if (bdrv_all_delete_snapshot(name, &bs, &err) < 0) {
-        error_prepend(&err,
-                      "deleting snapshot on device '%s': ",
-                      bdrv_get_device_name(bs));
-    }
-    hmp_handle_error(mon, err);
-}
-
-void hmp_announce_self(Monitor *mon, const QDict *qdict)
-{
-    const char *interfaces_str = qdict_get_try_str(qdict, "interfaces");
-    const char *id = qdict_get_try_str(qdict, "id");
-    AnnounceParameters *params = QAPI_CLONE(AnnounceParameters,
-                                            migrate_announce_params());
-
-    qapi_free_strList(params->interfaces);
-    params->interfaces = strList_from_comma_list(interfaces_str);
-    params->has_interfaces = params->interfaces != NULL;
-    params->id = g_strdup(id);
-    params->has_id = !!params->id;
-    qmp_announce_self(params, NULL);
-    qapi_free_AnnounceParameters(params);
-}
-
-void hmp_migrate_cancel(Monitor *mon, const QDict *qdict)
-{
-    qmp_migrate_cancel(NULL);
-}
-
-void hmp_migrate_continue(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-    const char *state = qdict_get_str(qdict, "state");
-    int val = qapi_enum_parse(&MigrationStatus_lookup, state, -1, &err);
-
-    if (val >= 0) {
-        qmp_migrate_continue(val, &err);
-    }
-
-    hmp_handle_error(mon, err);
-}
-
-void hmp_migrate_incoming(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-    const char *uri = qdict_get_str(qdict, "uri");
-
-    qmp_migrate_incoming(uri, &err);
-
-    hmp_handle_error(mon, err);
-}
-
-void hmp_migrate_recover(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-    const char *uri = qdict_get_str(qdict, "uri");
-
-    qmp_migrate_recover(uri, &err);
-
-    hmp_handle_error(mon, err);
-}
-
-void hmp_migrate_pause(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-
-    qmp_migrate_pause(&err);
-
-    hmp_handle_error(mon, err);
-}
-
-/* Kept for backwards compatibility */
-void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-
-    double value = qdict_get_double(qdict, "value");
-    qmp_migrate_set_downtime(value, &err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict)
-{
-    int64_t value = qdict_get_int(qdict, "value");
-    Error *err = NULL;
-
-    qmp_migrate_set_cache_size(value, &err);
-    hmp_handle_error(mon, err);
-}
-
-/* Kept for backwards compatibility */
-void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-
-    int64_t value = qdict_get_int(qdict, "value");
-    qmp_migrate_set_speed(value, &err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict)
-{
-    const char *cap = qdict_get_str(qdict, "capability");
-    bool state = qdict_get_bool(qdict, "state");
-    Error *err = NULL;
-    MigrationCapabilityStatusList *caps = g_malloc0(sizeof(*caps));
-    int val;
-
-    val = qapi_enum_parse(&MigrationCapability_lookup, cap, -1, &err);
-    if (val < 0) {
-        goto end;
-    }
-
-    caps->value = g_malloc0(sizeof(*caps->value));
-    caps->value->capability = val;
-    caps->value->state = state;
-    caps->next = NULL;
-    qmp_migrate_set_capabilities(caps, &err);
-
-end:
-    qapi_free_MigrationCapabilityStatusList(caps);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
-{
-    const char *param = qdict_get_str(qdict, "parameter");
-    const char *valuestr = qdict_get_str(qdict, "value");
-    Visitor *v = string_input_visitor_new(valuestr);
-    MigrateSetParameters *p = g_new0(MigrateSetParameters, 1);
-    uint64_t valuebw = 0;
-    uint64_t cache_size;
-    Error *err = NULL;
-    int val, ret;
-
-    val = qapi_enum_parse(&MigrationParameter_lookup, param, -1, &err);
-    if (val < 0) {
-        goto cleanup;
-    }
-
-    switch (val) {
-    case MIGRATION_PARAMETER_COMPRESS_LEVEL:
-        p->has_compress_level = true;
-        visit_type_int(v, param, &p->compress_level, &err);
-        break;
-    case MIGRATION_PARAMETER_COMPRESS_THREADS:
-        p->has_compress_threads = true;
-        visit_type_int(v, param, &p->compress_threads, &err);
-        break;
-    case MIGRATION_PARAMETER_COMPRESS_WAIT_THREAD:
-        p->has_compress_wait_thread = true;
-        visit_type_bool(v, param, &p->compress_wait_thread, &err);
-        break;
-    case MIGRATION_PARAMETER_DECOMPRESS_THREADS:
-        p->has_decompress_threads = true;
-        visit_type_int(v, param, &p->decompress_threads, &err);
-        break;
-    case MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD:
-        p->has_throttle_trigger_threshold = true;
-        visit_type_int(v, param, &p->throttle_trigger_threshold, &err);
-        break;
-    case MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL:
-        p->has_cpu_throttle_initial = true;
-        visit_type_int(v, param, &p->cpu_throttle_initial, &err);
-        break;
-    case MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT:
-        p->has_cpu_throttle_increment = true;
-        visit_type_int(v, param, &p->cpu_throttle_increment, &err);
-        break;
-    case MIGRATION_PARAMETER_CPU_THROTTLE_TAILSLOW:
-        p->has_cpu_throttle_tailslow = true;
-        visit_type_bool(v, param, &p->cpu_throttle_tailslow, &err);
-        break;
-    case MIGRATION_PARAMETER_MAX_CPU_THROTTLE:
-        p->has_max_cpu_throttle = true;
-        visit_type_int(v, param, &p->max_cpu_throttle, &err);
-        break;
-    case MIGRATION_PARAMETER_TLS_CREDS:
-        p->has_tls_creds = true;
-        p->tls_creds = g_new0(StrOrNull, 1);
-        p->tls_creds->type = QTYPE_QSTRING;
-        visit_type_str(v, param, &p->tls_creds->u.s, &err);
-        break;
-    case MIGRATION_PARAMETER_TLS_HOSTNAME:
-        p->has_tls_hostname = true;
-        p->tls_hostname = g_new0(StrOrNull, 1);
-        p->tls_hostname->type = QTYPE_QSTRING;
-        visit_type_str(v, param, &p->tls_hostname->u.s, &err);
-        break;
-    case MIGRATION_PARAMETER_TLS_AUTHZ:
-        p->has_tls_authz = true;
-        p->tls_authz = g_new0(StrOrNull, 1);
-        p->tls_authz->type = QTYPE_QSTRING;
-        visit_type_str(v, param, &p->tls_authz->u.s, &err);
-        break;
-    case MIGRATION_PARAMETER_MAX_BANDWIDTH:
-        p->has_max_bandwidth = true;
-        /*
-         * Can't use visit_type_size() here, because it
-         * defaults to Bytes rather than Mebibytes.
-         */
-        ret = qemu_strtosz_MiB(valuestr, NULL, &valuebw);
-        if (ret < 0 || valuebw > INT64_MAX
-            || (size_t)valuebw != valuebw) {
-            error_setg(&err, "Invalid size %s", valuestr);
-            break;
-        }
-        p->max_bandwidth = valuebw;
-        break;
-    case MIGRATION_PARAMETER_DOWNTIME_LIMIT:
-        p->has_downtime_limit = true;
-        visit_type_int(v, param, &p->downtime_limit, &err);
-        break;
-    case MIGRATION_PARAMETER_X_CHECKPOINT_DELAY:
-        p->has_x_checkpoint_delay = true;
-        visit_type_int(v, param, &p->x_checkpoint_delay, &err);
-        break;
-    case MIGRATION_PARAMETER_BLOCK_INCREMENTAL:
-        p->has_block_incremental = true;
-        visit_type_bool(v, param, &p->block_incremental, &err);
-        break;
-    case MIGRATION_PARAMETER_MULTIFD_CHANNELS:
-        p->has_multifd_channels = true;
-        visit_type_int(v, param, &p->multifd_channels, &err);
-        break;
-    case MIGRATION_PARAMETER_MULTIFD_COMPRESSION:
-        p->has_multifd_compression = true;
-        visit_type_MultiFDCompression(v, param, &p->multifd_compression,
-                                      &err);
-        break;
-    case MIGRATION_PARAMETER_MULTIFD_ZLIB_LEVEL:
-        p->has_multifd_zlib_level = true;
-        visit_type_int(v, param, &p->multifd_zlib_level, &err);
-        break;
-    case MIGRATION_PARAMETER_MULTIFD_ZSTD_LEVEL:
-        p->has_multifd_zstd_level = true;
-        visit_type_int(v, param, &p->multifd_zstd_level, &err);
-        break;
-    case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
-        p->has_xbzrle_cache_size = true;
-        if (!visit_type_size(v, param, &cache_size, &err)) {
-            break;
-        }
-        if (cache_size > INT64_MAX || (size_t)cache_size != cache_size) {
-            error_setg(&err, "Invalid size %s", valuestr);
-            break;
-        }
-        p->xbzrle_cache_size = cache_size;
-        break;
-    case MIGRATION_PARAMETER_MAX_POSTCOPY_BANDWIDTH:
-        p->has_max_postcopy_bandwidth = true;
-        visit_type_size(v, param, &p->max_postcopy_bandwidth, &err);
-        break;
-    case MIGRATION_PARAMETER_ANNOUNCE_INITIAL:
-        p->has_announce_initial = true;
-        visit_type_size(v, param, &p->announce_initial, &err);
-        break;
-    case MIGRATION_PARAMETER_ANNOUNCE_MAX:
-        p->has_announce_max = true;
-        visit_type_size(v, param, &p->announce_max, &err);
-        break;
-    case MIGRATION_PARAMETER_ANNOUNCE_ROUNDS:
-        p->has_announce_rounds = true;
-        visit_type_size(v, param, &p->announce_rounds, &err);
-        break;
-    case MIGRATION_PARAMETER_ANNOUNCE_STEP:
-        p->has_announce_step = true;
-        visit_type_size(v, param, &p->announce_step, &err);
-        break;
-    case MIGRATION_PARAMETER_BLOCK_BITMAP_MAPPING:
-        error_setg(&err, "The block-bitmap-mapping parameter can only be set "
-                   "through QMP");
-        break;
-    default:
-        assert(0);
-    }
-
-    if (err) {
-        goto cleanup;
-    }
-
-    qmp_migrate_set_parameters(p, &err);
-
- cleanup:
-    qapi_free_MigrateSetParameters(p);
-    visit_free(v);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_client_migrate_info(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-    const char *protocol = qdict_get_str(qdict, "protocol");
-    const char *hostname = qdict_get_str(qdict, "hostname");
-    bool has_port        = qdict_haskey(qdict, "port");
-    int port             = qdict_get_try_int(qdict, "port", -1);
-    bool has_tls_port    = qdict_haskey(qdict, "tls-port");
-    int tls_port         = qdict_get_try_int(qdict, "tls-port", -1);
-    const char *cert_subject = qdict_get_try_str(qdict, "cert-subject");
-
-    qmp_client_migrate_info(protocol, hostname,
-                            has_port, port, has_tls_port, tls_port,
-                            !!cert_subject, cert_subject, &err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-    qmp_migrate_start_postcopy(&err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-
-    qmp_x_colo_lost_heartbeat(&err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_set_password(Monitor *mon, const QDict *qdict)
-{
-    const char *protocol  = qdict_get_str(qdict, "protocol");
-    const char *password  = qdict_get_str(qdict, "password");
-    const char *connected = qdict_get_try_str(qdict, "connected");
-    Error *err = NULL;
-
-    qmp_set_password(protocol, password, !!connected, connected, &err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_expire_password(Monitor *mon, const QDict *qdict)
-{
-    const char *protocol  = qdict_get_str(qdict, "protocol");
-    const char *whenstr = qdict_get_str(qdict, "time");
-    Error *err = NULL;
-
-    qmp_expire_password(protocol, whenstr, &err);
-    hmp_handle_error(mon, err);
-}
-
-
-#ifdef CONFIG_VNC
-static void hmp_change_read_arg(void *opaque, const char *password,
-                                void *readline_opaque)
-{
-    qmp_change_vnc_password(password, NULL);
-    monitor_read_command(opaque, 1);
-}
-#endif
-
-void hmp_change(Monitor *mon, const QDict *qdict)
-{
-    const char *device = qdict_get_str(qdict, "device");
-    const char *target = qdict_get_str(qdict, "target");
-    const char *arg = qdict_get_try_str(qdict, "arg");
-    const char *read_only = qdict_get_try_str(qdict, "read-only-mode");
-    BlockdevChangeReadOnlyMode read_only_mode = 0;
-    Error *err = NULL;
-
-#ifdef CONFIG_VNC
-    if (strcmp(device, "vnc") == 0) {
-        if (read_only) {
-            monitor_printf(mon,
-                           "Parameter 'read-only-mode' is invalid for VNC\n");
-            return;
-        }
-        if (strcmp(target, "passwd") == 0 ||
-            strcmp(target, "password") == 0) {
-            if (!arg) {
-                MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common);
-                monitor_read_password(hmp_mon, hmp_change_read_arg, NULL);
-                return;
-            }
-        }
-        qmp_change("vnc", target, !!arg, arg, &err);
-    } else
-#endif
-    {
-        if (read_only) {
-            read_only_mode =
-                qapi_enum_parse(&BlockdevChangeReadOnlyMode_lookup,
-                                read_only,
-                                BLOCKDEV_CHANGE_READ_ONLY_MODE_RETAIN, &err);
-            if (err) {
-                goto end;
-            }
-        }
-
-        qmp_blockdev_change_medium(true, device, false, NULL, target,
-                                   !!arg, arg, !!read_only, read_only_mode,
-                                   &err);
-    }
-
-end:
-    hmp_handle_error(mon, err);
-}
-
-typedef struct HMPMigrationStatus
-{
-    QEMUTimer *timer;
-    Monitor *mon;
-    bool is_block_migration;
-} HMPMigrationStatus;
-
-static void hmp_migrate_status_cb(void *opaque)
-{
-    HMPMigrationStatus *status = opaque;
-    MigrationInfo *info;
-
-    info = qmp_query_migrate(NULL);
-    if (!info->has_status || info->status == MIGRATION_STATUS_ACTIVE ||
-        info->status == MIGRATION_STATUS_SETUP) {
-        if (info->has_disk) {
-            int progress;
-
-            if (info->disk->remaining) {
-                progress = info->disk->transferred * 100 / info->disk->total;
-            } else {
-                progress = 100;
-            }
-
-            monitor_printf(status->mon, "Completed %d %%\r", progress);
-            monitor_flush(status->mon);
-        }
-
-        timer_mod(status->timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
-    } else {
-        if (status->is_block_migration) {
-            monitor_printf(status->mon, "\n");
-        }
-        if (info->has_error_desc) {
-            error_report("%s", info->error_desc);
-        }
-        monitor_resume(status->mon);
-        timer_del(status->timer);
-        timer_free(status->timer);
-        g_free(status);
-    }
-
-    qapi_free_MigrationInfo(info);
-}
-
-void hmp_migrate(Monitor *mon, const QDict *qdict)
-{
-    bool detach = qdict_get_try_bool(qdict, "detach", false);
-    bool blk = qdict_get_try_bool(qdict, "blk", false);
-    bool inc = qdict_get_try_bool(qdict, "inc", false);
-    bool resume = qdict_get_try_bool(qdict, "resume", false);
-    const char *uri = qdict_get_str(qdict, "uri");
-    Error *err = NULL;
-
-    qmp_migrate(uri, !!blk, blk, !!inc, inc,
-                false, false, true, resume, &err);
-    if (err) {
-        hmp_handle_error(mon, err);
-        return;
-    }
-
-    if (!detach) {
-        HMPMigrationStatus *status;
-
-        if (monitor_suspend(mon) < 0) {
-            monitor_printf(mon, "terminal does not allow synchronous "
-                           "migration, continuing detached\n");
-            return;
-        }
-
-        status = g_malloc0(sizeof(*status));
-        status->mon = mon;
-        status->is_block_migration = blk || inc;
-        status->timer = timer_new_ms(QEMU_CLOCK_REALTIME, hmp_migrate_status_cb,
-                                          status);
-        timer_mod(status->timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
-    }
-}
-
-void hmp_netdev_add(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-    QemuOpts *opts;
-    const char *type = qdict_get_try_str(qdict, "type");
-
-    if (type && is_help_option(type)) {
-        show_netdevs();
-        return;
-    }
-    opts = qemu_opts_from_qdict(qemu_find_opts("netdev"), qdict, &err);
-    if (err) {
-        goto out;
-    }
-
-    netdev_add(opts, &err);
-    if (err) {
-        qemu_opts_del(opts);
-    }
-
-out:
-    hmp_handle_error(mon, err);
-}
-
-void hmp_netdev_del(Monitor *mon, const QDict *qdict)
-{
-    const char *id = qdict_get_str(qdict, "id");
-    Error *err = NULL;
-
-    qmp_netdev_del(id, &err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_object_add(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-    QemuOpts *opts;
-    Object *obj = NULL;
-
-    opts = qemu_opts_from_qdict(qemu_find_opts("object"), qdict, &err);
-    if (err) {
-        goto end;
-    }
-
-    obj = user_creatable_add_opts(opts, &err);
-    qemu_opts_del(opts);
-
-end:
-    hmp_handle_error(mon, err);
-
-    if (obj) {
-        object_unref(obj);
-    }
-}
-
-void hmp_getfd(Monitor *mon, const QDict *qdict)
-{
-    const char *fdname = qdict_get_str(qdict, "fdname");
-    Error *err = NULL;
-
-    qmp_getfd(fdname, &err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_closefd(Monitor *mon, const QDict *qdict)
-{
-    const char *fdname = qdict_get_str(qdict, "fdname");
-    Error *err = NULL;
-
-    qmp_closefd(fdname, &err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_sendkey(Monitor *mon, const QDict *qdict)
-{
-    const char *keys = qdict_get_str(qdict, "keys");
-    KeyValueList *keylist, *head = NULL, *tmp = NULL;
-    int has_hold_time = qdict_haskey(qdict, "hold-time");
-    int hold_time = qdict_get_try_int(qdict, "hold-time", -1);
-    Error *err = NULL;
-    const char *separator;
-    int keyname_len;
-
-    while (1) {
-        separator = qemu_strchrnul(keys, '-');
-        keyname_len = separator - keys;
-
-        /* Be compatible with old interface, convert user inputted "<" */
-        if (keys[0] == '<' && keyname_len == 1) {
-            keys = "less";
-            keyname_len = 4;
-        }
-
-        keylist = g_malloc0(sizeof(*keylist));
-        keylist->value = g_malloc0(sizeof(*keylist->value));
-
-        if (!head) {
-            head = keylist;
-        }
-        if (tmp) {
-            tmp->next = keylist;
-        }
-        tmp = keylist;
-
-        if (strstart(keys, "0x", NULL)) {
-            char *endp;
-            int value = strtoul(keys, &endp, 0);
-            assert(endp <= keys + keyname_len);
-            if (endp != keys + keyname_len) {
-                goto err_out;
-            }
-            keylist->value->type = KEY_VALUE_KIND_NUMBER;
-            keylist->value->u.number.data = value;
-        } else {
-            int idx = index_from_key(keys, keyname_len);
-            if (idx == Q_KEY_CODE__MAX) {
-                goto err_out;
-            }
-            keylist->value->type = KEY_VALUE_KIND_QCODE;
-            keylist->value->u.qcode.data = idx;
-        }
-
-        if (!*separator) {
-            break;
-        }
-        keys = separator + 1;
-    }
-
-    qmp_send_key(head, has_hold_time, hold_time, &err);
-    hmp_handle_error(mon, err);
-
-out:
-    qapi_free_KeyValueList(head);
-    return;
-
-err_out:
-    monitor_printf(mon, "invalid parameter: %.*s\n", keyname_len, keys);
-    goto out;
-}
-
-void coroutine_fn
-hmp_screendump(Monitor *mon, const QDict *qdict)
-{
-    const char *filename = qdict_get_str(qdict, "filename");
-    const char *id = qdict_get_try_str(qdict, "device");
-    int64_t head = qdict_get_try_int(qdict, "head", 0);
-    Error *err = NULL;
-
-    qmp_screendump(filename, id != NULL, id, id != NULL, head, &err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_chardev_add(Monitor *mon, const QDict *qdict)
-{
-    const char *args = qdict_get_str(qdict, "args");
-    Error *err = NULL;
-    QemuOpts *opts;
-
-    opts = qemu_opts_parse_noisily(qemu_find_opts("chardev"), args, true);
-    if (opts == NULL) {
-        error_setg(&err, "Parsing chardev args failed");
-    } else {
-        qemu_chr_new_from_opts(opts, NULL, &err);
-        qemu_opts_del(opts);
-    }
-    hmp_handle_error(mon, err);
-}
-
-void hmp_chardev_change(Monitor *mon, const QDict *qdict)
-{
-    const char *args = qdict_get_str(qdict, "args");
-    const char *id;
-    Error *err = NULL;
-    ChardevBackend *backend = NULL;
-    ChardevReturn *ret = NULL;
-    QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("chardev"), args,
-                                             true);
-    if (!opts) {
-        error_setg(&err, "Parsing chardev args failed");
-        goto end;
-    }
-
-    id = qdict_get_str(qdict, "id");
-    if (qemu_opts_id(opts)) {
-        error_setg(&err, "Unexpected 'id' parameter");
-        goto end;
-    }
-
-    backend = qemu_chr_parse_opts(opts, &err);
-    if (!backend) {
-        goto end;
-    }
-
-    ret = qmp_chardev_change(id, backend, &err);
-
-end:
-    qapi_free_ChardevReturn(ret);
-    qapi_free_ChardevBackend(backend);
-    qemu_opts_del(opts);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_chardev_remove(Monitor *mon, const QDict *qdict)
-{
-    Error *local_err = NULL;
-
-    qmp_chardev_remove(qdict_get_str(qdict, "id"), &local_err);
-    hmp_handle_error(mon, local_err);
-}
-
-void hmp_chardev_send_break(Monitor *mon, const QDict *qdict)
-{
-    Error *local_err = NULL;
-
-    qmp_chardev_send_break(qdict_get_str(qdict, "id"), &local_err);
-    hmp_handle_error(mon, local_err);
-}
-
-void hmp_object_del(Monitor *mon, const QDict *qdict)
-{
-    const char *id = qdict_get_str(qdict, "id");
-    Error *err = NULL;
-
-    user_creatable_del(id, &err);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-    MemoryDeviceInfoList *info_list = qmp_query_memory_devices(&err);
-    MemoryDeviceInfoList *info;
-    VirtioPMEMDeviceInfo *vpi;
-    VirtioMEMDeviceInfo *vmi;
-    MemoryDeviceInfo *value;
-    PCDIMMDeviceInfo *di;
-
-    for (info = info_list; info; info = info->next) {
-        value = info->value;
-
-        if (value) {
-            switch (value->type) {
-            case MEMORY_DEVICE_INFO_KIND_DIMM:
-            case MEMORY_DEVICE_INFO_KIND_NVDIMM:
-                di = value->type == MEMORY_DEVICE_INFO_KIND_DIMM ?
-                     value->u.dimm.data : value->u.nvdimm.data;
-                monitor_printf(mon, "Memory device [%s]: \"%s\"\n",
-                               MemoryDeviceInfoKind_str(value->type),
-                               di->id ? di->id : "");
-                monitor_printf(mon, "  addr: 0x%" PRIx64 "\n", di->addr);
-                monitor_printf(mon, "  slot: %" PRId64 "\n", di->slot);
-                monitor_printf(mon, "  node: %" PRId64 "\n", di->node);
-                monitor_printf(mon, "  size: %" PRIu64 "\n", di->size);
-                monitor_printf(mon, "  memdev: %s\n", di->memdev);
-                monitor_printf(mon, "  hotplugged: %s\n",
-                               di->hotplugged ? "true" : "false");
-                monitor_printf(mon, "  hotpluggable: %s\n",
-                               di->hotpluggable ? "true" : "false");
-                break;
-            case MEMORY_DEVICE_INFO_KIND_VIRTIO_PMEM:
-                vpi = value->u.virtio_pmem.data;
-                monitor_printf(mon, "Memory device [%s]: \"%s\"\n",
-                               MemoryDeviceInfoKind_str(value->type),
-                               vpi->id ? vpi->id : "");
-                monitor_printf(mon, "  memaddr: 0x%" PRIx64 "\n", vpi->memaddr);
-                monitor_printf(mon, "  size: %" PRIu64 "\n", vpi->size);
-                monitor_printf(mon, "  memdev: %s\n", vpi->memdev);
-                break;
-            case MEMORY_DEVICE_INFO_KIND_VIRTIO_MEM:
-                vmi = value->u.virtio_mem.data;
-                monitor_printf(mon, "Memory device [%s]: \"%s\"\n",
-                               MemoryDeviceInfoKind_str(value->type),
-                               vmi->id ? vmi->id : "");
-                monitor_printf(mon, "  memaddr: 0x%" PRIx64 "\n", vmi->memaddr);
-                monitor_printf(mon, "  node: %" PRId64 "\n", vmi->node);
-                monitor_printf(mon, "  requested-size: %" PRIu64 "\n",
-                               vmi->requested_size);
-                monitor_printf(mon, "  size: %" PRIu64 "\n", vmi->size);
-                monitor_printf(mon, "  max-size: %" PRIu64 "\n", vmi->max_size);
-                monitor_printf(mon, "  block-size: %" PRIu64 "\n",
-                               vmi->block_size);
-                monitor_printf(mon, "  memdev: %s\n", vmi->memdev);
-                break;
-            default:
-                g_assert_not_reached();
-            }
-        }
-    }
-
-    qapi_free_MemoryDeviceInfoList(info_list);
-    hmp_handle_error(mon, err);
-}
-
-void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
-{
-    IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
-    IOThreadInfoList *info;
-    IOThreadInfo *value;
-
-    for (info = info_list; info; info = info->next) {
-        value = info->value;
-        monitor_printf(mon, "%s:\n", value->id);
-        monitor_printf(mon, "  thread_id=%" PRId64 "\n", value->thread_id);
-        monitor_printf(mon, "  poll-max-ns=%" PRId64 "\n", value->poll_max_ns);
-        monitor_printf(mon, "  poll-grow=%" PRId64 "\n", value->poll_grow);
-        monitor_printf(mon, "  poll-shrink=%" PRId64 "\n", value->poll_shrink);
-    }
-
-    qapi_free_IOThreadInfoList(info_list);
-}
-
-void hmp_rocker(Monitor *mon, const QDict *qdict)
-{
-    const char *name = qdict_get_str(qdict, "name");
-    RockerSwitch *rocker;
-    Error *err = NULL;
-
-    rocker = qmp_query_rocker(name, &err);
-    if (err != NULL) {
-        hmp_handle_error(mon, err);
-        return;
-    }
-
-    monitor_printf(mon, "name: %s\n", rocker->name);
-    monitor_printf(mon, "id: 0x%" PRIx64 "\n", rocker->id);
-    monitor_printf(mon, "ports: %d\n", rocker->ports);
-
-    qapi_free_RockerSwitch(rocker);
-}
-
-void hmp_rocker_ports(Monitor *mon, const QDict *qdict)
-{
-    RockerPortList *list, *port;
-    const char *name = qdict_get_str(qdict, "name");
-    Error *err = NULL;
-
-    list = qmp_query_rocker_ports(name, &err);
-    if (err != NULL) {
-        hmp_handle_error(mon, err);
-        return;
-    }
-
-    monitor_printf(mon, "            ena/    speed/ auto\n");
-    monitor_printf(mon, "      port  link    duplex neg?\n");
-
-    for (port = list; port; port = port->next) {
-        monitor_printf(mon, "%10s  %-4s   %-3s  %2s  %-3s\n",
-                       port->value->name,
-                       port->value->enabled ? port->value->link_up ?
-                       "up" : "down" : "!ena",
-                       port->value->speed == 10000 ? "10G" : "??",
-                       port->value->duplex ? "FD" : "HD",
-                       port->value->autoneg ? "Yes" : "No");
-    }
-
-    qapi_free_RockerPortList(list);
-}
-
-void hmp_rocker_of_dpa_flows(Monitor *mon, const QDict *qdict)
-{
-    RockerOfDpaFlowList *list, *info;
-    const char *name = qdict_get_str(qdict, "name");
-    uint32_t tbl_id = qdict_get_try_int(qdict, "tbl_id", -1);
-    Error *err = NULL;
-
-    list = qmp_query_rocker_of_dpa_flows(name, tbl_id != -1, tbl_id, &err);
-    if (err != NULL) {
-        hmp_handle_error(mon, err);
-        return;
-    }
-
-    monitor_printf(mon, "prio tbl hits key(mask) --> actions\n");
-
-    for (info = list; info; info = info->next) {
-        RockerOfDpaFlow *flow = info->value;
-        RockerOfDpaFlowKey *key = flow->key;
-        RockerOfDpaFlowMask *mask = flow->mask;
-        RockerOfDpaFlowAction *action = flow->action;
-
-        if (flow->hits) {
-            monitor_printf(mon, "%-4d %-3d %-4" PRIu64,
-                           key->priority, key->tbl_id, flow->hits);
-        } else {
-            monitor_printf(mon, "%-4d %-3d     ",
-                           key->priority, key->tbl_id);
-        }
-
-        if (key->has_in_pport) {
-            monitor_printf(mon, " pport %d", key->in_pport);
-            if (mask->has_in_pport) {
-                monitor_printf(mon, "(0x%x)", mask->in_pport);
-            }
-        }
-
-        if (key->has_vlan_id) {
-            monitor_printf(mon, " vlan %d",
-                           key->vlan_id & VLAN_VID_MASK);
-            if (mask->has_vlan_id) {
-                monitor_printf(mon, "(0x%x)", mask->vlan_id);
-            }
-        }
-
-        if (key->has_tunnel_id) {
-            monitor_printf(mon, " tunnel %d", key->tunnel_id);
-            if (mask->has_tunnel_id) {
-                monitor_printf(mon, "(0x%x)", mask->tunnel_id);
-            }
-        }
-
-        if (key->has_eth_type) {
-            switch (key->eth_type) {
-            case 0x0806:
-                monitor_printf(mon, " ARP");
-                break;
-            case 0x0800:
-                monitor_printf(mon, " IP");
-                break;
-            case 0x86dd:
-                monitor_printf(mon, " IPv6");
-                break;
-            case 0x8809:
-                monitor_printf(mon, " LACP");
-                break;
-            case 0x88cc:
-                monitor_printf(mon, " LLDP");
-                break;
-            default:
-                monitor_printf(mon, " eth type 0x%04x", key->eth_type);
-                break;
-            }
-        }
-
-        if (key->has_eth_src) {
-            if ((strcmp(key->eth_src, "01:00:00:00:00:00") == 0) &&
-                (mask->has_eth_src) &&
-                (strcmp(mask->eth_src, "01:00:00:00:00:00") == 0)) {
-                monitor_printf(mon, " src <any mcast/bcast>");
-            } else if ((strcmp(key->eth_src, "00:00:00:00:00:00") == 0) &&
-                (mask->has_eth_src) &&
-                (strcmp(mask->eth_src, "01:00:00:00:00:00") == 0)) {
-                monitor_printf(mon, " src <any ucast>");
-            } else {
-                monitor_printf(mon, " src %s", key->eth_src);
-                if (mask->has_eth_src) {
-                    monitor_printf(mon, "(%s)", mask->eth_src);
-                }
-            }
-        }
-
-        if (key->has_eth_dst) {
-            if ((strcmp(key->eth_dst, "01:00:00:00:00:00") == 0) &&
-                (mask->has_eth_dst) &&
-                (strcmp(mask->eth_dst, "01:00:00:00:00:00") == 0)) {
-                monitor_printf(mon, " dst <any mcast/bcast>");
-            } else if ((strcmp(key->eth_dst, "00:00:00:00:00:00") == 0) &&
-                (mask->has_eth_dst) &&
-                (strcmp(mask->eth_dst, "01:00:00:00:00:00") == 0)) {
-                monitor_printf(mon, " dst <any ucast>");
-            } else {
-                monitor_printf(mon, " dst %s", key->eth_dst);
-                if (mask->has_eth_dst) {
-                    monitor_printf(mon, "(%s)", mask->eth_dst);
-                }
-            }
-        }
-
-        if (key->has_ip_proto) {
-            monitor_printf(mon, " proto %d", key->ip_proto);
-            if (mask->has_ip_proto) {
-                monitor_printf(mon, "(0x%x)", mask->ip_proto);
-            }
-        }
-
-        if (key->has_ip_tos) {
-            monitor_printf(mon, " TOS %d", key->ip_tos);
-            if (mask->has_ip_tos) {
-                monitor_printf(mon, "(0x%x)", mask->ip_tos);
-            }
-        }
-
-        if (key->has_ip_dst) {
-            monitor_printf(mon, " dst %s", key->ip_dst);
-        }
-
-        if (action->has_goto_tbl || action->has_group_id ||
-            action->has_new_vlan_id) {
-            monitor_printf(mon, " -->");
-        }
-
-        if (action->has_new_vlan_id) {
-            monitor_printf(mon, " apply new vlan %d",
-                           ntohs(action->new_vlan_id));
-        }
-
-        if (action->has_group_id) {
-            monitor_printf(mon, " write group 0x%08x", action->group_id);
-        }
-
-        if (action->has_goto_tbl) {
-            monitor_printf(mon, " goto tbl %d", action->goto_tbl);
-        }
-
-        monitor_printf(mon, "\n");
-    }
-
-    qapi_free_RockerOfDpaFlowList(list);
-}
-
-void hmp_rocker_of_dpa_groups(Monitor *mon, const QDict *qdict)
-{
-    RockerOfDpaGroupList *list, *g;
-    const char *name = qdict_get_str(qdict, "name");
-    uint8_t type = qdict_get_try_int(qdict, "type", 9);
-    Error *err = NULL;
-
-    list = qmp_query_rocker_of_dpa_groups(name, type != 9, type, &err);
-    if (err != NULL) {
-        hmp_handle_error(mon, err);
-        return;
-    }
-
-    monitor_printf(mon, "id (decode) --> buckets\n");
-
-    for (g = list; g; g = g->next) {
-        RockerOfDpaGroup *group = g->value;
-        bool set = false;
-
-        monitor_printf(mon, "0x%08x", group->id);
-
-        monitor_printf(mon, " (type %s", group->type == 0 ? "L2 interface" :
-                                         group->type == 1 ? "L2 rewrite" :
-                                         group->type == 2 ? "L3 unicast" :
-                                         group->type == 3 ? "L2 multicast" :
-                                         group->type == 4 ? "L2 flood" :
-                                         group->type == 5 ? "L3 interface" :
-                                         group->type == 6 ? "L3 multicast" :
-                                         group->type == 7 ? "L3 ECMP" :
-                                         group->type == 8 ? "L2 overlay" :
-                                         "unknown");
-
-        if (group->has_vlan_id) {
-            monitor_printf(mon, " vlan %d", group->vlan_id);
-        }
-
-        if (group->has_pport) {
-            monitor_printf(mon, " pport %d", group->pport);
-        }
-
-        if (group->has_index) {
-            monitor_printf(mon, " index %d", group->index);
-        }
-
-        monitor_printf(mon, ") -->");
-
-        if (group->has_set_vlan_id && group->set_vlan_id) {
-            set = true;
-            monitor_printf(mon, " set vlan %d",
-                           group->set_vlan_id & VLAN_VID_MASK);
-        }
-
-        if (group->has_set_eth_src) {
-            if (!set) {
-                set = true;
-                monitor_printf(mon, " set");
-            }
-            monitor_printf(mon, " src %s", group->set_eth_src);
-        }
-
-        if (group->has_set_eth_dst) {
-            if (!set) {
-                monitor_printf(mon, " set");
-            }
-            monitor_printf(mon, " dst %s", group->set_eth_dst);
-        }
-
-        if (group->has_ttl_check && group->ttl_check) {
-            monitor_printf(mon, " check TTL");
-        }
-
-        if (group->has_group_id && group->group_id) {
-            monitor_printf(mon, " group id 0x%08x", group->group_id);
-        }
-
-        if (group->has_pop_vlan && group->pop_vlan) {
-            monitor_printf(mon, " pop vlan");
-        }
-
-        if (group->has_out_pport) {
-            monitor_printf(mon, " out pport %d", group->out_pport);
-        }
-
-        if (group->has_group_ids) {
-            struct uint32List *id;
-
-            monitor_printf(mon, " groups [");
-            for (id = group->group_ids; id; id = id->next) {
-                monitor_printf(mon, "0x%08x", id->value);
-                if (id->next) {
-                    monitor_printf(mon, ",");
-                }
-            }
-            monitor_printf(mon, "]");
-        }
-
-        monitor_printf(mon, "\n");
-    }
-
-    qapi_free_RockerOfDpaGroupList(list);
-}
-
-void hmp_info_ramblock(Monitor *mon, const QDict *qdict)
-{
-    ram_block_dump(mon);
-}
-
-void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-    GuidInfo *info = qmp_query_vm_generation_id(&err);
-    if (info) {
-        monitor_printf(mon, "%s\n", info->guid);
-    }
-    hmp_handle_error(mon, err);
-    qapi_free_GuidInfo(info);
-}
-
-void hmp_info_memory_size_summary(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-    MemoryInfo *info = qmp_query_memory_size_summary(&err);
-    if (info) {
-        monitor_printf(mon, "base memory: %" PRIu64 "\n",
-                       info->base_memory);
-
-        if (info->has_plugged_memory) {
-            monitor_printf(mon, "plugged memory: %" PRIu64 "\n",
-                           info->plugged_memory);
-        }
-
-        qapi_free_MemoryInfo(info);
-    }
-    hmp_handle_error(mon, err);
+    mtree_info(flatview, dispatch_tree, owner, disabled);
 }
diff --git a/monitor/hmp-target.c b/monitor/hmp-target.c
new file mode 100644 (file)
index 0000000..1eb72ac
--- /dev/null
@@ -0,0 +1,178 @@
+/*
+ * QEMU monitor, target-dependent part
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor-internal.h"
+#include "monitor/qdev.h"
+#include "net/slirp.h"
+#include "sysemu/device_tree.h"
+#include "monitor/hmp-target.h"
+#include "monitor/hmp.h"
+#include "block/block-hmp-cmds.h"
+#include "qapi/qapi-commands-control.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+
+#if defined(TARGET_S390X)
+#include "hw/s390x/storage-keys.h"
+#include "hw/s390x/storage-attributes.h"
+#endif
+
+/* Make devices configuration available for use in hmp-commands*.hx templates */
+#include CONFIG_DEVICES
+
+static HMPCommand hmp_info_cmds[];
+
+/**
+ * Is @name in the '|' separated list of names @list?
+ */
+int hmp_compare_cmd(const char *name, const char *list)
+{
+    const char *p, *pstart;
+    int len;
+    len = strlen(name);
+    p = list;
+    for (;;) {
+        pstart = p;
+        p = qemu_strchrnul(p, '|');
+        if ((p - pstart) == len && !memcmp(pstart, name, len)) {
+            return 1;
+        }
+        if (*p == '\0') {
+            break;
+        }
+        p++;
+    }
+    return 0;
+}
+
+/* Please update hmp-commands.hx when adding or changing commands */
+static HMPCommand hmp_info_cmds[] = {
+#include "hmp-commands-info.h"
+    { NULL, NULL, },
+};
+
+/* hmp_cmds and hmp_info_cmds would be sorted at runtime */
+HMPCommand hmp_cmds[] = {
+#include "hmp-commands.h"
+    { NULL, NULL, },
+};
+
+/*
+ * Set @pval to the value in the register identified by @name.
+ * return 0 if OK, -1 if not found
+ */
+int get_monitor_def(Monitor *mon, int64_t *pval, const char *name)
+{
+    const MonitorDef *md = target_monitor_defs();
+    CPUState *cs = mon_get_cpu(mon);
+    void *ptr;
+    uint64_t tmp = 0;
+    int ret;
+
+    if (cs == NULL || md == NULL) {
+        return -1;
+    }
+
+    for(; md->name != NULL; md++) {
+        if (hmp_compare_cmd(name, md->name)) {
+            if (md->get_value) {
+                *pval = md->get_value(mon, md, md->offset);
+            } else {
+                CPUArchState *env = mon_get_cpu_env(mon);
+                ptr = (uint8_t *)env + md->offset;
+                switch(md->type) {
+                case MD_I32:
+                    *pval = *(int32_t *)ptr;
+                    break;
+                case MD_TLONG:
+                    *pval = *(target_long *)ptr;
+                    break;
+                default:
+                    *pval = 0;
+                    break;
+                }
+            }
+            return 0;
+        }
+    }
+
+    ret = target_get_monitor_def(cs, name, &tmp);
+    if (!ret) {
+        *pval = (target_long) tmp;
+    }
+
+    return ret;
+}
+
+static int
+compare_mon_cmd(const void *a, const void *b)
+{
+    return strcmp(((const HMPCommand *)a)->name,
+            ((const HMPCommand *)b)->name);
+}
+
+static void __attribute__((__constructor__)) sortcmdlist(void)
+{
+    qsort(hmp_cmds, ARRAY_SIZE(hmp_cmds) - 1,
+          sizeof(*hmp_cmds),
+          compare_mon_cmd);
+    qsort(hmp_info_cmds, ARRAY_SIZE(hmp_info_cmds) - 1,
+          sizeof(*hmp_info_cmds),
+          compare_mon_cmd);
+}
+
+void monitor_register_hmp(const char *name, bool info,
+                          void (*cmd)(Monitor *mon, const QDict *qdict))
+{
+    HMPCommand *table = info ? hmp_info_cmds : hmp_cmds;
+
+    while (table->name != NULL) {
+        if (strcmp(table->name, name) == 0) {
+            g_assert(table->cmd == NULL && table->cmd_info_hrt == NULL);
+            table->cmd = cmd;
+            return;
+        }
+        table++;
+    }
+    g_assert_not_reached();
+}
+
+void monitor_register_hmp_info_hrt(const char *name,
+                                   HumanReadableText *(*handler)(Error **errp))
+{
+    HMPCommand *table = hmp_info_cmds;
+
+    while (table->name != NULL) {
+        if (strcmp(table->name, name) == 0) {
+            g_assert(table->cmd == NULL && table->cmd_info_hrt == NULL);
+            table->cmd_info_hrt = handler;
+            return;
+        }
+        table++;
+    }
+    g_assert_not_reached();
+}
index 1204233999fbdfb020cf0669dcb56cf8fc601d76..69c1b7e98abb9c53791b7824fddb73748358fe44 100644 (file)
@@ -24,8 +24,9 @@
 
 #include "qemu/osdep.h"
 #include <dirent.h>
+#include "hw/qdev-core.h"
 #include "monitor-internal.h"
-#include "qapi/error.h"
+#include "monitor/hmp.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qnum.h"
 #include "qemu/config-file.h"
@@ -35,7 +36,6 @@
 #include "qemu/option.h"
 #include "qemu/units.h"
 #include "sysemu/block-backend.h"
-#include "sysemu/runstate.h"
 #include "trace.h"
 
 static void monitor_command_cb(void *opaque, const char *cmdline,
@@ -213,6 +213,11 @@ static bool cmd_can_preconfig(const HMPCommand *cmd)
     return strchr(cmd->flags, 'p');
 }
 
+static bool cmd_available(const HMPCommand *cmd)
+{
+    return phase_check(PHASE_MACHINE_READY) || cmd_can_preconfig(cmd);
+}
+
 static void help_cmd_dump_one(Monitor *mon,
                               const HMPCommand *cmd,
                               char **prefix_args,
@@ -220,7 +225,7 @@ static void help_cmd_dump_one(Monitor *mon,
 {
     int i;
 
-    if (runstate_check(RUN_STATE_PRECONFIG) && !cmd_can_preconfig(cmd)) {
+    if (!cmd_available(cmd)) {
         return;
     }
 
@@ -248,8 +253,7 @@ static void help_cmd_dump(Monitor *mon, const HMPCommand *cmds,
     /* Find one entry to dump */
     for (cmd = cmds; cmd->name != NULL; cmd++) {
         if (hmp_compare_cmd(args[arg_index], cmd->name) &&
-            ((!runstate_check(RUN_STATE_PRECONFIG) ||
-                cmd_can_preconfig(cmd)))) {
+            cmd_available(cmd)) {
             if (cmd->sub_table) {
                 /* continue with next arg */
                 help_cmd_dump(mon, cmd->sub_table,
@@ -268,7 +272,7 @@ static void help_cmd_dump(Monitor *mon, const HMPCommand *cmds,
     }
 }
 
-void help_cmd(Monitor *mon, const char *name)
+void hmp_help_cmd(Monitor *mon, const char *name)
 {
     char *args[MAX_ARGS];
     int nb_args = 0;
@@ -279,10 +283,15 @@ void help_cmd(Monitor *mon, const char *name)
         if (!strcmp(name, "log")) {
             const QEMULogItem *item;
             monitor_printf(mon, "Log items (comma separated):\n");
-            monitor_printf(mon, "%-10s %s\n", "none", "remove all logs");
+            monitor_printf(mon, "%-15s %s\n", "none", "remove all logs");
             for (item = qemu_log_items; item->mask != 0; item++) {
-                monitor_printf(mon, "%-10s %s\n", item->name, item->help);
+                monitor_printf(mon, "%-15s %s\n", item->name, item->help);
             }
+#ifdef CONFIG_TRACE_LOG
+            monitor_printf(mon, "trace:PATTERN   enable trace events\n");
+            monitor_printf(mon, "\nUse \"log trace:help\" to get a list of "
+                           "trace events.\n\n");
+#endif
             return;
         }
 
@@ -302,8 +311,8 @@ void help_cmd(Monitor *mon, const char *name)
 static const char *pch;
 static sigjmp_buf expr_env;
 
-static void GCC_FMT_ATTR(2, 3) QEMU_NORETURN
-expr_error(Monitor *mon, const char *fmt, ...)
+static G_NORETURN G_GNUC_PRINTF(2, 3)
+void expr_error(Monitor *mon, const char *fmt, ...)
 {
     va_list ap;
     va_start(ap, fmt);
@@ -653,9 +662,9 @@ static const HMPCommand *monitor_parse_command(MonitorHMP *hmp_mon,
                        (int)(p - cmdp_start), cmdp_start);
         return NULL;
     }
-    if (runstate_check(RUN_STATE_PRECONFIG) && !cmd_can_preconfig(cmd)) {
-        monitor_printf(mon, "Command '%.*s' not available with -preconfig "
-                            "until after exit_preconfig.\n",
+    if (!cmd_available(cmd)) {
+        monitor_printf(mon, "Command '%.*s' not available "
+                            "until machine initialization has completed.\n",
                        (int)(p - cmdp_start), cmdp_start);
         return NULL;
     }
@@ -975,6 +984,7 @@ static QDict *monitor_parse_arguments(Monitor *mon,
             {
                 const char *tmp = p;
                 int skip_key = 0;
+                int ret;
                 /* option */
 
                 c = *typestr++;
@@ -997,11 +1007,27 @@ static QDict *monitor_parse_arguments(Monitor *mon,
                     }
                     if (skip_key) {
                         p = tmp;
+                    } else if (*typestr == 's') {
+                        /* has option with string value */
+                        typestr++;
+                        tmp = p++;
+                        while (qemu_isspace(*p)) {
+                            p++;
+                        }
+                        ret = get_str(buf, sizeof(buf), &p);
+                        if (ret < 0) {
+                            monitor_printf(mon, "%s: value expected for -%c\n",
+                                           cmd->name, *tmp);
+                            goto fail;
+                        }
+                        qdict_put_str(qdict, key, buf);
                     } else {
-                        /* has option */
+                        /* has boolean option */
                         p++;
                         qdict_put_bool(qdict, key, true);
                     }
+                } else if (*typestr == 's') {
+                    typestr++;
                 }
             }
             break;
@@ -1056,6 +1082,31 @@ fail:
     return NULL;
 }
 
+static void hmp_info_human_readable_text(Monitor *mon,
+                                         HumanReadableText *(*handler)(Error **))
+{
+    Error *err = NULL;
+    g_autoptr(HumanReadableText) info = handler(&err);
+
+    if (hmp_handle_error(mon, err)) {
+        return;
+    }
+
+    monitor_puts(mon, info->human_readable_text);
+}
+
+static void handle_hmp_command_exec(Monitor *mon,
+                                    const HMPCommand *cmd,
+                                    QDict *qdict)
+{
+    if (cmd->cmd_info_hrt) {
+        hmp_info_human_readable_text(mon,
+                                     cmd->cmd_info_hrt);
+    } else {
+        cmd->cmd(mon, qdict);
+    }
+}
+
 typedef struct HandleHmpCommandCo {
     Monitor *mon;
     const HMPCommand *cmd;
@@ -1066,7 +1117,7 @@ typedef struct HandleHmpCommandCo {
 static void handle_hmp_command_co(void *opaque)
 {
     HandleHmpCommandCo *data = opaque;
-    data->cmd->cmd(data->mon, data->qdict);
+    handle_hmp_command_exec(data->mon, data->cmd, data->qdict);
     monitor_set_cur(qemu_coroutine_self(), NULL);
     data->done = true;
 }
@@ -1084,6 +1135,13 @@ void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
         return;
     }
 
+    if (!cmd->cmd && !cmd->cmd_info_hrt) {
+        /* FIXME: is it useful to try autoload modules here ??? */
+        monitor_printf(&mon->common, "Command \"%.*s\" is not available.\n",
+                       (int)(cmdline - cmd_start), cmd_start);
+        return;
+    }
+
     qdict = monitor_parse_arguments(&mon->common, &cmdline, cmd);
     if (!qdict) {
         while (cmdline > cmd_start && qemu_isspace(cmdline[-1])) {
@@ -1097,7 +1155,7 @@ void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
     if (!cmd->coroutine) {
         /* old_mon is non-NULL when called from qmp_human_monitor_command() */
         Monitor *old_mon = monitor_set_cur(qemu_coroutine_self(), &mon->common);
-        cmd->cmd(&mon->common, qdict);
+        handle_hmp_command_exec(&mon->common, cmd, qdict);
         monitor_set_cur(qemu_coroutine_self(), old_mon);
     } else {
         HandleHmpCommandCo data = {
@@ -1109,7 +1167,7 @@ void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
         Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, &data);
         monitor_set_cur(co, &mon->common);
         aio_co_enter(qemu_get_aio_context(), co);
-        AIO_WAIT_WHILE(qemu_get_aio_context(), !data.done);
+        AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done);
     }
 
     qobject_unref(qdict);
@@ -1131,9 +1189,7 @@ static void cmd_completion(MonitorHMP *mon, const char *name, const char *list)
         }
         memcpy(cmd, pstart, len);
         cmd[len] = '\0';
-        if (name[0] == '\0' || !strncmp(name, cmd, strlen(name))) {
-            readline_add_completion(mon->rs, cmd);
-        }
+        readline_add_completion_of(mon->rs, name, cmd);
         if (*p == '\0') {
             break;
         }
@@ -1212,7 +1268,7 @@ static void monitor_find_completion_by_table(MonitorHMP *mon,
 {
     const char *cmdname;
     int i;
-    const char *ptype, *old_ptype, *str, *name;
+    const char *ptype, *old_ptype, *str;
     const HMPCommand *cmd;
     BlockBackend *blk = NULL;
 
@@ -1225,8 +1281,7 @@ static void monitor_find_completion_by_table(MonitorHMP *mon,
         }
         readline_set_completion_index(mon->rs, strlen(cmdname));
         for (cmd = cmd_table; cmd->name != NULL; cmd++) {
-            if (!runstate_check(RUN_STATE_PRECONFIG) ||
-                 cmd_can_preconfig(cmd)) {
+            if (cmd_available(cmd)) {
                 cmd_completion(mon, cmdname, cmd->name);
             }
         }
@@ -1234,8 +1289,7 @@ static void monitor_find_completion_by_table(MonitorHMP *mon,
         /* find the command */
         for (cmd = cmd_table; cmd->name != NULL; cmd++) {
             if (hmp_compare_cmd(args[0], cmd->name) &&
-                (!runstate_check(RUN_STATE_PRECONFIG) ||
-                 cmd_can_preconfig(cmd))) {
+                cmd_available(cmd)) {
                 break;
             }
         }
@@ -1279,11 +1333,7 @@ static void monitor_find_completion_by_table(MonitorHMP *mon,
             /* block device name completion */
             readline_set_completion_index(mon->rs, strlen(str));
             while ((blk = blk_next(blk)) != NULL) {
-                name = blk_name(blk);
-                if (str[0] == '\0' ||
-                    !strncmp(name, str, strlen(str))) {
-                    readline_add_completion(mon->rs, name);
-                }
+                readline_add_completion_of(mon->rs, str, blk_name(blk));
             }
             break;
         case 's':
@@ -1351,45 +1401,42 @@ static void monitor_read(void *opaque, const uint8_t *buf, int size)
 static void monitor_event(void *opaque, QEMUChrEvent event)
 {
     Monitor *mon = opaque;
-    MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common);
 
     switch (event) {
     case CHR_EVENT_MUX_IN:
         qemu_mutex_lock(&mon->mon_lock);
-        mon->mux_out = 0;
-        qemu_mutex_unlock(&mon->mon_lock);
-        if (mon->reset_seen) {
-            readline_restart(hmp_mon->rs);
+        if (mon->mux_out) {
+            mon->mux_out = 0;
             monitor_resume(mon);
-            monitor_flush(mon);
-        } else {
-            qatomic_mb_set(&mon->suspend_cnt, 0);
         }
+        qemu_mutex_unlock(&mon->mon_lock);
         break;
 
     case CHR_EVENT_MUX_OUT:
-        if (mon->reset_seen) {
-            if (qatomic_mb_read(&mon->suspend_cnt) == 0) {
-                monitor_printf(mon, "\n");
+        qemu_mutex_lock(&mon->mon_lock);
+        if (!mon->mux_out) {
+            if (mon->reset_seen && !mon->suspend_cnt) {
+                monitor_puts_locked(mon, "\n");
+            } else {
+                monitor_flush_locked(mon);
             }
-            monitor_flush(mon);
             monitor_suspend(mon);
-        } else {
-            qatomic_inc(&mon->suspend_cnt);
+            mon->mux_out = 1;
         }
-        qemu_mutex_lock(&mon->mon_lock);
-        mon->mux_out = 1;
         qemu_mutex_unlock(&mon->mon_lock);
         break;
 
     case CHR_EVENT_OPENED:
         monitor_printf(mon, "QEMU %s monitor - type 'help' for more "
                        "information\n", QEMU_VERSION);
+        qemu_mutex_lock(&mon->mon_lock);
+        mon->reset_seen = 1;
         if (!mon->mux_out) {
-            readline_restart(hmp_mon->rs);
-            readline_show_prompt(hmp_mon->rs);
+            /* Suspend-resume forces the prompt to be printed.  */
+            monitor_suspend(mon);
+            monitor_resume(mon);
         }
-        mon->reset_seen = 1;
+        qemu_mutex_unlock(&mon->mon_lock);
         mon_refcount++;
         break;
 
@@ -1409,7 +1456,7 @@ static void monitor_event(void *opaque, QEMUChrEvent event)
  * These functions just adapt the readline interface in a typesafe way.  We
  * could cast function pointers but that discards compiler checks.
  */
-static void GCC_FMT_ATTR(2, 3) monitor_readline_printf(void *opaque,
+static void G_GNUC_PRINTF(2, 3) monitor_readline_printf(void *opaque,
                                                        const char *fmt, ...)
 {
     MonitorHMP *mon = opaque;
index 6d00985ace7a4456d4214199f112d45107b06f38..5269492cf055d66e1b3062bd0c819f0f63c5bd6b 100644 (file)
@@ -1,9 +1,11 @@
 qmp_ss.add(files('monitor.c', 'qmp.c', 'qmp-cmds-control.c'))
 
-softmmu_ss.add(files(
+system_ss.add(files(
+  'fds.c',
   'hmp-cmds.c',
   'hmp.c',
 ))
-softmmu_ss.add([spice_headers, files('qmp-cmds.c')])
+system_ss.add([spice_headers, files('qmp-cmds.c')])
 
-specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: [files('misc.c'), spice])
+specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
+               if_true: [files( 'hmp-cmds-target.c', 'hmp-target.c'), spice])
diff --git a/monitor/misc.c b/monitor/misc.c
deleted file mode 100644 (file)
index 398211a..0000000
+++ /dev/null
@@ -1,2185 +0,0 @@
-/*
- * QEMU monitor
- *
- * Copyright (c) 2003-2004 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "monitor-internal.h"
-#include "cpu.h"
-#include "monitor/qdev.h"
-#include "hw/usb.h"
-#include "hw/pci/pci.h"
-#include "sysemu/watchdog.h"
-#include "hw/loader.h"
-#include "exec/gdbstub.h"
-#include "net/net.h"
-#include "net/slirp.h"
-#include "ui/qemu-spice.h"
-#include "qemu/config-file.h"
-#include "qemu/ctype.h"
-#include "ui/console.h"
-#include "ui/input.h"
-#include "audio/audio.h"
-#include "disas/disas.h"
-#include "sysemu/balloon.h"
-#include "qemu/timer.h"
-#include "sysemu/hw_accel.h"
-#include "sysemu/runstate.h"
-#include "authz/list.h"
-#include "qapi/util.h"
-#include "sysemu/blockdev.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/tcg.h"
-#include "sysemu/tpm.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qerror.h"
-#include "qapi/qmp/qstring.h"
-#include "qom/object_interfaces.h"
-#include "trace/control.h"
-#include "monitor/hmp-target.h"
-#include "monitor/hmp.h"
-#ifdef CONFIG_TRACE_SIMPLE
-#include "trace/simple.h"
-#endif
-#include "exec/memory.h"
-#include "exec/exec-all.h"
-#include "qemu/option.h"
-#include "qemu/thread.h"
-#include "block/qapi.h"
-#include "block/block-hmp-cmds.h"
-#include "qapi/qapi-commands-char.h"
-#include "qapi/qapi-commands-control.h"
-#include "qapi/qapi-commands-migration.h"
-#include "qapi/qapi-commands-misc.h"
-#include "qapi/qapi-commands-qom.h"
-#include "qapi/qapi-commands-trace.h"
-#include "qapi/qapi-init-commands.h"
-#include "qapi/error.h"
-#include "qapi/qmp-event.h"
-#include "sysemu/cpus.h"
-#include "qemu/cutils.h"
-#include "tcg/tcg.h"
-
-#if defined(TARGET_S390X)
-#include "hw/s390x/storage-keys.h"
-#include "hw/s390x/storage-attributes.h"
-#endif
-
-/* file descriptors passed via SCM_RIGHTS */
-typedef struct mon_fd_t mon_fd_t;
-struct mon_fd_t {
-    char *name;
-    int fd;
-    QLIST_ENTRY(mon_fd_t) next;
-};
-
-/* file descriptor associated with a file descriptor set */
-typedef struct MonFdsetFd MonFdsetFd;
-struct MonFdsetFd {
-    int fd;
-    bool removed;
-    char *opaque;
-    QLIST_ENTRY(MonFdsetFd) next;
-};
-
-/* file descriptor set containing fds passed via SCM_RIGHTS */
-typedef struct MonFdset MonFdset;
-struct MonFdset {
-    int64_t id;
-    QLIST_HEAD(, MonFdsetFd) fds;
-    QLIST_HEAD(, MonFdsetFd) dup_fds;
-    QLIST_ENTRY(MonFdset) next;
-};
-
-/* Protects mon_fdsets */
-static QemuMutex mon_fdsets_lock;
-static QLIST_HEAD(, MonFdset) mon_fdsets;
-
-static HMPCommand hmp_info_cmds[];
-
-char *qmp_human_monitor_command(const char *command_line, bool has_cpu_index,
-                                int64_t cpu_index, Error **errp)
-{
-    char *output = NULL;
-    MonitorHMP hmp = {};
-
-    monitor_data_init(&hmp.common, false, true, false);
-
-    if (has_cpu_index) {
-        int ret = monitor_set_cpu(&hmp.common, cpu_index);
-        if (ret < 0) {
-            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
-                       "a CPU number");
-            goto out;
-        }
-    }
-
-    handle_hmp_command(&hmp, command_line);
-
-    WITH_QEMU_LOCK_GUARD(&hmp.common.mon_lock) {
-        if (qstring_get_length(hmp.common.outbuf) > 0) {
-            output = g_strdup(qstring_get_str(hmp.common.outbuf));
-        } else {
-            output = g_strdup("");
-        }
-    }
-
-out:
-    monitor_data_destroy(&hmp.common);
-    return output;
-}
-
-/**
- * Is @name in the '|' separated list of names @list?
- */
-int hmp_compare_cmd(const char *name, const char *list)
-{
-    const char *p, *pstart;
-    int len;
-    len = strlen(name);
-    p = list;
-    for (;;) {
-        pstart = p;
-        p = qemu_strchrnul(p, '|');
-        if ((p - pstart) == len && !memcmp(pstart, name, len)) {
-            return 1;
-        }
-        if (*p == '\0') {
-            break;
-        }
-        p++;
-    }
-    return 0;
-}
-
-static void do_help_cmd(Monitor *mon, const QDict *qdict)
-{
-    help_cmd(mon, qdict_get_try_str(qdict, "name"));
-}
-
-static void hmp_trace_event(Monitor *mon, const QDict *qdict)
-{
-    const char *tp_name = qdict_get_str(qdict, "name");
-    bool new_state = qdict_get_bool(qdict, "option");
-    bool has_vcpu = qdict_haskey(qdict, "vcpu");
-    int vcpu = qdict_get_try_int(qdict, "vcpu", 0);
-    Error *local_err = NULL;
-
-    if (vcpu < 0) {
-        monitor_printf(mon, "argument vcpu must be positive");
-        return;
-    }
-
-    qmp_trace_event_set_state(tp_name, new_state, true, true, has_vcpu, vcpu, &local_err);
-    if (local_err) {
-        error_report_err(local_err);
-    }
-}
-
-#ifdef CONFIG_TRACE_SIMPLE
-static void hmp_trace_file(Monitor *mon, const QDict *qdict)
-{
-    const char *op = qdict_get_try_str(qdict, "op");
-    const char *arg = qdict_get_try_str(qdict, "arg");
-
-    if (!op) {
-        st_print_trace_file_status();
-    } else if (!strcmp(op, "on")) {
-        st_set_trace_file_enabled(true);
-    } else if (!strcmp(op, "off")) {
-        st_set_trace_file_enabled(false);
-    } else if (!strcmp(op, "flush")) {
-        st_flush_trace_buffer();
-    } else if (!strcmp(op, "set")) {
-        if (arg) {
-            st_set_trace_file(arg);
-        }
-    } else {
-        monitor_printf(mon, "unexpected argument \"%s\"\n", op);
-        help_cmd(mon, "trace-file");
-    }
-}
-#endif
-
-static void hmp_info_help(Monitor *mon, const QDict *qdict)
-{
-    help_cmd(mon, "info");
-}
-
-static void monitor_init_qmp_commands(void)
-{
-    /*
-     * Two command lists:
-     * - qmp_commands contains all QMP commands
-     * - qmp_cap_negotiation_commands contains just
-     *   "qmp_capabilities", to enforce capability negotiation
-     */
-
-    qmp_init_marshal(&qmp_commands);
-
-    qmp_register_command(&qmp_commands, "query-qmp-schema",
-                         qmp_query_qmp_schema, QCO_ALLOW_PRECONFIG);
-    qmp_register_command(&qmp_commands, "device_add", qmp_device_add,
-                         QCO_NO_OPTIONS);
-    qmp_register_command(&qmp_commands, "object-add", qmp_object_add,
-                         QCO_NO_OPTIONS);
-
-    QTAILQ_INIT(&qmp_cap_negotiation_commands);
-    qmp_register_command(&qmp_cap_negotiation_commands, "qmp_capabilities",
-                         qmp_marshal_qmp_capabilities, QCO_ALLOW_PRECONFIG);
-}
-
-/* Set the current CPU defined by the user. Callers must hold BQL. */
-int monitor_set_cpu(Monitor *mon, int cpu_index)
-{
-    CPUState *cpu;
-
-    cpu = qemu_get_cpu(cpu_index);
-    if (cpu == NULL) {
-        return -1;
-    }
-    g_free(mon->mon_cpu_path);
-    mon->mon_cpu_path = object_get_canonical_path(OBJECT(cpu));
-    return 0;
-}
-
-/* Callers must hold BQL. */
-static CPUState *mon_get_cpu_sync(Monitor *mon, bool synchronize)
-{
-    CPUState *cpu = NULL;
-
-    if (mon->mon_cpu_path) {
-        cpu = (CPUState *) object_resolve_path_type(mon->mon_cpu_path,
-                                                    TYPE_CPU, NULL);
-        if (!cpu) {
-            g_free(mon->mon_cpu_path);
-            mon->mon_cpu_path = NULL;
-        }
-    }
-    if (!mon->mon_cpu_path) {
-        if (!first_cpu) {
-            return NULL;
-        }
-        monitor_set_cpu(mon, first_cpu->cpu_index);
-        cpu = first_cpu;
-    }
-    assert(cpu != NULL);
-    if (synchronize) {
-        cpu_synchronize_state(cpu);
-    }
-    return cpu;
-}
-
-CPUState *mon_get_cpu(Monitor *mon)
-{
-    return mon_get_cpu_sync(mon, true);
-}
-
-CPUArchState *mon_get_cpu_env(Monitor *mon)
-{
-    CPUState *cs = mon_get_cpu(mon);
-
-    return cs ? cs->env_ptr : NULL;
-}
-
-int monitor_get_cpu_index(Monitor *mon)
-{
-    CPUState *cs = mon_get_cpu_sync(mon, false);
-
-    return cs ? cs->cpu_index : UNASSIGNED_CPU_INDEX;
-}
-
-static void hmp_info_registers(Monitor *mon, const QDict *qdict)
-{
-    bool all_cpus = qdict_get_try_bool(qdict, "cpustate_all", false);
-    CPUState *cs;
-
-    if (all_cpus) {
-        CPU_FOREACH(cs) {
-            monitor_printf(mon, "\nCPU#%d\n", cs->cpu_index);
-            cpu_dump_state(cs, NULL, CPU_DUMP_FPU);
-        }
-    } else {
-        cs = mon_get_cpu(mon);
-
-        if (!cs) {
-            monitor_printf(mon, "No CPU available\n");
-            return;
-        }
-
-        cpu_dump_state(cs, NULL, CPU_DUMP_FPU);
-    }
-}
-
-#ifdef CONFIG_TCG
-static void hmp_info_jit(Monitor *mon, const QDict *qdict)
-{
-    if (!tcg_enabled()) {
-        error_report("JIT information is only available with accel=tcg");
-        return;
-    }
-
-    dump_exec_info();
-    dump_drift_info();
-}
-
-static void hmp_info_opcount(Monitor *mon, const QDict *qdict)
-{
-    dump_opcount_info();
-}
-#endif
-
-static void hmp_info_sync_profile(Monitor *mon, const QDict *qdict)
-{
-    int64_t max = qdict_get_try_int(qdict, "max", 10);
-    bool mean = qdict_get_try_bool(qdict, "mean", false);
-    bool coalesce = !qdict_get_try_bool(qdict, "no_coalesce", false);
-    enum QSPSortBy sort_by;
-
-    sort_by = mean ? QSP_SORT_BY_AVG_WAIT_TIME : QSP_SORT_BY_TOTAL_WAIT_TIME;
-    qsp_report(max, sort_by, coalesce);
-}
-
-static void hmp_info_history(Monitor *mon, const QDict *qdict)
-{
-    MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common);
-    int i;
-    const char *str;
-
-    if (!hmp_mon->rs) {
-        return;
-    }
-    i = 0;
-    for(;;) {
-        str = readline_get_history(hmp_mon->rs, i);
-        if (!str) {
-            break;
-        }
-        monitor_printf(mon, "%d: '%s'\n", i, str);
-        i++;
-    }
-}
-
-static void hmp_info_cpustats(Monitor *mon, const QDict *qdict)
-{
-    CPUState *cs = mon_get_cpu(mon);
-
-    if (!cs) {
-        monitor_printf(mon, "No CPU available\n");
-        return;
-    }
-    cpu_dump_statistics(cs, 0);
-}
-
-static void hmp_info_trace_events(Monitor *mon, const QDict *qdict)
-{
-    const char *name = qdict_get_try_str(qdict, "name");
-    bool has_vcpu = qdict_haskey(qdict, "vcpu");
-    int vcpu = qdict_get_try_int(qdict, "vcpu", 0);
-    TraceEventInfoList *events;
-    TraceEventInfoList *elem;
-    Error *local_err = NULL;
-
-    if (name == NULL) {
-        name = "*";
-    }
-    if (vcpu < 0) {
-        monitor_printf(mon, "argument vcpu must be positive");
-        return;
-    }
-
-    events = qmp_trace_event_get_state(name, has_vcpu, vcpu, &local_err);
-    if (local_err) {
-        error_report_err(local_err);
-        return;
-    }
-
-    for (elem = events; elem != NULL; elem = elem->next) {
-        monitor_printf(mon, "%s : state %u\n",
-                       elem->value->name,
-                       elem->value->state == TRACE_EVENT_STATE_ENABLED ? 1 : 0);
-    }
-    qapi_free_TraceEventInfoList(events);
-}
-
-void qmp_client_migrate_info(const char *protocol, const char *hostname,
-                             bool has_port, int64_t port,
-                             bool has_tls_port, int64_t tls_port,
-                             bool has_cert_subject, const char *cert_subject,
-                             Error **errp)
-{
-    if (strcmp(protocol, "spice") == 0) {
-        if (!qemu_using_spice(errp)) {
-            return;
-        }
-
-        if (!has_port && !has_tls_port) {
-            error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port");
-            return;
-        }
-
-        if (qemu_spice.migrate_info(hostname,
-                                    has_port ? port : -1,
-                                    has_tls_port ? tls_port : -1,
-                                    cert_subject)) {
-            error_setg(errp, QERR_UNDEFINED_ERROR);
-            return;
-        }
-        return;
-    }
-
-    error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "spice");
-}
-
-static void hmp_logfile(Monitor *mon, const QDict *qdict)
-{
-    Error *err = NULL;
-
-    qemu_set_log_filename(qdict_get_str(qdict, "filename"), &err);
-    if (err) {
-        error_report_err(err);
-    }
-}
-
-static void hmp_log(Monitor *mon, const QDict *qdict)
-{
-    int mask;
-    const char *items = qdict_get_str(qdict, "items");
-
-    if (!strcmp(items, "none")) {
-        mask = 0;
-    } else {
-        mask = qemu_str_to_log_mask(items);
-        if (!mask) {
-            help_cmd(mon, "log");
-            return;
-        }
-    }
-    qemu_set_log(mask);
-}
-
-static void hmp_singlestep(Monitor *mon, const QDict *qdict)
-{
-    const char *option = qdict_get_try_str(qdict, "option");
-    if (!option || !strcmp(option, "on")) {
-        singlestep = 1;
-    } else if (!strcmp(option, "off")) {
-        singlestep = 0;
-    } else {
-        monitor_printf(mon, "unexpected option %s\n", option);
-    }
-}
-
-static void hmp_gdbserver(Monitor *mon, const QDict *qdict)
-{
-    const char *device = qdict_get_try_str(qdict, "device");
-    if (!device)
-        device = "tcp::" DEFAULT_GDBSTUB_PORT;
-    if (gdbserver_start(device) < 0) {
-        monitor_printf(mon, "Could not open gdbserver on device '%s'\n",
-                       device);
-    } else if (strcmp(device, "none") == 0) {
-        monitor_printf(mon, "Disabled gdbserver\n");
-    } else {
-        monitor_printf(mon, "Waiting for gdb connection on device '%s'\n",
-                       device);
-    }
-}
-
-static void hmp_watchdog_action(Monitor *mon, const QDict *qdict)
-{
-    const char *action = qdict_get_str(qdict, "action");
-    if (select_watchdog_action(action) == -1) {
-        monitor_printf(mon, "Unknown watchdog action '%s'\n", action);
-    }
-}
-
-static void monitor_printc(Monitor *mon, int c)
-{
-    monitor_printf(mon, "'");
-    switch(c) {
-    case '\'':
-        monitor_printf(mon, "\\'");
-        break;
-    case '\\':
-        monitor_printf(mon, "\\\\");
-        break;
-    case '\n':
-        monitor_printf(mon, "\\n");
-        break;
-    case '\r':
-        monitor_printf(mon, "\\r");
-        break;
-    default:
-        if (c >= 32 && c <= 126) {
-            monitor_printf(mon, "%c", c);
-        } else {
-            monitor_printf(mon, "\\x%02x", c);
-        }
-        break;
-    }
-    monitor_printf(mon, "'");
-}
-
-static void memory_dump(Monitor *mon, int count, int format, int wsize,
-                        hwaddr addr, int is_physical)
-{
-    int l, line_size, i, max_digits, len;
-    uint8_t buf[16];
-    uint64_t v;
-    CPUState *cs = mon_get_cpu(mon);
-
-    if (!cs && (format == 'i' || !is_physical)) {
-        monitor_printf(mon, "Can not dump without CPU\n");
-        return;
-    }
-
-    if (format == 'i') {
-        monitor_disas(mon, cs, addr, count, is_physical);
-        return;
-    }
-
-    len = wsize * count;
-    if (wsize == 1)
-        line_size = 8;
-    else
-        line_size = 16;
-    max_digits = 0;
-
-    switch(format) {
-    case 'o':
-        max_digits = DIV_ROUND_UP(wsize * 8, 3);
-        break;
-    default:
-    case 'x':
-        max_digits = (wsize * 8) / 4;
-        break;
-    case 'u':
-    case 'd':
-        max_digits = DIV_ROUND_UP(wsize * 8 * 10, 33);
-        break;
-    case 'c':
-        wsize = 1;
-        break;
-    }
-
-    while (len > 0) {
-        if (is_physical)
-            monitor_printf(mon, TARGET_FMT_plx ":", addr);
-        else
-            monitor_printf(mon, TARGET_FMT_lx ":", (target_ulong)addr);
-        l = len;
-        if (l > line_size)
-            l = line_size;
-        if (is_physical) {
-            AddressSpace *as = cs ? cs->as : &address_space_memory;
-            MemTxResult r = address_space_read(as, addr,
-                                               MEMTXATTRS_UNSPECIFIED, buf, l);
-            if (r != MEMTX_OK) {
-                monitor_printf(mon, " Cannot access memory\n");
-                break;
-            }
-        } else {
-            if (cpu_memory_rw_debug(cs, addr, buf, l, 0) < 0) {
-                monitor_printf(mon, " Cannot access memory\n");
-                break;
-            }
-        }
-        i = 0;
-        while (i < l) {
-            switch(wsize) {
-            default:
-            case 1:
-                v = ldub_p(buf + i);
-                break;
-            case 2:
-                v = lduw_p(buf + i);
-                break;
-            case 4:
-                v = (uint32_t)ldl_p(buf + i);
-                break;
-            case 8:
-                v = ldq_p(buf + i);
-                break;
-            }
-            monitor_printf(mon, " ");
-            switch(format) {
-            case 'o':
-                monitor_printf(mon, "%#*" PRIo64, max_digits, v);
-                break;
-            case 'x':
-                monitor_printf(mon, "0x%0*" PRIx64, max_digits, v);
-                break;
-            case 'u':
-                monitor_printf(mon, "%*" PRIu64, max_digits, v);
-                break;
-            case 'd':
-                monitor_printf(mon, "%*" PRId64, max_digits, v);
-                break;
-            case 'c':
-                monitor_printc(mon, v);
-                break;
-            }
-            i += wsize;
-        }
-        monitor_printf(mon, "\n");
-        addr += l;
-        len -= l;
-    }
-}
-
-static void hmp_memory_dump(Monitor *mon, const QDict *qdict)
-{
-    int count = qdict_get_int(qdict, "count");
-    int format = qdict_get_int(qdict, "format");
-    int size = qdict_get_int(qdict, "size");
-    target_long addr = qdict_get_int(qdict, "addr");
-
-    memory_dump(mon, count, format, size, addr, 0);
-}
-
-static void hmp_physical_memory_dump(Monitor *mon, const QDict *qdict)
-{
-    int count = qdict_get_int(qdict, "count");
-    int format = qdict_get_int(qdict, "format");
-    int size = qdict_get_int(qdict, "size");
-    hwaddr addr = qdict_get_int(qdict, "addr");
-
-    memory_dump(mon, count, format, size, addr, 1);
-}
-
-static void *gpa2hva(MemoryRegion **p_mr, hwaddr addr, Error **errp)
-{
-    MemoryRegionSection mrs = memory_region_find(get_system_memory(),
-                                                 addr, 1);
-
-    if (!mrs.mr) {
-        error_setg(errp, "No memory is mapped at address 0x%" HWADDR_PRIx, addr);
-        return NULL;
-    }
-
-    if (!memory_region_is_ram(mrs.mr) && !memory_region_is_romd(mrs.mr)) {
-        error_setg(errp, "Memory at address 0x%" HWADDR_PRIx "is not RAM", addr);
-        memory_region_unref(mrs.mr);
-        return NULL;
-    }
-
-    *p_mr = mrs.mr;
-    return qemu_map_ram_ptr(mrs.mr->ram_block, mrs.offset_within_region);
-}
-
-static void hmp_gpa2hva(Monitor *mon, const QDict *qdict)
-{
-    hwaddr addr = qdict_get_int(qdict, "addr");
-    Error *local_err = NULL;
-    MemoryRegion *mr = NULL;
-    void *ptr;
-
-    ptr = gpa2hva(&mr, addr, &local_err);
-    if (local_err) {
-        error_report_err(local_err);
-        return;
-    }
-
-    monitor_printf(mon, "Host virtual address for 0x%" HWADDR_PRIx
-                   " (%s) is %p\n",
-                   addr, mr->name, ptr);
-
-    memory_region_unref(mr);
-}
-
-static void hmp_gva2gpa(Monitor *mon, const QDict *qdict)
-{
-    target_ulong addr = qdict_get_int(qdict, "addr");
-    MemTxAttrs attrs;
-    CPUState *cs = mon_get_cpu(mon);
-    hwaddr gpa;
-
-    if (!cs) {
-        monitor_printf(mon, "No cpu\n");
-        return;
-    }
-
-    gpa  = cpu_get_phys_page_attrs_debug(cs, addr & TARGET_PAGE_MASK, &attrs);
-    if (gpa == -1) {
-        monitor_printf(mon, "Unmapped\n");
-    } else {
-        monitor_printf(mon, "gpa: %#" HWADDR_PRIx "\n",
-                       gpa + (addr & ~TARGET_PAGE_MASK));
-    }
-}
-
-#ifdef CONFIG_LINUX
-static uint64_t vtop(void *ptr, Error **errp)
-{
-    uint64_t pinfo;
-    uint64_t ret = -1;
-    uintptr_t addr = (uintptr_t) ptr;
-    uintptr_t pagesize = qemu_real_host_page_size;
-    off_t offset = addr / pagesize * sizeof(pinfo);
-    int fd;
-
-    fd = open("/proc/self/pagemap", O_RDONLY);
-    if (fd == -1) {
-        error_setg_errno(errp, errno, "Cannot open /proc/self/pagemap");
-        return -1;
-    }
-
-    /* Force copy-on-write if necessary.  */
-    qatomic_add((uint8_t *)ptr, 0);
-
-    if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) {
-        error_setg_errno(errp, errno, "Cannot read pagemap");
-        goto out;
-    }
-    if ((pinfo & (1ull << 63)) == 0) {
-        error_setg(errp, "Page not present");
-        goto out;
-    }
-    ret = ((pinfo & 0x007fffffffffffffull) * pagesize) | (addr & (pagesize - 1));
-
-out:
-    close(fd);
-    return ret;
-}
-
-static void hmp_gpa2hpa(Monitor *mon, const QDict *qdict)
-{
-    hwaddr addr = qdict_get_int(qdict, "addr");
-    Error *local_err = NULL;
-    MemoryRegion *mr = NULL;
-    void *ptr;
-    uint64_t physaddr;
-
-    ptr = gpa2hva(&mr, addr, &local_err);
-    if (local_err) {
-        error_report_err(local_err);
-        return;
-    }
-
-    physaddr = vtop(ptr, &local_err);
-    if (local_err) {
-        error_report_err(local_err);
-    } else {
-        monitor_printf(mon, "Host physical address for 0x%" HWADDR_PRIx
-                       " (%s) is 0x%" PRIx64 "\n",
-                       addr, mr->name, (uint64_t) physaddr);
-    }
-
-    memory_region_unref(mr);
-}
-#endif
-
-static void do_print(Monitor *mon, const QDict *qdict)
-{
-    int format = qdict_get_int(qdict, "format");
-    hwaddr val = qdict_get_int(qdict, "val");
-
-    switch(format) {
-    case 'o':
-        monitor_printf(mon, "%#" HWADDR_PRIo, val);
-        break;
-    case 'x':
-        monitor_printf(mon, "%#" HWADDR_PRIx, val);
-        break;
-    case 'u':
-        monitor_printf(mon, "%" HWADDR_PRIu, val);
-        break;
-    default:
-    case 'd':
-        monitor_printf(mon, "%" HWADDR_PRId, val);
-        break;
-    case 'c':
-        monitor_printc(mon, val);
-        break;
-    }
-    monitor_printf(mon, "\n");
-}
-
-static void hmp_sum(Monitor *mon, const QDict *qdict)
-{
-    uint32_t addr;
-    uint16_t sum;
-    uint32_t start = qdict_get_int(qdict, "start");
-    uint32_t size = qdict_get_int(qdict, "size");
-
-    sum = 0;
-    for(addr = start; addr < (start + size); addr++) {
-        uint8_t val = address_space_ldub(&address_space_memory, addr,
-                                         MEMTXATTRS_UNSPECIFIED, NULL);
-        /* BSD sum algorithm ('sum' Unix command) */
-        sum = (sum >> 1) | (sum << 15);
-        sum += val;
-    }
-    monitor_printf(mon, "%05d\n", sum);
-}
-
-static int mouse_button_state;
-
-static void hmp_mouse_move(Monitor *mon, const QDict *qdict)
-{
-    int dx, dy, dz, button;
-    const char *dx_str = qdict_get_str(qdict, "dx_str");
-    const char *dy_str = qdict_get_str(qdict, "dy_str");
-    const char *dz_str = qdict_get_try_str(qdict, "dz_str");
-
-    dx = strtol(dx_str, NULL, 0);
-    dy = strtol(dy_str, NULL, 0);
-    qemu_input_queue_rel(NULL, INPUT_AXIS_X, dx);
-    qemu_input_queue_rel(NULL, INPUT_AXIS_Y, dy);
-
-    if (dz_str) {
-        dz = strtol(dz_str, NULL, 0);
-        if (dz != 0) {
-            button = (dz > 0) ? INPUT_BUTTON_WHEEL_UP : INPUT_BUTTON_WHEEL_DOWN;
-            qemu_input_queue_btn(NULL, button, true);
-            qemu_input_event_sync();
-            qemu_input_queue_btn(NULL, button, false);
-        }
-    }
-    qemu_input_event_sync();
-}
-
-static void hmp_mouse_button(Monitor *mon, const QDict *qdict)
-{
-    static uint32_t bmap[INPUT_BUTTON__MAX] = {
-        [INPUT_BUTTON_LEFT]       = MOUSE_EVENT_LBUTTON,
-        [INPUT_BUTTON_MIDDLE]     = MOUSE_EVENT_MBUTTON,
-        [INPUT_BUTTON_RIGHT]      = MOUSE_EVENT_RBUTTON,
-    };
-    int button_state = qdict_get_int(qdict, "button_state");
-
-    if (mouse_button_state == button_state) {
-        return;
-    }
-    qemu_input_update_buttons(NULL, bmap, mouse_button_state, button_state);
-    qemu_input_event_sync();
-    mouse_button_state = button_state;
-}
-
-static void hmp_ioport_read(Monitor *mon, const QDict *qdict)
-{
-    int size = qdict_get_int(qdict, "size");
-    int addr = qdict_get_int(qdict, "addr");
-    int has_index = qdict_haskey(qdict, "index");
-    uint32_t val;
-    int suffix;
-
-    if (has_index) {
-        int index = qdict_get_int(qdict, "index");
-        cpu_outb(addr & IOPORTS_MASK, index & 0xff);
-        addr++;
-    }
-    addr &= 0xffff;
-
-    switch(size) {
-    default:
-    case 1:
-        val = cpu_inb(addr);
-        suffix = 'b';
-        break;
-    case 2:
-        val = cpu_inw(addr);
-        suffix = 'w';
-        break;
-    case 4:
-        val = cpu_inl(addr);
-        suffix = 'l';
-        break;
-    }
-    monitor_printf(mon, "port%c[0x%04x] = %#0*x\n",
-                   suffix, addr, size * 2, val);
-}
-
-static void hmp_ioport_write(Monitor *mon, const QDict *qdict)
-{
-    int size = qdict_get_int(qdict, "size");
-    int addr = qdict_get_int(qdict, "addr");
-    int val = qdict_get_int(qdict, "val");
-
-    addr &= IOPORTS_MASK;
-
-    switch (size) {
-    default:
-    case 1:
-        cpu_outb(addr, val);
-        break;
-    case 2:
-        cpu_outw(addr, val);
-        break;
-    case 4:
-        cpu_outl(addr, val);
-        break;
-    }
-}
-
-static void hmp_boot_set(Monitor *mon, const QDict *qdict)
-{
-    Error *local_err = NULL;
-    const char *bootdevice = qdict_get_str(qdict, "bootdevice");
-
-    qemu_boot_set(bootdevice, &local_err);
-    if (local_err) {
-        error_report_err(local_err);
-    } else {
-        monitor_printf(mon, "boot device list now set to %s\n", bootdevice);
-    }
-}
-
-static void hmp_info_mtree(Monitor *mon, const QDict *qdict)
-{
-    bool flatview = qdict_get_try_bool(qdict, "flatview", false);
-    bool dispatch_tree = qdict_get_try_bool(qdict, "dispatch_tree", false);
-    bool owner = qdict_get_try_bool(qdict, "owner", false);
-    bool disabled = qdict_get_try_bool(qdict, "disabled", false);
-
-    mtree_info(flatview, dispatch_tree, owner, disabled);
-}
-
-#ifdef CONFIG_PROFILER
-
-int64_t dev_time;
-
-static void hmp_info_profile(Monitor *mon, const QDict *qdict)
-{
-    static int64_t last_cpu_exec_time;
-    int64_t cpu_exec_time;
-    int64_t delta;
-
-    cpu_exec_time = tcg_cpu_exec_time();
-    delta = cpu_exec_time - last_cpu_exec_time;
-
-    monitor_printf(mon, "async time  %" PRId64 " (%0.3f)\n",
-                   dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
-    monitor_printf(mon, "qemu time   %" PRId64 " (%0.3f)\n",
-                   delta, delta / (double)NANOSECONDS_PER_SECOND);
-    last_cpu_exec_time = cpu_exec_time;
-    dev_time = 0;
-}
-#else
-static void hmp_info_profile(Monitor *mon, const QDict *qdict)
-{
-    monitor_printf(mon, "Internal profiler not compiled\n");
-}
-#endif
-
-/* Capture support */
-static QLIST_HEAD (capture_list_head, CaptureState) capture_head;
-
-static void hmp_info_capture(Monitor *mon, const QDict *qdict)
-{
-    int i;
-    CaptureState *s;
-
-    for (s = capture_head.lh_first, i = 0; s; s = s->entries.le_next, ++i) {
-        monitor_printf(mon, "[%d]: ", i);
-        s->ops.info (s->opaque);
-    }
-}
-
-static void hmp_stopcapture(Monitor *mon, const QDict *qdict)
-{
-    int i;
-    int n = qdict_get_int(qdict, "n");
-    CaptureState *s;
-
-    for (s = capture_head.lh_first, i = 0; s; s = s->entries.le_next, ++i) {
-        if (i == n) {
-            s->ops.destroy (s->opaque);
-            QLIST_REMOVE (s, entries);
-            g_free (s);
-            return;
-        }
-    }
-}
-
-static void hmp_wavcapture(Monitor *mon, const QDict *qdict)
-{
-    const char *path = qdict_get_str(qdict, "path");
-    int freq = qdict_get_try_int(qdict, "freq", 44100);
-    int bits = qdict_get_try_int(qdict, "bits", 16);
-    int nchannels = qdict_get_try_int(qdict, "nchannels", 2);
-    const char *audiodev = qdict_get_str(qdict, "audiodev");
-    CaptureState *s;
-    AudioState *as = audio_state_by_name(audiodev);
-
-    if (!as) {
-        monitor_printf(mon, "Audiodev '%s' not found\n", audiodev);
-        return;
-    }
-
-    s = g_malloc0 (sizeof (*s));
-
-    if (wav_start_capture(as, s, path, freq, bits, nchannels)) {
-        monitor_printf(mon, "Failed to add wave capture\n");
-        g_free (s);
-        return;
-    }
-    QLIST_INSERT_HEAD (&capture_head, s, entries);
-}
-
-static QAuthZList *find_auth(Monitor *mon, const char *name)
-{
-    Object *obj;
-    Object *container;
-
-    container = object_get_objects_root();
-    obj = object_resolve_path_component(container, name);
-    if (!obj) {
-        monitor_printf(mon, "acl: unknown list '%s'\n", name);
-        return NULL;
-    }
-
-    return QAUTHZ_LIST(obj);
-}
-
-static bool warn_acl;
-static void hmp_warn_acl(void)
-{
-    if (warn_acl) {
-        return;
-    }
-    error_report("The acl_show, acl_reset, acl_policy, acl_add, acl_remove "
-                 "commands are deprecated with no replacement. Authorization "
-                 "for VNC should be performed using the pluggable QAuthZ "
-                 "objects");
-    warn_acl = true;
-}
-
-static void hmp_acl_show(Monitor *mon, const QDict *qdict)
-{
-    const char *aclname = qdict_get_str(qdict, "aclname");
-    QAuthZList *auth = find_auth(mon, aclname);
-    QAuthZListRuleList *rules;
-    size_t i = 0;
-
-    hmp_warn_acl();
-
-    if (!auth) {
-        return;
-    }
-
-    monitor_printf(mon, "policy: %s\n",
-                   QAuthZListPolicy_str(auth->policy));
-
-    rules = auth->rules;
-    while (rules) {
-        QAuthZListRule *rule = rules->value;
-        i++;
-        monitor_printf(mon, "%zu: %s %s\n", i,
-                       QAuthZListPolicy_str(rule->policy),
-                       rule->match);
-        rules = rules->next;
-    }
-}
-
-static void hmp_acl_reset(Monitor *mon, const QDict *qdict)
-{
-    const char *aclname = qdict_get_str(qdict, "aclname");
-    QAuthZList *auth = find_auth(mon, aclname);
-
-    hmp_warn_acl();
-
-    if (!auth) {
-        return;
-    }
-
-    auth->policy = QAUTHZ_LIST_POLICY_DENY;
-    qapi_free_QAuthZListRuleList(auth->rules);
-    auth->rules = NULL;
-    monitor_printf(mon, "acl: removed all rules\n");
-}
-
-static void hmp_acl_policy(Monitor *mon, const QDict *qdict)
-{
-    const char *aclname = qdict_get_str(qdict, "aclname");
-    const char *policy = qdict_get_str(qdict, "policy");
-    QAuthZList *auth = find_auth(mon, aclname);
-    int val;
-    Error *err = NULL;
-
-    hmp_warn_acl();
-
-    if (!auth) {
-        return;
-    }
-
-    val = qapi_enum_parse(&QAuthZListPolicy_lookup,
-                          policy,
-                          QAUTHZ_LIST_POLICY_DENY,
-                          &err);
-    if (err) {
-        error_free(err);
-        monitor_printf(mon, "acl: unknown policy '%s', "
-                       "expected 'deny' or 'allow'\n", policy);
-    } else {
-        auth->policy = val;
-        if (auth->policy == QAUTHZ_LIST_POLICY_ALLOW) {
-            monitor_printf(mon, "acl: policy set to 'allow'\n");
-        } else {
-            monitor_printf(mon, "acl: policy set to 'deny'\n");
-        }
-    }
-}
-
-static QAuthZListFormat hmp_acl_get_format(const char *match)
-{
-    if (strchr(match, '*')) {
-        return QAUTHZ_LIST_FORMAT_GLOB;
-    } else {
-        return QAUTHZ_LIST_FORMAT_EXACT;
-    }
-}
-
-static void hmp_acl_add(Monitor *mon, const QDict *qdict)
-{
-    const char *aclname = qdict_get_str(qdict, "aclname");
-    const char *match = qdict_get_str(qdict, "match");
-    const char *policystr = qdict_get_str(qdict, "policy");
-    int has_index = qdict_haskey(qdict, "index");
-    int index = qdict_get_try_int(qdict, "index", -1);
-    QAuthZList *auth = find_auth(mon, aclname);
-    Error *err = NULL;
-    QAuthZListPolicy policy;
-    QAuthZListFormat format;
-    size_t i = 0;
-
-    hmp_warn_acl();
-
-    if (!auth) {
-        return;
-    }
-
-    policy = qapi_enum_parse(&QAuthZListPolicy_lookup,
-                             policystr,
-                             QAUTHZ_LIST_POLICY_DENY,
-                             &err);
-    if (err) {
-        error_free(err);
-        monitor_printf(mon, "acl: unknown policy '%s', "
-                       "expected 'deny' or 'allow'\n", policystr);
-        return;
-    }
-
-    format = hmp_acl_get_format(match);
-
-    if (has_index && index == 0) {
-        monitor_printf(mon, "acl: unable to add acl entry\n");
-        return;
-    }
-
-    if (has_index) {
-        i = qauthz_list_insert_rule(auth, match, policy,
-                                    format, index - 1, &err);
-    } else {
-        i = qauthz_list_append_rule(auth, match, policy,
-                                    format, &err);
-    }
-    if (err) {
-        monitor_printf(mon, "acl: unable to add rule: %s",
-                       error_get_pretty(err));
-        error_free(err);
-    } else {
-        monitor_printf(mon, "acl: added rule at position %zu\n", i + 1);
-    }
-}
-
-static void hmp_acl_remove(Monitor *mon, const QDict *qdict)
-{
-    const char *aclname = qdict_get_str(qdict, "aclname");
-    const char *match = qdict_get_str(qdict, "match");
-    QAuthZList *auth = find_auth(mon, aclname);
-    ssize_t i = 0;
-
-    hmp_warn_acl();
-
-    if (!auth) {
-        return;
-    }
-
-    i = qauthz_list_delete_rule(auth, match);
-    if (i >= 0) {
-        monitor_printf(mon, "acl: removed rule at position %zu\n", i + 1);
-    } else {
-        monitor_printf(mon, "acl: no matching acl entry\n");
-    }
-}
-
-void qmp_getfd(const char *fdname, Error **errp)
-{
-    Monitor *cur_mon = monitor_cur();
-    mon_fd_t *monfd;
-    int fd, tmp_fd;
-
-    fd = qemu_chr_fe_get_msgfd(&cur_mon->chr);
-    if (fd == -1) {
-        error_setg(errp, QERR_FD_NOT_SUPPLIED);
-        return;
-    }
-
-    if (qemu_isdigit(fdname[0])) {
-        close(fd);
-        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "fdname",
-                   "a name not starting with a digit");
-        return;
-    }
-
-    QEMU_LOCK_GUARD(&cur_mon->mon_lock);
-    QLIST_FOREACH(monfd, &cur_mon->fds, next) {
-        if (strcmp(monfd->name, fdname) != 0) {
-            continue;
-        }
-
-        tmp_fd = monfd->fd;
-        monfd->fd = fd;
-        /* Make sure close() is outside critical section */
-        close(tmp_fd);
-        return;
-    }
-
-    monfd = g_malloc0(sizeof(mon_fd_t));
-    monfd->name = g_strdup(fdname);
-    monfd->fd = fd;
-
-    QLIST_INSERT_HEAD(&cur_mon->fds, monfd, next);
-}
-
-void qmp_closefd(const char *fdname, Error **errp)
-{
-    Monitor *cur_mon = monitor_cur();
-    mon_fd_t *monfd;
-    int tmp_fd;
-
-    qemu_mutex_lock(&cur_mon->mon_lock);
-    QLIST_FOREACH(monfd, &cur_mon->fds, next) {
-        if (strcmp(monfd->name, fdname) != 0) {
-            continue;
-        }
-
-        QLIST_REMOVE(monfd, next);
-        tmp_fd = monfd->fd;
-        g_free(monfd->name);
-        g_free(monfd);
-        qemu_mutex_unlock(&cur_mon->mon_lock);
-        /* Make sure close() is outside critical section */
-        close(tmp_fd);
-        return;
-    }
-
-    qemu_mutex_unlock(&cur_mon->mon_lock);
-    error_setg(errp, QERR_FD_NOT_FOUND, fdname);
-}
-
-int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp)
-{
-    mon_fd_t *monfd;
-
-    QEMU_LOCK_GUARD(&mon->mon_lock);
-    QLIST_FOREACH(monfd, &mon->fds, next) {
-        int fd;
-
-        if (strcmp(monfd->name, fdname) != 0) {
-            continue;
-        }
-
-        fd = monfd->fd;
-
-        /* caller takes ownership of fd */
-        QLIST_REMOVE(monfd, next);
-        g_free(monfd->name);
-        g_free(monfd);
-
-        return fd;
-    }
-
-    error_setg(errp, "File descriptor named '%s' has not been found", fdname);
-    return -1;
-}
-
-static void monitor_fdset_cleanup(MonFdset *mon_fdset)
-{
-    MonFdsetFd *mon_fdset_fd;
-    MonFdsetFd *mon_fdset_fd_next;
-
-    QLIST_FOREACH_SAFE(mon_fdset_fd, &mon_fdset->fds, next, mon_fdset_fd_next) {
-        if ((mon_fdset_fd->removed ||
-                (QLIST_EMPTY(&mon_fdset->dup_fds) && mon_refcount == 0)) &&
-                runstate_is_running()) {
-            close(mon_fdset_fd->fd);
-            g_free(mon_fdset_fd->opaque);
-            QLIST_REMOVE(mon_fdset_fd, next);
-            g_free(mon_fdset_fd);
-        }
-    }
-
-    if (QLIST_EMPTY(&mon_fdset->fds) && QLIST_EMPTY(&mon_fdset->dup_fds)) {
-        QLIST_REMOVE(mon_fdset, next);
-        g_free(mon_fdset);
-    }
-}
-
-void monitor_fdsets_cleanup(void)
-{
-    MonFdset *mon_fdset;
-    MonFdset *mon_fdset_next;
-
-    QEMU_LOCK_GUARD(&mon_fdsets_lock);
-    QLIST_FOREACH_SAFE(mon_fdset, &mon_fdsets, next, mon_fdset_next) {
-        monitor_fdset_cleanup(mon_fdset);
-    }
-}
-
-AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque,
-                      const char *opaque, Error **errp)
-{
-    int fd;
-    Monitor *mon = monitor_cur();
-    AddfdInfo *fdinfo;
-
-    fd = qemu_chr_fe_get_msgfd(&mon->chr);
-    if (fd == -1) {
-        error_setg(errp, QERR_FD_NOT_SUPPLIED);
-        goto error;
-    }
-
-    fdinfo = monitor_fdset_add_fd(fd, has_fdset_id, fdset_id,
-                                  has_opaque, opaque, errp);
-    if (fdinfo) {
-        return fdinfo;
-    }
-
-error:
-    if (fd != -1) {
-        close(fd);
-    }
-    return NULL;
-}
-
-void qmp_remove_fd(int64_t fdset_id, bool has_fd, int64_t fd, Error **errp)
-{
-    MonFdset *mon_fdset;
-    MonFdsetFd *mon_fdset_fd;
-    char fd_str[60];
-
-    QEMU_LOCK_GUARD(&mon_fdsets_lock);
-    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
-        if (mon_fdset->id != fdset_id) {
-            continue;
-        }
-        QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
-            if (has_fd) {
-                if (mon_fdset_fd->fd != fd) {
-                    continue;
-                }
-                mon_fdset_fd->removed = true;
-                break;
-            } else {
-                mon_fdset_fd->removed = true;
-            }
-        }
-        if (has_fd && !mon_fdset_fd) {
-            goto error;
-        }
-        monitor_fdset_cleanup(mon_fdset);
-        return;
-    }
-
-error:
-    if (has_fd) {
-        snprintf(fd_str, sizeof(fd_str), "fdset-id:%" PRId64 ", fd:%" PRId64,
-                 fdset_id, fd);
-    } else {
-        snprintf(fd_str, sizeof(fd_str), "fdset-id:%" PRId64, fdset_id);
-    }
-    error_setg(errp, QERR_FD_NOT_FOUND, fd_str);
-}
-
-FdsetInfoList *qmp_query_fdsets(Error **errp)
-{
-    MonFdset *mon_fdset;
-    MonFdsetFd *mon_fdset_fd;
-    FdsetInfoList *fdset_list = NULL;
-
-    QEMU_LOCK_GUARD(&mon_fdsets_lock);
-    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
-        FdsetInfoList *fdset_info = g_malloc0(sizeof(*fdset_info));
-        FdsetFdInfoList *fdsetfd_list = NULL;
-
-        fdset_info->value = g_malloc0(sizeof(*fdset_info->value));
-        fdset_info->value->fdset_id = mon_fdset->id;
-
-        QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
-            FdsetFdInfoList *fdsetfd_info;
-
-            fdsetfd_info = g_malloc0(sizeof(*fdsetfd_info));
-            fdsetfd_info->value = g_malloc0(sizeof(*fdsetfd_info->value));
-            fdsetfd_info->value->fd = mon_fdset_fd->fd;
-            if (mon_fdset_fd->opaque) {
-                fdsetfd_info->value->has_opaque = true;
-                fdsetfd_info->value->opaque = g_strdup(mon_fdset_fd->opaque);
-            } else {
-                fdsetfd_info->value->has_opaque = false;
-            }
-
-            fdsetfd_info->next = fdsetfd_list;
-            fdsetfd_list = fdsetfd_info;
-        }
-
-        fdset_info->value->fds = fdsetfd_list;
-
-        fdset_info->next = fdset_list;
-        fdset_list = fdset_info;
-    }
-
-    return fdset_list;
-}
-
-AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id,
-                                bool has_opaque, const char *opaque,
-                                Error **errp)
-{
-    MonFdset *mon_fdset = NULL;
-    MonFdsetFd *mon_fdset_fd;
-    AddfdInfo *fdinfo;
-
-    QEMU_LOCK_GUARD(&mon_fdsets_lock);
-    if (has_fdset_id) {
-        QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
-            /* Break if match found or match impossible due to ordering by ID */
-            if (fdset_id <= mon_fdset->id) {
-                if (fdset_id < mon_fdset->id) {
-                    mon_fdset = NULL;
-                }
-                break;
-            }
-        }
-    }
-
-    if (mon_fdset == NULL) {
-        int64_t fdset_id_prev = -1;
-        MonFdset *mon_fdset_cur = QLIST_FIRST(&mon_fdsets);
-
-        if (has_fdset_id) {
-            if (fdset_id < 0) {
-                error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id",
-                           "a non-negative value");
-                return NULL;
-            }
-            /* Use specified fdset ID */
-            QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
-                mon_fdset_cur = mon_fdset;
-                if (fdset_id < mon_fdset_cur->id) {
-                    break;
-                }
-            }
-        } else {
-            /* Use first available fdset ID */
-            QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
-                mon_fdset_cur = mon_fdset;
-                if (fdset_id_prev == mon_fdset_cur->id - 1) {
-                    fdset_id_prev = mon_fdset_cur->id;
-                    continue;
-                }
-                break;
-            }
-        }
-
-        mon_fdset = g_malloc0(sizeof(*mon_fdset));
-        if (has_fdset_id) {
-            mon_fdset->id = fdset_id;
-        } else {
-            mon_fdset->id = fdset_id_prev + 1;
-        }
-
-        /* The fdset list is ordered by fdset ID */
-        if (!mon_fdset_cur) {
-            QLIST_INSERT_HEAD(&mon_fdsets, mon_fdset, next);
-        } else if (mon_fdset->id < mon_fdset_cur->id) {
-            QLIST_INSERT_BEFORE(mon_fdset_cur, mon_fdset, next);
-        } else {
-            QLIST_INSERT_AFTER(mon_fdset_cur, mon_fdset, next);
-        }
-    }
-
-    mon_fdset_fd = g_malloc0(sizeof(*mon_fdset_fd));
-    mon_fdset_fd->fd = fd;
-    mon_fdset_fd->removed = false;
-    if (has_opaque) {
-        mon_fdset_fd->opaque = g_strdup(opaque);
-    }
-    QLIST_INSERT_HEAD(&mon_fdset->fds, mon_fdset_fd, next);
-
-    fdinfo = g_malloc0(sizeof(*fdinfo));
-    fdinfo->fdset_id = mon_fdset->id;
-    fdinfo->fd = mon_fdset_fd->fd;
-
-    return fdinfo;
-}
-
-int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags)
-{
-#ifdef _WIN32
-    return -ENOENT;
-#else
-    MonFdset *mon_fdset;
-
-    QEMU_LOCK_GUARD(&mon_fdsets_lock);
-    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
-        MonFdsetFd *mon_fdset_fd;
-        MonFdsetFd *mon_fdset_fd_dup;
-        int fd = -1;
-        int dup_fd;
-        int mon_fd_flags;
-
-        if (mon_fdset->id != fdset_id) {
-            continue;
-        }
-
-        QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
-            mon_fd_flags = fcntl(mon_fdset_fd->fd, F_GETFL);
-            if (mon_fd_flags == -1) {
-                return -1;
-            }
-
-            if ((flags & O_ACCMODE) == (mon_fd_flags & O_ACCMODE)) {
-                fd = mon_fdset_fd->fd;
-                break;
-            }
-        }
-
-        if (fd == -1) {
-            errno = EACCES;
-            return -1;
-        }
-
-        dup_fd = qemu_dup_flags(fd, flags);
-        if (dup_fd == -1) {
-            return -1;
-        }
-
-        mon_fdset_fd_dup = g_malloc0(sizeof(*mon_fdset_fd_dup));
-        mon_fdset_fd_dup->fd = dup_fd;
-        QLIST_INSERT_HEAD(&mon_fdset->dup_fds, mon_fdset_fd_dup, next);
-        return dup_fd;
-    }
-
-    errno = ENOENT;
-    return -1;
-#endif
-}
-
-static int64_t monitor_fdset_dup_fd_find_remove(int dup_fd, bool remove)
-{
-    MonFdset *mon_fdset;
-    MonFdsetFd *mon_fdset_fd_dup;
-
-    QEMU_LOCK_GUARD(&mon_fdsets_lock);
-    QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
-        QLIST_FOREACH(mon_fdset_fd_dup, &mon_fdset->dup_fds, next) {
-            if (mon_fdset_fd_dup->fd == dup_fd) {
-                if (remove) {
-                    QLIST_REMOVE(mon_fdset_fd_dup, next);
-                    g_free(mon_fdset_fd_dup);
-                    if (QLIST_EMPTY(&mon_fdset->dup_fds)) {
-                        monitor_fdset_cleanup(mon_fdset);
-                    }
-                    return -1;
-                } else {
-                    return mon_fdset->id;
-                }
-            }
-        }
-    }
-
-    return -1;
-}
-
-int64_t monitor_fdset_dup_fd_find(int dup_fd)
-{
-    return monitor_fdset_dup_fd_find_remove(dup_fd, false);
-}
-
-void monitor_fdset_dup_fd_remove(int dup_fd)
-{
-    monitor_fdset_dup_fd_find_remove(dup_fd, true);
-}
-
-int monitor_fd_param(Monitor *mon, const char *fdname, Error **errp)
-{
-    int fd;
-    Error *local_err = NULL;
-
-    if (!qemu_isdigit(fdname[0]) && mon) {
-        fd = monitor_get_fd(mon, fdname, &local_err);
-    } else {
-        fd = qemu_parse_fd(fdname);
-        if (fd == -1) {
-            error_setg(&local_err, "Invalid file descriptor number '%s'",
-                       fdname);
-        }
-    }
-    if (local_err) {
-        error_propagate(errp, local_err);
-        assert(fd == -1);
-    } else {
-        assert(fd != -1);
-    }
-
-    return fd;
-}
-
-/* Please update hmp-commands.hx when adding or changing commands */
-static HMPCommand hmp_info_cmds[] = {
-#include "hmp-commands-info.h"
-    { NULL, NULL, },
-};
-
-/* hmp_cmds and hmp_info_cmds would be sorted at runtime */
-HMPCommand hmp_cmds[] = {
-#include "hmp-commands.h"
-    { NULL, NULL, },
-};
-
-/*
- * Set @pval to the value in the register identified by @name.
- * return 0 if OK, -1 if not found
- */
-int get_monitor_def(Monitor *mon, int64_t *pval, const char *name)
-{
-    const MonitorDef *md = target_monitor_defs();
-    CPUState *cs = mon_get_cpu(mon);
-    void *ptr;
-    uint64_t tmp = 0;
-    int ret;
-
-    if (cs == NULL || md == NULL) {
-        return -1;
-    }
-
-    for(; md->name != NULL; md++) {
-        if (hmp_compare_cmd(name, md->name)) {
-            if (md->get_value) {
-                *pval = md->get_value(mon, md, md->offset);
-            } else {
-                CPUArchState *env = mon_get_cpu_env(mon);
-                ptr = (uint8_t *)env + md->offset;
-                switch(md->type) {
-                case MD_I32:
-                    *pval = *(int32_t *)ptr;
-                    break;
-                case MD_TLONG:
-                    *pval = *(target_long *)ptr;
-                    break;
-                default:
-                    *pval = 0;
-                    break;
-                }
-            }
-            return 0;
-        }
-    }
-
-    ret = target_get_monitor_def(cs, name, &tmp);
-    if (!ret) {
-        *pval = (target_long) tmp;
-    }
-
-    return ret;
-}
-
-static void add_completion_option(ReadLineState *rs, const char *str,
-                                  const char *option)
-{
-    if (!str || !option) {
-        return;
-    }
-    if (!strncmp(option, str, strlen(str))) {
-        readline_add_completion(rs, option);
-    }
-}
-
-void chardev_add_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    size_t len;
-    ChardevBackendInfoList *list, *start;
-
-    if (nb_args != 2) {
-        return;
-    }
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-
-    start = list = qmp_query_chardev_backends(NULL);
-    while (list) {
-        const char *chr_name = list->value->name;
-
-        if (!strncmp(chr_name, str, len)) {
-            readline_add_completion(rs, chr_name);
-        }
-        list = list->next;
-    }
-    qapi_free_ChardevBackendInfoList(start);
-}
-
-void netdev_add_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    size_t len;
-    int i;
-
-    if (nb_args != 2) {
-        return;
-    }
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-    for (i = 0; i < NET_CLIENT_DRIVER__MAX; i++) {
-        add_completion_option(rs, str, NetClientDriver_str(i));
-    }
-}
-
-void device_add_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    GSList *list, *elt;
-    size_t len;
-
-    if (nb_args != 2) {
-        return;
-    }
-
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-    list = elt = object_class_get_list(TYPE_DEVICE, false);
-    while (elt) {
-        const char *name;
-        DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
-                                             TYPE_DEVICE);
-        name = object_class_get_name(OBJECT_CLASS(dc));
-
-        if (dc->user_creatable
-            && !strncmp(name, str, len)) {
-            readline_add_completion(rs, name);
-        }
-        elt = elt->next;
-    }
-    g_slist_free(list);
-}
-
-void object_add_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    GSList *list, *elt;
-    size_t len;
-
-    if (nb_args != 2) {
-        return;
-    }
-
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-    list = elt = object_class_get_list(TYPE_USER_CREATABLE, false);
-    while (elt) {
-        const char *name;
-
-        name = object_class_get_name(OBJECT_CLASS(elt->data));
-        if (!strncmp(name, str, len) && strcmp(name, TYPE_USER_CREATABLE)) {
-            readline_add_completion(rs, name);
-        }
-        elt = elt->next;
-    }
-    g_slist_free(list);
-}
-
-static int qdev_add_hotpluggable_device(Object *obj, void *opaque)
-{
-    GSList **list = opaque;
-    DeviceState *dev = (DeviceState *)object_dynamic_cast(obj, TYPE_DEVICE);
-
-    if (dev == NULL) {
-        return 0;
-    }
-
-    if (dev->realized && object_property_get_bool(obj, "hotpluggable", NULL)) {
-        *list = g_slist_append(*list, dev);
-    }
-
-    return 0;
-}
-
-static GSList *qdev_build_hotpluggable_device_list(Object *peripheral)
-{
-    GSList *list = NULL;
-
-    object_child_foreach(peripheral, qdev_add_hotpluggable_device, &list);
-
-    return list;
-}
-
-static void peripheral_device_del_completion(ReadLineState *rs,
-                                             const char *str, size_t len)
-{
-    Object *peripheral = container_get(qdev_get_machine(), "/peripheral");
-    GSList *list, *item;
-
-    list = qdev_build_hotpluggable_device_list(peripheral);
-    if (!list) {
-        return;
-    }
-
-    for (item = list; item; item = g_slist_next(item)) {
-        DeviceState *dev = item->data;
-
-        if (dev->id && !strncmp(str, dev->id, len)) {
-            readline_add_completion(rs, dev->id);
-        }
-    }
-
-    g_slist_free(list);
-}
-
-void chardev_remove_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    size_t len;
-    ChardevInfoList *list, *start;
-
-    if (nb_args != 2) {
-        return;
-    }
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-
-    start = list = qmp_query_chardev(NULL);
-    while (list) {
-        ChardevInfo *chr = list->value;
-
-        if (!strncmp(chr->label, str, len)) {
-            readline_add_completion(rs, chr->label);
-        }
-        list = list->next;
-    }
-    qapi_free_ChardevInfoList(start);
-}
-
-static void ringbuf_completion(ReadLineState *rs, const char *str)
-{
-    size_t len;
-    ChardevInfoList *list, *start;
-
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-
-    start = list = qmp_query_chardev(NULL);
-    while (list) {
-        ChardevInfo *chr_info = list->value;
-
-        if (!strncmp(chr_info->label, str, len)) {
-            Chardev *chr = qemu_chr_find(chr_info->label);
-            if (chr && CHARDEV_IS_RINGBUF(chr)) {
-                readline_add_completion(rs, chr_info->label);
-            }
-        }
-        list = list->next;
-    }
-    qapi_free_ChardevInfoList(start);
-}
-
-void ringbuf_write_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    if (nb_args != 2) {
-        return;
-    }
-    ringbuf_completion(rs, str);
-}
-
-void device_del_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    size_t len;
-
-    if (nb_args != 2) {
-        return;
-    }
-
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-    peripheral_device_del_completion(rs, str, len);
-}
-
-void object_del_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    ObjectPropertyInfoList *list, *start;
-    size_t len;
-
-    if (nb_args != 2) {
-        return;
-    }
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-
-    start = list = qmp_qom_list("/objects", NULL);
-    while (list) {
-        ObjectPropertyInfo *info = list->value;
-
-        if (!strncmp(info->type, "child<", 5)
-            && !strncmp(info->name, str, len)) {
-            readline_add_completion(rs, info->name);
-        }
-        list = list->next;
-    }
-    qapi_free_ObjectPropertyInfoList(start);
-}
-
-void sendkey_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    int i;
-    char *sep;
-    size_t len;
-
-    if (nb_args != 2) {
-        return;
-    }
-    sep = strrchr(str, '-');
-    if (sep) {
-        str = sep + 1;
-    }
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-    for (i = 0; i < Q_KEY_CODE__MAX; i++) {
-        if (!strncmp(str, QKeyCode_str(i), len)) {
-            readline_add_completion(rs, QKeyCode_str(i));
-        }
-    }
-}
-
-void set_link_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    size_t len;
-
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-    if (nb_args == 2) {
-        NetClientState *ncs[MAX_QUEUE_NUM];
-        int count, i;
-        count = qemu_find_net_clients_except(NULL, ncs,
-                                             NET_CLIENT_DRIVER_NONE,
-                                             MAX_QUEUE_NUM);
-        for (i = 0; i < MIN(count, MAX_QUEUE_NUM); i++) {
-            const char *name = ncs[i]->name;
-            if (!strncmp(str, name, len)) {
-                readline_add_completion(rs, name);
-            }
-        }
-    } else if (nb_args == 3) {
-        add_completion_option(rs, str, "on");
-        add_completion_option(rs, str, "off");
-    }
-}
-
-void netdev_del_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    int len, count, i;
-    NetClientState *ncs[MAX_QUEUE_NUM];
-
-    if (nb_args != 2) {
-        return;
-    }
-
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-    count = qemu_find_net_clients_except(NULL, ncs, NET_CLIENT_DRIVER_NIC,
-                                         MAX_QUEUE_NUM);
-    for (i = 0; i < MIN(count, MAX_QUEUE_NUM); i++) {
-        const char *name = ncs[i]->name;
-        if (strncmp(str, name, len)) {
-            continue;
-        }
-        if (ncs[i]->is_netdev) {
-            readline_add_completion(rs, name);
-        }
-    }
-}
-
-void info_trace_events_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    size_t len;
-
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-    if (nb_args == 2) {
-        TraceEventIter iter;
-        TraceEvent *ev;
-        char *pattern = g_strdup_printf("%s*", str);
-        trace_event_iter_init(&iter, pattern);
-        while ((ev = trace_event_iter_next(&iter)) != NULL) {
-            readline_add_completion(rs, trace_event_get_name(ev));
-        }
-        g_free(pattern);
-    }
-}
-
-void trace_event_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    size_t len;
-
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-    if (nb_args == 2) {
-        TraceEventIter iter;
-        TraceEvent *ev;
-        char *pattern = g_strdup_printf("%s*", str);
-        trace_event_iter_init(&iter, pattern);
-        while ((ev = trace_event_iter_next(&iter)) != NULL) {
-            readline_add_completion(rs, trace_event_get_name(ev));
-        }
-        g_free(pattern);
-    } else if (nb_args == 3) {
-        add_completion_option(rs, str, "on");
-        add_completion_option(rs, str, "off");
-    }
-}
-
-void watchdog_action_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    int i;
-
-    if (nb_args != 2) {
-        return;
-    }
-    readline_set_completion_index(rs, strlen(str));
-    for (i = 0; i < WATCHDOG_ACTION__MAX; i++) {
-        add_completion_option(rs, str, WatchdogAction_str(i));
-    }
-}
-
-void migrate_set_capability_completion(ReadLineState *rs, int nb_args,
-                                       const char *str)
-{
-    size_t len;
-
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-    if (nb_args == 2) {
-        int i;
-        for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
-            const char *name = MigrationCapability_str(i);
-            if (!strncmp(str, name, len)) {
-                readline_add_completion(rs, name);
-            }
-        }
-    } else if (nb_args == 3) {
-        add_completion_option(rs, str, "on");
-        add_completion_option(rs, str, "off");
-    }
-}
-
-void migrate_set_parameter_completion(ReadLineState *rs, int nb_args,
-                                      const char *str)
-{
-    size_t len;
-
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-    if (nb_args == 2) {
-        int i;
-        for (i = 0; i < MIGRATION_PARAMETER__MAX; i++) {
-            const char *name = MigrationParameter_str(i);
-            if (!strncmp(str, name, len)) {
-                readline_add_completion(rs, name);
-            }
-        }
-    }
-}
-
-static void vm_completion(ReadLineState *rs, const char *str)
-{
-    size_t len;
-    BlockDriverState *bs;
-    BdrvNextIterator it;
-
-    len = strlen(str);
-    readline_set_completion_index(rs, len);
-
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
-        SnapshotInfoList *snapshots, *snapshot;
-        AioContext *ctx = bdrv_get_aio_context(bs);
-        bool ok = false;
-
-        aio_context_acquire(ctx);
-        if (bdrv_can_snapshot(bs)) {
-            ok = bdrv_query_snapshot_info_list(bs, &snapshots, NULL) == 0;
-        }
-        aio_context_release(ctx);
-        if (!ok) {
-            continue;
-        }
-
-        snapshot = snapshots;
-        while (snapshot) {
-            char *completion = snapshot->value->name;
-            if (!strncmp(str, completion, len)) {
-                readline_add_completion(rs, completion);
-            }
-            completion = snapshot->value->id;
-            if (!strncmp(str, completion, len)) {
-                readline_add_completion(rs, completion);
-            }
-            snapshot = snapshot->next;
-        }
-        qapi_free_SnapshotInfoList(snapshots);
-    }
-
-}
-
-void delvm_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    if (nb_args == 2) {
-        vm_completion(rs, str);
-    }
-}
-
-void loadvm_completion(ReadLineState *rs, int nb_args, const char *str)
-{
-    if (nb_args == 2) {
-        vm_completion(rs, str);
-    }
-}
-
-static int
-compare_mon_cmd(const void *a, const void *b)
-{
-    return strcmp(((const HMPCommand *)a)->name,
-            ((const HMPCommand *)b)->name);
-}
-
-static void sortcmdlist(void)
-{
-    qsort(hmp_cmds, ARRAY_SIZE(hmp_cmds) - 1,
-          sizeof(*hmp_cmds),
-          compare_mon_cmd);
-    qsort(hmp_info_cmds, ARRAY_SIZE(hmp_info_cmds) - 1,
-          sizeof(*hmp_info_cmds),
-          compare_mon_cmd);
-}
-
-void monitor_init_globals(void)
-{
-    monitor_init_globals_core();
-    monitor_init_qmp_commands();
-    sortcmdlist();
-    qemu_mutex_init(&mon_fdsets_lock);
-}
index a6131554da37cb49170fb9778e94337207e8cb93..252de856812f41eb45a88866240950861b943822 100644 (file)
@@ -63,7 +63,8 @@
  * '.'          other form of optional type (for 'i' and 'l')
  * 'b'          boolean
  *              user mode accepts "on" or "off"
- * '-'          optional parameter (eg. '-f')
+ * '-'          optional parameter (eg. '-f'); if followed by a 's', it
+ *              specifies an optional string param (e.g. '-fs' allows '-f foo')
  *
  */
 
@@ -74,6 +75,13 @@ typedef struct HMPCommand {
     const char *help;
     const char *flags; /* p=preconfig */
     void (*cmd)(Monitor *mon, const QDict *qdict);
+    /*
+     * If implementing a command that takes no arguments and simply
+     * prints formatted data, then leave @cmd NULL, and then set
+     * @cmd_info_hrt to the corresponding QMP handler that returns
+     * the formatted text.
+     */
+    HumanReadableText *(*cmd_info_hrt)(Error **errp);
     bool coroutine;
     /*
      * @sub_table is a list of 2nd level of commands. If it does not exist,
@@ -86,7 +94,6 @@ typedef struct HMPCommand {
 
 struct Monitor {
     CharBackend chr;
-    int reset_seen;
     int suspend_cnt;            /* Needs to be accessed atomically */
     bool is_qmp;
     bool skip_flush;
@@ -105,10 +112,10 @@ struct Monitor {
      * Members that are protected by the per-monitor lock
      */
     QLIST_HEAD(, mon_fd_t) fds;
-    QString *outbuf;
+    GString *outbuf;
     guint out_watch;
-    /* Read under either BQL or mon_lock, written with BQL+mon_lock.  */
     int mux_out;
+    int reset_seen;
 };
 
 struct MonitorHMP {
@@ -158,7 +165,6 @@ typedef QTAILQ_HEAD(MonitorList, Monitor) MonitorList;
 extern IOThread *mon_iothread;
 extern Coroutine *qmp_dispatcher_co;
 extern bool qmp_dispatcher_co_shutdown;
-extern bool qmp_dispatcher_co_busy;
 extern QmpCommandList qmp_commands, qmp_cap_negotiation_commands;
 extern QemuMutex monitor_lock;
 extern MonitorList mon_list;
@@ -166,7 +172,6 @@ extern int mon_refcount;
 
 extern HMPCommand hmp_cmds[];
 
-int monitor_puts(Monitor *mon, const char *str);
 void monitor_data_init(Monitor *mon, bool is_qmp, bool skip_flush,
                        bool use_io_thread);
 void monitor_data_destroy(Monitor *mon);
@@ -177,13 +182,10 @@ void monitor_fdsets_cleanup(void);
 void qmp_send_response(MonitorQMP *mon, const QDict *rsp);
 void monitor_data_destroy_qmp(MonitorQMP *mon);
 void coroutine_fn monitor_qmp_dispatcher_co(void *data);
+void qmp_dispatcher_co_wake(void);
 
 int get_monitor_def(Monitor *mon, int64_t *pval, const char *name);
-void help_cmd(Monitor *mon, const char *name);
 void handle_hmp_command(MonitorHMP *mon, const char *cmdline);
 int hmp_compare_cmd(const char *name, const char *list);
 
-void qmp_query_qmp_schema(QDict *qdict, QObject **ret_data,
-                                 Error **errp);
-
 #endif
index 27573348f3d7ac5c03b56637235d985d6587e177..01ede1babd3d5ce004f528d870d54b9a0a1279c1 100644 (file)
 #include "qapi/qapi-emit-events.h"
 #include "qapi/qapi-visit-control.h"
 #include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qstring.h"
 #include "qemu/error-report.h"
 #include "qemu/option.h"
 #include "sysemu/qtest.h"
-#include "sysemu/sysemu.h"
 #include "trace.h"
 
 /*
@@ -58,29 +56,11 @@ IOThread *mon_iothread;
 /* Coroutine to dispatch the requests received from I/O thread */
 Coroutine *qmp_dispatcher_co;
 
-/* Set to true when the dispatcher coroutine should terminate */
-bool qmp_dispatcher_co_shutdown;
-
 /*
- * qmp_dispatcher_co_busy is used for synchronisation between the
- * monitor thread and the main thread to ensure that the dispatcher
- * coroutine never gets scheduled a second time when it's already
- * scheduled (scheduling the same coroutine twice is forbidden).
- *
- * It is true if the coroutine is active and processing requests.
- * Additional requests may then be pushed onto mon->qmp_requests,
- * and @qmp_dispatcher_co_shutdown may be set without further ado.
- * @qmp_dispatcher_co_busy must not be woken up in this case.
- *
- * If false, you also have to set @qmp_dispatcher_co_busy to true and
- * wake up @qmp_dispatcher_co after pushing the new requests.
- *
- * The coroutine will automatically change this variable back to false
- * before it yields.  Nobody else may set the variable to false.
- *
- * Access must be atomic for thread safety.
+ * Set to true when the dispatcher coroutine should terminate.  Protected
+ * by monitor_lock.
  */
-bool qmp_dispatcher_co_busy;
+bool qmp_dispatcher_co_shutdown;
 
 /*
  * Protects mon_list, monitor_qapi_event_state, coroutine_mon,
@@ -156,22 +136,19 @@ static inline bool monitor_is_hmp_non_interactive(const Monitor *mon)
     return !monitor_uses_readline(container_of(mon, MonitorHMP, common));
 }
 
-static void monitor_flush_locked(Monitor *mon);
-
-static gboolean monitor_unblocked(GIOChannel *chan, GIOCondition cond,
+static gboolean monitor_unblocked(void *do_not_use, GIOCondition cond,
                                   void *opaque)
 {
     Monitor *mon = opaque;
 
-    qemu_mutex_lock(&mon->mon_lock);
+    QEMU_LOCK_GUARD(&mon->mon_lock);
     mon->out_watch = 0;
     monitor_flush_locked(mon);
-    qemu_mutex_unlock(&mon->mon_lock);
-    return FALSE;
+    return G_SOURCE_REMOVE;
 }
 
 /* Caller must hold mon->mon_lock */
-static void monitor_flush_locked(Monitor *mon)
+void monitor_flush_locked(Monitor *mon)
 {
     int rc;
     size_t len;
@@ -181,22 +158,19 @@ static void monitor_flush_locked(Monitor *mon)
         return;
     }
 
-    buf = qstring_get_str(mon->outbuf);
-    len = qstring_get_length(mon->outbuf);
+    buf = mon->outbuf->str;
+    len = mon->outbuf->len;
 
     if (len && !mon->mux_out) {
         rc = qemu_chr_fe_write(&mon->chr, (const uint8_t *) buf, len);
         if ((rc < 0 && errno != EAGAIN) || (rc == len)) {
             /* all flushed or error */
-            qobject_unref(mon->outbuf);
-            mon->outbuf = qstring_new();
+            g_string_truncate(mon->outbuf, 0);
             return;
         }
         if (rc > 0) {
             /* partial write */
-            QString *tmp = qstring_from_str(buf + rc);
-            qobject_unref(mon->outbuf);
-            mon->outbuf = tmp;
+            g_string_erase(mon->outbuf, 0, rc);
         }
         if (mon->out_watch == 0) {
             mon->out_watch =
@@ -208,33 +182,36 @@ static void monitor_flush_locked(Monitor *mon)
 
 void monitor_flush(Monitor *mon)
 {
-    qemu_mutex_lock(&mon->mon_lock);
+    QEMU_LOCK_GUARD(&mon->mon_lock);
     monitor_flush_locked(mon);
-    qemu_mutex_unlock(&mon->mon_lock);
 }
 
 /* flush at every end of line */
-int monitor_puts(Monitor *mon, const char *str)
+int monitor_puts_locked(Monitor *mon, const char *str)
 {
     int i;
     char c;
 
-    qemu_mutex_lock(&mon->mon_lock);
     for (i = 0; str[i]; i++) {
         c = str[i];
         if (c == '\n') {
-            qstring_append_chr(mon->outbuf, '\r');
+            g_string_append_c(mon->outbuf, '\r');
         }
-        qstring_append_chr(mon->outbuf, c);
+        g_string_append_c(mon->outbuf, c);
         if (c == '\n') {
             monitor_flush_locked(mon);
         }
     }
-    qemu_mutex_unlock(&mon->mon_lock);
 
     return i;
 }
 
+int monitor_puts(Monitor *mon, const char *str)
+{
+    QEMU_LOCK_GUARD(&mon->mon_lock);
+    return monitor_puts_locked(mon, str);
+}
+
 int monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
 {
     char *buf;
@@ -265,6 +242,33 @@ int monitor_printf(Monitor *mon, const char *fmt, ...)
     return ret;
 }
 
+void monitor_printc(Monitor *mon, int c)
+{
+    monitor_printf(mon, "'");
+    switch(c) {
+    case '\'':
+        monitor_printf(mon, "\\'");
+        break;
+    case '\\':
+        monitor_printf(mon, "\\\\");
+        break;
+    case '\n':
+        monitor_printf(mon, "\\n");
+        break;
+    case '\r':
+        monitor_printf(mon, "\\r");
+        break;
+    default:
+        if (c >= 32 && c <= 126) {
+            monitor_printf(mon, "%c", c);
+        } else {
+            monitor_printf(mon, "\\x%02x", c);
+        }
+        break;
+    }
+    monitor_printf(mon, "'");
+}
+
 /*
  * Print to current monitor if we have one, else to stderr.
  */
@@ -291,6 +295,16 @@ int error_vprintf_unless_qmp(const char *fmt, va_list ap)
     return -1;
 }
 
+int error_printf_unless_qmp(const char *fmt, ...)
+{
+    va_list ap;
+    int ret;
+
+    va_start(ap, fmt);
+    ret = error_vprintf_unless_qmp(fmt, ap);
+    va_end(ap);
+    return ret;
+}
 
 static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = {
     /* Limit guest-triggerable events to 1 per second */
@@ -301,6 +315,7 @@ static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = {
     [QAPI_EVENT_QUORUM_FAILURE]    = { 1000 * SCALE_MS },
     [QAPI_EVENT_VSERPORT_CHANGE]   = { 1000 * SCALE_MS },
     [QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE] = { 1000 * SCALE_MS },
+    [QAPI_EVENT_HV_BALLOON_STATUS_REPORT] = { 1000 * SCALE_MS },
 };
 
 /*
@@ -353,7 +368,7 @@ monitor_qapi_event_queue_no_reenter(QAPIEvent event, QDict *qdict)
     evconf = &monitor_qapi_event_conf[event];
     trace_monitor_protocol_event_queue(event, qdict, evconf->rate);
 
-    qemu_mutex_lock(&monitor_lock);
+    QEMU_LOCK_GUARD(&monitor_lock);
 
     if (!evconf->rate) {
         /* Unthrottled event */
@@ -395,8 +410,6 @@ monitor_qapi_event_queue_no_reenter(QAPIEvent event, QDict *qdict)
             timer_mod_ns(evstate->timer, now + evconf->rate);
         }
     }
-
-    qemu_mutex_unlock(&monitor_lock);
 }
 
 void qapi_event_emit(QAPIEvent event, QDict *qdict)
@@ -451,7 +464,7 @@ static void monitor_qapi_event_handler(void *opaque)
     MonitorQAPIEventConf *evconf = &monitor_qapi_event_conf[evstate->event];
 
     trace_monitor_protocol_event_handler(evstate->event, evstate->qdict);
-    qemu_mutex_lock(&monitor_lock);
+    QEMU_LOCK_GUARD(&monitor_lock);
 
     if (evstate->qdict) {
         int64_t now = qemu_clock_get_ns(monitor_get_event_clock());
@@ -466,8 +479,6 @@ static void monitor_qapi_event_handler(void *opaque)
         timer_free(evstate->timer);
         g_free(evstate);
     }
-
-    qemu_mutex_unlock(&monitor_lock);
 }
 
 static unsigned int qapi_event_throttle_hash(const void *key)
@@ -483,6 +494,10 @@ static unsigned int qapi_event_throttle_hash(const void *key)
         hash += g_str_hash(qdict_get_str(evstate->data, "node-name"));
     }
 
+    if (evstate->event == QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE) {
+        hash += g_str_hash(qdict_get_str(evstate->data, "qom-path"));
+    }
+
     return hash;
 }
 
@@ -505,6 +520,11 @@ static gboolean qapi_event_throttle_equal(const void *a, const void *b)
                        qdict_get_str(evb->data, "node-name"));
     }
 
+    if (eva->event == QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE) {
+        return !strcmp(qdict_get_str(eva->data, "qom-path"),
+                       qdict_get_str(evb->data, "qom-path"));
+    }
+
     return TRUE;
 }
 
@@ -532,6 +552,17 @@ static void monitor_accept_input(void *opaque)
 {
     Monitor *mon = opaque;
 
+    qemu_mutex_lock(&mon->mon_lock);
+    if (!monitor_is_qmp(mon) && mon->reset_seen) {
+        MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common);
+        assert(hmp_mon->rs);
+        readline_restart(hmp_mon->rs);
+        qemu_mutex_unlock(&mon->mon_lock);
+        readline_show_prompt(hmp_mon->rs);
+    } else {
+        qemu_mutex_unlock(&mon->mon_lock);
+    }
+
     qemu_chr_fe_accept_input(&mon->chr);
 }
 
@@ -550,12 +581,6 @@ void monitor_resume(Monitor *mon)
             ctx = qemu_get_aio_context();
         }
 
-        if (!monitor_is_qmp(mon)) {
-            MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common);
-            assert(hmp_mon->rs);
-            readline_show_prompt(hmp_mon->rs);
-        }
-
         aio_bh_schedule_oneshot(ctx, monitor_accept_input, mon);
     }
 
@@ -566,7 +591,7 @@ int monitor_can_read(void *opaque)
 {
     Monitor *mon = opaque;
 
-    return !qatomic_mb_read(&mon->suspend_cnt);
+    return !qatomic_read(&mon->suspend_cnt);
 }
 
 void monitor_list_append(Monitor *mon)
@@ -602,7 +627,7 @@ void monitor_data_init(Monitor *mon, bool is_qmp, bool skip_flush,
     }
     qemu_mutex_init(&mon->mon_lock);
     mon->is_qmp = is_qmp;
-    mon->outbuf = qstring_new();
+    mon->outbuf = g_string_new(NULL);
     mon->skip_flush = skip_flush;
     mon->use_io_thread = use_io_thread;
 }
@@ -616,7 +641,7 @@ void monitor_data_destroy(Monitor *mon)
     } else {
         readline_free(container_of(mon, MonitorHMP, common)->rs);
     }
-    qobject_unref(mon->outbuf);
+    g_string_free(mon->outbuf, true);
     qemu_mutex_destroy(&mon->mon_lock);
 }
 
@@ -629,7 +654,7 @@ void monitor_cleanup(void)
      * We need to poll both qemu_aio_context and iohandler_ctx to make
      * sure that the dispatcher coroutine keeps making progress and
      * eventually terminates.  qemu_aio_context is automatically
-     * polled by calling AIO_WAIT_WHILE on it, but we must poll
+     * polled by calling AIO_WAIT_WHILE_UNLOCKED on it, but we must poll
      * iohandler_ctx manually.
      *
      * Letting the iothread continue while shutting down the dispatcher
@@ -637,14 +662,14 @@ void monitor_cleanup(void)
      * we'll just leave them in the queue without sending a response
      * and monitor_data_destroy() will free them.
      */
-    qmp_dispatcher_co_shutdown = true;
-    if (!qatomic_xchg(&qmp_dispatcher_co_busy, true)) {
-        aio_co_wake(qmp_dispatcher_co);
+    WITH_QEMU_LOCK_GUARD(&monitor_lock) {
+        qmp_dispatcher_co_shutdown = true;
     }
+    qmp_dispatcher_co_wake();
 
-    AIO_WAIT_WHILE(qemu_get_aio_context(),
+    AIO_WAIT_WHILE_UNLOCKED(NULL,
                    (aio_poll(iohandler_get_aio_context(), false),
-                    qatomic_mb_read(&qmp_dispatcher_co_busy)));
+                    qatomic_read(&qmp_dispatcher_co)));
 
     /*
      * We need to explicitly stop the I/O thread (but not destroy it),
@@ -683,7 +708,7 @@ static void monitor_qapi_event_init(void)
                                                 qapi_event_throttle_equal);
 }
 
-void monitor_init_globals_core(void)
+void monitor_init_globals(void)
 {
     monitor_qapi_event_init();
     qemu_mutex_init(&monitor_lock);
@@ -695,14 +720,13 @@ void monitor_init_globals_core(void)
      * rid of those assumptions.
      */
     qmp_dispatcher_co = qemu_coroutine_create(monitor_qmp_dispatcher_co, NULL);
-    qatomic_mb_set(&qmp_dispatcher_co_busy, true);
     aio_co_schedule(iohandler_get_aio_context(), qmp_dispatcher_co);
 }
 
 int monitor_init(MonitorOptions *opts, bool allow_hmp, Error **errp)
 {
+    ERRP_GUARD();
     Chardev *chr;
-    Error *local_err = NULL;
 
     chr = qemu_chr_find(opts->chardev);
     if (chr == NULL) {
@@ -716,7 +740,7 @@ int monitor_init(MonitorOptions *opts, bool allow_hmp, Error **errp)
 
     switch (opts->mode) {
     case MONITOR_MODE_CONTROL:
-        monitor_init_qmp(chr, opts->pretty, &local_err);
+        monitor_init_qmp(chr, opts->pretty, errp);
         break;
     case MONITOR_MODE_READLINE:
         if (!allow_hmp) {
@@ -724,20 +748,16 @@ int monitor_init(MonitorOptions *opts, bool allow_hmp, Error **errp)
             return -1;
         }
         if (opts->pretty) {
-            warn_report("'pretty' is deprecated for HMP monitors, it has no "
-                        "effect and will be removed in future versions");
+            error_setg(errp, "'pretty' is not compatible with HMP monitors");
+            return -1;
         }
-        monitor_init_hmp(chr, true, &local_err);
+        monitor_init_hmp(chr, true, errp);
         break;
     default:
         g_assert_not_reached();
     }
 
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return -1;
-    }
-    return 0;
+    return *errp ? -1 : 0;
 }
 
 int monitor_init_opts(QemuOpts *opts, Error **errp)
index a456762f6a840c9e733a8dc51aa1ea7771b652a4..f21506efa584598ef99b8f25e198934371e82caa 100644 (file)
 
 #include "monitor-internal.h"
 #include "qemu-version.h"
+#include "qapi/compat-policy.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-control.h"
-#include "qapi/qapi-emit-events.h"
+#include "qapi/qapi-commands-introspect.h"
 #include "qapi/qapi-introspect.h"
+#include "qapi/qapi-visit-introspect.h"
+#include "qapi/qobject-input-visitor.h"
 
 /*
  * Accept QMP capabilities in @list for @mon.
@@ -104,17 +107,16 @@ VersionInfo *qmp_query_version(Error **errp)
 
 static void query_commands_cb(const QmpCommand *cmd, void *opaque)
 {
-    CommandInfoList *info, **list = opaque;
+    CommandInfo *info;
+    CommandInfoList **list = opaque;
 
     if (!cmd->enabled) {
         return;
     }
 
     info = g_malloc0(sizeof(*info));
-    info->value = g_malloc0(sizeof(*info->value));
-    info->value->name = g_strdup(cmd->name);
-    info->next = *list;
-    *list = info;
+    info->name = g_strdup(cmd->name);
+    QAPI_LIST_PREPEND(*list, info);
 }
 
 CommandInfoList *qmp_query_commands(Error **errp)
@@ -131,41 +133,89 @@ CommandInfoList *qmp_query_commands(Error **errp)
     return list;
 }
 
-EventInfoList *qmp_query_events(Error **errp)
+static void *split_off_generic_list(void *list,
+                                    bool (*splitp)(void *elt),
+                                    void **part)
 {
-    /*
-     * TODO This deprecated command is the only user of
-     * QAPIEvent_str() and QAPIEvent_lookup[].  When the command goes,
-     * they should go, too.
-     */
-    EventInfoList *info, *ev_list = NULL;
-    QAPIEvent e;
-
-    for (e = 0 ; e < QAPI_EVENT__MAX ; e++) {
-        const char *event_name = QAPIEvent_str(e);
-        assert(event_name != NULL);
-        info = g_malloc0(sizeof(*info));
-        info->value = g_malloc0(sizeof(*info->value));
-        info->value->name = g_strdup(event_name);
-
-        info->next = ev_list;
-        ev_list = info;
+    GenericList *keep = NULL, **keep_tailp = &keep;
+    GenericList *split = NULL, **split_tailp = &split;
+    GenericList *tail;
+
+    for (tail = list; tail; tail = tail->next) {
+        if (splitp(tail)) {
+            *split_tailp = tail;
+            split_tailp = &tail->next;
+        } else {
+            *keep_tailp = tail;
+            keep_tailp = &tail->next;
+        }
     }
 
-    return ev_list;
+    *keep_tailp = *split_tailp = NULL;
+    *part = split;
+    return keep;
 }
 
-/*
- * Minor hack: generated marshalling suppressed for this command
- * ('gen': false in the schema) so we can parse the JSON string
- * directly into QObject instead of first parsing it with
- * visit_type_SchemaInfoList() into a SchemaInfoList, then marshal it
- * to QObject with generated output marshallers, every time.  Instead,
- * we do it in test-qobject-input-visitor.c, just to make sure
- * qapi-gen.py's output actually conforms to the schema.
- */
-void qmp_query_qmp_schema(QDict *qdict, QObject **ret_data,
-                                 Error **errp)
+static bool is_in(const char *s, strList *list)
 {
-    *ret_data = qobject_from_qlit(&qmp_schema_qlit);
+    strList *tail;
+
+    for (tail = list; tail; tail = tail->next) {
+        if (!strcmp(tail->value, s)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static bool is_entity_deprecated(void *link)
+{
+    return is_in("deprecated", ((SchemaInfoList *)link)->value->features);
+}
+
+static bool is_member_deprecated(void *link)
+{
+    return is_in("deprecated",
+                 ((SchemaInfoObjectMemberList *)link)->value->features);
+}
+
+static SchemaInfoList *zap_deprecated(SchemaInfoList *schema)
+{
+    void *to_zap;
+    SchemaInfoList *tail;
+    SchemaInfo *ent;
+
+    schema = split_off_generic_list(schema, is_entity_deprecated, &to_zap);
+    qapi_free_SchemaInfoList(to_zap);
+
+    for (tail = schema; tail; tail = tail->next) {
+        ent = tail->value;
+        if (ent->meta_type == SCHEMA_META_TYPE_OBJECT) {
+            ent->u.object.members
+                = split_off_generic_list(ent->u.object.members,
+                                         is_member_deprecated, &to_zap);
+            qapi_free_SchemaInfoObjectMemberList(to_zap);
+        }
+    }
+
+    return schema;
+}
+
+SchemaInfoList *qmp_query_qmp_schema(Error **errp)
+{
+    QObject *obj = qobject_from_qlit(&qmp_schema_qlit);
+    Visitor *v = qobject_input_visitor_new(obj);
+    SchemaInfoList *schema = NULL;
+
+    /* test_visitor_in_qmp_introspect() ensures this can't fail */
+    visit_type_SchemaInfoList(v, NULL, &schema, &error_abort);
+    g_assert(schema);
+
+    qobject_unref(obj);
+    visit_free(v);
+
+    if (compat_policy.deprecated_output == COMPAT_POLICY_OUTPUT_HIDE) {
+        return zap_deprecated(schema);
+    }
+    return schema;
 }
index a08143b32335b602f89d77de2a89b2e1651895d5..b0f948d33766bb781b5bb1cfdfacb3d190828b9a 100644 (file)
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "qemu/cutils.h"
-#include "qemu/option.h"
-#include "monitor/monitor.h"
+#include "qemu/sockets.h"
+#include "monitor-internal.h"
+#include "monitor/qdev.h"
+#include "monitor/qmp-helpers.h"
 #include "sysemu/sysemu.h"
-#include "qemu/config-file.h"
-#include "qemu/uuid.h"
-#include "chardev/char.h"
-#include "ui/qemu-spice.h"
-#include "ui/vnc.h"
 #include "sysemu/kvm.h"
 #include "sysemu/runstate.h"
-#include "sysemu/arch_init.h"
-#include "sysemu/blockdev.h"
+#include "sysemu/runstate-action.h"
 #include "sysemu/block-backend.h"
 #include "qapi/error.h"
-#include "qapi/qapi-commands-acpi.h"
-#include "qapi/qapi-commands-block.h"
+#include "qapi/qapi-init-commands.h"
 #include "qapi/qapi-commands-control.h"
-#include "qapi/qapi-commands-machine.h"
 #include "qapi/qapi-commands-misc.h"
-#include "qapi/qapi-commands-ui.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/type-helpers.h"
 #include "hw/mem/memory-device.h"
-#include "hw/acpi/acpi_dev_interface.h"
+#include "hw/intc/intc.h"
+#include "hw/rdma/rdma.h"
 
 NameInfo *qmp_query_name(Error **errp)
 {
     NameInfo *info = g_malloc0(sizeof(*info));
 
-    if (qemu_name) {
-        info->has_name = true;
-        info->name = g_strdup(qemu_name);
-    }
-
-    return info;
-}
-
-KvmInfo *qmp_query_kvm(Error **errp)
-{
-    KvmInfo *info = g_malloc0(sizeof(*info));
-
-    info->enabled = kvm_enabled();
-    info->present = kvm_available();
-
-    return info;
-}
-
-UuidInfo *qmp_query_uuid(Error **errp)
-{
-    UuidInfo *info = g_malloc0(sizeof(*info));
-
-    info->UUID = qemu_uuid_unparse_strdup(&qemu_uuid);
+    info->name = g_strdup(qemu_name);
     return info;
 }
 
 void qmp_quit(Error **errp)
 {
-    no_shutdown = 0;
+    shutdown_action = SHUTDOWN_ACTION_POWEROFF;
     qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_QMP_QUIT);
 }
 
@@ -80,7 +51,7 @@ void qmp_stop(Error **errp)
 {
     /* if there is a dump in background, we should wait until the dump
      * finished */
-    if (dump_in_progress()) {
+    if (qemu_system_dump_in_progress()) {
         error_setg(errp, "There is a dump in process, please wait.");
         return;
     }
@@ -92,26 +63,6 @@ void qmp_stop(Error **errp)
     }
 }
 
-void qmp_system_reset(Error **errp)
-{
-    qemu_system_reset_request(SHUTDOWN_CAUSE_HOST_QMP_SYSTEM_RESET);
-}
-
-void qmp_system_powerdown(Error **errp)
-{
-    qemu_system_powerdown_request();
-}
-
-void qmp_x_exit_preconfig(Error **errp)
-{
-    if (!runstate_check(RUN_STATE_PRECONFIG)) {
-        error_setg(errp, "The command is permitted only in '%s' state",
-                   RunState_str(RUN_STATE_PRECONFIG));
-        return;
-    }
-    qemu_exit_preconfig_request();
-}
-
 void qmp_cont(Error **errp)
 {
     BlockBackend *blk;
@@ -120,7 +71,7 @@ void qmp_cont(Error **errp)
 
     /* if there is a dump in background, we should wait until the dump
      * finished */
-    if (dump_in_progress()) {
+    if (qemu_system_dump_in_progress()) {
         error_setg(errp, "There is a dump in process, please wait.");
         return;
     }
@@ -139,8 +90,11 @@ void qmp_cont(Error **errp)
         blk_iostatus_reset(blk);
     }
 
-    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
-        block_job_iostatus_reset(job);
+    WITH_JOB_LOCK_GUARD() {
+        for (job = block_job_next_locked(NULL); job;
+             job = block_job_next_locked(job)) {
+            block_job_iostatus_reset_locked(job);
+        }
     }
 
     /* Continuing after completed migration. Images have been inactivated to
@@ -149,7 +103,7 @@ void qmp_cont(Error **errp)
      * If there are no inactive block nodes (e.g. because the VM was just
      * paused rather than completing a migration), bdrv_inactivate_all() simply
      * doesn't do anything. */
-    bdrv_invalidate_cache_all(&local_err);
+    bdrv_activate_all(&local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
@@ -162,242 +116,96 @@ void qmp_cont(Error **errp)
     }
 }
 
-void qmp_system_wakeup(Error **errp)
-{
-    if (!qemu_wakeup_suspend_enabled()) {
-        error_setg(errp,
-                   "wake-up from suspend is not supported by this guest");
-        return;
-    }
-
-    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, errp);
-}
-
-void qmp_set_password(const char *protocol, const char *password,
-                      bool has_connected, const char *connected, Error **errp)
-{
-    int disconnect_if_connected = 0;
-    int fail_if_connected = 0;
-    int rc;
-
-    if (has_connected) {
-        if (strcmp(connected, "fail") == 0) {
-            fail_if_connected = 1;
-        } else if (strcmp(connected, "disconnect") == 0) {
-            disconnect_if_connected = 1;
-        } else if (strcmp(connected, "keep") == 0) {
-            /* nothing */
-        } else {
-            error_setg(errp, QERR_INVALID_PARAMETER, "connected");
-            return;
-        }
-    }
-
-    if (strcmp(protocol, "spice") == 0) {
-        if (!qemu_using_spice(errp)) {
-            return;
-        }
-        rc = qemu_spice.set_passwd(password, fail_if_connected,
-                                   disconnect_if_connected);
-        if (rc != 0) {
-            error_setg(errp, QERR_SET_PASSWD_FAILED);
-        }
-        return;
-    }
-
-    if (strcmp(protocol, "vnc") == 0) {
-        if (fail_if_connected || disconnect_if_connected) {
-            /* vnc supports "connected=keep" only */
-            error_setg(errp, QERR_INVALID_PARAMETER, "connected");
-            return;
-        }
-        /* Note that setting an empty password will not disable login through
-         * this interface. */
-        rc = vnc_display_password(NULL, password);
-        if (rc < 0) {
-            error_setg(errp, QERR_SET_PASSWD_FAILED);
-        }
-        return;
-    }
-
-    error_setg(errp, QERR_INVALID_PARAMETER, "protocol");
-}
-
-void qmp_expire_password(const char *protocol, const char *whenstr,
-                         Error **errp)
+void qmp_add_client(const char *protocol, const char *fdname,
+                    bool has_skipauth, bool skipauth, bool has_tls, bool tls,
+                    Error **errp)
 {
-    time_t when;
-    int rc;
-
-    if (strcmp(whenstr, "now") == 0) {
-        when = 0;
-    } else if (strcmp(whenstr, "never") == 0) {
-        when = TIME_MAX;
-    } else if (whenstr[0] == '+') {
-        when = time(NULL) + strtoull(whenstr+1, NULL, 10);
-    } else {
-        when = strtoull(whenstr, NULL, 10);
-    }
-
-    if (strcmp(protocol, "spice") == 0) {
-        if (!qemu_using_spice(errp)) {
-            return;
-        }
-        rc = qemu_spice.set_pw_expire(when);
-        if (rc != 0) {
-            error_setg(errp, QERR_SET_PASSWD_FAILED);
-        }
-        return;
-    }
-
-    if (strcmp(protocol, "vnc") == 0) {
-        rc = vnc_display_pw_expire(NULL, when);
-        if (rc != 0) {
-            error_setg(errp, QERR_SET_PASSWD_FAILED);
-        }
-        return;
-    }
-
-    error_setg(errp, QERR_INVALID_PARAMETER, "protocol");
-}
-
+    static const struct {
+        const char *name;
+        bool (*add_client)(int fd, bool has_skipauth, bool skipauth,
+                           bool has_tls, bool tls, Error **errp);
+    } protocol_table[] = {
+        { "spice", qmp_add_client_spice },
 #ifdef CONFIG_VNC
-void qmp_change_vnc_password(const char *password, Error **errp)
-{
-    if (vnc_display_password(NULL, password) < 0) {
-        error_setg(errp, QERR_SET_PASSWD_FAILED);
-    }
-}
-
-static void qmp_change_vnc_listen(const char *target, Error **errp)
-{
-    QemuOptsList *olist = qemu_find_opts("vnc");
-    QemuOpts *opts;
+        { "vnc", qmp_add_client_vnc },
+#endif
+#ifdef CONFIG_DBUS_DISPLAY
+        { "@dbus-display", qmp_add_client_dbus_display },
+#endif
+    };
+    int fd, i;
 
-    if (strstr(target, "id=")) {
-        error_setg(errp, "id not supported");
+    fd = monitor_get_fd(monitor_cur(), fdname, errp);
+    if (fd < 0) {
         return;
     }
 
-    opts = qemu_opts_find(olist, "default");
-    if (opts) {
-        qemu_opts_del(opts);
-    }
-    opts = vnc_parse(target, errp);
-    if (!opts) {
+    if (!fd_is_socket(fd)) {
+        error_setg(errp, "parameter @fdname must name a socket");
+        close(fd);
         return;
     }
 
-    vnc_display_open("default", errp);
-}
-
-static void qmp_change_vnc(const char *target, bool has_arg, const char *arg,
-                           Error **errp)
-{
-    if (strcmp(target, "passwd") == 0 || strcmp(target, "password") == 0) {
-        if (!has_arg) {
-            error_setg(errp, QERR_MISSING_PARAMETER, "password");
-        } else {
-            qmp_change_vnc_password(arg, errp);
+    for (i = 0; i < ARRAY_SIZE(protocol_table); i++) {
+        if (!strcmp(protocol, protocol_table[i].name)) {
+            if (!protocol_table[i].add_client(fd, has_skipauth, skipauth,
+                                              has_tls, tls, errp)) {
+                close(fd);
+            }
+            return;
         }
-    } else {
-        qmp_change_vnc_listen(target, errp);
     }
-}
-#endif /* !CONFIG_VNC */
 
-void qmp_change(const char *device, const char *target,
-                bool has_arg, const char *arg, Error **errp)
-{
-    if (strcmp(device, "vnc") == 0) {
-#ifdef CONFIG_VNC
-        qmp_change_vnc(target, has_arg, arg, errp);
-#else
-        error_setg(errp, QERR_FEATURE_DISABLED, "vnc");
-#endif
-    } else {
-        qmp_blockdev_change_medium(true, device, false, NULL, target,
-                                   has_arg, arg, false, 0, errp);
+    if (!qmp_add_client_char(fd, has_skipauth, skipauth, has_tls, tls,
+                             protocol, errp)) {
+        close(fd);
     }
 }
 
-void qmp_add_client(const char *protocol, const char *fdname,
-                    bool has_skipauth, bool skipauth, bool has_tls, bool tls,
-                    Error **errp)
+char *qmp_human_monitor_command(const char *command_line, bool has_cpu_index,
+                                int64_t cpu_index, Error **errp)
 {
-    Chardev *s;
-    int fd;
+    char *output = NULL;
+    MonitorHMP hmp = {};
 
-    fd = monitor_get_fd(monitor_cur(), fdname, errp);
-    if (fd < 0) {
-        return;
-    }
+    monitor_data_init(&hmp.common, false, true, false);
 
-    if (strcmp(protocol, "spice") == 0) {
-        if (!qemu_using_spice(errp)) {
-            close(fd);
-            return;
-        }
-        skipauth = has_skipauth ? skipauth : false;
-        tls = has_tls ? tls : false;
-        if (qemu_spice.display_add_client(fd, skipauth, tls) < 0) {
-            error_setg(errp, "spice failed to add client");
-            close(fd);
-        }
-        return;
-#ifdef CONFIG_VNC
-    } else if (strcmp(protocol, "vnc") == 0) {
-        skipauth = has_skipauth ? skipauth : false;
-        vnc_display_add_client(NULL, fd, skipauth);
-        return;
-#endif
-    } else if ((s = qemu_chr_find(protocol)) != NULL) {
-        if (qemu_chr_add_client(s, fd) < 0) {
-            error_setg(errp, "failed to add client");
-            close(fd);
-            return;
+    if (has_cpu_index) {
+        int ret = monitor_set_cpu(&hmp.common, cpu_index);
+        if (ret < 0) {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
+                       "a CPU number");
+            goto out;
         }
-        return;
     }
 
-    error_setg(errp, "protocol '%s' is invalid", protocol);
-    close(fd);
-}
-
+    handle_hmp_command(&hmp, command_line);
 
-MemoryDeviceInfoList *qmp_query_memory_devices(Error **errp)
-{
-    return qmp_memory_device_list();
-}
-
-ACPIOSTInfoList *qmp_query_acpi_ospm_status(Error **errp)
-{
-    bool ambig;
-    ACPIOSTInfoList *head = NULL;
-    ACPIOSTInfoList **prev = &head;
-    Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, &ambig);
-
-    if (obj) {
-        AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj);
-        AcpiDeviceIf *adev = ACPI_DEVICE_IF(obj);
-
-        adevc->ospm_status(adev, &prev);
-    } else {
-        error_setg(errp, "command is not supported, missing ACPI device");
+    WITH_QEMU_LOCK_GUARD(&hmp.common.mon_lock) {
+        output = g_strdup(hmp.common.outbuf->str);
     }
 
-    return head;
+out:
+    monitor_data_destroy(&hmp.common);
+    return output;
 }
 
-MemoryInfo *qmp_query_memory_size_summary(Error **errp)
+static void __attribute__((__constructor__)) monitor_init_qmp_commands(void)
 {
-    MemoryInfo *mem_info = g_malloc0(sizeof(MemoryInfo));
+    /*
+     * Two command lists:
+     * - qmp_commands contains all QMP commands
+     * - qmp_cap_negotiation_commands contains just
+     *   "qmp_capabilities", to enforce capability negotiation
+     */
 
-    mem_info->base_memory = ram_size;
+    qmp_init_marshal(&qmp_commands);
 
-    mem_info->plugged_memory = get_plugged_memory_size();
-    mem_info->has_plugged_memory =
-        mem_info->plugged_memory != (uint64_t)-1;
+    qmp_register_command(&qmp_commands, "device_add",
+                         qmp_device_add, 0, 0);
 
-    return mem_info;
+    QTAILQ_INIT(&qmp_cap_negotiation_commands);
+    qmp_register_command(&qmp_cap_negotiation_commands, "qmp_capabilities",
+                         qmp_marshal_qmp_capabilities,
+                         QCO_ALLOW_PRECONFIG, 0);
 }
index b42f8c6af3759d909d1e9570bc7a784453cb337d..a239945e8dd7601673adfbd863634d5c974530b0 100644 (file)
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qlist.h"
-#include "qapi/qmp/qstring.h"
 #include "trace.h"
 
+/*
+ * qmp_dispatcher_co_busy is used for synchronisation between the
+ * monitor thread and the main thread to ensure that the dispatcher
+ * coroutine never gets scheduled a second time when it's already
+ * scheduled (scheduling the same coroutine twice is forbidden).
+ *
+ * It is true if the coroutine will process at least one more request
+ * before going to sleep.  Either it has been kicked already, or it
+ * is active and processing requests.  Additional requests may therefore
+ * be pushed onto mon->qmp_requests, and @qmp_dispatcher_co_shutdown may
+ * be set without further ado.  @qmp_dispatcher_co must not be woken up
+ * in this case.
+ *
+ * If false, you have to wake up @qmp_dispatcher_co after pushing new
+ * requests. You also have to set @qmp_dispatcher_co_busy to true
+ * before waking up the coroutine.
+ *
+ * The coroutine will automatically change this variable back to false
+ * before it yields.  Nobody else may set the variable to false.
+ *
+ * Access must be atomic for thread safety.
+ */
+static bool qmp_dispatcher_co_busy = true;
+
 struct QMPRequest {
     /* Owner of the request */
     MonitorQMP *mon;
@@ -77,10 +100,10 @@ static void monitor_qmp_cleanup_req_queue_locked(MonitorQMP *mon)
 
 static void monitor_qmp_cleanup_queue_and_resume(MonitorQMP *mon)
 {
-    qemu_mutex_lock(&mon->qmp_queue_lock);
+    QEMU_LOCK_GUARD(&mon->qmp_queue_lock);
 
     /*
-     * Same condition as in monitor_qmp_bh_dispatcher(), but before
+     * Same condition as in monitor_qmp_dispatcher_co(), but before
      * removing an element from the queue (hence no `- 1`).
      * Also, the queue should not be empty either, otherwise the
      * monitor hasn't been suspended yet (or was already resumed).
@@ -104,21 +127,21 @@ static void monitor_qmp_cleanup_queue_and_resume(MonitorQMP *mon)
         monitor_resume(&mon->common);
     }
 
-    qemu_mutex_unlock(&mon->qmp_queue_lock);
 }
 
 void qmp_send_response(MonitorQMP *mon, const QDict *rsp)
 {
     const QObject *data = QOBJECT(rsp);
-    QString *json;
+    GString *json;
 
-    json = mon->pretty ? qobject_to_json_pretty(data) : qobject_to_json(data);
+    json = qobject_to_json_pretty(data, mon->pretty);
     assert(json != NULL);
+    trace_monitor_qmp_respond(mon, json->str);
 
-    qstring_append_chr(json, '\n');
-    monitor_puts(&mon->common, qstring_get_str(json));
+    g_string_append_c(json, '\n');
+    monitor_puts(&mon->common, json->str);
 
-    qobject_unref(json);
+    g_string_free(json, true);
 }
 
 /*
@@ -179,8 +202,6 @@ static QMPRequest *monitor_qmp_requests_pop_any_with_lock(void)
     Monitor *mon;
     MonitorQMP *qmp_mon;
 
-    qemu_mutex_lock(&monitor_lock);
-
     QTAILQ_FOREACH(mon, &mon_list, entry) {
         if (!monitor_is_qmp(mon)) {
             continue;
@@ -205,53 +226,91 @@ static QMPRequest *monitor_qmp_requests_pop_any_with_lock(void)
         QTAILQ_INSERT_TAIL(&mon_list, mon, entry);
     }
 
-    qemu_mutex_unlock(&monitor_lock);
-
     return req_obj;
 }
 
-void coroutine_fn monitor_qmp_dispatcher_co(void *data)
+static QMPRequest *monitor_qmp_dispatcher_pop_any(void)
 {
-    QMPRequest *req_obj = NULL;
-    QDict *rsp;
-    bool need_resume;
-    MonitorQMP *mon;
-
     while (true) {
-        assert(qatomic_mb_read(&qmp_dispatcher_co_busy) == true);
+        /*
+         * To avoid double scheduling, busy is true on entry to
+         * monitor_qmp_dispatcher_co(), and must be set again before
+         * aio_co_wake()-ing it.
+         */
+        assert(qatomic_read(&qmp_dispatcher_co_busy) == true);
 
         /*
          * Mark the dispatcher as not busy already here so that we
          * don't miss any new requests coming in the middle of our
          * processing.
+         *
+         * Clear qmp_dispatcher_co_busy before reading request.
          */
-        qatomic_mb_set(&qmp_dispatcher_co_busy, false);
+        qatomic_set_mb(&qmp_dispatcher_co_busy, false);
 
-        while (!(req_obj = monitor_qmp_requests_pop_any_with_lock())) {
-            /*
-             * No more requests to process.  Wait to be reentered from
-             * handle_qmp_command() when it pushes more requests, or
-             * from monitor_cleanup() when it requests shutdown.
-             */
-            if (!qmp_dispatcher_co_shutdown) {
-                qemu_coroutine_yield();
-
-                /*
-                 * busy must be set to true again by whoever
-                 * rescheduled us to avoid double scheduling
-                 */
-                assert(qatomic_xchg(&qmp_dispatcher_co_busy, false) == true);
-            }
+        WITH_QEMU_LOCK_GUARD(&monitor_lock) {
+            QMPRequest *req_obj;
 
-            /*
-             * qmp_dispatcher_co_shutdown may have changed if we
-             * yielded and were reentered from monitor_cleanup()
-             */
+            /* On shutdown, don't take any more requests from the queue */
             if (qmp_dispatcher_co_shutdown) {
-                return;
+                return NULL;
             }
+
+            req_obj = monitor_qmp_requests_pop_any_with_lock();
+            if (req_obj) {
+                return req_obj;
+            }
+        }
+
+        /*
+         * No more requests to process.  Wait to be reentered from
+         * handle_qmp_command() when it pushes more requests, or
+         * from monitor_cleanup() when it requests shutdown.
+         */
+        qemu_coroutine_yield();
+    }
+}
+
+void coroutine_fn monitor_qmp_dispatcher_co(void *data)
+{
+    QMPRequest *req_obj;
+    QDict *rsp;
+    bool oob_enabled;
+    MonitorQMP *mon;
+
+    while ((req_obj = monitor_qmp_dispatcher_pop_any()) != NULL) {
+        trace_monitor_qmp_in_band_dequeue(req_obj,
+                                          req_obj->mon->qmp_requests->length);
+
+        /*
+         * @req_obj has a request, we hold req_obj->mon->qmp_queue_lock
+         */
+
+        mon = req_obj->mon;
+
+        /*
+         * We need to resume the monitor if handle_qmp_command()
+         * suspended it.  Two cases:
+         * 1. OOB enabled: mon->qmp_requests has no more space
+         *    Resume right away, so that OOB commands can get executed while
+         *    this request is being processed.
+         * 2. OOB disabled: always
+         *    Resume only after we're done processing the request,
+         * We need to save qmp_oob_enabled() for later, because
+         * qmp_qmp_capabilities() can change it.
+         */
+        oob_enabled = qmp_oob_enabled(mon);
+        if (oob_enabled
+            && mon->qmp_requests->length == QMP_REQ_QUEUE_LEN_MAX - 1) {
+            monitor_resume(&mon->common);
         }
 
+        /*
+         * Drop the queue mutex now, before yielding, otherwise we might
+         * deadlock if the main thread tries to lock it.
+         */
+        qemu_mutex_unlock(&mon->qmp_queue_lock);
+
         if (qatomic_xchg(&qmp_dispatcher_co_busy, true) == true) {
             /*
              * Someone rescheduled us (probably because a new requests
@@ -262,72 +321,70 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data)
             qemu_coroutine_yield();
         }
 
-        /*
-         * Move the coroutine from iohandler_ctx to qemu_aio_context for
-         * executing the command handler so that it can make progress if it
-         * involves an AIO_WAIT_WHILE().
-         */
-        aio_co_schedule(qemu_get_aio_context(), qmp_dispatcher_co);
-        qemu_coroutine_yield();
-
-        mon = req_obj->mon;
-        /* qmp_oob_enabled() might change after "qmp_capabilities" */
-        need_resume = !qmp_oob_enabled(mon) ||
-            mon->qmp_requests->length == QMP_REQ_QUEUE_LEN_MAX - 1;
-        qemu_mutex_unlock(&mon->qmp_queue_lock);
+        /* Process request */
         if (req_obj->req) {
-            QDict *qdict = qobject_to(QDict, req_obj->req);
-            QObject *id = qdict ? qdict_get(qdict, "id") : NULL;
-            trace_monitor_qmp_cmd_in_band(qobject_get_try_str(id) ?: "");
+            if (trace_event_get_state(TRACE_MONITOR_QMP_CMD_IN_BAND)) {
+                QDict *qdict = qobject_to(QDict, req_obj->req);
+                QObject *id = qdict ? qdict_get(qdict, "id") : NULL;
+                GString *id_json;
+
+                id_json = id ? qobject_to_json(id) : g_string_new(NULL);
+                trace_monitor_qmp_cmd_in_band(id_json->str);
+                g_string_free(id_json, true);
+            }
             monitor_qmp_dispatch(mon, req_obj->req);
         } else {
             assert(req_obj->err);
+            trace_monitor_qmp_err_in_band(error_get_pretty(req_obj->err));
             rsp = qmp_error_response(req_obj->err);
             req_obj->err = NULL;
             monitor_qmp_respond(mon, rsp);
             qobject_unref(rsp);
         }
 
-        if (need_resume) {
-            /* Pairs with the monitor_suspend() in handle_qmp_command() */
+        if (!oob_enabled) {
             monitor_resume(&mon->common);
         }
+
         qmp_request_free(req_obj);
+    }
+    qatomic_set(&qmp_dispatcher_co, NULL);
+}
 
-        /*
-         * Yield and reschedule so the main loop stays responsive.
-         *
-         * Move back to iohandler_ctx so that nested event loops for
-         * qemu_aio_context don't start new monitor commands.
-         */
-        aio_co_schedule(iohandler_get_aio_context(), qmp_dispatcher_co);
-        qemu_coroutine_yield();
+void qmp_dispatcher_co_wake(void)
+{
+    /* Write request before reading qmp_dispatcher_co_busy.  */
+    smp_mb__before_rmw();
+
+    if (!qatomic_xchg(&qmp_dispatcher_co_busy, true)) {
+        aio_co_wake(qmp_dispatcher_co);
     }
 }
 
 static void handle_qmp_command(void *opaque, QObject *req, Error *err)
 {
     MonitorQMP *mon = opaque;
-    QObject *id = NULL;
-    QDict *qdict;
+    QDict *qdict = qobject_to(QDict, req);
     QMPRequest *req_obj;
 
     assert(!req != !err);
 
-    qdict = qobject_to(QDict, req);
-    if (qdict) {
-        id = qdict_get(qdict, "id");
-    } /* else will fail qmp_dispatch() */
-
     if (req && trace_event_get_state_backends(TRACE_HANDLE_QMP_COMMAND)) {
-        QString *req_json = qobject_to_json(req);
-        trace_handle_qmp_command(mon, qstring_get_str(req_json));
-        qobject_unref(req_json);
+        GString *req_json = qobject_to_json(req);
+        trace_handle_qmp_command(mon, req_json->str);
+        g_string_free(req_json, true);
     }
 
     if (qdict && qmp_is_oob(qdict)) {
         /* OOB commands are executed immediately */
-        trace_monitor_qmp_cmd_out_of_band(qobject_get_try_str(id) ?: "");
+        if (trace_event_get_state(TRACE_MONITOR_QMP_CMD_OUT_OF_BAND)) {
+            QObject *id = qdict_get(qdict, "id");
+            GString *id_json;
+
+            id_json = id ? qobject_to_json(id) : g_string_new(NULL);
+            trace_monitor_qmp_cmd_out_of_band(id_json->str);
+            g_string_free(id_json, true);
+        }
         monitor_qmp_dispatch(mon, req);
         qobject_unref(req);
         return;
@@ -339,33 +396,33 @@ static void handle_qmp_command(void *opaque, QObject *req, Error *err)
     req_obj->err = err;
 
     /* Protect qmp_requests and fetching its length. */
-    qemu_mutex_lock(&mon->qmp_queue_lock);
+    WITH_QEMU_LOCK_GUARD(&mon->qmp_queue_lock) {
 
-    /*
-     * Suspend the monitor when we can't queue more requests after
-     * this one.  Dequeuing in monitor_qmp_bh_dispatcher() or
-     * monitor_qmp_cleanup_queue_and_resume() will resume it.
-     * Note that when OOB is disabled, we queue at most one command,
-     * for backward compatibility.
-     */
-    if (!qmp_oob_enabled(mon) ||
-        mon->qmp_requests->length == QMP_REQ_QUEUE_LEN_MAX - 1) {
-        monitor_suspend(&mon->common);
-    }
+        /*
+         * Suspend the monitor when we can't queue more requests after
+         * this one.  Dequeuing in monitor_qmp_dispatcher_co() or
+         * monitor_qmp_cleanup_queue_and_resume() will resume it.
+         * Note that when OOB is disabled, we queue at most one command,
+         * for backward compatibility.
+         */
+        if (!qmp_oob_enabled(mon) ||
+            mon->qmp_requests->length == QMP_REQ_QUEUE_LEN_MAX - 1) {
+            monitor_suspend(&mon->common);
+        }
 
-    /*
-     * Put the request to the end of queue so that requests will be
-     * handled in time order.  Ownership for req_obj, req,
-     * etc. will be delivered to the handler side.
-     */
-    assert(mon->qmp_requests->length < QMP_REQ_QUEUE_LEN_MAX);
-    g_queue_push_tail(mon->qmp_requests, req_obj);
-    qemu_mutex_unlock(&mon->qmp_queue_lock);
+        /*
+         * Put the request to the end of queue so that requests will be
+         * handled in time order.  Ownership for req_obj, req,
+         * etc. will be delivered to the handler side.
+         */
+        trace_monitor_qmp_in_band_enqueue(req_obj, mon,
+                                          mon->qmp_requests->length);
+        assert(mon->qmp_requests->length < QMP_REQ_QUEUE_LEN_MAX);
+        g_queue_push_tail(mon->qmp_requests, req_obj);
+    }
 
     /* Kick the dispatcher routine */
-    if (!qatomic_xchg(&qmp_dispatcher_co_busy, true)) {
-        aio_co_wake(qmp_dispatcher_co);
-    }
+    qmp_dispatcher_co_wake();
 }
 
 static void monitor_qmp_read(void *opaque, const uint8_t *buf, int size)
index 0365ac4d9942acc1d18195a2a35fa94537c95507..032d1220e146e6e1c945150e80aec1fc2435dc76 100644 (file)
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 
 # hmp.c
 handle_hmp_command(void *mon, const char *cmdline) "mon %p cmdline: %s"
@@ -10,6 +10,10 @@ monitor_protocol_event_queue(uint32_t event, void *qdict, uint64_t rate) "event=
 monitor_suspend(void *ptr, int cnt) "mon %p: %d"
 
 # qmp.c
+monitor_qmp_in_band_enqueue(void *req, void *mon, unsigned len) "%p mon %p len %u"
+monitor_qmp_in_band_dequeue(void *req, unsigned len) "%p len %u"
 monitor_qmp_cmd_in_band(const char *id) "%s"
+monitor_qmp_err_in_band(const char *desc) "%s"
 monitor_qmp_cmd_out_of_band(const char *id) "%s"
+monitor_qmp_respond(void *mon, const char *json) "mon %p resp: %s"
 handle_qmp_command(void *mon, const char *req) "mon %p req: %s"
diff --git a/nbd/client-connection.c b/nbd/client-connection.c
new file mode 100644 (file)
index 0000000..f9da67c
--- /dev/null
@@ -0,0 +1,422 @@
+/*
+ * QEMU Block driver for NBD
+ *
+ * Copyright (c) 2021 Virtuozzo International GmbH.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "trace.h"
+
+#include "block/nbd.h"
+
+#include "qapi/qapi-visit-sockets.h"
+#include "qapi/clone-visitor.h"
+#include "qemu/coroutine.h"
+
+struct NBDClientConnection {
+    /* Initialization constants, never change */
+    SocketAddress *saddr; /* address to connect to */
+    QCryptoTLSCreds *tlscreds;
+    char *tlshostname;
+    NBDExportInfo initial_info;
+    bool do_negotiation;
+    bool do_retry;
+
+    QemuMutex mutex;
+
+    NBDExportInfo updated_info;
+    /*
+     * @sioc represents a successful result. While thread is running, @sioc is
+     * used only by thread and not protected by mutex. When thread is not
+     * running, @sioc is stolen by nbd_co_establish_connection() under mutex.
+     */
+    QIOChannelSocket *sioc;
+    QIOChannel *ioc;
+    /*
+     * @err represents previous attempt. It may be copied by
+     * nbd_co_establish_connection() when it reports failure.
+     */
+    Error *err;
+
+    /* All further fields are accessed only under mutex */
+    bool running; /* thread is running now */
+    bool detached; /* thread is detached and should cleanup the state */
+
+    /*
+     * wait_co: if non-NULL, which coroutine to wake in
+     * nbd_co_establish_connection() after yield()
+     */
+    Coroutine *wait_co;
+};
+
+/*
+ * The function isn't protected by any mutex, only call it when the client
+ * connection attempt has not yet started.
+ */
+void nbd_client_connection_enable_retry(NBDClientConnection *conn)
+{
+    conn->do_retry = true;
+}
+
+NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
+                                               bool do_negotiation,
+                                               const char *export_name,
+                                               const char *x_dirty_bitmap,
+                                               QCryptoTLSCreds *tlscreds,
+                                               const char *tlshostname)
+{
+    NBDClientConnection *conn = g_new(NBDClientConnection, 1);
+
+    object_ref(OBJECT(tlscreds));
+    *conn = (NBDClientConnection) {
+        .saddr = QAPI_CLONE(SocketAddress, saddr),
+        .tlscreds = tlscreds,
+        .tlshostname = g_strdup(tlshostname),
+        .do_negotiation = do_negotiation,
+
+        .initial_info.request_sizes = true,
+        .initial_info.mode = NBD_MODE_EXTENDED,
+        .initial_info.base_allocation = true,
+        .initial_info.x_dirty_bitmap = g_strdup(x_dirty_bitmap),
+        .initial_info.name = g_strdup(export_name ?: "")
+    };
+
+    qemu_mutex_init(&conn->mutex);
+
+    return conn;
+}
+
+static void nbd_client_connection_do_free(NBDClientConnection *conn)
+{
+    if (conn->sioc) {
+        qio_channel_close(QIO_CHANNEL(conn->sioc), NULL);
+        object_unref(OBJECT(conn->sioc));
+    }
+    error_free(conn->err);
+    qapi_free_SocketAddress(conn->saddr);
+    g_free(conn->tlshostname);
+    object_unref(OBJECT(conn->tlscreds));
+    g_free(conn->initial_info.x_dirty_bitmap);
+    g_free(conn->initial_info.name);
+    g_free(conn);
+}
+
+/*
+ * Connect to @addr and do NBD negotiation if @info is not null. If @tlscreds
+ * are given @outioc is returned. @outioc is provided only on success.  The call
+ * may be cancelled from other thread by simply qio_channel_shutdown(sioc).
+ */
+static int nbd_connect(QIOChannelSocket *sioc, SocketAddress *addr,
+                       NBDExportInfo *info, QCryptoTLSCreds *tlscreds,
+                       const char *tlshostname,
+                       QIOChannel **outioc, Error **errp)
+{
+    int ret;
+
+    if (outioc) {
+        *outioc = NULL;
+    }
+
+    ret = qio_channel_socket_connect_sync(sioc, addr, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    qio_channel_set_delay(QIO_CHANNEL(sioc), false);
+
+    if (!info) {
+        return 0;
+    }
+
+    ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), tlscreds, tlshostname,
+                                outioc, info, errp);
+    if (ret < 0) {
+        /*
+         * nbd_receive_negotiate() may setup tls ioc and return it even on
+         * failure path. In this case we should use it instead of original
+         * channel.
+         */
+        if (outioc && *outioc) {
+            qio_channel_close(*outioc, NULL);
+            object_unref(OBJECT(*outioc));
+            *outioc = NULL;
+        } else {
+            qio_channel_close(QIO_CHANNEL(sioc), NULL);
+        }
+
+        return ret;
+    }
+
+    return 0;
+}
+
+static void *connect_thread_func(void *opaque)
+{
+    NBDClientConnection *conn = opaque;
+    int ret;
+    bool do_free;
+    uint64_t timeout = 1;
+    uint64_t max_timeout = 16;
+
+    qemu_mutex_lock(&conn->mutex);
+    while (!conn->detached) {
+        Error *local_err = NULL;
+
+        assert(!conn->sioc);
+        conn->sioc = qio_channel_socket_new();
+
+        qemu_mutex_unlock(&conn->mutex);
+
+        conn->updated_info = conn->initial_info;
+
+        ret = nbd_connect(conn->sioc, conn->saddr,
+                          conn->do_negotiation ? &conn->updated_info : NULL,
+                          conn->tlscreds, conn->tlshostname,
+                          &conn->ioc, &local_err);
+
+        /*
+         * conn->updated_info will finally be returned to the user. Clear the
+         * pointers to our internally allocated strings, which are IN parameters
+         * of nbd_receive_negotiate() and therefore nbd_connect(). Caller
+         * shouldn't be interested in these fields.
+         */
+        conn->updated_info.x_dirty_bitmap = NULL;
+        conn->updated_info.name = NULL;
+
+        qemu_mutex_lock(&conn->mutex);
+
+        error_free(conn->err);
+        conn->err = NULL;
+        error_propagate(&conn->err, local_err);
+
+        if (ret < 0) {
+            object_unref(OBJECT(conn->sioc));
+            conn->sioc = NULL;
+            if (conn->do_retry && !conn->detached) {
+                trace_nbd_connect_thread_sleep(timeout);
+                qemu_mutex_unlock(&conn->mutex);
+
+                sleep(timeout);
+                if (timeout < max_timeout) {
+                    timeout *= 2;
+                }
+
+                qemu_mutex_lock(&conn->mutex);
+                continue;
+            }
+        }
+
+        break;
+    }
+
+    /* mutex is locked */
+
+    assert(conn->running);
+    conn->running = false;
+    if (conn->wait_co) {
+        aio_co_wake(conn->wait_co);
+        conn->wait_co = NULL;
+    }
+    do_free = conn->detached;
+
+    qemu_mutex_unlock(&conn->mutex);
+
+    if (do_free) {
+        nbd_client_connection_do_free(conn);
+    }
+
+    return NULL;
+}
+
+void nbd_client_connection_release(NBDClientConnection *conn)
+{
+    bool do_free = false;
+
+    if (!conn) {
+        return;
+    }
+
+    WITH_QEMU_LOCK_GUARD(&conn->mutex) {
+        assert(!conn->detached);
+        if (conn->running) {
+            conn->detached = true;
+        } else {
+            do_free = true;
+        }
+        if (conn->sioc) {
+            qio_channel_shutdown(QIO_CHANNEL(conn->sioc),
+                                 QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+        }
+    }
+
+    if (do_free) {
+        nbd_client_connection_do_free(conn);
+    }
+}
+
+/*
+ * Get a new connection in context of @conn:
+ *   if the thread is running, wait for completion
+ *   if the thread already succeeded in the background, and user didn't get the
+ *     result, just return it now
+ *   otherwise the thread is not running, so start a thread and wait for
+ *     completion
+ *
+ * If @blocking is false, don't wait for the thread, return immediately.
+ *
+ * If @info is not NULL, also do nbd-negotiation after successful connection.
+ * In this case info is used only as out parameter, and is fully initialized by
+ * nbd_co_establish_connection(). "IN" fields of info as well as related only to
+ * nbd_receive_export_list() would be zero (see description of NBDExportInfo in
+ * include/block/nbd.h).
+ */
+QIOChannel *coroutine_fn
+nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info,
+                            bool blocking, Error **errp)
+{
+    QemuThread thread;
+
+    if (conn->do_negotiation) {
+        assert(info);
+    }
+
+    WITH_QEMU_LOCK_GUARD(&conn->mutex) {
+        /*
+         * Don't call nbd_co_establish_connection() in several coroutines in
+         * parallel. Only one call at once is supported.
+         */
+        assert(!conn->wait_co);
+
+        if (!conn->running) {
+            if (conn->sioc) {
+                /* Previous attempt finally succeeded in background */
+                if (conn->do_negotiation) {
+                    memcpy(info, &conn->updated_info, sizeof(*info));
+                    if (conn->ioc) {
+                        /* TLS channel now has own reference to parent */
+                        object_unref(OBJECT(conn->sioc));
+                        conn->sioc = NULL;
+
+                        return g_steal_pointer(&conn->ioc);
+                    }
+                }
+
+                assert(!conn->ioc);
+
+                return QIO_CHANNEL(g_steal_pointer(&conn->sioc));
+            }
+
+            conn->running = true;
+            qemu_thread_create(&thread, "nbd-connect",
+                               connect_thread_func, conn, QEMU_THREAD_DETACHED);
+        }
+
+        if (!blocking) {
+            if (conn->err) {
+                error_propagate(errp, error_copy(conn->err));
+            } else {
+                error_setg(errp, "No connection at the moment");
+            }
+
+            return NULL;
+        }
+
+        conn->wait_co = qemu_coroutine_self();
+    }
+
+    /*
+     * We are going to wait for connect-thread finish, but
+     * nbd_co_establish_connection_cancel() can interrupt.
+     */
+    qemu_coroutine_yield();
+
+    WITH_QEMU_LOCK_GUARD(&conn->mutex) {
+        if (conn->running) {
+            /*
+             * The connection attempt was canceled and the coroutine resumed
+             * before the connection thread finished its job.  Report the
+             * attempt as failed, but leave the connection thread running,
+             * to reuse it for the next connection attempt.
+             */
+            if (conn->err) {
+                error_propagate(errp, error_copy(conn->err));
+            } else {
+                /*
+                 * The only possible case here is cancelling by open_timer
+                 * during nbd_open(). So, the error message is for that case.
+                 * If we have more use cases, we can refactor
+                 * nbd_co_establish_connection_cancel() to take an additional
+                 * parameter cancel_reason, that would be passed than to the
+                 * caller of cancelled nbd_co_establish_connection().
+                 */
+                error_setg(errp, "Connection attempt cancelled by timeout");
+            }
+
+            return NULL;
+        } else {
+            /* Thread finished. There must be either error or sioc */
+            assert(!conn->err != !conn->sioc);
+
+            if (conn->err) {
+                error_propagate(errp, error_copy(conn->err));
+                return NULL;
+            }
+
+            if (conn->do_negotiation) {
+                memcpy(info, &conn->updated_info, sizeof(*info));
+                if (conn->ioc) {
+                    /* TLS channel now has own reference to parent */
+                    object_unref(OBJECT(conn->sioc));
+                    conn->sioc = NULL;
+
+                    return g_steal_pointer(&conn->ioc);
+                }
+            }
+
+            assert(!conn->ioc);
+
+            return QIO_CHANNEL(g_steal_pointer(&conn->sioc));
+        }
+    }
+
+    abort(); /* unreachable */
+}
+
+/*
+ * nbd_co_establish_connection_cancel
+ * Cancel nbd_co_establish_connection() asynchronously.
+ *
+ * Note that this function neither directly stops the thread nor closes the
+ * socket, but rather safely wakes nbd_co_establish_connection() which is
+ * sleeping in yield()
+ */
+void nbd_co_establish_connection_cancel(NBDClientConnection *conn)
+{
+    Coroutine *wait_co;
+
+    WITH_QEMU_LOCK_GUARD(&conn->mutex) {
+        wait_co = g_steal_pointer(&conn->wait_co);
+    }
+
+    if (wait_co) {
+        aio_co_wake(wait_co);
+    }
+}
index 0c2db4bcba9f7a501e29a67bdae2dceb04d3b32e..29ffc609a4b7dd0b01e758ff02839f0e1a651421 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  Copyright (C) 2016-2019 Red Hat, Inc.
+ *  Copyright Red Hat
  *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
  *
  *  Network Block Device Client Side
@@ -650,19 +650,20 @@ static int nbd_send_meta_query(QIOChannel *ioc, uint32_t opt,
                                Error **errp)
 {
     int ret;
-    uint32_t export_len = strlen(export);
+    uint32_t export_len;
     uint32_t queries = !!query;
     uint32_t query_len = 0;
     uint32_t data_len;
     char *data;
     char *p;
 
+    assert(strnlen(export, NBD_MAX_STRING_SIZE + 1) <= NBD_MAX_STRING_SIZE);
+    export_len = strlen(export);
     data_len = sizeof(export_len) + export_len + sizeof(queries);
-    assert(export_len <= NBD_MAX_STRING_SIZE);
     if (query) {
+        assert(strnlen(query, NBD_MAX_STRING_SIZE + 1) <= NBD_MAX_STRING_SIZE);
         query_len = strlen(query);
         data_len += sizeof(query_len) + query_len;
-        assert(query_len <= NBD_MAX_STRING_SIZE);
     } else {
         assert(opt == NBD_OPT_LIST_META_CONTEXT);
     }
@@ -874,15 +875,11 @@ static int nbd_list_meta_contexts(QIOChannel *ioc,
  * Start the handshake to the server.  After a positive return, the server
  * is ready to accept additional NBD_OPT requests.
  * Returns: negative errno: failure talking to server
- *          0: server is oldstyle, must call nbd_negotiate_finish_oldstyle
- *          1: server is newstyle, but can only accept EXPORT_NAME
- *          2: server is newstyle, but lacks structured replies
- *          3: server is newstyle and set up for structured replies
+ *          non-negative: enum NBDMode describing server abilities
  */
-static int nbd_start_negotiate(AioContext *aio_context, QIOChannel *ioc,
-                               QCryptoTLSCreds *tlscreds,
+static int nbd_start_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
                                const char *hostname, QIOChannel **outioc,
-                               bool structured_reply, bool *zeroes,
+                               NBDMode max_mode, bool *zeroes,
                                Error **errp)
 {
     ERRP_GUARD();
@@ -948,10 +945,6 @@ static int nbd_start_negotiate(AioContext *aio_context, QIOChannel *ioc,
                     return -EINVAL;
                 }
                 ioc = *outioc;
-                if (aio_context) {
-                    qio_channel_set_blocking(ioc, false, NULL);
-                    qio_channel_attach_aio_context(ioc, aio_context);
-                }
             } else {
                 error_setg(errp, "Server does not support STARTTLS");
                 return -EINVAL;
@@ -960,24 +953,32 @@ static int nbd_start_negotiate(AioContext *aio_context, QIOChannel *ioc,
         if (fixedNewStyle) {
             int result = 0;
 
-            if (structured_reply) {
+            if (max_mode >= NBD_MODE_EXTENDED) {
+                result = nbd_request_simple_option(ioc,
+                                                   NBD_OPT_EXTENDED_HEADERS,
+                                                   false, errp);
+                if (result) {
+                    return result < 0 ? -EINVAL : NBD_MODE_EXTENDED;
+                }
+            }
+            if (max_mode >= NBD_MODE_STRUCTURED) {
                 result = nbd_request_simple_option(ioc,
                                                    NBD_OPT_STRUCTURED_REPLY,
                                                    false, errp);
-                if (result < 0) {
-                    return -EINVAL;
+                if (result) {
+                    return result < 0 ? -EINVAL : NBD_MODE_STRUCTURED;
                 }
             }
-            return 2 + result;
+            return NBD_MODE_SIMPLE;
         } else {
-            return 1;
+            return NBD_MODE_EXPORT_NAME;
         }
     } else if (magic == NBD_CLIENT_MAGIC) {
         if (tlscreds) {
             error_setg(errp, "Server does not support STARTTLS");
             return -EINVAL;
         }
-        return 0;
+        return NBD_MODE_OLDSTYLE;
     } else {
         error_setg(errp, "Bad server magic received: 0x%" PRIx64, magic);
         return -EINVAL;
@@ -1016,8 +1017,7 @@ static int nbd_negotiate_finish_oldstyle(QIOChannel *ioc, NBDExportInfo *info,
  * Returns: negative errno: failure talking to server
  *          0: server is connected
  */
-int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
-                          QCryptoTLSCreds *tlscreds,
+int nbd_receive_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
                           const char *hostname, QIOChannel **outioc,
                           NBDExportInfo *info, Error **errp)
 {
@@ -1029,18 +1029,21 @@ int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
     assert(info->name && strlen(info->name) <= NBD_MAX_STRING_SIZE);
     trace_nbd_receive_negotiate_name(info->name);
 
-    result = nbd_start_negotiate(aio_context, ioc, tlscreds, hostname, outioc,
-                                 info->structured_reply, &zeroes, errp);
+    result = nbd_start_negotiate(ioc, tlscreds, hostname, outioc,
+                                 info->mode, &zeroes, errp);
+    if (result < 0) {
+        return result;
+    }
 
-    info->structured_reply = false;
+    info->mode = result;
     info->base_allocation = false;
     if (tlscreds && *outioc) {
         ioc = *outioc;
     }
 
-    switch (result) {
-    case 3: /* newstyle, with structured replies */
-        info->structured_reply = true;
+    switch (info->mode) {
+    case NBD_MODE_EXTENDED:
+    case NBD_MODE_STRUCTURED:
         if (base_allocation) {
             result = nbd_negotiate_simple_meta_context(ioc, info, errp);
             if (result < 0) {
@@ -1049,7 +1052,7 @@ int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
             info->base_allocation = result == 1;
         }
         /* fall through */
-    case 2: /* newstyle, try OPT_GO */
+    case NBD_MODE_SIMPLE:
         /* Try NBD_OPT_GO first - if it works, we are done (it
          * also gives us a good message if the server requires
          * TLS).  If it is not available, fall back to
@@ -1072,7 +1075,7 @@ int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
             return -EINVAL;
         }
         /* fall through */
-    case 1: /* newstyle, but limited to EXPORT_NAME */
+    case NBD_MODE_EXPORT_NAME:
         /* write the export name request */
         if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, info->name,
                                     errp) < 0) {
@@ -1088,7 +1091,7 @@ int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
             return -EINVAL;
         }
         break;
-    case 0: /* oldstyle, parse length and flags */
+    case NBD_MODE_OLDSTYLE:
         if (*info->name) {
             error_setg(errp, "Server does not support non-empty export names");
             return -EINVAL;
@@ -1098,7 +1101,7 @@ int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
         }
         break;
     default:
-        return result;
+        g_assert_not_reached();
     }
 
     trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
@@ -1149,15 +1152,19 @@ int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
     QIOChannel *sioc = NULL;
 
     *info = NULL;
-    result = nbd_start_negotiate(NULL, ioc, tlscreds, hostname, &sioc, true,
-                                 NULL, errp);
+    result = nbd_start_negotiate(ioc, tlscreds, hostname, &sioc,
+                                 NBD_MODE_EXTENDED, NULL, errp);
     if (tlscreds && sioc) {
         ioc = sioc;
     }
+    if (result < 0) {
+        goto out;
+    }
 
-    switch (result) {
-    case 2:
-    case 3:
+    switch ((NBDMode)result) {
+    case NBD_MODE_SIMPLE:
+    case NBD_MODE_STRUCTURED:
+    case NBD_MODE_EXTENDED:
         /* newstyle - use NBD_OPT_LIST to populate array, then try
          * NBD_OPT_INFO on each array member. If structured replies
          * are enabled, also try NBD_OPT_LIST_META_CONTEXT. */
@@ -1178,7 +1185,7 @@ int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
             memset(&array[count - 1], 0, sizeof(*array));
             array[count - 1].name = name;
             array[count - 1].description = desc;
-            array[count - 1].structured_reply = result == 3;
+            array[count - 1].mode = result;
         }
 
         for (i = 0; i < count; i++) {
@@ -1194,7 +1201,7 @@ int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
                 break;
             }
 
-            if (result == 3 &&
+            if (result >= NBD_MODE_STRUCTURED &&
                 nbd_list_meta_contexts(ioc, &array[i], errp) < 0) {
                 goto out;
             }
@@ -1203,13 +1210,15 @@ int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
         /* Send NBD_OPT_ABORT as a courtesy before hanging up */
         nbd_send_opt_abort(ioc);
         break;
-    case 1: /* newstyle, but limited to EXPORT_NAME */
+    case NBD_MODE_EXPORT_NAME:
         error_setg(errp, "Server does not support export lists");
         /* We can't even send NBD_OPT_ABORT, so merely hang up */
         goto out;
-    case 0: /* oldstyle, parse length and flags */
+    case NBD_MODE_OLDSTYLE:
+        /* Lone export name is implied, but we can parse length and flags */
         array = g_new0(NBDExportInfo, 1);
         array->name = g_strdup("");
+        array->mode = NBD_MODE_OLDSTYLE;
         count = 1;
 
         if (nbd_negotiate_finish_oldstyle(ioc, array, errp) < 0) {
@@ -1219,13 +1228,13 @@ int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
         /* Send NBD_CMD_DISC as a courtesy to the server, but ignore all
          * errors now that we have the information we wanted. */
         if (nbd_drop(ioc, 124, NULL) == 0) {
-            NBDRequest request = { .type = NBD_CMD_DISC };
+            NBDRequest request = { .type = NBD_CMD_DISC, .mode = result };
 
             nbd_send_request(ioc, &request);
         }
         break;
     default:
-        goto out;
+        g_assert_not_reached();
     }
 
     *info = array;
@@ -1347,20 +1356,29 @@ int nbd_disconnect(int fd)
 
 int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
 {
-    uint8_t buf[NBD_REQUEST_SIZE];
+    uint8_t buf[NBD_EXTENDED_REQUEST_SIZE];
+    size_t len;
 
-    trace_nbd_send_request(request->from, request->len, request->handle,
+    trace_nbd_send_request(request->from, request->len, request->cookie,
                            request->flags, request->type,
                            nbd_cmd_lookup(request->type));
 
-    stl_be_p(buf, NBD_REQUEST_MAGIC);
     stw_be_p(buf + 4, request->flags);
     stw_be_p(buf + 6, request->type);
-    stq_be_p(buf + 8, request->handle);
+    stq_be_p(buf + 8, request->cookie);
     stq_be_p(buf + 16, request->from);
-    stl_be_p(buf + 24, request->len);
+    if (request->mode >= NBD_MODE_EXTENDED) {
+        stl_be_p(buf, NBD_EXTENDED_REQUEST_MAGIC);
+        stq_be_p(buf + 24, request->len);
+        len = NBD_EXTENDED_REQUEST_SIZE;
+    } else {
+        assert(request->len <= UINT32_MAX);
+        stl_be_p(buf, NBD_REQUEST_MAGIC);
+        stl_be_p(buf + 24, request->len);
+        len = NBD_REQUEST_SIZE;
+    }
 
-    return nbd_write(ioc, buf, sizeof(buf), NULL);
+    return nbd_write(ioc, buf, len, NULL);
 }
 
 /* nbd_receive_simple_reply
@@ -1382,35 +1400,62 @@ static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply,
     }
 
     reply->error = be32_to_cpu(reply->error);
-    reply->handle = be64_to_cpu(reply->handle);
+    reply->cookie = be64_to_cpu(reply->cookie);
 
     return 0;
 }
 
-/* nbd_receive_structured_reply_chunk
+/* nbd_receive_reply_chunk_header
  * Read structured reply chunk except magic field (which should be already
- * read).
+ * read).  Normalize into the compact form.
  * Payload is not read.
  */
-static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
-                                              NBDStructuredReplyChunk *chunk,
-                                              Error **errp)
+static int nbd_receive_reply_chunk_header(QIOChannel *ioc, NBDReply *chunk,
+                                          Error **errp)
 {
     int ret;
+    size_t len;
+    uint64_t payload_len;
 
-    assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
+    if (chunk->magic == NBD_STRUCTURED_REPLY_MAGIC) {
+        len = sizeof(chunk->structured);
+    } else {
+        assert(chunk->magic == NBD_EXTENDED_REPLY_MAGIC);
+        len = sizeof(chunk->extended);
+    }
 
     ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
-                   sizeof(*chunk) - sizeof(chunk->magic), "structured chunk",
+                   len - sizeof(chunk->magic), "structured chunk",
                    errp);
     if (ret < 0) {
         return ret;
     }
 
-    chunk->flags = be16_to_cpu(chunk->flags);
-    chunk->type = be16_to_cpu(chunk->type);
-    chunk->handle = be64_to_cpu(chunk->handle);
-    chunk->length = be32_to_cpu(chunk->length);
+    /* flags, type, and cookie occupy same space between forms */
+    chunk->structured.flags = be16_to_cpu(chunk->structured.flags);
+    chunk->structured.type = be16_to_cpu(chunk->structured.type);
+    chunk->structured.cookie = be64_to_cpu(chunk->structured.cookie);
+
+    /*
+     * Because we use BLOCK_STATUS with REQ_ONE, and cap READ requests
+     * at 32M, no valid server should send us payload larger than
+     * this.  Even if we stopped using REQ_ONE, sane servers will cap
+     * the number of extents they return for block status.
+     */
+    if (chunk->magic == NBD_STRUCTURED_REPLY_MAGIC) {
+        payload_len = be32_to_cpu(chunk->structured.length);
+    } else {
+        /* For now, we are ignoring the extended header offset. */
+        payload_len = be64_to_cpu(chunk->extended.length);
+        chunk->magic = NBD_STRUCTURED_REPLY_MAGIC;
+    }
+    if (payload_len > NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData)) {
+        error_setg(errp, "server chunk %" PRIu32 " (%s) payload is too long",
+                   chunk->structured.type,
+                   nbd_rep_lookup(chunk->structured.type));
+        return -EINVAL;
+    }
+    chunk->structured.length = payload_len;
 
     return 0;
 }
@@ -1434,9 +1479,7 @@ nbd_read_eof(BlockDriverState *bs, QIOChannel *ioc, void *buffer, size_t size,
 
         len = qio_channel_readv(ioc, &iov, 1, errp);
         if (len == QIO_CHANNEL_ERR_BLOCK) {
-            bdrv_dec_in_flight(bs);
             qio_channel_yield(ioc, G_IO_IN);
-            bdrv_inc_in_flight(bs);
             continue;
         } else if (len < 0) {
             return -EIO;
@@ -1459,19 +1502,21 @@ nbd_read_eof(BlockDriverState *bs, QIOChannel *ioc, void *buffer, size_t size,
 
 /* nbd_receive_reply
  *
- * Decreases bs->in_flight while waiting for a new reply. This yield is where
- * we wait indefinitely and the coroutine must be able to be safely reentered
- * for nbd_client_attach_aio_context().
+ * Wait for a new reply. If this yields, the coroutine must be able to be
+ * safely reentered for nbd_client_attach_aio_context().  @mode determines
+ * which reply magic we are expecting, although this normalizes the result
+ * so that the caller only has to work with compact headers.
  *
  * Returns 1 on success
- *         0 on eof, when no data was read (errp is not set)
- *         negative errno on failure (errp is set)
+ *         0 on eof, when no data was read
+ *         negative errno on failure
  */
 int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
-                                   NBDReply *reply, Error **errp)
+                                   NBDReply *reply, NBDMode mode, Error **errp)
 {
     int ret;
     const char *type;
+    uint32_t expected;
 
     ret = nbd_read_eof(bs, ioc, &reply->magic, sizeof(reply->magic), errp);
     if (ret <= 0) {
@@ -1480,34 +1525,44 @@ int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
 
     reply->magic = be32_to_cpu(reply->magic);
 
+    /* Diagnose but accept wrong-width header */
     switch (reply->magic) {
     case NBD_SIMPLE_REPLY_MAGIC:
+        if (mode >= NBD_MODE_EXTENDED) {
+            trace_nbd_receive_wrong_header(reply->magic,
+                                           nbd_mode_lookup(mode));
+        }
         ret = nbd_receive_simple_reply(ioc, &reply->simple, errp);
         if (ret < 0) {
-            break;
+            return ret;
         }
         trace_nbd_receive_simple_reply(reply->simple.error,
                                        nbd_err_lookup(reply->simple.error),
-                                       reply->handle);
+                                       reply->cookie);
         break;
     case NBD_STRUCTURED_REPLY_MAGIC:
-        ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp);
+    case NBD_EXTENDED_REPLY_MAGIC:
+        expected = mode >= NBD_MODE_EXTENDED ? NBD_EXTENDED_REPLY_MAGIC
+            : NBD_STRUCTURED_REPLY_MAGIC;
+        if (reply->magic != expected) {
+            trace_nbd_receive_wrong_header(reply->magic,
+                                           nbd_mode_lookup(mode));
+        }
+        ret = nbd_receive_reply_chunk_header(ioc, reply, errp);
         if (ret < 0) {
-            break;
+            return ret;
         }
         type = nbd_reply_type_lookup(reply->structured.type);
-        trace_nbd_receive_structured_reply_chunk(reply->structured.flags,
-                                                 reply->structured.type, type,
-                                                 reply->structured.handle,
-                                                 reply->structured.length);
+        trace_nbd_receive_reply_chunk_header(reply->structured.flags,
+                                             reply->structured.type, type,
+                                             reply->structured.cookie,
+                                             reply->structured.length);
         break;
     default:
+        trace_nbd_receive_wrong_header(reply->magic, nbd_mode_lookup(mode));
         error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", reply->magic);
         return -EINVAL;
     }
-    if (ret < 0) {
-        return ret;
-    }
 
     return 1;
 }
index ddfe7d118371f8d24f8ad1431aa769314751864d..3247c1d618aa9e1740375a3885e1265489b4a2fd 100644 (file)
@@ -79,6 +79,8 @@ const char *nbd_opt_lookup(uint32_t opt)
         return "list meta context";
     case NBD_OPT_SET_META_CONTEXT:
         return "set meta context";
+    case NBD_OPT_EXTENDED_HEADERS:
+        return "extended headers";
     default:
         return "<unknown>";
     }
@@ -112,6 +114,10 @@ const char *nbd_rep_lookup(uint32_t rep)
         return "server shutting down";
     case NBD_REP_ERR_BLOCK_SIZE_REQD:
         return "block size required";
+    case NBD_REP_ERR_TOO_BIG:
+        return "option payload too big";
+    case NBD_REP_ERR_EXT_HEADER_REQD:
+        return "extended headers required";
     default:
         return "<unknown>";
     }
@@ -170,7 +176,9 @@ const char *nbd_reply_type_lookup(uint16_t type)
     case NBD_REPLY_TYPE_OFFSET_HOLE:
         return "hole";
     case NBD_REPLY_TYPE_BLOCK_STATUS:
-        return "block status";
+        return "block status (32-bit)";
+    case NBD_REPLY_TYPE_BLOCK_STATUS_EXT:
+        return "block status (64-bit)";
     case NBD_REPLY_TYPE_ERROR:
         return "generic error";
     case NBD_REPLY_TYPE_ERROR_OFFSET:
@@ -248,3 +256,22 @@ int nbd_errno_to_system_errno(int err)
     }
     return ret;
 }
+
+
+const char *nbd_mode_lookup(NBDMode mode)
+{
+    switch (mode) {
+    case NBD_MODE_OLDSTYLE:
+        return "oldstyle";
+    case NBD_MODE_EXPORT_NAME:
+        return "export name only";
+    case NBD_MODE_SIMPLE:
+        return "simple headers";
+    case NBD_MODE_STRUCTURED:
+        return "structured replies";
+    case NBD_MODE_EXTENDED:
+        return "extended headers";
+    default:
+        return "<unknown>";
+    }
+}
index 2baaa3694801ebbe7fe7e9c5b794a2bc01e1bb57..b26d70565ebb24a179615dbfc4ad0483849ee89b 100644 (file)
@@ -1,5 +1,6 @@
 block_ss.add(files(
   'client.c',
+  'client-connection.c',
   'common.c',
 ))
 blockdev_ss.add(files(
index 1b2141ab4b2dc4880640384e804605149a925e25..dfa02f77ee487ce0a6168a49f8d6d9e5cdd5ac73 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * NBD Internal Declarations
  *
- * Copyright (C) 2016 Red Hat, Inc.
+ * Copyright Red Hat
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -13,7 +13,6 @@
 #include "sysemu/block-backend.h"
 #include "io/channel-tls.h"
 
-#include "qemu/coroutine.h"
 #include "qemu/iov.h"
 
 #ifndef _WIN32
  * https://github.com/yoe/nbd/blob/master/doc/proto.md
  */
 
-/* Size of all NBD_OPT_*, without payload */
+/* Size of all compact NBD_CMD_*, without payload */
 #define NBD_REQUEST_SIZE            (4 + 2 + 2 + 8 + 8 + 4)
+/* Size of all extended NBD_CMD_*, without payload */
+#define NBD_EXTENDED_REQUEST_SIZE   (4 + 2 + 2 + 8 + 8 + 8)
+
 /* Size of all NBD_REP_* sent in answer to most NBD_OPT_*, without payload */
 #define NBD_REPLY_SIZE              (4 + 4 + 8)
 /* Size of reply to NBD_OPT_EXPORT_NAME */
@@ -45,7 +47,6 @@
 #define NBD_OLDSTYLE_NEGOTIATE_SIZE (8 + 8 + 8 + 4 + 124)
 
 #define NBD_INIT_MAGIC              0x4e42444d41474943LL /* ASCII "NBDMAGIC" */
-#define NBD_REQUEST_MAGIC           0x25609513
 #define NBD_OPTS_MAGIC              0x49484156454F5054LL /* ASCII "IHAVEOPT" */
 #define NBD_CLIENT_MAGIC            0x0000420281861253LL
 #define NBD_REP_MAGIC               0x0003e889045565a9LL
index 613ed2634ada0e629a4ec087d4591fbdf7d14658..895cf0a7525bdf85996564faa05d515042c7a45e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  Copyright (C) 2016-2020 Red Hat, Inc.
+ *  Copyright Red Hat
  *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
  *
  *  Network Block Device Server Side
 
 #include "qemu/osdep.h"
 
+#include "block/block_int.h"
 #include "block/export.h"
+#include "block/dirty-bitmap.h"
 #include "qapi/error.h"
 #include "qemu/queue.h"
 #include "trace.h"
 #include "nbd-internal.h"
 #include "qemu/units.h"
+#include "qemu/memalign.h"
 
 #define NBD_META_ID_BASE_ALLOCATION 0
 #define NBD_META_ID_ALLOCATION_DEPTH 1
@@ -77,7 +80,6 @@ static int system_errno_to_nbd_errno(int err)
 typedef struct NBDRequestData NBDRequestData;
 
 struct NBDRequestData {
-    QSIMPLEQ_ENTRY(NBDRequestData) entry;
     NBDClient *client;
     uint8_t *data;
     bool complete;
@@ -103,11 +105,13 @@ struct NBDExport {
 
 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
 
-/* NBDExportMetaContexts represents a list of contexts to be exported,
+/*
+ * NBDMetaContexts represents a list of meta contexts in use,
  * as selected by NBD_OPT_SET_META_CONTEXT. Also used for
- * NBD_OPT_LIST_META_CONTEXT. */
-typedef struct NBDExportMetaContexts {
-    NBDExport *exp;
+ * NBD_OPT_LIST_META_CONTEXT.
+ */
+struct NBDMetaContexts {
+    const NBDExport *exp; /* associated export */
     size_t count; /* number of negotiated contexts */
     bool base_allocation; /* export base:allocation context (block status) */
     bool allocation_depth; /* export qemu:allocation-depth */
@@ -115,7 +119,7 @@ typedef struct NBDExportMetaContexts {
                     * export qemu:dirty-bitmap:<export bitmap name>,
                     * sized by exp->nr_export_bitmaps
                     */
-} NBDExportMetaContexts;
+};
 
 struct NBDClient {
     int refcount;
@@ -132,14 +136,17 @@ struct NBDClient {
     CoMutex send_lock;
     Coroutine *send_coroutine;
 
+    bool read_yielding;
+    bool quiescing;
+
     QTAILQ_ENTRY(NBDClient) next;
     int nb_requests;
     bool closing;
 
     uint32_t check_align; /* If non-zero, check for aligned client requests */
 
-    bool structured_reply;
-    NBDExportMetaContexts export_meta;
+    NBDMode mode;
+    NBDMetaContexts contexts; /* Negotiated meta contexts */
 
     uint32_t opt; /* Current option being negotiated */
     uint32_t optlen; /* remaining length of data in ioc for the option being
@@ -210,7 +217,7 @@ static int nbd_negotiate_send_rep(NBDClient *client, uint32_t type,
 
 /* Send an error reply.
  * Return -errno on error, 0 on success. */
-static int GCC_FMT_ATTR(4, 0)
+static int G_GNUC_PRINTF(4, 0)
 nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type,
                             Error **errp, const char *fmt, va_list va)
 {
@@ -250,7 +257,7 @@ nbd_sanitize_name(const char *name)
 
 /* Send an error reply.
  * Return -errno on error, 0 on success. */
-static int GCC_FMT_ATTR(4, 5)
+static int G_GNUC_PRINTF(4, 5)
 nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type,
                            Error **errp, const char *fmt, ...)
 {
@@ -266,7 +273,7 @@ nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type,
 /* Drop remainder of the current option, and send a reply with the
  * given error type and message. Return -errno on read or write
  * failure; or 0 if connection is still live. */
-static int GCC_FMT_ATTR(4, 0)
+static int G_GNUC_PRINTF(4, 0)
 nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp,
               const char *fmt, va_list va)
 {
@@ -279,7 +286,7 @@ nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp,
     return ret;
 }
 
-static int GCC_FMT_ATTR(4, 5)
+static int G_GNUC_PRINTF(4, 5)
 nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
              const char *fmt, ...)
 {
@@ -293,7 +300,7 @@ nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
     return ret;
 }
 
-static int GCC_FMT_ATTR(3, 4)
+static int G_GNUC_PRINTF(3, 4)
 nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...)
 {
     int ret;
@@ -450,10 +457,10 @@ static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
     return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
 }
 
-static void nbd_check_meta_export(NBDClient *client)
+static void nbd_check_meta_export(NBDClient *client, NBDExport *exp)
 {
-    if (client->exp != client->export_meta.exp) {
-        client->export_meta.count = 0;
+    if (exp != client->contexts.exp) {
+        client->contexts.count = 0;
     }
 }
 
@@ -477,6 +484,10 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
         [10 .. 133]   reserved     (0) [unless no_zeroes]
      */
     trace_nbd_negotiate_handle_export_name();
+    if (client->mode >= NBD_MODE_EXTENDED) {
+        error_setg(errp, "Extended headers already negotiated");
+        return -EINVAL;
+    }
     if (client->optlen > NBD_MAX_STRING_SIZE) {
         error_setg(errp, "Bad length received");
         return -EINVAL;
@@ -495,11 +506,15 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
         error_setg(errp, "export not found");
         return -EINVAL;
     }
+    nbd_check_meta_export(client, client->exp);
 
     myflags = client->exp->nbdflags;
-    if (client->structured_reply) {
+    if (client->mode >= NBD_MODE_STRUCTURED) {
         myflags |= NBD_FLAG_SEND_DF;
     }
+    if (client->mode >= NBD_MODE_EXTENDED && client->contexts.count) {
+        myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD;
+    }
     trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags);
     stq_be_p(buf, client->exp->size);
     stw_be_p(buf + 8, myflags);
@@ -512,7 +527,6 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
 
     QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
     blk_exp_ref(&client->exp->common);
-    nbd_check_meta_export(client);
 
     return 0;
 }
@@ -632,6 +646,9 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
                                           errp, "export '%s' not present",
                                           sane_name);
     }
+    if (client->opt == NBD_OPT_GO) {
+        nbd_check_meta_export(client, exp);
+    }
 
     /* Don't bother sending NBD_INFO_NAME unless client requested it */
     if (sendname) {
@@ -682,9 +699,13 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
 
     /* Send NBD_INFO_EXPORT always */
     myflags = exp->nbdflags;
-    if (client->structured_reply) {
+    if (client->mode >= NBD_MODE_STRUCTURED) {
         myflags |= NBD_FLAG_SEND_DF;
     }
+    if (client->mode >= NBD_MODE_EXTENDED &&
+        (client->contexts.count || client->opt == NBD_OPT_INFO)) {
+        myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD;
+    }
     trace_nbd_negotiate_new_style_size_flags(exp->size, myflags);
     stq_be_p(buf, exp->size);
     stw_be_p(buf + 8, myflags);
@@ -720,7 +741,6 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
         client->check_align = check_align;
         QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
         blk_exp_ref(&client->exp->common);
-        nbd_check_meta_export(client);
         rc = 1;
     }
     return rc;
@@ -843,7 +863,7 @@ static bool nbd_strshift(const char **str, const char *prefix)
  * Handle queries to 'base' namespace. For now, only the base:allocation
  * context is available.  Return true if @query has been handled.
  */
-static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
+static bool nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta,
                                 const char *query)
 {
     if (!nbd_strshift(&query, "base:")) {
@@ -863,7 +883,7 @@ static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
  * and qemu:allocation-depth contexts are available.  Return true if @query
  * has been handled.
  */
-static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
+static bool nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta,
                                 const char *query)
 {
     size_t i;
@@ -876,7 +896,9 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
     if (!*query) {
         if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
             meta->allocation_depth = meta->exp->allocation_depth;
-            memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
+            if (meta->exp->nr_export_bitmaps) {
+                memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
+            }
         }
         trace_nbd_negotiate_meta_query_parse("empty");
         return true;
@@ -891,7 +913,8 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
     if (nbd_strshift(&query, "dirty-bitmap:")) {
         trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
         if (!*query) {
-            if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+            if (client->opt == NBD_OPT_LIST_META_CONTEXT &&
+                meta->exp->nr_export_bitmaps) {
                 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
             }
             trace_nbd_negotiate_meta_query_parse("empty");
@@ -926,7 +949,7 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
  * Return -errno on I/O error, 0 if option was completely handled by
  * sending a reply about inconsistent lengths, or 1 on success. */
 static int nbd_negotiate_meta_query(NBDClient *client,
-                                    NBDExportMetaContexts *meta, Error **errp)
+                                    NBDMetaContexts *meta, Error **errp)
 {
     int ret;
     g_autofree char *query = NULL;
@@ -965,18 +988,20 @@ static int nbd_negotiate_meta_query(NBDClient *client,
  * Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT
  *
  * Return -errno on I/O error, or 0 if option was completely handled. */
-static int nbd_negotiate_meta_queries(NBDClient *client,
-                                      NBDExportMetaContexts *meta, Error **errp)
+static int nbd_negotiate_meta_queries(NBDClient *client, Error **errp)
 {
     int ret;
     g_autofree char *export_name = NULL;
-    g_autofree bool *bitmaps = NULL;
-    NBDExportMetaContexts local_meta = {0};
+    /* Mark unused to work around https://bugs.llvm.org/show_bug.cgi?id=3888 */
+    g_autofree G_GNUC_UNUSED bool *bitmaps = NULL;
+    NBDMetaContexts local_meta = {0};
+    NBDMetaContexts *meta;
     uint32_t nb_queries;
     size_t i;
     size_t count = 0;
 
-    if (!client->structured_reply) {
+    if (client->opt == NBD_OPT_SET_META_CONTEXT &&
+        client->mode < NBD_MODE_STRUCTURED) {
         return nbd_opt_invalid(client, errp,
                                "request option '%s' when structured reply "
                                "is not negotiated",
@@ -986,6 +1011,8 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
     if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
         /* Only change the caller's meta on SET. */
         meta = &local_meta;
+    } else {
+        meta = &client->contexts;
     }
 
     g_free(meta->bitmaps);
@@ -1020,7 +1047,9 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
         /* enable all known contexts */
         meta->base_allocation = true;
         meta->allocation_depth = meta->exp->allocation_depth;
-        memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
+        if (meta->exp->nr_export_bitmaps) {
+            memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
+        }
     } else {
         for (i = 0; i < nb_queries; ++i) {
             ret = nbd_negotiate_meta_query(client, meta, errp);
@@ -1111,10 +1140,12 @@ static int nbd_negotiate_options(NBDClient *client, Error **errp)
     if (nbd_read32(client->ioc, &flags, "flags", errp) < 0) {
         return -EIO;
     }
+    client->mode = NBD_MODE_EXPORT_NAME;
     trace_nbd_negotiate_options_flags(flags);
     if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
         fixedNewstyle = true;
         flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
+        client->mode = NBD_MODE_SIMPLE;
     }
     if (flags & NBD_FLAG_C_NO_ZEROES) {
         no_zeroes = true;
@@ -1151,7 +1182,7 @@ static int nbd_negotiate_options(NBDClient *client, Error **errp)
         client->optlen = length;
 
         if (length > NBD_MAX_BUFFER_SIZE) {
-            error_setg(errp, "len (%" PRIu32) is larger than max len (%u)",
+            error_setg(errp, "len (%" PRIu32 ") is larger than max len (%u)",
                        length, NBD_MAX_BUFFER_SIZE);
             return -EINVAL;
         }
@@ -1178,7 +1209,7 @@ static int nbd_negotiate_options(NBDClient *client, Error **errp)
                 }
                 ret = 0;
                 object_unref(OBJECT(client->ioc));
-                client->ioc = QIO_CHANNEL(tioc);
+                client->ioc = tioc;
                 break;
 
             case NBD_OPT_EXPORT_NAME:
@@ -1250,20 +1281,36 @@ static int nbd_negotiate_options(NBDClient *client, Error **errp)
             case NBD_OPT_STRUCTURED_REPLY:
                 if (length) {
                     ret = nbd_reject_length(client, false, errp);
-                } else if (client->structured_reply) {
+                } else if (client->mode >= NBD_MODE_EXTENDED) {
+                    ret = nbd_negotiate_send_rep_err(
+                        client, NBD_REP_ERR_EXT_HEADER_REQD, errp,
+                        "extended headers already negotiated");
+                } else if (client->mode >= NBD_MODE_STRUCTURED) {
                     ret = nbd_negotiate_send_rep_err(
                         client, NBD_REP_ERR_INVALID, errp,
                         "structured reply already negotiated");
                 } else {
                     ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
-                    client->structured_reply = true;
+                    client->mode = NBD_MODE_STRUCTURED;
                 }
                 break;
 
             case NBD_OPT_LIST_META_CONTEXT:
             case NBD_OPT_SET_META_CONTEXT:
-                ret = nbd_negotiate_meta_queries(client, &client->export_meta,
-                                                 errp);
+                ret = nbd_negotiate_meta_queries(client, errp);
+                break;
+
+            case NBD_OPT_EXTENDED_HEADERS:
+                if (length) {
+                    ret = nbd_reject_length(client, false, errp);
+                } else if (client->mode >= NBD_MODE_EXTENDED) {
+                    ret = nbd_negotiate_send_rep_err(
+                        client, NBD_REP_ERR_INVALID, errp,
+                        "extended headers already negotiated");
+                } else {
+                    ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
+                    client->mode = NBD_MODE_EXTENDED;
+                }
                 break;
 
             default:
@@ -1322,6 +1369,7 @@ static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
      */
 
     qio_channel_set_blocking(client->ioc, false, NULL);
+    qio_channel_set_follow_coroutine_ctx(client->ioc, true);
 
     trace_nbd_negotiate_begin();
     memcpy(buf, "NBDMAGIC", 8);
@@ -1341,50 +1389,111 @@ static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
         return ret;
     }
 
-    /* Attach the channel to the same AioContext as the export */
-    if (client->exp && client->exp->common.ctx) {
-        qio_channel_attach_aio_context(client->ioc, client->exp->common.ctx);
-    }
-
     assert(!client->optlen);
     trace_nbd_negotiate_success();
 
     return 0;
 }
 
-static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request,
-                               Error **errp)
+/* nbd_read_eof
+ * Tries to read @size bytes from @ioc. This is a local implementation of
+ * qio_channel_readv_all_eof. We have it here because we need it to be
+ * interruptible and to know when the coroutine is yielding.
+ * Returns 1 on success
+ *         0 on eof, when no data was read (errp is not set)
+ *         negative errno on failure (errp is set)
+ */
+static inline int coroutine_fn
+nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp)
+{
+    bool partial = false;
+
+    assert(size);
+    while (size > 0) {
+        struct iovec iov = { .iov_base = buffer, .iov_len = size };
+        ssize_t len;
+
+        len = qio_channel_readv(client->ioc, &iov, 1, errp);
+        if (len == QIO_CHANNEL_ERR_BLOCK) {
+            client->read_yielding = true;
+            qio_channel_yield(client->ioc, G_IO_IN);
+            client->read_yielding = false;
+            if (client->quiescing) {
+                return -EAGAIN;
+            }
+            continue;
+        } else if (len < 0) {
+            return -EIO;
+        } else if (len == 0) {
+            if (partial) {
+                error_setg(errp,
+                           "Unexpected end-of-file before all bytes were read");
+                return -EIO;
+            } else {
+                return 0;
+            }
+        }
+
+        partial = true;
+        size -= len;
+        buffer = (uint8_t *) buffer + len;
+    }
+    return 1;
+}
+
+static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *request,
+                                            Error **errp)
 {
-    uint8_t buf[NBD_REQUEST_SIZE];
-    uint32_t magic;
+    uint8_t buf[NBD_EXTENDED_REQUEST_SIZE];
+    uint32_t magic, expect;
     int ret;
+    size_t size = client->mode >= NBD_MODE_EXTENDED ?
+        NBD_EXTENDED_REQUEST_SIZE : NBD_REQUEST_SIZE;
 
-    ret = nbd_read(ioc, buf, sizeof(buf), "request", errp);
+    ret = nbd_read_eof(client, buf, size, errp);
     if (ret < 0) {
         return ret;
     }
+    if (ret == 0) {
+        return -EIO;
+    }
 
-    /* Request
-       [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
-       [ 4 ..  5]   flags   (NBD_CMD_FLAG_FUA, ...)
-       [ 6 ..  7]   type    (NBD_CMD_READ, ...)
-       [ 8 .. 15]   handle
-       [16 .. 23]   from
-       [24 .. 27]   len
+    /*
+     * Compact request
+     *  [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
+     *  [ 4 ..  5]   flags   (NBD_CMD_FLAG_FUA, ...)
+     *  [ 6 ..  7]   type    (NBD_CMD_READ, ...)
+     *  [ 8 .. 15]   cookie
+     *  [16 .. 23]   from
+     *  [24 .. 27]   len
+     * Extended request
+     *  [ 0 ..  3]   magic   (NBD_EXTENDED_REQUEST_MAGIC)
+     *  [ 4 ..  5]   flags   (NBD_CMD_FLAG_FUA, NBD_CMD_FLAG_PAYLOAD_LEN, ...)
+     *  [ 6 ..  7]   type    (NBD_CMD_READ, ...)
+     *  [ 8 .. 15]   cookie
+     *  [16 .. 23]   from
+     *  [24 .. 31]   len
      */
 
     magic = ldl_be_p(buf);
     request->flags  = lduw_be_p(buf + 4);
     request->type   = lduw_be_p(buf + 6);
-    request->handle = ldq_be_p(buf + 8);
+    request->cookie = ldq_be_p(buf + 8);
     request->from   = ldq_be_p(buf + 16);
-    request->len    = ldl_be_p(buf + 24);
+    if (client->mode >= NBD_MODE_EXTENDED) {
+        request->len = ldq_be_p(buf + 24);
+        expect = NBD_EXTENDED_REQUEST_MAGIC;
+    } else {
+        request->len = (uint32_t)ldl_be_p(buf + 24); /* widen 32 to 64 bits */
+        expect = NBD_REQUEST_MAGIC;
+    }
 
     trace_nbd_receive_request(magic, request->flags, request->type,
                               request->from, request->len);
 
-    if (magic != NBD_REQUEST_MAGIC) {
-        error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
+    if (magic != expect) {
+        error_setg(errp, "invalid magic (got 0x%" PRIx32 ", expected 0x%"
+                   PRIx32 ")", magic, expect);
         return -EINVAL;
     }
     return 0;
@@ -1405,7 +1514,6 @@ void nbd_client_put(NBDClient *client)
          */
         assert(client->closing);
 
-        qio_channel_detach_aio_context(client->ioc);
         object_unref(OBJECT(client->sioc));
         object_unref(OBJECT(client->ioc));
         if (client->tlscreds) {
@@ -1416,7 +1524,7 @@ void nbd_client_put(NBDClient *client)
             QTAILQ_REMOVE(&client->exp->clients, client, next);
             blk_exp_unref(&client->exp->common);
         }
-        g_free(client->export_meta.bitmaps);
+        g_free(client->contexts.bitmaps);
         g_free(client);
     }
 }
@@ -1464,6 +1572,11 @@ static void nbd_request_put(NBDRequestData *req)
     g_free(req);
 
     client->nb_requests--;
+
+    if (client->quiescing && client->nb_requests == 0) {
+        aio_wait_kick();
+    }
+
     nbd_client_receive_next_request(client);
 
     nbd_client_put(client);
@@ -1479,28 +1592,62 @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
     exp->common.ctx = ctx;
 
     QTAILQ_FOREACH(client, &exp->clients, next) {
-        qio_channel_attach_aio_context(client->ioc, ctx);
-        if (client->recv_coroutine) {
-            aio_co_schedule(ctx, client->recv_coroutine);
-        }
-        if (client->send_coroutine) {
-            aio_co_schedule(ctx, client->send_coroutine);
-        }
+        assert(client->nb_requests == 0);
+        assert(client->recv_coroutine == NULL);
+        assert(client->send_coroutine == NULL);
     }
 }
 
 static void blk_aio_detach(void *opaque)
 {
     NBDExport *exp = opaque;
-    NBDClient *client;
 
     trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
 
+    exp->common.ctx = NULL;
+}
+
+static void nbd_drained_begin(void *opaque)
+{
+    NBDExport *exp = opaque;
+    NBDClient *client;
+
     QTAILQ_FOREACH(client, &exp->clients, next) {
-        qio_channel_detach_aio_context(client->ioc);
+        client->quiescing = true;
     }
+}
 
-    exp->common.ctx = NULL;
+static void nbd_drained_end(void *opaque)
+{
+    NBDExport *exp = opaque;
+    NBDClient *client;
+
+    QTAILQ_FOREACH(client, &exp->clients, next) {
+        client->quiescing = false;
+        nbd_client_receive_next_request(client);
+    }
+}
+
+static bool nbd_drained_poll(void *opaque)
+{
+    NBDExport *exp = opaque;
+    NBDClient *client;
+
+    QTAILQ_FOREACH(client, &exp->clients, next) {
+        if (client->nb_requests != 0) {
+            /*
+             * If there's a coroutine waiting for a request on nbd_read_eof()
+             * enter it here so we don't depend on the client to wake it up.
+             */
+            if (client->recv_coroutine != NULL && client->read_yielding) {
+                qio_channel_wake_read(client->ioc);
+            }
+
+            return true;
+        }
+    }
+
+    return false;
 }
 
 static void nbd_eject_notifier(Notifier *n, void *data)
@@ -1522,20 +1669,27 @@ void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk)
     blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier);
 }
 
+static const BlockDevOps nbd_block_ops = {
+    .drained_begin = nbd_drained_begin,
+    .drained_end = nbd_drained_end,
+    .drained_poll = nbd_drained_poll,
+};
+
 static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
                              Error **errp)
 {
     NBDExport *exp = container_of(blk_exp, NBDExport, common);
     BlockExportOptionsNbd *arg = &exp_args->u.nbd;
+    const char *name = arg->name ?: exp_args->node_name;
     BlockBackend *blk = blk_exp->blk;
     int64_t size;
     uint64_t perm, shared_perm;
     bool readonly = !exp_args->writable;
-    bool shared = !exp_args->writable;
-    strList *bitmaps;
+    BlockDirtyBitmapOrStrList *bitmaps;
     size_t i;
     int ret;
 
+    GLOBAL_STATE_CODE();
     assert(exp_args->type == BLOCK_EXPORT_TYPE_NBD);
 
     if (!nbd_server_is_running()) {
@@ -1543,12 +1697,8 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
         return -EINVAL;
     }
 
-    if (!arg->has_name) {
-        arg->name = exp_args->node_name;
-    }
-
-    if (strlen(arg->name) > NBD_MAX_STRING_SIZE) {
-        error_setg(errp, "export name '%s' too long", arg->name);
+    if (strlen(name) > NBD_MAX_STRING_SIZE) {
+        error_setg(errp, "export name '%s' too long", name);
         return -EINVAL;
     }
 
@@ -1557,8 +1707,8 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
         return -EINVAL;
     }
 
-    if (nbd_export_find(arg->name)) {
-        error_setg(errp, "NBD server already has export named '%s'", arg->name);
+    if (nbd_export_find(name)) {
+        error_setg(errp, "NBD server already has export named '%s'", name);
         return -EEXIST;
     }
 
@@ -1578,57 +1728,79 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
     }
 
     QTAILQ_INIT(&exp->clients);
-    exp->name = g_strdup(arg->name);
+    exp->name = g_strdup(name);
     exp->description = g_strdup(arg->description);
     exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH |
                      NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE);
+
+    if (nbd_server_max_connections() != 1) {
+        exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN;
+    }
     if (readonly) {
         exp->nbdflags |= NBD_FLAG_READ_ONLY;
-        if (shared) {
-            exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN;
-        }
     } else {
         exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES |
                           NBD_FLAG_SEND_FAST_ZERO);
     }
     exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);
 
+    bdrv_graph_rdlock_main_loop();
+
     for (bitmaps = arg->bitmaps; bitmaps; bitmaps = bitmaps->next) {
         exp->nr_export_bitmaps++;
     }
     exp->export_bitmaps = g_new0(BdrvDirtyBitmap *, exp->nr_export_bitmaps);
     for (i = 0, bitmaps = arg->bitmaps; bitmaps;
-         i++, bitmaps = bitmaps->next) {
-        const char *bitmap = bitmaps->value;
+         i++, bitmaps = bitmaps->next)
+    {
+        const char *bitmap;
         BlockDriverState *bs = blk_bs(blk);
         BdrvDirtyBitmap *bm = NULL;
 
-        while (bs) {
-            bm = bdrv_find_dirty_bitmap(bs, bitmap);
-            if (bm != NULL) {
-                break;
+        switch (bitmaps->value->type) {
+        case QTYPE_QSTRING:
+            bitmap = bitmaps->value->u.local;
+            while (bs) {
+                bm = bdrv_find_dirty_bitmap(bs, bitmap);
+                if (bm != NULL) {
+                    break;
+                }
+
+                bs = bdrv_filter_or_cow_bs(bs);
             }
 
-            bs = bdrv_filter_or_cow_bs(bs);
-        }
+            if (bm == NULL) {
+                ret = -ENOENT;
+                error_setg(errp, "Bitmap '%s' is not found",
+                           bitmaps->value->u.local);
+                goto fail;
+            }
 
-        if (bm == NULL) {
-            ret = -ENOENT;
-            error_setg(errp, "Bitmap '%s' is not found", bitmap);
-            goto fail;
+            if (readonly && bdrv_is_writable(bs) &&
+                bdrv_dirty_bitmap_enabled(bm)) {
+                ret = -EINVAL;
+                error_setg(errp, "Enabled bitmap '%s' incompatible with "
+                           "readonly export", bitmap);
+                goto fail;
+            }
+            break;
+        case QTYPE_QDICT:
+            bitmap = bitmaps->value->u.external.name;
+            bm = block_dirty_bitmap_lookup(bitmaps->value->u.external.node,
+                                           bitmap, NULL, errp);
+            if (!bm) {
+                ret = -ENOENT;
+                goto fail;
+            }
+            break;
+        default:
+            abort();
         }
 
-        if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) {
-            ret = -EINVAL;
-            goto fail;
-        }
+        assert(bm);
 
-        if (readonly && bdrv_is_writable(bs) &&
-            bdrv_dirty_bitmap_enabled(bm)) {
+        if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) {
             ret = -EINVAL;
-            error_setg(errp,
-                       "Enabled bitmap '%s' incompatible with readonly export",
-                       bitmap);
             goto fail;
         }
 
@@ -1643,13 +1815,25 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
 
     exp->allocation_depth = arg->allocation_depth;
 
+    /*
+     * We need to inhibit request queuing in the block layer to ensure we can
+     * be properly quiesced when entering a drained section, as our coroutines
+     * servicing pending requests might enter blk_pread().
+     */
+    blk_set_disable_request_queuing(blk, true);
+
     blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
 
+    blk_set_dev_ops(blk, &nbd_block_ops, exp);
+
     QTAILQ_INSERT_TAIL(&exports, exp, next);
 
+    bdrv_graph_rdunlock_main_loop();
+
     return 0;
 
 fail:
+    bdrv_graph_rdunlock_main_loop();
     g_free(exp->export_bitmaps);
     g_free(exp->name);
     g_free(exp->description);
@@ -1709,14 +1893,13 @@ static void nbd_export_delete(BlockExport *blk_exp)
     g_free(exp->description);
     exp->description = NULL;
 
-    if (exp->common.blk) {
-        if (exp->eject_notifier_blk) {
-            notifier_remove(&exp->eject_notifier);
-            blk_unref(exp->eject_notifier_blk);
-        }
-        blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
-                                        blk_aio_detach, exp);
+    if (exp->eject_notifier_blk) {
+        notifier_remove(&exp->eject_notifier);
+        blk_unref(exp->eject_notifier_blk);
     }
+    blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
+                                    blk_aio_detach, exp);
+    blk_set_disable_request_queuing(exp->common.blk, false);
 
     for (i = 0; i < exp->nr_export_bitmaps; i++) {
         bdrv_dirty_bitmap_set_busy(exp->export_bitmaps[i], false);
@@ -1749,19 +1932,19 @@ static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov,
 }
 
 static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error,
-                                       uint64_t handle)
+                                       uint64_t cookie)
 {
     stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
     stl_be_p(&reply->error, error);
-    stq_be_p(&reply->handle, handle);
+    stq_be_p(&reply->cookie, cookie);
 }
 
-static int nbd_co_send_simple_reply(NBDClient *client,
-                                    uint64_t handle,
-                                    uint32_t error,
-                                    void *data,
-                                    size_t len,
-                                    Error **errp)
+static int coroutine_fn nbd_co_send_simple_reply(NBDClient *client,
+                                                 NBDRequest *request,
+                                                 uint32_t error,
+                                                 void *data,
+                                                 uint64_t len,
+                                                 Error **errp)
 {
     NBDSimpleReply reply;
     int nbd_err = system_errno_to_nbd_errno(error);
@@ -1770,144 +1953,184 @@ static int nbd_co_send_simple_reply(NBDClient *client,
         {.iov_base = data, .iov_len = len}
     };
 
-    trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err),
-                                   len);
-    set_be_simple_reply(&reply, nbd_err, handle);
+    assert(!len || !nbd_err);
+    assert(len <= NBD_MAX_BUFFER_SIZE);
+    assert(client->mode < NBD_MODE_STRUCTURED ||
+           (client->mode == NBD_MODE_STRUCTURED &&
+            request->type != NBD_CMD_READ));
+    trace_nbd_co_send_simple_reply(request->cookie, nbd_err,
+                                   nbd_err_lookup(nbd_err), len);
+    set_be_simple_reply(&reply, nbd_err, request->cookie);
 
-    return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
+    return nbd_co_send_iov(client, iov, 2, errp);
 }
 
-static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags,
-                                uint16_t type, uint64_t handle, uint32_t length)
+/*
+ * Prepare the header of a reply chunk for network transmission.
+ *
+ * On input, @iov is partially initialized: iov[0].iov_base must point
+ * to an uninitialized NBDReply, while the remaining @niov elements
+ * (if any) must be ready for transmission.  This function then
+ * populates iov[0] for transmission.
+ */
+static inline void set_be_chunk(NBDClient *client, struct iovec *iov,
+                                size_t niov, uint16_t flags, uint16_t type,
+                                NBDRequest *request)
 {
-    stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
-    stw_be_p(&chunk->flags, flags);
-    stw_be_p(&chunk->type, type);
-    stq_be_p(&chunk->handle, handle);
-    stl_be_p(&chunk->length, length);
+    size_t i, length = 0;
+
+    for (i = 1; i < niov; i++) {
+        length += iov[i].iov_len;
+    }
+    assert(length <= NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData));
+
+    if (client->mode >= NBD_MODE_EXTENDED) {
+        NBDExtendedReplyChunk *chunk = iov->iov_base;
+
+        iov[0].iov_len = sizeof(*chunk);
+        stl_be_p(&chunk->magic, NBD_EXTENDED_REPLY_MAGIC);
+        stw_be_p(&chunk->flags, flags);
+        stw_be_p(&chunk->type, type);
+        stq_be_p(&chunk->cookie, request->cookie);
+        stq_be_p(&chunk->offset, request->from);
+        stq_be_p(&chunk->length, length);
+    } else {
+        NBDStructuredReplyChunk *chunk = iov->iov_base;
+
+        iov[0].iov_len = sizeof(*chunk);
+        stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
+        stw_be_p(&chunk->flags, flags);
+        stw_be_p(&chunk->type, type);
+        stq_be_p(&chunk->cookie, request->cookie);
+        stl_be_p(&chunk->length, length);
+    }
 }
 
-static int coroutine_fn nbd_co_send_structured_done(NBDClient *client,
-                                                    uint64_t handle,
-                                                    Error **errp)
+static int coroutine_fn nbd_co_send_chunk_done(NBDClient *client,
+                                               NBDRequest *request,
+                                               Error **errp)
 {
-    NBDStructuredReplyChunk chunk;
+    NBDReply hdr;
     struct iovec iov[] = {
-        {.iov_base = &chunk, .iov_len = sizeof(chunk)},
+        {.iov_base = &hdr},
     };
 
-    trace_nbd_co_send_structured_done(handle);
-    set_be_chunk(&chunk, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_NONE, handle, 0);
-
+    trace_nbd_co_send_chunk_done(request->cookie);
+    set_be_chunk(client, iov, 1, NBD_REPLY_FLAG_DONE,
+                 NBD_REPLY_TYPE_NONE, request);
     return nbd_co_send_iov(client, iov, 1, errp);
 }
 
-static int coroutine_fn nbd_co_send_structured_read(NBDClient *client,
-                                                    uint64_t handle,
-                                                    uint64_t offset,
-                                                    void *data,
-                                                    size_t size,
-                                                    bool final,
-                                                    Error **errp)
+static int coroutine_fn nbd_co_send_chunk_read(NBDClient *client,
+                                               NBDRequest *request,
+                                               uint64_t offset,
+                                               void *data,
+                                               uint64_t size,
+                                               bool final,
+                                               Error **errp)
 {
+    NBDReply hdr;
     NBDStructuredReadData chunk;
     struct iovec iov[] = {
+        {.iov_base = &hdr},
         {.iov_base = &chunk, .iov_len = sizeof(chunk)},
         {.iov_base = data, .iov_len = size}
     };
 
-    assert(size);
-    trace_nbd_co_send_structured_read(handle, offset, data, size);
-    set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
-                 NBD_REPLY_TYPE_OFFSET_DATA, handle,
-                 sizeof(chunk) - sizeof(chunk.h) + size);
+    assert(size && size <= NBD_MAX_BUFFER_SIZE);
+    trace_nbd_co_send_chunk_read(request->cookie, offset, data, size);
+    set_be_chunk(client, iov, 3, final ? NBD_REPLY_FLAG_DONE : 0,
+                 NBD_REPLY_TYPE_OFFSET_DATA, request);
     stq_be_p(&chunk.offset, offset);
 
-    return nbd_co_send_iov(client, iov, 2, errp);
+    return nbd_co_send_iov(client, iov, 3, errp);
 }
 
-static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
-                                                     uint64_t handle,
-                                                     uint32_t error,
-                                                     const char *msg,
-                                                     Error **errp)
+static int coroutine_fn nbd_co_send_chunk_error(NBDClient *client,
+                                                NBDRequest *request,
+                                                uint32_t error,
+                                                const char *msg,
+                                                Error **errp)
 {
+    NBDReply hdr;
     NBDStructuredError chunk;
     int nbd_err = system_errno_to_nbd_errno(error);
     struct iovec iov[] = {
+        {.iov_base = &hdr},
         {.iov_base = &chunk, .iov_len = sizeof(chunk)},
         {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
     };
 
     assert(nbd_err);
-    trace_nbd_co_send_structured_error(handle, nbd_err,
-                                       nbd_err_lookup(nbd_err), msg ? msg : "");
-    set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
-                 sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
+    trace_nbd_co_send_chunk_error(request->cookie, nbd_err,
+                                  nbd_err_lookup(nbd_err), msg ? msg : "");
+    set_be_chunk(client, iov, 3, NBD_REPLY_FLAG_DONE,
+                 NBD_REPLY_TYPE_ERROR, request);
     stl_be_p(&chunk.error, nbd_err);
-    stw_be_p(&chunk.message_length, iov[1].iov_len);
+    stw_be_p(&chunk.message_length, iov[2].iov_len);
 
-    return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp);
+    return nbd_co_send_iov(client, iov, 3, errp);
 }
 
 /* Do a sparse read and send the structured reply to the client.
- * Returns -errno if sending fails. bdrv_block_status_above() failure is
+ * Returns -errno if sending fails. blk_co_block_status_above() failure is
  * reported to the client, at which point this function succeeds.
  */
 static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
-                                                uint64_t handle,
+                                                NBDRequest *request,
                                                 uint64_t offset,
                                                 uint8_t *data,
-                                                size_t size,
+                                                uint64_t size,
                                                 Error **errp)
 {
     int ret = 0;
     NBDExport *exp = client->exp;
     size_t progress = 0;
 
+    assert(size <= NBD_MAX_BUFFER_SIZE);
     while (progress < size) {
         int64_t pnum;
-        int status = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
-                                             offset + progress,
-                                             size - progress, &pnum, NULL,
-                                             NULL);
+        int status = blk_co_block_status_above(exp->common.blk, NULL,
+                                               offset + progress,
+                                               size - progress, &pnum, NULL,
+                                               NULL);
         bool final;
 
         if (status < 0) {
             char *msg = g_strdup_printf("unable to check for holes: %s",
                                         strerror(-status));
 
-            ret = nbd_co_send_structured_error(client, handle, -status, msg,
-                                               errp);
+            ret = nbd_co_send_chunk_error(client, request, -status, msg, errp);
             g_free(msg);
             return ret;
         }
         assert(pnum && pnum <= size - progress);
         final = progress + pnum == size;
         if (status & BDRV_BLOCK_ZERO) {
+            NBDReply hdr;
             NBDStructuredReadHole chunk;
             struct iovec iov[] = {
+                {.iov_base = &hdr},
                 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
             };
 
-            trace_nbd_co_send_structured_read_hole(handle, offset + progress,
-                                                   pnum);
-            set_be_chunk(&chunk.h, final ? NBD_REPLY_FLAG_DONE : 0,
-                         NBD_REPLY_TYPE_OFFSET_HOLE,
-                         handle, sizeof(chunk) - sizeof(chunk.h));
+            trace_nbd_co_send_chunk_read_hole(request->cookie,
+                                              offset + progress, pnum);
+            set_be_chunk(client, iov, 2,
+                         final ? NBD_REPLY_FLAG_DONE : 0,
+                         NBD_REPLY_TYPE_OFFSET_HOLE, request);
             stq_be_p(&chunk.offset, offset + progress);
             stl_be_p(&chunk.length, pnum);
-            ret = nbd_co_send_iov(client, iov, 1, errp);
+            ret = nbd_co_send_iov(client, iov, 2, errp);
         } else {
-            ret = blk_pread(exp->common.blk, offset + progress,
-                            data + progress, pnum);
+            ret = blk_co_pread(exp->common.blk, offset + progress, pnum,
+                               data + progress, 0);
             if (ret < 0) {
                 error_setg_errno(errp, -ret, "reading from file failed");
                 break;
             }
-            ret = nbd_co_send_structured_read(client, handle, offset + progress,
-                                              data + progress, pnum, final,
-                                              errp);
+            ret = nbd_co_send_chunk_read(client, request, offset + progress,
+                                         data + progress, pnum, final, errp);
         }
 
         if (ret < 0) {
@@ -1919,20 +2142,24 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
 }
 
 typedef struct NBDExtentArray {
-    NBDExtent *extents;
+    NBDExtent64 *extents;
     unsigned int nb_alloc;
     unsigned int count;
     uint64_t total_length;
+    bool extended;
     bool can_add;
     bool converted_to_be;
 } NBDExtentArray;
 
-static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc)
+static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc,
+                                            NBDMode mode)
 {
     NBDExtentArray *ea = g_new0(NBDExtentArray, 1);
 
+    assert(mode >= NBD_MODE_STRUCTURED);
     ea->nb_alloc = nb_alloc;
-    ea->extents = g_new(NBDExtent, nb_alloc);
+    ea->extents = g_new(NBDExtent64, nb_alloc);
+    ea->extended = mode >= NBD_MODE_EXTENDED;
     ea->can_add = true;
 
     return ea;
@@ -1943,7 +2170,7 @@ static void nbd_extent_array_free(NBDExtentArray *ea)
     g_free(ea->extents);
     g_free(ea);
 }
-G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray, nbd_extent_array_free);
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(NBDExtentArray, nbd_extent_array_free)
 
 /* Further modifications of the array after conversion are abandoned */
 static void nbd_extent_array_convert_to_be(NBDExtentArray *ea)
@@ -1951,39 +2178,67 @@ static void nbd_extent_array_convert_to_be(NBDExtentArray *ea)
     int i;
 
     assert(!ea->converted_to_be);
+    assert(ea->extended);
+    ea->can_add = false;
+    ea->converted_to_be = true;
+
+    for (i = 0; i < ea->count; i++) {
+        ea->extents[i].length = cpu_to_be64(ea->extents[i].length);
+        ea->extents[i].flags = cpu_to_be64(ea->extents[i].flags);
+    }
+}
+
+/* Further modifications of the array after conversion are abandoned */
+static NBDExtent32 *nbd_extent_array_convert_to_narrow(NBDExtentArray *ea)
+{
+    int i;
+    NBDExtent32 *extents = g_new(NBDExtent32, ea->count);
+
+    assert(!ea->converted_to_be);
+    assert(!ea->extended);
     ea->can_add = false;
     ea->converted_to_be = true;
 
     for (i = 0; i < ea->count; i++) {
-        ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags);
-        ea->extents[i].length = cpu_to_be32(ea->extents[i].length);
+        assert((ea->extents[i].length | ea->extents[i].flags) <= UINT32_MAX);
+        extents[i].length = cpu_to_be32(ea->extents[i].length);
+        extents[i].flags = cpu_to_be32(ea->extents[i].flags);
     }
+
+    return extents;
 }
 
 /*
  * Add extent to NBDExtentArray. If extent can't be added (no available space),
  * return -1.
  * For safety, when returning -1 for the first time, .can_add is set to false,
- * further call to nbd_extent_array_add() will crash.
- * (to avoid the situation, when after failing to add an extent (returned -1),
- * user miss this failure and add another extent, which is successfully added
- * (array is full, but new extent may be squashed into the last one), then we
- * have invalid array with skipped extent)
+ * and further calls to nbd_extent_array_add() will crash.
+ * (this avoids the situation where a caller ignores failure to add one extent,
+ * where adding another extent that would squash into the last array entry
+ * would result in an incorrect range reported to the client)
  */
 static int nbd_extent_array_add(NBDExtentArray *ea,
-                                uint32_t length, uint32_t flags)
+                                uint64_t length, uint32_t flags)
 {
     assert(ea->can_add);
 
     if (!length) {
         return 0;
     }
+    if (!ea->extended) {
+        assert(length <= UINT32_MAX);
+    }
 
     /* Extend previous extent if flags are the same */
     if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) {
-        uint64_t sum = (uint64_t)length + ea->extents[ea->count - 1].length;
+        uint64_t sum = length + ea->extents[ea->count - 1].length;
 
-        if (sum <= UINT32_MAX) {
+        /*
+         * sum cannot overflow: the block layer bounds image size at
+         * 2^63, and ea->extents[].length comes from the block layer.
+         */
+        assert(sum >= length);
+        if (sum <= UINT32_MAX || ea->extended) {
             ea->extents[ea->count - 1].length = sum;
             ea->total_length += length;
             return 0;
@@ -1996,27 +2251,28 @@ static int nbd_extent_array_add(NBDExtentArray *ea,
     }
 
     ea->total_length += length;
-    ea->extents[ea->count] = (NBDExtent) {.length = length, .flags = flags};
+    ea->extents[ea->count] = (NBDExtent64) {.length = length, .flags = flags};
     ea->count++;
 
     return 0;
 }
 
-static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
-                                  uint64_t bytes, NBDExtentArray *ea)
+static int coroutine_fn blockstatus_to_extents(BlockBackend *blk,
+                                               uint64_t offset, uint64_t bytes,
+                                               NBDExtentArray *ea)
 {
     while (bytes) {
         uint32_t flags;
         int64_t num;
-        int ret = bdrv_block_status_above(bs, NULL, offset, bytes, &num,
-                                          NULL, NULL);
+        int ret = blk_co_block_status_above(blk, NULL, offset, bytes, &num,
+                                            NULL, NULL);
 
         if (ret < 0) {
             return ret;
         }
 
-        flags = (ret & BDRV_BLOCK_ALLOCATED ? 0 : NBD_STATE_HOLE) |
-                (ret & BDRV_BLOCK_ZERO      ? NBD_STATE_ZERO : 0);
+        flags = (ret & BDRV_BLOCK_DATA ? 0 : NBD_STATE_HOLE) |
+                (ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0);
 
         if (nbd_extent_array_add(ea, num, flags) < 0) {
             return 0;
@@ -2029,13 +2285,14 @@ static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
     return 0;
 }
 
-static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset,
-                                 uint64_t bytes, NBDExtentArray *ea)
+static int coroutine_fn blockalloc_to_extents(BlockBackend *blk,
+                                              uint64_t offset, uint64_t bytes,
+                                              NBDExtentArray *ea)
 {
     while (bytes) {
         int64_t num;
-        int ret = bdrv_is_allocated_above(bs, NULL, false, offset, bytes,
-                                          &num);
+        int ret = blk_co_is_allocated_above(blk, NULL, false, offset, bytes,
+                                            &num);
 
         if (ret < 0) {
             return ret;
@@ -2058,50 +2315,72 @@ static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset,
  * @ea is converted to BE by the function
  * @last controls whether NBD_REPLY_FLAG_DONE is sent.
  */
-static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
-                               NBDExtentArray *ea,
-                               bool last, uint32_t context_id, Error **errp)
-{
-    NBDStructuredMeta chunk;
-    struct iovec iov[] = {
-        {.iov_base = &chunk, .iov_len = sizeof(chunk)},
-        {.iov_base = ea->extents, .iov_len = ea->count * sizeof(ea->extents[0])}
-    };
+static int coroutine_fn
+nbd_co_send_extents(NBDClient *client, NBDRequest *request, NBDExtentArray *ea,
+                    bool last, uint32_t context_id, Error **errp)
+{
+    NBDReply hdr;
+    NBDStructuredMeta meta;
+    NBDExtendedMeta meta_ext;
+    g_autofree NBDExtent32 *extents = NULL;
+    uint16_t type;
+    struct iovec iov[] = { {.iov_base = &hdr}, {0}, {0} };
+
+    if (client->mode >= NBD_MODE_EXTENDED) {
+        type = NBD_REPLY_TYPE_BLOCK_STATUS_EXT;
+
+        iov[1].iov_base = &meta_ext;
+        iov[1].iov_len = sizeof(meta_ext);
+        stl_be_p(&meta_ext.context_id, context_id);
+        stl_be_p(&meta_ext.count, ea->count);
+
+        nbd_extent_array_convert_to_be(ea);
+        iov[2].iov_base = ea->extents;
+        iov[2].iov_len = ea->count * sizeof(ea->extents[0]);
+    } else {
+        type = NBD_REPLY_TYPE_BLOCK_STATUS;
 
-    nbd_extent_array_convert_to_be(ea);
+        iov[1].iov_base = &meta;
+        iov[1].iov_len = sizeof(meta);
+        stl_be_p(&meta.context_id, context_id);
 
-    trace_nbd_co_send_extents(handle, ea->count, context_id, ea->total_length,
-                              last);
-    set_be_chunk(&chunk.h, last ? NBD_REPLY_FLAG_DONE : 0,
-                 NBD_REPLY_TYPE_BLOCK_STATUS,
-                 handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
-    stl_be_p(&chunk.context_id, context_id);
+        extents = nbd_extent_array_convert_to_narrow(ea);
+        iov[2].iov_base = extents;
+        iov[2].iov_len = ea->count * sizeof(extents[0]);
+    }
 
-    return nbd_co_send_iov(client, iov, 2, errp);
+    trace_nbd_co_send_extents(request->cookie, ea->count, context_id,
+                              ea->total_length, last);
+    set_be_chunk(client, iov, 3, last ? NBD_REPLY_FLAG_DONE : 0, type,
+                 request);
+
+    return nbd_co_send_iov(client, iov, 3, errp);
 }
 
 /* Get block status from the exported device and send it to the client */
-static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
-                                    BlockDriverState *bs, uint64_t offset,
-                                    uint32_t length, bool dont_fragment,
-                                    bool last, uint32_t context_id,
-                                    Error **errp)
+static int
+coroutine_fn nbd_co_send_block_status(NBDClient *client, NBDRequest *request,
+                                      BlockBackend *blk, uint64_t offset,
+                                      uint64_t length, bool dont_fragment,
+                                      bool last, uint32_t context_id,
+                                      Error **errp)
 {
     int ret;
     unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
-    g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
+    g_autoptr(NBDExtentArray) ea =
+        nbd_extent_array_new(nb_extents, client->mode);
 
     if (context_id == NBD_META_ID_BASE_ALLOCATION) {
-        ret = blockstatus_to_extents(bs, offset, length, ea);
+        ret = blockstatus_to_extents(blk, offset, length, ea);
     } else {
-        ret = blockalloc_to_extents(bs, offset, length, ea);
+        ret = blockalloc_to_extents(blk, offset, length, ea);
     }
     if (ret < 0) {
-        return nbd_co_send_structured_error(
-                client, handle, -ret, "can't get block status", errp);
+        return nbd_co_send_chunk_error(client, request, -ret,
+                                       "can't get block status", errp);
     }
 
-    return nbd_co_send_extents(client, handle, ea, last, context_id, errp);
+    return nbd_co_send_extents(client, request, ea, last, context_id, errp);
 }
 
 /* Populate @ea from a dirty bitmap. */
@@ -2112,11 +2391,12 @@ static void bitmap_to_extents(BdrvDirtyBitmap *bitmap,
     int64_t start, dirty_start, dirty_count;
     int64_t end = offset + length;
     bool full = false;
+    int64_t bound = es->extended ? INT64_MAX : INT32_MAX;
 
     bdrv_dirty_bitmap_lock(bitmap);
 
     for (start = offset;
-         bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, INT32_MAX,
+         bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, bound,
                                            &dirty_start, &dirty_count);
          start = dirty_start + dirty_count)
     {
@@ -2136,93 +2416,267 @@ static void bitmap_to_extents(BdrvDirtyBitmap *bitmap,
     bdrv_dirty_bitmap_unlock(bitmap);
 }
 
-static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
-                              BdrvDirtyBitmap *bitmap, uint64_t offset,
-                              uint32_t length, bool dont_fragment, bool last,
-                              uint32_t context_id, Error **errp)
+static int coroutine_fn nbd_co_send_bitmap(NBDClient *client,
+                                           NBDRequest *request,
+                                           BdrvDirtyBitmap *bitmap,
+                                           uint64_t offset,
+                                           uint64_t length, bool dont_fragment,
+                                           bool last, uint32_t context_id,
+                                           Error **errp)
 {
     unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
-    g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
+    g_autoptr(NBDExtentArray) ea =
+        nbd_extent_array_new(nb_extents, client->mode);
 
     bitmap_to_extents(bitmap, offset, length, ea);
 
-    return nbd_co_send_extents(client, handle, ea, last, context_id, errp);
+    return nbd_co_send_extents(client, request, ea, last, context_id, errp);
+}
+
+/*
+ * nbd_co_block_status_payload_read
+ * Called when a client wants a subset of negotiated contexts via a
+ * BLOCK_STATUS payload.  Check the payload for valid length and
+ * contents.  On success, return 0 with request updated to effective
+ * length.  If request was invalid but all payload consumed, return 0
+ * with request->len and request->contexts->count set to 0 (which will
+ * trigger an appropriate NBD_EINVAL response later on).  Return
+ * negative errno if the payload was not fully consumed.
+ */
+static int
+nbd_co_block_status_payload_read(NBDClient *client, NBDRequest *request,
+                                 Error **errp)
+{
+    uint64_t payload_len = request->len;
+    g_autofree char *buf = NULL;
+    size_t count, i, nr_bitmaps;
+    uint32_t id;
+
+    if (payload_len > NBD_MAX_BUFFER_SIZE) {
+        error_setg(errp, "len (%" PRIu64 ") is larger than max len (%u)",
+                   request->len, NBD_MAX_BUFFER_SIZE);
+        return -EINVAL;
+    }
+
+    assert(client->contexts.exp == client->exp);
+    nr_bitmaps = client->exp->nr_export_bitmaps;
+    request->contexts = g_new0(NBDMetaContexts, 1);
+    request->contexts->exp = client->exp;
+
+    if (payload_len % sizeof(uint32_t) ||
+        payload_len < sizeof(NBDBlockStatusPayload) ||
+        payload_len > (sizeof(NBDBlockStatusPayload) +
+                       sizeof(id) * client->contexts.count)) {
+        goto skip;
+    }
+
+    buf = g_malloc(payload_len);
+    if (nbd_read(client->ioc, buf, payload_len,
+                 "CMD_BLOCK_STATUS data", errp) < 0) {
+        return -EIO;
+    }
+    trace_nbd_co_receive_request_payload_received(request->cookie,
+                                                  payload_len);
+    request->contexts->bitmaps = g_new0(bool, nr_bitmaps);
+    count = (payload_len - sizeof(NBDBlockStatusPayload)) / sizeof(id);
+    payload_len = 0;
+
+    for (i = 0; i < count; i++) {
+        id = ldl_be_p(buf + sizeof(NBDBlockStatusPayload) + sizeof(id) * i);
+        if (id == NBD_META_ID_BASE_ALLOCATION) {
+            if (!client->contexts.base_allocation ||
+                request->contexts->base_allocation) {
+                goto skip;
+            }
+            request->contexts->base_allocation = true;
+        } else if (id == NBD_META_ID_ALLOCATION_DEPTH) {
+            if (!client->contexts.allocation_depth ||
+                request->contexts->allocation_depth) {
+                goto skip;
+            }
+            request->contexts->allocation_depth = true;
+        } else {
+            unsigned idx = id - NBD_META_ID_DIRTY_BITMAP;
+
+            if (idx >= nr_bitmaps || !client->contexts.bitmaps[idx] ||
+                request->contexts->bitmaps[idx]) {
+                goto skip;
+            }
+            request->contexts->bitmaps[idx] = true;
+        }
+    }
+
+    request->len = ldq_be_p(buf);
+    request->contexts->count = count;
+    return 0;
+
+ skip:
+    trace_nbd_co_receive_block_status_payload_compliance(request->from,
+                                                         request->len);
+    request->len = request->contexts->count = 0;
+    return nbd_drop(client->ioc, payload_len, errp);
 }
 
 /* nbd_co_receive_request
  * Collect a client request. Return 0 if request looks valid, -EIO to drop
- * connection right away, and any other negative value to report an error to
- * the client (although the caller may still need to disconnect after reporting
- * the error).
+ * connection right away, -EAGAIN to indicate we were interrupted and the
+ * channel should be quiesced, and any other negative value to report an error
+ * to the client (although the caller may still need to disconnect after
+ * reporting the error).
  */
-static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
-                                  Error **errp)
+static int coroutine_fn nbd_co_receive_request(NBDRequestData *req,
+                                               NBDRequest *request,
+                                               Error **errp)
 {
     NBDClient *client = req->client;
-    int valid_flags;
+    bool extended_with_payload;
+    bool check_length = false;
+    bool check_rofs = false;
+    bool allocate_buffer = false;
+    bool payload_okay = false;
+    uint64_t payload_len = 0;
+    int valid_flags = NBD_CMD_FLAG_FUA;
+    int ret;
 
     g_assert(qemu_in_coroutine());
     assert(client->recv_coroutine == qemu_coroutine_self());
-    if (nbd_receive_request(client->ioc, request, errp) < 0) {
-        return -EIO;
+    ret = nbd_receive_request(client, request, errp);
+    if (ret < 0) {
+        return ret;
     }
 
-    trace_nbd_co_receive_request_decode_type(request->handle, request->type,
+    trace_nbd_co_receive_request_decode_type(request->cookie, request->type,
                                              nbd_cmd_lookup(request->type));
-
-    if (request->type != NBD_CMD_WRITE) {
-        /* No payload, we are ready to read the next request.  */
-        req->complete = true;
+    extended_with_payload = client->mode >= NBD_MODE_EXTENDED &&
+        request->flags & NBD_CMD_FLAG_PAYLOAD_LEN;
+    if (extended_with_payload) {
+        payload_len = request->len;
+        check_length = true;
     }
 
-    if (request->type == NBD_CMD_DISC) {
+    switch (request->type) {
+    case NBD_CMD_DISC:
         /* Special case: we're going to disconnect without a reply,
          * whether or not flags, from, or len are bogus */
+        req->complete = true;
         return -EIO;
-    }
 
-    if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE ||
-        request->type == NBD_CMD_CACHE)
-    {
-        if (request->len > NBD_MAX_BUFFER_SIZE) {
-            error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
-                       request->len, NBD_MAX_BUFFER_SIZE);
-            return -EINVAL;
+    case NBD_CMD_READ:
+        if (client->mode >= NBD_MODE_STRUCTURED) {
+            valid_flags |= NBD_CMD_FLAG_DF;
+        }
+        check_length = true;
+        allocate_buffer = true;
+        break;
+
+    case NBD_CMD_WRITE:
+        if (client->mode >= NBD_MODE_EXTENDED) {
+            if (!extended_with_payload) {
+                /* The client is noncompliant. Trace it, but proceed. */
+                trace_nbd_co_receive_ext_payload_compliance(request->from,
+                                                            request->len);
+            }
+            valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN;
         }
+        payload_okay = true;
+        payload_len = request->len;
+        check_length = true;
+        allocate_buffer = true;
+        check_rofs = true;
+        break;
+
+    case NBD_CMD_FLUSH:
+        break;
 
-        if (request->type != NBD_CMD_CACHE) {
-            req->data = blk_try_blockalign(client->exp->common.blk,
-                                           request->len);
-            if (req->data == NULL) {
-                error_setg(errp, "No memory");
-                return -ENOMEM;
+    case NBD_CMD_TRIM:
+        check_rofs = true;
+        break;
+
+    case NBD_CMD_CACHE:
+        check_length = true;
+        break;
+
+    case NBD_CMD_WRITE_ZEROES:
+        valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO;
+        check_rofs = true;
+        break;
+
+    case NBD_CMD_BLOCK_STATUS:
+        if (extended_with_payload) {
+            ret = nbd_co_block_status_payload_read(client, request, errp);
+            if (ret < 0) {
+                return ret;
             }
+            /* payload now consumed */
+            check_length = false;
+            payload_len = 0;
+            valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN;
+        } else {
+            request->contexts = &client->contexts;
         }
+        valid_flags |= NBD_CMD_FLAG_REQ_ONE;
+        break;
+
+    default:
+        /* Unrecognized, will fail later */
+        ;
     }
 
-    if (request->type == NBD_CMD_WRITE) {
-        if (nbd_read(client->ioc, req->data, request->len, "CMD_WRITE data",
-                     errp) < 0)
-        {
+    /* Payload and buffer handling. */
+    if (!payload_len) {
+        req->complete = true;
+    }
+    if (check_length && request->len > NBD_MAX_BUFFER_SIZE) {
+        /* READ, WRITE, CACHE */
+        error_setg(errp, "len (%" PRIu64 ") is larger than max len (%u)",
+                   request->len, NBD_MAX_BUFFER_SIZE);
+        return -EINVAL;
+    }
+    if (payload_len && !payload_okay) {
+        /*
+         * For now, we don't support payloads on other commands; but
+         * we can keep the connection alive by ignoring the payload.
+         * We will fail the command later with NBD_EINVAL for the use
+         * of an unsupported flag (and not for access beyond bounds).
+         */
+        assert(request->type != NBD_CMD_WRITE);
+        request->len = 0;
+    }
+    if (allocate_buffer) {
+        /* READ, WRITE */
+        req->data = blk_try_blockalign(client->exp->common.blk,
+                                       request->len);
+        if (req->data == NULL) {
+            error_setg(errp, "No memory");
+            return -ENOMEM;
+        }
+    }
+    if (payload_len) {
+        if (payload_okay) {
+            /* WRITE */
+            assert(req->data);
+            ret = nbd_read(client->ioc, req->data, payload_len,
+                           "CMD_WRITE data", errp);
+        } else {
+            ret = nbd_drop(client->ioc, payload_len, errp);
+        }
+        if (ret < 0) {
             return -EIO;
         }
         req->complete = true;
-
-        trace_nbd_co_receive_request_payload_received(request->handle,
-                                                      request->len);
+        trace_nbd_co_receive_request_payload_received(request->cookie,
+                                                      payload_len);
     }
 
     /* Sanity checks. */
-    if (client->exp->nbdflags & NBD_FLAG_READ_ONLY &&
-        (request->type == NBD_CMD_WRITE ||
-         request->type == NBD_CMD_WRITE_ZEROES ||
-         request->type == NBD_CMD_TRIM)) {
+    if (client->exp->nbdflags & NBD_FLAG_READ_ONLY && check_rofs) {
+        /* WRITE, TRIM, WRITE_ZEROES */
         error_setg(errp, "Export is read-only");
         return -EROFS;
     }
     if (request->from > client->exp->size ||
         request->len > client->exp->size - request->from) {
-        error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
+        error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu64
                    ", Size: %" PRIu64, request->from, request->len,
                    client->exp->size);
         return (request->type == NBD_CMD_WRITE ||
@@ -2239,14 +2693,6 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
                                               request->len,
                                               client->check_align);
     }
-    valid_flags = NBD_CMD_FLAG_FUA;
-    if (request->type == NBD_CMD_READ && client->structured_reply) {
-        valid_flags |= NBD_CMD_FLAG_DF;
-    } else if (request->type == NBD_CMD_WRITE_ZEROES) {
-        valid_flags |= NBD_CMD_FLAG_NO_HOLE | NBD_CMD_FLAG_FAST_ZERO;
-    } else if (request->type == NBD_CMD_BLOCK_STATUS) {
-        valid_flags |= NBD_CMD_FLAG_REQ_ONE;
-    }
     if (request->flags & ~valid_flags) {
         error_setg(errp, "unsupported flags for command %s (got 0x%x)",
                    nbd_cmd_lookup(request->type), request->flags);
@@ -2261,16 +2707,17 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
  * Returns 0 if connection is still live, -errno on failure to talk to client
  */
 static coroutine_fn int nbd_send_generic_reply(NBDClient *client,
-                                               uint64_t handle,
+                                               NBDRequest *request,
                                                int ret,
                                                const char *error_msg,
                                                Error **errp)
 {
-    if (client->structured_reply && ret < 0) {
-        return nbd_co_send_structured_error(client, handle, -ret, error_msg,
-                                            errp);
+    if (client->mode >= NBD_MODE_STRUCTURED && ret < 0) {
+        return nbd_co_send_chunk_error(client, request, -ret, error_msg, errp);
+    } else if (client->mode >= NBD_MODE_EXTENDED) {
+        return nbd_co_send_chunk_done(client, request, errp);
     } else {
-        return nbd_co_send_simple_reply(client, handle, ret < 0 ? -ret : 0,
+        return nbd_co_send_simple_reply(client, request, ret < 0 ? -ret : 0,
                                         NULL, 0, errp);
     }
 }
@@ -2285,39 +2732,39 @@ static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
     NBDExport *exp = client->exp;
 
     assert(request->type == NBD_CMD_READ);
+    assert(request->len <= NBD_MAX_BUFFER_SIZE);
 
     /* XXX: NBD Protocol only documents use of FUA with WRITE */
     if (request->flags & NBD_CMD_FLAG_FUA) {
         ret = blk_co_flush(exp->common.blk);
         if (ret < 0) {
-            return nbd_send_generic_reply(client, request->handle, ret,
+            return nbd_send_generic_reply(client, request, ret,
                                           "flush failed", errp);
         }
     }
 
-    if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) &&
-        request->len)
+    if (client->mode >= NBD_MODE_STRUCTURED &&
+        !(request->flags & NBD_CMD_FLAG_DF) && request->len)
     {
-        return nbd_co_send_sparse_read(client, request->handle, request->from,
+        return nbd_co_send_sparse_read(client, request, request->from,
                                        data, request->len, errp);
     }
 
-    ret = blk_pread(exp->common.blk, request->from, data, request->len);
+    ret = blk_co_pread(exp->common.blk, request->from, request->len, data, 0);
     if (ret < 0) {
-        return nbd_send_generic_reply(client, request->handle, ret,
+        return nbd_send_generic_reply(client, request, ret,
                                       "reading from file failed", errp);
     }
 
-    if (client->structured_reply) {
+    if (client->mode >= NBD_MODE_STRUCTURED) {
         if (request->len) {
-            return nbd_co_send_structured_read(client, request->handle,
-                                               request->from, data,
-                                               request->len, true, errp);
+            return nbd_co_send_chunk_read(client, request, request->from, data,
+                                          request->len, true, errp);
         } else {
-            return nbd_co_send_structured_done(client, request->handle, errp);
+            return nbd_co_send_chunk_done(client, request, errp);
         }
     } else {
-        return nbd_co_send_simple_reply(client, request->handle, 0,
+        return nbd_co_send_simple_reply(client, request, 0,
                                         data, request->len, errp);
     }
 }
@@ -2336,11 +2783,12 @@ static coroutine_fn int nbd_do_cmd_cache(NBDClient *client, NBDRequest *request,
     NBDExport *exp = client->exp;
 
     assert(request->type == NBD_CMD_CACHE);
+    assert(request->len <= NBD_MAX_BUFFER_SIZE);
 
     ret = blk_co_preadv(exp->common.blk, request->from, request->len,
                         NULL, BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
 
-    return nbd_send_generic_reply(client, request->handle, ret,
+    return nbd_send_generic_reply(client, request, ret,
                                   "caching data failed", errp);
 }
 
@@ -2369,9 +2817,10 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
         if (request->flags & NBD_CMD_FLAG_FUA) {
             flags |= BDRV_REQ_FUA;
         }
-        ret = blk_pwrite(exp->common.blk, request->from, data, request->len,
-                         flags);
-        return nbd_send_generic_reply(client, request->handle, ret,
+        assert(request->len <= NBD_MAX_BUFFER_SIZE);
+        ret = blk_co_pwrite(exp->common.blk, request->from, request->len, data,
+                            flags);
+        return nbd_send_generic_reply(client, request, ret,
                                       "writing to file failed", errp);
 
     case NBD_CMD_WRITE_ZEROES:
@@ -2385,17 +2834,9 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
         if (request->flags & NBD_CMD_FLAG_FAST_ZERO) {
             flags |= BDRV_REQ_NO_FALLBACK;
         }
-        ret = 0;
-        /* FIXME simplify this when blk_pwrite_zeroes switches to 64-bit */
-        while (ret >= 0 && request->len) {
-            int align = client->check_align ?: 1;
-            int len = MIN(request->len, QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES,
-                                                        align));
-            ret = blk_pwrite_zeroes(exp->common.blk, request->from, len, flags);
-            request->len -= len;
-            request->from += len;
-        }
-        return nbd_send_generic_reply(client, request->handle, ret,
+        ret = blk_co_pwrite_zeroes(exp->common.blk, request->from, request->len,
+                                   flags);
+        return nbd_send_generic_reply(client, request, ret,
                                       "writing to file failed", errp);
 
     case NBD_CMD_DISC:
@@ -2404,38 +2845,32 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
 
     case NBD_CMD_FLUSH:
         ret = blk_co_flush(exp->common.blk);
-        return nbd_send_generic_reply(client, request->handle, ret,
+        return nbd_send_generic_reply(client, request, ret,
                                       "flush failed", errp);
 
     case NBD_CMD_TRIM:
-        ret = 0;
-        /* FIXME simplify this when blk_co_pdiscard switches to 64-bit */
-        while (ret >= 0 && request->len) {
-            int align = client->check_align ?: 1;
-            int len = MIN(request->len, QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES,
-                                                        align));
-            ret = blk_co_pdiscard(exp->common.blk, request->from, len);
-            request->len -= len;
-            request->from += len;
-        }
+        ret = blk_co_pdiscard(exp->common.blk, request->from, request->len);
         if (ret >= 0 && request->flags & NBD_CMD_FLAG_FUA) {
             ret = blk_co_flush(exp->common.blk);
         }
-        return nbd_send_generic_reply(client, request->handle, ret,
+        return nbd_send_generic_reply(client, request, ret,
                                       "discard failed", errp);
 
     case NBD_CMD_BLOCK_STATUS:
-        if (!request->len) {
-            return nbd_send_generic_reply(client, request->handle, -EINVAL,
-                                          "need non-zero length", errp);
-        }
-        if (client->export_meta.count) {
+        assert(request->contexts);
+        assert(client->mode >= NBD_MODE_EXTENDED ||
+               request->len <= UINT32_MAX);
+        if (request->contexts->count) {
             bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
-            int contexts_remaining = client->export_meta.count;
+            int contexts_remaining = request->contexts->count;
 
-            if (client->export_meta.base_allocation) {
-                ret = nbd_co_send_block_status(client, request->handle,
-                                               blk_bs(exp->common.blk),
+            if (!request->len) {
+                return nbd_send_generic_reply(client, request, -EINVAL,
+                                              "need non-zero length", errp);
+            }
+            if (request->contexts->base_allocation) {
+                ret = nbd_co_send_block_status(client, request,
+                                               exp->common.blk,
                                                request->from,
                                                request->len, dont_fragment,
                                                !--contexts_remaining,
@@ -2446,9 +2881,9 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
                 }
             }
 
-            if (client->export_meta.allocation_depth) {
-                ret = nbd_co_send_block_status(client, request->handle,
-                                               blk_bs(exp->common.blk),
+            if (request->contexts->allocation_depth) {
+                ret = nbd_co_send_block_status(client, request,
+                                               exp->common.blk,
                                                request->from, request->len,
                                                dont_fragment,
                                                !--contexts_remaining,
@@ -2459,11 +2894,12 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
                 }
             }
 
+            assert(request->contexts->exp == client->exp);
             for (i = 0; i < client->exp->nr_export_bitmaps; i++) {
-                if (!client->export_meta.bitmaps[i]) {
+                if (!request->contexts->bitmaps[i]) {
                     continue;
                 }
-                ret = nbd_co_send_bitmap(client, request->handle,
+                ret = nbd_co_send_bitmap(client, request,
                                          client->exp->export_bitmaps[i],
                                          request->from, request->len,
                                          dont_fragment, !--contexts_remaining,
@@ -2476,8 +2912,12 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
             assert(!contexts_remaining);
 
             return 0;
+        } else if (client->contexts.count) {
+            return nbd_send_generic_reply(client, request, -EINVAL,
+                                          "CMD_BLOCK_STATUS payload not valid",
+                                          errp);
         } else {
-            return nbd_send_generic_reply(client, request->handle, -EINVAL,
+            return nbd_send_generic_reply(client, request, -EINVAL,
                                           "CMD_BLOCK_STATUS not negotiated",
                                           errp);
         }
@@ -2485,7 +2925,7 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
     default:
         msg = g_strdup_printf("invalid request type (%" PRIu32 ") received",
                               request->type);
-        ret = nbd_send_generic_reply(client, request->handle, -EINVAL, msg,
+        ret = nbd_send_generic_reply(client, request, -EINVAL, msg,
                                      errp);
         g_free(msg);
         return ret;
@@ -2507,6 +2947,17 @@ static coroutine_fn void nbd_trip(void *opaque)
         return;
     }
 
+    if (client->quiescing) {
+        /*
+         * We're switching between AIO contexts. Don't attempt to receive a new
+         * request and kick the main context which may be waiting for us.
+         */
+        nbd_client_put(client);
+        client->recv_coroutine = NULL;
+        aio_wait_kick();
+        return;
+    }
+
     req = nbd_request_get(client);
     ret = nbd_co_receive_request(req, &request, &local_err);
     client->recv_coroutine = NULL;
@@ -2519,36 +2970,50 @@ static coroutine_fn void nbd_trip(void *opaque)
         goto done;
     }
 
+    if (ret == -EAGAIN) {
+        assert(client->quiescing);
+        goto done;
+    }
+
     nbd_client_receive_next_request(client);
     if (ret == -EIO) {
         goto disconnect;
     }
 
+    qio_channel_set_cork(client->ioc, true);
+
     if (ret < 0) {
-        /* It wans't -EIO, so, according to nbd_co_receive_request()
+        /* It wasn't -EIO, so, according to nbd_co_receive_request()
          * semantics, we should return the error to the client. */
         Error *export_err = local_err;
 
         local_err = NULL;
-        ret = nbd_send_generic_reply(client, request.handle, -EINVAL,
+        ret = nbd_send_generic_reply(client, &request, -EINVAL,
                                      error_get_pretty(export_err), &local_err);
         error_free(export_err);
     } else {
         ret = nbd_handle_request(client, &request, req->data, &local_err);
     }
+    if (request.contexts && request.contexts != &client->contexts) {
+        assert(request.type == NBD_CMD_BLOCK_STATUS);
+        g_free(request.contexts->bitmaps);
+        g_free(request.contexts);
+    }
     if (ret < 0) {
         error_prepend(&local_err, "Failed to send reply: ");
         goto disconnect;
     }
 
-    /* We must disconnect after NBD_CMD_WRITE if we did not
-     * read the payload.
+    /*
+     * We must disconnect after NBD_CMD_WRITE or BLOCK_STATUS with
+     * payload if we did not read the payload.
      */
     if (!req->complete) {
         error_setg(&local_err, "Request handling failed in intermediate state");
         goto disconnect;
     }
 
+    qio_channel_set_cork(client->ioc, false);
 done:
     nbd_request_put(req);
     nbd_client_put(client);
@@ -2565,7 +3030,8 @@ disconnect:
 
 static void nbd_client_receive_next_request(NBDClient *client)
 {
-    if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) {
+    if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS &&
+        !client->quiescing) {
         nbd_client_get(client);
         client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
         aio_co_schedule(client->exp->common.ctx, client->recv_coroutine);
@@ -2611,6 +3077,7 @@ void nbd_client_new(QIOChannelSocket *sioc,
     }
     client->tlsauthz = g_strdup(tlsauthz);
     client->sioc = sioc;
+    qio_channel_set_delay(QIO_CHANNEL(sioc), false);
     object_ref(OBJECT(client->sioc));
     client->ioc = QIO_CHANNEL(sioc);
     object_ref(OBJECT(client->ioc));
index a955918e970786d43d1d2fdd92bf721cf870fc77..00ae3216a11148a39cfd8b7074bc01e6d5212fc9 100644 (file)
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 
 # client.c
 nbd_send_option_request(uint32_t opt, const char *name, uint32_t len) "Sending option request %" PRIu32" (%s), len %" PRIu32
@@ -31,9 +31,10 @@ nbd_client_loop(void) "Doing NBD loop"
 nbd_client_loop_ret(int ret, const char *error) "NBD loop returned %d: %s"
 nbd_client_clear_queue(void) "Clearing NBD queue"
 nbd_client_clear_socket(void) "Clearing NBD socket"
-nbd_send_request(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name) "Sending request to server: { .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) }"
-nbd_receive_simple_reply(int32_t error, const char *errname, uint64_t handle) "Got simple reply: { .error = %" PRId32 " (%s), handle = %" PRIu64" }"
-nbd_receive_structured_reply_chunk(uint16_t flags, uint16_t type, const char *name, uint64_t handle, uint32_t length) "Got structured reply chunk: { flags = 0x%" PRIx16 ", type = %d (%s), handle = %" PRIu64 ", length = %" PRIu32 " }"
+nbd_send_request(uint64_t from, uint64_t len, uint64_t cookie, uint16_t flags, uint16_t type, const char *name) "Sending request to server: { .from = %" PRIu64", .len = %" PRIu64 ", .cookie = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) }"
+nbd_receive_simple_reply(int32_t error, const char *errname, uint64_t cookie) "Got simple reply: { .error = %" PRId32 " (%s), cookie = %" PRIu64" }"
+nbd_receive_reply_chunk_header(uint16_t flags, uint16_t type, const char *name, uint64_t cookie, uint32_t length) "Got reply chunk header: { flags = 0x%" PRIx16 ", type = %" PRIu16 " (%s), cookie = %" PRIu64 ", length = %" PRIu32 " }"
+nbd_receive_wrong_header(uint32_t magic, const char *mode) "Server sent unexpected magic 0x%" PRIx32 " for negotiated mode %s"
 
 # common.c
 nbd_unknown_error(int err) "Squashing unexpected error %d to EINVAL"
@@ -60,16 +61,21 @@ nbd_negotiate_options_check_option(uint32_t option, const char *name) "Checking
 nbd_negotiate_begin(void) "Beginning negotiation"
 nbd_negotiate_new_style_size_flags(uint64_t size, unsigned flags) "advertising size %" PRIu64 " and flags 0x%x"
 nbd_negotiate_success(void) "Negotiation succeeded"
-nbd_receive_request(uint32_t magic, uint16_t flags, uint16_t type, uint64_t from, uint32_t len) "Got request: { magic = 0x%" PRIx32 ", .flags = 0x%" PRIx16 ", .type = 0x%" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }"
+nbd_receive_request(uint32_t magic, uint16_t flags, uint16_t type, uint64_t from, uint64_t len) "Got request: { magic = 0x%" PRIx32 ", .flags = 0x%" PRIx16 ", .type = 0x%" PRIx16 ", from = %" PRIu64 ", len = %" PRIu64 " }"
 nbd_blk_aio_attached(const char *name, void *ctx) "Export %s: Attaching clients to AIO context %p"
 nbd_blk_aio_detach(const char *name, void *ctx) "Export %s: Detaching clients from AIO context %p"
-nbd_co_send_simple_reply(uint64_t handle, uint32_t error, const char *errname, int len) "Send simple reply: handle = %" PRIu64 ", error = %" PRIu32 " (%s), len = %d"
-nbd_co_send_structured_done(uint64_t handle) "Send structured reply done: handle = %" PRIu64
-nbd_co_send_structured_read(uint64_t handle, uint64_t offset, void *data, size_t size) "Send structured read data reply: handle = %" PRIu64 ", offset = %" PRIu64 ", data = %p, len = %zu"
-nbd_co_send_structured_read_hole(uint64_t handle, uint64_t offset, size_t size) "Send structured read hole reply: handle = %" PRIu64 ", offset = %" PRIu64 ", len = %zu"
-nbd_co_send_extents(uint64_t handle, unsigned int extents, uint32_t id, uint64_t length, int last) "Send block status reply: handle = %" PRIu64 ", extents = %u, context = %d (extents cover %" PRIu64 " bytes, last chunk = %d)"
-nbd_co_send_structured_error(uint64_t handle, int err, const char *errname, const char *msg) "Send structured error reply: handle = %" PRIu64 ", error = %d (%s), msg = '%s'"
-nbd_co_receive_request_decode_type(uint64_t handle, uint16_t type, const char *name) "Decoding type: handle = %" PRIu64 ", type = %" PRIu16 " (%s)"
-nbd_co_receive_request_payload_received(uint64_t handle, uint32_t len) "Payload received: handle = %" PRIu64 ", len = %" PRIu32
-nbd_co_receive_align_compliance(const char *op, uint64_t from, uint32_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx32 ", align=0x%" PRIx32
+nbd_co_send_simple_reply(uint64_t cookie, uint32_t error, const char *errname, uint64_t len) "Send simple reply: cookie = %" PRIu64 ", error = %" PRIu32 " (%s), len = %" PRIu64
+nbd_co_send_chunk_done(uint64_t cookie) "Send structured reply done: cookie = %" PRIu64
+nbd_co_send_chunk_read(uint64_t cookie, uint64_t offset, void *data, uint64_t size) "Send structured read data reply: cookie = %" PRIu64 ", offset = %" PRIu64 ", data = %p, len = %" PRIu64
+nbd_co_send_chunk_read_hole(uint64_t cookie, uint64_t offset, uint64_t size) "Send structured read hole reply: cookie = %" PRIu64 ", offset = %" PRIu64 ", len = %" PRIu64
+nbd_co_send_extents(uint64_t cookie, unsigned int extents, uint32_t id, uint64_t length, int last) "Send block status reply: cookie = %" PRIu64 ", extents = %u, context = %d (extents cover %" PRIu64 " bytes, last chunk = %d)"
+nbd_co_send_chunk_error(uint64_t cookie, int err, const char *errname, const char *msg) "Send structured error reply: cookie = %" PRIu64 ", error = %d (%s), msg = '%s'"
+nbd_co_receive_block_status_payload_compliance(uint64_t from, uint64_t len) "client sent unusable block status payload: from=0x%" PRIx64 ", len=0x%" PRIx64
+nbd_co_receive_request_decode_type(uint64_t cookie, uint16_t type, const char *name) "Decoding type: cookie = %" PRIu64 ", type = %" PRIu16 " (%s)"
+nbd_co_receive_request_payload_received(uint64_t cookie, uint64_t len) "Payload received: cookie = %" PRIu64 ", len = %" PRIu64
+nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64
+nbd_co_receive_align_compliance(const char *op, uint64_t from, uint64_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx64 ", align=0x%" PRIx32
 nbd_trip(void) "Reading request"
+
+# client-connection.c
+nbd_connect_thread_sleep(uint64_t timeout) "timeout %" PRIu64
index 1de2839554ccefc124767dd894f5a0671c563062..52ef6990ff9d4c4e2cd52b6f6a820cdbb8efb9fb 100644 (file)
 #include <grp.h>
 #include <libgen.h>
 
-#include "qemu-common.h"
-/* Needed early for CONFIG_BSD etc. */
-#include "net/slirp.h"
-#include "qemu-options.h"
 #include "qemu/error-report.h"
 #include "qemu/log.h"
 #include "sysemu/runstate.h"
 #include <sys/prctl.h>
 #endif
 
-/*
- * Must set all three of these at once.
- * Legal combinations are              unset   by name   by uid
- */
-static struct passwd *user_pwd;    /*   NULL   non-NULL   NULL   */
-static uid_t user_uid = (uid_t)-1; /*   -1      -1        >=0    */
-static gid_t user_gid = (gid_t)-1; /*   -1      -1        >=0    */
-
-static const char *chroot_dir;
-static int daemonize;
-static int daemon_pipe;
 
 void os_setup_early_signal_handling(void)
 {
@@ -100,7 +85,22 @@ void os_set_proc_name(const char *s)
 }
 
 
-static bool os_parse_runas_uid_gid(const char *optarg)
+/*
+ * Must set all three of these at once.
+ * Legal combinations are              unset   by name   by uid
+ */
+static struct passwd *user_pwd;    /*   NULL   non-NULL   NULL   */
+static uid_t user_uid = (uid_t)-1; /*   -1      -1        >=0    */
+static gid_t user_gid = (gid_t)-1; /*   -1      -1        >=0    */
+
+/*
+ * Prepare to change user ID. user_id can be one of 3 forms:
+ *   - a username, in which case user ID will be changed to its uid,
+ *     with primary and supplementary groups set up too;
+ *   - a numeric uid, in which case only the uid will be set;
+ *   - a pair of numeric uid:gid.
+ */
+bool os_set_runas(const char *user_id)
 {
     unsigned long lv;
     const char *ep;
@@ -108,7 +108,14 @@ static bool os_parse_runas_uid_gid(const char *optarg)
     gid_t got_gid;
     int rc;
 
-    rc = qemu_strtoul(optarg, &ep, 0, &lv);
+    user_pwd = getpwnam(user_id);
+    if (user_pwd) {
+        user_uid = -1;
+        user_gid = -1;
+        return true;
+    }
+
+    rc = qemu_strtoul(user_id, &ep, 0, &lv);
     got_uid = lv; /* overflow here is ID in C99 */
     if (rc || *ep != ':' || got_uid != lv || got_uid == (uid_t)-1) {
         return false;
@@ -126,43 +133,6 @@ static bool os_parse_runas_uid_gid(const char *optarg)
     return true;
 }
 
-/*
- * Parse OS specific command line options.
- * return 0 if option handled, -1 otherwise
- */
-int os_parse_cmd_args(int index, const char *optarg)
-{
-    switch (index) {
-    case QEMU_OPTION_runas:
-        user_pwd = getpwnam(optarg);
-        if (user_pwd) {
-            user_uid = -1;
-            user_gid = -1;
-        } else if (!os_parse_runas_uid_gid(optarg)) {
-            error_report("User \"%s\" doesn't exist"
-                         " (and is not <uid>:<gid>)",
-                         optarg);
-            exit(1);
-        }
-        break;
-    case QEMU_OPTION_chroot:
-        chroot_dir = optarg;
-        break;
-    case QEMU_OPTION_daemonize:
-        daemonize = 1;
-        break;
-#if defined(CONFIG_LINUX)
-    case QEMU_OPTION_enablefips:
-        fips_set_state(true);
-        break;
-#endif
-    default:
-        return -1;
-    }
-
-    return 0;
-}
-
 static void change_process_uid(void)
 {
     assert((user_uid == (uid_t)-1) || user_pwd == NULL);
@@ -200,6 +170,14 @@ static void change_process_uid(void)
     }
 }
 
+
+static const char *chroot_dir;
+
+void os_set_chroot(const char *path)
+{
+    chroot_dir = path;
+}
+
 static void change_root(void)
 {
     if (chroot_dir) {
@@ -215,13 +193,28 @@ static void change_root(void)
 
 }
 
+
+static int daemonize;
+static int daemon_pipe;
+
+bool is_daemonized(void)
+{
+    return daemonize;
+}
+
+int os_set_daemonize(bool d)
+{
+    daemonize = d;
+    return 0;
+}
+
 void os_daemonize(void)
 {
     if (daemonize) {
         pid_t pid;
         int fds[2];
 
-        if (pipe(fds) == -1) {
+        if (!g_unix_open_pipe(fds, FD_CLOEXEC, NULL)) {
             exit(1);
         }
 
@@ -246,7 +239,6 @@ void os_daemonize(void)
 
         close(fds[0]);
         daemon_pipe = fds[1];
-        qemu_set_cloexec(daemon_pipe);
 
         setsid();
 
@@ -273,7 +265,7 @@ void os_setup_post(void)
             error_report("not able to chdir to /: %s", strerror(errno));
             exit(1);
         }
-        TFR(fd = qemu_open_old("/dev/null", O_RDWR));
+        fd = RETRY_ON_EINTR(qemu_open_old("/dev/null", O_RDWR));
         if (fd == -1) {
             exit(1);
         }
@@ -289,7 +281,7 @@ void os_setup_post(void)
         dup2(fd, 0);
         dup2(fd, 1);
         /* In case -D is given do not redirect stderr to /dev/null */
-        if (!qemu_logfile) {
+        if (!qemu_log_enabled()) {
             dup2(fd, 2);
         }
 
@@ -309,11 +301,6 @@ void os_set_line_buffering(void)
     setvbuf(stdout, NULL, _IOLBF, 0);
 }
 
-bool is_daemonized(void)
-{
-    return daemonize;
-}
-
 int os_mlock(void)
 {
 #ifdef HAVE_MLOCKALL
old mode 100755 (executable)
new mode 100644 (file)
old mode 100755 (executable)
new mode 100644 (file)
index 8592d99..2fed376
 
 %define _buildshell /bin/bash
 
-%define qemuver 5.2.0
+%define srcdir %{_builddir}/%buildsubdir
+%define blddir %srcdir/build
 
 Name:           qemu
 URL:            https://www.qemu.org/
 Summary:        Machine emulator and virtualizer
 License:        BSD-2-Clause AND BSD-3-Clause AND GPL-2.0-only AND GPL-2.0-or-later AND LGPL-2.1-or-later AND MIT
 Group:          System/Emulators/PC
-Version:        %qemuver
-Release:        20.1
-Source        qemu-%{version}.tar.xz
+Version:        8.2.2
+Release:        0
+Source0:        qemu-%{version}.tar.xz
 Source1:        bridge.conf
 BuildRoot:      %{_tmppath}/%{name}-%{version}-build
-
 BuildRequires:  gcc-c++
 BuildRequires:  meson
 BuildRequires:  ninja >= 1.7
-
 BuildRequires:  pkgconfig
-BuildRequires:  pkgconfig(glib-2.0) >= 2.48
+BuildRequires:  pkgconfig(glib-2.0) >= 2.56
 
 %description
-QEMU provides CPU emulation along with other related capabilities. This package
-provides programs to run user space binaries and libraries meant for another
-architecture. The syscall interface is intercepted and execution below the
-syscall layer occurs on the native hardware and operating system.
-
-%package tools
-Summary:        Tools for QEMU
-Group:          System/Emulators/PC
-Version:        %{qemuver}
-Release:        20.1
-
-%description tools
-This package contains various QEMU related tools, including a bridge helper,
-a virtfs helper, ivshmem, disk utilities and scripts for various purposes.
+QEMU provides full machine emulation and cross architecture usage. It closely\
+integrates with KVM and Xen virtualization, allowing for excellent performance.\
+Many options are available for defining the emulated environment, including\
+traditional devices, direct host device access, and interfaces specific to\
+virtualization.
 
 %prep
 %setup -q -n qemu-%{version}
 
 %build
 
-%define srcdir %{_builddir}/%buildsubdir
-%define blddir %srcdir/build
 mkdir -p %blddir
 cd %blddir
 
+EXTRA_CFLAGS="$(echo %{optflags}) -Wno-error"
+
 %srcdir/configure \
-       --prefix=%_prefix \
-       --sysconfdir=%_sysconfdir \
+       --python=%_bindir/python3 \
+       --docdir=%_docdir \
+       --datadir=%_datadir \
+       --extra-cflags="${EXTRA_CFLAGS}" \
+       --firmwarepath=%_datadir/%name \
        --libdir=%_libdir \
        --libexecdir=%_libexecdir \
        --localstatedir=%_localstatedir \
-       --docdir=%_docdir \
-       --firmwarepath=%_datadir/%name \
-       --extra-cflags="%{optflags}" \
-       --without-default-devices \
+       --prefix=%_prefix \
+       --sysconfdir=%_sysconfdir \
+       --with-pkgversion="%(echo '%{distro}' | sed 's/ (.*)//')" \
+       --disable-af-xdp \
+       --disable-alsa \
+       --disable-attr \
        --disable-auth-pam \
+       --disable-avx2 \
+       --disable-avx512f \
+       --disable-block-drv-whitelist-in-tools \
        --disable-bochs \
-       --disable-blobs \
+       --disable-bpf \
+       --disable-brlapi \
+       --disable-bsd-user \
+       --disable-bzip2 \
+       --disable-cap-ng \
        --disable-capstone \
+       --disable-cfi \
+       --disable-cfi-debug \
        --disable-cloop \
+       --disable-cocoa \
+       --disable-coreaudio \
        --disable-coroutine-pool \
+       --disable-crypto-afalg \
+       --disable-curl \
+       --disable-curses \
+       --disable-dbus-display \
+       --disable-debug-info \
+       --disable-debug-mutex \
+       --disable-debug-tcg \
        --disable-dmg \
        --disable-docs \
+       --disable-download \
+       --disable-dsound \
+       --disable-fdt \
+       --disable-fuse \
+       --disable-fuse-lseek \
+       --disable-gcrypt \
+       --disable-gettext \
+       --disable-gio \
+       --disable-glusterfs \
+       --disable-gnutls \
+       --disable-gtk \
        --disable-guest-agent \
-       --disable-hax \
+       --disable-guest-agent-msi \
+       --disable-hv-balloon \
        --disable-hvf \
-       --disable-keyring \
-       --disable-libxml2 \
+       --disable-iconv \
+       --disable-jack \
+       --disable-kvm \
+       --disable-l2tpv3 \
+       --disable-libdaxctl \
+       --disable-libiscsi \
+       --disable-libkeyutils \
+       --disable-libnfs \
+       --disable-libpmem \
+       --disable-libssh \
+       --disable-libudev \
+       --disable-libusb \
+       --disable-libvduse \
        --disable-linux-aio \
        --disable-linux-io-uring \
        --disable-linux-user \
        --disable-live-block-migration \
+       --disable-lto \
+       --disable-lzfse \
+       --disable-lzo \
        --disable-malloc-trim \
        --disable-membarrier \
+       --disable-module-upgrades \
+       --disable-modules \
+       --disable-mpath \
+       --disable-multiprocess \
+       --disable-netmap \
        --disable-nettle \
+       --disable-numa \
+       --disable-nvmm \
+       --disable-opengl \
+       --disable-oss \
+       --disable-pa \
        --disable-parallels \
        --disable-pie \
+       --disable-pipewire \
+       --disable-pixman \
+       --disable-plugins \
+       --disable-png \
+       --disable-pvrdma \
        --disable-qcow1 \
        --disable-qed \
        --disable-qom-cast-debug \
        --disable-rbd \
+       --disable-rdma \
+       --disable-relocatable \
        --disable-replication \
+       --disable-rng-none \
+       --disable-rutabaga-gfx \
        --disable-safe-stack \
+       --disable-sanitizers \
        --disable-sdl \
        --disable-sdl-image \
-       --disable-stack-protector \
+       --disable-seccomp \
+       --disable-selinux \
+       --disable-slirp \
+       --disable-slirp-smbd \
+       --disable-smartcard \
+       --disable-snappy \
+       --disable-sparse \
+       --disable-spice \
+       --disable-spice-protocol \
        --disable-strip \
        --disable-system \
        --disable-tcg \
+       --disable-tcg-interpreter \
+       --disable-tools \
        --disable-tpm \
+       --disable-u2f \
+       --disable-usb-redir \
+       --disable-user \
+       --disable-vde \
        --disable-vdi \
-       --disable-vnc \
        --disable-vhost-crypto \
        --disable-vhost-kernel \
        --disable-vhost-net \
-       --disable-vhost-scsi \
        --disable-vhost-user \
        --disable-vhost-user-blk-server \
-       --disable-vhost-user-fs \
        --disable-vhost-vdpa \
-       --disable-vhost-vsock \
+       --disable-virglrenderer \
+       --disable-virtfs \
+       --disable-vnc \
+       --disable-vnc-jpeg \
+       --disable-vnc-sasl \
+       --disable-vte \
        --disable-vvfat \
+       --disable-werror \
        --disable-whpx \
        --disable-xen \
+       --disable-xen-pci-passthrough \
        --disable-xkbcommon \
+       --disable-zstd \
+       --without-default-devices \
+       --disable-install-blobs \
+       --enable-attr \
        --enable-tools
 
 make %{?_smp_mflags} V=1
 
-%check
-
 %install
 cd %blddir
 
@@ -122,19 +201,35 @@ make %{?_smp_mflags} install DESTDIR=%{buildroot}
 install -D -m 0644 %{SOURCE1} %{buildroot}%_sysconfdir/%name/bridge.conf
 install -D -m 0755 scripts/analyze-migration.py  %{buildroot}%_bindir/analyze-migration.py
 install -D -m 0755 scripts/vmstate-static-checker.py  %{buildroot}%_bindir/vmstate-static-checker.py
+install -D -m 0755 scripts/kvm/vmxcap  %{buildroot}%_bindir/vmxcap
 
 rm -rf %{buildroot}%_datadir/qemu/keymaps
 unlink %{buildroot}%_datadir/qemu/trace-events-all
 
-%clean
-rm -rf %{buildroot}
+%package img
+Summary:        QEMU disk image utility
+Group:          System/Emulators/PC
 
-%files tools
-%defattr(-, root, root)
-%_bindir/analyze-migration.py
+%description img
+This package provides command line tools for manipulating disk images.
+
+%files img
 %_bindir/qemu-img
 %_bindir/qemu-io
 %_bindir/qemu-nbd
+
+%package tools
+Summary:        Tools for QEMU
+Group:          System/Emulators/PC
+Requires:       qemu-img
+
+%description tools
+This package contains various QEMU related tools, including a bridge helper,
+a virtfs helper, ivshmem, disk utilities and scripts for various purposes.
+
+%files tools
+%_bindir/analyze-migration.py
 %_bindir/vmstate-static-checker.py
+%_bindir/vmxcap
 %dir %_sysconfdir/%name
-%config %_sysconfdir/%name/bridge.conf
+%config(noreplace) %_sysconfdir/%name/bridge.conf
diff --git a/python/.gitignore b/python/.gitignore
new file mode 100644 (file)
index 0000000..c3ceb1c
--- /dev/null
@@ -0,0 +1,22 @@
+# linter/tooling cache
+.mypy_cache/
+.cache/
+
+# python packaging
+build/
+dist/
+qemu.egg-info/
+
+# editor config
+.idea/
+.vscode/
+
+# virtual environments
+.min-venv/
+.tox/
+.dev-venv/
+
+# Coverage.py reports
+.coverage
+.coverage.*
+htmlcov/
diff --git a/python/MANIFEST.in b/python/MANIFEST.in
new file mode 100644 (file)
index 0000000..7059ad2
--- /dev/null
@@ -0,0 +1,3 @@
+include VERSION
+include PACKAGE.rst
+exclude README.rst
diff --git a/python/Makefile b/python/Makefile
new file mode 100644 (file)
index 0000000..1fa4ba2
--- /dev/null
@@ -0,0 +1,133 @@
+QEMU_VENV_DIR=.dev-venv
+QEMU_MINVENV_DIR=.min-venv
+QEMU_TOX_EXTRA_ARGS ?=
+
+.PHONY: help
+help:
+       @echo "python packaging help:"
+       @echo ""
+       @echo "make check-minreqs:"
+       @echo "    Run tests in the minreqs virtual environment."
+       @echo "    These tests use the oldest dependencies."
+       @echo "    Requires: Python 3.8"
+       @echo "    Hint (Fedora): 'sudo dnf install python3.8'"
+       @echo ""
+       @echo "make check-tox:"
+       @echo "    Run tests against multiple python versions."
+       @echo "    These tests use the newest dependencies."
+       @echo "    Requires: Python 3.8 - 3.11, and tox."
+       @echo "    Hint (Fedora): 'sudo dnf install python3-tox python3.11'"
+       @echo "    The variable QEMU_TOX_EXTRA_ARGS can be use to pass extra"
+       @echo "    arguments to tox".
+       @echo ""
+       @echo "make check-dev:"
+       @echo "    Run tests in a venv against your default python3 version."
+       @echo "    These tests use the newest dependencies."
+       @echo "    Requires: Python 3.x"
+       @echo ""
+       @echo "make check:"
+       @echo "    Run tests in your *current environment*."
+       @echo "    Performs no environment setup of any kind."
+       @echo ""
+       @echo "make develop:"
+       @echo "    Install deps needed for 'make check',"
+       @echo "    and install the qemu package in editable mode."
+       @echo "    (Can be used in or outside of a venv.)"
+       @echo ""
+       @echo "make min-venv"
+       @echo "    Creates the minreqs virtual environment ($(QEMU_MINVENV_DIR))"
+       @echo ""
+       @echo "make dev-venv"
+       @echo "    Creates a simple venv for check-dev. ($(QEMU_VENV_DIR))"
+       @echo ""
+       @echo "make clean:"
+       @echo "    Remove package build output."
+       @echo ""
+       @echo "make distclean:"
+       @echo "    remove venv files, qemu package forwarder,"
+       @echo "    built distribution files, and everything from 'make clean'."
+       @echo ""
+       @echo -e "Have a nice day ^_^\n"
+
+.PHONY: pipenv check-pipenv
+pipenv check-pipenv:
+       @echo "pipenv was dropped; try 'make check-minreqs' or 'make min-venv'"
+       @exit 1
+
+PIP_INSTALL = pip install --disable-pip-version-check
+.PHONY: min-venv
+min-venv: $(QEMU_MINVENV_DIR) $(QEMU_MINVENV_DIR)/bin/activate
+$(QEMU_MINVENV_DIR) $(QEMU_MINVENV_DIR)/bin/activate: setup.cfg tests/minreqs.txt
+       @echo "VENV $(QEMU_MINVENV_DIR)"
+       @python3.8 -m venv $(QEMU_MINVENV_DIR)
+       @(                                                              \
+               echo "ACTIVATE $(QEMU_MINVENV_DIR)";                    \
+               . $(QEMU_MINVENV_DIR)/bin/activate;                     \
+               echo "INSTALL wheel $(QEMU_MINVENV_DIR)";               \
+               $(PIP_INSTALL) wheel 1>/dev/null;                              \
+               echo "INSTALL -r tests/minreqs.txt $(QEMU_MINVENV_DIR)";\
+               $(PIP_INSTALL) -r tests/minreqs.txt 1>/dev/null;        \
+               echo "INSTALL -e qemu $(QEMU_MINVENV_DIR)";             \
+               $(PIP_INSTALL) -e . 1>/dev/null;                        \
+       )
+       @touch $(QEMU_MINVENV_DIR)
+
+.PHONY: check-minreqs
+check-minreqs: min-venv
+       @(                                                      \
+               echo "ACTIVATE $(QEMU_MINVENV_DIR)";            \
+               . $(QEMU_MINVENV_DIR)/bin/activate;             \
+               make check;                                     \
+       )
+
+.PHONY: dev-venv
+dev-venv: $(QEMU_VENV_DIR) $(QEMU_VENV_DIR)/bin/activate
+$(QEMU_VENV_DIR) $(QEMU_VENV_DIR)/bin/activate: setup.cfg
+       @echo "VENV $(QEMU_VENV_DIR)"
+       @python3 -m venv $(QEMU_VENV_DIR)
+       @(                                                      \
+               echo "ACTIVATE $(QEMU_VENV_DIR)";               \
+               . $(QEMU_VENV_DIR)/bin/activate;                \
+               echo "INSTALL qemu[devel] $(QEMU_VENV_DIR)";    \
+               make develop 1>/dev/null;                       \
+       )
+       @touch $(QEMU_VENV_DIR)
+
+.PHONY: check-dev
+check-dev: dev-venv
+       @(                                                      \
+               echo "ACTIVATE $(QEMU_VENV_DIR)";               \
+               . $(QEMU_VENV_DIR)/bin/activate;                \
+               make check;                                     \
+       )
+
+.PHONY: develop
+develop:
+       $(PIP_INSTALL) -e .[devel]
+
+.PHONY: check
+check:
+       @avocado --config avocado.cfg run tests/
+
+.PHONY: check-tox
+check-tox:
+       @tox $(QEMU_TOX_EXTRA_ARGS)
+
+.PHONY: check-coverage
+check-coverage:
+       @coverage run -m avocado --config avocado.cfg run tests/*.py
+       @coverage combine
+       @coverage html
+       @coverage report
+
+.PHONY: clean
+clean:
+       python3 setup.py clean --all
+       rm -f pyproject.toml
+
+.PHONY: distclean
+distclean: clean
+       rm -rf qemu.egg-info/ .eggs/ dist/
+       rm -rf $(QEMU_VENV_DIR) $(QEMU_MINVENV_DIR) .tox/
+       rm -f .coverage .coverage.*
+       rm -rf htmlcov/
diff --git a/python/PACKAGE.rst b/python/PACKAGE.rst
new file mode 100644 (file)
index 0000000..b0b86cc
--- /dev/null
@@ -0,0 +1,43 @@
+QEMU Python Tooling
+===================
+
+This package provides QEMU tooling used by the QEMU project to build,
+configure, and test QEMU. It is not a fully-fledged SDK and it is subject
+to change at any time.
+
+Usage
+-----
+
+The ``qemu.qmp`` subpackage provides a library for communicating with
+QMP servers. The ``qemu.machine`` subpackage offers rudimentary
+facilities for launching and managing QEMU processes. Refer to each
+package's documentation
+(``>>> help(qemu.qmp)``, ``>>> help(qemu.machine)``)
+for more information.
+
+Contributing
+------------
+
+This package is maintained by John Snow <jsnow@redhat.com> as part of
+the QEMU source tree. Contributions are welcome and follow the `QEMU
+patch submission process
+<https://wiki.qemu.org/Contribute/SubmitAPatch>`_, which involves
+sending patches to the QEMU development mailing list.
+
+John maintains a `GitLab staging branch
+<https://gitlab.com/jsnow/qemu/-/tree/python>`_, and there is an
+official `GitLab mirror <https://gitlab.com/qemu-project/qemu>`_.
+
+Please report bugs on the `QEMU issue tracker
+<https://gitlab.com/qemu-project/qemu/-/issues>`_ and tag ``@jsnow`` in
+the report.
+
+Optional packages necessary for running code quality analysis for this
+package can be installed with the optional dependency group "devel":
+``pip install qemu[devel]``.
+
+``make develop`` can be used to install this package in editable mode
+(to the current environment) *and* bring in testing dependencies in one
+command.
+
+``make check`` can be used to run the available tests.
diff --git a/python/README.rst b/python/README.rst
new file mode 100644 (file)
index 0000000..d62e715
--- /dev/null
@@ -0,0 +1,84 @@
+QEMU Python Tooling
+===================
+
+This directory houses Python tooling used by the QEMU project to build,
+configure, and test QEMU. It is organized by namespace (``qemu``), and
+then by package (e.g. ``qemu/machine``, ``qemu/qmp``, etc).
+
+``setup.py`` is used by ``pip`` to install this tooling to the current
+environment. ``setup.cfg`` provides the packaging configuration used by
+``setup.py``. You will generally invoke it by doing one of the following:
+
+1. ``pip3 install .`` will install these packages to your current
+   environment. If you are inside a virtual environment, they will
+   install there. If you are not, it will attempt to install to the
+   global environment, which is **not recommended**.
+
+2. ``pip3 install --user .`` will install these packages to your user's
+   local python packages. If you are inside of a virtual environment,
+   this will fail; you want the first invocation above.
+
+If you append the ``--editable`` or ``-e`` argument to either invocation
+above, pip will install in "editable" mode. This installs the package as
+a forwarder ("qemu.egg-link") that points to the source tree. In so
+doing, the installed package always reflects the latest version in your
+source tree.
+
+Installing ".[devel]" instead of "." will additionally pull in required
+packages for testing this package. They are not runtime requirements,
+and are not needed to simply use these libraries.
+
+Running ``make develop`` will pull in all testing dependencies and
+install QEMU in editable mode to the current environment.
+(It is a shortcut for ``pip3 install -e .[devel]``.)
+
+See `Installing packages using pip and virtual environments
+<https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/>`_
+for more information.
+
+
+Using these packages without installing them
+--------------------------------------------
+
+These packages may be used without installing them first, by using one
+of two tricks:
+
+1. Set your PYTHONPATH environment variable to include this source
+   directory, e.g. ``~/src/qemu/python``. See
+   https://docs.python.org/3/using/cmdline.html#envvar-PYTHONPATH
+
+2. Inside a Python script, use ``sys.path`` to forcibly include a search
+   path prior to importing the ``qemu`` namespace. See
+   https://docs.python.org/3/library/sys.html#sys.path
+
+A strong downside to both approaches is that they generally interfere
+with static analysis tools being able to locate and analyze the code
+being imported.
+
+Package installation also normally provides executable console scripts,
+so that tools like ``qmp-shell`` are always available via $PATH. To
+invoke them without installation, you can invoke e.g.:
+
+``> PYTHONPATH=~/src/qemu/python python3 -m qemu.qmp.qmp_shell``
+
+The mappings between console script name and python module path can be
+found in ``setup.cfg``.
+
+
+Files in this directory
+-----------------------
+
+- ``qemu/`` Python 'qemu' namespace package source directory.
+- ``tests/`` Python package tests directory.
+- ``avocado.cfg`` Configuration for the Avocado test-runner.
+  Used by ``make check`` et al.
+- ``Makefile`` provides some common testing/installation invocations.
+  Try ``make help`` to see available targets.
+- ``MANIFEST.in`` is read by python setuptools, it specifies additional files
+  that should be included by a source distribution.
+- ``PACKAGE.rst`` is used as the README file that is visible on PyPI.org.
+- ``README.rst`` you are here!
+- ``VERSION`` contains the PEP-440 compliant version used to describe
+  this package; it is referenced by ``setup.cfg``.
+- ``setup.cfg`` houses setuptools package configuration.
+- ``setup.py`` is the setuptools installer used by pip; See above.
diff --git a/python/VERSION b/python/VERSION
new file mode 100644 (file)
index 0000000..c19f3b8
--- /dev/null
@@ -0,0 +1 @@
+0.6.1.0a1
diff --git a/python/avocado.cfg b/python/avocado.cfg
new file mode 100644 (file)
index 0000000..a460420
--- /dev/null
@@ -0,0 +1,13 @@
+[run]
+test_runner = nrunner
+
+[simpletests]
+# Don't show stdout/stderr in the test *summary*
+status.failure_fields = ['status']
+
+[job]
+# Don't show the full debug.log output; only select stdout/stderr.
+output.testlogs.logfiles = ['stdout', 'stderr']
+
+# Show full stdout/stderr only on tests that FAIL
+output.testlogs.statuses = ['FAIL']
diff --git a/python/qemu/README.rst b/python/qemu/README.rst
new file mode 100644 (file)
index 0000000..d04943f
--- /dev/null
@@ -0,0 +1,8 @@
+QEMU Python Namespace
+=====================
+
+This directory serves as the root of a `Python PEP 420 implicit
+namespace package <https://www.python.org/dev/peps/pep-0420/>`_.
+
+Each directory below is assumed to be an installable Python package that
+is available under the ``qemu.<package>`` namespace.
diff --git a/python/qemu/machine/README.rst b/python/qemu/machine/README.rst
new file mode 100644 (file)
index 0000000..8de2c3d
--- /dev/null
@@ -0,0 +1,9 @@
+qemu.machine package
+====================
+
+This package provides core utilities used for testing and debugging
+QEMU. It is used by the iotests, vm tests, avocado tests, and several
+other utilities in the ./scripts directory. It is not a fully-fledged
+SDK and it is subject to change at any time.
+
+See the documentation in ``__init__.py`` for more information.
diff --git a/python/qemu/machine/__init__.py b/python/qemu/machine/__init__.py
new file mode 100644 (file)
index 0000000..9ccd58e
--- /dev/null
@@ -0,0 +1,36 @@
+"""
+QEMU development and testing library.
+
+This library provides a few high-level classes for driving QEMU from a
+test suite, not intended for production use.
+
+ | QEMUQtestProtocol: send/receive qtest messages.
+ | QEMUMachine: Configure and Boot a QEMU VM
+ | +-- QEMUQtestMachine: VM class, with a qtest socket.
+
+"""
+
+# Copyright (C) 2020-2021 John Snow for Red Hat Inc.
+# Copyright (C) 2015-2016 Red Hat Inc.
+# Copyright (C) 2012 IBM Corp.
+#
+# Authors:
+#  John Snow <jsnow@redhat.com>
+#  Fam Zheng <fam@euphon.net>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+
+# pylint: disable=import-error
+# see: https://github.com/PyCQA/pylint/issues/3624
+# see: https://github.com/PyCQA/pylint/issues/3651
+from .machine import QEMUMachine
+from .qtest import QEMUQtestMachine, QEMUQtestProtocol
+
+
+__all__ = (
+    'QEMUMachine',
+    'QEMUQtestProtocol',
+    'QEMUQtestMachine',
+)
diff --git a/python/qemu/machine/console_socket.py b/python/qemu/machine/console_socket.py
new file mode 100644 (file)
index 0000000..0a4e09f
--- /dev/null
@@ -0,0 +1,142 @@
+"""
+QEMU Console Socket Module:
+
+This python module implements a ConsoleSocket object,
+which can drain a socket and optionally dump the bytes to file.
+"""
+# Copyright 2020 Linaro
+#
+# Authors:
+#  Robert Foley <robert.foley@linaro.org>
+#
+# This code is licensed under the GPL version 2 or later.  See
+# the COPYING file in the top-level directory.
+#
+
+from collections import deque
+import socket
+import threading
+import time
+from typing import Deque, Optional
+
+
+class ConsoleSocket(socket.socket):
+    """
+    ConsoleSocket represents a socket attached to a char device.
+
+    :param address: An AF_UNIX path or address.
+    :param sock_fd: Optionally, an existing socket file descriptor.
+                    One of address or sock_fd must be specified.
+    :param file: Optionally, a filename to log to.
+    :param drain: Optionally, drains the socket and places the bytes
+                  into an in memory buffer for later processing.
+    """
+    def __init__(self,
+                 address: Optional[str] = None,
+                 sock_fd: Optional[int] = None,
+                 file: Optional[str] = None,
+                 drain: bool = False):
+        if address is None and sock_fd is None:
+            raise ValueError("one of 'address' or 'sock_fd' must be specified")
+        if address is not None and sock_fd is not None:
+            raise ValueError("can't specify both 'address' and 'sock_fd'")
+
+        self._recv_timeout_sec = 300.0
+        self._sleep_time = 0.5
+        self._buffer: Deque[int] = deque()
+        if address is not None:
+            socket.socket.__init__(self, socket.AF_UNIX, socket.SOCK_STREAM)
+            self.connect(address)
+        else:
+            assert sock_fd is not None
+            socket.socket.__init__(self, fileno=sock_fd)
+        self._logfile = None
+        if file:
+            # pylint: disable=consider-using-with
+            self._logfile = open(file, "bw")
+        self._open = True
+        self._drain_thread = None
+        if drain:
+            self._drain_thread = self._thread_start()
+
+    def __repr__(self) -> str:
+        tmp = super().__repr__()
+        tmp = tmp.rstrip(">")
+        tmp = "%s,  logfile=%s, drain_thread=%s>" % (tmp, self._logfile,
+                                                     self._drain_thread)
+        return tmp
+
+    def _drain_fn(self) -> None:
+        """Drains the socket and runs while the socket is open."""
+        while self._open:
+            try:
+                self._drain_socket()
+            except socket.timeout:
+                # The socket is expected to timeout since we set a
+                # short timeout to allow the thread to exit when
+                # self._open is set to False.
+                time.sleep(self._sleep_time)
+
+    def _thread_start(self) -> threading.Thread:
+        """Kick off a thread to drain the socket."""
+        # Configure socket to not block and timeout.
+        # This allows our drain thread to not block
+        # on receive and exit smoothly.
+        socket.socket.setblocking(self, False)
+        socket.socket.settimeout(self, 1)
+        drain_thread = threading.Thread(target=self._drain_fn)
+        drain_thread.daemon = True
+        drain_thread.start()
+        return drain_thread
+
+    def close(self) -> None:
+        """Close the base object and wait for the thread to terminate"""
+        if self._open:
+            self._open = False
+            if self._drain_thread is not None:
+                thread, self._drain_thread = self._drain_thread, None
+                thread.join()
+            socket.socket.close(self)
+            if self._logfile:
+                self._logfile.close()
+                self._logfile = None
+
+    def _drain_socket(self) -> None:
+        """process arriving characters into in memory _buffer"""
+        data = socket.socket.recv(self, 1)
+        if self._logfile:
+            self._logfile.write(data)
+            self._logfile.flush()
+        self._buffer.extend(data)
+
+    def recv(self, bufsize: int = 1, flags: int = 0) -> bytes:
+        """Return chars from in memory buffer.
+           Maintains the same API as socket.socket.recv.
+        """
+        if self._drain_thread is None:
+            # Not buffering the socket, pass thru to socket.
+            return socket.socket.recv(self, bufsize, flags)
+        assert not flags, "Cannot pass flags to recv() in drained mode"
+        start_time = time.time()
+        while len(self._buffer) < bufsize:
+            time.sleep(self._sleep_time)
+            elapsed_sec = time.time() - start_time
+            if elapsed_sec > self._recv_timeout_sec:
+                raise socket.timeout
+        return bytes((self._buffer.popleft() for i in range(bufsize)))
+
+    def setblocking(self, value: bool) -> None:
+        """When not draining we pass thru to the socket,
+           since when draining we control socket blocking.
+        """
+        if self._drain_thread is None:
+            socket.socket.setblocking(self, value)
+
+    def settimeout(self, value: Optional[float]) -> None:
+        """When not draining we pass thru to the socket,
+           since when draining we control the timeout.
+        """
+        if value is not None:
+            self._recv_timeout_sec = value
+        if self._drain_thread is None:
+            socket.socket.settimeout(self, value)
diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
new file mode 100644 (file)
index 0000000..31cb9d6
--- /dev/null
@@ -0,0 +1,948 @@
+"""
+QEMU machine module:
+
+The machine module primarily provides the QEMUMachine class,
+which provides facilities for managing the lifetime of a QEMU VM.
+"""
+
+# Copyright (C) 2015-2016 Red Hat Inc.
+# Copyright (C) 2012 IBM Corp.
+#
+# Authors:
+#  Fam Zheng <famz@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+# Based on qmp.py.
+#
+
+import errno
+from itertools import chain
+import locale
+import logging
+import os
+import shutil
+import signal
+import socket
+import subprocess
+import tempfile
+from types import TracebackType
+from typing import (
+    Any,
+    BinaryIO,
+    Dict,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+    TypeVar,
+)
+
+from qemu.qmp import SocketAddrT
+from qemu.qmp.legacy import (
+    QEMUMonitorProtocol,
+    QMPMessage,
+    QMPReturnValue,
+)
+
+from . import console_socket
+
+
+LOG = logging.getLogger(__name__)
+
+
+class QEMUMachineError(Exception):
+    """
+    Exception called when an error in QEMUMachine happens.
+    """
+
+
+class QEMUMachineAddDeviceError(QEMUMachineError):
+    """
+    Exception raised when a request to add a device can not be fulfilled
+
+    The failures are caused by limitations, lack of information or conflicting
+    requests on the QEMUMachine methods.  This exception does not represent
+    failures reported by the QEMU binary itself.
+    """
+
+
+class VMLaunchFailure(QEMUMachineError):
+    """
+    Exception raised when a VM launch was attempted, but failed.
+    """
+    def __init__(self, exitcode: Optional[int],
+                 command: str, output: Optional[str]):
+        super().__init__(exitcode, command, output)
+        self.exitcode = exitcode
+        self.command = command
+        self.output = output
+
+    def __str__(self) -> str:
+        ret = ''
+        if self.__cause__ is not None:
+            name = type(self.__cause__).__name__
+            reason = str(self.__cause__)
+            if reason:
+                ret += f"{name}: {reason}"
+            else:
+                ret += f"{name}"
+        ret += '\n'
+
+        if self.exitcode is not None:
+            ret += f"\tExit code: {self.exitcode}\n"
+        ret += f"\tCommand: {self.command}\n"
+        ret += f"\tOutput: {self.output}\n"
+        return ret
+
+
+class AbnormalShutdown(QEMUMachineError):
+    """
+    Exception raised when a graceful shutdown was requested, but not performed.
+    """
+
+
+_T = TypeVar('_T', bound='QEMUMachine')
+
+
+class QEMUMachine:
+    """
+    A QEMU VM.
+
+    Use this object as a context manager to ensure
+    the QEMU process terminates::
+
+        with VM(binary) as vm:
+            ...
+        # vm is guaranteed to be shut down here
+    """
+    # pylint: disable=too-many-instance-attributes, too-many-public-methods
+
+    def __init__(self,
+                 binary: str,
+                 args: Sequence[str] = (),
+                 wrapper: Sequence[str] = (),
+                 name: Optional[str] = None,
+                 base_temp_dir: str = "/var/tmp",
+                 monitor_address: Optional[SocketAddrT] = None,
+                 drain_console: bool = False,
+                 console_log: Optional[str] = None,
+                 log_dir: Optional[str] = None,
+                 qmp_timer: Optional[float] = 30):
+        '''
+        Initialize a QEMUMachine
+
+        @param binary: path to the qemu binary
+        @param args: list of extra arguments
+        @param wrapper: list of arguments used as prefix to qemu binary
+        @param name: prefix for socket and log file names (default: qemu-PID)
+        @param base_temp_dir: default location where temp files are created
+        @param monitor_address: address for QMP monitor
+        @param drain_console: (optional) True to drain console socket to buffer
+        @param console_log: (optional) path to console log file
+        @param log_dir: where to create and keep log files
+        @param qmp_timer: (optional) default QMP socket timeout
+        @note: Qemu process is not started until launch() is used.
+        '''
+        # pylint: disable=too-many-arguments
+
+        # Direct user configuration
+
+        self._binary = binary
+        self._args = list(args)
+        self._wrapper = wrapper
+        self._qmp_timer = qmp_timer
+
+        self._name = name or f"{id(self):x}"
+        self._sock_pair: Optional[Tuple[socket.socket, socket.socket]] = None
+        self._cons_sock_pair: Optional[
+            Tuple[socket.socket, socket.socket]] = None
+        self._temp_dir: Optional[str] = None
+        self._base_temp_dir = base_temp_dir
+        self._log_dir = log_dir
+
+        self._monitor_address = monitor_address
+
+        self._console_log_path = console_log
+        if self._console_log_path:
+            # In order to log the console, buffering needs to be enabled.
+            self._drain_console = True
+        else:
+            self._drain_console = drain_console
+
+        # Runstate
+        self._qemu_log_path: Optional[str] = None
+        self._qemu_log_file: Optional[BinaryIO] = None
+        self._popen: Optional['subprocess.Popen[bytes]'] = None
+        self._events: List[QMPMessage] = []
+        self._iolog: Optional[str] = None
+        self._qmp_set = True   # Enable QMP monitor by default.
+        self._qmp_connection: Optional[QEMUMonitorProtocol] = None
+        self._qemu_full_args: Tuple[str, ...] = ()
+        self._launched = False
+        self._machine: Optional[str] = None
+        self._console_index = 0
+        self._console_set = False
+        self._console_device_type: Optional[str] = None
+        self._console_socket: Optional[socket.socket] = None
+        self._console_file: Optional[socket.SocketIO] = None
+        self._remove_files: List[str] = []
+        self._user_killed = False
+        self._quit_issued = False
+
+    def __enter__(self: _T) -> _T:
+        return self
+
+    def __exit__(self,
+                 exc_type: Optional[Type[BaseException]],
+                 exc_val: Optional[BaseException],
+                 exc_tb: Optional[TracebackType]) -> None:
+        self.shutdown()
+
+    def add_monitor_null(self) -> None:
+        """
+        This can be used to add an unused monitor instance.
+        """
+        self._args.append('-monitor')
+        self._args.append('null')
+
+    def add_fd(self: _T, fd: int, fdset: int,
+               opaque: str, opts: str = '') -> _T:
+        """
+        Pass a file descriptor to the VM
+        """
+        options = ['fd=%d' % fd,
+                   'set=%d' % fdset,
+                   'opaque=%s' % opaque]
+        if opts:
+            options.append(opts)
+
+        # This did not exist before 3.4, but since then it is
+        # mandatory for our purpose
+        if hasattr(os, 'set_inheritable'):
+            os.set_inheritable(fd, True)
+
+        self._args.append('-add-fd')
+        self._args.append(','.join(options))
+        return self
+
+    def send_fd_scm(self, fd: Optional[int] = None,
+                    file_path: Optional[str] = None) -> int:
+        """
+        Send an fd or file_path to the remote via SCM_RIGHTS.
+
+        Exactly one of fd and file_path must be given.  If it is
+        file_path, the file will be opened read-only and the new file
+        descriptor will be sent to the remote.
+        """
+        if file_path is not None:
+            assert fd is None
+            with open(file_path, "rb") as passfile:
+                fd = passfile.fileno()
+                self._qmp.send_fd_scm(fd)
+        else:
+            assert fd is not None
+            self._qmp.send_fd_scm(fd)
+
+        return 0
+
+    @staticmethod
+    def _remove_if_exists(path: str) -> None:
+        """
+        Remove file object at path if it exists
+        """
+        try:
+            os.remove(path)
+        except OSError as exception:
+            if exception.errno == errno.ENOENT:
+                return
+            raise
+
+    def is_running(self) -> bool:
+        """Returns true if the VM is running."""
+        return self._popen is not None and self._popen.poll() is None
+
+    @property
+    def _subp(self) -> 'subprocess.Popen[bytes]':
+        if self._popen is None:
+            raise QEMUMachineError('Subprocess pipe not present')
+        return self._popen
+
+    def exitcode(self) -> Optional[int]:
+        """Returns the exit code if possible, or None."""
+        if self._popen is None:
+            return None
+        return self._popen.poll()
+
+    def get_pid(self) -> Optional[int]:
+        """Returns the PID of the running process, or None."""
+        if not self.is_running():
+            return None
+        return self._subp.pid
+
+    def _load_io_log(self) -> None:
+        # Assume that the output encoding of QEMU's terminal output is
+        # defined by our locale. If indeterminate, allow open() to fall
+        # back to the platform default.
+        _, encoding = locale.getlocale()
+        if self._qemu_log_path is not None:
+            with open(self._qemu_log_path, "r", encoding=encoding) as iolog:
+                self._iolog = iolog.read()
+
+    @property
+    def _base_args(self) -> List[str]:
+        args = ['-display', 'none', '-vga', 'none']
+
+        if self._qmp_set:
+            if self._sock_pair:
+                moncdev = f"socket,id=mon,fd={self._sock_pair[0].fileno()}"
+            elif isinstance(self._monitor_address, tuple):
+                moncdev = "socket,id=mon,host={},port={}".format(
+                    *self._monitor_address
+                )
+            else:
+                moncdev = f"socket,id=mon,path={self._monitor_address}"
+            args.extend(['-chardev', moncdev, '-mon',
+                         'chardev=mon,mode=control'])
+
+        if self._machine is not None:
+            args.extend(['-machine', self._machine])
+        for _ in range(self._console_index):
+            args.extend(['-serial', 'null'])
+        if self._console_set:
+            assert self._cons_sock_pair is not None
+            fd = self._cons_sock_pair[0].fileno()
+            chardev = f"socket,id=console,fd={fd}"
+            args.extend(['-chardev', chardev])
+            if self._console_device_type is None:
+                args.extend(['-serial', 'chardev:console'])
+            else:
+                device = '%s,chardev=console' % self._console_device_type
+                args.extend(['-device', device])
+        return args
+
+    @property
+    def args(self) -> List[str]:
+        """Returns the list of arguments given to the QEMU binary."""
+        return self._args
+
+    def _pre_launch(self) -> None:
+        if self._qmp_set:
+            if self._monitor_address is None:
+                self._sock_pair = socket.socketpair()
+                os.set_inheritable(self._sock_pair[0].fileno(), True)
+                sock = self._sock_pair[1]
+            if isinstance(self._monitor_address, str):
+                self._remove_files.append(self._monitor_address)
+
+            sock_or_addr = self._monitor_address or sock
+            assert sock_or_addr is not None
+
+            self._qmp_connection = QEMUMonitorProtocol(
+                sock_or_addr,
+                server=bool(self._monitor_address),
+                nickname=self._name
+            )
+
+        if self._console_set:
+            self._cons_sock_pair = socket.socketpair()
+            os.set_inheritable(self._cons_sock_pair[0].fileno(), True)
+
+        # NOTE: Make sure any opened resources are *definitely* freed in
+        # _post_shutdown()!
+        # pylint: disable=consider-using-with
+        self._qemu_log_path = os.path.join(self.log_dir, self._name + ".log")
+        self._qemu_log_file = open(self._qemu_log_path, 'wb')
+
+        self._iolog = None
+        self._qemu_full_args = tuple(chain(
+            self._wrapper,
+            [self._binary],
+            self._base_args,
+            self._args
+        ))
+
+    def _post_launch(self) -> None:
+        if self._sock_pair:
+            self._sock_pair[0].close()
+        if self._cons_sock_pair:
+            self._cons_sock_pair[0].close()
+
+        if self._qmp_connection:
+            if self._sock_pair:
+                self._qmp.connect()
+            else:
+                self._qmp.accept(self._qmp_timer)
+
+    def _close_qemu_log_file(self) -> None:
+        if self._qemu_log_file is not None:
+            self._qemu_log_file.close()
+            self._qemu_log_file = None
+
+    def _post_shutdown(self) -> None:
+        """
+        Called to cleanup the VM instance after the process has exited.
+        May also be called after a failed launch.
+        """
+        LOG.debug("Cleaning up after VM process")
+        try:
+            self._close_qmp_connection()
+        except Exception as err:  # pylint: disable=broad-except
+            LOG.warning(
+                "Exception closing QMP connection: %s",
+                str(err) if str(err) else type(err).__name__
+            )
+        finally:
+            assert self._qmp_connection is None
+
+        if self._sock_pair:
+            self._sock_pair[0].close()
+            self._sock_pair[1].close()
+            self._sock_pair = None
+
+        self._close_qemu_log_file()
+
+        self._load_io_log()
+
+        self._qemu_log_path = None
+
+        if self._temp_dir is not None:
+            shutil.rmtree(self._temp_dir)
+            self._temp_dir = None
+
+        while len(self._remove_files) > 0:
+            self._remove_if_exists(self._remove_files.pop())
+
+        exitcode = self.exitcode()
+        if (exitcode is not None and exitcode < 0
+                and not (self._user_killed and exitcode == -signal.SIGKILL)):
+            msg = 'qemu received signal %i; command: "%s"'
+            if self._qemu_full_args:
+                command = ' '.join(self._qemu_full_args)
+            else:
+                command = ''
+            LOG.warning(msg, -int(exitcode), command)
+
+        self._quit_issued = False
+        self._user_killed = False
+        self._launched = False
+
+    def launch(self) -> None:
+        """
+        Launch the VM and make sure we cleanup and expose the
+        command line/output in case of exception
+        """
+
+        if self._launched:
+            raise QEMUMachineError('VM already launched')
+
+        try:
+            self._launch()
+        except BaseException as exc:
+            # We may have launched the process but it may
+            # have exited before we could connect via QMP.
+            # Assume the VM didn't launch or is exiting.
+            # If we don't wait for the process, exitcode() may still be
+            # 'None' by the time control is ceded back to the caller.
+            if self._launched:
+                self.wait()
+            else:
+                self._post_shutdown()
+
+            if isinstance(exc, Exception):
+                raise VMLaunchFailure(
+                    exitcode=self.exitcode(),
+                    command=' '.join(self._qemu_full_args),
+                    output=self._iolog
+                ) from exc
+
+            # Don't wrap 'BaseException'; doing so would downgrade
+            # that exception. However, we still want to clean up.
+            raise
+
+    def _launch(self) -> None:
+        """
+        Launch the VM and establish a QMP connection
+        """
+        self._pre_launch()
+        LOG.debug('VM launch command: %r', ' '.join(self._qemu_full_args))
+
+        # Cleaning up of this subprocess is guaranteed by _do_shutdown.
+        # pylint: disable=consider-using-with
+        self._popen = subprocess.Popen(self._qemu_full_args,
+                                       stdin=subprocess.DEVNULL,
+                                       stdout=self._qemu_log_file,
+                                       stderr=subprocess.STDOUT,
+                                       shell=False,
+                                       close_fds=False)
+        self._launched = True
+        self._post_launch()
+
+    def _close_qmp_connection(self) -> None:
+        """
+        Close the underlying QMP connection, if any.
+
+        Dutifully report errors that occurred while closing, but assume
+        that any error encountered indicates an abnormal termination
+        process and not a failure to close.
+        """
+        if self._qmp_connection is None:
+            return
+
+        try:
+            self._qmp.close()
+        except EOFError:
+            # EOF can occur as an Exception here when using the Async
+            # QMP backend. It indicates that the server closed the
+            # stream. If we successfully issued 'quit' at any point,
+            # then this was expected. If the remote went away without
+            # our permission, it's worth reporting that as an abnormal
+            # shutdown case.
+            if not (self._user_killed or self._quit_issued):
+                raise
+        finally:
+            self._qmp_connection = None
+
+    def _early_cleanup(self) -> None:
+        """
+        Perform any cleanup that needs to happen before the VM exits.
+
+        This method may be called twice upon shutdown, once each by soft
+        and hard shutdown in failover scenarios.
+        """
+        # If we keep the console socket open, we may deadlock waiting
+        # for QEMU to exit, while QEMU is waiting for the socket to
+        # become writable.
+        if self._console_file is not None:
+            LOG.debug("Closing console file")
+            self._console_file.close()
+            self._console_file = None
+
+        if self._console_socket is not None:
+            LOG.debug("Closing console socket")
+            self._console_socket.close()
+            self._console_socket = None
+
+        if self._cons_sock_pair:
+            self._cons_sock_pair[0].close()
+            self._cons_sock_pair[1].close()
+            self._cons_sock_pair = None
+
+    def _hard_shutdown(self) -> None:
+        """
+        Perform early cleanup, kill the VM, and wait for it to terminate.
+
+        :raise subprocess.Timeout: When timeout is exceeds 60 seconds
+            waiting for the QEMU process to terminate.
+        """
+        LOG.debug("Performing hard shutdown")
+        self._early_cleanup()
+        self._subp.kill()
+        self._subp.wait(timeout=60)
+
+    def _soft_shutdown(self, timeout: Optional[int]) -> None:
+        """
+        Perform early cleanup, attempt to gracefully shut down the VM, and wait
+        for it to terminate.
+
+        :param timeout: Timeout in seconds for graceful shutdown.
+                        A value of None is an infinite wait.
+
+        :raise ConnectionReset: On QMP communication errors
+        :raise subprocess.TimeoutExpired: When timeout is exceeded waiting for
+            the QEMU process to terminate.
+        """
+        LOG.debug("Attempting graceful termination")
+
+        self._early_cleanup()
+
+        if self._quit_issued:
+            LOG.debug(
+                "Anticipating QEMU termination due to prior 'quit' command, "
+                "or explicit call to wait()"
+            )
+        else:
+            LOG.debug("Politely asking QEMU to terminate")
+
+        if self._qmp_connection:
+            try:
+                if not self._quit_issued:
+                    # May raise ExecInterruptedError or StateError if the
+                    # connection dies or has *already* died.
+                    self.qmp('quit')
+            finally:
+                # Regardless, we want to quiesce the connection.
+                self._close_qmp_connection()
+        elif not self._quit_issued:
+            LOG.debug(
+                "Not anticipating QEMU quit and no QMP connection present, "
+                "issuing SIGTERM"
+            )
+            self._subp.terminate()
+
+        # May raise subprocess.TimeoutExpired
+        LOG.debug(
+            "Waiting (timeout=%s) for QEMU process (pid=%s) to terminate",
+            timeout, self._subp.pid
+        )
+        self._subp.wait(timeout=timeout)
+
+    def _do_shutdown(self, timeout: Optional[int]) -> None:
+        """
+        Attempt to shutdown the VM gracefully; fallback to a hard shutdown.
+
+        :param timeout: Timeout in seconds for graceful shutdown.
+                        A value of None is an infinite wait.
+
+        :raise AbnormalShutdown: When the VM could not be shut down gracefully.
+            The inner exception will likely be ConnectionReset or
+            subprocess.TimeoutExpired. In rare cases, non-graceful termination
+            may result in its own exceptions, likely subprocess.TimeoutExpired.
+        """
+        try:
+            self._soft_shutdown(timeout)
+        except Exception as exc:
+            if isinstance(exc, subprocess.TimeoutExpired):
+                LOG.debug("Timed out waiting for QEMU process to exit")
+            LOG.debug("Graceful shutdown failed", exc_info=True)
+            LOG.debug("Falling back to hard shutdown")
+            self._hard_shutdown()
+            raise AbnormalShutdown("Could not perform graceful shutdown") \
+                from exc
+
+    def shutdown(self,
+                 hard: bool = False,
+                 timeout: Optional[int] = 30) -> None:
+        """
+        Terminate the VM (gracefully if possible) and perform cleanup.
+        Cleanup will always be performed.
+
+        If the VM has not yet been launched, or shutdown(), wait(), or kill()
+        have already been called, this method does nothing.
+
+        :param hard: When true, do not attempt graceful shutdown, and
+                     suppress the SIGKILL warning log message.
+        :param timeout: Optional timeout in seconds for graceful shutdown.
+                        Default 30 seconds, A `None` value is an infinite wait.
+        """
+        if not self._launched:
+            return
+
+        LOG.debug("Shutting down VM appliance; timeout=%s", timeout)
+        if hard:
+            LOG.debug("Caller requests immediate termination of QEMU process.")
+
+        try:
+            if hard:
+                self._user_killed = True
+                self._hard_shutdown()
+            else:
+                self._do_shutdown(timeout)
+        finally:
+            self._post_shutdown()
+
+    def kill(self) -> None:
+        """
+        Terminate the VM forcefully, wait for it to exit, and perform cleanup.
+        """
+        self.shutdown(hard=True)
+
+    def wait(self, timeout: Optional[int] = 30) -> None:
+        """
+        Wait for the VM to power off and perform post-shutdown cleanup.
+
+        :param timeout: Optional timeout in seconds. Default 30 seconds.
+                        A value of `None` is an infinite wait.
+        """
+        self._quit_issued = True
+        self.shutdown(timeout=timeout)
+
+    def set_qmp_monitor(self, enabled: bool = True) -> None:
+        """
+        Set the QMP monitor.
+
+        @param enabled: if False, qmp monitor options will be removed from
+                        the base arguments of the resulting QEMU command
+                        line. Default is True.
+
+        .. note:: Call this function before launch().
+        """
+        self._qmp_set = enabled
+
+    @property
+    def _qmp(self) -> QEMUMonitorProtocol:
+        if self._qmp_connection is None:
+            raise QEMUMachineError("Attempt to access QMP with no connection")
+        return self._qmp_connection
+
+    @classmethod
+    def _qmp_args(cls, conv_keys: bool,
+                  args: Dict[str, Any]) -> Dict[str, object]:
+        if conv_keys:
+            return {k.replace('_', '-'): v for k, v in args.items()}
+
+        return args
+
+    def qmp(self, cmd: str,
+            args_dict: Optional[Dict[str, object]] = None,
+            conv_keys: Optional[bool] = None,
+            **args: Any) -> QMPMessage:
+        """
+        Invoke a QMP command and return the response dict
+        """
+        if args_dict is not None:
+            assert not args
+            assert conv_keys is None
+            args = args_dict
+            conv_keys = False
+
+        if conv_keys is None:
+            conv_keys = True
+
+        qmp_args = self._qmp_args(conv_keys, args)
+        ret = self._qmp.cmd_raw(cmd, args=qmp_args)
+        if cmd == 'quit' and 'error' not in ret and 'return' in ret:
+            self._quit_issued = True
+        return ret
+
+    def cmd(self, cmd: str,
+            args_dict: Optional[Dict[str, object]] = None,
+            conv_keys: Optional[bool] = None,
+            **args: Any) -> QMPReturnValue:
+        """
+        Invoke a QMP command.
+        On success return the response dict.
+        On failure raise an exception.
+        """
+        if args_dict is not None:
+            assert not args
+            assert conv_keys is None
+            args = args_dict
+            conv_keys = False
+
+        if conv_keys is None:
+            conv_keys = True
+
+        qmp_args = self._qmp_args(conv_keys, args)
+        ret = self._qmp.cmd(cmd, **qmp_args)
+        if cmd == 'quit':
+            self._quit_issued = True
+        return ret
+
+    def get_qmp_event(self, wait: bool = False) -> Optional[QMPMessage]:
+        """
+        Poll for one queued QMP events and return it
+        """
+        if self._events:
+            return self._events.pop(0)
+        return self._qmp.pull_event(wait=wait)
+
+    def get_qmp_events(self, wait: bool = False) -> List[QMPMessage]:
+        """
+        Poll for queued QMP events and return a list of dicts
+        """
+        events = self._qmp.get_events(wait=wait)
+        events.extend(self._events)
+        del self._events[:]
+        return events
+
+    @staticmethod
+    def event_match(event: Any, match: Optional[Any]) -> bool:
+        """
+        Check if an event matches optional match criteria.
+
+        The match criteria takes the form of a matching subdict. The event is
+        checked to be a superset of the subdict, recursively, with matching
+        values whenever the subdict values are not None.
+
+        This has a limitation that you cannot explicitly check for None values.
+
+        Examples, with the subdict queries on the left:
+         - None matches any object.
+         - {"foo": None} matches {"foo": {"bar": 1}}
+         - {"foo": None} matches {"foo": 5}
+         - {"foo": {"abc": None}} does not match {"foo": {"bar": 1}}
+         - {"foo": {"rab": 2}} matches {"foo": {"bar": 1, "rab": 2}}
+        """
+        if match is None:
+            return True
+
+        try:
+            for key in match:
+                if key in event:
+                    if not QEMUMachine.event_match(event[key], match[key]):
+                        return False
+                else:
+                    return False
+            return True
+        except TypeError:
+            # either match or event wasn't iterable (not a dict)
+            return bool(match == event)
+
+    def event_wait(self, name: str,
+                   timeout: float = 60.0,
+                   match: Optional[QMPMessage] = None) -> Optional[QMPMessage]:
+        """
+        event_wait waits for and returns a named event from QMP with a timeout.
+
+        name: The event to wait for.
+        timeout: QEMUMonitorProtocol.pull_event timeout parameter.
+        match: Optional match criteria. See event_match for details.
+        """
+        return self.events_wait([(name, match)], timeout)
+
+    def events_wait(self,
+                    events: Sequence[Tuple[str, Any]],
+                    timeout: float = 60.0) -> Optional[QMPMessage]:
+        """
+        events_wait waits for and returns a single named event from QMP.
+        In the case of multiple qualifying events, this function returns the
+        first one.
+
+        :param events: A sequence of (name, match_criteria) tuples.
+                       The match criteria are optional and may be None.
+                       See event_match for details.
+        :param timeout: Optional timeout, in seconds.
+                        See QEMUMonitorProtocol.pull_event.
+
+        :raise asyncio.TimeoutError:
+            If timeout was non-zero and no matching events were found.
+
+        :return: A QMP event matching the filter criteria.
+                 If timeout was 0 and no event matched, None.
+        """
+        def _match(event: QMPMessage) -> bool:
+            for name, match in events:
+                if event['event'] == name and self.event_match(event, match):
+                    return True
+            return False
+
+        event: Optional[QMPMessage]
+
+        # Search cached events
+        for event in self._events:
+            if _match(event):
+                self._events.remove(event)
+                return event
+
+        # Poll for new events
+        while True:
+            event = self._qmp.pull_event(wait=timeout)
+            if event is None:
+                # NB: None is only returned when timeout is false-ish.
+                # Timeouts raise asyncio.TimeoutError instead!
+                break
+            if _match(event):
+                return event
+            self._events.append(event)
+
+        return None
+
+    def get_log(self) -> Optional[str]:
+        """
+        After self.shutdown or failed qemu execution, this returns the output
+        of the qemu process.
+        """
+        return self._iolog
+
+    def add_args(self, *args: str) -> None:
+        """
+        Adds to the list of extra arguments to be given to the QEMU binary
+        """
+        self._args.extend(args)
+
+    def set_machine(self, machine_type: str) -> None:
+        """
+        Sets the machine type
+
+        If set, the machine type will be added to the base arguments
+        of the resulting QEMU command line.
+        """
+        self._machine = machine_type
+
+    def set_console(self,
+                    device_type: Optional[str] = None,
+                    console_index: int = 0) -> None:
+        """
+        Sets the device type for a console device
+
+        If set, the console device and a backing character device will
+        be added to the base arguments of the resulting QEMU command
+        line.
+
+        This is a convenience method that will either use the provided
+        device type, or default to a "-serial chardev:console" command
+        line argument.
+
+        The actual setting of command line arguments will be be done at
+        machine launch time, as it depends on the temporary directory
+        to be created.
+
+        @param device_type: the device type, such as "isa-serial".  If
+                            None is given (the default value) a "-serial
+                            chardev:console" command line argument will
+                            be used instead, resorting to the machine's
+                            default device type.
+        @param console_index: the index of the console device to use.
+                              If not zero, the command line will create
+                              'index - 1' consoles and connect them to
+                              the 'null' backing character device.
+        """
+        self._console_set = True
+        self._console_device_type = device_type
+        self._console_index = console_index
+
+    @property
+    def console_socket(self) -> socket.socket:
+        """
+        Returns a socket connected to the console
+        """
+        if self._console_socket is None:
+            LOG.debug("Opening console socket")
+            if not self._console_set:
+                raise QEMUMachineError(
+                    "Attempt to access console socket with no connection")
+            assert self._cons_sock_pair is not None
+            # os.dup() is used here for sock_fd because otherwise we'd
+            # have two rich python socket objects that would each try to
+            # close the same underlying fd when either one gets garbage
+            # collected.
+            self._console_socket = console_socket.ConsoleSocket(
+                sock_fd=os.dup(self._cons_sock_pair[1].fileno()),
+                file=self._console_log_path,
+                drain=self._drain_console)
+            self._cons_sock_pair[1].close()
+        return self._console_socket
+
+    @property
+    def console_file(self) -> socket.SocketIO:
+        """
+        Returns a file associated with the console socket
+        """
+        if self._console_file is None:
+            LOG.debug("Opening console file")
+            self._console_file = self.console_socket.makefile(mode='rb',
+                                                              buffering=0,
+                                                              encoding='utf-8')
+        return self._console_file
+
+    @property
+    def temp_dir(self) -> str:
+        """
+        Returns a temporary directory to be used for this machine
+        """
+        if self._temp_dir is None:
+            self._temp_dir = tempfile.mkdtemp(prefix="qemu-machine-",
+                                              dir=self._base_temp_dir)
+        return self._temp_dir
+
+    @property
+    def log_dir(self) -> str:
+        """
+        Returns a directory to be used for writing logs
+        """
+        if self._log_dir is None:
+            return self.temp_dir
+        return self._log_dir
diff --git a/python/qemu/machine/py.typed b/python/qemu/machine/py.typed
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/python/qemu/machine/qtest.py b/python/qemu/machine/qtest.py
new file mode 100644 (file)
index 0000000..4f5ede8
--- /dev/null
@@ -0,0 +1,191 @@
+"""
+QEMU qtest library
+
+qtest offers the QEMUQtestProtocol and QEMUQTestMachine classes, which
+offer a connection to QEMU's qtest protocol socket, and a qtest-enabled
+subclass of QEMUMachine, respectively.
+"""
+
+# Copyright (C) 2015 Red Hat Inc.
+#
+# Authors:
+#  Fam Zheng <famz@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+# Based on qmp.py.
+#
+
+import os
+import socket
+from typing import (
+    List,
+    Optional,
+    Sequence,
+    TextIO,
+    Tuple,
+)
+
+from qemu.qmp import SocketAddrT
+
+from .machine import QEMUMachine
+
+
+class QEMUQtestProtocol:
+    """
+    QEMUQtestProtocol implements a connection to a qtest socket.
+
+    :param address: QEMU address, can be either a unix socket path (string)
+                    or a tuple in the form ( address, port ) for a TCP
+                    connection
+    :param sock: An existing socket can be provided as an alternative to
+                 an address. One of address or sock must be provided.
+    :param server: server mode, listens on the socket. Only meaningful
+                   in conjunction with an address and not an existing
+                   socket.
+
+    :raise socket.error: on socket connection errors
+
+    .. note::
+       No connection is established by __init__(), this is done
+       by the connect() or accept() methods.
+    """
+    def __init__(self,
+                 address: Optional[SocketAddrT] = None,
+                 sock: Optional[socket.socket] = None,
+                 server: bool = False):
+        if address is None and sock is None:
+            raise ValueError("Either 'address' or 'sock' must be specified")
+        if address is not None and sock is not None:
+            raise ValueError(
+                "Either 'address' or 'sock' must be specified, but not both")
+        if sock is not None and server:
+            raise ValueError("server=True is meaningless when passing socket")
+
+        self._address = address
+        self._sock = sock or self._get_sock()
+        self._sockfile: Optional[TextIO] = None
+
+        if server:
+            assert self._address is not None
+            self._sock.bind(self._address)
+            self._sock.listen(1)
+
+    def _get_sock(self) -> socket.socket:
+        assert self._address is not None
+        if isinstance(self._address, tuple):
+            family = socket.AF_INET
+        else:
+            family = socket.AF_UNIX
+        return socket.socket(family, socket.SOCK_STREAM)
+
+    def connect(self) -> None:
+        """
+        Connect to the qtest socket.
+
+        @raise socket.error on socket connection errors
+        """
+        if self._address is not None:
+            self._sock.connect(self._address)
+        self._sockfile = self._sock.makefile(mode='r')
+
+    def accept(self) -> None:
+        """
+        Await connection from QEMU.
+
+        @raise socket.error on socket connection errors
+        """
+        self._sock, _ = self._sock.accept()
+        self._sockfile = self._sock.makefile(mode='r')
+
+    def cmd(self, qtest_cmd: str) -> str:
+        """
+        Send a qtest command on the wire.
+
+        @param qtest_cmd: qtest command text to be sent
+        """
+        assert self._sockfile is not None
+        self._sock.sendall((qtest_cmd + "\n").encode('utf-8'))
+        resp = self._sockfile.readline()
+        return resp
+
+    def close(self) -> None:
+        """
+        Close this socket.
+        """
+        self._sock.close()
+        if self._sockfile:
+            self._sockfile.close()
+            self._sockfile = None
+
+    def settimeout(self, timeout: Optional[float]) -> None:
+        """Set a timeout, in seconds."""
+        self._sock.settimeout(timeout)
+
+
+class QEMUQtestMachine(QEMUMachine):
+    """
+    A QEMU VM, with a qtest socket available.
+    """
+
+    def __init__(self,
+                 binary: str,
+                 args: Sequence[str] = (),
+                 wrapper: Sequence[str] = (),
+                 name: Optional[str] = None,
+                 base_temp_dir: str = "/var/tmp",
+                 qmp_timer: Optional[float] = None):
+        # pylint: disable=too-many-arguments
+
+        if name is None:
+            name = "qemu-%d" % os.getpid()
+        super().__init__(binary, args, wrapper=wrapper, name=name,
+                         base_temp_dir=base_temp_dir,
+                         qmp_timer=qmp_timer)
+        self._qtest: Optional[QEMUQtestProtocol] = None
+        self._qtest_sock_pair: Optional[
+            Tuple[socket.socket, socket.socket]] = None
+
+    @property
+    def _base_args(self) -> List[str]:
+        args = super()._base_args
+        assert self._qtest_sock_pair is not None
+        fd = self._qtest_sock_pair[0].fileno()
+        args.extend([
+            '-chardev', f"socket,id=qtest,fd={fd}",
+            '-qtest', 'chardev:qtest',
+            '-accel', 'qtest'
+        ])
+        return args
+
+    def _pre_launch(self) -> None:
+        self._qtest_sock_pair = socket.socketpair()
+        os.set_inheritable(self._qtest_sock_pair[0].fileno(), True)
+        super()._pre_launch()
+        self._qtest = QEMUQtestProtocol(sock=self._qtest_sock_pair[1])
+
+    def _post_launch(self) -> None:
+        assert self._qtest is not None
+        super()._post_launch()
+        if self._qtest_sock_pair:
+            self._qtest_sock_pair[0].close()
+        self._qtest.connect()
+
+    def _post_shutdown(self) -> None:
+        if self._qtest_sock_pair:
+            self._qtest_sock_pair[0].close()
+            self._qtest_sock_pair[1].close()
+            self._qtest_sock_pair = None
+        super()._post_shutdown()
+
+    def qtest(self, cmd: str) -> str:
+        """
+        Send a qtest command to the guest.
+
+        :param cmd: qtest command to send
+        :return: qtest server response
+        """
+        if self._qtest is None:
+            raise RuntimeError("qtest socket not available")
+        return self._qtest.cmd(cmd)
diff --git a/python/qemu/qmp/__init__.py b/python/qemu/qmp/__init__.py
new file mode 100644 (file)
index 0000000..69190d0
--- /dev/null
@@ -0,0 +1,59 @@
+"""
+QEMU Monitor Protocol (QMP) development library & tooling.
+
+This package provides a fairly low-level class for communicating
+asynchronously with QMP protocol servers, as implemented by QEMU, the
+QEMU Guest Agent, and the QEMU Storage Daemon.
+
+`QMPClient` provides the main functionality of this package. All errors
+raised by this library derive from `QMPError`, see `qmp.error` for
+additional detail. See `qmp.events` for an in-depth tutorial on
+managing QMP events.
+"""
+
+# Copyright (C) 2020-2022 John Snow for Red Hat, Inc.
+#
+# Authors:
+#  John Snow <jsnow@redhat.com>
+#
+# Based on earlier work by Luiz Capitulino <lcapitulino@redhat.com>.
+#
+# This work is licensed under the terms of the GNU LGPL, version 2 or
+# later. See the COPYING file in the top-level directory.
+
+import logging
+
+from .error import QMPError
+from .events import EventListener
+from .message import Message
+from .protocol import (
+    ConnectError,
+    Runstate,
+    SocketAddrT,
+    StateError,
+)
+from .qmp_client import ExecInterruptedError, ExecuteError, QMPClient
+
+
+# Suppress logging unless an application engages it.
+logging.getLogger('qemu.qmp').addHandler(logging.NullHandler())
+
+
+# The order of these fields impact the Sphinx documentation order.
+__all__ = (
+    # Classes, most to least important
+    'QMPClient',
+    'Message',
+    'EventListener',
+    'Runstate',
+
+    # Exceptions, most generic to most explicit
+    'QMPError',
+    'StateError',
+    'ConnectError',
+    'ExecuteError',
+    'ExecInterruptedError',
+
+    # Type aliases
+    'SocketAddrT',
+)
diff --git a/python/qemu/qmp/error.py b/python/qemu/qmp/error.py
new file mode 100644 (file)
index 0000000..24ba4d5
--- /dev/null
@@ -0,0 +1,50 @@
+"""
+QMP Error Classes
+
+This package seeks to provide semantic error classes that are intended
+to be used directly by clients when they would like to handle particular
+semantic failures (e.g. "failed to connect") without needing to know the
+enumeration of possible reasons for that failure.
+
+QMPError serves as the ancestor for all exceptions raised by this
+package, and is suitable for use in handling semantic errors from this
+library. In most cases, individual public methods will attempt to catch
+and re-encapsulate various exceptions to provide a semantic
+error-handling interface.
+
+.. admonition:: QMP Exception Hierarchy Reference
+
+ |   `Exception`
+ |    +-- `QMPError`
+ |         +-- `ConnectError`
+ |         +-- `StateError`
+ |         +-- `ExecInterruptedError`
+ |         +-- `ExecuteError`
+ |         +-- `ListenerError`
+ |         +-- `ProtocolError`
+ |              +-- `DeserializationError`
+ |              +-- `UnexpectedTypeError`
+ |              +-- `ServerParseError`
+ |              +-- `BadReplyError`
+ |              +-- `GreetingError`
+ |              +-- `NegotiationError`
+"""
+
+
+class QMPError(Exception):
+    """Abstract error class for all errors originating from this package."""
+
+
+class ProtocolError(QMPError):
+    """
+    Abstract error class for protocol failures.
+
+    Semantically, these errors are generally the fault of either the
+    protocol server or as a result of a bug in this library.
+
+    :param error_message: Human-readable string describing the error.
+    """
+    def __init__(self, error_message: str):
+        super().__init__(error_message)
+        #: Human-readable error message, without any prefix.
+        self.error_message: str = error_message
diff --git a/python/qemu/qmp/events.py b/python/qemu/qmp/events.py
new file mode 100644 (file)
index 0000000..6199776
--- /dev/null
@@ -0,0 +1,717 @@
+"""
+QMP Events and EventListeners
+
+Asynchronous QMP uses `EventListener` objects to listen for events. An
+`EventListener` is a FIFO event queue that can be pre-filtered to listen
+for only specific events. Each `EventListener` instance receives its own
+copy of events that it hears, so events may be consumed without fear or
+worry for depriving other listeners of events they need to hear.
+
+
+EventListener Tutorial
+----------------------
+
+In all of the following examples, we assume that we have a `QMPClient`
+instantiated named ``qmp`` that is already connected.
+
+
+`listener()` context blocks with one name
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The most basic usage is by using the `listener()` context manager to
+construct them:
+
+.. code:: python
+
+   with qmp.listener('STOP') as listener:
+       await qmp.execute('stop')
+       await listener.get()
+
+The listener is active only for the duration of the ‘with’ block. This
+instance listens only for ‘STOP’ events.
+
+
+`listener()` context blocks with two or more names
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Multiple events can be selected for by providing any ``Iterable[str]``:
+
+.. code:: python
+
+   with qmp.listener(('STOP', 'RESUME')) as listener:
+       await qmp.execute('stop')
+       event = await listener.get()
+       assert event['event'] == 'STOP'
+
+       await qmp.execute('cont')
+       event = await listener.get()
+       assert event['event'] == 'RESUME'
+
+
+`listener()` context blocks with no names
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By omitting names entirely, you can listen to ALL events.
+
+.. code:: python
+
+   with qmp.listener() as listener:
+       await qmp.execute('stop')
+       event = await listener.get()
+       assert event['event'] == 'STOP'
+
+This isn’t a very good use case for this feature: In a non-trivial
+running system, we may not know what event will arrive next. Grabbing
+the top of a FIFO queue returning multiple kinds of events may be prone
+to error.
+
+
+Using async iterators to retrieve events
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you’d like to simply watch what events happen to arrive, you can use
+the listener as an async iterator:
+
+.. code:: python
+
+   with qmp.listener() as listener:
+       async for event in listener:
+           print(f"Event arrived: {event['event']}")
+
+This is analogous to the following code:
+
+.. code:: python
+
+   with qmp.listener() as listener:
+       while True:
+           event = listener.get()
+           print(f"Event arrived: {event['event']}")
+
+This event stream will never end, so these blocks will never terminate.
+
+
+Using asyncio.Task to concurrently retrieve events
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Since a listener’s event stream will never terminate, it is not likely
+useful to use that form in a script. For longer-running clients, we can
+create event handlers by using `asyncio.Task` to create concurrent
+coroutines:
+
+.. code:: python
+
+   async def print_events(listener):
+       try:
+           async for event in listener:
+               print(f"Event arrived: {event['event']}")
+       except asyncio.CancelledError:
+           return
+
+   with qmp.listener() as listener:
+       task = asyncio.Task(print_events(listener))
+       await qmp.execute('stop')
+       await qmp.execute('cont')
+       task.cancel()
+       await task
+
+However, there is no guarantee that these events will be received by the
+time we leave this context block. Once the context block is exited, the
+listener will cease to hear any new events, and becomes inert.
+
+Be mindful of the timing: the above example will *probably*– but does
+not *guarantee*– that both STOP/RESUMED events will be printed. The
+example below outlines how to use listeners outside of a context block.
+
+
+Using `register_listener()` and `remove_listener()`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To create a listener with a longer lifetime, beyond the scope of a
+single block, create a listener and then call `register_listener()`:
+
+.. code:: python
+
+   class MyClient:
+       def __init__(self, qmp):
+           self.qmp = qmp
+           self.listener = EventListener()
+
+       async def print_events(self):
+           try:
+               async for event in self.listener:
+                   print(f"Event arrived: {event['event']}")
+           except asyncio.CancelledError:
+               return
+
+       async def run(self):
+           self.task = asyncio.Task(self.print_events)
+           self.qmp.register_listener(self.listener)
+           await qmp.execute('stop')
+           await qmp.execute('cont')
+
+       async def stop(self):
+           self.task.cancel()
+           await self.task
+           self.qmp.remove_listener(self.listener)
+
+The listener can be deactivated by using `remove_listener()`. When it is
+removed, any possible pending events are cleared and it can be
+re-registered at a later time.
+
+
+Using the built-in all events listener
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The `QMPClient` object creates its own default listener named
+:py:obj:`~Events.events` that can be used for the same purpose without
+having to create your own:
+
+.. code:: python
+
+   async def print_events(listener):
+       try:
+           async for event in listener:
+               print(f"Event arrived: {event['event']}")
+       except asyncio.CancelledError:
+           return
+
+   task = asyncio.Task(print_events(qmp.events))
+
+   await qmp.execute('stop')
+   await qmp.execute('cont')
+
+   task.cancel()
+   await task
+
+
+Using both .get() and async iterators
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The async iterator and `get()` methods pull events from the same FIFO
+queue. If you mix the usage of both, be aware: Events are emitted
+precisely once per listener.
+
+If multiple contexts try to pull events from the same listener instance,
+events are still emitted only precisely once.
+
+This restriction can be lifted by creating additional listeners.
+
+
+Creating multiple listeners
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Additional `EventListener` objects can be created at-will. Each one
+receives its own copy of events, with separate FIFO event queues.
+
+.. code:: python
+
+   my_listener = EventListener()
+   qmp.register_listener(my_listener)
+
+   await qmp.execute('stop')
+   copy1 = await my_listener.get()
+   copy2 = await qmp.events.get()
+
+   assert copy1 == copy2
+
+In this example, we await an event from both a user-created
+`EventListener` and the built-in events listener. Both receive the same
+event.
+
+
+Clearing listeners
+~~~~~~~~~~~~~~~~~~
+
+`EventListener` objects can be cleared, clearing all events seen thus far:
+
+.. code:: python
+
+   await qmp.execute('stop')
+   qmp.events.clear()
+   await qmp.execute('cont')
+   event = await qmp.events.get()
+   assert event['event'] == 'RESUME'
+
+`EventListener` objects are FIFO queues. If events are not consumed,
+they will remain in the queue until they are witnessed or discarded via
+`clear()`. FIFO queues will be drained automatically upon leaving a
+context block, or when calling `remove_listener()`.
+
+
+Accessing listener history
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+`EventListener` objects record their history. Even after being cleared,
+you can obtain a record of all events seen so far:
+
+.. code:: python
+
+   await qmp.execute('stop')
+   await qmp.execute('cont')
+   qmp.events.clear()
+
+   assert len(qmp.events.history) == 2
+   assert qmp.events.history[0]['event'] == 'STOP'
+   assert qmp.events.history[1]['event'] == 'RESUME'
+
+The history is updated immediately and does not require the event to be
+witnessed first.
+
+
+Using event filters
+~~~~~~~~~~~~~~~~~~~
+
+`EventListener` objects can be given complex filtering criteria if names
+are not sufficient:
+
+.. code:: python
+
+   def job1_filter(event) -> bool:
+       event_data = event.get('data', {})
+       event_job_id = event_data.get('id')
+       return event_job_id == "job1"
+
+   with qmp.listener('JOB_STATUS_CHANGE', job1_filter) as listener:
+       await qmp.execute('blockdev-backup', arguments={'job-id': 'job1', ...})
+       async for event in listener:
+           if event['data']['status'] == 'concluded':
+               break
+
+These filters might be most useful when parameterized. `EventListener`
+objects expect a function that takes only a single argument (the raw
+event, as a `Message`) and returns a bool; True if the event should be
+accepted into the stream. You can create a function that adapts this
+signature to accept configuration parameters:
+
+.. code:: python
+
+   def job_filter(job_id: str) -> EventFilter:
+       def filter(event: Message) -> bool:
+           return event['data']['id'] == job_id
+       return filter
+
+   with qmp.listener('JOB_STATUS_CHANGE', job_filter('job2')) as listener:
+       await qmp.execute('blockdev-backup', arguments={'job-id': 'job2', ...})
+       async for event in listener:
+           if event['data']['status'] == 'concluded':
+               break
+
+
+Activating an existing listener with `listen()`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Listeners with complex, long configurations can also be created manually
+and activated temporarily by using `listen()` instead of `listener()`:
+
+.. code:: python
+
+   listener = EventListener(('BLOCK_JOB_COMPLETED', 'BLOCK_JOB_CANCELLED',
+                             'BLOCK_JOB_ERROR', 'BLOCK_JOB_READY',
+                             'BLOCK_JOB_PENDING', 'JOB_STATUS_CHANGE'))
+
+   with qmp.listen(listener):
+       await qmp.execute('blockdev-backup', arguments={'job-id': 'job3', ...})
+       async for event in listener:
+           print(event)
+           if event['event'] == 'BLOCK_JOB_COMPLETED':
+               break
+
+Any events that are not witnessed by the time the block is left will be
+cleared from the queue; entering the block is an implicit
+`register_listener()` and leaving the block is an implicit
+`remove_listener()`.
+
+
+Activating multiple existing listeners with `listen()`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+While `listener()` is only capable of creating a single listener,
+`listen()` is capable of activating multiple listeners simultaneously:
+
+.. code:: python
+
+   def job_filter(job_id: str) -> EventFilter:
+       def filter(event: Message) -> bool:
+           return event['data']['id'] == job_id
+       return filter
+
+   jobA = EventListener('JOB_STATUS_CHANGE', job_filter('jobA'))
+   jobB = EventListener('JOB_STATUS_CHANGE', job_filter('jobB'))
+
+   with qmp.listen(jobA, jobB):
+       qmp.execute('blockdev-create', arguments={'job-id': 'jobA', ...})
+       qmp.execute('blockdev-create', arguments={'job-id': 'jobB', ...})
+
+       async for event in jobA.get():
+           if event['data']['status'] == 'concluded':
+               break
+       async for event in jobB.get():
+           if event['data']['status'] == 'concluded':
+               break
+
+
+Extending the `EventListener` class
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In the case that a more specialized `EventListener` is desired to
+provide either more functionality or more compact syntax for specialized
+cases, it can be extended.
+
+One of the key methods to extend or override is
+:py:meth:`~EventListener.accept()`. The default implementation checks an
+incoming message for:
+
+1. A qualifying name, if any :py:obj:`~EventListener.names` were
+   specified at initialization time
+2. That :py:obj:`~EventListener.event_filter()` returns True.
+
+This can be modified however you see fit to change the criteria for
+inclusion in the stream.
+
+For convenience, a ``JobListener`` class could be created that simply
+bakes in configuration so it does not need to be repeated:
+
+.. code:: python
+
+   class JobListener(EventListener):
+       def __init__(self, job_id: str):
+           super().__init__(('BLOCK_JOB_COMPLETED', 'BLOCK_JOB_CANCELLED',
+                             'BLOCK_JOB_ERROR', 'BLOCK_JOB_READY',
+                             'BLOCK_JOB_PENDING', 'JOB_STATUS_CHANGE'))
+           self.job_id = job_id
+
+       def accept(self, event) -> bool:
+           if not super().accept(event):
+               return False
+           if event['event'] in ('BLOCK_JOB_PENDING', 'JOB_STATUS_CHANGE'):
+               return event['data']['id'] == job_id
+           return event['data']['device'] == job_id
+
+From here on out, you can conjure up a custom-purpose listener that
+listens only for job-related events for a specific job-id easily:
+
+.. code:: python
+
+   listener = JobListener('job4')
+   with qmp.listener(listener):
+       await qmp.execute('blockdev-backup', arguments={'job-id': 'job4', ...})
+       async for event in listener:
+           print(event)
+           if event['event'] == 'BLOCK_JOB_COMPLETED':
+               break
+
+
+Experimental Interfaces & Design Issues
+---------------------------------------
+
+These interfaces are not ones I am sure I will keep or otherwise modify
+heavily.
+
+qmp.listener()’s type signature
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+`listener()` does not return anything, because it was assumed the caller
+already had a handle to the listener. However, for
+``qmp.listener(EventListener())`` forms, the caller will not have saved
+a handle to the listener.
+
+Because this function can accept *many* listeners, I found it hard to
+accurately type in a way where it could be used in both “one” or “many”
+forms conveniently and in a statically type-safe manner.
+
+Ultimately, I removed the return altogether, but perhaps with more time
+I can work out a way to re-add it.
+
+
+API Reference
+-------------
+
+"""
+
+import asyncio
+from contextlib import contextmanager
+import logging
+from typing import (
+    AsyncIterator,
+    Callable,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Set,
+    Tuple,
+    Union,
+)
+
+from .error import QMPError
+from .message import Message
+
+
+EventNames = Union[str, Iterable[str], None]
+EventFilter = Callable[[Message], bool]
+
+
+class ListenerError(QMPError):
+    """
+    Generic error class for `EventListener`-related problems.
+    """
+
+
+class EventListener:
+    """
+    Selectively listens for events with runtime configurable filtering.
+
+    This class is designed to be directly usable for the most common cases,
+    but it can be extended to provide more rigorous control.
+
+    :param names:
+        One or more names of events to listen for.
+        When not provided, listen for ALL events.
+    :param event_filter:
+        An optional event filtering function.
+        When names are also provided, this acts as a secondary filter.
+
+    When ``names`` and ``event_filter`` are both provided, the names
+    will be filtered first, and then the filter function will be called
+    second. The event filter function can assume that the format of the
+    event is a known format.
+    """
+    def __init__(
+        self,
+        names: EventNames = None,
+        event_filter: Optional[EventFilter] = None,
+    ):
+        # Queue of 'heard' events yet to be witnessed by a caller.
+        self._queue: 'asyncio.Queue[Message]' = asyncio.Queue()
+
+        # Intended as a historical record, NOT a processing queue or backlog.
+        self._history: List[Message] = []
+
+        #: Primary event filter, based on one or more event names.
+        self.names: Set[str] = set()
+        if isinstance(names, str):
+            self.names.add(names)
+        elif names is not None:
+            self.names.update(names)
+
+        #: Optional, secondary event filter.
+        self.event_filter: Optional[EventFilter] = event_filter
+
+    @property
+    def history(self) -> Tuple[Message, ...]:
+        """
+        A read-only history of all events seen so far.
+
+        This represents *every* event, including those not yet witnessed
+        via `get()` or ``async for``. It persists between `clear()`
+        calls and is immutable.
+        """
+        return tuple(self._history)
+
+    def accept(self, event: Message) -> bool:
+        """
+        Determine if this listener accepts this event.
+
+        This method determines which events will appear in the stream.
+        The default implementation simply checks the event against the
+        list of names and the event_filter to decide if this
+        `EventListener` accepts a given event. It can be
+        overridden/extended to provide custom listener behavior.
+
+        User code is not expected to need to invoke this method.
+
+        :param event: The event under consideration.
+        :return: `True`, if this listener accepts this event.
+        """
+        name_ok = (not self.names) or (event['event'] in self.names)
+        return name_ok and (
+            (not self.event_filter) or self.event_filter(event)
+        )
+
+    async def put(self, event: Message) -> None:
+        """
+        Conditionally put a new event into the FIFO queue.
+
+        This method is not designed to be invoked from user code, and it
+        should not need to be overridden. It is a public interface so
+        that `QMPClient` has an interface by which it can inform
+        registered listeners of new events.
+
+        The event will be put into the queue if
+        :py:meth:`~EventListener.accept()` returns `True`.
+
+        :param event: The new event to put into the FIFO queue.
+        """
+        if not self.accept(event):
+            return
+
+        self._history.append(event)
+        await self._queue.put(event)
+
+    async def get(self) -> Message:
+        """
+        Wait for the very next event in this stream.
+
+        If one is already available, return that one.
+        """
+        return await self._queue.get()
+
+    def empty(self) -> bool:
+        """
+        Return `True` if there are no pending events.
+        """
+        return self._queue.empty()
+
+    def clear(self) -> List[Message]:
+        """
+        Clear this listener of all pending events.
+
+        Called when an `EventListener` is being unregistered, this clears the
+        pending FIFO queue synchronously. It can be also be used to
+        manually clear any pending events, if desired.
+
+        :return: The cleared events, if any.
+
+        .. warning::
+            Take care when discarding events. Cleared events will be
+            silently tossed on the floor. All events that were ever
+            accepted by this listener are visible in `history()`.
+        """
+        events = []
+        while True:
+            try:
+                events.append(self._queue.get_nowait())
+            except asyncio.QueueEmpty:
+                break
+
+        return events
+
+    def __aiter__(self) -> AsyncIterator[Message]:
+        return self
+
+    async def __anext__(self) -> Message:
+        """
+        Enables the `EventListener` to function as an async iterator.
+
+        It may be used like this:
+
+        .. code:: python
+
+            async for event in listener:
+                print(event)
+
+        These iterators will never terminate of their own accord; you
+        must provide break conditions or otherwise prepare to run them
+        in an `asyncio.Task` that can be cancelled.
+        """
+        return await self.get()
+
+
+class Events:
+    """
+    Events is a mix-in class that adds event functionality to the QMP class.
+
+    It's designed specifically as a mix-in for `QMPClient`, and it
+    relies upon the class it is being mixed into having a 'logger'
+    property.
+    """
+    def __init__(self) -> None:
+        self._listeners: List[EventListener] = []
+
+        #: Default, all-events `EventListener`.
+        self.events: EventListener = EventListener()
+        self.register_listener(self.events)
+
+        # Parent class needs to have a logger
+        self.logger: logging.Logger
+
+    async def _event_dispatch(self, msg: Message) -> None:
+        """
+        Given a new event, propagate it to all of the active listeners.
+
+        :param msg: The event to propagate.
+        """
+        for listener in self._listeners:
+            await listener.put(msg)
+
+    def register_listener(self, listener: EventListener) -> None:
+        """
+        Register and activate an `EventListener`.
+
+        :param listener: The listener to activate.
+        :raise ListenerError: If the given listener is already registered.
+        """
+        if listener in self._listeners:
+            raise ListenerError("Attempted to re-register existing listener")
+        self.logger.debug("Registering %s.", str(listener))
+        self._listeners.append(listener)
+
+    def remove_listener(self, listener: EventListener) -> None:
+        """
+        Unregister and deactivate an `EventListener`.
+
+        The removed listener will have its pending events cleared via
+        `clear()`. The listener can be re-registered later when
+        desired.
+
+        :param listener: The listener to deactivate.
+        :raise ListenerError: If the given listener is not registered.
+        """
+        if listener == self.events:
+            raise ListenerError("Cannot remove the default listener.")
+        self.logger.debug("Removing %s.", str(listener))
+        listener.clear()
+        self._listeners.remove(listener)
+
+    @contextmanager
+    def listen(self, *listeners: EventListener) -> Iterator[None]:
+        r"""
+        Context manager: Temporarily listen with an `EventListener`.
+
+        Accepts one or more `EventListener` objects and registers them,
+        activating them for the duration of the context block.
+
+        `EventListener` objects will have any pending events in their
+        FIFO queue cleared upon exiting the context block, when they are
+        deactivated.
+
+        :param \*listeners: One or more EventListeners to activate.
+        :raise ListenerError: If the given listener(s) are already active.
+        """
+        _added = []
+
+        try:
+            for listener in listeners:
+                self.register_listener(listener)
+                _added.append(listener)
+
+            yield
+
+        finally:
+            for listener in _added:
+                self.remove_listener(listener)
+
+    @contextmanager
+    def listener(
+        self,
+        names: EventNames = (),
+        event_filter: Optional[EventFilter] = None
+    ) -> Iterator[EventListener]:
+        """
+        Context manager: Temporarily listen with a new `EventListener`.
+
+        Creates an `EventListener` object and registers it, activating
+        it for the duration of the context block.
+
+        :param names:
+            One or more names of events to listen for.
+            When not provided, listen for ALL events.
+        :param event_filter:
+            An optional event filtering function.
+            When names are also provided, this acts as a secondary filter.
+
+        :return: The newly created and active `EventListener`.
+        """
+        listener = EventListener(names, event_filter)
+        with self.listen(listener):
+            yield listener
diff --git a/python/qemu/qmp/legacy.py b/python/qemu/qmp/legacy.py
new file mode 100644 (file)
index 0000000..22a2b56
--- /dev/null
@@ -0,0 +1,319 @@
+"""
+(Legacy) Sync QMP Wrapper
+
+This module provides the `QEMUMonitorProtocol` class, which is a
+synchronous wrapper around `QMPClient`.
+
+Its design closely resembles that of the original QEMUMonitorProtocol
+class, originally written by Luiz Capitulino. It is provided here for
+compatibility with scripts inside the QEMU source tree that expect the
+old interface.
+"""
+
+#
+# Copyright (C) 2009-2022 Red Hat Inc.
+#
+# Authors:
+#  Luiz Capitulino <lcapitulino@redhat.com>
+#  John Snow <jsnow@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+
+import asyncio
+import socket
+from types import TracebackType
+from typing import (
+    Any,
+    Awaitable,
+    Dict,
+    List,
+    Optional,
+    Type,
+    TypeVar,
+    Union,
+)
+
+from .error import QMPError
+from .protocol import Runstate, SocketAddrT
+from .qmp_client import QMPClient
+
+
+#: QMPMessage is an entire QMP message of any kind.
+QMPMessage = Dict[str, Any]
+
+#: QMPReturnValue is the 'return' value of a command.
+QMPReturnValue = object
+
+#: QMPObject is any object in a QMP message.
+QMPObject = Dict[str, object]
+
+# QMPMessage can be outgoing commands or incoming events/returns.
+# QMPReturnValue is usually a dict/json object, but due to QAPI's
+# 'command-returns-exceptions', it can actually be anything.
+#
+# {'return': {}} is a QMPMessage,
+# {} is the QMPReturnValue.
+
+
+class QMPBadPortError(QMPError):
+    """
+    Unable to parse socket address: Port was non-numerical.
+    """
+
+
+class QEMUMonitorProtocol:
+    """
+    Provide an API to connect to QEMU via QEMU Monitor Protocol (QMP)
+    and then allow to handle commands and events.
+
+    :param address:  QEMU address, can be a unix socket path (string), a tuple
+                     in the form ( address, port ) for a TCP connection, or an
+                     existing `socket.socket` object.
+    :param server:   Act as the socket server. (See 'accept')
+                     Not applicable when passing a socket directly.
+    :param nickname: Optional nickname used for logging.
+    """
+
+    def __init__(self,
+                 address: Union[SocketAddrT, socket.socket],
+                 server: bool = False,
+                 nickname: Optional[str] = None):
+
+        if server and isinstance(address, socket.socket):
+            raise ValueError(
+                "server argument should be False when passing a socket")
+
+        self._qmp = QMPClient(nickname)
+        self._aloop = asyncio.get_event_loop()
+        self._address = address
+        self._timeout: Optional[float] = None
+
+        if server:
+            assert not isinstance(self._address, socket.socket)
+            self._sync(self._qmp.start_server(self._address))
+
+    _T = TypeVar('_T')
+
+    def _sync(
+            self, future: Awaitable[_T], timeout: Optional[float] = None
+    ) -> _T:
+        return self._aloop.run_until_complete(
+            asyncio.wait_for(future, timeout=timeout)
+        )
+
+    def _get_greeting(self) -> Optional[QMPMessage]:
+        if self._qmp.greeting is not None:
+            # pylint: disable=protected-access
+            return self._qmp.greeting._asdict()
+        return None
+
+    def __enter__(self: _T) -> _T:
+        # Implement context manager enter function.
+        return self
+
+    def __exit__(self,
+                 exc_type: Optional[Type[BaseException]],
+                 exc_val: Optional[BaseException],
+                 exc_tb: Optional[TracebackType]) -> None:
+        # Implement context manager exit function.
+        self.close()
+
+    @classmethod
+    def parse_address(cls, address: str) -> SocketAddrT:
+        """
+        Parse a string into a QMP address.
+
+        Figure out if the argument is in the port:host form.
+        If it's not, it's probably a file path.
+        """
+        components = address.split(':')
+        if len(components) == 2:
+            try:
+                port = int(components[1])
+            except ValueError:
+                msg = f"Bad port: '{components[1]}' in '{address}'."
+                raise QMPBadPortError(msg) from None
+            return (components[0], port)
+
+        # Treat as filepath.
+        return address
+
+    def connect(self, negotiate: bool = True) -> Optional[QMPMessage]:
+        """
+        Connect to the QMP Monitor and perform capabilities negotiation.
+
+        :return: QMP greeting dict, or None if negotiate is false
+        :raise ConnectError: on connection errors
+        """
+        self._qmp.await_greeting = negotiate
+        self._qmp.negotiate = negotiate
+
+        self._sync(
+            self._qmp.connect(self._address)
+        )
+        return self._get_greeting()
+
+    def accept(self, timeout: Optional[float] = 15.0) -> QMPMessage:
+        """
+        Await connection from QMP Monitor and perform capabilities negotiation.
+
+        :param timeout:
+            timeout in seconds (nonnegative float number, or None).
+            If None, there is no timeout, and this may block forever.
+
+        :return: QMP greeting dict
+        :raise ConnectError: on connection errors
+        """
+        self._qmp.await_greeting = True
+        self._qmp.negotiate = True
+
+        self._sync(self._qmp.accept(), timeout)
+
+        ret = self._get_greeting()
+        assert ret is not None
+        return ret
+
+    def cmd_obj(self, qmp_cmd: QMPMessage) -> QMPMessage:
+        """
+        Send a QMP command to the QMP Monitor.
+
+        :param qmp_cmd: QMP command to be sent as a Python dict
+        :return: QMP response as a Python dict
+        """
+        return dict(
+            self._sync(
+                # pylint: disable=protected-access
+
+                # _raw() isn't a public API, because turning off
+                # automatic ID assignment is discouraged. For
+                # compatibility with iotests *only*, do it anyway.
+                self._qmp._raw(qmp_cmd, assign_id=False),
+                self._timeout
+            )
+        )
+
+    def cmd_raw(self, name: str,
+                args: Optional[Dict[str, object]] = None) -> QMPMessage:
+        """
+        Build a QMP command and send it to the QMP Monitor.
+
+        :param name: command name (string)
+        :param args: command arguments (dict)
+        """
+        qmp_cmd: QMPMessage = {'execute': name}
+        if args:
+            qmp_cmd['arguments'] = args
+        return self.cmd_obj(qmp_cmd)
+
+    def cmd(self, cmd: str, **kwds: object) -> QMPReturnValue:
+        """
+        Build and send a QMP command to the monitor, report errors if any
+        """
+        return self._sync(
+            self._qmp.execute(cmd, kwds),
+            self._timeout
+        )
+
+    def pull_event(self,
+                   wait: Union[bool, float] = False) -> Optional[QMPMessage]:
+        """
+        Pulls a single event.
+
+        :param wait:
+            If False or 0, do not wait. Return None if no events ready.
+            If True, wait forever until the next event.
+            Otherwise, wait for the specified number of seconds.
+
+        :raise asyncio.TimeoutError:
+            When a timeout is requested and the timeout period elapses.
+
+        :return: The first available QMP event, or None.
+        """
+        if not wait:
+            # wait is False/0: "do not wait, do not except."
+            if self._qmp.events.empty():
+                return None
+
+        # If wait is 'True', wait forever. If wait is False/0, the events
+        # queue must not be empty; but it still needs some real amount
+        # of time to complete.
+        timeout = None
+        if wait and isinstance(wait, float):
+            timeout = wait
+
+        return dict(
+            self._sync(
+                self._qmp.events.get(),
+                timeout
+            )
+        )
+
+    def get_events(self, wait: Union[bool, float] = False) -> List[QMPMessage]:
+        """
+        Get a list of QMP events and clear all pending events.
+
+        :param wait:
+            If False or 0, do not wait. Return None if no events ready.
+            If True, wait until we have at least one event.
+            Otherwise, wait for up to the specified number of seconds for at
+            least one event.
+
+        :raise asyncio.TimeoutError:
+            When a timeout is requested and the timeout period elapses.
+
+        :return: A list of QMP events.
+        """
+        events = [dict(x) for x in self._qmp.events.clear()]
+        if events:
+            return events
+
+        event = self.pull_event(wait)
+        return [event] if event is not None else []
+
+    def clear_events(self) -> None:
+        """Clear current list of pending events."""
+        self._qmp.events.clear()
+
+    def close(self) -> None:
+        """Close the connection."""
+        self._sync(
+            self._qmp.disconnect()
+        )
+
+    def settimeout(self, timeout: Optional[float]) -> None:
+        """
+        Set the timeout for QMP RPC execution.
+
+        This timeout affects the `cmd`, `cmd_obj`, and `command` methods.
+        The `accept`, `pull_event` and `get_event` methods have their
+        own configurable timeouts.
+
+        :param timeout:
+            timeout in seconds, or None.
+            None will wait indefinitely.
+        """
+        self._timeout = timeout
+
+    def send_fd_scm(self, fd: int) -> None:
+        """
+        Send a file descriptor to the remote via SCM_RIGHTS.
+        """
+        self._qmp.send_fd_scm(fd)
+
+    def __del__(self) -> None:
+        if self._qmp.runstate == Runstate.IDLE:
+            return
+
+        if not self._aloop.is_running():
+            self.close()
+        else:
+            # Garbage collection ran while the event loop was running.
+            # Nothing we can do about it now, but if we don't raise our
+            # own error, the user will be treated to a lot of traceback
+            # they might not understand.
+            raise QMPError(
+                "QEMUMonitorProtocol.close()"
+                " was not called before object was garbage collected"
+            )
diff --git a/python/qemu/qmp/message.py b/python/qemu/qmp/message.py
new file mode 100644 (file)
index 0000000..f76ccc9
--- /dev/null
@@ -0,0 +1,209 @@
+"""
+QMP Message Format
+
+This module provides the `Message` class, which represents a single QMP
+message sent to or from the server.
+"""
+
+import json
+from json import JSONDecodeError
+from typing import (
+    Dict,
+    Iterator,
+    Mapping,
+    MutableMapping,
+    Optional,
+    Union,
+)
+
+from .error import ProtocolError
+
+
+class Message(MutableMapping[str, object]):
+    """
+    Represents a single QMP protocol message.
+
+    QMP uses JSON objects as its basic communicative unit; so this
+    Python object is a :py:obj:`~collections.abc.MutableMapping`. It may
+    be instantiated from either another mapping (like a `dict`), or from
+    raw `bytes` that still need to be deserialized.
+
+    Once instantiated, it may be treated like any other MutableMapping::
+
+        >>> msg = Message(b'{"hello": "world"}')
+        >>> assert msg['hello'] == 'world'
+        >>> msg['id'] = 'foobar'
+        >>> print(msg)
+        {
+          "hello": "world",
+          "id": "foobar"
+        }
+
+    It can be converted to `bytes`::
+
+        >>> msg = Message({"hello": "world"})
+        >>> print(bytes(msg))
+        b'{"hello":"world","id":"foobar"}'
+
+    Or back into a garden-variety `dict`::
+
+       >>> dict(msg)
+       {'hello': 'world'}
+
+
+    :param value: Initial value, if any.
+    :param eager:
+        When `True`, attempt to serialize or deserialize the initial value
+        immediately, so that conversion exceptions are raised during
+        the call to ``__init__()``.
+    """
+    # pylint: disable=too-many-ancestors
+
+    def __init__(self,
+                 value: Union[bytes, Mapping[str, object]] = b'{}', *,
+                 eager: bool = True):
+        self._data: Optional[bytes] = None
+        self._obj: Optional[Dict[str, object]] = None
+
+        if isinstance(value, bytes):
+            self._data = value
+            if eager:
+                self._obj = self._deserialize(self._data)
+        else:
+            self._obj = dict(value)
+            if eager:
+                self._data = self._serialize(self._obj)
+
+    # Methods necessary to implement the MutableMapping interface, see:
+    # https://docs.python.org/3/library/collections.abc.html#collections.abc.MutableMapping
+
+    # We get pop, popitem, clear, update, setdefault, __contains__,
+    # keys, items, values, get, __eq__ and __ne__ for free.
+
+    def __getitem__(self, key: str) -> object:
+        return self._object[key]
+
+    def __setitem__(self, key: str, value: object) -> None:
+        self._object[key] = value
+        self._data = None
+
+    def __delitem__(self, key: str) -> None:
+        del self._object[key]
+        self._data = None
+
+    def __iter__(self) -> Iterator[str]:
+        return iter(self._object)
+
+    def __len__(self) -> int:
+        return len(self._object)
+
+    # Dunder methods not related to MutableMapping:
+
+    def __repr__(self) -> str:
+        if self._obj is not None:
+            return f"Message({self._object!r})"
+        return f"Message({bytes(self)!r})"
+
+    def __str__(self) -> str:
+        """Pretty-printed representation of this QMP message."""
+        return json.dumps(self._object, indent=2)
+
+    def __bytes__(self) -> bytes:
+        """bytes representing this QMP message."""
+        if self._data is None:
+            self._data = self._serialize(self._obj or {})
+        return self._data
+
+    # Conversion Methods
+
+    @property
+    def _object(self) -> Dict[str, object]:
+        """
+        A `dict` representing this QMP message.
+
+        Generated on-demand, if required. This property is private
+        because it returns an object that could be used to invalidate
+        the internal state of the `Message` object.
+        """
+        if self._obj is None:
+            self._obj = self._deserialize(self._data or b'{}')
+        return self._obj
+
+    @classmethod
+    def _serialize(cls, value: object) -> bytes:
+        """
+        Serialize a JSON object as `bytes`.
+
+        :raise ValueError: When the object cannot be serialized.
+        :raise TypeError: When the object cannot be serialized.
+
+        :return: `bytes` ready to be sent over the wire.
+        """
+        return json.dumps(value, separators=(',', ':')).encode('utf-8')
+
+    @classmethod
+    def _deserialize(cls, data: bytes) -> Dict[str, object]:
+        """
+        Deserialize JSON `bytes` into a native Python `dict`.
+
+        :raise DeserializationError:
+            If JSON deserialization fails for any reason.
+        :raise UnexpectedTypeError:
+            If the data does not represent a JSON object.
+
+        :return: A `dict` representing this QMP message.
+        """
+        try:
+            obj = json.loads(data)
+        except JSONDecodeError as err:
+            emsg = "Failed to deserialize QMP message."
+            raise DeserializationError(emsg, data) from err
+        if not isinstance(obj, dict):
+            raise UnexpectedTypeError(
+                "QMP message is not a JSON object.",
+                obj
+            )
+        return obj
+
+
+class DeserializationError(ProtocolError):
+    """
+    A QMP message was not understood as JSON.
+
+    When this Exception is raised, ``__cause__`` will be set to the
+    `json.JSONDecodeError` Exception, which can be interrogated for
+    further details.
+
+    :param error_message: Human-readable string describing the error.
+    :param raw: The raw `bytes` that prompted the failure.
+    """
+    def __init__(self, error_message: str, raw: bytes):
+        super().__init__(error_message)
+        #: The raw `bytes` that were not understood as JSON.
+        self.raw: bytes = raw
+
+    def __str__(self) -> str:
+        return "\n".join([
+            super().__str__(),
+            f"  raw bytes were: {str(self.raw)}",
+        ])
+
+
+class UnexpectedTypeError(ProtocolError):
+    """
+    A QMP message was JSON, but not a JSON object.
+
+    :param error_message: Human-readable string describing the error.
+    :param value: The deserialized JSON value that wasn't an object.
+    """
+    def __init__(self, error_message: str, value: object):
+        super().__init__(error_message)
+        #: The JSON value that was expected to be an object.
+        self.value: object = value
+
+    def __str__(self) -> str:
+        strval = json.dumps(self.value, indent=2)
+        return "\n".join([
+            super().__str__(),
+            f"  json value was: {strval}",
+        ])
diff --git a/python/qemu/qmp/models.py b/python/qemu/qmp/models.py
new file mode 100644 (file)
index 0000000..da52848
--- /dev/null
@@ -0,0 +1,146 @@
+"""
+QMP Data Models
+
+This module provides simplistic data classes that represent the few
+structures that the QMP spec mandates; they are used to verify incoming
+data to make sure it conforms to spec.
+"""
+# pylint: disable=too-few-public-methods
+
+from collections import abc
+import copy
+from typing import (
+    Any,
+    Dict,
+    Mapping,
+    Optional,
+    Sequence,
+)
+
+
+class Model:
+    """
+    Abstract data model, representing some QMP object of some kind.
+
+    :param raw: The raw object to be validated.
+    :raise KeyError: If any required fields are absent.
+    :raise TypeError: If any required fields have the wrong type.
+    """
+    def __init__(self, raw: Mapping[str, Any]):
+        self._raw = raw
+
+    def _check_key(self, key: str) -> None:
+        if key not in self._raw:
+            raise KeyError(f"'{self._name}' object requires '{key}' member")
+
+    def _check_value(self, key: str, type_: type, typestr: str) -> None:
+        assert key in self._raw
+        if not isinstance(self._raw[key], type_):
+            raise TypeError(
+                f"'{self._name}' member '{key}' must be a {typestr}"
+            )
+
+    def _check_member(self, key: str, type_: type, typestr: str) -> None:
+        self._check_key(key)
+        self._check_value(key, type_, typestr)
+
+    @property
+    def _name(self) -> str:
+        return type(self).__name__
+
+    def __repr__(self) -> str:
+        return f"{self._name}({self._raw!r})"
+
+
+class Greeting(Model):
+    """
+    Defined in qmp-spec.rst, section "Server Greeting".
+
+    :param raw: The raw Greeting object.
+    :raise KeyError: If any required fields are absent.
+    :raise TypeError: If any required fields have the wrong type.
+    """
+    def __init__(self, raw: Mapping[str, Any]):
+        super().__init__(raw)
+        #: 'QMP' member
+        self.QMP: QMPGreeting  # pylint: disable=invalid-name
+
+        self._check_member('QMP', abc.Mapping, "JSON object")
+        self.QMP = QMPGreeting(self._raw['QMP'])
+
+    def _asdict(self) -> Dict[str, object]:
+        """
+        For compatibility with the iotests sync QMP wrapper.
+
+        The legacy QMP interface needs Greetings as a garden-variety Dict.
+
+        This interface is private in the hopes that it will be able to
+        be dropped again in the near-future. Caller beware!
+        """
+        return dict(copy.deepcopy(self._raw))
+
+
+class QMPGreeting(Model):
+    """
+    Defined in qmp-spec.rst, section "Server Greeting".
+
+    :param raw: The raw QMPGreeting object.
+    :raise KeyError: If any required fields are absent.
+    :raise TypeError: If any required fields have the wrong type.
+    """
+    def __init__(self, raw: Mapping[str, Any]):
+        super().__init__(raw)
+        #: 'version' member
+        self.version: Mapping[str, object]
+        #: 'capabilities' member
+        self.capabilities: Sequence[object]
+
+        self._check_member('version', abc.Mapping, "JSON object")
+        self.version = self._raw['version']
+
+        self._check_member('capabilities', abc.Sequence, "JSON array")
+        self.capabilities = self._raw['capabilities']
+
+
+class ErrorResponse(Model):
+    """
+    Defined in qmp-spec.rst, section "Error".
+
+    :param raw: The raw ErrorResponse object.
+    :raise KeyError: If any required fields are absent.
+    :raise TypeError: If any required fields have the wrong type.
+    """
+    def __init__(self, raw: Mapping[str, Any]):
+        super().__init__(raw)
+        #: 'error' member
+        self.error: ErrorInfo
+        #: 'id' member
+        self.id: Optional[object] = None  # pylint: disable=invalid-name
+
+        self._check_member('error', abc.Mapping, "JSON object")
+        self.error = ErrorInfo(self._raw['error'])
+
+        if 'id' in raw:
+            self.id = raw['id']
+
+
+class ErrorInfo(Model):
+    """
+    Defined in qmp-spec.rst, section "Error".
+
+    :param raw: The raw ErrorInfo object.
+    :raise KeyError: If any required fields are absent.
+    :raise TypeError: If any required fields have the wrong type.
+    """
+    def __init__(self, raw: Mapping[str, Any]):
+        super().__init__(raw)
+        #: 'class' member, with an underscore to avoid conflicts in Python.
+        self.class_: str
+        #: 'desc' member
+        self.desc: str
+
+        self._check_member('class', str, "string")
+        self.class_ = self._raw['class']
+
+        self._check_member('desc', str, "string")
+        self.desc = self._raw['desc']
diff --git a/python/qemu/qmp/protocol.py b/python/qemu/qmp/protocol.py
new file mode 100644 (file)
index 0000000..a4ffdfa
--- /dev/null
@@ -0,0 +1,1057 @@
+"""
+Generic Asynchronous Message-based Protocol Support
+
+This module provides a generic framework for sending and receiving
+messages over an asyncio stream. `AsyncProtocol` is an abstract class
+that implements the core mechanisms of a simple send/receive protocol,
+and is designed to be extended.
+
+In this package, it is used as the implementation for the `QMPClient`
+class.
+"""
+
+# It's all the docstrings ... ! It's long for a good reason ^_^;
+# pylint: disable=too-many-lines
+
+import asyncio
+from asyncio import StreamReader, StreamWriter
+from enum import Enum
+from functools import wraps
+import logging
+import socket
+from ssl import SSLContext
+from typing import (
+    Any,
+    Awaitable,
+    Callable,
+    Generic,
+    List,
+    Optional,
+    Tuple,
+    TypeVar,
+    Union,
+    cast,
+)
+
+from .error import QMPError
+from .util import (
+    bottom_half,
+    create_task,
+    exception_summary,
+    flush,
+    is_closing,
+    pretty_traceback,
+    upper_half,
+    wait_closed,
+)
+
+
+T = TypeVar('T')
+_U = TypeVar('_U')
+_TaskFN = Callable[[], Awaitable[None]]  # aka ``async def func() -> None``
+
+InternetAddrT = Tuple[str, int]
+UnixAddrT = str
+SocketAddrT = Union[UnixAddrT, InternetAddrT]
+
+
+class Runstate(Enum):
+    """Protocol session runstate."""
+
+    #: Fully quiesced and disconnected.
+    IDLE = 0
+    #: In the process of connecting or establishing a session.
+    CONNECTING = 1
+    #: Fully connected and active session.
+    RUNNING = 2
+    #: In the process of disconnecting.
+    #: Runstate may be returned to `IDLE` by calling `disconnect()`.
+    DISCONNECTING = 3
+
+
+class ConnectError(QMPError):
+    """
+    Raised when the initial connection process has failed.
+
+    This Exception always wraps a "root cause" exception that can be
+    interrogated for additional information.
+
+    :param error_message: Human-readable string describing the error.
+    :param exc: The root-cause exception.
+    """
+    def __init__(self, error_message: str, exc: Exception):
+        super().__init__(error_message)
+        #: Human-readable error string
+        self.error_message: str = error_message
+        #: Wrapped root cause exception
+        self.exc: Exception = exc
+
+    def __str__(self) -> str:
+        cause = str(self.exc)
+        if not cause:
+            # If there's no error string, use the exception name.
+            cause = exception_summary(self.exc)
+        return f"{self.error_message}: {cause}"
+
+
+class StateError(QMPError):
+    """
+    An API command (connect, execute, etc) was issued at an inappropriate time.
+
+    This error is raised when a command like
+    :py:meth:`~AsyncProtocol.connect()` is issued at an inappropriate
+    time.
+
+    :param error_message: Human-readable string describing the state violation.
+    :param state: The actual `Runstate` seen at the time of the violation.
+    :param required: The `Runstate` required to process this command.
+    """
+    def __init__(self, error_message: str,
+                 state: Runstate, required: Runstate):
+        super().__init__(error_message)
+        self.error_message = error_message
+        self.state = state
+        self.required = required
+
+
+F = TypeVar('F', bound=Callable[..., Any])  # pylint: disable=invalid-name
+
+
+# Don't Panic.
+def require(required_state: Runstate) -> Callable[[F], F]:
+    """
+    Decorator: protect a method so it can only be run in a certain `Runstate`.
+
+    :param required_state: The `Runstate` required to invoke this method.
+    :raise StateError: When the required `Runstate` is not met.
+    """
+    def _decorator(func: F) -> F:
+        # _decorator is the decorator that is built by calling the
+        # require() decorator factory; e.g.:
+        #
+        # @require(Runstate.IDLE) def foo(): ...
+        # will replace 'foo' with the result of '_decorator(foo)'.
+
+        @wraps(func)
+        def _wrapper(proto: 'AsyncProtocol[Any]',
+                     *args: Any, **kwargs: Any) -> Any:
+            # _wrapper is the function that gets executed prior to the
+            # decorated method.
+
+            name = type(proto).__name__
+
+            if proto.runstate != required_state:
+                if proto.runstate == Runstate.CONNECTING:
+                    emsg = f"{name} is currently connecting."
+                elif proto.runstate == Runstate.DISCONNECTING:
+                    emsg = (f"{name} is disconnecting."
+                            " Call disconnect() to return to IDLE state.")
+                elif proto.runstate == Runstate.RUNNING:
+                    emsg = f"{name} is already connected and running."
+                elif proto.runstate == Runstate.IDLE:
+                    emsg = f"{name} is disconnected and idle."
+                else:
+                    assert False
+                raise StateError(emsg, proto.runstate, required_state)
+            # No StateError, so call the wrapped method.
+            return func(proto, *args, **kwargs)
+
+        # Return the decorated method;
+        # Transforming Func to Decorated[Func].
+        return cast(F, _wrapper)
+
+    # Return the decorator instance from the decorator factory. Phew!
+    return _decorator
+
+
+class AsyncProtocol(Generic[T]):
+    """
+    AsyncProtocol implements a generic async message-based protocol.
+
+    This protocol assumes the basic unit of information transfer between
+    client and server is a "message", the details of which are left up
+    to the implementation. It assumes the sending and receiving of these
+    messages is full-duplex and not necessarily correlated; i.e. it
+    supports asynchronous inbound messages.
+
+    It is designed to be extended by a specific protocol which provides
+    the implementations for how to read and send messages. These must be
+    defined in `_do_recv()` and `_do_send()`, respectively.
+
+    Other callbacks have a default implementation, but are intended to be
+    either extended or overridden:
+
+     - `_establish_session`:
+         The base implementation starts the reader/writer tasks.
+         A protocol implementation can override this call, inserting
+         actions to be taken prior to starting the reader/writer tasks
+         before the super() call; actions needing to occur afterwards
+         can be written after the super() call.
+     - `_on_message`:
+         Actions to be performed when a message is received.
+     - `_cb_outbound`:
+         Logging/Filtering hook for all outbound messages.
+     - `_cb_inbound`:
+         Logging/Filtering hook for all inbound messages.
+         This hook runs *before* `_on_message()`.
+
+    :param name:
+        Name used for logging messages, if any. By default, messages
+        will log to 'qemu.qmp.protocol', but each individual connection
+        can be given its own logger by giving it a name; messages will
+        then log to 'qemu.qmp.protocol.${name}'.
+    """
+    # pylint: disable=too-many-instance-attributes
+
+    #: Logger object for debugging messages from this connection.
+    logger = logging.getLogger(__name__)
+
+    # Maximum allowable size of read buffer
+    _limit = 64 * 1024
+
+    # -------------------------
+    # Section: Public interface
+    # -------------------------
+
+    def __init__(self, name: Optional[str] = None) -> None:
+        #: The nickname for this connection, if any.
+        self.name: Optional[str] = name
+        if self.name is not None:
+            self.logger = self.logger.getChild(self.name)
+
+        # stream I/O
+        self._reader: Optional[StreamReader] = None
+        self._writer: Optional[StreamWriter] = None
+
+        # Outbound Message queue
+        self._outgoing: asyncio.Queue[T]
+
+        # Special, long-running tasks:
+        self._reader_task: Optional[asyncio.Future[None]] = None
+        self._writer_task: Optional[asyncio.Future[None]] = None
+
+        # Aggregate of the above two tasks, used for Exception management.
+        self._bh_tasks: Optional[asyncio.Future[Tuple[None, None]]] = None
+
+        #: Disconnect task. The disconnect implementation runs in a task
+        #: so that asynchronous disconnects (initiated by the
+        #: reader/writer) are allowed to wait for the reader/writers to
+        #: exit.
+        self._dc_task: Optional[asyncio.Future[None]] = None
+
+        self._runstate = Runstate.IDLE
+        self._runstate_changed: Optional[asyncio.Event] = None
+
+        # Server state for start_server() and _incoming()
+        self._server: Optional[asyncio.AbstractServer] = None
+        self._accepted: Optional[asyncio.Event] = None
+
+    def __repr__(self) -> str:
+        cls_name = type(self).__name__
+        tokens = []
+        if self.name is not None:
+            tokens.append(f"name={self.name!r}")
+        tokens.append(f"runstate={self.runstate.name}")
+        return f"<{cls_name} {' '.join(tokens)}>"
+
+    @property  # @upper_half
+    def runstate(self) -> Runstate:
+        """The current `Runstate` of the connection."""
+        return self._runstate
+
+    @upper_half
+    async def runstate_changed(self) -> Runstate:
+        """
+        Wait for the `runstate` to change, then return that runstate.
+        """
+        await self._runstate_event.wait()
+        return self.runstate
+
+    @upper_half
+    @require(Runstate.IDLE)
+    async def start_server_and_accept(
+            self, address: SocketAddrT,
+            ssl: Optional[SSLContext] = None
+    ) -> None:
+        """
+        Accept a connection and begin processing message queues.
+
+        If this call fails, `runstate` is guaranteed to be set back to `IDLE`.
+        This method is precisely equivalent to calling `start_server()`
+        followed by `accept()`.
+
+        :param address:
+            Address to listen on; UNIX socket path or TCP address/port.
+        :param ssl: SSL context to use, if any.
+
+        :raise StateError: When the `Runstate` is not `IDLE`.
+        :raise ConnectError:
+            When a connection or session cannot be established.
+
+            This exception will wrap a more concrete one. In most cases,
+            the wrapped exception will be `OSError` or `EOFError`. If a
+            protocol-level failure occurs while establishing a new
+            session, the wrapped error may also be an `QMPError`.
+        """
+        await self.start_server(address, ssl)
+        await self.accept()
+        assert self.runstate == Runstate.RUNNING
+
+    @upper_half
+    @require(Runstate.IDLE)
+    async def start_server(self, address: SocketAddrT,
+                           ssl: Optional[SSLContext] = None) -> None:
+        """
+        Start listening for an incoming connection, but do not wait for a peer.
+
+        This method starts listening for an incoming connection, but
+        does not block waiting for a peer. This call will return
+        immediately after binding and listening on a socket. A later
+        call to `accept()` must be made in order to finalize the
+        incoming connection.
+
+        :param address:
+            Address to listen on; UNIX socket path or TCP address/port.
+        :param ssl: SSL context to use, if any.
+
+        :raise StateError: When the `Runstate` is not `IDLE`.
+        :raise ConnectError:
+            When the server could not start listening on this address.
+
+            This exception will wrap a more concrete one. In most cases,
+            the wrapped exception will be `OSError`.
+        """
+        await self._session_guard(
+            self._do_start_server(address, ssl),
+            'Failed to establish connection')
+        assert self.runstate == Runstate.CONNECTING
+
+    @upper_half
+    @require(Runstate.CONNECTING)
+    async def accept(self) -> None:
+        """
+        Accept an incoming connection and begin processing message queues.
+
+        If this call fails, `runstate` is guaranteed to be set back to `IDLE`.
+
+        :raise StateError: When the `Runstate` is not `CONNECTING`.
+        :raise QMPError: When `start_server()` was not called yet.
+        :raise ConnectError:
+            When a connection or session cannot be established.
+
+            This exception will wrap a more concrete one. In most cases,
+            the wrapped exception will be `OSError` or `EOFError`. If a
+            protocol-level failure occurs while establishing a new
+            session, the wrapped error may also be an `QMPError`.
+        """
+        if self._accepted is None:
+            raise QMPError("Cannot call accept() before start_server().")
+        await self._session_guard(
+            self._do_accept(),
+            'Failed to establish connection')
+        await self._session_guard(
+            self._establish_session(),
+            'Failed to establish session')
+        assert self.runstate == Runstate.RUNNING
+
+    @upper_half
+    @require(Runstate.IDLE)
+    async def connect(self, address: Union[SocketAddrT, socket.socket],
+                      ssl: Optional[SSLContext] = None) -> None:
+        """
+        Connect to the server and begin processing message queues.
+
+        If this call fails, `runstate` is guaranteed to be set back to `IDLE`.
+
+        :param address:
+            Address to connect to; UNIX socket path or TCP address/port.
+        :param ssl: SSL context to use, if any.
+
+        :raise StateError: When the `Runstate` is not `IDLE`.
+        :raise ConnectError:
+            When a connection or session cannot be established.
+
+            This exception will wrap a more concrete one. In most cases,
+            the wrapped exception will be `OSError` or `EOFError`. If a
+            protocol-level failure occurs while establishing a new
+            session, the wrapped error may also be an `QMPError`.
+        """
+        await self._session_guard(
+            self._do_connect(address, ssl),
+            'Failed to establish connection')
+        await self._session_guard(
+            self._establish_session(),
+            'Failed to establish session')
+        assert self.runstate == Runstate.RUNNING
+
+    @upper_half
+    async def disconnect(self) -> None:
+        """
+        Disconnect and wait for all tasks to fully stop.
+
+        If there was an exception that caused the reader/writers to
+        terminate prematurely, it will be raised here.
+
+        :raise Exception: When the reader or writer terminate unexpectedly.
+        """
+        self.logger.debug("disconnect() called.")
+        self._schedule_disconnect()
+        await self._wait_disconnect()
+
+    # --------------------------
+    # Section: Session machinery
+    # --------------------------
+
+    async def _session_guard(self, coro: Awaitable[None], emsg: str) -> None:
+        """
+        Async guard function used to roll back to `IDLE` on any error.
+
+        On any Exception, the state machine will be reset back to
+        `IDLE`. Most Exceptions will be wrapped with `ConnectError`, but
+        `BaseException` events will be left alone (This includes
+        asyncio.CancelledError, even prior to Python 3.8).
+
+        :param error_message:
+            Human-readable string describing what connection phase failed.
+
+        :raise BaseException:
+            When `BaseException` occurs in the guarded block.
+        :raise ConnectError:
+            When any other error is encountered in the guarded block.
+        """
+        # Note: After Python 3.6 support is removed, this should be an
+        # @asynccontextmanager instead of accepting a callback.
+        try:
+            await coro
+        except BaseException as err:
+            self.logger.error("%s: %s", emsg, exception_summary(err))
+            self.logger.debug("%s:\n%s\n", emsg, pretty_traceback())
+            try:
+                # Reset the runstate back to IDLE.
+                await self.disconnect()
+            except:
+                # We don't expect any Exceptions from the disconnect function
+                # here, because we failed to connect in the first place.
+                # The disconnect() function is intended to perform
+                # only cannot-fail cleanup here, but you never know.
+                emsg = (
+                    "Unexpected bottom half exception. "
+                    "This is a bug in the QMP library. "
+                    "Please report it to <qemu-devel@nongnu.org> and "
+                    "CC: John Snow <jsnow@redhat.com>."
+                )
+                self.logger.critical("%s:\n%s\n", emsg, pretty_traceback())
+                raise
+
+            # CancelledError is an Exception with special semantic meaning;
+            # We do NOT want to wrap it up under ConnectError.
+            # NB: CancelledError is not a BaseException before Python 3.8
+            if isinstance(err, asyncio.CancelledError):
+                raise
+
+            # Any other kind of error can be treated as some kind of connection
+            # failure broadly. Inspect the 'exc' field to explore the root
+            # cause in greater detail.
+            if isinstance(err, Exception):
+                raise ConnectError(emsg, err) from err
+
+            # Raise BaseExceptions un-wrapped, they're more important.
+            raise
+
+    @property
+    def _runstate_event(self) -> asyncio.Event:
+        # asyncio.Event() objects should not be created prior to entrance into
+        # an event loop, so we can ensure we create it in the correct context.
+        # Create it on-demand *only* at the behest of an 'async def' method.
+        if not self._runstate_changed:
+            self._runstate_changed = asyncio.Event()
+        return self._runstate_changed
+
+    @upper_half
+    @bottom_half
+    def _set_state(self, state: Runstate) -> None:
+        """
+        Change the `Runstate` of the protocol connection.
+
+        Signals the `runstate_changed` event.
+        """
+        if state == self._runstate:
+            return
+
+        self.logger.debug("Transitioning from '%s' to '%s'.",
+                          str(self._runstate), str(state))
+        self._runstate = state
+        self._runstate_event.set()
+        self._runstate_event.clear()
+
+    @bottom_half
+    async def _stop_server(self) -> None:
+        """
+        Stop listening for / accepting new incoming connections.
+        """
+        if self._server is None:
+            return
+
+        try:
+            self.logger.debug("Stopping server.")
+            self._server.close()
+            self.logger.debug("Server stopped.")
+        finally:
+            self._server = None
+
+    @bottom_half  # However, it does not run from the R/W tasks.
+    async def _incoming(self,
+                        reader: asyncio.StreamReader,
+                        writer: asyncio.StreamWriter) -> None:
+        """
+        Accept an incoming connection and signal the upper_half.
+
+        This method does the minimum necessary to accept a single
+        incoming connection. It signals back to the upper_half ASAP so
+        that any errors during session initialization can occur
+        naturally in the caller's stack.
+
+        :param reader: Incoming `asyncio.StreamReader`
+        :param writer: Incoming `asyncio.StreamWriter`
+        """
+        peer = writer.get_extra_info('peername', 'Unknown peer')
+        self.logger.debug("Incoming connection from %s", peer)
+
+        if self._reader or self._writer:
+            # Sadly, we can have more than one pending connection
+            # because of https://bugs.python.org/issue46715
+            # Close any extra connections we don't actually want.
+            self.logger.warning("Extraneous connection inadvertently accepted")
+            writer.close()
+            return
+
+        # A connection has been accepted; stop listening for new ones.
+        assert self._accepted is not None
+        await self._stop_server()
+        self._reader, self._writer = (reader, writer)
+        self._accepted.set()
+
+    @upper_half
+    async def _do_start_server(self, address: SocketAddrT,
+                               ssl: Optional[SSLContext] = None) -> None:
+        """
+        Start listening for an incoming connection, but do not wait for a peer.
+
+        This method starts listening for an incoming connection, but does not
+        block waiting for a peer. This call will return immediately after
+        binding and listening to a socket. A later call to accept() must be
+        made in order to finalize the incoming connection.
+
+        :param address:
+            Address to listen on; UNIX socket path or TCP address/port.
+        :param ssl: SSL context to use, if any.
+
+        :raise OSError: For stream-related errors.
+        """
+        assert self.runstate == Runstate.IDLE
+        self._set_state(Runstate.CONNECTING)
+
+        self.logger.debug("Awaiting connection on %s ...", address)
+        self._accepted = asyncio.Event()
+
+        if isinstance(address, tuple):
+            coro = asyncio.start_server(
+                self._incoming,
+                host=address[0],
+                port=address[1],
+                ssl=ssl,
+                backlog=1,
+                limit=self._limit,
+            )
+        else:
+            coro = asyncio.start_unix_server(
+                self._incoming,
+                path=address,
+                ssl=ssl,
+                backlog=1,
+                limit=self._limit,
+            )
+
+        # Allow runstate watchers to witness 'CONNECTING' state; some
+        # failures in the streaming layer are synchronous and will not
+        # otherwise yield.
+        await asyncio.sleep(0)
+
+        # This will start the server (bind(2), listen(2)). It will also
+        # call accept(2) if we yield, but we don't block on that here.
+        self._server = await coro
+        self.logger.debug("Server listening on %s", address)
+
+    @upper_half
+    async def _do_accept(self) -> None:
+        """
+        Wait for and accept an incoming connection.
+
+        Requires that we have not yet accepted an incoming connection
+        from the upper_half, but it's OK if the server is no longer
+        running because the bottom_half has already accepted the
+        connection.
+        """
+        assert self._accepted is not None
+        await self._accepted.wait()
+        assert self._server is None
+        self._accepted = None
+
+        self.logger.debug("Connection accepted.")
+
+    @upper_half
+    async def _do_connect(self, address: Union[SocketAddrT, socket.socket],
+                          ssl: Optional[SSLContext] = None) -> None:
+        """
+        Acting as the transport client, initiate a connection to a server.
+
+        :param address:
+            Address to connect to; UNIX socket path or TCP address/port.
+        :param ssl: SSL context to use, if any.
+
+        :raise OSError: For stream-related errors.
+        """
+        assert self.runstate == Runstate.IDLE
+        self._set_state(Runstate.CONNECTING)
+
+        # Allow runstate watchers to witness 'CONNECTING' state; some
+        # failures in the streaming layer are synchronous and will not
+        # otherwise yield.
+        await asyncio.sleep(0)
+
+        if isinstance(address, socket.socket):
+            self.logger.debug("Connecting with existing socket: "
+                              "fd=%d, family=%r, type=%r",
+                              address.fileno(), address.family, address.type)
+            connect = asyncio.open_connection(
+                limit=self._limit,
+                ssl=ssl,
+                sock=address,
+            )
+        elif isinstance(address, tuple):
+            self.logger.debug("Connecting to %s ...", address)
+            connect = asyncio.open_connection(
+                address[0],
+                address[1],
+                ssl=ssl,
+                limit=self._limit,
+            )
+        else:
+            self.logger.debug("Connecting to file://%s ...", address)
+            connect = asyncio.open_unix_connection(
+                path=address,
+                ssl=ssl,
+                limit=self._limit,
+            )
+
+        self._reader, self._writer = await connect
+        self.logger.debug("Connected.")
+
+    @upper_half
+    async def _establish_session(self) -> None:
+        """
+        Establish a new session.
+
+        Starts the readers/writer tasks; subclasses may perform their
+        own negotiations here. The Runstate will be RUNNING upon
+        successful conclusion.
+        """
+        assert self.runstate == Runstate.CONNECTING
+
+        self._outgoing = asyncio.Queue()
+
+        reader_coro = self._bh_loop_forever(self._bh_recv_message, 'Reader')
+        writer_coro = self._bh_loop_forever(self._bh_send_message, 'Writer')
+
+        self._reader_task = create_task(reader_coro)
+        self._writer_task = create_task(writer_coro)
+
+        self._bh_tasks = asyncio.gather(
+            self._reader_task,
+            self._writer_task,
+        )
+
+        self._set_state(Runstate.RUNNING)
+        await asyncio.sleep(0)  # Allow runstate_event to process
+
+    @upper_half
+    @bottom_half
+    def _schedule_disconnect(self) -> None:
+        """
+        Initiate a disconnect; idempotent.
+
+        This method is used both in the upper-half as a direct
+        consequence of `disconnect()`, and in the bottom-half in the
+        case of unhandled exceptions in the reader/writer tasks.
+
+        It can be invoked no matter what the `runstate` is.
+        """
+        if not self._dc_task:
+            self._set_state(Runstate.DISCONNECTING)
+            self.logger.debug("Scheduling disconnect.")
+            self._dc_task = create_task(self._bh_disconnect())
+
+    @upper_half
+    async def _wait_disconnect(self) -> None:
+        """
+        Waits for a previously scheduled disconnect to finish.
+
+        This method will gather any bottom half exceptions and re-raise
+        the one that occurred first; presuming it to be the root cause
+        of any subsequent Exceptions. It is intended to be used in the
+        upper half of the call chain.
+
+        :raise Exception:
+            Arbitrary exception re-raised on behalf of the reader/writer.
+        """
+        assert self.runstate == Runstate.DISCONNECTING
+        assert self._dc_task
+
+        aws: List[Awaitable[object]] = [self._dc_task]
+        if self._bh_tasks:
+            aws.insert(0, self._bh_tasks)
+        all_defined_tasks = asyncio.gather(*aws)
+
+        # Ensure disconnect is done; Exception (if any) is not raised here:
+        await asyncio.wait((self._dc_task,))
+
+        try:
+            await all_defined_tasks  # Raise Exceptions from the bottom half.
+        finally:
+            self._cleanup()
+            self._set_state(Runstate.IDLE)
+
+    @upper_half
+    def _cleanup(self) -> None:
+        """
+        Fully reset this object to a clean state and return to `IDLE`.
+        """
+        def _paranoid_task_erase(task: Optional['asyncio.Future[_U]']
+                                 ) -> Optional['asyncio.Future[_U]']:
+            # Help to erase a task, ENSURING it is fully quiesced first.
+            assert (task is None) or task.done()
+            return None if (task and task.done()) else task
+
+        assert self.runstate == Runstate.DISCONNECTING
+        self._dc_task = _paranoid_task_erase(self._dc_task)
+        self._reader_task = _paranoid_task_erase(self._reader_task)
+        self._writer_task = _paranoid_task_erase(self._writer_task)
+        self._bh_tasks = _paranoid_task_erase(self._bh_tasks)
+
+        self._reader = None
+        self._writer = None
+        self._accepted = None
+
+        # NB: _runstate_changed cannot be cleared because we still need it to
+        # send the final runstate changed event ...!
+
+    # ----------------------------
+    # Section: Bottom Half methods
+    # ----------------------------
+
+    @bottom_half
+    async def _bh_disconnect(self) -> None:
+        """
+        Disconnect and cancel all outstanding tasks.
+
+        It is designed to be called from its task context,
+        :py:obj:`~AsyncProtocol._dc_task`. By running in its own task,
+        it is free to wait on any pending actions that may still need to
+        occur in either the reader or writer tasks.
+        """
+        assert self.runstate == Runstate.DISCONNECTING
+
+        def _done(task: Optional['asyncio.Future[Any]']) -> bool:
+            return task is not None and task.done()
+
+        # If the server is running, stop it.
+        await self._stop_server()
+
+        # Are we already in an error pathway? If either of the tasks are
+        # already done, or if we have no tasks but a reader/writer; we
+        # must be.
+        #
+        # NB: We can't use _bh_tasks to check for premature task
+        # completion, because it may not yet have had a chance to run
+        # and gather itself.
+        tasks = tuple(filter(None, (self._writer_task, self._reader_task)))
+        error_pathway = _done(self._reader_task) or _done(self._writer_task)
+        if not tasks:
+            error_pathway |= bool(self._reader) or bool(self._writer)
+
+        try:
+            # Try to flush the writer, if possible.
+            # This *may* cause an error and force us over into the error path.
+            if not error_pathway:
+                await self._bh_flush_writer()
+        except BaseException as err:
+            error_pathway = True
+            emsg = "Failed to flush the writer"
+            self.logger.error("%s: %s", emsg, exception_summary(err))
+            self.logger.debug("%s:\n%s\n", emsg, pretty_traceback())
+            raise
+        finally:
+            # Cancel any still-running tasks (Won't raise):
+            if self._writer_task is not None and not self._writer_task.done():
+                self.logger.debug("Cancelling writer task.")
+                self._writer_task.cancel()
+            if self._reader_task is not None and not self._reader_task.done():
+                self.logger.debug("Cancelling reader task.")
+                self._reader_task.cancel()
+
+            # Close out the tasks entirely (Won't raise):
+            if tasks:
+                self.logger.debug("Waiting for tasks to complete ...")
+                await asyncio.wait(tasks)
+
+            # Lastly, close the stream itself. (*May raise*!):
+            await self._bh_close_stream(error_pathway)
+            self.logger.debug("Disconnected.")
+
+    @bottom_half
+    async def _bh_flush_writer(self) -> None:
+        if not self._writer_task:
+            return
+
+        self.logger.debug("Draining the outbound queue ...")
+        await self._outgoing.join()
+        if self._writer is not None:
+            self.logger.debug("Flushing the StreamWriter ...")
+            await flush(self._writer)
+
+    @bottom_half
+    async def _bh_close_stream(self, error_pathway: bool = False) -> None:
+        # NB: Closing the writer also implicitly closes the reader.
+        if not self._writer:
+            return
+
+        if not is_closing(self._writer):
+            self.logger.debug("Closing StreamWriter.")
+            self._writer.close()
+
+        self.logger.debug("Waiting for StreamWriter to close ...")
+        try:
+            await wait_closed(self._writer)
+        except Exception:  # pylint: disable=broad-except
+            # It's hard to tell if the Stream is already closed or
+            # not. Even if one of the tasks has failed, it may have
+            # failed for a higher-layered protocol reason. The
+            # stream could still be open and perfectly fine.
+            # I don't know how to discern its health here.
+
+            if error_pathway:
+                # We already know that *something* went wrong. Let's
+                # just trust that the Exception we already have is the
+                # better one to present to the user, even if we don't
+                # genuinely *know* the relationship between the two.
+                self.logger.debug(
+                    "Discarding Exception from wait_closed:\n%s\n",
+                    pretty_traceback(),
+                )
+            else:
+                # Oops, this is a brand-new error!
+                raise
+        finally:
+            self.logger.debug("StreamWriter closed.")
+
+    @bottom_half
+    async def _bh_loop_forever(self, async_fn: _TaskFN, name: str) -> None:
+        """
+        Run one of the bottom-half methods in a loop forever.
+
+        If the bottom half ever raises any exception, schedule a
+        disconnect that will terminate the entire loop.
+
+        :param async_fn: The bottom-half method to run in a loop.
+        :param name: The name of this task, used for logging.
+        """
+        try:
+            while True:
+                await async_fn()
+        except asyncio.CancelledError:
+            # We have been cancelled by _bh_disconnect, exit gracefully.
+            self.logger.debug("Task.%s: cancelled.", name)
+            return
+        except BaseException as err:
+            self.logger.log(
+                logging.INFO if isinstance(err, EOFError) else logging.ERROR,
+                "Task.%s: %s",
+                name, exception_summary(err)
+            )
+            self.logger.debug("Task.%s: failure:\n%s\n",
+                              name, pretty_traceback())
+            self._schedule_disconnect()
+            raise
+        finally:
+            self.logger.debug("Task.%s: exiting.", name)
+
+    @bottom_half
+    async def _bh_send_message(self) -> None:
+        """
+        Wait for an outgoing message, then send it.
+
+        Designed to be run in `_bh_loop_forever()`.
+        """
+        msg = await self._outgoing.get()
+        try:
+            await self._send(msg)
+        finally:
+            self._outgoing.task_done()
+
+    @bottom_half
+    async def _bh_recv_message(self) -> None:
+        """
+        Wait for an incoming message and call `_on_message` to route it.
+
+        Designed to be run in `_bh_loop_forever()`.
+        """
+        msg = await self._recv()
+        await self._on_message(msg)
+
+    # --------------------
+    # Section: Message I/O
+    # --------------------
+
+    @upper_half
+    @bottom_half
+    def _cb_outbound(self, msg: T) -> T:
+        """
+        Callback: outbound message hook.
+
+        This is intended for subclasses to be able to add arbitrary
+        hooks to filter or manipulate outgoing messages. The base
+        implementation does nothing but log the message without any
+        manipulation of the message.
+
+        :param msg: raw outbound message
+        :return: final outbound message
+        """
+        self.logger.debug("--> %s", str(msg))
+        return msg
+
+    @upper_half
+    @bottom_half
+    def _cb_inbound(self, msg: T) -> T:
+        """
+        Callback: inbound message hook.
+
+        This is intended for subclasses to be able to add arbitrary
+        hooks to filter or manipulate incoming messages. The base
+        implementation does nothing but log the message without any
+        manipulation of the message.
+
+        This method does not "handle" incoming messages; it is a filter.
+        The actual "endpoint" for incoming messages is `_on_message()`.
+
+        :param msg: raw inbound message
+        :return: processed inbound message
+        """
+        self.logger.debug("<-- %s", str(msg))
+        return msg
+
+    @upper_half
+    @bottom_half
+    async def _readline(self) -> bytes:
+        """
+        Wait for a newline from the incoming reader.
+
+        This method is provided as a convenience for upper-layer
+        protocols, as many are line-based.
+
+        This method *may* return a sequence of bytes without a trailing
+        newline if EOF occurs, but *some* bytes were received. In this
+        case, the next call will raise `EOFError`. It is assumed that
+        the layer 5 protocol will decide if there is anything meaningful
+        to be done with a partial message.
+
+        :raise OSError: For stream-related errors.
+        :raise EOFError:
+            If the reader stream is at EOF and there are no bytes to return.
+        :return: bytes, including the newline.
+        """
+        assert self._reader is not None
+        msg_bytes = await self._reader.readline()
+
+        if not msg_bytes:
+            if self._reader.at_eof():
+                raise EOFError
+
+        return msg_bytes
+
+    @upper_half
+    @bottom_half
+    async def _do_recv(self) -> T:
+        """
+        Abstract: Read from the stream and return a message.
+
+        Very low-level; intended to only be called by `_recv()`.
+        """
+        raise NotImplementedError
+
+    @upper_half
+    @bottom_half
+    async def _recv(self) -> T:
+        """
+        Read an arbitrary protocol message.
+
+        .. warning::
+            This method is intended primarily for `_bh_recv_message()`
+            to use in an asynchronous task loop. Using it outside of
+            this loop will "steal" messages from the normal routing
+            mechanism. It is safe to use prior to `_establish_session()`,
+            but should not be used otherwise.
+
+        This method uses `_do_recv()` to retrieve the raw message, and
+        then transforms it using `_cb_inbound()`.
+
+        :return: A single (filtered, processed) protocol message.
+        """
+        message = await self._do_recv()
+        return self._cb_inbound(message)
+
+    @upper_half
+    @bottom_half
+    def _do_send(self, msg: T) -> None:
+        """
+        Abstract: Write a message to the stream.
+
+        Very low-level; intended to only be called by `_send()`.
+        """
+        raise NotImplementedError
+
+    @upper_half
+    @bottom_half
+    async def _send(self, msg: T) -> None:
+        """
+        Send an arbitrary protocol message.
+
+        This method will transform any outgoing messages according to
+        `_cb_outbound()`.
+
+        .. warning::
+            Like `_recv()`, this method is intended to be called by
+            the writer task loop that processes outgoing
+            messages. Calling it directly may circumvent logic
+            implemented by the caller meant to correlate outgoing and
+            incoming messages.
+
+        :raise OSError: For problems with the underlying stream.
+        """
+        msg = self._cb_outbound(msg)
+        self._do_send(msg)
+
+    @bottom_half
+    async def _on_message(self, msg: T) -> None:
+        """
+        Called to handle the receipt of a new message.
+
+        .. caution::
+            This is executed from within the reader loop, so be advised
+            that waiting on either the reader or writer task will lead
+            to deadlock. Additionally, any unhandled exceptions will
+            directly cause the loop to halt, so logic may be best-kept
+            to a minimum if at all possible.
+
+        :param msg: The incoming message, already logged/filtered.
+        """
+        # Nothing to do in the abstract case.
diff --git a/python/qemu/qmp/py.typed b/python/qemu/qmp/py.typed
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/python/qemu/qmp/qmp_client.py b/python/qemu/qmp/qmp_client.py
new file mode 100644 (file)
index 0000000..2a817f9
--- /dev/null
@@ -0,0 +1,655 @@
+"""
+QMP Protocol Implementation
+
+This module provides the `QMPClient` class, which can be used to connect
+and send commands to a QMP server such as QEMU. The QMP class can be
+used to either connect to a listening server, or used to listen and
+accept an incoming connection from that server.
+"""
+
+import asyncio
+import logging
+import socket
+import struct
+from typing import (
+    Dict,
+    List,
+    Mapping,
+    Optional,
+    Union,
+    cast,
+)
+
+from .error import ProtocolError, QMPError
+from .events import Events
+from .message import Message
+from .models import ErrorResponse, Greeting
+from .protocol import AsyncProtocol, Runstate, require
+from .util import (
+    bottom_half,
+    exception_summary,
+    pretty_traceback,
+    upper_half,
+)
+
+
+class _WrappedProtocolError(ProtocolError):
+    """
+    Abstract exception class for Protocol errors that wrap an Exception.
+
+    :param error_message: Human-readable string describing the error.
+    :param exc: The root-cause exception.
+    """
+    def __init__(self, error_message: str, exc: Exception):
+        super().__init__(error_message)
+        self.exc = exc
+
+    def __str__(self) -> str:
+        return f"{self.error_message}: {self.exc!s}"
+
+
+class GreetingError(_WrappedProtocolError):
+    """
+    An exception occurred during the Greeting phase.
+
+    :param error_message: Human-readable string describing the error.
+    :param exc: The root-cause exception.
+    """
+
+
+class NegotiationError(_WrappedProtocolError):
+    """
+    An exception occurred during the Negotiation phase.
+
+    :param error_message: Human-readable string describing the error.
+    :param exc: The root-cause exception.
+    """
+
+
+class ExecuteError(QMPError):
+    """
+    Exception raised by `QMPClient.execute()` on RPC failure.
+
+    :param error_response: The RPC error response object.
+    :param sent: The sent RPC message that caused the failure.
+    :param received: The raw RPC error reply received.
+    """
+    def __init__(self, error_response: ErrorResponse,
+                 sent: Message, received: Message):
+        super().__init__(error_response.error.desc)
+        #: The sent `Message` that caused the failure
+        self.sent: Message = sent
+        #: The received `Message` that indicated failure
+        self.received: Message = received
+        #: The parsed error response
+        self.error: ErrorResponse = error_response
+        #: The QMP error class
+        self.error_class: str = error_response.error.class_
+
+
+class ExecInterruptedError(QMPError):
+    """
+    Exception raised by `execute()` (et al) when an RPC is interrupted.
+
+    This error is raised when an `execute()` statement could not be
+    completed.  This can occur because the connection itself was
+    terminated before a reply was received.
+
+    The true cause of the interruption will be available via `disconnect()`.
+    """
+
+
+class _MsgProtocolError(ProtocolError):
+    """
+    Abstract error class for protocol errors that have a `Message` object.
+
+    This Exception class is used for protocol errors where the `Message`
+    was mechanically understood, but was found to be inappropriate or
+    malformed.
+
+    :param error_message: Human-readable string describing the error.
+    :param msg: The QMP `Message` that caused the error.
+    """
+    def __init__(self, error_message: str, msg: Message):
+        super().__init__(error_message)
+        #: The received `Message` that caused the error.
+        self.msg: Message = msg
+
+    def __str__(self) -> str:
+        return "\n".join([
+            super().__str__(),
+            f"  Message was: {str(self.msg)}\n",
+        ])
+
+
+class ServerParseError(_MsgProtocolError):
+    """
+    The Server sent a `Message` indicating parsing failure.
+
+    i.e. A reply has arrived from the server, but it is missing the "ID"
+    field, indicating a parsing error.
+
+    :param error_message: Human-readable string describing the error.
+    :param msg: The QMP `Message` that caused the error.
+    """
+
+
+class BadReplyError(_MsgProtocolError):
+    """
+    An execution reply was successfully routed, but not understood.
+
+    If a QMP message is received with an 'id' field to allow it to be
+    routed, but is otherwise malformed, this exception will be raised.
+
+    A reply message is malformed if it is missing either the 'return' or
+    'error' keys, or if the 'error' value has missing keys or members of
+    the wrong type.
+
+    :param error_message: Human-readable string describing the error.
+    :param msg: The malformed reply that was received.
+    :param sent: The message that was sent that prompted the error.
+    """
+    def __init__(self, error_message: str, msg: Message, sent: Message):
+        super().__init__(error_message, msg)
+        #: The sent `Message` that caused the failure
+        self.sent = sent
+
+
+class QMPClient(AsyncProtocol[Message], Events):
+    """
+    Implements a QMP client connection.
+
+    QMP can be used to establish a connection as either the transport
+    client or server, though this class always acts as the QMP client.
+
+    :param name: Optional nickname for the connection, used for logging.
+
+    Basic script-style usage looks like this::
+
+      qmp = QMPClient('my_virtual_machine_name')
+      await qmp.connect(('127.0.0.1', 1234))
+      ...
+      res = await qmp.execute('block-query')
+      ...
+      await qmp.disconnect()
+
+    Basic async client-style usage looks like this::
+
+      class Client:
+          def __init__(self, name: str):
+              self.qmp = QMPClient(name)
+
+          async def watch_events(self):
+              try:
+                  async for event in self.qmp.events:
+                      print(f"Event: {event['event']}")
+              except asyncio.CancelledError:
+                  return
+
+          async def run(self, address='/tmp/qemu.socket'):
+              await self.qmp.connect(address)
+              asyncio.create_task(self.watch_events())
+              await self.qmp.runstate_changed.wait()
+              await self.disconnect()
+
+    See `qmp.events` for more detail on event handling patterns.
+    """
+    #: Logger object used for debugging messages.
+    logger = logging.getLogger(__name__)
+
+    # Read buffer limit; 10MB like libvirt default
+    _limit = 10 * 1024 * 1024
+
+    # Type alias for pending execute() result items
+    _PendingT = Union[Message, ExecInterruptedError]
+
+    def __init__(self, name: Optional[str] = None) -> None:
+        super().__init__(name)
+        Events.__init__(self)
+
+        #: Whether or not to await a greeting after establishing a connection.
+        self.await_greeting: bool = True
+
+        #: Whether or not to perform capabilities negotiation upon connection.
+        #: Implies `await_greeting`.
+        self.negotiate: bool = True
+
+        # Cached Greeting, if one was awaited.
+        self._greeting: Optional[Greeting] = None
+
+        # Command ID counter
+        self._execute_id = 0
+
+        # Incoming RPC reply messages.
+        self._pending: Dict[
+            Union[str, None],
+            'asyncio.Queue[QMPClient._PendingT]'
+        ] = {}
+
+    @property
+    def greeting(self) -> Optional[Greeting]:
+        """The `Greeting` from the QMP server, if any."""
+        return self._greeting
+
+    @upper_half
+    async def _establish_session(self) -> None:
+        """
+        Initiate the QMP session.
+
+        Wait for the QMP greeting and perform capabilities negotiation.
+
+        :raise GreetingError: When the greeting is not understood.
+        :raise NegotiationError: If the negotiation fails.
+        :raise EOFError: When the server unexpectedly hangs up.
+        :raise OSError: For underlying stream errors.
+        """
+        self._greeting = None
+        self._pending = {}
+
+        if self.await_greeting or self.negotiate:
+            self._greeting = await self._get_greeting()
+
+        if self.negotiate:
+            await self._negotiate()
+
+        # This will start the reader/writers:
+        await super()._establish_session()
+
+    @upper_half
+    async def _get_greeting(self) -> Greeting:
+        """
+        :raise GreetingError: When the greeting is not understood.
+        :raise EOFError: When the server unexpectedly hangs up.
+        :raise OSError: For underlying stream errors.
+
+        :return: the Greeting object given by the server.
+        """
+        self.logger.debug("Awaiting greeting ...")
+
+        try:
+            msg = await self._recv()
+            return Greeting(msg)
+        except (ProtocolError, KeyError, TypeError) as err:
+            emsg = "Did not understand Greeting"
+            self.logger.error("%s: %s", emsg, exception_summary(err))
+            self.logger.debug("%s:\n%s\n", emsg, pretty_traceback())
+            raise GreetingError(emsg, err) from err
+        except BaseException as err:
+            # EOFError, OSError, or something unexpected.
+            emsg = "Failed to receive Greeting"
+            self.logger.error("%s: %s", emsg, exception_summary(err))
+            self.logger.debug("%s:\n%s\n", emsg, pretty_traceback())
+            raise
+
+    @upper_half
+    async def _negotiate(self) -> None:
+        """
+        Perform QMP capabilities negotiation.
+
+        :raise NegotiationError: When negotiation fails.
+        :raise EOFError: When the server unexpectedly hangs up.
+        :raise OSError: For underlying stream errors.
+        """
+        self.logger.debug("Negotiating capabilities ...")
+
+        arguments: Dict[str, List[str]] = {}
+        if self._greeting and 'oob' in self._greeting.QMP.capabilities:
+            arguments.setdefault('enable', []).append('oob')
+        msg = self.make_execute_msg('qmp_capabilities', arguments=arguments)
+
+        # It's not safe to use execute() here, because the reader/writers
+        # aren't running. AsyncProtocol *requires* that a new session
+        # does not fail after the reader/writers are running!
+        try:
+            await self._send(msg)
+            reply = await self._recv()
+            assert 'return' in reply
+            assert 'error' not in reply
+        except (ProtocolError, AssertionError) as err:
+            emsg = "Negotiation failed"
+            self.logger.error("%s: %s", emsg, exception_summary(err))
+            self.logger.debug("%s:\n%s\n", emsg, pretty_traceback())
+            raise NegotiationError(emsg, err) from err
+        except BaseException as err:
+            # EOFError, OSError, or something unexpected.
+            emsg = "Negotiation failed"
+            self.logger.error("%s: %s", emsg, exception_summary(err))
+            self.logger.debug("%s:\n%s\n", emsg, pretty_traceback())
+            raise
+
+    @bottom_half
+    async def _bh_disconnect(self) -> None:
+        try:
+            await super()._bh_disconnect()
+        finally:
+            if self._pending:
+                self.logger.debug("Cancelling pending executions")
+            keys = self._pending.keys()
+            for key in keys:
+                self.logger.debug("Cancelling execution '%s'", key)
+                self._pending[key].put_nowait(
+                    ExecInterruptedError("Disconnected")
+                )
+
+            self.logger.debug("QMP Disconnected.")
+
+    @upper_half
+    def _cleanup(self) -> None:
+        super()._cleanup()
+        assert not self._pending
+
+    @bottom_half
+    async def _on_message(self, msg: Message) -> None:
+        """
+        Add an incoming message to the appropriate queue/handler.
+
+        :raise ServerParseError: When Message indicates server parse failure.
+        """
+        # Incoming messages are not fully parsed/validated here;
+        # do only light peeking to know how to route the messages.
+
+        if 'event' in msg:
+            await self._event_dispatch(msg)
+            return
+
+        # Below, we assume everything left is an execute/exec-oob response.
+
+        exec_id = cast(Optional[str], msg.get('id'))
+
+        if exec_id in self._pending:
+            await self._pending[exec_id].put(msg)
+            return
+
+        # We have a message we can't route back to a caller.
+
+        is_error = 'error' in msg
+        has_id = 'id' in msg
+
+        if is_error and not has_id:
+            # This is very likely a server parsing error.
+            # It doesn't inherently belong to any pending execution.
+            # Instead of performing clever recovery, just terminate.
+            # See "NOTE" in qmp-spec.rst, section "Error".
+            raise ServerParseError(
+                ("Server sent an error response without an ID, "
+                 "but there are no ID-less executions pending. "
+                 "Assuming this is a server parser failure."),
+                msg
+            )
+
+        # qmp-spec.rst, section "Commands Responses":
+        # 'Clients should drop all the responses
+        # that have an unknown "id" field.'
+        self.logger.log(
+            logging.ERROR if is_error else logging.WARNING,
+            "Unknown ID '%s', message dropped.",
+            exec_id,
+        )
+        self.logger.debug("Unroutable message: %s", str(msg))
+
+    @upper_half
+    @bottom_half
+    async def _do_recv(self) -> Message:
+        """
+        :raise OSError: When a stream error is encountered.
+        :raise EOFError: When the stream is at EOF.
+        :raise ProtocolError:
+            When the Message is not understood.
+            See also `Message._deserialize`.
+
+        :return: A single QMP `Message`.
+        """
+        msg_bytes = await self._readline()
+        msg = Message(msg_bytes, eager=True)
+        return msg
+
+    @upper_half
+    @bottom_half
+    def _do_send(self, msg: Message) -> None:
+        """
+        :raise ValueError: JSON serialization failure
+        :raise TypeError: JSON serialization failure
+        :raise OSError: When a stream error is encountered.
+        """
+        assert self._writer is not None
+        self._writer.write(bytes(msg))
+
+    @upper_half
+    def _get_exec_id(self) -> str:
+        exec_id = f"__qmp#{self._execute_id:05d}"
+        self._execute_id += 1
+        return exec_id
+
+    @upper_half
+    async def _issue(self, msg: Message) -> Union[None, str]:
+        """
+        Issue a QMP `Message` and do not wait for a reply.
+
+        :param msg: The QMP `Message` to send to the server.
+
+        :return: The ID of the `Message` sent.
+        """
+        msg_id: Optional[str] = None
+        if 'id' in msg:
+            assert isinstance(msg['id'], str)
+            msg_id = msg['id']
+
+        self._pending[msg_id] = asyncio.Queue(maxsize=1)
+        try:
+            await self._outgoing.put(msg)
+        except:
+            del self._pending[msg_id]
+            raise
+
+        return msg_id
+
+    @upper_half
+    async def _reply(self, msg_id: Union[str, None]) -> Message:
+        """
+        Await a reply to a previously issued QMP message.
+
+        :param msg_id: The ID of the previously issued message.
+
+        :return: The reply from the server.
+        :raise ExecInterruptedError:
+            When the reply could not be retrieved because the connection
+            was lost, or some other problem.
+        """
+        queue = self._pending[msg_id]
+
+        try:
+            result = await queue.get()
+            if isinstance(result, ExecInterruptedError):
+                raise result
+            return result
+        finally:
+            del self._pending[msg_id]
+
+    @upper_half
+    async def _execute(self, msg: Message, assign_id: bool = True) -> Message:
+        """
+        Send a QMP `Message` to the server and await a reply.
+
+        This method *assumes* you are sending some kind of an execute
+        statement that *will* receive a reply.
+
+        An execution ID will be assigned if assign_id is `True`. It can be
+        disabled, but this requires that an ID is manually assigned
+        instead. For manually assigned IDs, you must not use the string
+        '__qmp#' anywhere in the ID.
+
+        :param msg: The QMP `Message` to execute.
+        :param assign_id: If True, assign a new execution ID.
+
+        :return: Execution reply from the server.
+        :raise ExecInterruptedError:
+            When the reply could not be retrieved because the connection
+            was lost, or some other problem.
+        """
+        if assign_id:
+            msg['id'] = self._get_exec_id()
+        elif 'id' in msg:
+            assert isinstance(msg['id'], str)
+            assert '__qmp#' not in msg['id']
+
+        exec_id = await self._issue(msg)
+        return await self._reply(exec_id)
+
+    @upper_half
+    @require(Runstate.RUNNING)
+    async def _raw(
+            self,
+            msg: Union[Message, Mapping[str, object], bytes],
+            assign_id: bool = True,
+    ) -> Message:
+        """
+        Issue a raw `Message` to the QMP server and await a reply.
+
+        :param msg:
+            A Message to send to the server. It may be a `Message`, any
+            Mapping (including Dict), or raw bytes.
+        :param assign_id:
+            Assign an arbitrary execution ID to this message. If
+            `False`, the existing id must either be absent (and no other
+            such pending execution may omit an ID) or a string. If it is
+            a string, it must not start with '__qmp#' and no other such
+            pending execution may currently be using that ID.
+
+        :return: Execution reply from the server.
+
+        :raise ExecInterruptedError:
+            When the reply could not be retrieved because the connection
+            was lost, or some other problem.
+        :raise TypeError:
+            When assign_id is `False`, an ID is given, and it is not a string.
+        :raise ValueError:
+            When assign_id is `False`, but the ID is not usable;
+            Either because it starts with '__qmp#' or it is already in-use.
+        """
+        # 1. convert generic Mapping or bytes to a QMP Message
+        # 2. copy Message objects so that we assign an ID only to the copy.
+        msg = Message(msg)
+
+        exec_id = msg.get('id')
+        if not assign_id and 'id' in msg:
+            if not isinstance(exec_id, str):
+                raise TypeError(f"ID ('{exec_id}') must be a string.")
+            if exec_id.startswith('__qmp#'):
+                raise ValueError(
+                    f"ID ('{exec_id}') must not start with '__qmp#'."
+                )
+
+        if not assign_id and exec_id in self._pending:
+            raise ValueError(
+                f"ID '{exec_id}' is in-use and cannot be used."
+            )
+
+        return await self._execute(msg, assign_id=assign_id)
+
+    @upper_half
+    @require(Runstate.RUNNING)
+    async def execute_msg(self, msg: Message) -> object:
+        """
+        Execute a QMP command and return its value.
+
+        :param msg: The QMP `Message` to execute.
+
+        :return:
+            The command execution return value from the server. The type of
+            object returned depends on the command that was issued,
+            though most in QEMU return a `dict`.
+        :raise ValueError:
+            If the QMP `Message` does not have either the 'execute' or
+            'exec-oob' fields set.
+        :raise ExecuteError: When the server returns an error response.
+        :raise ExecInterruptedError: if the connection was terminated early.
+        """
+        if not ('execute' in msg or 'exec-oob' in msg):
+            raise ValueError("Requires 'execute' or 'exec-oob' message")
+
+        # Copy the Message so that the ID assigned by _execute() is
+        # local to this method; allowing the ID to be seen in raised
+        # Exceptions but without modifying the caller's held copy.
+        msg = Message(msg)
+        reply = await self._execute(msg)
+
+        if 'error' in reply:
+            try:
+                error_response = ErrorResponse(reply)
+            except (KeyError, TypeError) as err:
+                # Error response was malformed.
+                raise BadReplyError(
+                    "QMP error reply is malformed", reply, msg,
+                ) from err
+
+            raise ExecuteError(error_response, msg, reply)
+
+        if 'return' not in reply:
+            raise BadReplyError(
+                "QMP reply is missing a 'error' or 'return' member",
+                reply, msg,
+            )
+
+        return reply['return']
+
+    @classmethod
+    def make_execute_msg(cls, cmd: str,
+                         arguments: Optional[Mapping[str, object]] = None,
+                         oob: bool = False) -> Message:
+        """
+        Create an executable message to be sent by `execute_msg` later.
+
+        :param cmd: QMP command name.
+        :param arguments: Arguments (if any). Must be JSON-serializable.
+        :param oob: If `True`, execute "out of band".
+
+        :return: An executable QMP `Message`.
+        """
+        msg = Message({'exec-oob' if oob else 'execute': cmd})
+        if arguments is not None:
+            msg['arguments'] = arguments
+        return msg
+
+    @upper_half
+    async def execute(self, cmd: str,
+                      arguments: Optional[Mapping[str, object]] = None,
+                      oob: bool = False) -> object:
+        """
+        Execute a QMP command and return its value.
+
+        :param cmd: QMP command name.
+        :param arguments: Arguments (if any). Must be JSON-serializable.
+        :param oob: If `True`, execute "out of band".
+
+        :return:
+            The command execution return value from the server. The type of
+            object returned depends on the command that was issued,
+            though most in QEMU return a `dict`.
+        :raise ExecuteError: When the server returns an error response.
+        :raise ExecInterruptedError: if the connection was terminated early.
+        """
+        msg = self.make_execute_msg(cmd, arguments, oob=oob)
+        return await self.execute_msg(msg)
+
+    @upper_half
+    @require(Runstate.RUNNING)
+    def send_fd_scm(self, fd: int) -> None:
+        """
+        Send a file descriptor to the remote via SCM_RIGHTS.
+        """
+        assert self._writer is not None
+        sock = self._writer.transport.get_extra_info('socket')
+
+        if sock.family != socket.AF_UNIX:
+            raise QMPError("Sending file descriptors requires a UNIX socket.")
+
+        if not hasattr(sock, 'sendmsg'):
+            # We need to void the warranty sticker.
+            # Access to sendmsg is scheduled for removal in Python 3.11.
+            # Find the real backing socket to use it anyway.
+            sock = sock._sock  # pylint: disable=protected-access
+
+        sock.sendmsg(
+            [b' '],
+            [(socket.SOL_SOCKET, socket.SCM_RIGHTS, struct.pack('@i', fd))]
+        )
diff --git a/python/qemu/qmp/qmp_shell.py b/python/qemu/qmp/qmp_shell.py
new file mode 100644 (file)
index 0000000..98e684e
--- /dev/null
@@ -0,0 +1,618 @@
+#
+# Copyright (C) 2009-2022 Red Hat Inc.
+#
+# Authors:
+#  Luiz Capitulino <lcapitulino@redhat.com>
+#  John Snow <jsnow@redhat.com>
+#
+# This work is licensed under the terms of the GNU LGPL, version 2 or
+# later. See the COPYING file in the top-level directory.
+#
+
+"""
+Low-level QEMU shell on top of QMP.
+
+usage: qmp-shell [-h] [-H] [-N] [-v] [-p] qmp_server
+
+positional arguments:
+  qmp_server            < UNIX socket path | TCP address:port >
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -H, --hmp             Use HMP interface
+  -N, --skip-negotiation
+                        Skip negotiate (for qemu-ga)
+  -v, --verbose         Verbose (echo commands sent and received)
+  -p, --pretty          Pretty-print JSON
+
+
+Start QEMU with:
+
+# qemu [...] -qmp unix:./qmp-sock,server
+
+Run the shell:
+
+$ qmp-shell ./qmp-sock
+
+Commands have the following format:
+
+   < command-name > [ arg-name1=arg1 ] ... [ arg-nameN=argN ]
+
+For example:
+
+(QEMU) device_add driver=e1000 id=net1
+{'return': {}}
+(QEMU)
+
+key=value pairs also support Python or JSON object literal subset notations,
+without spaces. Dictionaries/objects {} are supported as are arrays [].
+
+   example-command arg-name1={'key':'value','obj'={'prop':"value"}}
+
+Both JSON and Python formatting should work, including both styles of
+string literal quotes. Both paradigms of literal values should work,
+including null/true/false for JSON and None/True/False for Python.
+
+
+Transactions have the following multi-line format:
+
+   transaction(
+   action-name1 [ arg-name1=arg1 ] ... [arg-nameN=argN ]
+   ...
+   action-nameN [ arg-name1=arg1 ] ... [arg-nameN=argN ]
+   )
+
+One line transactions are also supported:
+
+   transaction( action-name1 ... )
+
+For example:
+
+    (QEMU) transaction(
+    TRANS> block-dirty-bitmap-add node=drive0 name=bitmap1
+    TRANS> block-dirty-bitmap-clear node=drive0 name=bitmap0
+    TRANS> )
+    {"return": {}}
+    (QEMU)
+
+Use the -v and -p options to activate the verbose and pretty-print options,
+which will echo back the properly formatted JSON-compliant QMP that is being
+sent to QEMU, which is useful for debugging and documentation generation.
+"""
+
+import argparse
+import ast
+import json
+import logging
+import os
+import re
+import readline
+from subprocess import Popen
+import sys
+from typing import (
+    IO,
+    Dict,
+    Iterator,
+    List,
+    NoReturn,
+    Optional,
+    Sequence,
+    cast,
+)
+
+from qemu.qmp import (
+    ConnectError,
+    ExecuteError,
+    QMPError,
+    SocketAddrT,
+)
+from qemu.qmp.legacy import (
+    QEMUMonitorProtocol,
+    QMPBadPortError,
+    QMPMessage,
+    QMPObject,
+)
+
+
+LOG = logging.getLogger(__name__)
+
+
+class QMPCompleter:
+    """
+    QMPCompleter provides a readline library tab-complete behavior.
+    """
+    # NB: Python 3.9+ will probably allow us to subclass list[str] directly,
+    # but pylint as of today does not know that List[str] is simply 'list'.
+    def __init__(self) -> None:
+        self._matches: List[str] = []
+
+    def append(self, value: str) -> None:
+        """Append a new valid completion to the list of possibilities."""
+        return self._matches.append(value)
+
+    def complete(self, text: str, state: int) -> Optional[str]:
+        """readline.set_completer() callback implementation."""
+        for cmd in self._matches:
+            if cmd.startswith(text):
+                if state == 0:
+                    return cmd
+                state -= 1
+        return None
+
+
+class QMPShellError(QMPError):
+    """
+    QMP Shell Base error class.
+    """
+
+
+class FuzzyJSON(ast.NodeTransformer):
+    """
+    This extension of ast.NodeTransformer filters literal "true/false/null"
+    values in a Python AST and replaces them by proper "True/False/None" values
+    that Python can properly evaluate.
+    """
+
+    @classmethod
+    def visit_Name(cls,  # pylint: disable=invalid-name
+                   node: ast.Name) -> ast.AST:
+        """
+        Transform Name nodes with certain values into Constant (keyword) nodes.
+        """
+        if node.id == 'true':
+            return ast.Constant(value=True)
+        if node.id == 'false':
+            return ast.Constant(value=False)
+        if node.id == 'null':
+            return ast.Constant(value=None)
+        return node
+
+
+class QMPShell(QEMUMonitorProtocol):
+    """
+    QMPShell provides a basic readline-based QMP shell.
+
+    :param address: Address of the QMP server.
+    :param pretty: Pretty-print QMP messages.
+    :param verbose: Echo outgoing QMP messages to console.
+    """
+    def __init__(self, address: SocketAddrT,
+                 pretty: bool = False,
+                 verbose: bool = False,
+                 server: bool = False,
+                 logfile: Optional[str] = None):
+        super().__init__(address, server=server)
+        self._greeting: Optional[QMPMessage] = None
+        self._completer = QMPCompleter()
+        self._transmode = False
+        self._actions: List[QMPMessage] = []
+        self._histfile = os.path.join(os.path.expanduser('~'),
+                                      '.qmp-shell_history')
+        self.pretty = pretty
+        self.verbose = verbose
+        self.logfile = None
+
+        if logfile is not None:
+            self.logfile = open(logfile, "w", encoding='utf-8')
+
+    def close(self) -> None:
+        # Hook into context manager of parent to save shell history.
+        self._save_history()
+        super().close()
+
+    def _fill_completion(self) -> None:
+        try:
+            cmds = cast(List[Dict[str, str]], self.cmd('query-commands'))
+            for cmd in cmds:
+                self._completer.append(cmd['name'])
+        except ExecuteError:
+            pass
+
+    def _completer_setup(self) -> None:
+        self._completer = QMPCompleter()
+        self._fill_completion()
+        readline.set_history_length(1024)
+        readline.set_completer(self._completer.complete)
+        readline.parse_and_bind("tab: complete")
+        # NB: default delimiters conflict with some command names
+        # (eg. query-), clearing everything as it doesn't seem to matter
+        readline.set_completer_delims('')
+        try:
+            readline.read_history_file(self._histfile)
+        except FileNotFoundError:
+            pass
+        except IOError as err:
+            msg = f"Failed to read history '{self._histfile}': {err!s}"
+            LOG.warning(msg)
+
+    def _save_history(self) -> None:
+        try:
+            readline.write_history_file(self._histfile)
+        except IOError as err:
+            msg = f"Failed to save history file '{self._histfile}': {err!s}"
+            LOG.warning(msg)
+
+    @classmethod
+    def _parse_value(cls, val: str) -> object:
+        try:
+            return int(val)
+        except ValueError:
+            pass
+
+        if val.lower() == 'true':
+            return True
+        if val.lower() == 'false':
+            return False
+        if val.startswith(('{', '[')):
+            # Try first as pure JSON:
+            try:
+                return json.loads(val)
+            except ValueError:
+                pass
+            # Try once again as FuzzyJSON:
+            try:
+                tree = ast.parse(val, mode='eval')
+                transformed = FuzzyJSON().visit(tree)
+                return ast.literal_eval(transformed)
+            except (SyntaxError, ValueError):
+                pass
+        return val
+
+    def _cli_expr(self,
+                  tokens: Sequence[str],
+                  parent: QMPObject) -> None:
+        for arg in tokens:
+            (key, sep, val) = arg.partition('=')
+            if sep != '=':
+                raise QMPShellError(
+                    f"Expected a key=value pair, got '{arg!s}'"
+                )
+
+            value = self._parse_value(val)
+            optpath = key.split('.')
+            curpath = []
+            for path in optpath[:-1]:
+                curpath.append(path)
+                obj = parent.get(path, {})
+                if not isinstance(obj, dict):
+                    msg = 'Cannot use "{:s}" as both leaf and non-leaf key'
+                    raise QMPShellError(msg.format('.'.join(curpath)))
+                parent[path] = obj
+                parent = obj
+            if optpath[-1] in parent:
+                if isinstance(parent[optpath[-1]], dict):
+                    msg = 'Cannot use "{:s}" as both leaf and non-leaf key'
+                    raise QMPShellError(msg.format('.'.join(curpath)))
+                raise QMPShellError(f'Cannot set "{key}" multiple times')
+            parent[optpath[-1]] = value
+
+    def _build_cmd(self, cmdline: str) -> Optional[QMPMessage]:
+        """
+        Build a QMP input object from a user provided command-line in the
+        following format:
+
+            < command-name > [ arg-name1=arg1 ] ... [ arg-nameN=argN ]
+        """
+        argument_regex = r'''(?:[^\s"']|"(?:\\.|[^"])*"|'(?:\\.|[^'])*')+'''
+        cmdargs = re.findall(argument_regex, cmdline)
+        qmpcmd: QMPMessage
+
+        # Transactional CLI entry:
+        if cmdargs and cmdargs[0] == 'transaction(':
+            self._transmode = True
+            self._actions = []
+            cmdargs.pop(0)
+
+        # Transactional CLI exit:
+        if cmdargs and cmdargs[0] == ')' and self._transmode:
+            self._transmode = False
+            if len(cmdargs) > 1:
+                msg = 'Unexpected input after close of Transaction sub-shell'
+                raise QMPShellError(msg)
+            qmpcmd = {
+                'execute': 'transaction',
+                'arguments': {'actions': self._actions}
+            }
+            return qmpcmd
+
+        # No args, or no args remaining
+        if not cmdargs:
+            return None
+
+        if self._transmode:
+            # Parse and cache this Transactional Action
+            finalize = False
+            action = {'type': cmdargs[0], 'data': {}}
+            if cmdargs[-1] == ')':
+                cmdargs.pop(-1)
+                finalize = True
+            self._cli_expr(cmdargs[1:], action['data'])
+            self._actions.append(action)
+            return self._build_cmd(')') if finalize else None
+
+        # Standard command: parse and return it to be executed.
+        qmpcmd = {'execute': cmdargs[0], 'arguments': {}}
+        self._cli_expr(cmdargs[1:], qmpcmd['arguments'])
+        return qmpcmd
+
+    def _print(self, qmp_message: object, fh: IO[str] = sys.stdout) -> None:
+        jsobj = json.dumps(qmp_message,
+                           indent=4 if self.pretty else None,
+                           sort_keys=self.pretty)
+        print(str(jsobj), file=fh)
+
+    def _execute_cmd(self, cmdline: str) -> bool:
+        try:
+            qmpcmd = self._build_cmd(cmdline)
+        except QMPShellError as err:
+            print(
+                f"Error while parsing command line: {err!s}\n"
+                "command format: <command-name> "
+                "[arg-name1=arg1] ... [arg-nameN=argN",
+                file=sys.stderr
+            )
+            return True
+        # For transaction mode, we may have just cached the action:
+        if qmpcmd is None:
+            return True
+        if self.verbose:
+            self._print(qmpcmd)
+        resp = self.cmd_obj(qmpcmd)
+        if resp is None:
+            print('Disconnected')
+            return False
+        self._print(resp)
+        if self.logfile is not None:
+            cmd = {**qmpcmd, **resp}
+            self._print(cmd, fh=self.logfile)
+        return True
+
+    def connect(self, negotiate: bool = True) -> None:
+        self._greeting = super().connect(negotiate)
+        self._completer_setup()
+
+    def show_banner(self,
+                    msg: str = 'Welcome to the QMP low-level shell!') -> None:
+        """
+        Print to stdio a greeting, and the QEMU version if available.
+        """
+        print(msg)
+        if not self._greeting:
+            print('Connected')
+            return
+        version = self._greeting['QMP']['version']['qemu']
+        print("Connected to QEMU {major}.{minor}.{micro}\n".format(**version))
+
+    @property
+    def prompt(self) -> str:
+        """
+        Return the current shell prompt, including a trailing space.
+        """
+        if self._transmode:
+            return 'TRANS> '
+        return '(QEMU) '
+
+    def read_exec_command(self) -> bool:
+        """
+        Read and execute a command.
+
+        @return True if execution was ok, return False if disconnected.
+        """
+        try:
+            cmdline = input(self.prompt)
+        except EOFError:
+            print()
+            return False
+
+        if cmdline == '':
+            for event in self.get_events():
+                print(event)
+            return True
+
+        return self._execute_cmd(cmdline)
+
+    def repl(self) -> Iterator[None]:
+        """
+        Return an iterator that implements the REPL.
+        """
+        self.show_banner()
+        while self.read_exec_command():
+            yield
+        self.close()
+
+
+class HMPShell(QMPShell):
+    """
+    HMPShell provides a basic readline-based HMP shell, tunnelled via QMP.
+
+    :param address: Address of the QMP server.
+    :param pretty: Pretty-print QMP messages.
+    :param verbose: Echo outgoing QMP messages to console.
+    """
+    def __init__(self, address: SocketAddrT,
+                 pretty: bool = False,
+                 verbose: bool = False,
+                 server: bool = False,
+                 logfile: Optional[str] = None):
+        super().__init__(address, pretty, verbose, server, logfile)
+        self._cpu_index = 0
+
+    def _cmd_completion(self) -> None:
+        for cmd in self._cmd_passthrough('help')['return'].split('\r\n'):
+            if cmd and cmd[0] != '[' and cmd[0] != '\t':
+                name = cmd.split()[0]  # drop help text
+                if name == 'info':
+                    continue
+                if name.find('|') != -1:
+                    # Command in the form 'foobar|f' or 'f|foobar', take the
+                    # full name
+                    opt = name.split('|')
+                    if len(opt[0]) == 1:
+                        name = opt[1]
+                    else:
+                        name = opt[0]
+                self._completer.append(name)
+                self._completer.append('help ' + name)  # help completion
+
+    def _info_completion(self) -> None:
+        for cmd in self._cmd_passthrough('info')['return'].split('\r\n'):
+            if cmd:
+                self._completer.append('info ' + cmd.split()[1])
+
+    def _other_completion(self) -> None:
+        # special cases
+        self._completer.append('help info')
+
+    def _fill_completion(self) -> None:
+        self._cmd_completion()
+        self._info_completion()
+        self._other_completion()
+
+    def _cmd_passthrough(self, cmdline: str,
+                         cpu_index: int = 0) -> QMPMessage:
+        return self.cmd_obj({
+            'execute': 'human-monitor-command',
+            'arguments': {
+                'command-line': cmdline,
+                'cpu-index': cpu_index
+            }
+        })
+
+    def _execute_cmd(self, cmdline: str) -> bool:
+        if cmdline.split()[0] == "cpu":
+            # trap the cpu command, it requires special setting
+            try:
+                idx = int(cmdline.split()[1])
+                if 'return' not in self._cmd_passthrough('info version', idx):
+                    print('bad CPU index')
+                    return True
+                self._cpu_index = idx
+            except ValueError:
+                print('cpu command takes an integer argument')
+                return True
+        resp = self._cmd_passthrough(cmdline, self._cpu_index)
+        if resp is None:
+            print('Disconnected')
+            return False
+        assert 'return' in resp or 'error' in resp
+        if 'return' in resp:
+            # Success
+            if len(resp['return']) > 0:
+                print(resp['return'], end=' ')
+        else:
+            # Error
+            print('%s: %s' % (resp['error']['class'], resp['error']['desc']))
+        return True
+
+    def show_banner(self, msg: str = 'Welcome to the HMP shell!') -> None:
+        QMPShell.show_banner(self, msg)
+
+
+def die(msg: str) -> NoReturn:
+    """Write an error to stderr, then exit with a return code of 1."""
+    sys.stderr.write('ERROR: %s\n' % msg)
+    sys.exit(1)
+
+
+def main() -> None:
+    """
+    qmp-shell entry point: parse command line arguments and start the REPL.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-H', '--hmp', action='store_true',
+                        help='Use HMP interface')
+    parser.add_argument('-N', '--skip-negotiation', action='store_true',
+                        help='Skip negotiate (for qemu-ga)')
+    parser.add_argument('-v', '--verbose', action='store_true',
+                        help='Verbose (echo commands sent and received)')
+    parser.add_argument('-p', '--pretty', action='store_true',
+                        help='Pretty-print JSON')
+    parser.add_argument('-l', '--logfile',
+                        help='Save log of all QMP messages to PATH')
+
+    default_server = os.environ.get('QMP_SOCKET')
+    parser.add_argument('qmp_server', action='store',
+                        default=default_server,
+                        help='< UNIX socket path | TCP address:port >')
+
+    args = parser.parse_args()
+    if args.qmp_server is None:
+        parser.error("QMP socket or TCP address must be specified")
+
+    shell_class = HMPShell if args.hmp else QMPShell
+
+    try:
+        address = shell_class.parse_address(args.qmp_server)
+    except QMPBadPortError:
+        parser.error(f"Bad port number: {args.qmp_server}")
+        return  # pycharm doesn't know error() is noreturn
+
+    with shell_class(address, args.pretty, args.verbose, args.logfile) as qemu:
+        try:
+            qemu.connect(negotiate=not args.skip_negotiation)
+        except ConnectError as err:
+            if isinstance(err.exc, OSError):
+                die(f"Couldn't connect to {args.qmp_server}: {err!s}")
+            die(str(err))
+
+        for _ in qemu.repl():
+            pass
+
+
+def main_wrap() -> None:
+    """
+    qmp-shell-wrap entry point: parse command line arguments and
+    start the REPL.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-H', '--hmp', action='store_true',
+                        help='Use HMP interface')
+    parser.add_argument('-v', '--verbose', action='store_true',
+                        help='Verbose (echo commands sent and received)')
+    parser.add_argument('-p', '--pretty', action='store_true',
+                        help='Pretty-print JSON')
+    parser.add_argument('-l', '--logfile',
+                        help='Save log of all QMP messages to PATH')
+
+    parser.add_argument('command', nargs=argparse.REMAINDER,
+                        help='QEMU command line to invoke')
+
+    args = parser.parse_args()
+
+    cmd = args.command
+    if len(cmd) != 0 and cmd[0] == '--':
+        cmd = cmd[1:]
+    if len(cmd) == 0:
+        cmd = ["qemu-system-x86_64"]
+
+    sockpath = "qmp-shell-wrap-%d" % os.getpid()
+    cmd += ["-qmp", "unix:%s" % sockpath]
+
+    shell_class = HMPShell if args.hmp else QMPShell
+
+    try:
+        address = shell_class.parse_address(sockpath)
+    except QMPBadPortError:
+        parser.error(f"Bad port number: {sockpath}")
+        return  # pycharm doesn't know error() is noreturn
+
+    try:
+        with shell_class(address, args.pretty, args.verbose,
+                         True, args.logfile) as qemu:
+            with Popen(cmd):
+
+                try:
+                    qemu.accept()
+                except ConnectError as err:
+                    if isinstance(err.exc, OSError):
+                        die(f"Couldn't connect to {args.qmp_server}: {err!s}")
+                    die(str(err))
+
+                for _ in qemu.repl():
+                    pass
+    finally:
+        os.unlink(sockpath)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/python/qemu/qmp/qmp_tui.py b/python/qemu/qmp/qmp_tui.py
new file mode 100644 (file)
index 0000000..2d9ebbd
--- /dev/null
@@ -0,0 +1,655 @@
+# Copyright (c) 2021
+#
+# Authors:
+#  Niteesh Babu G S <niteesh.gs@gmail.com>
+#
+# This work is licensed under the terms of the GNU LGPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+"""
+QMP TUI
+
+QMP TUI is an asynchronous interface built on top the of the QMP library.
+It is the successor of QMP-shell and is bought-in as a replacement for it.
+
+Example Usage: qmp-tui <SOCKET | TCP IP:PORT>
+Full Usage: qmp-tui --help
+"""
+
+import argparse
+import asyncio
+import json
+import logging
+from logging import Handler, LogRecord
+import signal
+from typing import (
+    List,
+    Optional,
+    Tuple,
+    Type,
+    Union,
+    cast,
+)
+
+from pygments import lexers
+from pygments import token as Token
+import urwid
+import urwid_readline
+
+from .error import ProtocolError
+from .legacy import QEMUMonitorProtocol, QMPBadPortError
+from .message import DeserializationError, Message, UnexpectedTypeError
+from .protocol import ConnectError, Runstate
+from .qmp_client import ExecInterruptedError, QMPClient
+from .util import create_task, pretty_traceback
+
+
+# The name of the signal that is used to update the history list
+UPDATE_MSG: str = 'UPDATE_MSG'
+
+
+palette = [
+    (Token.Punctuation, '', '', '', 'h15,bold', 'g7'),
+    (Token.Text, '', '', '', '', 'g7'),
+    (Token.Name.Tag, '', '', '', 'bold,#f88', 'g7'),
+    (Token.Literal.Number.Integer, '', '', '', '#fa0', 'g7'),
+    (Token.Literal.String.Double, '', '', '', '#6f6', 'g7'),
+    (Token.Keyword.Constant, '', '', '', '#6af', 'g7'),
+    ('DEBUG', '', '', '', '#ddf', 'g7'),
+    ('INFO', '', '', '', 'g100', 'g7'),
+    ('WARNING', '', '', '', '#ff6', 'g7'),
+    ('ERROR', '', '', '', '#a00', 'g7'),
+    ('CRITICAL', '', '', '', '#a00', 'g7'),
+    ('background', '', 'black', '', '', 'g7'),
+]
+
+
+def format_json(msg: str) -> str:
+    """
+    Formats valid/invalid multi-line JSON message into a single-line message.
+
+    Formatting is first tried using the standard json module. If that fails
+    due to an decoding error then a simple string manipulation is done to
+    achieve a single line JSON string.
+
+    Converting into single line is more aesthetically pleasing when looking
+    along with error messages.
+
+    Eg:
+    Input:
+          [ 1,
+            true,
+            3 ]
+    The above input is not a valid QMP message and produces the following error
+    "QMP message is not a JSON object."
+    When displaying this in TUI in multiline mode we get
+
+        [ 1,
+          true,
+          3 ]: QMP message is not a JSON object.
+
+    whereas in singleline mode we get the following
+
+        [1, true, 3]: QMP message is not a JSON object.
+
+    The single line mode is more aesthetically pleasing.
+
+    :param msg:
+        The message to formatted into single line.
+
+    :return: Formatted singleline message.
+    """
+    try:
+        msg = json.loads(msg)
+        return str(json.dumps(msg))
+    except json.decoder.JSONDecodeError:
+        msg = msg.replace('\n', '')
+        words = msg.split(' ')
+        words = list(filter(None, words))
+        return ' '.join(words)
+
+
+def has_handler_type(logger: logging.Logger,
+                     handler_type: Type[Handler]) -> bool:
+    """
+    The Logger class has no interface to check if a certain type of handler is
+    installed or not. So we provide an interface to do so.
+
+    :param logger:
+        Logger object
+    :param handler_type:
+        The type of the handler to be checked.
+
+    :return: returns True if handler of type `handler_type`.
+    """
+    for handler in logger.handlers:
+        if isinstance(handler, handler_type):
+            return True
+    return False
+
+
+class App(QMPClient):
+    """
+    Implements the QMP TUI.
+
+    Initializes the widgets and starts the urwid event loop.
+
+    :param address:
+        Address of the server to connect to.
+    :param num_retries:
+        The number of times to retry before stopping to reconnect.
+    :param retry_delay:
+        The delay(sec) before each retry
+    """
+    def __init__(self, address: Union[str, Tuple[str, int]], num_retries: int,
+                 retry_delay: Optional[int]) -> None:
+        urwid.register_signal(type(self), UPDATE_MSG)
+        self.window = Window(self)
+        self.address = address
+        self.aloop: Optional[asyncio.AbstractEventLoop] = None
+        self.num_retries = num_retries
+        self.retry_delay = retry_delay if retry_delay else 2
+        self.retry: bool = False
+        self.exiting: bool = False
+        super().__init__()
+
+    def add_to_history(self, msg: str, level: Optional[str] = None) -> None:
+        """
+        Appends the msg to the history list.
+
+        :param msg:
+            The raw message to be appended in string type.
+        """
+        urwid.emit_signal(self, UPDATE_MSG, msg, level)
+
+    def _cb_outbound(self, msg: Message) -> Message:
+        """
+        Callback: outbound message hook.
+
+        Appends the outgoing messages to the history box.
+
+        :param msg: raw outbound message.
+        :return: final outbound message.
+        """
+        str_msg = str(msg)
+
+        if not has_handler_type(logging.getLogger(), TUILogHandler):
+            logging.debug('Request: %s', str_msg)
+        self.add_to_history('<-- ' + str_msg)
+        return msg
+
+    def _cb_inbound(self, msg: Message) -> Message:
+        """
+        Callback: outbound message hook.
+
+        Appends the incoming messages to the history box.
+
+        :param msg: raw inbound message.
+        :return: final inbound message.
+        """
+        str_msg = str(msg)
+
+        if not has_handler_type(logging.getLogger(), TUILogHandler):
+            logging.debug('Request: %s', str_msg)
+        self.add_to_history('--> ' + str_msg)
+        return msg
+
+    async def _send_to_server(self, msg: Message) -> None:
+        """
+        This coroutine sends the message to the server.
+        The message has to be pre-validated.
+
+        :param msg:
+            Pre-validated message to be to sent to the server.
+
+        :raise Exception: When an unhandled exception is caught.
+        """
+        try:
+            await self._raw(msg, assign_id='id' not in msg)
+        except ExecInterruptedError as err:
+            logging.info('Error server disconnected before reply %s', str(err))
+            self.add_to_history('Server disconnected before reply', 'ERROR')
+        except Exception as err:
+            logging.error('Exception from _send_to_server: %s', str(err))
+            raise err
+
+    def cb_send_to_server(self, raw_msg: str) -> None:
+        """
+        Validates and sends the message to the server.
+        The raw string message is first converted into a Message object
+        and is then sent to the server.
+
+        :param raw_msg:
+            The raw string message to be sent to the server.
+
+        :raise Exception: When an unhandled exception is caught.
+        """
+        try:
+            msg = Message(bytes(raw_msg, encoding='utf-8'))
+            create_task(self._send_to_server(msg))
+        except (DeserializationError, UnexpectedTypeError) as err:
+            raw_msg = format_json(raw_msg)
+            logging.info('Invalid message: %s', err.error_message)
+            self.add_to_history(f'{raw_msg}: {err.error_message}', 'ERROR')
+
+    def unhandled_input(self, key: str) -> None:
+        """
+        Handle's keys which haven't been handled by the child widgets.
+
+        :param key:
+            Unhandled key
+        """
+        if key == 'esc':
+            self.kill_app()
+
+    def kill_app(self) -> None:
+        """
+        Initiates killing of app. A bridge between asynchronous and synchronous
+        code.
+        """
+        create_task(self._kill_app())
+
+    async def _kill_app(self) -> None:
+        """
+        This coroutine initiates the actual disconnect process and calls
+        urwid.ExitMainLoop() to kill the TUI.
+
+        :raise Exception: When an unhandled exception is caught.
+        """
+        self.exiting = True
+        await self.disconnect()
+        logging.debug('Disconnect finished. Exiting app')
+        raise urwid.ExitMainLoop()
+
+    async def disconnect(self) -> None:
+        """
+        Overrides the disconnect method to handle the errors locally.
+        """
+        try:
+            await super().disconnect()
+        except (OSError, EOFError) as err:
+            logging.info('disconnect: %s', str(err))
+            self.retry = True
+        except ProtocolError as err:
+            logging.info('disconnect: %s', str(err))
+        except Exception as err:
+            logging.error('disconnect: Unhandled exception %s', str(err))
+            raise err
+
+    def _set_status(self, msg: str) -> None:
+        """
+        Sets the message as the status.
+
+        :param msg:
+            The message to be displayed in the status bar.
+        """
+        self.window.footer.set_text(msg)
+
+    def _get_formatted_address(self) -> str:
+        """
+        Returns a formatted version of the server's address.
+
+        :return: formatted address
+        """
+        if isinstance(self.address, tuple):
+            host, port = self.address
+            addr = f'{host}:{port}'
+        else:
+            addr = f'{self.address}'
+        return addr
+
+    async def _initiate_connection(self) -> Optional[ConnectError]:
+        """
+        Tries connecting to a server a number of times with a delay between
+        each try. If all retries failed then return the error faced during
+        the last retry.
+
+        :return: Error faced during last retry.
+        """
+        current_retries = 0
+        err = None
+
+        # initial try
+        await self.connect_server()
+        while self.retry and current_retries < self.num_retries:
+            logging.info('Connection Failed, retrying in %d', self.retry_delay)
+            status = f'[Retry #{current_retries} ({self.retry_delay}s)]'
+            self._set_status(status)
+
+            await asyncio.sleep(self.retry_delay)
+
+            err = await self.connect_server()
+            current_retries += 1
+        # If all retries failed report the last error
+        if err:
+            logging.info('All retries failed: %s', err)
+            return err
+        return None
+
+    async def manage_connection(self) -> None:
+        """
+        Manage the connection based on the current run state.
+
+        A reconnect is issued when the current state is IDLE and the number
+        of retries is not exhausted.
+        A disconnect is issued when the current state is DISCONNECTING.
+        """
+        while not self.exiting:
+            if self.runstate == Runstate.IDLE:
+                err = await self._initiate_connection()
+                # If retry is still true then, we have exhausted all our tries.
+                if err:
+                    self._set_status(f'[Error: {err.error_message}]')
+                else:
+                    addr = self._get_formatted_address()
+                    self._set_status(f'[Connected {addr}]')
+            elif self.runstate == Runstate.DISCONNECTING:
+                self._set_status('[Disconnected]')
+                await self.disconnect()
+                # check if a retry is needed
+                # mypy 1.4.0 doesn't believe runstate can change after
+                # disconnect(), hence the cast.
+                state = cast(Runstate, self.runstate)
+                if state == Runstate.IDLE:
+                    continue
+            await self.runstate_changed()
+
+    async def connect_server(self) -> Optional[ConnectError]:
+        """
+        Initiates a connection to the server at address `self.address`
+        and in case of a failure, sets the status to the respective error.
+        """
+        try:
+            await self.connect(self.address)
+            self.retry = False
+        except ConnectError as err:
+            logging.info('connect_server: ConnectError %s', str(err))
+            self.retry = True
+            return err
+        return None
+
+    def run(self, debug: bool = False) -> None:
+        """
+        Starts the long running co-routines and the urwid event loop.
+
+        :param debug:
+            Enables/Disables asyncio event loop debugging
+        """
+        screen = urwid.raw_display.Screen()
+        screen.set_terminal_properties(256)
+
+        self.aloop = asyncio.get_event_loop()
+        self.aloop.set_debug(debug)
+
+        # Gracefully handle SIGTERM and SIGINT signals
+        cancel_signals = [signal.SIGTERM, signal.SIGINT]
+        for sig in cancel_signals:
+            self.aloop.add_signal_handler(sig, self.kill_app)
+
+        event_loop = urwid.AsyncioEventLoop(loop=self.aloop)
+        main_loop = urwid.MainLoop(urwid.AttrMap(self.window, 'background'),
+                                   unhandled_input=self.unhandled_input,
+                                   screen=screen,
+                                   palette=palette,
+                                   handle_mouse=True,
+                                   event_loop=event_loop)
+
+        create_task(self.manage_connection(), self.aloop)
+        try:
+            main_loop.run()
+        except Exception as err:
+            logging.error('%s\n%s\n', str(err), pretty_traceback())
+            raise err
+
+
+class StatusBar(urwid.Text):
+    """
+    A simple statusbar modelled using the Text widget. The status can be
+    set using the set_text function. All text set is aligned to right.
+
+    :param text: Initial text to be displayed. Default is empty str.
+    """
+    def __init__(self, text: str = ''):
+        super().__init__(text, align='right')
+
+
+class Editor(urwid_readline.ReadlineEdit):
+    """
+    A simple editor modelled using the urwid_readline.ReadlineEdit widget.
+    Mimcs GNU readline shortcuts and provides history support.
+
+    The readline shortcuts can be found below:
+    https://github.com/rr-/urwid_readline#features
+
+    Along with the readline features, this editor also has support for
+    history. Pressing the 'up'/'down' switches between the prev/next messages
+    available in the history.
+
+    Currently there is no support to save the history to a file. The history of
+    previous commands is lost on exit.
+
+    :param parent: Reference to the TUI object.
+    """
+    def __init__(self, parent: App) -> None:
+        super().__init__(caption='> ', multiline=True)
+        self.parent = parent
+        self.history: List[str] = []
+        self.last_index: int = -1
+        self.show_history: bool = False
+
+    def keypress(self, size: Tuple[int, int], key: str) -> Optional[str]:
+        """
+        Handles the keypress on this widget.
+
+        :param size:
+            The current size of the widget.
+        :param key:
+            The key to be handled.
+
+        :return: Unhandled key if any.
+        """
+        msg = self.get_edit_text()
+        if key == 'up' and not msg:
+            # Show the history when 'up arrow' is pressed with no input text.
+            # NOTE: The show_history logic is necessary because in 'multiline'
+            # mode (which we use) 'up arrow' is used to move between lines.
+            if not self.history:
+                return None
+            self.show_history = True
+            last_msg = self.history[self.last_index]
+            self.set_edit_text(last_msg)
+            self.edit_pos = len(last_msg)
+        elif key == 'up' and self.show_history:
+            self.last_index = max(self.last_index - 1, -len(self.history))
+            self.set_edit_text(self.history[self.last_index])
+            self.edit_pos = len(self.history[self.last_index])
+        elif key == 'down' and self.show_history:
+            if self.last_index == -1:
+                self.set_edit_text('')
+                self.show_history = False
+            else:
+                self.last_index += 1
+                self.set_edit_text(self.history[self.last_index])
+                self.edit_pos = len(self.history[self.last_index])
+        elif key == 'meta enter':
+            # When using multiline, enter inserts a new line into the editor
+            # send the input to the server on alt + enter
+            self.parent.cb_send_to_server(msg)
+            self.history.append(msg)
+            self.set_edit_text('')
+            self.last_index = -1
+            self.show_history = False
+        else:
+            self.show_history = False
+            self.last_index = -1
+            return cast(Optional[str], super().keypress(size, key))
+        return None
+
+
+class EditorWidget(urwid.Filler):
+    """
+    Wrapper around the editor widget.
+
+    The Editor is a flow widget and has to wrapped inside a box widget.
+    This class wraps the Editor inside filler widget.
+
+    :param parent: Reference to the TUI object.
+    """
+    def __init__(self, parent: App) -> None:
+        super().__init__(Editor(parent), valign='top')
+
+
+class HistoryBox(urwid.ListBox):
+    """
+    This widget is modelled using the ListBox widget, contains the list of
+    all messages both QMP messages and log messages to be shown in the TUI.
+
+    The messages are urwid.Text widgets. On every append of a message, the
+    focus is shifted to the last appended message.
+
+    :param parent: Reference to the TUI object.
+    """
+    def __init__(self, parent: App) -> None:
+        self.parent = parent
+        self.history = urwid.SimpleFocusListWalker([])
+        super().__init__(self.history)
+
+    def add_to_history(self,
+                       history: Union[str, List[Tuple[str, str]]]) -> None:
+        """
+        Appends a message to the list and set the focus to the last appended
+        message.
+
+        :param history:
+            The history item(message/event) to be appended to the list.
+        """
+        self.history.append(urwid.Text(history))
+        self.history.set_focus(len(self.history) - 1)
+
+    def mouse_event(self, size: Tuple[int, int], _event: str, button: float,
+                    _x: int, _y: int, focus: bool) -> None:
+        # Unfortunately there are no urwid constants that represent the mouse
+        # events.
+        if button == 4:  # Scroll up event
+            super().keypress(size, 'up')
+        elif button == 5:  # Scroll down event
+            super().keypress(size, 'down')
+
+
+class HistoryWindow(urwid.Frame):
+    """
+    This window composes the HistoryBox and EditorWidget in a horizontal split.
+    By default the first focus is given to the history box.
+
+    :param parent: Reference to the TUI object.
+    """
+    def __init__(self, parent: App) -> None:
+        self.parent = parent
+        self.editor_widget = EditorWidget(parent)
+        self.editor = urwid.LineBox(self.editor_widget)
+        self.history = HistoryBox(parent)
+        self.body = urwid.Pile([('weight', 80, self.history),
+                                ('weight', 20, self.editor)])
+        super().__init__(self.body)
+        urwid.connect_signal(self.parent, UPDATE_MSG, self.cb_add_to_history)
+
+    def cb_add_to_history(self, msg: str, level: Optional[str] = None) -> None:
+        """
+        Appends a message to the history box
+
+        :param msg:
+            The message to be appended to the history box.
+        :param level:
+            The log level of the message, if it is a log message.
+        """
+        formatted = []
+        if level:
+            msg = f'[{level}]: {msg}'
+            formatted.append((level, msg))
+        else:
+            lexer = lexers.JsonLexer()  # pylint: disable=no-member
+            for token in lexer.get_tokens(msg):
+                formatted.append(token)
+        self.history.add_to_history(formatted)
+
+
+class Window(urwid.Frame):
+    """
+    This window is the top most widget of the TUI and will contain other
+    windows. Each child of this widget is responsible for displaying a specific
+    functionality.
+
+    :param parent: Reference to the TUI object.
+    """
+    def __init__(self, parent: App) -> None:
+        self.parent = parent
+        footer = StatusBar()
+        body = HistoryWindow(parent)
+        super().__init__(body, footer=footer)
+
+
+class TUILogHandler(Handler):
+    """
+    This handler routes all the log messages to the TUI screen.
+    It is installed to the root logger to so that the log message from all
+    libraries begin used is routed to the screen.
+
+    :param tui: Reference to the TUI object.
+    """
+    def __init__(self, tui: App) -> None:
+        super().__init__()
+        self.tui = tui
+
+    def emit(self, record: LogRecord) -> None:
+        """
+        Emits a record to the TUI screen.
+
+        Appends the log message to the TUI screen
+        """
+        level = record.levelname
+        msg = record.getMessage()
+        self.tui.add_to_history(msg, level)
+
+
+def main() -> None:
+    """
+    Driver of the whole script, parses arguments, initialize the TUI and
+    the logger.
+    """
+    parser = argparse.ArgumentParser(description='QMP TUI')
+    parser.add_argument('qmp_server', help='Address of the QMP server. '
+                        'Format <UNIX socket path | TCP addr:port>')
+    parser.add_argument('--num-retries', type=int, default=10,
+                        help='Number of times to reconnect before giving up.')
+    parser.add_argument('--retry-delay', type=int,
+                        help='Time(s) to wait before next retry. '
+                        'Default action is to wait 2s between each retry.')
+    parser.add_argument('--log-file', help='The Log file name')
+    parser.add_argument('--log-level', default='WARNING',
+                        help='Log level <CRITICAL|ERROR|WARNING|INFO|DEBUG|>')
+    parser.add_argument('--asyncio-debug', action='store_true',
+                        help='Enable debug mode for asyncio loop. '
+                        'Generates lot of output, makes TUI unusable when '
+                        'logs are logged in the TUI. '
+                        'Use only when logging to a file.')
+    args = parser.parse_args()
+
+    try:
+        address = QEMUMonitorProtocol.parse_address(args.qmp_server)
+    except QMPBadPortError as err:
+        parser.error(str(err))
+
+    app = App(address, args.num_retries, args.retry_delay)
+
+    root_logger = logging.getLogger()
+    root_logger.setLevel(logging.getLevelName(args.log_level))
+
+    if args.log_file:
+        root_logger.addHandler(logging.FileHandler(args.log_file))
+    else:
+        root_logger.addHandler(TUILogHandler(app))
+
+    app.run(args.asyncio_debug)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/python/qemu/qmp/util.py b/python/qemu/qmp/util.py
new file mode 100644 (file)
index 0000000..ca6225e
--- /dev/null
@@ -0,0 +1,219 @@
+"""
+Miscellaneous Utilities
+
+This module provides asyncio utilities and compatibility wrappers for
+Python 3.6 to provide some features that otherwise become available in
+Python 3.7+.
+
+Various logging and debugging utilities are also provided, such as
+`exception_summary()` and `pretty_traceback()`, used primarily for
+adding information into the logging stream.
+"""
+
+import asyncio
+import sys
+import traceback
+from typing import (
+    Any,
+    Coroutine,
+    Optional,
+    TypeVar,
+    cast,
+)
+
+
+T = TypeVar('T')
+
+
+# --------------------------
+# Section: Utility Functions
+# --------------------------
+
+
+async def flush(writer: asyncio.StreamWriter) -> None:
+    """
+    Utility function to ensure a StreamWriter is *fully* drained.
+
+    `asyncio.StreamWriter.drain` only promises we will return to below
+    the "high-water mark". This function ensures we flush the entire
+    buffer -- by setting the high water mark to 0 and then calling
+    drain. The flow control limits are restored after the call is
+    completed.
+    """
+    transport = cast(  # type: ignore[redundant-cast]
+        asyncio.WriteTransport, writer.transport
+    )
+
+    # https://github.com/python/typeshed/issues/5779
+    low, high = transport.get_write_buffer_limits()  # type: ignore
+    transport.set_write_buffer_limits(0, 0)
+    try:
+        await writer.drain()
+    finally:
+        transport.set_write_buffer_limits(high, low)
+
+
+def upper_half(func: T) -> T:
+    """
+    Do-nothing decorator that annotates a method as an "upper-half" method.
+
+    These methods must not call bottom-half functions directly, but can
+    schedule them to run.
+    """
+    return func
+
+
+def bottom_half(func: T) -> T:
+    """
+    Do-nothing decorator that annotates a method as a "bottom-half" method.
+
+    These methods must take great care to handle their own exceptions whenever
+    possible. If they go unhandled, they will cause termination of the loop.
+
+    These methods do not, in general, have the ability to directly
+    report information to a caller’s context and will usually be
+    collected as a Task result instead.
+
+    They must not call upper-half functions directly.
+    """
+    return func
+
+
+# -------------------------------
+# Section: Compatibility Wrappers
+# -------------------------------
+
+
+def create_task(coro: Coroutine[Any, Any, T],
+                loop: Optional[asyncio.AbstractEventLoop] = None
+                ) -> 'asyncio.Future[T]':
+    """
+    Python 3.6-compatible `asyncio.create_task` wrapper.
+
+    :param coro: The coroutine to execute in a task.
+    :param loop: Optionally, the loop to create the task in.
+
+    :return: An `asyncio.Future` object.
+    """
+    if sys.version_info >= (3, 7):
+        if loop is not None:
+            return loop.create_task(coro)
+        return asyncio.create_task(coro)  # pylint: disable=no-member
+
+    # Python 3.6:
+    return asyncio.ensure_future(coro, loop=loop)
+
+
+def is_closing(writer: asyncio.StreamWriter) -> bool:
+    """
+    Python 3.6-compatible `asyncio.StreamWriter.is_closing` wrapper.
+
+    :param writer: The `asyncio.StreamWriter` object.
+    :return: `True` if the writer is closing, or closed.
+    """
+    if sys.version_info >= (3, 7):
+        return writer.is_closing()
+
+    # Python 3.6:
+    transport = writer.transport
+    assert isinstance(transport, asyncio.WriteTransport)
+    return transport.is_closing()
+
+
+async def wait_closed(writer: asyncio.StreamWriter) -> None:
+    """
+    Python 3.6-compatible `asyncio.StreamWriter.wait_closed` wrapper.
+
+    :param writer: The `asyncio.StreamWriter` to wait on.
+    """
+    if sys.version_info >= (3, 7):
+        await writer.wait_closed()
+        return
+
+    # Python 3.6
+    transport = writer.transport
+    assert isinstance(transport, asyncio.WriteTransport)
+
+    while not transport.is_closing():
+        await asyncio.sleep(0)
+
+    # This is an ugly workaround, but it's the best I can come up with.
+    sock = transport.get_extra_info('socket')
+
+    if sock is None:
+        # Our transport doesn't have a socket? ...
+        # Nothing we can reasonably do.
+        return
+
+    while sock.fileno() != -1:
+        await asyncio.sleep(0)
+
+
+def asyncio_run(coro: Coroutine[Any, Any, T], *, debug: bool = False) -> T:
+    """
+    Python 3.6-compatible `asyncio.run` wrapper.
+
+    :param coro: A coroutine to execute now.
+    :return: The return value from the coroutine.
+    """
+    if sys.version_info >= (3, 7):
+        return asyncio.run(coro, debug=debug)
+
+    # Python 3.6
+    loop = asyncio.get_event_loop()
+    loop.set_debug(debug)
+    ret = loop.run_until_complete(coro)
+    loop.close()
+
+    return ret
+
+
+# ----------------------------
+# Section: Logging & Debugging
+# ----------------------------
+
+
+def exception_summary(exc: BaseException) -> str:
+    """
+    Return a summary string of an arbitrary exception.
+
+    It will be of the form "ExceptionType: Error Message", if the error
+    string is non-empty, and just "ExceptionType" otherwise.
+    """
+    name = type(exc).__qualname__
+    smod = type(exc).__module__
+    if smod not in ("__main__", "builtins"):
+        name = smod + '.' + name
+
+    error = str(exc)
+    if error:
+        return f"{name}: {error}"
+    return name
+
+
+def pretty_traceback(prefix: str = "  | ") -> str:
+    """
+    Formats the current traceback, indented to provide visual distinction.
+
+    This is useful for printing a traceback within a traceback for
+    debugging purposes when encapsulating errors to deliver them up the
+    stack; when those errors are printed, this helps provide a nice
+    visual grouping to quickly identify the parts of the error that
+    belong to the inner exception.
+
+    :param prefix: The prefix to append to each line of the traceback.
+    :return: A string, formatted something like the following::
+
+      | Traceback (most recent call last):
+      |   File "foobar.py", line 42, in arbitrary_example
+      |     foo.baz()
+      | ArbitraryError: [Errno 42] Something bad happened!
+    """
+    output = "".join(traceback.format_exception(*sys.exc_info()))
+
+    exc_lines = []
+    for line in output.split('\n'):
+        exc_lines.append(prefix + line)
+
+    # The last line is always empty, omit it
+    return "\n".join(exc_lines[:-1])
diff --git a/python/qemu/utils/README.rst b/python/qemu/utils/README.rst
new file mode 100644 (file)
index 0000000..d5f2da1
--- /dev/null
@@ -0,0 +1,7 @@
+qemu.utils package
+==================
+
+This package provides miscellaneous utilities used for testing and
+debugging QEMU. It is used primarily by the vm and avocado tests.
+
+See the documentation in ``__init__.py`` for more information.
diff --git a/python/qemu/utils/__init__.py b/python/qemu/utils/__init__.py
new file mode 100644 (file)
index 0000000..017cfdc
--- /dev/null
@@ -0,0 +1,162 @@
+"""
+QEMU development and testing utilities
+
+This package provides a small handful of utilities for performing
+various tasks not directly related to the launching of a VM.
+"""
+
+# Copyright (C) 2021 Red Hat Inc.
+#
+# Authors:
+#  John Snow <jsnow@redhat.com>
+#  Cleber Rosa <crosa@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+
+import os
+import re
+import shutil
+from subprocess import CalledProcessError
+import textwrap
+from typing import Optional
+
+# pylint: disable=import-error
+from .accel import kvm_available, list_accel, tcg_available
+
+
+__all__ = (
+    'VerboseProcessError',
+    'add_visual_margin',
+    'get_info_usernet_hostfwd_port',
+    'kvm_available',
+    'list_accel',
+    'tcg_available',
+)
+
+
+def get_info_usernet_hostfwd_port(info_usernet_output: str) -> Optional[int]:
+    """
+    Returns the port given to the hostfwd parameter via info usernet
+
+    :param info_usernet_output: output generated by hmp command "info usernet"
+    :return: the port number allocated by the hostfwd option
+    """
+    for line in info_usernet_output.split('\r\n'):
+        regex = r'TCP.HOST_FORWARD.*127\.0\.0\.1\s+(\d+)\s+10\.'
+        match = re.search(regex, line)
+        if match is not None:
+            return int(match[1])
+    return None
+
+
+# pylint: disable=too-many-arguments
+def add_visual_margin(
+        content: str = '',
+        width: Optional[int] = None,
+        name: Optional[str] = None,
+        padding: int = 1,
+        upper_left: str = '┏',
+        lower_left: str = '┗',
+        horizontal: str = '━',
+        vertical: str = '┃',
+) -> str:
+    """
+    Decorate and wrap some text with a visual decoration around it.
+
+    This function assumes that the text decoration characters are single
+    characters that display using a single monospace column.
+
+    ┏━ Example ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+    ┃ This is what this function looks like with text content that's
+    ┃ wrapped to 66 characters. The right-hand margin is left open to
+    ┃ accommodate the occasional unicode character that might make
+    ┃ predicting the total "visual" width of a line difficult. This
+    ┃ provides a visual distinction that's good-enough, though.
+    ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+    :param content: The text to wrap and decorate.
+    :param width:
+        The number of columns to use, including for the decoration
+        itself. The default (None) uses the available width of the
+        current terminal, or a fallback of 72 lines. A negative number
+        subtracts a fixed-width from the default size. The default obeys
+        the COLUMNS environment variable, if set.
+    :param name: A label to apply to the upper-left of the box.
+    :param padding: How many columns of padding to apply inside.
+    :param upper_left: Upper-left single-width text decoration character.
+    :param lower_left: Lower-left single-width text decoration character.
+    :param horizontal: Horizontal single-width text decoration character.
+    :param vertical: Vertical single-width text decoration character.
+    """
+    if width is None or width < 0:
+        avail = shutil.get_terminal_size(fallback=(72, 24))[0]
+        if width is None:
+            _width = avail
+        else:
+            _width = avail + width
+    else:
+        _width = width
+
+    prefix = vertical + (' ' * padding)
+
+    def _bar(name: Optional[str], top: bool = True) -> str:
+        ret = upper_left if top else lower_left
+        if name is not None:
+            ret += f"{horizontal} {name} "
+
+        filler_len = _width - len(ret)
+        ret += f"{horizontal * filler_len}"
+        return ret
+
+    def _wrap(line: str) -> str:
+        return os.linesep.join(
+            textwrap.wrap(
+                line, width=_width - padding, initial_indent=prefix,
+                subsequent_indent=prefix, replace_whitespace=False,
+                drop_whitespace=True, break_on_hyphens=False)
+        )
+
+    return os.linesep.join((
+        _bar(name, top=True),
+        os.linesep.join(_wrap(line) for line in content.splitlines()),
+        _bar(None, top=False),
+    ))
+
+
+class VerboseProcessError(CalledProcessError):
+    """
+    The same as CalledProcessError, but more verbose.
+
+    This is useful for debugging failed calls during test executions.
+    The return code, signal (if any), and terminal output will be displayed
+    on unhandled exceptions.
+    """
+    def summary(self) -> str:
+        """Return the normal CalledProcessError str() output."""
+        return super().__str__()
+
+    def __str__(self) -> str:
+        lmargin = '  '
+        width = -len(lmargin)
+        sections = []
+
+        # Does self.stdout contain both stdout and stderr?
+        has_combined_output = self.stderr is None
+
+        name = 'output' if has_combined_output else 'stdout'
+        if self.stdout:
+            sections.append(add_visual_margin(self.stdout, width, name))
+        else:
+            sections.append(f"{name}: N/A")
+
+        if self.stderr:
+            sections.append(add_visual_margin(self.stderr, width, 'stderr'))
+        elif not has_combined_output:
+            sections.append("stderr: N/A")
+
+        return os.linesep.join((
+            self.summary(),
+            textwrap.indent(os.linesep.join(sections), prefix=lmargin),
+        ))
diff --git a/python/qemu/utils/accel.py b/python/qemu/utils/accel.py
new file mode 100644 (file)
index 0000000..386ff64
--- /dev/null
@@ -0,0 +1,84 @@
+"""
+QEMU accel module:
+
+This module provides utilities for discover and check the availability of
+accelerators.
+"""
+# Copyright (C) 2015-2016 Red Hat Inc.
+# Copyright (C) 2012 IBM Corp.
+#
+# Authors:
+#  Fam Zheng <famz@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+
+import logging
+import os
+import subprocess
+from typing import List, Optional
+
+
+LOG = logging.getLogger(__name__)
+
+# Mapping host architecture to any additional architectures it can
+# support which often includes its 32 bit cousin.
+ADDITIONAL_ARCHES = {
+    "x86_64": "i386",
+    "aarch64": "armhf",
+    "ppc64le": "ppc64",
+}
+
+
+def list_accel(qemu_bin: str) -> List[str]:
+    """
+    List accelerators enabled in the QEMU binary.
+
+    @param qemu_bin (str): path to the QEMU binary.
+    @raise Exception: if failed to run ``qemu -accel help``
+    @return a list of accelerator names.
+    """
+    if not qemu_bin:
+        return []
+    try:
+        out = subprocess.check_output([qemu_bin, '-accel', 'help'],
+                                      universal_newlines=True)
+    except:
+        LOG.debug("Failed to get the list of accelerators in %s", qemu_bin)
+        raise
+    # Skip the first line which is the header.
+    return [acc.strip() for acc in out.splitlines()[1:]]
+
+
+def kvm_available(target_arch: Optional[str] = None,
+                  qemu_bin: Optional[str] = None) -> bool:
+    """
+    Check if KVM is available using the following heuristic:
+      - Kernel module is present in the host;
+      - Target and host arches don't mismatch;
+      - KVM is enabled in the QEMU binary.
+
+    @param target_arch (str): target architecture
+    @param qemu_bin (str): path to the QEMU binary
+    @return True if kvm is available, otherwise False.
+    """
+    if not os.access("/dev/kvm", os.R_OK | os.W_OK):
+        return False
+    if target_arch:
+        host_arch = os.uname()[4]
+        if target_arch != host_arch:
+            if target_arch != ADDITIONAL_ARCHES.get(host_arch):
+                return False
+    if qemu_bin and "kvm" not in list_accel(qemu_bin):
+        return False
+    return True
+
+
+def tcg_available(qemu_bin: str) -> bool:
+    """
+    Check if TCG is available.
+
+    @param qemu_bin (str): path to the QEMU binary
+    """
+    return 'tcg' in list_accel(qemu_bin)
diff --git a/python/qemu/utils/py.typed b/python/qemu/utils/py.typed
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/python/qemu/utils/qemu_ga_client.py b/python/qemu/utils/qemu_ga_client.py
new file mode 100644 (file)
index 0000000..9a665e6
--- /dev/null
@@ -0,0 +1,323 @@
+"""
+QEMU Guest Agent Client
+
+Usage:
+
+Start QEMU with:
+
+# qemu [...] -chardev socket,path=/tmp/qga.sock,server=on,wait=off,id=qga0 \
+  -device virtio-serial \
+  -device virtserialport,chardev=qga0,name=org.qemu.guest_agent.0
+
+Run the script:
+
+$ qemu-ga-client --address=/tmp/qga.sock <command> [args...]
+
+or
+
+$ export QGA_CLIENT_ADDRESS=/tmp/qga.sock
+$ qemu-ga-client <command> [args...]
+
+For example:
+
+$ qemu-ga-client cat /etc/resolv.conf
+# Generated by NetworkManager
+nameserver 10.0.2.3
+$ qemu-ga-client fsfreeze status
+thawed
+$ qemu-ga-client fsfreeze freeze
+2 filesystems frozen
+
+See also: https://wiki.qemu.org/Features/QAPI/GuestAgent
+"""
+
+# Copyright (C) 2012 Ryota Ozaki <ozaki.ryota@gmail.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+
+import argparse
+import asyncio
+import base64
+import os
+import random
+import sys
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Optional,
+    Sequence,
+)
+
+from qemu.qmp import ConnectError, SocketAddrT
+from qemu.qmp.legacy import QEMUMonitorProtocol
+
+
+# This script has not seen many patches or careful attention in quite
+# some time. If you would like to improve it, please review the design
+# carefully and add docstrings at that point in time. Until then:
+
+# pylint: disable=missing-docstring
+
+
+class QemuGuestAgent(QEMUMonitorProtocol):
+    def __getattr__(self, name: str) -> Callable[..., Any]:
+        def wrapper(**kwds: object) -> object:
+            return self.cmd('guest-' + name.replace('_', '-'), **kwds)
+        return wrapper
+
+
+class QemuGuestAgentClient:
+    def __init__(self, address: SocketAddrT):
+        self.qga = QemuGuestAgent(address)
+        self.qga.connect(negotiate=False)
+
+    def sync(self, timeout: Optional[float] = 3) -> None:
+        # Avoid being blocked forever
+        if not self.ping(timeout):
+            raise EnvironmentError('Agent seems not alive')
+        uid = random.randint(0, (1 << 32) - 1)
+        while True:
+            ret = self.qga.sync(id=uid)
+            if isinstance(ret, int) and int(ret) == uid:
+                break
+
+    def __file_read_all(self, handle: int) -> bytes:
+        eof = False
+        data = b''
+        while not eof:
+            ret = self.qga.file_read(handle=handle, count=1024)
+            _data = base64.b64decode(ret['buf-b64'])
+            data += _data
+            eof = ret['eof']
+        return data
+
+    def read(self, path: str) -> bytes:
+        handle = self.qga.file_open(path=path)
+        try:
+            data = self.__file_read_all(handle)
+        finally:
+            self.qga.file_close(handle=handle)
+        return data
+
+    def info(self) -> str:
+        info = self.qga.info()
+
+        msgs = []
+        msgs.append('version: ' + info['version'])
+        msgs.append('supported_commands:')
+        enabled = [c['name'] for c in info['supported_commands']
+                   if c['enabled']]
+        msgs.append('\tenabled: ' + ', '.join(enabled))
+        disabled = [c['name'] for c in info['supported_commands']
+                    if not c['enabled']]
+        msgs.append('\tdisabled: ' + ', '.join(disabled))
+
+        return '\n'.join(msgs)
+
+    @classmethod
+    def __gen_ipv4_netmask(cls, prefixlen: int) -> str:
+        mask = int('1' * prefixlen + '0' * (32 - prefixlen), 2)
+        return '.'.join([str(mask >> 24),
+                         str((mask >> 16) & 0xff),
+                         str((mask >> 8) & 0xff),
+                         str(mask & 0xff)])
+
+    def ifconfig(self) -> str:
+        nifs = self.qga.network_get_interfaces()
+
+        msgs = []
+        for nif in nifs:
+            msgs.append(nif['name'] + ':')
+            if 'ip-addresses' in nif:
+                for ipaddr in nif['ip-addresses']:
+                    if ipaddr['ip-address-type'] == 'ipv4':
+                        addr = ipaddr['ip-address']
+                        mask = self.__gen_ipv4_netmask(int(ipaddr['prefix']))
+                        msgs.append(f"\tinet {addr}  netmask {mask}")
+                    elif ipaddr['ip-address-type'] == 'ipv6':
+                        addr = ipaddr['ip-address']
+                        prefix = ipaddr['prefix']
+                        msgs.append(f"\tinet6 {addr}  prefixlen {prefix}")
+            if nif['hardware-address'] != '00:00:00:00:00:00':
+                msgs.append("\tether " + nif['hardware-address'])
+
+        return '\n'.join(msgs)
+
+    def ping(self, timeout: Optional[float]) -> bool:
+        self.qga.settimeout(timeout)
+        try:
+            self.qga.ping()
+        except asyncio.TimeoutError:
+            return False
+        return True
+
+    def fsfreeze(self, cmd: str) -> object:
+        if cmd not in ['status', 'freeze', 'thaw']:
+            raise ValueError('Invalid command: ' + cmd)
+        # Can be int (freeze, thaw) or GuestFsfreezeStatus (status)
+        return getattr(self.qga, 'fsfreeze' + '_' + cmd)()
+
+    def fstrim(self, minimum: int) -> Dict[str, object]:
+        # returns GuestFilesystemTrimResponse
+        ret = getattr(self.qga, 'fstrim')(minimum=minimum)
+        assert isinstance(ret, dict)
+        return ret
+
+    def suspend(self, mode: str) -> None:
+        if mode not in ['disk', 'ram', 'hybrid']:
+            raise ValueError('Invalid mode: ' + mode)
+
+        try:
+            getattr(self.qga, 'suspend' + '_' + mode)()
+            # On error exception will raise
+        except asyncio.TimeoutError:
+            # On success command will timed out
+            return
+
+    def shutdown(self, mode: str = 'powerdown') -> None:
+        if mode not in ['powerdown', 'halt', 'reboot']:
+            raise ValueError('Invalid mode: ' + mode)
+
+        try:
+            self.qga.shutdown(mode=mode)
+        except asyncio.TimeoutError:
+            pass
+
+
+def _cmd_cat(client: QemuGuestAgentClient, args: Sequence[str]) -> None:
+    if len(args) != 1:
+        print('Invalid argument')
+        print('Usage: cat <file>')
+        sys.exit(1)
+    print(client.read(args[0]))
+
+
+def _cmd_fsfreeze(client: QemuGuestAgentClient, args: Sequence[str]) -> None:
+    usage = 'Usage: fsfreeze status|freeze|thaw'
+    if len(args) != 1:
+        print('Invalid argument')
+        print(usage)
+        sys.exit(1)
+    if args[0] not in ['status', 'freeze', 'thaw']:
+        print('Invalid command: ' + args[0])
+        print(usage)
+        sys.exit(1)
+    cmd = args[0]
+    ret = client.fsfreeze(cmd)
+    if cmd == 'status':
+        print(ret)
+        return
+
+    assert isinstance(ret, int)
+    verb = 'frozen' if cmd == 'freeze' else 'thawed'
+    print(f"{ret:d} filesystems {verb}")
+
+
+def _cmd_fstrim(client: QemuGuestAgentClient, args: Sequence[str]) -> None:
+    if len(args) == 0:
+        minimum = 0
+    else:
+        minimum = int(args[0])
+    print(client.fstrim(minimum))
+
+
+def _cmd_ifconfig(client: QemuGuestAgentClient, args: Sequence[str]) -> None:
+    assert not args
+    print(client.ifconfig())
+
+
+def _cmd_info(client: QemuGuestAgentClient, args: Sequence[str]) -> None:
+    assert not args
+    print(client.info())
+
+
+def _cmd_ping(client: QemuGuestAgentClient, args: Sequence[str]) -> None:
+    timeout = 3.0 if len(args) == 0 else float(args[0])
+    alive = client.ping(timeout)
+    if not alive:
+        print("Not responded in %s sec" % args[0])
+        sys.exit(1)
+
+
+def _cmd_suspend(client: QemuGuestAgentClient, args: Sequence[str]) -> None:
+    usage = 'Usage: suspend disk|ram|hybrid'
+    if len(args) != 1:
+        print('Less argument')
+        print(usage)
+        sys.exit(1)
+    if args[0] not in ['disk', 'ram', 'hybrid']:
+        print('Invalid command: ' + args[0])
+        print(usage)
+        sys.exit(1)
+    client.suspend(args[0])
+
+
+def _cmd_shutdown(client: QemuGuestAgentClient, args: Sequence[str]) -> None:
+    assert not args
+    client.shutdown()
+
+
+_cmd_powerdown = _cmd_shutdown
+
+
+def _cmd_halt(client: QemuGuestAgentClient, args: Sequence[str]) -> None:
+    assert not args
+    client.shutdown('halt')
+
+
+def _cmd_reboot(client: QemuGuestAgentClient, args: Sequence[str]) -> None:
+    assert not args
+    client.shutdown('reboot')
+
+
+commands = [m.replace('_cmd_', '') for m in dir() if '_cmd_' in m]
+
+
+def send_command(address: str, cmd: str, args: Sequence[str]) -> None:
+    if not os.path.exists(address):
+        print(f"'{address}' not found. (Is QEMU running?)")
+        sys.exit(1)
+
+    if cmd not in commands:
+        print('Invalid command: ' + cmd)
+        print('Available commands: ' + ', '.join(commands))
+        sys.exit(1)
+
+    try:
+        client = QemuGuestAgentClient(address)
+    except ConnectError as err:
+        print(err)
+        if isinstance(err.exc, ConnectionError):
+            print('(Is QEMU running?)')
+        sys.exit(1)
+
+    if cmd == 'fsfreeze' and args[0] == 'freeze':
+        client.sync(60)
+    elif cmd != 'ping':
+        client.sync()
+
+    globals()['_cmd_' + cmd](client, args)
+
+
+def main() -> None:
+    address = os.environ.get('QGA_CLIENT_ADDRESS')
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--address', action='store',
+                        default=address,
+                        help='Specify a ip:port pair or a unix socket path')
+    parser.add_argument('command', choices=commands)
+    parser.add_argument('args', nargs='*')
+
+    args = parser.parse_args()
+    if args.address is None:
+        parser.error('address is not specified')
+        sys.exit(1)
+
+    send_command(args.address, args.command, args.args)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/python/qemu/utils/qom.py b/python/qemu/utils/qom.py
new file mode 100644 (file)
index 0000000..426a0f2
--- /dev/null
@@ -0,0 +1,273 @@
+"""
+QEMU Object Model testing tools.
+
+usage: qom [-h] {set,get,list,tree,fuse} ...
+
+Query and manipulate QOM data
+
+optional arguments:
+  -h, --help           show this help message and exit
+
+QOM commands:
+  {set,get,list,tree,fuse}
+    set                Set a QOM property value
+    get                Get a QOM property value
+    list               List QOM properties at a given path
+    tree               Show QOM tree from a given path
+    fuse               Mount a QOM tree as a FUSE filesystem
+"""
+##
+# Copyright John Snow 2020, for Red Hat, Inc.
+# Copyright IBM, Corp. 2011
+#
+# Authors:
+#  John Snow <jsnow@redhat.com>
+#  Anthony Liguori <aliguori@amazon.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+#
+# Based on ./scripts/qmp/qom-[set|get|tree|list]
+##
+
+import argparse
+
+from qemu.qmp import ExecuteError
+
+from .qom_common import QOMCommand
+
+
+try:
+    from .qom_fuse import QOMFuse
+except ModuleNotFoundError as _err:
+    if _err.name != 'fuse':
+        raise
+else:
+    assert issubclass(QOMFuse, QOMCommand)
+
+
+class QOMSet(QOMCommand):
+    """
+    QOM Command - Set a property to a given value.
+
+    usage: qom-set [-h] [--socket SOCKET] <path>.<property> <value>
+
+    Set a QOM property value
+
+    positional arguments:
+      <path>.<property>     QOM path and property, separated by a period '.'
+      <value>               new QOM property value
+
+    optional arguments:
+      -h, --help            show this help message and exit
+      --socket SOCKET, -s SOCKET
+                            QMP socket path or address (addr:port). May also be
+                            set via QMP_SOCKET environment variable.
+    """
+    name = 'set'
+    help = 'Set a QOM property value'
+
+    @classmethod
+    def configure_parser(cls, parser: argparse.ArgumentParser) -> None:
+        super().configure_parser(parser)
+        cls.add_path_prop_arg(parser)
+        parser.add_argument(
+            'value',
+            metavar='<value>',
+            action='store',
+            help='new QOM property value'
+        )
+
+    def __init__(self, args: argparse.Namespace):
+        super().__init__(args)
+        self.path, self.prop = args.path_prop.rsplit('.', 1)
+        self.value = args.value
+
+    def run(self) -> int:
+        rsp = self.qmp.cmd(
+            'qom-set',
+            path=self.path,
+            property=self.prop,
+            value=self.value
+        )
+        print(rsp)
+        return 0
+
+
+class QOMGet(QOMCommand):
+    """
+    QOM Command - Get a property's current value.
+
+    usage: qom-get [-h] [--socket SOCKET] <path>.<property>
+
+    Get a QOM property value
+
+    positional arguments:
+      <path>.<property>     QOM path and property, separated by a period '.'
+
+    optional arguments:
+      -h, --help            show this help message and exit
+      --socket SOCKET, -s SOCKET
+                            QMP socket path or address (addr:port). May also be
+                            set via QMP_SOCKET environment variable.
+    """
+    name = 'get'
+    help = 'Get a QOM property value'
+
+    @classmethod
+    def configure_parser(cls, parser: argparse.ArgumentParser) -> None:
+        super().configure_parser(parser)
+        cls.add_path_prop_arg(parser)
+
+    def __init__(self, args: argparse.Namespace):
+        super().__init__(args)
+        try:
+            tmp = args.path_prop.rsplit('.', 1)
+        except ValueError as err:
+            raise ValueError('Invalid format for <path>.<property>') from err
+        self.path = tmp[0]
+        self.prop = tmp[1]
+
+    def run(self) -> int:
+        rsp = self.qmp.cmd(
+            'qom-get',
+            path=self.path,
+            property=self.prop
+        )
+        if isinstance(rsp, dict):
+            for key, value in rsp.items():
+                print(f"{key}: {value}")
+        else:
+            print(rsp)
+        return 0
+
+
+class QOMList(QOMCommand):
+    """
+    QOM Command - List the properties at a given path.
+
+    usage: qom-list [-h] [--socket SOCKET] <path>
+
+    List QOM properties at a given path
+
+    positional arguments:
+      <path>                QOM path
+
+    optional arguments:
+      -h, --help            show this help message and exit
+      --socket SOCKET, -s SOCKET
+                            QMP socket path or address (addr:port). May also be
+                            set via QMP_SOCKET environment variable.
+    """
+    name = 'list'
+    help = 'List QOM properties at a given path'
+
+    @classmethod
+    def configure_parser(cls, parser: argparse.ArgumentParser) -> None:
+        super().configure_parser(parser)
+        parser.add_argument(
+            'path',
+            metavar='<path>',
+            action='store',
+            help='QOM path',
+        )
+
+    def __init__(self, args: argparse.Namespace):
+        super().__init__(args)
+        self.path = args.path
+
+    def run(self) -> int:
+        rsp = self.qom_list(self.path)
+        for item in rsp:
+            if item.child:
+                print(f"{item.name}/")
+            elif item.link:
+                print(f"@{item.name}/")
+            else:
+                print(item.name)
+        return 0
+
+
+class QOMTree(QOMCommand):
+    """
+    QOM Command - Show the full tree below a given path.
+
+    usage: qom-tree [-h] [--socket SOCKET] [<path>]
+
+    Show QOM tree from a given path
+
+    positional arguments:
+      <path>                QOM path
+
+    optional arguments:
+      -h, --help            show this help message and exit
+      --socket SOCKET, -s SOCKET
+                            QMP socket path or address (addr:port). May also be
+                            set via QMP_SOCKET environment variable.
+    """
+    name = 'tree'
+    help = 'Show QOM tree from a given path'
+
+    @classmethod
+    def configure_parser(cls, parser: argparse.ArgumentParser) -> None:
+        super().configure_parser(parser)
+        parser.add_argument(
+            'path',
+            metavar='<path>',
+            action='store',
+            help='QOM path',
+            nargs='?',
+            default='/'
+        )
+
+    def __init__(self, args: argparse.Namespace):
+        super().__init__(args)
+        self.path = args.path
+
+    def _list_node(self, path: str) -> None:
+        print(path)
+        items = self.qom_list(path)
+        for item in items:
+            if item.child:
+                continue
+            try:
+                rsp = self.qmp.cmd('qom-get', path=path,
+                                   property=item.name)
+                print(f"  {item.name}: {rsp} ({item.type})")
+            except ExecuteError as err:
+                print(f"  {item.name}: <EXCEPTION: {err!s}> ({item.type})")
+        print('')
+        for item in items:
+            if not item.child:
+                continue
+            if path == '/':
+                path = ''
+            self._list_node(f"{path}/{item.name}")
+
+    def run(self) -> int:
+        self._list_node(self.path)
+        return 0
+
+
+def main() -> int:
+    """QOM script main entry point."""
+    parser = argparse.ArgumentParser(
+        description='Query and manipulate QOM data'
+    )
+    subparsers = parser.add_subparsers(
+        title='QOM commands',
+        dest='command'
+    )
+
+    for command in QOMCommand.__subclasses__():
+        command.register(subparsers)
+
+    args = parser.parse_args()
+
+    if args.command is None:
+        parser.error('Command not specified.')
+        return 1
+
+    cmd_class = args.cmd_class
+    assert isinstance(cmd_class, type(QOMCommand))
+    return cmd_class.command_runner(args)
diff --git a/python/qemu/utils/qom_common.py b/python/qemu/utils/qom_common.py
new file mode 100644 (file)
index 0000000..dd2c8b1
--- /dev/null
@@ -0,0 +1,175 @@
+"""
+QOM Command abstractions.
+"""
+##
+# Copyright John Snow 2020, for Red Hat, Inc.
+# Copyright IBM, Corp. 2011
+#
+# Authors:
+#  John Snow <jsnow@redhat.com>
+#  Anthony Liguori <aliguori@amazon.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+#
+# Based on ./scripts/qmp/qom-[set|get|tree|list]
+##
+
+import argparse
+import os
+import sys
+from typing import (
+    Any,
+    Dict,
+    List,
+    Optional,
+    Type,
+    TypeVar,
+)
+
+from qemu.qmp import QMPError
+from qemu.qmp.legacy import QEMUMonitorProtocol
+
+
+class ObjectPropertyInfo:
+    """
+    Represents the return type from e.g. qom-list.
+    """
+    def __init__(self, name: str, type_: str,
+                 description: Optional[str] = None,
+                 default_value: Optional[object] = None):
+        self.name = name
+        self.type = type_
+        self.description = description
+        self.default_value = default_value
+
+    @classmethod
+    def make(cls, value: Dict[str, Any]) -> 'ObjectPropertyInfo':
+        """
+        Build an ObjectPropertyInfo from a Dict with an unknown shape.
+        """
+        assert value.keys() >= {'name', 'type'}
+        assert value.keys() <= {'name', 'type', 'description', 'default-value'}
+        return cls(value['name'], value['type'],
+                   value.get('description'),
+                   value.get('default-value'))
+
+    @property
+    def child(self) -> bool:
+        """Is this property a child property?"""
+        return self.type.startswith('child<')
+
+    @property
+    def link(self) -> bool:
+        """Is this property a link property?"""
+        return self.type.startswith('link<')
+
+
+CommandT = TypeVar('CommandT', bound='QOMCommand')
+
+
+class QOMCommand:
+    """
+    Represents a QOM sub-command.
+
+    :param args: Parsed arguments, as returned from parser.parse_args.
+    """
+    name: str
+    help: str
+
+    def __init__(self, args: argparse.Namespace):
+        if args.socket is None:
+            raise QMPError("No QMP socket path or address given")
+        self.qmp = QEMUMonitorProtocol(
+            QEMUMonitorProtocol.parse_address(args.socket)
+        )
+        self.qmp.connect()
+
+    @classmethod
+    def register(cls, subparsers: Any) -> None:
+        """
+        Register this command with the argument parser.
+
+        :param subparsers: argparse subparsers object, from "add_subparsers".
+        """
+        subparser = subparsers.add_parser(cls.name, help=cls.help,
+                                          description=cls.help)
+        cls.configure_parser(subparser)
+
+    @classmethod
+    def configure_parser(cls, parser: argparse.ArgumentParser) -> None:
+        """
+        Configure a parser with this command's arguments.
+
+        :param parser: argparse parser or subparser object.
+        """
+        default_path = os.environ.get('QMP_SOCKET')
+        parser.add_argument(
+            '--socket', '-s',
+            dest='socket',
+            action='store',
+            help='QMP socket path or address (addr:port).'
+            ' May also be set via QMP_SOCKET environment variable.',
+            default=default_path
+        )
+        parser.set_defaults(cmd_class=cls)
+
+    @classmethod
+    def add_path_prop_arg(cls, parser: argparse.ArgumentParser) -> None:
+        """
+        Add the <path>.<proptery> positional argument to this command.
+
+        :param parser: The parser to add the argument to.
+        """
+        parser.add_argument(
+            'path_prop',
+            metavar='<path>.<property>',
+            action='store',
+            help="QOM path and property, separated by a period '.'"
+        )
+
+    def run(self) -> int:
+        """
+        Run this command.
+
+        :return: 0 on success, 1 otherwise.
+        """
+        raise NotImplementedError
+
+    def qom_list(self, path: str) -> List[ObjectPropertyInfo]:
+        """
+        :return: a strongly typed list from the 'qom-list' command.
+        """
+        rsp = self.qmp.cmd('qom-list', path=path)
+        # qom-list returns List[ObjectPropertyInfo]
+        assert isinstance(rsp, list)
+        return [ObjectPropertyInfo.make(x) for x in rsp]
+
+    @classmethod
+    def command_runner(
+            cls: Type[CommandT],
+            args: argparse.Namespace
+    ) -> int:
+        """
+        Run a fully-parsed subcommand, with error-handling for the CLI.
+
+        :return: The return code from `run()`.
+        """
+        try:
+            cmd = cls(args)
+            return cmd.run()
+        except QMPError as err:
+            print(f"{type(err).__name__}: {err!s}", file=sys.stderr)
+            return -1
+
+    @classmethod
+    def entry_point(cls) -> int:
+        """
+        Build this command's parser, parse arguments, and run the command.
+
+        :return: `run`'s return code.
+        """
+        parser = argparse.ArgumentParser(description=cls.help)
+        cls.configure_parser(parser)
+        args = parser.parse_args()
+        return cls.command_runner(args)
diff --git a/python/qemu/utils/qom_fuse.py b/python/qemu/utils/qom_fuse.py
new file mode 100644 (file)
index 0000000..cf7e344
--- /dev/null
@@ -0,0 +1,207 @@
+"""
+QEMU Object Model FUSE filesystem tool
+
+This script offers a simple FUSE filesystem within which the QOM tree
+may be browsed, queried and edited using traditional shell tooling.
+
+This script requires the 'fusepy' python package.
+
+
+usage: qom-fuse [-h] [--socket SOCKET] <mount>
+
+Mount a QOM tree as a FUSE filesystem
+
+positional arguments:
+  <mount>               Mount point
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --socket SOCKET, -s SOCKET
+                        QMP socket path or address (addr:port). May also be
+                        set via QMP_SOCKET environment variable.
+"""
+##
+# Copyright IBM, Corp. 2012
+# Copyright (C) 2020 Red Hat, Inc.
+#
+# Authors:
+#  Anthony Liguori   <aliguori@us.ibm.com>
+#  Markus Armbruster <armbru@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+##
+
+import argparse
+from errno import ENOENT, EPERM
+import stat
+import sys
+from typing import (
+    IO,
+    Dict,
+    Iterator,
+    Mapping,
+    Optional,
+    Union,
+)
+
+import fuse
+from fuse import FUSE, FuseOSError, Operations
+
+from qemu.qmp import ExecuteError
+
+from .qom_common import QOMCommand
+
+
+fuse.fuse_python_api = (0, 2)
+
+
+class QOMFuse(QOMCommand, Operations):
+    """
+    QOMFuse implements both fuse.Operations and QOMCommand.
+
+    Operations implements the FS, and QOMCommand implements the CLI command.
+    """
+    name = 'fuse'
+    help = 'Mount a QOM tree as a FUSE filesystem'
+    fuse: FUSE
+
+    @classmethod
+    def configure_parser(cls, parser: argparse.ArgumentParser) -> None:
+        super().configure_parser(parser)
+        parser.add_argument(
+            'mount',
+            metavar='<mount>',
+            action='store',
+            help="Mount point",
+        )
+
+    def __init__(self, args: argparse.Namespace):
+        super().__init__(args)
+        self.mount = args.mount
+        self.ino_map: Dict[str, int] = {}
+        self.ino_count = 1
+
+    def run(self) -> int:
+        print(f"Mounting QOMFS to '{self.mount}'", file=sys.stderr)
+        self.fuse = FUSE(self, self.mount, foreground=True)
+        return 0
+
+    def get_ino(self, path: str) -> int:
+        """Get an inode number for a given QOM path."""
+        if path in self.ino_map:
+            return self.ino_map[path]
+        self.ino_map[path] = self.ino_count
+        self.ino_count += 1
+        return self.ino_map[path]
+
+    def is_object(self, path: str) -> bool:
+        """Is the given QOM path an object?"""
+        try:
+            self.qom_list(path)
+            return True
+        except ExecuteError:
+            return False
+
+    def is_property(self, path: str) -> bool:
+        """Is the given QOM path a property?"""
+        path, prop = path.rsplit('/', 1)
+        if path == '':
+            path = '/'
+        try:
+            for item in self.qom_list(path):
+                if item.name == prop:
+                    return True
+            return False
+        except ExecuteError:
+            return False
+
+    def is_link(self, path: str) -> bool:
+        """Is the given QOM path a link?"""
+        path, prop = path.rsplit('/', 1)
+        if path == '':
+            path = '/'
+        try:
+            for item in self.qom_list(path):
+                if item.name == prop and item.link:
+                    return True
+            return False
+        except ExecuteError:
+            return False
+
+    def read(self, path: str, size: int, offset: int, fh: IO[bytes]) -> bytes:
+        if not self.is_property(path):
+            raise FuseOSError(ENOENT)
+
+        path, prop = path.rsplit('/', 1)
+        if path == '':
+            path = '/'
+        try:
+            data = str(self.qmp.cmd('qom-get', path=path, property=prop))
+            data += '\n'  # make values shell friendly
+        except ExecuteError as err:
+            raise FuseOSError(EPERM) from err
+
+        if offset > len(data):
+            return b''
+
+        return bytes(data[offset:][:size], encoding='utf-8')
+
+    def readlink(self, path: str) -> Union[bool, str]:
+        if not self.is_link(path):
+            return False
+        path, prop = path.rsplit('/', 1)
+        prefix = '/'.join(['..'] * (len(path.split('/')) - 1))
+        return prefix + str(self.qmp.cmd('qom-get', path=path,
+                                         property=prop))
+
+    def getattr(self, path: str,
+                fh: Optional[IO[bytes]] = None) -> Mapping[str, object]:
+        if self.is_link(path):
+            value = {
+                'st_mode': 0o755 | stat.S_IFLNK,
+                'st_ino': self.get_ino(path),
+                'st_dev': 0,
+                'st_nlink': 2,
+                'st_uid': 1000,
+                'st_gid': 1000,
+                'st_size': 4096,
+                'st_atime': 0,
+                'st_mtime': 0,
+                'st_ctime': 0
+            }
+        elif self.is_object(path):
+            value = {
+                'st_mode': 0o755 | stat.S_IFDIR,
+                'st_ino': self.get_ino(path),
+                'st_dev': 0,
+                'st_nlink': 2,
+                'st_uid': 1000,
+                'st_gid': 1000,
+                'st_size': 4096,
+                'st_atime': 0,
+                'st_mtime': 0,
+                'st_ctime': 0
+            }
+        elif self.is_property(path):
+            value = {
+                'st_mode': 0o644 | stat.S_IFREG,
+                'st_ino': self.get_ino(path),
+                'st_dev': 0,
+                'st_nlink': 1,
+                'st_uid': 1000,
+                'st_gid': 1000,
+                'st_size': 4096,
+                'st_atime': 0,
+                'st_mtime': 0,
+                'st_ctime': 0
+            }
+        else:
+            raise FuseOSError(ENOENT)
+        return value
+
+    def readdir(self, path: str, fh: IO[bytes]) -> Iterator[str]:
+        yield '.'
+        yield '..'
+        for item in self.qom_list(path):
+            yield item.name
diff --git a/python/scripts/mkvenv.py b/python/scripts/mkvenv.py
new file mode 100644 (file)
index 0000000..d0b9c21
--- /dev/null
@@ -0,0 +1,988 @@
+"""
+mkvenv - QEMU pyvenv bootstrapping utility
+
+usage: mkvenv [-h] command ...
+
+QEMU pyvenv bootstrapping utility
+
+options:
+  -h, --help  show this help message and exit
+
+Commands:
+  command     Description
+    create    create a venv
+    post_init
+              post-venv initialization
+    ensure    Ensure that the specified package is installed.
+    ensuregroup
+              Ensure that the specified package group is installed.
+
+--------------------------------------------------
+
+usage: mkvenv create [-h] target
+
+positional arguments:
+  target      Target directory to install virtual environment into.
+
+options:
+  -h, --help  show this help message and exit
+
+--------------------------------------------------
+
+usage: mkvenv post_init [-h]
+
+options:
+  -h, --help         show this help message and exit
+
+--------------------------------------------------
+
+usage: mkvenv ensure [-h] [--online] [--dir DIR] dep_spec...
+
+positional arguments:
+  dep_spec    PEP 508 Dependency specification, e.g. 'meson>=0.61.5'
+
+options:
+  -h, --help  show this help message and exit
+  --online    Install packages from PyPI, if necessary.
+  --dir DIR   Path to vendored packages where we may install from.
+
+--------------------------------------------------
+
+usage: mkvenv ensuregroup [-h] [--online] [--dir DIR] file group...
+
+positional arguments:
+  file        pointer to a TOML file
+  group       section name in the TOML file
+
+options:
+  -h, --help  show this help message and exit
+  --online    Install packages from PyPI, if necessary.
+  --dir DIR   Path to vendored packages where we may install from.
+
+"""
+
+# Copyright (C) 2022-2023 Red Hat, Inc.
+#
+# Authors:
+#  John Snow <jsnow@redhat.com>
+#  Paolo Bonzini <pbonzini@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later. See the COPYING file in the top-level directory.
+
+import argparse
+from importlib.metadata import (
+    Distribution,
+    EntryPoint,
+    PackageNotFoundError,
+    distribution,
+    version,
+)
+from importlib.util import find_spec
+import logging
+import os
+from pathlib import Path
+import re
+import shutil
+import site
+import subprocess
+import sys
+import sysconfig
+from types import SimpleNamespace
+from typing import (
+    Any,
+    Dict,
+    Iterator,
+    Optional,
+    Sequence,
+    Tuple,
+    Union,
+)
+import venv
+
+
+# Try to load distlib, with a fallback to pip's vendored version.
+# HAVE_DISTLIB is checked below, just-in-time, so that mkvenv does not fail
+# outside the venv or before a potential call to ensurepip in checkpip().
+HAVE_DISTLIB = True
+try:
+    import distlib.scripts
+    import distlib.version
+except ImportError:
+    try:
+        # Reach into pip's cookie jar.  pylint and flake8 don't understand
+        # that these imports will be used via distlib.xxx.
+        from pip._vendor import distlib
+        import pip._vendor.distlib.scripts  # noqa, pylint: disable=unused-import
+        import pip._vendor.distlib.version  # noqa, pylint: disable=unused-import
+    except ImportError:
+        HAVE_DISTLIB = False
+
+# Try to load tomllib, with a fallback to tomli.
+# HAVE_TOMLLIB is checked below, just-in-time, so that mkvenv does not fail
+# outside the venv or before a potential call to ensurepip in checkpip().
+HAVE_TOMLLIB = True
+try:
+    import tomllib
+except ImportError:
+    try:
+        import tomli as tomllib
+    except ImportError:
+        HAVE_TOMLLIB = False
+
+# Do not add any mandatory dependencies from outside the stdlib:
+# This script *must* be usable standalone!
+
+DirType = Union[str, bytes, "os.PathLike[str]", "os.PathLike[bytes]"]
+logger = logging.getLogger("mkvenv")
+
+
+def inside_a_venv() -> bool:
+    """Returns True if it is executed inside of a virtual environment."""
+    return sys.prefix != sys.base_prefix
+
+
+class Ouch(RuntimeError):
+    """An Exception class we can't confuse with a builtin."""
+
+
+class QemuEnvBuilder(venv.EnvBuilder):
+    """
+    An extension of venv.EnvBuilder for building QEMU's configure-time venv.
+
+    The primary difference is that it emulates a "nested" virtual
+    environment when invoked from inside of an existing virtual
+    environment by including packages from the parent.  Also,
+    "ensurepip" is replaced if possible with just recreating pip's
+    console_scripts inside the virtual environment.
+
+    Parameters for base class init:
+      - system_site_packages: bool = False
+      - clear: bool = False
+      - symlinks: bool = False
+      - upgrade: bool = False
+      - with_pip: bool = False
+      - prompt: Optional[str] = None
+      - upgrade_deps: bool = False             (Since 3.9)
+    """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        logger.debug("QemuEnvBuilder.__init__(...)")
+
+        # For nested venv emulation:
+        self.use_parent_packages = False
+        if inside_a_venv():
+            # Include parent packages only if we're in a venv and
+            # system_site_packages was True.
+            self.use_parent_packages = kwargs.pop(
+                "system_site_packages", False
+            )
+            # Include system_site_packages only when the parent,
+            # The venv we are currently in, also does so.
+            kwargs["system_site_packages"] = sys.base_prefix in site.PREFIXES
+
+        # ensurepip is slow: venv creation can be very fast for cases where
+        # we allow the use of system_site_packages. Therefore, ensurepip is
+        # replaced with our own script generation once the virtual environment
+        # is setup.
+        self.want_pip = kwargs.get("with_pip", False)
+        if self.want_pip:
+            if (
+                kwargs.get("system_site_packages", False)
+                and not need_ensurepip()
+            ):
+                kwargs["with_pip"] = False
+            else:
+                check_ensurepip()
+
+        super().__init__(*args, **kwargs)
+
+        # Make the context available post-creation:
+        self._context: Optional[SimpleNamespace] = None
+
+    def get_parent_libpath(self) -> Optional[str]:
+        """Return the libpath of the parent venv, if applicable."""
+        if self.use_parent_packages:
+            return sysconfig.get_path("purelib")
+        return None
+
+    @staticmethod
+    def compute_venv_libpath(context: SimpleNamespace) -> str:
+        """
+        Compatibility wrapper for context.lib_path for Python < 3.12
+        """
+        # Python 3.12+, not strictly necessary because it's documented
+        # to be the same as 3.10 code below:
+        if sys.version_info >= (3, 12):
+            return context.lib_path
+
+        # Python 3.10+
+        if "venv" in sysconfig.get_scheme_names():
+            lib_path = sysconfig.get_path(
+                "purelib", scheme="venv", vars={"base": context.env_dir}
+            )
+            assert lib_path is not None
+            return lib_path
+
+        # For Python <= 3.9 we need to hardcode this. Fortunately the
+        # code below was the same in Python 3.6-3.10, so there is only
+        # one case.
+        if sys.platform == "win32":
+            return os.path.join(context.env_dir, "Lib", "site-packages")
+        return os.path.join(
+            context.env_dir,
+            "lib",
+            "python%d.%d" % sys.version_info[:2],
+            "site-packages",
+        )
+
+    def ensure_directories(self, env_dir: DirType) -> SimpleNamespace:
+        logger.debug("ensure_directories(env_dir=%s)", env_dir)
+        self._context = super().ensure_directories(env_dir)
+        return self._context
+
+    def create(self, env_dir: DirType) -> None:
+        logger.debug("create(env_dir=%s)", env_dir)
+        super().create(env_dir)
+        assert self._context is not None
+        self.post_post_setup(self._context)
+
+    def post_post_setup(self, context: SimpleNamespace) -> None:
+        """
+        The final, final hook. Enter the venv and run commands inside of it.
+        """
+        if self.use_parent_packages:
+            # We're inside of a venv and we want to include the parent
+            # venv's packages.
+            parent_libpath = self.get_parent_libpath()
+            assert parent_libpath is not None
+            logger.debug("parent_libpath: %s", parent_libpath)
+
+            our_libpath = self.compute_venv_libpath(context)
+            logger.debug("our_libpath: %s", our_libpath)
+
+            pth_file = os.path.join(our_libpath, "nested.pth")
+            with open(pth_file, "w", encoding="UTF-8") as file:
+                file.write(parent_libpath + os.linesep)
+
+        if self.want_pip:
+            args = [
+                context.env_exe,
+                __file__,
+                "post_init",
+            ]
+            subprocess.run(args, check=True)
+
+    def get_value(self, field: str) -> str:
+        """
+        Get a string value from the context namespace after a call to build.
+
+        For valid field names, see:
+        https://docs.python.org/3/library/venv.html#venv.EnvBuilder.ensure_directories
+        """
+        ret = getattr(self._context, field)
+        assert isinstance(ret, str)
+        return ret
+
+
+def need_ensurepip() -> bool:
+    """
+    Tests for the presence of setuptools and pip.
+
+    :return: `True` if we do not detect both packages.
+    """
+    # Don't try to actually import them, it's fraught with danger:
+    # https://github.com/pypa/setuptools/issues/2993
+    if find_spec("setuptools") and find_spec("pip"):
+        return False
+    return True
+
+
+def check_ensurepip() -> None:
+    """
+    Check that we have ensurepip.
+
+    Raise a fatal exception with a helpful hint if it isn't available.
+    """
+    if not find_spec("ensurepip"):
+        msg = (
+            "Python's ensurepip module is not found.\n"
+            "It's normally part of the Python standard library, "
+            "maybe your distribution packages it separately?\n"
+            "Either install ensurepip, or alleviate the need for it in the "
+            "first place by installing pip and setuptools for "
+            f"'{sys.executable}'.\n"
+            "(Hint: Debian puts ensurepip in its python3-venv package.)"
+        )
+        raise Ouch(msg)
+
+    # ensurepip uses pyexpat, which can also go missing on us:
+    if not find_spec("pyexpat"):
+        msg = (
+            "Python's pyexpat module is not found.\n"
+            "It's normally part of the Python standard library, "
+            "maybe your distribution packages it separately?\n"
+            "Either install pyexpat, or alleviate the need for it in the "
+            "first place by installing pip and setuptools for "
+            f"'{sys.executable}'.\n\n"
+            "(Hint: NetBSD's pkgsrc debundles this to e.g. 'py310-expat'.)"
+        )
+        raise Ouch(msg)
+
+
+def make_venv(  # pylint: disable=too-many-arguments
+    env_dir: Union[str, Path],
+    system_site_packages: bool = False,
+    clear: bool = True,
+    symlinks: Optional[bool] = None,
+    with_pip: bool = True,
+) -> None:
+    """
+    Create a venv using `QemuEnvBuilder`.
+
+    This is analogous to the `venv.create` module-level convenience
+    function that is part of the Python stdblib, except it uses
+    `QemuEnvBuilder` instead.
+
+    :param env_dir: The directory to create/install to.
+    :param system_site_packages:
+        Allow inheriting packages from the system installation.
+    :param clear: When True, fully remove any prior venv and files.
+    :param symlinks:
+        Whether to use symlinks to the target interpreter or not. If
+        left unspecified, it will use symlinks except on Windows to
+        match behavior with the "venv" CLI tool.
+    :param with_pip:
+        Whether to install "pip" binaries or not.
+    """
+    logger.debug(
+        "%s: make_venv(env_dir=%s, system_site_packages=%s, "
+        "clear=%s, symlinks=%s, with_pip=%s)",
+        __file__,
+        str(env_dir),
+        system_site_packages,
+        clear,
+        symlinks,
+        with_pip,
+    )
+
+    if symlinks is None:
+        # Default behavior of standard venv CLI
+        symlinks = os.name != "nt"
+
+    builder = QemuEnvBuilder(
+        system_site_packages=system_site_packages,
+        clear=clear,
+        symlinks=symlinks,
+        with_pip=with_pip,
+    )
+
+    style = "non-isolated" if builder.system_site_packages else "isolated"
+    nested = ""
+    if builder.use_parent_packages:
+        nested = f"(with packages from '{builder.get_parent_libpath()}') "
+    print(
+        f"mkvenv: Creating {style} virtual environment"
+        f" {nested}at '{str(env_dir)}'",
+        file=sys.stderr,
+    )
+
+    try:
+        logger.debug("Invoking builder.create()")
+        try:
+            builder.create(str(env_dir))
+        except SystemExit as exc:
+            # Some versions of the venv module raise SystemExit; *nasty*!
+            # We want the exception that prompted it. It might be a subprocess
+            # error that has output we *really* want to see.
+            logger.debug("Intercepted SystemExit from EnvBuilder.create()")
+            raise exc.__cause__ or exc.__context__ or exc
+        logger.debug("builder.create() finished")
+    except subprocess.CalledProcessError as exc:
+        logger.error("mkvenv subprocess failed:")
+        logger.error("cmd: %s", exc.cmd)
+        logger.error("returncode: %d", exc.returncode)
+
+        def _stringify(data: Union[str, bytes]) -> str:
+            if isinstance(data, bytes):
+                return data.decode()
+            return data
+
+        lines = []
+        if exc.stdout:
+            lines.append("========== stdout ==========")
+            lines.append(_stringify(exc.stdout))
+            lines.append("============================")
+        if exc.stderr:
+            lines.append("========== stderr ==========")
+            lines.append(_stringify(exc.stderr))
+            lines.append("============================")
+        if lines:
+            logger.error(os.linesep.join(lines))
+
+        raise Ouch("VENV creation subprocess failed.") from exc
+
+    # print the python executable to stdout for configure.
+    print(builder.get_value("env_exe"))
+
+
+def _get_entry_points(packages: Sequence[str]) -> Iterator[str]:
+
+    def _generator() -> Iterator[str]:
+        for package in packages:
+            try:
+                entry_points: Iterator[EntryPoint] = \
+                    iter(distribution(package).entry_points)
+            except PackageNotFoundError:
+                continue
+
+            # The EntryPoints type is only available in 3.10+,
+            # treat this as a vanilla list and filter it ourselves.
+            entry_points = filter(
+                lambda ep: ep.group == "console_scripts", entry_points
+            )
+
+            for entry_point in entry_points:
+                yield f"{entry_point.name} = {entry_point.value}"
+
+    return _generator()
+
+
+def generate_console_scripts(
+    packages: Sequence[str],
+    python_path: Optional[str] = None,
+    bin_path: Optional[str] = None,
+) -> None:
+    """
+    Generate script shims for console_script entry points in @packages.
+    """
+    if python_path is None:
+        python_path = sys.executable
+    if bin_path is None:
+        bin_path = sysconfig.get_path("scripts")
+        assert bin_path is not None
+
+    logger.debug(
+        "generate_console_scripts(packages=%s, python_path=%s, bin_path=%s)",
+        packages,
+        python_path,
+        bin_path,
+    )
+
+    if not packages:
+        return
+
+    maker = distlib.scripts.ScriptMaker(None, bin_path)
+    maker.variants = {""}
+    maker.clobber = False
+
+    for entry_point in _get_entry_points(packages):
+        for filename in maker.make(entry_point):
+            logger.debug("wrote console_script '%s'", filename)
+
+
+def pkgname_from_depspec(dep_spec: str) -> str:
+    """
+    Parse package name out of a PEP-508 depspec.
+
+    See https://peps.python.org/pep-0508/#names
+    """
+    match = re.match(
+        r"^([A-Z0-9]([A-Z0-9._-]*[A-Z0-9])?)", dep_spec, re.IGNORECASE
+    )
+    if not match:
+        raise ValueError(
+            f"dep_spec '{dep_spec}'"
+            " does not appear to contain a valid package name"
+        )
+    return match.group(0)
+
+
+def _path_is_prefix(prefix: Optional[str], path: str) -> bool:
+    try:
+        return (
+            prefix is not None and os.path.commonpath([prefix, path]) == prefix
+        )
+    except ValueError:
+        return False
+
+
+def _is_system_package(dist: Distribution) -> bool:
+    path = str(dist.locate_file("."))
+    return not (
+        _path_is_prefix(sysconfig.get_path("purelib"), path)
+        or _path_is_prefix(sysconfig.get_path("platlib"), path)
+    )
+
+
+def diagnose(
+    dep_spec: str,
+    online: bool,
+    wheels_dir: Optional[Union[str, Path]],
+    prog: Optional[str],
+) -> Tuple[str, bool]:
+    """
+    Offer a summary to the user as to why a package failed to be installed.
+
+    :param dep_spec: The package we tried to ensure, e.g. 'meson>=0.61.5'
+    :param online: Did we allow PyPI access?
+    :param prog:
+        Optionally, a shell program name that can be used as a
+        bellwether to detect if this program is installed elsewhere on
+        the system. This is used to offer advice when a program is
+        detected for a different python version.
+    :param wheels_dir:
+        Optionally, a directory that was searched for vendored packages.
+    """
+    # pylint: disable=too-many-branches
+
+    # Some errors are not particularly serious
+    bad = False
+
+    pkg_name = pkgname_from_depspec(dep_spec)
+    pkg_version: Optional[str] = None
+    try:
+        pkg_version = version(pkg_name)
+    except PackageNotFoundError:
+        pass
+
+    lines = []
+
+    if pkg_version:
+        lines.append(
+            f"Python package '{pkg_name}' version '{pkg_version}' was found,"
+            " but isn't suitable."
+        )
+    else:
+        lines.append(
+            f"Python package '{pkg_name}' was not found nor installed."
+        )
+
+    if wheels_dir:
+        lines.append(
+            "No suitable version found in, or failed to install from"
+            f" '{wheels_dir}'."
+        )
+        bad = True
+
+    if online:
+        lines.append("A suitable version could not be obtained from PyPI.")
+        bad = True
+    else:
+        lines.append(
+            "mkvenv was configured to operate offline and did not check PyPI."
+        )
+
+    if prog and not pkg_version:
+        which = shutil.which(prog)
+        if which:
+            if sys.base_prefix in site.PREFIXES:
+                pypath = Path(sys.executable).resolve()
+                lines.append(
+                    f"'{prog}' was detected on your system at '{which}', "
+                    f"but the Python package '{pkg_name}' was not found by "
+                    f"this Python interpreter ('{pypath}'). "
+                    f"Typically this means that '{prog}' has been installed "
+                    "against a different Python interpreter on your system."
+                )
+            else:
+                lines.append(
+                    f"'{prog}' was detected on your system at '{which}', "
+                    "but the build is using an isolated virtual environment."
+                )
+            bad = True
+
+    lines = [f" • {line}" for line in lines]
+    if bad:
+        lines.insert(0, f"Could not provide build dependency '{dep_spec}':")
+    else:
+        lines.insert(0, f"'{dep_spec}' not found:")
+    return os.linesep.join(lines), bad
+
+
+def pip_install(
+    args: Sequence[str],
+    online: bool = False,
+    wheels_dir: Optional[Union[str, Path]] = None,
+) -> None:
+    """
+    Use pip to install a package or package(s) as specified in @args.
+    """
+    loud = bool(
+        os.environ.get("DEBUG")
+        or os.environ.get("GITLAB_CI")
+        or os.environ.get("V")
+    )
+
+    full_args = [
+        sys.executable,
+        "-m",
+        "pip",
+        "install",
+        "--disable-pip-version-check",
+        "-v" if loud else "-q",
+    ]
+    if not online:
+        full_args += ["--no-index"]
+    if wheels_dir:
+        full_args += ["--find-links", f"file://{str(wheels_dir)}"]
+    full_args += list(args)
+    subprocess.run(
+        full_args,
+        check=True,
+    )
+
+
+def _make_version_constraint(info: Dict[str, str], install: bool) -> str:
+    """
+    Construct the version constraint part of a PEP 508 dependency
+    specification (for example '>=0.61.5') from the accepted and
+    installed keys of the provided dictionary.
+
+    :param info: A dictionary corresponding to a TOML key-value list.
+    :param install: True generates install constraints, False generates
+        presence constraints
+    """
+    if install and "installed" in info:
+        return "==" + info["installed"]
+
+    dep_spec = info.get("accepted", "")
+    dep_spec = dep_spec.strip()
+    # Double check that they didn't just use a version number
+    if dep_spec and dep_spec[0] not in "!~><=(":
+        raise Ouch(
+            "invalid dependency specifier " + dep_spec + " in dependency file"
+        )
+
+    return dep_spec
+
+
+def _do_ensure(
+    group: Dict[str, Dict[str, str]],
+    online: bool = False,
+    wheels_dir: Optional[Union[str, Path]] = None,
+) -> Optional[Tuple[str, bool]]:
+    """
+    Use pip to ensure we have the packages specified in @group.
+
+    If the packages are already installed, do nothing. If online and
+    wheels_dir are both provided, prefer packages found in wheels_dir
+    first before connecting to PyPI.
+
+    :param group: A dictionary of dictionaries, corresponding to a
+        section in a pythondeps.toml file.
+    :param online: If True, fall back to PyPI.
+    :param wheels_dir: If specified, search this path for packages.
+    """
+    absent = []
+    present = []
+    canary = None
+    for name, info in group.items():
+        constraint = _make_version_constraint(info, False)
+        matcher = distlib.version.LegacyMatcher(name + constraint)
+        print(f"mkvenv: checking for {matcher}", file=sys.stderr)
+
+        dist: Optional[Distribution] = None
+        try:
+            dist = distribution(matcher.name)
+        except PackageNotFoundError:
+            pass
+
+        if (
+            dist is None
+            # Always pass installed package to pip, so that they can be
+            # updated if the requested version changes
+            or not _is_system_package(dist)
+            or not matcher.match(distlib.version.LegacyVersion(dist.version))
+        ):
+            absent.append(name + _make_version_constraint(info, True))
+            if len(absent) == 1:
+                canary = info.get("canary", None)
+        else:
+            logger.info("found %s %s", name, dist.version)
+            present.append(name)
+
+    if present:
+        generate_console_scripts(present)
+
+    if absent:
+        if online or wheels_dir:
+            # Some packages are missing or aren't a suitable version,
+            # install a suitable (possibly vendored) package.
+            print(f"mkvenv: installing {', '.join(absent)}", file=sys.stderr)
+            try:
+                pip_install(args=absent, online=online, wheels_dir=wheels_dir)
+                return None
+            except subprocess.CalledProcessError:
+                pass
+
+        return diagnose(
+            absent[0],
+            online,
+            wheels_dir,
+            canary,
+        )
+
+    return None
+
+
+def ensure(
+    dep_specs: Sequence[str],
+    online: bool = False,
+    wheels_dir: Optional[Union[str, Path]] = None,
+    prog: Optional[str] = None,
+) -> None:
+    """
+    Use pip to ensure we have the package specified by @dep_specs.
+
+    If the package is already installed, do nothing. If online and
+    wheels_dir are both provided, prefer packages found in wheels_dir
+    first before connecting to PyPI.
+
+    :param dep_specs:
+        PEP 508 dependency specifications. e.g. ['meson>=0.61.5'].
+    :param online: If True, fall back to PyPI.
+    :param wheels_dir: If specified, search this path for packages.
+    :param prog:
+        If specified, use this program name for error diagnostics that will
+        be presented to the user. e.g., 'sphinx-build' can be used as a
+        bellwether for the presence of 'sphinx'.
+    """
+
+    if not HAVE_DISTLIB:
+        raise Ouch("a usable distlib could not be found, please install it")
+
+    # Convert the depspecs to a dictionary, as if they came
+    # from a section in a pythondeps.toml file
+    group: Dict[str, Dict[str, str]] = {}
+    for spec in dep_specs:
+        name = distlib.version.LegacyMatcher(spec).name
+        group[name] = {}
+
+        spec = spec.strip()
+        pos = len(name)
+        ver = spec[pos:].strip()
+        if ver:
+            group[name]["accepted"] = ver
+
+        if prog:
+            group[name]["canary"] = prog
+            prog = None
+
+    result = _do_ensure(group, online, wheels_dir)
+    if result:
+        # Well, that's not good.
+        if result[1]:
+            raise Ouch(result[0])
+        raise SystemExit(f"\n{result[0]}\n\n")
+
+
+def _parse_groups(file: str) -> Dict[str, Dict[str, Any]]:
+    if not HAVE_TOMLLIB:
+        if sys.version_info < (3, 11):
+            raise Ouch("found no usable tomli, please install it")
+
+        raise Ouch(
+            "Python >=3.11 does not have tomllib... what have you done!?"
+        )
+
+    # Use loads() to support both tomli v1.2.x (Ubuntu 22.04,
+    # Debian bullseye-backports) and v2.0.x
+    with open(file, "r", encoding="ascii") as depfile:
+        contents = depfile.read()
+        return tomllib.loads(contents)  # type: ignore
+
+
+def ensure_group(
+    file: str,
+    groups: Sequence[str],
+    online: bool = False,
+    wheels_dir: Optional[Union[str, Path]] = None,
+) -> None:
+    """
+    Use pip to ensure we have the package specified by @dep_specs.
+
+    If the package is already installed, do nothing. If online and
+    wheels_dir are both provided, prefer packages found in wheels_dir
+    first before connecting to PyPI.
+
+    :param dep_specs:
+        PEP 508 dependency specifications. e.g. ['meson>=0.61.5'].
+    :param online: If True, fall back to PyPI.
+    :param wheels_dir: If specified, search this path for packages.
+    """
+
+    if not HAVE_DISTLIB:
+        raise Ouch("found no usable distlib, please install it")
+
+    parsed_deps = _parse_groups(file)
+
+    to_install: Dict[str, Dict[str, str]] = {}
+    for group in groups:
+        try:
+            to_install.update(parsed_deps[group])
+        except KeyError as exc:
+            raise Ouch(f"group {group} not defined") from exc
+
+    result = _do_ensure(to_install, online, wheels_dir)
+    if result:
+        # Well, that's not good.
+        if result[1]:
+            raise Ouch(result[0])
+        raise SystemExit(f"\n{result[0]}\n\n")
+
+
+def post_venv_setup() -> None:
+    """
+    This is intended to be run *inside the venv* after it is created.
+    """
+    logger.debug("post_venv_setup()")
+    # Generate a 'pip' script so the venv is usable in a normal
+    # way from the CLI. This only happens when we inherited pip from a
+    # parent/system-site and haven't run ensurepip in some way.
+    generate_console_scripts(["pip"])
+
+
+def _add_create_subcommand(subparsers: Any) -> None:
+    subparser = subparsers.add_parser("create", help="create a venv")
+    subparser.add_argument(
+        "target",
+        type=str,
+        action="store",
+        help="Target directory to install virtual environment into.",
+    )
+
+
+def _add_post_init_subcommand(subparsers: Any) -> None:
+    subparsers.add_parser("post_init", help="post-venv initialization")
+
+
+def _add_ensuregroup_subcommand(subparsers: Any) -> None:
+    subparser = subparsers.add_parser(
+        "ensuregroup",
+        help="Ensure that the specified package group is installed.",
+    )
+    subparser.add_argument(
+        "--online",
+        action="store_true",
+        help="Install packages from PyPI, if necessary.",
+    )
+    subparser.add_argument(
+        "--dir",
+        type=str,
+        action="store",
+        help="Path to vendored packages where we may install from.",
+    )
+    subparser.add_argument(
+        "file",
+        type=str,
+        action="store",
+        help=("Path to a TOML file describing package groups"),
+    )
+    subparser.add_argument(
+        "group",
+        type=str,
+        action="store",
+        help="One or more package group names",
+        nargs="+",
+    )
+
+
+def _add_ensure_subcommand(subparsers: Any) -> None:
+    subparser = subparsers.add_parser(
+        "ensure", help="Ensure that the specified package is installed."
+    )
+    subparser.add_argument(
+        "--online",
+        action="store_true",
+        help="Install packages from PyPI, if necessary.",
+    )
+    subparser.add_argument(
+        "--dir",
+        type=str,
+        action="store",
+        help="Path to vendored packages where we may install from.",
+    )
+    subparser.add_argument(
+        "--diagnose",
+        type=str,
+        action="store",
+        help=(
+            "Name of a shell utility to use for "
+            "diagnostics if this command fails."
+        ),
+    )
+    subparser.add_argument(
+        "dep_specs",
+        type=str,
+        action="store",
+        help="PEP 508 Dependency specification, e.g. 'meson>=0.61.5'",
+        nargs="+",
+    )
+
+
+def main() -> int:
+    """CLI interface to make_qemu_venv. See module docstring."""
+    if os.environ.get("DEBUG") or os.environ.get("GITLAB_CI"):
+        # You're welcome.
+        logging.basicConfig(level=logging.DEBUG)
+    else:
+        if os.environ.get("V"):
+            logging.basicConfig(level=logging.INFO)
+
+    parser = argparse.ArgumentParser(
+        prog="mkvenv",
+        description="QEMU pyvenv bootstrapping utility",
+    )
+    subparsers = parser.add_subparsers(
+        title="Commands",
+        dest="command",
+        required=True,
+        metavar="command",
+        help="Description",
+    )
+
+    _add_create_subcommand(subparsers)
+    _add_post_init_subcommand(subparsers)
+    _add_ensure_subcommand(subparsers)
+    _add_ensuregroup_subcommand(subparsers)
+
+    args = parser.parse_args()
+    try:
+        if args.command == "create":
+            make_venv(
+                args.target,
+                system_site_packages=True,
+                clear=True,
+            )
+        if args.command == "post_init":
+            post_venv_setup()
+        if args.command == "ensure":
+            ensure(
+                dep_specs=args.dep_specs,
+                online=args.online,
+                wheels_dir=args.dir,
+                prog=args.diagnose,
+            )
+        if args.command == "ensuregroup":
+            ensure_group(
+                file=args.file,
+                groups=args.group,
+                online=args.online,
+                wheels_dir=args.dir,
+            )
+        logger.debug("mkvenv.py %s: exiting", args.command)
+    except Ouch as exc:
+        print("\n*** Ouch! ***\n", file=sys.stderr)
+        print(str(exc), "\n\n", file=sys.stderr)
+        return 1
+    except SystemExit:
+        raise
+    except:  # pylint: disable=bare-except
+        logger.exception("mkvenv did not complete successfully:")
+        return 2
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/python/scripts/vendor.py b/python/scripts/vendor.py
new file mode 100644 (file)
index 0000000..1038b14
--- /dev/null
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+"""
+vendor - QEMU python vendoring utility
+
+usage: vendor [-h]
+
+QEMU python vendoring utility
+
+options:
+  -h, --help  show this help message and exit
+"""
+
+# Copyright (C) 2023 Red Hat, Inc.
+#
+# Authors:
+#  John Snow <jsnow@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later. See the COPYING file in the top-level directory.
+
+import argparse
+import os
+from pathlib import Path
+import subprocess
+import sys
+import tempfile
+
+
+def main() -> int:
+    """Run the vendoring utility. See module-level docstring."""
+    loud = False
+    if os.environ.get("DEBUG") or os.environ.get("V"):
+        loud = True
+
+    # No options or anything for now, but I guess
+    # you'll figure that out when you run --help.
+    parser = argparse.ArgumentParser(
+        prog="vendor",
+        description="QEMU python vendoring utility",
+    )
+    parser.parse_args()
+
+    packages = {
+        "meson==1.2.3":
+        "4533a43c34548edd1f63a276a42690fce15bde9409bcf20c4b8fa3d7e4d7cac1",
+
+        "tomli==2.0.1":
+        "939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
+    }
+
+    vendor_dir = Path(__file__, "..", "..", "wheels").resolve()
+
+    with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as file:
+        for dep_spec, checksum in packages.items():
+            print(f"{dep_spec} --hash=sha256:{checksum}", file=file)
+        file.flush()
+
+        cli_args = [
+            "pip",
+            "download",
+            "--dest",
+            str(vendor_dir),
+            "--require-hashes",
+            "-r",
+            file.name,
+        ]
+        if loud:
+            cli_args.append("-v")
+
+        print(" ".join(cli_args))
+        subprocess.run(cli_args, check=True)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/python/setup.cfg b/python/setup.cfg
new file mode 100644 (file)
index 0000000..4866860
--- /dev/null
@@ -0,0 +1,206 @@
+[metadata]
+name = qemu
+version = file:VERSION
+maintainer = QEMU Developer Team
+maintainer_email = qemu-devel@nongnu.org
+url = https://www.qemu.org/
+download_url = https://www.qemu.org/download/
+description = QEMU Python Build, Debug and SDK tooling.
+long_description = file:PACKAGE.rst
+long_description_content_type = text/x-rst
+classifiers =
+    Development Status :: 3 - Alpha
+    License :: OSI Approved :: GNU General Public License v2 (GPLv2)
+    Natural Language :: English
+    Operating System :: OS Independent
+    Programming Language :: Python :: 3 :: Only
+    Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
+    Programming Language :: Python :: 3.10
+    Programming Language :: Python :: 3.11
+    Programming Language :: Python :: 3.12
+    Typing :: Typed
+
+[options]
+python_requires = >= 3.8
+packages =
+    qemu.qmp
+    qemu.machine
+    qemu.utils
+
+[options.package_data]
+* = py.typed
+
+[options.extras_require]
+# Remember to update tests/minreqs.txt if changing anything below:
+devel =
+    avocado-framework >= 90.0
+    distlib >= 0.3.6
+    flake8 >= 5.0.4
+    fusepy >= 2.0.4
+    isort >= 5.1.2
+    mypy >= 1.4.0
+    pylint >= 2.17.3
+    tox >= 3.18.0
+    urwid >= 2.1.2
+    urwid-readline >= 0.13
+    Pygments >= 2.9.0
+
+# Provides qom-fuse functionality
+fuse =
+    fusepy >= 2.0.4
+
+# QMP TUI dependencies
+tui =
+    urwid >= 2.1.2
+    urwid-readline >= 0.13
+    Pygments >= 2.9.0
+
+[options.entry_points]
+console_scripts =
+    qom = qemu.utils.qom:main
+    qom-set = qemu.utils.qom:QOMSet.entry_point
+    qom-get = qemu.utils.qom:QOMGet.entry_point
+    qom-list = qemu.utils.qom:QOMList.entry_point
+    qom-tree = qemu.utils.qom:QOMTree.entry_point
+    qom-fuse = qemu.utils.qom_fuse:QOMFuse.entry_point [fuse]
+    qemu-ga-client = qemu.utils.qemu_ga_client:main
+    qmp-shell = qemu.qmp.qmp_shell:main
+    qmp-shell-wrap = qemu.qmp.qmp_shell:main_wrap
+    qmp-tui = qemu.qmp.qmp_tui:main [tui]
+
+[flake8]
+# Prefer pylint's bare-except checks to flake8's
+extend-ignore = E722
+exclude = __pycache__,
+
+[mypy]
+strict = True
+python_version = 3.8
+warn_unused_configs = True
+namespace_packages = True
+warn_unused_ignores = False
+
+[mypy-qemu.utils.qom_fuse]
+# fusepy has no type stubs:
+allow_subclassing_any = True
+
+[mypy-qemu.qmp.qmp_tui]
+# urwid and urwid_readline have no type stubs:
+allow_subclassing_any = True
+
+# The following missing import directives are because these libraries do not
+# provide type stubs. Allow them on an as-needed basis for mypy.
+[mypy-fuse]
+ignore_missing_imports = True
+
+[mypy-tomli]
+ignore_missing_imports = True
+
+[mypy-tomllib]
+ignore_missing_imports = True
+
+[mypy-urwid]
+ignore_missing_imports = True
+
+[mypy-urwid_readline]
+ignore_missing_imports = True
+
+[mypy-pygments]
+ignore_missing_imports = True
+
+[mypy-distlib]
+ignore_missing_imports = True
+
+[mypy-distlib.scripts]
+ignore_missing_imports = True
+
+[mypy-distlib.version]
+ignore_missing_imports = True
+
+[mypy-pip._vendor.distlib]
+ignore_missing_imports = True
+
+[mypy-pip._vendor.distlib.scripts]
+ignore_missing_imports = True
+
+[mypy-pip._vendor.distlib.version]
+ignore_missing_imports = True
+
+[pylint.messages control]
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then reenable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W".
+disable=consider-using-f-string,
+        consider-using-with,
+        too-many-arguments,
+        too-many-function-args,  # mypy handles this with less false positives.
+        too-many-instance-attributes,
+        no-member,  # mypy also handles this better.
+
+[pylint.basic]
+# Good variable names which should always be accepted, separated by a comma.
+good-names=i,
+           j,
+           k,
+           ex,
+           Run,
+           _,   # By convention: Unused variable
+           fh,  # fh = open(...)
+           fd,  # fd = os.open(...)
+           c,   # for c in string: ...
+           T,   # for TypeVars. See pylint#3401
+           SocketAddrT,  # Not sure why this is invalid.
+
+[pylint.similarities]
+# Ignore imports when computing similarities.
+ignore-imports=yes
+ignore-signatures=yes
+
+# Minimum lines number of a similarity.
+# TODO: Remove after we opt in to Pylint 2.8.3. See commit msg.
+min-similarity-lines=6
+
+
+[isort]
+force_grid_wrap=4
+force_sort_within_sections=True
+include_trailing_comma=True
+line_length=72
+lines_after_imports=2
+multi_line_output=3
+
+# tox (https://tox.readthedocs.io/) is a tool for running tests in
+# multiple virtualenvs. This configuration file will run the test suite
+# on all supported python versions. To use it, "pip install tox" and
+# then run "tox" from this directory. You will need all of these versions
+# of python available on your system to run this test.
+
+[tox:tox]
+envlist = py38, py39, py310, py311, py312
+skip_missing_interpreters = true
+
+[testenv]
+allowlist_externals = make
+deps =
+    .[devel]
+    .[fuse]  # Workaround to trigger tox venv rebuild
+    .[tui]   # Workaround to trigger tox venv rebuild
+commands =
+    make check
+
+# Coverage.py [https://coverage.readthedocs.io/en/latest/] is a tool for
+# measuring code coverage of Python programs. It monitors your program,
+# noting which parts of the code have been executed, then analyzes the
+# source to identify code that could have been executed but was not.
+
+[coverage:run]
+concurrency = multiprocessing
+source = qemu/
+parallel = true
diff --git a/python/setup.py b/python/setup.py
new file mode 100644 (file)
index 0000000..c5bc459
--- /dev/null
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+"""
+QEMU tooling installer script
+Copyright (c) 2020-2021 John Snow for Red Hat, Inc.
+"""
+
+import setuptools
+from setuptools.command import bdist_egg
+import sys
+import pkg_resources
+
+
+class bdist_egg_guard(bdist_egg.bdist_egg):
+    """
+    Protect against bdist_egg from being executed
+
+    This prevents calling 'setup.py install' directly, as the 'install'
+    CLI option will invoke the deprecated bdist_egg hook. "pip install"
+    calls the more modern bdist_wheel hook, which is what we want.
+    """
+    def run(self):
+        sys.exit(
+            'Installation directly via setup.py is not supported.\n'
+            'Please use `pip install .` instead.'
+        )
+
+
+def main():
+    """
+    QEMU tooling installer
+    """
+
+    # https://medium.com/@daveshawley/safely-using-setup-cfg-for-metadata-1babbe54c108
+    pkg_resources.require('setuptools>=39.2')
+
+    setuptools.setup(cmdclass={'bdist_egg': bdist_egg_guard})
+
+
+if __name__ == '__main__':
+    main()
diff --git a/python/tests/flake8.sh b/python/tests/flake8.sh
new file mode 100755 (executable)
index 0000000..e013699
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/sh -e
+python3 -m flake8 qemu/
+python3 -m flake8 scripts/
diff --git a/python/tests/iotests-mypy.sh b/python/tests/iotests-mypy.sh
new file mode 100755 (executable)
index 0000000..ee76470
--- /dev/null
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+
+cd ../tests/qemu-iotests/
+python3 -m linters --mypy
diff --git a/python/tests/iotests-pylint.sh b/python/tests/iotests-pylint.sh
new file mode 100755 (executable)
index 0000000..33c5ae9
--- /dev/null
@@ -0,0 +1,5 @@
+#!/bin/sh -e
+
+cd ../tests/qemu-iotests/
+# See commit message for environment variable explainer.
+SETUPTOOLS_USE_DISTUTILS=stdlib python3 -m linters --pylint
diff --git a/python/tests/isort.sh b/python/tests/isort.sh
new file mode 100755 (executable)
index 0000000..66c2f7d
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/sh -e
+python3 -m isort -c qemu/
+python3 -m isort -c scripts/
diff --git a/python/tests/minreqs.txt b/python/tests/minreqs.txt
new file mode 100644 (file)
index 0000000..a3f423e
--- /dev/null
@@ -0,0 +1,47 @@
+# This file lists the ***oldest possible dependencies*** needed to run
+# "make check" successfully under ***Python 3.8***. It is used primarily
+# by GitLab CI to ensure that our stated minimum versions in setup.cfg
+# are truthful and regularly validated.
+#
+# This file should not contain any dependencies that are not expressed
+# by the [devel] section of setup.cfg, except for transitive
+# dependencies which must be enumerated here explicitly to eliminate
+# dependency resolution ambiguity.
+#
+# When adding new dependencies, pin the very oldest non-yanked version
+# on PyPI that allows the test suite to pass.
+
+# Dependencies for the TUI addon (Required for successful linting)
+urwid==2.1.2
+urwid-readline==0.13
+Pygments==2.9.0
+
+# Dependencies for mkvenv
+distlib==0.3.6
+
+# Dependencies for FUSE support for qom-fuse
+fusepy==2.0.4
+
+# Test-runners, utilities, etc.
+avocado-framework==90.0
+
+# Linters
+flake8==5.0.4
+isort==5.1.2
+mypy==1.4.0
+pylint==2.17.3
+
+# Transitive flake8 dependencies
+mccabe==0.7.0
+pycodestyle==2.9.1
+pyflakes==2.5.0
+
+# Transitive mypy dependencies
+mypy-extensions==1.0.0
+typing-extensions==4.7.1
+
+# Transitive pylint dependencies
+astroid==2.15.4
+lazy-object-proxy==1.4.0
+toml==0.10.0
+wrapt==1.14.0
diff --git a/python/tests/mypy.sh b/python/tests/mypy.sh
new file mode 100755 (executable)
index 0000000..a33a3f5
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/sh -e
+python3 -m mypy -p qemu
+python3 -m mypy scripts/
diff --git a/python/tests/protocol.py b/python/tests/protocol.py
new file mode 100644 (file)
index 0000000..56c4d44
--- /dev/null
@@ -0,0 +1,596 @@
+import asyncio
+from contextlib import contextmanager
+import os
+import socket
+from tempfile import TemporaryDirectory
+
+import avocado
+
+from qemu.qmp import ConnectError, Runstate
+from qemu.qmp.protocol import AsyncProtocol, StateError
+from qemu.qmp.util import asyncio_run, create_task
+
+
+class NullProtocol(AsyncProtocol[None]):
+    """
+    NullProtocol is a test mockup of an AsyncProtocol implementation.
+
+    It adds a fake_session instance variable that enables a code path
+    that bypasses the actual connection logic, but still allows the
+    reader/writers to start.
+
+    Because the message type is defined as None, an asyncio.Event named
+    'trigger_input' is created that prohibits the reader from
+    incessantly being able to yield None; this event can be poked to
+    simulate an incoming message.
+
+    For testing symmetry with do_recv, an interface is added to "send" a
+    Null message.
+
+    For testing purposes, a "simulate_disconnection" method is also
+    added which allows us to trigger a bottom half disconnect without
+    injecting any real errors into the reader/writer loops; in essence
+    it performs exactly half of what disconnect() normally does.
+    """
+    def __init__(self, name=None):
+        self.fake_session = False
+        self.trigger_input: asyncio.Event
+        super().__init__(name)
+
+    async def _establish_session(self):
+        self.trigger_input = asyncio.Event()
+        await super()._establish_session()
+
+    async def _do_start_server(self, address, ssl=None):
+        if self.fake_session:
+            self._accepted = asyncio.Event()
+            self._set_state(Runstate.CONNECTING)
+            await asyncio.sleep(0)
+        else:
+            await super()._do_start_server(address, ssl)
+
+    async def _do_accept(self):
+        if self.fake_session:
+            self._accepted = None
+        else:
+            await super()._do_accept()
+
+    async def _do_connect(self, address, ssl=None):
+        if self.fake_session:
+            self._set_state(Runstate.CONNECTING)
+            await asyncio.sleep(0)
+        else:
+            await super()._do_connect(address, ssl)
+
+    async def _do_recv(self) -> None:
+        await self.trigger_input.wait()
+        self.trigger_input.clear()
+
+    def _do_send(self, msg: None) -> None:
+        pass
+
+    async def send_msg(self) -> None:
+        await self._outgoing.put(None)
+
+    async def simulate_disconnect(self) -> None:
+        """
+        Simulates a bottom-half disconnect.
+
+        This method schedules a disconnection but does not wait for it
+        to complete. This is used to put the loop into the DISCONNECTING
+        state without fully quiescing it back to IDLE. This is normally
+        something you cannot coax AsyncProtocol to do on purpose, but it
+        will be similar to what happens with an unhandled Exception in
+        the reader/writer.
+
+        Under normal circumstances, the library design requires you to
+        await on disconnect(), which awaits the disconnect task and
+        returns bottom half errors as a pre-condition to allowing the
+        loop to return back to IDLE.
+        """
+        self._schedule_disconnect()
+
+
+class LineProtocol(AsyncProtocol[str]):
+    def __init__(self, name=None):
+        super().__init__(name)
+        self.rx_history = []
+
+    async def _do_recv(self) -> str:
+        raw = await self._readline()
+        msg = raw.decode()
+        self.rx_history.append(msg)
+        return msg
+
+    def _do_send(self, msg: str) -> None:
+        assert self._writer is not None
+        self._writer.write(msg.encode() + b'\n')
+
+    async def send_msg(self, msg: str) -> None:
+        await self._outgoing.put(msg)
+
+
+def run_as_task(coro, allow_cancellation=False):
+    """
+    Run a given coroutine as a task.
+
+    Optionally, wrap it in a try..except block that allows this
+    coroutine to be canceled gracefully.
+    """
+    async def _runner():
+        try:
+            await coro
+        except asyncio.CancelledError:
+            if allow_cancellation:
+                return
+            raise
+    return create_task(_runner())
+
+
+@contextmanager
+def jammed_socket():
+    """
+    Opens up a random unused TCP port on localhost, then jams it.
+    """
+    socks = []
+
+    try:
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        sock.bind(('127.0.0.1', 0))
+        sock.listen(1)
+        address = sock.getsockname()
+
+        socks.append(sock)
+
+        # I don't *fully* understand why, but it takes *two* un-accepted
+        # connections to start jamming the socket.
+        for _ in range(2):
+            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            sock.connect(address)
+            socks.append(sock)
+
+        yield address
+
+    finally:
+        for sock in socks:
+            sock.close()
+
+
+class Smoke(avocado.Test):
+
+    def setUp(self):
+        self.proto = NullProtocol()
+
+    def test__repr__(self):
+        self.assertEqual(
+            repr(self.proto),
+            "<NullProtocol runstate=IDLE>"
+        )
+
+    def testRunstate(self):
+        self.assertEqual(
+            self.proto.runstate,
+            Runstate.IDLE
+        )
+
+    def testDefaultName(self):
+        self.assertEqual(
+            self.proto.name,
+            None
+        )
+
+    def testLogger(self):
+        self.assertEqual(
+            self.proto.logger.name,
+            'qemu.qmp.protocol'
+        )
+
+    def testName(self):
+        self.proto = NullProtocol('Steve')
+
+        self.assertEqual(
+            self.proto.name,
+            'Steve'
+        )
+
+        self.assertEqual(
+            self.proto.logger.name,
+            'qemu.qmp.protocol.Steve'
+        )
+
+        self.assertEqual(
+            repr(self.proto),
+            "<NullProtocol name='Steve' runstate=IDLE>"
+        )
+
+
+class TestBase(avocado.Test):
+
+    def setUp(self):
+        self.proto = NullProtocol(type(self).__name__)
+        self.assertEqual(self.proto.runstate, Runstate.IDLE)
+        self.runstate_watcher = None
+
+    def tearDown(self):
+        self.assertEqual(self.proto.runstate, Runstate.IDLE)
+
+    async def _asyncSetUp(self):
+        pass
+
+    async def _asyncTearDown(self):
+        if self.runstate_watcher:
+            await self.runstate_watcher
+
+    @staticmethod
+    def async_test(async_test_method):
+        """
+        Decorator; adds SetUp and TearDown to async tests.
+        """
+        async def _wrapper(self, *args, **kwargs):
+            loop = asyncio.get_event_loop()
+            loop.set_debug(True)
+
+            await self._asyncSetUp()
+            await async_test_method(self, *args, **kwargs)
+            await self._asyncTearDown()
+
+        return _wrapper
+
+    # Definitions
+
+    # The states we expect a "bad" connect/accept attempt to transition through
+    BAD_CONNECTION_STATES = (
+        Runstate.CONNECTING,
+        Runstate.DISCONNECTING,
+        Runstate.IDLE,
+    )
+
+    # The states we expect a "good" session to transition through
+    GOOD_CONNECTION_STATES = (
+        Runstate.CONNECTING,
+        Runstate.RUNNING,
+        Runstate.DISCONNECTING,
+        Runstate.IDLE,
+    )
+
+    # Helpers
+
+    async def _watch_runstates(self, *states):
+        """
+        This launches a task alongside (most) tests below to confirm that
+        the sequence of runstate changes that occur is exactly as
+        anticipated.
+        """
+        async def _watcher():
+            for state in states:
+                new_state = await self.proto.runstate_changed()
+                self.assertEqual(
+                    new_state,
+                    state,
+                    msg=f"Expected state '{state.name}'",
+                )
+
+        self.runstate_watcher = create_task(_watcher())
+        # Kick the loop and force the task to block on the event.
+        await asyncio.sleep(0)
+
+
+class State(TestBase):
+
+    @TestBase.async_test
+    async def testSuperfluousDisconnect(self):
+        """
+        Test calling disconnect() while already disconnected.
+        """
+        await self._watch_runstates(
+            Runstate.DISCONNECTING,
+            Runstate.IDLE,
+        )
+        await self.proto.disconnect()
+
+
+class Connect(TestBase):
+    """
+    Tests primarily related to calling Connect().
+    """
+    async def _bad_connection(self, family: str):
+        assert family in ('INET', 'UNIX')
+
+        if family == 'INET':
+            await self.proto.connect(('127.0.0.1', 0))
+        elif family == 'UNIX':
+            await self.proto.connect('/dev/null')
+
+    async def _hanging_connection(self):
+        with jammed_socket() as addr:
+            await self.proto.connect(addr)
+
+    async def _bad_connection_test(self, family: str):
+        await self._watch_runstates(*self.BAD_CONNECTION_STATES)
+
+        with self.assertRaises(ConnectError) as context:
+            await self._bad_connection(family)
+
+        self.assertIsInstance(context.exception.exc, OSError)
+        self.assertEqual(
+            context.exception.error_message,
+            "Failed to establish connection"
+        )
+
+    @TestBase.async_test
+    async def testBadINET(self):
+        """
+        Test an immediately rejected call to an IP target.
+        """
+        await self._bad_connection_test('INET')
+
+    @TestBase.async_test
+    async def testBadUNIX(self):
+        """
+        Test an immediately rejected call to a UNIX socket target.
+        """
+        await self._bad_connection_test('UNIX')
+
+    @TestBase.async_test
+    async def testCancellation(self):
+        """
+        Test what happens when a connection attempt is aborted.
+        """
+        # Note that accept() cannot be cancelled outright, as it isn't a task.
+        # However, we can wrap it in a task and cancel *that*.
+        await self._watch_runstates(*self.BAD_CONNECTION_STATES)
+        task = run_as_task(self._hanging_connection(), allow_cancellation=True)
+
+        state = await self.proto.runstate_changed()
+        self.assertEqual(state, Runstate.CONNECTING)
+
+        # This is insider baseball, but the connection attempt has
+        # yielded *just* before the actual connection attempt, so kick
+        # the loop to make sure it's truly wedged.
+        await asyncio.sleep(0)
+
+        task.cancel()
+        await task
+
+    @TestBase.async_test
+    async def testTimeout(self):
+        """
+        Test what happens when a connection attempt times out.
+        """
+        await self._watch_runstates(*self.BAD_CONNECTION_STATES)
+        task = run_as_task(self._hanging_connection())
+
+        # More insider baseball: to improve the speed of this test while
+        # guaranteeing that the connection even gets a chance to start,
+        # verify that the connection hangs *first*, then await the
+        # result of the task with a nearly-zero timeout.
+
+        state = await self.proto.runstate_changed()
+        self.assertEqual(state, Runstate.CONNECTING)
+        await asyncio.sleep(0)
+
+        with self.assertRaises(asyncio.TimeoutError):
+            await asyncio.wait_for(task, timeout=0)
+
+    @TestBase.async_test
+    async def testRequire(self):
+        """
+        Test what happens when a connection attempt is made while CONNECTING.
+        """
+        await self._watch_runstates(*self.BAD_CONNECTION_STATES)
+        task = run_as_task(self._hanging_connection(), allow_cancellation=True)
+
+        state = await self.proto.runstate_changed()
+        self.assertEqual(state, Runstate.CONNECTING)
+
+        with self.assertRaises(StateError) as context:
+            await self._bad_connection('UNIX')
+
+        self.assertEqual(
+            context.exception.error_message,
+            "NullProtocol is currently connecting."
+        )
+        self.assertEqual(context.exception.state, Runstate.CONNECTING)
+        self.assertEqual(context.exception.required, Runstate.IDLE)
+
+        task.cancel()
+        await task
+
+    @TestBase.async_test
+    async def testImplicitRunstateInit(self):
+        """
+        Test what happens if we do not wait on the runstate event until
+        AFTER a connection is made, i.e., connect()/accept() themselves
+        initialize the runstate event. All of the above tests force the
+        initialization by waiting on the runstate *first*.
+        """
+        task = run_as_task(self._hanging_connection(), allow_cancellation=True)
+
+        # Kick the loop to coerce the state change
+        await asyncio.sleep(0)
+        assert self.proto.runstate == Runstate.CONNECTING
+
+        # We already missed the transition to CONNECTING
+        await self._watch_runstates(Runstate.DISCONNECTING, Runstate.IDLE)
+
+        task.cancel()
+        await task
+
+
+class Accept(Connect):
+    """
+    All of the same tests as Connect, but using the accept() interface.
+    """
+    async def _bad_connection(self, family: str):
+        assert family in ('INET', 'UNIX')
+
+        if family == 'INET':
+            await self.proto.start_server_and_accept(('example.com', 1))
+        elif family == 'UNIX':
+            await self.proto.start_server_and_accept('/dev/null')
+
+    async def _hanging_connection(self):
+        with TemporaryDirectory(suffix='.qmp') as tmpdir:
+            sock = os.path.join(tmpdir, type(self.proto).__name__ + ".sock")
+            await self.proto.start_server_and_accept(sock)
+
+
+class FakeSession(TestBase):
+
+    def setUp(self):
+        super().setUp()
+        self.proto.fake_session = True
+
+    async def _asyncSetUp(self):
+        await super()._asyncSetUp()
+        await self._watch_runstates(*self.GOOD_CONNECTION_STATES)
+
+    async def _asyncTearDown(self):
+        await self.proto.disconnect()
+        await super()._asyncTearDown()
+
+    ####
+
+    @TestBase.async_test
+    async def testFakeConnect(self):
+
+        """Test the full state lifecycle (via connect) with a no-op session."""
+        await self.proto.connect('/not/a/real/path')
+        self.assertEqual(self.proto.runstate, Runstate.RUNNING)
+
+    @TestBase.async_test
+    async def testFakeAccept(self):
+        """Test the full state lifecycle (via accept) with a no-op session."""
+        await self.proto.start_server_and_accept('/not/a/real/path')
+        self.assertEqual(self.proto.runstate, Runstate.RUNNING)
+
+    @TestBase.async_test
+    async def testFakeRecv(self):
+        """Test receiving a fake/null message."""
+        await self.proto.start_server_and_accept('/not/a/real/path')
+
+        logname = self.proto.logger.name
+        with self.assertLogs(logname, level='DEBUG') as context:
+            self.proto.trigger_input.set()
+            self.proto.trigger_input.clear()
+            await asyncio.sleep(0)  # Kick reader.
+
+        self.assertEqual(
+            context.output,
+            [f"DEBUG:{logname}:<-- None"],
+        )
+
+    @TestBase.async_test
+    async def testFakeSend(self):
+        """Test sending a fake/null message."""
+        await self.proto.start_server_and_accept('/not/a/real/path')
+
+        logname = self.proto.logger.name
+        with self.assertLogs(logname, level='DEBUG') as context:
+            # Cheat: Send a Null message to nobody.
+            await self.proto.send_msg()
+            # Kick writer; awaiting on a queue.put isn't sufficient to yield.
+            await asyncio.sleep(0)
+
+        self.assertEqual(
+            context.output,
+            [f"DEBUG:{logname}:--> None"],
+        )
+
+    async def _prod_session_api(
+            self,
+            current_state: Runstate,
+            error_message: str,
+            accept: bool = True
+    ):
+        with self.assertRaises(StateError) as context:
+            if accept:
+                await self.proto.start_server_and_accept('/not/a/real/path')
+            else:
+                await self.proto.connect('/not/a/real/path')
+
+        self.assertEqual(context.exception.error_message, error_message)
+        self.assertEqual(context.exception.state, current_state)
+        self.assertEqual(context.exception.required, Runstate.IDLE)
+
+    @TestBase.async_test
+    async def testAcceptRequireRunning(self):
+        """Test that accept() cannot be called when Runstate=RUNNING"""
+        await self.proto.start_server_and_accept('/not/a/real/path')
+
+        await self._prod_session_api(
+            Runstate.RUNNING,
+            "NullProtocol is already connected and running.",
+            accept=True,
+        )
+
+    @TestBase.async_test
+    async def testConnectRequireRunning(self):
+        """Test that connect() cannot be called when Runstate=RUNNING"""
+        await self.proto.start_server_and_accept('/not/a/real/path')
+
+        await self._prod_session_api(
+            Runstate.RUNNING,
+            "NullProtocol is already connected and running.",
+            accept=False,
+        )
+
+    @TestBase.async_test
+    async def testAcceptRequireDisconnecting(self):
+        """Test that accept() cannot be called when Runstate=DISCONNECTING"""
+        await self.proto.start_server_and_accept('/not/a/real/path')
+
+        # Cheat: force a disconnect.
+        await self.proto.simulate_disconnect()
+
+        await self._prod_session_api(
+            Runstate.DISCONNECTING,
+            ("NullProtocol is disconnecting."
+             " Call disconnect() to return to IDLE state."),
+            accept=True,
+        )
+
+    @TestBase.async_test
+    async def testConnectRequireDisconnecting(self):
+        """Test that connect() cannot be called when Runstate=DISCONNECTING"""
+        await self.proto.start_server_and_accept('/not/a/real/path')
+
+        # Cheat: force a disconnect.
+        await self.proto.simulate_disconnect()
+
+        await self._prod_session_api(
+            Runstate.DISCONNECTING,
+            ("NullProtocol is disconnecting."
+             " Call disconnect() to return to IDLE state."),
+            accept=False,
+        )
+
+
+class SimpleSession(TestBase):
+
+    def setUp(self):
+        super().setUp()
+        self.server = LineProtocol(type(self).__name__ + '-server')
+
+    async def _asyncSetUp(self):
+        await super()._asyncSetUp()
+        await self._watch_runstates(*self.GOOD_CONNECTION_STATES)
+
+    async def _asyncTearDown(self):
+        await self.proto.disconnect()
+        try:
+            await self.server.disconnect()
+        except EOFError:
+            pass
+        await super()._asyncTearDown()
+
+    @TestBase.async_test
+    async def testSmoke(self):
+        with TemporaryDirectory(suffix='.qmp') as tmpdir:
+            sock = os.path.join(tmpdir, type(self.proto).__name__ + ".sock")
+            server_task = create_task(self.server.start_server_and_accept(sock))
+
+            # give the server a chance to start listening [...]
+            await asyncio.sleep(0)
+            await self.proto.connect(sock)
diff --git a/python/tests/pylint.sh b/python/tests/pylint.sh
new file mode 100755 (executable)
index 0000000..2b68da9
--- /dev/null
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+# See commit message for environment variable explainer.
+SETUPTOOLS_USE_DISTUTILS=stdlib python3 -m pylint qemu/
+SETUPTOOLS_USE_DISTUTILS=stdlib python3 -m pylint scripts/
diff --git a/python/wheels/meson-1.2.3-py3-none-any.whl b/python/wheels/meson-1.2.3-py3-none-any.whl
new file mode 100644 (file)
index 0000000..a8b84e5
Binary files /dev/null and b/python/wheels/meson-1.2.3-py3-none-any.whl differ
diff --git a/python/wheels/tomli-2.0.1-py3-none-any.whl b/python/wheels/tomli-2.0.1-py3-none-any.whl
new file mode 100644 (file)
index 0000000..29670b9
Binary files /dev/null and b/python/wheels/tomli-2.0.1-py3-none-any.whl differ
diff --git a/pythondeps.toml b/pythondeps.toml
new file mode 100644 (file)
index 0000000..0e88415
--- /dev/null
@@ -0,0 +1,33 @@
+# This file describes Python package requirements to be
+# installed in the pyvenv Python virtual environment.
+#
+# Packages are placed in groups, which are installed using
+# the ensuregroup subcommand of python/scripts/mkvenv.py.
+# Each group forms a TOML section and each entry in the
+# section is a TOML key-value list describing a package.
+# All fields are optional; valid fields are:
+#
+# - accepted: accepted versions when using a system package
+# - installed: fixed version to install in the virtual environment
+#              if a system package is not found; if not specified,
+#              defaults to the same as "accepted" or, if also missing,
+#              to the newest version available on PyPI.
+# - canary: if specified, use this program name to present more
+#           precise error diagnostics to the user.  For example,
+#           'sphinx-build' can be used as a bellwether for the
+#           presence of 'sphinx' in the system.
+
+[meson]
+# The install key should match the version in python/wheels/
+meson = { accepted = ">=0.63.0", installed = "1.2.3", canary = "meson" }
+
+[docs]
+sphinx = { accepted = ">=1.6", installed = "5.3.0", canary = "sphinx-build" }
+sphinx_rtd_theme = { accepted = ">=0.5", installed = "1.1.1" }
+
+[avocado]
+# Note that qemu.git/python/ is always implicitly installed.
+# Prefer an LTS version when updating the accepted versions of
+# avocado-framework, for example right now the limit is 92.x.
+avocado-framework = { accepted = "(>=88.1, <93.0)", installed = "88.1", canary = "avocado" }
+pycdlib = { accepted = ">=1.11.0" }
index 51f0d55db735ba5106a9c46b649c574aae28cd25..e0739bd6ae9ceef5d7a8441949d9849dffffc35d 100644 (file)
 #
 # Specify an ACPI table on the command line to load.
 #
-# At most one of @file and @data can be specified. The list of files specified
-# by any one of them is loaded and concatenated in order. If both are omitted,
-# @data is implied.
+# At most one of @file and @data can be specified.  The list of files
+# specified by any one of them is loaded and concatenated in order.
+# If both are omitted, @data is implied.
 #
-# Other fields / optargs can be used to override fields of the generic ACPI
-# table header; refer to the ACPI specification 5.0, section 5.2.6 System
-# Description Table Header. If a header field is not overridden, then the
-# corresponding value from the concatenated blob is used (in case of @file), or
-# it is filled in with a hard-coded value (in case of @data).
+# Other fields / optargs can be used to override fields of the generic
+# ACPI table header; refer to the ACPI specification 5.0, section
+# 5.2.6 System Description Table Header.  If a header field is not
+# overridden, then the corresponding value from the concatenated blob
+# is used (in case of @file), or it is filled in with a hard-coded
+# value (in case of @data).
 #
-# String fields are copied into the matching ACPI member from lowest address
-# upwards, and silently truncated / NUL-padded to length.
+# String fields are copied into the matching ACPI member from lowest
+# address upwards, and silently truncated / NUL-padded to length.
 #
 # @sig: table signature / identifier (4 bytes)
 #
 # @oem_rev: OEM-supplied revision number (4 bytes)
 #
 # @asl_compiler_id: identifier of the utility that created the table
-#                   (4 bytes)
+#     (4 bytes)
 #
 # @asl_compiler_rev: revision number of the utility that created the
-#                    table (4 bytes)
+#     table (4 bytes)
 #
-# @file: colon (:) separated list of pathnames to load and
-#        concatenate as table data. The resultant binary blob is expected to
-#        have an ACPI table header. At least one file is required. This field
-#        excludes @data.
+# @file: colon (:) separated list of pathnames to load and concatenate
+#     as table data.  The resultant binary blob is expected to have an
+#     ACPI table header.  At least one file is required.  This field
+#     excludes @data.
 #
-# @data: colon (:) separated list of pathnames to load and
-#        concatenate as table data. The resultant binary blob must not have an
-#        ACPI table header. At least one file is required. This field excludes
-#        @file.
+# @data: colon (:) separated list of pathnames to load and concatenate
+#     as table data.  The resultant binary blob must not have an ACPI
+#     table header.  At least one file is required.  This field
+#     excludes @file.
 #
 # Since: 1.5
 ##
@@ -71,6 +72,7 @@
 # @ACPISlotType:
 #
 # @DIMM: memory slot
+#
 # @CPU: logical CPU slot (since 2.7)
 ##
 { 'enum': 'ACPISlotType', 'data': [ 'DIMM', 'CPU' ] }
@@ -78,9 +80,9 @@
 ##
 # @ACPIOSTInfo:
 #
-# OSPM Status Indication for a device
-# For description of possible values of @source and @status fields
-# see "_OST (OSPM Status Indication)" chapter of ACPI5.0 spec.
+# OSPM Status Indication for a device For description of possible
+# values of @source and @status fields see "_OST (OSPM Status
+# Indication)" chapter of ACPI5.0 spec.
 #
 # @device: device ID associated with slot
 #
 #                  { "slot": "2", "slot-type": "DIMM", "source": 0, "status": 0},
 #                  { "slot": "3", "slot-type": "DIMM", "source": 0, "status": 0}
 #    ]}
-#
 ##
 { 'command': 'query-acpi-ospm-status', 'returns': ['ACPIOSTInfo'] }
 
 # Example:
 #
 # <- { "event": "ACPI_DEVICE_OST",
-#      "data": { "device": "d1", "slot": "0",
-#                "slot-type": "DIMM", "source": 1, "status": 0 } }
-#
+#      "data": { "info": { "device": "d1", "slot": "0",
+#                          "slot-type": "DIMM", "source": 1, "status": 0 } },
+#      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
 ##
 { 'event': 'ACPI_DEVICE_OST',
      'data': { 'info': 'ACPIOSTInfo' } }
index 072ed79def503fcbe09247e6a230be197d31759c..519697c0cd845aac7cb9f198b968122c99d676c3 100644 (file)
@@ -1,4 +1,5 @@
 # -*- mode: python -*-
+# vim: filetype=python
 #
 # Copyright (C) 2015-2019 Zoltán Kővágó <DirtY.iCE.hu@gmail.com>
 #
 # General audio backend options that are used for both playback and
 # recording.
 #
-# @mixing-engine: use QEMU's mixing engine to mix all streams inside QEMU and
-#                 convert audio formats when not supported by the backend. When
-#                 set to off, fixed-settings must be also off (default on,
-#                 since 4.2)
+# @mixing-engine: use QEMU's mixing engine to mix all streams inside
+#     QEMU and convert audio formats when not supported by the
+#     backend.  When set to off, fixed-settings must be also off
+#     (default on, since 4.2)
 #
-# @fixed-settings: use fixed settings for host input/output. When off,
-#                  frequency, channels and format must not be
-#                  specified (default true)
+# @fixed-settings: use fixed settings for host input/output.  When
+#     off, frequency, channels and format must not be specified
+#     (default true)
 #
-# @frequency: frequency to use when using fixed settings
-#             (default 44100)
+# @frequency: frequency to use when using fixed settings (default
+#     44100)
 #
 # @channels: number of channels when using fixed settings (default 2)
 #
 # @voices: number of voices to use (default 1)
 #
-# @format: sample format to use when using fixed settings
-#          (default s16)
+# @format: sample format to use when using fixed settings (default
+#     s16)
 #
 # @buffer-length: the buffer length in microseconds
 #
@@ -75,7 +76,7 @@
 # @period-length: the period length in microseconds
 #
 # @try-poll: attempt to use poll mode, falling back to non-polling
-#            access on failure (default true)
+#     access on failure (default true)
 #
 # Since: 4.0
 ##
     '*out':       'AudiodevAlsaPerDirectionOptions',
     '*threshold': 'uint32' } }
 
+##
+# @AudiodevSndioOptions:
+#
+# Options of the sndio audio backend.
+#
+# @in: options of the capture stream
+#
+# @out: options of the playback stream
+#
+# @dev: the name of the sndio device to use (default 'default')
+#
+# @latency: play buffer size (in microseconds)
+#
+# Since: 7.2
+##
+{ 'struct': 'AudiodevSndioOptions',
+  'data': {
+    '*in':        'AudiodevPerDirectionOptions',
+    '*out':       'AudiodevPerDirectionOptions',
+    '*dev':       'str',
+    '*latency':   'uint32'} }
+
 ##
 # @AudiodevCoreaudioPerDirectionOptions:
 #
-# Options of the Core Audio backend that are used for both playback and
-# recording.
+# Options of the Core Audio backend that are used for both playback
+# and recording.
 #
 # @buffer-count: number of buffers
 #
 #
 # @out: options of the playback stream
 #
-# @latency: add extra latency to playback in microseconds
-#           (default 10000)
+# @latency: add extra latency to playback in microseconds (default
+#     10000)
 #
 # Since: 4.0
 ##
 # Options of the JACK backend that are used for both playback and
 # recording.
 #
-# @server-name: select from among several possible concurrent server instances
-#               (default: environment variable $JACK_DEFAULT_SERVER if set, else "default")
+# @server-name: select from among several possible concurrent server
+#     instances (default: environment variable $JACK_DEFAULT_SERVER if
+#     set, else "default")
 #
-# @client-name: the client name to use. The server will modify this name to
-#               create a unique variant, if needed unless @exact-name is true (default: the
-#               guest's name)
+# @client-name: the client name to use.  The server will modify this
+#     name to create a unique variant, if needed unless @exact-name is
+#     true (default: the guest's name)
 #
-# @connect-ports: if set, a regular expression of JACK client port name(s) to
-#                 monitor for and automatically connect to
+# @connect-ports: if set, a regular expression of JACK client port
+#     name(s) to monitor for and automatically connect to
 #
-# @start-server: start a jack server process if one is not already present
-#                (default: false)
+# @start-server: start a jack server process if one is not already
+#     present (default: false)
 #
-# @exact-name: use the exact name requested otherwise JACK automatically
-#              generates a unique one, if needed (default: false)
+# @exact-name: use the exact name requested otherwise JACK
+#     automatically generates a unique one, if needed (default: false)
 #
 # Since: 5.1
 ##
 # @buffer-count: number of buffers
 #
 # @try-poll: attempt to use poll mode, falling back to non-polling
-#            access on failure (default true)
+#     access on failure (default true)
 #
 # Since: 4.0
 ##
 # @out: options of the playback stream
 #
 # @try-mmap: try using memory-mapped access, falling back to
-#            non-memory-mapped access on failure (default true)
+#     non-memory-mapped access on failure (default true)
 #
-# @exclusive: open device in exclusive mode (vmix won't work)
-#             (default false)
+# @exclusive: open device in exclusive mode (vmix won't work) (default
+#     false)
 #
 # @dsp-policy: set the timing policy of the device (between 0 and 10,
-#              where smaller number means smaller latency but higher
-#              CPU usage) or -1 to use fragment mode (option ignored
-#              on some platforms) (default 5)
+#     where smaller number means smaller latency but higher CPU usage)
+#     or -1 to use fragment mode (option ignored on some platforms)
+#     (default 5)
 #
 # Since: 4.0
 ##
 ##
 # @AudiodevPaPerDirectionOptions:
 #
-# Options of the Pulseaudio backend that are used for both playback and
-# recording.
+# Options of the Pulseaudio backend that are used for both playback
+# and recording.
 #
 # @name: name of the sink/source to use
 #
 # @stream-name: name of the PulseAudio stream created by qemu.  Can be
-#               used to identify the stream in PulseAudio when you
-#               create multiple PulseAudio devices or run multiple qemu
-#               instances (default: audiodev's id, since 4.2)
+#     used to identify the stream in PulseAudio when you create
+#     multiple PulseAudio devices or run multiple qemu instances
+#     (default: audiodev's id, since 4.2)
 #
 # @latency: latency you want PulseAudio to achieve in microseconds
-#           (default 15000)
+#     (default 15000)
 #
 # Since: 4.0
 ##
     '*out':    'AudiodevPaPerDirectionOptions',
     '*server': 'str' } }
 
+##
+# @AudiodevPipewirePerDirectionOptions:
+#
+# Options of the PipeWire backend that are used for both playback and
+# recording.
+#
+# @name: name of the sink/source to use
+#
+# @stream-name: name of the PipeWire stream created by qemu.  Can be
+#     used to identify the stream in PipeWire when you create multiple
+#     PipeWire devices or run multiple qemu instances (default:
+#     audiodev's id)
+#
+# @latency: latency you want PipeWire to achieve in microseconds
+#     (default 46000)
+#
+# Since: 8.1
+##
+{ 'struct': 'AudiodevPipewirePerDirectionOptions',
+  'base': 'AudiodevPerDirectionOptions',
+  'data': {
+    '*name': 'str',
+    '*stream-name': 'str',
+    '*latency': 'uint32' } }
+
+##
+# @AudiodevPipewireOptions:
+#
+# Options of the PipeWire audio backend.
+#
+# @in: options of the capture stream
+#
+# @out: options of the playback stream
+#
+# Since: 8.1
+##
+{ 'struct': 'AudiodevPipewireOptions',
+  'data': {
+    '*in':     'AudiodevPipewirePerDirectionOptions',
+    '*out':    'AudiodevPipewirePerDirectionOptions' } }
+
+##
+# @AudiodevSdlPerDirectionOptions:
+#
+# Options of the SDL audio backend that are used for both playback and
+# recording.
+#
+# @buffer-count: number of buffers (default 4)
+#
+# Since: 6.0
+##
+{ 'struct': 'AudiodevSdlPerDirectionOptions',
+  'base': 'AudiodevPerDirectionOptions',
+  'data': {
+    '*buffer-count': 'uint32' } }
+
+##
+# @AudiodevSdlOptions:
+#
+# Options of the SDL audio backend.
+#
+# @in: options of the recording stream
+#
+# @out: options of the playback stream
+#
+# Since: 6.0
+##
+{ 'struct': 'AudiodevSdlOptions',
+  'data': {
+    '*in':  'AudiodevSdlPerDirectionOptions',
+    '*out': 'AudiodevSdlPerDirectionOptions' } }
+
 ##
 # @AudiodevWavOptions:
 #
     '*out':  'AudiodevPerDirectionOptions',
     '*path': 'str' } }
 
-
 ##
 # @AudioFormat:
 #
 # Since: 4.0
 ##
 { 'enum': 'AudiodevDriver',
-  'data': [ 'none', 'alsa', 'coreaudio', 'dsound', 'jack', 'oss', 'pa',
-            'sdl', 'spice', 'wav' ] }
+  'data': [ 'none',
+            { 'name': 'alsa', 'if': 'CONFIG_AUDIO_ALSA' },
+            { 'name': 'coreaudio', 'if': 'CONFIG_AUDIO_COREAUDIO' },
+            { 'name': 'dbus', 'if': 'CONFIG_DBUS_DISPLAY' },
+            { 'name': 'dsound', 'if': 'CONFIG_AUDIO_DSOUND' },
+            { 'name': 'jack', 'if': 'CONFIG_AUDIO_JACK' },
+            { 'name': 'oss', 'if': 'CONFIG_AUDIO_OSS' },
+            { 'name': 'pa', 'if': 'CONFIG_AUDIO_PA' },
+            { 'name': 'pipewire', 'if': 'CONFIG_AUDIO_PIPEWIRE' },
+            { 'name': 'sdl', 'if': 'CONFIG_AUDIO_SDL' },
+            { 'name': 'sndio', 'if': 'CONFIG_AUDIO_SNDIO' },
+            { 'name': 'spice', 'if': 'CONFIG_SPICE' },
+            'wav' ] }
 
 ##
 # @Audiodev:
 #
 # @driver: the backend driver to use
 #
-# @timer-period: timer period (in microseconds, 0: use lowest possible)
+# @timer-period: timer period (in microseconds, 0: use lowest
+#     possible)
 #
 # Since: 4.0
 ##
   'discriminator': 'driver',
   'data': {
     'none':      'AudiodevGenericOptions',
-    'alsa':      'AudiodevAlsaOptions',
-    'coreaudio': 'AudiodevCoreaudioOptions',
-    'dsound':    'AudiodevDsoundOptions',
-    'jack':      'AudiodevJackOptions',
-    'oss':       'AudiodevOssOptions',
-    'pa':        'AudiodevPaOptions',
-    'sdl':       'AudiodevGenericOptions',
-    'spice':     'AudiodevGenericOptions',
+    'alsa':      { 'type': 'AudiodevAlsaOptions',
+                   'if': 'CONFIG_AUDIO_ALSA' },
+    'coreaudio': { 'type': 'AudiodevCoreaudioOptions',
+                   'if': 'CONFIG_AUDIO_COREAUDIO' },
+    'dbus':      { 'type': 'AudiodevGenericOptions',
+                   'if': 'CONFIG_DBUS_DISPLAY' },
+    'dsound':    { 'type': 'AudiodevDsoundOptions',
+                   'if': 'CONFIG_AUDIO_DSOUND' },
+    'jack':      { 'type': 'AudiodevJackOptions',
+                   'if': 'CONFIG_AUDIO_JACK' },
+    'oss':       { 'type': 'AudiodevOssOptions',
+                   'if': 'CONFIG_AUDIO_OSS' },
+    'pa':        { 'type': 'AudiodevPaOptions',
+                   'if': 'CONFIG_AUDIO_PA' },
+    'pipewire':  { 'type': 'AudiodevPipewireOptions',
+                   'if': 'CONFIG_AUDIO_PIPEWIRE' },
+    'sdl':       { 'type': 'AudiodevSdlOptions',
+                   'if': 'CONFIG_AUDIO_SDL' },
+    'sndio':     { 'type': 'AudiodevSndioOptions',
+                   'if': 'CONFIG_AUDIO_SNDIO' },
+    'spice':     { 'type': 'AudiodevGenericOptions',
+                   'if': 'CONFIG_SPICE' },
     'wav':       'AudiodevWavOptions' } }
+
+##
+# @query-audiodevs:
+#
+# Returns information about audiodev configuration
+#
+# Returns: array of @Audiodev
+#
+# Since: 8.0
+##
+{ 'command': 'query-audiodevs',
+  'returns': ['Audiodev'] }
index 42afe752d1c18d8d2f46aefd0e6790e3fec41265..7fc6e3032ea63bbcb0ebc6b9a0f3a7f3581e7310 100644 (file)
@@ -11,6 +11,7 @@
 # The authorization policy result
 #
 # @deny: deny access
+#
 # @allow: allow access
 #
 # Since: 4.0
@@ -25,6 +26,7 @@
 # The authorization policy match format
 #
 # @exact: an exact string match
+#
 # @glob: string with ? and * shell wildcard support
 #
 # Since: 4.0
@@ -39,7 +41,9 @@
 # A single authorization rule.
 #
 # @match: a string or glob to match against a user identity
+#
 # @policy: the result to return if @match evaluates to true
+#
 # @format: the format of the @match rule (default 'exact')
 #
 # Since: 4.0
            '*format': 'QAuthZListFormat'}}
 
 ##
-# @QAuthZListRuleListHack:
+# @AuthZListProperties:
+#
+# Properties for authz-list objects.
+#
+# @policy: Default policy to apply when no rule matches (default:
+#     deny)
+#
+# @rules: Authorization rules based on matching user
+#
+# Since: 4.0
+##
+{ 'struct': 'AuthZListProperties',
+  'data': { '*policy': 'QAuthZListPolicy',
+            '*rules': ['QAuthZListRule'] } }
+
+##
+# @AuthZListFileProperties:
+#
+# Properties for authz-listfile objects.
+#
+# @filename: File name to load the configuration from.  The file must
+#     contain valid JSON for AuthZListProperties.
+#
+# @refresh: If true, inotify is used to monitor the file,
+#     automatically reloading changes.  If an error occurs during
+#     reloading, all authorizations will fail until the file is next
+#     successfully loaded.  (default: true if the binary was built
+#     with CONFIG_INOTIFY1, false otherwise)
+#
+# Since: 4.0
+##
+{ 'struct': 'AuthZListFileProperties',
+  'data': { 'filename': 'str',
+            '*refresh': 'bool' } }
+
+##
+# @AuthZPAMProperties:
+#
+# Properties for authz-pam objects.
+#
+# @service: PAM service name to use for authorization
+#
+# Since: 4.0
+##
+{ 'struct': 'AuthZPAMProperties',
+  'data': { 'service': 'str' } }
+
+##
+# @AuthZSimpleProperties:
+#
+# Properties for authz-simple objects.
 #
-# Not exposed via QMP; hack to generate QAuthZListRuleList
-# for use internally by the code.
+# @identity: Identifies the allowed user.  Its format depends on the
+#     network service that authorization object is associated with.
+#     For authorizing based on TLS x509 certificates, the identity
+#     must be the x509 distinguished name.
 #
 # Since: 4.0
 ##
-{ 'struct': 'QAuthZListRuleListHack',
-  'data': { 'unused': ['QAuthZListRule'] } }
+{ 'struct': 'AuthZSimpleProperties',
+  'data': { 'identity': 'str' } }
index 04ad80bc1e51bb0913bc5a6153cff2bfdae517f8..a4e4761f3c39e652b2c318878c2deb4f576adca7 100644 (file)
 #
 # @vm-clock-sec: VM clock relative to boot in seconds
 #
-# @vm-clock-nsec: fractional part in nano seconds to be used with vm-clock-sec
+# @vm-clock-nsec: fractional part in nano seconds to be used with
+#     vm-clock-sec
 #
-# @icount: Current instruction count. Appears when execution record/replay
-#          is enabled. Used for "time-traveling" to match the moment
-#          in the recorded execution with the snapshots. This counter may
-#          be obtained through @query-replay command (since 5.2)
+# @icount: Current instruction count.  Appears when execution
+#     record/replay is enabled.  Used for "time-traveling" to match
+#     the moment in the recorded execution with the snapshots.  This
+#     counter may be obtained through @query-replay command (since
+#     5.2)
 #
 # Since: 1.3
-#
 ##
 { 'struct': 'SnapshotInfo',
   'data': { 'id': 'str', 'name': 'str', 'vm-state-size': 'int',
 #
 # @compat: compatibility level
 #
-# @data-file: the filename of the external data file that is stored in the
-#             image and used as a default for opening the image (since: 4.0)
+# @data-file: the filename of the external data file that is stored in
+#     the image and used as a default for opening the image
+#     (since: 4.0)
 #
 # @data-file-raw: True if the external data file must stay valid as a
-#                 standalone (read-only) raw image without looking at qcow2
-#                 metadata (since: 4.0)
+#     standalone (read-only) raw image without looking at qcow2
+#     metadata (since: 4.0)
 #
-# @extended-l2: true if the image has extended L2 entries; only valid for
-#               compat >= 1.1 (since 5.2)
+# @extended-l2: true if the image has extended L2 entries; only valid
+#     for compat >= 1.1 (since 5.2)
 #
 # @lazy-refcounts: on or off; only valid for compat >= 1.1
 #
 # @corrupt: true if the image has been marked corrupt; only valid for
-#           compat >= 1.1 (since 2.2)
+#     compat >= 1.1 (since 2.2)
 #
 # @refcount-bits: width of a refcount entry in bits (since 2.3)
 #
-# @encrypt: details about encryption parameters; only set if image
-#           is encrypted (since 2.10)
+# @encrypt: details about encryption parameters; only set if image is
+#     encrypted (since 2.10)
 #
 # @bitmaps: A list of qcow2 bitmap details (since 4.0)
 #
       'create-type': 'str',
       'cid': 'int',
       'parent-cid': 'int',
-      'extents': ['ImageInfo']
+      'extents': ['VmdkExtentInfo']
+  } }
+
+##
+# @VmdkExtentInfo:
+#
+# Information about a VMDK extent file
+#
+# @filename: Name of the extent file
+#
+# @format: Extent type (e.g. FLAT or SPARSE)
+#
+# @virtual-size: Number of bytes covered by this extent
+#
+# @cluster-size: Cluster size in bytes (for non-flat extents)
+#
+# @compressed: Whether this extent contains compressed data
+#
+# Since: 8.0
+##
+{ 'struct': 'VmdkExtentInfo',
+  'data': {
+      'filename': 'str',
+      'format': 'str',
+      'virtual-size': 'int',
+      '*cluster-size': 'int',
+      '*compressed': 'bool'
+  } }
+
+##
+# @ImageInfoSpecificRbd:
+#
+# @encryption-format: Image encryption format
+#
+# Since: 6.1
+##
+{ 'struct': 'ImageInfoSpecificRbd',
+  'data': {
+      '*encryption-format': 'RbdImageEncryptionFormat'
+  } }
+
+##
+# @ImageInfoSpecificFile:
+#
+# @extent-size-hint: Extent size hint (if available)
+#
+# Since: 8.0
+##
+{ 'struct': 'ImageInfoSpecificFile',
+  'data': {
+      '*extent-size-hint': 'size'
   } }
 
+##
+# @ImageInfoSpecificKind:
+#
+# @luks: Since 2.7
+#
+# @rbd: Since 6.1
+#
+# @file: Since 8.0
+#
+# Since: 1.7
+##
+{ 'enum': 'ImageInfoSpecificKind',
+  'data': [ 'qcow2', 'vmdk', 'luks', 'rbd', 'file' ] }
+
+##
+# @ImageInfoSpecificQCow2Wrapper:
+#
+# Since: 1.7
+##
+{ 'struct': 'ImageInfoSpecificQCow2Wrapper',
+  'data': { 'data': 'ImageInfoSpecificQCow2' } }
+
+##
+# @ImageInfoSpecificVmdkWrapper:
+#
+# Since: 6.1
+##
+{ 'struct': 'ImageInfoSpecificVmdkWrapper',
+  'data': { 'data': 'ImageInfoSpecificVmdk' } }
+
+##
+# @ImageInfoSpecificLUKSWrapper:
+#
+# Since: 2.7
+##
+{ 'struct': 'ImageInfoSpecificLUKSWrapper',
+  'data': { 'data': 'QCryptoBlockInfoLUKS' } }
+# If we need to add block driver specific parameters for
+# LUKS in future, then we'll subclass QCryptoBlockInfoLUKS
+# to define a ImageInfoSpecificLUKS
+
+##
+# @ImageInfoSpecificRbdWrapper:
+#
+# Since: 6.1
+##
+{ 'struct': 'ImageInfoSpecificRbdWrapper',
+  'data': { 'data': 'ImageInfoSpecificRbd' } }
+
+##
+# @ImageInfoSpecificFileWrapper:
+#
+# Since: 8.0
+##
+{ 'struct': 'ImageInfoSpecificFileWrapper',
+  'data': { 'data': 'ImageInfoSpecificFile' } }
+
 ##
 # @ImageInfoSpecific:
 #
-# A discriminated record of image format specific information structures.
+# A discriminated record of image format specific information
+# structures.
 #
 # Since: 1.7
 ##
 { 'union': 'ImageInfoSpecific',
+  'base': { 'type': 'ImageInfoSpecificKind' },
+  'discriminator': 'type',
   'data': {
-      'qcow2': 'ImageInfoSpecificQCow2',
-      'vmdk': 'ImageInfoSpecificVmdk',
-      # If we need to add block driver specific parameters for
-      # LUKS in future, then we'll subclass QCryptoBlockInfoLUKS
-      # to define a ImageInfoSpecificLUKS
-      'luks': 'QCryptoBlockInfoLUKS'
+      'qcow2': 'ImageInfoSpecificQCow2Wrapper',
+      'vmdk': 'ImageInfoSpecificVmdkWrapper',
+      'luks': 'ImageInfoSpecificLUKSWrapper',
+      'rbd': 'ImageInfoSpecificRbdWrapper',
+      'file': 'ImageInfoSpecificFileWrapper'
   } }
 
 ##
-# @ImageInfo:
+# @BlockNodeInfo:
 #
 # Information about a QEMU image file
 #
 #
 # @snapshots: list of VM snapshots
 #
-# @backing-image: info of the backing image (since 1.6)
-#
 # @format-specific: structure supplying additional format-specific
-#                   information (since 1.7)
-#
-# Since: 1.3
+#     information (since 1.7)
 #
+# Since: 8.0
 ##
-{ 'struct': 'ImageInfo',
+{ 'struct': 'BlockNodeInfo',
   'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
            '*actual-size': 'int', 'virtual-size': 'int',
            '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool',
            '*backing-filename': 'str', '*full-backing-filename': 'str',
            '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
-           '*backing-image': 'ImageInfo',
            '*format-specific': 'ImageInfoSpecific' } }
 
+##
+# @ImageInfo:
+#
+# Information about a QEMU image file, and potentially its backing
+# image
+#
+# @backing-image: info of the backing image
+#
+# Since: 1.3
+##
+{ 'struct': 'ImageInfo',
+  'base': 'BlockNodeInfo',
+  'data': {
+      '*backing-image': 'ImageInfo'
+  } }
+
+##
+# @BlockChildInfo:
+#
+# Information about all nodes in the block graph starting at some
+# node, annotated with information about that node in relation to its
+# parent.
+#
+# @name: Child name of the root node in the BlockGraphInfo struct, in
+#     its role as the child of some undescribed parent node
+#
+# @info: Block graph information starting at this node
+#
+# Since: 8.0
+##
+{ 'struct': 'BlockChildInfo',
+  'data': {
+      'name': 'str',
+      'info': 'BlockGraphInfo'
+  } }
+
+##
+# @BlockGraphInfo:
+#
+# Information about all nodes in a block (sub)graph in the form of
+# BlockNodeInfo data.  The base BlockNodeInfo struct contains the
+# information for the (sub)graph's root node.
+#
+# @children: Array of links to this node's child nodes' information
+#
+# Since: 8.0
+##
+{ 'struct': 'BlockGraphInfo',
+  'base': 'BlockNodeInfo',
+  'data': { 'children': ['BlockChildInfo'] } }
+
 ##
 # @ImageCheck:
 #
 # @check-errors: number of unexpected errors occurred during check
 #
 # @image-end-offset: offset (in bytes) where the image ends, this
-#                    field is present if the driver for the image format
-#                    supports it
+#     field is present if the driver for the image format supports it
 #
 # @corruptions: number of corruptions found during the check if any
 #
 # @leaks: number of leaks found during the check if any
 #
-# @corruptions-fixed: number of corruptions fixed during the check
-#                     if any
+# @corruptions-fixed: number of corruptions fixed during the check if
+#     any
 #
 # @leaks-fixed: number of leaks fixed during the check if any
 #
-# @total-clusters: total number of clusters, this field is present
-#                  if the driver for the image format supports it
+# @total-clusters: total number of clusters, this field is present if
+#     the driver for the image format supports it
 #
-# @allocated-clusters: total number of allocated clusters, this
-#                      field is present if the driver for the image format
-#                      supports it
+# @allocated-clusters: total number of allocated clusters, this field
+#     is present if the driver for the image format supports it
 #
 # @fragmented-clusters: total number of fragmented clusters, this
-#                       field is present if the driver for the image format
-#                       supports it
+#     field is present if the driver for the image format supports it
 #
 # @compressed-clusters: total number of compressed clusters, this
-#                       field is present if the driver for the image format
-#                       supports it
+#     field is present if the driver for the image format supports it
 #
 # Since: 1.4
-#
 ##
 { 'struct': 'ImageCheck',
   'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int',
 # Mapping information from a virtual block range to a host file range
 #
 # @start: virtual (guest) offset of the first byte described by this
-#         entry
+#     entry
 #
 # @length: the number of bytes of the mapped virtual range
 #
 # @data: reading the image will actually read data from a file (in
-#        particular, if @offset is present this means that the sectors
-#        are not simply preallocated, but contain actual data in raw
-#        format)
+#     particular, if @offset is present this means that the sectors
+#     are not simply preallocated, but contain actual data in raw
+#     format)
 #
 # @zero: whether the virtual blocks read as zeroes
 #
+# @compressed: true if the data is stored compressed (since 8.2)
+#
 # @depth: number of layers (0 = top image, 1 = top image's backing
-#         file, ..., n - 1 = bottom image (where n is the number of
-#         images in the chain)) before reaching one for which the
-#         range is allocated
+#     file, ..., n - 1 = bottom image (where n is the number of images
+#     in the chain)) before reaching one for which the range is
+#     allocated
+#
+# @present: true if this layer provides the data, false if adding a
+#     backing layer could impact this region (since 6.1)
 #
 # @offset: if present, the image file stores the data for this range
-#          in raw format at the given (host) offset
+#     in raw format at the given (host) offset
 #
 # @filename: filename that is referred to by @offset
 #
 # Since: 2.6
-#
 ##
 { 'struct': 'MapEntry',
   'data': {'start': 'int', 'length': 'int', 'data': 'bool',
-           'zero': 'bool', 'depth': 'int', '*offset': 'int',
-           '*filename': 'str' } }
+           'zero': 'bool', 'compressed': 'bool', 'depth': 'int',
+           'present': 'bool', '*offset': 'int', '*filename': 'str' } }
 
 ##
 # @BlockdevCacheInfo:
 #
 # Cache mode information for a block device
 #
-# @writeback:   true if writeback mode is enabled
-# @direct:      true if the host page cache is bypassed (O_DIRECT)
-# @no-flush:    true if flush requests are ignored for the device
+# @writeback: true if writeback mode is enabled
+#
+# @direct: true if the host page cache is bypassed (O_DIRECT)
+#
+# @no-flush: true if flush requests are ignored for the device
 #
 # Since: 2.3
 ##
 #
 # @ro: true if the backing device was open read-only
 #
-# @drv: the name of the block format used to open the backing device. As of
-#       0.14.0 this can be: 'blkdebug', 'bochs', 'cloop', 'cow', 'dmg',
-#       'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
-#       'http', 'https', 'luks', 'nbd', 'parallels', 'qcow',
-#       'qcow2', 'raw', 'vdi', 'vmdk', 'vpc', 'vvfat'
-#       2.2: 'archipelago' added, 'cow' dropped
-#       2.3: 'host_floppy' deprecated
-#       2.5: 'host_floppy' dropped
-#       2.6: 'luks' added
-#       2.8: 'replication' added, 'tftp' dropped
-#       2.9: 'archipelago' dropped
+# @drv: the name of the block format used to open the backing device.
+#     As of 0.14 this can be: 'blkdebug', 'bochs', 'cloop', 'cow',
+#     'dmg', 'file', 'file', 'ftp', 'ftps', 'host_cdrom',
+#     'host_device', 'http', 'https', 'luks', 'nbd', 'parallels',
+#     'qcow', 'qcow2', 'raw', 'vdi', 'vmdk', 'vpc', 'vvfat' 2.2:
+#     'archipelago' added, 'cow' dropped 2.3: 'host_floppy' deprecated
+#     2.5: 'host_floppy' dropped 2.6: 'luks' added 2.8: 'replication'
+#     added, 'tftp' dropped 2.9: 'archipelago' dropped
 #
 # @backing_file: the name of the backing file (for copy-on-write)
 #
-# @backing_file_depth: number of files in the backing file chain (since: 1.2)
+# @backing_file_depth: number of files in the backing file chain
+#     (since: 1.2)
 #
 # @encrypted: true if the backing device is encrypted
 #
-# @encryption_key_missing: always false
-#
 # @detect_zeroes: detect and optimize zero writes (Since 2.1)
 #
 # @bps: total throughput limit in bytes per second is specified
 #
 # @image: the info of image used (since: 1.6)
 #
-# @bps_max: total throughput limit during bursts,
-#                     in bytes (Since 1.7)
+# @bps_max: total throughput limit during bursts, in bytes (Since 1.7)
 #
-# @bps_rd_max: read throughput limit during bursts,
-#                        in bytes (Since 1.7)
+# @bps_rd_max: read throughput limit during bursts, in bytes (Since
+#     1.7)
 #
-# @bps_wr_max: write throughput limit during bursts,
-#                        in bytes (Since 1.7)
+# @bps_wr_max: write throughput limit during bursts, in bytes (Since
+#     1.7)
 #
-# @iops_max: total I/O operations per second during bursts,
-#                      in bytes (Since 1.7)
+# @iops_max: total I/O operations per second during bursts, in bytes
+#     (Since 1.7)
 #
-# @iops_rd_max: read I/O operations per second during bursts,
-#                         in bytes (Since 1.7)
+# @iops_rd_max: read I/O operations per second during bursts, in bytes
+#     (Since 1.7)
 #
-# @iops_wr_max: write I/O operations per second during bursts,
-#                         in bytes (Since 1.7)
+# @iops_wr_max: write I/O operations per second during bursts, in
+#     bytes (Since 1.7)
 #
-# @bps_max_length: maximum length of the @bps_max burst
-#                            period, in seconds. (Since 2.6)
+# @bps_max_length: maximum length of the @bps_max burst period, in
+#     seconds.  (Since 2.6)
 #
-# @bps_rd_max_length: maximum length of the @bps_rd_max
-#                               burst period, in seconds. (Since 2.6)
+# @bps_rd_max_length: maximum length of the @bps_rd_max burst period,
+#     in seconds.  (Since 2.6)
 #
-# @bps_wr_max_length: maximum length of the @bps_wr_max
-#                               burst period, in seconds. (Since 2.6)
+# @bps_wr_max_length: maximum length of the @bps_wr_max burst period,
+#     in seconds.  (Since 2.6)
 #
-# @iops_max_length: maximum length of the @iops burst
-#                             period, in seconds. (Since 2.6)
+# @iops_max_length: maximum length of the @iops burst period, in
+#     seconds.  (Since 2.6)
 #
-# @iops_rd_max_length: maximum length of the @iops_rd_max
-#                                burst period, in seconds. (Since 2.6)
+# @iops_rd_max_length: maximum length of the @iops_rd_max burst
+#     period, in seconds.  (Since 2.6)
 #
-# @iops_wr_max_length: maximum length of the @iops_wr_max
-#                                burst period, in seconds. (Since 2.6)
+# @iops_wr_max_length: maximum length of the @iops_wr_max burst
+#     period, in seconds.  (Since 2.6)
 #
 # @iops_size: an I/O size in bytes (Since 1.7)
 #
 #
 # @cache: the cache mode used for the block device (since: 2.3)
 #
-# @write_threshold: configured write threshold for the device.
-#                   0 if disabled. (Since 2.3)
+# @write_threshold: configured write threshold for the device.  0 if
+#     disabled.  (Since 2.3)
 #
-# @dirty-bitmaps: dirty bitmaps information (only present if node
-#                 has one or more dirty bitmaps) (Since 4.2)
-#
-# Features:
-# @deprecated: Member @encryption_key_missing is deprecated.  It is
-#              always false.
-#
-# Since: 0.14.0
+# @dirty-bitmaps: dirty bitmaps information (only present if node has
+#     one or more dirty bitmaps) (Since 4.2)
 #
+# Since: 0.14
 ##
 { 'struct': 'BlockDeviceInfo',
   'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
             '*backing_file': 'str', 'backing_file_depth': 'int',
             'encrypted': 'bool',
-            'encryption_key_missing': { 'type': 'bool',
-                                        'features': [ 'deprecated' ] },
             'detect_zeroes': 'BlockdevDetectZeroesOptions',
             'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
             'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
 #
 # @failed: The last I/O operation has failed
 #
-# @nospace: The last I/O operation has failed due to a no-space condition
+# @nospace: The last I/O operation has failed due to a no-space
+#     condition
 #
 # Since: 1.0
 ##
 { 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] }
 
-##
-# @DirtyBitmapStatus:
-#
-# An enumeration of possible states that a dirty bitmap can report to the user.
-#
-# @frozen: The bitmap is currently in-use by some operation and is immutable.
-#          If the bitmap was @active prior to the operation, new writes by the
-#          guest are being recorded in a temporary buffer, and will not be lost.
-#          Generally, bitmaps are cleared on successful use in an operation and
-#          the temporary buffer is committed into the bitmap. On failure, the
-#          temporary buffer is merged back into the bitmap without first
-#          clearing it.
-#          Please refer to the documentation for each bitmap-using operation,
-#          See also @blockdev-backup, @drive-backup.
-#
-# @disabled: The bitmap is not currently recording new writes by the guest.
-#            This is requested explicitly via @block-dirty-bitmap-disable.
-#            It can still be cleared, deleted, or used for backup operations.
-#
-# @active: The bitmap is actively monitoring for new writes, and can be cleared,
-#          deleted, or used for backup operations.
-#
-# @locked: The bitmap is currently in-use by some operation and is immutable.
-#          If the bitmap was @active prior to the operation, it is still
-#          recording new writes. If the bitmap was @disabled, it is not
-#          recording new writes. (Since 2.12)
-#
-# @inconsistent: This is a persistent dirty bitmap that was marked in-use on
-#                disk, and is unusable by QEMU. It can only be deleted.
-#                Please rely on the inconsistent field in @BlockDirtyInfo
-#                instead, as the status field is deprecated. (Since 4.0)
-#
-# Since: 2.4
-##
-{ 'enum': 'DirtyBitmapStatus',
-  'data': ['active', 'disabled', 'frozen', 'locked', 'inconsistent'] }
-
 ##
 # @BlockDirtyInfo:
 #
 #
 # @granularity: granularity of the dirty bitmap in bytes (since 1.4)
 #
-# @status: current status of the dirty bitmap (since 2.4)
-#
-# @recording: true if the bitmap is recording new writes from the guest.
-#             Replaces `active` and `disabled` statuses. (since 4.0)
+# @recording: true if the bitmap is recording new writes from the
+#     guest.  (since 4.0)
 #
 # @busy: true if the bitmap is in-use by some operation (NBD or jobs)
-#        and cannot be modified via QMP or used by another operation.
-#        Replaces `locked` and `frozen` statuses. (since 4.0)
-#
-# @persistent: true if the bitmap was stored on disk, is scheduled to be stored
-#              on disk, or both. (since 4.0)
+#     and cannot be modified via QMP or used by another operation.
+#     (since 4.0)
 #
-# @inconsistent: true if this is a persistent bitmap that was improperly
-#                stored. Implies @persistent to be true; @recording and
-#                @busy to be false. This bitmap cannot be used. To remove
-#                it, use @block-dirty-bitmap-remove. (Since 4.0)
+# @persistent: true if the bitmap was stored on disk, is scheduled to
+#     be stored on disk, or both.  (since 4.0)
 #
-# Features:
-# @deprecated: Member @status is deprecated.  Use @recording and
-#              @locked instead.
+# @inconsistent: true if this is a persistent bitmap that was
+#     improperly stored.  Implies @persistent to be true; @recording
+#     and @busy to be false.  This bitmap cannot be used.  To remove
+#     it, use @block-dirty-bitmap-remove.  (Since 4.0)
 #
 # Since: 1.3
 ##
 { 'struct': 'BlockDirtyInfo',
   'data': {'*name': 'str', 'count': 'int', 'granularity': 'uint32',
            'recording': 'bool', 'busy': 'bool',
-           'status': { 'type': 'DirtyBitmapStatus',
-                       'features': [ 'deprecated' ] },
            'persistent': 'bool', '*inconsistent': 'bool' } }
 
 ##
 #
 # An enumeration of flags that a bitmap can report to the user.
 #
-# @in-use: This flag is set by any process actively modifying the qcow2 file,
-#          and cleared when the updated bitmap is flushed to the qcow2 image.
-#          The presence of this flag in an offline image means that the bitmap
-#          was not saved correctly after its last usage, and may contain
-#          inconsistent data.
+# @in-use: This flag is set by any process actively modifying the
+#     qcow2 file, and cleared when the updated bitmap is flushed to
+#     the qcow2 image.  The presence of this flag in an offline image
+#     means that the bitmap was not saved correctly after its last
+#     usage, and may contain inconsistent data.
 #
-# @auto: The bitmap must reflect all changes of the virtual disk by any
-#        application that would write to this qcow2 file.
+# @auto: The bitmap must reflect all changes of the virtual disk by
+#     any application that would write to this qcow2 file.
 #
 # Since: 4.0
 ##
 #
 # Block latency histogram.
 #
-# @boundaries: list of interval boundary values in nanoseconds, all greater
-#              than zero and in ascending order.
-#              For example, the list [10, 50, 100] produces the following
-#              histogram intervals: [0, 10), [10, 50), [50, 100), [100, +inf).
+# @boundaries: list of interval boundary values in nanoseconds, all
+#     greater than zero and in ascending order.  For example, the list
+#     [10, 50, 100] produces the following histogram intervals: [0,
+#     10), [10, 50), [50, 100), [100, +inf).
 #
-# @bins: list of io request counts corresponding to histogram intervals.
-#        len(@bins) = len(@boundaries) + 1
-#        For the example above, @bins may be something like [3, 1, 5, 2],
-#        and corresponding histogram looks like:
+# @bins: list of io request counts corresponding to histogram
+#     intervals, one more element than @boundaries has.  For the
+#     example above, @bins may be something like [3, 1, 5, 2], and
+#     corresponding histogram looks like:
 #
 # ::
 #
 ##
 # @BlockInfo:
 #
-# Block device information.  This structure describes a virtual device and
-# the backing device associated with it.
+# Block device information.  This structure describes a virtual device
+# and the backing device associated with it.
 #
 # @device: The device name associated with the virtual device.
 #
-# @qdev: The qdev ID, or if no ID is assigned, the QOM path of the block
-#        device. (since 2.10)
+# @qdev: The qdev ID, or if no ID is assigned, the QOM path of the
+#     block device.  (since 2.10)
 #
-# @type: This field is returned only for compatibility reasons, it should
-#        not be used (always returns 'unknown')
+# @type: This field is returned only for compatibility reasons, it
+#     should not be used (always returns 'unknown')
 #
 # @removable: True if the device supports removable media.
 #
-# @locked: True if the guest has locked this device from having its media
-#          removed
-#
-# @tray_open: True if the device's tray is open
-#             (only present if it has a tray)
+# @locked: True if the guest has locked this device from having its
+#     media removed
 #
-# @dirty-bitmaps: dirty bitmaps information (only present if the
-#                 driver has one or more dirty bitmaps) (Since 2.0)
+# @tray_open: True if the device's tray is open (only present if it
+#     has a tray)
 #
-# @io-status: @BlockDeviceIoStatus. Only present if the device
-#             supports it and the VM is configured to stop on errors
-#             (supported device models: virtio-blk, IDE, SCSI except
-#             scsi-generic)
+# @io-status: @BlockDeviceIoStatus.  Only present if the device
+#     supports it and the VM is configured to stop on errors
+#     (supported device models: virtio-blk, IDE, SCSI except
+#     scsi-generic)
 #
 # @inserted: @BlockDeviceInfo describing the device if media is
-#            present
-#
-# Features:
-# @deprecated: Member @dirty-bitmaps is deprecated.  Use @inserted
-#              member @dirty-bitmaps instead.
+#     present
 #
-# Since:  0.14.0
+# Since: 0.14
 ##
 { 'struct': 'BlockInfo',
   'data': {'device': 'str', '*qdev': 'str', 'type': 'str', 'removable': 'bool',
            'locked': 'bool', '*inserted': 'BlockDeviceInfo',
-           '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus',
-           '*dirty-bitmaps': { 'type': ['BlockDirtyInfo'],
-                               'features': [ 'deprecated' ] } } }
+           '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus' } }
 
 ##
 # @BlockMeasureInfo:
 #
-# Image file size calculation information.  This structure describes the size
-# requirements for creating a new image file.
+# Image file size calculation information.  This structure describes
+# the size requirements for creating a new image file.
 #
-# The size requirements depend on the new image file format.  File size always
-# equals virtual disk size for the 'raw' format, even for sparse POSIX files.
-# Compact formats such as 'qcow2' represent unallocated and zero regions
-# efficiently so file size may be smaller than virtual disk size.
+# The size requirements depend on the new image file format.  File
+# size always equals virtual disk size for the 'raw' format, even for
+# sparse POSIX files.  Compact formats such as 'qcow2' represent
+# unallocated and zero regions efficiently so file size may be smaller
+# than virtual disk size.
 #
-# The values are upper bounds that are guaranteed to fit the new image file.
-# Subsequent modification, such as internal snapshot or further bitmap
-# creation, may require additional space and is not covered here.
+# The values are upper bounds that are guaranteed to fit the new image
+# file.  Subsequent modification, such as internal snapshot or further
+# bitmap creation, may require additional space and is not covered
+# here.
 #
-# @required: Size required for a new image file, in bytes, when copying just
-#            allocated guest-visible contents.
+# @required: Size required for a new image file, in bytes, when
+#     copying just allocated guest-visible contents.
 #
-# @fully-allocated: Image file size, in bytes, once data has been written
-#                   to all sectors, when copying just guest-visible contents.
+# @fully-allocated: Image file size, in bytes, once data has been
+#     written to all sectors, when copying just guest-visible
+#     contents.
 #
-# @bitmaps: Additional size required if all the top-level bitmap metadata
-#           in the source image were to be copied to the destination,
-#           present only when source and destination both support
-#           persistent bitmaps. (since 5.1)
+# @bitmaps: Additional size required if all the top-level bitmap
+#     metadata in the source image were to be copied to the
+#     destination, present only when source and destination both
+#     support persistent bitmaps.  (since 5.1)
 #
 # Since: 2.10
 ##
 #
 # Get a list of BlockInfo for all virtual block devices.
 #
-# Returns: a list of @BlockInfo describing each virtual block device. Filter
-#          nodes that were created implicitly are skipped over.
+# Returns: a list of @BlockInfo describing each virtual block device.
+#     Filter nodes that were created implicitly are skipped over.
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 #          }
 #       ]
 #    }
-#
 ##
-{ 'command': 'query-block', 'returns': ['BlockInfo'] }
-
+{ 'command': 'query-block', 'returns': ['BlockInfo'],
+  'allow-preconfig': true, 'coroutine': true }
 
 ##
 # @BlockDeviceTimedStats:
 #
 # Statistics of a block device during a given interval of time.
 #
-# @interval_length: Interval used for calculating the statistics,
-#                   in seconds.
+# @interval_length: Interval used for calculating the statistics, in
+#     seconds.
 #
 # @min_rd_latency_ns: Minimum latency of read operations in the
-#                     defined interval, in nanoseconds.
+#     defined interval, in nanoseconds.
 #
 # @min_wr_latency_ns: Minimum latency of write operations in the
-#                     defined interval, in nanoseconds.
+#     defined interval, in nanoseconds.
+#
+# @min_zone_append_latency_ns: Minimum latency of zone append
+#     operations in the defined interval, in nanoseconds (since 8.1)
 #
 # @min_flush_latency_ns: Minimum latency of flush operations in the
-#                        defined interval, in nanoseconds.
+#     defined interval, in nanoseconds.
 #
 # @max_rd_latency_ns: Maximum latency of read operations in the
-#                     defined interval, in nanoseconds.
+#     defined interval, in nanoseconds.
 #
 # @max_wr_latency_ns: Maximum latency of write operations in the
-#                     defined interval, in nanoseconds.
+#     defined interval, in nanoseconds.
+#
+# @max_zone_append_latency_ns: Maximum latency of zone append
+#     operations in the defined interval, in nanoseconds (since 8.1)
 #
 # @max_flush_latency_ns: Maximum latency of flush operations in the
-#                        defined interval, in nanoseconds.
+#     defined interval, in nanoseconds.
 #
 # @avg_rd_latency_ns: Average latency of read operations in the
-#                     defined interval, in nanoseconds.
+#     defined interval, in nanoseconds.
 #
 # @avg_wr_latency_ns: Average latency of write operations in the
-#                     defined interval, in nanoseconds.
+#     defined interval, in nanoseconds.
+#
+# @avg_zone_append_latency_ns: Average latency of zone append
+#     operations in the defined interval, in nanoseconds (since 8.1)
 #
 # @avg_flush_latency_ns: Average latency of flush operations in the
-#                        defined interval, in nanoseconds.
+#     defined interval, in nanoseconds.
+#
+# @avg_rd_queue_depth: Average number of pending read operations in
+#     the defined interval.
 #
-# @avg_rd_queue_depth: Average number of pending read operations
-#                      in the defined interval.
+# @avg_wr_queue_depth: Average number of pending write operations in
+#     the defined interval.
 #
-# @avg_wr_queue_depth: Average number of pending write operations
-#                      in the defined interval.
+# @avg_zone_append_queue_depth: Average number of pending zone append
+#     operations in the defined interval (since 8.1).
 #
 # Since: 2.5
 ##
   'data': { 'interval_length': 'int', 'min_rd_latency_ns': 'int',
             'max_rd_latency_ns': 'int', 'avg_rd_latency_ns': 'int',
             'min_wr_latency_ns': 'int', 'max_wr_latency_ns': 'int',
-            'avg_wr_latency_ns': 'int', 'min_flush_latency_ns': 'int',
-            'max_flush_latency_ns': 'int', 'avg_flush_latency_ns': 'int',
-            'avg_rd_queue_depth': 'number', 'avg_wr_queue_depth': 'number' } }
+            'avg_wr_latency_ns': 'int', 'min_zone_append_latency_ns': 'int',
+            'max_zone_append_latency_ns': 'int',
+            'avg_zone_append_latency_ns': 'int',
+            'min_flush_latency_ns': 'int', 'max_flush_latency_ns': 'int',
+            'avg_flush_latency_ns': 'int', 'avg_rd_queue_depth': 'number',
+            'avg_wr_queue_depth': 'number',
+            'avg_zone_append_queue_depth': 'number'  } }
 
 ##
 # @BlockDeviceStats:
 #
 # Statistics of a virtual block device or a block backing device.
 #
-# @rd_bytes:      The number of bytes read by the device.
+# @rd_bytes: The number of bytes read by the device.
 #
-# @wr_bytes:      The number of bytes written by the device.
+# @wr_bytes: The number of bytes written by the device.
+#
+# @zone_append_bytes: The number of bytes appended by the zoned
+#     devices (since 8.1)
 #
 # @unmap_bytes: The number of bytes unmapped by the device (Since 4.2)
 #
-# @rd_operations: The number of read operations performed by the device.
+# @rd_operations: The number of read operations performed by the
+#     device.
+#
+# @wr_operations: The number of write operations performed by the
+#     device.
 #
-# @wr_operations: The number of write operations performed by the device.
+# @zone_append_operations: The number of zone append operations
+#     performed by the zoned devices (since 8.1)
 #
-# @flush_operations: The number of cache flush operations performed by the
-#                    device (since 0.15.0)
+# @flush_operations: The number of cache flush operations performed by
+#     the device (since 0.15)
 #
-# @unmap_operations: The number of unmap operations performed by the device
-#                    (Since 4.2)
+# @unmap_operations: The number of unmap operations performed by the
+#     device (Since 4.2)
 #
-# @rd_total_time_ns: Total time spent on reads in nanoseconds (since 0.15.0).
+# @rd_total_time_ns: Total time spent on reads in nanoseconds (since
+#     0.15).
 #
-# @wr_total_time_ns: Total time spent on writes in nanoseconds (since 0.15.0).
+# @wr_total_time_ns: Total time spent on writes in nanoseconds (since
+#     0.15).
 #
-# @flush_total_time_ns: Total time spent on cache flushes in nanoseconds
-#                       (since 0.15.0).
+# @zone_append_total_time_ns: Total time spent on zone append writes
+#     in nanoseconds (since 8.1)
 #
-# @unmap_total_time_ns: Total time spent on unmap operations in nanoseconds
-#                       (Since 4.2)
+# @flush_total_time_ns: Total time spent on cache flushes in
+#     nanoseconds (since 0.15).
 #
-# @wr_highest_offset: The offset after the greatest byte written to the
-#                     device.  The intended use of this information is for
-#                     growable sparse files (like qcow2) that are used on top
-#                     of a physical device.
+# @unmap_total_time_ns: Total time spent on unmap operations in
+#     nanoseconds (Since 4.2)
 #
-# @rd_merged: Number of read requests that have been merged into another
-#             request (Since 2.3).
+# @wr_highest_offset: The offset after the greatest byte written to
+#     the device.  The intended use of this information is for
+#     growable sparse files (like qcow2) that are used on top of a
+#     physical device.
 #
-# @wr_merged: Number of write requests that have been merged into another
-#             request (Since 2.3).
+# @rd_merged: Number of read requests that have been merged into
+#     another request (Since 2.3).
 #
-# @unmap_merged: Number of unmap requests that have been merged into another
-#                request (Since 4.2)
+# @wr_merged: Number of write requests that have been merged into
+#     another request (Since 2.3).
 #
-# @idle_time_ns: Time since the last I/O operation, in
-#                nanoseconds. If the field is absent it means that
-#                there haven't been any operations yet (Since 2.5).
+# @zone_append_merged: Number of zone append requests that have been
+#     merged into another request (since 8.1)
+#
+# @unmap_merged: Number of unmap requests that have been merged into
+#     another request (Since 4.2)
+#
+# @idle_time_ns: Time since the last I/O operation, in nanoseconds.
+#     If the field is absent it means that there haven't been any
+#     operations yet (Since 2.5).
 #
 # @failed_rd_operations: The number of failed read operations
-#                        performed by the device (Since 2.5)
+#     performed by the device (Since 2.5)
 #
 # @failed_wr_operations: The number of failed write operations
-#                        performed by the device (Since 2.5)
+#     performed by the device (Since 2.5)
+#
+# @failed_zone_append_operations: The number of failed zone append
+#     write operations performed by the zoned devices (since 8.1)
 #
 # @failed_flush_operations: The number of failed flush operations
-#                           performed by the device (Since 2.5)
+#     performed by the device (Since 2.5)
 #
-# @failed_unmap_operations: The number of failed unmap operations performed
-#                           by the device (Since 4.2)
+# @failed_unmap_operations: The number of failed unmap operations
+#     performed by the device (Since 4.2)
 #
 # @invalid_rd_operations: The number of invalid read operations
-#                          performed by the device (Since 2.5)
+#     performed by the device (Since 2.5)
 #
 # @invalid_wr_operations: The number of invalid write operations
-#                         performed by the device (Since 2.5)
+#     performed by the device (Since 2.5)
+#
+# @invalid_zone_append_operations: The number of invalid zone append
+#     operations performed by the zoned device (since 8.1)
 #
 # @invalid_flush_operations: The number of invalid flush operations
-#                            performed by the device (Since 2.5)
+#     performed by the device (Since 2.5)
 #
-# @invalid_unmap_operations: The number of invalid unmap operations performed
-#                            by the device (Since 4.2)
+# @invalid_unmap_operations: The number of invalid unmap operations
+#     performed by the device (Since 4.2)
 #
 # @account_invalid: Whether invalid operations are included in the
-#                   last access statistics (Since 2.5)
+#     last access statistics (Since 2.5)
 #
 # @account_failed: Whether failed operations are included in the
-#                  latency and last access statistics (Since 2.5)
+#     latency and last access statistics (Since 2.5)
 #
 # @timed_stats: Statistics specific to the set of previously defined
-#               intervals of time (Since 2.5)
+#     intervals of time (Since 2.5)
 #
-# @rd_latency_histogram: @BlockLatencyHistogramInfo. (Since 4.0)
+# @rd_latency_histogram: @BlockLatencyHistogramInfo.  (Since 4.0)
 #
-# @wr_latency_histogram: @BlockLatencyHistogramInfo. (Since 4.0)
+# @wr_latency_histogram: @BlockLatencyHistogramInfo.  (Since 4.0)
 #
-# @flush_latency_histogram: @BlockLatencyHistogramInfo. (Since 4.0)
+# @zone_append_latency_histogram: @BlockLatencyHistogramInfo.
+#     (since 8.1)
 #
-# Since: 0.14.0
+# @flush_latency_histogram: @BlockLatencyHistogramInfo.  (Since 4.0)
+#
+# Since: 0.14
 ##
 { 'struct': 'BlockDeviceStats',
-  'data': {'rd_bytes': 'int', 'wr_bytes': 'int', 'unmap_bytes' : 'int',
-           'rd_operations': 'int', 'wr_operations': 'int',
+  'data': {'rd_bytes': 'int', 'wr_bytes': 'int', 'zone_append_bytes': 'int',
+           'unmap_bytes' : 'int', 'rd_operations': 'int',
+           'wr_operations': 'int', 'zone_append_operations': 'int',
            'flush_operations': 'int', 'unmap_operations': 'int',
            'rd_total_time_ns': 'int', 'wr_total_time_ns': 'int',
-           'flush_total_time_ns': 'int', 'unmap_total_time_ns': 'int',
-           'wr_highest_offset': 'int',
-           'rd_merged': 'int', 'wr_merged': 'int', 'unmap_merged': 'int',
-           '*idle_time_ns': 'int',
+           'zone_append_total_time_ns': 'int', 'flush_total_time_ns': 'int',
+           'unmap_total_time_ns': 'int', 'wr_highest_offset': 'int',
+           'rd_merged': 'int', 'wr_merged': 'int', 'zone_append_merged': 'int',
+           'unmap_merged': 'int', '*idle_time_ns': 'int',
            'failed_rd_operations': 'int', 'failed_wr_operations': 'int',
-           'failed_flush_operations': 'int', 'failed_unmap_operations': 'int',
-           'invalid_rd_operations': 'int', 'invalid_wr_operations': 'int',
+           'failed_zone_append_operations': 'int',
+           'failed_flush_operations': 'int',
+           'failed_unmap_operations': 'int', 'invalid_rd_operations': 'int',
+           'invalid_wr_operations': 'int',
+           'invalid_zone_append_operations': 'int',
            'invalid_flush_operations': 'int', 'invalid_unmap_operations': 'int',
            'account_invalid': 'bool', 'account_failed': 'bool',
            'timed_stats': ['BlockDeviceTimedStats'],
            '*rd_latency_histogram': 'BlockLatencyHistogramInfo',
            '*wr_latency_histogram': 'BlockLatencyHistogramInfo',
+           '*zone_append_latency_histogram': 'BlockLatencyHistogramInfo',
            '*flush_latency_histogram': 'BlockLatencyHistogramInfo' } }
 
 ##
 #
 # File driver statistics
 #
-# @discard-nb-ok: The number of successful discard operations performed by
-#                 the driver.
+# @discard-nb-ok: The number of successful discard operations
+#     performed by the driver.
 #
-# @discard-nb-failed: The number of failed discard operations performed by
-#                     the driver.
+# @discard-nb-failed: The number of failed discard operations
+#     performed by the driver.
 #
 # @discard-bytes-ok: The number of bytes discarded by the driver.
 #
 #
 # @completion-errors: The number of completion errors.
 #
-# @aligned-accesses: The number of aligned accesses performed by
-#                    the driver.
+# @aligned-accesses: The number of aligned accesses performed by the
+#     driver.
 #
 # @unaligned-accesses: The number of unaligned accesses performed by
-#                      the driver.
+#     the driver.
 #
 # Since: 5.2
 ##
   'discriminator': 'driver',
   'data': {
       'file': 'BlockStatsSpecificFile',
-      'host_device': 'BlockStatsSpecificFile',
+      'host_device': { 'type': 'BlockStatsSpecificFile',
+                       'if': 'HAVE_HOST_BLOCK_DEVICE' },
       'nvme': 'BlockStatsSpecificNvme' } }
 
 ##
 # Statistics of a virtual block device or a block backing device.
 #
 # @device: If the stats are for a virtual block device, the name
-#          corresponding to the virtual block device.
+#     corresponding to the virtual block device.
 #
-# @node-name: The node name of the device. (Since 2.3)
+# @node-name: The node name of the device.  (Since 2.3)
 #
-# @qdev: The qdev ID, or if no ID is assigned, the QOM path of the block
-#        device. (since 3.0)
+# @qdev: The qdev ID, or if no ID is assigned, the QOM path of the
+#     block device.  (since 3.0)
 #
-# @stats:  A @BlockDeviceStats for the device.
+# @stats: A @BlockDeviceStats for the device.
 #
-# @driver-specific: Optional driver-specific stats. (Since 4.2)
+# @driver-specific: Optional driver-specific stats.  (Since 4.2)
 #
 # @parent: This describes the file block device if it has one.
-#          Contains recursively the statistics of the underlying
-#          protocol (e.g. the host file for a qcow2 image). If there is
-#          no underlying protocol, this field is omitted
+#     Contains recursively the statistics of the underlying protocol
+#     (e.g. the host file for a qcow2 image).  If there is no
+#     underlying protocol, this field is omitted
 #
 # @backing: This describes the backing block device if it has one.
-#           (Since 2.0)
+#     (Since 2.0)
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'BlockStats',
   'data': {'*device': 'str', '*qdev': 'str', '*node-name': 'str',
 # Query the @BlockStats for all virtual block devices.
 #
 # @query-nodes: If true, the command will query all the block nodes
-#               that have a node name, in a list which will include "parent"
-#               information, but not "backing".
-#               If false or omitted, the behavior is as before - query all the
-#               device backends, recursively including their "parent" and
-#               "backing". Filter nodes that were created implicitly are
-#               skipped over in this mode. (Since 2.3)
+#     that have a node name, in a list which will include "parent"
+#     information, but not "backing". If false or omitted, the
+#     behavior is as before - query all the device backends,
+#     recursively including their "parent" and "backing". Filter nodes
+#     that were created implicitly are skipped over in this mode.
+#     (Since 2.3)
 #
 # Returns: A list of @BlockStats for each virtual block devices.
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 #          }
 #       ]
 #    }
-#
 ##
 { 'command': 'query-blockstats',
   'data': { '*query-nodes': 'bool' },
-  'returns': ['BlockStats'] }
+  'returns': ['BlockStats'],
+  'allow-preconfig': true }
 
 ##
 # @BlockdevOnError:
 #
 # An enumeration of possible behaviors for errors on I/O operations.
-# The exact meaning depends on whether the I/O was initiated by a guest
-# or by a block job
+# The exact meaning depends on whether the I/O was initiated by a
+# guest or by a block job
 #
-# @report: for guest operations, report the error to the guest;
-#          for jobs, cancel the job
+# @report: for guest operations, report the error to the guest; for
+#     jobs, cancel the job
 #
 # @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR
-#          or BLOCK_JOB_ERROR). The backup, mirror and commit block jobs retry
-#          the failing request later and may still complete successfully. The
-#          stream block job continues to stream and will complete with an
-#          error.
+#     or BLOCK_JOB_ERROR). The backup, mirror and commit block jobs
+#     retry the failing request later and may still complete
+#     successfully.  The stream block job continues to stream and will
+#     complete with an error.
 #
 # @enospc: same as @stop on ENOSPC, same as @report otherwise.
 #
-# @stop: for guest operations, stop the virtual machine;
-#        for jobs, pause the job
+# @stop: for guest operations, stop the virtual machine; for jobs,
+#     pause the job
 #
 # @auto: inherit the error handling policy of the backend (since: 2.7)
 #
 #
 # @none: only copy data written from now on
 #
-# @incremental: only copy data described by the dirty bitmap. (since: 2.4)
+# @incremental: only copy data described by the dirty bitmap.
+#     (since: 2.4)
 #
-# @bitmap: only copy data described by the dirty bitmap. (since: 4.2)
-#          Behavior on completion is determined by the BitmapSyncMode.
+# @bitmap: only copy data described by the dirty bitmap.  (since: 4.2)
+#     Behavior on completion is determined by the BitmapSyncMode.
 #
 # Since: 1.3
 ##
 ##
 # @BitmapSyncMode:
 #
-# An enumeration of possible behaviors for the synchronization of a bitmap
-# when used for data copy operations.
+# An enumeration of possible behaviors for the synchronization of a
+# bitmap when used for data copy operations.
 #
-# @on-success: The bitmap is only synced when the operation is successful.
-#              This is the behavior always used for 'INCREMENTAL' backups.
+# @on-success: The bitmap is only synced when the operation is
+#     successful.  This is the behavior always used for 'INCREMENTAL'
+#     backups.
 #
 # @never: The bitmap is never synchronized with the operation, and is
-#         treated solely as a read-only manifest of blocks to copy.
+#     treated solely as a read-only manifest of blocks to copy.
 #
 # @always: The bitmap is always synchronized with the operation,
-#          regardless of whether or not the operation was successful.
+#     regardless of whether or not the operation was successful.
 #
 # Since: 4.2
 ##
 # @background: copy data in background only.
 #
 # @write-blocking: when data is written to the source, write it
-#                  (synchronously) to the target as well.  In
-#                  addition, data is copied in background just like in
-#                  @background mode.
+#     (synchronously) to the target as well.  In addition, data is
+#     copied in background just like in @background mode.
 #
 # Since: 3.0
 ##
 { 'enum': 'MirrorCopyMode',
   'data': ['background', 'write-blocking'] }
 
+##
+# @BlockJobInfoMirror:
+#
+# Information specific to mirror block jobs.
+#
+# @actively-synced: Whether the source is actively synced to the
+#     target, i.e. same data and new writes are done synchronously to
+#     both.
+#
+# Since 8.2
+##
+{ 'struct': 'BlockJobInfoMirror',
+  'data': { 'actively-synced': 'bool' } }
+
 ##
 # @BlockJobInfo:
 #
 #
 # @type: the job type ('stream' for image streaming)
 #
-# @device: The job identifier. Originally the device name but other
-#          values are allowed since QEMU 2.7
+# @device: The job identifier.  Originally the device name but other
+#     values are allowed since QEMU 2.7
 #
-# @len: Estimated @offset value at the completion of the job. This value can
-#       arbitrarily change while the job is running, in both directions.
+# @len: Estimated @offset value at the completion of the job.  This
+#     value can arbitrarily change while the job is running, in both
+#     directions.
 #
-# @offset: Progress made until now. The unit is arbitrary and the value can
-#          only meaningfully be used for the ratio of @offset to @len. The
-#          value is monotonically increasing.
+# @offset: Progress made until now.  The unit is arbitrary and the
+#     value can only meaningfully be used for the ratio of @offset to
+#     @len.  The value is monotonically increasing.
 #
-# @busy: false if the job is known to be in a quiescent state, with
-#        no pending I/O.  Since 1.3.
+# @busy: false if the job is known to be in a quiescent state, with no
+#     pending I/O.  (Since 1.3)
 #
-# @paused: whether the job is paused or, if @busy is true, will
-#          pause itself as soon as possible.  Since 1.3.
+# @paused: whether the job is paused or, if @busy is true, will pause
+#     itself as soon as possible.  (Since 1.3)
 #
 # @speed: the rate limit, bytes per second
 #
 #
 # @status: Current job state/status (since 2.12)
 #
-# @auto-finalize: Job will finalize itself when PENDING, moving to
-#                 the CONCLUDED state. (since 2.12)
+# @auto-finalize: Job will finalize itself when PENDING, moving to the
+#     CONCLUDED state.  (since 2.12)
 #
-# @auto-dismiss: Job will dismiss itself when CONCLUDED, moving to the NULL
-#                state and disappearing from the query list. (since 2.12)
+# @auto-dismiss: Job will dismiss itself when CONCLUDED, moving to the
+#     NULL state and disappearing from the query list.  (since 2.12)
 #
 # @error: Error information if the job did not complete successfully.
-#         Not set if the job completed successfully. (since 2.12.1)
+#     Not set if the job completed successfully.  (since 2.12.1)
 #
 # Since: 1.1
 ##
-{ 'struct': 'BlockJobInfo',
-  'data': {'type': 'str', 'device': 'str', 'len': 'int',
+{ 'union': 'BlockJobInfo',
+  'base': {'type': 'JobType', 'device': 'str', 'len': 'int',
            'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
            'io-status': 'BlockDeviceIoStatus', 'ready': 'bool',
            'status': 'JobStatus',
            'auto-finalize': 'bool', 'auto-dismiss': 'bool',
-           '*error': 'str' } }
+           '*error': 'str' },
+  'discriminator': 'type',
+  'data': { 'mirror': 'BlockJobInfoMirror' } }
 
 ##
 # @query-block-jobs:
 #
 # Since: 1.1
 ##
-{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'] }
-
-##
-# @block_passwd:
-#
-# This command sets the password of a block device that has not been open
-# with a password and requires one.
-#
-# This command is now obsolete and will always return an error since 2.10
-#
-##
-{ 'command': 'block_passwd',
-  'data': { '*device': 'str',
-            '*node-name': 'str',
-            'password': 'str' } }
+{ 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'],
+  'allow-preconfig': true }
 
 ##
 # @block_resize:
 #
 # @node-name: graph node name to get the image resized (Since 2.0)
 #
-# @size:  new image size in bytes
+# @size: new image size in bytes
 #
-# Returns: - nothing on success
-#          - If @device is not a valid block device, DeviceNotFound
+# Returns:
+#     - nothing on success
+#     - If @device is not a valid block device, DeviceNotFound
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 # -> { "execute": "block_resize",
 #      "arguments": { "device": "scratch", "size": 1073741824 } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'block_resize',
   'data': { '*device': 'str',
             '*node-name': 'str',
             'size': 'int' },
-  'coroutine': true }
+  'coroutine': true,
+  'allow-preconfig': true }
 
 ##
 # @NewImageMode:
 #
-# An enumeration that tells QEMU how to set the backing file path in
-# new image file.
+# An enumeration that tells QEMU how to set the backing file path in a
+# new image file.
 #
 # @existing: QEMU should look for an existing image file.
 #
 # @absolute-paths: QEMU should create a new image with absolute paths
-#                  for the backing file. If there is no backing file available, the new
-#                  image will not be backed either.
+#     for the backing file.  If there is no backing file available,
+#     the new image will not be backed either.
 #
 # Since: 1.1
 ##
 #
 # @device: the name of the device to take a snapshot of.
 #
-# @node-name: graph node name to generate the snapshot from (Since 2.0)
+# @node-name: graph node name to generate the snapshot from (Since
+#     2.0)
 #
-# @snapshot-file: the target of the new overlay image. If the file
-#                 exists, or if it is a device, the overlay will be created in the
-#                 existing file/device. Otherwise, a new file will be created.
+# @snapshot-file: the target of the new overlay image.  If the file
+#     exists, or if it is a device, the overlay will be created in the
+#     existing file/device.  Otherwise, a new file will be created.
 #
-# @snapshot-node-name: the graph node name of the new image (Since 2.0)
+# @snapshot-node-name: the graph node name of the new image (Since
+#     2.0)
 #
 # @format: the format of the overlay image, default is 'qcow2'.
 #
 # @mode: whether and how QEMU should create a new image, default is
-#        'absolute-paths'.
+#     'absolute-paths'.
 ##
 { 'struct': 'BlockdevSnapshotSync',
   'data': { '*device': 'str', '*node-name': 'str',
 # @node: device or node name that will have a snapshot taken.
 #
 # @overlay: reference to the existing block device that will become
-#           the overlay of @node, as part of taking the snapshot.
-#           It must not have a current backing file (this can be
-#           achieved by passing "backing": null to blockdev-add).
+#     the overlay of @node, as part of taking the snapshot.  It must
+#     not have a current backing file (this can be achieved by passing
+#     "backing": null to blockdev-add).
 #
 # Since: 2.5
 ##
 { 'struct': 'BlockdevSnapshot',
   'data': { 'node': 'str', 'overlay': 'str' } }
 
+##
+# @BackupPerf:
+#
+# Optional parameters for backup.  These parameters don't affect
+# functionality, but may significantly affect performance.
+#
+# @use-copy-range: Use copy offloading.  Default false.
+#
+# @max-workers: Maximum number of parallel requests for the sustained
+#     background copying process.  Doesn't influence copy-before-write
+#     operations.  Default 64.
+#
+# @max-chunk: Maximum request length for the sustained background
+#     copying process.  Doesn't influence copy-before-write
+#     operations.  0 means unlimited.  If max-chunk is non-zero then
+#     it should not be less than job cluster size which is calculated
+#     as maximum of target image cluster size and 64k.  Default 0.
+#
+# Since: 6.0
+##
+{ 'struct': 'BackupPerf',
+  'data': { '*use-copy-range': 'bool',
+            '*max-workers': 'int', '*max-chunk': 'int64' } }
+
 ##
 # @BackupCommon:
 #
-# @job-id: identifier for the newly-created block job. If
-#          omitted, the device name will be used. (Since 2.7)
+# @job-id: identifier for the newly-created block job.  If omitted,
+#     the device name will be used.  (Since 2.7)
 #
-# @device: the device name or node-name of a root node which should be copied.
+# @device: the device name or node-name of a root node which should be
+#     copied.
 #
-# @sync: what parts of the disk image should be copied to the destination
-#        (all the disk, only the sectors allocated in the topmost image, from a
-#        dirty bitmap, or only new I/O).
+# @sync: what parts of the disk image should be copied to the
+#     destination (all the disk, only the sectors allocated in the
+#     topmost image, from a dirty bitmap, or only new I/O).
 #
-# @speed: the maximum speed, in bytes per second. The default is 0,
-#         for unlimited.
+# @speed: the maximum speed, in bytes per second.  The default is 0,
+#     for unlimited.
 #
-# @bitmap: The name of a dirty bitmap to use.
-#          Must be present if sync is "bitmap" or "incremental".
-#          Can be present if sync is "full" or "top".
-#          Must not be present otherwise.
-#          (Since 2.4 (drive-backup), 3.1 (blockdev-backup))
+# @bitmap: The name of a dirty bitmap to use.  Must be present if sync
+#     is "bitmap" or "incremental". Can be present if sync is "full"
+#     or "top".  Must not be present otherwise.
+#     (Since 2.4 (drive-backup), 3.1 (blockdev-backup))
 #
-# @bitmap-mode: Specifies the type of data the bitmap should contain after
-#               the operation concludes.
-#               Must be present if a bitmap was provided,
-#               Must NOT be present otherwise. (Since 4.2)
+# @bitmap-mode: Specifies the type of data the bitmap should contain
+#     after the operation concludes.  Must be present if a bitmap was
+#     provided, Must NOT be present otherwise.  (Since 4.2)
 #
 # @compress: true to compress data, if the target format supports it.
-#            (default: false) (since 2.8)
+#     (default: false) (since 2.8)
 #
 # @on-source-error: the action to take on an error on the source,
-#                   default 'report'.  'stop' and 'enospc' can only be used
-#                   if the block device supports io-status (see BlockInfo).
+#     default 'report'.  'stop' and 'enospc' can only be used if the
+#     block device supports io-status (see BlockInfo).
 #
 # @on-target-error: the action to take on an error on the target,
-#                   default 'report' (no limitations, since this applies to
-#                   a different block device than @device).
-#
-# @auto-finalize: When false, this job will wait in a PENDING state after it has
-#                 finished its work, waiting for @block-job-finalize before
-#                 making any block graph changes.
-#                 When true, this job will automatically
-#                 perform its abort or commit actions.
-#                 Defaults to true. (Since 2.12)
-#
-# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
-#                has completely ceased all work, and awaits @block-job-dismiss.
-#                When true, this job will automatically disappear from the query
-#                list without user intervention.
-#                Defaults to true. (Since 2.12)
+#     default 'report' (no limitations, since this applies to a
+#     different block device than @device).
+#
+# @auto-finalize: When false, this job will wait in a PENDING state
+#     after it has finished its work, waiting for @block-job-finalize
+#     before making any block graph changes.  When true, this job will
+#     automatically perform its abort or commit actions.  Defaults to
+#     true.  (Since 2.12)
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state
+#     after it has completely ceased all work, and awaits
+#     @block-job-dismiss.  When true, this job will automatically
+#     disappear from the query list without user intervention.
+#     Defaults to true.  (Since 2.12)
 #
 # @filter-node-name: the node name that should be assigned to the
-#                    filter driver that the backup job inserts into the graph
-#                    above node specified by @drive. If this option is not given,
-#                    a node name is autogenerated. (Since: 4.2)
+#     filter driver that the backup job inserts into the graph above
+#     node specified by @drive.  If this option is not given, a node
+#     name is autogenerated.  (Since: 4.2)
+#
+# @x-perf: Performance options.  (Since 6.0)
+#
+# Features:
+#
+# @unstable: Member @x-perf is experimental.
 #
 # Note: @on-source-error and @on-target-error only affect background
-#       I/O.  If an error occurs during a guest write request, the device's
-#       rerror/werror actions will be used.
+#     I/O.  If an error occurs during a guest write request, the
+#     device's rerror/werror actions will be used.
 #
 # Since: 4.2
 ##
             '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError',
             '*auto-finalize': 'bool', '*auto-dismiss': 'bool',
-            '*filter-node-name': 'str' } }
+            '*filter-node-name': 'str',
+            '*x-perf': { 'type': 'BackupPerf',
+                         'features': [ 'unstable' ] } } }
 
 ##
 # @DriveBackup:
 #
-# @target: the target of the new image. If the file exists, or if it
-#          is a device, the existing file/device will be used as the new
-#          destination.  If it does not exist, a new file will be created.
+# @target: the target of the new image.  If the file exists, or if it
+#     is a device, the existing file/device will be used as the new
+#     destination.  If it does not exist, a new file will be created.
 #
-# @format: the format of the new destination, default is to
-#          probe if @mode is 'existing', else the format of the source
+# @format: the format of the new destination, default is to probe if
+#     @mode is 'existing', else the format of the source
 #
 # @mode: whether and how QEMU should create a new image, default is
-#        'absolute-paths'.
+#     'absolute-paths'.
 #
 # Since: 1.6
 ##
 #
 # For the arguments, see the documentation of BlockdevSnapshotSync.
 #
-# Returns: - nothing on success
-#          - If @device is not a valid block device, DeviceNotFound
+# Returns:
+#     - nothing on success
+#     - If @device is not a valid block device, DeviceNotFound
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 #                     "/some/place/my-image",
 #                     "format": "qcow2" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'blockdev-snapshot-sync',
-  'data': 'BlockdevSnapshotSync' }
-
+  'data': 'BlockdevSnapshotSync',
+  'allow-preconfig': true }
 
 ##
 # @blockdev-snapshot:
 #
 # Take a snapshot, by installing 'node' as the backing image of
 # 'overlay'. Additionally, if 'node' is associated with a block
-# device, the block device changes to using 'overlay' as its new active
-# image.
+# device, the block device changes to using 'overlay' as its new
+# active image.
 #
 # For the arguments, see the documentation of BlockdevSnapshot.
 #
 # Features:
-# @allow-write-only-overlay: If present, the check whether this operation is safe
-#                            was relaxed so that it can be used to change
-#                            backing file of a destination of a blockdev-mirror.
-#                            (since 5.0)
+#
+# @allow-write-only-overlay: If present, the check whether this
+#     operation is safe was relaxed so that it can be used to change
+#     backing file of a destination of a blockdev-mirror.  (since 5.0)
 #
 # Since: 2.5
 #
 #      "arguments": { "node": "ide-hd0",
 #                     "overlay": "node1534" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'blockdev-snapshot',
   'data': 'BlockdevSnapshot',
-  'features': [ 'allow-write-only-overlay' ] }
+  'features': [ 'allow-write-only-overlay' ],
+  'allow-preconfig': true }
 
 ##
 # @change-backing-file:
 #
 # Change the backing file in the image file metadata.  This does not
 # cause QEMU to reopen the image file to reparse the backing filename
-# (it may, however, perform a reopen to change permissions from
-# r/o -> r/w -> r/o, if needed). The new backing file string is written
-# into the image file metadata, and the QEMU internal strings are
-# updated.
+# (it may, however, perform a reopen to change permissions from r/o ->
+# r/w -> r/o, if needed). The new backing file string is written into
+# the image file metadata, and the QEMU internal strings are updated.
 #
 # @image-node-name: The name of the block driver state node of the
-#                   image to modify. The "device" argument is used
-#                   to verify "image-node-name" is in the chain
-#                   described by "device".
+#     image to modify.  The "device" argument is used to verify
+#     "image-node-name" is in the chain described by "device".
 #
 # @device: The device name or node-name of the root node that owns
-#          image-node-name.
+#     image-node-name.
 #
-# @backing-file: The string to write as the backing file.  This
-#                string is not validated, so care should be taken
-#                when specifying the string or the image chain may
-#                not be able to be reopened again.
+# @backing-file: The string to write as the backing file.  This string
+#     is not validated, so care should be taken when specifying the
+#     string or the image chain may not be able to be reopened again.
 #
-# Returns: - Nothing on success
-#          - If "device" does not exist or cannot be determined, DeviceNotFound
+# Returns:
+#     - Nothing on success
+#     - If "device" does not exist or cannot be determined,
+#       DeviceNotFound
 #
 # Since: 2.1
 ##
 { 'command': 'change-backing-file',
   'data': { 'device': 'str', 'image-node-name': 'str',
-            'backing-file': 'str' } }
+            'backing-file': 'str' },
+  'allow-preconfig': true }
 
 ##
 # @block-commit:
 #
-# Live commit of data from overlay image nodes into backing nodes - i.e.,
-# writes data between 'top' and 'base' into 'base'.
+# Live commit of data from overlay image nodes into backing nodes -
+# i.e., writes data between 'top' and 'base' into 'base'.
 #
-# If top == base, that is an error.
-# If top has no overlays on top of it, or if it is in use by a writer,
-# the job will not be completed by itself.  The user needs to complete
-# the job with the block-job-complete command after getting the ready
-# event. (Since 2.0)
+# If top == base, that is an error.  If top has no overlays on top of
+# it, or if it is in use by a writer, the job will not be completed by
+# itself.  The user needs to complete the job with the
+# block-job-complete command after getting the ready event.  (Since
+# 2.0)
 #
 # If the base image is smaller than top, then the base image will be
 # resized to be the same size as top.  If top is smaller than the base
 # size to match the size of the smaller top, you can safely truncate
 # it yourself once the commit operation successfully completes.
 #
-# @job-id: identifier for the newly-created block job. If
-#          omitted, the device name will be used. (Since 2.7)
+# @job-id: identifier for the newly-created block job.  If omitted,
+#     the device name will be used.  (Since 2.7)
 #
 # @device: the device name or node-name of a root node
 #
 # @base-node: The node name of the backing image to write data into.
-#             If not specified, this is the deepest backing image.
-#             (since: 3.1)
+#     If not specified, this is the deepest backing image.
+#     (since: 3.1)
 #
-# @base: Same as @base-node, except that it is a file name rather than a node
-#        name. This must be the exact filename string that was used to open the
-#        node; other strings, even if addressing the same file, are not
-#        accepted
+# @base: Same as @base-node, except that it is a file name rather than
+#     a node name.  This must be the exact filename string that was
+#     used to open the node; other strings, even if addressing the
+#     same file, are not accepted
 #
 # @top-node: The node name of the backing image within the image chain
-#            which contains the topmost data to be committed down. If
-#            not specified, this is the active layer. (since: 3.1)
+#     which contains the topmost data to be committed down.  If not
+#     specified, this is the active layer.  (since: 3.1)
 #
-# @top: Same as @top-node, except that it is a file name rather than a node
-#       name. This must be the exact filename string that was used to open the
-#       node; other strings, even if addressing the same file, are not
-#       accepted
+# @top: Same as @top-node, except that it is a file name rather than a
+#     node name.  This must be the exact filename string that was used
+#     to open the node; other strings, even if addressing the same
+#     file, are not accepted
 #
 # @backing-file: The backing file string to write into the overlay
-#                image of 'top'.  If 'top' does not have an overlay
-#                image, or if 'top' is in use by a writer, specifying
-#                a backing file string is an error.
-#
-#                This filename is not validated.  If a pathname string
-#                is such that it cannot be resolved by QEMU, that
-#                means that subsequent QMP or HMP commands must use
-#                node-names for the image in question, as filename
-#                lookup methods will fail.
-#
-#                If not specified, QEMU will automatically determine
-#                the backing file string to use, or error out if
-#                there is no obvious choice. Care should be taken
-#                when specifying the string, to specify a valid
-#                filename or protocol.
-#                (Since 2.1)
+#     image of 'top'.  If 'top' does not have an overlay image, or if
+#     'top' is in use by a writer, specifying a backing file string is
+#     an error.
+#
+#     This filename is not validated.  If a pathname string is such
+#     that it cannot be resolved by QEMU, that means that subsequent
+#     QMP or HMP commands must use node-names for the image in
+#     question, as filename lookup methods will fail.
+#
+#     If not specified, QEMU will automatically determine the backing
+#     file string to use, or error out if there is no obvious choice.
+#     Care should be taken when specifying the string, to specify a
+#     valid filename or protocol.  (Since 2.1)
 #
 # @speed: the maximum speed, in bytes per second
 #
-# @on-error: the action to take on an error. 'ignore' means that the request
-#            should be retried. (default: report; Since: 5.0)
+# @on-error: the action to take on an error.  'ignore' means that the
+#     request should be retried.  (default: report; Since: 5.0)
 #
 # @filter-node-name: the node name that should be assigned to the
-#                    filter driver that the commit job inserts into the graph
-#                    above @top. If this option is not given, a node name is
-#                    autogenerated. (Since: 2.9)
-#
-# @auto-finalize: When false, this job will wait in a PENDING state after it has
-#                 finished its work, waiting for @block-job-finalize before
-#                 making any block graph changes.
-#                 When true, this job will automatically
-#                 perform its abort or commit actions.
-#                 Defaults to true. (Since 3.1)
-#
-# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
-#                has completely ceased all work, and awaits @block-job-dismiss.
-#                When true, this job will automatically disappear from the query
-#                list without user intervention.
-#                Defaults to true. (Since 3.1)
+#     filter driver that the commit job inserts into the graph above
+#     @top.  If this option is not given, a node name is
+#     autogenerated.  (Since: 2.9)
+#
+# @auto-finalize: When false, this job will wait in a PENDING state
+#     after it has finished its work, waiting for @block-job-finalize
+#     before making any block graph changes.  When true, this job will
+#     automatically perform its abort or commit actions.  Defaults to
+#     true.  (Since 3.1)
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state
+#     after it has completely ceased all work, and awaits
+#     @block-job-dismiss.  When true, this job will automatically
+#     disappear from the query list without user intervention.
+#     Defaults to true.  (Since 3.1)
 #
 # Features:
+#
 # @deprecated: Members @base and @top are deprecated.  Use @base-node
-#              and @top-node instead.
+#     and @top-node instead.
 #
-# Returns: - Nothing on success
-#          - If @device does not exist, DeviceNotFound
-#          - Any other error returns a GenericError.
+# Returns:
+#     - Nothing on success
+#     - If @device does not exist, DeviceNotFound
+#     - Any other error returns a GenericError.
 #
 # Since: 1.3
 #
 #      "arguments": { "device": "virtio0",
 #                     "top": "/tmp/snap1.qcow2" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'block-commit',
   'data': { '*job-id': 'str', 'device': 'str', '*base-node': 'str',
             '*backing-file': 'str', '*speed': 'int',
             '*on-error': 'BlockdevOnError',
             '*filter-node-name': 'str',
-            '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
+            '*auto-finalize': 'bool', '*auto-dismiss': 'bool' },
+  'allow-preconfig': true }
 
 ##
 # @drive-backup:
 #
-# Start a point-in-time copy of a block device to a new destination.  The
-# status of ongoing drive-backup operations can be checked with
-# query-block-jobs where the BlockJobInfo.type field has the value 'backup'.
-# The operation can be stopped before it has completed using the
-# block-job-cancel command.
+# Start a point-in-time copy of a block device to a new destination.
+# The status of ongoing drive-backup operations can be checked with
+# query-block-jobs where the BlockJobInfo.type field has the value
+# 'backup'. The operation can be stopped before it has completed using
+# the block-job-cancel command.
+#
+# Features:
+#
+# @deprecated: This command is deprecated.  Use @blockdev-backup
+#     instead.
 #
-# Returns: - nothing on success
-#          - If @device is not a valid block device, GenericError
+# Returns:
+#     - nothing on success
+#     - If @device is not a valid block device, GenericError
 #
 # Since: 1.6
 #
 #                     "sync": "full",
 #                     "target": "backup.img" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'drive-backup', 'boxed': true,
-  'data': 'DriveBackup' }
+  'data': 'DriveBackup', 'features': ['deprecated'],
+  'allow-preconfig': true }
 
 ##
 # @blockdev-backup:
 #
-# Start a point-in-time copy of a block device to a new destination.  The
-# status of ongoing blockdev-backup operations can be checked with
-# query-block-jobs where the BlockJobInfo.type field has the value 'backup'.
-# The operation can be stopped before it has completed using the
-# block-job-cancel command.
+# Start a point-in-time copy of a block device to a new destination.
+# The status of ongoing blockdev-backup operations can be checked with
+# query-block-jobs where the BlockJobInfo.type field has the value
+# 'backup'. The operation can be stopped before it has completed using
+# the block-job-cancel command.
 #
-# Returns: - nothing on success
-#          - If @device is not a valid block device, DeviceNotFound
+# Returns:
+#     - nothing on success
+#     - If @device is not a valid block device, DeviceNotFound
 #
 # Since: 2.3
 #
 # Example:
+#
 # -> { "execute": "blockdev-backup",
 #      "arguments": { "device": "src-id",
 #                     "sync": "full",
 #                     "target": "tgt-id" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'blockdev-backup', 'boxed': true,
-  'data': 'BlockdevBackup' }
-
+  'data': 'BlockdevBackup',
+  'allow-preconfig': true }
 
 ##
 # @query-named-block-nodes:
 #
 # Get the named block driver list
 #
-# @flat: Omit the nested data about backing image ("backing-image" key) if true.
-#        Default is false (Since 5.0)
+# @flat: Omit the nested data about backing image ("backing-image"
+#     key) if true.  Default is false (Since 5.0)
 #
 # Returns: the list of BlockDeviceInfo
 #
 #                    "file":"disks/test.qcow2",
 #                    "node-name": "my-node",
 #                    "backing_file_depth":1,
+#                    "detect_zeroes":"off",
 #                    "bps":1000000,
 #                    "bps_rd":0,
 #                    "bps_wr":0,
 #                           "virtual-size":2048000
 #                       }
 #                    } } ] }
-#
 ##
 { 'command': 'query-named-block-nodes',
   'returns': [ 'BlockDeviceInfo' ],
-  'data': { '*flat': 'bool' } }
+  'data': { '*flat': 'bool' },
+  'allow-preconfig': true,
+  'coroutine': true}
 
 ##
 # @XDbgBlockGraphNodeType:
 ##
 # @XDbgBlockGraphNode:
 #
-# @id: Block graph node identifier. This @id is generated only for
-#      x-debug-query-block-graph and does not relate to any other identifiers in
-#      Qemu.
+# @id: Block graph node identifier.  This @id is generated only for
+#     x-debug-query-block-graph and does not relate to any other
+#     identifiers in Qemu.
 #
-# @type: Type of graph node. Can be one of block-backend, block-job or
-#        block-driver-state.
+# @type: Type of graph node.  Can be one of block-backend, block-job
+#     or block-driver-state.
 #
-# @name: Human readable name of the node. Corresponds to node-name for
-#        block-driver-state nodes; is not guaranteed to be unique in the whole
-#        graph (with block-jobs and block-backends).
+# @name: Human readable name of the node.  Corresponds to node-name
+#     for block-driver-state nodes; is not guaranteed to be unique in
+#     the whole graph (with block-jobs and block-backends).
 #
 # Since: 4.0
 ##
 #
 # Enum of base block permissions.
 #
-# @consistent-read: A user that has the "permission" of consistent reads is
-#                   guaranteed that their view of the contents of the block
-#                   device is complete and self-consistent, representing the
-#                   contents of a disk at a specific point.
-#                   For most block devices (including their backing files) this
-#                   is true, but the property cannot be maintained in a few
-#                   situations like for intermediate nodes of a commit block
-#                   job.
+# @consistent-read: A user that has the "permission" of consistent
+#     reads is guaranteed that their view of the contents of the block
+#     device is complete and self-consistent, representing the
+#     contents of a disk at a specific point.  For most block devices
+#     (including their backing files) this is true, but the property
+#     cannot be maintained in a few situations like for intermediate
+#     nodes of a commit block job.
 #
-# @write: This permission is required to change the visible disk contents.
+# @write: This permission is required to change the visible disk
+#     contents.
 #
-# @write-unchanged: This permission (which is weaker than BLK_PERM_WRITE) is
-#                   both enough and required for writes to the block node when
-#                   the caller promises that the visible disk content doesn't
-#                   change.
-#                   As the BLK_PERM_WRITE permission is strictly stronger,
-#                   either is sufficient to perform an unchanging write.
+# @write-unchanged: This permission (which is weaker than
+#     BLK_PERM_WRITE) is both enough and required for writes to the
+#     block node when the caller promises that the visible disk
+#     content doesn't change.  As the BLK_PERM_WRITE permission is
+#     strictly stronger, either is sufficient to perform an unchanging
+#     write.
 #
-# @resize: This permission is required to change the size of a block node.
-#
-# @graph-mod: This permission is required to change the node that this
-#             BdrvChild points to.
+# @resize: This permission is required to change the size of a block
+#     node.
 #
 # Since: 4.0
 ##
 { 'enum': 'BlockPermission',
-  'data': [ 'consistent-read', 'write', 'write-unchanged', 'resize',
-            'graph-mod' ] }
+  'data': [ 'consistent-read', 'write', 'write-unchanged', 'resize' ] }
+
 ##
 # @XDbgBlockGraphEdge:
 #
 #
 # @perm: granted permissions for the parent operating on the child
 #
-# @shared-perm: permissions that can still be granted to other users of the
-#               child while it is still attached to this parent
+# @shared-perm: permissions that can still be granted to other users
+#     of the child while it is still attached to this parent
 #
 # Since: 4.0
 ##
 #
 # Get the block graph.
 #
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
 # Since: 4.0
 ##
-{ 'command': 'x-debug-query-block-graph', 'returns': 'XDbgBlockGraph' }
+{ 'command': 'x-debug-query-block-graph', 'returns': 'XDbgBlockGraph',
+  'features': [ 'unstable' ],
+  'allow-preconfig': true }
 
 ##
 # @drive-mirror:
 #
-# Start mirroring a block device's writes to a new destination. target
-# specifies the target of the new image. If the file exists, or if it
-# is a device, it will be used as the new destination for writes. If
-# it does not exist, a new file will be created. format specifies the
-# format of the mirror image, default is to probe if mode='existing',
-# else the format of the source.
+# Start mirroring a block device's writes to a new destination.
+# target specifies the target of the new image.  If the file exists,
+# or if it is a device, it will be used as the new destination for
+# writes.  If it does not exist, a new file will be created.  format
+# specifies the format of the mirror image, default is to probe if
+# mode='existing', else the format of the source.
 #
-# Returns: - nothing on success
-#          - If @device is not a valid block device, GenericError
+# Returns:
+#     - nothing on success
+#     - If @device is not a valid block device, GenericError
 #
 # Since: 1.3
 #
 #                     "sync": "full",
 #                     "format": "qcow2" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'drive-mirror', 'boxed': true,
-  'data': 'DriveMirror' }
+  'data': 'DriveMirror',
+  'allow-preconfig': true }
 
 ##
 # @DriveMirror:
 #
 # A set of parameters describing drive mirror setup.
 #
-# @job-id: identifier for the newly-created block job. If
-#          omitted, the device name will be used. (Since 2.7)
+# @job-id: identifier for the newly-created block job.  If omitted,
+#     the device name will be used.  (Since 2.7)
 #
-# @device:  the device name or node-name of a root node whose writes should be
-#           mirrored.
+# @device: the device name or node-name of a root node whose writes
+#     should be mirrored.
 #
-# @target: the target of the new image. If the file exists, or if it
-#          is a device, the existing file/device will be used as the new
-#          destination.  If it does not exist, a new file will be created.
+# @target: the target of the new image.  If the file exists, or if it
+#     is a device, the existing file/device will be used as the new
+#     destination.  If it does not exist, a new file will be created.
 #
-# @format: the format of the new destination, default is to
-#          probe if @mode is 'existing', else the format of the source
+# @format: the format of the new destination, default is to probe if
+#     @mode is 'existing', else the format of the source
 #
-# @node-name: the new block driver state node name in the graph
-#             (Since 2.1)
+# @node-name: the new block driver state node name in the graph (Since
+#     2.1)
 #
 # @replaces: with sync=full graph node name to be replaced by the new
-#            image when a whole image copy is done. This can be used to repair
-#            broken Quorum files.  By default, @device is replaced, although
-#            implicitly created filters on it are kept. (Since 2.1)
+#     image when a whole image copy is done.  This can be used to
+#     repair broken Quorum files.  By default, @device is replaced,
+#     although implicitly created filters on it are kept.  (Since 2.1)
 #
 # @mode: whether and how QEMU should create a new image, default is
-#        'absolute-paths'.
+#     'absolute-paths'.
 #
-# @speed:  the maximum speed, in bytes per second
+# @speed: the maximum speed, in bytes per second
 #
-# @sync: what parts of the disk image should be copied to the destination
-#        (all the disk, only the sectors allocated in the topmost image, or
-#        only new I/O).
+# @sync: what parts of the disk image should be copied to the
+#     destination (all the disk, only the sectors allocated in the
+#     topmost image, or only new I/O).
 #
-# @granularity: granularity of the dirty bitmap, default is 64K
-#               if the image format doesn't have clusters, 4K if the clusters
-#               are smaller than that, else the cluster size.  Must be a
-#               power of 2 between 512 and 64M (since 1.4).
+# @granularity: granularity of the dirty bitmap, default is 64K if the
+#     image format doesn't have clusters, 4K if the clusters are
+#     smaller than that, else the cluster size.  Must be a power of 2
+#     between 512 and 64M (since 1.4).
 #
-# @buf-size: maximum amount of data in flight from source to
-#            target (since 1.4).
+# @buf-size: maximum amount of data in flight from source to target
+#     (since 1.4).
 #
 # @on-source-error: the action to take on an error on the source,
-#                   default 'report'.  'stop' and 'enospc' can only be used
-#                   if the block device supports io-status (see BlockInfo).
+#     default 'report'.  'stop' and 'enospc' can only be used if the
+#     block device supports io-status (see BlockInfo).
 #
 # @on-target-error: the action to take on an error on the target,
-#                   default 'report' (no limitations, since this applies to
-#                   a different block device than @device).
-# @unmap: Whether to try to unmap target sectors where source has
-#         only zero. If true, and target unallocated sectors will read as zero,
-#         target image sectors will be unmapped; otherwise, zeroes will be
-#         written. Both will result in identical contents.
-#         Default is true. (Since 2.4)
-#
-# @copy-mode: when to copy data to the destination; defaults to 'background'
-#             (Since: 3.0)
-#
-# @auto-finalize: When false, this job will wait in a PENDING state after it has
-#                 finished its work, waiting for @block-job-finalize before
-#                 making any block graph changes.
-#                 When true, this job will automatically
-#                 perform its abort or commit actions.
-#                 Defaults to true. (Since 3.1)
-#
-# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
-#                has completely ceased all work, and awaits @block-job-dismiss.
-#                When true, this job will automatically disappear from the query
-#                list without user intervention.
-#                Defaults to true. (Since 3.1)
+#     default 'report' (no limitations, since this applies to a
+#     different block device than @device).
+#
+# @unmap: Whether to try to unmap target sectors where source has only
+#     zero.  If true, and target unallocated sectors will read as
+#     zero, target image sectors will be unmapped; otherwise, zeroes
+#     will be written.  Both will result in identical contents.
+#     Default is true.  (Since 2.4)
+#
+# @copy-mode: when to copy data to the destination; defaults to
+#     'background' (Since: 3.0)
+#
+# @auto-finalize: When false, this job will wait in a PENDING state
+#     after it has finished its work, waiting for @block-job-finalize
+#     before making any block graph changes.  When true, this job will
+#     automatically perform its abort or commit actions.  Defaults to
+#     true.  (Since 3.1)
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state
+#     after it has completely ceased all work, and awaits
+#     @block-job-dismiss.  When true, this job will automatically
+#     disappear from the query list without user intervention.
+#     Defaults to true.  (Since 3.1)
+#
 # Since: 1.3
 ##
 { 'struct': 'DriveMirror',
 # @name: name of the dirty bitmap (must be less than 1024 bytes)
 #
 # @granularity: the bitmap granularity, default is 64k for
-#               block-dirty-bitmap-add
+#     block-dirty-bitmap-add
 #
 # @persistent: the bitmap is persistent, i.e. it will be saved to the
-#              corresponding block device image file on its close. For now only
-#              Qcow2 disks support persistent bitmaps. Default is false for
-#              block-dirty-bitmap-add. (Since: 2.10)
+#     corresponding block device image file on its close.  For now
+#     only Qcow2 disks support persistent bitmaps.  Default is false
+#     for block-dirty-bitmap-add.  (Since: 2.10)
 #
-# @disabled: the bitmap is created in the disabled state, which means that
-#            it will not track drive changes. The bitmap may be enabled with
-#            block-dirty-bitmap-enable. Default is false. (Since: 4.0)
+# @disabled: the bitmap is created in the disabled state, which means
+#     that it will not track drive changes.  The bitmap may be enabled
+#     with block-dirty-bitmap-enable.  Default is false.  (Since: 4.0)
 #
 # Since: 2.4
 ##
             '*persistent': 'bool', '*disabled': 'bool' } }
 
 ##
-# @BlockDirtyBitmapMergeSource:
+# @BlockDirtyBitmapOrStr:
 #
-# @local: name of the bitmap, attached to the same node as target bitmap.
+# @local: name of the bitmap, attached to the same node as target
+#     bitmap.
 #
 # @external: bitmap with specified node
 #
 # Since: 4.1
 ##
-{ 'alternate': 'BlockDirtyBitmapMergeSource',
+{ 'alternate': 'BlockDirtyBitmapOrStr',
   'data': { 'local': 'str',
             'external': 'BlockDirtyBitmap' } }
 
 #
 # @target: name of the destination dirty bitmap
 #
-# @bitmaps: name(s) of the source dirty bitmap(s) at @node and/or fully
-#           specified BlockDirtyBitmap elements. The latter are supported
-#           since 4.1.
+# @bitmaps: name(s) of the source dirty bitmap(s) at @node and/or
+#     fully specified BlockDirtyBitmap elements.  The latter are
+#     supported since 4.1.
 #
 # Since: 4.0
 ##
 { 'struct': 'BlockDirtyBitmapMerge',
   'data': { 'node': 'str', 'target': 'str',
-            'bitmaps': ['BlockDirtyBitmapMergeSource'] } }
+            'bitmaps': ['BlockDirtyBitmapOrStr'] } }
 
 ##
 # @block-dirty-bitmap-add:
 #
-# Create a dirty bitmap with a name on the node, and start tracking the writes.
+# Create a dirty bitmap with a name on the node, and start tracking
+# the writes.
 #
-# Returns: - nothing on success
-#          - If @node is not a valid block device or node, DeviceNotFound
-#          - If @name is already taken, GenericError with an explanation
+# Returns:
+#     - nothing on success
+#     - If @node is not a valid block device or node, DeviceNotFound
+#     - If @name is already taken, GenericError with an explanation
 #
 # Since: 2.4
 #
 # -> { "execute": "block-dirty-bitmap-add",
 #      "arguments": { "node": "drive0", "name": "bitmap0" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'block-dirty-bitmap-add',
-  'data': 'BlockDirtyBitmapAdd' }
+  'data': 'BlockDirtyBitmapAdd',
+  'allow-preconfig': true }
 
 ##
 # @block-dirty-bitmap-remove:
 #
 # Stop write tracking and remove the dirty bitmap that was created
-# with block-dirty-bitmap-add. If the bitmap is persistent, remove it from its
-# storage too.
+# with block-dirty-bitmap-add.  If the bitmap is persistent, remove it
+# from its storage too.
 #
-# Returns: - nothing on success
-#          - If @node is not a valid block device or node, DeviceNotFound
-#          - If @name is not found, GenericError with an explanation
-#          - if @name is frozen by an operation, GenericError
+# Returns:
+#     - nothing on success
+#     - If @node is not a valid block device or node, DeviceNotFound
+#     - If @name is not found, GenericError with an explanation
+#     - if @name is frozen by an operation, GenericError
 #
 # Since: 2.4
 #
 # -> { "execute": "block-dirty-bitmap-remove",
 #      "arguments": { "node": "drive0", "name": "bitmap0" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'block-dirty-bitmap-remove',
-  'data': 'BlockDirtyBitmap' }
+  'data': 'BlockDirtyBitmap',
+  'allow-preconfig': true }
 
 ##
 # @block-dirty-bitmap-clear:
 # backup from this point in time forward will only backup clusters
 # modified after this clear operation.
 #
-# Returns: - nothing on success
-#          - If @node is not a valid block device, DeviceNotFound
-#          - If @name is not found, GenericError with an explanation
+# Returns:
+#     - nothing on success
+#     - If @node is not a valid block device, DeviceNotFound
+#     - If @name is not found, GenericError with an explanation
 #
 # Since: 2.4
 #
 # -> { "execute": "block-dirty-bitmap-clear",
 #      "arguments": { "node": "drive0", "name": "bitmap0" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'block-dirty-bitmap-clear',
-  'data': 'BlockDirtyBitmap' }
+  'data': 'BlockDirtyBitmap',
+  'allow-preconfig': true }
 
 ##
 # @block-dirty-bitmap-enable:
 #
 # Enables a dirty bitmap so that it will begin tracking disk changes.
 #
-# Returns: - nothing on success
-#          - If @node is not a valid block device, DeviceNotFound
-#          - If @name is not found, GenericError with an explanation
+# Returns:
+#     - nothing on success
+#     - If @node is not a valid block device, DeviceNotFound
+#     - If @name is not found, GenericError with an explanation
 #
 # Since: 4.0
 #
 # -> { "execute": "block-dirty-bitmap-enable",
 #      "arguments": { "node": "drive0", "name": "bitmap0" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'block-dirty-bitmap-enable',
-  'data': 'BlockDirtyBitmap' }
+  'data': 'BlockDirtyBitmap',
+  'allow-preconfig': true }
 
 ##
 # @block-dirty-bitmap-disable:
 #
 # Disables a dirty bitmap so that it will stop tracking disk changes.
 #
-# Returns: - nothing on success
-#          - If @node is not a valid block device, DeviceNotFound
-#          - If @name is not found, GenericError with an explanation
+# Returns:
+#     - nothing on success
+#     - If @node is not a valid block device, DeviceNotFound
+#     - If @name is not found, GenericError with an explanation
 #
 # Since: 4.0
 #
 # -> { "execute": "block-dirty-bitmap-disable",
 #      "arguments": { "node": "drive0", "name": "bitmap0" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'block-dirty-bitmap-disable',
-  'data': 'BlockDirtyBitmap' }
+  'data': 'BlockDirtyBitmap',
+  'allow-preconfig': true }
 
 ##
 # @block-dirty-bitmap-merge:
 #
 # Merge dirty bitmaps listed in @bitmaps to the @target dirty bitmap.
-# Dirty bitmaps in @bitmaps will be unchanged, except if it also appears
-# as the @target bitmap. Any bits already set in @target will still be
-# set after the merge, i.e., this operation does not clear the target.
-# On error, @target is unchanged.
-#
-# The resulting bitmap will count as dirty any clusters that were dirty in any
-# of the source bitmaps. This can be used to achieve backup checkpoints, or in
-# simpler usages, to copy bitmaps.
-#
-# Returns: - nothing on success
-#          - If @node is not a valid block device, DeviceNotFound
-#          - If any bitmap in @bitmaps or @target is not found, GenericError
-#          - If any of the bitmaps have different sizes or granularities,
-#            GenericError
+# Dirty bitmaps in @bitmaps will be unchanged, except if it also
+# appears as the @target bitmap.  Any bits already set in @target will
+# still be set after the merge, i.e., this operation does not clear
+# the target.  On error, @target is unchanged.
+#
+# The resulting bitmap will count as dirty any clusters that were
+# dirty in any of the source bitmaps.  This can be used to achieve
+# backup checkpoints, or in simpler usages, to copy bitmaps.
+#
+# Returns:
+#     - nothing on success
+#     - If @node is not a valid block device, DeviceNotFound
+#     - If any bitmap in @bitmaps or @target is not found,
+#       GenericError
+#     - If any of the bitmaps have different sizes or granularities,
+#       GenericError
 #
 # Since: 4.0
 #
 #      "arguments": { "node": "drive0", "target": "bitmap0",
 #                     "bitmaps": ["bitmap1"] } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'block-dirty-bitmap-merge',
-  'data': 'BlockDirtyBitmapMerge' }
+  'data': 'BlockDirtyBitmapMerge',
+  'allow-preconfig': true }
 
 ##
 # @BlockDirtyBitmapSha256:
 #
 # Get bitmap SHA256.
 #
-# Returns: - BlockDirtyBitmapSha256 on success
-#          - If @node is not a valid block device, DeviceNotFound
-#          - If @name is not found or if hashing has failed, GenericError with an
-#            explanation
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
+# Returns:
+#     - BlockDirtyBitmapSha256 on success
+#     - If @node is not a valid block device, DeviceNotFound
+#     - If @name is not found or if hashing has failed, GenericError
+#       with an explanation
 #
 # Since: 2.10
 ##
 { 'command': 'x-debug-block-dirty-bitmap-sha256',
-  'data': 'BlockDirtyBitmap', 'returns': 'BlockDirtyBitmapSha256' }
+  'data': 'BlockDirtyBitmap', 'returns': 'BlockDirtyBitmapSha256',
+  'features': [ 'unstable' ],
+  'allow-preconfig': true }
 
 ##
 # @blockdev-mirror:
 #
 # Start mirroring a block device's writes to a new destination.
 #
-# @job-id: identifier for the newly-created block job. If
-#          omitted, the device name will be used. (Since 2.7)
+# @job-id: identifier for the newly-created block job.  If omitted,
+#     the device name will be used.  (Since 2.7)
 #
-# @device: The device name or node-name of a root node whose writes should be
-#          mirrored.
+# @device: The device name or node-name of a root node whose writes
+#     should be mirrored.
 #
-# @target: the id or node-name of the block device to mirror to. This mustn't be
-#          attached to guest.
+# @target: the id or node-name of the block device to mirror to.  This
+#     mustn't be attached to guest.
 #
 # @replaces: with sync=full graph node name to be replaced by the new
-#            image when a whole image copy is done. This can be used to repair
-#            broken Quorum files.  By default, @device is replaced, although
-#            implicitly created filters on it are kept.
+#     image when a whole image copy is done.  This can be used to
+#     repair broken Quorum files.  By default, @device is replaced,
+#     although implicitly created filters on it are kept.
 #
-# @speed:  the maximum speed, in bytes per second
+# @speed: the maximum speed, in bytes per second
 #
-# @sync: what parts of the disk image should be copied to the destination
-#        (all the disk, only the sectors allocated in the topmost image, or
-#        only new I/O).
+# @sync: what parts of the disk image should be copied to the
+#     destination (all the disk, only the sectors allocated in the
+#     topmost image, or only new I/O).
 #
-# @granularity: granularity of the dirty bitmap, default is 64K
-#               if the image format doesn't have clusters, 4K if the clusters
-#               are smaller than that, else the cluster size.  Must be a
-#               power of 2 between 512 and 64M
+# @granularity: granularity of the dirty bitmap, default is 64K if the
+#     image format doesn't have clusters, 4K if the clusters are
+#     smaller than that, else the cluster size.  Must be a power of 2
+#     between 512 and 64M
 #
-# @buf-size: maximum amount of data in flight from source to
-#            target
+# @buf-size: maximum amount of data in flight from source to target
 #
 # @on-source-error: the action to take on an error on the source,
-#                   default 'report'.  'stop' and 'enospc' can only be used
-#                   if the block device supports io-status (see BlockInfo).
+#     default 'report'.  'stop' and 'enospc' can only be used if the
+#     block device supports io-status (see BlockInfo).
 #
 # @on-target-error: the action to take on an error on the target,
-#                   default 'report' (no limitations, since this applies to
-#                   a different block device than @device).
+#     default 'report' (no limitations, since this applies to a
+#     different block device than @device).
 #
 # @filter-node-name: the node name that should be assigned to the
-#                    filter driver that the mirror job inserts into the graph
-#                    above @device. If this option is not given, a node name is
-#                    autogenerated. (Since: 2.9)
-#
-# @copy-mode: when to copy data to the destination; defaults to 'background'
-#             (Since: 3.0)
-#
-# @auto-finalize: When false, this job will wait in a PENDING state after it has
-#                 finished its work, waiting for @block-job-finalize before
-#                 making any block graph changes.
-#                 When true, this job will automatically
-#                 perform its abort or commit actions.
-#                 Defaults to true. (Since 3.1)
-#
-# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
-#                has completely ceased all work, and awaits @block-job-dismiss.
-#                When true, this job will automatically disappear from the query
-#                list without user intervention.
-#                Defaults to true. (Since 3.1)
+#     filter driver that the mirror job inserts into the graph above
+#     @device.  If this option is not given, a node name is
+#     autogenerated.  (Since: 2.9)
+#
+# @copy-mode: when to copy data to the destination; defaults to
+#     'background' (Since: 3.0)
+#
+# @auto-finalize: When false, this job will wait in a PENDING state
+#     after it has finished its work, waiting for @block-job-finalize
+#     before making any block graph changes.  When true, this job will
+#     automatically perform its abort or commit actions.  Defaults to
+#     true.  (Since 3.1)
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state
+#     after it has completely ceased all work, and awaits
+#     @block-job-dismiss.  When true, this job will automatically
+#     disappear from the query list without user intervention.
+#     Defaults to true.  (Since 3.1)
+#
 # Returns: nothing on success.
 #
 # Since: 2.6
 #                     "target": "target0",
 #                     "sync": "full" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'blockdev-mirror',
   'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
             '*on-target-error': 'BlockdevOnError',
             '*filter-node-name': 'str',
             '*copy-mode': 'MirrorCopyMode',
-            '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
+            '*auto-finalize': 'bool', '*auto-dismiss': 'bool' },
+  'allow-preconfig': true }
 
 ##
 # @BlockIOThrottle:
 #
 # @iops_wr: write I/O operations per second
 #
-# @bps_max: total throughput limit during bursts,
-#           in bytes (Since 1.7)
+# @bps_max: total throughput limit during bursts, in bytes (Since 1.7)
 #
-# @bps_rd_max: read throughput limit during bursts,
-#              in bytes (Since 1.7)
+# @bps_rd_max: read throughput limit during bursts, in bytes (Since
+#     1.7)
 #
-# @bps_wr_max: write throughput limit during bursts,
-#              in bytes (Since 1.7)
+# @bps_wr_max: write throughput limit during bursts, in bytes (Since
+#     1.7)
 #
-# @iops_max: total I/O operations per second during bursts,
-#            in bytes (Since 1.7)
+# @iops_max: total I/O operations per second during bursts, in bytes
+#     (Since 1.7)
 #
-# @iops_rd_max: read I/O operations per second during bursts,
-#               in bytes (Since 1.7)
+# @iops_rd_max: read I/O operations per second during bursts, in bytes
+#     (Since 1.7)
 #
-# @iops_wr_max: write I/O operations per second during bursts,
-#               in bytes (Since 1.7)
+# @iops_wr_max: write I/O operations per second during bursts, in
+#     bytes (Since 1.7)
 #
-# @bps_max_length: maximum length of the @bps_max burst
-#                  period, in seconds. It must only
-#                  be set if @bps_max is set as well.
-#                  Defaults to 1. (Since 2.6)
+# @bps_max_length: maximum length of the @bps_max burst period, in
+#     seconds.  It must only be set if @bps_max is set as well.
+#     Defaults to 1. (Since 2.6)
 #
-# @bps_rd_max_length: maximum length of the @bps_rd_max
-#                     burst period, in seconds. It must only
-#                     be set if @bps_rd_max is set as well.
-#                     Defaults to 1. (Since 2.6)
+# @bps_rd_max_length: maximum length of the @bps_rd_max burst period,
+#     in seconds.  It must only be set if @bps_rd_max is set as well.
+#     Defaults to 1. (Since 2.6)
 #
-# @bps_wr_max_length: maximum length of the @bps_wr_max
-#                     burst period, in seconds. It must only
-#                     be set if @bps_wr_max is set as well.
-#                     Defaults to 1. (Since 2.6)
+# @bps_wr_max_length: maximum length of the @bps_wr_max burst period,
+#     in seconds.  It must only be set if @bps_wr_max is set as well.
+#     Defaults to 1. (Since 2.6)
 #
-# @iops_max_length: maximum length of the @iops burst
-#                   period, in seconds. It must only
-#                   be set if @iops_max is set as well.
-#                   Defaults to 1. (Since 2.6)
+# @iops_max_length: maximum length of the @iops burst period, in
+#     seconds.  It must only be set if @iops_max is set as well.
+#     Defaults to 1. (Since 2.6)
 #
-# @iops_rd_max_length: maximum length of the @iops_rd_max
-#                      burst period, in seconds. It must only
-#                      be set if @iops_rd_max is set as well.
-#                      Defaults to 1. (Since 2.6)
+# @iops_rd_max_length: maximum length of the @iops_rd_max burst
+#     period, in seconds.  It must only be set if @iops_rd_max is set
+#     as well.  Defaults to 1. (Since 2.6)
 #
-# @iops_wr_max_length: maximum length of the @iops_wr_max
-#                      burst period, in seconds. It must only
-#                      be set if @iops_wr_max is set as well.
-#                      Defaults to 1. (Since 2.6)
+# @iops_wr_max_length: maximum length of the @iops_wr_max burst
+#     period, in seconds.  It must only be set if @iops_wr_max is set
+#     as well.  Defaults to 1. (Since 2.6)
 #
 # @iops_size: an I/O size in bytes (Since 1.7)
 #
 # @group: throttle group name (Since 2.4)
 #
 # Features:
+#
 # @deprecated: Member @device is deprecated.  Use @id instead.
 #
 # Since: 1.1
 ##
 # @ThrottleLimits:
 #
-# Limit parameters for throttling.
-# Since some limit combinations are illegal, limits should always be set in one
-# transaction. All fields are optional. When setting limits, if a field is
-# missing the current value is not changed.
+# Limit parameters for throttling.  Since some limit combinations are
+# illegal, limits should always be set in one transaction.  All fields
+# are optional.  When setting limits, if a field is missing the
+# current value is not changed.
 #
 # @iops-total: limit total I/O operations per second
+#
 # @iops-total-max: I/O operations burst
-# @iops-total-max-length: length of the iops-total-max burst period, in seconds
-#                         It must only be set if @iops-total-max is set as well.
+#
+# @iops-total-max-length: length of the iops-total-max burst period,
+#     in seconds It must only be set if @iops-total-max is set as
+#     well.
+#
 # @iops-read: limit read operations per second
+#
 # @iops-read-max: I/O operations read burst
-# @iops-read-max-length: length of the iops-read-max burst period, in seconds
-#                        It must only be set if @iops-read-max is set as well.
+#
+# @iops-read-max-length: length of the iops-read-max burst period, in
+#     seconds It must only be set if @iops-read-max is set as well.
+#
 # @iops-write: limit write operations per second
+#
 # @iops-write-max: I/O operations write burst
-# @iops-write-max-length: length of the iops-write-max burst period, in seconds
-#                         It must only be set if @iops-write-max is set as well.
+#
+# @iops-write-max-length: length of the iops-write-max burst period,
+#     in seconds It must only be set if @iops-write-max is set as
+#     well.
+#
 # @bps-total: limit total bytes per second
+#
 # @bps-total-max: total bytes burst
-# @bps-total-max-length: length of the bps-total-max burst period, in seconds.
-#                        It must only be set if @bps-total-max is set as well.
+#
+# @bps-total-max-length: length of the bps-total-max burst period, in
+#     seconds.  It must only be set if @bps-total-max is set as well.
+#
 # @bps-read: limit read bytes per second
+#
 # @bps-read-max: total bytes read burst
-# @bps-read-max-length: length of the bps-read-max burst period, in seconds
-#                       It must only be set if @bps-read-max is set as well.
+#
+# @bps-read-max-length: length of the bps-read-max burst period, in
+#     seconds It must only be set if @bps-read-max is set as well.
+#
 # @bps-write: limit write bytes per second
+#
 # @bps-write-max: total bytes write burst
-# @bps-write-max-length: length of the bps-write-max burst period, in seconds
-#                        It must only be set if @bps-write-max is set as well.
+#
+# @bps-write-max-length: length of the bps-write-max burst period, in
+#     seconds It must only be set if @bps-write-max is set as well.
+#
 # @iops-size: when limiting by iops max size of an I/O in bytes
 #
 # Since: 2.11
             '*bps-write-max' : 'int', '*bps-write-max-length' : 'int',
             '*iops-size' : 'int' } }
 
+##
+# @ThrottleGroupProperties:
+#
+# Properties for throttle-group objects.
+#
+# @limits: limits to apply for this throttle group
+#
+# Features:
+#
+# @unstable: All members starting with x- are aliases for the same key
+#     without x- in the @limits object.  This is not a stable
+#     interface and may be removed or changed incompatibly in the
+#     future.  Use @limits for a supported stable interface.
+#
+# Since: 2.11
+##
+{ 'struct': 'ThrottleGroupProperties',
+  'data': { '*limits': 'ThrottleLimits',
+            '*x-iops-total': { 'type': 'int',
+                               'features': [ 'unstable' ] },
+            '*x-iops-total-max': { 'type': 'int',
+                                   'features': [ 'unstable' ] },
+            '*x-iops-total-max-length': { 'type': 'int',
+                                          'features': [ 'unstable' ] },
+            '*x-iops-read': { 'type': 'int',
+                              'features': [ 'unstable' ] },
+            '*x-iops-read-max': { 'type': 'int',
+                                  'features': [ 'unstable' ] },
+            '*x-iops-read-max-length': { 'type': 'int',
+                                         'features': [ 'unstable' ] },
+            '*x-iops-write': { 'type': 'int',
+                               'features': [ 'unstable' ] },
+            '*x-iops-write-max': { 'type': 'int',
+                                   'features': [ 'unstable' ] },
+            '*x-iops-write-max-length': { 'type': 'int',
+                                          'features': [ 'unstable' ] },
+            '*x-bps-total': { 'type': 'int',
+                              'features': [ 'unstable' ] },
+            '*x-bps-total-max': { 'type': 'int',
+                                  'features': [ 'unstable' ] },
+            '*x-bps-total-max-length': { 'type': 'int',
+                                         'features': [ 'unstable' ] },
+            '*x-bps-read': { 'type': 'int',
+                             'features': [ 'unstable' ] },
+            '*x-bps-read-max': { 'type': 'int',
+                                 'features': [ 'unstable' ] },
+            '*x-bps-read-max-length': { 'type': 'int',
+                                        'features': [ 'unstable' ] },
+            '*x-bps-write': { 'type': 'int',
+                              'features': [ 'unstable' ] },
+            '*x-bps-write-max': { 'type': 'int',
+                                  'features': [ 'unstable' ] },
+            '*x-bps-write-max-length': { 'type': 'int',
+                                         'features': [ 'unstable' ] },
+            '*x-iops-size': { 'type': 'int',
+                              'features': [ 'unstable' ] } } }
+
 ##
 # @block-stream:
 #
 # Copy data from a backing file into a block device.
 #
-# The block streaming operation is performed in the background until the entire
-# backing file has been copied.  This command returns immediately once streaming
-# has started.  The status of ongoing block streaming operations can be checked
-# with query-block-jobs.  The operation can be stopped before it has completed
-# using the block-job-cancel command.
-#
-# The node that receives the data is called the top image, can be located in
-# any part of the chain (but always above the base image; see below) and can be
-# specified using its device or node name. Earlier qemu versions only allowed
-# 'device' to name the top level node; presence of the 'base-node' parameter
-# during introspection can be used as a witness of the enhanced semantics
-# of 'device'.
-#
-# If a base file is specified then sectors are not copied from that base file and
-# its backing chain.  This can be used to stream a subset of the backing file
-# chain instead of flattening the entire image.
-# When streaming completes the image file will have the base file as its backing
-# file, unless that node was changed while the job was running.  In that case,
-# base's parent's backing (or filtered, whichever exists) child (i.e., base at
-# the beginning of the job) will be the new backing file.
-#
-# On successful completion the image file is updated to drop the backing file
-# and the BLOCK_JOB_COMPLETED event is emitted.
-#
-# In case @device is a filter node, block-stream modifies the first non-filter
-# overlay node below it to point to the new backing node instead of modifying
-# @device itself.
-#
-# @job-id: identifier for the newly-created block job. If
-#          omitted, the device name will be used. (Since 2.7)
+# The block streaming operation is performed in the background until
+# the entire backing file has been copied.  This command returns
+# immediately once streaming has started.  The status of ongoing block
+# streaming operations can be checked with query-block-jobs.  The
+# operation can be stopped before it has completed using the
+# block-job-cancel command.
 #
-# @device: the device or node name of the top image
+# The node that receives the data is called the top image, can be
+# located in any part of the chain (but always above the base image;
+# see below) and can be specified using its device or node name.
+# Earlier qemu versions only allowed 'device' to name the top level
+# node; presence of the 'base-node' parameter during introspection can
+# be used as a witness of the enhanced semantics of 'device'.
 #
-# @base: the common backing file name.
-#        It cannot be set if @base-node is also set.
+# If a base file is specified then sectors are not copied from that
+# base file and its backing chain.  This can be used to stream a
+# subset of the backing file chain instead of flattening the entire
+# image.  When streaming completes the image file will have the base
+# file as its backing file, unless that node was changed while the job
+# was running.  In that case, base's parent's backing (or filtered,
+# whichever exists) child (i.e., base at the beginning of the job)
+# will be the new backing file.
 #
-# @base-node: the node name of the backing file.
-#             It cannot be set if @base is also set. (Since 2.8)
+# On successful completion the image file is updated to drop the
+# backing file and the BLOCK_JOB_COMPLETED event is emitted.
 #
-# @backing-file: The backing file string to write into the top
-#                image. This filename is not validated.
+# In case @device is a filter node, block-stream modifies the first
+# non-filter overlay node below it to point to the new backing node
+# instead of modifying @device itself.
 #
-#                If a pathname string is such that it cannot be
-#                resolved by QEMU, that means that subsequent QMP or
-#                HMP commands must use node-names for the image in
-#                question, as filename lookup methods will fail.
+# @job-id: identifier for the newly-created block job.  If omitted,
+#     the device name will be used.  (Since 2.7)
 #
-#                If not specified, QEMU will automatically determine
-#                the backing file string to use, or error out if there
-#                is no obvious choice.  Care should be taken when
-#                specifying the string, to specify a valid filename or
-#                protocol.
-#                (Since 2.1)
+# @device: the device or node name of the top image
 #
-# @speed: the maximum speed, in bytes per second
+# @base: the common backing file name.  It cannot be set if @base-node
+#     or @bottom is also set.
 #
-# @on-error: the action to take on an error (default report).
-#            'stop' and 'enospc' can only be used if the block device
-#            supports io-status (see BlockInfo).  Since 1.3.
+# @base-node: the node name of the backing file.  It cannot be set if
+#     @base or @bottom is also set.  (Since 2.8)
 #
-# @auto-finalize: When false, this job will wait in a PENDING state after it has
-#                 finished its work, waiting for @block-job-finalize before
-#                 making any block graph changes.
-#                 When true, this job will automatically
-#                 perform its abort or commit actions.
-#                 Defaults to true. (Since 3.1)
+# @bottom: the last node in the chain that should be streamed into
+#     top.  It cannot be set if @base or @base-node is also set.  It
+#     cannot be filter node.  (Since 6.0)
 #
-# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
-#                has completely ceased all work, and awaits @block-job-dismiss.
-#                When true, this job will automatically disappear from the query
-#                list without user intervention.
-#                Defaults to true. (Since 3.1)
+# @backing-file: The backing file string to write into the top image.
+#     This filename is not validated.
 #
-# Returns: - Nothing on success.
-#          - If @device does not exist, DeviceNotFound.
+#     If a pathname string is such that it cannot be resolved by QEMU,
+#     that means that subsequent QMP or HMP commands must use
+#     node-names for the image in question, as filename lookup methods
+#     will fail.
+#
+#     If not specified, QEMU will automatically determine the backing
+#     file string to use, or error out if there is no obvious choice.
+#     Care should be taken when specifying the string, to specify a
+#     valid filename or protocol.  (Since 2.1)
+#
+# @speed: the maximum speed, in bytes per second
+#
+# @on-error: the action to take on an error (default report). 'stop'
+#     and 'enospc' can only be used if the block device supports
+#     io-status (see BlockInfo).  (Since 1.3)
+#
+# @filter-node-name: the node name that should be assigned to the
+#     filter driver that the stream job inserts into the graph above
+#     @device.  If this option is not given, a node name is
+#     autogenerated.  (Since: 6.0)
+#
+# @auto-finalize: When false, this job will wait in a PENDING state
+#     after it has finished its work, waiting for @block-job-finalize
+#     before making any block graph changes.  When true, this job will
+#     automatically perform its abort or commit actions.  Defaults to
+#     true.  (Since 3.1)
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state
+#     after it has completely ceased all work, and awaits
+#     @block-job-dismiss.  When true, this job will automatically
+#     disappear from the query list without user intervention.
+#     Defaults to true.  (Since 3.1)
+#
+# Returns:
+#     - Nothing on success.
+#     - If @device does not exist, DeviceNotFound.
 #
 # Since: 1.1
 #
 #      "arguments": { "device": "virtio0",
 #                     "base": "/tmp/master.qcow2" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'block-stream',
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
-            '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
-            '*on-error': 'BlockdevOnError',
-            '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
+            '*base-node': 'str', '*backing-file': 'str', '*bottom': 'str',
+            '*speed': 'int', '*on-error': 'BlockdevOnError',
+            '*filter-node-name': 'str',
+            '*auto-finalize': 'bool', '*auto-dismiss': 'bool' },
+  'allow-preconfig': true }
 
 ##
 # @block-job-set-speed:
 #
 # Throttling can be disabled by setting the speed to 0.
 #
-# @device: The job identifier. This used to be a device name (hence
-#          the name of the parameter), but since QEMU 2.7 it can have
-#          other values.
+# @device: The job identifier.  This used to be a device name (hence
+#     the name of the parameter), but since QEMU 2.7 it can have other
+#     values.
 #
 # @speed: the maximum speed, in bytes per second, or 0 for unlimited.
-#         Defaults to 0.
+#     Defaults to 0.
 #
-# Returns: - Nothing on success
-#          - If no background operation is active on this device, DeviceNotActive
+# Returns:
+#     - Nothing on success
+#     - If no background operation is active on this device,
+#       DeviceNotActive
 #
 # Since: 1.1
 ##
 { 'command': 'block-job-set-speed',
-  'data': { 'device': 'str', 'speed': 'int' } }
+  'data': { 'device': 'str', 'speed': 'int' },
+  'allow-preconfig': true }
 
 ##
 # @block-job-cancel:
 #
 # Stop an active background block operation.
 #
-# This command returns immediately after marking the active background block
-# operation for cancellation.  It is an error to call this command if no
-# operation is in progress.
+# This command returns immediately after marking the active background
+# block operation for cancellation.  It is an error to call this
+# command if no operation is in progress.
 #
 # The operation will cancel as soon as possible and then emit the
-# BLOCK_JOB_CANCELLED event.  Before that happens the job is still visible when
-# enumerated using query-block-jobs.
-#
-# Note that if you issue 'block-job-cancel' after 'drive-mirror' has indicated
-# (via the event BLOCK_JOB_READY) that the source and destination are
-# synchronized, then the event triggered by this command changes to
-# BLOCK_JOB_COMPLETED, to indicate that the mirroring has ended and the
-# destination now has a point-in-time copy tied to the time of the cancellation.
-#
-# For streaming, the image file retains its backing file unless the streaming
-# operation happens to complete just as it is being cancelled.  A new streaming
-# operation can be started at a later time to finish copying all data from the
-# backing file.
-#
-# @device: The job identifier. This used to be a device name (hence
-#          the name of the parameter), but since QEMU 2.7 it can have
-#          other values.
-#
-# @force: If true, and the job has already emitted the event BLOCK_JOB_READY,
-#         abandon the job immediately (even if it is paused) instead of waiting
-#         for the destination to complete its final synchronization (since 1.3)
-#
-# Returns: - Nothing on success
-#          - If no background operation is active on this device, DeviceNotActive
+# BLOCK_JOB_CANCELLED event.  Before that happens the job is still
+# visible when enumerated using query-block-jobs.
+#
+# Note that if you issue 'block-job-cancel' after 'drive-mirror' has
+# indicated (via the event BLOCK_JOB_READY) that the source and
+# destination are synchronized, then the event triggered by this
+# command changes to BLOCK_JOB_COMPLETED, to indicate that the
+# mirroring has ended and the destination now has a point-in-time copy
+# tied to the time of the cancellation.
+#
+# For streaming, the image file retains its backing file unless the
+# streaming operation happens to complete just as it is being
+# cancelled.  A new streaming operation can be started at a later time
+# to finish copying all data from the backing file.
+#
+# @device: The job identifier.  This used to be a device name (hence
+#     the name of the parameter), but since QEMU 2.7 it can have other
+#     values.
+#
+# @force: If true, and the job has already emitted the event
+#     BLOCK_JOB_READY, abandon the job immediately (even if it is
+#     paused) instead of waiting for the destination to complete its
+#     final synchronization (since 1.3)
+#
+# Returns:
+#     - Nothing on success
+#     - If no background operation is active on this device,
+#       DeviceNotActive
 #
 # Since: 1.1
 ##
-{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } }
+{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' },
+  'allow-preconfig': true }
 
 ##
 # @block-job-pause:
 #
 # Pause an active background block operation.
 #
-# This command returns immediately after marking the active background block
-# operation for pausing.  It is an error to call this command if no
-# operation is in progress or if the job is already paused.
+# This command returns immediately after marking the active background
+# block operation for pausing.  It is an error to call this command if
+# no operation is in progress or if the job is already paused.
 #
-# The operation will pause as soon as possible.  No event is emitted when
-# the operation is actually paused.  Cancelling a paused job automatically
-# resumes it.
+# The operation will pause as soon as possible.  No event is emitted
+# when the operation is actually paused.  Cancelling a paused job
+# automatically resumes it.
 #
-# @device: The job identifier. This used to be a device name (hence
-#          the name of the parameter), but since QEMU 2.7 it can have
-#          other values.
+# @device: The job identifier.  This used to be a device name (hence
+#     the name of the parameter), but since QEMU 2.7 it can have other
+#     values.
 #
-# Returns: - Nothing on success
-#          - If no background operation is active on this device, DeviceNotActive
+# Returns:
+#     - Nothing on success
+#     - If no background operation is active on this device,
+#       DeviceNotActive
 #
 # Since: 1.3
 ##
-{ 'command': 'block-job-pause', 'data': { 'device': 'str' } }
+{ 'command': 'block-job-pause', 'data': { 'device': 'str' },
+  'allow-preconfig': true }
 
 ##
 # @block-job-resume:
 #
 # Resume an active background block operation.
 #
-# This command returns immediately after resuming a paused background block
-# operation.  It is an error to call this command if no operation is in
-# progress or if the job is not paused.
+# This command returns immediately after resuming a paused background
+# block operation.  It is an error to call this command if no
+# operation is in progress or if the job is not paused.
 #
 # This command also clears the error status of the job.
 #
-# @device: The job identifier. This used to be a device name (hence
-#          the name of the parameter), but since QEMU 2.7 it can have
-#          other values.
+# @device: The job identifier.  This used to be a device name (hence
+#     the name of the parameter), but since QEMU 2.7 it can have other
+#     values.
 #
-# Returns: - Nothing on success
-#          - If no background operation is active on this device, DeviceNotActive
+# Returns:
+#     - Nothing on success
+#     - If no background operation is active on this device,
+#       DeviceNotActive
 #
 # Since: 1.3
 ##
-{ 'command': 'block-job-resume', 'data': { 'device': 'str' } }
+{ 'command': 'block-job-resume', 'data': { 'device': 'str' },
+  'allow-preconfig': true }
 
 ##
 # @block-job-complete:
 #
-# Manually trigger completion of an active background block operation.  This
-# is supported for drive mirroring, where it also switches the device to
-# write to the target path only.  The ability to complete is signaled with
-# a BLOCK_JOB_READY event.
+# Manually trigger completion of an active background block operation.
+# This is supported for drive mirroring, where it also switches the
+# device to write to the target path only.  The ability to complete is
+# signaled with a BLOCK_JOB_READY event.
 #
-# This command completes an active background block operation synchronously.
-# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
-# is not defined.  Note that if an I/O error occurs during the processing of
-# this command: 1) the command itself will fail; 2) the error will be processed
-# according to the rerror/werror arguments that were specified when starting
-# the operation.
+# This command completes an active background block operation
+# synchronously.  The ordering of this command's return with the
+# BLOCK_JOB_COMPLETED event is not defined.  Note that if an I/O error
+# occurs during the processing of this command: 1) the command itself
+# will fail; 2) the error will be processed according to the
+# rerror/werror arguments that were specified when starting the
+# operation.
 #
 # A cancelled or paused job cannot be completed.
 #
-# @device: The job identifier. This used to be a device name (hence
-#          the name of the parameter), but since QEMU 2.7 it can have
-#          other values.
+# @device: The job identifier.  This used to be a device name (hence
+#     the name of the parameter), but since QEMU 2.7 it can have other
+#     values.
 #
-# Returns: - Nothing on success
-#          - If no background operation is active on this device, DeviceNotActive
+# Returns:
+#     - Nothing on success
+#     - If no background operation is active on this device,
+#       DeviceNotActive
 #
 # Since: 1.3
 ##
-{ 'command': 'block-job-complete', 'data': { 'device': 'str' } }
+{ 'command': 'block-job-complete', 'data': { 'device': 'str' },
+  'allow-preconfig': true }
 
 ##
 # @block-job-dismiss:
 #
-# For jobs that have already concluded, remove them from the block-job-query
-# list. This command only needs to be run for jobs which were started with
-# QEMU 2.12+ job lifetime management semantics.
+# For jobs that have already concluded, remove them from the
+# block-job-query list.  This command only needs to be run for jobs
+# which were started with QEMU 2.12+ job lifetime management
+# semantics.
 #
-# This command will refuse to operate on any job that has not yet reached
-# its terminal state, JOB_STATUS_CONCLUDED. For jobs that make use of the
-# BLOCK_JOB_READY event, block-job-cancel or block-job-complete will still need
-# to be used as appropriate.
+# This command will refuse to operate on any job that has not yet
+# reached its terminal state, JOB_STATUS_CONCLUDED. For jobs that make
+# use of the BLOCK_JOB_READY event, block-job-cancel or
+# block-job-complete will still need to be used as appropriate.
 #
 # @id: The job identifier.
 #
 #
 # Since: 2.12
 ##
-{ 'command': 'block-job-dismiss', 'data': { 'id': 'str' } }
+{ 'command': 'block-job-dismiss', 'data': { 'id': 'str' },
+  'allow-preconfig': true }
 
 ##
 # @block-job-finalize:
 #
 # Once a job that has manual=true reaches the pending state, it can be
-# instructed to finalize any graph changes and do any necessary cleanup
-# via this command.
-# For jobs in a transaction, instructing one job to finalize will force
-# ALL jobs in the transaction to finalize, so it is only necessary to instruct
-# a single member job to finalize.
+# instructed to finalize any graph changes and do any necessary
+# cleanup via this command.  For jobs in a transaction, instructing
+# one job to finalize will force ALL jobs in the transaction to
+# finalize, so it is only necessary to instruct a single member job to
+# finalize.
 #
 # @id: The job identifier.
 #
 #
 # Since: 2.12
 ##
-{ 'command': 'block-job-finalize', 'data': { 'id': 'str' } }
+{ 'command': 'block-job-finalize', 'data': { 'id': 'str' },
+  'allow-preconfig': true }
 
 ##
-# @BlockdevDiscardOptions:
+# @BlockJobChangeOptionsMirror:
+#
+# @copy-mode: Switch to this copy mode.  Currently, only the switch
+#     from 'background' to 'write-blocking' is implemented.
+#
+# Since: 8.2
+##
+{ 'struct': 'BlockJobChangeOptionsMirror',
+  'data': { 'copy-mode' : 'MirrorCopyMode' } }
+
+##
+# @BlockJobChangeOptions:
+#
+# Block job options that can be changed after job creation.
+#
+# @id: The job identifier
+#
+# @type: The job type
+#
+# Since 8.2
+##
+{ 'union': 'BlockJobChangeOptions',
+  'base': { 'id': 'str', 'type': 'JobType' },
+  'discriminator': 'type',
+  'data': { 'mirror': 'BlockJobChangeOptionsMirror' } }
+
+##
+# @block-job-change:
+#
+# Change the block job's options.
+#
+# Since: 8.2
+##
+{ 'command': 'block-job-change',
+  'data': 'BlockJobChangeOptions', 'boxed': true }
+
+##
+# @BlockdevDiscardOptions:
 #
 # Determines how to handle discard requests.
 #
 # @ignore: Ignore the request
+#
 # @unmap: Forward as an unmap request
 #
 # Since: 2.9
 # @BlockdevDetectZeroesOptions:
 #
 # Describes the operation mode for the automatic conversion of plain
-# zero writes by the OS to driver specific optimized zero write commands.
+# zero writes by the OS to driver specific optimized zero write
+# commands.
 #
 # @off: Disabled (default)
+#
 # @on: Enabled
-# @unmap: Enabled and even try to unmap blocks if possible. This requires
-#         also that @BlockdevDiscardOptions is set to unmap for this device.
+#
+# @unmap: Enabled and even try to unmap blocks if possible.  This
+#     requires also that @BlockdevDiscardOptions is set to unmap for
+#     this device.
 #
 # Since: 2.1
 ##
 # Selects the AIO backend to handle I/O requests
 #
 # @threads: Use qemu's thread pool
+#
 # @native: Use native AIO backend (only Linux and Windows)
+#
 # @io_uring: Use linux io_uring (since 5.0)
 #
 # Since: 2.9
 ##
 { 'enum': 'BlockdevAioOptions',
   'data': [ 'threads', 'native',
-            { 'name': 'io_uring', 'if': 'defined(CONFIG_LINUX_IO_URING)' } ] }
+            { 'name': 'io_uring', 'if': 'CONFIG_LINUX_IO_URING' } ] }
 
 ##
 # @BlockdevCacheOptions:
 # Includes cache-related options for block devices
 #
 # @direct: enables use of O_DIRECT (bypass the host page cache;
-#          default: false)
-# @no-flush: ignore any flush requests for the device (default:
-#            false)
+#     default: false)
+#
+# @no-flush: ignore any flush requests for the device (default: false)
 #
 # Since: 2.9
 ##
 # Drivers that are supported in block device operations.
 #
 # @throttle: Since 2.11
+#
 # @nvme: Since 2.12
+#
 # @copy-on-read: Since 3.0
+#
 # @blklogwrites: Since 3.0
+#
 # @blkreplay: Since 4.2
+#
 # @compress: Since 5.0
 #
+# @copy-before-write: Since 6.2
+#
+# @snapshot-access: Since 7.0
+#
 # Since: 2.9
 ##
 { 'enum': 'BlockdevDriver',
   'data': [ 'blkdebug', 'blklogwrites', 'blkreplay', 'blkverify', 'bochs',
-            'cloop', 'compress', 'copy-on-read', 'dmg', 'file', 'ftp', 'ftps',
-            'gluster', 'host_cdrom', 'host_device', 'http', 'https', 'iscsi',
-            'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
-            'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
-            { 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
-            'sheepdog',
-            'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+            'cloop', 'compress', 'copy-before-write', 'copy-on-read', 'dmg',
+            'file', 'snapshot-access', 'ftp', 'ftps', 'gluster',
+            {'name': 'host_cdrom', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
+            {'name': 'host_device', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
+            'http', 'https',
+            { 'name': 'io_uring', 'if': 'CONFIG_BLKIO' },
+            'iscsi',
+            'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme',
+            { 'name': 'nvme-io_uring', 'if': 'CONFIG_BLKIO' },
+            'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
+            'raw', 'rbd',
+            { 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
+            'ssh', 'throttle', 'vdi', 'vhdx',
+            { 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
+            { 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
+            { 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
+            'vmdk', 'vpc', 'vvfat' ] }
 
 ##
 # @BlockdevOptionsFile:
 # Driver specific block device options for the file backend.
 #
 # @filename: path to the image file
-# @pr-manager: the id for the object that will handle persistent reservations
-#              for this device (default: none, forward the commands via SG_IO;
-#              since 2.11)
+#
+# @pr-manager: the id for the object that will handle persistent
+#     reservations for this device (default: none, forward the
+#     commands via SG_IO; since 2.11)
+#
 # @aio: AIO backend (default: threads) (since: 2.8)
-# @locking: whether to enable file locking. If set to 'auto', only enable
-#           when Open File Descriptor (OFD) locking API is available
-#           (default: auto, since 2.10)
-# @drop-cache: invalidate page cache during live migration.  This prevents
-#              stale data on the migration destination with cache.direct=off.
-#              Currently only supported on Linux hosts.
-#              (default: on, since: 4.0)
-# @x-check-cache-dropped: whether to check that page cache was dropped on live
-#                         migration.  May cause noticeable delays if the image
-#                         file is large, do not use in production.
-#                         (default: off) (since: 3.0)
+#
+# @aio-max-batch: maximum number of requests to batch together into a
+#     single submission in the AIO backend.  The smallest value
+#     between this and the aio-max-batch value of the IOThread object
+#     is chosen.  0 means that the AIO backend will handle it
+#     automatically.  (default: 0, since 6.2)
+#
+# @locking: whether to enable file locking.  If set to 'auto', only
+#     enable when Open File Descriptor (OFD) locking API is available
+#     (default: auto, since 2.10)
+#
+# @drop-cache: invalidate page cache during live migration.  This
+#     prevents stale data on the migration destination with
+#     cache.direct=off.  Currently only supported on Linux hosts.
+#     (default: on, since: 4.0)
+#
+# @x-check-cache-dropped: whether to check that page cache was dropped
+#     on live migration.  May cause noticeable delays if the image
+#     file is large, do not use in production.  (default: off)
+#     (since: 3.0)
 #
 # Features:
-# @dynamic-auto-read-only: If present, enabled auto-read-only means that the
-#                          driver will open the image read-only at first,
-#                          dynamically reopen the image file read-write when
-#                          the first writer is attached to the node and reopen
-#                          read-only when the last writer is detached. This
-#                          allows giving QEMU write permissions only on demand
-#                          when an operation actually needs write access.
+#
+# @dynamic-auto-read-only: If present, enabled auto-read-only means
+#     that the driver will open the image read-only at first,
+#     dynamically reopen the image file read-write when the first
+#     writer is attached to the node and reopen read-only when the
+#     last writer is detached.  This allows giving QEMU write
+#     permissions only on demand when an operation actually needs
+#     write access.
+#
+# @unstable: Member x-check-cache-dropped is meant for debugging.
 #
 # Since: 2.9
 ##
             '*pr-manager': 'str',
             '*locking': 'OnOffAuto',
             '*aio': 'BlockdevAioOptions',
+            '*aio-max-batch': 'int',
             '*drop-cache': {'type': 'bool',
-                            'if': 'defined(CONFIG_LINUX)'},
-            '*x-check-cache-dropped': 'bool' },
+                            'if': 'CONFIG_LINUX'},
+            '*x-check-cache-dropped': { 'type': 'bool',
+                                        'features': [ 'unstable' ] } },
   'features': [ { 'name': 'dynamic-auto-read-only',
-                  'if': 'defined(CONFIG_POSIX)' } ] }
+                  'if': 'CONFIG_POSIX' } ] }
 
 ##
 # @BlockdevOptionsNull:
 # Driver specific block device options for the null backend.
 #
 # @size: size of the device in bytes.
+#
 # @latency-ns: emulated latency (in nanoseconds) in processing
-#              requests. Default to zero which completes requests immediately.
-#              (Since 2.4)
-# @read-zeroes: if true, reads from the device produce zeroes; if false, the
-#               buffer is left unchanged. (default: false; since: 4.1)
+#     requests.  Default to zero which completes requests immediately.
+#     (Since 2.4)
+#
+# @read-zeroes: if true, reads from the device produce zeroes; if
+#     false, the buffer is left unchanged.
+#     (default: false; since: 4.1)
 #
 # Since: 2.9
 ##
 #
 # Driver specific block device options for the NVMe backend.
 #
-# @device: PCI controller address of the NVMe device in
-#          format hhhh:bb:ss.f (host:bus:slot.function)
+# @device: PCI controller address of the NVMe device in format
+#     hhhh:bb:ss.f (host:bus:slot.function)
+#
 # @namespace: namespace number of the device, starting from 1.
 #
 # Note that the PCI @device must have been unbound from any host
 # Driver specific block device options for the vvfat protocol.
 #
 # @dir: directory to be exported as FAT image
+#
 # @fat-type: FAT type: 12, 16 or 32
-# @floppy: whether to export a floppy image (true) or
-#          partitioned hard disk (false; default)
-# @label: set the volume label, limited to 11 bytes. FAT16 and
-#         FAT32 traditionally have some restrictions on labels, which are
-#         ignored by most operating systems. Defaults to "QEMU VVFAT".
-#         (since 2.4)
+#
+# @floppy: whether to export a floppy image (true) or partitioned hard
+#     disk (false; default)
+#
+# @label: set the volume label, limited to 11 bytes.  FAT16 and FAT32
+#     traditionally have some restrictions on labels, which are
+#     ignored by most operating systems.  Defaults to "QEMU VVFAT".
+#     (since 2.4)
+#
 # @rw: whether to allow write operations (default: false)
 #
 # Since: 2.9
 ##
 # @BlockdevOptionsGenericFormat:
 #
-# Driver specific block device options for image format that have no option
-# besides their data source.
+# Driver specific block device options for image format that have no
+# option besides their data source.
 #
 # @file: reference to or definition of the data source block device
 #
 #
 # Driver specific block device options for LUKS.
 #
-# @key-secret: the ID of a QCryptoSecret object providing
-#              the decryption key (since 2.6). Mandatory except when
-#              doing a metadata-only probe of the image.
+# @key-secret: the ID of a QCryptoSecret object providing the
+#     decryption key (since 2.6). Mandatory except when doing a
+#     metadata-only probe of the image.
 #
 # Since: 2.9
 ##
   'base': 'BlockdevOptionsGenericFormat',
   'data': { '*key-secret': 'str' } }
 
-
 ##
 # @BlockdevOptionsGenericCOWFormat:
 #
-# Driver specific block device options for image format that have no option
-# besides their data source and an optional backing file.
+# Driver specific block device options for image format that have no
+# option besides their data source and an optional backing file.
 #
 # @backing: reference to or definition of the backing file block
-#           device, null disables the backing file entirely.
-#           Defaults to the backing file stored the image file.
+#     device, null disables the backing file entirely.  Defaults to
+#     the backing file stored the image file.
 #
 # Since: 2.9
 ##
 #
 # @none: Do not perform any checks
 #
-# @constant: Perform only checks which can be done in constant time and
-#            without reading anything from disk
+# @constant: Perform only checks which can be done in constant time
+#     and without reading anything from disk
 #
-# @cached: Perform only checks which can be done without reading anything
-#          from disk
+# @cached: Perform only checks which can be done without reading
+#     anything from disk
 #
 # @all: Perform all available overlap checks
 #
 ##
 # @Qcow2OverlapCheckFlags:
 #
-# Structure of flags for each metadata structure. Setting a field to 'true'
-# makes qemu guard that structure against unintended overwriting. The default
-# value is chosen according to the template given.
+# Structure of flags for each metadata structure.  Setting a field to
+# 'true' makes qemu guard that structure against unintended
+# overwriting.  The default value is chosen according to the template
+# given.
 #
-# @template: Specifies a template mode which can be adjusted using the other
-#            flags, defaults to 'cached'
+# @template: Specifies a template mode which can be adjusted using the
+#     other flags, defaults to 'cached'
 #
 # @bitmap-directory: since 3.0
 #
 ##
 # @Qcow2OverlapChecks:
 #
-# Specifies which metadata structures should be guarded against unintended
-# overwriting.
+# Specifies which metadata structures should be guarded against
+# unintended overwriting.
 #
-# @flags: set of flags for separate specification of each metadata structure
-#         type
+# @flags: set of flags for separate specification of each metadata
+#     structure type
 #
 # @mode: named mode which chooses a specific set of flags
 #
 #
 # Driver specific block device options for qcow.
 #
-# @encrypt: Image decryption options. Mandatory for
-#           encrypted images, except when doing a metadata-only
-#           probe of the image.
+# @encrypt: Image decryption options.  Mandatory for encrypted images,
+#     except when doing a metadata-only probe of the image.
 #
 # Since: 2.10
 ##
   'base': 'BlockdevOptionsGenericCOWFormat',
   'data': { '*encrypt': 'BlockdevQcowEncryption' } }
 
-
-
 ##
 # @BlockdevQcow2EncryptionFormat:
+#
 # @aes: AES-CBC with plain64 initialization vectors
 #
 # Since: 2.10
   'data': { 'aes': 'QCryptoBlockOptionsQCow',
             'luks': 'QCryptoBlockOptionsLUKS'} }
 
+##
+# @BlockdevOptionsPreallocate:
+#
+# Filter driver intended to be inserted between format and protocol
+# node and do preallocation in protocol node on write.
+#
+# @prealloc-align: on preallocation, align file length to this number,
+#     default 1048576 (1M)
+#
+# @prealloc-size: how much to preallocate, default 134217728 (128M)
+#
+# Since: 6.0
+##
+{ 'struct': 'BlockdevOptionsPreallocate',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { '*prealloc-align': 'int', '*prealloc-size': 'int' } }
+
 ##
 # @BlockdevOptionsQcow2:
 #
 # Driver specific block device options for qcow2.
 #
-# @lazy-refcounts: whether to enable the lazy refcounts
-#                  feature (default is taken from the image file)
+# @lazy-refcounts: whether to enable the lazy refcounts feature
+#     (default is taken from the image file)
 #
-# @pass-discard-request: whether discard requests to the qcow2
-#                        device should be forwarded to the data source
+# @pass-discard-request: whether discard requests to the qcow2 device
+#     should be forwarded to the data source
 #
 # @pass-discard-snapshot: whether discard requests for the data source
-#                         should be issued when a snapshot operation (e.g.
-#                         deleting a snapshot) frees clusters in the qcow2 file
+#     should be issued when a snapshot operation (e.g. deleting a
+#     snapshot) frees clusters in the qcow2 file
 #
 # @pass-discard-other: whether discard requests for the data source
-#                      should be issued on other occasions where a cluster
-#                      gets freed
-#
-# @overlap-check: which overlap checks to perform for writes
-#                 to the image, defaults to 'cached' (since 2.2)
-#
-# @cache-size: the maximum total size of the L2 table and
-#              refcount block caches in bytes (since 2.2)
-#
-# @l2-cache-size: the maximum size of the L2 table cache in
-#                 bytes (since 2.2)
+#     should be issued on other occasions where a cluster gets freed
+#
+# @discard-no-unref: when enabled, data clusters will remain
+#     preallocated when they are no longer used, e.g. because they are
+#     discarded or converted to zero clusters.  As usual, whether the
+#     old data is discarded or kept on the protocol level (i.e. in the
+#     image file) depends on the setting of the pass-discard-request
+#     option.  Keeping the clusters preallocated prevents qcow2
+#     fragmentation that would otherwise be caused by freeing and
+#     re-allocating them later.  Besides potential performance
+#     degradation, such fragmentation can lead to increased allocation
+#     of clusters past the end of the image file, resulting in image
+#     files whose file length can grow much larger than their guest disk
+#     size would suggest.  If image file length is of concern (e.g. when
+#     storing qcow2 images directly on block devices), you should
+#     consider enabling this option.  (since 8.1)
+#
+# @overlap-check: which overlap checks to perform for writes to the
+#     image, defaults to 'cached' (since 2.2)
+#
+# @cache-size: the maximum total size of the L2 table and refcount
+#     block caches in bytes (since 2.2)
+#
+# @l2-cache-size: the maximum size of the L2 table cache in bytes
+#     (since 2.2)
 #
 # @l2-cache-entry-size: the size of each entry in the L2 cache in
-#                       bytes. It must be a power of two between 512
-#                       and the cluster size. The default value is
-#                       the cluster size (since 2.12)
+#     bytes.  It must be a power of two between 512 and the cluster
+#     size.  The default value is the cluster size (since 2.12)
 #
 # @refcount-cache-size: the maximum size of the refcount block cache
-#                       in bytes (since 2.2)
+#     in bytes (since 2.2)
 #
 # @cache-clean-interval: clean unused entries in the L2 and refcount
-#                        caches. The interval is in seconds. The default value
-#                        is 600 on supporting platforms, and 0 on other
-#                        platforms. 0 disables this feature. (since 2.5)
+#     caches.  The interval is in seconds.  The default value is 600
+#     on supporting platforms, and 0 on other platforms.  0 disables
+#     this feature.  (since 2.5)
 #
-# @encrypt: Image decryption options. Mandatory for
-#           encrypted images, except when doing a metadata-only
-#           probe of the image. (since 2.10)
+# @encrypt: Image decryption options.  Mandatory for encrypted images,
+#     except when doing a metadata-only probe of the image.  (since
+#     2.10)
 #
 # @data-file: reference to or definition of the external data file.
-#             This may only be specified for images that require an
-#             external data file. If it is not specified for such
-#             an image, the data file name is loaded from the image
-#             file. (since 4.0)
+#     This may only be specified for images that require an external
+#     data file.  If it is not specified for such an image, the data
+#     file name is loaded from the image file.  (since 4.0)
 #
 # Since: 2.9
 ##
             '*pass-discard-request': 'bool',
             '*pass-discard-snapshot': 'bool',
             '*pass-discard-other': 'bool',
+            '*discard-no-unref': 'bool',
             '*overlap-check': 'Qcow2OverlapChecks',
             '*cache-size': 'int',
             '*l2-cache-size': 'int',
 # @SshHostKeyCheckMode:
 #
 # @none: Don't check the host key at all
+#
 # @hash: Compare the host key with a given hash
+#
 # @known_hosts: Check the host key against the known_hosts file
 #
 # Since: 2.12
 # @SshHostKeyCheckHashType:
 #
 # @md5: The given hash is an md5 hash
+#
 # @sha1: The given hash is an sha1 hash
 #
+# @sha256: The given hash is an sha256 hash
+#
 # Since: 2.12
 ##
 { 'enum': 'SshHostKeyCheckHashType',
-  'data': [ 'md5', 'sha1' ] }
+  'data': [ 'md5', 'sha1', 'sha256' ] }
 
 ##
 # @SshHostKeyHash:
 #
 # @type: The hash algorithm used for the hash
+#
 # @hash: The expected hash value
 #
 # Since: 2.12
 ##
 # @BlockdevOptionsSsh:
 #
-# @server:              host address
+# @server: host address
 #
-# @path:                path to the image on the host
+# @path: path to the image on the host
 #
-# @user:                user as which to connect, defaults to current
-#                       local user name
+# @user: user as which to connect, defaults to current local user name
 #
-# @host-key-check:      Defines how and what to check the host key against
-#                       (default: known_hosts)
+# @host-key-check: Defines how and what to check the host key against
+#     (default: known_hosts)
 #
 # Since: 2.9
 ##
             '*user': 'str',
             '*host-key-check': 'SshHostKeyCheck' } }
 
-
 ##
 # @BlkdebugEvent:
 #
 # Trigger events supported by blkdebug.
 #
 # @l1_shrink_write_table: write zeros to the l1 table to shrink image.
-#                         (since 2.11)
+#     (since 2.11)
 #
-# @l1_shrink_free_l2_clusters: discard the l2 tables. (since 2.11)
+# @l1_shrink_free_l2_clusters: discard the l2 tables.  (since 2.11)
 #
 # @cor_write: a write due to copy-on-read (since 2.11)
 #
-# @cluster_alloc_space: an allocation of file space for a cluster (since 4.1)
+# @cluster_alloc_space: an allocation of file space for a cluster
+#     (since 4.1)
 #
 # @none: triggers once at creation of the blkdebug node (since 4.1)
 #
 #
 # @event: trigger event
 #
-# @state: the state identifier blkdebug needs to be in to
-#         actually trigger the event; defaults to "any"
+# @state: the state identifier blkdebug needs to be in to actually
+#     trigger the event; defaults to "any"
 #
-# @iotype: the type of I/O operations on which this error should
-#          be injected; defaults to "all read, write,
-#          write-zeroes, discard, and flush operations"
-#          (since: 4.1)
+# @iotype: the type of I/O operations on which this error should be
+#     injected; defaults to "all read, write, write-zeroes, discard,
+#     and flush operations" (since: 4.1)
 #
-# @errno: error identifier (errno) to be returned; defaults to
-#         EIO
+# @errno: error identifier (errno) to be returned; defaults to EIO
 #
-# @sector: specifies the sector index which has to be affected
-#          in order to actually trigger the event; defaults to "any
-#          sector"
+# @sector: specifies the sector index which has to be affected in
+#     order to actually trigger the event; defaults to "any sector"
 #
-# @once: disables further events after this one has been
-#        triggered; defaults to false
+# @once: disables further events after this one has been triggered;
+#     defaults to false
 #
 # @immediately: fail immediately; defaults to false
 #
 # @event: trigger event
 #
 # @state: the current state identifier blkdebug needs to be in;
-#         defaults to "any"
+#     defaults to "any"
 #
 # @new_state: the state identifier blkdebug is supposed to assume if
-#             this event is triggered
+#     this event is triggered
 #
 # Since: 2.9
 ##
 #
 # @config: filename of the configuration file
 #
-# @align: required alignment for requests in bytes, must be
-#         positive power of 2, or 0 for default
+# @align: required alignment for requests in bytes, must be positive
+#     power of 2, or 0 for default
 #
 # @max-transfer: maximum size for I/O transfers in bytes, must be
-#                positive multiple of @align and of the underlying
-#                file's request alignment (but need not be a power of
-#                2), or 0 for default (since 2.10)
+#     positive multiple of @align and of the underlying file's request
+#     alignment (but need not be a power of 2), or 0 for default
+#     (since 2.10)
 #
-# @opt-write-zero: preferred alignment for write zero requests in bytes,
-#                  must be positive multiple of @align and of the
-#                  underlying file's request alignment (but need not be a
-#                  power of 2), or 0 for default (since 2.10)
+# @opt-write-zero: preferred alignment for write zero requests in
+#     bytes, must be positive multiple of @align and of the underlying
+#     file's request alignment (but need not be a power of 2), or 0
+#     for default (since 2.10)
 #
-# @max-write-zero: maximum size for write zero requests in bytes, must be
-#                  positive multiple of @align, of @opt-write-zero, and of
-#                  the underlying file's request alignment (but need not
-#                  be a power of 2), or 0 for default (since 2.10)
+# @max-write-zero: maximum size for write zero requests in bytes, must
+#     be positive multiple of @align, of @opt-write-zero, and of the
+#     underlying file's request alignment (but need not be a power of
+#     2), or 0 for default (since 2.10)
 #
-# @opt-discard: preferred alignment for discard requests in bytes, must
-#               be positive multiple of @align and of the underlying
-#               file's request alignment (but need not be a power of
-#               2), or 0 for default (since 2.10)
+# @opt-discard: preferred alignment for discard requests in bytes,
+#     must be positive multiple of @align and of the underlying file's
+#     request alignment (but need not be a power of 2), or 0 for
+#     default (since 2.10)
 #
 # @max-discard: maximum size for discard requests in bytes, must be
-#               positive multiple of @align, of @opt-discard, and of
-#               the underlying file's request alignment (but need not
-#               be a power of 2), or 0 for default (since 2.10)
+#     positive multiple of @align, of @opt-discard, and of the
+#     underlying file's request alignment (but need not be a power of
+#     2), or 0 for default (since 2.10)
 #
 # @inject-error: array of error injection descriptions
 #
 # @set-state: array of state-change descriptions
 #
 # @take-child-perms: Permissions to take on @image in addition to what
-#                    is necessary anyway (which depends on how the
-#                    blkdebug node is used).  Defaults to none.
-#                    (since 5.0)
+#     is necessary anyway (which depends on how the blkdebug node is
+#     used).  Defaults to none.  (since 5.0)
 #
 # @unshare-child-perms: Permissions not to share on @image in addition
-#                       to what cannot be shared anyway (which depends
-#                       on how the blkdebug node is used).  Defaults
-#                       to none.  (since 5.0)
+#     to what cannot be shared anyway (which depends on how the
+#     blkdebug node is used).  Defaults to none.  (since 5.0)
 #
 # Since: 2.9
 ##
 #
 # @log: block device used to log writes to @file
 #
-# @log-sector-size: sector size used in logging writes to @file, determines
-#                   granularity of offsets and sizes of writes (default: 512)
+# @log-sector-size: sector size used in logging writes to @file,
+#     determines granularity of offsets and sizes of writes
+#     (default: 512)
 #
 # @log-append: append to an existing log (default: false)
 #
-# @log-super-update-interval: interval of write requests after which the log
-#                             super block is updated to disk (default: 4096)
+# @log-super-update-interval: interval of write requests after which
+#     the log super block is updated to disk (default: 4096)
 #
 # Since: 3.0
 ##
 #
 # Driver specific block device options for Quorum
 #
-# @blkverify: true if the driver must print content mismatch
-#                  set to false by default
+# @blkverify: true if the driver must print content mismatch set to
+#     false by default
 #
 # @children: the children block devices to use
 #
 # @vote-threshold: the vote limit under which a read will fail
 #
 # @rewrite-corrupted: rewrite corrupted data when quorum is reached
-#                     (Since 2.1)
+#     (Since 2.1)
 #
 # @read-pattern: choose read pattern and set to quorum by default
-#                (Since 2.2)
+#     (Since 2.2)
 #
 # Since: 2.9
 ##
 #
 # @server: gluster servers description
 #
-# @debug: libgfapi log level (default '4' which is Error)
-#         (Since 2.8)
+# @debug: libgfapi log level (default '4' which is Error) (Since 2.8)
 #
 # @logfile: libgfapi log file (default /dev/stderr) (Since 2.8)
 #
             '*debug': 'int',
             '*logfile': 'str' } }
 
+##
+# @BlockdevOptionsIoUring:
+#
+# Driver specific block device options for the io_uring backend.
+#
+# @filename: path to the image file
+#
+# Since: 7.2
+##
+{ 'struct': 'BlockdevOptionsIoUring',
+  'data': { 'filename': 'str' },
+  'if': 'CONFIG_BLKIO' }
+
+##
+# @BlockdevOptionsNvmeIoUring:
+#
+# Driver specific block device options for the nvme-io_uring backend.
+#
+# @path: path to the NVMe namespace's character device (e.g.
+#     /dev/ng0n1).
+#
+# Since: 7.2
+##
+{ 'struct': 'BlockdevOptionsNvmeIoUring',
+  'data': { 'path': 'str' },
+  'if': 'CONFIG_BLKIO' }
+
+##
+# @BlockdevOptionsVirtioBlkVfioPci:
+#
+# Driver specific block device options for the virtio-blk-vfio-pci
+# backend.
+#
+# @path: path to the PCI device's sysfs directory (e.g.
+#     /sys/bus/pci/devices/0000:00:01.0).
+#
+# Since: 7.2
+##
+{ 'struct': 'BlockdevOptionsVirtioBlkVfioPci',
+  'data': { 'path': 'str' },
+  'if': 'CONFIG_BLKIO' }
+
+##
+# @BlockdevOptionsVirtioBlkVhostUser:
+#
+# Driver specific block device options for the virtio-blk-vhost-user
+# backend.
+#
+# @path: path to the vhost-user UNIX domain socket.
+#
+# Since: 7.2
+##
+{ 'struct': 'BlockdevOptionsVirtioBlkVhostUser',
+  'data': { 'path': 'str' },
+  'if': 'CONFIG_BLKIO' }
+
+##
+# @BlockdevOptionsVirtioBlkVhostVdpa:
+#
+# Driver specific block device options for the virtio-blk-vhost-vdpa
+# backend.
+#
+# @path: path to the vhost-vdpa character device.
+#
+# Features:
+# @fdset: Member @path supports the special "/dev/fdset/N" path
+#     (since 8.1)
+#
+# Since: 7.2
+##
+{ 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa',
+  'data': { 'path': 'str' },
+  'features': [ { 'name' :'fdset',
+                  'if': 'CONFIG_BLKIO_VHOST_VDPA_FD' } ],
+  'if': 'CONFIG_BLKIO' }
+
 ##
 # @IscsiTransport:
 #
 #
 # @target: The target iqn name
 #
-# @lun: LUN to connect to. Defaults to 0.
+# @lun: LUN to connect to.  Defaults to 0.
 #
-# @user: User name to log in with. If omitted, no CHAP
-#        authentication is performed.
+# @user: User name to log in with.  If omitted, no CHAP authentication
+#     is performed.
 #
-# @password-secret: The ID of a QCryptoSecret object providing
-#                   the password for the login. This option is required if
-#                   @user is specified.
+# @password-secret: The ID of a QCryptoSecret object providing the
+#     password for the login.  This option is required if @user is
+#     specified.
 #
-# @initiator-name: The iqn name we want to identify to the target
-#                  as. If this option is not specified, an initiator name is
-#                  generated automatically.
+# @initiator-name: The iqn name we want to identify to the target as.
+#     If this option is not specified, an initiator name is generated
+#     automatically.
 #
-# @header-digest: The desired header digest. Defaults to
-#                 none-crc32c.
+# @header-digest: The desired header digest.  Defaults to none-crc32c.
 #
-# @timeout: Timeout in seconds after which a request will
-#           timeout. 0 means no timeout and is the default.
+# @timeout: Timeout in seconds after which a request will timeout.  0
+#     means no timeout and is the default.
 #
 # Driver specific block device options for iscsi
 #
             '*header-digest': 'IscsiHeaderDigest',
             '*timeout': 'int' } }
 
-
 ##
 # @RbdAuthMode:
 #
 { 'enum': 'RbdAuthMode',
   'data': [ 'cephx', 'none' ] }
 
+##
+# @RbdImageEncryptionFormat:
+#
+# @luks-any: Used for opening either luks or luks2 (Since 8.0)
+#
+# Since: 6.1
+##
+{ 'enum': 'RbdImageEncryptionFormat',
+  'data': [ 'luks', 'luks2', 'luks-any' ] }
+
+##
+# @RbdEncryptionOptionsLUKSBase:
+#
+# @key-secret: ID of a QCryptoSecret object providing a passphrase for
+#     unlocking the encryption
+#
+# Since: 6.1
+##
+{ 'struct': 'RbdEncryptionOptionsLUKSBase',
+  'data': { 'key-secret': 'str' } }
+
+##
+# @RbdEncryptionCreateOptionsLUKSBase:
+#
+# @cipher-alg: The encryption algorithm
+#
+# Since: 6.1
+##
+{ 'struct': 'RbdEncryptionCreateOptionsLUKSBase',
+  'base': 'RbdEncryptionOptionsLUKSBase',
+  'data': { '*cipher-alg': 'QCryptoCipherAlgorithm' } }
+
+##
+# @RbdEncryptionOptionsLUKS:
+#
+# Since: 6.1
+##
+{ 'struct': 'RbdEncryptionOptionsLUKS',
+  'base': 'RbdEncryptionOptionsLUKSBase',
+  'data': { } }
+
+##
+# @RbdEncryptionOptionsLUKS2:
+#
+# Since: 6.1
+##
+{ 'struct': 'RbdEncryptionOptionsLUKS2',
+  'base': 'RbdEncryptionOptionsLUKSBase',
+  'data': { } }
+
+##
+# @RbdEncryptionOptionsLUKSAny:
+#
+# Since: 8.0
+##
+{ 'struct': 'RbdEncryptionOptionsLUKSAny',
+  'base': 'RbdEncryptionOptionsLUKSBase',
+  'data': { } }
+
+##
+# @RbdEncryptionCreateOptionsLUKS:
+#
+# Since: 6.1
+##
+{ 'struct': 'RbdEncryptionCreateOptionsLUKS',
+  'base': 'RbdEncryptionCreateOptionsLUKSBase',
+  'data': { } }
+
+##
+# @RbdEncryptionCreateOptionsLUKS2:
+#
+# Since: 6.1
+##
+{ 'struct': 'RbdEncryptionCreateOptionsLUKS2',
+  'base': 'RbdEncryptionCreateOptionsLUKSBase',
+  'data': { } }
+
+##
+# @RbdEncryptionOptions:
+#
+# @format: Encryption format.
+#
+# @parent: Parent image encryption options (for cloned images).  Can
+#     be left unspecified if this cloned image is encrypted using the
+#     same format and secret as its parent image (i.e. not explicitly
+#     formatted) or if its parent image is not encrypted.  (Since 8.0)
+#
+# Since: 6.1
+##
+{ 'union': 'RbdEncryptionOptions',
+  'base': { 'format': 'RbdImageEncryptionFormat',
+            '*parent': 'RbdEncryptionOptions' },
+  'discriminator': 'format',
+  'data': { 'luks': 'RbdEncryptionOptionsLUKS',
+            'luks2': 'RbdEncryptionOptionsLUKS2',
+            'luks-any': 'RbdEncryptionOptionsLUKSAny'} }
+
+##
+# @RbdEncryptionCreateOptions:
+#
+# Since: 6.1
+##
+{ 'union': 'RbdEncryptionCreateOptions',
+  'base': { 'format': 'RbdImageEncryptionFormat' },
+  'discriminator': 'format',
+  'data': { 'luks': 'RbdEncryptionCreateOptionsLUKS',
+            'luks2': 'RbdEncryptionCreateOptionsLUKS2' } }
+
 ##
 # @BlockdevOptionsRbd:
 #
 # @pool: Ceph pool name.
 #
-# @namespace: Rados namespace name in the Ceph pool. (Since 5.0)
+# @namespace: Rados namespace name in the Ceph pool.  (Since 5.0)
 #
 # @image: Image name in the Ceph pool.
 #
-# @conf: path to Ceph configuration file.  Values
-#        in the configuration file will be overridden by
-#        options specified via QAPI.
+# @conf: path to Ceph configuration file.  Values in the configuration
+#     file will be overridden by options specified via QAPI.
 #
 # @snapshot: Ceph snapshot name.
 #
+# @encrypt: Image encryption options.  (Since 6.1)
+#
 # @user: Ceph id name.
 #
-# @auth-client-required: Acceptable authentication modes.
-#                        This maps to Ceph configuration option
-#                        "auth_client_required".  (Since 3.0)
+# @auth-client-required: Acceptable authentication modes.  This maps
+#     to Ceph configuration option "auth_client_required".  (Since
+#     3.0)
 #
-# @key-secret: ID of a QCryptoSecret object providing a key
-#              for cephx authentication.
-#              This maps to Ceph configuration option
-#              "key".  (Since 3.0)
+# @key-secret: ID of a QCryptoSecret object providing a key for cephx
+#     authentication.  This maps to Ceph configuration option "key".
+#     (Since 3.0)
 #
-# @server: Monitor host address and port.  This maps
-#          to the "mon_host" Ceph option.
+# @server: Monitor host address and port.  This maps to the "mon_host"
+#     Ceph option.
 #
 # Since: 2.9
 ##
             'image': 'str',
             '*conf': 'str',
             '*snapshot': 'str',
+            '*encrypt': 'RbdEncryptionOptions',
             '*user': 'str',
             '*auth-client-required': ['RbdAuthMode'],
             '*key-secret': 'str',
             '*server': ['InetSocketAddressBase'] } }
 
-##
-# @BlockdevOptionsSheepdog:
-#
-# Driver specific block device options for sheepdog
-#
-# @vdi: Virtual disk image name
-# @server: The Sheepdog server to connect to
-# @snap-id: Snapshot ID
-# @tag: Snapshot tag name
-#
-# Only one of @snap-id and @tag may be present.
-#
-# Since: 2.9
-##
-{ 'struct': 'BlockdevOptionsSheepdog',
-  'data': { 'server': 'SocketAddress',
-            'vdi': 'str',
-            '*snap-id': 'uint32',
-            '*tag': 'str' } }
-
 ##
 # @ReplicationMode:
 #
 # An enumeration of replication modes.
 #
-# @primary: Primary mode, the vm's state will be sent to secondary QEMU.
+# @primary: Primary mode, the vm's state will be sent to secondary
+#     QEMU.
 #
-# @secondary: Secondary mode, receive the vm's state from primary QEMU.
+# @secondary: Secondary mode, receive the vm's state from primary
+#     QEMU.
 #
 # Since: 2.9
 ##
 { 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ],
-  'if': 'defined(CONFIG_REPLICATION)' }
+  'if': 'CONFIG_REPLICATION' }
 
 ##
 # @BlockdevOptionsReplication:
 #
 # @mode: the replication mode
 #
-# @top-id: In secondary mode, node name or device ID of the root
-#          node who owns the replication node chain. Must not be given in
-#          primary mode.
+# @top-id: In secondary mode, node name or device ID of the root node
+#     who owns the replication node chain.  Must not be given in
+#     primary mode.
 #
 # Since: 2.9
 ##
   'base': 'BlockdevOptionsGenericFormat',
   'data': { 'mode': 'ReplicationMode',
             '*top-id': 'str' },
-  'if': 'defined(CONFIG_REPLICATION)' }
+  'if': 'CONFIG_REPLICATION' }
 
 ##
 # @NFSTransport:
 #
 # @path: path of the image on the host
 #
-# @user: UID value to use when talking to the
-#        server (defaults to 65534 on Windows and getuid()
-#        on unix)
+# @user: UID value to use when talking to the server (defaults to
+#     65534 on Windows and getuid() on unix)
 #
-# @group: GID value to use when talking to the
-#         server (defaults to 65534 on Windows and getgid()
-#         in unix)
+# @group: GID value to use when talking to the server (defaults to
+#     65534 on Windows and getgid() in unix)
 #
-# @tcp-syn-count: number of SYNs during the session
-#                 establishment (defaults to libnfs default)
+# @tcp-syn-count: number of SYNs during the session establishment
+#     (defaults to libnfs default)
 #
-# @readahead-size: set the readahead size in bytes (defaults
-#                  to libnfs default)
+# @readahead-size: set the readahead size in bytes (defaults to libnfs
+#     default)
 #
-# @page-cache-size: set the pagecache size in bytes (defaults
-#                   to libnfs default)
+# @page-cache-size: set the pagecache size in bytes (defaults to
+#     libnfs default)
 #
-# @debug: set the NFS debug level (max 2) (defaults
-#         to libnfs default)
+# @debug: set the NFS debug level (max 2) (defaults to libnfs default)
 #
 # Since: 2.9
 ##
 ##
 # @BlockdevOptionsCurlBase:
 #
-# Driver specific block device options shared by all protocols supported by the
-# curl backend.
+# Driver specific block device options shared by all protocols
+# supported by the curl backend.
 #
 # @url: URL of the image file
 #
-# @readahead: Size of the read-ahead cache; must be a multiple of
-#             512 (defaults to 256 kB)
+# @readahead: Size of the read-ahead cache; must be a multiple of 512
+#     (defaults to 256 kB)
 #
 # @timeout: Timeout for connections, in seconds (defaults to 5)
 #
 # @username: Username for authentication (defaults to none)
 #
 # @password-secret: ID of a QCryptoSecret object providing a password
-#                   for authentication (defaults to no password)
+#     for authentication (defaults to no password)
 #
-# @proxy-username: Username for proxy authentication (defaults to none)
+# @proxy-username: Username for proxy authentication (defaults to
+#     none)
 #
-# @proxy-password-secret: ID of a QCryptoSecret object providing a password
-#                         for proxy authentication (defaults to no password)
+# @proxy-password-secret: ID of a QCryptoSecret object providing a
+#     password for proxy authentication (defaults to no password)
 #
 # Since: 2.9
 ##
 ##
 # @BlockdevOptionsCurlHttp:
 #
-# Driver specific block device options for HTTP connections over the curl
-# backend.  URLs must start with "http://".
+# Driver specific block device options for HTTP connections over the
+# curl backend.  URLs must start with "http://".
 #
-# @cookie: List of cookies to set; format is
-#          "name1=content1; name2=content2;" as explained by
-#          CURLOPT_COOKIE(3). Defaults to no cookies.
+# @cookie: List of cookies to set; format is "name1=content1;
+#     name2=content2;" as explained by CURLOPT_COOKIE(3). Defaults to
+#     no cookies.
 #
-# @cookie-secret: ID of a QCryptoSecret object providing the cookie data in a
-#                 secure way. See @cookie for the format. (since 2.10)
+# @cookie-secret: ID of a QCryptoSecret object providing the cookie
+#     data in a secure way.  See @cookie for the format.  (since 2.10)
 #
 # Since: 2.9
 ##
 ##
 # @BlockdevOptionsCurlHttps:
 #
-# Driver specific block device options for HTTPS connections over the curl
-# backend.  URLs must start with "https://".
+# Driver specific block device options for HTTPS connections over the
+# curl backend.  URLs must start with "https://".
 #
-# @cookie: List of cookies to set; format is
-#          "name1=content1; name2=content2;" as explained by
-#          CURLOPT_COOKIE(3). Defaults to no cookies.
+# @cookie: List of cookies to set; format is "name1=content1;
+#     name2=content2;" as explained by CURLOPT_COOKIE(3). Defaults to
+#     no cookies.
 #
-# @sslverify: Whether to verify the SSL certificate's validity (defaults to
-#             true)
+# @sslverify: Whether to verify the SSL certificate's validity
+#     (defaults to true)
 #
-# @cookie-secret: ID of a QCryptoSecret object providing the cookie data in a
-#                 secure way. See @cookie for the format. (since 2.10)
+# @cookie-secret: ID of a QCryptoSecret object providing the cookie
+#     data in a secure way.  See @cookie for the format.  (since 2.10)
 #
 # Since: 2.9
 ##
 ##
 # @BlockdevOptionsCurlFtp:
 #
-# Driver specific block device options for FTP connections over the curl
-# backend.  URLs must start with "ftp://".
+# Driver specific block device options for FTP connections over the
+# curl backend.  URLs must start with "ftp://".
 #
 # Since: 2.9
 ##
 ##
 # @BlockdevOptionsCurlFtps:
 #
-# Driver specific block device options for FTPS connections over the curl
-# backend.  URLs must start with "ftps://".
+# Driver specific block device options for FTPS connections over the
+# curl backend.  URLs must start with "ftps://".
 #
-# @sslverify: Whether to verify the SSL certificate's validity (defaults to
-#             true)
+# @sslverify: Whether to verify the SSL certificate's validity
+#     (defaults to true)
 #
 # Since: 2.9
 ##
 #
 # @tls-creds: TLS credentials ID
 #
-# @x-dirty-bitmap: A metadata context name such as "qemu:dirty-bitmap:NAME"
-#                  or "qemu:allocation-depth" to query in place of the
-#                  traditional "base:allocation" block status (see
-#                  NBD_OPT_LIST_META_CONTEXT in the NBD protocol; and
-#                  yes, naming this option x-context would have made
-#                  more sense) (since 3.0)
-#
-# @reconnect-delay: On an unexpected disconnect, the nbd client tries to
-#                   connect again until succeeding or encountering a serious
-#                   error.  During the first @reconnect-delay seconds, all
-#                   requests are paused and will be rerun on a successful
-#                   reconnect. After that time, any delayed requests and all
-#                   future requests before a successful reconnect will
-#                   immediately fail. Default 0 (Since 4.2)
+# @tls-hostname: TLS hostname override for certificate validation
+#     (Since 7.0)
+#
+# @x-dirty-bitmap: A metadata context name such as
+#     "qemu:dirty-bitmap:NAME" or "qemu:allocation-depth" to query in
+#     place of the traditional "base:allocation" block status (see
+#     NBD_OPT_LIST_META_CONTEXT in the NBD protocol; and yes, naming
+#     this option x-context would have made more sense) (since 3.0)
+#
+# @reconnect-delay: On an unexpected disconnect, the nbd client tries
+#     to connect again until succeeding or encountering a serious
+#     error.  During the first @reconnect-delay seconds, all requests
+#     are paused and will be rerun on a successful reconnect.  After
+#     that time, any delayed requests and all future requests before a
+#     successful reconnect will immediately fail.  Default 0 (Since
+#     4.2)
+#
+# @open-timeout: In seconds.  If zero, the nbd driver tries the
+#     connection only once, and fails to open if the connection fails.
+#     If non-zero, the nbd driver will repeat connection attempts
+#     until successful or until @open-timeout seconds have elapsed.
+#     Default 0 (Since 7.0)
+#
+# Features:
+#
+# @unstable: Member @x-dirty-bitmap is experimental.
 #
 # Since: 2.9
 ##
   'data': { 'server': 'SocketAddress',
             '*export': 'str',
             '*tls-creds': 'str',
-            '*x-dirty-bitmap': 'str',
-            '*reconnect-delay': 'uint32' } }
+            '*tls-hostname': 'str',
+            '*x-dirty-bitmap': { 'type': 'str', 'features': [ 'unstable' ] },
+            '*reconnect-delay': 'uint32',
+            '*open-timeout': 'uint32' } }
 
 ##
 # @BlockdevOptionsRaw:
 # Driver specific block device options for the raw driver.
 #
 # @offset: position where the block device starts
+#
 # @size: the assumed size of the device
 #
 # Since: 2.9
 #
 # Driver specific block device options for the throttle driver
 #
-# @throttle-group: the name of the throttle-group object to use. It
-#                  must already exist.
+# @throttle-group: the name of the throttle-group object to use.  It
+#     must already exist.
+#
 # @file: reference to or definition of the data source block device
+#
 # Since: 2.11
 ##
 { 'struct': 'BlockdevOptionsThrottle',
   'data': { 'throttle-group': 'str',
             'file' : 'BlockdevRef'
              } }
+
+##
+# @BlockdevOptionsCor:
+#
+# Driver specific block device options for the copy-on-read driver.
+#
+# @bottom: The name of a non-filter node (allocation-bearing layer)
+#     that limits the COR operations in the backing chain (inclusive),
+#     so that no data below this node will be copied by this filter.
+#     If option is absent, the limit is not applied, so that data from
+#     all backing layers may be copied.
+#
+# Since: 6.0
+##
+{ 'struct': 'BlockdevOptionsCor',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { '*bottom': 'str' } }
+
+##
+# @OnCbwError:
+#
+# An enumeration of possible behaviors for copy-before-write operation
+# failures.
+#
+# @break-guest-write: report the error to the guest.  This way, the
+#     guest will not be able to overwrite areas that cannot be backed
+#     up, so the backup process remains valid.
+#
+# @break-snapshot: continue guest write.  Doing so will make the
+#     provided snapshot state invalid and any backup or export process
+#     based on it will finally fail.
+#
+# Since: 7.1
+##
+{ 'enum': 'OnCbwError',
+  'data': [ 'break-guest-write', 'break-snapshot' ] }
+
+##
+# @BlockdevOptionsCbw:
+#
+# Driver specific block device options for the copy-before-write
+# driver, which does so called copy-before-write operations: when data
+# is written to the filter, the filter first reads corresponding
+# blocks from its file child and copies them to @target child.  After
+# successfully copying, the write request is propagated to file child.
+# If copying fails, the original write request is failed too and no
+# data is written to file child.
+#
+# @target: The target for copy-before-write operations.
+#
+# @bitmap: If specified, copy-before-write filter will do
+#     copy-before-write operations only for dirty regions of the
+#     bitmap.  Bitmap size must be equal to length of file and target
+#     child of the filter.  Note also, that bitmap is used only to
+#     initialize internal bitmap of the process, so further
+#     modifications (or removing) of specified bitmap doesn't
+#     influence the filter.  (Since 7.0)
+#
+# @on-cbw-error: Behavior on failure of copy-before-write operation.
+#     Default is @break-guest-write.  (Since 7.1)
+#
+# @cbw-timeout: Zero means no limit.  Non-zero sets the timeout in
+#     seconds for copy-before-write operation.  When a timeout occurs,
+#     the respective copy-before-write operation will fail, and the
+#     @on-cbw-error parameter will decide how this failure is handled.
+#     Default 0. (Since 7.1)
+#
+# Since: 6.2
+##
+{ 'struct': 'BlockdevOptionsCbw',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
+            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
+
 ##
 # @BlockdevOptions:
 #
-# Options for creating a block device.  Many options are available for all
-# block devices, independent of the block driver:
+# Options for creating a block device.  Many options are available for
+# all block devices, independent of the block driver:
 #
 # @driver: block driver name
-# @node-name: the node name of the new node (Since 2.0).
-#             This option is required on the top level of blockdev-add.
-#             Valid node names start with an alphabetic character and may
-#             contain only alphanumeric characters, '-', '.' and '_'. Their
-#             maximum length is 31 characters.
+#
+# @node-name: the node name of the new node (Since 2.0). This option
+#     is required on the top level of blockdev-add.  Valid node names
+#     start with an alphabetic character and may contain only
+#     alphanumeric characters, '-', '.' and '_'. Their maximum length
+#     is 31 characters.
+#
 # @discard: discard-related options (default: ignore)
+#
 # @cache: cache-related options
-# @read-only: whether the block device should be read-only (default: false).
-#             Note that some block drivers support only read-only access,
-#             either generally or in certain configurations. In this case,
-#             the default value does not work and the option must be
-#             specified explicitly.
-# @auto-read-only: if true and @read-only is false, QEMU may automatically
-#                  decide not to open the image read-write as requested, but
-#                  fall back to read-only instead (and switch between the modes
-#                  later), e.g. depending on whether the image file is writable
-#                  or whether a writing user is attached to the node
-#                  (default: false, since 3.1)
+#
+# @read-only: whether the block device should be read-only (default:
+#     false). Note that some block drivers support only read-only
+#     access, either generally or in certain configurations.  In this
+#     case, the default value does not work and the option must be
+#     specified explicitly.
+#
+# @auto-read-only: if true and @read-only is false, QEMU may
+#     automatically decide not to open the image read-write as
+#     requested, but fall back to read-only instead (and switch
+#     between the modes later), e.g. depending on whether the image
+#     file is writable or whether a writing user is attached to the
+#     node (default: false, since 3.1)
+#
 # @detect-zeroes: detect and optimize zero writes (Since 2.1)
-#                 (default: off)
-# @force-share: force share all permission on added nodes.
-#               Requires read-only=true. (Since 2.10)
+#     (default: off)
+#
+# @force-share: force share all permission on added nodes.  Requires
+#     read-only=true.  (Since 2.10)
 #
 # Remaining options are determined by the block driver.
 #
       'bochs':      'BlockdevOptionsGenericFormat',
       'cloop':      'BlockdevOptionsGenericFormat',
       'compress':   'BlockdevOptionsGenericFormat',
-      'copy-on-read':'BlockdevOptionsGenericFormat',
+      'copy-before-write':'BlockdevOptionsCbw',
+      'copy-on-read':'BlockdevOptionsCor',
       'dmg':        'BlockdevOptionsGenericFormat',
       'file':       'BlockdevOptionsFile',
       'ftp':        'BlockdevOptionsCurlFtp',
       'ftps':       'BlockdevOptionsCurlFtps',
       'gluster':    'BlockdevOptionsGluster',
-      'host_cdrom': 'BlockdevOptionsFile',
-      'host_device':'BlockdevOptionsFile',
+      'host_cdrom':  { 'type': 'BlockdevOptionsFile',
+                       'if': 'HAVE_HOST_BLOCK_DEVICE' },
+      'host_device': { 'type': 'BlockdevOptionsFile',
+                       'if': 'HAVE_HOST_BLOCK_DEVICE' },
       'http':       'BlockdevOptionsCurlHttp',
       'https':      'BlockdevOptionsCurlHttps',
+      'io_uring':   { 'type': 'BlockdevOptionsIoUring',
+                      'if': 'CONFIG_BLKIO' },
       'iscsi':      'BlockdevOptionsIscsi',
       'luks':       'BlockdevOptionsLUKS',
       'nbd':        'BlockdevOptionsNbd',
       'null-aio':   'BlockdevOptionsNull',
       'null-co':    'BlockdevOptionsNull',
       'nvme':       'BlockdevOptionsNVMe',
+      'nvme-io_uring': { 'type': 'BlockdevOptionsNvmeIoUring',
+                         'if': 'CONFIG_BLKIO' },
       'parallels':  'BlockdevOptionsGenericFormat',
+      'preallocate':'BlockdevOptionsPreallocate',
       'qcow2':      'BlockdevOptionsQcow2',
       'qcow':       'BlockdevOptionsQcow',
       'qed':        'BlockdevOptionsGenericCOWFormat',
       'raw':        'BlockdevOptionsRaw',
       'rbd':        'BlockdevOptionsRbd',
       'replication': { 'type': 'BlockdevOptionsReplication',
-                       'if': 'defined(CONFIG_REPLICATION)' },
-      'sheepdog':   'BlockdevOptionsSheepdog',
+                       'if': 'CONFIG_REPLICATION' },
+      'snapshot-access': 'BlockdevOptionsGenericFormat',
       'ssh':        'BlockdevOptionsSsh',
       'throttle':   'BlockdevOptionsThrottle',
       'vdi':        'BlockdevOptionsGenericFormat',
       'vhdx':       'BlockdevOptionsGenericFormat',
+      'virtio-blk-vfio-pci':
+                    { 'type': 'BlockdevOptionsVirtioBlkVfioPci',
+                      'if': 'CONFIG_BLKIO' },
+      'virtio-blk-vhost-user':
+                    { 'type': 'BlockdevOptionsVirtioBlkVhostUser',
+                      'if': 'CONFIG_BLKIO' },
+      'virtio-blk-vhost-vdpa':
+                    { 'type': 'BlockdevOptionsVirtioBlkVhostVdpa',
+                      'if': 'CONFIG_BLKIO' },
       'vmdk':       'BlockdevOptionsGenericCOWFormat',
       'vpc':        'BlockdevOptionsGenericFormat',
       'vvfat':      'BlockdevOptionsVVFAT'
 # Reference to a block device.
 #
 # @definition: defines a new block device inline
+#
 # @reference: references the ID of an existing block device
 #
 # Since: 2.9
 # Reference to a block device.
 #
 # @definition: defines a new block device inline
-# @reference: references the ID of an existing block device.
-#             An empty string means that no block device should
-#             be referenced.  Deprecated; use null instead.
+#
+# @reference: references the ID of an existing block device.  An empty
+#     string means that no block device should be referenced.
+#     Deprecated; use null instead.
+#
 # @null: No block device should be referenced (since 2.10)
 #
 # Since: 2.9
 #
 # Since: 2.9
 #
-# Example:
+# Examples:
 #
-# 1.
 # -> { "execute": "blockdev-add",
 #      "arguments": {
 #           "driver": "qcow2",
 #     }
 # <- { "return": {} }
 #
-# 2.
 # -> { "execute": "blockdev-add",
 #      "arguments": {
 #           "driver": "qcow2",
 #      }
 #
 # <- { "return": {} }
-#
 ##
-{ 'command': 'blockdev-add', 'data': 'BlockdevOptions', 'boxed': true }
+{ 'command': 'blockdev-add', 'data': 'BlockdevOptions', 'boxed': true,
+  'allow-preconfig': true }
 
 ##
-# @x-blockdev-reopen:
+# @blockdev-reopen:
 #
-# Reopens a block device using the given set of options. Any option
-# not specified will be reset to its default value regardless of its
-# previous status. If an option cannot be changed or a particular
-# driver does not support reopening then the command will return an
-# error.
+# Reopens one or more block devices using the given set of options.
+# Any option not specified will be reset to its default value
+# regardless of its previous status.  If an option cannot be changed
+# or a particular driver does not support reopening then the command
+# will return an error.  All devices in the list are reopened in one
+# transaction, so if one of them fails then the whole transaction is
+# cancelled.
 #
-# The top-level @node-name option (from BlockdevOptions) must be
-# specified and is used to select the block device to be reopened.
-# Other @node-name options must be either omitted or set to the
-# current name of the appropriate node. This command won't change any
-# node name and any attempt to do it will result in an error.
+# The command receives a list of block devices to reopen.  For each
+# one of them, the top-level @node-name option (from BlockdevOptions)
+# must be specified and is used to select the block device to be
+# reopened.  Other @node-name options must be either omitted or set to
+# the current name of the appropriate node.  This command won't change
+# any node name and any attempt to do it will result in an error.
 #
 # In the case of options that refer to child nodes, the behavior of
 # this command depends on the value:
 #
 #  4) NULL: the current child (if any) is detached.
 #
-# Options (1) and (2) are supported in all cases, but at the moment
-# only @backing allows replacing or detaching an existing child.
+# Options (1) and (2) are supported in all cases.  Option (3) is
+# supported for @file and @backing, and option (4) for @backing only.
 #
 # Unlike with blockdev-add, the @backing option must always be present
 # unless the node being reopened does not have a backing file and its
 # image does not have a default backing file name as part of its
 # metadata.
 #
-# Since: 4.0
+# Since: 6.1
 ##
-{ 'command': 'x-blockdev-reopen',
-  'data': 'BlockdevOptions', 'boxed': true }
+{ 'command': 'blockdev-reopen',
+  'data': { 'options': ['BlockdevOptions'] },
+  'allow-preconfig': true }
 
 ##
 # @blockdev-del:
 #
-# Deletes a block device that has been added using blockdev-add.
-# The command will fail if the node is attached to a device or is
+# Deletes a block device that has been added using blockdev-add.  The
+# command will fail if the node is attached to a device or is
 # otherwise being used.
 #
 # @node-name: Name of the graph node to delete.
 #      "arguments": { "node-name": "node0" }
 #    }
 # <- { "return": {} }
-#
 ##
-{ 'command': 'blockdev-del', 'data': { 'node-name': 'str' } }
+{ 'command': 'blockdev-del', 'data': { 'node-name': 'str' },
+  'allow-preconfig': true }
 
 ##
 # @BlockdevCreateOptionsFile:
 # Driver specific image creation options for file.
 #
 # @filename: Filename for the new image file
+#
 # @size: Size of the virtual disk in bytes
+#
 # @preallocation: Preallocation mode for the new image (default: off;
-#                 allowed values: off,
-#                 falloc (if defined CONFIG_POSIX_FALLOCATE),
-#                 full (if defined CONFIG_POSIX))
+#     allowed values: off, falloc (if CONFIG_POSIX_FALLOCATE), full
+#     (if CONFIG_POSIX))
+#
 # @nocow: Turn off copy-on-write (valid only on btrfs; default: off)
-# @extent-size-hint: Extent size hint to add to the image file; 0 for not
-#                    adding an extent size hint (default: 1 MB, since 5.1)
+#
+# @extent-size-hint: Extent size hint to add to the image file; 0 for
+#     not adding an extent size hint (default: 1 MB, since 5.1)
 #
 # Since: 2.12
 ##
 # Driver specific image creation options for gluster.
 #
 # @location: Where to store the new image file
+#
 # @size: Size of the virtual disk in bytes
+#
 # @preallocation: Preallocation mode for the new image (default: off;
-#                 allowed values: off,
-#                 falloc (if defined CONFIG_GLUSTERFS_FALLOCATE),
-#                 full (if defined CONFIG_GLUSTERFS_ZEROFILL))
+#     allowed values: off, falloc (if CONFIG_GLUSTERFS_FALLOCATE),
+#     full (if CONFIG_GLUSTERFS_ZEROFILL))
 #
 # Since: 2.12
 ##
 # Driver specific image creation options for LUKS.
 #
 # @file: Node to create the image format on
+#
 # @size: Size of the virtual disk in bytes
-# @preallocation: Preallocation mode for the new image
-#                 (since: 4.2)
-#                 (default: off; allowed values: off, metadata, falloc, full)
+#
+# @preallocation: Preallocation mode for the new image (since: 4.2)
+#     (default: off; allowed values: off, metadata, falloc, full)
 #
 # Since: 2.12
 ##
 # Driver specific image creation options for NFS.
 #
 # @location: Where to store the new image file
+#
 # @size: Size of the virtual disk in bytes
 #
 # Since: 2.12
 # Driver specific image creation options for parallels.
 #
 # @file: Node to create the image format on
+#
 # @size: Size of the virtual disk in bytes
+#
 # @cluster-size: Cluster size in bytes (default: 1 MB)
 #
 # Since: 2.12
 # Driver specific image creation options for qcow.
 #
 # @file: Node to create the image format on
+#
 # @size: Size of the virtual disk in bytes
+#
 # @backing-file: File name of the backing file if a backing file
-#                should be used
+#     should be used
+#
 # @encrypt: Encryption options if the image should be encrypted
 #
 # Since: 2.12
 ##
 # @BlockdevQcow2Version:
 #
-# @v2:  The original QCOW2 format as introduced in qemu 0.10 (version 2)
-# @v3:  The extended QCOW2 format as introduced in qemu 1.1 (version 3)
+# @v2: The original QCOW2 format as introduced in qemu 0.10 (version
+#     2)
+#
+# @v3: The extended QCOW2 format as introduced in qemu 1.1 (version 3)
 #
 # Since: 2.12
 ##
 { 'enum': 'BlockdevQcow2Version',
   'data': [ 'v2', 'v3' ] }
 
-
 ##
 # @Qcow2CompressionType:
 #
 # Compression type used in qcow2 image file
 #
 # @zlib: zlib compression, see <http://zlib.net/>
+#
 # @zstd: zstd compression, see <http://github.com/facebook/zstd>
 #
 # Since: 5.1
 ##
 { 'enum': 'Qcow2CompressionType',
-  'data': [ 'zlib', { 'name': 'zstd', 'if': 'defined(CONFIG_ZSTD)' } ] }
+  'data': [ 'zlib', { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
 
 ##
 # @BlockdevCreateOptionsQcow2:
 # Driver specific image creation options for qcow2.
 #
 # @file: Node to create the image format on
+#
 # @data-file: Node to use as an external data file in which all guest
-#             data is stored so that only metadata remains in the qcow2
-#             file (since: 4.0)
+#     data is stored so that only metadata remains in the qcow2 file
+#     (since: 4.0)
+#
 # @data-file-raw: True if the external data file must stay valid as a
-#                 standalone (read-only) raw image without looking at qcow2
-#                 metadata (default: false; since: 4.0)
+#     standalone (read-only) raw image without looking at qcow2
+#     metadata (default: false; since: 4.0)
+#
 # @extended-l2: True to make the image have extended L2 entries
-#               (default: false; since 5.2)
+#     (default: false; since 5.2)
+#
 # @size: Size of the virtual disk in bytes
+#
 # @version: Compatibility level (default: v3)
+#
 # @backing-file: File name of the backing file if a backing file
-#                should be used
+#     should be used
+#
 # @backing-fmt: Name of the block driver to use for the backing file
+#
 # @encrypt: Encryption options if the image should be encrypted
+#
 # @cluster-size: qcow2 cluster size in bytes (default: 65536)
+#
 # @preallocation: Preallocation mode for the new image (default: off;
-#                 allowed values: off, falloc, full, metadata)
-# @lazy-refcounts: True if refcounts may be updated lazily (default: off)
+#     allowed values: off, falloc, full, metadata)
+#
+# @lazy-refcounts: True if refcounts may be updated lazily
+#     (default: off)
+#
 # @refcount-bits: Width of reference counts in bits (default: 16)
+#
 # @compression-type: The image cluster compression method
-#                    (default: zlib, since 5.1)
+#     (default: zlib, since 5.1)
 #
 # Since: 2.12
 ##
 # Driver specific image creation options for qed.
 #
 # @file: Node to create the image format on
+#
 # @size: Size of the virtual disk in bytes
+#
 # @backing-file: File name of the backing file if a backing file
-#                should be used
+#     should be used
+#
 # @backing-fmt: Name of the block driver to use for the backing file
+#
 # @cluster-size: Cluster size in bytes (default: 65536)
+#
 # @table-size: L1/L2 table size (in clusters)
 #
 # Since: 2.12
 #
 # Driver specific image creation options for rbd/Ceph.
 #
-# @location: Where to store the new image file. This location cannot
-#            point to a snapshot.
+# @location: Where to store the new image file.  This location cannot
+#     point to a snapshot.
+#
 # @size: Size of the virtual disk in bytes
+#
 # @cluster-size: RBD object size
 #
+# @encrypt: Image encryption options.  (Since 6.1)
+#
 # Since: 2.12
 ##
 { 'struct': 'BlockdevCreateOptionsRbd',
   'data': { 'location':         'BlockdevOptionsRbd',
             'size':             'size',
-            '*cluster-size' :   'size' } }
+            '*cluster-size' :   'size',
+            '*encrypt' :        'RbdEncryptionCreateOptions' } }
 
 ##
 # @BlockdevVmdkSubformat:
 #
 # Subformat options for VMDK images
 #
-# @monolithicSparse:     Single file image with sparse cluster allocation
+# @monolithicSparse: Single file image with sparse cluster allocation
 #
-# @monolithicFlat:       Single flat data image and a descriptor file
+# @monolithicFlat: Single flat data image and a descriptor file
 #
-# @twoGbMaxExtentSparse: Data is split into 2GB (per virtual LBA) sparse extent
-#                        files, in addition to a descriptor file
+# @twoGbMaxExtentSparse: Data is split into 2GB (per virtual LBA)
+#     sparse extent files, in addition to a descriptor file
 #
-# @twoGbMaxExtentFlat:   Data is split into 2GB (per virtual LBA) flat extent
-#                        files, in addition to a descriptor file
+# @twoGbMaxExtentFlat: Data is split into 2GB (per virtual LBA) flat
+#     extent files, in addition to a descriptor file
 #
-# @streamOptimized:      Single file image sparse cluster allocation, optimized
-#                        for streaming over network.
+# @streamOptimized: Single file image sparse cluster allocation,
+#     optimized for streaming over network.
 #
 # Since: 4.0
 ##
 #
 # Driver specific image creation options for VMDK.
 #
-# @file: Where to store the new image file. This refers to the image
-#        file for monolithcSparse and streamOptimized format, or the
-#        descriptor file for other formats.
+# @file: Where to store the new image file.  This refers to the image
+#     file for monolithcSparse and streamOptimized format, or the
+#     descriptor file for other formats.
+#
 # @size: Size of the virtual disk in bytes
-# @extents: Where to store the data extents. Required for monolithcFlat,
-#           twoGbMaxExtentSparse and twoGbMaxExtentFlat formats. For
-#           monolithicFlat, only one entry is required; for
-#           twoGbMaxExtent* formats, the number of entries required is
-#           calculated as extent_number = virtual_size / 2GB. Providing
-#           more extents than will be used is an error.
-# @subformat: The subformat of the VMDK image. Default: "monolithicSparse".
-# @backing-file: The path of backing file. Default: no backing file is used.
-# @adapter-type: The adapter type used to fill in the descriptor. Default: ide.
-# @hwversion: Hardware version. The meaningful options are "4" or "6".
-#             Default: "4".
-# @zeroed-grain: Whether to enable zeroed-grain feature for sparse subformats.
-#                Default: false.
+#
+# @extents: Where to store the data extents.  Required for
+#     monolithcFlat, twoGbMaxExtentSparse and twoGbMaxExtentFlat
+#     formats.  For monolithicFlat, only one entry is required; for
+#     twoGbMaxExtent* formats, the number of entries required is
+#     calculated as extent_number = virtual_size / 2GB. Providing more
+#     extents than will be used is an error.
+#
+# @subformat: The subformat of the VMDK image.  Default:
+#     "monolithicSparse".
+#
+# @backing-file: The path of backing file.  Default: no backing file
+#     is used.
+#
+# @adapter-type: The adapter type used to fill in the descriptor.
+#     Default: ide.
+#
+# @hwversion: Hardware version.  The meaningful options are "4" or
+#     "6". Default: "4".
+#
+# @toolsversion: VMware guest tools version.  Default: "2147483647"
+#     (Since 6.2)
+#
+# @zeroed-grain: Whether to enable zeroed-grain feature for sparse
+#     subformats.  Default: false.
 #
 # Since: 4.0
 ##
             '*backing-file':    'str',
             '*adapter-type':    'BlockdevVmdkAdapterType',
             '*hwversion':       'str',
+            '*toolsversion':    'str',
             '*zeroed-grain':    'bool' } }
 
-
-##
-# @SheepdogRedundancyType:
-#
-# @full: Create a fully replicated vdi with x copies
-# @erasure-coded: Create an erasure coded vdi with x data strips and
-#                 y parity strips
-#
-# Since: 2.12
-##
-{ 'enum': 'SheepdogRedundancyType',
-  'data': [ 'full', 'erasure-coded' ] }
-
-##
-# @SheepdogRedundancyFull:
-#
-# @copies: Number of copies to use (between 1 and 31)
-#
-# Since: 2.12
-##
-{ 'struct': 'SheepdogRedundancyFull',
-  'data': { 'copies': 'int' }}
-
-##
-# @SheepdogRedundancyErasureCoded:
-#
-# @data-strips: Number of data strips to use (one of {2,4,8,16})
-# @parity-strips: Number of parity strips to use (between 1 and 15)
-#
-# Since: 2.12
-##
-{ 'struct': 'SheepdogRedundancyErasureCoded',
-  'data': { 'data-strips': 'int',
-            'parity-strips': 'int' }}
-
-##
-# @SheepdogRedundancy:
-#
-# Since: 2.12
-##
-{ 'union': 'SheepdogRedundancy',
-  'base': { 'type': 'SheepdogRedundancyType' },
-  'discriminator': 'type',
-  'data': { 'full': 'SheepdogRedundancyFull',
-            'erasure-coded': 'SheepdogRedundancyErasureCoded' } }
-
-##
-# @BlockdevCreateOptionsSheepdog:
-#
-# Driver specific image creation options for Sheepdog.
-#
-# @location: Where to store the new image file
-# @size: Size of the virtual disk in bytes
-# @backing-file: File name of a base image
-# @preallocation: Preallocation mode for the new image (default: off;
-#                 allowed values: off, full)
-# @redundancy: Redundancy of the image
-# @object-size: Object size of the image
-#
-# Since: 2.12
-##
-{ 'struct': 'BlockdevCreateOptionsSheepdog',
-  'data': { 'location':         'BlockdevOptionsSheepdog',
-            'size':             'size',
-            '*backing-file':    'str',
-            '*preallocation':   'PreallocMode',
-            '*redundancy':      'SheepdogRedundancy',
-            '*object-size':     'size' } }
-
 ##
 # @BlockdevCreateOptionsSsh:
 #
 # Driver specific image creation options for SSH.
 #
 # @location: Where to store the new image file
+#
 # @size: Size of the virtual disk in bytes
 #
 # Since: 2.12
 # Driver specific image creation options for VDI.
 #
 # @file: Node to create the image format on
+#
 # @size: Size of the virtual disk in bytes
+#
 # @preallocation: Preallocation mode for the new image (default: off;
-#                 allowed values: off, metadata)
+#     allowed values: off, metadata)
 #
 # Since: 2.12
 ##
 # @BlockdevVhdxSubformat:
 #
 # @dynamic: Growing image file
-# @fixed:   Preallocated fixed-size image file
+#
+# @fixed: Preallocated fixed-size image file
 #
 # Since: 2.12
 ##
 # Driver specific image creation options for vhdx.
 #
 # @file: Node to create the image format on
+#
 # @size: Size of the virtual disk in bytes
-# @log-size: Log size in bytes, must be a multiple of 1 MB
-#            (default: 1 MB)
+#
+# @log-size: Log size in bytes, must be a multiple of 1 MB (default: 1
+#     MB)
+#
 # @block-size: Block size in bytes, must be a multiple of 1 MB and not
-#              larger than 256 MB (default: automatically choose a block
-#              size depending on the image size)
+#     larger than 256 MB (default: automatically choose a block size
+#     depending on the image size)
+#
 # @subformat: vhdx subformat (default: dynamic)
-# @block-state-zero: Force use of payload blocks of type 'ZERO'. Non-standard,
-#                    but default.  Do not set to 'off' when using 'qemu-img
-#                    convert' with subformat=dynamic.
+#
+# @block-state-zero: Force use of payload blocks of type 'ZERO'.
+#     Non-standard, but default.  Do not set to 'off' when using
+#     'qemu-img convert' with subformat=dynamic.
 #
 # Since: 2.12
 ##
 # @BlockdevVpcSubformat:
 #
 # @dynamic: Growing image file
-# @fixed:   Preallocated fixed-size image file
+#
+# @fixed: Preallocated fixed-size image file
 #
 # Since: 2.12
 ##
 # Driver specific image creation options for vpc (VHD).
 #
 # @file: Node to create the image format on
+#
 # @size: Size of the virtual disk in bytes
+#
 # @subformat: vhdx subformat (default: dynamic)
-# @force-size: Force use of the exact byte size instead of rounding to the
-#              next size that can be represented in CHS geometry
-#              (default: false)
+#
+# @force-size: Force use of the exact byte size instead of rounding to
+#     the next size that can be represented in CHS geometry
+#     (default: false)
 #
 # Since: 2.12
 ##
       'qcow2':          'BlockdevCreateOptionsQcow2',
       'qed':            'BlockdevCreateOptionsQed',
       'rbd':            'BlockdevCreateOptionsRbd',
-      'sheepdog':       'BlockdevCreateOptionsSheepdog',
       'ssh':            'BlockdevCreateOptionsSsh',
       'vdi':            'BlockdevCreateOptionsVdi',
       'vhdx':           'BlockdevCreateOptionsVhdx',
 ##
 # @blockdev-create:
 #
-# Starts a job to create an image format on a given node. The job is
+# Starts a job to create an image format on a given node.  The job is
 # automatically finalized, but a manual job-dismiss is required.
 #
-# @job-id:          Identifier for the newly created job.
+# @job-id: Identifier for the newly created job.
 #
-# @options:         Options for the image creation.
+# @options: Options for the image creation.
 #
 # Since: 3.0
 ##
 { 'command': 'blockdev-create',
   'data': { 'job-id': 'str',
-            'options': 'BlockdevCreateOptions' } }
+            'options': 'BlockdevCreateOptions' },
+  'allow-preconfig': true }
 
 ##
 # @BlockdevAmendOptionsLUKS:
 ##
 # @BlockdevAmendOptionsQcow2:
 #
-# Driver specific image amend options for qcow2.
-# For now, only encryption options can be amended
+# Driver specific image amend options for qcow2. For now, only
+# encryption options can be amended
 #
-# @encrypt          Encryption options to be amended
+# @encrypt: Encryption options to be amended
 #
 # Since: 5.1
 ##
 #
 # Options for amending an image format
 #
-# @driver:          Block driver of the node to amend.
+# @driver: Block driver of the node to amend.
 #
 # Since: 5.1
 ##
 ##
 # @x-blockdev-amend:
 #
-# Starts a job to amend format specific options of an existing open block device
-# The job is automatically finalized, but a manual job-dismiss is required.
+# Starts a job to amend format specific options of an existing open
+# block device The job is automatically finalized, but a manual
+# job-dismiss is required.
 #
-# @job-id:          Identifier for the newly created job.
+# @job-id: Identifier for the newly created job.
 #
-# @node-name:       Name of the block node to work on
+# @node-name: Name of the block node to work on
 #
-# @options:         Options (driver specific)
+# @options: Options (driver specific)
 #
-# @force:           Allow unsafe operations, format specific
-#                   For luks that allows erase of the last active keyslot
-#                   (permanent loss of data),
-#                   and replacement of an active keyslot
-#                   (possible loss of data if IO error happens)
+# @force: Allow unsafe operations, format specific For luks that
+#     allows erase of the last active keyslot (permanent loss of
+#     data), and replacement of an active keyslot (possible loss of
+#     data if IO error happens)
+#
+# Features:
+#
+# @unstable: This command is experimental.
 #
 # Since: 5.1
 ##
   'data': { 'job-id': 'str',
             'node-name': 'str',
             'options': 'BlockdevAmendOptions',
-            '*force': 'bool' } }
+            '*force': 'bool' },
+  'features': [ 'unstable' ],
+  'allow-preconfig': true }
 
 ##
 # @BlockErrorAction:
 { 'enum': 'BlockErrorAction',
   'data': [ 'ignore', 'report', 'stop' ] }
 
-
 ##
 # @BLOCK_IMAGE_CORRUPTED:
 #
-# Emitted when a disk image is being marked corrupt. The image can be
-# identified by its device or node name. The 'device' field is always
+# Emitted when a disk image is being marked corrupt.  The image can be
+# identified by its device or node name.  The 'device' field is always
 # present for compatibility reasons, but it can be empty ("") if the
 # image does not have a device name associated.
 #
-# @device: device name. This is always present for compatibility
-#          reasons, but it can be empty ("") if the image does not
-#          have a device name associated.
+# @device: device name.  This is always present for compatibility
+#     reasons, but it can be empty ("") if the image does not have a
+#     device name associated.
 #
 # @node-name: node name (Since: 2.4)
 #
 # @msg: informative message for human consumption, such as the kind of
-#       corruption being detected. It should not be parsed by machine as it is
-#       not guaranteed to be stable
+#     corruption being detected.  It should not be parsed by machine
+#     as it is not guaranteed to be stable
 #
 # @offset: if the corruption resulted from an image access, this is
-#          the host's access offset into the image
+#     the host's access offset into the image
 #
-# @size: if the corruption resulted from an image access, this is
-#        the access size
+# @size: if the corruption resulted from an image access, this is the
+#     access size
 #
-# @fatal: if set, the image is marked corrupt and therefore unusable after this
-#         event and must be repaired (Since 2.2; before, every
-#         BLOCK_IMAGE_CORRUPTED event was fatal)
+# @fatal: if set, the image is marked corrupt and therefore unusable
+#     after this event and must be repaired (Since 2.2; before, every
+#     BLOCK_IMAGE_CORRUPTED event was fatal)
 #
 # Note: If action is "stop", a STOP event will eventually follow the
-#       BLOCK_IO_ERROR event.
+#     BLOCK_IO_ERROR event.
 #
 # Example:
 #
 # <- { "event": "BLOCK_IMAGE_CORRUPTED",
-#      "data": { "device": "ide0-hd0", "node-name": "node0",
-#                "msg": "Prevented active L1 table overwrite", "offset": 196608,
-#                "size": 65536 },
-#      "timestamp": { "seconds": 1378126126, "microseconds": 966463 } }
+#      "data": { "device": "", "node-name": "drive", "fatal": false,
+#                "msg": "L2 table offset 0x2a2a2a00 unaligned (L1 index: 0)" },
+#      "timestamp": { "seconds": 1648243240, "microseconds": 906060 } }
 #
 # Since: 1.7
 ##
 #
 # Emitted when a disk I/O error occurs
 #
-# @device: device name. This is always present for compatibility
-#          reasons, but it can be empty ("") if the image does not
-#          have a device name associated.
+# @device: device name.  This is always present for compatibility
+#     reasons, but it can be empty ("") if the image does not have a
+#     device name associated.
 #
-# @node-name: node name. Note that errors may be reported for the root node
-#             that is directly attached to a guest device rather than for the
-#             node where the error occurred. The node name is not present if
-#             the drive is empty. (Since: 2.8)
+# @node-name: node name.  Note that errors may be reported for the
+#     root node that is directly attached to a guest device rather
+#     than for the node where the error occurred.  The node name is
+#     not present if the drive is empty.  (Since: 2.8)
 #
 # @operation: I/O operation
 #
 # @action: action that has been taken
 #
-# @nospace: true if I/O error was caused due to a no-space
-#           condition. This key is only present if query-block's
-#           io-status is present, please see query-block documentation
-#           for more information (since: 2.2)
+# @nospace: true if I/O error was caused due to a no-space condition.
+#     This key is only present if query-block's io-status is present,
+#     please see query-block documentation for more information
+#     (since: 2.2)
 #
-# @reason: human readable string describing the error cause.
-#          (This field is a debugging aid for humans, it should not
-#          be parsed by applications) (since: 2.2)
+# @reason: human readable string describing the error cause.  (This
+#     field is a debugging aid for humans, it should not be parsed by
+#     applications) (since: 2.2)
 #
 # Note: If action is "stop", a STOP event will eventually follow the
-#       BLOCK_IO_ERROR event
+#     BLOCK_IO_ERROR event
 #
-# Since: 0.13.0
+# Since: 0.13
 #
 # Example:
 #
 #      "data": { "device": "ide0-hd1",
 #                "node-name": "#block212",
 #                "operation": "write",
-#                "action": "stop" },
+#                "action": "stop",
+#                "reason": "No space left on device" },
 #      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
-#
 ##
 { 'event': 'BLOCK_IO_ERROR',
   'data': { 'device': 'str', '*node-name': 'str',
 #
 # @type: job type
 #
-# @device: The job identifier. Originally the device name but other
-#          values are allowed since QEMU 2.7
+# @device: The job identifier.  Originally the device name but other
+#     values are allowed since QEMU 2.7
 #
 # @len: maximum progress value
 #
-# @offset: current progress value. On success this is equal to len.
-#          On failure this is less than len
+# @offset: current progress value.  On success this is equal to len.
+#     On failure this is less than len
 #
 # @speed: rate limit, bytes per second
 #
-# @error: error message. Only present on failure. This field
-#         contains a human-readable error message. There are no semantics
-#         other than that streaming has failed and clients should not try to
-#         interpret the error string
+# @error: error message.  Only present on failure.  This field
+#     contains a human-readable error message.  There are no semantics
+#     other than that streaming has failed and clients should not try
+#     to interpret the error string
 #
 # Since: 1.1
 #
 #                "len": 10737418240, "offset": 10737418240,
 #                "speed": 0 },
 #      "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
-#
 ##
 { 'event': 'BLOCK_JOB_COMPLETED',
   'data': { 'type'  : 'JobType',
 #
 # @type: job type
 #
-# @device: The job identifier. Originally the device name but other
-#          values are allowed since QEMU 2.7
+# @device: The job identifier.  Originally the device name but other
+#     values are allowed since QEMU 2.7
 #
 # @len: maximum progress value
 #
-# @offset: current progress value. On success this is equal to len.
-#          On failure this is less than len
+# @offset: current progress value.  On success this is equal to len.
+#     On failure this is less than len
 #
 # @speed: rate limit, bytes per second
 #
 #                "len": 10737418240, "offset": 134217728,
 #                "speed": 0 },
 #      "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
-#
 ##
 { 'event': 'BLOCK_JOB_CANCELLED',
   'data': { 'type'  : 'JobType',
 #
 # Emitted when a block job encounters an error
 #
-# @device: The job identifier. Originally the device name but other
-#          values are allowed since QEMU 2.7
+# @device: The job identifier.  Originally the device name but other
+#     values are allowed since QEMU 2.7
 #
 # @operation: I/O operation
 #
 #                "operation": "write",
 #                "action": "stop" },
 #      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
-#
 ##
 { 'event': 'BLOCK_JOB_ERROR',
   'data': { 'device'   : 'str',
 #
 # @type: job type
 #
-# @device: The job identifier. Originally the device name but other
-#          values are allowed since QEMU 2.7
+# @device: The job identifier.  Originally the device name but other
+#     values are allowed since QEMU 2.7
 #
 # @len: maximum progress value
 #
-# @offset: current progress value. On success this is equal to len.
-#          On failure this is less than len
+# @offset: current progress value.  On success this is equal to len.
+#     On failure this is less than len
 #
 # @speed: rate limit, bytes per second
 #
-# Note: The "ready to complete" status is always reset by a @BLOCK_JOB_ERROR
-#       event
+# Note: The "ready to complete" status is always reset by a
+#     @BLOCK_JOB_ERROR event
 #
 # Since: 1.3
 #
 #
 # <- { "event": "BLOCK_JOB_READY",
 #      "data": { "device": "drive0", "type": "mirror", "speed": 0,
-#                "len": 2097152, "offset": 2097152 }
+#                "len": 2097152, "offset": 2097152 },
 #      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
-#
 ##
 { 'event': 'BLOCK_JOB_READY',
   'data': { 'type'  : 'JobType',
 ##
 # @BLOCK_JOB_PENDING:
 #
-# Emitted when a block job is awaiting explicit authorization to finalize graph
-# changes via @block-job-finalize. If this job is part of a transaction, it will
-# not emit this event until the transaction has converged first.
+# Emitted when a block job is awaiting explicit authorization to
+# finalize graph changes via @block-job-finalize.  If this job is part
+# of a transaction, it will not emit this event until the transaction
+# has converged first.
 #
 # @type: job type
 #
 #
 # Example:
 #
-# <- { "event": "BLOCK_JOB_WAITING",
-#      "data": { "device": "drive0", "type": "mirror" },
+# <- { "event": "BLOCK_JOB_PENDING",
+#      "data": { "type": "mirror", "id": "backup_1" },
 #      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
-#
 ##
 { 'event': 'BLOCK_JOB_PENDING',
   'data': { 'type'  : 'JobType',
 # Preallocation mode of QEMU image file
 #
 # @off: no preallocation
+#
 # @metadata: preallocate only for metadata
+#
 # @falloc: like @full preallocation but allocate disk space by
-#          posix_fallocate() rather than writing data.
+#     posix_fallocate() rather than writing data.
+#
 # @full: preallocate all data by writing it to the device to ensure
-#        disk space is really available. This data may or may not be
-#        zero, depending on the image format and storage.
-#        @full preallocation also sets up metadata correctly.
+#     disk space is really available.  This data may or may not be
+#     zero, depending on the image format and storage.  @full
+#     preallocation also sets up metadata correctly.
 #
 # Since: 2.2
 ##
 # @BLOCK_WRITE_THRESHOLD:
 #
 # Emitted when writes on block device reaches or exceeds the
-# configured write threshold. For thin-provisioned devices, this
-# means the device should be extended to avoid pausing for
-# disk exhaustion.
-# The event is one shot. Once triggered, it needs to be
+# configured write threshold.  For thin-provisioned devices, this
+# means the device should be extended to avoid pausing for disk
+# exhaustion.  The event is one shot.  Once triggered, it needs to be
 # re-registered with another block-set-write-threshold command.
 #
 # @node-name: graph node name on which the threshold was exceeded.
 #
-# @amount-exceeded: amount of data which exceeded the threshold, in bytes.
+# @amount-exceeded: amount of data which exceeded the threshold, in
+#     bytes.
 #
 # @write-threshold: last configured threshold, in bytes.
 #
 ##
 # @block-set-write-threshold:
 #
-# Change the write threshold for a block drive. An event will be
+# Change the write threshold for a block drive.  An event will be
 # delivered if a write to this block drive crosses the configured
-# threshold.  The threshold is an offset, thus must be
-# non-negative. Default is no write threshold. Setting the threshold
-# to zero disables it.
+# threshold.  The threshold is an offset, thus must be non-negative.
+# Default is no write threshold.  Setting the threshold to zero
+# disables it.
 #
-# This is useful to transparently resize thin-provisioned drives without
-# the guest OS noticing.
+# This is useful to transparently resize thin-provisioned drives
+# without the guest OS noticing.
 #
 # @node-name: graph node name on which the threshold must be set.
 #
 # @write-threshold: configured threshold for the block device, bytes.
-#                   Use 0 to disable the threshold.
+#     Use 0 to disable the threshold.
 #
 # Since: 2.3
 #
 #      "arguments": { "node-name": "mydev",
 #                     "write-threshold": 17179869184 } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'block-set-write-threshold',
-  'data': { 'node-name': 'str', 'write-threshold': 'uint64' } }
+  'data': { 'node-name': 'str', 'write-threshold': 'uint64' },
+  'allow-preconfig': true }
 
 ##
 # @x-blockdev-change:
 #
-# Dynamically reconfigure the block driver state graph. It can be used
-# to add, remove, insert or replace a graph node. Currently only the
-# Quorum driver implements this feature to add or remove its child. This
-# is useful to fix a broken quorum child.
+# Dynamically reconfigure the block driver state graph.  It can be
+# used to add, remove, insert or replace a graph node.  Currently only
+# the Quorum driver implements this feature to add or remove its
+# child.  This is useful to fix a broken quorum child.
 #
-# If @node is specified, it will be inserted under @parent. @child
-# may not be specified in this case. If both @parent and @child are
+# If @node is specified, it will be inserted under @parent.  @child
+# may not be specified in this case.  If both @parent and @child are
 # specified but @node is not, @child will be detached from @parent.
 #
 # @parent: the id or name of the parent node.
 #
 # @node: the name of the node that will be added.
 #
-# Note: this command is experimental, and its API is not stable. It
-#       does not support all kinds of operations, all kinds of children, nor
-#       all block drivers.
+# Features:
 #
-#       FIXME Removing children from a quorum node means introducing gaps in the
-#       child indices. This cannot be represented in the 'children' list of
-#       BlockdevOptionsQuorum, as returned by .bdrv_refresh_filename().
+# @unstable: This command is experimental, and its API is not stable.
+#     It does not support all kinds of operations, all kinds of
+#     children, nor all block drivers.
 #
-#       Warning: The data in a new quorum child MUST be consistent with that of
-#       the rest of the array.
+#     FIXME Removing children from a quorum node means introducing
+#     gaps in the child indices.  This cannot be represented in the
+#     'children' list of BlockdevOptionsQuorum, as returned by
+#     .bdrv_refresh_filename().
+#
+#     Warning: The data in a new quorum child MUST be consistent with
+#     that of the rest of the array.
 #
 # Since: 2.7
 #
-# Example:
+# Examples:
 #
 # 1. Add a new node to a quorum
+#
 # -> { "execute": "blockdev-add",
 #      "arguments": {
 #          "driver": "raw",
 # <- { "return": {} }
 #
 # 2. Delete a quorum's node
+#
 # -> { "execute": "x-blockdev-change",
 #      "arguments": { "parent": "disk1",
 #                     "child": "children.1" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'x-blockdev-change',
   'data' : { 'parent': 'str',
              '*child': 'str',
-             '*node': 'str' } }
+             '*node': 'str' },
+  'features': [ 'unstable' ],
+  'allow-preconfig': true }
 
 ##
 # @x-blockdev-set-iothread:
 #
-# Move @node and its children into the @iothread.  If @iothread is null then
-# move @node and its children into the main loop.
+# Move @node and its children into the @iothread.  If @iothread is
+# null then move @node and its children into the main loop.
 #
 # The node must not be attached to a BlockBackend.
 #
 #
 # @iothread: the name of the IOThread object or null for the main loop
 #
-# @force: true if the node and its children should be moved when a BlockBackend
-#         is already attached
+# @force: true if the node and its children should be moved when a
+#     BlockBackend is already attached
 #
-# Note: this command is experimental and intended for test cases that need
-#       control over IOThreads only.
+# Features:
+#
+# @unstable: This command is experimental and intended for test cases
+#     that need control over IOThreads only.
 #
 # Since: 2.12
 #
-# Example:
+# Examples:
 #
 # 1. Move a node into an IOThread
+#
 # -> { "execute": "x-blockdev-set-iothread",
 #      "arguments": { "node-name": "disk1",
 #                     "iothread": "iothread0" } }
 # <- { "return": {} }
 #
 # 2. Move a node into the main loop
+#
 # -> { "execute": "x-blockdev-set-iothread",
 #      "arguments": { "node-name": "disk1",
 #                     "iothread": null } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'x-blockdev-set-iothread',
   'data' : { 'node-name': 'str',
              'iothread': 'StrOrNull',
-             '*force': 'bool' } }
+             '*force': 'bool' },
+  'features': [ 'unstable' ],
+  'allow-preconfig': true }
 
 ##
 # @QuorumOpType:
 # <- { "event": "QUORUM_FAILURE",
 #      "data": { "reference": "usr1", "sector-num": 345435, "sectors-count": 5 },
 #      "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
-#
 ##
 { 'event': 'QUORUM_FAILURE',
   'data': { 'reference': 'str', 'sector-num': 'int', 'sectors-count': 'int' } }
 #
 # @type: quorum operation type (Since 2.6)
 #
-# @error: error message. Only present on failure. This field
-#         contains a human-readable error message. There are no semantics other
-#         than that the block layer reported an error and clients should not
-#         try to interpret the error string.
+# @error: error message.  Only present on failure.  This field
+#     contains a human-readable error message.  There are no semantics
+#     other than that the block layer reported an error and clients
+#     should not try to interpret the error string.
 #
 # @node-name: the graph node name of the block driver state
 #
 #
 # Since: 2.0
 #
-# Example:
+# Examples:
 #
 # 1. Read operation
 #
-# { "event": "QUORUM_REPORT_BAD",
+# <- { "event": "QUORUM_REPORT_BAD",
 #      "data": { "node-name": "node0", "sector-num": 345435, "sectors-count": 5,
 #                "type": "read" },
 #      "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
 #
 # 2. Flush operation
 #
-# { "event": "QUORUM_REPORT_BAD",
+# <- { "event": "QUORUM_REPORT_BAD",
 #      "data": { "node-name": "node0", "sector-num": 0, "sectors-count": 2097120,
 #                "type": "flush", "error": "Broken pipe" },
 #      "timestamp": { "seconds": 1456406829, "microseconds": 291763 } }
-#
 ##
 { 'event': 'QUORUM_REPORT_BAD',
   'data': { 'type': 'QuorumOpType', '*error': 'str', 'node-name': 'str',
 ##
 # @BlockdevSnapshotInternal:
 #
-# @device: the device name or node-name of a root node to generate the snapshot
-#          from
+# @device: the device name or node-name of a root node to generate the
+#     snapshot from
 #
 # @name: the name of the internal snapshot to be created
 #
-# Notes: In transaction, if @name is empty, or any snapshot matching @name
-#        exists, the operation will fail. Only some image formats support it,
-#        for example, qcow2, rbd, and sheepdog.
+# Notes: In transaction, if @name is empty, or any snapshot matching
+#     @name exists, the operation will fail.  Only some image formats
+#     support it, for example, qcow2, and rbd.
 #
 # Since: 1.7
 ##
 # @blockdev-snapshot-internal-sync:
 #
 # Synchronously take an internal snapshot of a block device, when the
-# format of the image used supports it. If the name is an empty
+# format of the image used supports it.  If the name is an empty
 # string, or a snapshot with name already exists, the operation will
 # fail.
 #
-# For the arguments, see the documentation of BlockdevSnapshotInternal.
+# For the arguments, see the documentation of
+# BlockdevSnapshotInternal.
 #
-# Returns: - nothing on success
-#          - If @device is not a valid block device, GenericError
-#          - If any snapshot matching @name exists, or @name is empty,
-#            GenericError
-#          - If the format of the image used does not support it,
-#            BlockFormatFeatureNotSupported
+# Returns:
+#     - nothing on success
+#     - If @device is not a valid block device, GenericError
+#     - If any snapshot matching @name exists, or @name is empty,
+#       GenericError
+#     - If the format of the image used does not support it,
+#       GenericError
 #
 # Since: 1.7
 #
 #                     "name": "snapshot0" }
 #    }
 # <- { "return": {} }
-#
 ##
 { 'command': 'blockdev-snapshot-internal-sync',
-  'data': 'BlockdevSnapshotInternal' }
+  'data': 'BlockdevSnapshotInternal',
+  'allow-preconfig': true }
 
 ##
 # @blockdev-snapshot-delete-internal-sync:
 #
-# Synchronously delete an internal snapshot of a block device, when the format
-# of the image used support it. The snapshot is identified by name or id or
-# both. One of the name or id is required. Return SnapshotInfo for the
-# successfully deleted snapshot.
+# Synchronously delete an internal snapshot of a block device, when
+# the format of the image used support it.  The snapshot is identified
+# by name or id or both.  One of the name or id is required.  Return
+# SnapshotInfo for the successfully deleted snapshot.
 #
-# @device: the device name or node-name of a root node to delete the snapshot
-#          from
+# @device: the device name or node-name of a root node to delete the
+#     snapshot from
 #
 # @id: optional the snapshot's ID to be deleted
 #
 # @name: optional the snapshot's name to be deleted
 #
-# Returns: - SnapshotInfo on success
-#          - If @device is not a valid block device, GenericError
-#          - If snapshot not found, GenericError
-#          - If the format of the image used does not support it,
-#            BlockFormatFeatureNotSupported
-#          - If @id and @name are both not specified, GenericError
+# Returns:
+#     - SnapshotInfo on success
+#     - If @device is not a valid block device, GenericError
+#     - If snapshot not found, GenericError
+#     - If the format of the image used does not support it,
+#       GenericError
+#     - If @id and @name are both not specified, GenericError
 #
 # Since: 1.7
 #
 #                    "icount": 220414
 #      }
 #    }
-#
 ##
 { 'command': 'blockdev-snapshot-delete-internal-sync',
   'data': { 'device': 'str', '*id': 'str', '*name': 'str'},
-  'returns': 'SnapshotInfo' }
+  'returns': 'SnapshotInfo',
+  'allow-preconfig': true }
+
+##
+# @DummyBlockCoreForceArrays:
+#
+# Not used by QMP; hack to let us use BlockGraphInfoList internally
+#
+# Since: 8.0
+##
+{ 'struct': 'DummyBlockCoreForceArrays',
+  'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } }
index a9f488f99c1acf8f3accaa0048ebe9d599afb142..7874a49ba71320a849052ffd8ea335722fff83fa 100644 (file)
@@ -6,22 +6,29 @@
 ##
 
 { 'include': 'sockets.json' }
+{ 'include': 'block-core.json' }
 
 ##
 # @NbdServerOptions:
 #
-# Keep this type consistent with the nbd-server-start arguments. The only
-# intended difference is using SocketAddress instead of SocketAddressLegacy.
+# Keep this type consistent with the nbd-server-start arguments.  The
+# only intended difference is using SocketAddress instead of
+# SocketAddressLegacy.
 #
 # @addr: Address on which to listen.
+#
 # @tls-creds: ID of the TLS credentials object (since 2.6).
+#
 # @tls-authz: ID of the QAuthZ authorization object used to validate
-#             the client's x509 distinguished name. This object is
-#             is only resolved at time of use, so can be deleted and
-#             recreated on the fly while the NBD server is active.
-#             If missing, it will default to denying access (since 4.0).
-# @max-connections: The maximum number of connections to allow at the same
-#                   time, 0 for unlimited. (since 5.2; default: 0)
+#     the client's x509 distinguished name.  This object is is only
+#     resolved at time of use, so can be deleted and recreated on the
+#     fly while the NBD server is active.  If missing, it will default
+#     to denying access (since 4.0).
+#
+# @max-connections: The maximum number of connections to allow at the
+#     same time, 0 for unlimited.  Setting this to 1 also stops the
+#     server from advertising multiple client support (since 5.2;
+#     default: 0)
 #
 # Since: 4.2
 ##
 # @nbd-server-start:
 #
 # Start an NBD server listening on the given host and port.  Block
-# devices can then be exported using @nbd-server-add.  The NBD
-# server will present them as named exports; for example, another
-# QEMU instance could refer to them as "nbd:HOST:PORT:exportname=NAME".
+# devices can then be exported using @nbd-server-add.  The NBD server
+# will present them as named exports; for example, another QEMU
+# instance could refer to them as "nbd:HOST:PORT:exportname=NAME".
 #
-# Keep this type consistent with the NbdServerOptions type. The only intended
-# difference is using SocketAddressLegacy instead of SocketAddress.
+# Keep this type consistent with the NbdServerOptions type.  The only
+# intended difference is using SocketAddressLegacy instead of
+# SocketAddress.
 #
 # @addr: Address on which to listen.
+#
 # @tls-creds: ID of the TLS credentials object (since 2.6).
+#
 # @tls-authz: ID of the QAuthZ authorization object used to validate
-#             the client's x509 distinguished name. This object is
-#             is only resolved at time of use, so can be deleted and
-#             recreated on the fly while the NBD server is active.
-#             If missing, it will default to denying access (since 4.0).
-# @max-connections: The maximum number of connections to allow at the same
-#                   time, 0 for unlimited. (since 5.2; default: 0)
+#     the client's x509 distinguished name.  This object is is only
+#     resolved at time of use, so can be deleted and recreated on the
+#     fly while the NBD server is active.  If missing, it will default
+#     to denying access (since 4.0).
+#
+# @max-connections: The maximum number of connections to allow at the
+#     same time, 0 for unlimited.  Setting this to 1 also stops the
+#     server from advertising multiple client support (since 5.2;
+#     default: 0).
 #
 # Returns: error if the server is already running.
 #
-# Since: 1.3.0
+# Since: 1.3
 ##
 { 'command': 'nbd-server-start',
   'data': { 'addr': 'SocketAddressLegacy',
             '*tls-creds': 'str',
             '*tls-authz': 'str',
-            '*max-connections': 'uint32' } }
+            '*max-connections': 'uint32' },
+  'allow-preconfig': true }
 
 ##
 # @BlockExportOptionsNbdBase:
 #
-# An NBD block export (common options shared between nbd-server-add and
-# the NBD branch of block-export-add).
+# An NBD block export (common options shared between nbd-server-add
+# and the NBD branch of block-export-add).
 #
-# @name: Export name. If unspecified, the @device parameter is used as the
-#        export name. (Since 2.12)
+# @name: Export name.  If unspecified, the @device parameter is used
+#     as the export name.  (Since 2.12)
 #
 # @description: Free-form description of the export, up to 4096 bytes.
-#               (Since 5.0)
+#     (Since 5.0)
 #
 # Since: 5.0
 ##
 # block-export-add).
 #
 # @bitmaps: Also export each of the named dirty bitmaps reachable from
-#           @device, so the NBD client can use NBD_OPT_SET_META_CONTEXT with
-#           the metadata context name "qemu:dirty-bitmap:BITMAP" to inspect
-#           each bitmap.
+#     @device, so the NBD client can use NBD_OPT_SET_META_CONTEXT with
+#     the metadata context name "qemu:dirty-bitmap:BITMAP" to inspect
+#     each bitmap.  Since 7.1 bitmap may be specified by node/name
+#     pair.
 #
-# @allocation-depth: Also export the allocation depth map for @device, so
-#                    the NBD client can use NBD_OPT_SET_META_CONTEXT with
-#                    the metadata context name "qemu:allocation-depth" to
-#                    inspect allocation details. (since 5.2)
+# @allocation-depth: Also export the allocation depth map for @device,
+#     so the NBD client can use NBD_OPT_SET_META_CONTEXT with the
+#     metadata context name "qemu:allocation-depth" to inspect
+#     allocation details.  (since 5.2)
 #
 # Since: 5.2
 ##
 { 'struct': 'BlockExportOptionsNbd',
   'base': 'BlockExportOptionsNbdBase',
-  'data': { '*bitmaps': ['str'], '*allocation-depth': 'bool' } }
+  'data': { '*bitmaps': ['BlockDirtyBitmapOrStr'],
+            '*allocation-depth': 'bool' } }
 
 ##
 # @BlockExportOptionsVhostUserBlk:
 #
 # A vhost-user-blk block export.
 #
-# @addr: The vhost-user socket on which to listen. Both 'unix' and 'fd'
-#        SocketAddress types are supported. Passed fds must be UNIX domain
-#        sockets.
-# @logical-block-size: Logical block size in bytes. Defaults to 512 bytes.
-# @num-queues: Number of request virtqueues. Must be greater than 0. Defaults
-#              to 1.
+# @addr: The vhost-user socket on which to listen.  Both 'unix' and
+#     'fd' SocketAddress types are supported.  Passed fds must be UNIX
+#     domain sockets.
+#
+# @logical-block-size: Logical block size in bytes.  Defaults to 512
+#     bytes.
+#
+# @num-queues: Number of request virtqueues.  Must be greater than 0.
+#     Defaults to 1.
 #
 # Since: 5.2
 ##
            '*logical-block-size': 'size',
             '*num-queues': 'uint16'} }
 
+##
+# @FuseExportAllowOther:
+#
+# Possible allow_other modes for FUSE exports.
+#
+# @off: Do not pass allow_other as a mount option.
+#
+# @on: Pass allow_other as a mount option.
+#
+# @auto: Try mounting with allow_other first, and if that fails, retry
+#     without allow_other.
+#
+# Since: 6.1
+##
+{ 'enum': 'FuseExportAllowOther',
+  'data': ['off', 'on', 'auto'] }
+
+##
+# @BlockExportOptionsFuse:
+#
+# Options for exporting a block graph node on some (file) mountpoint
+# as a raw image.
+#
+# @mountpoint: Path on which to export the block device via FUSE. This
+#     must point to an existing regular file.
+#
+# @growable: Whether writes beyond the EOF should grow the block node
+#     accordingly.  (default: false)
+#
+# @allow-other: If this is off, only qemu's user is allowed access to
+#     this export.  That cannot be changed even with chmod or chown.
+#     Enabling this option will allow other users access to the export
+#     with the FUSE mount option "allow_other". Note that using
+#     allow_other as a non-root user requires user_allow_other to be
+#     enabled in the global fuse.conf configuration file.  In auto
+#     mode (the default), the FUSE export driver will first attempt to
+#     mount the export with allow_other, and if that fails, try again
+#     without.  (since 6.1; default: auto)
+#
+# Since: 6.0
+##
+{ 'struct': 'BlockExportOptionsFuse',
+  'data': { 'mountpoint': 'str',
+            '*growable': 'bool',
+            '*allow-other': 'FuseExportAllowOther' },
+  'if': 'CONFIG_FUSE' }
+
+##
+# @BlockExportOptionsVduseBlk:
+#
+# A vduse-blk block export.
+#
+# @name: the name of VDUSE device (must be unique across the host).
+#
+# @num-queues: the number of virtqueues.  Defaults to 1.
+#
+# @queue-size: the size of virtqueue.  Defaults to 256.
+#
+# @logical-block-size: Logical block size in bytes.  Range [512,
+#     PAGE_SIZE] and must be power of 2. Defaults to 512 bytes.
+#
+# @serial: the serial number of virtio block device.  Defaults to
+#     empty string.
+#
+# Since: 7.1
+##
+{ 'struct': 'BlockExportOptionsVduseBlk',
+  'data': { 'name': 'str',
+            '*num-queues': 'uint16',
+            '*queue-size': 'uint16',
+            '*logical-block-size': 'size',
+            '*serial': 'str' } }
+
 ##
 # @NbdServerAddOptions:
 #
 #
 # @device: The device name or node name of the node to be exported
 #
-# @writable: Whether clients should be able to write to the device via the
-#            NBD connection (default false).
+# @writable: Whether clients should be able to write to the device via
+#     the NBD connection (default false).
 #
-# @bitmap: Also export a single dirty bitmap reachable from @device, so the
-#          NBD client can use NBD_OPT_SET_META_CONTEXT with the metadata
-#          context name "qemu:dirty-bitmap:BITMAP" to inspect the bitmap
-#          (since 4.0).
+# @bitmap: Also export a single dirty bitmap reachable from @device,
+#     so the NBD client can use NBD_OPT_SET_META_CONTEXT with the
+#     metadata context name "qemu:dirty-bitmap:BITMAP" to inspect the
+#     bitmap (since 4.0).
 #
 # Since: 5.0
 ##
 #
 # Export a block node to QEMU's embedded NBD server.
 #
-# The export name will be used as the id for the resulting block export.
+# The export name will be used as the id for the resulting block
+# export.
 #
 # Features:
-# @deprecated: This command is deprecated. Use @block-export-add instead.
 #
-# Returns: error if the server is not running, or export with the same name
-#          already exists.
+# @deprecated: This command is deprecated.  Use @block-export-add
+#     instead.
 #
-# Since: 1.3.0
+# Returns: error if the server is not running, or export with the same
+#     name already exists.
+#
+# Since: 1.3
 ##
 { 'command': 'nbd-server-add',
-  'data': 'NbdServerAddOptions', 'boxed': true, 'features': ['deprecated'] }
+  'data': 'NbdServerAddOptions', 'boxed': true, 'features': ['deprecated'],
+  'allow-preconfig': true }
 
 ##
 # @BlockExportRemoveMode:
 #
 # Mode for removing a block export.
 #
-# @safe: Remove export if there are no existing connections, fail otherwise.
+# @safe: Remove export if there are no existing connections, fail
+#     otherwise.
 #
 # @hard: Drop all connections immediately and remove export.
 #
 # Potential additional modes to be added in the future:
 #
-# hide: Just hide export from new clients, leave existing connections as is.
-# Remove export after all clients are disconnected.
+# hide: Just hide export from new clients, leave existing connections
+# as is.  Remove export after all clients are disconnected.
 #
-# soft: Hide export from new clients, answer with ESHUTDOWN for all further
-# requests from existing clients.
+# soft: Hide export from new clients, answer with ESHUTDOWN for all
+# further requests from existing clients.
 #
 # Since: 2.12
 ##
 #
 # @name: Block export id.
 #
-# @mode: Mode of command operation. See @BlockExportRemoveMode description.
-#        Default is 'safe'.
+# @mode: Mode of command operation.  See @BlockExportRemoveMode
+#     description.  Default is 'safe'.
 #
 # Features:
-# @deprecated: This command is deprecated. Use @block-export-del instead.
+#
+# @deprecated: This command is deprecated.  Use @block-export-del
+#     instead.
 #
 # Returns: error if
-#            - the server is not running
-#            - export is not found
-#            - mode is 'safe' and there are existing connections
+#
+#     - the server is not running
+#     - export is not found
+#     - mode is 'safe' and there are existing connections
 #
 # Since: 2.12
 ##
 { 'command': 'nbd-server-remove',
   'data': {'name': 'str', '*mode': 'BlockExportRemoveMode'},
-  'features': ['deprecated'] }
+  'features': ['deprecated'],
+  'allow-preconfig': true }
 
 ##
 # @nbd-server-stop:
 #
-# Stop QEMU's embedded NBD server, and unregister all devices previously
-# added via @nbd-server-add.
+# Stop QEMU's embedded NBD server, and unregister all devices
+# previously added via @nbd-server-add.
 #
-# Since: 1.3.0
+# Since: 1.3
 ##
-{ 'command': 'nbd-server-stop' }
+{ 'command': 'nbd-server-stop',
+  'allow-preconfig': true }
 
 ##
 # @BlockExportType:
 # An enumeration of block export types
 #
 # @nbd: NBD export
+#
 # @vhost-user-blk: vhost-user-blk export (since 5.2)
 #
+# @fuse: FUSE export (since: 6.0)
+#
+# @vduse-blk: vduse-blk export (since 7.1)
+#
 # Since: 4.2
 ##
 { 'enum': 'BlockExportType',
-  'data': [ 'nbd', 'vhost-user-blk' ] }
+  'data': [ 'nbd',
+            { 'name': 'vhost-user-blk',
+              'if': 'CONFIG_VHOST_USER_BLK_SERVER' },
+            { 'name': 'fuse', 'if': 'CONFIG_FUSE' },
+            { 'name': 'vduse-blk', 'if': 'CONFIG_VDUSE_BLK_EXPORT' } ] }
 
 ##
 # @BlockExportOptions:
 #
-# Describes a block export, i.e. how single node should be exported on an
-# external interface.
+# Describes a block export, i.e. how single node should be exported on
+# an external interface.
 #
-# @id: A unique identifier for the block export (across all export types)
+# @id: A unique identifier for the block export (across all export
+#     types)
 #
-# @node-name: The node name of the block node to be exported (since: 5.2)
+# @node-name: The node name of the block node to be exported
+#     (since: 5.2)
 #
 # @writable: True if clients should be able to write to the export
-#            (default false)
+#     (default false)
 #
-# @writethrough: If true, caches are flushed after every write request to the
-#                export before completion is signalled. (since: 5.2;
-#                default: false)
+# @writethrough: If true, caches are flushed after every write request
+#     to the export before completion is signalled.  (since: 5.2;
+#     default: false)
 #
-# @iothread: The name of the iothread object where the export will run. The
-#            default is to use the thread currently associated with the
-#            block node. (since: 5.2)
+# @iothread: The name of the iothread object where the export will
+#     run.  The default is to use the thread currently associated with
+#     the block node.  (since: 5.2)
 #
-# @fixed-iothread: True prevents the block node from being moved to another
-#                  thread while the export is active. If true and @iothread is
-#                  given, export creation fails if the block node cannot be
-#                  moved to the iothread. The default is false. (since: 5.2)
+# @fixed-iothread: True prevents the block node from being moved to
+#     another thread while the export is active.  If true and
+#     @iothread is given, export creation fails if the block node
+#     cannot be moved to the iothread.  The default is false.
+#     (since: 5.2)
 #
 # Since: 4.2
 ##
   'discriminator': 'type',
   'data': {
       'nbd': 'BlockExportOptionsNbd',
-      'vhost-user-blk': 'BlockExportOptionsVhostUserBlk'
+      'vhost-user-blk': { 'type': 'BlockExportOptionsVhostUserBlk',
+                          'if': 'CONFIG_VHOST_USER_BLK_SERVER' },
+      'fuse': { 'type': 'BlockExportOptionsFuse',
+                'if': 'CONFIG_FUSE' },
+      'vduse-blk': { 'type': 'BlockExportOptionsVduseBlk',
+                     'if': 'CONFIG_VDUSE_BLK_EXPORT' }
    } }
 
 ##
 # Since: 5.2
 ##
 { 'command': 'block-export-add',
-  'data': 'BlockExportOptions', 'boxed': true }
+  'data': 'BlockExportOptions', 'boxed': true,
+  'allow-preconfig': true }
 
 ##
 # @block-export-del:
 #
-# Request to remove a block export. This drops the user's reference to the
-# export, but the export may still stay around after this command returns until
-# the shutdown of the export has completed.
+# Request to remove a block export.  This drops the user's reference
+# to the export, but the export may still stay around after this
+# command returns until the shutdown of the export has completed.
 #
 # @id: Block export id.
 #
-# @mode: Mode of command operation. See @BlockExportRemoveMode description.
-#        Default is 'safe'.
+# @mode: Mode of command operation.  See @BlockExportRemoveMode
+#     description.  Default is 'safe'.
 #
-# Returns: Error if the export is not found or @mode is 'safe' and the export
-#          is still in use (e.g. by existing client connections)
+# Returns: Error if the export is not found or @mode is 'safe' and the
+#     export is still in use (e.g. by existing client connections)
 #
 # Since: 5.2
 ##
 { 'command': 'block-export-del',
-  'data': { 'id': 'str', '*mode': 'BlockExportRemoveMode' } }
+  'data': { 'id': 'str', '*mode': 'BlockExportRemoveMode' },
+  'allow-preconfig': true }
 
 ##
 # @BLOCK_EXPORT_DELETED:
 # @node-name: The node name of the block node that is exported
 #
 # @shutting-down: True if the export is shutting down (e.g. after a
-#                 block-export-del command, but before the shutdown has
-#                 completed)
+#     block-export-del command, but before the shutdown has completed)
 #
-# Since:  5.2
+# Since: 5.2
 ##
 { 'struct': 'BlockExportInfo',
   'data': { 'id': 'str',
 #
 # Since: 5.2
 ##
-{ 'command': 'query-block-exports', 'returns': ['BlockExportInfo'] }
+{ 'command': 'query-block-exports', 'returns': ['BlockExportInfo'],
+  'allow-preconfig': true }
index a009f7d3a2de7b129839cdcd84bb67c336c7660f..998008cfa8fe4792837dde83fa0f4596d368782c 100644 (file)
 # translate logical CHS to physical; instead, they will use logical
 # block addressing.
 #
-# @auto: If cylinder/heads/sizes are passed, choose between none and LBA
-#        depending on the size of the disk.  If they are not passed,
-#        choose none if QEMU can guess that the disk had 16 or fewer
-#        heads, large if QEMU can guess that the disk had 131072 or
-#        fewer tracks across all heads (i.e. cylinders*heads<131072),
-#        otherwise LBA.
+# @auto: If cylinder/heads/sizes are passed, choose between none and
+#     LBA depending on the size of the disk.  If they are not passed,
+#     choose none if QEMU can guess that the disk had 16 or fewer
+#     heads, large if QEMU can guess that the disk had 131072 or fewer
+#     tracks across all heads (i.e. cylinders*heads<131072), otherwise
+#     LBA.
 #
 # @none: The physical disk geometry is equal to the logical geometry.
 #
 # @lba: Assume 63 sectors per track and one of 16, 32, 64, 128 or 255
-#       heads (if fewer than 255 are enough to cover the whole disk
-#       with 1024 cylinders/head).  The number of cylinders/head is
-#       then computed based on the number of sectors and heads.
+#     heads (if fewer than 255 are enough to cover the whole disk with
+#     1024 cylinders/head).  The number of cylinders/head is then
+#     computed based on the number of sectors and heads.
 #
-# @large: The number of cylinders per head is scaled down to 1024
-#         by correspondingly scaling up the number of heads.
+# @large: The number of cylinders per head is scaled down to 1024 by
+#     correspondingly scaling up the number of heads.
 #
 # @rechs: Same as @large, but first convert a 16-head geometry to
-#         15-head, by proportionally scaling up the number of
-#         cylinders/head.
+#     15-head, by proportionally scaling up the number of
+#     cylinders/head.
 #
 # Since: 2.0
 ##
 #
 # Type of Floppy drive to be emulated by the Floppy Disk Controller.
 #
-# @144:  1.44MB 3.5" drive
-# @288:  2.88MB 3.5" drive
-# @120:  1.2MB 5.25" drive
+# @144: 1.44MB 3.5" drive
+#
+# @288: 2.88MB 3.5" drive
+#
+# @120: 1.2MB 5.25" drive
+#
 # @none: No drive connected
+#
 # @auto: Automatically determined by inserted media at boot
 #
 # Since: 2.6
@@ -68,8 +72,8 @@
 #
 # @id: the identifier of the persistent reservation manager
 #
-# @connected: true if the persistent reservation manager is connected to
-#             the underlying storage or helper
+# @connected: true if the persistent reservation manager is connected
+#     to the underlying storage or helper
 #
 # Since: 3.0
 ##
 ##
 # @query-pr-managers:
 #
-# Returns a list of information about each persistent reservation manager.
+# Returns a list of information about each persistent reservation
+# manager.
 #
-# Returns: a list of @PRManagerInfo for each persistent reservation manager
+# Returns: a list of @PRManagerInfo for each persistent reservation
+#     manager
 #
 # Since: 3.0
 ##
 # @id: The name or QOM path of the guest device (since: 2.8)
 #
 # @force: If true, eject regardless of whether the drive is locked.
-#         If not specified, the default value is false.
+#     If not specified, the default value is false.
 #
 # Features:
+#
 # @deprecated: Member @device is deprecated.  Use @id instead.
 #
-# Returns: - Nothing on success
-#          - If @device is not a valid block device, DeviceNotFound
-# Notes:    Ejecting a device with no media results in success
+# Returns:
+#     - Nothing on success
+#     - If @device is not a valid block device, DeviceNotFound
+#
+# Notes: Ejecting a device with no media results in success
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 ##
 # @blockdev-open-tray:
 #
-# Opens a block device's tray. If there is a block driver state tree inserted as
-# a medium, it will become inaccessible to the guest (but it will remain
-# associated to the block device, so closing the tray will make it accessible
-# again).
+# Opens a block device's tray.  If there is a block driver state tree
+# inserted as a medium, it will become inaccessible to the guest (but
+# it will remain associated to the block device, so closing the tray
+# will make it accessible again).
 #
 # If the tray was already open before, this will be a no-op.
 #
-# Once the tray opens, a DEVICE_TRAY_MOVED event is emitted. There are cases in
-# which no such event will be generated, these include:
+# Once the tray opens, a DEVICE_TRAY_MOVED event is emitted.  There
+# are cases in which no such event will be generated, these include:
 #
-# - if the guest has locked the tray, @force is false and the guest does not
-#   respond to the eject request
-# - if the BlockBackend denoted by @device does not have a guest device attached
-#   to it
+# - if the guest has locked the tray, @force is false and the guest
+#   does not respond to the eject request
+# - if the BlockBackend denoted by @device does not have a guest
+#   device attached to it
 # - if the guest device does not have an actual tray
 #
 # @device: Block device name
 #
 # @id: The name or QOM path of the guest device (since: 2.8)
 #
-# @force: if false (the default), an eject request will be sent to
-#         the guest if it has locked the tray (and the tray will not be opened
-#         immediately); if true, the tray will be opened regardless of whether
-#         it is locked
+# @force: if false (the default), an eject request will be sent to the
+#     guest if it has locked the tray (and the tray will not be opened
+#     immediately); if true, the tray will be opened regardless of
+#     whether it is locked
 #
 # Features:
+#
 # @deprecated: Member @device is deprecated.  Use @id instead.
 #
 # Since: 2.5
 #                "tray-open": true } }
 #
 # <- { "return": {} }
-#
 ##
 { 'command': 'blockdev-open-tray',
   'data': { '*device': { 'type': 'str', 'features': [ 'deprecated' ] },
 ##
 # @blockdev-close-tray:
 #
-# Closes a block device's tray. If there is a block driver state tree associated
-# with the block device (which is currently ejected), that tree will be loaded
-# as the medium.
+# Closes a block device's tray.  If there is a block driver state tree
+# associated with the block device (which is currently ejected), that
+# tree will be loaded as the medium.
 #
 # If the tray was already closed before, this will be a no-op.
 #
 # @id: The name or QOM path of the guest device (since: 2.8)
 #
 # Features:
+#
 # @deprecated: Member @device is deprecated.  Use @id instead.
 #
 # Since: 2.5
 #                "tray-open": false } }
 #
 # <- { "return": {} }
-#
 ##
 { 'command': 'blockdev-close-tray',
   'data': { '*device': { 'type': 'str', 'features': [ 'deprecated' ] },
 ##
 # @blockdev-remove-medium:
 #
-# Removes a medium (a block driver state tree) from a block device. That block
-# device's tray must currently be open (unless there is no attached guest
-# device).
+# Removes a medium (a block driver state tree) from a block device.
+# That block device's tray must currently be open (unless there is no
+# attached guest device).
 #
-# If the tray is open and there is no medium inserted, this will be a no-op.
+# If the tray is open and there is no medium inserted, this will be a
+# no-op.
 #
 # @id: The name or QOM path of the guest device
 #
 #      "arguments": { "id": "ide0-1-0" } }
 #
 # <- { "return": {} }
-#
 ##
 { 'command': 'blockdev-remove-medium',
   'data': { 'id': 'str' } }
 ##
 # @blockdev-insert-medium:
 #
-# Inserts a medium (a block driver state tree) into a block device. That block
-# device's tray must currently be open (unless there is no attached guest
-# device) and there must be no medium inserted already.
+# Inserts a medium (a block driver state tree) into a block device.
+# That block device's tray must currently be open (unless there is no
+# attached guest device) and there must be no medium inserted already.
 #
 # @id: The name or QOM path of the guest device
 #
 #                     "node-name": "node0" } }
 #
 # <- { "return": {} }
-#
 ##
 { 'command': 'blockdev-insert-medium',
   'data': { 'id': 'str',
             'node-name': 'str'} }
 
-
 ##
 # @BlockdevChangeReadOnlyMode:
 #
 # @read-write: Makes the device writable
 #
 # Since: 2.3
-#
 ##
 { 'enum': 'BlockdevChangeReadOnlyMode',
   'data': ['retain', 'read-only', 'read-write'] }
 
-
 ##
 # @blockdev-change-medium:
 #
-# Changes the medium inserted into a block device by ejecting the current medium
-# and loading a new image file which is inserted as the new medium (this command
-# combines blockdev-open-tray, blockdev-remove-medium, blockdev-insert-medium
-# and blockdev-close-tray).
+# Changes the medium inserted into a block device by ejecting the
+# current medium and loading a new image file which is inserted as the
+# new medium (this command combines blockdev-open-tray,
+# blockdev-remove-medium, blockdev-insert-medium and
+# blockdev-close-tray).
 #
 # @device: Block device name
 #
-# @id: The name or QOM path of the guest device
-#      (since: 2.8)
+# @id: The name or QOM path of the guest device (since: 2.8)
 #
 # @filename: filename of the new image to be loaded
 #
-# @format: format to open the new image with (defaults to
-#          the probed format)
+# @format: format to open the new image with (defaults to the probed
+#     format)
 #
 # @read-only-mode: change the read-only mode of the device; defaults
-#                  to 'retain'
+#     to 'retain'
+#
+# @force: if false (the default), an eject request through
+#     blockdev-open-tray will be sent to the guest if it has locked
+#     the tray (and the tray will not be opened immediately); if true,
+#     the tray will be opened regardless of whether it is locked.
+#     (since 7.1)
 #
 # Features:
+#
 # @deprecated: Member @device is deprecated.  Use @id instead.
 #
 # Since: 2.5
 #                     "read-only-mode": "read-only" } }
 #
 # <- { "return": {} }
-#
 ##
 { 'command': 'blockdev-change-medium',
   'data': { '*device': { 'type': 'str', 'features': [ 'deprecated' ] },
             '*id': 'str',
             'filename': 'str',
             '*format': 'str',
+            '*force': 'bool',
             '*read-only-mode': 'BlockdevChangeReadOnlyMode' } }
 
-
 ##
 # @DEVICE_TRAY_MOVED:
 #
-# Emitted whenever the tray of a removable device is moved by the guest or by
-# HMP/QMP commands
+# Emitted whenever the tray of a removable device is moved by the
+# guest or by HMP/QMP commands
 #
-# @device: Block device name. This is always present for compatibility
-#          reasons, but it can be empty ("") if the image does not
-#          have a device name associated.
+# @device: Block device name.  This is always present for
+#     compatibility reasons, but it can be empty ("") if the image
+#     does not have a device name associated.
 #
 # @id: The name or QOM path of the guest device (since 2.8)
 #
-# @tray-open: true if the tray has been opened or false if it has been closed
+# @tray-open: true if the tray has been opened or false if it has been
+#     closed
 #
 # Since: 1.1
 #
 #                "tray-open": true
 #      },
 #      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
-#
 ##
 { 'event': 'DEVICE_TRAY_MOVED',
   'data': { 'device': 'str', 'id': 'str', 'tray-open': 'bool' } }
 #                "connected": true
 #      },
 #      "timestamp": { "seconds": 1519840375, "microseconds": 450486 } }
-#
 ##
 { 'event': 'PR_MANAGER_STATUS_CHANGED',
   'data': { 'id': 'str', 'connected': 'bool' } }
 #
 # If two or more devices are members of the same group, the limits
 # will apply to the combined I/O of the whole group in a round-robin
-# fashion. Therefore, setting new I/O limits to a device will affect
+# fashion.  Therefore, setting new I/O limits to a device will affect
 # the whole group.
 #
 # The name of the group can be specified using the 'group' parameter.
 # If the parameter is unset, it is assumed to be the current group of
-# that device. If it's not in any group yet, the name of the device
+# that device.  If it's not in any group yet, the name of the device
 # will be used as the name for its group.
 #
 # The 'group' parameter can also be used to move a device to a
-# different group. In this case the limits specified in the parameters
-# will be applied to the new group only.
+# different group.  In this case the limits specified in the
+# parameters will be applied to the new group only.
 #
 # I/O limits can be disabled by setting all of them to 0. In this case
 # the device will be removed from its group and the rest of its
-# members will not be affected. The 'group' parameter is ignored.
+# members will not be affected.  The 'group' parameter is ignored.
 #
-# Returns: - Nothing on success
-#          - If @device is not a valid block device, DeviceNotFound
+# Returns:
+#     - Nothing on success
+#     - If @device is not a valid block device, DeviceNotFound
 #
 # Since: 1.1
 #
-# Example:
+# Examples:
 #
 # -> { "execute": "block_set_io_throttle",
 #      "arguments": { "id": "virtio-blk-pci0/virtio-backend",
 # <- { "return": {} }
 ##
 { 'command': 'block_set_io_throttle', 'boxed': true,
-  'data': 'BlockIOThrottle' }
+  'data': 'BlockIOThrottle',
+  'allow-preconfig': true }
 
 ##
 # @block-latency-histogram-set:
 #
 # Manage read, write and flush latency histograms for the device.
 #
-# If only @id parameter is specified, remove all present latency histograms
-# for the device. Otherwise, add/reset some of (or all) latency histograms.
+# If only @id parameter is specified, remove all present latency
+# histograms for the device.  Otherwise, add/reset some of (or all)
+# latency histograms.
 #
 # @id: The name or QOM path of the guest device.
 #
 # @boundaries: list of interval boundary values (see description in
-#              BlockLatencyHistogramInfo definition). If specified, all
-#              latency histograms are removed, and empty ones created for all
-#              io types with intervals corresponding to @boundaries (except for
-#              io types, for which specific boundaries are set through the
-#              following parameters).
+#     BlockLatencyHistogramInfo definition). If specified, all latency
+#     histograms are removed, and empty ones created for all io types
+#     with intervals corresponding to @boundaries (except for io
+#     types, for which specific boundaries are set through the
+#     following parameters).
 #
 # @boundaries-read: list of interval boundary values for read latency
-#                   histogram. If specified, old read latency histogram is
-#                   removed, and empty one created with intervals
-#                   corresponding to @boundaries-read. The parameter has higher
-#                   priority then @boundaries.
+#     histogram.  If specified, old read latency histogram is removed,
+#     and empty one created with intervals corresponding to
+#     @boundaries-read.  The parameter has higher priority then
+#     @boundaries.
 #
-# @boundaries-write: list of interval boundary values for write latency
-#                    histogram.
+# @boundaries-write: list of interval boundary values for write
+#     latency histogram.
 #
-# @boundaries-flush: list of interval boundary values for flush latency
-#                    histogram.
+# @boundaries-zap: list of interval boundary values for zone append
+#     write latency histogram.
 #
-# Returns: error if device is not found or any boundary arrays are invalid.
+# @boundaries-flush: list of interval boundary values for flush
+#     latency histogram.
+#
+# Returns: error if device is not found or any boundary arrays are
+#     invalid.
 #
 # Since: 4.0
 #
 # Example:
-# set new histograms for all io types with intervals
-# [0, 10), [10, 50), [50, 100), [100, +inf):
+#
+# Set new histograms for all io types with intervals [0, 10), [10,
+# 50), [50, 100), [100, +inf):
 #
 # -> { "execute": "block-latency-histogram-set",
 #      "arguments": { "id": "drive0",
 # <- { "return": {} }
 #
 # Example:
-# set new histogram only for write, other histograms will remain
-# not changed (or not created):
+#
+# Set new histogram only for write, other histograms will remain not
+# changed (or not created):
 #
 # -> { "execute": "block-latency-histogram-set",
 #      "arguments": { "id": "drive0",
 # <- { "return": {} }
 #
 # Example:
-# set new histograms with the following intervals:
-#   read, flush: [0, 10), [10, 50), [50, 100), [100, +inf)
-#   write: [0, 1000), [1000, 5000), [5000, +inf)
+#
+# Set new histograms with the following intervals:   read, flush: [0,
+# 10), [10, 50), [50, 100), [100, +inf)   write: [0, 1000), [1000,
+# 5000), [5000, +inf)
 #
 # -> { "execute": "block-latency-histogram-set",
 #      "arguments": { "id": "drive0",
 # <- { "return": {} }
 #
 # Example:
-# remove all latency histograms:
+#
+# Remove all latency histograms:
 #
 # -> { "execute": "block-latency-histogram-set",
 #      "arguments": { "id": "drive0" } }
            '*boundaries': ['uint64'],
            '*boundaries-read': ['uint64'],
            '*boundaries-write': ['uint64'],
-           '*boundaries-flush': ['uint64'] } }
+           '*boundaries-zap': ['uint64'],
+           '*boundaries-flush': ['uint64'] },
+  'allow-preconfig': true }
index 43486d1daa93a9b612545cce27d6063daf63e537..c1bab7b855022274777a164072bf12ff3f58338f 100644 (file)
 #
 # @filename: the filename of the character device
 #
-# @frontend-open: shows whether the frontend device attached to this backend
-#                 (eg. with the chardev=... option) is in open or closed state
-#                 (since 2.1)
+# @frontend-open: shows whether the frontend device attached to this
+#     backend (e.g. with the chardev=... option) is in open or closed
+#     state (since 2.1)
 #
-# Notes: @filename is encoded using the QEMU command line character device
-#        encoding.  See the QEMU man page for details.
+# Notes: @filename is encoded using the QEMU command line character
+#     device encoding.  See the QEMU man page for details.
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'ChardevInfo',
   'data': { 'label': 'str',
@@ -38,7 +38,7 @@
 #
 # Returns: a list of @ChardevInfo
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 #       "return": [
 #          {
 #             "label": "charchannel0",
-#             "filename": "unix:/var/lib/libvirt/qemu/seabios.rhel6.agent,server",
+#             "filename": "unix:/var/lib/libvirt/qemu/seabios.rhel6.agent,server=on",
 #             "frontend-open": false
 #          },
 #          {
 #             "label": "charmonitor",
-#             "filename": "unix:/var/lib/libvirt/qemu/seabios.rhel6.monitor,server",
+#             "filename": "unix:/var/lib/libvirt/qemu/seabios.rhel6.monitor,server=on",
 #             "frontend-open": true
 #          },
 #          {
@@ -62,7 +62,6 @@
 #          }
 #       ]
 #    }
-#
 ##
 { 'command': 'query-chardev', 'returns': ['ChardevInfo'],
   'allow-preconfig': true }
 #          }
 #       ]
 #    }
-#
 ##
 { 'command': 'query-chardev-backends', 'returns': ['ChardevBackendInfo'] }
 
 #
 # @format: data encoding (default 'utf8').
 #
-#          - base64: data must be base64 encoded text.  Its binary
-#            decoding gets written.
-#          - utf8: data's UTF-8 encoding is written
-#          - data itself is always Unicode regardless of format, like
-#            any other string.
+#     - base64: data must be base64 encoded text.  Its binary decoding
+#       gets written.
+#     - utf8: data's UTF-8 encoding is written
+#     - data itself is always Unicode regardless of format, like any
+#       other string.
 #
 # Returns: Nothing on success
 #
 #                     "data": "abcdefgh",
 #                     "format": "utf8" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'ringbuf-write',
   'data': { 'device': 'str',
 #
 # @format: data encoding (default 'utf8').
 #
-#          - base64: the data read is returned in base64 encoding.
-#          - utf8: the data read is interpreted as UTF-8.
-#            Bug: can screw up when the buffer contains invalid UTF-8
-#            sequences, NUL characters, after the ring buffer lost
-#            data, and when reading stops because the size limit is
-#            reached.
-#          - The return value is always Unicode regardless of format,
-#            like any other string.
+#     - base64: the data read is returned in base64 encoding.
+#     - utf8: the data read is interpreted as UTF-8.
+#       Bug: can screw up when the buffer contains invalid UTF-8
+#       sequences, NUL characters, after the ring buffer lost data,
+#       and when reading stops because the size limit is reached.
+#     - The return value is always Unicode regardless of format, like
+#       any other string.
 #
 # Returns: data read from the device
 #
 #                     "size": 1000,
 #                     "format": "utf8" } }
 # <- { "return": "abcdefgh" }
-#
 ##
 { 'command': 'ringbuf-read',
   'data': {'device': 'str', 'size': 'int', '*format': 'DataFormat'},
 # Configuration shared across all chardev backends
 #
 # @logfile: The name of a logfile to save output
-# @logappend: true to append instead of truncate
-#             (default to false to truncate)
+#
+# @logappend: true to append instead of truncate (default to false to
+#     truncate)
 #
 # Since: 2.6
 ##
 #
 # Configuration info for file chardevs.
 #
-# @in:  The name of the input file
+# @in: The name of the input file
+#
 # @out: The name of the output file
-# @append: Open the file in append mode (default false to
-#          truncate) (Since 2.6)
+#
+# @append: Open the file in append mode (default false to truncate)
+#     (Since 2.6)
 #
 # Since: 1.4
 ##
 #
 # Configuration info for device and pipe chardevs.
 #
-# @device: The name of the special file for the device,
-#          i.e. /dev/ttyS0 on Unix or COM1: on Windows
+# @device: The name of the special file for the device, i.e.
+#     /dev/ttyS0 on Unix or COM1: on Windows
 #
 # Since: 1.4
 ##
 #
 # Configuration info for (stream) socket chardevs.
 #
-# @addr: socket address to listen on (server=true)
-#        or connect to (server=false)
+# @addr: socket address to listen on (server=true) or connect to
+#     (server=false)
+#
 # @tls-creds: the ID of the TLS credentials object (since 2.6)
+#
 # @tls-authz: the ID of the QAuthZ authorization object against which
-#             the client's x509 distinguished name will be validated. This
-#             object is only resolved at time of use, so can be deleted
-#             and recreated on the fly while the chardev server is active.
-#             If missing, it will default to denying access (since 4.0)
+#     the client's x509 distinguished name will be validated.  This
+#     object is only resolved at time of use, so can be deleted and
+#     recreated on the fly while the chardev server is active.  If
+#     missing, it will default to denying access (since 4.0)
+#
 # @server: create server socket (default: true)
-# @wait: wait for incoming connection on server
-#        sockets (default: false).
-#        Silently ignored with server: false.  This use is deprecated.
+#
+# @wait: wait for incoming connection on server sockets (default:
+#     false). Silently ignored with server: false.  This use is
+#     deprecated.
+#
 # @nodelay: set TCP_NODELAY socket option (default: false)
-# @telnet: enable telnet protocol on server
-#          sockets (default: false)
-# @tn3270: enable tn3270 protocol on server
-#          sockets (default: false) (Since: 2.10)
-# @websocket: enable websocket protocol on server
-#             sockets (default: false) (Since: 3.1)
-# @reconnect: For a client socket, if a socket is disconnected,
-#             then attempt a reconnect after the given number of seconds.
-#             Setting this to zero disables this function. (default: 0)
-#             (Since: 2.2)
+#
+# @telnet: enable telnet protocol on server sockets (default: false)
+#
+# @tn3270: enable tn3270 protocol on server sockets (default: false)
+#     (Since: 2.10)
+#
+# @websocket: enable websocket protocol on server sockets
+#     (default: false) (Since: 3.1)
+#
+# @reconnect: For a client socket, if a socket is disconnected, then
+#     attempt a reconnect after the given number of seconds.  Setting
+#     this to zero disables this function.  (default: 0) (Since: 2.2)
 #
 # Since: 1.4
 ##
 # Configuration info for datagram socket chardevs.
 #
 # @remote: remote address
+#
 # @local: local address
 #
 # Since: 1.5
 #
 # Configuration info for stdio chardevs.
 #
-# @signal: Allow signals (such as SIGINT triggered by ^C)
-#          be delivered to qemu.  Default: true.
+# @signal: Allow signals (such as SIGINT triggered by ^C) be delivered
+#     to qemu.  Default: true.
 #
 # Since: 1.5
 ##
   'data': { '*signal': 'bool' },
   'base': 'ChardevCommon' }
 
-
 ##
 # @ChardevSpiceChannel:
 #
 { 'struct': 'ChardevSpiceChannel',
   'data': { 'type': 'str' },
   'base': 'ChardevCommon',
-  'if': 'defined(CONFIG_SPICE)' }
+  'if': 'CONFIG_SPICE' }
 
 ##
 # @ChardevSpicePort:
 { 'struct': 'ChardevSpicePort',
   'data': { 'fqdn': 'str' },
   'base': 'ChardevCommon',
-  'if': 'defined(CONFIG_SPICE)' }
+  'if': 'CONFIG_SPICE' }
+
+##
+# @ChardevDBus:
+#
+# Configuration info for DBus chardevs.
+#
+# @name: name of the channel (following docs/spice-port-fqdn.txt)
+#
+# Since: 7.0
+##
+{ 'struct': 'ChardevDBus',
+  'data': { 'name': 'str' },
+  'base': 'ChardevCommon',
+  'if': 'CONFIG_DBUS_DISPLAY' }
 
 ##
 # @ChardevVC:
 #
 # Configuration info for virtual console chardevs.
 #
-# @width:  console width,  in pixels
+# @width: console width, in pixels
+#
 # @height: console height, in pixels
-# @cols:   console width,  in chars
-# @rows:   console height, in chars
+#
+# @cols: console width, in chars
+#
+# @rows: console height, in chars
+#
+# Note: the options are only effective when the VNC or SDL graphical
+# display backend is active. They are ignored with the GTK, Spice, VNC
+# and D-Bus display backends.
 #
 # Since: 1.5
 ##
   'data': { '*size': 'int' },
   'base': 'ChardevCommon' }
 
+##
+# @ChardevQemuVDAgent:
+#
+# Configuration info for qemu vdagent implementation.
+#
+# @mouse: enable/disable mouse, default is enabled.
+#
+# @clipboard: enable/disable clipboard, default is disabled.
+#
+# Since: 6.1
+##
+{ 'struct': 'ChardevQemuVDAgent',
+  'data': { '*mouse': 'bool',
+            '*clipboard': 'bool' },
+  'base': 'ChardevCommon',
+  'if': 'CONFIG_SPICE_PROTOCOL' }
+
+##
+# @ChardevBackendKind:
+#
+# @pipe: Since 1.5
+#
+# @udp: Since 1.5
+#
+# @mux: Since 1.5
+#
+# @msmouse: Since 1.5
+#
+# @wctablet: Since 2.9
+#
+# @braille: Since 1.5
+#
+# @testdev: Since 2.2
+#
+# @stdio: Since 1.5
+#
+# @console: Since 1.5
+#
+# @spicevmc: Since 1.5
+#
+# @spiceport: Since 1.5
+#
+# @qemu-vdagent: Since 6.1
+#
+# @dbus: Since 7.0
+#
+# @vc: v1.5
+#
+# @ringbuf: Since 1.6
+#
+# @memory: Since 1.5
+#
+# Since: 1.4
+##
+{ 'enum': 'ChardevBackendKind',
+  'data': [ 'file',
+            'serial',
+            'parallel',
+            'pipe',
+            'socket',
+            'udp',
+            'pty',
+            'null',
+            'mux',
+            'msmouse',
+            'wctablet',
+            'braille',
+            'testdev',
+            'stdio',
+            'console',
+            { 'name': 'spicevmc', 'if': 'CONFIG_SPICE' },
+            { 'name': 'spiceport', 'if': 'CONFIG_SPICE' },
+            { 'name': 'qemu-vdagent', 'if': 'CONFIG_SPICE_PROTOCOL' },
+            { 'name': 'dbus', 'if': 'CONFIG_DBUS_DISPLAY' },
+            'vc',
+            'ringbuf',
+            # next one is just for compatibility
+            'memory' ] }
+
+##
+# @ChardevFileWrapper:
+#
+# Since: 1.4
+##
+{ 'struct': 'ChardevFileWrapper',
+  'data': { 'data': 'ChardevFile' } }
+
+##
+# @ChardevHostdevWrapper:
+#
+# Since: 1.4
+##
+{ 'struct': 'ChardevHostdevWrapper',
+  'data': { 'data': 'ChardevHostdev' } }
+
+##
+# @ChardevSocketWrapper:
+#
+# Since: 1.4
+##
+{ 'struct': 'ChardevSocketWrapper',
+  'data': { 'data': 'ChardevSocket' } }
+
+##
+# @ChardevUdpWrapper:
+#
+# Since: 1.5
+##
+{ 'struct': 'ChardevUdpWrapper',
+  'data': { 'data': 'ChardevUdp' } }
+
+##
+# @ChardevCommonWrapper:
+#
+# Since: 2.6
+##
+{ 'struct': 'ChardevCommonWrapper',
+  'data': { 'data': 'ChardevCommon' } }
+
+##
+# @ChardevMuxWrapper:
+#
+# Since: 1.5
+##
+{ 'struct': 'ChardevMuxWrapper',
+  'data': { 'data': 'ChardevMux' } }
+
+##
+# @ChardevStdioWrapper:
+#
+# Since: 1.5
+##
+{ 'struct': 'ChardevStdioWrapper',
+  'data': { 'data': 'ChardevStdio' } }
+
+##
+# @ChardevSpiceChannelWrapper:
+#
+# Since: 1.5
+##
+{ 'struct': 'ChardevSpiceChannelWrapper',
+  'data': { 'data': 'ChardevSpiceChannel' },
+  'if': 'CONFIG_SPICE' }
+
+##
+# @ChardevSpicePortWrapper:
+#
+# Since: 1.5
+##
+{ 'struct': 'ChardevSpicePortWrapper',
+  'data': { 'data': 'ChardevSpicePort' },
+  'if': 'CONFIG_SPICE' }
+
+##
+# @ChardevQemuVDAgentWrapper:
+#
+# Since: 6.1
+##
+{ 'struct': 'ChardevQemuVDAgentWrapper',
+  'data': { 'data': 'ChardevQemuVDAgent' },
+  'if': 'CONFIG_SPICE_PROTOCOL' }
+
+##
+# @ChardevDBusWrapper:
+#
+# Since: 7.0
+##
+{ 'struct': 'ChardevDBusWrapper',
+  'data': { 'data': 'ChardevDBus' },
+  'if': 'CONFIG_DBUS_DISPLAY' }
+
+##
+# @ChardevVCWrapper:
+#
+# Since: 1.5
+##
+{ 'struct': 'ChardevVCWrapper',
+  'data': { 'data': 'ChardevVC' } }
+
+##
+# @ChardevRingbufWrapper:
+#
+# Since: 1.5
+##
+{ 'struct': 'ChardevRingbufWrapper',
+  'data': { 'data': 'ChardevRingbuf' } }
+
 ##
 # @ChardevBackend:
 #
 # Configuration info for the new chardev backend.
 #
-# Since: 1.4 (testdev since 2.2, wctablet since 2.9)
+# Since: 1.4
 ##
 { 'union': 'ChardevBackend',
-  'data': { 'file': 'ChardevFile',
-            'serial': 'ChardevHostdev',
-            'parallel': 'ChardevHostdev',
-            'pipe': 'ChardevHostdev',
-            'socket': 'ChardevSocket',
-            'udp': 'ChardevUdp',
-            'pty': 'ChardevCommon',
-            'null': 'ChardevCommon',
-            'mux': 'ChardevMux',
-            'msmouse': 'ChardevCommon',
-            'wctablet': 'ChardevCommon',
-            'braille': 'ChardevCommon',
-            'testdev': 'ChardevCommon',
-            'stdio': 'ChardevStdio',
-            'console': 'ChardevCommon',
-            'spicevmc': { 'type': 'ChardevSpiceChannel',
-                          'if': 'defined(CONFIG_SPICE)' },
-            'spiceport': { 'type': 'ChardevSpicePort',
-                           'if': 'defined(CONFIG_SPICE)' },
-            'vc': 'ChardevVC',
-            'ringbuf': 'ChardevRingbuf',
+  'base': { 'type': 'ChardevBackendKind' },
+  'discriminator': 'type',
+  'data': { 'file': 'ChardevFileWrapper',
+            'serial': 'ChardevHostdevWrapper',
+            'parallel': 'ChardevHostdevWrapper',
+            'pipe': 'ChardevHostdevWrapper',
+            'socket': 'ChardevSocketWrapper',
+            'udp': 'ChardevUdpWrapper',
+            'pty': 'ChardevCommonWrapper',
+            'null': 'ChardevCommonWrapper',
+            'mux': 'ChardevMuxWrapper',
+            'msmouse': 'ChardevCommonWrapper',
+            'wctablet': 'ChardevCommonWrapper',
+            'braille': 'ChardevCommonWrapper',
+            'testdev': 'ChardevCommonWrapper',
+            'stdio': 'ChardevStdioWrapper',
+            'console': 'ChardevCommonWrapper',
+            'spicevmc': { 'type': 'ChardevSpiceChannelWrapper',
+                          'if': 'CONFIG_SPICE' },
+            'spiceport': { 'type': 'ChardevSpicePortWrapper',
+                           'if': 'CONFIG_SPICE' },
+            'qemu-vdagent': { 'type': 'ChardevQemuVDAgentWrapper',
+                              'if': 'CONFIG_SPICE_PROTOCOL' },
+            'dbus': { 'type': 'ChardevDBusWrapper',
+                      'if': 'CONFIG_DBUS_DISPLAY' },
+            'vc': 'ChardevVCWrapper',
+            'ringbuf': 'ChardevRingbufWrapper',
             # next one is just for compatibility
-            'memory': 'ChardevRingbuf' } }
+            'memory': 'ChardevRingbufWrapper' } }
 
 ##
 # @ChardevReturn:
 #
 # Return info about the chardev backend just created.
 #
-# @pty: name of the slave pseudoterminal device, present if
-#       and only if a chardev of type 'pty' was created
+# @pty: name of the slave pseudoterminal device, present if and only
+#     if a chardev of type 'pty' was created
 #
 # Since: 1.4
 ##
 # Add a character device backend
 #
 # @id: the chardev's ID, must be unique
+#
 # @backend: backend type and parameters
 #
 # Returns: ChardevReturn.
 #
 # Since: 1.4
 #
-# Example:
+# Examples:
 #
 # -> { "execute" : "chardev-add",
 #      "arguments" : { "id" : "foo",
 #      "arguments" : { "id" : "baz",
 #                      "backend" : { "type" : "pty", "data" : {} } } }
 # <- { "return": { "pty" : "/dev/pty/42" } }
-#
 ##
 { 'command': 'chardev-add',
   'data': { 'id': 'str',
 # Change a character device backend
 #
 # @id: the chardev's ID, must exist
+#
 # @backend: new backend type and parameters
 #
 # Returns: ChardevReturn.
 #
 # Since: 2.10
 #
-# Example:
+# Examples:
 #
 # -> { "execute" : "chardev-change",
 #      "arguments" : { "id" : "baz",
 #                  "server" : true,
 #                  "wait" : false }}}}
 # <- {"return": {}}
-#
 ##
 { 'command': 'chardev-change',
   'data': { 'id': 'str',
 #
 # -> { "execute": "chardev-remove", "arguments": { "id" : "foo" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'chardev-remove',
   'data': { 'id': 'str' } }
 #
 # -> { "execute": "chardev-send-break", "arguments": { "id" : "foo" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'chardev-send-break',
   'data': { 'id': 'str' } }
 # <- { "event": "VSERPORT_CHANGE",
 #      "data": { "id": "channel0", "open": true },
 #      "timestamp": { "seconds": 1401385907, "microseconds": 422329 } }
-#
 ##
 { 'event': 'VSERPORT_CHANGE',
   'data': { 'id': 'str',
index 716712d4b317e81c926bf4f308e869b651c05330..6fed9cde1a9e11839d9532c72d5e6311c378d177 100644 (file)
@@ -70,6 +70,7 @@
 # has a different meaning.
 #
 # @s: the string value
+#
 # @n: no string value
 #
 # Since: 2.10
 ##
 { 'enum': 'PCIELinkWidth',
   'data': [ '1', '2', '4', '8', '12', '16', '32' ] }
+
+##
+# @HostMemPolicy:
+#
+# Host memory policy types
+#
+# @default: restore default policy, remove any nondefault policy
+#
+# @preferred: set the preferred host nodes for allocation
+#
+# @bind: a strict policy that restricts memory allocation to the host
+#     nodes specified
+#
+# @interleave: memory allocations are interleaved across the set of
+#     host nodes specified
+#
+# Since: 2.1
+##
+{ 'enum': 'HostMemPolicy',
+  'data': [ 'default', 'preferred', 'bind', 'interleave' ] }
+
+##
+# @NetFilterDirection:
+#
+# Indicates whether a netfilter is attached to a netdev's transmit
+# queue or receive queue or both.
+#
+# @all: the filter is attached both to the receive and the transmit
+#     queue of the netdev (default).
+#
+# @rx: the filter is attached to the receive queue of the netdev,
+#     where it will receive packets sent to the netdev.
+#
+# @tx: the filter is attached to the transmit queue of the netdev,
+#     where it will receive packets sent by the netdev.
+#
+# Since: 2.5
+##
+{ 'enum': 'NetFilterDirection',
+  'data': [ 'all', 'rx', 'tx' ] }
+
+##
+# @GrabToggleKeys:
+#
+# Keys to toggle input-linux between host and guest.
+#
+# Since: 4.0
+##
+{ 'enum': 'GrabToggleKeys',
+  'data': [ 'ctrl-ctrl', 'alt-alt', 'shift-shift','meta-meta', 'scrolllock',
+            'ctrl-scrolllock' ] }
+
+##
+# @HumanReadableText:
+#
+# @human-readable-text: Formatted output intended for humans.
+#
+# Since: 6.2
+##
+{ 'struct': 'HumanReadableText',
+  'data': { 'human-readable-text': 'str' } }
diff --git a/qapi/compat.json b/qapi/compat.json
new file mode 100644 (file)
index 0000000..42034d9
--- /dev/null
@@ -0,0 +1,69 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+
+##
+# = Compatibility policy
+##
+
+##
+# @CompatPolicyInput:
+#
+# Policy for handling "funny" input.
+#
+# @accept: Accept silently
+#
+# @reject: Reject with an error
+#
+# @crash: abort() the process
+#
+# Since: 6.0
+##
+{ 'enum': 'CompatPolicyInput',
+  'data': [ 'accept', 'reject', 'crash' ] }
+
+##
+# @CompatPolicyOutput:
+#
+# Policy for handling "funny" output.
+#
+# @accept: Pass on unchanged
+#
+# @hide: Filter out
+#
+# Since: 6.0
+##
+{ 'enum': 'CompatPolicyOutput',
+  'data': [ 'accept', 'hide' ] }
+
+##
+# @CompatPolicy:
+#
+# Policy for handling deprecated management interfaces.
+#
+# This is intended for testing users of the management interfaces.
+#
+# Limitation: covers only syntactic aspects of QMP, i.e. stuff tagged
+# with feature 'deprecated' or 'unstable'.  We may want to extend it
+# to cover semantic aspects and CLI.
+#
+# Limitation: deprecated-output policy @hide is not implemented for
+# enumeration values.  They behave the same as with policy @accept.
+#
+# @deprecated-input: how to handle deprecated input (default 'accept')
+#
+# @deprecated-output: how to handle deprecated output (default
+#     'accept')
+#
+# @unstable-input: how to handle unstable input (default 'accept')
+#     (since 6.2)
+#
+# @unstable-output: how to handle unstable output (default 'accept')
+#     (since 6.2)
+#
+# Since: 6.0
+##
+{ 'struct': 'CompatPolicy',
+  'data': { '*deprecated-input': 'CompatPolicyInput',
+            '*deprecated-output': 'CompatPolicyOutput',
+            '*unstable-input': 'CompatPolicyInput',
+            '*unstable-output': 'CompatPolicyOutput' } }
index 134f842bafa52c2db9436591a3244c4d5222e39a..a91fa334079fac3608d480099323b48cef6aca0e 100644 (file)
 # Arguments:
 #
 # @enable: An optional list of QMPCapability values to enable.  The
-#          client must not enable any capability that is not
-#          mentioned in the QMP greeting message.  If the field is not
-#          provided, it means no QMP capabilities will be enabled.
-#          (since 2.12)
+#     client must not enable any capability that is not mentioned in
+#     the QMP greeting message.  If the field is not provided, it
+#     means no QMP capabilities will be enabled.  (since 2.12)
 #
 # Example:
 #
 #      "arguments": { "enable": [ "oob" ] } }
 # <- { "return": {} }
 #
-# Notes: This command is valid exactly when first connecting: it must be
-#        issued before any other command will be accepted, and will fail once the
-#        monitor is accepting other commands. (see qemu docs/interop/qmp-spec.txt)
+# Notes: This command is valid exactly when first connecting: it must
+#     be issued before any other command will be accepted, and will
+#     fail once the monitor is accepting other commands.  (see qemu
+#     docs/interop/qmp-spec.rst)
 #
-#        The QMP client needs to explicitly enable QMP capabilities, otherwise
-#        all the QMP capabilities will be turned off by default.
+#     The QMP client needs to explicitly enable QMP capabilities,
+#     otherwise all the QMP capabilities will be turned off by
+#     default.
 #
 # Since: 0.13
-#
 ##
 { 'command': 'qmp_capabilities',
   'data': { '*enable': [ 'QMPCapability' ] },
 # Enumeration of capabilities to be advertised during initial client
 # connection, used for agreeing on particular QMP extension behaviors.
 #
-# @oob: QMP ability to support out-of-band requests.
-#       (Please refer to qmp-spec.txt for more information on OOB)
+# @oob: QMP ability to support out-of-band requests.  (Please refer to
+#     qmp-spec.rst for more information on OOB)
 #
 # Since: 2.12
-#
 ##
 { 'enum': 'QMPCapability',
   'data': [ 'oob' ] }
 { 'struct': 'VersionTriple',
   'data': {'major': 'int', 'minor': 'int', 'micro': 'int'} }
 
-
 ##
 # @VersionInfo:
 #
 # A description of QEMU's version.
 #
-# @qemu: The version of QEMU.  By current convention, a micro
-#        version of 50 signifies a development branch.  A micro version
-#        greater than or equal to 90 signifies a release candidate for
-#        the next minor version.  A micro version of less than 50
-#        signifies a stable release.
+# @qemu: The version of QEMU.  By current convention, a micro version
+#     of 50 signifies a development branch.  A micro version greater
+#     than or equal to 90 signifies a release candidate for the next
+#     minor version.  A micro version of less than 50 signifies a
+#     stable release.
 #
-# @package: QEMU will always set this field to an empty string.  Downstream
-#           versions of QEMU should set this to a non-empty string.  The
-#           exact format depends on the downstream however it highly
-#           recommended that a unique name is used.
+# @package: QEMU will always set this field to an empty string.
+#     Downstream versions of QEMU should set this to a non-empty
+#     string.  The exact format depends on the downstream however it
+#     highly recommended that a unique name is used.
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'VersionInfo',
   'data': {'qemu': 'VersionTriple', 'package': 'str'} }
 #
 # Returns the current version of QEMU.
 #
-# Returns: A @VersionInfo object describing the current version of QEMU.
+# Returns: A @VersionInfo object describing the current version of
+#     QEMU.
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 #          "package":""
 #       }
 #    }
-#
 ##
 { 'command': 'query-version', 'returns': 'VersionInfo',
   'allow-preconfig': true }
 #
 # @name: The command name
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'CommandInfo', 'data': {'name': 'str'} }
 
 #
 # Returns: A list of @CommandInfo for all supported commands
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 #      ]
 #    }
 #
-# Note: This example has been shortened as the real response is too long.
-#
+# Note: This example has been shortened as the real response is too
+#     long.
 ##
 { 'command': 'query-commands', 'returns': ['CommandInfo'],
   'allow-preconfig': true }
 
-##
-# @EventInfo:
-#
-# Information about a QMP event
-#
-# @name: The event name
-#
-# Since: 1.2.0
-##
-{ 'struct': 'EventInfo', 'data': {'name': 'str'} }
-
-##
-# @query-events:
-#
-# Return information on QMP events.
-#
-# Features:
-# @deprecated: This command is deprecated, because its output doesn't
-#              reflect compile-time configuration.  Use 'query-qmp-schema'
-#              instead.
-#
-# Returns: A list of @EventInfo.
-#
-# Since: 1.2.0
-#
-# Example:
-#
-# -> { "execute": "query-events" }
-# <- {
-#      "return": [
-#          {
-#             "name":"SHUTDOWN"
-#          },
-#          {
-#             "name":"RESET"
-#          }
-#       ]
-#    }
-#
-# Note: This example has been shortened as the real response is too long.
-#
-##
-{ 'command': 'query-events', 'returns': ['EventInfo'],
-  'features': [ 'deprecated' ] }
-
 ##
 # @quit:
 #
-# This command will cause the QEMU process to exit gracefully.  While every
-# attempt is made to send the QMP response before terminating, this is not
-# guaranteed.  When using this interface, a premature EOF would not be
-# unexpected.
+# This command will cause the QEMU process to exit gracefully.  While
+# every attempt is made to send the QMP response before terminating,
+# this is not guaranteed.  When using this interface, a premature EOF
+# would not be unexpected.
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 # -> { "execute": "quit" }
 # <- { "return": {} }
 ##
-{ 'command': 'quit' }
+{ 'command': 'quit',
+  'allow-preconfig': true }
 
 ##
 # @MonitorMode:
 #
 # Options to be used for adding a new monitor.
 #
-# @id:          Name of the monitor
+# @id: Name of the monitor
 #
-# @mode:        Selects the monitor mode (default: readline in the system
-#               emulator, control in qemu-storage-daemon)
+# @mode: Selects the monitor mode (default: readline in the system
+#     emulator, control in qemu-storage-daemon)
 #
-# @pretty:      Enables pretty printing (QMP only)
+# @pretty: Enables pretty printing (QMP only)
 #
-# @chardev:     Name of a character device to expose the monitor on
+# @chardev: Name of a character device to expose the monitor on
 #
 # Since: 5.0
 ##
index 2aebe6fa20fd8051667a2e57d8ce37eec4c4de13..fd3d46ebd12821fbed62196e76b87ef25be6802e 100644 (file)
@@ -11,8 +11,7 @@
 #
 # The type of network endpoint that will be using the credentials.
 # Most types of credential require different setup / structures
-# depending on whether they will be used in a server versus a
-# client.
+# depending on whether they will be used in a server versus a client.
 #
 # @client: the network endpoint is acting as the client
 #
   'prefix': 'QCRYPTO_TLS_CREDS_ENDPOINT',
   'data': ['client', 'server']}
 
-
 ##
 # @QCryptoSecretFormat:
 #
 # The data format that the secret is provided in
 #
-# @raw: raw bytes. When encoded in JSON only valid UTF-8 sequences can be used
+# @raw: raw bytes.  When encoded in JSON only valid UTF-8 sequences
+#     can be used
+#
 # @base64: arbitrary base64 encoded binary data
+#
 # Since: 2.6
 ##
 { 'enum': 'QCryptoSecretFormat',
   'prefix': 'QCRYPTO_SECRET_FORMAT',
   'data': ['raw', 'base64']}
 
-
 ##
 # @QCryptoHashAlgorithm:
 #
 # The supported algorithms for computing content digests
 #
 # @md5: MD5. Should not be used in any new code, legacy compat only
+#
 # @sha1: SHA-1. Should not be used in any new code, legacy compat only
+#
 # @sha224: SHA-224. (since 2.7)
+#
 # @sha256: SHA-256. Current recommended strong hash.
+#
 # @sha384: SHA-384. (since 2.7)
+#
 # @sha512: SHA-512. (since 2.7)
+#
 # @ripemd160: RIPEMD-160. (since 2.7)
+#
 # Since: 2.6
 ##
 { 'enum': 'QCryptoHashAlgorithm',
   'prefix': 'QCRYPTO_HASH_ALG',
   'data': ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', 'ripemd160']}
 
-
 ##
 # @QCryptoCipherAlgorithm:
 #
 # The supported algorithms for content encryption ciphers
 #
 # @aes-128: AES with 128 bit / 16 byte keys
+#
 # @aes-192: AES with 192 bit / 24 byte keys
+#
 # @aes-256: AES with 256 bit / 32 byte keys
-# @des-rfb: RFB specific variant of single DES. Do not use except in VNC.
+#
+# @des: DES with 56 bit / 8 byte keys.  Do not use except in VNC.
+#     (since 6.1)
+#
 # @3des: 3DES(EDE) with 192 bit / 24 byte keys (since 2.9)
+#
 # @cast5-128: Cast5 with 128 bit / 16 byte keys
+#
 # @serpent-128: Serpent with 128 bit / 16 byte keys
+#
 # @serpent-192: Serpent with 192 bit / 24 byte keys
+#
 # @serpent-256: Serpent with 256 bit / 32 byte keys
+#
 # @twofish-128: Twofish with 128 bit / 16 byte keys
+#
 # @twofish-192: Twofish with 192 bit / 24 byte keys
+#
 # @twofish-256: Twofish with 256 bit / 32 byte keys
+#
 # Since: 2.6
 ##
 { 'enum': 'QCryptoCipherAlgorithm',
   'prefix': 'QCRYPTO_CIPHER_ALG',
   'data': ['aes-128', 'aes-192', 'aes-256',
-           'des-rfb', '3des',
+           'des', '3des',
            'cast5-128',
            'serpent-128', 'serpent-192', 'serpent-256',
            'twofish-128', 'twofish-192', 'twofish-256']}
 
-
 ##
 # @QCryptoCipherMode:
 #
 # The supported modes for content encryption ciphers
 #
 # @ecb: Electronic Code Book
+#
 # @cbc: Cipher Block Chaining
+#
 # @xts: XEX with tweaked code book and ciphertext stealing
+#
 # @ctr: Counter (Since 2.8)
+#
 # Since: 2.6
 ##
 { 'enum': 'QCryptoCipherMode',
   'prefix': 'QCRYPTO_CIPHER_MODE',
   'data': ['ecb', 'cbc', 'xts', 'ctr']}
 
-
 ##
 # @QCryptoIVGenAlgorithm:
 #
-# The supported algorithms for generating initialization
-# vectors for full disk encryption. The 'plain' generator
-# should not be used for disks with sector numbers larger
-# than 2^32, except where compatibility with pre-existing
-# Linux dm-crypt volumes is required.
+# The supported algorithms for generating initialization vectors for
+# full disk encryption.  The 'plain' generator should not be used for
+# disks with sector numbers larger than 2^32, except where
+# compatibility with pre-existing Linux dm-crypt volumes is required.
 #
 # @plain: 64-bit sector number truncated to 32-bits
+#
 # @plain64: 64-bit sector number
-# @essiv: 64-bit sector number encrypted with a hash of the encryption key
+#
+# @essiv: 64-bit sector number encrypted with a hash of the encryption
+#     key
+#
 # Since: 2.6
 ##
 { 'enum': 'QCryptoIVGenAlgorithm',
 #
 # The supported full disk encryption formats
 #
-# @qcow: QCow/QCow2 built-in AES-CBC encryption. Use only
-#        for liberating data from old images.
-# @luks: LUKS encryption format. Recommended for new images
+# @qcow: QCow/QCow2 built-in AES-CBC encryption.  Use only for
+#     liberating data from old images.
+#
+# @luks: LUKS encryption format.  Recommended for new images
 #
 # Since: 2.6
 ##
 ##
 # @QCryptoBlockOptionsBase:
 #
-# The common options that apply to all full disk
-# encryption formats
+# The common options that apply to all full disk encryption formats
 #
 # @format: the encryption format
 #
 # The options that apply to QCow/QCow2 AES-CBC encryption format
 #
 # @key-secret: the ID of a QCryptoSecret object providing the
-#              decryption key. Mandatory except when probing image for
-#              metadata only.
+#     decryption key.  Mandatory except when probing image for
+#     metadata only.
 #
 # Since: 2.6
 ##
 # The options that apply to LUKS encryption format
 #
 # @key-secret: the ID of a QCryptoSecret object providing the
-#              decryption key. Mandatory except when probing image for
-#              metadata only.
+#     decryption key.  Mandatory except when probing image for
+#     metadata only.
+#
 # Since: 2.6
 ##
 { 'struct': 'QCryptoBlockOptionsLUKS',
   'data': { '*key-secret': 'str' }}
 
-
 ##
 # @QCryptoBlockCreateOptionsLUKS:
 #
 # The options that apply to LUKS encryption format initialization
 #
-# @cipher-alg: the cipher algorithm for data encryption
-#              Currently defaults to 'aes-256'.
-# @cipher-mode: the cipher mode for data encryption
-#               Currently defaults to 'xts'
-# @ivgen-alg: the initialization vector generator
-#             Currently defaults to 'plain64'
-# @ivgen-hash-alg: the initialization vector generator hash
-#                  Currently defaults to 'sha256'
-# @hash-alg: the master key hash algorithm
-#            Currently defaults to 'sha256'
-# @iter-time: number of milliseconds to spend in
-#             PBKDF passphrase processing. Currently defaults
-#             to 2000. (since 2.8)
+# @cipher-alg: the cipher algorithm for data encryption Currently
+#     defaults to 'aes-256'.
+#
+# @cipher-mode: the cipher mode for data encryption Currently defaults
+#     to 'xts'
+#
+# @ivgen-alg: the initialization vector generator Currently defaults
+#     to 'plain64'
+#
+# @ivgen-hash-alg: the initialization vector generator hash Currently
+#     defaults to 'sha256'
+#
+# @hash-alg: the master key hash algorithm Currently defaults to
+#     'sha256'
+#
+# @iter-time: number of milliseconds to spend in PBKDF passphrase
+#     processing.  Currently defaults to 2000. (since 2.8)
+#
 # Since: 2.6
 ##
 { 'struct': 'QCryptoBlockCreateOptionsLUKS',
             '*hash-alg': 'QCryptoHashAlgorithm',
             '*iter-time': 'int'}}
 
-
 ##
 # @QCryptoBlockOpenOptions:
 #
-# The options that are available for all encryption formats
-# when opening an existing volume
+# The options that are available for all encryption formats when
+# opening an existing volume
 #
 # Since: 2.6
 ##
   'data': { 'qcow': 'QCryptoBlockOptionsQCow',
             'luks': 'QCryptoBlockOptionsLUKS' } }
 
-
 ##
 # @QCryptoBlockCreateOptions:
 #
-# The options that are available for all encryption formats
-# when initializing a new volume
+# The options that are available for all encryption formats when
+# initializing a new volume
 #
 # Since: 2.6
 ##
   'data': { 'qcow': 'QCryptoBlockOptionsQCow',
             'luks': 'QCryptoBlockCreateOptionsLUKS' } }
 
-
 ##
 # @QCryptoBlockInfoBase:
 #
-# The common information that applies to all full disk
-# encryption formats
+# The common information that applies to all full disk encryption
+# formats
 #
 # @format: the encryption format
 #
 { 'struct': 'QCryptoBlockInfoBase',
   'data': { 'format': 'QCryptoBlockFormat' }}
 
-
 ##
 # @QCryptoBlockInfoLUKSSlot:
 #
-# Information about the LUKS block encryption key
-# slot options
+# Information about the LUKS block encryption key slot options
 #
 # @active: whether the key slot is currently in use
+#
 # @key-offset: offset to the key material in bytes
+#
 # @iters: number of PBKDF2 iterations for key material
+#
 # @stripes: number of stripes for splitting key material
 #
 # Since: 2.7
            '*stripes': 'int',
            'key-offset': 'int' } }
 
-
 ##
 # @QCryptoBlockInfoLUKS:
 #
 # Information about the LUKS block encryption options
 #
 # @cipher-alg: the cipher algorithm for data encryption
+#
 # @cipher-mode: the cipher mode for data encryption
+#
 # @ivgen-alg: the initialization vector generator
+#
 # @ivgen-hash-alg: the initialization vector generator hash
+#
 # @hash-alg: the master key hash algorithm
+#
 # @payload-offset: offset to the payload data in bytes
+#
 # @master-key-iters: number of PBKDF2 iterations for key material
+#
 # @uuid: unique identifier for the volume
+#
 # @slots: information about each key slot
 #
 # Since: 2.7
 #
 # Defines state of keyslots that are affected by the update
 #
-# @active:    The slots contain the given password and marked as active
-# @inactive:  The slots are erased (contain garbage) and marked as inactive
+# @active: The slots contain the given password and marked as active
+#
+# @inactive: The slots are erased (contain garbage) and marked as
+#     inactive
 #
 # Since: 5.1
 ##
 { 'enum': 'QCryptoBlockLUKSKeyslotState',
   'data': [ 'active', 'inactive' ] }
 
-
 ##
 # @QCryptoBlockAmendOptionsLUKS:
 #
-# This struct defines the update parameters that activate/de-activate set
-# of keyslots
+# This struct defines the update parameters that activate/de-activate
+# set of keyslots
 #
 # @state: the desired state of the keyslots
 #
-# @new-secret:    The ID of a QCryptoSecret object providing the password to be
-#                 written into added active keyslots
+# @new-secret: The ID of a QCryptoSecret object providing the password
+#     to be written into added active keyslots
 #
-# @old-secret:    Optional (for deactivation only)
-#                 If given will deactivate all keyslots that
-#                 match password located in QCryptoSecret with this ID
+# @old-secret: Optional (for deactivation only) If given will
+#     deactivate all keyslots that match password located in
+#     QCryptoSecret with this ID
 #
-# @iter-time:     Optional (for activation only)
-#                 Number of milliseconds to spend in
-#                 PBKDF passphrase processing for the newly activated keyslot.
-#                 Currently defaults to 2000.
+# @iter-time: Optional (for activation only) Number of milliseconds to
+#     spend in PBKDF passphrase processing for the newly activated
+#     keyslot.  Currently defaults to 2000.
 #
-# @keyslot:       Optional. ID of the keyslot to activate/deactivate.
-#                 For keyslot activation, keyslot should not be active already
-#                 (this is unsafe to update an active keyslot),
-#                 but possible if 'force' parameter is given.
-#                 If keyslot is not given, first free keyslot will be written.
+# @keyslot: Optional.  ID of the keyslot to activate/deactivate.  For
+#     keyslot activation, keyslot should not be active already (this
+#     is unsafe to update an active keyslot), but possible if 'force'
+#     parameter is given.  If keyslot is not given, first free keyslot
+#     will be written.
 #
-#                 For keyslot deactivation, this parameter specifies the exact
-#                 keyslot to deactivate
+#     For keyslot deactivation, this parameter specifies the exact
+#     keyslot to deactivate
 #
-# @secret:        Optional. The ID of a QCryptoSecret object providing the
-#                 password to use to retrieve current master key.
-#                 Defaults to the same secret that was used to open the image
+# @secret: Optional.  The ID of a QCryptoSecret object providing the
+#     password to use to retrieve current master key.  Defaults to the
+#     same secret that was used to open the image
 #
-#
-# Since 5.1
+# Since: 5.1
 ##
 { 'struct': 'QCryptoBlockAmendOptionsLUKS',
   'data': { 'state': 'QCryptoBlockLUKSKeyslotState',
 ##
 # @QCryptoBlockAmendOptions:
 #
-# The options that are available for all encryption formats
-# when amending encryption settings
+# The options that are available for all encryption formats when
+# amending encryption settings
 #
 # Since: 5.1
 ##
   'discriminator': 'format',
   'data': {
           'luks': 'QCryptoBlockAmendOptionsLUKS' } }
+
+##
+# @SecretCommonProperties:
+#
+# Properties for objects of classes derived from secret-common.
+#
+# @loaded: if true, the secret is loaded immediately when applying
+#     this option and will probably fail when processing the next
+#     option.  Don't use; only provided for compatibility.
+#     (default: false)
+#
+# @format: the data format that the secret is provided in
+#     (default: raw)
+#
+# @keyid: the name of another secret that should be used to decrypt
+#     the provided data.  If not present, the data is assumed to be
+#     unencrypted.
+#
+# @iv: the random initialization vector used for encryption of this
+#     particular secret.  Should be a base64 encrypted string of the
+#     16-byte IV. Mandatory if @keyid is given.  Ignored if @keyid is
+#     absent.
+#
+# Features:
+#
+# @deprecated: Member @loaded is deprecated.  Setting true doesn't
+#     make sense, and false is already the default.
+#
+# Since: 2.6
+##
+{ 'struct': 'SecretCommonProperties',
+  'data': { '*loaded': { 'type': 'bool', 'features': ['deprecated'] },
+            '*format': 'QCryptoSecretFormat',
+            '*keyid': 'str',
+            '*iv': 'str' } }
+
+##
+# @SecretProperties:
+#
+# Properties for secret objects.
+#
+# Either @data or @file must be provided, but not both.
+#
+# @data: the associated with the secret from
+#
+# @file: the filename to load the data associated with the secret from
+#
+# Since: 2.6
+##
+{ 'struct': 'SecretProperties',
+  'base': 'SecretCommonProperties',
+  'data': { '*data': 'str',
+            '*file': 'str' } }
+
+##
+# @SecretKeyringProperties:
+#
+# Properties for secret_keyring objects.
+#
+# @serial: serial number that identifies a key to get from the kernel
+#
+# Since: 5.1
+##
+{ 'struct': 'SecretKeyringProperties',
+  'base': 'SecretCommonProperties',
+  'data': { 'serial': 'int32' } }
+
+##
+# @TlsCredsProperties:
+#
+# Properties for objects of classes derived from tls-creds.
+#
+# @verify-peer: if true the peer credentials will be verified once the
+#     handshake is completed.  This is a no-op for anonymous
+#     credentials.  (default: true)
+#
+# @dir: the path of the directory that contains the credential files
+#
+# @endpoint: whether the QEMU network backend that uses the
+#     credentials will be acting as a client or as a server
+#     (default: client)
+#
+# @priority: a gnutls priority string as described at
+#     https://gnutls.org/manual/html_node/Priority-Strings.html
+#
+# Since: 2.5
+##
+{ 'struct': 'TlsCredsProperties',
+  'data': { '*verify-peer': 'bool',
+            '*dir': 'str',
+            '*endpoint': 'QCryptoTLSCredsEndpoint',
+            '*priority': 'str' } }
+
+##
+# @TlsCredsAnonProperties:
+#
+# Properties for tls-creds-anon objects.
+#
+# @loaded: if true, the credentials are loaded immediately when
+#     applying this option and will ignore options that are processed
+#     later.  Don't use; only provided for compatibility.
+#     (default: false)
+#
+# Features:
+#
+# @deprecated: Member @loaded is deprecated.  Setting true doesn't
+#     make sense, and false is already the default.
+#
+# Since: 2.5
+##
+{ 'struct': 'TlsCredsAnonProperties',
+  'base': 'TlsCredsProperties',
+  'data': { '*loaded': { 'type': 'bool', 'features': ['deprecated'] } } }
+
+##
+# @TlsCredsPskProperties:
+#
+# Properties for tls-creds-psk objects.
+#
+# @loaded: if true, the credentials are loaded immediately when
+#     applying this option and will ignore options that are processed
+#     later.  Don't use; only provided for compatibility.
+#     (default: false)
+#
+# @username: the username which will be sent to the server.  For
+#     clients only.  If absent, "qemu" is sent and the property will
+#     read back as an empty string.
+#
+# Features:
+#
+# @deprecated: Member @loaded is deprecated.  Setting true doesn't
+#     make sense, and false is already the default.
+#
+# Since: 3.0
+##
+{ 'struct': 'TlsCredsPskProperties',
+  'base': 'TlsCredsProperties',
+  'data': { '*loaded': { 'type': 'bool', 'features': ['deprecated'] },
+            '*username': 'str' } }
+
+##
+# @TlsCredsX509Properties:
+#
+# Properties for tls-creds-x509 objects.
+#
+# @loaded: if true, the credentials are loaded immediately when
+#     applying this option and will ignore options that are processed
+#     later.  Don't use; only provided for compatibility.
+#     (default: false)
+#
+# @sanity-check: if true, perform some sanity checks before using the
+#     credentials (default: true)
+#
+# @passwordid: For the server-key.pem and client-key.pem files which
+#     contain sensitive private keys, it is possible to use an
+#     encrypted version by providing the @passwordid parameter.  This
+#     provides the ID of a previously created secret object containing
+#     the password for decryption.
+#
+# Features:
+#
+# @deprecated: Member @loaded is deprecated.  Setting true doesn't
+#     make sense, and false is already the default.
+#
+# Since: 2.5
+##
+{ 'struct': 'TlsCredsX509Properties',
+  'base': 'TlsCredsProperties',
+  'data': { '*loaded': { 'type': 'bool', 'features': ['deprecated'] },
+            '*sanity-check': 'bool',
+            '*passwordid': 'str' } }
+##
+# @QCryptoAkCipherAlgorithm:
+#
+# The supported algorithms for asymmetric encryption ciphers
+#
+# @rsa: RSA algorithm
+#
+# Since: 7.1
+##
+{ 'enum': 'QCryptoAkCipherAlgorithm',
+  'prefix': 'QCRYPTO_AKCIPHER_ALG',
+  'data': ['rsa']}
+
+##
+# @QCryptoAkCipherKeyType:
+#
+# The type of asymmetric keys.
+#
+# Since: 7.1
+##
+{ 'enum': 'QCryptoAkCipherKeyType',
+  'prefix': 'QCRYPTO_AKCIPHER_KEY_TYPE',
+  'data': ['public', 'private']}
+
+##
+# @QCryptoRSAPaddingAlgorithm:
+#
+# The padding algorithm for RSA.
+#
+# @raw: no padding used
+#
+# @pkcs1: pkcs1#v1.5
+#
+# Since: 7.1
+##
+{ 'enum': 'QCryptoRSAPaddingAlgorithm',
+  'prefix': 'QCRYPTO_RSA_PADDING_ALG',
+  'data': ['raw', 'pkcs1']}
+
+##
+# @QCryptoAkCipherOptionsRSA:
+#
+# Specific parameters for RSA algorithm.
+#
+# @hash-alg: QCryptoHashAlgorithm
+#
+# @padding-alg: QCryptoRSAPaddingAlgorithm
+#
+# Since: 7.1
+##
+{ 'struct': 'QCryptoAkCipherOptionsRSA',
+  'data': { 'hash-alg':'QCryptoHashAlgorithm',
+            'padding-alg': 'QCryptoRSAPaddingAlgorithm'}}
+
+##
+# @QCryptoAkCipherOptions:
+#
+# The options that are available for all asymmetric key algorithms
+# when creating a new QCryptoAkCipher.
+#
+# Since: 7.1
+##
+{ 'union': 'QCryptoAkCipherOptions',
+  'base': { 'alg': 'QCryptoAkCipherAlgorithm' },
+  'discriminator': 'alg',
+  'data': { 'rsa': 'QCryptoAkCipherOptionsRSA' }}
diff --git a/qapi/cryptodev.json b/qapi/cryptodev.json
new file mode 100644 (file)
index 0000000..68289f4
--- /dev/null
@@ -0,0 +1,96 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+##
+# = Cryptography devices
+##
+
+##
+# @QCryptodevBackendAlgType:
+#
+# The supported algorithm types of a crypto device.
+#
+# @sym: symmetric encryption
+#
+# @asym: asymmetric Encryption
+#
+# Since: 8.0
+##
+{ 'enum': 'QCryptodevBackendAlgType',
+  'prefix': 'QCRYPTODEV_BACKEND_ALG',
+  'data': ['sym', 'asym']}
+
+##
+# @QCryptodevBackendServiceType:
+#
+# The supported service types of a crypto device.
+#
+# Since: 8.0
+##
+{ 'enum': 'QCryptodevBackendServiceType',
+  'prefix': 'QCRYPTODEV_BACKEND_SERVICE',
+  'data': ['cipher', 'hash', 'mac', 'aead', 'akcipher']}
+
+##
+# @QCryptodevBackendType:
+#
+# The crypto device backend type
+#
+# @builtin: the QEMU builtin support
+#
+# @vhost-user: vhost-user
+#
+# @lkcf: Linux kernel cryptographic framework
+#
+# Since: 8.0
+##
+{ 'enum': 'QCryptodevBackendType',
+  'prefix': 'QCRYPTODEV_BACKEND_TYPE',
+  'data': ['builtin', 'vhost-user', 'lkcf']}
+
+##
+# @QCryptodevBackendClient:
+#
+# Information about a queue of crypto device.
+#
+# @queue: the queue index of the crypto device
+#
+# @type: the type of the crypto device
+#
+# Since: 8.0
+##
+{ 'struct': 'QCryptodevBackendClient',
+  'data': { 'queue': 'uint32',
+            'type': 'QCryptodevBackendType' } }
+
+##
+# @QCryptodevInfo:
+#
+# Information about a crypto device.
+#
+# @id: the id of the crypto device
+#
+# @service: supported service types of a crypto device
+#
+# @client: the additional information of the crypto device
+#
+# Since: 8.0
+##
+{ 'struct': 'QCryptodevInfo',
+  'data': { 'id': 'str',
+            'service': ['QCryptodevBackendServiceType'],
+            'client': ['QCryptodevBackendClient'] } }
+
+##
+# @query-cryptodev:
+#
+# Returns information about current crypto devices.
+#
+# Returns: a list of @QCryptodevInfo
+#
+# Since: 8.0
+##
+{ 'command': 'query-cryptodev', 'returns': ['QCryptodevInfo']}
diff --git a/qapi/cxl.json b/qapi/cxl.json
new file mode 100644 (file)
index 0000000..8cc4c72
--- /dev/null
@@ -0,0 +1,363 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+
+##
+# = CXL devices
+##
+
+##
+# @CxlEventLog:
+#
+# CXL has a number of separate event logs for different types of
+# events.  Each such event log is handled and signaled independently.
+#
+# @informational: Information Event Log
+#
+# @warning: Warning Event Log
+#
+# @failure: Failure Event Log
+#
+# @fatal: Fatal Event Log
+#
+# Since: 8.1
+##
+{ 'enum': 'CxlEventLog',
+  'data': ['informational',
+           'warning',
+           'failure',
+           'fatal']
+ }
+
+##
+# @cxl-inject-general-media-event:
+#
+# Inject an event record for a General Media Event (CXL r3.0
+# 8.2.9.2.1.1).  This event type is reported via one of the event logs
+# specified via the log parameter.
+#
+# @path: CXL type 3 device canonical QOM path
+#
+# @log: event log to add the event to
+#
+# @flags: Event Record Flags.  See CXL r3.0 Table 8-42 Common Event
+#     Record Format, Event Record Flags for subfield definitions.
+#
+# @dpa: Device Physical Address (relative to @path device).  Note
+#     lower bits include some flags.  See CXL r3.0 Table 8-43 General
+#     Media Event Record, Physical Address.
+#
+# @descriptor: Memory Event Descriptor with additional memory event
+#     information.  See CXL r3.0 Table 8-43 General Media Event
+#     Record, Memory Event Descriptor for bit definitions.
+#
+# @type: Type of memory event that occurred.  See CXL r3.0 Table 8-43
+#     General Media Event Record, Memory Event Type for possible
+#     values.
+#
+# @transaction-type: Type of first transaction that caused the event
+#     to occur.  See CXL r3.0 Table 8-43 General Media Event Record,
+#     Transaction Type for possible values.
+#
+# @channel: The channel of the memory event location.  A channel is an
+#     interface that can be independently accessed for a transaction.
+#
+# @rank: The rank of the memory event location.  A rank is a set of
+#     memory devices on a channel that together execute a transaction.
+#
+# @device: Bitmask that represents all devices in the rank associated
+#     with the memory event location.
+#
+# @component-id: Device specific component identifier for the event.
+#     May describe a field replaceable sub-component of the device.
+#
+# Since: 8.1
+##
+{ 'command': 'cxl-inject-general-media-event',
+  'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8',
+            'dpa': 'uint64', 'descriptor': 'uint8',
+            'type': 'uint8', 'transaction-type': 'uint8',
+            '*channel': 'uint8', '*rank': 'uint8',
+            '*device': 'uint32', '*component-id': 'str' } }
+
+##
+# @cxl-inject-dram-event:
+#
+# Inject an event record for a DRAM Event (CXL r3.0 8.2.9.2.1.2).
+# This event type is reported via one of the event logs specified via
+# the log parameter.
+#
+# @path: CXL type 3 device canonical QOM path
+#
+# @log: Event log to add the event to
+#
+# @flags: Event Record Flags.  See CXL r3.0 Table 8-42 Common Event
+#     Record Format, Event Record Flags for subfield definitions.
+#
+# @dpa: Device Physical Address (relative to @path device).  Note
+#     lower bits include some flags.  See CXL r3.0 Table 8-44 DRAM
+#     Event Record, Physical Address.
+#
+# @descriptor: Memory Event Descriptor with additional memory event
+#     information.  See CXL r3.0 Table 8-44 DRAM Event Record, Memory
+#     Event Descriptor for bit definitions.
+#
+# @type: Type of memory event that occurred.  See CXL r3.0 Table 8-44
+#     DRAM Event Record, Memory Event Type for possible values.
+#
+# @transaction-type: Type of first transaction that caused the event
+#     to occur.  See CXL r3.0 Table 8-44 DRAM Event Record,
+#     Transaction Type for possible values.
+#
+# @channel: The channel of the memory event location.  A channel is an
+#     interface that can be independently accessed for a transaction.
+#
+# @rank: The rank of the memory event location.  A rank is a set of
+#     memory devices on a channel that together execute a transaction.
+#
+# @nibble-mask: Identifies one or more nibbles that the error affects
+#
+# @bank-group: Bank group of the memory event location, incorporating
+#     a number of Banks.
+#
+# @bank: Bank of the memory event location.  A single bank is accessed
+#     per read or write of the memory.
+#
+# @row: Row address within the DRAM.
+#
+# @column: Column address within the DRAM.
+#
+# @correction-mask: Bits within each nibble.  Used in order of bits
+#     set in the nibble-mask.  Up to 4 nibbles may be covered.
+#
+# Since: 8.1
+##
+{ 'command': 'cxl-inject-dram-event',
+  'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8',
+            'dpa': 'uint64', 'descriptor': 'uint8',
+            'type': 'uint8', 'transaction-type': 'uint8',
+            '*channel': 'uint8', '*rank': 'uint8', '*nibble-mask': 'uint32',
+            '*bank-group': 'uint8', '*bank': 'uint8', '*row': 'uint32',
+            '*column': 'uint16', '*correction-mask': [ 'uint64' ]
+           }}
+
+##
+# @cxl-inject-memory-module-event:
+#
+# Inject an event record for a Memory Module Event (CXL r3.0
+# 8.2.9.2.1.3).  This event includes a copy of the Device Health
+# info at the time of the event.
+#
+# @path: CXL type 3 device canonical QOM path
+#
+# @log: Event Log to add the event to
+#
+# @flags: Event Record Flags.  See CXL r3.0 Table 8-42 Common Event
+#     Record Format, Event Record Flags for subfield definitions.
+#
+# @type: Device Event Type.  See CXL r3.0 Table 8-45 Memory Module
+#     Event Record for bit definitions for bit definiions.
+#
+# @health-status: Overall health summary bitmap.  See CXL r3.0 Table
+#     8-100 Get Health Info Output Payload, Health Status for bit
+#     definitions.
+#
+# @media-status: Overall media health summary.  See CXL r3.0 Table
+#     8-100 Get Health Info Output Payload, Media Status for bit
+#     definitions.
+#
+# @additional-status: See CXL r3.0 Table 8-100 Get Health Info Output
+#     Payload, Additional Status for subfield definitions.
+#
+# @life-used: Percentage (0-100) of factory expected life span.
+#
+# @temperature: Device temperature in degrees Celsius.
+#
+# @dirty-shutdown-count: Number of times the device has been unable to
+#     determine whether data loss may have occurred.
+#
+# @corrected-volatile-error-count: Total number of correctable errors
+#     in volatile memory.
+#
+# @corrected-persistent-error-count: Total number of correctable
+#     errors in persistent memory
+#
+# Since: 8.1
+##
+{ 'command': 'cxl-inject-memory-module-event',
+  'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint8',
+            'type': 'uint8', 'health-status': 'uint8',
+            'media-status': 'uint8', 'additional-status': 'uint8',
+            'life-used': 'uint8', 'temperature' : 'int16',
+            'dirty-shutdown-count': 'uint32',
+            'corrected-volatile-error-count': 'uint32',
+            'corrected-persistent-error-count': 'uint32'
+            }}
+
+##
+# @cxl-inject-poison:
+#
+# Poison records indicate that a CXL memory device knows that a
+# particular memory region may be corrupted.  This may be because of
+# locally detected errors (e.g. ECC failure) or poisoned writes
+# received from other components in the system.  This injection
+# mechanism enables testing of the OS handling of poison records which
+# may be queried via the CXL mailbox.
+#
+# @path: CXL type 3 device canonical QOM path
+#
+# @start: Start address; must be 64 byte aligned.
+#
+# @length: Length of poison to inject; must be a multiple of 64 bytes.
+#
+# Since: 8.1
+##
+{ 'command': 'cxl-inject-poison',
+  'data': { 'path': 'str', 'start': 'uint64', 'length': 'size' }}
+
+##
+# @CxlUncorErrorType:
+#
+# Type of uncorrectable CXL error to inject.  These errors are
+# reported via an AER uncorrectable internal error with additional
+# information logged at the CXL device.
+#
+# @cache-data-parity: Data error such as data parity or data ECC error
+#     CXL.cache
+#
+# @cache-address-parity: Address parity or other errors associated
+#     with the address field on CXL.cache
+#
+# @cache-be-parity: Byte enable parity or other byte enable errors on
+#     CXL.cache
+#
+# @cache-data-ecc: ECC error on CXL.cache
+#
+# @mem-data-parity: Data error such as data parity or data ECC error
+#     on CXL.mem
+#
+# @mem-address-parity: Address parity or other errors associated with
+#     the address field on CXL.mem
+#
+# @mem-be-parity: Byte enable parity or other byte enable errors on
+#     CXL.mem.
+#
+# @mem-data-ecc: Data ECC error on CXL.mem.
+#
+# @reinit-threshold: REINIT threshold hit.
+#
+# @rsvd-encoding: Received unrecognized encoding.
+#
+# @poison-received: Received poison from the peer.
+#
+# @receiver-overflow: Buffer overflows (first 3 bits of header log
+#     indicate which)
+#
+# @internal: Component specific error
+#
+# @cxl-ide-tx: Integrity and data encryption tx error.
+#
+# @cxl-ide-rx: Integrity and data encryption rx error.
+#
+# Since: 8.0
+##
+
+{ 'enum': 'CxlUncorErrorType',
+  'data': ['cache-data-parity',
+           'cache-address-parity',
+           'cache-be-parity',
+           'cache-data-ecc',
+           'mem-data-parity',
+           'mem-address-parity',
+           'mem-be-parity',
+           'mem-data-ecc',
+           'reinit-threshold',
+           'rsvd-encoding',
+           'poison-received',
+           'receiver-overflow',
+           'internal',
+           'cxl-ide-tx',
+           'cxl-ide-rx'
+           ]
+ }
+
+##
+# @CXLUncorErrorRecord:
+#
+# Record of a single error including header log.
+#
+# @type: Type of error
+#
+# @header: 16 DWORD of header.
+#
+# Since: 8.0
+##
+{ 'struct': 'CXLUncorErrorRecord',
+  'data': {
+      'type': 'CxlUncorErrorType',
+      'header': [ 'uint32' ]
+  }
+}
+
+##
+# @cxl-inject-uncorrectable-errors:
+#
+# Command to allow injection of multiple errors in one go.  This
+# allows testing of multiple header log handling in the OS.
+#
+# @path: CXL Type 3 device canonical QOM path
+#
+# @errors: Errors to inject
+#
+# Since: 8.0
+##
+{ 'command': 'cxl-inject-uncorrectable-errors',
+  'data': { 'path': 'str',
+             'errors': [ 'CXLUncorErrorRecord' ] }}
+
+##
+# @CxlCorErrorType:
+#
+# Type of CXL correctable error to inject
+#
+# @cache-data-ecc: Data ECC error on CXL.cache
+#
+# @mem-data-ecc: Data ECC error on CXL.mem
+#
+# @crc-threshold: Component specific and applicable to 68 byte Flit
+#     mode only.
+#
+# @cache-poison-received: Received poison from a peer on CXL.cache.
+#
+# @mem-poison-received: Received poison from a peer on CXL.mem
+#
+# @physical: Received error indication from the physical layer.
+#
+# Since: 8.0
+##
+{ 'enum': 'CxlCorErrorType',
+  'data': ['cache-data-ecc',
+           'mem-data-ecc',
+           'crc-threshold',
+           'retry-threshold',
+           'cache-poison-received',
+           'mem-poison-received',
+           'physical']
+}
+
+##
+# @cxl-inject-correctable-error:
+#
+# Command to inject a single correctable error.  Multiple error
+# injection of this error type is not interesting as there is no
+# associated header log.  These errors are reported via AER as a
+# correctable internal error, with additional detail available from
+# the CXL device.
+#
+# @path: CXL Type 3 device canonical QOM path
+#
+# @type: Type of error.
+#
+# Since: 8.0
+##
+{'command': 'cxl-inject-correctable-error',
+ 'data': {'path': 'str', 'type': 'CxlCorErrorType'}}
index f7c4267e3fcd4eeb34aa8957547b05c1462d60ab..5cbc237ad917cc99056e5b244c9f29d73138ec48 100644 (file)
 #
 # @elf: elf format
 #
-# @kdump-zlib: kdump-compressed format with zlib-compressed
+# @kdump-zlib: makedumpfile flattened, kdump-compressed format with zlib
+#     compression
 #
-# @kdump-lzo: kdump-compressed format with lzo-compressed
+# @kdump-lzo: makedumpfile flattened, kdump-compressed format with lzo
+#     compression
 #
-# @kdump-snappy: kdump-compressed format with snappy-compressed
+# @kdump-snappy: makedumpfile flattened, kdump-compressed format with snappy
+#     compression
 #
-# @win-dmp: Windows full crashdump format,
-#           can be used instead of ELF converting (since 2.13)
+# @kdump-raw-zlib: raw assembled kdump-compressed format with zlib compression
+#     (since 8.2)
+#
+# @kdump-raw-lzo: raw assembled kdump-compressed format with lzo compression
+#     (since 8.2)
+#
+# @kdump-raw-snappy: raw assembled kdump-compressed format with snappy
+#     compression (since 8.2)
+#
+# @win-dmp: Windows full crashdump format, can be used instead of ELF
+#     converting (since 2.13)
 #
 # Since: 2.0
 ##
 { 'enum': 'DumpGuestMemoryFormat',
-  'data': [ 'elf', 'kdump-zlib', 'kdump-lzo', 'kdump-snappy', 'win-dmp' ] }
+  'data': [
+      'elf',
+      'kdump-zlib', 'kdump-lzo', 'kdump-snappy',
+      'kdump-raw-zlib', 'kdump-raw-lzo', 'kdump-raw-snappy',
+      'win-dmp' ] }
 
 ##
 # @dump-guest-memory:
 #
-# Dump guest's memory to vmcore. It is a synchronous operation that can take
-# very long depending on the amount of guest memory.
+# Dump guest's memory to vmcore.  It is a synchronous operation that
+# can take very long depending on the amount of guest memory.
 #
-# @paging: if true, do paging to get guest's memory mapping. This allows
-#          using gdb to process the core file.
+# @paging: if true, do paging to get guest's memory mapping.  This
+#     allows using gdb to process the core file.
 #
-#          IMPORTANT: this option can make QEMU allocate several gigabytes
-#          of RAM. This can happen for a large guest, or a
-#          malicious guest pretending to be large.
+#     IMPORTANT: this option can make QEMU allocate several gigabytes
+#     of RAM. This can happen for a large guest, or a malicious guest
+#     pretending to be large.
 #
-#          Also, paging=true has the following limitations:
+#     Also, paging=true has the following limitations:
 #
-#             1. The guest may be in a catastrophic state or can have corrupted
-#                memory, which cannot be trusted
-#             2. The guest can be in real-mode even if paging is enabled. For
-#                example, the guest uses ACPI to sleep, and ACPI sleep state
-#                goes in real-mode
-#             3. Currently only supported on i386 and x86_64.
+#     1. The guest may be in a catastrophic state or can have
+#        corrupted memory, which cannot be trusted
+#     2. The guest can be in real-mode even if paging is enabled.  For
+#        example, the guest uses ACPI to sleep, and ACPI sleep state
+#        goes in real-mode
+#     3. Currently only supported on i386 and x86_64.
 #
-# @protocol: the filename or file descriptor of the vmcore. The supported
-#            protocols are:
+# @protocol: the filename or file descriptor of the vmcore.  The
+#     supported protocols are:
 #
-#            1. file: the protocol starts with "file:", and the following
-#               string is the file's path.
-#            2. fd: the protocol starts with "fd:", and the following string
-#               is the fd's name.
+#     1. file: the protocol starts with "file:", and the following
+#        string is the file's path.
+#     2. fd: the protocol starts with "fd:", and the following string
+#        is the fd's name.
 #
-# @detach: if true, QMP will return immediately rather than
-#          waiting for the dump to finish. The user can track progress
-#          using "query-dump". (since 2.6).
+# @detach: if true, QMP will return immediately rather than waiting
+#     for the dump to finish.  The user can track progress using
+#     "query-dump". (since 2.6).
 #
 # @begin: if specified, the starting physical address.
 #
-# @length: if specified, the memory size, in bytes. If you don't
-#          want to dump all guest's memory, please specify the start @begin
-#          and @length
+# @length: if specified, the memory size, in bytes.  If you don't want
+#     to dump all guest's memory, please specify the start @begin and
+#     @length
 #
-# @format: if specified, the format of guest memory dump. But non-elf
-#          format is conflict with paging and filter, ie. @paging, @begin and
-#          @length is not allowed to be specified with non-elf @format at the
-#          same time (since 2.0)
+# @format: if specified, the format of guest memory dump.  But non-elf
+#     format is conflict with paging and filter, ie.  @paging, @begin
+#     and @length is not allowed to be specified with non-elf @format
+#     at the same time (since 2.0)
 #
 # Note: All boolean arguments default to false
 #
@@ -83,9 +99,8 @@
 # Example:
 #
 # -> { "execute": "dump-guest-memory",
-#      "arguments": { "protocol": "fd:dump" } }
+#      "arguments": { "paging": false, "protocol": "fd:dump" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'dump-guest-memory',
   'data': { 'paging': 'bool', 'protocol': 'str', '*detach': 'bool',
 # -> { "execute": "query-dump" }
 # <- { "return": { "status": "active", "completed": 1024000,
 #                  "total": 2048000 } }
-#
 ##
 { 'command': 'query-dump', 'returns': 'DumpQueryResult' }
 
 #
 # @result: final dump status
 #
-# @error: human-readable error string that provides
-#         hint on why dump failed. Only presents on failure. The
-#         user should not try to interpret the error string.
+# @error: human-readable error string that provides hint on why dump
+#     failed.  Only presents on failure.  The user should not try to
+#     interpret the error string.
 #
 # Since: 2.6
 #
 # Example:
 #
-# { "event": "DUMP_COMPLETED",
-#   "data": {"result": {"total": 1090650112, "status": "completed",
-#                       "completed": 1090650112} } }
-#
+# <- { "event": "DUMP_COMPLETED",
+#      "data": { "result": { "total": 1090650112, "status": "completed",
+#                            "completed": 1090650112 } },
+#      "timestamp": { "seconds": 1648244171, "microseconds": 950316 } }
 ##
 { 'event': 'DUMP_COMPLETED' ,
   'data': { 'result': 'DumpQueryResult', '*error': 'str' } }
 #
 # Returns the available formats for dump-guest-memory
 #
-# Returns:  A @DumpGuestMemoryCapability object listing available formats for
-#           dump-guest-memory
+# Returns: A @DumpGuestMemoryCapability object listing available
+#     formats for dump-guest-memory
 #
 # Since: 2.0
 #
 #
 # -> { "execute": "query-dump-guest-memory-capability" }
 # <- { "return": { "formats":
-#                  ["elf", "kdump-zlib", "kdump-lzo", "kdump-snappy"] }
-#
+#                  ["elf", "kdump-zlib", "kdump-lzo", "kdump-snappy"] } }
 ##
 { 'command': 'query-dump-guest-memory-capability',
   'returns': 'DumpGuestMemoryCapability' }
index 94a6502de9d2a6e4bb5f768408044e649cc74b1e..135c1e823197217df3c13b0abb306370adbc210a 100644 (file)
@@ -10,8 +10,8 @@
 #
 # QEMU error classes
 #
-# @GenericError: this is used for errors that don't require a specific error
-#                class. This should be the default case for most errors
+# @GenericError: this is used for errors that don't require a specific
+#     error class.  This should be the default case for most errors
 #
 # @CommandNotFound: the requested command has not been found
 #
@@ -20,7 +20,7 @@
 # @DeviceNotFound: the requested device has not been found
 #
 # @KVMMissingCap: the requested operation can't be fulfilled because a
-#                 required KVM capability is missing
+#     required KVM capability is missing
 #
 # Since: 1.2
 ##
index 944bb87a20df019bf9cc2976c6b6be96fa49e7b2..9173e60fdd0ddbe41f3b07c08e13d260f24e9f21 100644 (file)
 # alternate that includes the original type alongside something else.
 #
 # Returns: array of @SchemaInfo, where each element describes an
-#          entity in the ABI: command, event, type, ...
+#     entity in the ABI: command, event, type, ...
 #
-#          The order of the various SchemaInfo is unspecified; however, all
-#          names are guaranteed to be unique (no name will be duplicated with
-#          different meta-types).
+#     The order of the various SchemaInfo is unspecified; however, all
+#     names are guaranteed to be unique (no name will be duplicated
+#     with different meta-types).
 #
 # Note: the QAPI schema is also used to help define *internal*
-#       interfaces, by defining QAPI types.  These are not part of the QMP
-#       wire ABI, and therefore not returned by this command.
+#     interfaces, by defining QAPI types.  These are not part of the
+#     QMP wire ABI, and therefore not returned by this command.
 #
 # Since: 2.5
 ##
 { 'command': 'query-qmp-schema',
   'returns': [ 'SchemaInfo' ],
-  'gen': false }                # just to simplify qmp_query_json()
+  'allow-preconfig': true }
 
 ##
 # @SchemaMetaType:
 ##
 # @SchemaInfo:
 #
-# @name: the entity's name, inherited from @base.
-#        The SchemaInfo is always referenced by this name.
-#        Commands and events have the name defined in the QAPI schema.
-#        Unlike command and event names, type names are not part of
-#        the wire ABI.  Consequently, type names are meaningless
-#        strings here, although they are still guaranteed unique
-#        regardless of @meta-type.
+# @name: the entity's name, inherited from @base.  The SchemaInfo is
+#     always referenced by this name.  Commands and events have the
+#     name defined in the QAPI schema.  Unlike command and event
+#     names, type names are not part of the wire ABI.  Consequently,
+#     type names are meaningless strings here, although they are still
+#     guaranteed unique regardless of @meta-type.
 #
 # @meta-type: the entity's meta type, inherited from @base.
 #
 # @features: names of features associated with the entity, in no
-#            particular order.
-#            (since 4.1 for object types, 4.2 for commands, 5.0 for
-#            the rest)
+#     particular order.  (since 4.1 for object types, 4.2 for
+#     commands, 5.0 for the rest)
 #
 # Additional members depend on the value of @meta-type.
 #
 #
 # Additional SchemaInfo members for meta-type 'enum'.
 #
-# @values: the enumeration type's values, in no particular order.
+# @members: the enum type's members, in no particular order (since
+#     6.2).
+#
+# @values: the enumeration type's member names, in no particular
+#     order.  Redundant with @members.  Just for backward
+#     compatibility.
+#
+# Features:
+#
+# @deprecated: Member @values is deprecated.  Use @members instead.
 #
 # Values of this type are JSON string on the wire.
 #
 # Since: 2.5
 ##
 { 'struct': 'SchemaInfoEnum',
-  'data': { 'values': ['str'] } }
+  'data': { 'members': [ 'SchemaInfoEnumMember' ],
+            'values': { 'type': [ 'str' ],
+                        'features': [ 'deprecated' ] } } }
+
+##
+# @SchemaInfoEnumMember:
+#
+# An object member.
+#
+# @name: the member's name, as defined in the QAPI schema.
+#
+# @features: names of features associated with the member, in no
+#     particular order.
+#
+# Since: 6.2
+##
+{ 'struct': 'SchemaInfoEnumMember',
+  'data': { 'name': 'str', '*features': [ 'str' ] } }
 
 ##
 # @SchemaInfoArray:
 #
 # Additional SchemaInfo members for meta-type 'object'.
 #
-# @members: the object type's (non-variant) members, in no particular order.
+# @members: the object type's (non-variant) members, in no particular
+#     order.
 #
-# @tag: the name of the member serving as type tag.
-#       An element of @members with this name must exist.
+# @tag: the name of the member serving as type tag.  An element of
+#     @members with this name must exist.
 #
-# @variants: variant members, i.e. additional members that
-#            depend on the type tag's value.  Present exactly when
-#            @tag is present.  The variants are in no particular order,
-#            and may even differ from the order of the values of the
-#            enum type of the @tag.
+# @variants: variant members, i.e. additional members that depend on
+#     the type tag's value.  Present exactly when @tag is present.
+#     The variants are in no particular order, and may even differ
+#     from the order of the values of the enum type of the @tag.
 #
 # Values of this type are JSON object on the wire.
 #
 #
 # @type: the name of the member's type.
 #
-# @default: default when used as command parameter.
-#           If absent, the parameter is mandatory.
-#           If present, the value must be null.  The parameter is
-#           optional, and behavior when it's missing is not specified
-#           here.
-#           Future extension: if present and non-null, the parameter
-#           is optional, and defaults to this value.
+# @default: default when used as command parameter.  If absent, the
+#     parameter is mandatory.  If present, the value must be null.
+#     The parameter is optional, and behavior when it's missing is not
+#     specified here.  Future extension: if present and non-null, the
+#     parameter is optional, and defaults to this value.
 #
 # @features: names of features associated with the member, in no
-#            particular order.  (since 5.0)
+#     particular order.  (since 5.0)
 #
 # Since: 2.5
 ##
 # @case: a value of the type tag.
 #
 # @type: the name of the object type that provides the variant members
-#        when the type tag has value @case.
+#     when the type tag has value @case.
 #
 # Since: 2.5
 ##
 #
 # Additional SchemaInfo members for meta-type 'alternate'.
 #
-# @members: the alternate type's members, in no particular order.
-#           The members' wire encoding is distinct, see
-#           docs/devel/qapi-code-gen.txt section Alternate types.
+# @members: the alternate type's members, in no particular order.  The
+#     members' wire encoding is distinct, see
+#     docs/devel/qapi-code-gen.txt section Alternate types.
 #
 # On the wire, this can be any of the members.
 #
 # Additional SchemaInfo members for meta-type 'command'.
 #
 # @arg-type: the name of the object type that provides the command's
-#            parameters.
+#     parameters.
 #
 # @ret-type: the name of the command's result type.
 #
 # @allow-oob: whether the command allows out-of-band execution,
-#             defaults to false (Since: 2.12)
+#     defaults to false (Since: 2.12)
 #
-# TODO: @success-response (currently irrelevant, because it's QGA, not QMP)
+# TODO: @success-response (currently irrelevant, because it's QGA, not
+#     QMP)
 #
 # Since: 2.5
 ##
 # Additional SchemaInfo members for meta-type 'event'.
 #
 # @arg-type: the name of the object type that provides the event's
-#            parameters.
+#     parameters.
 #
 # Since: 2.5
 ##
index 280c2f76f136dc66446863fdb1a5a95a1bd1d846..b3957207a456cb6f2836690df1432f547e762d5f 100644 (file)
@@ -2,7 +2,7 @@
 # vim: filetype=python
 
 ##
-# == Background jobs
+# = Background jobs
 ##
 
 ##
 #
 # @create: image creation job type, see "blockdev-create" (since 3.0)
 #
-# @amend: image options amend job type, see "x-blockdev-amend" (since 5.1)
+# @amend: image options amend job type, see "x-blockdev-amend" (since
+#     5.1)
+#
+# @snapshot-load: snapshot load job type, see "snapshot-load" (since
+#     6.0)
+#
+# @snapshot-save: snapshot save job type, see "snapshot-save" (since
+#     6.0)
+#
+# @snapshot-delete: snapshot delete job type, see "snapshot-delete"
+#     (since 6.0)
 #
 # Since: 1.7
 ##
 { 'enum': 'JobType',
-  'data': ['commit', 'stream', 'mirror', 'backup', 'create', 'amend'] }
+  'data': ['commit', 'stream', 'mirror', 'backup', 'create', 'amend',
+           'snapshot-load', 'snapshot-save', 'snapshot-delete'] }
 
 ##
 # @JobStatus:
 #
 # Indicates the present state of a given job in its lifetime.
 #
-# @undefined: Erroneous, default state. Should not ever be visible.
+# @undefined: Erroneous, default state.  Should not ever be visible.
 #
 # @created: The job has been created, but not yet started.
 #
 # @running: The job is currently running.
 #
-# @paused: The job is running, but paused. The pause may be requested by
-#          either the QMP user or by internal processes.
+# @paused: The job is running, but paused.  The pause may be requested
+#     by either the QMP user or by internal processes.
 #
-# @ready: The job is running, but is ready for the user to signal completion.
-#         This is used for long-running jobs like mirror that are designed to
-#         run indefinitely.
+# @ready: The job is running, but is ready for the user to signal
+#     completion.  This is used for long-running jobs like mirror that
+#     are designed to run indefinitely.
 #
-# @standby: The job is ready, but paused. This is nearly identical to @paused.
-#           The job may return to @ready or otherwise be canceled.
+# @standby: The job is ready, but paused.  This is nearly identical to
+#     @paused.  The job may return to @ready or otherwise be canceled.
 #
-# @waiting: The job is waiting for other jobs in the transaction to converge
-#           to the waiting state. This status will likely not be visible for
-#           the last job in a transaction.
+# @waiting: The job is waiting for other jobs in the transaction to
+#     converge to the waiting state.  This status will likely not be
+#     visible for the last job in a transaction.
 #
-# @pending: The job has finished its work, but has finalization steps that it
-#           needs to make prior to completing. These changes will require
-#           manual intervention via @job-finalize if auto-finalize was set to
-#           false. These pending changes may still fail.
+# @pending: The job has finished its work, but has finalization steps
+#     that it needs to make prior to completing.  These changes will
+#     require manual intervention via @job-finalize if auto-finalize
+#     was set to false.  These pending changes may still fail.
 #
-# @aborting: The job is in the process of being aborted, and will finish with
-#            an error. The job will afterwards report that it is @concluded.
-#            This status may not be visible to the management process.
+# @aborting: The job is in the process of being aborted, and will
+#     finish with an error.  The job will afterwards report that it is
+#     @concluded.  This status may not be visible to the management
+#     process.
 #
-# @concluded: The job has finished all work. If auto-dismiss was set to false,
-#             the job will remain in the query list until it is dismissed via
-#             @job-dismiss.
+# @concluded: The job has finished all work.  If auto-dismiss was set
+#     to false, the job will remain in the query list until it is
+#     dismissed via @job-dismiss.
 #
-# @null: The job is in the process of being dismantled. This state should not
-#        ever be visible externally.
+# @null: The job is in the process of being dismantled.  This state
+#     should not ever be visible externally.
 #
 # Since: 2.12
 ##
 #
 # @finalize: see @job-finalize
 #
+# @change: see @block-job-change (since 8.2)
+#
 # Since: 2.12
 ##
 { 'enum': 'JobVerb',
   'data': ['cancel', 'pause', 'resume', 'set-speed', 'complete', 'dismiss',
-           'finalize' ] }
+           'finalize', 'change' ] }
 
 ##
 # @JOB_STATUS_CHANGE:
 # Emitted when a job transitions to a different status.
 #
 # @id: The job identifier
+#
 # @status: The new job status
 #
 # Since: 3.0
 #
 # Pause an active job.
 #
-# This command returns immediately after marking the active job for pausing.
-# Pausing an already paused job is an error.
+# This command returns immediately after marking the active job for
+# pausing.  Pausing an already paused job is an error.
 #
-# The job will pause as soon as possible, which means transitioning into the
-# PAUSED state if it was RUNNING, or into STANDBY if it was READY. The
-# corresponding JOB_STATUS_CHANGE event will be emitted.
+# The job will pause as soon as possible, which means transitioning
+# into the PAUSED state if it was RUNNING, or into STANDBY if it was
+# READY. The corresponding JOB_STATUS_CHANGE event will be emitted.
 #
 # Cancelling a paused job automatically resumes it.
 #
 #
 # Resume a paused job.
 #
-# This command returns immediately after resuming a paused job. Resuming an
-# already running job is an error.
+# This command returns immediately after resuming a paused job.
+# Resuming an already running job is an error.
 #
-# @id : The job identifier.
+# @id: The job identifier.
 #
 # Since: 3.0
 ##
 # This command returns immediately after marking the active job for
 # cancellation.
 #
-# The job will cancel as soon as possible and then emit a JOB_STATUS_CHANGE
-# event. Usually, the status will change to ABORTING, but it is possible that
-# a job successfully completes (e.g. because it was almost done and there was
-# no opportunity to cancel earlier than completing the job) and transitions to
-# PENDING instead.
+# The job will cancel as soon as possible and then emit a
+# JOB_STATUS_CHANGE event.  Usually, the status will change to
+# ABORTING, but it is possible that a job successfully completes (e.g.
+# because it was almost done and there was no opportunity to cancel
+# earlier than completing the job) and transitions to PENDING instead.
 #
 # @id: The job identifier.
 #
 ##
 { 'command': 'job-cancel', 'data': { 'id': 'str' } }
 
-
 ##
 # @job-complete:
 #
 ##
 # @job-dismiss:
 #
-# Deletes a job that is in the CONCLUDED state. This command only needs to be
-# run explicitly for jobs that don't have automatic dismiss enabled.
+# Deletes a job that is in the CONCLUDED state.  This command only
+# needs to be run explicitly for jobs that don't have automatic
+# dismiss enabled.
 #
-# This command will refuse to operate on any job that has not yet reached its
-# terminal state, JOB_STATUS_CONCLUDED. For jobs that make use of JOB_READY
-# event, job-cancel or job-complete will still need to be used as appropriate.
+# This command will refuse to operate on any job that has not yet
+# reached its terminal state, JOB_STATUS_CONCLUDED. For jobs that make
+# use of JOB_READY event, job-cancel or job-complete will still need
+# to be used as appropriate.
 #
 # @id: The job identifier.
 #
 ##
 # @job-finalize:
 #
-# Instructs all jobs in a transaction (or a single job if it is not part of any
-# transaction) to finalize any graph changes and do any necessary cleanup. This
-# command requires that all involved jobs are in the PENDING state.
+# Instructs all jobs in a transaction (or a single job if it is not
+# part of any transaction) to finalize any graph changes and do any
+# necessary cleanup.  This command requires that all involved jobs are
+# in the PENDING state.
 #
-# For jobs in a transaction, instructing one job to finalize will force
-# ALL jobs in the transaction to finalize, so it is only necessary to instruct
-# a single member job to finalize.
+# For jobs in a transaction, instructing one job to finalize will
+# force ALL jobs in the transaction to finalize, so it is only
+# necessary to instruct a single member job to finalize.
 #
-# @id: The identifier of any job in the transaction, or of a job that is not
-#      part of any transaction.
+# @id: The identifier of any job in the transaction, or of a job that
+#     is not part of any transaction.
 #
 # Since: 3.0
 ##
 #
 # @status: Current job state/status
 #
-# @current-progress: Progress made until now. The unit is arbitrary and the
-#                    value can only meaningfully be used for the ratio of
-#                    @current-progress to @total-progress. The value is
-#                    monotonically increasing.
+# @current-progress: Progress made until now.  The unit is arbitrary
+#     and the value can only meaningfully be used for the ratio of
+#     @current-progress to @total-progress.  The value is
+#     monotonically increasing.
 #
-# @total-progress: Estimated @current-progress value at the completion of
-#                  the job. This value can arbitrarily change while the
-#                  job is running, in both directions.
+# @total-progress: Estimated @current-progress value at the completion
+#     of the job.  This value can arbitrarily change while the job is
+#     running, in both directions.
 #
-# @error: If this field is present, the job failed; if it is
-#         still missing in the CONCLUDED state, this indicates
-#         successful completion.
+# @error: If this field is present, the job failed; if it is still
+#     missing in the CONCLUDED state, this indicates successful
+#     completion.
 #
-#         The value is a human-readable error message to describe
-#         the reason for the job failure. It should not be parsed
-#         by applications.
+#     The value is a human-readable error message to describe the
+#     reason for the job failure.  It should not be parsed by
+#     applications.
 #
 # Since: 3.0
 ##
diff --git a/qapi/machine-common.json b/qapi/machine-common.json
new file mode 100644 (file)
index 0000000..fa6bd71
--- /dev/null
@@ -0,0 +1,21 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+##
+# = Machines S390 data types
+##
+
+##
+# @CpuS390Entitlement:
+#
+# An enumeration of CPU entitlements that can be assumed by a virtual
+# S390 CPU
+#
+# Since: 8.2
+##
+{ 'enum': 'CpuS390Entitlement',
+  'prefix': 'S390_CPU_ENTITLEMENT',
+  'data': [ 'auto', 'low', 'medium', 'high' ] }
index fec3bb8679fc303fed314c1e35a8b01bf9b7cec1..7b7149f81c64bf157cd7762510453ba10f0fa8ff 100644 (file)
@@ -4,20 +4,23 @@
 # This work is licensed under the terms of the GNU GPL, version 2 or later.
 # See the COPYING file in the top-level directory.
 
+{ 'include': 'machine-common.json' }
+
 ##
 # @CpuModelInfo:
 #
 # Virtual CPU model.
 #
-# A CPU model consists of the name of a CPU definition, to which
-# delta changes are applied (e.g. features added/removed). Most magic values
+# A CPU model consists of the name of a CPU definition, to which delta
+# changes are applied (e.g. features added/removed). Most magic values
 # that an architecture might require should be hidden behind the name.
 # However, if required, architectures can expose relevant properties.
 #
 # @name: the name of the CPU definition the model is based on
+#
 # @props: a dictionary of QOM properties to be applied
 #
-# Since: 2.8.0
+# Since: 2.8
 ##
 { 'struct': 'CpuModelInfo',
   'data': { 'name': 'str',
 #
 # An enumeration of CPU model expansion types.
 #
-# @static: Expand to a static CPU model, a combination of a static base
-#          model name and property delta changes. As the static base model will
-#          never change, the expanded CPU model will be the same, independent of
-#          QEMU version, machine type, machine options, and accelerator options.
-#          Therefore, the resulting model can be used by tooling without having
-#          to specify a compatibility machine - e.g. when displaying the "host"
-#          model. The @static CPU models are migration-safe.
-
-# @full: Expand all properties. The produced model is not guaranteed to be
-#        migration-safe, but allows tooling to get an insight and work with
-#        model details.
-#
-# Note: When a non-migration-safe CPU model is expanded in static mode, some
-#       features enabled by the CPU model may be omitted, because they can't be
-#       implemented by a static CPU model definition (e.g. cache info passthrough and
-#       PMU passthrough in x86). If you need an accurate representation of the
-#       features enabled by a non-migration-safe CPU model, use @full. If you need a
-#       static representation that will keep ABI compatibility even when changing QEMU
-#       version or machine-type, use @static (but keep in mind that some features may
-#       be omitted).
-#
-# Since: 2.8.0
+# @static: Expand to a static CPU model, a combination of a static
+#     base model name and property delta changes.  As the static base
+#     model will never change, the expanded CPU model will be the
+#     same, independent of QEMU version, machine type, machine
+#     options, and accelerator options.  Therefore, the resulting
+#     model can be used by tooling without having to specify a
+#     compatibility machine - e.g. when displaying the "host" model.
+#     The @static CPU models are migration-safe.
+#
+# @full: Expand all properties.  The produced model is not guaranteed
+#     to be migration-safe, but allows tooling to get an insight and
+#     work with model details.
+#
+# Note: When a non-migration-safe CPU model is expanded in static
+#     mode, some features enabled by the CPU model may be omitted,
+#     because they can't be implemented by a static CPU model
+#     definition (e.g. cache info passthrough and PMU passthrough in
+#     x86). If you need an accurate representation of the features
+#     enabled by a non-migration-safe CPU model, use @full.  If you
+#     need a static representation that will keep ABI compatibility
+#     even when changing QEMU version or machine-type, use @static
+#     (but keep in mind that some features may be omitted).
+#
+# Since: 2.8
 ##
 { 'enum': 'CpuModelExpansionType',
   'data': [ 'static', 'full' ] }
 
-
 ##
 # @CpuModelCompareResult:
 #
-# An enumeration of CPU model comparison results. The result is usually
-# calculated using e.g. CPU features or CPU generations.
+# An enumeration of CPU model comparison results.  The result is
+# usually calculated using e.g. CPU features or CPU generations.
 #
 # @incompatible: If model A is incompatible to model B, model A is not
-#                guaranteed to run where model B runs and the other way around.
+#     guaranteed to run where model B runs and the other way around.
 #
-# @identical: If model A is identical to model B, model A is guaranteed to run
-#             where model B runs and the other way around.
+# @identical: If model A is identical to model B, model A is
+#     guaranteed to run where model B runs and the other way around.
 #
-# @superset: If model A is a superset of model B, model B is guaranteed to run
-#            where model A runs. There are no guarantees about the other way.
+# @superset: If model A is a superset of model B, model B is
+#     guaranteed to run where model A runs.  There are no guarantees
+#     about the other way.
 #
-# @subset: If model A is a subset of model B, model A is guaranteed to run
-#          where model B runs. There are no guarantees about the other way.
+# @subset: If model A is a subset of model B, model A is guaranteed to
+#     run where model B runs.  There are no guarantees about the other
+#     way.
 #
-# Since: 2.8.0
+# Since: 2.8
 ##
 { 'enum': 'CpuModelCompareResult',
   'data': [ 'incompatible', 'identical', 'superset', 'subset' ] }
 #
 # @model: the baselined CpuModelInfo.
 #
-# Since: 2.8.0
+# Since: 2.8
 ##
 { 'struct': 'CpuModelBaselineInfo',
   'data': { 'model': 'CpuModelInfo' },
-  'if': 'defined(TARGET_S390X)' }
+  'if': 'TARGET_S390X' }
 
 ##
 # @CpuModelCompareInfo:
 # The result of a CPU model comparison.
 #
 # @result: The result of the compare operation.
-# @responsible-properties: List of properties that led to the comparison result
-#                          not being identical.
+#
+# @responsible-properties: List of properties that led to the
+#     comparison result not being identical.
 #
 # @responsible-properties is a list of QOM property names that led to
-# both CPUs not being detected as identical. For identical models, this
-# list is empty.
-# If a QOM property is read-only, that means there's no known way to make the
-# CPU models identical. If the special property name "type" is included, the
-# models are by definition not identical and cannot be made identical.
+# both CPUs not being detected as identical.  For identical models,
+# this list is empty.  If a QOM property is read-only, that means
+# there's no known way to make the CPU models identical.  If the
+# special property name "type" is included, the models are by
+# definition not identical and cannot be made identical.
 #
-# Since: 2.8.0
+# Since: 2.8
 ##
 { 'struct': 'CpuModelCompareInfo',
   'data': { 'result': 'CpuModelCompareResult',
             'responsible-properties': ['str'] },
-  'if': 'defined(TARGET_S390X)' }
+  'if': 'TARGET_S390X' }
 
 ##
 # @query-cpu-model-comparison:
 #
 # Compares two CPU models, returning how they compare in a specific
-# configuration. The results indicates how both models compare regarding
-# runnability. This result can be used by tooling to make decisions if a
-# certain CPU model will run in a certain configuration or if a compatible
-# CPU model has to be created by baselining.
+# configuration.  The results indicates how both models compare
+# regarding runnability.  This result can be used by tooling to make
+# decisions if a certain CPU model will run in a certain configuration
+# or if a compatible CPU model has to be created by baselining.
 #
-# Usually, a CPU model is compared against the maximum possible CPU model
-# of a certain configuration (e.g. the "host" model for KVM). If that CPU
-# model is identical or a subset, it will run in that configuration.
+# Usually, a CPU model is compared against the maximum possible CPU
+# model of a certain configuration (e.g. the "host" model for KVM).
+# If that CPU model is identical or a subset, it will run in that
+# configuration.
 #
 # The result returned by this command may be affected by:
 #
-# * QEMU version: CPU models may look different depending on the QEMU version.
-#   (Except for CPU models reported as "static" in query-cpu-definitions.)
-# * machine-type: CPU model may look different depending on the machine-type.
-#   (Except for CPU models reported as "static" in query-cpu-definitions.)
-# * machine options (including accelerator): in some architectures, CPU models
-#   may look different depending on machine and accelerator options. (Except for
-#   CPU models reported as "static" in query-cpu-definitions.)
-# * "-cpu" arguments and global properties: arguments to the -cpu option and
-#   global properties may affect expansion of CPU models. Using
-#   query-cpu-model-expansion while using these is not advised.
-#
-# Some architectures may not support comparing CPU models. s390x supports
-# comparing CPU models.
-#
-# Returns: a CpuModelBaselineInfo. Returns an error if comparing CPU models is
-#          not supported, if a model cannot be used, if a model contains
-#          an unknown cpu definition name, unknown properties or properties
-#          with wrong types.
+# * QEMU version: CPU models may look different depending on the QEMU
+#   version.  (Except for CPU models reported as "static" in
+#   query-cpu-definitions.)
+# * machine-type: CPU model may look different depending on the
+#   machine-type.  (Except for CPU models reported as "static" in
+#   query-cpu-definitions.)
+# * machine options (including accelerator): in some architectures,
+#   CPU models may look different depending on machine and accelerator
+#   options.  (Except for CPU models reported as "static" in
+#   query-cpu-definitions.)
+# * "-cpu" arguments and global properties: arguments to the -cpu
+#   option and global properties may affect expansion of CPU models.
+#   Using query-cpu-model-expansion while using these is not advised.
+#
+# Some architectures may not support comparing CPU models.  s390x
+# supports comparing CPU models.
+#
+# Returns: a CpuModelBaselineInfo.  Returns an error if comparing CPU
+#     models is not supported, if a model cannot be used, if a model
+#     contains an unknown cpu definition name, unknown properties or
+#     properties with wrong types.
 #
 # Note: this command isn't specific to s390x, but is only implemented
-#       on this architecture currently.
+#     on this architecture currently.
 #
-# Since: 2.8.0
+# Since: 2.8
 ##
 { 'command': 'query-cpu-model-comparison',
   'data': { 'modela': 'CpuModelInfo', 'modelb': 'CpuModelInfo' },
   'returns': 'CpuModelCompareInfo',
-  'if': 'defined(TARGET_S390X)' }
+  'if': 'TARGET_S390X' }
 
 ##
 # @query-cpu-model-baseline:
 #
-# Baseline two CPU models, creating a compatible third model. The created
-# model will always be a static, migration-safe CPU model (see "static"
-# CPU model expansion for details).
+# Baseline two CPU models, creating a compatible third model.  The
+# created model will always be a static, migration-safe CPU model (see
+# "static" CPU model expansion for details).
 #
-# This interface can be used by tooling to create a compatible CPU model out
-# two CPU models. The created CPU model will be identical to or a subset of
-# both CPU models when comparing them. Therefore, the created CPU model is
-# guaranteed to run where the given CPU models run.
+# This interface can be used by tooling to create a compatible CPU
+# model out two CPU models.  The created CPU model will be identical
+# to or a subset of both CPU models when comparing them.  Therefore,
+# the created CPU model is guaranteed to run where the given CPU
+# models run.
 #
 # The result returned by this command may be affected by:
 #
-# * QEMU version: CPU models may look different depending on the QEMU version.
-#   (Except for CPU models reported as "static" in query-cpu-definitions.)
-# * machine-type: CPU model may look different depending on the machine-type.
-#   (Except for CPU models reported as "static" in query-cpu-definitions.)
-# * machine options (including accelerator): in some architectures, CPU models
-#   may look different depending on machine and accelerator options. (Except for
-#   CPU models reported as "static" in query-cpu-definitions.)
-# * "-cpu" arguments and global properties: arguments to the -cpu option and
-#   global properties may affect expansion of CPU models. Using
-#   query-cpu-model-expansion while using these is not advised.
-#
-# Some architectures may not support baselining CPU models. s390x supports
-# baselining CPU models.
-#
-# Returns: a CpuModelBaselineInfo. Returns an error if baselining CPU models is
-#          not supported, if a model cannot be used, if a model contains
-#          an unknown cpu definition name, unknown properties or properties
-#          with wrong types.
+# * QEMU version: CPU models may look different depending on the QEMU
+#   version.  (Except for CPU models reported as "static" in
+#   query-cpu-definitions.)
+# * machine-type: CPU model may look different depending on the
+#   machine-type.  (Except for CPU models reported as "static" in
+#   query-cpu-definitions.)
+# * machine options (including accelerator): in some architectures,
+#   CPU models may look different depending on machine and accelerator
+#   options.  (Except for CPU models reported as "static" in
+#   query-cpu-definitions.)
+# * "-cpu" arguments and global properties: arguments to the -cpu
+#   option and global properties may affect expansion of CPU models.
+#   Using query-cpu-model-expansion while using these is not advised.
+#
+# Some architectures may not support baselining CPU models.  s390x
+# supports baselining CPU models.
+#
+# Returns: a CpuModelBaselineInfo.  Returns an error if baselining CPU
+#     models is not supported, if a model cannot be used, if a model
+#     contains an unknown cpu definition name, unknown properties or
+#     properties with wrong types.
 #
 # Note: this command isn't specific to s390x, but is only implemented
-#       on this architecture currently.
+#     on this architecture currently.
 #
-# Since: 2.8.0
+# Since: 2.8
 ##
 { 'command': 'query-cpu-model-baseline',
   'data': { 'modela': 'CpuModelInfo',
             'modelb': 'CpuModelInfo' },
   'returns': 'CpuModelBaselineInfo',
-  'if': 'defined(TARGET_S390X)' }
+  'if': 'TARGET_S390X' }
 
 ##
 # @CpuModelExpansionInfo:
 #
 # @model: the expanded CpuModelInfo.
 #
-# Since: 2.8.0
+# Since: 2.8
 ##
 { 'struct': 'CpuModelExpansionInfo',
   'data': { 'model': 'CpuModelInfo' },
-  'if': 'defined(TARGET_S390X) || defined(TARGET_I386) || defined(TARGET_ARM)' }
+  'if': { 'any': [ 'TARGET_S390X',
+                   'TARGET_I386',
+                   'TARGET_ARM',
+                   'TARGET_LOONGARCH64',
+                   'TARGET_RISCV' ] } }
 
 ##
 # @query-cpu-model-expansion:
 #
-# Expands a given CPU model (or a combination of CPU model + additional options)
-# to different granularities, allowing tooling to get an understanding what a
-# specific CPU model looks like in QEMU under a certain configuration.
+# Expands a given CPU model (or a combination of CPU model +
+# additional options) to different granularities, allowing tooling to
+# get an understanding what a specific CPU model looks like in QEMU
+# under a certain configuration.
 #
 # This interface can be used to query the "host" CPU model.
 #
 # The data returned by this command may be affected by:
 #
-# * QEMU version: CPU models may look different depending on the QEMU version.
-#   (Except for CPU models reported as "static" in query-cpu-definitions.)
-# * machine-type: CPU model  may look different depending on the machine-type.
-#   (Except for CPU models reported as "static" in query-cpu-definitions.)
-# * machine options (including accelerator): in some architectures, CPU models
-#   may look different depending on machine and accelerator options. (Except for
-#   CPU models reported as "static" in query-cpu-definitions.)
-# * "-cpu" arguments and global properties: arguments to the -cpu option and
-#   global properties may affect expansion of CPU models. Using
-#   query-cpu-model-expansion while using these is not advised.
-#
-# Some architectures may not support all expansion types. s390x supports
-# "full" and "static". Arm only supports "full".
-#
-# Returns: a CpuModelExpansionInfo. Returns an error if expanding CPU models is
-#          not supported, if the model cannot be expanded, if the model contains
-#          an unknown CPU definition name, unknown properties or properties
-#          with a wrong type. Also returns an error if an expansion type is
-#          not supported.
-#
-# Since: 2.8.0
+# * QEMU version: CPU models may look different depending on the QEMU
+#   version.  (Except for CPU models reported as "static" in
+#   query-cpu-definitions.)
+# * machine-type: CPU model may look different depending on the
+#   machine-type.  (Except for CPU models reported as "static" in
+#   query-cpu-definitions.)
+# * machine options (including accelerator): in some architectures,
+#   CPU models may look different depending on machine and accelerator
+#   options.  (Except for CPU models reported as "static" in
+#   query-cpu-definitions.)
+# * "-cpu" arguments and global properties: arguments to the -cpu
+#   option and global properties may affect expansion of CPU models.
+#   Using query-cpu-model-expansion while using these is not advised.
+#
+# Some architectures may not support all expansion types.  s390x
+# supports "full" and "static". Arm only supports "full".
+#
+# Returns: a CpuModelExpansionInfo.  Returns an error if expanding CPU
+#     models is not supported, if the model cannot be expanded, if the
+#     model contains an unknown CPU definition name, unknown
+#     properties or properties with a wrong type.  Also returns an
+#     error if an expansion type is not supported.
+#
+# Since: 2.8
 ##
 { 'command': 'query-cpu-model-expansion',
   'data': { 'type': 'CpuModelExpansionType',
             'model': 'CpuModelInfo' },
   'returns': 'CpuModelExpansionInfo',
-  'if': 'defined(TARGET_S390X) || defined(TARGET_I386) || defined(TARGET_ARM)' }
+  'if': { 'any': [ 'TARGET_S390X',
+                   'TARGET_I386',
+                   'TARGET_ARM',
+                   'TARGET_LOONGARCH64',
+                   'TARGET_RISCV' ] } }
 
 ##
 # @CpuDefinitionInfo:
 # @name: the name of the CPU definition
 #
 # @migration-safe: whether a CPU definition can be safely used for
-#                  migration in combination with a QEMU compatibility machine
-#                  when migrating between different QEMU versions and between
-#                  hosts with different sets of (hardware or software)
-#                  capabilities. If not provided, information is not available
-#                  and callers should not assume the CPU definition to be
-#                  migration-safe. (since 2.8)
-#
-# @static: whether a CPU definition is static and will not change depending on
-#          QEMU version, machine type, machine options and accelerator options.
-#          A static model is always migration-safe. (since 2.8)
-#
-# @unavailable-features: List of properties that prevent
-#                        the CPU model from running in the current
-#                        host. (since 2.8)
-# @typename: Type name that can be used as argument to @device-list-properties,
-#            to introspect properties configurable using -cpu or -global.
-#            (since 2.9)
-#
-# @alias-of: Name of CPU model this model is an alias for.  The target of the
-#            CPU model alias may change depending on the machine type.
-#            Management software is supposed to translate CPU model aliases
-#            in the VM configuration, because aliases may stop being
-#            migration-safe in the future (since 4.1)
-#
-# @deprecated: If true, this CPU model is deprecated and may be removed in
-#              in some future version of QEMU according to the QEMU deprecation
-#              policy. (since 5.2)
-#
-# @unavailable-features is a list of QOM property names that
-# represent CPU model attributes that prevent the CPU from running.
-# If the QOM property is read-only, that means there's no known
-# way to make the CPU model run in the current host. Implementations
-# that choose not to provide specific information return the
-# property name "type".
-# If the property is read-write, it means that it MAY be possible
-# to run the CPU model in the current host if that property is
-# changed. Management software can use it as hints to suggest or
-# choose an alternative for the user, or just to generate meaningful
-# error messages explaining why the CPU model can't be used.
-# If @unavailable-features is an empty list, the CPU model is
-# runnable using the current host and machine-type.
-# If @unavailable-features is not present, runnability
-# information for the CPU is not available.
-#
-# Since: 1.2.0
+#     migration in combination with a QEMU compatibility machine when
+#     migrating between different QEMU versions and between hosts with
+#     different sets of (hardware or software) capabilities.  If not
+#     provided, information is not available and callers should not
+#     assume the CPU definition to be migration-safe.  (since 2.8)
+#
+# @static: whether a CPU definition is static and will not change
+#     depending on QEMU version, machine type, machine options and
+#     accelerator options.  A static model is always migration-safe.
+#     (since 2.8)
+#
+# @unavailable-features: List of properties that prevent the CPU model
+#     from running in the current host.  (since 2.8)
+#
+# @typename: Type name that can be used as argument to
+#     @device-list-properties, to introspect properties configurable
+#     using -cpu or -global.  (since 2.9)
+#
+# @alias-of: Name of CPU model this model is an alias for.  The target
+#     of the CPU model alias may change depending on the machine type.
+#     Management software is supposed to translate CPU model aliases
+#     in the VM configuration, because aliases may stop being
+#     migration-safe in the future (since 4.1)
+#
+# @deprecated: If true, this CPU model is deprecated and may be
+#     removed in in some future version of QEMU according to the QEMU
+#     deprecation policy.  (since 5.2)
+#
+# @unavailable-features is a list of QOM property names that represent
+# CPU model attributes that prevent the CPU from running.  If the QOM
+# property is read-only, that means there's no known way to make the
+# CPU model run in the current host.  Implementations that choose not
+# to provide specific information return the property name "type". If
+# the property is read-write, it means that it MAY be possible to run
+# the CPU model in the current host if that property is changed.
+# Management software can use it as hints to suggest or choose an
+# alternative for the user, or just to generate meaningful error
+# messages explaining why the CPU model can't be used.  If
+# @unavailable-features is an empty list, the CPU model is runnable
+# using the current host and machine-type.  If @unavailable-features
+# is not present, runnability information for the CPU is not
+# available.
+#
+# Since: 1.2
 ##
 { 'struct': 'CpuDefinitionInfo',
   'data': { 'name': 'str',
             'typename': 'str',
             '*alias-of' : 'str',
             'deprecated' : 'bool' },
-  'if': 'defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_I386) || defined(TARGET_S390X) || defined(TARGET_MIPS)' }
+  'if': { 'any': [ 'TARGET_PPC',
+                   'TARGET_ARM',
+                   'TARGET_I386',
+                   'TARGET_S390X',
+                   'TARGET_MIPS',
+                   'TARGET_LOONGARCH64',
+                   'TARGET_RISCV' ] } }
 
 ##
 # @query-cpu-definitions:
 #
 # Return a list of supported virtual CPU definitions
 #
-# Returns: a list of CpuDefInfo
+# Returns: a list of CpuDefinitionInfo
 #
-# Since: 1.2.0
+# Since: 1.2
 ##
 { 'command': 'query-cpu-definitions', 'returns': ['CpuDefinitionInfo'],
-  'if': 'defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_I386) || defined(TARGET_S390X) || defined(TARGET_MIPS)' }
+  'if': { 'any': [ 'TARGET_PPC',
+                   'TARGET_ARM',
+                   'TARGET_I386',
+                   'TARGET_S390X',
+                   'TARGET_MIPS',
+                   'TARGET_LOONGARCH64',
+                   'TARGET_RISCV' ] } }
+
+##
+# @CpuS390Polarization:
+#
+# An enumeration of CPU polarization that can be assumed by a virtual
+# S390 CPU
+#
+# Since: 8.2
+##
+{ 'enum': 'CpuS390Polarization',
+  'prefix': 'S390_CPU_POLARIZATION',
+  'data': [ 'horizontal', 'vertical' ],
+  'if': 'TARGET_S390X'
+}
+
+##
+# @set-cpu-topology:
+#
+# Modify the topology by moving the CPU inside the topology tree,
+# or by changing a modifier attribute of a CPU.
+# Absent values will not be modified.
+#
+# @core-id: the vCPU ID to be moved
+#
+# @socket-id: destination socket to move the vCPU to
+#
+# @book-id: destination book to move the vCPU to
+#
+# @drawer-id: destination drawer to move the vCPU to
+#
+# @entitlement: entitlement to set
+#
+# @dedicated: whether the provisioning of real to virtual CPU is dedicated
+#
+# Features:
+#
+# @unstable: This command is experimental.
+#
+# Returns: Nothing on success.
+#
+# Since: 8.2
+##
+{ 'command': 'set-cpu-topology',
+  'data': {
+      'core-id': 'uint16',
+      '*socket-id': 'uint16',
+      '*book-id': 'uint16',
+      '*drawer-id': 'uint16',
+      '*entitlement': 'CpuS390Entitlement',
+      '*dedicated': 'bool'
+  },
+  'features': [ 'unstable' ],
+  'if': { 'all': [ 'TARGET_S390X' , 'CONFIG_KVM' ] }
+}
+
+##
+# @CPU_POLARIZATION_CHANGE:
+#
+# Emitted when the guest asks to change the polarization.
+#
+# The guest can tell the host (via the PTF instruction) whether the
+# CPUs should be provisioned using horizontal or vertical polarization.
+#
+# On horizontal polarization the host is expected to provision all vCPUs
+# equally.
+#
+# On vertical polarization the host can provision each vCPU differently.
+# The guest will get information on the details of the provisioning
+# the next time it uses the STSI(15) instruction.
+#
+# @polarization: polarization specified by the guest
+#
+# Features:
+#
+# @unstable: This event is experimental.
+#
+# Since: 8.2
+#
+# Example:
+#
+# <- { "event": "CPU_POLARIZATION_CHANGE",
+#      "data": { "polarization": "horizontal" },
+#      "timestamp": { "seconds": 1401385907, "microseconds": 422329 } }
+##
+{ 'event': 'CPU_POLARIZATION_CHANGE',
+  'data': { 'polarization': 'CpuS390Polarization' },
+  'features': [ 'unstable' ],
+  'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] }
+}
+
+##
+# @CpuPolarizationInfo:
+#
+# The result of a CPU polarization query.
+#
+# @polarization: the CPU polarization
+#
+# Since: 8.2
+##
+{ 'struct': 'CpuPolarizationInfo',
+  'data': { 'polarization': 'CpuS390Polarization' },
+  'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] }
+}
+
+##
+# @query-s390x-cpu-polarization:
+#
+# Features:
+#
+# @unstable: This command is experimental.
+#
+# Returns: the machine's CPU polarization
+#
+# Since: 8.2
+##
+{ 'command': 'query-s390x-cpu-polarization', 'returns': 'CpuPolarizationInfo',
+  'features': [ 'unstable' ],
+  'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] }
+}
index 7c9a263778d8b5757d828f7f833de517ea3c5ffc..b6d634b30d55565d84189185c612255cbf4631b8 100644 (file)
 # = Machines
 ##
 
+{ 'include': 'common.json' }
+{ 'include': 'machine-common.json' }
+
 ##
 # @SysEmuTarget:
 #
 # The comprehensive enumeration of QEMU system emulation ("softmmu")
-# targets. Run "./configure --help" in the project root directory, and
-# look for the \*-softmmu targets near the "--target-list" option. The
-# individual target constants are not documented here, for the time
-# being.
+# targets.  Run "./configure --help" in the project root directory,
+# and look for the \*-softmmu targets near the "--target-list" option.
+# The individual target constants are not documented here, for the
+# time being.
 #
 # @rx: since 5.0
+#
 # @avr: since 5.1
 #
-# Notes: The resulting QMP strings can be appended to the "qemu-system-"
-#        prefix to produce the corresponding QEMU executable name. This
-#        is true even for "qemu-system-x86_64".
+# Notes: The resulting QMP strings can be appended to the
+#     "qemu-system-" prefix to produce the corresponding QEMU
+#     executable name.  This is true even for "qemu-system-x86_64".
 #
 # Since: 3.0
 ##
 { 'enum' : 'SysEmuTarget',
-  'data' : [ 'aarch64', 'alpha', 'arm', 'avr', 'cris', 'hppa', 'i386', 'lm32',
-             'm68k', 'microblaze', 'microblazeel', 'mips', 'mips64',
-             'mips64el', 'mipsel', 'moxie', 'nios2', 'or1k', 'ppc',
+  'data' : [ 'aarch64', 'alpha', 'arm', 'avr', 'cris', 'hppa', 'i386',
+             'loongarch64', 'm68k', 'microblaze', 'microblazeel', 'mips', 'mips64',
+             'mips64el', 'mipsel', 'nios2', 'or1k', 'ppc',
              'ppc64', 'riscv32', 'riscv64', 'rx', 's390x', 'sh4',
-             'sh4eb', 'sparc', 'sparc64', 'tricore', 'unicore32',
+             'sh4eb', 'sparc', 'sparc64', 'tricore',
              'x86_64', 'xtensa', 'xtensaeb' ] }
 
-##
-# @CpuInfoArch:
-#
-# An enumeration of cpu types that enable additional information during
-# @query-cpus and @query-cpus-fast.
-#
-# @s390: since 2.12
-#
-# @riscv: since 2.12
-#
-# Since: 2.6
-##
-{ 'enum': 'CpuInfoArch',
-  'data': ['x86', 'sparc', 'ppc', 'mips', 'tricore', 's390', 'riscv', 'other' ] }
-
-##
-# @CpuInfo:
-#
-# Information about a virtual CPU
-#
-# @CPU: the index of the virtual CPU
-#
-# @current: this only exists for backwards compatibility and should be ignored
-#
-# @halted: true if the virtual CPU is in the halt state.  Halt usually refers
-#          to a processor specific low power mode.
-#
-# @qom_path: path to the CPU object in the QOM tree (since 2.4)
-#
-# @thread_id: ID of the underlying host thread
-#
-# @props: properties describing to which node/socket/core/thread
-#         virtual CPU belongs to, provided if supported by board (since 2.10)
-#
-# @arch: architecture of the cpu, which determines which additional fields
-#        will be listed (since 2.6)
-#
-# Since: 0.14.0
-#
-# Notes: @halted is a transient state that changes frequently.  By the time the
-#        data is sent to the client, the guest may no longer be halted.
-##
-{ 'union': 'CpuInfo',
-  'base': {'CPU': 'int', 'current': 'bool', 'halted': 'bool',
-           'qom_path': 'str', 'thread_id': 'int',
-           '*props': 'CpuInstanceProperties', 'arch': 'CpuInfoArch' },
-  'discriminator': 'arch',
-  'data': { 'x86': 'CpuInfoX86',
-            'sparc': 'CpuInfoSPARC',
-            'ppc': 'CpuInfoPPC',
-            'mips': 'CpuInfoMIPS',
-            'tricore': 'CpuInfoTricore',
-            's390': 'CpuInfoS390',
-            'riscv': 'CpuInfoRISCV' } }
-
-##
-# @CpuInfoX86:
-#
-# Additional information about a virtual i386 or x86_64 CPU
-#
-# @pc: the 64-bit instruction pointer
-#
-# Since: 2.6
-##
-{ 'struct': 'CpuInfoX86', 'data': { 'pc': 'int' } }
-
-##
-# @CpuInfoSPARC:
-#
-# Additional information about a virtual SPARC CPU
-#
-# @pc: the PC component of the instruction pointer
-#
-# @npc: the NPC component of the instruction pointer
-#
-# Since: 2.6
-##
-{ 'struct': 'CpuInfoSPARC', 'data': { 'pc': 'int', 'npc': 'int' } }
-
-##
-# @CpuInfoPPC:
-#
-# Additional information about a virtual PPC CPU
-#
-# @nip: the instruction pointer
-#
-# Since: 2.6
-##
-{ 'struct': 'CpuInfoPPC', 'data': { 'nip': 'int' } }
-
-##
-# @CpuInfoMIPS:
-#
-# Additional information about a virtual MIPS CPU
-#
-# @PC: the instruction pointer
-#
-# Since: 2.6
-##
-{ 'struct': 'CpuInfoMIPS', 'data': { 'PC': 'int' } }
-
-##
-# @CpuInfoTricore:
-#
-# Additional information about a virtual Tricore CPU
-#
-# @PC: the instruction pointer
-#
-# Since: 2.6
-##
-{ 'struct': 'CpuInfoTricore', 'data': { 'PC': 'int' } }
-
-##
-# @CpuInfoRISCV:
-#
-# Additional information about a virtual RISCV CPU
-#
-# @pc: the instruction pointer
-#
-# Since 2.12
-##
-{ 'struct': 'CpuInfoRISCV', 'data': { 'pc': 'int' } }
-
 ##
 # @CpuS390State:
 #
-# An enumeration of cpu states that can be assumed by a virtual
-# S390 CPU
+# An enumeration of cpu states that can be assumed by a virtual S390
+# CPU
 #
 # Since: 2.12
 ##
 #
 # @cpu-state: the virtual CPU's state
 #
-# Since: 2.12
-##
-{ 'struct': 'CpuInfoS390', 'data': { 'cpu-state': 'CpuS390State' } }
-
-##
-# @query-cpus:
-#
-# Returns a list of information about each virtual CPU.
-#
-# This command causes vCPU threads to exit to userspace, which causes
-# a small interruption to guest CPU execution. This will have a negative
-# impact on realtime guests and other latency sensitive guest workloads.
-#
-# Features:
-# @deprecated: This command is deprecated, because it interferes with
-#              the guest.  Use 'query-cpus-fast' instead to avoid the vCPU
-#              interruption.
-#
-# Returns: a list of @CpuInfo for each virtual CPU
+# @dedicated: the virtual CPU's dedication (since 8.2)
 #
-# Since: 0.14.0
-#
-# Example:
-#
-# -> { "execute": "query-cpus" }
-# <- { "return": [
-#          {
-#             "CPU":0,
-#             "current":true,
-#             "halted":false,
-#             "qom_path":"/machine/unattached/device[0]",
-#             "arch":"x86",
-#             "pc":3227107138,
-#             "thread_id":3134
-#          },
-#          {
-#             "CPU":1,
-#             "current":false,
-#             "halted":true,
-#             "qom_path":"/machine/unattached/device[2]",
-#             "arch":"x86",
-#             "pc":7108165,
-#             "thread_id":3135
-#          }
-#       ]
-#    }
+# @entitlement: the virtual CPU's entitlement (since 8.2)
 #
+# Since: 2.12
 ##
-{ 'command': 'query-cpus', 'returns': ['CpuInfo'],
-  'features': [ 'deprecated' ] }
+{ 'struct': 'CpuInfoS390',
+  'data': { 'cpu-state': 'CpuS390State',
+            '*dedicated': 'bool',
+            '*entitlement': 'CpuS390Entitlement' } }
 
 ##
 # @CpuInfoFast:
 #
 # @thread-id: ID of the underlying host thread
 #
-# @props: properties describing to which node/socket/core/thread
-#         virtual CPU belongs to, provided if supported by board
-#
-# @arch: base architecture of the cpu
+# @props: properties associated with a virtual CPU, e.g. the socket id
 #
 # @target: the QEMU system emulation target, which determines which
-#          additional fields will be listed (since 3.0)
-#
-# Features:
-# @deprecated: Member @arch is deprecated.  Use @target instead.
+#     additional fields will be listed (since 3.0)
 #
 # Since: 2.12
-#
 ##
 { 'union'         : 'CpuInfoFast',
   'base'          : { 'cpu-index'    : 'int',
                       'qom-path'     : 'str',
                       'thread-id'    : 'int',
                       '*props'       : 'CpuInstanceProperties',
-                      'arch'         : { 'type': 'CpuInfoArch',
-                                         'features': [ 'deprecated' ] },
                       'target'       : 'SysEmuTarget' },
   'discriminator' : 'target',
   'data'          : { 's390x'        : 'CpuInfoS390' } }
 ##
 # @query-cpus-fast:
 #
-# Returns information about all virtual CPUs. This command does not
-# incur a performance penalty and should be used in production
-# instead of query-cpus.
+# Returns information about all virtual CPUs.
 #
 # Returns: list of @CpuInfoFast
 #
 #                 "socket-id": 0
 #             },
 #             "qom-path": "/machine/unattached/device[0]",
-#             "arch":"x86",
 #             "target":"x86_64",
 #             "cpu-index": 0
 #         },
 #                 "socket-id": 1
 #             },
 #             "qom-path": "/machine/unattached/device[2]",
-#             "arch":"x86",
 #             "target":"x86_64",
 #             "cpu-index": 1
 #         }
 # @is-default: whether the machine is default
 #
 # @cpu-max: maximum number of CPUs supported by the machine type
-#           (since 1.5.0)
+#     (since 1.5)
 #
-# @hotpluggable-cpus: cpu hotplug via -device is supported (since 2.7.0)
+# @hotpluggable-cpus: cpu hotplug via -device is supported (since 2.7)
 #
 # @numa-mem-supported: true if '-numa node,mem' option is supported by
-#                      the machine type and false otherwise (since 4.1)
+#     the machine type and false otherwise (since 4.1)
 #
-# @deprecated: if true, the machine type is deprecated and may be removed
-#              in future versions of QEMU according to the QEMU deprecation
-#              policy (since 4.1.0)
+# @deprecated: if true, the machine type is deprecated and may be
+#     removed in future versions of QEMU according to the QEMU
+#     deprecation policy (since 4.1)
 #
-# @default-cpu-type: default CPU model typename if none is requested via
-#                    the -cpu argument. (since 4.2)
+# @default-cpu-type: default CPU model typename if none is requested
+#     via the -cpu argument.  (since 4.2)
 #
-# @default-ram-id: the default ID of initial RAM memory backend (since 5.2)
+# @default-ram-id: the default ID of initial RAM memory backend (since
+#     5.2)
 #
-# Since: 1.2.0
+# @acpi: machine type supports ACPI (since 8.0)
+#
+# Since: 1.2
 ##
 { 'struct': 'MachineInfo',
   'data': { 'name': 'str', '*alias': 'str',
             '*is-default': 'bool', 'cpu-max': 'int',
             'hotpluggable-cpus': 'bool',  'numa-mem-supported': 'bool',
             'deprecated': 'bool', '*default-cpu-type': 'str',
-            '*default-ram-id': 'str' } }
+            '*default-ram-id': 'str', 'acpi': 'bool' } }
 
 ##
 # @query-machines:
 #
 # Returns: a list of MachineInfo
 #
-# Since: 1.2.0
+# Since: 1.2
 ##
 { 'command': 'query-machines', 'returns': ['MachineInfo'] }
 
 # Information describing the running machine parameters.
 #
 # @wakeup-suspend-support: true if the machine supports wake up from
-#                          suspend
+#     suspend
 #
 # Since: 4.0
 ##
 #
 # @arch: the target architecture
 #
-# Since: 1.2.0
+# Since: 1.2
 ##
 { 'struct': 'TargetInfo',
   'data': { 'arch': 'SysEmuTarget' } }
 #
 # Returns: TargetInfo
 #
-# Since: 1.2.0
+# Since: 1.2
 ##
 { 'command': 'query-target', 'returns': 'TargetInfo' }
 
 #
 # @UUID: the UUID of the guest
 #
-# Since: 0.14.0
+# Since: 0.14
 #
-# Notes: If no UUID was specified for the guest, a null UUID is returned.
+# Notes: If no UUID was specified for the guest, a null UUID is
+#     returned.
 ##
 { 'struct': 'UuidInfo', 'data': {'UUID': 'str'} }
 
 #
 # Returns: The @UuidInfo for the guest
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 # -> { "execute": "query-uuid" }
 # <- { "return": { "UUID": "550e8400-e29b-41d4-a716-446655440000" } }
-#
 ##
 { 'command': 'query-uuid', 'returns': 'UuidInfo', 'allow-preconfig': true }
 
 #
 # Performs a hard reset of a guest.
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 # -> { "execute": "system_reset" }
 # <- { "return": {} }
-#
 ##
 { 'command': 'system_reset' }
 
 #
 # Requests that a guest perform a powerdown operation.
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Notes: A guest may or may not respond to this command.  This command
-#        returning does not indicate that a guest has accepted the request or
-#        that it has shut down.  Many guests will respond to this command by
-#        prompting the user in some way.
+#     returning does not indicate that a guest has accepted the
+#     request or that it has shut down.  Many guests will respond to
+#     this command by prompting the user in some way.
+#
 # Example:
 #
 # -> { "execute": "system_powerdown" }
 # <- { "return": {} }
-#
 ##
 { 'command': 'system_powerdown' }
 
 ##
 # @system_wakeup:
 #
-# Wake up guest from suspend. If the guest has wake-up from suspend
+# Wake up guest from suspend.  If the guest has wake-up from suspend
 # support enabled (wakeup-suspend-support flag from
 # query-current-machine), wake-up guest from suspend if the guest is
-# in SUSPENDED state. Return an error otherwise.
+# in SUSPENDED state.  Return an error otherwise.
 #
-# Since:  1.1
+# Since: 1.1
 #
-# Returns:  nothing.
+# Returns: nothing.
 #
 # Note: prior to 4.0, this command does nothing in case the guest
-#       isn't suspended.
+#     isn't suspended.
 #
 # Example:
 #
 # -> { "execute": "system_wakeup" }
 # <- { "return": {} }
-#
 ##
 { 'command': 'system_wakeup' }
 
 ##
 # @LostTickPolicy:
 #
-# Policy for handling lost ticks in timer devices.  Ticks end up getting
-# lost when, for example, the guest is paused.
-#
-# @discard: throw away the missed ticks and continue with future injection
-#           normally.  The guest OS will see the timer jump ahead by a
-#           potentially quite significant amount all at once, as if the
-#           intervening chunk of time had simply not existed; needless to
-#           say, such a sudden jump can easily confuse a guest OS which is
-#           not specifically prepared to deal with it.  Assuming the guest
-#           OS can deal correctly with the time jump, the time in the guest
-#           and in the host should now match.
-#
-# @delay: continue to deliver ticks at the normal rate.  The guest OS will
-#         not notice anything is amiss, as from its point of view time will
-#         have continued to flow normally.  The time in the guest should now
-#         be behind the time in the host by exactly the amount of time during
-#         which ticks have been missed.
-#
-# @slew: deliver ticks at a higher rate to catch up with the missed ticks.
-#        The guest OS will not notice anything is amiss, as from its point
-#        of view time will have continued to flow normally.  Once the timer
-#        has managed to catch up with all the missing ticks, the time in
-#        the guest and in the host should match.
+# Policy for handling lost ticks in timer devices.  Ticks end up
+# getting lost when, for example, the guest is paused.
+#
+# @discard: throw away the missed ticks and continue with future
+#     injection normally.  The guest OS will see the timer jump ahead
+#     by a potentially quite significant amount all at once, as if the
+#     intervening chunk of time had simply not existed; needless to
+#     say, such a sudden jump can easily confuse a guest OS which is
+#     not specifically prepared to deal with it.  Assuming the guest
+#     OS can deal correctly with the time jump, the time in the guest
+#     and in the host should now match.
+#
+# @delay: continue to deliver ticks at the normal rate.  The guest OS
+#     will not notice anything is amiss, as from its point of view
+#     time will have continued to flow normally.  The time in the
+#     guest should now be behind the time in the host by exactly the
+#     amount of time during which ticks have been missed.
+#
+# @slew: deliver ticks at a higher rate to catch up with the missed
+#     ticks.  The guest OS will not notice anything is amiss, as from
+#     its point of view time will have continued to flow normally.
+#     Once the timer has managed to catch up with all the missing
+#     ticks, the time in the guest and in the host should match.
 #
 # Since: 2.0
 ##
 ##
 # @inject-nmi:
 #
-# Injects a Non-Maskable Interrupt into the default CPU (x86/s390) or all CPUs (ppc64).
-# The command fails when the guest doesn't support injecting.
+# Injects a Non-Maskable Interrupt into the default CPU (x86/s390) or
+# all CPUs (ppc64). The command fails when the guest doesn't support
+# injecting.
 #
-# Returns:  If successful, nothing
+# Returns: If successful, nothing
 #
-# Since:  0.14.0
+# Since: 0.14
 #
-# Note: prior to 2.1, this command was only supported for x86 and s390 VMs
+# Note: prior to 2.1, this command was only supported for x86 and s390
+#     VMs
 #
 # Example:
 #
 # -> { "execute": "inject-nmi" }
 # <- { "return": {} }
-#
 ##
 { 'command': 'inject-nmi' }
 
 #
 # @present: true if KVM acceleration is built into this executable
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'KvmInfo', 'data': {'enabled': 'bool', 'present': 'bool'} }
 
 #
 # Returns: @KvmInfo
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 # -> { "execute": "query-kvm" }
 # <- { "return": { "enabled": true, "present": true } }
-#
 ##
 { 'command': 'query-kvm', 'returns': 'KvmInfo' }
 
 ##
 # @NumaOptions:
 #
-# A discriminated record of NUMA options. (for OptsVisitor)
+# A discriminated record of NUMA options.  (for OptsVisitor)
 #
 # Since: 2.1
 ##
 ##
 # @NumaNodeOptions:
 #
-# Create a guest NUMA node. (for OptsVisitor)
+# Create a guest NUMA node.  (for OptsVisitor)
 #
 # @nodeid: NUMA node ID (increase by 1 from 0 if omitted)
 #
-# @cpus: VCPUs belonging to this node (assign VCPUS round-robin
-#         if omitted)
+# @cpus: VCPUs belonging to this node (assign VCPUS round-robin if
+#     omitted)
 #
 # @mem: memory size of this node; mutually exclusive with @memdev.
-#       Equally divide total memory among nodes if both @mem and @memdev are
-#       omitted.
+#     Equally divide total memory among nodes if both @mem and @memdev
+#     are omitted.
 #
-# @memdev: memory backend object.  If specified for one node,
-#          it must be specified for all nodes.
+# @memdev: memory backend object.  If specified for one node, it must
+#     be specified for all nodes.
 #
-# @initiator: defined in ACPI 6.3 Chapter 5.2.27.3 Table 5-145,
-#             points to the nodeid which has the memory controller
-#             responsible for this NUMA node. This field provides
-#             additional information as to the initiator node that
-#             is closest (as in directly attached) to this node, and
-#             therefore has the best performance (since 5.0)
+# @initiator: defined in ACPI 6.3 Chapter 5.2.27.3 Table 5-145, points
+#     to the nodeid which has the memory controller responsible for
+#     this NUMA node.  This field provides additional information as
+#     to the initiator node that is closest (as in directly attached)
+#     to this node, and therefore has the best performance (since 5.0)
 #
 # Since: 2.1
 ##
 #
 # @dst: destination NUMA node.
 #
-# @val: NUMA distance from source node to destination node.
-#       When a node is unreachable from another node, set the distance
-#       between them to 255.
+# @val: NUMA distance from source node to destination node.  When a
+#     node is unreachable from another node, set the distance between
+#     them to 255.
 #
 # Since: 2.10
 ##
    'dst': 'uint16',
    'val': 'uint8' }}
 
+##
+# @CXLFixedMemoryWindowOptions:
+#
+# Create a CXL Fixed Memory Window
+#
+# @size: Size of the Fixed Memory Window in bytes.  Must be a multiple
+#     of 256MiB.
+#
+# @interleave-granularity: Number of contiguous bytes for which
+#     accesses will go to a given interleave target.  Accepted values
+#     [256, 512, 1k, 2k, 4k, 8k, 16k]
+#
+# @targets: Target root bridge IDs from -device ...,id=<ID> for each
+#     root bridge.
+#
+# Since: 7.1
+##
+{ 'struct': 'CXLFixedMemoryWindowOptions',
+  'data': {
+      'size': 'size',
+      '*interleave-granularity': 'size',
+      'targets': ['str'] }}
+
+##
+# @CXLFMWProperties:
+#
+# List of CXL Fixed Memory Windows.
+#
+# @cxl-fmw: List of CXLFixedMemoryWindowOptions
+#
+# Since: 7.1
+##
+{ 'struct' : 'CXLFMWProperties',
+  'data': { 'cxl-fmw': ['CXLFixedMemoryWindowOptions'] }
+}
+
 ##
 # @X86CPURegister32:
 #
 #
 # Information about a X86 CPU feature word
 #
-# @cpuid-input-eax: Input EAX value for CPUID instruction for that feature word
+# @cpuid-input-eax: Input EAX value for CPUID instruction for that
+#     feature word
 #
 # @cpuid-input-ecx: Input ECX value for CPUID instruction for that
-#                   feature word
+#     feature word
 #
 # @cpuid-register: Output register containing the feature bits
 #
 ##
 # @DummyForceArrays:
 #
-# Not used by QMP; hack to let us use X86CPUFeatureWordInfoList internally
+# Not used by QMP; hack to let us use X86CPUFeatureWordInfoList
+# internally
 #
 # Since: 2.5
 ##
 ##
 # @NumaCpuOptions:
 #
-# Option "-numa cpu" overrides default cpu to node mapping.
-# It accepts the same set of cpu properties as returned by
+# Option "-numa cpu" overrides default cpu to node mapping.  It
+# accepts the same set of cpu properties as returned by
 # query-hotpluggable-cpus[].props, where node-id could be used to
 # override default node mapping.
 #
 ##
 # @HmatLBDataType:
 #
-# Data type in the System Locality Latency and Bandwidth
-# Information Structure of HMAT (Heterogeneous Memory Attribute Table)
+# Data type in the System Locality Latency and Bandwidth Information
+# Structure of HMAT (Heterogeneous Memory Attribute Table)
 #
-# For more information about @HmatLBDataType, see chapter
-# 5.2.27.4: Table 5-146:  Field "Data Type" of ACPI 6.3 spec.
+# For more information about @HmatLBDataType, see chapter 5.2.27.4:
+# Table 5-146:  Field "Data Type" of ACPI 6.3 spec.
 #
 # @access-latency: access latency (nanoseconds)
 #
 ##
 # @NumaHmatLBOptions:
 #
-# Set the system locality latency and bandwidth information
-# between Initiator and Target proximity Domains.
+# Set the system locality latency and bandwidth information between
+# Initiator and Target proximity Domains.
 #
-# For more information about @NumaHmatLBOptions, see chapter
-# 5.2.27.4: Table 5-146 of ACPI 6.3 spec.
+# For more information about @NumaHmatLBOptions, see chapter 5.2.27.4:
+# Table 5-146 of ACPI 6.3 spec.
 #
 # @initiator: the Initiator Proximity Domain.
 #
 # @target: the Target Proximity Domain.
 #
-# @hierarchy: the Memory Hierarchy. Indicates the performance
-#             of memory or side cache.
+# @hierarchy: the Memory Hierarchy.  Indicates the performance of
+#     memory or side cache.
 #
-# @data-type: presents the type of data, access/read/write
-#             latency or hit latency.
+# @data-type: presents the type of data, access/read/write latency or
+#     hit latency.
 #
-# @latency: the value of latency from @initiator to @target
-#           proximity domain, the latency unit is "ns(nanosecond)".
+# @latency: the value of latency from @initiator to @target proximity
+#     domain, the latency unit is "ns(nanosecond)".
 #
 # @bandwidth: the value of bandwidth between @initiator and @target
-#             proximity domain, the bandwidth unit is
-#             "Bytes per second".
+#     proximity domain, the bandwidth unit is "Bytes per second".
 #
 # Since: 5.0
 ##
 # For more information of @HmatCacheAssociativity, see chapter
 # 5.2.27.5: Table 5-147 of ACPI 6.3 spec.
 #
-# @none: None (no memory side cache in this proximity domain,
-#              or cache associativity unknown)
+# @none: None (no memory side cache in this proximity domain, or cache
+#     associativity unknown)
 #
 # @direct: Direct Mapped
 #
 ##
 # @HmatCacheWritePolicy:
 #
-# Cache write policy in the Memory Side Cache Information Structure
-# of HMAT
+# Cache write policy in the Memory Side Cache Information Structure of
+# HMAT
 #
-# For more information of @HmatCacheWritePolicy, see chapter
-# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
+# For more information of @HmatCacheWritePolicy, see chapter 5.2.27.5:
+# Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
 #
-# @none: None (no memory side cache in this proximity domain,
-#        or cache write policy unknown)
+# @none: None (no memory side cache in this proximity domain, or cache
+#     write policy unknown)
 #
 # @write-back: Write Back (WB)
 #
 #
 # Set the memory side cache information for a given memory domain.
 #
-# For more information of @NumaHmatCacheOptions, see chapter
-# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
+# For more information of @NumaHmatCacheOptions, see chapter 5.2.27.5:
+# Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
 #
 # @node-id: the memory proximity domain to which the memory belongs.
 #
 # @level: the cache level described in this structure.
 #
 # @associativity: the cache associativity,
-#                 none/direct-mapped/complex(complex cache indexing).
+#     none/direct-mapped/complex(complex cache indexing).
 #
 # @policy: the write policy, none/write-back/write-through.
 #
    'policy': 'HmatCacheWritePolicy',
    'line': 'uint16' }}
 
-##
-# @HostMemPolicy:
-#
-# Host memory policy types
-#
-# @default: restore default policy, remove any nondefault policy
-#
-# @preferred: set the preferred host nodes for allocation
-#
-# @bind: a strict policy that restricts memory allocation to the
-#        host nodes specified
-#
-# @interleave: memory allocations are interleaved across the set
-#              of host nodes specified
-#
-# Since: 2.1
-##
-{ 'enum': 'HostMemPolicy',
-  'data': [ 'default', 'preferred', 'bind', 'interleave' ] }
-
 ##
 # @memsave:
 #
 # @filename: the file to save the memory to as binary data
 #
 # @cpu-index: the index of the virtual CPU to use for translating the
-#             virtual address (defaults to CPU 0)
+#     virtual address (defaults to CPU 0)
 #
 # Returns: Nothing on success
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Notes: Errors were not reliably returned until 1.1
 #
 #                     "size": 100,
 #                     "filename": "/tmp/virtual-mem-dump" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'memsave',
   'data': {'val': 'int', 'size': 'int', 'filename': 'str', '*cpu-index': 'int'} }
 #
 # Returns: Nothing on success
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Notes: Errors were not reliably returned until 1.1
 #
 #                     "size": 100,
 #                     "filename": "/tmp/physical-mem-dump" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'pmemsave',
   'data': {'val': 'int', 'size': 'int', 'filename': 'str'} }
 #
 # @size: memory backend size
 #
-# @merge: enables or disables memory merge support
+# @merge: whether memory merge support is enabled
+#
+# @dump: whether memory backend's memory is included in a core dump
 #
-# @dump: includes memory backend's memory in a core dump or not
+# @prealloc: whether memory was preallocated
 #
-# @prealloc: enables or disables memory preallocation
+# @share: whether memory is private to QEMU or shared (since 6.1)
+#
+# @reserve: whether swap space (or huge pages) was reserved if
+#     applicable.  This corresponds to the user configuration and not
+#     the actual behavior implemented in the OS to perform the
+#     reservation.  For example, Linux will never reserve swap space
+#     for shared file mappings.  (since 6.1)
 #
 # @host-nodes: host nodes for its memory policy
 #
     'merge':      'bool',
     'dump':       'bool',
     'prealloc':   'bool',
+    'share':      'bool',
+    '*reserve':    'bool',
     'host-nodes': ['uint16'],
     'policy':     'HostMemPolicy' }}
 
 #          "merge": false,
 #          "dump": true,
 #          "prealloc": false,
+#          "share": false,
 #          "host-nodes": [0, 1],
 #          "policy": "bind"
 #        },
 #          "merge": false,
 #          "dump": true,
 #          "prealloc": true,
+#          "share": false,
 #          "host-nodes": [2, 3],
 #          "policy": "preferred"
 #        }
 #      ]
 #    }
-#
 ##
 { 'command': 'query-memdev', 'returns': ['Memdev'], 'allow-preconfig': true }
 
 ##
 # @CpuInstanceProperties:
 #
-# List of properties to be used for hotplugging a CPU instance,
-# it should be passed by management with device_add command when
-# a CPU is being hotplugged.
+# List of properties to be used for hotplugging a CPU instance, it
+# should be passed by management with device_add command when a CPU is
+# being hotplugged.
+#
+# Which members are optional and which mandatory depends on the
+# architecture and board.
+#
+# For s390x see :ref:`cpu-topology-s390x`.
+#
+# The ids other than the node-id specify the position of the CPU
+# within the CPU topology (as defined by the machine property "smp",
+# thus see also type @SMPConfiguration)
 #
 # @node-id: NUMA node ID the CPU belongs to
-# @socket-id: socket number within node/board the CPU belongs to
-# @die-id: die number within node/board the CPU belongs to (Since 4.1)
-# @core-id: core number within die the CPU belongs to
-# @thread-id: thread number within core the CPU belongs to
 #
-# Note: currently there are 5 properties that could be present
-#       but management should be prepared to pass through other
-#       properties with device_add command to allow for future
-#       interface extension. This also requires the filed names to be kept in
-#       sync with the properties passed to -device/device_add.
+# @drawer-id: drawer number within CPU topology the CPU belongs to
+#     (since 8.2)
+#
+# @book-id: book number within parent container the CPU belongs to
+#     (since 8.2)
+#
+# @socket-id: socket number within parent container the CPU belongs to
+#
+# @die-id: die number within the parent container the CPU belongs to
+#    (since 4.1)
+#
+# @cluster-id: cluster number within the parent container the CPU
+#     belongs to (since 7.1)
+#
+# @core-id: core number within the parent container the CPU
+#     belongs to
+#
+# @thread-id: thread number within the core the CPU  belongs to
+#
+# Note: management should be prepared to pass through additional
+#     properties with device_add.
 #
 # Since: 2.7
 ##
 { 'struct': 'CpuInstanceProperties',
+  # Keep these in sync with the properties device_add accepts
   'data': { '*node-id': 'int',
+            '*drawer-id': 'int',
+            '*book-id': 'int',
             '*socket-id': 'int',
             '*die-id': 'int',
+            '*cluster-id': 'int',
             '*core-id': 'int',
             '*thread-id': 'int'
   }
 # @HotpluggableCPU:
 #
 # @type: CPU object type for usage with device_add command
+#
 # @props: list of properties to be used for hotplugging CPU
-# @vcpus-count: number of logical VCPU threads @HotpluggableCPU provides
-# @qom-path: link to existing CPU object if CPU is present or
-#            omitted if CPU is not present.
+#
+# @vcpus-count: number of logical VCPU threads @HotpluggableCPU
+#     provides
+#
+# @qom-path: link to existing CPU object if CPU is present or omitted
+#     if CPU is not present.
 #
 # Since: 2.7
 ##
 #
 # Since: 2.7
 #
-# Example:
+# Examples:
 #
-# For pseries machine type started with -smp 2,cores=2,maxcpus=4 -cpu POWER8:
+# For pseries machine type started with -smp 2,cores=2,maxcpus=4 -cpu
+# POWER8:
 #
 # -> { "execute": "query-hotpluggable-cpus" }
 # <- {"return": [
-#      { "props": { "core": 8 }, "type": "POWER8-spapr-cpu-core",
+#      { "props": { "core-id": 8 }, "type": "POWER8-spapr-cpu-core",
 #        "vcpus-count": 1 },
-#      { "props": { "core": 0 }, "type": "POWER8-spapr-cpu-core",
+#      { "props": { "core-id": 0 }, "type": "POWER8-spapr-cpu-core",
 #        "vcpus-count": 1, "qom-path": "/machine/unattached/device[0]"}
 #    ]}'
 #
 #      }
 #    ]}
 #
-# For s390x-virtio-ccw machine type started with -smp 1,maxcpus=2 -cpu qemu
-# (Since: 2.11):
+# For s390x-virtio-ccw machine type started with -smp 1,maxcpus=2 -cpu
+# qemu (Since: 2.11):
 #
 # -> { "execute": "query-hotpluggable-cpus" }
 # <- {"return": [
 #         "props": { "core-id": 0 }
 #      }
 #    ]}
-#
 ##
 { 'command': 'query-hotpluggable-cpus', 'returns': ['HotpluggableCPU'],
              'allow-preconfig': true }
 ##
 # @set-numa-node:
 #
-# Runtime equivalent of '-numa' CLI option, available at
-# preconfigure stage to configure numa mapping before initializing
-# machine.
+# Runtime equivalent of '-numa' CLI option, available at preconfigure
+# stage to configure numa mapping before initializing machine.
 #
-# Since 3.0
+# Since: 3.0
 ##
 { 'command': 'set-numa-node', 'boxed': true,
   'data': 'NumaOptions',
 #
 # Request the balloon driver to change its balloon size.
 #
-# @value: the target logical size of the VM in bytes.
-#         We can deduce the size of the balloon using this formula:
+# @value: the target logical size of the VM in bytes.  We can deduce
+#     the size of the balloon using this formula:
 #
-#            logical_vm_size = vm_ram_size - balloon_size
+#        logical_vm_size = vm_ram_size - balloon_size
 #
-#         From it we have: balloon_size = vm_ram_size - @value
+#     From it we have: balloon_size = vm_ram_size - @value
 #
-# Returns: - Nothing on success
-#          - If the balloon driver is enabled but not functional because the KVM
-#            kernel module cannot support it, KvmMissingCap
-#          - If no balloon device is present, DeviceNotActive
+# Returns:
+# - Nothing on success
+# - If the balloon driver is enabled but not functional because the
+#   KVM kernel module cannot support it, KVMMissingCap
+# - If no balloon device is present, DeviceNotActive
 #
-# Notes: This command just issues a request to the guest.  When it returns,
-#        the balloon size may not have changed.  A guest can change the balloon
-#        size independent of this command.
+# Notes: This command just issues a request to the guest.  When it
+#     returns, the balloon size may not have changed.  A guest can
+#     change the balloon size independent of this command.
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 # <- { "return": {} }
 #
 # With a 2.5GiB guest this command inflated the ballon to 3GiB.
-#
 ##
 { 'command': 'balloon', 'data': {'value': 'int'} }
 
 #
 # Information about the guest balloon device.
 #
-# @actual: the logical size of the VM in bytes
-#          Formula used: logical_vm_size = vm_ram_size - balloon_size
-#
-# Since: 0.14.0
+# @actual: the logical size of the VM in bytes Formula used:
+#     logical_vm_size = vm_ram_size - balloon_size
 #
+# Since: 0.14
 ##
 { 'struct': 'BalloonInfo', 'data': {'actual': 'int' } }
 
 #
 # Return information about the balloon device.
 #
-# Returns: - @BalloonInfo on success
-#          - If the balloon driver is enabled but not functional because the KVM
-#            kernel module cannot support it, KvmMissingCap
-#          - If no balloon device is present, DeviceNotActive
+# Returns:
+# - @BalloonInfo on success
+# - If the balloon driver is enabled but not functional because the
+#   KVM kernel module cannot support it, KVMMissingCap
+# - If no balloon device is present, DeviceNotActive
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 # -> { "execute": "query-balloon" }
 # <- { "return": {
-#          "actual": 1073741824,
+#          "actual": 1073741824
 #       }
 #    }
-#
 ##
 { 'command': 'query-balloon', 'returns': 'BalloonInfo' }
 
 ##
 # @BALLOON_CHANGE:
 #
-# Emitted when the guest changes the actual BALLOON level. This value is
-# equivalent to the @actual field return by the 'query-balloon' command
+# Emitted when the guest changes the actual BALLOON level.  This value
+# is equivalent to the @actual field return by the 'query-balloon'
+# command
 #
-# @actual: the logical size of the VM in bytes
-#          Formula used: logical_vm_size = vm_ram_size - balloon_size
+# @actual: the logical size of the VM in bytes Formula used:
+#     logical_vm_size = vm_ram_size - balloon_size
 #
 # Note: this event is rate-limited.
 #
 # <- { "event": "BALLOON_CHANGE",
 #      "data": { "actual": 944766976 },
 #      "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
-#
 ##
 { 'event': 'BALLOON_CHANGE',
   'data': { 'actual': 'int' } }
 
+##
+# @HvBalloonInfo:
+#
+# hv-balloon guest-provided memory status information.
+#
+# @committed: the amount of memory in use inside the guest plus the
+#     amount of the memory unusable inside the guest (ballooned out,
+#     offline, etc.)
+#
+# @available: the amount of the memory inside the guest available for
+#     new allocations ("free")
+#
+# Since: 8.2
+##
+{ 'struct': 'HvBalloonInfo',
+  'data': { 'committed': 'size', 'available': 'size' } }
+
+##
+# @query-hv-balloon-status-report:
+#
+# Returns the hv-balloon driver data contained in the last received "STATUS"
+# message from the guest.
+#
+# Returns:
+# - @HvBalloonInfo on success
+# - If no hv-balloon device is present, guest memory status reporting
+#   is not enabled or no guest memory status report received yet,
+#   GenericError
+#
+# Since: 8.2
+#
+# Example:
+#
+# -> { "execute": "query-hv-balloon-status-report" }
+# <- { "return": {
+#          "committed": 816640000,
+#          "available": 3333054464
+#       }
+#    }
+##
+{ 'command': 'query-hv-balloon-status-report', 'returns': 'HvBalloonInfo' }
+
+##
+# @HV_BALLOON_STATUS_REPORT:
+#
+# Emitted when the hv-balloon driver receives a "STATUS" message from
+# the guest.
+#
+# Note: this event is rate-limited.
+#
+# Since: 8.2
+#
+# Example:
+#
+# <- { "event": "HV_BALLOON_STATUS_REPORT",
+#      "data": { "committed": 816640000, "available": 3333054464 },
+#      "timestamp": { "seconds": 1600295492, "microseconds": 661044 } }
+#
+##
+{ 'event': 'HV_BALLOON_STATUS_REPORT',
+  'data': 'HvBalloonInfo' }
+
 ##
 # @MemoryInfo:
 #
 # Actual memory information in bytes.
 #
 # @base-memory: size of "base" memory specified with command line
-#               option -m.
+#     option -m.
 #
-# @plugged-memory: size of memory that can be hot-unplugged. This field
-#                  is omitted if target doesn't support memory hotplug
-#                  (i.e. CONFIG_MEM_DEVICE not defined at build time).
+# @plugged-memory: size of memory that can be hot-unplugged.  This
+#     field is omitted if target doesn't support memory hotplug (i.e.
+#     CONFIG_MEM_DEVICE not defined at build time).
 #
-# Since: 2.11.0
+# Since: 2.11
 ##
 { 'struct': 'MemoryInfo',
   'data'  : { 'base-memory': 'size', '*plugged-memory': 'size' } }
 ##
 # @query-memory-size-summary:
 #
-# Return the amount of initially allocated and present hotpluggable (if
-# enabled) memory in bytes.
+# Return the amount of initially allocated and present hotpluggable
+# (if enabled) memory in bytes.
 #
 # Example:
 #
 # -> { "execute": "query-memory-size-summary" }
 # <- { "return": { "base-memory": 4294967296, "plugged-memory": 0 } }
 #
-# Since: 2.11.0
+# Since: 2.11
 ##
 { 'command': 'query-memory-size-summary', 'returns': 'MemoryInfo' }
 
 #
 # @hotplugged: true if device was hotplugged
 #
-# @hotpluggable: true if device if could be added/removed while machine is running
+# @hotpluggable: true if device if could be added/removed while
+#     machine is running
 #
 # Since: 2.1
 ##
           }
 }
 
+##
+# @SgxEPCDeviceInfo:
+#
+# Sgx EPC state information
+#
+# @id: device's ID
+#
+# @memaddr: physical address in memory, where device is mapped
+#
+# @size: size of memory that the device provides
+#
+# @memdev: memory backend linked with device
+#
+# @node: the numa node (Since: 7.0)
+#
+# Since: 6.2
+##
+{ 'struct': 'SgxEPCDeviceInfo',
+  'data': { '*id': 'str',
+            'memaddr': 'size',
+            'size': 'size',
+            'node': 'int',
+            'memdev': 'str'
+          }
+}
+
+##
+# @HvBalloonDeviceInfo:
+#
+# hv-balloon provided memory state information
+#
+# @id: device's ID
+#
+# @memaddr: physical address in memory, where device is mapped
+#
+# @max-size: the maximum size of memory that the device can provide
+#
+# @memdev: memory backend linked with device
+#
+# Since: 8.2
+##
+{ 'struct': 'HvBalloonDeviceInfo',
+  'data': { '*id': 'str',
+            '*memaddr': 'size',
+            'max-size': 'size',
+            '*memdev': 'str'
+          }
+}
+
+##
+# @MemoryDeviceInfoKind:
+#
+# @nvdimm: since 2.12
+#
+# @virtio-pmem: since 4.1
+#
+# @virtio-mem: since 5.1
+#
+# @sgx-epc: since 6.2.
+#
+# @hv-balloon: since 8.2.
+#
+# Since: 2.1
+##
+{ 'enum': 'MemoryDeviceInfoKind',
+  'data': [ 'dimm', 'nvdimm', 'virtio-pmem', 'virtio-mem', 'sgx-epc',
+            'hv-balloon' ] }
+
+##
+# @PCDIMMDeviceInfoWrapper:
+#
+# Since: 2.1
+##
+{ 'struct': 'PCDIMMDeviceInfoWrapper',
+  'data': { 'data': 'PCDIMMDeviceInfo' } }
+
+##
+# @VirtioPMEMDeviceInfoWrapper:
+#
+# Since: 2.1
+##
+{ 'struct': 'VirtioPMEMDeviceInfoWrapper',
+  'data': { 'data': 'VirtioPMEMDeviceInfo' } }
+
+##
+# @VirtioMEMDeviceInfoWrapper:
+#
+# Since: 2.1
+##
+{ 'struct': 'VirtioMEMDeviceInfoWrapper',
+  'data': { 'data': 'VirtioMEMDeviceInfo' } }
+
+##
+# @SgxEPCDeviceInfoWrapper:
+#
+# Since: 6.2
+##
+{ 'struct': 'SgxEPCDeviceInfoWrapper',
+  'data': { 'data': 'SgxEPCDeviceInfo' } }
+
+##
+# @HvBalloonDeviceInfoWrapper:
+#
+# Since: 8.2
+##
+{ 'struct': 'HvBalloonDeviceInfoWrapper',
+  'data': { 'data': 'HvBalloonDeviceInfo' } }
+
 ##
 # @MemoryDeviceInfo:
 #
 # Union containing information about a memory device
 #
-# nvdimm is included since 2.12. virtio-pmem is included since 4.1.
-# virtio-mem is included since 5.1.
-#
 # Since: 2.1
 ##
 { 'union': 'MemoryDeviceInfo',
-  'data': { 'dimm': 'PCDIMMDeviceInfo',
-            'nvdimm': 'PCDIMMDeviceInfo',
-            'virtio-pmem': 'VirtioPMEMDeviceInfo',
-            'virtio-mem': 'VirtioMEMDeviceInfo'
+  'base': { 'type': 'MemoryDeviceInfoKind' },
+  'discriminator': 'type',
+  'data': { 'dimm': 'PCDIMMDeviceInfoWrapper',
+            'nvdimm': 'PCDIMMDeviceInfoWrapper',
+            'virtio-pmem': 'VirtioPMEMDeviceInfoWrapper',
+            'virtio-mem': 'VirtioMEMDeviceInfoWrapper',
+            'sgx-epc': 'SgxEPCDeviceInfoWrapper',
+            'hv-balloon': 'HvBalloonDeviceInfoWrapper'
+          }
+}
+
+##
+# @SgxEPC:
+#
+# Sgx EPC cmdline information
+#
+# @memdev: memory backend linked with device
+#
+# @node: the numa node (Since: 7.0)
+#
+# Since: 6.2
+##
+{ 'struct': 'SgxEPC',
+  'data': { 'memdev': 'str',
+            'node': 'int'
           }
 }
 
+##
+# @SgxEPCProperties:
+#
+# SGX properties of machine types.
+#
+# @sgx-epc: list of ids of memory-backend-epc objects.
+#
+# Since: 6.2
+##
+{ 'struct': 'SgxEPCProperties',
+  'data': { 'sgx-epc': ['SgxEPC'] }
+}
+
 ##
 # @query-memory-devices:
 #
 #                         "slot": 0},
 #                    "type": "dimm"
 #                  } ] }
-#
 ##
 { 'command': 'query-memory-devices', 'returns': ['MemoryDeviceInfo'] }
 
 ##
 # @MEMORY_DEVICE_SIZE_CHANGE:
 #
-# Emitted when the size of a memory device changes. Only emitted for memory
-# devices that can actually change the size (e.g., virtio-mem due to guest
-# action).
+# Emitted when the size of a memory device changes.  Only emitted for
+# memory devices that can actually change the size (e.g., virtio-mem
+# due to guest action).
 #
 # @id: device's ID
+#
 # @size: the new size of memory that the device provides
 #
+# @qom-path: path to the device object in the QOM tree (since 6.2)
+#
 # Note: this event is rate-limited.
 #
 # Since: 5.1
 # Example:
 #
 # <- { "event": "MEMORY_DEVICE_SIZE_CHANGE",
-#      "data": { "id": "vm0", "size": 1073741824},
+#      "data": { "id": "vm0", "size": 1073741824,
+#                "qom-path": "/machine/unattached/device[2]" },
 #      "timestamp": { "seconds": 1588168529, "microseconds": 201316 } }
-#
 ##
 { 'event': 'MEMORY_DEVICE_SIZE_CHANGE',
-  'data': { '*id': 'str', 'size': 'size' } }
-
+  'data': { '*id': 'str', 'size': 'size', 'qom-path' : 'str'} }
 
 ##
 # @MEM_UNPLUG_ERROR:
 #
 # @msg: Informative message
 #
+# Features:
+#
+# @deprecated: This event is deprecated.  Use
+#     @DEVICE_UNPLUG_GUEST_ERROR instead.
+#
 # Since: 2.4
 #
 # Example:
 #
-# <- { "event": "MEM_UNPLUG_ERROR"
+# <- { "event": "MEM_UNPLUG_ERROR",
 #      "data": { "device": "dimm1",
 #                "msg": "acpi: device unplug for unsupported device"
 #      },
 #      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
-#
 ##
 { 'event': 'MEM_UNPLUG_ERROR',
-  'data': { 'device': 'str', 'msg': 'str' } }
+  'data': { 'device': 'str', 'msg': 'str' },
+  'features': ['deprecated'] }
+
+##
+# @BootConfiguration:
+#
+# Schema for virtual machine boot configuration.
+#
+# @order: Boot order (a=floppy, c=hard disk, d=CD-ROM, n=network)
+#
+# @once: Boot order to apply on first boot
+#
+# @menu: Whether to show a boot menu
+#
+# @splash: The name of the file to be passed to the firmware as logo
+#     picture, if @menu is true.
+#
+# @splash-time: How long to show the logo picture, in milliseconds
+#
+# @reboot-timeout: Timeout before guest reboots after boot fails
+#
+# @strict: Whether to attempt booting from devices not included in the
+#     boot order
+#
+# Since: 7.1
+##
+{ 'struct': 'BootConfiguration', 'data': {
+     '*order': 'str',
+     '*once': 'str',
+     '*menu': 'bool',
+     '*splash': 'str',
+     '*splash-time': 'int',
+     '*reboot-timeout': 'int',
+     '*strict': 'bool' } }
+
+##
+# @SMPConfiguration:
+#
+# Schema for CPU topology configuration.  A missing value lets QEMU
+# figure out a suitable value based on the ones that are provided.
+#
+# The members other than @cpus and @maxcpus define a topology of
+# containers.
+#
+# The ordering from highest/coarsest to lowest/finest is:
+# @drawers, @books, @sockets, @dies, @clusters, @cores, @threads.
+#
+# Different architectures support different subsets of topology
+# containers.
+#
+# For example, s390x does not have clusters and dies, and the socket
+# is the parent container of cores.
+#
+# @cpus: number of virtual CPUs in the virtual machine
+#
+# @maxcpus: maximum number of hotpluggable virtual CPUs in the virtual
+#     machine
+#
+# @drawers: number of drawers in the CPU topology (since 8.2)
+#
+# @books: number of books in the CPU topology (since 8.2)
+#
+# @sockets: number of sockets per parent container
+#
+# @dies: number of dies per parent container
+#
+# @clusters: number of clusters per parent container (since 7.0)
+#
+# @cores: number of cores per parent container
+#
+# @threads: number of threads per core
+#
+# Since: 6.1
+##
+{ 'struct': 'SMPConfiguration', 'data': {
+     '*cpus': 'int',
+     '*drawers': 'int',
+     '*books': 'int',
+     '*sockets': 'int',
+     '*dies': 'int',
+     '*clusters': 'int',
+     '*cores': 'int',
+     '*threads': 'int',
+     '*maxcpus': 'int' } }
+
+##
+# @x-query-irq:
+#
+# Query interrupt statistics
+#
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
+# Returns: interrupt statistics
+#
+# Since: 6.2
+##
+{ 'command': 'x-query-irq',
+  'returns': 'HumanReadableText',
+  'features': [ 'unstable' ] }
+
+##
+# @x-query-jit:
+#
+# Query TCG compiler statistics
+#
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
+# Returns: TCG compiler statistics
+#
+# Since: 6.2
+##
+{ 'command': 'x-query-jit',
+  'returns': 'HumanReadableText',
+  'if': 'CONFIG_TCG',
+  'features': [ 'unstable' ] }
+
+##
+# @x-query-numa:
+#
+# Query NUMA topology information
+#
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
+# Returns: topology information
+#
+# Since: 6.2
+##
+{ 'command': 'x-query-numa',
+  'returns': 'HumanReadableText',
+  'features': [ 'unstable' ] }
+
+##
+# @x-query-opcount:
+#
+# Query TCG opcode counters
+#
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
+# Returns: TCG opcode counters
+#
+# Since: 6.2
+##
+{ 'command': 'x-query-opcount',
+  'returns': 'HumanReadableText',
+  'if': 'CONFIG_TCG',
+  'features': [ 'unstable' ] }
+
+##
+# @x-query-ramblock:
+#
+# Query system ramblock information
+#
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
+# Returns: system ramblock information
+#
+# Since: 6.2
+##
+{ 'command': 'x-query-ramblock',
+  'returns': 'HumanReadableText',
+  'features': [ 'unstable' ] }
+
+##
+# @x-query-rdma:
+#
+# Query RDMA state
+#
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
+# Returns: RDMA state
+#
+# Since: 6.2
+##
+{ 'command': 'x-query-rdma',
+  'returns': 'HumanReadableText',
+  'features': [ 'unstable' ] }
+
+##
+# @x-query-roms:
+#
+# Query information on the registered ROMS
+#
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
+# Returns: registered ROMs
+#
+# Since: 6.2
+##
+{ 'command': 'x-query-roms',
+  'returns': 'HumanReadableText',
+  'features': [ 'unstable' ] }
+
+##
+# @x-query-usb:
+#
+# Query information on the USB devices
+#
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
+# Returns: USB device information
+#
+# Since: 6.2
+##
+{ 'command': 'x-query-usb',
+  'returns': 'HumanReadableText',
+  'features': [ 'unstable' ] }
+
+##
+# @SmbiosEntryPointType:
+#
+# @32: SMBIOS version 2.1 (32-bit) Entry Point
+#
+# @64: SMBIOS version 3.0 (64-bit) Entry Point
+#
+# Since: 7.0
+##
+{ 'enum': 'SmbiosEntryPointType',
+  'data': [ '32', '64' ] }
+
+##
+# @MemorySizeConfiguration:
+#
+# Schema for memory size configuration.
+#
+# @size: memory size in bytes
+#
+# @max-size: maximum hotpluggable memory size in bytes
+#
+# @slots: number of available memory slots for hotplug
+#
+# Since: 7.1
+##
+{ 'struct': 'MemorySizeConfiguration', 'data': {
+     '*size': 'size',
+     '*max-size': 'size',
+     '*slots': 'uint64' } }
+
+##
+# @dumpdtb:
+#
+# Save the FDT in dtb format.
+#
+# @filename: name of the dtb file to be created
+#
+# Since: 7.2
+#
+# Example:
+#
+# -> { "execute": "dumpdtb" }
+#      "arguments": { "filename": "fdt.dtb" } }
+# <- { "return": {} }
+##
+{ 'command': 'dumpdtb',
+  'data': { 'filename': 'str' },
+  'if': 'CONFIG_FDT' }
index 0e98146f1fcd1fc5e28c43b2ccf899e86209a02a..f81a37565ca7f10830bdd4aac03d598282048489 100644 (file)
@@ -2,32 +2,41 @@ util_ss.add(files(
   'opts-visitor.c',
   'qapi-clone-visitor.c',
   'qapi-dealloc-visitor.c',
+  'qapi-forward-visitor.c',
   'qapi-util.c',
   'qapi-visit-core.c',
-  'qmp-dispatch.c',
-  'qmp-event.c',
-  'qmp-registry.c',
   'qobject-input-visitor.c',
   'qobject-output-visitor.c',
   'string-input-visitor.c',
   'string-output-visitor.c',
 ))
+if have_system
+  util_ss.add(files('qapi-type-helpers.c'))
+endif
+if have_system or have_tools or have_ga
+  util_ss.add(files(
+    'qmp-dispatch.c',
+    'qmp-event.c',
+    'qmp-registry.c',
+  ))
+endif
 
 qapi_all_modules = [
-  'acpi',
-  'audio',
   'authz',
   'block',
   'block-core',
   'block-export',
   'char',
   'common',
+  'compat',
   'control',
   'crypto',
+  'cxl',
   'dump',
   'error',
   'introspect',
   'job',
+  'machine-common',
   'machine',
   'machine-target',
   'migration',
@@ -35,34 +44,33 @@ qapi_all_modules = [
   'misc-target',
   'net',
   'pragma',
-  'qdev',
-  'pci',
   'qom',
-  'rdma',
   'replay',
-  'rocker',
   'run-state',
   'sockets',
-  'tpm',
+  'stats',
   'trace',
   'transaction',
-  'ui',
-]
-
-qapi_storage_daemon_modules = [
-  'block-core',
-  'block-export',
-  'char',
-  'common',
-  'control',
-  'crypto',
-  'introspect',
-  'job',
-  'qom',
-  'sockets',
-  'pragma',
-  'transaction',
+  'virtio',
+  'yank',
 ]
+if have_system
+  qapi_all_modules += [
+    'acpi',
+    'audio',
+    'cryptodev',
+    'qdev',
+    'pci',
+    'rdma',
+    'rocker',
+    'tpm',
+  ]
+endif
+if have_system or have_tools
+  qapi_all_modules += [
+    'ui',
+  ]
+endif
 
 qapi_nonmodule_outputs = [
   'qapi-introspect.c', 'qapi-introspect.h',
@@ -89,11 +97,16 @@ foreach module : qapi_all_modules
     'qapi-types-@0@.h'.format(module),
     'qapi-visit-@0@.c'.format(module),
     'qapi-visit-@0@.h'.format(module),
-    'qapi-events-@0@.c'.format(module),
-    'qapi-events-@0@.h'.format(module),
-    'qapi-commands-@0@.c'.format(module),
-    'qapi-commands-@0@.h'.format(module),
   ]
+  if have_system or have_tools
+    qapi_module_outputs += [
+      'qapi-events-@0@.c'.format(module),
+      'qapi-events-@0@.h'.format(module),
+      'qapi-commands-@0@.c'.format(module),
+      'qapi-commands-@0@.h'.format(module),
+      'qapi-commands-@0@.trace-events'.format(module),
+    ]
+  endif
   if module.endswith('-target')
     qapi_specific_outputs += qapi_module_outputs
   else
@@ -115,6 +128,9 @@ foreach output : qapi_util_outputs
   if output.endswith('.h')
     genh += qapi_files[i]
   endif
+  if output.endswith('.trace-events')
+    qapi_trace_events += qapi_files[i]
+  endif
   util_ss.add(qapi_files[i])
   i = i + 1
 endforeach
@@ -123,6 +139,9 @@ foreach output : qapi_specific_outputs + qapi_nonmodule_outputs
   if output.endswith('.h')
     genh += qapi_files[i]
   endif
-  specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: qapi_files[i])
+  if output.endswith('.trace-events')
+    qapi_trace_events += qapi_files[i]
+  endif
+  specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: qapi_files[i])
   i = i + 1
 endforeach
index 3c75820527259562bb6ed5db540e0d9d7a95e071..197d3faa43fdac90a12450d53cd7d81249c050eb 100644 (file)
 #
 # @transferred: amount of bytes already transferred to the target VM
 #
-# @remaining: amount of bytes remaining to be transferred to the target VM
+# @remaining: amount of bytes remaining to be transferred to the
+#     target VM
 #
 # @total: total amount of bytes involved in the migration process
 #
 # @duplicate: number of duplicate (zero) pages (since 1.2)
 #
-# @skipped: number of skipped zero pages (since 1.5)
+# @skipped: number of skipped zero pages. Always zero, only provided for
+#     compatibility (since 1.5)
 #
 # @normal: number of normal pages (since 1.2)
 #
 # @normal-bytes: number of normal bytes sent (since 1.2)
 #
-# @dirty-pages-rate: number of pages dirtied by second by the
-#                    guest (since 1.3)
+# @dirty-pages-rate: number of pages dirtied by second by the guest
+#     (since 1.3)
 #
-# @mbps: throughput in megabits/sec. (since 1.6)
+# @mbps: throughput in megabits/sec.  (since 1.6)
 #
-# @dirty-sync-count: number of times that dirty ram was synchronized (since 2.1)
+# @dirty-sync-count: number of times that dirty ram was synchronized
+#     (since 2.1)
 #
-# @postcopy-requests: The number of page requests received from the destination
-#                     (since 2.7)
+# @postcopy-requests: The number of page requests received from the
+#     destination (since 2.7)
 #
 # @page-size: The number of bytes per page for the various page-based
-#             statistics (since 2.10)
+#     statistics (since 2.10)
 #
 # @multifd-bytes: The number of bytes sent through multifd (since 3.0)
 #
 # @pages-per-second: the number of memory pages transferred per second
-#                    (Since 4.0)
+#     (Since 4.0)
+#
+# @precopy-bytes: The number of bytes sent in the pre-copy phase
+#     (since 7.0).
+#
+# @downtime-bytes: The number of bytes sent while the guest is paused
+#     (since 7.0).
+#
+# @postcopy-bytes: The number of bytes sent during the post-copy phase
+#     (since 7.0).
+#
+# @dirty-sync-missed-zero-copy: Number of times dirty RAM
+#     synchronization could not avoid copying dirty pages.  This is
+#     between 0 and @dirty-sync-count * @multifd-channels.  (since
+#     7.1)
+#
+# Features:
+#
+# @deprecated: Member @skipped is always zero since 1.5.3
+#
+# Since: 0.14
 #
-# Since: 0.14.0
 ##
 { 'struct': 'MigrationStats',
   'data': {'transferred': 'int', 'remaining': 'int', 'total': 'int' ,
-           'duplicate': 'int', 'skipped': 'int', 'normal': 'int',
-           'normal-bytes': 'int', 'dirty-pages-rate' : 'int',
-           'mbps' : 'number', 'dirty-sync-count' : 'int',
-           'postcopy-requests' : 'int', 'page-size' : 'int',
-           'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64' } }
+           'duplicate': 'int',
+           'skipped': { 'type': 'int', 'features': [ 'deprecated' ] },
+           'normal': 'int',
+           'normal-bytes': 'int', 'dirty-pages-rate': 'int',
+           'mbps': 'number', 'dirty-sync-count': 'int',
+           'postcopy-requests': 'int', 'page-size': 'int',
+           'multifd-bytes': 'uint64', 'pages-per-second': 'uint64',
+           'precopy-bytes': 'uint64', 'downtime-bytes': 'uint64',
+           'postcopy-bytes': 'uint64',
+           'dirty-sync-missed-zero-copy': 'uint64' } }
 
 ##
 # @XBZRLECacheStats:
 # Since: 1.2
 ##
 { 'struct': 'XBZRLECacheStats',
-  'data': {'cache-size': 'int', 'bytes': 'int', 'pages': 'int',
+  'data': {'cache-size': 'size', 'bytes': 'int', 'pages': 'int',
            'cache-miss': 'int', 'cache-miss-rate': 'number',
            'encoding-rate': 'number', 'overflow': 'int' } }
 
 #
 # @pages: amount of pages compressed and transferred to the target VM
 #
-# @busy: count of times that no free thread was available to compress data
+# @busy: count of times that no free thread was available to compress
+#     data
 #
 # @busy-rate: rate of thread busy
 #
 #
 # @active: in the process of doing migration.
 #
-# @postcopy-active: like active, but now in postcopy mode. (since 2.5)
+# @postcopy-active: like active, but now in postcopy mode.  (since
+#     2.5)
 #
-# @postcopy-paused: during postcopy but paused. (since 3.0)
+# @postcopy-paused: during postcopy but paused.  (since 3.0)
 #
-# @postcopy-recover: trying to recover from a paused postcopy. (since 3.0)
+# @postcopy-recover: trying to recover from a paused postcopy.  (since
+#     3.0)
 #
 # @completed: migration is finished.
 #
 # @failed: some error occurred during migration process.
 #
-# @colo: VM is in the process of fault tolerance, VM can not get into this
-#        state unless colo capability is enabled for migration. (since 2.8)
+# @colo: VM is in the process of fault tolerance, VM can not get into
+#     this state unless colo capability is enabled for migration.
+#     (since 2.8)
 #
-# @pre-switchover: Paused before device serialisation. (since 2.11)
+# @pre-switchover: Paused before device serialisation.  (since 2.11)
 #
-# @device: During device serialisation when pause-before-switchover is enabled
-#          (since 2.11)
+# @device: During device serialisation when pause-before-switchover is
+#     enabled (since 2.11)
 #
-# @wait-unplug: wait for device unplug request by guest OS to be completed.
-#               (since 4.2)
+# @wait-unplug: wait for device unplug request by guest OS to be
+#     completed.  (since 4.2)
 #
 # Since: 2.3
-#
 ##
 { 'enum': 'MigrationStatus',
   'data': [ 'none', 'setup', 'cancelling', 'cancelled',
 #
 # Detailed VFIO devices migration statistics
 #
-# @transferred: amount of bytes transferred to the target VM by VFIO devices
+# @transferred: amount of bytes transferred to the target VM by VFIO
+#     devices
 #
 # Since: 5.2
-#
 ##
 { 'struct': 'VfioStats',
   'data': {'transferred': 'int' } }
 # Information about current migration process.
 #
 # @status: @MigrationStatus describing the current migration status.
-#          If this field is not returned, no migration process
-#          has been initiated
+#     If this field is not returned, no migration process has been
+#     initiated
 #
-# @ram: @MigrationStats containing detailed migration
-#       status, only returned if status is 'active' or
-#       'completed'(since 1.2)
+# @ram: @MigrationStats containing detailed migration status, only
+#     returned if status is 'active' or 'completed'(since 1.2)
 #
-# @disk: @MigrationStats containing detailed disk migration
-#        status, only returned if status is 'active' and it is a block
-#        migration
+# @disk: @MigrationStats containing detailed disk migration status,
+#     only returned if status is 'active' and it is a block migration
 #
 # @xbzrle-cache: @XBZRLECacheStats containing detailed XBZRLE
-#                migration statistics, only returned if XBZRLE feature is on and
-#                status is 'active' or 'completed' (since 1.2)
+#     migration statistics, only returned if XBZRLE feature is on and
+#     status is 'active' or 'completed' (since 1.2)
 #
 # @total-time: total amount of milliseconds since migration started.
-#              If migration has ended, it returns the total migration
-#              time. (since 1.2)
+#     If migration has ended, it returns the total migration time.
+#     (since 1.2)
 #
-# @downtime: only present when migration finishes correctly
-#            total downtime in milliseconds for the guest.
-#            (since 1.3)
+# @downtime: only present when migration finishes correctly total
+#     downtime in milliseconds for the guest.  (since 1.3)
 #
-# @expected-downtime: only present while migration is active
-#                     expected downtime in milliseconds for the guest in last walk
-#                     of the dirty bitmap. (since 1.3)
+# @expected-downtime: only present while migration is active expected
+#     downtime in milliseconds for the guest in last walk of the dirty
+#     bitmap.  (since 1.3)
 #
 # @setup-time: amount of setup time in milliseconds *before* the
-#              iterations begin but *after* the QMP command is issued. This is designed
-#              to provide an accounting of any activities (such as RDMA pinning) which
-#              may be expensive, but do not actually occur during the iterative
-#              migration rounds themselves. (since 1.6)
+#     iterations begin but *after* the QMP command is issued.  This is
+#     designed to provide an accounting of any activities (such as
+#     RDMA pinning) which may be expensive, but do not actually occur
+#     during the iterative migration rounds themselves.  (since 1.6)
 #
 # @cpu-throttle-percentage: percentage of time guest cpus are being
-#                           throttled during auto-converge. This is only present when auto-converge
-#                           has started throttling guest cpus. (Since 2.7)
+#     throttled during auto-converge.  This is only present when
+#     auto-converge has started throttling guest cpus.  (Since 2.7)
 #
-# @error-desc: the human readable error description string, when
-#              @status is 'failed'. Clients should not attempt to parse the
-#              error strings. (Since 2.7)
+# @error-desc: the human readable error description string. Clients
+#     should not attempt to parse the error strings.  (Since 2.7)
 #
-# @postcopy-blocktime: total time when all vCPU were blocked during postcopy
-#                      live migration. This is only present when the postcopy-blocktime
-#                      migration capability is enabled. (Since 3.0)
+# @postcopy-blocktime: total time when all vCPU were blocked during
+#     postcopy live migration.  This is only present when the
+#     postcopy-blocktime migration capability is enabled.  (Since 3.0)
 #
-# @postcopy-vcpu-blocktime: list of the postcopy blocktime per vCPU.  This is
-#                           only present when the postcopy-blocktime migration capability
-#                           is enabled. (Since 3.0)
+# @postcopy-vcpu-blocktime: list of the postcopy blocktime per vCPU.
+#     This is only present when the postcopy-blocktime migration
+#     capability is enabled.  (Since 3.0)
 #
-# @compression: migration compression statistics, only returned if compression
-#               feature is on and status is 'active' or 'completed' (Since 3.1)
+# @compression: migration compression statistics, only returned if
+#     compression feature is on and status is 'active' or 'completed'
+#     (Since 3.1)
 #
-# @socket-address: Only used for tcp, to know what the real port is (Since 4.0)
+# @socket-address: Only used for tcp, to know what the real port is
+#     (Since 4.0)
 #
-# @vfio: @VfioStats containing detailed VFIO devices migration statistics,
-#        only returned if VFIO device is present, migration is supported by all
-#        VFIO devices and status is 'active' or 'completed' (since 5.2)
+# @vfio: @VfioStats containing detailed VFIO devices migration
+#     statistics, only returned if VFIO device is present, migration
+#     is supported by all VFIO devices and status is 'active' or
+#     'completed' (since 5.2)
 #
-# Since: 0.14.0
+# @blocked-reasons: A list of reasons an outgoing migration is
+#     blocked.  Present and non-empty when migration is blocked.
+#     (since 6.0)
+#
+# @dirty-limit-throttle-time-per-round: Maximum throttle time
+#     (in microseconds) of virtual CPUs each dirty ring full round,
+#     which shows how MigrationCapability dirty-limit affects the
+#     guest during live migration.  (Since 8.1)
+#
+# @dirty-limit-ring-full-time: Estimated average dirty ring full time
+#     (in microseconds) for each dirty ring full round.  The value
+#     equals the dirty ring memory size divided by the average dirty
+#     page rate of the virtual CPU, which can be used to observe the
+#     average memory load of the virtual CPU indirectly.  Note that
+#     zero means guest doesn't dirty memory.  (Since 8.1)
+#
+# Features:
+#
+# @deprecated: Member @disk is deprecated because block migration is.
+#     Member @compression is deprecated because it is unreliable and
+#     untested.  It is recommended to use multifd migration, which
+#     offers an alternative compression implementation that is
+#     reliable and tested.
+#
+# Since: 0.14
 ##
 { 'struct': 'MigrationInfo',
   'data': {'*status': 'MigrationStatus', '*ram': 'MigrationStats',
-           '*disk': 'MigrationStats',
+           '*disk': { 'type': 'MigrationStats', 'features': [ 'deprecated' ] },
            '*vfio': 'VfioStats',
            '*xbzrle-cache': 'XBZRLECacheStats',
            '*total-time': 'int',
            '*setup-time': 'int',
            '*cpu-throttle-percentage': 'int',
            '*error-desc': 'str',
-           '*postcopy-blocktime' : 'uint32',
+           '*blocked-reasons': ['str'],
+           '*postcopy-blocktime': 'uint32',
            '*postcopy-vcpu-blocktime': ['uint32'],
-           '*compression': 'CompressionStats',
-           '*socket-address': ['SocketAddress'] } }
+           '*compression': { 'type': 'CompressionStats', 'features': [ 'deprecated' ] },
+           '*socket-address': ['SocketAddress'],
+           '*dirty-limit-throttle-time-per-round': 'uint64',
+           '*dirty-limit-ring-full-time': 'uint64'} }
 
 ##
 # @query-migrate:
 #
-# Returns information about current migration process. If migration
+# Returns information about current migration process.  If migration
 # is active there will be another json-object with RAM migration
 # status and if block migration is active another one with block
 # migration status.
 #
 # Returns: @MigrationInfo
 #
-# Since: 0.14.0
+# Since: 0.14
 #
-# Example:
+# Examples:
 #
 # 1. Before the first migration
 #
 #          }
 #       }
 #    }
-#
 ##
 { 'command': 'query-migrate', 'returns': 'MigrationInfo' }
 
 #
 # Migration capabilities enumeration
 #
-# @xbzrle: Migration supports xbzrle (Xor Based Zero Run Length Encoding).
-#          This feature allows us to minimize migration traffic for certain work
-#          loads, by sending compressed difference of the pages
-#
-# @rdma-pin-all: Controls whether or not the entire VM memory footprint is
-#                mlock()'d on demand or all at once. Refer to docs/rdma.txt for usage.
-#                Disabled by default. (since 2.0)
-#
-# @zero-blocks: During storage migration encode blocks of zeroes efficiently. This
-#               essentially saves 1MB of zeroes per block on the wire. Enabling requires
-#               source and target VM to support this feature. To enable it is sufficient
-#               to enable the capability on the source VM. The feature is disabled by
-#               default. (since 1.6)
-#
-# @compress: Use multiple compression threads to accelerate live migration.
-#            This feature can help to reduce the migration traffic, by sending
-#            compressed pages. Please note that if compress and xbzrle are both
-#            on, compress only takes effect in the ram bulk stage, after that,
-#            it will be disabled and only xbzrle takes effect, this can help to
-#            minimize migration traffic. The feature is disabled by default.
-#            (since 2.4 )
-#
-# @events: generate events for each migration state change
-#          (since 2.4 )
-#
-# @auto-converge: If enabled, QEMU will automatically throttle down the guest
-#                 to speed up convergence of RAM migration. (since 1.6)
-#
-# @postcopy-ram: Start executing on the migration target before all of RAM has
-#                been migrated, pulling the remaining pages along as needed. The
-#                capacity must have the same setting on both source and target
-#                or migration will not even start. NOTE: If the migration fails during
-#                postcopy the VM will fail.  (since 2.6)
-#
-# @x-colo: If enabled, migration will never end, and the state of the VM on the
-#          primary side will be migrated continuously to the VM on secondary
-#          side, this process is called COarse-Grain LOck Stepping (COLO) for
-#          Non-stop Service. (since 2.8)
-#
-# @release-ram: if enabled, qemu will free the migrated ram pages on the source
-#               during postcopy-ram migration. (since 2.9)
+# @xbzrle: Migration supports xbzrle (Xor Based Zero Run Length
+#     Encoding). This feature allows us to minimize migration traffic
+#     for certain work loads, by sending compressed difference of the
+#     pages
+#
+# @rdma-pin-all: Controls whether or not the entire VM memory
+#     footprint is mlock()'d on demand or all at once.  Refer to
+#     docs/rdma.txt for usage.  Disabled by default.  (since 2.0)
+#
+# @zero-blocks: During storage migration encode blocks of zeroes
+#     efficiently.  This essentially saves 1MB of zeroes per block on
+#     the wire.  Enabling requires source and target VM to support
+#     this feature.  To enable it is sufficient to enable the
+#     capability on the source VM. The feature is disabled by default.
+#     (since 1.6)
+#
+# @compress: Use multiple compression threads to accelerate live
+#     migration.  This feature can help to reduce the migration
+#     traffic, by sending compressed pages.  Please note that if
+#     compress and xbzrle are both on, compress only takes effect in
+#     the ram bulk stage, after that, it will be disabled and only
+#     xbzrle takes effect, this can help to minimize migration
+#     traffic.  The feature is disabled by default.  (since 2.4)
+#
+# @events: generate events for each migration state change (since 2.4)
+#
+# @auto-converge: If enabled, QEMU will automatically throttle down
+#     the guest to speed up convergence of RAM migration.  (since 1.6)
+#
+# @postcopy-ram: Start executing on the migration target before all of
+#     RAM has been migrated, pulling the remaining pages along as
+#     needed.  The capacity must have the same setting on both source
+#     and target or migration will not even start.  NOTE: If the
+#     migration fails during postcopy the VM will fail.  (since 2.6)
+#
+# @x-colo: If enabled, migration will never end, and the state of the
+#     VM on the primary side will be migrated continuously to the VM
+#     on secondary side, this process is called COarse-Grain LOck
+#     Stepping (COLO) for Non-stop Service.  (since 2.8)
+#
+# @release-ram: if enabled, qemu will free the migrated ram pages on
+#     the source during postcopy-ram migration.  (since 2.9)
 #
 # @block: If enabled, QEMU will also migrate the contents of all block
-#         devices.  Default is disabled.  A possible alternative uses
-#         mirror jobs to a builtin NBD server on the destination, which
-#         offers more flexibility.
-#         (Since 2.10)
+#     devices.  Default is disabled.  A possible alternative uses
+#     mirror jobs to a builtin NBD server on the destination, which
+#     offers more flexibility.  (Since 2.10)
 #
 # @return-path: If enabled, migration will use the return path even
-#               for precopy. (since 2.10)
+#     for precopy.  (since 2.10)
 #
-# @pause-before-switchover: Pause outgoing migration before serialising device
-#                           state and before disabling block IO (since 2.11)
+# @pause-before-switchover: Pause outgoing migration before
+#     serialising device state and before disabling block IO (since
+#     2.11)
 #
 # @multifd: Use more than one fd for migration (since 4.0)
 #
 # @dirty-bitmaps: If enabled, QEMU will migrate named dirty bitmaps.
-#                 (since 2.12)
+#     (since 2.12)
 #
 # @postcopy-blocktime: Calculate downtime for postcopy live migration
-#                      (since 3.0)
+#     (since 3.0)
 #
-# @late-block-activate: If enabled, the destination will not activate block
-#                       devices (and thus take locks) immediately at the end of migration.
-#                       (since 3.0)
+# @late-block-activate: If enabled, the destination will not activate
+#     block devices (and thus take locks) immediately at the end of
+#     migration.  (since 3.0)
 #
-# @x-ignore-shared: If enabled, QEMU will not migrate shared memory (since 4.0)
+# @x-ignore-shared: If enabled, QEMU will not migrate shared memory
+#     that is accessible on the destination machine.  (since 4.0)
 #
 # @validate-uuid: Send the UUID of the source to allow the destination
-#                 to ensure it is the same. (since 4.2)
+#     to ensure it is the same.  (since 4.2)
+#
+# @background-snapshot: If enabled, the migration stream will be a
+#     snapshot of the VM exactly at the point when the migration
+#     procedure starts.  The VM RAM is saved with running VM. (since
+#     6.0)
+#
+# @zero-copy-send: Controls behavior on sending memory pages on
+#     migration.  When true, enables a zero-copy mechanism for sending
+#     memory pages, if host supports it.  Requires that QEMU be
+#     permitted to use locked memory for guest RAM pages.  (since 7.1)
+#
+# @postcopy-preempt: If enabled, the migration process will allow
+#     postcopy requests to preempt precopy stream, so postcopy
+#     requests will be handled faster.  This is a performance feature
+#     and should not affect the correctness of postcopy migration.
+#     (since 7.1)
+#
+# @switchover-ack: If enabled, migration will not stop the source VM
+#     and complete the migration until an ACK is received from the
+#     destination that it's OK to do so.  Exactly when this ACK is
+#     sent depends on the migrated devices that use this feature.  For
+#     example, a device can use it to make sure some of its data is
+#     sent and loaded in the destination before doing switchover.
+#     This can reduce downtime if devices that support this capability
+#     are present.  'return-path' capability must be enabled to use
+#     it.  (since 8.1)
+#
+# @dirty-limit: If enabled, migration will throttle vCPUs as needed to
+#     keep their dirty page rate within @vcpu-dirty-limit.  This can
+#     improve responsiveness of large guests during live migration,
+#     and can result in more stable read performance.  Requires KVM
+#     with accelerator property "dirty-ring-size" set.  (Since 8.1)
+#
+# Features:
+#
+# @deprecated: Member @block is deprecated.  Use blockdev-mirror with
+#     NBD instead.  Member @compression is deprecated because it is
+#     unreliable and untested.  It is recommended to use multifd
+#     migration, which offers an alternative compression
+#     implementation that is reliable and tested.
+#
+# @unstable: Members @x-colo and @x-ignore-shared are experimental.
 #
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
   'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
-           'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
-           'block', 'return-path', 'pause-before-switchover', 'multifd',
+           { 'name': 'compress', 'features': [ 'deprecated' ] },
+           'events', 'postcopy-ram',
+           { 'name': 'x-colo', 'features': [ 'unstable' ] },
+           'release-ram',
+           { 'name': 'block', 'features': [ 'deprecated' ] },
+           'return-path', 'pause-before-switchover', 'multifd',
            'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
-           'x-ignore-shared', 'validate-uuid' ] }
+           { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
+           'validate-uuid', 'background-snapshot',
+           'zero-copy-send', 'postcopy-preempt', 'switchover-ack',
+           'dirty-limit'] }
 
 ##
 # @MigrationCapabilityStatus:
 # Since: 1.2
 ##
 { 'struct': 'MigrationCapabilityStatus',
-  'data': { 'capability' : 'MigrationCapability', 'state' : 'bool' } }
+  'data': { 'capability': 'MigrationCapability', 'state': 'bool' } }
 
 ##
 # @migrate-set-capabilities:
 #
 # -> { "execute": "migrate-set-capabilities" , "arguments":
 #      { "capabilities": [ { "capability": "xbzrle", "state": true } ] } }
-#
+# <- { "return": {} }
 ##
 { 'command': 'migrate-set-capabilities',
   'data': { 'capabilities': ['MigrationCapabilityStatus'] } }
 #
 # Returns information about the current migration capabilities status
 #
-# Returns: @MigrationCapabilitiesStatus
+# Returns: @MigrationCapabilityStatus
 #
 # Since: 1.2
 #
 #       {"state": false, "capability": "postcopy-ram"},
 #       {"state": false, "capability": "x-colo"}
 #    ]}
-#
 ##
 { 'command': 'query-migrate-capabilities', 'returns':   ['MigrationCapabilityStatus']}
 
 # An enumeration of multifd compression methods.
 #
 # @none: no compression.
+#
 # @zlib: use zlib compression method.
+#
 # @zstd: use zstd compression method.
 #
 # Since: 5.0
-#
 ##
 { 'enum': 'MultiFDCompression',
   'data': [ 'none', 'zlib',
-            { 'name': 'zstd', 'if': 'defined(CONFIG_ZSTD)' } ] }
+            { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
+
+##
+# @MigMode:
+#
+# @normal: the original form of migration. (since 8.2)
+#
+# @cpr-reboot: The migrate command saves state to a file, allowing one to
+#              quit qemu, reboot to an updated kernel, and restart an updated
+#              version of qemu.  The caller must specify a migration URI
+#              that writes to and reads from a file.  Unlike normal mode,
+#              the use of certain local storage options does not block the
+#              migration, but the caller must not modify guest block devices
+#              between the quit and restart.  To avoid saving guest RAM to the
+#              file, the memory backend must be shared, and the @x-ignore-shared
+#              migration capability must be set.  Guest RAM must be non-volatile
+#              across reboot, such as by backing it with a dax device, but this
+#              is not enforced.  The restarted qemu arguments must match those
+#              used to initially start qemu, plus the -incoming option.
+#              (since 8.2)
+##
+{ 'enum': 'MigMode',
+  'data': [ 'normal', 'cpr-reboot' ] }
+
+##
+# @BitmapMigrationBitmapAliasTransform:
+#
+# @persistent: If present, the bitmap will be made persistent or
+#     transient depending on this parameter.
+#
+# Since: 6.0
+##
+{ 'struct': 'BitmapMigrationBitmapAliasTransform',
+  'data': {
+      '*persistent': 'bool'
+  } }
 
 ##
 # @BitmapMigrationBitmapAlias:
 # @name: The name of the bitmap.
 #
 # @alias: An alias name for migration (for example the bitmap name on
-#         the opposite site).
+#     the opposite site).
+#
+# @transform: Allows the modification of the migrated bitmap.  (since
+#     6.0)
 #
 # Since: 5.2
 ##
 { 'struct': 'BitmapMigrationBitmapAlias',
   'data': {
       'name': 'str',
-      'alias': 'str'
+      'alias': 'str',
+      '*transform': 'BitmapMigrationBitmapAliasTransform'
   } }
 
 ##
 #
 # @node-name: A block node name.
 #
-# @alias: An alias block node name for migration (for example the
-#         node name on the opposite site).
+# @alias: An alias block node name for migration (for example the node
+#     name on the opposite site).
 #
 # @bitmaps: Mappings for the bitmaps on this node.
 #
 #
 # Migration parameters enumeration
 #
-# @announce-initial: Initial delay (in milliseconds) before sending the first
-#                    announce (Since 4.0)
+# @announce-initial: Initial delay (in milliseconds) before sending
+#     the first announce (Since 4.0)
 #
-# @announce-max: Maximum delay (in milliseconds) between packets in the
-#                announcement (Since 4.0)
+# @announce-max: Maximum delay (in milliseconds) between packets in
+#     the announcement (Since 4.0)
 #
-# @announce-rounds: Number of self-announce packets sent after migration
-#                   (Since 4.0)
+# @announce-rounds: Number of self-announce packets sent after
+#     migration (Since 4.0)
 #
-# @announce-step: Increase in delay (in milliseconds) between subsequent
-#                 packets in the announcement (Since 4.0)
+# @announce-step: Increase in delay (in milliseconds) between
+#     subsequent packets in the announcement (Since 4.0)
 #
-# @compress-level: Set the compression level to be used in live migration,
-#                  the compression level is an integer between 0 and 9, where 0 means
-#                  no compression, 1 means the best compression speed, and 9 means best
-#                  compression ratio which will consume more CPU.
+# @compress-level: Set the compression level to be used in live
+#     migration, the compression level is an integer between 0 and 9,
+#     where 0 means no compression, 1 means the best compression
+#     speed, and 9 means best compression ratio which will consume
+#     more CPU.
 #
-# @compress-threads: Set compression thread count to be used in live migration,
-#                    the compression thread count is an integer between 1 and 255.
+# @compress-threads: Set compression thread count to be used in live
+#     migration, the compression thread count is an integer between 1
+#     and 255.
 #
-# @compress-wait-thread: Controls behavior when all compression threads are
-#                        currently busy. If true (default), wait for a free
-#                        compression thread to become available; otherwise,
-#                        send the page uncompressed. (Since 3.1)
+# @compress-wait-thread: Controls behavior when all compression
+#     threads are currently busy.  If true (default), wait for a free
+#     compression thread to become available; otherwise, send the page
+#     uncompressed.  (Since 3.1)
 #
-# @decompress-threads: Set decompression thread count to be used in live
-#                      migration, the decompression thread count is an integer between 1
-#                      and 255. Usually, decompression is at least 4 times as fast as
-#                      compression, so set the decompress-threads to the number about 1/4
-#                      of compress-threads is adequate.
+# @decompress-threads: Set decompression thread count to be used in
+#     live migration, the decompression thread count is an integer
+#     between 1 and 255. Usually, decompression is at least 4 times as
+#     fast as compression, so set the decompress-threads to the number
+#     about 1/4 of compress-threads is adequate.
 #
-# @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
-#                              to trigger throttling. It is expressed as percentage.
-#                              The default value is 50. (Since 5.0)
+# @throttle-trigger-threshold: The ratio of bytes_dirty_period and
+#     bytes_xfer_period to trigger throttling.  It is expressed as
+#     percentage.  The default value is 50. (Since 5.0)
 #
-# @cpu-throttle-initial: Initial percentage of time guest cpus are throttled
-#                        when migration auto-converge is activated. The
-#                        default value is 20. (Since 2.7)
+# @cpu-throttle-initial: Initial percentage of time guest cpus are
+#     throttled when migration auto-converge is activated.  The
+#     default value is 20. (Since 2.7)
 #
 # @cpu-throttle-increment: throttle percentage increase each time
-#                          auto-converge detects that migration is not making
-#                          progress. The default value is 10. (Since 2.7)
-#
-# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage
-#                         At the tail stage of throttling, the Guest is very
-#                         sensitive to CPU percentage while the @cpu-throttle
-#                         -increment is excessive usually at tail stage.
-#                         If this parameter is true, we will compute the ideal
-#                         CPU percentage used by the Guest, which may exactly make
-#                         the dirty rate match the dirty rate threshold. Then we
-#                         will choose a smaller throttle increment between the
-#                         one specified by @cpu-throttle-increment and the one
-#                         generated by ideal CPU percentage.
-#                         Therefore, it is compatible to traditional throttling,
-#                         meanwhile the throttle increment won't be excessive
-#                         at tail stage.
-#                         The default value is false. (Since 5.1)
-#
-# @tls-creds: ID of the 'tls-creds' object that provides credentials for
-#             establishing a TLS connection over the migration data channel.
-#             On the outgoing side of the migration, the credentials must
-#             be for a 'client' endpoint, while for the incoming side the
-#             credentials must be for a 'server' endpoint. Setting this
-#             will enable TLS for all migrations. The default is unset,
-#             resulting in unsecured migration at the QEMU level. (Since 2.7)
-#
-# @tls-hostname: hostname of the target host for the migration. This is
-#                required when using x509 based TLS credentials and the
-#                migration URI does not already include a hostname. For
-#                example if using fd: or exec: based migration, the
-#                hostname must be provided so that the server's x509
-#                certificate identity can be validated. (Since 2.7)
-#
-# @tls-authz: ID of the 'authz' object subclass that provides access control
-#             checking of the TLS x509 certificate distinguished name.
-#             This object is only resolved at time of use, so can be deleted
-#             and recreated on the fly while the migration server is active.
-#             If missing, it will default to denying access (Since 4.0)
-#
-# @max-bandwidth: to set maximum speed for migration. maximum speed in
-#                 bytes per second. (Since 2.8)
-#
-# @downtime-limit: set maximum tolerated downtime for migration. maximum
-#                  downtime in milliseconds (Since 2.8)
-#
-# @x-checkpoint-delay: The delay time (in ms) between two COLO checkpoints in
-#                      periodic mode. (Since 2.8)
+#     auto-converge detects that migration is not making progress.
+#     The default value is 10. (Since 2.7)
+#
+# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage At
+#     the tail stage of throttling, the Guest is very sensitive to CPU
+#     percentage while the @cpu-throttle -increment is excessive
+#     usually at tail stage.  If this parameter is true, we will
+#     compute the ideal CPU percentage used by the Guest, which may
+#     exactly make the dirty rate match the dirty rate threshold.
+#     Then we will choose a smaller throttle increment between the one
+#     specified by @cpu-throttle-increment and the one generated by
+#     ideal CPU percentage.  Therefore, it is compatible to
+#     traditional throttling, meanwhile the throttle increment won't
+#     be excessive at tail stage.  The default value is false.  (Since
+#     5.1)
+#
+# @tls-creds: ID of the 'tls-creds' object that provides credentials
+#     for establishing a TLS connection over the migration data
+#     channel.  On the outgoing side of the migration, the credentials
+#     must be for a 'client' endpoint, while for the incoming side the
+#     credentials must be for a 'server' endpoint.  Setting this will
+#     enable TLS for all migrations.  The default is unset, resulting
+#     in unsecured migration at the QEMU level.  (Since 2.7)
+#
+# @tls-hostname: hostname of the target host for the migration.  This
+#     is required when using x509 based TLS credentials and the
+#     migration URI does not already include a hostname.  For example
+#     if using fd: or exec: based migration, the hostname must be
+#     provided so that the server's x509 certificate identity can be
+#     validated.  (Since 2.7)
+#
+# @tls-authz: ID of the 'authz' object subclass that provides access
+#     control checking of the TLS x509 certificate distinguished name.
+#     This object is only resolved at time of use, so can be deleted
+#     and recreated on the fly while the migration server is active.
+#     If missing, it will default to denying access (Since 4.0)
+#
+# @max-bandwidth: to set maximum speed for migration.  maximum speed
+#     in bytes per second.  (Since 2.8)
+#
+# @avail-switchover-bandwidth: to set the available bandwidth that
+#     migration can use during switchover phase.  NOTE!  This does not
+#     limit the bandwidth during switchover, but only for calculations when
+#     making decisions to switchover.  By default, this value is zero,
+#     which means QEMU will estimate the bandwidth automatically.  This can
+#     be set when the estimated value is not accurate, while the user is
+#     able to guarantee such bandwidth is available when switching over.
+#     When specified correctly, this can make the switchover decision much
+#     more accurate.  (Since 8.2)
+#
+# @downtime-limit: set maximum tolerated downtime for migration.
+#     maximum downtime in milliseconds (Since 2.8)
+#
+# @x-checkpoint-delay: The delay time (in ms) between two COLO
+#     checkpoints in periodic mode.  (Since 2.8)
 #
 # @block-incremental: Affects how much storage is migrated when the
-#                     block migration capability is enabled.  When false, the entire
-#                     storage backing chain is migrated into a flattened image at
-#                     the destination; when true, only the active qcow2 layer is
-#                     migrated and the destination must already have access to the
-#                     same backing chain as was used on the source.  (since 2.10)
+#     block migration capability is enabled.  When false, the entire
+#     storage backing chain is migrated into a flattened image at the
+#     destination; when true, only the active qcow2 layer is migrated
+#     and the destination must already have access to the same backing
+#     chain as was used on the source.  (since 2.10)
 #
 # @multifd-channels: Number of channels used to migrate data in
-#                    parallel. This is the same number that the
-#                    number of sockets used for migration.  The
-#                    default value is 2 (since 4.0)
+#     parallel.  This is the same number that the number of sockets
+#     used for migration.  The default value is 2 (since 4.0)
 #
 # @xbzrle-cache-size: cache size to be used by XBZRLE migration.  It
-#                     needs to be a multiple of the target page size
-#                     and a power of 2
-#                     (Since 2.11)
+#     needs to be a multiple of the target page size and a power of 2
+#     (Since 2.11)
 #
-# @max-postcopy-bandwidth: Background transfer bandwidth during postcopy.
-#                          Defaults to 0 (unlimited).  In bytes per second.
-#                          (Since 3.0)
+# @max-postcopy-bandwidth: Background transfer bandwidth during
+#     postcopy.  Defaults to 0 (unlimited).  In bytes per second.
+#     (Since 3.0)
 #
-# @max-cpu-throttle: maximum cpu throttle percentage.
-#                    Defaults to 99. (Since 3.1)
+# @max-cpu-throttle: maximum cpu throttle percentage.  Defaults to 99.
+#     (Since 3.1)
 #
-# @multifd-compression: Which compression method to use.
-#                       Defaults to none. (Since 5.0)
+# @multifd-compression: Which compression method to use.  Defaults to
+#     none.  (Since 5.0)
 #
 # @multifd-zlib-level: Set the compression level to be used in live
-#                      migration, the compression level is an integer between 0
-#                      and 9, where 0 means no compression, 1 means the best
-#                      compression speed, and 9 means best compression ratio which
-#                      will consume more CPU.
-#                      Defaults to 1. (Since 5.0)
+#     migration, the compression level is an integer between 0 and 9,
+#     where 0 means no compression, 1 means the best compression
+#     speed, and 9 means best compression ratio which will consume
+#     more CPU. Defaults to 1. (Since 5.0)
 #
 # @multifd-zstd-level: Set the compression level to be used in live
-#                      migration, the compression level is an integer between 0
-#                      and 20, where 0 means no compression, 1 means the best
-#                      compression speed, and 20 means best compression ratio which
-#                      will consume more CPU.
-#                      Defaults to 1. (Since 5.0)
+#     migration, the compression level is an integer between 0 and 20,
+#     where 0 means no compression, 1 means the best compression
+#     speed, and 20 means best compression ratio which will consume
+#     more CPU. Defaults to 1. (Since 5.0)
 #
 # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
-#                        aliases for the purpose of dirty bitmap migration.  Such
-#                        aliases may for example be the corresponding names on the
-#                        opposite site.
-#                        The mapping must be one-to-one, but not necessarily
-#                        complete: On the source, unmapped bitmaps and all bitmaps
-#                        on unmapped nodes will be ignored.  On the destination,
-#                        encountering an unmapped alias in the incoming migration
-#                        stream will result in a report, and all further bitmap
-#                        migration data will then be discarded.
-#                        Note that the destination does not know about bitmaps it
-#                        does not receive, so there is no limitation or requirement
-#                        regarding the number of bitmaps received, or how they are
-#                        named, or on which nodes they are placed.
-#                        By default (when this parameter has never been set), bitmap
-#                        names are mapped to themselves.  Nodes are mapped to their
-#                        block device name if there is one, and to their node name
-#                        otherwise. (Since 5.2)
+#     aliases for the purpose of dirty bitmap migration.  Such aliases
+#     may for example be the corresponding names on the opposite site.
+#     The mapping must be one-to-one, but not necessarily complete: On
+#     the source, unmapped bitmaps and all bitmaps on unmapped nodes
+#     will be ignored.  On the destination, encountering an unmapped
+#     alias in the incoming migration stream will result in a report,
+#     and all further bitmap migration data will then be discarded.
+#     Note that the destination does not know about bitmaps it does
+#     not receive, so there is no limitation or requirement regarding
+#     the number of bitmaps received, or how they are named, or on
+#     which nodes they are placed.  By default (when this parameter
+#     has never been set), bitmap names are mapped to themselves.
+#     Nodes are mapped to their block device name if there is one, and
+#     to their node name otherwise.  (Since 5.2)
+#
+# @x-vcpu-dirty-limit-period: Periodic time (in milliseconds) of dirty
+#     limit during live migration.  Should be in the range 1 to 1000ms.
+#     Defaults to 1000ms.  (Since 8.1)
+#
+# @vcpu-dirty-limit: Dirtyrate limit (MB/s) during live migration.
+#     Defaults to 1.  (Since 8.1)
+#
+# @mode: Migration mode. See description in @MigMode. Default is 'normal'.
+#        (Since 8.2)
+#
+# Features:
+#
+# @deprecated: Member @block-incremental is deprecated.  Use
+#     blockdev-mirror with NBD instead.  Members @compress-level,
+#     @compress-threads, @decompress-threads and @compress-wait-thread
+#     are deprecated because @compression is deprecated.
+#
+# @unstable: Members @x-checkpoint-delay and @x-vcpu-dirty-limit-period
+#     are experimental.
 #
 # Since: 2.4
 ##
 { 'enum': 'MigrationParameter',
   'data': ['announce-initial', 'announce-max',
            'announce-rounds', 'announce-step',
-           'compress-level', 'compress-threads', 'decompress-threads',
-           'compress-wait-thread', 'throttle-trigger-threshold',
+           { 'name': 'compress-level', 'features': [ 'deprecated' ] },
+           { 'name': 'compress-threads', 'features': [ 'deprecated' ] },
+           { 'name': 'decompress-threads', 'features': [ 'deprecated' ] },
+           { 'name': 'compress-wait-thread', 'features': [ 'deprecated' ] },
+           'throttle-trigger-threshold',
            'cpu-throttle-initial', 'cpu-throttle-increment',
            'cpu-throttle-tailslow',
            'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth',
-           'downtime-limit', 'x-checkpoint-delay', 'block-incremental',
+           'avail-switchover-bandwidth', 'downtime-limit',
+           { 'name': 'x-checkpoint-delay', 'features': [ 'unstable' ] },
+           { 'name': 'block-incremental', 'features': [ 'deprecated' ] },
            'multifd-channels',
            'xbzrle-cache-size', 'max-postcopy-bandwidth',
            'max-cpu-throttle', 'multifd-compression',
-           'multifd-zlib-level' ,'multifd-zstd-level',
-           'block-bitmap-mapping' ] }
+           'multifd-zlib-level', 'multifd-zstd-level',
+           'block-bitmap-mapping',
+           { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] },
+           'vcpu-dirty-limit',
+           'mode'] }
 
 ##
 # @MigrateSetParameters:
 #
-# @announce-initial: Initial delay (in milliseconds) before sending the first
-#                    announce (Since 4.0)
+# @announce-initial: Initial delay (in milliseconds) before sending
+#     the first announce (Since 4.0)
 #
-# @announce-max: Maximum delay (in milliseconds) between packets in the
-#                announcement (Since 4.0)
+# @announce-max: Maximum delay (in milliseconds) between packets in
+#     the announcement (Since 4.0)
 #
-# @announce-rounds: Number of self-announce packets sent after migration
-#                   (Since 4.0)
+# @announce-rounds: Number of self-announce packets sent after
+#     migration (Since 4.0)
 #
-# @announce-step: Increase in delay (in milliseconds) between subsequent
-#                 packets in the announcement (Since 4.0)
+# @announce-step: Increase in delay (in milliseconds) between
+#     subsequent packets in the announcement (Since 4.0)
 #
 # @compress-level: compression level
 #
 # @compress-threads: compression thread count
 #
-# @compress-wait-thread: Controls behavior when all compression threads are
-#                        currently busy. If true (default), wait for a free
-#                        compression thread to become available; otherwise,
-#                        send the page uncompressed. (Since 3.1)
+# @compress-wait-thread: Controls behavior when all compression
+#     threads are currently busy.  If true (default), wait for a free
+#     compression thread to become available; otherwise, send the page
+#     uncompressed.  (Since 3.1)
 #
 # @decompress-threads: decompression thread count
 #
-# @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
-#                              to trigger throttling. It is expressed as percentage.
-#                              The default value is 50. (Since 5.0)
+# @throttle-trigger-threshold: The ratio of bytes_dirty_period and
+#     bytes_xfer_period to trigger throttling.  It is expressed as
+#     percentage.  The default value is 50. (Since 5.0)
 #
 # @cpu-throttle-initial: Initial percentage of time guest cpus are
-#                        throttled when migration auto-converge is activated.
-#                        The default value is 20. (Since 2.7)
+#     throttled when migration auto-converge is activated.  The
+#     default value is 20. (Since 2.7)
 #
 # @cpu-throttle-increment: throttle percentage increase each time
-#                          auto-converge detects that migration is not making
-#                          progress. The default value is 10. (Since 2.7)
-#
-# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage
-#                         At the tail stage of throttling, the Guest is very
-#                         sensitive to CPU percentage while the @cpu-throttle
-#                         -increment is excessive usually at tail stage.
-#                         If this parameter is true, we will compute the ideal
-#                         CPU percentage used by the Guest, which may exactly make
-#                         the dirty rate match the dirty rate threshold. Then we
-#                         will choose a smaller throttle increment between the
-#                         one specified by @cpu-throttle-increment and the one
-#                         generated by ideal CPU percentage.
-#                         Therefore, it is compatible to traditional throttling,
-#                         meanwhile the throttle increment won't be excessive
-#                         at tail stage.
-#                         The default value is false. (Since 5.1)
+#     auto-converge detects that migration is not making progress.
+#     The default value is 10. (Since 2.7)
+#
+# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage At
+#     the tail stage of throttling, the Guest is very sensitive to CPU
+#     percentage while the @cpu-throttle -increment is excessive
+#     usually at tail stage.  If this parameter is true, we will
+#     compute the ideal CPU percentage used by the Guest, which may
+#     exactly make the dirty rate match the dirty rate threshold.
+#     Then we will choose a smaller throttle increment between the one
+#     specified by @cpu-throttle-increment and the one generated by
+#     ideal CPU percentage.  Therefore, it is compatible to
+#     traditional throttling, meanwhile the throttle increment won't
+#     be excessive at tail stage.  The default value is false.  (Since
+#     5.1)
 #
 # @tls-creds: ID of the 'tls-creds' object that provides credentials
-#             for establishing a TLS connection over the migration data
-#             channel. On the outgoing side of the migration, the credentials
-#             must be for a 'client' endpoint, while for the incoming side the
-#             credentials must be for a 'server' endpoint. Setting this
-#             to a non-empty string enables TLS for all migrations.
-#             An empty string means that QEMU will use plain text mode for
-#             migration, rather than TLS (Since 2.9)
-#             Previously (since 2.7), this was reported by omitting
-#             tls-creds instead.
-#
-# @tls-hostname: hostname of the target host for the migration. This
-#                is required when using x509 based TLS credentials and the
-#                migration URI does not already include a hostname. For
-#                example if using fd: or exec: based migration, the
-#                hostname must be provided so that the server's x509
-#                certificate identity can be validated. (Since 2.7)
-#                An empty string means that QEMU will use the hostname
-#                associated with the migration URI, if any. (Since 2.9)
-#                Previously (since 2.7), this was reported by omitting
-#                tls-hostname instead.
-#
-# @max-bandwidth: to set maximum speed for migration. maximum speed in
-#                 bytes per second. (Since 2.8)
-#
-# @downtime-limit: set maximum tolerated downtime for migration. maximum
-#                  downtime in milliseconds (Since 2.8)
-#
-# @x-checkpoint-delay: the delay time between two COLO checkpoints. (Since 2.8)
+#     for establishing a TLS connection over the migration data
+#     channel.  On the outgoing side of the migration, the credentials
+#     must be for a 'client' endpoint, while for the incoming side the
+#     credentials must be for a 'server' endpoint.  Setting this to a
+#     non-empty string enables TLS for all migrations.  An empty
+#     string means that QEMU will use plain text mode for migration,
+#     rather than TLS (Since 2.9) Previously (since 2.7), this was
+#     reported by omitting tls-creds instead.
+#
+# @tls-hostname: hostname of the target host for the migration.  This
+#     is required when using x509 based TLS credentials and the
+#     migration URI does not already include a hostname.  For example
+#     if using fd: or exec: based migration, the hostname must be
+#     provided so that the server's x509 certificate identity can be
+#     validated.  (Since 2.7) An empty string means that QEMU will use
+#     the hostname associated with the migration URI, if any.  (Since
+#     2.9) Previously (since 2.7), this was reported by omitting
+#     tls-hostname instead.
+#
+# @max-bandwidth: to set maximum speed for migration.  maximum speed
+#     in bytes per second.  (Since 2.8)
+#
+# @avail-switchover-bandwidth: to set the available bandwidth that
+#     migration can use during switchover phase.  NOTE!  This does not
+#     limit the bandwidth during switchover, but only for calculations when
+#     making decisions to switchover.  By default, this value is zero,
+#     which means QEMU will estimate the bandwidth automatically.  This can
+#     be set when the estimated value is not accurate, while the user is
+#     able to guarantee such bandwidth is available when switching over.
+#     When specified correctly, this can make the switchover decision much
+#     more accurate.  (Since 8.2)
+#
+# @downtime-limit: set maximum tolerated downtime for migration.
+#     maximum downtime in milliseconds (Since 2.8)
+#
+# @x-checkpoint-delay: the delay time between two COLO checkpoints.
+#     (Since 2.8)
 #
 # @block-incremental: Affects how much storage is migrated when the
-#                     block migration capability is enabled.  When false, the entire
-#                     storage backing chain is migrated into a flattened image at
-#                     the destination; when true, only the active qcow2 layer is
-#                     migrated and the destination must already have access to the
-#                     same backing chain as was used on the source.  (since 2.10)
+#     block migration capability is enabled.  When false, the entire
+#     storage backing chain is migrated into a flattened image at the
+#     destination; when true, only the active qcow2 layer is migrated
+#     and the destination must already have access to the same backing
+#     chain as was used on the source.  (since 2.10)
 #
 # @multifd-channels: Number of channels used to migrate data in
-#                    parallel. This is the same number that the
-#                    number of sockets used for migration.  The
-#                    default value is 2 (since 4.0)
+#     parallel.  This is the same number that the number of sockets
+#     used for migration.  The default value is 2 (since 4.0)
 #
 # @xbzrle-cache-size: cache size to be used by XBZRLE migration.  It
-#                     needs to be a multiple of the target page size
-#                     and a power of 2
-#                     (Since 2.11)
+#     needs to be a multiple of the target page size and a power of 2
+#     (Since 2.11)
 #
-# @max-postcopy-bandwidth: Background transfer bandwidth during postcopy.
-#                          Defaults to 0 (unlimited).  In bytes per second.
-#                          (Since 3.0)
+# @max-postcopy-bandwidth: Background transfer bandwidth during
+#     postcopy.  Defaults to 0 (unlimited).  In bytes per second.
+#     (Since 3.0)
 #
-# @max-cpu-throttle: maximum cpu throttle percentage.
-#                    The default value is 99. (Since 3.1)
+# @max-cpu-throttle: maximum cpu throttle percentage.  The default
+#     value is 99. (Since 3.1)
 #
-# @multifd-compression: Which compression method to use.
-#                       Defaults to none. (Since 5.0)
+# @multifd-compression: Which compression method to use.  Defaults to
+#     none.  (Since 5.0)
 #
 # @multifd-zlib-level: Set the compression level to be used in live
-#                      migration, the compression level is an integer between 0
-#                      and 9, where 0 means no compression, 1 means the best
-#                      compression speed, and 9 means best compression ratio which
-#                      will consume more CPU.
-#                      Defaults to 1. (Since 5.0)
+#     migration, the compression level is an integer between 0 and 9,
+#     where 0 means no compression, 1 means the best compression
+#     speed, and 9 means best compression ratio which will consume
+#     more CPU. Defaults to 1. (Since 5.0)
 #
 # @multifd-zstd-level: Set the compression level to be used in live
-#                      migration, the compression level is an integer between 0
-#                      and 20, where 0 means no compression, 1 means the best
-#                      compression speed, and 20 means best compression ratio which
-#                      will consume more CPU.
-#                      Defaults to 1. (Since 5.0)
+#     migration, the compression level is an integer between 0 and 20,
+#     where 0 means no compression, 1 means the best compression
+#     speed, and 20 means best compression ratio which will consume
+#     more CPU. Defaults to 1. (Since 5.0)
 #
 # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
-#                        aliases for the purpose of dirty bitmap migration.  Such
-#                        aliases may for example be the corresponding names on the
-#                        opposite site.
-#                        The mapping must be one-to-one, but not necessarily
-#                        complete: On the source, unmapped bitmaps and all bitmaps
-#                        on unmapped nodes will be ignored.  On the destination,
-#                        encountering an unmapped alias in the incoming migration
-#                        stream will result in a report, and all further bitmap
-#                        migration data will then be discarded.
-#                        Note that the destination does not know about bitmaps it
-#                        does not receive, so there is no limitation or requirement
-#                        regarding the number of bitmaps received, or how they are
-#                        named, or on which nodes they are placed.
-#                        By default (when this parameter has never been set), bitmap
-#                        names are mapped to themselves.  Nodes are mapped to their
-#                        block device name if there is one, and to their node name
-#                        otherwise. (Since 5.2)
+#     aliases for the purpose of dirty bitmap migration.  Such aliases
+#     may for example be the corresponding names on the opposite site.
+#     The mapping must be one-to-one, but not necessarily complete: On
+#     the source, unmapped bitmaps and all bitmaps on unmapped nodes
+#     will be ignored.  On the destination, encountering an unmapped
+#     alias in the incoming migration stream will result in a report,
+#     and all further bitmap migration data will then be discarded.
+#     Note that the destination does not know about bitmaps it does
+#     not receive, so there is no limitation or requirement regarding
+#     the number of bitmaps received, or how they are named, or on
+#     which nodes they are placed.  By default (when this parameter
+#     has never been set), bitmap names are mapped to themselves.
+#     Nodes are mapped to their block device name if there is one, and
+#     to their node name otherwise.  (Since 5.2)
+#
+# @x-vcpu-dirty-limit-period: Periodic time (in milliseconds) of dirty
+#     limit during live migration.  Should be in the range 1 to 1000ms.
+#     Defaults to 1000ms.  (Since 8.1)
+#
+# @vcpu-dirty-limit: Dirtyrate limit (MB/s) during live migration.
+#     Defaults to 1.  (Since 8.1)
+#
+# @mode: Migration mode. See description in @MigMode. Default is 'normal'.
+#        (Since 8.2)
+#
+# Features:
+#
+# @deprecated: Member @block-incremental is deprecated.  Use
+#     blockdev-mirror with NBD instead.  Members @compress-level,
+#     @compress-threads, @decompress-threads and @compress-wait-thread
+#     are deprecated because @compression is deprecated.
+#
+# @unstable: Members @x-checkpoint-delay and @x-vcpu-dirty-limit-period
+#     are experimental.
+#
+# TODO: either fuse back into MigrationParameters, or make
+#     MigrationParameters members mandatory
 #
 # Since: 2.4
 ##
-# TODO either fuse back into MigrationParameters, or make
-# MigrationParameters members mandatory
 { 'struct': 'MigrateSetParameters',
   'data': { '*announce-initial': 'size',
             '*announce-max': 'size',
             '*announce-rounds': 'size',
             '*announce-step': 'size',
-            '*compress-level': 'int',
-            '*compress-threads': 'int',
-            '*compress-wait-thread': 'bool',
-            '*decompress-threads': 'int',
-            '*throttle-trigger-threshold': 'int',
-            '*cpu-throttle-initial': 'int',
-            '*cpu-throttle-increment': 'int',
+            '*compress-level': { 'type': 'uint8',
+                                 'features': [ 'deprecated' ] },
+            '*compress-threads':  { 'type': 'uint8',
+                                    'features': [ 'deprecated' ] },
+            '*compress-wait-thread':  { 'type': 'bool',
+                                        'features': [ 'deprecated' ] },
+            '*decompress-threads':  { 'type': 'uint8',
+                                      'features': [ 'deprecated' ] },
+            '*throttle-trigger-threshold': 'uint8',
+            '*cpu-throttle-initial': 'uint8',
+            '*cpu-throttle-increment': 'uint8',
             '*cpu-throttle-tailslow': 'bool',
             '*tls-creds': 'StrOrNull',
             '*tls-hostname': 'StrOrNull',
             '*tls-authz': 'StrOrNull',
-            '*max-bandwidth': 'int',
-            '*downtime-limit': 'int',
-            '*x-checkpoint-delay': 'int',
-            '*block-incremental': 'bool',
-            '*multifd-channels': 'int',
+            '*max-bandwidth': 'size',
+            '*avail-switchover-bandwidth': 'size',
+            '*downtime-limit': 'uint64',
+            '*x-checkpoint-delay': { 'type': 'uint32',
+                                     'features': [ 'unstable' ] },
+            '*block-incremental': { 'type': 'bool',
+                                    'features': [ 'deprecated' ] },
+            '*multifd-channels': 'uint8',
             '*xbzrle-cache-size': 'size',
             '*max-postcopy-bandwidth': 'size',
-            '*max-cpu-throttle': 'int',
+            '*max-cpu-throttle': 'uint8',
             '*multifd-compression': 'MultiFDCompression',
-            '*multifd-zlib-level': 'int',
-            '*multifd-zstd-level': 'int',
-            '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
+            '*multifd-zlib-level': 'uint8',
+            '*multifd-zstd-level': 'uint8',
+            '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ],
+            '*x-vcpu-dirty-limit-period': { 'type': 'uint64',
+                                            'features': [ 'unstable' ] },
+            '*vcpu-dirty-limit': 'uint64',
+            '*mode': 'MigMode'} }
 
 ##
 # @migrate-set-parameters:
 # Example:
 #
 # -> { "execute": "migrate-set-parameters" ,
-#      "arguments": { "compress-level": 1 } }
-#
+#      "arguments": { "multifd-channels": 5 } }
+# <- { "return": {} }
 ##
 { 'command': 'migrate-set-parameters', 'boxed': true,
   'data': 'MigrateSetParameters' }
 #
 # The optional members aren't actually optional.
 #
-# @announce-initial: Initial delay (in milliseconds) before sending the
-#                    first announce (Since 4.0)
+# @announce-initial: Initial delay (in milliseconds) before sending
+#     the first announce (Since 4.0)
 #
-# @announce-max: Maximum delay (in milliseconds) between packets in the
-#                announcement (Since 4.0)
+# @announce-max: Maximum delay (in milliseconds) between packets in
+#     the announcement (Since 4.0)
 #
-# @announce-rounds: Number of self-announce packets sent after migration
-#                   (Since 4.0)
+# @announce-rounds: Number of self-announce packets sent after
+#     migration (Since 4.0)
 #
-# @announce-step: Increase in delay (in milliseconds) between subsequent
-#                 packets in the announcement (Since 4.0)
+# @announce-step: Increase in delay (in milliseconds) between
+#     subsequent packets in the announcement (Since 4.0)
 #
 # @compress-level: compression level
 #
 # @compress-threads: compression thread count
 #
-# @compress-wait-thread: Controls behavior when all compression threads are
-#                        currently busy. If true (default), wait for a free
-#                        compression thread to become available; otherwise,
-#                        send the page uncompressed. (Since 3.1)
+# @compress-wait-thread: Controls behavior when all compression
+#     threads are currently busy.  If true (default), wait for a free
+#     compression thread to become available; otherwise, send the page
+#     uncompressed.  (Since 3.1)
 #
 # @decompress-threads: decompression thread count
 #
-# @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
-#                              to trigger throttling. It is expressed as percentage.
-#                              The default value is 50. (Since 5.0)
+# @throttle-trigger-threshold: The ratio of bytes_dirty_period and
+#     bytes_xfer_period to trigger throttling.  It is expressed as
+#     percentage.  The default value is 50. (Since 5.0)
 #
 # @cpu-throttle-initial: Initial percentage of time guest cpus are
-#                        throttled when migration auto-converge is activated.
-#                        (Since 2.7)
+#     throttled when migration auto-converge is activated.  (Since
+#     2.7)
 #
 # @cpu-throttle-increment: throttle percentage increase each time
-#                          auto-converge detects that migration is not making
-#                          progress. (Since 2.7)
-#
-# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage
-#                         At the tail stage of throttling, the Guest is very
-#                         sensitive to CPU percentage while the @cpu-throttle
-#                         -increment is excessive usually at tail stage.
-#                         If this parameter is true, we will compute the ideal
-#                         CPU percentage used by the Guest, which may exactly make
-#                         the dirty rate match the dirty rate threshold. Then we
-#                         will choose a smaller throttle increment between the
-#                         one specified by @cpu-throttle-increment and the one
-#                         generated by ideal CPU percentage.
-#                         Therefore, it is compatible to traditional throttling,
-#                         meanwhile the throttle increment won't be excessive
-#                         at tail stage.
-#                         The default value is false. (Since 5.1)
+#     auto-converge detects that migration is not making progress.
+#     (Since 2.7)
+#
+# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage At
+#     the tail stage of throttling, the Guest is very sensitive to CPU
+#     percentage while the @cpu-throttle -increment is excessive
+#     usually at tail stage.  If this parameter is true, we will
+#     compute the ideal CPU percentage used by the Guest, which may
+#     exactly make the dirty rate match the dirty rate threshold.
+#     Then we will choose a smaller throttle increment between the one
+#     specified by @cpu-throttle-increment and the one generated by
+#     ideal CPU percentage.  Therefore, it is compatible to
+#     traditional throttling, meanwhile the throttle increment won't
+#     be excessive at tail stage.  The default value is false.  (Since
+#     5.1)
 #
 # @tls-creds: ID of the 'tls-creds' object that provides credentials
-#             for establishing a TLS connection over the migration data
-#             channel. On the outgoing side of the migration, the credentials
-#             must be for a 'client' endpoint, while for the incoming side the
-#             credentials must be for a 'server' endpoint.
-#             An empty string means that QEMU will use plain text mode for
-#             migration, rather than TLS (Since 2.7)
-#             Note: 2.8 reports this by omitting tls-creds instead.
-#
-# @tls-hostname: hostname of the target host for the migration. This
-#                is required when using x509 based TLS credentials and the
-#                migration URI does not already include a hostname. For
-#                example if using fd: or exec: based migration, the
-#                hostname must be provided so that the server's x509
-#                certificate identity can be validated. (Since 2.7)
-#                An empty string means that QEMU will use the hostname
-#                associated with the migration URI, if any. (Since 2.9)
-#                Note: 2.8 reports this by omitting tls-hostname instead.
-#
-# @tls-authz: ID of the 'authz' object subclass that provides access control
-#             checking of the TLS x509 certificate distinguished name. (Since
-#             4.0)
-#
-# @max-bandwidth: to set maximum speed for migration. maximum speed in
-#                 bytes per second. (Since 2.8)
-#
-# @downtime-limit: set maximum tolerated downtime for migration. maximum
-#                  downtime in milliseconds (Since 2.8)
-#
-# @x-checkpoint-delay: the delay time between two COLO checkpoints. (Since 2.8)
+#     for establishing a TLS connection over the migration data
+#     channel.  On the outgoing side of the migration, the credentials
+#     must be for a 'client' endpoint, while for the incoming side the
+#     credentials must be for a 'server' endpoint.  An empty string
+#     means that QEMU will use plain text mode for migration, rather
+#     than TLS (Since 2.7) Note: 2.8 reports this by omitting
+#     tls-creds instead.
+#
+# @tls-hostname: hostname of the target host for the migration.  This
+#     is required when using x509 based TLS credentials and the
+#     migration URI does not already include a hostname.  For example
+#     if using fd: or exec: based migration, the hostname must be
+#     provided so that the server's x509 certificate identity can be
+#     validated.  (Since 2.7) An empty string means that QEMU will use
+#     the hostname associated with the migration URI, if any.  (Since
+#     2.9) Note: 2.8 reports this by omitting tls-hostname instead.
+#
+# @tls-authz: ID of the 'authz' object subclass that provides access
+#     control checking of the TLS x509 certificate distinguished name.
+#     (Since 4.0)
+#
+# @max-bandwidth: to set maximum speed for migration.  maximum speed
+#     in bytes per second.  (Since 2.8)
+#
+# @avail-switchover-bandwidth: to set the available bandwidth that
+#     migration can use during switchover phase.  NOTE!  This does not
+#     limit the bandwidth during switchover, but only for calculations when
+#     making decisions to switchover.  By default, this value is zero,
+#     which means QEMU will estimate the bandwidth automatically.  This can
+#     be set when the estimated value is not accurate, while the user is
+#     able to guarantee such bandwidth is available when switching over.
+#     When specified correctly, this can make the switchover decision much
+#     more accurate.  (Since 8.2)
+#
+# @downtime-limit: set maximum tolerated downtime for migration.
+#     maximum downtime in milliseconds (Since 2.8)
+#
+# @x-checkpoint-delay: the delay time between two COLO checkpoints.
+#     (Since 2.8)
 #
 # @block-incremental: Affects how much storage is migrated when the
-#                     block migration capability is enabled.  When false, the entire
-#                     storage backing chain is migrated into a flattened image at
-#                     the destination; when true, only the active qcow2 layer is
-#                     migrated and the destination must already have access to the
-#                     same backing chain as was used on the source.  (since 2.10)
+#     block migration capability is enabled.  When false, the entire
+#     storage backing chain is migrated into a flattened image at the
+#     destination; when true, only the active qcow2 layer is migrated
+#     and the destination must already have access to the same backing
+#     chain as was used on the source.  (since 2.10)
 #
 # @multifd-channels: Number of channels used to migrate data in
-#                    parallel. This is the same number that the
-#                    number of sockets used for migration.
-#                    The default value is 2 (since 4.0)
+#     parallel.  This is the same number that the number of sockets
+#     used for migration.  The default value is 2 (since 4.0)
 #
 # @xbzrle-cache-size: cache size to be used by XBZRLE migration.  It
-#                     needs to be a multiple of the target page size
-#                     and a power of 2
-#                     (Since 2.11)
+#     needs to be a multiple of the target page size and a power of 2
+#     (Since 2.11)
 #
-# @max-postcopy-bandwidth: Background transfer bandwidth during postcopy.
-#                          Defaults to 0 (unlimited).  In bytes per second.
-#                          (Since 3.0)
+# @max-postcopy-bandwidth: Background transfer bandwidth during
+#     postcopy.  Defaults to 0 (unlimited).  In bytes per second.
+#     (Since 3.0)
 #
-# @max-cpu-throttle: maximum cpu throttle percentage.
-#                    Defaults to 99.
-#                    (Since 3.1)
+# @max-cpu-throttle: maximum cpu throttle percentage.  Defaults to 99.
+#     (Since 3.1)
 #
-# @multifd-compression: Which compression method to use.
-#                       Defaults to none. (Since 5.0)
+# @multifd-compression: Which compression method to use.  Defaults to
+#     none.  (Since 5.0)
 #
 # @multifd-zlib-level: Set the compression level to be used in live
-#                      migration, the compression level is an integer between 0
-#                      and 9, where 0 means no compression, 1 means the best
-#                      compression speed, and 9 means best compression ratio which
-#                      will consume more CPU.
-#                      Defaults to 1. (Since 5.0)
+#     migration, the compression level is an integer between 0 and 9,
+#     where 0 means no compression, 1 means the best compression
+#     speed, and 9 means best compression ratio which will consume
+#     more CPU. Defaults to 1. (Since 5.0)
 #
 # @multifd-zstd-level: Set the compression level to be used in live
-#                      migration, the compression level is an integer between 0
-#                      and 20, where 0 means no compression, 1 means the best
-#                      compression speed, and 20 means best compression ratio which
-#                      will consume more CPU.
-#                      Defaults to 1. (Since 5.0)
+#     migration, the compression level is an integer between 0 and 20,
+#     where 0 means no compression, 1 means the best compression
+#     speed, and 20 means best compression ratio which will consume
+#     more CPU. Defaults to 1. (Since 5.0)
 #
 # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
-#                        aliases for the purpose of dirty bitmap migration.  Such
-#                        aliases may for example be the corresponding names on the
-#                        opposite site.
-#                        The mapping must be one-to-one, but not necessarily
-#                        complete: On the source, unmapped bitmaps and all bitmaps
-#                        on unmapped nodes will be ignored.  On the destination,
-#                        encountering an unmapped alias in the incoming migration
-#                        stream will result in a report, and all further bitmap
-#                        migration data will then be discarded.
-#                        Note that the destination does not know about bitmaps it
-#                        does not receive, so there is no limitation or requirement
-#                        regarding the number of bitmaps received, or how they are
-#                        named, or on which nodes they are placed.
-#                        By default (when this parameter has never been set), bitmap
-#                        names are mapped to themselves.  Nodes are mapped to their
-#                        block device name if there is one, and to their node name
-#                        otherwise. (Since 5.2)
+#     aliases for the purpose of dirty bitmap migration.  Such aliases
+#     may for example be the corresponding names on the opposite site.
+#     The mapping must be one-to-one, but not necessarily complete: On
+#     the source, unmapped bitmaps and all bitmaps on unmapped nodes
+#     will be ignored.  On the destination, encountering an unmapped
+#     alias in the incoming migration stream will result in a report,
+#     and all further bitmap migration data will then be discarded.
+#     Note that the destination does not know about bitmaps it does
+#     not receive, so there is no limitation or requirement regarding
+#     the number of bitmaps received, or how they are named, or on
+#     which nodes they are placed.  By default (when this parameter
+#     has never been set), bitmap names are mapped to themselves.
+#     Nodes are mapped to their block device name if there is one, and
+#     to their node name otherwise.  (Since 5.2)
+#
+# @x-vcpu-dirty-limit-period: Periodic time (in milliseconds) of dirty
+#     limit during live migration.  Should be in the range 1 to 1000ms.
+#     Defaults to 1000ms.  (Since 8.1)
+#
+# @vcpu-dirty-limit: Dirtyrate limit (MB/s) during live migration.
+#     Defaults to 1.  (Since 8.1)
+#
+# @mode: Migration mode. See description in @MigMode. Default is 'normal'.
+#        (Since 8.2)
+#
+# Features:
+#
+# @deprecated: Member @block-incremental is deprecated.  Use
+#     blockdev-mirror with NBD instead.  Members @compress-level,
+#     @compress-threads, @decompress-threads and @compress-wait-thread
+#     are deprecated because @compression is deprecated.
+#
+# @unstable: Members @x-checkpoint-delay and @x-vcpu-dirty-limit-period
+#     are experimental.
 #
 # Since: 2.4
 ##
             '*announce-max': 'size',
             '*announce-rounds': 'size',
             '*announce-step': 'size',
-            '*compress-level': 'uint8',
-            '*compress-threads': 'uint8',
-            '*compress-wait-thread': 'bool',
-            '*decompress-threads': 'uint8',
+            '*compress-level': { 'type': 'uint8',
+                                 'features': [ 'deprecated' ] },
+            '*compress-threads': { 'type': 'uint8',
+                                   'features': [ 'deprecated' ] },
+            '*compress-wait-thread': { 'type': 'bool',
+                                       'features': [ 'deprecated' ] },
+            '*decompress-threads': { 'type': 'uint8',
+                                     'features': [ 'deprecated' ] },
             '*throttle-trigger-threshold': 'uint8',
             '*cpu-throttle-initial': 'uint8',
             '*cpu-throttle-increment': 'uint8',
             '*tls-hostname': 'str',
             '*tls-authz': 'str',
             '*max-bandwidth': 'size',
+            '*avail-switchover-bandwidth': 'size',
             '*downtime-limit': 'uint64',
-            '*x-checkpoint-delay': 'uint32',
-            '*block-incremental': 'bool' ,
+            '*x-checkpoint-delay': { 'type': 'uint32',
+                                     'features': [ 'unstable' ] },
+            '*block-incremental': { 'type': 'bool',
+                                    'features': [ 'deprecated' ] },
             '*multifd-channels': 'uint8',
             '*xbzrle-cache-size': 'size',
             '*max-postcopy-bandwidth': 'size',
             '*multifd-compression': 'MultiFDCompression',
             '*multifd-zlib-level': 'uint8',
             '*multifd-zstd-level': 'uint8',
-            '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
+            '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ],
+            '*x-vcpu-dirty-limit-period': { 'type': 'uint64',
+                                            'features': [ 'unstable' ] },
+            '*vcpu-dirty-limit': 'uint64',
+            '*mode': 'MigMode'} }
 
 ##
 # @query-migrate-parameters:
 #
 # -> { "execute": "query-migrate-parameters" }
 # <- { "return": {
-#          "decompress-threads": 2,
+#          "multifd-channels": 2,
 #          "cpu-throttle-increment": 10,
-#          "compress-threads": 8,
-#          "compress-level": 1,
 #          "cpu-throttle-initial": 20,
 #          "max-bandwidth": 33554432,
 #          "downtime-limit": 300
 #       }
 #    }
-#
 ##
 { 'command': 'query-migrate-parameters',
   'returns': 'MigrationParameters' }
 
-##
-# @client_migrate_info:
-#
-# Set migration information for remote display.  This makes the server
-# ask the client to automatically reconnect using the new parameters
-# once migration finished successfully.  Only implemented for SPICE.
-#
-# @protocol:     must be "spice"
-# @hostname:     migration target hostname
-# @port:         spice tcp port for plaintext channels
-# @tls-port:     spice tcp port for tls-secured channels
-# @cert-subject: server certificate subject
-#
-# Since: 0.14.0
-#
-# Example:
-#
-# -> { "execute": "client_migrate_info",
-#      "arguments": { "protocol": "spice",
-#                     "hostname": "virt42.lab.kraxel.org",
-#                     "port": 1234 } }
-# <- { "return": {} }
-#
-##
-{ 'command': 'client_migrate_info',
-  'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int',
-            '*tls-port': 'int', '*cert-subject': 'str' } }
-
 ##
 # @migrate-start-postcopy:
 #
-# Followup to a migration command to switch the migration to postcopy mode.
-# The postcopy-ram capability must be set on both source and destination
-# before the original migration command.
+# Followup to a migration command to switch the migration to postcopy
+# mode.  The postcopy-ram capability must be set on both source and
+# destination before the original migration command.
 #
 # Since: 2.5
 #
 #
 # -> { "execute": "migrate-start-postcopy" }
 # <- { "return": {} }
-#
 ##
 { 'command': 'migrate-start-postcopy' }
 
 # <- {"timestamp": {"seconds": 1432121972, "microseconds": 744001},
 #     "event": "MIGRATION",
 #     "data": {"status": "completed"} }
-#
 ##
 { 'event': 'MIGRATION',
   'data': {'status': 'MigrationStatus'}}
 ##
 # @MIGRATION_PASS:
 #
-# Emitted from the source side of a migration at the start of each pass
-# (when it syncs the dirty bitmap)
+# Emitted from the source side of a migration at the start of each
+# pass (when it syncs the dirty bitmap)
 #
 # @pass: An incrementing count (starting at 1 on the first pass)
 #
 #
 # Example:
 #
-# { "timestamp": {"seconds": 1449669631, "microseconds": 239225},
-#   "event": "MIGRATION_PASS", "data": {"pass": 2} }
-#
+# <- { "timestamp": {"seconds": 1449669631, "microseconds": 239225},
+#       "event": "MIGRATION_PASS", "data": {"pass": 2} }
 ##
 { 'event': 'MIGRATION_PASS',
   'data': { 'pass': 'int' } }
 #
 # @checkpoint-ready: Secondary VM (SVM) is ready for checkpointing
 #
-# @checkpoint-request: Primary VM (PVM) tells SVM to prepare for checkpointing
+# @checkpoint-request: Primary VM (PVM) tells SVM to prepare for
+#     checkpointing
 #
 # @checkpoint-reply: SVM gets PVM's checkpoint request
 #
 #
 # @completed: finish the process of failover
 #
-# @relaunch: restart the failover process, from 'none' -> 'completed' (Since 2.9)
+# @relaunch: restart the failover process, from 'none' -> 'completed'
+#     (Since 2.9)
 #
 # Since: 2.8
 ##
 #
 # <- { "timestamp": {"seconds": 2032141960, "microseconds": 417172},
 #      "event": "COLO_EXIT", "data": {"mode": "primary", "reason": "request" } }
-#
 ##
 { 'event': 'COLO_EXIT',
   'data': {'mode': 'COLOMode', 'reason': 'COLOExitReason' } }
 #
 # The reason for a COLO exit.
 #
-# @none: failover has never happened. This state does not occur
-#        in the COLO_EXIT event, and is only visible in the result of
-#        query-colo-status.
+# @none: failover has never happened.  This state does not occur in
+#     the COLO_EXIT event, and is only visible in the result of
+#     query-colo-status.
 #
 # @request: COLO exit is due to an external request.
 #
 ##
 # @x-colo-lost-heartbeat:
 #
-# Tell qemu that heartbeat is lost, request it to do takeover procedures.
-# If this command is sent to the PVM, the Primary side will exit COLO mode.
-# If sent to the Secondary, the Secondary side will run failover work,
-# then takes over server operation to become the service VM.
+# Tell qemu that heartbeat is lost, request it to do takeover
+# procedures.  If this command is sent to the PVM, the Primary side
+# will exit COLO mode.  If sent to the Secondary, the Secondary side
+# will run failover work, then takes over server operation to become
+# the service VM.
+#
+# Features:
+#
+# @unstable: This command is experimental.
 #
 # Since: 2.8
 #
 #
 # -> { "execute": "x-colo-lost-heartbeat" }
 # <- { "return": {} }
-#
 ##
-{ 'command': 'x-colo-lost-heartbeat' }
+{ 'command': 'x-colo-lost-heartbeat',
+  'features': [ 'unstable' ],
+  'if': 'CONFIG_REPLICATION' }
 
 ##
 # @migrate_cancel:
 #
 # Returns: nothing on success
 #
-# Notes: This command succeeds even if there is no migration process running.
+# Notes: This command succeeds even if there is no migration process
+#     running.
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 # -> { "execute": "migrate_cancel" }
 # <- { "return": {} }
-#
 ##
 { 'command': 'migrate_cancel' }
 
 # @state: The state the migration is currently expected to be in
 #
 # Returns: nothing on success
+#
 # Since: 2.11
+#
 # Example:
 #
 # -> { "execute": "migrate-continue" , "arguments":
 { 'command': 'migrate-continue', 'data': {'state': 'MigrationStatus'} }
 
 ##
-# @migrate_set_downtime:
-#
-# Set maximum tolerated downtime for migration.
+# @MigrationAddressType:
 #
-# @value: maximum downtime in seconds
-#
-# Features:
-# @deprecated: This command is deprecated.  Use
-#              'migrate-set-parameters' instead.
+# The migration stream transport mechanisms.
 #
-# Returns: nothing on success
+# @socket: Migrate via socket.
 #
-# Since: 0.14.0
+# @exec: Direct the migration stream to another process.
 #
-# Example:
+# @rdma: Migrate via RDMA.
 #
-# -> { "execute": "migrate_set_downtime", "arguments": { "value": 0.1 } }
-# <- { "return": {} }
+# @file: Direct the migration stream to a file.
 #
+# Since 8.2
 ##
-{ 'command': 'migrate_set_downtime', 'data': {'value': 'number'},
-  'features': [ 'deprecated' ] }
+{ 'enum': 'MigrationAddressType',
+  'data': [ 'socket', 'exec', 'rdma', 'file' ] }
 
 ##
-# @migrate_set_speed:
+# @FileMigrationArgs:
 #
-# Set maximum speed for migration.
+# @filename: The file to receive the migration stream
 #
-# @value: maximum speed in bytes per second.
-#
-# Features:
-# @deprecated: This command is deprecated.  Use
-#              'migrate-set-parameters' instead.
-#
-# Returns: nothing on success
-#
-# Since: 0.14.0
-#
-# Example:
-#
-# -> { "execute": "migrate_set_speed", "arguments": { "value": 1024 } }
-# <- { "return": {} }
+# @offset: The file offset where the migration stream will start
 #
+# Since 8.2
 ##
-{ 'command': 'migrate_set_speed', 'data': {'value': 'int'},
-  'features': [ 'deprecated' ] }
+{ 'struct': 'FileMigrationArgs',
+  'data': { 'filename': 'str',
+            'offset': 'uint64' } }
 
 ##
-# @migrate-set-cache-size:
+# @MigrationExecCommand:
 #
-# Set cache size to be used by XBZRLE migration
+# @args: command (list head) and arguments to execute.
 #
-# @value: cache size in bytes
-#
-# Features:
-# @deprecated: This command is deprecated.  Use
-#              'migrate-set-parameters' instead.
-#
-# The size will be rounded down to the nearest power of 2.
-# The cache size can be modified before and during ongoing migration
-#
-# Returns: nothing on success
-#
-# Since: 1.2
+# Since 8.2
+##
+{ 'struct': 'MigrationExecCommand',
+  'data': {'args': [ 'str' ] } }
+
+##
+# @MigrationAddress:
 #
-# Example:
-#
-# -> { "execute": "migrate-set-cache-size",
-#      "arguments": { "value": 536870912 } }
-# <- { "return": {} }
+# Migration endpoint configuration.
 #
+# Since 8.2
 ##
-{ 'command': 'migrate-set-cache-size', 'data': {'value': 'int'},
-  'features': [ 'deprecated' ] }
+{ 'union': 'MigrationAddress',
+  'base': { 'transport' : 'MigrationAddressType'},
+  'discriminator': 'transport',
+  'data': {
+    'socket': 'SocketAddress',
+    'exec': 'MigrationExecCommand',
+    'rdma': 'InetSocketAddress',
+    'file': 'FileMigrationArgs' } }
 
 ##
-# @query-migrate-cache-size:
+# @MigrationChannelType:
 #
-# Query migration XBZRLE cache size
+# The migration channel-type request options.
 #
-# Features:
-# @deprecated: This command is deprecated.  Use
-#              'query-migrate-parameters' instead.
+# @main: Main outbound migration channel.
 #
-# Returns: XBZRLE cache size in bytes
+# Since 8.1
+##
+{ 'enum': 'MigrationChannelType',
+  'data': [ 'main' ] }
+
+##
+# @MigrationChannel:
 #
-# Since: 1.2
+# Migration stream channel parameters.
 #
-# Example:
+# @channel-type: Channel type for transferring packet information.
 #
-# -> { "execute": "query-migrate-cache-size" }
-# <- { "return": 67108864 }
+# @addr: Migration endpoint configuration on destination interface.
 #
+# Since 8.1
 ##
-{ 'command': 'query-migrate-cache-size', 'returns': 'int',
-  'features': [ 'deprecated' ] }
+{ 'struct': 'MigrationChannel',
+  'data': {
+      'channel-type': 'MigrationChannelType',
+      'addr': 'MigrationAddress' } }
 
 ##
 # @migrate:
 #
 # @uri: the Uniform Resource Identifier of the destination VM
 #
+# @channels: list of migration stream channels with each stream in the
+#     list connected to a destination interface endpoint.
+#
 # @blk: do block migration (full disk copy)
 #
 # @inc: incremental disk copy migration
 #
-# @detach: this argument exists only for compatibility reasons and
-#          is ignored by QEMU
+# @detach: this argument exists only for compatibility reasons and is
+#     ignored by QEMU
 #
 # @resume: resume one paused migration, default "off". (since 3.0)
 #
+# Features:
+#
+# @deprecated: Members @inc and @blk are deprecated.  Use
+#     blockdev-mirror with NBD instead.
+#
 # Returns: nothing on success
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Notes:
 #
-# 1. The 'query-migrate' command should be used to check migration's progress
-#    and final result (this information is provided by the 'status' member)
+# 1. The 'query-migrate' command should be used to check migration's
+#    progress and final result (this information is provided by the
+#    'status' member)
 #
 # 2. All boolean arguments default to false
 #
-# 3. The user Monitor's "detach" argument is invalid in QMP and should not
-#    be used
+# 3. The user Monitor's "detach" argument is invalid in QMP and should
+#    not be used
+#
+# 4. The uri argument should have the Uniform Resource Identifier of
+#    default destination VM. This connection will be bound to default
+#    network.
+#
+# 5. For now, number of migration streams is restricted to one, i.e
+#    number of items in 'channels' list is just 1.
+#
+# 6. The 'uri' and 'channels' arguments are mutually exclusive;
+#    exactly one of the two should be present.
 #
 # Example:
 #
 # -> { "execute": "migrate", "arguments": { "uri": "tcp:0:4446" } }
 # <- { "return": {} }
+# -> { "execute": "migrate",
+#      "arguments": {
+#          "channels": [ { "channel-type": "main",
+#                          "addr": { "transport": "socket",
+#                                    "type": "inet",
+#                                    "host": "10.12.34.9",
+#                                    "port": "1050" } } ] } }
+# <- { "return": {} }
+#
+# -> { "execute": "migrate",
+#      "arguments": {
+#          "channels": [ { "channel-type": "main",
+#                          "addr": { "transport": "exec",
+#                                    "args": [ "/bin/nc", "-p", "6000",
+#                                              "/some/sock" ] } } ] } }
+# <- { "return": {} }
+#
+# -> { "execute": "migrate",
+#      "arguments": {
+#          "channels": [ { "channel-type": "main",
+#                          "addr": { "transport": "rdma",
+#                                    "host": "10.12.34.9",
+#                                    "port": "1050" } } ] } }
+# <- { "return": {} }
+#
+# -> { "execute": "migrate",
+#      "arguments": {
+#          "channels": [ { "channel-type": "main",
+#                          "addr": { "transport": "file",
+#                                    "filename": "/tmp/migfile",
+#                                    "offset": "0x1000" } } ] } }
+# <- { "return": {} }
 #
 ##
 { 'command': 'migrate',
-  'data': {'uri': 'str', '*blk': 'bool', '*inc': 'bool',
+  'data': {'*uri': 'str',
+           '*channels': [ 'MigrationChannel' ],
+           '*blk': { 'type': 'bool', 'features': [ 'deprecated' ] },
+           '*inc': { 'type': 'bool', 'features': [ 'deprecated' ] },
            '*detach': 'bool', '*resume': 'bool' } }
 
 ##
 # @migrate-incoming:
 #
-# Start an incoming migration, the qemu must have been started
-# with -incoming defer
+# Start an incoming migration, the qemu must have been started with
+# -incoming defer
 #
 # @uri: The Uniform Resource Identifier identifying the source or
-#       address to listen on
+#     address to listen on
+#
+# @channels: list of migration stream channels with each stream in the
+#     list connected to a destination interface endpoint.
 #
 # Returns: nothing on success
 #
 #
 # Notes:
 #
-# 1. It's a bad idea to use a string for the uri, but it needs to stay
-#    compatible with -incoming and the format of the uri is already exposed
-#    above libvirt.
+# 1. It's a bad idea to use a string for the uri, but it needs
+#    to stay compatible with -incoming and the format of the uri
+#    is already exposed above libvirt.
 #
-# 2. QEMU must be started with -incoming defer to allow migrate-incoming to
-#    be used.
+# 2. QEMU must be started with -incoming defer to allow
+#    migrate-incoming to be used.
 #
 # 3. The uri format is the same as for -incoming
 #
+# 5. For now, number of migration streams is restricted to one, i.e
+#    number of items in 'channels' list is just 1.
+#
+# 4. The 'uri' and 'channels' arguments are mutually exclusive;
+#    exactly one of the two should be present.
+#
 # Example:
 #
 # -> { "execute": "migrate-incoming",
 #      "arguments": { "uri": "tcp::4446" } }
 # <- { "return": {} }
 #
+# -> { "execute": "migrate",
+#      "arguments": {
+#          "channels": [ { "channel-type": "main",
+#                          "addr": { "transport": "socket",
+#                                    "type": "inet",
+#                                    "host": "10.12.34.9",
+#                                    "port": "1050" } } ] } }
+# <- { "return": {} }
+#
+# -> { "execute": "migrate",
+#      "arguments": {
+#          "channels": [ { "channel-type": "main",
+#                          "addr": { "transport": "exec",
+#                                    "args": [ "/bin/nc", "-p", "6000",
+#                                              "/some/sock" ] } } ] } }
+# <- { "return": {} }
+#
+# -> { "execute": "migrate",
+#      "arguments": {
+#          "channels": [ { "channel-type": "main",
+#                          "addr": { "transport": "rdma",
+#                                    "host": "10.12.34.9",
+#                                    "port": "1050" } } ] } }
+# <- { "return": {} }
 ##
-{ 'command': 'migrate-incoming', 'data': {'uri': 'str' } }
+{ 'command': 'migrate-incoming',
+             'data': {'*uri': 'str',
+                      '*channels': [ 'MigrationChannel' ] } }
 
 ##
 # @xen-save-devices-state:
 #
-# Save the state of all devices to file. The RAM and the block devices
-# of the VM are not saved by this command.
+# Save the state of all devices to file.  The RAM and the block
+# devices of the VM are not saved by this command.
 #
 # @filename: the file to save the state of the devices to as binary
-#            data. See xen-save-devices-state.txt for a description of the binary
-#            format.
+#     data.  See xen-save-devices-state.txt for a description of the
+#     binary format.
 #
-# @live: Optional argument to ask QEMU to treat this command as part of a live
-#        migration. Default to true. (since 2.11)
+# @live: Optional argument to ask QEMU to treat this command as part
+#     of a live migration.  Default to true.  (since 2.11)
 #
 # Returns: Nothing on success
 #
 # -> { "execute": "xen-save-devices-state",
 #      "arguments": { "filename": "/tmp/save" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'xen-save-devices-state',
   'data': {'filename': 'str', '*live':'bool' } }
 # -> { "execute": "xen-set-global-dirty-log",
 #      "arguments": { "enable": true } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'xen-set-global-dirty-log', 'data': { 'enable': 'bool' } }
 
 ##
 # @xen-load-devices-state:
 #
-# Load the state of all devices from file. The RAM and the block devices
-# of the VM are not loaded by this command.
+# Load the state of all devices from file.  The RAM and the block
+# devices of the VM are not loaded by this command.
 #
 # @filename: the file to load the state of the devices from as binary
-#            data. See xen-save-devices-state.txt for a description of the binary
-#            format.
+#     data.  See xen-save-devices-state.txt for a description of the
+#     binary format.
 #
 # Since: 2.7
 #
 # -> { "execute": "xen-load-devices-state",
 #      "arguments": { "filename": "/tmp/resume" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'xen-load-devices-state', 'data': {'filename': 'str'} }
 
 #
 # @primary: true for primary or false for secondary.
 #
-# @failover: true to do failover, false to stop. but cannot be
-#            specified if 'enable' is true. default value is false.
+# @failover: true to do failover, false to stop.  but cannot be
+#     specified if 'enable' is true.  default value is false.
 #
 # Returns: nothing.
 #
 # Since: 2.9
 ##
 { 'command': 'xen-set-replication',
-  'data': { 'enable': 'bool', 'primary': 'bool', '*failover' : 'bool' },
-  'if': 'defined(CONFIG_REPLICATION)' }
+  'data': { 'enable': 'bool', 'primary': 'bool', '*failover': 'bool' },
+  'if': 'CONFIG_REPLICATION' }
 
 ##
 # @ReplicationStatus:
 #
 # @error: true if an error happened, false if replication is normal.
 #
-# @desc: the human readable error description string, when
-#        @error is 'true'.
+# @desc: the human readable error description string, when @error is
+#     'true'.
 #
 # Since: 2.9
 ##
 { 'struct': 'ReplicationStatus',
   'data': { 'error': 'bool', '*desc': 'str' },
-  'if': 'defined(CONFIG_REPLICATION)' }
+  'if': 'CONFIG_REPLICATION' }
 
 ##
 # @query-xen-replication-status:
 #
 # Query replication status while the vm is running.
 #
-# Returns: A @ReplicationResult object showing the status.
+# Returns: A @ReplicationStatus object showing the status.
 #
 # Example:
 #
 ##
 { 'command': 'query-xen-replication-status',
   'returns': 'ReplicationStatus',
-  'if': 'defined(CONFIG_REPLICATION)' }
+  'if': 'CONFIG_REPLICATION' }
 
 ##
 # @xen-colo-do-checkpoint:
 # Since: 2.9
 ##
 { 'command': 'xen-colo-do-checkpoint',
-  'if': 'defined(CONFIG_REPLICATION)' }
+  'if': 'CONFIG_REPLICATION' }
 
 ##
 # @COLOStatus:
 #
 # The result format for 'query-colo-status'.
 #
-# @mode: COLO running mode. If COLO is running, this field will return
-#        'primary' or 'secondary'.
+# @mode: COLO running mode.  If COLO is running, this field will
+#     return 'primary' or 'secondary'.
 #
-# @last-mode: COLO last running mode. If COLO is running, this field
-#             will return same like mode field, after failover we can
-#             use this field to get last colo mode. (since 4.0)
+# @last-mode: COLO last running mode.  If COLO is running, this field
+#     will return same like mode field, after failover we can use this
+#     field to get last colo mode.  (since 4.0)
 #
 # @reason: describes the reason for the COLO exit.
 #
 ##
 { 'struct': 'COLOStatus',
   'data': { 'mode': 'COLOMode', 'last-mode': 'COLOMode',
-            'reason': 'COLOExitReason' } }
+            'reason': 'COLOExitReason' },
+  'if': 'CONFIG_REPLICATION' }
 
 ##
 # @query-colo-status:
 # Example:
 #
 # -> { "execute": "query-colo-status" }
-# <- { "return": { "mode": "primary", "reason": "request" } }
+# <- { "return": { "mode": "primary", "last-mode": "none", "reason": "request" } }
 #
 # Since: 3.1
 ##
 { 'command': 'query-colo-status',
-  'returns': 'COLOStatus' }
+  'returns': 'COLOStatus',
+  'if': 'CONFIG_REPLICATION' }
 
 ##
 # @migrate-recover:
 # @UNPLUG_PRIMARY:
 #
 # Emitted from source side of a migration when migration state is
-# WAIT_UNPLUG. Device was unplugged by guest operating system.
-# Device resources in QEMU are kept on standby to be able to re-plug it in case
-# of migration failure.
+# WAIT_UNPLUG. Device was unplugged by guest operating system.  Device
+# resources in QEMU are kept on standby to be able to re-plug it in
+# case of migration failure.
 #
 # @device-id: QEMU device id of the unplugged device
 #
 # Since: 4.2
 #
 # Example:
-#   {"event": "UNPLUG_PRIMARY", "data": {"device-id": "hostdev0"} }
 #
+# <- { "event": "UNPLUG_PRIMARY",
+#      "data": { "device-id": "hostdev0" },
+#      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
 ##
 { 'event': 'UNPLUG_PRIMARY',
   'data': { 'device-id': 'str' } }
 
+##
+# @DirtyRateVcpu:
+#
+# Dirty rate of vcpu.
+#
+# @id: vcpu index.
+#
+# @dirty-rate: dirty rate.
+#
+# Since: 6.2
+##
+{ 'struct': 'DirtyRateVcpu',
+  'data': { 'id': 'int', 'dirty-rate': 'int64' } }
+
 ##
 # @DirtyRateStatus:
 #
-# An enumeration of dirtyrate status.
+# Dirty page rate measurement status.
 #
-# @unstarted: the dirtyrate thread has not been started.
+# @unstarted: measuring thread has not been started yet
 #
-# @measuring: the dirtyrate thread is measuring.
+# @measuring: measuring thread is running
 #
-# @measured: the dirtyrate thread has measured and results are available.
+# @measured: dirty page rate is measured and the results are available
 #
 # Since: 5.2
-#
 ##
 { 'enum': 'DirtyRateStatus',
   'data': [ 'unstarted', 'measuring', 'measured'] }
 
+##
+# @DirtyRateMeasureMode:
+#
+# Method used to measure dirty page rate.  Differences between
+# available methods are explained in @calc-dirty-rate.
+#
+# @page-sampling: use page sampling
+#
+# @dirty-ring: use dirty ring
+#
+# @dirty-bitmap: use dirty bitmap
+#
+# Since: 6.2
+##
+{ 'enum': 'DirtyRateMeasureMode',
+  'data': ['page-sampling', 'dirty-ring', 'dirty-bitmap'] }
+
+##
+# @TimeUnit:
+#
+# Specifies unit in which time-related value is specified.
+#
+# @second: value is in seconds
+#
+# @millisecond: value is in milliseconds
+#
+# Since 8.2
+#
+##
+{ 'enum': 'TimeUnit',
+  'data': ['second', 'millisecond'] }
+
 ##
 # @DirtyRateInfo:
 #
-# Information about current dirty page rate of vm.
+# Information about measured dirty page rate.
 #
-# @dirty-rate: an estimate of the dirty page rate of the VM in units of
-#              MB/s, present only when estimating the rate has completed.
+# @dirty-rate: an estimate of the dirty page rate of the VM in units
+#     of MiB/s.  Value is present only when @status is 'measured'.
 #
-# @status: status containing dirtyrate query status includes
-#          'unstarted' or 'measuring' or 'measured'
+# @status: current status of dirty page rate measurements
 #
 # @start-time: start time in units of second for calculation
 #
-# @calc-time: time in units of second for sample dirty pages
+# @calc-time: time period for which dirty page rate was measured,
+#     expressed and rounded down to @calc-time-unit.
 #
-# Since: 5.2
+# @calc-time-unit: time unit of @calc-time  (Since 8.2)
+#
+# @sample-pages: number of sampled pages per GiB of guest memory.
+#     Valid only in page-sampling mode (Since 6.1)
+#
+# @mode: mode that was used to measure dirty page rate (Since 6.2)
 #
+# @vcpu-dirty-rate: dirty rate for each vCPU if dirty-ring mode was
+#     specified (Since 6.2)
+#
+# Since: 5.2
 ##
 { 'struct': 'DirtyRateInfo',
   'data': {'*dirty-rate': 'int64',
            'status': 'DirtyRateStatus',
            'start-time': 'int64',
-           'calc-time': 'int64'} }
+           'calc-time': 'int64',
+           'calc-time-unit': 'TimeUnit',
+           'sample-pages': 'uint64',
+           'mode': 'DirtyRateMeasureMode',
+           '*vcpu-dirty-rate': [ 'DirtyRateVcpu' ] } }
 
 ##
 # @calc-dirty-rate:
 #
-# start calculating dirty page rate for vm
-#
-# @calc-time: time in units of second for sample dirty pages
+# Start measuring dirty page rate of the VM.  Results can be retrieved
+# with @query-dirty-rate after measurements are completed.
+#
+# Dirty page rate is the number of pages changed in a given time
+# period expressed in MiB/s.  The following methods of calculation are
+# available:
+#
+# 1. In page sampling mode, a random subset of pages are selected and
+#    hashed twice: once at the beginning of measurement time period,
+#    and once again at the end.  If two hashes for some page are
+#    different, the page is counted as changed.  Since this method
+#    relies on sampling and hashing, calculated dirty page rate is
+#    only an estimate of its true value.  Increasing @sample-pages
+#    improves estimation quality at the cost of higher computational
+#    overhead.
+#
+# 2. Dirty bitmap mode captures writes to memory (for example by
+#    temporarily revoking write access to all pages) and counting page
+#    faults.  Information about modified pages is collected into a
+#    bitmap, where each bit corresponds to one guest page.  This mode
+#    requires that KVM accelerator property "dirty-ring-size" is *not*
+#    set.
+#
+# 3. Dirty ring mode is similar to dirty bitmap mode, but the
+#    information about modified pages is collected into ring buffer.
+#    This mode tracks page modification per each vCPU separately.  It
+#    requires that KVM accelerator property "dirty-ring-size" is set.
+#
+# @calc-time: time period for which dirty page rate is calculated.
+#     By default it is specified in seconds, but the unit can be set
+#     explicitly with @calc-time-unit.  Note that larger @calc-time
+#     values will typically result in smaller dirty page rates because
+#     page dirtying is a one-time event.  Once some page is counted
+#     as dirty during @calc-time period, further writes to this page
+#     will not increase dirty page rate anymore.
+#
+# @calc-time-unit: time unit in which @calc-time is specified.
+#     By default it is seconds. (Since 8.2)
+#
+# @sample-pages: number of sampled pages per each GiB of guest memory.
+#     Default value is 512.  For 4KiB guest pages this corresponds to
+#     sampling ratio of 0.2%.  This argument is used only in page
+#     sampling mode.  (Since 6.1)
+#
+# @mode: mechanism for tracking dirty pages.  Default value is
+#     'page-sampling'.  Others are 'dirty-bitmap' and 'dirty-ring'.
+#     (Since 6.1)
 #
 # Since: 5.2
 #
 # Example:
-#   {"command": "calc-dirty-rate", "data": {"calc-time": 1} }
 #
+# -> {"execute": "calc-dirty-rate", "arguments": {"calc-time": 1,
+#                                                 'sample-pages': 512} }
+# <- { "return": {} }
+#
+# Measure dirty rate using dirty bitmap for 500 milliseconds:
+#
+# -> {"execute": "calc-dirty-rate", "arguments": {"calc-time": 500,
+#     "calc-time-unit": "millisecond", "mode": "dirty-bitmap"} }
+#
+# <- { "return": {} }
 ##
-{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64'} }
+{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64',
+                                         '*calc-time-unit': 'TimeUnit',
+                                         '*sample-pages': 'int',
+                                         '*mode': 'DirtyRateMeasureMode'} }
 
 ##
 # @query-dirty-rate:
 #
-# query dirty page rate in units of MB/s for vm
+# Query results of the most recent invocation of @calc-dirty-rate.
+#
+# @calc-time-unit: time unit in which to report calculation time.
+#     By default it is reported in seconds. (Since 8.2)
 #
 # Since: 5.2
+#
+# Examples:
+#
+# 1. Measurement is in progress:
+#
+# <- {"status": "measuring", "sample-pages": 512,
+#     "mode": "page-sampling", "start-time": 1693900454, "calc-time": 10,
+#     "calc-time-unit": "second"}
+#
+# 2. Measurement has been completed:
+#
+# <- {"status": "measured", "sample-pages": 512, "dirty-rate": 108,
+#     "mode": "page-sampling", "start-time": 1693900454, "calc-time": 10,
+#     "calc-time-unit": "second"}
+##
+{ 'command': 'query-dirty-rate', 'data': {'*calc-time-unit': 'TimeUnit' },
+                                 'returns': 'DirtyRateInfo' }
+
+##
+# @DirtyLimitInfo:
+#
+# Dirty page rate limit information of a virtual CPU.
+#
+# @cpu-index: index of a virtual CPU.
+#
+# @limit-rate: upper limit of dirty page rate (MB/s) for a virtual
+#     CPU, 0 means unlimited.
+#
+# @current-rate: current dirty page rate (MB/s) for a virtual CPU.
+#
+# Since: 7.1
+##
+{ 'struct': 'DirtyLimitInfo',
+  'data': { 'cpu-index': 'int',
+            'limit-rate': 'uint64',
+            'current-rate': 'uint64' } }
+
+##
+# @set-vcpu-dirty-limit:
+#
+# Set the upper limit of dirty page rate for virtual CPUs.
+#
+# Requires KVM with accelerator property "dirty-ring-size" set.  A
+# virtual CPU's dirty page rate is a measure of its memory load.  To
+# observe dirty page rates, use @calc-dirty-rate.
+#
+# @cpu-index: index of a virtual CPU, default is all.
+#
+# @dirty-rate: upper limit of dirty page rate (MB/s) for virtual CPUs.
+#
+# Since: 7.1
+#
+# Example:
+#
+# -> {"execute": "set-vcpu-dirty-limit"}
+#     "arguments": { "dirty-rate": 200,
+#                    "cpu-index": 1 } }
+# <- { "return": {} }
+##
+{ 'command': 'set-vcpu-dirty-limit',
+  'data': { '*cpu-index': 'int',
+            'dirty-rate': 'uint64' } }
+
+##
+# @cancel-vcpu-dirty-limit:
+#
+# Cancel the upper limit of dirty page rate for virtual CPUs.
+#
+# Cancel the dirty page limit for the vCPU which has been set with
+# set-vcpu-dirty-limit command.  Note that this command requires
+# support from dirty ring, same as the "set-vcpu-dirty-limit".
+#
+# @cpu-index: index of a virtual CPU, default is all.
+#
+# Since: 7.1
+#
+# Example:
+#
+# -> {"execute": "cancel-vcpu-dirty-limit"},
+#     "arguments": { "cpu-index": 1 } }
+# <- { "return": {} }
+##
+{ 'command': 'cancel-vcpu-dirty-limit',
+  'data': { '*cpu-index': 'int'} }
+
+##
+# @query-vcpu-dirty-limit:
+#
+# Returns information about virtual CPU dirty page rate limits, if
+# any.
+#
+# Since: 7.1
+#
+# Example:
+#
+# -> {"execute": "query-vcpu-dirty-limit"}
+# <- {"return": [
+#        { "limit-rate": 60, "current-rate": 3, "cpu-index": 0},
+#        { "limit-rate": 60, "current-rate": 3, "cpu-index": 1}]}
+##
+{ 'command': 'query-vcpu-dirty-limit',
+  'returns': [ 'DirtyLimitInfo' ] }
+
+##
+# @MigrationThreadInfo:
+#
+# Information about migrationthreads
+#
+# @name: the name of migration thread
+#
+# @thread-id: ID of the underlying host thread
+#
+# Since: 7.2
 ##
-{ 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' }
+{ 'struct': 'MigrationThreadInfo',
+  'data': {'name': 'str',
+           'thread-id': 'int'} }
+
+##
+# @query-migrationthreads:
+#
+# Returns information of migration threads
+#
+# data: migration thread name
+#
+# Returns: information about migration threads
+#
+# Since: 7.2
+##
+{ 'command': 'query-migrationthreads',
+  'returns': ['MigrationThreadInfo'] }
+
+##
+# @snapshot-save:
+#
+# Save a VM snapshot
+#
+# @job-id: identifier for the newly created job
+#
+# @tag: name of the snapshot to create
+#
+# @vmstate: block device node name to save vmstate to
+#
+# @devices: list of block device node names to save a snapshot to
+#
+# Applications should not assume that the snapshot save is complete
+# when this command returns.  The job commands / events must be used
+# to determine completion and to fetch details of any errors that
+# arise.
+#
+# Note that execution of the guest CPUs may be stopped during the time
+# it takes to save the snapshot.  A future version of QEMU may ensure
+# CPUs are executing continuously.
+#
+# It is strongly recommended that @devices contain all writable block
+# device nodes if a consistent snapshot is required.
+#
+# If @tag already exists, an error will be reported
+#
+# Returns: nothing
+#
+# Example:
+#
+# -> { "execute": "snapshot-save",
+#      "arguments": {
+#         "job-id": "snapsave0",
+#         "tag": "my-snap",
+#         "vmstate": "disk0",
+#         "devices": ["disk0", "disk1"]
+#      }
+#    }
+# <- { "return": { } }
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1432121972, "microseconds": 744001},
+#     "data": {"status": "created", "id": "snapsave0"}}
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1432122172, "microseconds": 744001},
+#     "data": {"status": "running", "id": "snapsave0"}}
+# <- {"event": "STOP",
+#     "timestamp": {"seconds": 1432122372, "microseconds": 744001} }
+# <- {"event": "RESUME",
+#     "timestamp": {"seconds": 1432122572, "microseconds": 744001} }
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1432122772, "microseconds": 744001},
+#     "data": {"status": "waiting", "id": "snapsave0"}}
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1432122972, "microseconds": 744001},
+#     "data": {"status": "pending", "id": "snapsave0"}}
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1432123172, "microseconds": 744001},
+#     "data": {"status": "concluded", "id": "snapsave0"}}
+# -> {"execute": "query-jobs"}
+# <- {"return": [{"current-progress": 1,
+#                 "status": "concluded",
+#                 "total-progress": 1,
+#                 "type": "snapshot-save",
+#                 "id": "snapsave0"}]}
+#
+# Since: 6.0
+##
+{ 'command': 'snapshot-save',
+  'data': { 'job-id': 'str',
+            'tag': 'str',
+            'vmstate': 'str',
+            'devices': ['str'] } }
+
+##
+# @snapshot-load:
+#
+# Load a VM snapshot
+#
+# @job-id: identifier for the newly created job
+#
+# @tag: name of the snapshot to load.
+#
+# @vmstate: block device node name to load vmstate from
+#
+# @devices: list of block device node names to load a snapshot from
+#
+# Applications should not assume that the snapshot load is complete
+# when this command returns.  The job commands / events must be used
+# to determine completion and to fetch details of any errors that
+# arise.
+#
+# Note that execution of the guest CPUs will be stopped during the
+# time it takes to load the snapshot.
+#
+# It is strongly recommended that @devices contain all writable block
+# device nodes that can have changed since the original @snapshot-save
+# command execution.
+#
+# Returns: nothing
+#
+# Example:
+#
+# -> { "execute": "snapshot-load",
+#      "arguments": {
+#         "job-id": "snapload0",
+#         "tag": "my-snap",
+#         "vmstate": "disk0",
+#         "devices": ["disk0", "disk1"]
+#      }
+#    }
+# <- { "return": { } }
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1472124172, "microseconds": 744001},
+#     "data": {"status": "created", "id": "snapload0"}}
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1472125172, "microseconds": 744001},
+#     "data": {"status": "running", "id": "snapload0"}}
+# <- {"event": "STOP",
+#     "timestamp": {"seconds": 1472125472, "microseconds": 744001} }
+# <- {"event": "RESUME",
+#     "timestamp": {"seconds": 1472125872, "microseconds": 744001} }
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1472126172, "microseconds": 744001},
+#     "data": {"status": "waiting", "id": "snapload0"}}
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1472127172, "microseconds": 744001},
+#     "data": {"status": "pending", "id": "snapload0"}}
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1472128172, "microseconds": 744001},
+#     "data": {"status": "concluded", "id": "snapload0"}}
+# -> {"execute": "query-jobs"}
+# <- {"return": [{"current-progress": 1,
+#                 "status": "concluded",
+#                 "total-progress": 1,
+#                 "type": "snapshot-load",
+#                 "id": "snapload0"}]}
+#
+# Since: 6.0
+##
+{ 'command': 'snapshot-load',
+  'data': { 'job-id': 'str',
+            'tag': 'str',
+            'vmstate': 'str',
+            'devices': ['str'] } }
+
+##
+# @snapshot-delete:
+#
+# Delete a VM snapshot
+#
+# @job-id: identifier for the newly created job
+#
+# @tag: name of the snapshot to delete.
+#
+# @devices: list of block device node names to delete a snapshot from
+#
+# Applications should not assume that the snapshot delete is complete
+# when this command returns.  The job commands / events must be used
+# to determine completion and to fetch details of any errors that
+# arise.
+#
+# Returns: nothing
+#
+# Example:
+#
+# -> { "execute": "snapshot-delete",
+#      "arguments": {
+#         "job-id": "snapdelete0",
+#         "tag": "my-snap",
+#         "devices": ["disk0", "disk1"]
+#      }
+#    }
+# <- { "return": { } }
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1442124172, "microseconds": 744001},
+#     "data": {"status": "created", "id": "snapdelete0"}}
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1442125172, "microseconds": 744001},
+#     "data": {"status": "running", "id": "snapdelete0"}}
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1442126172, "microseconds": 744001},
+#     "data": {"status": "waiting", "id": "snapdelete0"}}
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1442127172, "microseconds": 744001},
+#     "data": {"status": "pending", "id": "snapdelete0"}}
+# <- {"event": "JOB_STATUS_CHANGE",
+#     "timestamp": {"seconds": 1442128172, "microseconds": 744001},
+#     "data": {"status": "concluded", "id": "snapdelete0"}}
+# -> {"execute": "query-jobs"}
+# <- {"return": [{"current-progress": 1,
+#                 "status": "concluded",
+#                 "total-progress": 1,
+#                 "type": "snapshot-delete",
+#                 "id": "snapdelete0"}]}
+#
+# Since: 6.0
+##
+{ 'command': 'snapshot-delete',
+  'data': { 'job-id': 'str',
+            'tag': 'str',
+            'devices': ['str'] } }
index 1e561fa97ba239a3b8428fd868726de85f9828ce..88291453ba476a26a4b5583f79a553857aa061f5 100644 (file)
@@ -2,36 +2,12 @@
 # vim: filetype=python
 #
 
-##
-# @RTC_CHANGE:
-#
-# Emitted when the guest changes the RTC time.
-#
-# @offset: offset between base RTC clock (as specified by -rtc base), and
-#          new RTC clock value
-#
-# Note: This event is rate-limited.
-#
-# Since: 0.13.0
-#
-# Example:
-#
-# <-   { "event": "RTC_CHANGE",
-#        "data": { "offset": 78 },
-#        "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
-#
-##
-{ 'event': 'RTC_CHANGE',
-  'data': { 'offset': 'int' },
-  'if': 'defined(TARGET_ALPHA) || defined(TARGET_ARM) || defined(TARGET_HPPA) || defined(TARGET_I386) || defined(TARGET_MIPS) || defined(TARGET_MIPS64) || defined(TARGET_MOXIE) || defined(TARGET_PPC) || defined(TARGET_PPC64) || defined(TARGET_S390X) || defined(TARGET_SH4) || defined(TARGET_SPARC)' }
-
 ##
 # @rtc-reset-reinjection:
 #
-# This command will reset the RTC interrupt reinjection backlog.
-# Can be used if another mechanism to synchronize guest time
-# is in effect, for example QEMU guest agent's guest-set-time
-# command.
+# This command will reset the RTC interrupt reinjection backlog.  Can
+# be used if another mechanism to synchronize guest time is in effect,
+# for example QEMU guest agent's guest-set-time command.
 #
 # Since: 2.1
 #
 #
 # -> { "execute": "rtc-reset-reinjection" }
 # <- { "return": {} }
-#
 ##
 { 'command': 'rtc-reset-reinjection',
-  'if': 'defined(TARGET_I386)' }
-
+  'if': 'TARGET_I386' }
 
 ##
 # @SevState:
 #
 # @uninit: The guest is uninitialized.
 #
-# @launch-update: The guest is currently being launched; plaintext data and
-#                 register state is being imported.
+# @launch-update: The guest is currently being launched; plaintext
+#     data and register state is being imported.
 #
-# @launch-secret: The guest is currently being launched; ciphertext data
-#                 is being imported.
+# @launch-secret: The guest is currently being launched; ciphertext
+#     data is being imported.
 #
 # @running: The guest is fully launched or migrated in.
 #
-# @send-update: The guest is currently being migrated out to another machine.
+# @send-update: The guest is currently being migrated out to another
+#     machine.
 #
-# @receive-update: The guest is currently being migrated from another machine.
+# @receive-update: The guest is currently being migrated from another
+#     machine.
 #
 # Since: 2.12
 ##
 { 'enum': 'SevState',
   'data': ['uninit', 'launch-update', 'launch-secret', 'running',
            'send-update', 'receive-update' ],
-  'if': 'defined(TARGET_I386)' }
+  'if': 'TARGET_I386' }
 
 ##
 # @SevInfo:
               'state' : 'SevState',
               'handle' : 'uint32'
             },
-  'if': 'defined(TARGET_I386)'
+  'if': 'TARGET_I386'
 }
 
 ##
 # <- { "return": { "enabled": true, "api-major" : 0, "api-minor" : 0,
 #                  "build-id" : 0, "policy" : 0, "state" : "running",
 #                  "handle" : 1 } }
-#
 ##
 { 'command': 'query-sev', 'returns': 'SevInfo',
-  'if': 'defined(TARGET_I386)' }
-
+  'if': 'TARGET_I386' }
 
 ##
 # @SevLaunchMeasureInfo:
 # @data: the measurement value encoded in base64
 #
 # Since: 2.12
-#
 ##
 { 'struct': 'SevLaunchMeasureInfo', 'data': {'data': 'str'},
-  'if': 'defined(TARGET_I386)' }
+  'if': 'TARGET_I386' }
 
 ##
 # @query-sev-launch-measure:
 #
 # -> { "execute": "query-sev-launch-measure" }
 # <- { "return": { "data": "4l8LXeNlSPUDlXPJG5966/8%YZ" } }
-#
 ##
 { 'command': 'query-sev-launch-measure', 'returns': 'SevLaunchMeasureInfo',
-  'if': 'defined(TARGET_I386)' }
-
+  'if': 'TARGET_I386' }
 
 ##
 # @SevCapability:
 #
-# The struct describes capability for a Secure Encrypted Virtualization
-# feature.
+# The struct describes capability for a Secure Encrypted
+# Virtualization feature.
 #
-# @pdh:  Platform Diffie-Hellman key (base64 encoded)
+# @pdh: Platform Diffie-Hellman key (base64 encoded)
 #
-# @cert-chain:  PDH certificate chain (base64 encoded)
+# @cert-chain: PDH certificate chain (base64 encoded)
+#
+# @cpu0-id: Unique ID of CPU0 (base64 encoded) (since 7.1)
 #
 # @cbitpos: C-bit location in page table entry
 #
-# @reduced-phys-bits: Number of physical Address bit reduction when SEV is
-#                     enabled
+# @reduced-phys-bits: Number of physical Address bit reduction when
+#     SEV is enabled
 #
 # Since: 2.12
 ##
 { 'struct': 'SevCapability',
   'data': { 'pdh': 'str',
             'cert-chain': 'str',
+            'cpu0-id': 'str',
             'cbitpos': 'int',
             'reduced-phys-bits': 'int'},
-  'if': 'defined(TARGET_I386)' }
+  'if': 'TARGET_I386' }
 
 ##
 # @query-sev-capabilities:
 #
-# This command is used to get the SEV capabilities, and is supported on AMD
-# X86 platforms only.
+# This command is used to get the SEV capabilities, and is supported
+# on AMD X86 platforms only.
 #
 # Returns: SevCapability objects.
 #
 #
 # -> { "execute": "query-sev-capabilities" }
 # <- { "return": { "pdh": "8CCDD8DDD", "cert-chain": "888CCCDDDEE",
-#                  "cbitpos": 47, "reduced-phys-bits": 5}}
-#
+#                  "cpu0-id": "2lvmGwo+...61iEinw==",
+#                  "cbitpos": 47, "reduced-phys-bits": 1}}
 ##
 { 'command': 'query-sev-capabilities', 'returns': 'SevCapability',
-  'if': 'defined(TARGET_I386)' }
+  'if': 'TARGET_I386' }
+
+##
+# @sev-inject-launch-secret:
+#
+# This command injects a secret blob into memory of SEV guest.
+#
+# @packet-header: the launch secret packet header encoded in base64
+#
+# @secret: the launch secret data to be injected encoded in base64
+#
+# @gpa: the guest physical address where secret will be injected.
+#
+# Since: 6.0
+##
+{ 'command': 'sev-inject-launch-secret',
+  'data': { 'packet-header': 'str', 'secret': 'str', '*gpa': 'uint64' },
+  'if': 'TARGET_I386' }
+
+##
+# @SevAttestationReport:
+#
+# The struct describes attestation report for a Secure Encrypted
+# Virtualization feature.
+#
+# @data: guest attestation report (base64 encoded)
+#
+# Since: 6.1
+##
+{ 'struct': 'SevAttestationReport',
+  'data': { 'data': 'str'},
+  'if': 'TARGET_I386' }
+
+##
+# @query-sev-attestation-report:
+#
+# This command is used to get the SEV attestation report, and is
+# supported on AMD X86 platforms only.
+#
+# @mnonce: a random 16 bytes value encoded in base64 (it will be
+#     included in report)
+#
+# Returns: SevAttestationReport objects.
+#
+# Since: 6.1
+#
+# Example:
+#
+# -> { "execute" : "query-sev-attestation-report",
+#                  "arguments": { "mnonce": "aaaaaaa" } }
+# <- { "return" : { "data": "aaaaaaaabbbddddd"} }
+##
+{ 'command': 'query-sev-attestation-report',
+  'data': { 'mnonce': 'str' },
+  'returns': 'SevAttestationReport',
+  'if': 'TARGET_I386' }
 
 ##
 # @dump-skeys:
 # -> { "execute": "dump-skeys",
 #      "arguments": { "filename": "/tmp/skeys" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'dump-skeys',
   'data': { 'filename': 'str' },
-  'if': 'defined(TARGET_S390X)' }
+  'if': 'TARGET_S390X' }
 
 ##
 # @GICCapability:
 #
 # The struct describes capability for a specific GIC (Generic
-# Interrupt Controller) version. These bits are not only decided by
-# QEMU/KVM software version, but also decided by the hardware that
-# the program is running upon.
+# Interrupt Controller) version.  These bits are not only decided by
+# QEMU/KVM software version, but also decided by the hardware that the
+# program is running upon.
 #
-# @version: version of GIC to be described. Currently, only 2 and 3
-#           are supported.
+# @version: version of GIC to be described.  Currently, only 2 and 3
+#     are supported.
 #
 # @emulated: whether current QEMU/hardware supports emulated GIC
-#            device in user space.
+#     device in user space.
 #
-# @kernel: whether current QEMU/hardware supports hardware
-#          accelerated GIC device in kernel.
+# @kernel: whether current QEMU/hardware supports hardware accelerated
+#     GIC device in kernel.
 #
 # Since: 2.6
 ##
   'data': { 'version': 'int',
             'emulated': 'bool',
             'kernel': 'bool' },
-  'if': 'defined(TARGET_ARM)' }
+  'if': 'TARGET_ARM' }
 
 ##
 # @query-gic-capabilities:
 #
-# This command is ARM-only. It will return a list of GICCapability
+# This command is ARM-only.  It will return a list of GICCapability
 # objects that describe its capability bits.
 #
 # Returns: a list of GICCapability objects.
 # -> { "execute": "query-gic-capabilities" }
 # <- { "return": [{ "version": 2, "emulated": true, "kernel": false },
 #                 { "version": 3, "emulated": false, "kernel": true } ] }
-#
 ##
 { 'command': 'query-gic-capabilities', 'returns': ['GICCapability'],
-  'if': 'defined(TARGET_ARM)' }
+  'if': 'TARGET_ARM' }
+
+##
+# @SGXEPCSection:
+#
+# Information about intel SGX EPC section info
+#
+# @node: the numa node
+#
+# @size: the size of EPC section
+#
+# Since: 7.0
+##
+{ 'struct': 'SGXEPCSection',
+  'data': { 'node': 'int',
+            'size': 'uint64'}}
+
+##
+# @SGXInfo:
+#
+# Information about intel Safe Guard eXtension (SGX) support
+#
+# @sgx: true if SGX is supported
+#
+# @sgx1: true if SGX1 is supported
+#
+# @sgx2: true if SGX2 is supported
+#
+# @flc: true if FLC is supported
+#
+# @sections: The EPC sections info for guest (Since: 7.0)
+#
+# Since: 6.2
+##
+{ 'struct': 'SGXInfo',
+  'data': { 'sgx': 'bool',
+            'sgx1': 'bool',
+            'sgx2': 'bool',
+            'flc': 'bool',
+            'sections': ['SGXEPCSection']},
+   'if': 'TARGET_I386' }
+
+##
+# @query-sgx:
+#
+# Returns information about SGX
+#
+# Returns: @SGXInfo
+#
+# Since: 6.2
+#
+# Example:
+#
+# -> { "execute": "query-sgx" }
+# <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true,
+#                  "flc": true,
+#                  "sections": [{"node": 0, "size": 67108864},
+#                  {"node": 1, "size": 29360128}]} }
+##
+{ 'command': 'query-sgx', 'returns': 'SGXInfo', 'if': 'TARGET_I386' }
+
+##
+# @query-sgx-capabilities:
+#
+# Returns information from host SGX capabilities
+#
+# Returns: @SGXInfo
+#
+# Since: 6.2
+#
+# Example:
+#
+# -> { "execute": "query-sgx-capabilities" }
+# <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true,
+#                  "flc": true,
+#                  "section" : [{"node": 0, "size": 67108864},
+#                  {"node": 1, "size": 29360128}]} }
+##
+{ 'command': 'query-sgx-capabilities', 'returns': 'SGXInfo', 'if': 'TARGET_I386' }
+
+
+##
+# @EvtchnPortType:
+#
+# An enumeration of Xen event channel port types.
+#
+# @closed: The port is unused.
+#
+# @unbound: The port is allocated and ready to be bound.
+#
+# @interdomain: The port is connected as an interdomain interrupt.
+#
+# @pirq: The port is bound to a physical IRQ (PIRQ).
+#
+# @virq: The port is bound to a virtual IRQ (VIRQ).
+#
+# @ipi: The post is an inter-processor interrupt (IPI).
+#
+# Since: 8.0
+##
+{ 'enum': 'EvtchnPortType',
+  'data': ['closed', 'unbound', 'interdomain', 'pirq', 'virq', 'ipi'],
+  'if': 'TARGET_I386' }
+
+##
+# @EvtchnInfo:
+#
+# Information about a Xen event channel port
+#
+# @port: the port number
+#
+# @vcpu: target vCPU for this port
+#
+# @type: the port type
+#
+# @remote-domain: remote domain for interdomain ports
+#
+# @target: remote port ID, or virq/pirq number
+#
+# @pending: port is currently active pending delivery
+#
+# @masked: port is masked
+#
+# Since: 8.0
+##
+{ 'struct': 'EvtchnInfo',
+  'data': {'port': 'uint16',
+           'vcpu': 'uint32',
+           'type': 'EvtchnPortType',
+           'remote-domain': 'str',
+           'target': 'uint16',
+           'pending': 'bool',
+           'masked': 'bool'},
+  'if': 'TARGET_I386' }
+
+
+##
+# @xen-event-list:
+#
+# Query the Xen event channels opened by the guest.
+#
+# Returns: list of open event channel ports.
+#
+# Since: 8.0
+#
+# Example:
+#
+# -> { "execute": "xen-event-list" }
+# <- { "return": [
+#         {
+#             "pending": false,
+#             "port": 1,
+#             "vcpu": 1,
+#             "remote-domain": "qemu",
+#             "masked": false,
+#             "type": "interdomain",
+#             "target": 1
+#         },
+#         {
+#             "pending": false,
+#             "port": 2,
+#             "vcpu": 0,
+#             "remote-domain": "",
+#             "masked": false,
+#             "type": "virq",
+#             "target": 0
+#         }
+#      ]
+#    }
+##
+{ 'command': 'xen-event-list',
+  'returns': ['EvtchnInfo'],
+  'if': 'TARGET_I386' }
+
+##
+# @xen-event-inject:
+#
+# Inject a Xen event channel port (interrupt) to the guest.
+#
+# @port: The port number
+#
+# Returns:
+# - Nothing on success.
+#
+# Since: 8.0
+#
+# Example:
+#
+# -> { "execute": "xen-event-inject", "arguments": { "port": 1 } }
+# <- { "return": { } }
+##
+{ 'command': 'xen-event-inject',
+  'data': { 'port': 'uint32' },
+  'if': 'TARGET_I386' }
index 40df513856e7b8c00b68c1ead2c04272e471a27e..cda2effa8155a8a6175036f678877ec2c7cdd1bf 100644 (file)
 ##
 # @add_client:
 #
-# Allow client connections for VNC, Spice and socket based
-# character devices to be passed in to QEMU via SCM_RIGHTS.
+# Allow client connections for VNC, Spice and socket based character
+# devices to be passed in to QEMU via SCM_RIGHTS.
 #
-# @protocol: protocol name. Valid names are "vnc", "spice" or the
-#            name of a character device (eg. from -chardev id=XXXX)
+# If the FD associated with @fdname is not a socket, the command will
+# fail and the FD will be closed.
+#
+# @protocol: protocol name.  Valid names are "vnc", "spice",
+#     "@dbus-display" or the name of a character device (e.g. from
+#     -chardev id=XXXX)
 #
 # @fdname: file descriptor name previously passed via 'getfd' command
 #
-# @skipauth: whether to skip authentication. Only applies
-#            to "vnc" and "spice" protocols
+# @skipauth: whether to skip authentication.  Only applies to "vnc"
+#     and "spice" protocols
 #
-# @tls: whether to perform TLS. Only applies to the "spice"
-#       protocol
+# @tls: whether to perform TLS. Only applies to the "spice" protocol
 #
 # Returns: nothing on success.
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 # -> { "execute": "add_client", "arguments": { "protocol": "vnc",
 #                                              "fdname": "myclient" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'add_client',
   'data': { 'protocol': 'str', 'fdname': 'str', '*skipauth': 'bool',
@@ -47,7 +49,7 @@
 #
 # @name: The name of the guest
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'NameInfo', 'data': {'*name': 'str'} }
 
 #
 # Returns: @NameInfo of the guest
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 # -> { "execute": "query-name" }
 # <- { "return": { "name": "qemu-name" } }
-#
 ##
 { 'command': 'query-name', 'returns': 'NameInfo', 'allow-preconfig': true }
 
 #
 # @thread-id: ID of the underlying host thread
 #
-# @poll-max-ns: maximum polling time in ns, 0 means polling is disabled
-#               (since 2.9)
+# @poll-max-ns: maximum polling time in ns, 0 means polling is
+#     disabled (since 2.9)
+#
+# @poll-grow: how many ns will be added to polling time, 0 means that
+#     it's not configured (since 2.9)
 #
-# @poll-grow: how many ns will be added to polling time, 0 means that it's not
-#             configured (since 2.9)
+# @poll-shrink: how many ns will be removed from polling time, 0 means
+#     that it's not configured (since 2.9)
 #
-# @poll-shrink: how many ns will be removed from polling time, 0 means that
-#               it's not configured (since 2.9)
+# @aio-max-batch: maximum number of requests in a batch for the AIO
+#     engine, 0 means that the engine will use its default (since 6.1)
 #
 # Since: 2.0
 ##
            'thread-id': 'int',
            'poll-max-ns': 'int',
            'poll-grow': 'int',
-           'poll-shrink': 'int' } }
+           'poll-shrink': 'int',
+           'aio-max-batch': 'int' } }
 
 ##
 # @query-iothreads:
 #
 # Returns a list of information about each iothread.
 #
-# Note: this list excludes the QEMU main loop thread, which is not declared
-#       using the -object iothread command-line option.  It is always the main thread
-#       of the process.
+# Note: this list excludes the QEMU main loop thread, which is not
+#     declared using the -object iothread command-line option.  It is
+#     always the main thread of the process.
 #
 # Returns: a list of @IOThreadInfo for each iothread
 #
 #          }
 #       ]
 #    }
-#
 ##
 { 'command': 'query-iothreads', 'returns': ['IOThreadInfo'],
   'allow-preconfig': true }
 #
 # Stop all guest VCPU execution.
 #
-# Since:  0.14.0
+# Since: 0.14
 #
-# Notes: This function will succeed even if the guest is already in the stopped
-#        state.  In "inmigrate" state, it will ensure that the guest
-#        remains paused once migration finishes, as if the -S option was
-#        passed on the command line.
+# Notes: This function will succeed even if the guest is already in
+#     the stopped state.  In "inmigrate" state, it will ensure that
+#     the guest remains paused once migration finishes, as if the -S
+#     option was passed on the command line.
 #
 # Example:
 #
 # -> { "execute": "stop" }
 # <- { "return": {} }
-#
 ##
 { 'command': 'stop' }
 
 #
 # Resume guest VCPU execution.
 #
-# Since:  0.14.0
+# Since: 0.14
 #
-# Returns:  If successful, nothing
+# Returns: If successful, nothing
 #
-# Notes: This command will succeed if the guest is currently running.  It
-#        will also succeed if the guest is in the "inmigrate" state; in
-#        this case, the effect of the command is to make sure the guest
-#        starts once migration finishes, removing the effect of the -S
-#        command line option if it was passed.
+# Notes: This command will succeed if the guest is currently running.
+#     It will also succeed if the guest is in the "inmigrate" state;
+#     in this case, the effect of the command is to make sure the
+#     guest starts once migration finishes, removing the effect of the
+#     -S command line option if it was passed.
 #
 # Example:
 #
 # -> { "execute": "cont" }
 # <- { "return": {} }
-#
 ##
 { 'command': 'cont' }
 
 #
 # Exit from "preconfig" state
 #
-# This command makes QEMU exit the preconfig state and proceed with
-# VM initialization using configuration data provided on the command line
-# and via the QMP monitor during the preconfig state. The command is only
-# available during the preconfig state (i.e. when the --preconfig command
-# line option was in use).
+# This command makes QEMU exit the preconfig state and proceed with VM
+# initialization using configuration data provided on the command line
+# and via the QMP monitor during the preconfig state.  The command is
+# only available during the preconfig state (i.e. when the --preconfig
+# command line option was in use).
 #
-# Since 3.0
+# Features:
+#
+# @unstable: This command is experimental.
+#
+# Since: 3.0
 #
 # Returns: nothing
 #
 #
 # -> { "execute": "x-exit-preconfig" }
 # <- { "return": {} }
-#
 ##
-{ 'command': 'x-exit-preconfig', 'allow-preconfig': true }
+{ 'command': 'x-exit-preconfig', 'allow-preconfig': true,
+  'features': [ 'unstable' ] }
 
 ##
 # @human-monitor-command:
 # @cpu-index: The CPU to use for commands that require an implicit CPU
 #
 # Features:
+#
 # @savevm-monitor-nodes: If present, HMP command savevm only snapshots
-#                        monitor-owned nodes if they have no parents.
-#                        This allows the use of 'savevm' with
-#                        -blockdev. (since 4.2)
+#     monitor-owned nodes if they have no parents.  This allows the
+#     use of 'savevm' with -blockdev.  (since 4.2)
 #
 # Returns: the output of the command as a string
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Notes: This command only exists as a stop-gap.  Its use is highly
-#        discouraged.  The semantics of this command are not
-#        guaranteed: this means that command names, arguments and
-#        responses can change or be removed at ANY time.  Applications
-#        that rely on long term stability guarantees should NOT
-#        use this command.
+#     discouraged.  The semantics of this command are not guaranteed:
+#     this means that command names, arguments and responses can
+#     change or be removed at ANY time.  Applications that rely on
+#     long term stability guarantees should NOT use this command.
 #
-#        Known limitations:
+#     Known limitations:
 #
-#        * This command is stateless, this means that commands that depend
-#          on state information (such as getfd) might not work
+#     * This command is stateless, this means that commands that
+#       depend on state information (such as getfd) might not work
 #
-#        * Commands that prompt the user for data don't currently work
+#     * Commands that prompt the user for data don't currently work
 #
 # Example:
 #
 # -> { "execute": "human-monitor-command",
 #      "arguments": { "command-line": "info kvm" } }
 # <- { "return": "kvm support: enabled\r\n" }
-#
 ##
 { 'command': 'human-monitor-command',
   'data': {'command-line': 'str', '*cpu-index': 'int'},
   'features': [ 'savevm-monitor-nodes' ] }
 
 ##
-# @change:
-#
-# This command is multiple commands multiplexed together.
+# @getfd:
 #
-# @device: This is normally the name of a block device but it may also be 'vnc'.
-#          when it's 'vnc', then sub command depends on @target
+# Receive a file descriptor via SCM rights and assign it a name
 #
-# @target: If @device is a block device, then this is the new filename.
-#          If @device is 'vnc', then if the value 'password' selects the vnc
-#          change password command.   Otherwise, this specifies a new server URI
-#          address to listen to for VNC connections.
+# @fdname: file descriptor name
 #
-# @arg: If @device is a block device, then this is an optional format to open
-#       the device with.
-#       If @device is 'vnc' and @target is 'password', this is the new VNC
-#       password to set.  See change-vnc-password for additional notes.
+# Returns: Nothing on success
 #
-# Features:
-# @deprecated: This command is deprecated.  For changing block
-#              devices, use 'blockdev-change-medium' instead; for changing VNC
-#              parameters, use 'change-vnc-password' instead.
+# Since: 0.14
 #
-# Returns: - Nothing on success.
-#          - If @device is not a valid block device, DeviceNotFound
+# Notes: If @fdname already exists, the file descriptor assigned to it
+#     will be closed and replaced by the received file descriptor.
 #
-# Since: 0.14.0
+#     The 'closefd' command can be used to explicitly close the file
+#     descriptor when it is no longer needed.
 #
 # Example:
 #
-# 1. Change a removable medium
-#
-# -> { "execute": "change",
-#      "arguments": { "device": "ide1-cd0",
-#                     "target": "/srv/images/Fedora-12-x86_64-DVD.iso" } }
-# <- { "return": {} }
-#
-# 2. Change VNC password
-#
-# -> { "execute": "change",
-#      "arguments": { "device": "vnc", "target": "password",
-#                     "arg": "foobar1" } }
+# -> { "execute": "getfd", "arguments": { "fdname": "fd1" } }
 # <- { "return": {} }
-#
 ##
-{ 'command': 'change',
-  'data': {'device': 'str', 'target': 'str', '*arg': 'str'},
-  'features': [ 'deprecated' ] }
+{ 'command': 'getfd', 'data': {'fdname': 'str'}, 'if': 'CONFIG_POSIX' }
 
 ##
-# @getfd:
+# @get-win32-socket:
 #
-# Receive a file descriptor via SCM rights and assign it a name
+# Add a socket that was duplicated to QEMU process with
+# WSADuplicateSocketW() via WSASocket() & WSAPROTOCOL_INFOW structure
+# and assign it a name (the SOCKET is associated with a CRT file
+# descriptor)
+#
+# @info: the WSAPROTOCOL_INFOW structure (encoded in base64)
 #
 # @fdname: file descriptor name
 #
 # Returns: Nothing on success
 #
-# Since: 0.14.0
+# Since: 8.0
 #
-# Notes: If @fdname already exists, the file descriptor assigned to
-#        it will be closed and replaced by the received file
-#        descriptor.
+# Notes: If @fdname already exists, the file descriptor assigned to it
+#     will be closed and replaced by the received file descriptor.
 #
-#        The 'closefd' command can be used to explicitly close the
-#        file descriptor when it is no longer needed.
+#     The 'closefd' command can be used to explicitly close the file
+#     descriptor when it is no longer needed.
 #
 # Example:
 #
-# -> { "execute": "getfd", "arguments": { "fdname": "fd1" } }
+# -> { "execute": "get-win32-socket", "arguments": { "info": "abcd123..", fdname": "skclient" } }
 # <- { "return": {} }
-#
 ##
-{ 'command': 'getfd', 'data': {'fdname': 'str'} }
+{ 'command': 'get-win32-socket', 'data': {'info': 'str', 'fdname': 'str'}, 'if': 'CONFIG_WIN32' }
 
 ##
 # @closefd:
 #
 # Returns: Nothing on success
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 # -> { "execute": "closefd", "arguments": { "fdname": "fd1" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'closefd', 'data': {'fdname': 'str'} }
 
 #
 # @fdset-id: The ID of the fd set that @fd was added to.
 #
-# @fd: The file descriptor that was received via SCM rights and
-#      added to the fd set.
+# @fd: The file descriptor that was received via SCM rights and added
+#     to the fd set.
 #
-# Since: 1.2.0
+# Since: 1.2
 ##
 { 'struct': 'AddfdInfo', 'data': {'fdset-id': 'int', 'fd': 'int'} }
 
 #
 # @opaque: A free-form string that can be used to describe the fd.
 #
-# Returns: - @AddfdInfo on success
-#          - If file descriptor was not received, FdNotSupplied
-#          - If @fdset-id is a negative value, InvalidParameterValue
+# Returns:
+# - @AddfdInfo on success
+# - If file descriptor was not received, GenericError
+# - If @fdset-id is a negative value, GenericError
 #
 # Notes: The list of fd sets is shared by all monitor connections.
 #
-#        If @fdset-id is not specified, a new fd set will be created.
+# If @fdset-id is not specified, a new fd set will be created.
 #
-# Since: 1.2.0
+# Since: 1.2
 #
 # Example:
 #
 # -> { "execute": "add-fd", "arguments": { "fdset-id": 1 } }
 # <- { "return": { "fdset-id": 1, "fd": 3 } }
-#
 ##
 { 'command': 'add-fd',
   'data': { '*fdset-id': 'int',
 #
 # @fd: The file descriptor that is to be removed.
 #
-# Returns: - Nothing on success
-#          - If @fdset-id or @fd is not found, FdNotFound
+# Returns:
+# - Nothing on success
+# - If @fdset-id or @fd is not found, GenericError
 #
-# Since: 1.2.0
+# Since: 1.2
 #
 # Notes: The list of fd sets is shared by all monitor connections.
 #
-#        If @fd is not specified, all file descriptors in @fdset-id
-#        will be removed.
+# If @fd is not specified, all file descriptors in @fdset-id will be
+# removed.
 #
 # Example:
 #
 # -> { "execute": "remove-fd", "arguments": { "fdset-id": 1, "fd": 3 } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'remove-fd', 'data': {'fdset-id': 'int', '*fd': 'int'} }
 
 #
 # @opaque: A free-form string that can be used to describe the fd.
 #
-# Since: 1.2.0
+# Since: 1.2
 ##
 { 'struct': 'FdsetFdInfo',
   'data': {'fd': 'int', '*opaque': 'str'} }
 #
 # @fds: A list of file descriptors that belong to this fd set.
 #
-# Since: 1.2.0
+# Since: 1.2
 ##
 { 'struct': 'FdsetInfo',
   'data': {'fdset-id': 'int', 'fds': ['FdsetFdInfo']} }
 #
 # Returns: A list of @FdsetInfo
 #
-# Since: 1.2.0
+# Since: 1.2
 #
 # Note: The list of fd sets is shared by all monitor connections.
 #
 #        }
 #      ]
 #    }
-#
 ##
 { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
 
 # @number: accepts a number
 #
 # @size: accepts a number followed by an optional suffix (K)ilo,
-#        (M)ega, (G)iga, (T)era
+#     (M)ega, (G)iga, (T)era
 #
 # Since: 1.5
 ##
 ##
 # @CommandLineOptionInfo:
 #
-# Details about a command line option, including its list of parameter details
+# Details about a command line option, including its list of parameter
+# details
 #
 # @option: option name
 #
 #
 # @option: option name
 #
-# Returns: list of @CommandLineOptionInfo for all options (or for the given
-#          @option).  Returns an error if the given @option doesn't exist.
+# Returns: list of @CommandLineOptionInfo for all options (or for the
+#     given @option).  Returns an error if the given @option doesn't
+#     exist.
 #
 # Since: 1.5
 #
 #         }
 #      ]
 #    }
-#
 ##
 {'command': 'query-command-line-options',
- 'data': { '*option': 'str' },
+ 'data': {'*option': 'str'},
  'returns': ['CommandLineOptionInfo'],
- 'allow-preconfig': true }
+ 'allow-preconfig': true}
+
+##
+# @RTC_CHANGE:
+#
+# Emitted when the guest changes the RTC time.
+#
+# @offset: offset in seconds between base RTC clock (as specified by
+#     -rtc base), and new RTC clock value
+#
+# @qom-path: path to the RTC object in the QOM tree
+#
+# Note: This event is rate-limited.  It is not guaranteed that the RTC
+#     in the system implements this event, or even that the system has
+#     an RTC at all.
+#
+# Since: 0.13
+#
+# Example:
+#
+# <- { "event": "RTC_CHANGE",
+#      "data": { "offset": 78 },
+#      "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
+##
+{ 'event': 'RTC_CHANGE',
+  'data': { 'offset': 'int', 'qom-path': 'str' } }
+
+##
+# @VFU_CLIENT_HANGUP:
+#
+# Emitted when the client of a TYPE_VFIO_USER_SERVER closes the
+# communication channel
+#
+# @vfu-id: ID of the TYPE_VFIO_USER_SERVER object.  It is the last
+#     component of @vfu-qom-path referenced below
+#
+# @vfu-qom-path: path to the TYPE_VFIO_USER_SERVER object in the QOM
+#     tree
+#
+# @dev-id: ID of attached PCI device
+#
+# @dev-qom-path: path to attached PCI device in the QOM tree
+#
+# Since: 7.1
+#
+# Example:
+#
+# <- { "event": "VFU_CLIENT_HANGUP",
+#      "data": { "vfu-id": "vfu1",
+#                "vfu-qom-path": "/objects/vfu1",
+#                "dev-id": "sas1",
+#                "dev-qom-path": "/machine/peripheral/sas1" },
+#      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+##
+{ 'event': 'VFU_CLIENT_HANGUP',
+  'data': { 'vfu-id': 'str', 'vfu-qom-path': 'str',
+            'dev-id': 'str', 'dev-qom-path': 'str' } }
index a3a13360018d477d736e6bc05377f7bf92ff6ad3..8095b68fa831f0f310b515903c9fbe64b028d741 100644 (file)
@@ -7,6 +7,7 @@
 ##
 
 { 'include': 'common.json' }
+{ 'include': 'sockets.json' }
 
 ##
 # @set_link:
 #
 # @up: true to set the link status to be up
 #
-# Returns: Nothing on success
-#          If @name is not a valid network device, DeviceNotFound
+# Returns: Nothing on success If @name is not a valid network device,
+#     DeviceNotFound
 #
-# Since: 0.14.0
+# Since: 0.14
 #
-# Notes: Not all network adapters support setting link status.  This command
-#        will succeed even if the network adapter does not support link status
-#        notification.
+# Notes: Not all network adapters support setting link status.  This
+#     command will succeed even if the network adapter does not
+#     support link status notification.
 #
 # Example:
 #
 # -> { "execute": "set_link",
 #      "arguments": { "name": "e1000.0", "up": false } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'set_link', 'data': {'name': 'str', 'up': 'bool'} }
 
 #
 # Additional arguments depend on the type.
 #
-# Since: 0.14.0
+# Since: 0.14
 #
-# Returns: Nothing on success
-#          If @type is not a valid network backend, DeviceNotFound
+# Returns: Nothing on success If @type is not a valid network backend,
+#     DeviceNotFound
 #
 # Example:
 #
 # -> { "execute": "netdev_add",
 #      "arguments": { "type": "user", "id": "netdev1",
-#                     "dnssearch": "example.org" } }
+#                     "dnssearch": [ { "str": "example.org" } ] } }
 # <- { "return": {} }
-#
 ##
-{ 'command': 'netdev_add', 'data': 'Netdev', 'boxed': true }
+{ 'command': 'netdev_add', 'data': 'Netdev', 'boxed': true,
+  'allow-preconfig': true }
 
 ##
 # @netdev_del:
 #
 # @id: the name of the network backend to remove
 #
-# Returns: Nothing on success
-#          If @id is not a valid network backend, DeviceNotFound
+# Returns: Nothing on success If @id is not a valid network backend,
+#     DeviceNotFound
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 # -> { "execute": "netdev_del", "arguments": { "id": "netdev1" } }
 # <- { "return": {} }
-#
 ##
-{ 'command': 'netdev_del', 'data': {'id': 'str'} }
+{ 'command': 'netdev_del', 'data': {'id': 'str'},
+  'allow-preconfig': true }
 
 ##
 # @NetLegacyNicOptions:
 ##
 # @NetdevUserOptions:
 #
-# Use the user mode network stack which requires no administrator privilege to
-# run.
+# Use the user mode network stack which requires no administrator
+# privilege to run.
 #
 # @hostname: client hostname reported by the builtin DHCP server
 #
 # @restrict: isolate the guest from the host
 #
-# @ipv4: whether to support IPv4, default true for enabled
-#        (since 2.6)
+# @ipv4: whether to support IPv4, default true for enabled (since 2.6)
 #
-# @ipv6: whether to support IPv6, default true for enabled
-#        (since 2.6)
+# @ipv6: whether to support IPv6, default true for enabled (since 2.6)
 #
 # @ip: legacy parameter, use net= instead
 #
-# @net: IP network address that the guest will see, in the
-#       form addr[/netmask] The netmask is optional, and can be
-#       either in the form a.b.c.d or as a number of valid top-most
-#       bits. Default is 10.0.2.0/24.
+# @net: IP network address that the guest will see, in the form
+#     addr[/netmask] The netmask is optional, and can be either in the
+#     form a.b.c.d or as a number of valid top-most bits.  Default is
+#     10.0.2.0/24.
 #
 # @host: guest-visible address of the host
 #
 # @bootfile: BOOTP filename, for use with tftp=
 #
 # @dhcpstart: the first of the 16 IPs the built-in DHCP server can
-#             assign
+#     assign
 #
 # @dns: guest-visible address of the virtual nameserver
 #
-# @dnssearch: list of DNS suffixes to search, passed as DHCP option
-#             to the guest
+# @dnssearch: list of DNS suffixes to search, passed as DHCP option to
+#     the guest
 #
 # @domainname: guest-visible domain name of the virtual nameserver
-#              (since 3.0)
+#     (since 3.0)
 #
-# @ipv6-prefix: IPv6 network prefix (default is fec0::) (since
-#               2.6). The network prefix is given in the usual
-#               hexadecimal IPv6 address notation.
+# @ipv6-prefix: IPv6 network prefix (default is fec0::) (since 2.6).
+#     The network prefix is given in the usual hexadecimal IPv6
+#     address notation.
 #
-# @ipv6-prefixlen: IPv6 network prefix length (default is 64)
-#                  (since 2.6)
+# @ipv6-prefixlen: IPv6 network prefix length (default is 64) (since
+#     2.6)
 #
 # @ipv6-host: guest-visible IPv6 address of the host (since 2.6)
 #
-# @ipv6-dns: guest-visible IPv6 address of the virtual
-#            nameserver (since 2.6)
+# @ipv6-dns: guest-visible IPv6 address of the virtual nameserver
+#     (since 2.6)
 #
 # @smb: root directory of the built-in SMB server
 #
 # @smbserver: IP address of the built-in SMB server
 #
 # @hostfwd: redirect incoming TCP or UDP host connections to guest
-#           endpoints
+#     endpoints
 #
 # @guestfwd: forward guest TCP connections
 #
 # @fd: file descriptor of an already opened tap
 #
 # @fds: multiple file descriptors of already opened multiqueue capable
-#       tap
+#     tap
 #
 # @script: script to initialize the interface
 #
 #
 # @helper: command to execute to configure bridge
 #
-# @sndbuf: send buffer limit. Understands [TGMKkb] suffixes.
+# @sndbuf: send buffer limit.  Understands [TGMKkb] suffixes.
 #
 # @vnet_hdr: enable the IFF_VNET_HDR flag on the tap interface
 #
 # @vhostfd: file descriptor of an already opened vhost net device
 #
 # @vhostfds: file descriptors of multiple already opened vhost net
-#            devices
+#     devices
 #
 # @vhostforce: vhost on for non-MSIX virtio guests
 #
 # @queues: number of queues to be created for multiqueue capable tap
 #
-# @poll-us: maximum number of microseconds that could
-#           be spent on busy polling for tap (since 2.7)
+# @poll-us: maximum number of microseconds that could be spent on busy
+#     polling for tap (since 2.7)
 #
 # Since: 1.2
 ##
 #
 # @udp: use the udp version of l2tpv3 encapsulation
 #
-# @cookie64: use 64 bit coookies
+# @cookie64: use 64 bit cookies
 #
 # @counter: have sequence counter
 #
-# @pincounter: pin sequence counter to zero -
-#              workaround for buggy implementations or
-#              networks with packet reorder
+# @pincounter: pin sequence counter to zero - workaround for buggy
+#     implementations or networks with packet reorder
 #
 # @txcookie: 32 or 64 bit transmit cookie
 #
 #
 # @txsession: 32 bit transmit session
 #
-# @rxsession: 32 bit receive session - if not specified
-#             set to the same value as transmit
+# @rxsession: 32 bit receive session - if not specified set to the
+#     same value as transmit
 #
-# @offset: additional offset - allows the insertion of
-#          additional application-specific data before the packet payload
+# @offset: additional offset - allows the insertion of additional
+#     application-specific data before the packet payload
 #
 # Since: 2.1
 ##
 # Connect two or more net clients through a software hub.
 #
 # @hubid: hub identifier number
-# @netdev: used to connect hub to a netdev instead of a device (since 2.12)
+#
+# @netdev: used to connect hub to a netdev instead of a device (since
+#     2.12)
 #
 # Since: 1.2
 ##
 #
 # Connect a client to a netmap-enabled NIC or to a VALE switch port
 #
-# @ifname: Either the name of an existing network interface supported by
-#          netmap, or the name of a VALE port (created on the fly).
-#          A VALE port name is in the form 'valeXXX:YYY', where XXX and
-#          YYY are non-negative integers. XXX identifies a switch and
-#          YYY identifies a port of the switch. VALE ports having the
-#          same XXX are therefore connected to the same switch.
+# @ifname: Either the name of an existing network interface supported
+#     by netmap, or the name of a VALE port (created on the fly). A
+#     VALE port name is in the form 'valeXXX:YYY', where XXX and YYY
+#     are non-negative integers.  XXX identifies a switch and YYY
+#     identifies a port of the switch.  VALE ports having the same XXX
+#     are therefore connected to the same switch.
 #
 # @devname: path of the netmap device (default: '/dev/netmap').
 #
     'ifname':     'str',
     '*devname':    'str' } }
 
+##
+# @AFXDPMode:
+#
+# Attach mode for a default XDP program
+#
+# @skb: generic mode, no driver support necessary
+#
+# @native: DRV mode, program is attached to a driver, packets are passed to
+#     the socket without allocation of skb.
+#
+# Since: 8.2
+##
+{ 'enum': 'AFXDPMode',
+  'data': [ 'native', 'skb' ],
+  'if': 'CONFIG_AF_XDP' }
+
+##
+# @NetdevAFXDPOptions:
+#
+# AF_XDP network backend
+#
+# @ifname: The name of an existing network interface.
+#
+# @mode: Attach mode for a default XDP program.  If not specified, then
+#     'native' will be tried first, then 'skb'.
+#
+# @force-copy: Force XDP copy mode even if device supports zero-copy.
+#     (default: false)
+#
+# @queues: number of queues to be used for multiqueue interfaces (default: 1).
+#
+# @start-queue: Use @queues starting from this queue number (default: 0).
+#
+# @inhibit: Don't load a default XDP program, use one already loaded to
+#     the interface (default: false).  Requires @sock-fds.
+#
+# @sock-fds: A colon (:) separated list of file descriptors for already open
+#     but not bound AF_XDP sockets in the queue order.  One fd per queue.
+#     These descriptors should already be added into XDP socket map for
+#     corresponding queues.  Requires @inhibit.
+#
+# Since: 8.2
+##
+{ 'struct': 'NetdevAFXDPOptions',
+  'data': {
+    'ifname':       'str',
+    '*mode':        'AFXDPMode',
+    '*force-copy':  'bool',
+    '*queues':      'int',
+    '*start-queue': 'int',
+    '*inhibit':     'bool',
+    '*sock-fds':    'str' },
+  'if': 'CONFIG_AF_XDP' }
+
 ##
 # @NetdevVhostUserOptions:
 #
 # @vhostforce: vhost on for non-MSIX virtio guests (default: false).
 #
 # @queues: number of queues to be created for multiqueue vhost-user
-#          (default: 1) (Since 2.5)
+#     (default: 1) (Since 2.5)
 #
 # Since: 2.1
 ##
 #
 # Vhost-vdpa network backend
 #
-# vDPA device is a device that uses a datapath which complies with the virtio
-# specifications with a vendor specific control path.
+# vDPA device is a device that uses a datapath which complies with the
+# virtio specifications with a vendor specific control path.
+#
+# @vhostdev: path of vhost-vdpa device (default:'/dev/vhost-vdpa-0')
 #
-# @vhostdev: path of vhost-vdpa device
-#            (default:'/dev/vhost-vdpa-0')
+# @vhostfd: file descriptor of an already opened vhost vdpa device
 #
 # @queues: number of queues to be created for multiqueue vhost-vdpa
-#          (default: 1)
+#     (default: 1)
+#
+# @x-svq: Start device with (experimental) shadow virtqueue.  (Since
+#     7.1) (default: false)
+#
+# Features:
+#
+# @unstable: Member @x-svq is experimental.
 #
 # Since: 5.1
 ##
 { 'struct': 'NetdevVhostVDPAOptions',
   'data': {
     '*vhostdev':     'str',
-    '*queues':       'int' } }
+    '*vhostfd':      'str',
+    '*queues':       'int',
+    '*x-svq':        {'type': 'bool', 'features' : [ 'unstable'] } } }
+
+##
+# @NetdevVmnetHostOptions:
+#
+# vmnet (host mode) network backend.
+#
+# Allows the vmnet interface to communicate with other vmnet
+# interfaces that are in host mode and also with the host.
+#
+# @start-address: The starting IPv4 address to use for the interface.
+#     Must be in the private IP range (RFC 1918). Must be specified
+#     along with @end-address and @subnet-mask.  This address is used
+#     as the gateway address.  The subsequent address up to and
+#     including end-address are placed in the DHCP pool.
+#
+# @end-address: The DHCP IPv4 range end address to use for the
+#     interface.  Must be in the private IP range (RFC 1918). Must be
+#     specified along with @start-address and @subnet-mask.
+#
+# @subnet-mask: The IPv4 subnet mask to use on the interface.  Must be
+#     specified along with @start-address and @subnet-mask.
+#
+# @isolated: Enable isolation for this interface.  Interface isolation
+#     ensures that vmnet interface is not able to communicate with any
+#     other vmnet interfaces.  Only communication with host is
+#     allowed.  Requires at least macOS Big Sur 11.0.
+#
+# @net-uuid: The identifier (UUID) to uniquely identify the isolated
+#     network vmnet interface should be added to.  If set, no DHCP
+#     service is provided for this interface and network communication
+#     is allowed only with other interfaces added to this network
+#     identified by the UUID. Requires at least macOS Big Sur 11.0.
+#
+# Since: 7.1
+##
+{ 'struct': 'NetdevVmnetHostOptions',
+  'data': {
+    '*start-address': 'str',
+    '*end-address':   'str',
+    '*subnet-mask':   'str',
+    '*isolated':      'bool',
+    '*net-uuid':      'str' },
+  'if': 'CONFIG_VMNET' }
+
+##
+# @NetdevVmnetSharedOptions:
+#
+# vmnet (shared mode) network backend.
+#
+# Allows traffic originating from the vmnet interface to reach the
+# Internet through a network address translator (NAT). The vmnet
+# interface can communicate with the host and with other shared mode
+# interfaces on the same subnet.  If no DHCP settings, subnet mask and
+# IPv6 prefix specified, the interface can communicate with any of
+# other interfaces in shared mode.
+#
+# @start-address: The starting IPv4 address to use for the interface.
+#     Must be in the private IP range (RFC 1918). Must be specified
+#     along with @end-address and @subnet-mask.  This address is used
+#     as the gateway address.  The subsequent address up to and
+#     including end-address are placed in the DHCP pool.
+#
+# @end-address: The DHCP IPv4 range end address to use for the
+#     interface.  Must be in the private IP range (RFC 1918). Must be
+#     specified along with @start-address and @subnet-mask.
+#
+# @subnet-mask: The IPv4 subnet mask to use on the interface.  Must be
+#     specified along with @start-address and @subnet-mask.
+#
+# @isolated: Enable isolation for this interface.  Interface isolation
+#     ensures that vmnet interface is not able to communicate with any
+#     other vmnet interfaces.  Only communication with host is
+#     allowed.  Requires at least macOS Big Sur 11.0.
+#
+# @nat66-prefix: The IPv6 prefix to use into guest network.  Must be a
+#     unique local address i.e. start with fd00::/8 and have length of
+#     64.
+#
+# Since: 7.1
+##
+{ 'struct': 'NetdevVmnetSharedOptions',
+  'data': {
+    '*start-address': 'str',
+    '*end-address':   'str',
+    '*subnet-mask':   'str',
+    '*isolated':      'bool',
+    '*nat66-prefix':  'str' },
+  'if': 'CONFIG_VMNET' }
+
+##
+# @NetdevVmnetBridgedOptions:
+#
+# vmnet (bridged mode) network backend.
+#
+# Bridges the vmnet interface with a physical network interface.
+#
+# @ifname: The name of the physical interface to be bridged.
+#
+# @isolated: Enable isolation for this interface.  Interface isolation
+#     ensures that vmnet interface is not able to communicate with any
+#     other vmnet interfaces.  Only communication with host is
+#     allowed.  Requires at least macOS Big Sur 11.0.
+#
+# Since: 7.1
+##
+{ 'struct': 'NetdevVmnetBridgedOptions',
+  'data': {
+    'ifname':     'str',
+    '*isolated':  'bool' },
+  'if': 'CONFIG_VMNET' }
+
+##
+# @NetdevStreamOptions:
+#
+# Configuration info for stream socket netdev
+#
+# @addr: socket address to listen on (server=true) or connect to
+#     (server=false)
+#
+# @server: create server socket (default: false)
+#
+# @reconnect: For a client socket, if a socket is disconnected, then
+#     attempt a reconnect after the given number of seconds.  Setting
+#     this to zero disables this function.  (default: 0) (since 8.0)
+#
+# Only SocketAddress types 'unix', 'inet' and 'fd' are supported.
+#
+# Since: 7.2
+##
+{ 'struct': 'NetdevStreamOptions',
+  'data': {
+    'addr':   'SocketAddress',
+    '*server': 'bool',
+    '*reconnect': 'uint32' } }
+
+##
+# @NetdevDgramOptions:
+#
+# Configuration info for datagram socket netdev.
+#
+# @remote: remote address
+#
+# @local: local address
+#
+# Only SocketAddress types 'unix', 'inet' and 'fd' are supported.
+#
+# If remote address is present and it's a multicast address, local
+# address is optional.  Otherwise local address is required and remote
+# address is optional.
+#
+# .. table:: Valid parameters combination table
+#    :widths: auto
+#
+#    =============  ========  =====
+#    remote         local     okay?
+#    =============  ========  =====
+#    absent         absent    no
+#    absent         not fd    no
+#    absent         fd        yes
+#    multicast      absent    yes
+#    multicast      present   yes
+#    not multicast  absent    no
+#    not multicast  present   yes
+#    =============  ========  =====
+#
+# Since: 7.2
+##
+{ 'struct': 'NetdevDgramOptions',
+  'data': {
+    '*local':  'SocketAddress',
+    '*remote': 'SocketAddress' } }
 
 ##
 # @NetClientDriver:
 #
 # Available netdev drivers.
 #
-# Since: 2.7
+# @l2tpv3: since 2.1
+# @vhost-vdpa: since 5.1
+# @vmnet-host: since 7.1
+# @vmnet-shared: since 7.1
+# @vmnet-bridged: since 7.1
+# @stream: since 7.2
+# @dgram: since 7.2
+# @af-xdp: since 8.2
 #
-#        @vhost-vdpa since 5.1
+# Since: 2.7
 ##
 { 'enum': 'NetClientDriver',
-  'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
-            'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa' ] }
+  'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'stream',
+            'dgram', 'vde', 'bridge', 'hubport', 'netmap', 'vhost-user',
+            'vhost-vdpa',
+            { 'name': 'af-xdp', 'if': 'CONFIG_AF_XDP' },
+            { 'name': 'vmnet-host', 'if': 'CONFIG_VMNET' },
+            { 'name': 'vmnet-shared', 'if': 'CONFIG_VMNET' },
+            { 'name': 'vmnet-bridged', 'if': 'CONFIG_VMNET' }] }
 
 ##
 # @Netdev:
 # @type: Specify the driver used for interpreting remaining arguments.
 #
 # Since: 1.2
-#
-#        'l2tpv3' - since 2.1
 ##
 { 'union': 'Netdev',
   'base': { 'id': 'str', 'type': 'NetClientDriver' },
     'tap':      'NetdevTapOptions',
     'l2tpv3':   'NetdevL2TPv3Options',
     'socket':   'NetdevSocketOptions',
+    'stream':   'NetdevStreamOptions',
+    'dgram':    'NetdevDgramOptions',
     'vde':      'NetdevVdeOptions',
     'bridge':   'NetdevBridgeOptions',
     'hubport':  'NetdevHubPortOptions',
     'netmap':   'NetdevNetmapOptions',
+    'af-xdp':   { 'type': 'NetdevAFXDPOptions',
+                  'if': 'CONFIG_AF_XDP' },
     'vhost-user': 'NetdevVhostUserOptions',
-    'vhost-vdpa': 'NetdevVhostVDPAOptions' } }
-
-##
-# @NetFilterDirection:
-#
-# Indicates whether a netfilter is attached to a netdev's transmit queue or
-# receive queue or both.
-#
-# @all: the filter is attached both to the receive and the transmit
-#       queue of the netdev (default).
-#
-# @rx: the filter is attached to the receive queue of the netdev,
-#      where it will receive packets sent to the netdev.
-#
-# @tx: the filter is attached to the transmit queue of the netdev,
-#      where it will receive packets sent by the netdev.
-#
-# Since: 2.5
-##
-{ 'enum': 'NetFilterDirection',
-  'data': [ 'all', 'rx', 'tx' ] }
+    'vhost-vdpa': 'NetdevVhostVDPAOptions',
+    'vmnet-host': { 'type': 'NetdevVmnetHostOptions',
+                    'if': 'CONFIG_VMNET' },
+    'vmnet-shared': { 'type': 'NetdevVmnetSharedOptions',
+                      'if': 'CONFIG_VMNET' },
+    'vmnet-bridged': { 'type': 'NetdevVmnetBridgedOptions',
+                       'if': 'CONFIG_VMNET' } } }
 
 ##
 # @RxState:
 # @name: net client name
 #
 # Returns: list of @RxFilterInfo for all NICs (or for the given NIC).
-#          Returns an error if the given @name doesn't exist, or given
-#          NIC doesn't support rx-filter querying, or given net client
-#          isn't a NIC.
+#     Returns an error if the given @name doesn't exist, or given NIC
+#     doesn't support rx-filter querying, or given net client isn't a
+#     NIC.
 #
 # Since: 1.6
 #
 #         }
 #       ]
 #    }
-#
 ##
 { 'command': 'query-rx-filter',
   'data': { '*name': 'str' },
 ##
 # @NIC_RX_FILTER_CHANGED:
 #
-# Emitted once until the 'query-rx-filter' command is executed, the first event
-# will always be emitted
+# Emitted once until the 'query-rx-filter' command is executed, the
+# first event will always be emitted
 #
 # @name: net client name
 #
 #      "data": { "name": "vnet0",
 #                "path": "/machine/peripheral/vnet0/virtio-backend" },
 #      "timestamp": { "seconds": 1368697518, "microseconds": 326866 } }
-#    }
-#
 ##
 { 'event': 'NIC_RX_FILTER_CHANGED',
   'data': { '*name': 'str', 'path': 'str' } }
 # Parameters for self-announce timers
 #
 # @initial: Initial delay (in ms) before sending the first GARP/RARP
-#           announcement
+#     announcement
 #
 # @max: Maximum delay (in ms) between GARP/RARP announcement packets
 #
 #
 # @step: Delay increase (in ms) after each self-announcement attempt
 #
-# @interfaces: An optional list of interface names, which restricts the
-#              announcement to the listed interfaces. (Since 4.1)
+# @interfaces: An optional list of interface names, which restricts
+#     the announcement to the listed interfaces.  (Since 4.1)
 #
 # @id: A name to be used to identify an instance of announce-timers
-#      and to allow it to modified later.  Not for use as
-#      part of the migration parameters. (Since 4.1)
+#     and to allow it to modified later.  Not for use as part of the
+#     migration parameters.  (Since 4.1)
 #
 # Since: 4.0
 ##
 ##
 # @announce-self:
 #
-# Trigger generation of broadcast RARP frames to update network switches.
-# This can be useful when network bonds fail-over the active slave.
+# Trigger generation of broadcast RARP frames to update network
+# switches.  This can be useful when network bonds fail-over the
+# active slave.
 #
 # Example:
 #
 ##
 # @FAILOVER_NEGOTIATED:
 #
-# Emitted when VIRTIO_NET_F_STANDBY was enabled during feature negotiation.
-# Failover primary devices which were hidden (not hotplugged when requested)
-# before will now be hotplugged by the virtio-net standby device.
+# Emitted when VIRTIO_NET_F_STANDBY was enabled during feature
+# negotiation.  Failover primary devices which were hidden (not
+# hotplugged when requested) before will now be hotplugged by the
+# virtio-net standby device.
+#
+# @device-id: QEMU device id of the unplugged device
 #
-# device-id: QEMU device id of the unplugged device
 # Since: 4.2
 #
 # Example:
 #
 # <- { "event": "FAILOVER_NEGOTIATED",
-#      "data": "net1" }
-#
+#      "data": { "device-id": "net1" },
+#      "timestamp": { "seconds": 1368697518, "microseconds": 326866 } }
 ##
 { 'event': 'FAILOVER_NEGOTIATED',
   'data': {'device-id': 'str'} }
+
+##
+# @NETDEV_STREAM_CONNECTED:
+#
+# Emitted when the netdev stream backend is connected
+#
+# @netdev-id: QEMU netdev id that is connected
+#
+# @addr: The destination address
+#
+# Since: 7.2
+#
+# Examples:
+#
+# <- { "event": "NETDEV_STREAM_CONNECTED",
+#      "data": { "netdev-id": "netdev0",
+#                "addr": { "port": "47666", "ipv6": true,
+#                          "host": "::1", "type": "inet" } },
+#      "timestamp": { "seconds": 1666269863, "microseconds": 311222 } }
+#
+# <- { "event": "NETDEV_STREAM_CONNECTED",
+#      "data": { "netdev-id": "netdev0",
+#                "addr": { "path": "/tmp/qemu0", "type": "unix" } },
+#      "timestamp": { "seconds": 1666269706, "microseconds": 413651 } }
+##
+{ 'event': 'NETDEV_STREAM_CONNECTED',
+  'data': { 'netdev-id': 'str',
+            'addr': 'SocketAddress' } }
+
+##
+# @NETDEV_STREAM_DISCONNECTED:
+#
+# Emitted when the netdev stream backend is disconnected
+#
+# @netdev-id: QEMU netdev id that is disconnected
+#
+# Since: 7.2
+#
+# Example:
+#
+# <- { 'event': 'NETDEV_STREAM_DISCONNECTED',
+#      'data': {'netdev-id': 'netdev0'},
+#      'timestamp': {'seconds': 1663330937, 'microseconds': 526695} }
+##
+{ 'event': 'NETDEV_STREAM_DISCONNECTED',
+  'data': { 'netdev-id': 'str' } }
index 587f31baf6b8bd3f53c69a4d1f61979b073eb39d..8f1efab8b9de23fe94999791983a042cffa893de 100644 (file)
@@ -454,8 +454,8 @@ opts_type_uint64(Visitor *v, const char *name, uint64_t *obj, Error **errp)
     OptsVisitor *ov = to_ov(v);
     const QemuOpt *opt;
     const char *str;
-    unsigned long long val;
-    char *endptr;
+    uint64_t val;
+    const char *endptr;
 
     if (ov->list_mode == LM_UNSIGNED_INTERVAL) {
         *obj = ov->range_next.u;
@@ -471,18 +471,18 @@ opts_type_uint64(Visitor *v, const char *name, uint64_t *obj, Error **errp)
     /* we've gotten past lookup_scalar() */
     assert(ov->list_mode == LM_NONE || ov->list_mode == LM_IN_PROGRESS);
 
-    if (parse_uint(str, &val, &endptr, 0) == 0 && val <= UINT64_MAX) {
+    if (parse_uint(str, &endptr, 0, &val) == 0) {
         if (*endptr == '\0') {
             *obj = val;
             processed(ov, name);
             return true;
         }
         if (*endptr == '-' && ov->list_mode == LM_IN_PROGRESS) {
-            unsigned long long val2;
+            uint64_t val2;
 
             str = endptr + 1;
-            if (parse_uint_full(str, &val2, 0) == 0 &&
-                val2 <= UINT64_MAX && val <= val2 &&
+            if (parse_uint_full(str, 0, &val2) == 0 &&
+                val <= val2 &&
                 val2 - val < OPTS_VISITOR_RANGE_MAX) {
                 ov->range_next.u = val;
                 ov->range_limit.u = val2;
index b79cbd787bee6fe5a649d9d0227ff204c0cc8a61..086c77305282ba09c78cf790b4b20736853fd079 100644 (file)
@@ -18,7 +18,7 @@
 #
 # @limit: the ending address (guest physical)
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'PciMemoryRange', 'data': {'base': 'int', 'limit': 'int'} }
 
@@ -29,8 +29,9 @@
 #
 # @bar: the index of the Base Address Register for this region
 #
-# @type: - 'io' if the region is a PIO region
-#        - 'memory' if the region is a MMIO region
+# @type:
+#     - 'io' if the region is a PIO region
+#     - 'memory' if the region is a MMIO region
 #
 # @size: memory size
 #
@@ -38,7 +39,7 @@
 #
 # @mem_type_64: if @type is 'memory', true if the BAR is 64-bit
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'PciMemoryRegion',
   'data': {'bar': 'int', 'type': 'str', 'address': 'int', 'size': 'int',
 #
 # Information about a bus of a PCI Bridge device
 #
-# @number: primary bus interface number.  This should be the number of the
-#          bus the device resides on.
+# @number: primary bus interface number.  This should be the number of
+#     the bus the device resides on.
 #
-# @secondary: secondary bus interface number.  This is the number of the
-#             main bus for the bridge
+# @secondary: secondary bus interface number.  This is the number of
+#     the main bus for the bridge
 #
 # @subordinate: This is the highest number bus that resides below the
-#               bridge.
+#     bridge.
 #
 # @io_range: The PIO range for all devices on this bridge
 #
 # @memory_range: The MMIO range for all devices on this bridge
 #
-# @prefetchable_range: The range of prefetchable MMIO for all devices on
-#                      this bridge
+# @prefetchable_range: The range of prefetchable MMIO for all devices
+#     on this bridge
 #
 # Since: 2.4
 ##
@@ -82,7 +83,7 @@
 #
 # @devices: a list of @PciDeviceInfo for each device on this bridge
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'PciBridgeInfo',
   'data': {'bus': 'PciBusInfo', '*devices': ['PciDeviceInfo']} }
 #
 # @regions: a list of the PCI I/O regions associated with the device
 #
-# Notes: the contents of @class_info.desc are not stable and should only be
-#        treated as informational.
+# Notes: the contents of @class_info.desc are not stable and should
+#     only be treated as informational.
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'PciDeviceInfo',
   'data': {'bus': 'int', 'slot': 'int', 'function': 'int',
 #
 # @devices: a list of devices on this bus
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'PciInfo', 'data': {'bus': 'int', 'devices': ['PciDeviceInfo']} }
 
 #
 # Return information about the PCI bus topology of the guest.
 #
-# Returns: a list of @PciInfo for each PCI bus. Each bus is
-#          represented by a json-object, which has a key with a json-array of
-#          all PCI devices attached to it. Each device is represented by a
-#          json-object.
+# Returns: a list of @PciInfo for each PCI bus.  Each bus is
+#     represented by a json-object, which has a key with a json-array
+#     of all PCI devices attached to it.  Each device is represented
+#     by a json-object.
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 #       ]
 #    }
 #
-# Note: This example has been shortened as the real response is too long.
-#
+# Note: This example has been shortened as the real response is too
+#     long.
 ##
 { 'command': 'query-pci', 'returns': ['PciInfo'] }
index cffae27666d9bc63071c59ac44a475925c7d6329..0aa4eeddd3893bee760fb28ab1d7222dfb3c13ed 100644 (file)
@@ -1,24 +1,68 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+
 { 'pragma': { 'doc-required': true } }
 
-# Whitelists to permit QAPI rule violations; think twice before you
-# add to them!
+# Entries in these lists are allowed to violate the QAPI rules (for
+# historical reasons); think twice before you add to them!
 { 'pragma': {
-    # Commands allowed to return a non-dictionary:
-    'returns-whitelist': [
+    # Command names containing '_'
+    'command-name-exceptions': [
+        'add_client',
+        'block_resize',
+        'block_set_io_throttle',
+        'client_migrate_info',
+        'device_add',
+        'device_del',
+        'expire_password',
+        'migrate_cancel',
+        'netdev_add',
+        'netdev_del',
+        'qmp_capabilities',
+        'set_link',
+        'set_password',
+        'system_powerdown',
+        'system_reset',
+        'system_wakeup' ],
+    # Commands allowed to return a non-dictionary
+    'command-returns-exceptions': [
         'human-monitor-command',
         'qom-get',
-        'query-migrate-cache-size',
         'query-tpm-models',
         'query-tpm-types',
         'ringbuf-read' ],
-    'name-case-whitelist': [
-        'ACPISlotType',             # DIMM, visible through query-acpi-ospm-status
-        'CpuInfoMIPS',              # PC, visible through query-cpu
-        'CpuInfoTricore',           # PC, visible through query-cpu
-        'BlockdevVmdkSubformat',    # all members, to match VMDK spec spellings
-        'BlockdevVmdkAdapterType',  # legacyESX, to match VMDK spec spellings
-        'QapiErrorClass',           # all members, visible through errors
-        'UuidInfo',                 # UUID, visible through query-uuid
-        'X86CPURegister32',         # all members, visible indirectly through qom-get
-        'CpuInfo'                   # CPU, visible through query-cpu
+    # Externally visible types whose member names may use uppercase
+    'member-name-exceptions': [     # visible in:
+        'ACPISlotType',             # query-acpi-ospm-status
+        'AcpiTableOptions',         # -acpitable
+        'BlkdebugEvent',            # blockdev-add, -blockdev
+        'BlkdebugSetStateOptions',  # blockdev-add, -blockdev
+        'BlockDeviceInfo',          # query-block
+        'BlockDeviceStats',         # query-blockstats
+        'BlockDeviceTimedStats',    # query-blockstats
+        'BlockIOThrottle',          # block_set_io_throttle
+        'BlockInfo',                # query-block
+        'BlockdevAioOptions',       # blockdev-add, -blockdev
+        'BlockdevDriver',           # blockdev-add, query-blockstats, ...
+        'BlockdevVmdkAdapterType',  # blockdev-create (to match VMDK spec)
+        'BlockdevVmdkSubformat',    # blockdev-create (to match VMDK spec)
+        'ColoCompareProperties',    # object_add, -object
+        'FilterMirrorProperties',   # object_add, -object
+        'FilterRedirectorProperties', # object_add, -object
+        'FilterRewriterProperties', # object_add, -object
+        'InputLinuxProperties',     # object_add, -object
+        'NetdevTapOptions',         # netdev_add, query-netdev, -netdev
+        'ObjectType',               # object-add, -object
+        'PCIELinkSpeed',            # internal only
+        'PciBusInfo',               # query-pci
+        'PciDeviceInfo',            # query-pci
+        'PciMemoryRegion',          # query-pci
+        'QKeyCode',                 # send-key, input-sent-event
+        'QapiErrorClass',           # QMP error replies
+        'SshHostKeyCheckMode',      # blockdev-add, -blockdev
+        'SysEmuTarget',             # query-cpu-fast, query-target
+        'UuidInfo',                 # query-uuid
+        'VncClientInfo',            # query-vnc, query-vnc-servers, ...
+        'X86CPURegister32'          # qom-get of x86 CPU properties
+                                    # feature-words, filtered-features
     ] } }
diff --git a/qapi/qapi-forward-visitor.c b/qapi/qapi-forward-visitor.c
new file mode 100644 (file)
index 0000000..e36d9bc
--- /dev/null
@@ -0,0 +1,327 @@
+/*
+ * Forward Visitor
+ *
+ * Copyright (C) 2021 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/compat-policy.h"
+#include "qapi/error.h"
+#include "qapi/forward-visitor.h"
+#include "qapi/visitor-impl.h"
+#include "qemu/queue.h"
+#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnull.h"
+#include "qapi/qmp/qnum.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
+
+struct ForwardFieldVisitor {
+    Visitor visitor;
+
+    Visitor *target;
+    char *from;
+    char *to;
+
+    int depth;
+};
+
+static ForwardFieldVisitor *to_ffv(Visitor *v)
+{
+    return container_of(v, ForwardFieldVisitor, visitor);
+}
+
+static bool forward_field_translate_name(ForwardFieldVisitor *v, const char **name,
+                                         Error **errp)
+{
+    if (v->depth) {
+        return true;
+    }
+    if (g_str_equal(*name, v->from)) {
+        *name = v->to;
+        return true;
+    }
+    error_setg(errp, QERR_MISSING_PARAMETER, *name);
+    return false;
+}
+
+static bool forward_field_check_struct(Visitor *v, Error **errp)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    return visit_check_struct(ffv->target, errp);
+}
+
+static bool forward_field_start_struct(Visitor *v, const char *name, void **obj,
+                                       size_t size, Error **errp)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    if (!forward_field_translate_name(ffv, &name, errp)) {
+        return false;
+    }
+    if (!visit_start_struct(ffv->target, name, obj, size, errp)) {
+        return false;
+    }
+    ffv->depth++;
+    return true;
+}
+
+static void forward_field_end_struct(Visitor *v, void **obj)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    assert(ffv->depth);
+    ffv->depth--;
+    visit_end_struct(ffv->target, obj);
+}
+
+static bool forward_field_start_list(Visitor *v, const char *name,
+                                     GenericList **list, size_t size,
+                                     Error **errp)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    if (!forward_field_translate_name(ffv, &name, errp)) {
+        return false;
+    }
+    ffv->depth++;
+    return visit_start_list(ffv->target, name, list, size, errp);
+}
+
+static GenericList *forward_field_next_list(Visitor *v, GenericList *tail,
+                                            size_t size)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    assert(ffv->depth);
+    return visit_next_list(ffv->target, tail, size);
+}
+
+static bool forward_field_check_list(Visitor *v, Error **errp)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    assert(ffv->depth);
+    return visit_check_list(ffv->target, errp);
+}
+
+static void forward_field_end_list(Visitor *v, void **obj)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    assert(ffv->depth);
+    ffv->depth--;
+    visit_end_list(ffv->target, obj);
+}
+
+static bool forward_field_start_alternate(Visitor *v, const char *name,
+                                          GenericAlternate **obj, size_t size,
+                                          Error **errp)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    if (!forward_field_translate_name(ffv, &name, errp)) {
+        return false;
+    }
+    /*
+     * The name passed to start_alternate is used also in the visit_type_* calls
+     * that retrieve the alternate's content; so, do not increase depth here.
+     */
+    return visit_start_alternate(ffv->target, name, obj, size, errp);
+}
+
+static void forward_field_end_alternate(Visitor *v, void **obj)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    visit_end_alternate(ffv->target, obj);
+}
+
+static bool forward_field_type_int64(Visitor *v, const char *name, int64_t *obj,
+                                     Error **errp)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    if (!forward_field_translate_name(ffv, &name, errp)) {
+        return false;
+    }
+    return visit_type_int64(ffv->target, name, obj, errp);
+}
+
+static bool forward_field_type_uint64(Visitor *v, const char *name,
+                                      uint64_t *obj, Error **errp)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    if (!forward_field_translate_name(ffv, &name, errp)) {
+        return false;
+    }
+    return visit_type_uint64(ffv->target, name, obj, errp);
+}
+
+static bool forward_field_type_bool(Visitor *v, const char *name, bool *obj,
+                                    Error **errp)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    if (!forward_field_translate_name(ffv, &name, errp)) {
+        return false;
+    }
+    return visit_type_bool(ffv->target, name, obj, errp);
+}
+
+static bool forward_field_type_str(Visitor *v, const char *name, char **obj,
+                                   Error **errp)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    if (!forward_field_translate_name(ffv, &name, errp)) {
+        return false;
+    }
+    return visit_type_str(ffv->target, name, obj, errp);
+}
+
+static bool forward_field_type_size(Visitor *v, const char *name, uint64_t *obj,
+                                    Error **errp)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    if (!forward_field_translate_name(ffv, &name, errp)) {
+        return false;
+    }
+    return visit_type_size(ffv->target, name, obj, errp);
+}
+
+static bool forward_field_type_number(Visitor *v, const char *name, double *obj,
+                                      Error **errp)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    if (!forward_field_translate_name(ffv, &name, errp)) {
+        return false;
+    }
+    return visit_type_number(ffv->target, name, obj, errp);
+}
+
+static bool forward_field_type_any(Visitor *v, const char *name, QObject **obj,
+                                   Error **errp)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    if (!forward_field_translate_name(ffv, &name, errp)) {
+        return false;
+    }
+    return visit_type_any(ffv->target, name, obj, errp);
+}
+
+static bool forward_field_type_null(Visitor *v, const char *name,
+                                    QNull **obj, Error **errp)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    if (!forward_field_translate_name(ffv, &name, errp)) {
+        return false;
+    }
+    return visit_type_null(ffv->target, name, obj, errp);
+}
+
+static void forward_field_optional(Visitor *v, const char *name, bool *present)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    if (!forward_field_translate_name(ffv, &name, NULL)) {
+        *present = false;
+        return;
+    }
+    visit_optional(ffv->target, name, present);
+}
+
+static bool forward_field_policy_reject(Visitor *v, const char *name,
+                                        unsigned special_features,
+                                        Error **errp)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    if (!forward_field_translate_name(ffv, &name, errp)) {
+        return true;
+    }
+    return visit_policy_reject(ffv->target, name, special_features, errp);
+}
+
+static bool forward_field_policy_skip(Visitor *v, const char *name,
+                                      unsigned special_features)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    if (!forward_field_translate_name(ffv, &name, NULL)) {
+        return true;
+    }
+    return visit_policy_skip(ffv->target, name, special_features);
+}
+
+static void forward_field_complete(Visitor *v, void *opaque)
+{
+    /*
+     * Do nothing, the complete method will be called in due time
+     * on the target visitor.
+     */
+}
+
+static void forward_field_free(Visitor *v)
+{
+    ForwardFieldVisitor *ffv = to_ffv(v);
+
+    g_free(ffv->from);
+    g_free(ffv->to);
+    g_free(ffv);
+}
+
+Visitor *visitor_forward_field(Visitor *target, const char *from, const char *to)
+{
+    ForwardFieldVisitor *v = g_new0(ForwardFieldVisitor, 1);
+
+    /*
+     * Clone and dealloc visitors don't use a name for the toplevel
+     * visit, so they make no sense here.
+     */
+    assert(target->type == VISITOR_OUTPUT || target->type == VISITOR_INPUT);
+
+    v->visitor.type = target->type;
+    v->visitor.start_struct = forward_field_start_struct;
+    v->visitor.check_struct = forward_field_check_struct;
+    v->visitor.end_struct = forward_field_end_struct;
+    v->visitor.start_list = forward_field_start_list;
+    v->visitor.next_list = forward_field_next_list;
+    v->visitor.check_list = forward_field_check_list;
+    v->visitor.end_list = forward_field_end_list;
+    v->visitor.start_alternate = forward_field_start_alternate;
+    v->visitor.end_alternate = forward_field_end_alternate;
+    v->visitor.type_int64 = forward_field_type_int64;
+    v->visitor.type_uint64 = forward_field_type_uint64;
+    v->visitor.type_size = forward_field_type_size;
+    v->visitor.type_bool = forward_field_type_bool;
+    v->visitor.type_str = forward_field_type_str;
+    v->visitor.type_number = forward_field_type_number;
+    v->visitor.type_any = forward_field_type_any;
+    v->visitor.type_null = forward_field_type_null;
+    v->visitor.optional = forward_field_optional;
+    v->visitor.policy_reject = forward_field_policy_reject;
+    v->visitor.policy_skip = forward_field_policy_skip;
+    v->visitor.complete = forward_field_complete;
+    v->visitor.free = forward_field_free;
+
+    v->target = target;
+    v->from = g_strdup(from);
+    v->to = g_strdup(to);
+
+    return &v->visitor;
+}
index 0b444b76d2d14c2b298a31bff00f5d0b50405fd6..c01ec335e68058d8c87bbd36ade9133fea5e2441 100644 (file)
@@ -5,17 +5,20 @@
 #
 # This document describes all commands currently supported by QMP.
 #
-# Most of the time their usage is exactly the same as in the user Monitor, this
-# means that any other document which also describe commands (the manpage,
-# QEMU's manual, etc) can and should be consulted.
+# Most of the time their usage is exactly the same as in the user
+# Monitor, this means that any other document which also describe
+# commands (the manpage, QEMU's manual, etc) can and should be
+# consulted.
 #
-# QMP has two types of commands: regular and query commands. Regular commands
-# usually change the Virtual Machine's state someway, while query commands just
-# return information. The sections below are divided accordingly.
+# QMP has two types of commands: regular and query commands.  Regular
+# commands usually change the Virtual Machine's state someway, while
+# query commands just return information.  The sections below are
+# divided accordingly.
 #
-# It's important to observe that all communication examples are formatted in
-# a reader-friendly way, so that they're easier to understand. However, in real
-# protocol usage, they're emitted as a single line.
+# It's important to observe that all communication examples are
+# formatted in a reader-friendly way, so that they're easier to
+# understand.  However, in real protocol usage, they're emitted as a
+# single line.
 #
 # Also, the following notation is used to denote data flow:
 #
 #   -> data issued by the Client
 #   <- Server data response
 #
-# Please, refer to the QMP specification (docs/interop/qmp-spec.txt) for
-# detailed information on the Server command and response formats.
-#
-# = Stability Considerations
-#
-# The current QMP command set (described in this file) may be useful for a
-# number of use cases, however it's limited and several commands have bad
-# defined semantics, specially with regard to command completion.
-#
-# These problems are going to be solved incrementally in the next QEMU releases
-# and we're going to establish a deprecation policy for badly defined commands.
-#
-# If you're planning to adopt QMP, please observe the following:
-#
-#     1. The deprecation policy will take effect and be documented soon, please
-#        check the documentation of each used command as soon as a new release of
-#        QEMU is available
-#
-#     2. DO NOT rely on anything which is not explicit documented
-#
-#     3. Errors, in special, are not documented. Applications should NOT check
-#        for specific errors classes or data (it's strongly recommended to only
-#        check for the "error" key)
-#
+# Please refer to the
+# :doc:`QEMU Machine Protocol Specification </interop/qmp-spec>`
+# for detailed information on the Server command and response formats.
 ##
 
 { 'include': 'pragma.json' }
 { 'include': 'sockets.json' }
 { 'include': 'run-state.json' }
 { 'include': 'crypto.json' }
+{ 'include': 'job.json' }
 { 'include': 'block.json' }
 { 'include': 'block-export.json' }
 { 'include': 'char.json' }
 { 'include': 'dump.json' }
-{ 'include': 'job.json' }
 { 'include': 'net.json' }
 { 'include': 'rdma.json' }
 { 'include': 'rocker.json' }
 { 'include': 'migration.json' }
 { 'include': 'transaction.json' }
 { 'include': 'trace.json' }
+{ 'include': 'compat.json' }
 { 'include': 'control.json' }
 { 'include': 'introspect.json' }
 { 'include': 'qom.json' }
 { 'include': 'qdev.json' }
+{ 'include': 'machine-common.json' }
 { 'include': 'machine.json' }
 { 'include': 'machine-target.json' }
 { 'include': 'replay.json' }
+{ 'include': 'yank.json' }
 { 'include': 'misc.json' }
 { 'include': 'misc-target.json' }
 { 'include': 'audio.json' }
 { 'include': 'acpi.json' }
 { 'include': 'pci.json' }
+{ 'include': 'stats.json' }
+{ 'include': 'virtio.json' }
+{ 'include': 'cryptodev.json' }
+{ 'include': 'cxl.json' }
diff --git a/qapi/qapi-type-helpers.c b/qapi/qapi-type-helpers.c
new file mode 100644 (file)
index 0000000..f76b34f
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * QAPI common helper functions
+ *
+ * This file provides helper functions related to types defined
+ * in the QAPI schema.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/type-helpers.h"
+
+HumanReadableText *human_readable_text_from_str(GString *str)
+{
+    HumanReadableText *ret = g_new0(HumanReadableText, 1);
+
+    ret->human_readable_text = g_steal_pointer(&str->str);
+
+    return ret;
+}
index 3c24bb3d4548c2f1740078dea3bf5b7cb3f5cf16..63596e11c569285cd530ae6e57b61d02bab72157 100644 (file)
  */
 
 #include "qemu/osdep.h"
+#include "qapi/compat-policy.h"
 #include "qapi/error.h"
 #include "qemu/ctype.h"
 #include "qapi/qmp/qerror.h"
 
+CompatPolicy compat_policy;
+
+static bool compat_policy_input_ok1(const char *adjective,
+                                    CompatPolicyInput policy,
+                                    ErrorClass error_class,
+                                    const char *kind, const char *name,
+                                    Error **errp)
+{
+    switch (policy) {
+    case COMPAT_POLICY_INPUT_ACCEPT:
+        return true;
+    case COMPAT_POLICY_INPUT_REJECT:
+        error_set(errp, error_class, "%s %s %s disabled by policy",
+                  adjective, kind, name);
+        return false;
+    case COMPAT_POLICY_INPUT_CRASH:
+    default:
+        abort();
+    }
+}
+
+bool compat_policy_input_ok(unsigned special_features,
+                            const CompatPolicy *policy,
+                            ErrorClass error_class,
+                            const char *kind, const char *name,
+                            Error **errp)
+{
+    if ((special_features & 1u << QAPI_DEPRECATED)
+        && !compat_policy_input_ok1("Deprecated",
+                                    policy->deprecated_input,
+                                    error_class, kind, name, errp)) {
+        return false;
+    }
+    if ((special_features & (1u << QAPI_UNSTABLE))
+        && !compat_policy_input_ok1("Unstable",
+                                    policy->unstable_input,
+                                    error_class, kind, name, errp)) {
+        return false;
+    }
+    return true;
+}
+
 const char *qapi_enum_lookup(const QEnumLookup *lookup, int val)
 {
     assert(val >= 0 && val < lookup->size);
@@ -70,7 +113,7 @@ bool qapi_bool_parse(const char *name, const char *value, bool *obj, Error **err
  * may contain only letters, digits, hyphen and period.
  * The special exception for enumeration names is not implemented.
  * See docs/devel/qapi-code-gen.txt for more on QAPI naming rules.
- * Keep this consistent with scripts/qapi.py!
+ * Keep this consistent with scripts/qapi-gen.py!
  * If @complete, the parse fails unless it consumes @str completely.
  * Return its length on success, -1 on failure.
  */
index 7e5f40e7f01a4c95e3706c082b3b7a88d428ec54..6c13510a2bc72ee4b512fa525995fc7da908633a 100644 (file)
  */
 
 #include "qemu/osdep.h"
+#include "qapi/compat-policy.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/visitor.h"
 #include "qapi/visitor-impl.h"
 #include "trace.h"
 
+/* Zero-initialization must result in default policy */
+QEMU_BUILD_BUG_ON(COMPAT_POLICY_INPUT_ACCEPT || COMPAT_POLICY_OUTPUT_ACCEPT);
+
+
 void visit_complete(Visitor *v, void *opaque)
 {
     assert(v->type != VISITOR_OUTPUT || v->complete);
@@ -135,6 +140,31 @@ bool visit_optional(Visitor *v, const char *name, bool *present)
     return *present;
 }
 
+bool visit_policy_reject(Visitor *v, const char *name,
+                         unsigned special_features, Error **errp)
+{
+    trace_visit_policy_reject(v, name);
+    if (v->policy_reject) {
+        return v->policy_reject(v, name, special_features, errp);
+    }
+    return false;
+}
+
+bool visit_policy_skip(Visitor *v, const char *name,
+                       unsigned special_features)
+{
+    trace_visit_policy_skip(v, name);
+    if (v->policy_skip) {
+        return v->policy_skip(v, name, special_features);
+    }
+    return false;
+}
+
+void visit_set_policy(Visitor *v, CompatPolicy *policy)
+{
+    v->compat_policy = *policy;
+}
+
 bool visit_is_input(Visitor *v)
 {
     return v->type == VISITOR_INPUT;
@@ -366,7 +396,7 @@ static bool input_type_enum(Visitor *v, const char *name, int *obj,
                             const QEnumLookup *lookup, Error **errp)
 {
     int64_t value;
-    char *enum_str;
+    g_autofree char *enum_str = NULL;
 
     if (!visit_type_str(v, name, &enum_str, errp)) {
         return false;
@@ -374,12 +404,19 @@ static bool input_type_enum(Visitor *v, const char *name, int *obj,
 
     value = qapi_enum_parse(lookup, enum_str, -1, NULL);
     if (value < 0) {
-        error_setg(errp, QERR_INVALID_PARAMETER, enum_str);
-        g_free(enum_str);
+        error_setg(errp, "Parameter '%s' does not accept value '%s'",
+                   name ? name : "null", enum_str);
+        return false;
+    }
+
+    if (lookup->special_features
+        && !compat_policy_input_ok(lookup->special_features[value],
+                                   &v->compat_policy,
+                                   ERROR_CLASS_GENERIC_ERROR,
+                                   "value", enum_str, errp)) {
         return false;
     }
 
-    g_free(enum_str);
     *obj = value;
     return true;
 }
index 13254529bfabd726b6bcf0bfe3590419ebe34bbf..6bc5a733b86f1cff5b060fb88161ba3e17513b85 100644 (file)
 #
 # @typename: the type name of a device
 #
-# Returns: a list of ObjectPropertyInfo describing a devices properties
+# Returns: a list of ObjectPropertyInfo describing a devices
+#     properties
 #
-# Note: objects can create properties at runtime, for example to describe
-#       links between different devices and/or objects. These properties
-#       are not included in the output of this command.
+# Note: objects can create properties at runtime, for example to
+#     describe links between different devices and/or objects.  These
+#     properties are not included in the output of this command.
 #
 # Since: 1.2
 ##
 ##
 # @device_add:
 #
+# Add a device.
+#
 # @driver: the name of the new device's driver
 #
 # @bus: the device's parent bus (device tree path)
 #
 # @id: the device's ID, must be unique
 #
-# Additional arguments depend on the type.
+# Features:
 #
-# Add a device.
+# @json-cli: If present, the "-device" command line option supports
+#     JSON syntax with a structure identical to the arguments of this
+#     command.
+#
+# @json-cli-hotplug: If present, the "-device" command line option
+#     supports JSON syntax without the reference counting leak that
+#     broke hot-unplug
 #
 # Notes:
-# 1. For detailed information about this command, please refer to the
+#
+# 1. Additional arguments depend on the type.
+#
+# 2. For detailed information about this command, please refer to the
 #    'docs/qdev-device-use.txt' file.
 #
-# 2. It's possible to list device properties by running QEMU with the
+# 3. It's possible to list device properties by running QEMU with the
 #    "-device DEVICE,help" command-line argument, where DEVICE is the
 #    device's name
 #
 # <- { "return": {} }
 #
 # TODO: This command effectively bypasses QAPI completely due to its
-#       "additional arguments" business.  It shouldn't have been added to
-#       the schema in this form.  It should be qapified properly, or
-#       replaced by a properly qapified command.
+#     "additional arguments" business.  It shouldn't have been added
+#     to the schema in this form.  It should be qapified properly, or
+#     replaced by a properly qapified command.
 #
 # Since: 0.13
 ##
 { 'command': 'device_add',
   'data': {'driver': 'str', '*bus': 'str', '*id': 'str'},
-  'gen': false } # so we can get the additional arguments
+  'gen': false, # so we can get the additional arguments
+  'features': ['json-cli', 'json-cli-hotplug'] }
 
 ##
 # @device_del:
 #
 # @id: the device's ID or QOM path
 #
-# Returns: Nothing on success
-#          If @id is not a valid device, DeviceNotFound
+# Returns: Nothing on success If @id is not a valid device,
+#     DeviceNotFound
 #
-# Notes: When this command completes, the device may not be removed from the
-#        guest.  Hot removal is an operation that requires guest cooperation.
-#        This command merely requests that the guest begin the hot removal
-#        process.  Completion of the device removal process is signaled with a
-#        DEVICE_DELETED event. Guest reset will automatically complete removal
-#        for all devices.
+# Notes: When this command completes, the device may not be removed
+#     from the guest.  Hot removal is an operation that requires guest
+#     cooperation.  This command merely requests that the guest begin
+#     the hot removal process.  Completion of the device removal
+#     process is signaled with a DEVICE_DELETED event.  Guest reset
+#     will automatically complete removal for all devices.  If a
+#     guest-side error in the hot removal process is detected, the
+#     device will not be removed and a DEVICE_UNPLUG_GUEST_ERROR event
+#     is sent.  Some errors cannot be detected.
 #
-# Since: 0.14.0
+# Since: 0.14
 #
-# Example:
+# Examples:
 #
 # -> { "execute": "device_del",
 #      "arguments": { "id": "net1" } }
 # -> { "execute": "device_del",
 #      "arguments": { "id": "/machine/peripheral-anon/device[0]" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'device_del', 'data': {'id': 'str'} }
 
 ##
 # @DEVICE_DELETED:
 #
-# Emitted whenever the device removal completion is acknowledged by the guest.
-# At this point, it's safe to reuse the specified device ID. Device removal can
-# be initiated by the guest or by HMP/QMP commands.
+# Emitted whenever the device removal completion is acknowledged by
+# the guest.  At this point, it's safe to reuse the specified device
+# ID. Device removal can be initiated by the guest or by HMP/QMP
+# commands.
 #
-# @device: device name
+# @device: the device's ID if it has one
 #
-# @path: device path
+# @path: the device's QOM path
 #
 # Since: 1.5
 #
 #      "data": { "device": "virtio-net-pci-0",
 #                "path": "/machine/peripheral/virtio-net-pci-0" },
 #      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
-#
 ##
 { 'event': 'DEVICE_DELETED',
   'data': { '*device': 'str', 'path': 'str' } }
+
+##
+# @DEVICE_UNPLUG_GUEST_ERROR:
+#
+# Emitted when a device hot unplug fails due to a guest reported
+# error.
+#
+# @device: the device's ID if it has one
+#
+# @path: the device's QOM path
+#
+# Since: 6.2
+#
+# Example:
+#
+# <- { "event": "DEVICE_UNPLUG_GUEST_ERROR",
+#      "data": { "device": "core1",
+#                "path": "/machine/peripheral/core1" },
+#      "timestamp": { "seconds": 1615570772, "microseconds": 202844 } }
+##
+{ 'event': 'DEVICE_UNPLUG_GUEST_ERROR',
+  'data': { '*device': 'str', 'path': 'str' } }
index 9a2d7dd29a931f0fa53917295a1c671c68141ea8..176b549473c3c0cd1627c1888c88bf9441ca2bd2 100644 (file)
 #include "qemu/osdep.h"
 
 #include "block/aio.h"
+#include "qapi/compat-policy.h"
 #include "qapi/error.h"
 #include "qapi/qmp/dispatch.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qjson.h"
-#include "sysemu/runstate.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qobject-output-visitor.h"
 #include "qapi/qmp/qbool.h"
 #include "qemu/coroutine.h"
 #include "qemu/main-loop.h"
 
+Visitor *qobject_input_visitor_new_qmp(QObject *obj)
+{
+    Visitor *v = qobject_input_visitor_new(obj);
+
+    visit_set_policy(v, &compat_policy);
+    return v;
+}
+
+Visitor *qobject_output_visitor_new_qmp(QObject **result)
+{
+    Visitor *v = qobject_output_visitor_new(result);
+
+    visit_set_policy(v, &compat_policy);
+    return v;
+}
+
 static QDict *qmp_dispatch_check_obj(QDict *dict, bool allow_oob,
                                      Error **errp)
 {
@@ -116,8 +134,8 @@ static void do_qmp_dispatch_bh(void *opaque)
  * Runs outside of coroutine context for OOB commands, but in coroutine
  * context for everything else.
  */
-QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
-                    bool allow_oob, Monitor *cur_mon)
+QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *request,
+                                       bool allow_oob, Monitor *cur_mon)
 {
     Error *err = NULL;
     bool oob;
@@ -155,10 +173,17 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
                   "The command %s has not been found", command);
         goto out;
     }
+    if (!compat_policy_input_ok(cmd->special_features, &compat_policy,
+                                ERROR_CLASS_COMMAND_NOT_FOUND,
+                                "command", command, &err)) {
+        goto out;
+    }
     if (!cmd->enabled) {
         error_set(&err, ERROR_CLASS_COMMAND_NOT_FOUND,
-                  "The command %s has been disabled for this instance",
-                  command);
+                  "Command %s has been disabled%s%s",
+                  command,
+                  cmd->disable_reason ? ": " : "",
+                  cmd->disable_reason ?: "");
         goto out;
     }
     if (oob && !(cmd->options & QCO_ALLOW_OOB)) {
@@ -167,10 +192,7 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
         goto out;
     }
 
-    if (runstate_check(RUN_STATE_PRECONFIG) &&
-        !(cmd->options & QCO_ALLOW_PRECONFIG)) {
-        error_setg(&err, "The command '%s' isn't permitted in '%s' state",
-                   cmd->name, RunState_str(RUN_STATE_PRECONFIG));
+    if (!qmp_command_available(cmd, &err)) {
         goto out;
     }
 
@@ -184,9 +206,31 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
     assert(!(oob && qemu_in_coroutine()));
     assert(monitor_cur() == NULL);
     if (!!(cmd->options & QCO_COROUTINE) == qemu_in_coroutine()) {
+        if (qemu_in_coroutine()) {
+            /*
+             * Move the coroutine from iohandler_ctx to qemu_aio_context for
+             * executing the command handler so that it can make progress if it
+             * involves an AIO_WAIT_WHILE().
+             */
+            aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self());
+            qemu_coroutine_yield();
+        }
+
         monitor_set_cur(qemu_coroutine_self(), cur_mon);
         cmd->fn(args, &ret, &err);
         monitor_set_cur(qemu_coroutine_self(), NULL);
+
+        if (qemu_in_coroutine()) {
+            /*
+             * Yield and reschedule so the main loop stays responsive.
+             *
+             * Move back to iohandler_ctx so that nested event loops for
+             * qemu_aio_context don't start new monitor commands.
+             */
+            aio_co_schedule(iohandler_get_aio_context(),
+                            qemu_coroutine_self());
+            qemu_coroutine_yield();
+        }
     } else {
        /*
         * Actual context doesn't match the one the command needs.
@@ -210,7 +254,7 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
             .errp       = &err,
             .co         = qemu_coroutine_self(),
         };
-        aio_bh_schedule_oneshot(qemu_get_aio_context(), do_qmp_dispatch_bh,
+        aio_bh_schedule_oneshot(iohandler_get_aio_context(), do_qmp_dispatch_bh,
                                 &data);
         qemu_coroutine_yield();
     }
index 19d3cd003833fc70dcc246cd034ddd1d2fe0ebaf..0fe0d0a5a6e5abdf318046dcd7a7f9d55921d144 100644 (file)
 
 static void timestamp_put(QDict *qdict)
 {
-    int err;
     QDict *ts;
-    qemu_timeval tv;
+    int64_t rt = g_get_real_time();
 
-    err = qemu_gettimeofday(&tv);
-    /* Put -1 to indicate failure of getting host time */
     ts = qdict_from_jsonf_nofail("{ 'seconds': %lld, 'microseconds': %lld }",
-                                 err < 0 ? -1LL : (long long)tv.tv_sec,
-                                 err < 0 ? -1LL : (long long)tv.tv_usec);
+                                 (long long)rt / G_USEC_PER_SEC,
+                                 (long long)rt % G_USEC_PER_SEC);
     qdict_put(qdict, "timestamp", ts);
 }
 
index 58c65b5052e54ea797aff868ead18088ae1e85e1..485bc5e6fc713ebd0d44888c2be4fc31a00d988c 100644 (file)
@@ -16,7 +16,8 @@
 #include "qapi/qmp/dispatch.h"
 
 void qmp_register_command(QmpCommandList *cmds, const char *name,
-                          QmpCommandFunc *fn, QmpCommandOptions options)
+                          QmpCommandFunc *fn, QmpCommandOptions options,
+                          unsigned special_features)
 {
     QmpCommand *cmd = g_malloc0(sizeof(*cmd));
 
@@ -27,6 +28,7 @@ void qmp_register_command(QmpCommandList *cmds, const char *name,
     cmd->fn = fn;
     cmd->enabled = true;
     cmd->options = options;
+    cmd->special_features = special_features;
     QTAILQ_INSERT_TAIL(cmds, cmd, node);
 }
 
@@ -43,26 +45,28 @@ const QmpCommand *qmp_find_command(const QmpCommandList *cmds, const char *name)
 }
 
 static void qmp_toggle_command(QmpCommandList *cmds, const char *name,
-                               bool enabled)
+                               bool enabled, const char *disable_reason)
 {
     QmpCommand *cmd;
 
     QTAILQ_FOREACH(cmd, cmds, node) {
         if (strcmp(cmd->name, name) == 0) {
             cmd->enabled = enabled;
+            cmd->disable_reason = disable_reason;
             return;
         }
     }
 }
 
-void qmp_disable_command(QmpCommandList *cmds, const char *name)
+void qmp_disable_command(QmpCommandList *cmds, const char *name,
+                         const char *disable_reason)
 {
-    qmp_toggle_command(cmds, name, false);
+    qmp_toggle_command(cmds, name, false, disable_reason);
 }
 
 void qmp_enable_command(QmpCommandList *cmds, const char *name)
 {
-    qmp_toggle_command(cmds, name, true);
+    qmp_toggle_command(cmds, name, true, NULL);
 }
 
 bool qmp_command_is_enabled(const QmpCommand *cmd)
index 23843b242e8a5cd32f5b02c0e59839f750eebc40..3e8aca6b1594cf1a41841cbd9140d6932113cd64 100644 (file)
@@ -14,6 +14,7 @@
 
 #include "qemu/osdep.h"
 #include <math.h>
+#include "qapi/compat-policy.h"
 #include "qapi/error.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/visitor-impl.h"
@@ -27,7 +28,7 @@
 #include "qapi/qmp/qnum.h"
 #include "qapi/qmp/qstring.h"
 #include "qemu/cutils.h"
-#include "qemu/option.h"
+#include "qemu/keyval.h"
 
 typedef struct StackObject {
     const char *name;            /* Name of @obj in its parent, if any */
@@ -662,6 +663,15 @@ static void qobject_input_optional(Visitor *v, const char *name, bool *present)
     *present = true;
 }
 
+static bool qobject_input_policy_reject(Visitor *v, const char *name,
+                                        unsigned special_features,
+                                        Error **errp)
+{
+    return !compat_policy_input_ok(special_features, &v->compat_policy,
+                                   ERROR_CLASS_GENERIC_ERROR,
+                                   "parameter", name, errp);
+}
+
 static void qobject_input_free(Visitor *v)
 {
     QObjectInputVisitor *qiv = to_qiv(v);
@@ -696,6 +706,7 @@ static QObjectInputVisitor *qobject_input_visitor_base_new(QObject *obj)
     v->visitor.end_list = qobject_input_end_list;
     v->visitor.start_alternate = qobject_input_start_alternate;
     v->visitor.optional = qobject_input_optional;
+    v->visitor.policy_reject = qobject_input_policy_reject;
     v->visitor.free = qobject_input_free;
 
     v->root = qobject_ref(obj);
index ba6f6ac8a72a422c0235751a4befe7429530ab1d..74770edd73c68849b2099bef597b3953aa7716fa 100644 (file)
@@ -13,6 +13,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qapi/compat-policy.h"
 #include "qapi/qobject-output-visitor.h"
 #include "qapi/visitor-impl.h"
 #include "qemu/queue.h"
@@ -31,6 +32,7 @@ typedef struct QStackEntry {
 
 struct QObjectOutputVisitor {
     Visitor visitor;
+
     QSLIST_HEAD(, QStackEntry) stack; /* Stack of unfinished containers */
     QObject *root; /* Root of the output visit */
     QObject **result; /* User's storage location for result */
@@ -207,6 +209,17 @@ static bool qobject_output_type_null(Visitor *v, const char *name,
     return true;
 }
 
+static bool qobject_output_policy_skip(Visitor *v, const char *name,
+                                       unsigned special_features)
+{
+    CompatPolicy *pol = &v->compat_policy;
+
+    return ((special_features & 1u << QAPI_DEPRECATED)
+            && pol->deprecated_output == COMPAT_POLICY_OUTPUT_HIDE)
+        || ((special_features & 1u << QAPI_UNSTABLE)
+            && pol->unstable_output == COMPAT_POLICY_OUTPUT_HIDE);
+}
+
 /* Finish building, and return the root object.
  * The root object is never null. The caller becomes the object's
  * owner, and should use qobject_unref() when done with it.  */
@@ -256,6 +269,7 @@ Visitor *qobject_output_visitor_new(QObject **result)
     v->visitor.type_number = qobject_output_type_number;
     v->visitor.type_any = qobject_output_type_any;
     v->visitor.type_null = qobject_output_type_null;
+    v->visitor.policy_skip = qobject_output_policy_skip;
     v->visitor.complete = qobject_output_complete;
     v->visitor.free = qobject_output_free;
 
index 0b0b92944ba0376a79adce0a17b5b761df80c3db..c53ef978ff7e6a81f8c926159fe52c0d349696ac 100644 (file)
@@ -4,6 +4,11 @@
 # This work is licensed under the terms of the GNU GPL, version 2 or later.
 # See the COPYING file in the top-level directory.
 
+{ 'include': 'authz.json' }
+{ 'include': 'block-core.json' }
+{ 'include': 'common.json' }
+{ 'include': 'crypto.json' }
+
 ##
 # = QEMU Object Model (QOM)
 ##
 #
 # @name: the name of the property
 #
-# @type: the type of the property.  This will typically come in one of four
-#        forms:
+# @type: the type of the property.  This will typically come in one of
+#     four forms:
 #
-#        1) A primitive type such as 'u8', 'u16', 'bool', 'str', or 'double'.
-#           These types are mapped to the appropriate JSON type.
+#     1) A primitive type such as 'u8', 'u16', 'bool', 'str', or
+#        'double'.  These types are mapped to the appropriate JSON
+#        type.
 #
-#        2) A child type in the form 'child<subtype>' where subtype is a qdev
-#           device type name.  Child properties create the composition tree.
+#     2) A child type in the form 'child<subtype>' where subtype is a
+#        qdev device type name.  Child properties create the
+#        composition tree.
 #
-#        3) A link type in the form 'link<subtype>' where subtype is a qdev
-#           device type name.  Link properties form the device model graph.
+#     3) A link type in the form 'link<subtype>' where subtype is a
+#        qdev device type name.  Link properties form the device model
+#        graph.
 #
 # @description: if specified, the description of the property.
 #
 ##
 # @qom-list:
 #
-# This command will list any properties of a object given a path in the object
-# model.
+# This command will list any properties of a object given a path in
+# the object model.
 #
-# @path: the path within the object model.  See @qom-get for a description of
-#        this parameter.
+# @path: the path within the object model.  See @qom-get for a
+#     description of this parameter.
 #
-# Returns: a list of @ObjectPropertyInfo that describe the properties of the
-#          object.
+# Returns: a list of @ObjectPropertyInfo that describe the properties
+#     of the object.
 #
 # Since: 1.2
 #
@@ -59,7 +67,6 @@
 #                  { "name": "parallel0", "type": "child<chardev-vc>" },
 #                  { "name": "serial0", "type": "child<chardev-vc>" },
 #                  { "name": "mon0", "type": "child<chardev-stdio>" } ] }
-#
 ##
 { 'command': 'qom-list',
   'data': { 'path': 'str' },
 ##
 # @qom-get:
 #
-# This command will get a property from a object model path and return the
-# value.
+# This command will get a property from a object model path and return
+# the value.
 #
-# @path: The path within the object model.  There are two forms of supported
-#        paths--absolute and partial paths.
+# @path: The path within the object model.  There are two forms of
+#     supported paths--absolute and partial paths.
 #
-#        Absolute paths are derived from the root object and can follow child<>
-#        or link<> properties.  Since they can follow link<> properties, they
-#        can be arbitrarily long.  Absolute paths look like absolute filenames
-#        and are prefixed  with a leading slash.
+#     Absolute paths are derived from the root object and can follow
+#     child<> or link<> properties.  Since they can follow link<>
+#     properties, they can be arbitrarily long.  Absolute paths look
+#     like absolute filenames and are prefixed  with a leading slash.
 #
-#        Partial paths look like relative filenames.  They do not begin
-#        with a prefix.  The matching rules for partial paths are subtle but
-#        designed to make specifying objects easy.  At each level of the
-#        composition tree, the partial path is matched as an absolute path.
-#        The first match is not returned.  At least two matches are searched
-#        for.  A successful result is only returned if only one match is
-#        found.  If more than one match is found, a flag is return to
-#        indicate that the match was ambiguous.
+#     Partial paths look like relative filenames.  They do not begin
+#     with a prefix.  The matching rules for partial paths are subtle
+#     but designed to make specifying objects easy.  At each level of
+#     the composition tree, the partial path is matched as an absolute
+#     path.  The first match is not returned.  At least two matches
+#     are searched for.  A successful result is only returned if only
+#     one match is found.  If more than one match is found, a flag is
+#     return to indicate that the match was ambiguous.
 #
 # @property: The property name to read
 #
-# Returns: The property value.  The type depends on the property
-#          type. child<> and link<> properties are returned as #str
-#          pathnames.  All integer property types (u8, u16, etc) are
-#          returned as #int.
+# Returns: The property value.  The type depends on the property type.
+#     child<> and link<> properties are returned as #str pathnames.
+#     All integer property types (u8, u16, etc) are returned as #int.
 #
 # Since: 1.2
 #
-# Example:
+# Examples:
 #
 # 1. Use absolute path
 #
 #      "arguments": { "path": "unattached/sysbus",
 #                     "property": "type" } }
 # <- { "return": "System" }
-#
 ##
 { 'command': 'qom-get',
   'data': { 'path': 'str', 'property': 'str' },
 #
 # @property: the property name to set
 #
-# @value: a value who's type is appropriate for the property type.  See @qom-get
-#         for a description of type mapping.
+# @value: a value who's type is appropriate for the property type.
+#     See @qom-get for a description of type mapping.
 #
 # Since: 1.2
 #
 #                     "property": "graphics",
 #                     "value": false } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'qom-set',
   'data': { 'path': 'str', 'property': 'str', 'value': 'any' },
 # @name: the type name found in the search
 #
 # @abstract: the type is abstract and can't be directly instantiated.
-#            Omitted if false. (since 2.10)
+#     Omitted if false.  (since 2.10)
 #
 # @parent: Name of parent type, if any (since 2.10)
 #
 #
 # This command will return a list of types given search parameters
 #
-# @implements: if specified, only return types that implement this type name
+# @implements: if specified, only return types that implement this
+#     type name
 #
 # @abstract: if true, include abstract types in the results
 #
-# Returns: a list of @ObjectTypeInfo or an empty list if no results are found
+# Returns: a list of @ObjectTypeInfo or an empty list if no results
+#     are found
 #
 # Since: 1.1
 ##
 #
 # @typename: the type name of an object
 #
-# Note: objects can create properties at runtime, for example to describe
-#       links between different devices and/or objects. These properties
-#       are not included in the output of this command.
+# Note: objects can create properties at runtime, for example to
+#     describe links between different devices and/or objects.  These
+#     properties are not included in the output of this command.
 #
 # Returns: a list of ObjectPropertyInfo describing object properties
 #
   'allow-preconfig': true }
 
 ##
-# @object-add:
+# @CanHostSocketcanProperties:
 #
-# Create a QOM object.
+# Properties for can-host-socketcan objects.
+#
+# @if: interface name of the host system CAN bus to connect to
+#
+# @canbus: object ID of the can-bus object to connect to the host
+#     interface
+#
+# Since: 2.12
+##
+{ 'struct': 'CanHostSocketcanProperties',
+  'data': { 'if': 'str',
+            'canbus': 'str' } }
+
+##
+# @ColoCompareProperties:
+#
+# Properties for colo-compare objects.
+#
+# @primary_in: name of the character device backend to use for the
+#     primary input (incoming packets are redirected to @outdev)
+#
+# @secondary_in: name of the character device backend to use for
+#     secondary input (incoming packets are only compared to the input
+#     on @primary_in and then dropped)
+#
+# @outdev: name of the character device backend to use for output
+#
+# @iothread: name of the iothread to run in
+#
+# @notify_dev: name of the character device backend to be used to
+#     communicate with the remote colo-frame (only for Xen COLO)
+#
+# @compare_timeout: the maximum time to hold a packet from @primary_in
+#     for comparison with an incoming packet on @secondary_in in
+#     milliseconds (default: 3000)
+#
+# @expired_scan_cycle: the interval at which colo-compare checks
+#     whether packets from @primary have timed out, in milliseconds
+#     (default: 3000)
+#
+# @max_queue_size: the maximum number of packets to keep in the queue
+#     for comparing with incoming packets from @secondary_in.  If the
+#     queue is full and additional packets are received, the
+#     additional packets are dropped.  (default: 1024)
+#
+# @vnet_hdr_support: if true, vnet header support is enabled
+#     (default: false)
+#
+# Since: 2.8
+##
+{ 'struct': 'ColoCompareProperties',
+  'data': { 'primary_in': 'str',
+            'secondary_in': 'str',
+            'outdev': 'str',
+            'iothread': 'str',
+            '*notify_dev': 'str',
+            '*compare_timeout': 'uint64',
+            '*expired_scan_cycle': 'uint32',
+            '*max_queue_size': 'uint32',
+            '*vnet_hdr_support': 'bool' } }
+
+##
+# @CryptodevBackendProperties:
+#
+# Properties for cryptodev-backend and cryptodev-backend-builtin
+# objects.
+#
+# @queues: the number of queues for the cryptodev backend.  Ignored
+#     for cryptodev-backend and must be 1 for
+#     cryptodev-backend-builtin.  (default: 1)
+#
+# @throttle-bps: limit total bytes per second (Since 8.0)
+#
+# @throttle-ops: limit total operations per second (Since 8.0)
+#
+# Since: 2.8
+##
+{ 'struct': 'CryptodevBackendProperties',
+  'data': { '*queues': 'uint32',
+            '*throttle-bps': 'uint64',
+            '*throttle-ops': 'uint64' } }
+
+##
+# @CryptodevVhostUserProperties:
+#
+# Properties for cryptodev-vhost-user objects.
+#
+# @chardev: the name of a Unix domain socket character device that
+#     connects to the vhost-user server
+#
+# Since: 2.12
+##
+{ 'struct': 'CryptodevVhostUserProperties',
+  'base': 'CryptodevBackendProperties',
+  'data': { 'chardev': 'str' } }
+
+##
+# @DBusVMStateProperties:
+#
+# Properties for dbus-vmstate objects.
+#
+# @addr: the name of the DBus bus to connect to
+#
+# @id-list: a comma separated list of DBus IDs of helpers whose data
+#     should be included in the VM state on migration
+#
+# Since: 5.0
+##
+{ 'struct': 'DBusVMStateProperties',
+  'data': { 'addr': 'str' ,
+            '*id-list': 'str' } }
+
+##
+# @NetfilterInsert:
+#
+# Indicates where to insert a netfilter relative to a given other
+# filter.
+#
+# @before: insert before the specified filter
+#
+# @behind: insert behind the specified filter
+#
+# Since: 5.0
+##
+{ 'enum': 'NetfilterInsert',
+  'data': [ 'before', 'behind' ] }
+
+##
+# @NetfilterProperties:
+#
+# Properties for objects of classes derived from netfilter.
+#
+# @netdev: id of the network device backend to filter
+#
+# @queue: indicates which queue(s) to filter (default: all)
+#
+# @status: indicates whether the filter is enabled ("on") or disabled
+#     ("off") (default: "on")
+#
+# @position: specifies where the filter should be inserted in the
+#     filter list.  "head" means the filter is inserted at the head of
+#     the filter list, before any existing filters.  "tail" means the
+#     filter is inserted at the tail of the filter list, behind any
+#     existing filters (default). "id=<id>" means the filter is
+#     inserted before or behind the filter specified by <id>,
+#     depending on the @insert property.  (default: "tail")
+#
+# @insert: where to insert the filter relative to the filter given in
+#     @position.  Ignored if @position is "head" or "tail".
+#     (default: behind)
+#
+# Since: 2.5
+##
+{ 'struct': 'NetfilterProperties',
+  'data': { 'netdev': 'str',
+            '*queue': 'NetFilterDirection',
+            '*status': 'str',
+            '*position': 'str',
+            '*insert': 'NetfilterInsert' } }
+
+##
+# @FilterBufferProperties:
+#
+# Properties for filter-buffer objects.
+#
+# @interval: a non-zero interval in microseconds.  All packets
+#     arriving in the given interval are delayed until the end of the
+#     interval.
+#
+# Since: 2.5
+##
+{ 'struct': 'FilterBufferProperties',
+  'base': 'NetfilterProperties',
+  'data': { 'interval': 'uint32' } }
+
+##
+# @FilterDumpProperties:
+#
+# Properties for filter-dump objects.
+#
+# @file: the filename where the dumped packets should be stored
+#
+# @maxlen: maximum number of bytes in a packet that are stored
+#     (default: 65536)
+#
+# Since: 2.5
+##
+{ 'struct': 'FilterDumpProperties',
+  'base': 'NetfilterProperties',
+  'data': { 'file': 'str',
+            '*maxlen': 'uint32' } }
+
+##
+# @FilterMirrorProperties:
+#
+# Properties for filter-mirror objects.
+#
+# @outdev: the name of a character device backend to which all
+#     incoming packets are mirrored
+#
+# @vnet_hdr_support: if true, vnet header support is enabled
+#     (default: false)
+#
+# Since: 2.6
+##
+{ 'struct': 'FilterMirrorProperties',
+  'base': 'NetfilterProperties',
+  'data': { 'outdev': 'str',
+            '*vnet_hdr_support': 'bool' } }
+
+##
+# @FilterRedirectorProperties:
+#
+# Properties for filter-redirector objects.
+#
+# At least one of @indev or @outdev must be present.  If both are
+# present, they must not refer to the same character device backend.
+#
+# @indev: the name of a character device backend from which packets
+#     are received and redirected to the filtered network device
+#
+# @outdev: the name of a character device backend to which all
+#     incoming packets are redirected
+#
+# @vnet_hdr_support: if true, vnet header support is enabled
+#     (default: false)
+#
+# Since: 2.6
+##
+{ 'struct': 'FilterRedirectorProperties',
+  'base': 'NetfilterProperties',
+  'data': { '*indev': 'str',
+            '*outdev': 'str',
+            '*vnet_hdr_support': 'bool' } }
+
+##
+# @FilterRewriterProperties:
+#
+# Properties for filter-rewriter objects.
+#
+# @vnet_hdr_support: if true, vnet header support is enabled
+#     (default: false)
+#
+# Since: 2.8
+##
+{ 'struct': 'FilterRewriterProperties',
+  'base': 'NetfilterProperties',
+  'data': { '*vnet_hdr_support': 'bool' } }
+
+##
+# @InputBarrierProperties:
+#
+# Properties for input-barrier objects.
+#
+# @name: the screen name as declared in the screens section of
+#     barrier.conf
+#
+# @server: hostname of the Barrier server (default: "localhost")
+#
+# @port: TCP port of the Barrier server (default: "24800")
+#
+# @x-origin: x coordinate of the leftmost pixel on the guest screen
+#     (default: "0")
+#
+# @y-origin: y coordinate of the topmost pixel on the guest screen
+#     (default: "0")
+#
+# @width: the width of secondary screen in pixels (default: "1920")
+#
+# @height: the height of secondary screen in pixels (default: "1080")
+#
+# Since: 4.2
+##
+{ 'struct': 'InputBarrierProperties',
+  'data': { 'name': 'str',
+            '*server': 'str',
+            '*port': 'str',
+            '*x-origin': 'str',
+            '*y-origin': 'str',
+            '*width': 'str',
+            '*height': 'str' } }
+
+##
+# @InputLinuxProperties:
+#
+# Properties for input-linux objects.
+#
+# @evdev: the path of the host evdev device to use
+#
+# @grab_all: if true, grab is toggled for all devices (e.g. both
+#     keyboard and mouse) instead of just one device (default: false)
+#
+# @repeat: enables auto-repeat events (default: false)
+#
+# @grab-toggle: the key or key combination that toggles device grab
+#     (default: ctrl-ctrl)
+#
+# Since: 2.6
+##
+{ 'struct': 'InputLinuxProperties',
+  'data': { 'evdev': 'str',
+            '*grab_all': 'bool',
+            '*repeat': 'bool',
+            '*grab-toggle': 'GrabToggleKeys' } }
+
+##
+# @EventLoopBaseProperties:
+#
+# Common properties for event loops
+#
+# @aio-max-batch: maximum number of requests in a batch for the AIO
+#     engine, 0 means that the engine will use its default.
+#     (default: 0)
+#
+# @thread-pool-min: minimum number of threads reserved in the thread
+#     pool (default:0)
+#
+# @thread-pool-max: maximum number of threads the thread pool can
+#     contain (default:64)
+#
+# Since: 7.1
+##
+{ 'struct': 'EventLoopBaseProperties',
+  'data': { '*aio-max-batch': 'int',
+            '*thread-pool-min': 'int',
+            '*thread-pool-max': 'int' } }
+
+##
+# @IothreadProperties:
+#
+# Properties for iothread objects.
+#
+# @poll-max-ns: the maximum number of nanoseconds to busy wait for
+#     events.  0 means polling is disabled (default: 32768 on POSIX
+#     hosts, 0 otherwise)
+#
+# @poll-grow: the multiplier used to increase the polling time when
+#     the algorithm detects it is missing events due to not polling
+#     long enough.  0 selects a default behaviour (default: 0)
+#
+# @poll-shrink: the divisor used to decrease the polling time when the
+#     algorithm detects it is spending too long polling without
+#     encountering events.  0 selects a default behaviour (default: 0)
+#
+# The @aio-max-batch option is available since 6.1.
+#
+# Since: 2.0
+##
+{ 'struct': 'IothreadProperties',
+  'base': 'EventLoopBaseProperties',
+  'data': { '*poll-max-ns': 'int',
+            '*poll-grow': 'int',
+            '*poll-shrink': 'int' } }
+
+##
+# @MainLoopProperties:
+#
+# Properties for the main-loop object.
+#
+# Since: 7.1
+##
+{ 'struct': 'MainLoopProperties',
+  'base': 'EventLoopBaseProperties',
+  'data': {} }
+
+##
+# @MemoryBackendProperties:
+#
+# Properties for objects of classes derived from memory-backend.
+#
+# @merge: if true, mark the memory as mergeable (default depends on
+#     the machine type)
+#
+# @dump: if true, include the memory in core dumps (default depends on
+#     the machine type)
+#
+# @host-nodes: the list of NUMA host nodes to bind the memory to
+#
+# @policy: the NUMA policy (default: 'default')
+#
+# @prealloc: if true, preallocate memory (default: false)
+#
+# @prealloc-threads: number of CPU threads to use for prealloc
+#     (default: 1)
+#
+# @prealloc-context: thread context to use for creation of
+#     preallocation threads (default: none) (since 7.2)
+#
+# @share: if false, the memory is private to QEMU; if true, it is
+#     shared (default: false)
+#
+# @reserve: if true, reserve swap space (or huge pages) if applicable
+#     (default: true) (since 6.1)
+#
+# @size: size of the memory region in bytes
+#
+# @x-use-canonical-path-for-ramblock-id: if true, the canonical path
+#     is used for ramblock-id.  Disable this for 4.0 machine types or
+#     older to allow migration with newer QEMU versions.
+#     (default: false generally, but true for machine types <= 4.0)
+#
+# Note: prealloc=true and reserve=false cannot be set at the same
+#     time.  With reserve=true, the behavior depends on the operating
+#     system: for example, Linux will not reserve swap space for
+#     shared file mappings -- "not applicable". In contrast,
+#     reserve=false will bail out if it cannot be configured
+#     accordingly.
+#
+# Since: 2.1
+##
+{ 'struct': 'MemoryBackendProperties',
+  'data': { '*dump': 'bool',
+            '*host-nodes': ['uint16'],
+            '*merge': 'bool',
+            '*policy': 'HostMemPolicy',
+            '*prealloc': 'bool',
+            '*prealloc-threads': 'uint32',
+            '*prealloc-context': 'str',
+            '*share': 'bool',
+            '*reserve': 'bool',
+            'size': 'size',
+            '*x-use-canonical-path-for-ramblock-id': 'bool' } }
+
+##
+# @MemoryBackendFileProperties:
+#
+# Properties for memory-backend-file objects.
+#
+# @align: the base address alignment when QEMU mmap(2)s @mem-path.
+#     Some backend stores specified by @mem-path require an alignment
+#     different than the default one used by QEMU, e.g. the device DAX
+#     /dev/dax0.0 requires 2M alignment rather than 4K. In such cases,
+#     users can specify the required alignment via this option.  0
+#     selects a default alignment (currently the page size).
+#     (default: 0)
+#
+# @offset: the offset into the target file that the region starts at.
+#     You can use this option to back multiple regions with a single
+#     file. Must be a multiple of the page size.
+#     (default: 0) (since 8.1)
+#
+# @discard-data: if true, the file contents can be destroyed when QEMU
+#     exits, to avoid unnecessarily flushing data to the backing file.
+#     Note that @discard-data is only an optimization, and QEMU might
+#     not discard file contents if it aborts unexpectedly or is
+#     terminated using SIGKILL. (default: false)
+#
+# @mem-path: the path to either a shared memory or huge page
+#     filesystem mount
+#
+# @pmem: specifies whether the backing file specified by @mem-path is
+#     in host persistent memory that can be accessed using the SNIA
+#     NVM programming model (e.g. Intel NVDIMM).
+#
+# @readonly: if true, the backing file is opened read-only; if false,
+#     it is opened read-write.  (default: false)
+#
+# @rom: whether to create Read Only Memory (ROM) that cannot be modified
+#       by the VM.  Any write attempts to such ROM will be denied.  Most
+#       use cases want writable RAM instead of ROM.  However, selected use
+#       cases, like R/O NVDIMMs, can benefit from ROM.  If set to 'on',
+#       create ROM; if set to 'off', create writable RAM;  if set to
+#       'auto', the value of the @readonly property is used.  This
+#       property is primarily helpful when we want to have proper RAM in
+#       configurations that would traditionally create ROM before this
+#       property was introduced: VM templating, where we want to open a
+#       file readonly (@readonly set to true) and mark the memory to be
+#       private for QEMU (@share set to false).  For this use case, we need
+#       writable RAM instead of ROM, and want to set this property to 'off'.
+#       (default: auto, since 8.2)
+#
+# Since: 2.1
+##
+{ 'struct': 'MemoryBackendFileProperties',
+  'base': 'MemoryBackendProperties',
+  'data': { '*align': 'size',
+            '*offset': 'size',
+            '*discard-data': 'bool',
+            'mem-path': 'str',
+            '*pmem': { 'type': 'bool', 'if': 'CONFIG_LIBPMEM' },
+            '*readonly': 'bool',
+            '*rom': 'OnOffAuto' } }
+
+##
+# @MemoryBackendMemfdProperties:
+#
+# Properties for memory-backend-memfd objects.
+#
+# The @share boolean option is true by default with memfd.
+#
+# @hugetlb: if true, the file to be created resides in the hugetlbfs
+#     filesystem (default: false)
+#
+# @hugetlbsize: the hugetlb page size on systems that support multiple
+#     hugetlb page sizes (it must be a power of 2 value supported by
+#     the system). 0 selects a default page size.  This option is
+#     ignored if @hugetlb is false.  (default: 0)
+#
+# @seal: if true, create a sealed-file, which will block further
+#     resizing of the memory (default: true)
+#
+# Since: 2.12
+##
+{ 'struct': 'MemoryBackendMemfdProperties',
+  'base': 'MemoryBackendProperties',
+  'data': { '*hugetlb': 'bool',
+            '*hugetlbsize': 'size',
+            '*seal': 'bool' } }
+
+##
+# @MemoryBackendEpcProperties:
+#
+# Properties for memory-backend-epc objects.
+#
+# The @share boolean option is true by default with epc
+#
+# The @merge boolean option is false by default with epc
+#
+# The @dump boolean option is false by default with epc
+#
+# Since: 6.2
+##
+{ 'struct': 'MemoryBackendEpcProperties',
+  'base': 'MemoryBackendProperties',
+  'data': {} }
+
+##
+# @PrManagerHelperProperties:
+#
+# Properties for pr-manager-helper objects.
+#
+# @path: the path to a Unix domain socket for connecting to the
+#     external helper
+#
+# Since: 2.11
+##
+{ 'struct': 'PrManagerHelperProperties',
+  'data': { 'path': 'str' } }
+
+##
+# @QtestProperties:
+#
+# Properties for qtest objects.
+#
+# @chardev: the chardev to be used to receive qtest commands on.
+#
+# @log: the path to a log file
+#
+# Since: 6.0
+##
+{ 'struct': 'QtestProperties',
+        'data': { 'chardev': 'str',
+                  '*log': 'str' } }
+
+##
+# @RemoteObjectProperties:
+#
+# Properties for x-remote-object objects.
+#
+# @fd: file descriptor name previously passed via 'getfd' command
+#
+# @devid: the id of the device to be associated with the file
+#     descriptor
+#
+# Since: 6.0
+##
+{ 'struct': 'RemoteObjectProperties',
+  'data': { 'fd': 'str', 'devid': 'str' } }
+
+##
+# @VfioUserServerProperties:
+#
+# Properties for x-vfio-user-server objects.
+#
+# @socket: socket to be used by the libvfio-user library
+#
+# @device: the ID of the device to be emulated at the server
+#
+# Since: 7.1
+##
+{ 'struct': 'VfioUserServerProperties',
+  'data': { 'socket': 'SocketAddress', 'device': 'str' } }
+
+##
+# @RngProperties:
+#
+# Properties for objects of classes derived from rng.
+#
+# @opened: if true, the device is opened immediately when applying
+#     this option and will probably fail when processing the next
+#     option.  Don't use; only provided for compatibility.
+#     (default: false)
+#
+# Features:
+#
+# @deprecated: Member @opened is deprecated.  Setting true doesn't
+#     make sense, and false is already the default.
+#
+# Since: 1.3
+##
+{ 'struct': 'RngProperties',
+  'data': { '*opened': { 'type': 'bool', 'features': ['deprecated'] } } }
+
+##
+# @RngEgdProperties:
+#
+# Properties for rng-egd objects.
+#
+# @chardev: the name of a character device backend that provides the
+#     connection to the RNG daemon
+#
+# Since: 1.3
+##
+{ 'struct': 'RngEgdProperties',
+  'base': 'RngProperties',
+  'data': { 'chardev': 'str' } }
+
+##
+# @RngRandomProperties:
+#
+# Properties for rng-random objects.
+#
+# @filename: the filename of the device on the host to obtain entropy
+#     from (default: "/dev/urandom")
+#
+# Since: 1.3
+##
+{ 'struct': 'RngRandomProperties',
+  'base': 'RngProperties',
+  'data': { '*filename': 'str' } }
+
+##
+# @SevGuestProperties:
+#
+# Properties for sev-guest objects.
+#
+# @sev-device: SEV device to use (default: "/dev/sev")
+#
+# @dh-cert-file: guest owners DH certificate (encoded with base64)
+#
+# @session-file: guest owners session parameters (encoded with base64)
+#
+# @policy: SEV policy value (default: 0x1)
+#
+# @handle: SEV firmware handle (default: 0)
+#
+# @cbitpos: C-bit location in page table entry (default: 0)
+#
+# @reduced-phys-bits: number of bits in physical addresses that become
+#     unavailable when SEV is enabled
+#
+# @kernel-hashes: if true, add hashes of kernel/initrd/cmdline to a
+#     designated guest firmware page for measured boot with -kernel
+#     (default: false) (since 6.2)
+#
+# Since: 2.12
+##
+{ 'struct': 'SevGuestProperties',
+  'data': { '*sev-device': 'str',
+            '*dh-cert-file': 'str',
+            '*session-file': 'str',
+            '*policy': 'uint32',
+            '*handle': 'uint32',
+            '*cbitpos': 'uint32',
+            'reduced-phys-bits': 'uint32',
+            '*kernel-hashes': 'bool' } }
+
+##
+# @ThreadContextProperties:
+#
+# Properties for thread context objects.
+#
+# @cpu-affinity: the list of host CPU numbers used as CPU affinity for
+#     all threads created in the thread context (default: QEMU main
+#     thread CPU affinity)
+#
+# @node-affinity: the list of host node numbers that will be resolved
+#     to a list of host CPU numbers used as CPU affinity.  This is a
+#     shortcut for specifying the list of host CPU numbers belonging
+#     to the host nodes manually by setting @cpu-affinity.
+#     (default: QEMU main thread affinity)
+#
+# Since: 7.2
+##
+{ 'struct': 'ThreadContextProperties',
+  'data': { '*cpu-affinity': ['uint16'],
+            '*node-affinity': ['uint16'] } }
+
+
+##
+# @ObjectType:
+#
+# Features:
+#
+# @unstable: Member @x-remote-object is experimental.
+#
+# Since: 6.0
+##
+{ 'enum': 'ObjectType',
+  'data': [
+    'authz-list',
+    'authz-listfile',
+    'authz-pam',
+    'authz-simple',
+    'can-bus',
+    { 'name': 'can-host-socketcan',
+      'if': 'CONFIG_LINUX' },
+    'colo-compare',
+    'cryptodev-backend',
+    'cryptodev-backend-builtin',
+    'cryptodev-backend-lkcf',
+    { 'name': 'cryptodev-vhost-user',
+      'if': 'CONFIG_VHOST_CRYPTO' },
+    'dbus-vmstate',
+    'filter-buffer',
+    'filter-dump',
+    'filter-mirror',
+    'filter-redirector',
+    'filter-replay',
+    'filter-rewriter',
+    'input-barrier',
+    { 'name': 'input-linux',
+      'if': 'CONFIG_LINUX' },
+    'iothread',
+    'main-loop',
+    { 'name': 'memory-backend-epc',
+      'if': 'CONFIG_LINUX' },
+    'memory-backend-file',
+    { 'name': 'memory-backend-memfd',
+      'if': 'CONFIG_LINUX' },
+    'memory-backend-ram',
+    'pef-guest',
+    { 'name': 'pr-manager-helper',
+      'if': 'CONFIG_LINUX' },
+    'qtest',
+    'rng-builtin',
+    'rng-egd',
+    { 'name': 'rng-random',
+      'if': 'CONFIG_POSIX' },
+    'secret',
+    { 'name': 'secret_keyring',
+      'if': 'CONFIG_SECRET_KEYRING' },
+    'sev-guest',
+    'thread-context',
+    's390-pv-guest',
+    'throttle-group',
+    'tls-creds-anon',
+    'tls-creds-psk',
+    'tls-creds-x509',
+    'tls-cipher-suites',
+    { 'name': 'x-remote-object', 'features': [ 'unstable' ] },
+    { 'name': 'x-vfio-user-server', 'features': [ 'unstable' ] }
+  ] }
+
+##
+# @ObjectOptions:
+#
+# Describes the options of a user creatable QOM object.
 #
 # @qom-type: the class name for the object to be created
 #
 # @id: the name of the new object
 #
-# @props: a dictionary of properties to be passed to the backend. Deprecated
-#         since 5.0, specify the properties on the top level instead. It is an
-#         error to specify the same option both on the top level and in @props.
+# Since: 6.0
+##
+{ 'union': 'ObjectOptions',
+  'base': { 'qom-type': 'ObjectType',
+            'id': 'str' },
+  'discriminator': 'qom-type',
+  'data': {
+      'authz-list':                 'AuthZListProperties',
+      'authz-listfile':             'AuthZListFileProperties',
+      'authz-pam':                  'AuthZPAMProperties',
+      'authz-simple':               'AuthZSimpleProperties',
+      'can-host-socketcan':         { 'type': 'CanHostSocketcanProperties',
+                                      'if': 'CONFIG_LINUX' },
+      'colo-compare':               'ColoCompareProperties',
+      'cryptodev-backend':          'CryptodevBackendProperties',
+      'cryptodev-backend-builtin':  'CryptodevBackendProperties',
+      'cryptodev-backend-lkcf':     'CryptodevBackendProperties',
+      'cryptodev-vhost-user':       { 'type': 'CryptodevVhostUserProperties',
+                                      'if': 'CONFIG_VHOST_CRYPTO' },
+      'dbus-vmstate':               'DBusVMStateProperties',
+      'filter-buffer':              'FilterBufferProperties',
+      'filter-dump':                'FilterDumpProperties',
+      'filter-mirror':              'FilterMirrorProperties',
+      'filter-redirector':          'FilterRedirectorProperties',
+      'filter-replay':              'NetfilterProperties',
+      'filter-rewriter':            'FilterRewriterProperties',
+      'input-barrier':              'InputBarrierProperties',
+      'input-linux':                { 'type': 'InputLinuxProperties',
+                                      'if': 'CONFIG_LINUX' },
+      'iothread':                   'IothreadProperties',
+      'main-loop':                  'MainLoopProperties',
+      'memory-backend-epc':         { 'type': 'MemoryBackendEpcProperties',
+                                      'if': 'CONFIG_LINUX' },
+      'memory-backend-file':        'MemoryBackendFileProperties',
+      'memory-backend-memfd':       { 'type': 'MemoryBackendMemfdProperties',
+                                      'if': 'CONFIG_LINUX' },
+      'memory-backend-ram':         'MemoryBackendProperties',
+      'pr-manager-helper':          { 'type': 'PrManagerHelperProperties',
+                                      'if': 'CONFIG_LINUX' },
+      'qtest':                      'QtestProperties',
+      'rng-builtin':                'RngProperties',
+      'rng-egd':                    'RngEgdProperties',
+      'rng-random':                 { 'type': 'RngRandomProperties',
+                                      'if': 'CONFIG_POSIX' },
+      'secret':                     'SecretProperties',
+      'secret_keyring':             { 'type': 'SecretKeyringProperties',
+                                      'if': 'CONFIG_SECRET_KEYRING' },
+      'sev-guest':                  'SevGuestProperties',
+      'thread-context':             'ThreadContextProperties',
+      'throttle-group':             'ThrottleGroupProperties',
+      'tls-creds-anon':             'TlsCredsAnonProperties',
+      'tls-creds-psk':              'TlsCredsPskProperties',
+      'tls-creds-x509':             'TlsCredsX509Properties',
+      'tls-cipher-suites':          'TlsCredsProperties',
+      'x-remote-object':            'RemoteObjectProperties',
+      'x-vfio-user-server':         'VfioUserServerProperties'
+  } }
+
+##
+# @object-add:
 #
-# Additional arguments depend on qom-type and are passed to the backend
-# unchanged.
+# Create a QOM object.
 #
-# Returns: Nothing on success
-#          Error if @qom-type is not a valid class name
+# Returns: Nothing on success Error if @qom-type is not a valid class
+#     name
 #
 # Since: 2.0
 #
 #      "arguments": { "qom-type": "rng-random", "id": "rng1",
 #                     "filename": "/dev/hwrng" } }
 # <- { "return": {} }
-#
 ##
-{ 'command': 'object-add',
-  'data': {'qom-type': 'str', 'id': 'str', '*props': 'any'},
-  'gen': false } # so we can get the additional arguments
+{ 'command': 'object-add', 'data': 'ObjectOptions', 'boxed': true,
+  'allow-preconfig': true }
 
 ##
 # @object-del:
 #
 # @id: the name of the QOM object to remove
 #
-# Returns: Nothing on success
-#          Error if @id is not a valid id for a QOM object
+# Returns: Nothing on success Error if @id is not a valid id for a QOM
+#     object
 #
 # Since: 2.0
 #
 #
 # -> { "execute": "object-del", "arguments": { "id": "rng1" } }
 # <- { "return": {} }
-#
 ##
-{ 'command': 'object-del', 'data': {'id': 'str'} }
+{ 'command': 'object-del', 'data': {'id': 'str'},
+  'allow-preconfig': true }
index a1d2175a8b3614d237f0cfb0a57f2f680a57cff9..23ebcf7885edf6420060b574eebaac750fa4aed0 100644 (file)
@@ -17,7 +17,7 @@
 #
 # @subnet-prefix: Subnet Prefix
 #
-# @interface-id : Interface ID
+# @interface-id: Interface ID
 #
 # Since: 4.0
 #
@@ -30,7 +30,6 @@
 #         "interface-id": 15880512517475447892,
 #         "gid-status": true,
 #         "subnet-prefix": 33022}}
-#
 ##
 { 'event': 'RDMA_GID_STATUS_CHANGED',
   'data': { 'netdev'        : 'str',
index bfd83d7591446363468dbf61cc8b6fb445f0d2fb..289b2d3658068ec88e8212ec4d075d4a537c3b70 100644 (file)
@@ -1,4 +1,5 @@
 # -*- Mode: Python -*-
+# vim: filetype=python
 #
 
 ##
 #
 # Mode of the replay subsystem.
 #
-# @none: normal execution mode. Replay or record are not enabled.
+# @none: normal execution mode.  Replay or record are not enabled.
 #
-# @record: record mode. All non-deterministic data is written into the
-#          replay log.
+# @record: record mode.  All non-deterministic data is written into
+#     the replay log.
 #
-# @play: replay mode. Non-deterministic data required for system execution
-#        is read from the log.
+# @play: replay mode.  Non-deterministic data required for system
+#     execution is read from the log.
 #
 # Since: 2.5
 ##
 #
 # @mode: current mode.
 #
-# @filename: name of the record/replay log file.
-#            It is present only in record or replay modes, when the log
-#            is recorded or replayed.
+# @filename: name of the record/replay log file.  It is present only
+#     in record or replay modes, when the log is recorded or replayed.
 #
 # @icount: current number of executed instructions.
 #
 # Since: 5.2
-#
 ##
 { 'struct': 'ReplayInfo',
   'data': { 'mode': 'ReplayMode', '*filename': 'str', 'icount': 'int' } }
@@ -47,9 +46,9 @@
 ##
 # @query-replay:
 #
-# Retrieve the record/replay information.
-# It includes current instruction count which may be used for
-# @replay-break and @replay-seek commands.
+# Retrieve the record/replay information.  It includes current
+# instruction count which may be used for @replay-break and
+# @replay-seek commands.
 #
 # Returns: record/replay information.
 #
@@ -59,7 +58,6 @@
 #
 # -> { "execute": "query-replay" }
 # <- { "return": { "mode": "play", "filename": "log.rr", "icount": 220414 } }
-#
 ##
 { 'command': 'query-replay',
   'returns': 'ReplayInfo' }
 ##
 # @replay-break:
 #
-# Set replay breakpoint at instruction count @icount.
-# Execution stops when the specified instruction is reached.
-# There can be at most one breakpoint. When breakpoint is set, any prior
-# one is removed.  The breakpoint may be set only in replay mode and only
-# "in the future", i.e. at instruction counts greater than the current one.
-# The current instruction count can be observed with @query-replay.
+# Set replay breakpoint at instruction count @icount.  Execution stops
+# when the specified instruction is reached.  There can be at most one
+# breakpoint.  When breakpoint is set, any prior one is removed.  The
+# breakpoint may be set only in replay mode and only "in the future",
+# i.e. at instruction counts greater than the current one.  The
+# current instruction count can be observed with @query-replay.
 #
 # @icount: instruction count to stop at
 #
 #
 # Example:
 #
-# -> { "execute": "replay-break", "data": { "icount": 220414 } }
-#
+# -> { "execute": "replay-break", "arguments": { "icount": 220414 } }
+# <- { "return": {} }
 ##
 { 'command': 'replay-break', 'data': { 'icount': 'int' } }
 
 ##
 # @replay-delete-break:
 #
-# Remove replay breakpoint which was set with @replay-break.
-# The command is ignored when there are no replay breakpoints.
+# Remove replay breakpoint which was set with @replay-break.  The
+# command is ignored when there are no replay breakpoints.
 #
 # Since: 5.2
 #
 # Example:
 #
 # -> { "execute": "replay-delete-break" }
-#
+# <- { "return": {} }
 ##
 { 'command': 'replay-delete-break' }
 
 # @replay-seek:
 #
 # Automatically proceed to the instruction count @icount, when
-# replaying the execution. The command automatically loads nearest
+# replaying the execution.  The command automatically loads nearest
 # snapshot and replays the execution to find the desired instruction.
-# When there is no preceding snapshot or the execution is not replayed,
-# then the command fails.
-# icount for the reference may be obtained with @query-replay command.
+# When there is no preceding snapshot or the execution is not
+# replayed, then the command fails.  icount for the reference may be
+# obtained with @query-replay command.
 #
 # @icount: target instruction count
 #
 #
 # Example:
 #
-# -> { "execute": "replay-seek", "data": { "icount": 220414 } }
+# -> { "execute": "replay-seek", "arguments": { "icount": 220414 } }
+# <- { "return": {} }
 ##
 { 'command': 'replay-seek', 'data': { 'icount': 'int' } }
index b48e49a89bb3a4741621281b938dd121cc9ce7d0..31ce0b36f6964da2c369fdb057009b1a53269e38 100644 (file)
@@ -34,7 +34,6 @@
 #
 # -> { "execute": "query-rocker", "arguments": { "name": "sw1" } }
 # <- { "return": {"name": "sw1", "ports": 2, "id": 1327446905938}}
-#
 ##
 { 'command': 'query-rocker',
   'data': { 'name': 'str' },
 #                  {"duplex": "full", "enabled": true, "name": "sw1.2",
 #                   "autoneg": "off", "link-up": true, "speed": 10000}
 #    ]}
-#
 ##
 { 'command': 'query-rocker-ports',
   'data': { 'name': 'str' },
 # @ip-dst: IP header destination address
 #
 # Note: optional members may or may not appear in the flow key
-#       depending if they're relevant to the flow key.
+#     depending if they're relevant to the flow key.
 #
 # Since: 2.4
 ##
 # @ip-tos: IP header TOS field
 #
 # Note: optional members may or may not appear in the flow mask
-#       depending if they're relevant to the flow mask.
+#     depending if they're relevant to the flow mask.
 #
 # Since: 2.4
 ##
 # @out-pport: physical output port
 #
 # Note: optional members may or may not appear in the flow action
-#       depending if they're relevant to the flow action.
+#     depending if they're relevant to the flow action.
 #
 # Since: 2.4
 ##
 #
 # @name: switch name
 #
-# @tbl-id: flow table ID.  If tbl-id is not specified, returns
-#          flow information for all tables.
+# @tbl-id: flow table ID.  If tbl-id is not specified, returns flow
+#     information for all tables.
 #
 # Returns: rocker OF-DPA flow information
 #
 #                  },
 #                  {...more...},
 #    ]}
-#
 ##
 { 'command': 'query-rocker-of-dpa-flows',
   'data': { 'name': 'str', '*tbl-id': 'uint32' },
 # @ttl-check: perform TTL check
 #
 # Note: optional members may or may not appear in the group depending
-#       if they're relevant to the group type.
+#     if they're relevant to the group type.
 #
 # Since: 2.4
 ##
 #
 # @name: switch name
 #
-# @type: group type.  If type is not specified, returns
-#        group information for all group types.
+# @type: group type.  If type is not specified, returns group
+#     information for all group types.
 #
 # Returns: rocker OF-DPA group information
 #
 #                   "pport": 0, "vlan-id": 3840,
 #                   "pop-vlan": 1, "id": 251658240}
 #    ]}
-#
 ##
 { 'command': 'query-rocker-of-dpa-groups',
   'data': { 'name': 'str', '*type': 'uint8' },
index 964c8ef3916c43046ea6cc6ffcb875987d071eb7..f216ba54ec4c16146e0f77f316be61ab42fa7f9b 100644 (file)
 # @finish-migrate: guest is paused to finish the migration process
 #
 # @inmigrate: guest is paused waiting for an incoming migration.  Note
-#             that this state does not tell whether the machine will start at the
-#             end of the migration.  This depends on the command-line -S option and
-#             any invocation of 'stop' or 'cont' that has happened since QEMU was
-#             started.
+#     that this state does not tell whether the machine will start at
+#     the end of the migration.  This depends on the command-line -S
+#     option and any invocation of 'stop' or 'cont' that has happened
+#     since QEMU was started.
 #
-# @internal-error: An internal error that prevents further guest execution
-#                  has occurred
+# @internal-error: An internal error that prevents further guest
+#     execution has occurred
 #
-# @io-error: the last IOP has failed and the device is configured to pause
-#            on I/O errors
+# @io-error: the last IOP has failed and the device is configured to
+#     pause on I/O errors
 #
 # @paused: guest has been paused via the 'stop' command
 #
 #
 # @suspended: guest is suspended (ACPI S3)
 #
-# @watchdog: the watchdog action is configured to pause and has been triggered
+# @watchdog: the watchdog action is configured to pause and has been
+#     triggered
 #
-# @guest-panicked: guest has been panicked as a result of guest OS panic
+# @guest-panicked: guest has been panicked as a result of guest OS
+#     panic
 #
-# @colo: guest is paused to save/restore VM state under colo checkpoint,
-#        VM can not get into this state unless colo capability is enabled
-#        for migration. (since 2.8)
-# @preconfig: QEMU is paused before board specific init callback is executed.
-#             The state is reachable only if the --preconfig CLI option is used.
-#             (Since 3.0)
+# @colo: guest is paused to save/restore VM state under colo
+#     checkpoint, VM can not get into this state unless colo
+#     capability is enabled for migration.  (since 2.8)
 ##
 { 'enum': 'RunState',
   'data': [ 'debug', 'inmigrate', 'internal-error', 'io-error', 'paused',
             'postmigrate', 'prelaunch', 'finish-migrate', 'restore-vm',
             'running', 'save-vm', 'shutdown', 'suspended', 'watchdog',
-            'guest-panicked', 'colo', 'preconfig' ] }
+            'guest-panicked', 'colo' ] }
 
 ##
 # @ShutdownCause:
 # @host-ui: Reaction to a UI event, like window close
 #
 # @guest-shutdown: Guest shutdown/suspend request, via ACPI or other
-#                  hardware-specific means
+#     hardware-specific means
 #
 # @guest-reset: Guest reset request, and command line turns that into
-#               a shutdown
+#     a shutdown
 #
-# @guest-panic: Guest panicked, and command line turns that into a shutdown
+# @guest-panic: Guest panicked, and command line turns that into a
+#     shutdown
 #
-# @subsystem-reset: Partial guest reset that does not trigger QMP events and
-#                   ignores --no-reboot. This is useful for sanitizing
-#                   hypercalls on s390 that are used during kexec/kdump/boot
+# @subsystem-reset: Partial guest reset that does not trigger QMP
+#     events and ignores --no-reboot.  This is useful for sanitizing
+#     hypercalls on s390 that are used during kexec/kdump/boot
 #
+# @snapshot-load: A snapshot is being loaded by the record & replay
+#     subsystem.  This value is used only within QEMU.  It doesn't
+#     occur in QMP. (since 7.2)
 ##
 { 'enum': 'ShutdownCause',
   # Beware, shutdown_caused_by_guest() depends on enumeration order
   'data': [ 'none', 'host-error', 'host-qmp-quit', 'host-qmp-system-reset',
             'host-signal', 'host-ui', 'guest-shutdown', 'guest-reset',
-            'guest-panic', 'subsystem-reset'] }
+            'guest-panic', 'subsystem-reset', 'snapshot-load'] }
 
 ##
 # @StatusInfo:
 #
 # @running: true if all VCPUs are runnable, false if not runnable
 #
-# @singlestep: true if VCPUs are in single-step mode
+# @singlestep: true if using TCG with one guest instruction per
+#     translation block
 #
 # @status: the virtual machine @RunState
 #
-# Since:  0.14.0
+# Features:
 #
-# Notes: @singlestep is enabled through the GDB stub
+# @deprecated: Member 'singlestep' is deprecated (with no
+#     replacement).
+#
+# Since: 0.14
+#
+# Notes: @singlestep is enabled on the command line with '-accel
+#     tcg,one-insn-per-tb=on', or with the HMP 'one-insn-per-tb'
+#     command.
 ##
 { 'struct': 'StatusInfo',
-  'data': {'running': 'bool', 'singlestep': 'bool', 'status': 'RunState'} }
+  'data': {'running': 'bool',
+           'singlestep': { 'type': 'bool', 'features': [ 'deprecated' ]},
+           'status': 'RunState'} }
 
 ##
 # @query-status:
 #
 # Returns: @StatusInfo reflecting all VCPUs
 #
-# Since:  0.14.0
+# Since: 0.14
 #
 # Example:
 #
 # <- { "return": { "running": true,
 #                  "singlestep": false,
 #                  "status": "running" } }
-#
 ##
 { 'command': 'query-status', 'returns': 'StatusInfo',
   'allow-preconfig': true }
 ##
 # @SHUTDOWN:
 #
-# Emitted when the virtual machine has shut down, indicating that qemu is
-# about to exit.
+# Emitted when the virtual machine has shut down, indicating that qemu
+# is about to exit.
 #
-# @guest: If true, the shutdown was triggered by a guest request (such as
-#         a guest-initiated ACPI shutdown request or other hardware-specific action)
-#         rather than a host request (such as sending qemu a SIGINT). (since 2.10)
+# @guest: If true, the shutdown was triggered by a guest request (such
+#     as a guest-initiated ACPI shutdown request or other
+#     hardware-specific action) rather than a host request (such as
+#     sending qemu a SIGINT). (since 2.10)
 #
-# @reason: The @ShutdownCause which resulted in the SHUTDOWN. (since 4.0)
+# @reason: The @ShutdownCause which resulted in the SHUTDOWN. (since
+#     4.0)
 #
-# Note: If the command-line option "-no-shutdown" has been specified, qemu will
-#       not exit, and a STOP event will eventually follow the SHUTDOWN event
+# Note: If the command-line option "-no-shutdown" has been specified,
+#     qemu will not exit, and a STOP event will eventually follow the
+#     SHUTDOWN event
 #
-# Since: 0.12.0
+# Since: 0.12
 #
 # Example:
 #
-# <- { "event": "SHUTDOWN", "data": { "guest": true },
+# <- { "event": "SHUTDOWN",
+#      "data": { "guest": true, "reason": "guest-shutdown" },
 #      "timestamp": { "seconds": 1267040730, "microseconds": 682951 } }
-#
 ##
 { 'event': 'SHUTDOWN', 'data': { 'guest': 'bool', 'reason': 'ShutdownCause' } }
 
 ##
 # @POWERDOWN:
 #
-# Emitted when the virtual machine is powered down through the power control
-# system, such as via ACPI.
+# Emitted when the virtual machine is powered down through the power
+# control system, such as via ACPI.
 #
-# Since: 0.12.0
+# Since: 0.12
 #
 # Example:
 #
 # <- { "event": "POWERDOWN",
 #      "timestamp": { "seconds": 1267040730, "microseconds": 682951 } }
-#
 ##
 { 'event': 'POWERDOWN' }
 
 # Emitted when the virtual machine is reset
 #
 # @guest: If true, the reset was triggered by a guest request (such as
-#         a guest-initiated ACPI reboot request or other hardware-specific action)
-#         rather than a host request (such as the QMP command system_reset).
-#         (since 2.10)
+#     a guest-initiated ACPI reboot request or other hardware-specific
+#     action) rather than a host request (such as the QMP command
+#     system_reset). (since 2.10)
 #
 # @reason: The @ShutdownCause of the RESET. (since 4.0)
 #
-# Since: 0.12.0
+# Since: 0.12
 #
 # Example:
 #
-# <- { "event": "RESET", "data": { "guest": false },
+# <- { "event": "RESET",
+#      "data": { "guest": false, "reason": "guest-reset" },
 #      "timestamp": { "seconds": 1267041653, "microseconds": 9518 } }
-#
 ##
 { 'event': 'RESET', 'data': { 'guest': 'bool', 'reason': 'ShutdownCause' } }
 
 #
 # Emitted when the virtual machine is stopped
 #
-# Since: 0.12.0
+# Since: 0.12
 #
 # Example:
 #
 # <- { "event": "STOP",
 #      "timestamp": { "seconds": 1267041730, "microseconds": 281295 } }
-#
 ##
 { 'event': 'STOP' }
 
 #
 # Emitted when the virtual machine resumes execution
 #
-# Since: 0.12.0
+# Since: 0.12
 #
 # Example:
 #
 # <- { "event": "RESUME",
 #      "timestamp": { "seconds": 1271770767, "microseconds": 582542 } }
-#
 ##
 { 'event': 'RESUME' }
 
 ##
 # @SUSPEND:
 #
-# Emitted when guest enters a hardware suspension state, for example, S3 state,
-# which is sometimes called standby state
+# Emitted when guest enters a hardware suspension state, for example,
+# S3 state, which is sometimes called standby state
 #
 # Since: 1.1
 #
 #
 # <- { "event": "SUSPEND",
 #      "timestamp": { "seconds": 1344456160, "microseconds": 309119 } }
-#
 ##
 { 'event': 'SUSPEND' }
 
 ##
 # @SUSPEND_DISK:
 #
-# Emitted when guest enters a hardware suspension state with data saved on
-# disk, for example, S4 state, which is sometimes called hibernate state
+# Emitted when guest enters a hardware suspension state with data
+# saved on disk, for example, S4 state, which is sometimes called
+# hibernate state
 #
-# Note: QEMU shuts down (similar to event @SHUTDOWN) when entering this state
+# Note: QEMU shuts down (similar to event @SHUTDOWN) when entering
+#     this state
 #
 # Since: 1.2
 #
 # Example:
 #
-# <-   { "event": "SUSPEND_DISK",
-#        "timestamp": { "seconds": 1344456160, "microseconds": 309119 } }
-#
+# <- { "event": "SUSPEND_DISK",
+#      "timestamp": { "seconds": 1344456160, "microseconds": 309119 } }
 ##
 { 'event': 'SUSPEND_DISK' }
 
 ##
 # @WAKEUP:
 #
-# Emitted when the guest has woken up from suspend state and is running
+# Emitted when the guest has woken up from suspend state and is
+# running
 #
 # Since: 1.1
 #
 #
 # <- { "event": "WAKEUP",
 #      "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
-#
 ##
 { 'event': 'WAKEUP' }
 
 #
 # @action: action that has been taken
 #
-# Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is
-#       followed respectively by the RESET, SHUTDOWN, or STOP events
+# Note: If action is "reset", "shutdown", or "pause" the WATCHDOG
+#     event is followed respectively by the RESET, SHUTDOWN, or STOP
+#     events
 #
 # Note: This event is rate-limited.
 #
-# Since: 0.13.0
+# Since: 0.13
 #
 # Example:
 #
 # <- { "event": "WATCHDOG",
 #      "data": { "action": "reset" },
 #      "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
-#
 ##
 { 'event': 'WATCHDOG',
   'data': { 'action': 'WatchdogAction' } }
 ##
 # @WatchdogAction:
 #
-# An enumeration of the actions taken when the watchdog device's timer is
-# expired
+# An enumeration of the actions taken when the watchdog device's timer
+# is expired
 #
 # @reset: system resets
 #
-# @shutdown: system shutdown, note that it is similar to @powerdown, which
-#            tries to set to system status and notify guest
+# @shutdown: system shutdown, note that it is similar to @powerdown,
+#     which tries to set to system status and notify guest
 #
 # @poweroff: system poweroff, the emulator program exits
 #
 #
 # @none: nothing is done
 #
-# @inject-nmi: a non-maskable interrupt is injected into the first VCPU (all
-#              VCPUS on x86) (since 2.4)
+# @inject-nmi: a non-maskable interrupt is injected into the first
+#     VCPU (all VCPUS on x86) (since 2.4)
 #
 # Since: 2.1
 ##
   'data': [ 'reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none',
             'inject-nmi' ] }
 
+##
+# @RebootAction:
+#
+# Possible QEMU actions upon guest reboot
+#
+# @reset: Reset the VM
+#
+# @shutdown: Shutdown the VM and exit, according to the shutdown
+#     action
+#
+# Since: 6.0
+##
+{ 'enum': 'RebootAction',
+  'data': [ 'reset', 'shutdown' ] }
+
+##
+# @ShutdownAction:
+#
+# Possible QEMU actions upon guest shutdown
+#
+# @poweroff: Shutdown the VM and exit
+#
+# @pause: pause the VM
+#
+# Since: 6.0
+##
+{ 'enum': 'ShutdownAction',
+  'data': [ 'poweroff', 'pause' ] }
+
+##
+# @PanicAction:
+#
+# @none: Continue VM execution
+#
+# @pause: Pause the VM
+#
+# @shutdown: Shutdown the VM and exit, according to the shutdown
+#     action
+#
+# @exit-failure: Shutdown the VM and exit with nonzero status (since
+#     7.1)
+#
+# Since: 6.0
+##
+{ 'enum': 'PanicAction',
+  'data': [ 'pause', 'shutdown', 'exit-failure', 'none' ] }
+
 ##
 # @watchdog-set-action:
 #
 ##
 { 'command': 'watchdog-set-action', 'data' : {'action': 'WatchdogAction'} }
 
+##
+# @set-action:
+#
+# Set the actions that will be taken by the emulator in response to
+# guest events.
+#
+# @reboot: @RebootAction action taken on guest reboot.
+#
+# @shutdown: @ShutdownAction action taken on guest shutdown.
+#
+# @panic: @PanicAction action taken on guest panic.
+#
+# @watchdog: @WatchdogAction action taken when watchdog timer expires
+#     .
+#
+# Returns: Nothing on success.
+#
+# Since: 6.0
+#
+# Example:
+#
+# -> { "execute": "set-action",
+#      "arguments": { "reboot": "shutdown",
+#                     "shutdown" : "pause",
+#                     "panic": "pause",
+#                     "watchdog": "inject-nmi" } }
+# <- { "return": {} }
+##
+{ 'command': 'set-action',
+  'data': { '*reboot': 'RebootAction',
+            '*shutdown': 'ShutdownAction',
+            '*panic': 'PanicAction',
+            '*watchdog': 'WatchdogAction' },
+  'allow-preconfig': true }
+
 ##
 # @GUEST_PANICKED:
 #
 # Example:
 #
 # <- { "event": "GUEST_PANICKED",
-#      "data": { "action": "pause" } }
-#
+#      "data": { "action": "pause" },
+#      "timestamp": { "seconds": 1648245231, "microseconds": 900001 } }
 ##
 { 'event': 'GUEST_PANICKED',
   'data': { 'action': 'GuestPanicAction', '*info': 'GuestPanicInformation' } }
 # Example:
 #
 # <- { "event": "GUEST_CRASHLOADED",
-#      "data": { "action": "run" } }
-#
+#      "data": { "action": "run" },
+#      "timestamp": { "seconds": 1648245259, "microseconds": 893771 } }
 ##
 { 'event': 'GUEST_CRASHLOADED',
   'data': { 'action': 'GuestPanicAction', '*info': 'GuestPanicInformation' } }
 #
 # @pause: system pauses
 #
-# Since: 2.1 (poweroff since 2.8, run since 5.0)
+# @poweroff: system powers off (since 2.8)
+#
+# @run: system continues to run (since 5.0)
+#
+# Since: 2.1
 ##
 { 'enum': 'GuestPanicAction',
   'data': [ 'pause', 'poweroff', 'run' ] }
 {'union': 'GuestPanicInformation',
  'base': {'type': 'GuestPanicInformationType'},
  'discriminator': 'type',
- 'data': { 'hyper-v': 'GuestPanicInformationHyperV',
-           's390': 'GuestPanicInformationS390' } }
+ 'data': {'hyper-v': 'GuestPanicInformationHyperV',
+          's390': 'GuestPanicInformationS390'}}
 
 ##
 # @GuestPanicInformationHyperV:
 # Since: 2.9
 ##
 {'struct': 'GuestPanicInformationHyperV',
- 'data': { 'arg1': 'uint64',
-           'arg2': 'uint64',
-           'arg3': 'uint64',
-           'arg4': 'uint64',
-           'arg5': 'uint64' } }
+ 'data': {'arg1': 'uint64',
+          'arg2': 'uint64',
+          'arg3': 'uint64',
+          'arg4': 'uint64',
+          'arg5': 'uint64'}}
 
 ##
 # @S390CrashReason:
 #
 # @disabled-wait: the CPU has entered a disabled wait state
 #
-# @extint-loop: clock comparator or cpu timer interrupt with new PSW enabled
-#               for external interrupts
+# @extint-loop: clock comparator or cpu timer interrupt with new PSW
+#     enabled for external interrupts
 #
 # @pgmint-loop: program interrupt with BAD new PSW
 #
-# @opint-loop: operation exception interrupt with invalid code at the program
-#              interrupt new PSW
+# @opint-loop: operation exception interrupt with invalid code at the
+#     program interrupt new PSW
 #
 # Since: 2.12
 ##
 # S390 specific guest panic information (PSW)
 #
 # @core: core id of the CPU that crashed
+#
 # @psw-mask: control fields of guest PSW
+#
 # @psw-addr: guest instruction address
+#
 # @reason: guest crash reason
 #
 # Since: 2.12
 ##
 {'struct': 'GuestPanicInformationS390',
- 'data': { 'core': 'uint32',
-           'psw-mask': 'uint64',
-           'psw-addr': 'uint64',
-           'reason': 'S390CrashReason' } }
+ 'data': {'core': 'uint32',
+          'psw-mask': 'uint64',
+          'psw-addr': 'uint64',
+          'reason': 'S390CrashReason'}}
 
 ##
 # @MEMORY_FAILURE:
 #
 # @recipient: recipient is defined as @MemoryFailureRecipient.
 #
-# @action: action that has been taken. action is defined as @MemoryFailureAction.
+# @action: action that has been taken.  action is defined as
+#     @MemoryFailureAction.
 #
-# @flags: flags for MemoryFailureAction. action is defined as @MemoryFailureFlags.
+# @flags: flags for MemoryFailureAction.  action is defined as
+#     @MemoryFailureFlags.
 #
 # Since: 5.2
 #
 # <- { "event": "MEMORY_FAILURE",
 #      "data": { "recipient": "hypervisor",
 #                "action": "fatal",
-#                "flags": { 'action-required': false } }
-#
+#                "flags": { "action-required": false,
+#                           "recursive": false } },
+#      "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
 ##
 { 'event': 'MEMORY_FAILURE',
   'data': { 'recipient': 'MemoryFailureRecipient',
 #
 # Hardware memory failure occurs, handled by recipient.
 #
-# @hypervisor: memory failure at QEMU process address space.
-#              (none guest memory, but used by QEMU itself).
+# @hypervisor: memory failure at QEMU process address space.  (none
+#     guest memory, but used by QEMU itself).
 #
 # @guest: memory failure at guest memory,
 #
 # Since: 5.2
-#
 ##
 { 'enum': 'MemoryFailureRecipient',
   'data': [ 'hypervisor',
             'guest' ] }
 
-
 ##
 # @MemoryFailureAction:
 #
 # Actions taken by QEMU in response to a hardware memory failure.
 #
-# @ignore: the memory failure could be ignored.  This will only be the case
-#          for action-optional failures.
+# @ignore: the memory failure could be ignored.  This will only be the
+#     case for action-optional failures.
 #
-# @inject: memory failure occurred in guest memory, the guest enabled MCE
-#          handling mechanism, and QEMU could inject the MCE into the guest
-#          successfully.
+# @inject: memory failure occurred in guest memory, the guest enabled
+#     MCE handling mechanism, and QEMU could inject the MCE into the
+#     guest successfully.
 #
-# @fatal: the failure is unrecoverable.  This occurs for action-required
-#         failures if the recipient is the hypervisor; QEMU will exit.
+# @fatal: the failure is unrecoverable.  This occurs for
+#     action-required failures if the recipient is the hypervisor;
+#     QEMU will exit.
 #
-# @reset: the failure is unrecoverable but confined to the guest.  This
-#         occurs if the recipient is a guest guest which is not ready
-#         to handle memory failures.
+# @reset: the failure is unrecoverable but confined to the guest.
+#     This occurs if the recipient is a guest guest which is not ready
+#     to handle memory failures.
 #
 # Since: 5.2
-#
 ##
 { 'enum': 'MemoryFailureAction',
   'data': [ 'ignore',
 # Additional information on memory failures.
 #
 # @action-required: whether a memory failure event is action-required
-#                   or action-optional (e.g. a failure during memory scrub).
+#     or action-optional (e.g. a failure during memory scrub).
 #
-# @recursive: whether the failure occurred while the previous
-#             failure was still in progress.
+# @recursive: whether the failure occurred while the previous failure
+#     was still in progress.
 #
 # Since: 5.2
-#
 ##
 { 'struct': 'MemoryFailureFlags',
   'data': { 'action-required': 'bool',
             'recursive': 'bool'} }
+
+##
+# @NotifyVmexitOption:
+#
+# An enumeration of the options specified when enabling notify VM exit
+#
+# @run: enable the feature, do nothing and continue if the notify VM
+#     exit happens.
+#
+# @internal-error: enable the feature, raise a internal error if the
+#     notify VM exit happens.
+#
+# @disable: disable the feature.
+#
+# Since: 7.2
+##
+{ 'enum': 'NotifyVmexitOption',
+  'data': [ 'run', 'internal-error', 'disable' ] }
index 2e83452797f81f30dee300f58241b3a15f9120ca..621315452555f9c6db98a98877fae02ace586871 100644 (file)
 ##
 # @InetSocketAddress:
 #
-# Captures a socket address or address range in the Internet namespace.
+# Captures a socket address or address range in the Internet
+# namespace.
 #
-# @numeric: true if the host/port are guaranteed to be numeric,
-#           false if name resolution should be attempted. Defaults to false.
-#           (Since 2.9)
+# @numeric: true if the host/port are guaranteed to be numeric, false
+#     if name resolution should be attempted.  Defaults to false.
+#     (Since 2.9)
 #
 # @to: If present, this is range of possible addresses, with port
-#      between @port and @to.
+#     between @port and @to.
 #
-# @ipv4: whether to accept IPv4 addresses, default try both IPv4 and IPv6
+# @ipv4: whether to accept IPv4 addresses, default try both IPv4 and
+#     IPv6
 #
-# @ipv6: whether to accept IPv6 addresses, default try both IPv4 and IPv6
+# @ipv6: whether to accept IPv6 addresses, default try both IPv4 and
+#     IPv6
 #
-# @keep-alive: enable keep-alive when connecting to this socket. Not supported
-#              for passive sockets. (Since 4.2)
+# @keep-alive: enable keep-alive when connecting to this socket.  Not
+#     supported for passive sockets.  (Since 4.2)
+#
+# @mptcp: enable multi-path TCP. (Since 6.1)
 #
 # Since: 1.3
 ##
@@ -66,7 +71,8 @@
     '*to': 'uint16',
     '*ipv4': 'bool',
     '*ipv6': 'bool',
-    '*keep-alive': 'bool' } }
+    '*keep-alive': 'bool',
+    '*mptcp': { 'type': 'bool', 'if': 'HAVE_IPPROTO_MPTCP' } } }
 
 ##
 # @UnixSocketAddress:
 # Captures a socket address in the local ("Unix socket") namespace.
 #
 # @path: filesystem path to use
+#
 # @abstract: if true, this is a Linux abstract socket address.  @path
-#            will be prefixed by a null byte, and optionally padded
-#            with null bytes.  Defaults to false.  (Since 5.1)
+#     will be prefixed by a null byte, and optionally padded with null
+#     bytes.  Defaults to false.  (Since 5.1)
+#
 # @tight: if false, pad an abstract socket address with enough null
-#         bytes to make it fill struct sockaddr_un member sun_path.
-#         Defaults to true.  (Since 5.1)
+#     bytes to make it fill struct sockaddr_un member sun_path.
+#     Defaults to true.  (Since 5.1)
 #
 # Since: 1.3
 ##
 { 'struct': 'UnixSocketAddress',
   'data': {
     'path': 'str',
-    '*abstract': { 'type': 'bool', 'if': 'defined(CONFIG_LINUX)' },
-    '*tight': { 'type': 'bool', 'if': 'defined(CONFIG_LINUX)' } } }
+    '*abstract': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
+    '*tight': { 'type': 'bool', 'if': 'CONFIG_LINUX' } } }
 
 ##
 # @VsockSocketAddress:
 # Captures a socket address in the vsock namespace.
 #
 # @cid: unique host identifier
+#
 # @port: port
 #
 # Note: string types are used to allow for possible future hostname or
-#       service resolution support.
+#     service resolution support.
 #
 # Since: 2.8
 ##
     'cid': 'str',
     'port': 'str' } }
 
+##
+# @InetSocketAddressWrapper:
+#
+# Since: 1.3
+##
+{ 'struct': 'InetSocketAddressWrapper',
+  'data': { 'data': 'InetSocketAddress' } }
+
+##
+# @UnixSocketAddressWrapper:
+#
+# Since: 1.3
+##
+{ 'struct': 'UnixSocketAddressWrapper',
+  'data': { 'data': 'UnixSocketAddress' } }
+
+##
+# @VsockSocketAddressWrapper:
+#
+# Since: 2.8
+##
+{ 'struct': 'VsockSocketAddressWrapper',
+  'data': { 'data': 'VsockSocketAddress' } }
+
+##
+# @StringWrapper:
+#
+# Since: 1.3
+##
+{ 'struct': 'StringWrapper',
+  'data': { 'data': 'String' } }
+
 ##
 # @SocketAddressLegacy:
 #
-# Captures the address of a socket, which could also be a named file descriptor
+# Captures the address of a socket, which could also be a named file
+# descriptor
 #
 # Note: This type is deprecated in favor of SocketAddress.  The
-#       difference between SocketAddressLegacy and SocketAddress is that the
-#       latter is a flat union rather than a simple union. Flat is nicer
-#       because it avoids nesting on the wire, i.e. that form has fewer {}.
-
+#     difference between SocketAddressLegacy and SocketAddress is that
+#     the latter has fewer {} on the wire.
 #
 # Since: 1.3
 ##
 { 'union': 'SocketAddressLegacy',
+  'base': { 'type': 'SocketAddressType' },
+  'discriminator': 'type',
   'data': {
-    'inet': 'InetSocketAddress',
-    'unix': 'UnixSocketAddress',
-    'vsock': 'VsockSocketAddress',
-    'fd': 'String' } }
+    'inet': 'InetSocketAddressWrapper',
+    'unix': 'UnixSocketAddressWrapper',
+    'vsock': 'VsockSocketAddressWrapper',
+    'fd': 'StringWrapper' } }
 
 ##
 # @SocketAddressType:
 #
 # Available SocketAddress types
 #
-# @inet:  Internet address
+# @inet: Internet address
 #
-# @unix:  Unix domain socket
+# @unix: Unix domain socket
 #
 # @vsock: VMCI address
 #
-# @fd: decimal is for file descriptor number, otherwise a file descriptor name.
-#      Named file descriptors are permitted in monitor commands, in combination
-#      with the 'getfd' command. Decimal file descriptors are permitted at
-#      startup or other contexts where no monitor context is active.
+# @fd: decimal is for file descriptor number, otherwise a file
+#     descriptor name.  Named file descriptors are permitted in
+#     monitor commands, in combination with the 'getfd' command.
+#     Decimal file descriptors are permitted at startup or other
+#     contexts where no monitor context is active.
 #
 # Since: 2.9
 ##
 # Captures the address of a socket, which could also be a named file
 # descriptor
 #
-# @type:       Transport type
+# @type: Transport type
 #
 # Since: 2.9
 ##
diff --git a/qapi/stats.json b/qapi/stats.json
new file mode 100644 (file)
index 0000000..01791e8
--- /dev/null
@@ -0,0 +1,271 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# Copyright (c) 2022 Oracle and/or its affiliates.
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+##
+# = Statistics
+##
+
+##
+# @StatsType:
+#
+# Enumeration of statistics types
+#
+# @cumulative: stat is cumulative; value can only increase.
+#
+# @instant: stat is instantaneous; value can increase or decrease.
+#
+# @peak: stat is the peak value; value can only increase.
+#
+# @linear-histogram: stat is a linear histogram.
+#
+# @log2-histogram: stat is a logarithmic histogram, with one bucket
+#     for each power of two.
+#
+# Since: 7.1
+##
+{ 'enum' : 'StatsType',
+  'data' : [ 'cumulative', 'instant', 'peak', 'linear-histogram',
+             'log2-histogram' ] }
+
+##
+# @StatsUnit:
+#
+# Enumeration of unit of measurement for statistics
+#
+# @bytes: stat reported in bytes.
+#
+# @seconds: stat reported in seconds.
+#
+# @cycles: stat reported in clock cycles.
+#
+# @boolean: stat is a boolean value.
+#
+# Since: 7.1
+##
+{ 'enum' : 'StatsUnit',
+  'data' : [ 'bytes', 'seconds', 'cycles', 'boolean' ] }
+
+##
+# @StatsProvider:
+#
+# Enumeration of statistics providers.
+#
+# @kvm: since 7.1
+#
+# @cryptodev: since 8.0
+#
+# Since: 7.1
+##
+{ 'enum': 'StatsProvider',
+  'data': [ 'kvm', 'cryptodev' ] }
+
+##
+# @StatsTarget:
+#
+# The kinds of objects on which one can request statistics.
+#
+# @vm: statistics that apply to the entire virtual machine or the
+#     entire QEMU process.
+#
+# @vcpu: statistics that apply to a single virtual CPU.
+#
+# @cryptodev: statistics that apply to a crypto device (since 8.0)
+#
+# Since: 7.1
+##
+{ 'enum': 'StatsTarget',
+  'data': [ 'vm', 'vcpu', 'cryptodev' ] }
+
+##
+# @StatsRequest:
+#
+# Indicates a set of statistics that should be returned by
+# query-stats.
+#
+# @provider: provider for which to return statistics.
+#
+# @names: statistics to be returned (all if omitted).
+#
+# Since: 7.1
+##
+{ 'struct': 'StatsRequest',
+  'data': { 'provider': 'StatsProvider',
+            '*names': [ 'str' ] } }
+
+##
+# @StatsVCPUFilter:
+#
+# @vcpus: list of QOM paths for the desired vCPU objects.
+#
+# Since: 7.1
+##
+{ 'struct': 'StatsVCPUFilter',
+  'data': { '*vcpus': [ 'str' ] } }
+
+##
+# @StatsFilter:
+#
+# The arguments to the query-stats command; specifies a target for
+# which to request statistics and optionally the required subset of
+# information for that target:
+#
+# - which vCPUs to request statistics for
+# - which providers to request statistics from
+# - which named values to return within each provider
+#
+# Since: 7.1
+##
+{ 'union': 'StatsFilter',
+  'base': {
+      'target': 'StatsTarget',
+      '*providers': [ 'StatsRequest' ] },
+  'discriminator': 'target',
+  'data': { 'vcpu': 'StatsVCPUFilter' } }
+
+##
+# @StatsValue:
+#
+# @scalar: single unsigned 64-bit integers.
+#
+# @list: list of unsigned 64-bit integers (used for histograms).
+#
+# Since: 7.1
+##
+{ 'alternate': 'StatsValue',
+  'data': { 'scalar': 'uint64',
+            'boolean': 'bool',
+            'list': [ 'uint64' ] } }
+
+##
+# @Stats:
+#
+# @name: name of stat.
+#
+# @value: stat value.
+#
+# Since: 7.1
+##
+{ 'struct': 'Stats',
+  'data': { 'name': 'str',
+            'value' : 'StatsValue' } }
+
+##
+# @StatsResult:
+#
+# @provider: provider for this set of statistics.
+#
+# @qom-path: Path to the object for which the statistics are returned,
+#     if the object is exposed in the QOM tree
+#
+# @stats: list of statistics.
+#
+# Since: 7.1
+##
+{ 'struct': 'StatsResult',
+  'data': { 'provider': 'StatsProvider',
+            '*qom-path': 'str',
+            'stats': [ 'Stats' ] } }
+
+##
+# @query-stats:
+#
+# Return runtime-collected statistics for objects such as the VM or
+# its vCPUs.
+#
+# The arguments are a StatsFilter and specify the provider and objects
+# to return statistics about.
+#
+# Returns: a list of StatsResult, one for each provider and object
+#     (e.g., for each vCPU).
+#
+# Since: 7.1
+##
+{ 'command': 'query-stats',
+  'data': 'StatsFilter',
+  'boxed': true,
+  'returns': [ 'StatsResult' ] }
+
+##
+# @StatsSchemaValue:
+#
+# Schema for a single statistic.
+#
+# @name: name of the statistic; each element of the schema is uniquely
+#     identified by a target, a provider (both available in
+#     @StatsSchema) and the name.
+#
+# @type: kind of statistic.
+#
+# @unit: basic unit of measure for the statistic; if missing, the
+#     statistic is a simple number or counter.
+#
+# @base: base for the multiple of @unit in which the statistic is
+#     measured.  Only present if @exponent is non-zero; @base and
+#     @exponent together form a SI prefix (e.g., _nano-_ for
+#     ``base=10`` and ``exponent=-9``) or IEC binary prefix (e.g.
+#     _kibi-_ for ``base=2`` and ``exponent=10``)
+#
+# @exponent: exponent for the multiple of @unit in which the statistic
+#     is expressed, or 0 for the basic unit
+#
+# @bucket-size: Present when @type is "linear-histogram", contains the
+#     width of each bucket of the histogram.
+#
+# Since: 7.1
+##
+{ 'struct': 'StatsSchemaValue',
+  'data': { 'name': 'str',
+            'type': 'StatsType',
+            '*unit': 'StatsUnit',
+            '*base': 'int8',
+            'exponent': 'int16',
+            '*bucket-size': 'uint32' } }
+
+##
+# @StatsSchema:
+#
+# Schema for all available statistics for a provider and target.
+#
+# @provider: provider for this set of statistics.
+#
+# @target: the kind of object that can be queried through the
+#     provider.
+#
+# @stats: list of statistics.
+#
+# Since: 7.1
+##
+{ 'struct': 'StatsSchema',
+  'data': { 'provider': 'StatsProvider',
+            'target': 'StatsTarget',
+            'stats': [ 'StatsSchemaValue' ] } }
+
+##
+# @query-stats-schemas:
+#
+# Return the schema for all available runtime-collected statistics.
+#
+# Note: runtime-collected statistics and their names fall outside
+#     QEMU's usual deprecation policies.  QEMU will try to keep the
+#     set of available data stable, together with their names, but
+#     will not guarantee stability at all costs; the same is true of
+#     providers that source statistics externally, e.g. from Linux.
+#     For example, if the same value is being tracked with different
+#     names on different architectures or by different providers, one
+#     of them might be renamed.  A statistic might go away if an
+#     algorithm is changed or some code is removed; changing a default
+#     might cause previously useful statistics to always report 0.
+#     Such changes, however, are expected to be rare.
+#
+# Since: 7.1
+##
+{ 'command': 'query-stats-schemas',
+  'data': { '*provider': 'StatsProvider' },
+  'returns': [ 'StatsSchema' ] }
index b74aa4d44c46abbe326c384c12ac7b71963ca25f..c0cb72dbe4de88dcb473d40155a107339b62abe1 100644 (file)
@@ -14,7 +14,6 @@
 #include "qemu/cutils.h"
 #include "qapi/string-output-visitor.h"
 #include "qapi/visitor-impl.h"
-#include "qemu/host-utils.h"
 #include <math.h>
 #include "qemu/range.h"
 
@@ -75,11 +74,27 @@ static StringOutputVisitor *to_sov(Visitor *v)
 
 static void string_output_set(StringOutputVisitor *sov, char *string)
 {
-    if (sov->string) {
-        g_string_free(sov->string, true);
+    switch (sov->list_mode) {
+    case LM_STARTED:
+        sov->list_mode = LM_IN_PROGRESS;
+        /* fall through */
+    case LM_NONE:
+        if (sov->string) {
+            g_string_free(sov->string, true);
+        }
+        sov->string = g_string_new(string);
+        g_free(string);
+        break;
+
+    case LM_IN_PROGRESS:
+    case LM_END:
+        g_string_append(sov->string, ", ");
+        g_string_append(sov->string, string);
+        break;
+
+    default:
+        abort();
     }
-    sov->string = g_string_new(string);
-    g_free(string);
 }
 
 static void string_output_append(StringOutputVisitor *sov, int64_t a)
@@ -258,7 +273,7 @@ static bool print_type_number(Visitor *v, const char *name, double *obj,
                               Error **errp)
 {
     StringOutputVisitor *sov = to_sov(v);
-    string_output_set(sov, g_strdup_printf("%f", *obj));
+    string_output_set(sov, g_strdup_printf("%.17g", *obj));
     return true;
 }
 
index 6a10c9ed8d27023eacfc48499c534920582820f9..a754455ca5581c1bdd9cb84d204d7664179e1a6b 100644 (file)
 # An enumeration of TPM models
 #
 # @tpm-tis: TPM TIS model
+#
 # @tpm-crb: TPM CRB model (since 2.12)
+#
 # @tpm-spapr: TPM SPAPR model (since 5.0)
 #
 # Since: 1.5
 ##
-{ 'enum': 'TpmModel', 'data': [ 'tpm-tis', 'tpm-crb', 'tpm-spapr' ] }
+{ 'enum': 'TpmModel', 'data': [ 'tpm-tis', 'tpm-crb', 'tpm-spapr' ],
+  'if': 'CONFIG_TPM' }
+
 ##
 # @query-tpm-models:
 #
@@ -31,9 +35,9 @@
 #
 # -> { "execute": "query-tpm-models" }
 # <- { "return": [ "tpm-tis", "tpm-crb", "tpm-spapr" ] }
-#
 ##
-{ 'command': 'query-tpm-models', 'returns': ['TpmModel'] }
+{ 'command': 'query-tpm-models', 'returns': ['TpmModel'],
+  'if': 'CONFIG_TPM' }
 
 ##
 # @TpmType:
 # An enumeration of TPM types
 #
 # @passthrough: TPM passthrough type
-# @emulator: Software Emulator TPM type
-#            Since: 2.11
+#
+# @emulator: Software Emulator TPM type (since 2.11)
 #
 # Since: 1.5
 ##
-{ 'enum': 'TpmType', 'data': [ 'passthrough', 'emulator' ] }
+{ 'enum': 'TpmType', 'data': [ 'passthrough', 'emulator' ],
+  'if': 'CONFIG_TPM' }
 
 ##
 # @query-tpm-types:
@@ -61,9 +66,9 @@
 #
 # -> { "execute": "query-tpm-types" }
 # <- { "return": [ "passthrough", "emulator" ] }
-#
 ##
-{ 'command': 'query-tpm-types', 'returns': ['TpmType'] }
+{ 'command': 'query-tpm-types', 'returns': ['TpmType'],
+  'if': 'CONFIG_TPM' }
 
 ##
 # @TPMPassthroughOptions:
 #
 # @path: string describing the path used for accessing the TPM device
 #
-# @cancel-path: string showing the TPM's sysfs cancel file
-#               for cancellation of TPM commands while they are executing
+# @cancel-path: string showing the TPM's sysfs cancel file for
+#     cancellation of TPM commands while they are executing
 #
 # Since: 1.5
 ##
 { 'struct': 'TPMPassthroughOptions',
   'data': { '*path': 'str',
-            '*cancel-path': 'str' } }
+            '*cancel-path': 'str' },
+  'if': 'CONFIG_TPM' }
 
 ##
 # @TPMEmulatorOptions:
 #
 # Since: 2.11
 ##
-{ 'struct': 'TPMEmulatorOptions', 'data': { 'chardev' : 'str' } }
+{ 'struct': 'TPMEmulatorOptions', 'data': { 'chardev' : 'str' },
+  'if': 'CONFIG_TPM' }
+
+##
+# @TPMPassthroughOptionsWrapper:
+#
+# Since: 1.5
+##
+{ 'struct': 'TPMPassthroughOptionsWrapper',
+  'data': { 'data': 'TPMPassthroughOptions' },
+  'if': 'CONFIG_TPM' }
+
+##
+# @TPMEmulatorOptionsWrapper:
+#
+# Since: 2.11
+##
+{ 'struct': 'TPMEmulatorOptionsWrapper',
+  'data': { 'data': 'TPMEmulatorOptions' },
+  'if': 'CONFIG_TPM' }
 
 ##
 # @TpmTypeOptions:
 #
-# A union referencing different TPM backend types' configuration options
+# A union referencing different TPM backend types' configuration
+# options
 #
-# @type: - 'passthrough' The configuration options for the TPM passthrough type
-#        - 'emulator' The configuration options for TPM emulator backend type
+# @type:
+#     - 'passthrough' The configuration options for the TPM
+#       passthrough type
+#     - 'emulator' The configuration options for TPM emulator backend
+#       type
 #
 # Since: 1.5
 ##
 { 'union': 'TpmTypeOptions',
-   'data': { 'passthrough' : 'TPMPassthroughOptions',
-             'emulator': 'TPMEmulatorOptions' } }
+  'base': { 'type': 'TpmType' },
+  'discriminator': 'type',
+  'data': { 'passthrough' : 'TPMPassthroughOptionsWrapper',
+            'emulator': 'TPMEmulatorOptionsWrapper' },
+  'if': 'CONFIG_TPM' }
 
 ##
 # @TPMInfo:
 { 'struct': 'TPMInfo',
   'data': {'id': 'str',
            'model': 'TpmModel',
-           'options': 'TpmTypeOptions' } }
+           'options': 'TpmTypeOptions' },
+  'if': 'CONFIG_TPM' }
 
 ##
 # @query-tpm:
 #        }
 #      ]
 #    }
-#
 ##
-{ 'command': 'query-tpm', 'returns': ['TPMInfo'] }
+{ 'command': 'query-tpm', 'returns': ['TPMInfo'],
+  'if': 'CONFIG_TPM' }
index 5eb4afa110c77f5a0a09803a47588ae972f09cce..ab108c4f0e34b315d4de49b96871b094bc42563b 100644 (file)
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 
 # qapi-visit-core.c
 visit_free(void *v) "v=%p"
@@ -17,6 +17,8 @@ visit_start_alternate(void *v, const char *name, void *obj, size_t size) "v=%p n
 visit_end_alternate(void *v, void *obj) "v=%p obj=%p"
 
 visit_optional(void *v, const char *name, bool *present) "v=%p name=%s present=%p"
+visit_policy_reject(void *v, const char *name) "v=%p name=%s"
+visit_policy_skip(void *v, const char *name) "v=%p name=%s"
 
 visit_type_enum(void *v, const char *name, int *obj) "v=%p name=%s obj=%p"
 visit_type_int(void *v, const char *name, int64_t *obj) "v=%p name=%s obj=%p"
index 47c68f04da7a349c50eae491a2ff4e01c78cddd1..2077d7e117b8ba381392a708b9a9d47409402d77 100644 (file)
@@ -1,4 +1,5 @@
 # -*- mode: python -*-
+# vim: filetype=python
 #
 # Copyright (C) 2011-2016 Lluís Vilanova <vilanova@ac.upc.edu>
 #
 # Information of a tracing event.
 #
 # @name: Event name.
+#
 # @state: Tracing state.
+#
 # @vcpu: Whether this is a per-vCPU event (since 2.7).
 #
-# An event is per-vCPU if it has the "vcpu" property in the "trace-events"
-# files.
+# Features:
+#
+# @deprecated: Member @vcpu is deprecated, and always ignored.
 #
 # Since: 2.2
 ##
 { 'struct': 'TraceEventInfo',
-  'data': {'name': 'str', 'state': 'TraceEventState', 'vcpu': 'bool'} }
+  'data': {'name': 'str', 'state': 'TraceEventState',
+           'vcpu': { 'type': 'bool', 'features': ['deprecated'] } } }
 
 ##
 # @trace-event-get-state:
 # Query the state of events.
 #
 # @name: Event name pattern (case-sensitive glob).
-# @vcpu: The vCPU to query (any by default; since 2.7).
 #
-# Returns: a list of @TraceEventInfo for the matching events
+# @vcpu: The vCPU to query (since 2.7).
 #
-#          An event is returned if:
+# Features:
 #
-#          - its name matches the @name pattern, and
-#          - if @vcpu is given, the event has the "vcpu" property.
+# @deprecated: Member @vcpu is deprecated, and always ignored.
 #
-#          Therefore, if @vcpu is given, the operation will only match per-vCPU events,
-#          returning their state on the specified vCPU. Special case: if @name is an
-#          exact match, @vcpu is given and the event does not have the "vcpu" property,
-#          an error is returned.
+# Returns: a list of @TraceEventInfo for the matching events
 #
 # Since: 2.2
 #
 #
 # -> { "execute": "trace-event-get-state",
 #      "arguments": { "name": "qemu_memalign" } }
-# <- { "return": [ { "name": "qemu_memalign", "state": "disabled" } ] }
-#
+# <- { "return": [ { "name": "qemu_memalign", "state": "disabled", "vcpu": false } ] }
 ##
 { 'command': 'trace-event-get-state',
-  'data': {'name': 'str', '*vcpu': 'int'},
+  'data': {'name': 'str',
+           '*vcpu': {'type': 'int', 'features': ['deprecated'] } },
   'returns': ['TraceEventInfo'] }
 
 ##
 # Set the dynamic tracing state of events.
 #
 # @name: Event name pattern (case-sensitive glob).
+#
 # @enable: Whether to enable tracing.
+#
 # @ignore-unavailable: Do not match unavailable events with @name.
+#
 # @vcpu: The vCPU to act upon (all by default; since 2.7).
 #
-# An event's state is modified if:
-# - its name matches the @name pattern, and
-# - if @vcpu is given, the event has the "vcpu" property.
+# Features:
 #
-# Therefore, if @vcpu is given, the operation will only match per-vCPU events,
-# setting their state on the specified vCPU. Special case: if @name is an exact
-# match, @vcpu is given and the event does not have the "vcpu" property, an
-# error is returned.
+# @deprecated: Member @vcpu is deprecated, and always ignored.
 #
 # Since: 2.2
 #
 # Example:
 #
 # -> { "execute": "trace-event-set-state",
-#      "arguments": { "name": "qemu_memalign", "enable": "true" } }
+#      "arguments": { "name": "qemu_memalign", "enable": true } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'trace-event-set-state',
   'data': {'name': 'str', 'enable': 'bool', '*ignore-unavailable': 'bool',
-           '*vcpu': 'int'} }
+           '*vcpu': {'type': 'int', 'features': ['deprecated'] } } }
index 15ddebdbc360b79d4663d96c10e2df00cdace52d..cffee2de28d19fe204da93e29c2707ca8285eeff 100644 (file)
 #
 # An enumeration of Transactional completion modes.
 #
-# @individual: Do not attempt to cancel any other Actions if any Actions fail
-#              after the Transaction request succeeds. All Actions that
-#              can complete successfully will do so without waiting on others.
-#              This is the default.
+# @individual: Do not attempt to cancel any other Actions if any
+#     Actions fail after the Transaction request succeeds.  All
+#     Actions that can complete successfully will do so without
+#     waiting on others.  This is the default.
 #
-# @grouped: If any Action fails after the Transaction succeeds, cancel all
-#           Actions. Actions do not complete until all Actions are ready to
-#           complete. May be rejected by Actions that do not support this
-#           completion mode.
+# @grouped: If any Action fails after the Transaction succeeds, cancel
+#     all Actions.  Actions do not complete until all Actions are
+#     ready to complete.  May be rejected by Actions that do not
+#     support this completion mode.
 #
 # Since: 2.5
 ##
 { 'enum': 'ActionCompletionMode',
   'data': [ 'individual', 'grouped' ] }
 
+##
+# @TransactionActionKind:
+#
+# @abort: Since 1.6
+#
+# @block-dirty-bitmap-add: Since 2.5
+#
+# @block-dirty-bitmap-remove: Since 4.2
+#
+# @block-dirty-bitmap-clear: Since 2.5
+#
+# @block-dirty-bitmap-enable: Since 4.0
+#
+# @block-dirty-bitmap-disable: Since 4.0
+#
+# @block-dirty-bitmap-merge: Since 4.0
+#
+# @blockdev-backup: Since 2.3
+#
+# @blockdev-snapshot: Since 2.5
+#
+# @blockdev-snapshot-internal-sync: Since 1.7
+#
+# @blockdev-snapshot-sync: since 1.1
+#
+# @drive-backup: Since 1.6
+#
+# Features:
+#
+# @deprecated: Member @drive-backup is deprecated.  Use member
+#     @blockdev-backup instead.
+#
+# Since: 1.1
+##
+{ 'enum': 'TransactionActionKind',
+  'data': [ 'abort', 'block-dirty-bitmap-add', 'block-dirty-bitmap-remove',
+            'block-dirty-bitmap-clear', 'block-dirty-bitmap-enable',
+            'block-dirty-bitmap-disable', 'block-dirty-bitmap-merge',
+            'blockdev-backup', 'blockdev-snapshot',
+            'blockdev-snapshot-internal-sync', 'blockdev-snapshot-sync',
+            { 'name': 'drive-backup', 'features': [ 'deprecated' ] } ] }
+
+##
+# @AbortWrapper:
+#
+# Since: 1.6
+##
+{ 'struct': 'AbortWrapper',
+  'data': { 'data': 'Abort' } }
+
+##
+# @BlockDirtyBitmapAddWrapper:
+#
+# Since: 2.5
+##
+{ 'struct': 'BlockDirtyBitmapAddWrapper',
+  'data': { 'data': 'BlockDirtyBitmapAdd' } }
+
+##
+# @BlockDirtyBitmapWrapper:
+#
+# Since: 2.5
+##
+{ 'struct': 'BlockDirtyBitmapWrapper',
+  'data': { 'data': 'BlockDirtyBitmap' } }
+
+##
+# @BlockDirtyBitmapMergeWrapper:
+#
+# Since: 4.0
+##
+{ 'struct': 'BlockDirtyBitmapMergeWrapper',
+  'data': { 'data': 'BlockDirtyBitmapMerge' } }
+
+##
+# @BlockdevBackupWrapper:
+#
+# Since: 2.3
+##
+{ 'struct': 'BlockdevBackupWrapper',
+  'data': { 'data': 'BlockdevBackup' } }
+
+##
+# @BlockdevSnapshotWrapper:
+#
+# Since: 2.5
+##
+{ 'struct': 'BlockdevSnapshotWrapper',
+  'data': { 'data': 'BlockdevSnapshot' } }
+
+##
+# @BlockdevSnapshotInternalWrapper:
+#
+# Since: 1.7
+##
+{ 'struct': 'BlockdevSnapshotInternalWrapper',
+  'data': { 'data': 'BlockdevSnapshotInternal' } }
+
+##
+# @BlockdevSnapshotSyncWrapper:
+#
+# Since: 1.1
+##
+{ 'struct': 'BlockdevSnapshotSyncWrapper',
+  'data': { 'data': 'BlockdevSnapshotSync' } }
+
+##
+# @DriveBackupWrapper:
+#
+# Since: 1.6
+##
+{ 'struct': 'DriveBackupWrapper',
+  'data': { 'data': 'DriveBackup' } }
+
 ##
 # @TransactionAction:
 #
 # A discriminated record of operations that can be performed with
-# @transaction. Action @type can be:
-#
-# - @abort: since 1.6
-# - @block-dirty-bitmap-add: since 2.5
-# - @block-dirty-bitmap-remove: since 4.2
-# - @block-dirty-bitmap-clear: since 2.5
-# - @block-dirty-bitmap-enable: since 4.0
-# - @block-dirty-bitmap-disable: since 4.0
-# - @block-dirty-bitmap-merge: since 4.0
-# - @blockdev-backup: since 2.3
-# - @blockdev-snapshot: since 2.5
-# - @blockdev-snapshot-internal-sync: since 1.7
-# - @blockdev-snapshot-sync: since 1.1
-# - @drive-backup: since 1.6
+# @transaction.
 #
 # Since: 1.1
 ##
 { 'union': 'TransactionAction',
+  'base': { 'type': 'TransactionActionKind' },
+  'discriminator': 'type',
   'data': {
-       'abort': 'Abort',
-       'block-dirty-bitmap-add': 'BlockDirtyBitmapAdd',
-       'block-dirty-bitmap-remove': 'BlockDirtyBitmap',
-       'block-dirty-bitmap-clear': 'BlockDirtyBitmap',
-       'block-dirty-bitmap-enable': 'BlockDirtyBitmap',
-       'block-dirty-bitmap-disable': 'BlockDirtyBitmap',
-       'block-dirty-bitmap-merge': 'BlockDirtyBitmapMerge',
-       'blockdev-backup': 'BlockdevBackup',
-       'blockdev-snapshot': 'BlockdevSnapshot',
-       'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal',
-       'blockdev-snapshot-sync': 'BlockdevSnapshotSync',
-       'drive-backup': 'DriveBackup'
+       'abort': 'AbortWrapper',
+       'block-dirty-bitmap-add': 'BlockDirtyBitmapAddWrapper',
+       'block-dirty-bitmap-remove': 'BlockDirtyBitmapWrapper',
+       'block-dirty-bitmap-clear': 'BlockDirtyBitmapWrapper',
+       'block-dirty-bitmap-enable': 'BlockDirtyBitmapWrapper',
+       'block-dirty-bitmap-disable': 'BlockDirtyBitmapWrapper',
+       'block-dirty-bitmap-merge': 'BlockDirtyBitmapMergeWrapper',
+       'blockdev-backup': 'BlockdevBackupWrapper',
+       'blockdev-snapshot': 'BlockdevSnapshotWrapper',
+       'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternalWrapper',
+       'blockdev-snapshot-sync': 'BlockdevSnapshotSyncWrapper',
+       'drive-backup': 'DriveBackupWrapper'
    } }
 
 ##
 # Optional arguments to modify the behavior of a Transaction.
 #
 # @completion-mode: Controls how jobs launched asynchronously by
-#                   Actions will complete or fail as a group.
-#                   See @ActionCompletionMode for details.
+#     Actions will complete or fail as a group.  See
+#     @ActionCompletionMode for details.
 #
 # Since: 2.5
 ##
 ##
 # @transaction:
 #
-# Executes a number of transactionable QMP commands atomically. If any
-# operation fails, then the entire set of actions will be abandoned and the
-# appropriate error returned.
+# Executes a number of transactionable QMP commands atomically.  If
+# any operation fails, then the entire set of actions will be
+# abandoned and the appropriate error returned.
 #
-# For external snapshots, the dictionary contains the device, the file to use for
-# the new snapshot, and the format.  The default format, if not specified, is
-# qcow2.
+# For external snapshots, the dictionary contains the device, the file
+# to use for the new snapshot, and the format.  The default format, if
+# not specified, is qcow2.
 #
 # Each new snapshot defaults to being created by QEMU (wiping any
-# contents if the file already exists), but it is also possible to reuse
-# an externally-created file.  In the latter case, you should ensure that
-# the new image file has the same contents as the current one; QEMU cannot
-# perform any meaningful check.  Typically this is achieved by using the
-# current image file as the backing file for the new image.
+# contents if the file already exists), but it is also possible to
+# reuse an externally-created file.  In the latter case, you should
+# ensure that the new image file has the same contents as the current
+# one; QEMU cannot perform any meaningful check.  Typically this is
+# achieved by using the current image file as the backing file for the
+# new image.
 #
 # On failure, the original disks pre-snapshot attempt will be used.
 #
-# For internal snapshots, the dictionary contains the device and the snapshot's
-# name.  If an internal snapshot matching name already exists, the request will
-# be rejected.  Only some image formats support it, for example, qcow2, rbd,
-# and sheepdog.
+# For internal snapshots, the dictionary contains the device and the
+# snapshot's name.  If an internal snapshot matching name already
+# exists, the request will be rejected.  Only some image formats
+# support it, for example, qcow2, and rbd,
 #
-# On failure, qemu will try delete the newly created internal snapshot in the
-# transaction.  When an I/O error occurs during deletion, the user needs to fix
-# it later with qemu-img or other command.
+# On failure, qemu will try delete the newly created internal snapshot
+# in the transaction.  When an I/O error occurs during deletion, the
+# user needs to fix it later with qemu-img or other command.
 #
-# @actions: List of @TransactionAction;
-#           information needed for the respective operations.
+# @actions: List of @TransactionAction; information needed for the
+#     respective operations.
 #
 # @properties: structure of additional options to control the
-#              execution of the transaction. See @TransactionProperties
-#              for additional detail.
+#     execution of the transaction.  See @TransactionProperties for
+#     additional detail.
 #
 # Returns: nothing on success
 #
-#          Errors depend on the operations of the transaction
+#     Errors depend on the operations of the transaction
 #
-# Note: The transaction aborts on the first failure.  Therefore, there will be
-#       information on only one failed operation returned in an error condition, and
-#       subsequent actions will not have been attempted.
+# Note: The transaction aborts on the first failure.  Therefore, there
+#     will be information on only one failed operation returned in an
+#     error condition, and subsequent actions will not have been
+#     attempted.
 #
 # Since: 1.1
 #
 #                                      "device": "ide-hd2",
 #                                      "name": "snapshot0" } } ] } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'transaction',
   'data': { 'actions': [ 'TransactionAction' ],
index 6c7b33cb72389988ce71f291c863430ec4e32335..a0158baf23123f5c336c569bb046b3c6f6a52f21 100644 (file)
 # = Remote desktop
 ##
 
+{ 'include': 'common.json' }
 { 'include': 'sockets.json' }
 
 ##
-# @set_password:
+# @DisplayProtocol:
+#
+# Display protocols which support changing password options.
+#
+# Since: 7.0
+##
+{ 'enum': 'DisplayProtocol',
+  'data': [ 'vnc', 'spice' ] }
+
+##
+# @SetPasswordAction:
+#
+# An action to take on changing a password on a connection with active
+# clients.
+#
+# @keep: maintain existing clients
+#
+# @fail: fail the command if clients are connected
+#
+# @disconnect: disconnect existing clients
 #
-# Sets the password of a remote display session.
+# Since: 7.0
+##
+{ 'enum': 'SetPasswordAction',
+  'data': [ 'keep', 'fail', 'disconnect' ] }
+
+##
+# @SetPasswordOptions:
 #
-# @protocol: - 'vnc' to modify the VNC server password
-#            - 'spice' to modify the Spice server password
+# Options for set_password.
+#
+# @protocol:
+#     - 'vnc' to modify the VNC server password
+#     - 'spice' to modify the Spice server password
 #
 # @password: the new password
 #
-# @connected: how to handle existing clients when changing the
-#             password.  If nothing is specified, defaults to 'keep'
-#             'fail' to fail the command if clients are connected
-#             'disconnect' to disconnect existing clients
-#             'keep' to maintain existing clients
+# @connected: How to handle existing clients when changing the
+#     password.  If nothing is specified, defaults to 'keep'. For VNC,
+#     only 'keep' is currently implemented.
+#
+# Since: 7.0
+##
+{ 'union': 'SetPasswordOptions',
+  'base': { 'protocol': 'DisplayProtocol',
+            'password': 'str',
+            '*connected': 'SetPasswordAction' },
+  'discriminator': 'protocol',
+  'data': { 'vnc': 'SetPasswordOptionsVnc' } }
+
+##
+# @SetPasswordOptionsVnc:
 #
-# Returns: - Nothing on success
-#          - If Spice is not enabled, DeviceNotFound
+# Options for set_password specific to the VNC procotol.
 #
-# Since: 0.14.0
+# @display: The id of the display where the password should be
+#     changed.  Defaults to the first.
+#
+# Since: 7.0
+##
+{ 'struct': 'SetPasswordOptionsVnc',
+  'data': { '*display': 'str' } }
+
+##
+# @set_password:
+#
+# Set the password of a remote display server.
+#
+# Returns:
+#     - Nothing on success
+#     - If Spice is not enabled, DeviceNotFound
+#
+# Since: 0.14
 #
 # Example:
 #
 # -> { "execute": "set_password", "arguments": { "protocol": "vnc",
 #                                                "password": "secret" } }
 # <- { "return": {} }
-#
 ##
-{ 'command': 'set_password',
-  'data': {'protocol': 'str', 'password': 'str', '*connected': 'str'} }
+{ 'command': 'set_password', 'boxed': true, 'data': 'SetPasswordOptions' }
 
 ##
-# @expire_password:
+# @ExpirePasswordOptions:
 #
-# Expire the password of a remote display server.
+# General options for expire_password.
 #
-# @protocol: the name of the remote display protocol 'vnc' or 'spice'
+# @protocol:
+#     - 'vnc' to modify the VNC server expiration
+#     - 'spice' to modify the Spice server expiration
 #
 # @time: when to expire the password.
 #
-#        - 'now' to expire the password immediately
-#        - 'never' to cancel password expiration
-#        - '+INT' where INT is the number of seconds from now (integer)
-#        - 'INT' where INT is the absolute time in seconds
+#     - 'now' to expire the password immediately
+#     - 'never' to cancel password expiration
+#     - '+INT' where INT is the number of seconds from now (integer)
+#     - 'INT' where INT is the absolute time in seconds
 #
-# Returns: - Nothing on success
-#          - If @protocol is 'spice' and Spice is not active, DeviceNotFound
+# Notes: Time is relative to the server and currently there is no way
+#     to coordinate server time with client time.  It is not
+#     recommended to use the absolute time version of the @time
+#     parameter unless you're sure you are on the same machine as the
+#     QEMU instance.
 #
-# Since: 0.14.0
+# Since: 7.0
+##
+{ 'union': 'ExpirePasswordOptions',
+  'base': { 'protocol': 'DisplayProtocol',
+            'time': 'str' },
+  'discriminator': 'protocol',
+  'data': { 'vnc': 'ExpirePasswordOptionsVnc' } }
+
+##
+# @ExpirePasswordOptionsVnc:
+#
+# Options for expire_password specific to the VNC procotol.
 #
-# Notes: Time is relative to the server and currently there is no way to
-#        coordinate server time with client time.  It is not recommended to
-#        use the absolute time version of the @time parameter unless you're
-#        sure you are on the same machine as the QEMU instance.
+# @display: The id of the display where the expiration should be
+#     changed.  Defaults to the first.
+#
+# Since: 7.0
+##
+{ 'struct': 'ExpirePasswordOptionsVnc',
+  'data': { '*display': 'str' } }
+
+##
+# @expire_password:
+#
+# Expire the password of a remote display server.
+#
+# Returns:
+#     - Nothing on success
+#     - If @protocol is 'spice' and Spice is not active,
+#       DeviceNotFound
+#
+# Since: 0.14
 #
 # Example:
 #
 # -> { "execute": "expire_password", "arguments": { "protocol": "vnc",
 #                                                   "time": "+60" } }
 # <- { "return": {} }
+##
+{ 'command': 'expire_password', 'boxed': true, 'data': 'ExpirePasswordOptions' }
+
+##
+# @ImageFormat:
+#
+# Supported image format types.
+#
+# @png: PNG format
 #
+# @ppm: PPM format
+#
+# Since: 7.1
 ##
-{ 'command': 'expire_password', 'data': {'protocol': 'str', 'time': 'str'} }
+{ 'enum': 'ImageFormat',
+  'data': ['ppm', 'png'] }
 
 ##
 # @screendump:
 #
-# Write a PPM of the VGA screen to a file.
+# Capture the contents of a screen and write it to a file.
+#
+# @filename: the path of a new file to store the image
 #
-# @filename: the path of a new PPM file to store the image
+# @device: ID of the display device that should be dumped.  If this
+#     parameter is missing, the primary display will be used.  (Since
+#     2.12)
 #
-# @device: ID of the display device that should be dumped. If this parameter
-#          is missing, the primary display will be used. (Since 2.12)
+# @head: head to use in case the device supports multiple heads.  If
+#     this parameter is missing, head #0 will be used.  Also note that
+#     the head can only be specified in conjunction with the device
+#     ID. (Since 2.12)
 #
-# @head: head to use in case the device supports multiple heads. If this
-#        parameter is missing, head #0 will be used. Also note that the head
-#        can only be specified in conjunction with the device ID. (Since 2.12)
+# @format: image format for screendump.  (default: ppm) (Since 7.1)
 #
 # Returns: Nothing on success
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 # -> { "execute": "screendump",
 #      "arguments": { "filename": "/tmp/image" } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'screendump',
-  'data': {'filename': 'str', '*device': 'str', '*head': 'int'},
-  'coroutine': true }
+  'data': {'filename': 'str', '*device': 'str', '*head': 'int',
+           '*format': 'ImageFormat'},
+  'coroutine': true,
+  'if': 'CONFIG_PIXMAN' }
 
 ##
 # == Spice
   'data': { 'host': 'str',
             'port': 'str',
             'family': 'NetworkAddressFamily' },
-  'if': 'defined(CONFIG_SPICE)' }
+  'if': 'CONFIG_SPICE' }
 
 ##
 # @SpiceServerInfo:
 { 'struct': 'SpiceServerInfo',
   'base': 'SpiceBasicInfo',
   'data': { '*auth': 'str' },
-  'if': 'defined(CONFIG_SPICE)' }
+  'if': 'CONFIG_SPICE' }
 
 ##
 # @SpiceChannel:
 #
 # Information about a SPICE client channel.
 #
-# @connection-id: SPICE connection id number.  All channels with the same id
-#                 belong to the same SPICE session.
+# @connection-id: SPICE connection id number.  All channels with the
+#     same id belong to the same SPICE session.
 #
 # @channel-type: SPICE channel type number.  "1" is the main control
-#                channel, filter for this one if you want to track spice
-#                sessions only
+#     channel, filter for this one if you want to track spice sessions
+#     only
 #
-# @channel-id: SPICE channel ID number.  Usually "0", might be different when
-#              multiple channels of the same type exist, such as multiple
-#              display channels in a multihead setup
+# @channel-id: SPICE channel ID number.  Usually "0", might be
+#     different when multiple channels of the same type exist, such as
+#     multiple display channels in a multihead setup
 #
 # @tls: true if the channel is encrypted, false otherwise.
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'SpiceChannel',
   'base': 'SpiceBasicInfo',
   'data': {'connection-id': 'int', 'channel-type': 'int', 'channel-id': 'int',
            'tls': 'bool'},
-  'if': 'defined(CONFIG_SPICE)' }
+  'if': 'CONFIG_SPICE' }
 
 ##
 # @SpiceQueryMouseMode:
 #
 # @server: Mouse cursor position is determined by the server.
 #
-# @unknown: No information is available about mouse mode used by
-#           the spice server.
+# @unknown: No information is available about mouse mode used by the
+#     spice server.
 #
 # Note: spice/enums.h has a SpiceMouseMode already, hence the name.
 #
 ##
 { 'enum': 'SpiceQueryMouseMode',
   'data': [ 'client', 'server', 'unknown' ],
-  'if': 'defined(CONFIG_SPICE)' }
+  'if': 'CONFIG_SPICE' }
 
 ##
 # @SpiceInfo:
 # @enabled: true if the SPICE server is enabled, false otherwise
 #
 # @migrated: true if the last guest migration completed and spice
-#            migration had completed as well. false otherwise. (since 1.4)
+#     migration had completed as well.  false otherwise.  (since 1.4)
 #
 # @host: The hostname the SPICE server is bound to.  This depends on
-#        the name resolution on the host and may be an IP address.
+#     the name resolution on the host and may be an IP address.
 #
 # @port: The SPICE server's port number.
 #
 #
 # @auth: the current authentication type used by the server
 #
-#        - 'none'  if no authentication is being used
-#        - 'spice' uses SASL or direct TLS authentication, depending on command
-#          line options
+#     - 'none'  if no authentication is being used
+#     - 'spice' uses SASL or direct TLS authentication, depending on
+#       command line options
 #
-# @mouse-mode: The mode in which the mouse cursor is displayed currently. Can
-#              be determined by the client or the server, or unknown if spice
-#              server doesn't provide this information. (since: 1.1)
+# @mouse-mode: The mode in which the mouse cursor is displayed
+#     currently.  Can be determined by the client or the server, or
+#     unknown if spice server doesn't provide this information.
+#     (since: 1.1)
 #
 # @channels: a list of @SpiceChannel for each active spice channel
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'SpiceInfo',
   'data': {'enabled': 'bool', 'migrated': 'bool', '*host': 'str', '*port': 'int',
            '*tls-port': 'int', '*auth': 'str', '*compiled-version': 'str',
            'mouse-mode': 'SpiceQueryMouseMode', '*channels': ['SpiceChannel']},
-  'if': 'defined(CONFIG_SPICE)' }
+  'if': 'CONFIG_SPICE' }
 
 ##
 # @query-spice:
 #
 # Returns: @SpiceInfo
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 #          "enabled": true,
 #          "auth": "spice",
 #          "port": 5920,
+#          "migrated":false,
 #          "tls-port": 5921,
 #          "host": "0.0.0.0",
+#          "mouse-mode":"client",
 #          "channels": [
 #             {
 #                "port": "54924",
 #          ]
 #       }
 #    }
-#
 ##
 { 'command': 'query-spice', 'returns': 'SpiceInfo',
-  'if': 'defined(CONFIG_SPICE)' }
+  'if': 'CONFIG_SPICE' }
 
 ##
 # @SPICE_CONNECTED:
 #
 # @client: client information
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 #        "server": { "port": "5920", "family": "ipv4", "host": "127.0.0.1"},
 #        "client": {"port": "52873", "family": "ipv4", "host": "127.0.0.1"}
 #    }}
-#
 ##
 { 'event': 'SPICE_CONNECTED',
   'data': { 'server': 'SpiceBasicInfo',
             'client': 'SpiceBasicInfo' },
-  'if': 'defined(CONFIG_SPICE)' }
+  'if': 'CONFIG_SPICE' }
 
 ##
 # @SPICE_INITIALIZED:
 #
-# Emitted after initial handshake and authentication takes place (if any)
-# and the SPICE channel is up and running
+# Emitted after initial handshake and authentication takes place (if
+# any) and the SPICE channel is up and running
 #
 # @server: server information
 #
 # @client: client information
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 #                          "connection-id": 1804289383, "host": "127.0.0.1",
 #                          "channel-id": 0, "tls": true}
 #    }}
-#
 ##
 { 'event': 'SPICE_INITIALIZED',
   'data': { 'server': 'SpiceServerInfo',
             'client': 'SpiceChannel' },
-  'if': 'defined(CONFIG_SPICE)' }
+  'if': 'CONFIG_SPICE' }
 
 ##
 # @SPICE_DISCONNECTED:
 #
 # @client: client information
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 #        "server": { "port": "5920", "family": "ipv4", "host": "127.0.0.1"},
 #        "client": {"port": "52873", "family": "ipv4", "host": "127.0.0.1"}
 #    }}
-#
 ##
 { 'event': 'SPICE_DISCONNECTED',
   'data': { 'server': 'SpiceBasicInfo',
             'client': 'SpiceBasicInfo' },
-  'if': 'defined(CONFIG_SPICE)' }
+  'if': 'CONFIG_SPICE' }
 
 ##
 # @SPICE_MIGRATE_COMPLETED:
 #
 # <- { "timestamp": {"seconds": 1290688046, "microseconds": 417172},
 #      "event": "SPICE_MIGRATE_COMPLETED" }
-#
 ##
 { 'event': 'SPICE_MIGRATE_COMPLETED',
-  'if': 'defined(CONFIG_SPICE)' }
+  'if': 'CONFIG_SPICE' }
 
 ##
 # == VNC
 #
 # @host: IP address
 #
-# @service: The service name of the vnc port. This may depend on the host
-#           system's service database so symbolic names should not be relied
-#           on.
+# @service: The service name of the vnc port.  This may depend on the
+#     host system's service database so symbolic names should not be
+#     relied on.
 #
 # @family: address family
 #
             'service': 'str',
             'family': 'NetworkAddressFamily',
             'websocket': 'bool' },
-  'if': 'defined(CONFIG_VNC)' }
+  'if': 'CONFIG_VNC' }
 
 ##
 # @VncServerInfo:
 #
 # The network connection information for server
 #
-# @auth: authentication method used for
-#        the plain (non-websocket) VNC server
+# @auth: authentication method used for the plain (non-websocket) VNC
+#     server
 #
 # Since: 2.1
 ##
 { 'struct': 'VncServerInfo',
   'base': 'VncBasicInfo',
   'data': { '*auth': 'str' },
-  'if': 'defined(CONFIG_VNC)' }
+  'if': 'CONFIG_VNC' }
 
 ##
 # @VncClientInfo:
 # Information about a connected VNC client.
 #
 # @x509_dname: If x509 authentication is in use, the Distinguished
-#              Name of the client.
+#     Name of the client.
 #
 # @sasl_username: If SASL authentication is in use, the SASL username
-#                 used for authentication.
+#     used for authentication.
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'VncClientInfo',
   'base': 'VncBasicInfo',
   'data': { '*x509_dname': 'str', '*sasl_username': 'str' },
-  'if': 'defined(CONFIG_VNC)' }
+  'if': 'CONFIG_VNC' }
 
 ##
 # @VncInfo:
 #
 # @enabled: true if the VNC server is enabled, false otherwise
 #
-# @host: The hostname the VNC server is bound to.  This depends on
-#        the name resolution on the host and may be an IP address.
+# @host: The hostname the VNC server is bound to.  This depends on the
+#     name resolution on the host and may be an IP address.
 #
-# @family: - 'ipv6' if the host is listening for IPv6 connections
-#          - 'ipv4' if the host is listening for IPv4 connections
-#          - 'unix' if the host is listening on a unix domain socket
-#          - 'unknown' otherwise
+# @family:
+#     - 'ipv6' if the host is listening for IPv6 connections
+#     - 'ipv4' if the host is listening for IPv4 connections
+#     - 'unix' if the host is listening on a unix domain socket
+#     - 'unknown' otherwise
 #
 # @service: The service name of the server's port.  This may depends
-#           on the host system's service database so symbolic names should not
-#           be relied on.
+#     on the host system's service database so symbolic names should
+#     not be relied on.
 #
 # @auth: the current authentication type used by the server
 #
-#        - 'none' if no authentication is being used
-#        - 'vnc' if VNC authentication is being used
-#        - 'vencrypt+plain' if VEncrypt is used with plain text authentication
-#        - 'vencrypt+tls+none' if VEncrypt is used with TLS and no authentication
-#        - 'vencrypt+tls+vnc' if VEncrypt is used with TLS and VNC authentication
-#        - 'vencrypt+tls+plain' if VEncrypt is used with TLS and plain text auth
-#        - 'vencrypt+x509+none' if VEncrypt is used with x509 and no auth
-#        - 'vencrypt+x509+vnc' if VEncrypt is used with x509 and VNC auth
-#        - 'vencrypt+x509+plain' if VEncrypt is used with x509 and plain text auth
-#        - 'vencrypt+tls+sasl' if VEncrypt is used with TLS and SASL auth
-#        - 'vencrypt+x509+sasl' if VEncrypt is used with x509 and SASL auth
-#
-# @clients: a list of @VncClientInfo of all currently connected clients
-#
-# Since: 0.14.0
+#     - 'none' if no authentication is being used
+#     - 'vnc' if VNC authentication is being used
+#     - 'vencrypt+plain' if VEncrypt is used with plain text
+#       authentication
+#     - 'vencrypt+tls+none' if VEncrypt is used with TLS and no
+#       authentication
+#     - 'vencrypt+tls+vnc' if VEncrypt is used with TLS and VNC
+#       authentication
+#     - 'vencrypt+tls+plain' if VEncrypt is used with TLS and plain
+#       text auth
+#     - 'vencrypt+x509+none' if VEncrypt is used with x509 and no auth
+#     - 'vencrypt+x509+vnc' if VEncrypt is used with x509 and VNC auth
+#     - 'vencrypt+x509+plain' if VEncrypt is used with x509 and plain
+#       text auth
+#     - 'vencrypt+tls+sasl' if VEncrypt is used with TLS and SASL auth
+#     - 'vencrypt+x509+sasl' if VEncrypt is used with x509 and SASL
+#       auth
+#
+# @clients: a list of @VncClientInfo of all currently connected
+#     clients
+#
+# Since: 0.14
 ##
 { 'struct': 'VncInfo',
   'data': {'enabled': 'bool', '*host': 'str',
            '*family': 'NetworkAddressFamily',
            '*service': 'str', '*auth': 'str', '*clients': ['VncClientInfo']},
-  'if': 'defined(CONFIG_VNC)' }
+  'if': 'CONFIG_VNC' }
 
 ##
 # @VncPrimaryAuth:
 { 'enum': 'VncPrimaryAuth',
   'data': [ 'none', 'vnc', 'ra2', 'ra2ne', 'tight', 'ultra',
             'tls', 'vencrypt', 'sasl' ],
-  'if': 'defined(CONFIG_VNC)' }
+  'if': 'CONFIG_VNC' }
 
 ##
 # @VncVencryptSubAuth:
             'tls-vnc',   'x509-vnc',
             'tls-plain', 'x509-plain',
             'tls-sasl',  'x509-sasl' ],
-  'if': 'defined(CONFIG_VNC)' }
+  'if': 'CONFIG_VNC' }
 
 ##
 # @VncServerInfo2:
 #
 # @auth: The current authentication type used by the servers
 #
-# @vencrypt: The vencrypt sub authentication type used by the
-#            servers, only specified in case auth == vencrypt.
+# @vencrypt: The vencrypt sub authentication type used by the servers,
+#     only specified in case auth == vencrypt.
 #
 # Since: 2.9
 ##
   'base': 'VncBasicInfo',
   'data': { 'auth'      : 'VncPrimaryAuth',
             '*vencrypt' : 'VncVencryptSubAuth' },
-  'if': 'defined(CONFIG_VNC)' }
+  'if': 'CONFIG_VNC' }
 
 ##
 # @VncInfo2:
 # @id: vnc server name.
 #
 # @server: A list of @VncBasincInfo describing all listening sockets.
-#          The list can be empty (in case the vnc server is disabled).
-#          It also may have multiple entries: normal + websocket,
-#          possibly also ipv4 + ipv6 in the future.
+#     The list can be empty (in case the vnc server is disabled). It
+#     also may have multiple entries: normal + websocket, possibly
+#     also ipv4 + ipv6 in the future.
 #
-# @clients: A list of @VncClientInfo of all currently connected clients.
-#           The list can be empty, for obvious reasons.
+# @clients: A list of @VncClientInfo of all currently connected
+#     clients.  The list can be empty, for obvious reasons.
 #
-# @auth: The current authentication type used by the non-websockets servers
+# @auth: The current authentication type used by the non-websockets
+#     servers
 #
 # @vencrypt: The vencrypt authentication type used by the servers,
-#            only specified in case auth == vencrypt.
+#     only specified in case auth == vencrypt.
 #
 # @display: The display device the vnc server is linked to.
 #
             'auth'      : 'VncPrimaryAuth',
             '*vencrypt' : 'VncVencryptSubAuth',
             '*display'  : 'str' },
-  'if': 'defined(CONFIG_VNC)' }
+  'if': 'CONFIG_VNC' }
 
 ##
 # @query-vnc:
 #
 # Returns: @VncInfo
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 #             {
 #                "host":"127.0.0.1",
 #                "service":"50401",
-#                "family":"ipv4"
+#                "family":"ipv4",
+#                "websocket":false
 #             }
 #          ]
 #       }
 #    }
-#
 ##
 { 'command': 'query-vnc', 'returns': 'VncInfo',
-  'if': 'defined(CONFIG_VNC)' }
+  'if': 'CONFIG_VNC' }
 ##
 # @query-vnc-servers:
 #
 # Since: 2.3
 ##
 { 'command': 'query-vnc-servers', 'returns': ['VncInfo2'],
-  'if': 'defined(CONFIG_VNC)' }
+  'if': 'CONFIG_VNC' }
 
 ##
 # @change-vnc-password:
 #
 # Since: 1.1
 #
-# Notes: An empty password in this command will set the password to the empty
-#        string.  Existing clients are unaffected by executing this command.
+# Notes: An empty password in this command will set the password to
+#     the empty string.  Existing clients are unaffected by executing
+#     this command.
 ##
 { 'command': 'change-vnc-password',
   'data': { 'password': 'str' },
-  'if': 'defined(CONFIG_VNC)' }
+  'if': 'CONFIG_VNC' }
 
 ##
 # @VNC_CONNECTED:
 #
 # @client: client information
 #
-# Note: This event is emitted before any authentication takes place, thus
-#       the authentication ID is not provided
+# Note: This event is emitted before any authentication takes place,
+#     thus the authentication ID is not provided
 #
-# Since: 0.13.0
+# Since: 0.13
 #
 # Example:
 #
 # <- { "event": "VNC_CONNECTED",
 #      "data": {
-#            "server": { "auth": "sasl", "family": "ipv4",
+#            "server": { "auth": "sasl", "family": "ipv4", "websocket": false,
 #                        "service": "5901", "host": "0.0.0.0" },
 #            "client": { "family": "ipv4", "service": "58425",
-#                        "host": "127.0.0.1" } },
+#                        "host": "127.0.0.1", "websocket": false } },
 #      "timestamp": { "seconds": 1262976601, "microseconds": 975795 } }
-#
 ##
 { 'event': 'VNC_CONNECTED',
   'data': { 'server': 'VncServerInfo',
             'client': 'VncBasicInfo' },
-  'if': 'defined(CONFIG_VNC)' }
+  'if': 'CONFIG_VNC' }
 
 ##
 # @VNC_INITIALIZED:
 #
-# Emitted after authentication takes place (if any) and the VNC session is
-# made active
+# Emitted after authentication takes place (if any) and the VNC
+# session is made active
 #
 # @server: server information
 #
 # @client: client information
 #
-# Since: 0.13.0
+# Since: 0.13
 #
 # Example:
 #
 # <-  { "event": "VNC_INITIALIZED",
 #       "data": {
-#            "server": { "auth": "sasl", "family": "ipv4",
+#            "server": { "auth": "sasl", "family": "ipv4", "websocket": false,
 #                        "service": "5901", "host": "0.0.0.0"},
-#            "client": { "family": "ipv4", "service": "46089",
+#            "client": { "family": "ipv4", "service": "46089", "websocket": false,
 #                        "host": "127.0.0.1", "sasl_username": "luiz" } },
 #       "timestamp": { "seconds": 1263475302, "microseconds": 150772 } }
-#
 ##
 { 'event': 'VNC_INITIALIZED',
   'data': { 'server': 'VncServerInfo',
             'client': 'VncClientInfo' },
-  'if': 'defined(CONFIG_VNC)' }
+  'if': 'CONFIG_VNC' }
 
 ##
 # @VNC_DISCONNECTED:
 #
 # @client: client information
 #
-# Since: 0.13.0
+# Since: 0.13
 #
 # Example:
 #
 # <- { "event": "VNC_DISCONNECTED",
 #      "data": {
-#            "server": { "auth": "sasl", "family": "ipv4",
+#            "server": { "auth": "sasl", "family": "ipv4", "websocket": false,
 #                        "service": "5901", "host": "0.0.0.0" },
-#            "client": { "family": "ipv4", "service": "58425",
+#            "client": { "family": "ipv4", "service": "58425", "websocket": false,
 #                        "host": "127.0.0.1", "sasl_username": "luiz" } },
 #      "timestamp": { "seconds": 1262976601, "microseconds": 975795 } }
-#
 ##
 { 'event': 'VNC_DISCONNECTED',
   'data': { 'server': 'VncServerInfo',
             'client': 'VncClientInfo' },
-  'if': 'defined(CONFIG_VNC)' }
+  'if': 'CONFIG_VNC' }
 
 ##
 # = Input
 #
 # @current: true if this device is currently receiving mouse events
 #
-# @absolute: true if this device supports absolute coordinates as input
+# @absolute: true if this device supports absolute coordinates as
+#     input
 #
-# Since: 0.14.0
+# Since: 0.14
 ##
 { 'struct': 'MouseInfo',
   'data': {'name': 'str', 'index': 'int', 'current': 'bool',
 #
 # Returns: a list of @MouseInfo for each device
 #
-# Since: 0.14.0
+# Since: 0.14
 #
 # Example:
 #
 #          }
 #       ]
 #    }
-#
 ##
 { 'command': 'query-mice', 'returns': ['MouseInfo'] }
 
 # This is used by the @send-key command.
 #
 # @unmapped: since 2.0
+#
 # @pause: since 2.0
+#
 # @ro: since 2.4
+#
 # @kp_comma: since 2.4
+#
 # @kp_equals: since 2.6
+#
 # @power: since 2.6
+#
 # @hiragana: since 2.9
+#
 # @henkan: since 2.9
+#
 # @yen: since 2.9
 #
 # @sleep: since 2.10
+#
 # @wake: since 2.10
+#
 # @audionext: since 2.10
+#
 # @audioprev: since 2.10
+#
 # @audiostop: since 2.10
+#
 # @audioplay: since 2.10
+#
 # @audiomute: since 2.10
+#
 # @volumeup: since 2.10
+#
 # @volumedown: since 2.10
+#
 # @mediaselect: since 2.10
+#
 # @mail: since 2.10
+#
 # @calculator: since 2.10
+#
 # @computer: since 2.10
+#
 # @ac_home: since 2.10
+#
 # @ac_back: since 2.10
+#
 # @ac_forward: since 2.10
+#
 # @ac_refresh: since 2.10
+#
 # @ac_bookmarks: since 2.10
 #
 # @muhenkan: since 2.12
+#
 # @katakanahiragana: since 2.12
 #
-# 'sysrq' was mistakenly added to hack around the fact that
-# the ps2 driver was not generating correct scancodes sequences
-# when 'alt+print' was pressed. This flaw is now fixed and the
-# 'sysrq' key serves no further purpose. Any further use of
-# 'sysrq' will be transparently changed to 'print', so they
-# are effectively synonyms.
+# @lang1: since 6.1
+#
+# @lang2: since 6.1
+#
+# @f13: since 8.0
+#
+# @f14: since 8.0
+#
+# @f15: since 8.0
+#
+# @f16: since 8.0
+#
+# @f17: since 8.0
+#
+# @f18: since 8.0
 #
-# Since: 1.3.0
+# @f19: since 8.0
 #
+# @f20: since 8.0
+#
+# @f21: since 8.0
+#
+# @f22: since 8.0
+#
+# @f23: since 8.0
+#
+# @f24: since 8.0
+#
+# 'sysrq' was mistakenly added to hack around the fact that the ps2
+# driver was not generating correct scancodes sequences when
+# 'alt+print' was pressed.  This flaw is now fixed and the 'sysrq' key
+# serves no further purpose.  Any further use of 'sysrq' will be
+# transparently changed to 'print', so they are effectively synonyms.
+#
+# Since: 1.3
 ##
 { 'enum': 'QKeyCode',
   'data': [ 'unmapped',
             'audionext', 'audioprev', 'audiostop', 'audioplay', 'audiomute',
             'volumeup', 'volumedown', 'mediaselect',
             'mail', 'calculator', 'computer',
-            'ac_home', 'ac_back', 'ac_forward', 'ac_refresh', 'ac_bookmarks' ] }
+            'ac_home', 'ac_back', 'ac_forward', 'ac_refresh', 'ac_bookmarks',
+            'lang1', 'lang2','f13','f14','f15','f16','f17','f18','f19','f20','f21','f22','f23','f24' ] }
+
+##
+# @KeyValueKind:
+#
+# Since: 1.3
+##
+{ 'enum': 'KeyValueKind',
+  'data': [ 'number', 'qcode' ] }
+
+##
+# @IntWrapper:
+#
+# Since: 1.3
+##
+{ 'struct': 'IntWrapper',
+  'data': { 'data': 'int' } }
+
+##
+# @QKeyCodeWrapper:
+#
+# Since: 1.3
+##
+{ 'struct': 'QKeyCodeWrapper',
+  'data': { 'data': 'QKeyCode' } }
 
 ##
 # @KeyValue:
 #
 # Represents a keyboard key.
 #
-# Since: 1.3.0
+# Since: 1.3
 ##
 { 'union': 'KeyValue',
+  'base': { 'type': 'KeyValueKind' },
+  'discriminator': 'type',
   'data': {
-    'number': 'int',
-    'qcode': 'QKeyCode' } }
+    'number': 'IntWrapper',
+    'qcode': 'QKeyCodeWrapper' } }
 
 ##
 # @send-key:
 #
 # Send keys to guest.
 #
-# @keys: An array of @KeyValue elements. All @KeyValues in this array are
-#        simultaneously sent to the guest. A @KeyValue.number value is sent
-#        directly to the guest, while @KeyValue.qcode must be a valid
-#        @QKeyCode value
+# @keys: An array of @KeyValue elements.  All @KeyValues in this array
+#     are simultaneously sent to the guest.  A @KeyValue.number value
+#     is sent directly to the guest, while @KeyValue.qcode must be a
+#     valid @QKeyCode value
 #
-# @hold-time: time to delay key up events, milliseconds. Defaults
-#             to 100
+# @hold-time: time to delay key up events, milliseconds.  Defaults to
+#     100
 #
-# Returns: - Nothing on success
-#          - If key is unknown or redundant, InvalidParameter
+# Returns:
+#     - Nothing on success
+#     - If key is unknown or redundant, GenericError
 #
-# Since: 1.3.0
+# Since: 1.3
 #
 # Example:
 #
 #                               { "type": "qcode", "data": "alt" },
 #                               { "type": "qcode", "data": "delete" } ] } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'send-key',
   'data': { 'keys': ['KeyValue'], '*hold-time': 'int' } }
 #
 # @extra: rear side button of a 5-button mouse (since 2.9)
 #
+# @touch: screen contact on a multi-touch device (since 8.1)
+#
 # Since: 2.0
 ##
 { 'enum'  : 'InputButton',
   'data'  : [ 'left', 'middle', 'right', 'wheel-up', 'wheel-down', 'side',
-  'extra' ] }
+  'extra', 'wheel-left', 'wheel-right', 'touch' ] }
 
 ##
 # @InputAxis:
 { 'enum'  : 'InputAxis',
   'data'  : [ 'x', 'y' ] }
 
+##
+# @InputMultiTouchType:
+#
+# Type of a multi-touch event.
+#
+# Since: 8.1
+##
+{ 'enum'  : 'InputMultiTouchType',
+  'data'  : [ 'begin', 'update', 'end', 'cancel', 'data' ] }
+
+
 ##
 # @InputKeyEvent:
 #
 # Keyboard input event.
 #
-# @key:    Which key this event is for.
-# @down:   True for key-down and false for key-up events.
+# @key: Which key this event is for.
+#
+# @down: True for key-down and false for key-up events.
 #
 # Since: 2.0
 ##
 # Pointer button input event.
 #
 # @button: Which button this event is for.
-# @down:   True for key-down and false for key-up events.
+#
+# @down: True for key-down and false for key-up events.
 #
 # Since: 2.0
 ##
 # Pointer motion input event.
 #
 # @axis: Which axis is referenced by @value.
-# @value: Pointer position.  For absolute coordinates the
-#         valid range is 0 -> 0x7ffff
+#
+# @value: Pointer position.  For absolute coordinates the valid range
+#     is 0 -> 0x7ffff
 #
 # Since: 2.0
 ##
   'data'  : { 'axis'    : 'InputAxis',
               'value'   : 'int' } }
 
+##
+# @InputMultiTouchEvent:
+#
+# MultiTouch input event.
+#
+# @slot: Which slot has generated the event.
+#
+# @tracking-id: ID to correlate this event with previously generated
+#     events.
+#
+# @axis: Which axis is referenced by @value.
+#
+# @value: Contact position.
+#
+# Since: 8.1
+##
+{ 'struct'  : 'InputMultiTouchEvent',
+  'data'  : { 'type'       : 'InputMultiTouchType',
+              'slot'       : 'int',
+              'tracking-id': 'int',
+              'axis'       : 'InputAxis',
+              'value'      : 'int' } }
+
+##
+# @InputEventKind:
+#
+# @key: a keyboard input event
+#
+# @btn: a pointer button input event
+#
+# @rel: a relative pointer motion input event
+#
+# @abs: an absolute pointer motion input event
+#
+# @mtt: a multi-touch input event
+#
+# Since: 2.0
+##
+{ 'enum': 'InputEventKind',
+  'data': [ 'key', 'btn', 'rel', 'abs', 'mtt' ] }
+
+##
+# @InputKeyEventWrapper:
+#
+# Since: 2.0
+##
+{ 'struct': 'InputKeyEventWrapper',
+  'data': { 'data': 'InputKeyEvent' } }
+
+##
+# @InputBtnEventWrapper:
+#
+# Since: 2.0
+##
+{ 'struct': 'InputBtnEventWrapper',
+  'data': { 'data': 'InputBtnEvent' } }
+
+##
+# @InputMoveEventWrapper:
+#
+# Since: 2.0
+##
+{ 'struct': 'InputMoveEventWrapper',
+  'data': { 'data': 'InputMoveEvent' } }
+
+##
+# @InputMultiTouchEventWrapper:
+#
+# Since: 8.1
+##
+{ 'struct': 'InputMultiTouchEventWrapper',
+  'data': { 'data': 'InputMultiTouchEvent' } }
+
 ##
 # @InputEvent:
 #
 # Input event union.
 #
-# @type: the input type, one of:
-#
-#        - 'key': Input event of Keyboard
-#        - 'btn': Input event of pointer buttons
-#        - 'rel': Input event of relative pointer motion
-#        - 'abs': Input event of absolute pointer motion
+# @type: the type of input event
 #
 # Since: 2.0
 ##
 { 'union' : 'InputEvent',
-  'data'  : { 'key'     : 'InputKeyEvent',
-              'btn'     : 'InputBtnEvent',
-              'rel'     : 'InputMoveEvent',
-              'abs'     : 'InputMoveEvent' } }
+  'base': { 'type': 'InputEventKind' },
+  'discriminator': 'type',
+  'data'  : { 'key'     : 'InputKeyEventWrapper',
+              'btn'     : 'InputBtnEventWrapper',
+              'rel'     : 'InputMoveEventWrapper',
+              'abs'     : 'InputMoveEventWrapper',
+              'mtt'     : 'InputMultiTouchEventWrapper' } }
 
 ##
 # @input-send-event:
 # precedence.
 #
 # @device: display device to send event(s) to.
-# @head: head to send event(s) to, in case the
-#        display device supports multiple scanouts.
+#
+# @head: head to send event(s) to, in case the display device supports
+#     multiple scanouts.
+#
 # @events: List of InputEvent union.
 #
 # Returns: Nothing on success.
 # Since: 2.6
 #
 # Note: The consoles are visible in the qom tree, under
-#       /backend/console[$index]. They have a device link and head property,
-#       so it is possible to map which console belongs to which device and
-#       display.
+#     /backend/console[$index]. They have a device link and head
+#     property, so it is possible to map which console belongs to
+#     which device and display.
 #
-# Example:
+# Examples:
 #
 # 1. Press left mouse button.
 #
 #                { "type": "abs", "data" : { "axis": "x", "value" : 20000 } },
 #                { "type": "abs", "data" : { "axis": "y", "value" : 400 } } ] } }
 # <- { "return": {} }
-#
 ##
 { 'command': 'input-send-event',
   'data': { '*device': 'str',
             '*head'  : 'int',
             'events' : [ 'InputEvent' ] } }
 
-##
-# @GrabToggleKeys:
-#
-# Keys to toggle input-linux between host and guest.
-#
-# Since: 4.0
-#
-##
-{ 'enum': 'GrabToggleKeys',
-  'data': [ 'ctrl-ctrl', 'alt-alt', 'shift-shift','meta-meta', 'scrolllock',
-            'ctrl-scrolllock' ] }
-
 ##
 # @DisplayGTK:
 #
 # GTK display options.
 #
 # @grab-on-hover: Grab keyboard input on mouse hover.
+#
 # @zoom-to-fit: Zoom guest display to fit into the host window.  When
-#               turned off the host window will be resized instead.
-#               In case the display device can notify the guest on
-#               window resizes (virtio-gpu) this will default to "on",
-#               assuming the guest will resize the display to match
-#               the window size then.  Otherwise it defaults to "off".
-#               Since 3.1
+#     turned off the host window will be resized instead.  In case the
+#     display device can notify the guest on window resizes
+#     (virtio-gpu) this will default to "on", assuming the guest will
+#     resize the display to match the window size then.  Otherwise it
+#     defaults to "off". (Since 3.1)
 #
-# Since: 2.12
+# @show-tabs: Display the tab bar for switching between the various
+#     graphical interfaces (e.g. VGA and virtual console character
+#     devices) by default.  (Since 7.1)
+#
+# @show-menubar: Display the main window menubar.  Defaults to "on".
+#     (Since 8.0)
 #
+# Since: 2.12
 ##
 { 'struct'  : 'DisplayGTK',
   'data'    : { '*grab-on-hover' : 'bool',
-                '*zoom-to-fit'   : 'bool'  } }
+                '*zoom-to-fit'   : 'bool',
+                '*show-tabs'     : 'bool',
+                '*show-menubar'  : 'bool'  } }
 
 ##
 # @DisplayEGLHeadless:
 #
 # EGL headless display options.
 #
-# @rendernode: Which DRM render node should be used. Default is the first
-#              available node on the host.
+# @rendernode: Which DRM render node should be used.  Default is the
+#     first available node on the host.
 #
 # Since: 3.1
-#
 ##
 { 'struct'  : 'DisplayEGLHeadless',
   'data'    : { '*rendernode' : 'str' } }
 
- ##
- # @DisplayGLMode:
- #
- # Display OpenGL mode.
- #
- # @off: Disable OpenGL (default).
- # @on: Use OpenGL, pick context type automatically.
- #      Would better be named 'auto' but is called 'on' for backward
- #      compatibility with bool type.
- # @core: Use OpenGL with Core (desktop) Context.
- # @es: Use OpenGL with ES (embedded systems) Context.
- #
- # Since: 3.0
- #
- ##
+##
+# @DisplayDBus:
+#
+# DBus display options.
+#
+# @addr: The D-Bus bus address (default to the session bus).
+#
+# @rendernode: Which DRM render node should be used.  Default is the
+#     first available node on the host.
+#
+# @p2p: Whether to use peer-to-peer connections (accepted through
+#     @add_client).
+#
+# @audiodev: Use the specified DBus audiodev to export audio.
+#
+# Since: 7.0
+##
+{ 'struct'  : 'DisplayDBus',
+  'data'    : { '*rendernode' : 'str',
+                '*addr': 'str',
+                '*p2p': 'bool',
+                '*audiodev': 'str' } }
+
+##
+# @DisplayGLMode:
+#
+# Display OpenGL mode.
+#
+# @off: Disable OpenGL (default).
+#
+# @on: Use OpenGL, pick context type automatically.  Would better be
+#     named 'auto' but is called 'on' for backward compatibility with
+#     bool type.
+#
+# @core: Use OpenGL with Core (desktop) Context.
+#
+# @es: Use OpenGL with ES (embedded systems) Context.
+#
+# Since: 3.0
+##
 { 'enum'    : 'DisplayGLMode',
   'data'    : [ 'off', 'on', 'core', 'es' ] }
 
 #
 # Curses display options.
 #
-# @charset:       Font charset used by guest (default: CP437).
+# @charset: Font charset used by guest (default: CP437).
 #
 # Since: 4.0
-#
 ##
 { 'struct'  : 'DisplayCurses',
   'data'    : { '*charset'       : 'str' } }
 
+##
+# @DisplayCocoa:
+#
+# Cocoa display options.
+#
+# @left-command-key: Enable/disable forwarding of left command key to
+#     guest.  Allows command-tab window switching on the host without
+#     sending this key to the guest when "off". Defaults to "on"
+#
+# @full-grab: Capture all key presses, including system combos.  This
+#     requires accessibility permissions, since it performs a global
+#     grab on key events.  (default: off) See
+#     https://support.apple.com/en-in/guide/mac-help/mh32356/mac
+#
+# @swap-opt-cmd: Swap the Option and Command keys so that their key
+#     codes match their position on non-Mac keyboards and you can use
+#     Meta/Super and Alt where you expect them.  (default: off)
+#
+# @zoom-to-fit: Zoom guest display to fit into the host window. When
+#     turned off the host window will be resized instead. Defaults to
+#     "off". (Since 8.2)
+#
+# Since: 7.0
+##
+{ 'struct': 'DisplayCocoa',
+  'data': {
+      '*left-command-key': 'bool',
+      '*full-grab': 'bool',
+      '*swap-opt-cmd': 'bool',
+      '*zoom-to-fit': 'bool'
+  } }
+
+##
+# @HotKeyMod:
+#
+# Set of modifier keys that need to be held for shortcut key actions.
+#
+# Since: 7.1
+##
+{ 'enum'  : 'HotKeyMod',
+  'data'  : [ 'lctrl-lalt', 'lshift-lctrl-lalt', 'rctrl' ] }
+
+##
+# @DisplaySDL:
+#
+# SDL2 display options.
+#
+# @grab-mod: Modifier keys that should be pressed together with the
+#     "G" key to release the mouse grab.
+#
+# Since: 7.1
+##
+{ 'struct'  : 'DisplaySDL',
+  'data'    : { '*grab-mod'   : 'HotKeyMod' } }
+
 ##
 # @DisplayType:
 #
 # Display (user interface) type.
 #
-# @default: The default user interface, selecting from the first available
-#           of gtk, sdl, cocoa, and vnc.
+# @default: The default user interface, selecting from the first
+#     available of gtk, sdl, cocoa, and vnc.
 #
-# @none: No user interface or video output display. The guest will
-#        still see an emulated graphics card, but its output will not
-#        be displayed to the QEMU user.
+# @none: No user interface or video output display.  The guest will
+#     still see an emulated graphics card, but its output will not be
+#     displayed to the QEMU user.
 #
 # @gtk: The GTK user interface.
 #
 # @sdl: The SDL user interface.
 #
 # @egl-headless: No user interface, offload GL operations to a local
-#                DRI device. Graphical display need to be paired with
-#                VNC or Spice. (Since 3.1)
+#     DRI device.  Graphical display need to be paired with VNC or
+#     Spice.  (Since 3.1)
 #
 # @curses: Display video output via curses.  For graphics device
-#          models which support a text mode, QEMU can display this
-#          output using a curses/ncurses interface. Nothing is
-#          displayed when the graphics device is in graphical mode or
-#          if the graphics device does not support a text
-#          mode. Generally only the VGA device models support text
-#          mode.
+#     models which support a text mode, QEMU can display this output
+#     using a curses/ncurses interface.  Nothing is displayed when the
+#     graphics device is in graphical mode or if the graphics device
+#     does not support a text mode.  Generally only the VGA device
+#     models support text mode.
 #
 # @cocoa: The Cocoa user interface.
 #
 # @spice-app: Set up a Spice server and run the default associated
-#             application to connect to it. The server will redirect
-#             the serial console and QEMU monitors. (Since 4.0)
+#     application to connect to it.  The server will redirect the
+#     serial console and QEMU monitors.  (Since 4.0)
 #
-# Since: 2.12
+# @dbus: Start a D-Bus service for the display.  (Since 7.0)
 #
+# Since: 2.12
 ##
 { 'enum'    : 'DisplayType',
-  'data'    : [ 'default', 'none', 'gtk', 'sdl',
-                'egl-headless', 'curses', 'cocoa',
-                'spice-app'] }
+  'data'    : [
+    { 'name': 'default' },
+    { 'name': 'none' },
+    { 'name': 'gtk', 'if': 'CONFIG_GTK' },
+    { 'name': 'sdl', 'if': 'CONFIG_SDL' },
+    { 'name': 'egl-headless', 'if': 'CONFIG_OPENGL' },
+    { 'name': 'curses', 'if': 'CONFIG_CURSES' },
+    { 'name': 'cocoa', 'if': 'CONFIG_COCOA' },
+    { 'name': 'spice-app', 'if': 'CONFIG_SPICE' },
+    { 'name': 'dbus', 'if': 'CONFIG_DBUS_DISPLAY' }
+  ]
+}
 
 ##
 # @DisplayOptions:
 #
 # Display (user interface) options.
 #
-# @type:          Which DisplayType qemu should use.
-# @full-screen:   Start user interface in fullscreen mode (default: off).
-# @window-close:  Allow to quit qemu with window close button (default: on).
-# @show-cursor:   Force showing the mouse cursor (default: off).
-#                 (since: 5.0)
-# @gl:            Enable OpenGL support (default: off).
+# @type: Which DisplayType qemu should use.
 #
-# Since: 2.12
+# @full-screen: Start user interface in fullscreen mode
+#     (default: off).
+#
+# @window-close: Allow to quit qemu with window close button
+#     (default: on).
 #
+# @show-cursor: Force showing the mouse cursor (default: off).
+#     (since: 5.0)
+#
+# @gl: Enable OpenGL support (default: off).
+#
+# Since: 2.12
 ##
 { 'union'   : 'DisplayOptions',
   'base'    : { 'type'           : 'DisplayType',
                 '*show-cursor'   : 'bool',
                 '*gl'            : 'DisplayGLMode' },
   'discriminator' : 'type',
-  'data'    : { 'gtk'            : 'DisplayGTK',
-                'curses'         : 'DisplayCurses',
-                'egl-headless'   : 'DisplayEGLHeadless'} }
+  'data'    : {
+      'gtk': { 'type': 'DisplayGTK', 'if': 'CONFIG_GTK' },
+      'cocoa': { 'type': 'DisplayCocoa', 'if': 'CONFIG_COCOA' },
+      'curses': { 'type': 'DisplayCurses', 'if': 'CONFIG_CURSES' },
+      'egl-headless': { 'type': 'DisplayEGLHeadless',
+                        'if': 'CONFIG_OPENGL' },
+      'dbus': { 'type': 'DisplayDBus', 'if': 'CONFIG_DBUS_DISPLAY' },
+      'sdl': { 'type': 'DisplaySDL', 'if': 'CONFIG_SDL' }
+  }
+}
 
 ##
 # @query-display-options:
 # Returns: @DisplayOptions
 #
 # Since: 3.1
-#
 ##
 { 'command': 'query-display-options',
   'returns': 'DisplayOptions' }
+
+##
+# @DisplayReloadType:
+#
+# Available DisplayReload types.
+#
+# @vnc: VNC display
+#
+# Since: 6.0
+##
+{ 'enum': 'DisplayReloadType',
+  'data': ['vnc'] }
+
+##
+# @DisplayReloadOptionsVNC:
+#
+# Specify the VNC reload options.
+#
+# @tls-certs: reload tls certs or not.
+#
+# Since: 6.0
+##
+{ 'struct': 'DisplayReloadOptionsVNC',
+  'data': { '*tls-certs': 'bool' } }
+
+##
+# @DisplayReloadOptions:
+#
+# Options of the display configuration reload.
+#
+# @type: Specify the display type.
+#
+# Since: 6.0
+##
+{ 'union': 'DisplayReloadOptions',
+  'base': {'type': 'DisplayReloadType'},
+  'discriminator': 'type',
+  'data': { 'vnc': 'DisplayReloadOptionsVNC' } }
+
+##
+# @display-reload:
+#
+# Reload display configuration.
+#
+# Returns: Nothing on success.
+#
+# Since: 6.0
+#
+# Example:
+#
+# -> { "execute": "display-reload",
+#      "arguments": { "type": "vnc", "tls-certs": true  } }
+# <- { "return": {} }
+##
+{ 'command': 'display-reload',
+  'data': 'DisplayReloadOptions',
+  'boxed' : true }
+
+##
+# @DisplayUpdateType:
+#
+# Available DisplayUpdate types.
+#
+# @vnc: VNC display
+#
+# Since: 7.1
+##
+{ 'enum': 'DisplayUpdateType',
+  'data': ['vnc'] }
+
+##
+# @DisplayUpdateOptionsVNC:
+#
+# Specify the VNC reload options.
+#
+# @addresses: If specified, change set of addresses to listen for
+#     connections.  Addresses configured for websockets are not
+#     touched.
+#
+# Since: 7.1
+##
+{ 'struct': 'DisplayUpdateOptionsVNC',
+  'data': { '*addresses': ['SocketAddress'] } }
+
+##
+# @DisplayUpdateOptions:
+#
+# Options of the display configuration reload.
+#
+# @type: Specify the display type.
+#
+# Since: 7.1
+##
+{ 'union': 'DisplayUpdateOptions',
+  'base': {'type': 'DisplayUpdateType'},
+  'discriminator': 'type',
+  'data': { 'vnc': 'DisplayUpdateOptionsVNC' } }
+
+##
+# @display-update:
+#
+# Update display configuration.
+#
+# Returns: Nothing on success.
+#
+# Since: 7.1
+#
+# Example:
+#
+# -> { "execute": "display-update",
+#      "arguments": { "type": "vnc", "addresses":
+#                     [ { "type": "inet", "host": "0.0.0.0",
+#                         "port": "5901" } ] } }
+# <- { "return": {} }
+##
+{ 'command': 'display-update',
+  'data': 'DisplayUpdateOptions',
+  'boxed' : true }
+
+##
+# @client_migrate_info:
+#
+# Set migration information for remote display.  This makes the server
+# ask the client to automatically reconnect using the new parameters
+# once migration finished successfully.  Only implemented for SPICE.
+#
+# @protocol: must be "spice"
+#
+# @hostname: migration target hostname
+#
+# @port: spice tcp port for plaintext channels
+#
+# @tls-port: spice tcp port for tls-secured channels
+#
+# @cert-subject: server certificate subject
+#
+# Since: 0.14
+#
+# Example:
+#
+# -> { "execute": "client_migrate_info",
+#      "arguments": { "protocol": "spice",
+#                     "hostname": "virt42.lab.kraxel.org",
+#                     "port": 1234 } }
+# <- { "return": {} }
+##
+{ 'command': 'client_migrate_info',
+  'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int',
+            '*tls-port': 'int', '*cert-subject': 'str' } }
diff --git a/qapi/virtio.json b/qapi/virtio.json
new file mode 100644 (file)
index 0000000..e6dcee7
--- /dev/null
@@ -0,0 +1,930 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# = Virtio devices
+##
+
+##
+# @VirtioInfo:
+#
+# Basic information about a given VirtIODevice
+#
+# @path: The VirtIODevice's canonical QOM path
+#
+# @name: Name of the VirtIODevice
+#
+# Since: 7.2
+##
+{ 'struct': 'VirtioInfo',
+  'data': { 'path': 'str',
+            'name': 'str' } }
+
+##
+# @x-query-virtio:
+#
+# Returns a list of all realized VirtIODevices
+#
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
+# Returns: List of gathered VirtIODevices
+#
+# Since: 7.2
+#
+# Example:
+#
+# -> { "execute": "x-query-virtio" }
+# <- { "return": [
+#          {
+#              "name": "virtio-input",
+#              "path": "/machine/peripheral-anon/device[4]/virtio-backend"
+#          },
+#          {
+#              "name": "virtio-crypto",
+#              "path": "/machine/peripheral/crypto0/virtio-backend"
+#          },
+#          {
+#              "name": "virtio-scsi",
+#              "path": "/machine/peripheral-anon/device[2]/virtio-backend"
+#          },
+#          {
+#              "name": "virtio-net",
+#              "path": "/machine/peripheral-anon/device[1]/virtio-backend"
+#          },
+#          {
+#              "name": "virtio-serial",
+#              "path": "/machine/peripheral-anon/device[0]/virtio-backend"
+#          }
+#      ]
+#    }
+##
+{ 'command': 'x-query-virtio',
+  'returns': [ 'VirtioInfo' ],
+  'features': [ 'unstable' ] }
+
+##
+# @VhostStatus:
+#
+# Information about a vhost device.  This information will only be
+# displayed if the vhost device is active.
+#
+# @n-mem-sections: vhost_dev n_mem_sections
+#
+# @n-tmp-sections: vhost_dev n_tmp_sections
+#
+# @nvqs: vhost_dev nvqs (number of virtqueues being used)
+#
+# @vq-index: vhost_dev vq_index
+#
+# @features: vhost_dev features
+#
+# @acked-features: vhost_dev acked_features
+#
+# @backend-features: vhost_dev backend_features
+#
+# @protocol-features: vhost_dev protocol_features
+#
+# @max-queues: vhost_dev max_queues
+#
+# @backend-cap: vhost_dev backend_cap
+#
+# @log-enabled: vhost_dev log_enabled flag
+#
+# @log-size: vhost_dev log_size
+#
+# Since: 7.2
+##
+{ 'struct': 'VhostStatus',
+  'data': { 'n-mem-sections': 'int',
+            'n-tmp-sections': 'int',
+            'nvqs': 'uint32',
+            'vq-index': 'int',
+            'features': 'VirtioDeviceFeatures',
+            'acked-features': 'VirtioDeviceFeatures',
+            'backend-features': 'VirtioDeviceFeatures',
+            'protocol-features': 'VhostDeviceProtocols',
+            'max-queues': 'uint64',
+            'backend-cap': 'uint64',
+            'log-enabled': 'bool',
+            'log-size': 'uint64' } }
+
+##
+# @VirtioStatus:
+#
+# Full status of the virtio device with most VirtIODevice members.
+# Also includes the full status of the corresponding vhost device if
+# the vhost device is active.
+#
+# @name: VirtIODevice name
+#
+# @device-id: VirtIODevice ID
+#
+# @vhost-started: VirtIODevice vhost_started flag
+#
+# @guest-features: VirtIODevice guest_features
+#
+# @host-features: VirtIODevice host_features
+#
+# @backend-features: VirtIODevice backend_features
+#
+# @device-endian: VirtIODevice device_endian
+#
+# @num-vqs: VirtIODevice virtqueue count.  This is the number of
+#     active virtqueues being used by the VirtIODevice.
+#
+# @status: VirtIODevice configuration status (VirtioDeviceStatus)
+#
+# @isr: VirtIODevice ISR
+#
+# @queue-sel: VirtIODevice queue_sel
+#
+# @vm-running: VirtIODevice vm_running flag
+#
+# @broken: VirtIODevice broken flag
+#
+# @disabled: VirtIODevice disabled flag
+#
+# @use-started: VirtIODevice use_started flag
+#
+# @started: VirtIODevice started flag
+#
+# @start-on-kick: VirtIODevice start_on_kick flag
+#
+# @disable-legacy-check: VirtIODevice disabled_legacy_check flag
+#
+# @bus-name: VirtIODevice bus_name
+#
+# @use-guest-notifier-mask: VirtIODevice use_guest_notifier_mask flag
+#
+# @vhost-dev: Corresponding vhost device info for a given
+#     VirtIODevice.  Present if the given VirtIODevice has an active
+#     vhost device.
+#
+# Since: 7.2
+##
+{ 'struct': 'VirtioStatus',
+  'data': { 'name': 'str',
+            'device-id': 'uint16',
+            'vhost-started': 'bool',
+            'device-endian': 'str',
+            'guest-features': 'VirtioDeviceFeatures',
+            'host-features': 'VirtioDeviceFeatures',
+            'backend-features': 'VirtioDeviceFeatures',
+            'num-vqs': 'int',
+            'status': 'VirtioDeviceStatus',
+            'isr': 'uint8',
+            'queue-sel': 'uint16',
+            'vm-running': 'bool',
+            'broken': 'bool',
+            'disabled': 'bool',
+            'use-started': 'bool',
+            'started': 'bool',
+            'start-on-kick': 'bool',
+            'disable-legacy-check': 'bool',
+            'bus-name': 'str',
+            'use-guest-notifier-mask': 'bool',
+            '*vhost-dev': 'VhostStatus' } }
+
+##
+# @x-query-virtio-status:
+#
+# Poll for a comprehensive status of a given virtio device
+#
+# @path: Canonical QOM path of the VirtIODevice
+#
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
+# Returns: VirtioStatus of the virtio device
+#
+# Since: 7.2
+#
+# Examples:
+#
+# 1. Poll for the status of virtio-crypto (no vhost-crypto active)
+#
+# -> { "execute": "x-query-virtio-status",
+#      "arguments": { "path": "/machine/peripheral/crypto0/virtio-backend" }
+#    }
+# <- { "return": {
+#          "device-endian": "little",
+#          "bus-name": "",
+#          "disable-legacy-check": false,
+#          "name": "virtio-crypto",
+#          "started": true,
+#          "device-id": 20,
+#          "backend-features": {
+#              "transports": [],
+#              "dev-features": []
+#          },
+#          "start-on-kick": false,
+#          "isr": 1,
+#          "broken": false,
+#          "status": {
+#              "statuses": [
+#                  "VIRTIO_CONFIG_S_ACKNOWLEDGE: Valid virtio device found",
+#                  "VIRTIO_CONFIG_S_DRIVER: Guest OS compatible with device",
+#                  "VIRTIO_CONFIG_S_FEATURES_OK: Feature negotiation complete",
+#                  "VIRTIO_CONFIG_S_DRIVER_OK: Driver setup and ready"
+#              ]
+#          },
+#          "num-vqs": 2,
+#          "guest-features": {
+#              "dev-features": [],
+#              "transports": [
+#                  "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled",
+#                  "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported",
+#                  "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)"
+#              ]
+#          },
+#          "host-features": {
+#              "unknown-dev-features": 1073741824,
+#              "dev-features": [],
+#              "transports": [
+#                  "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled",
+#                  "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported",
+#                  "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)",
+#                  "VIRTIO_F_ANY_LAYOUT: Device accepts arbitrary desc. layouts",
+#                  "VIRTIO_F_NOTIFY_ON_EMPTY: Notify when device runs out of avail. descs. on VQ"
+#              ]
+#          },
+#          "use-guest-notifier-mask": true,
+#          "vm-running": true,
+#          "queue-sel": 1,
+#          "disabled": false,
+#          "vhost-started": false,
+#          "use-started": true
+#      }
+#    }
+#
+# 2. Poll for the status of virtio-net (vhost-net is active)
+#
+# -> { "execute": "x-query-virtio-status",
+#      "arguments": { "path": "/machine/peripheral-anon/device[1]/virtio-backend" }
+#    }
+# <- { "return": {
+#          "device-endian": "little",
+#          "bus-name": "",
+#          "disabled-legacy-check": false,
+#          "name": "virtio-net",
+#          "started": true,
+#          "device-id": 1,
+#          "vhost-dev": {
+#              "n-tmp-sections": 4,
+#              "n-mem-sections": 4,
+#              "max-queues": 1,
+#              "backend-cap": 2,
+#              "log-size": 0,
+#              "backend-features": {
+#                  "dev-features": [],
+#                  "transports": []
+#              },
+#              "nvqs": 2,
+#              "protocol-features": {
+#                  "protocols": []
+#              },
+#              "vq-index": 0,
+#              "log-enabled": false,
+#              "acked-features": {
+#                  "dev-features": [
+#                      "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers"
+#                  ],
+#                  "transports": [
+#                      "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled",
+#                      "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported",
+#                      "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)"
+#                  ]
+#              },
+#              "features": {
+#                  "dev-features": [
+#                      "VHOST_F_LOG_ALL: Logging write descriptors supported",
+#                      "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers"
+#                  ],
+#                  "transports": [
+#                      "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled",
+#                      "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported",
+#                      "VIRTIO_F_IOMMU_PLATFORM: Device can be used on IOMMU platform",
+#                      "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)",
+#                      "VIRTIO_F_ANY_LAYOUT: Device accepts arbitrary desc. layouts",
+#                      "VIRTIO_F_NOTIFY_ON_EMPTY: Notify when device runs out of avail. descs. on VQ"
+#                  ]
+#              }
+#          },
+#          "backend-features": {
+#              "dev-features": [
+#                  "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features negotiation supported",
+#                  "VIRTIO_NET_F_GSO: Handling GSO-type packets supported",
+#                  "VIRTIO_NET_F_CTRL_MAC_ADDR: MAC address set through control channel",
+#                  "VIRTIO_NET_F_GUEST_ANNOUNCE: Driver sending gratuitous packets supported",
+#                  "VIRTIO_NET_F_CTRL_RX_EXTRA: Extra RX mode control supported",
+#                  "VIRTIO_NET_F_CTRL_VLAN: Control channel VLAN filtering supported",
+#                  "VIRTIO_NET_F_CTRL_RX: Control channel RX mode supported",
+#                  "VIRTIO_NET_F_CTRL_VQ: Control channel available",
+#                  "VIRTIO_NET_F_STATUS: Configuration status field available",
+#                  "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers",
+#                  "VIRTIO_NET_F_HOST_UFO: Device can receive UFO",
+#                  "VIRTIO_NET_F_HOST_ECN: Device can receive TSO with ECN",
+#                  "VIRTIO_NET_F_HOST_TSO6: Device can receive TSOv6",
+#                  "VIRTIO_NET_F_HOST_TSO4: Device can receive TSOv4",
+#                  "VIRTIO_NET_F_GUEST_UFO: Driver can receive UFO",
+#                  "VIRTIO_NET_F_GUEST_ECN: Driver can receive TSO with ECN",
+#                  "VIRTIO_NET_F_GUEST_TSO6: Driver can receive TSOv6",
+#                  "VIRTIO_NET_F_GUEST_TSO4: Driver can receive TSOv4",
+#                  "VIRTIO_NET_F_MAC: Device has given MAC address",
+#                  "VIRTIO_NET_F_CTRL_GUEST_OFFLOADS: Control channel offloading reconfig. supported",
+#                  "VIRTIO_NET_F_GUEST_CSUM: Driver handling packets with partial checksum supported",
+#                  "VIRTIO_NET_F_CSUM: Device handling packets with partial checksum supported"
+#              ],
+#              "transports": [
+#                  "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled",
+#                  "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported",
+#                  "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)",
+#                  "VIRTIO_F_ANY_LAYOUT: Device accepts arbitrary desc. layouts",
+#                  "VIRTIO_F_NOTIFY_ON_EMPTY: Notify when device runs out of avail. descs. on VQ"
+#              ]
+#          },
+#          "start-on-kick": false,
+#          "isr": 1,
+#          "broken": false,
+#          "status": {
+#              "statuses": [
+#                  "VIRTIO_CONFIG_S_ACKNOWLEDGE: Valid virtio device found",
+#                  "VIRTIO_CONFIG_S_DRIVER: Guest OS compatible with device",
+#                  "VIRTIO_CONFIG_S_FEATURES_OK: Feature negotiation complete",
+#                  "VIRTIO_CONFIG_S_DRIVER_OK: Driver setup and ready"
+#              ]
+#          },
+#          "num-vqs": 3,
+#          "guest-features": {
+#              "dev-features": [
+#                  "VIRTIO_NET_F_CTRL_MAC_ADDR: MAC address set through control channel",
+#                  "VIRTIO_NET_F_GUEST_ANNOUNCE: Driver sending gratuitous packets supported",
+#                  "VIRTIO_NET_F_CTRL_VLAN: Control channel VLAN filtering supported",
+#                  "VIRTIO_NET_F_CTRL_RX: Control channel RX mode supported",
+#                  "VIRTIO_NET_F_CTRL_VQ: Control channel available",
+#                  "VIRTIO_NET_F_STATUS: Configuration status field available",
+#                  "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers",
+#                  "VIRTIO_NET_F_HOST_UFO: Device can receive UFO",
+#                  "VIRTIO_NET_F_HOST_ECN: Device can receive TSO with ECN",
+#                  "VIRTIO_NET_F_HOST_TSO6: Device can receive TSOv6",
+#                  "VIRTIO_NET_F_HOST_TSO4: Device can receive TSOv4",
+#                  "VIRTIO_NET_F_GUEST_UFO: Driver can receive UFO",
+#                  "VIRTIO_NET_F_GUEST_ECN: Driver can receive TSO with ECN",
+#                  "VIRTIO_NET_F_GUEST_TSO6: Driver can receive TSOv6",
+#                  "VIRTIO_NET_F_GUEST_TSO4: Driver can receive TSOv4",
+#                  "VIRTIO_NET_F_MAC: Device has given MAC address",
+#                  "VIRTIO_NET_F_CTRL_GUEST_OFFLOADS: Control channel offloading reconfig. supported",
+#                  "VIRTIO_NET_F_GUEST_CSUM: Driver handling packets with partial checksum supported",
+#                  "VIRTIO_NET_F_CSUM: Device handling packets with partial checksum supported"
+#              ],
+#              "transports": [
+#                  "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled",
+#                  "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported",
+#                  "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)"
+#             ]
+#          },
+#          "host-features": {
+#              "dev-features": [
+#                  "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features negotiation supported",
+#                  "VIRTIO_NET_F_GSO: Handling GSO-type packets supported",
+#                  "VIRTIO_NET_F_CTRL_MAC_ADDR: MAC address set through control channel",
+#                  "VIRTIO_NET_F_GUEST_ANNOUNCE: Driver sending gratuitous packets supported",
+#                  "VIRTIO_NET_F_CTRL_RX_EXTRA: Extra RX mode control supported",
+#                  "VIRTIO_NET_F_CTRL_VLAN: Control channel VLAN filtering supported",
+#                  "VIRTIO_NET_F_CTRL_RX: Control channel RX mode supported",
+#                  "VIRTIO_NET_F_CTRL_VQ: Control channel available",
+#                  "VIRTIO_NET_F_STATUS: Configuration status field available",
+#                  "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers",
+#                  "VIRTIO_NET_F_HOST_UFO: Device can receive UFO",
+#                  "VIRTIO_NET_F_HOST_ECN: Device can receive TSO with ECN",
+#                  "VIRTIO_NET_F_HOST_TSO6: Device can receive TSOv6",
+#                  "VIRTIO_NET_F_HOST_TSO4: Device can receive TSOv4",
+#                  "VIRTIO_NET_F_GUEST_UFO: Driver can receive UFO",
+#                  "VIRTIO_NET_F_GUEST_ECN: Driver can receive TSO with ECN",
+#                  "VIRTIO_NET_F_GUEST_TSO6: Driver can receive TSOv6",
+#                  "VIRTIO_NET_F_GUEST_TSO4: Driver can receive TSOv4",
+#                  "VIRTIO_NET_F_MAC: Device has given MAC address",
+#                  "VIRTIO_NET_F_CTRL_GUEST_OFFLOADS: Control channel offloading reconfig. supported",
+#                  "VIRTIO_NET_F_GUEST_CSUM: Driver handling packets with partial checksum supported",
+#                  "VIRTIO_NET_F_CSUM: Device handling packets with partial checksum supported"
+#              ],
+#              "transports": [
+#                  "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled",
+#                  "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported",
+#                  "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)",
+#                  "VIRTIO_F_ANY_LAYOUT: Device accepts arbitrary desc. layouts",
+#                  "VIRTIO_F_NOTIFY_ON_EMPTY: Notify when device runs out of avail. descs. on VQ"
+#             ]
+#          },
+#          "use-guest-notifier-mask": true,
+#          "vm-running": true,
+#          "queue-sel": 2,
+#          "disabled": false,
+#          "vhost-started": true,
+#          "use-started": true
+#      }
+#    }
+##
+{ 'command': 'x-query-virtio-status',
+  'data': { 'path': 'str' },
+  'returns': 'VirtioStatus',
+  'features': [ 'unstable' ] }
+
+##
+# @VirtioDeviceStatus:
+#
+# A structure defined to list the configuration statuses of a virtio
+# device
+#
+# @statuses: List of decoded configuration statuses of the virtio
+#     device
+#
+# @unknown-statuses: Virtio device statuses bitmap that have not been
+#     decoded
+#
+# Since: 7.2
+##
+{ 'struct': 'VirtioDeviceStatus',
+  'data': { 'statuses': [ 'str' ],
+            '*unknown-statuses': 'uint8' } }
+
+##
+# @VhostDeviceProtocols:
+#
+# A structure defined to list the vhost user protocol features of a
+# Vhost User device
+#
+# @protocols: List of decoded vhost user protocol features of a vhost
+#     user device
+#
+# @unknown-protocols: Vhost user device protocol features bitmap that
+#     have not been decoded
+#
+# Since: 7.2
+##
+{ 'struct': 'VhostDeviceProtocols',
+  'data': { 'protocols': [ 'str' ],
+            '*unknown-protocols': 'uint64' } }
+
+##
+# @VirtioDeviceFeatures:
+#
+# The common fields that apply to most Virtio devices.  Some devices
+# may not have their own device-specific features (e.g. virtio-rng).
+#
+# @transports: List of transport features of the virtio device
+#
+# @dev-features: List of device-specific features (if the device has
+#     unique features)
+#
+# @unknown-dev-features: Virtio device features bitmap that have not
+#     been decoded
+#
+# Since: 7.2
+##
+{ 'struct': 'VirtioDeviceFeatures',
+  'data': { 'transports': [ 'str' ],
+            '*dev-features': [ 'str' ],
+            '*unknown-dev-features': 'uint64' } }
+
+##
+# @VirtQueueStatus:
+#
+# Information of a VirtIODevice VirtQueue, including most members of
+# the VirtQueue data structure.
+#
+# @name: Name of the VirtIODevice that uses this VirtQueue
+#
+# @queue-index: VirtQueue queue_index
+#
+# @inuse: VirtQueue inuse
+#
+# @vring-num: VirtQueue vring.num
+#
+# @vring-num-default: VirtQueue vring.num_default
+#
+# @vring-align: VirtQueue vring.align
+#
+# @vring-desc: VirtQueue vring.desc (descriptor area)
+#
+# @vring-avail: VirtQueue vring.avail (driver area)
+#
+# @vring-used: VirtQueue vring.used (device area)
+#
+# @last-avail-idx: VirtQueue last_avail_idx or return of vhost_dev
+#     vhost_get_vring_base (if vhost active)
+#
+# @shadow-avail-idx: VirtQueue shadow_avail_idx
+#
+# @used-idx: VirtQueue used_idx
+#
+# @signalled-used: VirtQueue signalled_used
+#
+# @signalled-used-valid: VirtQueue signalled_used_valid flag
+#
+# Since: 7.2
+##
+{ 'struct': 'VirtQueueStatus',
+  'data': { 'name': 'str',
+            'queue-index': 'uint16',
+            'inuse': 'uint32',
+            'vring-num': 'uint32',
+            'vring-num-default': 'uint32',
+            'vring-align': 'uint32',
+            'vring-desc': 'uint64',
+            'vring-avail': 'uint64',
+            'vring-used': 'uint64',
+            '*last-avail-idx': 'uint16',
+            '*shadow-avail-idx': 'uint16',
+            'used-idx': 'uint16',
+            'signalled-used': 'uint16',
+            'signalled-used-valid': 'bool' } }
+
+##
+# @x-query-virtio-queue-status:
+#
+# Return the status of a given VirtIODevice's VirtQueue
+#
+# @path: VirtIODevice canonical QOM path
+#
+# @queue: VirtQueue index to examine
+#
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
+# Returns: VirtQueueStatus of the VirtQueue
+#
+# Notes: last_avail_idx will not be displayed in the case where the
+#     selected VirtIODevice has a running vhost device and the
+#     VirtIODevice VirtQueue index (queue) does not exist for the
+#     corresponding vhost device vhost_virtqueue.  Also,
+#     shadow_avail_idx will not be displayed in the case where the
+#     selected VirtIODevice has a running vhost device.
+#
+# Since: 7.2
+#
+# Examples:
+#
+# 1. Get VirtQueueStatus for virtio-vsock (vhost-vsock running)
+#
+# -> { "execute": "x-query-virtio-queue-status",
+#      "arguments": { "path": "/machine/peripheral/vsock0/virtio-backend",
+#                     "queue": 1 }
+#    }
+# <- { "return": {
+#          "signalled-used": 0,
+#          "inuse": 0,
+#          "name": "vhost-vsock",
+#          "vring-align": 4096,
+#          "vring-desc": 5217370112,
+#          "signalled-used-valid": false,
+#          "vring-num-default": 128,
+#          "vring-avail": 5217372160,
+#          "queue-index": 1,
+#          "last-avail-idx": 0,
+#          "vring-used": 5217372480,
+#          "used-idx": 0,
+#          "vring-num": 128
+#      }
+#    }
+#
+# 2. Get VirtQueueStatus for virtio-serial (no vhost)
+#
+# -> { "execute": "x-query-virtio-queue-status",
+#      "arguments": { "path": "/machine/peripheral-anon/device[0]/virtio-backend",
+#                     "queue": 20 }
+#    }
+# <- { "return": {
+#          "signalled-used": 0,
+#          "inuse": 0,
+#          "name": "virtio-serial",
+#          "vring-align": 4096,
+#          "vring-desc": 5182074880,
+#          "signalled-used-valid": false,
+#          "vring-num-default": 128,
+#          "vring-avail": 5182076928,
+#          "queue-index": 20,
+#          "last-avail-idx": 0,
+#          "vring-used": 5182077248,
+#          "used-idx": 0,
+#          "shadow-avail-idx": 0,
+#          "vring-num": 128
+#      }
+#    }
+##
+{ 'command': 'x-query-virtio-queue-status',
+  'data': { 'path': 'str', 'queue': 'uint16' },
+  'returns': 'VirtQueueStatus',
+  'features': [ 'unstable' ] }
+
+##
+# @VirtVhostQueueStatus:
+#
+# Information of a vhost device's vhost_virtqueue, including most
+# members of the vhost_dev vhost_virtqueue data structure.
+#
+# @name: Name of the VirtIODevice that uses this vhost_virtqueue
+#
+# @kick: vhost_virtqueue kick
+#
+# @call: vhost_virtqueue call
+#
+# @desc: vhost_virtqueue desc
+#
+# @avail: vhost_virtqueue avail
+#
+# @used: vhost_virtqueue used
+#
+# @num: vhost_virtqueue num
+#
+# @desc-phys: vhost_virtqueue desc_phys (descriptor area phys. addr.)
+#
+# @desc-size: vhost_virtqueue desc_size
+#
+# @avail-phys: vhost_virtqueue avail_phys (driver area phys. addr.)
+#
+# @avail-size: vhost_virtqueue avail_size
+#
+# @used-phys: vhost_virtqueue used_phys (device area phys. addr.)
+#
+# @used-size: vhost_virtqueue used_size
+#
+# Since: 7.2
+##
+{ 'struct': 'VirtVhostQueueStatus',
+  'data': { 'name': 'str',
+            'kick': 'int',
+            'call': 'int',
+            'desc': 'uint64',
+            'avail': 'uint64',
+            'used': 'uint64',
+            'num': 'int',
+            'desc-phys': 'uint64',
+            'desc-size': 'uint32',
+            'avail-phys': 'uint64',
+            'avail-size': 'uint32',
+            'used-phys': 'uint64',
+            'used-size': 'uint32' } }
+
+##
+# @x-query-virtio-vhost-queue-status:
+#
+# Return information of a given vhost device's vhost_virtqueue
+#
+# @path: VirtIODevice canonical QOM path
+#
+# @queue: vhost_virtqueue index to examine
+#
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
+# Returns: VirtVhostQueueStatus of the vhost_virtqueue
+#
+# Since: 7.2
+#
+# Examples:
+#
+# 1. Get vhost_virtqueue status for vhost-crypto
+#
+# -> { "execute": "x-query-virtio-vhost-queue-status",
+#      "arguments": { "path": "/machine/peripheral/crypto0/virtio-backend",
+#                     "queue": 0 }
+#    }
+# <- { "return": {
+#          "avail-phys": 5216124928,
+#          "name": "virtio-crypto",
+#          "used-phys": 5216127040,
+#          "avail-size": 2054,
+#          "desc-size": 16384,
+#          "used-size": 8198,
+#          "desc": 140141447430144,
+#          "num": 1024,
+#          "call": 0,
+#          "avail": 140141447446528,
+#          "desc-phys": 5216108544,
+#          "used": 140141447448640,
+#          "kick": 0
+#      }
+#    }
+#
+# 2. Get vhost_virtqueue status for vhost-vsock
+#
+# -> { "execute": "x-query-virtio-vhost-queue-status",
+#      "arguments": { "path": "/machine/peripheral/vsock0/virtio-backend",
+#                     "queue": 0 }
+#    }
+# <- { "return": {
+#          "avail-phys": 5182261248,
+#          "name": "vhost-vsock",
+#          "used-phys": 5182261568,
+#          "avail-size": 262,
+#          "desc-size": 2048,
+#          "used-size": 1030,
+#          "desc": 140141413580800,
+#          "num": 128,
+#          "call": 0,
+#          "avail": 140141413582848,
+#          "desc-phys": 5182259200,
+#          "used": 140141413583168,
+#          "kick": 0
+#      }
+#    }
+##
+{ 'command': 'x-query-virtio-vhost-queue-status',
+  'data': { 'path': 'str', 'queue': 'uint16' },
+  'returns': 'VirtVhostQueueStatus',
+  'features': [ 'unstable' ] }
+
+##
+# @VirtioRingDesc:
+#
+# Information regarding the vring descriptor area
+#
+# @addr: Guest physical address of the descriptor area
+#
+# @len: Length of the descriptor area
+#
+# @flags: List of descriptor flags
+#
+# Since: 7.2
+##
+{ 'struct': 'VirtioRingDesc',
+  'data': { 'addr': 'uint64',
+            'len': 'uint32',
+            'flags': [ 'str' ] } }
+
+##
+# @VirtioRingAvail:
+#
+# Information regarding the avail vring (a.k.a. driver area)
+#
+# @flags: VRingAvail flags
+#
+# @idx: VRingAvail index
+#
+# @ring: VRingAvail ring[] entry at provided index
+#
+# Since: 7.2
+##
+{ 'struct': 'VirtioRingAvail',
+  'data': { 'flags': 'uint16',
+            'idx': 'uint16',
+            'ring': 'uint16' } }
+
+##
+# @VirtioRingUsed:
+#
+# Information regarding the used vring (a.k.a. device area)
+#
+# @flags: VRingUsed flags
+#
+# @idx: VRingUsed index
+#
+# Since: 7.2
+##
+{ 'struct': 'VirtioRingUsed',
+  'data': { 'flags': 'uint16',
+            'idx': 'uint16' } }
+
+##
+# @VirtioQueueElement:
+#
+# Information regarding a VirtQueue's VirtQueueElement including
+# descriptor, driver, and device areas
+#
+# @name: Name of the VirtIODevice that uses this VirtQueue
+#
+# @index: Index of the element in the queue
+#
+# @descs: List of descriptors (VirtioRingDesc)
+#
+# @avail: VRingAvail info
+#
+# @used: VRingUsed info
+#
+# Since: 7.2
+##
+{ 'struct': 'VirtioQueueElement',
+  'data': { 'name': 'str',
+            'index': 'uint32',
+            'descs': [ 'VirtioRingDesc' ],
+            'avail': 'VirtioRingAvail',
+            'used': 'VirtioRingUsed' } }
+
+##
+# @x-query-virtio-queue-element:
+#
+# Return the information about a VirtQueue's VirtQueueElement
+#
+# @path: VirtIODevice canonical QOM path
+#
+# @queue: VirtQueue index to examine
+#
+# @index: Index of the element in the queue (default: head of the
+#     queue)
+#
+# Features:
+#
+# @unstable: This command is meant for debugging.
+#
+# Returns: VirtioQueueElement information
+#
+# Since: 7.2
+#
+# Examples:
+#
+# 1. Introspect on virtio-net's VirtQueue 0 at index 5
+#
+# -> { "execute": "x-query-virtio-queue-element",
+#      "arguments": { "path": "/machine/peripheral-anon/device[1]/virtio-backend",
+#                     "queue": 0,
+#                     "index": 5 }
+#    }
+# <- { "return": {
+#          "index": 5,
+#          "name": "virtio-net",
+#          "descs": [
+#              {
+#                  "flags": ["write"],
+#                  "len": 1536,
+#                  "addr": 5257305600
+#              }
+#          ],
+#          "avail": {
+#              "idx": 256,
+#              "flags": 0,
+#              "ring": 5
+#          },
+#          "used": {
+#              "idx": 13,
+#              "flags": 0
+#          }
+#      }
+#    }
+#
+# 2. Introspect on virtio-crypto's VirtQueue 1 at head
+#
+# -> { "execute": "x-query-virtio-queue-element",
+#      "arguments": { "path": "/machine/peripheral/crypto0/virtio-backend",
+#                     "queue": 1 }
+#    }
+# <- { "return": {
+#          "index": 0,
+#          "name": "virtio-crypto",
+#          "descs": [
+#              {
+#                  "flags": [],
+#                  "len": 0,
+#                  "addr": 8080268923184214134
+#              }
+#          ],
+#          "avail": {
+#              "idx": 280,
+#              "flags": 0,
+#              "ring": 0
+#          },
+#          "used": {
+#              "idx": 280,
+#              "flags": 0
+#          }
+#      }
+#    }
+#
+# 3. Introspect on virtio-scsi's VirtQueue 2 at head
+#
+# -> { "execute": "x-query-virtio-queue-element",
+#      "arguments": { "path": "/machine/peripheral-anon/device[2]/virtio-backend",
+#                     "queue": 2 }
+#    }
+# <- { "return": {
+#          "index": 19,
+#          "name": "virtio-scsi",
+#          "descs": [
+#              {
+#                  "flags": ["used", "indirect", "write"],
+#                  "len": 4099327944,
+#                  "addr": 12055409292258155293
+#              }
+#          ],
+#          "avail": {
+#              "idx": 1147,
+#              "flags": 0,
+#              "ring": 19
+#          },
+#          "used": {
+#              "idx": 280,
+#              "flags": 0
+#          }
+#      }
+#    }
+##
+{ 'command': 'x-query-virtio-queue-element',
+  'data': { 'path': 'str', 'queue': 'uint16', '*index': 'uint16' },
+  'returns': 'VirtioQueueElement',
+  'features': [ 'unstable' ] }
diff --git a/qapi/yank.json b/qapi/yank.json
new file mode 100644 (file)
index 0000000..87ec7ca
--- /dev/null
@@ -0,0 +1,118 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+
+##
+# = Yank feature
+##
+
+##
+# @YankInstanceType:
+#
+# An enumeration of yank instance types.  See @YankInstance for more
+# information.
+#
+# Since: 6.0
+##
+{ 'enum': 'YankInstanceType',
+  'data': [ 'block-node', 'chardev', 'migration' ] }
+
+##
+# @YankInstanceBlockNode:
+#
+# Specifies which block graph node to yank.  See @YankInstance for
+# more information.
+#
+# @node-name: the name of the block graph node
+#
+# Since: 6.0
+##
+{ 'struct': 'YankInstanceBlockNode',
+  'data': { 'node-name': 'str' } }
+
+##
+# @YankInstanceChardev:
+#
+# Specifies which character device to yank.  See @YankInstance for
+# more information.
+#
+# @id: the chardev's ID
+#
+# Since: 6.0
+##
+{ 'struct': 'YankInstanceChardev',
+  'data': { 'id': 'str' } }
+
+##
+# @YankInstance:
+#
+# A yank instance can be yanked with the @yank qmp command to recover
+# from a hanging QEMU.
+#
+# Currently implemented yank instances:
+#
+# - nbd block device: Yanking it will shut down the connection to the
+#   nbd server without attempting to reconnect.
+# - socket chardev: Yanking it will shut down the connected socket.
+# - migration: Yanking it will shut down all migration connections.
+#   Unlike @migrate_cancel, it will not notify the migration process,
+#   so migration will go into @failed state, instead of @cancelled
+#   state.  @yank should be used to recover from hangs.
+#
+# Since: 6.0
+##
+{ 'union': 'YankInstance',
+  'base': { 'type': 'YankInstanceType' },
+  'discriminator': 'type',
+  'data': {
+      'block-node': 'YankInstanceBlockNode',
+      'chardev': 'YankInstanceChardev' } }
+
+##
+# @yank:
+#
+# Try to recover from hanging QEMU by yanking the specified instances.
+# See @YankInstance for more information.
+#
+# Takes a list of @YankInstance as argument.
+#
+# Returns:
+# - Nothing on success
+# - @DeviceNotFound error, if any of the YankInstances doesn't exist
+#
+# Example:
+#
+# -> { "execute": "yank",
+#      "arguments": {
+#          "instances": [
+#               { "type": "block-node",
+#                 "node-name": "nbd0" }
+#          ] } }
+# <- { "return": {} }
+#
+# Since: 6.0
+##
+{ 'command': 'yank',
+  'data': { 'instances': ['YankInstance'] },
+  'allow-oob': true }
+
+##
+# @query-yank:
+#
+# Query yank instances.  See @YankInstance for more information.
+#
+# Returns: list of @YankInstance
+#
+# Example:
+#
+# -> { "execute": "query-yank" }
+# <- { "return": [
+#          { "type": "block-node",
+#            "node-name": "nbd0" }
+#      ] }
+#
+# Since: 6.0
+##
+{ 'command': 'query-yank',
+  'returns': ['YankInstance'],
+  'allow-oob': true }
index b3620f29e50c3e34c151563759c52c872b90abd6..068692d13ebd944b78ded2c76b5f1b616b351b4d 100644 (file)
@@ -46,15 +46,15 @@ SRST
 ERST
 
 DEF("convert", img_convert,
-    "convert [--object objectdef] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-l snapshot_param] [-S sparse_size] [-r rate_limit] [-m num_coroutines] [-W] [--salvage] filename [filename2 [...]] output_filename")
+    "convert [--object objectdef] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file [-F backing_fmt]] [-o options] [-l snapshot_param] [-S sparse_size] [-r rate_limit] [-m num_coroutines] [-W] [--salvage] filename [filename2 [...]] output_filename")
 SRST
-.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] [--salvage] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME
+.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE [-F BACKING_FMT]] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] [--salvage] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME
 ERST
 
 DEF("create", img_create,
-    "create [--object objectdef] [-q] [-f fmt] [-b backing_file] [-F backing_fmt] [-u] [-o options] filename [size]")
+    "create [--object objectdef] [-q] [-f fmt] [-b backing_file [-F backing_fmt]] [-u] [-o options] filename [size]")
 SRST
-.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE]
+.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE [-F BACKING_FMT]] [-u] [-o OPTIONS] FILENAME [SIZE]
 ERST
 
 DEF("dd", img_dd,
@@ -88,9 +88,9 @@ SRST
 ERST
 
 DEF("rebase", img_rebase,
-    "rebase [--object objectdef] [--image-opts] [-U] [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] -b backing_file [-F backing_fmt] filename")
+    "rebase [--object objectdef] [--image-opts] [-U] [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] [-c] -b backing_file [-F backing_fmt] filename")
 SRST
-.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-p] [-u] -b BACKING_FILE [-F BACKING_FMT] FILENAME
+.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-p] [-u] [-c] -b BACKING_FILE [-F BACKING_FMT] FILENAME
 ERST
 
 DEF("resize", img_resize,
index 8bdea40b58d18ee91217374bf85cd0602470f255..63281a84a19ca4ad80b1ea3616afd9ff400ea0ce 100644 (file)
@@ -25,7 +25,8 @@
 #include "qemu/osdep.h"
 #include <getopt.h>
 
-#include "qemu-common.h"
+#include "qemu/help-texts.h"
+#include "qemu/qemu-progress.h"
 #include "qemu-version.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-block-core.h"
@@ -33,7 +34,6 @@
 #include "qapi/qobject-output-visitor.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qstring.h"
 #include "qemu/cutils.h"
 #include "qemu/config-file.h"
 #include "qemu/option.h"
 #include "qemu/module.h"
 #include "qemu/sockets.h"
 #include "qemu/units.h"
+#include "qemu/memalign.h"
 #include "qom/object_interfaces.h"
 #include "sysemu/block-backend.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
+#include "block/dirty-bitmap.h"
 #include "block/qapi.h"
 #include "crypto/init.h"
 #include "trace/control.h"
@@ -83,6 +85,7 @@ enum {
     OPTION_MERGE = 274,
     OPTION_BITMAPS = 275,
     OPTION_FORCE = 276,
+    OPTION_SKIP_BROKEN = 277,
 };
 
 typedef enum OutputFormat {
@@ -98,7 +101,8 @@ static void format_print(void *opaque, const char *name)
     printf(" %s", name);
 }
 
-static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
+static G_NORETURN G_GNUC_PRINTF(1, 2)
+void error_exit(const char *fmt, ...)
 {
     va_list ap;
 
@@ -110,18 +114,21 @@ static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
     exit(EXIT_FAILURE);
 }
 
-static void QEMU_NORETURN missing_argument(const char *option)
+static G_NORETURN
+void missing_argument(const char *option)
 {
     error_exit("missing argument for option '%s'", option);
 }
 
-static void QEMU_NORETURN unrecognized_option(const char *option)
+static G_NORETURN
+void unrecognized_option(const char *option)
 {
     error_exit("unrecognized option '%s'", option);
 }
 
 /* Please keep in synch with docs/tools/qemu-img.rst */
-static void QEMU_NORETURN help(void)
+static G_NORETURN
+void help(void)
 {
     const char *help_msg =
            QEMU_IMG_VERSION
@@ -158,8 +165,8 @@ static void QEMU_NORETURN help(void)
            "  'output_filename' is the destination disk image filename\n"
            "  'output_fmt' is the destination format\n"
            "  'options' is a comma separated list of format specific options in a\n"
-           "    name=value format. Use -o ? for an overview of the options supported by the\n"
-           "    used format\n"
+           "    name=value format. Use -o help for an overview of the options supported by\n"
+           "    the used format\n"
            "  'snapshot_param' is param used for internal snapshot, format\n"
            "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
            "    '[ID_OR_NAME]'\n"
@@ -227,43 +234,26 @@ static void QEMU_NORETURN help(void)
     exit(EXIT_SUCCESS);
 }
 
-static QemuOptsList qemu_object_opts = {
-    .name = "object",
-    .implied_opt_name = "qom-type",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
-    .desc = {
-        { }
-    },
-};
-
-static bool qemu_img_object_print_help(const char *type, QemuOpts *opts)
-{
-    if (user_creatable_print_help(type, opts)) {
-        exit(0);
-    }
-    return true;
-}
-
 /*
- * Is @optarg safe for accumulate_options()?
+ * Is @list safe for accumulate_options()?
  * It is when multiple of them can be joined together separated by ','.
- * To make that work, @optarg must not start with ',' (or else a
+ * To make that work, @list must not start with ',' (or else a
  * separating ',' preceding it gets escaped), and it must not end with
  * an odd number of ',' (or else a separating ',' following it gets
  * escaped), or be empty (or else a separating ',' preceding it can
  * escape a separating ',' following it).
  * 
  */
-static bool is_valid_option_list(const char *optarg)
+static bool is_valid_option_list(const char *list)
 {
-    size_t len = strlen(optarg);
+    size_t len = strlen(list);
     size_t i;
 
-    if (!optarg[0] || optarg[0] == ',') {
+    if (!list[0] || list[0] == ',') {
         return false;
     }
 
-    for (i = len; i > 0 && optarg[i - 1] == ','; i--) {
+    for (i = len; i > 0 && list[i - 1] == ','; i--) {
     }
     if ((len - i) % 2) {
         return false;
@@ -272,19 +262,19 @@ static bool is_valid_option_list(const char *optarg)
     return true;
 }
 
-static int accumulate_options(char **options, char *optarg)
+static int accumulate_options(char **options, char *list)
 {
     char *new_options;
 
-    if (!is_valid_option_list(optarg)) {
-        error_report("Invalid option list: %s", optarg);
+    if (!is_valid_option_list(list)) {
+        error_report("Invalid option list: %s", list);
         return -1;
     }
 
     if (!*options) {
-        *options = g_strdup(optarg);
+        *options = g_strdup(list);
     } else {
-        new_options = g_strdup_printf("%s,%s", *options, optarg);
+        new_options = g_strdup_printf("%s,%s", *options, list);
         g_free(*options);
         *options = new_options;
     }
@@ -300,7 +290,7 @@ static QemuOptsList qemu_source_opts = {
     },
 };
 
-static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
+static int G_GNUC_PRINTF(2, 3) qprintf(bool quiet, const char *fmt, ...)
 {
     int ret = 0;
     if (!quiet) {
@@ -460,6 +450,11 @@ static BlockBackend *img_open(bool image_opts,
         blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
                             force_share);
     }
+
+    if (blk) {
+        blk_set_force_allow_inactivate(blk);
+    }
+
     return blk;
 }
 
@@ -567,14 +562,9 @@ static int img_create(int argc, char **argv)
         case 'u':
             flags |= BDRV_O_NO_BACKING;
             break;
-        case OPTION_OBJECT: {
-            QemuOpts *opts;
-            opts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                           optarg, true);
-            if (!opts) {
-                goto fail;
-            }
-        }   break;
+        case OPTION_OBJECT:
+            user_creatable_process_cmdline(optarg);
+            break;
         }
     }
 
@@ -590,12 +580,6 @@ static int img_create(int argc, char **argv)
     }
     optind++;
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          qemu_img_object_print_help, &error_fatal)) {
-        goto fail;
-    }
-
     /* Get image size, if specified */
     if (optind < argc) {
         int64_t sval;
@@ -627,18 +611,18 @@ fail:
 
 static void dump_json_image_check(ImageCheck *check, bool quiet)
 {
-    QString *str;
+    GString *str;
     QObject *obj;
     Visitor *v = qobject_output_visitor_new(&obj);
 
     visit_type_ImageCheck(v, NULL, &check, &error_abort);
     visit_complete(v, &obj);
-    str = qobject_to_json_pretty(obj);
+    str = qobject_to_json_pretty(obj, true);
     assert(str != NULL);
-    qprintf(quiet, "%s\n", qstring_get_str(str));
+    qprintf(quiet, "%s\n", str->str);
     qobject_unref(obj);
     visit_free(v);
-    qobject_unref(str);
+    g_string_free(str, true);
 }
 
 static void dump_human_image_check(ImageCheck *check, bool quiet)
@@ -805,14 +789,9 @@ static int img_check(int argc, char **argv)
         case 'U':
             force_share = true;
             break;
-        case OPTION_OBJECT: {
-            QemuOpts *opts;
-            opts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                           optarg, true);
-            if (!opts) {
-                return 1;
-            }
-        }   break;
+        case OPTION_OBJECT:
+            user_creatable_process_cmdline(optarg);
+            break;
         case OPTION_IMAGE_OPTS:
             image_opts = true;
             break;
@@ -832,12 +811,6 @@ static int img_check(int argc, char **argv)
         return 1;
     }
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          qemu_img_object_print_help, &error_fatal)) {
-        return 1;
-    }
-
     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
     if (ret < 0) {
         error_report("Invalid source cache option: %s", cache);
@@ -940,28 +913,34 @@ static void common_block_job_cb(void *opaque, int ret)
 
 static void run_block_job(BlockJob *job, Error **errp)
 {
-    AioContext *aio_context = blk_get_aio_context(job->blk);
+    uint64_t progress_current, progress_total;
+    AioContext *aio_context = block_job_get_aio_context(job);
     int ret = 0;
 
-    aio_context_acquire(aio_context);
-    job_ref(&job->job);
+    job_lock();
+    job_ref_locked(&job->job);
     do {
         float progress = 0.0f;
+        job_unlock();
         aio_poll(aio_context, true);
-        if (job->job.progress.total) {
-            progress = (float)job->job.progress.current /
-                       job->job.progress.total * 100.f;
+
+        progress_get_snapshot(&job->job.progress, &progress_current,
+                              &progress_total);
+        if (progress_total) {
+            progress = (float)progress_current / progress_total * 100.f;
         }
         qemu_progress_print(progress, 0);
-    } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
+        job_lock();
+    } while (!job_is_ready_locked(&job->job) &&
+             !job_is_completed_locked(&job->job));
 
-    if (!job_is_completed(&job->job)) {
-        ret = job_complete_sync(&job->job, errp);
+    if (!job_is_completed_locked(&job->job)) {
+        ret = job_complete_sync_locked(&job->job, errp);
     } else {
         ret = job->job.ret;
     }
-    job_unref(&job->job);
-    aio_context_release(aio_context);
+    job_unref_locked(&job->job);
+    job_unlock();
 
     /* publish completion progress only when success */
     if (!ret) {
@@ -1035,14 +1014,9 @@ static int img_commit(int argc, char **argv)
                 return 1;
             }
             break;
-        case OPTION_OBJECT: {
-            QemuOpts *opts;
-            opts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                           optarg, true);
-            if (!opts) {
-                return 1;
-            }
-        }   break;
+        case OPTION_OBJECT:
+            user_creatable_process_cmdline(optarg);
+            break;
         case OPTION_IMAGE_OPTS:
             image_opts = true;
             break;
@@ -1059,12 +1033,6 @@ static int img_commit(int argc, char **argv)
     }
     filename = argv[optind++];
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          qemu_img_object_print_help, &error_fatal)) {
-        return 1;
-    }
-
     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
     if (ret < 0) {
@@ -1082,12 +1050,14 @@ static int img_commit(int argc, char **argv)
     qemu_progress_init(progress, 1.f);
     qemu_progress_print(0.f, 100);
 
+    bdrv_graph_rdlock_main_loop();
     if (base) {
         base_bs = bdrv_find_backing_image(bs, base);
         if (!base_bs) {
             error_setg(&local_err,
                        "Did not find '%s' in the backing chain of '%s'",
                        base, filename);
+            bdrv_graph_rdunlock_main_loop();
             goto done;
         }
     } else {
@@ -1097,9 +1067,11 @@ static int img_commit(int argc, char **argv)
         base_bs = bdrv_backing_chain_next(bs);
         if (!base_bs) {
             error_setg(&local_err, "Image does not have a backing file");
+            bdrv_graph_rdunlock_main_loop();
             goto done;
         }
     }
+    bdrv_graph_rdunlock_main_loop();
 
     cbi = (CommonBlockJobCBInfo){
         .errp = &local_err,
@@ -1157,6 +1129,14 @@ unref_backing:
 done:
     qemu_progress_end();
 
+    /*
+     * Manually inactivate the image first because this way we can know whether
+     * an error occurred. blk_unref() doesn't tell us about failures.
+     */
+    ret = bdrv_inactivate_all();
+    if (ret < 0 && !local_err) {
+        error_setg_errno(&local_err, -ret, "Error while closing the image");
+    }
     blk_unref(blk);
 
     if (local_err) {
@@ -1218,19 +1198,34 @@ static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
         }
     }
 
+    if (i == n) {
+        /*
+         * The whole buf is the same.
+         * No reason to split it into chunks, so return now.
+         */
+        *pnum = i;
+        return !is_zero;
+    }
+
     tail = (sector_num + i) & (alignment - 1);
     if (tail) {
         if (is_zero && i <= tail) {
-            /* treat unallocated areas which only consist
-             * of a small tail as allocated. */
+            /*
+             * For sure next sector after i is data, and it will rewrite this
+             * tail anyway due to RMW. So, let's just write data now.
+             */
             is_zero = false;
         }
         if (!is_zero) {
-            /* align up end offset of allocated areas. */
+            /* If possible, align up end offset of allocated areas. */
             i += alignment - tail;
             i = MIN(i, n);
         } else {
-            /* align down end offset of zero areas. */
+            /*
+             * For sure next sector after i is data, and it will rewrite this
+             * tail anyway due to RMW. Better is avoid RMW and write zeroes up
+             * to aligned bound.
+             */
             i -= tail;
         }
     }
@@ -1283,23 +1278,29 @@ static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
 }
 
 /*
- * Compares two buffers sector by sector. Returns 0 if the first
- * sector of each buffer matches, non-zero otherwise.
+ * Compares two buffers chunk by chunk, where @chsize is the chunk size.
+ * If @chsize is 0, default chunk size of BDRV_SECTOR_SIZE is used.
+ * Returns 0 if the first chunk of each buffer matches, non-zero otherwise.
  *
- * pnum is set to the sector-aligned size of the buffer prefix that
- * has the same matching status as the first sector.
+ * @pnum is set to the size of the buffer prefix aligned to @chsize that
+ * has the same matching status as the first chunk.
  */
 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
-                           int64_t bytes, int64_t *pnum)
+                           int64_t bytes, uint64_t chsize, int64_t *pnum)
 {
     bool res;
-    int64_t i = MIN(bytes, BDRV_SECTOR_SIZE);
+    int64_t i;
 
     assert(bytes > 0);
 
+    if (!chsize) {
+        chsize = BDRV_SECTOR_SIZE;
+    }
+    i = MIN(bytes, chsize);
+
     res = !!memcmp(buf1, buf2, i);
     while (i < bytes) {
-        int64_t len = MIN(bytes - i, BDRV_SECTOR_SIZE);
+        int64_t len = MIN(bytes - i, chsize);
 
         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
             break;
@@ -1335,7 +1336,7 @@ static int check_empty_sectors(BlockBackend *blk, int64_t offset,
     int ret = 0;
     int64_t idx;
 
-    ret = blk_pread(blk, offset, buffer, bytes);
+    ret = blk_pread(blk, offset, bytes, buffer, 0);
     if (ret < 0) {
         error_report("Error while reading offset %" PRId64 " of %s: %s",
                      offset, filename, strerror(-ret));
@@ -1354,7 +1355,7 @@ static int check_empty_sectors(BlockBackend *blk, int64_t offset,
 /*
  * Compares two images. Exit codes:
  *
- * 0 - Images are identical
+ * 0 - Images are identical or the requested help was printed
  * 1 - Images differ
  * >1 - Error occurred
  */
@@ -1424,15 +1425,21 @@ static int img_compare(int argc, char **argv)
         case 'U':
             force_share = true;
             break;
-        case OPTION_OBJECT: {
-            QemuOpts *opts;
-            opts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                           optarg, true);
-            if (!opts) {
-                ret = 2;
-                goto out4;
+        case OPTION_OBJECT:
+            {
+                Error *local_err = NULL;
+
+                if (!user_creatable_add_from_str(optarg, &local_err)) {
+                    if (local_err) {
+                        error_report_err(local_err);
+                        exit(2);
+                    } else {
+                        /* Help was printed */
+                        exit(EXIT_SUCCESS);
+                    }
+                }
+                break;
             }
-        }   break;
         case OPTION_IMAGE_OPTS:
             image_opts = true;
             break;
@@ -1451,13 +1458,6 @@ static int img_compare(int argc, char **argv)
     filename1 = argv[optind++];
     filename2 = argv[optind++];
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          qemu_img_object_print_help, &error_fatal)) {
-        ret = 2;
-        goto out4;
-    }
-
     /* Initialize before goto out */
     qemu_progress_init(progress, 2.0);
 
@@ -1553,7 +1553,7 @@ static int img_compare(int argc, char **argv)
                 int64_t pnum;
 
                 chunk = MIN(chunk, IO_BUF_SIZE);
-                ret = blk_pread(blk1, offset, buf1, chunk);
+                ret = blk_pread(blk1, offset, chunk, buf1, 0);
                 if (ret < 0) {
                     error_report("Error while reading offset %" PRId64
                                  " of %s: %s",
@@ -1561,7 +1561,7 @@ static int img_compare(int argc, char **argv)
                     ret = 4;
                     goto out;
                 }
-                ret = blk_pread(blk2, offset, buf2, chunk);
+                ret = blk_pread(blk2, offset, chunk, buf2, 0);
                 if (ret < 0) {
                     error_report("Error while reading offset %" PRId64
                                  " of %s: %s",
@@ -1569,7 +1569,7 @@ static int img_compare(int argc, char **argv)
                     ret = 4;
                     goto out;
                 }
-                ret = compare_buffers(buf1, buf2, chunk, &pnum);
+                ret = compare_buffers(buf1, buf2, chunk, 0, &pnum);
                 if (ret || pnum != chunk) {
                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
                             offset + (ret ? 0 : pnum));
@@ -1642,7 +1642,6 @@ out2:
     blk_unref(blk1);
 out3:
     qemu_progress_end();
-out4:
     return ret;
 }
 
@@ -1651,17 +1650,16 @@ static void do_dirty_bitmap_merge(const char *dst_node, const char *dst_name,
                                   const char *src_node, const char *src_name,
                                   Error **errp)
 {
-    BlockDirtyBitmapMergeSource *merge_src;
-    BlockDirtyBitmapMergeSourceList *list;
+    BlockDirtyBitmapOrStr *merge_src;
+    BlockDirtyBitmapOrStrList *list = NULL;
 
-    merge_src = g_new0(BlockDirtyBitmapMergeSource, 1);
+    merge_src = g_new0(BlockDirtyBitmapOrStr, 1);
     merge_src->type = QTYPE_QDICT;
     merge_src->u.external.node = g_strdup(src_node);
     merge_src->u.external.name = g_strdup(src_name);
-    list = g_new0(BlockDirtyBitmapMergeSourceList, 1);
-    list->value = merge_src;
+    QAPI_LIST_PREPEND(list, merge_src);
     qmp_block_dirty_bitmap_merge(dst_node, dst_name, list, errp);
-    qapi_free_BlockDirtyBitmapMergeSourceList(list);
+    qapi_free_BlockDirtyBitmapOrStrList(list);
 }
 
 enum ImgConvertBlockStatus {
@@ -1719,7 +1717,8 @@ static void convert_select_part(ImgConvertState *s, int64_t sector_num,
     }
 }
 
-static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
+static int coroutine_mixed_fn GRAPH_RDLOCK
+convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
 {
     int64_t src_cur_offset;
     int ret, n, src_cur;
@@ -2003,7 +2002,9 @@ static void coroutine_fn convert_co_do_copy(void *opaque)
             qemu_co_mutex_unlock(&s->lock);
             break;
         }
-        n = convert_iteration_sectors(s, s->sector_num);
+        WITH_GRAPH_RDLOCK_GUARD() {
+            n = convert_iteration_sectors(s, s->sector_num);
+        }
         if (n < 0) {
             qemu_co_mutex_unlock(&s->lock);
             s->ret = n;
@@ -2051,7 +2052,9 @@ retry:
 
         if (s->ret == -EINPROGRESS) {
             if (copy_range) {
-                ret = convert_co_copy_range(s, sector_num, n);
+                WITH_GRAPH_RDLOCK_GUARD() {
+                    ret = convert_co_copy_range(s, sector_num, n);
+                }
                 if (ret) {
                     s->copy_range = false;
                     goto retry;
@@ -2101,7 +2104,9 @@ static int convert_do_copy(ImgConvertState *s)
     /* Check whether we have zero initialisation or can get it efficiently */
     if (!s->has_zero_init && s->target_is_new && s->min_sparse &&
         !s->target_has_backing) {
+        bdrv_graph_rdlock_main_loop();
         s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
+        bdrv_graph_rdunlock_main_loop();
     }
 
     /* Allocate buffer for copied data. For compressed images, only one cluster
@@ -2115,7 +2120,9 @@ static int convert_do_copy(ImgConvertState *s)
     }
 
     while (sector_num < s->total_sectors) {
+        bdrv_graph_rdlock_main_loop();
         n = convert_iteration_sectors(s, sector_num);
+        bdrv_graph_rdunlock_main_loop();
         if (n < 0) {
             return n;
         }
@@ -2143,7 +2150,7 @@ static int convert_do_copy(ImgConvertState *s)
 
     if (s->compressed && !s->ret) {
         /* signal EOF to align */
-        ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
+        ret = blk_pwrite_compressed(s->target, 0, 0, NULL);
         if (ret < 0) {
             return ret;
         }
@@ -2152,7 +2159,32 @@ static int convert_do_copy(ImgConvertState *s)
     return s->ret;
 }
 
-static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst)
+/* Check that bitmaps can be copied, or output an error */
+static int convert_check_bitmaps(BlockDriverState *src, bool skip_broken)
+{
+    BdrvDirtyBitmap *bm;
+
+    if (!bdrv_supports_persistent_dirty_bitmap(src)) {
+        error_report("Source lacks bitmap support");
+        return -1;
+    }
+    FOR_EACH_DIRTY_BITMAP(src, bm) {
+        if (!bdrv_dirty_bitmap_get_persistence(bm)) {
+            continue;
+        }
+        if (!skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
+            error_report("Cannot copy inconsistent bitmap '%s'",
+                         bdrv_dirty_bitmap_name(bm));
+            error_printf("Try --skip-broken-bitmaps, or "
+                         "use 'qemu-img bitmap --remove' to delete it\n");
+            return -1;
+        }
+    }
+    return 0;
+}
+
+static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst,
+                                bool skip_broken)
 {
     BdrvDirtyBitmap *bm;
     Error *err = NULL;
@@ -2164,6 +2196,10 @@ static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst)
             continue;
         }
         name = bdrv_dirty_bitmap_name(bm);
+        if (skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
+            warn_report("Skipping inconsistent bitmap '%s'", name);
+            continue;
+        }
         qmp_block_dirty_bitmap_add(dst->node_name, name,
                                    true, bdrv_dirty_bitmap_granularity(bm),
                                    true, true,
@@ -2178,6 +2214,7 @@ static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst)
                               &err);
         if (err) {
             error_reportf_err(err, "Failed to populate bitmap %s: ", name);
+            qmp_block_dirty_bitmap_remove(dst->node_name, name, NULL);
             return -1;
         }
     }
@@ -2200,10 +2237,11 @@ static void set_rate_limit(BlockBackend *blk, int64_t rate_limit)
 
 static int img_convert(int argc, char **argv)
 {
-    int c, bs_i, flags, src_flags = 0;
+    int c, bs_i, flags, src_flags = BDRV_O_NO_SHARE;
     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
-               *out_filename, *out_baseimg_param, *snapshot_name = NULL;
+               *out_filename, *out_baseimg_param, *snapshot_name = NULL,
+               *backing_fmt = NULL;
     BlockDriver *drv = NULL, *proto_drv = NULL;
     BlockDriverInfo bdi;
     BlockDriverState *out_bs;
@@ -2218,6 +2256,7 @@ static int img_convert(int argc, char **argv)
     bool force_share = false;
     bool explict_min_sparse = false;
     bool bitmaps = false;
+    bool skip_broken = false;
     int64_t rate_limit = 0;
 
     ImgConvertState s = (ImgConvertState) {
@@ -2239,9 +2278,10 @@ static int img_convert(int argc, char **argv)
             {"salvage", no_argument, 0, OPTION_SALVAGE},
             {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
             {"bitmaps", no_argument, 0, OPTION_BITMAPS},
+            {"skip-broken-bitmaps", no_argument, 0, OPTION_SKIP_BROKEN},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WUr:",
+        c = getopt_long(argc, argv, ":hf:O:B:CcF:o:l:S:pt:T:qnm:WUr:",
                         long_options, NULL);
         if (c == -1) {
             break;
@@ -2271,6 +2311,9 @@ static int img_convert(int argc, char **argv)
         case 'c':
             s.compressed = true;
             break;
+        case 'F':
+            backing_fmt = optarg;
+            break;
         case 'o':
             if (accumulate_options(&options, optarg) < 0) {
                 goto fail_getopt;
@@ -2344,15 +2387,9 @@ static int img_convert(int argc, char **argv)
                 goto fail_getopt;
             }
             break;
-        case OPTION_OBJECT: {
-            QemuOpts *object_opts;
-            object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                                  optarg, true);
-            if (!object_opts) {
-                goto fail_getopt;
-            }
+        case OPTION_OBJECT:
+            user_creatable_process_cmdline(optarg);
             break;
-        }
         case OPTION_IMAGE_OPTS:
             image_opts = true;
             break;
@@ -2373,6 +2410,9 @@ static int img_convert(int argc, char **argv)
         case OPTION_BITMAPS:
             bitmaps = true;
             break;
+        case OPTION_SKIP_BROKEN:
+            skip_broken = true;
+            break;
         }
     }
 
@@ -2380,9 +2420,8 @@ static int img_convert(int argc, char **argv)
         out_fmt = "raw";
     }
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          qemu_img_object_print_help, &error_fatal)) {
+    if (skip_broken && !bitmaps) {
+        error_report("Use of --skip-broken-bitmaps requires --bitmaps");
         goto fail_getopt;
     }
 
@@ -2543,7 +2582,7 @@ static int img_convert(int argc, char **argv)
 
         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
                             s.total_sectors * BDRV_SECTOR_SIZE, &error_abort);
-        ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
+        ret = add_old_style_options(out_fmt, opts, out_baseimg, backing_fmt);
         if (ret < 0) {
             goto out;
         }
@@ -2571,8 +2610,10 @@ static int img_convert(int argc, char **argv)
 
     if (out_baseimg_param) {
         if (!qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT)) {
-            warn_report("Deprecated use of backing file without explicit "
-                        "backing format");
+            error_report("Use of backing file requires explicit "
+                         "backing format");
+            ret = -1;
+            goto out;
         }
     }
 
@@ -2615,9 +2656,8 @@ static int img_convert(int argc, char **argv)
             ret = -1;
             goto out;
         }
-        if (!bdrv_supports_persistent_dirty_bitmap(blk_bs(s.src[0]))) {
-            error_report("Source lacks bitmap support");
-            ret = -1;
+        ret = convert_check_bitmaps(blk_bs(s.src[0]), skip_broken);
+        if (ret < 0) {
             goto out;
         }
     }
@@ -2649,6 +2689,14 @@ static int img_convert(int argc, char **argv)
         goto out;
     }
 
+    if (flags & BDRV_O_NOCACHE) {
+        /*
+         * If we open the target with O_DIRECT, it may be necessary to
+         * extend its size to align to the physical sector size.
+         */
+        flags |= BDRV_O_RESIZE;
+    }
+
     if (skip_create) {
         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
                             flags, writethrough, s.quiet, false);
@@ -2716,8 +2764,10 @@ static int img_convert(int argc, char **argv)
          * s.target_backing_sectors has to be negative, which it will
          * be automatically).  The backing file length is used only
          * for optimizations, so such a case is not fatal. */
+        bdrv_graph_rdlock_main_loop();
         s.target_backing_sectors =
             bdrv_nb_sectors(bdrv_backing_chain_next(out_bs));
+        bdrv_graph_rdunlock_main_loop();
     } else {
         s.target_backing_sectors = -1;
     }
@@ -2741,7 +2791,7 @@ static int img_convert(int argc, char **argv)
 
     /* Now copy the bitmaps */
     if (bitmaps && ret == 0) {
-        ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs);
+        ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs, skip_broken);
     }
 
 out:
@@ -2788,41 +2838,62 @@ static void dump_snapshots(BlockDriverState *bs)
     g_free(sn_tab);
 }
 
-static void dump_json_image_info_list(ImageInfoList *list)
+static void dump_json_block_graph_info_list(BlockGraphInfoList *list)
 {
-    QString *str;
+    GString *str;
     QObject *obj;
     Visitor *v = qobject_output_visitor_new(&obj);
 
-    visit_type_ImageInfoList(v, NULL, &list, &error_abort);
+    visit_type_BlockGraphInfoList(v, NULL, &list, &error_abort);
     visit_complete(v, &obj);
-    str = qobject_to_json_pretty(obj);
+    str = qobject_to_json_pretty(obj, true);
     assert(str != NULL);
-    printf("%s\n", qstring_get_str(str));
+    printf("%s\n", str->str);
     qobject_unref(obj);
     visit_free(v);
-    qobject_unref(str);
+    g_string_free(str, true);
 }
 
-static void dump_json_image_info(ImageInfo *info)
+static void dump_json_block_graph_info(BlockGraphInfo *info)
 {
-    QString *str;
+    GString *str;
     QObject *obj;
     Visitor *v = qobject_output_visitor_new(&obj);
 
-    visit_type_ImageInfo(v, NULL, &info, &error_abort);
+    visit_type_BlockGraphInfo(v, NULL, &info, &error_abort);
     visit_complete(v, &obj);
-    str = qobject_to_json_pretty(obj);
+    str = qobject_to_json_pretty(obj, true);
     assert(str != NULL);
-    printf("%s\n", qstring_get_str(str));
+    printf("%s\n", str->str);
     qobject_unref(obj);
     visit_free(v);
-    qobject_unref(str);
+    g_string_free(str, true);
 }
 
-static void dump_human_image_info_list(ImageInfoList *list)
+static void dump_human_image_info(BlockGraphInfo *info, int indentation,
+                                  const char *path)
 {
-    ImageInfoList *elem;
+    BlockChildInfoList *children_list;
+
+    bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation,
+                        info->children == NULL);
+
+    for (children_list = info->children; children_list;
+         children_list = children_list->next)
+    {
+        BlockChildInfo *child = children_list->value;
+        g_autofree char *child_path = NULL;
+
+        printf("%*sChild node '%s%s':\n",
+               indentation * 4, "", path, child->name);
+        child_path = g_strdup_printf("%s%s/", path, child->name);
+        dump_human_image_info(child->info, indentation + 1, child_path);
+    }
+}
+
+static void dump_human_image_info_list(BlockGraphInfoList *list)
+{
+    BlockGraphInfoList *elem;
     bool delim = false;
 
     for (elem = list; elem; elem = elem->next) {
@@ -2831,7 +2902,7 @@ static void dump_human_image_info_list(ImageInfoList *list)
         }
         delim = true;
 
-        bdrv_image_info_dump(elem->value);
+        dump_human_image_info(elem->value, 0, "/");
     }
 }
 
@@ -2841,24 +2912,24 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b)
 }
 
 /**
- * Open an image file chain and return an ImageInfoList
+ * Open an image file chain and return an BlockGraphInfoList
  *
  * @filename: topmost image filename
  * @fmt: topmost image format (may be NULL to autodetect)
  * @chain: true  - enumerate entire backing file chain
  *         false - only topmost image file
  *
- * Returns a list of ImageInfo objects or NULL if there was an error opening an
- * image file.  If there was an error a message will have been printed to
- * stderr.
+ * Returns a list of BlockNodeInfo objects or NULL if there was an error
+ * opening an image file.  If there was an error a message will have been
+ * printed to stderr.
  */
-static ImageInfoList *collect_image_info_list(bool image_opts,
-                                              const char *filename,
-                                              const char *fmt,
-                                              bool chain, bool force_share)
+static BlockGraphInfoList *collect_image_info_list(bool image_opts,
+                                                   const char *filename,
+                                                   const char *fmt,
+                                                   bool chain, bool force_share)
 {
-    ImageInfoList *head = NULL;
-    ImageInfoList **last = &head;
+    BlockGraphInfoList *head = NULL;
+    BlockGraphInfoList **tail = &head;
     GHashTable *filenames;
     Error *err = NULL;
 
@@ -2867,8 +2938,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts,
     while (filename) {
         BlockBackend *blk;
         BlockDriverState *bs;
-        ImageInfo *info;
-        ImageInfoList *elem;
+        BlockGraphInfo *info;
 
         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
             error_report("Backing file '%s' creates an infinite loop.",
@@ -2885,17 +2955,22 @@ static ImageInfoList *collect_image_info_list(bool image_opts,
         }
         bs = blk_bs(blk);
 
-        bdrv_query_image_info(bs, &info, &err);
+        /*
+         * Note that the returned BlockGraphInfo object will not have
+         * information about this image's backing node, because we have opened
+         * it with BDRV_O_NO_BACKING.  Printing this object will therefore not
+         * duplicate the backing chain information that we obtain by walking
+         * the chain manually here.
+         */
+        bdrv_query_block_graph_info(bs, &info, &err);
+
         if (err) {
             error_report_err(err);
             blk_unref(blk);
             goto err;
         }
 
-        elem = g_new0(ImageInfoList, 1);
-        elem->value = info;
-        *last = elem;
-        last = &elem->next;
+        QAPI_LIST_APPEND(tail, info);
 
         blk_unref(blk);
 
@@ -2904,15 +2979,15 @@ static ImageInfoList *collect_image_info_list(bool image_opts,
         image_opts = false;
 
         if (chain) {
-            if (info->has_full_backing_filename) {
+            if (info->full_backing_filename) {
                 filename = info->full_backing_filename;
-            } else if (info->has_backing_filename) {
+            } else if (info->backing_filename) {
                 error_report("Could not determine absolute backing filename,"
                              " but backing filename '%s' present",
                              info->backing_filename);
                 goto err;
             }
-            if (info->has_backing_filename_format) {
+            if (info->backing_filename_format) {
                 fmt = info->backing_filename_format;
             }
         }
@@ -2921,7 +2996,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts,
     return head;
 
 err:
-    qapi_free_ImageInfoList(head);
+    qapi_free_BlockGraphInfoList(head);
     g_hash_table_destroy(filenames);
     return NULL;
 }
@@ -2932,7 +3007,7 @@ static int img_info(int argc, char **argv)
     OutputFormat output_format = OFORMAT_HUMAN;
     bool chain = false;
     const char *filename, *fmt, *output;
-    ImageInfoList *list;
+    BlockGraphInfoList *list;
     bool image_opts = false;
     bool force_share = false;
 
@@ -2977,14 +3052,9 @@ static int img_info(int argc, char **argv)
         case OPTION_BACKING_CHAIN:
             chain = true;
             break;
-        case OPTION_OBJECT: {
-            QemuOpts *opts;
-            opts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                           optarg, true);
-            if (!opts) {
-                return 1;
-            }
-        }   break;
+        case OPTION_OBJECT:
+            user_creatable_process_cmdline(optarg);
+            break;
         case OPTION_IMAGE_OPTS:
             image_opts = true;
             break;
@@ -3004,12 +3074,6 @@ static int img_info(int argc, char **argv)
         return 1;
     }
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          qemu_img_object_print_help, &error_fatal)) {
-        return 1;
-    }
-
     list = collect_image_info_list(image_opts, filename, fmt, chain,
                                    force_share);
     if (!list) {
@@ -3022,14 +3086,14 @@ static int img_info(int argc, char **argv)
         break;
     case OFORMAT_JSON:
         if (chain) {
-            dump_json_image_info_list(list);
+            dump_json_block_graph_info_list(list);
         } else {
-            dump_json_image_info(list->value);
+            dump_json_block_graph_info(list->value);
         }
         break;
     }
 
-    qapi_free_ImageInfoList(list);
+    qapi_free_BlockGraphInfoList(list);
     return 0;
 }
 
@@ -3046,7 +3110,7 @@ static int dump_map_entry(OutputFormat output_format, MapEntry *e,
             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
                    e->start, e->length,
                    e->has_offset ? e->offset : 0,
-                   e->has_filename ? e->filename : "");
+                   e->filename ?: "");
         }
         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
          * Modify the flags here to allow more coalescing.
@@ -3058,10 +3122,13 @@ static int dump_map_entry(OutputFormat output_format, MapEntry *e,
         break;
     case OFORMAT_JSON:
         printf("{ \"start\": %"PRId64", \"length\": %"PRId64","
-               " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
+               " \"depth\": %"PRId64", \"present\": %s, \"zero\": %s,"
+               " \"data\": %s, \"compressed\": %s",
                e->start, e->length, e->depth,
+               e->present ? "true" : "false",
                e->zero ? "true" : "false",
-               e->data ? "true" : "false");
+               e->data ? "true" : "false",
+               e->compressed ? "true" : "false");
         if (e->has_offset) {
             printf(", \"offset\": %"PRId64"", e->offset);
         }
@@ -3085,6 +3152,9 @@ static int get_block_status(BlockDriverState *bs, int64_t offset,
     int64_t map;
     char *filename = NULL;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     /* As an optimization, we could cache the current range of unallocated
      * clusters in each file of the chain, and avoid querying the same
      * range repeatedly.
@@ -3122,10 +3192,11 @@ static int get_block_status(BlockDriverState *bs, int64_t offset,
         .length = bytes,
         .data = !!(ret & BDRV_BLOCK_DATA),
         .zero = !!(ret & BDRV_BLOCK_ZERO),
+        .compressed = !!(ret & BDRV_BLOCK_COMPRESSED),
         .offset = map,
         .has_offset = has_offset,
         .depth = depth,
-        .has_filename = filename,
+        .present = !!(ret & BDRV_BLOCK_ALLOCATED),
         .filename = filename,
     };
 
@@ -3139,12 +3210,14 @@ static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
     }
     if (curr->zero != next->zero ||
         curr->data != next->data ||
+        curr->compressed != next->compressed ||
         curr->depth != next->depth ||
-        curr->has_filename != next->has_filename ||
+        curr->present != next->present ||
+        !curr->filename != !next->filename ||
         curr->has_offset != next->has_offset) {
         return false;
     }
-    if (curr->has_filename && strcmp(curr->filename, next->filename)) {
+    if (curr->filename && strcmp(curr->filename, next->filename)) {
         return false;
     }
     if (curr->has_offset && curr->offset + curr->length != next->offset) {
@@ -3219,14 +3292,9 @@ static int img_map(int argc, char **argv)
                 return 1;
             }
             break;
-        case OPTION_OBJECT: {
-            QemuOpts *opts;
-            opts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                           optarg, true);
-            if (!opts) {
-                return 1;
-            }
-        }   break;
+        case OPTION_OBJECT:
+            user_creatable_process_cmdline(optarg);
+            break;
         case OPTION_IMAGE_OPTS:
             image_opts = true;
             break;
@@ -3246,12 +3314,6 @@ static int img_map(int argc, char **argv)
         return 1;
     }
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          qemu_img_object_print_help, &error_fatal)) {
-        return 1;
-    }
-
     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
     if (!blk) {
         return 1;
@@ -3321,11 +3383,11 @@ static int img_snapshot(int argc, char **argv)
     char *filename, *snapshot_name = NULL;
     int c, ret = 0, bdrv_oflags;
     int action = 0;
-    qemu_timeval tv;
     bool quiet = false;
     Error *err = NULL;
     bool image_opts = false;
     bool force_share = false;
+    int64_t rt;
 
     bdrv_oflags = BDRV_O_RDWR;
     /* Parse commandline parameters */
@@ -3390,14 +3452,9 @@ static int img_snapshot(int argc, char **argv)
         case 'U':
             force_share = true;
             break;
-        case OPTION_OBJECT: {
-            QemuOpts *opts;
-            opts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                           optarg, true);
-            if (!opts) {
-                return 1;
-            }
-        }   break;
+        case OPTION_OBJECT:
+            user_creatable_process_cmdline(optarg);
+            break;
         case OPTION_IMAGE_OPTS:
             image_opts = true;
             break;
@@ -3409,12 +3466,6 @@ static int img_snapshot(int argc, char **argv)
     }
     filename = argv[optind++];
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          qemu_img_object_print_help, &error_fatal)) {
-        return 1;
-    }
-
     /* Open the image */
     blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
                    force_share);
@@ -3433,14 +3484,17 @@ static int img_snapshot(int argc, char **argv)
         memset(&sn, 0, sizeof(sn));
         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
 
-        qemu_gettimeofday(&tv);
-        sn.date_sec = tv.tv_sec;
-        sn.date_nsec = tv.tv_usec * 1000;
+        rt = g_get_real_time();
+        sn.date_sec = rt / G_USEC_PER_SEC;
+        sn.date_nsec = (rt % G_USEC_PER_SEC) * 1000;
 
+        bdrv_graph_rdlock_main_loop();
         ret = bdrv_snapshot_create(bs, &sn);
+        bdrv_graph_rdunlock_main_loop();
+
         if (ret) {
-            error_report("Could not create snapshot '%s': %d (%s)",
-                snapshot_name, ret, strerror(-ret));
+            error_report("Could not create snapshot '%s': %s",
+                snapshot_name, strerror(-ret));
         }
         break;
 
@@ -3453,6 +3507,7 @@ static int img_snapshot(int argc, char **argv)
         break;
 
     case SNAPSHOT_DELETE:
+        bdrv_graph_rdlock_main_loop();
         ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
         if (ret < 0) {
             error_report("Could not delete snapshot '%s': snapshot not "
@@ -3466,6 +3521,7 @@ static int img_snapshot(int argc, char **argv)
                 ret = 1;
             }
         }
+        bdrv_graph_rdunlock_main_loop();
         break;
     }
 
@@ -3483,17 +3539,21 @@ static int img_rebase(int argc, char **argv)
     uint8_t *buf_old = NULL;
     uint8_t *buf_new = NULL;
     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
-    BlockDriverState *unfiltered_bs;
+    BlockDriverState *unfiltered_bs, *unfiltered_bs_cow;
+    BlockDriverInfo bdi = {0};
     char *filename;
     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
     int c, flags, src_flags, ret;
+    BdrvRequestFlags write_flags = 0;
     bool writethrough, src_writethrough;
     int unsafe = 0;
     bool force_share = false;
     int progress = 0;
     bool quiet = false;
+    bool compress = false;
     Error *local_err = NULL;
     bool image_opts = false;
+    int64_t write_align;
 
     /* Parse commandline parameters */
     fmt = NULL;
@@ -3507,9 +3567,10 @@ static int img_rebase(int argc, char **argv)
             {"object", required_argument, 0, OPTION_OBJECT},
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {"force-share", no_argument, 0, 'U'},
+            {"compress", no_argument, 0, 'c'},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
+        c = getopt_long(argc, argv, ":hf:F:b:upt:T:qUc",
                         long_options, NULL);
         if (c == -1) {
             break;
@@ -3548,20 +3609,18 @@ static int img_rebase(int argc, char **argv)
         case 'q':
             quiet = true;
             break;
-        case OPTION_OBJECT: {
-            QemuOpts *opts;
-            opts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                           optarg, true);
-            if (!opts) {
-                return 1;
-            }
-        }   break;
+        case OPTION_OBJECT:
+            user_creatable_process_cmdline(optarg);
+            break;
         case OPTION_IMAGE_OPTS:
             image_opts = true;
             break;
         case 'U':
             force_share = true;
             break;
+        case 'c':
+            compress = true;
+            break;
         }
     }
 
@@ -3577,12 +3636,6 @@ static int img_rebase(int argc, char **argv)
     }
     filename = argv[optind++];
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          qemu_img_object_print_help, &error_fatal)) {
-        return 1;
-    }
-
     qemu_progress_init(progress, 2.0);
     qemu_progress_print(0, 100);
 
@@ -3618,7 +3671,18 @@ static int img_rebase(int argc, char **argv)
     }
     bs = blk_bs(blk);
 
+    bdrv_graph_rdlock_main_loop();
     unfiltered_bs = bdrv_skip_filters(bs);
+    unfiltered_bs_cow = bdrv_cow_bs(unfiltered_bs);
+    bdrv_graph_rdunlock_main_loop();
+
+    if (compress && !block_driver_can_compress(unfiltered_bs->drv)) {
+        error_report("Compression not supported for this file format");
+        ret = -1;
+        goto out;
+    } else if (compress) {
+        write_flags |= BDRV_REQ_WRITE_COMPRESSED;
+    }
 
     if (out_basefmt != NULL) {
         if (bdrv_find_format(out_basefmt) == NULL) {
@@ -3628,10 +3692,28 @@ static int img_rebase(int argc, char **argv)
         }
     }
 
+    /*
+     * We need overlay subcluster size (or cluster size in case writes are
+     * compressed) to make sure write requests are aligned.
+     */
+    ret = bdrv_get_info(unfiltered_bs, &bdi);
+    if (ret < 0) {
+        error_report("could not get block driver info");
+        goto out;
+    } else if (bdi.subcluster_size == 0) {
+        bdi.cluster_size = bdi.subcluster_size = 1;
+    }
+
+    write_align = compress ? bdi.cluster_size : bdi.subcluster_size;
+
     /* For safe rebasing we need to compare old and new backing file */
     if (!unsafe) {
         QDict *options = NULL;
-        BlockDriverState *base_bs = bdrv_cow_bs(unfiltered_bs);
+        BlockDriverState *base_bs;
+
+        bdrv_graph_rdlock_main_loop();
+        base_bs = bdrv_cow_bs(unfiltered_bs);
+        bdrv_graph_rdunlock_main_loop();
 
         if (base_bs) {
             blk_old_backing = blk_new(qemu_get_aio_context(),
@@ -3661,7 +3743,9 @@ static int img_rebase(int argc, char **argv)
                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
             }
 
+            bdrv_graph_rdlock_main_loop();
             bdrv_refresh_filename(bs);
+            bdrv_graph_rdunlock_main_loop();
             overlay_filename = bs->exact_filename[0] ? bs->exact_filename
                                                      : bs->filename;
             out_real_path =
@@ -3725,11 +3809,16 @@ static int img_rebase(int argc, char **argv)
         int64_t old_backing_size = 0;
         int64_t new_backing_size = 0;
         uint64_t offset;
-        int64_t n;
+        int64_t n, n_old = 0, n_new = 0;
         float local_progress = 0;
 
-        buf_old = blk_blockalign(blk, IO_BUF_SIZE);
-        buf_new = blk_blockalign(blk, IO_BUF_SIZE);
+        if (blk_old_backing && bdrv_opt_mem_align(blk_bs(blk_old_backing)) >
+            bdrv_opt_mem_align(blk_bs(blk))) {
+            buf_old = blk_blockalign(blk_old_backing, IO_BUF_SIZE);
+        } else {
+            buf_old = blk_blockalign(blk, IO_BUF_SIZE);
+        }
+        buf_new = blk_blockalign(blk_new_backing, IO_BUF_SIZE);
 
         size = blk_getlength(blk);
         if (size < 0) {
@@ -3766,7 +3855,8 @@ static int img_rebase(int argc, char **argv)
         }
 
         for (offset = 0; offset < size; offset += n) {
-            bool buf_old_is_zero = false;
+            bool old_backing_eof = false;
+            int64_t n_alloc;
 
             /* How many bytes can we handle with the next read? */
             n = MIN(IO_BUF_SIZE, size - offset);
@@ -3783,11 +3873,13 @@ static int img_rebase(int argc, char **argv)
             }
 
             if (prefix_chain_bs) {
+                uint64_t bytes = n;
+
                 /*
                  * If cluster wasn't changed since prefix_chain, we don't need
                  * to take action
                  */
-                ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
+                ret = bdrv_is_allocated_above(unfiltered_bs_cow,
                                               prefix_chain_bs, false,
                                               offset, n, &n);
                 if (ret < 0) {
@@ -3795,38 +3887,60 @@ static int img_rebase(int argc, char **argv)
                                  strerror(-ret));
                     goto out;
                 }
-                if (!ret) {
+                if (!ret && n) {
                     continue;
                 }
+                if (!n) {
+                    /*
+                     * If we've reached EOF of the old backing, it means that
+                     * offsets beyond the old backing size were read as zeroes.
+                     * Now we will need to explicitly zero the cluster in
+                     * order to preserve that state after the rebase.
+                     */
+                    n = bytes;
+                }
             }
 
+            /*
+             * At this point we know that the region [offset; offset + n)
+             * is unallocated within the target image.  This region might be
+             * unaligned to the target image's (sub)cluster boundaries, as
+             * old backing may have smaller clusters (or have subclusters).
+             * We extend it to the aligned boundaries to avoid CoW on
+             * partial writes in blk_pwrite(),
+             */
+            n += offset - QEMU_ALIGN_DOWN(offset, write_align);
+            offset = QEMU_ALIGN_DOWN(offset, write_align);
+            n += QEMU_ALIGN_UP(offset + n, write_align) - (offset + n);
+            n = MIN(n, size - offset);
+            assert(!bdrv_is_allocated(unfiltered_bs, offset, n, &n_alloc) &&
+                   n_alloc == n);
+
+            /*
+             * Much like with the target image, we'll try to read as much
+             * of the old and new backings as we can.
+             */
+            n_old = MIN(n, MAX(0, old_backing_size - (int64_t) offset));
+            n_new = MIN(n, MAX(0, new_backing_size - (int64_t) offset));
+
             /*
              * Read old and new backing file and take into consideration that
              * backing files may be smaller than the COW image.
              */
-            if (offset >= old_backing_size) {
-                memset(buf_old, 0, n);
-                buf_old_is_zero = true;
+            memset(buf_old + n_old, 0, n - n_old);
+            if (!n_old) {
+                old_backing_eof = true;
             } else {
-                if (offset + n > old_backing_size) {
-                    n = old_backing_size - offset;
-                }
-
-                ret = blk_pread(blk_old_backing, offset, buf_old, n);
+                ret = blk_pread(blk_old_backing, offset, n_old, buf_old, 0);
                 if (ret < 0) {
                     error_report("error while reading from old backing file");
                     goto out;
                 }
             }
 
-            if (offset >= new_backing_size || !blk_new_backing) {
-                memset(buf_new, 0, n);
-            } else {
-                if (offset + n > new_backing_size) {
-                    n = new_backing_size - offset;
-                }
-
-                ret = blk_pread(blk_new_backing, offset, buf_new, n);
+            memset(buf_new + n_new, 0, n - n_new);
+            if (n_new) {
+                ret = blk_pread(blk_new_backing, offset, n_new, buf_new, 0);
                 if (ret < 0) {
                     error_report("error while reading from new backing file");
                     goto out;
@@ -3840,13 +3954,14 @@ static int img_rebase(int argc, char **argv)
                 int64_t pnum;
 
                 if (compare_buffers(buf_old + written, buf_new + written,
-                                    n - written, &pnum))
+                                    n - written, write_align, &pnum))
                 {
-                    if (buf_old_is_zero) {
+                    if (old_backing_eof) {
                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
                     } else {
-                        ret = blk_pwrite(blk, offset + written,
-                                         buf_old + written, pnum, 0);
+                        assert(written + pnum <= IO_BUF_SIZE);
+                        ret = blk_pwrite(blk, offset + written, pnum,
+                                         buf_old + written, write_flags);
                     }
                     if (ret < 0) {
                         error_report("Error while writing to COW image: %s",
@@ -3856,6 +3971,9 @@ static int img_rebase(int argc, char **argv)
                 }
 
                 written += pnum;
+                if (offset + written >= old_backing_size) {
+                    old_backing_eof = true;
+                }
             }
             qemu_progress_print(local_progress, 100);
         }
@@ -3876,6 +3994,9 @@ static int img_rebase(int argc, char **argv)
     if (ret == -ENOSPC) {
         error_report("Could not change the backing file to '%s': No "
                      "space left in the file header", out_baseimg);
+    } else if (ret == -EINVAL && out_baseimg && !out_basefmt) {
+        error_report("Could not change the backing file to '%s': backing "
+                     "format must be specified", out_baseimg);
     } else if (ret < 0) {
         error_report("Could not change the backing file to '%s': %s",
             out_baseimg, strerror(-ret));
@@ -3973,14 +4094,9 @@ static int img_resize(int argc, char **argv)
         case 'q':
             quiet = true;
             break;
-        case OPTION_OBJECT: {
-            QemuOpts *opts;
-            opts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                           optarg, true);
-            if (!opts) {
-                return 1;
-            }
-        }   break;
+        case OPTION_OBJECT:
+            user_creatable_process_cmdline(optarg);
+            break;
         case OPTION_IMAGE_OPTS:
             image_opts = true;
             break;
@@ -4002,12 +4118,6 @@ static int img_resize(int argc, char **argv)
     }
     filename = argv[optind++];
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          qemu_img_object_print_help, &error_fatal)) {
-        return 1;
-    }
-
     /* Choose grow, shrink, or absolute resize mode */
     switch (size[0]) {
     case '+':
@@ -4106,6 +4216,8 @@ static int print_amend_option_help(const char *format)
 {
     BlockDriver *drv;
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     /* Find driver and parse its options */
     drv = bdrv_find_format(format);
     if (!drv) {
@@ -4187,12 +4299,7 @@ static int img_amend(int argc, char **argv)
             quiet = true;
             break;
         case OPTION_OBJECT:
-            opts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                           optarg, true);
-            if (!opts) {
-                ret = -1;
-                goto out_no_progress;
-            }
+            user_creatable_process_cmdline(optarg);
             break;
         case OPTION_IMAGE_OPTS:
             image_opts = true;
@@ -4207,13 +4314,6 @@ static int img_amend(int argc, char **argv)
         error_exit("Must specify options (-o)");
     }
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          qemu_img_object_print_help, &error_fatal)) {
-        ret = -1;
-        goto out_no_progress;
-    }
-
     if (quiet) {
         progress = false;
     }
@@ -4256,9 +4356,11 @@ static int img_amend(int argc, char **argv)
         goto out;
     }
 
+    bdrv_graph_rdlock_main_loop();
     if (!bs->drv->bdrv_amend_options) {
         error_report("Format driver '%s' does not support option amendment",
                      fmt);
+        bdrv_graph_rdunlock_main_loop();
         ret = -1;
         goto out;
     }
@@ -4278,6 +4380,7 @@ static int img_amend(int argc, char **argv)
                               "This option is only supported for image creation\n");
         }
 
+        bdrv_graph_rdunlock_main_loop();
         error_report_err(err);
         ret = -1;
         goto out;
@@ -4287,6 +4390,8 @@ static int img_amend(int argc, char **argv)
     qemu_progress_print(0.f, 0);
     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, force, &err);
     qemu_progress_print(100.f, 0);
+    bdrv_graph_rdunlock_main_loop();
+
     if (ret < 0) {
         error_report_err(err);
         goto out;
@@ -4421,7 +4526,7 @@ static int img_bench(int argc, char **argv)
     struct timeval t1, t2;
     int i;
     bool force_share = false;
-    size_t buf_size;
+    size_t buf_size = 0;
 
     for (;;) {
         static const struct option long_options[] = {
@@ -4620,7 +4725,7 @@ static int img_bench(int argc, char **argv)
     data.buf = blk_blockalign(blk, buf_size);
     memset(data.buf, pattern, data.nrreq * data.bufsize);
 
-    blk_register_buf(blk, data.buf, buf_size);
+    blk_register_buf(blk, data.buf, buf_size, &error_fatal);
 
     data.qiov = g_new(QEMUIOVector, data.nrreq);
     for (i = 0; i < data.nrreq; i++) {
@@ -4643,7 +4748,7 @@ static int img_bench(int argc, char **argv)
 
 out:
     if (data.buf) {
-        blk_unregister_buf(blk, data.buf);
+        blk_unregister_buf(blk, data.buf, buf_size);
     }
     qemu_vfree(data.buf);
     blk_unref(blk);
@@ -4683,6 +4788,7 @@ static int img_bitmap(int argc, char **argv)
     QSIMPLEQ_HEAD(, ImgBitmapAction) actions;
     ImgBitmapAction *act, *act_next;
     const char *op;
+    int inactivate_ret;
 
     QSIMPLEQ_INIT(&actions);
 
@@ -4766,10 +4872,7 @@ static int img_bitmap(int argc, char **argv)
             merge = true;
             break;
         case OPTION_OBJECT:
-            opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true);
-            if (!opts) {
-                goto out;
-            }
+            user_creatable_process_cmdline(optarg);
             break;
         case OPTION_IMAGE_OPTS:
             image_opts = true;
@@ -4777,12 +4880,6 @@ static int img_bitmap(int argc, char **argv)
         }
     }
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          qemu_img_object_print_help, &error_fatal)) {
-        goto out;
-    }
-
     if (QSIMPLEQ_EMPTY(&actions)) {
         error_report("Need at least one of --add, --remove, --clear, "
                      "--enable, --disable, or --merge");
@@ -4876,6 +4973,16 @@ static int img_bitmap(int argc, char **argv)
     ret = 0;
 
  out:
+    /*
+     * Manually inactivate the images first because this way we can know whether
+     * an error occurred. blk_unref() doesn't tell us about failures.
+     */
+    inactivate_ret = bdrv_inactivate_all();
+    if (inactivate_ret < 0) {
+        error_report("Error while closing the image: %s", strerror(-inactivate_ret));
+        ret = 1;
+    }
+
     blk_unref(src);
     blk_unref(blk);
     qemu_opts_del(opts);
@@ -4981,7 +5088,7 @@ static int img_dd(int argc, char **argv)
     const char *out_fmt = "raw";
     const char *fmt = NULL;
     int64_t size = 0;
-    int64_t block_count = 0, out_pos, in_pos;
+    int64_t out_pos, in_pos;
     bool force_share = false;
     struct DdInfo dd = {
         .flags = 0,
@@ -5040,10 +5147,7 @@ static int img_dd(int argc, char **argv)
             force_share = true;
             break;
         case OPTION_OBJECT:
-            if (!qemu_opts_parse_noisily(&qemu_object_opts, optarg, true)) {
-                ret = -1;
-                goto out;
-            }
+            user_creatable_process_cmdline(optarg);
             break;
         case OPTION_IMAGE_OPTS:
             image_opts = true;
@@ -5090,13 +5194,6 @@ static int img_dd(int argc, char **argv)
         goto out;
     }
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          qemu_img_object_print_help, &error_fatal)) {
-        ret = -1;
-        goto out;
-    }
-
     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
                     force_share);
 
@@ -5191,31 +5288,24 @@ static int img_dd(int argc, char **argv)
 
     in.buf = g_new(uint8_t, in.bsz);
 
-    for (out_pos = 0; in_pos < size; block_count++) {
-        int in_ret, out_ret;
+    for (out_pos = 0; in_pos < size; ) {
+        int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
 
-        if (in_pos + in.bsz > size) {
-            in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
-        } else {
-            in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
-        }
-        if (in_ret < 0) {
+        ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
+        if (ret < 0) {
             error_report("error while reading from input image file: %s",
-                         strerror(-in_ret));
-            ret = -1;
+                         strerror(-ret));
             goto out;
         }
-        in_pos += in_ret;
-
-        out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
+        in_pos += bytes;
 
-        if (out_ret < 0) {
+        ret = blk_pwrite(blk2, out_pos, bytes, in.buf, 0);
+        if (ret < 0) {
             error_report("error while writing to output image file: %s",
-                         strerror(-out_ret));
-            ret = -1;
+                         strerror(-ret));
             goto out;
         }
-        out_pos += out_ret;
+        out_pos += bytes;
     }
 
 out:
@@ -5237,18 +5327,18 @@ out:
 
 static void dump_json_block_measure_info(BlockMeasureInfo *info)
 {
-    QString *str;
+    GString *str;
     QObject *obj;
     Visitor *v = qobject_output_visitor_new(&obj);
 
     visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
     visit_complete(v, &obj);
-    str = qobject_to_json_pretty(obj);
+    str = qobject_to_json_pretty(obj, true);
     assert(str != NULL);
-    printf("%s\n", qstring_get_str(str));
+    printf("%s\n", str->str);
     qobject_unref(obj);
     visit_free(v);
-    qobject_unref(str);
+    g_string_free(str, true);
 }
 
 static int img_measure(int argc, char **argv)
@@ -5317,11 +5407,7 @@ static int img_measure(int argc, char **argv)
             force_share = true;
             break;
         case OPTION_OBJECT:
-            object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                                  optarg, true);
-            if (!object_opts) {
-                goto out;
-            }
+            user_creatable_process_cmdline(optarg);
             break;
         case OPTION_IMAGE_OPTS:
             image_opts = true;
@@ -5351,12 +5437,6 @@ static int img_measure(int argc, char **argv)
         }
     }
 
-    if (qemu_opts_foreach(&qemu_object_opts,
-                          user_creatable_add_opts_foreach,
-                          qemu_img_object_print_help, &error_fatal)) {
-        goto out;
-    }
-
     if (argc - optind > 1) {
         error_report("At most one filename argument is allowed.");
         goto out;
@@ -5465,7 +5545,6 @@ int main(int argc, char **argv)
 {
     const img_cmd_t *cmd;
     const char *cmdname;
-    Error *local_error = NULL;
     int c;
     static const struct option long_options[] = {
         {"help", no_argument, 0, 'h'},
@@ -5483,10 +5562,7 @@ int main(int argc, char **argv)
     module_call_init(MODULE_INIT_TRACE);
     qemu_init_exec_dir(argv[0]);
 
-    if (qemu_init_main_loop(&local_error)) {
-        error_report_err(local_error);
-        exit(EXIT_FAILURE);
-    }
+    qemu_init_main_loop(&error_fatal);
 
     qcrypto_init(&error_fatal);
 
@@ -5496,7 +5572,6 @@ int main(int argc, char **argv)
         error_exit("Not enough arguments");
     }
 
-    qemu_add_opts(&qemu_object_opts);
     qemu_add_opts(&qemu_source_opts);
     qemu_add_opts(&qemu_trace_opts);
 
@@ -5534,7 +5609,7 @@ int main(int argc, char **argv)
         exit(1);
     }
     trace_init_file();
-    qemu_set_log(LOG_TRACE);
+    qemu_set_log(LOG_TRACE, &error_fatal);
 
     /* find the command */
     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
index 4153f1c0b02605cdc4af9cff54009b9e48307d92..f5d7202a131b0bada4cb411d1a82b761611a7278 100644 (file)
@@ -21,6 +21,7 @@
 #include "qemu/option.h"
 #include "qemu/timer.h"
 #include "qemu/cutils.h"
+#include "qemu/memalign.h"
 
 #define CMD_NOFILE_OK   0x01
 
@@ -92,9 +93,19 @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc,
         return -EINVAL;
     }
 
-    /* Request additional permissions if necessary for this command. The caller
+    /*
+     * Request additional permissions if necessary for this command. The caller
      * is responsible for restoring the original permissions afterwards if this
-     * is what it wants. */
+     * is what it wants.
+     *
+     * Coverity thinks that blk may be NULL in the following if condition. It's
+     * not so: in init_check_command() we fail if blk is NULL for command with
+     * both CMD_FLAG_GLOBAL and CMD_NOFILE_OK flags unset. And in
+     * qemuio_add_command() we assert that command with non-zero .perm field
+     * doesn't set this flags. So, the following assertion is to silence
+     * Coverity:
+     */
+    assert(blk || !ct->perm);
     if (ct->perm && blk_is_available(blk)) {
         uint64_t orig_perm, orig_shared_perm;
         blk_get_perm(blk, &orig_perm, &orig_shared_perm);
@@ -327,7 +338,8 @@ static int parse_pattern(const char *arg)
  */
 
 #define MISALIGN_OFFSET     16
-static void *qemu_io_alloc(BlockBackend *blk, size_t len, int pattern)
+static void *qemu_io_alloc(BlockBackend *blk, size_t len, int pattern,
+                           bool register_buf)
 {
     void *buf;
 
@@ -336,16 +348,24 @@ static void *qemu_io_alloc(BlockBackend *blk, size_t len, int pattern)
     }
     buf = blk_blockalign(blk, len);
     memset(buf, pattern, len);
+    if (register_buf) {
+        blk_register_buf(blk, buf, len, &error_abort);
+    }
     if (qemuio_misalign) {
         buf += MISALIGN_OFFSET;
     }
     return buf;
 }
 
-static void qemu_io_free(void *p)
+static void qemu_io_free(BlockBackend *blk, void *p, size_t len,
+                         bool unregister_buf)
 {
     if (qemuio_misalign) {
         p -= MISALIGN_OFFSET;
+        len += MISALIGN_OFFSET;
+    }
+    if (unregister_buf) {
+        blk_unregister_buf(blk, p, len);
     }
     qemu_vfree(p);
 }
@@ -360,14 +380,16 @@ static void qemu_io_free(void *p)
  * @blk - the block backend where the buffer content is going to be written to
  * @len - the buffer length
  * @file_name - the file to read the content from
+ * @register_buf - call blk_register_buf()
  *
  * Returns: the buffer pointer on success
  *          NULL on error
  */
 static void *qemu_io_alloc_from_file(BlockBackend *blk, size_t len,
-                                     const char *file_name)
+                                     const char *file_name, bool register_buf)
 {
-    char *buf, *buf_origin;
+    size_t alloc_len = len + (qemuio_misalign ? MISALIGN_OFFSET : 0);
+    char *alloc_buf, *buf, *end;
     FILE *f = fopen(file_name, "r");
     int pattern_len;
 
@@ -376,19 +398,13 @@ static void *qemu_io_alloc_from_file(BlockBackend *blk, size_t len,
         return NULL;
     }
 
-    if (qemuio_misalign) {
-        len += MISALIGN_OFFSET;
-    }
-
-    buf_origin = buf = blk_blockalign(blk, len);
+    alloc_buf = buf = blk_blockalign(blk, alloc_len);
 
     if (qemuio_misalign) {
-        buf_origin += MISALIGN_OFFSET;
         buf += MISALIGN_OFFSET;
-        len -= MISALIGN_OFFSET;
     }
 
-    pattern_len = fread(buf_origin, 1, len, f);
+    pattern_len = fread(buf, 1, len, f);
 
     if (ferror(f)) {
         perror(file_name);
@@ -403,24 +419,23 @@ static void *qemu_io_alloc_from_file(BlockBackend *blk, size_t len,
     fclose(f);
     f = NULL;
 
-    if (len > pattern_len) {
-        len -= pattern_len;
-        buf += pattern_len;
-
-        while (len > 0) {
-            size_t len_to_copy = MIN(pattern_len, len);
-
-            memcpy(buf, buf_origin, len_to_copy);
+    if (register_buf) {
+        blk_register_buf(blk, alloc_buf, alloc_len, &error_abort);
+    }
 
-            len -= len_to_copy;
-            buf += len_to_copy;
-        }
+    end = buf + len;
+    for (char *p = buf + pattern_len; p < end; p += pattern_len) {
+        memcpy(p, buf, MIN(pattern_len, end - p));
     }
 
-    return buf_origin;
+    return buf;
 
 error:
-    qemu_io_free(buf_origin);
+    /*
+     * This code path is only taken before blk_register_buf() is called, so
+     * hardcode the qemu_io_free() unregister_buf argument to false.
+     */
+    qemu_io_free(blk, alloc_buf, alloc_len, false);
     if (f) {
         fclose(f);
     }
@@ -479,7 +494,7 @@ static void print_report(const char *op, struct timespec *t, int64_t offset,
  */
 static void *
 create_iovec(BlockBackend *blk, QEMUIOVector *qiov, char **argv, int nr_iov,
-             int pattern)
+             int pattern, bool register_buf)
 {
     size_t *sizes = g_new0(size_t, nr_iov);
     size_t count = 0;
@@ -515,7 +530,7 @@ create_iovec(BlockBackend *blk, QEMUIOVector *qiov, char **argv, int nr_iov,
 
     qemu_iovec_init(qiov, nr_iov);
 
-    buf = p = qemu_io_alloc(blk, count, pattern);
+    buf = p = qemu_io_alloc(blk, count, pattern, register_buf);
 
     for (i = 0; i < nr_iov; i++) {
         qemu_iovec_add(qiov, p, sizes[i]);
@@ -528,85 +543,51 @@ fail:
 }
 
 static int do_pread(BlockBackend *blk, char *buf, int64_t offset,
-                    int64_t bytes, int64_t *total)
+                    int64_t bytes, BdrvRequestFlags flags, int64_t *total)
 {
+    int ret;
+
     if (bytes > INT_MAX) {
         return -ERANGE;
     }
 
-    *total = blk_pread(blk, offset, (uint8_t *)buf, bytes);
-    if (*total < 0) {
-        return *total;
+    ret = blk_pread(blk, offset, bytes, (uint8_t *)buf, flags);
+    if (ret < 0) {
+        return ret;
     }
+    *total = bytes;
     return 1;
 }
 
 static int do_pwrite(BlockBackend *blk, char *buf, int64_t offset,
-                     int64_t bytes, int flags, int64_t *total)
+                     int64_t bytes, BdrvRequestFlags flags, int64_t *total)
 {
+    int ret;
+
     if (bytes > INT_MAX) {
         return -ERANGE;
     }
 
-    *total = blk_pwrite(blk, offset, (uint8_t *)buf, bytes, flags);
-    if (*total < 0) {
-        return *total;
+    ret = blk_pwrite(blk, offset, bytes, (uint8_t *)buf, flags);
+    if (ret < 0) {
+        return ret;
     }
+    *total = bytes;
     return 1;
 }
 
-typedef struct {
-    BlockBackend *blk;
-    int64_t offset;
-    int64_t bytes;
-    int64_t *total;
-    int flags;
-    int ret;
-    bool done;
-} CoWriteZeroes;
-
-static void coroutine_fn co_pwrite_zeroes_entry(void *opaque)
+static int do_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                               int64_t bytes, BdrvRequestFlags flags,
+                               int64_t *total)
 {
-    CoWriteZeroes *data = opaque;
-
-    data->ret = blk_co_pwrite_zeroes(data->blk, data->offset, data->bytes,
-                                     data->flags);
-    data->done = true;
-    if (data->ret < 0) {
-        *data->total = data->ret;
-        return;
-    }
-
-    *data->total = data->bytes;
-}
+    int ret = blk_pwrite_zeroes(blk, offset, bytes,
+                                flags | BDRV_REQ_ZERO_WRITE);
 
-static int do_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                               int64_t bytes, int flags, int64_t *total)
-{
-    Coroutine *co;
-    CoWriteZeroes data = {
-        .blk    = blk,
-        .offset = offset,
-        .bytes  = bytes,
-        .total  = total,
-        .flags  = flags,
-        .done   = false,
-    };
-
-    if (bytes > INT_MAX) {
-        return -ERANGE;
-    }
-
-    co = qemu_coroutine_create(co_pwrite_zeroes_entry, &data);
-    bdrv_coroutine_enter(blk_bs(blk), co);
-    while (!data.done) {
-        aio_poll(blk_get_aio_context(blk), true);
-    }
-    if (data.ret < 0) {
-        return data.ret;
-    } else {
-        return 1;
+    if (ret < 0) {
+        return ret;
     }
+    *total = bytes;
+    return 1;
 }
 
 static int do_write_compressed(BlockBackend *blk, char *buf, int64_t offset,
@@ -618,7 +599,7 @@ static int do_write_compressed(BlockBackend *blk, char *buf, int64_t offset,
         return -ERANGE;
     }
 
-    ret = blk_pwrite_compressed(blk, offset, buf, bytes);
+    ret = blk_pwrite_compressed(blk, offset, bytes, buf);
     if (ret < 0) {
         return ret;
     }
@@ -661,11 +642,11 @@ static void aio_rw_done(void *opaque, int ret)
 }
 
 static int do_aio_readv(BlockBackend *blk, QEMUIOVector *qiov,
-                        int64_t offset, int *total)
+                        int64_t offset, BdrvRequestFlags flags, int *total)
 {
     int async_ret = NOT_DONE;
 
-    blk_aio_preadv(blk, offset, qiov, 0, aio_rw_done, &async_ret);
+    blk_aio_preadv(blk, offset, qiov, flags, aio_rw_done, &async_ret);
     while (async_ret == NOT_DONE) {
         main_loop_wait(false);
     }
@@ -675,7 +656,7 @@ static int do_aio_readv(BlockBackend *blk, QEMUIOVector *qiov,
 }
 
 static int do_aio_writev(BlockBackend *blk, QEMUIOVector *qiov,
-                         int64_t offset, int flags, int *total)
+                         int64_t offset, BdrvRequestFlags flags, int *total)
 {
     int async_ret = NOT_DONE;
 
@@ -705,6 +686,7 @@ static void read_help(void)
 " -p, -- ignored for backwards compatibility\n"
 " -P, -- use a pattern to verify read data\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
 " -s, -- start offset for pattern verification (only with -P)\n"
 " -v, -- dump buffer to standard output\n"
 "\n");
@@ -718,7 +700,7 @@ static const cmdinfo_t read_cmd = {
     .cfunc      = read_f,
     .argmin     = 2,
     .argmax     = -1,
-    .args       = "[-abCqv] [-P pattern [-s off] [-l len]] off len",
+    .args       = "[-abCqrv] [-P pattern [-s off] [-l len]] off len",
     .oneline    = "reads a number of bytes at a specified offset",
     .help       = read_help,
 };
@@ -736,8 +718,9 @@ static int read_f(BlockBackend *blk, int argc, char **argv)
     int64_t total = 0;
     int pattern = 0;
     int64_t pattern_offset = 0, pattern_count = 0;
+    BdrvRequestFlags flags = 0;
 
-    while ((c = getopt(argc, argv, "bCl:pP:qs:v")) != -1) {
+    while ((c = getopt(argc, argv, "bCl:pP:qrs:v")) != -1) {
         switch (c) {
         case 'b':
             bflag = true;
@@ -766,6 +749,9 @@ static int read_f(BlockBackend *blk, int argc, char **argv)
         case 'q':
             qflag = true;
             break;
+        case 'r':
+            flags |= BDRV_REQ_REGISTERED_BUF;
+            break;
         case 's':
             sflag = true;
             pattern_offset = cvtnum(optarg);
@@ -830,15 +816,20 @@ static int read_f(BlockBackend *blk, int argc, char **argv)
                    count);
             return -EINVAL;
         }
+        if (flags & BDRV_REQ_REGISTERED_BUF) {
+            printf("I/O buffer registration is not supported when reading "
+                    "from vmstate\n");
+            return -EINVAL;
+        }
     }
 
-    buf = qemu_io_alloc(blk, count, 0xab);
+    buf = qemu_io_alloc(blk, count, 0xab, flags & BDRV_REQ_REGISTERED_BUF);
 
     clock_gettime(CLOCK_MONOTONIC, &t1);
     if (bflag) {
         ret = do_load_vmstate(blk, buf, offset, count, &total);
     } else {
-        ret = do_pread(blk, buf, offset, count, &total);
+        ret = do_pread(blk, buf, offset, count, flags, &total);
     }
     clock_gettime(CLOCK_MONOTONIC, &t2);
 
@@ -875,7 +866,7 @@ static int read_f(BlockBackend *blk, int argc, char **argv)
     print_report("read", &t2, offset, count, total, cnt, Cflag);
 
 out:
-    qemu_io_free(buf);
+    qemu_io_free(blk, buf, count, flags & BDRV_REQ_REGISTERED_BUF);
     return ret;
 }
 
@@ -893,8 +884,9 @@ static void readv_help(void)
 " Uses multiple iovec buffers if more than one byte range is specified.\n"
 " -C, -- report statistics in a machine parsable format\n"
 " -P, -- use a pattern to verify read data\n"
-" -v, -- dump buffer to standard output\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
+" -v, -- dump buffer to standard output\n"
 "\n");
 }
 
@@ -905,7 +897,7 @@ static const cmdinfo_t readv_cmd = {
     .cfunc      = readv_f,
     .argmin     = 2,
     .argmax     = -1,
-    .args       = "[-Cqv] [-P pattern] off len [len..]",
+    .args       = "[-Cqrv] [-P pattern] off len [len..]",
     .oneline    = "reads a number of bytes at a specified offset",
     .help       = readv_help,
 };
@@ -923,8 +915,9 @@ static int readv_f(BlockBackend *blk, int argc, char **argv)
     QEMUIOVector qiov;
     int pattern = 0;
     bool Pflag = false;
+    BdrvRequestFlags flags = 0;
 
-    while ((c = getopt(argc, argv, "CP:qv")) != -1) {
+    while ((c = getopt(argc, argv, "CP:qrv")) != -1) {
         switch (c) {
         case 'C':
             Cflag = true;
@@ -939,6 +932,9 @@ static int readv_f(BlockBackend *blk, int argc, char **argv)
         case 'q':
             qflag = true;
             break;
+        case 'r':
+            flags |= BDRV_REQ_REGISTERED_BUF;
+            break;
         case 'v':
             vflag = true;
             break;
@@ -962,13 +958,14 @@ static int readv_f(BlockBackend *blk, int argc, char **argv)
     optind++;
 
     nr_iov = argc - optind;
-    buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, 0xab);
+    buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, 0xab,
+                       flags & BDRV_REQ_REGISTERED_BUF);
     if (buf == NULL) {
         return -EINVAL;
     }
 
     clock_gettime(CLOCK_MONOTONIC, &t1);
-    ret = do_aio_readv(blk, &qiov, offset, &total);
+    ret = do_aio_readv(blk, &qiov, offset, flags, &total);
     clock_gettime(CLOCK_MONOTONIC, &t2);
 
     if (ret < 0) {
@@ -1003,8 +1000,8 @@ static int readv_f(BlockBackend *blk, int argc, char **argv)
     print_report("read", &t2, offset, qiov.size, total, cnt, Cflag);
 
 out:
+    qemu_io_free(blk, buf, qiov.size, flags & BDRV_REQ_REGISTERED_BUF);
     qemu_iovec_destroy(&qiov);
-    qemu_io_free(buf);
     return ret;
 }
 
@@ -1021,15 +1018,16 @@ static void write_help(void)
 " filled with a set pattern (0xcdcdcdcd).\n"
 " -b, -- write to the VM state rather than the virtual disk\n"
 " -c, -- write compressed data with blk_write_compressed\n"
+" -C, -- report statistics in a machine parsable format\n"
 " -f, -- use Force Unit Access semantics\n"
 " -n, -- with -z, don't allow slow fallback\n"
 " -p, -- ignored for backwards compatibility\n"
 " -P, -- use different pattern to fill file\n"
-" -s, -- use a pattern file to fill the write buffer\n"
-" -C, -- report statistics in a machine parsable format\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
+" -s, -- use a pattern file to fill the write buffer\n"
 " -u, -- with -z, allow unmapping\n"
-" -z, -- write zeroes using blk_co_pwrite_zeroes\n"
+" -z, -- write zeroes using blk_pwrite_zeroes\n"
 "\n");
 }
 
@@ -1042,7 +1040,7 @@ static const cmdinfo_t write_cmd = {
     .perm       = BLK_PERM_WRITE,
     .argmin     = 2,
     .argmax     = -1,
-    .args       = "[-bcCfnquz] [-P pattern | -s source_file] off len",
+    .args       = "[-bcCfnqruz] [-P pattern | -s source_file] off len",
     .oneline    = "writes a number of bytes at a specified offset",
     .help       = write_help,
 };
@@ -1052,7 +1050,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
     struct timespec t1, t2;
     bool Cflag = false, qflag = false, bflag = false;
     bool Pflag = false, zflag = false, cflag = false, sflag = false;
-    int flags = 0;
+    BdrvRequestFlags flags = 0;
     int c, cnt, ret;
     char *buf = NULL;
     int64_t offset;
@@ -1062,7 +1060,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
     int pattern = 0xcd;
     const char *file_name = NULL;
 
-    while ((c = getopt(argc, argv, "bcCfnpP:qs:uz")) != -1) {
+    while ((c = getopt(argc, argv, "bcCfnpP:qrs:uz")) != -1) {
         switch (c) {
         case 'b':
             bflag = true;
@@ -1092,6 +1090,9 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
         case 'q':
             qflag = true;
             break;
+        case 'r':
+            flags |= BDRV_REQ_REGISTERED_BUF;
+            break;
         case 's':
             sflag = true;
             file_name = optarg;
@@ -1150,8 +1151,9 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
     if (count < 0) {
         print_cvtnum_err(count, argv[optind]);
         return count;
-    } else if (count > BDRV_REQUEST_MAX_BYTES) {
-        printf("length cannot exceed %" PRIu64 ", given %s\n",
+    } else if (count > BDRV_REQUEST_MAX_BYTES &&
+               !(flags & BDRV_REQ_NO_FALLBACK)) {
+        printf("length cannot exceed %" PRIu64 " without -n, given %s\n",
                (uint64_t)BDRV_REQUEST_MAX_BYTES, argv[optind]);
         return -EINVAL;
     }
@@ -1170,14 +1172,21 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
         }
     }
 
-    if (!zflag) {
+    if (zflag) {
+        if (flags & BDRV_REQ_REGISTERED_BUF) {
+            printf("cannot combine zero write with registered I/O buffer\n");
+            return -EINVAL;
+        }
+    } else {
         if (sflag) {
-            buf = qemu_io_alloc_from_file(blk, count, file_name);
+            buf = qemu_io_alloc_from_file(blk, count, file_name,
+                                          flags & BDRV_REQ_REGISTERED_BUF);
             if (!buf) {
                 return -EINVAL;
             }
         } else {
-            buf = qemu_io_alloc(blk, count, pattern);
+            buf = qemu_io_alloc(blk, count, pattern,
+                                flags & BDRV_REQ_REGISTERED_BUF);
         }
     }
 
@@ -1185,7 +1194,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
     if (bflag) {
         ret = do_save_vmstate(blk, buf, offset, count, &total);
     } else if (zflag) {
-        ret = do_co_pwrite_zeroes(blk, offset, count, flags, &total);
+        ret = do_pwrite_zeroes(blk, offset, count, flags, &total);
     } else if (cflag) {
         ret = do_write_compressed(blk, buf, offset, count, &total);
     } else {
@@ -1211,7 +1220,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
 
 out:
     if (!zflag) {
-        qemu_io_free(buf);
+        qemu_io_free(blk, buf, count, flags & BDRV_REQ_REGISTERED_BUF);
     }
     return ret;
 }
@@ -1228,10 +1237,11 @@ writev_help(void)
 "\n"
 " Writes into a segment of the currently open file, using a buffer\n"
 " filled with a set pattern (0xcdcdcdcd).\n"
-" -P, -- use different pattern to fill file\n"
 " -C, -- report statistics in a machine parsable format\n"
 " -f, -- use Force Unit Access semantics\n"
+" -P, -- use different pattern to fill file\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
 "\n");
 }
 
@@ -1243,7 +1253,7 @@ static const cmdinfo_t writev_cmd = {
     .perm       = BLK_PERM_WRITE,
     .argmin     = 2,
     .argmax     = -1,
-    .args       = "[-Cfq] [-P pattern] off len [len..]",
+    .args       = "[-Cfqr] [-P pattern] off len [len..]",
     .oneline    = "writes a number of bytes at a specified offset",
     .help       = writev_help,
 };
@@ -1252,7 +1262,7 @@ static int writev_f(BlockBackend *blk, int argc, char **argv)
 {
     struct timespec t1, t2;
     bool Cflag = false, qflag = false;
-    int flags = 0;
+    BdrvRequestFlags flags = 0;
     int c, cnt, ret;
     char *buf;
     int64_t offset;
@@ -1262,7 +1272,7 @@ static int writev_f(BlockBackend *blk, int argc, char **argv)
     int pattern = 0xcd;
     QEMUIOVector qiov;
 
-    while ((c = getopt(argc, argv, "CfqP:")) != -1) {
+    while ((c = getopt(argc, argv, "CfP:qr")) != -1) {
         switch (c) {
         case 'C':
             Cflag = true;
@@ -1273,6 +1283,9 @@ static int writev_f(BlockBackend *blk, int argc, char **argv)
         case 'q':
             qflag = true;
             break;
+        case 'r':
+            flags |= BDRV_REQ_REGISTERED_BUF;
+            break;
         case 'P':
             pattern = parse_pattern(optarg);
             if (pattern < 0) {
@@ -1298,7 +1311,8 @@ static int writev_f(BlockBackend *blk, int argc, char **argv)
     optind++;
 
     nr_iov = argc - optind;
-    buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, pattern);
+    buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, pattern,
+                       flags & BDRV_REQ_REGISTERED_BUF);
     if (buf == NULL) {
         return -EINVAL;
     }
@@ -1323,8 +1337,8 @@ static int writev_f(BlockBackend *blk, int argc, char **argv)
     t2 = tsub(t2, t1);
     print_report("wrote", &t2, offset, qiov.size, total, cnt, Cflag);
 out:
+    qemu_io_free(blk, buf, qiov.size, flags & BDRV_REQ_REGISTERED_BUF);
     qemu_iovec_destroy(&qiov);
-    qemu_io_free(buf);
     return ret;
 }
 
@@ -1340,6 +1354,7 @@ struct aio_ctx {
     bool zflag;
     BlockAcctCookie acct;
     int pattern;
+    BdrvRequestFlags flags;
     struct timespec t1;
 };
 
@@ -1369,7 +1384,8 @@ static void aio_write_done(void *opaque, int ret)
                  ctx->qiov.size, 1, ctx->Cflag);
 out:
     if (!ctx->zflag) {
-        qemu_io_free(ctx->buf);
+        qemu_io_free(ctx->blk, ctx->buf, ctx->qiov.size,
+                     ctx->flags & BDRV_REQ_REGISTERED_BUF);
         qemu_iovec_destroy(&ctx->qiov);
     }
     g_free(ctx);
@@ -1414,7 +1430,8 @@ static void aio_read_done(void *opaque, int ret)
     print_report("read", &t2, ctx->offset, ctx->qiov.size,
                  ctx->qiov.size, 1, ctx->Cflag);
 out:
-    qemu_io_free(ctx->buf);
+    qemu_io_free(ctx->blk, ctx->buf, ctx->qiov.size,
+                 ctx->flags & BDRV_REQ_REGISTERED_BUF);
     qemu_iovec_destroy(&ctx->qiov);
     g_free(ctx);
 }
@@ -1436,10 +1453,11 @@ static void aio_read_help(void)
 " considered successful once the request is submitted, independently\n"
 " of potential I/O errors or pattern mismatches.\n"
 " -C, -- report statistics in a machine parsable format\n"
-" -P, -- use a pattern to verify read data\n"
 " -i, -- treat request as invalid, for exercising stats\n"
-" -v, -- dump buffer to standard output\n"
+" -P, -- use a pattern to verify read data\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
+" -v, -- dump buffer to standard output\n"
 "\n");
 }
 
@@ -1450,7 +1468,7 @@ static const cmdinfo_t aio_read_cmd = {
     .cfunc      = aio_read_f,
     .argmin     = 2,
     .argmax     = -1,
-    .args       = "[-Ciqv] [-P pattern] off len [len..]",
+    .args       = "[-Ciqrv] [-P pattern] off len [len..]",
     .oneline    = "asynchronously reads a number of bytes",
     .help       = aio_read_help,
 };
@@ -1461,7 +1479,7 @@ static int aio_read_f(BlockBackend *blk, int argc, char **argv)
     struct aio_ctx *ctx = g_new0(struct aio_ctx, 1);
 
     ctx->blk = blk;
-    while ((c = getopt(argc, argv, "CP:iqv")) != -1) {
+    while ((c = getopt(argc, argv, "CiP:qrv")) != -1) {
         switch (c) {
         case 'C':
             ctx->Cflag = true;
@@ -1482,6 +1500,9 @@ static int aio_read_f(BlockBackend *blk, int argc, char **argv)
         case 'q':
             ctx->qflag = true;
             break;
+        case 'r':
+            ctx->flags |= BDRV_REQ_REGISTERED_BUF;
+            break;
         case 'v':
             ctx->vflag = true;
             break;
@@ -1508,7 +1529,8 @@ static int aio_read_f(BlockBackend *blk, int argc, char **argv)
     optind++;
 
     nr_iov = argc - optind;
-    ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov, 0xab);
+    ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov, 0xab,
+                            ctx->flags & BDRV_REQ_REGISTERED_BUF);
     if (ctx->buf == NULL) {
         block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ);
         g_free(ctx);
@@ -1518,7 +1540,8 @@ static int aio_read_f(BlockBackend *blk, int argc, char **argv)
     clock_gettime(CLOCK_MONOTONIC, &ctx->t1);
     block_acct_start(blk_get_stats(blk), &ctx->acct, ctx->qiov.size,
                      BLOCK_ACCT_READ);
-    blk_aio_preadv(blk, ctx->offset, &ctx->qiov, 0, aio_read_done, ctx);
+    blk_aio_preadv(blk, ctx->offset, &ctx->qiov, ctx->flags, aio_read_done,
+                   ctx);
     return 0;
 }
 
@@ -1539,11 +1562,12 @@ static void aio_write_help(void)
 " Note that due to its asynchronous nature, this command will be\n"
 " considered successful once the request is submitted, independently\n"
 " of potential I/O errors or pattern mismatches.\n"
-" -P, -- use different pattern to fill file\n"
 " -C, -- report statistics in a machine parsable format\n"
 " -f, -- use Force Unit Access semantics\n"
 " -i, -- treat request as invalid, for exercising stats\n"
+" -P, -- use different pattern to fill file\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
+" -r, -- register I/O buffer\n"
 " -u, -- with -z, allow unmapping\n"
 " -z, -- write zeroes using blk_aio_pwrite_zeroes\n"
 "\n");
@@ -1557,7 +1581,7 @@ static const cmdinfo_t aio_write_cmd = {
     .perm       = BLK_PERM_WRITE,
     .argmin     = 2,
     .argmax     = -1,
-    .args       = "[-Cfiquz] [-P pattern] off len [len..]",
+    .args       = "[-Cfiqruz] [-P pattern] off len [len..]",
     .oneline    = "asynchronously writes a number of bytes",
     .help       = aio_write_help,
 };
@@ -1567,22 +1591,24 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv)
     int nr_iov, c;
     int pattern = 0xcd;
     struct aio_ctx *ctx = g_new0(struct aio_ctx, 1);
-    int flags = 0;
 
     ctx->blk = blk;
-    while ((c = getopt(argc, argv, "CfiqP:uz")) != -1) {
+    while ((c = getopt(argc, argv, "CfiP:qruz")) != -1) {
         switch (c) {
         case 'C':
             ctx->Cflag = true;
             break;
         case 'f':
-            flags |= BDRV_REQ_FUA;
+            ctx->flags |= BDRV_REQ_FUA;
             break;
         case 'q':
             ctx->qflag = true;
             break;
+        case 'r':
+            ctx->flags |= BDRV_REQ_REGISTERED_BUF;
+            break;
         case 'u':
-            flags |= BDRV_REQ_MAY_UNMAP;
+            ctx->flags |= BDRV_REQ_MAY_UNMAP;
             break;
         case 'P':
             pattern = parse_pattern(optarg);
@@ -1618,7 +1644,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv)
         return -EINVAL;
     }
 
-    if ((flags & BDRV_REQ_MAY_UNMAP) && !ctx->zflag) {
+    if ((ctx->flags & BDRV_REQ_MAY_UNMAP) && !ctx->zflag) {
         printf("-u requires -z to be specified\n");
         g_free(ctx);
         return -EINVAL;
@@ -1630,6 +1656,12 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv)
         return -EINVAL;
     }
 
+    if (ctx->zflag && (ctx->flags & BDRV_REQ_REGISTERED_BUF)) {
+        printf("cannot combine zero write with registered I/O buffer\n");
+        g_free(ctx);
+        return -EINVAL;
+    }
+
     ctx->offset = cvtnum(argv[optind]);
     if (ctx->offset < 0) {
         int ret = ctx->offset;
@@ -1648,12 +1680,12 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv)
         }
 
         ctx->qiov.size = count;
-        blk_aio_pwrite_zeroes(blk, ctx->offset, count, flags, aio_write_done,
-                              ctx);
+        blk_aio_pwrite_zeroes(blk, ctx->offset, count, ctx->flags,
+                              aio_write_done, ctx);
     } else {
         nr_iov = argc - optind;
         ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov,
-                                pattern);
+                                pattern, ctx->flags & BDRV_REQ_REGISTERED_BUF);
         if (ctx->buf == NULL) {
             block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_WRITE);
             g_free(ctx);
@@ -1664,8 +1696,8 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv)
         block_acct_start(blk_get_stats(blk), &ctx->acct, ctx->qiov.size,
                          BLOCK_ACCT_WRITE);
 
-        blk_aio_pwritev(blk, ctx->offset, &ctx->qiov, flags, aio_write_done,
-                        ctx);
+        blk_aio_pwritev(blk, ctx->offset, &ctx->qiov, ctx->flags,
+                        aio_write_done, ctx);
     }
 
     return 0;
@@ -1698,13 +1730,260 @@ static const cmdinfo_t flush_cmd = {
     .oneline    = "flush all in-core file state to disk",
 };
 
+static inline int64_t tosector(int64_t bytes)
+{
+    return bytes >> BDRV_SECTOR_BITS;
+}
+
+static int zone_report_f(BlockBackend *blk, int argc, char **argv)
+{
+    int ret;
+    int64_t offset;
+    unsigned int nr_zones;
+
+    ++optind;
+    offset = cvtnum(argv[optind]);
+    ++optind;
+    nr_zones = cvtnum(argv[optind]);
+
+    g_autofree BlockZoneDescriptor *zones = NULL;
+    zones = g_new(BlockZoneDescriptor, nr_zones);
+    ret = blk_zone_report(blk, offset, &nr_zones, zones);
+    if (ret < 0) {
+        printf("zone report failed: %s\n", strerror(-ret));
+    } else {
+        for (int i = 0; i < nr_zones; ++i) {
+            printf("start: 0x%" PRIx64 ", len 0x%" PRIx64 ", "
+                   "cap"" 0x%" PRIx64 ", wptr 0x%" PRIx64 ", "
+                   "zcond:%u, [type: %u]\n",
+                    tosector(zones[i].start), tosector(zones[i].length),
+                    tosector(zones[i].cap), tosector(zones[i].wp),
+                    zones[i].state, zones[i].type);
+        }
+    }
+    return ret;
+}
+
+static const cmdinfo_t zone_report_cmd = {
+    .name = "zone_report",
+    .altname = "zrp",
+    .cfunc = zone_report_f,
+    .argmin = 2,
+    .argmax = 2,
+    .args = "offset number",
+    .oneline = "report zone information",
+};
+
+static int zone_open_f(BlockBackend *blk, int argc, char **argv)
+{
+    int ret;
+    int64_t offset, len;
+    ++optind;
+    offset = cvtnum(argv[optind]);
+    ++optind;
+    len = cvtnum(argv[optind]);
+    ret = blk_zone_mgmt(blk, BLK_ZO_OPEN, offset, len);
+    if (ret < 0) {
+        printf("zone open failed: %s\n", strerror(-ret));
+    }
+    return ret;
+}
+
+static const cmdinfo_t zone_open_cmd = {
+    .name = "zone_open",
+    .altname = "zo",
+    .cfunc = zone_open_f,
+    .argmin = 2,
+    .argmax = 2,
+    .args = "offset len",
+    .oneline = "explicit open a range of zones in zone block device",
+};
+
+static int zone_close_f(BlockBackend *blk, int argc, char **argv)
+{
+    int ret;
+    int64_t offset, len;
+    ++optind;
+    offset = cvtnum(argv[optind]);
+    ++optind;
+    len = cvtnum(argv[optind]);
+    ret = blk_zone_mgmt(blk, BLK_ZO_CLOSE, offset, len);
+    if (ret < 0) {
+        printf("zone close failed: %s\n", strerror(-ret));
+    }
+    return ret;
+}
+
+static const cmdinfo_t zone_close_cmd = {
+    .name = "zone_close",
+    .altname = "zc",
+    .cfunc = zone_close_f,
+    .argmin = 2,
+    .argmax = 2,
+    .args = "offset len",
+    .oneline = "close a range of zones in zone block device",
+};
+
+static int zone_finish_f(BlockBackend *blk, int argc, char **argv)
+{
+    int ret;
+    int64_t offset, len;
+    ++optind;
+    offset = cvtnum(argv[optind]);
+    ++optind;
+    len = cvtnum(argv[optind]);
+    ret = blk_zone_mgmt(blk, BLK_ZO_FINISH, offset, len);
+    if (ret < 0) {
+        printf("zone finish failed: %s\n", strerror(-ret));
+    }
+    return ret;
+}
+
+static const cmdinfo_t zone_finish_cmd = {
+    .name = "zone_finish",
+    .altname = "zf",
+    .cfunc = zone_finish_f,
+    .argmin = 2,
+    .argmax = 2,
+    .args = "offset len",
+    .oneline = "finish a range of zones in zone block device",
+};
+
+static int zone_reset_f(BlockBackend *blk, int argc, char **argv)
+{
+    int ret;
+    int64_t offset, len;
+    ++optind;
+    offset = cvtnum(argv[optind]);
+    ++optind;
+    len = cvtnum(argv[optind]);
+    ret = blk_zone_mgmt(blk, BLK_ZO_RESET, offset, len);
+    if (ret < 0) {
+        printf("zone reset failed: %s\n", strerror(-ret));
+    }
+    return ret;
+}
+
+static const cmdinfo_t zone_reset_cmd = {
+    .name = "zone_reset",
+    .altname = "zrs",
+    .cfunc = zone_reset_f,
+    .argmin = 2,
+    .argmax = 2,
+    .args = "offset len",
+    .oneline = "reset a zone write pointer in zone block device",
+};
+
+static int do_aio_zone_append(BlockBackend *blk, QEMUIOVector *qiov,
+                              int64_t *offset, int flags, int *total)
+{
+    int async_ret = NOT_DONE;
+
+    blk_aio_zone_append(blk, offset, qiov, flags, aio_rw_done, &async_ret);
+    while (async_ret == NOT_DONE) {
+        main_loop_wait(false);
+    }
+
+    *total = qiov->size;
+    return async_ret < 0 ? async_ret : 1;
+}
+
+static int zone_append_f(BlockBackend *blk, int argc, char **argv)
+{
+    int ret;
+    bool pflag = false;
+    int flags = 0;
+    int total = 0;
+    int64_t offset;
+    char *buf;
+    int c, nr_iov;
+    int pattern = 0xcd;
+    QEMUIOVector qiov;
+
+    if (optind > argc - 3) {
+        return -EINVAL;
+    }
+
+    if ((c = getopt(argc, argv, "p")) != -1) {
+        pflag = true;
+    }
+
+    offset = cvtnum(argv[optind]);
+    if (offset < 0) {
+        print_cvtnum_err(offset, argv[optind]);
+        return offset;
+    }
+    optind++;
+    nr_iov = argc - optind;
+    buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, pattern,
+                       flags & BDRV_REQ_REGISTERED_BUF);
+    if (buf == NULL) {
+        return -EINVAL;
+    }
+    ret = do_aio_zone_append(blk, &qiov, &offset, flags, &total);
+    if (ret < 0) {
+        printf("zone append failed: %s\n", strerror(-ret));
+        goto out;
+    }
+
+    if (pflag) {
+        printf("After zap done, the append sector is 0x%" PRIx64 "\n",
+               tosector(offset));
+    }
+
+out:
+    qemu_io_free(blk, buf, qiov.size,
+                 flags & BDRV_REQ_REGISTERED_BUF);
+    qemu_iovec_destroy(&qiov);
+    return ret;
+}
+
+static const cmdinfo_t zone_append_cmd = {
+    .name = "zone_append",
+    .altname = "zap",
+    .cfunc = zone_append_f,
+    .argmin = 3,
+    .argmax = 4,
+    .args = "offset len [len..]",
+    .oneline = "append write a number of bytes at a specified offset",
+};
+
+static int truncate_f(BlockBackend *blk, int argc, char **argv);
+static const cmdinfo_t truncate_cmd = {
+    .name       = "truncate",
+    .altname    = "t",
+    .cfunc      = truncate_f,
+    .perm       = BLK_PERM_WRITE | BLK_PERM_RESIZE,
+    .argmin     = 1,
+    .argmax     = 3,
+    .args       = "[-m prealloc_mode] off",
+    .oneline    = "truncates the current file at the given offset",
+};
+
 static int truncate_f(BlockBackend *blk, int argc, char **argv)
 {
     Error *local_err = NULL;
     int64_t offset;
-    int ret;
+    int c, ret;
+    PreallocMode prealloc = PREALLOC_MODE_OFF;
 
-    offset = cvtnum(argv[1]);
+    while ((c = getopt(argc, argv, "m:")) != -1) {
+        switch (c) {
+        case 'm':
+            prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
+                                       PREALLOC_MODE__MAX, NULL);
+            if (prealloc == PREALLOC_MODE__MAX) {
+                error_report("Invalid preallocation mode '%s'", optarg);
+                return -EINVAL;
+            }
+            break;
+        default:
+            qemuio_command_usage(&truncate_cmd);
+            return -EINVAL;
+        }
+    }
+
+    offset = cvtnum(argv[optind]);
     if (offset < 0) {
         print_cvtnum_err(offset, argv[1]);
         return offset;
@@ -1715,7 +1994,7 @@ static int truncate_f(BlockBackend *blk, int argc, char **argv)
      * exact=true.  It is better to err on the "emit more errors" side
      * than to be overly permissive.
      */
-    ret = blk_truncate(blk, offset, false, PREALLOC_MODE_OFF, 0, &local_err);
+    ret = blk_truncate(blk, offset, false, prealloc, 0, &local_err);
     if (ret < 0) {
         error_report_err(local_err);
         return ret;
@@ -1724,17 +2003,6 @@ static int truncate_f(BlockBackend *blk, int argc, char **argv)
     return 0;
 }
 
-static const cmdinfo_t truncate_cmd = {
-    .name       = "truncate",
-    .altname    = "t",
-    .cfunc      = truncate_f,
-    .perm       = BLK_PERM_WRITE | BLK_PERM_RESIZE,
-    .argmin     = 1,
-    .argmax     = 1,
-    .args       = "off",
-    .oneline    = "truncates the current file at the given offset",
-};
-
 static int length_f(BlockBackend *blk, int argc, char **argv)
 {
     int64_t size;
@@ -1769,6 +2037,9 @@ static int info_f(BlockBackend *blk, int argc, char **argv)
     char s1[64], s2[64];
     int ret;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (bs->drv && bs->drv->format_name) {
         printf("format name: %s\n", bs->drv->format_name);
     }
@@ -1793,8 +2064,9 @@ static int info_f(BlockBackend *blk, int argc, char **argv)
         return -EIO;
     }
     if (spec_info) {
-        printf("Format specific information:\n");
-        bdrv_image_info_specific_dump(spec_info);
+        bdrv_image_info_specific_dump(spec_info,
+                                      "Format specific information:\n",
+                                      0);
         qapi_free_ImageInfoSpecific(spec_info);
     }
 
@@ -1965,11 +2237,9 @@ static int map_is_allocated(BlockDriverState *bs, int64_t offset,
                             int64_t bytes, int64_t *pnum)
 {
     int64_t num;
-    int num_checked;
     int ret, firstret;
 
-    num_checked = MIN(bytes, BDRV_REQUEST_MAX_BYTES);
-    ret = bdrv_is_allocated(bs, offset, num_checked, &num);
+    ret = bdrv_is_allocated(bs, offset, bytes, &num);
     if (ret < 0) {
         return ret;
     }
@@ -1981,8 +2251,7 @@ static int map_is_allocated(BlockDriverState *bs, int64_t offset,
         offset += num;
         bytes -= num;
 
-        num_checked = MIN(bytes, BDRV_REQUEST_MAX_BYTES);
-        ret = bdrv_is_allocated(bs, offset, num_checked, &num);
+        ret = bdrv_is_allocated(bs, offset, bytes, &num);
         if (ret == firstret && num) {
             *pnum += num;
         } else {
@@ -2088,8 +2357,6 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv)
     bool writethrough = !blk_enable_write_cache(blk);
     bool has_rw_option = false;
     bool has_cache_option = false;
-
-    BlockReopenQueue *brq;
     Error *local_err = NULL;
 
     while ((c = getopt(argc, argv, "c:o:rw")) != -1) {
@@ -2182,10 +2449,7 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv)
         qdict_put_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, flags & BDRV_O_NO_FLUSH);
     }
 
-    bdrv_subtree_drained_begin(bs);
-    brq = bdrv_reopen_queue(NULL, bs, opts, true);
-    bdrv_reopen_multiple(brq, &local_err);
-    bdrv_subtree_drained_end(bs);
+    bdrv_reopen(bs, opts, true, &local_err);
 
     if (local_err) {
         error_report_err(local_err);
@@ -2439,9 +2703,12 @@ static const cmdinfo_t help_cmd = {
     .oneline    = "help for one or all commands",
 };
 
+/*
+ * Called with aio context of blk acquired. Or with qemu_get_aio_context()
+ * context acquired if blk is NULL.
+ */
 int qemuio_command(BlockBackend *blk, const char *cmd)
 {
-    AioContext *ctx;
     char *input;
     const cmdinfo_t *ct;
     char **v;
@@ -2453,10 +2720,7 @@ int qemuio_command(BlockBackend *blk, const char *cmd)
     if (c) {
         ct = find_command(v[0]);
         if (ct) {
-            ctx = blk ? blk_get_aio_context(blk) : qemu_get_aio_context();
-            aio_context_acquire(ctx);
             ret = command(blk, ct, c, v);
-            aio_context_release(ctx);
         } else {
             fprintf(stderr, "command \"%s\" not found\n", v[0]);
             ret = -EINVAL;
@@ -2480,6 +2744,12 @@ static void __attribute((constructor)) init_qemuio_commands(void)
     qemuio_add_command(&aio_write_cmd);
     qemuio_add_command(&aio_flush_cmd);
     qemuio_add_command(&flush_cmd);
+    qemuio_add_command(&zone_report_cmd);
+    qemuio_add_command(&zone_open_cmd);
+    qemuio_add_command(&zone_close_cmd);
+    qemuio_add_command(&zone_finish_cmd);
+    qemuio_add_command(&zone_reset_cmd);
+    qemuio_add_command(&zone_append_cmd);
     qemuio_add_command(&truncate_cmd);
     qemuio_add_command(&length_cmd);
     qemuio_add_command(&info_cmd);
index ac88d8bd40f7c1b4b8e3a48cb0b9784b6848ff86..050c70835f933134013e36d64528343213cfe144 100644 (file)
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -15,7 +15,8 @@
 #include <termios.h>
 #endif
 
-#include "qemu-common.h"
+#include "qemu/help-texts.h"
+#include "qemu/cutils.h"
 #include "qapi/error.h"
 #include "qemu-io.h"
 #include "qemu/error-report.h"
@@ -323,13 +324,13 @@ static char *get_prompt(void)
     static char prompt[FILENAME_MAX + 2 /*"> "*/ + 1 /*"\0"*/ ];
 
     if (!prompt[0]) {
-        snprintf(prompt, sizeof(prompt), "%s> ", error_get_progname());
+        snprintf(prompt, sizeof(prompt), "%s> ", g_get_prgname());
     }
 
     return prompt;
 }
 
-static void GCC_FMT_ATTR(2, 3) readline_printf_func(void *opaque,
+static void G_GNUC_PRINTF(2, 3) readline_printf_func(void *opaque,
                                                     const char *fmt, ...)
 {
     va_list ap;
@@ -411,6 +412,19 @@ static void prep_fetchline(void *opaque)
     *fetchable= 1;
 }
 
+static int do_qemuio_command(const char *cmd)
+{
+    int ret;
+    AioContext *ctx =
+        qemuio_blk ? blk_get_aio_context(qemuio_blk) : qemu_get_aio_context();
+
+    aio_context_acquire(ctx);
+    ret = qemuio_command(qemuio_blk, cmd);
+    aio_context_release(ctx);
+
+    return ret;
+}
+
 static int command_loop(void)
 {
     int i, fetchable = 0, prompted = 0;
@@ -418,7 +432,7 @@ static int command_loop(void)
     char *input;
 
     for (i = 0; !quit_qemu_io && i < ncmdline; i++) {
-        ret = qemuio_command(qemuio_blk, cmdline[i]);
+        ret = do_qemuio_command(cmdline[i]);
         if (ret < 0) {
             last_error = ret;
         }
@@ -446,7 +460,7 @@ static int command_loop(void)
         if (input == NULL) {
             break;
         }
-        ret = qemuio_command(qemuio_blk, input);
+        ret = do_qemuio_command(input);
         g_free(input);
 
         if (ret < 0) {
@@ -461,10 +475,10 @@ static int command_loop(void)
     return last_error;
 }
 
-static void add_user_command(char *optarg)
+static void add_user_command(char *user_cmd)
 {
     cmdline = g_renew(char *, cmdline, ++ncmdline);
-    cmdline[ncmdline-1] = optarg;
+    cmdline[ncmdline - 1] = user_cmd;
 }
 
 static void reenable_tty_echo(void)
@@ -477,23 +491,6 @@ enum {
     OPTION_IMAGE_OPTS = 257,
 };
 
-static QemuOptsList qemu_object_opts = {
-    .name = "object",
-    .implied_opt_name = "qom-type",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
-    .desc = {
-        { }
-    },
-};
-
-static bool qemu_io_object_print_help(const char *type, QemuOpts *opts)
-{
-    if (user_creatable_print_help(type, opts)) {
-        exit(0);
-    }
-    return true;
-}
-
 static QemuOptsList file_opts = {
     .name = "file",
     .implied_opt_name = "file",
@@ -533,7 +530,6 @@ int main(int argc, char **argv)
     int flags = BDRV_O_UNMAP;
     int ret;
     bool writethrough = true;
-    Error *local_error = NULL;
     QDict *opts = NULL;
     const char *format = NULL;
     bool force_share = false;
@@ -550,7 +546,6 @@ int main(int argc, char **argv)
     qcrypto_init(&error_fatal);
 
     module_call_init(MODULE_INIT_QOM);
-    qemu_add_opts(&qemu_object_opts);
     qemu_add_opts(&qemu_trace_opts);
     bdrv_init();
 
@@ -604,33 +599,28 @@ int main(int argc, char **argv)
             break;
         case 'V':
             printf("%s version " QEMU_FULL_VERSION "\n"
-                   QEMU_COPYRIGHT "\n", error_get_progname());
+                   QEMU_COPYRIGHT "\n", g_get_prgname());
             exit(0);
         case 'h':
-            usage(error_get_progname());
+            usage(g_get_prgname());
             exit(0);
         case 'U':
             force_share = true;
             break;
-        case OPTION_OBJECT: {
-            QemuOpts *qopts;
-            qopts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                            optarg, true);
-            if (!qopts) {
-                exit(1);
-            }
-        }   break;
+        case OPTION_OBJECT:
+            user_creatable_process_cmdline(optarg);
+            break;
         case OPTION_IMAGE_OPTS:
             imageOpts = true;
             break;
         default:
-            usage(error_get_progname());
+            usage(g_get_prgname());
             exit(1);
         }
     }
 
     if ((argc - optind) > 1) {
-        usage(error_get_progname());
+        usage(g_get_prgname());
         exit(1);
     }
 
@@ -639,20 +629,13 @@ int main(int argc, char **argv)
         exit(1);
     }
 
-    if (qemu_init_main_loop(&local_error)) {
-        error_report_err(local_error);
-        exit(1);
-    }
-
-    qemu_opts_foreach(&qemu_object_opts,
-                      user_creatable_add_opts_foreach,
-                      qemu_io_object_print_help, &error_fatal);
+    qemu_init_main_loop(&error_fatal);
 
     if (!trace_init_backends()) {
         exit(1);
     }
     trace_init_file();
-    qemu_set_log(LOG_TRACE);
+    qemu_set_log(LOG_TRACE, &error_fatal);
 
     /* initialize commands */
     qemuio_add_command(&quit_cmd);
index 39b517c948b4c45544e01fc3f070fd7e5e518b6d..186e6468b1abf2fde0d911bececb1e60d9fd266a 100644 (file)
@@ -21,7 +21,7 @@
 #include <libgen.h>
 #include <pthread.h>
 
-#include "qemu-common.h"
+#include "qemu/help-texts.h"
 #include "qapi/error.h"
 #include "qemu/cutils.h"
 #include "sysemu/block-backend.h"
 #include "io/channel-socket.h"
 #include "io/net-listener.h"
 #include "crypto/init.h"
+#include "crypto/tlscreds.h"
 #include "trace/control.h"
 #include "qemu-version.h"
 
+#ifdef CONFIG_SELINUX
+#include <selinux/selinux.h>
+#endif
+
 #ifdef __linux__
 #define HAVE_NBD_DEVICE 1
 #else
 #define QEMU_NBD_OPT_FORK          263
 #define QEMU_NBD_OPT_TLSAUTHZ      264
 #define QEMU_NBD_OPT_PID_FILE      265
+#define QEMU_NBD_OPT_SELINUX_LABEL 266
+#define QEMU_NBD_OPT_TLSHOSTNAME   267
 
 #define MBR_SIZE 512
 
-static int verbose;
-static char *srcpath;
-static SocketAddress *saddr;
 static int persistent = 0;
 static enum { RUNNING, TERMINATE, TERMINATED } state;
 static int shared = 1;
@@ -115,6 +119,9 @@ static void usage(const char *name)
 "  --fork                    fork off the server process and exit the parent\n"
 "                            once the server is running\n"
 "  --pid-file=PATH           store the server's process ID in the given file\n"
+#ifdef CONFIG_SELINUX
+"  --selinux-label=LABEL     set SELinux process label on listening socket\n"
+#endif
 #if HAVE_NBD_DEVICE
 "\n"
 "Kernel NBD client support:\n"
@@ -134,7 +141,9 @@ static void usage(const char *name)
 "                            'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
 "                            '[ID_OR_NAME]'\n"
 "  -n, --nocache             disable host cache\n"
-"      --cache=MODE          set cache mode (none, writeback, ...)\n"
+"      --cache=MODE          set cache mode used to access the disk image, the\n"
+"                            valid options are: 'none', 'writeback' (default),\n"
+"                            'writethrough', 'directsync' and 'unsafe'\n"
 "      --aio=MODE            set AIO mode (native, io_uring or threads)\n"
 "      --discard=MODE        set discard mode (ignore, unmap)\n"
 "      --detect-zeroes=MODE  set detect-zeroes mode (off, on, unmap)\n"
@@ -181,7 +190,7 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls,
     sioc = qio_channel_socket_new();
     if (qio_channel_socket_connect_sync(sioc, saddr, &err) < 0) {
         error_report_err(err);
-        return EXIT_FAILURE;
+        goto out;
     }
     rc = nbd_receive_export_list(QIO_CHANNEL(sioc), tls, hostname, &list,
                                  &err);
@@ -210,6 +219,7 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls,
                 [NBD_FLAG_SEND_RESIZE_BIT]          = "resize",
                 [NBD_FLAG_SEND_CACHE_BIT]           = "cache",
                 [NBD_FLAG_SEND_FAST_ZERO_BIT]       = "fast-zero",
+                [NBD_FLAG_BLOCK_STAT_PAYLOAD_BIT]   = "block-status-payload",
             };
 
             printf("  size:  %" PRIu64 "\n", list[i].size);
@@ -226,6 +236,9 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls,
             printf("  opt block: %u\n", list[i].opt_block);
             printf("  max block: %u\n", list[i].max_block);
         }
+        printf("  transaction size: %s\n",
+               list[i].mode >= NBD_MODE_EXTENDED ?
+               "64-bit" : "32-bit");
         if (list[i].n_contexts) {
             printf("  available meta contexts: %d\n", list[i].n_contexts);
             for (j = 0; j < list[i].n_contexts; j++) {
@@ -242,6 +255,29 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls,
 }
 
 
+struct NbdClientOpts {
+    char *device;
+    char *srcpath;
+    SocketAddress *saddr;
+    int old_stderr;
+    bool fork_process;
+    bool verbose;
+};
+
+static void nbd_client_release_pipe(int old_stderr)
+{
+    /* Close stderr so that the qemu-nbd process exits.  */
+    if (dup2(old_stderr, STDERR_FILENO) < 0) {
+        error_report("Could not release pipe to parent: %s",
+                     strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+    if (old_stderr != STDOUT_FILENO && close(old_stderr) < 0) {
+        error_report("Could not release qemu-nbd: %s", strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+}
+
 #if HAVE_NBD_DEVICE
 static void *show_parts(void *arg)
 {
@@ -262,79 +298,74 @@ static void *show_parts(void *arg)
 
 static void *nbd_client_thread(void *arg)
 {
-    char *device = arg;
-    NBDExportInfo info = { .request_sizes = false, .name = g_strdup("") };
+    struct NbdClientOpts *opts = arg;
+    /* TODO: Revisit this if nbd.ko ever gains support for structured reply */
+    NBDExportInfo info = { .request_sizes = false, .name = g_strdup(""),
+                           .mode = NBD_MODE_SIMPLE };
     QIOChannelSocket *sioc;
-    int fd;
-    int ret;
+    int fd = -1;
+    int ret = EXIT_FAILURE;
     pthread_t show_parts_thread;
     Error *local_error = NULL;
 
     sioc = qio_channel_socket_new();
     if (qio_channel_socket_connect_sync(sioc,
-                                        saddr,
+                                        opts->saddr,
                                         &local_error) < 0) {
         error_report_err(local_error);
         goto out;
     }
 
-    ret = nbd_receive_negotiate(NULL, QIO_CHANNEL(sioc),
-                                NULL, NULL, NULL, &info, &local_error);
-    if (ret < 0) {
+    if (nbd_receive_negotiate(QIO_CHANNEL(sioc), NULL, NULL, NULL,
+                              &info, &local_error) < 0) {
         if (local_error) {
             error_report_err(local_error);
         }
-        goto out_socket;
+        goto out;
     }
 
-    fd = open(device, O_RDWR);
+    fd = open(opts->device, O_RDWR);
     if (fd < 0) {
         /* Linux-only, we can use %m in printf.  */
-        error_report("Failed to open %s: %m", device);
-        goto out_socket;
+        error_report("Failed to open %s: %m", opts->device);
+        goto out;
     }
 
-    ret = nbd_init(fd, sioc, &info, &local_error);
-    if (ret < 0) {
+    if (nbd_init(fd, sioc, &info, &local_error) < 0) {
         error_report_err(local_error);
-        goto out_fd;
+        goto out;
     }
 
     /* update partition table */
-    pthread_create(&show_parts_thread, NULL, show_parts, device);
+    pthread_create(&show_parts_thread, NULL, show_parts, opts->device);
 
-    if (verbose) {
+    if (opts->verbose && !opts->fork_process) {
         fprintf(stderr, "NBD device %s is now connected to %s\n",
-                device, srcpath);
+                opts->device, opts->srcpath);
     } else {
-        /* Close stderr so that the qemu-nbd process exits.  */
-        dup2(STDOUT_FILENO, STDERR_FILENO);
+        nbd_client_release_pipe(opts->old_stderr);
     }
 
-    ret = nbd_client(fd);
-    if (ret) {
-        goto out_fd;
+    if (nbd_client(fd) < 0) {
+        goto out;
     }
-    close(fd);
-    object_unref(OBJECT(sioc));
-    g_free(info.name);
-    kill(getpid(), SIGTERM);
-    return (void *) EXIT_SUCCESS;
 
-out_fd:
-    close(fd);
-out_socket:
+    ret = EXIT_SUCCESS;
+
+ out:
+    if (fd >= 0) {
+        close(fd);
+    }
     object_unref(OBJECT(sioc));
-out:
     g_free(info.name);
     kill(getpid(), SIGTERM);
-    return (void *) EXIT_FAILURE;
+    return (void *) (intptr_t) ret;
 }
 #endif /* HAVE_NBD_DEVICE */
 
 static int nbd_can_accept(void)
 {
-    return state == RUNNING && nb_fds < shared;
+    return state == RUNNING && (shared == 0 || nb_fds < shared);
 }
 
 static void nbd_update_server_watch(void);
@@ -407,24 +438,6 @@ static QemuOptsList file_opts = {
     },
 };
 
-static QemuOptsList qemu_object_opts = {
-    .name = "object",
-    .implied_opt_name = "qom-type",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
-    .desc = {
-        { }
-    },
-};
-
-static bool qemu_nbd_object_print_help(const char *type, QemuOpts *opts)
-{
-    if (user_creatable_print_help(type, opts)) {
-        exit(0);
-    }
-    return true;
-}
-
-
 static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, bool list,
                                           Error **errp)
 {
@@ -446,18 +459,12 @@ static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, bool list,
         return NULL;
     }
 
-    if (list) {
-        if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
-            error_setg(errp,
-                       "Expecting TLS credentials with a client endpoint");
-            return NULL;
-        }
-    } else {
-        if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
-            error_setg(errp,
-                       "Expecting TLS credentials with a server endpoint");
-            return NULL;
-        }
+    if (!qcrypto_tls_creds_check_endpoint(creds,
+                                          list
+                                          ? QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT
+                                          : QCRYPTO_TLS_CREDS_ENDPOINT_SERVER,
+                                          errp)) {
+        return NULL;
     }
     object_ref(obj);
     return creds;
@@ -481,6 +488,7 @@ static const char *socket_activation_validate_opts(const char *device,
                                                    const char *sockpath,
                                                    const char *address,
                                                    const char *port,
+                                                   const char *selinux,
                                                    bool list)
 {
     if (device != NULL) {
@@ -499,6 +507,10 @@ static const char *socket_activation_validate_opts(const char *device,
         return "TCP port number can't be set when using socket activation";
     }
 
+    if (selinux != NULL) {
+        return "SELinux label can't be set when using socket activation";
+    }
+
     if (list) {
         return "List mode is incompatible with socket activation";
     }
@@ -509,6 +521,7 @@ static const char *socket_activation_validate_opts(const char *device,
 static void qemu_nbd_shutdown(void)
 {
     job_cancel_sync_all();
+    blk_exp_close_all();
     bdrv_close_all();
 }
 
@@ -522,7 +535,6 @@ int main(int argc, char **argv)
     const char *bindto = NULL;
     const char *port = NULL;
     char *sockpath = NULL;
-    char *device = NULL;
     QemuOpts *sn_opts = NULL;
     const char *sn_id_or_name = NULL;
     const char *sopt = "hVb:o:p:rsnc:dvk:e:f:tl:x:T:D:AB:L";
@@ -555,11 +567,14 @@ int main(int argc, char **argv)
         { "export-name", required_argument, NULL, 'x' },
         { "description", required_argument, NULL, 'D' },
         { "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS },
+        { "tls-hostname", required_argument, NULL, QEMU_NBD_OPT_TLSHOSTNAME },
         { "tls-authz", required_argument, NULL, QEMU_NBD_OPT_TLSAUTHZ },
         { "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS },
         { "trace", required_argument, NULL, 'T' },
         { "fork", no_argument, NULL, QEMU_NBD_OPT_FORK },
         { "pid-file", required_argument, NULL, QEMU_NBD_OPT_PID_FILE },
+        { "selinux-label", required_argument, NULL,
+          QEMU_NBD_OPT_SELINUX_LABEL },
         { NULL, 0, NULL, 0 }
     };
     int ch;
@@ -576,17 +591,25 @@ int main(int argc, char **argv)
     QDict *options = NULL;
     const char *export_name = NULL; /* defaults to "" later for server mode */
     const char *export_description = NULL;
-    strList *bitmaps = NULL;
+    BlockDirtyBitmapOrStrList *bitmaps = NULL;
     bool alloc_depth = false;
     const char *tlscredsid = NULL;
+    const char *tlshostname = NULL;
     bool imageOpts = false;
-    bool writethrough = true;
-    bool fork_process = false;
+    bool writethrough = false; /* Client will flush as needed. */
     bool list = false;
-    int old_stderr = -1;
     unsigned socket_activation;
     const char *pid_file_name = NULL;
+    const char *selinux_label = NULL;
     BlockExportOptions *export_opts;
+    struct NbdClientOpts opts = {
+        .fork_process = false,
+        .verbose = false,
+        .device = NULL,
+        .srcpath = NULL,
+        .saddr = NULL,
+        .old_stderr = STDOUT_FILENO,
+    };
 
 #ifdef CONFIG_POSIX
     os_setup_early_signal_handling();
@@ -599,7 +622,6 @@ int main(int argc, char **argv)
     qcrypto_init(&error_fatal);
 
     module_call_init(MODULE_INIT_QOM);
-    qemu_add_opts(&qemu_object_opts);
     qemu_add_opts(&qemu_trace_opts);
     qemu_init_exec_dir(argv[0]);
 
@@ -695,7 +717,14 @@ int main(int argc, char **argv)
             alloc_depth = true;
             break;
         case 'B':
-            QAPI_LIST_PREPEND(bitmaps, g_strdup(optarg));
+            {
+                BlockDirtyBitmapOrStr *el = g_new(BlockDirtyBitmapOrStr, 1);
+                *el = (BlockDirtyBitmapOrStr) {
+                    .type = QTYPE_QSTRING,
+                    .u.local = g_strdup(optarg),
+                };
+                QAPI_LIST_PREPEND(bitmaps, el);
+            }
             break;
         case 'k':
             sockpath = optarg;
@@ -708,11 +737,11 @@ int main(int argc, char **argv)
             disconnect = true;
             break;
         case 'c':
-            device = optarg;
+            opts.device = optarg;
             break;
         case 'e':
             if (qemu_strtoi(optarg, NULL, 0, &shared) < 0 ||
-                shared < 1) {
+                shared < 0) {
                 error_report("Invalid shared device number '%s'", optarg);
                 exit(EXIT_FAILURE);
             }
@@ -739,7 +768,7 @@ int main(int argc, char **argv)
             }
             break;
         case 'v':
-            verbose = 1;
+            opts.verbose = true;
             break;
         case 'V':
             version(argv[0]);
@@ -752,17 +781,15 @@ int main(int argc, char **argv)
         case '?':
             error_report("Try `%s --help' for more information.", argv[0]);
             exit(EXIT_FAILURE);
-        case QEMU_NBD_OPT_OBJECT: {
-            QemuOpts *opts;
-            opts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                           optarg, true);
-            if (!opts) {
-                exit(EXIT_FAILURE);
-            }
-        }   break;
+        case QEMU_NBD_OPT_OBJECT:
+            user_creatable_process_cmdline(optarg);
+            break;
         case QEMU_NBD_OPT_TLSCREDS:
             tlscredsid = optarg;
             break;
+        case QEMU_NBD_OPT_TLSHOSTNAME:
+            tlshostname = optarg;
+            break;
         case QEMU_NBD_OPT_IMAGE_OPTS:
             imageOpts = true;
             break;
@@ -773,7 +800,7 @@ int main(int argc, char **argv)
             tlsauthz = optarg;
             break;
         case QEMU_NBD_OPT_FORK:
-            fork_process = true;
+            opts.fork_process = true;
             break;
         case 'L':
             list = true;
@@ -781,6 +808,9 @@ int main(int argc, char **argv)
         case QEMU_NBD_OPT_PID_FILE:
             pid_file_name = optarg;
             break;
+        case QEMU_NBD_OPT_SELINUX_LABEL:
+            selinux_label = optarg;
+            break;
         }
     }
 
@@ -790,12 +820,12 @@ int main(int argc, char **argv)
             exit(EXIT_FAILURE);
         }
         if (export_name || export_description || dev_offset ||
-            device || disconnect || fmt || sn_id_or_name || bitmaps ||
+            opts.device || disconnect || fmt || sn_id_or_name || bitmaps ||
             alloc_depth || seen_aio || seen_discard || seen_cache) {
             error_report("List mode is incompatible with per-device settings");
             exit(EXIT_FAILURE);
         }
-        if (fork_process) {
+        if (opts.fork_process) {
             error_report("List mode is incompatible with forking");
             exit(EXIT_FAILURE);
         }
@@ -807,23 +837,23 @@ int main(int argc, char **argv)
         export_name = "";
     }
 
-    qemu_opts_foreach(&qemu_object_opts,
-                      user_creatable_add_opts_foreach,
-                      qemu_nbd_object_print_help, &error_fatal);
-
     if (!trace_init_backends()) {
         exit(1);
     }
     trace_init_file();
-    qemu_set_log(LOG_TRACE);
+    qemu_set_log(LOG_TRACE, &error_fatal);
 
     socket_activation = check_socket_activation();
     if (socket_activation == 0) {
-        setup_address_and_port(&bindto, &port);
+        if (!sockpath) {
+            setup_address_and_port(&bindto, &port);
+        }
     } else {
         /* Using socket activation - check user didn't use -p etc. */
-        const char *err_msg = socket_activation_validate_opts(device, sockpath,
+        const char *err_msg = socket_activation_validate_opts(opts.device,
+                                                              sockpath,
                                                               bindto, port,
+                                                              selinux_label,
                                                               list);
         if (err_msg != NULL) {
             error_report("%s", err_msg);
@@ -839,11 +869,7 @@ int main(int argc, char **argv)
     }
 
     if (tlscredsid) {
-        if (sockpath) {
-            error_report("TLS is only supported with IPv4/IPv6");
-            exit(EXIT_FAILURE);
-        }
-        if (device) {
+        if (opts.device) {
             error_report("TLS is not supported with a host device");
             exit(EXIT_FAILURE);
         }
@@ -851,6 +877,10 @@ int main(int argc, char **argv)
             error_report("TLS authorization is incompatible with export list");
             exit(EXIT_FAILURE);
         }
+        if (tlshostname && !list) {
+            error_report("TLS hostname is only supported with export list");
+            exit(EXIT_FAILURE);
+        }
         tlscreds = nbd_get_tls_creds(tlscredsid, list, &local_err);
         if (local_err) {
             error_reportf_err(local_err, "Failed to get TLS creds: ");
@@ -861,15 +891,32 @@ int main(int argc, char **argv)
             error_report("--tls-authz is not permitted without --tls-creds");
             exit(EXIT_FAILURE);
         }
+        if (tlshostname) {
+            error_report("--tls-hostname is not permitted without --tls-creds");
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    if (selinux_label) {
+#ifdef CONFIG_SELINUX
+        if (sockpath == NULL && opts.device == NULL) {
+            error_report("--selinux-label is not permitted without --socket");
+            exit(EXIT_FAILURE);
+        }
+#else
+        error_report("SELinux support not enabled in this binary");
+        exit(EXIT_FAILURE);
+#endif
     }
 
     if (list) {
-        saddr = nbd_build_socket_address(sockpath, bindto, port);
-        return qemu_nbd_client_list(saddr, tlscreds, bindto);
+        opts.saddr = nbd_build_socket_address(sockpath, bindto, port);
+        return qemu_nbd_client_list(opts.saddr, tlscreds,
+                                    tlshostname ? tlshostname : bindto);
     }
 
 #if !HAVE_NBD_DEVICE
-    if (disconnect || device) {
+    if (disconnect || opts.device) {
         error_report("Kernel /dev/nbdN support not available");
         exit(EXIT_FAILURE);
     }
@@ -891,15 +938,15 @@ int main(int argc, char **argv)
     }
 #endif
 
-    if ((device && !verbose) || fork_process) {
+    if ((opts.device && !opts.verbose) || opts.fork_process) {
 #ifndef WIN32
+        g_autoptr(GError) err = NULL;
         int stderr_fd[2];
         pid_t pid;
-        int ret;
 
-        if (qemu_pipe(stderr_fd) < 0) {
+        if (!g_unix_open_pipe(stderr_fd, FD_CLOEXEC, &err)) {
             error_report("Error setting up communication pipe: %s",
-                         strerror(errno));
+                         err->message);
             exit(EXIT_FAILURE);
         }
 
@@ -911,19 +958,40 @@ int main(int argc, char **argv)
             error_report("Failed to fork: %s", strerror(errno));
             exit(EXIT_FAILURE);
         } else if (pid == 0) {
+            int saved_errno;
+
             close(stderr_fd[0]);
 
             /* Remember parent's stderr if we will be restoring it. */
-            if (fork_process) {
-                old_stderr = dup(STDERR_FILENO);
+            if (opts.verbose /* fork_process is set */) {
+                opts.old_stderr = dup(STDERR_FILENO);
+                if (opts.old_stderr < 0) {
+                    error_report("Could not dup original stderr: %s",
+                                 strerror(errno));
+                    exit(EXIT_FAILURE);
+                }
             }
 
             ret = qemu_daemon(1, 0);
+            saved_errno = errno;    /* dup2 will overwrite error below */
 
             /* Temporarily redirect stderr to the parent's pipe...  */
-            dup2(stderr_fd[1], STDERR_FILENO);
+            if (dup2(stderr_fd[1], STDERR_FILENO) < 0) {
+                char str[256];
+                snprintf(str, sizeof(str),
+                         "%s: Failed to link stderr to the pipe: %s\n",
+                         g_get_prgname(), strerror(errno));
+                /*
+                 * We are unable to use error_report() here as we need to get
+                 * stderr pointed to the parent's pipe. Write to that pipe
+                 * manually.
+                 */
+                ret = write(stderr_fd[1], str, strlen(str));
+                exit(EXIT_FAILURE);
+            }
+
             if (ret < 0) {
-                error_report("Failed to daemonize: %s", strerror(errno));
+                error_report("Failed to daemonize: %s", strerror(saved_errno));
                 exit(EXIT_FAILURE);
             }
 
@@ -962,27 +1030,41 @@ int main(int argc, char **argv)
 #endif /* WIN32 */
     }
 
-    if (device != NULL && sockpath == NULL) {
+    if (opts.device != NULL && sockpath == NULL) {
         sockpath = g_malloc(128);
-        snprintf(sockpath, 128, SOCKET_PATH, basename(device));
+        snprintf(sockpath, 128, SOCKET_PATH, basename(opts.device));
     }
 
     server = qio_net_listener_new();
     if (socket_activation == 0) {
         int backlog;
 
-        if (persistent) {
+        if (persistent || shared == 0) {
             backlog = SOMAXCONN;
         } else {
             backlog = MIN(shared, SOMAXCONN);
         }
-        saddr = nbd_build_socket_address(sockpath, bindto, port);
-        if (qio_net_listener_open_sync(server, saddr, backlog,
+#ifdef CONFIG_SELINUX
+        if (selinux_label && setsockcreatecon_raw(selinux_label) == -1) {
+            error_report("Cannot set SELinux socket create context to %s: %s",
+                         selinux_label, strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+#endif
+        opts.saddr = nbd_build_socket_address(sockpath, bindto, port);
+        if (qio_net_listener_open_sync(server, opts.saddr, backlog,
                                        &local_err) < 0) {
             object_unref(OBJECT(server));
             error_report_err(local_err);
             exit(EXIT_FAILURE);
         }
+#ifdef CONFIG_SELINUX
+        if (selinux_label && setsockcreatecon_raw(NULL) == -1) {
+            error_report("Cannot clear SELinux socket create context: %s",
+                         strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+#endif
     } else {
         size_t i;
         /* See comment in check_socket_activation above. */
@@ -1001,26 +1083,23 @@ int main(int argc, char **argv)
         }
     }
 
-    if (qemu_init_main_loop(&local_err)) {
-        error_report_err(local_err);
-        exit(EXIT_FAILURE);
-    }
+    qemu_init_main_loop(&error_fatal);
     bdrv_init();
     atexit(qemu_nbd_shutdown);
 
-    srcpath = argv[optind];
+    opts.srcpath = argv[optind];
     if (imageOpts) {
-        QemuOpts *opts;
+        QemuOpts *o;
         if (fmt) {
             error_report("--image-opts and -f are mutually exclusive");
             exit(EXIT_FAILURE);
         }
-        opts = qemu_opts_parse_noisily(&file_opts, srcpath, true);
-        if (!opts) {
+        o = qemu_opts_parse_noisily(&file_opts, opts.srcpath, true);
+        if (!o) {
             qemu_opts_reset(&file_opts);
             exit(EXIT_FAILURE);
         }
-        options = qemu_opts_to_qdict(opts, NULL);
+        options = qemu_opts_to_qdict(o, NULL);
         qemu_opts_reset(&file_opts);
         blk = blk_new_open(NULL, NULL, options, flags, &local_err);
     } else {
@@ -1028,7 +1107,7 @@ int main(int argc, char **argv)
             options = qdict_new();
             qdict_put_str(options, "driver", fmt);
         }
-        blk = blk_new_open(srcpath, NULL, options, flags, &local_err);
+        blk = blk_new_open(opts.srcpath, NULL, options, flags, &local_err);
     }
 
     if (!blk) {
@@ -1043,7 +1122,11 @@ int main(int argc, char **argv)
         qdict_put_str(raw_opts, "driver", "raw");
         qdict_put_str(raw_opts, "file", bs->node_name);
         qdict_put_int(raw_opts, "offset", dev_offset);
+
+        aio_context_acquire(qemu_get_aio_context());
         bs = bdrv_open(NULL, NULL, raw_opts, flags, &error_fatal);
+        aio_context_release(qemu_get_aio_context());
+
         blk_remove_bs(blk);
         blk_insert_bs(blk, bs, &error_fatal);
         bdrv_unref(bs);
@@ -1067,7 +1150,7 @@ int main(int argc, char **argv)
 
     bs->detect_zeroes = detect_zeroes;
 
-    nbd_server_is_qemu_nbd(true);
+    nbd_server_is_qemu_nbd(shared);
 
     export_opts = g_new(BlockExportOptions, 1);
     *export_opts = (BlockExportOptions) {
@@ -1079,9 +1162,7 @@ int main(int argc, char **argv)
         .has_writable       = true,
         .writable           = !readonly,
         .u.nbd = {
-            .has_name             = true,
             .name                 = g_strdup(export_name),
-            .has_description      = !!export_description,
             .description          = g_strdup(export_description),
             .has_bitmaps          = !!bitmaps,
             .bitmaps              = bitmaps,
@@ -1092,11 +1173,9 @@ int main(int argc, char **argv)
     blk_exp_add(export_opts, &error_fatal);
     qapi_free_BlockExportOptions(export_opts);
 
-    if (device) {
+    if (opts.device) {
 #if HAVE_NBD_DEVICE
-        int ret;
-
-        ret = pthread_create(&client_thread, NULL, nbd_client_thread, device);
+        ret = pthread_create(&client_thread, NULL, nbd_client_thread, &opts);
         if (ret != 0) {
             error_report("Failed to create client thread: %s", strerror(ret));
             exit(EXIT_FAILURE);
@@ -1121,9 +1200,8 @@ int main(int argc, char **argv)
         exit(EXIT_FAILURE);
     }
 
-    if (fork_process) {
-        dup2(old_stderr, STDERR_FILENO);
-        close(old_stderr);
+    if (opts.fork_process) {
+        nbd_client_release_pipe(opts.old_stderr);
     }
 
     state = RUNNING;
@@ -1142,10 +1220,11 @@ int main(int argc, char **argv)
 
     qemu_opts_del(sn_opts);
 
-    if (device) {
-        void *ret;
-        pthread_join(client_thread, &ret);
-        exit(ret != NULL);
+    if (opts.device) {
+        void *result;
+        pthread_join(client_thread, &result);
+        ret = (intptr_t)result;
+        exit(ret);
     } else {
         exit(EXIT_SUCCESS);
     }
diff --git a/qemu-options-wrapper.h b/qemu-options-wrapper.h
deleted file mode 100644 (file)
index 6f548e3..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-
-#if defined(QEMU_OPTIONS_GENERATE_ENUM)
-
-#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask)     \
-    opt_enum,
-#define DEFHEADING(text)
-#define ARCHHEADING(text, arch_mask)
-
-#elif defined(QEMU_OPTIONS_GENERATE_HELP)
-
-#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask)    \
-    if ((arch_mask) & arch_type)                               \
-        fputs(opt_help, stdout);
-
-#define ARCHHEADING(text, arch_mask) \
-    if ((arch_mask) & arch_type)    \
-        puts(stringify(text));
-
-#define DEFHEADING(text) ARCHHEADING(text, QEMU_ARCH_ALL)
-
-#elif defined(QEMU_OPTIONS_GENERATE_OPTIONS)
-
-#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask)     \
-    { option, opt_arg, opt_enum, arch_mask },
-#define DEFHEADING(text)
-#define ARCHHEADING(text, arch_mask)
-
-#else
-#error "qemu-options-wrapper.h included with no option defined"
-#endif
-
-#include "qemu-options.def"
-
-#undef DEF
-#undef DEFHEADING
-#undef ARCHHEADING
-
-#undef QEMU_OPTIONS_GENERATE_ENUM
-#undef QEMU_OPTIONS_GENERATE_HELP
-#undef QEMU_OPTIONS_GENERATE_OPTIONS
diff --git a/qemu-options.h b/qemu-options.h
deleted file mode 100644 (file)
index b4ee63c..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * qemu-options.h
- *
- * Defines needed for command line argument processing.
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- * Copyright (c) 2010 Jes Sorensen <Jes.Sorensen@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef QEMU_OPTIONS_H
-#define QEMU_OPTIONS_H
-
-enum {
-#define QEMU_OPTIONS_GENERATE_ENUM
-#include "qemu-options-wrapper.h"
-};
-
-#endif
index 104632ea3431886172bfa0c4e0e05d402bf8e2c4..b6b4ad9e6763d666b0cc9e8cac7835c595bcaa13 100644 (file)
@@ -26,7 +26,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
     "-machine [type=]name[,prop[=value][,...]]\n"
     "                selects emulated machine ('-machine help' for list)\n"
     "                property accel=accel1[:accel2[:...]] selects accelerator\n"
-    "                supported accelerators are kvm, xen, hax, hvf, whpx or tcg (default: tcg)\n"
+    "                supported accelerators are kvm, xen, hvf, nvmm, whpx or tcg (default: tcg)\n"
     "                vmport=on|off|auto controls emulation of vmport (default: auto)\n"
     "                dump-guest-core=on|off include guest memory in a core dump (default=on)\n"
     "                mem-merge=on|off controls memory merge support (default: on)\n"
@@ -35,7 +35,9 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
     "                suppress-vmdesc=on|off disables self-describing migration (default=off)\n"
     "                nvdimm=on|off controls NVDIMM support (default=off)\n"
     "                memory-encryption=@var{} memory encryption object to use (default=none)\n"
-    "                hmat=on|off controls ACPI HMAT support (default=off)\n",
+    "                hmat=on|off controls ACPI HMAT support (default=off)\n"
+    "                memory-backend='backend-id' specifies explicitly provided backend for main RAM (default=none)\n"
+    "                cxl-fmw.0.targets.0=firsttarget,cxl-fmw.0.targets.1=secondtarget,cxl-fmw.0.size=size[,cxl-fmw.0.interleave-granularity=granularity]\n",
     QEMU_ARCH_ALL)
 SRST
 ``-machine [type=]name[,prop=value[,...]]``
@@ -57,7 +59,7 @@ SRST
 
     ``accel=accels1[:accels2[:...]]``
         This is used to enable an accelerator. Depending on the target
-        architecture, kvm, xen, hax, hvf, whpx or tcg can be available.
+        architecture, kvm, xen, hvf, nvmm, whpx or tcg can be available.
         By default, tcg is used. If there is more than one accelerator
         specified, the next one is used if the previous one fails to
         initialize.
@@ -96,10 +98,75 @@ SRST
     ``hmat=on|off``
         Enables or disables ACPI Heterogeneous Memory Attribute Table
         (HMAT) support. The default is off.
+
+    ``memory-backend='id'``
+        An alternative to legacy ``-mem-path`` and ``mem-prealloc`` options.
+        Allows to use a memory backend as main RAM.
+
+        For example:
+        ::
+
+            -object memory-backend-file,id=pc.ram,size=512M,mem-path=/hugetlbfs,prealloc=on,share=on
+            -machine memory-backend=pc.ram
+            -m 512M
+
+        Migration compatibility note:
+
+        * as backend id one shall use value of 'default-ram-id', advertised by
+          machine type (available via ``query-machines`` QMP command), if migration
+          to/from old QEMU (<5.0) is expected.
+        * for machine types 4.0 and older, user shall
+          use ``x-use-canonical-path-for-ramblock-id=off`` backend option
+          if migration to/from old QEMU (<5.0) is expected.
+
+        For example:
+        ::
+
+            -object memory-backend-ram,id=pc.ram,size=512M,x-use-canonical-path-for-ramblock-id=off
+            -machine memory-backend=pc.ram
+            -m 512M
+
+    ``cxl-fmw.0.targets.0=firsttarget,cxl-fmw.0.targets.1=secondtarget,cxl-fmw.0.size=size[,cxl-fmw.0.interleave-granularity=granularity]``
+        Define a CXL Fixed Memory Window (CFMW).
+
+        Described in the CXL 2.0 ECN: CEDT CFMWS & QTG _DSM.
+
+        They are regions of Host Physical Addresses (HPA) on a system which
+        may be interleaved across one or more CXL host bridges.  The system
+        software will assign particular devices into these windows and
+        configure the downstream Host-managed Device Memory (HDM) decoders
+        in root ports, switch ports and devices appropriately to meet the
+        interleave requirements before enabling the memory devices.
+
+        ``targets.X=target`` provides the mapping to CXL host bridges
+        which may be identified by the id provided in the -device entry.
+        Multiple entries are needed to specify all the targets when
+        the fixed memory window represents interleaved memory. X is the
+        target index from 0.
+
+        ``size=size`` sets the size of the CFMW. This must be a multiple of
+        256MiB. The region will be aligned to 256MiB but the location is
+        platform and configuration dependent.
+
+        ``interleave-granularity=granularity`` sets the granularity of
+        interleave. Default 256KiB. Only 256KiB, 512KiB, 1024KiB, 2048KiB
+        4096KiB, 8192KiB and 16384KiB granularities supported.
+
+        Example:
+
+        ::
+
+            -machine cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.targets.1=cxl.1,cxl-fmw.0.size=128G,cxl-fmw.0.interleave-granularity=512k
 ERST
 
-HXCOMM Deprecated by -machine
-DEF("M", HAS_ARG, QEMU_OPTION_M, "", QEMU_ARCH_ALL)
+DEF("M", HAS_ARG, QEMU_OPTION_M,
+    "                sgx-epc.0.memdev=memid,sgx-epc.0.node=numaid\n",
+    QEMU_ARCH_ALL)
+
+SRST
+``sgx-epc.0.memdev=@var{memid},sgx-epc.0.node=@var{numaid}``
+    Define an SGX EPC section.
+ERST
 
 DEF("cpu", HAS_ARG, QEMU_OPTION_cpu,
     "-cpu cpu        select CPU ('-cpu help' for list)\n", QEMU_ARCH_ALL)
@@ -111,16 +178,21 @@ ERST
 
 DEF("accel", HAS_ARG, QEMU_OPTION_accel,
     "-accel [accel=]accelerator[,prop[=value][,...]]\n"
-    "                select accelerator (kvm, xen, hax, hvf, whpx or tcg; use 'help' for a list)\n"
+    "                select accelerator (kvm, xen, hvf, nvmm, whpx or tcg; use 'help' for a list)\n"
     "                igd-passthru=on|off (enable Xen integrated Intel graphics passthrough, default=off)\n"
     "                kernel-irqchip=on|off|split controls accelerated irqchip support (default=on)\n"
     "                kvm-shadow-mem=size of KVM shadow MMU in bytes\n"
+    "                one-insn-per-tb=on|off (one guest instruction per TCG translation block)\n"
+    "                split-wx=on|off (enable TCG split w^x mapping)\n"
     "                tb-size=n (TCG translation block cache size)\n"
+    "                dirty-ring-size=n (KVM dirty ring GFN count, default 0)\n"
+    "                eager-split-size=n (KVM Eager Page Split chunk size, default 0, disabled. ARM only)\n"
+    "                notify-vmexit=run|internal-error|disable,notify-window=n (enable notify VM exit and set notify window, x86 only)\n"
     "                thread=single|multi (enable multi-threaded TCG)\n", QEMU_ARCH_ALL)
 SRST
 ``-accel name[,prop=value[,...]]``
     This is used to enable an accelerator. Depending on the target
-    architecture, kvm, xen, hax, hvf, whpx or tcg can be available. By
+    architecture, kvm, xen, hvf, nvmm, whpx or tcg can be available. By
     default, tcg is used. If there is more than one accelerator
     specified, the next one is used if the previous one fails to
     initialize.
@@ -140,38 +212,165 @@ SRST
     ``kvm-shadow-mem=size``
         Defines the size of the KVM shadow MMU.
 
+    ``one-insn-per-tb=on|off``
+        Makes the TCG accelerator put only one guest instruction into
+        each translation block. This slows down emulation a lot, but
+        can be useful in some situations, such as when trying to analyse
+        the logs produced by the ``-d`` option.
+
+    ``split-wx=on|off``
+        Controls the use of split w^x mapping for the TCG code generation
+        buffer. Some operating systems require this to be enabled, and in
+        such a case this will default on. On other operating systems, this
+        will default off, but one may enable this for testing or debugging.
+
     ``tb-size=n``
         Controls the size (in MiB) of the TCG translation block cache.
 
     ``thread=single|multi``
         Controls number of TCG threads. When the TCG is multi-threaded
-        there will be one thread per vCPU therefor taking advantage of
+        there will be one thread per vCPU therefore taking advantage of
         additional host cores. The default is to enable multi-threading
         where both the back-end and front-ends support it and no
         incompatible TCG features have been enabled (e.g.
         icount/replay).
+
+    ``dirty-ring-size=n``
+        When the KVM accelerator is used, it controls the size of the per-vCPU
+        dirty page ring buffer (number of entries for each vCPU). It should
+        be a value that is power of two, and it should be 1024 or bigger (but
+        still less than the maximum value that the kernel supports).  4096
+        could be a good initial value if you have no idea which is the best.
+        Set this value to 0 to disable the feature.  By default, this feature
+        is disabled (dirty-ring-size=0).  When enabled, KVM will instead
+        record dirty pages in a bitmap.
+
+    ``eager-split-size=n``
+        KVM implements dirty page logging at the PAGE_SIZE granularity and
+        enabling dirty-logging on a huge-page requires breaking it into
+        PAGE_SIZE pages in the first place. KVM on ARM does this splitting
+        lazily by default. There are performance benefits in doing huge-page
+        split eagerly, especially in situations where TLBI costs associated
+        with break-before-make sequences are considerable and also if guest
+        workloads are read intensive. The size here specifies how many pages
+        to break at a time and needs to be a valid block size which is
+        1GB/2MB/4KB, 32MB/16KB and 512MB/64KB for 4KB/16KB/64KB PAGE_SIZE
+        respectively. Be wary of specifying a higher size as it will have an
+        impact on the memory. By default, this feature is disabled
+        (eager-split-size=0).
+
+    ``notify-vmexit=run|internal-error|disable,notify-window=n``
+        Enables or disables notify VM exit support on x86 host and specify
+        the corresponding notify window to trigger the VM exit if enabled.
+        ``run`` option enables the feature. It does nothing and continue
+        if the exit happens. ``internal-error`` option enables the feature.
+        It raises a internal error. ``disable`` option doesn't enable the feature.
+        This feature can mitigate the CPU stuck issue due to event windows don't
+        open up for a specified of time (i.e. notify-window).
+        Default: notify-vmexit=run,notify-window=0.
+
 ERST
 
 DEF("smp", HAS_ARG, QEMU_OPTION_smp,
-    "-smp [cpus=]n[,maxcpus=cpus][,cores=cores][,threads=threads][,dies=dies][,sockets=sockets]\n"
-    "                set the number of CPUs to 'n' [default=1]\n"
-    "                maxcpus= maximum number of total cpus, including\n"
+    "-smp [[cpus=]n][,maxcpus=maxcpus][,drawers=drawers][,books=books][,sockets=sockets]\n"
+    "               [,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]\n"
+    "                set the number of initial CPUs to 'n' [default=1]\n"
+    "                maxcpus= maximum number of total CPUs, including\n"
     "                offline CPUs for hotplug, etc\n"
-    "                cores= number of CPU cores on one socket (for PC, it's on one die)\n"
-    "                threads= number of threads on one CPU core\n"
-    "                dies= number of CPU dies on one socket (for PC only)\n"
-    "                sockets= number of discrete sockets in the system\n",
-        QEMU_ARCH_ALL)
+    "                drawers= number of drawers on the machine board\n"
+    "                books= number of books in one drawer\n"
+    "                sockets= number of sockets in one book\n"
+    "                dies= number of dies in one socket\n"
+    "                clusters= number of clusters in one die\n"
+    "                cores= number of cores in one cluster\n"
+    "                threads= number of threads in one core\n"
+    "Note: Different machines may have different subsets of the CPU topology\n"
+    "      parameters supported, so the actual meaning of the supported parameters\n"
+    "      will vary accordingly. For example, for a machine type that supports a\n"
+    "      three-level CPU hierarchy of sockets/cores/threads, the parameters will\n"
+    "      sequentially mean as below:\n"
+    "                sockets means the number of sockets on the machine board\n"
+    "                cores means the number of cores in one socket\n"
+    "                threads means the number of threads in one core\n"
+    "      For a particular machine type board, an expected CPU topology hierarchy\n"
+    "      can be defined through the supported sub-option. Unsupported parameters\n"
+    "      can also be provided in addition to the sub-option, but their values\n"
+    "      must be set as 1 in the purpose of correct parsing.\n",
+    QEMU_ARCH_ALL)
 SRST
-``-smp [cpus=]n[,cores=cores][,threads=threads][,dies=dies][,sockets=sockets][,maxcpus=maxcpus]``
-    Simulate an SMP system with n CPUs. On the PC target, up to 255 CPUs
-    are supported. On Sparc32 target, Linux limits the number of usable
-    CPUs to 4. For the PC target, the number of cores per die, the
-    number of threads per cores, the number of dies per packages and the
-    total number of sockets can be specified. Missing values will be
-    computed. If any on the three values is given, the total number of
-    CPUs n can be omitted. maxcpus specifies the maximum number of
-    hotpluggable CPUs.
+``-smp [[cpus=]n][,maxcpus=maxcpus][,sockets=sockets][,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]``
+    Simulate a SMP system with '\ ``n``\ ' CPUs initially present on
+    the machine type board. On boards supporting CPU hotplug, the optional
+    '\ ``maxcpus``\ ' parameter can be set to enable further CPUs to be
+    added at runtime. When both parameters are omitted, the maximum number
+    of CPUs will be calculated from the provided topology members and the
+    initial CPU count will match the maximum number. When only one of them
+    is given then the omitted one will be set to its counterpart's value.
+    Both parameters may be specified, but the maximum number of CPUs must
+    be equal to or greater than the initial CPU count. Product of the
+    CPU topology hierarchy must be equal to the maximum number of CPUs.
+    Both parameters are subject to an upper limit that is determined by
+    the specific machine type chosen.
+
+    To control reporting of CPU topology information, values of the topology
+    parameters can be specified. Machines may only support a subset of the
+    parameters and different machines may have different subsets supported
+    which vary depending on capacity of the corresponding CPU targets. So
+    for a particular machine type board, an expected topology hierarchy can
+    be defined through the supported sub-option. Unsupported parameters can
+    also be provided in addition to the sub-option, but their values must be
+    set as 1 in the purpose of correct parsing.
+
+    Either the initial CPU count, or at least one of the topology parameters
+    must be specified. The specified parameters must be greater than zero,
+    explicit configuration like "cpus=0" is not allowed. Values for any
+    omitted parameters will be computed from those which are given.
+
+    For example, the following sub-option defines a CPU topology hierarchy
+    (2 sockets totally on the machine, 2 cores per socket, 2 threads per
+    core) for a machine that only supports sockets/cores/threads.
+    Some members of the option can be omitted but their values will be
+    automatically computed:
+
+    ::
+
+        -smp 8,sockets=2,cores=2,threads=2,maxcpus=8
+
+    The following sub-option defines a CPU topology hierarchy (2 sockets
+    totally on the machine, 2 dies per socket, 2 cores per die, 2 threads
+    per core) for PC machines which support sockets/dies/cores/threads.
+    Some members of the option can be omitted but their values will be
+    automatically computed:
+
+    ::
+
+        -smp 16,sockets=2,dies=2,cores=2,threads=2,maxcpus=16
+
+    The following sub-option defines a CPU topology hierarchy (2 sockets
+    totally on the machine, 2 clusters per socket, 2 cores per cluster,
+    2 threads per core) for ARM virt machines which support sockets/clusters
+    /cores/threads. Some members of the option can be omitted but their values
+    will be automatically computed:
+
+    ::
+
+        -smp 16,sockets=2,clusters=2,cores=2,threads=2,maxcpus=16
+
+    Historically preference was given to the coarsest topology parameters
+    when computing missing values (ie sockets preferred over cores, which
+    were preferred over threads), however, this behaviour is considered
+    liable to change. Prior to 6.2 the preference was sockets over cores
+    over threads. Since 6.2 the preference is cores over sockets over threads.
+
+    For example, the following option defines a machine board with 2 sockets
+    of 1 core before 6.2 and 1 socket of 2 cores after 6.2:
+
+    ::
+
+        -smp 2
+
+    Note: The cluster topology will only be generated in ACPI and exposed
+    to guest if it's explicitly specified in -smp.
 ERST
 
 DEF("numa", HAS_ARG, QEMU_OPTION_numa,
@@ -191,7 +390,7 @@ SRST
   \ 
 ``-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]``
   \ 
-``-numa hmat-lb,initiator=node,target=node,hierarchy=hierarchy,data-type=tpye[,latency=lat][,bandwidth=bw]``
+``-numa hmat-lb,initiator=node,target=node,hierarchy=hierarchy,data-type=type[,latency=lat][,bandwidth=bw]``
   \ 
 ``-numa hmat-cache,node-id=node,size=size,level=level[,associativity=str][,policy=str][,line=size]``
     Define a NUMA node and assign RAM and VCPUs to it. Set the NUMA
@@ -231,15 +430,22 @@ SRST
         -numa node,nodeid=0 -numa node,nodeid=1 \
         -numa cpu,node-id=0,socket-id=0 -numa cpu,node-id=1,socket-id=1
 
-    Legacy '\ ``mem``\ ' assigns a given RAM amount to a node (not supported
-    for 5.1 and newer machine types). '\ ``memdev``\ ' assigns RAM from
-    a given memory backend device to a node. If '\ ``mem``\ ' and
-    '\ ``memdev``\ ' are omitted in all nodes, RAM is split equally between them.
+    '\ ``memdev``\ ' option assigns RAM from a given memory backend
+    device to a node. It is recommended to use '\ ``memdev``\ ' option
+    over legacy '\ ``mem``\ ' option. This is because '\ ``memdev``\ '
+    option provides better performance and more control over the
+    backend's RAM (e.g. '\ ``prealloc``\ ' parameter of
+    '\ ``-memory-backend-ram``\ ' allows memory preallocation).
 
+    For compatibility reasons, legacy '\ ``mem``\ ' option is
+    supported in 5.0 and older machine types. Note that '\ ``mem``\ '
+    and '\ ``memdev``\ ' are mutually exclusive. If one node uses
+    '\ ``memdev``\ ', the rest nodes have to use '\ ``memdev``\ '
+    option, and vice versa.
 
-    '\ ``mem``\ ' and '\ ``memdev``\ ' are mutually exclusive.
-    Furthermore, if one node uses '\ ``memdev``\ ', all of them have to
-    use it.
+    Users must specify memory for all NUMA nodes by '\ ``memdev``\ '
+    (or legacy '\ ``mem``\ ' if available). In QEMU 5.2, the support
+    for '\ ``-numa node``\ ' without memory specified was removed.
 
     '\ ``initiator``\ ' is an additional option that points to an
     initiator NUMA node that has best performance (the lowest latency or
@@ -327,7 +533,7 @@ SRST
         -m 2G \
         -object memory-backend-ram,size=1G,id=m0 \
         -object memory-backend-ram,size=1G,id=m1 \
-        -smp 2 \
+        -smp 2,sockets=2,maxcpus=2 \
         -numa node,nodeid=0,memdev=m0 \
         -numa node,nodeid=1,memdev=m1,initiator=0 \
         -numa cpu,node-id=0,socket-id=0 \
@@ -464,7 +670,7 @@ DEF("m", HAS_ARG, QEMU_OPTION_m,
     "                size: initial amount of guest memory\n"
     "                slots: number of hotplug slots (default: none)\n"
     "                maxmem: maximum amount of guest memory (default: none)\n"
-    "NOTE: Some architectures might enforce a specific granularity\n",
+    "                Note: Some architectures might enforce a specific granularity\n",
     QEMU_ARCH_ALL)
 SRST
 ``-m [size=]megs[,slots=n,maxmem=size]``
@@ -524,19 +730,48 @@ SRST
 ERST
 
 
-HXCOMM Deprecated by -audiodev
-DEF("audio-help", 0, QEMU_OPTION_audio_help,
-    "-audio-help     show -audiodev equivalent of the currently specified audio settings\n",
+DEF("audio", HAS_ARG, QEMU_OPTION_audio,
+    "-audio [driver=]driver[,prop[=value][,...]]\n"
+    "                specifies default audio backend when `audiodev` is not\n"
+    "                used to create a machine or sound device;"
+    "                options are the same as for -audiodev\n"
+    "-audio [driver=]driver,model=value[,prop[=value][,...]]\n"
+    "                specifies the audio backend and device to use;\n"
+    "                apart from 'model', options are the same as for -audiodev.\n"
+    "                use '-audio model=help' to show possible devices.\n",
     QEMU_ARCH_ALL)
 SRST
-``-audio-help``
-    Will show the -audiodev equivalent of the currently specified
-    (deprecated) environment variables.
+``-audio [driver=]driver[,model=value][,prop[=value][,...]]``
+    If the ``model`` option is specified, ``-audio`` is a shortcut
+    for configuring both the guest audio hardware and the host audio
+    backend in one go. The guest hardware model can be set with
+    ``model=modelname``.  Use ``model=help`` to list the available
+    device types.
+
+    The following two example do exactly the same, to show how ``-audio``
+    can be used to shorten the command line length:
+
+    .. parsed-literal::
+
+        |qemu_system| -audiodev pa,id=pa -device sb16,audiodev=pa
+        |qemu_system| -audio pa,model=sb16
+
+    If the ``model`` option is not specified, ``-audio`` is used to
+    configure a default audio backend that will be used whenever the
+    ``audiodev`` property is not set on a device or machine.  In
+    particular, ``-audio none`` ensures that no audio is produced even
+    for machines that have embedded sound hardware.
+
+    In both cases, the driver option is the same as with the corresponding
+    ``-audiodev`` option below.  Use ``driver=help`` to list the available
+    drivers.
+
 ERST
 
 DEF("audiodev", HAS_ARG, QEMU_OPTION_audiodev,
     "-audiodev [driver=]driver,id=id[,prop[=value][,...]]\n"
     "                specifies the audio backend to use\n"
+    "                Use ``-audiodev help`` to list the available drivers\n"
     "                id= identifier of the backend\n"
     "                timer-period= timer period in microseconds\n"
     "                in|out.mixing-engine= use mixing engine to mix streams inside QEMU\n"
@@ -579,11 +814,24 @@ DEF("audiodev", HAS_ARG, QEMU_OPTION_audiodev,
     "                in|out.name= source/sink device name\n"
     "                in|out.latency= desired latency in microseconds\n"
 #endif
+#ifdef CONFIG_AUDIO_PIPEWIRE
+    "-audiodev pipewire,id=id[,prop[=value][,...]]\n"
+    "                in|out.name= source/sink device name\n"
+    "                in|out.stream-name= name of pipewire stream\n"
+    "                in|out.latency= desired latency in microseconds\n"
+#endif
 #ifdef CONFIG_AUDIO_SDL
     "-audiodev sdl,id=id[,prop[=value][,...]]\n"
+    "                in|out.buffer-count= number of buffers\n"
+#endif
+#ifdef CONFIG_AUDIO_SNDIO
+    "-audiodev sndio,id=id[,prop[=value][,...]]\n"
 #endif
 #ifdef CONFIG_SPICE
     "-audiodev spice,id=id[,prop[=value][,...]]\n"
+#endif
+#ifdef CONFIG_DBUS_DISPLAY
+    "-audiodev dbus,id=id[,prop[=value][,...]]\n"
 #endif
     "-audiodev wav,id=id[,prop[=value][,...]]\n"
     "                path= path of wav file to record\n",
@@ -735,10 +983,43 @@ SRST
         Desired latency in microseconds. The PulseAudio server will try
         to honor this value but actual latencies may be lower or higher.
 
+``-audiodev pipewire,id=id[,prop[=value][,...]]``
+    Creates a backend using PipeWire. This backend is available on
+    most systems.
+
+    PipeWire specific options are:
+
+    ``in|out.latency=usecs``
+        Desired latency in microseconds.
+
+    ``in|out.name=sink``
+        Use the specified source/sink for recording/playback.
+
+    ``in|out.stream-name``
+        Specify the name of pipewire stream.
+
 ``-audiodev sdl,id=id[,prop[=value][,...]]``
     Creates a backend using SDL. This backend is available on most
     systems, but you should use your platform's native backend if
-    possible. This backend has no backend specific properties.
+    possible.
+
+    SDL specific options are:
+
+    ``in|out.buffer-count=count``
+        Sets the count of the buffers.
+
+``-audiodev sndio,id=id[,prop[=value][,...]]``
+    Creates a backend using SNDIO. This backend is available on
+    OpenBSD and most other Unix-like systems.
+
+    Sndio specific options are:
+
+    ``in|out.dev=device``
+        Specify the sndio device to use for input and/or output. Default
+        is ``default``.
+
+    ``in|out.latency=usecs``
+        Sets the desired period length in microseconds.
 
 ``-audiodev spice,id=id[,prop[=value][,...]]``
     Creates a backend that sends audio through SPICE. This backend
@@ -756,33 +1037,6 @@ SRST
         ``qemu.wav``.
 ERST
 
-DEF("soundhw", HAS_ARG, QEMU_OPTION_soundhw,
-    "-soundhw c1,... enable audio support\n"
-    "                and only specified sound cards (comma separated list)\n"
-    "                use '-soundhw help' to get the list of supported cards\n"
-    "                use '-soundhw all' to enable all of them\n", QEMU_ARCH_ALL)
-SRST
-``-soundhw card1[,card2,...] or -soundhw all``
-    Enable audio and selected sound hardware. Use 'help' to print all
-    available sound hardware. For example:
-
-    .. parsed-literal::
-
-        |qemu_system_x86| -soundhw sb16,adlib disk.img
-        |qemu_system_x86| -soundhw es1370 disk.img
-        |qemu_system_x86| -soundhw ac97 disk.img
-        |qemu_system_x86| -soundhw hda disk.img
-        |qemu_system_x86| -soundhw all disk.img
-        |qemu_system_x86| -soundhw help
-
-    Note that Linux's i810\_audio OSS kernel (for AC97) module might
-    require manually specifying clocking.
-
-    ::
-
-        modprobe i810_audio clocking=48000
-ERST
-
 DEF("device", HAS_ARG, QEMU_OPTION_device,
     "-device driver[,prop[=value][,...]]\n"
     "                add device (based on driver)\n"
@@ -850,7 +1104,7 @@ SRST
     details on the external interface.
 
 ``-device isa-ipmi-kcs,bmc=id[,ioport=val][,irq=val]``
-    Add a KCS IPMI interafce on the ISA bus. This also adds a
+    Add a KCS IPMI interface on the ISA bus. This also adds a
     corresponding ACPI and SMBIOS entries, if appropriate.
 
     ``bmc=id``
@@ -870,13 +1124,46 @@ SRST
     is 0xe4 and the default interrupt is 5.
 
 ``-device pci-ipmi-kcs,bmc=id``
-    Add a KCS IPMI interafce on the PCI bus.
+    Add a KCS IPMI interface on the PCI bus.
 
     ``bmc=id``
         The BMC to connect to, one of ipmi-bmc-sim or ipmi-bmc-extern above.
 
 ``-device pci-ipmi-bt,bmc=id``
     Like the KCS interface, but defines a BT interface on the PCI bus.
+
+``-device intel-iommu[,option=...]``
+    This is only supported by ``-machine q35``, which will enable Intel VT-d
+    emulation within the guest.  It supports below options:
+
+    ``intremap=on|off`` (default: auto)
+        This enables interrupt remapping feature.  It's required to enable
+        complete x2apic.  Currently it only supports kvm kernel-irqchip modes
+        ``off`` or ``split``, while full kernel-irqchip is not yet supported.
+        The default value is "auto", which will be decided by the mode of
+        kernel-irqchip.
+
+    ``caching-mode=on|off`` (default: off)
+        This enables caching mode for the VT-d emulated device.  When
+        caching-mode is enabled, each guest DMA buffer mapping will generate an
+        IOTLB invalidation from the guest IOMMU driver to the vIOMMU device in
+        a synchronous way.  It is required for ``-device vfio-pci`` to work
+        with the VT-d device, because host assigned devices requires to setup
+        the DMA mapping on the host before guest DMA starts.
+
+    ``device-iotlb=on|off`` (default: off)
+        This enables device-iotlb capability for the emulated VT-d device.  So
+        far virtio/vhost should be the only real user for this parameter,
+        paired with ats=on configured for the device.
+
+    ``aw-bits=39|48`` (default: 39)
+        This decides the address width of IOVA address space.  The address
+        space has 39 bits width for 3-level IOMMU page tables, and 48 bits for
+        4-level IOMMU page tables.
+
+    Please also refer to the wiki page for general scenarios of VT-d
+    emulation in QEMU: https://wiki.qemu.org/Features/VT-d.
+
 ERST
 
 DEF("name", HAS_ARG, QEMU_OPTION_name,
@@ -906,6 +1193,31 @@ DEFHEADING()
 
 DEFHEADING(Block device options:)
 
+SRST
+The QEMU block device handling options have a long history and
+have gone through several iterations as the feature set and complexity
+of the block layer have grown. Many online guides to QEMU often
+reference older and deprecated options, which can lead to confusion.
+
+The most explicit way to describe disks is to use a combination of
+``-device`` to specify the hardware device and ``-blockdev`` to
+describe the backend. The device defines what the guest sees and the
+backend describes how QEMU handles the data. It is the only guaranteed
+stable interface for describing block devices and as such is
+recommended for management tools and scripting.
+
+The ``-drive`` option combines the device and backend into a single
+command line option which is a more human friendly. There is however no
+interface stability guarantee although some older board models still
+need updating to work with the modern blockdev forms.
+
+Older options like ``-hda`` are essentially macros which expand into
+``-drive`` options for various drive interfaces. The original forms
+bake in a lot of assumptions from the days when QEMU was emulating a
+legacy PC, they are not recommended for modern configurations.
+
+ERST
+
 DEF("fda", HAS_ARG, QEMU_OPTION_fda,
     "-fda/-fdb file  use 'file' as floppy disk 0/1 image\n", QEMU_ARCH_ALL)
 DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "", QEMU_ARCH_ALL)
@@ -918,10 +1230,10 @@ SRST
 ERST
 
 DEF("hda", HAS_ARG, QEMU_OPTION_hda,
-    "-hda/-hdb file  use 'file' as IDE hard disk 0/1 image\n", QEMU_ARCH_ALL)
+    "-hda/-hdb file  use 'file' as hard disk 0/1 image\n", QEMU_ARCH_ALL)
 DEF("hdb", HAS_ARG, QEMU_OPTION_hdb, "", QEMU_ARCH_ALL)
 DEF("hdc", HAS_ARG, QEMU_OPTION_hdc,
-    "-hdc/-hdd file  use 'file' as IDE hard disk 2/3 image\n", QEMU_ARCH_ALL)
+    "-hdc/-hdd file  use 'file' as hard disk 2/3 image\n", QEMU_ARCH_ALL)
 DEF("hdd", HAS_ARG, QEMU_OPTION_hdd, "", QEMU_ARCH_ALL)
 SRST
 ``-hda file``
@@ -931,18 +1243,22 @@ SRST
 ``-hdc file``
   \ 
 ``-hdd file``
-    Use file as hard disk 0, 1, 2 or 3 image (see the :ref:`disk images`
-    chapter in the System Emulation Users Guide).
+    Use file as hard disk 0, 1, 2 or 3 image on the default bus of the
+    emulated machine (this is for example the IDE bus on most x86 machines,
+    but it can also be SCSI, virtio or something else on other target
+    architectures). See also the :ref:`disk images` chapter in the System
+    Emulation Users Guide.
 ERST
 
 DEF("cdrom", HAS_ARG, QEMU_OPTION_cdrom,
-    "-cdrom file     use 'file' as IDE cdrom image (cdrom is ide1 master)\n",
+    "-cdrom file     use 'file' as CD-ROM image\n",
     QEMU_ARCH_ALL)
 SRST
 ``-cdrom file``
-    Use file as CD-ROM image (you cannot use ``-hdc`` and ``-cdrom`` at
-    the same time). You can use the host CD-ROM by using ``/dev/cdrom``
-    as filename.
+    Use file as CD-ROM image on the default bus of the emulated machine
+    (which is IDE1 master on x86, so you cannot use ``-hdc`` and ``-cdrom``
+    at the same time there). On systems that support it, you can use the
+    host CD-ROM by using ``/dev/cdrom`` as filename.
 ERST
 
 DEF("blockdev", HAS_ARG, QEMU_OPTION_blockdev,
@@ -1140,6 +1456,22 @@ SRST
             issued on other occasions where a cluster gets freed
             (on/off; default: off)
 
+        ``discard-no-unref``
+            When enabled, data clusters will remain preallocated when they are
+            no longer used, e.g. because they are discarded or converted to
+            zero clusters. As usual, whether the old data is discarded or kept
+            on the protocol level (i.e. in the image file) depends on the
+            setting of the pass-discard-request option. Keeping the clusters
+            preallocated prevents qcow2 fragmentation that would otherwise be
+            caused by freeing and re-allocating them later. Besides potential
+            performance degradation, such fragmentation can lead to increased
+            allocation of clusters past the end of the image file,
+            resulting in image files whose file length can grow much larger
+            than their guest disk size would suggest.
+            If image file length is of concern (e.g. when storing qcow2
+            images directly on block devices), you should consider enabling
+            this option.
+
         ``overlap-check``
             Which overlap checks to perform for writes to the image
             (none/constant/cached/all; default: cached). For details or
@@ -1208,7 +1540,7 @@ SRST
         the bus number and the unit id.
 
     ``index=index``
-        This option defines where is connected the drive by using an
+        This option defines where the drive is connected by using an
         index in the list of available connectors of a given interface
         type.
 
@@ -1362,7 +1694,7 @@ SRST
 
     .. parsed-literal::
 
-        |qemu_system_x86| -drive file=a -drive file=b"
+        |qemu_system_x86| -drive file=a -drive file=b
 
     is interpreted like:
 
@@ -1386,13 +1718,6 @@ SRST
     Use file as SecureDigital card image.
 ERST
 
-DEF("pflash", HAS_ARG, QEMU_OPTION_pflash,
-    "-pflash file    use 'file' as a parallel flash image\n", QEMU_ARCH_ALL)
-SRST
-``-pflash file``
-    Use file as a parallel flash image.
-ERST
-
 DEF("snapshot", 0, QEMU_OPTION_snapshot,
     "-snapshot       write to temporary files instead of disk image files\n",
     QEMU_ARCH_ALL)
@@ -1402,36 +1727,46 @@ SRST
     the raw disk image you use is not written back. You can however
     force the write back by pressing C-a s (see the :ref:`disk images`
     chapter in the System Emulation Users Guide).
+
+    .. warning::
+       snapshot is incompatible with ``-blockdev`` (instead use qemu-img
+       to manually create snapshot images to attach to your blockdev).
+       If you have mixed ``-blockdev`` and ``-drive`` declarations you
+       can use the 'snapshot' property on your drive declarations
+       instead of this global option.
+
 ERST
 
 DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev,
     "-fsdev local,id=id,path=path,security_model=mapped-xattr|mapped-file|passthrough|none\n"
-    " [,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode]\n"
+    " [,writeout=immediate][,readonly=on][,fmode=fmode][,dmode=dmode]\n"
     " [[,throttling.bps-total=b]|[[,throttling.bps-read=r][,throttling.bps-write=w]]]\n"
     " [[,throttling.iops-total=i]|[[,throttling.iops-read=r][,throttling.iops-write=w]]]\n"
     " [[,throttling.bps-total-max=bm]|[[,throttling.bps-read-max=rm][,throttling.bps-write-max=wm]]]\n"
     " [[,throttling.iops-total-max=im]|[[,throttling.iops-read-max=irm][,throttling.iops-write-max=iwm]]]\n"
     " [[,throttling.iops-size=is]]\n"
-    "-fsdev proxy,id=id,socket=socket[,writeout=immediate][,readonly]\n"
-    "-fsdev proxy,id=id,sock_fd=sock_fd[,writeout=immediate][,readonly]\n"
+    "-fsdev proxy,id=id,socket=socket[,writeout=immediate][,readonly=on]\n"
+    "-fsdev proxy,id=id,sock_fd=sock_fd[,writeout=immediate][,readonly=on]\n"
     "-fsdev synth,id=id\n",
     QEMU_ARCH_ALL)
 
 SRST
-``-fsdev local,id=id,path=path,security_model=security_model [,writeout=writeout][,readonly][,fmode=fmode][,dmode=dmode] [,throttling.option=value[,throttling.option=value[,...]]]``
+``-fsdev local,id=id,path=path,security_model=security_model [,writeout=writeout][,readonly=on][,fmode=fmode][,dmode=dmode] [,throttling.option=value[,throttling.option=value[,...]]]``
   \ 
-``-fsdev proxy,id=id,socket=socket[,writeout=writeout][,readonly]``
+``-fsdev proxy,id=id,socket=socket[,writeout=writeout][,readonly=on]``
   \
-``-fsdev proxy,id=id,sock_fd=sock_fd[,writeout=writeout][,readonly]``
+``-fsdev proxy,id=id,sock_fd=sock_fd[,writeout=writeout][,readonly=on]``
   \
-``-fsdev synth,id=id[,readonly]``
+``-fsdev synth,id=id[,readonly=on]``
     Define a new file system device. Valid options are:
 
     ``local``
         Accesses to the filesystem are done by QEMU.
 
     ``proxy``
-        Accesses to the filesystem are done by virtfs-proxy-helper(1).
+        Accesses to the filesystem are done by virtfs-proxy-helper(1). This
+        option is deprecated (since QEMU 8.1) and will be removed in a future
+        version of QEMU. Use ``local`` instead.
 
     ``synth``
         Synthetic filesystem, only used by QTests.
@@ -1467,7 +1802,7 @@ SRST
         guest only when the data has been reported as written by the
         storage subsystem.
 
-    ``readonly``
+    ``readonly=on``
         Enables exporting 9p share as a readonly mount for guests. By
         default read-write access is given.
 
@@ -1532,18 +1867,18 @@ ERST
 
 DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs,
     "-virtfs local,path=path,mount_tag=tag,security_model=mapped-xattr|mapped-file|passthrough|none\n"
-    "        [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode][,multidevs=remap|forbid|warn]\n"
-    "-virtfs proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly]\n"
-    "-virtfs proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonly]\n"
-    "-virtfs synth,mount_tag=tag[,id=id][,readonly]\n",
+    "        [,id=id][,writeout=immediate][,readonly=on][,fmode=fmode][,dmode=dmode][,multidevs=remap|forbid|warn]\n"
+    "-virtfs proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly=on]\n"
+    "-virtfs proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonly=on]\n"
+    "-virtfs synth,mount_tag=tag[,id=id][,readonly=on]\n",
     QEMU_ARCH_ALL)
 
 SRST
-``-virtfs local,path=path,mount_tag=mount_tag ,security_model=security_model[,writeout=writeout][,readonly] [,fmode=fmode][,dmode=dmode][,multidevs=multidevs]``
+``-virtfs local,path=path,mount_tag=mount_tag ,security_model=security_model[,writeout=writeout][,readonly=on] [,fmode=fmode][,dmode=dmode][,multidevs=multidevs]``
   \ 
-``-virtfs proxy,socket=socket,mount_tag=mount_tag [,writeout=writeout][,readonly]``
+``-virtfs proxy,socket=socket,mount_tag=mount_tag [,writeout=writeout][,readonly=on]``
   \ 
-``-virtfs proxy,sock_fd=sock_fd,mount_tag=mount_tag [,writeout=writeout][,readonly]``
+``-virtfs proxy,sock_fd=sock_fd,mount_tag=mount_tag [,writeout=writeout][,readonly=on]``
   \
 ``-virtfs synth,mount_tag=mount_tag``
     Define a new virtual filesystem device and expose it to the guest using
@@ -1551,7 +1886,7 @@ SRST
     directory on host is made directly accessible by guest as a pass-through
     file system by using the 9P network protocol for communication between
     host and guests, if desired even accessible, shared by several guests
-    simultaniously.
+    simultaneously.
 
     Note that ``-virtfs`` is actually just a convenience shortcut for its
     generalized form ``-fsdev -device virtio-9p-pci``.
@@ -1563,6 +1898,8 @@ SRST
 
     ``proxy``
         Accesses to the filesystem are done by virtfs-proxy-helper(1).
+        This option is deprecated (since QEMU 8.1) and will be removed in a
+        future version of QEMU. Use ``local`` instead.
 
     ``synth``
         Synthetic filesystem, only used by QTests.
@@ -1598,7 +1935,7 @@ SRST
         guest only when the data has been reported as written by the
         storage subsystem.
 
-    ``readonly``
+    ``readonly=on``
         Enables exporting 9p share as a readonly mount for guests. By
         default read-write access is given.
 
@@ -1655,8 +1992,8 @@ SRST
 ERST
 
 DEF("iscsi", HAS_ARG, QEMU_OPTION_iscsi,
-    "-iscsi [user=user][,password=password]\n"
-    "       [,header-digest=CRC32C|CR32C-NONE|NONE-CRC32C|NONE\n"
+    "-iscsi [user=user][,password=password][,password-secret=secret-id]\n"
+    "       [,header-digest=CRC32C|CR32C-NONE|NONE-CRC32C|NONE]\n"
     "       [,initiator-name=initiator-iqn][,id=target-iqn]\n"
     "       [,timeout=timeout]\n"
     "                iSCSI session parameters\n", QEMU_ARCH_ALL)
@@ -1668,7 +2005,7 @@ ERST
 
 DEFHEADING()
 
-DEFHEADING(USB options:)
+DEFHEADING(USB convenience options:)
 
 DEF("usb", 0, QEMU_OPTION_usb,
     "-usb            enable on-board USB host controller (if not enabled by default)\n",
@@ -1686,9 +2023,28 @@ DEF("usbdevice", HAS_ARG, QEMU_OPTION_usbdevice,
     QEMU_ARCH_ALL)
 SRST
 ``-usbdevice devname``
-    Add the USB device devname. Note that this option is deprecated,
-    please use ``-device usb-...`` instead. See the chapter about
+    Add the USB device devname, and enable an on-board USB controller
+    if possible and necessary (just like it can be done via
+    ``-machine usb=on``). Note that this option is mainly intended for
+    the user's convenience only. More fine-grained control can be
+    achieved by selecting a USB host controller (if necessary) and the
+    desired USB device via the ``-device`` option instead. For example,
+    instead of using ``-usbdevice mouse`` it is possible to use
+    ``-device qemu-xhci -device usb-mouse`` to connect the USB mouse
+    to a USB 3.0 controller instead (at least on machines that support
+    PCI and do not have an USB controller enabled by default yet).
+    For more details, see the chapter about
     :ref:`Connecting USB devices` in the System Emulation Users Guide.
+    Possible devices for devname are:
+
+    ``braille``
+        Braille device. This will use BrlAPI to display the braille
+        output on a real or fake device (i.e. it also creates a
+        corresponding ``braille`` chardev automatically beside the
+        ``usb-braille`` USB device).
+
+    ``keyboard``
+        Standard USB keyboard. Will override the PS/2 keyboard (if present).
 
     ``mouse``
         Virtual Mouse. This will override the PS/2 mouse emulation when
@@ -1700,9 +2056,10 @@ SRST
         position without having to grab the mouse. Also overrides the
         PS/2 mouse emulation when activated.
 
-    ``braille``
-        Braille device. This will use BrlAPI to display the braille
-        output on a real or fake device.
+    ``wacom-tablet``
+        Wacom PenPartner USB tablet.
+
+
 ERST
 
 DEFHEADING()
@@ -1714,11 +2071,13 @@ DEF("display", HAS_ARG, QEMU_OPTION_display,
     "-display spice-app[,gl=on|off]\n"
 #endif
 #if defined(CONFIG_SDL)
-    "-display sdl[,alt_grab=on|off][,ctrl_grab=on|off]\n"
-    "            [,window_close=on|off][,gl=on|core|es|off]\n"
+    "-display sdl[,gl=on|core|es|off][,grab-mod=<mod>][,show-cursor=on|off]\n"
+    "            [,window-close=on|off]\n"
 #endif
 #if defined(CONFIG_GTK)
-    "-display gtk[,grab_on_hover=on|off][,gl=on|off]|\n"
+    "-display gtk[,full-screen=on|off][,gl=on|off][,grab-on-hover=on|off]\n"
+    "            [,show-tabs=on|off][,show-cursor=on|off][,window-close=on|off]\n"
+    "            [,show-menubar=on|off]\n"
 #endif
 #if defined(CONFIG_VNC)
     "-display vnc=<display>[,<optargs>]\n"
@@ -1726,8 +2085,18 @@ DEF("display", HAS_ARG, QEMU_OPTION_display,
 #if defined(CONFIG_CURSES)
     "-display curses[,charset=<encoding>]\n"
 #endif
+#if defined(CONFIG_COCOA)
+    "-display cocoa[,full-grab=on|off][,swap-opt-cmd=on|off]\n"
+#endif
 #if defined(CONFIG_OPENGL)
     "-display egl-headless[,rendernode=<file>]\n"
+#endif
+#if defined(CONFIG_DBUS_DISPLAY)
+    "-display dbus[,addr=<dbusaddr>]\n"
+    "             [,gl=on|core|es|off][,rendernode=<file>]\n"
+#endif
+#if defined(CONFIG_COCOA)
+    "-display cocoa[,show-cursor=on|off][,left-command-key=on|off]\n"
 #endif
     "-display none\n"
     "                select display backend type\n"
@@ -1746,15 +2115,67 @@ DEF("display", HAS_ARG, QEMU_OPTION_display,
     , QEMU_ARCH_ALL)
 SRST
 ``-display type``
-    Select type of display to use. This option is a replacement for the
-    old style -sdl/-curses/... options. Use ``-display help`` to list
-    the available display types. Valid values for type are
+    Select type of display to use. Use ``-display help`` to list the available
+    display types. Valid values for type are
+
+    ``spice-app[,gl=on|off]``
+        Start QEMU as a Spice server and launch the default Spice client
+        application. The Spice server will redirect the serial consoles
+        and QEMU monitors. (Since 4.0)
+
+    ``dbus``
+        Export the display over D-Bus interfaces. (Since 7.0)
+
+        The connection is registered with the "org.qemu" name (and queued when
+        already owned).
+
+        ``addr=<dbusaddr>`` : D-Bus bus address to connect to.
+
+        ``p2p=yes|no`` : Use peer-to-peer connection, accepted via QMP ``add_client``.
+
+        ``gl=on|off|core|es`` : Use OpenGL for rendering (the D-Bus interface
+        will share framebuffers with DMABUF file descriptors).
 
     ``sdl``
         Display video output via SDL (usually in a separate graphics
         window; see the SDL documentation for other possibilities).
+        Valid parameters are:
+
+        ``grab-mod=<mods>`` : Used to select the modifier keys for toggling
+        the mouse grabbing in conjunction with the "g" key. ``<mods>`` can be
+        either ``lshift-lctrl-lalt`` or ``rctrl``.
 
-    ``curses``
+        ``gl=on|off|core|es`` : Use OpenGL for displaying
+
+        ``show-cursor=on|off`` :  Force showing the mouse cursor
+
+        ``window-close=on|off`` : Allow to quit qemu with window close button
+
+    ``gtk``
+        Display video output in a GTK window. This interface provides
+        drop-down menus and other UI elements to configure and control
+        the VM during runtime. Valid parameters are:
+
+        ``full-screen=on|off`` : Start in fullscreen mode
+
+        ``gl=on|off`` : Use OpenGL for displaying
+
+        ``grab-on-hover=on|off`` : Grab keyboard input on mouse hover
+
+        ``show-tabs=on|off`` : Display the tab bar for switching between the
+                               various graphical interfaces (e.g. VGA and
+                               virtual console character devices) by default.
+
+        ``show-cursor=on|off`` :  Force showing the mouse cursor
+
+        ``window-close=on|off`` : Allow to quit qemu with window close button
+
+        ``show-menubar=on|off`` : Display the main window menubar, defaults to "on"
+
+        ``zoom-to-fit=on|off`` : Expand video output to the window size,
+                                 defaults to "off"
+
+    ``curses[,charset=<encoding>]``
         Display video output via curses. For graphics device models
         which support a text mode, QEMU can display this output using a
         curses/ncurses interface. Nothing is displayed when the graphics
@@ -1765,31 +2186,30 @@ SRST
         ``charset=CP850`` for IBM CP850 encoding. The default is
         ``CP437``.
 
-    ``none``
-        Do not display video output. The guest will still see an
-        emulated graphics card, but its output will not be displayed to
-        the QEMU user. This option differs from the -nographic option in
-        that it only affects what is done with video output; -nographic
-        also changes the destination of the serial and parallel port
-        data.
+    ``cocoa``
+        Display video output in a Cocoa window. Mac only. This interface
+        provides drop-down menus and other UI elements to configure and
+        control the VM during runtime. Valid parameters are:
 
-    ``gtk``
-        Display video output in a GTK window. This interface provides
-        drop-down menus and other UI elements to configure and control
-        the VM during runtime.
+        ``show-cursor=on|off`` :  Force showing the mouse cursor
 
-    ``vnc``
-        Start a VNC server on display <arg>
+        ``left-command-key=on|off`` : Disable forwarding left command key to host
 
-    ``egl-headless``
+    ``egl-headless[,rendernode=<file>]``
         Offload all OpenGL operations to a local DRI device. For any
         graphical display, this display needs to be paired with either
         VNC or SPICE displays.
 
-    ``spice-app``
-        Start QEMU as a Spice server and launch the default Spice client
-        application. The Spice server will redirect the serial consoles
-        and QEMU monitors. (Since 4.0)
+    ``vnc=<display>``
+        Start a VNC server on display <display>
+
+    ``none``
+        Do not display video output. The guest will still see an
+        emulated graphics card, but its output will not be displayed to
+        the QEMU user. This option differs from the -nographic option in
+        that it only affects what is done with video output; -nographic
+        also changes the destination of the serial and parallel port
+        data.
 ERST
 
 DEF("nographic", 0, QEMU_OPTION_nographic,
@@ -1807,71 +2227,29 @@ SRST
     Use C-a h for help on switching between the console and monitor.
 ERST
 
-DEF("curses", 0, QEMU_OPTION_curses,
-    "-curses         shorthand for -display curses\n",
-    QEMU_ARCH_ALL)
-SRST
-``-curses``
-    Normally, if QEMU is compiled with graphical window support, it
-    displays output such as guest graphics, guest console, and the QEMU
-    monitor in a window. With this option, QEMU can display the VGA
-    output when in text mode using a curses/ncurses interface. Nothing
-    is displayed in graphical mode.
-ERST
-
-DEF("alt-grab", 0, QEMU_OPTION_alt_grab,
-    "-alt-grab       use Ctrl-Alt-Shift to grab mouse (instead of Ctrl-Alt)\n",
-    QEMU_ARCH_ALL)
-SRST
-``-alt-grab``
-    Use Ctrl-Alt-Shift to grab mouse (instead of Ctrl-Alt). Note that
-    this also affects the special keys (for fullscreen, monitor-mode
-    switching, etc).
-ERST
-
-DEF("ctrl-grab", 0, QEMU_OPTION_ctrl_grab,
-    "-ctrl-grab      use Right-Ctrl to grab mouse (instead of Ctrl-Alt)\n",
-    QEMU_ARCH_ALL)
-SRST
-``-ctrl-grab``
-    Use Right-Ctrl to grab mouse (instead of Ctrl-Alt). Note that this
-    also affects the special keys (for fullscreen, monitor-mode
-    switching, etc).
-ERST
-
-DEF("no-quit", 0, QEMU_OPTION_no_quit,
-    "-no-quit        disable SDL window close capability\n", QEMU_ARCH_ALL)
-SRST
-``-no-quit``
-    Disable SDL window close capability.
-ERST
-
-DEF("sdl", 0, QEMU_OPTION_sdl,
-    "-sdl            shorthand for -display sdl\n", QEMU_ARCH_ALL)
-SRST
-``-sdl``
-    Enable SDL.
-ERST
-
+#ifdef CONFIG_SPICE
 DEF("spice", HAS_ARG, QEMU_OPTION_spice,
     "-spice [port=port][,tls-port=secured-port][,x509-dir=<dir>]\n"
     "       [,x509-key-file=<file>][,x509-key-password=<file>]\n"
     "       [,x509-cert-file=<file>][,x509-cacert-file=<file>]\n"
-    "       [,x509-dh-key-file=<file>][,addr=addr][,ipv4|ipv6|unix]\n"
+    "       [,x509-dh-key-file=<file>][,addr=addr]\n"
+    "       [,ipv4=on|off][,ipv6=on|off][,unix=on|off]\n"
     "       [,tls-ciphers=<list>]\n"
     "       [,tls-channel=[main|display|cursor|inputs|record|playback]]\n"
     "       [,plaintext-channel=[main|display|cursor|inputs|record|playback]]\n"
-    "       [,sasl][,password=<secret>][,disable-ticketing]\n"
+    "       [,sasl=on|off][,disable-ticketing=on|off]\n"
+    "       [,password-secret=<secret-id>]\n"
     "       [,image-compression=[auto_glz|auto_lz|quic|glz|lz|off]]\n"
     "       [,jpeg-wan-compression=[auto|never|always]]\n"
     "       [,zlib-glz-wan-compression=[auto|never|always]]\n"
-    "       [,streaming-video=[off|all|filter]][,disable-copy-paste]\n"
-    "       [,disable-agent-file-xfer][,agent-mouse=[on|off]]\n"
+    "       [,streaming-video=[off|all|filter]][,disable-copy-paste=on|off]\n"
+    "       [,disable-agent-file-xfer=on|off][,agent-mouse=[on|off]]\n"
     "       [,playback-compression=[on|off]][,seamless-migration=[on|off]]\n"
     "       [,gl=[on|off]][,rendernode=<file>]\n"
-    "   enable spice\n"
-    "   at least one of {port, tls-port} is mandatory\n",
+    "                enable spice\n"
+    "                at least one of {port, tls-port} is mandatory\n",
     QEMU_ARCH_ALL)
+#endif
 SRST
 ``-spice option[,option[,...]]``
     Enable the spice remote desktop protocol. Valid options are
@@ -1883,13 +2261,14 @@ SRST
         Set the IP address spice is listening on. Default is any
         address.
 
-    ``ipv4``; \ ``ipv6``; \ ``unix``
+    ``ipv4=on|off``; \ ``ipv6=on|off``; \ ``unix=on|off``
         Force using the specified IP version.
 
-    ``password=<secret>``
-        Set the password you need to authenticate.
+    ``password-secret=<secret-id>``
+        Set the ID of the ``secret`` object containing the password
+        you need to authenticate.
 
-    ``sasl``
+    ``sasl=on|off``
         Require that the client use SASL to authenticate with the spice.
         The exact choice of authentication method used is controlled
         from the system / user's SASL configuration file for the 'qemu'
@@ -1903,13 +2282,13 @@ SRST
         data encryption preventing compromise of authentication
         credentials.
 
-    ``disable-ticketing``
+    ``disable-ticketing=on|off``
         Allow client connects without authentication.
 
-    ``disable-copy-paste``
+    ``disable-copy-paste=on|off``
         Disable copy paste between the client and the guest.
 
-    ``disable-agent-file-xfer``
+    ``disable-agent-file-xfer=on|off``
         Disable spice-vdagent based file-xfer between the client and the
         guest.
 
@@ -2049,8 +2428,10 @@ SRST
     OBP.
 ERST
 
+#ifdef CONFIG_VNC
 DEF("vnc", HAS_ARG, QEMU_OPTION_vnc ,
     "-vnc <display>  shorthand for -display vnc=<display>\n", QEMU_ARCH_ALL)
+#endif
 SRST
 ``-vnc display[,option[,option[,...]]]``
     Normally, if QEMU is compiled with graphical window support, it
@@ -2085,13 +2466,13 @@ SRST
     Following the display value there may be one or more option flags
     separated by commas. Valid options are
 
-    ``reverse``
+    ``reverse=on|off``
         Connect to a listening VNC client via a "reverse" connection.
         The client is specified by the display. For reverse network
         connections (host:d,``reverse``), the d argument is a TCP port
         number, not a display number.
 
-    ``websocket``
+    ``websocket=on|off``
         Opens an additional TCP listening port dedicated to VNC
         Websocket connections. If a bare websocket option is given, the
         Websocket port is 5700+display. An alternative port can be
@@ -2105,7 +2486,7 @@ SRST
         runs in unencrypted mode. If TLS credentials are provided, the
         websocket connection requires encrypted client connections.
 
-    ``password``
+    ``password=on|off``
         Require that password based authentication is used for client
         connections.
 
@@ -2127,6 +2508,11 @@ SRST
         time to allow <protocol> password to expire immediately or never
         expire.
 
+    ``password-secret=<secret-id>``
+        Require that password based authentication is used for client
+        connections, using the password provided by the ``secret``
+        object identified by ``secret-id``.
+
     ``tls-creds=ID``
         Provides the ID of a set of TLS credentials to use to secure the
         VNC server. They will apply to both the normal VNC server socket
@@ -2142,7 +2528,7 @@ SRST
         on the fly while the VNC server is active. If missing, it will
         default to denying access.
 
-    ``sasl``
+    ``sasl=on|off``
         Require that the client use SASL to authenticate with the VNC
         server. The exact choice of authentication method used is
         controlled from the system / user's SASL configuration file for
@@ -2165,7 +2551,7 @@ SRST
         fly while the VNC server is active. If missing, it will default
         to denying access.
 
-    ``acl``
+    ``acl=on|off``
         Legacy method for enabling authorization of clients against the
         x509 distinguished name and SASL username. It results in the
         creation of two ``authz-list`` objects with IDs of
@@ -2175,13 +2561,13 @@ SRST
         This option is deprecated and should no longer be used. The new
         ``sasl-authz`` and ``tls-authz`` options are a replacement.
 
-    ``lossy``
+    ``lossy=on|off``
         Enable lossy compression methods (gradient, JPEG, ...). If this
         option is set, VNC client may receive lossy framebuffer updates
         depending on its encoding settings. Enabling this option can
         save a lot of bandwidth at the expense of quality.
 
-    ``non-adaptive``
+    ``non-adaptive=on|off``
         Disable adaptive encodings. Adaptive encodings are enabled by
         default. An adaptive encoding will try to detect frequently
         updated screen regions, and send updates in these regions using
@@ -2215,6 +2601,10 @@ SRST
         transmission. When not using an -audiodev argument, this option
         must be omitted, otherwise is must be present and specify a
         valid audiodev.
+
+    ``power-control=on|off``
+        Permit the remote client to issue shutdown, reboot or reset power
+        control requests.
 ERST
 
 ARCHHEADING(, QEMU_ARCH_I386)
@@ -2253,7 +2643,7 @@ DEF("no-hpet", 0, QEMU_OPTION_no_hpet,
     "-no-hpet        disable HPET\n", QEMU_ARCH_I386)
 SRST
 ``-no-hpet``
-    Disable HPET support.
+    Disable HPET support. Deprecated, use '-machine hpet=off' instead.
 ERST
 
 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable,
@@ -2289,13 +2679,18 @@ DEF("smbios", HAS_ARG, QEMU_OPTION_smbios,
     "                specify SMBIOS type 3 fields\n"
     "-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str]\n"
     "              [,asset=str][,part=str][,max-speed=%d][,current-speed=%d]\n"
+    "              [,processor-id=%d]\n"
     "                specify SMBIOS type 4 fields\n"
+    "-smbios type=8[,external_reference=str][,internal_reference=str][,connector_type=%d][,port_type=%d]\n"
+    "                specify SMBIOS type 8 fields\n"
     "-smbios type=11[,value=str][,path=filename]\n"
     "                specify SMBIOS type 11 fields\n"
     "-smbios type=17[,loc_pfx=str][,bank=str][,manufacturer=str][,serial=str]\n"
     "               [,asset=str][,part=str][,speed=%d]\n"
-    "                specify SMBIOS type 17 fields\n",
-    QEMU_ARCH_I386 | QEMU_ARCH_ARM)
+    "                specify SMBIOS type 17 fields\n"
+    "-smbios type=41[,designation=str][,kind=str][,instance=%d][,pcidev=str]\n"
+    "                specify SMBIOS type 41 fields\n",
+    QEMU_ARCH_I386 | QEMU_ARCH_ARM | QEMU_ARCH_LOONGARCH)
 SRST
 ``-smbios file=binary``
     Load SMBIOS entry from binary file.
@@ -2312,7 +2707,7 @@ SRST
 ``-smbios type=3[,manufacturer=str][,version=str][,serial=str][,asset=str][,sku=str]``
     Specify SMBIOS type 3 fields
 
-``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str]``
+``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,processor-id=%d]``
     Specify SMBIOS type 4 fields
 
 ``-smbios type=11[,value=str][,path=filename]``
@@ -2356,6 +2751,32 @@ SRST
 
 ``-smbios type=17[,loc_pfx=str][,bank=str][,manufacturer=str][,serial=str][,asset=str][,part=str][,speed=%d]``
     Specify SMBIOS type 17 fields
+
+``-smbios type=41[,designation=str][,kind=str][,instance=%d][,pcidev=str]``
+    Specify SMBIOS type 41 fields
+
+    This argument can be repeated multiple times.  Its main use is to allow network interfaces be created
+    as ``enoX`` on Linux, with X being the instance number, instead of the name depending on the interface
+    position on the PCI bus.
+
+    Here is an example of use:
+
+    .. parsed-literal::
+
+        -netdev user,id=internet \\
+        -device virtio-net-pci,mac=50:54:00:00:00:42,netdev=internet,id=internet-dev \\
+        -smbios type=41,designation='Onboard LAN',instance=1,kind=ethernet,pcidev=internet-dev
+
+    In the guest OS, the device should then appear as ``eno1``:
+
+    ..parsed-literal::
+
+         $ ip -brief l
+         lo               UNKNOWN        00:00:00:00:00:00 <LOOPBACK,UP,LOWER_UP>
+         eno1             UP             50:54:00:00:00:42 <BROADCAST,MULTICAST,UP,LOWER_UP>
+
+    Currently, the PCI device has to be attached to the root bus.
+
 ERST
 
 DEFHEADING()
@@ -2364,8 +2785,8 @@ DEFHEADING(Network options:)
 
 DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
 #ifdef CONFIG_SLIRP
-    "-netdev user,id=str[,ipv4[=on|off]][,net=addr[/mask]][,host=addr]\n"
-    "         [,ipv6[=on|off]][,ipv6-net=addr[/int]][,ipv6-host=addr]\n"
+    "-netdev user,id=str[,ipv4=on|off][,net=addr[/mask]][,host=addr]\n"
+    "         [,ipv6=on|off][,ipv6-net=addr[/int]][,ipv6-host=addr]\n"
     "         [,restrict=on|off][,hostname=host][,dhcpstart=addr]\n"
     "         [,dns=addr][,ipv6-dns=addr][,dnssearch=domain][,domainname=domain]\n"
     "         [,tftp=dir][,tftp-server-name=name][,bootfile=f][,hostfwd=rule][,guestfwd=rule]"
@@ -2403,7 +2824,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
     "                use 'vhostfd=h' to connect to an already opened vhost net device\n"
     "                use 'vhostfds=x:y:...:z to connect to multiple already opened vhost net devices\n"
     "                use 'queues=n' to specify the number of queues to be created for multiqueue TAP\n"
-    "                use 'poll-us=n' to speciy the maximum number of microseconds that could be\n"
+    "                use 'poll-us=n' to specify the maximum number of microseconds that could be\n"
     "                spent on busy polling for vhost net\n"
     "-netdev bridge,id=str[,br=bridge][,helper=helper]\n"
     "                configure a host TAP network backend with ID 'str' that is\n"
@@ -2412,8 +2833,8 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
 #endif
 #ifdef __linux__
     "-netdev l2tpv3,id=str,src=srcaddr,dst=dstaddr[,srcport=srcport][,dstport=dstport]\n"
-    "         [,rxsession=rxsession],txsession=txsession[,ipv6=on/off][,udp=on/off]\n"
-    "         [,cookie64=on/off][,counter][,pincounter][,txcookie=txcookie]\n"
+    "         [,rxsession=rxsession],txsession=txsession[,ipv6=on|off][,udp=on|off]\n"
+    "         [,cookie64=on|off][,counter][,pincounter][,txcookie=txcookie]\n"
     "         [,rxcookie=rxcookie][,offset=offset]\n"
     "                configure a network backend with ID 'str' connected to\n"
     "                an Ethernet over L2TPv3 pseudowire.\n"
@@ -2446,6 +2867,20 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
     "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n"
     "                configure a network backend to connect to another network\n"
     "                using an UDP tunnel\n"
+    "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]\n"
+    "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]\n"
+    "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]\n"
+    "                configure a network backend to connect to another network\n"
+    "                using a socket connection in stream mode.\n"
+    "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n"
+    "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=fd,local.str=file-descriptor]\n"
+    "                configure a network backend to connect to a multicast maddr and port\n"
+    "                use ``local.host=addr`` to specify the host address to send packets from\n"
+    "-netdev dgram,id=str,local.type=inet,local.host=addr,local.port=port[,remote.type=inet,remote.host=addr,remote.port=port]\n"
+    "-netdev dgram,id=str,local.type=unix,local.path=path[,remote.type=unix,remote.path=path]\n"
+    "-netdev dgram,id=str,local.type=fd,local.str=file-descriptor\n"
+    "                configure a network backend to connect to another network\n"
+    "                using an UDP tunnel\n"
 #ifdef CONFIG_VDE
     "-netdev vde,id=str[,sock=socketpath][,port=n][,group=groupname][,mode=octalmode]\n"
     "                configure a network backend to connect to port 'n' of a vde switch\n"
@@ -2459,13 +2894,47 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
     "                VALE port (created on the fly) called 'name' ('nmname' is name of the \n"
     "                netmap device, defaults to '/dev/netmap')\n"
 #endif
+#ifdef CONFIG_AF_XDP
+    "-netdev af-xdp,id=str,ifname=name[,mode=native|skb][,force-copy=on|off]\n"
+    "         [,queues=n][,start-queue=m][,inhibit=on|off][,sock-fds=x:y:...:z]\n"
+    "                attach to the existing network interface 'name' with AF_XDP socket\n"
+    "                use 'mode=MODE' to specify an XDP program attach mode\n"
+    "                use 'force-copy=on|off' to force XDP copy mode even if device supports zero-copy (default: off)\n"
+    "                use 'inhibit=on|off' to inhibit loading of a default XDP program (default: off)\n"
+    "                with inhibit=on,\n"
+    "                  use 'sock-fds' to provide file descriptors for already open AF_XDP sockets\n"
+    "                  added to a socket map in XDP program.  One socket per queue.\n"
+    "                use 'queues=n' to specify how many queues of a multiqueue interface should be used\n"
+    "                use 'start-queue=m' to specify the first queue that should be used\n"
+#endif
 #ifdef CONFIG_POSIX
     "-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n"
     "                configure a vhost-user network, backed by a chardev 'dev'\n"
 #endif
 #ifdef __linux__
-    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
+    "-netdev vhost-vdpa,id=str[,vhostdev=/path/to/dev][,vhostfd=h]\n"
     "                configure a vhost-vdpa network,Establish a vhost-vdpa netdev\n"
+    "                use 'vhostdev=/path/to/dev' to open a vhost vdpa device\n"
+    "                use 'vhostfd=h' to connect to an already opened vhost vdpa device\n"
+#endif
+#ifdef CONFIG_VMNET
+    "-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n"
+    "         [,start-address=addr,end-address=addr,subnet-mask=mask]\n"
+    "                configure a vmnet network backend in host mode with ID 'str',\n"
+    "                isolate this interface from others with 'isolated',\n"
+    "                configure the address range and choose a subnet mask,\n"
+    "                specify network UUID 'uuid' to disable DHCP and interact with\n"
+    "                vmnet-host interfaces within this isolated network\n"
+    "-netdev vmnet-shared,id=str[,isolated=on|off][,nat66-prefix=addr]\n"
+    "         [,start-address=addr,end-address=addr,subnet-mask=mask]\n"
+    "                configure a vmnet network backend in shared mode with ID 'str',\n"
+    "                configure the address range and choose a subnet mask,\n"
+    "                set IPv6 ULA prefix (of length 64) to use for internal network,\n"
+    "                isolate this interface from others with 'isolated'\n"
+    "-netdev vmnet-bridged,id=str,ifname=name[,isolated=on|off]\n"
+    "                configure a vmnet network backend in bridged mode with ID 'str',\n"
+    "                use 'ifname=name' to select a physical network interface to be bridged,\n"
+    "                isolate this interface from others with 'isolated'\n"
 #endif
     "-netdev hubport,id=str,hubid=n[,netdev=nd]\n"
     "                configure a hub port on the hub with ID 'n'\n", QEMU_ARCH_ALL)
@@ -2483,8 +2952,14 @@ DEF("nic", HAS_ARG, QEMU_OPTION_nic,
 #ifdef CONFIG_NETMAP
     "netmap|"
 #endif
+#ifdef CONFIG_AF_XDP
+    "af-xdp|"
+#endif
 #ifdef CONFIG_POSIX
     "vhost-user|"
+#endif
+#ifdef CONFIG_VMNET
+    "vmnet-host|vmnet-shared|vmnet-bridged|"
 #endif
     "socket][,option][,...][mac=macaddr]\n"
     "                initialize an on-board / default host NIC (using MAC address\n"
@@ -2507,12 +2982,18 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
 #endif
 #ifdef CONFIG_NETMAP
     "netmap|"
+#endif
+#ifdef CONFIG_AF_XDP
+    "af-xdp|"
+#endif
+#ifdef CONFIG_VMNET
+    "vmnet-host|vmnet-shared|vmnet-bridged|"
 #endif
     "socket][,option][,option][,...]\n"
     "                old way to initialize a host network interface\n"
     "                (use the -netdev option if possible instead)\n", QEMU_ARCH_ALL)
 SRST
-``-nic [tap|bridge|user|l2tpv3|vde|netmap|vhost-user|socket][,...][,mac=macaddr][,model=mn]``
+``-nic [tap|bridge|user|l2tpv3|vde|netmap|af-xdp|vhost-user|socket][,...][,mac=macaddr][,model=mn]``
     This option is a shortcut for configuring both the on-board
     (default) guest NIC hardware and the host network backend in one go.
     The host backend options are the same as with the corresponding
@@ -2842,7 +3323,7 @@ SRST
                          -device e1000,netdev=n1,mac=52:54:00:12:34:56 \\
                          -netdev socket,id=n1,mcast=239.192.168.1:1102,localaddr=1.2.3.4
 
-``-netdev l2tpv3,id=id,src=srcaddr,dst=dstaddr[,srcport=srcport][,dstport=dstport],txsession=txsession[,rxsession=rxsession][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=txcookie][,rxcookie=rxcookie][,offset=offset]``
+``-netdev l2tpv3,id=id,src=srcaddr,dst=dstaddr[,srcport=srcport][,dstport=dstport],txsession=txsession[,rxsession=rxsession][,ipv6=on|off][,udp=on|off][,cookie64][,counter][,pincounter][,txcookie=txcookie][,rxcookie=rxcookie][,offset=offset]``
     Configure a L2TPv3 pseudowire host network backend. L2TPv3 (RFC3931)
     is a popular protocol to transport Ethernet (and other Layer 2) data
     frames between two systems. It is present in routers, firewalls and
@@ -2926,6 +3407,55 @@ SRST
         # launch QEMU instance
         |qemu_system| linux.img -nic vde,sock=/tmp/myswitch
 
+``-netdev af-xdp,id=str,ifname=name[,mode=native|skb][,force-copy=on|off][,queues=n][,start-queue=m][,inhibit=on|off][,sock-fds=x:y:...:z]``
+    Configure AF_XDP backend to connect to a network interface 'name'
+    using AF_XDP socket.  A specific program attach mode for a default
+    XDP program can be forced with 'mode', defaults to best-effort,
+    where the likely most performant mode will be in use.  Number of queues
+    'n' should generally match the number or queues in the interface,
+    defaults to 1.  Traffic arriving on non-configured device queues will
+    not be delivered to the network backend.
+
+    .. parsed-literal::
+
+        # set number of queues to 4
+        ethtool -L eth0 combined 4
+        # launch QEMU instance
+        |qemu_system| linux.img -device virtio-net-pci,netdev=n1 \\
+            -netdev af-xdp,id=n1,ifname=eth0,queues=4
+
+    'start-queue' option can be specified if a particular range of queues
+    [m, m + n] should be in use.  For example, this is may be necessary in
+    order to use certain NICs in native mode.  Kernel allows the driver to
+    create a separate set of XDP queues on top of regular ones, and only
+    these queues can be used for AF_XDP sockets.  NICs that work this way
+    may also require an additional traffic redirection with ethtool to these
+    special queues.
+
+    .. parsed-literal::
+
+        # set number of queues to 1
+        ethtool -L eth0 combined 1
+        # redirect all the traffic to the second queue (id: 1)
+        # note: drivers may require non-empty key/mask pair.
+        ethtool -N eth0 flow-type ether \\
+            dst 00:00:00:00:00:00 m FF:FF:FF:FF:FF:FE action 1
+        ethtool -N eth0 flow-type ether \\
+            dst 00:00:00:00:00:01 m FF:FF:FF:FF:FF:FE action 1
+        # launch QEMU instance
+        |qemu_system| linux.img -device virtio-net-pci,netdev=n1 \\
+            -netdev af-xdp,id=n1,ifname=eth0,queues=1,start-queue=1
+
+    XDP program can also be loaded externally.  In this case 'inhibit' option
+    should be set to 'on' and 'sock-fds' provided with file descriptors for
+    already open but not bound XDP sockets already added to a socket map for
+    corresponding queues.  One socket per queue.
+
+    .. parsed-literal::
+
+        |qemu_system| linux.img -device virtio-net-pci,netdev=n1 \\
+            -netdev af-xdp,id=n1,ifname=eth0,queues=3,inhibit=on,sock-fds=15:16:17
+
 ``-netdev vhost-user,chardev=id[,vhostforce=on|off][,queues=n]``
     Establish a vhost-user netdev, backed by a chardev id. The chardev
     should be a unix domain socket backed one. The vhost-user uses a
@@ -2945,7 +3475,7 @@ SRST
              -netdev type=vhost-user,id=net0,chardev=chr0 \
              -device virtio-net-pci,netdev=net0
 
-``-netdev vhost-vdpa,vhostdev=/path/to/dev``
+``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
     Establish a vhost-vdpa netdev.
 
     vDPA device is a device that uses a datapath which complies with
@@ -2991,19 +3521,19 @@ DEFHEADING(Character device options:)
 DEF("chardev", HAS_ARG, QEMU_OPTION_chardev,
     "-chardev help\n"
     "-chardev null,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
-    "-chardev socket,id=id[,host=host],port=port[,to=to][,ipv4][,ipv6][,nodelay][,reconnect=seconds]\n"
-    "         [,server][,nowait][,telnet][,websocket][,reconnect=seconds][,mux=on|off]\n"
+    "-chardev socket,id=id[,host=host],port=port[,to=to][,ipv4=on|off][,ipv6=on|off][,nodelay=on|off]\n"
+    "         [,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect=seconds][,mux=on|off]\n"
     "         [,logfile=PATH][,logappend=on|off][,tls-creds=ID][,tls-authz=ID] (tcp)\n"
-    "-chardev socket,id=id,path=path[,server][,nowait][,telnet][,websocket][,reconnect=seconds]\n"
+    "-chardev socket,id=id,path=path[,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect=seconds]\n"
     "         [,mux=on|off][,logfile=PATH][,logappend=on|off][,abstract=on|off][,tight=on|off] (unix)\n"
     "-chardev udp,id=id[,host=host],port=port[,localaddr=localaddr]\n"
-    "         [,localport=localport][,ipv4][,ipv6][,mux=on|off]\n"
+    "         [,localport=localport][,ipv4=on|off][,ipv6=on|off][,mux=on|off]\n"
     "         [,logfile=PATH][,logappend=on|off]\n"
     "-chardev msmouse,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
     "-chardev vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]]\n"
     "         [,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
     "-chardev ringbuf,id=id[,size=size][,logfile=PATH][,logappend=on|off]\n"
-    "-chardev file,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
+    "-chardev file,id=id,path=path[,input-path=input-file][,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
     "-chardev pipe,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
 #ifdef _WIN32
     "-chardev console,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
@@ -3018,11 +3548,9 @@ DEF("chardev", HAS_ARG, QEMU_OPTION_chardev,
 #if defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \
         || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
     "-chardev serial,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
-    "-chardev tty,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
 #endif
 #if defined(__linux__) || defined(__FreeBSD__) || defined(__DragonFly__)
     "-chardev parallel,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
-    "-chardev parport,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
 #endif
 #if defined(CONFIG_SPICE)
     "-chardev spicevmc,id=id,name=name[,debug=debug][,logfile=PATH][,logappend=on|off]\n"
@@ -3037,7 +3565,7 @@ The general form of a character device option is:
 ``-chardev backend,id=id[,mux=on|off][,options]``
     Backend is one of: ``null``, ``socket``, ``udp``, ``msmouse``,
     ``vc``, ``ringbuf``, ``file``, ``pipe``, ``console``, ``serial``,
-    ``pty``, ``stdio``, ``braille``, ``tty``, ``parallel``, ``parport``,
+    ``pty``, ``stdio``, ``braille``, ``parallel``,
     ``spicevmc``, ``spiceport``. The specific backend will determine the
     applicable options.
 
@@ -3107,21 +3635,21 @@ The available backends are:
     A void device. This device will not emit any data, and will drop any
     data it receives. The null backend does not take any options.
 
-``-chardev socket,id=id[,TCP options or unix options][,server][,nowait][,telnet][,websocket][,reconnect=seconds][,tls-creds=id][,tls-authz=id]``
+``-chardev socket,id=id[,TCP options or unix options][,server=on|off][,wait=on|off][,telnet=on|off][,websocket=on|off][,reconnect=seconds][,tls-creds=id][,tls-authz=id]``
     Create a two-way stream socket, which can be either a TCP or a unix
     socket. A unix socket will be created if ``path`` is specified.
     Behaviour is undefined if TCP options are specified for a unix
     socket.
 
-    ``server`` specifies that the socket shall be a listening socket.
+    ``server=on|off`` specifies that the socket shall be a listening socket.
 
-    ``nowait`` specifies that QEMU should not block waiting for a client
+    ``wait=on|off`` specifies that QEMU should not block waiting for a client
     to connect to a listening socket.
 
-    ``telnet`` specifies that traffic on the socket should interpret
+    ``telnet=on|off`` specifies that traffic on the socket should interpret
     telnet escape sequences.
 
-    ``websocket`` specifies that the socket uses WebSocket protocol for
+    ``websocket=on|off`` specifies that the socket uses WebSocket protocol for
     communication.
 
     ``reconnect`` sets the timeout for reconnecting on non-server
@@ -3142,7 +3670,7 @@ The available backends are:
 
     TCP and unix socket options are given below:
 
-    ``TCP options: port=port[,host=host][,to=to][,ipv4][,ipv6][,nodelay]``
+    ``TCP options: port=port[,host=host][,to=to][,ipv4=on|off][,ipv6=on|off][,nodelay=on|off]``
         ``host`` for a listening socket specifies the local address to
         be bound. For a connecting socket species the remote host to
         connect to. ``host`` is optional for listening sockets. If not
@@ -3158,21 +3686,21 @@ The available backends are:
         bind to subsequent ports up to and including ``to`` until it
         succeeds. ``to`` must be specified as a port number.
 
-        ``ipv4`` and ``ipv6`` specify that either IPv4 or IPv6 must be
-        used. If neither is specified the socket may use either
-        protocol.
+        ``ipv4=on|off`` and ``ipv6=on|off`` specify that either IPv4
+        or IPv6 must be used. If neither is specified the socket may
+        use either protocol.
 
-        ``nodelay`` disables the Nagle algorithm.
+        ``nodelay=on|off`` disables the Nagle algorithm.
 
     ``unix options: path=path[,abstract=on|off][,tight=on|off]``
         ``path`` specifies the local path of the unix socket. ``path``
         is required.
-        ``abstract`` specifies the use of the abstract socket namespace,
+        ``abstract=on|off`` specifies the use of the abstract socket namespace,
         rather than the filesystem.  Optional, defaults to false.
-        ``tight`` sets the socket length of abstract sockets to their minimum,
+        ``tight=on|off`` sets the socket length of abstract sockets to their minimum,
         rather than the full sun_path length.  Optional, defaults to true.
 
-``-chardev udp,id=id[,host=host],port=port[,localaddr=localaddr][,localport=localport][,ipv4][,ipv6]``
+``-chardev udp,id=id[,host=host],port=port[,localaddr=localaddr][,localport=localport][,ipv4=on|off][,ipv6=on|off]``
     Sends all traffic from the guest to a remote host over UDP.
 
     ``host`` specifies the remote host to connect to. If not specified
@@ -3187,7 +3715,7 @@ The available backends are:
     ``localport`` specifies the local port to bind to. If not specified
     any available local port will be used.
 
-    ``ipv4`` and ``ipv6`` specify that either IPv4 or IPv6 must be used.
+    ``ipv4=on|off`` and ``ipv6=on|off`` specify that either IPv4 or IPv6 must be used.
     If neither is specified the device may use either protocol.
 
 ``-chardev msmouse,id=id``
@@ -3208,13 +3736,19 @@ The available backends are:
     Create a ring buffer with fixed size ``size``. size must be a power
     of two and defaults to ``64K``.
 
-``-chardev file,id=id,path=path``
+``-chardev file,id=id,path=path[,input-path=input-path]``
     Log all traffic received from the guest to a file.
 
     ``path`` specifies the path of the file to be opened. This file will
     be created if it does not already exist, and overwritten if it does.
     ``path`` is required.
 
+    If ``input-path`` is specified, this is the path of a second file
+    which will be used for input. If ``input-path`` is not specified,
+    no input will be available from the chardev.
+
+    Note that ``input-path`` is not supported on Windows hosts.
+
 ``-chardev pipe,id=id,path=path``
     Create a two-way connection to the guest. The behaviour differs
     slightly between Windows hosts and other hosts:
@@ -3261,15 +3795,8 @@ The available backends are:
     Connect to a local BrlAPI server. ``braille`` does not take any
     options.
 
-``-chardev tty,id=id,path=path``
-    ``tty`` is only available on Linux, Sun, FreeBSD, NetBSD, OpenBSD
-    and DragonFlyBSD hosts. It is an alias for ``serial``.
-
-    ``path`` specifies the path to the tty. ``path`` is required.
-
 ``-chardev parallel,id=id,path=path``
   \
-``-chardev parport,id=id,path=path``
     ``parallel`` is only available on Linux, FreeBSD and DragonFlyBSD
     hosts.
 
@@ -3379,12 +3906,67 @@ DEFHEADING()
 
 #endif
 
-DEFHEADING(Linux/Multiboot boot specific:)
+DEFHEADING(Boot Image or Kernel specific:)
+SRST
+There are broadly 4 ways you can boot a system with QEMU.
+
+ - specify a firmware and let it control finding a kernel
+ - specify a firmware and pass a hint to the kernel to boot
+ - direct kernel image boot
+ - manually load files into the guest's address space
+
+The third method is useful for quickly testing kernels but as there is
+no firmware to pass configuration information to the kernel the
+hardware must either be probeable, the kernel built for the exact
+configuration or passed some configuration data (e.g. a DTB blob)
+which tells the kernel what drivers it needs. This exact details are
+often hardware specific.
+
+The final method is the most generic way of loading images into the
+guest address space and used mostly for ``bare metal`` type
+development where the reset vectors of the processor are taken into
+account.
+
+ERST
+
+SRST
+
+For x86 machines and some other architectures ``-bios`` will generally
+do the right thing with whatever it is given. For other machines the
+more strict ``-pflash`` option needs an image that is sized for the
+flash device for the given machine type.
+
+Please see the :ref:`system-targets-ref` section of the manual for
+more detailed documentation.
+
+ERST
+
+DEF("bios", HAS_ARG, QEMU_OPTION_bios, \
+    "-bios file      set the filename for the BIOS\n", QEMU_ARCH_ALL)
+SRST
+``-bios file``
+    Set the filename for the BIOS.
+ERST
+
+DEF("pflash", HAS_ARG, QEMU_OPTION_pflash,
+    "-pflash file    use 'file' as a parallel flash image\n", QEMU_ARCH_ALL)
 SRST
-When using these options, you can use a given Linux or Multiboot kernel
-without installing it in the disk image. It can be useful for easier
-testing of various kernels.
+``-pflash file``
+    Use file as a parallel flash image.
+ERST
+
+SRST
+
+The kernel options were designed to work with Linux kernels although
+other things (like hypervisors) can be packaged up as a kernel
+executable image. The exact format of a executable image is usually
+architecture specific.
 
+The way in which the kernel is started (what address it is loaded at,
+what if any information is passed to it via CPU registers, the state
+of the hardware when it is started, and so on) is also architecture
+specific. Typically it follows the specification laid down by the
+Linux kernel for how kernels for that architecture must be started.
 
 ERST
 
@@ -3406,14 +3988,22 @@ ERST
 DEF("initrd", HAS_ARG, QEMU_OPTION_initrd, \
            "-initrd file    use 'file' as initial ram disk\n", QEMU_ARCH_ALL)
 SRST
+
 ``-initrd file``
     Use file as initial ram disk.
 
 ``-initrd "file1 arg=foo,file2"``
     This syntax is only available with multiboot.
 
-    Use file1 and file2 as modules and pass arg=foo as parameter to the
-    first module.
+    Use file1 and file2 as modules and pass ``arg=foo`` as parameter to the
+    first module. Commas can be provided in module parameters by doubling
+    them on the command line to escape them:
+
+``-initrd "bzImage earlyprintk=xen,,keep root=/dev/xvda1,initrd.img"``
+    Multiboot only. Use bzImage as the first module with
+    "``earlyprintk=xen,keep root=/dev/xvda1``" as its command line,
+    and initrd.img as the second module.
+
 ERST
 
 DEF("dtb", HAS_ARG, QEMU_OPTION_dtb, \
@@ -3424,10 +4014,69 @@ SRST
     kernel on boot.
 ERST
 
+SRST
+
+Finally you can also manually load images directly into the address
+space of the guest. This is most useful for developers who already
+know the layout of their guest and take care to ensure something sane
+will happen when the reset vector executes.
+
+The generic loader can be invoked by using the loader device:
+
+``-device loader,addr=<addr>,data=<data>,data-len=<data-len>[,data-be=<data-be>][,cpu-num=<cpu-num>]``
+
+there is also the guest loader which operates in a similar way but
+tweaks the DTB so a hypervisor loaded via ``-kernel`` can find where
+the guest image is:
+
+``-device guest-loader,addr=<addr>[,kernel=<path>,[bootargs=<arguments>]][,initrd=<path>]``
+
+ERST
+
 DEFHEADING()
 
 DEFHEADING(Debug/Expert options:)
 
+DEF("compat", HAS_ARG, QEMU_OPTION_compat,
+    "-compat [deprecated-input=accept|reject|crash][,deprecated-output=accept|hide]\n"
+    "                Policy for handling deprecated management interfaces\n"
+    "-compat [unstable-input=accept|reject|crash][,unstable-output=accept|hide]\n"
+    "                Policy for handling unstable management interfaces\n",
+    QEMU_ARCH_ALL)
+SRST
+``-compat [deprecated-input=@var{input-policy}][,deprecated-output=@var{output-policy}]``
+    Set policy for handling deprecated management interfaces (experimental):
+
+    ``deprecated-input=accept`` (default)
+        Accept deprecated commands and arguments
+    ``deprecated-input=reject``
+        Reject deprecated commands and arguments
+    ``deprecated-input=crash``
+        Crash on deprecated commands and arguments
+    ``deprecated-output=accept`` (default)
+        Emit deprecated command results and events
+    ``deprecated-output=hide``
+        Suppress deprecated command results and events
+
+    Limitation: covers only syntactic aspects of QMP.
+
+``-compat [unstable-input=@var{input-policy}][,unstable-output=@var{output-policy}]``
+    Set policy for handling unstable management interfaces (experimental):
+
+    ``unstable-input=accept`` (default)
+        Accept unstable commands and arguments
+    ``unstable-input=reject``
+        Reject unstable commands and arguments
+    ``unstable-input=crash``
+        Crash on unstable commands and arguments
+    ``unstable-output=accept`` (default)
+        Emit unstable command results and events
+    ``unstable-output=hide``
+        Suppress unstable command results and events
+
+    Limitation: covers only syntactic aspects of QMP.
+ERST
+
 DEF("fw_cfg", HAS_ARG, QEMU_OPTION_fwcfg,
     "-fw_cfg [name=]<name>,file=<file>\n"
     "                add named fw_cfg entry with contents from file\n"
@@ -3469,7 +4118,8 @@ SRST
     This option can be used several times to simulate up to 4 serial
     ports.
 
-    Use ``-serial none`` to disable all serial ports.
+    You can use ``-serial none`` to suppress the creation of default
+    serial devices.
 
     Available character devices are:
 
@@ -3491,10 +4141,17 @@ SRST
         [Linux only] Pseudo TTY (a new PTY is automatically allocated)
 
     ``none``
-        No device is allocated.
+        No device is allocated. Note that for machine types which
+        emulate systems where a serial device is always present in
+        real hardware, this may be equivalent to the ``null`` option,
+        in that the serial device is still present but all output
+        is discarded. For boards where the number of serial ports is
+        truly variable, this suppresses the creation of the device.
 
     ``null``
-        void device
+        A guest will see the UART or serial device as present in the
+        machine, but all output is discarded, and there is no input.
+        Conceptually equivalent to redirecting the output to ``/dev/null``.
 
     ``chardev:id``
         Use a named character device defined with the ``-chardev``
@@ -3551,30 +4208,30 @@ SRST
         ``telnet options:``
             localhost 5555
 
-    ``tcp:[host]:port[,server][,nowait][,nodelay][,reconnect=seconds]``
+    ``tcp:[host]:port[,server=on|off][,wait=on|off][,nodelay=on|off][,reconnect=seconds]``
         The TCP Net Console has two modes of operation. It can send the
         serial I/O to a location or wait for a connection from a
         location. By default the TCP Net Console is sent to host at the
-        port. If you use the server option QEMU will wait for a client
+        port. If you use the ``server=on`` option QEMU will wait for a client
         socket application to connect to the port before continuing,
-        unless the ``nowait`` option was specified. The ``nodelay``
-        option disables the Nagle buffering algorithm. The ``reconnect``
-        option only applies if noserver is set, if the connection goes
+        unless the ``wait=on|off`` option was specified. The ``nodelay=on|off``
+        option disables the Nagle buffering algorithm. The ``reconnect=on``
+        option only applies if ``server=no`` is set, if the connection goes
         down it will attempt to reconnect at the given interval. If host
         is omitted, 0.0.0.0 is assumed. Only one TCP connection at a
-        time is accepted. You can use ``telnet`` to connect to the
+        time is accepted. You can use ``telnet=on`` to connect to the
         corresponding character device.
 
         ``Example to send tcp console to 192.168.0.2 port 4444``
             -serial tcp:192.168.0.2:4444
 
         ``Example to listen and wait on port 4444 for connection``
-            -serial tcp::4444,server
+            -serial tcp::4444,server=on
 
         ``Example to not wait and listen on ip 192.168.0.100 port 4444``
-            -serial tcp:192.168.0.100:4444,server,nowait
+            -serial tcp:192.168.0.100:4444,server=on,wait=off
 
-    ``telnet:host:port[,server][,nowait][,nodelay]``
+    ``telnet:host:port[,server=on|off][,wait=on|off][,nodelay=on|off]``
         The telnet protocol is used instead of raw tcp sockets. The
         options work the same as if you had specified ``-serial tcp``.
         The difference is that the port acts like a telnet server or
@@ -3584,11 +4241,11 @@ SRST
         you do it with Control-] and then type "send break" followed by
         pressing the enter key.
 
-    ``websocket:host:port,server[,nowait][,nodelay]``
+    ``websocket:host:port,server=on[,wait=on|off][,nodelay=on|off]``
         The WebSocket protocol is used instead of raw tcp socket. The
         port acts as a WebSocket server. Client mode is not supported.
 
-    ``unix:path[,server][,nowait][,reconnect=seconds]``
+    ``unix:path[,server=on|off][,wait=on|off][,reconnect=seconds]``
         A unix domain socket is used instead of a tcp socket. The option
         works the same as if you had specified ``-serial tcp`` except
         the unix domain socket path is used for connections.
@@ -3601,7 +4258,7 @@ SRST
         multiplex the monitor onto a telnet server listening on port
         4444 would be:
 
-        ``-serial mon:telnet::4444,server,nowait``
+        ``-serial mon:telnet::4444,server=on,wait=off``
 
         When the monitor is multiplexed to stdio in this way, Ctrl+C
         will not terminate QEMU any more but will be passed to the guest
@@ -3647,22 +4304,42 @@ DEF("qmp", HAS_ARG, QEMU_OPTION_qmp, \
     QEMU_ARCH_ALL)
 SRST
 ``-qmp dev``
-    Like -monitor but opens in 'control' mode.
+    Like ``-monitor`` but opens in 'control' mode. For example, to make
+    QMP available on localhost port 4444::
+
+        -qmp tcp:localhost:4444,server=on,wait=off
+
+    Not all options are configurable via this syntax; for maximum
+    flexibility use the ``-mon`` option and an accompanying ``-chardev``.
+
 ERST
 DEF("qmp-pretty", HAS_ARG, QEMU_OPTION_qmp_pretty, \
     "-qmp-pretty dev like -qmp but uses pretty JSON formatting\n",
     QEMU_ARCH_ALL)
 SRST
 ``-qmp-pretty dev``
-    Like -qmp but uses pretty JSON formatting.
+    Like ``-qmp`` but uses pretty JSON formatting.
 ERST
 
 DEF("mon", HAS_ARG, QEMU_OPTION_mon, \
     "-mon [chardev=]name[,mode=readline|control][,pretty[=on|off]]\n", QEMU_ARCH_ALL)
 SRST
 ``-mon [chardev=]name[,mode=readline|control][,pretty[=on|off]]``
-    Setup monitor on chardev name. ``pretty`` turns on JSON pretty
-    printing easing human reading and debugging.
+    Set up a monitor connected to the chardev ``name``.
+    QEMU supports two monitors: the Human Monitor Protocol
+    (HMP; for human interaction), and the QEMU Monitor Protocol
+    (QMP; a JSON RPC-style protocol).
+    The default is HMP; ``mode=control`` selects QMP instead.
+    ``pretty`` is only valid when ``mode=control``,
+    turning on JSON pretty printing to ease
+    human reading and debugging.
+
+    For example::
+
+      -chardev socket,id=mon1,host=localhost,port=4444,server=on,wait=off \
+      -mon chardev=mon1,mode=control,pretty=on
+
+    enables the QMP monitor on localhost port 4444 with pretty-printing.
 ERST
 
 DEF("debugcon", HAS_ARG, QEMU_OPTION_debugcon, \
@@ -3686,10 +4363,11 @@ SRST
 ERST
 
 DEF("singlestep", 0, QEMU_OPTION_singlestep, \
-    "-singlestep     always run in singlestep mode\n", QEMU_ARCH_ALL)
+    "-singlestep     deprecated synonym for -accel tcg,one-insn-per-tb=on\n", QEMU_ARCH_ALL)
 SRST
 ``-singlestep``
-    Run the emulation in single step mode.
+    This is a deprecated synonym for the TCG accelerator property
+    ``one-insn-per-tb``.
 ERST
 
 DEF("preconfig", 0, QEMU_OPTION_preconfig, \
@@ -3713,17 +4391,6 @@ SRST
     Do not start CPU at startup (you must type 'c' in the monitor).
 ERST
 
-DEF("realtime", HAS_ARG, QEMU_OPTION_realtime,
-    "-realtime [mlock=on|off]\n"
-    "                run qemu with realtime features\n"
-    "                mlock=on|off controls mlock support (default: on)\n",
-    QEMU_ARCH_ALL)
-SRST
-``-realtime mlock=on|off``
-    Run qemu with realtime features. mlocking qemu and guest memory can
-    be enabled via ``mlock=on`` (enabled by default).
-ERST
-
 DEF("overcommit", HAS_ARG, QEMU_OPTION_overcommit,
     "-overcommit [mem-lock=on|off][cpu-pm=on|off]\n"
     "                run qemu with overcommit hints\n"
@@ -3739,8 +4406,7 @@ SRST
 
     Locking qemu and guest memory can be enabled via ``mem-lock=on``
     (disabled by default). This works when host memory is not
-    overcommitted and reduces the worst-case latency for guest. This is
-    equivalent to ``realtime``.
+    overcommitted and reduces the worst-case latency for guest.
 
     Guest ability to manage power state of host cpus (increasing latency
     for other processes on the same host cpu, but decreasing latency for
@@ -3842,15 +4508,10 @@ SRST
     To list all the data directories, use ``-L help``.
 ERST
 
-DEF("bios", HAS_ARG, QEMU_OPTION_bios, \
-    "-bios file      set the filename for the BIOS\n", QEMU_ARCH_ALL)
-SRST
-``-bios file``
-    Set the filename for the BIOS.
-ERST
-
 DEF("enable-kvm", 0, QEMU_OPTION_enable_kvm, \
-    "-enable-kvm     enable KVM full virtualization support\n", QEMU_ARCH_ALL)
+    "-enable-kvm     enable KVM full virtualization support\n",
+    QEMU_ARCH_ARM | QEMU_ARCH_I386 | QEMU_ARCH_MIPS | QEMU_ARCH_PPC |
+    QEMU_ARCH_RISCV | QEMU_ARCH_S390X)
 SRST
 ``-enable-kvm``
     Enable KVM full virtualization support. This option is only
@@ -3858,16 +4519,17 @@ SRST
 ERST
 
 DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid,
-    "-xen-domid id   specify xen guest domain id\n", QEMU_ARCH_ALL)
+    "-xen-domid id   specify xen guest domain id\n",
+    QEMU_ARCH_ARM | QEMU_ARCH_I386)
 DEF("xen-attach", 0, QEMU_OPTION_xen_attach,
     "-xen-attach     attach to existing xen domain\n"
     "                libxl will use this when starting QEMU\n",
-    QEMU_ARCH_ALL)
+    QEMU_ARCH_ARM | QEMU_ARCH_I386)
 DEF("xen-domid-restrict", 0, QEMU_OPTION_xen_domid_restrict,
     "-xen-domid-restrict     restrict set of available xen operations\n"
     "                        to specified domain id. (Does not affect\n"
     "                        xenpv machine type).\n",
-    QEMU_ARCH_ALL)
+    QEMU_ARCH_ARM | QEMU_ARCH_I386)
 SRST
 ``-xen-domid id``
     Specify xen guest domain id (XEN only).
@@ -3894,6 +4556,31 @@ SRST
     changes to the disk image.
 ERST
 
+DEF("action", HAS_ARG, QEMU_OPTION_action,
+    "-action reboot=reset|shutdown\n"
+    "                   action when guest reboots [default=reset]\n"
+    "-action shutdown=poweroff|pause\n"
+    "                   action when guest shuts down [default=poweroff]\n"
+    "-action panic=pause|shutdown|exit-failure|none\n"
+    "                   action when guest panics [default=shutdown]\n"
+    "-action watchdog=reset|shutdown|poweroff|inject-nmi|pause|debug|none\n"
+    "                   action when watchdog fires [default=reset]\n",
+    QEMU_ARCH_ALL)
+SRST
+``-action event=action``
+    The action parameter serves to modify QEMU's default behavior when
+    certain guest events occur. It provides a generic method for specifying the
+    same behaviors that are modified by the ``-no-reboot`` and ``-no-shutdown``
+    parameters.
+
+    Examples:
+
+    ``-action panic=none``
+    ``-action reboot=shutdown,shutdown=pause``
+    ``-device i6300esb -action watchdog=pause``
+
+ERST
+
 DEF("loadvm", HAS_ARG, QEMU_OPTION_loadvm, \
     "-loadvm [tag|id]\n" \
     "                start right away with a saved state (loadvm in monitor)\n",
@@ -3958,30 +4645,34 @@ SRST
 ERST
 
 DEF("icount", HAS_ARG, QEMU_OPTION_icount, \
-    "-icount [shift=N|auto][,align=on|off][,sleep=on|off,rr=record|replay,rrfile=<filename>,rrsnapshot=<snapshot>]\n" \
+    "-icount [shift=N|auto][,align=on|off][,sleep=on|off][,rr=record|replay,rrfile=<filename>[,rrsnapshot=<snapshot>]]\n" \
     "                enable virtual instruction counter with 2^N clock ticks per\n" \
     "                instruction, enable aligning the host and virtual clocks\n" \
-    "                or disable real time cpu sleeping\n", QEMU_ARCH_ALL)
+    "                or disable real time cpu sleeping, and optionally enable\n" \
+    "                record-and-replay mode\n", QEMU_ARCH_ALL)
 SRST
-``-icount [shift=N|auto][,rr=record|replay,rrfile=filename,rrsnapshot=snapshot]``
+``-icount [shift=N|auto][,align=on|off][,sleep=on|off][,rr=record|replay,rrfile=filename[,rrsnapshot=snapshot]]``
     Enable virtual instruction counter. The virtual cpu will execute one
     instruction every 2^N ns of virtual time. If ``auto`` is specified
     then the virtual cpu speed will be automatically adjusted to keep
     virtual time within a few seconds of real time.
 
-    When the virtual cpu is sleeping, the virtual time will advance at
-    default speed unless ``sleep=on|off`` is specified. With
-    ``sleep=on|off``, the virtual time will jump to the next timer
-    deadline instantly whenever the virtual cpu goes to sleep mode and
-    will not advance if no timer is enabled. This behavior give
-    deterministic execution times from the guest point of view.
-
     Note that while this option can give deterministic behavior, it does
     not provide cycle accurate emulation. Modern CPUs contain
     superscalar out of order cores with complex cache hierarchies. The
     number of instructions executed often has little or no correlation
     with actual performance.
 
+    When the virtual cpu is sleeping, the virtual time will advance at
+    default speed unless ``sleep=on`` is specified. With
+    ``sleep=on``, the virtual time will jump to the next timer
+    deadline instantly whenever the virtual cpu goes to sleep mode and
+    will not advance if no timer is enabled. This behavior gives
+    deterministic execution times from the guest point of view.
+    The default if icount is enabled is ``sleep=off``.
+    ``sleep=on`` cannot be used together with either ``shift=auto``
+    or ``align=on``.
+
     ``align=on`` will activate the delay algorithm which will try to
     synchronise the host clock and the virtual clock. The goal is to
     have a guest running at the real frequency imposed by the shift
@@ -3991,44 +4682,17 @@ SRST
     ``shift`` is ``auto``. Note: The sync algorithm will work for those
     shift values for which the guest clock runs ahead of the host clock.
     Typically this happens when the shift value is high (how high
-    depends on the host machine).
-
-    When ``rr`` option is specified deterministic record/replay is
-    enabled. Replay log is written into filename file in record mode and
-    read from this file in replay mode.
-
-    Option rrsnapshot is used to create new vm snapshot named snapshot
-    at the start of execution recording. In replay mode this option is
-    used to load the initial VM state.
-ERST
-
-DEF("watchdog", HAS_ARG, QEMU_OPTION_watchdog, \
-    "-watchdog model\n" \
-    "                enable virtual hardware watchdog [default=none]\n",
-    QEMU_ARCH_ALL)
-SRST
-``-watchdog model``
-    Create a virtual hardware watchdog device. Once enabled (by a guest
-    action), the watchdog must be periodically polled by an agent inside
-    the guest or else the guest will be restarted. Choose a model for
-    which your guest has drivers.
-
-    The model is the model of hardware watchdog to emulate. Use
-    ``-watchdog help`` to list available hardware models. Only one
-    watchdog can be enabled for a guest.
-
-    The following models may be available:
-
-    ``ib700``
-        iBASE 700 is a very simple ISA watchdog with a single timer.
-
-    ``i6300esb``
-        Intel 6300ESB I/O controller hub is a much more featureful
-        PCI-based dual-timer watchdog.
-
-    ``diag288``
-        A virtual watchdog for s390x backed by the diagnose 288
-        hypercall (currently KVM only).
+    depends on the host machine). The default if icount is enabled
+    is ``align=off``.
+
+    When the ``rr`` option is specified deterministic record/replay is
+    enabled. The ``rrfile=`` option must also be provided to
+    specify the path to the replay log. In record mode data is written
+    to this file, and in replay mode it is read back.
+    If the ``rrsnapshot`` option is given then it specifies a VM snapshot
+    name. In record mode, a new VM snapshot with the given name is created
+    at the start of execution recording. In replay mode this option
+    specifies the snapshot name used to load the initial VM state.
 ERST
 
 DEF("watchdog-action", HAS_ARG, QEMU_OPTION_watchdog_action, \
@@ -4052,7 +4716,7 @@ SRST
 
     Examples:
 
-    ``-watchdog i6300esb -watchdog-action pause``; \ ``-watchdog ib700``
+    ``-device i6300esb -watchdog-action pause``
 
 ERST
 
@@ -4073,28 +4737,14 @@ SRST
 
 ERST
 
-DEF("show-cursor", 0, QEMU_OPTION_show_cursor, \
-    "-show-cursor    show cursor\n", QEMU_ARCH_ALL)
-SRST
-``-show-cursor``
-    Show cursor.
-ERST
-
-DEF("tb-size", HAS_ARG, QEMU_OPTION_tb_size, \
-    "-tb-size n      set TB size\n", QEMU_ARCH_ALL)
-SRST
-``-tb-size n``
-    Set TCG translation block cache size. Deprecated, use
-    '\ ``-accel tcg,tb-size=n``\ ' instead.
-ERST
-
 DEF("incoming", HAS_ARG, QEMU_OPTION_incoming, \
-    "-incoming tcp:[host]:port[,to=maxport][,ipv4][,ipv6]\n" \
-    "-incoming rdma:host:port[,ipv4][,ipv6]\n" \
+    "-incoming tcp:[host]:port[,to=maxport][,ipv4=on|off][,ipv6=on|off]\n" \
+    "-incoming rdma:host:port[,ipv4=on|off][,ipv6=on|off]\n" \
     "-incoming unix:socketpath\n" \
     "                prepare for incoming migration, listen on\n" \
     "                specified protocol and socket address\n" \
     "-incoming fd:fd\n" \
+    "-incoming file:filename[,offset=offset]\n" \
     "-incoming exec:cmdline\n" \
     "                accept incoming migration on given file descriptor\n" \
     "                or from given external command\n" \
@@ -4102,16 +4752,20 @@ DEF("incoming", HAS_ARG, QEMU_OPTION_incoming, \
     "                wait for the URI to be specified via migrate_incoming\n",
     QEMU_ARCH_ALL)
 SRST
-``-incoming tcp:[host]:port[,to=maxport][,ipv4][,ipv6]``
+``-incoming tcp:[host]:port[,to=maxport][,ipv4=on|off][,ipv6=on|off]``
   \ 
-``-incoming rdma:host:port[,ipv4][,ipv6]``
+``-incoming rdma:host:port[,ipv4=on|off][,ipv6=on|off]``
     Prepare for incoming migration, listen on a given tcp port.
 
 ``-incoming unix:socketpath``
     Prepare for incoming migration, listen on a given unix socket.
 
 ``-incoming fd:fd``
-    Accept incoming migration from a given filedescriptor.
+    Accept incoming migration from a given file descriptor.
+
+``-incoming file:filename[,offset=offset]``
+    Accept incoming migration from a given file starting at offset.
+    offset allows the common size suffixes, or a 0x prefix, but not both.
 
 ``-incoming exec:cmdline``
     Accept incoming migration as an output from specified external
@@ -4143,11 +4797,12 @@ ERST
 
 #ifndef _WIN32
 DEF("chroot", HAS_ARG, QEMU_OPTION_chroot, \
-    "-chroot dir     chroot to dir just before starting the VM\n",
+    "-chroot dir     chroot to dir just before starting the VM (deprecated)\n",
     QEMU_ARCH_ALL)
 #endif
 SRST
 ``-chroot dir``
+    Deprecated, use '-run-with chroot=...' instead.
     Immediately before starting guest execution, chroot to the specified
     directory. Especially useful in combination with -runas.
 ERST
@@ -4185,39 +4840,32 @@ SRST
 ERST
 DEF("semihosting", 0, QEMU_OPTION_semihosting,
     "-semihosting    semihosting mode\n",
-    QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | QEMU_ARCH_LM32 |
-    QEMU_ARCH_MIPS | QEMU_ARCH_NIOS2)
+    QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA |
+    QEMU_ARCH_MIPS | QEMU_ARCH_NIOS2 | QEMU_ARCH_RISCV)
 SRST
 ``-semihosting``
-    Enable semihosting mode (ARM, M68K, Xtensa, MIPS, Nios II only).
+    Enable :ref:`Semihosting` mode (ARM, M68K, Xtensa, MIPS, Nios II, RISC-V only).
 
-    Note that this allows guest direct access to the host filesystem, so
-    should only be used with a trusted guest OS.
+    .. warning::
+      Note that this allows guest direct access to the host filesystem, so
+      should only be used with a trusted guest OS.
 
     See the -semihosting-config option documentation for further
     information about the facilities this enables.
 ERST
 DEF("semihosting-config", HAS_ARG, QEMU_OPTION_semihosting_config,
-    "-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,arg=str[,...]]\n" \
+    "-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,userspace=on|off][,arg=str[,...]]\n" \
     "                semihosting configuration\n",
-QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | QEMU_ARCH_LM32 |
-QEMU_ARCH_MIPS | QEMU_ARCH_NIOS2)
+QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA |
+QEMU_ARCH_MIPS | QEMU_ARCH_NIOS2 | QEMU_ARCH_RISCV)
 SRST
-``-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,arg=str[,...]]``
-    Enable and configure semihosting (ARM, M68K, Xtensa, MIPS, Nios II
+``-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,userspace=on|off][,arg=str[,...]]``
+    Enable and configure :ref:`Semihosting` (ARM, M68K, Xtensa, MIPS, Nios II, RISC-V
     only).
 
-    Note that this allows guest direct access to the host filesystem, so
-    should only be used with a trusted guest OS.
-
-    On Arm this implements the standard semihosting API, version 2.0.
-
-    On M68K this implements the "ColdFire GDB" interface used by
-    libgloss.
-
-    Xtensa semihosting provides basic file IO calls, such as
-    open/read/write/seek/select. Tensilica baremetal libc for ISS and
-    linux platform "sim" use this interface.
+    .. warning::
+      Note that this allows guest direct access to the host filesystem, so
+      should only be used with a trusted guest OS.
 
     ``target=native|gdb|auto``
         Defines where the semihosting calls will be addressed, to QEMU
@@ -4228,6 +4876,13 @@ SRST
         Send the output to a chardev backend output for native or auto
         output when not in gdb
 
+    ``userspace=on|off``
+        Allows code running in guest userspace to access the semihosting
+        interface. The default is that only privileged guest code can
+        make semihosting calls. Note that setting ``userspace=on`` should
+        only be used if all guest code is trusted (for example, in
+        bare-metal test case code).
+
     ``arg=str1,arg=str2,...``
         Allows the user to pass input arguments, and can be used
         multiple times to build up a list. The old-style
@@ -4251,12 +4906,12 @@ DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
     "                use 'obsolete' to allow obsolete system calls that are provided\n" \
     "                    by the kernel, but typically no longer used by modern\n" \
     "                    C library implementations.\n" \
-    "                use 'elevateprivileges' to allow or deny QEMU process to elevate\n" \
-    "                    its privileges by blacklisting all set*uid|gid system calls.\n" \
+    "                use 'elevateprivileges' to allow or deny the QEMU process ability\n" \
+    "                    to elevate privileges using set*uid|gid system calls.\n" \
     "                    The value 'children' will deny set*uid|gid system calls for\n" \
     "                    main QEMU process but will allow forks and execves to run unprivileged\n" \
     "                use 'spawn' to avoid QEMU to spawn new threads or processes by\n" \
-    "                     blacklisting *fork and execve\n" \
+    "                     blocking *fork and execve\n" \
     "                use 'resourcecontrol' to disable process affinity and schedular priority\n",
     QEMU_ARCH_ALL)
 SRST
@@ -4278,23 +4933,14 @@ SRST
 ERST
 
 DEF("readconfig", HAS_ARG, QEMU_OPTION_readconfig,
-    "-readconfig <file>\n", QEMU_ARCH_ALL)
+    "-readconfig <file>\n"
+    "                read config file\n", QEMU_ARCH_ALL)
 SRST
 ``-readconfig file``
     Read device configuration from file. This approach is useful when
     you want to spawn QEMU process with many command line options but
     you don't want to exceed the command line character limit.
 ERST
-DEF("writeconfig", HAS_ARG, QEMU_OPTION_writeconfig,
-    "-writeconfig <file>\n"
-    "                read/write config file\n", QEMU_ARCH_ALL)
-SRST
-``-writeconfig file``
-    Write device configuration to file. The file can be either filename
-    to save command line and device configuration into file or dash
-    ``-``) character to print the output to stdout. This can be later
-    used as input file for ``-readconfig`` option.
-ERST
 
 DEF("no-user-config", 0, QEMU_OPTION_nouserconfig,
     "-no-user-config\n"
@@ -4316,19 +4962,18 @@ SRST
 
 ERST
 DEF("plugin", HAS_ARG, QEMU_OPTION_plugin,
-    "-plugin [file=]<file>[,arg=<string>]\n"
+    "-plugin [file=]<file>[,<argname>=<argvalue>]\n"
     "                load a plugin\n",
     QEMU_ARCH_ALL)
 SRST
-``-plugin file=file[,arg=string]``
+``-plugin file=file[,argname=argvalue]``
     Load a plugin.
 
     ``file=file``
         Load the given plugin from a shared library file.
 
-    ``arg=string``
-        Argument string passed to the plugin. (Can be given multiple
-        times.)
+    ``argname=argvalue``
+        Argument passed to the plugin. (Can be given multiple times.)
 ERST
 
 HXCOMM Internal use
@@ -4336,14 +4981,42 @@ DEF("qtest", HAS_ARG, QEMU_OPTION_qtest, "", QEMU_ARCH_ALL)
 DEF("qtest-log", HAS_ARG, QEMU_OPTION_qtest_log, "", QEMU_ARCH_ALL)
 
 #ifdef __linux__
-DEF("enable-fips", 0, QEMU_OPTION_enablefips,
-    "-enable-fips    enable FIPS 140-2 compliance\n",
+DEF("async-teardown", 0, QEMU_OPTION_asyncteardown,
+    "-async-teardown enable asynchronous teardown\n",
     QEMU_ARCH_ALL)
+SRST
+``-async-teardown``
+    This option is deprecated and should no longer be used. The new option
+    ``-run-with async-teardown=on`` is a replacement.
+ERST
 #endif
+#ifdef CONFIG_POSIX
+DEF("run-with", HAS_ARG, QEMU_OPTION_run_with,
+    "-run-with [async-teardown=on|off][,chroot=dir]\n"
+    "                Set miscellaneous QEMU process lifecycle options:\n"
+    "                async-teardown=on enables asynchronous teardown (Linux only)\n"
+    "                chroot=dir chroot to dir just before starting the VM\n",
+    QEMU_ARCH_ALL)
 SRST
-``-enable-fips``
-    Enable FIPS 140-2 compliance mode.
+``-run-with [async-teardown=on|off][,chroot=dir]``
+    Set QEMU process lifecycle options.
+
+    ``async-teardown=on`` enables asynchronous teardown. A new process called
+    "cleanup/<QEMU_PID>" will be created at startup sharing the address
+    space with the main QEMU process, using clone. It will wait for the
+    main QEMU process to terminate completely, and then exit. This allows
+    QEMU to terminate very quickly even if the guest was huge, leaving the
+    teardown of the address space to the cleanup process. Since the cleanup
+    process shares the same cgroups as the main QEMU process, accounting is
+    performed correctly. This only works if the cleanup process is not
+    forcefully killed with SIGKILL before the main QEMU process has
+    terminated completely.
+
+    ``chroot=dir`` can be used for doing a chroot to the specified directory
+    immediately before starting the guest execution. This is especially useful
+    in combination with -runas.
 ERST
+#endif
 
 DEF("msg", HAS_ARG, QEMU_OPTION_msg,
     "-msg [timestamp[=on|off]][,guest-name=[on|off]]\n"
@@ -4386,6 +5059,26 @@ SRST
     Enable synchronization profiling.
 ERST
 
+#if defined(CONFIG_TCG) && defined(CONFIG_LINUX)
+DEF("perfmap", 0, QEMU_OPTION_perfmap,
+    "-perfmap        generate a /tmp/perf-${pid}.map file for perf\n",
+    QEMU_ARCH_ALL)
+SRST
+``-perfmap``
+    Generate a map file for Linux perf tools that will allow basic profiling
+    information to be broken down into basic blocks.
+ERST
+
+DEF("jitdump", 0, QEMU_OPTION_jitdump,
+    "-jitdump        generate a jit-${pid}.dump file for perf\n",
+    QEMU_ARCH_ALL)
+SRST
+``-jitdump``
+    Generate a dump file for Linux perf tools that maps basic blocks to symbol
+    names, line numbers and JITted code.
+ERST
+#endif
+
 DEFHEADING()
 
 DEFHEADING(Generic object creation:)
@@ -4403,16 +5096,16 @@ SRST
     they are specified. Note that the 'id' property must be set. These
     objects are placed in the '/objects' path.
 
-    ``-object memory-backend-file,id=id,size=size,mem-path=dir,share=on|off,discard-data=on|off,merge=on|off,dump=on|off,prealloc=on|off,host-nodes=host-nodes,policy=default|preferred|bind|interleave,align=align``
+    ``-object memory-backend-file,id=id,size=size,mem-path=dir,share=on|off,discard-data=on|off,merge=on|off,dump=on|off,prealloc=on|off,host-nodes=host-nodes,policy=default|preferred|bind|interleave,align=align,offset=offset,readonly=on|off,rom=on|off|auto``
         Creates a memory file backend object, which can be used to back
         the guest RAM with huge pages.
 
         The ``id`` parameter is a unique ID that will be used to
-        reference this memory region when configuring the ``-numa``
-        argument.
+        reference this memory region in other parameters, e.g. ``-numa``,
+        ``-device nvdimm``, etc.
 
         The ``size`` option provides the size of the memory region, and
-        accepts common suffixes, eg ``500M``.
+        accepts common suffixes, e.g. ``500M``.
 
         The ``mem-path`` provides the path to either a shared memory or
         huge page filesystem mount.
@@ -4473,6 +5166,10 @@ SRST
         such cases, users can specify the required alignment via this
         option.
 
+        The ``offset`` option specifies the offset into the target file
+        that the region starts at. You can use this parameter to back
+        multiple regions with a single file.
+
         The ``pmem`` option specifies whether the backing file specified
         by ``mem-path`` is in host persistent memory that can be
         accessed using the SNIA NVM programming model (e.g. Intel
@@ -4486,6 +5183,23 @@ SRST
         4.15) and the filesystem of ``mem-path`` mounted with DAX
         option.
 
+        The ``readonly`` option specifies whether the backing file is opened
+        read-only or read-write (default).
+
+        The ``rom`` option specifies whether to create Read Only Memory
+        (ROM) that cannot be modified by the VM. Any write attempts to such
+        ROM will be denied. Most use cases want proper RAM instead of ROM.
+        However, selected use cases, like R/O NVDIMMs, can benefit from
+        ROM. If set to ``on``, create ROM; if set to ``off``, create
+        writable RAM; if set to ``auto`` (default), the value of the
+        ``readonly`` option is used. This option is primarily helpful when
+        we want to have writable RAM in configurations that would
+        traditionally create ROM before the ``rom`` option was introduced:
+        VM templating, where we want to open a file readonly
+        (``readonly=on``) and mark the memory to be private for QEMU
+        (``share=off``). For this use case, we need writable RAM instead
+        of ROM, and want to also set ``rom=off``.
+
     ``-object memory-backend-ram,id=id,merge=on|off,dump=on|off,share=on|off,prealloc=on|off,size=size,host-nodes=host-nodes,policy=default|preferred|bind|interleave``
         Creates a memory backend object, which can be used to back the
         guest RAM. Memory backend objects offer more control than the
@@ -4765,11 +5479,11 @@ SRST
             primary:
             -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
             -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
-            -chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
-            -chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
-            -chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
+            -chardev socket,id=mirror0,host=3.3.3.3,port=9003,server=on,wait=off
+            -chardev socket,id=compare1,host=3.3.3.3,port=9004,server=on,wait=off
+            -chardev socket,id=compare0,host=3.3.3.3,port=9001,server=on,wait=off
             -chardev socket,id=compare0-0,host=3.3.3.3,port=9001
-            -chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
+            -chardev socket,id=compare_out,host=3.3.3.3,port=9005,server=on,wait=off
             -chardev socket,id=compare_out0,host=3.3.3.3,port=9005
             -object iothread,id=iothread1
             -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0
@@ -4791,13 +5505,13 @@ SRST
             primary:
             -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
             -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
-            -chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
-            -chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
-            -chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
+            -chardev socket,id=mirror0,host=3.3.3.3,port=9003,server=on,wait=off
+            -chardev socket,id=compare1,host=3.3.3.3,port=9004,server=on,wait=off
+            -chardev socket,id=compare0,host=3.3.3.3,port=9001,server=on,wait=off
             -chardev socket,id=compare0-0,host=3.3.3.3,port=9001
-            -chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
+            -chardev socket,id=compare_out,host=3.3.3.3,port=9005,server=on,wait=off
             -chardev socket,id=compare_out0,host=3.3.3.3,port=9005
-            -chardev socket,id=notify_way,host=3.3.3.3,port=9009,server,nowait
+            -chardev socket,id=notify_way,host=3.3.3.3,port=9009,server=on,wait=off
             -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0
             -object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out
             -object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0
@@ -4816,8 +5530,8 @@ SRST
         read the colo-compare git log.
 
     ``-object cryptodev-backend-builtin,id=id[,queues=queues]``
-        Creates a cryptodev backend which executes crypto opreation from
-        the QEMU cipher APIS. The id parameter is a unique ID that will
+        Creates a cryptodev backend which executes crypto operations from
+        the QEMU cipher APIs. The id parameter is a unique ID that will
         be used to reference this cryptodev backend from the
         ``virtio-crypto`` device. The queues parameter is optional,
         which specify the queue number of cryptodev backend, the default
@@ -4931,7 +5645,7 @@ SRST
                  -object secret,id=sec0,keyid=secmaster0,format=base64,\\
                      data=$SECRET,iv=$(<iv.b64)
 
-    ``-object sev-guest,id=id,cbitpos=cbitpos,reduced-phys-bits=val,[sev-device=string,policy=policy,handle=handle,dh-cert-file=file,session-file=file]``
+    ``-object sev-guest,id=id,cbitpos=cbitpos,reduced-phys-bits=val,[sev-device=string,policy=policy,handle=handle,dh-cert-file=file,session-file=file,kernel-hashes=on|off]``
         Create a Secure Encrypted Virtualization (SEV) guest object,
         which can be used to provide the guest memory encryption support
         on AMD processors.
@@ -4946,7 +5660,7 @@ SRST
         physical address space. The ``reduced-phys-bits`` is used to
         provide the number of bits we loose in physical address space.
         Similar to C-bit, the value is Host family dependent. On EPYC,
-        the value should be 5.
+        a guest will lose a maximum of 1 bit, so the value should be 1.
 
         The ``sev-device`` provides the device file to use for
         communicating with the SEV firmware running inside AMD Secure
@@ -4971,13 +5685,17 @@ SRST
         session with the guest owner to negotiate keys used for
         attestation. The file must be encoded in base64.
 
+        The ``kernel-hashes`` adds the hashes of given kernel/initrd/
+        cmdline to a designated guest firmware page for measured Linux
+        boot with -kernel. The default is off. (Since 6.2)
+
         e.g to launch a SEV guest
 
         .. parsed-literal::
 
              # |qemu_system_x86| \\
                  ...... \\
-                 -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=5 \\
+                 -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1 \\
                  -machine ...,memory-encryption=sev0 \\
                  .....
 
@@ -5004,7 +5722,7 @@ SRST
         Note the use of quotes due to the x509 distinguished name
         containing whitespace, and escaping of ','.
 
-    ``-object authz-listfile,id=id,filename=path,refresh=yes|no``
+    ``-object authz-listfile,id=id,filename=path,refresh=on|off``
         Create an authorization object that will control access to
         network services.
 
@@ -5049,7 +5767,7 @@ SRST
 
              # |qemu_system| \\
                  ... \\
-                 -object authz-simple,id=auth0,filename=/etc/qemu/vnc-sasl.acl,refresh=yes \\
+                 -object authz-simple,id=auth0,filename=/etc/qemu/vnc-sasl.acl,refresh=on \\
                  ...
 
     ``-object authz-pam,id=id,service=string``
@@ -5080,13 +5798,13 @@ SRST
                        file=/etc/qemu/vnc.allow
 
         Finally the ``/etc/qemu/vnc.allow`` file would contain the list
-        of x509 distingished names that are permitted access
+        of x509 distinguished names that are permitted access
 
         ::
 
             CN=laptop.example.com,O=Example Home,L=London,ST=London,C=GB
 
-    ``-object iothread,id=id,poll-max-ns=poll-max-ns,poll-grow=poll-grow,poll-shrink=poll-shrink``
+    ``-object iothread,id=id,poll-max-ns=poll-max-ns,poll-grow=poll-grow,poll-shrink=poll-shrink,aio-max-batch=aio-max-batch``
         Creates a dedicated event loop thread that devices can be
         assigned to. This is known as an IOThread. By default device
         emulation happens in vCPU threads or the main event loop thread.
@@ -5122,7 +5840,11 @@ SRST
         the polling time when the algorithm detects it is spending too
         long polling without encountering events.
 
-        The polling parameters can be modified at run-time using the
+        The ``aio-max-batch`` parameter is the maximum number of requests
+        in a batch for the AIO engine, 0 means that the engine will use
+        its default.
+
+        The IOThread parameters can be modified at run-time using the
         ``qom-set`` command (where ``iothread1`` is the IOThread's
         ``id``):
 
@@ -5133,3 +5855,7 @@ ERST
 
 
 HXCOMM This is the last statement. Insert new options before this line!
+
+#undef DEF
+#undef DEFHEADING
+#undef ARCHHEADING
index 1487cc5dd8b1cfbd50b9f354bcfa19a056484e43..4a83bda2c3be54994ae6a704b20b992ac610a7c6 100644 (file)
@@ -251,12 +251,12 @@ void qdict_array_split(QDict *src, QList **dst)
         if (is_subqdict) {
             qdict_extract_subqdict(src, &subqdict, prefix);
             assert(qdict_size(subqdict) > 0);
+            qlist_append_obj(*dst, QOBJECT(subqdict));
         } else {
             qobject_ref(subqobj);
             qdict_del(src, indexstr);
+            qlist_append_obj(*dst, subqobj);
         }
-
-        qlist_append_obj(*dst, subqobj ?: QOBJECT(subqdict));
     }
 }
 
index 632320d72d5de5950b3482f492a1e668f6d406c9..51341d96e49e99b504db97b3507ff15ed98b8597 100644 (file)
@@ -139,7 +139,7 @@ static const uint8_t json_lexer[][256] =  {
          * bytes '\xFE', '\xFF'.  Structural characters and line
          * endings are promising resynchronization points.  Clients
          * may use the others to force the JSON parser into known-good
-         * state; see docs/interop/qmp-spec.txt.
+         * state; see docs/interop/qmp-spec.rst.
          */
         [0 ... 0x1F] = IN_START | LOOKAHEAD,
         [0x20 ... 0xFD] = IN_RECOVERY,
index c0f521b56b108554d642b9a1c7d224a6d2b1d83b..d498db6e7027e644521d6dbf10597a2f98efd476 100644 (file)
@@ -31,8 +31,7 @@ struct JSONToken {
     char str[];
 };
 
-typedef struct JSONParserContext
-{
+typedef struct JSONParserContext {
     Error *err;
     JSONToken *current;
     GQueue *buf;
@@ -55,7 +54,7 @@ static QObject *parse_value(JSONParserContext *ctxt);
 /**
  * Error handler
  */
-static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
+static void G_GNUC_PRINTF(3, 4) parse_error(JSONParserContext *ctxt,
                                            JSONToken *token, const char *msg, ...)
 {
     va_list ap;
@@ -130,7 +129,7 @@ static int cvt4hex(const char *s)
 static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
 {
     const char *ptr = token->str;
-    QString *str;
+    GString *str;
     char quote;
     const char *beg;
     int cp, trailing;
@@ -140,7 +139,7 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
 
     assert(*ptr == '"' || *ptr == '\'');
     quote = *ptr++;
-    str = qstring_new();
+    str = g_string_new(NULL);
 
     while (*ptr != quote) {
         assert(*ptr);
@@ -149,31 +148,31 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
             beg = ptr++;
             switch (*ptr++) {
             case '"':
-                qstring_append_chr(str, '"');
+                g_string_append_c(str, '"');
                 break;
             case '\'':
-                qstring_append_chr(str, '\'');
+                g_string_append_c(str, '\'');
                 break;
             case '\\':
-                qstring_append_chr(str, '\\');
+                g_string_append_c(str, '\\');
                 break;
             case '/':
-                qstring_append_chr(str, '/');
+                g_string_append_c(str, '/');
                 break;
             case 'b':
-                qstring_append_chr(str, '\b');
+                g_string_append_c(str, '\b');
                 break;
             case 'f':
-                qstring_append_chr(str, '\f');
+                g_string_append_c(str, '\f');
                 break;
             case 'n':
-                qstring_append_chr(str, '\n');
+                g_string_append_c(str, '\n');
                 break;
             case 'r':
-                qstring_append_chr(str, '\r');
+                g_string_append_c(str, '\r');
                 break;
             case 't':
-                qstring_append_chr(str, '\t');
+                g_string_append_c(str, '\t');
                 break;
             case 'u':
                 cp = cvt4hex(ptr);
@@ -200,7 +199,7 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
                                 (int)(ptr - beg), beg);
                     goto out;
                 }
-                qstring_append(str, utf8_buf);
+                g_string_append(str, utf8_buf);
                 break;
             default:
                 parse_error(ctxt, token, "invalid escape sequence in string");
@@ -225,14 +224,14 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
             ptr = end;
             len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
             assert(len >= 0);
-            qstring_append(str, utf8_buf);
+            g_string_append(str, utf8_buf);
         }
     }
 
-    return str;
+    return qstring_from_gstring(str);
 
 out:
-    qobject_unref(str);
+    g_string_free(str, true);
     return NULL;
 }
 
diff --git a/qobject/json-writer.c b/qobject/json-writer.c
new file mode 100644 (file)
index 0000000..309a31d
--- /dev/null
@@ -0,0 +1,247 @@
+/*
+ * JSON Writer
+ *
+ * Copyright IBM, Corp. 2009
+ * Copyright (c) 2010-2020 Red Hat Inc.
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Markus Armbruster <armbru@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qmp/json-writer.h"
+#include "qemu/unicode.h"
+
+struct JSONWriter {
+    bool pretty;
+    bool need_comma;
+    GString *contents;
+    GByteArray *container_is_array;
+};
+
+JSONWriter *json_writer_new(bool pretty)
+{
+    JSONWriter *writer = g_new(JSONWriter, 1);
+
+    writer->pretty = pretty;
+    writer->need_comma = false;
+    writer->contents = g_string_new(NULL);
+    writer->container_is_array = g_byte_array_new();
+    return writer;
+}
+
+const char *json_writer_get(JSONWriter *writer)
+{
+    g_assert(!writer->container_is_array->len);
+    return writer->contents->str;
+}
+
+GString *json_writer_get_and_free(JSONWriter *writer)
+{
+    GString *contents = writer->contents;
+
+    writer->contents = NULL;
+    g_byte_array_free(writer->container_is_array, true);
+    g_free(writer);
+    return contents;
+}
+
+void json_writer_free(JSONWriter *writer)
+{
+    if (writer) {
+        g_string_free(json_writer_get_and_free(writer), true);
+    }
+}
+
+static void enter_container(JSONWriter *writer, bool is_array)
+{
+    unsigned depth = writer->container_is_array->len;
+
+    g_byte_array_set_size(writer->container_is_array, depth + 1);
+    writer->container_is_array->data[depth] = is_array;
+    writer->need_comma = false;
+}
+
+static void leave_container(JSONWriter *writer, bool is_array)
+{
+    unsigned depth = writer->container_is_array->len;
+
+    assert(depth);
+    assert(writer->container_is_array->data[depth - 1] == is_array);
+    g_byte_array_set_size(writer->container_is_array, depth - 1);
+    writer->need_comma = true;
+}
+
+static bool in_object(JSONWriter *writer)
+{
+    unsigned depth = writer->container_is_array->len;
+
+    return depth && !writer->container_is_array->data[depth - 1];
+}
+
+static void pretty_newline(JSONWriter *writer)
+{
+    if (writer->pretty) {
+        g_string_append_printf(writer->contents, "\n%*s",
+                               writer->container_is_array->len * 4, "");
+    }
+}
+
+static void pretty_newline_or_space(JSONWriter *writer)
+{
+    if (writer->pretty) {
+        g_string_append_printf(writer->contents, "\n%*s",
+                               writer->container_is_array->len * 4, "");
+    } else {
+        g_string_append_c(writer->contents, ' ');
+    }
+}
+
+static void quoted_str(JSONWriter *writer, const char *str)
+{
+    const char *ptr;
+    char *end;
+    int cp;
+
+    g_string_append_c(writer->contents, '"');
+
+    for (ptr = str; *ptr; ptr = end) {
+        cp = mod_utf8_codepoint(ptr, 6, &end);
+        switch (cp) {
+        case '\"':
+            g_string_append(writer->contents, "\\\"");
+            break;
+        case '\\':
+            g_string_append(writer->contents, "\\\\");
+            break;
+        case '\b':
+            g_string_append(writer->contents, "\\b");
+            break;
+        case '\f':
+            g_string_append(writer->contents, "\\f");
+            break;
+        case '\n':
+            g_string_append(writer->contents, "\\n");
+            break;
+        case '\r':
+            g_string_append(writer->contents, "\\r");
+            break;
+        case '\t':
+            g_string_append(writer->contents, "\\t");
+            break;
+        default:
+            if (cp < 0) {
+                cp = 0xFFFD; /* replacement character */
+            }
+            if (cp > 0xFFFF) {
+                /* beyond BMP; need a surrogate pair */
+                g_string_append_printf(writer->contents, "\\u%04X\\u%04X",
+                                       0xD800 + ((cp - 0x10000) >> 10),
+                                       0xDC00 + ((cp - 0x10000) & 0x3FF));
+            } else if (cp < 0x20 || cp >= 0x7F) {
+                g_string_append_printf(writer->contents, "\\u%04X", cp);
+            } else {
+                g_string_append_c(writer->contents, cp);
+            }
+        }
+    };
+
+    g_string_append_c(writer->contents, '"');
+}
+
+static void maybe_comma_name(JSONWriter *writer, const char *name)
+{
+    if (writer->need_comma) {
+        g_string_append_c(writer->contents, ',');
+        pretty_newline_or_space(writer);
+    } else {
+        if (writer->contents->len) {
+            pretty_newline(writer);
+        }
+        writer->need_comma = true;
+    }
+
+    if (in_object(writer)) {
+        quoted_str(writer, name);
+        g_string_append(writer->contents, ": ");
+    }
+}
+
+void json_writer_start_object(JSONWriter *writer, const char *name)
+{
+    maybe_comma_name(writer, name);
+    g_string_append_c(writer->contents, '{');
+    enter_container(writer, false);
+}
+
+void json_writer_end_object(JSONWriter *writer)
+{
+    leave_container(writer, false);
+    pretty_newline(writer);
+    g_string_append_c(writer->contents, '}');
+}
+
+void json_writer_start_array(JSONWriter *writer, const char *name)
+{
+    maybe_comma_name(writer, name);
+    g_string_append_c(writer->contents, '[');
+    enter_container(writer, true);
+}
+
+void json_writer_end_array(JSONWriter *writer)
+{
+    leave_container(writer, true);
+    pretty_newline(writer);
+    g_string_append_c(writer->contents, ']');
+}
+
+void json_writer_bool(JSONWriter *writer, const char *name, bool val)
+{
+    maybe_comma_name(writer, name);
+    g_string_append(writer->contents, val ? "true" : "false");
+}
+
+void json_writer_null(JSONWriter *writer, const char *name)
+{
+    maybe_comma_name(writer, name);
+    g_string_append(writer->contents, "null");
+}
+
+void json_writer_int64(JSONWriter *writer, const char *name, int64_t val)
+{
+    maybe_comma_name(writer, name);
+    g_string_append_printf(writer->contents, "%" PRId64, val);
+}
+
+void json_writer_uint64(JSONWriter *writer, const char *name, uint64_t val)
+{
+    maybe_comma_name(writer, name);
+    g_string_append_printf(writer->contents, "%" PRIu64, val);
+}
+
+void json_writer_double(JSONWriter *writer, const char *name, double val)
+{
+    maybe_comma_name(writer, name);
+
+    /*
+     * FIXME: g_string_append_printf() is locale dependent; but JSON
+     * requires numbers to be formatted as if in the C locale.
+     * Dependence on C locale is a pervasive issue in QEMU.
+     */
+    /*
+     * FIXME: This risks printing Inf or NaN, which are not valid
+     * JSON values.
+     */
+    g_string_append_printf(writer->contents, "%.17g", val);
+}
+
+void json_writer_str(JSONWriter *writer, const char *name, const char *str)
+{
+    maybe_comma_name(writer, name);
+    quoted_str(writer, str);
+}
index bb63c06b638cc5d0f789230b6a1e1cc56c416417..4683a852a28a18b75318af49dac67d3f8c5d851e 100644 (file)
@@ -1,3 +1,4 @@
-util_ss.add(files('qnull.c', 'qnum.c', 'qstring.c', 'qdict.c', 'qlist.c', 'qbool.c',
-  'qlit.c', 'qjson.c', 'qobject.c', 'json-lexer.c', 'json-streamer.c', 'json-parser.c',
+util_ss.add(files('qnull.c', 'qnum.c', 'qstring.c', 'qdict.c',
+  'qlist.c', 'qbool.c', 'qlit.c', 'qjson.c', 'qobject.c',
+  'json-writer.c', 'json-lexer.c', 'json-streamer.c', 'json-parser.c',
   'block-qdict.c'))
index 06dfc43498117c2e8c5a59fc36289b68ac90079f..c7049c0c501b0eecd95a56177e6ae5695c95ba59 100644 (file)
@@ -13,6 +13,7 @@
 
 #include "qemu/osdep.h"
 #include "qapi/qmp/qbool.h"
+#include "qobject-internal.h"
 
 /**
  * qbool_from_bool(): Create a new QBool from a bool
@@ -55,3 +56,8 @@ void qbool_destroy_obj(QObject *obj)
     assert(obj != NULL);
     g_free(qobject_to(QBool, obj));
 }
+
+void qbool_unref(QBool *q)
+{
+    qobject_unref(q);
+}
index 1079bd3f6f68844e24a28b9fd35ed5fd00c75580..8faff230d39152fb42894266a8d49fa5a612ac3c 100644 (file)
@@ -16,6 +16,7 @@
 #include "qapi/qmp/qbool.h"
 #include "qapi/qmp/qnull.h"
 #include "qapi/qmp/qstring.h"
+#include "qobject-internal.h"
 
 /**
  * qdict_new(): Create a new QDict
@@ -38,12 +39,13 @@ QDict *qdict_new(void)
  */
 static unsigned int tdb_hash(const char *name)
 {
-    unsigned value;    /* Used to compute the hash value.  */
-    unsigned   i;      /* Used to cycle through random values. */
+    unsigned value;    /* Used to compute the hash value.  */
+    unsigned   i;      /* Used to cycle through random values. */
 
     /* Set the initial value from the key size. */
-    for (value = 0x238F13AF * strlen(name), i=0; name[i]; i++)
-        value = (value + (((const unsigned char *)name)[i] << (i*5 % 24)));
+    for (value = 0x238F13AF * strlen(name), i = 0; name[i]; i++) {
+        value = (value + (((const unsigned char *)name)[i] << (i * 5 % 24)));
+    }
 
     return (1103515243 * value + 12345);
 }
@@ -92,8 +94,9 @@ static QDictEntry *qdict_find(const QDict *qdict,
     QDictEntry *entry;
 
     QLIST_FOREACH(entry, &qdict->table[bucket], next)
-        if (!strcmp(entry->key, key))
+        if (!strcmp(entry->key, key)) {
             return entry;
+        }
 
     return NULL;
 }
@@ -439,3 +442,8 @@ void qdict_destroy_obj(QObject *obj)
 
     g_free(qdict);
 }
+
+void qdict_unref(QDict *q)
+{
+    qobject_unref(q);
+}
index f1f2c6970457d3b0eac909b493bdf5c87ec9778f..167fcb429c90bd7ed78b6f3bf71218ac35f02b4c 100644 (file)
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qapi/qmp/json-parser.h"
+#include "qapi/qmp/json-writer.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qbool.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qlist.h"
 #include "qapi/qmp/qnum.h"
 #include "qapi/qmp/qstring.h"
-#include "qemu/unicode.h"
 
-typedef struct JSONParsingState
-{
+typedef struct JSONParsingState {
     JSONMessageParser parser;
     QObject *result;
     Error *err;
@@ -149,144 +148,69 @@ QDict *qdict_from_jsonf_nofail(const char *string, ...)
     return qdict;
 }
 
-static void to_json(const QObject *obj, QString *str, int pretty, int indent);
-
-static void json_pretty_newline(QString *str, bool pretty, int indent)
-{
-    int i;
-
-    if (pretty) {
-        qstring_append(str, "\n");
-        for (i = 0; i < indent; i++) {
-            qstring_append(str, "    ");
-        }
-    }
-}
-
-static void to_json(const QObject *obj, QString *str, int pretty, int indent)
+static void to_json(JSONWriter *writer, const char *name,
+                    const QObject *obj)
 {
     switch (qobject_type(obj)) {
     case QTYPE_QNULL:
-        qstring_append(str, "null");
+        json_writer_null(writer, name);
         break;
     case QTYPE_QNUM: {
         QNum *val = qobject_to(QNum, obj);
-        char *buffer = qnum_to_string(val);
-        qstring_append(str, buffer);
-        g_free(buffer);
+
+        switch (val->kind) {
+        case QNUM_I64:
+            json_writer_int64(writer, name, val->u.i64);
+            break;
+        case QNUM_U64:
+            json_writer_uint64(writer, name, val->u.u64);
+            break;
+        case QNUM_DOUBLE:
+            json_writer_double(writer, name, val->u.dbl);
+            break;
+        default:
+            abort();
+        }
         break;
     }
     case QTYPE_QSTRING: {
         QString *val = qobject_to(QString, obj);
-        const char *ptr;
-        int cp;
-        char buf[16];
-        char *end;
-
-        ptr = qstring_get_str(val);
-        qstring_append(str, "\"");
-
-        for (; *ptr; ptr = end) {
-            cp = mod_utf8_codepoint(ptr, 6, &end);
-            switch (cp) {
-            case '\"':
-                qstring_append(str, "\\\"");
-                break;
-            case '\\':
-                qstring_append(str, "\\\\");
-                break;
-            case '\b':
-                qstring_append(str, "\\b");
-                break;
-            case '\f':
-                qstring_append(str, "\\f");
-                break;
-            case '\n':
-                qstring_append(str, "\\n");
-                break;
-            case '\r':
-                qstring_append(str, "\\r");
-                break;
-            case '\t':
-                qstring_append(str, "\\t");
-                break;
-            default:
-                if (cp < 0) {
-                    cp = 0xFFFD; /* replacement character */
-                }
-                if (cp > 0xFFFF) {
-                    /* beyond BMP; need a surrogate pair */
-                    snprintf(buf, sizeof(buf), "\\u%04X\\u%04X",
-                             0xD800 + ((cp - 0x10000) >> 10),
-                             0xDC00 + ((cp - 0x10000) & 0x3FF));
-                } else if (cp < 0x20 || cp >= 0x7F) {
-                    snprintf(buf, sizeof(buf), "\\u%04X", cp);
-                } else {
-                    buf[0] = cp;
-                    buf[1] = 0;
-                }
-                qstring_append(str, buf);
-            }
-        };
-
-        qstring_append(str, "\"");
+
+        json_writer_str(writer, name, qstring_get_str(val));
         break;
     }
     case QTYPE_QDICT: {
         QDict *val = qobject_to(QDict, obj);
-        const char *comma = pretty ? "," : ", ";
-        const char *sep = "";
         const QDictEntry *entry;
-        QString *qkey;
 
-        qstring_append(str, "{");
+        json_writer_start_object(writer, name);
 
         for (entry = qdict_first(val);
              entry;
              entry = qdict_next(val, entry)) {
-            qstring_append(str, sep);
-            json_pretty_newline(str, pretty, indent + 1);
-
-            qkey = qstring_from_str(qdict_entry_key(entry));
-            to_json(QOBJECT(qkey), str, pretty, indent + 1);
-            qobject_unref(qkey);
-
-            qstring_append(str, ": ");
-            to_json(qdict_entry_value(entry), str, pretty, indent + 1);
-            sep = comma;
+            to_json(writer, qdict_entry_key(entry), qdict_entry_value(entry));
         }
 
-        json_pretty_newline(str, pretty, indent);
-        qstring_append(str, "}");
+        json_writer_end_object(writer);
         break;
     }
     case QTYPE_QLIST: {
         QList *val = qobject_to(QList, obj);
-        const char *comma = pretty ? "," : ", ";
-        const char *sep = "";
         QListEntry *entry;
 
-        qstring_append(str, "[");
+        json_writer_start_array(writer, name);
 
         QLIST_FOREACH_ENTRY(val, entry) {
-            qstring_append(str, sep);
-            json_pretty_newline(str, pretty, indent + 1);
-            to_json(qlist_entry_obj(entry), str, pretty, indent + 1);
-            sep = comma;
+            to_json(writer, NULL, qlist_entry_obj(entry));
         }
 
-        json_pretty_newline(str, pretty, indent);
-        qstring_append(str, "]");
+        json_writer_end_array(writer);
         break;
     }
     case QTYPE_QBOOL: {
         QBool *val = qobject_to(QBool, obj);
 
-        if (qbool_get_bool(val)) {
-            qstring_append(str, "true");
-        } else {
-            qstring_append(str, "false");
-        }
+        json_writer_bool(writer, name, qbool_get_bool(val));
         break;
     }
     default:
@@ -294,20 +218,15 @@ static void to_json(const QObject *obj, QString *str, int pretty, int indent)
     }
 }
 
-QString *qobject_to_json(const QObject *obj)
+GString *qobject_to_json_pretty(const QObject *obj, bool pretty)
 {
-    QString *str = qstring_new();
-
-    to_json(obj, str, 0, 0);
+    JSONWriter *writer = json_writer_new(pretty);
 
-    return str;
+    to_json(writer, NULL, obj);
+    return json_writer_get_and_free(writer);
 }
 
-QString *qobject_to_json_pretty(const QObject *obj)
+GString *qobject_to_json(const QObject *obj)
 {
-    QString *str = qstring_new();
-
-    to_json(obj, str, 1, 0);
-
-    return str;
+    return qobject_to_json_pretty(obj, false);
 }
index 1be95367d1a0b7299f1a32de1df4139de8a0d635..356ad946b00c59b645612c4268085bcba522ccf3 100644 (file)
@@ -17,6 +17,7 @@
 #include "qapi/qmp/qnum.h"
 #include "qapi/qmp/qstring.h"
 #include "qemu/queue.h"
+#include "qobject-internal.h"
 
 /**
  * qlist_new(): Create a new QList
@@ -181,3 +182,8 @@ void qlist_destroy_obj(QObject *obj)
 
     g_free(qlist);
 }
+
+void qlist_unref(QList *q)
+{
+    qobject_unref(q);
+}
index 00870a1824a2100b404beee94dd58938e0aeb721..445a5db7f36f83b568e30bf619e5a2d49d7f610e 100644 (file)
@@ -12,6 +12,7 @@
 
 #include "qemu/osdep.h"
 #include "qapi/qmp/qnull.h"
+#include "qobject-internal.h"
 
 QNull qnull_ = {
     .base = {
@@ -28,3 +29,8 @@ bool qnull_is_equal(const QObject *x, const QObject *y)
 {
     return true;
 }
+
+void qnull_unref(QNull *q)
+{
+    qobject_unref(q);
+}
index 7012fc57f2fbe5bbbc71add116a66eecc9a70103..2bbeaedc7b44768fb1c5e22abd83430565acf8eb 100644 (file)
@@ -14,6 +14,7 @@
 
 #include "qemu/osdep.h"
 #include "qapi/qmp/qnum.h"
+#include "qobject-internal.h"
 
 /**
  * qnum_from_int(): Create a new QNum from an int64_t
@@ -161,37 +162,14 @@ double qnum_get_double(QNum *qn)
 
 char *qnum_to_string(QNum *qn)
 {
-    char *buffer;
-    int len;
-
     switch (qn->kind) {
     case QNUM_I64:
         return g_strdup_printf("%" PRId64, qn->u.i64);
     case QNUM_U64:
         return g_strdup_printf("%" PRIu64, qn->u.u64);
     case QNUM_DOUBLE:
-        /* FIXME: snprintf() is locale dependent; but JSON requires
-         * numbers to be formatted as if in the C locale. Dependence
-         * on C locale is a pervasive issue in QEMU. */
-        /* FIXME: This risks printing Inf or NaN, which are not valid
-         * JSON values. */
-        /* FIXME: the default precision of 6 for %f often causes
-         * rounding errors; we should be using DBL_DECIMAL_DIG (17),
-         * and only rounding to a shorter number if the result would
-         * still produce the same floating point value.  */
-        buffer = g_strdup_printf("%f" , qn->u.dbl);
-        len = strlen(buffer);
-        while (len > 0 && buffer[len - 1] == '0') {
-            len--;
-        }
-
-        if (len && buffer[len - 1] == '.') {
-            buffer[len - 1] = 0;
-        } else {
-            buffer[len] = 0;
-        }
-
-        return buffer;
+        /* 17 digits suffice for IEEE double */
+        return g_strdup_printf("%.17g", qn->u.dbl);
     }
 
     assert(0);
@@ -261,3 +239,8 @@ void qnum_destroy_obj(QObject *obj)
     assert(obj != NULL);
     g_free(qobject_to(QNum, obj));
 }
+
+void qnum_unref(QNum *q)
+{
+    qobject_unref(q);
+}
diff --git a/qobject/qobject-internal.h b/qobject/qobject-internal.h
new file mode 100644 (file)
index 0000000..b310c8e
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * QObject internals
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef QOBJECT_INTERNAL_H
+#define QOBJECT_INTERNAL_H
+
+#include "qapi/qmp/qobject.h"
+
+static inline void qobject_init(QObject *obj, QType type)
+{
+    assert(QTYPE_NONE < type && type < QTYPE__MAX);
+    obj->base.refcnt = 1;
+    obj->base.type = type;
+}
+
+void qbool_destroy_obj(QObject *obj);
+bool qbool_is_equal(const QObject *x, const QObject *y);
+
+void qdict_destroy_obj(QObject *obj);
+bool qdict_is_equal(const QObject *x, const QObject *y);
+
+void qlist_destroy_obj(QObject *obj);
+bool qlist_is_equal(const QObject *x, const QObject *y);
+
+bool qnull_is_equal(const QObject *x, const QObject *y);
+
+void qnum_destroy_obj(QObject *obj);
+bool qnum_is_equal(const QObject *x, const QObject *y);
+
+void qstring_destroy_obj(QObject *obj);
+bool qstring_is_equal(const QObject *x, const QObject *y);
+
+#endif
index 878dd76e7924ab9d8ea52bec24427090e6582159..d7077b8f2a9dadfe5b46563dfa011ca6244a9deb 100644 (file)
@@ -14,6 +14,7 @@
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qlist.h"
 #include "qapi/qmp/qstring.h"
+#include "qobject-internal.h"
 
 QEMU_BUILD_BUG_MSG(
     offsetof(QNull, base) != 0 ||
index b66a2c35f22b61dc5e9df3c42b36f52db13a43f3..794f8c93578a831a5feecfc9a0d8dfa50335ff48 100644 (file)
@@ -12,6 +12,7 @@
 
 #include "qemu/osdep.h"
 #include "qapi/qmp/qstring.h"
+#include "qobject-internal.h"
 
 /**
  * qstring_new(): Create a new empty QString
@@ -23,14 +24,6 @@ QString *qstring_new(void)
     return qstring_from_str("");
 }
 
-/**
- * qstring_get_length(): Get the length of a QString
- */
-size_t qstring_get_length(const QString *qstring)
-{
-    return qstring->length;
-}
-
 /**
  * qstring_from_substr(): Create a new QString from a C string substring
  *
@@ -41,18 +34,9 @@ QString *qstring_from_substr(const char *str, size_t start, size_t end)
     QString *qstring;
 
     assert(start <= end);
-
     qstring = g_malloc(sizeof(*qstring));
     qobject_init(QOBJECT(qstring), QTYPE_QSTRING);
-
-    qstring->length = end - start;
-    qstring->capacity = qstring->length;
-
-    assert(qstring->capacity < SIZE_MAX);
-    qstring->string = g_malloc(qstring->capacity + 1);
-    memcpy(qstring->string, str + start, qstring->length);
-    qstring->string[qstring->length] = 0;
-
+    qstring->string = g_strndup(str + start, end - start);
     return qstring;
 }
 
@@ -66,47 +50,22 @@ QString *qstring_from_str(const char *str)
     return qstring_from_substr(str, 0, strlen(str));
 }
 
-static void capacity_increase(QString *qstring, size_t len)
-{
-    if (qstring->capacity < (qstring->length + len)) {
-        assert(len <= SIZE_MAX - qstring->capacity);
-        qstring->capacity += len;
-        assert(qstring->capacity <= SIZE_MAX / 2);
-        qstring->capacity *= 2; /* use exponential growth */
-
-        qstring->string = g_realloc(qstring->string, qstring->capacity + 1);
-    }
-}
-
-/* qstring_append(): Append a C string to a QString
+/**
+ * qstring_from_gstring(): Convert a GString to a QString
+ *
+ * Return strong reference.
  */
-void qstring_append(QString *qstring, const char *str)
-{
-    size_t len = strlen(str);
 
-    capacity_increase(qstring, len);
-    memcpy(qstring->string + qstring->length, str, len);
-    qstring->length += len;
-    qstring->string[qstring->length] = 0;
-}
-
-void qstring_append_int(QString *qstring, int64_t value)
+QString *qstring_from_gstring(GString *gstr)
 {
-    char num[32];
+    QString *qstring;
 
-    snprintf(num, sizeof(num), "%" PRId64, value);
-    qstring_append(qstring, num);
+    qstring = g_malloc(sizeof(*qstring));
+    qobject_init(QOBJECT(qstring), QTYPE_QSTRING);
+    qstring->string = g_string_free(gstr, false);
+    return qstring;
 }
 
-/**
- * qstring_append_chr(): Append a C char to a QString
- */
-void qstring_append_chr(QString *qstring, int c)
-{
-    capacity_increase(qstring, 1);
-    qstring->string[qstring->length++] = c;
-    qstring->string[qstring->length] = 0;
-}
 
 /**
  * qstring_get_str(): Return a pointer to the stored string
@@ -119,27 +78,6 @@ const char *qstring_get_str(const QString *qstring)
     return qstring->string;
 }
 
-/**
- * qstring_get_try_str(): Return a pointer to the stored string
- *
- * NOTE: will return NULL if qstring is not provided.
- */
-const char *qstring_get_try_str(const QString *qstring)
-{
-    return qstring ? qstring_get_str(qstring) : NULL;
-}
-
-/**
- * qobject_get_try_str(): Return a pointer to the corresponding string
- *
- * NOTE: the string will only be returned if the object is valid, and
- * its type is QString, otherwise NULL is returned.
- */
-const char *qobject_get_try_str(const QObject *qstring)
-{
-    return qstring_get_try_str(qobject_to(QString, qstring));
-}
-
 /**
  * qstring_is_equal(): Test whether the two QStrings are equal
  */
@@ -149,33 +87,21 @@ bool qstring_is_equal(const QObject *x, const QObject *y)
                    qobject_to(QString, y)->string);
 }
 
-/**
- * qstring_free(): Free the memory allocated by a QString object
- *
- * Return: if @return_str, return the underlying string, to be
- * g_free(), otherwise NULL is returned.
- */
-char *qstring_free(QString *qstring, bool return_str)
-{
-    char *rv = NULL;
-
-    if (return_str) {
-        rv = qstring->string;
-    } else {
-        g_free(qstring->string);
-    }
-
-    g_free(qstring);
-
-    return rv;
-}
-
 /**
  * qstring_destroy_obj(): Free all memory allocated by a QString
  * object
  */
 void qstring_destroy_obj(QObject *obj)
 {
+    QString *qs;
+
     assert(obj != NULL);
-    qstring_free(qobject_to(QString, obj), FALSE);
+    qs = qobject_to(QString, obj);
+    g_free((char *)qs->string);
+    g_free(qs);
+}
+
+void qstring_unref(QString *q)
+{
+    qobject_unref(q);
 }
index 062a3789d877fd21632a3151d72baf83dfb6f92c..8192243430960d15ac00c9159395101ebeedf18f 100644 (file)
@@ -7,4 +7,4 @@ qom_ss.add(files(
 ))
 
 qmp_ss.add(files('qom-qmp-cmds.c'))
-softmmu_ss.add(files('qom-hmp-cmds.c'))
+system_ss.add(files('qom-hmp-cmds.c'))
index 0dec164192a55d3d9d955d445db9c9839f618405..95c0dc8285fe70ceec211d56c6de7327045cff05 100644 (file)
 #include "qom/object.h"
 #include "qom/object_interfaces.h"
 #include "qemu/cutils.h"
+#include "qemu/memalign.h"
 #include "qapi/visitor.h"
 #include "qapi/string-input-visitor.h"
 #include "qapi/string-output-visitor.h"
 #include "qapi/qobject-input-visitor.h"
+#include "qapi/forward-visitor.h"
 #include "qapi/qapi-builtin-visit.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qjson.h"
@@ -29,6 +31,7 @@
  * of the QOM core on QObject?  */
 #include "qom/qom-qobject.h"
 #include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qlist.h"
 #include "qapi/qmp/qnum.h"
 #include "qapi/qmp/qstring.h"
 #include "qemu/error-report.h"
@@ -218,6 +221,19 @@ static size_t type_object_get_size(TypeImpl *ti)
     return 0;
 }
 
+static size_t type_object_get_align(TypeImpl *ti)
+{
+    if (ti->instance_align) {
+        return ti->instance_align;
+    }
+
+    if (type_has_parent(ti)) {
+        return type_object_get_align(type_get_parent(ti));
+    }
+
+    return 0;
+}
+
 size_t object_type_get_instance_size(const char *typename)
 {
     TypeImpl *type = type_get_by_name(typename);
@@ -236,12 +252,6 @@ static bool type_is_ancestor(TypeImpl *type, TypeImpl *target_type)
             return true;
         }
 
-
-        if (type->parent && !strcmp(type->parent, "chardev-spiceport")) {
-            if (!type->parent_type && !type_get_by_name(type->parent)) {
-                return false;
-            }
-        }
         type = type_get_parent(type);
     }
 
@@ -297,6 +307,7 @@ static void type_initialize(TypeImpl *ti)
 
     ti->class_size = type_class_get_size(ti);
     ti->instance_size = type_object_get_size(ti);
+    ti->instance_align = type_object_get_align(ti);
     /* Any type with zero instance_size is implicitly abstract.
      * This means interface types are all abstract.
      */
@@ -448,7 +459,8 @@ static GPtrArray *object_compat_props[3];
  * other than "-global".  These are generally used for syntactic
  * sugar and legacy command line options.
  */
-void object_register_sugar_prop(const char *driver, const char *prop, const char *value)
+void object_register_sugar_prop(const char *driver, const char *prop,
+                                const char *value, bool optional)
 {
     GlobalProperty *g;
     if (!object_compat_props[2]) {
@@ -458,6 +470,7 @@ void object_register_sugar_prop(const char *driver, const char *prop, const char
     g->driver = g_strdup(driver);
     g->property = g_strdup(prop);
     g->value = g_strdup(value);
+    g->optional = optional;
     g_ptr_array_add(object_compat_props[2], g);
 }
 
@@ -522,26 +535,19 @@ static void object_initialize_with_type(Object *obj, size_t size, TypeImpl *type
     object_post_init_with_type(obj, type);
 }
 
-#ifdef CONFIG_MODULES
-int module_load_check(const char *name)
-{
-    TypeImpl *type = type_get_by_name(name);
-    if (!type) {
-        module_load_qom_one(name);
-        type = type_get_by_name(name);
-    }
-    return type == NULL;
-}
-#endif
-
 void object_initialize(void *data, size_t size, const char *typename)
 {
     TypeImpl *type = type_get_by_name(typename);
 
 #ifdef CONFIG_MODULES
     if (!type) {
-        module_load_qom_one(typename);
-        type = type_get_by_name(typename);
+        int rv = module_load_qom(typename, &error_fatal);
+        if (rv > 0) {
+            type = type_get_by_name(typename);
+        } else {
+            error_report("missing object type '%s'", typename);
+            exit(1);
+        }
     }
 #endif
     if (!type) {
@@ -703,6 +709,7 @@ static void object_finalize(void *data)
     object_deinit(obj, ti);
 
     g_assert(obj->ref == 0);
+    g_assert(obj->parent == NULL);
     if (obj->free) {
         obj->free(obj);
     }
@@ -710,7 +717,7 @@ static void object_finalize(void *data)
 
 /* Find the minimum alignment guaranteed by the system malloc. */
 #if __STDC_VERSION__ >= 201112L
-typddef max_align_t qemu_max_align_t;
+typedef max_align_t qemu_max_align_t;
 #else
 typedef union {
     long l;
@@ -1046,8 +1053,13 @@ ObjectClass *module_object_class_by_name(const char *typename)
     oc = object_class_by_name(typename);
 #ifdef CONFIG_MODULES
     if (!oc) {
-        module_load_qom_one(typename);
-        oc = object_class_by_name(typename);
+        Error *local_err = NULL;
+        int rv = module_load_qom(typename, &local_err);
+        if (rv > 0) {
+            oc = object_class_by_name(typename);
+        } else if (rv < 0) {
+            error_report_err(local_err);
+        }
     }
 #endif
     return oc;
@@ -1181,10 +1193,14 @@ GSList *object_class_get_list_sorted(const char *implements_type,
 Object *object_ref(void *objptr)
 {
     Object *obj = OBJECT(objptr);
+    uint32_t ref;
+
     if (!obj) {
         return NULL;
     }
-    qatomic_inc(&obj->ref);
+    ref = qatomic_fetch_inc(&obj->ref);
+    /* Assert waaay before the integer overflows */
+    g_assert(ref < INT_MAX);
     return obj;
 }
 
@@ -1214,11 +1230,11 @@ object_property_try_add(Object *obj, const char *name, const char *type,
 
     if (name_len >= 3 && !memcmp(name + name_len - 3, "[*]", 4)) {
         int i;
-        ObjectProperty *ret;
+        ObjectProperty *ret = NULL;
         char *name_no_array = g_strdup(name);
 
         name_no_array[name_len - 3] = '\0';
-        for (i = 0; ; ++i) {
+        for (i = 0; i < INT16_MAX; ++i) {
             char *full_name = g_strdup_printf("%s[%d]", name_no_array, i);
 
             ret = object_property_try_add(obj, full_name, type, get, set,
@@ -1229,6 +1245,7 @@ object_property_try_add(Object *obj, const char *name, const char *type,
             }
         }
         g_free(name_no_array);
+        assert(ret);
         return ret;
     }
 
@@ -1391,7 +1408,8 @@ bool object_property_get(Object *obj, const char *name, Visitor *v,
     }
 
     if (!prop->get) {
-        error_setg(errp, QERR_PERMISSION_DENIED);
+        error_setg(errp, "Property '%s.%s' is not readable",
+                   object_get_typename(obj), name);
         return false;
     }
     prop->get(obj, v, name, prop->opaque, &err);
@@ -1402,7 +1420,7 @@ bool object_property_get(Object *obj, const char *name, Visitor *v,
 bool object_property_set(Object *obj, const char *name, Visitor *v,
                          Error **errp)
 {
-    Error *err = NULL;
+    ERRP_GUARD();
     ObjectProperty *prop = object_property_find_err(obj, name, errp);
 
     if (prop == NULL) {
@@ -1410,12 +1428,12 @@ bool object_property_set(Object *obj, const char *name, Visitor *v,
     }
 
     if (!prop->set) {
-        error_setg(errp, QERR_PERMISSION_DENIED);
+        error_setg(errp, "Property '%s.%s' is not writable",
+                   object_get_typename(obj), name);
         return false;
     }
-    prop->set(obj, v, name, prop->opaque, &err);
-    error_propagate(errp, err);
-    return !err;
+    prop->set(obj, v, name, prop->opaque, errp);
+    return !*errp;
 }
 
 bool object_property_set_str(Object *obj, const char *name,
@@ -1432,15 +1450,18 @@ char *object_property_get_str(Object *obj, const char *name,
                               Error **errp)
 {
     QObject *ret = object_property_get_qobject(obj, name, errp);
+    QString *qstring;
     char *retval;
 
     if (!ret) {
         return NULL;
     }
-
-    retval = g_strdup(qobject_get_try_str(ret));
-    if (!retval) {
+    qstring = qobject_to(QString, ret);
+    if (!qstring) {
         error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name, "string");
+        retval = NULL;
+    } else {
+        retval = g_strdup(qstring_get_str(qstring));
     }
 
     qobject_unref(ret);
@@ -1568,6 +1589,11 @@ void object_property_set_default_str(ObjectProperty *prop, const char *value)
     object_property_set_default(prop, QOBJECT(qstring_from_str(value)));
 }
 
+void object_property_set_default_list(ObjectProperty *prop)
+{
+    object_property_set_default(prop, QOBJECT(qlist_new()));
+}
+
 void object_property_set_default_int(ObjectProperty *prop, int64_t value)
 {
     object_property_set_default(prop, QOBJECT(qnum_from_int(value)));
@@ -2155,6 +2181,17 @@ Object *object_resolve_path(const char *path, bool *ambiguous)
     return object_resolve_path_type(path, TYPE_OBJECT, ambiguous);
 }
 
+Object *object_resolve_path_at(Object *parent, const char *path)
+{
+    g_auto(GStrv) parts = g_strsplit(path, "/", 0);
+
+    if (*path == '/') {
+        return object_resolve_abs_path(object_get_root(), parts + 1,
+                                       TYPE_OBJECT);
+    }
+    return object_resolve_abs_path(parent, parts, TYPE_OBJECT);
+}
+
 typedef struct StringProperty
 {
     char *(*get)(Object *, Error **);
@@ -2192,11 +2229,10 @@ static void property_set_str(Object *obj, Visitor *v, const char *name,
     g_free(value);
 }
 
-static void property_release_str(Object *obj, const char *name,
-                                 void *opaque)
+static void property_release_data(Object *obj, const char *name,
+                                  void *opaque)
 {
-    StringProperty *prop = opaque;
-    g_free(prop);
+    g_free(opaque);
 }
 
 ObjectProperty *
@@ -2212,7 +2248,7 @@ object_property_add_str(Object *obj, const char *name,
     return object_property_add(obj, name, "string",
                                get ? property_get_str : NULL,
                                set ? property_set_str : NULL,
-                               property_release_str,
+                               property_release_data,
                                prop);
 }
 
@@ -2269,13 +2305,6 @@ static void property_set_bool(Object *obj, Visitor *v, const char *name,
     prop->set(obj, value, errp);
 }
 
-static void property_release_bool(Object *obj, const char *name,
-                                  void *opaque)
-{
-    BoolProperty *prop = opaque;
-    g_free(prop);
-}
-
 ObjectProperty *
 object_property_add_bool(Object *obj, const char *name,
                          bool (*get)(Object *, Error **),
@@ -2289,7 +2318,7 @@ object_property_add_bool(Object *obj, const char *name,
     return object_property_add(obj, name, "bool",
                                get ? property_get_bool : NULL,
                                set ? property_set_bool : NULL,
-                               property_release_bool,
+                               property_release_data,
                                prop);
 }
 
@@ -2338,13 +2367,6 @@ static void property_set_enum(Object *obj, Visitor *v, const char *name,
     prop->set(obj, value, errp);
 }
 
-static void property_release_enum(Object *obj, const char *name,
-                                  void *opaque)
-{
-    EnumProperty *prop = opaque;
-    g_free(prop);
-}
-
 ObjectProperty *
 object_property_add_enum(Object *obj, const char *name,
                          const char *typename,
@@ -2361,7 +2383,7 @@ object_property_add_enum(Object *obj, const char *name,
     return object_property_add(obj, name, typename,
                                get ? property_get_enum : NULL,
                                set ? property_set_enum : NULL,
-                               property_release_enum,
+                               property_release_data,
                                prop);
 }
 
@@ -2428,13 +2450,6 @@ out_end:
     visit_end_struct(v, NULL);
 }
 
-static void property_release_tm(Object *obj, const char *name,
-                                void *opaque)
-{
-    TMProperty *prop = opaque;
-    g_free(prop);
-}
-
 ObjectProperty *
 object_property_add_tm(Object *obj, const char *name,
                        void (*get)(Object *, struct tm *, Error **))
@@ -2445,7 +2460,7 @@ object_property_add_tm(Object *obj, const char *name,
 
     return object_property_add(obj, name, "struct tm",
                                get ? property_get_tm : NULL, NULL,
-                               property_release_tm,
+                               property_release_data,
                                prop);
 }
 
@@ -2716,16 +2731,20 @@ static void property_get_alias(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
 {
     AliasProperty *prop = opaque;
+    Visitor *alias_v = visitor_forward_field(v, prop->target_name, name);
 
-    object_property_get(prop->target_obj, prop->target_name, v, errp);
+    object_property_get(prop->target_obj, prop->target_name, alias_v, errp);
+    visit_free(alias_v);
 }
 
 static void property_set_alias(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
 {
     AliasProperty *prop = opaque;
+    Visitor *alias_v = visitor_forward_field(v, prop->target_name, name);
 
-    object_property_set(prop->target_obj, prop->target_name, v, errp);
+    object_property_set(prop->target_obj, prop->target_name, alias_v, errp);
+    visit_free(alias_v);
 }
 
 static Object *property_resolve_alias(Object *obj, void *opaque,
@@ -2811,13 +2830,13 @@ static void object_class_init(ObjectClass *klass, void *data)
 
 static void register_types(void)
 {
-    static TypeInfo interface_info = {
+    static const TypeInfo interface_info = {
         .name = TYPE_INTERFACE,
         .class_size = sizeof(InterfaceClass),
         .abstract = true,
     };
 
-    static TypeInfo object_info = {
+    static const TypeInfo object_info = {
         .name = TYPE_OBJECT,
         .instance_size = sizeof(Object),
         .class_init = object_class_init,
index ed896fe764ccf45de570d5da241936dcb8cf0fe4..e0833c8bfe48e8e9627e21846e70c3d53d969fc8 100644 (file)
@@ -2,17 +2,22 @@
 
 #include "qemu/cutils.h"
 #include "qapi/error.h"
+#include "qapi/qapi-visit-qom.h"
+#include "qapi/qmp/qobject.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qjson.h"
-#include "qapi/qmp/qstring.h"
 #include "qapi/qobject-input-visitor.h"
+#include "qapi/qobject-output-visitor.h"
 #include "qom/object_interfaces.h"
 #include "qemu/help_option.h"
+#include "qemu/id.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
+#include "qemu/qemu-print.h"
 #include "qapi/opts-visitor.h"
 #include "qemu/config-file.h"
+#include "qemu/keyval.h"
 
 bool user_creatable_complete(UserCreatable *uc, Error **errp)
 {
@@ -38,15 +43,53 @@ bool user_creatable_can_be_deleted(UserCreatable *uc)
     }
 }
 
+static void object_set_properties_from_qdict(Object *obj, const QDict *qdict,
+                                             Visitor *v, Error **errp)
+{
+    const QDictEntry *e;
+
+    if (!visit_start_struct(v, NULL, NULL, 0, errp)) {
+        return;
+    }
+    for (e = qdict_first(qdict); e; e = qdict_next(qdict, e)) {
+        if (!object_property_set(obj, e->key, v, errp)) {
+            goto out;
+        }
+    }
+    visit_check_struct(v, errp);
+out:
+    visit_end_struct(v, NULL);
+}
+
+void object_set_properties_from_keyval(Object *obj, const QDict *qdict,
+                                       bool from_json, Error **errp)
+{
+    Visitor *v;
+    if (from_json) {
+        v = qobject_input_visitor_new(QOBJECT(qdict));
+    } else {
+        v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
+    }
+    object_set_properties_from_qdict(obj, qdict, v, errp);
+    visit_free(v);
+}
+
 Object *user_creatable_add_type(const char *type, const char *id,
                                 const QDict *qdict,
                                 Visitor *v, Error **errp)
 {
+    ERRP_GUARD();
     Object *obj;
     ObjectClass *klass;
-    const QDictEntry *e;
     Error *local_err = NULL;
 
+    if (id != NULL && !id_wellformed(id)) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "id", "an identifier");
+        error_append_hint(errp, "Identifiers consist of letters, digits, "
+                          "'-', '.', '_', starting with a letter.\n");
+        return NULL;
+    }
+
     klass = object_class_by_name(type);
     if (!klass) {
         error_setg(errp, "invalid object type: %s", type);
@@ -66,18 +109,7 @@ Object *user_creatable_add_type(const char *type, const char *id,
 
     assert(qdict);
     obj = object_new(type);
-    if (!visit_start_struct(v, NULL, NULL, 0, &local_err)) {
-        goto out;
-    }
-    for (e = qdict_first(qdict); e; e = qdict_next(qdict, e)) {
-        if (!object_property_set(obj, e->key, v, &local_err)) {
-            break;
-        }
-    }
-    if (!local_err) {
-        visit_check_struct(v, &local_err);
-    }
-    visit_end_struct(v, NULL);
+    object_set_properties_from_qdict(obj, qdict, v, &local_err);
     if (local_err) {
         goto out;
     }
@@ -105,90 +137,28 @@ out:
     return obj;
 }
 
-bool user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp)
+void user_creatable_add_qapi(ObjectOptions *options, Error **errp)
 {
     Visitor *v;
+    QObject *qobj;
+    QDict *props;
     Object *obj;
-    g_autofree char *type = NULL;
-    g_autofree char *id = NULL;
-
-    type = g_strdup(qdict_get_try_str(qdict, "qom-type"));
-    if (!type) {
-        error_setg(errp, QERR_MISSING_PARAMETER, "qom-type");
-        return false;
-    }
-    qdict_del(qdict, "qom-type");
 
-    id = g_strdup(qdict_get_try_str(qdict, "id"));
-    if (!id) {
-        error_setg(errp, QERR_MISSING_PARAMETER, "id");
-        return false;
-    }
-    qdict_del(qdict, "id");
-
-    if (keyval) {
-        v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
-    } else {
-        v = qobject_input_visitor_new(QOBJECT(qdict));
-    }
-    obj = user_creatable_add_type(type, id, qdict, v, errp);
+    v = qobject_output_visitor_new(&qobj);
+    visit_type_ObjectOptions(v, NULL, &options, &error_abort);
+    visit_complete(v, &qobj);
     visit_free(v);
-    object_unref(obj);
-    return !!obj;
-}
 
-Object *user_creatable_add_opts(QemuOpts *opts, Error **errp)
-{
-    Visitor *v;
-    QDict *pdict;
-    Object *obj;
-    const char *id = qemu_opts_id(opts);
-    char *type = qemu_opt_get_del(opts, "qom-type");
-
-    if (!type) {
-        error_setg(errp, QERR_MISSING_PARAMETER, "qom-type");
-        return NULL;
-    }
-    if (!id) {
-        error_setg(errp, QERR_MISSING_PARAMETER, "id");
-        qemu_opt_set(opts, "qom-type", type, &error_abort);
-        g_free(type);
-        return NULL;
-    }
-
-    qemu_opts_set_id(opts, NULL);
-    pdict = qemu_opts_to_qdict(opts, NULL);
-
-    v = opts_visitor_new(opts);
-    obj = user_creatable_add_type(type, id, pdict, v, errp);
-    visit_free(v);
-
-    qemu_opts_set_id(opts, (char *) id);
-    qemu_opt_set(opts, "qom-type", type, &error_abort);
-    g_free(type);
-    qobject_unref(pdict);
-    return obj;
-}
+    props = qobject_to(QDict, qobj);
+    qdict_del(props, "qom-type");
+    qdict_del(props, "id");
 
-
-int user_creatable_add_opts_foreach(void *opaque, QemuOpts *opts, Error **errp)
-{
-    bool (*type_opt_predicate)(const char *, QemuOpts *) = opaque;
-    Object *obj = NULL;
-    const char *type;
-
-    type = qemu_opt_get(opts, "qom-type");
-    if (type && type_opt_predicate &&
-        !type_opt_predicate(type, opts)) {
-        return 0;
-    }
-
-    obj = user_creatable_add_opts(opts, errp);
-    if (!obj) {
-        return -1;
-    }
+    v = qobject_input_visitor_new(QOBJECT(props));
+    obj = user_creatable_add_type(ObjectType_str(options->qom_type),
+                                  options->id, props, v, errp);
     object_unref(obj);
-    return 0;
+    qobject_unref(qobj);
+    visit_free(v);
 }
 
 char *object_property_help(const char *name, const char *type,
@@ -207,7 +177,8 @@ char *object_property_help(const char *name, const char *type,
         g_string_append(str, description);
     }
     if (defval) {
-        g_autofree char *def_json = qstring_free(qobject_to_json(defval), TRUE);
+        g_autofree char *def_json = g_string_free(qobject_to_json(defval),
+                                                  false);
         g_string_append_printf(str, " (default: %s)", def_json);
     }
 
@@ -218,16 +189,16 @@ static void user_creatable_print_types(void)
 {
     GSList *l, *list;
 
-    printf("List of user creatable objects:\n");
+    qemu_printf("List of user creatable objects:\n");
     list = object_class_get_list_sorted(TYPE_USER_CREATABLE, false);
     for (l = list; l != NULL; l = l->next) {
         ObjectClass *oc = OBJECT_CLASS(l->data);
-        printf("  %s\n", object_class_get_name(oc));
+        qemu_printf("  %s\n", object_class_get_name(oc));
     }
     g_slist_free(list);
 }
 
-static bool user_creatable_print_type_properites(const char *type)
+bool type_print_class_properties(const char *type)
 {
     ObjectClass *klass;
     ObjectPropertyIterator iter;
@@ -253,12 +224,12 @@ static bool user_creatable_print_type_properites(const char *type)
     }
     g_ptr_array_sort(array, (GCompareFunc)qemu_pstrcmp0);
     if (array->len > 0) {
-        printf("%s options:\n", type);
+        qemu_printf("%s options:\n", type);
     } else {
-        printf("There are no options for %s.\n", type);
+        qemu_printf("There are no options for %s.\n", type);
     }
     for (i = 0; i < array->len; i++) {
-        printf("%s\n", (char *)array->pdata[i]);
+        qemu_printf("%s\n", (char *)array->pdata[i]);
     }
     g_ptr_array_set_free_func(array, g_free);
     g_ptr_array_free(array, true);
@@ -273,23 +244,83 @@ bool user_creatable_print_help(const char *type, QemuOpts *opts)
     }
 
     if (qemu_opt_has_help_opt(opts)) {
-        return user_creatable_print_type_properites(type);
+        return type_print_class_properties(type);
     }
 
     return false;
 }
 
-void user_creatable_print_help_from_qdict(QDict *args)
+static void user_creatable_print_help_from_qdict(QDict *args)
 {
     const char *type = qdict_get_try_str(args, "qom-type");
 
-    if (!type || !user_creatable_print_type_properites(type)) {
+    if (!type || !type_print_class_properties(type)) {
         user_creatable_print_types();
     }
 }
 
+ObjectOptions *user_creatable_parse_str(const char *str, Error **errp)
+{
+    ERRP_GUARD();
+    QObject *obj;
+    bool help;
+    Visitor *v;
+    ObjectOptions *options;
+
+    if (str[0] == '{') {
+        obj = qobject_from_json(str, errp);
+        if (!obj) {
+            return NULL;
+        }
+        v = qobject_input_visitor_new(obj);
+    } else {
+        QDict *args = keyval_parse(str, "qom-type", &help, errp);
+        if (*errp) {
+            return NULL;
+        }
+        if (help) {
+            user_creatable_print_help_from_qdict(args);
+            qobject_unref(args);
+            return NULL;
+        }
+
+        obj = QOBJECT(args);
+        v = qobject_input_visitor_new_keyval(obj);
+    }
+
+    visit_type_ObjectOptions(v, NULL, &options, errp);
+    visit_free(v);
+    qobject_unref(obj);
+
+    return options;
+}
+
+bool user_creatable_add_from_str(const char *str, Error **errp)
+{
+    ERRP_GUARD();
+    ObjectOptions *options;
+
+    options = user_creatable_parse_str(str, errp);
+    if (!options) {
+        return false;
+    }
+
+    user_creatable_add_qapi(options, errp);
+    qapi_free_ObjectOptions(options);
+    return !*errp;
+}
+
+void user_creatable_process_cmdline(const char *cmdline)
+{
+    if (!user_creatable_add_from_str(cmdline, &error_fatal)) {
+        /* Help was printed */
+        exit(EXIT_SUCCESS);
+    }
+}
+
 bool user_creatable_del(const char *id, Error **errp)
 {
+    QemuOptsList *opts_list;
     Object *container;
     Object *obj;
 
@@ -309,8 +340,10 @@ bool user_creatable_del(const char *id, Error **errp)
      * if object was defined on the command-line, remove its corresponding
      * option group entry
      */
-    qemu_opts_del(qemu_opts_find(qemu_find_opts_err("object", &error_abort),
-                                 id));
+    opts_list = qemu_find_opts_err("object", NULL);
+    if (opts_list) {
+        qemu_opts_del(qemu_opts_find(opts_list, id));
+    }
 
     object_unparent(obj);
     return true;
index 8861a109d5d35239ab943f00c58233937ecc14ae..6e3a2175a4e97f697817e771d40ae5665352abcc 100644 (file)
@@ -13,8 +13,9 @@
 #include "qapi/qapi-commands-qom.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qjson.h"
-#include "qapi/qmp/qstring.h"
+#include "qemu/readline.h"
 #include "qom/object.h"
+#include "qom/object_interfaces.h"
 
 void hmp_qom_list(Monitor *mon, const QDict *qdict)
 {
@@ -78,9 +79,9 @@ void hmp_qom_get(Monitor *mon, const QDict *qdict)
     QObject *obj = qmp_qom_get(path, property, &err);
 
     if (err == NULL) {
-        QString *str = qobject_to_json_pretty(obj);
-        monitor_printf(mon, "%s\n", qstring_get_str(str));
-        qobject_unref(str);
+        GString *str = qobject_to_json_pretty(obj, true);
+        monitor_printf(mon, "%s\n", str->str);
+        g_string_free(str, true);
     }
 
     qobject_unref(obj);
@@ -151,3 +152,68 @@ void hmp_info_qom_tree(Monitor *mon, const QDict *dict)
     }
     print_qom_composition(mon, obj, 0);
 }
+
+void hmp_object_add(Monitor *mon, const QDict *qdict)
+{
+    const char *options = qdict_get_str(qdict, "object");
+    Error *err = NULL;
+
+    user_creatable_add_from_str(options, &err);
+    hmp_handle_error(mon, err);
+}
+
+void hmp_object_del(Monitor *mon, const QDict *qdict)
+{
+    const char *id = qdict_get_str(qdict, "id");
+    Error *err = NULL;
+
+    user_creatable_del(id, &err);
+    hmp_handle_error(mon, err);
+}
+
+void object_add_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    GSList *list, *elt;
+    size_t len;
+
+    if (nb_args != 2) {
+        return;
+    }
+
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+    list = elt = object_class_get_list(TYPE_USER_CREATABLE, false);
+    while (elt) {
+        const char *name;
+
+        name = object_class_get_name(OBJECT_CLASS(elt->data));
+        if (strcmp(name, TYPE_USER_CREATABLE)) {
+            readline_add_completion_of(rs, str, name);
+        }
+        elt = elt->next;
+    }
+    g_slist_free(list);
+}
+
+void object_del_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    ObjectPropertyInfoList *list, *start;
+    size_t len;
+
+    if (nb_args != 2) {
+        return;
+    }
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+
+    start = list = qmp_qom_list("/objects", NULL);
+    while (list) {
+        ObjectPropertyInfo *info = list->value;
+
+        if (!strncmp(info->type, "child<", 5)) {
+            readline_add_completion_of(rs, str, info->name);
+        }
+        list = list->next;
+    }
+    qapi_free_ObjectPropertyInfoList(start);
+}
index cf130dc875bb6a7921fd7fb5af26f6448c7ad274..7c087299de9eba5d0bed7e156338e65866bf8832 100644 (file)
 #include "qapi/error.h"
 #include "qapi/qapi-commands-qdev.h"
 #include "qapi/qapi-commands-qom.h"
+#include "qapi/qapi-visit-qom.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qobject-output-visitor.h"
 #include "qemu/cutils.h"
 #include "qom/object_interfaces.h"
 #include "qom/qom-qobject.h"
@@ -46,14 +49,12 @@ ObjectPropertyInfoList *qmp_qom_list(const char *path, Error **errp)
 
     object_property_iter_init(&iter, obj);
     while ((prop = object_property_iter_next(&iter))) {
-        ObjectPropertyInfoList *entry = g_malloc0(sizeof(*entry));
+        ObjectPropertyInfo *value = g_new0(ObjectPropertyInfo, 1);
 
-        entry->value = g_malloc0(sizeof(ObjectPropertyInfo));
-        entry->next = props;
-        props = entry;
+        QAPI_LIST_PREPEND(props, value);
 
-        entry->value->name = g_strdup(prop->name);
-        entry->value->type = g_strdup(prop->type);
+        value->name = g_strdup(prop->name);
+        value->type = g_strdup(prop->type);
     }
 
     return props;
@@ -90,7 +91,7 @@ QObject *qmp_qom_get(const char *path, const char *property, Error **errp)
 
 static void qom_list_types_tramp(ObjectClass *klass, void *data)
 {
-    ObjectTypeInfoList *e, **pret = data;
+    ObjectTypeInfoList **pret = data;
     ObjectTypeInfo *info;
     ObjectClass *parent = object_class_get_parent(klass);
 
@@ -98,18 +99,13 @@ static void qom_list_types_tramp(ObjectClass *klass, void *data)
     info->name = g_strdup(object_class_get_name(klass));
     info->has_abstract = info->abstract = object_class_is_abstract(klass);
     if (parent) {
-        info->has_parent = true;
         info->parent = g_strdup(object_class_get_name(parent));
     }
 
-    e = g_malloc0(sizeof(*e));
-    e->value = info;
-    e->next = *pret;
-    *pret = e;
+    QAPI_LIST_PREPEND(*pret, info);
 }
 
-ObjectTypeInfoList *qmp_qom_list_types(bool has_implements,
-                                       const char *implements,
+ObjectTypeInfoList *qmp_qom_list_types(const char *implements,
                                        bool has_abstract,
                                        bool abstract,
                                        Error **errp)
@@ -131,23 +127,6 @@ ObjectPropertyInfoList *qmp_device_list_properties(const char *typename,
     ObjectPropertyIterator iter;
     ObjectPropertyInfoList *prop_list = NULL;
 
-#ifdef CONFIG_MODULES
-    if (!strcmp(typename, "virtio-gpu-pci") || !strcmp(typename, "virtio-gpu-ccw")) {
-        if (module_load_check("virtio-gpu-device")) {
-            ObjectPropertyInfo *info;
-            info = g_new0(ObjectPropertyInfo, 1);
-            info->name = g_strdup("dummy");
-            info->type = g_strdup("dummy");
-            info->has_description = false;
-            info->description = NULL;
-            info->default_value = 0;
-            info->has_default_value = 0;
-            QAPI_LIST_PREPEND(prop_list, info);
-            return prop_list;
-        }
-    }
-#endif
-
     klass = module_object_class_by_name(typename);
     if (klass == NULL) {
         error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
@@ -155,15 +134,10 @@ ObjectPropertyInfoList *qmp_device_list_properties(const char *typename,
         return NULL;
     }
 
-    klass = object_class_dynamic_cast(klass, TYPE_DEVICE);
-    if (klass == NULL) {
-        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "typename", TYPE_DEVICE);
-        return NULL;
-    }
-
-    if (object_class_is_abstract(klass)) {
+    if (!object_class_dynamic_cast(klass, TYPE_DEVICE)
+        || object_class_is_abstract(klass)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "typename",
-                   "non-abstract device type");
+                   "non-abstract device type");
         return NULL;
     }
 
@@ -172,7 +146,6 @@ ObjectPropertyInfoList *qmp_device_list_properties(const char *typename,
     object_property_iter_init(&iter, obj);
     while ((prop = object_property_iter_next(&iter))) {
         ObjectPropertyInfo *info;
-        ObjectPropertyInfoList *entry;
 
         /* Skip Object and DeviceState properties */
         if (strcmp(prop->name, "type") == 0 ||
@@ -193,15 +166,10 @@ ObjectPropertyInfoList *qmp_device_list_properties(const char *typename,
         info = g_new0(ObjectPropertyInfo, 1);
         info->name = g_strdup(prop->name);
         info->type = g_strdup(prop->type);
-        info->has_description = !!prop->description;
         info->description = g_strdup(prop->description);
         info->default_value = qobject_ref(prop->defval);
-        info->has_default_value = !!info->default_value;
 
-        entry = g_malloc0(sizeof(*entry));
-        entry->value = info;
-        entry->next = prop_list;
-        prop_list = entry;
+        QAPI_LIST_PREPEND(prop_list, info);
     }
 
     object_unref(obj);
@@ -225,9 +193,9 @@ ObjectPropertyInfoList *qmp_qom_list_properties(const char *typename,
         return NULL;
     }
 
-    klass = object_class_dynamic_cast(klass, TYPE_OBJECT);
-    if (klass == NULL) {
-        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "typename", TYPE_OBJECT);
+    if (!object_class_dynamic_cast(klass, TYPE_OBJECT)) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "typename",
+                   "a QOM type");
         return NULL;
     }
 
@@ -239,18 +207,13 @@ ObjectPropertyInfoList *qmp_qom_list_properties(const char *typename,
     }
     while ((prop = object_property_iter_next(&iter))) {
         ObjectPropertyInfo *info;
-        ObjectPropertyInfoList *entry;
 
         info = g_malloc0(sizeof(*info));
         info->name = g_strdup(prop->name);
         info->type = g_strdup(prop->type);
-        info->has_description = !!prop->description;
         info->description = g_strdup(prop->description);
 
-        entry = g_malloc0(sizeof(*entry));
-        entry->value = info;
-        entry->next = prop_list;
-        prop_list = entry;
+        QAPI_LIST_PREPEND(prop_list, info);
     }
 
     object_unref(obj);
@@ -258,30 +221,9 @@ ObjectPropertyInfoList *qmp_qom_list_properties(const char *typename,
     return prop_list;
 }
 
-void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp)
+void qmp_object_add(ObjectOptions *options, Error **errp)
 {
-    QObject *props;
-    QDict *pdict;
-
-    props = qdict_get(qdict, "props");
-    if (props) {
-        pdict = qobject_to(QDict, props);
-        if (!pdict) {
-            error_setg(errp, QERR_INVALID_PARAMETER_TYPE, "props", "dict");
-            return;
-        }
-        qobject_ref(pdict);
-        qdict_del(qdict, "props");
-        qdict_join(qdict, pdict, false);
-        if (qdict_size(pdict) != 0) {
-            error_setg(errp, "Option in 'props' conflicts with top level");
-            qobject_unref(pdict);
-            return;
-        }
-        qobject_unref(pdict);
-    }
-
-    user_creatable_add_dict(qdict, false, errp);
+    user_creatable_add_qapi(options, errp);
 }
 
 void qmp_object_del(const char *id, Error **errp)
index 945205bd1061da0f88e09d93c744e2fb379585a3..b2e9f4a7127f0e088290b5a5aa6b2a44b61b3da5 100644 (file)
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 
 # object.c
 object_dynamic_cast_assert(const char *type, const char *target, const char *file, int line, const char *func) "%s->%s (%s:%d:%s)"
old mode 100755 (executable)
new mode 100644 (file)
old mode 100755 (executable)
new mode 100644 (file)
index 63c11f4..d650dd7
@@ -75,7 +75,7 @@ if __name__ == '__main__':
            (analyser.locks, analyser.locked, analyser.unlocks))
 
     # Now dump the individual lock stats
-    for key, val in sorted(analyser.mutex_records.iteritems(),
+    for key, val in sorted(analyser.mutex_records.items(),
                            key=lambda k_v: k_v[1]["locks"]):
         print ("Lock: %#x locks: %d, locked: %d, unlocked: %d" %
                (key, val["locks"], val["locked"], val["unlocked"]))
index 14806e18c6e1d91400dc419f3017594c4e200855..45c821de32b3636073e403473529e5055eba2a29 100644 (file)
@@ -46,7 +46,6 @@ grep_include() {
 }
 
 echo Found $(find . -name "*.d" | wc -l) object files
-echo $(grep_include -F 'include/qemu-common.h') files include qemu-common.h
 echo $(grep_include -F 'hw/hw.h') files include hw/hw.h
 echo $(grep_include 'target/[a-z0-9]*/cpu\.h') files include cpu.h
 echo $(grep_include -F 'qapi-types.h') files include qapi-types.h
@@ -86,9 +85,6 @@ analyze() {
 echo osdep.h:
 analyze ../include/qemu/osdep.h
 
-echo qemu-common.h:
-analyze  -include ../include/qemu/osdep.h ../include/qemu-common.h
-
 echo hw/hw.h:
 analyze -include ../include/qemu/osdep.h ../include/hw/hw.h
 
old mode 100755 (executable)
new mode 100644 (file)
index 502ae14..3a7f9cd
@@ -38,13 +38,13 @@ class MigrationFile(object):
         self.file = open(self.filename, "rb")
 
     def read64(self):
-        return int.from_bytes(self.file.read(8), byteorder='big', signed=True)
+        return int.from_bytes(self.file.read(8), byteorder='big', signed=False)
 
     def read32(self):
-        return int.from_bytes(self.file.read(4), byteorder='big', signed=True)
+        return int.from_bytes(self.file.read(4), byteorder='big', signed=False)
 
     def read16(self):
-        return int.from_bytes(self.file.read(2), byteorder='big', signed=True)
+        return int.from_bytes(self.file.read(2), byteorder='big', signed=False)
 
     def read8(self):
         return int.from_bytes(self.file.read(1), byteorder='big', signed=True)
@@ -111,6 +111,8 @@ class RamSection(object):
     RAM_SAVE_FLAG_CONTINUE = 0x20
     RAM_SAVE_FLAG_XBZRLE   = 0x40
     RAM_SAVE_FLAG_HOOK     = 0x80
+    RAM_SAVE_FLAG_COMPRESS_PAGE = 0x100
+    RAM_SAVE_FLAG_MULTIFD_FLUSH = 0x200
 
     def __init__(self, file, version_id, ramargs, section_key):
         if version_id != 4:
@@ -121,6 +123,7 @@ class RamSection(object):
         self.TARGET_PAGE_SIZE = ramargs['page_size']
         self.dump_memory = ramargs['dump_memory']
         self.write_memory = ramargs['write_memory']
+        self.ignore_shared = ramargs['ignore_shared']
         self.sizeinfo = collections.OrderedDict()
         self.data = collections.OrderedDict()
         self.data['section sizes'] = self.sizeinfo
@@ -167,6 +170,8 @@ class RamSection(object):
                         f.truncate(0)
                         f.truncate(len)
                         self.files[self.name] = f
+                    if self.ignore_shared:
+                        mr_addr = self.file.read64()
                 flags &= ~self.RAM_SAVE_FLAG_MEM_SIZE
 
             if flags & self.RAM_SAVE_FLAG_COMPRESS:
@@ -205,6 +210,8 @@ class RamSection(object):
                 raise Exception("XBZRLE RAM compression is not supported yet")
             elif flags & self.RAM_SAVE_FLAG_HOOK:
                 raise Exception("RAM hooks don't make sense with files")
+            if flags & self.RAM_SAVE_FLAG_MULTIFD_FLUSH:
+                continue
 
             # End of RAM section
             if flags & self.RAM_SAVE_FLAG_EOS:
@@ -257,12 +264,41 @@ class HTABSection(object):
 
 
 class ConfigurationSection(object):
-    def __init__(self, file):
+    def __init__(self, file, desc):
         self.file = file
+        self.desc = desc
+        self.caps = []
+
+    def parse_capabilities(self, vmsd_caps):
+        if not vmsd_caps:
+            return
+
+        ncaps = vmsd_caps.data['caps_count'].data
+        self.caps = vmsd_caps.data['capabilities']
+
+        if type(self.caps) != list:
+            self.caps = [self.caps]
+
+        if len(self.caps) != ncaps:
+            raise Exception("Number of capabilities doesn't match "
+                            "caps_count field")
+
+    def has_capability(self, cap):
+        return any([str(c) == cap for c in self.caps])
 
     def read(self):
-        name_len = self.file.read32()
-        name = self.file.readstr(len = name_len)
+        if self.desc:
+            version_id = self.desc['version']
+            section = VMSDSection(self.file, version_id, self.desc,
+                                  'configuration')
+            section.read()
+            self.parse_capabilities(
+                section.data.get("configuration/capabilities"))
+        else:
+            # backward compatibility for older streams that don't have
+            # the configuration section in the json
+            name_len = self.file.read32()
+            name = self.file.readstr(len = name_len)
 
 class VMSDFieldGeneric(object):
     def __init__(self, desc, file):
@@ -284,6 +320,23 @@ class VMSDFieldGeneric(object):
         self.data = self.file.readvar(size)
         return self.data
 
+class VMSDFieldCap(object):
+    def __init__(self, desc, file):
+        self.file = file
+        self.desc = desc
+        self.data = ""
+
+    def __repr__(self):
+        return self.data
+
+    def __str__(self):
+        return self.data
+
+    def read(self):
+        len = self.file.read8()
+        self.data = self.file.readstr(len)
+
+
 class VMSDFieldInt(VMSDFieldGeneric):
     def __init__(self, desc, file):
         super(VMSDFieldInt, self).__init__(desc, file)
@@ -458,6 +511,7 @@ vmsd_field_readers = {
     "unused_buffer" : VMSDFieldGeneric,
     "bitmap" : VMSDFieldGeneric,
     "struct" : VMSDFieldStruct,
+    "capability": VMSDFieldCap,
     "unknown" : VMSDFieldGeneric,
 }
 
@@ -521,6 +575,7 @@ class MigrationDump(object):
         ramargs['page_size'] = self.vmsd_desc['page_size']
         ramargs['dump_memory'] = dump_memory
         ramargs['write_memory'] = write_memory
+        ramargs['ignore_shared'] = False
         self.section_classes[('ram',0)][1] = ramargs
 
         while True:
@@ -528,8 +583,10 @@ class MigrationDump(object):
             if section_type == self.QEMU_VM_EOF:
                 break
             elif section_type == self.QEMU_VM_CONFIGURATION:
-                section = ConfigurationSection(file)
+                config_desc = self.vmsd_desc.get('configuration')
+                section = ConfigurationSection(file, config_desc)
                 section.read()
+                ramargs['ignore_shared'] = section.has_capability('x-ignore-shared')
             elif section_type == self.QEMU_VM_SECTION_START or section_type == self.QEMU_VM_SECTION_FULL:
                 section_id = file.read32()
                 name = file.readstr()
@@ -588,7 +645,7 @@ if args.extract:
 
     dump.read(desc_only = True)
     print("desc.json")
-    f = open("desc.json", "wb")
+    f = open("desc.json", "w")
     f.truncate()
     f.write(jsonenc.encode(dump.vmsd_desc))
     f.close()
@@ -596,7 +653,7 @@ if args.extract:
     dump.read(write_memory = True)
     dict = dump.getDict()
     print("state.json")
-    f = open("state.json", "wb")
+    f = open("state.json", "w")
     f.truncate()
     f.write(jsonenc.encode(dict))
     f.close()
@@ -610,4 +667,4 @@ elif args.dump == "desc":
     dump.read(desc_only = True)
     print(jsonenc.encode(dump.vmsd_desc))
 else:
-    raise Exception("Please specify either -x, -d state or -d dump")
+    raise Exception("Please specify either -x, -d state or -d desc")
index c6169db69f51cdd39fc40e89231abd844751dab5..65af8063e4bddc89ea4bdf05716ebc9ed108e7fb 100755 (executable)
@@ -26,8 +26,7 @@ sub_file="${sub_tdir}/submodule.tar"
 # independent of what the developer currently has initialized
 # in their checkout, because the build environment is completely
 # different to the host OS.
-submodules="dtc slirp meson ui/keycodemapdb"
-submodules="$submodules tests/fp/berkeley-softfloat-3 tests/fp/berkeley-testfloat-3"
+subprojects="keycodemapdb libvfio-user berkeley-softfloat-3 berkeley-testfloat-3"
 sub_deinit=""
 
 function cleanup() {
@@ -51,23 +50,11 @@ function tree_ish() {
 
 git archive --format tar "$(tree_ish)" > "$tar_file"
 test $? -ne 0 && error "failed to archive qemu"
-for sm in $submodules; do
-    status="$(git submodule status "$sm")"
-    smhash="${status#[ +-]}"
-    smhash="${smhash%% *}"
-    case "$status" in
-        -*)
-            sub_deinit="$sub_deinit $sm"
-            git submodule update --init "$sm"
-            test $? -ne 0 && error "failed to update submodule $sm"
-            ;;
-        +*)
-            echo "WARNING: submodule $sm is out of sync"
-            ;;
-    esac
-    (cd $sm; git archive --format tar --prefix "$sm/" $(tree_ish)) > "$sub_file"
-    test $? -ne 0 && error "failed to archive submodule $sm ($smhash)"
-    tar --concatenate --file "$tar_file" "$sub_file"
-    test $? -ne 0 && error "failed append submodule $sm to $tar_file"
+
+for sp in $subprojects; do
+    meson subprojects download $sp
+    test $? -ne 0 && error "failed to download subproject $sp"
+    tar --append --file "$tar_file" --exclude=.git subprojects/$sp
+    test $? -ne 0 && error "failed to append subproject $sp to $tar_file"
 done
 exit 0
index 0461fd1c459cf2df41db093d5ac01879b0d853d1..28685d190d9a8ce0c5252f51b3add244849b4295 100644 (file)
@@ -2,7 +2,7 @@
 """Generate coroutine wrappers for block subsystem.
 
 The program parses one or several concatenated c files from stdin,
-searches for functions with the 'generated_co_wrapper' specifier
+searches for functions with the 'co_wrapper' specifier
 and generates corresponding wrappers on stdout.
 
 Usage: block-coroutine-wrapper.py generated-file.c FILE.[ch]...
@@ -42,7 +42,9 @@ def gen_header():
 #include "qemu/osdep.h"
 #include "block/coroutines.h"
 #include "block/block-gen.h"
-#include "block/block_int.h"\
+#include "block/block_int.h"
+#include "block/dirty-bitmap.h"
+#include "block/qapi.h"
 """
 
 
@@ -62,10 +64,58 @@ class ParamDecl:
 
 
 class FuncDecl:
-    def __init__(self, return_type: str, name: str, args: str) -> None:
+    def __init__(self, wrapper_type: str, return_type: str, name: str,
+                 args: str, variant: str) -> None:
         self.return_type = return_type.strip()
         self.name = name.strip()
+        self.struct_name = snake_to_camel(self.name)
         self.args = [ParamDecl(arg.strip()) for arg in args.split(',')]
+        self.create_only_co = 'mixed' not in variant
+        self.graph_rdlock = 'bdrv_rdlock' in variant
+        self.graph_wrlock = 'bdrv_wrlock' in variant
+
+        self.wrapper_type = wrapper_type
+
+        if wrapper_type == 'co':
+            if self.graph_wrlock:
+                raise ValueError(f"co function can't be wrlock: {self.name}")
+            subsystem, subname = self.name.split('_', 1)
+            self.target_name = f'{subsystem}_co_{subname}'
+        else:
+            assert wrapper_type == 'no_co'
+            subsystem, co_infix, subname = self.name.split('_', 2)
+            if co_infix != 'co':
+                raise ValueError(f"Invalid no_co function name: {self.name}")
+            if not self.create_only_co:
+                raise ValueError(f"no_co function can't be mixed: {self.name}")
+            if self.graph_rdlock and self.graph_wrlock:
+                raise ValueError("function can't be both rdlock and wrlock: "
+                                 f"{self.name}")
+            self.target_name = f'{subsystem}_{subname}'
+
+        self.ctx = self.gen_ctx()
+
+        self.get_result = 's->ret = '
+        self.ret = 'return s.ret;'
+        self.co_ret = 'return '
+        self.return_field = self.return_type + " ret;"
+        if self.return_type == 'void':
+            self.get_result = ''
+            self.ret = ''
+            self.co_ret = ''
+            self.return_field = ''
+
+    def gen_ctx(self, prefix: str = '') -> str:
+        t = self.args[0].type
+        name = self.args[0].name
+        if t == 'BlockDriverState *':
+            return f'bdrv_get_aio_context({prefix}{name})'
+        elif t == 'BdrvChild *':
+            return f'bdrv_get_aio_context({prefix}{name}->bs)'
+        elif t == 'BlockBackend *':
+            return f'blk_get_aio_context({prefix}{name})'
+        else:
+            return 'qemu_get_aio_context()'
 
     def gen_list(self, format: str) -> str:
         return ', '.join(format.format_map(arg.__dict__) for arg in self.args)
@@ -74,17 +124,22 @@ class FuncDecl:
         return '\n'.join(format.format_map(arg.__dict__) for arg in self.args)
 
 
-# Match wrappers declared with a generated_co_wrapper mark
-func_decl_re = re.compile(r'^int\s*generated_co_wrapper\s*'
+# Match wrappers declared with a co_wrapper mark
+func_decl_re = re.compile(r'^(?P<return_type>[a-zA-Z][a-zA-Z0-9_]* [\*]?)'
+                          r'(\s*coroutine_fn)?'
+                          r'\s*(?P<wrapper_type>(no_)?co)_wrapper'
+                          r'(?P<variant>(_[a-z][a-z0-9_]*)?)\s*'
                           r'(?P<wrapper_name>[a-z][a-z0-9_]*)'
                           r'\((?P<args>[^)]*)\);$', re.MULTILINE)
 
 
 def func_decl_iter(text: str) -> Iterator:
     for m in func_decl_re.finditer(text):
-        yield FuncDecl(return_type='int',
+        yield FuncDecl(wrapper_type=m.group('wrapper_type'),
+                       return_type=m.group('return_type'),
                        name=m.group('wrapper_name'),
-                       args=m.group('args'))
+                       args=m.group('args'),
+                       variant=m.group('variant'))
 
 
 def snake_to_camel(func_name: str) -> str:
@@ -97,15 +152,76 @@ def snake_to_camel(func_name: str) -> str:
     return ''.join(words)
 
 
-def gen_wrapper(func: FuncDecl) -> str:
-    assert func.name.startswith('bdrv_')
-    assert not func.name.startswith('bdrv_co_')
-    assert func.return_type == 'int'
-    assert func.args[0].type in ['BlockDriverState *', 'BdrvChild *']
+def create_mixed_wrapper(func: FuncDecl) -> str:
+    """
+    Checks if we are already in coroutine
+    """
+    name = func.target_name
+    struct_name = func.struct_name
+    graph_assume_lock = 'assume_graph_lock();' if func.graph_rdlock else ''
+
+    return f"""\
+{func.return_type} {func.name}({ func.gen_list('{decl}') })
+{{
+    if (qemu_in_coroutine()) {{
+        {graph_assume_lock}
+        {func.co_ret}{name}({ func.gen_list('{name}') });
+    }} else {{
+        {struct_name} s = {{
+            .poll_state.ctx = {func.ctx},
+            .poll_state.in_progress = true,
+
+{ func.gen_block('            .{name} = {name},') }
+        }};
+
+        s.poll_state.co = qemu_coroutine_create({name}_entry, &s);
+
+        bdrv_poll_co(&s.poll_state);
+        {func.ret}
+    }}
+}}"""
+
+
+def create_co_wrapper(func: FuncDecl) -> str:
+    """
+    Assumes we are not in coroutine, and creates one
+    """
+    name = func.target_name
+    struct_name = func.struct_name
+    return f"""\
+{func.return_type} {func.name}({ func.gen_list('{decl}') })
+{{
+    {struct_name} s = {{
+        .poll_state.ctx = {func.ctx},
+        .poll_state.in_progress = true,
+
+{ func.gen_block('        .{name} = {name},') }
+    }};
+    assert(!qemu_in_coroutine());
+
+    s.poll_state.co = qemu_coroutine_create({name}_entry, &s);
+
+    bdrv_poll_co(&s.poll_state);
+    {func.ret}
+}}"""
+
+
+def gen_co_wrapper(func: FuncDecl) -> str:
+    assert not '_co_' in func.name
+    assert func.wrapper_type == 'co'
 
-    name = 'bdrv_co_' + func.name[5:]
-    bs = 'bs' if func.args[0].type == 'BlockDriverState *' else 'child->bs'
-    struct_name = snake_to_camel(name)
+    name = func.target_name
+    struct_name = func.struct_name
+
+    graph_lock=''
+    graph_unlock=''
+    if func.graph_rdlock:
+        graph_lock='    bdrv_graph_co_rdlock();'
+        graph_unlock='    bdrv_graph_co_rdunlock();'
+
+    creation_function = create_mixed_wrapper
+    if func.create_only_co:
+        creation_function = create_co_wrapper
 
     return f"""\
 /*
@@ -114,6 +230,7 @@ def gen_wrapper(func: FuncDecl) -> str:
 
 typedef struct {struct_name} {{
     BdrvPollCo poll_state;
+    {func.return_field}
 { func.gen_block('    {decl};') }
 }} {struct_name};
 
@@ -121,28 +238,70 @@ static void coroutine_fn {name}_entry(void *opaque)
 {{
     {struct_name} *s = opaque;
 
-    s->poll_state.ret = {name}({ func.gen_list('s->{name}') });
+{graph_lock}
+    {func.get_result}{name}({ func.gen_list('s->{name}') });
+{graph_unlock}
     s->poll_state.in_progress = false;
 
     aio_wait_kick();
 }}
 
-int {func.name}({ func.gen_list('{decl}') })
+{creation_function(func)}"""
+
+
+def gen_no_co_wrapper(func: FuncDecl) -> str:
+    assert '_co_' in func.name
+    assert func.wrapper_type == 'no_co'
+
+    name = func.target_name
+    struct_name = func.struct_name
+
+    graph_lock=''
+    graph_unlock=''
+    if func.graph_rdlock:
+        graph_lock='    bdrv_graph_rdlock_main_loop();'
+        graph_unlock='    bdrv_graph_rdunlock_main_loop();'
+    elif func.graph_wrlock:
+        graph_lock='    bdrv_graph_wrlock(NULL);'
+        graph_unlock='    bdrv_graph_wrunlock(NULL);'
+
+    return f"""\
+/*
+ * Wrappers for {name}
+ */
+
+typedef struct {struct_name} {{
+    Coroutine *co;
+    {func.return_field}
+{ func.gen_block('    {decl};') }
+}} {struct_name};
+
+static void {name}_bh(void *opaque)
 {{
-    if (qemu_in_coroutine()) {{
-        return {name}({ func.gen_list('{name}') });
-    }} else {{
-        {struct_name} s = {{
-            .poll_state.bs = {bs},
-            .poll_state.in_progress = true,
+    {struct_name} *s = opaque;
+    AioContext *ctx = {func.gen_ctx('s->')};
 
-{ func.gen_block('            .{name} = {name},') }
-        }};
+{graph_lock}
+    aio_context_acquire(ctx);
+    {func.get_result}{name}({ func.gen_list('s->{name}') });
+    aio_context_release(ctx);
+{graph_unlock}
 
-        s.poll_state.co = qemu_coroutine_create({name}_entry, &s);
+    aio_co_wake(s->co);
+}}
 
-        return bdrv_poll_co(&s.poll_state);
-    }}
+{func.return_type} coroutine_fn {func.name}({ func.gen_list('{decl}') })
+{{
+    {struct_name} s = {{
+        .co = qemu_coroutine_self(),
+{ func.gen_block('        .{name} = {name},') }
+    }};
+    assert(qemu_in_coroutine());
+
+    aio_bh_schedule_oneshot(qemu_get_aio_context(), {name}_bh, &s);
+    qemu_coroutine_yield();
+
+    {func.ret}
 }}"""
 
 
@@ -150,7 +309,10 @@ def gen_wrappers(input_code: str) -> str:
     res = ''
     for func in func_decl_iter(input_code):
         res += '\n\n\n'
-        res += gen_wrapper(func)
+        if func.wrapper_type == 'co':
+            res += gen_co_wrapper(func)
+        else:
+            res += gen_no_co_wrapper(func)
 
     return res
 
old mode 100755 (executable)
new mode 100644 (file)
index 88c858f..6e4100d
@@ -12,7 +12,7 @@ use Term::ANSIColor qw(:constants);
 my $P = $0;
 $P =~ s@.*/@@g;
 
-our $SrcFile    = qr{\.(?:h|c|cpp|s|S|pl|py|sh)$};
+our $SrcFile    = qr{\.(?:(h|c)(\.inc)?|cpp|s|S|pl|py|sh)$};
 
 my $V = '0.31';
 
@@ -223,11 +223,11 @@ our $Sparse       = qr{
 our $Attribute = qr{
                        const|
                        volatile|
-                       QEMU_NORETURN|
-                       QEMU_WARN_UNUSED_RESULT|
-                       QEMU_SENTINEL|
+                       G_NORETURN|
+                       G_GNUC_WARN_UNUSED_RESULT|
+                       G_GNUC_NULL_TERMINATED|
                        QEMU_PACKED|
-                       GCC_FMT_ATTR
+                       G_GNUC_PRINTF
                  }x;
 our $Modifier;
 our $Inline    = qr{inline};
@@ -307,7 +307,6 @@ our @typeList = (
        qr{target_(?:u)?long},
        qr{hwaddr},
         # external libraries
-       qr{xml${Ident}},
        qr{xen\w+_handle},
        # Glib definitions
        qr{gchar},
@@ -399,7 +398,12 @@ if ($chk_branch) {
        my $num_patches = @patches;
        for my $hash (@patches) {
                my $FILE;
-               open($FILE, '-|', "git", "show", $hash) ||
+               open($FILE, '-|', "git",
+                     "-c", "diff.renamelimit=0",
+                     "-c", "diff.renames=True",
+                     "-c", "diff.algorithm=histogram",
+                     "show",
+                     "--patch-with-stat", $hash) ||
                        die "$P: git show $hash - $!\n";
                while (<$FILE>) {
                        chomp;
@@ -462,7 +466,7 @@ sub top_of_kernel_tree {
        my @tree_check = (
                "COPYING", "MAINTAINERS", "Makefile",
                "README.rst", "docs", "VERSION",
-               "linux-user", "softmmu"
+               "linux-user", "system"
        );
 
        foreach my $check (@tree_check) {
@@ -1499,7 +1503,7 @@ sub process {
                        $is_patch = 1;
                }
 
-               if ($line =~ /^Author: .*via Qemu-devel.*<qemu-devel\@nongnu.org>/) {
+               if ($line =~ /^(Author|From): .* via .*<qemu-devel\@nongnu.org>/) {
                    ERROR("Author email address is mangled by the mailing list\n" . $herecurr);
                }
 
@@ -1530,7 +1534,10 @@ sub process {
                    ($line =~ /^(?:new|deleted) file mode\s*\d+\s*$/ ||
                     $line =~ /^rename (?:from|to) [\w\/\.\-]+\s*$/ ||
                     ($line =~ /\{\s*([\w\/\.\-]*)\s*\=\>\s*([\w\/\.\-]*)\s*\}/ &&
-                     (defined($1) || defined($2))))) {
+                     (defined($1) || defined($2)))) &&
+                      !(($realfile ne '') &&
+                        defined($acpi_testexpected) &&
+                        ($realfile eq $acpi_testexpected))) {
                        $reported_maintainer_file = 1;
                        WARN("added, moved or deleted file(s), does MAINTAINERS need updating?\n" . $herecurr);
                }
@@ -1614,7 +1621,7 @@ sub process {
                                my $hex =
                                        qr/%[-+ *.0-9]*([hljztL]|ll|hh)?(x|X|"\s*PRI[xX][^"]*"?)/;
 
-                               # don't consider groups splitted by [.:/ ], like 2A.20:12ab
+                               # don't consider groups split by [.:/ ], like 2A.20:12ab
                                my $tmpline = $rawline;
                                $tmpline =~ s/($hex[.:\/ ])+$hex//g;
 
@@ -1660,6 +1667,7 @@ sub process {
 # some scripts we imported from other projects.
                next if ($realfile =~ /\.(s|S)$/);
                next if ($realfile =~ /(checkpatch|get_maintainer)\.pl$/);
+               next if ($realfile =~ /^target\/hexagon\/imported\/*/);
 
                if ($rawline =~ /^\+.*\t/) {
                        my $herevet = "$here\n" . cat_vet($rawline) . "\n";
@@ -1668,13 +1676,15 @@ sub process {
                }
 
 # check we are in a valid C source file if not then ignore this hunk
-               next if ($realfile !~ /\.(h|c|cpp)$/);
+               next if ($realfile !~ /\.((h|c)(\.inc)?|cpp)$/);
 
 # Block comment styles
 
                # Block comments use /* on a line of its own
-               if ($rawline !~ m@^\+.*/\*.*\*/[ \t)}]*$@ &&    #inline /*...*/
-                   $rawline =~ m@^\+.*/\*\*?+[ \t]*[^ \t]@) { # /* or /** non-blank
+               my $commentline = $rawline;
+               while ($commentline =~ s@^(\+.*)/\*.*\*/@$1@o) { # remove inline /*...*/
+               }
+               if ($commentline =~ m@^\+.*/\*\*?+[ \t]*[^ \t]@) { # /* or /** non-blank
                        WARN("Block comments use a leading /* on a separate line\n" . $herecurr);
                }
 
@@ -2824,8 +2834,8 @@ sub process {
                }
 
 # check for pointless casting of g_malloc return
-               if ($line =~ /\*\s*\)\s*g_(try)?(m|re)alloc(0?)(_n)?\b/) {
-                       if ($2 == 'm') {
+               if ($line =~ /\*\s*\)\s*g_(try|)(m|re)alloc(0?)(_n)?\b/) {
+                       if ($2 eq 'm') {
                                ERROR("unnecessary cast may hide bugs, use g_$1new$3 instead\n" . $herecurr);
                        } else {
                                ERROR("unnecessary cast may hide bugs, use g_$1renew$3 instead\n" . $herecurr);
@@ -2842,6 +2852,11 @@ sub process {
                        WARN("consider using g_path_get_$1() in preference to g_strdup($1())\n" . $herecurr);
                }
 
+# enforce g_memdup2() over g_memdup()
+               if ($line =~ /\bg_memdup\s*\(/) {
+                       ERROR("use g_memdup2() instead of unsafe g_memdup()\n" . $herecurr);
+               }
+
 # recommend qemu_strto* over strto* for numeric conversions
                if ($line =~ /\b(strto[^kd].*?)\s*\(/) {
                        ERROR("consider using qemu_$1 in preference to $1\n" . $herecurr);
@@ -2850,6 +2865,14 @@ sub process {
                if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) {
                        ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr);
                }
+# recommend qemu_bh_new_guarded instead of qemu_bh_new
+        if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) {
+                       ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr);
+               }
+# recommend aio_bh_new_guarded instead of aio_bh_new
+        if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) {
+                       ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr);
+               }
 # check for module_init(), use category-specific init macros explicitly please
                if ($line =~ /^module_init\s*\(/) {
                        ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr);
@@ -2870,6 +2893,7 @@ sub process {
                                SCSIBusInfo|
                                SCSIReqOps|
                                Spice[A-Z][a-zA-Z0-9]*Interface|
+                               TypeInfo|
                                USBDesc[A-Z][a-zA-Z0-9]*|
                                VhostOps|
                                VMStateDescription|
@@ -2961,10 +2985,13 @@ sub process {
                        ERROR("use memset() instead of bzero()\n" . $herecurr);
                }
                if ($line =~ /\bgetpagesize\(\)/) {
-                       ERROR("use qemu_real_host_page_size instead of getpagesize()\n" . $herecurr);
+                       ERROR("use qemu_real_host_page_size() instead of getpagesize()\n" . $herecurr);
                }
                if ($line =~ /\bsysconf\(_SC_PAGESIZE\)/) {
-                       ERROR("use qemu_real_host_page_size instead of sysconf(_SC_PAGESIZE)\n" . $herecurr);
+                       ERROR("use qemu_real_host_page_size() instead of sysconf(_SC_PAGESIZE)\n" . $herecurr);
+               }
+               if ($line =~ /\b(g_)?assert\(0\)/) {
+                       ERROR("use g_assert_not_reached() instead of assert(0)\n" . $herecurr);
                }
                my $non_exit_glib_asserts = qr{g_assert_cmpstr|
                                                g_assert_cmpint|
diff --git a/scripts/ci/coverage-summary.sh b/scripts/ci/coverage-summary.sh
new file mode 100755 (executable)
index 0000000..8d9fb4d
--- /dev/null
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# Author: Alex Bennée <alex.bennee@linaro.org>
+#
+# Summerise the state of code coverage with gcovr and tweak the output
+# to be more sane on CI runner. As we expect to be executed on a
+# throw away CI instance we do spam temp files all over the shop. You
+# most likely don't want to execute this script but just call gcovr
+# directly. See also "make coverage-report"
+#
+# This code is licensed under the GPL version 2 or later.  See
+# the COPYING file in the top-level directory.
+
+# first generate the coverage report
+gcovr -p -o raw-report.txt
+
+# strip the full-path and line markers
+sed s@$PWD\/@@ raw-report.txt | sed s/[0-9]\*[,-]//g > simplified.txt
+
+# reflow lines that got split
+awk '/.[ch]$/ { printf("%s", $0); next } 1' simplified.txt > rejoined.txt
+
+# columnify
+column -t rejoined.txt > final.txt
+
+# and dump, stripping out 0% coverage
+grep -v "0%" final.txt
diff --git a/scripts/ci/gitlab-kubernetes-runners/values.yaml b/scripts/ci/gitlab-kubernetes-runners/values.yaml
new file mode 100644 (file)
index 0000000..204a96a
--- /dev/null
@@ -0,0 +1,30 @@
+gitlabUrl: "https://gitlab.com/"
+runnerRegistrationToken: ""
+rbac:
+  create: true
+concurrent: 200
+runners:
+  privileged: true
+  config: |
+    [[runners]]
+      limit = 100
+      environment = [
+        "DOCKER_HOST=tcp://docker:2376",
+        "DOCKER_TLS_CERTDIR=/certs",
+        "DOCKER_TLS_VERIFY=1",
+        "DOCKER_CERT_PATH=/certs/client"
+      ]
+      [runners.kubernetes]
+        poll_timeout = 1200
+        image = "ubuntu:20.04"
+        cpu_request = "0.5"
+        service_cpu_request = "0.5"
+        helper_cpu_request = "0.25"
+        cpu_request_overwrite_max_allowed = "7"
+        memory_request_overwrite_max_allowed = "30Gi"
+      [[runners.kubernetes.volumes.empty_dir]]
+        name = "docker-certs"
+        mount_path = "/certs/client"
+        medium = "Memory"
+      [runners.kubernetes.node_selector]
+        agentpool = "jobs"
old mode 100755 (executable)
new mode 100644 (file)
index 78e72f6..e3343b0
@@ -28,7 +28,7 @@ class CommunicationFailure(Exception):
 
 
 class NoPipelineFound(Exception):
-    """Communication is successfull but pipeline is not found."""
+    """Communication is successful but pipeline is not found."""
 
 
 def get_local_branch_commit(branch):
@@ -48,24 +48,35 @@ def get_local_branch_commit(branch):
     return result
 
 
-def get_pipeline_status(project_id, commit_sha1):
+def get_json_http_response(url):
     """
-    Returns the JSON content of the pipeline status API response
+    Returns the JSON content of an HTTP GET request to gitlab.com
     """
-    url = '/api/v4/projects/{}/pipelines?sha={}'.format(project_id,
-                                                        commit_sha1)
     connection = http.client.HTTPSConnection('gitlab.com')
     connection.request('GET', url=url)
     response = connection.getresponse()
     if response.code != http.HTTPStatus.OK:
-        raise CommunicationFailure("Failed to receive a successful response")
-    json_response = json.loads(response.read())
+        msg = "Received unsuccessful response: %s (%s)" % (response.code,
+                                                           response.reason)
+        raise CommunicationFailure(msg)
+    return json.loads(response.read())
+
+
+def get_pipeline_status(project_id, commit_sha1):
+    """
+    Returns the JSON content of the pipeline status API response
+    """
+    url = '/api/v4/projects/{}/pipelines?sha={}'.format(project_id,
+                                                        commit_sha1)
+    json_response = get_json_http_response(url)
 
     # As far as I can tell, there should be only one pipeline for the same
     # project + commit. If this assumption is false, we can add further
     # filters to the url, such as username, and order_by.
     if not json_response:
-        raise NoPipelineFound("No pipeline found")
+        msg = "No pipeline found for project %s and commit %s" % (project_id,
+                                                                  commit_sha1)
+        raise NoPipelineFound(msg)
     return json_response[0]
 
 
diff --git a/scripts/ci/org.centos/stream/8/build-environment.yml b/scripts/ci/org.centos/stream/8/build-environment.yml
new file mode 100644 (file)
index 0000000..1ead77e
--- /dev/null
@@ -0,0 +1,82 @@
+---
+- name: Installation of extra packages to build QEMU
+  hosts: all
+  tasks:
+    - name: Extra check for CentOS Stream 8
+      lineinfile:
+        path: /etc/redhat-release
+        line: CentOS Stream release 8
+        state: present
+      check_mode: yes
+      register: centos_stream_8
+
+    - name: Enable EPEL repo on CentOS Stream 8
+      dnf:
+        name:
+          - epel-release
+        state: present
+      when:
+        - centos_stream_8
+
+    - name: Enable PowerTools repo on CentOS Stream 8
+      ini_file:
+        path: /etc/yum.repos.d/CentOS-Stream-PowerTools.repo
+        section: powertools
+        option: enabled
+        value: "1"
+      when:
+        - centos_stream_8
+
+    - name: Install basic packages to build QEMU on CentOS Stream 8
+      dnf:
+        name:
+          - bzip2
+          - bzip2-devel
+          - capstone-devel
+          - dbus-daemon
+          - device-mapper-multipath-devel
+          - diffutils
+          - gcc
+          - gcc-c++
+          - genisoimage
+          - gettext
+          - git
+          - glib2-devel
+          - glusterfs-api-devel
+          - gnutls-devel
+          - libaio-devel
+          - libcap-ng-devel
+          - libcurl-devel
+          - libepoxy-devel
+          - libfdt-devel
+          - libgcrypt-devel
+          - libiscsi-devel
+          - libpmem-devel
+          - librados-devel
+          - librbd-devel
+          - libseccomp-devel
+          - libslirp-devel
+          - libssh-devel
+          - libxkbcommon-devel
+          - lzo-devel
+          - make
+          - mesa-libEGL-devel
+          - nettle-devel
+          - ninja-build
+          - nmap-ncat
+          - numactl-devel
+          - pixman-devel
+          - python38
+          - python3-sphinx
+          - rdma-core-devel
+          - redhat-rpm-config
+          - snappy-devel
+          - spice-glib-devel
+          - spice-server-devel
+          - systemd-devel
+          - systemtap-sdt-devel
+          - tar
+          - zlib-devel
+        state: present
+      when:
+        - centos_stream_8
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
new file mode 100755 (executable)
index 0000000..76781f1
--- /dev/null
@@ -0,0 +1,199 @@
+#!/bin/sh -e
+#
+# Configuration for QEMU based on CentOS Stream 8 x86_64 builds
+#
+# The "configure" command line is based on:
+#
+# https://git.centos.org/rpms/qemu-kvm/blob/c8s-stream-rhel/f/SPECS/qemu-kvm.spec
+#
+# But, because the SPEC file contains a number of conditionals and
+# variable and expansions only available at RPM build time, this version
+# was initially generated from an actual RPM build on an x86_64 platform.
+#
+# From that initial version, options that are required or are a
+# consequence of non-upstream patches have been adapted.  One example
+# is "--without-default-devices" which is *not* present here, given
+# that patches adding downstream specific devices are not available.
+#
+../configure \
+--python=/usr/bin/python3.8 \
+--prefix="/usr" \
+--libdir="/usr/lib64" \
+--datadir="/usr/share" \
+--sysconfdir="/etc" \
+--interp-prefix=/usr/qemu-%M \
+--localstatedir="/var" \
+--docdir="/usr/share/doc" \
+--libexecdir="/usr/libexec" \
+--extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \
+--extra-cflags="-O2 -g -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fexceptions -fstack-protector-strong -grecord-gcc-switches -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection" \
+--with-suffix="qemu-kvm" \
+--firmwarepath=/usr/share/qemu-firmware \
+--target-list="x86_64-softmmu" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--audio-drv-list="" \
+--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
+--with-coroutine=ucontext \
+--tls-priority=@QEMU,SYSTEM \
+--disable-af-xdp \
+--disable-attr \
+--disable-auth-pam \
+--disable-avx2 \
+--disable-avx512f \
+--disable-bochs \
+--disable-bpf \
+--disable-brlapi \
+--disable-bsd-user \
+--disable-bzip2 \
+--disable-cap-ng \
+--disable-capstone \
+--disable-cfi \
+--disable-cfi-debug \
+--disable-cloop \
+--disable-cocoa \
+--disable-coroutine-pool \
+--disable-crypto-afalg \
+--disable-curl \
+--disable-curses \
+--disable-debug-info \
+--disable-debug-mutex \
+--disable-debug-tcg \
+--disable-dmg \
+--disable-docs \
+--disable-fuse \
+--disable-fuse-lseek \
+--disable-gcrypt \
+--disable-gio \
+--disable-glusterfs \
+--disable-gnutls \
+--disable-gtk \
+--disable-guest-agent \
+--disable-guest-agent-msi \
+--disable-hvf \
+--disable-iconv \
+--disable-kvm \
+--disable-libdaxctl \
+--disable-libiscsi \
+--disable-libnfs \
+--disable-libpmem \
+--disable-libssh \
+--disable-libudev \
+--disable-libusb \
+--disable-linux-aio \
+--disable-linux-io-uring \
+--disable-linux-user \
+--disable-live-block-migration \
+--disable-lto \
+--disable-lzfse \
+--disable-lzo \
+--disable-malloc-trim \
+--disable-membarrier \
+--disable-modules \
+--disable-module-upgrades \
+--disable-mpath \
+--disable-multiprocess \
+--disable-netmap \
+--disable-nettle \
+--disable-numa \
+--disable-nvmm \
+--disable-opengl \
+--disable-parallels \
+--disable-pie \
+--disable-pvrdma \
+--disable-qcow1 \
+--disable-qed \
+--disable-qom-cast-debug \
+--disable-rbd \
+--disable-rdma \
+--disable-replication \
+--disable-rng-none \
+--disable-safe-stack \
+--disable-sanitizers \
+--disable-sdl \
+--disable-sdl-image \
+--disable-seccomp \
+--disable-slirp-smbd \
+--disable-smartcard \
+--disable-snappy \
+--disable-sparse \
+--disable-spice \
+--disable-strip \
+--disable-system \
+--disable-tcg \
+--disable-tools \
+--disable-tpm \
+--disable-u2f \
+--disable-usb-redir \
+--disable-user \
+--disable-vde \
+--disable-vdi \
+--disable-vhost-crypto \
+--disable-vhost-kernel \
+--disable-vhost-net \
+--disable-vhost-user \
+--disable-vhost-user-blk-server \
+--disable-vhost-vdpa \
+--disable-virglrenderer \
+--disable-virtfs \
+--disable-vnc \
+--disable-vnc-jpeg \
+--disable-png \
+--disable-vnc-sasl \
+--disable-vte \
+--disable-vvfat \
+--disable-werror \
+--disable-whpx \
+--disable-xen \
+--disable-xen-pci-passthrough \
+--disable-xkbcommon \
+--disable-zstd \
+--enable-attr \
+--enable-avx2 \
+--enable-cap-ng \
+--enable-capstone \
+--enable-coroutine-pool \
+--enable-curl \
+--enable-debug-info \
+--enable-docs \
+--enable-fdt \
+--enable-gcrypt \
+--enable-glusterfs \
+--enable-gnutls \
+--enable-guest-agent \
+--enable-iconv \
+--enable-kvm \
+--enable-libiscsi \
+--enable-libpmem \
+--enable-libssh \
+--enable-libusb \
+--enable-libudev \
+--enable-linux-aio \
+--enable-lzo \
+--enable-malloc-trim \
+--enable-modules \
+--enable-mpath \
+--enable-numa \
+--enable-opengl \
+--enable-pie \
+--enable-rbd \
+--enable-rdma \
+--enable-seccomp \
+--enable-snappy \
+--enable-smartcard \
+--enable-spice \
+--enable-system \
+--enable-tcg \
+--enable-tools \
+--enable-tpm \
+--enable-trace-backends=dtrace \
+--enable-usb-redir \
+--enable-vhost-kernel \
+--enable-vhost-net \
+--enable-vhost-user \
+--enable-vhost-user-blk-server \
+--enable-vhost-vdpa \
+--enable-vnc \
+--enable-png \
+--enable-vnc-sasl \
+--enable-werror \
+--enable-xkbcommon
diff --git a/scripts/ci/org.centos/stream/8/x86_64/test-avocado b/scripts/ci/org.centos/stream/8/x86_64/test-avocado
new file mode 100644 (file)
index 0000000..73e7a1a
--- /dev/null
@@ -0,0 +1,65 @@
+#!/bin/sh -e
+#
+# Runs a previously vetted list of tests, either marked explicitly for
+# KVM and x86_64, or tests that are generic enough to be valid for all
+# targets. Such a test list can be generated with:
+#
+# ./pyvenv/bin/avocado list --filter-by-tags-include-empty \
+#   --filter-by-tags-include-empty-key -t accel:kvm,arch:x86_64 \
+#   tests/avocado/
+#
+# This is almost the complete list of avocado based tests available at
+# the time this was compile, with the following exceptions:
+#
+# * Require machine type "x-remote":
+#   - tests/avocado/multiprocess.py:Multiprocess.test_multiprocess_x86_64
+#
+# * Requires display type "egl-headless":
+#   - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_virtio_vga_virgl
+#   - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_vhost_user_vga_virgl
+#
+#  * Test is marked (unconditionally) to be skipped:
+#   - tests/avocado/virtio_check_params.py:VirtioMaxSegSettingsCheck.test_machine_types
+#
+make get-vm-images
+./pyvenv/bin/avocado run \
+    --job-results-dir=tests/results/ \
+    tests/avocado/boot_linux.py:BootLinuxX8664.test_pc_i440fx_kvm \
+    tests/avocado/boot_linux.py:BootLinuxX8664.test_pc_q35_kvm \
+    tests/avocado/boot_linux_console.py:BootLinuxConsole.test_x86_64_pc \
+    tests/avocado/cpu_queries.py:QueryCPUModelExpansion.test \
+    tests/avocado/empty_cpu_model.py:EmptyCPUModel.test \
+    tests/avocado/hotplug_cpu.py:HotPlugCPU.test \
+    tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb \
+    tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb_nomsi \
+    tests/avocado/info_usernet.py:InfoUsernet.test_hostfwd \
+    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu \
+    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_pt \
+    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_strict \
+    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_strict_cm \
+    tests/avocado/linux_initrd.py:LinuxInitrd.test_with_2gib_file_should_exit_error_msg_with_linux_v3_6 \
+    tests/avocado/linux_initrd.py:LinuxInitrd.test_with_2gib_file_should_work_with_linux_v4_16 \
+    tests/avocado/migration.py:Migration.test_migration_with_exec \
+    tests/avocado/migration.py:Migration.test_migration_with_tcp_localhost \
+    tests/avocado/migration.py:Migration.test_migration_with_unix \
+    tests/avocado/pc_cpu_hotplug_props.py:OmittedCPUProps.test_no_die_id \
+    tests/avocado/replay_kernel.py:ReplayKernelNormal.test_x86_64_pc \
+    tests/avocado/reverse_debugging.py:ReverseDebugging_X86_64.test_x86_64_pc \
+    tests/avocado/version.py:Version.test_qmp_human_info_version \
+    tests/avocado/virtio_version.py:VirtioVersionCheck.test_conventional_devs \
+    tests/avocado/virtio_version.py:VirtioVersionCheck.test_modern_only_devs \
+    tests/avocado/vnc.py:Vnc.test_change_password \
+    tests/avocado/vnc.py:Vnc.test_change_password_requires_a_password \
+    tests/avocado/vnc.py:Vnc.test_no_vnc \
+    tests/avocado/vnc.py:Vnc.test_no_vnc_change_password \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_4_1 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_set_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_unset_4_1 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v1_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v1_set_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v2_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v2_unset_4_1 \
+    tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_4_0_alias_compatibility \
+    tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_4_1_alias \
+    tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_none_alias
diff --git a/scripts/ci/org.centos/stream/README b/scripts/ci/org.centos/stream/README
new file mode 100644 (file)
index 0000000..e3eadfe
--- /dev/null
@@ -0,0 +1,17 @@
+This directory contains scripts for generating a build of QEMU that
+closely matches the CentOS Stream[1] builds of the qemu-kvm package.
+
+To have the environment ready to configure, build QEMU and run tests,
+please start with a CentOS Stream machine and:
+
+ * apply the generic "build-environment.yml" playbook located at
+   scripts/ci/setup
+
+ * apply the "build-environment.yml" in the directory following the
+   CentOS Stream version (such as "8").
+
+This currently only covers CentOS Stream 8 environments and
+packages[2].
+
+[1] https://www.centos.org/centos-stream/
+[2] https://git.centos.org/rpms/qemu-kvm/commits/c8s-stream-rhel
diff --git a/scripts/ci/setup/.gitignore b/scripts/ci/setup/.gitignore
new file mode 100644 (file)
index 0000000..f4a6183
--- /dev/null
@@ -0,0 +1,2 @@
+inventory
+vars.yml
diff --git a/scripts/ci/setup/build-environment.yml b/scripts/ci/setup/build-environment.yml
new file mode 100644 (file)
index 0000000..f344d1a
--- /dev/null
@@ -0,0 +1,274 @@
+# Copyright (c) 2021 Red Hat, Inc.
+#
+# Author:
+#  Cleber Rosa <crosa@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+#
+# This is an ansible playbook file.  Run it to set up systems with the
+# environment needed to build QEMU.
+---
+- name: Installation of basic packages to build QEMU
+  hosts: all
+  tasks:
+    - name: Check for suitable ansible version
+      delegate_to: localhost
+      assert:
+        that:
+          - '((ansible_version.major == 2) and (ansible_version.minor >= 8)) or (ansible_version.major >= 3)'
+        msg: "Unsuitable ansible version, please use version 2.8.0 or later"
+
+    - name: Add armhf foreign architecture to aarch64 hosts
+      command: dpkg --add-architecture armhf
+      when:
+        - ansible_facts['distribution'] == 'Ubuntu'
+        - ansible_facts['architecture'] == 'aarch64'
+
+    - name: Update apt cache / upgrade packages via apt
+      apt:
+        update_cache: yes
+        upgrade: yes
+      when:
+        - ansible_facts['distribution'] == 'Ubuntu'
+
+    # lcitool variables -f json ubuntu-2204 qemu | jq -r '.pkgs[]' | xargs -n 1 echo "-"
+    - name: Install basic packages to build QEMU on Ubuntu 22.04
+      package:
+        name:
+          - bash
+          - bc
+          - bison
+          - bsdextrautils
+          - bzip2
+          - ca-certificates
+          - ccache
+          - clang
+          - dbus
+          - debianutils
+          - diffutils
+          - exuberant-ctags
+          - findutils
+          - flex
+          - g++
+          - gcc
+          - gcovr
+          - genisoimage
+          - gettext
+          - git
+          - hostname
+          - libaio-dev
+          - libasan5
+          - libasound2-dev
+          - libattr1-dev
+          - libbpf-dev
+          - libbrlapi-dev
+          - libbz2-dev
+          - libc6-dev
+          - libcacard-dev
+          - libcap-ng-dev
+          - libcapstone-dev
+          - libcmocka-dev
+          - libcurl4-gnutls-dev
+          - libdaxctl-dev
+          - libdrm-dev
+          - libepoxy-dev
+          - libfdt-dev
+          - libffi-dev
+          - libgbm-dev
+          - libgcrypt20-dev
+          - libglib2.0-dev
+          - libglusterfs-dev
+          - libgnutls28-dev
+          - libgtk-3-dev
+          - libibumad-dev
+          - libibverbs-dev
+          - libiscsi-dev
+          - libjemalloc-dev
+          - libjpeg-turbo8-dev
+          - libjson-c-dev
+          - liblttng-ust-dev
+          - liblzo2-dev
+          - libncursesw5-dev
+          - libnfs-dev
+          - libnuma-dev
+          - libpam0g-dev
+          - libpcre2-dev
+          - libpixman-1-dev
+          - libpmem-dev
+          - libpng-dev
+          - libpulse-dev
+          - librbd-dev
+          - librdmacm-dev
+          - libsasl2-dev
+          - libsdl2-dev
+          - libsdl2-image-dev
+          - libseccomp-dev
+          - libslirp-dev
+          - libsnappy-dev
+          - libspice-protocol-dev
+          - libspice-server-dev
+          - libssh-dev
+          - libsystemd-dev
+          - libtasn1-6-dev
+          - libubsan1
+          - libudev-dev
+          - liburing-dev
+          - libusb-1.0-0-dev
+          - libusbredirhost-dev
+          - libvdeplug-dev
+          - libvirglrenderer-dev
+          - libvte-2.91-dev
+          - libxen-dev
+          - libxml2-dev
+          - libzstd-dev
+          - llvm
+          - locales
+          - make
+          - meson
+          - multipath-tools
+          - ncat
+          - nettle-dev
+          - ninja-build
+          - openssh-client
+          - pkgconf
+          - python3
+          - python3-numpy
+          - python3-opencv
+          - python3-pillow
+          - python3-pip
+          - python3-sphinx
+          - python3-sphinx-rtd-theme
+          - python3-venv
+          - python3-yaml
+          - rpm2cpio
+          - sed
+          - sparse
+          - systemtap-sdt-dev
+          - tar
+          - tesseract-ocr
+          - tesseract-ocr-eng
+          - texinfo
+          - xfslibs-dev
+          - zlib1g-dev
+        state: present
+      when:
+        - ansible_facts['distribution'] == 'Ubuntu'
+        - ansible_facts['distribution_version'] == '22.04'
+
+    - name: Install armhf cross-compile packages to build QEMU on AArch64 Ubuntu 22.04
+      package:
+        name:
+          - binutils-arm-linux-gnueabihf
+          - gcc-arm-linux-gnueabihf
+          - libblkid-dev:armhf
+          - libc6-dev:armhf
+          - libffi-dev:armhf
+          - libglib2.0-dev:armhf
+          - libmount-dev:armhf
+          - libpcre2-dev:armhf
+          - libpixman-1-dev:armhf
+          - zlib1g-dev:armhf
+      when:
+        - ansible_facts['distribution'] == 'Ubuntu'
+        - ansible_facts['distribution_version'] == '22.04'
+        - ansible_facts['architecture'] == 'aarch64'
+
+    - name: Enable EPEL repo on EL8
+      dnf:
+        name:
+          - epel-release
+        state: present
+      when:
+        - ansible_facts['distribution_file_variety'] in ['RedHat', 'CentOS']
+        - ansible_facts['distribution_major_version'] == '8'
+
+    - name: Enable PowerTools repo on CentOS 8
+      ini_file:
+        path: /etc/yum.repos.d/CentOS-Stream-PowerTools.repo
+        section: powertools
+        option: enabled
+        value: "1"
+      when:
+        - ansible_facts['distribution_file_variety'] == 'CentOS'
+        - ansible_facts['distribution_major_version'] == '8'
+
+    - name: Install basic packages to build QEMU on EL8
+      dnf:
+        # This list of packages start with tests/docker/dockerfiles/centos8.docker
+        # but only include files that are common to all distro variants and present
+        # in the standard repos (no add-ons)
+        name:
+          - bzip2
+          - bzip2-devel
+          - capstone-devel
+          - dbus-daemon
+          - device-mapper-multipath-devel
+          - diffutils
+          - gcc
+          - gcc-c++
+          - genisoimage
+          - gettext
+          - git
+          - glib2-devel
+          - glusterfs-api-devel
+          - gnutls-devel
+          - libaio-devel
+          - libcap-ng-devel
+          - libcurl-devel
+          - libepoxy-devel
+          - libfdt-devel
+          - libgcrypt-devel
+          - libiscsi-devel
+          - libpmem-devel
+          - librados-devel
+          - librbd-devel
+          - libseccomp-devel
+          - libssh-devel
+          - libxkbcommon-devel
+          - lzo-devel
+          - make
+          - mesa-libEGL-devel
+          - nettle-devel
+          - ninja-build
+          - nmap-ncat
+          - numactl-devel
+          - pixman-devel
+          - python38
+          - python3-sphinx
+          - rdma-core-devel
+          - redhat-rpm-config
+          - snappy-devel
+          - spice-glib-devel
+          - systemd-devel
+          - systemtap-sdt-devel
+          - tar
+          - zlib-devel
+        state: present
+      when:
+        - ansible_facts['distribution_file_variety'] in ['RedHat', 'CentOS']
+        - ansible_facts['distribution_version'] == '8'
+
+    - name: Install packages only available on x86 and aarch64
+      dnf:
+        # Spice server not available in ppc64le
+        name:
+          - spice-server
+          - spice-server-devel
+        state: present
+      when:
+        - ansible_facts['distribution_file_variety'] in ['RedHat', 'CentOS']
+        - ansible_facts['distribution_version'] == '8'
+        - ansible_facts['architecture'] == 'aarch64' or ansible_facts['architecture'] == 'x86_64'
+
+    - name: Check whether the Python runtime version is managed by alternatives
+      stat:
+        path: /etc/alternatives/python3
+      register: python3
+
+    - name: Set default Python runtime to 3.8 on EL8
+      command: alternatives --set python3 /usr/bin/python3.8
+      when:
+        - ansible_facts['distribution_file_variety'] in ['RedHat', 'CentOS']
+        - ansible_facts['distribution_version'] == '8'
+        - python3.stat.islnk and python3.stat.lnk_target != '/usr/bin/python3.8'
diff --git a/scripts/ci/setup/gitlab-runner.yml b/scripts/ci/setup/gitlab-runner.yml
new file mode 100644 (file)
index 0000000..7bdafab
--- /dev/null
@@ -0,0 +1,96 @@
+# Copyright (c) 2021 Red Hat, Inc.
+#
+# Author:
+#  Cleber Rosa <crosa@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+#
+# This is an ansible playbook file.  Run it to set up systems with the
+# gitlab-runner agent.
+---
+- name: Installation of gitlab-runner
+  hosts: all
+  vars_files:
+    - vars.yml
+  tasks:
+    - debug:
+        msg: 'Checking for a valid GitLab registration token'
+      failed_when: "gitlab_runner_registration_token == 'PLEASE_PROVIDE_A_VALID_TOKEN'"
+
+    - name: Create a group for the gitlab-runner service
+      group:
+        name: gitlab-runner
+
+    - name: Create a user for the gitlab-runner service
+      user:
+        user: gitlab-runner
+        group: gitlab-runner
+        groups: kvm
+        comment: GitLab Runner
+        home: /home/gitlab-runner
+        shell: /bin/bash
+
+    - name: Remove the .bash_logout file when on Ubuntu systems
+      file:
+        path: /home/gitlab-runner/.bash_logout
+        state: absent
+      when: "ansible_facts['distribution'] == 'Ubuntu'"
+
+    - name: Set the Operating System for gitlab-runner
+      set_fact:
+        gitlab_runner_os: "{{ ansible_facts[\"system\"]|lower }}"
+    - debug:
+        msg: gitlab-runner OS is {{ gitlab_runner_os }}
+
+    - name: Set the architecture for gitlab-runner
+      set_fact:
+        gitlab_runner_arch: "{{ ansible_to_gitlab_arch[ansible_facts[\"architecture\"]] }}"
+    - debug:
+        msg: gitlab-runner arch is {{ gitlab_runner_arch }}
+
+    - name: Download the matching gitlab-runner (DEB)
+      get_url:
+        dest: "/root/"
+        url: "https://gitlab-runner-downloads.s3.amazonaws.com/latest/deb/gitlab-runner_{{ gitlab_runner_arch }}.deb"
+      when:
+        - ansible_facts['distribution'] == 'Ubuntu'
+
+    - name: Download the matching gitlab-runner (RPM)
+      get_url:
+        dest: "/root/"
+        url: "https://gitlab-runner-downloads.s3.amazonaws.com/latest/rpm/gitlab-runner_{{ gitlab_runner_arch }}.rpm"
+      when:
+        - ansible_facts['distribution'] == 'CentOS'
+
+    - name: Install gitlab-runner via package manager (DEB)
+      apt: deb="/root/gitlab-runner_{{ gitlab_runner_arch }}.deb"
+      when:
+        - ansible_facts['distribution'] == 'Ubuntu'
+
+    - name: Install gitlab-runner via package manager (RPM)
+      yum: name="/root/gitlab-runner_{{ gitlab_runner_arch }}.rpm"
+      when:
+        - ansible_facts['distribution'] == 'CentOS'
+
+    - name: Register the gitlab-runner
+      command: "/usr/bin/gitlab-runner register --non-interactive --url {{ gitlab_runner_server_url }} --registration-token {{ gitlab_runner_registration_token }} --executor shell --tag-list {{ ansible_facts[\"architecture\"] }},{{ ansible_facts[\"distribution\"]|lower }}_{{ ansible_facts[\"distribution_version\"] }} --description '{{ ansible_facts[\"distribution\"] }} {{ ansible_facts[\"distribution_version\"] }} {{ ansible_facts[\"architecture\"] }} ({{ ansible_facts[\"os_family\"] }})'"
+
+    # The secondary runner will still run under the single gitlab-runner service
+    - name: Register secondary gitlab-runner
+      command: "/usr/bin/gitlab-runner register --non-interactive --url {{ gitlab_runner_server_url }} --registration-token {{ gitlab_runner_registration_token }} --executor shell --tag-list aarch32,{{ ansible_facts[\"distribution\"]|lower }}_{{ ansible_facts[\"distribution_version\"] }} --description '{{ ansible_facts[\"distribution\"] }} {{ ansible_facts[\"distribution_version\"] }} {{ ansible_facts[\"architecture\"] }} ({{ ansible_facts[\"os_family\"] }})'"
+      when:
+        - ansible_facts['distribution'] == 'Ubuntu'
+        - ansible_facts['architecture'] == 'aarch64'
+        - ansible_facts['distribution_version'] == '22.04'
+
+    - name: Install the gitlab-runner service using its own functionality
+      command: "/usr/bin/gitlab-runner install --user gitlab-runner --working-directory /home/gitlab-runner"
+      register: gitlab_runner_install_service_result
+      failed_when: "gitlab_runner_install_service_result.rc != 0 and \"already exists\" not in gitlab_runner_install_service_result.stderr"
+
+    - name: Enable the gitlab-runner service
+      service:
+        name: gitlab-runner
+        state: started
+        enabled: yes
diff --git a/scripts/ci/setup/inventory.template b/scripts/ci/setup/inventory.template
new file mode 100644 (file)
index 0000000..2fbb50c
--- /dev/null
@@ -0,0 +1 @@
+localhost
diff --git a/scripts/ci/setup/vars.yml.template b/scripts/ci/setup/vars.yml.template
new file mode 100644 (file)
index 0000000..4b355fb
--- /dev/null
@@ -0,0 +1,10 @@
+# The URL of the gitlab server to use, usually https://gitlab.com unless you're
+# using a private GitLab instance
+gitlab_runner_server_url: https://gitlab.com
+# A mapping of the ansible to gitlab architecture nomenclature
+ansible_to_gitlab_arch:
+  x86_64: amd64
+  aarch64: arm64
+  s390x: s390x
+# A unique token made available by GitLab to your project for registering runners
+gitlab_runner_registration_token: PLEASE_PROVIDE_A_VALID_TOKEN
old mode 100755 (executable)
new mode 100644 (file)
index a668025..a7fd8dc
@@ -32,8 +32,8 @@ use warnings;
 use Getopt::Std;
 
 # Stuff we don't want to clean because we import it into our tree:
-my $exclude = qr,^(disas/libvixl/|include/standard-headers/
-    |linux-headers/|pc-bios/|tests/tcg/|tests/multiboot/),x;
+my $exclude = qr,^(include/standard-headers/|linux-headers/
+    |pc-bios/|tests/tcg/|tests/multiboot/),x;
 # Stuff that is expected to fail the preprocessing test:
 my $exclude_cpp = qr,^include/libdecnumber/decNumberLocal.h,;
 
old mode 100755 (executable)
new mode 100644 (file)
index aaa7d4c..58e1607
@@ -51,7 +51,7 @@ GIT=no
 DUPHEAD=no
 
 # Extended regular expression defining files to ignore when using --all
-XDIRREGEX='^(tests/tcg|tests/multiboot|pc-bios|disas/libvixl)'
+XDIRREGEX='^(tests/tcg|tests/multiboot|pc-bios)'
 
 while true
 do
@@ -111,7 +111,12 @@ cat >"$COCCIFILE" <<EOT
 )
 EOT
 
+files=
 for f in "$@"; do
+  if [ -L "$f" ]; then
+      echo "SKIPPING $f (symbolic link)"
+      continue
+  fi
   case "$f" in
     *.c.inc)
       # These aren't standalone C source files
@@ -144,6 +149,7 @@ for f in "$@"; do
       continue
       ;;
   esac
+  files="$files $f"
 
   if [ "$MODE" = "c" ]; then
     # First, use Coccinelle to add qemu/osdep.h before the first existing include
@@ -174,25 +180,30 @@ for f in "$@"; do
 
 done
 
-if [ "$DUPHEAD" = "yes" ]; then
-    egrep "^[[:space:]]*#[[:space:]]*include" "$@" | tr -d '[:blank:]' \
-        | sort | uniq -c | awk '{if ($1 > 1) print $0}'
-    if [ $? -eq 0 ]; then
+if [ "$DUPHEAD" = "yes" ] && [ -n "$files" ]; then
+    if egrep "^[[:space:]]*#[[:space:]]*include" $files | tr -d '[:blank:]' \
+        | sort | uniq -c | grep -v '^ *1 '; then
         echo "Found duplicate header file includes. Please check the above files manually."
         exit 1
     fi
 fi
 
 if [ "$GIT" = "yes" ]; then
-    git add -- "$@"
+    git add -- $files
     git commit --signoff -F - <<EOF
 $GITSUBJ: Clean up includes
 
-Clean up includes so that osdep.h is included first and headers
-which it implies are not included manually.
-
 This commit was created with scripts/clean-includes.
 
+All .c should include qemu/osdep.h first.  The script performs three
+related cleanups:
+
+* Ensure .c files include qemu/osdep.h first.
+* Including it in a .h is redundant, since the .c  already includes
+  it.  Drop such inclusions.
+* Likewise, including headers qemu/osdep.h includes is redundant.
+  Drop these, too.
+
 EOF
 
 fi
old mode 100755 (executable)
new mode 100644 (file)
index c6bbc05ba3eada172a1370372eeb4a5c309ee5d9..d247a5086e91d1e42d63152c670b8725697971c5 100644 (file)
  */
 
 /* From qemu/compiler.h */
-#define QEMU_GNUC_PREREQ(maj, min) 1
-#define QEMU_NORETURN __attribute__ ((__noreturn__))
-#define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
-#define QEMU_SENTINEL __attribute__((sentinel))
+#define G_NORETURN __attribute__ ((__noreturn__))
+#define G_GNUC_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#define G_GNUC_NULL_TERMINATED __attribute__((sentinel))
 
 #if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__))
 # define QEMU_PACKED __attribute__((gcc_struct, packed))
@@ -35,7 +34,7 @@
 #define QEMU_BUILD_BUG_ON(x) \
     typedef char cat2(qemu_build_bug_on__,__LINE__)[(x)?-1:1] __attribute__((unused));
 
-#define GCC_FMT_ATTR(n, m) __attribute__((format(gnu_printf, n, m)))
+#define G_GNUC_PRINTF(n, m) __attribute__((format(gnu_printf, n, m)))
 
 #define xglue(x, y) x ## y
 #define glue(x, y) xglue(x, y)
index c768d8140abdc450574fe060c926d0c279404987..29651ebde9097a30a5b78577f697379f8a65811c 100644 (file)
@@ -127,8 +127,8 @@ static void device_fn(DeviceState *dev, ...)
 - memory_region_init_rom(E1, NULL, E2, E3, E4);
 + memory_region_init_rom(E1, obj, E2, E3, E4);
 |
-- memory_region_init_ram_shared_nomigrate(E1, NULL, E2, E3, E4, E5);
-+ memory_region_init_ram_shared_nomigrate(E1, obj, E2, E3, E4, E5);
+- memory_region_init_ram_flags_nomigrate(E1, NULL, E2, E3, E4, E5);
++ memory_region_init_ram_flags_nomigrate(E1, obj, E2, E3, E4, E5);
 )
   ...+>
 }
@@ -152,8 +152,8 @@ static void device_fn(DeviceState *dev, ...)
 - memory_region_init_rom(E1, NULL, E2, E3, E4);
 + memory_region_init_rom(E1, OBJECT(dev), E2, E3, E4);
 |
-- memory_region_init_ram_shared_nomigrate(E1, NULL, E2, E3, E4, E5);
-+ memory_region_init_ram_shared_nomigrate(E1, OBJECT(dev), E2, E3, E4, E5);
+- memory_region_init_ram_flags_nomigrate(E1, NULL, E2, E3, E4, E5);
++ memory_region_init_ram_flags_nomigrate(E1, OBJECT(dev), E2, E3, E4, E5);
 )
   ...+>
 }
index 4cf50e75ea035963ba0bfae7ea547121cc7f3c17..6cb1b3c99a77a5cf5a1bd4fd97e6273a46da83a8 100644 (file)
@@ -11,9 +11,8 @@ identifier F;
 -    T VAR;
      ... when != VAR
 
--    VAR =
-+    return
-     E;
+-    VAR = (E);
 -    return VAR;
++    return E;
      ... when != VAR
  }
diff --git a/scripts/coccinelle/timer-del-timer-free.cocci b/scripts/coccinelle/timer-del-timer-free.cocci
new file mode 100644 (file)
index 0000000..c3cfd42
--- /dev/null
@@ -0,0 +1,18 @@
+// Remove superfluous timer_del() calls
+//
+// Copyright Linaro Limited 2020
+// This work is licensed under the terms of the GNU GPLv2 or later.
+//
+// spatch --macro-file scripts/cocci-macro-file.h \
+//        --sp-file scripts/coccinelle/timer-del-timer-free.cocci \
+//        --in-place --dir .
+//
+// The timer_free() function now implicitly calls timer_del()
+// for you, so calls to timer_del() immediately before the
+// timer_free() of the same timer can be deleted.
+
+@@
+expression T;
+@@
+-timer_del(T);
+ timer_free(T);
diff --git a/scripts/coccinelle/use-g_new-etc.cocci b/scripts/coccinelle/use-g_new-etc.cocci
new file mode 100644 (file)
index 0000000..e2280e9
--- /dev/null
@@ -0,0 +1,75 @@
+// Use g_new() & friends where that makes obvious sense
+@@
+type T;
+@@
+-g_malloc(sizeof(T))
++g_new(T, 1)
+@@
+type T;
+@@
+-g_try_malloc(sizeof(T))
++g_try_new(T, 1)
+@@
+type T;
+@@
+-g_malloc0(sizeof(T))
++g_new0(T, 1)
+@@
+type T;
+@@
+-g_try_malloc0(sizeof(T))
++g_try_new0(T, 1)
+@@
+type T;
+expression n;
+@@
+-g_malloc(sizeof(T) * (n))
++g_new(T, n)
+@@
+type T;
+expression n;
+@@
+-g_try_malloc(sizeof(T) * (n))
++g_try_new(T, n)
+@@
+type T;
+expression n;
+@@
+-g_malloc0(sizeof(T) * (n))
++g_new0(T, n)
+@@
+type T;
+expression n;
+@@
+-g_try_malloc0(sizeof(T) * (n))
++g_try_new0(T, n)
+@@
+type T;
+expression p, n;
+@@
+-g_realloc(p, sizeof(T) * (n))
++g_renew(T, p, n)
+@@
+type T;
+expression p, n;
+@@
+-g_try_realloc(p, sizeof(T) * (n))
++g_try_renew(T, p, n)
+@@
+type T;
+expression n;
+@@
+-(T *)g_new(T, n)
++g_new(T, n)
+@@
+type T;
+expression n;
+@@
+-(T *)g_new0(T, n)
++g_new0(T, n)
+@@
+type T;
+expression p, n;
+@@
+-(T *)g_renew(T, p, n)
++g_renew(T, p, n)
index 2d2f2055a3da7d72a884d3e67db7ba43207c70cd..2b0c8224a1809036b3d34622a9f98020c15d51ee 100644 (file)
@@ -142,7 +142,7 @@ class FullStructTypedefMatch(TypedefMatch):
         return name
 
     def strip_typedef(self) -> Patch:
-        """generate patch that will strip typedef from the struct declartion
+        """generate patch that will strip typedef from the struct declaration
 
         The caller is responsible for readding the typedef somewhere else.
         """
old mode 100755 (executable)
new mode 100644 (file)
diff --git a/scripts/coverage/compare_gcov_json.py b/scripts/coverage/compare_gcov_json.py
new file mode 100644 (file)
index 0000000..1b92dc2
--- /dev/null
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+#
+# Compare output of two gcovr JSON reports and report differences. To
+# generate the required output first:
+#   - create two build dirs with --enable-gcov
+#   - run set of tests in each
+#   - run make coverage-html in each
+#   - run gcovr --json --exclude-unreachable-branches \
+#           --print-summary -o coverage.json --root ../../ . *.p
+#
+# Author: Alex Bennée <alex.bennee@linaro.org>
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+def create_parser():
+    parser = argparse.ArgumentParser(
+        prog='compare_gcov_json',
+        description='analyse the differences in coverage between two runs')
+
+    parser.add_argument('-a', type=Path, default=None,
+                        help=('First file to check'))
+
+    parser.add_argument('-b', type=Path, default=None,
+                        help=('Second file to check'))
+
+    parser.add_argument('--verbose', action='store_true', default=False,
+                        help=('A minimal verbosity level that prints the '
+                              'overall result of the check/wait'))
+    return parser
+
+
+# See https://gcovr.com/en/stable/output/json.html#json-format-reference
+def load_json(json_file_path: Path, verbose = False) -> dict[str, set[int]]:
+
+    with open(json_file_path) as f:
+        data = json.load(f)
+
+    root_dir = json_file_path.absolute().parent
+    covered_lines = dict()
+
+    for filecov in data["files"]:
+        file_path = Path(filecov["file"])
+
+        # account for generated files - map into src tree
+        resolved_path = Path(file_path).absolute()
+        if resolved_path.is_relative_to(root_dir):
+            file_path = resolved_path.relative_to(root_dir)
+            # print(f"remapped {resolved_path} to {file_path}")
+
+        lines = filecov["lines"]
+
+        executed_lines = set(
+            linecov["line_number"]
+            for linecov in filecov["lines"]
+            if linecov["count"] != 0 and not linecov["gcovr/noncode"]
+        )
+
+        # if this file has any coverage add it to the system
+        if len(executed_lines) > 0:
+            if verbose:
+                print(f"file {file_path} {len(executed_lines)}/{len(lines)}")
+            covered_lines[str(file_path)] = executed_lines
+
+    return covered_lines
+
+def find_missing_files(first, second):
+    """
+    Return a list of files not covered in the second set
+    """
+    missing_files = []
+    for f in sorted(first):
+        file_a = first[f]
+        try:
+            file_b = second[f]
+        except KeyError:
+            missing_files.append(f)
+
+    return missing_files
+
+def main():
+    """
+    Script entry point
+    """
+    parser = create_parser()
+    args = parser.parse_args()
+
+    if not args.a or not args.b:
+        print("We need two files to compare")
+        sys.exit(1)
+
+    first_coverage = load_json(args.a, args.verbose)
+    second_coverage = load_json(args.b, args.verbose)
+
+    first_missing = find_missing_files(first_coverage,
+                                       second_coverage)
+
+    second_missing = find_missing_files(second_coverage,
+                                        first_coverage)
+
+    a_name = args.a.parent.name
+    b_name = args.b.parent.name
+
+    print(f"{b_name} missing coverage in {len(first_missing)} files")
+    for f in first_missing:
+        print(f"  {f}")
+
+    print(f"{a_name} missing coverage in {len(second_missing)} files")
+    for f in second_missing:
+        print(f"  {f}")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/coverity-model.c b/scripts/coverity-model.c
deleted file mode 100644 (file)
index 2c0346f..0000000
+++ /dev/null
@@ -1,386 +0,0 @@
-/* Coverity Scan model
- *
- * Copyright (C) 2014 Red Hat, Inc.
- *
- * Authors:
- *  Markus Armbruster <armbru@redhat.com>
- *  Paolo Bonzini <pbonzini@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or, at your
- * option, any later version.  See the COPYING file in the top-level directory.
- */
-
-
-/*
- * This is the source code for our Coverity user model file.  The
- * purpose of user models is to increase scanning accuracy by explaining
- * code Coverity can't see (out of tree libraries) or doesn't
- * sufficiently understand.  Better accuracy means both fewer false
- * positives and more true defects.  Memory leaks in particular.
- *
- * - A model file can't import any header files.  Some built-in primitives are
- *   available but not wchar_t, NULL etc.
- * - Modeling doesn't need full structs and typedefs. Rudimentary structs
- *   and similar types are sufficient.
- * - An uninitialized local variable signifies that the variable could be
- *   any value.
- *
- * The model file must be uploaded by an admin in the analysis settings of
- * http://scan.coverity.com/projects/378
- */
-
-#define NULL ((void *)0)
-
-typedef unsigned char uint8_t;
-typedef char int8_t;
-typedef unsigned int uint32_t;
-typedef int int32_t;
-typedef long ssize_t;
-typedef unsigned long long uint64_t;
-typedef long long int64_t;
-typedef _Bool bool;
-
-typedef struct va_list_str *va_list;
-
-/* exec.c */
-
-typedef struct AddressSpace AddressSpace;
-typedef uint64_t hwaddr;
-typedef uint32_t MemTxResult;
-typedef uint64_t MemTxAttrs;
-
-static void __bufwrite(uint8_t *buf, ssize_t len)
-{
-    int first, last;
-    __coverity_negative_sink__(len);
-    if (len == 0) return;
-    buf[0] = first;
-    buf[len-1] = last;
-    __coverity_writeall__(buf);
-}
-
-static void __bufread(uint8_t *buf, ssize_t len)
-{
-    __coverity_negative_sink__(len);
-    if (len == 0) return;
-    int first = buf[0];
-    int last = buf[len-1];
-}
-
-MemTxResult address_space_read(AddressSpace *as, hwaddr addr,
-                               MemTxAttrs attrs,
-                               uint8_t *buf, int len)
-{
-    MemTxResult result;
-    // TODO: investigate impact of treating reads as producing
-    // tainted data, with __coverity_tainted_data_argument__(buf).
-    __bufwrite(buf, len);
-    return result;
-}
-
-MemTxResult address_space_write(AddressSpace *as, hwaddr addr,
-                                MemTxAttrs attrs,
-                                const uint8_t *buf, int len)
-{
-    MemTxResult result;
-    __bufread(buf, len);
-    return result;
-}
-
-
-/* Tainting */
-
-typedef struct {} name2keysym_t;
-static int get_keysym(const name2keysym_t *table,
-                      const char *name)
-{
-    int result;
-    if (result > 0) {
-        __coverity_tainted_string_sanitize_content__(name);
-        return result;
-    } else {
-        return 0;
-    }
-}
-
-/* Replay data is considered trusted.  */
-uint8_t replay_get_byte(void)
-{
-    uint8_t byte;
-    return byte;
-}
-
-
-/*
- * GLib memory allocation functions.
- *
- * Note that we ignore the fact that g_malloc of 0 bytes returns NULL,
- * and g_realloc of 0 bytes frees the pointer.
- *
- * Modeling this would result in Coverity flagging a lot of memory
- * allocations as potentially returning NULL, and asking us to check
- * whether the result of the allocation is NULL or not.  However, the
- * resulting pointer should never be dereferenced anyway, and in fact
- * it is not in the vast majority of cases.
- *
- * If a dereference did happen, this would suppress a defect report
- * for an actual null pointer dereference.  But it's too unlikely to
- * be worth wading through the false positives, and with some luck
- * we'll get a buffer overflow reported anyway.
- */
-
-/*
- * Allocation primitives, cannot return NULL
- * See also Coverity's library/generic/libc/all/all.c
- */
-
-void *g_malloc_n(size_t nmemb, size_t size)
-{
-    size_t sz;
-    void *ptr;
-
-    __coverity_negative_sink__(nmemb);
-    __coverity_negative_sink__(size);
-    sz = nmemb * size;
-    ptr = __coverity_alloc__(sz);
-    __coverity_mark_as_uninitialized_buffer__(ptr);
-    __coverity_mark_as_afm_allocated__(ptr, "g_free");
-    return ptr;
-}
-
-void *g_malloc0_n(size_t nmemb, size_t size)
-{
-    size_t sz;
-    void *ptr;
-
-    __coverity_negative_sink__(nmemb);
-    __coverity_negative_sink__(size);
-    sz = nmemb * size;
-    ptr = __coverity_alloc__(sz);
-    __coverity_writeall0__(ptr);
-    __coverity_mark_as_afm_allocated__(ptr, "g_free");
-    return ptr;
-}
-
-void *g_realloc_n(void *ptr, size_t nmemb, size_t size)
-{
-    size_t sz;
-
-    __coverity_negative_sink__(nmemb);
-    __coverity_negative_sink__(size);
-    sz = nmemb * size;
-    __coverity_escape__(ptr);
-    ptr = __coverity_alloc__(sz);
-    /*
-     * Memory beyond the old size isn't actually initialized.  Can't
-     * model that.  See Coverity's realloc() model
-     */
-    __coverity_writeall__(ptr);
-    __coverity_mark_as_afm_allocated__(ptr, "g_free");
-    return ptr;
-}
-
-void g_free(void *ptr)
-{
-    __coverity_free__(ptr);
-    __coverity_mark_as_afm_freed__(ptr, "g_free");
-}
-
-/*
- * Derive the g_try_FOO_n() from the g_FOO_n() by adding indeterminate
- * out of memory conditions
- */
-
-void *g_try_malloc_n(size_t nmemb, size_t size)
-{
-    int nomem;
-
-    if (nomem) {
-        return NULL;
-    }
-    return g_malloc_n(nmemb, size);
-}
-
-void *g_try_malloc0_n(size_t nmemb, size_t size)
-{
-    int nomem;
-
-    if (nomem) {
-        return NULL;
-    }
-    return g_malloc0_n(nmemb, size);
-}
-
-void *g_try_realloc_n(void *ptr, size_t nmemb, size_t size)
-{
-    int nomem;
-
-    if (nomem) {
-        return NULL;
-    }
-    return g_realloc_n(ptr, nmemb, size);
-}
-
-/* Trivially derive the g_FOO() from the g_FOO_n() */
-
-void *g_malloc(size_t size)
-{
-    return g_malloc_n(1, size);
-}
-
-void *g_malloc0(size_t size)
-{
-    return g_malloc0_n(1, size);
-}
-
-void *g_realloc(void *ptr, size_t size)
-{
-    return g_realloc_n(ptr, 1, size);
-}
-
-void *g_try_malloc(size_t size)
-{
-    return g_try_malloc_n(1, size);
-}
-
-void *g_try_malloc0(size_t size)
-{
-    return g_try_malloc0_n(1, size);
-}
-
-void *g_try_realloc(void *ptr, size_t size)
-{
-    return g_try_realloc_n(ptr, 1, size);
-}
-
-/* Other memory allocation functions */
-
-void *g_memdup(const void *ptr, unsigned size)
-{
-    unsigned char *dup;
-    unsigned i;
-
-    if (!ptr) {
-        return NULL;
-    }
-
-    dup = g_malloc(size);
-    for (i = 0; i < size; i++)
-        dup[i] = ((unsigned char *)ptr)[i];
-    return dup;
-}
-
-/*
- * GLib string allocation functions
- */
-
-char *g_strdup(const char *s)
-{
-    char *dup;
-    size_t i;
-
-    if (!s) {
-        return NULL;
-    }
-
-    __coverity_string_null_sink__(s);
-    __coverity_string_size_sink__(s);
-    dup = __coverity_alloc_nosize__();
-    __coverity_mark_as_afm_allocated__(dup, "g_free");
-    for (i = 0; (dup[i] = s[i]); i++) ;
-    return dup;
-}
-
-char *g_strndup(const char *s, size_t n)
-{
-    char *dup;
-    size_t i;
-
-    __coverity_negative_sink__(n);
-
-    if (!s) {
-        return NULL;
-    }
-
-    dup = g_malloc(n + 1);
-    for (i = 0; i < n && (dup[i] = s[i]); i++) ;
-    dup[i] = 0;
-    return dup;
-}
-
-char *g_strdup_printf(const char *format, ...)
-{
-    char ch, *s;
-    size_t len;
-
-    __coverity_string_null_sink__(format);
-    __coverity_string_size_sink__(format);
-
-    ch = *format;
-
-    s = __coverity_alloc_nosize__();
-    __coverity_writeall__(s);
-    __coverity_mark_as_afm_allocated__(s, "g_free");
-    return s;
-}
-
-char *g_strdup_vprintf(const char *format, va_list ap)
-{
-    char ch, *s;
-    size_t len;
-
-    __coverity_string_null_sink__(format);
-    __coverity_string_size_sink__(format);
-
-    ch = *format;
-    ch = *(char *)ap;
-
-    s = __coverity_alloc_nosize__();
-    __coverity_writeall__(s);
-    __coverity_mark_as_afm_allocated__(s, "g_free");
-
-    return len;
-}
-
-char *g_strconcat(const char *s, ...)
-{
-    char *s;
-
-    /*
-     * Can't model: last argument must be null, the others
-     * null-terminated strings
-     */
-
-    s = __coverity_alloc_nosize__();
-    __coverity_writeall__(s);
-    __coverity_mark_as_afm_allocated__(s, "g_free");
-    return s;
-}
-
-/* Other glib functions */
-
-typedef struct pollfd GPollFD;
-
-int poll();
-
-int g_poll (GPollFD *fds, unsigned nfds, int timeout)
-{
-    return poll(fds, nfds, timeout);
-}
-
-typedef struct _GIOChannel GIOChannel;
-GIOChannel *g_io_channel_unix_new(int fd)
-{
-    GIOChannel *c = g_malloc0(sizeof(GIOChannel));
-    __coverity_escape__(fd);
-    return c;
-}
-
-void g_assertion_message_expr(const char     *domain,
-                              const char     *file,
-                              int             line,
-                              const char     *func,
-                              const char     *expr)
-{
-    __coverity_panic__();
-}
diff --git a/scripts/coverity-scan/COMPONENTS.md b/scripts/coverity-scan/COMPONENTS.md
new file mode 100644 (file)
index 0000000..0e62f10
--- /dev/null
@@ -0,0 +1,160 @@
+This is the list of currently configured Coverity components:
+
+alpha
+  ~ (/qemu)?((/include)?/hw/alpha/.*|/target/alpha/.*)
+
+arm
+  ~ (/qemu)?((/include)?/hw/arm/.*|(/include)?/hw/.*/(arm|allwinner-a10|bcm28|digic|exynos|imx|omap|stellaris|pxa2xx|versatile|zynq|cadence).*|/hw/net/xgmac.c|/hw/ssi/xilinx_spips.c|/target/arm/.*)
+
+avr
+  ~ (/qemu)?((/include)?/hw/avr/.*|/target/avr/.*)
+
+cris
+  ~ (/qemu)?((/include)?/hw/cris/.*|/target/cris/.*)
+
+hexagon-gen (component should be ignored in analysis)
+  ~ (/qemu)?(/target/hexagon/.*generated.*)
+
+hexagon
+  ~ (/qemu)?(/target/hexagon/.*)
+
+hppa
+  ~ (/qemu)?((/include)?/hw/hppa/.*|/target/hppa/.*)
+
+i386
+  ~ (/qemu)?((/include)?/hw/i386/.*|/target/i386/.*|/hw/intc/[^/]*apic[^/]*\.c)
+
+loongarch
+  ~ (/qemu)?((/include)?/hw/(loongarch/.*|.*/loongarch.*)|/target/loongarch/.*)
+
+m68k
+  ~ (/qemu)?((/include)?/hw/m68k/.*|/target/m68k/.*|(/include)?/hw(/.*)?/mcf.*|(/include)?/hw/nubus/.*)
+
+microblaze
+  ~ (/qemu)?((/include)?/hw/microblaze/.*|/target/microblaze/.*)
+
+mips
+  ~ (/qemu)?((/include)?/hw/mips/.*|/target/mips/.*)
+
+nios2
+  ~ (/qemu)?((/include)?/hw/nios2/.*|/target/nios2/.*)
+
+openrisc
+  ~ (/qemu)?((/include)?/hw/openrisc/.*|/target/openrisc/.*)
+
+ppc
+  ~ (/qemu)?((/include)?/hw/ppc/.*|/target/ppc/.*|/hw/pci-host/(uninorth.*|dec.*|prep.*|ppc.*)|/hw/misc/macio/.*|(/include)?/hw/.*/(xics|openpic|spapr).*)
+
+riscv
+  ~ (/qemu)?((/include)?/hw/riscv/.*|/target/riscv/.*|/hw/.*/(riscv_|ibex_|sifive_).*)
+
+rx
+  ~ (/qemu)?((/include)?/hw/rx/.*|/target/rx/.*)
+
+s390
+  ~ (/qemu)?((/include)?/hw/s390x/.*|/target/s390x/.*|/hw/.*/s390_.*)
+
+sh4
+  ~ (/qemu)?((/include)?/hw/sh4/.*|/target/sh4/.*)
+
+sparc
+  ~ (/qemu)?((/include)?/hw/sparc(64)?.*|/target/sparc/.*|/hw/.*/grlib.*|/hw/display/cg3.c)
+
+tricore
+  ~ (/qemu)?((/include)?/hw/tricore/.*|/target/tricore/.*)
+
+xtensa
+  ~ (/qemu)?((/include)?/hw/xtensa/.*|/target/xtensa/.*)
+
+9pfs
+  ~ (/qemu)?(/hw/9pfs/.*|/fsdev/.*)
+
+audio
+  ~ (/qemu)?((/include)?/(audio|hw/audio)/.*)
+
+block
+  ~ (/qemu)?(/block.*|(/include?)/(block|storage-daemon)/.*|(/include)?/hw/(block|ide|nvme)/.*|/qemu-(img|io).*|/util/(aio|async|thread-pool).*)
+
+char
+  ~ (/qemu)?(/qemu-char\.c|/include/sysemu/char\.h|(/include)?/hw/char/.*)
+
+crypto
+  ~ (/qemu)?((/include)?/crypto/.*|/hw/.*/.*crypto.*|(/include/sysemu|/backends)/cryptodev.*)
+
+disas
+  ~ (/qemu)?((/include)?/disas.*)
+
+fpu
+  ~ (/qemu)?((/include)?(/fpu|/libdecnumber)/.*)
+
+io
+  ~ (/qemu)?((/include)?/io/.*)
+
+ipmi
+  ~ (/qemu)?((/include)?/hw/ipmi/.*)
+
+migration
+  ~ (/qemu)?((/include)?/migration/.*)
+
+monitor
+  ~ (/qemu)?(/qapi.*|/qobject/.*|/monitor\..*|/[hq]mp\..*)
+
+nbd
+  ~ (/qemu)?(/nbd/.*|/include/block/nbd.*|/qemu-nbd\.c)
+
+net
+  ~ (/qemu)?((/include)?(/hw)?/(net|rdma)/.*)
+
+pci
+  ~ (/qemu)?(/include)?/hw/(cxl/|pci).*
+
+qemu-ga
+  ~ (/qemu)?(/qga/.*)
+
+scsi
+  ~ (/qemu)?(/scsi/.*|/hw/scsi/.*|/include/hw/scsi/.*)
+
+trace
+  ~ (/qemu)?(/.*trace.*\.[ch])
+
+ui
+  ~ (/qemu)?((/include)?(/ui|/hw/display|/hw/input)/.*)
+
+usb
+  ~ (/qemu)?(/hw/usb/.*|/include/hw/usb/.*)
+
+user
+  ~ (/qemu)?(/linux-user/.*|/bsd-user/.*|/user-exec\.c|/thunk\.c|/include/exec/user/.*)
+
+util
+  ~ (/qemu)?(/util/.*|/include/qemu/.*)
+
+vfio
+  ~ (/qemu)?(/include)?/hw/vfio/.*
+
+virtio
+  ~ (/qemu)?(/include)?/hw/virtio/.*
+
+xen
+  ~ (/qemu)?(.*/xen.*)
+
+hvf
+  ~ (/qemu)?(.*/hvf.*)
+
+kvm
+  ~ (/qemu)?(.*/kvm.*)
+
+tcg
+  ~ (/qemu)?(/accel/tcg|/replay|/tcg)/.*
+
+sysemu
+  ~ (/qemu)?(/system/.*|/accel/.*)
+
+(headers)
+  ~ (/qemu)?(/include/.*)
+
+testlibs
+  ~ (/qemu)?(/tests/qtest(/libqos/.*|/libqtest.*))
+
+tests
+  ~ (/qemu)?(/tests/.*)
index 501ac6723323e5bafcd09189fb141040bce0cefa..a349578526dada209825f1df8bc0a6b5ab38f3ad 100644 (file)
 # The work of actually doing the build is handled by the
 # run-coverity-scan script.
 
-FROM fedora:30
-ENV PACKAGES \
-    alsa-lib-devel \
-    bc \
-    brlapi-devel \
-    bzip2 \
-    bzip2-devel \
-    ccache \
-    clang \
-    curl \
-    cyrus-sasl-devel \
-    dbus-daemon \
-    device-mapper-multipath-devel \
-    findutils \
-    gcc \
-    gcc-c++ \
-    gettext \
-    git \
-    glib2-devel \
-    glusterfs-api-devel \
-    gnutls-devel \
-    gtk3-devel \
-    hostname \
-    libaio-devel \
-    libasan \
-    libattr-devel \
-    libblockdev-mpath-devel \
-    libcap-devel \
-    libcap-ng-devel \
-    libcurl-devel \
-    libepoxy-devel \
-    libfdt-devel \
-    libgbm-devel \
-    libiscsi-devel \
-    libjpeg-devel \
-    libpmem-devel \
-    libnfs-devel \
-    libpng-devel \
-    librbd-devel \
-    libseccomp-devel \
-    libssh-devel \
-    libubsan \
-    libudev-devel \
-    libusbx-devel \
-    libxml2-devel \
-    libzstd-devel \
-    llvm \
-    lzo-devel \
-    make \
-    mingw32-bzip2 \
-    mingw32-curl \
-    mingw32-glib2 \
-    mingw32-gmp \
-    mingw32-gnutls \
-    mingw32-gtk3 \
-    mingw32-libjpeg-turbo \
-    mingw32-libpng \
-    mingw32-libtasn1 \
-    mingw32-nettle \
-    mingw32-nsis \
-    mingw32-pixman \
-    mingw32-pkg-config \
-    mingw32-SDL2 \
-    mingw64-bzip2 \
-    mingw64-curl \
-    mingw64-glib2 \
-    mingw64-gmp \
-    mingw64-gnutls \
-    mingw64-gtk3 \
-    mingw64-libjpeg-turbo \
-    mingw64-libpng \
-    mingw64-libtasn1 \
-    mingw64-nettle \
-    mingw64-pixman \
-    mingw64-pkg-config \
-    mingw64-SDL2 \
-    ncurses-devel \
-    nettle-devel \
-    nss-devel \
-    numactl-devel \
-    perl \
-    perl-Test-Harness \
-    pixman-devel \
-    pulseaudio-libs-devel \
-    python3 \
-    python3-sphinx \
-    PyYAML \
-    rdma-core-devel \
-    SDL2-devel \
-    snappy-devel \
-    sparse \
-    spice-server-devel \
-    systemd-devel \
-    systemtap-sdt-devel \
-    tar \
-    usbredir-devel \
-    virglrenderer-devel \
-    vte291-devel \
-    wget \
-    which \
-    xen-devel \
-    xfsprogs-devel \
-    zlib-devel
-ENV QEMU_CONFIGURE_OPTS --python=/usr/bin/python3
+FROM registry.fedoraproject.org/fedora:37
 
-RUN dnf install -y $PACKAGES
-RUN rpm -q $PACKAGES | sort > /packages.txt
-ENV PATH $PATH:/usr/libexec/python3-sphinx/
+RUN dnf install -y nosync && \
+    echo -e '#!/bin/sh\n\
+if test -d /usr/lib64\n\
+then\n\
+    export LD_PRELOAD=/usr/lib64/nosync/nosync.so\n\
+else\n\
+    export LD_PRELOAD=/usr/lib/nosync/nosync.so\n\
+fi\n\
+exec "$@"' > /usr/bin/nosync && \
+    chmod +x /usr/bin/nosync && \
+    nosync dnf update -y && \
+    nosync dnf install -y \
+               SDL2-devel \
+               SDL2_image-devel \
+               alsa-lib-devel \
+               bash \
+               bc \
+               bison \
+               brlapi-devel \
+               bzip2 \
+               bzip2-devel \
+               ca-certificates \
+               capstone-devel \
+               ccache \
+               clang \
+               ctags \
+               cyrus-sasl-devel \
+               daxctl-devel \
+               dbus-daemon \
+               device-mapper-multipath-devel \
+               diffutils \
+               findutils \
+               flex \
+               fuse3-devel \
+               gcc \
+               gcc-c++ \
+               gcovr \
+               genisoimage \
+               gettext \
+               git \
+               glib2-devel \
+               glib2-static \
+               glibc-langpack-en \
+               glibc-static \
+               glusterfs-api-devel \
+               gnutls-devel \
+               gtk3-devel \
+               hostname \
+               jemalloc-devel \
+               json-c-devel \
+               libaio-devel \
+               libasan \
+               libattr-devel \
+               libbpf-devel \
+               libcacard-devel \
+               libcap-ng-devel \
+               libcmocka-devel \
+               libcurl-devel \
+               libdrm-devel \
+               libepoxy-devel \
+               libfdt-devel \
+               libffi-devel \
+               libgcrypt-devel \
+               libiscsi-devel \
+               libjpeg-devel \
+               libnfs-devel \
+               libpmem-devel \
+               libpng-devel \
+               librbd-devel \
+               libseccomp-devel \
+               libselinux-devel \
+               libslirp-devel \
+               libssh-devel \
+               libtasn1-devel \
+               libubsan \
+               liburing-devel \
+               libusbx-devel \
+               libzstd-devel \
+               llvm \
+               lttng-ust-devel \
+               lzo-devel \
+               make \
+               mesa-libgbm-devel \
+               meson \
+               ncurses-devel \
+               nettle-devel \
+               ninja-build \
+               nmap-ncat \
+               numactl-devel \
+               openssh-clients \
+               pam-devel \
+               pcre-static \
+               pixman-devel \
+               pkgconfig \
+               pulseaudio-libs-devel \
+               python3 \
+               python3-PyYAML \
+               python3-numpy \
+               python3-opencv \
+               python3-pillow \
+               python3-pip \
+               python3-sphinx \
+               python3-sphinx_rtd_theme \
+               rdma-core-devel \
+               rpm \
+               sed \
+               snappy-devel \
+               socat \
+               sparse \
+               spice-protocol \
+               spice-server-devel \
+               systemd-devel \
+               systemtap-sdt-devel \
+               tar \
+               tesseract \
+               tesseract-langpack-eng \
+               usbredir-devel \
+               util-linux \
+               virglrenderer-devel \
+               vte291-devel \
+               which \
+               xen-devel \
+               xfsprogs-devel \
+               zlib-devel \
+               zlib-static \
+               zstd && \
+    nosync dnf autoremove -y && \
+    nosync dnf clean all -y && \
+    rpm -qa | sort > /packages.txt && \
+    mkdir -p /usr/libexec/ccache-wrappers && \
+    ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/c++ && \
+    ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/cc && \
+    ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/clang && \
+    ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/g++ && \
+    ln -s /usr/bin/ccache /usr/libexec/ccache-wrappers/gcc
+
+ENV CCACHE_WRAPPERSDIR "/usr/libexec/ccache-wrappers"
+ENV LANG "en_US.UTF-8"
+ENV MAKE "/usr/bin/make"
+ENV NINJA "/usr/bin/ninja"
+ENV PYTHON "/usr/bin/python3"
+ENV QEMU_CONFIGURE_OPTS --meson=internal
+
+RUN dnf install -y curl wget
 ENV COVERITY_TOOL_BASE=/coverity-tools
 COPY coverity_tool.tgz coverity_tool.tgz
 RUN mkdir -p /coverity-tools/coverity_tool && cd /coverity-tools/coverity_tool && tar xf /coverity_tool.tgz
diff --git a/scripts/coverity-scan/model.c b/scripts/coverity-scan/model.c
new file mode 100644 (file)
index 0000000..a064d84
--- /dev/null
@@ -0,0 +1,284 @@
+/* Coverity Scan model
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>
+ *  Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or, at your
+ * option, any later version.  See the COPYING file in the top-level directory.
+ */
+
+
+/*
+ * This is the source code for our Coverity user model file.  The
+ * purpose of user models is to increase scanning accuracy by explaining
+ * code Coverity can't see (out of tree libraries) or doesn't
+ * sufficiently understand.  Better accuracy means both fewer false
+ * positives and more true defects.  Memory leaks in particular.
+ *
+ * - A model file can't import any header files.  Some built-in primitives are
+ *   available but not wchar_t, NULL etc.
+ * - Modeling doesn't need full structs and typedefs. Rudimentary structs
+ *   and similar types are sufficient.
+ * - An uninitialized local variable signifies that the variable could be
+ *   any value.
+ *
+ * The model file must be uploaded by an admin in the analysis settings of
+ * http://scan.coverity.com/projects/378
+ */
+
+#define NULL ((void *)0)
+
+typedef unsigned char uint8_t;
+typedef char int8_t;
+typedef unsigned int uint32_t;
+typedef int int32_t;
+typedef long ssize_t;
+typedef unsigned long long uint64_t;
+typedef long long int64_t;
+typedef _Bool bool;
+
+typedef struct va_list_str *va_list;
+
+/* Tainting */
+
+typedef struct {} name2keysym_t;
+static int get_keysym(const name2keysym_t *table,
+                      const char *name)
+{
+    int result;
+    if (result > 0) {
+        __coverity_tainted_string_sanitize_content__(name);
+        return result;
+    } else {
+        return 0;
+    }
+}
+
+/* Replay data is considered trusted.  */
+uint8_t replay_get_byte(void)
+{
+    uint8_t byte;
+    return byte;
+}
+
+
+/*
+ * GLib memory allocation functions.
+ *
+ * Note that we ignore the fact that g_malloc of 0 bytes returns NULL,
+ * and g_realloc of 0 bytes frees the pointer.
+ *
+ * Modeling this would result in Coverity flagging a lot of memory
+ * allocations as potentially returning NULL, and asking us to check
+ * whether the result of the allocation is NULL or not.  However, the
+ * resulting pointer should never be dereferenced anyway, and in fact
+ * it is not in the vast majority of cases.
+ *
+ * If a dereference did happen, this would suppress a defect report
+ * for an actual null pointer dereference.  But it's too unlikely to
+ * be worth wading through the false positives, and with some luck
+ * we'll get a buffer overflow reported anyway.
+ */
+
+/*
+ * Allocation primitives, cannot return NULL
+ * See also Coverity's library/generic/libc/all/all.c
+ */
+
+void *g_malloc_n(size_t nmemb, size_t size)
+{
+    void *ptr;
+
+    __coverity_negative_sink__(nmemb);
+    __coverity_negative_sink__(size);
+    ptr = __coverity_alloc__(nmemb * size);
+    if (!ptr) {
+        __coverity_panic__();
+    }
+    __coverity_mark_as_uninitialized_buffer__(ptr);
+    __coverity_mark_as_afm_allocated__(ptr, AFM_free);
+    return ptr;
+}
+
+void *g_malloc0_n(size_t nmemb, size_t size)
+{
+    void *ptr;
+
+    __coverity_negative_sink__(nmemb);
+    __coverity_negative_sink__(size);
+    ptr = __coverity_alloc__(nmemb * size);
+    if (!ptr) {
+        __coverity_panic__();
+    }
+    __coverity_writeall0__(ptr);
+    __coverity_mark_as_afm_allocated__(ptr, AFM_free);
+    return ptr;
+}
+
+void *g_realloc_n(void *ptr, size_t nmemb, size_t size)
+{
+    __coverity_negative_sink__(nmemb);
+    __coverity_negative_sink__(size);
+    __coverity_escape__(ptr);
+    ptr = __coverity_alloc__(nmemb * size);
+    if (!ptr) {
+        __coverity_panic__();
+    }
+    /*
+     * Memory beyond the old size isn't actually initialized.  Can't
+     * model that.  See Coverity's realloc() model
+     */
+    __coverity_writeall__(ptr);
+    __coverity_mark_as_afm_allocated__(ptr, AFM_free);
+    return ptr;
+}
+
+void g_free(void *ptr)
+{
+    __coverity_free__(ptr);
+    __coverity_mark_as_afm_freed__(ptr, AFM_free);
+}
+
+/*
+ * Derive the g_try_FOO_n() from the g_FOO_n() by adding indeterminate
+ * out of memory conditions
+ */
+
+void *g_try_malloc_n(size_t nmemb, size_t size)
+{
+    int nomem;
+
+    if (nomem) {
+        return NULL;
+    }
+    return g_malloc_n(nmemb, size);
+}
+
+void *g_try_malloc0_n(size_t nmemb, size_t size)
+{
+    int nomem;
+
+    if (nomem) {
+        return NULL;
+    }
+    return g_malloc0_n(nmemb, size);
+}
+
+void *g_try_realloc_n(void *ptr, size_t nmemb, size_t size)
+{
+    int nomem;
+
+    if (nomem) {
+        return NULL;
+    }
+    return g_realloc_n(ptr, nmemb, size);
+}
+
+/* Derive the g_FOO() from the g_FOO_n() */
+
+void *g_malloc(size_t size)
+{
+    void *ptr;
+
+    __coverity_negative_sink__(size);
+    ptr = __coverity_alloc__(size);
+    if (!ptr) {
+        __coverity_panic__();
+    }
+    __coverity_mark_as_uninitialized_buffer__(ptr);
+    __coverity_mark_as_afm_allocated__(ptr, AFM_free);
+    return ptr;
+}
+
+void *g_malloc0(size_t size)
+{
+    void *ptr;
+
+    __coverity_negative_sink__(size);
+    ptr = __coverity_alloc__(size);
+    if (!ptr) {
+        __coverity_panic__();
+    }
+    __coverity_writeall0__(ptr);
+    __coverity_mark_as_afm_allocated__(ptr, AFM_free);
+    return ptr;
+}
+
+void *g_realloc(void *ptr, size_t size)
+{
+    __coverity_negative_sink__(size);
+    __coverity_escape__(ptr);
+    ptr = __coverity_alloc__(size);
+    if (!ptr) {
+        __coverity_panic__();
+    }
+    /*
+     * Memory beyond the old size isn't actually initialized.  Can't
+     * model that.  See Coverity's realloc() model
+     */
+    __coverity_writeall__(ptr);
+    __coverity_mark_as_afm_allocated__(ptr, AFM_free);
+    return ptr;
+}
+
+void *g_try_malloc(size_t size)
+{
+    int nomem;
+
+    if (nomem) {
+        return NULL;
+    }
+    return g_malloc(size);
+}
+
+void *g_try_malloc0(size_t size)
+{
+    int nomem;
+
+    if (nomem) {
+        return NULL;
+    }
+    return g_malloc0(size);
+}
+
+void *g_try_realloc(void *ptr, size_t size)
+{
+    int nomem;
+
+    if (nomem) {
+        return NULL;
+    }
+    return g_realloc(ptr, size);
+}
+
+/* Other glib functions */
+
+typedef struct pollfd GPollFD;
+
+int poll();
+
+int g_poll (GPollFD *fds, unsigned nfds, int timeout)
+{
+    return poll(fds, nfds, timeout);
+}
+
+typedef struct _GIOChannel GIOChannel;
+GIOChannel *g_io_channel_unix_new(int fd)
+{
+    /* cannot use incomplete type, the actual struct is roughly this size.  */
+    GIOChannel *c = g_malloc0(20 * sizeof(void *));
+    __coverity_escape__(fd);
+    return c;
+}
+
+void g_assertion_message_expr(const char     *domain,
+                              const char     *file,
+                              int             line,
+                              const char     *func,
+                              const char     *expr)
+{
+    __coverity_panic__();
+}
old mode 100755 (executable)
new mode 100644 (file)
index 6eefb4b..d56c9b6
@@ -116,14 +116,14 @@ update_coverity_tools () {
     cd "$COVERITY_TOOL_BASE"
 
     echo "Checking for new version of coverity build tools..."
-    wget https://scan.coverity.com/download/linux64 --post-data "token=$COVERITY_TOKEN&project=$PROJNAME&md5=1" -O coverity_tool.md5.new
+    wget https://scan.coverity.com/download/cxx/linux64 --post-data "token=$COVERITY_TOKEN&project=$PROJNAME&md5=1" -O coverity_tool.md5.new
 
     if ! cmp -s coverity_tool.md5 coverity_tool.md5.new; then
         # out of date md5 or no md5: download new build tool
         # blow away the old build tool
         echo "Downloading coverity build tools..."
         rm -rf coverity_tool coverity_tool.tgz
-        wget https://scan.coverity.com/download/linux64 --post-data "token=$COVERITY_TOKEN&project=$PROJNAME" -O coverity_tool.tgz
+        wget https://scan.coverity.com/download/cxx/linux64 --post-data "token=$COVERITY_TOKEN&project=$PROJNAME" -O coverity_tool.tgz
         if ! (cat coverity_tool.md5.new; echo "  coverity_tool.tgz") | md5sum -c --status; then
             echo "Downloaded tarball didn't match md5sum!"
             exit 1
@@ -380,27 +380,29 @@ export PATH="$TOOLBIN:$PATH"
 
 cd "$SRCDIR"
 
-echo "Doing make distclean..."
-make distclean
+echo "Nuking build directory..."
+rm -rf +build
+mkdir +build
+cd +build
 
 echo "Configuring..."
 # We configure with a fixed set of enables here to ensure that we don't
 # accidentally reduce the scope of the analysis by doing the build on
 # the system that's missing a dependency that we need to build part of
 # the codebase.
-./configure --disable-modules --enable-sdl --enable-gtk \
+../configure --disable-modules --enable-sdl --enable-gtk \
     --enable-opengl --enable-vte --enable-gnutls \
     --enable-nettle --enable-curses --enable-curl \
     --audio-drv-list=oss,alsa,sdl,pa --enable-virtfs \
-    --enable-vnc --enable-vnc-sasl --enable-vnc-jpeg --enable-vnc-png \
+    --enable-vnc --enable-vnc-sasl --enable-vnc-jpeg --enable-png \
     --enable-xen --enable-brlapi \
     --enable-linux-aio --enable-attr \
     --enable-cap-ng --enable-trace-backends=log --enable-spice --enable-rbd \
-    --enable-xfsctl --enable-libusb --enable-usb-redir \
+    --enable-libusb --enable-usb-redir \
     --enable-libiscsi --enable-libnfs --enable-seccomp \
     --enable-tpm --enable-libssh --enable-lzo --enable-snappy --enable-bzip2 \
     --enable-numa --enable-rdma --enable-smartcard --enable-virglrenderer \
-    --enable-mpath --enable-libxml2 --enable-glusterfs \
+    --enable-mpath --enable-glusterfs \
     --enable-virtfs --enable-zstd
 
 echo "Running cov-build..."
diff --git a/scripts/cpu-x86-uarch-abi.py b/scripts/cpu-x86-uarch-abi.py
new file mode 100644 (file)
index 0000000..052ddd7
--- /dev/null
@@ -0,0 +1,193 @@
+#!/usr/bin/python3
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# A script to generate a CSV file showing the x86_64 ABI
+# compatibility levels for each CPU model.
+#
+
+from qemu.qmp.legacy import QEMUMonitorProtocol
+import sys
+
+if len(sys.argv) != 2:
+    print("syntax: %s QMP-SOCK\n\n" % __file__ +
+          "Where QMP-SOCK points to a QEMU process such as\n\n" +
+          " # qemu-system-x86_64 -qmp unix:/tmp/qmp,server,nowait " +
+          "-display none -accel kvm", file=sys.stderr)
+    sys.exit(1)
+
+# Mandatory CPUID features for each microarch ABI level
+levels = [
+    [ # x86-64 baseline
+        "cmov",
+        "cx8",
+        "fpu",
+        "fxsr",
+        "mmx",
+        "syscall",
+        "sse",
+        "sse2",
+    ],
+    [ # x86-64-v2
+        "cx16",
+        "lahf-lm",
+        "popcnt",
+        "pni",
+        "sse4.1",
+        "sse4.2",
+        "ssse3",
+    ],
+    [ # x86-64-v3
+        "avx",
+        "avx2",
+        "bmi1",
+        "bmi2",
+        "f16c",
+        "fma",
+        "abm",
+        "movbe",
+    ],
+    [ # x86-64-v4
+        "avx512f",
+        "avx512bw",
+        "avx512cd",
+        "avx512dq",
+        "avx512vl",
+    ],
+]
+
+# Assumes externally launched process such as
+#
+#   qemu-system-x86_64 -qmp unix:/tmp/qmp,server,nowait -display none -accel kvm
+#
+# Note different results will be obtained with TCG, as
+# TCG masks out certain features otherwise present in
+# the CPU model definitions, as does KVM.
+
+
+sock = sys.argv[1]
+shell = QEMUMonitorProtocol(sock)
+shell.connect()
+
+models = shell.cmd("query-cpu-definitions")
+
+# These QMP props don't correspond to CPUID fatures
+# so ignore them
+skip = [
+    "family",
+    "min-level",
+    "min-xlevel",
+    "vendor",
+    "model",
+    "model-id",
+    "stepping",
+]
+
+names = []
+
+for model in models:
+    if "alias-of" in model:
+        continue
+    names.append(model["name"])
+
+models = {}
+
+for name in sorted(names):
+    cpu = shell.cmd("query-cpu-model-expansion",
+                    type="static",
+                    model={ "name": name })
+
+    got = {}
+    for (feature, present) in cpu["model"]["props"].items():
+        if present and feature not in skip:
+            got[feature] = True
+
+    if name in ["host", "max", "base"]:
+        continue
+
+    models[name] = {
+        # Dict of all present features in this CPU model
+        "features": got,
+
+        # Whether each x86-64 ABI level is satisfied
+        "levels": [False, False, False, False],
+
+        # Number of extra CPUID features compared to the x86-64 ABI level
+        "distance":[-1, -1, -1, -1],
+
+        # CPUID features present in model, but not in ABI level
+        "delta":[[], [], [], []],
+
+        # CPUID features in ABI level but not present in model
+        "missing": [[], [], [], []],
+    }
+
+
+# Calculate whether the CPU models satisfy each ABI level
+for name in models.keys():
+    for level in range(len(levels)):
+        got = set(models[name]["features"])
+        want = set(levels[level])
+        missing = want - got
+        match = True
+        if len(missing) > 0:
+            match = False
+        models[name]["levels"][level] = match
+        models[name]["missing"][level] = missing
+
+# Cache list of CPU models satisfying each ABI level
+abi_models = [
+    [],
+    [],
+    [],
+    [],
+]
+
+for name in models.keys():
+    for level in range(len(levels)):
+        if models[name]["levels"][level]:
+            abi_models[level].append(name)
+
+
+for level in range(len(abi_models)):
+    # Find the union of features in all CPU models satisfying this ABI
+    allfeatures = {}
+    for name in abi_models[level]:
+        for feat in models[name]["features"]:
+            allfeatures[feat] = True
+
+    # Find the intersection of features in all CPU models satisfying this ABI
+    commonfeatures = []
+    for feat in allfeatures:
+        present = True
+        for name in models.keys():
+            if not models[name]["levels"][level]:
+                continue
+            if feat not in models[name]["features"]:
+                present = False
+        if present:
+            commonfeatures.append(feat)
+
+    # Determine how many extra features are present compared to the lowest
+    # common denominator
+    for name in models.keys():
+        if not models[name]["levels"][level]:
+            continue
+
+        delta = set(models[name]["features"].keys()) - set(commonfeatures)
+        models[name]["distance"][level] = len(delta)
+        models[name]["delta"][level] = delta
+
+def print_uarch_abi_csv():
+    print("# Automatically generated from '%s'" % __file__)
+    print("Model,baseline,v2,v3,v4")
+    for name in models.keys():
+        print(name, end="")
+        for level in range(len(levels)):
+            if models[name]["levels"][level]:
+                print(",✅", end="")
+            else:
+                print(",", end="")
+        print()
+
+print_uarch_abi_csv()
index 47aa9caf6d15c55a78acf80d0dfcedea941871b9..e8b72da3a97a236cfe88ecac660eae82939345d6 100644 (file)
 # See the syntax and semantics in docs/devel/decodetree.rst.
 #
 
+import io
 import os
 import re
 import sys
 import getopt
 
 insnwidth = 32
+bitop_width = 32
 insnmask = 0xffffffff
 variablewidth = False
 fields = {}
@@ -33,12 +35,14 @@ arguments = {}
 formats = {}
 allpatterns = []
 anyextern = False
+testforerror = False
 
 translate_prefix = 'trans'
 translate_scope = 'static '
 input_file = ''
 output_file = None
 output_fd = None
+output_null = False
 insntype = 'uint32_t'
 decode_function = 'decode'
 
@@ -51,25 +55,103 @@ re_fld_ident = '%[a-zA-Z0-9_]*'
 re_fmt_ident = '@[a-zA-Z0-9_]*'
 re_pat_ident = '[a-zA-Z0-9_]*'
 
+# Local implementation of a topological sort. We use the same API that
+# the Python graphlib does, so that when QEMU moves forward to a
+# baseline of Python 3.9 or newer this code can all be dropped and
+# replaced with:
+#    from graphlib import TopologicalSorter, CycleError
+#
+# https://docs.python.org/3.9/library/graphlib.html#graphlib.TopologicalSorter
+#
+# We only implement the parts of TopologicalSorter we care about:
+#  ts = TopologicalSorter(graph=None)
+#    create the sorter. graph is a dictionary whose keys are
+#    nodes and whose values are lists of the predecessors of that node.
+#    (That is, if graph contains "A" -> ["B", "C"] then we must output
+#    B and C before A.)
+#  ts.static_order()
+#    returns a list of all the nodes in sorted order, or raises CycleError
+#  CycleError
+#    exception raised if there are cycles in the graph. The second
+#    element in the args attribute is a list of nodes which form a
+#    cycle; the first and last element are the same, eg [a, b, c, a]
+#    (Our implementation doesn't give the order correctly.)
+#
+# For our purposes we can assume that the data set is always small
+# (typically 10 nodes or less, actual links in the graph very rare),
+# so we don't need to worry about efficiency of implementation.
+#
+# The core of this implementation is from
+# https://code.activestate.com/recipes/578272-topological-sort/
+# (but updated to Python 3), and is under the MIT license.
+
+class CycleError(ValueError):
+    """Subclass of ValueError raised if cycles exist in the graph"""
+    pass
+
+class TopologicalSorter:
+    """Topologically sort a graph"""
+    def __init__(self, graph=None):
+        self.graph = graph
+
+    def static_order(self):
+        # We do the sort right here, unlike the stdlib version
+        from functools import reduce
+        data = {}
+        r = []
+
+        if not self.graph:
+            return []
+
+        # This code wants the values in the dict to be specifically sets
+        for k, v in self.graph.items():
+            data[k] = set(v)
+
+        # Find all items that don't depend on anything.
+        extra_items_in_deps = (reduce(set.union, data.values())
+                               - set(data.keys()))
+        # Add empty dependencies where needed
+        data.update({item:{} for item in extra_items_in_deps})
+        while True:
+            ordered = set(item for item, dep in data.items() if not dep)
+            if not ordered:
+                break
+            r.extend(ordered)
+            data = {item: (dep - ordered)
+                    for item, dep in data.items()
+                        if item not in ordered}
+        if data:
+            # This doesn't give as nice results as the stdlib, which
+            # gives you the cycle by listing the nodes in order. Here
+            # we only know the nodes in the cycle but not their order.
+            raise CycleError(f'nodes are in a cycle', list(data.keys()))
+
+        return r
+# end TopologicalSorter
+
 def error_with_file(file, lineno, *args):
     """Print an error message from file:line and args and exit."""
     global output_file
     global output_fd
 
+    # For the test suite expected-errors case, don't print the
+    # string "error: ", so they don't turn up as false positives
+    # if you grep the meson logs for strings like that.
+    end = 'error: ' if not testforerror else 'detected: '
     prefix = ''
     if file:
-        prefix += '{0}:'.format(file)
+        prefix += f'{file}:'
     if lineno:
-        prefix += '{0}:'.format(lineno)
+        prefix += f'{lineno}:'
     if prefix:
         prefix += ' '
-    print(prefix, end='error: ', file=sys.stderr)
+    print(prefix, end=end, file=sys.stderr)
     print(*args, file=sys.stderr)
 
     if output_file and output_fd:
         output_fd.close()
         os.remove(output_file)
-    exit(1)
+    exit(0 if testforerror else 1)
 # end error_with_file
 
 
@@ -101,6 +183,23 @@ def str_fields(fields):
     return r[1:]
 
 
+def whex(val):
+    """Return a hex string for val padded for insnwidth"""
+    global insnwidth
+    return f'0x{val:0{insnwidth // 4}x}'
+
+
+def whexC(val):
+    """Return a hex string for val padded for insnwidth,
+       and with the proper suffix for a C constant."""
+    suffix = ''
+    if val >= 0x100000000:
+        suffix = 'ull'
+    elif val >= 0x80000000:
+        suffix = 'u'
+    return whex(val) + suffix
+
+
 def str_match_bits(bits, mask):
     """Return a string pretty-printing BITS/MASK"""
     global insnwidth
@@ -146,11 +245,15 @@ def is_contiguous(bits):
         return -1
 
 
-def eq_fields_for_args(flds_a, flds_b):
-    if len(flds_a) != len(flds_b):
+def eq_fields_for_args(flds_a, arg):
+    if len(flds_a) != len(arg.fields):
         return False
+    # Only allow inference on default types
+    for t in arg.types:
+        if t != 'int':
+            return False
     for k, a in flds_a.items():
-        if k not in flds_b:
+        if k not in arg.fields:
             return False
     return True
 
@@ -182,12 +285,13 @@ class Field:
             s = ''
         return str(self.pos) + ':' + s + str(self.len)
 
-    def str_extract(self):
-        if self.sign:
-            extr = 'sextract32'
-        else:
-            extr = 'extract32'
-        return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len)
+    def str_extract(self, lvalue_formatter):
+        global bitop_width
+        s = 's' if self.sign else ''
+        return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
+
+    def referenced_fields(self):
+        return []
 
     def __eq__(self, other):
         return self.sign == other.sign and self.mask == other.mask
@@ -207,18 +311,25 @@ class MultiField:
     def __str__(self):
         return str(self.subs)
 
-    def str_extract(self):
+    def str_extract(self, lvalue_formatter):
+        global bitop_width
         ret = '0'
         pos = 0
         for f in reversed(self.subs):
+            ext = f.str_extract(lvalue_formatter)
             if pos == 0:
-                ret = f.str_extract()
+                ret = ext
             else:
-                ret = 'deposit32({0}, {1}, {2}, {3})' \
-                      .format(ret, pos, 32 - pos, f.str_extract())
+                ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})'
             pos += f.len
         return ret
 
+    def referenced_fields(self):
+        l = []
+        for f in self.subs:
+            l.extend(f.referenced_fields())
+        return l
+
     def __ne__(self, other):
         if len(self.subs) != len(other.subs):
             return True
@@ -242,9 +353,12 @@ class ConstField:
     def __str__(self):
         return str(self.value)
 
-    def str_extract(self):
+    def str_extract(self, lvalue_formatter):
         return str(self.value)
 
+    def referenced_fields(self):
+        return []
+
     def __cmp__(self, other):
         return self.value - other.value
 # end ConstField
@@ -261,8 +375,12 @@ class FunctionField:
     def __str__(self):
         return self.func + '(' + str(self.base) + ')'
 
-    def str_extract(self):
-        return self.func + '(ctx, ' + self.base.str_extract() + ')'
+    def str_extract(self, lvalue_formatter):
+        return (self.func + '(ctx, '
+                + self.base.str_extract(lvalue_formatter) + ')')
+
+    def referenced_fields(self):
+        return self.base.referenced_fields()
 
     def __eq__(self, other):
         return self.func == other.func and self.base == other.base
@@ -282,9 +400,12 @@ class ParameterField:
     def __str__(self):
         return self.func
 
-    def str_extract(self):
+    def str_extract(self, lvalue_formatter):
         return self.func + '(ctx)'
 
+    def referenced_fields(self):
+        return []
+
     def __eq__(self, other):
         return self.func == other.func
 
@@ -292,13 +413,40 @@ class ParameterField:
         return not self.__eq__(other)
 # end ParameterField
 
+class NamedField:
+    """Class representing a field already named in the pattern"""
+    def __init__(self, name, sign, len):
+        self.mask = 0
+        self.sign = sign
+        self.len = len
+        self.name = name
+
+    def __str__(self):
+        return self.name
+
+    def str_extract(self, lvalue_formatter):
+        global bitop_width
+        s = 's' if self.sign else ''
+        lvalue = lvalue_formatter(self.name)
+        return f'{s}extract{bitop_width}({lvalue}, 0, {self.len})'
+
+    def referenced_fields(self):
+        return [self.name]
+
+    def __eq__(self, other):
+        return self.name == other.name
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+# end NamedField
 
 class Arguments:
     """Class representing the extracted fields of a format"""
-    def __init__(self, nm, flds, extern):
+    def __init__(self, nm, flds, types, extern):
         self.name = nm
         self.extern = extern
-        self.fields = sorted(flds)
+        self.fields = flds
+        self.types = types
 
     def __str__(self):
         return self.name + ' ' + str(self.fields)
@@ -309,12 +457,11 @@ class Arguments:
     def output_def(self):
         if not self.extern:
             output('typedef struct {\n')
-            for n in self.fields:
-                output('    int ', n, ';\n')
+            for (n, t) in zip(self.fields, self.types):
+                output(f'    {t} {n};\n')
             output('} ', self.struct_name(), ';\n\n')
 # end Arguments
 
-
 class General:
     """Common code between instruction formats and instruction patterns"""
     def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
@@ -328,12 +475,59 @@ class General:
         self.fieldmask = fldm
         self.fields = flds
         self.width = w
+        self.dangling = None
 
     def __str__(self):
         return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
 
     def str1(self, i):
         return str_indent(i) + self.__str__()
+
+    def dangling_references(self):
+        # Return a list of all named references which aren't satisfied
+        # directly by this format/pattern. This will be either:
+        #  * a format referring to a field which is specified by the
+        #    pattern(s) using it
+        #  * a pattern referring to a field which is specified by the
+        #    format it uses
+        #  * a user error (referring to a field that doesn't exist at all)
+        if self.dangling is None:
+            # Compute this once and cache the answer
+            dangling = []
+            for n, f in self.fields.items():
+                for r in f.referenced_fields():
+                    if r not in self.fields:
+                        dangling.append(r)
+            self.dangling = dangling
+        return self.dangling
+
+    def output_fields(self, indent, lvalue_formatter):
+        # We use a topological sort to ensure that any use of NamedField
+        # comes after the initialization of the field it is referencing.
+        graph = {}
+        for n, f in self.fields.items():
+            refs = f.referenced_fields()
+            graph[n] = refs
+
+        try:
+            ts = TopologicalSorter(graph)
+            for n in ts.static_order():
+                # We only want to emit assignments for the keys
+                # in our fields list, not for anything that ends up
+                # in the tsort graph only because it was referenced as
+                # a NamedField.
+                try:
+                    f = self.fields[n]
+                    output(indent, lvalue_formatter(n), ' = ',
+                           f.str_extract(lvalue_formatter), ';\n')
+                except KeyError:
+                    pass
+        except CycleError as e:
+            # The second element of args is a list of nodes which form
+            # a cycle (there might be others too, but only one is reported).
+            # Pretty-print it to tell the user.
+            cycle = ' => '.join(e.args[1])
+            error(self.lineno, 'field definitions form a cycle: ' + cycle)
 # end General
 
 
@@ -347,8 +541,7 @@ class Format(General):
     def output_extract(self):
         output('static void ', self.extract_name(), '(DisasContext *ctx, ',
                self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
-        for n, f in self.fields.items():
-            output('    a->', n, ' = ', f.str_extract(), ';\n')
+        self.output_fields(str_indent(4), lambda n: 'a->' + n)
         output('}\n\n')
 # end Format
 
@@ -369,11 +562,36 @@ class Pattern(General):
         ind = str_indent(i)
         arg = self.base.base.name
         output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
+        # We might have named references in the format that refer to fields
+        # in the pattern, or named references in the pattern that refer
+        # to fields in the format. This affects whether we extract the fields
+        # for the format before or after the ones for the pattern.
+        # For simplicity we don't allow cross references in both directions.
+        # This is also where we catch the syntax error of referring to
+        # a nonexistent field.
+        fmt_refs = self.base.dangling_references()
+        for r in fmt_refs:
+            if r not in self.fields:
+                error(self.lineno, f'format refers to undefined field {r}')
+        pat_refs = self.dangling_references()
+        for r in pat_refs:
+            if r not in self.base.fields:
+                error(self.lineno, f'pattern refers to undefined field {r}')
+        if pat_refs and fmt_refs:
+            error(self.lineno, ('pattern that uses fields defined in format '
+                                'cannot use format that uses fields defined '
+                                'in pattern'))
+        if fmt_refs:
+            # pattern fields first
+            self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
+            assert not extracted, "dangling fmt refs but it was already extracted"
         if not extracted:
             output(ind, self.base.extract_name(),
                    '(ctx, &u.f_', arg, ', insn);\n')
-        for n, f in self.fields.items():
-            output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
+        if not fmt_refs:
+            # pattern fields last
+            self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
+
         output(ind, 'if (', translate_prefix, '_', self.name,
                '(ctx, &u.f_', arg, ')) return true;\n')
 
@@ -450,7 +668,7 @@ class MultiPattern(General):
 
     def prop_format(self):
         for p in self.pats:
-            p.build_tree()
+            p.prop_format()
 
     def prop_width(self):
         width = None
@@ -476,15 +694,18 @@ class IncMultiPattern(MultiPattern):
             if outermask != p.fixedmask:
                 innermask = p.fixedmask & ~outermask
                 innerbits = p.fixedbits & ~outermask
-                output(ind, 'if ((insn & ',
-                       '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits),
-                       ') {\n')
-                output(ind, '    /* ',
-                       str_match_bits(p.fixedbits, p.fixedmask), ' */\n')
+                output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
+                output(ind, f'    /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n')
                 p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
                 output(ind, '}\n')
             else:
                 p.output_code(i, extracted, p.fixedbits, p.fixedmask)
+
+    def build_tree(self):
+        if not self.pats:
+            error_with_file(self.file, self.lineno, 'empty pattern group')
+        super().build_tree()
+
 #end IncMultiPattern
 
 
@@ -499,12 +720,12 @@ class Tree:
 
     def str1(self, i):
         ind = str_indent(i)
-        r = '{0}{1:08x}'.format(ind, self.fixedmask)
+        r = ind + whex(self.fixedmask)
         if self.format:
             r += ' ' + self.format.name
         r += ' [\n'
         for (b, s) in self.subs:
-            r += '{0}  {1:08x}:\n'.format(ind, b)
+            r += ind + f'  {whex(b)}:\n'
             r += s.str1(i + 4) + '\n'
         r += ind + ']'
         return r
@@ -516,8 +737,10 @@ class Tree:
         ind = str_indent(i)
 
         # If we identified all nodes below have the same format,
-        # extract the fields now.
-        if not extracted and self.base:
+        # extract the fields now. But don't do it if the format relies
+        # on named fields from the insn pattern, as those won't have
+        # been initialised at this point.
+        if not extracted and self.base and not self.base.dangling_references():
             output(ind, self.base.extract_name(),
                    '(ctx, &u.f_', self.base.base.name, ', insn);\n')
             extracted = True
@@ -528,16 +751,16 @@ class Tree:
         if sh > 0:
             # Propagate SH down into the local functions.
             def str_switch(b, sh=sh):
-                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)
+                return f'(insn >> {sh}) & {b >> sh:#x}'
 
             def str_case(b, sh=sh):
-                return '0x{0:x}'.format(b >> sh)
+                return hex(b >> sh)
         else:
             def str_switch(b):
-                return 'insn & 0x{0:08x}'.format(b)
+                return f'insn & {whexC(b)}'
 
             def str_case(b):
-                return '0x{0:08x}'.format(b)
+                return whexC(b)
 
         output(ind, 'switch (', str_switch(self.thismask), ') {\n')
         for b, s in sorted(self.subs):
@@ -603,7 +826,7 @@ class ExcMultiPattern(MultiPattern):
         return t
 
     def build_tree(self):
-        super().prop_format()
+        super().build_tree()
         self.tree = self.__build_tree(self.pats, self.fixedbits,
                                       self.fixedmask)
 
@@ -639,6 +862,7 @@ def parse_field(lineno, name, toks):
     """Parse one instruction field from TOKS at LINENO"""
     global fields
     global insnwidth
+    global re_C_ident
 
     # A "simple" field will have only one entry;
     # a "multifield" will have several.
@@ -653,6 +877,25 @@ def parse_field(lineno, name, toks):
             func = func[1]
             continue
 
+        if re.fullmatch(re_C_ident + ':s[0-9]+', t):
+            # Signed named field
+            subtoks = t.split(':')
+            n = subtoks[0]
+            le = int(subtoks[1])
+            f = NamedField(n, True, le)
+            subs.append(f)
+            width += le
+            continue
+        if re.fullmatch(re_C_ident + ':[0-9]+', t):
+            # Unsigned named field
+            subtoks = t.split(':')
+            n = subtoks[0]
+            le = int(subtoks[1])
+            f = NamedField(n, False, le)
+            subs.append(f)
+            width += le
+            continue
+
         if re.fullmatch('[0-9]+:s[0-9]+', t):
             # Signed field extract
             subtoks = t.split(':s')
@@ -662,11 +905,11 @@ def parse_field(lineno, name, toks):
             subtoks = t.split(':')
             sign = False
         else:
-            error(lineno, 'invalid field token "{0}"'.format(t))
+            error(lineno, f'invalid field token "{t}"')
         po = int(subtoks[0])
         le = int(subtoks[1])
         if po + le > insnwidth:
-            error(lineno, 'field {0} too large'.format(t))
+            error(lineno, f'field {t} too large')
         f = Field(sign, po, le)
         subs.append(f)
         width += le
@@ -704,21 +947,27 @@ def parse_arguments(lineno, name, toks):
     global anyextern
 
     flds = []
+    types = []
     extern = False
-    for t in toks:
-        if re.fullmatch('!extern', t):
+    for n in toks:
+        if re.fullmatch('!extern', n):
             extern = True
             anyextern = True
             continue
-        if not re.fullmatch(re_C_ident, t):
-            error(lineno, 'invalid argument set token "{0}"'.format(t))
-        if t in flds:
-            error(lineno, 'duplicate argument "{0}"'.format(t))
-        flds.append(t)
+        if re.fullmatch(re_C_ident + ':' + re_C_ident, n):
+            (n, t) = n.split(':')
+        elif re.fullmatch(re_C_ident, n):
+            t = 'int'
+        else:
+            error(lineno, f'invalid argument set token "{n}"')
+        if n in flds:
+            error(lineno, f'duplicate argument "{n}"')
+        flds.append(n)
+        types.append(t)
 
     if name in arguments:
         error(lineno, 'duplicate argument set', name)
-    arguments[name] = Arguments(name, flds, extern)
+    arguments[name] = Arguments(name, flds, types, extern)
 # end parse_arguments
 
 
@@ -745,11 +994,11 @@ def infer_argument_set(flds):
     global decode_function
 
     for arg in arguments.values():
-        if eq_fields_for_args(flds, arg.fields):
+        if eq_fields_for_args(flds, arg):
             return arg
 
     name = decode_function + str(len(arguments))
-    arg = Arguments(name, flds.keys(), False)
+    arg = Arguments(name, flds.keys(), ['int'] * len(flds), False)
     arguments[name] = arg
     return arg
 
@@ -882,14 +1131,14 @@ def parse_generic(lineno, parent_pat, name, toks):
                 flen = flen[1:]
             shift = int(flen, 10)
             if shift + width > insnwidth:
-                error(lineno, 'field {0} exceeds insnwidth'.format(fname))
+                error(lineno, f'field {fname} exceeds insnwidth')
             f = Field(sign, insnwidth - width - shift, shift)
             flds = add_field(lineno, flds, fname, f)
             fixedbits <<= shift
             fixedmask <<= shift
             undefmask <<= shift
         else:
-            error(lineno, 'invalid token "{0}"'.format(t))
+            error(lineno, f'invalid token "{t}"')
         width += shift
 
     if variablewidth and width < insnwidth and width % 8 == 0:
@@ -901,7 +1150,7 @@ def parse_generic(lineno, parent_pat, name, toks):
 
     # We should have filled in all of the bits of the instruction.
     elif not (is_format and width == 0) and width != insnwidth:
-        error(lineno, 'definition has {0} bits'.format(width))
+        error(lineno, f'definition has {width} bits')
 
     # Do not check for fields overlapping fields; one valid usage
     # is to be able to duplicate fields via import.
@@ -919,8 +1168,7 @@ def parse_generic(lineno, parent_pat, name, toks):
         if arg:
             for f in flds.keys():
                 if f not in arg.fields:
-                    error(lineno, 'field {0} not in argument set {1}'
-                                  .format(f, arg.name))
+                    error(lineno, f'field {f} not in argument set {arg.name}')
         else:
             arg = infer_argument_set(flds)
         if name in formats:
@@ -947,13 +1195,12 @@ def parse_generic(lineno, parent_pat, name, toks):
         arg = fmt.base
         for f in flds.keys():
             if f not in arg.fields:
-                error(lineno, 'field {0} not in argument set {1}'
-                              .format(f, arg.name))
+                error(lineno, f'field {f} not in argument set {arg.name}')
             if f in fmt.fields.keys():
-                error(lineno, 'field {0} set by format and pattern'.format(f))
+                error(lineno, f'field {f} set by format and pattern')
         for f in arg.fields:
             if f not in flds.keys() and f not in fmt.fields.keys():
-                error(lineno, 'field {0} not initialized'.format(f))
+                error(lineno, f'field {f} not initialized')
         pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
                       undefmask, fieldmask, flds, width)
         parent_pat.pats.append(pat)
@@ -961,19 +1208,19 @@ def parse_generic(lineno, parent_pat, name, toks):
 
     # Validate the masks that we have assembled.
     if fieldmask & fixedmask:
-        error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
-                      .format(fieldmask, fixedmask))
+        error(lineno, 'fieldmask overlaps fixedmask ',
+              f'({whex(fieldmask)} & {whex(fixedmask)})')
     if fieldmask & undefmask:
-        error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
-                      .format(fieldmask, undefmask))
+        error(lineno, 'fieldmask overlaps undefmask ',
+              f'({whex(fieldmask)} & {whex(undefmask)})')
     if fixedmask & undefmask:
-        error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
-                      .format(fixedmask, undefmask))
+        error(lineno, 'fixedmask overlaps undefmask ',
+              f'({whex(fixedmask)} & {whex(undefmask)})')
     if not is_format:
         allbits = fieldmask | fixedmask | undefmask
         if allbits != insnmask:
-            error(lineno, 'bits left unspecified (0x{0:08x})'
-                          .format(allbits ^ insnmask))
+            error(lineno, 'bits left unspecified ',
+                  f'({whex(allbits ^ insnmask)})')
 # end parse_general
 
 
@@ -1084,7 +1331,7 @@ def parse_file(f, parent_pat):
         elif re.fullmatch(re_pat_ident, name):
             parse_generic(start_lineno, parent_pat, name, toks)
         else:
-            error(lineno, 'invalid token "{0}"'.format(name))
+            error(lineno, f'invalid token "{name}"')
         toks = []
 
     if nesting != 0:
@@ -1103,10 +1350,9 @@ class SizeTree:
 
     def str1(self, i):
         ind = str_indent(i)
-        r = '{0}{1:08x}'.format(ind, self.mask)
-        r += ' [\n'
+        r = ind + whex(self.mask) + ' [\n'
         for (b, s) in self.subs:
-            r += '{0}  {1:08x}:\n'.format(ind, b)
+            r += ind + f'  {whex(b)}:\n'
             r += s.str1(i + 4) + '\n'
         r += ind + ']'
         return r
@@ -1119,9 +1365,8 @@ class SizeTree:
 
         # If we need to load more bytes to test, do so now.
         if extracted < self.width:
-            output(ind, 'insn = ', decode_function,
-                   '_load_bytes(ctx, insn, {0}, {1});\n'
-                   .format(extracted // 8, self.width // 8));
+            output(ind, f'insn = {decode_function}_load_bytes',
+                   f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
             extracted = self.width
 
         # Attempt to aid the compiler in producing compact switch statements.
@@ -1130,16 +1375,16 @@ class SizeTree:
         if sh > 0:
             # Propagate SH down into the local functions.
             def str_switch(b, sh=sh):
-                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)
+                return f'(insn >> {sh}) & {b >> sh:#x}'
 
             def str_case(b, sh=sh):
-                return '0x{0:x}'.format(b >> sh)
+                return hex(b >> sh)
         else:
             def str_switch(b):
-                return 'insn & 0x{0:08x}'.format(b)
+                return f'insn & {whexC(b)}'
 
             def str_case(b):
-                return '0x{0:08x}'.format(b)
+                return whexC(b)
 
         output(ind, 'switch (', str_switch(self.mask), ') {\n')
         for b, s in sorted(self.subs):
@@ -1161,8 +1406,7 @@ class SizeLeaf:
         self.width = w
 
     def str1(self, i):
-        ind = str_indent(i)
-        return '{0}{1:08x}'.format(ind, self.mask)
+        return str_indent(i) + whex(self.mask)
 
     def __str__(self):
         return self.str1(0)
@@ -1173,9 +1417,8 @@ class SizeLeaf:
 
         # If we need to load more bytes, do so now.
         if extracted < self.width:
-            output(ind, 'insn = ', decode_function,
-                   '_load_bytes(ctx, insn, {0}, {1});\n'
-                   .format(extracted // 8, self.width // 8));
+            output(ind, f'insn = {decode_function}_load_bytes',
+                   f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
             extracted = self.width
         output(ind, 'return insn;\n')
 # end SizeLeaf
@@ -1209,7 +1452,7 @@ def build_size_tree(pats, width, outerbits, outermask):
         for p in pats:
             pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
         error_with_file(pats[0].file, pats[0].lineno,
-                        'overlapping patterns size {0}:'.format(width), pnames)
+                        f'overlapping patterns size {width}:', pnames)
 
     bins = {}
     for i in pats:
@@ -1258,18 +1501,22 @@ def main():
     global translate_prefix
     global output_fd
     global output_file
+    global output_null
     global input_file
     global insnwidth
     global insntype
     global insnmask
     global decode_function
+    global bitop_width
     global variablewidth
     global anyextern
+    global testforerror
 
     decode_scope = 'static '
 
     long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
-                 'static-decode=', 'varinsnwidth=']
+                 'static-decode=', 'varinsnwidth=', 'test-for-error',
+                 'output-null']
     try:
         (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
     except getopt.GetoptError as err:
@@ -1292,8 +1539,16 @@ def main():
             if insnwidth == 16:
                 insntype = 'uint16_t'
                 insnmask = 0xffff
+            elif insnwidth == 64:
+                insntype = 'uint64_t'
+                insnmask = 0xffffffffffffffff
+                bitop_width = 64
             elif insnwidth != 32:
                 error(0, 'cannot handle insns of width', insnwidth)
+        elif o == '--test-for-error':
+            testforerror = True
+        elif o == '--output-null':
+            output_null = True
         else:
             assert False, 'unhandled option'
 
@@ -1304,7 +1559,7 @@ def main():
 
     for filename in args:
         input_file = filename
-        f = open(filename, 'r')
+        f = open(filename, 'rt', encoding='utf-8')
         parse_file(f, toppat)
         f.close()
 
@@ -1323,10 +1578,14 @@ def main():
         stree = build_size_tree(toppat.pats, 8, 0, 0)
         prop_size(stree)
 
-    if output_file:
-        output_fd = open(output_file, 'w')
+    if output_null:
+        output_fd = open(os.devnull, 'wt', encoding='utf-8', errors="ignore")
+    elif output_file:
+        output_fd = open(output_file, 'wt', encoding='utf-8')
     else:
-        output_fd = sys.stdout
+        output_fd = io.TextIOWrapper(sys.stdout.buffer,
+                                     encoding=sys.stdout.encoding,
+                                     errors="ignore")
 
     output_autogen()
     for n in sorted(arguments.keys()):
@@ -1390,6 +1649,7 @@ def main():
 
     if output_file:
         output_fd.close()
+    exit(1 if testforerror else 0)
 # end main
 
 
old mode 100755 (executable)
new mode 100644 (file)
index 0411866..da8b56e
@@ -33,26 +33,35 @@ import re
 import random
 import argparse
 from itertools import chain
-
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python'))
-from qemu.machine import QEMUMachine
+from pathlib import Path
+
+try:
+    from qemu.machine import QEMUMachine
+    from qemu.qmp import ConnectError
+except ModuleNotFoundError as exc:
+    path = Path(__file__).resolve()
+    print(f"Module '{exc.name}' not found.")
+    print("  Try 'make check-venv' from your build directory,")
+    print("  and then one way to run this script is like so:")
+    print(f'  > $builddir/pyvenv/bin/python3 "{path}"')
+    sys.exit(1)
 
 logger = logging.getLogger('device-crash-test')
 dbg = logger.debug
 
 
-# Purposes of the following whitelist:
+# Purposes of the following rule list:
 # * Avoiding verbose log messages when we find known non-fatal
 #   (exitcode=1) errors
 # * Avoiding fatal errors when we find known crashes
 # * Skipping machines/devices that are known not to work out of
 #   the box, when running in --quick mode
 #
-# Keeping the whitelist updated is desirable, but not required,
+# Keeping the rule list updated is desirable, but not required,
 # because unexpected cases where QEMU exits with exitcode=1 will
 # just trigger a INFO message.
 
-# Valid whitelist entry keys:
+# Valid error rule keys:
 # * accel: regexp, full match only
 # * machine: regexp, full match only
 # * device: regexp, full match only
@@ -62,7 +71,7 @@ dbg = logger.debug
 # * expected: if True, QEMU is expected to always fail every time
 #   when testing the corresponding test case
 # * loglevel: log level of log output when there's a match.
-ERROR_WHITELIST = [
+ERROR_RULE_LIST = [
     # Machines that won't work out of the box:
     #             MACHINE                         | ERROR MESSAGE
     {'machine':'niagara', 'expected':True},       # Unable to load a firmware for -M niagara
@@ -76,7 +85,6 @@ ERROR_WHITELIST = [
     {'device':'ics', 'expected':True},                     # ics_base_realize: required link 'xics' not found: Property '.xics' not found
     # "-device ide-cd" does work on more recent QEMU versions, so it doesn't have expected=True
     {'device':'ide-cd'},                                 # No drive specified
-    {'device':'ide-drive', 'expected':True},               # No drive specified
     {'device':'ide-hd', 'expected':True},                  # No drive specified
     {'device':'ipmi-bmc-extern', 'expected':True},         # IPMI external bmc requires chardev attribute
     {'device':'isa-debugcon', 'expected':True},            # Can't create serial device, empty char device
@@ -93,8 +101,8 @@ ERROR_WHITELIST = [
     {'device':'pci-bridge', 'expected':True},              # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
     {'device':'pci-bridge-seat', 'expected':True},         # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
     {'device':'pxb', 'expected':True},                     # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
+    {'device':'pxb-cxl', 'expected':True},                 # pxb-cxl devices cannot reside on a PCI bus.
     {'device':'scsi-block', 'expected':True},              # drive property not set
-    {'device':'scsi-disk', 'expected':True},               # drive property not set
     {'device':'scsi-generic', 'expected':True},            # drive property not set
     {'device':'scsi-hd', 'expected':True},                 # drive property not set
     {'device':'spapr-pci-host-bridge', 'expected':True},   # BUID not specified for PHB
@@ -186,65 +194,65 @@ ERROR_WHITELIST = [
 ]
 
 
-def whitelistTestCaseMatch(wl, t):
-    """Check if a test case specification can match a whitelist entry
+def errorRuleTestCaseMatch(rule, t):
+    """Check if a test case specification can match a error rule
 
-    This only checks if a whitelist entry is a candidate match
+    This only checks if a error rule is a candidate match
     for a given test case, it won't check if the test case
-    results/output match the entry.  See whitelistResultMatch().
+    results/output match the rule.  See ruleListResultMatch().
     """
-    return (('machine' not in wl or
+    return (('machine' not in rule or
              'machine' not in t or
-             re.match(wl['machine'] + '$', t['machine'])) and
-            ('accel' not in wl or
+             re.match(rule['machine'] + '$', t['machine'])) and
+            ('accel' not in rule or
              'accel' not in t or
-             re.match(wl['accel'] + '$', t['accel'])) and
-            ('device' not in wl or
+             re.match(rule['accel'] + '$', t['accel'])) and
+            ('device' not in rule or
              'device' not in t or
-             re.match(wl['device'] + '$', t['device'])))
+             re.match(rule['device'] + '$', t['device'])))
 
 
-def whitelistCandidates(t):
+def ruleListCandidates(t):
     """Generate the list of candidates that can match a test case"""
-    for i, wl in enumerate(ERROR_WHITELIST):
-        if whitelistTestCaseMatch(wl, t):
-            yield (i, wl)
+    for i, rule in enumerate(ERROR_RULE_LIST):
+        if errorRuleTestCaseMatch(rule, t):
+            yield (i, rule)
 
 
 def findExpectedResult(t):
-    """Check if there's an expected=True whitelist entry for a test case
+    """Check if there's an expected=True error rule for a test case
 
-    Returns (i, wl) tuple, where i is the index in
-    ERROR_WHITELIST and wl is the whitelist entry itself.
+    Returns (i, rule) tuple, where i is the index in
+    ERROR_RULE_LIST and rule is the error rule itself.
     """
-    for i, wl in whitelistCandidates(t):
-        if wl.get('expected'):
-            return (i, wl)
+    for i, rule in ruleListCandidates(t):
+        if rule.get('expected'):
+            return (i, rule)
 
 
-def whitelistResultMatch(wl, r):
-    """Check if test case results/output match a whitelist entry
+def ruleListResultMatch(rule, r):
+    """Check if test case results/output match a error rule
 
     It is valid to call this function only if
-    whitelistTestCaseMatch() is True for the entry (e.g. on
-    entries returned by whitelistCandidates())
+    errorRuleTestCaseMatch() is True for the rule (e.g. on
+    rules returned by ruleListCandidates())
     """
-    assert whitelistTestCaseMatch(wl, r['testcase'])
-    return ((wl.get('exitcode', 1) is None or
-             r['exitcode'] == wl.get('exitcode', 1)) and
-            ('log' not in wl or
-             re.search(wl['log'], r['log'], re.MULTILINE)))
+    assert errorRuleTestCaseMatch(rule, r['testcase'])
+    return ((rule.get('exitcode', 1) is None or
+             r['exitcode'] == rule.get('exitcode', 1)) and
+            ('log' not in rule or
+             re.search(rule['log'], r['log'], re.MULTILINE)))
 
 
-def checkResultWhitelist(r):
-    """Look up whitelist entry for a given test case result
+def checkResultRuleList(r):
+    """Look up error rule for a given test case result
 
-    Returns (i, wl) tuple, where i is the index in
-    ERROR_WHITELIST and wl is the whitelist entry itself.
+    Returns (i, rule) tuple, where i is the index in
+    ERROR_RULE_LIST and rule is the error rule itself.
     """
-    for i, wl in whitelistCandidates(r['testcase']):
-        if whitelistResultMatch(wl, r):
-            return i, wl
+    for i, rule in ruleListCandidates(r['testcase']):
+        if ruleListResultMatch(rule, r):
+            return i, rule
 
     raise Exception("this should never happen")
 
@@ -261,14 +269,14 @@ def formatTestCase(t):
 
 def qomListTypeNames(vm, **kwargs):
     """Run qom-list-types QMP command, return type names"""
-    types = vm.command('qom-list-types', **kwargs)
+    types = vm.cmd('qom-list-types', **kwargs)
     return [t['name'] for t in types]
 
 
 def infoQDM(vm):
     """Parse 'info qdm' output"""
     args = {'command-line': 'info qdm'}
-    devhelp = vm.command('human-monitor-command', **args)
+    devhelp = vm.cmd('human-monitor-command', **args)
     for l in devhelp.split('\n'):
         l = l.strip()
         if l == '' or l.endswith(':'):
@@ -296,9 +304,9 @@ class QemuBinaryInfo(object):
             # there's no way to query DeviceClass::user_creatable using QMP,
             # so use 'info qdm':
             self.no_user_devs = set([d['name'] for d in infoQDM(vm, ) if d['no-user']])
-            self.machines = list(m['name'] for m in vm.command('query-machines'))
+            self.machines = list(m['name'] for m in vm.cmd('query-machines'))
             self.user_devs = self.alldevs.difference(self.no_user_devs)
-            self.kvm_available = vm.command('query-kvm')['enabled']
+            self.kvm_available = vm.cmd('query-kvm')['enabled']
         finally:
             vm.shutdown()
 
@@ -318,9 +326,7 @@ class QemuBinaryInfo(object):
         try:
             vm.launch()
             mi['runnable'] = True
-        except KeyboardInterrupt:
-            raise
-        except:
+        except Exception:
             dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=sys.exc_info())
             dbg("log: %r", vm.get_log())
             mi['runnable'] = False
@@ -356,14 +362,14 @@ def checkOneCase(args, testcase):
             '-device', qemuOptsEscape(device)]
     cmdline = ' '.join([binary] + args)
     dbg("will launch QEMU: %s", cmdline)
-    vm = QEMUMachine(binary=binary, args=args)
+    vm = QEMUMachine(binary=binary, args=args, qmp_timer=15)
 
+    exc = None
     exc_traceback = None
     try:
         vm.launch()
-    except KeyboardInterrupt:
-        raise
-    except:
+    except Exception as this_exc:
+        exc = this_exc
         exc_traceback = traceback.format_exc()
         dbg("Exception while running test case")
     finally:
@@ -371,8 +377,9 @@ def checkOneCase(args, testcase):
         ec = vm.exitcode()
         log = vm.get_log()
 
-    if exc_traceback is not None or ec != 0:
-        return {'exc_traceback':exc_traceback,
+    if exc is not None or ec != 0:
+        return {'exc': exc,
+                'exc_traceback':exc_traceback,
                 'exitcode':ec,
                 'log':log,
                 'testcase':testcase,
@@ -390,7 +397,7 @@ def binariesToTest(args, testcase):
 
 
 def accelsToTest(args, testcase):
-    if getBinaryInfo(args, testcase['binary']).kvm_available:
+    if getBinaryInfo(args, testcase['binary']).kvm_available and not args.tcg_only:
         yield 'kvm'
     yield 'tcg'
 
@@ -460,6 +467,17 @@ def logFailure(f, level):
     for l in f['log'].strip().split('\n'):
         logger.log(level, "log: %s", l)
     logger.log(level, "exit code: %r", f['exitcode'])
+
+    # If the Exception is merely a QMP connect error,
+    # reduce the logging level for its traceback to
+    # improve visual clarity.
+    if isinstance(f.get('exc'), ConnectError):
+        logger.log(level, "%s.%s: %s",
+                   type(f['exc']).__module__,
+                   type(f['exc']).__qualname__,
+                   str(f['exc']))
+        level = logging.DEBUG
+
     if f['exc_traceback']:
         logger.log(level, "exception:")
         for l in f['exc_traceback'].split('\n'):
@@ -492,6 +510,8 @@ def main():
                         help="Full mode: test cases that are expected to fail")
     parser.add_argument('--strict', action='store_true', dest='strict',
                         help="Treat all warnings as fatal")
+    parser.add_argument('--tcg-only', action='store_true', dest='tcg_only',
+                        help="Only test with TCG accelerator")
     parser.add_argument('qemu', nargs='*', metavar='QEMU',
                         help='QEMU binary to run')
     args = parser.parse_args()
@@ -504,6 +524,12 @@ def main():
         lvl = logging.WARN
     logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s')
 
+    if not args.debug:
+        # Async QMP, when in use, is chatty about connection failures.
+        # This script knowingly generates a ton of connection errors.
+        # Silence this logger.
+        logging.getLogger('qemu.qmp.qmp_client').setLevel(logging.CRITICAL)
+
     fatal_failures = []
     wl_stats = {}
     skipped = 0
@@ -543,12 +569,12 @@ def main():
             break
 
         if f:
-            i, wl = checkResultWhitelist(f)
-            dbg("testcase: %r, whitelist match: %r", t, wl)
+            i, rule = checkResultRuleList(f)
+            dbg("testcase: %r, rule list match: %r", t, rule)
             wl_stats.setdefault(i, []).append(f)
-            level = wl.get('loglevel', logging.DEBUG)
+            level = rule.get('loglevel', logging.DEBUG)
             logFailure(f, level)
-            if wl.get('fatal') or (args.strict and level >= logging.WARN):
+            if rule.get('fatal') or (args.strict and level >= logging.WARN):
                 fatal_failures.append(f)
         else:
             dbg("success: %s", formatTestCase(t))
@@ -560,10 +586,10 @@ def main():
         logger.info("Skipped %d test cases", skipped)
 
     if args.debug:
-        stats = sorted([(len(wl_stats.get(i, [])), wl) for i, wl in
-                         enumerate(ERROR_WHITELIST)], key=lambda x: x[0])
-        for count, wl in stats:
-            dbg("whitelist entry stats: %d: %r", count, wl)
+        stats = sorted([(len(wl_stats.get(i, [])), rule) for i, rule in
+                         enumerate(ERROR_RULE_LIST)], key=lambda x: x[0])
+        for count, rule in stats:
+            dbg("error rule stats: %d: %r", count, rule)
 
     if fatal_failures:
         for f in fatal_failures:
old mode 100755 (executable)
new mode 100644 (file)
diff --git a/scripts/entitlement.sh b/scripts/entitlement.sh
new file mode 100755 (executable)
index 0000000..0f41294
--- /dev/null
@@ -0,0 +1,33 @@
+#!/bin/sh -e
+#
+# Helper script for the build process to apply entitlements
+
+in_place=:
+if [ "$1" = --install ]; then
+  shift
+  in_place=false
+fi
+
+DST="$1"
+SRC="$2"
+ICON="$3"
+ENTITLEMENT="$4"
+
+if $in_place; then
+  trap 'rm "$DST.tmp"' exit
+  cp -pPf "$SRC" "$DST.tmp"
+  SRC="$DST.tmp"
+else
+  cd "$MESON_INSTALL_DESTDIR_PREFIX"
+fi
+
+if test -n "$ENTITLEMENT"; then
+  codesign --entitlements "$ENTITLEMENT" --force -s - "$SRC"
+fi
+
+# Add the QEMU icon to the binary on Mac OS
+Rez -append "$ICON" -o "$SRC"
+SetFile -a C "$SRC"
+
+mv -f "$SRC" "$DST"
+trap '' exit
old mode 100755 (executable)
new mode 100644 (file)
diff --git a/scripts/feature_to_c.py b/scripts/feature_to_c.py
new file mode 100644 (file)
index 0000000..e04d6b2
--- /dev/null
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import os, sys, xml.etree.ElementTree
+
+def writeliteral(indent, bytes):
+    sys.stdout.write(' ' * indent)
+    sys.stdout.write('"')
+    quoted = True
+
+    for c in bytes:
+        if not quoted:
+            sys.stdout.write('\n')
+            sys.stdout.write(' ' * indent)
+            sys.stdout.write('"')
+            quoted = True
+
+        if c == b'"'[0]:
+            sys.stdout.write('\\"')
+        elif c == b'\\'[0]:
+            sys.stdout.write('\\\\')
+        elif c == b'\n'[0]:
+            sys.stdout.write('\\n"')
+            quoted = False
+        elif c >= 32 and c < 127:
+            sys.stdout.write(c.to_bytes(1, 'big').decode())
+        else:
+            sys.stdout.write(f'\{c:03o}')
+
+    if quoted:
+        sys.stdout.write('"')
+
+sys.stdout.write('#include "qemu/osdep.h"\n' \
+                 '#include "exec/gdbstub.h"\n' \
+                 '\n'
+                 'const GDBFeature gdb_static_features[] = {\n')
+
+for input in sys.argv[1:]:
+    with open(input, 'rb') as file:
+        read = file.read()
+
+    parser = xml.etree.ElementTree.XMLPullParser(['start', 'end'])
+    parser.feed(read)
+    events = parser.read_events()
+    event, element = next(events)
+    if event != 'start':
+        sys.stderr.write(f'unexpected event: {event}\n')
+        exit(1)
+    if element.tag != 'feature':
+        sys.stderr.write(f'unexpected start tag: {element.tag}\n')
+        exit(1)
+
+    regnum = 0
+    regnums = []
+    tags = ['feature']
+    for event, element in events:
+        if event == 'end':
+            if element.tag != tags[len(tags) - 1]:
+                sys.stderr.write(f'unexpected end tag: {element.tag}\n')
+                exit(1)
+
+            tags.pop()
+            if element.tag == 'feature':
+                break
+        elif event == 'start':
+            if len(tags) < 2 and element.tag == 'reg':
+                if 'regnum' in element.attrib:
+                    regnum = int(element.attrib['regnum'])
+
+                regnums.append(regnum)
+                regnum += 1
+
+            tags.append(element.tag)
+        else:
+            raise Exception(f'unexpected event: {event}\n')
+
+    if len(tags):
+        sys.stderr.write('unterminated feature tag\n')
+        exit(1)
+
+    base_reg = min(regnums)
+    num_regs = max(regnums) - base_reg + 1 if len(regnums) else 0
+
+    sys.stdout.write('    {\n')
+    writeliteral(8, bytes(os.path.basename(input), 'utf-8'))
+    sys.stdout.write(',\n')
+    writeliteral(8, read)
+    sys.stdout.write(f',\n        {num_regs},\n    }},\n')
+
+sys.stdout.write('    { NULL }\n};\n')
diff --git a/scripts/feature_to_c.sh b/scripts/feature_to_c.sh
deleted file mode 100644 (file)
index b116989..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/bin/sh
-
-# Convert text files to compilable C arrays.
-#
-# Copyright (C) 2007 Free Software Foundation, Inc.
-#
-# This file is part of GDB.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <http://www.gnu.org/licenses/>.
-
-if test -z "$1"; then
-  echo "Usage: $0 INPUTFILE..."
-  exit 1
-fi
-
-for input; do
-  arrayname=xml_feature_$(echo $input | sed 's,.*/,,; s/[-.]/_/g')
-
-  ${AWK:-awk} 'BEGIN { n = 0
-      printf "#include \"qemu/osdep.h\"\n"
-      print "static const char '$arrayname'[] = {"
-      for (i = 0; i < 255; i++)
-        _ord_[sprintf("%c", i)] = i
-    } {
-      split($0, line, "");
-      printf "  "
-      for (i = 1; i <= length($0); i++) {
-        c = line[i]
-        if (c == "'\''") {
-          printf "'\''\\'\'''\'', "
-        } else if (c == "\\") {
-          printf "'\''\\\\'\'', "
-        } else if (_ord_[c] >= 32 && _ord_[c] < 127) {
-         printf "'\''%s'\'', ", c
-        } else {
-          printf "'\''\\%03o'\'', ", _ord_[c]
-        }
-        if (i % 10 == 0)
-          printf "\n   "
-      }
-      printf "'\''\\n'\'', \n"
-    } END {
-      print "  0 };"
-    }' < $input
-done
-
-echo
-echo "const char *const xml_builtin[][2] = {"
-
-for input; do
-  basename=$(echo $input | sed 's,.*/,,')
-  arrayname=xml_feature_$(echo $input | sed 's,.*/,,; s/[-.]/_/g')
-  echo "  { \"$basename\", $arrayname },"
-done
-
-echo "  { (char *)0, (char *)0 }"
-echo "};"
index 93f9b106698a37381844ebd67a31d05a50c23405..c15a041272023ab6ac0e3668cdfca79b894059f6 100755 (executable)
@@ -1,6 +1,6 @@
 #! /bin/sh
 #
-# Fix multiline comments to match CODING_STYLE
+# Fix multiline comments to match docs/devel/style.rst
 #
 # Copyright (C) 2018 Red Hat, Inc.
 #
index bba9fb052c47a50eebd10e6dcc0b814251890e76..a2f7664b7bb4e08e7637fb6e687a54ce6d550ca8 100755 (executable)
@@ -44,6 +44,7 @@ read_includes()
 
      cpp -P -nostdinc -fdirectives-only \
         -D_UAPI_ASM_$(upper ${arch})_BITSPERLONG_H \
+        -D__ASM_$(upper ${arch})_BITSPERLONG_H \
         -D__BITS_PER_LONG=${bits} \
         -I${linux}/arch/${arch}/include/uapi/ \
         -I${linux}/include/uapi \
@@ -98,4 +99,6 @@ generate_syscall_nr openrisc 32 "$output/linux-user/openrisc/syscall_nr.h"
 
 generate_syscall_nr riscv 32 "$output/linux-user/riscv/syscall32_nr.h"
 generate_syscall_nr riscv 64 "$output/linux-user/riscv/syscall64_nr.h"
+generate_syscall_nr hexagon 32 "$output/linux-user/hexagon/syscall_nr.h"
+generate_syscall_nr loongarch 64 "$output/linux-user/loongarch64/syscall_nr.h"
 rm -fr "$TMP"
old mode 100755 (executable)
new mode 100644 (file)
index 271f5ff..00a0870
@@ -796,7 +796,7 @@ sub top_of_tree {
         && (-d "${lk_path}docs")
         && (-f "${lk_path}VERSION")
         && (-d "${lk_path}linux-user/")
-        && (-d "${lk_path}softmmu/")) {
+        && (-d "${lk_path}system/")) {
        return 1;
     }
     return 0;
@@ -907,6 +907,7 @@ sub get_subsystem_name {
     if (length($subsystem) > 20) {
        $subsystem = substr($subsystem, 0, 17);
        $subsystem =~ s/\s*$//;
+       $subsystem =~ s/[()]//g;
        $subsystem = $subsystem . "...";
     }
     return $subsystem;
@@ -1377,7 +1378,7 @@ sub vcs_exists {
        warn("$P: No supported VCS found.  Add --nogit to options?\n");
        warn("Using a git repository produces better results.\n");
        warn("Try latest git repository using:\n");
-       warn("git clone https://git.qemu.org/git/qemu.git\n");
+       warn("git clone https://gitlab.com/qemu-project/qemu.git\n");
        $printed_novcs = 1;
     }
     return 0;
index 65ed877aefd9837cefb8a4b2328b092f8c58bf99..bb1222c772792fc7b8c0185d7b9bf24916b5a053 100755 (executable)
@@ -9,77 +9,111 @@ command=$1
 shift
 maybe_modules="$@"
 
-test -z "$GIT" && GIT=git
+test -z "$maybe_modules" && exit 0
+test -z "$GIT" && GIT=$(command -v git)
 
-error() {
+cd "$(dirname "$0")/.."
+
+no_git_error=
+if ! test -e ".git"; then
+    no_git_error='no git checkout exists'
+elif test -z "$GIT"; then
+    no_git_error='git binary not found'
+fi
+
+is_git() {
+    test -z "$no_git_error"
+}
+
+update_error() {
     echo "$0: $*"
     echo
     echo "Unable to automatically checkout GIT submodules '$modules'."
     echo "If you require use of an alternative GIT binary (for example to"
-    echo "enable use of a transparent proxy), then please specify it by"
-    echo "running configure by with the '--with-git' argument. e.g."
-    echo
-    echo " $ ./configure --with-git='tsocks git'"
+    echo "enable use of a transparent proxy), please disable automatic"
+    echo "GIT submodule checkout with:"
     echo
-    echo "Alternatively you may disable automatic GIT submodule checkout"
-    echo "with:"
-    echo
-    echo " $ ./configure --disable-git-update"
+    echo " $ ./configure --disable-download"
     echo
     echo "and then manually update submodules prior to running make, with:"
     echo
-    echo " $ scripts/git-submodule.sh update $modules"
+    echo " $ GIT='tsocks git' scripts/git-submodule.sh update $modules"
     echo
     exit 1
 }
 
-modules=""
-for m in $maybe_modules
-do
-    $GIT submodule status $m 1> /dev/null 2>&1
-    if test $? = 0
-    then
-        modules="$modules $m"
-    else
-        echo "warn: ignoring non-existent submodule $m"
+validate_error() {
+    if is_git && test "$1" = "validate"; then
+        echo "GIT submodules checkout is out of date, and submodules"
+        echo "configured for validate only. Please run"
+        echo "  scripts/git-submodule.sh update $maybe_modules"
+        echo "from the source directory or call configure with"
+        echo "  --enable-download"
     fi
-done
-
-if test -n "$maybe_modules" && ! test -e ".git"
-then
-    echo "$0: unexpectedly called with submodules but no git checkout exists"
     exit 1
+}
+
+check_updated() {
+    local CURSTATUS OLDSTATUS
+    CURSTATUS=$($GIT submodule status $module)
+    OLDSTATUS=$(grep $module $substat)
+    test "$CURSTATUS" = "$OLDSTATUS"
+}
+
+if is_git; then
+    test -e $substat || touch $substat
+    modules=""
+    for m in $maybe_modules
+    do
+        $GIT submodule status $m 1> /dev/null 2>&1
+        if test $? = 0
+        then
+            modules="$modules $m"
+            grep $m $substat > /dev/null 2>&1 || $GIT submodule status $module >> $substat
+        else
+            echo "warn: ignoring non-existent submodule $m"
+        fi
+    done
+else
+    modules=$maybe_modules
 fi
 
 case "$command" in
-status)
-    if test -z "$maybe_modules"
-    then
-         test -s ${substat} && exit 1 || exit 0
-    fi
-
-    test -f "$substat" || exit 1
+status|validate)
     for module in $modules; do
-        CURSTATUS=$($GIT submodule status $module)
-        OLDSTATUS=$(cat $substat | grep $module)
-        if test "$CURSTATUS" != "$OLDSTATUS"; then
-            exit 1
+        if is_git; then
+            check_updated $module || validate_error "$command"
+        elif ! (set xyz "$module"/* && test -e "$2"); then
+            # The directory does not exist or it contains no files
+            echo "$0: sources not available for $module and $no_git_error"
+            validate_error "$command"
         fi
     done
-    exit 0
     ;;
+
 update)
-    if test -z "$maybe_modules"
-    then
-        test -e $substat || touch $substat
-        exit 0
-    fi
+    is_git || {
+        echo "$0: unexpectedly called with submodules but $no_git_error"
+        exit 1
+    }
 
     $GIT submodule update --init $modules 1>/dev/null
-    test $? -ne 0 && error "failed to update modules"
+    test $? -ne 0 && update_error "failed to update modules"
+    for module in $modules; do
+        check_updated $module || echo Updated "$module"
+    done
 
-    $GIT submodule status $modules > "${substat}"
-    test $? -ne 0 && error "failed to save git submodule status" >&2
+    (while read -r REPLY; do
+        for module in $modules; do
+            case $REPLY in
+                *" $module "*) continue 2 ;;
+            esac
+        done
+        printf '%s\n' "$REPLY"
+    done
+    $GIT submodule status $modules
+    test $? -ne 0 && update_error "failed to save git submodule status" >&2) < $substat > $substat.new
+    mv -f $substat.new $substat
     ;;
 esac
 
index 3736c1d6afff598e1040bff60cadd415c25baaf4..8edac0380ba32d60b3ff5dae7a4f7328a110f8d9 100644 (file)
@@ -9,9 +9,12 @@
 #   git config diff.orderFile scripts/git.orderfile
 #
 
+MAINTAINERS
+
 # Documentation
 docs/*
 *.rst
+*.rst.inc
 
 # build system
 configure
@@ -28,9 +31,11 @@ qga/*.json
 
 # headers
 *.h
+*.h.inc
 
 # decoding tree specification
 *.decode
 
 # code
 *.c
+*.c.inc
old mode 100755 (executable)
new mode 100644 (file)
diff --git a/scripts/hxtool-conv.pl b/scripts/hxtool-conv.pl
deleted file mode 100755 (executable)
index eede40b..0000000
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/perl -w
-#
-# Script to convert .hx file STEXI/ETEXI blocks to SRST/ERST
-#
-# Copyright (C) 2020 Linaro
-#
-# This work is licensed under the terms of the GNU GPL, version 2 or
-# (at your option) any later version. See the COPYING file in the
-# top-level directory.
-
-# This script was only ever intended as a one-off conversion operation.
-# Please excuse the places where it is a bit hacky.
-# Some manual intervention after the conversion is expected, as are
-# some warnings from makeinfo.
-# Warning: this script is not idempotent: don't try to run it on
-# a .hx file that already has SRST/ERST sections.
-
-# Expected usage:
-# scripts/hxtool-conv.pl file.hx > file.hx.new
-
-use utf8;
-
-my $reading_texi = 0;
-my $texiblock = '';
-my @tables = ();
-
-sub update_tables($) {
-    my ($texi) = @_;
-    # Update our list of open table directives: every @table
-    # line in the texi fragment is added to the list, and every
-    # @end table line means we remove an entry from the list.
-    # If this fragment had a completely self contained table with
-    # both the @table and @end table lines, this will be a no-op.
-    foreach (split(/\n/, $texi)) {
-        push @tables, $_ if /^\@table/;
-        pop @tables if /^\@end table/;
-    }
-}
-
-sub only_table_directives($) {
-    # Return true if every line in the fragment is a start or end table directive
-    my ($texi) = @_;
-    foreach (split(/\n/, $texi)) {
-        return 0 unless /^\@table/ or /^\@end table/;
-    }
-    return 1;
-}
-
-sub output_rstblock($) {
-    # Write the output to /tmp/frag.texi, wrapped in whatever current @table
-    # lines we need.
-    my ($texi) = @_;
-
-    # As a special case, if this fragment is only table directives and
-    # nothing else, update our set of open table directives but otherwise
-    # ignore it. This avoids emitting an empty SRST/ERST block.
-    if (only_table_directives($texi)) {
-        update_tables($texi);
-        return;
-    }
-
-    open(my $fragfh, '>', '/tmp/frag.texi');
-    # First output the currently active set of open table directives
-    print $fragfh join("\n", @tables);
-    # Next, update our list of open table directives.
-    # We need to do this before we emit the closing table directives
-    # so that we emit the right number if this fragment had an
-    # unbalanced set of directives.
-    update_tables($texi);
-    # Then emit the texi fragment itself.
-    print $fragfh "\n$texi\n";
-    # Finally, add the necessary closing table directives.
-    print $fragfh "\@end table\n" x scalar @tables;
-    close $fragfh;
-
-    # Now invoke makeinfo/pandoc on it and slurp the results into a string
-    open(my $fh, '-|', "makeinfo --force -o - --docbook "
-         . "-D 'qemu_system_x86 QEMU_SYSTEM_X86_MACRO' "
-         . "-D 'qemu_system     QEMU_SYSTEM_MACRO'  /tmp/frag.texi "
-         . " | pandoc  -f docbook -t rst")
-        or die "can't start makeinfo/pandoc: $!";
-
-    binmode $fh, ':encoding(utf8)';
-
-    print "SRST\n";
-
-    # Slurp the whole thing into a string so we can do multiline
-    # string matches on it.
-    my $rst = do {
-        local $/ = undef;
-        <$fh>;
-    };
-    $rst =~ s/^-  − /-  /gm;
-    $rst =~ s/“/"/gm;
-    $rst =~ s/”/"/gm;
-    $rst =~ s/‘/'/gm;
-    $rst =~ s/’/'/gm;
-    $rst =~ s/QEMU_SYSTEM_MACRO/|qemu_system|/g;
-    $rst =~ s/QEMU_SYSTEM_X86_MACRO/|qemu_system_x86|/g;
-    $rst =~ s/(?=::\n\n +\|qemu)/.. parsed-literal/g;
-    $rst =~ s/:\n\n::$/::/gm;
-
-    # Fix up the invalid reference format makeinfo/pandoc emit:
-    # `Some string here <#anchorname>`__
-    # should be:
-    # :ref:`anchorname`
-    $rst =~ s/\`[^<`]+\<\#([^>]+)\>\`__/:ref:`$1`/gm;
-    print $rst;
-
-    close $fh or die "error on close: $!";
-    print "ERST\n";
-}
-
-# Read the whole .hx input file.
-while (<>) {
-    # Always print the current line
-    print;
-    if (/STEXI/) {
-        $reading_texi = 1;
-        $texiblock = '';
-        next;
-    }
-    if (/ETEXI/) {
-        $reading_texi = 0;
-        # dump RST version of block
-        output_rstblock($texiblock);
-        next;
-    }
-    if ($reading_texi) {
-        # Accumulate the texi into a string
-        # but drop findex entries as they will confuse makeinfo
-        next if /^\@findex/;
-        $texiblock .= $_;
-    }
-}
-
-die "Unexpectedly still in texi block at EOF" if $reading_texi;
old mode 100755 (executable)
new mode 100644 (file)
index 4fbaaa0..240923d
@@ -56,6 +56,13 @@ Output format selection (mutually exclusive):
   -rst                 Output reStructuredText format.
   -none                        Do not output documentation, only warnings.
 
+Output format selection modifier (affects only ReST output):
+
+  -sphinx-version      Use the ReST C domain dialect compatible with an
+                       specific Sphinx Version.
+                       If not specified, kernel-doc will auto-detect using
+                       the sphinx-build version found on PATH.
+
 Output selection (mutually exclusive):
   -export              Only output documentation for symbols that have been
                        exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL()
@@ -66,13 +73,10 @@ Output selection (mutually exclusive):
   -function NAME       Only output documentation for the given function(s)
                        or DOC: section title(s). All other functions and DOC:
                        sections are ignored. May be specified multiple times.
-  -nofunction NAME     Do NOT output documentation for the given function(s);
-                       only output documentation for the other functions and
-                       DOC: sections. May be specified multiple times.
+  -nosymbol NAME       Exclude the specified symbols from the output
+                       documentation. May be specified multiple times.
 
 Output selection modifiers:
-  -sphinx-version VER   Generate rST syntax for the specified Sphinx version.
-                        Only works with reStructuredTextFormat.
   -no-doc-sections     Do not output DOC: sections.
   -enable-lineno        Enable output of #define LINENO lines. Only works with
                         reStructuredText format.
@@ -83,6 +87,7 @@ Output selection modifiers:
 Other parameters:
   -v                   Verbose output, more warnings and other information.
   -h                   Print this help.
+  -Werror              Treat warnings as errors.
 
 EOF
     print $message;
@@ -215,7 +220,9 @@ my $type_constant = '\b``([^\`]+)``\b';
 my $type_constant2 = '\%([-_\w]+)';
 my $type_func = '(\w+)\(\)';
 my $type_param = '\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)';
+my $type_param_ref = '([\!]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)';
 my $type_fp_param = '\@(\w+)\(\)';  # Special RST handling for func ptr params
+my $type_fp_param2 = '\@(\w+->\S+)\(\)';  # Special RST handling for structs with func ptr params
 my $type_env = '(\$\w+)';
 my $type_enum = '#(enum\s*([_\w]+))';
 my $type_struct = '#(struct\s*([_\w]+))';
@@ -238,6 +245,7 @@ my @highlights_man = (
                       [$type_typedef, "\\\\fI\$1\\\\fP"],
                       [$type_union, "\\\\fI\$1\\\\fP"],
                       [$type_param, "\\\\fI\$1\\\\fP"],
+                      [$type_param_ref, "\\\\fI\$1\$2\\\\fP"],
                       [$type_member, "\\\\fI\$1\$2\$3\\\\fP"],
                       [$type_fallback, "\\\\fI\$1\\\\fP"]
                     );
@@ -251,6 +259,7 @@ my @highlights_rst = (
                        [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"],
                        [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"],
                       [$type_fp_param, "**\$1\\\\(\\\\)**"],
+                      [$type_fp_param2, "**\$1\\\\(\\\\)**"],
                        [$type_func, "\$1()"],
                        [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"],
                        [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"],
@@ -258,7 +267,7 @@ my @highlights_rst = (
                        [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"],
                        # in rst this can refer to any type
                        [$type_fallback, "\\:c\\:type\\:`\$1`"],
-                       [$type_param, "**\$1**"]
+                       [$type_param_ref, "**\$1\$2**"]
                      );
 my $blankline_rst = "\n";
 
@@ -268,9 +277,12 @@ if ($#ARGV == -1) {
 }
 
 my $kernelversion;
+my ($sphinx_major, $sphinx_minor, $sphinx_patch);
+
 my $dohighlight = "";
 
 my $verbose = 0;
+my $Werror = 0;
 my $output_mode = "rst";
 my $output_preformatted = 0;
 my $no_doc_sections = 0;
@@ -282,13 +294,11 @@ my $modulename = "Kernel API";
 use constant {
     OUTPUT_ALL          => 0, # output all symbols and doc sections
     OUTPUT_INCLUDE      => 1, # output only specified symbols
-    OUTPUT_EXCLUDE      => 2, # output everything except specified symbols
-    OUTPUT_EXPORTED     => 3, # output exported symbols
-    OUTPUT_INTERNAL     => 4, # output non-exported symbols
+    OUTPUT_EXPORTED     => 2, # output exported symbols
+    OUTPUT_INTERNAL     => 3, # output non-exported symbols
 };
 my $output_selection = OUTPUT_ALL;
 my $show_not_found = 0;        # No longer used
-my $sphinx_version = "0.0"; # if not specified, assume old
 
 my @export_file_list;
 
@@ -310,6 +320,7 @@ my $man_date = ('January', 'February', 'March', 'April', 'May', 'June',
 # CAVEAT EMPTOR!  Some of the others I localised may not want to be, which
 # could cause "use of undefined value" or other bugs.
 my ($function, %function_table, %parametertypes, $declaration_purpose);
+my %nosymbol_table = ();
 my $declaration_start_line;
 my ($type, $declaration_name, $return_type);
 my ($newsection, $newcontents, $prototype, $brcount, %source_map);
@@ -318,9 +329,21 @@ if (defined($ENV{'KBUILD_VERBOSE'})) {
        $verbose = "$ENV{'KBUILD_VERBOSE'}";
 }
 
+if (defined($ENV{'KDOC_WERROR'})) {
+       $Werror = "$ENV{'KDOC_WERROR'}";
+}
+
+if (defined($ENV{'KCFLAGS'})) {
+       my $kcflags = "$ENV{'KCFLAGS'}";
+
+       if ($kcflags =~ /Werror/) {
+               $Werror = 1;
+       }
+}
+
 # Generated docbook code is inserted in a template at a point where
 # docbook v3.1 requires a non-zero sequence of RefEntry's; see:
-# http://www.oasis-open.org/docbook/documentation/reference/html/refentry.html
+# https://www.oasis-open.org/docbook/documentation/reference/html/refentry.html
 # We keep track of number of generated entries and generate a dummy
 # if needs be to ensure the expanded template can be postprocessed
 # into html.
@@ -330,13 +353,14 @@ my $lineprefix="";
 
 # Parser states
 use constant {
-    STATE_NORMAL        => 0, # normal code
-    STATE_NAME          => 1, # looking for function name
-    STATE_BODY_MAYBE    => 2, # body - or maybe more description
-    STATE_BODY          => 3, # the body of the comment
-    STATE_PROTO         => 4, # scanning prototype
-    STATE_DOCBLOCK      => 5, # documentation block
-    STATE_INLINE        => 6, # gathering documentation outside main block
+    STATE_NORMAL        => 0,        # normal code
+    STATE_NAME          => 1,        # looking for function name
+    STATE_BODY_MAYBE    => 2,        # body - or maybe more description
+    STATE_BODY          => 3,        # the body of the comment
+    STATE_BODY_WITH_BLANK_LINE => 4, # the body, which has a blank line
+    STATE_PROTO         => 5,        # scanning prototype
+    STATE_DOCBLOCK      => 6,        # documentation block
+    STATE_INLINE        => 7,        # gathering doc outside main block
 };
 my $state;
 my $in_doc_sect;
@@ -416,10 +440,9 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
        $output_selection = OUTPUT_INCLUDE;
        $function = shift @ARGV;
        $function_table{$function} = 1;
-    } elsif ($cmd eq "nofunction") { # output all except specific functions
-       $output_selection = OUTPUT_EXCLUDE;
-       $function = shift @ARGV;
-       $function_table{$function} = 1;
+    } elsif ($cmd eq "nosymbol") { # Exclude specific symbols
+       my $symbol = shift @ARGV;
+       $nosymbol_table{$symbol} = 1;
     } elsif ($cmd eq "export") { # only exported symbols
        $output_selection = OUTPUT_EXPORTED;
        %function_table = ();
@@ -431,6 +454,8 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
        push(@export_file_list, $file);
     } elsif ($cmd eq "v") {
        $verbose = 1;
+    } elsif ($cmd eq "Werror") {
+       $Werror = 1;
     } elsif (($cmd eq "h") || ($cmd eq "help")) {
        usage();
     } elsif ($cmd eq 'no-doc-sections') {
@@ -439,8 +464,23 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
            $enable_lineno = 1;
     } elsif ($cmd eq 'show-not-found') {
        $show_not_found = 1;  # A no-op but don't fail
-    } elsif ($cmd eq 'sphinx-version') {
-        $sphinx_version = shift @ARGV;
+    } elsif ($cmd eq "sphinx-version") {
+       my $ver_string = shift @ARGV;
+       if ($ver_string =~ m/^(\d+)(\.\d+)?(\.\d+)?/) {
+           $sphinx_major = $1;
+           if (defined($2)) {
+               $sphinx_minor = substr($2,1);
+           } else {
+               $sphinx_minor = 0;
+           }
+           if (defined($3)) {
+               $sphinx_patch = substr($3,1)
+           } else {
+               $sphinx_patch = 0;
+           }
+       } else {
+           die "Sphinx version should either major.minor or major.minor.patch format\n";
+       }
     } else {
        # Unknown argument
         usage();
@@ -449,6 +489,51 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
 
 # continue execution near EOF;
 
+# The C domain dialect changed on Sphinx 3. So, we need to check the
+# version in order to produce the right tags.
+sub findprog($)
+{
+       foreach(split(/:/, $ENV{PATH})) {
+               return "$_/$_[0]" if(-x "$_/$_[0]");
+       }
+}
+
+sub get_sphinx_version()
+{
+       my $ver;
+
+       my $cmd = "sphinx-build";
+       if (!findprog($cmd)) {
+               my $cmd = "sphinx-build3";
+               if (!findprog($cmd)) {
+                       $sphinx_major = 1;
+                       $sphinx_minor = 2;
+                       $sphinx_patch = 0;
+                       printf STDERR "Warning: Sphinx version not found. Using default (Sphinx version %d.%d.%d)\n",
+                              $sphinx_major, $sphinx_minor, $sphinx_patch;
+                       return;
+               }
+       }
+
+       open IN, "$cmd --version 2>&1 |";
+       while (<IN>) {
+               if (m/^\s*sphinx-build\s+([\d]+)\.([\d\.]+)(\+\/[\da-f]+)?$/) {
+                       $sphinx_major = $1;
+                       $sphinx_minor = $2;
+                       $sphinx_patch = $3;
+                       last;
+               }
+               # Sphinx 1.2.x uses a different format
+               if (m/^\s*Sphinx.*\s+([\d]+)\.([\d\.]+)$/) {
+                       $sphinx_major = $1;
+                       $sphinx_minor = $2;
+                       $sphinx_patch = $3;
+                       last;
+               }
+       }
+       close IN;
+}
+
 # get kernel version from env
 sub get_kernel_version() {
     my $version = 'unknown kernel version';
@@ -515,11 +600,11 @@ sub dump_doc_section {
         return;
     }
 
+    return if (defined($nosymbol_table{$name}));
+
     if (($output_selection == OUTPUT_ALL) ||
-       ($output_selection == OUTPUT_INCLUDE &&
-        defined($function_table{$name})) ||
-       ($output_selection == OUTPUT_EXCLUDE &&
-        !defined($function_table{$name})))
+       (($output_selection == OUTPUT_INCLUDE) &&
+        defined($function_table{$name})))
     {
        dump_section($file, $name, $contents);
        output_blockhead({'sectionlist' => \@sectionlist,
@@ -602,10 +687,10 @@ sub output_function_man(%) {
        $type = $args{'parametertypes'}{$parameter};
        if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
            # pointer-to-function
-           print ".BI \"" . $parenth . $1 . "\" " . $parameter . " \") (" . $2 . ")" . $post . "\"\n";
+           print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n";
        } else {
            $type =~ s/([^\*])$/$1 /;
-           print ".BI \"" . $parenth . $type . "\" " . $parameter . " \"" . $post . "\"\n";
+           print ".BI \"" . $parenth . $type . "\" " . " \"" . $post . "\"\n";
        }
        $count++;
        $parenth = "";
@@ -745,6 +830,8 @@ sub output_blockhead_rst(%) {
     my ($parameter, $section);
 
     foreach $section (@{$args{'sectionlist'}}) {
+       next if (defined($nosymbol_table{$section}));
+
        if ($output_selection != OUTPUT_INCLUDE) {
            print "**$section**\n\n";
        }
@@ -830,32 +917,37 @@ sub output_function_rst(%) {
     my ($parameter, $section);
     my $oldprefix = $lineprefix;
     my $start = "";
-
-    if ($args{'typedef'}) {
-       print ".. c:type:: ". $args{'function'} . "\n\n";
-       print_lineno($declaration_start_line);
-       print "   **Typedef**: ";
-       $lineprefix = "";
-       output_highlight_rst($args{'purpose'});
-       $start = "\n\n**Syntax**\n\n  ``";
+    my $is_macro = 0;
+
+    if ($sphinx_major < 3) {
+       if ($args{'typedef'}) {
+           print ".. c:type:: ". $args{'function'} . "\n\n";
+           print_lineno($declaration_start_line);
+           print "   **Typedef**: ";
+           $lineprefix = "";
+           output_highlight_rst($args{'purpose'});
+           $start = "\n\n**Syntax**\n\n  ``";
+           $is_macro = 1;
+       } else {
+           print ".. c:function:: ";
+       }
     } else {
-        if ((split(/\./, $sphinx_version))[0] >= 3) {
-            # Sphinx 3 and later distinguish macros and functions and
-            # complain if you use c:function with something that's not
-            # syntactically valid as a function declaration.
-            # We assume that anything with a return type is a function
-            # and anything without is a macro.
-            if ($args{'functiontype'} ne "") {
-                print ".. c:function:: ";
-            } else {
-                print ".. c:macro:: ";
-            }
-        } else {
-            # Older Sphinx don't support documenting macros that take
-            # arguments with c:macro, and don't complain about the use
-            # of c:function for this.
-            print ".. c:function:: ";
-        }
+       if ($args{'typedef'} || $args{'functiontype'} eq "") {
+           $is_macro = 1;
+           print ".. c:macro:: ". $args{'function'} . "\n\n";
+       } else {
+           print ".. c:function:: ";
+       }
+
+       if ($args{'typedef'}) {
+           print_lineno($declaration_start_line);
+           print "   **Typedef**: ";
+           $lineprefix = "";
+           output_highlight_rst($args{'purpose'});
+           $start = "\n\n**Syntax**\n\n  ``";
+       } else {
+           print "``" if ($is_macro);
+       }
     }
     if ($args{'functiontype'} ne "") {
        $start .= $args{'functiontype'} . " " . $args{'function'} . " (";
@@ -876,13 +968,15 @@ sub output_function_rst(%) {
            # pointer-to-function
            print $1 . $parameter . ") (" . $2 . ")";
        } else {
-           print $type . " " . $parameter;
+           print $type;
        }
     }
-    if ($args{'typedef'}) {
-       print ");``\n\n";
+    if ($is_macro) {
+       print ")``\n\n";
     } else {
        print ")\n\n";
+    }
+    if (!$args{'typedef'}) {
        print_lineno($declaration_start_line);
        $lineprefix = "   ";
        output_highlight_rst($args{'purpose'});
@@ -897,7 +991,7 @@ sub output_function_rst(%) {
        $type = $args{'parametertypes'}{$parameter};
 
        if ($type ne "") {
-           print "``$type $parameter``\n";
+           print "``$type``\n";
        } else {
            print "``$parameter``\n";
        }
@@ -938,9 +1032,14 @@ sub output_enum_rst(%) {
     my ($parameter);
     my $oldprefix = $lineprefix;
     my $count;
-    my $name = "enum " . $args{'enum'};
 
-    print "\n\n.. c:type:: " . $name . "\n\n";
+    if ($sphinx_major < 3) {
+       my $name = "enum " . $args{'enum'};
+       print "\n\n.. c:type:: " . $name . "\n\n";
+    } else {
+       my $name = $args{'enum'};
+       print "\n\n.. c:enum:: " . $name . "\n\n";
+    }
     print_lineno($declaration_start_line);
     $lineprefix = "   ";
     output_highlight_rst($args{'purpose'});
@@ -966,8 +1065,13 @@ sub output_typedef_rst(%) {
     my %args = %{$_[0]};
     my ($parameter);
     my $oldprefix = $lineprefix;
-    my $name = "typedef " . $args{'typedef'};
+    my $name;
 
+    if ($sphinx_major < 3) {
+       $name = "typedef " . $args{'typedef'};
+    } else {
+       $name = $args{'typedef'};
+    }
     print "\n\n.. c:type:: " . $name . "\n\n";
     print_lineno($declaration_start_line);
     $lineprefix = "   ";
@@ -982,17 +1086,17 @@ sub output_struct_rst(%) {
     my %args = %{$_[0]};
     my ($parameter);
     my $oldprefix = $lineprefix;
-    my $name = $args{'type'} . " " . $args{'struct'};
-
-    # Sphinx 3.0 and up will emit warnings for "c:type:: struct Foo".
-    # It wants to see "c:struct:: Foo" (and will add the word 'struct' in
-    # the rendered output).
-    if ((split(/\./, $sphinx_version))[0] >= 3) {
-        my $sname = $name;
-        $sname =~ s/^struct //;
-        print "\n\n.. c:struct:: " . $sname . "\n\n";
+
+    if ($sphinx_major < 3) {
+       my $name = $args{'type'} . " " . $args{'struct'};
+       print "\n\n.. c:type:: " . $name . "\n\n";
     } else {
-        print "\n\n.. c:type:: " . $name . "\n\n";
+       my $name = $args{'struct'};
+       if ($args{'type'} eq 'union') {
+           print "\n\n.. c:union:: " . $name . "\n\n";
+       } else {
+           print "\n\n.. c:struct:: " . $name . "\n\n";
+       }
     }
     print_lineno($declaration_start_line);
     $lineprefix = "   ";
@@ -1052,12 +1156,14 @@ sub output_declaration {
     my $name = shift;
     my $functype = shift;
     my $func = "output_${functype}_$output_mode";
+
+    return if (defined($nosymbol_table{$name}));
+
     if (($output_selection == OUTPUT_ALL) ||
        (($output_selection == OUTPUT_INCLUDE ||
          $output_selection == OUTPUT_EXPORTED) &&
         defined($function_table{$name})) ||
-       (($output_selection == OUTPUT_EXCLUDE ||
-         $output_selection == OUTPUT_INTERNAL) &&
+       ($output_selection == OUTPUT_INTERNAL &&
         !($functype eq "function" && defined($function_table{$name}))))
     {
        &$func(@_);
@@ -1092,7 +1198,7 @@ sub dump_struct($$) {
     my $x = shift;
     my $file = shift;
 
-    if ($x =~ /(struct|union)\s+(\w+)\s*\{(.*)\}(\s*(__packed|__aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*/) {
+    if ($x =~ /(struct|union)\s+(\w+)\s*\{(.*)\}(\s*(__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*/) {
        my $decl_type = $1;
        $declaration_name = $2;
        my $members = $3;
@@ -1103,11 +1209,15 @@ sub dump_struct($$) {
        # strip comments:
        $members =~ s/\/\*.*?\*\///gos;
        # strip attributes
-       $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)//gi;
-       $members =~ s/\s*__aligned\s*\([^;]*\)//gos;
-       $members =~ s/\s*__packed\s*//gos;
-       $members =~ s/\s*CRYPTO_MINALIGN_ATTR//gos;
+       $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi;
+       $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
+       $members =~ s/\s*__packed\s*/ /gos;
+       $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
+       $members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
+       $members =~ s/\s*____cacheline_aligned/ /gos;
+
        # replace DECLARE_BITMAP
+       $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
        $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
        # replace DECLARE_HASHTABLE
        $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
@@ -1234,6 +1344,8 @@ sub show_warnings($$) {
        my $functype = shift;
        my $name = shift;
 
+       return 0 if (defined($nosymbol_table{$name}));
+
        return 1 if ($output_selection == OUTPUT_ALL);
 
        if ($output_selection == OUTPUT_EXPORTED) {
@@ -1257,27 +1369,28 @@ sub show_warnings($$) {
                        return 0;
                }
        }
-       if ($output_selection == OUTPUT_EXCLUDE) {
-               if (!defined($function_table{$name})) {
-                       return 1;
-               } else {
-                       return 0;
-               }
-       }
        die("Please add the new output type at show_warnings()");
 }
 
 sub dump_enum($$) {
     my $x = shift;
     my $file = shift;
+    my $members;
+
 
     $x =~ s@/\*.*?\*/@@gos;    # strip comments.
     # strip #define macros inside enums
     $x =~ s@#\s*((define|ifdef)\s+|endif)[^;]*;@@gos;
 
-    if ($x =~ /enum\s+(\w*)\s*\{(.*)\}/) {
+    if ($x =~ /typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;/) {
+       $declaration_name = $2;
+       $members = $1;
+    } elsif ($x =~ /enum\s+(\w*)\s*\{(.*)\}/) {
        $declaration_name = $1;
-       my $members = $2;
+       $members = $2;
+    }
+
+    if ($declaration_name) {
        my %_members;
 
        $members =~ s/\s+$//;
@@ -1312,27 +1425,31 @@ sub dump_enum($$) {
                            'sections' => \%sections,
                            'purpose' => $declaration_purpose
                           });
-    }
-    else {
+    } else {
        print STDERR "${file}:$.: error: Cannot parse enum!\n";
        ++$errors;
     }
 }
 
+my $typedef_type = qr { ((?:\s+[\w\*]+){1,8})\s* }x;
+my $typedef_ident = qr { \*?\s*(\w\S+)\s* }x;
+my $typedef_args = qr { \s*\((.*)\); }x;
+
+my $typedef1 = qr { typedef$typedef_type\($typedef_ident\)$typedef_args }x;
+my $typedef2 = qr { typedef$typedef_type$typedef_ident$typedef_args }x;
+
 sub dump_typedef($$) {
     my $x = shift;
     my $file = shift;
 
     $x =~ s@/\*.*?\*/@@gos;    # strip comments.
 
-    # Parse function prototypes
-    if ($x =~ /typedef\s+(\w+\s*\**)\s*\(\*?\s*(\w\S+)\s*\)\s*\((.*)\);/ ||
-       $x =~ /typedef\s+(\w+\s*\**)\s*(\w\S+)\s*\s*\((.*)\);/) {
-
-       # Function typedefs
+    # Parse function typedef prototypes
+    if ($x =~ $typedef1 || $x =~ $typedef2) {
        $return_type = $1;
        $declaration_name = $2;
        my $args = $3;
+       $return_type =~ s/^\s+//;
 
        create_parameterlist($args, ',', $file, $declaration_name);
 
@@ -1408,7 +1525,7 @@ sub create_parameterlist($$$$) {
            # Treat preprocessor directive as a typeless variable just to fill
            # corresponding data structures "correctly". Catch it later in
            # output_* subs.
-           push_parameter($arg, "", $file);
+           push_parameter($arg, "", "", $file);
        } elsif ($arg =~ m/\(.+\)\s*\(/) {
            # pointer-to-function
            $arg =~ tr/#/,/;
@@ -1417,7 +1534,7 @@ sub create_parameterlist($$$$) {
            $type = $arg;
            $type =~ s/([^\(]+\(\*?)\s*$param/$1/;
            save_struct_actual($param);
-           push_parameter($param, $type, $file, $declaration_name);
+           push_parameter($param, $type, $arg, $file, $declaration_name);
        } elsif ($arg) {
            $arg =~ s/\s*:\s*/:/g;
            $arg =~ s/\s*\[/\[/g;
@@ -1442,26 +1559,28 @@ sub create_parameterlist($$$$) {
            foreach $param (@args) {
                if ($param =~ m/^(\*+)\s*(.*)/) {
                    save_struct_actual($2);
-                   push_parameter($2, "$type $1", $file, $declaration_name);
+
+                   push_parameter($2, "$type $1", $arg, $file, $declaration_name);
                }
                elsif ($param =~ m/(.*?):(\d+)/) {
                    if ($type ne "") { # skip unnamed bit-fields
                        save_struct_actual($1);
-                       push_parameter($1, "$type:$2", $file, $declaration_name)
+                       push_parameter($1, "$type:$2", $arg, $file, $declaration_name)
                    }
                }
                else {
                    save_struct_actual($param);
-                   push_parameter($param, $type, $file, $declaration_name);
+                   push_parameter($param, $type, $arg, $file, $declaration_name);
                }
            }
        }
     }
 }
 
-sub push_parameter($$$$) {
+sub push_parameter($$$$$) {
        my $param = shift;
        my $type = shift;
+       my $org_arg = shift;
        my $file = shift;
        my $declaration_name = shift;
 
@@ -1479,6 +1598,10 @@ sub push_parameter($$$$) {
              # handles unnamed variable parameters
              $param = "...";
            }
+           elsif ($param =~ /\w\.\.\.$/) {
+             # for named variable parameters of the form `x...`, remove the dots
+             $param =~ s/\.\.\.$//;
+           }
            if (!defined $parameterdescs{$param} || $parameterdescs{$param} eq "") {
                $parameterdescs{$param} = "variable arguments";
            }
@@ -1521,8 +1644,8 @@ sub push_parameter($$$$) {
        # "[blah" in a parameter string;
        ###$param =~ s/\s*//g;
        push @parameterlist, $param;
-       $type =~ s/\s\s+/ /g;
-       $parametertypes{$param} = $type;
+       $org_arg =~ s/\s\s+/ /g;
+       $parametertypes{$param} = $org_arg;
 }
 
 sub check_sections($$$$$) {
@@ -1596,6 +1719,8 @@ sub dump_function($$) {
     my $file = shift;
     my $noret = 0;
 
+    print_lineno($new_start_line);
+
     $prototype =~ s/^static +//;
     $prototype =~ s/^extern +//;
     $prototype =~ s/^asmlinkage +//;
@@ -1620,6 +1745,9 @@ sub dump_function($$) {
             )+
           \)\)\s+//x;
 
+    # Strip QEMU specific compiler annotations
+    $prototype =~ s/QEMU_[A-Z_]+ +//;
+
     # Yes, this truly is vile.  We are looking for:
     # 1. Return type (may be nothing if we're looking at a macro)
     # 2. Function name
@@ -1633,7 +1761,7 @@ sub dump_function($$) {
     # If you mess with these regexps, it's a good idea to check that
     # the following functions' documentation still comes out right:
     # - parport_register_device (function pointer parameters)
-    # - qatomic_set (macro)
+    # - atomic_set (macro)
     # - pci_match_device, __copy_to_user (long return type)
 
     if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) {
@@ -1671,30 +1799,48 @@ sub dump_function($$) {
        return;
     }
 
-       my $prms = join " ", @parameterlist;
-       check_sections($file, $declaration_name, "function", $sectcheck, $prms);
+    my $prms = join " ", @parameterlist;
+    check_sections($file, $declaration_name, "function", $sectcheck, $prms);
 
-        # This check emits a lot of warnings at the moment, because many
-        # functions don't have a 'Return' doc section. So until the number
-        # of warnings goes sufficiently down, the check is only performed in
-        # verbose mode.
-        # TODO: always perform the check.
-        if ($verbose && !$noret) {
-                check_return_section($file, $declaration_name, $return_type);
-        }
+    # This check emits a lot of warnings at the moment, because many
+    # functions don't have a 'Return' doc section. So until the number
+    # of warnings goes sufficiently down, the check is only performed in
+    # verbose mode.
+    # TODO: always perform the check.
+    if ($verbose && !$noret) {
+           check_return_section($file, $declaration_name, $return_type);
+    }
 
-    output_declaration($declaration_name,
-                      'function',
-                      {'function' => $declaration_name,
-                       'module' => $modulename,
-                       'functiontype' => $return_type,
-                       'parameterlist' => \@parameterlist,
-                       'parameterdescs' => \%parameterdescs,
-                       'parametertypes' => \%parametertypes,
-                       'sectionlist' => \@sectionlist,
-                       'sections' => \%sections,
-                       'purpose' => $declaration_purpose
-                      });
+    # The function parser can be called with a typedef parameter.
+    # Handle it.
+    if ($return_type =~ /typedef/) {
+       output_declaration($declaration_name,
+                          'function',
+                          {'function' => $declaration_name,
+                           'typedef' => 1,
+                           'module' => $modulename,
+                           'functiontype' => $return_type,
+                           'parameterlist' => \@parameterlist,
+                           'parameterdescs' => \%parameterdescs,
+                           'parametertypes' => \%parametertypes,
+                           'sectionlist' => \@sectionlist,
+                           'sections' => \%sections,
+                           'purpose' => $declaration_purpose
+                          });
+    } else {
+       output_declaration($declaration_name,
+                          'function',
+                          {'function' => $declaration_name,
+                           'module' => $modulename,
+                           'functiontype' => $return_type,
+                           'parameterlist' => \@parameterlist,
+                           'parameterdescs' => \%parameterdescs,
+                           'parametertypes' => \%parametertypes,
+                           'sectionlist' => \@sectionlist,
+                           'sections' => \%sections,
+                           'purpose' => $declaration_purpose
+                          });
+    }
 }
 
 sub reset_state {
@@ -1789,6 +1935,11 @@ sub process_proto_function($$) {
        $prototype =~ s@/\*.*?\*/@@gos; # strip comments.
        $prototype =~ s@[\r\n]+@ @gos; # strip newlines/cr's.
        $prototype =~ s@^\s+@@gos; # strip leading spaces
+
+        # Handle prototypes for function pointers like:
+        # int (*pcs_config)(struct foo)
+       $prototype =~ s@^(\S+\s+)\(\s*\*(\S+)\)@$1$2@gos;
+
        if ($prototype =~ /SYSCALL_DEFINE/) {
                syscall_munge();
        }
@@ -1867,6 +2018,7 @@ sub process_export_file($) {
 
     while (<IN>) {
        if (/$export_symbol/) {
+           next if (defined($nosymbol_table{$2}));
            $function_table{$2} = 1;
        }
     }
@@ -1898,7 +2050,7 @@ sub process_name($$) {
     if (/$doc_block/o) {
        $state = STATE_DOCBLOCK;
        $contents = "";
-       $new_start_line = $. + 1;
+       $new_start_line = $.;
 
        if ( $1 eq "" ) {
            $section = $section_intro;
@@ -1966,6 +2118,25 @@ sub process_name($$) {
 sub process_body($$) {
     my $file = shift;
 
+    # Until all named variable macro parameters are
+    # documented using the bare name (`x`) rather than with
+    # dots (`x...`), strip the dots:
+    if ($section =~ /\w\.\.\.$/) {
+       $section =~ s/\.\.\.$//;
+
+       if ($verbose) {
+           print STDERR "${file}:$.: warning: Variable macro arguments should be documented without dots\n";
+           ++$warnings;
+       }
+    }
+
+    if ($state == STATE_BODY_WITH_BLANK_LINE && /^\s*\*\s?\S/) {
+       dump_section($file, $section, $contents);
+       $section = $section_default;
+       $new_start_line = $.;
+       $contents = "";
+    }
+
     if (/$doc_sect/i) { # case insensitive for supported section names
        $newsection = $1;
        $newcontents = $2;
@@ -2018,19 +2189,23 @@ sub process_body($$) {
        $prototype = "";
        $state = STATE_PROTO;
        $brcount = 0;
+        $new_start_line = $. + 1;
     } elsif (/$doc_content/) {
-       # miguel-style comment kludge, look for blank lines after
-       # @parameter line to signify start of description
        if ($1 eq "") {
-           if ($section =~ m/^@/ || $section eq $section_context) {
+           if ($section eq $section_context) {
                dump_section($file, $section, $contents);
                $section = $section_default;
                $contents = "";
                $new_start_line = $.;
+               $state = STATE_BODY;
            } else {
+               if ($section ne $section_default) {
+                   $state = STATE_BODY_WITH_BLANK_LINE;
+               } else {
+                   $state = STATE_BODY;
+               }
                $contents .= "\n";
            }
-           $state = STATE_BODY;
        } elsif ($state == STATE_BODY_MAYBE) {
            # Continued declaration purpose
            chomp($declaration_purpose);
@@ -2162,7 +2337,7 @@ sub process_file($) {
 
     $file = map_filename($orig_file);
 
-    if (!open(IN,"<$file")) {
+    if (!open(IN_FILE,"<$file")) {
        print STDERR "Error: Cannot open file $file\n";
        ++$errors;
        return;
@@ -2171,9 +2346,9 @@ sub process_file($) {
     $. = 1;
 
     $section_counter = 0;
-    while (<IN>) {
+    while (<IN_FILE>) {
        while (s/\\\s*$//) {
-           $_ .= <IN>;
+           $_ .= <IN_FILE>;
        }
        # Replace tabs by spaces
         while ($_ =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e) {};
@@ -2182,7 +2357,8 @@ sub process_file($) {
            process_normal();
        } elsif ($state == STATE_NAME) {
            process_name($file, $_);
-       } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE) {
+       } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE ||
+                $state == STATE_BODY_WITH_BLANK_LINE) {
            process_body($file, $_);
        } elsif ($state == STATE_INLINE) { # scanning for inline parameters
            process_inline($file, $_);
@@ -2204,9 +2380,14 @@ sub process_file($) {
            print STDERR "${file}:1: warning: no structured comments found\n";
        }
     }
+    close IN_FILE;
 }
 
 
+if ($output_mode eq "rst") {
+       get_sphinx_version() if (!$sphinx_major);
+}
+
 $kernelversion = get_kernel_version();
 
 # generate a sequence of code that will splice in highlighting information
@@ -2253,4 +2434,9 @@ if ($verbose && $warnings) {
   print STDERR "$warnings warnings\n";
 }
 
-exit($output_mode eq "none" ? 0 : $errors);
+if ($Werror && $warnings) {
+    print STDERR "$warnings warnings as Errors\n";
+    exit($warnings);
+} else {
+    exit($output_mode eq "none" ? 0 : $errors)
+}
old mode 100755 (executable)
new mode 100644 (file)
old mode 100755 (executable)
new mode 100644 (file)
index 6fe66d5..3fb4d5b
@@ -23,6 +23,7 @@ MSR_IA32_VMX_TRUE_PROCBASED_CTLS = 0x48E
 MSR_IA32_VMX_TRUE_EXIT_CTLS = 0x48F
 MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490
 MSR_IA32_VMX_VMFUNC = 0x491
+MSR_IA32_VMX_PROCBASED_CTLS3 = 0x492
 
 class msr(object):
     def __init__(self):
@@ -71,6 +72,13 @@ class Control(object):
                 s = 'yes'
             print('  %-40s %s' % (self.bits[bit], s))
 
+# All 64 bits in the tertiary controls MSR are allowed-1
+class Allowed1Control(Control):
+    def read2(self, nr):
+        m = msr()
+        val = m.read(nr, 0)
+        return (0, val)
+
 class Misc(object):
     def __init__(self, name, bits, msr):
         self.name = name
@@ -107,6 +115,7 @@ controls = [
             (50, 53): 'VMCS memory type',
             54: 'INS/OUTS instruction information',
             55: 'IA32_VMX_TRUE_*_CTLS support',
+            56: 'Skip checks on event error code',
             },
         msr = MSR_IA32_VMX_BASIC,
         ),
@@ -135,6 +144,7 @@ controls = [
             12: 'RDTSC exiting',
             15: 'CR3-load exiting',
             16: 'CR3-store exiting',
+            17: 'Activate tertiary controls',
             19: 'CR8-load exiting',
             20: 'CR8-store exiting',
             21: 'Use TPR shadow',
@@ -186,6 +196,14 @@ controls = [
         cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2,
         ),
 
+    Allowed1Control(
+        name = 'tertiary processor-based controls',
+        bits = {
+            4: 'Enable IPI virtualization'
+            },
+        cap_msr = MSR_IA32_VMX_PROCBASED_CTLS3,
+        ),
+
     Control(
         name = 'VM-Exit controls',
         bits = {
@@ -249,6 +267,7 @@ controls = [
         bits = {
             0: 'Execute-only EPT translations',
             6: 'Page-walk length 4',
+            7: 'Page-walk length 5',
             8: 'Paging-structure memory type UC',
             14: 'Paging-structure memory type WB',
             16: '2MB EPT pages',
diff --git a/scripts/make-config-poison.sh b/scripts/make-config-poison.sh
new file mode 100755 (executable)
index 0000000..2b36907
--- /dev/null
@@ -0,0 +1,17 @@
+#! /bin/sh
+
+if test $# = 0; then
+  exit 0
+fi
+
+# Create list of config switches that should be poisoned in common code,
+# but filter out several which are handled manually.
+exec sed -n \
+  -e' /CONFIG_TCG/d' \
+  -e '/CONFIG_USER_ONLY/d' \
+  -e '/CONFIG_SOFTMMU/d' \
+  -e '/^#define / {' \
+  -e    's///' \
+  -e    's/ .*//' \
+  -e    's/^/#pragma GCC poison /p' \
+  -e '}' "$@" | sort -u
old mode 100755 (executable)
new mode 100644 (file)
index a2a8cda..9c570b8
 # This work is licensed under the terms of the GNU GPLv2 or later.
 # See the COPYING file in the top-level directory.
 
+if [ $# -ne 2 ]; then
+    echo "Usage:"
+    echo " $0 gitrepo version"
+    exit 0
+fi
+
+# Only include wraps that are invoked with subproject()
+SUBPROJECTS="libvfio-user keycodemapdb berkeley-softfloat-3 berkeley-testfloat-3"
+
 src="$1"
 version="$2"
 destination=qemu-${version}
 
-git clone "${src}" ${destination}
+git clone --single-branch -b "v${version}" -c advice.detachedHead=false \
+    "${src}" ${destination}
+
 pushd ${destination}
-git checkout "v${version}"
-git submodule update --init
+
+git submodule update --init --single-branch
+meson subprojects download $SUBPROJECTS
+
 (cd roms/seabios && git describe --tags --long --dirty > .version)
 (cd roms/skiboot && ./make_version.sh > .version)
 # Fetch edk2 submodule's submodules, since it won't have access to them via
@@ -27,7 +40,12 @@ git submodule update --init
 # don't necessarily have much control over how a submodule handles its
 # submodule dependencies, so we continue to handle these on a case-by-case
 # basis for now.
-(cd roms/edk2 && git submodule update --init)
+(cd roms/edk2 && \
+    git submodule update --init --depth 1 -- \
+        ArmPkg/Library/ArmSoftFloatLib/berkeley-softfloat-3 \
+        BaseTools/Source/C/BrotliCompress/brotli \
+        CryptoPkg/Library/OpensslLib/openssl \
+        MdeModulePkg/Library/BrotliCustomDecompressLib/brotli)
 popd
 tar --exclude=.git -cjf ${destination}.tar.bz2 ${destination}
 rm -rf ${destination}
diff --git a/scripts/meson-buildoptions.py b/scripts/meson-buildoptions.py
new file mode 100644 (file)
index 0000000..4814a8f
--- /dev/null
@@ -0,0 +1,248 @@
+#! /usr/bin/env python3
+
+# Generate configure command line options handling code, based on Meson's
+# user build options introspection data
+#
+# Copyright (C) 2021 Red Hat, Inc.
+#
+# Author: Paolo Bonzini <pbonzini@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+import json
+import textwrap
+import shlex
+import sys
+
+# Options with nonstandard names (e.g. --with/--without) or OS-dependent
+# defaults.  Try not to add any.
+SKIP_OPTIONS = {
+    "default_devices",
+    "fuzzing_engine",
+}
+
+# Options whose name doesn't match the option for backwards compatibility
+# reasons, because Meson gives them a funny name, or both
+OPTION_NAMES = {
+    "b_coverage": "gcov",
+    "b_lto": "lto",
+    "coroutine_backend": "with-coroutine",
+    "debug": "debug-info",
+    "malloc": "enable-malloc",
+    "pkgversion": "with-pkgversion",
+    "qemu_firmwarepath": "firmwarepath",
+    "qemu_suffix": "with-suffix",
+    "trace_backends": "enable-trace-backends",
+    "trace_file": "with-trace-file",
+}
+
+# Options that configure autodetects, even though meson defines them as boolean
+AUTO_OPTIONS = {
+    "plugins",
+    "werror",
+}
+
+# Builtin options that should be definable via configure.  Some of the others
+# we really do not want (e.g. c_args is defined via the native file, not
+# via -D, because it's a mix of CFLAGS and --extra-cflags); for specific
+# cases "../configure -D" can be used as an escape hatch.
+BUILTIN_OPTIONS = {
+    "b_coverage",
+    "b_lto",
+    "bindir",
+    "datadir",
+    "debug",
+    "includedir",
+    "libdir",
+    "libexecdir",
+    "localedir",
+    "localstatedir",
+    "mandir",
+    "prefix",
+    "strip",
+    "sysconfdir",
+    "werror",
+}
+
+LINE_WIDTH = 76
+
+
+# Convert the default value of an option to the string used in
+# the help message
+def get_help(opt):
+    if opt["name"] == "libdir":
+        return 'system default'
+    value = opt["value"]
+    if isinstance(value, list):
+        return ",".join(value)
+    if isinstance(value, bool):
+        return "enabled" if value else "disabled"
+    return str(value)
+
+
+def wrap(left, text, indent):
+    spaces = " " * indent
+    if len(left) >= indent:
+        yield left
+        left = spaces
+    else:
+        left = (left + spaces)[0:indent]
+    yield from textwrap.wrap(
+        text, width=LINE_WIDTH, initial_indent=left, subsequent_indent=spaces
+    )
+
+
+def sh_print(line=""):
+    print('  printf "%s\\n"', shlex.quote(line))
+
+
+def help_line(left, opt, indent, long):
+    right = f'{opt["description"]}'
+    if long:
+        value = get_help(opt)
+        if value != "auto" and value != "":
+            right += f" [{value}]"
+    if "choices" in opt and long:
+        choices = "/".join(sorted(opt["choices"]))
+        right += f" (choices: {choices})"
+    for x in wrap("  " + left, right, indent):
+        sh_print(x)
+
+
+# Return whether the option (a dictionary) can be used with
+# arguments.  Booleans can never be used with arguments;
+# combos allow an argument only if they accept other values
+# than "auto", "enabled", and "disabled".
+def allow_arg(opt):
+    if opt["type"] == "boolean":
+        return False
+    if opt["type"] != "combo":
+        return True
+    return not (set(opt["choices"]) <= {"auto", "disabled", "enabled"})
+
+
+# Return whether the option (a dictionary) can be used without
+# arguments.  Booleans can only be used without arguments;
+# combos require an argument if they accept neither "enabled"
+# nor "disabled"
+def require_arg(opt):
+    if opt["type"] == "boolean":
+        return False
+    if opt["type"] != "combo":
+        return True
+    return not ({"enabled", "disabled"}.intersection(opt["choices"]))
+
+
+def filter_options(json):
+    if ":" in json["name"]:
+        return False
+    if json["section"] == "user":
+        return json["name"] not in SKIP_OPTIONS
+    else:
+        return json["name"] in BUILTIN_OPTIONS
+
+
+def load_options(json):
+    json = [x for x in json if filter_options(x)]
+    return sorted(json, key=lambda x: x["name"])
+
+
+def cli_option(opt):
+    name = opt["name"]
+    if name in OPTION_NAMES:
+        return OPTION_NAMES[name]
+    return name.replace("_", "-")
+
+
+def cli_help_key(opt):
+    key = cli_option(opt)
+    if require_arg(opt):
+        return key
+    if opt["type"] == "boolean" and opt["value"]:
+        return f"disable-{key}"
+    return f"enable-{key}"
+
+
+def cli_metavar(opt):
+    if opt["type"] == "string":
+        return "VALUE"
+    if opt["type"] == "array":
+        return "CHOICES" if "choices" in opt else "VALUES"
+    return "CHOICE"
+
+
+def print_help(options):
+    print("meson_options_help() {")
+    feature_opts = []
+    for opt in sorted(options, key=cli_help_key):
+        key = cli_help_key(opt)
+        # The first section includes options that have an arguments,
+        # and booleans (i.e., only one of enable/disable makes sense)
+        if require_arg(opt):
+            metavar = cli_metavar(opt)
+            left = f"--{key}={metavar}"
+            help_line(left, opt, 27, True)
+        elif opt["type"] == "boolean" and opt["name"] not in AUTO_OPTIONS:
+            left = f"--{key}"
+            help_line(left, opt, 27, False)
+        elif allow_arg(opt):
+            if opt["type"] == "combo" and "enabled" in opt["choices"]:
+                left = f"--{key}[=CHOICE]"
+            else:
+                left = f"--{key}=CHOICE"
+            help_line(left, opt, 27, True)
+        else:
+            feature_opts.append(opt)
+
+    sh_print()
+    sh_print("Optional features, enabled with --enable-FEATURE and")
+    sh_print("disabled with --disable-FEATURE, default is enabled if available")
+    sh_print("(unless built with --without-default-features):")
+    sh_print()
+    for opt in sorted(feature_opts, key=cli_option):
+        key = cli_option(opt)
+        help_line(key, opt, 18, False)
+    print("}")
+
+
+def print_parse(options):
+    print("_meson_option_parse() {")
+    print("  case $1 in")
+    for opt in options:
+        key = cli_option(opt)
+        name = opt["name"]
+        if require_arg(opt):
+            if opt["type"] == "array" and not "choices" in opt:
+                print(f'    --{key}=*) quote_sh "-D{name}=$(meson_option_build_array $2)" ;;')
+            else:
+                print(f'    --{key}=*) quote_sh "-D{name}=$2" ;;')
+        elif opt["type"] == "boolean":
+            print(f'    --enable-{key}) printf "%s" -D{name}=true ;;')
+            print(f'    --disable-{key}) printf "%s" -D{name}=false ;;')
+        else:
+            if opt["type"] == "combo" and "enabled" in opt["choices"]:
+                print(f'    --enable-{key}) printf "%s" -D{name}=enabled ;;')
+            if opt["type"] == "combo" and "disabled" in opt["choices"]:
+                print(f'    --disable-{key}) printf "%s" -D{name}=disabled ;;')
+            if allow_arg(opt):
+                print(f'    --enable-{key}=*) quote_sh "-D{name}=$2" ;;')
+    print("    *) return 1 ;;")
+    print("  esac")
+    print("}")
+
+
+options = load_options(json.load(sys.stdin))
+print("# This file is generated by meson-buildoptions.py, do not edit!")
+print_help(options)
+print_parse(options)
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
new file mode 100755 (executable)
index 0000000..680fa3f
--- /dev/null
@@ -0,0 +1,567 @@
+# This file is generated by meson-buildoptions.py, do not edit!
+meson_options_help() {
+  printf "%s\n" '  --audio-drv-list=CHOICES Set audio driver list [default] (choices: alsa/co'
+  printf "%s\n" '                           reaudio/default/dsound/jack/oss/pa/pipewire/sdl/s'
+  printf "%s\n" '                           ndio)'
+  printf "%s\n" '  --bindir=VALUE           Executable directory [bin]'
+  printf "%s\n" '  --block-drv-ro-whitelist=VALUE'
+  printf "%s\n" '                           set block driver read-only whitelist (by default'
+  printf "%s\n" '                           affects only QEMU, not tools like qemu-img)'
+  printf "%s\n" '  --block-drv-rw-whitelist=VALUE'
+  printf "%s\n" '                           set block driver read-write whitelist (by default'
+  printf "%s\n" '                           affects only QEMU, not tools like qemu-img)'
+  printf "%s\n" '  --datadir=VALUE          Data file directory [share]'
+  printf "%s\n" '  --disable-coroutine-pool coroutine freelist (better performance)'
+  printf "%s\n" '  --disable-debug-info     Enable debug symbols and other information'
+  printf "%s\n" '  --disable-hexagon-idef-parser'
+  printf "%s\n" '                           use idef-parser to automatically generate TCG'
+  printf "%s\n" '                           code for the Hexagon frontend'
+  printf "%s\n" '  --disable-install-blobs  install provided firmware blobs'
+  printf "%s\n" '  --disable-qom-cast-debug cast debugging support'
+  printf "%s\n" '  --disable-relocatable    toggle relocatable install'
+  printf "%s\n" '  --docdir=VALUE           Base directory for documentation installation'
+  printf "%s\n" '                           (can be empty) [share/doc]'
+  printf "%s\n" '  --enable-block-drv-whitelist-in-tools'
+  printf "%s\n" '                           use block whitelist also in tools instead of only'
+  printf "%s\n" '                           QEMU'
+  printf "%s\n" '  --enable-cfi             Control-Flow Integrity (CFI)'
+  printf "%s\n" '  --enable-cfi-debug       Verbose errors in case of CFI violation'
+  printf "%s\n" '  --enable-debug-graph-lock'
+  printf "%s\n" '                           graph lock debugging support'
+  printf "%s\n" '  --enable-debug-mutex     mutex debugging support'
+  printf "%s\n" '  --enable-debug-stack-usage'
+  printf "%s\n" '                           measure coroutine stack usage'
+  printf "%s\n" '  --enable-debug-tcg       TCG debugging'
+  printf "%s\n" '  --enable-fdt[=CHOICE]    Whether and how to find the libfdt library'
+  printf "%s\n" '                           (choices: auto/disabled/enabled/internal/system)'
+  printf "%s\n" '  --enable-fuzzing         build fuzzing targets'
+  printf "%s\n" '  --enable-gcov            Enable coverage tracking.'
+  printf "%s\n" '  --enable-lto             Use link time optimization'
+  printf "%s\n" '  --enable-malloc=CHOICE   choose memory allocator to use [system] (choices:'
+  printf "%s\n" '                           jemalloc/system/tcmalloc)'
+  printf "%s\n" '  --enable-module-upgrades try to load modules from alternate paths for'
+  printf "%s\n" '                           upgrades'
+  printf "%s\n" '  --enable-rng-none        dummy RNG, avoid using /dev/(u)random and'
+  printf "%s\n" '                           getrandom()'
+  printf "%s\n" '  --enable-safe-stack      SafeStack Stack Smash Protection (requires'
+  printf "%s\n" '                           clang/llvm and coroutine backend ucontext)'
+  printf "%s\n" '  --enable-sanitizers      enable default sanitizers'
+  printf "%s\n" '  --enable-strip           Strip targets on install'
+  printf "%s\n" '  --enable-tcg-interpreter TCG with bytecode interpreter (slow)'
+  printf "%s\n" '  --enable-trace-backends=CHOICES'
+  printf "%s\n" '                           Set available tracing backends [log] (choices:'
+  printf "%s\n" '                           dtrace/ftrace/log/nop/simple/syslog/ust)'
+  printf "%s\n" '  --enable-tsan            enable thread sanitizer'
+  printf "%s\n" '  --firmwarepath=VALUES    search PATH for firmware files [share/qemu-'
+  printf "%s\n" '                           firmware]'
+  printf "%s\n" '  --iasl=VALUE             Path to ACPI disassembler'
+  printf "%s\n" '  --includedir=VALUE       Header file directory [include]'
+  printf "%s\n" '  --interp-prefix=VALUE    where to find shared libraries etc., use %M for'
+  printf "%s\n" '                           cpu name [/usr/gnemul/qemu-%M]'
+  printf "%s\n" '  --libdir=VALUE           Library directory [system default]'
+  printf "%s\n" '  --libexecdir=VALUE       Library executable directory [libexec]'
+  printf "%s\n" '  --localedir=VALUE        Locale data directory [share/locale]'
+  printf "%s\n" '  --localstatedir=VALUE    Localstate data directory [/var/local]'
+  printf "%s\n" '  --mandir=VALUE           Manual page directory [share/man]'
+  printf "%s\n" '  --prefix=VALUE           Installation prefix [/usr/local]'
+  printf "%s\n" '  --qemu-ga-distro=VALUE   second path element in qemu-ga registry entries'
+  printf "%s\n" '                           [Linux]'
+  printf "%s\n" '  --qemu-ga-manufacturer=VALUE'
+  printf "%s\n" '                           "manufacturer" name for qemu-ga registry entries'
+  printf "%s\n" '                           [QEMU]'
+  printf "%s\n" '  --qemu-ga-version=VALUE  version number for qemu-ga installer'
+  printf "%s\n" '  --smbd=VALUE             Path to smbd for slirp networking'
+  printf "%s\n" '  --sysconfdir=VALUE       Sysconf data directory [etc]'
+  printf "%s\n" '  --tls-priority=VALUE     Default TLS protocol/cipher priority string'
+  printf "%s\n" '                           [NORMAL]'
+  printf "%s\n" '  --with-coroutine=CHOICE  coroutine backend to use (choices:'
+  printf "%s\n" '                           auto/sigaltstack/ucontext/windows)'
+  printf "%s\n" '  --with-pkgversion=VALUE  use specified string as sub-version of the'
+  printf "%s\n" '                           package'
+  printf "%s\n" '  --with-suffix=VALUE      Suffix for QEMU data/modules/config directories'
+  printf "%s\n" '                           (can be empty) [qemu]'
+  printf "%s\n" '  --with-trace-file=VALUE  Trace file prefix for simple backend [trace]'
+  printf "%s\n" ''
+  printf "%s\n" 'Optional features, enabled with --enable-FEATURE and'
+  printf "%s\n" 'disabled with --disable-FEATURE, default is enabled if available'
+  printf "%s\n" '(unless built with --without-default-features):'
+  printf "%s\n" ''
+  printf "%s\n" '  af-xdp          AF_XDP network backend support'
+  printf "%s\n" '  alsa            ALSA sound support'
+  printf "%s\n" '  attr            attr/xattr support'
+  printf "%s\n" '  auth-pam        PAM access control'
+  printf "%s\n" '  avx2            AVX2 optimizations'
+  printf "%s\n" '  avx512bw        AVX512BW optimizations'
+  printf "%s\n" '  avx512f         AVX512F optimizations'
+  printf "%s\n" '  blkio           libblkio block device driver'
+  printf "%s\n" '  bochs           bochs image format support'
+  printf "%s\n" '  bpf             eBPF support'
+  printf "%s\n" '  brlapi          brlapi character device driver'
+  printf "%s\n" '  bzip2           bzip2 support for DMG images'
+  printf "%s\n" '  canokey         CanoKey support'
+  printf "%s\n" '  cap-ng          cap_ng support'
+  printf "%s\n" '  capstone        Whether and how to find the capstone library'
+  printf "%s\n" '  cloop           cloop image format support'
+  printf "%s\n" '  cocoa           Cocoa user interface (macOS only)'
+  printf "%s\n" '  colo-proxy      colo-proxy support'
+  printf "%s\n" '  coreaudio       CoreAudio sound support'
+  printf "%s\n" '  crypto-afalg    Linux AF_ALG crypto backend driver'
+  printf "%s\n" '  curl            CURL block device driver'
+  printf "%s\n" '  curses          curses UI'
+  printf "%s\n" '  dbus-display    -display dbus support'
+  printf "%s\n" '  dmg             dmg image format support'
+  printf "%s\n" '  docs            Documentations build support'
+  printf "%s\n" '  dsound          DirectSound sound support'
+  printf "%s\n" '  fuse            FUSE block device export'
+  printf "%s\n" '  fuse-lseek      SEEK_HOLE/SEEK_DATA support for FUSE exports'
+  printf "%s\n" '  gcrypt          libgcrypt cryptography support'
+  printf "%s\n" '  gettext         Localization of the GTK+ user interface'
+  printf "%s\n" '  gio             use libgio for D-Bus support'
+  printf "%s\n" '  glusterfs       Glusterfs block device driver'
+  printf "%s\n" '  gnutls          GNUTLS cryptography support'
+  printf "%s\n" '  gtk             GTK+ user interface'
+  printf "%s\n" '  gtk-clipboard   clipboard support for the gtk UI (EXPERIMENTAL, MAY HANG)'
+  printf "%s\n" '  guest-agent     Build QEMU Guest Agent'
+  printf "%s\n" '  guest-agent-msi Build MSI package for the QEMU Guest Agent'
+  printf "%s\n" '  hv-balloon      hv-balloon driver (requires Glib 2.68+ GTree API)'
+  printf "%s\n" '  hvf             HVF acceleration support'
+  printf "%s\n" '  iconv           Font glyph conversion support'
+  printf "%s\n" '  jack            JACK sound support'
+  printf "%s\n" '  keyring         Linux keyring support'
+  printf "%s\n" '  kvm             KVM acceleration support'
+  printf "%s\n" '  l2tpv3          l2tpv3 network backend support'
+  printf "%s\n" '  libdaxctl       libdaxctl support'
+  printf "%s\n" '  libdw           debuginfo support'
+  printf "%s\n" '  libiscsi        libiscsi userspace initiator'
+  printf "%s\n" '  libkeyutils     Linux keyutils support'
+  printf "%s\n" '  libnfs          libnfs block device driver'
+  printf "%s\n" '  libpmem         libpmem support'
+  printf "%s\n" '  libssh          ssh block device support'
+  printf "%s\n" '  libudev         Use libudev to enumerate host devices'
+  printf "%s\n" '  libusb          libusb support for USB passthrough'
+  printf "%s\n" '  libvduse        build VDUSE Library'
+  printf "%s\n" '  linux-aio       Linux AIO support'
+  printf "%s\n" '  linux-io-uring  Linux io_uring support'
+  printf "%s\n" '  live-block-migration'
+  printf "%s\n" '                  block migration in the main migration stream'
+  printf "%s\n" '  lzfse           lzfse support for DMG images'
+  printf "%s\n" '  lzo             lzo compression support'
+  printf "%s\n" '  malloc-trim     enable libc malloc_trim() for memory optimization'
+  printf "%s\n" '  membarrier      membarrier system call (for Linux 4.14+ or Windows'
+  printf "%s\n" '  modules         modules support (non Windows)'
+  printf "%s\n" '  mpath           Multipath persistent reservation passthrough'
+  printf "%s\n" '  multiprocess    Out of process device emulation support'
+  printf "%s\n" '  netmap          netmap network backend support'
+  printf "%s\n" '  nettle          nettle cryptography support'
+  printf "%s\n" '  numa            libnuma support'
+  printf "%s\n" '  nvmm            NVMM acceleration support'
+  printf "%s\n" '  opengl          OpenGL support'
+  printf "%s\n" '  oss             OSS sound support'
+  printf "%s\n" '  pa              PulseAudio sound support'
+  printf "%s\n" '  parallels       parallels image format support'
+  printf "%s\n" '  pipewire        PipeWire sound support'
+  printf "%s\n" '  pixman          pixman support'
+  printf "%s\n" '  plugins         TCG plugins via shared library loading'
+  printf "%s\n" '  png             PNG support with libpng'
+  printf "%s\n" '  pvrdma          Enable PVRDMA support'
+  printf "%s\n" '  qcow1           qcow1 image format support'
+  printf "%s\n" '  qed             qed image format support'
+  printf "%s\n" '  qga-vss         build QGA VSS support (broken with MinGW)'
+  printf "%s\n" '  rbd             Ceph block device driver'
+  printf "%s\n" '  rdma            Enable RDMA-based migration'
+  printf "%s\n" '  replication     replication support'
+  printf "%s\n" '  rutabaga-gfx    rutabaga_gfx support'
+  printf "%s\n" '  sdl             SDL user interface'
+  printf "%s\n" '  sdl-image       SDL Image support for icons'
+  printf "%s\n" '  seccomp         seccomp support'
+  printf "%s\n" '  selinux         SELinux support in qemu-nbd'
+  printf "%s\n" '  slirp           libslirp user mode network backend support'
+  printf "%s\n" '  slirp-smbd      use smbd (at path --smbd=*) in slirp networking'
+  printf "%s\n" '  smartcard       CA smartcard emulation support'
+  printf "%s\n" '  snappy          snappy compression support'
+  printf "%s\n" '  sndio           sndio sound support'
+  printf "%s\n" '  sparse          sparse checker'
+  printf "%s\n" '  spice           Spice server support'
+  printf "%s\n" '  spice-protocol  Spice protocol support'
+  printf "%s\n" '  stack-protector compiler-provided stack protection'
+  printf "%s\n" '  tcg             TCG support'
+  printf "%s\n" '  tools           build support utilities that come with QEMU'
+  printf "%s\n" '  tpm             TPM support'
+  printf "%s\n" '  u2f             U2F emulation support'
+  printf "%s\n" '  usb-redir       libusbredir support'
+  printf "%s\n" '  vde             vde network backend support'
+  printf "%s\n" '  vdi             vdi image format support'
+  printf "%s\n" '  vduse-blk-export'
+  printf "%s\n" '                  VDUSE block export support'
+  printf "%s\n" '  vfio-user-server'
+  printf "%s\n" '                  vfio-user server support'
+  printf "%s\n" '  vhdx            vhdx image format support'
+  printf "%s\n" '  vhost-crypto    vhost-user crypto backend support'
+  printf "%s\n" '  vhost-kernel    vhost kernel backend support'
+  printf "%s\n" '  vhost-net       vhost-net kernel acceleration support'
+  printf "%s\n" '  vhost-user      vhost-user backend support'
+  printf "%s\n" '  vhost-user-blk-server'
+  printf "%s\n" '                  build vhost-user-blk server'
+  printf "%s\n" '  vhost-vdpa      vhost-vdpa kernel backend support'
+  printf "%s\n" '  virglrenderer   virgl rendering support'
+  printf "%s\n" '  virtfs          virtio-9p support'
+  printf "%s\n" '  virtfs-proxy-helper'
+  printf "%s\n" '                  virtio-9p proxy helper support'
+  printf "%s\n" '  vmdk            vmdk image format support'
+  printf "%s\n" '  vmnet           vmnet.framework network backend support'
+  printf "%s\n" '  vnc             VNC server'
+  printf "%s\n" '  vnc-jpeg        JPEG lossy compression for VNC server'
+  printf "%s\n" '  vnc-sasl        SASL authentication for VNC server'
+  printf "%s\n" '  vpc             vpc image format support'
+  printf "%s\n" '  vte             vte support for the gtk UI'
+  printf "%s\n" '  vvfat           vvfat image format support'
+  printf "%s\n" '  werror          Treat warnings as errors'
+  printf "%s\n" '  whpx            WHPX acceleration support'
+  printf "%s\n" '  xen             Xen backend support'
+  printf "%s\n" '  xen-pci-passthrough'
+  printf "%s\n" '                  Xen PCI passthrough support'
+  printf "%s\n" '  xkbcommon       xkbcommon support'
+  printf "%s\n" '  zstd            zstd compression support'
+}
+_meson_option_parse() {
+  case $1 in
+    --enable-af-xdp) printf "%s" -Daf_xdp=enabled ;;
+    --disable-af-xdp) printf "%s" -Daf_xdp=disabled ;;
+    --enable-alsa) printf "%s" -Dalsa=enabled ;;
+    --disable-alsa) printf "%s" -Dalsa=disabled ;;
+    --enable-attr) printf "%s" -Dattr=enabled ;;
+    --disable-attr) printf "%s" -Dattr=disabled ;;
+    --audio-drv-list=*) quote_sh "-Daudio_drv_list=$2" ;;
+    --enable-auth-pam) printf "%s" -Dauth_pam=enabled ;;
+    --disable-auth-pam) printf "%s" -Dauth_pam=disabled ;;
+    --enable-avx2) printf "%s" -Davx2=enabled ;;
+    --disable-avx2) printf "%s" -Davx2=disabled ;;
+    --enable-avx512bw) printf "%s" -Davx512bw=enabled ;;
+    --disable-avx512bw) printf "%s" -Davx512bw=disabled ;;
+    --enable-avx512f) printf "%s" -Davx512f=enabled ;;
+    --disable-avx512f) printf "%s" -Davx512f=disabled ;;
+    --enable-gcov) printf "%s" -Db_coverage=true ;;
+    --disable-gcov) printf "%s" -Db_coverage=false ;;
+    --enable-lto) printf "%s" -Db_lto=true ;;
+    --disable-lto) printf "%s" -Db_lto=false ;;
+    --bindir=*) quote_sh "-Dbindir=$2" ;;
+    --enable-blkio) printf "%s" -Dblkio=enabled ;;
+    --disable-blkio) printf "%s" -Dblkio=disabled ;;
+    --block-drv-ro-whitelist=*) quote_sh "-Dblock_drv_ro_whitelist=$2" ;;
+    --block-drv-rw-whitelist=*) quote_sh "-Dblock_drv_rw_whitelist=$2" ;;
+    --enable-block-drv-whitelist-in-tools) printf "%s" -Dblock_drv_whitelist_in_tools=true ;;
+    --disable-block-drv-whitelist-in-tools) printf "%s" -Dblock_drv_whitelist_in_tools=false ;;
+    --enable-bochs) printf "%s" -Dbochs=enabled ;;
+    --disable-bochs) printf "%s" -Dbochs=disabled ;;
+    --enable-bpf) printf "%s" -Dbpf=enabled ;;
+    --disable-bpf) printf "%s" -Dbpf=disabled ;;
+    --enable-brlapi) printf "%s" -Dbrlapi=enabled ;;
+    --disable-brlapi) printf "%s" -Dbrlapi=disabled ;;
+    --enable-bzip2) printf "%s" -Dbzip2=enabled ;;
+    --disable-bzip2) printf "%s" -Dbzip2=disabled ;;
+    --enable-canokey) printf "%s" -Dcanokey=enabled ;;
+    --disable-canokey) printf "%s" -Dcanokey=disabled ;;
+    --enable-cap-ng) printf "%s" -Dcap_ng=enabled ;;
+    --disable-cap-ng) printf "%s" -Dcap_ng=disabled ;;
+    --enable-capstone) printf "%s" -Dcapstone=enabled ;;
+    --disable-capstone) printf "%s" -Dcapstone=disabled ;;
+    --enable-cfi) printf "%s" -Dcfi=true ;;
+    --disable-cfi) printf "%s" -Dcfi=false ;;
+    --enable-cfi-debug) printf "%s" -Dcfi_debug=true ;;
+    --disable-cfi-debug) printf "%s" -Dcfi_debug=false ;;
+    --enable-cloop) printf "%s" -Dcloop=enabled ;;
+    --disable-cloop) printf "%s" -Dcloop=disabled ;;
+    --enable-cocoa) printf "%s" -Dcocoa=enabled ;;
+    --disable-cocoa) printf "%s" -Dcocoa=disabled ;;
+    --enable-colo-proxy) printf "%s" -Dcolo_proxy=enabled ;;
+    --disable-colo-proxy) printf "%s" -Dcolo_proxy=disabled ;;
+    --enable-coreaudio) printf "%s" -Dcoreaudio=enabled ;;
+    --disable-coreaudio) printf "%s" -Dcoreaudio=disabled ;;
+    --with-coroutine=*) quote_sh "-Dcoroutine_backend=$2" ;;
+    --enable-coroutine-pool) printf "%s" -Dcoroutine_pool=true ;;
+    --disable-coroutine-pool) printf "%s" -Dcoroutine_pool=false ;;
+    --enable-crypto-afalg) printf "%s" -Dcrypto_afalg=enabled ;;
+    --disable-crypto-afalg) printf "%s" -Dcrypto_afalg=disabled ;;
+    --enable-curl) printf "%s" -Dcurl=enabled ;;
+    --disable-curl) printf "%s" -Dcurl=disabled ;;
+    --enable-curses) printf "%s" -Dcurses=enabled ;;
+    --disable-curses) printf "%s" -Dcurses=disabled ;;
+    --datadir=*) quote_sh "-Ddatadir=$2" ;;
+    --enable-dbus-display) printf "%s" -Ddbus_display=enabled ;;
+    --disable-dbus-display) printf "%s" -Ddbus_display=disabled ;;
+    --enable-debug-info) printf "%s" -Ddebug=true ;;
+    --disable-debug-info) printf "%s" -Ddebug=false ;;
+    --enable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=true ;;
+    --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;;
+    --enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;;
+    --disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;;
+    --enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;;
+    --disable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=false ;;
+    --enable-debug-tcg) printf "%s" -Ddebug_tcg=true ;;
+    --disable-debug-tcg) printf "%s" -Ddebug_tcg=false ;;
+    --enable-dmg) printf "%s" -Ddmg=enabled ;;
+    --disable-dmg) printf "%s" -Ddmg=disabled ;;
+    --docdir=*) quote_sh "-Ddocdir=$2" ;;
+    --enable-docs) printf "%s" -Ddocs=enabled ;;
+    --disable-docs) printf "%s" -Ddocs=disabled ;;
+    --enable-dsound) printf "%s" -Ddsound=enabled ;;
+    --disable-dsound) printf "%s" -Ddsound=disabled ;;
+    --enable-fdt) printf "%s" -Dfdt=enabled ;;
+    --disable-fdt) printf "%s" -Dfdt=disabled ;;
+    --enable-fdt=*) quote_sh "-Dfdt=$2" ;;
+    --enable-fuse) printf "%s" -Dfuse=enabled ;;
+    --disable-fuse) printf "%s" -Dfuse=disabled ;;
+    --enable-fuse-lseek) printf "%s" -Dfuse_lseek=enabled ;;
+    --disable-fuse-lseek) printf "%s" -Dfuse_lseek=disabled ;;
+    --enable-fuzzing) printf "%s" -Dfuzzing=true ;;
+    --disable-fuzzing) printf "%s" -Dfuzzing=false ;;
+    --enable-gcrypt) printf "%s" -Dgcrypt=enabled ;;
+    --disable-gcrypt) printf "%s" -Dgcrypt=disabled ;;
+    --enable-gettext) printf "%s" -Dgettext=enabled ;;
+    --disable-gettext) printf "%s" -Dgettext=disabled ;;
+    --enable-gio) printf "%s" -Dgio=enabled ;;
+    --disable-gio) printf "%s" -Dgio=disabled ;;
+    --enable-glusterfs) printf "%s" -Dglusterfs=enabled ;;
+    --disable-glusterfs) printf "%s" -Dglusterfs=disabled ;;
+    --enable-gnutls) printf "%s" -Dgnutls=enabled ;;
+    --disable-gnutls) printf "%s" -Dgnutls=disabled ;;
+    --enable-gtk) printf "%s" -Dgtk=enabled ;;
+    --disable-gtk) printf "%s" -Dgtk=disabled ;;
+    --enable-gtk-clipboard) printf "%s" -Dgtk_clipboard=enabled ;;
+    --disable-gtk-clipboard) printf "%s" -Dgtk_clipboard=disabled ;;
+    --enable-guest-agent) printf "%s" -Dguest_agent=enabled ;;
+    --disable-guest-agent) printf "%s" -Dguest_agent=disabled ;;
+    --enable-guest-agent-msi) printf "%s" -Dguest_agent_msi=enabled ;;
+    --disable-guest-agent-msi) printf "%s" -Dguest_agent_msi=disabled ;;
+    --enable-hexagon-idef-parser) printf "%s" -Dhexagon_idef_parser=true ;;
+    --disable-hexagon-idef-parser) printf "%s" -Dhexagon_idef_parser=false ;;
+    --enable-hv-balloon) printf "%s" -Dhv_balloon=enabled ;;
+    --disable-hv-balloon) printf "%s" -Dhv_balloon=disabled ;;
+    --enable-hvf) printf "%s" -Dhvf=enabled ;;
+    --disable-hvf) printf "%s" -Dhvf=disabled ;;
+    --iasl=*) quote_sh "-Diasl=$2" ;;
+    --enable-iconv) printf "%s" -Diconv=enabled ;;
+    --disable-iconv) printf "%s" -Diconv=disabled ;;
+    --includedir=*) quote_sh "-Dincludedir=$2" ;;
+    --enable-install-blobs) printf "%s" -Dinstall_blobs=true ;;
+    --disable-install-blobs) printf "%s" -Dinstall_blobs=false ;;
+    --interp-prefix=*) quote_sh "-Dinterp_prefix=$2" ;;
+    --enable-jack) printf "%s" -Djack=enabled ;;
+    --disable-jack) printf "%s" -Djack=disabled ;;
+    --enable-keyring) printf "%s" -Dkeyring=enabled ;;
+    --disable-keyring) printf "%s" -Dkeyring=disabled ;;
+    --enable-kvm) printf "%s" -Dkvm=enabled ;;
+    --disable-kvm) printf "%s" -Dkvm=disabled ;;
+    --enable-l2tpv3) printf "%s" -Dl2tpv3=enabled ;;
+    --disable-l2tpv3) printf "%s" -Dl2tpv3=disabled ;;
+    --enable-libdaxctl) printf "%s" -Dlibdaxctl=enabled ;;
+    --disable-libdaxctl) printf "%s" -Dlibdaxctl=disabled ;;
+    --libdir=*) quote_sh "-Dlibdir=$2" ;;
+    --enable-libdw) printf "%s" -Dlibdw=enabled ;;
+    --disable-libdw) printf "%s" -Dlibdw=disabled ;;
+    --libexecdir=*) quote_sh "-Dlibexecdir=$2" ;;
+    --enable-libiscsi) printf "%s" -Dlibiscsi=enabled ;;
+    --disable-libiscsi) printf "%s" -Dlibiscsi=disabled ;;
+    --enable-libkeyutils) printf "%s" -Dlibkeyutils=enabled ;;
+    --disable-libkeyutils) printf "%s" -Dlibkeyutils=disabled ;;
+    --enable-libnfs) printf "%s" -Dlibnfs=enabled ;;
+    --disable-libnfs) printf "%s" -Dlibnfs=disabled ;;
+    --enable-libpmem) printf "%s" -Dlibpmem=enabled ;;
+    --disable-libpmem) printf "%s" -Dlibpmem=disabled ;;
+    --enable-libssh) printf "%s" -Dlibssh=enabled ;;
+    --disable-libssh) printf "%s" -Dlibssh=disabled ;;
+    --enable-libudev) printf "%s" -Dlibudev=enabled ;;
+    --disable-libudev) printf "%s" -Dlibudev=disabled ;;
+    --enable-libusb) printf "%s" -Dlibusb=enabled ;;
+    --disable-libusb) printf "%s" -Dlibusb=disabled ;;
+    --enable-libvduse) printf "%s" -Dlibvduse=enabled ;;
+    --disable-libvduse) printf "%s" -Dlibvduse=disabled ;;
+    --enable-linux-aio) printf "%s" -Dlinux_aio=enabled ;;
+    --disable-linux-aio) printf "%s" -Dlinux_aio=disabled ;;
+    --enable-linux-io-uring) printf "%s" -Dlinux_io_uring=enabled ;;
+    --disable-linux-io-uring) printf "%s" -Dlinux_io_uring=disabled ;;
+    --enable-live-block-migration) printf "%s" -Dlive_block_migration=enabled ;;
+    --disable-live-block-migration) printf "%s" -Dlive_block_migration=disabled ;;
+    --localedir=*) quote_sh "-Dlocaledir=$2" ;;
+    --localstatedir=*) quote_sh "-Dlocalstatedir=$2" ;;
+    --enable-lzfse) printf "%s" -Dlzfse=enabled ;;
+    --disable-lzfse) printf "%s" -Dlzfse=disabled ;;
+    --enable-lzo) printf "%s" -Dlzo=enabled ;;
+    --disable-lzo) printf "%s" -Dlzo=disabled ;;
+    --enable-malloc=*) quote_sh "-Dmalloc=$2" ;;
+    --enable-malloc-trim) printf "%s" -Dmalloc_trim=enabled ;;
+    --disable-malloc-trim) printf "%s" -Dmalloc_trim=disabled ;;
+    --mandir=*) quote_sh "-Dmandir=$2" ;;
+    --enable-membarrier) printf "%s" -Dmembarrier=enabled ;;
+    --disable-membarrier) printf "%s" -Dmembarrier=disabled ;;
+    --enable-module-upgrades) printf "%s" -Dmodule_upgrades=true ;;
+    --disable-module-upgrades) printf "%s" -Dmodule_upgrades=false ;;
+    --enable-modules) printf "%s" -Dmodules=enabled ;;
+    --disable-modules) printf "%s" -Dmodules=disabled ;;
+    --enable-mpath) printf "%s" -Dmpath=enabled ;;
+    --disable-mpath) printf "%s" -Dmpath=disabled ;;
+    --enable-multiprocess) printf "%s" -Dmultiprocess=enabled ;;
+    --disable-multiprocess) printf "%s" -Dmultiprocess=disabled ;;
+    --enable-netmap) printf "%s" -Dnetmap=enabled ;;
+    --disable-netmap) printf "%s" -Dnetmap=disabled ;;
+    --enable-nettle) printf "%s" -Dnettle=enabled ;;
+    --disable-nettle) printf "%s" -Dnettle=disabled ;;
+    --enable-numa) printf "%s" -Dnuma=enabled ;;
+    --disable-numa) printf "%s" -Dnuma=disabled ;;
+    --enable-nvmm) printf "%s" -Dnvmm=enabled ;;
+    --disable-nvmm) printf "%s" -Dnvmm=disabled ;;
+    --enable-opengl) printf "%s" -Dopengl=enabled ;;
+    --disable-opengl) printf "%s" -Dopengl=disabled ;;
+    --enable-oss) printf "%s" -Doss=enabled ;;
+    --disable-oss) printf "%s" -Doss=disabled ;;
+    --enable-pa) printf "%s" -Dpa=enabled ;;
+    --disable-pa) printf "%s" -Dpa=disabled ;;
+    --enable-parallels) printf "%s" -Dparallels=enabled ;;
+    --disable-parallels) printf "%s" -Dparallels=disabled ;;
+    --enable-pipewire) printf "%s" -Dpipewire=enabled ;;
+    --disable-pipewire) printf "%s" -Dpipewire=disabled ;;
+    --enable-pixman) printf "%s" -Dpixman=enabled ;;
+    --disable-pixman) printf "%s" -Dpixman=disabled ;;
+    --with-pkgversion=*) quote_sh "-Dpkgversion=$2" ;;
+    --enable-plugins) printf "%s" -Dplugins=true ;;
+    --disable-plugins) printf "%s" -Dplugins=false ;;
+    --enable-png) printf "%s" -Dpng=enabled ;;
+    --disable-png) printf "%s" -Dpng=disabled ;;
+    --prefix=*) quote_sh "-Dprefix=$2" ;;
+    --enable-pvrdma) printf "%s" -Dpvrdma=enabled ;;
+    --disable-pvrdma) printf "%s" -Dpvrdma=disabled ;;
+    --enable-qcow1) printf "%s" -Dqcow1=enabled ;;
+    --disable-qcow1) printf "%s" -Dqcow1=disabled ;;
+    --enable-qed) printf "%s" -Dqed=enabled ;;
+    --disable-qed) printf "%s" -Dqed=disabled ;;
+    --firmwarepath=*) quote_sh "-Dqemu_firmwarepath=$(meson_option_build_array $2)" ;;
+    --qemu-ga-distro=*) quote_sh "-Dqemu_ga_distro=$2" ;;
+    --qemu-ga-manufacturer=*) quote_sh "-Dqemu_ga_manufacturer=$2" ;;
+    --qemu-ga-version=*) quote_sh "-Dqemu_ga_version=$2" ;;
+    --with-suffix=*) quote_sh "-Dqemu_suffix=$2" ;;
+    --enable-qga-vss) printf "%s" -Dqga_vss=enabled ;;
+    --disable-qga-vss) printf "%s" -Dqga_vss=disabled ;;
+    --enable-qom-cast-debug) printf "%s" -Dqom_cast_debug=true ;;
+    --disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
+    --enable-rbd) printf "%s" -Drbd=enabled ;;
+    --disable-rbd) printf "%s" -Drbd=disabled ;;
+    --enable-rdma) printf "%s" -Drdma=enabled ;;
+    --disable-rdma) printf "%s" -Drdma=disabled ;;
+    --enable-relocatable) printf "%s" -Drelocatable=true ;;
+    --disable-relocatable) printf "%s" -Drelocatable=false ;;
+    --enable-replication) printf "%s" -Dreplication=enabled ;;
+    --disable-replication) printf "%s" -Dreplication=disabled ;;
+    --enable-rng-none) printf "%s" -Drng_none=true ;;
+    --disable-rng-none) printf "%s" -Drng_none=false ;;
+    --enable-rutabaga-gfx) printf "%s" -Drutabaga_gfx=enabled ;;
+    --disable-rutabaga-gfx) printf "%s" -Drutabaga_gfx=disabled ;;
+    --enable-safe-stack) printf "%s" -Dsafe_stack=true ;;
+    --disable-safe-stack) printf "%s" -Dsafe_stack=false ;;
+    --enable-sanitizers) printf "%s" -Dsanitizers=true ;;
+    --disable-sanitizers) printf "%s" -Dsanitizers=false ;;
+    --enable-sdl) printf "%s" -Dsdl=enabled ;;
+    --disable-sdl) printf "%s" -Dsdl=disabled ;;
+    --enable-sdl-image) printf "%s" -Dsdl_image=enabled ;;
+    --disable-sdl-image) printf "%s" -Dsdl_image=disabled ;;
+    --enable-seccomp) printf "%s" -Dseccomp=enabled ;;
+    --disable-seccomp) printf "%s" -Dseccomp=disabled ;;
+    --enable-selinux) printf "%s" -Dselinux=enabled ;;
+    --disable-selinux) printf "%s" -Dselinux=disabled ;;
+    --enable-slirp) printf "%s" -Dslirp=enabled ;;
+    --disable-slirp) printf "%s" -Dslirp=disabled ;;
+    --enable-slirp-smbd) printf "%s" -Dslirp_smbd=enabled ;;
+    --disable-slirp-smbd) printf "%s" -Dslirp_smbd=disabled ;;
+    --enable-smartcard) printf "%s" -Dsmartcard=enabled ;;
+    --disable-smartcard) printf "%s" -Dsmartcard=disabled ;;
+    --smbd=*) quote_sh "-Dsmbd=$2" ;;
+    --enable-snappy) printf "%s" -Dsnappy=enabled ;;
+    --disable-snappy) printf "%s" -Dsnappy=disabled ;;
+    --enable-sndio) printf "%s" -Dsndio=enabled ;;
+    --disable-sndio) printf "%s" -Dsndio=disabled ;;
+    --enable-sparse) printf "%s" -Dsparse=enabled ;;
+    --disable-sparse) printf "%s" -Dsparse=disabled ;;
+    --enable-spice) printf "%s" -Dspice=enabled ;;
+    --disable-spice) printf "%s" -Dspice=disabled ;;
+    --enable-spice-protocol) printf "%s" -Dspice_protocol=enabled ;;
+    --disable-spice-protocol) printf "%s" -Dspice_protocol=disabled ;;
+    --enable-stack-protector) printf "%s" -Dstack_protector=enabled ;;
+    --disable-stack-protector) printf "%s" -Dstack_protector=disabled ;;
+    --enable-strip) printf "%s" -Dstrip=true ;;
+    --disable-strip) printf "%s" -Dstrip=false ;;
+    --sysconfdir=*) quote_sh "-Dsysconfdir=$2" ;;
+    --enable-tcg) printf "%s" -Dtcg=enabled ;;
+    --disable-tcg) printf "%s" -Dtcg=disabled ;;
+    --enable-tcg-interpreter) printf "%s" -Dtcg_interpreter=true ;;
+    --disable-tcg-interpreter) printf "%s" -Dtcg_interpreter=false ;;
+    --tls-priority=*) quote_sh "-Dtls_priority=$2" ;;
+    --enable-tools) printf "%s" -Dtools=enabled ;;
+    --disable-tools) printf "%s" -Dtools=disabled ;;
+    --enable-tpm) printf "%s" -Dtpm=enabled ;;
+    --disable-tpm) printf "%s" -Dtpm=disabled ;;
+    --enable-trace-backends=*) quote_sh "-Dtrace_backends=$2" ;;
+    --with-trace-file=*) quote_sh "-Dtrace_file=$2" ;;
+    --enable-tsan) printf "%s" -Dtsan=true ;;
+    --disable-tsan) printf "%s" -Dtsan=false ;;
+    --enable-u2f) printf "%s" -Du2f=enabled ;;
+    --disable-u2f) printf "%s" -Du2f=disabled ;;
+    --enable-usb-redir) printf "%s" -Dusb_redir=enabled ;;
+    --disable-usb-redir) printf "%s" -Dusb_redir=disabled ;;
+    --enable-vde) printf "%s" -Dvde=enabled ;;
+    --disable-vde) printf "%s" -Dvde=disabled ;;
+    --enable-vdi) printf "%s" -Dvdi=enabled ;;
+    --disable-vdi) printf "%s" -Dvdi=disabled ;;
+    --enable-vduse-blk-export) printf "%s" -Dvduse_blk_export=enabled ;;
+    --disable-vduse-blk-export) printf "%s" -Dvduse_blk_export=disabled ;;
+    --enable-vfio-user-server) printf "%s" -Dvfio_user_server=enabled ;;
+    --disable-vfio-user-server) printf "%s" -Dvfio_user_server=disabled ;;
+    --enable-vhdx) printf "%s" -Dvhdx=enabled ;;
+    --disable-vhdx) printf "%s" -Dvhdx=disabled ;;
+    --enable-vhost-crypto) printf "%s" -Dvhost_crypto=enabled ;;
+    --disable-vhost-crypto) printf "%s" -Dvhost_crypto=disabled ;;
+    --enable-vhost-kernel) printf "%s" -Dvhost_kernel=enabled ;;
+    --disable-vhost-kernel) printf "%s" -Dvhost_kernel=disabled ;;
+    --enable-vhost-net) printf "%s" -Dvhost_net=enabled ;;
+    --disable-vhost-net) printf "%s" -Dvhost_net=disabled ;;
+    --enable-vhost-user) printf "%s" -Dvhost_user=enabled ;;
+    --disable-vhost-user) printf "%s" -Dvhost_user=disabled ;;
+    --enable-vhost-user-blk-server) printf "%s" -Dvhost_user_blk_server=enabled ;;
+    --disable-vhost-user-blk-server) printf "%s" -Dvhost_user_blk_server=disabled ;;
+    --enable-vhost-vdpa) printf "%s" -Dvhost_vdpa=enabled ;;
+    --disable-vhost-vdpa) printf "%s" -Dvhost_vdpa=disabled ;;
+    --enable-virglrenderer) printf "%s" -Dvirglrenderer=enabled ;;
+    --disable-virglrenderer) printf "%s" -Dvirglrenderer=disabled ;;
+    --enable-virtfs) printf "%s" -Dvirtfs=enabled ;;
+    --disable-virtfs) printf "%s" -Dvirtfs=disabled ;;
+    --enable-virtfs-proxy-helper) printf "%s" -Dvirtfs_proxy_helper=enabled ;;
+    --disable-virtfs-proxy-helper) printf "%s" -Dvirtfs_proxy_helper=disabled ;;
+    --enable-vmdk) printf "%s" -Dvmdk=enabled ;;
+    --disable-vmdk) printf "%s" -Dvmdk=disabled ;;
+    --enable-vmnet) printf "%s" -Dvmnet=enabled ;;
+    --disable-vmnet) printf "%s" -Dvmnet=disabled ;;
+    --enable-vnc) printf "%s" -Dvnc=enabled ;;
+    --disable-vnc) printf "%s" -Dvnc=disabled ;;
+    --enable-vnc-jpeg) printf "%s" -Dvnc_jpeg=enabled ;;
+    --disable-vnc-jpeg) printf "%s" -Dvnc_jpeg=disabled ;;
+    --enable-vnc-sasl) printf "%s" -Dvnc_sasl=enabled ;;
+    --disable-vnc-sasl) printf "%s" -Dvnc_sasl=disabled ;;
+    --enable-vpc) printf "%s" -Dvpc=enabled ;;
+    --disable-vpc) printf "%s" -Dvpc=disabled ;;
+    --enable-vte) printf "%s" -Dvte=enabled ;;
+    --disable-vte) printf "%s" -Dvte=disabled ;;
+    --enable-vvfat) printf "%s" -Dvvfat=enabled ;;
+    --disable-vvfat) printf "%s" -Dvvfat=disabled ;;
+    --enable-werror) printf "%s" -Dwerror=true ;;
+    --disable-werror) printf "%s" -Dwerror=false ;;
+    --enable-whpx) printf "%s" -Dwhpx=enabled ;;
+    --disable-whpx) printf "%s" -Dwhpx=disabled ;;
+    --enable-xen) printf "%s" -Dxen=enabled ;;
+    --disable-xen) printf "%s" -Dxen=disabled ;;
+    --enable-xen-pci-passthrough) printf "%s" -Dxen_pci_passthrough=enabled ;;
+    --disable-xen-pci-passthrough) printf "%s" -Dxen_pci_passthrough=disabled ;;
+    --enable-xkbcommon) printf "%s" -Dxkbcommon=enabled ;;
+    --disable-xkbcommon) printf "%s" -Dxkbcommon=disabled ;;
+    --enable-zstd) printf "%s" -Dzstd=enabled ;;
+    --disable-zstd) printf "%s" -Dzstd=disabled ;;
+    *) return 1 ;;
+  esac
+}
index e8cc63896d8799ea97fe593aa5815c183dee9030..532277f5a20e1a23155af730187f7759ae1faf68 100644 (file)
@@ -1,3 +1,5 @@
-if 'CONFIG_TRACE_SYSTEMTAP' in config_host
+if stap.found()
   install_data('qemu-trace-stap', install_dir: get_option('bindir'))
 endif
+
+test('xml-preprocess', files('xml-preprocess-test.py'), suite: ['unit'])
diff --git a/scripts/modinfo-collect.py b/scripts/modinfo-collect.py
new file mode 100644 (file)
index 0000000..4e7584d
--- /dev/null
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import os
+import sys
+import json
+import shlex
+import subprocess
+
+def find_command(src, target, compile_commands):
+    for command in compile_commands:
+        if command['file'] != src:
+            continue
+        if target != '' and command['command'].find(target) == -1:
+            continue
+        return command['command']
+    return 'false'
+
+def process_command(src, command):
+    skip = False
+    out = []
+    for item in shlex.split(command):
+        if skip:
+            skip = False
+            continue
+        if item == '-MF' or item == '-MQ' or item == '-o':
+            skip = True
+            continue
+        if item == '-c':
+            skip = True
+            continue
+        out.append(item)
+    out.append('-DQEMU_MODINFO')
+    out.append('-E')
+    out.append(src)
+    return out
+
+def main(args):
+    target = ''
+    if args[0] == '--target':
+        args.pop(0)
+        target = args.pop(0)
+        print("MODINFO_DEBUG target %s" % target)
+        arch = target[:-8] # cut '-softmmu'
+        print("MODINFO_START arch \"%s\" MODINFO_END" % arch)
+    with open('compile_commands.json') as f:
+        compile_commands = json.load(f)
+    for src in args:
+        if not src.endswith('.c'):
+            print("MODINFO_DEBUG skip %s" % src)
+            continue
+        print("MODINFO_DEBUG src %s" % src)
+        command = find_command(src, target, compile_commands)
+        cmdline = process_command(src, command)
+        print("MODINFO_DEBUG cmd", cmdline)
+        result = subprocess.run(cmdline, stdout = subprocess.PIPE,
+                                universal_newlines = True)
+        if result.returncode != 0:
+            sys.exit(result.returncode)
+        for line in result.stdout.split('\n'):
+            if line.find('MODINFO') != -1:
+                print(line)
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/scripts/modinfo-generate.py b/scripts/modinfo-generate.py
new file mode 100644 (file)
index 0000000..b1538fc
--- /dev/null
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import os
+import sys
+
+def print_array(name, values):
+    if len(values) == 0:
+        return
+    list = ", ".join(values)
+    print("    .%s = ((const char*[]){ %s, NULL })," % (name, list))
+
+def parse_line(line):
+    kind = ""
+    data = ""
+    get_kind = False
+    get_data = False
+    for item in line.split():
+        if item == "MODINFO_START":
+            get_kind = True
+            continue
+        if item.startswith("MODINFO_END"):
+            get_data = False
+            continue
+        if get_kind:
+            kind = item
+            get_kind = False
+            get_data = True
+            continue
+        if get_data:
+            data += " " + item
+            continue
+    return (kind, data)
+
+def generate(name, lines, enabled):
+    arch = ""
+    objs = []
+    deps = []
+    opts = []
+    for line in lines:
+        if line.find("MODINFO_START") != -1:
+            (kind, data) = parse_line(line)
+            if kind == 'obj':
+                objs.append(data)
+            elif kind == 'dep':
+                deps.append(data)
+            elif kind == 'opts':
+                opts.append(data)
+            elif kind == 'arch':
+                arch = data;
+            elif kind == 'kconfig':
+                # don't add a module which dependency is not enabled
+                # in kconfig
+                if data.strip() not in enabled:
+                    print("    /* module {} isn't enabled in Kconfig. */"
+                          .format(data.strip()))
+                    print("/* },{ */")
+                    return None
+            else:
+                print("unknown:", kind)
+                exit(1)
+
+    print("    .name = \"%s\"," % name)
+    if arch != "":
+        print("    .arch = %s," % arch)
+    print_array("objs", objs)
+    print_array("deps", deps)
+    print_array("opts", opts)
+    print("},{")
+    return {dep.strip('" ') for dep in deps}
+
+def print_pre():
+    print("/* generated by scripts/modinfo-generate.py */")
+    print("#include \"qemu/osdep.h\"")
+    print("#include \"qemu/module.h\"")
+    print("const QemuModinfo qemu_modinfo[] = {{")
+
+def print_post():
+    print("    /* end of list */")
+    print("}};")
+
+def main(args):
+    if len(args) < 3 or args[0] != '--devices':
+        print('Expected: modinfo-generate.py --devices '
+              'config-device.mak [modinfo files]', file=sys.stderr)
+        exit(1)
+
+    # get all devices enabled in kconfig, from *-config-device.mak
+    enabled = set()
+    with open(args[1]) as file:
+        for line in file.readlines():
+            config = line.split('=')
+            if config[1].rstrip() == 'y':
+                enabled.add(config[0][7:]) # remove CONFIG_
+
+    deps = set()
+    modules = set()
+    print_pre()
+    for modinfo in args[2:]:
+        with open(modinfo) as f:
+            lines = f.readlines()
+        print("    /* %s */" % modinfo)
+        (basename, _) = os.path.splitext(modinfo)
+        moddeps = generate(basename, lines, enabled)
+        if moddeps is not None:
+            modules.add(basename)
+            deps.update(moddeps)
+    print_post()
+
+    error = False
+    for dep in deps.difference(modules):
+        print("Dependency {} cannot be satisfied".format(dep),
+              file=sys.stderr)
+        error = True
+
+    if error:
+        exit(1)
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
index 25ee6887cf324626b71a6d48d31263ad8cb4edd9..179dd54871824ffc08f9c0c16a8179e687fd01e9 100644 (file)
@@ -13,103 +13,88 @@ import sys
 
 class Suite(object):
     def __init__(self):
-        self.tests = list()
-        self.slow_tests = list()
-        self.executables = set()
+        self.deps = set()
+        self.speeds = ['quick']
+
+    def names(self, base):
+        return [base if speed == 'quick' else f'{base}-{speed}' for speed in self.speeds]
+
 
 print('''
 SPEED = quick
 
-# $1 = environment, $2 = test command, $3 = test name, $4 = dir
-.test-human-tap = $1 $(if $4,(cd $4 && $2),$2) < /dev/null | ./scripts/tap-driver.pl --test-name="$3" $(if $(V),,--show-failures-only)
-.test-human-exitcode = $1 $(PYTHON) scripts/test-driver.py $(if $4,-C$4) $(if $(V),--verbose) -- $2 < /dev/null
-.test-tap-tap = $1 $(if $4,(cd $4 && $2),$2) < /dev/null | sed "s/^[a-z][a-z]* [0-9]*/& $3/" || true
-.test-tap-exitcode = printf "%s\\n" 1..1 "`$1 $(if $4,(cd $4 && $2),$2) < /dev/null > /dev/null || echo "not "`ok 1 $3"
-.test.human-print = echo $(if $(V),'$1 $2','Running test $3') &&
-.test.env = MALLOC_PERTURB_=$${MALLOC_PERTURB_:-$$(( $${RANDOM:-0} % 255 + 1))}
+.speed.quick = $(foreach s,$(sort $(filter-out %-slow %-thorough, $1)), --suite $s)
+.speed.slow = $(foreach s,$(sort $(filter-out %-thorough, $1)), --suite $s)
+.speed.thorough = $(foreach s,$(sort $1), --suite $s)
 
-# $1 = test name, $2 = test target (human or tap)
-.test.run = $(call .test.$2-print,$(.test.env.$1),$(.test.cmd.$1),$(.test.name.$1)) $(call .test-$2-$(.test.driver.$1),$(.test.env.$1),$(.test.cmd.$1),$(.test.name.$1),$(.test.dir.$1))
+.mtestargs = --no-rebuild -t 0
+ifneq ($(SPEED), quick)
+.mtestargs += --setup $(SPEED)
+endif
+.mtestargs += $(subst -j,--num-processes , $(filter-out -j, $(lastword -j1 $(filter -j%, $(MAKEFLAGS)))))
 
-.test.output-format = human
-''')
+.check.mtestargs = $(MTESTARGS) $(.mtestargs) $(if $(V),--verbose,--print-errorlogs)
+.bench.mtestargs = $(MTESTARGS) $(.mtestargs) --benchmark --verbose''')
 
 introspect = json.load(sys.stdin)
-i = 0
 
 def process_tests(test, targets, suites):
-    global i
-    env = ' '.join(('%s=%s' % (shlex.quote(k), shlex.quote(v))
-                    for k, v in test['env'].items()))
     executable = test['cmd'][0]
     try:
         executable = os.path.relpath(executable)
     except:
         pass
-    if test['workdir'] is not None:
-        try:
-            test['cmd'][0] = os.path.relpath(executable, test['workdir'])
-        except:
-            test['cmd'][0] = executable
-    else:
-        test['cmd'][0] = executable
-    cmd = ' '.join((shlex.quote(x) for x in test['cmd']))
-    driver = test['protocol'] if 'protocol' in test else 'exitcode'
-
-    i += 1
-    if test['workdir'] is not None:
-        print('.test.dir.%d := %s' % (i, shlex.quote(test['workdir'])))
-
-    if 'depends' in test:
-        deps = (targets.get(x, []) for x in test['depends'])
-        deps = itertools.chain.from_iterable(deps)
-    else:
-        deps = ['all']
-
-    print('.test.name.%d := %s' % (i, test['name']))
-    print('.test.driver.%d := %s' % (i, driver))
-    print('.test.env.%d := $(.test.env) %s' % (i, env))
-    print('.test.cmd.%d := %s' % (i, cmd))
-    print('.test.deps.%d := %s' % (i, ' '.join(deps)))
-    print('.PHONY: run-test-%d' % (i,))
-    print('run-test-%d: $(.test.deps.%d)' % (i,i))
-    print('\t@$(call .test.run,%d,$(.test.output-format))' % (i,))
+
+    deps = (targets.get(x, []) for x in test['depends'])
+    deps = itertools.chain.from_iterable(deps)
+    deps = list(deps)
 
     test_suites = test['suite'] or ['default']
-    is_slow = any(s.endswith('-slow') for s in test_suites)
     for s in test_suites:
-        # The suite name in the introspection info is "PROJECT:SUITE"
-        s = s.split(':')[1]
+        # The suite name in the introspection info is "PROJECT" or "PROJECT:SUITE"
+        if ':' in s:
+            s = s.split(':')[1]
+            if s == 'slow' or s == 'thorough':
+                continue
         if s.endswith('-slow'):
             s = s[:-5]
-        if is_slow:
-            suites[s].slow_tests.append(i)
-        else:
-            suites[s].tests.append(i)
-        suites[s].executables.add(executable)
+            suites[s].speeds.append('slow')
+        if s.endswith('-thorough'):
+            s = s[:-9]
+            suites[s].speeds.append('thorough')
+        suites[s].deps.update(deps)
 
 def emit_prolog(suites, prefix):
-    all_tap = ' '.join(('%s-report-%s.tap' % (prefix, k) for k in suites.keys()))
-    print('.PHONY: %s %s-report.tap %s' % (prefix, prefix, all_tap))
-    print('%s: run-tests' % (prefix,))
-    print('%s-report.tap %s: %s-report%%.tap: all' % (prefix, all_tap, prefix))
-    print('''\t$(MAKE) .test.output-format=tap --quiet -Otarget V=1 %s$* | ./scripts/tap-merge.pl | tee "$@" \\
-              | ./scripts/tap-driver.pl $(if $(V),, --show-failures-only)''' % (prefix, ))
+    all_targets = ' '.join((f'{prefix}-{k}' for k in suites.keys()))
+    all_xml = ' '.join((f'{prefix}-report-{k}.junit.xml' for k in suites.keys()))
+    print()
+    print(f'all-{prefix}-targets = {all_targets}')
+    print(f'all-{prefix}-xml = {all_xml}')
+    print(f'.PHONY: {prefix} do-meson-{prefix} {prefix}-report.junit.xml $(all-{prefix}-targets) $(all-{prefix}-xml)')
+    print(f'ifeq ($(filter {prefix}, $(MAKECMDGOALS)),)')
+    print(f'.{prefix}.mtestargs += $(call .speed.$(SPEED), $(.{prefix}.mtest-suites))')
+    print(f'endif')
+    print(f'{prefix}-build: run-ninja')
+    print(f'{prefix} $(all-{prefix}-targets): do-meson-{prefix}')
+    print(f'do-meson-{prefix}: run-ninja; $(if $(MAKE.n),,+)$(MESON) test $(.{prefix}.mtestargs)')
+    print(f'{prefix}-report.junit.xml $(all-{prefix}-xml): {prefix}-report%.junit.xml: run-ninja')
+    print(f'\t$(MAKE) {prefix}$* MTESTARGS="$(MTESTARGS) --logbase {prefix}-report$*" && ln -f meson-logs/$@ .')
+
+def emit_suite_deps(name, suite, prefix):
+    deps = ' '.join(suite.deps)
+    targets = [f'{prefix}-{name}', f'{prefix}-report-{name}.junit.xml', f'{prefix}', f'{prefix}-report.junit.xml',
+               f'{prefix}-build']
+    print()
+    print(f'.{prefix}-{name}.deps = {deps}')
+    for t in targets:
+        print(f'.ninja-goals.{t} += $(.{prefix}-{name}.deps)')
 
 def emit_suite(name, suite, prefix):
-    executables = ' '.join(suite.executables)
-    slow_test_numbers = ' '.join((str(x) for x in suite.slow_tests))
-    test_numbers = ' '.join((str(x) for x in suite.tests))
-    target = '%s-%s' % (prefix, name)
-    print('.test.quick.%s := %s' % (target, test_numbers))
-    print('.test.slow.%s := $(.test.quick.%s) %s' % (target, target, slow_test_numbers))
-    print('%s-build: %s' % (prefix, executables))
-    print('.PHONY: %s' % (target, ))
-    print('.PHONY: %s-report-%s.tap' % (prefix, name))
-    print('%s: run-tests' % (target, ))
-    print('ifneq ($(filter %s %s, $(MAKECMDGOALS)),)' % (target, prefix))
-    print('.tests += $(.test.$(SPEED).%s)' % (target, ))
-    print('endif')
+    emit_suite_deps(name, suite, prefix)
+    targets = f'{prefix}-{name} {prefix}-report-{name}.junit.xml {prefix} {prefix}-report.junit.xml'
+    print(f'ifneq ($(filter {targets}, $(MAKECMDGOALS)),)')
+    print(f'.{prefix}.mtest-suites += ' + ' '.join(suite.names(name)))
+    print(f'endif')
 
 targets = {t['id']: [os.path.relpath(f) for f in t['filename']]
            for t in introspect['targets']}
@@ -127,5 +112,3 @@ for test in introspect['benchmarks']:
 emit_prolog(benchsuites, 'bench')
 for name, suite in benchsuites.items():
     emit_suite(name, suite, 'bench')
-
-print('run-tests: $(patsubst %, run-test-%, $(.tests))')
index 5135a0583167ea8a386a97f160e5350611e48d7c..03ed7608a2cae33bdcf9bc2122274adfaba6ef70 100644 (file)
@@ -18,25 +18,53 @@ def signcode(path):
         return
     subprocess.run([cmd, path])
 
+def find_deps(exe_or_dll, search_path, analyzed_deps):
+    deps = [exe_or_dll]
+    output = subprocess.check_output(["objdump", "-p", exe_or_dll], text=True)
+    output = output.split("\n")
+    for line in output:
+        if not line.startswith("\tDLL Name: "):
+            continue
+
+        dep = line.split("DLL Name: ")[1].strip()
+        if dep in analyzed_deps:
+            continue
+
+        dll = os.path.join(search_path, dep)
+        if not os.path.exists(dll):
+            # assume it's a Windows provided dll, skip it
+            continue
+
+        analyzed_deps.add(dep)
+        # locate the dll dependencies recursively
+        rdeps = find_deps(dll, search_path, analyzed_deps)
+        deps.extend(rdeps)
+
+    return deps
 
 def main():
     parser = argparse.ArgumentParser(description="QEMU NSIS build helper.")
     parser.add_argument("outfile")
     parser.add_argument("prefix")
     parser.add_argument("srcdir")
+    parser.add_argument("dlldir")
     parser.add_argument("cpu")
     parser.add_argument("nsisargs", nargs="*")
     args = parser.parse_args()
 
+    # canonicalize the Windows native prefix path
+    prefix = os.path.splitdrive(args.prefix)[1]
     destdir = tempfile.mkdtemp()
     try:
-        subprocess.run(["make", "install", "DESTDIR=" + destdir + os.path.sep])
+        subprocess.run(["make", "install", "DESTDIR=" + destdir])
         with open(
-            os.path.join(destdir + args.prefix, "system-emulations.nsh"), "w"
-        ) as nsh:
-            for exe in glob.glob(
-                os.path.join(destdir + args.prefix, "qemu-system-*.exe")
-            ):
+            os.path.join(destdir + prefix, "system-emulations.nsh"), "w"
+        ) as nsh, open(
+            os.path.join(destdir + prefix, "system-mui-text.nsh"), "w"
+        ) as muinsh:
+            for exe in sorted(glob.glob(
+                os.path.join(destdir + prefix, "qemu-system-*.exe")
+            )):
                 exe = os.path.basename(exe)
                 arch = exe[12:-4]
                 nsh.write(
@@ -49,23 +77,46 @@ def main():
                         arch, exe
                     )
                 )
+                if arch.endswith('w'):
+                    desc = arch[:-1] + " emulation (GUI)."
+                else:
+                    desc = arch + " emulation."
 
-        for exe in glob.glob(os.path.join(destdir + args.prefix, "*.exe")):
+                muinsh.write(
+                    """
+                !insertmacro MUI_DESCRIPTION_TEXT ${{Section_{0}}} "{1}"
+                """.format(arch, desc))
+
+        search_path = args.dlldir
+        print("Searching '%s' for the dependent dlls ..." % search_path)
+        dlldir = os.path.join(destdir + prefix, "dll")
+        os.mkdir(dlldir)
+
+        for exe in glob.glob(os.path.join(destdir + prefix, "*.exe")):
             signcode(exe)
 
+            # find all dll dependencies
+            deps = set(find_deps(exe, search_path, set()))
+            deps.remove(exe)
+
+            # copy all dlls to the DLLDIR
+            for dep in deps:
+                dllfile = os.path.join(dlldir, os.path.basename(dep))
+                if (os.path.exists(dllfile)):
+                    continue
+                print("Copying '%s' to '%s'" % (dep, dllfile))
+                shutil.copy(dep, dllfile)
+
         makensis = [
             "makensis",
             "-V2",
             "-NOCD",
             "-DSRCDIR=" + args.srcdir,
-            "-DBINDIR=" + destdir + args.prefix,
+            "-DBINDIR=" + destdir + prefix,
         ]
-        dlldir = "w32"
         if args.cpu == "x86_64":
-            dlldir = "w64"
             makensis += ["-DW64"]
-        if os.path.exists(os.path.join(args.srcdir, "dll")):
-            makensis += ["-DDLLDIR={0}/dll/{1}".format(args.srcdir, dlldir)]
+        makensis += ["-DDLLDIR=" + dlldir]
 
         makensis += ["-DOUTFILE=" + args.outfile] + args.nsisargs
         subprocess.run(makensis)
index c1af43fded15b0e51c03dd179f823af4c9612374..5238f83343582a5cee9a87d7604aaa3707a515c4 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/sh -e
+#!/bin/bash -e
 #
 # OSS-Fuzz build script. See:
 # https://google.github.io/oss-fuzz/getting-started/new-project-guide/#buildsh
@@ -43,10 +43,10 @@ EXTRA_CFLAGS="$CFLAGS -U __OPTIMIZE__"
 if ! { [ -e "./COPYING" ] &&
    [ -e "./MAINTAINERS" ] &&
    [ -e "./Makefile" ] &&
-   [ -e "./docs" ] &&
+   [ -d "./docs" ] &&
    [ -e "./VERSION" ] &&
-   [ -e "./linux-user" ] &&
-   [ -e "./softmmu" ];} ; then
+   [ -d "./linux-user" ] &&
+   [ -d "./system" ];} ; then
     fatal "Please run the script from the top of the QEMU tree"
 fi
 
@@ -64,7 +64,7 @@ mkdir -p "$DEST_DIR/lib/"  # Copy the shared libraries here
 
 # Build once to get the list of dynamic lib paths, and copy them over
 ../configure --disable-werror --cc="$CC" --cxx="$CXX" --enable-fuzzing \
-    --prefix="$DEST_DIR" --bindir="$DEST_DIR" --datadir="$DEST_DIR/data/" \
+    --prefix="/opt/qemu-oss-fuzz" \
     --extra-cflags="$EXTRA_CFLAGS" --target-list="i386-softmmu"
 
 if ! make "-j$(nproc)" qemu-fuzz-i386; then
@@ -73,22 +73,26 @@ if ! make "-j$(nproc)" qemu-fuzz-i386; then
           "\nFor example: CC=clang CXX=clang++ $0"
 fi
 
-for i in $(ldd ./qemu-fuzz-i386 | cut -f3 -d' '); do
-    cp "$i" "$DEST_DIR/lib/"
-done
-rm qemu-fuzz-i386
-
-# Build a second time to build the final binary with correct rpath
-../configure --disable-werror --cc="$CC" --cxx="$CXX" --enable-fuzzing \
-    --prefix="$DEST_DIR" --bindir="$DEST_DIR" --datadir="$DEST_DIR/data/" \
-    --extra-cflags="$EXTRA_CFLAGS" --extra-ldflags="-Wl,-rpath,\$ORIGIN/lib" \
-    --target-list="i386-softmmu"
-make "-j$(nproc)" qemu-fuzz-i386 V=1
+if [ "$GITLAB_CI" != "true" ]; then
+    for i in $(ldd ./qemu-fuzz-i386 | cut -f3 -d' '); do
+        cp "$i" "$DEST_DIR/lib/"
+    done
+    rm qemu-fuzz-i386
+
+    # Build a second time to build the final binary with correct rpath
+    ../configure --disable-werror --cc="$CC" --cxx="$CXX" --enable-fuzzing \
+        --prefix="/opt/qemu-oss-fuzz" \
+        --extra-cflags="$EXTRA_CFLAGS" --extra-ldflags="-Wl,-rpath,\$ORIGIN/lib" \
+        --target-list="i386-softmmu"
+    make "-j$(nproc)" qemu-fuzz-i386 V=1
+fi
 
-# Copy over the datadir
-cp  -r ../pc-bios/ "$DEST_DIR/pc-bios"
+# Place data files in the preinstall tree
+make install DESTDIR=$DEST_DIR/qemu-bundle
+rm -rf $DEST_DIR/qemu-bundle/opt/qemu-oss-fuzz/bin
+rm -rf $DEST_DIR/qemu-bundle/opt/qemu-oss-fuzz/libexec
 
-targets=$(./qemu-fuzz-i386 | awk '$1 ~ /\*/  {print $2}')
+targets=$(./qemu-fuzz-i386 | grep generic-fuzz | awk '$1 ~ /\*/  {print $2}')
 base_copy="$DEST_DIR/qemu-fuzz-i386-target-$(echo "$targets" | head -n 1)"
 
 cp "./qemu-fuzz-i386" "$base_copy"
@@ -103,7 +107,7 @@ do
     # to be configured. We have some generic-fuzz-{pc-q35, floppy, ...} targets
     # that are thin wrappers around this target that set the required
     # environment variables according to predefined configs.
-    if [ "$target" != "generic-fuzz" ]; then
+    if [[ $target == "generic-fuzz-"* ]]; then
         ln  $base_copy \
             "$DEST_DIR/qemu-fuzz-i386-target-$target"
     fi
diff --git a/scripts/oss-fuzz/instrumentation-filter-template b/scripts/oss-fuzz/instrumentation-filter-template
new file mode 100644 (file)
index 0000000..76d2b61
--- /dev/null
@@ -0,0 +1,15 @@
+# Code that we actually want the fuzzer to target
+# See: https://clang.llvm.org/docs/SanitizerCoverage.html#disabling-instrumentation-without-source-modification
+#
+src:*/hw/*
+src:*/include/hw/*
+src:*/slirp/*
+src:*/net/*
+
+# We don't care about coverage over fuzzer-specific code, however we should
+# instrument the fuzzer entry-point so libFuzzer always sees at least some
+# coverage - otherwise it will exit after the first input
+src:*/tests/qtest/fuzz/fuzz.c
+
+# Enable instrumentation for all functions in those files
+fun:*
diff --git a/scripts/oss-fuzz/lsan_suppressions.txt b/scripts/oss-fuzz/lsan_suppressions.txt
new file mode 100644 (file)
index 0000000..7d90c28
--- /dev/null
@@ -0,0 +1,5 @@
+# The tcmalloc on Fedora37 confuses things
+leak:/lib64/libtcmalloc_minimal.so.4
+
+# libxkbcommon also leaks in qemu-keymap
+leak:/lib64/libxkbcommon.so.0
old mode 100755 (executable)
new mode 100644 (file)
index 5e405a0..d1f3990
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 
 """
-This takes a crashing qtest trace and tries to remove superflous operations
+This takes a crashing qtest trace and tries to remove superfluous operations
 """
 
 import sys
@@ -16,6 +16,10 @@ QEMU_PATH = None
 TIMEOUT = 5
 CRASH_TOKEN = None
 
+# Minimization levels
+M1 = False # try removing IO commands iteratively
+M2 = False # try setting bits in operand of write/out to zero
+
 write_suffix_lookup = {"b": (1, "B"),
                        "w": (2, "H"),
                        "l": (4, "L"),
@@ -23,14 +27,30 @@ write_suffix_lookup = {"b": (1, "B"),
 
 def usage():
     sys.exit("""\
-Usage: QEMU_PATH="/path/to/qemu" QEMU_ARGS="args" {} input_trace output_trace
+Usage:
+
+QEMU_PATH="/path/to/qemu" QEMU_ARGS="args" {} [Options] input_trace output_trace
+
 By default, will try to use the second-to-last line in the output to identify
 whether the crash occred. Optionally, manually set a string that idenitifes the
 crash by setting CRASH_TOKEN=
+
+Options:
+
+-M1: enable a loop around the remove minimizer, which may help decrease some
+     timing dependent instructions. Off by default.
+-M2: try setting bits in operand of write/out to zero. Off by default.
+
 """.format((sys.argv[0])))
 
+deduplication_note = """\n\
+Note: While trimming the input, sometimes the mutated trace triggers a different
+type crash but indicates the same bug. Under this situation, our minimizer is
+incapable of recognizing and stopped from removing it. In the future, we may
+use a more sophisticated crash case deduplication method.
+\n"""
+
 def check_if_trace_crashes(trace, path):
-    global CRASH_TOKEN
     with open(path, "w") as tracefile:
         tracefile.write("".join(trace))
 
@@ -41,51 +61,99 @@ def check_if_trace_crashes(trace, path):
                            trace_path=path),
                           shell=True,
                           stdin=subprocess.PIPE,
-                          stdout=subprocess.PIPE)
-    stdo = rc.communicate()[0]
-    output = stdo.decode('unicode_escape')
-    if rc.returncode == 137:    # Timed Out
-        return False
-    if len(output.splitlines()) < 2:
-        return False
-
+                          stdout=subprocess.PIPE,
+                          encoding="utf-8")
+    global CRASH_TOKEN
     if CRASH_TOKEN is None:
-        CRASH_TOKEN = output.splitlines()[-2]
+        try:
+            outs, _ = rc.communicate(timeout=5)
+            CRASH_TOKEN = " ".join(outs.splitlines()[-2].split()[0:3])
+        except subprocess.TimeoutExpired:
+            print("subprocess.TimeoutExpired")
+            return False
+        print("Identifying Crashes by this string: {}".format(CRASH_TOKEN))
+        global deduplication_note
+        print(deduplication_note)
+        return True
 
-    return CRASH_TOKEN in output
+    for line in iter(rc.stdout.readline, ""):
+        if "CLOSED" in line:
+            return False
+        if CRASH_TOKEN in line:
+            return True
 
+    print("\nWarning:")
+    print("  There is no 'CLOSED'or CRASH_TOKEN in the stdout of subprocess.")
+    print("  Usually this indicates a different type of crash.\n")
+    return False
 
-def minimize_trace(inpath, outpath):
-    global TIMEOUT
-    with open(inpath) as f:
-        trace = f.readlines()
-    start = time.time()
-    if not check_if_trace_crashes(trace, outpath):
-        sys.exit("The input qtest trace didn't cause a crash...")
-    end = time.time()
-    print("Crashed in {} seconds".format(end-start))
-    TIMEOUT = (end-start)*5
-    print("Setting the timeout for {} seconds".format(TIMEOUT))
-    print("Identifying Crashes by this string: {}".format(CRASH_TOKEN))
 
+# If previous write commands write the same length of data at the same
+# interval, we view it as a hint.
+def split_write_hint(newtrace, i):
+    HINT_LEN = 3 # > 2
+    if i <=(HINT_LEN-1):
+        return None
+
+    #find previous continuous write traces
+    k = 0
+    l = i-1
+    writes = []
+    while (k != HINT_LEN and l >= 0):
+        if newtrace[l].startswith("write "):
+            writes.append(newtrace[l])
+            k += 1
+            l -= 1
+        elif newtrace[l] == "":
+            l -= 1
+        else:
+            return None
+    if k != HINT_LEN:
+        return None
+
+    length = int(writes[0].split()[2], 16)
+    for j in range(1, HINT_LEN):
+        if length != int(writes[j].split()[2], 16):
+            return None
+
+    step = int(writes[0].split()[1], 16) - int(writes[1].split()[1], 16)
+    for j in range(1, HINT_LEN-1):
+        if step != int(writes[j].split()[1], 16) - \
+            int(writes[j+1].split()[1], 16):
+            return None
+
+    return (int(writes[0].split()[1], 16)+step, length)
+
+
+def remove_lines(newtrace, outpath):
+    remove_step = 1
     i = 0
-    newtrace = trace[:]
-    # For each line
     while i < len(newtrace):
-        # 1.) Try to remove it completely and reproduce the crash. If it works,
-        # we're done.
-        prior = newtrace[i]
-        print("Trying to remove {}".format(newtrace[i]))
-        # Try to remove the line completely
-        newtrace[i] = ""
+        # 1.) Try to remove lines completely and reproduce the crash.
+        # If it works, we're done.
+        if (i+remove_step) >= len(newtrace):
+            remove_step = 1
+        prior = newtrace[i:i+remove_step]
+        for j in range(i, i+remove_step):
+            newtrace[j] = ""
+        print("Removing {lines} ...\n".format(lines=prior))
         if check_if_trace_crashes(newtrace, outpath):
-            i += 1
+            i += remove_step
+            # Double the number of lines to remove for next round
+            remove_step *= 2
             continue
-        newtrace[i] = prior
+        # Failed to remove multiple IOs, fast recovery
+        if remove_step > 1:
+            for j in range(i, i+remove_step):
+                newtrace[j] = prior[j-i]
+            remove_step = 1
+            continue
+        newtrace[i] = prior[0] # remove_step = 1
 
         # 2.) Try to replace write{bwlq} commands with a write addr, len
         # command. Since this can require swapping endianness, try both LE and
         # BE options. We do this, so we can "trim" the writes in (3)
+
         if (newtrace[i].startswith("write") and not
             newtrace[i].startswith("write ")):
             suffix = newtrace[i].split()[0][-1]
@@ -103,22 +171,46 @@ def minimize_trace(inpath, outpath):
                 if(check_if_trace_crashes(newtrace, outpath)):
                     break
             else:
-                newtrace[i] = prior
+                newtrace[i] = prior[0]
 
         # 3.) If it is a qtest write command: write addr len data, try to split
-        # it into two separate write commands. If splitting the write down the
-        # middle does not work, try to move the pivot "left" and retry, until
-        # there is no space left. The idea is to prune unneccessary bytes from
-        # long writes, while accommodating arbitrary MemoryRegion access sizes
-        # and alignments.
+        # it into two separate write commands. If splitting the data operand
+        # from length/2^n bytes to the left does not work, try to move the pivot
+        # to the right side, then add one to n, until length/2^n == 0. The idea
+        # is to prune unnecessary bytes from long writes, while accommodating
+        # arbitrary MemoryRegion access sizes and alignments.
+
+        # This algorithm will fail under some rare situations.
+        # e.g., xxxxxxxxxuxxxxxx (u is the unnecessary byte)
+
         if newtrace[i].startswith("write "):
             addr = int(newtrace[i].split()[1], 16)
             length = int(newtrace[i].split()[2], 16)
             data = newtrace[i].split()[3][2:]
             if length > 1:
+
+                # Can we get a hint from previous writes?
+                hint = split_write_hint(newtrace, i)
+                if hint is not None:
+                    hint_addr = hint[0]
+                    hint_len = hint[1]
+                    if hint_addr >= addr and hint_addr+hint_len <= addr+length:
+                        newtrace[i] = "write {addr} {size} 0x{data}\n".format(
+                            addr=hex(hint_addr),
+                            size=hex(hint_len),
+                            data=data[(hint_addr-addr)*2:\
+                                (hint_addr-addr)*2+hint_len*2])
+                        if check_if_trace_crashes(newtrace, outpath):
+                            # next round
+                            i += 1
+                            continue
+                        newtrace[i] = prior[0]
+
+                # Try splitting it using a binary approach
                 leftlength = int(length/2)
                 rightlength = length - leftlength
                 newtrace.insert(i+1, "")
+                power = 1
                 while leftlength > 0:
                     newtrace[i] = "write {addr} {size} 0x{data}\n".format(
                             addr=hex(addr),
@@ -130,22 +222,96 @@ def minimize_trace(inpath, outpath):
                             data=data[leftlength*2:])
                     if check_if_trace_crashes(newtrace, outpath):
                         break
-                    else:
-                        leftlength -= 1
-                        rightlength += 1
+                    # move the pivot to right side
+                    if leftlength < rightlength:
+                        rightlength, leftlength = leftlength, rightlength
+                        continue
+                    power += 1
+                    leftlength = int(length/pow(2, power))
+                    rightlength = length - leftlength
                 if check_if_trace_crashes(newtrace, outpath):
                     i -= 1
                 else:
-                    newtrace[i] = prior
+                    newtrace[i] = prior[0]
                     del newtrace[i+1]
         i += 1
-    check_if_trace_crashes(newtrace, outpath)
+
+
+def clear_bits(newtrace, outpath):
+    # try setting bits in operands of out/write to zero
+    i = 0
+    while i < len(newtrace):
+        if (not newtrace[i].startswith("write ") and not
+           newtrace[i].startswith("out")):
+           i += 1
+           continue
+        # write ADDR SIZE DATA
+        # outx ADDR VALUE
+        print("\nzero setting bits: {}".format(newtrace[i]))
+
+        prefix = " ".join(newtrace[i].split()[:-1])
+        data = newtrace[i].split()[-1]
+        data_bin = bin(int(data, 16))
+        data_bin_list = list(data_bin)
+
+        for j in range(2, len(data_bin_list)):
+            prior = newtrace[i]
+            if (data_bin_list[j] == '1'):
+                data_bin_list[j] = '0'
+                data_try = hex(int("".join(data_bin_list), 2))
+                # It seems qtest only accepts padded hex-values.
+                if len(data_try) % 2 == 1:
+                    data_try = data_try[:2] + "0" + data_try[2:]
+
+                newtrace[i] = "{prefix} {data_try}\n".format(
+                        prefix=prefix,
+                        data_try=data_try)
+
+                if not check_if_trace_crashes(newtrace, outpath):
+                    data_bin_list[j] = '1'
+                    newtrace[i] = prior
+        i += 1
+
+
+def minimize_trace(inpath, outpath):
+    global TIMEOUT
+    with open(inpath) as f:
+        trace = f.readlines()
+    start = time.time()
+    if not check_if_trace_crashes(trace, outpath):
+        sys.exit("The input qtest trace didn't cause a crash...")
+    end = time.time()
+    print("Crashed in {} seconds".format(end-start))
+    TIMEOUT = (end-start)*5
+    print("Setting the timeout for {} seconds".format(TIMEOUT))
+
+    newtrace = trace[:]
+    global M1, M2
+
+    # remove lines
+    old_len = len(newtrace) + 1
+    while(old_len > len(newtrace)):
+        old_len = len(newtrace)
+        print("trace length = ", old_len)
+        remove_lines(newtrace, outpath)
+        if not M1 and not M2:
+            break
+        newtrace = list(filter(lambda s: s != "", newtrace))
+    assert(check_if_trace_crashes(newtrace, outpath))
+
+    # set bits to zero
+    if M2:
+        clear_bits(newtrace, outpath)
+    assert(check_if_trace_crashes(newtrace, outpath))
 
 
 if __name__ == '__main__':
     if len(sys.argv) < 3:
         usage()
-
+    if "-M1" in sys.argv:
+        M1 = True
+    if "-M2" in sys.argv:
+        M2 = True
     QEMU_PATH = os.getenv("QEMU_PATH")
     QEMU_ARGS = os.getenv("QEMU_ARGS")
     if QEMU_PATH is None or QEMU_ARGS is None:
@@ -154,4 +320,4 @@ if __name__ == '__main__':
     #     QEMU_ARGS += " -accel qtest"
     CRASH_TOKEN = os.getenv("CRASH_TOKEN")
     QEMU_ARGS += " -qtest stdio -monitor none -serial none "
-    minimize_trace(sys.argv[1], sys.argv[2])
+    minimize_trace(sys.argv[-2], sys.argv[-1])
diff --git a/scripts/oss-fuzz/output_reproducer.py b/scripts/oss-fuzz/output_reproducer.py
new file mode 100644 (file)
index 0000000..e8ef76b
--- /dev/null
@@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Convert plain qtest traces to C or Bash reproducers
+
+Use this to help build bug-reports or create in-tree reproducers for bugs.
+Note: This will not format C code for you. Pipe the output through
+clang-format -style="{BasedOnStyle: llvm, IndentWidth: 4, ColumnLimit: 90}"
+or similar
+"""
+
+import sys
+import os
+import argparse
+import textwrap
+from datetime import date
+
+__author__     = "Alexander Bulekov <alxndr@bu.edu>"
+__copyright__  = "Copyright (C) 2021, Red Hat, Inc."
+__license__    = "GPL version 2 or (at your option) any later version"
+
+__maintainer__ = "Alexander Bulekov"
+__email__      = "alxndr@bu.edu"
+
+
+def c_header(owner):
+    return """/*
+ * Autogenerated Fuzzer Test Case
+ *
+ * Copyright (c) {date} {owner}
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "libqtest.h"
+
+    """.format(date=date.today().year, owner=owner)
+
+def c_comment(s):
+    """ Return a multi-line C comment. Assume the text is already wrapped """
+    return "/*\n * " + "\n * ".join(s.splitlines()) + "\n*/"
+
+def print_c_function(s):
+    print("/* ")
+    for l in s.splitlines():
+        print(" * {}".format(l))
+
+def bash_reproducer(path, args, trace):
+    result = '\\\n'.join(textwrap.wrap("cat << EOF | {} {}".format(path, args),
+                                       72, break_on_hyphens=False,
+                                       drop_whitespace=False))
+    for l in trace.splitlines():
+        result += "\n" + '\\\n'.join(textwrap.wrap(l,72,drop_whitespace=False))
+    result += "\nEOF"
+    return result
+
+def c_reproducer(name, args, trace):
+    result = []
+    result.append("""static void {}(void)\n{{""".format(name))
+
+    # libqtest will add its own qtest args, so get rid of them
+    args = args.replace("-accel qtest","")
+    args = args.replace(",accel=qtest","")
+    args = args.replace("-machine accel=qtest","")
+    args = args.replace("-qtest stdio","")
+    result.append("""QTestState *s = qtest_init("{}");""".format(args))
+    for l in trace.splitlines():
+        param = l.split()
+        cmd = param[0]
+        if cmd == "write":
+            buf = param[3][2:] #Get the 0x... buffer and trim the "0x"
+            assert len(buf)%2 == 0
+            bufbytes = [buf[i:i+2] for i in range(0, len(buf), 2)]
+            bufstring = '\\x'+'\\x'.join(bufbytes)
+            addr = param[1]
+            size = param[2]
+            result.append("""qtest_bufwrite(s, {}, "{}", {});""".format(
+                          addr, bufstring, size))
+        elif cmd.startswith("in") or cmd.startswith("read"):
+            result.append("qtest_{}(s, {});".format(
+                          cmd, param[1]))
+        elif cmd.startswith("out") or cmd.startswith("write"):
+            result.append("qtest_{}(s, {}, {});".format(
+                          cmd, param[1], param[2]))
+        elif cmd == "clock_step":
+            if len(param) ==1:
+                result.append("qtest_clock_step_next(s);")
+            else:
+                result.append("qtest_clock_step(s, {});".format(param[1]))
+    result.append("qtest_quit(s);\n}")
+    return "\n".join(result)
+
+def c_main(name, arch):
+    return """int main(int argc, char **argv)
+{{
+    const char *arch = qtest_get_arch();
+
+    g_test_init(&argc, &argv, NULL);
+
+   if (strcmp(arch, "{arch}") == 0) {{
+        qtest_add_func("fuzz/{name}",{name});
+   }}
+
+   return g_test_run();
+}}""".format(name=name, arch=arch)
+
+def main():
+    parser = argparse.ArgumentParser()
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument("-bash", help="Only output a copy-pastable bash command",
+                        action="store_true")
+    group.add_argument("-c", help="Only output a c function",
+                        action="store_true")
+    parser.add_argument('-owner', help="If generating complete C source code, \
+                        this specifies the Copyright owner",
+                        nargs='?', default="<name of author>")
+    parser.add_argument("-no_comment", help="Don't include a bash reproducer \
+                        as a comment in the C reproducers",
+                        action="store_true")
+    parser.add_argument('-name', help="The name of the c function",
+                        nargs='?', default="test_fuzz")
+    parser.add_argument('input_trace', help="input QTest command sequence \
+                        (stdin by default)",
+                        nargs='?', type=argparse.FileType('r'),
+                        default=sys.stdin)
+    args = parser.parse_args()
+
+    qemu_path = os.getenv("QEMU_PATH")
+    qemu_args = os.getenv("QEMU_ARGS")
+    if not qemu_args or not qemu_path:
+        print("Please set QEMU_PATH and QEMU_ARGS environment variables")
+        sys.exit(1)
+
+    bash_args = qemu_args
+    if " -qtest stdio" not in  qemu_args:
+        bash_args += " -qtest stdio"
+
+    arch = qemu_path.split("-")[-1]
+    trace = args.input_trace.read().strip()
+
+    if args.bash :
+        print(bash_reproducer(qemu_path, bash_args, trace))
+    else:
+        output = ""
+        if not args.c:
+            output += c_header(args.owner) + "\n"
+        if not args.no_comment:
+            output += c_comment(bash_reproducer(qemu_path, bash_args, trace))
+        output += c_reproducer(args.name, qemu_args, trace)
+        if not args.c:
+            output += c_main(args.name, arch)
+        print(output)
+
+
+if __name__ == '__main__':
+    main()
old mode 100755 (executable)
new mode 100644 (file)
index 890e1de..b154a25
@@ -14,7 +14,7 @@ QEMU_FUZZ_ARGS="-machine q35,accel=qtest" QEMU_FUZZ_OBJECTS="*" \
         /path/to/crash 2> qtest_log_output
 scripts/oss-fuzz/reorder_fuzzer_qtest_trace.py qtest_log_output > qtest_trace
 ./i386-softmmu/qemu-fuzz-i386 -machine q35,accel=qtest \
-        -qtest stdin < qtest_trace
+        -qtest stdio < qtest_trace
 
 ### Details ###
 
old mode 100755 (executable)
new mode 100644 (file)
old mode 100755 (executable)
new mode 100644 (file)
index 67c5919..f3f05fc
@@ -4,7 +4,7 @@
 #  Syntax:
 #  topN_callgrind.py [-h] [-n] <number of displayed top functions>  -- \
 #           <qemu executable> [<qemu executable options>] \
-#           <target executable> [<target execurable options>]
+#           <target executable> [<target executable options>]
 #
 #  [-h] - Print the script arguments help message.
 #  [-n] - Specify the number of top functions to print.
old mode 100755 (executable)
new mode 100644 (file)
index 07be195..7b19e6a
@@ -4,7 +4,7 @@
 #  Syntax:
 #  topN_perf.py [-h] [-n] <number of displayed top functions>  -- \
 #           <qemu executable> [<qemu executable options>] \
-#           <target executable> [<target execurable options>]
+#           <target executable> [<target executable options>]
 #
 #  [-h] - Print the script arguments help message.
 #  [-n] - Specify the number of top functions to print.
diff --git a/scripts/probe-gdb-support.py b/scripts/probe-gdb-support.py
new file mode 100644 (file)
index 0000000..5755255
--- /dev/null
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+# coding: utf-8
+#
+# Probe gdb for supported architectures.
+#
+# This is required to support testing of the gdbstub as its hard to
+# handle errors gracefully during the test. Instead this script when
+# passed a GDB binary will probe its architecture support and return a
+# string of supported arches, stripped of guff.
+#
+# Copyright 2023 Linaro Ltd
+#
+# Author: Alex Bennée <alex.bennee@linaro.org>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import argparse
+import re
+from subprocess import check_output, STDOUT
+
+# mappings from gdb arch to QEMU target
+mappings = {
+    "alpha" : "alpha",
+    "aarch64" : ["aarch64", "aarch64_be"],
+    "armv7": "arm",
+    "armv8-a" : ["aarch64", "aarch64_be"],
+    "avr" : "avr",
+    "cris" : "cris",
+    # no hexagon in upstream gdb
+    "hppa1.0" : "hppa",
+    "i386" : "i386",
+    "i386:x86-64" : "x86_64",
+    "Loongarch64" : "loongarch64",
+    "m68k" : "m68k",
+    "MicroBlaze" : "microblaze",
+    "mips:isa64" : ["mips64", "mips64el"],
+    "nios2" : "nios2",
+    "or1k" : "or1k",
+    "powerpc:common" : "ppc",
+    "powerpc:common64" : ["ppc64", "ppc64le"],
+    "riscv:rv32" : "riscv32",
+    "riscv:rv64" : "riscv64",
+    "s390:64-bit" : "s390x",
+    "sh4" : ["sh4", "sh4eb"],
+    "sparc": "sparc",
+    "sparc:v8plus": "sparc32plus",
+    "sparc:v9a" : "sparc64",
+    # no tricore in upstream gdb
+    "xtensa" : ["xtensa", "xtensaeb"]
+}
+
+def do_probe(gdb):
+    gdb_out = check_output([gdb,
+                            "-ex", "set architecture",
+                            "-ex", "quit"], stderr=STDOUT)
+
+    m = re.search(r"Valid arguments are (.*)",
+                  gdb_out.decode("utf-8"))
+
+    valid_arches = set()
+
+    if m.group(1):
+        for arch in m.group(1).split(", "):
+            if arch in mappings:
+                mapping = mappings[arch]
+                if isinstance(mapping, str):
+                    valid_arches.add(mapping)
+                else:
+                    for entry in mapping:
+                        valid_arches.add(entry)
+
+    return valid_arches
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description='Probe GDB Architectures')
+    parser.add_argument('gdb', help='Path to GDB binary.')
+
+    args = parser.parse_args()
+
+    supported = do_probe(args.gdb)
+
+    print(" ".join(supported))
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/python_qmp_updater.py b/scripts/python_qmp_updater.py
new file mode 100644 (file)
index 0000000..494a169
--- /dev/null
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+#
+# Intended usage:
+#
+# git grep -l '\.qmp(' | xargs ./scripts/python_qmp_updater.py
+#
+
+import re
+import sys
+from typing import Optional
+
+start_reg = re.compile(r'^(?P<padding> *)(?P<res>\w+) = (?P<vm>.*).qmp\(',
+                       flags=re.MULTILINE)
+
+success_reg_templ = re.sub('\n *', '', r"""
+    (\n*{padding}(?P<comment>\#.*$))?
+    \n*{padding}
+    (
+        self.assert_qmp\({res},\ 'return',\ {{}}\)
+    |
+        assert\ {res}\['return'\]\ ==\ {{}}
+    |
+        assert\ {res}\ ==\ {{'return':\ {{}}}}
+    |
+        self.assertEqual\({res}\['return'\],\ {{}}\)
+    )""")
+
+some_check_templ = re.sub('\n *', '', r"""
+    (\n*{padding}(?P<comment>\#.*$))?
+    \s*self.assert_qmp\({res},""")
+
+
+def tmatch(template: str, text: str,
+           padding: str, res: str) -> Optional[re.Match[str]]:
+    return re.match(template.format(padding=padding, res=res), text,
+                    flags=re.MULTILINE)
+
+
+def find_closing_brace(text: str, start: int) -> int:
+    """
+    Having '(' at text[start] search for pairing ')' and return its index.
+    """
+    assert text[start] == '('
+
+    height = 1
+
+    for i in range(start + 1, len(text)):
+        if text[i] == '(':
+            height += 1
+        elif text[i] == ')':
+            height -= 1
+        if height == 0:
+            return i
+
+    raise ValueError
+
+
+def update(text: str) -> str:
+    result = ''
+
+    while True:
+        m = start_reg.search(text)
+        if m is None:
+            result += text
+            break
+
+        result += text[:m.start()]
+
+        args_ind = m.end()
+        args_end = find_closing_brace(text, args_ind - 1)
+
+        all_args = text[args_ind:args_end].split(',', 1)
+
+        name = all_args[0]
+        args = None if len(all_args) == 1 else all_args[1]
+
+        unchanged_call = text[m.start():args_end+1]
+        text = text[args_end+1:]
+
+        padding, res, vm = m.group('padding', 'res', 'vm')
+
+        m = tmatch(success_reg_templ, text, padding, res)
+
+        if m is None:
+            result += unchanged_call
+
+            if ('query-' not in name and
+                    'x-debug-block-dirty-bitmap-sha256' not in name and
+                    not tmatch(some_check_templ, text, padding, res)):
+                print(unchanged_call + text[:200] + '...\n\n')
+
+            continue
+
+        if m.group('comment'):
+            result += f'{padding}{m.group("comment")}\n'
+
+        result += f'{padding}{vm}.cmd({name}'
+
+        if args:
+            result += ','
+
+            if '\n' in args:
+                m_args = re.search('(?P<pad> *).*$', args)
+                assert m_args is not None
+
+                cur_padding = len(m_args.group('pad'))
+                expected = len(f'{padding}{res} = {vm}.qmp(')
+                drop = len(f'{res} = ')
+                if cur_padding == expected - 1:
+                    # tolerate this bad style
+                    drop -= 1
+                elif cur_padding < expected - 1:
+                    # assume nothing to do
+                    drop = 0
+
+                if drop:
+                    args = re.sub('\n' + ' ' * drop, '\n', args)
+
+            result += args
+
+        result += ')'
+
+        text = text[m.end():]
+
+    return result
+
+
+for fname in sys.argv[1:]:
+    print(fname)
+    with open(fname) as f:
+        t = f.read()
+
+    t = update(t)
+
+    with open(fname, 'w') as f:
+        f.write(t)
index 6b158c68b84a4281ff7e5aadc2e6f84bd0df5198..a873ff6730924d2c1778b841bd49dfabea57a168 100644 (file)
@@ -1,2 +1,3 @@
 [flake8]
-extend-ignore = E722  # Prefer pylint's bare-except checks to flake8's
+# Prefer pylint's bare-except checks to flake8's
+extend-ignore = E722
index 50978090b440c302a3348b10324c0bd2aa316452..d1fdf4182c9467d53d70779c5b76493c49994fbb 100644 (file)
@@ -23,14 +23,15 @@ from typing import (
 from .common import c_name, mcgen
 from .gen import (
     QAPIGenC,
-    QAPIGenCCode,
     QAPISchemaModularCVisitor,
     build_params,
+    gen_special_features,
     ifcontext,
 )
 from .schema import (
     QAPISchema,
     QAPISchemaFeature,
+    QAPISchemaIfCond,
     QAPISchemaObjectType,
     QAPISchemaType,
 )
@@ -40,11 +41,13 @@ from .source import QAPISourceInfo
 def gen_command_decl(name: str,
                      arg_type: Optional[QAPISchemaObjectType],
                      boxed: bool,
-                     ret_type: Optional[QAPISchemaType]) -> str:
+                     ret_type: Optional[QAPISchemaType],
+                     coroutine: bool) -> str:
     return mcgen('''
-%(c_type)s qmp_%(c_name)s(%(params)s);
+%(c_type)s %(coroutine_fn)sqmp_%(c_name)s(%(params)s);
 ''',
                  c_type=(ret_type and ret_type.c_type()) or 'void',
+                 coroutine_fn='coroutine_fn ' if coroutine else '',
                  c_name=c_name(name),
                  params=build_params(arg_type, boxed, 'Error **errp'))
 
@@ -52,7 +55,8 @@ def gen_command_decl(name: str,
 def gen_call(name: str,
              arg_type: Optional[QAPISchemaObjectType],
              boxed: bool,
-             ret_type: Optional[QAPISchemaType]) -> str:
+             ret_type: Optional[QAPISchemaType],
+             gen_tracing: bool) -> str:
     ret = ''
 
     argstr = ''
@@ -62,7 +66,8 @@ def gen_call(name: str,
     elif arg_type:
         assert not arg_type.variants
         for memb in arg_type.members:
-            if memb.optional:
+            assert not memb.ifcond.is_present()
+            if memb.need_has():
                 argstr += 'arg.has_%s, ' % c_name(memb.name)
             argstr += 'arg.%s, ' % c_name(memb.name)
 
@@ -70,21 +75,67 @@ def gen_call(name: str,
     if ret_type:
         lhs = 'retval = '
 
-    ret = mcgen('''
+    name = c_name(name)
+    upper = name.upper()
 
-    %(lhs)sqmp_%(c_name)s(%(args)s&err);
-    error_propagate(errp, err);
-''',
-                c_name=c_name(name), args=argstr, lhs=lhs)
-    if ret_type:
+    if gen_tracing:
         ret += mcgen('''
+
+    if (trace_event_get_state_backends(TRACE_QMP_ENTER_%(upper)s)) {
+        g_autoptr(GString) req_json = qobject_to_json(QOBJECT(args));
+
+        trace_qmp_enter_%(name)s(req_json->str);
+    }
+''',
+                     upper=upper, name=name)
+
+    ret += mcgen('''
+
+    %(lhs)sqmp_%(name)s(%(args)s&err);
+''',
+                 name=name, args=argstr, lhs=lhs)
+
+    ret += mcgen('''
     if (err) {
+''')
+
+    if gen_tracing:
+        ret += mcgen('''
+        trace_qmp_exit_%(name)s(error_get_pretty(err), false);
+''',
+                     name=name)
+
+    ret += mcgen('''
+        error_propagate(errp, err);
         goto out;
     }
+''')
+
+    if ret_type:
+        ret += mcgen('''
 
     qmp_marshal_output_%(c_name)s(retval, ret, errp);
 ''',
                      c_name=ret_type.c_name())
+
+    if gen_tracing:
+        if ret_type:
+            ret += mcgen('''
+
+    if (trace_event_get_state_backends(TRACE_QMP_EXIT_%(upper)s)) {
+        g_autoptr(GString) ret_json = qobject_to_json(*ret);
+
+        trace_qmp_exit_%(name)s(ret_json->str, true);
+    }
+''',
+                         upper=upper, name=name)
+        else:
+            ret += mcgen('''
+
+    trace_qmp_exit_%(name)s("{}", true);
+''',
+                         name=name)
+
     return ret
 
 
@@ -96,7 +147,7 @@ static void qmp_marshal_output_%(c_name)s(%(c_type)s ret_in,
 {
     Visitor *v;
 
-    v = qobject_output_visitor_new(ret_out);
+    v = qobject_output_visitor_new_qmp(ret_out);
     if (visit_type_%(c_name)s(v, "unused", &ret_in, errp)) {
         visit_complete(v, ret_out);
     }
@@ -109,23 +160,41 @@ static void qmp_marshal_output_%(c_name)s(%(c_type)s ret_in,
                  c_type=ret_type.c_type(), c_name=ret_type.c_name())
 
 
-def build_marshal_proto(name: str) -> str:
-    return ('void qmp_marshal_%s(QDict *args, QObject **ret, Error **errp)'
-            % c_name(name))
+def build_marshal_proto(name: str,
+                        coroutine: bool) -> str:
+    return ('void %(coroutine_fn)sqmp_marshal_%(c_name)s(%(params)s)' % {
+        'coroutine_fn': 'coroutine_fn ' if coroutine else '',
+        'c_name': c_name(name),
+        'params': 'QDict *args, QObject **ret, Error **errp',
+    })
 
 
-def gen_marshal_decl(name: str) -> str:
+def gen_marshal_decl(name: str,
+                     coroutine: bool) -> str:
     return mcgen('''
 %(proto)s;
 ''',
-                 proto=build_marshal_proto(name))
+                 proto=build_marshal_proto(name, coroutine))
+
+
+def gen_trace(name: str) -> str:
+    return mcgen('''
+qmp_enter_%(name)s(const char *json) "%%s"
+qmp_exit_%(name)s(const char *result, bool succeeded) "%%s %%d"
+''',
+                 name=c_name(name))
 
 
 def gen_marshal(name: str,
                 arg_type: Optional[QAPISchemaObjectType],
                 boxed: bool,
-                ret_type: Optional[QAPISchemaType]) -> str:
+                ret_type: Optional[QAPISchemaType],
+                gen_tracing: bool,
+                coroutine: bool) -> str:
     have_args = boxed or (arg_type and not arg_type.is_empty())
+    if have_args:
+        assert arg_type is not None
+        arg_type_c_name = arg_type.c_name()
 
     ret = mcgen('''
 
@@ -135,7 +204,7 @@ def gen_marshal(name: str,
     bool ok = false;
     Visitor *v;
 ''',
-                proto=build_marshal_proto(name))
+                proto=build_marshal_proto(name, coroutine))
 
     if ret_type:
         ret += mcgen('''
@@ -147,11 +216,11 @@ def gen_marshal(name: str,
         ret += mcgen('''
     %(c_name)s arg = {0};
 ''',
-                     c_name=arg_type.c_name())
+                     c_name=arg_type_c_name)
 
     ret += mcgen('''
 
-    v = qobject_input_visitor_new(QOBJECT(args));
+    v = qobject_input_visitor_new_qmp(QOBJECT(args));
     if (!visit_start_struct(v, NULL, NULL, 0, errp)) {
         goto out;
     }
@@ -163,7 +232,7 @@ def gen_marshal(name: str,
         ok = visit_check_struct(v, errp);
     }
 ''',
-                     c_arg_type=arg_type.c_name())
+                     c_arg_type=arg_type_c_name)
     else:
         ret += mcgen('''
     ok = visit_check_struct(v, errp);
@@ -176,7 +245,7 @@ def gen_marshal(name: str,
     }
 ''')
 
-    ret += gen_call(name, arg_type, boxed, ret_type)
+    ret += gen_call(name, arg_type, boxed, ret_type, gen_tracing)
 
     ret += mcgen('''
 
@@ -193,7 +262,7 @@ out:
         ret += mcgen('''
     visit_type_%(c_arg_type)s_members(v, &arg, NULL);
 ''',
-                     c_arg_type=arg_type.c_name())
+                     c_arg_type=arg_type_c_name)
 
     ret += mcgen('''
     visit_end_struct(v, NULL);
@@ -207,6 +276,7 @@ out:
 
 
 def gen_register_command(name: str,
+                         features: List[QAPISchemaFeature],
                          success_response: bool,
                          allow_oob: bool,
                          allow_preconfig: bool,
@@ -222,41 +292,24 @@ def gen_register_command(name: str,
     if coroutine:
         options += ['QCO_COROUTINE']
 
-    if not options:
-        options = ['QCO_NO_OPTIONS']
-
     ret = mcgen('''
     qmp_register_command(cmds, "%(name)s",
-                         qmp_marshal_%(c_name)s, %(opts)s);
+                         qmp_marshal_%(c_name)s, %(opts)s, %(feats)s);
 ''',
                 name=name, c_name=c_name(name),
-                opts=" | ".join(options))
-    return ret
-
-
-def gen_registry(registry: str, prefix: str) -> str:
-    ret = mcgen('''
-
-void %(c_prefix)sqmp_init_marshal(QmpCommandList *cmds)
-{
-    QTAILQ_INIT(cmds);
-
-''',
-                c_prefix=c_name(prefix, protect=False))
-    ret += registry
-    ret += mcgen('''
-}
-''')
+                opts=' | '.join(options) or 0,
+                feats=gen_special_features(features))
     return ret
 
 
 class QAPISchemaGenCommandVisitor(QAPISchemaModularCVisitor):
-    def __init__(self, prefix: str):
+    def __init__(self, prefix: str, gen_tracing: bool):
         super().__init__(
             prefix, 'qapi-commands',
-            ' * Schema-defined QAPI/QMP commands', None, __doc__)
-        self._regy = QAPIGenCCode(None)
+            ' * Schema-defined QAPI/QMP commands', None, __doc__,
+            gen_tracing=gen_tracing)
         self._visited_ret_types: Dict[QAPIGenC, Set[QAPISchemaType]] = {}
+        self._gen_tracing = gen_tracing
 
     def _begin_user_module(self, name: str) -> None:
         self._visited_ret_types[self._genc] = set()
@@ -265,43 +318,62 @@ class QAPISchemaGenCommandVisitor(QAPISchemaModularCVisitor):
         visit = self._module_basename('qapi-visit', name)
         self._genc.add(mcgen('''
 #include "qemu/osdep.h"
+#include "qapi/compat-policy.h"
 #include "qapi/visitor.h"
 #include "qapi/qmp/qdict.h"
-#include "qapi/qobject-output-visitor.h"
-#include "qapi/qobject-input-visitor.h"
 #include "qapi/dealloc-visitor.h"
 #include "qapi/error.h"
 #include "%(visit)s.h"
 #include "%(commands)s.h"
-
 ''',
                              commands=commands, visit=visit))
+
+        if self._gen_tracing and commands != 'qapi-commands':
+            self._genc.add(mcgen('''
+#include "qapi/qmp/qjson.h"
+#include "trace/trace-%(nm)s_trace_events.h"
+''',
+                                 nm=c_name(commands, protect=False)))
+            # We use c_name(commands, protect=False) to turn '-' into '_', to
+            # match .underscorify() in trace/meson.build
+
         self._genh.add(mcgen('''
 #include "%(types)s.h"
 
 ''',
                              types=types))
 
-    def visit_end(self) -> None:
-        self._add_system_module('init', ' * QAPI Commands initialization')
+    def visit_begin(self, schema: QAPISchema) -> None:
+        self._add_module('./init', ' * QAPI Commands initialization')
         self._genh.add(mcgen('''
 #include "qapi/qmp/dispatch.h"
 
 void %(c_prefix)sqmp_init_marshal(QmpCommandList *cmds);
 ''',
                              c_prefix=c_name(self._prefix, protect=False)))
-        self._genc.preamble_add(mcgen('''
+        self._genc.add(mcgen('''
 #include "qemu/osdep.h"
 #include "%(prefix)sqapi-commands.h"
 #include "%(prefix)sqapi-init-commands.h"
+
+void %(c_prefix)sqmp_init_marshal(QmpCommandList *cmds)
+{
+    QTAILQ_INIT(cmds);
+
 ''',
-                                      prefix=self._prefix))
-        self._genc.add(gen_registry(self._regy.get_content(), self._prefix))
+                             prefix=self._prefix,
+                             c_prefix=c_name(self._prefix, protect=False)))
+
+    def visit_end(self) -> None:
+        with self._temp_module('./init'):
+            self._genc.add(mcgen('''
+}
+'''))
 
     def visit_command(self,
                       name: str,
-                      info: QAPISourceInfo,
-                      ifcond: List[str],
+                      info: Optional[QAPISourceInfo],
+                      ifcond: QAPISchemaIfCond,
                       features: List[QAPISchemaFeature],
                       arg_type: Optional[QAPISchemaObjectType],
                       ret_type: Optional[QAPISchemaType],
@@ -321,20 +393,27 @@ void %(c_prefix)sqmp_init_marshal(QmpCommandList *cmds);
         if ret_type and ret_type not in self._visited_ret_types[self._genc]:
             self._visited_ret_types[self._genc].add(ret_type)
             with ifcontext(ret_type.ifcond,
-                           self._genh, self._genc, self._regy):
+                           self._genh, self._genc):
                 self._genc.add(gen_marshal_output(ret_type))
-        with ifcontext(ifcond, self._genh, self._genc, self._regy):
-            self._genh.add(gen_command_decl(name, arg_type, boxed, ret_type))
-            self._genh.add(gen_marshal_decl(name))
-            self._genc.add(gen_marshal(name, arg_type, boxed, ret_type))
-            self._regy.add(gen_register_command(name, success_response,
-                                                allow_oob, allow_preconfig,
-                                                coroutine))
+        with ifcontext(ifcond, self._genh, self._genc):
+            self._genh.add(gen_command_decl(name, arg_type, boxed,
+                                            ret_type, coroutine))
+            self._genh.add(gen_marshal_decl(name, coroutine))
+            self._genc.add(gen_marshal(name, arg_type, boxed, ret_type,
+                                       self._gen_tracing, coroutine))
+            if self._gen_tracing:
+                self._gen_trace_events.add(gen_trace(name))
+        with self._temp_module('./init'):
+            with ifcontext(ifcond, self._genh, self._genc):
+                self._genc.add(gen_register_command(
+                    name, features, success_response, allow_oob,
+                    allow_preconfig, coroutine))
 
 
 def gen_commands(schema: QAPISchema,
                  output_dir: str,
-                 prefix: str) -> None:
-    vis = QAPISchemaGenCommandVisitor(prefix)
+                 prefix: str,
+                 gen_tracing: bool) -> None:
+    vis = QAPISchemaGenCommandVisitor(prefix, gen_tracing)
     schema.visit(vis)
     vis.write(output_dir)
index 11b86beeabe6337ef2e35e78d1a1ac7363175651..737b059e629142f90fd5229f70416838c1f7c426 100644 (file)
 # See the COPYING file in the top-level directory.
 
 import re
-from typing import Optional, Sequence
+from typing import (
+    Any,
+    Dict,
+    Match,
+    Optional,
+    Sequence,
+    Union,
+)
 
 
 #: Magic string that gets removed along with all space to its right.
 EATSPACE = '\033EATSPACE.'
 POINTER_SUFFIX = ' *' + EATSPACE
-_C_NAME_TRANS = str.maketrans('.-', '__')
 
 
 def camel_to_upper(value: str) -> str:
@@ -108,10 +114,11 @@ def c_name(name: str, protect: bool = True) -> str:
                      'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not',
                      'not_eq', 'or', 'or_eq', 'xor', 'xor_eq'])
     # namespace pollution:
-    polluted_words = set(['unix', 'errno', 'mips', 'sparc', 'i386'])
-    name = name.translate(_C_NAME_TRANS)
-    if protect and (name in c89_words | c99_words | c11_words | gcc_words
-                    | cpp_words | polluted_words):
+    polluted_words = set(['unix', 'errno', 'mips', 'sparc', 'i386', 'linux'])
+    name = re.sub(r'[^A-Za-z0-9_]', '_', name)
+    if protect and (name in (c89_words | c99_words | c11_words | gcc_words
+                             | cpp_words | polluted_words)
+                    or name[0].isdigit()):
         return 'q_' + name
     return name
 
@@ -125,9 +132,6 @@ class Indentation:
     def __init__(self, initial: int = 0) -> None:
         self._level = initial
 
-    def __int__(self) -> int:
-        return self._level
-
     def __repr__(self) -> str:
         return "{}({:d})".format(type(self).__name__, self._level)
 
@@ -135,19 +139,13 @@ class Indentation:
         """Return the current indentation as a string of spaces."""
         return ' ' * self._level
 
-    def __bool__(self) -> bool:
-        """True when there is a non-zero indentation."""
-        return bool(self._level)
-
     def increase(self, amount: int = 4) -> None:
         """Increase the indentation level by ``amount``, default 4."""
         self._level += amount
 
     def decrease(self, amount: int = 4) -> None:
         """Decrease the indentation level by ``amount``, default 4."""
-        if self._level < amount:
-            raise ArithmeticError(
-                f"Can't remove {amount:d} spaces from {self!r}")
+        assert amount <= self._level
         self._level -= amount
 
 
@@ -162,8 +160,9 @@ def cgen(code: str, **kwds: object) -> str:
     Obey `indent`, and strip `EATSPACE`.
     """
     raw = code % kwds
-    if indent:
-        raw = re.sub(r'^(?!(#|$))', str(indent), raw, flags=re.MULTILINE)
+    pfx = str(indent)
+    if pfx:
+        raw = re.sub(r'^(?!(#|$))', pfx, raw, flags=re.MULTILINE)
     return re.sub(re.escape(EATSPACE) + r' *', '', raw)
 
 
@@ -194,19 +193,59 @@ def guardend(name: str) -> str:
                  name=c_fname(name).upper())
 
 
-def gen_if(ifcond: Sequence[str]) -> str:
-    ret = ''
-    for ifc in ifcond:
-        ret += mcgen('''
+def gen_ifcond(ifcond: Optional[Union[str, Dict[str, Any]]],
+               cond_fmt: str, not_fmt: str,
+               all_operator: str, any_operator: str) -> str:
+
+    def do_gen(ifcond: Union[str, Dict[str, Any]],
+               need_parens: bool) -> str:
+        if isinstance(ifcond, str):
+            return cond_fmt % ifcond
+        assert isinstance(ifcond, dict) and len(ifcond) == 1
+        if 'not' in ifcond:
+            return not_fmt % do_gen(ifcond['not'], True)
+        if 'all' in ifcond:
+            gen = gen_infix(all_operator, ifcond['all'])
+        else:
+            gen = gen_infix(any_operator, ifcond['any'])
+        if need_parens:
+            gen = '(' + gen + ')'
+        return gen
+
+    def gen_infix(operator: str, operands: Sequence[Any]) -> str:
+        return operator.join([do_gen(o, True) for o in operands])
+
+    if not ifcond:
+        return ''
+    return do_gen(ifcond, False)
+
+
+def cgen_ifcond(ifcond: Optional[Union[str, Dict[str, Any]]]) -> str:
+    return gen_ifcond(ifcond, 'defined(%s)', '!%s', ' && ', ' || ')
+
+
+def docgen_ifcond(ifcond: Optional[Union[str, Dict[str, Any]]]) -> str:
+    # TODO Doc generated for conditions needs polish
+    return gen_ifcond(ifcond, '%s', 'not %s', ' and ', ' or ')
+
+
+def gen_if(cond: str) -> str:
+    if not cond:
+        return ''
+    return mcgen('''
 #if %(cond)s
-''', cond=ifc)
-    return ret
+''', cond=cond)
 
 
-def gen_endif(ifcond: Sequence[str]) -> str:
-    ret = ''
-    for ifc in reversed(ifcond):
-        ret += mcgen('''
+def gen_endif(cond: str) -> str:
+    if not cond:
+        return ''
+    return mcgen('''
 #endif /* %(cond)s */
-''', cond=ifc)
-    return ret
+''', cond=cond)
+
+
+def must_match(pattern: str, string: str) -> Match[str]:
+    match = re.match(pattern, string)
+    assert match is not None
+    return match
index ae60d9e2fe8166f82e4601da3d70e599529b5ba0..e35e4ddb26a01085d2f86f201791a9e86a2d9f6f 100644 (file)
@@ -1,7 +1,5 @@
 # -*- coding: utf-8 -*-
 #
-# QAPI error classes
-#
 # Copyright (c) 2017-2019 Red Hat Inc.
 #
 # Authors:
 # This work is licensed under the terms of the GNU GPL, version 2.
 # See the COPYING file in the top-level directory.
 
+"""
+QAPI error classes
+
+Common error classes used throughout the package.  Additional errors may
+be defined in other modules.  At present, `QAPIParseError` is defined in
+parser.py.
+"""
+
+from typing import Optional
+
+from .source import QAPISourceInfo
+
 
 class QAPIError(Exception):
-    def __init__(self, info, col, msg):
-        Exception.__init__(self)
+    """Base class for all exceptions from the QAPI package."""
+
+
+class QAPISourceError(QAPIError):
+    """Error class for all exceptions identifying a source location."""
+    def __init__(self,
+                 info: Optional[QAPISourceInfo],
+                 msg: str,
+                 col: Optional[int] = None):
+        super().__init__()
         self.info = info
-        self.col = col
         self.msg = msg
+        self.col = col
 
-    def __str__(self):
+    def __str__(self) -> str:
+        assert self.info is not None
         loc = str(self.info)
         if self.col is not None:
             assert self.info.line is not None
@@ -27,17 +46,5 @@ class QAPIError(Exception):
         return loc + ': ' + self.msg
 
 
-class QAPIParseError(QAPIError):
-    def __init__(self, parser, msg):
-        col = 1
-        for ch in parser.src[parser.line_pos:parser.pos]:
-            if ch == '\t':
-                col = (col + 7) % 8 + 1
-            else:
-                col += 1
-        super().__init__(parser.info, col, msg)
-
-
-class QAPISemError(QAPIError):
-    def __init__(self, info, msg):
-        super().__init__(info, None, msg)
+class QAPISemError(QAPISourceError):
+    """Error class for semantic QAPI errors."""
index 599f3d1f564bd389110b04506e09f1b4eb49dadb..3cf01e96b6893074eb18a72b8bff4e465f5c0f40 100644 (file)
@@ -12,7 +12,7 @@ This work is licensed under the terms of the GNU GPL, version 2.
 See the COPYING file in the top-level directory.
 """
 
-from typing import List
+from typing import List, Optional
 
 from .common import c_enum_const, c_name, mcgen
 from .gen import QAPISchemaModularCVisitor, build_params, ifcontext
@@ -20,6 +20,7 @@ from .schema import (
     QAPISchema,
     QAPISchemaEnumMember,
     QAPISchemaFeature,
+    QAPISchemaIfCond,
     QAPISchemaObjectType,
 )
 from .source import QAPISourceInfo
@@ -27,7 +28,7 @@ from .types import gen_enum, gen_enum_lookup
 
 
 def build_event_send_proto(name: str,
-                           arg_type: QAPISchemaObjectType,
+                           arg_type: Optional[QAPISchemaObjectType],
                            boxed: bool) -> str:
     return 'void qapi_event_send_%(c_name)s(%(param)s)' % {
         'c_name': c_name(name.lower()),
@@ -35,7 +36,7 @@ def build_event_send_proto(name: str,
 
 
 def gen_event_send_decl(name: str,
-                        arg_type: QAPISchemaObjectType,
+                        arg_type: Optional[QAPISchemaObjectType],
                         boxed: bool) -> str:
     return mcgen('''
 
@@ -59,7 +60,7 @@ def gen_param_var(typ: QAPISchemaObjectType) -> str:
     for memb in typ.members:
         ret += sep
         sep = ', '
-        if memb.optional:
+        if memb.need_has():
             ret += 'has_' + c_name(memb.name) + sep
         if memb.type.name == 'str':
             # Cast away const added in build_params()
@@ -78,7 +79,8 @@ def gen_param_var(typ: QAPISchemaObjectType) -> str:
 
 
 def gen_event_send(name: str,
-                   arg_type: QAPISchemaObjectType,
+                   arg_type: Optional[QAPISchemaObjectType],
+                   features: List[QAPISchemaFeature],
                    boxed: bool,
                    event_enum_name: str,
                    event_emit: str) -> str:
@@ -99,6 +101,7 @@ def gen_event_send(name: str,
                 proto=build_event_send_proto(name, arg_type, boxed))
 
     if have_args:
+        assert arg_type is not None
         ret += mcgen('''
     QObject *obj;
     Visitor *v;
@@ -106,6 +109,16 @@ def gen_event_send(name: str,
         if not boxed:
             ret += gen_param_var(arg_type)
 
+    for f in features:
+        if f.is_special():
+            ret += mcgen('''
+
+    if (compat_policy.%(feat)s_output == COMPAT_POLICY_OUTPUT_HIDE) {
+        return;
+    }
+''',
+                         feat=f.name)
+
     ret += mcgen('''
 
     qmp = qmp_event_build_dict("%(name)s");
@@ -114,8 +127,9 @@ def gen_event_send(name: str,
                  name=name)
 
     if have_args:
+        assert arg_type is not None
         ret += mcgen('''
-    v = qobject_output_visitor_new(&obj);
+    v = qobject_output_visitor_new_qmp(&obj);
 ''')
         if not arg_type.is_implicit():
             ret += mcgen('''
@@ -134,7 +148,11 @@ def gen_event_send(name: str,
         ret += mcgen('''
 
     visit_complete(v, &obj);
-    qdict_put_obj(qmp, "data", obj);
+    if (qdict_size(qobject_to(QDict, obj))) {
+        qdict_put_obj(qmp, "data", obj);
+    } else {
+        qobject_unref(obj);
+    }
 ''')
 
     ret += mcgen('''
@@ -174,11 +192,10 @@ class QAPISchemaGenEventVisitor(QAPISchemaModularCVisitor):
 #include "%(prefix)sqapi-emit-events.h"
 #include "%(events)s.h"
 #include "%(visit)s.h"
+#include "qapi/compat-policy.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qdict.h"
-#include "qapi/qobject-output-visitor.h"
 #include "qapi/qmp-event.h"
-
 ''',
                              events=events, visit=visit,
                              prefix=self._prefix))
@@ -189,7 +206,7 @@ class QAPISchemaGenEventVisitor(QAPISchemaModularCVisitor):
                              types=types))
 
     def visit_end(self) -> None:
-        self._add_system_module('emit', ' * QAPI Events emission')
+        self._add_module('./emit', ' * QAPI Events emission')
         self._genc.preamble_add(mcgen('''
 #include "qemu/osdep.h"
 #include "%(prefix)sqapi-emit-events.h"
@@ -211,14 +228,14 @@ void %(event_emit)s(%(event_enum)s event, QDict *qdict);
 
     def visit_event(self,
                     name: str,
-                    info: QAPISourceInfo,
-                    ifcond: List[str],
+                    info: Optional[QAPISourceInfo],
+                    ifcond: QAPISchemaIfCond,
                     features: List[QAPISchemaFeature],
-                    arg_type: QAPISchemaObjectType,
+                    arg_type: Optional[QAPISchemaObjectType],
                     boxed: bool) -> None:
         with ifcontext(ifcond, self._genh, self._genc):
             self._genh.add(gen_event_send_decl(name, arg_type, boxed))
-            self._genc.add(gen_event_send(name, arg_type, boxed,
+            self._genc.add(gen_event_send(name, arg_type, features, boxed,
                                           self._event_enum_name,
                                           self._event_emit_name))
         # Note: we generate the enum member regardless of @ifcond, to
index 2fcaaa2497a89e1be3d1fc41d08a182d26d6f202..cae0a083591abfb7aa25b1ed88f25c43f9b8371a 100644 (file)
 # -*- coding: utf-8 -*-
 #
-# Check (context-free) QAPI schema expression structure
-#
 # Copyright IBM, Corp. 2011
-# Copyright (c) 2013-2019 Red Hat Inc.
+# Copyright (c) 2013-2021 Red Hat Inc.
 #
 # Authors:
 #  Anthony Liguori <aliguori@us.ibm.com>
 #  Markus Armbruster <armbru@redhat.com>
 #  Eric Blake <eblake@redhat.com>
 #  Marc-André Lureau <marcandre.lureau@redhat.com>
+#  John Snow <jsnow@redhat.com>
 #
 # This work is licensed under the terms of the GNU GPL, version 2.
 # See the COPYING file in the top-level directory.
 
-from collections import OrderedDict
+"""
+Normalize and validate (context-free) QAPI schema expression structures.
+
+`QAPISchemaParser` parses a QAPI schema into abstract syntax trees
+consisting of dict, list, str, bool, and int nodes.  This module ensures
+that these nested structures have the correct type(s) and key(s) where
+appropriate for the QAPI context-free grammar.
+
+The QAPI schema expression language allows for certain syntactic sugar;
+this module also handles the normalization process of these nested
+structures.
+
+See `check_exprs` for the main entry point.
+
+See `schema.QAPISchema` for processing into native Python data
+structures and contextual semantic validation.
+"""
+
 import re
+from typing import (
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Union,
+    cast,
+)
 
 from .common import c_name
 from .error import QAPISemError
+from .parser import QAPIExpression
+from .source import QAPISourceInfo
 
 
-# Names must be letters, numbers, -, and _.  They must start with letter,
-# except for downstream extensions which must start with __RFQDN_.
-# Dots are only valid in the downstream extension prefix.
-valid_name = re.compile(r'^(__[a-zA-Z0-9.-]+_)?'
-                        '[a-zA-Z][a-zA-Z0-9_-]*$')
+# See check_name_str(), below.
+valid_name = re.compile(r'(__[a-z0-9.-]+_)?'
+                        r'(x-)?'
+                        r'([a-z][a-z0-9_-]*)$', re.IGNORECASE)
 
 
-def check_name_is_str(name, info, source):
+def check_name_is_str(name: object,
+                      info: QAPISourceInfo,
+                      source: str) -> None:
+    """
+    Ensure that ``name`` is a ``str``.
+
+    :raise QAPISemError: When ``name`` fails validation.
+    """
     if not isinstance(name, str):
         raise QAPISemError(info, "%s requires a string name" % source)
 
 
-def check_name_str(name, info, source,
-                   allow_optional=False, enum_member=False,
-                   permit_upper=False):
-    membername = name
+def check_name_str(name: str, info: QAPISourceInfo, source: str) -> str:
+    """
+    Ensure that ``name`` is a valid QAPI name.
+
+    A valid name consists of ASCII letters, digits, ``-``, and ``_``,
+    starting with a letter.  It may be prefixed by a downstream prefix
+    of the form __RFQDN_, or the experimental prefix ``x-``.  If both
+    prefixes are present, the __RFDQN_ prefix goes first.
+
+    A valid name cannot start with ``q_``, which is reserved.
 
-    if allow_optional and name.startswith('*'):
-        membername = name[1:]
-    # Enum members can start with a digit, because the generated C
-    # code always prefixes it with the enum name
-    if enum_member and membername[0].isdigit():
-        membername = 'D' + membername
+    :param name: Name to check.
+    :param info: QAPI schema source file information.
+    :param source: Error string describing what ``name`` belongs to.
+
+    :raise QAPISemError: When ``name`` fails validation.
+    :return: The stem of the valid name, with no prefixes.
+    """
     # Reserve the entire 'q_' namespace for c_name(), and for 'q_empty'
     # and 'q_obj_*' implicit type names.
-    if not valid_name.match(membername) or \
-       c_name(membername, False).startswith('q_'):
+    match = valid_name.match(name)
+    if not match or c_name(name, False).startswith('q_'):
         raise QAPISemError(info, "%s has an invalid name" % source)
-    if not permit_upper and name.lower() != name:
-        raise QAPISemError(
-            info, "%s uses uppercase in name" % source)
-    assert not membername.startswith('*')
+    return match.group(3)
+
 
+def check_name_upper(name: str, info: QAPISourceInfo, source: str) -> None:
+    """
+    Ensure that ``name`` is a valid event name.
 
-def check_defn_name_str(name, info, meta):
-    check_name_str(name, info, meta, permit_upper=True)
-    if name.endswith('Kind') or name.endswith('List'):
+    This means it must be a valid QAPI name as checked by
+    `check_name_str()`, but where the stem prohibits lowercase
+    characters and ``-``.
+
+    :param name: Name to check.
+    :param info: QAPI schema source file information.
+    :param source: Error string describing what ``name`` belongs to.
+
+    :raise QAPISemError: When ``name`` fails validation.
+    """
+    stem = check_name_str(name, info, source)
+    if re.search(r'[a-z-]', stem):
+        raise QAPISemError(
+            info, "name of %s must not use lowercase or '-'" % source)
+
+
+def check_name_lower(name: str, info: QAPISourceInfo, source: str,
+                     permit_upper: bool = False,
+                     permit_underscore: bool = False) -> None:
+    """
+    Ensure that ``name`` is a valid command or member name.
+
+    This means it must be a valid QAPI name as checked by
+    `check_name_str()`, but where the stem prohibits uppercase
+    characters and ``_``.
+
+    :param name: Name to check.
+    :param info: QAPI schema source file information.
+    :param source: Error string describing what ``name`` belongs to.
+    :param permit_upper: Additionally permit uppercase.
+    :param permit_underscore: Additionally permit ``_``.
+
+    :raise QAPISemError: When ``name`` fails validation.
+    """
+    stem = check_name_str(name, info, source)
+    if ((not permit_upper and re.search(r'[A-Z]', stem))
+            or (not permit_underscore and '_' in stem)):
         raise QAPISemError(
-            info, "%s name should not end in '%s'" % (meta, name[-4:]))
+            info, "name of %s must not use uppercase or '_'" % source)
+
+
+def check_name_camel(name: str, info: QAPISourceInfo, source: str) -> None:
+    """
+    Ensure that ``name`` is a valid user-defined type name.
+
+    This means it must be a valid QAPI name as checked by
+    `check_name_str()`, but where the stem must be in CamelCase.
 
+    :param name: Name to check.
+    :param info: QAPI schema source file information.
+    :param source: Error string describing what ``name`` belongs to.
 
-def check_keys(value, info, source, required, optional):
+    :raise QAPISemError: When ``name`` fails validation.
+    """
+    stem = check_name_str(name, info, source)
+    if not re.match(r'[A-Z][A-Za-z0-9]*[a-z][A-Za-z0-9]*$', stem):
+        raise QAPISemError(info, "name of %s must use CamelCase" % source)
 
-    def pprint(elems):
+
+def check_defn_name_str(name: str, info: QAPISourceInfo, meta: str) -> None:
+    """
+    Ensure that ``name`` is a valid definition name.
+
+    Based on the value of ``meta``, this means that:
+      - 'event' names adhere to `check_name_upper()`.
+      - 'command' names adhere to `check_name_lower()`.
+      - Else, meta is a type, and must pass `check_name_camel()`.
+        These names must not end with ``List``.
+
+    :param name: Name to check.
+    :param info: QAPI schema source file information.
+    :param meta: Meta-type name of the QAPI expression.
+
+    :raise QAPISemError: When ``name`` fails validation.
+    """
+    if meta == 'event':
+        check_name_upper(name, info, meta)
+    elif meta == 'command':
+        check_name_lower(
+            name, info, meta,
+            permit_underscore=name in info.pragma.command_name_exceptions)
+    else:
+        check_name_camel(name, info, meta)
+        if name.endswith('List'):
+            raise QAPISemError(
+                info, "%s name should not end in 'List'" % meta)
+
+
+def check_keys(value: Dict[str, object],
+               info: QAPISourceInfo,
+               source: str,
+               required: List[str],
+               optional: List[str]) -> None:
+    """
+    Ensure that a dict has a specific set of keys.
+
+    :param value: The dict to check.
+    :param info: QAPI schema source file information.
+    :param source: Error string describing this ``value``.
+    :param required: Keys that *must* be present.
+    :param optional: Keys that *may* be present.
+
+    :raise QAPISemError: When unknown keys are present.
+    """
+
+    def pprint(elems: Iterable[str]) -> str:
         return ', '.join("'" + e + "'" for e in sorted(elems))
 
     missing = set(required) - set(value)
@@ -74,7 +210,7 @@ def check_keys(value, info, source, required, optional):
             "%s misses key%s %s"
             % (source, 's' if len(missing) > 1 else '',
                pprint(missing)))
-    allowed = set(required + optional)
+    allowed = set(required) | set(optional)
     unknown = set(value) - allowed
     if unknown:
         raise QAPISemError(
@@ -84,234 +220,409 @@ def check_keys(value, info, source, required, optional):
                pprint(unknown), pprint(allowed)))
 
 
-def check_flags(expr, info):
-    for key in ['gen', 'success-response']:
+def check_flags(expr: QAPIExpression) -> None:
+    """
+    Ensure flag members (if present) have valid values.
+
+    :param expr: The expression to validate.
+
+    :raise QAPISemError:
+        When certain flags have an invalid value, or when
+        incompatible flags are present.
+    """
+    for key in ('gen', 'success-response'):
         if key in expr and expr[key] is not False:
             raise QAPISemError(
-                info, "flag '%s' may only use false value" % key)
-    for key in ['boxed', 'allow-oob', 'allow-preconfig', 'coroutine']:
+                expr.info, "flag '%s' may only use false value" % key)
+    for key in ('boxed', 'allow-oob', 'allow-preconfig', 'coroutine'):
         if key in expr and expr[key] is not True:
             raise QAPISemError(
-                info, "flag '%s' may only use true value" % key)
+                expr.info, "flag '%s' may only use true value" % key)
     if 'allow-oob' in expr and 'coroutine' in expr:
         # This is not necessarily a fundamental incompatibility, but
         # we don't have a use case and the desired semantics isn't
         # obvious.  The simplest solution is to forbid it until we get
         # a use case for it.
-        raise QAPISemError(info, "flags 'allow-oob' and 'coroutine' "
-                                 "are incompatible")
+        raise QAPISemError(
+            expr.info, "flags 'allow-oob' and 'coroutine' are incompatible")
+
 
+def check_if(expr: Dict[str, object],
+             info: QAPISourceInfo, source: str) -> None:
+    """
+    Validate the ``if`` member of an object.
 
-def check_if(expr, info, source):
+    The ``if`` member may be either a ``str`` or a dict.
 
-    def check_if_str(ifcond, info):
-        if not isinstance(ifcond, str):
+    :param expr: The expression containing the ``if`` member to validate.
+    :param info: QAPI schema source file information.
+    :param source: Error string describing ``expr``.
+
+    :raise QAPISemError:
+        When the "if" member fails validation, or when there are no
+        non-empty conditions.
+    :return: None
+    """
+
+    def _check_if(cond: Union[str, object]) -> None:
+        if isinstance(cond, str):
+            if not re.fullmatch(r'[A-Z][A-Z0-9_]*', cond):
+                raise QAPISemError(
+                    info,
+                    "'if' condition '%s' of %s is not a valid identifier"
+                    % (cond, source))
+            return
+
+        if not isinstance(cond, dict):
             raise QAPISemError(
                 info,
-                "'if' condition of %s must be a string or a list of strings"
-                % source)
-        if ifcond.strip() == '':
+                "'if' condition of %s must be a string or an object" % source)
+        check_keys(cond, info, "'if' condition of %s" % source, [],
+                   ["all", "any", "not"])
+        if len(cond) != 1:
             raise QAPISemError(
                 info,
-                "'if' condition '%s' of %s makes no sense"
-                % (ifcond, source))
+                "'if' condition of %s has conflicting keys" % source)
+
+        if 'not' in cond:
+            _check_if(cond['not'])
+        elif 'all' in cond:
+            _check_infix('all', cond['all'])
+        else:
+            _check_infix('any', cond['any'])
+
+    def _check_infix(operator: str, operands: object) -> None:
+        if not isinstance(operands, list):
+            raise QAPISemError(
+                info,
+                "'%s' condition of %s must be an array"
+                % (operator, source))
+        if not operands:
+            raise QAPISemError(
+                info, "'if' condition [] of %s is useless" % source)
+        for operand in operands:
+            _check_if(operand)
 
     ifcond = expr.get('if')
     if ifcond is None:
         return
-    if isinstance(ifcond, list):
-        if ifcond == []:
-            raise QAPISemError(
-                info, "'if' condition [] of %s is useless" % source)
-        for elt in ifcond:
-            check_if_str(elt, info)
-    else:
-        check_if_str(ifcond, info)
-        expr['if'] = [ifcond]
 
+    _check_if(ifcond)
 
-def normalize_members(members):
-    if isinstance(members, OrderedDict):
+
+def normalize_members(members: object) -> None:
+    """
+    Normalize a "members" value.
+
+    If ``members`` is a dict, for every value in that dict, if that
+    value is not itself already a dict, normalize it to
+    ``{'type': value}``.
+
+    :forms:
+      :sugared: ``Dict[str, Union[str, TypeRef]]``
+      :canonical: ``Dict[str, TypeRef]``
+
+    :param members: The members value to normalize.
+
+    :return: None, ``members`` is normalized in-place as needed.
+    """
+    if isinstance(members, dict):
         for key, arg in members.items():
             if isinstance(arg, dict):
                 continue
             members[key] = {'type': arg}
 
 
-def check_type(value, info, source,
-               allow_array=False, allow_dict=False):
-    if value is None:
-        return
+def check_type_name(value: Optional[object],
+                    info: QAPISourceInfo, source: str) -> None:
+    if value is not None and not isinstance(value, str):
+        raise QAPISemError(info, "%s should be a type name" % source)
 
-    # Array type
-    if isinstance(value, list):
-        if not allow_array:
-            raise QAPISemError(info, "%s cannot be an array" % source)
-        if len(value) != 1 or not isinstance(value[0], str):
-            raise QAPISemError(info,
-                               "%s: array type must contain single type name" %
-                               source)
-        return
 
-    # Type name
-    if isinstance(value, str):
+def check_type_name_or_array(value: Optional[object],
+                             info: QAPISourceInfo, source: str) -> None:
+    if value is None or isinstance(value, str):
         return
 
-    # Anonymous type
+    if not isinstance(value, list):
+        raise QAPISemError(info,
+                           "%s should be a type name or array" % source)
 
-    if not allow_dict:
-        raise QAPISemError(info, "%s should be a type name" % source)
+    if len(value) != 1 or not isinstance(value[0], str):
+        raise QAPISemError(info,
+                           "%s: array type must contain single type name" %
+                           source)
+
+
+def check_type_implicit(value: Optional[object],
+                        info: QAPISourceInfo, source: str,
+                        parent_name: Optional[str]) -> None:
+    """
+    Normalize and validate an optional implicit struct type.
+
+    Accept ``None`` or a ``dict`` defining an implicit struct type.
+    The latter is normalized in place.
+
+    :param value: The value to check.
+    :param info: QAPI schema source file information.
+    :param source: Error string describing this ``value``.
+    :param parent_name:
+        When the value of ``parent_name`` is in pragma
+        ``member-name-exceptions``, an implicit struct type may
+        violate the member naming rules.
+
+    :raise QAPISemError: When ``value`` fails validation.
+    :return: None
+    """
+    if value is None:
+        return
 
-    if not isinstance(value, OrderedDict):
+    if not isinstance(value, dict):
         raise QAPISemError(info,
                            "%s should be an object or type name" % source)
 
-    permit_upper = allow_dict in info.pragma.name_case_whitelist
+    permissive = parent_name in info.pragma.member_name_exceptions
 
-    # value is a dictionary, check that each member is okay
     for (key, arg) in value.items():
         key_source = "%s member '%s'" % (source, key)
-        check_name_str(key, info, key_source,
-                       allow_optional=True, permit_upper=permit_upper)
+        if key.startswith('*'):
+            key = key[1:]
+        check_name_lower(key, info, key_source,
+                         permit_upper=permissive,
+                         permit_underscore=permissive)
         if c_name(key, False) == 'u' or c_name(key, False).startswith('has_'):
             raise QAPISemError(info, "%s uses reserved name" % key_source)
         check_keys(arg, info, key_source, ['type'], ['if', 'features'])
         check_if(arg, info, key_source)
         check_features(arg.get('features'), info)
-        check_type(arg['type'], info, key_source, allow_array=True)
+        check_type_name_or_array(arg['type'], info, key_source)
+
+
+def check_type_name_or_implicit(value: Optional[object],
+                                info: QAPISourceInfo, source: str,
+                                parent_name: Optional[str]) -> None:
+    if value is None or isinstance(value, str):
+        return
+
+    check_type_implicit(value, info, source, parent_name)
+
+
+def check_features(features: Optional[object],
+                   info: QAPISourceInfo) -> None:
+    """
+    Normalize and validate the ``features`` member.
+
+    ``features`` may be a ``list`` of either ``str`` or ``dict``.
+    Any ``str`` element will be normalized to ``{'name': element}``.
+
+    :forms:
+      :sugared: ``List[Union[str, Feature]]``
+      :canonical: ``List[Feature]``
 
+    :param features: The features member value to validate.
+    :param info: QAPI schema source file information.
 
-def check_features(features, info):
+    :raise QAPISemError: When ``features`` fails validation.
+    :return: None, ``features`` is normalized in-place as needed.
+    """
     if features is None:
         return
     if not isinstance(features, list):
         raise QAPISemError(info, "'features' must be an array")
     features[:] = [f if isinstance(f, dict) else {'name': f}
                    for f in features]
-    for f in features:
+    for feat in features:
         source = "'features' member"
-        assert isinstance(f, dict)
-        check_keys(f, info, source, ['name'], ['if'])
-        check_name_is_str(f['name'], info, source)
-        source = "%s '%s'" % (source, f['name'])
-        check_name_str(f['name'], info, source)
-        check_if(f, info, source)
+        assert isinstance(feat, dict)
+        check_keys(feat, info, source, ['name'], ['if'])
+        check_name_is_str(feat['name'], info, source)
+        source = "%s '%s'" % (source, feat['name'])
+        check_name_lower(feat['name'], info, source)
+        check_if(feat, info, source)
 
 
-def check_enum(expr, info):
+def check_enum(expr: QAPIExpression) -> None:
+    """
+    Normalize and validate this expression as an ``enum`` definition.
+
+    :param expr: The expression to validate.
+
+    :raise QAPISemError: When ``expr`` is not a valid ``enum``.
+    :return: None, ``expr`` is normalized in-place as needed.
+    """
     name = expr['enum']
     members = expr['data']
     prefix = expr.get('prefix')
+    info = expr.info
 
     if not isinstance(members, list):
         raise QAPISemError(info, "'data' must be an array")
     if prefix is not None and not isinstance(prefix, str):
         raise QAPISemError(info, "'prefix' must be a string")
 
-    permit_upper = name in info.pragma.name_case_whitelist
+    permissive = name in info.pragma.member_name_exceptions
 
     members[:] = [m if isinstance(m, dict) else {'name': m}
                   for m in members]
     for member in members:
         source = "'data' member"
-        check_keys(member, info, source, ['name'], ['if'])
-        check_name_is_str(member['name'], info, source)
-        source = "%s '%s'" % (source, member['name'])
-        check_name_str(member['name'], info, source,
-                       enum_member=True, permit_upper=permit_upper)
+        check_keys(member, info, source, ['name'], ['if', 'features'])
+        member_name = member['name']
+        check_name_is_str(member_name, info, source)
+        source = "%s '%s'" % (source, member_name)
+        # Enum members may start with a digit
+        if member_name[0].isdigit():
+            member_name = 'd' + member_name  # Hack: hide the digit
+        check_name_lower(member_name, info, source,
+                         permit_upper=permissive,
+                         permit_underscore=permissive)
         check_if(member, info, source)
+        check_features(member.get('features'), info)
+
 
+def check_struct(expr: QAPIExpression) -> None:
+    """
+    Normalize and validate this expression as a ``struct`` definition.
 
-def check_struct(expr, info):
-    name = expr['struct']
+    :param expr: The expression to validate.
+
+    :raise QAPISemError: When ``expr`` is not a valid ``struct``.
+    :return: None, ``expr`` is normalized in-place as needed.
+    """
+    name = cast(str, expr['struct'])  # Checked in check_exprs
     members = expr['data']
 
-    check_type(members, info, "'data'", allow_dict=name)
-    check_type(expr.get('base'), info, "'base'")
+    check_type_implicit(members, expr.info, "'data'", name)
+    check_type_name(expr.get('base'), expr.info, "'base'")
+
+
+def check_union(expr: QAPIExpression) -> None:
+    """
+    Normalize and validate this expression as a ``union`` definition.
 
+    :param expr: The expression to validate.
 
-def check_union(expr, info):
-    name = expr['union']
-    base = expr.get('base')
-    discriminator = expr.get('discriminator')
+    :raise QAPISemError: when ``expr`` is not a valid ``union``.
+    :return: None, ``expr`` is normalized in-place as needed.
+    """
+    name = cast(str, expr['union'])  # Checked in check_exprs
+    base = expr['base']
+    discriminator = expr['discriminator']
     members = expr['data']
+    info = expr.info
 
-    if discriminator is None:   # simple union
-        if base is not None:
-            raise QAPISemError(info, "'base' requires 'discriminator'")
-    else:                       # flat union
-        check_type(base, info, "'base'", allow_dict=name)
-        if not base:
-            raise QAPISemError(info, "'discriminator' requires 'base'")
-        check_name_is_str(discriminator, info, "'discriminator'")
+    check_type_name_or_implicit(base, info, "'base'", name)
+    check_name_is_str(discriminator, info, "'discriminator'")
+
+    if not isinstance(members, dict):
+        raise QAPISemError(info, "'data' must be an object")
 
     for (key, value) in members.items():
         source = "'data' member '%s'" % key
-        check_name_str(key, info, source)
         check_keys(value, info, source, ['type'], ['if'])
         check_if(value, info, source)
-        check_type(value['type'], info, source, allow_array=not base)
+        check_type_name(value['type'], info, source)
+
+
+def check_alternate(expr: QAPIExpression) -> None:
+    """
+    Normalize and validate this expression as an ``alternate`` definition.
 
+    :param expr: The expression to validate.
 
-def check_alternate(expr, info):
+    :raise QAPISemError: When ``expr`` is not a valid ``alternate``.
+    :return: None, ``expr`` is normalized in-place as needed.
+    """
     members = expr['data']
+    info = expr.info
 
     if not members:
         raise QAPISemError(info, "'data' must not be empty")
+
+    if not isinstance(members, dict):
+        raise QAPISemError(info, "'data' must be an object")
+
     for (key, value) in members.items():
         source = "'data' member '%s'" % key
-        check_name_str(key, info, source)
+        check_name_lower(key, info, source)
         check_keys(value, info, source, ['type'], ['if'])
         check_if(value, info, source)
-        check_type(value['type'], info, source)
+        check_type_name_or_array(value['type'], info, source)
+
 
+def check_command(expr: QAPIExpression) -> None:
+    """
+    Normalize and validate this expression as a ``command`` definition.
 
-def check_command(expr, info):
+    :param expr: The expression to validate.
+
+    :raise QAPISemError: When ``expr`` is not a valid ``command``.
+    :return: None, ``expr`` is normalized in-place as needed.
+    """
     args = expr.get('data')
     rets = expr.get('returns')
     boxed = expr.get('boxed', False)
 
-    if boxed and args is None:
-        raise QAPISemError(info, "'boxed': true requires 'data'")
-    check_type(args, info, "'data'", allow_dict=not boxed)
-    check_type(rets, info, "'returns'", allow_array=True)
+    if boxed:
+        if args is None:
+            raise QAPISemError(expr.info, "'boxed': true requires 'data'")
+        check_type_name(args, expr.info, "'data'")
+    else:
+        check_type_name_or_implicit(args, expr.info, "'data'", None)
+    check_type_name_or_array(rets, expr.info, "'returns'")
+
 
+def check_event(expr: QAPIExpression) -> None:
+    """
+    Normalize and validate this expression as an ``event`` definition.
 
-def check_event(expr, info):
+    :param expr: The expression to validate.
+
+    :raise QAPISemError: When ``expr`` is not a valid ``event``.
+    :return: None, ``expr`` is normalized in-place as needed.
+    """
     args = expr.get('data')
     boxed = expr.get('boxed', False)
 
-    if boxed and args is None:
-        raise QAPISemError(info, "'boxed': true requires 'data'")
-    check_type(args, info, "'data'", allow_dict=not boxed)
+    if boxed:
+        if args is None:
+            raise QAPISemError(expr.info, "'boxed': true requires 'data'")
+        check_type_name(args, expr.info, "'data'")
+    else:
+        check_type_name_or_implicit(args, expr.info, "'data'", None)
+
 
+def check_exprs(exprs: List[QAPIExpression]) -> List[QAPIExpression]:
+    """
+    Validate and normalize a list of parsed QAPI schema expressions.
 
-def check_exprs(exprs):
-    for expr_elem in exprs:
-        expr = expr_elem['expr']
-        info = expr_elem['info']
-        doc = expr_elem.get('doc')
+    This function accepts a list of expressions and metadata as returned
+    by the parser.  It destructively normalizes the expressions in-place.
+
+    :param exprs: The list of expressions to normalize and validate.
+
+    :raise QAPISemError: When any expression fails validation.
+    :return: The same list of expressions (now modified).
+    """
+    for expr in exprs:
+        info = expr.info
+        doc = expr.doc
 
         if 'include' in expr:
             continue
 
-        if 'enum' in expr:
-            meta = 'enum'
-        elif 'union' in expr:
-            meta = 'union'
-        elif 'alternate' in expr:
-            meta = 'alternate'
-        elif 'struct' in expr:
-            meta = 'struct'
-        elif 'command' in expr:
-            meta = 'command'
-        elif 'event' in expr:
-            meta = 'event'
-        else:
-            raise QAPISemError(info, "expression is missing metatype")
+        metas = expr.keys() & {'enum', 'struct', 'union', 'alternate',
+                               'command', 'event'}
+        if len(metas) != 1:
+            raise QAPISemError(
+                info,
+                "expression must have exactly one key"
+                " 'enum', 'struct', 'union', 'alternate',"
+                " 'command', 'event'")
+        meta = metas.pop()
 
-        name = expr[meta]
-        check_name_is_str(name, info, "'%s'" % meta)
+        check_name_is_str(expr[meta], info, "'%s'" % meta)
+        name = cast(str, expr[meta])
         info.set_defn(meta, name)
         check_defn_name_str(name, info, meta)
 
@@ -327,24 +638,24 @@ def check_exprs(exprs):
         if meta == 'enum':
             check_keys(expr, info, meta,
                        ['enum', 'data'], ['if', 'features', 'prefix'])
-            check_enum(expr, info)
+            check_enum(expr)
         elif meta == 'union':
             check_keys(expr, info, meta,
-                       ['union', 'data'],
-                       ['base', 'discriminator', 'if', 'features'])
+                       ['union', 'base', 'discriminator', 'data'],
+                       ['if', 'features'])
             normalize_members(expr.get('base'))
             normalize_members(expr['data'])
-            check_union(expr, info)
+            check_union(expr)
         elif meta == 'alternate':
             check_keys(expr, info, meta,
                        ['alternate', 'data'], ['if', 'features'])
             normalize_members(expr['data'])
-            check_alternate(expr, info)
+            check_alternate(expr)
         elif meta == 'struct':
             check_keys(expr, info, meta,
                        ['struct', 'data'], ['base', 'if', 'features'])
             normalize_members(expr['data'])
-            check_struct(expr, info)
+            check_struct(expr)
         elif meta == 'command':
             check_keys(expr, info, meta,
                        ['command'],
@@ -352,17 +663,17 @@ def check_exprs(exprs):
                         'gen', 'success-response', 'allow-oob',
                         'allow-preconfig', 'coroutine'])
             normalize_members(expr.get('data'))
-            check_command(expr, info)
+            check_command(expr)
         elif meta == 'event':
             check_keys(expr, info, meta,
                        ['event'], ['data', 'boxed', 'if', 'features'])
             normalize_members(expr.get('data'))
-            check_event(expr, info)
+            check_event(expr)
         else:
             assert False, 'unexpected meta type'
 
         check_if(expr, info, meta)
         check_features(expr.get('features'), info)
-        check_flags(expr, info)
+        check_flags(expr)
 
     return exprs
index b40f18eee3cdc7f199a67e91ca4718e3bec93579..5412716617aad5ad4e4685e2aedd5431cb3636f0 100644 (file)
 from contextlib import contextmanager
 import os
 import re
+import sys
 from typing import (
     Dict,
     Iterator,
-    List,
     Optional,
+    Sequence,
     Tuple,
 )
 
 from .common import (
     c_fname,
     c_name,
-    gen_endif,
-    gen_if,
     guardend,
     guardstart,
     mcgen,
 )
-from .schema import QAPISchemaObjectType, QAPISchemaVisitor
+from .schema import (
+    QAPISchemaFeature,
+    QAPISchemaIfCond,
+    QAPISchemaModule,
+    QAPISchemaObjectType,
+    QAPISchemaVisitor,
+)
 from .source import QAPISourceInfo
 
 
+def gen_special_features(features: Sequence[QAPISchemaFeature]) -> str:
+    special_features = [f"1u << QAPI_{feat.name.upper()}"
+                        for feat in features if feat.is_special()]
+    return ' | '.join(special_features) or '0'
+
+
 class QAPIGen:
-    def __init__(self, fname: Optional[str]):
+    def __init__(self, fname: str):
         self.fname = fname
         self._preamble = ''
         self._body = ''
@@ -70,7 +81,7 @@ class QAPIGen:
         if odir:
             os.makedirs(odir, exist_ok=True)
 
-        # use os.open for O_CREAT to create and read a non-existant file
+        # use os.open for O_CREAT to create and read a non-existent file
         fd = os.open(pathname, os.O_RDWR | os.O_CREAT, 0o666)
         with os.fdopen(fd, 'r+', encoding='utf-8') as fp:
             text = self.get_content()
@@ -81,7 +92,7 @@ class QAPIGen:
                 fp.write(text)
 
 
-def _wrap_ifcond(ifcond: List[str], before: str, after: str) -> str:
+def _wrap_ifcond(ifcond: QAPISchemaIfCond, before: str, after: str) -> str:
     if before == after:
         return after   # suppress empty #if ... #endif
 
@@ -91,9 +102,9 @@ def _wrap_ifcond(ifcond: List[str], before: str, after: str) -> str:
     if added[0] == '\n':
         out += '\n'
         added = added[1:]
-    out += gen_if(ifcond)
+    out += ifcond.gen_if()
     out += added
-    out += gen_endif(ifcond)
+    out += ifcond.gen_endif()
     return out
 
 
@@ -109,9 +120,10 @@ def build_params(arg_type: Optional[QAPISchemaObjectType],
     elif arg_type:
         assert not arg_type.variants
         for memb in arg_type.members:
+            assert not memb.ifcond.is_present()
             ret += sep
             sep = ', '
-            if memb.optional:
+            if memb.need_has():
                 ret += 'bool has_%s, ' % c_name(memb.name)
             ret += '%s %s' % (memb.type.c_param_type(),
                               c_name(memb.name))
@@ -121,24 +133,21 @@ def build_params(arg_type: Optional[QAPISchemaObjectType],
 
 
 class QAPIGenCCode(QAPIGen):
-    def __init__(self, fname: Optional[str]):
+    def __init__(self, fname: str):
         super().__init__(fname)
-        self._start_if: Optional[Tuple[List[str], str, str]] = None
+        self._start_if: Optional[Tuple[QAPISchemaIfCond, str, str]] = None
 
-    def start_if(self, ifcond: List[str]) -> None:
+    def start_if(self, ifcond: QAPISchemaIfCond) -> None:
         assert self._start_if is None
         self._start_if = (ifcond, self._body, self._preamble)
 
     def end_if(self) -> None:
-        assert self._start_if
-        self._wrap_ifcond()
-        self._start_if = None
-
-    def _wrap_ifcond(self) -> None:
+        assert self._start_if is not None
         self._body = _wrap_ifcond(self._start_if[0],
                                   self._start_if[1], self._body)
         self._preamble = _wrap_ifcond(self._start_if[0],
                                       self._start_if[2], self._preamble)
+        self._start_if = None
 
     def get_content(self) -> str:
         assert self._start_if is None
@@ -154,7 +163,7 @@ class QAPIGenC(QAPIGenCCode):
 
     def _top(self) -> str:
         return mcgen('''
-/* AUTOMATICALLY GENERATED, DO NOT MODIFY */
+/* AUTOMATICALLY GENERATED by %(tool)s DO NOT MODIFY */
 
 /*
 %(blurb)s
@@ -166,6 +175,7 @@ class QAPIGenC(QAPIGenCCode):
  */
 
 ''',
+                     tool=os.path.basename(sys.argv[0]),
                      blurb=self._blurb, copyright=self._copyright)
 
     def _bottom(self) -> str:
@@ -185,12 +195,20 @@ class QAPIGenH(QAPIGenC):
         return guardend(self.fname)
 
 
+class QAPIGenTrace(QAPIGen):
+    def _top(self) -> str:
+        return (super()._top()
+                + '# AUTOMATICALLY GENERATED by '
+                + os.path.basename(sys.argv[0])
+                + ', DO NOT MODIFY\n\n')
+
+
 @contextmanager
-def ifcontext(ifcond: List[str], *args: QAPIGenCCode) -> Iterator[None]:
+def ifcontext(ifcond: QAPISchemaIfCond, *args: QAPIGenCCode) -> Iterator[None]:
     """
     A with-statement context manager that wraps with `start_if()` / `end_if()`.
 
-    :param ifcond: A list of conditionals, passed to `start_if()`.
+    :param ifcond: A sequence of conditionals, passed to `start_if()`.
     :param args: any number of `QAPIGenCCode`.
 
     Example::
@@ -237,91 +255,111 @@ class QAPISchemaModularCVisitor(QAPISchemaVisitor):
                  what: str,
                  user_blurb: str,
                  builtin_blurb: Optional[str],
-                 pydoc: str):
+                 pydoc: str,
+                 gen_tracing: bool = False):
         self._prefix = prefix
         self._what = what
         self._user_blurb = user_blurb
         self._builtin_blurb = builtin_blurb
         self._pydoc = pydoc
-        self._genc: Optional[QAPIGenC] = None
-        self._genh: Optional[QAPIGenH] = None
-        self._module: Dict[Optional[str], Tuple[QAPIGenC, QAPIGenH]] = {}
+        self._current_module: Optional[str] = None
+        self._module: Dict[str, Tuple[QAPIGenC, QAPIGenH,
+                                      Optional[QAPIGenTrace]]] = {}
         self._main_module: Optional[str] = None
+        self._gen_tracing = gen_tracing
+
+    @property
+    def _genc(self) -> QAPIGenC:
+        assert self._current_module is not None
+        return self._module[self._current_module][0]
+
+    @property
+    def _genh(self) -> QAPIGenH:
+        assert self._current_module is not None
+        return self._module[self._current_module][1]
+
+    @property
+    def _gen_trace_events(self) -> QAPIGenTrace:
+        assert self._gen_tracing
+        assert self._current_module is not None
+        gent = self._module[self._current_module][2]
+        assert gent is not None
+        return gent
 
     @staticmethod
-    def _is_user_module(name: Optional[str]) -> bool:
-        return bool(name and not name.startswith('./'))
-
-    @staticmethod
-    def _is_builtin_module(name: Optional[str]) -> bool:
-        return not name
-
-    def _module_dirname(self, name: Optional[str]) -> str:
-        if self._is_user_module(name):
+    def _module_dirname(name: str) -> str:
+        if QAPISchemaModule.is_user_module(name):
             return os.path.dirname(name)
         return ''
 
-    def _module_basename(self, what: str, name: Optional[str]) -> str:
-        ret = '' if self._is_builtin_module(name) else self._prefix
-        if self._is_user_module(name):
+    def _module_basename(self, what: str, name: str) -> str:
+        ret = '' if QAPISchemaModule.is_builtin_module(name) else self._prefix
+        if QAPISchemaModule.is_user_module(name):
             basename = os.path.basename(name)
             ret += what
             if name != self._main_module:
                 ret += '-' + os.path.splitext(basename)[0]
         else:
-            name = name[2:] if name else 'builtin'
-            ret += re.sub(r'-', '-' + name + '-', what)
+            assert QAPISchemaModule.is_system_module(name)
+            ret += re.sub(r'-', '-' + name[2:] + '-', what)
         return ret
 
-    def _module_filename(self, what: str, name: Optional[str]) -> str:
+    def _module_filename(self, what: str, name: str) -> str:
         return os.path.join(self._module_dirname(name),
                             self._module_basename(what, name))
 
-    def _add_module(self, name: Optional[str], blurb: str) -> None:
+    def _add_module(self, name: str, blurb: str) -> None:
+        if QAPISchemaModule.is_user_module(name):
+            if self._main_module is None:
+                self._main_module = name
         basename = self._module_filename(self._what, name)
         genc = QAPIGenC(basename + '.c', blurb, self._pydoc)
         genh = QAPIGenH(basename + '.h', blurb, self._pydoc)
-        self._module[name] = (genc, genh)
-        self._genc, self._genh = self._module[name]
 
-    def _add_user_module(self, name: str, blurb: str) -> None:
-        assert self._is_user_module(name)
-        if self._main_module is None:
-            self._main_module = name
-        self._add_module(name, blurb)
+        gent: Optional[QAPIGenTrace] = None
+        if self._gen_tracing:
+            gent = QAPIGenTrace(basename + '.trace-events')
+
+        self._module[name] = (genc, genh, gent)
+        self._current_module = name
 
-    def _add_system_module(self, name: Optional[str], blurb: str) -> None:
-        self._add_module(name and './' + name, blurb)
+    @contextmanager
+    def _temp_module(self, name: str) -> Iterator[None]:
+        old_module = self._current_module
+        self._current_module = name
+        yield
+        self._current_module = old_module
 
     def write(self, output_dir: str, opt_builtins: bool = False) -> None:
-        for name in self._module:
-            if self._is_builtin_module(name) and not opt_builtins:
+        for name, (genc, genh, gent) in self._module.items():
+            if QAPISchemaModule.is_builtin_module(name) and not opt_builtins:
                 continue
-            (genc, genh) = self._module[name]
             genc.write(output_dir)
             genh.write(output_dir)
+            if gent is not None:
+                gent.write(output_dir)
 
-    def _begin_system_module(self, name: None) -> None:
+    def _begin_builtin_module(self) -> None:
         pass
 
     def _begin_user_module(self, name: str) -> None:
         pass
 
-    def visit_module(self, name: Optional[str]) -> None:
-        if name is None:
+    def visit_module(self, name: str) -> None:
+        if QAPISchemaModule.is_builtin_module(name):
             if self._builtin_blurb:
-                self._add_system_module(None, self._builtin_blurb)
-                self._begin_system_module(name)
+                self._add_module(name, self._builtin_blurb)
+                self._begin_builtin_module()
             else:
                 # The built-in module has not been created.  No code may
                 # be generated.
-                self._genc = None
-                self._genh = None
+                self._current_module = None
         else:
-            self._add_user_module(name, self._user_blurb)
+            assert QAPISchemaModule.is_user_module(name)
+            self._add_module(name, self._user_blurb)
             self._begin_user_module(name)
 
-    def visit_include(self, name: str, info: QAPISourceInfo) -> None:
+    def visit_include(self, name: str, info: Optional[QAPISourceInfo]) -> None:
         relname = os.path.relpath(self._module_filename(self._what, name),
                                   os.path.dirname(self._genh.fname))
         self._genh.preamble_add(mcgen('''
index fafec94e0229d5b2d504784d560d1a5465ebf40d..67c7d89aae00cba7eade35abd21129c41038b3f6 100644 (file)
 """
 QAPI introspection generator
 
-Copyright (C) 2015-2018 Red Hat, Inc.
+Copyright (C) 2015-2021 Red Hat, Inc.
 
 Authors:
  Markus Armbruster <armbru@redhat.com>
+ John Snow <jsnow@redhat.com>
 
 This work is licensed under the terms of the GNU GPL, version 2.
 See the COPYING file in the top-level directory.
 """
 
-from .common import (
-    c_name,
-    gen_endif,
-    gen_if,
-    mcgen,
+from typing import (
+    Any,
+    Dict,
+    Generic,
+    List,
+    Optional,
+    Sequence,
+    TypeVar,
+    Union,
 )
+
+from .common import c_name, mcgen
 from .gen import QAPISchemaMonolithicCVisitor
 from .schema import (
+    QAPISchema,
     QAPISchemaArrayType,
     QAPISchemaBuiltinType,
+    QAPISchemaEntity,
+    QAPISchemaEnumMember,
+    QAPISchemaFeature,
+    QAPISchemaIfCond,
+    QAPISchemaObjectType,
+    QAPISchemaObjectTypeMember,
     QAPISchemaType,
+    QAPISchemaVariant,
+    QAPISchemaVariants,
 )
-
-
-def _make_tree(obj, ifcond, features, extra=None):
-    if extra is None:
-        extra = {}
-    if ifcond:
-        extra['if'] = ifcond
-    if features:
-        obj['features'] = [(f.name, {'if': f.ifcond}) for f in features]
-    if extra:
-        return (obj, extra)
-    return obj
-
-
-def _tree_to_qlit(obj, level=0, suppress_first_indent=False):
-
-    def indent(level):
+from .source import QAPISourceInfo
+
+
+# This module constructs a tree data structure that is used to
+# generate the introspection information for QEMU. It is shaped
+# like a JSON value.
+#
+# A complexity over JSON is that our values may or may not be annotated.
+#
+# Un-annotated values may be:
+#     Scalar: str, bool, None.
+#     Non-scalar: List, Dict
+# _value = Union[str, bool, None, Dict[str, JSONValue], List[JSONValue]]
+#
+# With optional annotations, the type of all values is:
+# JSONValue = Union[_Value, Annotated[_Value]]
+#
+# Sadly, mypy does not support recursive types; so the _Stub alias is used to
+# mark the imprecision in the type model where we'd otherwise use JSONValue.
+_Stub = Any
+_Scalar = Union[str, bool, None]
+_NonScalar = Union[Dict[str, _Stub], List[_Stub]]
+_Value = Union[_Scalar, _NonScalar]
+JSONValue = Union[_Value, 'Annotated[_Value]']
+
+# These types are based on structures defined in QEMU's schema, so we
+# lack precise types for them here. Python 3.6 does not offer
+# TypedDict constructs, so they are broadly typed here as simple
+# Python Dicts.
+SchemaInfo = Dict[str, object]
+SchemaInfoEnumMember = Dict[str, object]
+SchemaInfoObject = Dict[str, object]
+SchemaInfoObjectVariant = Dict[str, object]
+SchemaInfoObjectMember = Dict[str, object]
+SchemaInfoCommand = Dict[str, object]
+
+
+_ValueT = TypeVar('_ValueT', bound=_Value)
+
+
+class Annotated(Generic[_ValueT]):
+    """
+    Annotated generally contains a SchemaInfo-like type (as a dict),
+    But it also used to wrap comments/ifconds around scalar leaf values,
+    for the benefit of features and enums.
+    """
+    # TODO: Remove after Python 3.7 adds @dataclass:
+    # pylint: disable=too-few-public-methods
+    def __init__(self, value: _ValueT, ifcond: QAPISchemaIfCond,
+                 comment: Optional[str] = None):
+        self.value = value
+        self.comment: Optional[str] = comment
+        self.ifcond = ifcond
+
+
+def _tree_to_qlit(obj: JSONValue,
+                  level: int = 0,
+                  dict_value: bool = False) -> str:
+    """
+    Convert the type tree into a QLIT C string, recursively.
+
+    :param obj: The value to convert.
+                This value may not be Annotated when dict_value is True.
+    :param level: The indentation level for this particular value.
+    :param dict_value: True when the value being processed belongs to a
+                       dict key; which suppresses the output indent.
+    """
+
+    def indent(level: int) -> str:
         return level * 4 * ' '
 
-    if isinstance(obj, tuple):
-        ifobj, extra = obj
-        ifcond = extra.get('if')
-        comment = extra.get('comment')
+    if isinstance(obj, Annotated):
+        # NB: _tree_to_qlit is called recursively on the values of a
+        # key:value pair; those values can't be decorated with
+        # comments or conditionals.
+        msg = "dict values cannot have attached comments or if-conditionals."
+        assert not dict_value, msg
+
         ret = ''
-        if comment:
-            ret += indent(level) + '/* %s */\n' % comment
-        if ifcond:
-            ret += gen_if(ifcond)
-        ret += _tree_to_qlit(ifobj, level)
-        if ifcond:
-            ret += '\n' + gen_endif(ifcond)
+        if obj.comment:
+            ret += indent(level) + f"/* {obj.comment} */\n"
+        if obj.ifcond.is_present():
+            ret += obj.ifcond.gen_if()
+        ret += _tree_to_qlit(obj.value, level)
+        if obj.ifcond.is_present():
+            ret += '\n' + obj.ifcond.gen_endif()
         return ret
 
     ret = ''
-    if not suppress_first_indent:
+    if not dict_value:
         ret += indent(level)
+
+    # Scalars:
     if obj is None:
         ret += 'QLIT_QNULL'
     elif isinstance(obj, str):
-        ret += 'QLIT_QSTR(' + to_c_string(obj) + ')'
+        ret += f"QLIT_QSTR({to_c_string(obj)})"
+    elif isinstance(obj, bool):
+        ret += f"QLIT_QBOOL({str(obj).lower()})"
+
+    # Non-scalars:
     elif isinstance(obj, list):
-        elts = [_tree_to_qlit(elt, level + 1).strip('\n')
-                for elt in obj]
-        elts.append(indent(level + 1) + "{}")
         ret += 'QLIT_QLIST(((QLitObject[]) {\n'
-        ret += '\n'.join(elts) + '\n'
+        for value in obj:
+            ret += _tree_to_qlit(value, level + 1).strip('\n') + '\n'
+        ret += indent(level + 1) + '{}\n'
         ret += indent(level) + '}))'
     elif isinstance(obj, dict):
-        elts = []
-        for key, value in sorted(obj.items()):
-            elts.append(indent(level + 1) + '{ %s, %s }' %
-                        (to_c_string(key),
-                         _tree_to_qlit(value, level + 1, True)))
-        elts.append(indent(level + 1) + '{}')
         ret += 'QLIT_QDICT(((QLitDictEntry[]) {\n'
-        ret += ',\n'.join(elts) + '\n'
+        for key, value in sorted(obj.items()):
+            ret += indent(level + 1) + "{{ {:s}, {:s} }},\n".format(
+                to_c_string(key),
+                _tree_to_qlit(value, level + 1, dict_value=True)
+            )
+        ret += indent(level + 1) + '{}\n'
         ret += indent(level) + '}))'
-    elif isinstance(obj, bool):
-        ret += 'QLIT_QBOOL(%s)' % ('true' if obj else 'false')
     else:
-        assert False                # not implemented
+        raise NotImplementedError(
+            f"type '{type(obj).__name__}' not implemented"
+        )
+
     if level > 0:
         ret += ','
     return ret
 
 
-def to_c_string(string):
+def to_c_string(string: str) -> str:
     return '"' + string.replace('\\', r'\\').replace('"', r'\"') + '"'
 
 
 class QAPISchemaGenIntrospectVisitor(QAPISchemaMonolithicCVisitor):
 
-    def __init__(self, prefix, unmask):
+    def __init__(self, prefix: str, unmask: bool):
         super().__init__(
             prefix, 'qapi-introspect',
             ' * QAPI/QMP schema introspection', __doc__)
         self._unmask = unmask
-        self._schema = None
-        self._trees = []
-        self._used_types = []
-        self._name_map = {}
+        self._schema: Optional[QAPISchema] = None
+        self._trees: List[Annotated[SchemaInfo]] = []
+        self._used_types: List[QAPISchemaType] = []
+        self._name_map: Dict[str, str] = {}
         self._genc.add(mcgen('''
 #include "qemu/osdep.h"
 #include "%(prefix)sqapi-introspect.h"
@@ -110,10 +186,10 @@ class QAPISchemaGenIntrospectVisitor(QAPISchemaMonolithicCVisitor):
 ''',
                              prefix=prefix))
 
-    def visit_begin(self, schema):
+    def visit_begin(self, schema: QAPISchema) -> None:
         self._schema = schema
 
-    def visit_end(self):
+    def visit_end(self) -> None:
         # visit the types that are actually used
         for typ in self._used_types:
             typ.visit(self)
@@ -135,18 +211,20 @@ const QLitObject %(c_name)s = %(c_string)s;
         self._used_types = []
         self._name_map = {}
 
-    def visit_needed(self, entity):
+    def visit_needed(self, entity: QAPISchemaEntity) -> bool:
         # Ignore types on first pass; visit_end() will pick up used types
         return not isinstance(entity, QAPISchemaType)
 
-    def _name(self, name):
+    def _name(self, name: str) -> str:
         if self._unmask:
             return name
         if name not in self._name_map:
             self._name_map[name] = '%d' % len(self._name_map)
         return self._name_map[name]
 
-    def _use_type(self, typ):
+    def _use_type(self, typ: QAPISchemaType) -> str:
+        assert self._schema is not None
+
         # Map the various integer types to plain int
         if typ.json_type() == 'int':
             typ = self._schema.lookup_type('int')
@@ -165,81 +243,148 @@ const QLitObject %(c_name)s = %(c_string)s;
             return '[' + self._use_type(typ.element_type) + ']'
         return self._name(typ.name)
 
-    def _gen_tree(self, name, mtype, obj, ifcond, features):
-        extra = None
+    @staticmethod
+    def _gen_features(features: Sequence[QAPISchemaFeature]
+                      ) -> List[Annotated[str]]:
+        return [Annotated(f.name, f.ifcond) for f in features]
+
+    def _gen_tree(self, name: str, mtype: str, obj: Dict[str, object],
+                  ifcond: QAPISchemaIfCond = QAPISchemaIfCond(),
+                  features: Sequence[QAPISchemaFeature] = ()) -> None:
+        """
+        Build and append a SchemaInfo object to self._trees.
+
+        :param name: The SchemaInfo's name.
+        :param mtype: The SchemaInfo's meta-type.
+        :param obj: Additional SchemaInfo members, as appropriate for
+                    the meta-type.
+        :param ifcond: Conditionals to apply to the SchemaInfo.
+        :param features: The SchemaInfo's features.
+                         Will be omitted from the output if empty.
+        """
+        comment: Optional[str] = None
         if mtype not in ('command', 'event', 'builtin', 'array'):
             if not self._unmask:
                 # Output a comment to make it easy to map masked names
                 # back to the source when reading the generated output.
-                extra = {'comment': '"%s" = %s' % (self._name(name), name)}
+                comment = f'"{self._name(name)}" = {name}'
             name = self._name(name)
         obj['name'] = name
         obj['meta-type'] = mtype
-        self._trees.append(_make_tree(obj, ifcond, features, extra))
-
-    def _gen_member(self, member):
-        obj = {'name': member.name, 'type': self._use_type(member.type)}
+        if features:
+            obj['features'] = self._gen_features(features)
+        self._trees.append(Annotated(obj, ifcond, comment))
+
+    def _gen_enum_member(self, member: QAPISchemaEnumMember
+                         ) -> Annotated[SchemaInfoEnumMember]:
+        obj: SchemaInfoEnumMember = {
+            'name': member.name,
+        }
+        if member.features:
+            obj['features'] = self._gen_features(member.features)
+        return Annotated(obj, member.ifcond)
+
+    def _gen_object_member(self, member: QAPISchemaObjectTypeMember
+                           ) -> Annotated[SchemaInfoObjectMember]:
+        obj: SchemaInfoObjectMember = {
+            'name': member.name,
+            'type': self._use_type(member.type)
+        }
         if member.optional:
             obj['default'] = None
-        return _make_tree(obj, member.ifcond, member.features)
-
-    def _gen_variants(self, tag_name, variants):
-        return {'tag': tag_name,
-                'variants': [self._gen_variant(v) for v in variants]}
-
-    def _gen_variant(self, variant):
-        obj = {'case': variant.name, 'type': self._use_type(variant.type)}
-        return _make_tree(obj, variant.ifcond, None)
-
-    def visit_builtin_type(self, name, info, json_type):
-        self._gen_tree(name, 'builtin', {'json-type': json_type}, [], None)
-
-    def visit_enum_type(self, name, info, ifcond, features, members, prefix):
-        self._gen_tree(name, 'enum',
-                       {'values': [_make_tree(m.name, m.ifcond, None)
-                                   for m in members]},
-                       ifcond, features)
-
-    def visit_array_type(self, name, info, ifcond, element_type):
+        if member.features:
+            obj['features'] = self._gen_features(member.features)
+        return Annotated(obj, member.ifcond)
+
+    def _gen_variant(self, variant: QAPISchemaVariant
+                     ) -> Annotated[SchemaInfoObjectVariant]:
+        obj: SchemaInfoObjectVariant = {
+            'case': variant.name,
+            'type': self._use_type(variant.type)
+        }
+        return Annotated(obj, variant.ifcond)
+
+    def visit_builtin_type(self, name: str, info: Optional[QAPISourceInfo],
+                           json_type: str) -> None:
+        self._gen_tree(name, 'builtin', {'json-type': json_type})
+
+    def visit_enum_type(self, name: str, info: Optional[QAPISourceInfo],
+                        ifcond: QAPISchemaIfCond,
+                        features: List[QAPISchemaFeature],
+                        members: List[QAPISchemaEnumMember],
+                        prefix: Optional[str]) -> None:
+        self._gen_tree(
+            name, 'enum',
+            {'members': [self._gen_enum_member(m) for m in members],
+             'values': [Annotated(m.name, m.ifcond) for m in members]},
+            ifcond, features
+        )
+
+    def visit_array_type(self, name: str, info: Optional[QAPISourceInfo],
+                         ifcond: QAPISchemaIfCond,
+                         element_type: QAPISchemaType) -> None:
         element = self._use_type(element_type)
         self._gen_tree('[' + element + ']', 'array', {'element-type': element},
-                       ifcond, None)
-
-    def visit_object_type_flat(self, name, info, ifcond, features,
-                               members, variants):
-        obj = {'members': [self._gen_member(m) for m in members]}
+                       ifcond)
+
+    def visit_object_type_flat(self, name: str, info: Optional[QAPISourceInfo],
+                               ifcond: QAPISchemaIfCond,
+                               features: List[QAPISchemaFeature],
+                               members: List[QAPISchemaObjectTypeMember],
+                               variants: Optional[QAPISchemaVariants]) -> None:
+        obj: SchemaInfoObject = {
+            'members': [self._gen_object_member(m) for m in members]
+        }
         if variants:
-            obj.update(self._gen_variants(variants.tag_member.name,
-                                          variants.variants))
-
+            obj['tag'] = variants.tag_member.name
+            obj['variants'] = [self._gen_variant(v) for v in variants.variants]
         self._gen_tree(name, 'object', obj, ifcond, features)
 
-    def visit_alternate_type(self, name, info, ifcond, features, variants):
-        self._gen_tree(name, 'alternate',
-                       {'members': [
-                           _make_tree({'type': self._use_type(m.type)},
-                                      m.ifcond, None)
-                           for m in variants.variants]},
-                       ifcond, features)
+    def visit_alternate_type(self, name: str, info: Optional[QAPISourceInfo],
+                             ifcond: QAPISchemaIfCond,
+                             features: List[QAPISchemaFeature],
+                             variants: QAPISchemaVariants) -> None:
+        self._gen_tree(
+            name, 'alternate',
+            {'members': [Annotated({'type': self._use_type(m.type)},
+                                   m.ifcond)
+                         for m in variants.variants]},
+            ifcond, features
+        )
+
+    def visit_command(self, name: str, info: Optional[QAPISourceInfo],
+                      ifcond: QAPISchemaIfCond,
+                      features: List[QAPISchemaFeature],
+                      arg_type: Optional[QAPISchemaObjectType],
+                      ret_type: Optional[QAPISchemaType], gen: bool,
+                      success_response: bool, boxed: bool, allow_oob: bool,
+                      allow_preconfig: bool, coroutine: bool) -> None:
+        assert self._schema is not None
 
-    def visit_command(self, name, info, ifcond, features,
-                      arg_type, ret_type, gen, success_response, boxed,
-                      allow_oob, allow_preconfig, coroutine):
         arg_type = arg_type or self._schema.the_empty_object_type
         ret_type = ret_type or self._schema.the_empty_object_type
-        obj = {'arg-type': self._use_type(arg_type),
-               'ret-type': self._use_type(ret_type)}
+        obj: SchemaInfoCommand = {
+            'arg-type': self._use_type(arg_type),
+            'ret-type': self._use_type(ret_type)
+        }
         if allow_oob:
             obj['allow-oob'] = allow_oob
         self._gen_tree(name, 'command', obj, ifcond, features)
 
-    def visit_event(self, name, info, ifcond, features, arg_type, boxed):
+    def visit_event(self, name: str, info: Optional[QAPISourceInfo],
+                    ifcond: QAPISchemaIfCond,
+                    features: List[QAPISchemaFeature],
+                    arg_type: Optional[QAPISchemaObjectType],
+                    boxed: bool) -> None:
+        assert self._schema is not None
+
         arg_type = arg_type or self._schema.the_empty_object_type
         self._gen_tree(name, 'event', {'arg-type': self._use_type(arg_type)},
                        ifcond, features)
 
 
-def gen_introspect(schema, output_dir, prefix, opt_unmask):
+def gen_introspect(schema: QAPISchema, output_dir: str, prefix: str,
+                   opt_unmask: bool) -> None:
     vis = QAPISchemaGenIntrospectVisitor(prefix, opt_unmask)
     schema.visit(vis)
     vis.write(output_dir)
index 42517210b80580937bcdd1f3737fac59a0aca868..316736b6a29429b33d7850411fdf66753aa110f8 100644 (file)
@@ -8,11 +8,11 @@ This is the main entry point for generating C code from the QAPI schema.
 """
 
 import argparse
-import re
 import sys
 from typing import Optional
 
 from .commands import gen_commands
+from .common import must_match
 from .error import QAPIError
 from .events import gen_events
 from .introspect import gen_introspect
@@ -22,7 +22,7 @@ from .visit import gen_visit
 
 
 def invalid_prefix_char(prefix: str) -> Optional[str]:
-    match = re.match(r'([A-Za-z_.-][A-Za-z0-9_.-]*)?', prefix)
+    match = must_match(r'([A-Za-z_.-][A-Za-z0-9_.-]*)?', prefix)
     if match.end() != len(prefix):
         return prefix[match.end()]
     return None
@@ -32,7 +32,8 @@ def generate(schema_file: str,
              output_dir: str,
              prefix: str,
              unmask: bool = False,
-             builtins: bool = False) -> None:
+             builtins: bool = False,
+             gen_tracing: bool = False) -> None:
     """
     Generate C code for the given schema into the target directory.
 
@@ -49,7 +50,7 @@ def generate(schema_file: str,
     schema = QAPISchema(schema_file)
     gen_types(schema, output_dir, prefix, builtins)
     gen_visit(schema, output_dir, prefix, builtins)
-    gen_commands(schema, output_dir, prefix)
+    gen_commands(schema, output_dir, prefix, gen_tracing)
     gen_events(schema, output_dir, prefix)
     gen_introspect(schema, output_dir, prefix, unmask)
 
@@ -74,6 +75,12 @@ def main() -> int:
     parser.add_argument('-u', '--unmask-non-abi-names', action='store_true',
                         dest='unmask',
                         help="expose non-ABI names in introspection")
+
+    # Option --suppress-tracing exists so we can avoid solving build system
+    # problems.  TODO Drop it when we no longer need it.
+    parser.add_argument('--suppress-tracing', action='store_true',
+                        help="suppress adding trace events to qmp marshals")
+
     parser.add_argument('schema', action='store')
     args = parser.parse_args()
 
@@ -88,8 +95,9 @@ def main() -> int:
                  output_dir=args.output_dir,
                  prefix=args.prefix,
                  unmask=args.unmask,
-                 builtins=args.builtins)
+                 builtins=args.builtins,
+                 gen_tracing=not args.suppress_tracing)
     except QAPIError as err:
-        print(f"{sys.argv[0]}: {str(err)}", file=sys.stderr)
+        print(err, file=sys.stderr)
         return 1
     return 0
index 74fc6c82153e35d6a3f53c6410660d7e289a78ad..56e0dfb132788ef517ddb576cab1981a990b051a 100644 (file)
@@ -1,28 +1,7 @@
 [mypy]
 strict = True
-strict_optional = False
 disallow_untyped_calls = False
-python_version = 3.6
-
-[mypy-qapi.error]
-disallow_untyped_defs = False
-disallow_incomplete_defs = False
-check_untyped_defs = False
-
-[mypy-qapi.expr]
-disallow_untyped_defs = False
-disallow_incomplete_defs = False
-check_untyped_defs = False
-
-[mypy-qapi.introspect]
-disallow_untyped_defs = False
-disallow_incomplete_defs = False
-check_untyped_defs = False
-
-[mypy-qapi.parser]
-disallow_untyped_defs = False
-disallow_incomplete_defs = False
-check_untyped_defs = False
+python_version = 3.8
 
 [mypy-qapi.schema]
 disallow_untyped_defs = False
index e7b9d670ad6c154b9ec9250cf4a7312fef603039..bf31018aef0ac43cf7995cda6362635e8c093c5b 100644 (file)
 from collections import OrderedDict
 import os
 import re
-
-from .error import QAPIParseError, QAPISemError
+from typing import (
+    TYPE_CHECKING,
+    Dict,
+    List,
+    Mapping,
+    Match,
+    Optional,
+    Set,
+    Union,
+)
+
+from .common import must_match
+from .error import QAPISemError, QAPISourceError
 from .source import QAPISourceInfo
 
 
+if TYPE_CHECKING:
+    # pylint: disable=cyclic-import
+    # TODO: Remove cycle. [schema -> expr -> parser -> schema]
+    from .schema import QAPISchemaFeature, QAPISchemaMember
+
+
+# Return value alias for get_expr().
+_ExprValue = Union[List[object], Dict[str, object], str, bool]
+
+
+class QAPIExpression(Dict[str, object]):
+    # pylint: disable=too-few-public-methods
+    def __init__(self,
+                 data: Mapping[str, object],
+                 info: QAPISourceInfo,
+                 doc: Optional['QAPIDoc'] = None):
+        super().__init__(data)
+        self.info = info
+        self.doc: Optional['QAPIDoc'] = doc
+
+
+class QAPIParseError(QAPISourceError):
+    """Error class for all QAPI schema parsing errors."""
+    def __init__(self, parser: 'QAPISchemaParser', msg: str):
+        col = 1
+        for ch in parser.src[parser.line_pos:parser.pos]:
+            if ch == '\t':
+                col = (col + 7) % 8 + 1
+            else:
+                col += 1
+        super().__init__(parser.info, msg, col)
+
+
 class QAPISchemaParser:
+    """
+    Parse QAPI schema source.
 
-    def __init__(self, fname, previously_included=None, incl_info=None):
-        previously_included = previously_included or set()
-        previously_included.add(os.path.abspath(fname))
+    Parse a JSON-esque schema file and process directives.  See
+    qapi-code-gen.txt section "Schema Syntax" for the exact syntax.
+    Grammatical validation is handled later by `expr.check_exprs()`.
 
-        try:
-            fp = open(fname, 'r', encoding='utf-8')
-            self.src = fp.read()
-        except IOError as e:
-            raise QAPISemError(incl_info or QAPISourceInfo(None, None, None),
-                               "can't read %s file '%s': %s"
-                               % ("include" if incl_info else "schema",
-                                  fname,
-                                  e.strerror))
+    :param fname: Source file name.
+    :param previously_included:
+        The absolute names of previously included source files,
+        if being invoked from another parser.
+    :param incl_info:
+       `QAPISourceInfo` belonging to the parent module.
+       ``None`` implies this is the root module.
 
-        if self.src == '' or self.src[-1] != '\n':
-            self.src += '\n'
+    :ivar exprs: Resulting parsed expressions.
+    :ivar docs: Resulting parsed documentation blocks.
+
+    :raise OSError: For problems reading the root schema document.
+    :raise QAPIError: For errors in the schema source.
+    """
+    def __init__(self,
+                 fname: str,
+                 previously_included: Optional[Set[str]] = None,
+                 incl_info: Optional[QAPISourceInfo] = None):
+        self._fname = fname
+        self._included = previously_included or set()
+        self._included.add(os.path.abspath(self._fname))
+        self.src = ''
+
+        # Lexer state (see `accept` for details):
+        self.info = QAPISourceInfo(self._fname, incl_info)
+        self.tok: Union[None, str] = None
+        self.pos = 0
         self.cursor = 0
-        self.info = QAPISourceInfo(fname, 1, incl_info)
+        self.val: Optional[Union[bool, str]] = None
         self.line_pos = 0
-        self.exprs = []
-        self.docs = []
-        self.accept()
+
+        # Parser output:
+        self.exprs: List[QAPIExpression] = []
+        self.docs: List[QAPIDoc] = []
+
+        # Showtime!
+        self._parse()
+
+    def _parse(self) -> None:
+        """
+        Parse the QAPI schema document.
+
+        :return: None.  Results are stored in ``.exprs`` and ``.docs``.
+        """
         cur_doc = None
 
+        # May raise OSError; allow the caller to handle it.
+        with open(self._fname, 'r', encoding='utf-8') as fp:
+            self.src = fp.read()
+        if self.src == '' or self.src[-1] != '\n':
+            self.src += '\n'
+
+        # Prime the lexer:
+        self.accept()
+
+        # Parse until done:
         while self.tok is not None:
             info = self.info
             if self.tok == '#':
@@ -56,7 +138,11 @@ class QAPISchemaParser:
                     self.docs.append(cur_doc)
                 continue
 
-            expr = self.get_expr(False)
+            expr = self.get_expr()
+            if not isinstance(expr, dict):
+                raise QAPISemError(
+                    info, "top-level expression must be an object")
+
             if 'include' in expr:
                 self.reject_expr_doc(cur_doc)
                 if len(expr) != 1:
@@ -65,12 +151,11 @@ class QAPISchemaParser:
                 if not isinstance(include, str):
                     raise QAPISemError(info,
                                        "value of 'include' must be a string")
-                incl_fname = os.path.join(os.path.dirname(fname),
+                incl_fname = os.path.join(os.path.dirname(self._fname),
                                           include)
-                self.exprs.append({'expr': {'include': incl_fname},
-                                   'info': info})
+                self._add_expr(OrderedDict({'include': incl_fname}), info)
                 exprs_include = self._include(include, info, incl_fname,
-                                              previously_included)
+                                              self._included)
                 if exprs_include:
                     self.exprs.extend(exprs_include.exprs)
                     self.docs.extend(exprs_include.docs)
@@ -85,29 +170,35 @@ class QAPISchemaParser:
                 for name, value in pragma.items():
                     self._pragma(name, value, info)
             else:
-                expr_elem = {'expr': expr,
-                             'info': info}
-                if cur_doc:
-                    if not cur_doc.symbol:
-                        raise QAPISemError(
-                            cur_doc.info, "definition documentation required")
-                    expr_elem['doc'] = cur_doc
-                self.exprs.append(expr_elem)
+                if cur_doc and not cur_doc.symbol:
+                    raise QAPISemError(
+                        cur_doc.info, "definition documentation required")
+                self._add_expr(expr, info, cur_doc)
             cur_doc = None
         self.reject_expr_doc(cur_doc)
 
+    def _add_expr(self, expr: Mapping[str, object],
+                  info: QAPISourceInfo,
+                  doc: Optional['QAPIDoc'] = None) -> None:
+        self.exprs.append(QAPIExpression(expr, info, doc))
+
     @staticmethod
-    def reject_expr_doc(doc):
+    def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
         if doc and doc.symbol:
             raise QAPISemError(
                 doc.info,
                 "documentation for '%s' is not followed by the definition"
                 % doc.symbol)
 
-    def _include(self, include, info, incl_fname, previously_included):
+    @staticmethod
+    def _include(include: str,
+                 info: QAPISourceInfo,
+                 incl_fname: str,
+                 previously_included: Set[str]
+                 ) -> Optional['QAPISchemaParser']:
         incl_abs_fname = os.path.abspath(incl_fname)
         # catch inclusion cycle
-        inf = info
+        inf: Optional[QAPISourceInfo] = info
         while inf:
             if incl_abs_fname == os.path.abspath(inf.fname):
                 raise QAPISemError(info, "inclusion loop for %s" % include)
@@ -117,32 +208,86 @@ class QAPISchemaParser:
         if incl_abs_fname in previously_included:
             return None
 
-        return QAPISchemaParser(incl_fname, previously_included, info)
+        try:
+            return QAPISchemaParser(incl_fname, previously_included, info)
+        except OSError as err:
+            raise QAPISemError(
+                info,
+                f"can't read include file '{incl_fname}': {err.strerror}"
+            ) from err
+
+    @staticmethod
+    def _pragma(name: str, value: object, info: QAPISourceInfo) -> None:
+
+        def check_list_str(name: str, value: object) -> List[str]:
+            if (not isinstance(value, list) or
+                    any(not isinstance(elt, str) for elt in value)):
+                raise QAPISemError(
+                    info,
+                    "pragma %s must be a list of strings" % name)
+            return value
+
+        pragma = info.pragma
 
-    def _pragma(self, name, value, info):
         if name == 'doc-required':
             if not isinstance(value, bool):
                 raise QAPISemError(info,
                                    "pragma 'doc-required' must be boolean")
-            info.pragma.doc_required = value
-        elif name == 'returns-whitelist':
-            if (not isinstance(value, list)
-                    or any([not isinstance(elt, str) for elt in value])):
-                raise QAPISemError(
-                    info,
-                    "pragma returns-whitelist must be a list of strings")
-            info.pragma.returns_whitelist = value
-        elif name == 'name-case-whitelist':
-            if (not isinstance(value, list)
-                    or any([not isinstance(elt, str) for elt in value])):
-                raise QAPISemError(
-                    info,
-                    "pragma name-case-whitelist must be a list of strings")
-            info.pragma.name_case_whitelist = value
+            pragma.doc_required = value
+        elif name == 'command-name-exceptions':
+            pragma.command_name_exceptions = check_list_str(name, value)
+        elif name == 'command-returns-exceptions':
+            pragma.command_returns_exceptions = check_list_str(name, value)
+        elif name == 'member-name-exceptions':
+            pragma.member_name_exceptions = check_list_str(name, value)
         else:
             raise QAPISemError(info, "unknown pragma '%s'" % name)
 
-    def accept(self, skip_comment=True):
+    def accept(self, skip_comment: bool = True) -> None:
+        """
+        Read and store the next token.
+
+        :param skip_comment:
+            When false, return COMMENT tokens ("#").
+            This is used when reading documentation blocks.
+
+        :return:
+            None.  Several instance attributes are updated instead:
+
+            - ``.tok`` represents the token type.  See below for values.
+            - ``.info`` describes the token's source location.
+            - ``.val`` is the token's value, if any.  See below.
+            - ``.pos`` is the buffer index of the first character of
+              the token.
+
+        * Single-character tokens:
+
+            These are "{", "}", ":", ",", "[", and "]".
+            ``.tok`` holds the single character and ``.val`` is None.
+
+        * Multi-character tokens:
+
+          * COMMENT:
+
+            This token is not normally returned by the lexer, but it can
+            be when ``skip_comment`` is False.  ``.tok`` is "#", and
+            ``.val`` is a string including all chars until end-of-line,
+            including the "#" itself.
+
+          * STRING:
+
+            ``.tok`` is "'", the single quote.  ``.val`` contains the
+            string, excluding the surrounding quotes.
+
+          * TRUE and FALSE:
+
+            ``.tok`` is either "t" or "f", ``.val`` will be the
+            corresponding bool value.
+
+          * EOF:
+
+            ``.tok`` and ``.val`` will both be None at EOF.
+        """
         while True:
             self.tok = self.src[self.cursor]
             self.pos = self.cursor
@@ -202,12 +347,12 @@ class QAPISchemaParser:
             elif not self.tok.isspace():
                 # Show up to next structural, whitespace or quote
                 # character
-                match = re.match('[^[\\]{}:,\\s\'"]+',
-                                 self.src[self.cursor-1:])
+                match = must_match('[^[\\]{}:,\\s\']+',
+                                   self.src[self.cursor-1:])
                 raise QAPIParseError(self, "stray '%s'" % match.group(0))
 
-    def get_members(self):
-        expr = OrderedDict()
+    def get_members(self) -> Dict[str, object]:
+        expr: Dict[str, object] = OrderedDict()
         if self.tok == '}':
             self.accept()
             return expr
@@ -215,13 +360,15 @@ class QAPISchemaParser:
             raise QAPIParseError(self, "expected string or '}'")
         while True:
             key = self.val
+            assert isinstance(key, str)  # Guaranteed by tok == "'"
+
             self.accept()
             if self.tok != ':':
                 raise QAPIParseError(self, "expected ':'")
             self.accept()
             if key in expr:
                 raise QAPIParseError(self, "duplicate key '%s'" % key)
-            expr[key] = self.get_expr(True)
+            expr[key] = self.get_expr()
             if self.tok == '}':
                 self.accept()
                 return expr
@@ -231,16 +378,16 @@ class QAPISchemaParser:
             if self.tok != "'":
                 raise QAPIParseError(self, "expected string")
 
-    def get_values(self):
-        expr = []
+    def get_values(self) -> List[object]:
+        expr: List[object] = []
         if self.tok == ']':
             self.accept()
             return expr
-        if self.tok not in "{['tfn":
+        if self.tok not in tuple("{['tf"):
             raise QAPIParseError(
-                self, "expected '{', '[', ']', string, boolean or 'null'")
+                self, "expected '{', '[', ']', string, or boolean")
         while True:
-            expr.append(self.get_expr(True))
+            expr.append(self.get_expr())
             if self.tok == ']':
                 self.accept()
                 return expr
@@ -248,24 +395,24 @@ class QAPISchemaParser:
                 raise QAPIParseError(self, "expected ',' or ']'")
             self.accept()
 
-    def get_expr(self, nested):
-        if self.tok != '{' and not nested:
-            raise QAPIParseError(self, "expected '{'")
+    def get_expr(self) -> _ExprValue:
+        expr: _ExprValue
         if self.tok == '{':
             self.accept()
             expr = self.get_members()
         elif self.tok == '[':
             self.accept()
             expr = self.get_values()
-        elif self.tok in "'tfn":
+        elif self.tok in tuple("'tf"):
+            assert isinstance(self.val, (str, bool))
             expr = self.val
             self.accept()
         else:
             raise QAPIParseError(
-                self, "expected '{', '[', string, boolean or 'null'")
+                self, "expected '{', '[', string, or boolean")
         return expr
 
-    def get_doc(self, info):
+    def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']:
         if self.val != '##':
             raise QAPIParseError(
                 self, "junk after '##' at start of documentation comment")
@@ -274,6 +421,7 @@ class QAPISchemaParser:
         cur_doc = QAPIDoc(self, info)
         self.accept(False)
         while self.tok == '#':
+            assert isinstance(self.val, str)
             if self.val.startswith('##'):
                 # End of doc comment
                 if self.val != '##':
@@ -319,63 +467,79 @@ class QAPIDoc:
     """
 
     class Section:
-        def __init__(self, parser, name=None, indent=0):
+        # pylint: disable=too-few-public-methods
+        def __init__(self, parser: QAPISchemaParser,
+                     name: Optional[str] = None):
             # parser, for error messages about indentation
             self._parser = parser
             # optional section name (argument/member or section name)
             self.name = name
+            # section text without section name
             self.text = ''
-            # the expected indent level of the text of this section
-            self._indent = indent
+            # indentation to strip (None means indeterminate)
+            self._indent = None if self.name else 0
+
+        def append(self, line: str) -> None:
+            line = line.rstrip()
 
-        def append(self, line):
-            # Strip leading spaces corresponding to the expected indent level
-            # Blank lines are always OK.
             if line:
-                indent = re.match(r'\s*', line).end()
-                if indent < self._indent:
+                indent = must_match(r'\s*', line).end()
+                if self._indent is None:
+                    # indeterminate indentation
+                    if self.text != '':
+                        # non-blank, non-first line determines indentation
+                        self._indent = indent
+                elif indent < self._indent:
                     raise QAPIParseError(
                         self._parser,
                         "unexpected de-indent (expected at least %d spaces)" %
                         self._indent)
                 line = line[self._indent:]
 
-            self.text += line.rstrip() + '\n'
+            self.text += line + '\n'
 
     class ArgSection(Section):
-        def __init__(self, parser, name, indent=0):
-            super().__init__(parser, name, indent)
-            self.member = None
+        def __init__(self, parser: QAPISchemaParser,
+                     name: str):
+            super().__init__(parser, name)
+            self.member: Optional['QAPISchemaMember'] = None
 
-        def connect(self, member):
+        def connect(self, member: 'QAPISchemaMember') -> None:
             self.member = member
 
-    def __init__(self, parser, info):
+    class NullSection(Section):
+        """
+        Immutable dummy section for use at the end of a doc block.
+        """
+        # pylint: disable=too-few-public-methods
+        def append(self, line: str) -> None:
+            assert False, "Text appended after end_comment() called."
+
+    def __init__(self, parser: QAPISchemaParser, info: QAPISourceInfo):
         # self._parser is used to report errors with QAPIParseError.  The
         # resulting error position depends on the state of the parser.
         # It happens to be the beginning of the comment.  More or less
         # servicable, but action at a distance.
         self._parser = parser
         self.info = info
-        self.symbol = None
+        self.symbol: Optional[str] = None
         self.body = QAPIDoc.Section(parser)
-        # dict mapping parameter name to ArgSection
-        self.args = OrderedDict()
-        self.features = OrderedDict()
-        # a list of Section
-        self.sections = []
+        # dicts mapping parameter/feature names to their ArgSection
+        self.args: Dict[str, QAPIDoc.ArgSection] = OrderedDict()
+        self.features: Dict[str, QAPIDoc.ArgSection] = OrderedDict()
+        self.sections: List[QAPIDoc.Section] = []
         # the current section
         self._section = self.body
         self._append_line = self._append_body_line
 
-    def has_section(self, name):
+    def has_section(self, name: str) -> bool:
         """Return True if we have a section with this name."""
         for i in self.sections:
             if i.name == name:
                 return True
         return False
 
-    def append(self, line):
+    def append(self, line: str) -> None:
         """
         Parse a comment line and add it to the documentation.
 
@@ -396,18 +560,18 @@ class QAPIDoc:
         line = line[1:]
         self._append_line(line)
 
-    def end_comment(self):
-        self._end_section()
+    def end_comment(self) -> None:
+        self._switch_section(QAPIDoc.NullSection(self._parser))
+
+    @staticmethod
+    def _match_at_name_colon(string: str) -> Optional[Match[str]]:
+        return re.match(r'@([^:]*): *', string)
 
     @staticmethod
-    def _is_section_tag(name):
-        return name in ('Returns:', 'Since:',
-                        # those are often singular or plural
-                        'Note:', 'Notes:',
-                        'Example:', 'Examples:',
-                        'TODO:')
-
-    def _append_body_line(self, line):
+    def _match_section_tag(string: str) -> Optional[Match[str]]:
+        return re.match(r'(Returns|Since|Notes?|Examples?|TODO): *', string)
+
+    def _append_body_line(self, line: str) -> None:
         """
         Process a line of documentation text in the body section.
 
@@ -421,24 +585,25 @@ class QAPIDoc:
 
         Else, append the line to the current section.
         """
-        name = line.split(' ', 1)[0]
         # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
         # recognized, and get silently treated as ordinary text
         if not self.symbol and not self.body.text and line.startswith('@'):
             if not line.endswith(':'):
                 raise QAPIParseError(self._parser, "line should end with ':'")
             self.symbol = line[1:-1]
-            # FIXME invalid names other than the empty string aren't flagged
+            # Invalid names are not checked here, but the name provided MUST
+            # match the following definition, which *is* validated in expr.py.
             if not self.symbol:
-                raise QAPIParseError(self._parser, "invalid name")
+                raise QAPIParseError(
+                    self._parser, "name required after '@'")
         elif self.symbol:
             # This is a definition documentation block
-            if name.startswith('@') and name.endswith(':'):
+            if self._match_at_name_colon(line):
                 self._append_line = self._append_args_line
                 self._append_args_line(line)
             elif line == 'Features:':
                 self._append_line = self._append_features_line
-            elif self._is_section_tag(name):
+            elif self._match_section_tag(line):
                 self._append_line = self._append_various_line
                 self._append_various_line(line)
             else:
@@ -447,7 +612,7 @@ class QAPIDoc:
             # This is a free-form documentation block
             self._append_freeform(line)
 
-    def _append_args_line(self, line):
+    def _append_args_line(self, line: str) -> None:
         """
         Process a line of documentation text in an argument section.
 
@@ -459,25 +624,11 @@ class QAPIDoc:
         Else, append the line to the current section.
 
         """
-        name = line.split(' ', 1)[0]
-
-        if name.startswith('@') and name.endswith(':'):
-            # If line is "@arg:   first line of description", find
-            # the index of 'f', which is the indent we expect for any
-            # following lines.  We then remove the leading "@arg:"
-            # from line and replace it with spaces so that 'f' has the
-            # same index as it did in the original line and can be
-            # handled the same way we will handle following lines.
-            indent = re.match(r'@\S*:\s*', line).end()
-            line = line[indent:]
-            if not line:
-                # Line was just the "@arg:" header; following lines
-                # are not indented
-                indent = 0
-            else:
-                line = ' ' * indent + line
-            self._start_args_section(name[1:-1], indent)
-        elif self._is_section_tag(name):
+        match = self._match_at_name_colon(line)
+        if match:
+            line = line[match.end():]
+            self._start_args_section(match.group(1))
+        elif self._match_section_tag(line):
             self._append_line = self._append_various_line
             self._append_various_line(line)
             return
@@ -493,26 +644,12 @@ class QAPIDoc:
 
         self._append_freeform(line)
 
-    def _append_features_line(self, line):
-        name = line.split(' ', 1)[0]
-
-        if name.startswith('@') and name.endswith(':'):
-            # If line is "@arg:   first line of description", find
-            # the index of 'f', which is the indent we expect for any
-            # following lines.  We then remove the leading "@arg:"
-            # from line and replace it with spaces so that 'f' has the
-            # same index as it did in the original line and can be
-            # handled the same way we will handle following lines.
-            indent = re.match(r'@\S*:\s*', line).end()
-            line = line[indent:]
-            if not line:
-                # Line was just the "@arg:" header; following lines
-                # are not indented
-                indent = 0
-            else:
-                line = ' ' * indent + line
-            self._start_features_section(name[1:-1], indent)
-        elif self._is_section_tag(name):
+    def _append_features_line(self, line: str) -> None:
+        match = self._match_at_name_colon(line)
+        if match:
+            line = line[match.end():]
+            self._start_features_section(match.group(1))
+        elif self._match_section_tag(line):
             self._append_line = self._append_various_line
             self._append_various_line(line)
             return
@@ -525,7 +662,7 @@ class QAPIDoc:
 
         self._append_freeform(line)
 
-    def _append_various_line(self, line):
+    def _append_various_line(self, line: str) -> None:
         """
         Process a line of documentation text in an additional section.
 
@@ -536,32 +673,22 @@ class QAPIDoc:
 
         Else, append the line to the current section.
         """
-        name = line.split(' ', 1)[0]
-
-        if name.startswith('@') and name.endswith(':'):
+        match = self._match_at_name_colon(line)
+        if match:
             raise QAPIParseError(self._parser,
-                                 "'%s' can't follow '%s' section"
-                                 % (name, self.sections[0].name))
-        if self._is_section_tag(name):
-            # If line is "Section:   first line of description", find
-            # the index of 'f', which is the indent we expect for any
-            # following lines.  We then remove the leading "Section:"
-            # from line and replace it with spaces so that 'f' has the
-            # same index as it did in the original line and can be
-            # handled the same way we will handle following lines.
-            indent = re.match(r'\S*:\s*', line).end()
-            line = line[indent:]
-            if not line:
-                # Line was just the "Section:" header; following lines
-                # are not indented
-                indent = 0
-            else:
-                line = ' ' * indent + line
-            self._start_section(name[:-1], indent)
+                                 "description of '@%s:' follows a section"
+                                 % match.group(1))
+        match = self._match_section_tag(line)
+        if match:
+            line = line[match.end():]
+            self._start_section(match.group(1))
 
         self._append_freeform(line)
 
-    def _start_symbol_section(self, symbols_dict, name, indent):
+    def _start_symbol_section(
+            self,
+            symbols_dict: Dict[str, 'QAPIDoc.ArgSection'],
+            name: str) -> None:
         # FIXME invalid names other than the empty string aren't flagged
         if not name:
             raise QAPIParseError(self._parser, "invalid parameter name")
@@ -569,34 +696,40 @@ class QAPIDoc:
             raise QAPIParseError(self._parser,
                                  "'%s' parameter name duplicated" % name)
         assert not self.sections
-        self._end_section()
-        self._section = QAPIDoc.ArgSection(self._parser, name, indent)
-        symbols_dict[name] = self._section
+        new_section = QAPIDoc.ArgSection(self._parser, name)
+        self._switch_section(new_section)
+        symbols_dict[name] = new_section
 
-    def _start_args_section(self, name, indent):
-        self._start_symbol_section(self.args, name, indent)
+    def _start_args_section(self, name: str) -> None:
+        self._start_symbol_section(self.args, name)
 
-    def _start_features_section(self, name, indent):
-        self._start_symbol_section(self.features, name, indent)
+    def _start_features_section(self, name: str) -> None:
+        self._start_symbol_section(self.features, name)
 
-    def _start_section(self, name=None, indent=0):
+    def _start_section(self, name: Optional[str] = None) -> None:
         if name in ('Returns', 'Since') and self.has_section(name):
             raise QAPIParseError(self._parser,
                                  "duplicated '%s' section" % name)
-        self._end_section()
-        self._section = QAPIDoc.Section(self._parser, name, indent)
-        self.sections.append(self._section)
-
-    def _end_section(self):
-        if self._section:
-            text = self._section.text = self._section.text.strip()
-            if self._section.name and (not text or text.isspace()):
-                raise QAPIParseError(
-                    self._parser,
-                    "empty doc section '%s'" % self._section.name)
-            self._section = None
+        new_section = QAPIDoc.Section(self._parser, name)
+        self._switch_section(new_section)
+        self.sections.append(new_section)
+
+    def _switch_section(self, new_section: 'QAPIDoc.Section') -> None:
+        text = self._section.text = self._section.text.strip('\n')
+
+        # Only the 'body' section is allowed to have an empty body.
+        # All other sections, including anonymous ones, must have text.
+        if self._section != self.body and not text:
+            # We do not create anonymous sections unless there is
+            # something to put in them; this is a parser bug.
+            assert self._section.name
+            raise QAPIParseError(
+                self._parser,
+                "empty doc section '%s'" % self._section.name)
 
-    def _append_freeform(self, line):
+        self._section = new_section
+
+    def _append_freeform(self, line: str) -> None:
         match = re.match(r'(@\S+:)', line)
         if match:
             raise QAPIParseError(self._parser,
@@ -604,37 +737,41 @@ class QAPIDoc:
                                  % match.group(1))
         self._section.append(line)
 
-    def connect_member(self, member):
+    def connect_member(self, member: 'QAPISchemaMember') -> None:
         if member.name not in self.args:
             # Undocumented TODO outlaw
             self.args[member.name] = QAPIDoc.ArgSection(self._parser,
                                                         member.name)
         self.args[member.name].connect(member)
 
-    def connect_feature(self, feature):
+    def connect_feature(self, feature: 'QAPISchemaFeature') -> None:
         if feature.name not in self.features:
             raise QAPISemError(feature.info,
                                "feature '%s' lacks documentation"
                                % feature.name)
         self.features[feature.name].connect(feature)
 
-    def check_expr(self, expr):
+    def check_expr(self, expr: QAPIExpression) -> None:
         if self.has_section('Returns') and 'command' not in expr:
             raise QAPISemError(self.info,
                                "'Returns:' is only valid for commands")
 
-    def check(self):
+    def check(self) -> None:
 
-        def check_args_section(args, info, what):
+        def check_args_section(
+                args: Dict[str, QAPIDoc.ArgSection], what: str
+        ) -> None:
             bogus = [name for name, section in args.items()
                      if not section.member]
             if bogus:
                 raise QAPISemError(
                     self.info,
-                    "documented member%s '%s' %s not exist"
-                    % ("s" if len(bogus) > 1 else "",
-                       "', '".join(bogus),
-                       "do" if len(bogus) > 1 else "does"))
-
-        check_args_section(self.args, self.info, 'members')
-        check_args_section(self.features, self.info, 'features')
+                    "documented %s%s '%s' %s not exist" % (
+                        what,
+                        "s" if len(bogus) > 1 else "",
+                        "', '".join(bogus),
+                        "do" if len(bogus) > 1 else "does"
+                    ))
+
+        check_args_section(self.args, 'member')
+        check_args_section(self.features, 'feature')
index b9e077a1642d10ce6fb7b857b394b812c5ac6dfc..90546df534589f10e15c1e67353f40d79faf44df 100644 (file)
@@ -2,10 +2,7 @@
 
 # Add files or directories matching the regex patterns to the ignore list.
 # The regex matches against base names, not paths.
-ignore-patterns=error.py,
-                expr.py,
-                parser.py,
-                schema.py,
+ignore-patterns=schema.py,
 
 
 [MESSAGES CONTROL]
@@ -25,6 +22,8 @@ disable=fixme,
         too-many-branches,
         too-many-statements,
         too-many-instance-attributes,
+        consider-using-f-string,
+        useless-option-value,
 
 [REPORTS]
 
@@ -36,15 +35,12 @@ disable=fixme,
 
 [BASIC]
 
-# Good variable names which should always be accepted, separated by a comma.
-good-names=i,
-           j,
-           k,
-           ex,
-           Run,
-           _,
-           fp,  # fp = open(...)
-           fd,  # fd = os.open(...)
+# Good variable names regexes, separated by a comma. If names match any regex,
+# they will always be accepted.
+#
+# Suppress complaints about short names.  PEP-8 is cool with them,
+# and so are we.
+good-names-rgxs=^[_a-z][_a-z0-9]?$
 
 [VARIABLES]
 
index 720449feee4d861431b8cdaddd36bf06e6b9f20f..6a836950a9abc17f194f7fd0595eca0f9632b1cb 100644 (file)
 from collections import OrderedDict
 import os
 import re
-from typing import Optional
-
-from .common import POINTER_SUFFIX, c_name
-from .error import QAPIError, QAPISemError
+from typing import List, Optional
+
+from .common import (
+    POINTER_SUFFIX,
+    c_name,
+    cgen_ifcond,
+    docgen_ifcond,
+    gen_endif,
+    gen_if,
+)
+from .error import QAPIError, QAPISemError, QAPISourceError
 from .expr import check_exprs
-from .parser import QAPISchemaParser
+from .parser import QAPIExpression, QAPISchemaParser
+
+
+class QAPISchemaIfCond:
+    def __init__(self, ifcond=None):
+        self.ifcond = ifcond
+
+    def _cgen(self):
+        return cgen_ifcond(self.ifcond)
+
+    def gen_if(self):
+        return gen_if(self._cgen())
+
+    def gen_endif(self):
+        return gen_endif(self._cgen())
+
+    def docgen(self):
+        return docgen_ifcond(self.ifcond)
+
+    def is_present(self):
+        return bool(self.ifcond)
 
 
 class QAPISchemaEntity:
     meta: Optional[str] = None
 
-    def __init__(self, name, info, doc, ifcond=None, features=None):
+    def __init__(self, name: str, info, doc, ifcond=None, features=None):
         assert name is None or isinstance(name, str)
         for f in features or []:
             assert isinstance(f, QAPISchemaFeature)
@@ -42,10 +69,16 @@ class QAPISchemaEntity:
         # such place).
         self.info = info
         self.doc = doc
-        self._ifcond = ifcond or []
+        self._ifcond = ifcond or QAPISchemaIfCond()
         self.features = features or []
         self._checked = False
 
+    def __repr__(self):
+        if self.name is None:
+            return "<%s at 0x%x>" % (type(self).__name__, id(self))
+        return "<%s:%s at 0x%x>" % (type(self).__name__, self.name,
+                                    id(self))
+
     def c_name(self):
         return c_name(self.name)
 
@@ -68,7 +101,8 @@ class QAPISchemaEntity:
 
     def _set_module(self, schema, info):
         assert self._checked
-        self._module = schema.module_by_fname(info and info.fname)
+        fname = info.fname if info else QAPISchemaModule.BUILTIN_MODULE_NAME
+        self._module = schema.module_by_fname(fname)
         self._module.add_entity(self)
 
     def set_module(self, schema):
@@ -137,10 +171,40 @@ class QAPISchemaVisitor:
 
 
 class QAPISchemaModule:
+
+    BUILTIN_MODULE_NAME = './builtin'
+
     def __init__(self, name):
         self.name = name
         self._entity_list = []
 
+    @staticmethod
+    def is_system_module(name: str) -> bool:
+        """
+        System modules are internally defined modules.
+
+        Their names start with the "./" prefix.
+        """
+        return name.startswith('./')
+
+    @classmethod
+    def is_user_module(cls, name: str) -> bool:
+        """
+        User modules are those defined by the user in qapi JSON files.
+
+        They do not start with the "./" prefix.
+        """
+        return not cls.is_system_module(name)
+
+    @classmethod
+    def is_builtin_module(cls, name: str) -> bool:
+        """
+        The built-in module is a single System module for the built-in types.
+
+        It is always "./builtin".
+        """
+        return name == cls.BUILTIN_MODULE_NAME
+
     def add_entity(self, ent):
         self._entity_list.append(ent)
 
@@ -185,6 +249,7 @@ class QAPISchemaType(QAPISchemaEntity):
             'number':  'QTYPE_QNUM',
             'int':     'QTYPE_QNUM',
             'boolean': 'QTYPE_QBOOL',
+            'array':   'QTYPE_QLIST',
             'object':  'QTYPE_QDICT'
         }
         return json2qtype.get(self.json_type())
@@ -194,11 +259,18 @@ class QAPISchemaType(QAPISchemaEntity):
             return None
         return self.name
 
+    def need_has_if_optional(self):
+        # When FOO is a pointer, has_FOO == !!FOO, i.e. has_FOO is redundant.
+        # Except for arrays; see QAPISchemaArrayType.need_has_if_optional().
+        return not self.c_type().endswith(POINTER_SUFFIX)
+
     def check(self, schema):
-        QAPISchemaEntity.check(self, schema)
-        if 'deprecated' in [f.name for f in self.features]:
-            raise QAPISemError(
-                self.info, "feature 'deprecated' is not supported for types")
+        super().check(schema)
+        for feat in self.features:
+            if feat.is_special():
+                raise QAPISemError(
+                    self.info,
+                    f"feature '{feat.name}' is not supported for types")
 
     def describe(self):
         assert self.meta
@@ -263,8 +335,8 @@ class QAPISchemaEnumType(QAPISchemaType):
             m.connect_doc(doc)
 
     def is_implicit(self):
-        # See QAPISchema._make_implicit_enum_type() and ._def_predefineds()
-        return self.name.endswith('Kind') or self.name == 'QType'
+        # See QAPISchema._def_predefineds()
+        return self.name == 'QType'
 
     def c_type(self):
         return c_name(self.name)
@@ -291,6 +363,11 @@ class QAPISchemaArrayType(QAPISchemaType):
         self._element_type_name = element_type
         self.element_type = None
 
+    def need_has_if_optional(self):
+        # When FOO is an array, we still need has_FOO to distinguish
+        # absent (!has_FOO) from present and empty (has_FOO && !FOO).
+        return True
+
     def check(self, schema):
         super().check(schema)
         self.element_type = schema.resolve_type(
@@ -335,8 +412,7 @@ class QAPISchemaObjectType(QAPISchemaType):
     def __init__(self, name, info, doc, ifcond, features,
                  base, local_members, variants):
         # struct has local_members, optional base, and no variants
-        # flat union has base, variants, and no local_members
-        # simple union has local_members, variants, and no base
+        # union has base, variants, and no local_members
         super().__init__(name, info, doc, ifcond, features)
         self.meta = 'union' if variants else 'struct'
         assert base is None or isinstance(base, str)
@@ -395,9 +471,10 @@ class QAPISchemaObjectType(QAPISchemaType):
     # on behalf of info, which is not necessarily self.info
     def check_clash(self, info, seen):
         assert self._checked
-        assert not self.variants       # not implemented
         for m in self.members:
             m.check_clash(info, seen)
+        if self.variants:
+            self.variants.check_clash(info, seen)
 
     def connect_doc(self, doc=None):
         super().connect_doc(doc)
@@ -407,15 +484,6 @@ class QAPISchemaObjectType(QAPISchemaType):
         for m in self.local_members:
             m.connect_doc(doc)
 
-    @property
-    def ifcond(self):
-        assert self._checked
-        if isinstance(self._ifcond, QAPISchemaType):
-            # Simple union wrapper type inherits from wrapped type;
-            # see _make_implicit_object_type()
-            return self._ifcond.ifcond
-        return self._ifcond
-
     def is_implicit(self):
         # See QAPISchema._make_implicit_object_type(), as well as
         # _def_predefineds()
@@ -425,6 +493,10 @@ class QAPISchemaObjectType(QAPISchemaType):
         assert self.members is not None
         return not self.members and not self.variants
 
+    def has_conditional_members(self):
+        assert self.members is not None
+        return any(m.ifcond.is_present() for m in self.members)
+
     def c_name(self):
         assert self.name != 'q_empty'
         return super().c_name()
@@ -518,10 +590,9 @@ class QAPISchemaAlternateType(QAPISchemaType):
 
 class QAPISchemaVariants:
     def __init__(self, tag_name, info, tag_member, variants):
-        # Flat unions pass tag_name but not tag_member.
-        # Simple unions and alternates pass tag_member but not tag_name.
-        # After check(), tag_member is always set, and tag_name remains
-        # a reliable witness of being used by a flat union.
+        # Unions pass tag_name but not tag_member.
+        # Alternates pass tag_member but not tag_name.
+        # After check(), tag_member is always set.
         assert bool(tag_member) != bool(tag_name)
         assert (isinstance(tag_name, str) or
                 isinstance(tag_member, QAPISchemaObjectTypeMember))
@@ -537,7 +608,7 @@ class QAPISchemaVariants:
             v.set_defined_in(name)
 
     def check(self, schema, seen):
-        if not self.tag_member:  # flat union
+        if self._tag_name:      # union
             self.tag_member = seen.get(c_name(self._tag_name))
             base = "'base'"
             # Pointing to the base type when not implicit would be
@@ -562,16 +633,16 @@ class QAPISchemaVariants:
                     self.info,
                     "discriminator member '%s' of %s must not be optional"
                     % (self._tag_name, base))
-            if self.tag_member.ifcond:
+            if self.tag_member.ifcond.is_present():
                 raise QAPISemError(
                     self.info,
                     "discriminator member '%s' of %s must not be conditional"
                     % (self._tag_name, base))
-        else:                   # simple union
+        else:                   # alternate
             assert isinstance(self.tag_member.type, QAPISchemaEnumType)
             assert not self.tag_member.optional
-            assert self.tag_member.ifcond == []
-        if self._tag_name:    # flat union
+            assert not self.tag_member.ifcond.is_present()
+        if self._tag_name:      # union
             # branches that are not explicitly covered get an empty type
             cases = {v.name for v in self.variants}
             for m in self.tag_member.type.members:
@@ -592,8 +663,7 @@ class QAPISchemaVariants:
                         self.info,
                         "branch '%s' is not a value of %s"
                         % (v.name, self.tag_member.type.describe()))
-                if (not isinstance(v.type, QAPISchemaObjectType)
-                        or v.type.variants):
+                if not isinstance(v.type, QAPISchemaObjectType):
                     raise QAPISemError(
                         self.info,
                         "%s cannot use %s"
@@ -615,7 +685,7 @@ class QAPISchemaMember:
         assert isinstance(name, str)
         self.name = name
         self.info = info
-        self.ifcond = ifcond or []
+        self.ifcond = ifcond or QAPISchemaIfCond()
         self.defined_in = None
 
     def set_defined_in(self, name):
@@ -637,6 +707,7 @@ class QAPISchemaMember:
 
     def describe(self, info):
         role = self.role
+        meta = 'type'
         defined_in = self.defined_in
         assert defined_in
 
@@ -648,31 +719,43 @@ class QAPISchemaMember:
                 # Implicit type created for a command's dict 'data'
                 assert role == 'member'
                 role = 'parameter'
+                meta = 'command'
+                defined_in = defined_in[:-4]
             elif defined_in.endswith('-base'):
-                # Implicit type created for a flat union's dict 'base'
+                # Implicit type created for a union's dict 'base'
                 role = 'base ' + role
+                defined_in = defined_in[:-5]
             else:
-                # Implicit type created for a simple union's branch
-                assert defined_in.endswith('-wrapper')
-                # Unreachable and not implemented
                 assert False
-        elif defined_in.endswith('Kind'):
-            # See QAPISchema._make_implicit_enum_type()
-            # Implicit enum created for simple union's branches
-            assert role == 'value'
-            role = 'branch'
-        elif defined_in != info.defn_name:
-            return "%s '%s' of type '%s'" % (role, self.name, defined_in)
+
+        if defined_in != info.defn_name:
+            return "%s '%s' of %s '%s'" % (role, self.name, meta, defined_in)
         return "%s '%s'" % (role, self.name)
 
 
 class QAPISchemaEnumMember(QAPISchemaMember):
     role = 'value'
 
+    def __init__(self, name, info, ifcond=None, features=None):
+        super().__init__(name, info, ifcond)
+        for f in features or []:
+            assert isinstance(f, QAPISchemaFeature)
+            f.set_defined_in(name)
+        self.features = features or []
+
+    def connect_doc(self, doc):
+        super().connect_doc(doc)
+        if doc:
+            for f in self.features:
+                doc.connect_feature(f)
+
 
 class QAPISchemaFeature(QAPISchemaMember):
     role = 'feature'
 
+    def is_special(self):
+        return self.name in ('deprecated', 'unstable')
+
 
 class QAPISchemaObjectTypeMember(QAPISchemaMember):
     def __init__(self, name, info, typ, optional, ifcond=None, features=None):
@@ -687,6 +770,10 @@ class QAPISchemaObjectTypeMember(QAPISchemaMember):
         self.optional = optional
         self.features = features or []
 
+    def need_has(self):
+        assert self.type
+        return self.optional and self.type.need_has_if_optional()
+
     def check(self, schema):
         assert self.defined_in
         self.type = schema.resolve_type(self._type_name, self.info,
@@ -745,10 +832,15 @@ class QAPISchemaCommand(QAPISchemaEntity):
                     self.info,
                     "command's 'data' can take %s only with 'boxed': true"
                     % self.arg_type.describe())
+            self.arg_type.check(schema)
+            if self.arg_type.has_conditional_members() and not self.boxed:
+                raise QAPISemError(
+                    self.info,
+                    "conditional command arguments require 'boxed': true")
         if self._ret_type_name:
             self.ret_type = schema.resolve_type(
                 self._ret_type_name, self.info, "command's 'returns'")
-            if self.name not in self.info.pragma.returns_whitelist:
+            if self.name not in self.info.pragma.command_returns_exceptions:
                 typ = self.ret_type
                 if isinstance(typ, QAPISchemaArrayType):
                     typ = self.ret_type.element_type
@@ -800,6 +892,11 @@ class QAPISchemaEvent(QAPISchemaEntity):
                     self.info,
                     "event's 'data' can take %s only with 'boxed': true"
                     % self.arg_type.describe())
+            self.arg_type.check(schema)
+            if self.arg_type.has_conditional_members() and not self.boxed:
+                raise QAPISemError(
+                    self.info,
+                    "conditional event arguments require 'boxed': true")
 
     def connect_doc(self, doc=None):
         super().connect_doc(doc)
@@ -818,14 +915,21 @@ class QAPISchemaEvent(QAPISchemaEntity):
 class QAPISchema:
     def __init__(self, fname):
         self.fname = fname
-        parser = QAPISchemaParser(fname)
+
+        try:
+            parser = QAPISchemaParser(fname)
+        except OSError as err:
+            raise QAPIError(
+                f"can't read schema file '{fname}': {err.strerror}"
+            ) from err
+
         exprs = check_exprs(parser.exprs)
         self.docs = parser.docs
         self._entity_list = []
         self._entity_dict = {}
         self._module_dict = OrderedDict()
         self._schema_dir = os.path.dirname(fname)
-        self._make_module(None)  # built-ins
+        self._make_module(QAPISchemaModule.BUILTIN_MODULE_NAME)
         self._make_module(fname)
         self._predefining = True
         self._def_predefineds()
@@ -844,7 +948,7 @@ class QAPISchema:
         other_ent = self._entity_dict.get(ent.name)
         if other_ent:
             if other_ent.info:
-                where = QAPIError(other_ent.info, None, "previous definition")
+                where = QAPISourceError(other_ent.info, "previous definition")
                 raise QAPISemError(
                     ent.info,
                     "'%s' is already defined\n%s" % (ent.name, where))
@@ -870,9 +974,9 @@ class QAPISchema:
                 info, "%s uses unknown type '%s'" % (what, name))
         return typ
 
-    def _module_name(self, fname):
-        if fname is None:
-            return None
+    def _module_name(self, fname: str) -> str:
+        if QAPISchemaModule.is_system_module(fname):
+            return fname
         return os.path.relpath(fname, self._schema_dir)
 
     def _make_module(self, fname):
@@ -883,13 +987,13 @@ class QAPISchema:
 
     def module_by_fname(self, fname):
         name = self._module_name(fname)
-        assert name in self._module_dict
         return self._module_dict[name]
 
-    def _def_include(self, expr, info, doc):
+    def _def_include(self, expr: QAPIExpression):
         include = expr['include']
-        assert doc is None
-        self._def_entity(QAPISchemaInclude(self._make_module(include), info))
+        assert expr.doc is None
+        self._def_entity(
+            QAPISchemaInclude(self._make_module(include), expr.info))
 
     def _def_builtin_type(self, name, json_type, c_type):
         self._def_entity(QAPISchemaBuiltinType(name, json_type, c_type))
@@ -931,22 +1035,20 @@ class QAPISchema:
     def _make_features(self, features, info):
         if features is None:
             return []
-        return [QAPISchemaFeature(f['name'], info, f.get('if'))
+        return [QAPISchemaFeature(f['name'], info,
+                                  QAPISchemaIfCond(f.get('if')))
                 for f in features]
 
+    def _make_enum_member(self, name, ifcond, features, info):
+        return QAPISchemaEnumMember(name, info,
+                                    QAPISchemaIfCond(ifcond),
+                                    self._make_features(features, info))
+
     def _make_enum_members(self, values, info):
-        return [QAPISchemaEnumMember(v['name'], info, v.get('if'))
+        return [self._make_enum_member(v['name'], v.get('if'),
+                                       v.get('features'), info)
                 for v in values]
 
-    def _make_implicit_enum_type(self, name, info, ifcond, values):
-        # See also QAPISchemaObjectTypeMember.describe()
-        name = name + 'Kind'    # reserved by check_defn_name_str()
-        self._def_entity(QAPISchemaEnumType(
-            name, info, None, ifcond, None,
-            self._make_enum_members(values, info),
-            None))
-        return name
-
     def _make_array_type(self, element_type, info):
         name = element_type + 'List'    # reserved by check_defn_name_str()
         if not self.lookup_type(name):
@@ -961,30 +1063,23 @@ class QAPISchema:
         typ = self.lookup_entity(name, QAPISchemaObjectType)
         if typ:
             # The implicit object type has multiple users.  This can
-            # happen only for simple unions' implicit wrapper types.
-            # Its ifcond should be the disjunction of its user's
-            # ifconds.  Not implemented.  Instead, we always pass the
-            # wrapped type's ifcond, which is trivially the same for all
-            # users.  It's also necessary for the wrapper to compile.
-            # But it's not tight: the disjunction need not imply it.  We
-            # may end up compiling useless wrapper types.
-            # TODO kill simple unions or implement the disjunction
-
-            # pylint: disable=protected-access
-            assert (ifcond or []) == typ._ifcond
+            # only be a duplicate definition, which will be flagged
+            # later.
+            pass
         else:
             self._def_entity(QAPISchemaObjectType(
                 name, info, None, ifcond, None, None, members, None))
         return name
 
-    def _def_enum_type(self, expr, info, doc):
+    def _def_enum_type(self, expr: QAPIExpression):
         name = expr['enum']
         data = expr['data']
         prefix = expr.get('prefix')
-        ifcond = expr.get('if')
+        ifcond = QAPISchemaIfCond(expr.get('if'))
+        info = expr.info
         features = self._make_features(expr.get('features'), info)
         self._def_entity(QAPISchemaEnumType(
-            name, info, doc, ifcond, features,
+            name, info, expr.doc, ifcond, features,
             self._make_enum_members(data, info), prefix))
 
     def _make_member(self, name, typ, ifcond, features, info):
@@ -999,79 +1094,73 @@ class QAPISchema:
                                           self._make_features(features, info))
 
     def _make_members(self, data, info):
-        return [self._make_member(key, value['type'], value.get('if'),
+        return [self._make_member(key, value['type'],
+                                  QAPISchemaIfCond(value.get('if')),
                                   value.get('features'), info)
                 for (key, value) in data.items()]
 
-    def _def_struct_type(self, expr, info, doc):
+    def _def_struct_type(self, expr: QAPIExpression):
         name = expr['struct']
         base = expr.get('base')
         data = expr['data']
-        ifcond = expr.get('if')
+        info = expr.info
+        ifcond = QAPISchemaIfCond(expr.get('if'))
         features = self._make_features(expr.get('features'), info)
         self._def_entity(QAPISchemaObjectType(
-            name, info, doc, ifcond, features, base,
+            name, info, expr.doc, ifcond, features, base,
             self._make_members(data, info),
             None))
 
     def _make_variant(self, case, typ, ifcond, info):
-        return QAPISchemaVariant(case, info, typ, ifcond)
-
-    def _make_simple_variant(self, case, typ, ifcond, info):
         if isinstance(typ, list):
             assert len(typ) == 1
             typ = self._make_array_type(typ[0], info)
-        typ = self._make_implicit_object_type(
-            typ, info, self.lookup_type(typ),
-            'wrapper', [self._make_member('data', typ, None, None, info)])
         return QAPISchemaVariant(case, info, typ, ifcond)
 
-    def _def_union_type(self, expr, info, doc):
+    def _def_union_type(self, expr: QAPIExpression):
         name = expr['union']
+        base = expr['base']
+        tag_name = expr['discriminator']
         data = expr['data']
-        base = expr.get('base')
-        ifcond = expr.get('if')
+        assert isinstance(data, dict)
+        info = expr.info
+        ifcond = QAPISchemaIfCond(expr.get('if'))
         features = self._make_features(expr.get('features'), info)
-        tag_name = expr.get('discriminator')
-        tag_member = None
         if isinstance(base, dict):
             base = self._make_implicit_object_type(
                 name, info, ifcond,
                 'base', self._make_members(base, info))
-        if tag_name:
-            variants = [self._make_variant(key, value['type'],
-                                           value.get('if'), info)
-                        for (key, value) in data.items()]
-            members = []
-        else:
-            variants = [self._make_simple_variant(key, value['type'],
-                                                  value.get('if'), info)
-                        for (key, value) in data.items()]
-            enum = [{'name': v.name, 'if': v.ifcond} for v in variants]
-            typ = self._make_implicit_enum_type(name, info, ifcond, enum)
-            tag_member = QAPISchemaObjectTypeMember('type', info, typ, False)
-            members = [tag_member]
+        variants = [
+            self._make_variant(key, value['type'],
+                               QAPISchemaIfCond(value.get('if')),
+                               info)
+            for (key, value) in data.items()]
+        members: List[QAPISchemaObjectTypeMember] = []
         self._def_entity(
-            QAPISchemaObjectType(name, info, doc, ifcond, features,
+            QAPISchemaObjectType(name, info, expr.doc, ifcond, features,
                                  base, members,
                                  QAPISchemaVariants(
-                                     tag_name, info, tag_member, variants)))
+                                     tag_name, info, None, variants)))
 
-    def _def_alternate_type(self, expr, info, doc):
+    def _def_alternate_type(self, expr: QAPIExpression):
         name = expr['alternate']
         data = expr['data']
-        ifcond = expr.get('if')
+        assert isinstance(data, dict)
+        ifcond = QAPISchemaIfCond(expr.get('if'))
+        info = expr.info
         features = self._make_features(expr.get('features'), info)
-        variants = [self._make_variant(key, value['type'], value.get('if'),
-                                       info)
-                    for (key, value) in data.items()]
+        variants = [
+            self._make_variant(key, value['type'],
+                               QAPISchemaIfCond(value.get('if')),
+                               info)
+            for (key, value) in data.items()]
         tag_member = QAPISchemaObjectTypeMember('type', info, 'QType', False)
         self._def_entity(
-            QAPISchemaAlternateType(name, info, doc, ifcond, features,
-                                    QAPISchemaVariants(
-                                        None, info, tag_member, variants)))
+            QAPISchemaAlternateType(
+                name, info, expr.doc, ifcond, features,
+                QAPISchemaVariants(None, info, tag_member, variants)))
 
-    def _def_command(self, expr, info, doc):
+    def _def_command(self, expr: QAPIExpression):
         name = expr['command']
         data = expr.get('data')
         rets = expr.get('returns')
@@ -1081,7 +1170,8 @@ class QAPISchema:
         allow_oob = expr.get('allow-oob', False)
         allow_preconfig = expr.get('allow-preconfig', False)
         coroutine = expr.get('coroutine', False)
-        ifcond = expr.get('if')
+        ifcond = QAPISchemaIfCond(expr.get('if'))
+        info = expr.info
         features = self._make_features(expr.get('features'), info)
         if isinstance(data, OrderedDict):
             data = self._make_implicit_object_type(
@@ -1090,44 +1180,42 @@ class QAPISchema:
         if isinstance(rets, list):
             assert len(rets) == 1
             rets = self._make_array_type(rets[0], info)
-        self._def_entity(QAPISchemaCommand(name, info, doc, ifcond, features,
-                                           data, rets,
+        self._def_entity(QAPISchemaCommand(name, info, expr.doc, ifcond,
+                                           features, data, rets,
                                            gen, success_response,
                                            boxed, allow_oob, allow_preconfig,
                                            coroutine))
 
-    def _def_event(self, expr, info, doc):
+    def _def_event(self, expr: QAPIExpression):
         name = expr['event']
         data = expr.get('data')
         boxed = expr.get('boxed', False)
-        ifcond = expr.get('if')
+        ifcond = QAPISchemaIfCond(expr.get('if'))
+        info = expr.info
         features = self._make_features(expr.get('features'), info)
         if isinstance(data, OrderedDict):
             data = self._make_implicit_object_type(
                 name, info, ifcond,
                 'arg', self._make_members(data, info))
-        self._def_entity(QAPISchemaEvent(name, info, doc, ifcond, features,
-                                         data, boxed))
+        self._def_entity(QAPISchemaEvent(name, info, expr.doc, ifcond,
+                                         features, data, boxed))
 
     def _def_exprs(self, exprs):
-        for expr_elem in exprs:
-            expr = expr_elem['expr']
-            info = expr_elem['info']
-            doc = expr_elem.get('doc')
+        for expr in exprs:
             if 'enum' in expr:
-                self._def_enum_type(expr, info, doc)
+                self._def_enum_type(expr)
             elif 'struct' in expr:
-                self._def_struct_type(expr, info, doc)
+                self._def_struct_type(expr)
             elif 'union' in expr:
-                self._def_union_type(expr, info, doc)
+                self._def_union_type(expr)
             elif 'alternate' in expr:
-                self._def_alternate_type(expr, info, doc)
+                self._def_alternate_type(expr)
             elif 'command' in expr:
-                self._def_command(expr, info, doc)
+                self._def_command(expr)
             elif 'event' in expr:
-                self._def_event(expr, info, doc)
+                self._def_event(expr)
             elif 'include' in expr:
-                self._def_include(expr, info, doc)
+                self._def_include(expr)
             else:
                 assert False
 
index d7a79a9b8aee77cd762fdc1557d4e425a8c6e3d3..04193cc964371aaba56cde9bb289b0b538c63e6a 100644 (file)
@@ -10,7 +10,6 @@
 # See the COPYING file in the top-level directory.
 
 import copy
-import sys
 from typing import List, Optional, TypeVar
 
 
@@ -21,19 +20,20 @@ class QAPISchemaPragma:
     def __init__(self) -> None:
         # Are documentation comments required?
         self.doc_required = False
-        # Whitelist of commands allowed to return a non-dictionary
-        self.returns_whitelist: List[str] = []
-        # Whitelist of entities allowed to violate case conventions
-        self.name_case_whitelist: List[str] = []
+        # Commands whose names may use '_'
+        self.command_name_exceptions: List[str] = []
+        # Commands allowed to return a non-dictionary
+        self.command_returns_exceptions: List[str] = []
+        # Types whose member names may violate case conventions
+        self.member_name_exceptions: List[str] = []
 
 
 class QAPISourceInfo:
     T = TypeVar('T', bound='QAPISourceInfo')
 
-    def __init__(self, fname: str, line: int,
-                 parent: Optional['QAPISourceInfo']):
+    def __init__(self, fname: str, parent: Optional['QAPISourceInfo']):
         self.fname = fname
-        self.line = line
+        self.line = 1
         self.parent = parent
         self.pragma: QAPISchemaPragma = (
             parent.pragma if parent else QAPISchemaPragma()
@@ -51,12 +51,7 @@ class QAPISourceInfo:
         return info
 
     def loc(self) -> str:
-        if self.fname is None:
-            return sys.argv[0]
-        ret = self.fname
-        if self.line is not None:
-            ret += ':%d' % self.line
-        return ret
+        return f"{self.fname}:{self.line}"
 
     def in_defn(self) -> str:
         if self.defn_name:
index 2b4916cdaa1bdeee46c5387fec20472127f47c3b..c39d054d2cda80a7f5a17c19e97d6bee2b65aff4 100644 (file)
@@ -15,18 +15,17 @@ This work is licensed under the terms of the GNU GPL, version 2.
 
 from typing import List, Optional
 
-from .common import (
-    c_enum_const,
-    c_name,
-    gen_endif,
-    gen_if,
-    mcgen,
+from .common import c_enum_const, c_name, mcgen
+from .gen import (
+    QAPISchemaModularCVisitor,
+    gen_special_features,
+    ifcontext,
 )
-from .gen import QAPISchemaModularCVisitor, ifcontext
 from .schema import (
     QAPISchema,
     QAPISchemaEnumMember,
     QAPISchemaFeature,
+    QAPISchemaIfCond,
     QAPISchemaObjectType,
     QAPISchemaObjectTypeMember,
     QAPISchemaType,
@@ -43,6 +42,8 @@ objects_seen = set()
 def gen_enum_lookup(name: str,
                     members: List[QAPISchemaEnumMember],
                     prefix: Optional[str] = None) -> str:
+    max_index = c_enum_const(name, '_MAX', prefix)
+    feats = ''
     ret = mcgen('''
 
 const QEnumLookup %(c_name)s_lookup = {
@@ -50,20 +51,35 @@ const QEnumLookup %(c_name)s_lookup = {
 ''',
                 c_name=c_name(name))
     for memb in members:
-        ret += gen_if(memb.ifcond)
+        ret += memb.ifcond.gen_if()
         index = c_enum_const(name, memb.name, prefix)
         ret += mcgen('''
         [%(index)s] = "%(name)s",
 ''',
                      index=index, name=memb.name)
-        ret += gen_endif(memb.ifcond)
+        ret += memb.ifcond.gen_endif()
+
+        special_features = gen_special_features(memb.features)
+        if special_features != '0':
+            feats += mcgen('''
+        [%(index)s] = %(special_features)s,
+''',
+                           index=index, special_features=special_features)
+
+    if feats:
+        ret += mcgen('''
+    },
+    .special_features = (const unsigned char[%(max_index)s]) {
+''',
+                     max_index=max_index)
+        ret += feats
 
     ret += mcgen('''
     },
     .size = %(max_index)s
 };
 ''',
-                 max_index=c_enum_const(name, '_MAX', prefix))
+                 max_index=max_index)
     return ret
 
 
@@ -80,12 +96,12 @@ typedef enum %(c_name)s {
                 c_name=c_name(name))
 
     for memb in enum_members:
-        ret += gen_if(memb.ifcond)
+        ret += memb.ifcond.gen_if()
         ret += mcgen('''
     %(c_enum)s,
 ''',
                      c_enum=c_enum_const(name, memb.name, prefix))
-        ret += gen_endif(memb.ifcond)
+        ret += memb.ifcond.gen_endif()
 
     ret += mcgen('''
 } %(c_name)s;
@@ -125,8 +141,8 @@ struct %(c_name)s {
 def gen_struct_members(members: List[QAPISchemaObjectTypeMember]) -> str:
     ret = ''
     for memb in members:
-        ret += gen_if(memb.ifcond)
-        if memb.optional:
+        ret += memb.ifcond.gen_if()
+        if memb.need_has():
             ret += mcgen('''
     bool has_%(c_name)s;
 ''',
@@ -135,11 +151,11 @@ def gen_struct_members(members: List[QAPISchemaObjectTypeMember]) -> str:
     %(c_type)s %(c_name)s;
 ''',
                      c_type=memb.type.c_type(), c_name=c_name(memb.name))
-        ret += gen_endif(memb.ifcond)
+        ret += memb.ifcond.gen_endif()
     return ret
 
 
-def gen_object(name: str, ifcond: List[str],
+def gen_object(name: str, ifcond: QAPISchemaIfCond,
                base: Optional[QAPISchemaObjectType],
                members: List[QAPISchemaObjectTypeMember],
                variants: Optional[QAPISchemaVariants]) -> str:
@@ -158,7 +174,7 @@ def gen_object(name: str, ifcond: List[str],
     ret += mcgen('''
 
 ''')
-    ret += gen_if(ifcond)
+    ret += ifcond.gen_if()
     ret += mcgen('''
 struct %(c_name)s {
 ''',
@@ -192,7 +208,7 @@ struct %(c_name)s {
     ret += mcgen('''
 };
 ''')
-    ret += gen_endif(ifcond)
+    ret += ifcond.gen_endif()
 
     return ret
 
@@ -219,13 +235,13 @@ def gen_variants(variants: QAPISchemaVariants) -> str:
     for var in variants.variants:
         if var.type.name == 'q_empty':
             continue
-        ret += gen_if(var.ifcond)
+        ret += var.ifcond.gen_if()
         ret += mcgen('''
         %(c_type)s %(c_name)s;
 ''',
                      c_type=var.type.c_unboxed_type(),
                      c_name=c_name(var.name))
-        ret += gen_endif(var.ifcond)
+        ret += var.ifcond.gen_endif()
 
     ret += mcgen('''
     } u;
@@ -271,7 +287,7 @@ class QAPISchemaGenTypeVisitor(QAPISchemaModularCVisitor):
             prefix, 'qapi-types', ' * Schema-defined QAPI types',
             ' * Built-in QAPI types', __doc__)
 
-    def _begin_system_module(self, name: None) -> None:
+    def _begin_builtin_module(self) -> None:
         self._genc.preamble_add(mcgen('''
 #include "qemu/osdep.h"
 #include "qapi/dealloc-visitor.h"
@@ -307,7 +323,7 @@ class QAPISchemaGenTypeVisitor(QAPISchemaModularCVisitor):
     def visit_enum_type(self,
                         name: str,
                         info: Optional[QAPISourceInfo],
-                        ifcond: List[str],
+                        ifcond: QAPISchemaIfCond,
                         features: List[QAPISchemaFeature],
                         members: List[QAPISchemaEnumMember],
                         prefix: Optional[str]) -> None:
@@ -318,7 +334,7 @@ class QAPISchemaGenTypeVisitor(QAPISchemaModularCVisitor):
     def visit_array_type(self,
                          name: str,
                          info: Optional[QAPISourceInfo],
-                         ifcond: List[str],
+                         ifcond: QAPISchemaIfCond,
                          element_type: QAPISchemaType) -> None:
         with ifcontext(ifcond, self._genh, self._genc):
             self._genh.preamble_add(gen_fwd_object_or_array(name))
@@ -328,7 +344,7 @@ class QAPISchemaGenTypeVisitor(QAPISchemaModularCVisitor):
     def visit_object_type(self,
                           name: str,
                           info: Optional[QAPISourceInfo],
-                          ifcond: List[str],
+                          ifcond: QAPISchemaIfCond,
                           features: List[QAPISchemaFeature],
                           base: Optional[QAPISchemaObjectType],
                           members: List[QAPISchemaObjectTypeMember],
@@ -350,8 +366,8 @@ class QAPISchemaGenTypeVisitor(QAPISchemaModularCVisitor):
 
     def visit_alternate_type(self,
                              name: str,
-                             info: QAPISourceInfo,
-                             ifcond: List[str],
+                             info: Optional[QAPISourceInfo],
+                             ifcond: QAPISchemaIfCond,
                              features: List[QAPISchemaFeature],
                              variants: QAPISchemaVariants) -> None:
         with ifcontext(ifcond, self._genh):
index 339f1521524c89ed29008c435fa1a60c77660a1f..c56ea4d7242e9175f480591c555cfa80bec35997 100644 (file)
@@ -18,17 +18,20 @@ from typing import List, Optional
 from .common import (
     c_enum_const,
     c_name,
-    gen_endif,
-    gen_if,
     indent,
     mcgen,
 )
-from .gen import QAPISchemaModularCVisitor, ifcontext
+from .gen import (
+    QAPISchemaModularCVisitor,
+    gen_special_features,
+    ifcontext,
+)
 from .schema import (
     QAPISchema,
     QAPISchemaEnumMember,
     QAPISchemaEnumType,
     QAPISchemaFeature,
+    QAPISchemaIfCond,
     QAPISchemaObjectType,
     QAPISchemaObjectTypeMember,
     QAPISchemaType,
@@ -68,6 +71,18 @@ bool visit_type_%(c_name)s_members(Visitor *v, %(c_name)s *obj, Error **errp)
 ''',
                 c_name=c_name(name))
 
+    sep = ''
+    for memb in members:
+        if memb.optional and not memb.need_has():
+            ret += memb.ifcond.gen_if()
+            ret += mcgen('''
+    bool has_%(c_name)s = !!obj->%(c_name)s;
+''',
+                         c_name=c_name(memb.name))
+            sep = '\n'
+            ret += memb.ifcond.gen_endif()
+    ret += sep
+
     if base:
         ret += mcgen('''
     if (!visit_type_%(c_type)s_members(v, (%(c_type)s *)obj, errp)) {
@@ -77,12 +92,25 @@ bool visit_type_%(c_name)s_members(Visitor *v, %(c_name)s *obj, Error **errp)
                      c_type=base.c_name())
 
     for memb in members:
-        ret += gen_if(memb.ifcond)
+        ret += memb.ifcond.gen_if()
         if memb.optional:
+            has = 'has_' + c_name(memb.name)
+            if memb.need_has():
+                has = 'obj->' + has
+            ret += mcgen('''
+    if (visit_optional(v, "%(name)s", &%(has)s)) {
+''',
+                         name=memb.name, has=has)
+            indent.increase()
+        special_features = gen_special_features(memb.features)
+        if special_features != '0':
             ret += mcgen('''
-    if (visit_optional(v, "%(name)s", &obj->has_%(c_name)s)) {
+    if (visit_policy_reject(v, "%(name)s", %(special_features)s, errp)) {
+        return false;
+    }
+    if (!visit_policy_skip(v, "%(name)s", %(special_features)s)) {
 ''',
-                         name=memb.name, c_name=c_name(memb.name))
+                         name=memb.name, special_features=special_features)
             indent.increase()
         ret += mcgen('''
     if (!visit_type_%(c_type)s(v, "%(name)s", &obj->%(c_name)s, errp)) {
@@ -91,12 +119,17 @@ bool visit_type_%(c_name)s_members(Visitor *v, %(c_name)s *obj, Error **errp)
 ''',
                      c_type=memb.type.c_name(), name=memb.name,
                      c_name=c_name(memb.name))
+        if special_features != '0':
+            indent.decrease()
+            ret += mcgen('''
+    }
+''')
         if memb.optional:
             indent.decrease()
             ret += mcgen('''
     }
 ''')
-        ret += gen_endif(memb.ifcond)
+        ret += memb.ifcond.gen_endif()
 
     if variants:
         tag_member = variants.tag_member
@@ -110,7 +143,7 @@ bool visit_type_%(c_name)s_members(Visitor *v, %(c_name)s *obj, Error **errp)
         for var in variants.variants:
             case_str = c_enum_const(tag_member.type.name, var.name,
                                     tag_member.type.prefix)
-            ret += gen_if(var.ifcond)
+            ret += var.ifcond.gen_if()
             if var.type.name == 'q_empty':
                 # valid variant and nothing to do
                 ret += mcgen('''
@@ -126,7 +159,7 @@ bool visit_type_%(c_name)s_members(Visitor *v, %(c_name)s *obj, Error **errp)
                              case=case_str,
                              c_type=var.type.c_name(), c_name=c_name(var.name))
 
-            ret += gen_endif(var.ifcond)
+            ret += var.ifcond.gen_endif()
         ret += mcgen('''
     default:
         abort();
@@ -212,7 +245,7 @@ bool visit_type_%(c_name)s(Visitor *v, const char *name,
                 c_name=c_name(name))
 
     for var in variants.variants:
-        ret += gen_if(var.ifcond)
+        ret += var.ifcond.gen_if()
         ret += mcgen('''
     case %(case)s:
 ''',
@@ -238,7 +271,7 @@ bool visit_type_%(c_name)s(Visitor *v, const char *name,
         ret += mcgen('''
         break;
 ''')
-        ret += gen_endif(var.ifcond)
+        ret += var.ifcond.gen_endif()
 
     ret += mcgen('''
     case QTYPE_NONE:
@@ -305,7 +338,7 @@ class QAPISchemaGenVisitVisitor(QAPISchemaModularCVisitor):
             prefix, 'qapi-visit', ' * Schema-defined QAPI visitors',
             ' * Built-in QAPI visitors', __doc__)
 
-    def _begin_system_module(self, name: None) -> None:
+    def _begin_builtin_module(self) -> None:
         self._genc.preamble_add(mcgen('''
 #include "qemu/osdep.h"
 #include "qapi/error.h"
@@ -336,8 +369,8 @@ class QAPISchemaGenVisitVisitor(QAPISchemaModularCVisitor):
 
     def visit_enum_type(self,
                         name: str,
-                        info: QAPISourceInfo,
-                        ifcond: List[str],
+                        info: Optional[QAPISourceInfo],
+                        ifcond: QAPISchemaIfCond,
                         features: List[QAPISchemaFeature],
                         members: List[QAPISchemaEnumMember],
                         prefix: Optional[str]) -> None:
@@ -348,7 +381,7 @@ class QAPISchemaGenVisitVisitor(QAPISchemaModularCVisitor):
     def visit_array_type(self,
                          name: str,
                          info: Optional[QAPISourceInfo],
-                         ifcond: List[str],
+                         ifcond: QAPISchemaIfCond,
                          element_type: QAPISchemaType) -> None:
         with ifcontext(ifcond, self._genh, self._genc):
             self._genh.add(gen_visit_decl(name))
@@ -357,7 +390,7 @@ class QAPISchemaGenVisitVisitor(QAPISchemaModularCVisitor):
     def visit_object_type(self,
                           name: str,
                           info: Optional[QAPISourceInfo],
-                          ifcond: List[str],
+                          ifcond: QAPISchemaIfCond,
                           features: List[QAPISchemaFeature],
                           base: Optional[QAPISchemaObjectType],
                           members: List[QAPISchemaObjectTypeMember],
@@ -378,8 +411,8 @@ class QAPISchemaGenVisitVisitor(QAPISchemaModularCVisitor):
 
     def visit_alternate_type(self,
                              name: str,
-                             info: QAPISourceInfo,
-                             ifcond: List[str],
+                             info: Optional[QAPISourceInfo],
+                             ifcond: QAPISchemaIfCond,
                              features: List[QAPISchemaFeature],
                              variants: QAPISchemaVariants) -> None:
         with ifcontext(ifcond, self._genh, self._genc):
index e0666a3afdc81f0f8277a53f3e1e323ca6efc98e..4115eaa121aabc17a417f5ef495512d44c6f98f1 100755 (executable)
@@ -4,7 +4,7 @@
 qemu_target_list="i386 i486 alpha arm armeb sparc sparc32plus sparc64 \
 ppc ppc64 ppc64le m68k mips mipsel mipsn32 mipsn32el mips64 mips64el \
 sh4 sh4eb s390x aarch64 aarch64_be hppa riscv32 riscv64 xtensa xtensaeb \
-microblaze microblazeel or1k x86_64"
+microblaze microblazeel or1k x86_64 hexagon loongarch64"
 
 i386_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00'
 i386_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
@@ -60,28 +60,28 @@ m68k_family=m68k
 
 # FIXME: We could use the other endianness on a MIPS host.
 
-mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08'
-mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20'
 mips_family=mips
 
-mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00'
-mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
+mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\x00\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00'
 mipsel_family=mips
 
-mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08'
-mipsn32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20'
+mipsn32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20'
 mipsn32_family=mips
 
-mipsn32el_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00'
-mipsn32el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+mipsn32el_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00'
+mipsn32el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\x00\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00'
 mipsn32el_family=mips
 
 mips64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08'
-mips64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+mips64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
 mips64_family=mips
 
 mips64el_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00'
-mips64el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+mips64el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\x00\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
 mips64el_family=mips
 
 sh4_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a\x00'
@@ -136,6 +136,14 @@ or1k_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\
 or1k_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
 or1k_family=or1k
 
+hexagon_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xa4\x00'
+hexagon_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+hexagon_family=hexagon
+
+loongarch64_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02\x01'
+loongarch64_mask='\xff\xff\xff\xff\xff\xff\xff\xfc\x00\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+loongarch64_family=loongarch
+
 qemu_get_family() {
     cpu=${HOST_ARCH:-$(uname -m)}
     case "$cpu" in
@@ -163,6 +171,9 @@ qemu_get_family() {
     riscv*)
         echo "riscv"
         ;;
+    loongarch*)
+        echo "loongarch"
+        ;;
     *)
         echo "$cpu"
         ;;
@@ -174,25 +185,27 @@ usage() {
 Usage: qemu-binfmt-conf.sh [--qemu-path PATH][--debian][--systemd CPU]
                            [--help][--credential yes|no][--exportdir PATH]
                            [--persistent yes|no][--qemu-suffix SUFFIX]
+                           [--preserve-argv0 yes|no]
 
        Configure binfmt_misc to use qemu interpreter
 
-       --help:        display this usage
-       --qemu-path:   set path to qemu interpreter ($QEMU_PATH)
-       --qemu-suffix: add a suffix to the default interpreter name
-       --debian:      don't write into /proc,
-                      instead generate update-binfmts templates
-       --systemd:     don't write into /proc,
-                      instead generate file for systemd-binfmt.service
-                      for the given CPU. If CPU is "ALL", generate a
-                      file for all known cpus
-       --exportdir:   define where to write configuration files
-                      (default: $SYSTEMDDIR or $DEBIANDIR)
-       --credential:  if yes, credential and security tokens are
-                      calculated according to the binary to interpret
-       --persistent:  if yes, the interpreter is loaded when binfmt is
-                      configured and remains in memory. All future uses
-                      are cloned from the open file.
+       --help:          display this usage
+       --qemu-path:     set path to qemu interpreter ($QEMU_PATH)
+       --qemu-suffix:   add a suffix to the default interpreter name
+       --debian:        don't write into /proc,
+                        instead generate update-binfmts templates
+       --systemd:       don't write into /proc,
+                        instead generate file for systemd-binfmt.service
+                        for the given CPU. If CPU is "ALL", generate a
+                        file for all known cpus
+       --exportdir:     define where to write configuration files
+                        (default: $SYSTEMDDIR or $DEBIANDIR)
+       --credential:    if yes, credential and security tokens are
+                        calculated according to the binary to interpret
+       --persistent:    if yes, the interpreter is loaded when binfmt is
+                        configured and remains in memory. All future uses
+                        are cloned from the open file.
+       --preserve-argv0 preserve argv[0]
 
     To import templates with update-binfmts, use :
 
@@ -265,8 +278,11 @@ qemu_generate_register() {
     if [ "$PERSISTENT" = "yes" ] ; then
         flags="${flags}F"
     fi
+    if [ "$PRESERVE_ARG0" = "yes" ] ; then
+        flags="${flags}P"
+    fi
 
-    echo ":qemu-$cpu:M::$magic:$mask:$qemu:P$flags"
+    echo ":qemu-$cpu:M::$magic:$mask:$qemu:$flags"
 }
 
 qemu_register_interpreter() {
@@ -285,7 +301,9 @@ package qemu-$cpu
 interpreter $qemu
 magic $magic
 mask $mask
-credential $CREDENTIAL
+credentials $CREDENTIAL
+preserve $PRESERVE_ARG0
+fix_binary $PERSISTENT
 EOF
 }
 
@@ -305,9 +323,9 @@ qemu_set_binfmts() {
             continue
         fi
 
-        qemu="$QEMU_PATH/qemu-$cpu-binfmt"
+        qemu="$QEMU_PATH/qemu-$cpu"
         if [ "$cpu" = "i486" ] ; then
-            qemu="$QEMU_PATH/qemu-i386-binfmt"
+            qemu="$QEMU_PATH/qemu-i386"
         fi
 
         qemu="$qemu$QEMU_SUFFIX"
@@ -326,9 +344,12 @@ DEBIANDIR="/usr/share/binfmts"
 QEMU_PATH=/usr/bin
 CREDENTIAL=no
 PERSISTENT=no
+PRESERVE_ARG0=no
 QEMU_SUFFIX=""
 
-options=$(getopt -o ds:Q:S:e:hc:p: -l debian,systemd:,qemu-path:,qemu-suffix:,exportdir:,help,credential:,persistent: -- "$@")
+_longopts="debian,systemd:,qemu-path:,qemu-suffix:,exportdir:,help,credential:,\
+persistent:,preserve-argv0:"
+options=$(getopt -o ds:Q:S:e:hc:p:g:F: -l ${_longopts} -- "$@")
 eval set -- "$options"
 
 while true ; do
@@ -384,6 +405,10 @@ while true ; do
         shift
         PERSISTENT="$1"
         ;;
+    -g|--preserve-argv0)
+        shift
+        PRESERVE_ARG0="$1"
+        ;;
     *)
         break
         ;;
index e0bfa7b5a4e87ea069cffbf60e997e65b02f1859..4d2a9f6c430ade56697a5d349579d733b266f823 100644 (file)
@@ -40,6 +40,7 @@ timers.TimersCommand()
 
 coroutine.CoroutineSPFunction()
 coroutine.CoroutinePCFunction()
+coroutine.CoroutineBt()
 
 # Default to silently passing through SIGUSR1, because QEMU sends it
 # to itself a lot.
old mode 100755 (executable)
new mode 100644 (file)
diff --git a/scripts/qemu-stamp.py b/scripts/qemu-stamp.py
new file mode 100644 (file)
index 0000000..7beeeb0
--- /dev/null
@@ -0,0 +1,24 @@
+#! /usr/bin/env python3
+
+# Usage: scripts/qemu-stamp.py STRING1 STRING2... -- FILE1 FILE2...
+import hashlib
+import os
+import sys
+
+sha = hashlib.sha1()
+is_file = False
+for arg in sys.argv[1:]:
+    if arg == '--':
+        is_file = True
+        continue
+    if is_file:
+        with open(arg, 'rb') as f:
+            for chunk in iter(lambda: f.read(65536), b''):
+                sha.update(chunk)
+    else:
+        sha.update(os.fsencode(arg))
+        sha.update(b'\n')
+
+# The hash can start with a digit, which the compiler doesn't
+# like as an symbol. So prefix it with an underscore
+print("_" + sha.hexdigest())
old mode 100755 (executable)
new mode 100644 (file)
index 90527eb..eb6e951
@@ -55,11 +55,6 @@ def tapset_dir(binary):
     return os.path.realpath(tapset)
 
 
-def tapset_env(tapset_dir):
-    tenv = copy.copy(os.environ)
-    tenv["SYSTEMTAP_TAPSET"] = tapset_dir
-    return tenv
-
 def cmd_run(args):
     prefix = probe_prefix(args.binary)
     tapsets = tapset_dir(args.binary)
@@ -81,11 +76,11 @@ def cmd_run(args):
 
     # We request an 8MB buffer, since the stap default 1MB buffer
     # can be easily overflowed by frequently firing QEMU traces
-    stapargs = ["stap", "-s", "8"]
+    stapargs = ["stap", "-s", "8", "-I", tapsets ]
     if args.pid is not None:
         stapargs.extend(["-x", args.pid])
     stapargs.extend(["-e", script])
-    subprocess.call(stapargs, env=tapset_env(tapsets))
+    subprocess.call(stapargs)
 
 
 def cmd_list(args):
@@ -101,10 +96,9 @@ def cmd_list(args):
 
         if verbose:
             print("Listing probes with name '%s'" % script)
-        proc = subprocess.Popen(["stap", "-l", script],
+        proc = subprocess.Popen(["stap", "-I", tapsets, "-l", script],
                                 stdout=subprocess.PIPE,
-                                universal_newlines=True,
-                                env=tapset_env(tapsets))
+                                universal_newlines=True)
         out, err = proc.communicate()
         if proc.returncode != 0:
             print("No probes found, are the tapsets installed in %s" % tapset_dir(args.binary))
index db6138902262a1ec3ede99e358988e6632d43889..7db46d4b6841acf7e09e8d77f6114fdd1532085d 100644 (file)
@@ -70,6 +70,11 @@ def bt_jmpbuf(jmpbuf):
     regs = get_jmpbuf_regs(jmpbuf)
     old = dict()
 
+    # remember current stack frame and select the topmost
+    # so that register modifications don't wreck it
+    selected_frame = gdb.selected_frame()
+    gdb.newest_frame().select()
+
     for i in regs:
         old[i] = gdb.parse_and_eval('(uint64_t)$%s' % i)
 
@@ -81,8 +86,13 @@ def bt_jmpbuf(jmpbuf):
     for i in regs:
         gdb.execute('set $%s = %s' % (i, old[i]))
 
+    selected_frame.select()
+
+def co_cast(co):
+    return co.cast(gdb.lookup_type('CoroutineUContext').pointer())
+
 def coroutine_to_jmpbuf(co):
-    coroutine_pointer = co.cast(gdb.lookup_type('CoroutineUContext').pointer())
+    coroutine_pointer = co_cast(co)
     return coroutine_pointer['env']['__jmpbuf']
 
 
@@ -100,6 +110,29 @@ class CoroutineCommand(gdb.Command):
 
         bt_jmpbuf(coroutine_to_jmpbuf(gdb.parse_and_eval(argv[0])))
 
+class CoroutineBt(gdb.Command):
+    '''Display backtrace including coroutine switches'''
+    def __init__(self):
+        gdb.Command.__init__(self, 'qemu bt', gdb.COMMAND_STACK,
+                             gdb.COMPLETE_NONE)
+
+    def invoke(self, arg, from_tty):
+
+        gdb.execute("bt")
+
+        if gdb.parse_and_eval("qemu_in_coroutine()") == False:
+            return
+
+        co_ptr = gdb.parse_and_eval("qemu_coroutine_self()")
+
+        while True:
+            co = co_cast(co_ptr)
+            co_ptr = co["base"]["caller"]
+            if co_ptr == 0:
+                break
+            gdb.write("Coroutine at " + str(co_ptr) + ":\n")
+            bt_jmpbuf(coroutine_to_jmpbuf(co_ptr))
+
 class CoroutineSPFunction(gdb.Function):
     def __init__(self):
         gdb.Function.__init__(self, 'qemu_coroutine_sp')
old mode 100755 (executable)
new mode 100644 (file)
index ce12298..56edd02
 #!/usr/bin/env python3
 
-# QEMU Guest Agent Client
-#
-# Copyright (C) 2012 Ryota Ozaki <ozaki.ryota@gmail.com>
-#
-# This work is licensed under the terms of the GNU GPL, version 2.  See
-# the COPYING file in the top-level directory.
-#
-# Usage:
-#
-# Start QEMU with:
-#
-# # qemu [...] -chardev socket,path=/tmp/qga.sock,server,nowait,id=qga0 \
-#   -device virtio-serial -device virtserialport,chardev=qga0,name=org.qemu.guest_agent.0
-#
-# Run the script:
-#
-# $ qemu-ga-client --address=/tmp/qga.sock <command> [args...]
-#
-# or
-#
-# $ export QGA_CLIENT_ADDRESS=/tmp/qga.sock
-# $ qemu-ga-client <command> [args...]
-#
-# For example:
-#
-# $ qemu-ga-client cat /etc/resolv.conf
-# # Generated by NetworkManager
-# nameserver 10.0.2.3
-# $ qemu-ga-client fsfreeze status
-# thawed
-# $ qemu-ga-client fsfreeze freeze
-# 2 filesystems frozen
-#
-# See also: https://wiki.qemu.org/Features/QAPI/GuestAgent
-#
-
 import os
 import sys
-import base64
-import random
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
-from qemu import qmp
-
-
-class QemuGuestAgent(qmp.QEMUMonitorProtocol):
-    def __getattr__(self, name):
-        def wrapper(**kwds):
-            return self.command('guest-' + name.replace('_', '-'), **kwds)
-        return wrapper
-
-
-class QemuGuestAgentClient:
-    error = QemuGuestAgent.error
-
-    def __init__(self, address):
-        self.qga = QemuGuestAgent(address)
-        self.qga.connect(negotiate=False)
-
-    def sync(self, timeout=3):
-        # Avoid being blocked forever
-        if not self.ping(timeout):
-            raise EnvironmentError('Agent seems not alive')
-        uid = random.randint(0, (1 << 32) - 1)
-        while True:
-            ret = self.qga.sync(id=uid)
-            if isinstance(ret, int) and int(ret) == uid:
-                break
-
-    def __file_read_all(self, handle):
-        eof = False
-        data = ''
-        while not eof:
-            ret = self.qga.file_read(handle=handle, count=1024)
-            _data = base64.b64decode(ret['buf-b64'])
-            data += _data
-            eof = ret['eof']
-        return data
-
-    def read(self, path):
-        handle = self.qga.file_open(path=path)
-        try:
-            data = self.__file_read_all(handle)
-        finally:
-            self.qga.file_close(handle=handle)
-        return data
-
-    def info(self):
-        info = self.qga.info()
-
-        msgs = []
-        msgs.append('version: ' + info['version'])
-        msgs.append('supported_commands:')
-        enabled = [c['name'] for c in info['supported_commands'] if c['enabled']]
-        msgs.append('\tenabled: ' + ', '.join(enabled))
-        disabled = [c['name'] for c in info['supported_commands'] if not c['enabled']]
-        msgs.append('\tdisabled: ' + ', '.join(disabled))
-
-        return '\n'.join(msgs)
-
-    def __gen_ipv4_netmask(self, prefixlen):
-        mask = int('1' * prefixlen + '0' * (32 - prefixlen), 2)
-        return '.'.join([str(mask >> 24),
-                         str((mask >> 16) & 0xff),
-                         str((mask >> 8) & 0xff),
-                         str(mask & 0xff)])
-
-    def ifconfig(self):
-        nifs = self.qga.network_get_interfaces()
-
-        msgs = []
-        for nif in nifs:
-            msgs.append(nif['name'] + ':')
-            if 'ip-addresses' in nif:
-                for ipaddr in nif['ip-addresses']:
-                    if ipaddr['ip-address-type'] == 'ipv4':
-                        addr = ipaddr['ip-address']
-                        mask = self.__gen_ipv4_netmask(int(ipaddr['prefix']))
-                        msgs.append("\tinet %s  netmask %s" % (addr, mask))
-                    elif ipaddr['ip-address-type'] == 'ipv6':
-                        addr = ipaddr['ip-address']
-                        prefix = ipaddr['prefix']
-                        msgs.append("\tinet6 %s  prefixlen %s" % (addr, prefix))
-            if nif['hardware-address'] != '00:00:00:00:00:00':
-                msgs.append("\tether " + nif['hardware-address'])
-
-        return '\n'.join(msgs)
-
-    def ping(self, timeout):
-        self.qga.settimeout(timeout)
-        try:
-            self.qga.ping()
-        except self.qga.timeout:
-            return False
-        return True
-
-    def fsfreeze(self, cmd):
-        if cmd not in ['status', 'freeze', 'thaw']:
-            raise Exception('Invalid command: ' + cmd)
-
-        return getattr(self.qga, 'fsfreeze' + '_' + cmd)()
-
-    def fstrim(self, minimum=0):
-        return getattr(self.qga, 'fstrim')(minimum=minimum)
-
-    def suspend(self, mode):
-        if mode not in ['disk', 'ram', 'hybrid']:
-            raise Exception('Invalid mode: ' + mode)
-
-        try:
-            getattr(self.qga, 'suspend' + '_' + mode)()
-            # On error exception will raise
-        except self.qga.timeout:
-            # On success command will timed out
-            return
-
-    def shutdown(self, mode='powerdown'):
-        if mode not in ['powerdown', 'halt', 'reboot']:
-            raise Exception('Invalid mode: ' + mode)
-
-        try:
-            self.qga.shutdown(mode=mode)
-        except self.qga.timeout:
-            return
-
-
-def _cmd_cat(client, args):
-    if len(args) != 1:
-        print('Invalid argument')
-        print('Usage: cat <file>')
-        sys.exit(1)
-    print(client.read(args[0]))
-
-
-def _cmd_fsfreeze(client, args):
-    usage = 'Usage: fsfreeze status|freeze|thaw'
-    if len(args) != 1:
-        print('Invalid argument')
-        print(usage)
-        sys.exit(1)
-    if args[0] not in ['status', 'freeze', 'thaw']:
-        print('Invalid command: ' + args[0])
-        print(usage)
-        sys.exit(1)
-    cmd = args[0]
-    ret = client.fsfreeze(cmd)
-    if cmd == 'status':
-        print(ret)
-    elif cmd == 'freeze':
-        print("%d filesystems frozen" % ret)
-    else:
-        print("%d filesystems thawed" % ret)
-
-
-def _cmd_fstrim(client, args):
-    if len(args) == 0:
-        minimum = 0
-    else:
-        minimum = int(args[0])
-    print(client.fstrim(minimum))
-
-
-def _cmd_ifconfig(client, args):
-    print(client.ifconfig())
-
-
-def _cmd_info(client, args):
-    print(client.info())
-
-
-def _cmd_ping(client, args):
-    if len(args) == 0:
-        timeout = 3
-    else:
-        timeout = float(args[0])
-    alive = client.ping(timeout)
-    if not alive:
-        print("Not responded in %s sec" % args[0])
-        sys.exit(1)
-
-
-def _cmd_suspend(client, args):
-    usage = 'Usage: suspend disk|ram|hybrid'
-    if len(args) != 1:
-        print('Less argument')
-        print(usage)
-        sys.exit(1)
-    if args[0] not in ['disk', 'ram', 'hybrid']:
-        print('Invalid command: ' + args[0])
-        print(usage)
-        sys.exit(1)
-    client.suspend(args[0])
-
-
-def _cmd_shutdown(client, args):
-    client.shutdown()
-_cmd_powerdown = _cmd_shutdown
-
-
-def _cmd_halt(client, args):
-    client.shutdown('halt')
-
-
-def _cmd_reboot(client, args):
-    client.shutdown('reboot')
-
-
-commands = [m.replace('_cmd_', '') for m in dir() if '_cmd_' in m]
-
-
-def main(address, cmd, args):
-    if not os.path.exists(address):
-        print('%s not found' % address)
-        sys.exit(1)
-
-    if cmd not in commands:
-        print('Invalid command: ' + cmd)
-        print('Available commands: ' + ', '.join(commands))
-        sys.exit(1)
-
-    try:
-        client = QemuGuestAgentClient(address)
-    except QemuGuestAgent.error as e:
-        import errno
-
-        print(e)
-        if e.errno == errno.ECONNREFUSED:
-            print('Hint: qemu is not running?')
-        sys.exit(1)
-
-    if cmd == 'fsfreeze' and args[0] == 'freeze':
-        client.sync(60)
-    elif cmd != 'ping':
-        client.sync()
-
-    globals()['_cmd_' + cmd](client, args)
+from qemu.utils import qemu_ga_client
 
 
 if __name__ == '__main__':
-    import sys
-    import os
-    import optparse
-
-    address = os.environ['QGA_CLIENT_ADDRESS'] if 'QGA_CLIENT_ADDRESS' in os.environ else None
-
-    usage = "%prog [--address=<unix_path>|<ipv4_address>] <command> [args...]\n"
-    usage += '<command>: ' + ', '.join(commands)
-    parser = optparse.OptionParser(usage=usage)
-    parser.add_option('--address', action='store', type='string',
-                      default=address, help='Specify a ip:port pair or a unix socket path')
-    options, args = parser.parse_args()
-
-    address = options.address
-    if address is None:
-        parser.error('address is not specified')
-        sys.exit(1)
-
-    if len(args) == 0:
-        parser.error('Less argument')
-        sys.exit(1)
-
-    main(address, args[0], args[1:])
+    sys.exit(qemu_ga_client.main())
old mode 100755 (executable)
new mode 100644 (file)
old mode 100755 (executable)
new mode 100644 (file)
index b4d0609..4a20f97
 #!/usr/bin/env python3
-#
-# Low-level QEMU shell on top of QMP.
-#
-# Copyright (C) 2009, 2010 Red Hat Inc.
-#
-# Authors:
-#  Luiz Capitulino <lcapitulino@redhat.com>
-#
-# This work is licensed under the terms of the GNU GPL, version 2.  See
-# the COPYING file in the top-level directory.
-#
-# Usage:
-#
-# Start QEMU with:
-#
-# # qemu [...] -qmp unix:./qmp-sock,server
-#
-# Run the shell:
-#
-# $ qmp-shell ./qmp-sock
-#
-# Commands have the following format:
-#
-#    < command-name > [ arg-name1=arg1 ] ... [ arg-nameN=argN ]
-#
-# For example:
-#
-# (QEMU) device_add driver=e1000 id=net1
-# {u'return': {}}
-# (QEMU)
-#
-# key=value pairs also support Python or JSON object literal subset notations,
-# without spaces. Dictionaries/objects {} are supported as are arrays [].
-#
-#    example-command arg-name1={'key':'value','obj'={'prop':"value"}}
-#
-# Both JSON and Python formatting should work, including both styles of
-# string literal quotes. Both paradigms of literal values should work,
-# including null/true/false for JSON and None/True/False for Python.
-#
-#
-# Transactions have the following multi-line format:
-#
-#    transaction(
-#    action-name1 [ arg-name1=arg1 ] ... [arg-nameN=argN ]
-#    ...
-#    action-nameN [ arg-name1=arg1 ] ... [arg-nameN=argN ]
-#    )
-#
-# One line transactions are also supported:
-#
-#    transaction( action-name1 ... )
-#
-# For example:
-#
-#     (QEMU) transaction(
-#     TRANS> block-dirty-bitmap-add node=drive0 name=bitmap1
-#     TRANS> block-dirty-bitmap-clear node=drive0 name=bitmap0
-#     TRANS> )
-#     {"return": {}}
-#     (QEMU)
-#
-# Use the -v and -p options to activate the verbose and pretty-print options,
-# which will echo back the properly formatted JSON-compliant QMP that is being
-# sent to QEMU, which is useful for debugging and documentation generation.
 
-import json
-import ast
-import readline
-import sys
 import os
-import errno
-import atexit
-import re
+import sys
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
-from qemu import qmp
-
-class QMPCompleter(list):
-    def complete(self, text, state):
-        for cmd in self:
-            if cmd.startswith(text):
-                if not state:
-                    return cmd
-                else:
-                    state -= 1
-
-class QMPShellError(Exception):
-    pass
-
-class QMPShellBadPort(QMPShellError):
-    pass
-
-class FuzzyJSON(ast.NodeTransformer):
-    '''This extension of ast.NodeTransformer filters literal "true/false/null"
-    values in an AST and replaces them by proper "True/False/None" values that
-    Python can properly evaluate.'''
-    def visit_Name(self, node):
-        if node.id == 'true':
-            node.id = 'True'
-        if node.id == 'false':
-            node.id = 'False'
-        if node.id == 'null':
-            node.id = 'None'
-        return node
-
-# TODO: QMPShell's interface is a bit ugly (eg. _fill_completion() and
-#       _execute_cmd()). Let's design a better one.
-class QMPShell(qmp.QEMUMonitorProtocol):
-    def __init__(self, address, pretty=False):
-        super(QMPShell, self).__init__(self.__get_address(address))
-        self._greeting = None
-        self._completer = None
-        self._pretty = pretty
-        self._transmode = False
-        self._actions = list()
-        self._histfile = os.path.join(os.path.expanduser('~'),
-                                      '.qmp-shell_history')
-
-    def __get_address(self, arg):
-        """
-        Figure out if the argument is in the port:host form, if it's not it's
-        probably a file path.
-        """
-        addr = arg.split(':')
-        if len(addr) == 2:
-            try:
-                port = int(addr[1])
-            except ValueError:
-                raise QMPShellBadPort
-            return ( addr[0], port )
-        # socket path
-        return arg
-
-    def _fill_completion(self):
-        cmds = self.cmd('query-commands')
-        if 'error' in cmds:
-            return
-        for cmd in cmds['return']:
-            self._completer.append(cmd['name'])
-
-    def __completer_setup(self):
-        self._completer = QMPCompleter()
-        self._fill_completion()
-        readline.set_history_length(1024)
-        readline.set_completer(self._completer.complete)
-        readline.parse_and_bind("tab: complete")
-        # XXX: default delimiters conflict with some command names (eg. query-),
-        # clearing everything as it doesn't seem to matter
-        readline.set_completer_delims('')
-        try:
-            readline.read_history_file(self._histfile)
-        except Exception as e:
-            if isinstance(e, IOError) and e.errno == errno.ENOENT:
-                # File not found. No problem.
-                pass
-            else:
-                print("Failed to read history '%s'; %s" % (self._histfile, e))
-        atexit.register(self.__save_history)
-
-    def __save_history(self):
-        try:
-            readline.write_history_file(self._histfile)
-        except Exception as e:
-            print("Failed to save history file '%s'; %s" % (self._histfile, e))
-
-    def __parse_value(self, val):
-        try:
-            return int(val)
-        except ValueError:
-            pass
-
-        if val.lower() == 'true':
-            return True
-        if val.lower() == 'false':
-            return False
-        if val.startswith(('{', '[')):
-            # Try first as pure JSON:
-            try:
-                return json.loads(val)
-            except ValueError:
-                pass
-            # Try once again as FuzzyJSON:
-            try:
-                st = ast.parse(val, mode='eval')
-                return ast.literal_eval(FuzzyJSON().visit(st))
-            except SyntaxError:
-                pass
-            except ValueError:
-                pass
-        return val
-
-    def __cli_expr(self, tokens, parent):
-        for arg in tokens:
-            (key, sep, val) = arg.partition('=')
-            if sep != '=':
-                raise QMPShellError("Expected a key=value pair, got '%s'" % arg)
-
-            value = self.__parse_value(val)
-            optpath = key.split('.')
-            curpath = []
-            for p in optpath[:-1]:
-                curpath.append(p)
-                d = parent.get(p, {})
-                if type(d) is not dict:
-                    raise QMPShellError('Cannot use "%s" as both leaf and non-leaf key' % '.'.join(curpath))
-                parent[p] = d
-                parent = d
-            if optpath[-1] in parent:
-                if type(parent[optpath[-1]]) is dict:
-                    raise QMPShellError('Cannot use "%s" as both leaf and non-leaf key' % '.'.join(curpath))
-                else:
-                    raise QMPShellError('Cannot set "%s" multiple times' % key)
-            parent[optpath[-1]] = value
-
-    def __build_cmd(self, cmdline):
-        """
-        Build a QMP input object from a user provided command-line in the
-        following format:
-
-            < command-name > [ arg-name1=arg1 ] ... [ arg-nameN=argN ]
-        """
-        cmdargs = re.findall(r'''(?:[^\s"']|"(?:\\.|[^"])*"|'(?:\\.|[^'])*')+''', cmdline)
-
-        # Transactional CLI entry/exit:
-        if cmdargs[0] == 'transaction(':
-            self._transmode = True
-            cmdargs.pop(0)
-        elif cmdargs[0] == ')' and self._transmode:
-            self._transmode = False
-            if len(cmdargs) > 1:
-                raise QMPShellError("Unexpected input after close of Transaction sub-shell")
-            qmpcmd = { 'execute': 'transaction',
-                       'arguments': { 'actions': self._actions } }
-            self._actions = list()
-            return qmpcmd
-
-        # Nothing to process?
-        if not cmdargs:
-            return None
-
-        # Parse and then cache this Transactional Action
-        if self._transmode:
-            finalize = False
-            action = { 'type': cmdargs[0], 'data': {} }
-            if cmdargs[-1] == ')':
-                cmdargs.pop(-1)
-                finalize = True
-            self.__cli_expr(cmdargs[1:], action['data'])
-            self._actions.append(action)
-            return self.__build_cmd(')') if finalize else None
-
-        # Standard command: parse and return it to be executed.
-        qmpcmd = { 'execute': cmdargs[0], 'arguments': {} }
-        self.__cli_expr(cmdargs[1:], qmpcmd['arguments'])
-        return qmpcmd
-
-    def _print(self, qmp):
-        indent = None
-        if self._pretty:
-            indent = 4
-        jsobj = json.dumps(qmp, indent=indent, sort_keys=self._pretty)
-        print(str(jsobj))
-
-    def _execute_cmd(self, cmdline):
-        try:
-            qmpcmd = self.__build_cmd(cmdline)
-        except Exception as e:
-            print('Error while parsing command line: %s' % e)
-            print('command format: <command-name> ', end=' ')
-            print('[arg-name1=arg1] ... [arg-nameN=argN]')
-            return True
-        # For transaction mode, we may have just cached the action:
-        if qmpcmd is None:
-            return True
-        if self._verbose:
-            self._print(qmpcmd)
-        resp = self.cmd_obj(qmpcmd)
-        if resp is None:
-            print('Disconnected')
-            return False
-        self._print(resp)
-        return True
-
-    def connect(self, negotiate):
-        self._greeting = super(QMPShell, self).connect(negotiate)
-        self.__completer_setup()
-
-    def show_banner(self, msg='Welcome to the QMP low-level shell!'):
-        print(msg)
-        if not self._greeting:
-            print('Connected')
-            return
-        version = self._greeting['QMP']['version']['qemu']
-        print('Connected to QEMU %d.%d.%d\n' % (version['major'],version['minor'],version['micro']))
-
-    def get_prompt(self):
-        if self._transmode:
-            return "TRANS> "
-        return "(QEMU) "
-
-    def read_exec_command(self, prompt):
-        """
-        Read and execute a command.
-
-        @return True if execution was ok, return False if disconnected.
-        """
-        try:
-            cmdline = input(prompt)
-        except EOFError:
-            print()
-            return False
-        if cmdline == '':
-            for ev in self.get_events():
-                print(ev)
-            self.clear_events()
-            return True
-        else:
-            return self._execute_cmd(cmdline)
-
-    def set_verbosity(self, verbose):
-        self._verbose = verbose
-
-class HMPShell(QMPShell):
-    def __init__(self, address):
-        QMPShell.__init__(self, address)
-        self.__cpu_index = 0
-
-    def __cmd_completion(self):
-        for cmd in self.__cmd_passthrough('help')['return'].split('\r\n'):
-            if cmd and cmd[0] != '[' and cmd[0] != '\t':
-                name = cmd.split()[0] # drop help text
-                if name == 'info':
-                    continue
-                if name.find('|') != -1:
-                    # Command in the form 'foobar|f' or 'f|foobar', take the
-                    # full name
-                    opt = name.split('|')
-                    if len(opt[0]) == 1:
-                        name = opt[1]
-                    else:
-                        name = opt[0]
-                self._completer.append(name)
-                self._completer.append('help ' + name) # help completion
-
-    def __info_completion(self):
-        for cmd in self.__cmd_passthrough('info')['return'].split('\r\n'):
-            if cmd:
-                self._completer.append('info ' + cmd.split()[1])
-
-    def __other_completion(self):
-        # special cases
-        self._completer.append('help info')
-
-    def _fill_completion(self):
-        self.__cmd_completion()
-        self.__info_completion()
-        self.__other_completion()
-
-    def __cmd_passthrough(self, cmdline, cpu_index = 0):
-        return self.cmd_obj({ 'execute': 'human-monitor-command', 'arguments':
-                              { 'command-line': cmdline,
-                                'cpu-index': cpu_index } })
-
-    def _execute_cmd(self, cmdline):
-        if cmdline.split()[0] == "cpu":
-            # trap the cpu command, it requires special setting
-            try:
-                idx = int(cmdline.split()[1])
-                if not 'return' in self.__cmd_passthrough('info version', idx):
-                    print('bad CPU index')
-                    return True
-                self.__cpu_index = idx
-            except ValueError:
-                print('cpu command takes an integer argument')
-                return True
-        resp = self.__cmd_passthrough(cmdline, self.__cpu_index)
-        if resp is None:
-            print('Disconnected')
-            return False
-        assert 'return' in resp or 'error' in resp
-        if 'return' in resp:
-            # Success
-            if len(resp['return']) > 0:
-                print(resp['return'], end=' ')
-        else:
-            # Error
-            print('%s: %s' % (resp['error']['class'], resp['error']['desc']))
-        return True
-
-    def show_banner(self):
-        QMPShell.show_banner(self, msg='Welcome to the HMP shell!')
-
-def die(msg):
-    sys.stderr.write('ERROR: %s\n' % msg)
-    sys.exit(1)
-
-def fail_cmdline(option=None):
-    if option:
-        sys.stderr.write('ERROR: bad command-line option \'%s\'\n' % option)
-    sys.stderr.write('qmp-shell [ -v ] [ -p ] [ -H ] [ -N ] < UNIX socket path> | < TCP address:port >\n')
-    sys.stderr.write('    -v     Verbose (echo command sent and received)\n')
-    sys.stderr.write('    -p     Pretty-print JSON\n')
-    sys.stderr.write('    -H     Use HMP interface\n')
-    sys.stderr.write('    -N     Skip negotiate (for qemu-ga)\n')
-    sys.exit(1)
-
-def main():
-    addr = ''
-    qemu = None
-    hmp = False
-    pretty = False
-    verbose = False
-    negotiate = True
-
-    try:
-        for arg in sys.argv[1:]:
-            if arg == "-H":
-                if qemu is not None:
-                    fail_cmdline(arg)
-                hmp = True
-            elif arg == "-p":
-                pretty = True
-            elif arg == "-N":
-                negotiate = False
-            elif arg == "-v":
-                verbose = True
-            else:
-                if qemu is not None:
-                    fail_cmdline(arg)
-                if hmp:
-                    qemu = HMPShell(arg)
-                else:
-                    qemu = QMPShell(arg, pretty)
-                addr = arg
-
-        if qemu is None:
-            fail_cmdline()
-    except QMPShellBadPort:
-        die('bad port number in command-line')
-
-    try:
-        qemu.connect(negotiate)
-    except qmp.QMPConnectError:
-        die('Didn\'t get QMP greeting message')
-    except qmp.QMPCapabilitiesError:
-        die('Could not negotiate capabilities')
-    except qemu.error:
-        die('Could not connect to %s' % addr)
+from qemu.qmp import qmp_shell
 
-    qemu.show_banner()
-    qemu.set_verbosity(verbose)
-    while qemu.read_exec_command(qemu.get_prompt()):
-        pass
-    qemu.close()
 
 if __name__ == '__main__':
-    main()
+    qmp_shell.main()
diff --git a/scripts/qmp/qmp-shell-wrap b/scripts/qmp/qmp-shell-wrap
new file mode 100644 (file)
index 0000000..9e94da1
--- /dev/null
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
+from qemu.qmp import qmp_shell
+
+
+if __name__ == '__main__':
+    qmp_shell.main_wrap()
old mode 100755 (executable)
new mode 100644 (file)
index 7c7cff8..d453807
 #!/usr/bin/env python3
-##
-# QEMU Object Model test tools
-#
-# Copyright IBM, Corp. 2012
-# Copyright (C) 2020 Red Hat, Inc.
-#
-# Authors:
-#  Anthony Liguori   <aliguori@us.ibm.com>
-#  Markus Armbruster <armbru@redhat.com>
-#
-# This work is licensed under the terms of the GNU GPL, version 2 or later.  See
-# the COPYING file in the top-level directory.
-##
 
-import fuse, stat
-from fuse import FUSE, FuseOSError, Operations
-import os, posix, sys
-from errno import *
+import os
+import sys
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
-from qemu.qmp import QEMUMonitorProtocol
+from qemu.utils.qom_fuse import QOMFuse
 
-fuse.fuse_python_api = (0, 2)
-
-class QOMFS(Operations):
-    def __init__(self, qmp):
-        self.qmp = qmp
-        self.qmp.connect()
-        self.ino_map = {}
-        self.ino_count = 1
-
-    def get_ino(self, path):
-        if path in self.ino_map:
-            return self.ino_map[path]
-        self.ino_map[path] = self.ino_count
-        self.ino_count += 1
-        return self.ino_map[path]
-
-    def is_object(self, path):
-        try:
-            items = self.qmp.command('qom-list', path=path)
-            return True
-        except:
-            return False
-
-    def is_property(self, path):
-        path, prop = path.rsplit('/', 1)
-        if path == '':
-            path = '/'
-        try:
-            for item in self.qmp.command('qom-list', path=path):
-                if item['name'] == prop:
-                    return True
-            return False
-        except:
-            return False
-
-    def is_link(self, path):
-        path, prop = path.rsplit('/', 1)
-        if path == '':
-            path = '/'
-        try:
-            for item in self.qmp.command('qom-list', path=path):
-                if item['name'] == prop:
-                    if item['type'].startswith('link<'):
-                        return True
-                    return False
-            return False
-        except:
-            return False
-
-    def read(self, path, length, offset, fh):
-        if not self.is_property(path):
-            return -ENOENT
-
-        path, prop = path.rsplit('/', 1)
-        if path == '':
-            path = '/'
-        try:
-            data = self.qmp.command('qom-get', path=path, property=prop)
-            data += '\n' # make values shell friendly
-        except:
-            raise FuseOSError(EPERM)
-
-        if offset > len(data):
-            return ''
-
-        return bytes(data[offset:][:length], encoding='utf-8')
-
-    def readlink(self, path):
-        if not self.is_link(path):
-            return False
-        path, prop = path.rsplit('/', 1)
-        prefix = '/'.join(['..'] * (len(path.split('/')) - 1))
-        return prefix + str(self.qmp.command('qom-get', path=path,
-                                             property=prop))
-
-    def getattr(self, path, fh=None):
-        if self.is_link(path):
-            value = { 'st_mode': 0o755 | stat.S_IFLNK,
-                      'st_ino': self.get_ino(path),
-                      'st_dev': 0,
-                      'st_nlink': 2,
-                      'st_uid': 1000,
-                      'st_gid': 1000,
-                      'st_size': 4096,
-                      'st_atime': 0,
-                      'st_mtime': 0,
-                      'st_ctime': 0 }
-        elif self.is_object(path):
-            value = { 'st_mode': 0o755 | stat.S_IFDIR,
-                      'st_ino': self.get_ino(path),
-                      'st_dev': 0,
-                      'st_nlink': 2,
-                      'st_uid': 1000,
-                      'st_gid': 1000,
-                      'st_size': 4096,
-                      'st_atime': 0,
-                      'st_mtime': 0,
-                      'st_ctime': 0 }
-        elif self.is_property(path):
-            value = { 'st_mode': 0o644 | stat.S_IFREG,
-                      'st_ino': self.get_ino(path),
-                      'st_dev': 0,
-                      'st_nlink': 1,
-                      'st_uid': 1000,
-                      'st_gid': 1000,
-                      'st_size': 4096,
-                      'st_atime': 0,
-                      'st_mtime': 0,
-                      'st_ctime': 0 }
-        else:
-            raise FuseOSError(ENOENT)
-        return value
-
-    def readdir(self, path, fh):
-        yield '.'
-        yield '..'
-        for item in self.qmp.command('qom-list', path=path):
-            yield str(item['name'])
 
 if __name__ == '__main__':
-    import os
-
-    fuse = FUSE(QOMFS(QEMUMonitorProtocol(os.environ['QMP_SOCKET'])),
-                sys.argv[1], foreground=True)
+    sys.exit(QOMFuse.entry_point())
old mode 100755 (executable)
new mode 100644 (file)
index 666df71..04ebe05
@@ -1,69 +1,11 @@
 #!/usr/bin/env python3
-##
-# QEMU Object Model test tools
-#
-# Copyright IBM, Corp. 2011
-#
-# Authors:
-#  Anthony Liguori   <aliguori@us.ibm.com>
-#
-# This work is licensed under the terms of the GNU GPL, version 2 or later.  See
-# the COPYING file in the top-level directory.
-##
 
-import sys
 import os
+import sys
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
-from qemu.qmp import QEMUMonitorProtocol
-
-cmd, args = sys.argv[0], sys.argv[1:]
-socket_path = None
-path = None
-prop = None
-
-def usage():
-    return '''environment variables:
-    QMP_SOCKET=<path | addr:port>
-usage:
-    %s [-h] [-s <QMP socket path | addr:port>] <path>.<property>
-''' % cmd
-
-def usage_error(error_msg = "unspecified error"):
-    sys.stderr.write('%s\nERROR: %s\n' % (usage(), error_msg))
-    exit(1)
-
-if len(args) > 0:
-    if args[0] == "-h":
-        print(usage())
-        exit(0);
-    elif args[0] == "-s":
-        try:
-            socket_path = args[1]
-        except:
-            usage_error("missing argument: QMP socket path or address");
-        args = args[2:]
-
-if not socket_path:
-    if 'QMP_SOCKET' in os.environ:
-        socket_path = os.environ['QMP_SOCKET']
-    else:
-        usage_error("no QMP socket path or address given");
-
-if len(args) > 0:
-    try:
-        path, prop = args[0].rsplit('.', 1)
-    except:
-        usage_error("invalid format for path/property/value")
-else:
-    usage_error("not enough arguments")
+from qemu.utils.qom import QOMGet
 
-srv = QEMUMonitorProtocol(socket_path)
-srv.connect()
 
-rsp = srv.command('qom-get', path=path, property=prop)
-if type(rsp) == dict:
-    for i in rsp.keys():
-        print('%s: %s' % (i, rsp[i]))
-else:
-    print(rsp)
+if __name__ == '__main__':
+    sys.exit(QOMGet.entry_point())
old mode 100755 (executable)
new mode 100644 (file)
index 5074fd9..853b85a
@@ -1,66 +1,11 @@
 #!/usr/bin/env python3
-##
-# QEMU Object Model test tools
-#
-# Copyright IBM, Corp. 2011
-#
-# Authors:
-#  Anthony Liguori   <aliguori@us.ibm.com>
-#
-# This work is licensed under the terms of the GNU GPL, version 2 or later.  See
-# the COPYING file in the top-level directory.
-##
 
-import sys
 import os
+import sys
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
-from qemu.qmp import QEMUMonitorProtocol
-
-cmd, args = sys.argv[0], sys.argv[1:]
-socket_path = None
-path = None
-prop = None
-
-def usage():
-    return '''environment variables:
-    QMP_SOCKET=<path | addr:port>
-usage:
-    %s [-h] [-s <QMP socket path | addr:port>] [<path>]
-''' % cmd
-
-def usage_error(error_msg = "unspecified error"):
-    sys.stderr.write('%s\nERROR: %s\n' % (usage(), error_msg))
-    exit(1)
-
-if len(args) > 0:
-    if args[0] == "-h":
-        print(usage())
-        exit(0);
-    elif args[0] == "-s":
-        try:
-            socket_path = args[1]
-        except:
-            usage_error("missing argument: QMP socket path or address");
-        args = args[2:]
-
-if not socket_path:
-    if 'QMP_SOCKET' in os.environ:
-        socket_path = os.environ['QMP_SOCKET']
-    else:
-        usage_error("no QMP socket path or address given");
-
-srv = QEMUMonitorProtocol(socket_path)
-srv.connect()
+from qemu.utils.qom import QOMList
 
-if len(args) == 0:
-    print('/')
-    sys.exit(0)
 
-for item in srv.command('qom-list', path=args[0]):
-    if item['type'].startswith('child<'):
-        print('%s/' % item['name'])
-    elif item['type'].startswith('link<'):
-        print('@%s/' % item['name'])
-    else:
-        print('%s' % item['name'])
+if __name__ == '__main__':
+    sys.exit(QOMList.entry_point())
old mode 100755 (executable)
new mode 100644 (file)
index 240a781..06820fe
@@ -1,66 +1,11 @@
 #!/usr/bin/env python3
-##
-# QEMU Object Model test tools
-#
-# Copyright IBM, Corp. 2011
-#
-# Authors:
-#  Anthony Liguori   <aliguori@us.ibm.com>
-#
-# This work is licensed under the terms of the GNU GPL, version 2 or later.  See
-# the COPYING file in the top-level directory.
-##
 
-import sys
 import os
+import sys
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
-from qemu.qmp import QEMUMonitorProtocol
-
-cmd, args = sys.argv[0], sys.argv[1:]
-socket_path = None
-path = None
-prop = None
-value = None
-
-def usage():
-    return '''environment variables:
-    QMP_SOCKET=<path | addr:port>
-usage:
-    %s [-h] [-s <QMP socket path | addr:port>] <path>.<property> <value>
-''' % cmd
-
-def usage_error(error_msg = "unspecified error"):
-    sys.stderr.write('%s\nERROR: %s\n' % (usage(), error_msg))
-    exit(1)
-
-if len(args) > 0:
-    if args[0] == "-h":
-        print(usage())
-        exit(0);
-    elif args[0] == "-s":
-        try:
-            socket_path = args[1]
-        except:
-            usage_error("missing argument: QMP socket path or address");
-        args = args[2:]
-
-if not socket_path:
-    if 'QMP_SOCKET' in os.environ:
-        socket_path = os.environ['QMP_SOCKET']
-    else:
-        usage_error("no QMP socket path or address given");
-
-if len(args) > 1:
-    try:
-        path, prop = args[0].rsplit('.', 1)
-    except:
-        usage_error("invalid format for path/property/value")
-    value = args[1]
-else:
-    usage_error("not enough arguments")
+from qemu.utils.qom import QOMSet
 
-srv = QEMUMonitorProtocol(socket_path)
-srv.connect()
 
-print(srv.command('qom-set', path=path, property=prop, value=value))
+if __name__ == '__main__':
+    sys.exit(QOMSet.entry_point())
old mode 100755 (executable)
new mode 100644 (file)
index 25b0781..760e172
@@ -1,77 +1,11 @@
 #!/usr/bin/env python3
-##
-# QEMU Object Model test tools
-#
-# Copyright IBM, Corp. 2011
-# Copyright (c) 2013 SUSE LINUX Products GmbH
-#
-# Authors:
-#  Anthony Liguori   <aliguori@amazon.com>
-#  Andreas Faerber   <afaerber@suse.de>
-#
-# This work is licensed under the terms of the GNU GPL, version 2 or later.  See
-# the COPYING file in the top-level directory.
-##
 
-import sys
 import os
+import sys
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
-from qemu.qmp import QEMUMonitorProtocol
-
-cmd, args = sys.argv[0], sys.argv[1:]
-socket_path = None
-path = None
-prop = None
-
-def usage():
-    return '''environment variables:
-    QMP_SOCKET=<path | addr:port>
-usage:
-    %s [-h] [-s <QMP socket path | addr:port>] [<path>]
-''' % cmd
-
-def usage_error(error_msg = "unspecified error"):
-    sys.stderr.write('%s\nERROR: %s\n' % (usage(), error_msg))
-    exit(1)
-
-if len(args) > 0:
-    if args[0] == "-h":
-        print(usage())
-        exit(0);
-    elif args[0] == "-s":
-        try:
-            socket_path = args[1]
-        except:
-            usage_error("missing argument: QMP socket path or address");
-        args = args[2:]
-
-if not socket_path:
-    if 'QMP_SOCKET' in os.environ:
-        socket_path = os.environ['QMP_SOCKET']
-    else:
-        usage_error("no QMP socket path or address given");
-
-srv = QEMUMonitorProtocol(socket_path)
-srv.connect()
-
-def list_node(path):
-    print('%s' % path)
-    items = srv.command('qom-list', path=path)
-    for item in items:
-        if not item['type'].startswith('child<'):
-            try:
-                print('  %s: %s (%s)' % (item['name'], srv.command('qom-get', path=path, property=item['name']), item['type']))
-            except:
-                print('  %s: <EXCEPTION> (%s)' % (item['name'], item['type']))
-    print('')
-    for item in items:
-        if item['type'].startswith('child<'):
-            list_node((path if (path != '/') else '')  + '/' + item['name'])
+from qemu.utils.qom import QOMTree
 
-if len(args) == 0:
-    path = '/'
-else:
-    path = args[0]
 
-list_node(path)
+if __name__ == '__main__':
+    sys.exit(QOMTree.entry_point())
diff --git a/scripts/qom-cast-macro-clean-cocci-gen.py b/scripts/qom-cast-macro-clean-cocci-gen.py
new file mode 100644 (file)
index 0000000..2fa8438
--- /dev/null
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+#
+# Generate a Coccinelle semantic patch to remove pointless QOM cast.
+#
+# Usage:
+#
+# $ qom-cast-macro-clean-cocci-gen.py $(git ls-files) > qom_pointless_cast.cocci
+# $ spatch \
+#           --macro-file scripts/cocci-macro-file.h \
+#           --sp-file qom_pointless_cast.cocci \
+#           --keep-comments \
+#           --use-gitgrep \
+#           --in-place \
+#           --dir .
+#
+# SPDX-FileContributor: Philippe Mathieu-Daudé <philmd@linaro.org>
+# SPDX-FileCopyrightText: 2023 Linaro Ltd.
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import re
+import sys
+
+assert len(sys.argv) > 0
+
+def print_cocci_rule(qom_typedef, qom_cast_macro):
+    print(f'''@@
+typedef {qom_typedef};
+{qom_typedef} *obj;
+@@
+-    {qom_cast_macro}(obj)
++    obj
+''')
+
+patterns = [
+    r'DECLARE_INSTANCE_CHECKER\((\w+),\W*(\w+),\W*TYPE_\w+\)',
+    r'DECLARE_OBJ_CHECKERS\((\w+),\W*\w+,\W*(\w+),\W*TYPE_\w+\)',
+    r'OBJECT_DECLARE_TYPE\((\w+),\W*\w+,\W*(\w+)\)',
+    r'OBJECT_DECLARE_SIMPLE_TYPE\((\w+),\W*(\w+)\)',
+    r'INTERFACE_CHECK\((\w+),\W*\(\w+\),\W*TYPE_(\w+)\)',
+]
+
+for fn in sys.argv[1:]:
+    try:
+        content = open(fn, 'rt').read()
+    except:
+        continue
+    for pattern in patterns:
+        for match in re.findall(pattern, content):
+            print_cocci_rule(match[0], match[1])
old mode 100755 (executable)
new mode 100644 (file)
index da6acf0..3e1a2e3
@@ -25,17 +25,14 @@ import json
 from graphviz import Digraph
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python'))
-from qemu.qmp import (
-    QEMUMonitorProtocol,
-    QMPResponseError,
-)
+from qemu.qmp import QMPError
+from qemu.qmp.legacy import QEMUMonitorProtocol
 
 
 def perm(arr):
     s = 'w' if 'write' in arr else '_'
     s += 'r' if 'consistent-read' in arr else '_'
     s += 'u' if 'write-unchanged' in arr else '_'
-    s += 'g' if 'graph-mod' in arr else '_'
     s += 's' if 'resize' in arr else '_'
     return s
 
@@ -46,13 +43,13 @@ def render_block_graph(qmp, filename, format='png'):
     representation in @format into "@filename.@format"
     '''
 
-    bds_nodes = qmp.command('query-named-block-nodes')
+    bds_nodes = qmp.cmd('query-named-block-nodes')
     bds_nodes = {n['node-name']: n for n in bds_nodes}
 
-    job_nodes = qmp.command('query-block-jobs')
+    job_nodes = qmp.cmd('query-block-jobs')
     job_nodes = {n['device']: n for n in job_nodes}
 
-    block_graph = qmp.command('x-debug-query-block-graph')
+    block_graph = qmp.cmd('x-debug-query-block-graph')
 
     graph = Digraph(comment='Block Nodes Graph')
     graph.format = format
@@ -97,7 +94,7 @@ class LibvirtGuest():
     def __init__(self, name):
         self.name = name
 
-    def command(self, cmd):
+    def cmd(self, cmd):
         # only supports qmp commands without parameters
         m = {'execute': cmd}
         ar = ['virsh', 'qemu-monitor-command', self.name, json.dumps(m)]
@@ -105,7 +102,7 @@ class LibvirtGuest():
         reply = json.loads(subprocess.check_output(ar))
 
         if 'error' in reply:
-            raise QMPResponseError(reply)
+            raise QMPError(reply)
 
         return reply['return']
 
old mode 100755 (executable)
new mode 100644 (file)
index 3ba97a6..b89dc29
@@ -111,7 +111,7 @@ def print_event(eid, name, string=None, event_count=None):
 # Decoders for each event type
 
 def decode_unimp(eid, name, _unused_dumpfile):
-    "Unimplimented decoder, will trigger exit"
+    "Unimplemented decoder, will trigger exit"
     print("%s not handled - will now stop" % (name))
     return False
 
diff --git a/scripts/shaderinclude.pl b/scripts/shaderinclude.pl
deleted file mode 100644 (file)
index cd3bb40..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env perl
-use strict;
-use warnings;
-
-my $file = shift;
-open FILE, "<", $file or die "open $file: $!";
-my $name = $file;
-$name =~ s|.*/||;
-$name =~ s/[-.]/_/g;
-print "static GLchar ${name}_src[] =\n";
-while (<FILE>) {
-    chomp;
-    printf "    \"%s\\n\"\n", $_;
-}
-print "    \"\\n\";\n";
-close FILE;
diff --git a/scripts/shaderinclude.py b/scripts/shaderinclude.py
new file mode 100644 (file)
index 0000000..ab2aade
--- /dev/null
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2023 Red Hat, Inc.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import sys
+import os
+
+
+def main(args):
+    file_path = args[1]
+    basename = os.path.basename(file_path)
+    varname = basename.replace('-', '_').replace('.', '_')
+
+    with os.fdopen(sys.stdout.fileno(), "wt", closefd=False, newline='\n') as stdout:
+        with open(file_path, "r", encoding='utf-8') as file:
+            print(f'static GLchar {varname}_src[] =', file=stdout)
+            for line in file:
+                line = line.rstrip()
+                print(f'    "{line}\\n"', file=stdout)
+            print('    "\\n";', file=stdout)
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
diff --git a/scripts/show-fixed-bugs.sh b/scripts/show-fixed-bugs.sh
deleted file mode 100755 (executable)
index a095a4d..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/bin/sh
-
-# This script checks the git log for URLs to the QEMU launchpad bugtracker
-# and optionally checks whether the corresponding bugs are not closed yet.
-
-show_help () {
-    echo "Usage:"
-    echo "  -s <commit>  : Start searching at this commit"
-    echo "  -e <commit>  : End searching at this commit"
-    echo "  -c           : Check if bugs are still open"
-    echo "  -b           : Open bugs in browser"
-}
-
-while getopts "s:e:cbh" opt; do
-   case "$opt" in
-    s)  start="$OPTARG" ;;
-    e)  end="$OPTARG" ;;
-    c)  check_if_open=1 ;;
-    b)  show_in_browser=1 ;;
-    h)  show_help ; exit 0 ;;
-    *)   echo "Use -h for help." ; exit 1 ;;
-   esac
-done
-
-if [ "x$start" = "x" ]; then
-    start=$(git tag -l 'v[0-9]*\.[0-9]*\.0' | tail -n 2 | head -n 1)
-fi
-if [ "x$end" = "x" ]; then
-    end=$(git tag -l  'v[0-9]*\.[0-9]*\.0' | tail -n 1)
-fi
-
-if [ "x$start" = "x" ] || [ "x$end" = "x" ]; then
-    echo "Could not determine start or end revision ... Please note that this"
-    echo "script must be run from a checked out git repository of QEMU."
-    exit 1
-fi
-
-echo "Searching git log for bugs in the range $start..$end"
-
-urlstr='https://bugs.launchpad.net/\(bugs\|qemu/+bug\)/'
-bug_urls=$(git log $start..$end \
-  | sed -n '\,'"$urlstr"', s,\(.*\)\('"$urlstr"'\)\([0-9]*\).*,\2\4,p' \
-  | sort -u)
-
-echo Found bug URLs:
-for i in $bug_urls ; do echo " $i" ; done
-
-if [ "x$check_if_open" = "x1" ]; then
-    echo
-    echo "Checking which ones are still open..."
-    for i in $bug_urls ; do
-        if ! curl -s -L "$i" | grep "value status" | grep -q "Fix Released" ; then
-            echo " $i"
-            final_bug_urls="$final_bug_urls $i"
-        fi
-    done
-else
-    final_bug_urls=$bug_urls
-fi
-
-if [ "x$final_bug_urls" = "x" ]; then
-    echo "No open bugs found."
-elif [ "x$show_in_browser" = "x1" ]; then
-    # Try to determine which browser we should use
-    if [ "x$BROWSER" != "x" ]; then
-        bugbrowser="$BROWSER"
-    elif command -v xdg-open >/dev/null 2>&1; then
-        bugbrowser=xdg-open
-    elif command -v gnome-open >/dev/null 2>&1; then
-        bugbrowser=gnome-open
-    elif [ "$(uname)" = "Darwin" ]; then
-        bugbrowser=open
-    elif command -v sensible-browser >/dev/null 2>&1; then
-        bugbrowser=sensible-browser
-    else
-        echo "Please set the BROWSER variable to the browser of your choice."
-        exit 1
-    fi
-    # Now show the bugs in the browser
-    first=1
-    for i in $final_bug_urls; do
-        "$bugbrowser" "$i"
-        if [ $first = 1 ]; then
-            # if it is the first entry, give the browser some time to start
-            # (to avoid messages like "Firefox is already running, but is
-            # not responding...")
-            sleep 4
-            first=0
-        fi
-    done
-fi
diff --git a/scripts/simplebench/bench-backup.py b/scripts/simplebench/bench-backup.py
new file mode 100644 (file)
index 0000000..5a0675c
--- /dev/null
@@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+#
+# Bench backup block-job
+#
+# Copyright (c) 2020 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import argparse
+import json
+
+import simplebench
+from results_to_text import results_to_text
+from bench_block_job import bench_block_copy, drv_file, drv_nbd, drv_qcow2
+
+
+def bench_func(env, case):
+    """ Handle one "cell" of benchmarking table. """
+    cmd_options = env['cmd-options'] if 'cmd-options' in env else {}
+    return bench_block_copy(env['qemu-binary'], env['cmd'],
+                            cmd_options,
+                            case['source'], case['target'])
+
+
+def bench(args):
+    test_cases = []
+
+    # paths with colon not supported, so we just split by ':'
+    dirs = dict(d.split(':') for d in args.dir)
+
+    nbd_drv = None
+    if args.nbd:
+        nbd = args.nbd.split(':')
+        host = nbd[0]
+        port = '10809' if len(nbd) == 1 else nbd[1]
+        nbd_drv = drv_nbd(host, port)
+
+    for t in args.test:
+        src, dst = t.split(':')
+
+        if src == 'nbd' and dst == 'nbd':
+            raise ValueError("Can't use 'nbd' label for both src and dst")
+
+        if (src == 'nbd' or dst == 'nbd') and not nbd_drv:
+            raise ValueError("'nbd' label used but --nbd is not given")
+
+        if src == 'nbd':
+            source = nbd_drv
+        elif args.qcow2_sources:
+            source = drv_qcow2(drv_file(dirs[src] + '/test-source.qcow2'))
+        else:
+            source = drv_file(dirs[src] + '/test-source')
+
+        if dst == 'nbd':
+            test_cases.append({'id': t, 'source': source, 'target': nbd_drv})
+            continue
+
+        if args.target_cache == 'both':
+            target_caches = ['direct', 'cached']
+        else:
+            target_caches = [args.target_cache]
+
+        for c in target_caches:
+            o_direct = c == 'direct'
+            fname = dirs[dst] + '/test-target'
+            if args.compressed:
+                fname += '.qcow2'
+            target = drv_file(fname, o_direct=o_direct)
+            if args.compressed:
+                target = drv_qcow2(target)
+
+            test_id = t
+            if args.target_cache == 'both':
+                test_id += f'({c})'
+
+            test_cases.append({'id': test_id, 'source': source,
+                               'target': target})
+
+    binaries = []  # list of (<label>, <path>, [<options>])
+    for i, q in enumerate(args.env):
+        name_path = q.split(':')
+        if len(name_path) == 1:
+            label = f'q{i}'
+            path_opts = name_path[0].split(',')
+        else:
+            assert len(name_path) == 2  # paths with colon not supported
+            label = name_path[0]
+            path_opts = name_path[1].split(',')
+
+        binaries.append((label, path_opts[0], path_opts[1:]))
+
+    test_envs = []
+
+    bin_paths = {}
+    for i, q in enumerate(args.env):
+        opts = q.split(',')
+        label_path = opts[0]
+        opts = opts[1:]
+
+        if ':' in label_path:
+            # path with colon inside is not supported
+            label, path = label_path.split(':')
+            bin_paths[label] = path
+        elif label_path in bin_paths:
+            label = label_path
+            path = bin_paths[label]
+        else:
+            path = label_path
+            label = f'q{i}'
+            bin_paths[label] = path
+
+        x_perf = {}
+        is_mirror = False
+        for opt in opts:
+            if opt == 'mirror':
+                is_mirror = True
+            elif opt == 'copy-range=on':
+                x_perf['use-copy-range'] = True
+            elif opt == 'copy-range=off':
+                x_perf['use-copy-range'] = False
+            elif opt.startswith('max-workers='):
+                x_perf['max-workers'] = int(opt.split('=')[1])
+
+        backup_options = {}
+        if x_perf:
+            backup_options['x-perf'] = x_perf
+
+        if args.compressed:
+            backup_options['compress'] = True
+
+        if is_mirror:
+            assert not x_perf
+            test_envs.append({
+                    'id': f'mirror({label})',
+                    'cmd': 'blockdev-mirror',
+                    'qemu-binary': path
+                })
+        else:
+            test_envs.append({
+                'id': f'backup({label})\n' + '\n'.join(opts),
+                'cmd': 'blockdev-backup',
+                'cmd-options': backup_options,
+                'qemu-binary': path
+            })
+
+    result = simplebench.bench(bench_func, test_envs, test_cases,
+                               count=args.count, initial_run=args.initial_run,
+                               drop_caches=args.drop_caches)
+    with open('results.json', 'w') as f:
+        json.dump(result, f, indent=4)
+    print(results_to_text(result))
+
+
+class ExtendAction(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        items = getattr(namespace, self.dest) or []
+        items.extend(values)
+        setattr(namespace, self.dest, items)
+
+
+if __name__ == '__main__':
+    p = argparse.ArgumentParser('Backup benchmark', epilog='''
+ENV format
+
+    (LABEL:PATH|LABEL|PATH)[,max-workers=N][,use-copy-range=(on|off)][,mirror]
+
+    LABEL                short name for the binary
+    PATH                 path to the binary
+    max-workers          set x-perf.max-workers of backup job
+    use-copy-range       set x-perf.use-copy-range of backup job
+    mirror               use mirror job instead of backup''',
+                                formatter_class=argparse.RawTextHelpFormatter)
+    p.add_argument('--env', nargs='+', help='''\
+Qemu binaries with labels and options, see below
+"ENV format" section''',
+                   action=ExtendAction)
+    p.add_argument('--dir', nargs='+', help='''\
+Directories, each containing "test-source" and/or
+"test-target" files, raw images to used in
+benchmarking. File path with label, like
+label:/path/to/directory''',
+                   action=ExtendAction)
+    p.add_argument('--nbd', help='''\
+host:port for remote NBD image, (or just host, for
+default port 10809). Use it in tests, label is "nbd"
+(but you cannot create test nbd:nbd).''')
+    p.add_argument('--test', nargs='+', help='''\
+Tests, in form source-dir-label:target-dir-label''',
+                   action=ExtendAction)
+    p.add_argument('--compressed', help='''\
+Use compressed backup. It automatically means
+automatically creating qcow2 target with
+lazy_refcounts for each test run''', action='store_true')
+    p.add_argument('--qcow2-sources', help='''\
+Use test-source.qcow2 images as sources instead of
+test-source raw images''', action='store_true')
+    p.add_argument('--target-cache', help='''\
+Setup cache for target nodes. Options:
+   direct: default, use O_DIRECT and aio=native
+   cached: use system cache (Qemu default) and aio=threads (Qemu default)
+   both: generate two test cases for each src:dst pair''',
+                   default='direct', choices=('direct', 'cached', 'both'))
+
+    p.add_argument('--count', type=int, default=3, help='''\
+Number of test runs per table cell''')
+
+    # BooleanOptionalAction helps to support --no-initial-run option
+    p.add_argument('--initial-run', action=argparse.BooleanOptionalAction,
+                   help='''\
+Do additional initial run per cell which doesn't count in result,
+default true''')
+
+    p.add_argument('--drop-caches', action='store_true', help='''\
+Do "sync; echo 3 > /proc/sys/vm/drop_caches" before each test run''')
+
+    bench(p.parse_args())
index c642a5b89154ca555b842637c4909adf0254ee09..fc370691e04f73a472c58e6141619e30548c76f9 100644 (file)
 #
 
 import simplebench
+from results_to_text import results_to_text
 from bench_block_job import bench_block_copy, drv_file, drv_nbd
 
 
 def bench_func(env, case):
     """ Handle one "cell" of benchmarking table. """
-    return bench_block_copy(env['qemu_binary'], env['cmd'],
+    return bench_block_copy(env['qemu_binary'], env['cmd'], {},
                             case['source'], case['target'])
 
 
@@ -77,4 +78,4 @@ test_envs = [
 ]
 
 result = simplebench.bench(bench_func, test_envs, test_cases, count=3)
-print(simplebench.ascii(result))
+print(results_to_text(result))
old mode 100755 (executable)
new mode 100644 (file)
index 9808d69..e575a3a
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # Benchmark block jobs
 #
 
 import sys
 import os
+import subprocess
 import socket
 import json
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
 from qemu.machine import QEMUMachine
-from qemu.qmp import QMPConnectError
+from qemu.qmp import ConnectError
 
 
 def bench_block_job(cmd, cmd_args, qemu_args):
@@ -38,7 +39,7 @@ def bench_block_job(cmd, cmd_args, qemu_args):
                  binary
 
     Returns {'seconds': int} on success and {'error': str} on failure, dict may
-    contain addional 'vm-log' field. Return value is compatible with
+    contain additional 'vm-log' field. Return value is compatible with
     simplebench lib.
     """
 
@@ -48,7 +49,7 @@ def bench_block_job(cmd, cmd_args, qemu_args):
         vm.launch()
     except OSError as e:
         return {'error': 'popen failed: ' + str(e)}
-    except (QMPConnectError, socket.timeout):
+    except (ConnectError, socket.timeout):
         return {'error': 'qemu failed: ' + str(vm.get_log())}
 
     try:
@@ -69,6 +70,10 @@ def bench_block_job(cmd, cmd_args, qemu_args):
             vm.shutdown()
             return {'error': 'block-job failed: ' + str(e),
                     'vm-log': vm.get_log()}
+        if 'error' in e['data']:
+            vm.shutdown()
+            return {'error': 'block-job failed: ' + e['data']['error'],
+                    'vm-log': vm.get_log()}
         end_ms = e['timestamp']['seconds'] * 1000000 + \
             e['timestamp']['microseconds']
     finally:
@@ -77,25 +82,55 @@ def bench_block_job(cmd, cmd_args, qemu_args):
     return {'seconds': (end_ms - start_ms) / 1000000.0}
 
 
+def get_image_size(path):
+    out = subprocess.run(['qemu-img', 'info', '--out=json', path],
+                         stdout=subprocess.PIPE, check=True).stdout
+    return json.loads(out)['virtual-size']
+
+
+def get_blockdev_size(obj):
+    img = obj['filename'] if 'filename' in obj else obj['file']['filename']
+    return get_image_size(img)
+
+
 # Bench backup or mirror
-def bench_block_copy(qemu_binary, cmd, source, target):
+def bench_block_copy(qemu_binary, cmd, cmd_options, source, target):
     """Helper to run bench_block_job() for mirror or backup"""
     assert cmd in ('blockdev-backup', 'blockdev-mirror')
 
+    if target['driver'] == 'qcow2':
+        try:
+            os.remove(target['file']['filename'])
+        except OSError:
+            pass
+
+        subprocess.run(['qemu-img', 'create', '-f', 'qcow2',
+                        target['file']['filename'],
+                        str(get_blockdev_size(source))],
+                       stdout=subprocess.DEVNULL,
+                       stderr=subprocess.DEVNULL, check=True)
+
     source['node-name'] = 'source'
     target['node-name'] = 'target'
 
-    return bench_block_job(cmd,
-                           {'job-id': 'job0', 'device': 'source',
-                            'target': 'target', 'sync': 'full'},
+    cmd_options['job-id'] = 'job0'
+    cmd_options['device'] = 'source'
+    cmd_options['target'] = 'target'
+    cmd_options['sync'] = 'full'
+
+    return bench_block_job(cmd, cmd_options,
                            [qemu_binary,
                             '-blockdev', json.dumps(source),
                             '-blockdev', json.dumps(target)])
 
 
-def drv_file(filename):
-    return {'driver': 'file', 'filename': filename,
-            'cache': {'direct': True}, 'aio': 'native'}
+def drv_file(filename, o_direct=True):
+    node = {'driver': 'file', 'filename': filename}
+    if o_direct:
+        node['cache'] = {'direct': True}
+        node['aio'] = 'native'
+
+    return node
 
 
 def drv_nbd(host, port):
@@ -103,6 +138,10 @@ def drv_nbd(host, port):
             'server': {'type': 'inet', 'host': host, 'port': port}}
 
 
+def drv_qcow2(file):
+    return {'driver': 'qcow2', 'file': file}
+
+
 if __name__ == '__main__':
     import sys
 
diff --git a/scripts/simplebench/bench_prealloc.py b/scripts/simplebench/bench_prealloc.py
new file mode 100644 (file)
index 0000000..85f588c
--- /dev/null
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+#
+# Benchmark preallocate filter
+#
+# Copyright (c) 2020 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+
+import sys
+import os
+import subprocess
+import re
+import json
+
+import simplebench
+from results_to_text import results_to_text
+
+
+def qemu_img_bench(args):
+    p = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                       universal_newlines=True)
+
+    if p.returncode == 0:
+        try:
+            m = re.search(r'Run completed in (\d+.\d+) seconds.', p.stdout)
+            return {'seconds': float(m.group(1))}
+        except Exception:
+            return {'error': f'failed to parse qemu-img output: {p.stdout}'}
+    else:
+        return {'error': f'qemu-img failed: {p.returncode}: {p.stdout}'}
+
+
+def bench_func(env, case):
+    fname = f"{case['dir']}/prealloc-test.qcow2"
+    try:
+        os.remove(fname)
+    except OSError:
+        pass
+
+    subprocess.run([env['qemu-img-binary'], 'create', '-f', 'qcow2', fname,
+                   '16G'], stdout=subprocess.DEVNULL,
+                   stderr=subprocess.DEVNULL, check=True)
+
+    args = [env['qemu-img-binary'], 'bench', '-c', str(case['count']),
+            '-d', '64', '-s', case['block-size'], '-t', 'none', '-n', '-w']
+    if env['prealloc']:
+        args += ['--image-opts',
+                 'driver=qcow2,file.driver=preallocate,file.file.driver=file,'
+                 f'file.file.filename={fname}']
+    else:
+        args += ['-f', 'qcow2', fname]
+
+    return qemu_img_bench(args)
+
+
+def auto_count_bench_func(env, case):
+    case['count'] = 100
+    while True:
+        res = bench_func(env, case)
+        if 'error' in res:
+            return res
+
+        if res['seconds'] >= 1:
+            break
+
+        case['count'] *= 10
+
+    if res['seconds'] < 5:
+        case['count'] = round(case['count'] * 5 / res['seconds'])
+        res = bench_func(env, case)
+        if 'error' in res:
+            return res
+
+    res['iops'] = case['count'] / res['seconds']
+    return res
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print(f'USAGE: {sys.argv[0]} <qemu-img binary> '
+              'DISK_NAME:DIR_PATH ...')
+        exit(1)
+
+    qemu_img = sys.argv[1]
+
+    envs = [
+        {
+            'id': 'no-prealloc',
+            'qemu-img-binary': qemu_img,
+            'prealloc': False
+        },
+        {
+            'id': 'prealloc',
+            'qemu-img-binary': qemu_img,
+            'prealloc': True
+        }
+    ]
+
+    aligned_cases = []
+    unaligned_cases = []
+
+    for disk in sys.argv[2:]:
+        name, path = disk.split(':')
+        aligned_cases.append({
+            'id': f'{name}, aligned sequential 16k',
+            'block-size': '16k',
+            'dir': path
+        })
+        unaligned_cases.append({
+            'id': f'{name}, unaligned sequential 64k',
+            'block-size': '16k',
+            'dir': path
+        })
+
+    result = simplebench.bench(auto_count_bench_func, envs,
+                               aligned_cases + unaligned_cases, count=5)
+    print(results_to_text(result))
+    with open('results.json', 'w') as f:
+        json.dump(result, f, indent=4)
old mode 100755 (executable)
new mode 100644 (file)
index ca1178f..da601ea
@@ -26,6 +26,7 @@ import sys
 import os
 import subprocess
 import simplebench
+from results_to_text import results_to_text
 
 
 def bench_func(env, case):
@@ -167,4 +168,4 @@ if __name__ == '__main__':
 
     result = simplebench.bench(bench_func, test_envs, test_cases, count=3,
                                initial_run=False)
-    print(simplebench.ascii(result))
+    print(results_to_text(result))
diff --git a/scripts/simplebench/img_bench_templater.py b/scripts/simplebench/img_bench_templater.py
new file mode 100644 (file)
index 0000000..f8e1540
--- /dev/null
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+#
+# Process img-bench test templates
+#
+# Copyright (c) 2021 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+
+import sys
+import subprocess
+import re
+import json
+
+import simplebench
+from results_to_text import results_to_text
+from table_templater import Templater
+
+
+def bench_func(env, case):
+    test = templater.gen(env['data'], case['data'])
+
+    p = subprocess.run(test, shell=True, stdout=subprocess.PIPE,
+                       stderr=subprocess.STDOUT, universal_newlines=True)
+
+    if p.returncode == 0:
+        try:
+            m = re.search(r'Run completed in (\d+.\d+) seconds.', p.stdout)
+            return {'seconds': float(m.group(1))}
+        except Exception:
+            return {'error': f'failed to parse qemu-img output: {p.stdout}'}
+    else:
+        return {'error': f'qemu-img failed: {p.returncode}: {p.stdout}'}
+
+
+if __name__ == '__main__':
+    if len(sys.argv) > 1:
+        print("""
+Usage: img_bench_templater.py < path/to/test-template.sh
+
+This script generates performance tests from a test template (example below),
+runs them, and displays the results in a table. The template is read from
+stdin.  It must be written in bash and end with a `qemu-img bench` invocation
+(whose result is parsed to get the test instance’s result).
+
+Use the following syntax in the template to create the various different test
+instances:
+
+  column templating: {var1|var2|...} - test will use different values in
+  different columns. You may use several {} constructions in the test, in this
+  case product of all choice-sets will be used.
+
+  row templating: [var1|var2|...] - similar thing to define rows (test-cases)
+
+Test template example:
+
+Assume you want to compare two qemu-img binaries, called qemu-img-old and
+qemu-img-new in your build directory in two test-cases with 4K writes and 64K
+writes. The template may look like this:
+
+qemu_img=/path/to/qemu/build/qemu-img-{old|new}
+$qemu_img create -f qcow2 /ssd/x.qcow2 1G
+$qemu_img bench -c 100 -d 8 [-s 4K|-s 64K] -w -t none -n /ssd/x.qcow2
+
+When passing this to stdin of img_bench_templater.py, the resulting comparison
+table will contain two columns (for two binaries) and two rows (for two
+test-cases).
+
+In addition to displaying the results, script also stores results in JSON
+format into results.json file in current directory.
+""")
+        sys.exit()
+
+    templater = Templater(sys.stdin.read())
+
+    envs = [{'id': ' / '.join(x), 'data': x} for x in templater.columns]
+    cases = [{'id': ' / '.join(x), 'data': x} for x in templater.rows]
+
+    result = simplebench.bench(bench_func, envs, cases, count=5,
+                               initial_run=False)
+    print(results_to_text(result))
+    with open('results.json', 'w') as f:
+        json.dump(result, f, indent=4)
diff --git a/scripts/simplebench/results_to_text.py b/scripts/simplebench/results_to_text.py
new file mode 100644 (file)
index 0000000..d561e5e
--- /dev/null
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+#
+# Simple benchmarking framework
+#
+# Copyright (c) 2019 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import math
+import tabulate
+
+# We want leading whitespace for difference row cells (see below)
+tabulate.PRESERVE_WHITESPACE = True
+
+
+def format_value(x, stdev):
+    stdev_pr = stdev / x * 100
+    if stdev_pr < 1.5:
+        # don't care too much
+        return f'{x:.2g}'
+    else:
+        return f'{x:.2g} ± {math.ceil(stdev_pr)}%'
+
+
+def result_to_text(result):
+    """Return text representation of bench_one() returned dict."""
+    if 'average' in result:
+        s = format_value(result['average'], result['stdev'])
+        if 'n-failed' in result:
+            s += '\n({} failed)'.format(result['n-failed'])
+        return s
+    else:
+        return 'FAILED'
+
+
+def results_dimension(results):
+    dim = None
+    for case in results['cases']:
+        for env in results['envs']:
+            res = results['tab'][case['id']][env['id']]
+            if dim is None:
+                dim = res['dimension']
+            else:
+                assert dim == res['dimension']
+
+    assert dim in ('iops', 'seconds')
+
+    return dim
+
+
+def results_to_text(results):
+    """Return text representation of bench() returned dict."""
+    n_columns = len(results['envs'])
+    named_columns = n_columns > 2
+    dim = results_dimension(results)
+    tab = []
+
+    if named_columns:
+        # Environment columns are named A, B, ...
+        tab.append([''] + [chr(ord('A') + i) for i in range(n_columns)])
+
+    tab.append([''] + [c['id'] for c in results['envs']])
+
+    for case in results['cases']:
+        row = [case['id']]
+        case_results = results['tab'][case['id']]
+        for env in results['envs']:
+            res = case_results[env['id']]
+            row.append(result_to_text(res))
+        tab.append(row)
+
+        # Add row of difference between columns. For each column starting from
+        # B we calculate difference with all previous columns.
+        row = ['', '']  # case name and first column
+        for i in range(1, n_columns):
+            cell = ''
+            env = results['envs'][i]
+            res = case_results[env['id']]
+
+            if 'average' not in res:
+                # Failed result
+                row.append(cell)
+                continue
+
+            for j in range(0, i):
+                env_j = results['envs'][j]
+                res_j = case_results[env_j['id']]
+                cell += ' '
+
+                if 'average' not in res_j:
+                    # Failed result
+                    cell += '--'
+                    continue
+
+                col_j = tab[0][j + 1] if named_columns else ''
+                diff_pr = round((res['average'] - res_j['average']) /
+                                res_j['average'] * 100)
+                cell += f' {col_j}{diff_pr:+}%'
+            row.append(cell)
+        tab.append(row)
+
+    return f'All results are in {dim}\n\n' + tabulate.tabulate(tab)
+
+
+if __name__ == '__main__':
+    import sys
+    import json
+
+    if len(sys.argv) < 2:
+        print(f'USAGE: {sys.argv[0]} results.json')
+        exit(1)
+
+    with open(sys.argv[1]) as f:
+        print(results_to_text(json.load(f)))
index 59e7314ff6f5aa223240f22108449616acc016a6..8efca2af98851d65c6ced0df52742446d0c660eb 100644 (file)
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 
+import statistics
+import subprocess
+import time
 
-def bench_one(test_func, test_env, test_case, count=5, initial_run=True):
+
+def do_drop_caches():
+    subprocess.run('sync; echo 3 > /proc/sys/vm/drop_caches', shell=True,
+                   check=True)
+
+
+def bench_one(test_func, test_env, test_case, count=5, initial_run=True,
+              slow_limit=100, drop_caches=False):
     """Benchmark one test-case
 
     test_func   -- benchmarking function with prototype
                    test_func(env, case), which takes test_env and test_case
-                   arguments and returns {'seconds': int} (which is benchmark
-                   result) on success and {'error': str} on error. Returned
-                   dict may contain any other additional fields.
+                   arguments and on success returns dict with 'seconds' or
+                   'iops' (or both) fields, specifying the benchmark result.
+                   If both 'iops' and 'seconds' provided, the 'iops' is
+                   considered the main, and 'seconds' is just an additional
+                   info. On failure test_func should return {'error': str}.
+                   Returned dict may contain any other additional fields.
     test_env    -- test environment - opaque first argument for test_func
     test_case   -- test case - opaque second argument for test_func
     count       -- how many times to call test_func, to calculate average
     initial_run -- do initial run of test_func, which don't get into result
+    slow_limit  -- stop at slow run (that exceedes the slow_limit by seconds).
+                   (initial run is not measured)
+    drop_caches -- drop caches before each run
 
     Returns dict with the following fields:
         'runs':     list of test_func results
-        'average':  average seconds per run (exists only if at least one run
-                    succeeded)
-        'delta':    maximum delta between test_func result and the average
+        'dimension': dimension of results, may be 'seconds' or 'iops'
+        'average':  average value (iops or seconds) per run (exists only if at
+                    least one run succeeded)
+        'stdev':    standard deviation of results
                     (exists only if at least one run succeeded)
         'n-failed': number of failed runs (exists only if at least one run
                     failed)
     """
     if initial_run:
         print('  #initial run:')
+        do_drop_caches()
         print('   ', test_func(test_env, test_case))
 
     runs = []
     for i in range(count):
+        t = time.time()
+
         print('  #run {}'.format(i+1))
+        do_drop_caches()
         res = test_func(test_env, test_case)
         print('   ', res)
         runs.append(res)
 
-    result = {'runs': runs}
-
-    successed = [r for r in runs if ('seconds' in r)]
-    if successed:
-        avg = sum(r['seconds'] for r in successed) / len(successed)
-        result['average'] = avg
-        result['delta'] = max(abs(r['seconds'] - avg) for r in successed)
+        if time.time() - t > slow_limit:
+            print('    - run is too slow, stop here')
+            break
 
-    if len(successed) < count:
-        result['n-failed'] = count - len(successed)
+    count = len(runs)
 
-    return result
+    result = {'runs': runs}
 
+    succeeded = [r for r in runs if ('seconds' in r or 'iops' in r)]
+    if succeeded:
+        if 'iops' in succeeded[0]:
+            assert all('iops' in r for r in succeeded)
+            dim = 'iops'
+        else:
+            assert all('seconds' in r for r in succeeded)
+            assert all('iops' not in r for r in succeeded)
+            dim = 'seconds'
+        result['dimension'] = dim
+        result['average'] = statistics.mean(r[dim] for r in succeeded)
+        if len(succeeded) == 1:
+            result['stdev'] = 0
+        else:
+            result['stdev'] = statistics.stdev(r[dim] for r in succeeded)
+
+    if len(succeeded) < count:
+        result['n-failed'] = count - len(succeeded)
 
-def ascii_one(result):
-    """Return ASCII representation of bench_one() returned dict."""
-    if 'average' in result:
-        s = '{:.2f} +- {:.2f}'.format(result['average'], result['delta'])
-        if 'n-failed' in result:
-            s += '\n({} failed)'.format(result['n-failed'])
-        return s
-    else:
-        return 'FAILED'
+    return result
 
 
 def bench(test_func, test_envs, test_cases, *args, **vargs):
@@ -112,17 +138,3 @@ def bench(test_func, test_envs, test_cases, *args, **vargs):
 
     print('Done')
     return results
-
-
-def ascii(results):
-    """Return ASCII representation of bench() returned dict."""
-    from tabulate import tabulate
-
-    tab = [[""] + [c['id'] for c in results['envs']]]
-    for case in results['cases']:
-        row = [case['id']]
-        for env in results['envs']:
-            row.append(ascii_one(results['tab'][case['id']][env['id']]))
-        tab.append(row)
-
-    return tabulate(tab)
diff --git a/scripts/simplebench/table_templater.py b/scripts/simplebench/table_templater.py
new file mode 100644 (file)
index 0000000..950f3b3
--- /dev/null
@@ -0,0 +1,62 @@
+# Parser for test templates
+#
+# Copyright (c) 2021 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import itertools
+from lark import Lark
+
+grammar = """
+start: ( text | column_switch | row_switch )+
+
+column_switch: "{" text ["|" text]+ "}"
+row_switch: "[" text ["|" text]+ "]"
+text: /[^|{}\[\]]+/
+"""
+
+parser = Lark(grammar)
+
+class Templater:
+    def __init__(self, template):
+        self.tree = parser.parse(template)
+
+        c_switches = []
+        r_switches = []
+        for x in self.tree.children:
+            if x.data == 'column_switch':
+                c_switches.append([el.children[0].value for el in x.children])
+            elif x.data == 'row_switch':
+                r_switches.append([el.children[0].value for el in x.children])
+
+        self.columns = list(itertools.product(*c_switches))
+        self.rows = list(itertools.product(*r_switches))
+
+    def gen(self, column, row):
+        i = 0
+        j = 0
+        result = []
+
+        for x in self.tree.children:
+            if x.data == 'text':
+                result.append(x.children[0].value)
+            elif x.data == 'column_switch':
+                result.append(column[i])
+                i += 1
+            elif x.data == 'row_switch':
+                result.append(row[j])
+                j += 1
+
+        return ''.join(result)
old mode 100755 (executable)
new mode 100644 (file)
index 20f0026..cef81b0
@@ -7,13 +7,22 @@
 # This work is licensed under the terms of the GNU GPL, version 2.  See
 # the COPYING file in the top-level directory.
 #
-# For help see docs/devel/tracing.txt
+# For help see docs/devel/tracing.rst
 
+import sys
 import struct
 import inspect
+import warnings
 from tracetool import read_events, Event
 from tracetool.backend.simple import is_string
 
+__all__ = ['Analyzer', 'Analyzer2', 'process', 'run']
+
+# This is the binary format that the QEMU "simple" trace backend
+# emits. There is no specification documentation because the format is
+# not guaranteed to be stable. Trace files must be parsed with the
+# same trace-events-all file and the same simpletrace.py file that
+# QEMU was built with.
 header_event_id = 0xffffffffffffffff
 header_magic    = 0xf2b177cb0aa429b4
 dropped_event_id = 0xfffffffffffffffe
@@ -23,48 +32,19 @@ record_type_event = 1
 
 log_header_fmt = '=QQQ'
 rec_header_fmt = '=QQII'
+rec_header_fmt_len = struct.calcsize(rec_header_fmt)
+
+class SimpleException(Exception):
+    pass
 
 def read_header(fobj, hfmt):
     '''Read a trace record header'''
     hlen = struct.calcsize(hfmt)
     hdr = fobj.read(hlen)
     if len(hdr) != hlen:
-        return None
+        raise SimpleException('Error reading header. Wrong filetype provided?')
     return struct.unpack(hfmt, hdr)
 
-def get_record(edict, idtoname, rechdr, fobj):
-    """Deserialize a trace record from a file into a tuple
-       (name, timestamp, pid, arg1, ..., arg6)."""
-    if rechdr is None:
-        return None
-    if rechdr[0] != dropped_event_id:
-        event_id = rechdr[0]
-        name = idtoname[event_id]
-        rec = (name, rechdr[1], rechdr[3])
-        try:
-            event = edict[name]
-        except KeyError as e:
-            import sys
-            sys.stderr.write('%s event is logged but is not declared ' \
-                             'in the trace events file, try using ' \
-                             'trace-events-all instead.\n' % str(e))
-            sys.exit(1)
-
-        for type, name in event.args:
-            if is_string(type):
-                l = fobj.read(4)
-                (len,) = struct.unpack('=L', l)
-                s = fobj.read(len)
-                rec = rec + (s,)
-            else:
-                (value,) = struct.unpack('=Q', fobj.read(8))
-                rec = rec + (value,)
-    else:
-        rec = ("dropped", rechdr[1], rechdr[3])
-        (value,) = struct.unpack('=Q', fobj.read(8))
-        rec = rec + (value,)
-    return rec
-
 def get_mapping(fobj):
     (event_id, ) = struct.unpack('=Q', fobj.read(8))
     (len, ) = struct.unpack('=L', fobj.read(4))
@@ -72,41 +52,47 @@ def get_mapping(fobj):
 
     return (event_id, name)
 
-def read_record(edict, idtoname, fobj):
-    """Deserialize a trace record from a file into a tuple (event_num, timestamp, pid, arg1, ..., arg6)."""
-    rechdr = read_header(fobj, rec_header_fmt)
-    return get_record(edict, idtoname, rechdr, fobj)
+def read_record(fobj):
+    """Deserialize a trace record from a file into a tuple (event_num, timestamp, pid, args)."""
+    event_id, timestamp_ns, record_length, record_pid = read_header(fobj, rec_header_fmt)
+    args_payload = fobj.read(record_length - rec_header_fmt_len)
+    return (event_id, timestamp_ns, record_pid, args_payload)
 
 def read_trace_header(fobj):
     """Read and verify trace file header"""
-    header = read_header(fobj, log_header_fmt)
-    if header is None:
-        raise ValueError('Not a valid trace file!')
-    if header[0] != header_event_id:
-        raise ValueError('Not a valid trace file, header id %d != %d' %
-                         (header[0], header_event_id))
-    if header[1] != header_magic:
-        raise ValueError('Not a valid trace file, header magic %d != %d' %
-                         (header[1], header_magic))
-
-    log_version = header[2]
+    _header_event_id, _header_magic, log_version = read_header(fobj, log_header_fmt)
+    if _header_event_id != header_event_id:
+        raise ValueError(f'Not a valid trace file, header id {_header_event_id} != {header_event_id}')
+    if _header_magic != header_magic:
+        raise ValueError(f'Not a valid trace file, header magic {_header_magic} != {header_magic}')
+
     if log_version not in [0, 2, 3, 4]:
-        raise ValueError('Unknown version of tracelog format!')
+        raise ValueError(f'Unknown version {log_version} of tracelog format!')
     if log_version != 4:
-        raise ValueError('Log format %d not supported with this QEMU release!'
-                         % log_version)
-
-def read_trace_records(edict, idtoname, fobj):
-    """Deserialize trace records from a file, yielding record tuples (event_num, timestamp, pid, arg1, ..., arg6).
+        raise ValueError(f'Log format {log_version} not supported with this QEMU release!')
 
-    Note that `idtoname` is modified if the file contains mapping records.
+def read_trace_records(events, fobj, read_header):
+    """Deserialize trace records from a file, yielding record tuples (event, event_num, timestamp, pid, arg1, ..., arg6).
 
     Args:
-        edict (str -> Event): events dict, indexed by name
-        idtoname (int -> str): event names dict, indexed by event ID
+        event_mapping (str -> Event): events dict, indexed by name
         fobj (file): input file
+        read_header (bool): whether headers were read from fobj
 
     """
+    frameinfo = inspect.getframeinfo(inspect.currentframe())
+    dropped_event = Event.build("Dropped_Event(uint64_t num_events_dropped)",
+                                frameinfo.lineno + 1, frameinfo.filename)
+
+    event_mapping = {e.name: e for e in events}
+    event_mapping["dropped"] = dropped_event
+    event_id_to_name = {dropped_event_id: "dropped"}
+
+    # If there is no header assume event ID mapping matches events list
+    if not read_header:
+        for event_id, event in enumerate(events):
+            event_id_to_name[event_id] = event.name
+
     while True:
         t = fobj.read(8)
         if len(t) == 0:
@@ -114,19 +100,45 @@ def read_trace_records(edict, idtoname, fobj):
 
         (rectype, ) = struct.unpack('=Q', t)
         if rectype == record_type_mapping:
-            event_id, name = get_mapping(fobj)
-            idtoname[event_id] = name
+            event_id, event_name = get_mapping(fobj)
+            event_id_to_name[event_id] = event_name
         else:
-            rec = read_record(edict, idtoname, fobj)
+            event_id, timestamp_ns, pid, args_payload = read_record(fobj)
+            event_name = event_id_to_name[event_id]
+
+            try:
+                event = event_mapping[event_name]
+            except KeyError as e:
+                raise SimpleException(
+                    f'{e} event is logged but is not declared in the trace events'
+                    'file, try using trace-events-all instead.'
+                )
+
+            offset = 0
+            args = []
+            for type, _ in event.args:
+                if is_string(type):
+                    (length,) = struct.unpack_from('=L', args_payload, offset=offset)
+                    offset += 4
+                    s = args_payload[offset:offset+length]
+                    offset += length
+                    args.append(s)
+                else:
+                    (value,) = struct.unpack_from('=Q', args_payload, offset=offset)
+                    offset += 8
+                    args.append(value)
 
-            yield rec
+            yield (event_mapping[event_name], event_name, timestamp_ns, pid) + tuple(args)
 
-class Analyzer(object):
-    """A trace file analyzer which processes trace records.
+class Analyzer:
+    """[Deprecated. Refer to Analyzer2 instead.]
+
+    A trace file analyzer which processes trace records.
 
     An analyzer can be passed to run() or process().  The begin() method is
     invoked, then each trace record is processed, and finally the end() method
-    is invoked.
+    is invoked. When Analyzer is used as a context-manager (using the `with`
+    statement), begin() and end() are called automatically.
 
     If a method matching a trace event name exists, it is invoked to process
     that trace record.  Otherwise the catchall() method is invoked.
@@ -160,42 +172,14 @@ class Analyzer(object):
         """Called if no specific method for processing a trace event has been found."""
         pass
 
-    def end(self):
-        """Called at the end of the trace."""
-        pass
-
-def process(events, log, analyzer, read_header=True):
-    """Invoke an analyzer on each event in a log."""
-    if isinstance(events, str):
-        events = read_events(open(events, 'r'), events)
-    if isinstance(log, str):
-        log = open(log, 'rb')
-
-    if read_header:
-        read_trace_header(log)
-
-    dropped_event = Event.build("Dropped_Event(uint64_t num_events_dropped)")
-    edict = {"dropped": dropped_event}
-    idtoname = {dropped_event_id: "dropped"}
-
-    for event in events:
-        edict[event.name] = event
-
-    # If there is no header assume event ID mapping matches events list
-    if not read_header:
-        for event_id, event in enumerate(events):
-            idtoname[event_id] = event.name
-
-    def build_fn(analyzer, event):
-        if isinstance(event, str):
-            return analyzer.catchall
-
-        fn = getattr(analyzer, event.name, None)
+    def _build_fn(self, event):
+        fn = getattr(self, event.name, None)
         if fn is None:
-            return analyzer.catchall
+            # Return early to avoid costly call to inspect.getfullargspec
+            return self.catchall
 
         event_argcount = len(event.args)
-        fn_argcount = len(inspect.getargspec(fn)[0]) - 1
+        fn_argcount = len(inspect.getfullargspec(fn)[0]) - 1
         if fn_argcount == event_argcount + 1:
             # Include timestamp as first argument
             return lambda _, rec: fn(*(rec[1:2] + rec[3:3 + event_argcount]))
@@ -206,56 +190,170 @@ def process(events, log, analyzer, read_header=True):
             # Just arguments, no timestamp or pid
             return lambda _, rec: fn(*rec[3:3 + event_argcount])
 
-    analyzer.begin()
-    fn_cache = {}
-    for rec in read_trace_records(edict, idtoname, log):
-        event_num = rec[0]
-        event = edict[event_num]
-        if event_num not in fn_cache:
-            fn_cache[event_num] = build_fn(analyzer, event)
-        fn_cache[event_num](event, rec)
-    analyzer.end()
+    def _process_event(self, rec_args, *, event, event_id, timestamp_ns, pid, **kwargs):
+        warnings.warn(
+            "Use of deprecated Analyzer class. Refer to Analyzer2 instead.",
+            DeprecationWarning,
+        )
+
+        if not hasattr(self, '_fn_cache'):
+            # NOTE: Cannot depend on downstream subclasses to have
+            # super().__init__() because of legacy.
+            self._fn_cache = {}
+
+        rec = (event_id, timestamp_ns, pid, *rec_args)
+        if event_id not in self._fn_cache:
+            self._fn_cache[event_id] = self._build_fn(event)
+        self._fn_cache[event_id](event, rec)
+
+    def end(self):
+        """Called at the end of the trace."""
+        pass
+
+    def __enter__(self):
+        self.begin()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if exc_type is None:
+            self.end()
+        return False
+
+class Analyzer2(Analyzer):
+    """A trace file analyzer which processes trace records.
+
+    An analyzer can be passed to run() or process().  The begin() method is
+    invoked, then each trace record is processed, and finally the end() method
+    is invoked. When Analyzer is used as a context-manager (using the `with`
+    statement), begin() and end() are called automatically.
+
+    If a method matching a trace event name exists, it is invoked to process
+    that trace record.  Otherwise the catchall() method is invoked.
+
+    The methods are called with a set of keyword-arguments. These can be ignored
+    using `**kwargs` or defined like any keyword-argument.
+
+    The following keyword-arguments are available, but make sure to have an
+    **kwargs to allow for unmatched arguments in the future:
+        event: Event object of current trace
+        event_id: The id of the event in the current trace file
+        timestamp_ns: The timestamp in nanoseconds of the trace
+        pid: The process id recorded for the given trace
+
+    Example:
+    The following method handles the runstate_set(int new_state) trace event::
+
+      def runstate_set(self, new_state, **kwargs):
+          ...
+
+    The method can also explicitly take a timestamp keyword-argument with the
+    trace event arguments::
+
+      def runstate_set(self, new_state, *, timestamp_ns, **kwargs):
+          ...
+
+    Timestamps have the uint64_t type and are in nanoseconds.
+
+    The pid can be included in addition to the timestamp and is useful when
+    dealing with traces from multiple processes:
+
+      def runstate_set(self, new_state, *, timestamp_ns, pid, **kwargs):
+          ...
+    """
+
+    def catchall(self, *rec_args, event, timestamp_ns, pid, event_id, **kwargs):
+        """Called if no specific method for processing a trace event has been found."""
+        pass
+
+    def _process_event(self, rec_args, *, event, **kwargs):
+        fn = getattr(self, event.name, self.catchall)
+        fn(*rec_args, event=event, **kwargs)
+
+def process(events, log, analyzer, read_header=True):
+    """Invoke an analyzer on each event in a log.
+    Args:
+        events (file-object or list or str): events list or file-like object or file path as str to read event data from
+        log (file-object or str): file-like object or file path as str to read log data from
+        analyzer (Analyzer): Instance of Analyzer to interpret the event data
+        read_header (bool, optional): Whether to read header data from the log data. Defaults to True.
+    """
+
+    if isinstance(events, str):
+        with open(events, 'r') as f:
+            events_list = read_events(f, events)
+    elif isinstance(events, list):
+        # Treat as a list of events already produced by tracetool.read_events
+        events_list = events
+    else:
+        # Treat as an already opened file-object
+        events_list = read_events(events, events.name)
+
+    if isinstance(log, str):
+        with open(log, 'rb') as log_fobj:
+            _process(events_list, log_fobj, analyzer, read_header)
+    else:
+        # Treat `log` as an already opened file-object. We will not close it,
+        # as we do not own it.
+        _process(events_list, log, analyzer, read_header)
+
+def _process(events, log_fobj, analyzer, read_header=True):
+    """Internal function for processing
+
+    Args:
+        events (list): list of events already produced by tracetool.read_events
+        log_fobj (file): file-object to read log data from
+        analyzer (Analyzer): the Analyzer to interpret the event data
+        read_header (bool, optional): Whether to read header data from the log data. Defaults to True.
+    """
+
+    if read_header:
+        read_trace_header(log_fobj)
+
+    with analyzer:
+        for event, event_id, timestamp_ns, record_pid, *rec_args in read_trace_records(events, log_fobj, read_header):
+            analyzer._process_event(
+                rec_args,
+                event=event,
+                event_id=event_id,
+                timestamp_ns=timestamp_ns,
+                pid=record_pid,
+            )
 
 def run(analyzer):
     """Execute an analyzer on a trace file given on the command-line.
 
     This function is useful as a driver for simple analysis scripts.  More
     advanced scripts will want to call process() instead."""
-    import sys
-
-    read_header = True
-    if len(sys.argv) == 4 and sys.argv[1] == '--no-header':
-        read_header = False
-        del sys.argv[1]
-    elif len(sys.argv) != 3:
-        sys.stderr.write('usage: %s [--no-header] <trace-events> ' \
-                         '<trace-file>\n' % sys.argv[0])
-        sys.exit(1)
 
-    events = read_events(open(sys.argv[1], 'r'), sys.argv[1])
-    process(events, sys.argv[2], analyzer, read_header=read_header)
+    try:
+        # NOTE: See built-in `argparse` module for a more robust cli interface
+        *no_header, trace_event_path, trace_file_path = sys.argv[1:]
+        assert no_header == [] or no_header == ['--no-header'], 'Invalid no-header argument'
+    except (AssertionError, ValueError):
+        raise SimpleException(f'usage: {sys.argv[0]} [--no-header] <trace-events> <trace-file>\n')
+
+    with open(trace_event_path, 'r') as events_fobj, open(trace_file_path, 'rb') as log_fobj:
+        process(events_fobj, log_fobj, analyzer, read_header=not no_header)
 
 if __name__ == '__main__':
-    class Formatter(Analyzer):
+    class Formatter2(Analyzer2):
         def __init__(self):
-            self.last_timestamp = None
-
-        def catchall(self, event, rec):
-            timestamp = rec[1]
-            if self.last_timestamp is None:
-                self.last_timestamp = timestamp
-            delta_ns = timestamp - self.last_timestamp
-            self.last_timestamp = timestamp
-
-            fields = [event.name, '%0.3f' % (delta_ns / 1000.0),
-                      'pid=%d' % rec[2]]
-            i = 3
-            for type, name in event.args:
-                if is_string(type):
-                    fields.append('%s=%s' % (name, rec[i]))
-                else:
-                    fields.append('%s=0x%x' % (name, rec[i]))
-                i += 1
-            print(' '.join(fields))
-
-    run(Formatter())
+            self.last_timestamp_ns = None
+
+        def catchall(self, *rec_args, event, timestamp_ns, pid, event_id):
+            if self.last_timestamp_ns is None:
+                self.last_timestamp_ns = timestamp_ns
+            delta_ns = timestamp_ns - self.last_timestamp_ns
+            self.last_timestamp_ns = timestamp_ns
+
+            fields = [
+                f'{name}={r}' if is_string(type) else f'{name}=0x{r:x}'
+                for r, (type, name) in zip(rec_args, event.args)
+            ]
+            print(f'{event.name} {delta_ns / 1000:0.3f} {pid=} ' + ' '.join(fields))
+
+    try:
+        run(Formatter2())
+    except SimpleException as e:
+        sys.stderr.write(str(e) + "\n")
+        sys.exit(1)
diff --git a/scripts/switch-timer-api b/scripts/switch-timer-api
deleted file mode 100755 (executable)
index 41736d1..0000000
+++ /dev/null
@@ -1,178 +0,0 @@
-#!/usr/bin/env perl
-
-use strict;
-use warnings;
-use Getopt::Long;
-use FindBin;
-
-my @legacy = qw(qemu_clock_ptr qemu_get_clock_ns qemu_get_clock_ms qemu_register_clock_reset_notifier qemu_unregister_clock_reset_notifier qemu_new_timer qemu_free_timer qemu_del_timer qemu_mod_timer_ns qemu_mod_timer qemu_run_timers qemu_new_timer_ns qemu_new_timer_us qemu_new_timer_ms);
-my $legacyre = '\b('.join('|', @legacy).')\b';
-my $option_git;
-my $option_dryrun;
-my $option_quiet;
-my $option_rtc;
-my $suffix=".tmp.$$";
-my @files;
-my $getfiles = 'git grep -l -E \'\b((host|rt|vm|rtc)_clock\b|qemu_\w*timer)\' | egrep \'\.[ch]$\' | egrep -v \'qemu-timer\.c$|include/qemu/timer\.h$\'';
-
-sub Syntax
-{
-    print STDERR <<STOP;
-Usage: $FindBin::Script [options] FILE ...
-
-Translate each FILE to the new QEMU timer API. If no files
-are passed, a reasonable guess is taken.
-
-Options:
-  -q, --quiet     Do not show warnings etc
-  -d, --dry-run   Do a dry run
-  -g, --git       Generate a git commit for each change
-  -r, --rtc       Only fix up rtc usage
-  -h, --help      Print this message
-
-STOP
-return;
-}
-
-sub ParseOptions
-{
-    if (!GetOptions (
-            "dry-run|d" => \$option_dryrun,
-             "git|g" => \$option_git,
-            "quiet|q" => \$option_quiet,
-            "rtc|r" => \$option_rtc,
-             "help|h" => sub { Syntax(); exit(0); }
-        ))
-    {
-        Syntax();
-        die "Bad options";
-    }
-
-    if ($#ARGV >=0)
-    {
-       @files = @ARGV;
-    }
-    else
-    {
-       @files = split(/\s+/, `$getfiles`);
-    }
-
-    foreach my $file (@files)
-    {
-       die "Cannot find $file" unless (-f $file && -r $file);
-    }
-}
-
-sub DoWarn
-{
-    my $text = shift @_;
-    my $line = shift @_;
-    return if ($option_quiet);
-    chomp ($line);
-    print STDERR "$text\n";
-    print STDERR "$line\n\n";
-}
-
-sub Process
-{
-    my $ifn = shift @_;
-    my $ofn = $ifn.$suffix;
-
-    my $intext;
-    my $outtext;
-    my $linenum = 0;
-
-    open my $input, "<", $ifn || die "Cannot open $ifn for read: $!";
-
-    while (<$input>)
-    {
-       my $line = $_;
-       $intext .= $line;
-       $linenum++;
-
-       # fix the specific uses
-       unless ($option_rtc)
-       {
-           $line =~ s/\bqemu_new_timer(_[num]s)\s*\((vm_|rt_|host_)clock\b/timer_new$1(XXX_$2clock/g;
-           $line =~ s/\bqemu_new_timer\s*\((vm_|rt_|host_)clock\b/timer_new(XXX_$1clock/g;
-           $line =~ s/\bqemu_get_clock(_[num]s)\s*\((vm_|rt_|host_)clock\b/qemu_clock_get$1(XXX_$2clock/g;
-       }
-
-       # rtc is different
-       $line =~ s/\bqemu_new_timer(_[num]s)\s*\(rtc_clock\b/timer_new$1(rtc_clock/g;
-       $line =~ s/\bqemu_new_timer\s*\(rtc_clock\b/timer_new(rtc_clock/g;
-       $line =~ s/\bqemu_get_clock(_[num]s)\s*\(rtc_clock\b/qemu_clock_get$1(rtc_clock/g;
-       $line =~ s/\bqemu_register_clock_reset_notifier\s*\(rtc_clock\b/qemu_register_clock_reset_notifier(qemu_clock_ptr(rtc_clock)/g;
-
-       unless ($option_rtc)
-       {
-           # fix up comments
-           $line =~ s/\b(vm_|rt_|host_)clock\b/XXX_$1clock/g if ($line =~ m,^[/ ]+\*,);
-
-           # spurious fprintf error reporting
-           $line =~ s/: qemu_new_timer_ns failed/: timer_new_ns failed/g;
-
-           # these have just changed name
-           $line =~ s/\bqemu_mod_timer\b/timer_mod/g;
-           $line =~ s/\bqemu_mod_timer_(ns|us|ms)\b/timer_mod_$1/g;
-           $line =~ s/\bqemu_free_timer\b/timer_free/g;
-           $line =~ s/\bqemu_del_timer\b/timer_del/g;
-       }
-
-       # fix up rtc_clock
-       $line =~ s/QEMUClock \*rtc_clock;/QEMUClockType rtc_clock;/g;
-       $line =~ s/\brtc_clock = (vm_|rt_|host_)clock\b/rtc_clock = XXX_$1clock/g;
-
-       unless ($option_rtc)
-       {
-           # replace any more general uses
-           $line =~ s/\b(vm_|rt_|host_)clock\b/qemu_clock_ptr(XXX_$1clock)/g;
-       }
-
-       # fix up the place holders
-       $line =~ s/\bXXX_vm_clock\b/QEMU_CLOCK_VIRTUAL/g;
-       $line =~ s/\bXXX_rt_clock\b/QEMU_CLOCK_REALTIME/g;
-       $line =~ s/\bXXX_host_clock\b/QEMU_CLOCK_HOST/g;
-
-       unless ($option_rtc)
-       {
-           DoWarn("$ifn:$linenum WARNING: timer $1 not fixed up", $line) if ($line =~ /\b((vm_|rt_|host_)clock)\b/);
-           DoWarn("$ifn:$linenum WARNING: function $1 not fixed up", $line) if ($line =~ /\b(qemu_new_timer\w+)\b/);
-           DoWarn("$ifn:$linenum WARNING: legacy function $1 remains", $line) if ($line =~ /$legacyre/o);
-       }
-
-       $outtext .= $line;
-    }
-
-    close $input;
-
-    if ($intext ne $outtext)
-    {
-       print STDERR "Patching $ifn\n" unless ($option_quiet);
-       unless ($option_dryrun)
-       {
-           open my $output, ">", $ofn || die "Cannot open $ofn for write: $!";
-           print $output $outtext;
-           close $output;
-           rename ($ofn, $ifn) || die "Cannot rename temp file to $ifn: $!";
-           return 1;
-       }
-    }
-    return 0;
-}
-
-sub DoCommit
-{
-    my $file = shift @_;
-    open (my $git, "| git commit -F - $file") || die "Cannot run git commit on $file: $!";
-    print $git "timers api: use new timer api in $file\n\nConvert $file to use new timer API.\nThis is an automated commit made by scripts/switch-timer-api\n";
-    close ($git);
-}
-
-ParseOptions;
-
-foreach my $file (@files)
-{
-    my $changed = Process ($file);
-    DoCommit($file) if ($changed && $option_git);
-}
diff --git a/scripts/symlink-install-tree.py b/scripts/symlink-install-tree.py
new file mode 100644 (file)
index 0000000..8ed97e3
--- /dev/null
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+
+from pathlib import PurePath
+import errno
+import json
+import os
+import subprocess
+import sys
+
+def destdir_join(d1: str, d2: str) -> str:
+    if not d1:
+        return d2
+    # c:\destdir + c:\prefix must produce c:\destdir\prefix
+    return str(PurePath(d1, *PurePath(d2).parts[1:]))
+
+introspect = os.environ.get('MESONINTROSPECT')
+out = subprocess.run([*introspect.split(' '), '--installed'],
+                     stdout=subprocess.PIPE, check=True).stdout
+for source, dest in json.loads(out).items():
+    bundle_dest = destdir_join('qemu-bundle', dest)
+    path = os.path.dirname(bundle_dest)
+    try:
+        os.makedirs(path, exist_ok=True)
+    except BaseException as e:
+        print(f'error making directory {path}', file=sys.stderr)
+        raise e
+    try:
+        os.symlink(source, bundle_dest)
+    except BaseException as e:
+        if not isinstance(e, OSError) or e.errno != errno.EEXIST:
+            if os.name == 'nt':
+                print('Please enable Developer Mode to support soft link '
+                      'without Administrator permission')
+            print(f'error making symbolic link {dest}', file=sys.stderr)
+            raise e
diff --git a/scripts/tap-driver.pl b/scripts/tap-driver.pl
deleted file mode 100755 (executable)
index b1d3880..0000000
+++ /dev/null
@@ -1,379 +0,0 @@
-#! /usr/bin/env perl
-# Copyright (C) 2011-2013 Free Software Foundation, Inc.
-# Copyright (C) 2018 Red Hat, Inc.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-# ---------------------------------- #
-#  Imports, static data, and setup.  #
-# ---------------------------------- #
-
-use warnings FATAL => 'all';
-use strict;
-use Getopt::Long ();
-use TAP::Parser;
-use Term::ANSIColor qw(:constants);
-
-my $ME = "tap-driver.pl";
-my $VERSION = "2018-11-30";
-
-my $USAGE = <<'END';
-Usage:
-  tap-driver [--test-name=TEST] [--color={always|never|auto}]
-             [--verbose] [--show-failures-only]
-END
-
-my $HELP = "$ME: TAP-aware test driver for QEMU testsuite harness." .
-           "\n" . $USAGE;
-
-# It's important that NO_PLAN evaluates "false" as a boolean.
-use constant NO_PLAN => 0;
-use constant EARLY_PLAN => 1;
-use constant LATE_PLAN => 2;
-
-use constant DIAG_STRING => "#";
-
-# ------------------- #
-#  Global variables.  #
-# ------------------- #
-
-my $testno = 0;     # Number of test results seen so far.
-my $bailed_out = 0; # Whether a "Bail out!" directive has been seen.
-my $failed = 0;     # Final exit code
-
-# Whether the TAP plan has been seen or not, and if yes, which kind
-# it is ("early" is seen before any test result, "late" otherwise).
-my $plan_seen = NO_PLAN;
-
-# ----------------- #
-#  Option parsing.  #
-# ----------------- #
-
-my %cfg = (
-  "color" => 0,
-  "verbose" => 0,
-  "show-failures-only" => 0,
-);
-
-my $color = "auto";
-my $test_name = undef;
-
-# Perl's Getopt::Long allows options to take optional arguments after a space.
-# Prevent --color by itself from consuming other arguments
-foreach (@ARGV) {
-  if ($_ eq "--color" || $_ eq "-color") {
-    $_ = "--color=$color";
-  }
-}
-
-Getopt::Long::GetOptions
-  (
-    'help' => sub { print $HELP; exit 0; },
-    'version' => sub { print "$ME $VERSION\n"; exit 0; },
-    'test-name=s' => \$test_name,
-    'color=s'  => \$color,
-    'show-failures-only' => sub { $cfg{"show-failures-only"} = 1; },
-    'verbose' => sub { $cfg{"verbose"} = 1; },
-  ) or exit 1;
-
-if ($color =~ /^always$/i) {
-  $cfg{'color'} = 1;
-} elsif ($color =~ /^never$/i) {
-  $cfg{'color'} = 0;
-} elsif ($color =~ /^auto$/i) {
-  $cfg{'color'} = (-t STDOUT);
-} else {
-  die "Invalid color mode: $color\n";
-}
-
-# ------------- #
-#  Prototypes.  #
-# ------------- #
-
-sub colored ($$);
-sub decorate_result ($);
-sub extract_tap_comment ($);
-sub handle_tap_bailout ($);
-sub handle_tap_plan ($);
-sub handle_tap_result ($);
-sub is_null_string ($);
-sub main ();
-sub report ($;$);
-sub stringify_result_obj ($);
-sub testsuite_error ($);
-
-# -------------- #
-#  Subroutines.  #
-# -------------- #
-
-# If the given string is undefined or empty, return true, otherwise
-# return false.  This function is useful to avoid pitfalls like:
-#   if ($message) { print "$message\n"; }
-# which wouldn't print anything if $message is the literal "0".
-sub is_null_string ($)
-{
-  my $str = shift;
-  return ! (defined $str and length $str);
-}
-
-sub stringify_result_obj ($)
-{
-  my $result_obj = shift;
-  if ($result_obj->is_unplanned || $result_obj->number != $testno)
-    {
-      return "ERROR";
-    }
-  elsif ($plan_seen == LATE_PLAN)
-    {
-      return "ERROR";
-    }
-  elsif (!$result_obj->directive)
-    {
-      return $result_obj->is_ok ? "PASS" : "FAIL";
-    }
-  elsif ($result_obj->has_todo)
-    {
-      return $result_obj->is_actual_ok ? "XPASS" : "XFAIL";
-    }
-  elsif ($result_obj->has_skip)
-    {
-      return $result_obj->is_ok ? "SKIP" : "FAIL";
-    }
-  die "$ME: INTERNAL ERROR"; # NOTREACHED
-}
-
-sub colored ($$)
-{
-  my ($color_string, $text) = @_;
-  return $color_string . $text . RESET;
-}
-
-sub decorate_result ($)
-{
-  my $result = shift;
-  return $result unless $cfg{"color"};
-  my %color_for_result =
-    (
-      "ERROR" => BOLD.MAGENTA,
-      "PASS"  => GREEN,
-      "XPASS" => BOLD.YELLOW,
-      "FAIL"  => BOLD.RED,
-      "XFAIL" => YELLOW,
-      "SKIP"  => BLUE,
-    );
-  if (my $color = $color_for_result{$result})
-    {
-      return colored ($color, $result);
-    }
-  else
-    {
-      return $result; # Don't colorize unknown stuff.
-    }
-}
-
-sub report ($;$)
-{
-  my ($msg, $result, $explanation) = (undef, @_);
-  if ($result =~ /^(?:X?(?:PASS|FAIL)|SKIP|ERROR)/)
-    {
-      # Output on console might be colorized.
-      $msg = decorate_result($result);
-      if ($result =~ /^(?:PASS|XFAIL|SKIP)/)
-        {
-          return if $cfg{"show-failures-only"};
-        }
-      else
-        {
-          $failed = 1;
-        }
-    }
-  elsif ($result eq "#")
-    {
-      $msg = "  ";
-    }
-  else
-    {
-      die "$ME: INTERNAL ERROR"; # NOTREACHED
-    }
-  $msg .= " $explanation" if defined $explanation;
-  print $msg . "\n";
-}
-
-sub testsuite_error ($)
-{
-  report "ERROR", "$test_name - $_[0]";
-}
-
-sub handle_tap_result ($)
-{
-  $testno++;
-  my $result_obj = shift;
-
-  my $test_result = stringify_result_obj $result_obj;
-  my $string = $result_obj->number;
-
-  my $description = $result_obj->description;
-  $string .= " $test_name" unless is_null_string $test_name;
-  $string .= " $description" unless is_null_string $description;
-
-  if ($plan_seen == LATE_PLAN)
-    {
-      $string .= " # AFTER LATE PLAN";
-    }
-  elsif ($result_obj->is_unplanned)
-    {
-      $string .= " # UNPLANNED";
-    }
-  elsif ($result_obj->number != $testno)
-    {
-      $string .= " # OUT-OF-ORDER (expecting $testno)";
-    }
-  elsif (my $directive = $result_obj->directive)
-    {
-      $string .= " # $directive";
-      my $explanation = $result_obj->explanation;
-      $string .= " $explanation"
-        unless is_null_string $explanation;
-    }
-
-  report $test_result, $string;
-}
-
-sub handle_tap_plan ($)
-{
-  my $plan = shift;
-  if ($plan_seen)
-    {
-      # Error, only one plan per stream is acceptable.
-      testsuite_error "multiple test plans";
-      return;
-    }
-  # The TAP plan can come before or after *all* the TAP results; we speak
-  # respectively of an "early" or a "late" plan.  If we see the plan line
-  # after at least one TAP result has been seen, assume we have a late
-  # plan; in this case, any further test result seen after the plan will
-  # be flagged as an error.
-  $plan_seen = ($testno >= 1 ? LATE_PLAN : EARLY_PLAN);
-  # If $testno > 0, we have an error ("too many tests run") that will be
-  # automatically dealt with later, so don't worry about it here.  If
-  # $plan_seen is true, we have an error due to a repeated plan, and that
-  # has already been dealt with above.  Otherwise, we have a valid "plan
-  # with SKIP" specification, and should report it as a particular kind
-  # of SKIP result.
-  if ($plan->directive && $testno == 0)
-    {
-      my $explanation = is_null_string ($plan->explanation) ?
-                        undef : "- " . $plan->explanation;
-      report "SKIP", $explanation;
-    }
-}
-
-sub handle_tap_bailout ($)
-{
-  my ($bailout, $msg) = ($_[0], "Bail out!");
-  $bailed_out = 1;
-  $msg .= " " . $bailout->explanation
-    unless is_null_string $bailout->explanation;
-  testsuite_error $msg;
-}
-
-sub extract_tap_comment ($)
-{
-  my $line = shift;
-  if (index ($line, DIAG_STRING) == 0)
-    {
-      # Strip leading `DIAG_STRING' from `$line'.
-      $line = substr ($line, length (DIAG_STRING));
-      # And strip any leading and trailing whitespace left.
-      $line =~ s/(?:^\s*|\s*$)//g;
-      # Return what is left (if any).
-      return $line;
-    }
-  return "";
-}
-
-sub main ()
-{
-  my $iterator = TAP::Parser::Iterator::Stream->new(\*STDIN);
-  my $parser = TAP::Parser->new ({iterator => $iterator });
-
-  STDOUT->autoflush(1);
-  while (defined (my $cur = $parser->next))
-    {
-      # Parsing of TAP input should stop after a "Bail out!" directive.
-      next if $bailed_out;
-
-      if ($cur->is_plan)
-        {
-          handle_tap_plan ($cur);
-        }
-      elsif ($cur->is_test)
-        {
-          handle_tap_result ($cur);
-        }
-      elsif ($cur->is_bailout)
-        {
-          handle_tap_bailout ($cur);
-        }
-      elsif ($cfg{"verbose"})
-        {
-          my $comment = extract_tap_comment ($cur->raw);
-          report "#", "$comment" if length $comment;
-       }
-    }
-  # A "Bail out!" directive should cause us to ignore any following TAP
-  # error.
-  if (!$bailed_out)
-    {
-      if (!$plan_seen)
-        {
-          testsuite_error "missing test plan";
-        }
-      elsif ($parser->tests_planned != $parser->tests_run)
-        {
-          my ($planned, $run) = ($parser->tests_planned, $parser->tests_run);
-          my $bad_amount = $run > $planned ? "many" : "few";
-          testsuite_error (sprintf "too %s tests run (expected %d, got %d)",
-                                   $bad_amount, $planned, $run);
-        }
-    }
-}
-
-# ----------- #
-#  Main code. #
-# ----------- #
-
-main;
-exit($failed);
-
-# Local Variables:
-# perl-indent-level: 2
-# perl-continued-statement-offset: 2
-# perl-continued-brace-offset: 0
-# perl-brace-offset: 0
-# perl-brace-imaginary-offset: 0
-# perl-label-offset: -2
-# cperl-indent-level: 2
-# cperl-brace-offset: 0
-# cperl-continued-brace-offset: 0
-# cperl-label-offset: -2
-# cperl-extra-newline-before-brace: t
-# cperl-merge-trailing-else: nil
-# cperl-continued-statement-offset: 2
-# End:
diff --git a/scripts/tap-merge.pl b/scripts/tap-merge.pl
deleted file mode 100755 (executable)
index 10ccf57..0000000
+++ /dev/null
@@ -1,111 +0,0 @@
-#! /usr/bin/env perl
-# Copyright (C) 2018 Red Hat, Inc.
-#
-# Author: Paolo Bonzini <pbonzini@redhat.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-# ---------------------------------- #
-#  Imports, static data, and setup.  #
-# ---------------------------------- #
-
-use warnings FATAL => 'all';
-use strict;
-use Getopt::Long ();
-use TAP::Parser;
-
-my $ME = "tap-merge.pl";
-my $VERSION = "2018-11-30";
-
-my $HELP = "$ME: merge multiple TAP inputs from stdin.";
-
-use constant DIAG_STRING => "#";
-
-# ----------------- #
-#  Option parsing.  #
-# ----------------- #
-
-Getopt::Long::GetOptions
-  (
-    'help' => sub { print $HELP; exit 0; },
-    'version' => sub { print "$ME $VERSION\n"; exit 0; },
-  );
-
-# -------------- #
-#  Subroutines.  #
-# -------------- #
-
-sub main ()
-{
-  my $iterator = TAP::Parser::Iterator::Stream->new(\*STDIN);
-  my $parser = TAP::Parser->new ({iterator => $iterator });
-  my $testno = 0;     # Number of test results seen so far.
-  my $bailed_out = 0; # Whether a "Bail out!" directive has been seen.
-
-  STDOUT->autoflush(1);
-  while (defined (my $cur = $parser->next))
-    {
-      if ($cur->is_bailout)
-        {
-          $bailed_out = 1;
-          print DIAG_STRING . " " . $cur->as_string . "\n";
-          next;
-        }
-      elsif ($cur->is_plan)
-        {
-          $bailed_out = 0;
-          next;
-        }
-      elsif ($cur->is_test)
-        {
-          $bailed_out = 0 if $cur->number == 1;
-          $testno++;
-          $cur = TAP::Parser::Result::Test->new({
-                          ok => $cur->ok,
-                          test_num => $testno,
-                          directive => $cur->directive,
-                          explanation => $cur->explanation,
-                          description => $cur->description
-                  });
-        }
-      elsif ($cur->is_version)
-        {
-          next if $testno > 0;
-        }
-      print $cur->as_string . "\n" unless $bailed_out;
-    }
-  print "1..$testno\n";
-}
-
-# ----------- #
-#  Main code. #
-# ----------- #
-
-main;
-
-# Local Variables:
-# perl-indent-level: 2
-# perl-continued-statement-offset: 2
-# perl-continued-brace-offset: 0
-# perl-brace-offset: 0
-# perl-brace-imaginary-offset: 0
-# perl-label-offset: -2
-# cperl-indent-level: 2
-# cperl-brace-offset: 0
-# cperl-continued-brace-offset: 0
-# cperl-label-offset: -2
-# cperl-extra-newline-before-brace: t
-# cperl-merge-trailing-else: nil
-# cperl-continued-statement-offset: 2
-# End:
diff --git a/scripts/test-driver.py b/scripts/test-driver.py
deleted file mode 100644 (file)
index eef74b2..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-#! /usr/bin/env python3
-
-# Wrapper for tests that hides the output if they succeed.
-# Used by "make check"
-#
-# Copyright (C) 2020 Red Hat, Inc.
-#
-# Author: Paolo Bonzini <pbonzini@redhat.com>
-
-import subprocess
-import sys
-import os
-import argparse
-
-parser = argparse.ArgumentParser(description='Test driver for QEMU')
-parser.add_argument('-C', metavar='DIR', dest='dir', default='.',
-                    help='change to DIR before doing anything else')
-parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
-                    help='be more verbose')
-parser.add_argument('test_args', nargs=argparse.REMAINDER)
-
-args = parser.parse_args()
-os.chdir(args.dir)
-
-test_args = args.test_args
-if test_args[0] == '--':
-    test_args = test_args[1:]
-
-if args.verbose:
-    result = subprocess.run(test_args, stdout=None, stderr=None)
-else:
-    result = subprocess.run(test_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-    if result.returncode:
-        sys.stdout.buffer.write(result.stdout)
-sys.exit(result.returncode)
old mode 100755 (executable)
new mode 100644 (file)
index 3114624..ab7653a
@@ -16,7 +16,7 @@ __email__      = "stefanha@redhat.com"
 import sys
 import getopt
 
-from tracetool import error_write, out
+from tracetool import error_write, out, out_open
 import tracetool.backend
 import tracetool.format
 
@@ -32,7 +32,7 @@ def error_opt(msg = None):
     format_descr = "\n".join([ "    %-15s %s" % (n, d)
                                for n,d in tracetool.format.get_list() ])
     error_write("""\
-Usage: %(script)s --format=<format> --backends=<backends> [<options>]
+Usage: %(script)s --format=<format> --backends=<backends> [<options>] <trace-events> ... <output>
 
 Backends:
 %(backends)s
@@ -135,13 +135,15 @@ def main(args):
         if probe_prefix is None:
             probe_prefix = ".".join(["qemu", target_type, target_name])
 
-    if len(args) < 1:
-        error_opt("missing trace-events filepath")
+    if len(args) < 2:
+        error_opt("missing trace-events and output filepaths")
     events = []
-    for arg in args:
+    for arg in args[:-1]:
         with open(arg, "r") as fh:
             events.extend(tracetool.read_events(fh, arg))
 
+    out_open(args[-1])
+
     try:
         tracetool.generate(events, arg_group, arg_format, arg_backends,
                            binary=binary, probe_prefix=probe_prefix)
index 3ee54be223c6bfd1ba54988b1c1143a270c2fe42..b887540a5572693125586633075bd64f1fbfd864 100644 (file)
@@ -18,7 +18,6 @@ import weakref
 
 import tracetool.format
 import tracetool.backend
-import tracetool.transform
 
 
 def error_write(*lines):
@@ -31,14 +30,36 @@ def error(*lines):
     sys.exit(1)
 
 
+out_lineno = 1
+out_filename = '<none>'
+out_fobj = sys.stdout
+
+def out_open(filename):
+    global out_filename, out_fobj
+    out_filename = filename
+    out_fobj = open(filename, 'wt')
+
 def out(*lines, **kwargs):
     """Write a set of output lines.
 
     You can use kwargs as a shorthand for mapping variables when formatting all
     the strings in lines.
+
+    The 'out_lineno' kwarg is automatically added to reflect the current output
+    file line number. The 'out_next_lineno' kwarg is also automatically added
+    with the next output line number. The 'out_filename' kwarg is automatically
+    added with the output filename.
     """
-    lines = [ l % kwargs for l in lines ]
-    sys.stdout.writelines("\n".join(lines) + "\n")
+    global out_lineno
+    output = []
+    for l in lines:
+        kwargs['out_lineno'] = out_lineno
+        kwargs['out_next_lineno'] = out_lineno + 1
+        kwargs['out_filename'] = out_filename
+        output.append(l % kwargs)
+        out_lineno += 1
+
+    out_fobj.writelines("\n".join(output) + "\n")
 
 # We only want to allow standard C types or fixed sized
 # integer types. We don't want QEMU specific types
@@ -65,20 +86,18 @@ ALLOWED_TYPES = [
     "ssize_t",
     "uintptr_t",
     "ptrdiff_t",
-    # Magic substitution is done by tracetool
-    "TCGv",
 ]
 
 def validate_type(name):
     bits = name.split(" ")
     for bit in bits:
-        bit = re.sub("\*", "", bit)
+        bit = re.sub(r"\*", "", bit)
         if bit == "":
             continue
         if bit == "const":
             continue
         if bit not in ALLOWED_TYPES:
-            raise ValueError("Argument type '%s' is not in whitelist. "
+            raise ValueError("Argument type '%s' is not allowed. "
                              "Only standard C types and fixed size integer "
                              "types should be used. struct, union, and "
                              "other complex pointer types should be "
@@ -170,18 +189,6 @@ class Arguments:
         """List of argument names casted to their type."""
         return ["(%s)%s" % (type_, name) for type_, name in self._args]
 
-    def transform(self, *trans):
-        """Return a new Arguments instance with transformed types.
-
-        The types in the resulting Arguments instance are transformed according
-        to tracetool.transform.transform_type.
-        """
-        res = []
-        for type_, name in self._args:
-            res.append((tracetool.transform.transform_type(type_, *trans),
-                        name))
-        return Arguments(res)
-
 
 class Event(object):
     """Event description.
@@ -196,19 +203,23 @@ class Event(object):
         Properties of the event.
     args : Arguments
         The event arguments.
+    lineno : int
+        The line number in the input file.
+    filename : str
+        The path to the input file.
 
     """
 
-    _CRE = re.compile("((?P<props>[\w\s]+)\s+)?"
-                      "(?P<name>\w+)"
-                      "\((?P<args>[^)]*)\)"
-                      "\s*"
-                      "(?:(?:(?P<fmt_trans>\".+),)?\s*(?P<fmt>\".+))?"
-                      "\s*")
+    _CRE = re.compile(r"((?P<props>[\w\s]+)\s+)?"
+                      r"(?P<name>\w+)"
+                      r"\((?P<args>[^)]*)\)"
+                      r"\s*"
+                      r"(?:(?:(?P<fmt_trans>\".+),)?\s*(?P<fmt>\".+))?"
+                      r"\s*")
 
-    _VALID_PROPS = set(["disable", "tcg", "tcg-trans", "tcg-exec", "vcpu"])
+    _VALID_PROPS = set(["disable", "vcpu"])
 
-    def __init__(self, name, props, fmt, args, orig=None,
+    def __init__(self, name, props, fmt, args, lineno, filename, orig=None,
                  event_trans=None, event_exec=None):
         """
         Parameters
@@ -221,6 +232,10 @@ class Event(object):
             Event printing format string(s).
         args : Arguments
             Event arguments.
+        lineno : int
+            The line number in the input file.
+        filename : str
+            The path to the input file.
         orig : Event or None
             Original Event before transformation/generation.
         event_trans : Event or None
@@ -233,6 +248,8 @@ class Event(object):
         self.properties = props
         self.fmt = fmt
         self.args = args
+        self.lineno = int(lineno)
+        self.filename = str(filename)
         self.event_trans = event_trans
         self.event_exec = event_exec
 
@@ -254,16 +271,21 @@ class Event(object):
     def copy(self):
         """Create a new copy."""
         return Event(self.name, list(self.properties), self.fmt,
-                     self.args.copy(), self, self.event_trans, self.event_exec)
+                     self.args.copy(), self.lineno, self.filename,
+                     self, self.event_trans, self.event_exec)
 
     @staticmethod
-    def build(line_str):
+    def build(line_str, lineno, filename):
         """Build an Event instance from a string.
 
         Parameters
         ----------
         line_str : str
             Line describing the event.
+        lineno : int
+            Line number in input file.
+        filename : str
+            Path to input file.
         """
         m = Event._CRE.match(line_str)
         assert m is not None
@@ -284,16 +306,7 @@ class Event(object):
             fmt = [fmt_trans, fmt]
         args = Arguments.build(groups["args"])
 
-        if "tcg-trans" in props:
-            raise ValueError("Invalid property 'tcg-trans'")
-        if "tcg-exec" in props:
-            raise ValueError("Invalid property 'tcg-exec'")
-        if "tcg" not in props and not isinstance(fmt, str):
-            raise ValueError("Only events with 'tcg' property can have two format strings")
-        if "tcg" in props and isinstance(fmt, str):
-            raise ValueError("Events with 'tcg' property must have two format strings")
-
-        event = Event(name, props, fmt, args)
+        event = Event(name, props, fmt, args, lineno, filename)
 
         # add implicit arguments when using the 'vcpu' property
         import tracetool.vcpu
@@ -313,7 +326,7 @@ class Event(object):
                                           fmt)
     # Star matching on PRI is dangerous as one might have multiple
     # arguments with that format, hence the non-greedy version of it.
-    _FMT = re.compile("(%[\d\.]*\w+|%.*?PRI\S+)")
+    _FMT = re.compile(r"(%[\d\.]*\w+|%.*?PRI\S+)")
 
     def formats(self):
         """List conversion specifiers in the argument print format string."""
@@ -332,14 +345,6 @@ class Event(object):
             fmt = Event.QEMU_TRACE
         return fmt % {"name": self.name, "NAME": self.name.upper()}
 
-    def transform(self, *trans):
-        """Return a new Event with transformed Arguments."""
-        return Event(self.name,
-                     list(self.properties),
-                     self.fmt,
-                     self.args.transform(*trans),
-                     self)
-
 
 def read_events(fobj, fname):
     """Generate the output for the given (format, backends) pair.
@@ -364,39 +369,13 @@ def read_events(fobj, fname):
             continue
 
         try:
-            event = Event.build(line)
+            event = Event.build(line, lineno, fname)
         except ValueError as e:
             arg0 = 'Error at %s:%d: %s' % (fname, lineno, e.args[0])
             e.args = (arg0,) + e.args[1:]
             raise
 
-        # transform TCG-enabled events
-        if "tcg" not in event.properties:
-            events.append(event)
-        else:
-            event_trans = event.copy()
-            event_trans.name += "_trans"
-            event_trans.properties += ["tcg-trans"]
-            event_trans.fmt = event.fmt[0]
-            # ignore TCG arguments
-            args_trans = []
-            for atrans, aorig in zip(
-                    event_trans.transform(tracetool.transform.TCG_2_HOST).args,
-                    event.args):
-                if atrans == aorig:
-                    args_trans.append(atrans)
-            event_trans.args = Arguments(args_trans)
-
-            event_exec = event.copy()
-            event_exec.name += "_exec"
-            event_exec.properties += ["tcg-exec"]
-            event_exec.fmt = event.fmt[1]
-            event_exec.args = event_exec.args.transform(tracetool.transform.TCG_2_HOST)
-
-            new_event = [event_trans, event_exec]
-            event.event_trans, event.event_exec = new_event
-
-            events.extend(new_event)
+        events.append(event)
 
     return events
 
index e9844dd335c56fdb3c92102d8e8ad2bcd02dbe6a..baed2ae61cbeb00405d0ea1ecf1c3cc2c4220aa8 100644 (file)
@@ -12,6 +12,8 @@ __maintainer__ = "Stefan Hajnoczi"
 __email__      = "stefanha@redhat.com"
 
 
+import os.path
+
 from tracetool import out
 
 
@@ -33,8 +35,10 @@ def generate_h(event, group):
         '        int unused __attribute__ ((unused));',
         '        int trlen;',
         '        if (trace_event_get_state(%(event_id)s)) {',
+        '#line %(event_lineno)d "%(event_filename)s"',
         '            trlen = snprintf(ftrace_buf, MAX_TRACE_STRLEN,',
         '                             "%(name)s " %(fmt)s "\\n" %(argnames)s);',
+        '#line %(out_next_lineno)d "%(out_filename)s"',
         '            trlen = MIN(trlen, MAX_TRACE_STRLEN - 1);',
         '            unused = write(trace_marker_fd, ftrace_buf, trlen);',
         '        }',
@@ -42,6 +46,8 @@ def generate_h(event, group):
         name=event.name,
         args=event.args,
         event_id="TRACE_" + event.name.upper(),
+        event_lineno=event.lineno,
+        event_filename=os.path.relpath(event.filename),
         fmt=event.fmt.rstrip("\n"),
         argnames=argnames)
 
index 877222bbe9b7093423d3c8d0be8e619fa2307982..de27b7e62e4adaf122db536181aa80e3989409e4 100644 (file)
@@ -12,6 +12,8 @@ __maintainer__ = "Stefan Hajnoczi"
 __email__      = "stefanha@redhat.com"
 
 
+import os.path
+
 from tracetool import out
 
 
@@ -20,6 +22,7 @@ PUBLIC = True
 
 def generate_h_begin(events, group):
     out('#include "qemu/log-for-trace.h"',
+        '#include "qemu/error-report.h"',
         '')
 
 
@@ -35,14 +38,24 @@ def generate_h(event, group):
         cond = "trace_event_get_state(%s)" % ("TRACE_" + event.name.upper())
 
     out('    if (%(cond)s && qemu_loglevel_mask(LOG_TRACE)) {',
-        '        struct timeval _now;',
-        '        gettimeofday(&_now, NULL);',
-        '        qemu_log("%%d@%%zu.%%06zu:%(name)s " %(fmt)s "\\n",',
-        '                 qemu_get_thread_id(),',
-        '                 (size_t)_now.tv_sec, (size_t)_now.tv_usec',
-        '                 %(argnames)s);',
+        '        if (message_with_timestamp) {',
+        '            struct timeval _now;',
+        '            gettimeofday(&_now, NULL);',
+        '#line %(event_lineno)d "%(event_filename)s"',
+        '            qemu_log("%%d@%%zu.%%06zu:%(name)s " %(fmt)s "\\n",',
+        '                     qemu_get_thread_id(),',
+        '                     (size_t)_now.tv_sec, (size_t)_now.tv_usec',
+        '                     %(argnames)s);',
+        '#line %(out_next_lineno)d "%(out_filename)s"',
+        '        } else {',
+        '#line %(event_lineno)d "%(event_filename)s"',
+        '            qemu_log("%(name)s " %(fmt)s "\\n"%(argnames)s);',
+        '#line %(out_next_lineno)d "%(out_filename)s"',
+        '        }',
         '    }',
         cond=cond,
+        event_lineno=event.lineno,
+        event_filename=os.path.relpath(event.filename),
         name=event.name,
         fmt=event.fmt.rstrip("\n"),
         argnames=argnames)
index 1373a9019294e93f12e2386cd3d3a729a4da5531..012970f6cc02e33fd79dfa3e411faccd8cc57bc0 100644 (file)
@@ -12,6 +12,8 @@ __maintainer__ = "Stefan Hajnoczi"
 __email__      = "stefanha@redhat.com"
 
 
+import os.path
+
 from tracetool import out
 
 
@@ -35,9 +37,13 @@ def generate_h(event, group):
         cond = "trace_event_get_state(%s)" % ("TRACE_" + event.name.upper())
 
     out('    if (%(cond)s) {',
+        '#line %(event_lineno)d "%(event_filename)s"',
         '        syslog(LOG_INFO, "%(name)s " %(fmt)s %(argnames)s);',
+        '#line %(out_next_lineno)d "%(out_filename)s"',
         '    }',
         cond=cond,
+        event_lineno=event.lineno,
+        event_filename=os.path.relpath(event.filename),
         name=event.name,
         fmt=event.fmt.rstrip("\n"),
         argnames=argnames)
index c390c1844af6d717d7c718bd8aaf268f7c4f9f74..69edf0d588ee84672e552ecfbf0e3db064d15e4e 100644 (file)
@@ -32,19 +32,13 @@ def generate(events, backend, group):
         out('uint16_t %s;' % e.api(e.QEMU_DSTATE))
 
     for e in events:
-        if "vcpu" in e.properties:
-            vcpu_id = 0
-        else:
-            vcpu_id = "TRACE_VCPU_EVENT_NONE"
         out('TraceEvent %(event)s = {',
             '    .id = 0,',
-            '    .vcpu_id = %(vcpu_id)s,',
             '    .name = \"%(name)s\",',
             '    .sstate = %(sstate)s,',
             '    .dstate = &%(dstate)s ',
             '};',
             event = e.api(e.QEMU_EVENT),
-            vcpu_id = vcpu_id,
             name = e.name,
             sstate = "TRACE_%s_ENABLED" % e.name.upper(),
             dstate = e.api(e.QEMU_DSTATE))
index e94f0be7dafddd90d6f3757059937cc0b07f3984..ea126b07ea57b2ccbacda97eaa9494a2f1ebec8b 100644 (file)
@@ -16,10 +16,7 @@ from tracetool import out
 
 
 def generate(events, backend, group):
-    if group == "root":
-        header = "trace/control-vcpu.h"
-    else:
-        header = "trace/control.h"
+    header = "trace/control.h"
 
     out('/* This file is autogenerated by tracetool, do not edit. */',
         '',
@@ -74,16 +71,7 @@ def generate(events, backend, group):
 
         out('}')
 
-        # tracer wrapper with checks (per-vCPU tracing)
-        if "vcpu" in e.properties:
-            trace_cpu = next(iter(e.args))[1]
-            cond = "trace_event_get_vcpu_state(%(cpu)s,"\
-                   " TRACE_%(id)s)"\
-                   % dict(
-                       cpu=trace_cpu,
-                       id=e.name.upper())
-        else:
-            cond = "true"
+        cond = "true"
 
         out('',
             'static inline void %(api)s(%(args)s)',
index b486beb6723987fec0d6f7aade3aba2135cbfc1f..b49afababd676d5c67626f1237e6c28e8c6612e5 100644 (file)
@@ -54,6 +54,7 @@ def c_fmt_to_stap(fmt):
             else:
                 if state == STATE_MACRO:
                     bits.append(c_macro_to_format(macro))
+                    macro = ""
                 state = STATE_LITERAL
         elif fmt[i] == ' ' or fmt[i] == '\t':
             if state == STATE_MACRO:
@@ -77,7 +78,12 @@ def c_fmt_to_stap(fmt):
     elif state == STATE_LITERAL:
         bits.append(literal)
 
-    fmt = re.sub("%(\d*)z(x|u|d)", "%\\1\\2", "".join(bits))
+    # All variables in systemtap are 64-bit in size
+    # The "%l" integer size qualifier is thus redundant
+    # and "%ll" is not valid at all. Similarly the size_t
+    # based "%z" size qualifier is not valid. We just
+    # strip all size qualifiers for sanity.
+    fmt = re.sub(r"%(\d*)(l+|z)(x|u|d)", r"%\1\3", "".join(bits))
     return fmt
 
 def generate(events, backend, group):
diff --git a/scripts/tracetool/format/tcg_h.py b/scripts/tracetool/format/tcg_h.py
deleted file mode 100644 (file)
index 4d84440..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
-Generate .h file for TCG code generation.
-"""
-
-__author__     = "Lluís Vilanova <vilanova@ac.upc.edu>"
-__copyright__  = "Copyright 2012-2017, Lluís Vilanova <vilanova@ac.upc.edu>"
-__license__    = "GPL version 2 or (at your option) any later version"
-
-__maintainer__ = "Stefan Hajnoczi"
-__email__      = "stefanha@redhat.com"
-
-
-from tracetool import out, Arguments
-import tracetool.vcpu
-
-
-def vcpu_transform_args(args):
-    assert len(args) == 1
-    return Arguments([
-        args,
-        # NOTE: this name must be kept in sync with the one in "tcg_h"
-        # NOTE: Current helper code uses TCGv_env (CPUArchState*)
-        ("TCGv_env", "__tcg_" + args.names()[0]),
-    ])
-
-
-def generate(events, backend, group):
-    if group == "root":
-        header = "trace/trace-root.h"
-    else:
-        header = "trace.h"
-
-    out('/* This file is autogenerated by tracetool, do not edit. */',
-        '/* You must include this file after the inclusion of helper.h */',
-        '',
-        '#ifndef TRACE_%s_GENERATED_TCG_TRACERS_H' % group.upper(),
-        '#define TRACE_%s_GENERATED_TCG_TRACERS_H' % group.upper(),
-        '',
-        '#include "exec/helper-proto.h"',
-        '#include "%s"' % header,
-        '',
-        )
-
-    for e in events:
-        # just keep one of them
-        if "tcg-exec" not in e.properties:
-            continue
-
-        out('static inline void %(name_tcg)s(%(args)s)',
-            '{',
-            name_tcg=e.original.api(e.QEMU_TRACE_TCG),
-            args=tracetool.vcpu.transform_args("tcg_h", e.original))
-
-        if "disable" not in e.properties:
-            args_trans = e.original.event_trans.args
-            args_exec = tracetool.vcpu.transform_args(
-                "tcg_helper_c", e.original.event_exec, "wrapper")
-            if "vcpu" in e.properties:
-                trace_cpu = e.args.names()[0]
-                cond = "trace_event_get_vcpu_state(%(cpu)s,"\
-                       " TRACE_%(id)s)"\
-                       % dict(
-                           cpu=trace_cpu,
-                           id=e.original.event_exec.name.upper())
-            else:
-                cond = "true"
-
-            out('    %(name_trans)s(%(argnames_trans)s);',
-                '    if (%(cond)s) {',
-                '        gen_helper_%(name_exec)s(%(argnames_exec)s);',
-                '    }',
-                name_trans=e.original.event_trans.api(e.QEMU_TRACE),
-                name_exec=e.original.event_exec.api(e.QEMU_TRACE),
-                argnames_trans=", ".join(args_trans.names()),
-                argnames_exec=", ".join(args_exec.names()),
-                cond=cond)
-
-        out('}')
-
-    out('',
-        '#endif /* TRACE_%s_GENERATED_TCG_TRACERS_H */' % group.upper())
diff --git a/scripts/tracetool/format/tcg_helper_c.py b/scripts/tracetool/format/tcg_helper_c.py
deleted file mode 100644 (file)
index 72576e6..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
-Generate trace/generated-helpers.c.
-"""
-
-__author__     = "Lluís Vilanova <vilanova@ac.upc.edu>"
-__copyright__  = "Copyright 2012-2017, Lluís Vilanova <vilanova@ac.upc.edu>"
-__license__    = "GPL version 2 or (at your option) any later version"
-
-__maintainer__ = "Stefan Hajnoczi"
-__email__      = "stefanha@redhat.com"
-
-
-from tracetool import Arguments, out
-from tracetool.transform import *
-import tracetool.vcpu
-
-
-def vcpu_transform_args(args, mode):
-    assert len(args) == 1
-    # NOTE: this name must be kept in sync with the one in "tcg_h"
-    args = Arguments([(args.types()[0], "__tcg_" + args.names()[0])])
-    if mode == "code":
-        return Arguments([
-            # Does cast from helper requirements to tracing types
-            ("CPUState *", "env_cpu(%s)" % args.names()[0]),
-        ])
-    else:
-        args = Arguments([
-            # NOTE: Current helper code uses TCGv_env (CPUArchState*)
-            ("CPUArchState *", args.names()[0]),
-        ])
-        if mode == "header":
-            return args
-        elif mode == "wrapper":
-            return args.transform(HOST_2_TCG)
-        else:
-            assert False
-
-
-def generate(events, backend, group):
-    if group == "root":
-        header = "trace/trace-root.h"
-    else:
-        header = "trace.h"
-
-    events = [e for e in events
-              if "disable" not in e.properties]
-
-    out('/* This file is autogenerated by tracetool, do not edit. */',
-        '',
-        '#include "qemu/osdep.h"',
-        '#include "cpu.h"',
-        '#include "exec/helper-proto.h"',
-        '#include "%s"' % header,
-        '',
-        )
-
-    for e in events:
-        if "tcg-exec" not in e.properties:
-            continue
-
-        e_args_api = tracetool.vcpu.transform_args(
-            "tcg_helper_c", e.original, "header").transform(
-                HOST_2_TCG_COMPAT, TCG_2_TCG_HELPER_DEF)
-        e_args_call = tracetool.vcpu.transform_args(
-            "tcg_helper_c", e, "code")
-
-        out('void %(name_tcg)s(%(args_api)s)',
-            '{',
-            # NOTE: the check was already performed at TCG-generation time
-            '    %(name)s(%(args_call)s);',
-            '}',
-            name_tcg="helper_%s_proxy" % e.api(),
-            name=e.api(e.QEMU_TRACE_NOCHECK),
-            args_api=e_args_api,
-            args_call=", ".join(e_args_call.casted()),
-            )
diff --git a/scripts/tracetool/format/tcg_helper_h.py b/scripts/tracetool/format/tcg_helper_h.py
deleted file mode 100644 (file)
index 08554fb..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
-Generate trace/generated-helpers.h.
-"""
-
-__author__     = "Lluís Vilanova <vilanova@ac.upc.edu>"
-__copyright__  = "Copyright 2012-2016, Lluís Vilanova <vilanova@ac.upc.edu>"
-__license__    = "GPL version 2 or (at your option) any later version"
-
-__maintainer__ = "Stefan Hajnoczi"
-__email__      = "stefanha@redhat.com"
-
-
-from tracetool import out
-from tracetool.transform import *
-import tracetool.vcpu
-
-
-def generate(events, backend, group):
-    events = [e for e in events
-              if "disable" not in e.properties]
-
-    out('/* This file is autogenerated by tracetool, do not edit. */',
-        '',
-        )
-
-    for e in events:
-        if "tcg-exec" not in e.properties:
-            continue
-
-        # TCG helper proxy declaration
-        fmt = "DEF_HELPER_FLAGS_%(argc)d(%(name)s, %(flags)svoid%(types)s)"
-        e_args = tracetool.vcpu.transform_args("tcg_helper_c", e.original, "header")
-        args = e_args.transform(HOST_2_TCG_COMPAT, HOST_2_TCG,
-                                TCG_2_TCG_HELPER_DECL)
-        types = ", ".join(args.types())
-        if types != "":
-            types = ", " + types
-
-        flags = "TCG_CALL_NO_RWG, "
-
-        out(fmt,
-            flags=flags,
-            argc=len(args),
-            name=e.api() + "_proxy",
-            types=types,
-            )
diff --git a/scripts/tracetool/format/tcg_helper_wrapper_h.py b/scripts/tracetool/format/tcg_helper_wrapper_h.py
deleted file mode 100644 (file)
index 0c5a979..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
-Generate trace/generated-helpers-wrappers.h.
-"""
-
-__author__     = "Lluís Vilanova <vilanova@ac.upc.edu>"
-__copyright__  = "Copyright 2012-2016, Lluís Vilanova <vilanova@ac.upc.edu>"
-__license__    = "GPL version 2 or (at your option) any later version"
-
-__maintainer__ = "Stefan Hajnoczi"
-__email__      = "stefanha@redhat.com"
-
-
-from tracetool import out
-from tracetool.transform import *
-import tracetool.vcpu
-
-
-def generate(events, backend, group):
-    events = [e for e in events
-              if "disable" not in e.properties]
-
-    out('/* This file is autogenerated by tracetool, do not edit. */',
-        '',
-        '#define tcg_temp_new_nop(v) (v)',
-        '#define tcg_temp_free_nop(v)',
-        '',
-        )
-
-    for e in events:
-        if "tcg-exec" not in e.properties:
-            continue
-
-        # tracetool.generate always transforms types to host
-        e_args = tracetool.vcpu.transform_args("tcg_helper_c", e.original, "wrapper")
-
-        # mixed-type to TCG helper bridge
-        args_tcg_compat = e_args.transform(HOST_2_TCG_COMPAT)
-
-        code_new = [
-            "%(tcg_type)s __%(name)s = %(tcg_func)s(%(name)s);" %
-            {"tcg_type": transform_type(type_, HOST_2_TCG),
-             "tcg_func": transform_type(type_, HOST_2_TCG_TMP_NEW),
-             "name": name}
-            for (type_, name) in args_tcg_compat
-        ]
-
-        code_free = [
-            "%(tcg_func)s(__%(name)s);" %
-            {"tcg_func": transform_type(type_, HOST_2_TCG_TMP_FREE),
-             "name": name}
-            for (type_, name) in args_tcg_compat
-        ]
-
-        gen_name = "gen_helper_" + e.api()
-
-        out('static inline void %(name)s(%(args)s)',
-            '{',
-            '    %(code_new)s',
-            '    %(proxy_name)s(%(tmp_names)s);',
-            '    %(code_free)s',
-            '}',
-            name=gen_name,
-            args=e_args,
-            proxy_name=gen_name + "_proxy",
-            code_new="\n    ".join(code_new),
-            code_free="\n    ".join(code_free),
-            tmp_names=", ".join(["__%s" % name for _, name in e_args]),
-            )
index 6ce559f6cc6ef88f7819ebcc9ec8734c4c79b91a..b99fe6896bafa056c131d2871498f5ad342391e0 100644 (file)
@@ -29,8 +29,8 @@ def generate(events, backend, group):
         '#undef TRACEPOINT_PROVIDER',
         '#define TRACEPOINT_PROVIDER qemu',
         '',
-        '#undef TRACEPOINT_INCLUDE_FILE',
-        '#define TRACEPOINT_INCLUDE_FILE ./%s' % include,
+        '#undef TRACEPOINT_INCLUDE',
+        '#define TRACEPOINT_INCLUDE "./%s"' % include,
         '',
         '#if !defined (TRACE_%s_GENERATED_UST_H) || \\'  % group.upper(),
         '     defined(TRACEPOINT_HEADER_MULTI_READ)',
diff --git a/scripts/tracetool/transform.py b/scripts/tracetool/transform.py
deleted file mode 100644 (file)
index ea8b277..0000000
+++ /dev/null
@@ -1,168 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
-Type-transformation rules.
-"""
-
-__author__     = "Lluís Vilanova <vilanova@ac.upc.edu>"
-__copyright__  = "Copyright 2012-2016, Lluís Vilanova <vilanova@ac.upc.edu>"
-__license__    = "GPL version 2 or (at your option) any later version"
-
-__maintainer__ = "Stefan Hajnoczi"
-__email__      = "stefanha@redhat.com"
-
-
-def _transform_type(type_, trans):
-    if isinstance(trans, str):
-        return trans
-    elif isinstance(trans, dict):
-        if type_ in trans:
-            return _transform_type(type_, trans[type_])
-        elif None in trans:
-            return _transform_type(type_, trans[None])
-        else:
-            return type_
-    elif callable(trans):
-        return trans(type_)
-    else:
-        raise ValueError("Invalid type transformation rule: %s" % trans)
-
-
-def transform_type(type_, *trans):
-    """Return a new type transformed according to the given rules.
-
-    Applies each of the transformation rules in trans in order.
-
-    If an element of trans is a string, return it.
-
-    If an element of trans is a function, call it with type_ as its only
-    argument.
-
-    If an element of trans is a dict, search type_ in its keys. If type_ is
-    a key, use the value as a transformation rule for type_. Otherwise, if
-    None is a key use the value as a transformation rule for type_.
-
-    Otherwise, return type_.
-
-    Parameters
-    ----------
-    type_ : str
-        Type to transform.
-    trans : list of function or dict
-        Type transformation rules.
-    """
-    if len(trans) == 0:
-        raise ValueError
-    res = type_
-    for t in trans:
-        res = _transform_type(res, t)
-    return res
-
-
-##################################################
-# tcg -> host
-
-def _tcg_2_host(type_):
-    if type_ == "TCGv":
-        # force a fixed-size type (target-independent)
-        return "uint64_t"
-    else:
-        return type_
-
-TCG_2_HOST = {
-    "TCGv_i32": "uint32_t",
-    "TCGv_i64": "uint64_t",
-    "TCGv_ptr": "void *",
-    None: _tcg_2_host,
-    }
-
-
-##################################################
-# host -> host compatible with tcg sizes
-
-HOST_2_TCG_COMPAT = {
-    "uint8_t": "uint32_t",
-    "uint16_t": "uint32_t",
-    }
-
-
-##################################################
-# host/tcg -> tcg
-
-def _host_2_tcg(type_):
-    if type_.startswith("TCGv"):
-        return type_
-    raise ValueError("Don't know how to translate '%s' into a TCG type\n" % type_)
-
-HOST_2_TCG = {
-    "uint32_t": "TCGv_i32",
-    "uint64_t": "TCGv_i64",
-    "void *"  : "TCGv_ptr",
-    "CPUArchState *": "TCGv_env",
-    None: _host_2_tcg,
-    }
-
-
-##################################################
-# tcg -> tcg helper definition
-
-def _tcg_2_helper_def(type_):
-    if type_ == "TCGv":
-        return "target_ulong"
-    else:
-        return type_
-
-TCG_2_TCG_HELPER_DEF = {
-    "TCGv_i32": "uint32_t",
-    "TCGv_i64": "uint64_t",
-    "TCGv_ptr": "void *",
-    None: _tcg_2_helper_def,
-    }
-
-
-##################################################
-# tcg -> tcg helper declaration
-
-def _tcg_2_tcg_helper_decl_error(type_):
-    raise ValueError("Don't know how to translate type '%s' into a TCG helper declaration type\n" % type_)
-
-TCG_2_TCG_HELPER_DECL = {
-    "TCGv"    : "tl",
-    "TCGv_ptr": "ptr",
-    "TCGv_i32": "i32",
-    "TCGv_i64": "i64",
-    "TCGv_env": "env",
-    None: _tcg_2_tcg_helper_decl_error,
-    }
-
-
-##################################################
-# host/tcg -> tcg temporal constant allocation
-
-def _host_2_tcg_tmp_new(type_):
-    if type_.startswith("TCGv"):
-        return "tcg_temp_new_nop"
-    raise ValueError("Don't know how to translate type '%s' into a TCG temporal allocation" % type_)
-
-HOST_2_TCG_TMP_NEW = {
-    "uint32_t": "tcg_const_i32",
-    "uint64_t": "tcg_const_i64",
-    "void *"  : "tcg_const_ptr",
-    None: _host_2_tcg_tmp_new,
-    }
-
-
-##################################################
-# host/tcg -> tcg temporal constant deallocation
-
-def _host_2_tcg_tmp_free(type_):
-    if type_.startswith("TCGv"):
-        return "tcg_temp_free_nop"
-    raise ValueError("Don't know how to translate type '%s' into a TCG temporal deallocation" % type_)
-
-HOST_2_TCG_TMP_FREE = {
-    "uint32_t": "tcg_temp_free_i32",
-    "uint64_t": "tcg_temp_free_i64",
-    "void *"  : "tcg_temp_free_ptr",
-    None: _host_2_tcg_tmp_free,
-    }
index 868b4cb04c457a7d47cddba7f7776067a350eb07..d232cb1d06560866c0de31e893061310afadf85b 100644 (file)
@@ -19,19 +19,9 @@ from tracetool import Arguments, try_import
 def transform_event(event):
     """Transform event to comply with the 'vcpu' property (if present)."""
     if "vcpu" in event.properties:
-        # events with 'tcg-trans' and 'tcg-exec' are auto-generated from
-        # already-patched events
-        assert "tcg-trans" not in event.properties
-        assert "tcg-exec" not in event.properties
-
         event.args = Arguments([("void *", "__cpu"), event.args])
-        if "tcg" in event.properties:
-            fmt = "\"cpu=%p \""
-            event.fmt = [fmt + event.fmt[0],
-                         fmt + event.fmt[1]]
-        else:
-            fmt = "\"cpu=%p \""
-            event.fmt = fmt + event.fmt
+        fmt = "\"cpu=%p \""
+        event.fmt = fmt + event.fmt
     return event
 
 
diff --git a/scripts/travis/coverage-summary.sh b/scripts/travis/coverage-summary.sh
deleted file mode 100755 (executable)
index d7086cf..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/sh
-#
-# Author: Alex Bennée <alex.bennee@linaro.org>
-#
-# Summerise the state of code coverage with gcovr and tweak the output
-# to be more sane on Travis hosts. As we expect to be executed on a
-# throw away CI instance we do spam temp files all over the shop. You
-# most likely don't want to execute this script but just call gcovr
-# directly. See also "make coverage-report"
-#
-# This code is licensed under the GPL version 2 or later.  See
-# the COPYING file in the top-level directory.
-
-# first generate the coverage report
-gcovr -p -o raw-report.txt
-
-# strip the full-path and line markers
-sed s@$PWD\/@@ raw-report.txt | sed s/[0-9]\*[,-]//g > simplified.txt
-
-# reflow lines that got split
-awk '/.[ch]$/ { printf("%s", $0); next } 1' simplified.txt > rejoined.txt
-
-# columnify
-column -t rejoined.txt > final.txt
-
-# and dump, stripping out 0% coverage
-grep -v "0%" final.txt
old mode 100755 (executable)
new mode 100644 (file)
index fa6f2b6272b78223d28f4c823858442b704ed8c0..34295c0fe55b72bbf4db013c4e96262fa1ebfeac 100755 (executable)
@@ -9,6 +9,22 @@
 #
 # This work is licensed under the terms of the GNU GPL version 2.
 # See the COPYING file in the top-level directory.
+#
+# The script will copy the headers into two target folders:
+#
+# - linux-headers/ for files that are required for compiling for a
+#   Linux host.  Generally we have these so we can use kernel structs
+#   and defines that are more recent than the headers that might be
+#   installed on the host system.  Usually this script can do simple
+#   file copies for these headers.
+#
+# - include/standard-headers/ for files that are used for guest
+#   device emulation and are required on all hosts.  For instance, we
+#   get our definitions of the virtio structures from the Linux
+#   kernel headers, but we need those definitions regardless of which
+#   host OS we are building for.  This script has to be careful to
+#   sanitize the headers to remove any use of Linux-specifics such as
+#   types like "__u64".  This work is done in the cp_portable function.
 
 tmpdir=$(mktemp -d)
 linux="$1"
@@ -34,6 +50,7 @@ cp_portable() {
     if
         grep '#include' "$f" | grep -v -e 'linux/virtio' \
                                      -e 'linux/types' \
+                                     -e 'linux/ioctl' \
                                      -e 'stdint' \
                                      -e 'linux/if_ether' \
                                      -e 'input-event-codes' \
@@ -66,6 +83,7 @@ cp_portable() {
         -e 's/__BITS_PER_LONG/HOST_LONG_BITS/' \
         -e '/\"drm.h\"/d' \
         -e '/sys\/ioctl.h/d' \
+        -e '/linux\/ioctl.h/d' \
         -e 's/SW_MAX/SW_MAX_/' \
         -e 's/atomic_t/int/' \
         -e 's/__kernel_long_t/long/' \
@@ -142,8 +160,9 @@ done
 
 rm -rf "$output/linux-headers/linux"
 mkdir -p "$output/linux-headers/linux"
-for header in kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \
-              psci.h psp-sev.h userfaultfd.h mman.h; do
+for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \
+              psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \
+              vduse.h iommufd.h; do
     cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux"
 done
 
@@ -190,12 +209,14 @@ for i in "$tmpdir"/include/linux/*virtio*.h \
          "$tmpdir/include/linux/fuse.h" \
          "$tmpdir/include/linux/input.h" \
          "$tmpdir/include/linux/input-event-codes.h" \
+         "$tmpdir/include/linux/udmabuf.h" \
          "$tmpdir/include/linux/pci_regs.h" \
          "$tmpdir/include/linux/ethtool.h" \
          "$tmpdir/include/linux/const.h" \
          "$tmpdir/include/linux/kernel.h" \
          "$tmpdir/include/linux/vhost_types.h" \
-         "$tmpdir/include/linux/sysinfo.h"; do
+         "$tmpdir/include/linux/sysinfo.h" \
+         "$tmpdir/include/misc/pvpanic.h"; do
     cp_portable "$i" "$output/include/standard-headers/linux"
 done
 mkdir -p "$output/include/standard-headers/drm"
@@ -215,8 +236,7 @@ sed  -e '1h;2,$H;$!d;g'  -e 's/[^};]*pvrdma[^(| ]*([^)]*);//g' \
     "$linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h" > \
     "$tmp_pvrdma_verbs";
 
-for i in "$linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h" \
-         "$linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h" \
+for i in "$linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h" \
          "$tmp_pvrdma_verbs"; do \
     cp_portable "$i" \
          "$output/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/"
index 4f0dda4b83bcd3ce1136d008abf72053418a7329..5a529b699eb8bf291f53bf3152d4d7c9ce56aeca 100755 (executable)
@@ -1,9 +1,9 @@
 #!/bin/sh
 
-URL=https://raw.githubusercontent.com/strace/strace/master
+URL=https://raw.githubusercontent.com/strace/strace/master/src
 FILES="sysent.h sysent_shorthand_defs.h linux/mips/syscallent-compat.h \
-       linux/mips/syscallent-o32.h linux/syscallent-common-32.h \
-       linux/syscallent-common.h"
+       linux/mips/syscallent-o32.h linux/32/syscallent-common-32.h \
+       linux/generic/syscallent-common.h"
 
 output="$1"
 if [ "$output" = "" ] ; then
@@ -16,10 +16,11 @@ TMP=$(mktemp -d)
 cd $TMP
 
 for file in $FILES; do
-    curl -O $URL/$file
+    curl --create-dirs $URL/$file -o $TMP/$file
 done
 
-> subcall32.h
+> linux/generic/subcallent.h
+> linux/32/subcallent.h
 
 cat > gen_mips_o32.c <<EOF
 #include <stdio.h>
@@ -52,6 +53,6 @@ int main(void)
 }
 EOF
 
-cc -o gen_mips_o32 gen_mips_o32.c && ./gen_mips_o32 > "$output/$INC"
+cc -o gen_mips_o32 -I linux/mips -I linux/generic gen_mips_o32.c && ./gen_mips_o32 > "$output/$INC"
 
 rm -fr "$TMP"
diff --git a/scripts/userfaultfd-wrlat.py b/scripts/userfaultfd-wrlat.py
new file mode 100644 (file)
index 0000000..0684be4
--- /dev/null
@@ -0,0 +1,122 @@
+#!/usr/bin/python3
+#
+# userfaultfd-wrlat Summarize userfaultfd write fault latencies.
+#                   Events are continuously accumulated for the
+#                   run, while latency distribution histogram is
+#                   dumped each 'interval' seconds.
+#
+#                   For Linux, uses BCC, eBPF.
+#
+# USAGE: userfaultfd-lat [interval [count]]
+#
+# Copyright Virtuozzo GmbH, 2020
+#
+# Authors:
+#   Andrey Gruzdev   <andrey.gruzdev@virtuozzo.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+from __future__ import print_function
+from bcc import BPF
+from ctypes import c_ushort, c_int, c_ulonglong
+from time import sleep
+from sys import argv
+
+def usage():
+    print("USAGE: %s [interval [count]]" % argv[0])
+    exit()
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <linux/mm.h>
+
+BPF_HASH(ev_start, u32, u64);
+BPF_HISTOGRAM(ev_delta_hist, u64);
+
+/* Trace UFFD page fault start event. */
+static void do_event_start()
+{
+    /* Using "(u32)" to drop group ID which is upper 32 bits */
+    u32 tid = (u32) bpf_get_current_pid_tgid();
+    u64 ts = bpf_ktime_get_ns();
+
+    ev_start.update(&tid, &ts);
+}
+
+/* Trace UFFD page fault end event. */
+static void do_event_end()
+{
+    /* Using "(u32)" to drop group ID which is upper 32 bits */
+    u32 tid = (u32) bpf_get_current_pid_tgid();
+    u64 ts = bpf_ktime_get_ns();
+    u64 *tsp;
+
+    tsp = ev_start.lookup(&tid);
+    if (tsp) {
+        u64 delta = ts - (*tsp);
+        /* Transform time delta to milliseconds */
+        ev_delta_hist.increment(bpf_log2l(delta / 1000000));
+        ev_start.delete(&tid);
+    }
+}
+
+/* KPROBE for handle_userfault(). */
+int probe_handle_userfault(struct pt_regs *ctx, struct vm_fault *vmf,
+        unsigned long reason)
+{
+    /* Trace only UFFD write faults. */
+    if (reason & VM_UFFD_WP) {
+        do_event_start();
+    }
+    return 0;
+}
+
+/* KRETPROBE for handle_userfault(). */
+int retprobe_handle_userfault(struct pt_regs *ctx)
+{
+    do_event_end();
+    return 0;
+}
+"""
+
+# arguments
+interval = 10
+count = -1
+if len(argv) > 1:
+    try:
+        interval = int(argv[1])
+        if interval == 0:
+            raise
+        if len(argv) > 2:
+            count = int(argv[2])
+    except:    # also catches -h, --help
+        usage()
+
+# load BPF program
+b = BPF(text=bpf_text)
+# attach KRPOBEs
+b.attach_kprobe(event="handle_userfault", fn_name="probe_handle_userfault")
+b.attach_kretprobe(event="handle_userfault", fn_name="retprobe_handle_userfault")
+
+# header
+print("Tracing UFFD-WP write fault latency... Hit Ctrl-C to end.")
+
+# output
+loop = 0
+do_exit = 0
+while (1):
+    if count > 0:
+        loop += 1
+        if loop > count:
+            exit()
+    try:
+        sleep(interval)
+    except KeyboardInterrupt:
+        pass; do_exit = 1
+
+    print()
+    b["ev_delta_hist"].print_log2_hist("msecs")
+    if do_exit:
+        exit()
old mode 100755 (executable)
new mode 100644 (file)
index 0b7d30e..3d5d55b
@@ -40,7 +40,7 @@ def check_fields_match(name, s_field, d_field):
         return True
 
     # Some fields changed names between qemu versions.  This list
-    # is used to whitelist such changes in each section / description.
+    # is used to allow such changes in each section / description.
     changed_names = {
         'apic': ['timer', 'timer_expiry'],
         'e1000': ['dev', 'parent_obj'],
@@ -134,6 +134,11 @@ def exists_in_substruct(fields, item):
     return check_fields_match(fields["Description"]["name"],
                               substruct_fields[0]["field"], item)
 
+def size_total(entry):
+    size = entry["size"]
+    if "num" not in entry:
+        return size
+    return size * entry["num"]
 
 def check_fields(src_fields, dest_fields, desc, sec):
     # This function checks for all the fields in a section.  If some
@@ -249,17 +254,19 @@ def check_fields(src_fields, dest_fields, desc, sec):
                 continue
 
             if s_item["field"] == "unused" or d_item["field"] == "unused":
-                if s_item["size"] == d_item["size"]:
+                s_size = size_total(s_item)
+                d_size = size_total(d_item)
+                if s_size == d_size:
                     continue
 
                 if d_item["field"] == "unused":
                     advance_dest = False
-                    unused_count = d_item["size"] - s_item["size"]
+                    unused_count = d_size - s_size;
                     continue
 
                 if s_item["field"] == "unused":
                     advance_src = False
-                    unused_count = s_item["size"] - d_item["size"]
+                    unused_count = s_size - d_size
                     continue
 
             print("Section \"" + sec + "\",", end=' ')
@@ -367,7 +374,6 @@ def check_machine_type(s, d):
     if s["Name"] != d["Name"]:
         print("Warning: checking incompatible machine types:", end=' ')
         print("\"" + s["Name"] + "\", \"" + d["Name"] + "\"")
-    return
 
 
 def main():
diff --git a/scripts/xen-detect.c b/scripts/xen-detect.c
new file mode 100644 (file)
index 0000000..db049e6
--- /dev/null
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/* Test programs for various Xen versions that QEMU supports.  */
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION == 41100
+  #undef XC_WANT_COMPAT_DEVICEMODEL_API
+  #define __XEN_TOOLS__
+  #include <xendevicemodel.h>
+  #include <xenforeignmemory.h>
+  int main(void) {
+    xendevicemodel_handle *xd;
+    xenforeignmemory_handle *xfmem;
+
+    xd = xendevicemodel_open(0, 0);
+    xendevicemodel_pin_memory_cacheattr(xd, 0, 0, 0, 0);
+
+    xfmem = xenforeignmemory_open(0, 0);
+    xenforeignmemory_map_resource(xfmem, 0, 0, 0, 0, 0, NULL, 0, 0);
+
+    return 0;
+  }
+
+#elif CONFIG_XEN_CTRL_INTERFACE_VERSION == 41000
+  #undef XC_WANT_COMPAT_MAP_FOREIGN_API
+  #include <xenforeignmemory.h>
+  #include <xentoolcore.h>
+  int main(void) {
+    xenforeignmemory_handle *xfmem;
+
+    xfmem = xenforeignmemory_open(0, 0);
+    xenforeignmemory_map2(xfmem, 0, 0, 0, 0, 0, 0, 0);
+    xentoolcore_restrict_all(0);
+
+    return 0;
+  }
+
+#elif CONFIG_XEN_CTRL_INTERFACE_VERSION == 40900
+  #undef XC_WANT_COMPAT_DEVICEMODEL_API
+  #define __XEN_TOOLS__
+  #include <xendevicemodel.h>
+  int main(void) {
+    xendevicemodel_handle *xd;
+
+    xd = xendevicemodel_open(0, 0);
+    xendevicemodel_close(xd);
+
+    return 0;
+  }
+
+#elif CONFIG_XEN_CTRL_INTERFACE_VERSION == 40800
+  /*
+   * If we have stable libs the we don't want the libxc compat
+   * layers, regardless of what CFLAGS we may have been given.
+   *
+   * Also, check if xengnttab_grant_copy_segment_t is defined and
+   * grant copy operation is implemented.
+   */
+  #undef XC_WANT_COMPAT_EVTCHN_API
+  #undef XC_WANT_COMPAT_GNTTAB_API
+  #undef XC_WANT_COMPAT_MAP_FOREIGN_API
+  #include <xenctrl.h>
+  #include <xenstore.h>
+  #include <xenevtchn.h>
+  #include <xengnttab.h>
+  #include <xenforeignmemory.h>
+  #include <stdint.h>
+  #include <xen/hvm/hvm_info_table.h>
+  #if !defined(HVM_MAX_VCPUS)
+  # error HVM_MAX_VCPUS not defined
+  #endif
+  int main(void) {
+    xc_interface *xc = NULL;
+    xenforeignmemory_handle *xfmem;
+    xenevtchn_handle *xe;
+    xengnttab_handle *xg;
+    xengnttab_grant_copy_segment_t* seg = NULL;
+
+    xs_daemon_open();
+
+    xc = xc_interface_open(0, 0, 0);
+    xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
+    xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
+    xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
+    xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL);
+
+    xfmem = xenforeignmemory_open(0, 0);
+    xenforeignmemory_map(xfmem, 0, 0, 0, 0, 0);
+
+    xe = xenevtchn_open(0, 0);
+    xenevtchn_fd(xe);
+
+    xg = xengnttab_open(0, 0);
+    xengnttab_grant_copy(xg, 0, seg);
+
+    return 0;
+  }
+
+#elif CONFIG_XEN_CTRL_INTERFACE_VERSION == 40701
+  /*
+   * If we have stable libs the we don't want the libxc compat
+   * layers, regardless of what CFLAGS we may have been given.
+   */
+  #undef XC_WANT_COMPAT_EVTCHN_API
+  #undef XC_WANT_COMPAT_GNTTAB_API
+  #undef XC_WANT_COMPAT_MAP_FOREIGN_API
+  #include <xenctrl.h>
+  #include <xenstore.h>
+  #include <xenevtchn.h>
+  #include <xengnttab.h>
+  #include <xenforeignmemory.h>
+  #include <stdint.h>
+  #include <xen/hvm/hvm_info_table.h>
+  #if !defined(HVM_MAX_VCPUS)
+  # error HVM_MAX_VCPUS not defined
+  #endif
+  int main(void) {
+    xc_interface *xc = NULL;
+    xenforeignmemory_handle *xfmem;
+    xenevtchn_handle *xe;
+    xengnttab_handle *xg;
+
+    xs_daemon_open();
+
+    xc = xc_interface_open(0, 0, 0);
+    xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
+    xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
+    xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
+    xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL);
+
+    xfmem = xenforeignmemory_open(0, 0);
+    xenforeignmemory_map(xfmem, 0, 0, 0, 0, 0);
+
+    xe = xenevtchn_open(0, 0);
+    xenevtchn_fd(xe);
+
+    xg = xengnttab_open(0, 0);
+    xengnttab_map_grant_ref(xg, 0, 0, 0);
+
+    return 0;
+  }
+
+#else
+#error invalid CONFIG_XEN_CTRL_INTERFACE_VERSION
+#endif
diff --git a/scripts/xml-preprocess-test.py b/scripts/xml-preprocess-test.py
new file mode 100644 (file)
index 0000000..dd92579
--- /dev/null
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2023 Red Hat, Inc.
+#
+# SPDX-License-Identifier: MIT
+"""Unit tests for xml-preprocess"""
+
+import contextlib
+import importlib
+import os
+import platform
+import subprocess
+import tempfile
+import unittest
+from io import StringIO
+
+xmlpp = importlib.import_module("xml-preprocess")
+
+
+class TestXmlPreprocess(unittest.TestCase):
+    """Tests for xml-preprocess.Preprocessor"""
+
+    def test_preprocess_xml(self):
+        with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file:
+            temp_file.write("<root></root>")
+            temp_file_name = temp_file.name
+        result = xmlpp.preprocess_xml(temp_file_name)
+        self.assertEqual(result, "<root></root>")
+        os.remove(temp_file_name)
+
+    def test_save_xml(self):
+        with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file:
+            temp_file_name = temp_file.name
+            xmlpp.save_xml("<root></root>", temp_file_name)
+        self.assertTrue(os.path.isfile(temp_file_name))
+        os.remove(temp_file_name)
+
+    def test_include(self):
+        with tempfile.NamedTemporaryFile(mode="w", delete=False) as inc_file:
+            inc_file.write("<included>Content from included file</included>")
+            inc_file_name = inc_file.name
+        xml_str = f"<?include {inc_file_name} ?>"
+        expected = "<included>Content from included file</included>"
+        xpp = xmlpp.Preprocessor()
+        result = xpp.preprocess(xml_str)
+        self.assertEqual(result, expected)
+        os.remove(inc_file_name)
+        self.assertRaises(FileNotFoundError, xpp.preprocess, xml_str)
+
+    def test_envvar(self):
+        os.environ["TEST_ENV_VAR"] = "TestValue"
+        xml_str = "<root>$(env.TEST_ENV_VAR)</root>"
+        expected = "<root>TestValue</root>"
+        xpp = xmlpp.Preprocessor()
+        result = xpp.preprocess(xml_str)
+        self.assertEqual(result, expected)
+        self.assertRaises(KeyError, xpp.preprocess, "$(env.UNKNOWN)")
+
+    def test_sys_var(self):
+        xml_str = "<root>$(sys.ARCH)</root>"
+        expected = f"<root>{platform.architecture()[0]}</root>"
+        xpp = xmlpp.Preprocessor()
+        result = xpp.preprocess(xml_str)
+        self.assertEqual(result, expected)
+        self.assertRaises(KeyError, xpp.preprocess, "$(sys.UNKNOWN)")
+
+    def test_cus_var(self):
+        xml_str = "<root>$(var.USER)</root>"
+        expected = "<root></root>"
+        xpp = xmlpp.Preprocessor()
+        result = xpp.preprocess(xml_str)
+        self.assertEqual(result, expected)
+        xml_str = "<?define USER=FOO?><root>$(var.USER)</root>"
+        expected = "<root>FOO</root>"
+        xpp = xmlpp.Preprocessor()
+        result = xpp.preprocess(xml_str)
+        self.assertEqual(result, expected)
+
+    def test_error_warning(self):
+        xml_str = "<root><?warning \"test warn\"?></root>"
+        expected = "<root></root>"
+        xpp = xmlpp.Preprocessor()
+        out = StringIO()
+        with contextlib.redirect_stdout(out):
+            result = xpp.preprocess(xml_str)
+        self.assertEqual(result, expected)
+        self.assertEqual(out.getvalue(), "[Warning]: test warn\n")
+        self.assertRaises(RuntimeError, xpp.preprocess, "<?error \"test\"?>")
+
+    def test_cmd(self):
+        xpp = xmlpp.Preprocessor()
+        result = xpp.preprocess('<root><?cmd "echo hello world"?></root>')
+        self.assertEqual(result, "<root>hello world</root>")
+        self.assertRaises(
+            subprocess.CalledProcessError,
+            xpp.preprocess, '<?cmd "test-unknown-cmd"?>'
+        )
+
+    def test_foreach(self):
+        xpp = xmlpp.Preprocessor()
+        result = xpp.preprocess(
+            '<root><?foreach x in a;b;c?>$(var.x)<?endforeach?></root>'
+        )
+        self.assertEqual(result, "<root>abc</root>")
+
+    def test_if_elseif(self):
+        xpp = xmlpp.Preprocessor()
+        result = xpp.preprocess('<root><?if True?>ok<?endif?></root>')
+        self.assertEqual(result, "<root>ok</root>")
+        result = xpp.preprocess('<root><?if False?>ok<?endif?></root>')
+        self.assertEqual(result, "<root></root>")
+        result = xpp.preprocess('<root><?if True?>ok<?else?>ko<?endif?></root>')
+        self.assertEqual(result, "<root>ok</root>")
+        result = xpp.preprocess('<root><?if False?>ok<?else?>ko<?endif?></root>')
+        self.assertEqual(result, "<root>ko</root>")
+        result = xpp.preprocess(
+            '<root><?if False?>ok<?elseif True?>ok2<?else?>ko<?endif?></root>'
+        )
+        self.assertEqual(result, "<root>ok2</root>")
+        result = xpp.preprocess(
+            '<root><?if False?>ok<?elseif False?>ok<?else?>ko<?endif?></root>'
+        )
+        self.assertEqual(result, "<root>ko</root>")
+
+    def test_ifdef(self):
+        xpp = xmlpp.Preprocessor()
+        result = xpp.preprocess('<root><?ifdef USER?>ok<?else?>ko<?endif?></root>')
+        self.assertEqual(result, "<root>ko</root>")
+        result = xpp.preprocess(
+            '<?define USER=FOO?><root><?ifdef USER?>ok<?else?>ko<?endif?></root>'
+        )
+        self.assertEqual(result, "<root>ok</root>")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/scripts/xml-preprocess.py b/scripts/xml-preprocess.py
new file mode 100644 (file)
index 0000000..57f1d28
--- /dev/null
@@ -0,0 +1,293 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2017-2019 Tony Su
+# Copyright (c) 2023 Red Hat, Inc.
+#
+# SPDX-License-Identifier: MIT
+#
+# Adapted from https://github.com/peitaosu/XML-Preprocessor
+#
+"""This is a XML Preprocessor which can be used to process your XML file before
+you use it, to process conditional statements, variables, iteration
+statements, error/warning, execute command, etc.
+
+## XML Schema
+
+### Include Files
+```
+<?include path/to/file ?>
+```
+
+### Variables
+```
+$(env.EnvironmentVariable)
+
+$(sys.SystemVariable)
+
+$(var.CustomVariable)
+```
+
+### Conditional Statements
+```
+<?if ?>
+
+<?ifdef ?>
+
+<?ifndef ?>
+
+<?else?>
+
+<?elseif ?>
+
+<?endif?>
+```
+
+### Iteration Statements
+```
+<?foreach VARNAME in 1;2;3?>
+    $(var.VARNAME)
+<?endforeach?>
+```
+
+### Errors and Warnings
+```
+<?error "This is error message!" ?>
+
+<?warning "This is warning message!" ?>
+```
+
+### Commands
+```
+<? cmd "echo hello world" ?>
+```
+"""
+
+import os
+import platform
+import re
+import subprocess
+import sys
+from typing import Optional
+from xml.dom import minidom
+
+
+class Preprocessor():
+    """This class holds the XML preprocessing state"""
+
+    def __init__(self):
+        self.sys_vars = {
+            "ARCH": platform.architecture()[0],
+            "SOURCE": os.path.abspath(__file__),
+            "CURRENT": os.getcwd(),
+        }
+        self.cus_vars = {}
+
+    def _pp_include(self, xml_str: str) -> str:
+        include_regex = r"(<\?include([\w\s\\/.:_-]+)\s*\?>)"
+        matches = re.findall(include_regex, xml_str)
+        for group_inc, group_xml in matches:
+            inc_file_path = group_xml.strip()
+            with open(inc_file_path, "r", encoding="utf-8") as inc_file:
+                inc_file_content = inc_file.read()
+                xml_str = xml_str.replace(group_inc, inc_file_content)
+        return xml_str
+
+    def _pp_env_var(self, xml_str: str) -> str:
+        envvar_regex = r"(\$\(env\.(\w+)\))"
+        matches = re.findall(envvar_regex, xml_str)
+        for group_env, group_var in matches:
+            xml_str = xml_str.replace(group_env, os.environ[group_var])
+        return xml_str
+
+    def _pp_sys_var(self, xml_str: str) -> str:
+        sysvar_regex = r"(\$\(sys\.(\w+)\))"
+        matches = re.findall(sysvar_regex, xml_str)
+        for group_sys, group_var in matches:
+            xml_str = xml_str.replace(group_sys, self.sys_vars[group_var])
+        return xml_str
+
+    def _pp_cus_var(self, xml_str: str) -> str:
+        define_regex = r"(<\?define\s*(\w+)\s*=\s*([\w\s\"]+)\s*\?>)"
+        matches = re.findall(define_regex, xml_str)
+        for group_def, group_name, group_var in matches:
+            group_name = group_name.strip()
+            group_var = group_var.strip().strip("\"")
+            self.cus_vars[group_name] = group_var
+            xml_str = xml_str.replace(group_def, "")
+        cusvar_regex = r"(\$\(var\.(\w+)\))"
+        matches = re.findall(cusvar_regex, xml_str)
+        for group_cus, group_var in matches:
+            xml_str = xml_str.replace(
+                group_cus,
+                self.cus_vars.get(group_var, "")
+            )
+        return xml_str
+
+    def _pp_foreach(self, xml_str: str) -> str:
+        foreach_regex = r"(<\?foreach\s+(\w+)\s+in\s+([\w;]+)\s*\?>(.*)<\?endforeach\?>)"
+        matches = re.findall(foreach_regex, xml_str)
+        for group_for, group_name, group_vars, group_text in matches:
+            group_texts = ""
+            for var in group_vars.split(";"):
+                self.cus_vars[group_name] = var
+                group_texts += self._pp_cus_var(group_text)
+            xml_str = xml_str.replace(group_for, group_texts)
+        return xml_str
+
+    def _pp_error_warning(self, xml_str: str) -> str:
+        error_regex = r"<\?error\s*\"([^\"]+)\"\s*\?>"
+        matches = re.findall(error_regex, xml_str)
+        for group_var in matches:
+            raise RuntimeError("[Error]: " + group_var)
+        warning_regex = r"(<\?warning\s*\"([^\"]+)\"\s*\?>)"
+        matches = re.findall(warning_regex, xml_str)
+        for group_wrn, group_var in matches:
+            print("[Warning]: " + group_var)
+            xml_str = xml_str.replace(group_wrn, "")
+        return xml_str
+
+    def _pp_if_eval(self, xml_str: str) -> str:
+        ifelif_regex = (
+            r"(<\?(if|elseif)\s*([^\"\s=<>!]+)\s*([!=<>]+)\s*\"*([^\"=<>!]+)\"*\s*\?>)"
+        )
+        matches = re.findall(ifelif_regex, xml_str)
+        for ifelif, tag, left, operator, right in matches:
+            if "<" in operator or ">" in operator:
+                result = eval(f"{left} {operator} {right}")
+            else:
+                result = eval(f'"{left}" {operator} "{right}"')
+            xml_str = xml_str.replace(ifelif, f"<?{tag} {result}?>")
+        return xml_str
+
+    def _pp_ifdef_ifndef(self, xml_str: str) -> str:
+        ifndef_regex = r"(<\?(ifdef|ifndef)\s*([\w]+)\s*\?>)"
+        matches = re.findall(ifndef_regex, xml_str)
+        for group_ifndef, group_tag, group_var in matches:
+            if group_tag == "ifdef":
+                result = group_var in self.cus_vars
+            else:
+                result = group_var not in self.cus_vars
+            xml_str = xml_str.replace(group_ifndef, f"<?if {result}?>")
+        return xml_str
+
+    def _pp_if_elseif(self, xml_str: str) -> str:
+        if_elif_else_regex = (
+            r"(<\?if\s(True|False)\?>"
+            r"(.*?)"
+            r"<\?elseif\s(True|False)\?>"
+            r"(.*?)"
+            r"<\?else\?>"
+            r"(.*?)"
+            r"<\?endif\?>)"
+        )
+        if_else_regex = (
+            r"(<\?if\s(True|False)\?>"
+            r"(.*?)"
+            r"<\?else\?>"
+            r"(.*?)"
+            r"<\?endif\?>)"
+        )
+        if_regex = r"(<\?if\s(True|False)\?>(.*?)<\?endif\?>)"
+        matches = re.findall(if_elif_else_regex, xml_str, re.DOTALL)
+        for (group_full, group_if, group_if_elif, group_elif,
+             group_elif_else, group_else) in matches:
+            result = ""
+            if group_if == "True":
+                result = group_if_elif
+            elif group_elif == "True":
+                result = group_elif_else
+            else:
+                result = group_else
+            xml_str = xml_str.replace(group_full, result)
+        matches = re.findall(if_else_regex, xml_str, re.DOTALL)
+        for group_full, group_if, group_if_else, group_else in matches:
+            result = ""
+            if group_if == "True":
+                result = group_if_else
+            else:
+                result = group_else
+            xml_str = xml_str.replace(group_full, result)
+        matches = re.findall(if_regex, xml_str, re.DOTALL)
+        for group_full, group_if, group_text in matches:
+            result = ""
+            if group_if == "True":
+                result = group_text
+            xml_str = xml_str.replace(group_full, result)
+        return xml_str
+
+    def _pp_command(self, xml_str: str) -> str:
+        cmd_regex = r"(<\?cmd\s*\"([^\"]+)\"\s*\?>)"
+        matches = re.findall(cmd_regex, xml_str)
+        for group_cmd, group_exec in matches:
+            output = subprocess.check_output(
+                group_exec, shell=True,
+                text=True, stderr=subprocess.STDOUT
+            )
+            xml_str = xml_str.replace(group_cmd, output)
+        return xml_str
+
+    def _pp_blanks(self, xml_str: str) -> str:
+        right_blank_regex = r">[\n\s\t\r]*"
+        left_blank_regex = r"[\n\s\t\r]*<"
+        xml_str = re.sub(right_blank_regex, ">", xml_str)
+        xml_str = re.sub(left_blank_regex, "<", xml_str)
+        return xml_str
+
+    def preprocess(self, xml_str: str) -> str:
+        fns = [
+            self._pp_blanks,
+            self._pp_include,
+            self._pp_foreach,
+            self._pp_env_var,
+            self._pp_sys_var,
+            self._pp_cus_var,
+            self._pp_if_eval,
+            self._pp_ifdef_ifndef,
+            self._pp_if_elseif,
+            self._pp_command,
+            self._pp_error_warning,
+        ]
+
+        while True:
+            changed = False
+            for func in fns:
+                out_xml = func(xml_str)
+                if not changed and out_xml != xml_str:
+                    changed = True
+                xml_str = out_xml
+            if not changed:
+                break
+
+        return xml_str
+
+
+def preprocess_xml(path: str) -> str:
+    with open(path, "r", encoding="utf-8") as original_file:
+        input_xml = original_file.read()
+
+        proc = Preprocessor()
+        return proc.preprocess(input_xml)
+
+
+def save_xml(xml_str: str, path: Optional[str]):
+    xml = minidom.parseString(xml_str)
+    with open(path, "w", encoding="utf-8") if path else sys.stdout as output_file:
+        output_file.write(xml.toprettyxml())
+
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: xml-preprocessor input.xml [output.xml]")
+        sys.exit(1)
+
+    output_file = None
+    if len(sys.argv) == 3:
+        output_file = sys.argv[2]
+
+    input_file = sys.argv[1]
+    output_xml = preprocess_xml(input_file)
+    save_xml(output_xml, output_file)
+
+
+if __name__ == "__main__":
+    main()
index 451c7631b76f36b60e581c3e7edc60c1853d7854..3be52a98d5805a78a3711991e6240c0c20abba27 100644 (file)
@@ -77,7 +77,7 @@ static int pr_manager_helper_write(PRManagerHelper *pr_mgr,
         iov.iov_base = (void *)buf;
         iov.iov_len = sz;
         n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1,
-                                            nfds ? &fd : NULL, nfds, errp);
+                                            nfds ? &fd : NULL, nfds, 0, errp);
 
         if (n_written <= 0) {
             assert(n_written != QIO_CHANNEL_ERR_BLOCK);
index 32b9287e680d7cdebf4f27ba9717126b8e6d97b9..fb5fc297309c98ff9e8d6cb21559a468d7d5c0f6 100644 (file)
@@ -51,7 +51,6 @@ static int pr_manager_worker(void *opaque)
 int coroutine_fn pr_manager_execute(PRManager *pr_mgr, AioContext *ctx, int fd,
                                     struct sg_io_hdr *hdr)
 {
-    ThreadPool *pool = aio_get_thread_pool(ctx);
     PRManagerData data = {
         .pr_mgr = pr_mgr,
         .fd     = fd,
@@ -62,7 +61,7 @@ int coroutine_fn pr_manager_execute(PRManager *pr_mgr, AioContext *ctx, int fd,
 
     /* The matching object_unref is in pr_manager_worker.  */
     object_ref(OBJECT(pr_mgr));
-    return thread_pool_submit_co(pool, pr_manager_worker, &data);
+    return thread_pool_submit_co(pr_manager_worker, &data);
 }
 
 bool pr_manager_is_connected(PRManager *pr_mgr)
@@ -116,8 +115,7 @@ pr_manager_register_types(void)
 
 static int query_one_pr_manager(Object *object, void *opaque)
 {
-    PRManagerInfoList ***prev = opaque;
-    PRManagerInfoList *elem;
+    PRManagerInfoList ***tail = opaque;
     PRManagerInfo *info;
     PRManager *pr_mgr;
 
@@ -126,15 +124,10 @@ static int query_one_pr_manager(Object *object, void *opaque)
         return 0;
     }
 
-    elem = g_new0(PRManagerInfoList, 1);
     info = g_new0(PRManagerInfo, 1);
     info->id = g_strdup(object_get_canonical_path_component(object));
     info->connected = pr_manager_is_connected(pr_mgr);
-    elem->value = info;
-    elem->next = NULL;
-
-    **prev = elem;
-    *prev = &elem->next;
+    QAPI_LIST_APPEND(*tail, info);
     return 0;
 }
 
index 2733d92f2d814d42d91b25877d7dc03cee260b2e..c6c6347e9b6a5554772908bc4d45f077b9907934 100644 (file)
@@ -36,7 +36,7 @@
 #include <mpath_persist.h>
 #endif
 
-#include "qemu-common.h"
+#include "qemu/help-texts.h"
 #include "qapi/error.h"
 #include "qemu/cutils.h"
 #include "qemu/main-loop.h"
@@ -77,8 +77,10 @@ static int gid = -1;
 
 static void compute_default_paths(void)
 {
-    socket_path = qemu_get_local_state_pathname("run/qemu-pr-helper.sock");
-    pidfile = qemu_get_local_state_pathname("run/qemu-pr-helper.pid");
+    g_autofree char *state = qemu_get_local_state_dir();
+
+    socket_path = g_build_filename(state, "run", "qemu-pr-helper.sock", NULL);
+    pidfile = g_build_filename(state, "run", "qemu-pr-helper.pid", NULL);
 }
 
 static void usage(const char *name)
@@ -149,26 +151,35 @@ static int do_sgio_worker(void *opaque)
     io_hdr.dxferp = (char *)data->buf;
     io_hdr.dxfer_len = data->sz;
     ret = ioctl(data->fd, SG_IO, &io_hdr);
-    status = sg_io_sense_from_errno(ret < 0 ? errno : 0, &io_hdr,
-                                    &sense_code);
+
+    if (ret < 0) {
+        status = scsi_sense_from_errno(errno, &sense_code);
+        if (status == CHECK_CONDITION) {
+            scsi_build_sense(data->sense, sense_code);
+        }
+    } else if (io_hdr.host_status != SCSI_HOST_OK) {
+        status = scsi_sense_from_host_status(io_hdr.host_status, &sense_code);
+        if (status == CHECK_CONDITION) {
+            scsi_build_sense(data->sense, sense_code);
+        }
+    } else if (io_hdr.driver_status & SG_ERR_DRIVER_TIMEOUT) {
+        status = BUSY;
+    } else {
+        status = io_hdr.status;
+    }
+
     if (status == GOOD) {
         data->sz -= io_hdr.resid;
     } else {
         data->sz = 0;
     }
 
-    if (status == CHECK_CONDITION &&
-        !(io_hdr.driver_status & SG_ERR_DRIVER_SENSE)) {
-        scsi_build_sense(data->sense, sense_code);
-    }
-
     return status;
 }
 
-static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
-                    uint8_t *buf, int *sz, int dir)
+static int coroutine_fn do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
+                                uint8_t *buf, int *sz, int dir)
 {
-    ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
     int r;
 
     PRHelperSGIOData data = {
@@ -180,7 +191,7 @@ static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
         .dir = dir,
     };
 
-    r = thread_pool_submit_co(pool, do_sgio_worker, &data);
+    r = thread_pool_submit_co(do_sgio_worker, &data);
     *sz = data.sz;
     return r;
 }
@@ -269,11 +280,7 @@ void put_multipath_config(struct config *conf)
 static void multipath_pr_init(void)
 {
     udev = udev_new();
-#ifdef CONFIG_MPATH_NEW_API
     multipath_conf = mpath_lib_init();
-#else
-    mpath_lib_init(udev);
-#endif
 }
 
 static int is_mpath(int fd)
@@ -308,7 +315,7 @@ static SCSISense mpath_generic_sense(int r)
     }
 }
 
-static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense)
+static int coroutine_fn mpath_reconstruct_sense(int fd, int r, uint8_t *sense)
 {
     switch (r) {
     case MPATH_PR_SUCCESS:
@@ -360,8 +367,8 @@ static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense)
     }
 }
 
-static int multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
-                           uint8_t *data, int sz)
+static int coroutine_fn multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
+                                        uint8_t *data, int sz)
 {
     int rq_servact = cdb[1];
     struct prin_resp resp;
@@ -415,8 +422,8 @@ static int multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
     return mpath_reconstruct_sense(fd, r, sense);
 }
 
-static int multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
-                            const uint8_t *param, int sz)
+static int coroutine_fn multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
+                                         const uint8_t *param, int sz)
 {
     int rq_servact = cdb[1];
     int rq_scope = cdb[2] >> 4;
@@ -533,8 +540,8 @@ static int multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
 }
 #endif
 
-static int do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
-                    uint8_t *data, int *resp_sz)
+static int coroutine_fn do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
+                                 uint8_t *data, int *resp_sz)
 {
 #ifdef CONFIG_MPATH
     if (is_mpath(fd)) {
@@ -551,8 +558,8 @@ static int do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
                    SG_DXFER_FROM_DEV);
 }
 
-static int do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
-                     const uint8_t *param, int sz)
+static int coroutine_fn do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
+                                  const uint8_t *param, int sz)
 {
     int resp_sz;
 
@@ -602,7 +609,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
         iov.iov_base = buf;
         iov.iov_len = sz;
         n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1,
-                                        &fds, &nfds, errp);
+                                        &fds, &nfds, 0, errp);
 
         if (n_read == QIO_CHANNEL_ERR_BLOCK) {
             qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN);
@@ -728,8 +735,7 @@ static void coroutine_fn prh_co_entry(void *opaque)
 
     qio_channel_set_blocking(QIO_CHANNEL(client->ioc),
                              false, NULL);
-    qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc),
-                                   qemu_get_aio_context());
+    qio_channel_set_follow_coroutine_ctx(QIO_CHANNEL(client->ioc), true);
 
     /* A very simple negotiation for future extensibility.  No features
      * are defined so write 0.
@@ -789,7 +795,6 @@ static void coroutine_fn prh_co_entry(void *opaque)
     }
 
 out:
-    qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
     object_unref(OBJECT(client->ioc));
     g_free(client);
 }
@@ -991,7 +996,7 @@ int main(int argc, char **argv)
         exit(EXIT_FAILURE);
     }
     trace_init_file();
-    qemu_set_log(LOG_TRACE);
+    qemu_set_log(LOG_TRACE, &error_fatal);
 
 #ifdef CONFIG_MPATH
     dm_init();
@@ -1034,10 +1039,7 @@ int main(int argc, char **argv)
         }
     }
 
-    if (qemu_init_main_loop(&local_err)) {
-        error_report_err(local_err);
-        exit(EXIT_FAILURE);
-    }
+    qemu_init_main_loop(&error_fatal);
 
     server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc),
                                          G_IO_IN,
@@ -1051,10 +1053,8 @@ int main(int argc, char **argv)
         }
     }
 
-    if ((daemonize || pidfile_specified) &&
-        !qemu_write_pidfile(pidfile, &local_err)) {
-        error_report_err(local_err);
-        exit(EXIT_FAILURE);
+    if (daemonize || pidfile_specified) {
+        qemu_write_pidfile(pidfile, &error_fatal);
     }
 
 #ifdef CONFIG_LIBCAP_NG
index 6dbfeae790c8e8168785b1315648b397ecbf31cb..baf924fa89e0c4ccf9832dcc117ac2664250e65b 100644 (file)
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 
 # pr-manager.c
 pr_manager_execute(int fd, int cmd, int sa) "fd=%d cmd=0x%02x service action=0x%02x"
index b37c283014899c7b3a9ff98e04a0cdcca9ec6424..357b036671612400b1f93a01d66fbdc89a3496e8 100644 (file)
@@ -197,6 +197,11 @@ const struct SCSISense sense_code_INVALID_PARAM = {
     .key = ILLEGAL_REQUEST, .asc = 0x26, .ascq = 0x00
 };
 
+/* Illegal request, Invalid value in parameter list */
+const struct SCSISense sense_code_INVALID_PARAM_VALUE = {
+    .key = ILLEGAL_REQUEST, .asc = 0x26, .ascq = 0x01
+};
+
 /* Illegal request, Parameter list length error */
 const struct SCSISense sense_code_INVALID_PARAM_LEN = {
     .key = ILLEGAL_REQUEST, .asc = 0x1a, .ascq = 0x00
@@ -252,6 +257,21 @@ const struct SCSISense sense_code_LUN_COMM_FAILURE = {
     .key = ABORTED_COMMAND, .asc = 0x08, .ascq = 0x00
 };
 
+/* Command aborted, LUN does not respond to selection */
+const struct SCSISense sense_code_LUN_NOT_RESPONDING = {
+    .key = ABORTED_COMMAND, .asc = 0x05, .ascq = 0x00
+};
+
+/* Command aborted, Command Timeout during processing */
+const struct SCSISense sense_code_COMMAND_TIMEOUT = {
+    .key = ABORTED_COMMAND, .asc = 0x2e, .ascq = 0x02
+};
+
+/* Command aborted, Commands cleared by device server */
+const struct SCSISense sense_code_COMMAND_ABORTED = {
+    .key = ABORTED_COMMAND, .asc = 0x2f, .ascq = 0x02
+};
+
 /* Medium Error, Unrecovered read error */
 const struct SCSISense sense_code_READ_ERROR = {
     .key = MEDIUM_ERROR, .asc = 0x11, .ascq = 0x00
@@ -560,37 +580,81 @@ const char *scsi_command_name(uint8_t cmd)
     return names[cmd];
 }
 
+int scsi_sense_from_errno(int errno_value, SCSISense *sense)
+{
+    switch (errno_value) {
+    case 0:
+        return GOOD;
+    case EDOM:
+        return TASK_SET_FULL;
 #ifdef CONFIG_LINUX
-int sg_io_sense_from_errno(int errno_value, struct sg_io_hdr *io_hdr,
-                           SCSISense *sense)
+        /* These errno mapping are specific to Linux.  For more information:
+         * - scsi_check_sense and scsi_decide_disposition in drivers/scsi/scsi_error.c
+         * - scsi_result_to_blk_status in drivers/scsi/scsi_lib.c
+         * - blk_errors[] in block/blk-core.c
+         */
+    case EBADE:
+        return RESERVATION_CONFLICT;
+    case ENODATA:
+        *sense = SENSE_CODE(READ_ERROR);
+        return CHECK_CONDITION;
+    case EREMOTEIO:
+        *sense = SENSE_CODE(TARGET_FAILURE);
+        return CHECK_CONDITION;
+#endif
+    case ENOMEDIUM:
+        *sense = SENSE_CODE(NO_MEDIUM);
+        return CHECK_CONDITION;
+    case ENOMEM:
+        *sense = SENSE_CODE(TARGET_FAILURE);
+        return CHECK_CONDITION;
+    case EINVAL:
+        *sense = SENSE_CODE(INVALID_FIELD);
+        return CHECK_CONDITION;
+    case ENOSPC:
+        *sense = SENSE_CODE(SPACE_ALLOC_FAILED);
+        return CHECK_CONDITION;
+    default:
+        *sense = SENSE_CODE(IO_ERROR);
+        return CHECK_CONDITION;
+    }
+}
+
+int scsi_sense_from_host_status(uint8_t host_status,
+                                SCSISense *sense)
 {
-    if (errno_value != 0) {
-        switch (errno_value) {
-        case EDOM:
-            return TASK_SET_FULL;
-        case ENOMEM:
-            *sense = SENSE_CODE(TARGET_FAILURE);
-            return CHECK_CONDITION;
-        default:
-            *sense = SENSE_CODE(IO_ERROR);
-            return CHECK_CONDITION;
-        }
-    } else {
-        if (io_hdr->host_status == SG_ERR_DID_NO_CONNECT ||
-            io_hdr->host_status == SG_ERR_DID_BUS_BUSY ||
-            io_hdr->host_status == SG_ERR_DID_TIME_OUT ||
-            (io_hdr->driver_status & SG_ERR_DRIVER_TIMEOUT)) {
-            return BUSY;
-        } else if (io_hdr->host_status) {
-            *sense = SENSE_CODE(I_T_NEXUS_LOSS);
-            return CHECK_CONDITION;
-        } else if (io_hdr->status) {
-            return io_hdr->status;
-        } else if (io_hdr->driver_status & SG_ERR_DRIVER_SENSE) {
-            return CHECK_CONDITION;
-        } else {
-            return GOOD;
-        }
+    switch (host_status) {
+    case SCSI_HOST_NO_LUN:
+        *sense = SENSE_CODE(LUN_NOT_RESPONDING);
+        return CHECK_CONDITION;
+    case SCSI_HOST_BUSY:
+        return BUSY;
+    case SCSI_HOST_TIME_OUT:
+        *sense = SENSE_CODE(COMMAND_TIMEOUT);
+        return CHECK_CONDITION;
+    case SCSI_HOST_BAD_RESPONSE:
+        *sense = SENSE_CODE(LUN_COMM_FAILURE);
+        return CHECK_CONDITION;
+    case SCSI_HOST_ABORTED:
+        *sense = SENSE_CODE(COMMAND_ABORTED);
+        return CHECK_CONDITION;
+    case SCSI_HOST_RESET:
+        *sense = SENSE_CODE(RESET);
+        return CHECK_CONDITION;
+    case SCSI_HOST_TRANSPORT_DISRUPTED:
+        *sense = SENSE_CODE(I_T_NEXUS_LOSS);
+        return CHECK_CONDITION;
+    case SCSI_HOST_TARGET_FAILURE:
+        *sense = SENSE_CODE(TARGET_FAILURE);
+        return CHECK_CONDITION;
+    case SCSI_HOST_RESERVATION_ERROR:
+        return RESERVATION_CONFLICT;
+    case SCSI_HOST_ALLOCATION_FAILURE:
+        *sense = SENSE_CODE(SPACE_ALLOC_FAILED);
+        return CHECK_CONDITION;
+    case SCSI_HOST_MEDIUM_ERROR:
+        *sense = SENSE_CODE(READ_ERROR);
+        return CHECK_CONDITION;
     }
+    return GOOD;
 }
-#endif
diff --git a/stubs/arch_type.c b/stubs/arch_type.c
deleted file mode 100644 (file)
index fc5423b..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "qemu/osdep.h"
-#include "sysemu/arch_init.h"
-
-const uint32_t arch_type = QEMU_ARCH_NONE;
diff --git a/stubs/colo-compare.c b/stubs/colo-compare.c
new file mode 100644 (file)
index 0000000..ec72666
--- /dev/null
@@ -0,0 +1,7 @@
+#include "qemu/osdep.h"
+#include "qemu/notify.h"
+#include "net/colo-compare.h"
+
+void colo_compare_cleanup(void)
+{
+}
diff --git a/stubs/colo.c b/stubs/colo.c
new file mode 100644 (file)
index 0000000..08c9f98
--- /dev/null
@@ -0,0 +1,37 @@
+#include "qemu/osdep.h"
+#include "qemu/notify.h"
+#include "net/colo-compare.h"
+#include "migration/colo.h"
+#include "migration/migration.h"
+#include "qemu/error-report.h"
+#include "qapi/qapi-commands-migration.h"
+
+void colo_shutdown(void)
+{
+}
+
+int coroutine_fn colo_incoming_co(void)
+{
+    return 0;
+}
+
+void colo_checkpoint_delay_set(void)
+{
+}
+
+void migrate_start_colo_process(MigrationState *s)
+{
+    error_report("Impossible happened: trying to start COLO when COLO "
+                 "module is not built in");
+    abort();
+}
+
+bool migration_in_colo_state(void)
+{
+    return false;
+}
+
+bool migration_incoming_in_colo_state(void)
+{
+    return false;
+}
index a2f61521a16f79f1d75522d28a6967f46890d753..0e326d801059877234761e0ab7366c6a517f4230 100644 (file)
@@ -1,5 +1,6 @@
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
+#include "monitor/monitor.h"
 
 int error_vprintf(const char *fmt, va_list ap)
 {
index 2b7aee50d30200abbfc97c7bda57aee30866f6de..580e20702b2d1d4fb4c8a0881519d588d425f965 100644 (file)
@@ -1,6 +1,6 @@
 #include "qemu/osdep.h"
-#include "exec/gdbstub.h"       /* xml_builtin */
+#include "exec/gdbstub.h"       /* gdb_static_features */
 
-const char *const xml_builtin[][2] = {
-  { NULL, NULL }
+const GDBFeature gdb_static_features[] = {
+  { NULL }
 };
index fa990136b068d0cff44da95ac11f46084b267009..0906303f73068e792fd065f90fc040d911782b5b 100644 (file)
@@ -1,5 +1,5 @@
 #include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "sysemu/sysemu.h"
 
 const char *qemu_get_vm_name(void)
 {
diff --git a/stubs/graph-lock.c b/stubs/graph-lock.c
new file mode 100644 (file)
index 0000000..177aa0a
--- /dev/null
@@ -0,0 +1,10 @@
+#include "qemu/osdep.h"
+#include "block/graph-lock.h"
+
+void register_aiocontext(AioContext *ctx)
+{
+}
+
+void unregister_aiocontext(AioContext *ctx)
+{
+}
index f13c43568bceca6a8bb1b3982da5c7078c6744ed..6df8c2bf7d4488b9d2682a6d5e4787a4f7db2430 100644 (file)
@@ -43,3 +43,7 @@ void icount_account_warp_timer(void)
 {
     abort();
 }
+
+void icount_notify_exit(void)
+{
+}
diff --git a/stubs/iothread-lock-block.c b/stubs/iothread-lock-block.c
new file mode 100644 (file)
index 0000000..c88ed70
--- /dev/null
@@ -0,0 +1,8 @@
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+
+bool qemu_in_main_thread(void)
+{
+    return qemu_get_current_aio_context() == qemu_get_aio_context();
+}
+
index 2a6efad64a169855a5f77e8831f78fd69e4dccc7..5b45b7fc8b905701f7b30fd769032dcc36f2b7e0 100644 (file)
@@ -3,7 +3,7 @@
 
 bool qemu_mutex_iothread_locked(void)
 {
-    return true;
+    return false;
 }
 
 void qemu_mutex_lock_iothread_impl(const char *file, int line)
diff --git a/stubs/iothread.c b/stubs/iothread.c
deleted file mode 100644 (file)
index 8cc9e28..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-#include "qemu/osdep.h"
-#include "block/aio.h"
-#include "qemu/main-loop.h"
-
-AioContext *qemu_get_current_aio_context(void)
-{
-    return qemu_get_aio_context();
-}
diff --git a/stubs/machine-init-done.c b/stubs/machine-init-done.c
deleted file mode 100644 (file)
index cd8e813..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-#include "qemu/osdep.h"
-#include "sysemu/sysemu.h"
-
-bool machine_init_done = true;
-
-void qemu_add_machine_init_done_notifier(Notifier *notify)
-{
-}
diff --git a/stubs/memory_device.c b/stubs/memory_device.c
new file mode 100644 (file)
index 0000000..15fd93f
--- /dev/null
@@ -0,0 +1,22 @@
+#include "qemu/osdep.h"
+#include "hw/mem/memory-device.h"
+
+MemoryDeviceInfoList *qmp_memory_device_list(void)
+{
+   return NULL;
+}
+
+uint64_t get_plugged_memory_size(void)
+{
+    return (uint64_t)-1;
+}
+
+unsigned int memory_devices_get_reserved_memslots(void)
+{
+    return 0;
+}
+
+bool memory_devices_memslot_auto_decision_active(void)
+{
+    return false;
+}
index 82b7ba60abe54bfe0e3069ac17c238eaed76fb38..a4f34de83f095766b3e178020023aaabd9a5b05e 100644 (file)
@@ -1,4 +1,3 @@
-stub_ss.add(files('arch_type.c'))
 stub_ss.add(files('bdrv-next-monitor-owned.c'))
 stub_ss.add(files('blk-commit-all.c'))
 stub_ss.add(files('blk-exp-close-all.c'))
@@ -12,44 +11,57 @@ stub_ss.add(files('icount.c'))
 stub_ss.add(files('dump.c'))
 stub_ss.add(files('error-printf.c'))
 stub_ss.add(files('fdset.c'))
-stub_ss.add(files('fw_cfg.c'))
 stub_ss.add(files('gdbstub.c'))
 stub_ss.add(files('get-vm-name.c'))
-stub_ss.add(when: 'CONFIG_LINUX_IO_URING', if_true: files('io_uring.c'))
-stub_ss.add(files('iothread.c'))
+stub_ss.add(files('graph-lock.c'))
+if linux_io_uring.found()
+  stub_ss.add(files('io_uring.c'))
+endif
 stub_ss.add(files('iothread-lock.c'))
+if have_block
+  stub_ss.add(files('iothread-lock-block.c'))
+endif
 stub_ss.add(files('isa-bus.c'))
 stub_ss.add(files('is-daemonized.c'))
-stub_ss.add(when: 'CONFIG_LINUX_AIO', if_true: files('linux-aio.c'))
-stub_ss.add(files('machine-init-done.c'))
+if libaio.found()
+  stub_ss.add(files('linux-aio.c'))
+endif
 stub_ss.add(files('migr-blocker.c'))
+stub_ss.add(files('module-opts.c'))
 stub_ss.add(files('monitor.c'))
 stub_ss.add(files('monitor-core.c'))
-stub_ss.add(files('pci-bus.c'))
-stub_ss.add(files('pci-host-piix.c'))
+stub_ss.add(files('physmem.c'))
 stub_ss.add(files('qemu-timer-notify-cb.c'))
-stub_ss.add(files('qmp_memory_device.c'))
+stub_ss.add(files('memory_device.c'))
+stub_ss.add(files('qmp-command-available.c'))
+stub_ss.add(files('qmp-quit.c'))
 stub_ss.add(files('qtest.c'))
 stub_ss.add(files('ram-block.c'))
 stub_ss.add(files('ramfb.c'))
 stub_ss.add(files('replay.c'))
 stub_ss.add(files('runstate-check.c'))
-stub_ss.add(files('set-fd-handler.c'))
 stub_ss.add(files('sysbus.c'))
 stub_ss.add(files('target-get-monitor-def.c'))
 stub_ss.add(files('target-monitor-defs.c'))
-stub_ss.add(files('tpm.c'))
 stub_ss.add(files('trace-control.c'))
 stub_ss.add(files('uuid.c'))
-stub_ss.add(files('vmgenid.c'))
+#stub_ss.add(files('colo.c'))
+#stub_ss.add(files('colo-compare.c'))
 stub_ss.add(files('vmstate.c'))
 stub_ss.add(files('vm-stop.c'))
 stub_ss.add(files('win32-kbd-hook.c'))
 stub_ss.add(files('cpu-synchronize-state.c'))
-if have_block
+if have_block or have_ga
   stub_ss.add(files('replay-tools.c'))
 endif
 if have_system
+  stub_ss.add(files('fw_cfg.c'))
+  stub_ss.add(files('pci-bus.c'))
   stub_ss.add(files('semihost.c'))
+  stub_ss.add(files('usb-dev-stub.c'))
   stub_ss.add(files('xen-hw-stub.c'))
+  stub_ss.add(files('virtio-md-pci.c'))
+else
+  stub_ss.add(files('qdev.c'))
 endif
+stub_ss.add(files('semihost-all.c'))
index 5676a2f93c80668bb5a77e77c3da9042c88495ae..11cbff268fd0c4ca162b98869696588098571c31 100644 (file)
@@ -1,11 +1,21 @@
 #include "qemu/osdep.h"
 #include "migration/blocker.h"
 
-int migrate_add_blocker(Error *reason, Error **errp)
+int migrate_add_blocker(Error **reasonp, Error **errp)
 {
     return 0;
 }
 
-void migrate_del_blocker(Error *reason)
+int migrate_add_blocker_normal(Error **reasonp, Error **errp)
+{
+    return 0;
+}
+
+int migrate_add_blocker_modes(Error **reasonp, Error **errp, MigMode mode, ...)
+{
+    return 0;
+}
+
+void migrate_del_blocker(Error **reasonp)
 {
 }
diff --git a/stubs/module-opts.c b/stubs/module-opts.c
new file mode 100644 (file)
index 0000000..5412429
--- /dev/null
@@ -0,0 +1,2 @@
+#include "qemu/osdep.h"
+#include "qemu/config-file.h"
index d058a2a00d85e36274b2d33c14c1940b9605f22e..afa477aae656afbaa83329635333eba1e98d3cf3 100644 (file)
@@ -1,6 +1,5 @@
 #include "qemu/osdep.h"
 #include "monitor/monitor.h"
-#include "qemu-common.h"
 #include "qapi/qapi-emit-events.h"
 
 Monitor *monitor_cur(void)
diff --git a/stubs/pci-host-piix.c b/stubs/pci-host-piix.c
deleted file mode 100644 (file)
index 93975ad..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#include "qemu/osdep.h"
-#include "hw/pci-host/i440fx.h"
-
-PCIBus *find_i440fx(void)
-{
-    return NULL;
-}
diff --git a/stubs/physmem.c b/stubs/physmem.c
new file mode 100644 (file)
index 0000000..1fc5f2d
--- /dev/null
@@ -0,0 +1,13 @@
+#include "qemu/osdep.h"
+#include "exec/cpu-common.h"
+
+RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
+                                   ram_addr_t *offset)
+{
+    return NULL;
+}
+
+int qemu_ram_get_fd(RAMBlock *rb)
+{
+    return -1;
+}
diff --git a/stubs/qdev.c b/stubs/qdev.c
new file mode 100644 (file)
index 0000000..6869f6f
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * QOM stubs
+ *
+ * Copyright (c) 2021 Red Hat, Inc.
+ *
+ * Author:
+ *   Philippe Mathieu-Daudé <philmd@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qapi-events-qdev.h"
+
+void qapi_event_send_device_deleted(const char *device,
+                                    const char *path)
+{
+    /* Nothing to do. */
+}
+
+void qapi_event_send_device_unplug_guest_error(const char *device,
+                                               const char *path)
+{
+    /* Nothing to do. */
+}
diff --git a/stubs/qmp-command-available.c b/stubs/qmp-command-available.c
new file mode 100644 (file)
index 0000000..46540af
--- /dev/null
@@ -0,0 +1,7 @@
+#include "qemu/osdep.h"
+#include "qapi/qmp/dispatch.h"
+
+bool qmp_command_available(const QmpCommand *cmd, Error **errp)
+{
+    return true;
+}
diff --git a/stubs/qmp-quit.c b/stubs/qmp-quit.c
new file mode 100644 (file)
index 0000000..a3ff47f
--- /dev/null
@@ -0,0 +1,8 @@
+#include "qemu/osdep.h"
+#include "qapi/qapi-commands-control.h"
+#include "qapi/qmp/dispatch.h"
+
+void qmp_quit(Error **errp)
+{
+    g_assert_not_reached();
+}
diff --git a/stubs/qmp_memory_device.c b/stubs/qmp_memory_device.c
deleted file mode 100644 (file)
index e75cac6..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-#include "qemu/osdep.h"
-#include "hw/mem/memory-device.h"
-
-MemoryDeviceInfoList *qmp_memory_device_list(void)
-{
-   return NULL;
-}
-
-uint64_t get_plugged_memory_size(void)
-{
-    return (uint64_t)-1;
-}
index 48143f33542f4c8382525a926a3d0fc37fad8771..cf64733b10cda2fd9bdcfacd6e4990a0123ec60f 100644 (file)
@@ -2,6 +2,8 @@
 #include "qapi/error.h"
 #include "hw/display/ramfb.h"
 
+const VMStateDescription ramfb_vmstate = {};
+
 void ramfb_display_update(QemuConsole *con, RAMFBState *s)
 {
 }
index c06b360e2224cd2a7b65faebff634011cd38afbd..3e8ca3212d97236a4848cf582983e7afab14f500 100644 (file)
@@ -7,13 +7,14 @@ bool replay_events_enabled(void)
     return false;
 }
 
-int64_t replay_save_clock(unsigned int kind, int64_t clock, int64_t raw_icount)
+int64_t replay_save_clock(ReplayClockKind kind,
+                          int64_t clock, int64_t raw_icount)
 {
     abort();
     return 0;
 }
 
-int64_t replay_read_clock(unsigned int kind)
+int64_t replay_read_clock(ReplayClockKind kind, int64_t raw_icount)
 {
     abort();
     return 0;
@@ -48,11 +49,11 @@ void replay_mutex_unlock(void)
 {
 }
 
-void replay_register_char_driver(Chardev *chr)
+void replay_register_char_driver(struct Chardev *chr)
 {
 }
 
-void replay_chr_be_write(Chardev *s, uint8_t *buf, int len)
+void replay_chr_be_write(struct Chardev *s, const uint8_t *buf, int len)
 {
     abort();
 }
index 9d5b4be339b5aef7d62dac7fc53ee6afc017d26c..42c92e4acb814b278b0b72943179bf56f034f366 100644 (file)
@@ -1,5 +1,5 @@
 #include "qemu/osdep.h"
-#include "sysemu/replay.h"
+#include "exec/replay-core.h"
 
 ReplayMode replay_mode;
 
diff --git a/stubs/semihost-all.c b/stubs/semihost-all.c
new file mode 100644 (file)
index 0000000..a2a1fc9
--- /dev/null
@@ -0,0 +1,17 @@
+/*
+ * Semihosting Stubs for all targets
+ *
+ * Copyright (c) 2023 Linaro Ltd
+ *
+ * Stubs for all targets that don't actually do semihosting.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "semihosting/semihost.h"
+
+SemihostingTarget semihosting_get_target(void)
+{
+    return SEMIHOSTING_TARGET_AUTO;
+}
index 1d8b37f7b2f68d93f2a40faf8fd396308bda653b..f26cbb7c257e8983a98fb5e9b3380b5f22ef4515 100644 (file)
@@ -1,9 +1,9 @@
 /*
- * Semihosting Stubs for SoftMMU
+ * Semihosting Stubs for system emulation
  *
  * Copyright (c) 2019 Linaro Ltd
  *
- * Stubs for SoftMMU targets that don't actually do semihosting.
+ * Stubs for system targets that don't actually do semihosting.
  *
  * SPDX-License-Identifier: GPL-2.0-or-later
  */
@@ -11,8 +11,7 @@
 #include "qemu/osdep.h"
 #include "qemu/option.h"
 #include "qemu/error-report.h"
-#include "hw/semihosting/semihost.h"
-#include "sysemu/sysemu.h"
+#include "semihosting/semihost.h"
 
 /* Empty config */
 QemuOptsList qemu_semihosting_config_opts = {
@@ -24,16 +23,11 @@ QemuOptsList qemu_semihosting_config_opts = {
 };
 
 /* Queries to config status default to off */
-bool semihosting_enabled(void)
+bool semihosting_enabled(bool is_user)
 {
     return false;
 }
 
-SemihostingTarget semihosting_get_target(void)
-{
-    return SEMIHOSTING_TARGET_AUTO;
-}
-
 /*
  * All the rest are empty subs. We could g_assert_not_reached() but
  * that adds extra weight to the final binary. Waste not want not.
@@ -42,7 +36,7 @@ void qemu_semihosting_enable(void)
 {
 }
 
-int qemu_semihosting_config_options(const char *optarg)
+int qemu_semihosting_config_options(const char *optstr)
 {
     return 1;
 }
@@ -66,10 +60,6 @@ void semihosting_arg_fallback(const char *file, const char *cmd)
 {
 }
 
-void qemu_semihosting_connect_chardevs(void)
-{
-}
-
-void qemu_semihosting_console_init(void)
+void qemu_semihosting_chardev_init(void)
 {
 }
diff --git a/stubs/set-fd-handler.c b/stubs/set-fd-handler.c
deleted file mode 100644 (file)
index bff7e0a..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#include "qemu/osdep.h"
-#include "qemu/main-loop.h"
-
-void qemu_set_fd_handler(int fd,
-                         IOHandler *fd_read,
-                         IOHandler *fd_write,
-                         void *opaque)
-{
-    abort();
-}
diff --git a/stubs/tpm.c b/stubs/tpm.c
deleted file mode 100644 (file)
index 9bded19..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * TPM stubs
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "qapi/qapi-commands-tpm.h"
-#include "sysemu/tpm.h"
-#include "hw/acpi/tpm.h"
-
-int tpm_init(void)
-{
-    return 0;
-}
-
-void tpm_cleanup(void)
-{
-}
-
-TPMInfoList *qmp_query_tpm(Error **errp)
-{
-    return NULL;
-}
-
-TpmTypeList *qmp_query_tpm_types(Error **errp)
-{
-    return NULL;
-}
-
-TpmModelList *qmp_query_tpm_models(Error **errp)
-{
-    return NULL;
-}
-
-void tpm_build_ppi_acpi(TPMIf *tpm, Aml *dev)
-{
-}
index 7f856e5c24e34d41e87872308050b4584c955cab..b428f34c87881eb5af2f45be89e9a90e59b75d18 100644 (file)
@@ -36,16 +36,3 @@ void trace_event_set_state_dynamic(TraceEvent *ev, bool state)
         }
     }
 }
-
-void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
-                                        TraceEvent *ev, bool state)
-{
-    /* should never be called on non-target binaries */
-    abort();
-}
-
-void trace_init_vcpu(CPUState *vcpu)
-{
-    /* should never be called on non-target binaries */
-    abort();
-}
diff --git a/stubs/usb-dev-stub.c b/stubs/usb-dev-stub.c
new file mode 100644 (file)
index 0000000..aa55769
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * QEMU USB device emulation stubs
+ *
+ * Copyright (C) 2021 Philippe Mathieu-Daudé <f4bug@amsat.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-machine.h"
+#include "sysemu/sysemu.h"
+#include "monitor/monitor.h"
+#include "hw/usb.h"
+
+USBDevice *usbdevice_create(const char *driver)
+{
+    error_report("Support for USB devices not built-in");
+
+    return NULL;
+}
+
+HumanReadableText *qmp_x_query_usb(Error **errp)
+{
+    error_setg(errp, "Support for USB devices not built-in");
+    return NULL;
+}
+
+void hmp_info_usb(Monitor *mon, const QDict *qdict)
+{
+    monitor_printf(mon, "Support for USB devices not built-in\n");
+}
diff --git a/stubs/virtio-md-pci.c b/stubs/virtio-md-pci.c
new file mode 100644 (file)
index 0000000..ce5bba0
--- /dev/null
@@ -0,0 +1,24 @@
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/virtio/virtio-md-pci.h"
+
+void virtio_md_pci_pre_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp)
+{
+    error_setg(errp, "virtio based memory devices not supported");
+}
+
+void virtio_md_pci_plug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp)
+{
+    error_setg(errp, "virtio based memory devices not supported");
+}
+
+void virtio_md_pci_unplug_request(VirtIOMDPCI *vmd, MachineState *ms,
+                                  Error **errp)
+{
+    error_setg(errp, "virtio based memory devices not supported");
+}
+
+void virtio_md_pci_unplug(VirtIOMDPCI *vmd, MachineState *ms, Error **errp)
+{
+    error_setg(errp, "virtio based memory devices not supported");
+}
diff --git a/stubs/vmgenid.c b/stubs/vmgenid.c
deleted file mode 100644 (file)
index bfad656..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qapi/qapi-commands-machine.h"
-#include "qapi/qmp/qerror.h"
-
-GuidInfo *qmp_query_vm_generation_id(Error **errp)
-{
-    error_setg(errp, QERR_UNSUPPORTED);
-    return NULL;
-}
index cc4fe41dfc24890846b37eacf9d8230854218fb0..8513d9204e41e4cb5524d0072ddf9b2b0572f7b7 100644 (file)
@@ -1,8 +1,6 @@
 #include "qemu/osdep.h"
 #include "migration/vmstate.h"
 
-const VMStateDescription vmstate_dummy = {};
-
 int vmstate_register_with_alias_id(VMStateIf *obj,
                                    uint32_t instance_id,
                                    const VMStateDescription *vmsd,
index 15f3921a76b05b90f953b3e4ecc8095322245efb..7d7ffe83a93fb7613d1109c880f9787c952ca031 100644 (file)
@@ -15,12 +15,13 @@ int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
     return -1;
 }
 
-void xen_piix3_set_irq(void *opaque, int irq_num, int level)
+void xen_intx_set_irq(void *opaque, int irq_num, int level)
 {
 }
 
-void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
+int xen_set_pci_link_route(uint8_t link, uint8_t irq)
 {
+    return -1;
 }
 
 void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
index ac7cef9335980dc05b3a127fb8fc33e60da0a19e..dd318ed1af64d98205e6ab2b2bfb031054837d73 100644 (file)
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 #
 # This file is processed by the tracetool script during the build.
 #
 #
 # The <format-string> should be a sprintf()-compatible format string.
 
+# cpu.c
+breakpoint_insert(int cpu_index, uint64_t pc, int flags) "cpu=%d pc=0x%" PRIx64 " flags=0x%x"
+breakpoint_remove(int cpu_index, uint64_t pc, int flags) "cpu=%d pc=0x%" PRIx64 " flags=0x%x"
+breakpoint_singlestep(int cpu_index, int enabled) "cpu=%d enable=%d"
+
 # dma-helpers.c
 dma_blk_io(void *dbs, void *bs, int64_t offset, bool to_dev) "dbs=%p bs=%p offset=%" PRId64 " to_dev=%d"
 dma_aio_cancel(void *dbs) "dbs=%p"
@@ -37,38 +42,6 @@ find_ram_offset(uint64_t size, uint64_t offset) "size: 0x%" PRIx64 " @ 0x%" PRIx
 find_ram_offset_loop(uint64_t size, uint64_t candidate, uint64_t offset, uint64_t next, uint64_t mingap) "trying size: 0x%" PRIx64 " @ 0x%" PRIx64 ", offset: 0x%" PRIx64" next: 0x%" PRIx64 " mingap: 0x%" PRIx64
 ram_block_discard_range(const char *rbname, void *hva, size_t length, bool need_madvise, bool need_fallocate, int ret) "%s@%p + 0x%zx: madvise: %d fallocate: %d ret: %d"
 
-# accel/tcg/cputlb.c
-memory_notdirty_write_access(uint64_t vaddr, uint64_t ram_addr, unsigned size) "0x%" PRIx64 " ram_addr 0x%" PRIx64 " size %u"
-memory_notdirty_set_dirty(uint64_t vaddr) "0x%" PRIx64
-
-# gdbstub.c
-gdbstub_op_start(const char *device) "Starting gdbstub using device %s"
-gdbstub_op_exiting(uint8_t code) "notifying exit with code=0x%02x"
-gdbstub_op_continue(void) "Continuing all CPUs"
-gdbstub_op_continue_cpu(int cpu_index) "Continuing CPU %d"
-gdbstub_op_stepping(int cpu_index) "Stepping CPU %d"
-gdbstub_op_extra_info(const char *info) "Thread extra info: %s"
-gdbstub_hit_watchpoint(const char *type, int cpu_gdb_index, uint64_t vaddr) "Watchpoint hit, type=\"%s\" cpu=%d, vaddr=0x%" PRIx64 ""
-gdbstub_hit_internal_error(void) "RUN_STATE_INTERNAL_ERROR"
-gdbstub_hit_break(void) "RUN_STATE_DEBUG"
-gdbstub_hit_paused(void) "RUN_STATE_PAUSED"
-gdbstub_hit_shutdown(void) "RUN_STATE_SHUTDOWN"
-gdbstub_hit_io_error(void) "RUN_STATE_IO_ERROR"
-gdbstub_hit_watchdog(void) "RUN_STATE_WATCHDOG"
-gdbstub_hit_unknown(int state) "Unknown run state=0x%x"
-gdbstub_io_reply(const char *message) "Sent: %s"
-gdbstub_io_binaryreply(size_t ofs, const char *line) "0x%04zx: %s"
-gdbstub_io_command(const char *command) "Received: %s"
-gdbstub_io_got_ack(void) "Got ACK"
-gdbstub_io_got_unexpected(uint8_t ch) "Got 0x%02x when expecting ACK/NACK"
-gdbstub_err_got_nack(void) "Got NACK, retransmitting"
-gdbstub_err_garbage(uint8_t ch) "received garbage between packets: 0x%02x"
-gdbstub_err_overrun(void) "command buffer overrun, dropping command"
-gdbstub_err_invalid_repeat(uint8_t ch) "got invalid RLE count: 0x%02x"
-gdbstub_err_invalid_rle(void) "got invalid RLE sequence"
-gdbstub_err_checksum_invalid(uint8_t ch) "got invalid command checksum digit: 0x%02x"
-gdbstub_err_checksum_incorrect(uint8_t expected, uint8_t got) "got command packet with incorrect checksum, expected=0x%02x, received=0x%02x"
-
 # job.c
 job_state_transition(void *job,  int ret, const char *legal, const char *s0, const char *s1) "job %p (ret: %d) attempting %s transition (%s-->%s)"
 job_apply_verb(void *job, const char *state, const char *verb, const char *legal) "job %p in state %s; applying verb %s (%s)"
@@ -81,77 +54,3 @@ qmp_job_resume(void *job) "job %p"
 qmp_job_complete(void *job) "job %p"
 qmp_job_finalize(void *job) "job %p"
 qmp_job_dismiss(void *job) "job %p"
-
-
-### Guest events, keep at bottom
-
-
-## vCPU
-
-# trace/control-target.c
-
-# Hot-plug a new virtual (guest) CPU
-#
-# Mode: user, softmmu
-# Targets: all
-vcpu guest_cpu_enter(void)
-
-# trace/control.c
-
-# Hot-unplug a virtual (guest) CPU
-#
-# Mode: user, softmmu
-# Targets: all
-vcpu guest_cpu_exit(void)
-
-# hw/core/cpu.c
-
-# Reset the state of a virtual (guest) CPU
-#
-# Mode: user, softmmu
-# Targets: all
-vcpu guest_cpu_reset(void)
-
-# tcg/tcg-op.c
-
-# @vaddr: Access' virtual address.
-# @info : Access' information (see below).
-#
-# Start virtual memory access (before any potential access violation).
-#
-# Does not include memory accesses performed by devices.
-#
-# Access information can be parsed as:
-#
-# struct mem_info {
-#     uint8_t size_shift : 4; /* interpreted as "1 << size_shift" bytes */
-#     bool    sign_extend: 1; /* sign-extended */
-#     uint8_t endianness : 1; /* 0: little, 1: big */
-#     bool    store      : 1; /* whether it is a store operation */
-#             pad        : 1;
-#     uint8_t mmuidx     : 4; /* mmuidx (softmmu only)  */
-# };
-#
-# Mode: user, softmmu
-# Targets: TCG(all)
-vcpu tcg guest_mem_before(TCGv vaddr, uint16_t info) "info=%d", "vaddr=0x%016"PRIx64" info=%d"
-
-# include/user/syscall-trace.h
-
-# @num: System call number.
-# @arg*: System call argument value.
-#
-# Start executing a guest system call in syscall emulation mode.
-#
-# Mode: user
-# Targets: TCG(all)
-vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7, uint64_t arg8) "num=0x%016"PRIx64" arg1=0x%016"PRIx64" arg2=0x%016"PRIx64" arg3=0x%016"PRIx64" arg4=0x%016"PRIx64" arg5=0x%016"PRIx64" arg6=0x%016"PRIx64" arg7=0x%016"PRIx64" arg8=0x%016"PRIx64
-
-# @num: System call number.
-# @ret: System call result value.
-#
-# Finish executing a guest system call in syscall emulation mode.
-#
-# Mode: user
-# Targets: TCG(all)
-vcpu guest_user_syscall_ret(uint64_t num, uint64_t ret) "num=0x%016"PRIx64" ret=0x%016"PRIx64
index 8b2b50a7cf39bac3d0d61bb719bfd5a76858c720..8d818d359becfb2bd26648ee01abda5efaeb124f 100644 (file)
@@ -25,16 +25,6 @@ static inline uint32_t trace_event_get_id(TraceEvent *ev)
     return ev->id;
 }
 
-static inline uint32_t trace_event_get_vcpu_id(TraceEvent *ev)
-{
-    return ev->vcpu_id;
-}
-
-static inline bool trace_event_is_vcpu(TraceEvent *ev)
-{
-    return ev->vcpu_id != TRACE_VCPU_EVENT_NONE;
-}
-
 static inline const char * trace_event_get_name(TraceEvent *ev)
 {
     assert(ev != NULL);
index e293eeed7c00120bf6983ca6c54275308920480f..97f21e476d2d7d7d77748669978fe4ca6aa00271 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/lockable.h"
 #include "cpu.h"
 #include "trace/trace-root.h"
 #include "trace/control.h"
@@ -35,115 +36,22 @@ void trace_event_set_state_dynamic_init(TraceEvent *ev, bool state)
 
 void trace_event_set_state_dynamic(TraceEvent *ev, bool state)
 {
-    CPUState *vcpu;
     assert(trace_event_get_state_static(ev));
-    if (trace_event_is_vcpu(ev) && likely(first_cpu != NULL)) {
-        CPU_FOREACH(vcpu) {
-            trace_event_set_vcpu_state_dynamic(vcpu, ev, state);
-        }
-    } else {
-        /*
-         * Without the "vcpu" property, dstate can only be 1 or 0. With it, we
-         * haven't instantiated any vCPU yet, so we will set a global state
-         * instead, and trace_init_vcpu will reconcile it afterwards.
-         */
-        bool state_pre = *ev->dstate;
-        if (state_pre != state) {
-            if (state) {
-                trace_events_enabled_count++;
-                *ev->dstate = 1;
-            } else {
-                trace_events_enabled_count--;
-                *ev->dstate = 0;
-            }
-        }
-    }
-}
 
-static void trace_event_synchronize_vcpu_state_dynamic(
-    CPUState *vcpu, run_on_cpu_data ignored)
-{
-    bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed,
-                CPU_TRACE_DSTATE_MAX_EVENTS);
-    cpu_tb_jmp_cache_clear(vcpu);
-}
-
-void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
-                                        TraceEvent *ev, bool state)
-{
-    uint32_t vcpu_id;
-    bool state_pre;
-    assert(trace_event_get_state_static(ev));
-    assert(trace_event_is_vcpu(ev));
-    vcpu_id = trace_event_get_vcpu_id(ev);
-    state_pre = test_bit(vcpu_id, vcpu->trace_dstate);
+    /*
+     * There is no longer a "vcpu" property, dstate can only be 1 or
+     * 0. With it, we haven't instantiated any vCPU yet, so we will
+     * set a global state instead, and trace_init_vcpu will reconcile
+     * it afterwards.
+     */
+    bool state_pre = *ev->dstate;
     if (state_pre != state) {
         if (state) {
             trace_events_enabled_count++;
-            set_bit(vcpu_id, vcpu->trace_dstate_delayed);
-            (*ev->dstate)++;
+            *ev->dstate = 1;
         } else {
             trace_events_enabled_count--;
-            clear_bit(vcpu_id, vcpu->trace_dstate_delayed);
-            (*ev->dstate)--;
-        }
-        if (vcpu->created) {
-            /*
-             * Delay changes until next TB; we want all TBs to be built from a
-             * single set of dstate values to ensure consistency of generated
-             * tracing code.
-             */
-            async_run_on_cpu(vcpu, trace_event_synchronize_vcpu_state_dynamic,
-                             RUN_ON_CPU_NULL);
-        } else {
-            trace_event_synchronize_vcpu_state_dynamic(vcpu, RUN_ON_CPU_NULL);
-        }
-    }
-}
-
-static bool adding_first_cpu1(void)
-{
-    CPUState *cpu;
-    size_t count = 0;
-    CPU_FOREACH(cpu) {
-        count++;
-        if (count > 1) {
-            return false;
-        }
-    }
-    return true;
-}
-
-static bool adding_first_cpu(void)
-{
-    bool res;
-    cpu_list_lock();
-    res = adding_first_cpu1();
-    cpu_list_unlock();
-    return res;
-}
-
-void trace_init_vcpu(CPUState *vcpu)
-{
-    TraceEventIter iter;
-    TraceEvent *ev;
-    trace_event_iter_init(&iter, NULL);
-    while ((ev = trace_event_iter_next(&iter)) != NULL) {
-        if (trace_event_is_vcpu(ev) &&
-            trace_event_get_state_static(ev) &&
-            trace_event_get_state_dynamic(ev)) {
-            if (adding_first_cpu()) {
-                /* check preconditions */
-                assert(*ev->dstate == 1);
-                /* disable early-init state ... */
-                *ev->dstate = 0;
-                trace_events_enabled_count--;
-                /* ... and properly re-enable */
-                trace_event_set_vcpu_state_dynamic(vcpu, ev, true);
-            } else {
-                trace_event_set_vcpu_state_dynamic(vcpu, ev, true);
-            }
+            *ev->dstate = 0;
         }
     }
-    trace_guest_cpu_enter(vcpu);
 }
diff --git a/trace/control-vcpu.h b/trace/control-vcpu.h
deleted file mode 100644 (file)
index 0f98ebe..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Interface for configuring and controlling the state of tracing events.
- *
- * Copyright (C) 2011-2016 Lluís Vilanova <vilanova@ac.upc.edu>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#ifndef TRACE__CONTROL_VCPU_H
-#define TRACE__CONTROL_VCPU_H
-
-#include "control.h"
-#include "event-internal.h"
-#include "hw/core/cpu.h"
-
-/**
- * trace_event_get_vcpu_state:
- * @vcpu: Target vCPU.
- * @id: Event identifier name.
- *
- * Get the tracing state of an event (both static and dynamic) for the given
- * vCPU.
- *
- * If the event has the disabled property, the check will have no performance
- * impact.
- */
-#define trace_event_get_vcpu_state(vcpu, id)                            \
-    ((id ##_ENABLED) &&                                                 \
-     trace_event_get_vcpu_state_dynamic_by_vcpu_id(                     \
-         vcpu, _ ## id ## _EVENT.vcpu_id))
-
-/**
- * trace_event_get_vcpu_state_dynamic:
- *
- * Get the dynamic tracing state of an event for the given vCPU.
- */
-static bool trace_event_get_vcpu_state_dynamic(CPUState *vcpu, TraceEvent *ev);
-
-#include "control-internal.h"
-
-static inline bool
-trace_event_get_vcpu_state_dynamic_by_vcpu_id(CPUState *vcpu,
-                                              uint32_t vcpu_id)
-{
-    /* it's on fast path, avoid consistency checks (asserts) */
-    if (unlikely(trace_events_enabled_count)) {
-        return test_bit(vcpu_id, vcpu->trace_dstate);
-    } else {
-        return false;
-    }
-}
-
-static inline bool trace_event_get_vcpu_state_dynamic(CPUState *vcpu,
-                                                      TraceEvent *ev)
-{
-    uint32_t vcpu_id;
-    assert(trace_event_is_vcpu(ev));
-    vcpu_id = trace_event_get_vcpu_id(ev);
-    return trace_event_get_vcpu_state_dynamic_by_vcpu_id(vcpu, vcpu_id);
-}
-
-#endif
index b82fb87316daed1fe855579fbcc919c5b807511d..ef107829ac00ebfb90c01b2677a27aae33c7e841 100644 (file)
@@ -40,6 +40,7 @@ static size_t nevent_groups;
 static uint32_t next_id;
 static uint32_t next_vcpu_id;
 static bool init_trace_on_startup;
+static char *trace_opts_file;
 
 QemuOptsList qemu_trace_opts = {
     .name = "trace",
@@ -67,20 +68,14 @@ void trace_event_register_group(TraceEvent **events)
     size_t i;
     for (i = 0; events[i] != NULL; i++) {
         events[i]->id = next_id++;
-        if (events[i]->vcpu_id == TRACE_VCPU_EVENT_NONE) {
-            continue;
-        }
-
-        if (likely(next_vcpu_id < CPU_TRACE_DSTATE_MAX_EVENTS)) {
-            events[i]->vcpu_id = next_vcpu_id++;
-        } else {
-            warn_report("too many vcpu trace events; dropping '%s'",
-                        events[i]->name);
-        }
     }
     event_groups = g_renew(TraceEventGroup, event_groups, nevent_groups + 1);
     event_groups[nevent_groups].events = events;
     nevent_groups++;
+
+#ifdef CONFIG_TRACE_SIMPLE
+    st_init_group(nevent_groups - 1);
+#endif
 }
 
 
@@ -90,7 +85,7 @@ TraceEvent *trace_event_name(const char *name)
 
     TraceEventIter iter;
     TraceEvent *ev;
-    trace_event_iter_init(&iter, NULL);
+    trace_event_iter_init_all(&iter);
     while ((ev = trace_event_iter_next(&iter)) != NULL) {
         if (strcmp(trace_event_get_name(ev), name) == 0) {
             return ev;
@@ -99,45 +94,64 @@ TraceEvent *trace_event_name(const char *name)
     return NULL;
 }
 
-void trace_event_iter_init(TraceEventIter *iter, const char *pattern)
+void trace_event_iter_init_all(TraceEventIter *iter)
 {
     iter->event = 0;
     iter->group = 0;
+    iter->group_id = -1;
+    iter->pattern = NULL;
+}
+
+void trace_event_iter_init_pattern(TraceEventIter *iter, const char *pattern)
+{
+    trace_event_iter_init_all(iter);
     iter->pattern = pattern;
 }
 
+void trace_event_iter_init_group(TraceEventIter *iter, size_t group_id)
+{
+    trace_event_iter_init_all(iter);
+    iter->group_id = group_id;
+}
+
 TraceEvent *trace_event_iter_next(TraceEventIter *iter)
 {
     while (iter->group < nevent_groups &&
            event_groups[iter->group].events[iter->event] != NULL) {
         TraceEvent *ev = event_groups[iter->group].events[iter->event];
+        size_t group = iter->group;
         iter->event++;
         if (event_groups[iter->group].events[iter->event] == NULL) {
             iter->event = 0;
             iter->group++;
         }
-        if (!iter->pattern ||
-            g_pattern_match_simple(iter->pattern, trace_event_get_name(ev))) {
-            return ev;
+        if (iter->pattern &&
+            !g_pattern_match_simple(iter->pattern, trace_event_get_name(ev))) {
+            continue;
+        }
+        if (iter->group_id != -1 &&
+            iter->group_id != group) {
+            continue;
         }
+        return ev;
     }
 
     return NULL;
 }
 
-void trace_list_events(void)
+void trace_list_events(FILE *f)
 {
     TraceEventIter iter;
     TraceEvent *ev;
-    trace_event_iter_init(&iter, NULL);
+    trace_event_iter_init_all(&iter);
     while ((ev = trace_event_iter_next(&iter)) != NULL) {
-        fprintf(stderr, "%s\n", trace_event_get_name(ev));
+        fprintf(f, "%s\n", trace_event_get_name(ev));
     }
 #ifdef CONFIG_TRACE_DTRACE
-    fprintf(stderr, "This list of names of trace points may be incomplete "
-                    "when using the DTrace/SystemTap backends.\n"
-                    "Run 'qemu-trace-stap list %s' to print the full list.\n",
-            error_get_progname());
+    fprintf(f, "This list of names of trace points may be incomplete "
+               "when using the DTrace/SystemTap backends.\n"
+               "Run 'qemu-trace-stap list %s' to print the full list.\n",
+            g_get_prgname());
 #endif
 }
 
@@ -149,7 +163,7 @@ static void do_trace_enable_events(const char *line_buf)
     TraceEvent *ev;
     bool is_pattern = trace_event_is_pattern(line_ptr);
 
-    trace_event_iter_init(&iter, line_ptr);
+    trace_event_iter_init_pattern(&iter, line_ptr);
     while ((ev = trace_event_iter_next(&iter)) != NULL) {
         if (!trace_event_get_state_static(ev)) {
             if (!is_pattern) {
@@ -176,7 +190,7 @@ static void do_trace_enable_events(const char *line_buf)
 void trace_enable_events(const char *line_buf)
 {
     if (is_help_option(line_buf)) {
-        trace_list_events();
+        trace_list_events(stdout);
         if (monitor_cur() == NULL) {
             exit(0);
         }
@@ -224,10 +238,8 @@ static void trace_init_events(const char *fname)
 
 void trace_init_file(void)
 {
-    QemuOpts *opts = qemu_find_opts_singleton("trace");
-    const char *file = qemu_opt_get(opts, "file");
 #ifdef CONFIG_TRACE_SIMPLE
-    st_set_trace_file(file);
+    st_set_trace_file(trace_opts_file);
     if (init_trace_on_startup) {
         st_set_trace_file_enabled(true);
     }
@@ -238,11 +250,11 @@ void trace_init_file(void)
      * backend. However we should only override -D if we actually have
      * something to override it with.
      */
-    if (file) {
-        qemu_set_log_filename(file, &error_fatal);
+    if (trace_opts_file) {
+        qemu_set_log_filename(trace_opts_file, &error_fatal);
     }
 #else
-    if (file) {
+    if (trace_opts_file) {
         fprintf(stderr, "error: --trace file=...: "
                 "option not supported by the selected tracing backends\n");
         exit(1);
@@ -250,24 +262,6 @@ void trace_init_file(void)
 #endif
 }
 
-void trace_fini_vcpu(CPUState *vcpu)
-{
-    TraceEventIter iter;
-    TraceEvent *ev;
-
-    trace_guest_cpu_exit(vcpu);
-
-    trace_event_iter_init(&iter, NULL);
-    while ((ev = trace_event_iter_next(&iter)) != NULL) {
-        if (trace_event_is_vcpu(ev) &&
-            trace_event_get_state_static(ev) &&
-            trace_event_get_vcpu_state_dynamic(vcpu, ev)) {
-            /* must disable to affect the global counter */
-            trace_event_set_vcpu_state_dynamic(vcpu, ev, false);
-        }
-    }
-}
-
 bool trace_init_backends(void)
 {
 #ifdef CONFIG_TRACE_SIMPLE
@@ -291,10 +285,10 @@ bool trace_init_backends(void)
     return true;
 }
 
-void trace_opt_parse(const char *optarg)
+void trace_opt_parse(const char *optstr)
 {
     QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("trace"),
-                                             optarg, true);
+                                             optstr, true);
     if (!opts) {
         exit(1);
     }
@@ -303,6 +297,8 @@ void trace_opt_parse(const char *optarg)
     }
     trace_init_events(qemu_opt_get(opts, "events"));
     init_trace_on_startup = true;
+    g_free(trace_opts_file);
+    trace_opts_file = g_strdup(qemu_opt_get(opts, "file"));
     qemu_opts_del(opts);
 }
 
index 05b95ea4532afc77d47e943184400350480e799b..6754bfe052b8a7eb2f93ebc4f7cc556990ac0d51 100644 (file)
 #include "event-internal.h"
 
 typedef struct TraceEventIter {
+    /* iter state */
     size_t event;
     size_t group;
+    /* filter conditions */
+    size_t group_id;
     const char *pattern;
 } TraceEventIter;
 
 
 /**
- * trace_event_iter_init:
+ * trace_event_iter_init_all:
  * @iter: the event iterator struct
- * @pattern: optional pattern to filter events on name
  *
  * Initialize the event iterator struct @iter,
- * optionally using @pattern to filter out events
+ * for all events.
+ */
+void trace_event_iter_init_all(TraceEventIter *iter);
+
+/**
+ * trace_event_iter_init_pattern:
+ * @iter: the event iterator struct
+ * @pattern: pattern to filter events on name
+ *
+ * Initialize the event iterator struct @iter,
+ * using @pattern to filter out events
  * with non-matching names.
  */
-void trace_event_iter_init(TraceEventIter *iter, const char *pattern);
+void trace_event_iter_init_pattern(TraceEventIter *iter, const char *pattern);
+
+/**
+ * trace_event_iter_init_group:
+ * @iter: the event iterator struct
+ * @group_id: group_id to filter events by group.
+ *
+ * Initialize the event iterator struct @iter,
+ * using @group_id to filter for events in the group.
+ */
+void trace_event_iter_init_group(TraceEventIter *iter, size_t group_id);
 
 /**
  * trace_event_iter_next:
@@ -67,23 +89,6 @@ static bool trace_event_is_pattern(const char *str);
  */
 static uint32_t trace_event_get_id(TraceEvent *ev);
 
-/**
- * trace_event_get_vcpu_id:
- *
- * Get the per-vCPU identifier of an event.
- *
- * Special value #TRACE_VCPU_EVENT_NONE means the event is not vCPU-specific
- * (does not have the "vcpu" property).
- */
-static uint32_t trace_event_get_vcpu_id(TraceEvent *ev);
-
-/**
- * trace_event_is_vcpu:
- *
- * Whether this is a per-vCPU event.
- */
-static bool trace_event_is_vcpu(TraceEvent *ev);
-
 /**
  * trace_event_get_name:
  *
@@ -150,21 +155,6 @@ static bool trace_event_get_state_dynamic(TraceEvent *ev);
  */
 void trace_event_set_state_dynamic(TraceEvent *ev, bool state);
 
-/**
- * trace_event_set_vcpu_state_dynamic:
- *
- * Set the dynamic tracing state of an event for the given vCPU.
- *
- * Pre-condition: trace_event_get_vcpu_state_static(ev) == true
- *
- * Note: Changes for execution-time events with the 'tcg' property will not be
- *       propagated until the next TB is executed (iff executing in TCG mode).
- */
-void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
-                                        TraceEvent *ev, bool state);
-
-
-
 /**
  * trace_init_backends:
  *
@@ -183,28 +173,13 @@ bool trace_init_backends(void);
  */
 void trace_init_file(void);
 
-/**
- * trace_init_vcpu:
- * @vcpu: Added vCPU.
- *
- * Set initial dynamic event state for a hot-plugged vCPU.
- */
-void trace_init_vcpu(CPUState *vcpu);
-
-/**
- * trace_fini_vcpu:
- * @vcpu: Removed vCPU.
- *
- * Disable dynamic event state for a hot-unplugged vCPU.
- */
-void trace_fini_vcpu(CPUState *vcpu);
-
 /**
  * trace_list_events:
+ * @f: Where to send output.
  *
  * List all available events.
  */
-void trace_list_events(void);
+void trace_list_events(FILE *f);
 
 /**
  * trace_enable_events:
@@ -222,11 +197,11 @@ extern QemuOptsList qemu_trace_opts;
 
 /**
  * trace_opt_parse:
- * @optarg: A string argument of --trace command line argument
+ * @optstr: A string argument of --trace command line argument
  *
  * Initialize tracing subsystem.
  */
-void trace_opt_parse(const char *optarg);
+void trace_opt_parse(const char *optstr);
 
 /**
  * trace_get_vcpu_event_count:
index f63500b37e6e4fefda4e97eac811f935ae40e3fb..0c24e01b521b1a4017dc4ea5c9292196e6b56b7a 100644 (file)
@@ -19,7 +19,6 @@
 /**
  * TraceEvent:
  * @id: Unique event identifier.
- * @vcpu_id: Unique per-vCPU event identifier.
  * @name: Event name.
  * @sstate: Static tracing state.
  * @dstate: Dynamic tracing state
@@ -33,7 +32,6 @@
  */
 typedef struct TraceEvent {
     uint32_t id;
-    uint32_t vcpu_id;
     const char * name;
     const bool sstate;
     uint16_t *dstate;
diff --git a/trace/mem-internal.h b/trace/mem-internal.h
deleted file mode 100644 (file)
index 8b72b67..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Helper functions for guest memory tracing
- *
- * Copyright (C) 2016 Lluís Vilanova <vilanova@ac.upc.edu>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#ifndef TRACE__MEM_INTERNAL_H
-#define TRACE__MEM_INTERNAL_H
-
-#define TRACE_MEM_SZ_SHIFT_MASK 0xf /* size shift mask */
-#define TRACE_MEM_SE (1ULL << 4)    /* sign extended (y/n) */
-#define TRACE_MEM_BE (1ULL << 5)    /* big endian (y/n) */
-#define TRACE_MEM_ST (1ULL << 6)    /* store (y/n) */
-#define TRACE_MEM_MMU_SHIFT 8       /* mmu idx */
-
-static inline uint16_t trace_mem_build_info(
-    int size_shift, bool sign_extend, MemOp endianness,
-    bool store, unsigned int mmu_idx)
-{
-    uint16_t res;
-
-    res = size_shift & TRACE_MEM_SZ_SHIFT_MASK;
-    if (sign_extend) {
-        res |= TRACE_MEM_SE;
-    }
-    if (endianness == MO_BE) {
-        res |= TRACE_MEM_BE;
-    }
-    if (store) {
-        res |= TRACE_MEM_ST;
-    }
-#ifdef CONFIG_SOFTMMU
-    res |= mmu_idx << TRACE_MEM_MMU_SHIFT;
-#endif
-    return res;
-}
-
-static inline uint16_t trace_mem_get_info(MemOp op,
-                                          unsigned int mmu_idx,
-                                          bool store)
-{
-    return trace_mem_build_info(op & MO_SIZE, !!(op & MO_SIGN),
-                                op & MO_BSWAP, store,
-                                mmu_idx);
-}
-
-#endif /* TRACE__MEM_INTERNAL_H */
diff --git a/trace/mem.h b/trace/mem.h
deleted file mode 100644 (file)
index 9644f59..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Helper functions for guest memory tracing
- *
- * Copyright (C) 2016 Lluís Vilanova <vilanova@ac.upc.edu>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#ifndef TRACE__MEM_H
-#define TRACE__MEM_H
-
-#include "tcg/tcg.h"
-
-
-/**
- * trace_mem_get_info:
- *
- * Return a value for the 'info' argument in guest memory access traces.
- */
-static uint16_t trace_mem_get_info(MemOp op, unsigned int mmu_idx, bool store);
-
-/**
- * trace_mem_build_info:
- *
- * Return a value for the 'info' argument in guest memory access traces.
- */
-static uint16_t trace_mem_build_info(int size_shift, bool sign_extend,
-                                     MemOp endianness, bool store,
-                                     unsigned int mmuidx);
-
-
-#include "trace/mem-internal.h"
-
-#endif /* TRACE__MEM_H */
index 843ea1449540a580d233c857d9b2c7e863705424..b0d31a67e6864505a9593dcecd601f58e986c5b7 100644 (file)
@@ -1,50 +1,57 @@
+system_ss.add(files('trace-hmp-cmds.c'))
+
 specific_ss.add(files('control-target.c'))
 
 trace_events_files = []
-foreach dir : [ '.' ] + trace_events_subdirs
-  trace_events_file = meson.source_root() / dir / 'trace-events'
+foreach item : [ '.' ] + trace_events_subdirs + qapi_trace_events
+  if item in qapi_trace_events
+    trace_events_file = item
+    group_name = item.full_path().split('/')[-1].underscorify()
+  else
+    trace_events_file = meson.project_source_root() / item / 'trace-events'
+    group_name = item == '.' ? 'root' : item.underscorify()
+  endif
   trace_events_files += [ trace_events_file ]
-  group_name = dir == '.' ? 'root' : dir.underscorify()
   group = '--group=' + group_name
   fmt = '@0@-' + group_name + '.@1@'
 
   trace_h = custom_target(fmt.format('trace', 'h'),
                           output: fmt.format('trace', 'h'),
                           input: trace_events_file,
-                          command: [ tracetool, group, '--format=h', '@INPUT@' ],
-                          capture: true)
+                          command: [ tracetool, group, '--format=h', '@INPUT@', '@OUTPUT@' ],
+                          depend_files: tracetool_depends)
   genh += trace_h
   trace_c = custom_target(fmt.format('trace', 'c'),
                           output: fmt.format('trace', 'c'),
                           input: trace_events_file,
-                          command: [ tracetool, group, '--format=c', '@INPUT@' ],
-                          capture: true)
-  if 'CONFIG_TRACE_UST' in config_host
+                          command: [ tracetool, group, '--format=c', '@INPUT@', '@OUTPUT@' ],
+                          depend_files: tracetool_depends)
+  if 'ust' in get_option('trace_backends')
     trace_ust_h = custom_target(fmt.format('trace-ust', 'h'),
                                 output: fmt.format('trace-ust', 'h'),
                                 input: trace_events_file,
-                                command: [ tracetool, group, '--format=ust-events-h', '@INPUT@' ],
-                                capture: true)
-    trace_ss.add(trace_ust_h, lttng, urcubp)
+                                command: [ tracetool, group, '--format=ust-events-h', '@INPUT@', '@OUTPUT@' ],
+                                depend_files: tracetool_depends)
+    trace_ss.add(trace_ust_h, lttng)
     genh += trace_ust_h
   endif
   trace_ss.add(trace_h, trace_c)
-  if 'CONFIG_TRACE_DTRACE' in config_host
+  if 'dtrace' in get_option('trace_backends')
     trace_dtrace = custom_target(fmt.format('trace-dtrace', 'dtrace'),
                                  output: fmt.format('trace-dtrace', 'dtrace'),
                                  input: trace_events_file,
-                                 command: [ tracetool, group, '--format=d', '@INPUT@' ],
-                                 capture: true)
+                                 command: [ tracetool, group, '--format=d', '@INPUT@', '@OUTPUT@' ],
+                                 depend_files: tracetool_depends)
     trace_dtrace_h = custom_target(fmt.format('trace-dtrace', 'h'),
                                    output: fmt.format('trace-dtrace', 'h'),
                                    input: trace_dtrace,
-                                   command: [ 'dtrace', '-DSTAP_SDT_V2', '-o', '@OUTPUT@', '-h', '-s', '@INPUT@' ])
+                                   command: [ dtrace, '-DSTAP_SDT_V2', '-o', '@OUTPUT@', '-h', '-s', '@INPUT@' ])
     trace_ss.add(trace_dtrace_h)
     if host_machine.system() != 'darwin'
       trace_dtrace_o = custom_target(fmt.format('trace-dtrace', 'o'),
                                      output: fmt.format('trace-dtrace', 'o'),
                                      input: trace_dtrace,
-                                     command: [ 'dtrace', '-DSTAP_SDT_V2', '-o', '@OUTPUT@', '-G', '-s', '@INPUT@' ])
+                                     command: [ dtrace, '-DSTAP_SDT_V2', '-o', '@OUTPUT@', '-G', '-s', '@INPUT@' ])
       trace_ss.add(trace_dtrace_o)
     endif
 
@@ -60,36 +67,28 @@ trace_events_all = custom_target('trace-events-all',
                                  install: true,
                                  install_dir: qemu_datadir)
 
-foreach d : [
-  ['generated-tcg-tracers.h', 'tcg-h'],
-  ['generated-helpers.c', 'tcg-helper-c'],
-  ['generated-helpers.h', 'tcg-helper-h'],
-  ['generated-helpers-wrappers.h', 'tcg-helper-wrapper-h'],
-]
-  gen = custom_target(d[0],
-                output: d[0],
-                input: meson.source_root() / 'trace-events',
-                command: [ tracetool, '--group=root', '--format=@0@'.format(d[1]), '@INPUT@' ],
-                capture: true)
-  specific_ss.add(gen)
-endforeach
-
-if 'CONFIG_TRACE_UST' in config_host
+if 'ust' in get_option('trace_backends')
   trace_ust_all_h = custom_target('trace-ust-all.h',
                                   output: 'trace-ust-all.h',
                                   input: trace_events_files,
-                                  command: [ tracetool, '--group=all', '--format=ust-events-h', '@INPUT@' ],
-                                  capture: true)
+                                  command: [ tracetool, '--group=all', '--format=ust-events-h', '@INPUT@', '@OUTPUT@' ],
+                                  depend_files: tracetool_depends)
   trace_ust_all_c = custom_target('trace-ust-all.c',
                                   output: 'trace-ust-all.c',
                                   input: trace_events_files,
-                                  command: [ tracetool, '--group=all', '--format=ust-events-c', '@INPUT@' ],
-                                  capture: true)
+                                  command: [ tracetool, '--group=all', '--format=ust-events-c', '@INPUT@', '@OUTPUT@' ],
+                                  depend_files: tracetool_depends)
   trace_ss.add(trace_ust_all_h, trace_ust_all_c)
   genh += trace_ust_all_h
 endif
 
-trace_ss.add(when: 'CONFIG_TRACE_SIMPLE', if_true: files('simple.c'))
-trace_ss.add(when: 'CONFIG_TRACE_FTRACE', if_true: files('ftrace.c'))
+if 'simple' in get_option('trace_backends')
+  trace_ss.add(files('simple.c'))
+endif
+if 'ftrace' in get_option('trace_backends')
+  trace_ss.add(files('ftrace.c'))
+endif
 trace_ss.add(files('control.c'))
-trace_ss.add(files('qmp.c'))
+if have_system or have_tools or have_ga
+  trace_ss.add(files('qmp.c'))
+endif
index 38246e1aa6926a23f340b2ff99f701da44fb3c47..3e3971c6a8778779f88b01e39a756b3b99404332 100644 (file)
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-trace.h"
-#include "control-vcpu.h"
+#include "control.h"
 
 
-static CPUState *get_cpu(bool has_vcpu, int vcpu, Error **errp)
-{
-    if (has_vcpu) {
-        CPUState *cpu = qemu_get_cpu(vcpu);
-        if (cpu == NULL) {
-            error_setg(errp, "invalid vCPU index %u", vcpu);
-        }
-        return cpu;
-    } else {
-        return NULL;
-    }
-}
-
-static bool check_events(bool has_vcpu, bool ignore_unavailable, bool is_pattern,
+static bool check_events(bool ignore_unavailable, bool is_pattern,
                          const char *name, Error **errp)
 {
     if (!is_pattern) {
@@ -38,12 +25,6 @@ static bool check_events(bool has_vcpu, bool ignore_unavailable, bool is_pattern
             return false;
         }
 
-        /* error for non-vcpu event */
-        if (has_vcpu && !trace_event_is_vcpu(ev)) {
-            error_setg(errp, "event \"%s\" is not vCPU-specific", name);
-            return false;
-        }
-
         /* error for unavailable event */
         if (!ignore_unavailable && !trace_event_get_state_static(ev)) {
             error_setg(errp, "event \"%s\" is disabled", name);
@@ -55,7 +36,7 @@ static bool check_events(bool has_vcpu, bool ignore_unavailable, bool is_pattern
         /* error for unavailable events */
         TraceEventIter iter;
         TraceEvent *ev;
-        trace_event_iter_init(&iter, name);
+        trace_event_iter_init_pattern(&iter, name);
         while ((ev = trace_event_iter_next(&iter)) != NULL) {
             if (!ignore_unavailable && !trace_event_get_state_static(ev)) {
                 error_setg(errp, "event \"%s\" is disabled", trace_event_get_name(ev));
@@ -70,61 +51,34 @@ TraceEventInfoList *qmp_trace_event_get_state(const char *name,
                                               bool has_vcpu, int64_t vcpu,
                                               Error **errp)
 {
-    Error *err = NULL;
     TraceEventInfoList *events = NULL;
     TraceEventIter iter;
     TraceEvent *ev;
     bool is_pattern = trace_event_is_pattern(name);
-    CPUState *cpu;
-
-    /* Check provided vcpu */
-    cpu = get_cpu(has_vcpu, vcpu, &err);
-    if (err) {
-        error_propagate(errp, err);
-        return NULL;
-    }
 
     /* Check events */
-    if (!check_events(has_vcpu, true, is_pattern, name, errp)) {
+    if (!check_events(true, is_pattern, name, errp)) {
         return NULL;
     }
 
     /* Get states (all errors checked above) */
-    trace_event_iter_init(&iter, name);
+    trace_event_iter_init_pattern(&iter, name);
     while ((ev = trace_event_iter_next(&iter)) != NULL) {
-        TraceEventInfoList *elem;
-        bool is_vcpu = trace_event_is_vcpu(ev);
-        if (has_vcpu && !is_vcpu) {
-            continue;
-        }
+        TraceEventInfo *value;
 
-        elem = g_new(TraceEventInfoList, 1);
-        elem->value = g_new(TraceEventInfo, 1);
-        elem->value->vcpu = is_vcpu;
-        elem->value->name = g_strdup(trace_event_get_name(ev));
+        value = g_new(TraceEventInfo, 1);
+        value->name = g_strdup(trace_event_get_name(ev));
 
         if (!trace_event_get_state_static(ev)) {
-            elem->value->state = TRACE_EVENT_STATE_UNAVAILABLE;
+            value->state = TRACE_EVENT_STATE_UNAVAILABLE;
         } else {
-            if (has_vcpu) {
-                if (is_vcpu) {
-                    if (trace_event_get_vcpu_state_dynamic(cpu, ev)) {
-                        elem->value->state = TRACE_EVENT_STATE_ENABLED;
-                    } else {
-                        elem->value->state = TRACE_EVENT_STATE_DISABLED;
-                    }
-                }
-                /* else: already skipped above */
+            if (trace_event_get_state_dynamic(ev)) {
+                value->state = TRACE_EVENT_STATE_ENABLED;
             } else {
-                if (trace_event_get_state_dynamic(ev)) {
-                    elem->value->state = TRACE_EVENT_STATE_ENABLED;
-                } else {
-                    elem->value->state = TRACE_EVENT_STATE_DISABLED;
-                }
+                value->state = TRACE_EVENT_STATE_DISABLED;
             }
         }
-        elem->next = events;
-        events = elem;
+        QAPI_LIST_PREPEND(events, value);
     }
 
     return events;
@@ -135,36 +89,22 @@ void qmp_trace_event_set_state(const char *name, bool enable,
                                bool has_vcpu, int64_t vcpu,
                                Error **errp)
 {
-    Error *err = NULL;
     TraceEventIter iter;
     TraceEvent *ev;
     bool is_pattern = trace_event_is_pattern(name);
-    CPUState *cpu;
-
-    /* Check provided vcpu */
-    cpu = get_cpu(has_vcpu, vcpu, &err);
-    if (err) {
-        error_propagate(errp, err);
-        return;
-    }
 
     /* Check events */
-    if (!check_events(has_vcpu, has_ignore_unavailable && ignore_unavailable,
+    if (!check_events(has_ignore_unavailable && ignore_unavailable,
                       is_pattern, name, errp)) {
         return;
     }
 
     /* Apply changes (all errors checked above) */
-    trace_event_iter_init(&iter, name);
+    trace_event_iter_init_pattern(&iter, name);
     while ((ev = trace_event_iter_next(&iter)) != NULL) {
-        if (!trace_event_get_state_static(ev) ||
-            (has_vcpu && !trace_event_is_vcpu(ev))) {
+        if (!trace_event_get_state_static(ev)) {
             continue;
         }
-        if (has_vcpu) {
-            trace_event_set_vcpu_state_dynamic(cpu, ev, enable);
-        } else {
-            trace_event_set_state_dynamic(ev, enable);
-        }
+        trace_event_set_state_dynamic(ev, enable);
     }
 }
index 9cd2ed1fb3f4c90c2af4885c2a98c1bd31e8492c..18af590cf7b1c8af556a3280984e1b807a0c196b 100644 (file)
@@ -280,14 +280,12 @@ void trace_record_finish(TraceBufferRecord *rec)
     }
 }
 
-static int st_write_event_mapping(void)
+static int st_write_event_mapping(TraceEventIter *iter)
 {
     uint64_t type = TRACE_RECORD_TYPE_MAPPING;
-    TraceEventIter iter;
     TraceEvent *ev;
 
-    trace_event_iter_init(&iter, NULL);
-    while ((ev = trace_event_iter_next(&iter)) != NULL) {
+    while ((ev = trace_event_iter_next(iter)) != NULL) {
         uint64_t id = trace_event_get_id(ev);
         const char *name = trace_event_get_name(ev);
         uint32_t len = strlen(name);
@@ -309,6 +307,7 @@ static int st_write_event_mapping(void)
  */
 bool st_set_trace_file_enabled(bool enable)
 {
+    TraceEventIter iter;
     bool was_enabled = trace_fp;
 
     if (enable == !!trace_fp) {
@@ -333,8 +332,9 @@ bool st_set_trace_file_enabled(bool enable)
             return was_enabled;
         }
 
+        trace_event_iter_init_all(&iter);
         if (fwrite(&header, sizeof header, 1, trace_fp) != 1 ||
-            st_write_event_mapping() < 0) {
+            st_write_event_mapping(&iter) < 0) {
             fclose(trace_fp);
             trace_fp = NULL;
             return was_enabled;
@@ -364,7 +364,7 @@ void st_set_trace_file(const char *file)
 
     if (!file) {
         /* Type cast needed for Windows where getpid() returns an int. */
-        trace_file_name = g_strdup_printf(CONFIG_TRACE_FILE, (pid_t)getpid());
+        trace_file_name = g_strdup_printf(CONFIG_TRACE_FILE "-" FMT_pid, (pid_t)getpid());
     } else {
         trace_file_name = g_strdup_printf("%s", file);
     }
@@ -422,3 +422,15 @@ bool st_init(void)
     atexit(st_flush_trace_buffer);
     return true;
 }
+
+void st_init_group(size_t group)
+{
+    TraceEventIter iter;
+
+    if (!trace_writeout_enabled) {
+        return;
+    }
+
+    trace_event_iter_init_group(&iter, group);
+    st_write_event_mapping(&iter);
+}
index 26ccbc8b8ae353244eb1fe5351cd6d8002b36a28..ee1983ce5617a9c8b09f66d8ca7d83ed3a155a57 100644 (file)
@@ -15,6 +15,7 @@ void st_print_trace_file_status(void);
 bool st_set_trace_file_enabled(bool enable);
 void st_set_trace_file(const char *file);
 bool st_init(void);
+void st_init_group(size_t group);
 void st_flush_trace_buffer(void);
 
 typedef struct {
diff --git a/trace/trace-hmp-cmds.c b/trace/trace-hmp-cmds.c
new file mode 100644 (file)
index 0000000..86211fc
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * HMP commands related to tracing
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor/hmp.h"
+#include "monitor/monitor.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-trace.h"
+#include "qapi/qmp/qdict.h"
+#include "trace/control.h"
+#ifdef CONFIG_TRACE_SIMPLE
+#include "trace/simple.h"
+#endif
+
+void hmp_trace_event(Monitor *mon, const QDict *qdict)
+{
+    const char *tp_name = qdict_get_str(qdict, "name");
+    bool new_state = qdict_get_bool(qdict, "option");
+    Error *local_err = NULL;
+
+    qmp_trace_event_set_state(tp_name, new_state,
+                              true, true, false, 0, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    }
+}
+
+#ifdef CONFIG_TRACE_SIMPLE
+void hmp_trace_file(Monitor *mon, const QDict *qdict)
+{
+    const char *op = qdict_get_try_str(qdict, "op");
+    const char *arg = qdict_get_try_str(qdict, "arg");
+
+    if (!op) {
+        st_print_trace_file_status();
+    } else if (!strcmp(op, "on")) {
+        st_set_trace_file_enabled(true);
+    } else if (!strcmp(op, "off")) {
+        st_set_trace_file_enabled(false);
+    } else if (!strcmp(op, "flush")) {
+        st_flush_trace_buffer();
+    } else if (!strcmp(op, "set")) {
+        if (arg) {
+            st_set_trace_file(arg);
+        }
+    } else {
+        monitor_printf(mon, "unexpected argument \"%s\"\n", op);
+        hmp_help_cmd(mon, "trace-file");
+    }
+}
+#endif
+
+void hmp_info_trace_events(Monitor *mon, const QDict *qdict)
+{
+    const char *name = qdict_get_try_str(qdict, "name");
+    TraceEventInfoList *events;
+    TraceEventInfoList *elem;
+    Error *local_err = NULL;
+
+    if (name == NULL) {
+        name = "*";
+    }
+
+    events = qmp_trace_event_get_state(name, false, 0, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return;
+    }
+
+    for (elem = events; elem != NULL; elem = elem->next) {
+        monitor_printf(mon, "%s : state %u\n",
+                       elem->value->name,
+                       elem->value->state == TRACE_EVENT_STATE_ENABLED ? 1 : 0);
+    }
+    qapi_free_TraceEventInfoList(events);
+}
+
+void info_trace_events_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    size_t len;
+
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+    if (nb_args == 2) {
+        TraceEventIter iter;
+        TraceEvent *ev;
+        char *pattern = g_strdup_printf("%s*", str);
+        trace_event_iter_init_pattern(&iter, pattern);
+        while ((ev = trace_event_iter_next(&iter)) != NULL) {
+            readline_add_completion(rs, trace_event_get_name(ev));
+        }
+        g_free(pattern);
+    }
+}
+
+void trace_event_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+    size_t len;
+
+    len = strlen(str);
+    readline_set_completion_index(rs, len);
+    if (nb_args == 2) {
+        TraceEventIter iter;
+        TraceEvent *ev;
+        char *pattern = g_strdup_printf("%s*", str);
+        trace_event_iter_init_pattern(&iter, pattern);
+        while ((ev = trace_event_iter_next(&iter)) != NULL) {
+            readline_add_completion(rs, trace_event_get_name(ev));
+        }
+        g_free(pattern);
+    } else if (nb_args == 3) {
+        readline_add_completion_of(rs, str, "on");
+        readline_add_completion_of(rs, str, "off");
+    }
+}
index 30f5354b1e974259e92e10133ae0a377371d34dd..7f2c99729d4475640b7c6b21977123a8e6f97bae 100644 (file)
@@ -15,6 +15,7 @@
 
 #include "qemu/osdep.h"
 #include "block/block.h"
+#include "block/thread-pool.h"
 #include "qemu/main-loop.h"
 #include "qemu/rcu.h"
 #include "qemu/rcu_queue.h"
@@ -40,6 +41,14 @@ void aio_add_ready_handler(AioHandlerList *ready_list,
     QLIST_INSERT_HEAD(ready_list, node, node_ready);
 }
 
+static void aio_add_poll_ready_handler(AioHandlerList *ready_list,
+                                       AioHandler *node)
+{
+    QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
+    node->poll_ready = true;
+    QLIST_INSERT_HEAD(ready_list, node, node_ready);
+}
+
 static AioHandler *find_aio_handler(AioContext *ctx, int fd)
 {
     AioHandler *node;
@@ -67,6 +76,7 @@ static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
     }
 
     node->pfd.revents = 0;
+    node->poll_ready = false;
 
     /* If the fd monitor has already marked it deleted, leave it alone */
     if (QLIST_IS_INSERTED(node, node_deleted)) {
@@ -89,10 +99,10 @@ static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
 
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
-                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         AioPollFn *io_poll,
+                        IOHandler *io_poll_ready,
                         void *opaque)
 {
     AioHandler *node;
@@ -101,6 +111,10 @@ void aio_set_fd_handler(AioContext *ctx,
     bool deleted = false;
     int poll_disable_change;
 
+    if (io_poll && !io_poll_ready) {
+        io_poll = NULL; /* polling only makes sense if there is a handler */
+    }
+
     qemu_lockcnt_lock(&ctx->list_lock);
 
     node = find_aio_handler(ctx, fd);
@@ -127,8 +141,8 @@ void aio_set_fd_handler(AioContext *ctx,
         new_node->io_read = io_read;
         new_node->io_write = io_write;
         new_node->io_poll = io_poll;
+        new_node->io_poll_ready = io_poll_ready;
         new_node->opaque = opaque;
-        new_node->is_external = is_external;
 
         if (is_new) {
             new_node->pfd.fd = fd;
@@ -164,9 +178,9 @@ void aio_set_fd_handler(AioContext *ctx,
     }
 }
 
-void aio_set_fd_poll(AioContext *ctx, int fd,
-                     IOHandler *io_poll_begin,
-                     IOHandler *io_poll_end)
+static void aio_set_fd_poll(AioContext *ctx, int fd,
+                            IOHandler *io_poll_begin,
+                            IOHandler *io_poll_end)
 {
     AioHandler *node = find_aio_handler(ctx, fd);
 
@@ -180,12 +194,13 @@ void aio_set_fd_poll(AioContext *ctx, int fd,
 
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *notifier,
-                            bool is_external,
                             EventNotifierHandler *io_read,
-                            AioPollFn *io_poll)
+                            AioPollFn *io_poll,
+                            EventNotifierHandler *io_poll_ready)
 {
-    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
-                       (IOHandler *)io_read, NULL, io_poll, notifier);
+    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
+                       (IOHandler *)io_read, NULL, io_poll,
+                       (IOHandler *)io_poll_ready, notifier);
 }
 
 void aio_set_event_notifier_poll(AioContext *ctx,
@@ -198,7 +213,8 @@ void aio_set_event_notifier_poll(AioContext *ctx,
                     (IOHandler *)io_poll_end);
 }
 
-static bool poll_set_started(AioContext *ctx, bool started)
+static bool poll_set_started(AioContext *ctx, AioHandlerList *ready_list,
+                             bool started)
 {
     AioHandler *node;
     bool progress = false;
@@ -228,8 +244,9 @@ static bool poll_set_started(AioContext *ctx, bool started)
         }
 
         /* Poll one last time in case ->io_poll_end() raced with the event */
-        if (!started) {
-            progress = node->io_poll(node->opaque) || progress;
+        if (!started && node->io_poll(node->opaque)) {
+            aio_add_poll_ready_handler(ready_list, node);
+            progress = true;
         }
     }
     qemu_lockcnt_dec(&ctx->list_lock);
@@ -240,8 +257,11 @@ static bool poll_set_started(AioContext *ctx, bool started)
 
 bool aio_prepare(AioContext *ctx)
 {
+    AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
+
     /* Poll mode cannot be used with glib's event loop, disable it. */
-    poll_set_started(ctx, false);
+    poll_set_started(ctx, &ready_list, false);
+    /* TODO what to do with this list? */
 
     return false;
 }
@@ -260,14 +280,13 @@ bool aio_pending(AioContext *ctx)
     QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
         int revents;
 
+        /* TODO should this check poll ready? */
         revents = node->pfd.revents & node->pfd.events;
-        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
-            aio_node_check(ctx, node->is_external)) {
+        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
             result = true;
             break;
         }
-        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
-            aio_node_check(ctx, node->is_external)) {
+        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
             result = true;
             break;
         }
@@ -301,11 +320,15 @@ static void aio_free_deleted_handlers(AioContext *ctx)
 static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
 {
     bool progress = false;
+    bool poll_ready;
     int revents;
 
     revents = node->pfd.revents & node->pfd.events;
     node->pfd.revents = 0;
 
+    poll_ready = node->poll_ready;
+    node->poll_ready = false;
+
     /*
      * Start polling AioHandlers when they become ready because activity is
      * likely to continue.  Note that starvation is theoretically possible when
@@ -321,10 +344,30 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
         }
         QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
     }
+    if (!QLIST_IS_INSERTED(node, node_deleted) &&
+        poll_ready && revents == 0 && node->io_poll_ready) {
+        /*
+         * Remove temporarily to avoid infinite loops when ->io_poll_ready()
+         * calls aio_poll() before clearing the condition that made the poll
+         * handler become ready.
+         */
+        QLIST_SAFE_REMOVE(node, node_poll);
+
+        node->io_poll_ready(node->opaque);
+
+        if (!QLIST_IS_INSERTED(node, node_poll)) {
+            QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
+        }
+
+        /*
+         * Return early since revents was zero. aio_notify() does not count as
+         * progress.
+         */
+        return node->opaque != &ctx->notifier;
+    }
 
     if (!QLIST_IS_INSERTED(node, node_deleted) &&
         (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
-        aio_node_check(ctx, node->is_external) &&
         node->io_read) {
         node->io_read(node->opaque);
 
@@ -335,7 +378,6 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
     }
     if (!QLIST_IS_INSERTED(node, node_deleted) &&
         (revents & (G_IO_OUT | G_IO_ERR)) &&
-        aio_node_check(ctx, node->is_external) &&
         node->io_write) {
         node->io_write(node->opaque);
         progress = true;
@@ -387,6 +429,7 @@ void aio_dispatch(AioContext *ctx)
 }
 
 static bool run_poll_handlers_once(AioContext *ctx,
+                                   AioHandlerList *ready_list,
                                    int64_t now,
                                    int64_t *timeout)
 {
@@ -395,8 +438,9 @@ static bool run_poll_handlers_once(AioContext *ctx,
     AioHandler *tmp;
 
     QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
-        if (aio_node_check(ctx, node->is_external) &&
-            node->io_poll(node->opaque)) {
+        if (node->io_poll(node->opaque)) {
+            aio_add_poll_ready_handler(ready_list, node);
+
             node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;
 
             /*
@@ -420,7 +464,9 @@ static bool fdmon_supports_polling(AioContext *ctx)
     return ctx->fdmon_ops->need_wait != aio_poll_disabled;
 }
 
-static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now)
+static bool remove_idle_poll_handlers(AioContext *ctx,
+                                      AioHandlerList *ready_list,
+                                      int64_t now)
 {
     AioHandler *node;
     AioHandler *tmp;
@@ -451,7 +497,10 @@ static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now)
                  * Nevermind about re-adding the handler in the rare case where
                  * this causes progress.
                  */
-                progress = node->io_poll(node->opaque) || progress;
+                if (node->io_poll(node->opaque)) {
+                    aio_add_poll_ready_handler(ready_list, node);
+                    progress = true;
+                }
             }
         }
     }
@@ -461,6 +510,7 @@ static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now)
 
 /* run_poll_handlers:
  * @ctx: the AioContext
+ * @ready_list: the list to place ready handlers on
  * @max_ns: maximum time to poll for, in nanoseconds
  *
  * Polls for a given time.
@@ -469,7 +519,8 @@ static bool remove_idle_poll_handlers(AioContext *ctx, int64_t now)
  *
  * Returns: true if progress was made, false otherwise
  */
-static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
+static bool run_poll_handlers(AioContext *ctx, AioHandlerList *ready_list,
+                              int64_t max_ns, int64_t *timeout)
 {
     bool progress;
     int64_t start_time, elapsed_time;
@@ -490,13 +541,15 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
 
     start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     do {
-        progress = run_poll_handlers_once(ctx, start_time, timeout);
+        progress = run_poll_handlers_once(ctx, ready_list,
+                                          start_time, timeout);
         elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
         max_ns = qemu_soonest_timeout(*timeout, max_ns);
         assert(!(max_ns && progress));
     } while (elapsed_time < max_ns && !ctx->fdmon_ops->need_wait(ctx));
 
-    if (remove_idle_poll_handlers(ctx, start_time + elapsed_time)) {
+    if (remove_idle_poll_handlers(ctx, ready_list,
+                                  start_time + elapsed_time)) {
         *timeout = 0;
         progress = true;
     }
@@ -514,6 +567,7 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
 
 /* try_poll_mode:
  * @ctx: the AioContext
+ * @ready_list: list to add handlers that need to be run
  * @timeout: timeout for blocking wait, computed by the caller and updated if
  *    polling succeeds.
  *
@@ -521,7 +575,8 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
  *
  * Returns: true if progress was made, false otherwise
  */
-static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
+static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
+                          int64_t *timeout)
 {
     int64_t max_ns;
 
@@ -531,25 +586,22 @@ static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
 
     max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
     if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
-        poll_set_started(ctx, true);
+        /*
+         * Enable poll mode. It pairs with the poll_set_started() in
+         * aio_poll() which disables poll mode.
+         */
+        poll_set_started(ctx, ready_list, true);
 
-        if (run_poll_handlers(ctx, max_ns, timeout)) {
+        if (run_poll_handlers(ctx, ready_list, max_ns, timeout)) {
             return true;
         }
     }
-
-    if (poll_set_started(ctx, false)) {
-        *timeout = 0;
-        return true;
-    }
-
     return false;
 }
 
 bool aio_poll(AioContext *ctx, bool blocking)
 {
     AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
-    int ret = 0;
     bool progress;
     bool use_notify_me;
     int64_t timeout;
@@ -574,7 +626,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     }
 
     timeout = blocking ? aio_compute_timeout(ctx) : 0;
-    progress = try_poll_mode(ctx, &timeout);
+    progress = try_poll_mode(ctx, &ready_list, &timeout);
     assert(!(timeout && progress));
 
     /*
@@ -604,7 +656,18 @@ bool aio_poll(AioContext *ctx, bool blocking)
      * system call---a single round of run_poll_handlers_once suffices.
      */
     if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
-        ret = ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
+        /*
+         * Disable poll mode. poll mode should be disabled before the call
+         * of ctx->fdmon_ops->wait() so that guest's notification can wake
+         * up IO threads when some work becomes pending. It is essential to
+         * avoid hangs or unnecessary latency.
+         */
+        if (poll_set_started(ctx, &ready_list, false)) {
+            timeout = 0;
+            progress = true;
+        }
+
+        ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
     }
 
     if (use_notify_me) {
@@ -657,10 +720,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     }
 
     progress |= aio_bh_poll(ctx);
-
-    if (ret > 0) {
-        progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
-    }
+    progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
 
     aio_free_deleted_handlers(ctx);
 
@@ -716,3 +776,15 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
 
     aio_notify(ctx);
 }
+
+void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
+                                Error **errp)
+{
+    /*
+     * No thread synchronization here, it doesn't matter if an incorrect value
+     * is used once.
+     */
+    ctx->aio_max_batch = max_batch;
+
+    aio_notify(ctx);
+}
index c80c04506a8590486de9646b839d2389db9bbe55..4264c518be012e78d3615174f6848c76b4cd1302 100644 (file)
@@ -24,6 +24,7 @@ struct AioHandler {
     IOHandler *io_read;
     IOHandler *io_write;
     AioPollFn *io_poll;
+    IOHandler *io_poll_ready;
     IOHandler *io_poll_begin;
     IOHandler *io_poll_end;
     void *opaque;
@@ -36,7 +37,7 @@ struct AioHandler {
     unsigned flags; /* see fdmon-io_uring.c */
 #endif
     int64_t poll_idle_timeout; /* when to stop userspace polling */
-    bool is_external;
+    bool poll_ready; /* has polling detected an event? */
 };
 
 /* Add a handler to a ready list */
index bdb3d3af22ddbb0b77b52e12b4e9538acebba08e..b5336cf5fd2932dd0f54d98c96c68a4a16231bd7 100644 (file)
@@ -35,7 +35,21 @@ static void dummy_bh_cb(void *opaque)
 
 void aio_wait_kick(void)
 {
-    /* The barrier (or an atomic op) is in the caller.  */
+    /*
+     * Paired with smp_mb in AIO_WAIT_WHILE. Here we have:
+     * write(condition);
+     * aio_wait_kick() {
+     *      smp_mb();
+     *      read(num_waiters);
+     * }
+     *
+     * And in AIO_WAIT_WHILE:
+     * write(num_waiters);
+     * smp_mb();
+     * read(condition);
+     */
+    smp_mb();
+
     if (qatomic_read(&global_aio_wait.num_waiters)) {
         aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
     }
@@ -68,5 +82,5 @@ void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
 
     aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data);
-    AIO_WAIT_WHILE(ctx, !data.done);
+    AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done);
 }
index 168717b51bd617aa0233a6025fc9ed1e4a4337c3..948ef47a4d3e01a940c7701ba2f51dcf6bfc83c4 100644 (file)
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "block/block.h"
 #include "qemu/main-loop.h"
 #include "qemu/queue.h"
 #include "qemu/sockets.h"
 #include "qapi/error.h"
 #include "qemu/rcu_queue.h"
+#include "qemu/error-report.h"
 
 struct AioHandler {
     EventNotifier *e;
@@ -32,7 +32,6 @@ struct AioHandler {
     GPollFD pfd;
     int deleted;
     void *opaque;
-    bool is_external;
     QLIST_ENTRY(AioHandler) node;
 };
 
@@ -64,19 +63,26 @@ static void aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
 
 void aio_set_fd_handler(AioContext *ctx,
                         int fd,
-                        bool is_external,
                         IOHandler *io_read,
                         IOHandler *io_write,
                         AioPollFn *io_poll,
+                        IOHandler *io_poll_ready,
                         void *opaque)
 {
-    /* fd is a SOCKET in our case */
     AioHandler *old_node;
     AioHandler *node = NULL;
+    SOCKET s;
+
+    if (!fd_is_socket(fd)) {
+        error_report("fd=%d is not a socket, AIO implementation is missing", fd);
+        return;
+    }
+
+    s = _get_osfhandle(fd);
 
     qemu_lockcnt_lock(&ctx->list_lock);
     QLIST_FOREACH(old_node, &ctx->aio_handlers, node) {
-        if (old_node->pfd.fd == fd && !old_node->deleted) {
+        if (old_node->pfd.fd == s && !old_node->deleted) {
             break;
         }
     }
@@ -87,7 +93,7 @@ void aio_set_fd_handler(AioContext *ctx,
 
         /* Alloc and insert if it's not already there */
         node = g_new0(AioHandler, 1);
-        node->pfd.fd = fd;
+        node->pfd.fd = s;
 
         node->pfd.events = 0;
         if (node->io_read) {
@@ -103,7 +109,6 @@ void aio_set_fd_handler(AioContext *ctx,
         node->opaque = opaque;
         node->io_read = io_read;
         node->io_write = io_write;
-        node->is_external = is_external;
 
         if (io_read) {
             bitmask |= FD_READ | FD_ACCEPT | FD_CLOSE;
@@ -115,7 +120,7 @@ void aio_set_fd_handler(AioContext *ctx,
 
         QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
         event = event_notifier_get_handle(&ctx->notifier);
-        WSAEventSelect(node->pfd.fd, event, bitmask);
+        qemu_socket_select(fd, event, bitmask, NULL);
     }
     if (old_node) {
         aio_remove_fd_handler(ctx, old_node);
@@ -125,18 +130,11 @@ void aio_set_fd_handler(AioContext *ctx,
     aio_notify(ctx);
 }
 
-void aio_set_fd_poll(AioContext *ctx, int fd,
-                     IOHandler *io_poll_begin,
-                     IOHandler *io_poll_end)
-{
-    /* Not implemented */
-}
-
 void aio_set_event_notifier(AioContext *ctx,
                             EventNotifier *e,
-                            bool is_external,
                             EventNotifierHandler *io_notify,
-                            AioPollFn *io_poll)
+                            AioPollFn *io_poll,
+                            EventNotifierHandler *io_poll_ready)
 {
     AioHandler *node;
 
@@ -159,7 +157,6 @@ void aio_set_event_notifier(AioContext *ctx,
             node->e = e;
             node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
             node->pfd.events = G_IO_IN;
-            node->is_external = is_external;
             QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
 
             g_source_add_poll(&ctx->source, &node->pfd);
@@ -325,9 +322,9 @@ void aio_dispatch(AioContext *ctx)
 bool aio_poll(AioContext *ctx, bool blocking)
 {
     AioHandler *node;
-    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
+    HANDLE events[MAXIMUM_WAIT_OBJECTS];
     bool progress, have_select_revents, first;
-    int count;
+    unsigned count;
     int timeout;
 
     /*
@@ -366,8 +363,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
     /* fill fd sets */
     count = 0;
     QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
-        if (!node->deleted && node->io_notify
-            && aio_node_check(ctx, node->is_external)) {
+        if (!node->deleted && node->io_notify) {
+            assert(count < MAXIMUM_WAIT_OBJECTS);
             events[count++] = event_notifier_get_handle(node->e);
         }
     }
@@ -440,3 +437,8 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
         error_setg(errp, "AioContext polling is not implemented on Windows");
     }
 }
+
+void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
+                                Error **errp)
+{
+}
index 674dbefb7c2494887b3a7a1648908cb7209f0b4c..8f90ddc3047a9f0567da47ab6f89b6190e1d0f9d 100644 (file)
 #include "qapi/error.h"
 #include "block/aio.h"
 #include "block/thread-pool.h"
+#include "block/graph-lock.h"
 #include "qemu/main-loop.h"
 #include "qemu/atomic.h"
 #include "qemu/rcu_queue.h"
 #include "block/raw-aio.h"
 #include "qemu/coroutine_int.h"
+#include "qemu/coroutine-tls.h"
+#include "sysemu/cpu-timers.h"
 #include "trace.h"
 
 /***********************************************************/
@@ -57,10 +60,12 @@ enum {
 
 struct QEMUBH {
     AioContext *ctx;
+    const char *name;
     QEMUBHFunc *cb;
     void *opaque;
     QSLIST_ENTRY(QEMUBH) next;
     unsigned flags;
+    MemReentrancyGuard *reentrancy_guard;
 };
 
 /* Called concurrently from any thread */
@@ -70,18 +75,32 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
     unsigned old_flags;
 
     /*
-     * The memory barrier implicit in qatomic_fetch_or makes sure that:
-     * 1. idle & any writes needed by the callback are done before the
-     *    locations are read in the aio_bh_poll.
-     * 2. ctx is loaded before the callback has a chance to execute and bh
-     *    could be freed.
+     * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that
+     * insertion starts after BH_PENDING is set.
      */
     old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
+
     if (!(old_flags & BH_PENDING)) {
+        /*
+         * At this point the bottom half becomes visible to aio_bh_poll().
+         * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in
+         * aio_bh_poll(), ensuring that:
+         * 1. any writes needed by the callback are visible from the callback
+         *    after aio_bh_dequeue() returns bh.
+         * 2. ctx is loaded before the callback has a chance to execute and bh
+         *    could be freed.
+         */
         QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
     }
 
     aio_notify(ctx);
+    /*
+     * Workaround for record/replay.
+     * vCPU execution should be suspended when new BH is set.
+     * This is needed to avoid guest timeouts caused
+     * by the long cycles of the execution.
+     */
+    icount_notify_exit();
 }
 
 /* Only called from aio_bh_poll() and aio_ctx_finalize() */
@@ -96,18 +115,16 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
     QSLIST_REMOVE_HEAD(head, next);
 
     /*
-     * The qatomic_and is paired with aio_bh_enqueue().  The implicit memory
-     * barrier ensures that the callback sees all writes done by the scheduling
-     * thread.  It also ensures that the scheduling thread sees the cleared
-     * flag before bh->cb has run, and thus will call aio_notify again if
-     * necessary.
+     * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that
+     * the removal finishes before BH_PENDING is reset.
      */
     *flags = qatomic_fetch_and(&bh->flags,
                               ~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
     return bh;
 }
 
-void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
+void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
+                                  void *opaque, const char *name)
 {
     QEMUBH *bh;
     bh = g_new(QEMUBH, 1);
@@ -115,11 +132,13 @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
         .ctx = ctx,
         .cb = cb,
         .opaque = opaque,
+        .name = name,
     };
     aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT);
 }
 
-QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
+QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
+                        const char *name, MemReentrancyGuard *reentrancy_guard)
 {
     QEMUBH *bh;
     bh = g_new(QEMUBH, 1);
@@ -127,13 +146,31 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
         .ctx = ctx,
         .cb = cb,
         .opaque = opaque,
+        .name = name,
+        .reentrancy_guard = reentrancy_guard,
     };
     return bh;
 }
 
 void aio_bh_call(QEMUBH *bh)
 {
+    bool last_engaged_in_io = false;
+
+    /* Make a copy of the guard-pointer as cb may free the bh */
+    MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
+    if (reentrancy_guard) {
+        last_engaged_in_io = reentrancy_guard->engaged_in_io;
+        if (reentrancy_guard->engaged_in_io) {
+            trace_reentrant_aio(bh->ctx, bh->name);
+        }
+        reentrancy_guard->engaged_in_io = true;
+    }
+
     bh->cb(bh->opaque);
+
+    if (reentrancy_guard) {
+        reentrancy_guard->engaged_in_io = last_engaged_in_io;
+    }
 }
 
 /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
@@ -143,8 +180,23 @@ int aio_bh_poll(AioContext *ctx)
     BHListSlice *s;
     int ret = 0;
 
+    /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue().  */
     QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
+
+    /*
+     * GCC13 [-Werror=dangling-pointer=] complains that the local variable
+     * 'slice' is being stored in the global 'ctx->bh_slice_list' but the
+     * list is emptied before this function returns.
+     */
+#if !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wdangling-pointer="
+#endif
     QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
+#if !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
 
     while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
         QEMUBH *bh;
@@ -339,17 +391,30 @@ aio_ctx_finalize(GSource     *source)
     assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list));
 
     while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) {
-        /* qemu_bh_delete() must have been called on BHs in this AioContext */
-        assert(flags & BH_DELETED);
+        /*
+         * qemu_bh_delete() must have been called on BHs in this AioContext. In
+         * many cases memory leaks, hangs, or inconsistent state occur when a
+         * BH is leaked because something still expects it to run.
+         *
+         * If you hit this, fix the lifecycle of the BH so that
+         * qemu_bh_delete() and any associated cleanup is called before the
+         * AioContext is finalized.
+         */
+        if (unlikely(!(flags & BH_DELETED))) {
+            fprintf(stderr, "%s: BH '%s' leaked, aborting...\n",
+                    __func__, bh->name);
+            abort();
+        }
 
         g_free(bh);
     }
 
-    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL);
+    aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL, NULL);
     event_notifier_cleanup(&ctx->notifier);
     qemu_rec_mutex_destroy(&ctx->lock);
     qemu_lockcnt_destroy(&ctx->list_lock);
     timerlistgroup_deinit(&ctx->tlg);
+    unregister_aiocontext(ctx);
     aio_context_destroy(ctx);
 }
 
@@ -420,15 +485,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
 void aio_notify(AioContext *ctx)
 {
     /*
-     * Write e.g. bh->flags before writing ctx->notified.  Pairs with smp_mb in
-     * aio_notify_accept.
+     * Write e.g. ctx->bh_list before writing ctx->notified.  Pairs with
+     * smp_mb() in aio_notify_accept().
      */
     smp_wmb();
     qatomic_set(&ctx->notified, true);
 
     /*
-     * Write ctx->notified before reading ctx->notify_me.  Pairs
-     * with smp_mb in aio_ctx_prepare or aio_poll.
+     * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me.
+     * Pairs with smp_mb() in aio_ctx_prepare or aio_poll.
      */
     smp_mb();
     if (qatomic_read(&ctx->notify_me)) {
@@ -441,8 +506,9 @@ void aio_notify_accept(AioContext *ctx)
     qatomic_set(&ctx->notified, false);
 
     /*
-     * Write ctx->notified before reading e.g. bh->flags.  Pairs with smp_wmb
-     * in aio_notify.
+     * Order reads of ctx->notified (in aio_context_notifier_poll()) and the
+     * above clearing of ctx->notified before reads of e.g. bh->flags.  Pairs
+     * with smp_wmb() in aio_notify.
      */
     smp_mb();
 }
@@ -465,9 +531,19 @@ static bool aio_context_notifier_poll(void *opaque)
     EventNotifier *e = opaque;
     AioContext *ctx = container_of(e, AioContext, notifier);
 
+    /*
+     * No need for load-acquire because we just want to kick the
+     * event loop.  aio_notify_accept() takes care of synchronizing
+     * the event loop with the producers.
+     */
     return qatomic_read(&ctx->notified);
 }
 
+static void aio_context_notifier_poll_ready(EventNotifier *e)
+{
+    /* Do nothing, we just wanted to kick the event loop */
+}
+
 static void co_schedule_bh_cb(void *opaque)
 {
     AioContext *ctx = opaque;
@@ -517,9 +593,9 @@ AioContext *aio_context_new(Error **errp)
     QSLIST_INIT(&ctx->scheduled_coroutines);
 
     aio_set_event_notifier(ctx, &ctx->notifier,
-                           false,
                            aio_context_notifier_cb,
-                           aio_context_notifier_poll);
+                           aio_context_notifier_poll,
+                           aio_context_notifier_poll_ready);
 #ifdef CONFIG_LINUX_AIO
     ctx->linux_aio = NULL;
 #endif
@@ -537,6 +613,13 @@ AioContext *aio_context_new(Error **errp)
     ctx->poll_grow = 0;
     ctx->poll_shrink = 0;
 
+    ctx->aio_max_batch = 0;
+
+    ctx->thread_pool_min = 0;
+    ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT;
+
+    register_aiocontext(ctx);
+
     return ctx;
 fail:
     g_source_destroy(&ctx->source);
@@ -599,7 +682,7 @@ void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx)
     }
 }
 
-void aio_co_wake(struct Coroutine *co)
+void aio_co_wake(Coroutine *co)
 {
     AioContext *ctx;
 
@@ -612,7 +695,7 @@ void aio_co_wake(struct Coroutine *co)
     aio_co_enter(ctx, co);
 }
 
-void aio_co_enter(AioContext *ctx, struct Coroutine *co)
+void aio_co_enter(AioContext *ctx, Coroutine *co)
 {
     if (ctx != qemu_get_current_aio_context()) {
         aio_co_schedule(ctx, co);
@@ -649,3 +732,41 @@ void aio_context_release(AioContext *ctx)
 {
     qemu_rec_mutex_unlock(&ctx->lock);
 }
+
+QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
+
+AioContext *qemu_get_current_aio_context(void)
+{
+    AioContext *ctx = get_my_aiocontext();
+    if (ctx) {
+        return ctx;
+    }
+    if (qemu_mutex_iothread_locked()) {
+        /* Possibly in a vCPU thread.  */
+        return qemu_get_aio_context();
+    }
+    return NULL;
+}
+
+void qemu_set_current_aio_context(AioContext *ctx)
+{
+    assert(!get_my_aiocontext());
+    set_my_aiocontext(ctx);
+}
+
+void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min,
+                                        int64_t max, Error **errp)
+{
+
+    if (min > max || !max || min > INT_MAX || max > INT_MAX) {
+        error_setg(errp, "bad thread-pool-min/thread-pool-max values");
+        return;
+    }
+
+    ctx->thread_pool_min = min;
+    ctx->thread_pool_max = max;
+
+    if (ctx->thread_pool) {
+        thread_pool_update_params(ctx->thread_pool, ctx);
+    }
+}
index 93037d5b116cda0df02e445f9d9c7aa5af984bcc..c20d071d8e5f00214cfd4e918fdb92c24e34bd88 100644 (file)
@@ -7,6 +7,8 @@
 #include "qemu/osdep.h"
 #include "qemu/atomic.h"
 #include "qemu/thread.h"
+#include "qemu/cacheinfo.h"
+#include "qemu/memalign.h"
 
 #ifdef CONFIG_ATOMIC64
 #error This file must only be compiled if !CONFIG_ATOMIC64
index 1f201393aef195f098953ca3aa19bcef1b55922c..8d12e90a5a42290b866f99b3b9fd5552ed7eaa91 100644 (file)
@@ -240,6 +240,51 @@ void bitmap_clear(unsigned long *map, long start, long nr)
     }
 }
 
+bool bitmap_test_and_clear(unsigned long *map, long start, long nr)
+{
+    unsigned long *p = map + BIT_WORD(start);
+    const long size = start + nr;
+    int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+    unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+    bool dirty = false;
+
+    assert(start >= 0 && nr >= 0);
+
+    /* First word */
+    if (nr - bits_to_clear > 0) {
+        if ((*p) & mask_to_clear) {
+            dirty = true;
+        }
+        *p &= ~mask_to_clear;
+        nr -= bits_to_clear;
+        bits_to_clear = BITS_PER_LONG;
+        p++;
+    }
+
+    /* Full words */
+    if (bits_to_clear == BITS_PER_LONG) {
+        while (nr >= BITS_PER_LONG) {
+            if (*p) {
+                dirty = true;
+                *p = 0;
+            }
+            nr -= BITS_PER_LONG;
+            p++;
+        }
+    }
+
+    /* Last word */
+    if (nr) {
+        mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+        if ((*p) & mask_to_clear) {
+            dirty = true;
+        }
+        *p &= ~mask_to_clear;
+    }
+
+    return dirty;
+}
+
 bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr)
 {
     unsigned long *p = map + BIT_WORD(start);
@@ -376,7 +421,7 @@ static void bitmap_to_from_le(unsigned long *dst,
 {
     long len = BITS_TO_LONGS(nbits);
 
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     long index;
 
     for (index = 0; index < len; index++) {
index 3fe6b1c4f16089b9b9ba4547e459884229f5eace..4b647b3eff790ec6455edee72cfbd0f17c901f37 100644 (file)
@@ -71,8 +71,8 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
 
 found_first:
     tmp &= (~0UL >> (BITS_PER_LONG - size));
-    if (tmp == 0UL) {          /* Are any bits set? */
-        return result + size;  /* Nope. */
+    if (tmp == 0UL) {           /* Are any bits set? */
+        return result + size;   /* Nope. */
     }
 found_middle:
     return result + ctzl(tmp);
@@ -120,8 +120,8 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
 
 found_first:
     tmp |= ~0UL << size;
-    if (tmp == ~0UL) { /* Are any bits zero? */
-        return result + size;  /* Nope. */
+    if (tmp == ~0UL) {          /* Are any bits zero? */
+        return result + size;   /* Nope. */
     }
 found_middle:
     return result + ctzl(~tmp);
index 695bb4ce28b6ff9b85804e32cac023a350d48143..3e6a5dfd632e88174a62377a79729bb37dd4219a 100644 (file)
@@ -24,6 +24,7 @@
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
 #include "qemu/bswap.h"
+#include "host/cpuinfo.h"
 
 static bool
 buffer_zero_int(const void *buf, size_t len)
@@ -64,18 +65,11 @@ buffer_zero_int(const void *buf, size_t len)
 }
 
 #if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
-/* Do not use push_options pragmas unnecessarily, because clang
- * does not support them.
- */
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
-#pragma GCC push_options
-#pragma GCC target("sse2")
-#endif
-#include <emmintrin.h>
+#include <immintrin.h>
 
 /* Note that each of these vectorized functions require len >= 64.  */
 
-static bool
+static bool __attribute__((target("sse2")))
 buffer_zero_sse2(const void *buf, size_t len)
 {
     __m128i t = _mm_loadu_si128(buf);
@@ -104,20 +98,9 @@ buffer_zero_sse2(const void *buf, size_t len)
 
     return _mm_movemask_epi8(_mm_cmpeq_epi8(t, zero)) == 0xFFFF;
 }
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
-#pragma GCC pop_options
-#endif
 
 #ifdef CONFIG_AVX2_OPT
-/* Note that due to restrictions/bugs wrt __builtin functions in gcc <= 4.8,
- * the includes have to be within the corresponding push_options region, and
- * therefore the regions themselves have to be ordered with increasing ISA.
- */
-#pragma GCC push_options
-#pragma GCC target("sse4")
-#include <smmintrin.h>
-
-static bool
+static bool __attribute__((target("sse4")))
 buffer_zero_sse4(const void *buf, size_t len)
 {
     __m128i t = _mm_loadu_si128(buf);
@@ -145,12 +128,7 @@ buffer_zero_sse4(const void *buf, size_t len)
     return _mm_testz_si128(t, t);
 }
 
-#pragma GCC pop_options
-#pragma GCC push_options
-#pragma GCC target("avx2")
-#include <immintrin.h>
-
-static bool
+static bool __attribute__((target("avx2")))
 buffer_zero_avx2(const void *buf, size_t len)
 {
     /* Begin with an unaligned head of 32 bytes.  */
@@ -176,15 +154,10 @@ buffer_zero_avx2(const void *buf, size_t len)
 
     return _mm256_testz_si256(t, t);
 }
-#pragma GCC pop_options
 #endif /* CONFIG_AVX2_OPT */
 
 #ifdef CONFIG_AVX512F_OPT
-#pragma GCC push_options
-#pragma GCC target("avx512f")
-#include <immintrin.h>
-
-static bool
+static bool __attribute__((target("avx512f")))
 buffer_zero_avx512(const void *buf, size_t len)
 {
     /* Begin with an unaligned head of 64 bytes.  */
@@ -210,115 +183,77 @@ buffer_zero_avx512(const void *buf, size_t len)
     return !_mm512_test_epi64_mask(t, t);
 
 }
-#pragma GCC pop_options
-#endif
-
-
-/* Note that for test_buffer_is_zero_next_accel, the most preferred
- * ISA must have the least significant bit.
- */
-#define CACHE_AVX512F 1
-#define CACHE_AVX2    2
-#define CACHE_SSE4    4
-#define CACHE_SSE2    8
+#endif /* CONFIG_AVX512F_OPT */
 
-/* Make sure that these variables are appropriately initialized when
+/*
+ * Make sure that these variables are appropriately initialized when
  * SSE2 is enabled on the compiler command-line, but the compiler is
  * too old to support CONFIG_AVX2_OPT.
  */
 #if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
-# define INIT_CACHE 0
-# define INIT_ACCEL buffer_zero_int
+# define INIT_USED     0
+# define INIT_LENGTH   0
+# define INIT_ACCEL    buffer_zero_int
 #else
 # ifndef __SSE2__
 #  error "ISA selection confusion"
 # endif
-# define INIT_CACHE CACHE_SSE2
-# define INIT_ACCEL buffer_zero_sse2
+# define INIT_USED     CPUINFO_SSE2
+# define INIT_LENGTH   64
+# define INIT_ACCEL    buffer_zero_sse2
 #endif
 
-static unsigned cpuid_cache = INIT_CACHE;
+static unsigned used_accel = INIT_USED;
+static unsigned length_to_accel = INIT_LENGTH;
 static bool (*buffer_accel)(const void *, size_t) = INIT_ACCEL;
-static int length_to_accel = 64;
 
-static void init_accel(unsigned cache)
+static unsigned __attribute__((noinline))
+select_accel_cpuinfo(unsigned info)
 {
-    bool (*fn)(const void *, size_t) = buffer_zero_int;
-    if (cache & CACHE_SSE2) {
-        fn = buffer_zero_sse2;
-        length_to_accel = 64;
-    }
+    /* Array is sorted in order of algorithm preference. */
+    static const struct {
+        unsigned bit;
+        unsigned len;
+        bool (*fn)(const void *, size_t);
+    } all[] = {
+#ifdef CONFIG_AVX512F_OPT
+        { CPUINFO_AVX512F, 256, buffer_zero_avx512 },
+#endif
 #ifdef CONFIG_AVX2_OPT
-    if (cache & CACHE_SSE4) {
-        fn = buffer_zero_sse4;
-        length_to_accel = 64;
-    }
-    if (cache & CACHE_AVX2) {
-        fn = buffer_zero_avx2;
-        length_to_accel = 128;
-    }
+        { CPUINFO_AVX2,    128, buffer_zero_avx2 },
+        { CPUINFO_SSE4,     64, buffer_zero_sse4 },
 #endif
-#ifdef CONFIG_AVX512F_OPT
-    if (cache & CACHE_AVX512F) {
-        fn = buffer_zero_avx512;
-        length_to_accel = 256;
+        { CPUINFO_SSE2,     64, buffer_zero_sse2 },
+        { CPUINFO_ALWAYS,    0, buffer_zero_int },
+    };
+
+    for (unsigned i = 0; i < ARRAY_SIZE(all); ++i) {
+        if (info & all[i].bit) {
+            length_to_accel = all[i].len;
+            buffer_accel = all[i].fn;
+            return all[i].bit;
+        }
     }
-#endif
-    buffer_accel = fn;
+    return 0;
 }
 
 #if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
-#include "qemu/cpuid.h"
-
-static void __attribute__((constructor)) init_cpuid_cache(void)
+static void __attribute__((constructor)) init_accel(void)
 {
-    int max = __get_cpuid_max(0, NULL);
-    int a, b, c, d;
-    unsigned cache = 0;
-
-    if (max >= 1) {
-        __cpuid(1, a, b, c, d);
-        if (d & bit_SSE2) {
-            cache |= CACHE_SSE2;
-        }
-        if (c & bit_SSE4_1) {
-            cache |= CACHE_SSE4;
-        }
-
-        /* We must check that AVX is not just available, but usable.  */
-        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
-            int bv;
-            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
-            __cpuid_count(7, 0, a, b, c, d);
-            if ((bv & 0x6) == 0x6 && (b & bit_AVX2)) {
-                cache |= CACHE_AVX2;
-            }
-            /* 0xe6:
-            *  XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
-            *                    and ZMM16-ZMM31 state are enabled by OS)
-            *  XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
-            */
-            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512F)) {
-                cache |= CACHE_AVX512F;
-            }
-        }
-    }
-    cpuid_cache = cache;
-    init_accel(cache);
+    used_accel = select_accel_cpuinfo(cpuinfo_init());
 }
 #endif /* CONFIG_AVX2_OPT */
 
 bool test_buffer_is_zero_next_accel(void)
 {
-    /* If no bits set, we just tested buffer_zero_int, and there
-       are no more acceleration options to test.  */
-    if (cpuid_cache == 0) {
-        return false;
-    }
-    /* Disable the accelerator we used before and select a new one.  */
-    cpuid_cache &= cpuid_cache - 1;
-    init_accel(cpuid_cache);
-    return true;
+    /*
+     * Accumulate the accelerators that we've already tested, and
+     * remove them from the set to test this round.  We'll get back
+     * a zero from select_accel_cpuinfo when there are no more.
+     */
+    unsigned used = select_accel_cpuinfo(cpuinfo & ~used_accel);
+    used_accel |= used;
+    return used;
 }
 
 static bool select_accel_fn(const void *buf, size_t len)
diff --git a/util/cacheflush.c b/util/cacheflush.c
new file mode 100644 (file)
index 0000000..a089061
--- /dev/null
@@ -0,0 +1,377 @@
+/*
+ * Info about, and flushing the host cpu caches.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cacheflush.h"
+#include "qemu/cacheinfo.h"
+#include "qemu/bitops.h"
+#include "qemu/host-utils.h"
+#include "qemu/atomic.h"
+
+
+int qemu_icache_linesize = 0;
+int qemu_icache_linesize_log;
+int qemu_dcache_linesize = 0;
+int qemu_dcache_linesize_log;
+
+/*
+ * Operating system specific cache detection mechanisms.
+ */
+
+#if defined(_WIN32)
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
+    DWORD size = 0;
+    BOOL success;
+    size_t i, n;
+
+    /*
+     * Check for the required buffer size first.  Note that if the zero
+     * size we use for the probe results in success, then there is no
+     * data available; fail in that case.
+     */
+    success = GetLogicalProcessorInformation(0, &size);
+    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+        return;
+    }
+
+    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
+    if (!GetLogicalProcessorInformation(buf, &size)) {
+        goto fail;
+    }
+
+    for (i = 0; i < n; i++) {
+        if (buf[i].Relationship == RelationCache
+            && buf[i].Cache.Level == 1) {
+            switch (buf[i].Cache.Type) {
+            case CacheUnified:
+                *isize = *dsize = buf[i].Cache.LineSize;
+                break;
+            case CacheInstruction:
+                *isize = buf[i].Cache.LineSize;
+                break;
+            case CacheData:
+                *dsize = buf[i].Cache.LineSize;
+                break;
+            default:
+                break;
+            }
+        }
+    }
+ fail:
+    g_free(buf);
+}
+
+#elif defined(CONFIG_DARWIN)
+# include <sys/sysctl.h>
+static void sys_cache_info(int *isize, int *dsize)
+{
+    /* There's only a single sysctl for both I/D cache line sizes.  */
+    long size;
+    size_t len = sizeof(size);
+    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
+        *isize = *dsize = size;
+    }
+}
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+# include <sys/sysctl.h>
+static void sys_cache_info(int *isize, int *dsize)
+{
+    /* There's only a single sysctl for both I/D cache line sizes.  */
+    int size;
+    size_t len = sizeof(size);
+    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
+        *isize = *dsize = size;
+    }
+}
+#else
+/* POSIX */
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+# ifdef _SC_LEVEL1_ICACHE_LINESIZE
+    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    if (tmp_isize > 0) {
+        *isize = tmp_isize;
+    }
+# endif
+# ifdef _SC_LEVEL1_DCACHE_LINESIZE
+    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    if (tmp_dsize > 0) {
+        *dsize = tmp_dsize;
+    }
+# endif
+}
+#endif /* sys_cache_info */
+
+
+/*
+ * Architecture (+ OS) specific cache detection mechanisms.
+ */
+
+#if defined(__powerpc__)
+static bool have_coherent_icache;
+#endif
+
+#if defined(__aarch64__) && !defined(CONFIG_DARWIN) && !defined(CONFIG_WIN32)
+/*
+ * Apple does not expose CTR_EL0, so we must use system interfaces.
+ * Windows neither, but we use a generic implementation of flush_idcache_range
+ * in this case.
+ */
+static uint64_t save_ctr_el0;
+static void arch_cache_info(int *isize, int *dsize)
+{
+    uint64_t ctr;
+
+    /*
+     * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
+     * but (at least under Linux) these are marked protected by the
+     * kernel.  However, CTR_EL0 contains the minimum linesize in the
+     * entire hierarchy, and is used by userspace cache flushing.
+     *
+     * We will also use this value in flush_idcache_range.
+     */
+    asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
+    save_ctr_el0 = ctr;
+
+    if (*isize == 0 || *dsize == 0) {
+        if (*isize == 0) {
+            *isize = 4 << (ctr & 0xf);
+        }
+        if (*dsize == 0) {
+            *dsize = 4 << ((ctr >> 16) & 0xf);
+        }
+    }
+}
+
+#elif defined(_ARCH_PPC) && defined(__linux__)
+# include "elf.h"
+
+static void arch_cache_info(int *isize, int *dsize)
+{
+    if (*isize == 0) {
+        *isize = qemu_getauxval(AT_ICACHEBSIZE);
+    }
+    if (*dsize == 0) {
+        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
+    }
+    have_coherent_icache = qemu_getauxval(AT_HWCAP) & PPC_FEATURE_ICACHE_SNOOP;
+}
+
+#else
+static void arch_cache_info(int *isize, int *dsize) { }
+#endif /* arch_cache_info */
+
+/*
+ * ... and if all else fails ...
+ */
+
+static void fallback_cache_info(int *isize, int *dsize)
+{
+    /* If we can only find one of the two, assume they're the same.  */
+    if (*isize) {
+        if (*dsize) {
+            /* Success! */
+        } else {
+            *dsize = *isize;
+        }
+    } else if (*dsize) {
+        *isize = *dsize;
+    } else {
+#if defined(_ARCH_PPC)
+        /*
+         * For PPC, we're going to use the cache sizes computed for
+         * flush_idcache_range.  Which means that we must use the
+         * architecture minimum.
+         */
+        *isize = *dsize = 16;
+#else
+        /* Otherwise, 64 bytes is not uncommon.  */
+        *isize = *dsize = 64;
+#endif
+    }
+}
+
+static void __attribute__((constructor)) init_cache_info(void)
+{
+    int isize = 0, dsize = 0;
+
+    sys_cache_info(&isize, &dsize);
+    arch_cache_info(&isize, &dsize);
+    fallback_cache_info(&isize, &dsize);
+
+    assert((isize & (isize - 1)) == 0);
+    assert((dsize & (dsize - 1)) == 0);
+
+    qemu_icache_linesize = isize;
+    qemu_icache_linesize_log = ctz32(isize);
+    qemu_dcache_linesize = dsize;
+    qemu_dcache_linesize_log = ctz32(dsize);
+
+    qatomic64_init();
+}
+
+
+/*
+ * Architecture (+ OS) specific cache flushing mechanisms.
+ */
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__s390__)
+
+/* Caches are coherent and do not require flushing; symbol inline. */
+
+#elif defined(__aarch64__) && !defined(CONFIG_WIN32)
+/*
+ * For Windows, we use generic implementation of flush_idcache_range, that
+ * performs a call to FlushInstructionCache, through __builtin___clear_cache.
+ */
+
+#ifdef CONFIG_DARWIN
+/* Apple does not expose CTR_EL0, so we must use system interfaces. */
+#include <libkern/OSCacheControl.h>
+
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
+{
+    if (rx == rw) {
+        /*
+         * sys_icache_invalidate() syncs the dcache and icache,
+         * so no need to call sys_dcache_flush().
+         */
+    } else {
+        sys_dcache_flush((void *)rw, len);
+    }
+    sys_icache_invalidate((void *)rx, len);
+}
+#else
+
+/*
+ * This is a copy of gcc's __aarch64_sync_cache_range, modified
+ * to fit this three-operand interface.
+ */
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
+{
+    const unsigned CTR_IDC = 1u << 28;
+    const unsigned CTR_DIC = 1u << 29;
+    const uint64_t ctr_el0 = save_ctr_el0;
+    const uintptr_t icache_lsize = qemu_icache_linesize;
+    const uintptr_t dcache_lsize = qemu_dcache_linesize;
+    uintptr_t p;
+
+    /*
+     * If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification
+     * is not required for instruction to data coherence.
+     */
+    if (!(ctr_el0 & CTR_IDC)) {
+        /*
+         * Loop over the address range, clearing one cache line at once.
+         * Data cache must be flushed to unification first to make sure
+         * the instruction cache fetches the updated data.
+         */
+        for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) {
+            asm volatile("dc\tcvau, %0" : : "r" (p) : "memory");
+        }
+        asm volatile("dsb\tish" : : : "memory");
+    }
+
+    /*
+     * If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point
+     * of Unification is not required for instruction to data coherence.
+     */
+    if (!(ctr_el0 & CTR_DIC)) {
+        for (p = rx & -icache_lsize; p < rx + len; p += icache_lsize) {
+            asm volatile("ic\tivau, %0" : : "r"(p) : "memory");
+        }
+        asm volatile ("dsb\tish" : : : "memory");
+    }
+
+    asm volatile("isb" : : : "memory");
+}
+#endif /* CONFIG_DARWIN */
+
+#elif defined(__mips__)
+
+#ifdef __OpenBSD__
+#include <machine/sysarch.h>
+#else
+#include <sys/cachectl.h>
+#endif
+
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
+{
+    if (rx != rw) {
+        cacheflush((void *)rw, len, DCACHE);
+    }
+    cacheflush((void *)rx, len, ICACHE);
+}
+
+#elif defined(__powerpc__)
+
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
+{
+    uintptr_t p, b, e;
+    size_t dsize, isize;
+
+    /*
+     * Some processors have coherent caches and support a simplified
+     * flushing procedure.  See
+     *   POWER9 UM, 4.6.2.2 Instruction Cache Block Invalidate (icbi) 
+     *   https://ibm.ent.box.com/s/tmklq90ze7aj8f4n32er1mu3sy9u8k3k
+     */
+    if (have_coherent_icache) {
+        asm volatile ("sync\n\t"
+                      "icbi 0,%0\n\t"
+                      "isync"
+                      : : "r"(rx) : "memory");
+        return;
+    }
+
+    dsize = qemu_dcache_linesize;
+    isize = qemu_icache_linesize;
+
+    b = rw & ~(dsize - 1);
+    e = (rw + len + dsize - 1) & ~(dsize - 1);
+    for (p = b; p < e; p += dsize) {
+        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
+    }
+    asm volatile ("sync" : : : "memory");
+
+    b = rx & ~(isize - 1);
+    e = (rx + len + isize - 1) & ~(isize - 1);
+    for (p = b; p < e; p += isize) {
+        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
+    }
+    asm volatile ("sync" : : : "memory");
+    asm volatile ("isync" : : : "memory");
+}
+
+#elif defined(__sparc__)
+
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
+{
+    /* No additional data flush to the RW virtual address required. */
+    uintptr_t p, end = (rx + len + 7) & -8;
+    for (p = rx & -8; p < end; p += 8) {
+        __asm__ __volatile__("flush\t%0" : : "r" (p));
+    }
+}
+
+#else
+
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
+{
+    if (rw != rx) {
+        __builtin___clear_cache((char *)rw, (char *)rw + len);
+    }
+    __builtin___clear_cache((char *)rx, (char *)rx + len);
+}
+
+#endif
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
deleted file mode 100644 (file)
index 7804c18..0000000
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * cacheinfo.c - helpers to query the host about its caches
- *
- * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
- * License: GNU GPL, version 2 or later.
- *   See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "qemu/host-utils.h"
-#include "qemu/atomic.h"
-
-int qemu_icache_linesize = 0;
-int qemu_icache_linesize_log;
-int qemu_dcache_linesize = 0;
-int qemu_dcache_linesize_log;
-
-/*
- * Operating system specific detection mechanisms.
- */
-
-#if defined(_WIN32)
-
-static void sys_cache_info(int *isize, int *dsize)
-{
-    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
-    DWORD size = 0;
-    BOOL success;
-    size_t i, n;
-
-    /* Check for the required buffer size first.  Note that if the zero
-       size we use for the probe results in success, then there is no
-       data available; fail in that case.  */
-    success = GetLogicalProcessorInformation(0, &size);
-    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
-        return;
-    }
-
-    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
-    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
-    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
-    if (!GetLogicalProcessorInformation(buf, &size)) {
-        goto fail;
-    }
-
-    for (i = 0; i < n; i++) {
-        if (buf[i].Relationship == RelationCache
-            && buf[i].Cache.Level == 1) {
-            switch (buf[i].Cache.Type) {
-            case CacheUnified:
-                *isize = *dsize = buf[i].Cache.LineSize;
-                break;
-            case CacheInstruction:
-                *isize = buf[i].Cache.LineSize;
-                break;
-            case CacheData:
-                *dsize = buf[i].Cache.LineSize;
-                break;
-            default:
-                break;
-            }
-        }
-    }
- fail:
-    g_free(buf);
-}
-
-#elif defined(__APPLE__)
-# include <sys/sysctl.h>
-static void sys_cache_info(int *isize, int *dsize)
-{
-    /* There's only a single sysctl for both I/D cache line sizes.  */
-    long size;
-    size_t len = sizeof(size);
-    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
-        *isize = *dsize = size;
-    }
-}
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-# include <sys/sysctl.h>
-static void sys_cache_info(int *isize, int *dsize)
-{
-    /* There's only a single sysctl for both I/D cache line sizes.  */
-    int size;
-    size_t len = sizeof(size);
-    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
-        *isize = *dsize = size;
-    }
-}
-#else
-/* POSIX */
-
-static void sys_cache_info(int *isize, int *dsize)
-{
-# ifdef _SC_LEVEL1_ICACHE_LINESIZE
-    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
-    if (tmp_isize > 0) {
-        *isize = tmp_isize;
-    }
-# endif
-# ifdef _SC_LEVEL1_DCACHE_LINESIZE
-    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
-    if (tmp_dsize > 0) {
-        *dsize = tmp_dsize;
-    }
-# endif
-}
-#endif /* sys_cache_info */
-
-/*
- * Architecture (+ OS) specific detection mechanisms.
- */
-
-#if defined(__aarch64__)
-
-static void arch_cache_info(int *isize, int *dsize)
-{
-    if (*isize == 0 || *dsize == 0) {
-        uint64_t ctr;
-
-        /* The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
-           but (at least under Linux) these are marked protected by the
-           kernel.  However, CTR_EL0 contains the minimum linesize in the
-           entire hierarchy, and is used by userspace cache flushing.  */
-        asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
-        if (*isize == 0) {
-            *isize = 4 << (ctr & 0xf);
-        }
-        if (*dsize == 0) {
-            *dsize = 4 << ((ctr >> 16) & 0xf);
-        }
-    }
-}
-
-#elif defined(_ARCH_PPC) && defined(__linux__)
-# include "elf.h"
-
-static void arch_cache_info(int *isize, int *dsize)
-{
-    if (*isize == 0) {
-        *isize = qemu_getauxval(AT_ICACHEBSIZE);
-    }
-    if (*dsize == 0) {
-        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
-    }
-}
-
-#else
-static void arch_cache_info(int *isize, int *dsize) { }
-#endif /* arch_cache_info */
-
-/*
- * ... and if all else fails ...
- */
-
-static void fallback_cache_info(int *isize, int *dsize)
-{
-    /* If we can only find one of the two, assume they're the same.  */
-    if (*isize) {
-        if (*dsize) {
-            /* Success! */
-        } else {
-            *dsize = *isize;
-        }
-    } else if (*dsize) {
-        *isize = *dsize;
-    } else {
-#if defined(_ARCH_PPC)
-        /* For PPC, we're going to use the icache size computed for
-           flush_icache_range.  Which means that we must use the
-           architecture minimum.  */
-        *isize = *dsize = 16;
-#else
-        /* Otherwise, 64 bytes is not uncommon.  */
-        *isize = *dsize = 64;
-#endif
-    }
-}
-
-static void __attribute__((constructor)) init_cache_info(void)
-{
-    int isize = 0, dsize = 0;
-
-    sys_cache_info(&isize, &dsize);
-    arch_cache_info(&isize, &dsize);
-    fallback_cache_info(&isize, &dsize);
-
-    assert((isize & (isize - 1)) == 0);
-    assert((dsize & (dsize - 1)) == 0);
-
-    qemu_icache_linesize = isize;
-    qemu_icache_linesize_log = ctz32(isize);
-    qemu_dcache_linesize = dsize;
-    qemu_dcache_linesize_log = ctz32(dsize);
-
-    qatomic64_init();
-}
index ee47dd808977d089dc92baed486fed5e3f3200f3..147e39e2c62be977751d67f7772b1b9acbd8722d 100644 (file)
 #include "qemu/thread.h"
 
 #if defined(CONFIG_SIGNALFD)
-#include <sys/syscall.h>
+#include <sys/signalfd.h>
 #endif
 
-struct sigfd_compat_info
-{
+struct sigfd_compat_info {
     sigset_t mask;
     int fd;
 };
@@ -43,24 +42,11 @@ static void *sigwait_compat(void *opaque)
             }
         } else {
             struct qemu_signalfd_siginfo buffer;
-            size_t offset = 0;
-
             memset(&buffer, 0, sizeof(buffer));
             buffer.ssi_signo = sig;
 
-            while (offset < sizeof(buffer)) {
-                ssize_t len;
-
-                len = write(info->fd, (char *)&buffer + offset,
-                            sizeof(buffer) - offset);
-                if (len == -1 && errno == EINTR)
-                    continue;
-
-                if (len <= 0) {
-                    return NULL;
-                }
-
-                offset += len;
+            if (qemu_write_full(info->fd, &buffer, sizeof(buffer)) != sizeof(buffer)) {
+                return NULL;
             }
         }
     }
@@ -72,20 +58,13 @@ static int qemu_signalfd_compat(const sigset_t *mask)
     QemuThread thread;
     int fds[2];
 
-    info = malloc(sizeof(*info));
-    if (info == NULL) {
-        errno = ENOMEM;
-        return -1;
-    }
+    info = g_malloc(sizeof(*info));
 
-    if (pipe(fds) == -1) {
-        free(info);
+    if (!g_unix_open_pipe(fds, FD_CLOEXEC, NULL)) {
+        g_free(info);
         return -1;
     }
 
-    qemu_set_cloexec(fds[0]);
-    qemu_set_cloexec(fds[1]);
-
     memcpy(&info->mask, mask, sizeof(*mask));
     info->fd = fds[1];
 
@@ -100,9 +79,8 @@ int qemu_signalfd(const sigset_t *mask)
 #if defined(CONFIG_SIGNALFD)
     int ret;
 
-    ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
+    ret = signalfd(-1, mask, SFD_CLOEXEC);
     if (ret != -1) {
-        qemu_set_cloexec(ret);
         return ret;
     }
 #endif
index aade82afb8c0053e7ecc2a3c06efc596b9ec31eb..037d6416c4aead14a37afdfa054c222c0848f618 100644 (file)
  */
 
 /* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
-#ifdef _FORTIFY_SOURCE
 #undef _FORTIFY_SOURCE
-#endif
+#define _FORTIFY_SOURCE 0
+
 #include "qemu/osdep.h"
 #include <pthread.h>
-#include "qemu-common.h"
 #include "qemu/coroutine_int.h"
 
 #ifdef CONFIG_SAFESTACK
@@ -157,6 +156,7 @@ Coroutine *qemu_coroutine_new(void)
     sigset_t sigs;
     sigset_t osigs;
     sigjmp_buf old_env;
+    static pthread_mutex_t sigusr2_mutex = PTHREAD_MUTEX_INITIALIZER;
 
     /* The way to manipulate stack is with the sigaltstack function. We
      * prepare a stack, with it delivering a signal to ourselves and then
@@ -186,6 +186,12 @@ Coroutine *qemu_coroutine_new(void)
     sa.sa_handler = coroutine_trampoline;
     sigfillset(&sa.sa_mask);
     sa.sa_flags = SA_ONSTACK;
+
+    /*
+     * sigaction() is a process-global operation.  We must not run
+     * this code in multiple threads at once.
+     */
+    pthread_mutex_lock(&sigusr2_mutex);
     if (sigaction(SIGUSR2, &sa, &osa) != 0) {
         abort();
     }
@@ -234,6 +240,8 @@ Coroutine *qemu_coroutine_new(void)
      * Restore the old SIGUSR2 signal handler and mask
      */
     sigaction(SIGUSR2, &osa, NULL);
+    pthread_mutex_unlock(&sigusr2_mutex);
+
     pthread_sigmask(SIG_SETMASK, &osigs, NULL);
 
     /*
index 904b375192cab6d24c2bdea46d8c53beedb9b296..8ef603d081ea02b80ecfca27a22a6b19d8a1a49c 100644 (file)
  */
 
 /* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
-#ifdef _FORTIFY_SOURCE
 #undef _FORTIFY_SOURCE
-#endif
+#define _FORTIFY_SOURCE 0
+
 #include "qemu/osdep.h"
 #include <ucontext.h>
 #include "qemu/coroutine_int.h"
+#include "qemu/coroutine-tls.h"
 
 #ifdef CONFIG_VALGRIND_H
 #include <valgrind/valgrind.h>
 #endif
 
-#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)
+#ifdef QEMU_SANITIZE_ADDRESS
 #ifdef CONFIG_ASAN_IFACE_FIBER
 #define CONFIG_ASAN 1
 #include <sanitizer/asan_interface.h>
@@ -66,8 +67,8 @@ typedef struct {
 /**
  * Per-thread coroutine bookkeeping
  */
-static __thread CoroutineUContext leader;
-static __thread Coroutine *current;
+QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current);
+QEMU_DEFINE_STATIC_CO_TLS(CoroutineUContext, leader);
 
 /*
  * va_args to makecontext() must be type 'int', so passing
@@ -97,14 +98,15 @@ static inline __attribute__((always_inline))
 void finish_switch_fiber(void *fake_stack_save)
 {
 #ifdef CONFIG_ASAN
+    CoroutineUContext *leaderp = get_ptr_leader();
     const void *bottom_old;
     size_t size_old;
 
     __sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old);
 
-    if (!leader.stack) {
-        leader.stack = (void *)bottom_old;
-        leader.stack_size = size_old;
+    if (!leaderp->stack) {
+        leaderp->stack = (void *)bottom_old;
+        leaderp->stack_size = size_old;
     }
 #endif
 #ifdef CONFIG_TSAN
@@ -117,13 +119,11 @@ void finish_switch_fiber(void *fake_stack_save)
 
 /* always_inline is required to avoid TSan runtime fatal errors. */
 static inline __attribute__((always_inline))
-void start_switch_fiber_asan(CoroutineAction action, void **fake_stack_save,
+void start_switch_fiber_asan(void **fake_stack_save,
                              const void *bottom, size_t size)
 {
 #ifdef CONFIG_ASAN
-    __sanitizer_start_switch_fiber(
-            action == COROUTINE_TERMINATE ? NULL : fake_stack_save,
-            bottom, size);
+    __sanitizer_start_switch_fiber(fake_stack_save, bottom, size);
 #endif
 }
 
@@ -161,8 +161,10 @@ static void coroutine_trampoline(int i0, int i1)
 
     /* Initialize longjmp environment and switch back the caller */
     if (!sigsetjmp(self->env, 0)) {
-        start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, leader.stack,
-                                leader.stack_size);
+        CoroutineUContext *leaderp = get_ptr_leader();
+
+        start_switch_fiber_asan(&fake_stack_save,
+                                leaderp->stack, leaderp->stack_size);
         start_switch_fiber_tsan(&fake_stack_save, self, true); /* true=caller */
         siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
     }
@@ -222,8 +224,7 @@ Coroutine *qemu_coroutine_new(void)
 
     /* swapcontext() in, siglongjmp() back out */
     if (!sigsetjmp(old_env, 0)) {
-        start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, co->stack,
-                                co->stack_size);
+        start_switch_fiber_asan(&fake_stack_save, co->stack, co->stack_size);
         start_switch_fiber_tsan(&fake_stack_save,
                                 co, false); /* false=not caller */
 
@@ -265,10 +266,28 @@ static inline void valgrind_stack_deregister(CoroutineUContext *co)
 #endif
 #endif
 
+#if defined(CONFIG_ASAN) && defined(CONFIG_COROUTINE_POOL)
+static void coroutine_fn terminate_asan(void *opaque)
+{
+    CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, opaque);
+
+    set_current(opaque);
+    start_switch_fiber_asan(NULL, to->stack, to->stack_size);
+    G_STATIC_ASSERT(!IS_ENABLED(CONFIG_TSAN));
+    siglongjmp(to->env, COROUTINE_ENTER);
+}
+#endif
+
 void qemu_coroutine_delete(Coroutine *co_)
 {
     CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
 
+#if defined(CONFIG_ASAN) && defined(CONFIG_COROUTINE_POOL)
+    co_->entry_arg = qemu_coroutine_self();
+    co_->entry = terminate_asan;
+    qemu_coroutine_switch(co_->entry_arg, co_, COROUTINE_ENTER);
+#endif
+
 #ifdef CONFIG_VALGRIND_H
     valgrind_stack_deregister(co);
 #endif
@@ -297,12 +316,14 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
     int ret;
     void *fake_stack_save = NULL;
 
-    current = to_;
+    set_current(to_);
 
     ret = sigsetjmp(from->env, 0);
     if (ret == 0) {
-        start_switch_fiber_asan(action, &fake_stack_save, to->stack,
-                                to->stack_size);
+        start_switch_fiber_asan(IS_ENABLED(CONFIG_COROUTINE_POOL) ||
+                                action != COROUTINE_TERMINATE ?
+                                    &fake_stack_save : NULL,
+                                to->stack, to->stack_size);
         start_switch_fiber_tsan(&fake_stack_save,
                                 to, false); /* false=not caller */
         siglongjmp(to->env, action);
@@ -315,18 +336,24 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
 
 Coroutine *qemu_coroutine_self(void)
 {
-    if (!current) {
-        current = &leader.base;
+    Coroutine *self = get_current();
+    CoroutineUContext *leaderp = get_ptr_leader();
+
+    if (!self) {
+        self = &leaderp->base;
+        set_current(self);
     }
 #ifdef CONFIG_TSAN
-    if (!leader.tsan_co_fiber) {
-        leader.tsan_co_fiber = __tsan_get_current_fiber();
+    if (!leaderp->tsan_co_fiber) {
+        leaderp->tsan_co_fiber = __tsan_get_current_fiber();
     }
 #endif
-    return current;
+    return self;
 }
 
 bool qemu_in_coroutine(void)
 {
-    return current && current->caller;
+    Coroutine *self = get_current();
+
+    return self && self->caller;
 }
diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c
deleted file mode 100644 (file)
index de6bd4f..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Win32 coroutine initialization code
- *
- * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "qemu/coroutine_int.h"
-
-typedef struct
-{
-    Coroutine base;
-
-    LPVOID fiber;
-    CoroutineAction action;
-} CoroutineWin32;
-
-static __thread CoroutineWin32 leader;
-static __thread Coroutine *current;
-
-/* This function is marked noinline to prevent GCC from inlining it
- * into coroutine_trampoline(). If we allow it to do that then it
- * hoists the code to get the address of the TLS variable "current"
- * out of the while() loop. This is an invalid transformation because
- * the SwitchToFiber() call may be called when running thread A but
- * return in thread B, and so we might be in a different thread
- * context each time round the loop.
- */
-CoroutineAction __attribute__((noinline))
-qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
-                      CoroutineAction action)
-{
-    CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
-    CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
-
-    current = to_;
-
-    to->action = action;
-    SwitchToFiber(to->fiber);
-    return from->action;
-}
-
-static void CALLBACK coroutine_trampoline(void *co_)
-{
-    Coroutine *co = co_;
-
-    while (true) {
-        co->entry(co->entry_arg);
-        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
-    }
-}
-
-Coroutine *qemu_coroutine_new(void)
-{
-    const size_t stack_size = COROUTINE_STACK_SIZE;
-    CoroutineWin32 *co;
-
-    co = g_malloc0(sizeof(*co));
-    co->fiber = CreateFiber(stack_size, coroutine_trampoline, &co->base);
-    return &co->base;
-}
-
-void qemu_coroutine_delete(Coroutine *co_)
-{
-    CoroutineWin32 *co = DO_UPCAST(CoroutineWin32, base, co_);
-
-    DeleteFiber(co->fiber);
-    g_free(co);
-}
-
-Coroutine *qemu_coroutine_self(void)
-{
-    if (!current) {
-        current = &leader.base;
-        leader.fiber = ConvertThreadToFiber(NULL);
-    }
-    return current;
-}
-
-bool qemu_in_coroutine(void)
-{
-    return current && current->caller;
-}
diff --git a/util/coroutine-windows.c b/util/coroutine-windows.c
new file mode 100644 (file)
index 0000000..7db2e8f
--- /dev/null
@@ -0,0 +1,109 @@
+/*
+ * Win32 coroutine initialization code
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/coroutine_int.h"
+#include "qemu/coroutine-tls.h"
+
+typedef struct
+{
+    Coroutine base;
+
+    LPVOID fiber;
+    CoroutineAction action;
+} CoroutineWin32;
+
+QEMU_DEFINE_STATIC_CO_TLS(CoroutineWin32, leader);
+QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current);
+
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the SwitchToFiber() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                      CoroutineAction action)
+{
+    CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
+    CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
+
+    set_current(to_);
+
+    to->action = action;
+    SwitchToFiber(to->fiber);
+    return from->action;
+}
+
+static void CALLBACK coroutine_trampoline(void *co_)
+{
+    Coroutine *co = co_;
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    const size_t stack_size = COROUTINE_STACK_SIZE;
+    CoroutineWin32 *co;
+
+    co = g_malloc0(sizeof(*co));
+    co->fiber = CreateFiber(stack_size, coroutine_trampoline, &co->base);
+    return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineWin32 *co = DO_UPCAST(CoroutineWin32, base, co_);
+
+    DeleteFiber(co->fiber);
+    g_free(co);
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    Coroutine *current = get_current();
+
+    if (!current) {
+        CoroutineWin32 *leader = get_ptr_leader();
+
+        current = &leader->base;
+        set_current(current);
+        leader->fiber = ConvertThreadToFiber(NULL);
+    }
+    return current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    Coroutine *current = get_current();
+
+    return current && current->caller;
+}
diff --git a/util/cpuinfo-aarch64.c b/util/cpuinfo-aarch64.c
new file mode 100644 (file)
index 0000000..4c8a005
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Host specific cpu identification for AArch64.
+ */
+
+#include "qemu/osdep.h"
+#include "host/cpuinfo.h"
+
+#ifdef CONFIG_LINUX
+# ifdef CONFIG_GETAUXVAL
+#  include <sys/auxv.h>
+# else
+#  include <asm/hwcap.h>
+#  include "elf.h"
+# endif
+# ifndef HWCAP2_BTI
+#  define HWCAP2_BTI 0  /* added in glibc 2.32 */
+# endif
+#endif
+#ifdef CONFIG_DARWIN
+# include <sys/sysctl.h>
+#endif
+
+unsigned cpuinfo;
+
+#ifdef CONFIG_DARWIN
+static bool sysctl_for_bool(const char *name)
+{
+    int val = 0;
+    size_t len = sizeof(val);
+
+    if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
+        return val != 0;
+    }
+
+    /*
+     * We might in the future ask for properties not present in older kernels,
+     * but we're only asking about static properties, all of which should be
+     * 'int'.  So we shouldn't see ENOMEM (val too small), or any of the other
+     * more exotic errors.
+     */
+    assert(errno == ENOENT);
+    return false;
+}
+#endif
+
+/* Called both as constructor and (possibly) via other constructors. */
+unsigned __attribute__((constructor)) cpuinfo_init(void)
+{
+    unsigned info = cpuinfo;
+
+    if (info) {
+        return info;
+    }
+
+    info = CPUINFO_ALWAYS;
+
+#ifdef CONFIG_LINUX
+    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+    info |= (hwcap & HWCAP_ATOMICS ? CPUINFO_LSE : 0);
+    info |= (hwcap & HWCAP_USCAT ? CPUINFO_LSE2 : 0);
+    info |= (hwcap & HWCAP_AES ? CPUINFO_AES : 0);
+    info |= (hwcap & HWCAP_PMULL ? CPUINFO_PMULL : 0);
+
+    unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
+    info |= (hwcap2 & HWCAP2_BTI ? CPUINFO_BTI : 0);
+#endif
+#ifdef CONFIG_DARWIN
+    info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE") * CPUINFO_LSE;
+    info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE2") * CPUINFO_LSE2;
+    info |= sysctl_for_bool("hw.optional.arm.FEAT_AES") * CPUINFO_AES;
+    info |= sysctl_for_bool("hw.optional.arm.FEAT_PMULL") * CPUINFO_PMULL;
+    info |= sysctl_for_bool("hw.optional.arm.FEAT_BTI") * CPUINFO_BTI;
+#endif
+
+    cpuinfo = info;
+    return info;
+}
diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c
new file mode 100644 (file)
index 0000000..9fddb18
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Host specific cpu identification for x86.
+ */
+
+#include "qemu/osdep.h"
+#include "host/cpuinfo.h"
+#ifdef CONFIG_CPUID_H
+# include "qemu/cpuid.h"
+#endif
+
+unsigned cpuinfo;
+
+/* Called both as constructor and (possibly) via other constructors. */
+unsigned __attribute__((constructor)) cpuinfo_init(void)
+{
+    unsigned info = cpuinfo;
+
+    if (info) {
+        return info;
+    }
+
+#ifdef CONFIG_CPUID_H
+    unsigned max, a, b, c, d, b7 = 0, c7 = 0;
+
+    max = __get_cpuid_max(0, 0);
+
+    if (max >= 7) {
+        __cpuid_count(7, 0, a, b7, c7, d);
+        info |= (b7 & bit_BMI ? CPUINFO_BMI1 : 0);
+        info |= (b7 & bit_BMI2 ? CPUINFO_BMI2 : 0);
+    }
+
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+
+        info |= (d & bit_CMOV ? CPUINFO_CMOV : 0);
+        info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0);
+        info |= (c & bit_SSE4_1 ? CPUINFO_SSE4 : 0);
+        info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
+        info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
+        info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0);
+
+        /* Our AES support requires PSHUFB as well. */
+        info |= ((c & bit_AES) && (c & bit_SSSE3) ? CPUINFO_AES : 0);
+
+        /* For AVX features, we must check available and usable. */
+        if ((c & bit_AVX) && (c & bit_OSXSAVE)) {
+            unsigned bv = xgetbv_low(0);
+
+            if ((bv & 6) == 6) {
+                info |= CPUINFO_AVX1;
+                info |= (b7 & bit_AVX2 ? CPUINFO_AVX2 : 0);
+
+                if ((bv & 0xe0) == 0xe0) {
+                    info |= (b7 & bit_AVX512F ? CPUINFO_AVX512F : 0);
+                    info |= (b7 & bit_AVX512VL ? CPUINFO_AVX512VL : 0);
+                    info |= (b7 & bit_AVX512BW ? CPUINFO_AVX512BW : 0);
+                    info |= (b7 & bit_AVX512DQ ? CPUINFO_AVX512DQ : 0);
+                    info |= (c7 & bit_AVX512VBMI2 ? CPUINFO_AVX512VBMI2 : 0);
+                }
+
+                /*
+                 * The Intel SDM has added:
+                 *   Processors that enumerate support for Intel® AVX
+                 *   (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
+                 *   guarantee that the 16-byte memory operations performed
+                 *   by the following instructions will always be carried
+                 *   out atomically:
+                 *   - MOVAPD, MOVAPS, and MOVDQA.
+                 *   - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
+                 *   - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
+                 *     with EVEX.128 and k0 (masking disabled).
+                 * Note that these instructions require the linear addresses
+                 * of their memory operands to be 16-byte aligned.
+                 *
+                 * AMD has provided an even stronger guarantee that processors
+                 * with AVX provide 16-byte atomicity for all cacheable,
+                 * naturally aligned single loads and stores, e.g. MOVDQU.
+                 *
+                 * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
+                 */
+                __cpuid(0, a, b, c, d);
+                if (c == signature_INTEL_ecx) {
+                    info |= CPUINFO_ATOMIC_VMOVDQA;
+                } else if (c == signature_AMD_ecx) {
+                    info |= CPUINFO_ATOMIC_VMOVDQA | CPUINFO_ATOMIC_VMOVDQU;
+                }
+            }
+        }
+    }
+
+    max = __get_cpuid_max(0x8000000, 0);
+    if (max >= 1) {
+        __cpuid(0x80000001, a, b, c, d);
+        info |= (c & bit_LZCNT ? CPUINFO_LZCNT : 0);
+    }
+#endif
+
+    info |= CPUINFO_ALWAYS;
+    cpuinfo = info;
+    return info;
+}
diff --git a/util/cpuinfo-loongarch.c b/util/cpuinfo-loongarch.c
new file mode 100644 (file)
index 0000000..08b6d74
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Host specific cpu identification for LoongArch.
+ */
+
+#include "qemu/osdep.h"
+#include "host/cpuinfo.h"
+
+#ifdef CONFIG_GETAUXVAL
+# include <sys/auxv.h>
+#else
+# include "elf.h"
+#endif
+#include <asm/hwcap.h>
+
+unsigned cpuinfo;
+
+/* Called both as constructor and (possibly) via other constructors. */
+unsigned __attribute__((constructor)) cpuinfo_init(void)
+{
+    unsigned info = cpuinfo;
+    unsigned long hwcap;
+
+    if (info) {
+        return info;
+    }
+
+    hwcap = qemu_getauxval(AT_HWCAP);
+
+    info = CPUINFO_ALWAYS;
+    info |= (hwcap & HWCAP_LOONGARCH_LSX ? CPUINFO_LSX : 0);
+
+    cpuinfo = info;
+    return info;
+}
diff --git a/util/cpuinfo-ppc.c b/util/cpuinfo-ppc.c
new file mode 100644 (file)
index 0000000..b2d8893
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Host specific cpu identification for ppc.
+ */
+
+#include "qemu/osdep.h"
+#include "host/cpuinfo.h"
+
+#include <asm/cputable.h>
+#ifdef CONFIG_GETAUXVAL
+# include <sys/auxv.h>
+#else
+# include "elf.h"
+#endif
+
+unsigned cpuinfo;
+
+/* Called both as constructor and (possibly) via other constructors. */
+unsigned __attribute__((constructor)) cpuinfo_init(void)
+{
+    unsigned info = cpuinfo;
+    unsigned long hwcap, hwcap2;
+
+    if (info) {
+        return info;
+    }
+
+    hwcap = qemu_getauxval(AT_HWCAP);
+    hwcap2 = qemu_getauxval(AT_HWCAP2);
+    info = CPUINFO_ALWAYS;
+
+    /* Version numbers are monotonic, and so imply all lower versions. */
+    if (hwcap2 & PPC_FEATURE2_ARCH_3_1) {
+        info |= CPUINFO_V3_1 | CPUINFO_V3_0 | CPUINFO_V2_07 | CPUINFO_V2_06;
+    } else if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
+        info |= CPUINFO_V3_0 | CPUINFO_V2_07 | CPUINFO_V2_06;
+    } else if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
+        info |= CPUINFO_V2_07 | CPUINFO_V2_06;
+    } else if (hwcap & PPC_FEATURE_ARCH_2_06) {
+        info |= CPUINFO_V2_06;
+    }
+
+    if (hwcap2 & PPC_FEATURE2_ISEL) {
+        info |= CPUINFO_ISEL;
+    }
+    if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
+        info |= CPUINFO_ALTIVEC;
+        /* We only care about the portion of VSX that overlaps Altivec. */
+        if (hwcap & PPC_FEATURE_HAS_VSX) {
+            info |= CPUINFO_VSX;
+            /*
+             * We use VSX especially for little-endian, but we should
+             * always have both anyway, since VSX came with Power7
+             * and crypto came with Power8.
+             */
+            if (hwcap2 & PPC_FEATURE2_VEC_CRYPTO) {
+                info |= CPUINFO_CRYPTO;
+            }
+        }
+    }
+
+    cpuinfo = info;
+    return info;
+}
diff --git a/util/crc-ccitt.c b/util/crc-ccitt.c
new file mode 100644 (file)
index 0000000..b981d8a
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * CRC16 (CCITT) Checksum Algorithm
+ *
+ * Copyright (c) 2021 Wind River Systems, Inc.
+ *
+ * Author:
+ *   Bin Meng <bin.meng@windriver.com>
+ *
+ * From Linux kernel v5.10 lib/crc-ccitt.c
+ *
+ * SPDX-License-Identifier: GPL-2.0-only
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/crc-ccitt.h"
+
+/*
+ * This mysterious table is just the CRC of each possible byte. It can be
+ * computed using the standard bit-at-a-time methods. The polynomial can
+ * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12.
+ * Add the implicit x^16, and you have the standard CRC-CCITT.
+ */
+uint16_t const crc_ccitt_table[256] = {
+    0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
+    0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
+    0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
+    0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
+    0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
+    0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
+    0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
+    0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
+    0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
+    0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
+    0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
+    0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
+    0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
+    0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
+    0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
+    0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
+    0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
+    0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
+    0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
+    0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
+    0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
+    0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
+    0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
+    0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
+    0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
+    0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
+    0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
+    0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
+    0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
+    0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
+    0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
+    0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
+};
+
+/*
+ * Similar table to calculate CRC16 variant known as CRC-CCITT-FALSE
+ * Reflected bits order, does not augment final value.
+ */
+uint16_t const crc_ccitt_false_table[256] = {
+    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
+    0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
+    0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
+    0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
+    0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
+    0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
+    0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
+    0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
+    0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
+    0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
+    0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
+    0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
+    0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
+    0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
+    0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
+    0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
+    0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
+    0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
+    0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
+    0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
+    0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
+    0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
+    0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
+    0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
+    0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
+    0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
+    0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
+    0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
+    0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
+    0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
+    0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
+    0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
+};
+
+/**
+ * crc_ccitt - recompute the CRC (CRC-CCITT variant)
+ * for the data buffer
+ *
+ * @crc: previous CRC value
+ * @buffer: data pointer
+ * @len: number of bytes in the buffer
+ */
+uint16_t crc_ccitt(uint16_t crc, uint8_t const *buffer, size_t len)
+{
+    while (len--) {
+        crc = crc_ccitt_byte(crc, *buffer++);
+    }
+    return crc;
+}
+
+/**
+ * crc_ccitt_false - recompute the CRC (CRC-CCITT-FALSE variant)
+ * for the data buffer
+ *
+ * @crc: previous CRC value
+ * @buffer: data pointer
+ * @len: number of bytes in the buffer
+ */
+uint16_t crc_ccitt_false(uint16_t crc, uint8_t const *buffer, size_t len)
+{
+    while (len--) {
+        crc = crc_ccitt_false_byte(crc, *buffer++);
+    }
+    return crc;
+}
index 762657d853d6fdc8c18f3b4d7cb528327879c21f..ea7f345de86073998250f54a5a19aef0a5bf164f 100644 (file)
@@ -113,3 +113,11 @@ uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length)
     return crc^0xffffffff;
 }
 
+uint32_t iov_crc32c(uint32_t crc, const struct iovec *iov, size_t iov_cnt)
+{
+    while (iov_cnt--) {
+        crc = crc32c(crc, iov->iov_base, iov->iov_len) ^ 0xffffffff;
+        iov++;
+    }
+    return crc ^ 0xffffffff;
+}
index 0b5073b33012a91a8d6a59d74965bb72327a996f..42364039a598814451c65b8935251cd3df96a5d3 100644 (file)
 #include "qemu/host-utils.h"
 #include <math.h>
 
-#include "qemu-common.h"
-#include "qemu/sockets.h"
-#include "qemu/iov.h"
-#include "net/net.h"
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#include <sys/user.h>
+#endif
+
+#ifdef __NetBSD__
+#include <sys/sysctl.h>
+#endif
+
+#ifdef __HAIKU__
+#include <kernel/image.h>
+#endif
+
+#ifdef __APPLE__
+#include <mach-o/dyld.h>
+#endif
+
+#ifdef G_OS_WIN32
+#include <pathcch.h>
+#include <wchar.h>
+#endif
+
 #include "qemu/ctype.h"
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
@@ -148,77 +166,6 @@ time_t mktimegm(struct tm *tm)
     return t;
 }
 
-/*
- * Make sure data goes on disk, but if possible do not bother to
- * write out the inode just for timestamp updates.
- *
- * Unfortunately even in 2009 many operating systems do not support
- * fdatasync and have to fall back to fsync.
- */
-int qemu_fdatasync(int fd)
-{
-#ifdef CONFIG_FDATASYNC
-    return fdatasync(fd);
-#else
-    return fsync(fd);
-#endif
-}
-
-/**
- * Sync changes made to the memory mapped file back to the backing
- * storage. For POSIX compliant systems this will fallback
- * to regular msync call. Otherwise it will trigger whole file sync
- * (including the metadata case there is no support to skip that otherwise)
- *
- * @addr   - start of the memory area to be synced
- * @length - length of the are to be synced
- * @fd     - file descriptor for the file to be synced
- *           (mandatory only for POSIX non-compliant systems)
- */
-int qemu_msync(void *addr, size_t length, int fd)
-{
-#ifdef CONFIG_POSIX
-    size_t align_mask = ~(qemu_real_host_page_size - 1);
-
-    /**
-     * There are no strict reqs as per the length of mapping
-     * to be synced. Still the length needs to follow the address
-     * alignment changes. Additionally - round the size to the multiple
-     * of PAGE_SIZE
-     */
-    length += ((uintptr_t)addr & (qemu_real_host_page_size - 1));
-    length = (length + ~align_mask) & align_mask;
-
-    addr = (void *)((uintptr_t)addr & align_mask);
-
-    return msync(addr, length, MS_SYNC);
-#else /* CONFIG_POSIX */
-    /**
-     * Perform the sync based on the file descriptor
-     * The sync range will most probably be wider than the one
-     * requested - but it will still get the job done
-     */
-    return qemu_fdatasync(fd);
-#endif /* CONFIG_POSIX */
-}
-
-#ifndef _WIN32
-/* Sets a specific flag */
-int fcntl_setfl(int fd, int flag)
-{
-    int flags;
-
-    flags = fcntl(fd, F_GETFL);
-    if (flags == -1)
-        return -errno;
-
-    if (fcntl(fd, F_SETFL, flags | flag) == -1)
-        return -errno;
-
-    return 0;
-}
-#endif
-
 static int64_t suffix_mul(char suffix, int64_t unit)
 {
     switch (qemu_toupper(suffix)) {
@@ -241,10 +188,37 @@ static int64_t suffix_mul(char suffix, int64_t unit)
 }
 
 /*
- * Convert string to bytes, allowing either B/b for bytes, K/k for KB,
- * M/m for MB, G/g for GB or T/t for TB. End pointer will be returned
- * in *end, if not NULL. Return -ERANGE on overflow, and -EINVAL on
- * other error.
+ * Convert size string to bytes.
+ *
+ * The size parsing supports the following syntaxes
+ * - 12345 - decimal, scale determined by @default_suffix and @unit
+ * - 12345{bBkKmMgGtTpPeE} - decimal, scale determined by suffix and @unit
+ * - 12345.678{kKmMgGtTpPeE} - decimal, scale determined by suffix, and
+ *   fractional portion is truncated to byte, either side of . may be empty
+ * - 0x7fEE - hexadecimal, unit determined by @default_suffix
+ *
+ * The following are intentionally not supported
+ * - hex with scaling suffix, such as 0x20M or 0x1p3 (both fail with
+ *   -EINVAL), while 0x1b is 27 (not 1 with byte scale)
+ * - octal, such as 08 (parsed as decimal instead)
+ * - binary, such as 0b1000 (parsed as 0b with trailing garbage "1000")
+ * - fractional hex, such as 0x1.8 (parsed as 0 with trailing garbage "x1.8")
+ * - negative values, including -0 (fail with -ERANGE)
+ * - floating point exponents, such as 1e3 (parsed as 1e with trailing
+ *   garbage "3") or 0x1p3 (rejected as hex with scaling suffix)
+ * - non-finite values, such as inf or NaN (fail with -EINVAL)
+ *
+ * The end pointer will be returned in *end, if not NULL.  If there is
+ * no fraction, the input can be decimal or hexadecimal; if there is a
+ * non-zero fraction, then the input must be decimal and there must be
+ * a suffix (possibly by @default_suffix) larger than Byte, and the
+ * fractional portion may suffer from precision loss or rounding.  The
+ * input must be positive.
+ *
+ * Return -ERANGE on overflow (with *@end advanced), and -EINVAL on
+ * other error (with *@end at @nptr).  Unlike strtoull, *@result is
+ * set to 0 on all errors, as returning UINT64_MAX on overflow is less
+ * likely to be usable as a size.
  */
 static int do_strtosz(const char *nptr, const char **end,
                       const char default_suffix, int64_t unit,
@@ -253,48 +227,140 @@ static int do_strtosz(const char *nptr, const char **end,
     int retval;
     const char *endptr;
     unsigned char c;
-    int mul_required = 0;
-    double val, mul, integral, fraction;
+    uint64_t val = 0, valf = 0;
+    int64_t mul;
 
-    retval = qemu_strtod_finite(nptr, &endptr, &val);
-    if (retval) {
+    /* Parse integral portion as decimal. */
+    retval = parse_uint(nptr, &endptr, 10, &val);
+    if (retval == -ERANGE || !nptr) {
         goto out;
     }
-    fraction = modf(val, &integral);
-    if (fraction != 0) {
-        mul_required = 1;
+    if (retval == 0 && val == 0 && (*endptr == 'x' || *endptr == 'X')) {
+        /* Input looks like hex; reparse, and insist on no fraction or suffix. */
+        retval = qemu_strtou64(nptr, &endptr, 16, &val);
+        if (retval) {
+            goto out;
+        }
+        if (*endptr == '.' || suffix_mul(*endptr, unit) > 0) {
+            endptr = nptr;
+            retval = -EINVAL;
+            goto out;
+        }
+    } else if (*endptr == '.' || (endptr == nptr && strchr(nptr, '.'))) {
+        /*
+         * Input looks like a fraction.  Make sure even 1.k works
+         * without fractional digits.  strtod tries to treat 'e' as an
+         * exponent, but we want to treat it as a scaling suffix;
+         * doing this requires modifying a copy of the fraction.
+         */
+        double fraction = 0.0;
+
+        if (retval == 0 && *endptr == '.' && !isdigit(endptr[1])) {
+            /* If we got here, we parsed at least one digit already. */
+            endptr++;
+        } else {
+            char *e;
+            const char *tail;
+            g_autofree char *copy = g_strdup(endptr);
+
+            e = strchr(copy, 'e');
+            if (e) {
+                *e = '\0';
+            }
+            e = strchr(copy, 'E');
+            if (e) {
+                *e = '\0';
+            }
+            /*
+             * If this is a floating point, we are guaranteed that '.'
+             * appears before any possible digits in copy.  If it is
+             * not a floating point, strtod will fail.  Either way,
+             * there is now no exponent in copy, so if it parses, we
+             * know 0.0 <= abs(result) <= 1.0 (after rounding), and
+             * ERANGE is only possible on underflow which is okay.
+             */
+            retval = qemu_strtod_finite(copy, &tail, &fraction);
+            endptr += tail - copy;
+            if (signbit(fraction)) {
+                retval = -ERANGE;
+                goto out;
+            }
+        }
+
+        /* Extract into a 64-bit fixed-point fraction. */
+        if (fraction == 1.0) {
+            if (val == UINT64_MAX) {
+                retval = -ERANGE;
+                goto out;
+            }
+            val++;
+        } else if (retval == -ERANGE) {
+            /* See comments above about underflow */
+            valf = 1;
+            retval = 0;
+        } else {
+            /* We want non-zero valf for any non-zero fraction */
+            valf = (uint64_t)(fraction * 0x1p64);
+            if (valf == 0 && fraction > 0.0) {
+                valf = 1;
+            }
+        }
+    }
+    if (retval) {
+        goto out;
     }
     c = *endptr;
     mul = suffix_mul(c, unit);
-    if (mul >= 0) {
+    if (mul > 0) {
         endptr++;
     } else {
         mul = suffix_mul(default_suffix, unit);
-        assert(mul >= 0);
+        assert(mul > 0);
     }
-    if (mul == 1 && mul_required) {
-        retval = -EINVAL;
-        goto out;
-    }
-    /*
-     * Values near UINT64_MAX overflow to 2**64 when converting to double
-     * precision.  Compare against the maximum representable double precision
-     * value below 2**64, computed as "the next value after 2**64 (0x1p64) in
-     * the direction of 0".
-     */
-    if ((val * mul > nextafter(0x1p64, 0)) || val < 0) {
-        retval = -ERANGE;
-        goto out;
+    if (mul == 1) {
+        /* When a fraction is present, a scale is required. */
+        if (valf != 0) {
+            endptr = nptr;
+            retval = -EINVAL;
+            goto out;
+        }
+    } else {
+        uint64_t valh, tmp;
+
+        /* Compute exact result: 64.64 x 64.0 -> 128.64 fixed point */
+        mulu64(&val, &valh, val, mul);
+        mulu64(&valf, &tmp, valf, mul);
+        val += tmp;
+        valh += val < tmp;
+
+        /* Round 0.5 upward. */
+        tmp = valf >> 63;
+        val += tmp;
+        valh += val < tmp;
+
+        /* Report overflow. */
+        if (valh != 0) {
+            retval = -ERANGE;
+            goto out;
+        }
     }
-    *result = val * mul;
+
     retval = 0;
 
 out:
     if (end) {
         *end = endptr;
-    } else if (*endptr) {
+    } else if (nptr && *endptr) {
         retval = -EINVAL;
     }
+    if (retval == 0) {
+        *result = val;
+    } else {
+        *result = 0;
+        if (end && retval == -EINVAL) {
+            *end = nptr;
+        }
+    }
 
     return retval;
 }
@@ -318,9 +384,22 @@ int qemu_strtosz_metric(const char *nptr, const char **end, uint64_t *result)
  * Helper function for error checking after strtol() and the like
  */
 static int check_strtox_error(const char *nptr, char *ep,
-                              const char **endptr, int libc_errno)
+                              const char **endptr, bool check_zero,
+                              int libc_errno)
 {
     assert(ep >= nptr);
+
+    /* Windows has a bug in that it fails to parse 0 from "0x" in base 16 */
+    if (check_zero && ep == nptr && libc_errno == 0) {
+        char *tmp;
+
+        errno = 0;
+        if (strtol(nptr, &tmp, 10) == 0 && errno == 0 &&
+            (*tmp == 'x' || *tmp == 'X')) {
+            ep = tmp;
+        }
+    }
+
     if (endptr) {
         *endptr = ep;
     }
@@ -347,12 +426,13 @@ static int check_strtox_error(const char *nptr, char *ep,
  *
  * @nptr may be null, and no conversion is performed then.
  *
- * If no conversion is performed, store @nptr in *@endptr and return
- * -EINVAL.
+ * If no conversion is performed, store @nptr in *@endptr, 0 in
+ * @result, and return -EINVAL.
  *
  * If @endptr is null, and the string isn't fully converted, return
- * -EINVAL.  This is the case when the pointer that would be stored in
- * a non-null @endptr points to a character other than '\0'.
+ * -EINVAL with @result set to the parsed value.  This is the case
+ * when the pointer that would be stored in a non-null @endptr points
+ * to a character other than '\0'.
  *
  * If the conversion overflows @result, store INT_MAX in @result,
  * and return -ERANGE.
@@ -361,6 +441,9 @@ static int check_strtox_error(const char *nptr, char *ep,
  * and return -ERANGE.
  *
  * Else store the converted value in @result, and return zero.
+ *
+ * This matches the behavior of strtol() on 32-bit platforms, even on
+ * platforms where long is 64-bits.
  */
 int qemu_strtoi(const char *nptr, const char **endptr, int base,
                 int *result)
@@ -370,6 +453,7 @@ int qemu_strtoi(const char *nptr, const char **endptr, int base,
 
     assert((unsigned) base <= 36 && base != 1);
     if (!nptr) {
+        *result = 0;
         if (endptr) {
             *endptr = nptr;
         }
@@ -387,7 +471,7 @@ int qemu_strtoi(const char *nptr, const char **endptr, int base,
     } else {
         *result = lresult;
     }
-    return check_strtox_error(nptr, ep, endptr, errno);
+    return check_strtox_error(nptr, ep, endptr, lresult == 0, errno);
 }
 
 /**
@@ -399,12 +483,13 @@ int qemu_strtoi(const char *nptr, const char **endptr, int base,
  *
  * @nptr may be null, and no conversion is performed then.
  *
- * If no conversion is performed, store @nptr in *@endptr and return
- * -EINVAL.
+ * If no conversion is performed, store @nptr in *@endptr, 0 in
+ * @result, and return -EINVAL.
  *
  * If @endptr is null, and the string isn't fully converted, return
- * -EINVAL.  This is the case when the pointer that would be stored in
- * a non-null @endptr points to a character other than '\0'.
+ * -EINVAL with @result set to the parsed value.  This is the case
+ * when the pointer that would be stored in a non-null @endptr points
+ * to a character other than '\0'.
  *
  * If the conversion overflows @result, store UINT_MAX in @result,
  * and return -ERANGE.
@@ -413,16 +498,19 @@ int qemu_strtoi(const char *nptr, const char **endptr, int base,
  *
  * Note that a number with a leading minus sign gets converted without
  * the minus sign, checked for overflow (see above), then negated (in
- * @result's type).  This is exactly how strtoul() works.
+ * @result's type).  This matches the behavior of strtoul() on 32-bit
+ * platforms, even on platforms where long is 64-bits.
  */
 int qemu_strtoui(const char *nptr, const char **endptr, int base,
                  unsigned int *result)
 {
     char *ep;
-    long long lresult;
+    unsigned long long lresult;
+    bool neg;
 
     assert((unsigned) base <= 36 && base != 1);
     if (!nptr) {
+        *result = 0;
         if (endptr) {
             *endptr = nptr;
         }
@@ -436,17 +524,25 @@ int qemu_strtoui(const char *nptr, const char **endptr, int base,
     if (errno == ERANGE) {
         *result = -1;
     } else {
+        /*
+         * Note that platforms with 32-bit strtoul only accept input
+         * in the range [-4294967295, 4294967295]; but we used 64-bit
+         * strtoull which wraps -18446744073709551615 to 1 instead of
+         * declaring overflow.  So we must check if '-' was parsed,
+         * and if so, undo the negation before doing our bounds check.
+         */
+        neg = memchr(nptr, '-', ep - nptr) != NULL;
+        if (neg) {
+            lresult = -lresult;
+        }
         if (lresult > UINT_MAX) {
             *result = UINT_MAX;
             errno = ERANGE;
-        } else if (lresult < INT_MIN) {
-            *result = UINT_MAX;
-            errno = ERANGE;
         } else {
-            *result = lresult;
+            *result = neg ? -lresult : lresult;
         }
     }
-    return check_strtox_error(nptr, ep, endptr, errno);
+    return check_strtox_error(nptr, ep, endptr, lresult == 0, errno);
 }
 
 /**
@@ -458,12 +554,13 @@ int qemu_strtoui(const char *nptr, const char **endptr, int base,
  *
  * @nptr may be null, and no conversion is performed then.
  *
- * If no conversion is performed, store @nptr in *@endptr and return
- * -EINVAL.
+ * If no conversion is performed, store @nptr in *@endptr, 0 in
+ * @result, and return -EINVAL.
  *
  * If @endptr is null, and the string isn't fully converted, return
- * -EINVAL.  This is the case when the pointer that would be stored in
- * a non-null @endptr points to a character other than '\0'.
+ * -EINVAL with @result set to the parsed value.  This is the case
+ * when the pointer that would be stored in a non-null @endptr points
+ * to a character other than '\0'.
  *
  * If the conversion overflows @result, store LONG_MAX in @result,
  * and return -ERANGE.
@@ -480,6 +577,7 @@ int qemu_strtol(const char *nptr, const char **endptr, int base,
 
     assert((unsigned) base <= 36 && base != 1);
     if (!nptr) {
+        *result = 0;
         if (endptr) {
             *endptr = nptr;
         }
@@ -488,7 +586,7 @@ int qemu_strtol(const char *nptr, const char **endptr, int base,
 
     errno = 0;
     *result = strtol(nptr, &ep, base);
-    return check_strtox_error(nptr, ep, endptr, errno);
+    return check_strtox_error(nptr, ep, endptr, *result == 0, errno);
 }
 
 /**
@@ -500,12 +598,13 @@ int qemu_strtol(const char *nptr, const char **endptr, int base,
  *
  * @nptr may be null, and no conversion is performed then.
  *
- * If no conversion is performed, store @nptr in *@endptr and return
- * -EINVAL.
+ * If no conversion is performed, store @nptr in *@endptr, 0 in
+ * @result, and return -EINVAL.
  *
  * If @endptr is null, and the string isn't fully converted, return
- * -EINVAL.  This is the case when the pointer that would be stored in
- * a non-null @endptr points to a character other than '\0'.
+ * -EINVAL with @result set to the parsed value.  This is the case
+ * when the pointer that would be stored in a non-null @endptr points
+ * to a character other than '\0'.
  *
  * If the conversion overflows @result, store ULONG_MAX in @result,
  * and return -ERANGE.
@@ -523,6 +622,7 @@ int qemu_strtoul(const char *nptr, const char **endptr, int base,
 
     assert((unsigned) base <= 36 && base != 1);
     if (!nptr) {
+        *result = 0;
         if (endptr) {
             *endptr = nptr;
         }
@@ -535,7 +635,7 @@ int qemu_strtoul(const char *nptr, const char **endptr, int base,
     if (errno == ERANGE) {
         *result = -1;
     }
-    return check_strtox_error(nptr, ep, endptr, errno);
+    return check_strtox_error(nptr, ep, endptr, *result == 0, errno);
 }
 
 /**
@@ -551,6 +651,7 @@ int qemu_strtoi64(const char *nptr, const char **endptr, int base,
 
     assert((unsigned) base <= 36 && base != 1);
     if (!nptr) {
+        *result = 0;
         if (endptr) {
             *endptr = nptr;
         }
@@ -561,13 +662,15 @@ int qemu_strtoi64(const char *nptr, const char **endptr, int base,
     QEMU_BUILD_BUG_ON(sizeof(int64_t) != sizeof(long long));
     errno = 0;
     *result = strtoll(nptr, &ep, base);
-    return check_strtox_error(nptr, ep, endptr, errno);
+    return check_strtox_error(nptr, ep, endptr, *result == 0, errno);
 }
 
 /**
  * Convert string @nptr to an uint64_t.
  *
  * Works like qemu_strtoul(), except it stores UINT64_MAX on overflow.
+ * (If you want to prohibit negative numbers that wrap around to
+ * positive, use parse_uint()).
  */
 int qemu_strtou64(const char *nptr, const char **endptr, int base,
                   uint64_t *result)
@@ -576,6 +679,7 @@ int qemu_strtou64(const char *nptr, const char **endptr, int base,
 
     assert((unsigned) base <= 36 && base != 1);
     if (!nptr) {
+        *result = 0;
         if (endptr) {
             *endptr = nptr;
         }
@@ -590,7 +694,7 @@ int qemu_strtou64(const char *nptr, const char **endptr, int base,
     if (errno == ERANGE) {
         *result = -1;
     }
-    return check_strtox_error(nptr, ep, endptr, errno);
+    return check_strtox_error(nptr, ep, endptr, *result == 0, errno);
 }
 
 /**
@@ -602,12 +706,13 @@ int qemu_strtou64(const char *nptr, const char **endptr, int base,
  *
  * @nptr may be null, and no conversion is performed then.
  *
- * If no conversion is performed, store @nptr in *@endptr and return
- * -EINVAL.
+ * If no conversion is performed, store @nptr in *@endptr, +0.0 in
+ * @result, and return -EINVAL.
  *
  * If @endptr is null, and the string isn't fully converted, return
- * -EINVAL. This is the case when the pointer that would be stored in
- * a non-null @endptr points to a character other than '\0'.
+ * -EINVAL with @result set to the parsed value.  This is the case
+ * when the pointer that would be stored in a non-null @endptr points
+ * to a character other than '\0'.
  *
  * If the conversion overflows, store +/-HUGE_VAL in @result, depending
  * on the sign, and return -ERANGE.
@@ -622,6 +727,7 @@ int qemu_strtod(const char *nptr, const char **endptr, double *result)
     char *ep;
 
     if (!nptr) {
+        *result = 0.0;
         if (endptr) {
             *endptr = nptr;
         }
@@ -630,30 +736,34 @@ int qemu_strtod(const char *nptr, const char **endptr, double *result)
 
     errno = 0;
     *result = strtod(nptr, &ep);
-    return check_strtox_error(nptr, ep, endptr, errno);
+    return check_strtox_error(nptr, ep, endptr, false, errno);
 }
 
 /**
  * Convert string @nptr to a finite double.
  *
- * Works like qemu_strtod(), except that "NaN" and "inf" are rejected
- * with -EINVAL and no conversion is performed.
+ * Works like qemu_strtod(), except that "NaN", "inf", and strings
+ * that cause ERANGE overflow errors are rejected with -EINVAL as if
+ * no conversion is performed, storing 0.0 into @result regardless of
+ * any sign.  -ERANGE failures for underflow still preserve the parsed
+ * sign.
  */
 int qemu_strtod_finite(const char *nptr, const char **endptr, double *result)
 {
-    double tmp;
+    const char *tmp;
     int ret;
 
-    ret = qemu_strtod(nptr, endptr, &tmp);
-    if (!ret && !isfinite(tmp)) {
+    ret = qemu_strtod(nptr, &tmp, result);
+    if (!isfinite(*result)) {
         if (endptr) {
             *endptr = nptr;
         }
+        *result = 0.0;
+        ret = -EINVAL;
+    } else if (endptr) {
+        *endptr = tmp;
+    } else if (*tmp) {
         ret = -EINVAL;
-    }
-
-    if (ret != -EINVAL) {
-        *result = tmp;
     }
     return ret;
 }
@@ -677,32 +787,33 @@ const char *qemu_strchrnul(const char *s, int c)
  * parse_uint:
  *
  * @s: String to parse
- * @value: Destination for parsed integer value
  * @endptr: Destination for pointer to first character not consumed
  * @base: integer base, between 2 and 36 inclusive, or 0
+ * @value: Destination for parsed integer value
  *
  * Parse unsigned integer
  *
  * Parsed syntax is like strtoull()'s: arbitrary whitespace, a single optional
  * '+' or '-', an optional "0x" if @base is 0 or 16, one or more digits.
  *
- * If @s is null, or @base is invalid, or @s doesn't start with an
- * integer in the syntax above, set *@value to 0, *@endptr to @s, and
- * return -EINVAL.
+ * If @s is null, or @s doesn't start with an integer in the syntax
+ * above, set *@value to 0, *@endptr to @s, and return -EINVAL.
  *
  * Set *@endptr to point right beyond the parsed integer (even if the integer
  * overflows or is negative, all digits will be parsed and *@endptr will
- * point right beyond them).
+ * point right beyond them).  If @endptr is %NULL, any trailing character
+ * instead causes a result of -EINVAL with *@value of 0.
  *
  * If the integer is negative, set *@value to 0, and return -ERANGE.
+ * (If you want to allow negative numbers that wrap around within
+ * bounds, use qemu_strtou64()).
  *
  * If the integer overflows unsigned long long, set *@value to
  * ULLONG_MAX, and return -ERANGE.
  *
  * Else, set *@value to the parsed integer, and return 0.
  */
-int parse_uint(const char *s, unsigned long long *value, char **endptr,
-               int base)
+int parse_uint(const char *s, const char **endptr, int base, uint64_t *value)
 {
     int r = 0;
     char *endp = (char *)s;
@@ -738,7 +849,12 @@ int parse_uint(const char *s, unsigned long long *value, char **endptr,
 
 out:
     *value = val;
-    *endptr = endp;
+    if (endptr) {
+        *endptr = endp;
+    } else if (s && *endp) {
+        r = -EINVAL;
+        *value = 0;
+    }
     return r;
 }
 
@@ -746,31 +862,16 @@ out:
  * parse_uint_full:
  *
  * @s: String to parse
- * @value: Destination for parsed integer value
  * @base: integer base, between 2 and 36 inclusive, or 0
+ * @value: Destination for parsed integer value
  *
- * Parse unsigned integer from entire string
+ * Parse unsigned integer from entire string, rejecting any trailing slop.
  *
- * Have the same behavior of parse_uint(), but with an additional check
- * for additional data after the parsed number. If extra characters are present
- * after the parsed number, the function will return -EINVAL, and *@v will
- * be set to 0.
+ * Shorthand for parse_uint(s, NULL, base, value).
  */
-int parse_uint_full(const char *s, unsigned long long *value, int base)
+int parse_uint_full(const char *s, int base, uint64_t *value)
 {
-    char *endp;
-    int r;
-
-    r = parse_uint(s, value, &endp, base);
-    if (r < 0) {
-        return r;
-    }
-    if (*endp) {
-        *value = 0;
-        return -EINVAL;
-    }
-
-    return 0;
+    return parse_uint(s, NULL, base, value);
 }
 
 int qemu_parse_fd(const char *param)
@@ -847,17 +948,23 @@ int parse_debug_env(const char *name, int max, int initial)
     return debug;
 }
 
-/*
- * Helper to print ethernet mac address
- */
-const char *qemu_ether_ntoa(const MACAddr *mac)
+const char *si_prefix(unsigned int exp10)
 {
-    static char ret[18];
+    static const char *prefixes[] = {
+        "a", "f", "p", "n", "u", "m", "", "K", "M", "G", "T", "P", "E"
+    };
+
+    exp10 += 18;
+    assert(exp10 % 3 == 0 && exp10 / 3 < ARRAY_SIZE(prefixes));
+    return prefixes[exp10 / 3];
+}
 
-    snprintf(ret, sizeof(ret), "%02x:%02x:%02x:%02x:%02x:%02x",
-             mac->a[0], mac->a[1], mac->a[2], mac->a[3], mac->a[4], mac->a[5]);
+const char *iec_binary_prefix(unsigned int exp2)
+{
+    static const char *prefixes[] = { "", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei" };
 
-    return ret;
+    assert(exp2 % 10 == 0 && exp2 / 10 < ARRAY_SIZE(prefixes));
+    return prefixes[exp2 / 10];
 }
 
 /*
@@ -868,7 +975,6 @@ const char *qemu_ether_ntoa(const MACAddr *mac)
  */
 char *size_to_str(uint64_t val)
 {
-    static const char *suffixes[] = { "", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei" };
     uint64_t div;
     int i;
 
@@ -879,25 +985,23 @@ char *size_to_str(uint64_t val)
      * (see e41b509d68afb1f for more info)
      */
     frexp(val / (1000.0 / 1024.0), &i);
-    i = (i - 1) / 10;
-    div = 1ULL << (i * 10);
+    i = (i - 1) / 10 * 10;
+    div = 1ULL << i;
 
-    return g_strdup_printf("%0.3g %sB", (double)val / div, suffixes[i]);
+    return g_strdup_printf("%0.3g %sB", (double)val / div, iec_binary_prefix(i));
 }
 
 char *freq_to_str(uint64_t freq_hz)
 {
-    static const char *const suffixes[] = { "", "K", "M", "G", "T", "P", "E" };
     double freq = freq_hz;
-    size_t idx = 0;
+    size_t exp10 = 0;
 
     while (freq >= 1000.0) {
         freq /= 1000.0;
-        idx++;
+        exp10 += 3;
     }
-    assert(idx < ARRAY_SIZE(suffixes));
 
-    return g_strdup_printf("%0.3g %sHz", freq, suffixes[idx]);
+    return g_strdup_printf("%0.3g %sHz", freq, si_prefix(exp10));
 }
 
 int qemu_pstrcmp0(const char **str1, const char **str2)
@@ -908,15 +1012,25 @@ int qemu_pstrcmp0(const char **str1, const char **str2)
 static inline bool starts_with_prefix(const char *dir)
 {
     size_t prefix_len = strlen(CONFIG_PREFIX);
+    /*
+     * dir[prefix_len] is only accessed if the length of dir is
+     * >= prefix_len, so no out of bounds access is possible.
+     */
+#pragma GCC diagnostic push
+#if !defined(__clang__) || __has_warning("-Warray-bounds=")
+#pragma GCC diagnostic ignored "-Warray-bounds="
+#endif
     return !memcmp(dir, CONFIG_PREFIX, prefix_len) &&
         (!dir[prefix_len] || G_IS_DIR_SEPARATOR(dir[prefix_len]));
+#pragma GCC diagnostic pop
 }
 
 /* Return the next path component in dir, and store its length in *p_len.  */
 static inline const char *next_component(const char *dir, int *p_len)
 {
     int len;
-    while (*dir && G_IS_DIR_SEPARATOR(*dir)) {
+    while ((*dir && G_IS_DIR_SEPARATOR(*dir)) ||
+           (*dir == '.' && (G_IS_DIR_SEPARATOR(dir[1]) || dir[1] == '\0'))) {
         dir++;
     }
     len = 0;
@@ -927,41 +1041,178 @@ static inline const char *next_component(const char *dir, int *p_len)
     return dir;
 }
 
+static const char *exec_dir;
+
+void qemu_init_exec_dir(const char *argv0)
+{
+#ifdef G_OS_WIN32
+    char *p;
+    char buf[MAX_PATH];
+    DWORD len;
+
+    if (exec_dir) {
+        return;
+    }
+
+    len = GetModuleFileName(NULL, buf, sizeof(buf) - 1);
+    if (len == 0) {
+        return;
+    }
+
+    buf[len] = 0;
+    p = buf + len - 1;
+    while (p != buf && *p != '\\') {
+        p--;
+    }
+    *p = 0;
+    if (access(buf, R_OK) == 0) {
+        exec_dir = g_strdup(buf);
+    } else {
+        exec_dir = CONFIG_BINDIR;
+    }
+#else
+    char *p = NULL;
+    char buf[PATH_MAX];
+
+    if (exec_dir) {
+        return;
+    }
+
+#if defined(__linux__)
+    {
+        int len;
+        len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
+        if (len > 0) {
+            buf[len] = 0;
+            p = buf;
+        }
+    }
+#elif defined(__FreeBSD__) \
+      || (defined(__NetBSD__) && defined(KERN_PROC_PATHNAME))
+    {
+#if defined(__FreeBSD__)
+        static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
+#else
+        static int mib[4] = {CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME};
+#endif
+        size_t len = sizeof(buf) - 1;
+
+        *buf = '\0';
+        if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
+            *buf) {
+            buf[sizeof(buf) - 1] = '\0';
+            p = buf;
+        }
+    }
+#elif defined(__APPLE__)
+    {
+        char fpath[PATH_MAX];
+        uint32_t len = sizeof(fpath);
+        if (_NSGetExecutablePath(fpath, &len) == 0) {
+            p = realpath(fpath, buf);
+            if (!p) {
+                return;
+            }
+        }
+    }
+#elif defined(__HAIKU__)
+    {
+        image_info ii;
+        int32_t c = 0;
+
+        *buf = '\0';
+        while (get_next_image_info(0, &c, &ii) == B_OK) {
+            if (ii.type == B_APP_IMAGE) {
+                strncpy(buf, ii.name, sizeof(buf));
+                buf[sizeof(buf) - 1] = 0;
+                p = buf;
+                break;
+            }
+        }
+    }
+#endif
+    /* If we don't have any way of figuring out the actual executable
+       location then try argv[0].  */
+    if (!p && argv0) {
+        p = realpath(argv0, buf);
+    }
+    if (p) {
+        exec_dir = g_path_get_dirname(p);
+    } else {
+        exec_dir = CONFIG_BINDIR;
+    }
+#endif
+}
+
+const char *qemu_get_exec_dir(void)
+{
+    return exec_dir;
+}
+
 char *get_relocated_path(const char *dir)
 {
     size_t prefix_len = strlen(CONFIG_PREFIX);
     const char *bindir = CONFIG_BINDIR;
-    const char *exec_dir = qemu_get_exec_dir();
     GString *result;
     int len_dir, len_bindir;
 
     /* Fail if qemu_init_exec_dir was not called.  */
     assert(exec_dir[0]);
-    if (!starts_with_prefix(dir) || !starts_with_prefix(bindir)) {
-        return g_strdup(dir);
-    }
 
     result = g_string_new(exec_dir);
+    g_string_append(result, "/qemu-bundle");
+    if (access(result->str, R_OK) == 0) {
+#ifdef G_OS_WIN32
+        const char *src = dir;
+        size_t size = mbsrtowcs(NULL, &src, 0, &(mbstate_t){0}) + 1;
+        PWSTR wdir = g_new(WCHAR, size);
+        mbsrtowcs(wdir, &src, size, &(mbstate_t){0});
+
+        PCWSTR wdir_skipped_root;
+        if (PathCchSkipRoot(wdir, &wdir_skipped_root) == S_OK) {
+            size = wcsrtombs(NULL, &wdir_skipped_root, 0, &(mbstate_t){0});
+            char *cursor = result->str + result->len;
+            g_string_set_size(result, result->len + size);
+            wcsrtombs(cursor, &wdir_skipped_root, size + 1, &(mbstate_t){0});
+        } else {
+            g_string_append(result, dir);
+        }
 
-    /* Advance over common components.  */
-    len_dir = len_bindir = prefix_len;
-    do {
-        dir += len_dir;
-        bindir += len_bindir;
-        dir = next_component(dir, &len_dir);
-        bindir = next_component(bindir, &len_bindir);
-    } while (len_dir && len_dir == len_bindir && !memcmp(dir, bindir, len_dir));
-
-    /* Ascend from bindir to the common prefix with dir.  */
-    while (len_bindir) {
-        bindir += len_bindir;
-        g_string_append(result, "/..");
-        bindir = next_component(bindir, &len_bindir);
+        g_free(wdir);
+#else
+        g_string_append(result, dir);
+#endif
+        goto out;
     }
 
-    if (*dir) {
-        assert(G_IS_DIR_SEPARATOR(dir[-1]));
-        g_string_append(result, dir - 1);
+    if (IS_ENABLED(CONFIG_RELOCATABLE) &&
+        starts_with_prefix(dir) && starts_with_prefix(bindir)) {
+        g_string_assign(result, exec_dir);
+
+        /* Advance over common components.  */
+        len_dir = len_bindir = prefix_len;
+        do {
+            dir += len_dir;
+            bindir += len_bindir;
+            dir = next_component(dir, &len_dir);
+            bindir = next_component(bindir, &len_bindir);
+        } while (len_dir && len_dir == len_bindir && !memcmp(dir, bindir, len_dir));
+
+        /* Ascend from bindir to the common prefix with dir.  */
+        while (len_bindir) {
+            bindir += len_bindir;
+            g_string_append(result, "/..");
+            bindir = next_component(bindir, &len_bindir);
+        }
+
+        if (*dir) {
+            assert(G_IS_DIR_SEPARATOR(dir[-1]));
+            g_string_append(result, dir - 1);
+        }
+        goto out;
     }
-    return result->str;
+
+    g_string_assign(result, dir);
+out:
+    return g_string_free(result, false);
 }
diff --git a/util/defer-call.c b/util/defer-call.c
new file mode 100644 (file)
index 0000000..037dc0a
--- /dev/null
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Deferred calls
+ *
+ * Copyright Red Hat.
+ *
+ * This API defers a function call within a defer_call_begin()/defer_call_end()
+ * section, allowing multiple calls to batch up. This is a performance
+ * optimization that is used in the block layer to submit several I/O requests
+ * at once instead of individually:
+ *
+ *   defer_call_begin(); <-- start of section
+ *   ...
+ *   defer_call(my_func, my_obj); <-- deferred my_func(my_obj) call
+ *   defer_call(my_func, my_obj); <-- another
+ *   defer_call(my_func, my_obj); <-- another
+ *   ...
+ *   defer_call_end(); <-- end of section, my_func(my_obj) is called once
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/coroutine-tls.h"
+#include "qemu/notify.h"
+#include "qemu/thread.h"
+#include "qemu/defer-call.h"
+
+/* A function call that has been deferred until defer_call_end() */
+typedef struct {
+    void (*fn)(void *);
+    void *opaque;
+} DeferredCall;
+
+/* Per-thread state */
+typedef struct {
+    unsigned nesting_level;
+    GArray *deferred_call_array;
+} DeferCallThreadState;
+
+/* Use get_ptr_defer_call_thread_state() to fetch this thread-local value */
+QEMU_DEFINE_STATIC_CO_TLS(DeferCallThreadState, defer_call_thread_state);
+
+/* Called at thread cleanup time */
+static void defer_call_atexit(Notifier *n, void *value)
+{
+    DeferCallThreadState *thread_state = get_ptr_defer_call_thread_state();
+    g_array_free(thread_state->deferred_call_array, TRUE);
+}
+
+/* This won't involve coroutines, so use __thread */
+static __thread Notifier defer_call_atexit_notifier;
+
+/**
+ * defer_call:
+ * @fn: a function pointer to be invoked
+ * @opaque: a user-defined argument to @fn()
+ *
+ * Call @fn(@opaque) immediately if not within a
+ * defer_call_begin()/defer_call_end() section.
+ *
+ * Otherwise defer the call until the end of the outermost
+ * defer_call_begin()/defer_call_end() section in this thread. If the same
+ * @fn/@opaque pair has already been deferred, it will only be called once upon
+ * defer_call_end() so that accumulated calls are batched into a single call.
+ *
+ * The caller must ensure that @opaque is not freed before @fn() is invoked.
+ */
+void defer_call(void (*fn)(void *), void *opaque)
+{
+    DeferCallThreadState *thread_state = get_ptr_defer_call_thread_state();
+
+    /* Call immediately if we're not deferring calls */
+    if (thread_state->nesting_level == 0) {
+        fn(opaque);
+        return;
+    }
+
+    GArray *array = thread_state->deferred_call_array;
+    if (!array) {
+        array = g_array_new(FALSE, FALSE, sizeof(DeferredCall));
+        thread_state->deferred_call_array = array;
+        defer_call_atexit_notifier.notify = defer_call_atexit;
+        qemu_thread_atexit_add(&defer_call_atexit_notifier);
+    }
+
+    DeferredCall *fns = (DeferredCall *)array->data;
+    DeferredCall new_fn = {
+        .fn = fn,
+        .opaque = opaque,
+    };
+
+    /*
+     * There won't be many, so do a linear search. If this becomes a bottleneck
+     * then a binary search (glib 2.62+) or different data structure could be
+     * used.
+     */
+    for (guint i = 0; i < array->len; i++) {
+        if (memcmp(&fns[i], &new_fn, sizeof(new_fn)) == 0) {
+            return; /* already exists */
+        }
+    }
+
+    g_array_append_val(array, new_fn);
+}
+
+/**
+ * defer_call_begin: Defer defer_call() functions until defer_call_end()
+ *
+ * defer_call_begin() and defer_call_end() are thread-local operations. The
+ * caller must ensure that each defer_call_begin() has a matching
+ * defer_call_end() in the same thread.
+ *
+ * Nesting is supported. defer_call() functions are only called at the
+ * outermost defer_call_end().
+ */
+void defer_call_begin(void)
+{
+    DeferCallThreadState *thread_state = get_ptr_defer_call_thread_state();
+
+    assert(thread_state->nesting_level < UINT32_MAX);
+
+    thread_state->nesting_level++;
+}
+
+/**
+ * defer_call_end: Run any pending defer_call() functions
+ *
+ * There must have been a matching defer_call_begin() call in the same thread
+ * prior to this defer_call_end() call.
+ */
+void defer_call_end(void)
+{
+    DeferCallThreadState *thread_state = get_ptr_defer_call_thread_state();
+
+    assert(thread_state->nesting_level > 0);
+
+    if (--thread_state->nesting_level > 0) {
+        return;
+    }
+
+    GArray *array = thread_state->deferred_call_array;
+    if (!array) {
+        return;
+    }
+
+    DeferredCall *fns = (DeferredCall *)array->data;
+
+    for (guint i = 0; i < array->len; i++) {
+        fns[i].fn(fns[i].opaque);
+    }
+
+    /*
+     * This resets the array without freeing memory so that appending is cheap
+     * in the future.
+     */
+    g_array_set_size(array, 0);
+}
index 2bcc13f09415484edeca0edf3633e063fea51936..db937c04276110db06144132bd3caa2fff398dc7 100644 (file)
@@ -3,13 +3,13 @@
 #include "qemu/envlist.h"
 
 struct envlist_entry {
-       const char *ev_var;                     /* actual env value */
-       QLIST_ENTRY(envlist_entry) ev_link;
+    const char *ev_var;            /* actual env value */
+    QLIST_ENTRY(envlist_entry) ev_link;
 };
 
 struct envlist {
-       QLIST_HEAD(, envlist_entry) el_entries; /* actual entries */
-       size_t el_count;                        /* number of entries */
+    QLIST_HEAD(, envlist_entry) el_entries; /* actual entries */
+    size_t el_count;                        /* number of entries */
 };
 
 static int envlist_parse(envlist_t *envlist,
@@ -21,14 +21,14 @@ static int envlist_parse(envlist_t *envlist,
 envlist_t *
 envlist_create(void)
 {
-       envlist_t *envlist;
+    envlist_t *envlist;
 
-       envlist = g_malloc(sizeof(*envlist));
+    envlist = g_malloc(sizeof(*envlist));
 
-       QLIST_INIT(&envlist->el_entries);
-       envlist->el_count = 0;
+    QLIST_INIT(&envlist->el_entries);
+    envlist->el_count = 0;
 
-       return (envlist);
+    return (envlist);
 }
 
 /*
@@ -37,18 +37,18 @@ envlist_create(void)
 void
 envlist_free(envlist_t *envlist)
 {
-       struct envlist_entry *entry;
+    struct envlist_entry *entry;
 
-       assert(envlist != NULL);
+    assert(envlist != NULL);
 
-       while (envlist->el_entries.lh_first != NULL) {
-               entry = envlist->el_entries.lh_first;
-               QLIST_REMOVE(entry, ev_link);
+    while (envlist->el_entries.lh_first != NULL) {
+        entry = envlist->el_entries.lh_first;
+        QLIST_REMOVE(entry, ev_link);
 
-               g_free((char *)entry->ev_var);
-               g_free(entry);
-       }
-       g_free(envlist);
+        g_free((char *)entry->ev_var);
+        g_free(entry);
+    }
+    g_free(envlist);
 }
 
 /*
@@ -65,7 +65,7 @@ envlist_free(envlist_t *envlist)
 int
 envlist_parse_set(envlist_t *envlist, const char *env)
 {
-       return (envlist_parse(envlist, env, &envlist_setenv));
+    return (envlist_parse(envlist, env, &envlist_setenv));
 }
 
 /*
@@ -77,7 +77,7 @@ envlist_parse_set(envlist_t *envlist, const char *env)
 int
 envlist_parse_unset(envlist_t *envlist, const char *env)
 {
-       return (envlist_parse(envlist, env, &envlist_unsetenv));
+    return (envlist_parse(envlist, env, &envlist_unsetenv));
 }
 
 /*
@@ -90,15 +90,15 @@ static int
 envlist_parse(envlist_t *envlist, const char *env,
     int (*callback)(envlist_t *, const char *))
 {
-       char *tmpenv, *envvar;
-       char *envsave = NULL;
+    char *tmpenv, *envvar;
+    char *envsave = NULL;
     int ret = 0;
     assert(callback != NULL);
 
-       if ((envlist == NULL) || (env == NULL))
-               return (EINVAL);
+    if ((envlist == NULL) || (env == NULL))
+        return (EINVAL);
 
-       tmpenv = g_strdup(env);
+    tmpenv = g_strdup(env);
     envsave = tmpenv;
 
     do {
@@ -109,7 +109,7 @@ envlist_parse(envlist_t *envlist, const char *env,
         if ((*callback)(envlist, tmpenv) != 0) {
             ret = errno;
             break;
-               }
+        }
         tmpenv = envvar + 1;
     } while (envvar != NULL);
 
@@ -126,42 +126,42 @@ envlist_parse(envlist_t *envlist, const char *env,
 int
 envlist_setenv(envlist_t *envlist, const char *env)
 {
-       struct envlist_entry *entry = NULL;
-       const char *eq_sign;
-       size_t envname_len;
-
-       if ((envlist == NULL) || (env == NULL))
-               return (EINVAL);
-
-       /* find out first equals sign in given env */
-       if ((eq_sign = strchr(env, '=')) == NULL)
-               return (EINVAL);
-       envname_len = eq_sign - env + 1;
-
-       /*
-        * If there already exists variable with given name
-        * we remove and release it before allocating a whole
-        * new entry.
-        */
-       for (entry = envlist->el_entries.lh_first; entry != NULL;
-           entry = entry->ev_link.le_next) {
-               if (strncmp(entry->ev_var, env, envname_len) == 0)
-                       break;
-       }
-
-       if (entry != NULL) {
-               QLIST_REMOVE(entry, ev_link);
-               g_free((char *)entry->ev_var);
-               g_free(entry);
-       } else {
-               envlist->el_count++;
-       }
-
-       entry = g_malloc(sizeof(*entry));
-       entry->ev_var = g_strdup(env);
-       QLIST_INSERT_HEAD(&envlist->el_entries, entry, ev_link);
-
-       return (0);
+    struct envlist_entry *entry = NULL;
+    const char *eq_sign;
+    size_t envname_len;
+
+    if ((envlist == NULL) || (env == NULL))
+        return (EINVAL);
+
+    /* find out first equals sign in given env */
+    if ((eq_sign = strchr(env, '=')) == NULL)
+        return (EINVAL);
+    envname_len = eq_sign - env + 1;
+
+    /*
+     * If there already exists variable with given name
+     * we remove and release it before allocating a whole
+     * new entry.
+     */
+    for (entry = envlist->el_entries.lh_first; entry != NULL;
+        entry = entry->ev_link.le_next) {
+        if (strncmp(entry->ev_var, env, envname_len) == 0)
+            break;
+    }
+
+    if (entry != NULL) {
+        QLIST_REMOVE(entry, ev_link);
+        g_free((char *)entry->ev_var);
+        g_free(entry);
+    } else {
+        envlist->el_count++;
+    }
+
+    entry = g_malloc(sizeof(*entry));
+    entry->ev_var = g_strdup(env);
+    QLIST_INSERT_HEAD(&envlist->el_entries, entry, ev_link);
+
+    return (0);
 }
 
 /*
@@ -171,34 +171,34 @@ envlist_setenv(envlist_t *envlist, const char *env)
 int
 envlist_unsetenv(envlist_t *envlist, const char *env)
 {
-       struct envlist_entry *entry;
-       size_t envname_len;
-
-       if ((envlist == NULL) || (env == NULL))
-               return (EINVAL);
-
-       /* env is not allowed to contain '=' */
-       if (strchr(env, '=') != NULL)
-               return (EINVAL);
-
-       /*
-        * Find out the requested entry and remove
-        * it from the list.
-        */
-       envname_len = strlen(env);
-       for (entry = envlist->el_entries.lh_first; entry != NULL;
-           entry = entry->ev_link.le_next) {
-               if (strncmp(entry->ev_var, env, envname_len) == 0)
-                       break;
-       }
-       if (entry != NULL) {
-               QLIST_REMOVE(entry, ev_link);
-               g_free((char *)entry->ev_var);
-               g_free(entry);
-
-               envlist->el_count--;
-       }
-       return (0);
+    struct envlist_entry *entry;
+    size_t envname_len;
+
+    if ((envlist == NULL) || (env == NULL))
+        return (EINVAL);
+
+    /* env is not allowed to contain '=' */
+    if (strchr(env, '=') != NULL)
+        return (EINVAL);
+
+    /*
+     * Find out the requested entry and remove
+     * it from the list.
+     */
+    envname_len = strlen(env);
+    for (entry = envlist->el_entries.lh_first; entry != NULL;
+        entry = entry->ev_link.le_next) {
+        if (strncmp(entry->ev_var, env, envname_len) == 0)
+            break;
+    }
+    if (entry != NULL) {
+        QLIST_REMOVE(entry, ev_link);
+        g_free((char *)entry->ev_var);
+        g_free(entry);
+
+        envlist->el_count--;
+    }
+    return (0);
 }
 
 /*
@@ -214,19 +214,19 @@ envlist_unsetenv(envlist_t *envlist, const char *env)
 char **
 envlist_to_environ(const envlist_t *envlist, size_t *count)
 {
-       struct envlist_entry *entry;
-       char **env, **penv;
+    struct envlist_entry *entry;
+    char **env, **penv;
 
-       penv = env = g_malloc((envlist->el_count + 1) * sizeof(char *));
+    penv = env = g_new(char *, envlist->el_count + 1);
 
-       for (entry = envlist->el_entries.lh_first; entry != NULL;
-           entry = entry->ev_link.le_next) {
-               *(penv++) = g_strdup(entry->ev_var);
-       }
-       *penv = NULL; /* NULL terminate the list */
+    for (entry = envlist->el_entries.lh_first; entry != NULL;
+        entry = entry->ev_link.le_next) {
+        *(penv++) = g_strdup(entry->ev_var);
+    }
+    *penv = NULL; /* NULL terminate the list */
 
-       if (count != NULL)
-               *count = envlist->el_count;
+    if (count != NULL)
+        *count = envlist->el_count;
 
-       return (env);
+    return (env);
 }
diff --git a/util/error-report.c b/util/error-report.c
new file mode 100644 (file)
index 0000000..6e44a55
--- /dev/null
@@ -0,0 +1,404 @@
+/*
+ * Error reporting
+ *
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor/monitor.h"
+#include "qemu/error-report.h"
+
+/*
+ * @report_type is the type of message: error, warning or
+ * informational.
+ */
+typedef enum {
+    REPORT_TYPE_ERROR,
+    REPORT_TYPE_WARNING,
+    REPORT_TYPE_INFO,
+} report_type;
+
+/* Prepend timestamp to messages */
+bool message_with_timestamp;
+bool error_with_guestname;
+const char *error_guest_name;
+
+int error_printf(const char *fmt, ...)
+{
+    va_list ap;
+    int ret;
+
+    va_start(ap, fmt);
+    ret = error_vprintf(fmt, ap);
+    va_end(ap);
+    return ret;
+}
+
+static Location std_loc = {
+    .kind = LOC_NONE
+};
+static Location *cur_loc = &std_loc;
+
+/*
+ * Push location saved in LOC onto the location stack, return it.
+ * The top of that stack is the current location.
+ * Needs a matching loc_pop().
+ */
+Location *loc_push_restore(Location *loc)
+{
+    assert(!loc->prev);
+    loc->prev = cur_loc;
+    cur_loc = loc;
+    return loc;
+}
+
+/*
+ * Initialize *LOC to "nowhere", push it onto the location stack.
+ * The top of that stack is the current location.
+ * Needs a matching loc_pop().
+ * Return LOC.
+ */
+Location *loc_push_none(Location *loc)
+{
+    loc->kind = LOC_NONE;
+    loc->prev = NULL;
+    return loc_push_restore(loc);
+}
+
+/*
+ * Pop the location stack.
+ * LOC must be the current location, i.e. the top of the stack.
+ */
+Location *loc_pop(Location *loc)
+{
+    assert(cur_loc == loc && loc->prev);
+    cur_loc = loc->prev;
+    loc->prev = NULL;
+    return loc;
+}
+
+/*
+ * Save the current location in LOC, return LOC.
+ */
+Location *loc_save(Location *loc)
+{
+    *loc = *cur_loc;
+    loc->prev = NULL;
+    return loc;
+}
+
+/*
+ * Change the current location to the one saved in LOC.
+ */
+void loc_restore(Location *loc)
+{
+    Location *prev = cur_loc->prev;
+    assert(!loc->prev);
+    *cur_loc = *loc;
+    cur_loc->prev = prev;
+}
+
+/*
+ * Change the current location to "nowhere in particular".
+ */
+void loc_set_none(void)
+{
+    cur_loc->kind = LOC_NONE;
+}
+
+/*
+ * Change the current location to argument ARGV[IDX..IDX+CNT-1].
+ */
+void loc_set_cmdline(char **argv, int idx, int cnt)
+{
+    cur_loc->kind = LOC_CMDLINE;
+    cur_loc->num = cnt;
+    cur_loc->ptr = argv + idx;
+}
+
+/*
+ * Change the current location to file FNAME, line LNO.
+ */
+void loc_set_file(const char *fname, int lno)
+{
+    assert (fname || cur_loc->kind == LOC_FILE);
+    cur_loc->kind = LOC_FILE;
+    cur_loc->num = lno;
+    if (fname) {
+        cur_loc->ptr = fname;
+    }
+}
+
+/*
+ * Print current location to current monitor if we have one, else to stderr.
+ */
+static void print_loc(void)
+{
+    const char *sep = "";
+    int i;
+    const char *const *argp;
+
+    if (!monitor_cur() && g_get_prgname()) {
+        error_printf("%s:", g_get_prgname());
+        sep = " ";
+    }
+    switch (cur_loc->kind) {
+    case LOC_CMDLINE:
+        argp = cur_loc->ptr;
+        for (i = 0; i < cur_loc->num; i++) {
+            error_printf("%s%s", sep, argp[i]);
+            sep = " ";
+        }
+        error_printf(": ");
+        break;
+    case LOC_FILE:
+        error_printf("%s:", (const char *)cur_loc->ptr);
+        if (cur_loc->num) {
+            error_printf("%d:", cur_loc->num);
+        }
+        error_printf(" ");
+        break;
+    default:
+        error_printf("%s", sep);
+    }
+}
+
+static char *
+real_time_iso8601(void)
+{
+#if GLIB_CHECK_VERSION(2,62,0)
+    g_autoptr(GDateTime) dt = g_date_time_new_now_utc();
+    /* ignore deprecation warning, since GLIB_VERSION_MAX_ALLOWED is 2.56 */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+    return g_date_time_format_iso8601(dt);
+#pragma GCC diagnostic pop
+#else
+    GTimeVal tv;
+    g_get_current_time(&tv);
+    return g_time_val_to_iso8601(&tv);
+#endif
+}
+
+/*
+ * Print a message to current monitor if we have one, else to stderr.
+ * @report_type is the type of message: error, warning or informational.
+ * Format arguments like vsprintf().  The resulting message should be
+ * a single phrase, with no newline or trailing punctuation.
+ * Prepend the current location and append a newline.
+ */
+G_GNUC_PRINTF(2, 0)
+static void vreport(report_type type, const char *fmt, va_list ap)
+{
+    gchar *timestr;
+
+    if (message_with_timestamp && !monitor_cur()) {
+        timestr = real_time_iso8601();
+        error_printf("%s ", timestr);
+        g_free(timestr);
+    }
+
+    /* Only prepend guest name if -msg guest-name and -name guest=... are set */
+    if (error_with_guestname && error_guest_name && !monitor_cur()) {
+        error_printf("%s ", error_guest_name);
+    }
+
+    print_loc();
+
+    switch (type) {
+    case REPORT_TYPE_ERROR:
+        break;
+    case REPORT_TYPE_WARNING:
+        error_printf("warning: ");
+        break;
+    case REPORT_TYPE_INFO:
+        error_printf("info: ");
+        break;
+    }
+
+    error_vprintf(fmt, ap);
+    error_printf("\n");
+}
+
+/*
+ * Print an error message to current monitor if we have one, else to stderr.
+ * Format arguments like vsprintf().  The resulting message should be
+ * a single phrase, with no newline or trailing punctuation.
+ * Prepend the current location and append a newline.
+ * It's wrong to call this in a QMP monitor.  Use error_setg() there.
+ */
+void error_vreport(const char *fmt, va_list ap)
+{
+    vreport(REPORT_TYPE_ERROR, fmt, ap);
+}
+
+/*
+ * Print a warning message to current monitor if we have one, else to stderr.
+ * Format arguments like vsprintf().  The resulting message should be
+ * a single phrase, with no newline or trailing punctuation.
+ * Prepend the current location and append a newline.
+ */
+void warn_vreport(const char *fmt, va_list ap)
+{
+    vreport(REPORT_TYPE_WARNING, fmt, ap);
+}
+
+/*
+ * Print an information message to current monitor if we have one, else to
+ * stderr.
+ * Format arguments like vsprintf().  The resulting message should be
+ * a single phrase, with no newline or trailing punctuation.
+ * Prepend the current location and append a newline.
+ */
+void info_vreport(const char *fmt, va_list ap)
+{
+    vreport(REPORT_TYPE_INFO, fmt, ap);
+}
+
+/*
+ * Print an error message to current monitor if we have one, else to stderr.
+ * Format arguments like sprintf().  The resulting message should be
+ * a single phrase, with no newline or trailing punctuation.
+ * Prepend the current location and append a newline.
+ * It's wrong to call this in a QMP monitor.  Use error_setg() there.
+ */
+void error_report(const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    vreport(REPORT_TYPE_ERROR, fmt, ap);
+    va_end(ap);
+}
+
+/*
+ * Print a warning message to current monitor if we have one, else to stderr.
+ * Format arguments like sprintf(). The resulting message should be a
+ * single phrase, with no newline or trailing punctuation.
+ * Prepend the current location and append a newline.
+ */
+void warn_report(const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    vreport(REPORT_TYPE_WARNING, fmt, ap);
+    va_end(ap);
+}
+
+/*
+ * Print an information message to current monitor if we have one, else to
+ * stderr.
+ * Format arguments like sprintf(). The resulting message should be a
+ * single phrase, with no newline or trailing punctuation.
+ * Prepend the current location and append a newline.
+ */
+void info_report(const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    vreport(REPORT_TYPE_INFO, fmt, ap);
+    va_end(ap);
+}
+
+/*
+ * Like error_report(), except print just once.
+ * If *printed is false, print the message, and flip *printed to true.
+ * Return whether the message was printed.
+ */
+bool error_report_once_cond(bool *printed, const char *fmt, ...)
+{
+    va_list ap;
+
+    assert(printed);
+    if (*printed) {
+        return false;
+    }
+    *printed = true;
+    va_start(ap, fmt);
+    vreport(REPORT_TYPE_ERROR, fmt, ap);
+    va_end(ap);
+    return true;
+}
+
+/*
+ * Like warn_report(), except print just once.
+ * If *printed is false, print the message, and flip *printed to true.
+ * Return whether the message was printed.
+ */
+bool warn_report_once_cond(bool *printed, const char *fmt, ...)
+{
+    va_list ap;
+
+    assert(printed);
+    if (*printed) {
+        return false;
+    }
+    *printed = true;
+    va_start(ap, fmt);
+    vreport(REPORT_TYPE_WARNING, fmt, ap);
+    va_end(ap);
+    return true;
+}
+
+static char *qemu_glog_domains;
+
+static void qemu_log_func(const gchar *log_domain,
+                          GLogLevelFlags log_level,
+                          const gchar *message,
+                          gpointer user_data)
+{
+    switch (log_level & G_LOG_LEVEL_MASK) {
+    case G_LOG_LEVEL_DEBUG:
+    case G_LOG_LEVEL_INFO:
+        /*
+         * Use same G_MESSAGES_DEBUG logic as glib to enable/disable debug
+         * messages
+         */
+        if (qemu_glog_domains == NULL) {
+            break;
+        }
+        if (strcmp(qemu_glog_domains, "all") != 0 &&
+          (log_domain == NULL || !strstr(qemu_glog_domains, log_domain))) {
+            break;
+        }
+        /* Fall through */
+    case G_LOG_LEVEL_MESSAGE:
+        info_report("%s%s%s",
+                    log_domain ?: "", log_domain ? ": " : "", message);
+
+        break;
+    case G_LOG_LEVEL_WARNING:
+        warn_report("%s%s%s",
+                    log_domain ?: "", log_domain ? ": " : "", message);
+        break;
+    case G_LOG_LEVEL_CRITICAL:
+    case G_LOG_LEVEL_ERROR:
+        error_report("%s%s%s",
+                     log_domain ?: "", log_domain ? ": " : "", message);
+        break;
+    }
+}
+
+void error_init(const char *argv0)
+{
+    const char *p = strrchr(argv0, '/');
+
+    /* Set the program name for error_print_loc(). */
+    g_set_prgname(p ? p + 1 : argv0);
+
+    /*
+     * This sets up glib logging so libraries using it also print their logs
+     * through error_report(), warn_report(), info_report().
+     */
+    g_log_set_default_handler(qemu_log_func, NULL);
+    g_warn_if_fail(qemu_glog_domains == NULL);
+    qemu_glog_domains = g_strdup(g_getenv("G_MESSAGES_DEBUG"));
+}
index b6c89d141208cf9d7d683eeb6c309e960cd3722f..e5e247209a9e0796074a9794f5598325f22f8d35 100644 (file)
@@ -27,8 +27,9 @@ struct Error
 
 Error *error_abort;
 Error *error_fatal;
+Error *error_warn;
 
-static void error_handle_fatal(Error **errp, Error *err)
+static void error_handle(Error **errp, Error *err)
 {
     if (errp == &error_abort) {
         fprintf(stderr, "Unexpected error in %s() at %s:%d:\n",
@@ -43,8 +44,16 @@ static void error_handle_fatal(Error **errp, Error *err)
         error_report_err(err);
         exit(1);
     }
+    if (errp == &error_warn) {
+        warn_report_err(err);
+    } else if (errp && !*errp) {
+        *errp = err;
+    } else {
+        error_free(err);
+    }
 }
 
+G_GNUC_PRINTF(6, 0)
 static void error_setv(Error **errp,
                        const char *src, int line, const char *func,
                        ErrorClass err_class, const char *fmt, va_list ap,
@@ -70,8 +79,7 @@ static void error_setv(Error **errp,
     err->line = line;
     err->func = func;
 
-    error_handle_fatal(errp, err);
-    *errp = err;
+    error_handle(errp, err);
 
     errno = saved_errno;
 }
@@ -283,12 +291,7 @@ void error_propagate(Error **dst_errp, Error *local_err)
     if (!local_err) {
         return;
     }
-    error_handle_fatal(dst_errp, local_err);
-    if (dst_errp && !*dst_errp) {
-        *dst_errp = local_err;
-    } else {
-        error_free(local_err);
-    }
+    error_handle(dst_errp, local_err);
 }
 
 void error_propagate_prepend(Error **dst_errp, Error *err,
index 00d93204f988c753d1ba308421936c457277ff25..76420c5b560c15bb7a6a0c7960a2072ebe174bc1 100644 (file)
@@ -11,7 +11,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "qemu/cutils.h"
 #include "qemu/event_notifier.h"
 #include "qemu/main-loop.h"
@@ -29,6 +28,7 @@ void event_notifier_init_fd(EventNotifier *e, int fd)
 {
     e->rfd = fd;
     e->wfd = fd;
+    e->initialized = true;
 }
 #endif
 
@@ -49,22 +49,21 @@ int event_notifier_init(EventNotifier *e, int active)
         if (errno != ENOSYS) {
             return -errno;
         }
-        if (qemu_pipe(fds) < 0) {
+        if (!g_unix_open_pipe(fds, FD_CLOEXEC, NULL)) {
             return -errno;
         }
-        ret = fcntl_setfl(fds[0], O_NONBLOCK);
-        if (ret < 0) {
+        if (!g_unix_set_fd_nonblocking(fds[0], true, NULL)) {
             ret = -errno;
             goto fail;
         }
-        ret = fcntl_setfl(fds[1], O_NONBLOCK);
-        if (ret < 0) {
+        if (!g_unix_set_fd_nonblocking(fds[1], true, NULL)) {
             ret = -errno;
             goto fail;
         }
         e->rfd = fds[0];
         e->wfd = fds[1];
     }
+    e->initialized = true;
     if (active) {
         event_notifier_set(e);
     }
@@ -78,12 +77,18 @@ fail:
 
 void event_notifier_cleanup(EventNotifier *e)
 {
+    if (!e->initialized) {
+        return;
+    }
+
     if (e->rfd != e->wfd) {
         close(e->rfd);
     }
+
     e->rfd = -1;
     close(e->wfd);
     e->wfd = -1;
+    e->initialized = false;
 }
 
 int event_notifier_get_fd(const EventNotifier *e)
@@ -91,11 +96,20 @@ int event_notifier_get_fd(const EventNotifier *e)
     return e->rfd;
 }
 
+int event_notifier_get_wfd(const EventNotifier *e)
+{
+    return e->wfd;
+}
+
 int event_notifier_set(EventNotifier *e)
 {
     static const uint64_t value = 1;
     ssize_t ret;
 
+    if (!e->initialized) {
+        return -1;
+    }
+
     do {
         ret = write(e->wfd, &value, sizeof(value));
     } while (ret < 0 && errno == EINTR);
@@ -113,6 +127,10 @@ int event_notifier_test_and_clear(EventNotifier *e)
     ssize_t len;
     char buffer[512];
 
+    if (!e->initialized) {
+        return 0;
+    }
+
     /* Drain the notify pipe.  For eventfd, only 8 bytes will be read.  */
     value = 0;
     do {
index 62c53b0a9909af504281c5e55f749a22eff899fb..9352da4699c736f1f6709b8e0e7d723268dc3cbe 100644 (file)
@@ -11,7 +11,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "qemu/event_notifier.h"
 #include "qemu/main-loop.h"
 
index e11a8a022e9822e05c267901ca2a1f859803a9b4..c6413cb18fe76094214380c26b3b18c7bf2a69e9 100644 (file)
@@ -64,11 +64,6 @@ static int fdmon_epoll_wait(AioContext *ctx, AioHandlerList *ready_list,
     int i, ret = 0;
     struct epoll_event events[128];
 
-    /* Fall back while external clients are disabled */
-    if (qatomic_read(&ctx->external_disable_cnt)) {
-        return fdmon_poll_ops.wait(ctx, ready_list, timeout);
-    }
-
     if (timeout > 0) {
         ret = qemu_poll_ns(&pfd, 1, timeout);
         if (ret > 0) {
@@ -127,23 +122,29 @@ static bool fdmon_epoll_try_enable(AioContext *ctx)
 
 bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
 {
+    bool ok;
+
     if (ctx->epollfd < 0) {
         return false;
     }
 
-    /* Do not upgrade while external clients are disabled */
-    if (qatomic_read(&ctx->external_disable_cnt)) {
+    if (npfd < EPOLL_ENABLE_THRESHOLD) {
         return false;
     }
 
-    if (npfd >= EPOLL_ENABLE_THRESHOLD) {
-        if (fdmon_epoll_try_enable(ctx)) {
-            return true;
-        } else {
-            fdmon_epoll_disable(ctx);
-        }
+    /* The list must not change while we add fds to epoll */
+    if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
+        return false;
+    }
+
+    ok = fdmon_epoll_try_enable(ctx);
+
+    qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
+
+    if (!ok) {
+        fdmon_epoll_disable(ctx);
     }
-    return false;
+    return ok;
 }
 
 void fdmon_epoll_setup(AioContext *ctx)
index 1461dfa40743ba0a33dcb3e7e795a43265f4ff3d..16054c5ede3425837d0e7426ce58d72e09e8e2af 100644 (file)
@@ -179,7 +179,12 @@ static void add_poll_remove_sqe(AioContext *ctx, AioHandler *node)
 {
     struct io_uring_sqe *sqe = get_sqe(ctx);
 
+#ifdef LIBURING_HAVE_DATA64
+    io_uring_prep_poll_remove(sqe, (__u64)(uintptr_t)node);
+#else
     io_uring_prep_poll_remove(sqe, node);
+#endif
+    io_uring_sqe_set_data(sqe, NULL);
 }
 
 /* Add a timeout that self-cancels when another cqe becomes ready */
@@ -193,6 +198,7 @@ static void add_timeout_sqe(AioContext *ctx, int64_t ns)
 
     sqe = get_sqe(ctx);
     io_uring_prep_timeout(sqe, &ts, 1, 0);
+    io_uring_sqe_set_data(sqe, NULL);
 }
 
 /* Add sqes from ctx->submit_list for submission */
@@ -272,11 +278,6 @@ static int fdmon_io_uring_wait(AioContext *ctx, AioHandlerList *ready_list,
     unsigned wait_nr = 1; /* block until at least one cqe is ready */
     int ret;
 
-    /* Fall back while external clients are disabled */
-    if (qatomic_read(&ctx->external_disable_cnt)) {
-        return fdmon_poll_ops.wait(ctx, ready_list, timeout);
-    }
-
     if (timeout == 0) {
         wait_nr = 0; /* non-blocking */
     } else if (timeout > 0) {
@@ -311,8 +312,7 @@ static bool fdmon_io_uring_need_wait(AioContext *ctx)
         return true;
     }
 
-    /* Are we falling back to fdmon-poll? */
-    return qatomic_read(&ctx->external_disable_cnt);
+    return false;
 }
 
 static const FDMonOps fdmon_io_uring_ops = {
index 5fe3b47865a58af7e1ebba6306b6974662da363a..17df917cf962a9f73000c970ec616f229c4e4f9a 100644 (file)
@@ -65,8 +65,7 @@ static int fdmon_poll_wait(AioContext *ctx, AioHandlerList *ready_list,
     assert(npfd == 0);
 
     QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
-        if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events
-                && aio_node_check(ctx, node->is_external)) {
+        if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events) {
             add_pollfd(node);
         }
     }
index a5dd789ce505ddd3eeca54444d4cec70b8a122b8..d4d1c135e03b87b76b8979ce6b4275f926599e8e 100644 (file)
@@ -31,9 +31,7 @@ void fifo8_destroy(Fifo8 *fifo)
 
 void fifo8_push(Fifo8 *fifo, uint8_t data)
 {
-    if (fifo->num == fifo->capacity) {
-        abort();
-    }
+    assert(fifo->num < fifo->capacity);
     fifo->data[(fifo->head + fifo->num) % fifo->capacity] = data;
     fifo->num++;
 }
@@ -42,9 +40,7 @@ void fifo8_push_all(Fifo8 *fifo, const uint8_t *data, uint32_t num)
 {
     uint32_t start, avail;
 
-    if (fifo->num + num > fifo->capacity) {
-        abort();
-    }
+    assert(fifo->num + num <= fifo->capacity);
 
     start = (fifo->head + fifo->num) % fifo->capacity;
 
@@ -63,9 +59,7 @@ uint8_t fifo8_pop(Fifo8 *fifo)
 {
     uint8_t ret;
 
-    if (fifo->num == 0) {
-        abort();
-    }
+    assert(fifo->num > 0);
     ret = fifo->data[fifo->head++];
     fifo->head %= fifo->capacity;
     fifo->num--;
@@ -76,9 +70,7 @@ const uint8_t *fifo8_pop_buf(Fifo8 *fifo, uint32_t max, uint32_t *num)
 {
     uint8_t *ret;
 
-    if (max == 0 || max > fifo->num) {
-        abort();
-    }
+    assert(max > 0 && max <= fifo->num);
     *num = MIN(fifo->capacity - fifo->head, max);
     ret = &fifo->data[fifo->head];
     fifo->head += *num;
index 2c45f7f1764bef0c3555941c711275185cc9991e..7352b9fe53ec402bee3e20f5448196ffb2e1c296 100644 (file)
@@ -81,16 +81,25 @@ static void qemu_file_monitor_watch(void *arg)
 
     /* Loop over all events in the buffer */
     while (used < len) {
-        struct inotify_event *ev =
-            (struct inotify_event *)(buf + used);
-        const char *name = ev->len ? ev->name : "";
-        QFileMonitorDir *dir = g_hash_table_lookup(mon->idmap,
-                                                   GINT_TO_POINTER(ev->wd));
-        uint32_t iev = ev->mask &
-            (IN_CREATE | IN_MODIFY | IN_DELETE | IN_IGNORED |
-             IN_MOVED_TO | IN_MOVED_FROM | IN_ATTRIB);
+        const char *name;
+        QFileMonitorDir *dir;
+        uint32_t iev;
         int qev;
         gsize i;
+        struct inotify_event *ev = (struct inotify_event *)(buf + used);
+
+        /*
+         * We trust the kernel to provide valid buffer with complete event
+         * records.
+         */
+        assert(len - used >= sizeof(struct inotify_event));
+        assert(len - used - sizeof(struct inotify_event) >= ev->len);
+
+        name = ev->len ? ev->name : "";
+        dir = g_hash_table_lookup(mon->idmap, GINT_TO_POINTER(ev->wd));
+        iev = ev->mask &
+            (IN_CREATE | IN_MODIFY | IN_DELETE | IN_IGNORED |
+             IN_MOVED_TO | IN_MOVED_FROM | IN_ATTRIB);
 
         used += sizeof(struct inotify_event) + ev->len;
 
index 086115bd67026c4ef7b7004d1915ecb2d8cd7fcb..33607d5ff24d0ace1ddec5fc90dc11235234d866 100644 (file)
@@ -14,7 +14,7 @@
 #include "qapi/error.h"
 #include "qemu/guest-random.h"
 #include "crypto/random.h"
-#include "sysemu/replay.h"
+#include "exec/replay-core.h"
 
 
 static __thread GRand *thread_rand;
@@ -38,7 +38,7 @@ static int glib_random_bytes(void *buf, size_t len)
     }
     if (i < len) {
         x = g_rand_int(rand);
-        __builtin_memcpy(buf + i, &x, i - len);
+        __builtin_memcpy(buf + i, &x, len - i);
     }
     return 0;
 }
@@ -87,11 +87,11 @@ void qemu_guest_random_seed_thread_part2(uint64_t seed)
     }
 }
 
-int qemu_guest_random_seed_main(const char *optarg, Error **errp)
+int qemu_guest_random_seed_main(const char *seedstr, Error **errp)
 {
-    unsigned long long seed;
-    if (parse_uint_full(optarg, &seed, 0)) {
-        error_setg(errp, "Invalid seed number: %s", optarg);
+    uint64_t seed;
+    if (parse_uint_full(seedstr, 0, &seed)) {
+        error_setg(errp, "Invalid seed number: %s", seedstr);
         return -1;
     } else {
         deterministic = true;
index 305b894a63f15d4327082823861a956cac51dca9..6d6e1b595d9ade3edc3c5c73b9e49da05027ac57 100644 (file)
@@ -301,6 +301,39 @@ bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t start, int64_t end,
     return true;
 }
 
+bool hbitmap_status(const HBitmap *hb, int64_t start, int64_t count,
+                    int64_t *pnum)
+{
+    int64_t next_dirty, next_zero;
+
+    assert(start >= 0);
+    assert(count > 0);
+    assert(start + count <= hb->orig_size);
+
+    next_dirty = hbitmap_next_dirty(hb, start, count);
+    if (next_dirty == -1) {
+        *pnum = count;
+        return false;
+    }
+
+    if (next_dirty > start) {
+        *pnum = next_dirty - start;
+        return false;
+    }
+
+    assert(next_dirty == start);
+
+    next_zero = hbitmap_next_zero(hb, start, count);
+    if (next_zero == -1) {
+        *pnum = count;
+        return true;
+    }
+
+    assert(next_zero > start);
+    *pnum = next_zero - start;
+    return true;
+}
+
 bool hbitmap_empty(const HBitmap *hb)
 {
     return hb->count == 0;
@@ -829,7 +862,7 @@ void hbitmap_truncate(HBitmap *hb, uint64_t size)
         }
         old = hb->sizes[i];
         hb->sizes[i] = size;
-        hb->levels[i] = g_realloc(hb->levels[i], size * sizeof(unsigned long));
+        hb->levels[i] = g_renew(unsigned long, hb->levels[i], size);
         if (!shrink) {
             memset(&hb->levels[i][old], 0x00,
                    (size - old) * sizeof(*hb->levels[i]));
@@ -840,11 +873,6 @@ void hbitmap_truncate(HBitmap *hb, uint64_t size)
     }
 }
 
-bool hbitmap_can_merge(const HBitmap *a, const HBitmap *b)
-{
-    return (a->orig_size == b->orig_size);
-}
-
 /**
  * hbitmap_sparse_merge: performs dst = dst | src
  * works with differing granularities.
@@ -868,28 +896,24 @@ static void hbitmap_sparse_merge(HBitmap *dst, const HBitmap *src)
  * Given HBitmaps A and B, let R := A (BITOR) B.
  * Bitmaps A and B will not be modified,
  *     except when bitmap R is an alias of A or B.
- *
- * @return true if the merge was successful,
- *         false if it was not attempted.
+ * Bitmaps must have same size.
  */
-bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result)
+void hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result)
 {
     int i;
     uint64_t j;
 
-    if (!hbitmap_can_merge(a, b) || !hbitmap_can_merge(a, result)) {
-        return false;
-    }
-    assert(hbitmap_can_merge(b, result));
+    assert(a->orig_size == result->orig_size);
+    assert(b->orig_size == result->orig_size);
 
     if ((!hbitmap_count(a) && result == b) ||
         (!hbitmap_count(b) && result == a)) {
-        return true;
+        return;
     }
 
     if (!hbitmap_count(a) && !hbitmap_count(b)) {
         hbitmap_reset_all(result);
-        return true;
+        return;
     }
 
     if (a->granularity != b->granularity) {
@@ -902,7 +926,7 @@ bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result)
         if (b != result) {
             hbitmap_sparse_merge(result, b);
         }
-        return true;
+        return;
     }
 
     /* This merge is O(size), as BITS_PER_LONG and HBITMAP_LEVELS are constant.
@@ -918,8 +942,6 @@ bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result)
 
     /* Recompute the dirty count */
     result->count = hb_count_between(result, 0, result->size - 1);
-
-    return true;
 }
 
 char *hbitmap_sha256(const HBitmap *bitmap, Error **errp)
index 2c105a8846200a044f82b7580dfd83a379e5b905..9921114b3c75a7ead393cdd5e650e9f750284b70 100644 (file)
@@ -14,7 +14,7 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qemu/cutils.h"
 
 void qemu_hexdump_line(char *line, unsigned int b, const void *bufptr,
                        unsigned int len, bool ascii)
index 7b9322071d1d5a74d86dd15985a36a50deb13016..fb91bcba823de5bdb6da822af31803cbc5c0f238 100644 (file)
@@ -34,7 +34,7 @@ static inline void mul64(uint64_t *plow, uint64_t *phigh,
     typedef union {
         uint64_t ll;
         struct {
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
             uint32_t high, low;
 #else
             uint32_t low, high;
@@ -86,78 +86,119 @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
     *phigh = rh;
 }
 
-/* Unsigned 128x64 division.  Returns 1 if overflow (divide by zero or */
-/* quotient exceeds 64 bits).  Otherwise returns quotient via plow and */
-/* remainder via phigh. */
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+/*
+ * Unsigned 128-by-64 division.
+ * Returns the remainder.
+ * Returns quotient via plow and phigh.
+ * Also returns the remainder via the function return value.
+ */
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
 {
     uint64_t dhi = *phigh;
     uint64_t dlo = *plow;
-    unsigned i;
-    uint64_t carry = 0;
+    uint64_t rem, dhighest;
+    int sh;
 
-    if (divisor == 0) {
-        return 1;
-    } else if (dhi == 0) {
+    if (divisor == 0 || dhi == 0) {
         *plow  = dlo / divisor;
-        *phigh = dlo % divisor;
-        return 0;
-    } else if (dhi > divisor) {
-        return 1;
+        *phigh = 0;
+        return dlo % divisor;
     } else {
+        sh = clz64(divisor);
 
-        for (i = 0; i < 64; i++) {
-            carry = dhi >> 63;
-            dhi = (dhi << 1) | (dlo >> 63);
-            if (carry || (dhi >= divisor)) {
-                dhi -= divisor;
-                carry = 1;
+        if (dhi < divisor) {
+            if (sh != 0) {
+                /* normalize the divisor, shifting the dividend accordingly */
+                divisor <<= sh;
+                dhi = (dhi << sh) | (dlo >> (64 - sh));
+                dlo <<= sh;
+            }
+
+            *phigh = 0;
+            *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
+        } else {
+            if (sh != 0) {
+                /* normalize the divisor, shifting the dividend accordingly */
+                divisor <<= sh;
+                dhighest = dhi >> (64 - sh);
+                dhi = (dhi << sh) | (dlo >> (64 - sh));
+                dlo <<= sh;
+
+                *phigh = udiv_qrnnd(&dhi, dhighest, dhi, divisor);
             } else {
-                carry = 0;
+                /**
+                 * dhi >= divisor
+                 * Since the MSB of divisor is set (sh == 0),
+                 * (dhi - divisor) < divisor
+                 *
+                 * Thus, the high part of the quotient is 1, and we can
+                 * calculate the low part with a single call to udiv_qrnnd
+                 * after subtracting divisor from dhi
+                 */
+                dhi -= divisor;
+                *phigh = 1;
             }
-            dlo = (dlo << 1) | carry;
+
+            *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
         }
 
-        *plow = dlo;
-        *phigh = dhi;
-        return 0;
+        /*
+         * since the dividend/divisor might have been normalized,
+         * the remainder might also have to be shifted back
+         */
+        return rem >> sh;
     }
 }
 
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
+/*
+ * Signed 128-by-64 division.
+ * Returns quotient via plow and phigh.
+ * Also returns the remainder via the function return value.
+ */
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor)
 {
-    int sgn_dvdnd = *phigh < 0;
-    int sgn_divsr = divisor < 0;
-    int overflow = 0;
-
-    if (sgn_dvdnd) {
-        *plow = ~(*plow);
-        *phigh = ~(*phigh);
-        if (*plow == (int64_t)-1) {
-            *plow = 0;
-            (*phigh)++;
-         } else {
-            (*plow)++;
-         }
-    }
+    bool neg_quotient = false, neg_remainder = false;
+    uint64_t unsig_hi = *phigh, unsig_lo = *plow;
+    uint64_t rem;
 
-    if (sgn_divsr) {
-        divisor = 0 - divisor;
+    if (*phigh < 0) {
+        neg_quotient = !neg_quotient;
+        neg_remainder = !neg_remainder;
+
+        if (unsig_lo == 0) {
+            unsig_hi = -unsig_hi;
+        } else {
+            unsig_hi = ~unsig_hi;
+            unsig_lo = -unsig_lo;
+        }
     }
 
-    overflow = divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
+    if (divisor < 0) {
+        neg_quotient = !neg_quotient;
 
-    if (sgn_dvdnd  ^ sgn_divsr) {
-        *plow = 0 - *plow;
+        divisor = -divisor;
     }
 
-    if (!overflow) {
-        if ((*plow < 0) ^ (sgn_dvdnd ^ sgn_divsr)) {
-            overflow = 1;
+    rem = divu128(&unsig_lo, &unsig_hi, (uint64_t)divisor);
+
+    if (neg_quotient) {
+        if (unsig_lo == 0) {
+            *phigh = -unsig_hi;
+            *plow = 0;
+        } else {
+            *phigh = ~unsig_hi;
+            *plow = -unsig_lo;
         }
+    } else {
+        *phigh = unsig_hi;
+        *plow = unsig_lo;
     }
 
-    return overflow;
+    if (neg_remainder) {
+        return -rem;
+    } else {
+        return rem;
+    }
 }
 #endif
 
@@ -225,3 +266,183 @@ void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow)
         *plow = *plow << shift;
     }
 }
+
+/*
+ * Unsigned 256-by-128 division.
+ * Returns the remainder via r.
+ * Returns lower 128 bit of quotient.
+ * Needs a normalized divisor (most significant bit set to 1).
+ *
+ * Adapted from include/qemu/host-utils.h udiv_qrnnd,
+ * from the GNU Multi Precision Library - longlong.h __udiv_qrnnd
+ * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
+ *
+ * Licensed under the GPLv2/LGPLv3
+ */
+static Int128 udiv256_qrnnd(Int128 *r, Int128 n1, Int128 n0, Int128 d)
+{
+    Int128 d0, d1, q0, q1, r1, r0, m;
+    uint64_t mp0, mp1;
+
+    d0 = int128_make64(int128_getlo(d));
+    d1 = int128_make64(int128_gethi(d));
+
+    r1 = int128_remu(n1, d1);
+    q1 = int128_divu(n1, d1);
+    mp0 = int128_getlo(q1);
+    mp1 = int128_gethi(q1);
+    mulu128(&mp0, &mp1, int128_getlo(d0));
+    m = int128_make128(mp0, mp1);
+    r1 = int128_make128(int128_gethi(n0), int128_getlo(r1));
+    if (int128_ult(r1, m)) {
+        q1 = int128_sub(q1, int128_one());
+        r1 = int128_add(r1, d);
+        if (int128_uge(r1, d)) {
+            if (int128_ult(r1, m)) {
+                q1 = int128_sub(q1, int128_one());
+                r1 = int128_add(r1, d);
+            }
+        }
+    }
+    r1 = int128_sub(r1, m);
+
+    r0 = int128_remu(r1, d1);
+    q0 = int128_divu(r1, d1);
+    mp0 = int128_getlo(q0);
+    mp1 = int128_gethi(q0);
+    mulu128(&mp0, &mp1, int128_getlo(d0));
+    m = int128_make128(mp0, mp1);
+    r0 = int128_make128(int128_getlo(n0), int128_getlo(r0));
+    if (int128_ult(r0, m)) {
+        q0 = int128_sub(q0, int128_one());
+        r0 = int128_add(r0, d);
+        if (int128_uge(r0, d)) {
+            if (int128_ult(r0, m)) {
+                q0 = int128_sub(q0, int128_one());
+                r0 = int128_add(r0, d);
+            }
+        }
+    }
+    r0 = int128_sub(r0, m);
+
+    *r = r0;
+    return int128_or(int128_lshift(q1, 64), q0);
+}
+
+/*
+ * Unsigned 256-by-128 division.
+ * Returns the remainder.
+ * Returns quotient via plow and phigh.
+ * Also returns the remainder via the function return value.
+ */
+Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor)
+{
+    Int128 dhi = *phigh;
+    Int128 dlo = *plow;
+    Int128 rem, dhighest;
+    int sh;
+
+    if (!int128_nz(divisor) || !int128_nz(dhi)) {
+        *plow  = int128_divu(dlo, divisor);
+        *phigh = int128_zero();
+        return int128_remu(dlo, divisor);
+    } else {
+        sh = clz128(divisor);
+
+        if (int128_ult(dhi, divisor)) {
+            if (sh != 0) {
+                /* normalize the divisor, shifting the dividend accordingly */
+                divisor = int128_lshift(divisor, sh);
+                dhi = int128_or(int128_lshift(dhi, sh),
+                                int128_urshift(dlo, (128 - sh)));
+                dlo = int128_lshift(dlo, sh);
+            }
+
+            *phigh = int128_zero();
+            *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor);
+        } else {
+            if (sh != 0) {
+                /* normalize the divisor, shifting the dividend accordingly */
+                divisor = int128_lshift(divisor, sh);
+                dhighest = int128_rshift(dhi, (128 - sh));
+                dhi = int128_or(int128_lshift(dhi, sh),
+                                int128_urshift(dlo, (128 - sh)));
+                dlo = int128_lshift(dlo, sh);
+
+                *phigh = udiv256_qrnnd(&dhi, dhighest, dhi, divisor);
+            } else {
+                /*
+                 * dhi >= divisor
+                 * Since the MSB of divisor is set (sh == 0),
+                 * (dhi - divisor) < divisor
+                 *
+                 * Thus, the high part of the quotient is 1, and we can
+                 * calculate the low part with a single call to udiv_qrnnd
+                 * after subtracting divisor from dhi
+                 */
+                dhi = int128_sub(dhi, divisor);
+                *phigh = int128_one();
+            }
+
+            *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor);
+        }
+
+        /*
+         * since the dividend/divisor might have been normalized,
+         * the remainder might also have to be shifted back
+         */
+        rem = int128_urshift(rem, sh);
+        return rem;
+    }
+}
+
+/*
+ * Signed 256-by-128 division.
+ * Returns quotient via plow and phigh.
+ * Also returns the remainder via the function return value.
+ */
+Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor)
+{
+    bool neg_quotient = false, neg_remainder = false;
+    Int128 unsig_hi = *phigh, unsig_lo = *plow;
+    Int128 rem;
+
+    if (!int128_nonneg(*phigh)) {
+        neg_quotient = !neg_quotient;
+        neg_remainder = !neg_remainder;
+
+        if (!int128_nz(unsig_lo)) {
+            unsig_hi = int128_neg(unsig_hi);
+        } else {
+            unsig_hi = int128_not(unsig_hi);
+            unsig_lo = int128_neg(unsig_lo);
+        }
+    }
+
+    if (!int128_nonneg(divisor)) {
+        neg_quotient = !neg_quotient;
+
+        divisor = int128_neg(divisor);
+    }
+
+    rem = divu256(&unsig_lo, &unsig_hi, divisor);
+
+    if (neg_quotient) {
+        if (!int128_nz(unsig_lo)) {
+            *phigh = int128_neg(unsig_hi);
+            *plow = int128_zero();
+        } else {
+            *phigh = int128_not(unsig_hi);
+            *plow = int128_neg(unsig_lo);
+        }
+    } else {
+        *phigh = unsig_hi;
+        *plow = unsig_lo;
+    }
+
+    if (neg_remainder) {
+        return int128_neg(rem);
+    } else {
+        return rem;
+    }
+}
index 5addb4460ea08d42a0c5784680d991b8fefb0cc7..ded41c5025e4a0a1c3422469ff723b91d8c210c2 100644 (file)
--- a/util/id.c
+++ b/util/id.c
@@ -35,6 +35,7 @@ static const char *const id_subsys_str[ID_MAX] = {
     [ID_QDEV]  = "qdev",
     [ID_BLOCK] = "block",
     [ID_CHR] = "chr",
+    [ID_NET] = "net",
 };
 
 /*
diff --git a/util/int128.c b/util/int128.c
new file mode 100644 (file)
index 0000000..df6c633
--- /dev/null
@@ -0,0 +1,189 @@
+/*
+ * 128-bit division and remainder for compilers not supporting __int128
+ *
+ * Copyright (c) 2021 Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "qemu/int128.h"
+
+#ifndef CONFIG_INT128
+
+/*
+ * Division and remainder algorithms for 128-bit due to Stefan Kanthak,
+ * https://skanthak.homepage.t-online.de/integer.html#udivmodti4
+ * Preconditions:
+ *     - function should never be called with v equals to 0, it has to
+ *       be dealt with beforehand
+ *     - quotien pointer must be valid
+ */
+static Int128 divrem128(Int128 u, Int128 v, Int128 *q)
+{
+    Int128 qq;
+    uint64_t hi, lo, tmp;
+    int s = clz64(v.hi);
+
+    if (s == 64) {
+        /* we have uu÷0v => let's use divu128 */
+        hi = u.hi;
+        lo = u.lo;
+        tmp = divu128(&lo, &hi, v.lo);
+        *q = int128_make128(lo, hi);
+        return int128_make128(tmp, 0);
+    } else {
+        hi = int128_gethi(int128_lshift(v, s));
+
+        if (hi > u.hi) {
+            lo = u.lo;
+            tmp = u.hi;
+            divu128(&lo, &tmp, hi);
+            lo = int128_gethi(int128_lshift(int128_make128(lo, 0), s));
+        } else { /* prevent overflow */
+            lo = u.lo;
+            tmp = u.hi - hi;
+            divu128(&lo, &tmp, hi);
+            lo = int128_gethi(int128_lshift(int128_make128(lo, 1), s));
+        }
+
+        qq = int128_make64(lo);
+
+        tmp = lo * v.hi;
+        mulu64(&lo, &hi, lo, v.lo);
+        hi += tmp;
+
+        if (hi < tmp     /* quotient * divisor >= 2**128 > dividend */
+            || hi > u.hi /* quotient * divisor > dividend */
+            || (hi == u.hi && lo > u.lo)) {
+            qq.lo -= 1;
+            mulu64(&lo, &hi, qq.lo, v.lo);
+            hi += qq.lo * v.hi;
+        }
+
+        *q = qq;
+        u.hi -= hi + (u.lo < lo);
+        u.lo -= lo;
+        return u;
+    }
+}
+
+Int128 int128_divu(Int128 a, Int128 b)
+{
+    Int128 q;
+    divrem128(a, b, &q);
+    return q;
+}
+
+Int128 int128_remu(Int128 a, Int128 b)
+{
+    Int128 q;
+    return divrem128(a, b, &q);
+}
+
+Int128 int128_divs(Int128 a, Int128 b)
+{
+    Int128 q;
+    bool sgna = !int128_nonneg(a);
+    bool sgnb = !int128_nonneg(b);
+
+    if (sgna) {
+        a = int128_neg(a);
+    }
+
+    if (sgnb) {
+        b = int128_neg(b);
+    }
+
+    divrem128(a, b, &q);
+
+    if (sgna != sgnb) {
+        q = int128_neg(q);
+    }
+
+    return q;
+}
+
+Int128 int128_rems(Int128 a, Int128 b)
+{
+    Int128 q, r;
+    bool sgna = !int128_nonneg(a);
+    bool sgnb = !int128_nonneg(b);
+
+    if (sgna) {
+        a = int128_neg(a);
+    }
+
+    if (sgnb) {
+        b = int128_neg(b);
+    }
+
+    r = divrem128(a, b, &q);
+
+    if (sgna) {
+        r = int128_neg(r);
+    }
+
+    return r;
+}
+
+#elif defined(CONFIG_TCG_INTERPRETER)
+
+Int128 int128_divu(Int128 a_s, Int128 b_s)
+{
+    Int128Alias r, a, b;
+
+    a.s = a_s;
+    b.s = b_s;
+    r.u = a.u / b.u;
+    return r.s;
+}
+
+Int128 int128_remu(Int128 a_s, Int128 b_s)
+{
+    Int128Alias r, a, b;
+
+    a.s = a_s;
+    b.s = b_s;
+    r.u = a.u % b.u;
+    return r.s;
+}
+
+Int128 int128_divs(Int128 a_s, Int128 b_s)
+{
+    Int128Alias r, a, b;
+
+    a.s = a_s;
+    b.s = b_s;
+    r.i = a.i / b.i;
+    return r.s;
+}
+
+Int128 int128_rems(Int128 a_s, Int128 b_s)
+{
+    Int128Alias r, a, b;
+
+    a.s = a_s;
+    b.s = b_s;
+    r.i = a.i % b.i;
+    return r.s;
+}
+
+#endif
diff --git a/util/interval-tree.c b/util/interval-tree.c
new file mode 100644 (file)
index 0000000..5346518
--- /dev/null
@@ -0,0 +1,899 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "qemu/osdep.h"
+#include "qemu/interval-tree.h"
+#include "qemu/atomic.h"
+
+/*
+ * Red Black Trees.
+ *
+ * For now, don't expose Linux Red-Black Trees separately, but retain the
+ * separate type definitions to keep the implementation sane, and allow
+ * the possibility of separating them later.
+ *
+ * Derived from include/linux/rbtree_augmented.h and its dependencies.
+ */
+
+/*
+ * red-black trees properties:  https://en.wikipedia.org/wiki/Rbtree
+ *
+ *  1) A node is either red or black
+ *  2) The root is black
+ *  3) All leaves (NULL) are black
+ *  4) Both children of every red node are black
+ *  5) Every simple path from root to leaves contains the same number
+ *     of black nodes.
+ *
+ *  4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
+ *  consecutive red nodes in a path and every red node is therefore followed by
+ *  a black. So if B is the number of black nodes on every simple path (as per
+ *  5), then the longest possible path due to 4 is 2B.
+ *
+ *  We shall indicate color with case, where black nodes are uppercase and red
+ *  nodes will be lowercase. Unknown color nodes shall be drawn as red within
+ *  parentheses and have some accompanying text comment.
+ *
+ * Notes on lockless lookups:
+ *
+ * All stores to the tree structure (rb_left and rb_right) must be done using
+ * WRITE_ONCE [qatomic_set for QEMU]. And we must not inadvertently cause
+ * (temporary) loops in the tree structure as seen in program order.
+ *
+ * These two requirements will allow lockless iteration of the tree -- not
+ * correct iteration mind you, tree rotations are not atomic so a lookup might
+ * miss entire subtrees.
+ *
+ * But they do guarantee that any such traversal will only see valid elements
+ * and that it will indeed complete -- does not get stuck in a loop.
+ *
+ * It also guarantees that if the lookup returns an element it is the 'correct'
+ * one. But not returning an element does _NOT_ mean it's not present.
+ */
+
+typedef enum RBColor
+{
+    RB_RED,
+    RB_BLACK,
+} RBColor;
+
+typedef struct RBAugmentCallbacks {
+    void (*propagate)(RBNode *node, RBNode *stop);
+    void (*copy)(RBNode *old, RBNode *new);
+    void (*rotate)(RBNode *old, RBNode *new);
+} RBAugmentCallbacks;
+
+static inline uintptr_t rb_pc(const RBNode *n)
+{
+    return qatomic_read(&n->rb_parent_color);
+}
+
+static inline void rb_set_pc(RBNode *n, uintptr_t pc)
+{
+    qatomic_set(&n->rb_parent_color, pc);
+}
+
+static inline RBNode *pc_parent(uintptr_t pc)
+{
+    return (RBNode *)(pc & ~1);
+}
+
+static inline RBNode *rb_parent(const RBNode *n)
+{
+    return pc_parent(rb_pc(n));
+}
+
+static inline RBNode *rb_red_parent(const RBNode *n)
+{
+    return (RBNode *)rb_pc(n);
+}
+
+static inline RBColor pc_color(uintptr_t pc)
+{
+    return (RBColor)(pc & 1);
+}
+
+static inline bool pc_is_red(uintptr_t pc)
+{
+    return pc_color(pc) == RB_RED;
+}
+
+static inline bool pc_is_black(uintptr_t pc)
+{
+    return !pc_is_red(pc);
+}
+
+static inline RBColor rb_color(const RBNode *n)
+{
+    return pc_color(rb_pc(n));
+}
+
+static inline bool rb_is_red(const RBNode *n)
+{
+    return pc_is_red(rb_pc(n));
+}
+
+static inline bool rb_is_black(const RBNode *n)
+{
+    return pc_is_black(rb_pc(n));
+}
+
+static inline void rb_set_black(RBNode *n)
+{
+    rb_set_pc(n, rb_pc(n) | RB_BLACK);
+}
+
+static inline void rb_set_parent_color(RBNode *n, RBNode *p, RBColor color)
+{
+    rb_set_pc(n, (uintptr_t)p | color);
+}
+
+static inline void rb_set_parent(RBNode *n, RBNode *p)
+{
+    rb_set_parent_color(n, p, rb_color(n));
+}
+
+static inline void rb_link_node(RBNode *node, RBNode *parent, RBNode **rb_link)
+{
+    node->rb_parent_color = (uintptr_t)parent;
+    node->rb_left = node->rb_right = NULL;
+
+    /*
+     * Ensure that node is initialized before insertion,
+     * as viewed by a concurrent search.
+     */
+    qatomic_set_mb(rb_link, node);
+}
+
+static RBNode *rb_next(RBNode *node)
+{
+    RBNode *parent;
+
+    /* OMIT: if empty node, return null. */
+
+    /*
+     * If we have a right-hand child, go down and then left as far as we can.
+     */
+    if (node->rb_right) {
+        node = node->rb_right;
+        while (node->rb_left) {
+            node = node->rb_left;
+        }
+        return node;
+    }
+
+    /*
+     * No right-hand children. Everything down and left is smaller than us,
+     * so any 'next' node must be in the general direction of our parent.
+     * Go up the tree; any time the ancestor is a right-hand child of its
+     * parent, keep going up. First time it's a left-hand child of its
+     * parent, said parent is our 'next' node.
+     */
+    while ((parent = rb_parent(node)) && node == parent->rb_right) {
+        node = parent;
+    }
+
+    return parent;
+}
+
+static inline void rb_change_child(RBNode *old, RBNode *new,
+                                   RBNode *parent, RBRoot *root)
+{
+    if (!parent) {
+        qatomic_set(&root->rb_node, new);
+    } else if (parent->rb_left == old) {
+        qatomic_set(&parent->rb_left, new);
+    } else {
+        qatomic_set(&parent->rb_right, new);
+    }
+}
+
+static inline void rb_rotate_set_parents(RBNode *old, RBNode *new,
+                                         RBRoot *root, RBColor color)
+{
+    uintptr_t pc = rb_pc(old);
+    RBNode *parent = pc_parent(pc);
+
+    rb_set_pc(new, pc);
+    rb_set_parent_color(old, new, color);
+    rb_change_child(old, new, parent, root);
+}
+
+static void rb_insert_augmented(RBNode *node, RBRoot *root,
+                                const RBAugmentCallbacks *augment)
+{
+    RBNode *parent = rb_red_parent(node), *gparent, *tmp;
+
+    while (true) {
+        /*
+         * Loop invariant: node is red.
+         */
+        if (unlikely(!parent)) {
+            /*
+             * The inserted node is root. Either this is the first node, or
+             * we recursed at Case 1 below and are no longer violating 4).
+             */
+            rb_set_parent_color(node, NULL, RB_BLACK);
+            break;
+        }
+
+        /*
+         * If there is a black parent, we are done.  Otherwise, take some
+         * corrective action as, per 4), we don't want a red root or two
+         * consecutive red nodes.
+         */
+        if (rb_is_black(parent)) {
+            break;
+        }
+
+        gparent = rb_red_parent(parent);
+
+        tmp = gparent->rb_right;
+        if (parent != tmp) {    /* parent == gparent->rb_left */
+            if (tmp && rb_is_red(tmp)) {
+                /*
+                 * Case 1 - node's uncle is red (color flips).
+                 *
+                 *       G            g
+                 *      / \          / \
+                 *     p   u  -->   P   U
+                 *    /            /
+                 *   n            n
+                 *
+                 * However, since g's parent might be red, and 4) does not
+                 * allow this, we need to recurse at g.
+                 */
+                rb_set_parent_color(tmp, gparent, RB_BLACK);
+                rb_set_parent_color(parent, gparent, RB_BLACK);
+                node = gparent;
+                parent = rb_parent(node);
+                rb_set_parent_color(node, parent, RB_RED);
+                continue;
+            }
+
+            tmp = parent->rb_right;
+            if (node == tmp) {
+                /*
+                 * Case 2 - node's uncle is black and node is
+                 * the parent's right child (left rotate at parent).
+                 *
+                 *      G             G
+                 *     / \           / \
+                 *    p   U  -->    n   U
+                 *     \           /
+                 *      n         p
+                 *
+                 * This still leaves us in violation of 4), the
+                 * continuation into Case 3 will fix that.
+                 */
+                tmp = node->rb_left;
+                qatomic_set(&parent->rb_right, tmp);
+                qatomic_set(&node->rb_left, parent);
+                if (tmp) {
+                    rb_set_parent_color(tmp, parent, RB_BLACK);
+                }
+                rb_set_parent_color(parent, node, RB_RED);
+                augment->rotate(parent, node);
+                parent = node;
+                tmp = node->rb_right;
+            }
+
+            /*
+             * Case 3 - node's uncle is black and node is
+             * the parent's left child (right rotate at gparent).
+             *
+             *        G           P
+             *       / \         / \
+             *      p   U  -->  n   g
+             *     /                 \
+             *    n                   U
+             */
+            qatomic_set(&gparent->rb_left, tmp); /* == parent->rb_right */
+            qatomic_set(&parent->rb_right, gparent);
+            if (tmp) {
+                rb_set_parent_color(tmp, gparent, RB_BLACK);
+            }
+            rb_rotate_set_parents(gparent, parent, root, RB_RED);
+            augment->rotate(gparent, parent);
+            break;
+        } else {
+            tmp = gparent->rb_left;
+            if (tmp && rb_is_red(tmp)) {
+                /* Case 1 - color flips */
+                rb_set_parent_color(tmp, gparent, RB_BLACK);
+                rb_set_parent_color(parent, gparent, RB_BLACK);
+                node = gparent;
+                parent = rb_parent(node);
+                rb_set_parent_color(node, parent, RB_RED);
+                continue;
+            }
+
+            tmp = parent->rb_left;
+            if (node == tmp) {
+                /* Case 2 - right rotate at parent */
+                tmp = node->rb_right;
+                qatomic_set(&parent->rb_left, tmp);
+                qatomic_set(&node->rb_right, parent);
+                if (tmp) {
+                    rb_set_parent_color(tmp, parent, RB_BLACK);
+                }
+                rb_set_parent_color(parent, node, RB_RED);
+                augment->rotate(parent, node);
+                parent = node;
+                tmp = node->rb_left;
+            }
+
+            /* Case 3 - left rotate at gparent */
+            qatomic_set(&gparent->rb_right, tmp); /* == parent->rb_left */
+            qatomic_set(&parent->rb_left, gparent);
+            if (tmp) {
+                rb_set_parent_color(tmp, gparent, RB_BLACK);
+            }
+            rb_rotate_set_parents(gparent, parent, root, RB_RED);
+            augment->rotate(gparent, parent);
+            break;
+        }
+    }
+}
+
+static void rb_insert_augmented_cached(RBNode *node,
+                                       RBRootLeftCached *root, bool newleft,
+                                       const RBAugmentCallbacks *augment)
+{
+    if (newleft) {
+        root->rb_leftmost = node;
+    }
+    rb_insert_augmented(node, &root->rb_root, augment);
+}
+
+static void rb_erase_color(RBNode *parent, RBRoot *root,
+                           const RBAugmentCallbacks *augment)
+{
+    RBNode *node = NULL, *sibling, *tmp1, *tmp2;
+
+    while (true) {
+        /*
+         * Loop invariants:
+         * - node is black (or NULL on first iteration)
+         * - node is not the root (parent is not NULL)
+         * - All leaf paths going through parent and node have a
+         *   black node count that is 1 lower than other leaf paths.
+         */
+        sibling = parent->rb_right;
+        if (node != sibling) {  /* node == parent->rb_left */
+            if (rb_is_red(sibling)) {
+                /*
+                 * Case 1 - left rotate at parent
+                 *
+                 *     P               S
+                 *    / \             / \ 
+                 *   N   s    -->    p   Sr
+                 *      / \         / \ 
+                 *     Sl  Sr      N   Sl
+                 */
+                tmp1 = sibling->rb_left;
+                qatomic_set(&parent->rb_right, tmp1);
+                qatomic_set(&sibling->rb_left, parent);
+                rb_set_parent_color(tmp1, parent, RB_BLACK);
+                rb_rotate_set_parents(parent, sibling, root, RB_RED);
+                augment->rotate(parent, sibling);
+                sibling = tmp1;
+            }
+            tmp1 = sibling->rb_right;
+            if (!tmp1 || rb_is_black(tmp1)) {
+                tmp2 = sibling->rb_left;
+                if (!tmp2 || rb_is_black(tmp2)) {
+                    /*
+                     * Case 2 - sibling color flip
+                     * (p could be either color here)
+                     *
+                     *    (p)           (p)
+                     *    / \           / \ 
+                     *   N   S    -->  N   s
+                     *      / \           / \ 
+                     *     Sl  Sr        Sl  Sr
+                     *
+                     * This leaves us violating 5) which
+                     * can be fixed by flipping p to black
+                     * if it was red, or by recursing at p.
+                     * p is red when coming from Case 1.
+                     */
+                    rb_set_parent_color(sibling, parent, RB_RED);
+                    if (rb_is_red(parent)) {
+                        rb_set_black(parent);
+                    } else {
+                        node = parent;
+                        parent = rb_parent(node);
+                        if (parent) {
+                            continue;
+                        }
+                    }
+                    break;
+                }
+                /*
+                 * Case 3 - right rotate at sibling
+                 * (p could be either color here)
+                 *
+                 *   (p)           (p)
+                 *   / \           / \
+                 *  N   S    -->  N   sl
+                 *     / \             \
+                 *    sl  Sr            S
+                 *                       \
+                 *                        Sr
+                 *
+                 * Note: p might be red, and then bot
+                 * p and sl are red after rotation (which
+                 * breaks property 4). This is fixed in
+                 * Case 4 (in rb_rotate_set_parents()
+                 *         which set sl the color of p
+                 *         and set p RB_BLACK)
+                 *
+                 *   (p)            (sl)
+                 *   / \            /  \
+                 *  N   sl   -->   P    S
+                 *       \        /      \
+                 *        S      N        Sr
+                 *         \
+                 *          Sr
+                 */
+                tmp1 = tmp2->rb_right;
+                qatomic_set(&sibling->rb_left, tmp1);
+                qatomic_set(&tmp2->rb_right, sibling);
+                qatomic_set(&parent->rb_right, tmp2);
+                if (tmp1) {
+                    rb_set_parent_color(tmp1, sibling, RB_BLACK);
+                }
+                augment->rotate(sibling, tmp2);
+                tmp1 = sibling;
+                sibling = tmp2;
+            }
+            /*
+             * Case 4 - left rotate at parent + color flips
+             * (p and sl could be either color here.
+             *  After rotation, p becomes black, s acquires
+             *  p's color, and sl keeps its color)
+             *
+             *      (p)             (s)
+             *      / \             / \
+             *     N   S     -->   P   Sr
+             *        / \         / \
+             *      (sl) sr      N  (sl)
+             */
+            tmp2 = sibling->rb_left;
+            qatomic_set(&parent->rb_right, tmp2);
+            qatomic_set(&sibling->rb_left, parent);
+            rb_set_parent_color(tmp1, sibling, RB_BLACK);
+            if (tmp2) {
+                rb_set_parent(tmp2, parent);
+            }
+            rb_rotate_set_parents(parent, sibling, root, RB_BLACK);
+            augment->rotate(parent, sibling);
+            break;
+        } else {
+            sibling = parent->rb_left;
+            if (rb_is_red(sibling)) {
+                /* Case 1 - right rotate at parent */
+                tmp1 = sibling->rb_right;
+                qatomic_set(&parent->rb_left, tmp1);
+                qatomic_set(&sibling->rb_right, parent);
+                rb_set_parent_color(tmp1, parent, RB_BLACK);
+                rb_rotate_set_parents(parent, sibling, root, RB_RED);
+                augment->rotate(parent, sibling);
+                sibling = tmp1;
+            }
+            tmp1 = sibling->rb_left;
+            if (!tmp1 || rb_is_black(tmp1)) {
+                tmp2 = sibling->rb_right;
+                if (!tmp2 || rb_is_black(tmp2)) {
+                    /* Case 2 - sibling color flip */
+                    rb_set_parent_color(sibling, parent, RB_RED);
+                    if (rb_is_red(parent)) {
+                        rb_set_black(parent);
+                    } else {
+                        node = parent;
+                        parent = rb_parent(node);
+                        if (parent) {
+                            continue;
+                        }
+                    }
+                    break;
+                }
+                /* Case 3 - left rotate at sibling */
+                tmp1 = tmp2->rb_left;
+                qatomic_set(&sibling->rb_right, tmp1);
+                qatomic_set(&tmp2->rb_left, sibling);
+                qatomic_set(&parent->rb_left, tmp2);
+                if (tmp1) {
+                    rb_set_parent_color(tmp1, sibling, RB_BLACK);
+                }
+                augment->rotate(sibling, tmp2);
+                tmp1 = sibling;
+                sibling = tmp2;
+            }
+            /* Case 4 - right rotate at parent + color flips */
+            tmp2 = sibling->rb_right;
+            qatomic_set(&parent->rb_left, tmp2);
+            qatomic_set(&sibling->rb_right, parent);
+            rb_set_parent_color(tmp1, sibling, RB_BLACK);
+            if (tmp2) {
+                rb_set_parent(tmp2, parent);
+            }
+            rb_rotate_set_parents(parent, sibling, root, RB_BLACK);
+            augment->rotate(parent, sibling);
+            break;
+        }
+    }
+}
+
+static void rb_erase_augmented(RBNode *node, RBRoot *root,
+                               const RBAugmentCallbacks *augment)
+{
+    RBNode *child = node->rb_right;
+    RBNode *tmp = node->rb_left;
+    RBNode *parent, *rebalance;
+    uintptr_t pc;
+
+    if (!tmp) {
+        /*
+         * Case 1: node to erase has no more than 1 child (easy!)
+         *
+         * Note that if there is one child it must be red due to 5)
+         * and node must be black due to 4). We adjust colors locally
+         * so as to bypass rb_erase_color() later on.
+         */
+        pc = rb_pc(node);
+        parent = pc_parent(pc);
+        rb_change_child(node, child, parent, root);
+        if (child) {
+            rb_set_pc(child, pc);
+            rebalance = NULL;
+        } else {
+            rebalance = pc_is_black(pc) ? parent : NULL;
+        }
+        tmp = parent;
+    } else if (!child) {
+        /* Still case 1, but this time the child is node->rb_left */
+        pc = rb_pc(node);
+        parent = pc_parent(pc);
+        rb_set_pc(tmp, pc);
+        rb_change_child(node, tmp, parent, root);
+        rebalance = NULL;
+        tmp = parent;
+    } else {
+        RBNode *successor = child, *child2;
+        tmp = child->rb_left;
+        if (!tmp) {
+            /*
+             * Case 2: node's successor is its right child
+             *
+             *    (n)          (s)
+             *    / \          / \
+             *  (x) (s)  ->  (x) (c)
+             *        \
+             *        (c)
+             */
+            parent = successor;
+            child2 = successor->rb_right;
+
+            augment->copy(node, successor);
+        } else {
+            /*
+             * Case 3: node's successor is leftmost under
+             * node's right child subtree
+             *
+             *    (n)          (s)
+             *    / \          / \
+             *  (x) (y)  ->  (x) (y)
+             *      /            /
+             *    (p)          (p)
+             *    /            /
+             *  (s)          (c)
+             *    \
+             *    (c)
+             */
+            do {
+                parent = successor;
+                successor = tmp;
+                tmp = tmp->rb_left;
+            } while (tmp);
+            child2 = successor->rb_right;
+            qatomic_set(&parent->rb_left, child2);
+            qatomic_set(&successor->rb_right, child);
+            rb_set_parent(child, successor);
+
+            augment->copy(node, successor);
+            augment->propagate(parent, successor);
+        }
+
+        tmp = node->rb_left;
+        qatomic_set(&successor->rb_left, tmp);
+        rb_set_parent(tmp, successor);
+
+        pc = rb_pc(node);
+        tmp = pc_parent(pc);
+        rb_change_child(node, successor, tmp, root);
+
+        if (child2) {
+            rb_set_parent_color(child2, parent, RB_BLACK);
+            rebalance = NULL;
+        } else {
+            rebalance = rb_is_black(successor) ? parent : NULL;
+        }
+        rb_set_pc(successor, pc);
+        tmp = successor;
+    }
+
+    augment->propagate(tmp, NULL);
+
+    if (rebalance) {
+        rb_erase_color(rebalance, root, augment);
+    }
+}
+
+static void rb_erase_augmented_cached(RBNode *node, RBRootLeftCached *root,
+                                      const RBAugmentCallbacks *augment)
+{
+    if (root->rb_leftmost == node) {
+        root->rb_leftmost = rb_next(node);
+    }
+    rb_erase_augmented(node, &root->rb_root, augment);
+}
+
+
+/*
+ * Interval trees.
+ *
+ * Derived from lib/interval_tree.c and its dependencies,
+ * especially include/linux/interval_tree_generic.h.
+ */
+
+#define rb_to_itree(N)  container_of(N, IntervalTreeNode, rb)
+
+static bool interval_tree_compute_max(IntervalTreeNode *node, bool exit)
+{
+    IntervalTreeNode *child;
+    uint64_t max = node->last;
+
+    if (node->rb.rb_left) {
+        child = rb_to_itree(node->rb.rb_left);
+        if (child->subtree_last > max) {
+            max = child->subtree_last;
+        }
+    }
+    if (node->rb.rb_right) {
+        child = rb_to_itree(node->rb.rb_right);
+        if (child->subtree_last > max) {
+            max = child->subtree_last;
+        }
+    }
+    if (exit && node->subtree_last == max) {
+        return true;
+    }
+    node->subtree_last = max;
+    return false;
+}
+
+static void interval_tree_propagate(RBNode *rb, RBNode *stop)
+{
+    while (rb != stop) {
+        IntervalTreeNode *node = rb_to_itree(rb);
+        if (interval_tree_compute_max(node, true)) {
+            break;
+        }
+        rb = rb_parent(&node->rb);
+    }
+}
+
+static void interval_tree_copy(RBNode *rb_old, RBNode *rb_new)
+{
+    IntervalTreeNode *old = rb_to_itree(rb_old);
+    IntervalTreeNode *new = rb_to_itree(rb_new);
+
+    new->subtree_last = old->subtree_last;
+}
+
+static void interval_tree_rotate(RBNode *rb_old, RBNode *rb_new)
+{
+    IntervalTreeNode *old = rb_to_itree(rb_old);
+    IntervalTreeNode *new = rb_to_itree(rb_new);
+
+    new->subtree_last = old->subtree_last;
+    interval_tree_compute_max(old, false);
+}
+
+static const RBAugmentCallbacks interval_tree_augment = {
+    .propagate = interval_tree_propagate,
+    .copy = interval_tree_copy,
+    .rotate = interval_tree_rotate,
+};
+
+/* Insert / remove interval nodes from the tree */
+void interval_tree_insert(IntervalTreeNode *node, IntervalTreeRoot *root)
+{
+    RBNode **link = &root->rb_root.rb_node, *rb_parent = NULL;
+    uint64_t start = node->start, last = node->last;
+    IntervalTreeNode *parent;
+    bool leftmost = true;
+
+    while (*link) {
+        rb_parent = *link;
+        parent = rb_to_itree(rb_parent);
+
+        if (parent->subtree_last < last) {
+            parent->subtree_last = last;
+        }
+        if (start < parent->start) {
+            link = &parent->rb.rb_left;
+        } else {
+            link = &parent->rb.rb_right;
+            leftmost = false;
+        }
+    }
+
+    node->subtree_last = last;
+    rb_link_node(&node->rb, rb_parent, link);
+    rb_insert_augmented_cached(&node->rb, root, leftmost,
+                               &interval_tree_augment);
+}
+
+void interval_tree_remove(IntervalTreeNode *node, IntervalTreeRoot *root)
+{
+    rb_erase_augmented_cached(&node->rb, root, &interval_tree_augment);
+}
+
+/*
+ * Iterate over intervals intersecting [start;last]
+ *
+ * Note that a node's interval intersects [start;last] iff:
+ *   Cond1: node->start <= last
+ * and
+ *   Cond2: start <= node->last
+ */
+
+static IntervalTreeNode *interval_tree_subtree_search(IntervalTreeNode *node,
+                                                      uint64_t start,
+                                                      uint64_t last)
+{
+    while (true) {
+        /*
+         * Loop invariant: start <= node->subtree_last
+         * (Cond2 is satisfied by one of the subtree nodes)
+         */
+        RBNode *tmp = qatomic_read(&node->rb.rb_left);
+        if (tmp) {
+            IntervalTreeNode *left = rb_to_itree(tmp);
+
+            if (start <= left->subtree_last) {
+                /*
+                 * Some nodes in left subtree satisfy Cond2.
+                 * Iterate to find the leftmost such node N.
+                 * If it also satisfies Cond1, that's the
+                 * match we are looking for. Otherwise, there
+                 * is no matching interval as nodes to the
+                 * right of N can't satisfy Cond1 either.
+                 */
+                node = left;
+                continue;
+            }
+        }
+        if (node->start <= last) {         /* Cond1 */
+            if (start <= node->last) {     /* Cond2 */
+                return node; /* node is leftmost match */
+            }
+            tmp = qatomic_read(&node->rb.rb_right);
+            if (tmp) {
+                node = rb_to_itree(tmp);
+                if (start <= node->subtree_last) {
+                    continue;
+                }
+            }
+        }
+        return NULL; /* no match */
+    }
+}
+
+IntervalTreeNode *interval_tree_iter_first(IntervalTreeRoot *root,
+                                           uint64_t start, uint64_t last)
+{
+    IntervalTreeNode *node, *leftmost;
+
+    if (!root || !root->rb_root.rb_node) {
+        return NULL;
+    }
+
+    /*
+     * Fastpath range intersection/overlap between A: [a0, a1] and
+     * B: [b0, b1] is given by:
+     *
+     *         a0 <= b1 && b0 <= a1
+     *
+     *  ... where A holds the lock range and B holds the smallest
+     * 'start' and largest 'last' in the tree. For the later, we
+     * rely on the root node, which by augmented interval tree
+     * property, holds the largest value in its last-in-subtree.
+     * This allows mitigating some of the tree walk overhead for
+     * for non-intersecting ranges, maintained and consulted in O(1).
+     */
+    node = rb_to_itree(root->rb_root.rb_node);
+    if (node->subtree_last < start) {
+        return NULL;
+    }
+
+    leftmost = rb_to_itree(root->rb_leftmost);
+    if (leftmost->start > last) {
+        return NULL;
+    }
+
+    return interval_tree_subtree_search(node, start, last);
+}
+
+IntervalTreeNode *interval_tree_iter_next(IntervalTreeNode *node,
+                                          uint64_t start, uint64_t last)
+{
+    RBNode *rb, *prev;
+
+    rb = qatomic_read(&node->rb.rb_right);
+    while (true) {
+        /*
+         * Loop invariants:
+         *   Cond1: node->start <= last
+         *   rb == node->rb.rb_right
+         *
+         * First, search right subtree if suitable
+         */
+        if (rb) {
+            IntervalTreeNode *right = rb_to_itree(rb);
+
+            if (start <= right->subtree_last) {
+                return interval_tree_subtree_search(right, start, last);
+            }
+        }
+
+        /* Move up the tree until we come from a node's left child */
+        do {
+            rb = rb_parent(&node->rb);
+            if (!rb) {
+                return NULL;
+            }
+            prev = &node->rb;
+            node = rb_to_itree(rb);
+            rb = qatomic_read(&node->rb.rb_right);
+        } while (prev == rb);
+
+        /* Check if the node intersects [start;last] */
+        if (last < node->start) {  /* !Cond1 */
+            return NULL;
+        }
+        if (start <= node->last) { /* Cond2 */
+            return node;
+        }
+    }
+}
+
+/* Occasionally useful for calling from within the debugger. */
+#if 0
+static void debug_interval_tree_int(IntervalTreeNode *node,
+                                    const char *dir, int level)
+{
+    printf("%4d %*s %s [%" PRIu64 ",%" PRIu64 "] subtree_last:%" PRIu64 "\n",
+           level, level + 1, dir, rb_is_red(&node->rb) ? "r" : "b",
+           node->start, node->last, node->subtree_last);
+
+    if (node->rb.rb_left) {
+        debug_interval_tree_int(rb_to_itree(node->rb.rb_left), "<", level + 1);
+    }
+    if (node->rb.rb_right) {
+        debug_interval_tree_int(rb_to_itree(node->rb.rb_right), ">", level + 1);
+    }
+}
+
+void debug_interval_tree(IntervalTreeNode *node);
+void debug_interval_tree(IntervalTreeNode *node)
+{
+    if (node) {
+        debug_interval_tree_int(node, "*", 0);
+    } else {
+        printf("null\n");
+    }
+}
+#endif
index f3a9e92a378f3eb5255b939b14039fe6680b1f52..7e73948f5e3de984d8d5d83066b97b3bdf3a8b24 100644 (file)
@@ -17,7 +17,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "qemu/iov.h"
 #include "qemu/sockets.h"
 #include "qemu/cutils.h"
@@ -112,12 +111,17 @@ do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
     /*XXX Note: windows has WSASend() and WSARecv() */
     unsigned i = 0;
     ssize_t ret = 0;
+    ssize_t off = 0;
     while (i < iov_cnt) {
         ssize_t r = do_send
-            ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0)
-            : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0);
+            ? send(sockfd, iov[i].iov_base + off, iov[i].iov_len - off, 0)
+            : recv(sockfd, iov[i].iov_base + off, iov[i].iov_len - off, 0);
         if (r > 0) {
             ret += r;
+            off += r;
+            if (off < iov[i].iov_len) {
+                continue;
+            }
         } else if (!r) {
             break;
         } else if (errno == EINTR) {
@@ -130,6 +134,7 @@ do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
             }
             break;
         }
+        off = 0;
         i++;
     }
     return ret;
@@ -373,15 +378,15 @@ static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset,
 }
 
 /*
- * qiov_slice
+ * qemu_iovec_slice
  *
  * Find subarray of iovec's, containing requested range. @head would
  * be offset in first iov (returned by the function), @tail would be
  * count of extra bytes in last iovec (returned iov + @niov - 1).
  */
-static struct iovec *qiov_slice(QEMUIOVector *qiov,
-                                size_t offset, size_t len,
-                                size_t *head, size_t *tail, int *niov)
+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov,
+                               size_t offset, size_t len,
+                               size_t *head, size_t *tail, int *niov)
 {
     struct iovec *iov, *end_iov;
 
@@ -406,61 +411,11 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len)
     size_t head, tail;
     int niov;
 
-    qiov_slice(qiov, offset, len, &head, &tail, &niov);
+    qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov);
 
     return niov;
 }
 
-/*
- * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov,
- * and @tail_buf buffer into new qiov.
- */
-void qemu_iovec_init_extended(
-        QEMUIOVector *qiov,
-        void *head_buf, size_t head_len,
-        QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
-        void *tail_buf, size_t tail_len)
-{
-    size_t mid_head, mid_tail;
-    int total_niov, mid_niov = 0;
-    struct iovec *p, *mid_iov = NULL;
-
-    if (mid_len) {
-        mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len,
-                             &mid_head, &mid_tail, &mid_niov);
-    }
-
-    total_niov = !!head_len + mid_niov + !!tail_len;
-    if (total_niov == 1) {
-        qemu_iovec_init_buf(qiov, NULL, 0);
-        p = &qiov->local_iov;
-    } else {
-        qiov->niov = qiov->nalloc = total_niov;
-        qiov->size = head_len + mid_len + tail_len;
-        p = qiov->iov = g_new(struct iovec, qiov->niov);
-    }
-
-    if (head_len) {
-        p->iov_base = head_buf;
-        p->iov_len = head_len;
-        p++;
-    }
-
-    assert(!mid_niov == !mid_len);
-    if (mid_niov) {
-        memcpy(p, mid_iov, mid_niov * sizeof(*p));
-        p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head;
-        p[0].iov_len -= mid_head;
-        p[mid_niov - 1].iov_len -= mid_tail;
-        p += mid_niov;
-    }
-
-    if (tail_len) {
-        p->iov_base = tail_buf;
-        p->iov_len = tail_len;
-    }
-}
-
 /*
  * Check if the contents of subrange of qiov data is all zeroes.
  */
@@ -492,7 +447,21 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes)
 void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
                            size_t offset, size_t len)
 {
-    qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0);
+    struct iovec *slice_iov;
+    int slice_niov;
+    size_t slice_head, slice_tail;
+
+    assert(source->size >= len);
+    assert(source->size - len >= offset);
+
+    slice_iov = qemu_iovec_slice(source, offset, len,
+                                 &slice_head, &slice_tail, &slice_niov);
+    if (slice_niov == 1) {
+        qemu_iovec_init_buf(qiov, slice_iov[0].iov_base + slice_head, len);
+    } else {
+        qemu_iovec_init(qiov, slice_niov);
+        qemu_iovec_concat_iov(qiov, slice_iov, slice_niov, slice_head, len);
+    }
 }
 
 void qemu_iovec_destroy(QEMUIOVector *qiov)
@@ -602,7 +571,7 @@ static int sortelem_cmp_src_index(const void *a, const void *b)
  */
 void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf)
 {
-    IOVectorSortElem sortelems[src->niov];
+    g_autofree IOVectorSortElem *sortelems = g_new(IOVectorSortElem, src->niov);
     void *last_end;
     int i;
 
index 7990692cbd33f12ce77ad175ca05637b3196199d..536789797e472fb947ec99a2d32c9909d3c21eed 100644 (file)
@@ -16,6 +16,45 @@ struct IOVATree {
     GTree *tree;
 };
 
+/* Args to pass to iova_tree_alloc foreach function. */
+struct IOVATreeAllocArgs {
+    /* Size of the desired allocation */
+    size_t new_size;
+
+    /* The minimum address allowed in the allocation */
+    hwaddr iova_begin;
+
+    /* Map at the left of the hole, can be NULL if "this" is first one */
+    const DMAMap *prev;
+
+    /* Map at the right of the hole, can be NULL if "prev" is the last one */
+    const DMAMap *this;
+
+    /* If found, we fill in the IOVA here */
+    hwaddr iova_result;
+
+    /* Whether have we found a valid IOVA */
+    bool iova_found;
+};
+
+typedef struct IOVATreeFindIOVAArgs {
+    const DMAMap *needle;
+    const DMAMap *result;
+} IOVATreeFindIOVAArgs;
+
+/**
+ * Iterate args to the next hole
+ *
+ * @args: The alloc arguments
+ * @next: The next mapping in the tree. Can be NULL to signal the last one
+ */
+static void iova_tree_alloc_args_iterate(struct IOVATreeAllocArgs *args,
+                                         const DMAMap *next)
+{
+    args->prev = args->this;
+    args->this = next;
+}
+
 static int iova_tree_compare(gconstpointer a, gconstpointer b, gpointer data)
 {
     const DMAMap *m1 = a, *m2 = b;
@@ -42,14 +81,43 @@ IOVATree *iova_tree_new(void)
     return iova_tree;
 }
 
-DMAMap *iova_tree_find(IOVATree *tree, DMAMap *map)
+const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map)
 {
     return g_tree_lookup(tree->tree, map);
 }
 
-DMAMap *iova_tree_find_address(IOVATree *tree, hwaddr iova)
+static gboolean iova_tree_find_address_iterator(gpointer key, gpointer value,
+                                                gpointer data)
+{
+    const DMAMap *map = key;
+    IOVATreeFindIOVAArgs *args = data;
+    const DMAMap *needle;
+
+    g_assert(key == value);
+
+    needle = args->needle;
+    if (map->translated_addr + map->size < needle->translated_addr ||
+        needle->translated_addr + needle->size < map->translated_addr) {
+        return false;
+    }
+
+    args->result = map;
+    return true;
+}
+
+const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map)
 {
-    DMAMap map = { .iova = iova, .size = 0 };
+    IOVATreeFindIOVAArgs args = {
+        .needle = map,
+    };
+
+    g_tree_foreach(tree->tree, iova_tree_find_address_iterator, &args);
+    return args.result;
+}
+
+const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova)
+{
+    const DMAMap map = { .iova = iova, .size = 0 };
 
     return iova_tree_find(tree, &map);
 }
@@ -60,7 +128,7 @@ static inline void iova_tree_insert_internal(GTree *gtree, DMAMap *range)
     g_tree_insert(gtree, range, range);
 }
 
-int iova_tree_insert(IOVATree *tree, DMAMap *map)
+int iova_tree_insert(IOVATree *tree, const DMAMap *map)
 {
     DMAMap *new;
 
@@ -96,15 +164,115 @@ void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator)
     g_tree_foreach(tree->tree, iova_tree_traverse, iterator);
 }
 
-int iova_tree_remove(IOVATree *tree, DMAMap *map)
+void iova_tree_remove(IOVATree *tree, DMAMap map)
 {
-    DMAMap *overlap;
+    const DMAMap *overlap;
 
-    while ((overlap = iova_tree_find(tree, map))) {
+    while ((overlap = iova_tree_find(tree, &map))) {
         g_tree_remove(tree->tree, overlap);
     }
+}
 
-    return IOVA_OK;
+/**
+ * Try to find an unallocated IOVA range between prev and this elements.
+ *
+ * @args: Arguments to allocation
+ *
+ * Cases:
+ *
+ * (1) !prev, !this: No entries allocated, always succeed
+ *
+ * (2) !prev, this: We're iterating at the 1st element.
+ *
+ * (3) prev, !this: We're iterating at the last element.
+ *
+ * (4) prev, this: this is the most common case, we'll try to find a hole
+ * between "prev" and "this" mapping.
+ *
+ * Note that this function assumes the last valid iova is HWADDR_MAX, but it
+ * searches linearly so it's easy to discard the result if it's not the case.
+ */
+static void iova_tree_alloc_map_in_hole(struct IOVATreeAllocArgs *args)
+{
+    const DMAMap *prev = args->prev, *this = args->this;
+    uint64_t hole_start, hole_last;
+
+    if (this && this->iova + this->size < args->iova_begin) {
+        return;
+    }
+
+    hole_start = MAX(prev ? prev->iova + prev->size + 1 : 0, args->iova_begin);
+    hole_last = this ? this->iova : HWADDR_MAX;
+
+    if (hole_last - hole_start > args->new_size) {
+        args->iova_result = hole_start;
+        args->iova_found = true;
+    }
+}
+
+/**
+ * Foreach dma node in the tree, compare if there is a hole with its previous
+ * node (or minimum iova address allowed) and the node.
+ *
+ * @key: Node iterating
+ * @value: Node iterating
+ * @pargs: Struct to communicate with the outside world
+ *
+ * Return: false to keep iterating, true if needs break.
+ */
+static gboolean iova_tree_alloc_traverse(gpointer key, gpointer value,
+                                         gpointer pargs)
+{
+    struct IOVATreeAllocArgs *args = pargs;
+    DMAMap *node = value;
+
+    assert(key == value);
+
+    iova_tree_alloc_args_iterate(args, node);
+    iova_tree_alloc_map_in_hole(args);
+    return args->iova_found;
+}
+
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
+                        hwaddr iova_last)
+{
+    struct IOVATreeAllocArgs args = {
+        .new_size = map->size,
+        .iova_begin = iova_begin,
+    };
+
+    if (unlikely(iova_last < iova_begin)) {
+        return IOVA_ERR_INVALID;
+    }
+
+    /*
+     * Find a valid hole for the mapping
+     *
+     * Assuming low iova_begin, so no need to do a binary search to
+     * locate the first node.
+     *
+     * TODO: Replace all this with g_tree_node_first/next/last when available
+     * (from glib since 2.68). To do it with g_tree_foreach complicates the
+     * code a lot.
+     *
+     */
+    g_tree_foreach(tree->tree, iova_tree_alloc_traverse, &args);
+    if (!args.iova_found) {
+        /*
+         * Either tree is empty or the last hole is still not checked.
+         * g_tree_foreach does not compare (last, iova_last] range, so we check
+         * it here.
+         */
+        iova_tree_alloc_args_iterate(&args, NULL);
+        iova_tree_alloc_map_in_hole(&args);
+    }
+
+    if (!args.iova_found || args.iova_result + map->size > iova_last) {
+        return IOVA_ERR_NOMEM;
+    }
+
+    map->iova = args.iova_result;
+    return iova_tree_insert(tree, map);
 }
 
 void iova_tree_destroy(IOVATree *tree)
index 7f625ad33c65a41370f2834e186e6cb43171a7cc..66a5b4740f12d792be99a9a5036c48f8611264ef 100644 (file)
@@ -16,7 +16,9 @@
  *   key-vals     = [ key-val { ',' key-val } [ ',' ] ]
  *   key-val      = key '=' val | help
  *   key          = key-fragment { '.' key-fragment }
- *   key-fragment = / [^=,.]+ /
+ *   key-fragment = qapi-name | index
+ *   qapi-name    = '__' / [a-z0-9.-]+ / '_' / [A-Za-z][A-Za-z0-9_-]* /
+ *   index        = / [0-9]+ /
  *   val          = { / [^,]+ / | ',,' }
  *   help         = 'help' | '?'
  *
@@ -93,8 +95,8 @@
 #include "qapi/qmp/qlist.h"
 #include "qapi/qmp/qstring.h"
 #include "qemu/cutils.h"
+#include "qemu/keyval.h"
 #include "qemu/help_option.h"
-#include "qemu/option.h"
 
 /*
  * Convert @key to a list index.
@@ -189,7 +191,7 @@ static const char *keyval_parse_one(QDict *qdict, const char *params,
     QDict *cur;
     int ret;
     QObject *next;
-    QString *val;
+    GString *val;
 
     key = params;
     val_end = NULL;
@@ -263,7 +265,7 @@ static const char *keyval_parse_one(QDict *qdict, const char *params,
 
     if (key == implied_key) {
         assert(!*s);
-        val = qstring_from_substr(params, 0, val_end - params);
+        val = g_string_new_len(params, val_end - params);
         s = val_end;
         if (*s == ',') {
             s++;
@@ -276,7 +278,7 @@ static const char *keyval_parse_one(QDict *qdict, const char *params,
         }
         s++;
 
-        val = qstring_new();
+        val = g_string_new(NULL);
         for (;;) {
             if (!*s) {
                 break;
@@ -286,11 +288,12 @@ static const char *keyval_parse_one(QDict *qdict, const char *params,
                     break;
                 }
             }
-            qstring_append_chr(val, *s++);
+            g_string_append_c(val, *s++);
         }
     }
 
-    if (!keyval_parse_put(cur, key_in_cur, val, key, key_end, errp)) {
+    if (!keyval_parse_put(cur, key_in_cur, qstring_from_gstring(val),
+                          key, key_end, errp)) {
         return NULL;
     }
     return s;
@@ -309,6 +312,86 @@ static char *reassemble_key(GSList *key)
     return g_string_free(s, FALSE);
 }
 
+/*
+ * Recursive worker for keyval_merge.
+ *
+ * @str is the path that led to the * current dictionary (to be used for
+ * error messages).  It is modified internally but restored before the
+ * function returns.
+ */
+static void keyval_do_merge(QDict *dest, const QDict *merged, GString *str, Error **errp)
+{
+    size_t save_len = str->len;
+    const QDictEntry *ent;
+    QObject *old_value;
+
+    for (ent = qdict_first(merged); ent; ent = qdict_next(merged, ent)) {
+        old_value = qdict_get(dest, ent->key);
+        if (old_value) {
+            if (qobject_type(old_value) != qobject_type(ent->value)) {
+                error_setg(errp, "Parameter '%s%s' used inconsistently",
+                           str->str, ent->key);
+                return;
+            } else if (qobject_type(ent->value) == QTYPE_QDICT) {
+                /* Merge sub-dictionaries.  */
+                g_string_append(str, ent->key);
+                g_string_append_c(str, '.');
+                keyval_do_merge(qobject_to(QDict, old_value),
+                                qobject_to(QDict, ent->value),
+                                str, errp);
+                g_string_truncate(str, save_len);
+                continue;
+            } else if (qobject_type(ent->value) == QTYPE_QLIST) {
+                /* Append to old list.  */
+                QList *old = qobject_to(QList, old_value);
+                QList *new = qobject_to(QList, ent->value);
+                const QListEntry *item;
+                QLIST_FOREACH_ENTRY(new, item) {
+                    qobject_ref(item->value);
+                    qlist_append_obj(old, item->value);
+                }
+                continue;
+            } else {
+                assert(qobject_type(ent->value) == QTYPE_QSTRING);
+            }
+        }
+
+        qobject_ref(ent->value);
+        qdict_put_obj(dest, ent->key, ent->value);
+    }
+}
+
+/* Merge the @merged dictionary into @dest.
+ *
+ * The dictionaries are expected to be returned by the keyval parser, and
+ * therefore the only expected scalar type is the string.  In case the same
+ * path is present in both @dest and @merged, the semantics are as follows:
+ *
+ * - lists are concatenated
+ *
+ * - dictionaries are merged recursively
+ *
+ * - for scalar values, @merged wins
+ *
+ * In case an error is reported, @dest may already have been modified.
+ *
+ * This function can be used to implement semantics analogous to QemuOpts's
+ * .merge_lists = true case, or to implement -set for options backed by QDicts.
+ *
+ * Note: while QemuOpts is commonly used so that repeated keys overwrite
+ * ("last one wins"), it can also be used so that repeated keys build up
+ * a list. keyval_merge() can only be used when the options' semantics are
+ * the former, not the latter.
+ */
+void keyval_merge(QDict *dest, const QDict *merged, Error **errp)
+{
+    GString *str;
+
+    str = g_string_new("");
+    keyval_do_merge(dest, merged, str, errp);
+    g_string_free(str, TRUE);
+}
+
 /*
  * Listify @cur recursively.
  * Replace QDicts whose keys are all valid list indexes by QLists.
@@ -430,13 +513,14 @@ static QObject *keyval_listify(QDict *cur, GSList *key_of_cur, Error **errp)
  * If @p_help is not NULL, store whether help is requested there.
  * If @p_help is NULL and help is requested, fail.
  *
- * On success, return a dictionary of the parsed keys and values.
- * On failure, store an error through @errp and return NULL.
+ * On success, return @dict, now filled with the parsed keys and values.
+ *
+ * On failure, store an error through @errp and return NULL.  Any keys
+ * and values parsed so far will be in @dict nevertheless.
  */
-QDict *keyval_parse(const char *params, const char *implied_key,
-                    bool *p_help, Error **errp)
+QDict *keyval_parse_into(QDict *qdict, const char *params, const char *implied_key,
+                         bool *p_help, Error **errp)
 {
-    QDict *qdict = qdict_new();
     QObject *listified;
     const char *s;
     bool help = false;
@@ -445,7 +529,6 @@ QDict *keyval_parse(const char *params, const char *implied_key,
     while (*s) {
         s = keyval_parse_one(qdict, s, implied_key, &help, errp);
         if (!s) {
-            qobject_unref(qdict);
             return NULL;
         }
         implied_key = NULL;
@@ -455,15 +538,42 @@ QDict *keyval_parse(const char *params, const char *implied_key,
         *p_help = help;
     } else if (help) {
         error_setg(errp, "Help is not available for this option");
-        qobject_unref(qdict);
         return NULL;
     }
 
     listified = keyval_listify(qdict, NULL, errp);
     if (!listified) {
-        qobject_unref(qdict);
         return NULL;
     }
     assert(listified == QOBJECT(qdict));
     return qdict;
 }
+
+/*
+ * Parse @params in QEMU's traditional KEY=VALUE,... syntax.
+ *
+ * If @implied_key, the first KEY= can be omitted.  @implied_key is
+ * implied then, and VALUE can't be empty or contain ',' or '='.
+ *
+ * A parameter "help" or "?" without a value isn't added to the
+ * resulting dictionary, but instead is interpreted as help request.
+ * All other options are parsed and returned normally so that context
+ * specific help can be printed.
+ *
+ * If @p_help is not NULL, store whether help is requested there.
+ * If @p_help is NULL and help is requested, fail.
+ *
+ * On success, return a dictionary of the parsed keys and values.
+ * On failure, store an error through @errp and return NULL.
+ */
+QDict *keyval_parse(const char *params, const char *implied_key,
+                    bool *p_help, Error **errp)
+{
+    QDict *qdict = qdict_new();
+    QDict *ret = keyval_parse_into(qdict, params, implied_key, p_help, errp);
+
+    if (!ret) {
+        qobject_unref(qdict);
+    }
+    return ret;
+}
index 2ee1500beedf9630f162454a38333e46b23953b9..d36c98da0b4ee251bd79f8ff5eda3250ec3bd967 100644 (file)
 #include "trace/control.h"
 #include "qemu/thread.h"
 #include "qemu/lockable.h"
+#include "qemu/rcu.h"
+#ifdef CONFIG_LINUX
+#include <sys/syscall.h>
+#endif
+
+
+typedef struct RCUCloseFILE {
+    struct rcu_head rcu;
+    FILE *fd;
+} RCUCloseFILE;
+
+/* Mutex covering the other global_* variables. */
+static QemuMutex global_mutex;
+static char *global_filename;
+static FILE *global_file;
+static __thread FILE *thread_file;
+static __thread Notifier qemu_log_thread_cleanup_notifier;
 
-static char *logfilename;
-static QemuMutex qemu_logfile_mutex;
-QemuLogFile *qemu_logfile;
 int qemu_loglevel;
-static int log_append = 0;
+static bool log_per_thread;
 static GArray *debug_regions;
 
-/* Return the number of characters emitted.  */
-int qemu_log(const char *fmt, ...)
+/* Returns true if qemu_log() will really write somewhere. */
+bool qemu_log_enabled(void)
 {
-    int ret = 0;
-    QemuLogFile *logfile;
+    return log_per_thread || qatomic_read(&global_file) != NULL;
+}
 
-    rcu_read_lock();
-    logfile = qatomic_rcu_read(&qemu_logfile);
+/* Returns true if qemu_log() will write somewhere other than stderr. */
+bool qemu_log_separate(void)
+{
+    if (log_per_thread) {
+        return true;
+    } else {
+        FILE *logfile = qatomic_read(&global_file);
+        return logfile && logfile != stderr;
+    }
+}
+
+static int log_thread_id(void)
+{
+#ifdef CONFIG_GETTID
+    return gettid();
+#elif defined(SYS_gettid)
+    return syscall(SYS_gettid);
+#else
+    static int counter;
+    return qatomic_fetch_inc(&counter);
+#endif
+}
+
+static void qemu_log_thread_cleanup(Notifier *n, void *unused)
+{
+    if (thread_file != stderr) {
+        fclose(thread_file);
+        thread_file = NULL;
+    }
+}
+
+/* Lock/unlock output. */
+
+static FILE *qemu_log_trylock_with_err(Error **errp)
+{
+    FILE *logfile;
+
+    logfile = thread_file;
+    if (!logfile) {
+        if (log_per_thread) {
+            g_autofree char *filename
+                = g_strdup_printf(global_filename, log_thread_id());
+            logfile = fopen(filename, "w");
+            if (!logfile) {
+                error_setg_errno(errp, errno,
+                                 "Error opening logfile %s for thread %d",
+                                 filename, log_thread_id());
+                return NULL;
+            }
+            thread_file = logfile;
+            qemu_log_thread_cleanup_notifier.notify = qemu_log_thread_cleanup;
+            qemu_thread_atexit_add(&qemu_log_thread_cleanup_notifier);
+        } else {
+            rcu_read_lock();
+            /*
+             * FIXME: typeof_strip_qual, as used by qatomic_rcu_read,
+             * does not work with pointers to undefined structures,
+             * such as we have with struct _IO_FILE and musl libc.
+             * Since all we want is a read of a pointer, cast to void**,
+             * which does work with typeof_strip_qual.
+             */
+            logfile = qatomic_rcu_read((void **)&global_file);
+            if (!logfile) {
+                rcu_read_unlock();
+                return NULL;
+            }
+        }
+    }
+
+    qemu_flockfile(logfile);
+    return logfile;
+}
+
+FILE *qemu_log_trylock(void)
+{
+    return qemu_log_trylock_with_err(NULL);
+}
+
+void qemu_log_unlock(FILE *logfile)
+{
     if (logfile) {
+        fflush(logfile);
+        qemu_funlockfile(logfile);
+        if (!log_per_thread) {
+            rcu_read_unlock();
+        }
+    }
+}
+
+void qemu_log(const char *fmt, ...)
+{
+    FILE *f = qemu_log_trylock();
+    if (f) {
         va_list ap;
+
         va_start(ap, fmt);
-        ret = vfprintf(logfile->fd, fmt, ap);
+        vfprintf(f, fmt, ap);
         va_end(ap);
-
-        /* Don't pass back error results.  */
-        if (ret < 0) {
-            ret = 0;
-        }
+        qemu_log_unlock(f);
     }
-    rcu_read_unlock();
-    return ret;
 }
 
-static void __attribute__((__constructor__)) qemu_logfile_init(void)
+static void __attribute__((__constructor__)) startup(void)
+{
+    qemu_mutex_init(&global_mutex);
+}
+
+static void rcu_close_file(RCUCloseFILE *r)
 {
-    qemu_mutex_init(&qemu_logfile_mutex);
+    fclose(r->fd);
+    g_free(r);
 }
 
-static void qemu_logfile_free(QemuLogFile *logfile)
+/**
+ * valid_filename_template:
+ *
+ * Validate the filename template.  Require %d if per_thread, allow it
+ * otherwise; require no other % within the template.
+ */
+
+typedef enum {
+    vft_error,
+    vft_stderr,
+    vft_strdup,
+    vft_pid_printf,
+} ValidFilenameTemplateResult;
+
+static ValidFilenameTemplateResult
+valid_filename_template(const char *filename, bool per_thread, Error **errp)
 {
-    g_assert(logfile);
+    if (filename) {
+        char *pidstr = strstr(filename, "%");
 
-    if (logfile->fd != stderr) {
-        fclose(logfile->fd);
+        if (pidstr) {
+            /* We only accept one %d, no other format strings */
+            if (pidstr[1] != 'd' || strchr(pidstr + 2, '%')) {
+                error_setg(errp, "Bad logfile template: %s", filename);
+                return 0;
+            }
+            return per_thread ? vft_strdup : vft_pid_printf;
+        }
     }
-    g_free(logfile);
+    if (per_thread) {
+        error_setg(errp, "Filename template with '%%d' required for 'tid'");
+        return vft_error;
+    }
+    return filename ? vft_strdup : vft_stderr;
 }
 
-static bool log_uses_own_buffers;
-
 /* enable or disable low levels log */
-void qemu_set_log(int log_flags)
+static bool qemu_set_log_internal(const char *filename, bool changed_name,
+                                  int log_flags, Error **errp)
 {
-    bool need_to_open_file = false;
-    QemuLogFile *logfile;
+    bool need_to_open_file;
+    bool daemonized;
+    bool per_thread;
+    FILE *logfile;
 
-    qemu_loglevel = log_flags;
+    QEMU_LOCK_GUARD(&global_mutex);
+    logfile = global_file;
+
+    /* The per-thread flag is immutable. */
+    if (log_per_thread) {
+        log_flags |= LOG_PER_THREAD;
+    } else {
+        if (global_filename) {
+            log_flags &= ~LOG_PER_THREAD;
+        }
+    }
+
+    per_thread = log_flags & LOG_PER_THREAD;
+
+    if (changed_name) {
+        char *newname = NULL;
+
+        /*
+         * Once threads start opening their own log files, we have no
+         * easy mechanism to tell them all to close and re-open.
+         * There seems little cause to do so either -- this option
+         * will most often be used at user-only startup.
+         */
+        if (log_per_thread) {
+            error_setg(errp, "Cannot change log filename after setting 'tid'");
+            return false;
+        }
+
+        switch (valid_filename_template(filename, per_thread, errp)) {
+        case vft_error:
+            return false;
+        case vft_stderr:
+            break;
+        case vft_strdup:
+            newname = g_strdup(filename);
+            break;
+        case vft_pid_printf:
+            newname = g_strdup_printf(filename, getpid());
+            break;
+        }
+
+        g_free(global_filename);
+        global_filename = newname;
+        filename = newname;
+    } else {
+        filename = global_filename;
+        if (per_thread &&
+            valid_filename_template(filename, true, errp) == vft_error) {
+            return false;
+        }
+    }
+
+    /* Once the per-thread flag is set, it cannot be unset. */
+    if (per_thread) {
+        log_per_thread = true;
+    }
+    /* The flag itself is not relevant for need_to_open_file. */
+    log_flags &= ~LOG_PER_THREAD;
 #ifdef CONFIG_TRACE_LOG
-    qemu_loglevel |= LOG_TRACE;
+    log_flags |= LOG_TRACE;
 #endif
-    /*
-     * In all cases we only log if qemu_loglevel is set.
-     * Also:
-     *   If not daemonized we will always log either to stderr
-     *     or to a file (if there is a logfilename).
-     *   If we are daemonized,
-     *     we will only log if there is a logfilename.
-     */
-    if (qemu_loglevel && (!is_daemonized() || logfilename)) {
-        need_to_open_file = true;
+    qemu_loglevel = log_flags;
+
+    daemonized = is_daemonized();
+    need_to_open_file = false;
+    if (!daemonized) {
+        /*
+         * If not daemonized we only log if qemu_loglevel is set, either to
+         * stderr or to a file (if there is a filename).
+         * If per-thread, open the file for each thread in qemu_log_trylock().
+         */
+        need_to_open_file = qemu_loglevel && !log_per_thread;
+    } else {
+        /*
+         * If we are daemonized, we will only log if there is a filename.
+         */
+        need_to_open_file = filename != NULL;
+    }
+
+    if (logfile) {
+        fflush(logfile);
+        if (changed_name && logfile != stderr) {
+            RCUCloseFILE *r = g_new0(RCUCloseFILE, 1);
+            r->fd = logfile;
+            qatomic_rcu_set(&global_file, NULL);
+            call_rcu(r, rcu_close_file, rcu);
+        }
+        if (changed_name) {
+            logfile = NULL;
+        }
     }
-    QEMU_LOCK_GUARD(&qemu_logfile_mutex);
-    if (qemu_logfile && !need_to_open_file) {
-        logfile = qemu_logfile;
-        qatomic_rcu_set(&qemu_logfile, NULL);
-        call_rcu(logfile, qemu_logfile_free, rcu);
-    } else if (!qemu_logfile && need_to_open_file) {
-        logfile = g_new0(QemuLogFile, 1);
-        if (logfilename) {
-            logfile->fd = fopen(logfilename, log_append ? "a" : "w");
-            if (!logfile->fd) {
-                g_free(logfile);
-                perror(logfilename);
-                _exit(1);
+
+    if (log_per_thread && daemonized) {
+        logfile = thread_file;
+    }
+
+    if (!logfile && need_to_open_file) {
+        if (filename) {
+            if (log_per_thread) {
+                logfile = qemu_log_trylock_with_err(errp);
+                if (!logfile) {
+                    return false;
+                }
+                qemu_log_unlock(logfile);
+            } else {
+                logfile = fopen(filename, "w");
+                if (!logfile) {
+                    error_setg_errno(errp, errno, "Error opening logfile %s",
+                                     filename);
+                    return false;
+                }
             }
             /* In case we are a daemon redirect stderr to logfile */
-            if (is_daemonized()) {
-                dup2(fileno(logfile->fd), STDERR_FILENO);
-                fclose(logfile->fd);
-                /* This will skip closing logfile in qemu_log_close() */
-                logfile->fd = stderr;
+            if (daemonized) {
+                dup2(fileno(logfile), STDERR_FILENO);
+                fclose(logfile);
+                /*
+                 * This will skip closing logfile in rcu_close_file()
+                 * or qemu_log_thread_cleanup().
+                 */
+                logfile = stderr;
             }
         } else {
             /* Default to stderr if no log file specified */
-            assert(!is_daemonized());
-            logfile->fd = stderr;
+            assert(!daemonized);
+            logfile = stderr;
         }
-        /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
-        if (log_uses_own_buffers) {
-            static char logfile_buf[4096];
 
-            setvbuf(logfile->fd, logfile_buf, _IOLBF, sizeof(logfile_buf));
+        if (log_per_thread && daemonized) {
+            thread_file = logfile;
         } else {
-#if defined(_WIN32)
-            /* Win32 doesn't support line-buffering, so use unbuffered output. */
-            setvbuf(logfile->fd, NULL, _IONBF, 0);
-#else
-            setvbuf(logfile->fd, NULL, _IOLBF, 0);
-#endif
-            log_append = 1;
+            qatomic_rcu_set(&global_file, logfile);
         }
-        qatomic_rcu_set(&qemu_logfile, logfile);
     }
+    return true;
 }
 
-void qemu_log_needs_buffers(void)
+bool qemu_set_log(int log_flags, Error **errp)
 {
-    log_uses_own_buffers = true;
+    return qemu_set_log_internal(NULL, false, log_flags, errp);
 }
 
-/*
- * Allow the user to include %d in their logfile which will be
- * substituted with the current PID. This is useful for debugging many
- * nested linux-user tasks but will result in lots of logs.
- *
- * filename may be NULL. In that case, log output is sent to stderr
- */
-void qemu_set_log_filename(const char *filename, Error **errp)
+bool qemu_set_log_filename(const char *filename, Error **errp)
 {
-    g_free(logfilename);
-    logfilename = NULL;
-
-    if (filename) {
-            char *pidstr = strstr(filename, "%");
-            if (pidstr) {
-                /* We only accept one %d, no other format strings */
-                if (pidstr[1] != 'd' || strchr(pidstr + 2, '%')) {
-                    error_setg(errp, "Bad logfile format: %s", filename);
-                    return;
-                } else {
-                    logfilename = g_strdup_printf(filename, getpid());
-                }
-            } else {
-                logfilename = g_strdup(filename);
-            }
-    }
+    return qemu_set_log_internal(filename, true, qemu_loglevel, errp);
+}
 
-    qemu_log_close();
-    qemu_set_log(qemu_loglevel);
+bool qemu_set_log_filename_flags(const char *name, int flags, Error **errp)
+{
+    return qemu_set_log_internal(name, true, flags, errp);
 }
 
 /* Returns true if addr is in our debug filter or no filter defined
@@ -266,34 +455,6 @@ out:
     g_strfreev(ranges);
 }
 
-/* fflush() the log file */
-void qemu_log_flush(void)
-{
-    QemuLogFile *logfile;
-
-    rcu_read_lock();
-    logfile = qatomic_rcu_read(&qemu_logfile);
-    if (logfile) {
-        fflush(logfile->fd);
-    }
-    rcu_read_unlock();
-}
-
-/* Close the log file */
-void qemu_log_close(void)
-{
-    QemuLogFile *logfile;
-
-    qemu_mutex_lock(&qemu_logfile_mutex);
-    logfile = qemu_logfile;
-
-    if (logfile) {
-        qatomic_rcu_set(&qemu_logfile, NULL);
-        call_rcu(logfile, qemu_logfile_free, rcu);
-    }
-    qemu_mutex_unlock(&qemu_logfile_mutex);
-}
-
 const QEMULogItem qemu_log_items[] = {
     { CPU_LOG_TB_OUT_ASM, "out_asm",
       "show generated host assembly code for each compiled TB" },
@@ -330,10 +491,14 @@ const QEMULogItem qemu_log_items[] = {
       "do not chain compiled TBs so that \"exec\" and \"cpu\" show\n"
       "complete traces" },
 #ifdef CONFIG_PLUGIN
-    { CPU_LOG_PLUGIN, "plugin", "output from TCG plugins\n"},
+    { CPU_LOG_PLUGIN, "plugin", "output from TCG plugins"},
 #endif
     { LOG_STRACE, "strace",
       "log every user-mode syscall, its input, and its result" },
+    { LOG_PER_THREAD, "tid",
+      "open a separate log file per thread; filename must contain '%d'" },
+    { CPU_LOG_TB_VPU, "vpu",
+      "include VPU registers in the 'cpu' logging" },
     { 0, NULL, NULL },
 };
 
index 6470f8eae31087ed7771f8db91afdefc1c262c86..797b640c4152d6ff22cb1b6a37a38d8a1c639bdf 100644 (file)
 #include "qapi/error.h"
 #include "qemu/cutils.h"
 #include "qemu/timer.h"
-#include "sysemu/qtest.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/replay.h"
 #include "qemu/main-loop.h"
 #include "block/aio.h"
+#include "block/thread-pool.h"
 #include "qemu/error-report.h"
 #include "qemu/queue.h"
+#include "qom/object.h"
 
 #ifndef _WIN32
 #include <sys/wait.h>
  * use signalfd to listen for them.  We rely on whatever the current signal
  * handler is to dispatch the signals when we receive them.
  */
+/*
+ * Disable CFI checks.
+ * We are going to call a signal handler directly. Such handler may or may not
+ * have been defined in our binary, so there's no guarantee that the pointer
+ * used to set the handler is a cfi-valid pointer. Since the handlers are
+ * stored in kernel memory, changing the handler to an attacker-defined
+ * function requires being able to call a sigaction() syscall,
+ * which is not as easy as overwriting a pointer in memory.
+ */
+QEMU_DISABLE_CFI
 static void sigfd_handler(void *opaque)
 {
     int fd = (intptr_t)opaque;
@@ -52,9 +63,7 @@ static void sigfd_handler(void *opaque)
     ssize_t len;
 
     while (1) {
-        do {
-            len = read(fd, &info, sizeof(info));
-        } while (len == -1 && errno == EINTR);
+        len = RETRY_ON_EINTR(read(fd, &info, sizeof(info)));
 
         if (len == -1 && errno == EAGAIN) {
             break;
@@ -104,7 +113,7 @@ static int qemu_signal_init(Error **errp)
         return -errno;
     }
 
-    fcntl_setfl(sigfd, O_NONBLOCK);
+    g_unix_set_fd_nonblocking(sigfd, true, NULL);
 
     qemu_set_fd_handler(sigfd, sigfd_handler, NULL, (void *)(intptr_t)sigfd);
 
@@ -160,6 +169,7 @@ int qemu_init_main_loop(Error **errp)
     if (!qemu_aio_context) {
         return -EMFILE;
     }
+    qemu_set_current_aio_context(qemu_aio_context);
     qemu_notify_bh = qemu_bh_new(notify_event_cb, NULL);
     gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
     src = aio_get_g_source(qemu_aio_context);
@@ -173,16 +183,75 @@ int qemu_init_main_loop(Error **errp)
     return 0;
 }
 
+static void main_loop_update_params(EventLoopBase *base, Error **errp)
+{
+    ERRP_GUARD();
+
+    if (!qemu_aio_context) {
+        error_setg(errp, "qemu aio context not ready");
+        return;
+    }
+
+    aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp);
+    if (*errp) {
+        return;
+    }
+
+    aio_context_set_thread_pool_params(qemu_aio_context, base->thread_pool_min,
+                                       base->thread_pool_max, errp);
+}
+
+MainLoop *mloop;
+
+static void main_loop_init(EventLoopBase *base, Error **errp)
+{
+    MainLoop *m = MAIN_LOOP(base);
+
+    if (mloop) {
+        error_setg(errp, "only one main-loop instance allowed");
+        return;
+    }
+
+    main_loop_update_params(base, errp);
+
+    mloop = m;
+    return;
+}
+
+static bool main_loop_can_be_deleted(EventLoopBase *base)
+{
+    return false;
+}
+
+static void main_loop_class_init(ObjectClass *oc, void *class_data)
+{
+    EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc);
+
+    bc->init = main_loop_init;
+    bc->update_params = main_loop_update_params;
+    bc->can_be_deleted = main_loop_can_be_deleted;
+}
+
+static const TypeInfo main_loop_info = {
+    .name = TYPE_MAIN_LOOP,
+    .parent = TYPE_EVENT_LOOP_BASE,
+    .class_init = main_loop_class_init,
+    .instance_size = sizeof(MainLoop),
+};
+
+static void main_loop_register_types(void)
+{
+    type_register_static(&main_loop_info);
+}
+
+type_init(main_loop_register_types)
+
 static int max_priority;
 
 #ifndef _WIN32
 static int glib_pollfds_idx;
 static int glib_n_poll_fds;
 
-void qemu_fd_register(int fd)
-{
-}
-
 static void glib_pollfds_fill(int64_t *cur_timeout)
 {
     GMainContext *context = g_main_context_default();
@@ -262,7 +331,7 @@ static PollingEntry *first_polling_entry;
 int qemu_add_polling_cb(PollingFunc *func, void *opaque)
 {
     PollingEntry **ppe, *pe;
-    pe = g_malloc0(sizeof(PollingEntry));
+    pe = g_new0(PollingEntry, 1);
     pe->func = func;
     pe->opaque = opaque;
     for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next);
@@ -287,20 +356,30 @@ void qemu_del_polling_cb(PollingFunc *func, void *opaque)
 /* Wait objects support */
 typedef struct WaitObjects {
     int num;
-    int revents[MAXIMUM_WAIT_OBJECTS + 1];
-    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
-    WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS + 1];
-    void *opaque[MAXIMUM_WAIT_OBJECTS + 1];
+    int revents[MAXIMUM_WAIT_OBJECTS];
+    HANDLE events[MAXIMUM_WAIT_OBJECTS];
+    WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS];
+    void *opaque[MAXIMUM_WAIT_OBJECTS];
 } WaitObjects;
 
 static WaitObjects wait_objects = {0};
 
 int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
 {
+    int i;
     WaitObjects *w = &wait_objects;
+
     if (w->num >= MAXIMUM_WAIT_OBJECTS) {
         return -1;
     }
+
+    for (i = 0; i < w->num; i++) {
+        /* check if the same handle is added twice */
+        if (w->events[i] == handle) {
+            return -1;
+        }
+    }
+
     w->events[w->num] = handle;
     w->func[w->num] = func;
     w->opaque[w->num] = opaque;
@@ -319,7 +398,7 @@ void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
         if (w->events[i] == handle) {
             found = 1;
         }
-        if (found) {
+        if (found && i < (MAXIMUM_WAIT_OBJECTS - 1)) {
             w->events[i] = w->events[i + 1];
             w->func[i] = w->func[i + 1];
             w->opaque[i] = w->opaque[i + 1];
@@ -331,13 +410,6 @@ void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
     }
 }
 
-void qemu_fd_register(int fd)
-{
-    WSAEventSelect(fd, event_notifier_get_handle(&qemu_aio_context->notifier),
-                   FD_READ | FD_ACCEPT | FD_CLOSE |
-                   FD_CONNECT | FD_WRITE | FD_OOB);
-}
-
 static int pollfds_fill(GArray *pollfds, fd_set *rfds, fd_set *wfds,
                         fd_set *xfds)
 {
@@ -533,9 +605,11 @@ void main_loop_wait(int nonblocking)
 
 /* Functions to operate on the main QEMU AioContext.  */
 
-QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
+                         MemReentrancyGuard *reentrancy_guard)
 {
-    return aio_bh_new(qemu_aio_context, cb, opaque);
+    return aio_bh_new_full(qemu_aio_context, cb, opaque, name,
+                           reentrancy_guard);
 }
 
 /*
@@ -570,75 +644,13 @@ void qemu_set_fd_handler(int fd,
                          void *opaque)
 {
     iohandler_init();
-    aio_set_fd_handler(iohandler_ctx, fd, false,
-                       fd_read, fd_write, NULL, opaque);
+    aio_set_fd_handler(iohandler_ctx, fd, fd_read, fd_write, NULL, NULL,
+                       opaque);
 }
 
 void event_notifier_set_handler(EventNotifier *e,
                                 EventNotifierHandler *handler)
 {
     iohandler_init();
-    aio_set_event_notifier(iohandler_ctx, e, false,
-                           handler, NULL);
+    aio_set_event_notifier(iohandler_ctx, e, handler, NULL, NULL);
 }
-
-/* reaping of zombies.  right now we're not passing the status to
-   anyone, but it would be possible to add a callback.  */
-#ifndef _WIN32
-typedef struct ChildProcessRecord {
-    int pid;
-    QLIST_ENTRY(ChildProcessRecord) next;
-} ChildProcessRecord;
-
-static QLIST_HEAD(, ChildProcessRecord) child_watches =
-    QLIST_HEAD_INITIALIZER(child_watches);
-
-static QEMUBH *sigchld_bh;
-
-static void sigchld_handler(int signal)
-{
-    qemu_bh_schedule(sigchld_bh);
-}
-
-static void sigchld_bh_handler(void *opaque)
-{
-    ChildProcessRecord *rec, *next;
-
-    QLIST_FOREACH_SAFE(rec, &child_watches, next, next) {
-        if (waitpid(rec->pid, NULL, WNOHANG) == rec->pid) {
-            QLIST_REMOVE(rec, next);
-            g_free(rec);
-        }
-    }
-}
-
-static void qemu_init_child_watch(void)
-{
-    struct sigaction act;
-    sigchld_bh = qemu_bh_new(sigchld_bh_handler, NULL);
-
-    memset(&act, 0, sizeof(act));
-    act.sa_handler = sigchld_handler;
-    act.sa_flags = SA_NOCLDSTOP;
-    sigaction(SIGCHLD, &act, NULL);
-}
-
-int qemu_add_child_watch(pid_t pid)
-{
-    ChildProcessRecord *rec;
-
-    if (!sigchld_bh) {
-        qemu_init_child_watch();
-    }
-
-    QLIST_FOREACH(rec, &child_watches, next) {
-        if (rec->pid == pid) {
-            return 1;
-        }
-    }
-    rec = g_malloc0(sizeof(ChildProcessRecord));
-    rec->pid = pid;
-    QLIST_INSERT_HEAD(&child_watches, rec, next);
-    return 0;
-}
-#endif
diff --git a/util/memalign.c b/util/memalign.c
new file mode 100644 (file)
index 0000000..c199ae7
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * memalign.c: Allocate an aligned memory region
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010-2016 Red Hat, Inc.
+ * Copyright (c) 2022 Linaro Ltd
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "qemu/memalign.h"
+#include "trace.h"
+
+void *qemu_try_memalign(size_t alignment, size_t size)
+{
+    void *ptr;
+
+    if (alignment < sizeof(void*)) {
+        alignment = sizeof(void*);
+    } else {
+        g_assert(is_power_of_2(alignment));
+    }
+
+    /*
+     * Handling of 0 allocations varies among the different
+     * platform APIs (for instance _aligned_malloc() will
+     * fail) -- ensure that we always return a valid non-NULL
+     * pointer that can be freed by qemu_vfree().
+     */
+    if (size == 0) {
+        size++;
+    }
+#if defined(CONFIG_POSIX_MEMALIGN)
+    int ret;
+    ret = posix_memalign(&ptr, alignment, size);
+    if (ret != 0) {
+        errno = ret;
+        ptr = NULL;
+    }
+#elif defined(CONFIG_ALIGNED_MALLOC)
+    ptr = _aligned_malloc(size, alignment);
+#elif defined(CONFIG_VALLOC)
+    ptr = valloc(size);
+#elif defined(CONFIG_MEMALIGN)
+    ptr = memalign(alignment, size);
+#else
+    #error No function to allocate aligned memory available
+#endif
+    trace_qemu_memalign(alignment, size, ptr);
+    return ptr;
+}
+
+void *qemu_memalign(size_t alignment, size_t size)
+{
+    void *p = qemu_try_memalign(alignment, size);
+    if (p) {
+        return p;
+    }
+    fprintf(stderr,
+            "qemu_memalign: failed to allocate %zu bytes at alignment %zu: %s\n",
+            size, alignment, strerror(errno));
+    abort();
+}
+
+void qemu_vfree(void *ptr)
+{
+    trace_qemu_vfree(ptr);
+#if !defined(CONFIG_POSIX_MEMALIGN) && defined(CONFIG_ALIGNED_MALLOC)
+    /* Only Windows _aligned_malloc needs a special free function */
+    _aligned_free(ptr);
+#else
+    free(ptr);
+#endif
+}
index a65472f30cea05746d12bd72ac33b84bf7a0cc90..5c784fdd347ece3f0fb9ea1a776716ce8003b284 100644 (file)
@@ -1,15 +1,22 @@
-util_ss.add(dependency('threads'))
 util_ss.add(files('osdep.c', 'cutils.c', 'unicode.c', 'qemu-timer-common.c'))
-util_ss.add(when: 'CONFIG_ATOMIC64', if_false: files('atomic64.c'))
+util_ss.add(files('thread-context.c'), numa)
+if not config_host_data.get('CONFIG_ATOMIC64')
+  util_ss.add(files('atomic64.c'))
+endif
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('aio-posix.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('fdmon-poll.c'))
-util_ss.add(when: 'CONFIG_EPOLL_CREATE1', if_true: files('fdmon-epoll.c'))
-util_ss.add(when: ['CONFIG_LINUX_IO_URING', linux_io_uring], if_true: files('fdmon-io_uring.c'))
+if config_host_data.get('CONFIG_EPOLL_CREATE1')
+  util_ss.add(files('fdmon-epoll.c'))
+endif
+util_ss.add(when: linux_io_uring, if_true: files('fdmon-io_uring.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('compatfd.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('event_notifier-posix.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('mmap-alloc.c'))
-util_ss.add(when: 'CONFIG_POSIX', if_true: files('oslib-posix.c'))
-util_ss.add(when: 'CONFIG_POSIX', if_true: [files('qemu-openpty.c'), util])
+freebsd_dep = []
+if targetos == 'freebsd'
+  freebsd_dep = util
+endif
+util_ss.add(when: 'CONFIG_POSIX', if_true: [files('oslib-posix.c'), freebsd_dep])
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('qemu-thread-posix.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('memfd.c'))
 util_ss.add(when: 'CONFIG_WIN32', if_true: files('aio-win32.c'))
@@ -17,13 +24,17 @@ util_ss.add(when: 'CONFIG_WIN32', if_true: files('event_notifier-win32.c'))
 util_ss.add(when: 'CONFIG_WIN32', if_true: files('oslib-win32.c'))
 util_ss.add(when: 'CONFIG_WIN32', if_true: files('qemu-thread-win32.c'))
 util_ss.add(when: 'CONFIG_WIN32', if_true: winmm)
-util_ss.add(when: 'HAVE_GLIB_WITH_SLICE_ALLOCATOR', if_true: files('qtree.c'))
+util_ss.add(when: 'CONFIG_WIN32', if_true: pathcch)
+if glib_has_gslice
+  util_ss.add(files('qtree.c'))
+endif
+util_ss.add(files('defer-call.c'))
 util_ss.add(files('envlist.c', 'path.c', 'module.c'))
 util_ss.add(files('host-utils.c'))
 util_ss.add(files('bitmap.c', 'bitops.c'))
 util_ss.add(files('fifo8.c'))
-util_ss.add(files('cacheinfo.c'))
-util_ss.add(files('error.c', 'qemu-error.c'))
+util_ss.add(files('cacheflush.c'))
+util_ss.add(files('error.c', 'error-report.c'))
 util_ss.add(files('qemu-print.c'))
 util_ss.add(files('id.c'))
 util_ss.add(files('qemu-config.c', 'notify.c'))
@@ -33,51 +44,78 @@ util_ss.add(files('crc32c.c'))
 util_ss.add(files('uuid.c'))
 util_ss.add(files('getauxval.c'))
 util_ss.add(files('rcu.c'))
-util_ss.add(when: 'CONFIG_MEMBARRIER', if_true: files('sys_membarrier.c'))
+if have_membarrier
+  util_ss.add(files('sys_membarrier.c'))
+endif
 util_ss.add(files('log.c'))
-util_ss.add(files('pagesize.c'))
 util_ss.add(files('qdist.c'))
 util_ss.add(files('qht.c'))
 util_ss.add(files('qsp.c'))
 util_ss.add(files('range.c'))
+util_ss.add(files('reserved-region.c'))
 util_ss.add(files('stats64.c'))
 util_ss.add(files('systemd.c'))
+util_ss.add(files('transactions.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('drm.c'))
 util_ss.add(files('guest-random.c'))
+util_ss.add(files('yank.c'))
+util_ss.add(files('int128.c'))
+util_ss.add(files('memalign.c'))
+util_ss.add(files('interval-tree.c'))
+util_ss.add(files('lockcnt.c'))
 
 if have_user
   util_ss.add(files('selfmap.c'))
 endif
 
 if have_system
-  util_ss.add(when: 'CONFIG_GIO', if_true: [files('dbus.c'), gio])
+  util_ss.add(files('crc-ccitt.c'))
+  util_ss.add(when: gio, if_true: files('dbus.c'))
+  util_ss.add(when: 'CONFIG_LINUX', if_true: files('userfaultfd.c'))
 endif
 
-if have_block
-  util_ss.add(files('aiocb.c', 'async.c', 'aio-wait.c'))
+if have_block or have_ga
+  util_ss.add(files('aiocb.c', 'async.c'))
   util_ss.add(files('base64.c'))
+  util_ss.add(files('main-loop.c'))
+  util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
+  util_ss.add(files(f'coroutine-@coroutine_backend@.c'))
+  util_ss.add(files('thread-pool.c', 'qemu-timer.c'))
+  util_ss.add(files('qemu-sockets.c'))
+endif
+if have_block
+  util_ss.add(files('aio-wait.c'))
   util_ss.add(files('buffer.c'))
   util_ss.add(files('bufferiszero.c'))
-  util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND'])))
   util_ss.add(files('hbitmap.c'))
   util_ss.add(files('hexdump.c'))
   util_ss.add(files('iova-tree.c'))
-  util_ss.add(files('iov.c', 'qemu-sockets.c', 'uri.c'))
-  util_ss.add(files('lockcnt.c'))
-  util_ss.add(files('main-loop.c'))
+  util_ss.add(files('iov.c', 'uri.c'))
   util_ss.add(files('nvdimm-utils.c'))
-  util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
-#  util_ss.add(when: 'CONFIG_LINUX', if_true: [
-#    files('vhost-user-server.c'), vhost_user
-#  ])
+  #util_ss.add(when: 'CONFIG_LINUX', if_true: [
+  #  files('vhost-user-server.c'), vhost_user
+  #])
   util_ss.add(files('block-helpers.c'))
   util_ss.add(files('qemu-coroutine-sleep.c'))
   util_ss.add(files('qemu-co-shared-resource.c'))
-  util_ss.add(files('thread-pool.c', 'qemu-timer.c'))
+  util_ss.add(files('qemu-co-timeout.c'))
   util_ss.add(files('readline.c'))
   util_ss.add(files('throttle.c'))
   util_ss.add(files('timed-average.c'))
-  util_ss.add(when: 'CONFIG_INOTIFY1', if_true: files('filemonitor-inotify.c'),
-                                        if_false: files('filemonitor-stub.c'))
+  if config_host_data.get('CONFIG_INOTIFY1')
+    util_ss.add(files('filemonitor-inotify.c'))
+  else
+    util_ss.add(files('filemonitor-stub.c'))
+  endif
   util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c'))
 endif
+
+if cpu == 'aarch64'
+  util_ss.add(files('cpuinfo-aarch64.c'))
+elif cpu in ['x86', 'x86_64']
+  util_ss.add(files('cpuinfo-i386.c'))
+elif cpu == 'loongarch64'
+  util_ss.add(files('cpuinfo-loongarch.c'))
+elif cpu in ['ppc', 'ppc64']
+  util_ss.add(files('cpuinfo-ppc.c'))
+endif
index 27dcccd8ec8e5a33121b3c69bf4458dde21b0eca..ed14f9c64de510e1add0e1b1e29844e43a36f012 100644 (file)
 #include "qemu/osdep.h"
 #include "qemu/mmap-alloc.h"
 #include "qemu/host-utils.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
 
 #define HUGETLBFS_MAGIC       0x958458f6
 
 #ifdef CONFIG_LINUX
 #include <sys/vfs.h>
+#include <linux/magic.h>
 #endif
 
-size_t qemu_fd_getpagesize(int fd)
+QemuFsType qemu_fd_getfs(int fd)
 {
 #ifdef CONFIG_LINUX
     struct statfs fs;
     int ret;
 
-    if (fd != -1) {
-        do {
-            ret = fstatfs(fd, &fs);
-        } while (ret != 0 && errno == EINTR);
+    if (fd < 0) {
+        return QEMU_FS_TYPE_UNKNOWN;
+    }
 
-        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
-            return fs.f_bsize;
-        }
+    do {
+        ret = fstatfs(fd, &fs);
+    } while (ret != 0 && errno == EINTR);
+
+    switch (fs.f_type) {
+    case TMPFS_MAGIC:
+        return QEMU_FS_TYPE_TMPFS;
+    case HUGETLBFS_MAGIC:
+        return QEMU_FS_TYPE_HUGETLBFS;
+    default:
+        return QEMU_FS_TYPE_UNKNOWN;
     }
-#ifdef __sparc__
-    /* SPARC Linux needs greater alignment than the pagesize */
-    return QEMU_VMALLOC_ALIGN;
-#endif
+#else
+    return QEMU_FS_TYPE_UNKNOWN;
 #endif
-
-    return qemu_real_host_page_size;
 }
 
-size_t qemu_mempath_getpagesize(const char *mem_path)
+size_t qemu_fd_getpagesize(int fd)
 {
 #ifdef CONFIG_LINUX
     struct statfs fs;
     int ret;
 
-    if (mem_path) {
+    if (fd != -1) {
         do {
-            ret = statfs(mem_path, &fs);
+            ret = fstatfs(fd, &fs);
         } while (ret != 0 && errno == EINTR);
 
-        if (ret != 0) {
-            fprintf(stderr, "Couldn't statfs() memory path: %s\n",
-                    strerror(errno));
-            exit(1);
-        }
-
-        if (fs.f_type == HUGETLBFS_MAGIC) {
-            /* It's hugepage, return the huge page size */
+        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
             return fs.f_bsize;
         }
     }
@@ -79,32 +78,84 @@ size_t qemu_mempath_getpagesize(const char *mem_path)
 #endif
 #endif
 
-    return qemu_real_host_page_size;
+    return qemu_real_host_page_size();
 }
 
-void *qemu_ram_mmap(int fd,
-                    size_t size,
-                    size_t align,
-                    bool shared,
-                    bool is_pmem)
+#define OVERCOMMIT_MEMORY_PATH "/proc/sys/vm/overcommit_memory"
+static bool map_noreserve_effective(int fd, uint32_t qemu_map_flags)
 {
-    int flags;
-    int map_sync_flags = 0;
-    int guardfd;
-    size_t offset;
-    size_t pagesize;
-    size_t total;
-    void *guardptr;
-    void *ptr;
+#if defined(__linux__)
+    const bool readonly = qemu_map_flags & QEMU_MAP_READONLY;
+    const bool shared = qemu_map_flags & QEMU_MAP_SHARED;
+    gchar *content = NULL;
+    const char *endptr;
+    unsigned int tmp;
 
     /*
-     * Note: this always allocates at least one extra page of virtual address
-     * space, even if size is already aligned.
+     * hugeltb accounting is different than ordinary swap reservation:
+     * a) Hugetlb pages from the pool are reserved for both private and
+     *    shared mappings. For shared mappings, all mappers have to specify
+     *    MAP_NORESERVE.
+     * b) MAP_NORESERVE is not affected by /proc/sys/vm/overcommit_memory.
      */
-    total = size + align;
+    if (qemu_fd_getpagesize(fd) != qemu_real_host_page_size()) {
+        return true;
+    }
+
+    /*
+     * Accountable mappings in the kernel that can be affected by MAP_NORESEVE
+     * are private writable mappings (see mm/mmap.c:accountable_mapping() in
+     * Linux). For all shared or readonly mappings, MAP_NORESERVE is always
+     * implicitly active -- no reservation; this includes shmem. The only
+     * exception is shared anonymous memory, it is accounted like private
+     * anonymous memory.
+     */
+    if (readonly || (shared && fd >= 0)) {
+        return true;
+    }
+
+    /*
+     * MAP_NORESERVE is globally ignored for applicable !hugetlb mappings when
+     * memory overcommit is set to "never". Sparse memory regions aren't really
+     * possible in this system configuration.
+     *
+     * Bail out now instead of silently committing way more memory than
+     * currently desired by the user.
+     */
+    if (g_file_get_contents(OVERCOMMIT_MEMORY_PATH, &content, NULL, NULL) &&
+        !qemu_strtoui(content, &endptr, 0, &tmp) &&
+        (!endptr || *endptr == '\n')) {
+        if (tmp == 2) {
+            error_report("Skipping reservation of swap space is not supported:"
+                         " \"" OVERCOMMIT_MEMORY_PATH "\" is \"2\"");
+            return false;
+        }
+        return true;
+    }
+    /* this interface has been around since Linux 2.6 */
+    error_report("Skipping reservation of swap space is not supported:"
+                 " Could not read: \"" OVERCOMMIT_MEMORY_PATH "\"");
+    return false;
+#endif
+    /*
+     * E.g., FreeBSD used to define MAP_NORESERVE, never implemented it,
+     * and removed it a while ago.
+     */
+    error_report("Skipping reservation of swap space is not supported");
+    return false;
+}
+
+/*
+ * Reserve a new memory region of the requested size to be used for mapping
+ * from the given fd (if any).
+ */
+static void *mmap_reserve(size_t size, int fd)
+{
+    int flags = MAP_PRIVATE;
 
 #if defined(__powerpc64__) && defined(__linux__)
-    /* On ppc64 mappings in the same segment (aka slice) must share the same
+    /*
+     * On ppc64 mappings in the same segment (aka slice) must share the same
      * page size. Since we will be re-allocating part of this segment
      * from the supplied fd, we should make sure to use the same page size, to
      * this end we mmap the supplied fd.  In this case, set MAP_NORESERVE to
@@ -112,50 +163,55 @@ void *qemu_ram_mmap(int fd,
      * We do this unless we are using the system page size, in which case
      * anonymous memory is OK.
      */
-    flags = MAP_PRIVATE;
-    pagesize = qemu_fd_getpagesize(fd);
-    if (fd == -1 || pagesize == qemu_real_host_page_size) {
-        guardfd = -1;
+    if (fd == -1 || qemu_fd_getpagesize(fd) == qemu_real_host_page_size()) {
+        fd = -1;
         flags |= MAP_ANONYMOUS;
     } else {
-        guardfd = fd;
         flags |= MAP_NORESERVE;
     }
 #else
-    guardfd = -1;
-    pagesize = qemu_real_host_page_size;
-    flags = MAP_PRIVATE | MAP_ANONYMOUS;
+    fd = -1;
+    flags |= MAP_ANONYMOUS;
 #endif
 
-    guardptr = mmap(0, total, PROT_NONE, flags, guardfd, 0);
+    return mmap(0, size, PROT_NONE, flags, fd, 0);
+}
 
-    if (guardptr == MAP_FAILED) {
+/*
+ * Activate memory in a reserved region from the given fd (if any), to make
+ * it accessible.
+ */
+static void *mmap_activate(void *ptr, size_t size, int fd,
+                           uint32_t qemu_map_flags, off_t map_offset)
+{
+    const bool noreserve = qemu_map_flags & QEMU_MAP_NORESERVE;
+    const bool readonly = qemu_map_flags & QEMU_MAP_READONLY;
+    const bool shared = qemu_map_flags & QEMU_MAP_SHARED;
+    const bool sync = qemu_map_flags & QEMU_MAP_SYNC;
+    const int prot = PROT_READ | (readonly ? 0 : PROT_WRITE);
+    int map_sync_flags = 0;
+    int flags = MAP_FIXED;
+    void *activated_ptr;
+
+    if (noreserve && !map_noreserve_effective(fd, qemu_map_flags)) {
         return MAP_FAILED;
     }
 
-    assert(is_power_of_2(align));
-    /* Always align to host page size */
-    assert(align >= pagesize);
-
-    flags = MAP_FIXED;
     flags |= fd == -1 ? MAP_ANONYMOUS : 0;
     flags |= shared ? MAP_SHARED : MAP_PRIVATE;
-    if (shared && is_pmem) {
+    flags |= noreserve ? MAP_NORESERVE : 0;
+    if (shared && sync) {
         map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE;
     }
 
-    offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
-
-    ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
-               flags | map_sync_flags, fd, 0);
-
-    if (ptr == MAP_FAILED && map_sync_flags) {
+    activated_ptr = mmap(ptr, size, prot, flags | map_sync_flags, fd,
+                         map_offset);
+    if (activated_ptr == MAP_FAILED && map_sync_flags) {
         if (errno == ENOTSUP) {
-            char *proc_link, *file_name;
-            int len;
-            proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
-            file_name = g_malloc0(PATH_MAX);
-            len = readlink(proc_link, file_name, PATH_MAX - 1);
+            char *proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
+            char *file_name = g_malloc0(PATH_MAX);
+            int len = readlink(proc_link, file_name, PATH_MAX - 1);
+
             if (len < 0) {
                 len = 0;
             }
@@ -166,15 +222,57 @@ void *qemu_ram_mmap(int fd,
                     "crash.\n", file_name);
             g_free(proc_link);
             g_free(file_name);
+            warn_report("Using non DAX backing file with 'pmem=on' option"
+                        " is deprecated");
         }
         /*
-         * if map failed with MAP_SHARED_VALIDATE | MAP_SYNC,
-         * we will remove these flags to handle compatibility.
+         * If mmap failed with MAP_SHARED_VALIDATE | MAP_SYNC, we will try
+         * again without these flags to handle backwards compatibility.
          */
-        ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
-                   flags, fd, 0);
+        activated_ptr = mmap(ptr, size, prot, flags, fd, map_offset);
+    }
+    return activated_ptr;
+}
+
+static inline size_t mmap_guard_pagesize(int fd)
+{
+#if defined(__powerpc64__) && defined(__linux__)
+    /* Mappings in the same segment must share the same page size */
+    return qemu_fd_getpagesize(fd);
+#else
+    return qemu_real_host_page_size();
+#endif
+}
+
+void *qemu_ram_mmap(int fd,
+                    size_t size,
+                    size_t align,
+                    uint32_t qemu_map_flags,
+                    off_t map_offset)
+{
+    const size_t guard_pagesize = mmap_guard_pagesize(fd);
+    size_t offset, total;
+    void *ptr, *guardptr;
+
+    /*
+     * Note: this always allocates at least one extra page of virtual address
+     * space, even if size is already aligned.
+     */
+    total = size + align;
+
+    guardptr = mmap_reserve(total, fd);
+    if (guardptr == MAP_FAILED) {
+        return MAP_FAILED;
     }
 
+    assert(is_power_of_2(align));
+    /* Always align to host page size */
+    assert(align >= guard_pagesize);
+
+    offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
+
+    ptr = mmap_activate(guardptr + offset, size, fd, qemu_map_flags,
+                        map_offset);
     if (ptr == MAP_FAILED) {
         munmap(guardptr, total);
         return MAP_FAILED;
@@ -189,8 +287,8 @@ void *qemu_ram_mmap(int fd,
      * a guard page guarding against potential buffer overflows.
      */
     total -= offset;
-    if (total > size + pagesize) {
-        munmap(ptr + size + pagesize, total - size - pagesize);
+    if (total > size + guard_pagesize) {
+        munmap(ptr + size + guard_pagesize, total - size - guard_pagesize);
     }
 
     return ptr;
@@ -198,15 +296,8 @@ void *qemu_ram_mmap(int fd,
 
 void qemu_ram_munmap(int fd, void *ptr, size_t size)
 {
-    size_t pagesize;
-
     if (ptr) {
         /* Unmap both the RAM block and the guard page */
-#if defined(__powerpc64__) && defined(__linux__)
-        pagesize = qemu_fd_getpagesize(fd);
-#else
-        pagesize = qemu_real_host_page_size;
-#endif
-        munmap(ptr, size + pagesize);
+        munmap(ptr, size + mmap_guard_pagesize(fd));
     }
 }
index 7661d0f6234d952f375ad09f67d714e991560de4..32e263163c75dfa8e93ca67c739d4a501162353c 100644 (file)
 #include "qemu/queue.h"
 #include "qemu/module.h"
 #include "qemu/cutils.h"
+#include "qemu/config-file.h"
+#include "qapi/error.h"
 #ifdef CONFIG_MODULE_UPGRADES
 #include "qemu-version.h"
 #endif
+#include "trace.h"
 
 typedef struct ModuleEntry
 {
@@ -110,25 +113,54 @@ void module_call_init(module_init_type type)
 }
 
 #ifdef CONFIG_MODULES
-static int module_load_file(const char *fname, bool mayfail, bool export_symbols)
+
+static const QemuModinfo module_info_stub[] = { {
+    /* end of list */
+} };
+static const QemuModinfo *module_info = module_info_stub;
+static const char *module_arch;
+
+void module_init_info(const QemuModinfo *info)
+{
+    module_info = info;
+}
+
+void module_allow_arch(const char *arch)
+{
+    module_arch = arch;
+}
+
+static bool module_check_arch(const QemuModinfo *modinfo)
+{
+    if (modinfo->arch) {
+        if (!module_arch) {
+            /* no arch set -> ignore all */
+            return false;
+        }
+        if (strcmp(module_arch, modinfo->arch) != 0) {
+            /* mismatch */
+            return false;
+        }
+    }
+    return true;
+}
+
+/*
+ * module_load_dso: attempt to load an existing dso file
+ *
+ * fname:          full pathname of the file to load
+ * export_symbols: if true, add the symbols to the global name space
+ * errp:           error to set.
+ *
+ * Return value:   true on success, false on error, and errp will be set.
+ */
+static bool module_load_dso(const char *fname, bool export_symbols,
+                            Error **errp)
 {
     GModule *g_module;
     void (*sym)(void);
-    const char *dsosuf = CONFIG_HOST_DSOSUF;
-    int len = strlen(fname);
-    int suf_len = strlen(dsosuf);
     ModuleEntry *e, *next;
-    int ret, flags;
-
-    if (len <= suf_len || strcmp(&fname[len - suf_len], dsosuf)) {
-        /* wrong suffix */
-        ret = -EINVAL;
-        goto out;
-    }
-    if (access(fname, F_OK)) {
-        ret = -ENOENT;
-        goto out;
-    }
+    int flags;
 
     assert(QTAILQ_EMPTY(&dso_init_list));
 
@@ -138,65 +170,38 @@ static int module_load_file(const char *fname, bool mayfail, bool export_symbols
     }
     g_module = g_module_open(fname, flags);
     if (!g_module) {
-        if (!mayfail) {
-            fprintf(stderr, "Failed to open module: %s\n",
-                    g_module_error());
-        }
-        ret = -EINVAL;
-        goto out;
+        error_setg(errp, "failed to open module: %s", g_module_error());
+        return false;
     }
     if (!g_module_symbol(g_module, DSO_STAMP_FUN_STR, (gpointer *)&sym)) {
-        fprintf(stderr, "Failed to initialize module: %s\n",
-                fname);
-        /* Print some info if this is a QEMU module (but from different build),
-         * this will make debugging user problems easier. */
+        error_setg(errp, "failed to initialize module: %s", fname);
+        /*
+         * Print some info if this is a QEMU module (but from different build),
+         * this will make debugging user problems easier.
+         */
         if (g_module_symbol(g_module, "qemu_module_dummy", (gpointer *)&sym)) {
-            fprintf(stderr,
-                    "Note: only modules from the same build can be loaded.\n");
+            error_append_hint(errp,
+                "Only modules from the same build can be loaded.\n");
         }
         g_module_close(g_module);
-        ret = -EINVAL;
-    } else {
-        QTAILQ_FOREACH(e, &dso_init_list, node) {
-            e->init();
-            register_module_init(e->init, e->type);
-        }
-        ret = 0;
+        return false;
     }
 
+    QTAILQ_FOREACH(e, &dso_init_list, node) {
+        e->init();
+        register_module_init(e->init, e->type);
+    }
+    trace_module_load_module(fname);
     QTAILQ_FOREACH_SAFE(e, &dso_init_list, node, next) {
         QTAILQ_REMOVE(&dso_init_list, e, node);
         g_free(e);
     }
-out:
-    return ret;
+    return true;
 }
 
-static const struct {
-    const char *name;
-    const char *dep;
-} module_deps[] = {
-    { "audio-spice",    "ui-spice-core" },
-    { "chardev-spice",  "ui-spice-core" },
-    { "hw-display-qxl", "ui-spice-core" },
-    { "ui-spice-app",   "ui-spice-core" },
-    { "ui-spice-app",   "chardev-spice" },
-
-#ifdef CONFIG_OPENGL
-    { "ui-egl-headless", "ui-opengl"    },
-    { "ui-gtk",          "ui-opengl"    },
-    { "ui-sdl",          "ui-opengl"    },
-    { "ui-spice-core",   "ui-opengl"    },
-#endif
-};
-#endif
-
-bool module_load_one(const char *prefix, const char *lib_name, bool mayfail)
+int module_load(const char *prefix, const char *name, Error **errp)
 {
-    bool success = false;
-
-#ifdef CONFIG_MODULES
-    char *fname = NULL;
+    int rv = -1;
 #ifdef CONFIG_MODULE_UPGRADES
     char *version_dir;
 #endif
@@ -204,35 +209,26 @@ bool module_load_one(const char *prefix, const char *lib_name, bool mayfail)
     char *dirs[5];
     char *module_name;
     int i = 0, n_dirs = 0;
-    int ret, dep;
     bool export_symbols = false;
     static GHashTable *loaded_modules;
+    const QemuModinfo *modinfo;
+    const char **sl;
 
     if (!g_module_supported()) {
-        fprintf(stderr, "Module is not supported by system.\n");
-        return false;
+        error_setg(errp, "%s", "this platform does not support GLib modules");
+        return -1;
     }
 
     if (!loaded_modules) {
         loaded_modules = g_hash_table_new(g_str_hash, g_str_equal);
     }
 
-    module_name = g_strdup_printf("%s%s", prefix, lib_name);
-
-    for (dep = 0; dep < ARRAY_SIZE(module_deps); dep++) {
-        if (strcmp(module_name, module_deps[dep].name) == 0) {
-            /* we depend on another module */
-            module_load_one("", module_deps[dep].dep, false);
-        }
-        if (strcmp(module_name, module_deps[dep].dep) == 0) {
-            /* another module depends on us */
-            export_symbols = true;
-        }
-    }
+    /* allocate all resources managed by the out: label here */
+    module_name = g_strdup_printf("%s%s", prefix, name);
 
     if (g_hash_table_contains(loaded_modules, module_name)) {
         g_free(module_name);
-        return true;
+        return 2; /* module already loaded */
     }
     g_hash_table_add(loaded_modules, module_name);
 
@@ -241,7 +237,6 @@ bool module_load_one(const char *prefix, const char *lib_name, bool mayfail)
         dirs[n_dirs++] = g_strdup_printf("%s", search_dir);
     }
     dirs[n_dirs++] = get_relocated_path(CONFIG_QEMU_MODDIR);
-    dirs[n_dirs++] = g_strdup(qemu_get_exec_dir());
 
 #ifdef CONFIG_MODULE_UPGRADES
     version_dir = g_strcanon(g_strdup(QEMU_PKGVERSION),
@@ -249,106 +244,162 @@ bool module_load_one(const char *prefix, const char *lib_name, bool mayfail)
                              '_');
     dirs[n_dirs++] = g_strdup_printf("/var/run/qemu/%s", version_dir);
 #endif
-
     assert(n_dirs <= ARRAY_SIZE(dirs));
 
+    /* end of resources managed by the out: label */
+
+    for (modinfo = module_info; modinfo->name != NULL; modinfo++) {
+        if (modinfo->arch) {
+            if (strcmp(modinfo->name, module_name) == 0) {
+                if (!module_check_arch(modinfo)) {
+                    error_setg(errp, "module arch does not match: "
+                        "expected '%s', got '%s'", module_arch, modinfo->arch);
+                    goto out;
+                }
+            }
+        }
+        if (modinfo->deps) {
+            if (strcmp(modinfo->name, module_name) == 0) {
+                /* we depend on other module(s) */
+                for (sl = modinfo->deps; *sl != NULL; sl++) {
+                    int subrv = module_load("", *sl, errp);
+                    if (subrv <= 0) {
+                        rv = subrv;
+                        goto out;
+                    }
+                }
+            } else {
+                for (sl = modinfo->deps; *sl != NULL; sl++) {
+                    if (strcmp(module_name, *sl) == 0) {
+                        /* another module depends on us */
+                        export_symbols = true;
+                    }
+                }
+            }
+        }
+    }
+
     for (i = 0; i < n_dirs; i++) {
-        fname = g_strdup_printf("%s/%s%s",
-                dirs[i], module_name, CONFIG_HOST_DSOSUF);
-        ret = module_load_file(fname, mayfail, export_symbols);
-        g_free(fname);
-        fname = NULL;
-        /* Try loading until loaded a module file */
-        if (!ret) {
-            success = true;
-            break;
+        char *fname = g_strdup_printf("%s/%s%s",
+                                      dirs[i], module_name, CONFIG_HOST_DSOSUF);
+        int ret = access(fname, F_OK);
+        if (ret != 0 && (errno == ENOENT || errno == ENOTDIR)) {
+            /*
+             * if we don't find the module in this dir, try the next one.
+             * If we don't find it in any dir, that can be fine too: user
+             * did not install the module. We will return 0 in this case
+             * with no error set.
+             */
+            g_free(fname);
+            continue;
+        } else if (ret != 0) {
+            /* most common is EACCES here */
+            error_setg_errno(errp, errno, "error trying to access %s", fname);
+        } else if (module_load_dso(fname, export_symbols, errp)) {
+            rv = 1; /* module successfully loaded */
         }
+        g_free(fname);
+        goto out;
     }
+    rv = 0; /* module not found */
 
-    if (!success) {
+out:
+    if (rv <= 0) {
         g_hash_table_remove(loaded_modules, module_name);
         g_free(module_name);
     }
-
     for (i = 0; i < n_dirs; i++) {
         g_free(dirs[i]);
     }
-
-#endif
-    return success;
+    return rv;
 }
 
-/*
- * Building devices and other qom objects modular is mostly useful in
- * case they have dependencies to external shared libraries, so we can
- * cut down the core qemu library dependencies.  Which is the case for
- * only a very few devices & objects.
- *
- * So with the expectation that this will be rather the exception than
- * the rule and the list will not gain that many entries, go with a
- * simple manually maintained list for now.
- *
- * The list must be sorted by module (module_load_qom_all() needs this).
- */
-static struct {
-    const char *type;
-    const char *prefix;
-    const char *module;
-} const qom_modules[] = {
-    { "ccid-card-passthru",    "hw-", "usb-smartcard"         },
-    { "ccid-card-emulated",    "hw-", "usb-smartcard"         },
-    { "usb-redir",             "hw-", "usb-redirect"          },
-    { "qxl-vga",               "hw-", "display-qxl"           },
-    { "qxl",                   "hw-", "display-qxl"           },
-    { "virtio-gpu-device",     "hw-", "display-virtio-gpu"    },
-    { "vhost-user-gpu",        "hw-", "display-virtio-gpu"    },
-    { "virtio-gpu-pci-base",   "hw-", "display-virtio-gpu-pci" },
-    { "virtio-gpu-pci",        "hw-", "display-virtio-gpu-pci" },
-    { "vhost-user-gpu-pci",    "hw-", "display-virtio-gpu-pci" },
-    { "virtio-gpu-ccw",        "hw-", "s390x-virtio-gpu-ccw"   },
-    { "virtio-vga-base",       "hw-", "display-virtio-vga"    },
-    { "virtio-vga",            "hw-", "display-virtio-vga"    },
-    { "vhost-user-vga",        "hw-", "display-virtio-vga"    },
-    { "chardev-braille",       "chardev-", "baum"             },
-    { "chardev-spicevmc",      "chardev-", "spice"            },
-    { "chardev-spiceport",     "chardev-", "spice"            },
-};
-
 static bool module_loaded_qom_all;
 
-void module_load_qom_one(const char *type)
+int module_load_qom(const char *type, Error **errp)
 {
-    int i;
+    const QemuModinfo *modinfo;
+    const char **sl;
+    int rv = 0;
 
     if (!type) {
-        return;
+        error_setg(errp, "%s", "type is NULL");
+        return -1;
     }
-    for (i = 0; i < ARRAY_SIZE(qom_modules); i++) {
-        if (strcmp(qom_modules[i].type, type) == 0) {
-            module_load_one(qom_modules[i].prefix,
-                            qom_modules[i].module,
-                            false);
-            return;
+
+    trace_module_lookup_object_type(type);
+    for (modinfo = module_info; modinfo->name != NULL; modinfo++) {
+        if (!modinfo->objs) {
+            continue;
+        }
+        if (!module_check_arch(modinfo)) {
+            continue;
+        }
+        for (sl = modinfo->objs; *sl != NULL; sl++) {
+            if (strcmp(type, *sl) == 0) {
+                if (rv > 0) {
+                    error_setg(errp, "multiple modules providing '%s'", type);
+                    return -1;
+                }
+                rv = module_load("", modinfo->name, errp);
+                if (rv < 0) {
+                    return rv;
+                }
+            }
         }
     }
+    return rv;
 }
 
 void module_load_qom_all(void)
 {
-    int i;
+    const QemuModinfo *modinfo;
+    Error *local_err = NULL;
 
     if (module_loaded_qom_all) {
         return;
     }
-    for (i = 0; i < ARRAY_SIZE(qom_modules); i++) {
-        if (i > 0 && (strcmp(qom_modules[i - 1].module,
-                             qom_modules[i].module) == 0 &&
-                      strcmp(qom_modules[i - 1].prefix,
-                             qom_modules[i].prefix) == 0)) {
-            /* one module implementing multiple types -> load only once */
+
+    for (modinfo = module_info; modinfo->name != NULL; modinfo++) {
+        if (!modinfo->objs) {
             continue;
         }
-        module_load_one(qom_modules[i].prefix, qom_modules[i].module, true);
+        if (!module_check_arch(modinfo)) {
+            continue;
+        }
+        if (module_load("", modinfo->name, &local_err) < 0) {
+            error_report_err(local_err);
+        }
     }
     module_loaded_qom_all = true;
 }
+
+void qemu_load_module_for_opts(const char *group)
+{
+    const QemuModinfo *modinfo;
+    const char **sl;
+
+    for (modinfo = module_info; modinfo->name != NULL; modinfo++) {
+        if (!modinfo->opts) {
+            continue;
+        }
+        for (sl = modinfo->opts; *sl != NULL; sl++) {
+            if (strcmp(group, *sl) == 0) {
+                Error *local_err = NULL;
+                if (module_load("", modinfo->name, &local_err) < 0) {
+                    error_report_err(local_err);
+                }
+            }
+        }
+    }
+}
+
+#else
+
+void module_allow_arch(const char *arch) {}
+void qemu_load_module_for_opts(const char *group) {}
+int module_load(const char *prefix, const char *name, Error **errp) { return 2; }
+int module_load_qom(const char *type, Error **errp) { return 2; }
+void module_load_qom_all(void) {}
+
+#endif
index 5cc768ca47965e9636123317910c5314eb8b30eb..aa3d199f2d117616e8bacf8103e3760e8ba45a52 100644 (file)
@@ -1,3 +1,4 @@
+#include "qemu/osdep.h"
 #include "qemu/nvdimm-utils.h"
 #include "hw/mem/nvdimm.h"
 
index 66d01b9160fb93e0265f53ae410b8a72535e3275..e996c4744af1aee49a0ae0ff2f92430360ea3d36 100644 (file)
  */
 #include "qemu/osdep.h"
 #include "qapi/error.h"
-
-/* Needed early for CONFIG_BSD etc. */
-
-#ifdef CONFIG_SOLARIS
-#include <sys/statvfs.h>
-/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for
-   discussion about Solaris header problems */
-extern int madvise(char *, size_t, int);
-#endif
-
-#include "qemu-common.h"
 #include "qemu/cutils.h"
 #include "qemu/sockets.h"
 #include "qemu/error-report.h"
+#include "qemu/madvise.h"
+#include "qemu/mprotect.h"
+#include "qemu/hw-version.h"
 #include "monitor/monitor.h"
 
-static bool fips_enabled = false;
-
 static const char *hw_version = QEMU_HW_VERSION;
 
 int socket_set_cork(int fd, int v)
 {
 #if defined(SOL_TCP) && defined(TCP_CORK)
-    return qemu_setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
+    return setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
 #else
     return 0;
 #endif
@@ -55,7 +45,7 @@ int socket_set_cork(int fd, int v)
 int socket_set_nodelay(int fd)
 {
     int v = 1;
-    return qemu_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v));
+    return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v));
 }
 
 int qemu_madvise(void *addr, size_t len, int advice)
@@ -76,8 +66,8 @@ int qemu_madvise(void *addr, size_t len, int advice)
 
 static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
 {
-    g_assert(!((uintptr_t)addr & ~qemu_real_host_page_mask));
-    g_assert(!(size & ~qemu_real_host_page_mask));
+    g_assert(!((uintptr_t)addr & ~qemu_real_host_page_mask()));
+    g_assert(!(size & ~qemu_real_host_page_mask()));
 
 #ifdef _WIN32
     DWORD old_protect;
@@ -97,6 +87,15 @@ static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
 #endif
 }
 
+int qemu_mprotect_rw(void *addr, size_t size)
+{
+#ifdef _WIN32
+    return qemu_mprotect__osdep(addr, size, PAGE_READWRITE);
+#else
+    return qemu_mprotect__osdep(addr, size, PROT_READ | PROT_WRITE);
+#endif
+}
+
 int qemu_mprotect_rwx(void *addr, size_t size)
 {
 #ifdef _WIN32
@@ -245,9 +244,7 @@ static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type)
         .l_type   = fl_type,
     };
     qemu_probe_lock_ops();
-    do {
-        ret = fcntl(fd, fcntl_op_setlk, &fl);
-    } while (ret == -1 && errno == EINTR);
+    ret = RETRY_ON_EINTR(fcntl(fd, fcntl_op_setlk, &fl));
     return ret == -1 ? -errno : 0;
 }
 
@@ -503,40 +500,36 @@ int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
     return ret;
 }
 
-void qemu_set_hw_version(const char *version)
-{
-    hw_version = version;
-}
-
-const char *qemu_hw_version(void)
+ssize_t qemu_send_full(int s, const void *buf, size_t count)
 {
-    return hw_version;
-}
+    ssize_t ret = 0;
+    ssize_t total = 0;
 
-void fips_set_state(bool requested)
-{
-#ifdef __linux__
-    if (requested) {
-        FILE *fds = fopen("/proc/sys/crypto/fips_enabled", "r");
-        if (fds != NULL) {
-            fips_enabled = (fgetc(fds) == '1');
-            fclose(fds);
+    while (count) {
+        ret = send(s, buf, count, 0);
+        if (ret < 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+            break;
         }
+
+        count -= ret;
+        buf += ret;
+        total += ret;
     }
-#else
-    fips_enabled = false;
-#endif /* __linux__ */
 
-#ifdef _FIPS_DEBUG
-    fprintf(stderr, "FIPS mode %s (requested %s)\n",
-            (fips_enabled ? "enabled" : "disabled"),
-            (requested ? "enabled" : "disabled"));
-#endif
+    return total;
 }
 
-bool fips_get_state(void)
+void qemu_set_hw_version(const char *version)
 {
-    return fips_enabled;
+    hw_version = version;
+}
+
+const char *qemu_hw_version(void)
+{
+    return hw_version;
 }
 
 #ifdef _WIN32
@@ -565,18 +558,22 @@ int socket_init(void)
 
 
 #ifndef CONFIG_IOVEC
-/* helper function for iov_send_recv() */
 static ssize_t
 readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
 {
     unsigned i = 0;
     ssize_t ret = 0;
+    ssize_t off = 0;
     while (i < iov_cnt) {
         ssize_t r = do_write
-            ? write(fd, iov[i].iov_base, iov[i].iov_len)
-            : read(fd, iov[i].iov_base, iov[i].iov_len);
+            ? write(fd, iov[i].iov_base + off, iov[i].iov_len - off)
+            : read(fd, iov[i].iov_base + off, iov[i].iov_len - off);
         if (r > 0) {
             ret += r;
+            off += r;
+            if (off < iov[i].iov_len) {
+                continue;
+            }
         } else if (!r) {
             break;
         } else if (errno == EINTR) {
@@ -589,6 +586,7 @@ readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
             }
             break;
         }
+        off = 0;
         i++;
     }
     return ret;
@@ -606,3 +604,19 @@ writev(int fd, const struct iovec *iov, int iov_cnt)
     return readv_writev(fd, iov, iov_cnt, true);
 }
 #endif
+
+/*
+ * Make sure data goes on disk, but if possible do not bother to
+ * write out the inode just for timestamp updates.
+ *
+ * Unfortunately even in 2009 many operating systems do not support
+ * fdatasync and have to fall back to fsync.
+ */
+int qemu_fdatasync(int fd)
+{
+#ifdef CONFIG_FDATASYNC
+    return fdatasync(fd);
+#else
+    return fsync(fd);
+#endif
+}
index f15234b5c030563284f710f096858fa80458a35c..e86fd64e099877cec2818d948da659d261fde040 100644 (file)
 
 #include <glib/gprintf.h>
 
-#include "qemu-common.h"
 #include "sysemu/sysemu.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/madvise.h"
 #include "qemu/sockets.h"
 #include "qemu/thread.h"
 #include <libgen.h>
 #include "qemu/cutils.h"
+#include "qemu/units.h"
+#include "qemu/thread-context.h"
 
 #ifdef CONFIG_LINUX
 #include <sys/syscall.h>
 #endif
 
 #ifdef __FreeBSD__
-#include <sys/sysctl.h>
-#include <sys/user.h>
 #include <sys/thr.h>
+#include <sys/user.h>
 #include <libutil.h>
 #endif
 
 #ifdef __NetBSD__
-#include <sys/sysctl.h>
 #include <lwp.h>
 #endif
 
-#ifdef __APPLE__
-#include <mach-o/dyld.h>
-#endif
-
-#ifdef __HAIKU__
-#include <kernel/image.h>
-#endif
-
 #include "qemu/mmap-alloc.h"
 
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#include "qemu/error-report.h"
-#endif
-
 #define MAX_MEM_PREALLOC_THREAD_COUNT 16
 
+struct MemsetThread;
+
+typedef struct MemsetContext {
+    bool all_threads_created;
+    bool any_thread_failed;
+    struct MemsetThread *threads;
+    int num_threads;
+} MemsetContext;
+
 struct MemsetThread {
     char *addr;
     size_t numpages;
     size_t hpagesize;
     QemuThread pgthread;
     sigjmp_buf env;
+    MemsetContext *context;
 };
 typedef struct MemsetThread MemsetThread;
 
-static MemsetThread *memset_thread;
-static int memset_num_threads;
-static bool memset_thread_failed;
+/* used by sigbus_handler() */
+static MemsetContext *sigbus_memset_context;
+struct sigaction sigbus_oldact;
+static QemuMutex sigbus_mutex;
 
 static QemuMutex page_mutex;
 static QemuCond page_cond;
-static bool threads_created_flag;
 
 int qemu_get_thread_id(void)
 {
@@ -125,9 +124,8 @@ bool qemu_write_pidfile(const char *path, Error **errp)
             .l_len = 0,
         };
 
-        fd = qemu_open_old(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
+        fd = qemu_create(path, O_WRONLY, S_IRUSR | S_IWUSR, errp);
         if (fd == -1) {
-            error_setg_errno(errp, errno, "Cannot open pid file");
             return false;
         }
 
@@ -171,7 +169,7 @@ bool qemu_write_pidfile(const char *path, Error **errp)
     }
 
     snprintf(pidstr, sizeof(pidstr), FMT_pid "\n", getpid());
-    if (write(fd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
+    if (qemu_write_full(fd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
         error_setg(errp, "Failed to write pid file");
         goto fail_unlink;
     }
@@ -185,49 +183,14 @@ fail_close:
     return false;
 }
 
-void *qemu_oom_check(void *ptr)
-{
-    if (ptr == NULL) {
-        fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
-        abort();
-    }
-    return ptr;
-}
-
-void *qemu_try_memalign(size_t alignment, size_t size)
-{
-    void *ptr;
-
-    if (alignment < sizeof(void*)) {
-        alignment = sizeof(void*);
-    }
-
-#if defined(CONFIG_POSIX_MEMALIGN)
-    int ret;
-    ret = posix_memalign(&ptr, alignment, size);
-    if (ret != 0) {
-        errno = ret;
-        ptr = NULL;
-    }
-#elif defined(CONFIG_BSD)
-    ptr = valloc(size);
-#else
-    ptr = memalign(alignment, size);
-#endif
-    trace_qemu_memalign(alignment, size, ptr);
-    return ptr;
-}
-
-void *qemu_memalign(size_t alignment, size_t size)
-{
-    return qemu_oom_check(qemu_try_memalign(alignment, size));
-}
-
 /* alloc shared memory pages */
-void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared)
+void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared,
+                          bool noreserve)
 {
+    const uint32_t qemu_map_flags = (shared ? QEMU_MAP_SHARED : 0) |
+                                    (noreserve ? QEMU_MAP_NORESERVE : 0);
     size_t align = QEMU_VMALLOC_ALIGN;
-    void *ptr = qemu_ram_mmap(-1, size, align, shared, false);
+    void *ptr = qemu_ram_mmap(-1, size, align, qemu_map_flags, 0);
 
     if (ptr == MAP_FAILED) {
         return NULL;
@@ -241,55 +204,26 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared)
     return ptr;
 }
 
-void qemu_vfree(void *ptr)
-{
-    trace_qemu_vfree(ptr);
-    free(ptr);
-}
-
 void qemu_anon_ram_free(void *ptr, size_t size)
 {
     trace_qemu_anon_ram_free(ptr, size);
     qemu_ram_munmap(-1, ptr, size);
 }
 
-void qemu_set_block(int fd)
+void qemu_socket_set_block(int fd)
 {
-    int f;
-    f = fcntl(fd, F_GETFL);
-    assert(f != -1);
-    f = fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
-    assert(f != -1);
+    g_unix_set_fd_nonblocking(fd, false, NULL);
 }
 
-int qemu_try_set_nonblock(int fd)
+int qemu_socket_try_set_nonblock(int fd)
 {
-    int f;
-    f = fcntl(fd, F_GETFL);
-    if (f == -1) {
-        return -errno;
-    }
-    if (fcntl(fd, F_SETFL, f | O_NONBLOCK) == -1) {
-#ifdef __OpenBSD__
-        /*
-         * Previous to OpenBSD 6.3, fcntl(F_SETFL) is not permitted on
-         * memory devices and sets errno to ENODEV.
-         * It's OK if we fail to set O_NONBLOCK on devices like /dev/null,
-         * because they will never block anyway.
-         */
-        if (errno == ENODEV) {
-            return 0;
-        }
-#endif
-        return -errno;
-    }
-    return 0;
+    return g_unix_set_fd_nonblocking(fd, true, NULL) ? 0 : -errno;
 }
 
-void qemu_set_nonblock(int fd)
+void qemu_socket_set_nonblock(int fd)
 {
     int f;
-    f = qemu_try_set_nonblock(fd);
+    f = qemu_socket_try_set_nonblock(fd);
     assert(f == 0);
 }
 
@@ -314,35 +248,29 @@ void qemu_set_cloexec(int fd)
     assert(f != -1);
 }
 
-/*
- * Creates a pipe with FD_CLOEXEC set on both file descriptors
- */
-int qemu_pipe(int pipefd[2])
+int qemu_socketpair(int domain, int type, int protocol, int sv[2])
 {
     int ret;
 
-#ifdef CONFIG_PIPE2
-    ret = pipe2(pipefd, O_CLOEXEC);
-    if (ret != -1 || errno != ENOSYS) {
+#ifdef SOCK_CLOEXEC
+    ret = socketpair(domain, type | SOCK_CLOEXEC, protocol, sv);
+    if (ret != -1 || errno != EINVAL) {
         return ret;
     }
 #endif
-    ret = pipe(pipefd);
+    ret = socketpair(domain, type, protocol, sv);;
     if (ret == 0) {
-        qemu_set_cloexec(pipefd[0]);
-        qemu_set_cloexec(pipefd[1]);
+        qemu_set_cloexec(sv[0]);
+        qemu_set_cloexec(sv[1]);
     }
 
     return ret;
 }
 
 char *
-qemu_get_local_state_pathname(const char *relative_pathname)
+qemu_get_local_state_dir(void)
 {
-    g_autofree char *dir = g_strdup_printf("%s/%s",
-                                           CONFIG_QEMU_LOCALSTATEDIR,
-                                           relative_pathname);
-    return get_relocated_path(dir);
+    return get_relocated_path(CONFIG_QEMU_LOCALSTATEDIR);
 }
 
 void qemu_set_tty_echo(int fd, bool echo)
@@ -360,103 +288,50 @@ void qemu_set_tty_echo(int fd, bool echo)
     tcsetattr(fd, TCSANOW, &tty);
 }
 
-static const char *exec_dir;
-
-void qemu_init_exec_dir(const char *argv0)
+#ifdef CONFIG_LINUX
+static void sigbus_handler(int signal, siginfo_t *siginfo, void *ctx)
+#else /* CONFIG_LINUX */
+static void sigbus_handler(int signal)
+#endif /* CONFIG_LINUX */
 {
-    char *p = NULL;
-    char buf[PATH_MAX];
+    int i;
 
-    if (exec_dir) {
-        return;
-    }
+    if (sigbus_memset_context) {
+        for (i = 0; i < sigbus_memset_context->num_threads; i++) {
+            MemsetThread *thread = &sigbus_memset_context->threads[i];
 
-#if defined(__linux__)
-    {
-        int len;
-        len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
-        if (len > 0) {
-            buf[len] = 0;
-            p = buf;
-        }
-    }
-#elif defined(__FreeBSD__) \
-      || (defined(__NetBSD__) && defined(KERN_PROC_PATHNAME))
-    {
-#if defined(__FreeBSD__)
-        static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
-#else
-        static int mib[4] = {CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME};
-#endif
-        size_t len = sizeof(buf) - 1;
-
-        *buf = '\0';
-        if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
-            *buf) {
-            buf[sizeof(buf) - 1] = '\0';
-            p = buf;
-        }
-    }
-#elif defined(__APPLE__)
-    {
-        char fpath[PATH_MAX];
-        uint32_t len = sizeof(fpath);
-        if (_NSGetExecutablePath(fpath, &len) == 0) {
-            p = realpath(fpath, buf);
-            if (!p) {
-                return;
-            }
-        }
-    }
-#elif defined(__HAIKU__)
-    {
-        image_info ii;
-        int32_t c = 0;
-
-        *buf = '\0';
-        while (get_next_image_info(0, &c, &ii) == B_OK) {
-            if (ii.type == B_APP_IMAGE) {
-                strncpy(buf, ii.name, sizeof(buf));
-                buf[sizeof(buf) - 1] = 0;
-                p = buf;
-                break;
+            if (qemu_thread_is_self(&thread->pgthread)) {
+                siglongjmp(thread->env, 1);
             }
         }
     }
-#endif
-    /* If we don't have any way of figuring out the actual executable
-       location then try argv[0].  */
-    if (!p && argv0) {
-        p = realpath(argv0, buf);
-    }
-    if (p) {
-        exec_dir = g_path_get_dirname(p);
-    } else {
-        exec_dir = CONFIG_BINDIR;
-    }
-}
 
-const char *qemu_get_exec_dir(void)
-{
-    return exec_dir;
-}
-
-static void sigbus_handler(int signal)
-{
-    int i;
-    if (memset_thread) {
-        for (i = 0; i < memset_num_threads; i++) {
-            if (qemu_thread_is_self(&memset_thread[i].pgthread)) {
-                siglongjmp(memset_thread[i].env, 1);
-            }
-        }
+#ifdef CONFIG_LINUX
+    /*
+     * We assume that the MCE SIGBUS handler could have been registered. We
+     * should never receive BUS_MCEERR_AO on any of our threads, but only on
+     * the main thread registered for PR_MCE_KILL_EARLY. Further, we should not
+     * receive BUS_MCEERR_AR triggered by action of other threads on one of
+     * our threads. So, no need to check for unrelated SIGBUS when seeing one
+     * for our threads.
+     *
+     * We will forward to the MCE handler, which will either handle the SIGBUS
+     * or reinstall the default SIGBUS handler and reraise the SIGBUS. The
+     * default SIGBUS handler will crash the process, so we don't care.
+     */
+    if (sigbus_oldact.sa_flags & SA_SIGINFO) {
+        sigbus_oldact.sa_sigaction(signal, siginfo, ctx);
+        return;
     }
+#endif /* CONFIG_LINUX */
+    warn_report("qemu_prealloc_mem: unrelated SIGBUS detected and ignored");
 }
 
 static void *do_touch_pages(void *arg)
 {
     MemsetThread *memset_args = (MemsetThread *)arg;
     sigset_t set, oldset;
+    int ret = 0;
 
     /*
      * On Linux, the page faults from the loop below can cause mmap_sem
@@ -464,7 +339,7 @@ static void *do_touch_pages(void *arg)
      * clearing until all threads have been created.
      */
     qemu_mutex_lock(&page_mutex);
-    while(!threads_created_flag){
+    while (!memset_args->context->all_threads_created) {
         qemu_cond_wait(&page_cond, &page_mutex);
     }
     qemu_mutex_unlock(&page_mutex);
@@ -475,7 +350,7 @@ static void *do_touch_pages(void *arg)
     pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
 
     if (sigsetjmp(memset_args->env, 1)) {
-        memset_thread_failed = true;
+        ret = -EFAULT;
     } else {
         char *addr = memset_args->addr;
         size_t numpages = memset_args->numpages;
@@ -489,38 +364,66 @@ static void *do_touch_pages(void *arg)
              *
              * 'volatile' to stop compiler optimizing this away
              * to a no-op
-             *
-             * TODO: get a better solution from kernel so we
-             * don't need to write at all so we don't cause
-             * wear on the storage backing the region...
              */
             *(volatile char *)addr = *addr;
             addr += hpagesize;
         }
     }
     pthread_sigmask(SIG_SETMASK, &oldset, NULL);
-    return NULL;
+    return (void *)(uintptr_t)ret;
 }
 
-static inline int get_memset_num_threads(int smp_cpus)
+static void *do_madv_populate_write_pages(void *arg)
+{
+    MemsetThread *memset_args = (MemsetThread *)arg;
+    const size_t size = memset_args->numpages * memset_args->hpagesize;
+    char * const addr = memset_args->addr;
+    int ret = 0;
+
+    /* See do_touch_pages(). */
+    qemu_mutex_lock(&page_mutex);
+    while (!memset_args->context->all_threads_created) {
+        qemu_cond_wait(&page_cond, &page_mutex);
+    }
+    qemu_mutex_unlock(&page_mutex);
+
+    if (size && qemu_madvise(addr, size, QEMU_MADV_POPULATE_WRITE)) {
+        ret = -errno;
+    }
+    return (void *)(uintptr_t)ret;
+}
+
+static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
+                                         int max_threads)
 {
     long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
     int ret = 1;
 
     if (host_procs > 0) {
-        ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), smp_cpus);
+        ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), max_threads);
     }
+
+    /* Especially with gigantic pages, don't create more threads than pages. */
+    ret = MIN(ret, numpages);
+    /* Don't start threads to prealloc comparatively little memory. */
+    ret = MIN(ret, MAX(1, hpagesize * numpages / (64 * MiB)));
+
     /* In case sysconf() fails, we fall back to single threaded */
     return ret;
 }
 
-static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
-                            int smp_cpus)
+static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
+                           int max_threads, ThreadContext *tc,
+                           bool use_madv_populate_write)
 {
     static gsize initialized = 0;
+    MemsetContext context = {
+        .num_threads = get_memset_num_threads(hpagesize, numpages, max_threads),
+    };
     size_t numpages_per_thread, leftover;
+    void *(*touch_fn)(void *);
+    int ret = 0, i = 0;
     char *addr = area;
-    int i = 0;
 
     if (g_once_init_enter(&initialized)) {
         qemu_mutex_init(&page_mutex);
@@ -528,66 +431,129 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
         g_once_init_leave(&initialized, 1);
     }
 
-    memset_thread_failed = false;
-    threads_created_flag = false;
-    memset_num_threads = get_memset_num_threads(smp_cpus);
-    memset_thread = g_new0(MemsetThread, memset_num_threads);
-    numpages_per_thread = numpages / memset_num_threads;
-    leftover = numpages % memset_num_threads;
-    for (i = 0; i < memset_num_threads; i++) {
-        memset_thread[i].addr = addr;
-        memset_thread[i].numpages = numpages_per_thread + (i < leftover);
-        memset_thread[i].hpagesize = hpagesize;
-        qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
-                           do_touch_pages, &memset_thread[i],
-                           QEMU_THREAD_JOINABLE);
-        addr += memset_thread[i].numpages * hpagesize;
+    if (use_madv_populate_write) {
+        /* Avoid creating a single thread for MADV_POPULATE_WRITE */
+        if (context.num_threads == 1) {
+            if (qemu_madvise(area, hpagesize * numpages,
+                             QEMU_MADV_POPULATE_WRITE)) {
+                return -errno;
+            }
+            return 0;
+        }
+        touch_fn = do_madv_populate_write_pages;
+    } else {
+        touch_fn = do_touch_pages;
+    }
+
+    context.threads = g_new0(MemsetThread, context.num_threads);
+    numpages_per_thread = numpages / context.num_threads;
+    leftover = numpages % context.num_threads;
+    for (i = 0; i < context.num_threads; i++) {
+        context.threads[i].addr = addr;
+        context.threads[i].numpages = numpages_per_thread + (i < leftover);
+        context.threads[i].hpagesize = hpagesize;
+        context.threads[i].context = &context;
+        if (tc) {
+            thread_context_create_thread(tc, &context.threads[i].pgthread,
+                                         "touch_pages",
+                                         touch_fn, &context.threads[i],
+                                         QEMU_THREAD_JOINABLE);
+        } else {
+            qemu_thread_create(&context.threads[i].pgthread, "touch_pages",
+                               touch_fn, &context.threads[i],
+                               QEMU_THREAD_JOINABLE);
+        }
+        addr += context.threads[i].numpages * hpagesize;
+    }
+
+    if (!use_madv_populate_write) {
+        sigbus_memset_context = &context;
     }
 
     qemu_mutex_lock(&page_mutex);
-    threads_created_flag = true;
+    context.all_threads_created = true;
     qemu_cond_broadcast(&page_cond);
     qemu_mutex_unlock(&page_mutex);
 
-    for (i = 0; i < memset_num_threads; i++) {
-        qemu_thread_join(&memset_thread[i].pgthread);
+    for (i = 0; i < context.num_threads; i++) {
+        int tmp = (uintptr_t)qemu_thread_join(&context.threads[i].pgthread);
+
+        if (tmp) {
+            ret = tmp;
+        }
+    }
+
+    if (!use_madv_populate_write) {
+        sigbus_memset_context = NULL;
     }
-    g_free(memset_thread);
-    memset_thread = NULL;
+    g_free(context.threads);
+
+    return ret;
+}
 
-    return memset_thread_failed;
+static bool madv_populate_write_possible(char *area, size_t pagesize)
+{
+    return !qemu_madvise(area, pagesize, QEMU_MADV_POPULATE_WRITE) ||
+           errno != EINVAL;
 }
 
-void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
-                     Error **errp)
+void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
+                       ThreadContext *tc, Error **errp)
 {
+    static gsize initialized;
     int ret;
-    struct sigaction act, oldact;
     size_t hpagesize = qemu_fd_getpagesize(fd);
-    size_t numpages = DIV_ROUND_UP(memory, hpagesize);
+    size_t numpages = DIV_ROUND_UP(sz, hpagesize);
+    bool use_madv_populate_write;
+    struct sigaction act;
 
-    memset(&act, 0, sizeof(act));
-    act.sa_handler = &sigbus_handler;
-    act.sa_flags = 0;
+    /*
+     * Sense on every invocation, as MADV_POPULATE_WRITE cannot be used for
+     * some special mappings, such as mapping /dev/mem.
+     */
+    use_madv_populate_write = madv_populate_write_possible(area, hpagesize);
 
-    ret = sigaction(SIGBUS, &act, &oldact);
-    if (ret) {
-        error_setg_errno(errp, errno,
-            "os_mem_prealloc: failed to install signal handler");
-        return;
+    if (!use_madv_populate_write) {
+        if (g_once_init_enter(&initialized)) {
+            qemu_mutex_init(&sigbus_mutex);
+            g_once_init_leave(&initialized, 1);
+        }
+
+        qemu_mutex_lock(&sigbus_mutex);
+        memset(&act, 0, sizeof(act));
+#ifdef CONFIG_LINUX
+        act.sa_sigaction = &sigbus_handler;
+        act.sa_flags = SA_SIGINFO;
+#else /* CONFIG_LINUX */
+        act.sa_handler = &sigbus_handler;
+        act.sa_flags = 0;
+#endif /* CONFIG_LINUX */
+
+        ret = sigaction(SIGBUS, &act, &sigbus_oldact);
+        if (ret) {
+            qemu_mutex_unlock(&sigbus_mutex);
+            error_setg_errno(errp, errno,
+                "qemu_prealloc_mem: failed to install signal handler");
+            return;
+        }
     }
 
     /* touch pages simultaneously */
-    if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) {
-        error_setg(errp, "os_mem_prealloc: Insufficient free host memory "
-            "pages available to allocate guest RAM");
+    ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc,
+                          use_madv_populate_write);
+    if (ret) {
+        error_setg_errno(errp, -ret,
+                         "qemu_prealloc_mem: preallocating memory failed");
     }
 
-    ret = sigaction(SIGBUS, &oldact, NULL);
-    if (ret) {
-        /* Terminate QEMU since it can't recover from error */
-        perror("os_mem_prealloc: failed to reinstall signal handler");
-        exit(1);
+    if (!use_madv_populate_write) {
+        ret = sigaction(SIGBUS, &sigbus_oldact, NULL);
+        if (ret) {
+            /* Terminate QEMU since it can't recover from error */
+            perror("qemu_prealloc_mem: failed to reinstall signal handler");
+            exit(1);
+        }
+        qemu_mutex_unlock(&sigbus_mutex);
     }
 }
 
@@ -617,84 +583,14 @@ char *qemu_get_pid_name(pid_t pid)
 }
 
 
-pid_t qemu_fork(Error **errp)
-{
-    sigset_t oldmask, newmask;
-    struct sigaction sig_action;
-    int saved_errno;
-    pid_t pid;
-
-    /*
-     * Need to block signals now, so that child process can safely
-     * kill off caller's signal handlers without a race.
-     */
-    sigfillset(&newmask);
-    if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
-        error_setg_errno(errp, errno,
-                         "cannot block signals");
-        return -1;
-    }
-
-    pid = fork();
-    saved_errno = errno;
-
-    if (pid < 0) {
-        /* attempt to restore signal mask, but ignore failure, to
-         * avoid obscuring the fork failure */
-        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
-        error_setg_errno(errp, saved_errno,
-                         "cannot fork child process");
-        errno = saved_errno;
-        return -1;
-    } else if (pid) {
-        /* parent process */
-
-        /* Restore our original signal mask now that the child is
-         * safely running. Only documented failures are EFAULT (not
-         * possible, since we are using just-grabbed mask) or EINVAL
-         * (not possible, since we are using correct arguments).  */
-        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
-    } else {
-        /* child process */
-        size_t i;
-
-        /* Clear out all signal handlers from parent so nothing
-         * unexpected can happen in our child once we unblock
-         * signals */
-        sig_action.sa_handler = SIG_DFL;
-        sig_action.sa_flags = 0;
-        sigemptyset(&sig_action.sa_mask);
-
-        for (i = 1; i < NSIG; i++) {
-            /* Only possible errors are EFAULT or EINVAL The former
-             * won't happen, the latter we expect, so no need to check
-             * return value */
-            (void)sigaction(i, &sig_action, NULL);
-        }
-
-        /* Unmask all signals in child, since we've no idea what the
-         * caller's done with their signal mask and don't want to
-         * propagate that to children */
-        sigemptyset(&newmask);
-        if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
-            Error *local_err = NULL;
-            error_setg_errno(&local_err, errno,
-                             "cannot unblock signals");
-            error_report_err(local_err);
-            _exit(1);
-        }
-    }
-    return pid;
-}
-
 void *qemu_alloc_stack(size_t *sz)
 {
-    void *ptr, *guardpage;
+    void *ptr;
     int flags;
 #ifdef CONFIG_DEBUG_STACK_USAGE
     void *ptr2;
 #endif
-    size_t pagesz = qemu_real_host_page_size;
+    size_t pagesz = qemu_real_host_page_size();
 #ifdef _SC_THREAD_STACK_MIN
     /* avoid stacks smaller than _SC_THREAD_STACK_MIN */
     long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN);
@@ -722,17 +618,8 @@ void *qemu_alloc_stack(size_t *sz)
         abort();
     }
 
-#if defined(HOST_IA64)
-    /* separate register stack */
-    guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz);
-#elif defined(HOST_HPPA)
-    /* stack grows up */
-    guardpage = ptr + *sz - pagesz;
-#else
-    /* stack grows down */
-    guardpage = ptr;
-#endif
-    if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
+    /* Stack grows down -- guard page at the bottom. */
+    if (mprotect(ptr, pagesz, PROT_NONE) != 0) {
         perror("failed to set up stack guard page");
         abort();
     }
@@ -756,7 +643,7 @@ void qemu_free_stack(void *stack, size_t sz)
     unsigned int usage;
     void *ptr;
 
-    for (ptr = stack + qemu_real_host_page_size; ptr < stack + sz;
+    for (ptr = stack + qemu_real_host_page_size(); ptr < stack + sz;
          ptr += sizeof(uint32_t)) {
         if (*(uint32_t *)ptr != 0xdeadbeaf) {
             break;
@@ -773,6 +660,16 @@ void qemu_free_stack(void *stack, size_t sz)
     munmap(stack, sz);
 }
 
+/*
+ * Disable CFI checks.
+ * We are going to call a signal handler directly. Such handler may or may not
+ * have been defined in our binary, so there's no guarantee that the pointer
+ * used to set the handler is a cfi-valid pointer. Since the handlers are
+ * stored in kernel memory, changing the handler to an attacker-defined
+ * function requires being able to call a sigaction() syscall,
+ * which is not as easy as overwriting a pointer in memory.
+ */
+QEMU_DISABLE_CFI
 void sigaction_invoke(struct sigaction *action,
                       struct qemu_signalfd_siginfo *info)
 {
@@ -804,52 +701,35 @@ void sigaction_invoke(struct sigaction *action,
     action->sa_sigaction(info->ssi_signo, &si, NULL);
 }
 
-#ifndef HOST_NAME_MAX
-# ifdef _POSIX_HOST_NAME_MAX
-#  define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
-# else
-#  define HOST_NAME_MAX 255
-# endif
-#endif
-
-char *qemu_get_host_name(Error **errp)
-{
-    long len = -1;
-    g_autofree char *hostname = NULL;
-
-#ifdef _SC_HOST_NAME_MAX
-    len = sysconf(_SC_HOST_NAME_MAX);
-#endif /* _SC_HOST_NAME_MAX */
-
-    if (len < 0) {
-        len = HOST_NAME_MAX;
-    }
-
-    /* Unfortunately, gethostname() below does not guarantee a
-     * NULL terminated string. Therefore, allocate one byte more
-     * to be sure. */
-    hostname = g_new0(char, len + 1);
-
-    if (gethostname(hostname, len) < 0) {
-        error_setg_errno(errp, errno,
-                         "cannot get hostname");
-        return NULL;
-    }
-
-    return g_steal_pointer(&hostname);
-}
-
 size_t qemu_get_host_physmem(void)
 {
 #ifdef _SC_PHYS_PAGES
     long pages = sysconf(_SC_PHYS_PAGES);
     if (pages > 0) {
-        if (pages > SIZE_MAX / qemu_real_host_page_size) {
+        if (pages > SIZE_MAX / qemu_real_host_page_size()) {
             return SIZE_MAX;
         } else {
-            return pages * qemu_real_host_page_size;
+            return pages * qemu_real_host_page_size();
         }
     }
 #endif
     return 0;
 }
+
+int qemu_msync(void *addr, size_t length, int fd)
+{
+    size_t align_mask = ~(qemu_real_host_page_size() - 1);
+
+    /**
+     * There are no strict reqs as per the length of mapping
+     * to be synced. Still the length needs to follow the address
+     * alignment changes. Additionally - round the size to the multiple
+     * of PAGE_SIZE
+     */
+    length += ((uintptr_t)addr & (qemu_real_host_page_size() - 1));
+    length = (length + ~align_mask) & align_mask;
+
+    addr = (void *)((uintptr_t)addr & align_mask);
+
+    return msync(addr, length, MS_SYNC);
+}
index 23a7c7320b1f9b2c94a5e804c09a777beaa70c93..55b0189dc30424d3223f11d0775f40b939420707 100644 (file)
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
- *
- * The implementation of g_poll (functions poll_rest, g_poll) at the end of
- * this file are based on code from GNOME glib-2 and use a different license,
- * see the license comment there.
  */
 
 #include "qemu/osdep.h"
 #include <windows.h>
-#include "qemu-common.h"
 #include "qapi/error.h"
-#include "sysemu/sysemu.h"
 #include "qemu/main-loop.h"
 #include "trace.h"
 #include "qemu/sockets.h"
 #include "qemu/cutils.h"
-
-/* this must come after including "trace.h" */
-#include <shlobj.h>
-
-void *qemu_oom_check(void *ptr)
-{
-    if (ptr == NULL) {
-        fprintf(stderr, "Failed to allocate memory: %lu\n", GetLastError());
-        abort();
-    }
-    return ptr;
-}
-
-void *qemu_try_memalign(size_t alignment, size_t size)
-{
-    void *ptr;
-
-    if (!size) {
-        abort();
-    }
-    ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
-    trace_qemu_memalign(alignment, size, ptr);
-    return ptr;
-}
-
-void *qemu_memalign(size_t alignment, size_t size)
-{
-    return qemu_oom_check(qemu_try_memalign(alignment, size));
-}
+#include "qemu/error-report.h"
+#include <malloc.h>
 
 static int get_allocation_granularity(void)
 {
@@ -77,10 +44,20 @@ static int get_allocation_granularity(void)
     return system_info.dwAllocationGranularity;
 }
 
-void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared)
+void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared,
+                          bool noreserve)
 {
     void *ptr;
 
+    if (noreserve) {
+        /*
+         * We need a MEM_COMMIT before accessing any memory in a MEM_RESERVE
+         * area; we cannot easily mimic POSIX MAP_NORESERVE semantics.
+         */
+        error_report("Skipping reservation of swap space is not supported.");
+        return NULL;
+    }
+
     ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
     trace_qemu_anon_ram_alloc(size, ptr);
 
@@ -90,14 +67,6 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared)
     return ptr;
 }
 
-void qemu_vfree(void *ptr)
-{
-    trace_qemu_vfree(ptr);
-    if (ptr) {
-        VirtualFree(ptr, 0, MEM_RELEASE);
-    }
-}
-
 void qemu_anon_ram_free(void *ptr, size_t size)
 {
     trace_qemu_anon_ram_free(ptr, size);
@@ -208,26 +177,25 @@ static int socket_error(void)
     }
 }
 
-void qemu_set_block(int fd)
+void qemu_socket_set_block(int fd)
 {
     unsigned long opt = 0;
-    WSAEventSelect(fd, NULL, 0);
+    qemu_socket_unselect(fd, NULL);
     ioctlsocket(fd, FIONBIO, &opt);
 }
 
-int qemu_try_set_nonblock(int fd)
+int qemu_socket_try_set_nonblock(int fd)
 {
     unsigned long opt = 1;
     if (ioctlsocket(fd, FIONBIO, &opt) != NO_ERROR) {
         return -socket_error();
     }
-    qemu_fd_register(fd);
     return 0;
 }
 
-void qemu_set_nonblock(int fd)
+void qemu_socket_set_nonblock(int fd)
 {
-    (void)qemu_try_set_nonblock(fd);
+    (void)qemu_socket_try_set_nonblock(fd);
 }
 
 int socket_set_fast_reuse(int fd)
@@ -254,46 +222,19 @@ void qemu_set_cloexec(int fd)
 {
 }
 
-/* Offset between 1/1/1601 and 1/1/1970 in 100 nanosec units */
-#define _W32_FT_OFFSET (116444736000000000ULL)
-
-int qemu_gettimeofday(qemu_timeval *tp)
-{
-  union {
-    unsigned long long ns100; /*time since 1 Jan 1601 in 100ns units */
-    FILETIME ft;
-  }  _now;
-
-  if(tp) {
-      GetSystemTimeAsFileTime (&_now.ft);
-      tp->tv_usec=(long)((_now.ns100 / 10ULL) % 1000000ULL );
-      tp->tv_sec= (long)((_now.ns100 - _W32_FT_OFFSET) / 10000000ULL);
-  }
-  /* Always return 0 as per Open Group Base Specifications Issue 6.
-     Do not set errno on error.  */
-  return 0;
-}
-
 int qemu_get_thread_id(void)
 {
     return GetCurrentThreadId();
 }
 
 char *
-qemu_get_local_state_pathname(const char *relative_pathname)
+qemu_get_local_state_dir(void)
 {
-    HRESULT result;
-    char base_path[MAX_PATH+1] = "";
+    const char * const *data_dirs = g_get_system_data_dirs();
 
-    result = SHGetFolderPath(NULL, CSIDL_COMMON_APPDATA, NULL,
-                             /* SHGFP_TYPE_CURRENT */ 0, base_path);
-    if (result != S_OK) {
-        /* misconfigured environment */
-        g_critical("CSIDL_COMMON_APPDATA unavailable: %ld", (long)result);
-        abort();
-    }
-    return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s", base_path,
-                           relative_pathname);
+    g_assert(data_dirs && data_dirs[0]);
+
+    return g_strdup(data_dirs[0]);
 }
 
 void qemu_set_tty_echo(int fd, bool echo)
@@ -315,288 +256,182 @@ void qemu_set_tty_echo(int fd, bool echo)
     }
 }
 
-static const char *exec_dir;
-
-void qemu_init_exec_dir(const char *argv0)
+int getpagesize(void)
 {
+    SYSTEM_INFO system_info;
 
-    char *p;
-    char buf[MAX_PATH];
-    DWORD len;
+    GetSystemInfo(&system_info);
+    return system_info.dwPageSize;
+}
 
-    if (exec_dir) {
-        return;
+void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
+                       ThreadContext *tc, Error **errp)
+{
+    int i;
+    size_t pagesize = qemu_real_host_page_size();
+
+    sz = (sz + pagesize - 1) & -pagesize;
+    for (i = 0; i < sz / pagesize; i++) {
+        memset(area + pagesize * i, 0, 1);
     }
+}
 
-    len = GetModuleFileName(NULL, buf, sizeof(buf) - 1);
-    if (len == 0) {
-        return;
+char *qemu_get_pid_name(pid_t pid)
+{
+    /* XXX Implement me */
+    abort();
+}
+
+
+bool qemu_socket_select(int sockfd, WSAEVENT hEventObject,
+                        long lNetworkEvents, Error **errp)
+{
+    SOCKET s = _get_osfhandle(sockfd);
+
+    if (errp == NULL) {
+        errp = &error_warn;
     }
 
-    buf[len] = 0;
-    p = buf + len - 1;
-    while (p != buf && *p != '\\') {
-        p--;
+    if (s == INVALID_SOCKET) {
+        error_setg(errp, "invalid socket fd=%d", sockfd);
+        return false;
     }
-    *p = 0;
-    if (access(buf, R_OK) == 0) {
-        exec_dir = g_strdup(buf);
-    } else {
-        exec_dir = CONFIG_BINDIR;
+
+    if (WSAEventSelect(s, hEventObject, lNetworkEvents) != 0) {
+        error_setg_win32(errp, WSAGetLastError(), "failed to WSAEventSelect()");
+        return false;
     }
+
+    return true;
 }
 
-const char *qemu_get_exec_dir(void)
+bool qemu_socket_unselect(int sockfd, Error **errp)
 {
-    return exec_dir;
+    return qemu_socket_select(sockfd, NULL, 0, errp);
 }
 
-#if !GLIB_CHECK_VERSION(2, 50, 0)
-/*
- * The original implementation of g_poll from glib has a problem on Windows
- * when using timeouts < 10 ms.
- *
- * Whenever g_poll is called with timeout < 10 ms, it does a quick poll instead
- * of wait. This causes significant performance degradation of QEMU.
- *
- * The following code is a copy of the original code from glib/gpoll.c
- * (glib commit 20f4d1820b8d4d0fc4447188e33efffd6d4a88d8 from 2014-02-19).
- * Some debug code was removed and the code was reformatted.
- * All other code modifications are marked with 'QEMU'.
- */
-
-/*
- * gpoll.c: poll(2) abstraction
- * Copyright 1998 Owen Taylor
- * Copyright 2008 Red Hat, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-static int poll_rest(gboolean poll_msgs, HANDLE *handles, gint nhandles,
-                     GPollFD *fds, guint nfds, gint timeout)
+int qemu_socketpair(int domain, int type, int protocol, int sv[2])
 {
-    DWORD ready;
-    GPollFD *f;
-    int recursed_result;
+    struct sockaddr_un addr = {
+        0,
+    };
+    socklen_t socklen;
+    int listener = -1;
+    int client = -1;
+    int server = -1;
+    g_autofree char *path = NULL;
+    int tmpfd;
+    u_long arg;
+    int ret = -1;
 
-    if (poll_msgs) {
-        /* Wait for either messages or handles
-         * -> Use MsgWaitForMultipleObjectsEx
-         */
-        ready = MsgWaitForMultipleObjectsEx(nhandles, handles, timeout,
-                                            QS_ALLINPUT, MWMO_ALERTABLE);
+    g_return_val_if_fail(sv != NULL, -1);
 
-        if (ready == WAIT_FAILED) {
-            gchar *emsg = g_win32_error_message(GetLastError());
-            g_warning("MsgWaitForMultipleObjectsEx failed: %s", emsg);
-            g_free(emsg);
-        }
-    } else if (nhandles == 0) {
-        /* No handles to wait for, just the timeout */
-        if (timeout == INFINITE) {
-            ready = WAIT_FAILED;
-        } else {
-            SleepEx(timeout, TRUE);
-            ready = WAIT_TIMEOUT;
-        }
-    } else {
-        /* Wait for just handles
-         * -> Use WaitForMultipleObjectsEx
-         */
-        ready =
-            WaitForMultipleObjectsEx(nhandles, handles, FALSE, timeout, TRUE);
-        if (ready == WAIT_FAILED) {
-            gchar *emsg = g_win32_error_message(GetLastError());
-            g_warning("WaitForMultipleObjectsEx failed: %s", emsg);
-            g_free(emsg);
-        }
-    }
+    addr.sun_family = AF_UNIX;
+    socklen = sizeof(addr);
 
-    if (ready == WAIT_FAILED) {
-        return -1;
-    } else if (ready == WAIT_TIMEOUT || ready == WAIT_IO_COMPLETION) {
-        return 0;
-    } else if (poll_msgs && ready == WAIT_OBJECT_0 + nhandles) {
-        for (f = fds; f < &fds[nfds]; ++f) {
-            if (f->fd == G_WIN32_MSG_HANDLE && f->events & G_IO_IN) {
-                f->revents |= G_IO_IN;
-            }
-        }
-
-        /* If we have a timeout, or no handles to poll, be satisfied
-         * with just noticing we have messages waiting.
-         */
-        if (timeout != 0 || nhandles == 0) {
-            return 1;
-        }
+    tmpfd = g_file_open_tmp(NULL, &path, NULL);
+    if (tmpfd == -1 || !path) {
+        errno = EACCES;
+        goto out;
+    }
 
-        /* If no timeout and handles to poll, recurse to poll them,
-         * too.
-         */
-        recursed_result = poll_rest(FALSE, handles, nhandles, fds, nfds, 0);
-        return (recursed_result == -1) ? -1 : 1 + recursed_result;
-    } else if (/* QEMU: removed the following unneeded statement which causes
-                * a compiler warning: ready >= WAIT_OBJECT_0 && */
-               ready < WAIT_OBJECT_0 + nhandles) {
-        for (f = fds; f < &fds[nfds]; ++f) {
-            if ((HANDLE) f->fd == handles[ready - WAIT_OBJECT_0]) {
-                f->revents = f->events;
-            }
-        }
+    close(tmpfd);
 
-        /* If no timeout and polling several handles, recurse to poll
-         * the rest of them.
-         */
-        if (timeout == 0 && nhandles > 1) {
-            /* Remove the handle that fired */
-            int i;
-            for (i = ready - WAIT_OBJECT_0 + 1; i < nhandles; i++) {
-                handles[i-1] = handles[i];
-            }
-            nhandles--;
-            recursed_result = poll_rest(FALSE, handles, nhandles, fds, nfds, 0);
-            return (recursed_result == -1) ? -1 : 1 + recursed_result;
-        }
-        return 1;
+    if (strlen(path) >= sizeof(addr.sun_path)) {
+        errno = EINVAL;
+        goto out;
     }
 
-    return 0;
-}
+    strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
 
-gint g_poll(GPollFD *fds, guint nfds, gint timeout)
-{
-    HANDLE handles[MAXIMUM_WAIT_OBJECTS];
-    gboolean poll_msgs = FALSE;
-    GPollFD *f;
-    gint nhandles = 0;
-    int retval;
-
-    for (f = fds; f < &fds[nfds]; ++f) {
-        if (f->fd == G_WIN32_MSG_HANDLE && (f->events & G_IO_IN)) {
-            poll_msgs = TRUE;
-        } else if (f->fd > 0) {
-            /* Don't add the same handle several times into the array, as
-             * docs say that is not allowed, even if it actually does seem
-             * to work.
-             */
-            gint i;
-
-            for (i = 0; i < nhandles; i++) {
-                if (handles[i] == (HANDLE) f->fd) {
-                    break;
-                }
-            }
-
-            if (i == nhandles) {
-                if (nhandles == MAXIMUM_WAIT_OBJECTS) {
-                    g_warning("Too many handles to wait for!\n");
-                    break;
-                } else {
-                    handles[nhandles++] = (HANDLE) f->fd;
-                }
-            }
-        }
+    listener = socket(domain, type, protocol);
+    if (listener == -1) {
+        goto out;
     }
 
-    for (f = fds; f < &fds[nfds]; ++f) {
-        f->revents = 0;
+    if (DeleteFile(path) == 0 && GetLastError() != ERROR_FILE_NOT_FOUND) {
+        errno = EACCES;
+        goto out;
     }
+    g_clear_pointer(&path, g_free);
 
-    if (timeout == -1) {
-        timeout = INFINITE;
+    if (bind(listener, (struct sockaddr *)&addr, socklen) == -1) {
+        goto out;
     }
 
-    /* Polling for several things? */
-    if (nhandles > 1 || (nhandles > 0 && poll_msgs)) {
-        /* First check if one or several of them are immediately
-         * available
-         */
-        retval = poll_rest(poll_msgs, handles, nhandles, fds, nfds, 0);
-
-        /* If not, and we have a significant timeout, poll again with
-         * timeout then. Note that this will return indication for only
-         * one event, or only for messages. We ignore timeouts less than
-         * ten milliseconds as they are mostly pointless on Windows, the
-         * MsgWaitForMultipleObjectsEx() call will timeout right away
-         * anyway.
-         *
-         * Modification for QEMU: replaced timeout >= 10 by timeout > 0.
-         */
-        if (retval == 0 && (timeout == INFINITE || timeout > 0)) {
-            retval = poll_rest(poll_msgs, handles, nhandles,
-                               fds, nfds, timeout);
-        }
-    } else {
-        /* Just polling for one thing, so no need to check first if
-         * available immediately
-         */
-        retval = poll_rest(poll_msgs, handles, nhandles, fds, nfds, timeout);
+    if (listen(listener, 1) == -1) {
+        goto out;
     }
 
-    if (retval == -1) {
-        for (f = fds; f < &fds[nfds]; ++f) {
-            f->revents = 0;
-        }
+    client = socket(domain, type, protocol);
+    if (client == -1) {
+        goto out;
     }
 
-    return retval;
-}
-#endif
+    arg = 1;
+    if (ioctlsocket(client, FIONBIO, &arg) != NO_ERROR) {
+        goto out;
+    }
 
-int getpagesize(void)
-{
-    SYSTEM_INFO system_info;
+    if (connect(client, (struct sockaddr *)&addr, socklen) == -1 &&
+        WSAGetLastError() != WSAEWOULDBLOCK) {
+        goto out;
+    }
 
-    GetSystemInfo(&system_info);
-    return system_info.dwPageSize;
-}
+    server = accept(listener, NULL, NULL);
+    if (server == -1) {
+        goto out;
+    }
 
-void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
-                     Error **errp)
-{
-    int i;
-    size_t pagesize = qemu_real_host_page_size;
+    arg = 0;
+    if (ioctlsocket(client, FIONBIO, &arg) != NO_ERROR) {
+        goto out;
+    }
 
-    memory = (memory + pagesize - 1) & -pagesize;
-    for (i = 0; i < memory / pagesize; i++) {
-        memset(area + pagesize * i, 0, 1);
+    arg = 0;
+    if (ioctlsocket(client, SIO_AF_UNIX_GETPEERPID, &arg) != NO_ERROR) {
+        goto out;
     }
-}
 
-char *qemu_get_pid_name(pid_t pid)
-{
-    /* XXX Implement me */
-    abort();
-}
+    if (arg != GetCurrentProcessId()) {
+        errno = EPERM;
+        goto out;
+    }
 
+    sv[0] = server;
+    server = -1;
+    sv[1] = client;
+    client = -1;
+    ret = 0;
 
-pid_t qemu_fork(Error **errp)
-{
-    errno = ENOSYS;
-    error_setg_errno(errp, errno,
-                     "cannot fork child process");
-    return -1;
+out:
+    if (listener != -1) {
+        close(listener);
+    }
+    if (client != -1) {
+        close(client);
+    }
+    if (server != -1) {
+        close(server);
+    }
+    if (path) {
+        DeleteFile(path);
+    }
+    return ret;
 }
 
-
 #undef connect
 int qemu_connect_wrap(int sockfd, const struct sockaddr *addr,
                       socklen_t addrlen)
 {
     int ret;
-    ret = connect(sockfd, addr, addrlen);
+    SOCKET s = _get_osfhandle(sockfd);
+
+    if (s == INVALID_SOCKET) {
+        return -1;
+    }
+
+    ret = connect(s, addr, addrlen);
     if (ret < 0) {
         if (WSAGetLastError() == WSAEWOULDBLOCK) {
             errno = EINPROGRESS;
@@ -612,7 +447,13 @@ int qemu_connect_wrap(int sockfd, const struct sockaddr *addr,
 int qemu_listen_wrap(int sockfd, int backlog)
 {
     int ret;
-    ret = listen(sockfd, backlog);
+    SOCKET s = _get_osfhandle(sockfd);
+
+    if (s == INVALID_SOCKET) {
+        return -1;
+    }
+
+    ret = listen(s, backlog);
     if (ret < 0) {
         errno = socket_error();
     }
@@ -625,36 +466,141 @@ int qemu_bind_wrap(int sockfd, const struct sockaddr *addr,
                    socklen_t addrlen)
 {
     int ret;
-    ret = bind(sockfd, addr, addrlen);
+    SOCKET s = _get_osfhandle(sockfd);
+
+    if (s == INVALID_SOCKET) {
+        return -1;
+    }
+
+    ret = bind(s, addr, addrlen);
     if (ret < 0) {
         errno = socket_error();
     }
     return ret;
 }
 
+QEMU_USED EXCEPTION_DISPOSITION
+win32_close_exception_handler(struct _EXCEPTION_RECORD *exception_record,
+                              void *registration, struct _CONTEXT *context,
+                              void *dispatcher)
+{
+    return EXCEPTION_EXECUTE_HANDLER;
+}
 
-#undef socket
-int qemu_socket_wrap(int domain, int type, int protocol)
+#undef close
+int qemu_close_socket_osfhandle(int fd)
 {
-    int ret;
-    ret = socket(domain, type, protocol);
+    SOCKET s = _get_osfhandle(fd);
+    DWORD flags = 0;
+
+    /*
+     * If we were to just call _close on the descriptor, it would close the
+     * HANDLE, but it wouldn't free any of the resources associated to the
+     * SOCKET, and we can't call _close after calling closesocket, because
+     * closesocket has already closed the HANDLE, and _close would attempt to
+     * close the HANDLE again, resulting in a double free. We can however
+     * protect the HANDLE from actually being closed long enough to close the
+     * file descriptor, then close the socket itself.
+     */
+    if (!GetHandleInformation((HANDLE)s, &flags)) {
+        errno = EACCES;
+        return -1;
+    }
+
+    if (!SetHandleInformation((HANDLE)s, HANDLE_FLAG_PROTECT_FROM_CLOSE, HANDLE_FLAG_PROTECT_FROM_CLOSE)) {
+        errno = EACCES;
+        return -1;
+    }
+
+    __try1(win32_close_exception_handler) {
+        /*
+         * close() returns EBADF since we PROTECT_FROM_CLOSE the underlying
+         * handle, but the FD is actually freed
+         */
+        if (close(fd) < 0 && errno != EBADF) {
+            return -1;
+        }
+    }
+    __except1 {
+    }
+
+    if (!SetHandleInformation((HANDLE)s, flags, flags)) {
+        errno = EACCES;
+        return -1;
+    }
+
+    return 0;
+}
+
+int qemu_close_wrap(int fd)
+{
+    SOCKET s = INVALID_SOCKET;
+    int ret = -1;
+
+    if (!fd_is_socket(fd)) {
+        return close(fd);
+    }
+
+    s = _get_osfhandle(fd);
+    qemu_close_socket_osfhandle(fd);
+
+    ret = closesocket(s);
     if (ret < 0) {
         errno = socket_error();
     }
+
     return ret;
 }
 
 
+#undef socket
+int qemu_socket_wrap(int domain, int type, int protocol)
+{
+    SOCKET s;
+    int fd;
+
+    s = socket(domain, type, protocol);
+    if (s == -1) {
+        errno = socket_error();
+        return -1;
+    }
+
+    fd = _open_osfhandle(s, _O_BINARY);
+    if (fd < 0) {
+        closesocket(s);
+        /* _open_osfhandle may not set errno, and closesocket() may override it */
+        errno = ENOMEM;
+    }
+
+    return fd;
+}
+
+
 #undef accept
 int qemu_accept_wrap(int sockfd, struct sockaddr *addr,
                      socklen_t *addrlen)
 {
-    int ret;
-    ret = accept(sockfd, addr, addrlen);
-    if (ret < 0) {
+    int fd;
+    SOCKET s = _get_osfhandle(sockfd);
+
+    if (s == INVALID_SOCKET) {
+        return -1;
+    }
+
+    s = accept(s, addr, addrlen);
+    if (s == -1) {
         errno = socket_error();
+        return -1;
     }
-    return ret;
+
+    fd = _open_osfhandle(s, _O_BINARY);
+    if (fd < 0) {
+        closesocket(s);
+        /* _open_osfhandle may not set errno, and closesocket() may override it */
+        errno = ENOMEM;
+    }
+
+    return fd;
 }
 
 
@@ -662,7 +608,13 @@ int qemu_accept_wrap(int sockfd, struct sockaddr *addr,
 int qemu_shutdown_wrap(int sockfd, int how)
 {
     int ret;
-    ret = shutdown(sockfd, how);
+    SOCKET s = _get_osfhandle(sockfd);
+
+    if (s == INVALID_SOCKET) {
+        return -1;
+    }
+
+    ret = shutdown(s, how);
     if (ret < 0) {
         errno = socket_error();
     }
@@ -674,19 +626,13 @@ int qemu_shutdown_wrap(int sockfd, int how)
 int qemu_ioctlsocket_wrap(int fd, int req, void *val)
 {
     int ret;
-    ret = ioctlsocket(fd, req, val);
-    if (ret < 0) {
-        errno = socket_error();
-    }
-    return ret;
-}
+    SOCKET s = _get_osfhandle(fd);
 
+    if (s == INVALID_SOCKET) {
+        return -1;
+    }
 
-#undef closesocket
-int qemu_closesocket_wrap(int fd)
-{
-    int ret;
-    ret = closesocket(fd);
+    ret = ioctlsocket(s, req, val);
     if (ret < 0) {
         errno = socket_error();
     }
@@ -699,7 +645,13 @@ int qemu_getsockopt_wrap(int sockfd, int level, int optname,
                          void *optval, socklen_t *optlen)
 {
     int ret;
-    ret = getsockopt(sockfd, level, optname, optval, optlen);
+    SOCKET s = _get_osfhandle(sockfd);
+
+    if (s == INVALID_SOCKET) {
+        return -1;
+    }
+
+    ret = getsockopt(s, level, optname, optval, optlen);
     if (ret < 0) {
         errno = socket_error();
     }
@@ -712,7 +664,13 @@ int qemu_setsockopt_wrap(int sockfd, int level, int optname,
                          const void *optval, socklen_t optlen)
 {
     int ret;
-    ret = setsockopt(sockfd, level, optname, optval, optlen);
+    SOCKET s = _get_osfhandle(sockfd);
+
+    if (s == INVALID_SOCKET) {
+        return -1;
+    }
+
+    ret = setsockopt(s, level, optname, optval, optlen);
     if (ret < 0) {
         errno = socket_error();
     }
@@ -725,7 +683,13 @@ int qemu_getpeername_wrap(int sockfd, struct sockaddr *addr,
                           socklen_t *addrlen)
 {
     int ret;
-    ret = getpeername(sockfd, addr, addrlen);
+    SOCKET s = _get_osfhandle(sockfd);
+
+    if (s == INVALID_SOCKET) {
+        return -1;
+    }
+
+    ret = getpeername(s, addr, addrlen);
     if (ret < 0) {
         errno = socket_error();
     }
@@ -738,7 +702,13 @@ int qemu_getsockname_wrap(int sockfd, struct sockaddr *addr,
                           socklen_t *addrlen)
 {
     int ret;
-    ret = getsockname(sockfd, addr, addrlen);
+    SOCKET s = _get_osfhandle(sockfd);
+
+    if (s == INVALID_SOCKET) {
+        return -1;
+    }
+
+    ret = getsockname(s, addr, addrlen);
     if (ret < 0) {
         errno = socket_error();
     }
@@ -750,7 +720,13 @@ int qemu_getsockname_wrap(int sockfd, struct sockaddr *addr,
 ssize_t qemu_send_wrap(int sockfd, const void *buf, size_t len, int flags)
 {
     int ret;
-    ret = send(sockfd, buf, len, flags);
+    SOCKET s = _get_osfhandle(sockfd);
+
+    if (s == INVALID_SOCKET) {
+        return -1;
+    }
+
+    ret = send(s, buf, len, flags);
     if (ret < 0) {
         errno = socket_error();
     }
@@ -763,7 +739,13 @@ ssize_t qemu_sendto_wrap(int sockfd, const void *buf, size_t len, int flags,
                          const struct sockaddr *addr, socklen_t addrlen)
 {
     int ret;
-    ret = sendto(sockfd, buf, len, flags, addr, addrlen);
+    SOCKET s = _get_osfhandle(sockfd);
+
+    if (s == INVALID_SOCKET) {
+        return -1;
+    }
+
+    ret = sendto(s, buf, len, flags, addr, addrlen);
     if (ret < 0) {
         errno = socket_error();
     }
@@ -775,7 +757,13 @@ ssize_t qemu_sendto_wrap(int sockfd, const void *buf, size_t len, int flags,
 ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags)
 {
     int ret;
-    ret = recv(sockfd, buf, len, flags);
+    SOCKET s = _get_osfhandle(sockfd);
+
+    if (s == INVALID_SOCKET) {
+        return -1;
+    }
+
+    ret = recv(s, buf, len, flags);
     if (ret < 0) {
         errno = socket_error();
     }
@@ -788,7 +776,13 @@ ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags,
                            struct sockaddr *addr, socklen_t *addrlen)
 {
     int ret;
-    ret = recvfrom(sockfd, buf, len, flags, addr, addrlen);
+    SOCKET s = _get_osfhandle(sockfd);
+
+    if (s == INVALID_SOCKET) {
+        return -1;
+    }
+
+    ret = recvfrom(s, buf, len, flags, addr, addrlen);
     if (ret < 0) {
         errno = socket_error();
     }
@@ -822,26 +816,56 @@ bool qemu_write_pidfile(const char *filename, Error **errp)
     return true;
 }
 
-char *qemu_get_host_name(Error **errp)
+size_t qemu_get_host_physmem(void)
 {
-    wchar_t tmp[MAX_COMPUTERNAME_LENGTH + 1];
-    DWORD size = G_N_ELEMENTS(tmp);
+    MEMORYSTATUSEX statex;
+    statex.dwLength = sizeof(statex);
 
-    if (GetComputerNameW(tmp, &size) == 0) {
-        error_setg_win32(errp, GetLastError(), "failed close handle");
+    if (GlobalMemoryStatusEx(&statex)) {
+        return statex.ullTotalPhys;
+    }
+    return 0;
+}
+
+int qemu_msync(void *addr, size_t length, int fd)
+{
+    /**
+     * Perform the sync based on the file descriptor
+     * The sync range will most probably be wider than the one
+     * requested - but it will still get the job done
+     */
+    return qemu_fdatasync(fd);
+}
+
+void *qemu_win32_map_alloc(size_t size, HANDLE *h, Error **errp)
+{
+    void *bits;
+
+    trace_win32_map_alloc(size);
+
+    *h = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0,
+                          size, NULL);
+    if (*h == NULL) {
+        error_setg_win32(errp, GetLastError(), "Failed to CreateFileMapping");
+        return NULL;
+    }
+
+    bits = MapViewOfFile(*h, FILE_MAP_ALL_ACCESS, 0, 0, size);
+    if (bits == NULL) {
+        error_setg_win32(errp, GetLastError(), "Failed to MapViewOfFile");
+        CloseHandle(*h);
         return NULL;
     }
 
-    return g_utf16_to_utf8(tmp, size, NULL, NULL, NULL);
+    return bits;
 }
 
-size_t qemu_get_host_physmem(void)
+void qemu_win32_map_free(void *ptr, HANDLE h, Error **errp)
 {
-    MEMORYSTATUSEX statex;
-    statex.dwLength = sizeof(statex);
+    trace_win32_map_free(ptr, h);
 
-    if (GlobalMemoryStatusEx(&statex)) {
-        return statex.ullTotalPhys;
+    if (UnmapViewOfFile(ptr) == 0) {
+        error_setg_win32(errp, GetLastError(), "Failed to UnmapViewOfFile");
     }
-    return 0;
+    CloseHandle(h);
 }
diff --git a/util/pagesize.c b/util/pagesize.c
deleted file mode 100644 (file)
index 998632c..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * pagesize.c - query the host about its page size
- *
- * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
- * License: GNU GPL, version 2 or later.
- *   See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-
-uintptr_t qemu_real_host_page_size;
-intptr_t qemu_real_host_page_mask;
-
-static void __attribute__((constructor)) init_real_host_page_size(void)
-{
-    qemu_real_host_page_size = getpagesize();
-    qemu_real_host_page_mask = -(intptr_t)qemu_real_host_page_size;
-}
index 5f75e24c297c723733f7f187b6e96006565d089f..ef3566b03afa021a809ce16592b7c4c16801eb9f 100644 (file)
@@ -210,7 +210,7 @@ void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
 
         /*
          * To avoid double-counting we capture [left, right) ranges, except for
-         * the righmost bin, which captures a [left, right] range.
+         * the rightmost bin, which captures a [left, right] range.
          */
         while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
             struct qdist_entry *o = &from->entries[j];
index 1c83cd9d298ee83b18d53b4c75a65a3a15d97871..a66cc07e75be98195f9fabd9670c863bfb1f5259 100644 (file)
 #include "qemu/co-shared-resource.h"
 
 struct SharedResource {
-    uint64_t total;
-    uint64_t available;
+    uint64_t total; /* Set in shres_create() and not changed anymore */
 
+    /* State fields protected by lock */
+    uint64_t available;
     CoQueue queue;
+
+    QemuMutex lock;
 };
 
 SharedResource *shres_create(uint64_t total)
@@ -40,6 +43,7 @@ SharedResource *shres_create(uint64_t total)
 
     s->total = s->available = total;
     qemu_co_queue_init(&s->queue);
+    qemu_mutex_init(&s->lock);
 
     return s;
 }
@@ -47,10 +51,12 @@ SharedResource *shres_create(uint64_t total)
 void shres_destroy(SharedResource *s)
 {
     assert(s->available == s->total);
+    qemu_mutex_destroy(&s->lock);
     g_free(s);
 }
 
-bool co_try_get_from_shres(SharedResource *s, uint64_t n)
+/* Called with lock held. */
+static bool co_try_get_from_shres_locked(SharedResource *s, uint64_t n)
 {
     if (s->available >= n) {
         s->available -= n;
@@ -60,16 +66,24 @@ bool co_try_get_from_shres(SharedResource *s, uint64_t n)
     return false;
 }
 
+bool co_try_get_from_shres(SharedResource *s, uint64_t n)
+{
+    QEMU_LOCK_GUARD(&s->lock);
+    return co_try_get_from_shres_locked(s, n);
+}
+
 void coroutine_fn co_get_from_shres(SharedResource *s, uint64_t n)
 {
     assert(n <= s->total);
-    while (!co_try_get_from_shres(s, n)) {
-        qemu_co_queue_wait(&s->queue, NULL);
+    QEMU_LOCK_GUARD(&s->lock);
+    while (!co_try_get_from_shres_locked(s, n)) {
+        qemu_co_queue_wait(&s->queue, &s->lock);
     }
 }
 
 void coroutine_fn co_put_to_shres(SharedResource *s, uint64_t n)
 {
+    QEMU_LOCK_GUARD(&s->lock);
     assert(s->total - s->available >= n);
     s->available += n;
     qemu_co_queue_restart_all(&s->queue);
diff --git a/util/qemu-co-timeout.c b/util/qemu-co-timeout.c
new file mode 100644 (file)
index 0000000..00cd335
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Helper functionality for distributing a fixed total amount of
+ * an abstract resource among multiple coroutines.
+ *
+ * Copyright (c) 2022 Virtuozzo International GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/coroutine.h"
+#include "block/aio.h"
+
+typedef struct QemuCoTimeoutState {
+    CoroutineEntry *entry;
+    void *opaque;
+    QemuCoSleep sleep_state;
+    bool marker;
+    CleanupFunc *clean;
+} QemuCoTimeoutState;
+
+static void coroutine_fn qemu_co_timeout_entry(void *opaque)
+{
+    QemuCoTimeoutState *s = opaque;
+
+    s->entry(s->opaque);
+
+    if (s->marker) {
+        assert(!s->sleep_state.to_wake);
+        /* .marker set by qemu_co_timeout, it have been failed */
+        if (s->clean) {
+            s->clean(s->opaque);
+        }
+        g_free(s);
+    } else {
+        s->marker = true;
+        qemu_co_sleep_wake(&s->sleep_state);
+    }
+}
+
+int coroutine_fn qemu_co_timeout(CoroutineEntry *entry, void *opaque,
+                                 uint64_t timeout_ns, CleanupFunc clean)
+{
+    QemuCoTimeoutState *s;
+    Coroutine *co;
+
+    if (timeout_ns == 0) {
+        entry(opaque);
+        return 0;
+    }
+
+    s = g_new(QemuCoTimeoutState, 1);
+    *s = (QemuCoTimeoutState) {
+        .entry = entry,
+        .opaque = opaque,
+        .clean = clean
+    };
+
+    co = qemu_coroutine_create(qemu_co_timeout_entry, s);
+
+    aio_co_enter(qemu_get_current_aio_context(), co);
+    qemu_co_sleep_ns_wakeable(&s->sleep_state, QEMU_CLOCK_REALTIME, timeout_ns);
+
+    if (s->marker) {
+        /* .marker set by qemu_co_timeout_entry, success */
+        g_free(s);
+        return 0;
+    }
+
+    /* Don't free s, as we can't cancel qemu_co_timeout_entry execution */
+    s->marker = true;
+    return -ETIMEDOUT;
+}
index 660f47b0050f9bf54e2ee69f1f0d49315c650cb0..42076efe1ef2bd512c33c67d1edeafa43ac027bf 100644 (file)
@@ -7,6 +7,7 @@
 #include "qemu/error-report.h"
 #include "qemu/option.h"
 #include "qemu/config-file.h"
+#include "hw/boards.h"
 
 static QemuOptsList *vm_config_groups[48];
 static QemuOptsList *drive_config_groups[5];
@@ -16,6 +17,7 @@ static QemuOptsList *find_list(QemuOptsList **lists, const char *group,
 {
     int i;
 
+    qemu_load_module_for_opts(group);
     for (i = 0; lists[i] != NULL; i++) {
         if (strcmp(lists[i]->name, group) == 0)
             break;
@@ -55,7 +57,7 @@ QemuOpts *qemu_find_opts_singleton(const char *group)
 
 static CommandLineParameterInfoList *query_option_descs(const QemuOptDesc *desc)
 {
-    CommandLineParameterInfoList *param_list = NULL, *entry;
+    CommandLineParameterInfoList *param_list = NULL;
     CommandLineParameterInfo *info;
     int i;
 
@@ -78,19 +80,10 @@ static CommandLineParameterInfoList *query_option_descs(const QemuOptDesc *desc)
             break;
         }
 
-        if (desc[i].help) {
-            info->has_help = true;
-            info->help = g_strdup(desc[i].help);
-        }
-        if (desc[i].def_value_str) {
-            info->has_q_default = true;
-            info->q_default = g_strdup(desc[i].def_value_str);
-        }
+        info->help = g_strdup(desc[i].help);
+        info->q_default = g_strdup(desc[i].def_value_str);
 
-        entry = g_malloc0(sizeof(*entry));
-        entry->value = info;
-        entry->next = param_list;
-        param_list = entry;
+        QAPI_LIST_PREPEND(param_list, info);
     }
 
     return param_list;
@@ -150,125 +143,109 @@ static CommandLineParameterInfoList *get_drive_infolist(void)
     return head;
 }
 
-/* restore machine options that are now machine's properties */
-static QemuOptsList machine_opts = {
-    .merge_lists = true,
-    .head = QTAILQ_HEAD_INITIALIZER(machine_opts.head),
-    .desc = {
-        {
-            .name = "type",
-            .type = QEMU_OPT_STRING,
-            .help = "emulated machine"
-        },{
-            .name = "accel",
-            .type = QEMU_OPT_STRING,
-            .help = "accelerator list",
-        },{
-            .name = "kernel_irqchip",
-            .type = QEMU_OPT_BOOL,
-            .help = "use KVM in-kernel irqchip",
-        },{
-            .name = "kvm_shadow_mem",
-            .type = QEMU_OPT_SIZE,
-            .help = "KVM shadow MMU size",
-        },{
-            .name = "kernel",
-            .type = QEMU_OPT_STRING,
-            .help = "Linux kernel image file",
-        },{
-            .name = "initrd",
-            .type = QEMU_OPT_STRING,
-            .help = "Linux initial ramdisk file",
-        },{
-            .name = "append",
-            .type = QEMU_OPT_STRING,
-            .help = "Linux kernel command line",
-        },{
-            .name = "dtb",
-            .type = QEMU_OPT_STRING,
-            .help = "Linux kernel device tree file",
-        },{
-            .name = "dumpdtb",
-            .type = QEMU_OPT_STRING,
-            .help = "Dump current dtb to a file and quit",
-        },{
-            .name = "phandle_start",
-            .type = QEMU_OPT_NUMBER,
-            .help = "The first phandle ID we may generate dynamically",
-        },{
-            .name = "dt_compatible",
-            .type = QEMU_OPT_STRING,
-            .help = "Overrides the \"compatible\" property of the dt root node",
-        },{
-            .name = "dump-guest-core",
-            .type = QEMU_OPT_BOOL,
-            .help = "Include guest memory in  a core dump",
-        },{
-            .name = "mem-merge",
-            .type = QEMU_OPT_BOOL,
-            .help = "enable/disable memory merge support",
-        },{
-            .name = "usb",
-            .type = QEMU_OPT_BOOL,
-            .help = "Set on/off to enable/disable usb",
-        },{
-            .name = "firmware",
-            .type = QEMU_OPT_STRING,
-            .help = "firmware image",
-        },{
-            .name = "iommu",
-            .type = QEMU_OPT_BOOL,
-            .help = "Set on/off to enable/disable Intel IOMMU (VT-d)",
-        },{
-            .name = "suppress-vmdesc",
-            .type = QEMU_OPT_BOOL,
-            .help = "Set on to disable self-describing migration",
-        },{
-            .name = "aes-key-wrap",
-            .type = QEMU_OPT_BOOL,
-            .help = "enable/disable AES key wrapping using the CPACF wrapping key",
-        },{
-            .name = "dea-key-wrap",
-            .type = QEMU_OPT_BOOL,
-            .help = "enable/disable DEA key wrapping using the CPACF wrapping key",
-        },{
-            .name = "loadparm",
-            .type = QEMU_OPT_STRING,
-            .help = "Up to 8 chars in set of [A-Za-z0-9. ](lower case chars"
-                    " converted to upper case) to pass to machine"
-                    " loader, boot manager, and guest kernel",
-        },
-        { /* End of list */ }
+static CommandLineParameterInfo *objprop_to_cmdline_prop(ObjectProperty *prop)
+{
+    CommandLineParameterInfo *info;
+
+    info = g_malloc0(sizeof(*info));
+    info->name = g_strdup(prop->name);
+
+    if (g_str_equal(prop->type, "bool") || g_str_equal(prop->type, "OnOffAuto")) {
+        info->type = COMMAND_LINE_PARAMETER_TYPE_BOOLEAN;
+    } else if (g_str_equal(prop->type, "int")) {
+        info->type = COMMAND_LINE_PARAMETER_TYPE_NUMBER;
+    } else if (g_str_equal(prop->type, "size")) {
+        info->type = COMMAND_LINE_PARAMETER_TYPE_SIZE;
+    } else {
+        info->type = COMMAND_LINE_PARAMETER_TYPE_STRING;
+    }
+
+    if (prop->description) {
+        info->help = g_strdup(prop->description);
+    }
+
+    return info;
+}
+
+static CommandLineParameterInfoList *query_all_machine_properties(void)
+{
+    CommandLineParameterInfoList *params = NULL, *clpiter;
+    CommandLineParameterInfo *info;
+    GSList *machines, *curr_mach;
+    ObjectPropertyIterator op_iter;
+    ObjectProperty *prop;
+    bool is_new;
+
+    machines = object_class_get_list(TYPE_MACHINE, false);
+    assert(machines);
+
+    /* Loop over all machine classes */
+    for (curr_mach = machines; curr_mach; curr_mach = curr_mach->next) {
+        object_class_property_iter_init(&op_iter, curr_mach->data);
+        /* ... and over the properties of each machine: */
+        while ((prop = object_property_iter_next(&op_iter))) {
+            if (!prop->set) {
+                continue;
+            }
+            /*
+             * Check whether the property has already been put into the list
+             * (via another machine class)
+             */
+            is_new = true;
+            for (clpiter = params; clpiter != NULL; clpiter = clpiter->next) {
+                if (g_str_equal(clpiter->value->name, prop->name)) {
+                    is_new = false;
+                    break;
+                }
+            }
+            /* If it hasn't been added before, add it now to the list */
+            if (is_new) {
+                info = objprop_to_cmdline_prop(prop);
+                QAPI_LIST_PREPEND(params, info);
+            }
+        }
     }
-};
 
-CommandLineOptionInfoList *qmp_query_command_line_options(bool has_option,
-                                                          const char *option,
+    g_slist_free(machines);
+
+    /* Add entry for the "type" parameter */
+    info = g_malloc0(sizeof(*info));
+    info->name = g_strdup("type");
+    info->type = COMMAND_LINE_PARAMETER_TYPE_STRING;
+    info->help = g_strdup("machine type");
+    QAPI_LIST_PREPEND(params, info);
+
+    return params;
+}
+
+CommandLineOptionInfoList *qmp_query_command_line_options(const char *option,
                                                           Error **errp)
 {
-    CommandLineOptionInfoList *conf_list = NULL, *entry;
+    CommandLineOptionInfoList *conf_list = NULL;
     CommandLineOptionInfo *info;
     int i;
 
     for (i = 0; vm_config_groups[i] != NULL; i++) {
-        if (!has_option || !strcmp(option, vm_config_groups[i]->name)) {
+        if (!option || !strcmp(option, vm_config_groups[i]->name)) {
             info = g_malloc0(sizeof(*info));
             info->option = g_strdup(vm_config_groups[i]->name);
             if (!strcmp("drive", vm_config_groups[i]->name)) {
                 info->parameters = get_drive_infolist();
-            } else if (!strcmp("machine", vm_config_groups[i]->name)) {
-                info->parameters = query_option_descs(machine_opts.desc);
             } else {
                 info->parameters =
                     query_option_descs(vm_config_groups[i]->desc);
             }
-            entry = g_malloc0(sizeof(*entry));
-            entry->value = info;
-            entry->next = conf_list;
-            conf_list = entry;
+            QAPI_LIST_PREPEND(conf_list, info);
         }
     }
 
+    if (!option || !strcmp(option, "machine")) {
+        info = g_malloc0(sizeof(*info));
+        info->option = g_strdup("machine");
+        info->parameters = query_all_machine_properties();
+        QAPI_LIST_PREPEND(conf_list, info);
+    }
+
     if (conf_list == NULL) {
         error_setg(errp, "invalid option name: %s", option);
     }
@@ -313,95 +290,20 @@ void qemu_add_opts(QemuOptsList *list)
     abort();
 }
 
-int qemu_set_option(const char *str)
-{
-    Error *local_err = NULL;
-    char group[64], id[64], arg[64];
-    QemuOptsList *list;
-    QemuOpts *opts;
-    int rc, offset;
-
-    rc = sscanf(str, "%63[^.].%63[^.].%63[^=]%n", group, id, arg, &offset);
-    if (rc < 3 || str[offset] != '=') {
-        error_report("can't parse: \"%s\"", str);
-        return -1;
-    }
-
-    list = qemu_find_opts(group);
-    if (list == NULL) {
-        return -1;
-    }
-
-    opts = qemu_opts_find(list, id);
-    if (!opts) {
-        error_report("there is no %s \"%s\" defined",
-                     list->name, id);
-        return -1;
-    }
-
-    if (!qemu_opt_set(opts, arg, str + offset + 1, &local_err)) {
-        error_report_err(local_err);
-        return -1;
-    }
-    return 0;
-}
-
-struct ConfigWriteData {
-    QemuOptsList *list;
-    FILE *fp;
-};
-
-static int config_write_opt(void *opaque, const char *name, const char *value,
-                            Error **errp)
-{
-    struct ConfigWriteData *data = opaque;
-
-    fprintf(data->fp, "  %s = \"%s\"\n", name, value);
-    return 0;
-}
-
-static int config_write_opts(void *opaque, QemuOpts *opts, Error **errp)
-{
-    struct ConfigWriteData *data = opaque;
-    const char *id = qemu_opts_id(opts);
-
-    if (id) {
-        fprintf(data->fp, "[%s \"%s\"]\n", data->list->name, id);
-    } else {
-        fprintf(data->fp, "[%s]\n", data->list->name);
-    }
-    qemu_opt_foreach(opts, config_write_opt, data, NULL);
-    fprintf(data->fp, "\n");
-    return 0;
-}
-
-void qemu_config_write(FILE *fp)
-{
-    struct ConfigWriteData data = { .fp = fp };
-    QemuOptsList **lists = vm_config_groups;
-    int i;
-
-    fprintf(fp, "# qemu config file\n\n");
-    for (i = 0; lists[i] != NULL; i++) {
-        data.list = lists[i];
-        qemu_opts_foreach(data.list, config_write_opts, &data, NULL);
-    }
-}
-
 /* Returns number of config groups on success, -errno on error */
-int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname)
+static int qemu_config_foreach(FILE *fp, QEMUConfigCB *cb, void *opaque,
+                               const char *fname, Error **errp)
 {
-    char line[1024], group[64], id[64], arg[64], value[1024];
+    ERRP_GUARD();
+    char line[1024], prev_group[64], group[64], arg[64], value[1024];
     Location loc;
-    QemuOptsList *list = NULL;
-    Error *local_err = NULL;
-    QemuOpts *opts = NULL;
+    QDict *qdict = NULL;
     int res = -EINVAL, lno = 0;
     int count = 0;
 
     loc_push_none(&loc);
     while (fgets(line, sizeof(line), fp) != NULL) {
-        loc_set_file(fname, ++lno);
+        ++lno;
         if (line[0] == '\n') {
             /* skip empty lines */
             continue;
@@ -410,75 +312,98 @@ int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname)
             /* comment */
             continue;
         }
-        if (sscanf(line, "[%63s \"%63[^\"]\"]", group, id) == 2) {
-            /* group with id */
-            list = find_list(lists, group, &local_err);
-            if (local_err) {
-                error_report_err(local_err);
-                goto out;
+        if (line[0] == '[') {
+            QDict *prev = qdict;
+            if (sscanf(line, "[%63s \"%63[^\"]\"]", group, value) == 2) {
+                qdict = qdict_new();
+                qdict_put_str(qdict, "id", value);
+                count++;
+            } else if (sscanf(line, "[%63[^]]]", group) == 1) {
+                qdict = qdict_new();
+                count++;
             }
-            opts = qemu_opts_create(list, id, 1, NULL);
-            count++;
-            continue;
-        }
-        if (sscanf(line, "[%63[^]]]", group) == 1) {
-            /* group without id */
-            list = find_list(lists, group, &local_err);
-            if (local_err) {
-                error_report_err(local_err);
-                goto out;
+            if (qdict != prev) {
+                if (prev) {
+                    cb(prev_group, prev, opaque, errp);
+                    qobject_unref(prev);
+                    if (*errp) {
+                        goto out;
+                    }
+                }
+                strcpy(prev_group, group);
+                continue;
             }
-            opts = qemu_opts_create(list, NULL, 0, &error_abort);
-            count++;
-            continue;
         }
+        loc_set_file(fname, lno);
         value[0] = '\0';
         if (sscanf(line, " %63s = \"%1023[^\"]\"", arg, value) == 2 ||
             sscanf(line, " %63s = \"\"", arg) == 1) {
             /* arg = value */
-            if (opts == NULL) {
-                error_report("no group defined");
-                goto out;
-            }
-            if (!qemu_opt_set(opts, arg, value, &local_err)) {
-                error_report_err(local_err);
+            if (qdict == NULL) {
+                error_setg(errp, "no group defined");
                 goto out;
             }
+            qdict_put_str(qdict, arg, value);
             continue;
         }
-        error_report("parse error");
+        error_setg(errp, "parse error");
         goto out;
     }
     if (ferror(fp)) {
-        error_report("error reading file");
-        goto out;
+        loc_pop(&loc);
+        error_setg_errno(errp, errno, "Cannot read config file");
+        goto out_no_loc;
     }
     res = count;
+    if (qdict) {
+        cb(group, qdict, opaque, errp);
+    }
 out:
     loc_pop(&loc);
+out_no_loc:
+    qobject_unref(qdict);
     return res;
 }
 
-int qemu_read_config_file(const char *filename)
+void qemu_config_do_parse(const char *group, QDict *qdict, void *opaque, Error **errp)
+{
+    QemuOptsList **lists = opaque;
+    QemuOptsList *list;
+
+    list = find_list(lists, group, errp);
+    if (!list) {
+        return;
+    }
+
+    qemu_opts_from_qdict(list, qdict, errp);
+}
+
+int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname, Error **errp)
+{
+    return qemu_config_foreach(fp, qemu_config_do_parse, lists, fname, errp);
+}
+
+int qemu_read_config_file(const char *filename, QEMUConfigCB *cb, Error **errp)
 {
     FILE *f = fopen(filename, "r");
     int ret;
 
     if (f == NULL) {
+        error_setg_file_open(errp, errno, filename);
         return -errno;
     }
 
-    ret = qemu_config_parse(f, vm_config_groups, filename);
+    ret = qemu_config_foreach(f, cb, vm_config_groups, filename, errp);
     fclose(f);
     return ret;
 }
 
-static void config_parse_qdict_section(QDict *options, QemuOptsList *opts,
+static bool config_parse_qdict_section(QDict *options, QemuOptsList *opts,
                                        Error **errp)
 {
     QemuOpts *subopts;
-    QDict *subqdict;
-    QList *list = NULL;
+    g_autoptr(QDict) subqdict = NULL;
+    g_autoptr(QList) list = NULL;
     size_t orig_size, enum_size;
     char *prefix;
 
@@ -487,23 +412,23 @@ static void config_parse_qdict_section(QDict *options, QemuOptsList *opts,
     g_free(prefix);
     orig_size = qdict_size(subqdict);
     if (!orig_size) {
-        goto out;
+        return true;
     }
 
     subopts = qemu_opts_create(opts, NULL, 0, errp);
     if (!subopts) {
-        goto out;
+        return false;
     }
 
     if (!qemu_opts_absorb_qdict(subopts, subqdict, errp)) {
-        goto out;
+        return false;
     }
 
     enum_size = qdict_size(subqdict);
     if (enum_size < orig_size && enum_size) {
         error_setg(errp, "Unknown option '%s' for [%s]",
                    qdict_first(subqdict)->key, opts->name);
-        goto out;
+        return false;
     }
 
     if (enum_size) {
@@ -518,7 +443,7 @@ static void config_parse_qdict_section(QDict *options, QemuOptsList *opts,
         if (qdict_size(subqdict)) {
             error_setg(errp, "Unused option '%s' for [%s]",
                        qdict_first(subqdict)->key, opts->name);
-            goto out;
+            return false;
         }
 
         QLIST_FOREACH_ENTRY(list, list_entry) {
@@ -528,46 +453,43 @@ static void config_parse_qdict_section(QDict *options, QemuOptsList *opts,
             if (!section) {
                 error_setg(errp, "[%s] section (index %u) does not consist of "
                            "keys", opts->name, i);
-                goto out;
+                return false;
             }
 
             opt_name = g_strdup_printf("%s.%u", opts->name, i++);
             subopts = qemu_opts_create(opts, opt_name, 1, errp);
             g_free(opt_name);
             if (!subopts) {
-                goto out;
+                return false;
             }
 
             if (!qemu_opts_absorb_qdict(subopts, section, errp)) {
                 qemu_opts_del(subopts);
-                goto out;
+                return false;
             }
 
             if (qdict_size(section)) {
                 error_setg(errp, "[%s] section doesn't support the option '%s'",
                            opts->name, qdict_first(section)->key);
                 qemu_opts_del(subopts);
-                goto out;
+                return false;
             }
         }
     }
 
-out:
-    qobject_unref(subqdict);
-    qobject_unref(list);
+    return true;
 }
 
-void qemu_config_parse_qdict(QDict *options, QemuOptsList **lists,
+bool qemu_config_parse_qdict(QDict *options, QemuOptsList **lists,
                              Error **errp)
 {
     int i;
-    Error *local_err = NULL;
 
     for (i = 0; lists[i]; i++) {
-        config_parse_qdict_section(options, lists[i], &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
+        if (!config_parse_qdict_section(options, lists[i], errp)) {
+            return false;
         }
     }
+
+    return true;
 }
index 5b80bb416f548ad033df847f443a169ecb724286..364f4d5abf57bd694d81fcac78cc0456124b65a1 100644 (file)
@@ -23,7 +23,6 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "qemu/sockets.h"
 #include "qemu/coroutine.h"
 #include "qemu/iov.h"
@@ -75,7 +74,7 @@ typedef struct {
 static void fd_coroutine_enter(void *opaque)
 {
     FDYieldUntilData *data = opaque;
-    aio_set_fd_handler(data->ctx, data->fd, false, NULL, NULL, NULL, NULL);
+    aio_set_fd_handler(data->ctx, data->fd, NULL, NULL, NULL, NULL, NULL);
     qemu_coroutine_enter(data->co);
 }
 
@@ -87,7 +86,7 @@ void coroutine_fn yield_until_fd_readable(int fd)
     data.ctx = qemu_get_current_aio_context();
     data.co = qemu_coroutine_self();
     data.fd = fd;
-    aio_set_fd_handler(
-        data.ctx, fd, false, fd_coroutine_enter, NULL, NULL, &data);
+    aio_set_fd_handler(data.ctx, fd, fd_coroutine_enter, NULL, NULL, NULL,
+                       &data);
     qemu_coroutine_yield();
 }
index 5816bf890094d4e87f72808abe0631a0cd5fb26f..2534435388f3dce83de34d5f8a7f0594294975d3 100644 (file)
@@ -27,7 +27,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/coroutine.h"
 #include "qemu/coroutine_int.h"
 #include "qemu/processor.h"
 #include "qemu/queue.h"
@@ -39,10 +38,15 @@ void qemu_co_queue_init(CoQueue *queue)
     QSIMPLEQ_INIT(&queue->entries);
 }
 
-void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock)
+void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock,
+                                          CoQueueWaitFlags flags)
 {
     Coroutine *self = qemu_coroutine_self();
-    QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+    if (flags & CO_QUEUE_WAIT_FRONT) {
+        QSIMPLEQ_INSERT_HEAD(&queue->entries, self, co_queue_next);
+    } else {
+        QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+    }
 
     if (lock) {
         qemu_lockable_unlock(lock);
@@ -67,34 +71,6 @@ void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock)
     }
 }
 
-static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
-{
-    Coroutine *next;
-
-    if (QSIMPLEQ_EMPTY(&queue->entries)) {
-        return false;
-    }
-
-    while ((next = QSIMPLEQ_FIRST(&queue->entries)) != NULL) {
-        QSIMPLEQ_REMOVE_HEAD(&queue->entries, co_queue_next);
-        aio_co_wake(next);
-        if (single) {
-            break;
-        }
-    }
-    return true;
-}
-
-bool qemu_co_queue_next(CoQueue *queue)
-{
-    return qemu_co_queue_do_restart(queue, true);
-}
-
-void qemu_co_queue_restart_all(CoQueue *queue)
-{
-    qemu_co_queue_do_restart(queue, false);
-}
-
 bool qemu_co_enter_next_impl(CoQueue *queue, QemuLockable *lock)
 {
     Coroutine *next;
@@ -115,6 +91,25 @@ bool qemu_co_enter_next_impl(CoQueue *queue, QemuLockable *lock)
     return true;
 }
 
+bool coroutine_fn qemu_co_queue_next(CoQueue *queue)
+{
+    /* No unlock/lock needed in coroutine context.  */
+    return qemu_co_enter_next_impl(queue, NULL);
+}
+
+void qemu_co_enter_all_impl(CoQueue *queue, QemuLockable *lock)
+{
+    while (qemu_co_enter_next_impl(queue, lock)) {
+        /* just loop */
+    }
+}
+
+void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue)
+{
+    /* No unlock/lock needed in coroutine context.  */
+    qemu_co_enter_all_impl(queue, NULL);
+}
+
 bool qemu_co_queue_empty(CoQueue *queue)
 {
     return QSIMPLEQ_FIRST(&queue->entries) == NULL;
@@ -144,7 +139,7 @@ typedef struct CoWaitRecord {
     QSLIST_ENTRY(CoWaitRecord) next;
 } CoWaitRecord;
 
-static void push_waiter(CoMutex *mutex, CoWaitRecord *w)
+static void coroutine_fn push_waiter(CoMutex *mutex, CoWaitRecord *w)
 {
     w->co = qemu_coroutine_self();
     QSLIST_INSERT_HEAD_ATOMIC(&mutex->from_push, w, next);
@@ -204,13 +199,18 @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx,
     unsigned old_handoff;
 
     trace_qemu_co_mutex_lock_entry(mutex, self);
-    w.co = self;
     push_waiter(mutex, &w);
 
+    /*
+     * Add waiter before reading mutex->handoff.  Pairs with qatomic_set_mb
+     * in qemu_co_mutex_unlock.
+     */
+    smp_mb__after_rmw();
+
     /* This is the "Responsibility Hand-Off" protocol; a lock() picks from
      * a concurrent unlock() the responsibility of waking somebody up.
      */
-    old_handoff = qatomic_mb_read(&mutex->handoff);
+    old_handoff = qatomic_read(&mutex->handoff);
     if (old_handoff &&
         has_waiters(mutex) &&
         qatomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) {
@@ -309,7 +309,8 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
         }
 
         our_handoff = mutex->sequence;
-        qatomic_mb_set(&mutex->handoff, our_handoff);
+        /* Set handoff before checking for waiters.  */
+        qatomic_set_mb(&mutex->handoff, our_handoff);
         if (!has_waiters(mutex)) {
             /* The concurrent lock has not added itself yet, so it
              * will be able to pick our handoff.
@@ -328,97 +329,141 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
     trace_qemu_co_mutex_unlock_return(mutex, self);
 }
 
+struct CoRwTicket {
+    bool read;
+    Coroutine *co;
+    QSIMPLEQ_ENTRY(CoRwTicket) next;
+};
+
 void qemu_co_rwlock_init(CoRwlock *lock)
 {
-    memset(lock, 0, sizeof(*lock));
-    qemu_co_queue_init(&lock->queue);
     qemu_co_mutex_init(&lock->mutex);
+    lock->owners = 0;
+    QSIMPLEQ_INIT(&lock->tickets);
 }
 
-void qemu_co_rwlock_rdlock(CoRwlock *lock)
+/* Releases the internal CoMutex.  */
+static void coroutine_fn qemu_co_rwlock_maybe_wake_one(CoRwlock *lock)
 {
-    Coroutine *self = qemu_coroutine_self();
+    CoRwTicket *tkt = QSIMPLEQ_FIRST(&lock->tickets);
+    Coroutine *co = NULL;
 
-    qemu_co_mutex_lock(&lock->mutex);
-    /* For fairness, wait if a writer is in line.  */
-    while (lock->pending_writer) {
-        qemu_co_queue_wait(&lock->queue, &lock->mutex);
+    /*
+     * Setting lock->owners here prevents rdlock and wrlock from
+     * sneaking in between unlock and wake.
+     */
+
+    if (tkt) {
+        if (tkt->read) {
+            if (lock->owners >= 0) {
+                lock->owners++;
+                co = tkt->co;
+            }
+        } else {
+            if (lock->owners == 0) {
+                lock->owners = -1;
+                co = tkt->co;
+            }
+        }
     }
-    lock->reader++;
-    qemu_co_mutex_unlock(&lock->mutex);
 
-    /* The rest of the read-side critical section is run without the mutex.  */
-    self->locks_held++;
+    if (co) {
+        QSIMPLEQ_REMOVE_HEAD(&lock->tickets, next);
+        qemu_co_mutex_unlock(&lock->mutex);
+        aio_co_wake(co);
+    } else {
+        qemu_co_mutex_unlock(&lock->mutex);
+    }
 }
 
-void qemu_co_rwlock_unlock(CoRwlock *lock)
+void coroutine_fn qemu_co_rwlock_rdlock(CoRwlock *lock)
 {
     Coroutine *self = qemu_coroutine_self();
 
-    assert(qemu_in_coroutine());
-    if (!lock->reader) {
-        /* The critical section started in qemu_co_rwlock_wrlock.  */
-        qemu_co_queue_restart_all(&lock->queue);
+    qemu_co_mutex_lock(&lock->mutex);
+    /* For fairness, wait if a writer is in line.  */
+    if (lock->owners == 0 || (lock->owners > 0 && QSIMPLEQ_EMPTY(&lock->tickets))) {
+        lock->owners++;
+        qemu_co_mutex_unlock(&lock->mutex);
     } else {
-        self->locks_held--;
+        CoRwTicket my_ticket = { true, self };
 
+        QSIMPLEQ_INSERT_TAIL(&lock->tickets, &my_ticket, next);
+        qemu_co_mutex_unlock(&lock->mutex);
+        qemu_coroutine_yield();
+        assert(lock->owners >= 1);
+
+        /* Possibly wake another reader, which will wake the next in line.  */
         qemu_co_mutex_lock(&lock->mutex);
-        lock->reader--;
-        assert(lock->reader >= 0);
-        /* Wakeup only one waiting writer */
-        if (!lock->reader) {
-            qemu_co_queue_next(&lock->queue);
-        }
+        qemu_co_rwlock_maybe_wake_one(lock);
     }
-    qemu_co_mutex_unlock(&lock->mutex);
+
+    self->locks_held++;
 }
 
-void qemu_co_rwlock_downgrade(CoRwlock *lock)
+void coroutine_fn qemu_co_rwlock_unlock(CoRwlock *lock)
 {
     Coroutine *self = qemu_coroutine_self();
 
-    /* lock->mutex critical section started in qemu_co_rwlock_wrlock or
-     * qemu_co_rwlock_upgrade.
-     */
-    assert(lock->reader == 0);
-    lock->reader++;
-    qemu_co_mutex_unlock(&lock->mutex);
+    assert(qemu_in_coroutine());
+    self->locks_held--;
 
-    /* The rest of the read-side critical section is run without the mutex.  */
-    self->locks_held++;
+    qemu_co_mutex_lock(&lock->mutex);
+    if (lock->owners > 0) {
+        lock->owners--;
+    } else {
+        assert(lock->owners == -1);
+        lock->owners = 0;
+    }
+
+    qemu_co_rwlock_maybe_wake_one(lock);
 }
 
-void qemu_co_rwlock_wrlock(CoRwlock *lock)
+void coroutine_fn qemu_co_rwlock_downgrade(CoRwlock *lock)
 {
     qemu_co_mutex_lock(&lock->mutex);
-    lock->pending_writer++;
-    while (lock->reader) {
-        qemu_co_queue_wait(&lock->queue, &lock->mutex);
-    }
-    lock->pending_writer--;
+    assert(lock->owners == -1);
+    lock->owners = 1;
 
-    /* The rest of the write-side critical section is run with
-     * the mutex taken, so that lock->reader remains zero.
-     * There is no need to update self->locks_held.
-     */
+    /* Possibly wake another reader, which will wake the next in line.  */
+    qemu_co_rwlock_maybe_wake_one(lock);
 }
 
-void qemu_co_rwlock_upgrade(CoRwlock *lock)
+void coroutine_fn qemu_co_rwlock_wrlock(CoRwlock *lock)
 {
     Coroutine *self = qemu_coroutine_self();
 
     qemu_co_mutex_lock(&lock->mutex);
-    assert(lock->reader > 0);
-    lock->reader--;
-    lock->pending_writer++;
-    while (lock->reader) {
-        qemu_co_queue_wait(&lock->queue, &lock->mutex);
+    if (lock->owners == 0) {
+        lock->owners = -1;
+        qemu_co_mutex_unlock(&lock->mutex);
+    } else {
+        CoRwTicket my_ticket = { false, qemu_coroutine_self() };
+
+        QSIMPLEQ_INSERT_TAIL(&lock->tickets, &my_ticket, next);
+        qemu_co_mutex_unlock(&lock->mutex);
+        qemu_coroutine_yield();
+        assert(lock->owners == -1);
     }
-    lock->pending_writer--;
 
-    /* The rest of the write-side critical section is run with
-     * the mutex taken, similar to qemu_co_rwlock_wrlock.  Do
-     * not account for the lock twice in self->locks_held.
-     */
-    self->locks_held--;
+    self->locks_held++;
+}
+
+void coroutine_fn qemu_co_rwlock_upgrade(CoRwlock *lock)
+{
+    qemu_co_mutex_lock(&lock->mutex);
+    assert(lock->owners > 0);
+    /* For fairness, wait if a writer is in line.  */
+    if (lock->owners == 1 && QSIMPLEQ_EMPTY(&lock->tickets)) {
+        lock->owners = -1;
+        qemu_co_mutex_unlock(&lock->mutex);
+    } else {
+        CoRwTicket my_ticket = { false, qemu_coroutine_self() };
+
+        lock->owners--;
+        QSIMPLEQ_INSERT_TAIL(&lock->tickets, &my_ticket, next);
+        qemu_co_rwlock_maybe_wake_one(lock);
+        qemu_coroutine_yield();
+        assert(lock->owners == -1);
+    }
 }
index 8c4dac4fd75eeb12df3718b65f612de72dc7089c..af59f9af9847af2ca1b14217d55aad5d0d15b78e 100644 (file)
  */
 
 #include "qemu/osdep.h"
-#include "qemu/coroutine.h"
 #include "qemu/coroutine_int.h"
 #include "qemu/timer.h"
 #include "block/aio.h"
 
 static const char *qemu_co_sleep_ns__scheduled = "qemu_co_sleep_ns";
 
-struct QemuCoSleepState {
+void qemu_co_sleep_wake(QemuCoSleep *w)
+{
     Coroutine *co;
-    QEMUTimer *ts;
-    QemuCoSleepState **user_state_pointer;
-};
 
-void qemu_co_sleep_wake(QemuCoSleepState *sleep_state)
-{
-    /* Write of schedule protected by barrier write in aio_co_schedule */
-    const char *scheduled = qatomic_cmpxchg(&sleep_state->co->scheduled,
-                                           qemu_co_sleep_ns__scheduled, NULL);
+    co = w->to_wake;
+    w->to_wake = NULL;
+    if (co) {
+        /* Write of schedule protected by barrier write in aio_co_schedule */
+        const char *scheduled = qatomic_cmpxchg(&co->scheduled,
+                                                qemu_co_sleep_ns__scheduled, NULL);
 
-    assert(scheduled == qemu_co_sleep_ns__scheduled);
-    if (sleep_state->user_state_pointer) {
-        *sleep_state->user_state_pointer = NULL;
+        assert(scheduled == qemu_co_sleep_ns__scheduled);
+        aio_co_wake(co);
     }
-    timer_del(sleep_state->ts);
-    aio_co_wake(sleep_state->co);
 }
 
 static void co_sleep_cb(void *opaque)
 {
-    qemu_co_sleep_wake(opaque);
+    QemuCoSleep *w = opaque;
+    qemu_co_sleep_wake(w);
 }
 
-void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
-                                            QemuCoSleepState **sleep_state)
+void coroutine_fn qemu_co_sleep(QemuCoSleep *w)
 {
-    AioContext *ctx = qemu_get_current_aio_context();
-    QemuCoSleepState state = {
-        .co = qemu_coroutine_self(),
-        .ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &state),
-        .user_state_pointer = sleep_state,
-    };
+    Coroutine *co = qemu_coroutine_self();
 
-    const char *scheduled = qatomic_cmpxchg(&state.co->scheduled, NULL,
-                                           qemu_co_sleep_ns__scheduled);
+    const char *scheduled = qatomic_cmpxchg(&co->scheduled, NULL,
+                                            qemu_co_sleep_ns__scheduled);
     if (scheduled) {
         fprintf(stderr,
                 "%s: Co-routine was already scheduled in '%s'\n",
@@ -63,17 +53,27 @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns,
         abort();
     }
 
-    if (sleep_state) {
-        *sleep_state = &state;
-    }
-    timer_mod(state.ts, qemu_clock_get_ns(type) + ns);
+    w->to_wake = co;
     qemu_coroutine_yield();
-    if (sleep_state) {
-        /*
-         * Note that *sleep_state is cleared during qemu_co_sleep_wake
-         * before resuming this coroutine.
-         */
-        assert(*sleep_state == NULL);
-    }
-    timer_free(state.ts);
+
+    /* w->to_wake is cleared before resuming this coroutine.  */
+    assert(w->to_wake == NULL);
+}
+
+void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w,
+                                            QEMUClockType type, int64_t ns)
+{
+    AioContext *ctx = qemu_get_current_aio_context();
+    QEMUTimer ts;
+
+    aio_timer_init(ctx, &ts, type, SCALE_NS, co_sleep_cb, w);
+    timer_mod(&ts, qemu_clock_get_ns(type) + ns);
+
+    /*
+     * The timer will fire in the current AiOContext, so the callback
+     * must happen after qemu_co_sleep yields and there is no race
+     * between timer_mod and qemu_co_sleep.
+     */
+    qemu_co_sleep(w);
+    timer_del(&ts);
 }
index 38fb6d3084dad2dd2145134d9e5da8f03d7169fe..5fd2dbaf8bb7818f6b57059f90fea5305796612e 100644 (file)
 #include "trace.h"
 #include "qemu/thread.h"
 #include "qemu/atomic.h"
-#include "qemu/coroutine.h"
 #include "qemu/coroutine_int.h"
+#include "qemu/coroutine-tls.h"
 #include "block/aio.h"
 
+/**
+ * The minimal batch size is always 64, coroutines from the release_pool are
+ * reused as soon as there are 64 coroutines in it. The maximum pool size starts
+ * with 64 and is increased on demand so that coroutines are not deleted even if
+ * they are not immediately reused.
+ */
 enum {
-    POOL_BATCH_SIZE = 64,
+    POOL_MIN_BATCH_SIZE = 64,
+    POOL_INITIAL_MAX_SIZE = 64,
 };
 
 /** Free list to speed up creation */
 static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
+static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
 static unsigned int release_pool_size;
-static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
-static __thread unsigned int alloc_pool_size;
-static __thread Notifier coroutine_pool_cleanup_notifier;
+
+typedef QSLIST_HEAD(, Coroutine) CoroutineQSList;
+QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool);
+QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size);
+QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier);
 
 static void coroutine_pool_cleanup(Notifier *n, void *value)
 {
     Coroutine *co;
     Coroutine *tmp;
+    CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
 
-    QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) {
-        QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+    QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) {
+        QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
         qemu_coroutine_delete(co);
     }
 }
@@ -46,28 +57,31 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
 {
     Coroutine *co = NULL;
 
-    if (CONFIG_COROUTINE_POOL) {
-        co = QSLIST_FIRST(&alloc_pool);
+    if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
+        CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
+
+        co = QSLIST_FIRST(alloc_pool);
         if (!co) {
-            if (release_pool_size > POOL_BATCH_SIZE) {
+            if (release_pool_size > POOL_MIN_BATCH_SIZE) {
                 /* Slow path; a good place to register the destructor, too.  */
-                if (!coroutine_pool_cleanup_notifier.notify) {
-                    coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
-                    qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
+                Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier();
+                if (!notifier->notify) {
+                    notifier->notify = coroutine_pool_cleanup;
+                    qemu_thread_atexit_add(notifier);
                 }
 
                 /* This is not exact; there could be a little skew between
                  * release_pool_size and the actual size of release_pool.  But
                  * it is just a heuristic, it does not need to be perfect.
                  */
-                alloc_pool_size = qatomic_xchg(&release_pool_size, 0);
-                QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
-                co = QSLIST_FIRST(&alloc_pool);
+                set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0));
+                QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool);
+                co = QSLIST_FIRST(alloc_pool);
             }
         }
         if (co) {
-            QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
-            alloc_pool_size--;
+            QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
+            set_alloc_pool_size(get_alloc_pool_size() - 1);
         }
     }
 
@@ -85,15 +99,15 @@ static void coroutine_delete(Coroutine *co)
 {
     co->caller = NULL;
 
-    if (CONFIG_COROUTINE_POOL) {
-        if (release_pool_size < POOL_BATCH_SIZE * 2) {
+    if (IS_ENABLED(CONFIG_COROUTINE_POOL)) {
+        if (release_pool_size < qatomic_read(&pool_max_size) * 2) {
             QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
             qatomic_inc(&release_pool_size);
             return;
         }
-        if (alloc_pool_size < POOL_BATCH_SIZE) {
-            QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
-            alloc_pool_size++;
+        if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) {
+            QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next);
+            set_alloc_pool_size(get_alloc_pool_size() + 1);
             return;
         }
     }
@@ -113,9 +127,13 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
         Coroutine *to = QSIMPLEQ_FIRST(&pending);
         CoroutineAction ret;
 
-        /* Cannot rely on the read barrier for to in aio_co_wake(), as there are
-         * callers outside of aio_co_wake() */
-        const char *scheduled = qatomic_mb_read(&to->scheduled);
+        /*
+         * Read to before to->scheduled; pairs with qatomic_cmpxchg in
+         * qemu_co_sleep(), aio_co_schedule() etc.
+         */
+        smp_read_barrier_depends();
+
+        const char *scheduled = qatomic_read(&to->scheduled);
 
         QSIMPLEQ_REMOVE_HEAD(&pending, co_queue_next);
 
@@ -198,7 +216,17 @@ bool qemu_coroutine_entered(Coroutine *co)
     return co->caller;
 }
 
-AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
+AioContext *qemu_coroutine_get_aio_context(Coroutine *co)
 {
     return co->ctx;
 }
+
+void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
+{
+    qatomic_add(&pool_max_size, additional_pool_size);
+}
+
+void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
+{
+    qatomic_sub(&pool_max_size, removing_pool_size);
+}
diff --git a/util/qemu-error.c b/util/qemu-error.c
deleted file mode 100644 (file)
index aa30f03..0000000
+++ /dev/null
@@ -1,413 +0,0 @@
-/*
- * Error reporting
- *
- * Copyright (C) 2010 Red Hat Inc.
- *
- * Authors:
- *  Markus Armbruster <armbru@redhat.com>,
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "monitor/monitor.h"
-#include "qemu/error-report.h"
-
-/*
- * @report_type is the type of message: error, warning or
- * informational.
- */
-typedef enum {
-    REPORT_TYPE_ERROR,
-    REPORT_TYPE_WARNING,
-    REPORT_TYPE_INFO,
-} report_type;
-
-/* Prepend timestamp to messages */
-bool error_with_timestamp;
-bool error_with_guestname;
-const char *error_guest_name;
-
-int error_printf(const char *fmt, ...)
-{
-    va_list ap;
-    int ret;
-
-    va_start(ap, fmt);
-    ret = error_vprintf(fmt, ap);
-    va_end(ap);
-    return ret;
-}
-
-int error_printf_unless_qmp(const char *fmt, ...)
-{
-    va_list ap;
-    int ret;
-
-    va_start(ap, fmt);
-    ret = error_vprintf_unless_qmp(fmt, ap);
-    va_end(ap);
-    return ret;
-}
-
-static Location std_loc = {
-    .kind = LOC_NONE
-};
-static Location *cur_loc = &std_loc;
-
-/*
- * Push location saved in LOC onto the location stack, return it.
- * The top of that stack is the current location.
- * Needs a matching loc_pop().
- */
-Location *loc_push_restore(Location *loc)
-{
-    assert(!loc->prev);
-    loc->prev = cur_loc;
-    cur_loc = loc;
-    return loc;
-}
-
-/*
- * Initialize *LOC to "nowhere", push it onto the location stack.
- * The top of that stack is the current location.
- * Needs a matching loc_pop().
- * Return LOC.
- */
-Location *loc_push_none(Location *loc)
-{
-    loc->kind = LOC_NONE;
-    loc->prev = NULL;
-    return loc_push_restore(loc);
-}
-
-/*
- * Pop the location stack.
- * LOC must be the current location, i.e. the top of the stack.
- */
-Location *loc_pop(Location *loc)
-{
-    assert(cur_loc == loc && loc->prev);
-    cur_loc = loc->prev;
-    loc->prev = NULL;
-    return loc;
-}
-
-/*
- * Save the current location in LOC, return LOC.
- */
-Location *loc_save(Location *loc)
-{
-    *loc = *cur_loc;
-    loc->prev = NULL;
-    return loc;
-}
-
-/*
- * Change the current location to the one saved in LOC.
- */
-void loc_restore(Location *loc)
-{
-    Location *prev = cur_loc->prev;
-    assert(!loc->prev);
-    *cur_loc = *loc;
-    cur_loc->prev = prev;
-}
-
-/*
- * Change the current location to "nowhere in particular".
- */
-void loc_set_none(void)
-{
-    cur_loc->kind = LOC_NONE;
-}
-
-/*
- * Change the current location to argument ARGV[IDX..IDX+CNT-1].
- */
-void loc_set_cmdline(char **argv, int idx, int cnt)
-{
-    cur_loc->kind = LOC_CMDLINE;
-    cur_loc->num = cnt;
-    cur_loc->ptr = argv + idx;
-}
-
-/*
- * Change the current location to file FNAME, line LNO.
- */
-void loc_set_file(const char *fname, int lno)
-{
-    assert (fname || cur_loc->kind == LOC_FILE);
-    cur_loc->kind = LOC_FILE;
-    cur_loc->num = lno;
-    if (fname) {
-        cur_loc->ptr = fname;
-    }
-}
-
-static const char *progname;
-
-/*
- * Set the program name for error_print_loc().
- */
-static void error_set_progname(const char *argv0)
-{
-    const char *p = strrchr(argv0, '/');
-    progname = p ? p + 1 : argv0;
-}
-
-const char *error_get_progname(void)
-{
-    return progname;
-}
-
-/*
- * Print current location to current monitor if we have one, else to stderr.
- */
-static void print_loc(void)
-{
-    const char *sep = "";
-    int i;
-    const char *const *argp;
-
-    if (!monitor_cur() && progname) {
-        fprintf(stderr, "%s:", progname);
-        sep = " ";
-    }
-    switch (cur_loc->kind) {
-    case LOC_CMDLINE:
-        argp = cur_loc->ptr;
-        for (i = 0; i < cur_loc->num; i++) {
-            error_printf("%s%s", sep, argp[i]);
-            sep = " ";
-        }
-        error_printf(": ");
-        break;
-    case LOC_FILE:
-        error_printf("%s:", (const char *)cur_loc->ptr);
-        if (cur_loc->num) {
-            error_printf("%d:", cur_loc->num);
-        }
-        error_printf(" ");
-        break;
-    default:
-        error_printf("%s", sep);
-    }
-}
-
-/*
- * Print a message to current monitor if we have one, else to stderr.
- * @report_type is the type of message: error, warning or informational.
- * Format arguments like vsprintf().  The resulting message should be
- * a single phrase, with no newline or trailing punctuation.
- * Prepend the current location and append a newline.
- */
-static void vreport(report_type type, const char *fmt, va_list ap)
-{
-    GTimeVal tv;
-    gchar *timestr;
-
-    if (error_with_timestamp && !monitor_cur()) {
-        g_get_current_time(&tv);
-        timestr = g_time_val_to_iso8601(&tv);
-        error_printf("%s ", timestr);
-        g_free(timestr);
-    }
-
-    /* Only prepend guest name if -msg guest-name and -name guest=... are set */
-    if (error_with_guestname && error_guest_name && !monitor_cur()) {
-        error_printf("%s ", error_guest_name);
-    }
-
-    print_loc();
-
-    switch (type) {
-    case REPORT_TYPE_ERROR:
-        break;
-    case REPORT_TYPE_WARNING:
-        error_printf("warning: ");
-        break;
-    case REPORT_TYPE_INFO:
-        error_printf("info: ");
-        break;
-    }
-
-    error_vprintf(fmt, ap);
-    error_printf("\n");
-}
-
-/*
- * Print an error message to current monitor if we have one, else to stderr.
- * Format arguments like vsprintf().  The resulting message should be
- * a single phrase, with no newline or trailing punctuation.
- * Prepend the current location and append a newline.
- * It's wrong to call this in a QMP monitor.  Use error_setg() there.
- */
-void error_vreport(const char *fmt, va_list ap)
-{
-    vreport(REPORT_TYPE_ERROR, fmt, ap);
-}
-
-/*
- * Print a warning message to current monitor if we have one, else to stderr.
- * Format arguments like vsprintf().  The resulting message should be
- * a single phrase, with no newline or trailing punctuation.
- * Prepend the current location and append a newline.
- */
-void warn_vreport(const char *fmt, va_list ap)
-{
-    vreport(REPORT_TYPE_WARNING, fmt, ap);
-}
-
-/*
- * Print an information message to current monitor if we have one, else to
- * stderr.
- * Format arguments like vsprintf().  The resulting message should be
- * a single phrase, with no newline or trailing punctuation.
- * Prepend the current location and append a newline.
- */
-void info_vreport(const char *fmt, va_list ap)
-{
-    vreport(REPORT_TYPE_INFO, fmt, ap);
-}
-
-/*
- * Print an error message to current monitor if we have one, else to stderr.
- * Format arguments like sprintf().  The resulting message should be
- * a single phrase, with no newline or trailing punctuation.
- * Prepend the current location and append a newline.
- * It's wrong to call this in a QMP monitor.  Use error_setg() there.
- */
-void error_report(const char *fmt, ...)
-{
-    va_list ap;
-
-    va_start(ap, fmt);
-    vreport(REPORT_TYPE_ERROR, fmt, ap);
-    va_end(ap);
-}
-
-/*
- * Print a warning message to current monitor if we have one, else to stderr.
- * Format arguments like sprintf(). The resulting message should be a
- * single phrase, with no newline or trailing punctuation.
- * Prepend the current location and append a newline.
- */
-void warn_report(const char *fmt, ...)
-{
-    va_list ap;
-
-    va_start(ap, fmt);
-    vreport(REPORT_TYPE_WARNING, fmt, ap);
-    va_end(ap);
-}
-
-/*
- * Print an information message to current monitor if we have one, else to
- * stderr.
- * Format arguments like sprintf(). The resulting message should be a
- * single phrase, with no newline or trailing punctuation.
- * Prepend the current location and append a newline.
- */
-void info_report(const char *fmt, ...)
-{
-    va_list ap;
-
-    va_start(ap, fmt);
-    vreport(REPORT_TYPE_INFO, fmt, ap);
-    va_end(ap);
-}
-
-/*
- * Like error_report(), except print just once.
- * If *printed is false, print the message, and flip *printed to true.
- * Return whether the message was printed.
- */
-bool error_report_once_cond(bool *printed, const char *fmt, ...)
-{
-    va_list ap;
-
-    assert(printed);
-    if (*printed) {
-        return false;
-    }
-    *printed = true;
-    va_start(ap, fmt);
-    vreport(REPORT_TYPE_ERROR, fmt, ap);
-    va_end(ap);
-    return true;
-}
-
-/*
- * Like warn_report(), except print just once.
- * If *printed is false, print the message, and flip *printed to true.
- * Return whether the message was printed.
- */
-bool warn_report_once_cond(bool *printed, const char *fmt, ...)
-{
-    va_list ap;
-
-    assert(printed);
-    if (*printed) {
-        return false;
-    }
-    *printed = true;
-    va_start(ap, fmt);
-    vreport(REPORT_TYPE_WARNING, fmt, ap);
-    va_end(ap);
-    return true;
-}
-
-static char *qemu_glog_domains;
-
-static void qemu_log_func(const gchar *log_domain,
-                          GLogLevelFlags log_level,
-                          const gchar *message,
-                          gpointer user_data)
-{
-    switch (log_level & G_LOG_LEVEL_MASK) {
-    case G_LOG_LEVEL_DEBUG:
-    case G_LOG_LEVEL_INFO:
-        /*
-         * Use same G_MESSAGES_DEBUG logic as glib to enable/disable debug
-         * messages
-         */
-        if (qemu_glog_domains == NULL) {
-            break;
-        }
-        if (strcmp(qemu_glog_domains, "all") != 0 &&
-          (log_domain == NULL || !strstr(qemu_glog_domains, log_domain))) {
-            break;
-        }
-        /* Fall through */
-    case G_LOG_LEVEL_MESSAGE:
-        info_report("%s%s%s",
-                    log_domain ?: "", log_domain ? ": " : "", message);
-
-        break;
-    case G_LOG_LEVEL_WARNING:
-        warn_report("%s%s%s",
-                    log_domain ?: "", log_domain ? ": " : "", message);
-        break;
-    case G_LOG_LEVEL_CRITICAL:
-    case G_LOG_LEVEL_ERROR:
-        error_report("%s%s%s",
-                     log_domain ?: "", log_domain ? ": " : "", message);
-        break;
-    }
-}
-
-void error_init(const char *argv0)
-{
-    /* Set the program name for error_print_loc(). */
-    error_set_progname(argv0);
-
-    /*
-     * This sets up glib logging so libraries using it also print their logs
-     * through error_report(), warn_report(), info_report().
-     */
-    g_log_set_default_handler(qemu_log_func, NULL);
-    g_warn_if_fail(qemu_glog_domains == NULL);
-    qemu_glog_domains = g_strdup(g_getenv("G_MESSAGES_DEBUG"));
-}
diff --git a/util/qemu-openpty.c b/util/qemu-openpty.c
deleted file mode 100644 (file)
index eb17f5b..0000000
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * qemu-openpty.c
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- * Copyright (c) 2010 Red Hat, Inc.
- *
- * Wrapper function qemu_openpty() implementation.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-/*
- * This is not part of oslib-posix.c because this function
- * uses openpty() which often in -lutil, and if we add this
- * dependency to oslib-posix.o, every app will have to be
- * linked with -lutil.
- */
-
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-
-#if defined HAVE_PTY_H
-# include <pty.h>
-#elif defined CONFIG_BSD
-# include <termios.h>
-# if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
-#  include <libutil.h>
-# else
-#  include <util.h>
-# endif
-#elif defined CONFIG_SOLARIS
-# include <termios.h>
-# include <stropts.h>
-#else
-# include <termios.h>
-#endif
-
-#ifdef __sun__
-
-#if !defined(HAVE_OPENPTY)
-/* Once illumos has openpty(), this is going to be removed. */
-static int openpty(int *amaster, int *aslave, char *name,
-                   struct termios *termp, struct winsize *winp)
-{
-        const char *slave;
-        int mfd = -1, sfd = -1;
-
-        *amaster = *aslave = -1;
-
-        mfd = open("/dev/ptmx", O_RDWR | O_NOCTTY);
-        if (mfd < 0)
-                goto err;
-
-        if (grantpt(mfd) == -1 || unlockpt(mfd) == -1)
-                goto err;
-
-        if ((slave = ptsname(mfd)) == NULL)
-                goto err;
-
-        if ((sfd = open(slave, O_RDONLY | O_NOCTTY)) == -1)
-                goto err;
-
-        if (ioctl(sfd, I_PUSH, "ptem") == -1 ||
-            (termp != NULL && tcgetattr(sfd, termp) < 0))
-                goto err;
-
-        if (amaster)
-                *amaster = mfd;
-        if (aslave)
-                *aslave = sfd;
-        if (winp)
-                ioctl(sfd, TIOCSWINSZ, winp);
-
-        return 0;
-
-err:
-        if (sfd != -1)
-                close(sfd);
-        close(mfd);
-        return -1;
-}
-#endif
-
-static void cfmakeraw (struct termios *termios_p)
-{
-        termios_p->c_iflag &=
-                ~(IGNBRK|BRKINT|PARMRK|ISTRIP|INLCR|IGNCR|ICRNL|IXON);
-        termios_p->c_oflag &= ~OPOST;
-        termios_p->c_lflag &= ~(ECHO|ECHONL|ICANON|ISIG|IEXTEN);
-        termios_p->c_cflag &= ~(CSIZE|PARENB);
-        termios_p->c_cflag |= CS8;
-
-        termios_p->c_cc[VMIN] = 0;
-        termios_p->c_cc[VTIME] = 0;
-}
-#endif
-
-int qemu_openpty_raw(int *aslave, char *pty_name)
-{
-    int amaster;
-    struct termios tty;
-#if defined(__OpenBSD__) || defined(__DragonFly__)
-    char pty_buf[PATH_MAX];
-#define q_ptsname(x) pty_buf
-#else
-    char *pty_buf = NULL;
-#define q_ptsname(x) ptsname(x)
-#endif
-
-    if (openpty(&amaster, aslave, pty_buf, NULL, NULL) < 0) {
-        return -1;
-    }
-
-    /* Set raw attributes on the pty. */
-    tcgetattr(*aslave, &tty);
-    cfmakeraw(&tty);
-    tcsetattr(*aslave, TCSAFLUSH, &tty);
-
-    if (pty_name) {
-        strcpy(pty_name, q_ptsname(amaster));
-    }
-
-    return amaster;
-}
index acefbc23fa3c16baefc584ed6a531043f252ff25..eedd08929b47d0dde26dbe8bbe2ee1e4e4b6d516 100644 (file)
 #include "qemu/help_option.h"
 
 /*
- * Extracts the name of an option from the parameter string (p points at the
+ * Extracts the name of an option from the parameter string (@p points at the
  * first byte of the option name)
  *
- * The option name is delimited by delim (usually , or =) or the string end
- * and is copied into option. The caller is responsible for free'ing option
- * when no longer required.
+ * The option name is @len characters long and is copied into @option. The
+ * caller is responsible for free'ing @option when no longer required.
  *
  * The return value is the position of the delimiter/zero byte after the option
- * name in p.
+ * name in @p.
  */
-static const char *get_opt_name(const char *p, char **option, char delim)
+static const char *get_opt_name(const char *p, char **option, size_t len)
 {
-    char *offset = strchr(p, delim);
-
-    if (offset) {
-        *option = g_strndup(p, offset - p);
-        return offset;
-    } else {
-        *option = g_strdup(p);
-        return p + strlen(p);
-    }
+    *option = g_strndup(p, len);
+    return p + len;
 }
 
 /*
@@ -468,16 +460,16 @@ static bool qemu_opt_parse(QemuOpt *opt, Error **errp)
     }
 }
 
-static bool opts_accepts_any(const QemuOpts *opts)
+static bool opts_accepts_any(const QemuOptsList *list)
 {
-    return opts->list->desc[0].name == NULL;
+    return list->desc[0].name == NULL;
 }
 
 int qemu_opt_unset(QemuOpts *opts, const char *name)
 {
     QemuOpt *opt = qemu_opt_find(opts, name);
 
-    assert(opts_accepts_any(opts));
+    assert(opts_accepts_any(opts->list));
 
     if (opt == NULL) {
         return -1;
@@ -487,34 +479,26 @@ int qemu_opt_unset(QemuOpts *opts, const char *name)
     }
 }
 
-static QemuOpt *opt_create(QemuOpts *opts, const char *name, char *value,
-                           bool prepend)
+static QemuOpt *opt_create(QemuOpts *opts, const char *name, char *value)
 {
     QemuOpt *opt = g_malloc0(sizeof(*opt));
 
     opt->name = g_strdup(name);
     opt->str = value;
     opt->opts = opts;
-    if (prepend) {
-        QTAILQ_INSERT_HEAD(&opts->head, opt, next);
-    } else {
-        QTAILQ_INSERT_TAIL(&opts->head, opt, next);
-    }
+    QTAILQ_INSERT_TAIL(&opts->head, opt, next);
 
     return opt;
 }
 
-static bool opt_validate(QemuOpt *opt, bool *help_wanted,
-                         Error **errp)
+static bool opt_validate(QemuOpt *opt, Error **errp)
 {
     const QemuOptDesc *desc;
+    const QemuOptsList *list = opt->opts->list;
 
-    desc = find_desc_by_name(opt->opts->list->desc, opt->name);
-    if (!desc && !opts_accepts_any(opt->opts)) {
+    desc = find_desc_by_name(list->desc, opt->name);
+    if (!desc && !opts_accepts_any(list)) {
         error_setg(errp, QERR_INVALID_PARAMETER, opt->name);
-        if (help_wanted && is_help_option(opt->name)) {
-            *help_wanted = true;
-        }
         return false;
     }
 
@@ -529,9 +513,9 @@ static bool opt_validate(QemuOpt *opt, bool *help_wanted,
 bool qemu_opt_set(QemuOpts *opts, const char *name, const char *value,
                   Error **errp)
 {
-    QemuOpt *opt = opt_create(opts, name, g_strdup(value), false);
+    QemuOpt *opt = opt_create(opts, name, g_strdup(value));
 
-    if (!opt_validate(opt, NULL, errp)) {
+    if (!opt_validate(opt, errp)) {
         qemu_opt_del(opt);
         return false;
     }
@@ -543,9 +527,10 @@ bool qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val,
 {
     QemuOpt *opt;
     const QemuOptDesc *desc;
+    const QemuOptsList *list = opts->list;
 
-    desc = find_desc_by_name(opts->list->desc, name);
-    if (!desc && !opts_accepts_any(opts)) {
+    desc = find_desc_by_name(list->desc, name);
+    if (!desc && !opts_accepts_any(list)) {
         error_setg(errp, QERR_INVALID_PARAMETER, name);
         return false;
     }
@@ -565,9 +550,10 @@ bool qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val,
 {
     QemuOpt *opt;
     const QemuOptDesc *desc;
+    const QemuOptsList *list = opts->list;
 
-    desc = find_desc_by_name(opts->list->desc, name);
-    if (!desc && !opts_accepts_any(opts)) {
+    desc = find_desc_by_name(list->desc, name);
+    if (!desc && !opts_accepts_any(list)) {
         error_setg(errp, QERR_INVALID_PARAMETER, name);
         return false;
     }
@@ -624,7 +610,17 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
 {
     QemuOpts *opts = NULL;
 
-    if (id) {
+    if (list->merge_lists) {
+        if (id) {
+            error_setg(errp, QERR_INVALID_PARAMETER, "id");
+            return NULL;
+        }
+        opts = qemu_opts_find(list, NULL);
+        if (opts) {
+            return opts;
+        }
+    } else if (id) {
+        assert(fail_if_exists);
         if (!id_wellformed(id)) {
             error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "id",
                        "an identifier");
@@ -634,17 +630,8 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
         }
         opts = qemu_opts_find(list, id);
         if (opts != NULL) {
-            if (fail_if_exists && !list->merge_lists) {
-                error_setg(errp, "Duplicate ID '%s' for %s", id, list->name);
-                return NULL;
-            } else {
-                return opts;
-            }
-        }
-    } else if (list->merge_lists) {
-        opts = qemu_opts_find(list, NULL);
-        if (opts) {
-            return opts;
+            error_setg(errp, "Duplicate ID '%s' for %s", id, list->name);
+            return NULL;
         }
     }
     opts = g_malloc0(sizeof(*opts));
@@ -670,18 +657,6 @@ void qemu_opts_loc_restore(QemuOpts *opts)
     loc_restore(&opts->loc);
 }
 
-bool qemu_opts_set(QemuOptsList *list, const char *id,
-                   const char *name, const char *value, Error **errp)
-{
-    QemuOpts *opts;
-
-    opts = qemu_opts_create(list, id, 1, errp);
-    if (!opts) {
-        return false;
-    }
-    return qemu_opt_set(opts, name, value, errp);
-}
-
 const char *qemu_opts_id(QemuOpts *opts)
 {
     return opts->id;
@@ -767,14 +742,17 @@ void qemu_opts_print(QemuOpts *opts, const char *separator)
 
 static const char *get_opt_name_value(const char *params,
                                       const char *firstname,
+                                      bool warn_on_flag,
+                                      bool *help_wanted,
                                       char **name, char **value)
 {
-    const char *p, *pe, *pc;
-
-    pe = strchr(params, '=');
-    pc = strchr(params, ',');
+    const char *p;
+    const char *prefix = "";
+    size_t len;
+    bool is_help = false;
 
-    if (!pe || (pc && pc < pe)) {
+    len = strcspn(params, "=,");
+    if (params[len] != '=') {
         /* found "foo,more" */
         if (firstname) {
             /* implicitly named first option */
@@ -782,23 +760,36 @@ static const char *get_opt_name_value(const char *params,
             p = get_opt_value(params, value);
         } else {
             /* option without value, must be a flag */
-            p = get_opt_name(params, name, ',');
+            p = get_opt_name(params, name, len);
             if (strncmp(*name, "no", 2) == 0) {
                 memmove(*name, *name + 2, strlen(*name + 2) + 1);
                 *value = g_strdup("off");
+                prefix = "no";
             } else {
                 *value = g_strdup("on");
+                is_help = is_help_option(*name);
+            }
+            if (!is_help && warn_on_flag) {
+                warn_report("short-form boolean option '%s%s' deprecated", prefix, *name);
+                if (g_str_equal(*name, "delay")) {
+                    error_printf("Please use nodelay=%s instead\n", prefix[0] ? "on" : "off");
+                } else {
+                    error_printf("Please use %s=%s instead\n", *name, *value);
+                }
             }
         }
     } else {
         /* found "foo=bar,more" */
-        p = get_opt_name(params, name, '=');
+        p = get_opt_name(params, name, len);
         assert(*p == '=');
         p++;
         p = get_opt_value(p, value);
     }
 
     assert(!*p || *p == ',');
+    if (help_wanted && is_help) {
+        *help_wanted = true;
+    }
     if (*p == ',') {
         p++;
     }
@@ -806,15 +797,20 @@ static const char *get_opt_name_value(const char *params,
 }
 
 static bool opts_do_parse(QemuOpts *opts, const char *params,
-                          const char *firstname, bool prepend,
-                          bool *help_wanted, Error **errp)
+                          const char *firstname,
+                          bool warn_on_flag, bool *help_wanted, Error **errp)
 {
     char *option, *value;
     const char *p;
     QemuOpt *opt;
 
     for (p = params; *p;) {
-        p = get_opt_name_value(p, firstname, &option, &value);
+        p = get_opt_name_value(p, firstname, warn_on_flag, help_wanted, &option, &value);
+        if (help_wanted && *help_wanted) {
+            g_free(option);
+            g_free(value);
+            return false;
+        }
         firstname = NULL;
 
         if (!strcmp(option, "id")) {
@@ -823,9 +819,9 @@ static bool opts_do_parse(QemuOpts *opts, const char *params,
             continue;
         }
 
-        opt = opt_create(opts, option, value, prepend);
+        opt = opt_create(opts, option, value);
         g_free(option);
-        if (!opt_validate(opt, help_wanted, errp)) {
+        if (!opt_validate(opt, errp)) {
             qemu_opt_del(opt);
             return false;
         }
@@ -840,7 +836,7 @@ static char *opts_parse_id(const char *params)
     char *name, *value;
 
     for (p = params; *p;) {
-        p = get_opt_name_value(p, NULL, &name, &value);
+        p = get_opt_name_value(p, NULL, false, NULL, &name, &value);
         if (!strcmp(name, "id")) {
             g_free(name);
             return value;
@@ -856,11 +852,10 @@ bool has_help_option(const char *params)
 {
     const char *p;
     char *name, *value;
-    bool ret;
+    bool ret = false;
 
     for (p = params; *p;) {
-        p = get_opt_name_value(p, NULL, &name, &value);
-        ret = is_help_option(name);
+        p = get_opt_name_value(p, NULL, false, &ret, &name, &value);
         g_free(name);
         g_free(value);
         if (ret) {
@@ -884,8 +879,8 @@ bool qemu_opts_do_parse(QemuOpts *opts, const char *params,
 }
 
 static QemuOpts *opts_parse(QemuOptsList *list, const char *params,
-                            bool permit_abbrev, bool defaults,
-                            bool *help_wanted, Error **errp)
+                            bool permit_abbrev,
+                            bool warn_on_flag, bool *help_wanted, Error **errp)
 {
     const char *firstname;
     char *id = opts_parse_id(params);
@@ -894,22 +889,14 @@ static QemuOpts *opts_parse(QemuOptsList *list, const char *params,
     assert(!permit_abbrev || list->implied_opt_name);
     firstname = permit_abbrev ? list->implied_opt_name : NULL;
 
-    /*
-     * This code doesn't work for defaults && !list->merge_lists: when
-     * params has no id=, and list has an element with !opts->id, it
-     * appends a new element instead of returning the existing opts.
-     * However, we got no use for this case.  Guard against possible
-     * (if unlikely) future misuse:
-     */
-    assert(!defaults || list->merge_lists);
-    opts = qemu_opts_create(list, id, !defaults, errp);
+    opts = qemu_opts_create(list, id, !list->merge_lists, errp);
     g_free(id);
     if (opts == NULL) {
         return NULL;
     }
 
-    if (!opts_do_parse(opts, params, firstname, defaults, help_wanted,
-                       errp)) {
+    if (!opts_do_parse(opts, params, firstname,
+                       warn_on_flag, help_wanted, errp)) {
         qemu_opts_del(opts);
         return NULL;
     }
@@ -945,11 +932,13 @@ QemuOpts *qemu_opts_parse_noisily(QemuOptsList *list, const char *params,
     QemuOpts *opts;
     bool help_wanted = false;
 
-    opts = opts_parse(list, params, permit_abbrev, false, &help_wanted, &err);
-    if (err) {
+    opts = opts_parse(list, params, permit_abbrev, true,
+                      opts_accepts_any(list) ? NULL : &help_wanted,
+                      &err);
+    if (!opts) {
+        assert(!!err + !!help_wanted == 1);
         if (help_wanted) {
             qemu_opts_print_help(list, true);
-            error_free(err);
         } else {
             error_report_err(err);
         }
@@ -957,15 +946,6 @@ QemuOpts *qemu_opts_parse_noisily(QemuOptsList *list, const char *params,
     return opts;
 }
 
-void qemu_opts_set_defaults(QemuOptsList *list, const char *params,
-                            int permit_abbrev)
-{
-    QemuOpts *opts;
-
-    opts = opts_parse(list, params, permit_abbrev, true, NULL, NULL);
-    assert(opts);
-}
-
 static bool qemu_opts_from_qdict_entry(QemuOpts *opts,
                                        const QDictEntry *entry,
                                        Error **errp)
@@ -1017,8 +997,6 @@ QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict,
         return NULL;
     }
 
-    assert(opts != NULL);
-
     for (entry = qdict_first(qdict);
          entry;
          entry = qdict_next(qdict, entry)) {
@@ -1045,7 +1023,8 @@ bool qemu_opts_absorb_qdict(QemuOpts *opts, QDict *qdict, Error **errp)
     while (entry != NULL) {
         next = qdict_next(qdict, entry);
 
-        if (find_desc_by_name(opts->list->desc, entry->key)) {
+        if (opts_accepts_any(opts->list) ||
+            find_desc_by_name(opts->list->desc, entry->key)) {
             if (!qemu_opts_from_qdict_entry(opts, entry, errp)) {
                 return false;
             }
@@ -1119,7 +1098,7 @@ bool qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp)
 {
     QemuOpt *opt;
 
-    assert(opts_accepts_any(opts));
+    assert(opts_accepts_any(opts->list));
 
     QTAILQ_FOREACH(opt, &opts->head, next) {
         opt->desc = find_desc_by_name(desc, opt->name);
@@ -1147,11 +1126,11 @@ int qemu_opts_foreach(QemuOptsList *list, qemu_opts_loopfunc func,
                       void *opaque, Error **errp)
 {
     Location loc;
-    QemuOpts *opts;
+    QemuOpts *opts, *next;
     int rc = 0;
 
     loc_push_none(&loc);
-    QTAILQ_FOREACH(opts, &list->head, next) {
+    QTAILQ_FOREACH_SAFE(opts, &list->head, next, next) {
         loc_restore(&opts->loc);
         rc = func(opaque, opts, errp);
         if (rc) {
index 20d51f8c128b3412b8389e34bbff44b7d2e068ba..aa994668f1c4a7b5c6c78551623f24f4accadc1d 100644 (file)
@@ -23,7 +23,7 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qemu/qemu-progress.h"
 
 struct progress_state {
     float current;
index 8af0278f15c69fea136192e91650cef99c056e39..83e84b118694fab100895df49557d38243323e22 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/vm_sockets.h>
 #endif /* CONFIG_AF_VSOCK */
 
-#include "qemu-common.h"
 #include "monitor/monitor.h"
 #include "qapi/clone-visitor.h"
 #include "qapi/error.h"
@@ -97,7 +96,7 @@ bool fd_is_socket(int fd)
 {
     int optval;
     socklen_t optlen = sizeof(optval);
-    return !qemu_getsockopt(fd, SOL_SOCKET, SO_TYPE, &optval, &optlen);
+    return !getsockopt(fd, SOL_SOCKET, SO_TYPE, &optval, &optlen);
 }
 
 
@@ -185,8 +184,8 @@ static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e)
 
  rebind:
     if (e->ai_family == PF_INET6) {
-        qemu_setsockopt(socket, IPPROTO_IPV6, IPV6_V6ONLY, &v6only,
-                        sizeof(v6only));
+        setsockopt(socket, IPPROTO_IPV6, IPV6_V6ONLY, &v6only,
+                   sizeof(v6only));
     }
 
     stat = bind(socket, e->ai_addr, e->ai_addrlen);
@@ -211,7 +210,8 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
                              int num,
                              Error **errp)
 {
-    struct addrinfo ai,*res,*e;
+    ERRP_GUARD();
+    struct addrinfo ai, *res, *e;
     char port[33];
     char uaddr[INET6_ADDRSTRLEN+1];
     char uport[33];
@@ -219,7 +219,6 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
     int slisten = -1;
     int saved_errno = 0;
     bool socket_created = false;
-    Error *err = NULL;
 
     if (saddr->keep_alive) {
         error_setg(errp, "keep-alive option is not supported for passive "
@@ -232,11 +231,9 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
     if (saddr->has_numeric && saddr->numeric) {
         ai.ai_flags |= AI_NUMERICHOST | AI_NUMERICSERV;
     }
-    ai.ai_family = inet_ai_family_from_address(saddr, &err);
     ai.ai_socktype = SOCK_STREAM;
-
-    if (err) {
-        error_propagate(errp, err);
+    ai.ai_family = inet_ai_family_from_address(saddr, errp);
+    if (*errp) {
         return -1;
     }
 
@@ -252,12 +249,12 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
 
     /* lookup */
     if (port_offset) {
-        unsigned long long baseport;
+        uint64_t baseport;
         if (strlen(port) == 0) {
             error_setg(errp, "port not specified");
             return -1;
         }
-        if (parse_uint_full(port, &baseport, 10) < 0) {
+        if (parse_uint_full(port, 10, &baseport) < 0) {
             error_setg(errp, "can't convert to a number: %s", port);
             return -1;
         }
@@ -278,6 +275,11 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
 
     /* create socket + bind/listen */
     for (e = res; e != NULL; e = e->ai_next) {
+#ifdef HAVE_IPPROTO_MPTCP
+        if (saddr->has_mptcp && saddr->mptcp) {
+            e->ai_protocol = IPPROTO_MPTCP;
+        }
+#endif
         getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen,
                         uaddr,INET6_ADDRSTRLEN,uport,32,
                         NI_NUMERICHOST | NI_NUMERICSERV);
@@ -324,7 +326,7 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
              * recover from this situation, so we need to recreate the
              * socket to allow bind attempts for subsequent ports:
              */
-            closesocket(slisten);
+            close(slisten);
             slisten = -1;
         }
     }
@@ -335,7 +337,7 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
 listen_failed:
     saved_errno = errno;
     if (slisten >= 0) {
-        closesocket(slisten);
+        close(slisten);
     }
     freeaddrinfo(res);
     errno = saved_errno;
@@ -378,7 +380,7 @@ static int inet_connect_addr(const InetSocketAddress *saddr,
     if (rc < 0) {
         error_setg_errno(errp, errno, "Failed to connect to '%s:%s'",
                          saddr->host, saddr->port);
-        closesocket(sock);
+        close(sock);
         return -1;
     }
 
@@ -388,9 +390,9 @@ static int inet_connect_addr(const InetSocketAddress *saddr,
 static struct addrinfo *inet_parse_connect_saddr(InetSocketAddress *saddr,
                                                  Error **errp)
 {
+    ERRP_GUARD();
     struct addrinfo ai, *res;
     int rc;
-    Error *err = NULL;
     static int useV4Mapped = 1;
 
     memset(&ai, 0, sizeof(ai));
@@ -399,11 +401,9 @@ static struct addrinfo *inet_parse_connect_saddr(InetSocketAddress *saddr,
     if (qatomic_read(&useV4Mapped)) {
         ai.ai_flags |= AI_V4MAPPED;
     }
-    ai.ai_family = inet_ai_family_from_address(saddr, &err);
     ai.ai_socktype = SOCK_STREAM;
-
-    if (err) {
-        error_propagate(errp, err);
+    ai.ai_family = inet_ai_family_from_address(saddr, errp);
+    if (*errp) {
         return NULL;
     }
 
@@ -456,6 +456,13 @@ int inet_connect_saddr(InetSocketAddress *saddr, Error **errp)
     for (e = res; e != NULL; e = e->ai_next) {
         error_free(local_err);
         local_err = NULL;
+
+#ifdef HAVE_IPPROTO_MPTCP
+        if (saddr->has_mptcp && saddr->mptcp) {
+            e->ai_protocol = IPPROTO_MPTCP;
+        }
+#endif
+
         sock = inet_connect_addr(saddr, e, &local_err);
         if (sock >= 0) {
             break;
@@ -471,8 +478,8 @@ int inet_connect_saddr(InetSocketAddress *saddr, Error **errp)
 
     if (saddr->keep_alive) {
         int val = 1;
-        int ret = qemu_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
-                                  &val, sizeof(val));
+        int ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+                             &val, sizeof(val));
 
         if (ret < 0) {
             error_setg_errno(errp, errno, "Unable to set KEEPALIVE");
@@ -488,20 +495,18 @@ static int inet_dgram_saddr(InetSocketAddress *sraddr,
                             InetSocketAddress *sladdr,
                             Error **errp)
 {
+    ERRP_GUARD();
     struct addrinfo ai, *peer = NULL, *local = NULL;
     const char *addr;
     const char *port;
     int sock = -1, rc;
-    Error *err = NULL;
 
     /* lookup peer addr */
     memset(&ai,0, sizeof(ai));
     ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG;
-    ai.ai_family = inet_ai_family_from_address(sraddr, &err);
     ai.ai_socktype = SOCK_DGRAM;
-
-    if (err) {
-        error_propagate(errp, err);
+    ai.ai_family = inet_ai_family_from_address(sraddr, errp);
+    if (*errp) {
         goto err;
     }
 
@@ -575,7 +580,7 @@ static int inet_dgram_saddr(InetSocketAddress *sraddr,
 
 err:
     if (sock != -1) {
-        closesocket(sock);
+        close(sock);
     }
     if (local) {
         freeaddrinfo(local);
@@ -687,6 +692,17 @@ int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
         }
         addr->has_keep_alive = true;
     }
+#ifdef HAVE_IPPROTO_MPTCP
+    begin = strstr(optstr, ",mptcp");
+    if (begin) {
+        if (inet_parse_flag("mptcp", begin + strlen(",mptcp"),
+                            &addr->mptcp, errp) < 0)
+        {
+            return -1;
+        }
+        addr->has_mptcp = true;
+    }
+#endif
     return 0;
 }
 
@@ -716,19 +732,19 @@ static bool vsock_parse_vaddr_to_sockaddr(const VsockSocketAddress *vaddr,
                                           struct sockaddr_vm *svm,
                                           Error **errp)
 {
-    unsigned long long val;
+    uint64_t val;
 
     memset(svm, 0, sizeof(*svm));
     svm->svm_family = AF_VSOCK;
 
-    if (parse_uint_full(vaddr->cid, &val, 10) < 0 ||
+    if (parse_uint_full(vaddr->cid, 10, &val) < 0 ||
         val > UINT32_MAX) {
         error_setg(errp, "Failed to parse cid '%s'", vaddr->cid);
         return false;
     }
     svm->svm_cid = val;
 
-    if (parse_uint_full(vaddr->port, &val, 10) < 0 ||
+    if (parse_uint_full(vaddr->port, 10, &val) < 0 ||
         val > UINT32_MAX) {
         error_setg(errp, "Failed to parse port '%s'", vaddr->port);
         return false;
@@ -761,7 +777,7 @@ static int vsock_connect_addr(const VsockSocketAddress *vaddr,
     if (rc < 0) {
         error_setg_errno(errp, errno, "Failed to connect to '%s:%s'",
                          vaddr->cid, vaddr->port);
-        closesocket(sock);
+        close(sock);
         return -1;
     }
 
@@ -798,13 +814,13 @@ static int vsock_listen_saddr(VsockSocketAddress *vaddr,
 
     if (bind(slisten, (const struct sockaddr *)&svm, sizeof(svm)) != 0) {
         error_setg_errno(errp, errno, "Failed to bind socket");
-        closesocket(slisten);
+        close(slisten);
         return -1;
     }
 
     if (listen(slisten, num) != 0) {
         error_setg_errno(errp, errno, "Failed to listen on socket");
-        closesocket(slisten);
+        close(slisten);
         return -1;
     }
     return slisten;
@@ -858,8 +874,6 @@ static int vsock_parse(VsockSocketAddress *addr, const char *str,
 }
 #endif /* CONFIG_AF_VSOCK */
 
-#ifndef _WIN32
-
 static bool saddr_is_abstract(UnixSocketAddress *saddr)
 {
 #ifdef CONFIG_LINUX
@@ -899,9 +913,8 @@ static int unix_listen_saddr(UnixSocketAddress *saddr,
     if (saddr->path[0] || abstract) {
         path = saddr->path;
     } else {
-        const char *tmpdir = getenv("TMPDIR");
-        tmpdir = tmpdir ? tmpdir : "/tmp";
-        path = pathbuf = g_strdup_printf("%s/qemu-socket-XXXXXX", tmpdir);
+        path = pathbuf = g_strdup_printf("%s/qemu-socket-XXXXXX",
+                                         g_get_tmp_dir());
     }
 
     pathlen = strlen(path);
@@ -916,7 +929,7 @@ static int unix_listen_saddr(UnixSocketAddress *saddr,
 
     if (pathbuf != NULL) {
         /*
-         * This dummy fd usage silences the mktemp() unsecure warning.
+         * This dummy fd usage silences the mktemp() insecure warning.
          * Using mkstemp() doesn't make things more secure here
          * though.  bind() complains about existing files, so we have
          * to unlink first and thus re-open the race window.  The
@@ -965,7 +978,7 @@ static int unix_listen_saddr(UnixSocketAddress *saddr,
 
 err:
     g_free(pathbuf);
-    closesocket(sock);
+    close(sock);
     return -1;
 }
 
@@ -1032,25 +1045,6 @@ static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp)
     return -1;
 }
 
-#else
-
-static int unix_listen_saddr(UnixSocketAddress *saddr,
-                             int num,
-                             Error **errp)
-{
-    error_setg(errp, "unix sockets are not available on windows");
-    errno = ENOTSUP;
-    return -1;
-}
-
-static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp)
-{
-    error_setg(errp, "unix sockets are not available on windows");
-    errno = ENOTSUP;
-    return -1;
-}
-#endif
-
 /* compatibility wrapper */
 int unix_listen(const char *str, Error **errp)
 {
@@ -1076,6 +1070,26 @@ int unix_connect(const char *path, Error **errp)
     return sock;
 }
 
+char *socket_uri(SocketAddress *addr)
+{
+    switch (addr->type) {
+    case SOCKET_ADDRESS_TYPE_INET:
+        return g_strdup_printf("tcp:%s:%s",
+                               addr->u.inet.host,
+                               addr->u.inet.port);
+    case SOCKET_ADDRESS_TYPE_UNIX:
+        return g_strdup_printf("unix:%s",
+                               addr->u.q_unix.path);
+    case SOCKET_ADDRESS_TYPE_FD:
+        return g_strdup_printf("fd:%s", addr->u.fd.str);
+    case SOCKET_ADDRESS_TYPE_VSOCK:
+        return g_strdup_printf("vsock:%s:%s",
+                               addr->u.vsock.cid,
+                               addr->u.vsock.port);
+    default:
+        return g_strdup("unknown address type");
+    }
+}
 
 SocketAddress *socket_parse(const char *str, Error **errp)
 {
@@ -1103,6 +1117,11 @@ SocketAddress *socket_parse(const char *str, Error **errp)
         if (vsock_parse(&addr->u.vsock, str + strlen("vsock:"), errp)) {
             goto fail;
         }
+    } else if (strstart(str, "tcp:", NULL)) {
+        addr->type = SOCKET_ADDRESS_TYPE_INET;
+        if (inet_parse(&addr->u.inet, str + strlen("tcp:"), errp)) {
+            goto fail;
+        }
     } else {
         addr->type = SOCKET_ADDRESS_TYPE_INET;
         if (inet_parse(&addr->u.inet, str, errp)) {
@@ -1116,14 +1135,10 @@ fail:
     return NULL;
 }
 
-static int socket_get_fd(const char *fdstr, int num, Error **errp)
+static int socket_get_fd(const char *fdstr, Error **errp)
 {
     Monitor *cur_mon = monitor_cur();
     int fd;
-    if (num != 1) {
-        error_setg_errno(errp, EINVAL, "socket_get_fd: too many connections");
-        return -1;
-    }
     if (cur_mon) {
         fd = monitor_get_fd(cur_mon, fdstr, errp);
         if (fd < 0) {
@@ -1145,6 +1160,25 @@ static int socket_get_fd(const char *fdstr, int num, Error **errp)
     return fd;
 }
 
+int socket_address_parse_named_fd(SocketAddress *addr, Error **errp)
+{
+    int fd;
+
+    if (addr->type != SOCKET_ADDRESS_TYPE_FD) {
+        return 0;
+    }
+
+    fd = socket_get_fd(addr->u.fd.str, errp);
+    if (fd < 0) {
+        return fd;
+    }
+
+    g_free(addr->u.fd.str);
+    addr->u.fd.str = g_strdup_printf("%d", fd);
+
+    return 0;
+}
+
 int socket_connect(SocketAddress *addr, Error **errp)
 {
     int fd;
@@ -1159,7 +1193,7 @@ int socket_connect(SocketAddress *addr, Error **errp)
         break;
 
     case SOCKET_ADDRESS_TYPE_FD:
-        fd = socket_get_fd(addr->u.fd.str, 1, errp);
+        fd = socket_get_fd(addr->u.fd.str, errp);
         break;
 
     case SOCKET_ADDRESS_TYPE_VSOCK:
@@ -1187,7 +1221,26 @@ int socket_listen(SocketAddress *addr, int num, Error **errp)
         break;
 
     case SOCKET_ADDRESS_TYPE_FD:
-        fd = socket_get_fd(addr->u.fd.str, num, errp);
+        fd = socket_get_fd(addr->u.fd.str, errp);
+        if (fd < 0) {
+            return -1;
+        }
+
+        /*
+         * If the socket is not yet in the listen state, then transition it to
+         * the listen state now.
+         *
+         * If it's already listening then this updates the backlog value as
+         * requested.
+         *
+         * If this socket cannot listen because it's already in another state
+         * (e.g. unbound or connected) then we'll catch the error here.
+         */
+        if (listen(fd, num) != 0) {
+            error_setg_errno(errp, errno, "Failed to listen on fd socket");
+            close(fd);
+            return -1;
+        }
         break;
 
     case SOCKET_ADDRESS_TYPE_VSOCK:
@@ -1279,7 +1332,6 @@ socket_sockaddr_to_address_inet(struct sockaddr_storage *sa,
 }
 
 
-#ifndef WIN32
 static SocketAddress *
 socket_sockaddr_to_address_unix(struct sockaddr_storage *sa,
                                 socklen_t salen,
@@ -1290,23 +1342,22 @@ socket_sockaddr_to_address_unix(struct sockaddr_storage *sa,
 
     addr = g_new0(SocketAddress, 1);
     addr->type = SOCKET_ADDRESS_TYPE_UNIX;
+    salen -= offsetof(struct sockaddr_un, sun_path);
 #ifdef CONFIG_LINUX
-    if (!su->sun_path[0]) {
+    if (salen > 0 && !su->sun_path[0]) {
         /* Linux abstract socket */
-        addr->u.q_unix.path = g_strndup(su->sun_path + 1,
-                                        sizeof(su->sun_path) - 1);
+        addr->u.q_unix.path = g_strndup(su->sun_path + 1, salen - 1);
         addr->u.q_unix.has_abstract = true;
         addr->u.q_unix.abstract = true;
         addr->u.q_unix.has_tight = true;
-        addr->u.q_unix.tight = salen < sizeof(*su);
+        addr->u.q_unix.tight = salen < sizeof(su->sun_path);
         return addr;
     }
 #endif
 
-    addr->u.q_unix.path = g_strndup(su->sun_path, sizeof(su->sun_path));
+    addr->u.q_unix.path = g_strndup(su->sun_path, salen);
     return addr;
 }
-#endif /* WIN32 */
 
 #ifdef CONFIG_AF_VSOCK
 static SocketAddress *
@@ -1338,10 +1389,8 @@ socket_sockaddr_to_address(struct sockaddr_storage *sa,
     case AF_INET6:
         return socket_sockaddr_to_address_inet(sa, salen, errp);
 
-#ifndef WIN32
     case AF_UNIX:
         return socket_sockaddr_to_address_unix(sa, salen, errp);
-#endif /* WIN32 */
 
 #ifdef CONFIG_AF_VSOCK
     case AF_VSOCK:
@@ -1398,22 +1447,22 @@ SocketAddress *socket_address_flatten(SocketAddressLegacy *addr_legacy)
     addr = g_new(SocketAddress, 1);
 
     switch (addr_legacy->type) {
-    case SOCKET_ADDRESS_LEGACY_KIND_INET:
+    case SOCKET_ADDRESS_TYPE_INET:
         addr->type = SOCKET_ADDRESS_TYPE_INET;
         QAPI_CLONE_MEMBERS(InetSocketAddress, &addr->u.inet,
                            addr_legacy->u.inet.data);
         break;
-    case SOCKET_ADDRESS_LEGACY_KIND_UNIX:
+    case SOCKET_ADDRESS_TYPE_UNIX:
         addr->type = SOCKET_ADDRESS_TYPE_UNIX;
         QAPI_CLONE_MEMBERS(UnixSocketAddress, &addr->u.q_unix,
                            addr_legacy->u.q_unix.data);
         break;
-    case SOCKET_ADDRESS_LEGACY_KIND_VSOCK:
+    case SOCKET_ADDRESS_TYPE_VSOCK:
         addr->type = SOCKET_ADDRESS_TYPE_VSOCK;
         QAPI_CLONE_MEMBERS(VsockSocketAddress, &addr->u.vsock,
                            addr_legacy->u.vsock.data);
         break;
-    case SOCKET_ADDRESS_LEGACY_KIND_FD:
+    case SOCKET_ADDRESS_TYPE_FD:
         addr->type = SOCKET_ADDRESS_TYPE_FD;
         QAPI_CLONE_MEMBERS(String, &addr->u.fd, addr_legacy->u.fd.data);
         break;
index dcff5e7c5d676a357785c9f10f1a955cea7d5cfc..b2e26e21205b631efe57bfc4705d5fb581788f3f 100644 (file)
 #include "qemu/notify.h"
 #include "qemu-thread-common.h"
 #include "qemu/tsan.h"
+#include "qemu/bitmap.h"
+
+#ifdef CONFIG_PTHREAD_SET_NAME_NP
+#include <pthread_np.h>
+#endif
 
 static bool name_threads;
 
@@ -23,7 +28,9 @@ void qemu_thread_naming(bool enable)
 {
     name_threads = enable;
 
-#ifndef CONFIG_THREAD_SETNAME_BYTHREAD
+#if !defined CONFIG_PTHREAD_SETNAME_NP_W_TID && \
+    !defined CONFIG_PTHREAD_SETNAME_NP_WO_TID && \
+    !defined CONFIG_PTHREAD_SET_NAME_NP
     /* This is a debugging option, not fatal */
     if (enable) {
         fprintf(stderr, "qemu: thread naming not supported on this host\n");
@@ -37,12 +44,20 @@ static void error_exit(int err, const char *msg)
     abort();
 }
 
+static inline clockid_t qemu_timedwait_clockid(void)
+{
+#ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK
+    return CLOCK_MONOTONIC;
+#else
+    return CLOCK_REALTIME;
+#endif
+}
+
 static void compute_abs_deadline(struct timespec *ts, int ms)
 {
-    struct timeval tv;
-    gettimeofday(&tv, NULL);
-    ts->tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 1000000;
-    ts->tv_sec = tv.tv_sec + ms / 1000;
+    clock_gettime(qemu_timedwait_clockid(), ts);
+    ts->tv_nsec += (ms % 1000) * 1000000;
+    ts->tv_sec += ms / 1000;
     if (ts->tv_nsec >= 1000000000) {
         ts->tv_sec++;
         ts->tv_nsec -= 1000000000;
@@ -116,21 +131,57 @@ void qemu_rec_mutex_init(QemuRecMutex *mutex)
 
     pthread_mutexattr_init(&attr);
     pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
-    err = pthread_mutex_init(&mutex->lock, &attr);
+    err = pthread_mutex_init(&mutex->m.lock, &attr);
     pthread_mutexattr_destroy(&attr);
     if (err) {
         error_exit(err, __func__);
     }
-    mutex->initialized = true;
+    mutex->m.initialized = true;
+}
+
+void qemu_rec_mutex_destroy(QemuRecMutex *mutex)
+{
+    qemu_mutex_destroy(&mutex->m);
+}
+
+void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line)
+{
+    qemu_mutex_lock_impl(&mutex->m, file, line);
+}
+
+int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, int line)
+{
+    return qemu_mutex_trylock_impl(&mutex->m, file, line);
+}
+
+void qemu_rec_mutex_unlock_impl(QemuRecMutex *mutex, const char *file, int line)
+{
+    qemu_mutex_unlock_impl(&mutex->m, file, line);
 }
 
 void qemu_cond_init(QemuCond *cond)
 {
+    pthread_condattr_t attr;
     int err;
 
-    err = pthread_cond_init(&cond->cond, NULL);
-    if (err)
+    err = pthread_condattr_init(&attr);
+    if (err) {
+        error_exit(err, __func__);
+    }
+#ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK
+    err = pthread_condattr_setclock(&attr, qemu_timedwait_clockid());
+    if (err) {
+        error_exit(err, __func__);
+    }
+#endif
+    err = pthread_cond_init(&cond->cond, &attr);
+    if (err) {
+        error_exit(err, __func__);
+    }
+    err = pthread_condattr_destroy(&attr);
+    if (err) {
         error_exit(err, __func__);
+    }
     cond->initialized = true;
 }
 
@@ -177,16 +228,15 @@ void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, con
         error_exit(err, __func__);
 }
 
-bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms,
-                              const char *file, const int line)
+static bool TSA_NO_TSA
+qemu_cond_timedwait_ts(QemuCond *cond, QemuMutex *mutex, struct timespec *ts,
+                       const char *file, const int line)
 {
     int err;
-    struct timespec ts;
 
     assert(cond->initialized);
     trace_qemu_mutex_unlock(mutex, file, line);
-    compute_abs_deadline(&ts, ms);
-    err = pthread_cond_timedwait(&cond->cond, &mutex->lock, &ts);
+    err = pthread_cond_timedwait(&cond->cond, &mutex->lock, ts);
     trace_qemu_mutex_locked(mutex, file, line);
     if (err && err != ETIMEDOUT) {
         error_exit(err, __func__);
@@ -194,152 +244,77 @@ bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms,
     return err != ETIMEDOUT;
 }
 
+bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms,
+                              const char *file, const int line)
+{
+    struct timespec ts;
+
+    compute_abs_deadline(&ts, ms);
+    return qemu_cond_timedwait_ts(cond, mutex, &ts, file, line);
+}
+
 void qemu_sem_init(QemuSemaphore *sem, int init)
 {
-    int rc;
+    qemu_mutex_init(&sem->mutex);
+    qemu_cond_init(&sem->cond);
 
-#ifndef CONFIG_SEM_TIMEDWAIT
-    rc = pthread_mutex_init(&sem->lock, NULL);
-    if (rc != 0) {
-        error_exit(rc, __func__);
-    }
-    rc = pthread_cond_init(&sem->cond, NULL);
-    if (rc != 0) {
-        error_exit(rc, __func__);
-    }
     if (init < 0) {
         error_exit(EINVAL, __func__);
     }
     sem->count = init;
-#else
-    rc = sem_init(&sem->sem, 0, init);
-    if (rc < 0) {
-        error_exit(errno, __func__);
-    }
-#endif
-    sem->initialized = true;
 }
 
 void qemu_sem_destroy(QemuSemaphore *sem)
 {
-    int rc;
-
-    assert(sem->initialized);
-    sem->initialized = false;
-#ifndef CONFIG_SEM_TIMEDWAIT
-    rc = pthread_cond_destroy(&sem->cond);
-    if (rc < 0) {
-        error_exit(rc, __func__);
-    }
-    rc = pthread_mutex_destroy(&sem->lock);
-    if (rc < 0) {
-        error_exit(rc, __func__);
-    }
-#else
-    rc = sem_destroy(&sem->sem);
-    if (rc < 0) {
-        error_exit(errno, __func__);
-    }
-#endif
+    qemu_cond_destroy(&sem->cond);
+    qemu_mutex_destroy(&sem->mutex);
 }
 
 void qemu_sem_post(QemuSemaphore *sem)
 {
-    int rc;
-
-    assert(sem->initialized);
-#ifndef CONFIG_SEM_TIMEDWAIT
-    pthread_mutex_lock(&sem->lock);
+    qemu_mutex_lock(&sem->mutex);
     if (sem->count == UINT_MAX) {
-        rc = EINVAL;
+        error_exit(EINVAL, __func__);
     } else {
         sem->count++;
-        rc = pthread_cond_signal(&sem->cond);
-    }
-    pthread_mutex_unlock(&sem->lock);
-    if (rc != 0) {
-        error_exit(rc, __func__);
+        qemu_cond_signal(&sem->cond);
     }
-#else
-    rc = sem_post(&sem->sem);
-    if (rc < 0) {
-        error_exit(errno, __func__);
-    }
-#endif
+    qemu_mutex_unlock(&sem->mutex);
 }
 
 int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
 {
-    int rc;
+    bool rc = true;
     struct timespec ts;
 
-    assert(sem->initialized);
-#ifndef CONFIG_SEM_TIMEDWAIT
-    rc = 0;
     compute_abs_deadline(&ts, ms);
-    pthread_mutex_lock(&sem->lock);
+    qemu_mutex_lock(&sem->mutex);
     while (sem->count == 0) {
-        rc = pthread_cond_timedwait(&sem->cond, &sem->lock, &ts);
-        if (rc == ETIMEDOUT) {
-            break;
+        if (ms == 0) {
+            rc = false;
+        } else {
+            rc = qemu_cond_timedwait_ts(&sem->cond, &sem->mutex, &ts,
+                                        __FILE__, __LINE__);
         }
-        if (rc != 0) {
-            error_exit(rc, __func__);
+        if (!rc) { /* timeout */
+            break;
         }
     }
-    if (rc != ETIMEDOUT) {
+    if (rc) {
         --sem->count;
     }
-    pthread_mutex_unlock(&sem->lock);
-    return (rc == ETIMEDOUT ? -1 : 0);
-#else
-    if (ms <= 0) {
-        /* This is cheaper than sem_timedwait.  */
-        do {
-            rc = sem_trywait(&sem->sem);
-        } while (rc == -1 && errno == EINTR);
-        if (rc == -1 && errno == EAGAIN) {
-            return -1;
-        }
-    } else {
-        compute_abs_deadline(&ts, ms);
-        do {
-            rc = sem_timedwait(&sem->sem, &ts);
-        } while (rc == -1 && errno == EINTR);
-        if (rc == -1 && errno == ETIMEDOUT) {
-            return -1;
-        }
-    }
-    if (rc < 0) {
-        error_exit(errno, __func__);
-    }
-    return 0;
-#endif
+    qemu_mutex_unlock(&sem->mutex);
+    return (rc ? 0 : -1);
 }
 
 void qemu_sem_wait(QemuSemaphore *sem)
 {
-    int rc;
-
-    assert(sem->initialized);
-#ifndef CONFIG_SEM_TIMEDWAIT
-    pthread_mutex_lock(&sem->lock);
+    qemu_mutex_lock(&sem->mutex);
     while (sem->count == 0) {
-        rc = pthread_cond_wait(&sem->cond, &sem->lock);
-        if (rc != 0) {
-            error_exit(rc, __func__);
-        }
+        qemu_cond_wait(&sem->cond, &sem->mutex);
     }
     --sem->count;
-    pthread_mutex_unlock(&sem->lock);
-#else
-    do {
-        rc = sem_wait(&sem->sem);
-    } while (rc == -1 && errno == EINTR);
-    if (rc < 0) {
-        error_exit(errno, __func__);
-    }
-#endif
+    qemu_mutex_unlock(&sem->mutex);
 }
 
 #ifdef __linux__
@@ -409,13 +384,21 @@ void qemu_event_destroy(QemuEvent *ev)
 
 void qemu_event_set(QemuEvent *ev)
 {
-    /* qemu_event_set has release semantics, but because it *loads*
+    assert(ev->initialized);
+
+    /*
+     * Pairs with both qemu_event_reset() and qemu_event_wait().
+     *
+     * qemu_event_set has release semantics, but because it *loads*
      * ev->value we need a full memory barrier here.
      */
-    assert(ev->initialized);
     smp_mb();
     if (qatomic_read(&ev->value) != EV_SET) {
-        if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+        int old = qatomic_xchg(&ev->value, EV_SET);
+
+        /* Pairs with memory barrier in kernel futex_wait system call.  */
+        smp_mb__after_rmw();
+        if (old == EV_BUSY) {
             /* There were waiters, wake them up.  */
             qemu_futex_wake(ev, INT_MAX);
         }
@@ -424,18 +407,19 @@ void qemu_event_set(QemuEvent *ev)
 
 void qemu_event_reset(QemuEvent *ev)
 {
-    unsigned value;
-
     assert(ev->initialized);
-    value = qatomic_read(&ev->value);
-    smp_mb_acquire();
-    if (value == EV_SET) {
-        /*
-         * If there was a concurrent reset (or even reset+wait),
-         * do nothing.  Otherwise change EV_SET->EV_FREE.
-         */
-        qatomic_or(&ev->value, EV_FREE);
-    }
+
+    /*
+     * If there was a concurrent reset (or even reset+wait),
+     * do nothing.  Otherwise change EV_SET->EV_FREE.
+     */
+    qatomic_or(&ev->value, EV_FREE);
+
+    /*
+     * Order reset before checking the condition in the caller.
+     * Pairs with the first memory barrier in qemu_event_set().
+     */
+    smp_mb__after_rmw();
 }
 
 void qemu_event_wait(QemuEvent *ev)
@@ -443,20 +427,40 @@ void qemu_event_wait(QemuEvent *ev)
     unsigned value;
 
     assert(ev->initialized);
-    value = qatomic_read(&ev->value);
-    smp_mb_acquire();
+
+    /*
+     * qemu_event_wait must synchronize with qemu_event_set even if it does
+     * not go down the slow path, so this load-acquire is needed that
+     * synchronizes with the first memory barrier in qemu_event_set().
+     *
+     * If we do go down the slow path, there is no requirement at all: we
+     * might miss a qemu_event_set() here but ultimately the memory barrier in
+     * qemu_futex_wait() will ensure the check is done correctly.
+     */
+    value = qatomic_load_acquire(&ev->value);
     if (value != EV_SET) {
         if (value == EV_FREE) {
             /*
-             * Leave the event reset and tell qemu_event_set that there
-             * are waiters.  No need to retry, because there cannot be
-             * a concurrent busy->free transition.  After the CAS, the
-             * event will be either set or busy.
+             * Leave the event reset and tell qemu_event_set that there are
+             * waiters.  No need to retry, because there cannot be a concurrent
+             * busy->free transition.  After the CAS, the event will be either
+             * set or busy.
+             *
+             * This cmpxchg doesn't have particular ordering requirements if it
+             * succeeds (moving the store earlier can only cause qemu_event_set()
+             * to issue _more_ wakeups), the failing case needs acquire semantics
+             * like the load above.
              */
             if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
                 return;
             }
         }
+
+        /*
+         * This is the final check for a concurrent set, so it does need
+         * a smp_mb() pairing with the second barrier of qemu_event_set().
+         * The barrier is inside the FUTEX_WAIT system call.
+         */
         qemu_futex_wait(ev, EV_BUSY);
     }
 }
@@ -502,7 +506,6 @@ static void *qemu_thread_start(void *args)
     void *arg = qemu_thread_args->arg;
     void *r;
 
-#ifdef CONFIG_THREAD_SETNAME_BYTHREAD
     /* Attempt to set the threads name; note that this is for debug, so
      * we're not going to fail if we can't set it.
      */
@@ -511,15 +514,35 @@ static void *qemu_thread_start(void *args)
         pthread_setname_np(pthread_self(), qemu_thread_args->name);
 # elif defined(CONFIG_PTHREAD_SETNAME_NP_WO_TID)
         pthread_setname_np(qemu_thread_args->name);
+# elif defined(CONFIG_PTHREAD_SET_NAME_NP)
+        pthread_set_name_np(pthread_self(), qemu_thread_args->name);
 # endif
     }
-#endif
     QEMU_TSAN_ANNOTATE_THREAD_NAME(qemu_thread_args->name);
     g_free(qemu_thread_args->name);
     g_free(qemu_thread_args);
+
+    /*
+     * GCC 11 with glibc 2.17 on PowerPC reports
+     *
+     * qemu-thread-posix.c:540:5: error: ‘__sigsetjmp’ accessing 656 bytes
+     *   in a region of size 528 [-Werror=stringop-overflow=]
+     * 540 |     pthread_cleanup_push(qemu_thread_atexit_notify, NULL);
+     *     |     ^~~~~~~~~~~~~~~~~~~~
+     *
+     * which is clearly nonsense.
+     */
+#pragma GCC diagnostic push
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Wstringop-overflow"
+#endif
+
     pthread_cleanup_push(qemu_thread_atexit_notify, NULL);
     r = start_routine(arg);
     pthread_cleanup_pop(1);
+
+#pragma GCC diagnostic pop
+
     return r;
 }
 
@@ -566,6 +589,75 @@ void qemu_thread_create(QemuThread *thread, const char *name,
     pthread_attr_destroy(&attr);
 }
 
+int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus,
+                             unsigned long nbits)
+{
+#if defined(CONFIG_PTHREAD_AFFINITY_NP)
+    const size_t setsize = CPU_ALLOC_SIZE(nbits);
+    unsigned long value;
+    cpu_set_t *cpuset;
+    int err;
+
+    cpuset = CPU_ALLOC(nbits);
+    g_assert(cpuset);
+
+    CPU_ZERO_S(setsize, cpuset);
+    value = find_first_bit(host_cpus, nbits);
+    while (value < nbits) {
+        CPU_SET_S(value, setsize, cpuset);
+        value = find_next_bit(host_cpus, nbits, value + 1);
+    }
+
+    err = pthread_setaffinity_np(thread->thread, setsize, cpuset);
+    CPU_FREE(cpuset);
+    return err;
+#else
+    return -ENOSYS;
+#endif
+}
+
+int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus,
+                             unsigned long *nbits)
+{
+#if defined(CONFIG_PTHREAD_AFFINITY_NP)
+    unsigned long tmpbits;
+    cpu_set_t *cpuset;
+    size_t setsize;
+    int i, err;
+
+    tmpbits = CPU_SETSIZE;
+    while (true) {
+        setsize = CPU_ALLOC_SIZE(tmpbits);
+        cpuset = CPU_ALLOC(tmpbits);
+        g_assert(cpuset);
+
+        err = pthread_getaffinity_np(thread->thread, setsize, cpuset);
+        if (err) {
+            CPU_FREE(cpuset);
+            if (err != -EINVAL) {
+                return err;
+            }
+            tmpbits *= 2;
+        } else {
+            break;
+        }
+    }
+
+    /* Convert the result into a proper bitmap. */
+    *nbits = tmpbits;
+    *host_cpus = bitmap_new(tmpbits);
+    for (i = 0; i < tmpbits; i++) {
+        if (CPU_ISSET(i, cpuset)) {
+            set_bit(i, *host_cpus);
+        }
+    }
+    CPU_FREE(cpuset);
+    return 0;
+#else
+    return -ENOSYS;
+#endif
+}
+
 void qemu_thread_get_self(QemuThread *thread)
 {
     thread->thread = pthread_self();
index cb5aa2018c3ada10e9af087bdd729254ef3c7c5d..a7fe3cc345f087687822e32043a3eabebf1aad50 100644 (file)
@@ -12,7 +12,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu-common.h"
 #include "qemu/thread.h"
 #include "qemu/notify.h"
 #include "qemu-thread-common.h"
 
 static bool name_threads;
 
+typedef HRESULT (WINAPI *pSetThreadDescription) (HANDLE hThread,
+                                                 PCWSTR lpThreadDescription);
+static pSetThreadDescription SetThreadDescriptionFunc;
+static HMODULE kernel32_module;
+
+static bool load_set_thread_description(void)
+{
+    static gsize _init_once = 0;
+
+    if (g_once_init_enter(&_init_once)) {
+        kernel32_module = LoadLibrary("kernel32.dll");
+        if (kernel32_module) {
+            SetThreadDescriptionFunc =
+                (pSetThreadDescription)GetProcAddress(kernel32_module,
+                                                      "SetThreadDescription");
+            if (!SetThreadDescriptionFunc) {
+                FreeLibrary(kernel32_module);
+            }
+        }
+        g_once_init_leave(&_init_once, 1);
+    }
+
+    return !!SetThreadDescriptionFunc;
+}
+
 void qemu_thread_naming(bool enable)
 {
-    /* But note we don't actually name them on Windows yet */
     name_threads = enable;
 
-    fprintf(stderr, "qemu: thread naming not supported on this host\n");
+    if (enable && !load_set_thread_description()) {
+        fprintf(stderr, "qemu: thread naming not supported on this host\n");
+        name_threads = false;
+    }
 }
 
 static void error_exit(int err, const char *msg)
@@ -105,7 +131,7 @@ int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, int line)
     return !TryEnterCriticalSection(&mutex->lock);
 }
 
-void qemu_rec_mutex_unlock(QemuRecMutex *mutex)
+void qemu_rec_mutex_unlock_impl(QemuRecMutex *mutex, const char *file, int line)
 {
     assert(mutex->initialized);
     LeaveCriticalSection(&mutex->lock);
@@ -246,12 +272,20 @@ void qemu_event_destroy(QemuEvent *ev)
 void qemu_event_set(QemuEvent *ev)
 {
     assert(ev->initialized);
-    /* qemu_event_set has release semantics, but because it *loads*
+
+    /*
+     * Pairs with both qemu_event_reset() and qemu_event_wait().
+     *
+     * qemu_event_set has release semantics, but because it *loads*
      * ev->value we need a full memory barrier here.
      */
     smp_mb();
     if (qatomic_read(&ev->value) != EV_SET) {
-        if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+        int old = qatomic_xchg(&ev->value, EV_SET);
+
+        /* Pairs with memory barrier after ResetEvent.  */
+        smp_mb__after_rmw();
+        if (old == EV_BUSY) {
             /* There were waiters, wake them up.  */
             SetEvent(ev->event);
         }
@@ -260,17 +294,19 @@ void qemu_event_set(QemuEvent *ev)
 
 void qemu_event_reset(QemuEvent *ev)
 {
-    unsigned value;
-
     assert(ev->initialized);
-    value = qatomic_read(&ev->value);
-    smp_mb_acquire();
-    if (value == EV_SET) {
-        /* If there was a concurrent reset (or even reset+wait),
-         * do nothing.  Otherwise change EV_SET->EV_FREE.
-         */
-        qatomic_or(&ev->value, EV_FREE);
-    }
+
+    /*
+     * If there was a concurrent reset (or even reset+wait),
+     * do nothing.  Otherwise change EV_SET->EV_FREE.
+     */
+    qatomic_or(&ev->value, EV_FREE);
+
+    /*
+     * Order reset before checking the condition in the caller.
+     * Pairs with the first memory barrier in qemu_event_set().
+     */
+    smp_mb__after_rmw();
 }
 
 void qemu_event_wait(QemuEvent *ev)
@@ -278,29 +314,49 @@ void qemu_event_wait(QemuEvent *ev)
     unsigned value;
 
     assert(ev->initialized);
-    value = qatomic_read(&ev->value);
-    smp_mb_acquire();
+
+    /*
+     * qemu_event_wait must synchronize with qemu_event_set even if it does
+     * not go down the slow path, so this load-acquire is needed that
+     * synchronizes with the first memory barrier in qemu_event_set().
+     *
+     * If we do go down the slow path, there is no requirement at all: we
+     * might miss a qemu_event_set() here but ultimately the memory barrier in
+     * qemu_futex_wait() will ensure the check is done correctly.
+     */
+    value = qatomic_load_acquire(&ev->value);
     if (value != EV_SET) {
         if (value == EV_FREE) {
-            /* qemu_event_set is not yet going to call SetEvent, but we are
-             * going to do another check for EV_SET below when setting EV_BUSY.
-             * At that point it is safe to call WaitForSingleObject.
+            /*
+             * Here the underlying kernel event is reset, but qemu_event_set is
+             * not yet going to call SetEvent.  However, there will be another
+             * check for EV_SET below when setting EV_BUSY.  At that point it
+             * is safe to call WaitForSingleObject.
              */
             ResetEvent(ev->event);
 
-            /* Tell qemu_event_set that there are waiters.  No need to retry
-             * because there cannot be a concurrent busy->free transition.
-             * After the CAS, the event will be either set or busy.
+            /*
+             * It is not clear whether ResetEvent provides this barrier; kernel
+             * APIs (KeResetEvent/KeClearEvent) do not.  Better safe than sorry!
+             */
+            smp_mb();
+
+            /*
+             * Leave the event reset and tell qemu_event_set that there are
+             * waiters.  No need to retry, because there cannot be a concurrent
+             * busy->free transition.  After the CAS, the event will be either
+             * set or busy.
              */
             if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
-                value = EV_SET;
-            } else {
-                value = EV_BUSY;
+                return;
             }
         }
-        if (value == EV_BUSY) {
-            WaitForSingleObject(ev->event, INFINITE);
-        }
+
+        /*
+         * ev->value is now EV_BUSY.  Since we didn't observe EV_SET,
+         * qemu_event_set() must observe EV_BUSY and call SetEvent().
+         */
+        WaitForSingleObject(ev->event, INFINITE);
     }
 }
 
@@ -401,6 +457,25 @@ void *qemu_thread_join(QemuThread *thread)
     return ret;
 }
 
+static bool set_thread_description(HANDLE h, const char *name)
+{
+    HRESULT hr;
+    g_autofree wchar_t *namew = NULL;
+
+    if (!load_set_thread_description()) {
+        return false;
+    }
+
+    namew = g_utf8_to_utf16(name, -1, NULL, NULL, NULL);
+    if (!namew) {
+        return false;
+    }
+
+    hr = SetThreadDescriptionFunc(h, namew);
+
+    return SUCCEEDED(hr);
+}
+
 void qemu_thread_create(QemuThread *thread, const char *name,
                        void *(*start_routine)(void *),
                        void *arg, int mode)
@@ -424,10 +499,26 @@ void qemu_thread_create(QemuThread *thread, const char *name,
     if (!hThread) {
         error_exit(GetLastError(), __func__);
     }
+    if (name_threads && name && !set_thread_description(hThread, name)) {
+        fprintf(stderr, "qemu: failed to set thread description: %s\n", name);
+    }
     CloseHandle(hThread);
+
     thread->data = data;
 }
 
+int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus,
+                             unsigned long nbits)
+{
+    return -ENOSYS;
+}
+
+int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus,
+                             unsigned long *nbits)
+{
+    return -ENOSYS;
+}
+
 void qemu_thread_get_self(QemuThread *thread)
 {
     thread->data = qemu_thread_data;
index baf3317f7456e6d3f2d16bd244f03ba3f04cd467..cc1326f72646c4063adb691b4dc9260ecd172aaf 100644 (file)
@@ -27,6 +27,8 @@
 /***********************************************************/
 /* real time host monotonic timer */
 
+int64_t clock_start;
+
 #ifdef _WIN32
 
 int64_t clock_freq;
@@ -41,6 +43,7 @@ static void __attribute__((constructor)) init_get_clock(void)
         exit(1);
     }
     clock_freq = freq.QuadPart;
+    clock_start = get_clock();
 }
 
 #else
@@ -55,5 +58,6 @@ static void __attribute__((constructor)) init_get_clock(void)
     if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
         use_rt_clock = 1;
     }
+    clock_start = get_clock();
 }
 #endif
index 81c28af517e2a0aa399d854eb97d9f411138b5f4..6a0de33dd2b86c339dc9dc5abe6548e44c918cb4 100644 (file)
@@ -29,7 +29,6 @@
 #include "sysemu/cpu-timers.h"
 #include "sysemu/replay.h"
 #include "sysemu/cpus.h"
-#include "sysemu/qtest.h"
 
 #ifdef CONFIG_POSIX
 #include <pthread.h>
@@ -101,7 +100,7 @@ QEMUTimerList *timerlist_new(QEMUClockType type,
     QEMUTimerList *timer_list;
     QEMUClock *clock = qemu_clock_ptr(type);
 
-    timer_list = g_malloc0(sizeof(QEMUTimerList));
+    timer_list = g_new0(QEMUTimerList, 1);
     qemu_event_init(&timer_list->timers_done_ev, true);
     timer_list->clock = clock;
     timer_list->notify_cb = cb;
@@ -262,6 +261,9 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
     }
 
     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
+        if (!qatomic_read(&timer_list->active_timers)) {
+            continue;
+        }
         qemu_mutex_lock(&timer_list->active_timers_lock);
         ts = timer_list->active_timers;
         /* Skip all external timers */
index 152b8d74f0b12cf4ed75e543575f742d61c4b563..92c6b787593d60128bd42561d65ea15b4a37c56d 100644 (file)
@@ -69,6 +69,7 @@
 #include "qemu/qht.h"
 #include "qemu/atomic.h"
 #include "qemu/rcu.h"
+#include "qemu/memalign.h"
 
 //#define QHT_DEBUG
 
@@ -150,6 +151,22 @@ struct qht_bucket {
 
 QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);
 
+/*
+ * Under TSAN, we use striped locks instead of one lock per bucket chain.
+ * This avoids crashing under TSAN, since TSAN aborts the program if more than
+ * 64 locks are held (this is a hardcoded limit in TSAN).
+ * When resizing a QHT we grab all the buckets' locks, which can easily
+ * go over TSAN's limit. By using striped locks, we avoid this problem.
+ *
+ * Note: this number must be a power of two for easy index computation.
+ */
+#define QHT_TSAN_BUCKET_LOCKS_BITS 4
+#define QHT_TSAN_BUCKET_LOCKS (1 << QHT_TSAN_BUCKET_LOCKS_BITS)
+
+struct qht_tsan_lock {
+    QemuSpin lock;
+} QEMU_ALIGNED(QHT_BUCKET_ALIGN);
+
 /**
  * struct qht_map - structure to track an array of buckets
  * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind
@@ -159,6 +176,7 @@ QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);
  * @n_added_buckets: number of added (i.e. "non-head") buckets
  * @n_added_buckets_threshold: threshold to trigger an upward resize once the
  *                             number of added buckets surpasses it.
+ * @tsan_bucket_locks: Array of striped locks to be used only under TSAN.
  *
  * Buckets are tracked in what we call a "map", i.e. this structure.
  */
@@ -168,6 +186,9 @@ struct qht_map {
     size_t n_buckets;
     size_t n_added_buckets;
     size_t n_added_buckets_threshold;
+#ifdef CONFIG_TSAN
+    struct qht_tsan_lock tsan_bucket_locks[QHT_TSAN_BUCKET_LOCKS];
+#endif
 };
 
 /* trigger a resize when n_added_buckets > n_buckets / div */
@@ -228,10 +249,56 @@ static inline size_t qht_elems_to_buckets(size_t n_elems)
     return pow2ceil(n_elems / QHT_BUCKET_ENTRIES);
 }
 
-static inline void qht_head_init(struct qht_bucket *b)
+/*
+ * When using striped locks (i.e. under TSAN), we have to be careful not
+ * to operate on the same lock twice (e.g. when iterating through all buckets).
+ * We achieve this by operating only on each stripe's first matching lock.
+ */
+static inline void qht_do_if_first_in_stripe(struct qht_map *map,
+                                             struct qht_bucket *b,
+                                             void (*func)(QemuSpin *spin))
+{
+#ifdef CONFIG_TSAN
+    unsigned long bucket_idx = b - map->buckets;
+    bool is_first_in_stripe = (bucket_idx >> QHT_TSAN_BUCKET_LOCKS_BITS) == 0;
+    if (is_first_in_stripe) {
+        unsigned long lock_idx = bucket_idx & (QHT_TSAN_BUCKET_LOCKS - 1);
+        func(&map->tsan_bucket_locks[lock_idx].lock);
+    }
+#else
+    func(&b->lock);
+#endif
+}
+
+static inline void qht_bucket_lock_do(struct qht_map *map,
+                                      struct qht_bucket *b,
+                                      void (*func)(QemuSpin *lock))
+{
+#ifdef CONFIG_TSAN
+    unsigned long bucket_idx = b - map->buckets;
+    unsigned long lock_idx = bucket_idx & (QHT_TSAN_BUCKET_LOCKS - 1);
+    func(&map->tsan_bucket_locks[lock_idx].lock);
+#else
+    func(&b->lock);
+#endif
+}
+
+static inline void qht_bucket_lock(struct qht_map *map,
+                                   struct qht_bucket *b)
+{
+    qht_bucket_lock_do(map, b, qemu_spin_lock);
+}
+
+static inline void qht_bucket_unlock(struct qht_map *map,
+                                     struct qht_bucket *b)
+{
+    qht_bucket_lock_do(map, b, qemu_spin_unlock);
+}
+
+static inline void qht_head_init(struct qht_map *map, struct qht_bucket *b)
 {
     memset(b, 0, sizeof(*b));
-    qemu_spin_init(&b->lock);
+    qht_do_if_first_in_stripe(map, b, qemu_spin_init);
     seqlock_init(&b->sequence);
 }
 
@@ -249,7 +316,7 @@ static void qht_map_lock_buckets(struct qht_map *map)
     for (i = 0; i < map->n_buckets; i++) {
         struct qht_bucket *b = &map->buckets[i];
 
-        qemu_spin_lock(&b->lock);
+        qht_do_if_first_in_stripe(map, b, qemu_spin_lock);
     }
 }
 
@@ -260,7 +327,7 @@ static void qht_map_unlock_buckets(struct qht_map *map)
     for (i = 0; i < map->n_buckets; i++) {
         struct qht_bucket *b = &map->buckets[i];
 
-        qemu_spin_unlock(&b->lock);
+        qht_do_if_first_in_stripe(map, b, qemu_spin_unlock);
     }
 }
 
@@ -307,7 +374,7 @@ void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
  * Get a head bucket and lock it, making sure its parent map is not stale.
  * @pmap is filled with a pointer to the bucket's parent map.
  *
- * Unlock with qemu_spin_unlock(&b->lock).
+ * Unlock with qht_bucket_unlock.
  *
  * Note: callers cannot have ht->lock held.
  */
@@ -321,18 +388,18 @@ struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash,
     map = qatomic_rcu_read(&ht->map);
     b = qht_map_to_bucket(map, hash);
 
-    qemu_spin_lock(&b->lock);
+    qht_bucket_lock(map, b);
     if (likely(!qht_map_is_stale__locked(ht, map))) {
         *pmap = map;
         return b;
     }
-    qemu_spin_unlock(&b->lock);
+    qht_bucket_unlock(map, b);
 
     /* we raced with a resize; acquire ht->lock to see the updated ht->map */
     qht_lock(ht);
     map = ht->map;
     b = qht_map_to_bucket(map, hash);
-    qemu_spin_lock(&b->lock);
+    qht_bucket_lock(map, b);
     qht_unlock(ht);
     *pmap = map;
     return b;
@@ -344,12 +411,13 @@ static inline bool qht_map_needs_resize(const struct qht_map *map)
            map->n_added_buckets_threshold;
 }
 
-static inline void qht_chain_destroy(const struct qht_bucket *head)
+static inline void qht_chain_destroy(struct qht_map *map,
+                                     struct qht_bucket *head)
 {
     struct qht_bucket *curr = head->next;
     struct qht_bucket *prev;
 
-    qemu_spin_destroy(&head->lock);
+    qht_do_if_first_in_stripe(map, head, qemu_spin_destroy);
     while (curr) {
         prev = curr;
         curr = curr->next;
@@ -363,7 +431,7 @@ static void qht_map_destroy(struct qht_map *map)
     size_t i;
 
     for (i = 0; i < map->n_buckets; i++) {
-        qht_chain_destroy(&map->buckets[i]);
+        qht_chain_destroy(map, &map->buckets[i]);
     }
     qemu_vfree(map->buckets);
     g_free(map);
@@ -389,7 +457,7 @@ static struct qht_map *qht_map_create(size_t n_buckets)
     map->buckets = qemu_memalign(QHT_BUCKET_ALIGN,
                                  sizeof(*map->buckets) * n_buckets);
     for (i = 0; i < n_buckets; i++) {
-        qht_head_init(&map->buckets[i]);
+        qht_head_init(map, &map->buckets[i]);
     }
     return map;
 }
@@ -485,10 +553,10 @@ bool qht_reset_size(struct qht *ht, size_t n_elems)
 }
 
 static inline
-void *qht_do_lookup(struct qht_bucket *head, qht_lookup_func_t func,
+void *qht_do_lookup(const struct qht_bucket *head, qht_lookup_func_t func,
                     const void *userp, uint32_t hash)
 {
-    struct qht_bucket *b = head;
+    const struct qht_bucket *b = head;
     int i;
 
     do {
@@ -512,7 +580,7 @@ void *qht_do_lookup(struct qht_bucket *head, qht_lookup_func_t func,
 }
 
 static __attribute__((noinline))
-void *qht_lookup__slowpath(struct qht_bucket *b, qht_lookup_func_t func,
+void *qht_lookup__slowpath(const struct qht_bucket *b, qht_lookup_func_t func,
                            const void *userp, uint32_t hash)
 {
     unsigned int version;
@@ -525,10 +593,10 @@ void *qht_lookup__slowpath(struct qht_bucket *b, qht_lookup_func_t func,
     return ret;
 }
 
-void *qht_lookup_custom(struct qht *ht, const void *userp, uint32_t hash,
+void *qht_lookup_custom(const struct qht *ht, const void *userp, uint32_t hash,
                         qht_lookup_func_t func)
 {
-    struct qht_bucket *b;
+    const struct qht_bucket *b;
     const struct qht_map *map;
     unsigned int version;
     void *ret;
@@ -548,7 +616,7 @@ void *qht_lookup_custom(struct qht *ht, const void *userp, uint32_t hash,
     return qht_lookup__slowpath(b, func, userp, hash);
 }
 
-void *qht_lookup(struct qht *ht, const void *userp, uint32_t hash)
+void *qht_lookup(const struct qht *ht, const void *userp, uint32_t hash)
 {
     return qht_lookup_custom(ht, userp, hash, ht->cmp);
 }
@@ -637,7 +705,7 @@ bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing)
     b = qht_bucket_lock__no_stale(ht, hash, &map);
     prev = qht_insert__locked(ht, map, b, p, hash, &needs_resize);
     qht_bucket_debug__locked(b);
-    qemu_spin_unlock(&b->lock);
+    qht_bucket_unlock(map, b);
 
     if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) {
         qht_grow_maybe(ht);
@@ -687,7 +755,7 @@ static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos)
     int i;
 
     if (qht_entry_is_last(orig, pos)) {
-        orig->hashes[pos] = 0;
+        qatomic_set(&orig->hashes[pos], 0);
         qatomic_set(&orig->pointers[pos], NULL);
         return;
     }
@@ -748,7 +816,7 @@ bool qht_remove(struct qht *ht, const void *p, uint32_t hash)
     b = qht_bucket_lock__no_stale(ht, hash, &map);
     ret = qht_remove__locked(b, p, hash);
     qht_bucket_debug__locked(b);
-    qemu_spin_unlock(&b->lock);
+    qht_bucket_unlock(map, b);
     return ret;
 }
 
@@ -901,9 +969,9 @@ bool qht_resize(struct qht *ht, size_t n_elems)
 }
 
 /* pass @stats to qht_statistics_destroy() when done */
-void qht_statistics_init(struct qht *ht, struct qht_stats *stats)
+void qht_statistics_init(const struct qht *ht, struct qht_stats *stats)
 {
-    struct qht_map *map;
+    const struct qht_map *map;
     int i;
 
     map = qatomic_rcu_read(&ht->map);
@@ -920,8 +988,8 @@ void qht_statistics_init(struct qht *ht, struct qht_stats *stats)
     stats->head_buckets = map->n_buckets;
 
     for (i = 0; i < map->n_buckets; i++) {
-        struct qht_bucket *head = &map->buckets[i];
-        struct qht_bucket *b;
+        const struct qht_bucket *head = &map->buckets[i];
+        const struct qht_bucket *b;
         unsigned int version;
         size_t buckets;
         size_t entries;
index bacc5fa2f6f21d752259b70696f161c1cd08e9a6..2fe3764906c53c53ec08572fc165c6f441241e19 100644 (file)
@@ -83,8 +83,8 @@ typedef struct QSPCallSite QSPCallSite;
 struct QSPEntry {
     void *thread_ptr;
     const QSPCallSite *callsite;
-    uint64_t n_acqs;
-    uint64_t ns;
+    aligned_uint64_t n_acqs;
+    aligned_uint64_t ns;
     unsigned int n_objs; /* count of coalesced objs; only used for reporting */
 };
 typedef struct QSPEntry QSPEntry;
@@ -144,7 +144,7 @@ uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t ab)
     uint32_t e = callsite->line;
     uint32_t f = callsite->type;
 
-    return qemu_xxhash6(ab, cd, e, f);
+    return qemu_xxhash8(ab, cd, 0, e, f);
 }
 
 static inline
index a1095691f652fb8b77f83656ebc2e19cc25192e0..31f0b46182e8b1f183f4e2809d1bee8864671a9e 100644 (file)
@@ -310,7 +310,7 @@ q_tree_node_next(QTreeNode *node)
  *
  * Since: 2.70 in GLib. Internal in Qtree, i.e. not in the public API.
  */
-static void
+static void QEMU_DISABLE_CFI
 q_tree_remove_all(QTree *tree)
 {
     QTreeNode *node;
@@ -532,7 +532,7 @@ q_tree_replace(QTree    *tree,
 }
 
 /* internal insert routine */
-static QTreeNode *
+static QTreeNode * QEMU_DISABLE_CFI
 q_tree_insert_internal(QTree    *tree,
                        gpointer  key,
                        gpointer  value,
@@ -721,7 +721,7 @@ q_tree_steal(QTree         *tree,
 }
 
 /* internal remove routine */
-static gboolean
+static gboolean QEMU_DISABLE_CFI
 q_tree_remove_internal(QTree         *tree,
                        gconstpointer  key,
                        gboolean       steal)
@@ -1182,7 +1182,7 @@ q_tree_node_balance(QTreeNode *node)
     return node;
 }
 
-static QTreeNode *
+static QTreeNode * QEMU_DISABLE_CFI
 q_tree_find_node(QTree        *tree,
                  gconstpointer key)
 {
@@ -1388,4 +1388,3 @@ static void q_tree_node_check(QTreeNode *node)
     }
 }
 #endif
-
index 098d9d2dc0b7bd791e9e6bc7d52aabaafe175c24..f3f40098d547b99af662c0ec89f5eb312d296167 100644 (file)
 #include "qemu/osdep.h"
 #include "qemu/range.h"
 
-/*
- * Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap.
- * Both @a and @b must not be empty.
- */
-static inline int range_compare(Range *a, Range *b)
+int range_compare(Range *a, Range *b)
 {
     assert(!range_is_empty(a) && !range_is_empty(b));
 
@@ -70,3 +66,58 @@ GList *range_list_insert(GList *list, Range *data)
 
     return list;
 }
+
+static inline
+GList *append_new_range(GList *list, uint64_t lob, uint64_t upb)
+{
+    Range *new = g_new0(Range, 1);
+
+    range_set_bounds(new, lob, upb);
+    return g_list_append(list, new);
+}
+
+
+void range_inverse_array(GList *in, GList **rev,
+                         uint64_t low, uint64_t high)
+{
+    Range *r, *rn;
+    GList *l = in, *out = *rev;
+
+    for (l = in; l && range_upb(l->data) < low; l = l->next) {
+        continue;
+    }
+
+    if (!l) {
+        out = append_new_range(out, low, high);
+        goto exit;
+    }
+    r = (Range *)l->data;
+
+    /* first range lob is greater than min, insert a first range */
+    if (range_lob(r) > low) {
+        out = append_new_range(out, low, MIN(range_lob(r) - 1, high));
+    }
+
+    /* insert a range in between each original range until we reach high */
+    for (; l->next; l = l->next) {
+        r = (Range *)l->data;
+        rn = (Range *)l->next->data;
+        if (range_lob(r) >= high) {
+            goto exit;
+        }
+        if (range_compare(r, rn)) {
+            out = append_new_range(out, range_upb(r) + 1,
+                                   MIN(range_lob(rn) - 1, high));
+        }
+    }
+
+    /* last range */
+    r = (Range *)l->data;
+
+    /* last range upb is less than max, insert a last range */
+    if (range_upb(r) <  high) {
+        out = append_new_range(out, range_upb(r) + 1, high);
+    }
+exit:
+    *rev = out;
+}
index 13ac0f75cb2a7de3b61f13174fb545b8fd08e314..e587bcc48314b81a5a79bc5c399e88a3cefec245 100644 (file)
@@ -46,6 +46,7 @@
 unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
 
 QemuEvent rcu_gp_event;
+static int in_drain_call_rcu;
 static QemuMutex rcu_registry_lock;
 static QemuMutex rcu_sync_lock;
 
@@ -64,7 +65,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr)
 /* Written to only by each individual reader. Read by both the reader and the
  * writers.
  */
-__thread struct rcu_reader_data rcu_reader;
+QEMU_DEFINE_CO_TLS(struct rcu_reader_data, rcu_reader)
 
 /* Protected by rcu_registry_lock.  */
 typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
@@ -82,12 +83,6 @@ static void wait_for_readers(void)
          */
         qemu_event_reset(&rcu_gp_event);
 
-        /* Instead of using qatomic_mb_set for index->waiting, and
-         * qatomic_mb_read for index->ctr, memory barriers are placed
-         * manually since writes to different threads are independent.
-         * qemu_event_reset has acquire semantics, so no memory barrier
-         * is needed here.
-         */
         QLIST_FOREACH(index, &registry, node) {
             qatomic_set(&index->waiting, true);
         }
@@ -95,6 +90,10 @@ static void wait_for_readers(void)
         /* Here, order the stores to index->waiting before the loads of
          * index->ctr.  Pairs with smp_mb_placeholder() in rcu_read_unlock(),
          * ensuring that the loads of index->ctr are sequentially consistent.
+         *
+         * If this is the last iteration, this barrier also prevents
+         * frees from seeping upwards, and orders the two wait phases
+         * on architectures with 32-bit longs; see synchronize_rcu().
          */
         smp_mb_global();
 
@@ -103,10 +102,12 @@ static void wait_for_readers(void)
                 QLIST_REMOVE(index, node);
                 QLIST_INSERT_HEAD(&qsreaders, index, node);
 
-                /* No need for mb_set here, worst of all we
+                /* No need for memory barriers here, worst of all we
                  * get some extra futex wakeups.
                  */
                 qatomic_set(&index->waiting, false);
+            } else if (qatomic_read(&in_drain_call_rcu)) {
+                notifier_list_notify(&index->force_rcu, NULL);
             }
         }
 
@@ -146,26 +147,26 @@ void synchronize_rcu(void)
 
     /* Write RCU-protected pointers before reading p_rcu_reader->ctr.
      * Pairs with smp_mb_placeholder() in rcu_read_lock().
+     *
+     * Also orders write to RCU-protected pointers before
+     * write to rcu_gp_ctr.
      */
     smp_mb_global();
 
     QEMU_LOCK_GUARD(&rcu_registry_lock);
     if (!QLIST_EMPTY(&registry)) {
-        /* In either case, the qatomic_mb_set below blocks stores that free
-         * old RCU-protected pointers.
-         */
         if (sizeof(rcu_gp_ctr) < 8) {
             /* For architectures with 32-bit longs, a two-subphases algorithm
              * ensures we do not encounter overflow bugs.
              *
              * Switch parity: 0 -> 1, 1 -> 0.
              */
-            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+            qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
             wait_for_readers();
-            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+            qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
         } else {
             /* Increment current grace period.  */
-            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
+            qatomic_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
         }
 
         wait_for_readers();
@@ -188,8 +189,22 @@ static void enqueue(struct rcu_head *node)
     struct rcu_head **old_tail;
 
     node->next = NULL;
+
+    /*
+     * Make this node the tail of the list.  The node will be
+     * used by further enqueue operations, but it will not
+     * be dequeued yet...
+     */
     old_tail = qatomic_xchg(&tail, &node->next);
-    qatomic_mb_set(old_tail, node);
+
+    /*
+     * ... until it is pointed to from another item in the list.
+     * In the meantime, try_dequeue() will find a NULL next pointer
+     * and loop.
+     *
+     * Synchronizes with qatomic_load_acquire() in try_dequeue().
+     */
+    qatomic_store_release(old_tail, node);
 }
 
 static struct rcu_head *try_dequeue(void)
@@ -197,26 +212,31 @@ static struct rcu_head *try_dequeue(void)
     struct rcu_head *node, *next;
 
 retry:
-    /* Test for an empty list, which we do not expect.  Note that for
+    /* Head is only written by this thread, so no need for barriers.  */
+    node = head;
+
+    /*
+     * If the head node has NULL in its next pointer, the value is
+     * wrong and we need to wait until its enqueuer finishes the update.
+     */
+    next = qatomic_load_acquire(&node->next);
+    if (!next) {
+        return NULL;
+    }
+
+    /*
+     * Test for an empty list, which we do not expect.  Note that for
      * the consumer head and tail are always consistent.  The head
      * is consistent because only the consumer reads/writes it.
      * The tail, because it is the first step in the enqueuing.
      * It is only the next pointers that might be inconsistent.
      */
-    if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) {
+    if (head == &dummy && qatomic_read(&tail) == &dummy.next) {
         abort();
     }
 
-    /* If the head node has NULL in its next pointer, the value is
-     * wrong and we need to wait until its enqueuer finishes the update.
-     */
-    node = head;
-    next = qatomic_mb_read(&head->next);
-    if (!next) {
-        return NULL;
-    }
-
-    /* Since we are the sole consumer, and we excluded the empty case
+    /*
+     * Since we are the sole consumer, and we excluded the empty case
      * above, the queue will always have at least two nodes: the
      * dummy node, and the one being removed.  So we do not need to update
      * the tail pointer.
@@ -335,12 +355,14 @@ void drain_call_rcu(void)
      *
      * Note that since we have only one global queue of the RCU callbacks,
      * we also end up waiting for most of RCU callbacks that were registered
-     * on the other threads, but this is a side effect that shoudn't be
+     * on the other threads, but this is a side effect that shouldn't be
      * assumed.
      */
 
+    qatomic_inc(&in_drain_call_rcu);
     call_rcu1(&rcu_drain.rcu, drain_rcu_callback);
     qemu_event_wait(&rcu_drain.drain_complete_event);
+    qatomic_dec(&in_drain_call_rcu);
 
     if (locked) {
         qemu_mutex_lock_iothread();
@@ -350,16 +372,30 @@ void drain_call_rcu(void)
 
 void rcu_register_thread(void)
 {
-    assert(rcu_reader.ctr == 0);
+    assert(get_ptr_rcu_reader()->ctr == 0);
     qemu_mutex_lock(&rcu_registry_lock);
-    QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
+    QLIST_INSERT_HEAD(&registry, get_ptr_rcu_reader(), node);
     qemu_mutex_unlock(&rcu_registry_lock);
 }
 
 void rcu_unregister_thread(void)
 {
     qemu_mutex_lock(&rcu_registry_lock);
-    QLIST_REMOVE(&rcu_reader, node);
+    QLIST_REMOVE(get_ptr_rcu_reader(), node);
+    qemu_mutex_unlock(&rcu_registry_lock);
+}
+
+void rcu_add_force_rcu_notifier(Notifier *n)
+{
+    qemu_mutex_lock(&rcu_registry_lock);
+    notifier_list_add(&get_ptr_rcu_reader()->force_rcu, n);
+    qemu_mutex_unlock(&rcu_registry_lock);
+}
+
+void rcu_remove_force_rcu_notifier(Notifier *n)
+{
+    qemu_mutex_lock(&rcu_registry_lock);
+    notifier_remove(n);
     qemu_mutex_unlock(&rcu_registry_lock);
 }
 
index e534460da68383db53c86e4d3afc14a5a89b2894..494a3d924e6a928d13af781d41a06aae174023ad 100644 (file)
@@ -240,6 +240,9 @@ static void readline_hist_add(ReadLineState *rs, const char *cmdline)
         }
         if (strcmp(hist_entry, cmdline) == 0) {
         same_entry:
+            if (idx == READLINE_MAX_CMDS - 1) {
+                return;
+            }
             new_entry = hist_entry;
             /* Put this entry at the end of history */
             memmove(&rs->history[idx], &rs->history[idx + 1],
@@ -283,6 +286,14 @@ void readline_add_completion(ReadLineState *rs, const char *str)
     }
 }
 
+void readline_add_completion_of(ReadLineState *rs,
+                                const char *pfx, const char *str)
+{
+    if (!strncmp(str, pfx, strlen(pfx))) {
+        readline_add_completion(rs, str);
+    }
+}
+
 void readline_set_completion_index(ReadLineState *rs, int index)
 {
     rs->completion_index = index;
diff --git a/util/reserved-region.c b/util/reserved-region.c
new file mode 100644 (file)
index 0000000..18f83eb
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * QEMU ReservedRegion helpers
+ *
+ * Copyright (c) 2023 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/range.h"
+#include "qemu/reserved-region.h"
+
+GList *resv_region_list_insert(GList *list, ReservedRegion *reg)
+{
+    ReservedRegion *resv_iter, *new_reg;
+    Range *r = &reg->range;
+    Range *range_iter;
+    GList *l;
+
+    for (l = list; l ; ) {
+        resv_iter = (ReservedRegion *)l->data;
+        range_iter = &resv_iter->range;
+
+        /* Skip all list elements strictly less than range to add */
+        if (range_compare(range_iter, r) < 0) {
+            l = l->next;
+        } else if (range_compare(range_iter, r) > 0) {
+            return g_list_insert_before(list, l, reg);
+        } else { /* there is an overlap */
+            if (range_contains_range(r, range_iter)) {
+                /* new range contains current item, simply remove this latter */
+                GList *prev = l->prev;
+                g_free(l->data);
+                list = g_list_delete_link(list, l);
+                if (prev) {
+                    l = prev->next;
+                } else {
+                    l = list;
+                }
+            } else if (range_contains_range(range_iter, r)) {
+                /* new region is included in the current region */
+                if (range_lob(range_iter) == range_lob(r)) {
+                    /* adjacent on the left side, derives into 2 regions */
+                    range_set_bounds(range_iter, range_upb(r) + 1,
+                                     range_upb(range_iter));
+                    return g_list_insert_before(list, l, reg);
+                } else if (range_upb(range_iter) == range_upb(r)) {
+                    /* adjacent on the right side, derives into 2 regions */
+                    range_set_bounds(range_iter, range_lob(range_iter),
+                                     range_lob(r) - 1);
+                    l = l->next;
+                } else {
+                    uint64_t lob = range_lob(range_iter);
+                    /*
+                     * the new range is in the middle of an existing one,
+                     * split this latter into 3 regs instead
+                     */
+                    range_set_bounds(range_iter, range_upb(r) + 1,
+                                     range_upb(range_iter));
+                    new_reg = g_new0(ReservedRegion, 1);
+                    new_reg->type = resv_iter->type;
+                    range_set_bounds(&new_reg->range,
+                                     lob, range_lob(r) - 1);
+                    list = g_list_insert_before(list, l, new_reg);
+                    return g_list_insert_before(list, l, reg);
+                }
+            } else if (range_lob(r) < range_lob(range_iter)) {
+                range_set_bounds(range_iter, range_upb(r) + 1,
+                                 range_upb(range_iter));
+                return g_list_insert_before(list, l, reg);
+            } else { /* intersection on the upper range */
+                range_set_bounds(range_iter, range_lob(range_iter),
+                                 range_lob(r) - 1);
+                l = l->next;
+            }
+        } /* overlap */
+    }
+    return g_list_append(list, reg);
+}
+
index 2ec99dfdda17f40e0c17b61f7b33cb406f1f620e..483cb617e219cc0a675dcc2f862d443c5a5a2d6f 100644 (file)
 #include "qemu/cutils.h"
 #include "qemu/selfmap.h"
 
-GSList *read_self_maps(void)
+IntervalTreeRoot *read_self_maps(void)
 {
-    gchar *maps;
-    GSList *map_info = NULL;
+    IntervalTreeRoot *root;
+    gchar *maps, **lines;
+    guint i, nlines;
 
-    if (g_file_get_contents("/proc/self/maps", &maps, NULL, NULL)) {
-        gchar **lines = g_strsplit(maps, "\n", 0);
-        int i, entries = g_strv_length(lines);
+    if (!g_file_get_contents("/proc/self/maps", &maps, NULL, NULL)) {
+        return NULL;
+    }
+
+    root = g_new0(IntervalTreeRoot, 1);
+    lines = g_strsplit(maps, "\n", 0);
+    nlines = g_strv_length(lines);
+
+    for (i = 0; i < nlines; i++) {
+        gchar **fields = g_strsplit(lines[i], " ", 6);
+        guint nfields = g_strv_length(fields);
+
+        if (nfields > 4) {
+            uint64_t start, end, offset, inode;
+            unsigned dev_maj, dev_min;
+            int errors = 0;
+            const char *p;
+
+            errors |= qemu_strtou64(fields[0], &p, 16, &start);
+            errors |= qemu_strtou64(p + 1, NULL, 16, &end);
+            errors |= qemu_strtou64(fields[2], NULL, 16, &offset);
+            errors |= qemu_strtoui(fields[3], &p, 16, &dev_maj);
+            errors |= qemu_strtoui(p + 1, NULL, 16, &dev_min);
+            errors |= qemu_strtou64(fields[4], NULL, 10, &inode);
+
+            if (!errors) {
+                size_t path_len;
+                MapInfo *e;
+
+                if (nfields == 6) {
+                    p = fields[5];
+                    p += strspn(p, " ");
+                    path_len = strlen(p) + 1;
+                } else {
+                    p = NULL;
+                    path_len = 0;
+                }
 
-        for (i = 0; i < entries; i++) {
-            gchar **fields = g_strsplit(lines[i], " ", 6);
-            if (g_strv_length(fields) > 4) {
-                MapInfo *e = g_new0(MapInfo, 1);
-                int errors;
-                const char *end;
+                e = g_malloc0(sizeof(*e) + path_len);
 
-                errors  = qemu_strtoul(fields[0], &end, 16, &e->start);
-                errors += qemu_strtoul(end + 1, NULL, 16, &e->end);
+                e->itree.start = start;
+                e->itree.last = end - 1;
+                e->offset = offset;
+                e->dev = makedev(dev_maj, dev_min);
+                e->inode = inode;
 
                 e->is_read  = fields[1][0] == 'r';
                 e->is_write = fields[1][1] == 'w';
                 e->is_exec  = fields[1][2] == 'x';
                 e->is_priv  = fields[1][3] == 'p';
 
-                errors += qemu_strtoul(fields[2], NULL, 16, &e->offset);
-                e->dev = g_strdup(fields[3]);
-                errors += qemu_strtou64(fields[4], NULL, 10, &e->inode);
-
-                /*
-                 * The last field may have leading spaces which we
-                 * need to strip.
-                 */
-                if (g_strv_length(fields) == 6) {
-                    e->path = g_strdup(g_strchug(fields[5]));
+                if (path_len) {
+                    e->path = memcpy(e + 1, p, path_len);
                 }
-                map_info = g_slist_prepend(map_info, e);
-            }
 
-            g_strfreev(fields);
+                interval_tree_insert(&e->itree, root);
+            }
         }
-        g_strfreev(lines);
-        g_free(maps);
+        g_strfreev(fields);
     }
+    g_strfreev(lines);
+    g_free(maps);
 
-    /* ensure the map data is in the same order we collected it */
-    return g_slist_reverse(map_info);
+    return root;
 }
 
 /**
  * free_self_maps:
- * @info: a GSlist
+ * @root: an interval tree
  *
- * Free a list of MapInfo structures.
+ * Free a tree of MapInfo structures.
+ * Since we allocated each MapInfo in one chunk, we need not consider the
+ * contents and can simply free each RBNode.
  */
-static void free_info(gpointer data)
+
+static void free_rbnode(RBNode *n)
 {
-    MapInfo *e = (MapInfo *) data;
-    g_free(e->dev);
-    g_free(e->path);
-    g_free(e);
+    if (n) {
+        free_rbnode(n->rb_left);
+        free_rbnode(n->rb_right);
+        g_free(n);
+    }
 }
 
-void free_self_maps(GSList *info)
+void free_self_maps(IntervalTreeRoot *root)
 {
-    g_slist_free_full(info, &free_info);
+    if (root) {
+        free_rbnode(root->rb_root.rb_node);
+        g_free(root);
+    }
 }
index 897613c94965dee4783c8ef38fc6dc309a57a766..09736014ec67af0bc83a2462de29d76df2db59d5 100644 (file)
@@ -57,6 +57,17 @@ uint64_t stat64_get(const Stat64 *s)
     return ((uint64_t)high << 32) | low;
 }
 
+void stat64_set(Stat64 *s, uint64_t val)
+{
+    while (!stat64_wrtrylock(s)) {
+        cpu_relax();
+    }
+
+    qatomic_set(&s->high, val >> 32);
+    qatomic_set(&s->low, val);
+    stat64_wrunlock(s);
+}
+
 bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high)
 {
     uint32_t old;
index 5bcac9b40169081fbe1000475321f7a84357e190..ced518f771bfe37eb43cd5d70b0bd4186108d88d 100644 (file)
@@ -51,6 +51,7 @@ unsigned int check_socket_activation(void)
     /* So these are not passed to any child processes we might start. */
     unsetenv("LISTEN_FDS");
     unsetenv("LISTEN_PID");
+    unsetenv("LISTEN_FDNAMES");
 
     /* So the file descriptors don't leak into child processes. */
     for (i = 0; i < nr_fds; ++i) {
diff --git a/util/thread-context.c b/util/thread-context.c
new file mode 100644 (file)
index 0000000..2bc7883
--- /dev/null
@@ -0,0 +1,356 @@
+/*
+ * QEMU Thread Context
+ *
+ * Copyright Red Hat Inc., 2022
+ *
+ * Authors:
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/thread-context.h"
+#include "qapi/error.h"
+#include "qapi/qapi-builtin-visit.h"
+#include "qapi/visitor.h"
+#include "qemu/config-file.h"
+#include "qapi/qapi-builtin-visit.h"
+#include "qom/object_interfaces.h"
+#include "qemu/module.h"
+#include "qemu/bitmap.h"
+
+#ifdef CONFIG_NUMA
+#include <numa.h>
+#endif
+
+enum {
+    TC_CMD_NONE = 0,
+    TC_CMD_STOP,
+    TC_CMD_NEW,
+};
+
+typedef struct ThreadContextCmdNew {
+    QemuThread *thread;
+    const char *name;
+    void *(*start_routine)(void *);
+    void *arg;
+    int mode;
+} ThreadContextCmdNew;
+
+static void *thread_context_run(void *opaque)
+{
+    ThreadContext *tc = opaque;
+
+    tc->thread_id = qemu_get_thread_id();
+    qemu_sem_post(&tc->sem);
+
+    while (true) {
+        /*
+         * Threads inherit the CPU affinity of the creating thread. For this
+         * reason, we create new (especially short-lived) threads from our
+         * persistent context thread.
+         *
+         * Especially when QEMU is not allowed to set the affinity itself,
+         * management tools can simply set the affinity of the context thread
+         * after creating the context, to have new threads created via
+         * the context inherit the CPU affinity automatically.
+         */
+        switch (tc->thread_cmd) {
+        case TC_CMD_NONE:
+            break;
+        case TC_CMD_STOP:
+            tc->thread_cmd = TC_CMD_NONE;
+            qemu_sem_post(&tc->sem);
+            return NULL;
+        case TC_CMD_NEW: {
+            ThreadContextCmdNew *cmd_new = tc->thread_cmd_data;
+
+            qemu_thread_create(cmd_new->thread, cmd_new->name,
+                               cmd_new->start_routine, cmd_new->arg,
+                               cmd_new->mode);
+            tc->thread_cmd = TC_CMD_NONE;
+            tc->thread_cmd_data = NULL;
+            qemu_sem_post(&tc->sem);
+            break;
+        }
+        default:
+            g_assert_not_reached();
+        }
+        qemu_sem_wait(&tc->sem_thread);
+    }
+}
+
+static void thread_context_set_cpu_affinity(Object *obj, Visitor *v,
+                                            const char *name, void *opaque,
+                                            Error **errp)
+{
+    ThreadContext *tc = THREAD_CONTEXT(obj);
+    uint16List *l, *host_cpus = NULL;
+    unsigned long *bitmap = NULL;
+    int nbits = 0, ret;
+
+    if (tc->init_cpu_bitmap) {
+        error_setg(errp, "Mixing CPU and node affinity not supported");
+        return;
+    }
+
+    if (!visit_type_uint16List(v, name, &host_cpus, errp)) {
+        return;
+    }
+
+    if (!host_cpus) {
+        error_setg(errp, "CPU list is empty");
+        goto out;
+    }
+
+    for (l = host_cpus; l; l = l->next) {
+        nbits = MAX(nbits, l->value + 1);
+    }
+    bitmap = bitmap_new(nbits);
+    for (l = host_cpus; l; l = l->next) {
+        set_bit(l->value, bitmap);
+    }
+
+    if (tc->thread_id != -1) {
+        /*
+         * Note: we won't be adjusting the affinity of any thread that is still
+         * around, but only the affinity of the context thread.
+         */
+        ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits);
+        if (ret) {
+            error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret));
+        }
+    } else {
+        tc->init_cpu_bitmap = bitmap;
+        bitmap = NULL;
+        tc->init_cpu_nbits = nbits;
+    }
+out:
+    g_free(bitmap);
+    qapi_free_uint16List(host_cpus);
+}
+
+static void thread_context_get_cpu_affinity(Object *obj, Visitor *v,
+                                            const char *name, void *opaque,
+                                            Error **errp)
+{
+    unsigned long *bitmap, nbits, value;
+    ThreadContext *tc = THREAD_CONTEXT(obj);
+    uint16List *host_cpus = NULL;
+    uint16List **tail = &host_cpus;
+    int ret;
+
+    if (tc->thread_id == -1) {
+        error_setg(errp, "Object not initialized yet");
+        return;
+    }
+
+    ret = qemu_thread_get_affinity(&tc->thread, &bitmap, &nbits);
+    if (ret) {
+        error_setg(errp, "Getting CPU affinity failed: %s", strerror(ret));
+        return;
+    }
+
+    value = find_first_bit(bitmap, nbits);
+    while (value < nbits) {
+        QAPI_LIST_APPEND(tail, value);
+
+        value = find_next_bit(bitmap, nbits, value + 1);
+    }
+    g_free(bitmap);
+
+    visit_type_uint16List(v, name, &host_cpus, errp);
+    qapi_free_uint16List(host_cpus);
+}
+
+static void thread_context_set_node_affinity(Object *obj, Visitor *v,
+                                             const char *name, void *opaque,
+                                             Error **errp)
+{
+#ifdef CONFIG_NUMA
+    const int nbits = numa_num_possible_cpus();
+    ThreadContext *tc = THREAD_CONTEXT(obj);
+    uint16List *l, *host_nodes = NULL;
+    unsigned long *bitmap = NULL;
+    struct bitmask *tmp_cpus;
+    int ret, i;
+
+    if (tc->init_cpu_bitmap) {
+        error_setg(errp, "Mixing CPU and node affinity not supported");
+        return;
+    }
+
+    if (!visit_type_uint16List(v, name, &host_nodes, errp)) {
+        return;
+    }
+
+    if (!host_nodes) {
+        error_setg(errp, "Node list is empty");
+        goto out;
+    }
+
+    bitmap = bitmap_new(nbits);
+    tmp_cpus = numa_allocate_cpumask();
+    for (l = host_nodes; l; l = l->next) {
+        numa_bitmask_clearall(tmp_cpus);
+        ret = numa_node_to_cpus(l->value, tmp_cpus);
+        if (ret) {
+            /* We ignore any errors, such as impossible nodes. */
+            continue;
+        }
+        for (i = 0; i < nbits; i++) {
+            if (numa_bitmask_isbitset(tmp_cpus, i)) {
+                set_bit(i, bitmap);
+            }
+        }
+    }
+    numa_free_cpumask(tmp_cpus);
+
+    if (bitmap_empty(bitmap, nbits)) {
+        error_setg(errp, "The nodes select no CPUs");
+        goto out;
+    }
+
+    if (tc->thread_id != -1) {
+        /*
+         * Note: we won't be adjusting the affinity of any thread that is still
+         * around for now, but only the affinity of the context thread.
+         */
+        ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits);
+        if (ret) {
+            error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret));
+        }
+    } else {
+        tc->init_cpu_bitmap = bitmap;
+        bitmap = NULL;
+        tc->init_cpu_nbits = nbits;
+    }
+out:
+    g_free(bitmap);
+    qapi_free_uint16List(host_nodes);
+#else
+    error_setg(errp, "NUMA node affinity is not supported by this QEMU");
+#endif
+}
+
+static void thread_context_get_thread_id(Object *obj, Visitor *v,
+                                         const char *name, void *opaque,
+                                         Error **errp)
+{
+    ThreadContext *tc = THREAD_CONTEXT(obj);
+    uint64_t value = tc->thread_id;
+
+    visit_type_uint64(v, name, &value, errp);
+}
+
+static void thread_context_instance_complete(UserCreatable *uc, Error **errp)
+{
+    ThreadContext *tc = THREAD_CONTEXT(uc);
+    char *thread_name;
+    int ret;
+
+    thread_name = g_strdup_printf("TC %s",
+                               object_get_canonical_path_component(OBJECT(uc)));
+    qemu_thread_create(&tc->thread, thread_name, thread_context_run, tc,
+                       QEMU_THREAD_JOINABLE);
+    g_free(thread_name);
+
+    /* Wait until initialization of the thread is done. */
+    while (tc->thread_id == -1) {
+        qemu_sem_wait(&tc->sem);
+    }
+
+    if (tc->init_cpu_bitmap) {
+        ret = qemu_thread_set_affinity(&tc->thread, tc->init_cpu_bitmap,
+                                       tc->init_cpu_nbits);
+        if (ret) {
+            error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret));
+        }
+        g_free(tc->init_cpu_bitmap);
+        tc->init_cpu_bitmap = NULL;
+    }
+}
+
+static void thread_context_class_init(ObjectClass *oc, void *data)
+{
+    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+
+    ucc->complete = thread_context_instance_complete;
+    object_class_property_add(oc, "thread-id", "int",
+                              thread_context_get_thread_id, NULL, NULL,
+                              NULL);
+    object_class_property_add(oc, "cpu-affinity", "int",
+                              thread_context_get_cpu_affinity,
+                              thread_context_set_cpu_affinity, NULL, NULL);
+    object_class_property_add(oc, "node-affinity", "int", NULL,
+                              thread_context_set_node_affinity, NULL, NULL);
+}
+
+static void thread_context_instance_init(Object *obj)
+{
+    ThreadContext *tc = THREAD_CONTEXT(obj);
+
+    tc->thread_id = -1;
+    qemu_sem_init(&tc->sem, 0);
+    qemu_sem_init(&tc->sem_thread, 0);
+    qemu_mutex_init(&tc->mutex);
+}
+
+static void thread_context_instance_finalize(Object *obj)
+{
+    ThreadContext *tc = THREAD_CONTEXT(obj);
+
+    if (tc->thread_id != -1) {
+        tc->thread_cmd = TC_CMD_STOP;
+        qemu_sem_post(&tc->sem_thread);
+        qemu_thread_join(&tc->thread);
+    }
+    qemu_sem_destroy(&tc->sem);
+    qemu_sem_destroy(&tc->sem_thread);
+    qemu_mutex_destroy(&tc->mutex);
+}
+
+static const TypeInfo thread_context_info = {
+    .name = TYPE_THREAD_CONTEXT,
+    .parent = TYPE_OBJECT,
+    .class_init = thread_context_class_init,
+    .instance_size = sizeof(ThreadContext),
+    .instance_init = thread_context_instance_init,
+    .instance_finalize = thread_context_instance_finalize,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+static void thread_context_register_types(void)
+{
+    type_register_static(&thread_context_info);
+}
+type_init(thread_context_register_types)
+
+void thread_context_create_thread(ThreadContext *tc, QemuThread *thread,
+                                  const char *name,
+                                  void *(*start_routine)(void *), void *arg,
+                                  int mode)
+{
+    ThreadContextCmdNew data = {
+        .thread = thread,
+        .name = name,
+        .start_routine = start_routine,
+        .arg = arg,
+        .mode = mode,
+    };
+
+    qemu_mutex_lock(&tc->mutex);
+    tc->thread_cmd = TC_CMD_NEW;
+    tc->thread_cmd_data = &data;
+    qemu_sem_post(&tc->sem_thread);
+
+    while (tc->thread_cmd != TC_CMD_NONE) {
+        qemu_sem_wait(&tc->sem);
+    }
+    qemu_mutex_unlock(&tc->mutex);
+}
index 785487b8767d96ca76c643f1851f4d7312cd46fc..27eb777e855b62818a64cc5ad4f4937903b64ed1 100644 (file)
@@ -15,6 +15,7 @@
  * GNU GPL, version 2 or (at your option) any later version.
  */
 #include "qemu/osdep.h"
+#include "qemu/defer-call.h"
 #include "qemu/queue.h"
 #include "qemu/thread.h"
 #include "qemu/coroutine.h"
@@ -48,7 +49,7 @@ struct ThreadPoolElement {
     /* Access to this list is protected by lock.  */
     QTAILQ_ENTRY(ThreadPoolElement) reqs;
 
-    /* Access to this list is protected by the global mutex.  */
+    /* This list is only written by the thread pool's mother thread.  */
     QLIST_ENTRY(ThreadPoolElement) all;
 };
 
@@ -57,8 +58,7 @@ struct ThreadPool {
     QEMUBH *completion_bh;
     QemuMutex lock;
     QemuCond worker_stopped;
-    QemuSemaphore sem;
-    int max_threads;
+    QemuCond request_cond;
     QEMUBH *new_thread_bh;
 
     /* The following variables are only accessed from one AioContext. */
@@ -70,7 +70,8 @@ struct ThreadPool {
     int idle_threads;
     int new_threads;     /* backlog of threads we need to create */
     int pending_threads; /* threads created but not running yet */
-    bool stopping;
+    int min_threads;
+    int max_threads;
 };
 
 static void *worker_thread(void *opaque)
@@ -81,19 +82,25 @@ static void *worker_thread(void *opaque)
     pool->pending_threads--;
     do_spawn_thread(pool);
 
-    while (!pool->stopping) {
+    while (pool->cur_threads <= pool->max_threads) {
         ThreadPoolElement *req;
         int ret;
 
-        do {
+        if (QTAILQ_EMPTY(&pool->request_list)) {
             pool->idle_threads++;
-            qemu_mutex_unlock(&pool->lock);
-            ret = qemu_sem_timedwait(&pool->sem, 10000);
-            qemu_mutex_lock(&pool->lock);
+            ret = qemu_cond_timedwait(&pool->request_cond, &pool->lock, 10000);
             pool->idle_threads--;
-        } while (ret == -1 && !QTAILQ_EMPTY(&pool->request_list));
-        if (ret == -1 || pool->stopping) {
-            break;
+            if (ret == 0 &&
+                QTAILQ_EMPTY(&pool->request_list) &&
+                pool->cur_threads > pool->min_threads) {
+                /* Timed out + no work to do + no need for warm threads = exit.  */
+                break;
+            }
+            /*
+             * Even if there was some work to do, check if there aren't
+             * too many worker threads before picking it up.
+             */
+            continue;
         }
 
         req = QTAILQ_FIRST(&pool->request_list);
@@ -108,13 +115,18 @@ static void *worker_thread(void *opaque)
         smp_wmb();
         req->state = THREAD_DONE;
 
-        qemu_mutex_lock(&pool->lock);
-
         qemu_bh_schedule(pool->completion_bh);
+        qemu_mutex_lock(&pool->lock);
     }
 
     pool->cur_threads--;
     qemu_cond_signal(&pool->worker_stopped);
+
+    /*
+     * Wake up another thread, in case we got a wakeup but decided
+     * to exit due to pool->cur_threads > pool->max_threads.
+     */
+    qemu_cond_signal(&pool->request_cond);
     qemu_mutex_unlock(&pool->lock);
     return NULL;
 }
@@ -164,7 +176,8 @@ static void thread_pool_completion_bh(void *opaque)
     ThreadPool *pool = opaque;
     ThreadPoolElement *elem, *next;
 
-    aio_context_acquire(pool->ctx);
+    defer_call_begin(); /* cb() may use defer_call() to coalesce work */
+
 restart:
     QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
         if (elem->state != THREAD_DONE) {
@@ -184,9 +197,7 @@ restart:
              */
             qemu_bh_schedule(pool->completion_bh);
 
-            aio_context_release(pool->ctx);
             elem->common.cb(elem->common.opaque, elem->ret);
-            aio_context_acquire(pool->ctx);
 
             /* We can safely cancel the completion_bh here regardless of someone
              * else having scheduled it meanwhile because we reenter the
@@ -200,7 +211,8 @@ restart:
             qemu_aio_unref(elem);
         }
     }
-    aio_context_release(pool->ctx);
+
+    defer_call_end();
 }
 
 static void thread_pool_cancel(BlockAIOCB *acb)
@@ -211,13 +223,7 @@ static void thread_pool_cancel(BlockAIOCB *acb)
     trace_thread_pool_cancel(elem, elem->common.opaque);
 
     QEMU_LOCK_GUARD(&pool->lock);
-    if (elem->state == THREAD_QUEUED &&
-        /* No thread has yet started working on elem. we can try to "steal"
-         * the item from the worker if we can get a signal from the
-         * semaphore.  Because this is non-blocking, we can do it with
-         * the lock taken and ensure that elem will remain THREAD_QUEUED.
-         */
-        qemu_sem_timedwait(&pool->sem, 0) == 0) {
+    if (elem->state == THREAD_QUEUED) {
         QTAILQ_REMOVE(&pool->request_list, elem, reqs);
         qemu_bh_schedule(pool->completion_bh);
 
@@ -227,24 +233,20 @@ static void thread_pool_cancel(BlockAIOCB *acb)
 
 }
 
-static AioContext *thread_pool_get_aio_context(BlockAIOCB *acb)
-{
-    ThreadPoolElement *elem = (ThreadPoolElement *)acb;
-    ThreadPool *pool = elem->pool;
-    return pool->ctx;
-}
-
 static const AIOCBInfo thread_pool_aiocb_info = {
     .aiocb_size         = sizeof(ThreadPoolElement),
     .cancel_async       = thread_pool_cancel,
-    .get_aio_context    = thread_pool_get_aio_context,
 };
 
-BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
-        ThreadPoolFunc *func, void *arg,
-        BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
+                                   BlockCompletionFunc *cb, void *opaque)
 {
     ThreadPoolElement *req;
+    AioContext *ctx = qemu_get_current_aio_context();
+    ThreadPool *pool = aio_get_thread_pool(ctx);
+
+    /* Assert that the thread submitting work is the same running the pool */
+    assert(pool->ctx == qemu_get_current_aio_context());
 
     req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
     req->func = func;
@@ -262,7 +264,7 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
     }
     QTAILQ_INSERT_TAIL(&pool->request_list, req, reqs);
     qemu_mutex_unlock(&pool->lock);
-    qemu_sem_post(&pool->sem);
+    qemu_cond_signal(&pool->request_cond);
     return &req->common;
 }
 
@@ -279,19 +281,45 @@ static void thread_pool_co_cb(void *opaque, int ret)
     aio_co_wake(co->co);
 }
 
-int coroutine_fn thread_pool_submit_co(ThreadPool *pool, ThreadPoolFunc *func,
-                                       void *arg)
+int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg)
 {
     ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS };
     assert(qemu_in_coroutine());
-    thread_pool_submit_aio(pool, func, arg, thread_pool_co_cb, &tpc);
+    thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc);
     qemu_coroutine_yield();
     return tpc.ret;
 }
 
-void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg)
+void thread_pool_submit(ThreadPoolFunc *func, void *arg)
 {
-    thread_pool_submit_aio(pool, func, arg, NULL, NULL);
+    thread_pool_submit_aio(func, arg, NULL, NULL);
+}
+
+void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
+{
+    qemu_mutex_lock(&pool->lock);
+
+    pool->min_threads = ctx->thread_pool_min;
+    pool->max_threads = ctx->thread_pool_max;
+
+    /*
+     * We either have to:
+     *  - Increase the number available of threads until over the min_threads
+     *    threshold.
+     *  - Bump the worker threads so that they exit, until under the max_threads
+     *    threshold.
+     *  - Do nothing. The current number of threads fall in between the min and
+     *    max thresholds. We'll let the pool manage itself.
+     */
+    for (int i = pool->cur_threads; i < pool->min_threads; i++) {
+        spawn_thread(pool);
+    }
+
+    for (int i = pool->cur_threads; i > pool->max_threads; i--) {
+        qemu_cond_signal(&pool->request_cond);
+    }
+
+    qemu_mutex_unlock(&pool->lock);
 }
 
 static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
@@ -305,17 +333,13 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
     pool->completion_bh = aio_bh_new(ctx, thread_pool_completion_bh, pool);
     qemu_mutex_init(&pool->lock);
     qemu_cond_init(&pool->worker_stopped);
-    qemu_sem_init(&pool->sem, 0);
-    if (sizeof(pool) == 4) {
-        /* 32bit systems run out of virtual memory quickly */
-        pool->max_threads = 4;
-    } else {
-        pool->max_threads = 64;
-    }
+    qemu_cond_init(&pool->request_cond);
     pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool);
 
     QLIST_INIT(&pool->head);
     QTAILQ_INIT(&pool->request_list);
+
+    thread_pool_update_params(pool, ctx);
 }
 
 ThreadPool *thread_pool_new(AioContext *ctx)
@@ -341,16 +365,16 @@ void thread_pool_free(ThreadPool *pool)
     pool->new_threads = 0;
 
     /* Wait for worker threads to terminate */
-    pool->stopping = true;
+    pool->max_threads = 0;
+    qemu_cond_broadcast(&pool->request_cond);
     while (pool->cur_threads > 0) {
-        qemu_sem_post(&pool->sem);
         qemu_cond_wait(&pool->worker_stopped, &pool->lock);
     }
 
     qemu_mutex_unlock(&pool->lock);
 
     qemu_bh_delete(pool->completion_bh);
-    qemu_sem_destroy(&pool->sem);
+    qemu_cond_destroy(&pool->request_cond);
     qemu_cond_destroy(&pool->worker_stopped);
     qemu_mutex_destroy(&pool->lock);
     g_free(pool);
index b38e742da53f2572fb33359b434da312b6efb4c9..9582899da3e61532daf5fcd8e8e6a03446707412 100644 (file)
@@ -136,13 +136,14 @@ int64_t throttle_compute_wait(LeakyBucket *bkt)
 
 /* This function compute the time that must be waited while this IO
  *
- * @is_write:   true if the current IO is a write, false if it's a read
+ * @direction:  throttle direction
  * @ret:        time to wait
  */
 static int64_t throttle_compute_wait_for(ThrottleState *ts,
-                                         bool is_write)
+                                         ThrottleDirection direction)
 {
-    BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL,
+    static const BucketType to_check[THROTTLE_MAX][4] = {
+                                  {THROTTLE_BPS_TOTAL,
                                    THROTTLE_OPS_TOTAL,
                                    THROTTLE_BPS_READ,
                                    THROTTLE_OPS_READ},
@@ -153,8 +154,8 @@ static int64_t throttle_compute_wait_for(ThrottleState *ts,
     int64_t wait, max_wait = 0;
     int i;
 
-    for (i = 0; i < 4; i++) {
-        BucketType index = to_check[is_write][i];
+    for (i = 0; i < ARRAY_SIZE(to_check[THROTTLE_READ]); i++) {
+        BucketType index = to_check[direction][i];
         wait = throttle_compute_wait(&ts->cfg.buckets[index]);
         if (wait > max_wait) {
             max_wait = wait;
@@ -166,13 +167,13 @@ static int64_t throttle_compute_wait_for(ThrottleState *ts,
 
 /* compute the timer for this type of operation
  *
- * @is_write:   the type of operation
+ * @direction:  throttle direction
  * @now:        the current clock timestamp
  * @next_timestamp: the resulting timer
  * @ret:        true if a timer must be set
  */
 static bool throttle_compute_timer(ThrottleState *ts,
-                                   bool is_write,
+                                   ThrottleDirection direction,
                                    int64_t now,
                                    int64_t *next_timestamp)
 {
@@ -182,7 +183,7 @@ static bool throttle_compute_timer(ThrottleState *ts,
     throttle_do_leak(ts, now);
 
     /* compute the wait time if any */
-    wait = throttle_compute_wait_for(ts, is_write);
+    wait = throttle_compute_wait_for(ts, direction);
 
     /* if the code must wait compute when the next timer should fire */
     if (wait) {
@@ -199,10 +200,15 @@ static bool throttle_compute_timer(ThrottleState *ts,
 void throttle_timers_attach_aio_context(ThrottleTimers *tt,
                                         AioContext *new_context)
 {
-    tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
-                                  tt->read_timer_cb, tt->timer_opaque);
-    tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
-                                  tt->write_timer_cb, tt->timer_opaque);
+    ThrottleDirection dir;
+
+    for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+        if (tt->timer_cb[dir]) {
+            tt->timers[dir] =
+                aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+                              tt->timer_cb[dir], tt->timer_opaque);
+        }
+    }
 }
 
 /*
@@ -233,11 +239,12 @@ void throttle_timers_init(ThrottleTimers *tt,
                           QEMUTimerCB *write_timer_cb,
                           void *timer_opaque)
 {
+    assert(read_timer_cb || write_timer_cb);
     memset(tt, 0, sizeof(ThrottleTimers));
 
     tt->clock_type = clock_type;
-    tt->read_timer_cb = read_timer_cb;
-    tt->write_timer_cb = write_timer_cb;
+    tt->timer_cb[THROTTLE_READ] = read_timer_cb;
+    tt->timer_cb[THROTTLE_WRITE] = write_timer_cb;
     tt->timer_opaque = timer_opaque;
     throttle_timers_attach_aio_context(tt, aio_context);
 }
@@ -245,9 +252,10 @@ void throttle_timers_init(ThrottleTimers *tt,
 /* destroy a timer */
 static void throttle_timer_destroy(QEMUTimer **timer)
 {
-    assert(*timer != NULL);
+    if (*timer == NULL) {
+        return;
+    }
 
-    timer_del(*timer);
     timer_free(*timer);
     *timer = NULL;
 }
@@ -255,10 +263,10 @@ static void throttle_timer_destroy(QEMUTimer **timer)
 /* Remove timers from event loop */
 void throttle_timers_detach_aio_context(ThrottleTimers *tt)
 {
-    int i;
+    ThrottleDirection dir;
 
-    for (i = 0; i < 2; i++) {
-        throttle_timer_destroy(&tt->timers[i]);
+    for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+        throttle_timer_destroy(&tt->timers[dir]);
     }
 }
 
@@ -271,8 +279,12 @@ void throttle_timers_destroy(ThrottleTimers *tt)
 /* is any throttling timer configured */
 bool throttle_timers_are_initialized(ThrottleTimers *tt)
 {
-    if (tt->timers[0]) {
-        return true;
+    ThrottleDirection dir;
+
+    for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
+        if (tt->timers[dir]) {
+            return true;
+        }
     }
 
     return false;
@@ -414,19 +426,24 @@ void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
  * NOTE: this function is not unit tested due to it's usage of timer_mod
  *
  * @tt:       the timers structure
- * @is_write: the type of operation (read/write)
+ * @direction: throttle direction
  * @ret:      true if the timer has been scheduled else false
  */
 bool throttle_schedule_timer(ThrottleState *ts,
                              ThrottleTimers *tt,
-                             bool is_write)
+                             ThrottleDirection direction)
 {
     int64_t now = qemu_clock_get_ns(tt->clock_type);
     int64_t next_timestamp;
+    QEMUTimer *timer;
     bool must_wait;
 
+    assert(direction < THROTTLE_MAX);
+    timer = tt->timers[direction];
+    assert(timer);
+
     must_wait = throttle_compute_timer(ts,
-                                       is_write,
+                                       direction,
                                        now,
                                        &next_timestamp);
 
@@ -436,48 +453,50 @@ bool throttle_schedule_timer(ThrottleState *ts,
     }
 
     /* request throttled and timer pending -> do nothing */
-    if (timer_pending(tt->timers[is_write])) {
+    if (timer_pending(timer)) {
         return true;
     }
 
     /* request throttled and timer not pending -> arm timer */
-    timer_mod(tt->timers[is_write], next_timestamp);
+    timer_mod(timer, next_timestamp);
     return true;
 }
 
 /* do the accounting for this operation
  *
- * @is_write: the type of operation (read/write)
+ * @direction: throttle direction
  * @size:     the size of the operation
  */
-void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
+void throttle_account(ThrottleState *ts, ThrottleDirection direction,
+                      uint64_t size)
 {
-    const BucketType bucket_types_size[2][2] = {
+    static const BucketType bucket_types_size[THROTTLE_MAX][2] = {
         { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ },
         { THROTTLE_BPS_TOTAL, THROTTLE_BPS_WRITE }
     };
-    const BucketType bucket_types_units[2][2] = {
+    static const BucketType bucket_types_units[THROTTLE_MAX][2] = {
         { THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ },
         { THROTTLE_OPS_TOTAL, THROTTLE_OPS_WRITE }
     };
     double units = 1.0;
     unsigned i;
 
+    assert(direction < THROTTLE_MAX);
     /* if cfg.op_size is defined and smaller than size we compute unit count */
     if (ts->cfg.op_size && size > ts->cfg.op_size) {
         units = (double) size / ts->cfg.op_size;
     }
 
-    for (i = 0; i < 2; i++) {
+    for (i = 0; i < ARRAY_SIZE(bucket_types_size[THROTTLE_READ]); i++) {
         LeakyBucket *bkt;
 
-        bkt = &ts->cfg.buckets[bucket_types_size[is_write][i]];
+        bkt = &ts->cfg.buckets[bucket_types_size[direction][i]];
         bkt->level += size;
         if (bkt->burst_length > 1) {
             bkt->burst_level += size;
         }
 
-        bkt = &ts->cfg.buckets[bucket_types_units[is_write][i]];
+        bkt = &ts->cfg.buckets[bucket_types_units[direction][i]];
         bkt->level += units;
         if (bkt->burst_length > 1) {
             bkt->burst_level += units;
index 61e0d4bcdfef4d86158c74029bb9fe8e750d5751..49a4962e18869d9793349283fce5c5d4a33efe3e 100644 (file)
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
 
 # aio-posix.c
 run_poll_handlers_begin(void *ctx, int64_t max_ns, int64_t timeout) "ctx %p max_ns %"PRId64 " timeout %"PRId64
@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d"
 # async.c
 aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
 aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
+reentrant_aio(void *ctx, const char *name) "ctx %p name %s"
 
 # thread-pool.c
 thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
@@ -51,6 +52,10 @@ qemu_anon_ram_alloc(size_t size, void *ptr) "size %zu ptr %p"
 qemu_vfree(void *ptr) "ptr %p"
 qemu_anon_ram_free(void *ptr, size_t size) "ptr %p size %zu"
 
+# oslib-win32.c
+win32_map_alloc(size_t size) "size:%zd"
+win32_map_free(void *ptr, void *h) "ptr:%p handle:%p"
+
 # hbitmap.c
 hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx"
 hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64
@@ -91,3 +96,17 @@ qemu_vfio_pci_read_config(void *buf, int ofs, int size, uint64_t region_ofs, uin
 qemu_vfio_pci_write_config(void *buf, int ofs, int size, uint64_t region_ofs, uint64_t region_size) "write cfg ptr %p ofs 0x%x size 0x%x (region addr 0x%"PRIx64" size 0x%"PRIx64")"
 qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_size, uint32_t cap_offset) "region '%s' addr 0x%"PRIx64" size 0x%"PRIx64" cap_ofs 0x%"PRIx32
 qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p"
+
+#userfaultfd.c
+uffd_detect_open_mode(int mode) "%d"
+uffd_query_features_nosys(int err) "errno: %i"
+uffd_query_features_api_failed(int err) "errno: %i"
+uffd_create_fd_nosys(int err) "errno: %i"
+uffd_create_fd_api_failed(int err) "errno: %i"
+uffd_create_fd_api_noioctl(uint64_t ioctl_req, uint64_t ioctl_supp) "ioctl_req: 0x%" PRIx64 "ioctl_supp: 0x%" PRIx64
+uffd_register_memory_failed(void *addr, uint64_t length, uint64_t mode, int err) "addr: %p length: %" PRIu64 " mode: 0x%" PRIx64 " errno: %i"
+uffd_unregister_memory_failed(void *addr, uint64_t length, int err) "addr: %p length: %" PRIu64 " errno: %i"
+
+# module.c
+module_load_module(const char *name) "file %s"
+module_lookup_object_type(const char *name) "name %s"
diff --git a/util/transactions.c b/util/transactions.c
new file mode 100644 (file)
index 0000000..2dbdedc
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * Simple transactions API
+ *
+ * Copyright (c) 2021 Virtuozzo International GmbH.
+ *
+ * Author:
+ *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+
+#include "qemu/transactions.h"
+#include "qemu/queue.h"
+
+typedef struct TransactionAction {
+    TransactionActionDrv *drv;
+    void *opaque;
+    QSLIST_ENTRY(TransactionAction) entry;
+} TransactionAction;
+
+struct Transaction {
+    QSLIST_HEAD(, TransactionAction) actions;
+};
+
+Transaction *tran_new(void)
+{
+    Transaction *tran = g_new(Transaction, 1);
+
+    QSLIST_INIT(&tran->actions);
+
+    return tran;
+}
+
+void tran_add(Transaction *tran, TransactionActionDrv *drv, void *opaque)
+{
+    TransactionAction *act;
+
+    act = g_new(TransactionAction, 1);
+    *act = (TransactionAction) {
+        .drv = drv,
+        .opaque = opaque
+    };
+
+    QSLIST_INSERT_HEAD(&tran->actions, act, entry);
+}
+
+void tran_abort(Transaction *tran)
+{
+    TransactionAction *act, *next;
+
+    QSLIST_FOREACH(act, &tran->actions, entry) {
+        if (act->drv->abort) {
+            act->drv->abort(act->opaque);
+        }
+    }
+
+    QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) {
+        if (act->drv->clean) {
+            act->drv->clean(act->opaque);
+        }
+
+        g_free(act);
+    }
+
+    g_free(tran);
+}
+
+void tran_commit(Transaction *tran)
+{
+    TransactionAction *act, *next;
+
+    QSLIST_FOREACH(act, &tran->actions, entry) {
+        if (act->drv->commit) {
+            act->drv->commit(act->opaque);
+        }
+    }
+
+    QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) {
+        if (act->drv->clean) {
+            act->drv->clean(act->opaque);
+        }
+
+        g_free(act);
+    }
+
+    g_free(tran);
+}
index 8bdef841208d249cb1a24116811b38d9480daa32..dcb3305236458d455f4aa0976bfb130720e4c6e7 100644 (file)
@@ -43,8 +43,7 @@
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ * License along with this library. If not, see <https://www.gnu.org/licenses/>.
  *
  * Authors:
  *    Richard W.M. Jones <rjones@redhat.com>
@@ -1340,7 +1339,7 @@ static void uri_clean(URI *uri)
 
 /**
  * uri_free:
- * @uri:  pointer to an URI
+ * @uri:  pointer to an URI, NULL is ignored
  *
  * Free up the URI struct
  */
@@ -1939,15 +1938,9 @@ step_7:
     val = uri_to_string(res);
 
 done:
-    if (ref != NULL) {
-        uri_free(ref);
-    }
-    if (bas != NULL) {
-        uri_free(bas);
-    }
-    if (res != NULL) {
-        uri_free(res);
-    }
+    uri_free(ref);
+    uri_free(bas);
+    uri_free(res);
     return val;
 }
 
@@ -2190,12 +2183,8 @@ done:
     if (remove_path != 0) {
         ref->path = NULL;
     }
-    if (ref != NULL) {
-        uri_free(ref);
-    }
-    if (bas != NULL) {
-        uri_free(bas);
-    }
+    uri_free(ref);
+    uri_free(bas);
 
     return val;
 }
diff --git a/util/userfaultfd.c b/util/userfaultfd.c
new file mode 100644 (file)
index 0000000..fdff486
--- /dev/null
@@ -0,0 +1,386 @@
+/*
+ * Linux UFFD-WP support
+ *
+ * Copyright Virtuozzo GmbH, 2020
+ *
+ * Authors:
+ *  Andrey Gruzdev   <andrey.gruzdev@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/bitops.h"
+#include "qemu/error-report.h"
+#include "qemu/userfaultfd.h"
+#include "trace.h"
+#include <poll.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+
+typedef enum {
+    UFFD_UNINITIALIZED = 0,
+    UFFD_USE_DEV_PATH,
+    UFFD_USE_SYSCALL,
+} uffd_open_mode;
+
+int uffd_open(int flags)
+{
+#if defined(__NR_userfaultfd)
+    static uffd_open_mode open_mode;
+    static int uffd_dev;
+
+    /* Detect how to generate uffd desc when run the 1st time */
+    if (open_mode == UFFD_UNINITIALIZED) {
+        /*
+         * Make /dev/userfaultfd the default approach because it has better
+         * permission controls, meanwhile allows kernel faults without any
+         * privilege requirement (e.g. SYS_CAP_PTRACE).
+         */
+        uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
+        if (uffd_dev >= 0) {
+            open_mode = UFFD_USE_DEV_PATH;
+        } else {
+            /* Fallback to the system call */
+            open_mode = UFFD_USE_SYSCALL;
+        }
+        trace_uffd_detect_open_mode(open_mode);
+    }
+
+    if (open_mode == UFFD_USE_DEV_PATH) {
+        assert(uffd_dev >= 0);
+        return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags);
+    }
+
+    return syscall(__NR_userfaultfd, flags);
+#else
+    return -EINVAL;
+#endif
+}
+
+/**
+ * uffd_query_features: query UFFD features
+ *
+ * Returns: 0 on success, negative value in case of an error
+ *
+ * @features: parameter to receive 'uffdio_api.features'
+ */
+int uffd_query_features(uint64_t *features)
+{
+    int uffd_fd;
+    struct uffdio_api api_struct = { 0 };
+    int ret = -1;
+
+    uffd_fd = uffd_open(O_CLOEXEC);
+    if (uffd_fd < 0) {
+        trace_uffd_query_features_nosys(errno);
+        return -1;
+    }
+
+    api_struct.api = UFFD_API;
+    api_struct.features = 0;
+
+    if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
+        trace_uffd_query_features_api_failed(errno);
+        goto out;
+    }
+    *features = api_struct.features;
+    ret = 0;
+
+out:
+    close(uffd_fd);
+    return ret;
+}
+
+/**
+ * uffd_create_fd: create UFFD file descriptor
+ *
+ * Returns non-negative file descriptor or negative value in case of an error
+ *
+ * @features: UFFD features to request
+ * @non_blocking: create UFFD file descriptor for non-blocking operation
+ */
+int uffd_create_fd(uint64_t features, bool non_blocking)
+{
+    int uffd_fd;
+    int flags;
+    struct uffdio_api api_struct = { 0 };
+    uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER);
+
+    flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0);
+    uffd_fd = uffd_open(flags);
+    if (uffd_fd < 0) {
+        trace_uffd_create_fd_nosys(errno);
+        return -1;
+    }
+
+    api_struct.api = UFFD_API;
+    api_struct.features = features;
+    if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
+        trace_uffd_create_fd_api_failed(errno);
+        goto fail;
+    }
+    if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
+        trace_uffd_create_fd_api_noioctl(ioctl_mask, api_struct.ioctls);
+        goto fail;
+    }
+
+    return uffd_fd;
+
+fail:
+    close(uffd_fd);
+    return -1;
+}
+
+/**
+ * uffd_close_fd: close UFFD file descriptor
+ *
+ * @uffd_fd: UFFD file descriptor
+ */
+void uffd_close_fd(int uffd_fd)
+{
+    assert(uffd_fd >= 0);
+    close(uffd_fd);
+}
+
+/**
+ * uffd_register_memory: register memory range via UFFD-IO
+ *
+ * Returns 0 in case of success, negative value in case of an error
+ *
+ * @uffd_fd: UFFD file descriptor
+ * @addr: base address of memory range
+ * @length: length of memory range
+ * @mode: UFFD register mode (UFFDIO_REGISTER_MODE_MISSING, ...)
+ * @ioctls: optional pointer to receive supported IOCTL mask
+ */
+int uffd_register_memory(int uffd_fd, void *addr, uint64_t length,
+        uint64_t mode, uint64_t *ioctls)
+{
+    struct uffdio_register uffd_register;
+
+    uffd_register.range.start = (uintptr_t) addr;
+    uffd_register.range.len = length;
+    uffd_register.mode = mode;
+
+    if (ioctl(uffd_fd, UFFDIO_REGISTER, &uffd_register)) {
+        trace_uffd_register_memory_failed(addr, length, mode, errno);
+        return -1;
+    }
+    if (ioctls) {
+        *ioctls = uffd_register.ioctls;
+    }
+
+    return 0;
+}
+
+/**
+ * uffd_unregister_memory: un-register memory range with UFFD-IO
+ *
+ * Returns 0 in case of success, negative value in case of an error
+ *
+ * @uffd_fd: UFFD file descriptor
+ * @addr: base address of memory range
+ * @length: length of memory range
+ */
+int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length)
+{
+    struct uffdio_range uffd_range;
+
+    uffd_range.start = (uintptr_t) addr;
+    uffd_range.len = length;
+
+    if (ioctl(uffd_fd, UFFDIO_UNREGISTER, &uffd_range)) {
+        trace_uffd_unregister_memory_failed(addr, length, errno);
+        return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * uffd_change_protection: protect/un-protect memory range for writes via UFFD-IO
+ *
+ * Returns 0 on success, negative value in case of error
+ *
+ * @uffd_fd: UFFD file descriptor
+ * @addr: base address of memory range
+ * @length: length of memory range
+ * @wp: write-protect/unprotect
+ * @dont_wake: do not wake threads waiting on wr-protected page
+ */
+int uffd_change_protection(int uffd_fd, void *addr, uint64_t length,
+        bool wp, bool dont_wake)
+{
+    struct uffdio_writeprotect uffd_writeprotect;
+
+    uffd_writeprotect.range.start = (uintptr_t) addr;
+    uffd_writeprotect.range.len = length;
+    if (!wp && dont_wake) {
+        /* DONTWAKE is meaningful only on protection release */
+        uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
+    } else {
+        uffd_writeprotect.mode = (wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0);
+    }
+
+    if (ioctl(uffd_fd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) {
+        error_report("uffd_change_protection() failed: addr=%p len=%" PRIu64
+                " mode=%" PRIx64 " errno=%i", addr, length,
+                (uint64_t) uffd_writeprotect.mode, errno);
+        return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * uffd_copy_page: copy range of pages to destination via UFFD-IO
+ *
+ * Copy range of source pages to the destination to resolve
+ * missing page fault somewhere in the destination range.
+ *
+ * Returns 0 on success, negative value in case of an error
+ *
+ * @uffd_fd: UFFD file descriptor
+ * @dst_addr: destination base address
+ * @src_addr: source base address
+ * @length: length of the range to copy
+ * @dont_wake: do not wake threads waiting on missing page
+ */
+int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr,
+        uint64_t length, bool dont_wake)
+{
+    struct uffdio_copy uffd_copy;
+
+    uffd_copy.dst = (uintptr_t) dst_addr;
+    uffd_copy.src = (uintptr_t) src_addr;
+    uffd_copy.len = length;
+    uffd_copy.mode = dont_wake ? UFFDIO_COPY_MODE_DONTWAKE : 0;
+
+    if (ioctl(uffd_fd, UFFDIO_COPY, &uffd_copy)) {
+        error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64
+                " mode=%" PRIx64 " errno=%i", dst_addr, src_addr,
+                length, (uint64_t) uffd_copy.mode, errno);
+        return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * uffd_zero_page: fill range of pages with zeroes via UFFD-IO
+ *
+ * Fill range pages with zeroes to resolve missing page fault within the range.
+ *
+ * Returns 0 on success, negative value in case of an error
+ *
+ * @uffd_fd: UFFD file descriptor
+ * @addr: base address
+ * @length: length of the range to fill with zeroes
+ * @dont_wake: do not wake threads waiting on missing page
+ */
+int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake)
+{
+    struct uffdio_zeropage uffd_zeropage;
+
+    uffd_zeropage.range.start = (uintptr_t) addr;
+    uffd_zeropage.range.len = length;
+    uffd_zeropage.mode = dont_wake ? UFFDIO_ZEROPAGE_MODE_DONTWAKE : 0;
+
+    if (ioctl(uffd_fd, UFFDIO_ZEROPAGE, &uffd_zeropage)) {
+        error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64
+                " mode=%" PRIx64 " errno=%i", addr, length,
+                (uint64_t) uffd_zeropage.mode, errno);
+        return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * uffd_wakeup: wake up threads waiting on page UFFD-managed page fault resolution
+ *
+ * Wake up threads waiting on any page/pages from the designated range.
+ * The main use case is when during some period, page faults are resolved
+ * via UFFD-IO IOCTLs with MODE_DONTWAKE flag set, then after that all waits
+ * for the whole memory range are satisfied in a single call to uffd_wakeup().
+ *
+ * Returns 0 on success, negative value in case of an error
+ *
+ * @uffd_fd: UFFD file descriptor
+ * @addr: base address
+ * @length: length of the range
+ */
+int uffd_wakeup(int uffd_fd, void *addr, uint64_t length)
+{
+    struct uffdio_range uffd_range;
+
+    uffd_range.start = (uintptr_t) addr;
+    uffd_range.len = length;
+
+    if (ioctl(uffd_fd, UFFDIO_WAKE, &uffd_range)) {
+        error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64 " errno=%i",
+                addr, length, errno);
+        return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * uffd_read_events: read pending UFFD events
+ *
+ * Returns number of fetched messages, 0 if non is available or
+ * negative value in case of an error
+ *
+ * @uffd_fd: UFFD file descriptor
+ * @msgs: pointer to message buffer
+ * @count: number of messages that can fit in the buffer
+ */
+int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count)
+{
+    ssize_t res;
+    do {
+        res = read(uffd_fd, msgs, count * sizeof(struct uffd_msg));
+    } while (res < 0 && errno == EINTR);
+
+    if ((res < 0 && errno == EAGAIN)) {
+        return 0;
+    }
+    if (res < 0) {
+        error_report("uffd_read_events() failed: errno=%i", errno);
+        return -1;
+    }
+
+    return (int) (res / sizeof(struct uffd_msg));
+}
+
+/**
+ * uffd_poll_events: poll UFFD file descriptor for read
+ *
+ * Returns true if events are available for read, false otherwise
+ *
+ * @uffd_fd: UFFD file descriptor
+ * @tmo: timeout value
+ */
+bool uffd_poll_events(int uffd_fd, int tmo)
+{
+    int res;
+    struct pollfd poll_fd = { .fd = uffd_fd, .events = POLLIN, .revents = 0 };
+
+    do {
+        res = poll(&poll_fd, 1, tmo);
+    } while (res < 0 && errno == EINTR);
+
+    if (res == 0) {
+        return false;
+    }
+    if (res < 0) {
+        error_report("uffd_poll_events() failed: errno=%i", errno);
+        return false;
+    }
+
+    return (poll_fd.revents & POLLIN) != 0;
+}
index b1108dde78dcce3f8051cfc841d74d9deeca027b..234619dd5e69a694d47bb299eb2536e5790b9863 100644 (file)
@@ -51,7 +51,7 @@ int qemu_uuid_is_equal(const QemuUUID *lhv, const QemuUUID *rhv)
 void qemu_uuid_unparse(const QemuUUID *uuid, char *out)
 {
     const unsigned char *uu = &uuid->data[0];
-    snprintf(out, UUID_FMT_LEN + 1, UUID_FMT,
+    snprintf(out, UUID_STR_LEN, UUID_FMT,
              uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7],
              uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
 }
@@ -116,3 +116,17 @@ QemuUUID qemu_uuid_bswap(QemuUUID uuid)
     bswap16s(&uuid.fields.time_high_and_version);
     return uuid;
 }
+
+/* djb2 hash algorithm */
+uint32_t qemu_uuid_hash(const void *uuid)
+{
+    QemuUUID *qid = (QemuUUID *) uuid;
+    uint32_t h = 5381;
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(qid->data); i++) {
+        h = (h << 5) + h + qid->data[i];
+    }
+
+    return h;
+}
index 97dfa3fd57cc1eac43e308551ee59275740d730e..f8bab46c68fa61a8cf508cebcfb5b5420a1e6d79 100644 (file)
@@ -106,15 +106,17 @@ struct QEMUVFIOState {
  */
 static char *sysfs_find_group_file(const char *device, Error **errp)
 {
+    g_autoptr(GError) gerr = NULL;
     char *sysfs_link;
     char *sysfs_group;
     char *p;
     char *path = NULL;
 
     sysfs_link = g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group", device);
-    sysfs_group = g_malloc0(PATH_MAX);
-    if (readlink(sysfs_link, sysfs_group, PATH_MAX - 1) == -1) {
-        error_setg_errno(errp, errno, "Failed to find iommu group sysfs path");
+    sysfs_group = g_file_read_link(sysfs_link, &gerr);
+    if (gerr) {
+        error_setg(errp, "Failed to find iommu group sysfs path: %s",
+                   gerr->message);
         goto out;
     }
     p = strrchr(sysfs_group, '/');
@@ -163,7 +165,7 @@ void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
                             Error **errp)
 {
     void *p;
-    assert(QEMU_IS_ALIGNED(offset, qemu_real_host_page_size));
+    assert(QEMU_IS_ALIGNED(offset, qemu_real_host_page_size()));
     assert_bar_index_valid(s, index);
     p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
              prot, MAP_SHARED,
@@ -240,9 +242,9 @@ static int qemu_vfio_pci_read_config(QEMUVFIOState *s, void *buf,
                                     s->config_region_info.offset,
                                     s->config_region_info.size);
     assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size));
-    do {
-        ret = pread(s->device, buf, size, s->config_region_info.offset + ofs);
-    } while (ret == -1 && errno == EINTR);
+    ret = RETRY_ON_EINTR(
+        pread(s->device, buf, size, s->config_region_info.offset + ofs)
+    );
     return ret == size ? 0 : -errno;
 }
 
@@ -254,9 +256,9 @@ static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int
                                      s->config_region_info.offset,
                                      s->config_region_info.size);
     assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size));
-    do {
-        ret = pwrite(s->device, buf, size, s->config_region_info.offset + ofs);
-    } while (ret == -1 && errno == EINTR);
+    ret = RETRY_ON_EINTR(
+        pwrite(s->device, buf, size, s->config_region_info.offset + ofs)
+    );
     return ret == size ? 0 : -errno;
 }
 
@@ -271,7 +273,7 @@ static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
         if (!cap->next) {
             return;
         }
-        cap = (struct vfio_info_cap_header *)(buf + cap->next);
+        cap = buf + cap->next;
     }
 
     cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;
@@ -279,8 +281,8 @@ static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
     s->nb_iova_ranges = cap_iova_range->nr_iovas;
     if (s->nb_iova_ranges > 1) {
         s->usable_iova_ranges =
-            g_realloc(s->usable_iova_ranges,
-                      s->nb_iova_ranges * sizeof(struct IOVARange));
+            g_renew(struct IOVARange, s->usable_iova_ranges,
+                    s->nb_iova_ranges);
     }
 
     for (i = 0; i < s->nb_iova_ranges; i++) {
@@ -459,51 +461,40 @@ fail_container:
     return ret;
 }
 
-static void qemu_vfio_ram_block_added(RAMBlockNotifier *n,
-                                      void *host, size_t size)
+static void qemu_vfio_ram_block_added(RAMBlockNotifier *n, void *host,
+                                      size_t size, size_t max_size)
 {
     QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
-    trace_qemu_vfio_ram_block_added(s, host, size);
-    qemu_vfio_dma_map(s, host, size, false, NULL);
+    Error *local_err = NULL;
+    int ret;
+
+    trace_qemu_vfio_ram_block_added(s, host, max_size);
+    ret = qemu_vfio_dma_map(s, host, max_size, false, NULL, &local_err);
+    if (ret) {
+        error_reportf_err(local_err,
+                          "qemu_vfio_dma_map(%p, %zu) failed: ",
+                          host, max_size);
+    }
 }
 
-static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n,
-                                        void *host, size_t size)
+static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n, void *host,
+                                        size_t size, size_t max_size)
 {
     QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
     if (host) {
-        trace_qemu_vfio_ram_block_removed(s, host, size);
+        trace_qemu_vfio_ram_block_removed(s, host, max_size);
         qemu_vfio_dma_unmap(s, host);
     }
 }
 
-static int qemu_vfio_init_ramblock(RAMBlock *rb, void *opaque)
-{
-    void *host_addr = qemu_ram_get_host_addr(rb);
-    ram_addr_t length = qemu_ram_get_used_length(rb);
-    int ret;
-    QEMUVFIOState *s = opaque;
-
-    if (!host_addr) {
-        return 0;
-    }
-    ret = qemu_vfio_dma_map(s, host_addr, length, false, NULL);
-    if (ret) {
-        fprintf(stderr, "qemu_vfio_init_ramblock: failed %p %" PRId64 "\n",
-                host_addr, (uint64_t)length);
-    }
-    return 0;
-}
-
 static void qemu_vfio_open_common(QEMUVFIOState *s)
 {
     qemu_mutex_init(&s->lock);
     s->ram_notifier.ram_block_added = qemu_vfio_ram_block_added;
     s->ram_notifier.ram_block_removed = qemu_vfio_ram_block_removed;
-    ram_block_notifier_add(&s->ram_notifier);
     s->low_water_mark = QEMU_VFIO_IOVA_MIN;
     s->high_water_mark = QEMU_VFIO_IOVA_MAX;
-    qemu_ram_foreach_block(qemu_vfio_init_ramblock, s);
+    ram_block_notifier_add(&s->ram_notifier);
 }
 
 /**
@@ -602,9 +593,9 @@ static IOVAMapping *qemu_vfio_add_mapping(QEMUVFIOState *s,
     IOVAMapping m = {.host = host, .size = size, .iova = iova};
     IOVAMapping *insert;
 
-    assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
-    assert(QEMU_IS_ALIGNED(s->low_water_mark, qemu_real_host_page_size));
-    assert(QEMU_IS_ALIGNED(s->high_water_mark, qemu_real_host_page_size));
+    assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size()));
+    assert(QEMU_IS_ALIGNED(s->low_water_mark, qemu_real_host_page_size()));
+    assert(QEMU_IS_ALIGNED(s->high_water_mark, qemu_real_host_page_size()));
     trace_qemu_vfio_new_mapping(s, host, size, index, iova);
 
     assert(index >= 0);
@@ -621,7 +612,7 @@ static IOVAMapping *qemu_vfio_add_mapping(QEMUVFIOState *s,
 
 /* Do the DMA mapping with VFIO. */
 static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
-                                uint64_t iova)
+                                uint64_t iova, Error **errp)
 {
     struct vfio_iommu_type1_dma_map dma_map = {
         .argsz = sizeof(dma_map),
@@ -633,7 +624,7 @@ static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
     trace_qemu_vfio_do_mapping(s, host, iova, size);
 
     if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
-        error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
+        error_setg_errno(errp, errno, "VFIO_MAP_DMA failed");
         return -errno;
     }
     return 0;
@@ -655,7 +646,7 @@ static void qemu_vfio_undo_mapping(QEMUVFIOState *s, IOVAMapping *mapping,
 
     index = mapping - s->mappings;
     assert(mapping->size > 0);
-    assert(QEMU_IS_ALIGNED(mapping->size, qemu_real_host_page_size));
+    assert(QEMU_IS_ALIGNED(mapping->size, qemu_real_host_page_size()));
     assert(index >= 0 && index < s->nr_mappings);
     if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
         error_setg_errno(errp, errno, "VFIO_UNMAP_DMA failed");
@@ -673,13 +664,13 @@ static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
     if (QEMU_VFIO_DEBUG) {
         for (i = 0; i < s->nr_mappings - 1; ++i) {
             if (!(s->mappings[i].host < s->mappings[i + 1].host)) {
-                fprintf(stderr, "item %d not sorted!\n", i);
+                error_report("item %d not sorted!", i);
                 qemu_vfio_dump_mappings(s);
                 return false;
             }
             if (!(s->mappings[i].host + s->mappings[i].size <=
                   s->mappings[i + 1].host)) {
-                fprintf(stderr, "item %d overlap with next!\n", i);
+                error_report("item %d overlap with next!", i);
                 qemu_vfio_dump_mappings(s);
                 return false;
             }
@@ -688,8 +679,8 @@ static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
     return true;
 }
 
-static int
-qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
+static bool qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size,
+                                      uint64_t *iova, Error **errp)
 {
     int i;
 
@@ -704,14 +695,16 @@ qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
             s->usable_iova_ranges[i].end - s->low_water_mark + 1 == 0) {
             *iova = s->low_water_mark;
             s->low_water_mark += size;
-            return 0;
+            return true;
         }
     }
-    return -ENOMEM;
+    error_setg(errp, "fixed iova range not found");
+
+    return false;
 }
 
-static int
-qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
+static bool qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size,
+                                     uint64_t *iova, Error **errp)
 {
     int i;
 
@@ -726,10 +719,27 @@ qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
             s->high_water_mark - s->usable_iova_ranges[i].start + 1 == 0) {
             *iova = s->high_water_mark - size;
             s->high_water_mark = *iova;
-            return 0;
+            return true;
         }
     }
-    return -ENOMEM;
+    error_setg(errp, "temporary iova range not found");
+
+    return false;
+}
+
+/**
+ * qemu_vfio_water_mark_reached:
+ *
+ * Returns %true if high watermark has been reached, %false otherwise.
+ */
+static bool qemu_vfio_water_mark_reached(QEMUVFIOState *s, size_t size,
+                                         Error **errp)
+{
+    if (s->high_water_mark - s->low_water_mark + 1 < size) {
+        error_setg(errp, "iova exhausted (water mark reached)");
+        return true;
+    }
+    return false;
 }
 
 /* Map [host, host + size) area into a contiguous IOVA address space, and store
@@ -738,51 +748,45 @@ qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
  * mapping status within this area is not allowed).
  */
 int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
-                      bool temporary, uint64_t *iova)
+                      bool temporary, uint64_t *iova, Error **errp)
 {
-    int ret = 0;
     int index;
     IOVAMapping *mapping;
     uint64_t iova0;
 
-    assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size));
-    assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
+    assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size()));
+    assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size()));
     trace_qemu_vfio_dma_map(s, host, size, temporary, iova);
-    qemu_mutex_lock(&s->lock);
+    QEMU_LOCK_GUARD(&s->lock);
     mapping = qemu_vfio_find_mapping(s, host, &index);
     if (mapping) {
         iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host);
     } else {
-        if (s->high_water_mark - s->low_water_mark + 1 < size) {
-            ret = -ENOMEM;
-            goto out;
+        int ret;
+
+        if (qemu_vfio_water_mark_reached(s, size, errp)) {
+            return -ENOMEM;
         }
         if (!temporary) {
-            if (qemu_vfio_find_fixed_iova(s, size, &iova0)) {
-                ret = -ENOMEM;
-                goto out;
+            if (!qemu_vfio_find_fixed_iova(s, size, &iova0, errp)) {
+                return -ENOMEM;
             }
 
             mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
-            if (!mapping) {
-                ret = -ENOMEM;
-                goto out;
-            }
             assert(qemu_vfio_verify_mappings(s));
-            ret = qemu_vfio_do_mapping(s, host, size, iova0);
-            if (ret) {
+            ret = qemu_vfio_do_mapping(s, host, size, iova0, errp);
+            if (ret < 0) {
                 qemu_vfio_undo_mapping(s, mapping, NULL);
-                goto out;
+                return ret;
             }
             qemu_vfio_dump_mappings(s);
         } else {
-            if (qemu_vfio_find_temp_iova(s, size, &iova0)) {
-                ret = -ENOMEM;
-                goto out;
+            if (!qemu_vfio_find_temp_iova(s, size, &iova0, errp)) {
+                return -ENOMEM;
             }
-            ret = qemu_vfio_do_mapping(s, host, size, iova0);
-            if (ret) {
-                goto out;
+            ret = qemu_vfio_do_mapping(s, host, size, iova0, errp);
+            if (ret < 0) {
+                return ret;
             }
         }
     }
@@ -790,9 +794,7 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
     if (iova) {
         *iova = iova0;
     }
-out:
-    qemu_mutex_unlock(&s->lock);
-    return ret;
+    return 0;
 }
 
 /* Reset the high watermark and free all "temporary" mappings. */
@@ -826,14 +828,12 @@ void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host)
     }
 
     trace_qemu_vfio_dma_unmap(s, host);
-    qemu_mutex_lock(&s->lock);
+    QEMU_LOCK_GUARD(&s->lock);
     m = qemu_vfio_find_mapping(s, host, &index);
     if (!m) {
-        goto out;
+        return;
     }
     qemu_vfio_undo_mapping(s, m, NULL);
-out:
-    qemu_mutex_unlock(&s->lock);
 }
 
 static void qemu_vfio_reset(QEMUVFIOState *s)
@@ -849,10 +849,13 @@ void qemu_vfio_close(QEMUVFIOState *s)
     if (!s) {
         return;
     }
+
+    ram_block_notifier_remove(&s->ram_notifier);
+
     for (i = 0; i < s->nr_mappings; ++i) {
         qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
     }
-    ram_block_notifier_remove(&s->ram_notifier);
+
     g_free(s->usable_iova_ranges);
     s->nb_iova_ranges = 0;
     qemu_vfio_reset(s);
index 783d847a6db3021d16fb1048e751167818119b24..a9a48fffb8745d1fb4b74e79199d750e156ff61a 100644 (file)
@@ -8,6 +8,7 @@
  * later.  See the COPYING file in the top-level directory.
  */
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "qemu/vhost-user-server.h"
 #include "block/aio-wait.h"
@@ -65,7 +66,7 @@ static void vmsg_unblock_fds(VhostUserMsg *vmsg)
 {
     int i;
     for (i = 0; i < vmsg->fd_num; i++) {
-        qemu_set_nonblock(vmsg->fds[i]);
+        qemu_socket_set_nonblock(vmsg->fds[i]);
     }
 }
 
@@ -74,6 +75,26 @@ static void panic_cb(VuDev *vu_dev, const char *buf)
     error_report("vu_panic: %s", buf);
 }
 
+void vhost_user_server_inc_in_flight(VuServer *server)
+{
+    assert(!server->wait_idle);
+    qatomic_inc(&server->in_flight);
+}
+
+void vhost_user_server_dec_in_flight(VuServer *server)
+{
+    if (qatomic_fetch_dec(&server->in_flight) == 1) {
+        if (server->wait_idle) {
+            aio_co_wake(server->co_trip);
+        }
+    }
+}
+
+bool vhost_user_server_has_in_flight(VuServer *server)
+{
+    return qatomic_load_acquire(&server->in_flight) > 0;
+}
+
 static bool coroutine_fn
 vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
 {
@@ -102,11 +123,17 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
          * qio_channel_readv_full may have short reads, keeping calling it
          * until getting VHOST_USER_HDR_SIZE or 0 bytes in total
          */
-        rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
+        rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err);
         if (rc < 0) {
             if (rc == QIO_CHANNEL_ERR_BLOCK) {
                 assert(local_err == NULL);
-                qio_channel_yield(ioc, G_IO_IN);
+                if (server->ctx) {
+                    server->in_qio_channel_yield = true;
+                    qio_channel_yield(ioc, G_IO_IN);
+                    server->in_qio_channel_yield = false;
+                } else {
+                    return false;
+                }
                 continue;
             } else {
                 error_report_err(local_err);
@@ -173,9 +200,25 @@ static coroutine_fn void vu_client_trip(void *opaque)
     VuServer *server = opaque;
     VuDev *vu_dev = &server->vu_dev;
 
-    while (!vu_dev->broken && vu_dispatch(vu_dev)) {
-        /* Keep running */
+    while (!vu_dev->broken) {
+        if (server->quiescing) {
+            server->co_trip = NULL;
+            aio_wait_kick();
+            return;
+        }
+        /* vu_dispatch() returns false if server->ctx went away */
+        if (!vu_dispatch(vu_dev) && server->ctx) {
+            break;
+        }
+    }
+
+    if (vhost_user_server_has_in_flight(server)) {
+        /* Wait for requests to complete before we can unmap the memory */
+        server->wait_idle = true;
+        qemu_coroutine_yield();
+        server->wait_idle = false;
     }
+    assert(!vhost_user_server_has_in_flight(server));
 
     vu_deinit(vu_dev);
 
@@ -242,15 +285,15 @@ set_watch(VuDev *vu_dev, int fd, int vu_evt,
     VuFdWatch *vu_fd_watch = find_vu_fd_watch(server, fd);
 
     if (!vu_fd_watch) {
-        VuFdWatch *vu_fd_watch = g_new0(VuFdWatch, 1);
+        vu_fd_watch = g_new0(VuFdWatch, 1);
 
         QTAILQ_INSERT_TAIL(&server->vu_fd_watches, vu_fd_watch, next);
 
         vu_fd_watch->fd = fd;
         vu_fd_watch->cb = cb;
-        qemu_set_nonblock(fd);
-        aio_set_fd_handler(server->ioc->ctx, fd, true, kick_handler,
-                           NULL, NULL, vu_fd_watch);
+        qemu_socket_set_nonblock(fd);
+        aio_set_fd_handler(server->ctx, fd, kick_handler,
+                           NULL, NULL, NULL, vu_fd_watch);
         vu_fd_watch->vu_dev = vu_dev;
         vu_fd_watch->pvt = pvt;
     }
@@ -270,7 +313,7 @@ static void remove_watch(VuDev *vu_dev, int fd)
     if (!vu_fd_watch) {
         return;
     }
-    aio_set_fd_handler(server->ioc->ctx, fd, true, NULL, NULL, NULL, NULL);
+    aio_set_fd_handler(server->ctx, fd, NULL, NULL, NULL, NULL, NULL);
 
     QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
     g_free(vu_fd_watch);
@@ -315,17 +358,17 @@ static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
     /* TODO vu_message_write() spins if non-blocking! */
     qio_channel_set_blocking(server->ioc, false, NULL);
 
-    server->co_trip = qemu_coroutine_create(vu_client_trip, server);
+    qio_channel_set_follow_coroutine_ctx(server->ioc, true);
 
+    /* Attaching the AioContext starts the vu_client_trip coroutine */
     aio_context_acquire(server->ctx);
     vhost_user_server_attach_aio_context(server, server->ctx);
     aio_context_release(server->ctx);
 }
 
+/* server->ctx acquired by caller */
 void vhost_user_server_stop(VuServer *server)
 {
-    aio_context_acquire(server->ctx);
-
     qemu_bh_delete(server->restart_listener_bh);
     server->restart_listener_bh = NULL;
 
@@ -333,8 +376,8 @@ void vhost_user_server_stop(VuServer *server)
         VuFdWatch *vu_fd_watch;
 
         QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
-            aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true,
-                               NULL, NULL, NULL, vu_fd_watch);
+            aio_set_fd_handler(server->ctx, vu_fd_watch->fd,
+                               NULL, NULL, NULL, NULL, vu_fd_watch);
         }
 
         qio_channel_shutdown(server->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
@@ -342,8 +385,6 @@ void vhost_user_server_stop(VuServer *server)
         AIO_WAIT_WHILE(server->ctx, server->co_trip);
     }
 
-    aio_context_release(server->ctx);
-
     if (server->listener) {
         qio_net_listener_disconnect(server->listener);
         object_unref(OBJECT(server->listener));
@@ -373,14 +414,30 @@ void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx)
         return;
     }
 
-    qio_channel_attach_aio_context(server->ioc, ctx);
-
     QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
-        aio_set_fd_handler(ctx, vu_fd_watch->fd, true, kick_handler, NULL,
-                           NULL, vu_fd_watch);
+        aio_set_fd_handler(ctx, vu_fd_watch->fd, kick_handler, NULL,
+                           NULL, NULL, vu_fd_watch);
     }
 
-    aio_co_schedule(ctx, server->co_trip);
+    if (server->co_trip) {
+        /*
+         * The caller didn't fully shut down co_trip (this can happen on
+         * non-polling drains like in bdrv_graph_wrlock()). This is okay as long
+         * as it no longer tries to shut it down and we're guaranteed to still
+         * be in the same AioContext as before.
+         *
+         * co_ctx can still be NULL if we get multiple calls and only just
+         * scheduled a new coroutine in the else branch.
+         */
+        AioContext *co_ctx = qemu_coroutine_get_aio_context(server->co_trip);
+
+        assert(!server->quiescing);
+        assert(!co_ctx || co_ctx == ctx);
+    } else {
+        server->co_trip = qemu_coroutine_create(vu_client_trip, server);
+        assert(!server->in_qio_channel_yield);
+        aio_co_schedule(ctx, server->co_trip);
+    }
 }
 
 /* Called with server->ctx acquired */
@@ -390,14 +447,19 @@ void vhost_user_server_detach_aio_context(VuServer *server)
         VuFdWatch *vu_fd_watch;
 
         QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
-            aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true,
-                               NULL, NULL, NULL, vu_fd_watch);
+            aio_set_fd_handler(server->ctx, vu_fd_watch->fd,
+                               NULL, NULL, NULL, NULL, vu_fd_watch);
         }
-
-        qio_channel_detach_aio_context(server->ioc);
     }
 
     server->ctx = NULL;
+
+    if (server->ioc) {
+        if (server->in_qio_channel_yield) {
+            /* Stop receiving the next vhost-user message */
+            qio_channel_wake_read(server->ioc);
+        }
+    }
 }
 
 bool vhost_user_server_start(VuServer *server,
diff --git a/util/yank.c b/util/yank.c
new file mode 100644 (file)
index 0000000..abf47c3
--- /dev/null
@@ -0,0 +1,199 @@
+/*
+ * QEMU yank feature
+ *
+ * Copyright (c) Lukas Straub <lukasstraub2@web.de>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/thread.h"
+#include "qemu/queue.h"
+#include "qemu/lockable.h"
+#include "qapi/qapi-commands-yank.h"
+#include "qapi/qapi-visit-yank.h"
+#include "qapi/clone-visitor.h"
+#include "qemu/yank.h"
+
+struct YankFuncAndParam {
+    YankFn *func;
+    void *opaque;
+    QLIST_ENTRY(YankFuncAndParam) next;
+};
+
+struct YankInstanceEntry {
+    YankInstance *instance;
+    QLIST_HEAD(, YankFuncAndParam) yankfns;
+    QLIST_ENTRY(YankInstanceEntry) next;
+};
+
+typedef struct YankFuncAndParam YankFuncAndParam;
+typedef struct YankInstanceEntry YankInstanceEntry;
+
+/*
+ * This lock protects the yank_instance_list below. Because it's taken by
+ * OOB-capable commands, it must be "fast", i.e. it may only be held for a
+ * bounded, short time. See docs/devel/qapi-code-gen.txt for additional
+ * information.
+ */
+static QemuMutex yank_lock;
+
+static QLIST_HEAD(, YankInstanceEntry) yank_instance_list
+    = QLIST_HEAD_INITIALIZER(yank_instance_list);
+
+static bool yank_instance_equal(const YankInstance *a, const YankInstance *b)
+{
+    if (a->type != b->type) {
+        return false;
+    }
+
+    switch (a->type) {
+    case YANK_INSTANCE_TYPE_BLOCK_NODE:
+        return g_str_equal(a->u.block_node.node_name,
+                           b->u.block_node.node_name);
+
+    case YANK_INSTANCE_TYPE_CHARDEV:
+        return g_str_equal(a->u.chardev.id, b->u.chardev.id);
+
+    case YANK_INSTANCE_TYPE_MIGRATION:
+        return true;
+
+    default:
+        abort();
+    }
+}
+
+static YankInstanceEntry *yank_find_entry(const YankInstance *instance)
+{
+    YankInstanceEntry *entry;
+
+    QLIST_FOREACH(entry, &yank_instance_list, next) {
+        if (yank_instance_equal(entry->instance, instance)) {
+            return entry;
+        }
+    }
+    return NULL;
+}
+
+bool yank_register_instance(const YankInstance *instance, Error **errp)
+{
+    YankInstanceEntry *entry;
+
+    QEMU_LOCK_GUARD(&yank_lock);
+
+    if (yank_find_entry(instance)) {
+        error_setg(errp, "duplicate yank instance");
+        return false;
+    }
+
+    entry = g_new0(YankInstanceEntry, 1);
+    entry->instance = QAPI_CLONE(YankInstance, instance);
+    QLIST_INIT(&entry->yankfns);
+    QLIST_INSERT_HEAD(&yank_instance_list, entry, next);
+
+    return true;
+}
+
+void yank_unregister_instance(const YankInstance *instance)
+{
+    YankInstanceEntry *entry;
+
+    QEMU_LOCK_GUARD(&yank_lock);
+    entry = yank_find_entry(instance);
+    assert(entry);
+
+    assert(QLIST_EMPTY(&entry->yankfns));
+    QLIST_REMOVE(entry, next);
+    qapi_free_YankInstance(entry->instance);
+    g_free(entry);
+}
+
+void yank_register_function(const YankInstance *instance,
+                            YankFn *func,
+                            void *opaque)
+{
+    YankInstanceEntry *entry;
+    YankFuncAndParam *func_entry;
+
+    QEMU_LOCK_GUARD(&yank_lock);
+    entry = yank_find_entry(instance);
+    assert(entry);
+
+    func_entry = g_new0(YankFuncAndParam, 1);
+    func_entry->func = func;
+    func_entry->opaque = opaque;
+
+    QLIST_INSERT_HEAD(&entry->yankfns, func_entry, next);
+}
+
+void yank_unregister_function(const YankInstance *instance,
+                              YankFn *func,
+                              void *opaque)
+{
+    YankInstanceEntry *entry;
+    YankFuncAndParam *func_entry;
+
+    QEMU_LOCK_GUARD(&yank_lock);
+    entry = yank_find_entry(instance);
+    assert(entry);
+
+    QLIST_FOREACH(func_entry, &entry->yankfns, next) {
+        if (func_entry->func == func && func_entry->opaque == opaque) {
+            QLIST_REMOVE(func_entry, next);
+            g_free(func_entry);
+            return;
+        }
+    }
+
+    abort();
+}
+
+void qmp_yank(YankInstanceList *instances,
+              Error **errp)
+{
+    YankInstanceList *tail;
+    YankInstanceEntry *entry;
+    YankFuncAndParam *func_entry;
+
+    QEMU_LOCK_GUARD(&yank_lock);
+    for (tail = instances; tail; tail = tail->next) {
+        entry = yank_find_entry(tail->value);
+        if (!entry) {
+            error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, "Instance not found");
+            return;
+        }
+    }
+    for (tail = instances; tail; tail = tail->next) {
+        entry = yank_find_entry(tail->value);
+        assert(entry);
+        QLIST_FOREACH(func_entry, &entry->yankfns, next) {
+            func_entry->func(func_entry->opaque);
+        }
+    }
+}
+
+YankInstanceList *qmp_query_yank(Error **errp)
+{
+    YankInstanceEntry *entry;
+    YankInstanceList *ret;
+
+    ret = NULL;
+
+    QEMU_LOCK_GUARD(&yank_lock);
+    QLIST_FOREACH(entry, &yank_instance_list, next) {
+        YankInstanceList *new_entry;
+        new_entry = g_new0(YankInstanceList, 1);
+        new_entry->value = QAPI_CLONE(YankInstance, entry->instance);
+        new_entry->next = ret;
+        ret = new_entry;
+    }
+
+    return ret;
+}
+
+static void __attribute__((__constructor__)) yank_init(void)
+{
+    qemu_mutex_init(&yank_lock);
+}